Skip to content
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions bigframes/functions/_function_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
)

from bigframes import clients
from bigframes import version as bigframes_version

if TYPE_CHECKING:
from bigframes.session import Session
Expand Down Expand Up @@ -129,6 +130,14 @@ def remote_function(
.. deprecated:: 0.0.1
This is an internal method. Please use :func:`bigframes.pandas.remote_function` instead.

.. warning::
To use remote functions with Bigframes 2.0 and onwards, please either set
``cloud_function_service_account`` to an explicit user-managed service account
(preferred) or set it to ``"default"`` to use the Compute Engine service
account (discouraged).

See https://cloud.google.com/functions/docs/securing/function-identity.

.. note::
Please make sure the following is set up before using this API:

Expand Down Expand Up @@ -314,6 +323,26 @@ def remote_function(

session = cast(bigframes.session.Session, session or bpd.get_global_session())

# Emit a FutureWarning if the user has not explicitly set cloud_function_service_account,
# either to a user-managed service account or to "default".
msg = (
"You have not explicitly set a user-managed `cloud_function_service_account`. "
"Using the default Compute Engine service account. "
"To use Bigframes 2.0, please explicitly set `cloud_function_service_account` "
'either to a user-managed service account (preferred) or to `"default"` '
"to use the Compute Engine service account (discouraged). "
"See, https://cloud.google.com/functions/docs/securing/function-identity."
)

if (
bigframes_version.__version__.startswith("1.")
and cloud_function_service_account is None
):
warnings.warn(msg, stacklevel=2, category=FutureWarning)

if cloud_function_service_account == "default":
cloud_function_service_account = None

# A BigQuery client is required to perform BQ operations
if not bigquery_client:
bigquery_client = session.bqclient
Expand Down
8 changes: 8 additions & 0 deletions bigframes/session/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1211,6 +1211,14 @@ def remote_function(
supports dataframe with column types ``Int64``/``Float64``/``boolean``/
``string``/``binary[pyarrow]``.

.. warning::
To use remote functions with Bigframes 2.0 and onwards, please either set
``cloud_function_service_account`` to an explicit user-managed service account
(preferred) or set it to ``"default"`` to use the Compute Engine service
account (discouraged).

See https://cloud.google.com/functions/docs/securing/function-identity.

.. note::
Please make sure the following is set up before using this API:

Expand Down
94 changes: 53 additions & 41 deletions notebooks/remote_functions/remote_function_usecases.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -44,7 +44,7 @@
},
{
"cell_type": "code",
"execution_count": 21,
"execution_count": 2,
"metadata": {
"id": "Y6QAttCqqMM0"
},
Expand All @@ -55,7 +55,7 @@
},
{
"cell_type": "code",
"execution_count": 22,
"execution_count": 3,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
Expand All @@ -66,17 +66,21 @@
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/google/home/shobs/code/bigframes1/venv/lib/python3.10/site-packages/IPython/core/interactiveshell.py:3550: UserWarning: Reading cached table from 2024-07-24 08:01:12.491984+00:00 to avoid incompatibilies with previous reads of this table. To read the latest version, set `use_cache=False` or close the current session with Session.close() or bigframes.pandas.close_session().\n",
" exec(code_obj, self.user_global_ns, self.user_ns)\n"
]
"data": {
"text/html": [
"Query job 1f6094e9-1942-477c-9ce3-87a614d71294 is DONE. 0 Bytes processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:1f6094e9-1942-477c-9ce3-87a614d71294&page=queryresults\">Open Job</a>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"Query job 9d155f10-e37a-4d20-b2ff-02868ecb58f4 is DONE. 582.8 kB processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:9d155f10-e37a-4d20-b2ff-02868ecb58f4&page=queryresults\">Open Job</a>"
"Query job ba19f29c-33d3-4f12-9605-ddeafb74918e is DONE. 582.8 kB processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:ba19f29c-33d3-4f12-9605-ddeafb74918e&page=queryresults\">Open Job</a>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
Expand All @@ -88,7 +92,7 @@
{
"data": {
"text/html": [
"Query job 5a524e70-12dc-4116-b416-04570bbf754e is DONE. 82.0 kB processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:5a524e70-12dc-4116-b416-04570bbf754e&page=queryresults\">Open Job</a>"
"Query job dd1ff8be-700a-4ce5-91a0-31413f70cfad is DONE. 82.0 kB processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:dd1ff8be-700a-4ce5-91a0-31413f70cfad&page=queryresults\">Open Job</a>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
Expand Down Expand Up @@ -125,49 +129,49 @@
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>36</th>\n",
" <td>Reds</td>\n",
" <td>Cubs</td>\n",
" <td>159</td>\n",
" <th>88</th>\n",
" <td>Royals</td>\n",
" <td>Athletics</td>\n",
" <td>176</td>\n",
" </tr>\n",
" <tr>\n",
" <th>358</th>\n",
" <th>106</th>\n",
" <td>Dodgers</td>\n",
" <td>Diamondbacks</td>\n",
" <td>223</td>\n",
" <td>Giants</td>\n",
" <td>216</td>\n",
" </tr>\n",
" <tr>\n",
" <th>416</th>\n",
" <td>Yankees</td>\n",
" <td>White Sox</td>\n",
" <td>216</td>\n",
" <th>166</th>\n",
" <td>Phillies</td>\n",
" <td>Royals</td>\n",
" <td>162</td>\n",
" </tr>\n",
" <tr>\n",
" <th>523</th>\n",
" <td>Rays</td>\n",
" <td>Athletics</td>\n",
" <td>187</td>\n",
" <th>247</th>\n",
" <td>Rangers</td>\n",
" <td>Royals</td>\n",
" <td>161</td>\n",
" </tr>\n",
" <tr>\n",
" <th>594</th>\n",
" <td>Pirates</td>\n",
" <td>Brewers</td>\n",
" <td>169</td>\n",
" <th>374</th>\n",
" <td>Athletics</td>\n",
" <td>Astros</td>\n",
" <td>161</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" homeTeamName awayTeamName duration_minutes\n",
"36 Reds Cubs 159\n",
"358 Dodgers Diamondbacks 223\n",
"416 Yankees White Sox 216\n",
"523 Rays Athletics 187\n",
"594 Pirates Brewers 169"
" homeTeamName awayTeamName duration_minutes\n",
"88 Royals Athletics 176\n",
"106 Dodgers Giants 216\n",
"166 Phillies Royals 162\n",
"247 Rangers Royals 161\n",
"374 Athletics Astros 161"
]
},
"execution_count": 22,
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
Expand Down Expand Up @@ -216,7 +220,7 @@
},
{
"cell_type": "code",
"execution_count": 23,
"execution_count": 4,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
Expand All @@ -226,10 +230,18 @@
"outputId": "19351206-116e-4da2-8ff0-f288b7745b27"
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/google/home/arwas/src1/python-bigquery-dataframes/bigframes/functions/_function_session.py:335: UserWarning: You have not explicitly set a user-managed cloud_function_service_account. Using the default compute service account, {cloud_function_service_account}. To use Bigframes 2.0, please set an explicit user-managed cloud_function_service_account or set cloud_function_service_account explicitly to `default`.See, https://cloud.google.com/functions/docs/securing/function-identity.\n",
" warnings.warn(msg, category=UserWarning)\n"
]
},
{
"data": {
"text/html": [
"Query job ec8d958d-93ef-45ae-8150-6ccfa8feb89a is DONE. 0 Bytes processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:ec8d958d-93ef-45ae-8150-6ccfa8feb89a&page=queryresults\">Open Job</a>"
"Query job 7c021760-59c4-4f3a-846c-9693a4d16eef is DONE. 0 Bytes processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:7c021760-59c4-4f3a-846c-9693a4d16eef&page=queryresults\">Open Job</a>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
Expand All @@ -242,7 +254,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Created cloud function 'projects/bigframes-dev/locations/us-central1/functions/bigframes-session54c8b0-e22dbecc9ec0374bda36bc23df3775b0-g8zp' and BQ remote function 'bigframes-dev._1b6c31ff1bcd5d2f6d86833cf8268317f1b12d57.bigframes_session54c8b0_e22dbecc9ec0374bda36bc23df3775b0_g8zp'.\n"
"Created cloud function 'projects/bigframes-dev/locations/us-central1/functions/bigframes-sessionca6012-ca541a90249f8b62951f38b7aba6a711-49to' and BQ remote function 'bigframes-dev._ed1e4d0f7d41174ba506d34d15dccf040d13f69e.bigframes_sessionca6012_ca541a90249f8b62951f38b7aba6a711_49to'.\n"
]
}
],
Expand Down Expand Up @@ -1430,7 +1442,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
"version": "3.9.19"
}
},
"nbformat": 4,
Expand Down
46 changes: 46 additions & 0 deletions tests/system/large/functions/test_remote_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import sys
import tempfile
import textwrap
import warnings

import google.api_core.exceptions
from google.cloud import bigquery, functions_v2, storage
Expand Down Expand Up @@ -1359,6 +1360,51 @@ def square_num(x):
)


@pytest.mark.flaky(retries=2, delay=120)
def test_remote_function_warns_default_cloud_function_service_account(scalars_dfs):
project = "bigframes-dev-perf"

gcf_service_account = "default"

rf_session = bigframes.Session(context=bigframes.BigQueryOptions(project=project))

try:
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter("always")

@rf_session.remote_function(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we don't actually have to deploy a function to test this warning. Will try something and add a commit if so.

[int],
int,
reuse=False,
cloud_function_service_account=gcf_service_account,
)
def square_num(x):
if x is None:
return x
return x * x

scalars_df, scalars_pandas_df = scalars_dfs

bf_int64_col = scalars_df["int64_col"]
bf_result_col = bf_int64_col.apply(square_num)
bf_result = bf_int64_col.to_frame().assign(result=bf_result_col).to_pandas()

pd_int64_col = scalars_pandas_df["int64_col"]
pd_result_col = pd_int64_col.apply(lambda x: x if x is None else x * x)
pd_result = pd_int64_col.to_frame().assign(result=pd_result_col)

assert_pandas_df_equal(bf_result, pd_result, check_dtype=False)

if len(w) > 0:
assert issubclass(w[0].category, FutureWarning)
assert "You have not explicitly set a user-managed" in str(w[0].message)
finally:
# clean up the gcp assets created for the remote function
cleanup_remote_function_assets(
rf_session.bqclient, rf_session.cloudfunctionsclient, square_num
)


@pytest.mark.flaky(retries=2, delay=120)
def test_remote_function_with_gcf_cmek():
# TODO(shobs): Automate the following set-up during testing in the test project.
Expand Down