From 6c5a79758c176e7f4d1291f1b5325b214742b06b Mon Sep 17 00:00:00 2001 From: Shobhit Singh Date: Mon, 24 Mar 2025 08:13:16 +0000 Subject: [PATCH 1/3] feat: set vscode and jupyter environments in the BQ jobs This change adds the vscode and jupyter environments to the application name set in the BigQuery jobs. This helps us understand the BigFrames usage coming from those environments. --- .pre-commit-config.yaml | 2 +- bigframes/session/clients.py | 21 +++++- bigframes/session/environment.py | 110 +++++++++++++++++++++++++++++++ 3 files changed, 130 insertions(+), 3 deletions(-) create mode 100644 bigframes/session/environment.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 2d11c951a1..8ca120bd07 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -38,6 +38,6 @@ repos: rev: v1.10.0 hooks: - id: mypy - additional_dependencies: [types-requests, types-tabulate, pandas-stubs] + additional_dependencies: [types-requests, types-tabulate, pandas-stubs<=2.2.3.241126] exclude: "^third_party" args: ["--check-untyped-defs", "--explicit-package-bases", "--ignore-missing-imports"] diff --git a/bigframes/session/clients.py b/bigframes/session/clients.py index 5b707ad478..2b24b6cb8b 100644 --- a/bigframes/session/clients.py +++ b/bigframes/session/clients.py @@ -35,6 +35,8 @@ import bigframes.exceptions as bfe import bigframes.version +from . 
import environment + _ENV_DEFAULT_PROJECT = "GOOGLE_CLOUD_PROJECT" _APPLICATION_NAME = f"bigframes/{bigframes.version.__version__} ibis/9.2.0" _SCOPES = ["https://www.googleapis.com/auth/cloud-platform"] @@ -57,6 +59,21 @@ def _get_default_credentials_with_project(): return pydata_google_auth.default(scopes=_SCOPES, use_local_webserver=False) +def _get_application_names(): + apps = [_APPLICATION_NAME] + + if environment.is_vscode(): + apps.append("vscode") + if environment.is_vscode_google_cloud_code_extension_installed(): + apps.append(environment.GOOGLE_CLOUD_CODE_EXTENSION_NAME) + elif environment.is_jupyter(): + apps.append("jupyter") + if environment.is_jupyter_bigquery_plugin_installed(): + apps.append(environment.BIGQUERY_JUPYTER_PLUGIN_NAME) + + return " ".join(apps) + + class ClientsProvider: """Provides client instances necessary to perform cloud operations.""" @@ -91,9 +108,9 @@ def __init__( ) self._application_name = ( - f"{_APPLICATION_NAME} {application_name}" + f"{_get_application_names()} {application_name}" if application_name - else _APPLICATION_NAME + else _get_application_names() ) self._project = project diff --git a/bigframes/session/environment.py b/bigframes/session/environment.py new file mode 100644 index 0000000000..c8b27e85ce --- /dev/null +++ b/bigframes/session/environment.py @@ -0,0 +1,110 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import importlib +import json +import os + +# The identifier for GCP VS Code extension +# https://cloud.google.com/code/docs/vscode/install +GOOGLE_CLOUD_CODE_EXTENSION_NAME = "googlecloudtools.cloudcode" + + +# The identifier for BigQuery Jupyter notebook plugin +# https://cloud.google.com/bigquery/docs/jupyterlab-plugin +BIGQUERY_JUPYTER_PLUGIN_NAME = "bigquery_jupyter_plugin" + + +def _is_vscode_extension_installed(extension_id: str) -> bool: + """ + Checks if a given Visual Studio Code extension is installed. + + Args: + extension_id: The ID of the extension (e.g., "ms-python.python"). + + Returns: + True if the extension is installed, False otherwise. + """ + try: + # Determine the user's VS Code extensions directory. + user_home = os.path.expanduser("~") + if os.name == "nt": # Windows + vscode_extensions_dir = os.path.join(user_home, ".vscode", "extensions") + elif os.name == "posix": # macOS and Linux + vscode_extensions_dir = os.path.join(user_home, ".vscode", "extensions") + else: + raise OSError("Unsupported operating system.") + + # Check if the extensions directory exists. + if not os.path.exists(vscode_extensions_dir): + return False + + # Iterate through the subdirectories in the extensions directory. + for item in os.listdir(vscode_extensions_dir): + item_path = os.path.join(vscode_extensions_dir, item) + if os.path.isdir(item_path) and item.startswith( + extension_id + "-" + ): # check if the folder starts with the extension ID. + # Further check for manifest file, as a more robust check. + manifest_path = os.path.join(item_path, "package.json") + if os.path.exists(manifest_path): + try: + with open(manifest_path, "r", encoding="utf-8") as f: + json.load( + f + ) # attempt to load json, if it fails, the extension is likely corrupted. + return True + except (FileNotFoundError, json.JSONDecodeError): + pass # Corrupted or incomplete extension, or manifest missing. 
+ return False + + except OSError as e: + print(f"Error: {e}") + return False + except Exception as e: + print(f"An unexpected error occurred: {e}") + return False + + +def _is_package_installed(package_name: str) -> bool: + """ + Checks if a Python package is installed. + + Args: + package_name: The name of the package to check (e.g., "requests", "numpy"). + + Returns: + True if the package is installed, False otherwise. + """ + try: + importlib.import_module(package_name) + return True + except ImportError: + return False + + +def is_vscode() -> bool: + return os.getenv("VSCODE_PID") is not None + + +def is_jupyter() -> bool: + return os.getenv("JPY_PARENT_PID") is not None + + +def is_vscode_google_cloud_code_extension_installed() -> bool: + return _is_vscode_extension_installed(GOOGLE_CLOUD_CODE_EXTENSION_NAME) + + +def is_jupyter_bigquery_plugin_installed() -> bool: + return _is_package_installed(BIGQUERY_JUPYTER_PLUGIN_NAME) From 1aa649df3b042e9eddd789128dc94938dd3e0516 Mon Sep 17 00:00:00 2001 From: Shobhit Singh Date: Wed, 26 Mar 2025 23:26:51 +0000 Subject: [PATCH 2/3] remove print, overlook all exceptions during extension detection --- bigframes/session/environment.py | 51 ++++++++++------------ tests/unit/session/test_clients.py | 68 ++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+), 29 deletions(-) diff --git a/bigframes/session/environment.py b/bigframes/session/environment.py index c8b27e85ce..1f3d48ac91 100644 --- a/bigframes/session/environment.py +++ b/bigframes/session/environment.py @@ -47,34 +47,27 @@ def _is_vscode_extension_installed(extension_id: str) -> bool: raise OSError("Unsupported operating system.") # Check if the extensions directory exists. - if not os.path.exists(vscode_extensions_dir): - return False - - # Iterate through the subdirectories in the extensions directory. 
- for item in os.listdir(vscode_extensions_dir): - item_path = os.path.join(vscode_extensions_dir, item) - if os.path.isdir(item_path) and item.startswith( - extension_id + "-" - ): # check if the folder starts with the extension ID. - # Further check for manifest file, as a more robust check. - manifest_path = os.path.join(item_path, "package.json") - if os.path.exists(manifest_path): - try: - with open(manifest_path, "r", encoding="utf-8") as f: - json.load( - f - ) # attempt to load json, if it fails, the extension is likely corrupted. - return True - except (FileNotFoundError, json.JSONDecodeError): - pass # Corrupted or incomplete extension, or manifest missing. - return False - - except OSError as e: - print(f"Error: {e}") - return False - except Exception as e: - print(f"An unexpected error occurred: {e}") - return False + if os.path.exists(vscode_extensions_dir): + # Iterate through the subdirectories in the extensions directory. + for item in os.listdir(vscode_extensions_dir): + item_path = os.path.join(vscode_extensions_dir, item) + if os.path.isdir(item_path) and item.startswith( + extension_id + "-" + ): # check if the folder starts with the extension ID. + # Further check for manifest file, as a more robust check. + manifest_path = os.path.join(item_path, "package.json") + if os.path.exists(manifest_path): + try: + with open(manifest_path, "r", encoding="utf-8") as f: + json.load(f) + return True + except (FileNotFoundError, json.JSONDecodeError): + # Corrupted or incomplete extension, or manifest missing. 
+ pass + except Exception: + pass + + return False def _is_package_installed(package_name: str) -> bool: @@ -90,7 +83,7 @@ def _is_package_installed(package_name: str) -> bool: try: importlib.import_module(package_name) return True - except ImportError: + except Exception: return False diff --git a/tests/unit/session/test_clients.py b/tests/unit/session/test_clients.py index 30ba2f9091..c9a12be584 100644 --- a/tests/unit/session/test_clients.py +++ b/tests/unit/session/test_clients.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os from typing import Optional import unittest.mock as mock @@ -99,6 +100,33 @@ def assert_clients_w_user_agent( assert_constructed_w_user_agent(provider.resourcemanagerclient, expected_user_agent) +def assert_constructed_wo_user_agent( + mock_client: mock.Mock, not_expected_user_agent: str +): + assert ( + not_expected_user_agent + not in mock_client.call_args.kwargs["client_info"].to_user_agent() + ) + + +def assert_clients_wo_user_agent( + provider: clients.ClientsProvider, not_expected_user_agent: str +): + assert_constructed_wo_user_agent(provider.bqclient, not_expected_user_agent) + assert_constructed_wo_user_agent( + provider.bqconnectionclient, not_expected_user_agent + ) + assert_constructed_wo_user_agent( + provider.bqstoragereadclient, not_expected_user_agent + ) + assert_constructed_wo_user_agent( + provider.cloudfunctionsclient, not_expected_user_agent + ) + assert_constructed_wo_user_agent( + provider.resourcemanagerclient, not_expected_user_agent + ) + + def test_user_agent_default(monkeypatch): monkeypatch_client_constructors(monkeypatch) provider = create_clients_provider(application_name=None) @@ -113,3 +141,43 @@ def test_user_agent_custom(monkeypatch): # We still need to include attribution to bigframes, even if there's also a # partner using the package. 
assert_clients_w_user_agent(provider, f"bigframes/{bigframes.version.__version__}") + + +@mock.patch.dict(os.environ, {}, clear=True) +def test_user_agent_not_in_vscode(monkeypatch): + monkeypatch_client_constructors(monkeypatch) + provider = create_clients_provider() + assert_clients_wo_user_agent(provider, "vscode") + + # We still need to include attribution to bigframes + assert_clients_w_user_agent(provider, f"bigframes/{bigframes.version.__version__}") + + +@mock.patch.dict(os.environ, {"VSCODE_PID": "12345"}, clear=True) +def test_user_agent_in_vscode(monkeypatch): + monkeypatch_client_constructors(monkeypatch) + provider = create_clients_provider() + assert_clients_w_user_agent(provider, "vscode") + + # We still need to include attribution to bigframes + assert_clients_w_user_agent(provider, f"bigframes/{bigframes.version.__version__}") + + +@mock.patch.dict(os.environ, {}, clear=True) +def test_user_agent_not_in_jupyter(monkeypatch): + monkeypatch_client_constructors(monkeypatch) + provider = create_clients_provider() + assert_clients_wo_user_agent(provider, "jupyter") + + # We still need to include attribution to bigframes + assert_clients_w_user_agent(provider, f"bigframes/{bigframes.version.__version__}") + + +@mock.patch.dict(os.environ, {"JPY_PARENT_PID": "12345"}, clear=True) +def test_user_agent_in_jupyter(monkeypatch): + monkeypatch_client_constructors(monkeypatch) + provider = create_clients_provider() + assert_clients_w_user_agent(provider, "jupyter") + + # We still need to include attribution to bigframes + assert_clients_w_user_agent(provider, f"bigframes/{bigframes.version.__version__}") From ab449f002971e35fd55dbafadd4eb143c6ae8fea Mon Sep 17 00:00:00 2001 From: Shobhit Singh Date: Wed, 26 Mar 2025 23:29:57 +0000 Subject: [PATCH 3/3] slight formatting fix --- bigframes/session/environment.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/bigframes/session/environment.py b/bigframes/session/environment.py index 
1f3d48ac91..3ed6ab98cd 100644 --- a/bigframes/session/environment.py +++ b/bigframes/session/environment.py @@ -51,9 +51,8 @@ def _is_vscode_extension_installed(extension_id: str) -> bool: # Iterate through the subdirectories in the extensions directory. for item in os.listdir(vscode_extensions_dir): item_path = os.path.join(vscode_extensions_dir, item) - if os.path.isdir(item_path) and item.startswith( - extension_id + "-" - ): # check if the folder starts with the extension ID. + if os.path.isdir(item_path) and item.startswith(extension_id + "-"): + # Check if the folder starts with the extension ID. # Further check for manifest file, as a more robust check. manifest_path = os.path.join(item_path, "package.json") if os.path.exists(manifest_path):