-
Notifications
You must be signed in to change notification settings - Fork 58
feat: Add experimental polars execution #1747
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
5449696
98b300c
5d9e208
6a2ae4c
404f447
683f349
fa02dff
6c6b9b7
485cd98
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -94,6 +94,7 @@ def __init__( | |
requests_transport_adapters: Sequence[ | ||
Tuple[str, requests.adapters.BaseAdapter] | ||
] = (), | ||
enable_polars_execution: bool = False, | ||
): | ||
self._credentials = credentials | ||
self._project = project | ||
|
@@ -113,6 +114,7 @@ def __init__( | |
client_endpoints_override = {} | ||
|
||
self._client_endpoints_override = client_endpoints_override | ||
self._enable_polars_execution = enable_polars_execution | ||
|
||
@property | ||
def application_name(self) -> Optional[str]: | ||
|
@@ -424,3 +426,17 @@ def requests_transport_adapters( | |
SESSION_STARTED_MESSAGE.format(attribute="requests_transport_adapters") | ||
) | ||
self._requests_transport_adapters = value | ||
|
||
@property
def enable_polars_execution(self) -> bool:
    """Whether some simple query plans are executed locally with polars.

    Returns:
        bool: The flag currently stored on this options object.
    """
    polars_enabled = self._enable_polars_execution
    return polars_enabled
|
||
@enable_polars_execution.setter
def enable_polars_execution(self, value: bool):
    """Turn experimental local polars execution on or off.

    Args:
        value: True to let polars execute some simple query plans locally.

    Raises:
        ValueError: If the session has already started and ``value`` differs
            from the current setting.
    """
    # Review outcome: a session cannot change once started, so a late change
    # must raise rather than be silently accepted. Re-assigning the same
    # value is still allowed.
    # NOTE(review): `_session_started` is assumed to be the flag the sibling
    # setters consult before raising SESSION_STARTED_MESSAGE -- confirm
    # against the class definition.
    if self._session_started and self._enable_polars_execution != value:
        raise ValueError(
            SESSION_STARTED_MESSAGE.format(attribute="enable_polars_execution")
        )
    if value is True:
        msg = bfe.format_message(
            "Polars execution is an experimental feature, and may not be stable. Must have polars installed."
        )
        warnings.warn(msg, category=bfe.PreviewWarning)
    self._enable_polars_execution = value
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
# Copyright 2025 Google LLC | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
import pytest | ||
|
||
import bigframes | ||
from bigframes.testing.utils import assert_pandas_df_equal | ||
|
||
polars = pytest.importorskip("polars", reason="polars is required for this test") | ||
|
||
|
||
@pytest.fixture(scope="module")
def session_w_polars():
    """Yield a module-scoped session created with polars execution enabled.

    The session is closed once the tests using this fixture have finished.
    """
    polars_options = bigframes.BigQueryOptions(
        location="US", enable_polars_execution=True
    )
    polars_session = bigframes.Session(context=polars_options)
    yield polars_session
    # Teardown: close the session this fixture created.
    polars_session.close()
|
||
|
||
def test_polar_execution_sorted(session_w_polars, scalars_pandas_df_index):
    """Sort by index descending and select two columns.

    Checks that the result matches pandas and that the session's execution
    counter does not change.
    """
    selected_columns = ["int64_too", "bool_col"]
    # Snapshot the counter before any work so the comparison below is relative.
    executions_at_start = session_w_polars._metrics.execution_count
    bf_df = session_w_polars.read_pandas(scalars_pandas_df_index)

    expected = scalars_pandas_df_index.sort_index(ascending=False)[selected_columns]
    actual = bf_df.sort_index(ascending=False)[selected_columns].to_pandas()

    # No additional executions should have been recorded for this plan.
    assert session_w_polars._metrics.execution_count == executions_at_start
    assert_pandas_df_equal(actual, expected)
|
||
|
||
def test_polar_execution_sorted_filtered(session_w_polars, scalars_pandas_df_index):
    """Sort by index descending, then drop rows with nulls in two columns.

    Checks that the result matches pandas and that exactly one execution is
    recorded by the session.
    """
    required_columns = ["int64_col", "string_col"]
    executions_at_start = session_w_polars._metrics.execution_count
    bf_df = session_w_polars.read_pandas(scalars_pandas_df_index)

    pd_sorted = scalars_pandas_df_index.sort_index(ascending=False)
    expected = pd_sorted.dropna(subset=required_columns)

    bf_sorted = bf_df.sort_index(ascending=False)
    actual = bf_sorted.dropna(subset=required_columns).to_pandas()

    # Filter and isnull are not supported by the polars engine yet, so this
    # plan falls back to BigQuery execution and bumps the counter by one.
    assert session_w_polars._metrics.execution_count == (executions_at_start + 1)
    assert_pandas_df_equal(actual, expected)
|
||
|
||
def test_polar_execution_unsupported_sql_fallback(
    session_w_polars, scalars_pandas_df_index
):
    """Add a string-length column, an operation the polars engine lacks.

    Checks that the result matches pandas and that exactly one execution is
    recorded by the session.
    """
    executions_at_start = session_w_polars._metrics.execution_count
    bf_df = session_w_polars.read_pandas(scalars_pandas_df_index)

    # Build the pandas expectation on a copy so the shared fixture is untouched.
    expected = scalars_pandas_df_index.copy()
    expected["str_len_col"] = expected.string_col.str.len()

    bf_df["str_len_col"] = bf_df.string_col.str.len()
    actual = bf_df.to_pandas()

    # str len is not supported by the polars engine yet, so this plan falls
    # back to BigQuery execution and bumps the counter by one.
    assert session_w_polars._metrics.execution_count == (executions_at_start + 1)
    assert_pandas_df_equal(actual, expected)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should this be a compute option so it can be changed at runtime?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Kind of just want to keep it as a constant within a session for now (will need to commit if making this a GA feature, though). Turning polars execution on and off mid-session would make things like caching and multi-part execution really tricky.