From 218b7c20d5b817e7d93fc3af7b1f622d25ced1b8 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 19 Dec 2025 01:04:33 +0000 Subject: [PATCH 01/25] feat: Enhance Series and DataFrame display with anywidget --- bigframes/series.py | 120 ++++- notebooks/dataframes/anywidget_mode.ipynb | 608 ++++++++++++++++++---- tests/js/package-lock.json | 99 ++++ tests/js/package.json | 1 + tests/system/small/test_anywidget.py | 59 ++- 5 files changed, 761 insertions(+), 126 deletions(-) diff --git a/bigframes/series.py b/bigframes/series.py index de3ce276d8..c538960adb 100644 --- a/bigframes/series.py +++ b/bigframes/series.py @@ -22,6 +22,7 @@ import itertools import numbers import textwrap +import traceback import typing from typing import ( Any, @@ -48,6 +49,7 @@ import pyarrow as pa import typing_extensions +import bigframes._config.display_options as display_options import bigframes.core from bigframes.core import agg_expressions, groupby, log_adapter import bigframes.core.block_transforms as block_ops @@ -568,6 +570,106 @@ def reset_index( block = block.assign_label(self._value_column, name) return bigframes.dataframe.DataFrame(block) + def _get_anywidget_bundle( + self, include=None, exclude=None + ) -> tuple[dict[str, Any], dict[str, Any]]: + """ + Helper method to create and return the anywidget mimebundle for Series. + """ + from bigframes import display + + # Convert Series to DataFrame for TableWidget + series_df = self.to_frame() + + # Create and display the widget + widget = display.TableWidget(series_df) + widget_repr_result = widget._repr_mimebundle_(include=include, exclude=exclude) + + # Handle both tuple (data, metadata) and dict returns + if isinstance(widget_repr_result, tuple): + widget_repr, widget_metadata = widget_repr_result + else: + widget_repr = widget_repr_result + widget_metadata = {} + + widget_repr = dict(widget_repr) + + # Add text representation + widget_repr["text/plain"] = self._create_text_representation( + widget._cached_data, widget.row_count + ) + + return widget_repr, widget_metadata + + def _create_text_representation( + self, pandas_df: pandas.DataFrame, total_rows: typing.Optional[int] + ) -> str: + """Create a text representation of the Series.""" + opts = bigframes.options.display + with display_options.pandas_repr(opts): + import pandas.io.formats + + # safe to mutate this, this dict is owned by this code, and does not affect global config + to_string_kwargs = ( + pandas.io.formats.format.get_series_repr_params() # type: ignore + ) + if len(self._block.index_columns) == 0: + to_string_kwargs.update({"index": False}) + # Get the first column since Series DataFrame has only one column + pd_series = pandas_df.iloc[:, 0] + repr_string = pd_series.to_string(**to_string_kwargs) + + lines = repr_string.split("\n") + + if total_rows is not None and total_rows > len(pd_series): + lines.append("...") + + lines.append("") + lines.append(f"[{total_rows} rows]") + return "\n".join(lines) + + def _repr_mimebundle_(self, include=None, exclude=None): + """ + Custom display method for IPython/Jupyter environments. + This is called by IPython's display system when the object is displayed. + """ + opts = bigframes.options.display + + # Only handle widget display in anywidget mode + if opts.repr_mode == "anywidget": + try: + return self._get_anywidget_bundle(include=include, exclude=exclude) + + except ImportError: + # Anywidget is an optional dependency, so warn rather than fail. + warnings.warn( + "Anywidget mode is not available. " + "Please `pip install anywidget traitlets` or `pip install 'bigframes[anywidget]'` to use interactive tables. " + f"Falling back to static HTML. Error: {traceback.format_exc()}" + ) + # Fall back to regular HTML representation + pass + + # Continue with regular HTML rendering for non-anywidget modes + self._cached() + pandas_df, row_count, query_job = self._block.retrieve_repr_request_results( + opts.max_rows + ) + self._set_internal_query_job(query_job) + + pd_series = pandas_df.iloc[:, 0] + + # Use pandas Series _repr_html_ if available, otherwise create basic HTML + try: + html_string = pd_series._repr_html_() + except AttributeError: + # Fallback for pandas versions without _repr_html_ + html_string = f"
{pd_series.to_string()}
" + + text_representation = self._create_text_representation(pandas_df, row_count) + + return {"text/html": html_string, "text/plain": text_representation} + def __repr__(self) -> str: # Protect against errors with uninitialized Series. See: # https://github.com/googleapis/python-bigquery-dataframes/issues/728 @@ -582,24 +684,16 @@ def __repr__(self) -> str: max_results = opts.max_rows # anywdiget mode uses the same display logic as the "deferred" mode # for faster execution - if opts.repr_mode in ("deferred", "anywidget"): + if opts.repr_mode == "deferred": return formatter.repr_query_job(self._compute_dry_run()) self._cached() - pandas_df, _, query_job = self._block.retrieve_repr_request_results(max_results) + pandas_df, row_count, query_job = self._block.retrieve_repr_request_results( + max_results + ) self._set_internal_query_job(query_job) - pd_series = pandas_df.iloc[:, 0] - - import pandas.io.formats - - # safe to mutate this, this dict is owned by this code, and does not affect global config - to_string_kwargs = pandas.io.formats.format.get_series_repr_params() # type: ignore - if len(self._block.index_columns) == 0: - to_string_kwargs.update({"index": False}) - repr_string = pd_series.to_string(**to_string_kwargs) - - return repr_string + return self._create_text_representation(pandas_df, row_count) def astype( self, diff --git a/notebooks/dataframes/anywidget_mode.ipynb b/notebooks/dataframes/anywidget_mode.ipynb index 0ce286ce64..e5d6ac90aa 100644 --- a/notebooks/dataframes/anywidget_mode.ipynb +++ b/notebooks/dataframes/anywidget_mode.ipynb @@ -106,17 +106,17 @@ "name": "stdout", "output_type": "stream", "text": [ - "state gender year name number\n", - " AL F 1910 Lillian 99\n", - " AL F 1910 Ruby 204\n", - " AL F 1910 Helen 76\n", - " AL F 1910 Eunice 41\n", - " AR F 1910 Dora 42\n", - " CA F 1910 Edna 62\n", - " CA F 1910 Helen 239\n", - " CO F 1910 Alice 46\n", - " FL F 1910 Willie 71\n", - " FL F 1910 Thelma 65\n", + "state gender year name number\n", + " AL F 1910 Vera 71\n", + " AR F 1910 Viola 37\n", + " AR F 1910 Alice 57\n", + " AR F 1910 Edna 95\n", + " AR F 1910 Ollie 40\n", + " CA F 1910 Beatrice 37\n", + " CT F 1910 Marion 36\n", + " CT F 1910 Marie 36\n", + " FL F 1910 Alice 53\n", + " GA F 1910 Thelma 133\n", "...\n", "\n", "[5552452 rows x 5 columns]\n" @@ -142,11 +142,64 @@ "id": "42bb02ab", "metadata": {}, "outputs": [ + { + "data": { + "text/html": [ + "✅ Completed. \n", + " Query processed 171.4 MB in 39 seconds of slot time. [Job bigframes-dev:US.bce70478-6a42-48ad-87e2-4cc7dcc24cb7 details]\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "✅ Completed. \n", + " Query processed 88.8 MB in a moment of slot time.\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "✅ Completed. " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "name": "stdout", "output_type": "stream", "text": [ - "Computation deferred. Computation will process 44.4 MB\n" + "1910\n", + "1910\n", + "1910\n", + "1910\n", + "1910\n", + "1910\n", + "1910\n", + "1910\n", + "1910\n", + "1910\n", + "Name: year, Length: 10, dtype: Int64\n", + "...\n", + "\n", + "[5552452 rows]\n" ] } ], @@ -155,6 +208,73 @@ "print(test_series)" ] }, + { + "cell_type": "code", + "execution_count": 6, + "id": "da23e0f3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "✅ Completed. \n", + " Query processed 88.8 MB in 4 seconds of slot time. [Job bigframes-dev:US.job_pZze_GGj23gUfvIxPHRZOthUQnk- details]\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "✅ Completed. \n", + " Query processed 88.8 MB in 2 seconds of slot time. [Job bigframes-dev:US.job_dAwaewh1LyOlW4uZtYcvy8vqIv35 details]\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "69512bd9ceb74f89bb80ac8962999386", + "version_major": 2, + "version_minor": 1 + }, + "text/plain": [ + "1910\n", + "1910\n", + "1910\n", + "1910\n", + "1910\n", + "1910\n", + "1910\n", + "1910\n", + "1910\n", + "1910\n", + "Name: year, Length: 10, dtype: Int64\n", + "...\n", + "\n", + "[5552452 rows]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_series" + ] + }, { "cell_type": "markdown", "id": "7bcf1bb7", @@ -165,14 +285,16 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "id": "ce250157", "metadata": {}, "outputs": [ { "data": { "text/html": [ - "✅ Completed. " + "✅ Completed. \n", + " Query processed 215.9 MB in 9 seconds of slot time. [Job bigframes-dev:US.job_9r9FBU8_-_d5eViVu8KYTdEJWPQf details]\n", + " " ], "text/plain": [ "" @@ -184,7 +306,9 @@ { "data": { "text/html": [ - "✅ Completed. " + "✅ Completed. \n", + " Query processed 215.9 MB in 6 seconds of slot time. [Job bigframes-dev:US.job_mwuMPvfGAyBIVjlNzhFDiXKw-5P1 details]\n", + " " ], "text/plain": [ "" @@ -196,7 +320,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "775e84ca212c4867bb889266b830ae68", + "model_id": "abf9a24044f84e47b2726f673c4b5d89", "version_major": 2, "version_minor": 1 }, @@ -329,7 +453,7 @@ "[5552452 rows x 5 columns]" ] }, - "execution_count": 6, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -371,14 +495,16 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "id": "6920d49b", "metadata": {}, "outputs": [ { "data": { "text/html": [ - "✅ Completed. " + "✅ Completed. \n", + " Query processed 215.9 MB in 5 seconds of slot time. [Job bigframes-dev:US.job_Q_tE-b6K1I_8uzC-oov-X8sVZHB5 details]\n", + " " ], "text/plain": [ "" @@ -390,7 +516,9 @@ { "data": { "text/html": [ - "✅ Completed. " + "✅ Completed. \n", + " Query processed 215.9 MB in 5 seconds of slot time. [Job bigframes-dev:US.job_xs4XXSxBz4wcVlYc7pDQqmMZB-J0 details]\n", + " " ], "text/plain": [ "" @@ -409,15 +537,15 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "bf4224f8022042aea6d72507ddb5570b", + "model_id": "37be40847a41401980b30801e4a0dc9c", "version_major": 2, "version_minor": 1 }, "text/plain": [ - "" + "" ] }, - "execution_count": 7, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -444,7 +572,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "id": "12b68f15", "metadata": {}, "outputs": [ @@ -481,7 +609,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "id": "a9d5d13a", "metadata": {}, "outputs": [ @@ -489,7 +617,7 @@ "data": { "text/html": [ "✅ Completed. \n", - " Query processed 171.4 MB in a moment of slot time.\n", + " Query processed 215.9 MB in a moment of slot time.\n", " " ], "text/plain": [ @@ -503,7 +631,7 @@ "data": { "text/html": [ "✅ Completed. \n", - " Query processed 0 Bytes in a moment of slot time.\n", + " Query processed 215.9 MB in a moment of slot time.\n", " " ], "text/plain": [ @@ -523,15 +651,15 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "8d9bfeeba3ca4d11a56dccb28aacde23", + "model_id": "02dbf8257f4b46b19ed221bc8bdf9260", "version_major": 2, "version_minor": 1 }, "text/plain": [ - "" + "" ] }, - "execution_count": 9, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -555,7 +683,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "id": "added-cell-1", "metadata": {}, "outputs": [ @@ -563,7 +691,7 @@ "data": { "text/html": [ "✅ Completed. \n", - " Query processed 85.9 kB in 13 seconds of slot time.\n", + " Query processed 0 Bytes in a moment of slot time.\n", " " ], "text/plain": [ @@ -624,7 +752,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "9fce25a077604e4882144d46d0d4ba45", + "model_id": "af06cffd02064b4d92626a91bb992284", "version_major": 2, "version_minor": 1 }, @@ -667,25 +795,25 @@ " \n", " \n", " 0\n", - " {'application_number': None, 'class_internatio...\n", + " {'application_number': 'DE56', 'class_internat...\n", " gs://gcs-public-data--labeled-patents/espacene...\n", " EU\n", " DE\n", - " 29.08.018\n", - " E04H 6/12\n", + " 03.10.2018\n", + " A01K 31/00\n", " <NA>\n", - " 18157874.1\n", - " 21.02.2018\n", - " 22.02.2017\n", - " Liedtke & Partner Patentanw√§lte\n", - " SHB Hebezeugbau GmbH\n", - " VOLGER, Alexander\n", - " STEUERUNGSSYSTEM F√úR AUTOMATISCHE PARKH√ÑUSER\n", - " EP 3 366 869 A1\n", + " 18171005.4\n", + " 05.02.2015\n", + " 05.02.2014\n", + " Stork Bamberger Patentanw√§lte\n", + " Linco Food Systems A/S\n", + " Thrane, Uffe\n", + " MASTH√ÑHNCHENCONTAINER ALS BESTANDTEIL EINER E...\n", + " EP 3 381 276 A1\n", " \n", " \n", " 1\n", - " {'application_number': None, 'class_internatio...\n", + " {'application_number': 'DE73', 'class_internat...\n", " gs://gcs-public-data--labeled-patents/espacene...\n", " EU\n", " DE\n", @@ -703,25 +831,7 @@ " \n", " \n", " 2\n", - " {'application_number': None, 'class_internatio...\n", - " gs://gcs-public-data--labeled-patents/espacene...\n", - " EU\n", - " DE\n", - " 03.10.2018\n", - " H01L 21/20\n", - " <NA>\n", - " 18166536.5\n", - " 16.02.2016\n", - " <NA>\n", - " Scheider, Sascha et al\n", - " EV Group E. Thallner GmbH\n", - " Kurz, Florian\n", - " VORRICHTUNG ZUM BONDEN VON SUBSTRATEN\n", - " EP 3 382 744 A1\n", - " \n", - " \n", - " 3\n", - " {'application_number': None, 'class_internatio...\n", + " {'application_number': 'DE5', 'class_internati...\n", " gs://gcs-public-data--labeled-patents/espacene...\n", " EU\n", " DE\n", @@ -738,22 +848,40 @@ " EP 3 382 553 A1\n", " \n", " \n", + " 3\n", + " {'application_number': 'DE2', 'class_internati...\n", + " gs://gcs-public-data--labeled-patents/espacene...\n", + " EU\n", + " DE\n", + " 29.08.018\n", + " E04H 6/12\n", + " <NA>\n", + " 18157874.1\n", + " 21.02.2018\n", + " 22.02.2017\n", + " Liedtke & Partner Patentanw√§lte\n", + " SHB Hebezeugbau GmbH\n", + " VOLGER, Alexander\n", + " STEUERUNGSSYSTEM F√úR AUTOMATISCHE PARKH√ÑUSER\n", + " EP 3 366 869 A1\n", + " \n", + " \n", " 4\n", - " {'application_number': None, 'class_internatio...\n", + " {'application_number': 'DE70', 'class_internat...\n", " gs://gcs-public-data--labeled-patents/espacene...\n", " EU\n", " DE\n", " 03.10.2018\n", - " A01K 31/00\n", + " H01L 21/20\n", " <NA>\n", - " 18171005.4\n", - " 05.02.2015\n", - " 05.02.2014\n", - " Stork Bamberger Patentanw√§lte\n", - " Linco Food Systems A/S\n", - " Thrane, Uffe\n", - " MASTH√ÑHNCHENCONTAINER ALS BESTANDTEIL EINER E...\n", - " EP 3 381 276 A1\n", + " 18166536.5\n", + " 16.02.2016\n", + " <NA>\n", + " Scheider, Sascha et al\n", + " EV Group E. Thallner GmbH\n", + " Kurz, Florian\n", + " VORRICHTUNG ZUM BONDEN VON SUBSTRATEN\n", + " EP 3 382 744 A1\n", " \n", " \n", "\n", @@ -762,11 +890,11 @@ ], "text/plain": [ " result \\\n", - "0 {'application_number': None, 'class_internatio... \n", - "1 {'application_number': None, 'class_internatio... \n", - "2 {'application_number': None, 'class_internatio... \n", - "3 {'application_number': None, 'class_internatio... \n", - "4 {'application_number': None, 'class_internatio... \n", + "0 {'application_number': 'DE56', 'class_internat... \n", + "1 {'application_number': 'DE73', 'class_internat... \n", + "2 {'application_number': 'DE5', 'class_internati... \n", + "3 {'application_number': 'DE2', 'class_internati... \n", + "4 {'application_number': 'DE70', 'class_internat... \n", "\n", " gcs_path issuer language \\\n", "0 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", @@ -776,37 +904,37 @@ "4 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", "\n", " publication_date class_international class_us application_number \\\n", - "0 29.08.018 E04H 6/12 18157874.1 \n", + "0 03.10.2018 A01K 31/00 18171005.4 \n", "1 03.10.2018 H05B 6/12 18165514.3 \n", - "2 03.10.2018 H01L 21/20 18166536.5 \n", - "3 03.10.2018 G06F 11/30 18157347.8 \n", - "4 03.10.2018 A01K 31/00 18171005.4 \n", + "2 03.10.2018 G06F 11/30 18157347.8 \n", + "3 29.08.018 E04H 6/12 18157874.1 \n", + "4 03.10.2018 H01L 21/20 18166536.5 \n", "\n", " filing_date priority_date_eu representative_line_1_eu \\\n", - "0 21.02.2018 22.02.2017 Liedtke & Partner Patentanw√§lte \n", + "0 05.02.2015 05.02.2014 Stork Bamberger Patentanw√§lte \n", "1 03.04.2018 30.03.2017 \n", - "2 16.02.2016 Scheider, Sascha et al \n", - "3 19.02.2018 31.03.2017 Hoffmann Eitle \n", - "4 05.02.2015 05.02.2014 Stork Bamberger Patentanw√§lte \n", + "2 19.02.2018 31.03.2017 Hoffmann Eitle \n", + "3 21.02.2018 22.02.2017 Liedtke & Partner Patentanw√§lte \n", + "4 16.02.2016 Scheider, Sascha et al \n", "\n", " applicant_line_1 inventor_line_1 \\\n", - "0 SHB Hebezeugbau GmbH VOLGER, Alexander \n", + "0 Linco Food Systems A/S Thrane, Uffe \n", "1 BSH Hausger√§te GmbH Acero Acero, Jesus \n", - "2 EV Group E. Thallner GmbH Kurz, Florian \n", - "3 FUJITSU LIMITED Kukihara, Kensuke \n", - "4 Linco Food Systems A/S Thrane, Uffe \n", + "2 FUJITSU LIMITED Kukihara, Kensuke \n", + "3 SHB Hebezeugbau GmbH VOLGER, Alexander \n", + "4 EV Group E. Thallner GmbH Kurz, Florian \n", "\n", " title_line_1 number \n", - "0 STEUERUNGSSYSTEM F√úR AUTOMATISCHE PARKH√ÑUSER EP 3 366 869 A1 \n", + "0 MASTH√ÑHNCHENCONTAINER ALS BESTANDTEIL EINER E... EP 3 381 276 A1 \n", "1 VORRICHTUNG ZUR INDUKTIVEN ENERGIE√úBERTRAGUNG EP 3 383 141 A2 \n", - "2 VORRICHTUNG ZUM BONDEN VON SUBSTRATEN EP 3 382 744 A1 \n", - "3 METHOD EXECUTED BY A COMPUTER, INFORMATION PRO... EP 3 382 553 A1 \n", - "4 MASTH√ÑHNCHENCONTAINER ALS BESTANDTEIL EINER E... EP 3 381 276 A1 \n", + "2 METHOD EXECUTED BY A COMPUTER, INFORMATION PRO... EP 3 382 553 A1 \n", + "3 STEUERUNGSSYSTEM F√úR AUTOMATISCHE PARKH√ÑUSER EP 3 366 869 A1 \n", + "4 VORRICHTUNG ZUM BONDEN VON SUBSTRATEN EP 3 382 744 A1 \n", "\n", "[5 rows x 15 columns]" ] }, - "execution_count": 10, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -823,6 +951,294 @@ " LIMIT 5;\n", "\"\"\")" ] + }, + { + "cell_type": "markdown", + "id": "e89b4784", + "metadata": {}, + "source": [ + "### Displaying Nested Data (STRUCTs and ARRAYs)\n", + "BigQuery DataFrames automatically flattens nested STRUCT and ARRAY columns into separate, more manageable columns when displayed in `anywidget` mode. This approach simplifies interaction and readability, as it avoids deeply nested or collapsible elements.\n", + "\n", + "This flattening ensures that all data is directly visible and sortable, enhancing the interactive table experience.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "4fcebbd4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "✅ Completed. \n", + " Query processed 0 Bytes in a moment of slot time.\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "✅ Completed. " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "✅ Completed. " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/display/html.py:259: FutureWarning: errors='ignore' is deprecated and will raise in a future version. Use to_numeric without passing `errors` and catch exceptions explicitly instead\n", + " exploded_df[col] = pd.to_numeric(\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/display/html.py:259: FutureWarning: errors='ignore' is deprecated and will raise in a future version. Use to_numeric without passing `errors` and catch exceptions explicitly instead\n", + " exploded_df[col] = pd.to_numeric(\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "41a82986b609418799d64d6cc6d8cede", + "version_major": 2, + "version_minor": 1 + }, + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idstruct_colarray_colnested_struct_array
01{'name': 'Alice', 'age': 30}[10 20 30][{'item': 'A', 'value': 100} {'item': 'B', 'va...
12{'name': 'Bob', 'age': 25}[40 50][{'item': 'C', 'value': 300}]
23{'name': 'Charlie', 'age': 35}[60 70 80][{'item': 'D', 'value': 400} {'item': 'E', 'va...
34{'name': 'David', 'age': 40}[ 90 100 110][{'item': 'F', 'value': 600} {'item': 'G', 'va...
45{'name': 'Eve', 'age': 45}[120 130 140][{'item': 'H', 'value': 800} {'item': 'I', 'va...
56{'name': 'Frank', 'age': 50}[150 160 170][{'item': 'J', 'value': 1000} {'item': 'K', 'v...
67{'name': 'Grace', 'age': 55}[180 190][{'item': 'L', 'value': 1200}]
78{'name': 'Heidi', 'age': 60}[200 210 220][{'item': 'M', 'value': 1300} {'item': 'N', 'v...
89{'name': 'Ivan', 'age': 65}[230 240 250 260][{'item': 'O', 'value': 1500} {'item': 'P', 'v...
910{'name': 'Judy', 'age': 70}[270 280][{'item': 'Q', 'value': 1700}]
\n", + "

10 rows × 4 columns

\n", + "
[12 rows x 4 columns in total]" + ], + "text/plain": [ + " id struct_col array_col \\\n", + "0 1 {'name': 'Alice', 'age': 30} [10 20 30] \n", + "1 2 {'name': 'Bob', 'age': 25} [40 50] \n", + "2 3 {'name': 'Charlie', 'age': 35} [60 70 80] \n", + "3 4 {'name': 'David', 'age': 40} [ 90 100 110] \n", + "4 5 {'name': 'Eve', 'age': 45} [120 130 140] \n", + "5 6 {'name': 'Frank', 'age': 50} [150 160 170] \n", + "6 7 {'name': 'Grace', 'age': 55} [180 190] \n", + "7 8 {'name': 'Heidi', 'age': 60} [200 210 220] \n", + "8 9 {'name': 'Ivan', 'age': 65} [230 240 250 260] \n", + "9 10 {'name': 'Judy', 'age': 70} [270 280] \n", + "\n", + " nested_struct_array \n", + "0 [{'item': 'A', 'value': 100} {'item': 'B', 'va... \n", + "1 [{'item': 'C', 'value': 300}] \n", + "2 [{'item': 'D', 'value': 400} {'item': 'E', 'va... \n", + "3 [{'item': 'F', 'value': 600} {'item': 'G', 'va... \n", + "4 [{'item': 'H', 'value': 800} {'item': 'I', 'va... \n", + "5 [{'item': 'J', 'value': 1000} {'item': 'K', 'v... \n", + "6 [{'item': 'L', 'value': 1200}] \n", + "7 [{'item': 'M', 'value': 1300} {'item': 'N', 'v... \n", + "8 [{'item': 'O', 'value': 1500} {'item': 'P', 'v... \n", + "9 [{'item': 'Q', 'value': 1700}] \n", + "...\n", + "\n", + "[12 rows x 4 columns]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sql_nested_data = \"\"\"\n", + "SELECT\n", + " 1 AS id,\n", + " STRUCT('Alice' AS name, 30 AS age) AS struct_col,\n", + " [10, 20, 30] AS array_col,\n", + " [STRUCT('A' AS item, 100 AS value), STRUCT('B' AS item, 200 AS value)] AS nested_struct_array\n", + "UNION ALL\n", + "SELECT\n", + " 2 AS id,\n", + " STRUCT('Bob' AS name, 25 AS age) AS struct_col,\n", + " [40, 50] AS array_col,\n", + " [STRUCT('C' AS item, 300 AS value)] AS nested_struct_array\n", + "UNION ALL\n", + "SELECT\n", + " 3 AS id,\n", + " STRUCT('Charlie' AS name, 35 AS age) AS struct_col,\n", + " [60, 70, 80] AS array_col,\n", + " [STRUCT('D' AS item, 400 AS value), STRUCT('E' AS item, 500 AS value)] AS nested_struct_array\n", + "UNION ALL\n", + "SELECT\n", + " 4 AS id,\n", + " STRUCT('David' AS name, 40 AS age) AS struct_col,\n", + " [90, 100, 110] AS array_col,\n", + " [STRUCT('F' AS item, 600 AS value), STRUCT('G' AS item, 700 AS value)] AS nested_struct_array\n", + "UNION ALL\n", + "SELECT\n", + " 5 AS id,\n", + " STRUCT('Eve' AS name, 45 AS age) AS struct_col,\n", + " [120, 130, 140] AS array_col,\n", + " [STRUCT('H' AS item, 800 AS value), STRUCT('I' AS item, 900 AS value)] AS nested_struct_array\n", + "UNION ALL\n", + "SELECT\n", + " 6 AS id,\n", + " STRUCT('Frank' AS name, 50 AS age) AS struct_col,\n", + " [150, 160, 170] AS array_col,\n", + " [STRUCT('J' AS item, 1000 AS value), STRUCT('K' AS item, 1100 AS value)] AS nested_struct_array\n", + "UNION ALL\n", + "SELECT\n", + " 7 AS id,\n", + " STRUCT('Grace' AS name, 55 AS age) AS struct_col,\n", + " [180, 190] AS array_col,\n", + " [STRUCT('L' AS item, 1200 AS value)] AS nested_struct_array\n", + "UNION ALL\n", + "SELECT\n", + " 8 AS id,\n", + " STRUCT('Heidi' AS name, 60 AS age) AS struct_col,\n", + " [200, 210, 220] AS array_col,\n", + " [STRUCT('M' AS item, 1300 AS value), STRUCT('N' AS item, 1400 AS value)] AS nested_struct_array\n", + "UNION ALL\n", + "SELECT\n", + " 9 AS id,\n", + " STRUCT('Ivan' AS name, 65 AS age) AS struct_col,\n", + " [230, 240, 250, 260] AS array_col,\n", + " [STRUCT('O' AS item, 1500 AS value), STRUCT('P' AS item, 1600 AS value)] AS nested_struct_array\n", + "UNION ALL\n", + "SELECT\n", + " 10 AS id,\n", + " STRUCT('Judy' AS name, 70 AS age) AS struct_col,\n", + " [270, 280] AS array_col,\n", + " [STRUCT('Q' AS item, 1700 AS value)] AS nested_struct_array\n", + "UNION ALL\n", + "SELECT\n", + " 11 AS id,\n", + " STRUCT('Kevin' AS name, 75 AS age) AS struct_col,\n", + " [290, 300, 310] AS array_col,\n", + " [STRUCT('R' AS item, 1800 AS value), STRUCT('S' AS item, 1900 AS value), STRUCT('T' AS item, 2000 AS value), STRUCT('U' AS item, 2100 AS value)] AS nested_struct_array\n", + "UNION ALL\n", + "SELECT\n", + " 12 AS id,\n", + " STRUCT('Laura' AS name, 80 AS age) AS struct_col,\n", + " [320] AS array_col,\n", + " [STRUCT('V' AS item, 2200 AS value), STRUCT('W' AS item, 2300 AS value), STRUCT('X' AS item, 2400 AS value)] AS nested_struct_array\n", + "\"\"\"\n", + "\n", + "df_from_sql = bpd.read_gbq(sql_nested_data)\n", + "\n", + "# Display this DataFrame. The nested fields will be rendered as flattened elements.\n", + "df_from_sql" + ] } ], "metadata": { diff --git a/tests/js/package-lock.json b/tests/js/package-lock.json index 8a562a11ea..5526e0581e 100644 --- a/tests/js/package-lock.json +++ b/tests/js/package-lock.json @@ -10,11 +10,19 @@ "license": "ISC", "devDependencies": { "@babel/preset-env": "^7.24.7", + "@testing-library/jest-dom": "^6.4.6", "jest": "^29.7.0", "jest-environment-jsdom": "^29.7.0", "jsdom": "^24.1.0" } }, + "node_modules/@adobe/css-tools": { + "version": "4.4.4", + "resolved": "https://registry.npmjs.org/@adobe/css-tools/-/css-tools-4.4.4.tgz", + "integrity": "sha512-Elp+iwUx5rN5+Y8xLt5/GRoG20WGoDCQ/1Fb+1LiGtvwbDavuSk0jhD/eZdckHAuzcDzccnkv+rEjyWfRx18gg==", + "dev": true, + "license": "MIT" + }, "node_modules/@asamuzakjp/css-color": { "version": "3.2.0", "resolved": "https://registry.npmjs.org/@asamuzakjp/css-color/-/css-color-3.2.0.tgz", @@ -2453,6 +2461,26 @@ "@sinonjs/commons": "^3.0.0" } }, + "node_modules/@testing-library/jest-dom": { + "version": "6.9.1", + "resolved": "https://registry.npmjs.org/@testing-library/jest-dom/-/jest-dom-6.9.1.tgz", + "integrity": "sha512-zIcONa+hVtVSSep9UT3jZ5rizo2BsxgyDYU7WFD5eICBE7no3881HGeb/QkGfsJs6JTkY1aQhT7rIPC7e+0nnA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@adobe/css-tools": "^4.4.0", + "aria-query": "^5.0.0", + "css.escape": "^1.5.1", + "dom-accessibility-api": "^0.6.3", + "picocolors": "^1.1.1", + "redent": "^3.0.0" + }, + "engines": { + "node": ">=14", + "npm": ">=6", + "yarn": ">=1" + } + }, "node_modules/@tootallnate/once": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/@tootallnate/once/-/once-2.0.0.tgz", @@ -2706,6 +2734,16 @@ "sprintf-js": "~1.0.2" } }, + "node_modules/aria-query": { + "version": "5.3.2", + "resolved": "https://registry.npmjs.org/aria-query/-/aria-query-5.3.2.tgz", + "integrity": "sha512-COROpnaoap1E2F000S62r6A60uHZnmlvomhfyT2DlTcrY1OrBKn2UhH7qn5wTC9zMvD0AY7csdPSNwKP+7WiQw==", + "dev": true, + "license": "Apache-2.0", + "engines": { + "node": ">= 0.4" + } + }, "node_modules/asynckit": { "version": "0.4.0", "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", @@ -3306,6 +3344,13 @@ "node": ">= 8" } }, + "node_modules/css.escape": { + "version": "1.5.1", + "resolved": "https://registry.npmjs.org/css.escape/-/css.escape-1.5.1.tgz", + "integrity": "sha512-YUifsXXuknHlUsmlgyY0PKzgPOr7/FjCePfHNt0jxm83wHZi44VDMQ7/fGNkjY3/jV1MC+1CmZbaHzugyeRtpg==", + "dev": true, + "license": "MIT" + }, "node_modules/cssom": { "version": "0.5.0", "resolved": "https://registry.npmjs.org/cssom/-/cssom-0.5.0.tgz", @@ -3428,6 +3473,13 @@ "node": "^14.15.0 || ^16.10.0 || >=18.0.0" } }, + "node_modules/dom-accessibility-api": { + "version": "0.6.3", + "resolved": "https://registry.npmjs.org/dom-accessibility-api/-/dom-accessibility-api-0.6.3.tgz", + "integrity": "sha512-7ZgogeTnjuHbo+ct10G9Ffp0mif17idi0IyWNVA/wcwcm7NPOD/WEHVP3n7n3MhXqxoIYm8d6MuZohYWIZ4T3w==", + "dev": true, + "license": "MIT" + }, "node_modules/domexception": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/domexception/-/domexception-4.0.0.tgz", @@ -4020,6 +4072,16 @@ "node": ">=0.8.19" } }, + "node_modules/indent-string": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/indent-string/-/indent-string-4.0.0.tgz", + "integrity": "sha512-EdDDZu4A2OyIK7Lr/2zG+w5jmbuk1DVBnEwREQvBzspBJkCEbRa8GxU1lghYcaGJCnRWibjDXlq779X1/y5xwg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, "node_modules/inflight": { "version": "1.0.6", "resolved": "https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz", @@ -5321,6 +5383,16 @@ "node": ">=6" } }, + "node_modules/min-indent": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/min-indent/-/min-indent-1.0.1.tgz", + "integrity": "sha512-I9jwMn07Sy/IwOj3zVkVik2JTvgpaykDZEigL6Rx6N9LbMywwUSMtxET+7lVoDLLd3O3IXwJwvuuns8UB/HeAg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=4" + } + }, "node_modules/ms": { "version": "2.1.3", "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", @@ -5655,6 +5727,20 @@ "dev": true, "license": "MIT" }, + "node_modules/redent": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/redent/-/redent-3.0.0.tgz", + "integrity": "sha512-6tDA8g98We0zd0GvVeMT9arEOnTw9qM03L9cJXaCjrip1OO764RDBLBfrB4cwzNGDj5OA5ioymC9GkizgWJDUg==", + "dev": true, + "license": "MIT", + "dependencies": { + "indent-string": "^4.0.0", + "strip-indent": "^3.0.0" + }, + "engines": { + "node": ">=8" + } + }, "node_modules/regenerate": { "version": "1.4.2", "resolved": "https://registry.npmjs.org/regenerate/-/regenerate-1.4.2.tgz", @@ -5972,6 +6058,19 @@ "node": ">=6" } }, + "node_modules/strip-indent": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/strip-indent/-/strip-indent-3.0.0.tgz", + "integrity": "sha512-laJTa3Jb+VQpaC6DseHhF7dXVqHTfJPCRDaEbid/drOhgitgYku/letMUqOXFoWV0zIIUbjpdH2t+tYj4bQMRQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "min-indent": "^1.0.0" + }, + "engines": { + "node": ">=8" + } + }, "node_modules/strip-json-comments": { "version": "3.1.1", "resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-3.1.1.tgz", diff --git a/tests/js/package.json b/tests/js/package.json index 8de4b4747c..d34c5a065a 100644 --- a/tests/js/package.json +++ b/tests/js/package.json @@ -14,6 +14,7 @@ "@babel/preset-env": "^7.24.7", "jest": "^29.7.0", "jest-environment-jsdom": "^29.7.0", + "@testing-library/jest-dom": "^6.4.6", "jsdom": "^24.1.0" } } diff --git a/tests/system/small/test_anywidget.py b/tests/system/small/test_anywidget.py index b0eeb4a3c2..db40ec8ee8 100644 --- a/tests/system/small/test_anywidget.py +++ b/tests/system/small/test_anywidget.py @@ -201,6 +201,7 @@ def _assert_html_matches_pandas_slice( def test_widget_initialization_should_calculate_total_row_count( paginated_bf_df: bf.dataframe.DataFrame, ): + """Test that a TableWidget calculates the total row count on creation.""" """A TableWidget should correctly calculate the total row count on creation.""" from bigframes.display import TableWidget @@ -313,9 +314,7 @@ def test_widget_pagination_should_work_with_custom_page_size( start_row: int, end_row: int, ): - """ - A widget should paginate correctly with a custom page size of 3. - """ + """Test that a widget paginates correctly with a custom page size.""" with bigframes.option_context( "display.repr_mode", "anywidget", "display.max_rows", 3 ): @@ -943,23 +942,11 @@ def test_repr_mimebundle_should_return_widget_view_if_anywidget_is_available( assert "text/plain" in data -def test_repr_in_anywidget_mode_should_not_be_deferred( - paginated_bf_df: bf.dataframe.DataFrame, -): - """ - Test that repr(df) is not deferred in anywidget mode. - This is to ensure that print(df) works as expected. - """ - with bigframes.option_context("display.repr_mode", "anywidget"): - representation = repr(paginated_bf_df) - assert "Computation deferred" not in representation - assert "page_1_row_1" in representation - - -def test_dataframe_repr_mimebundle_anywidget_with_metadata( +def test_dataframe_repr_mimebundle_should_return_widget_with_metadata_in_anywidget_mode( monkeypatch: pytest.MonkeyPatch, session: bigframes.Session, # Add session as a fixture ): + """Test that _repr_mimebundle_ returns a widget view with metadata when anywidget is available.""" with bigframes.option_context("display.repr_mode", "anywidget"): # Create a real DataFrame object (or a mock that behaves like one minimally) # for _repr_mimebundle_ to operate on. @@ -1135,3 +1122,41 @@ def test_widget_with_custom_index_matches_pandas_output( # TODO(b/438181139): Add tests for custom multiindex # This may not be necessary for the SQL Cell use case but should be # considered for completeness. + + +def test_series_anywidget_integration_with_notebook_display( + paginated_bf_df: bf.dataframe.DataFrame, +): + """Test Series display integration in Jupyter-like environment.""" + pytest.importorskip("anywidget") + + with bf.option_context("display.repr_mode", "anywidget"): + series = paginated_bf_df["value"] + + # Test the full display pipeline + from IPython.display import display as ipython_display + + # This should work without errors + ipython_display(series) + + +def test_series_different_data_types_anywidget(session: bf.Session): + """Test Series with different data types in anywidget mode.""" + pytest.importorskip("anywidget") + + # Create Series with different types + test_data = pd.DataFrame( + { + "string_col": ["a", "b", "c"], + "int_col": [1, 2, 3], + "float_col": [1.1, 2.2, 3.3], + "bool_col": [True, False, True], + } + ) + bf_df = session.read_pandas(test_data) + + with bf.option_context("display.repr_mode", "anywidget"): + for col_name in test_data.columns: + series = bf_df[col_name] + widget = bigframes.display.TableWidget(series.to_frame()) + assert widget.row_count == 3 From 9e3163fe6a350141d7c5982defcec34560f8e38d Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 19 Dec 2025 01:05:15 +0000 Subject: [PATCH 02/25] test: add more npm tests --- tests/js/series_widget.test.js | 97 ++++++++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 tests/js/series_widget.test.js diff --git a/tests/js/series_widget.test.js b/tests/js/series_widget.test.js new file mode 100644 index 0000000000..3af2a14b52 --- /dev/null +++ b/tests/js/series_widget.test.js @@ -0,0 +1,97 @@ +/** + * @jest-environment jsdom + */ + +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import { jest } from "@jest/globals"; +import "@testing-library/jest-dom"; + +describe("SeriesWidget", () => { + let model; + let el; + let render; + + beforeEach(async () => { + jest.resetModules(); + document.body.innerHTML = "
"; + el = document.body.querySelector("div"); + + const tableWidget = ( + await import("../../bigframes/display/table_widget.js") + ).default; + render = tableWidget.render; + + model = { + get: jest.fn(), + set: jest.fn(), + save_changes: jest.fn(), + on: jest.fn(), + }; + }); + + it("should render the series as a table with an index and one value column", () => { + // Mock the initial state + model.get.mockImplementation((property) => { + if (property === "table_html") { + return ` +
+
+ + + + + + + + + + + + + + + + + +
value
0a
1b
+
+
`; + } + if (property === "orderable_columns") { + return []; + } + return null; + }); + + render({ model, el }); + + // Manually trigger the table_html change handler + const tableHtmlChangeHandler = model.on.mock.calls.find( + (call) => call[0] === "change:table_html", + )[1]; + tableHtmlChangeHandler(); + + // Check that the table has two columns + const headers = el.querySelectorAll( + ".paginated-table-container .col-header-name", + ); + expect(headers).toHaveLength(2); + + // Check that the headers are an empty string (for the index) and "value" + expect(headers[0].textContent).toBe(""); + expect(headers[1].textContent).toBe("value"); + }); +}); From 3227b23a6438424326029566b90a1e952ec1f3bc Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 19 Dec 2025 01:07:33 +0000 Subject: [PATCH 03/25] test: add this file for faster and reliable npm tests --- package-lock.json | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 package-lock.json diff --git a/package-lock.json b/package-lock.json new file mode 100644 index 0000000000..064bdaf362 --- /dev/null +++ b/package-lock.json @@ -0,0 +1,6 @@ +{ + "name": "python-bigquery-dataframes", + "lockfileVersion": 3, + "requires": true, + "packages": {} +} From c1a8f839f61e88ac2b88175f1372502967805a9c Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 19 Dec 2025 02:28:37 +0000 Subject: [PATCH 04/25] docs: notebook update --- notebooks/dataframes/anywidget_mode.ipynb | 544 +++++----------------- 1 file changed, 128 insertions(+), 416 deletions(-) diff --git a/notebooks/dataframes/anywidget_mode.ipynb b/notebooks/dataframes/anywidget_mode.ipynb index e5d6ac90aa..e4f1d9411d 100644 --- a/notebooks/dataframes/anywidget_mode.ipynb +++ b/notebooks/dataframes/anywidget_mode.ipynb @@ -106,17 +106,17 @@ "name": "stdout", "output_type": "stream", "text": [ - "state gender year name number\n", - " AL F 1910 Vera 71\n", - " AR F 1910 Viola 37\n", - " AR F 1910 Alice 57\n", - " AR F 1910 Edna 95\n", - " AR F 1910 Ollie 40\n", - " CA F 1910 Beatrice 37\n", - " CT F 1910 Marion 36\n", - " CT F 1910 Marie 36\n", - " FL F 1910 Alice 53\n", - " GA F 1910 Thelma 133\n", + "state gender year name number\n", + " AL F 1910 Cora 61\n", + " AL F 1910 Anna 74\n", + " AR F 1910 Willie 132\n", + " CO F 1910 Anna 42\n", + " FL F 1910 Louise 70\n", + " GA F 1910 Catherine 57\n", + " IL F 1910 Jessie 43\n", + " IN F 1910 Anna 100\n", + " IN F 1910 Pauline 77\n", + " IN F 1910 Beulah 39\n", "...\n", "\n", "[5552452 rows x 5 columns]\n" @@ -146,7 +146,7 @@ "data": { "text/html": [ "✅ Completed. \n", - " Query processed 171.4 MB in 39 seconds of slot time. [Job bigframes-dev:US.bce70478-6a42-48ad-87e2-4cc7dcc24cb7 details]\n", + " Query processed 171.4 MB in 30 seconds of slot time. [Job bigframes-dev:US.a5deda5a-cf92-475e-9596-4f92c7f749b6 details]\n", " " ], "text/plain": [ @@ -218,7 +218,7 @@ "data": { "text/html": [ "✅ Completed. \n", - " Query processed 88.8 MB in 4 seconds of slot time. [Job bigframes-dev:US.job_pZze_GGj23gUfvIxPHRZOthUQnk- details]\n", + " Query processed 88.8 MB in 2 seconds of slot time. [Job bigframes-dev:US.job_kS5Cpa0qjt2oaPbsj3Rd5Gsmk5Zf details]\n", " " ], "text/plain": [ @@ -232,7 +232,7 @@ "data": { "text/html": [ "✅ Completed. \n", - " Query processed 88.8 MB in 2 seconds of slot time. [Job bigframes-dev:US.job_dAwaewh1LyOlW4uZtYcvy8vqIv35 details]\n", + " Query processed 88.8 MB in 2 seconds of slot time. [Job bigframes-dev:US.job_QyhgBuHo8HQ-wxsovkW1eWZyATdo details]\n", " " ], "text/plain": [ @@ -245,7 +245,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "69512bd9ceb74f89bb80ac8962999386", + "model_id": "3971aa58b8b94313a6c5848c033eae14", "version_major": 2, "version_minor": 1 }, @@ -293,7 +293,7 @@ "data": { "text/html": [ "✅ Completed. \n", - " Query processed 215.9 MB in 9 seconds of slot time. [Job bigframes-dev:US.job_9r9FBU8_-_d5eViVu8KYTdEJWPQf details]\n", + " Query processed 215.9 MB in 8 seconds of slot time. [Job bigframes-dev:US.job_GNgZ21dOBLPqnQdcCRs_kxWHZnLO details]\n", " " ], "text/plain": [ @@ -307,7 +307,7 @@ "data": { "text/html": [ "✅ Completed. \n", - " Query processed 215.9 MB in 6 seconds of slot time. [Job bigframes-dev:US.job_mwuMPvfGAyBIVjlNzhFDiXKw-5P1 details]\n", + " Query processed 215.9 MB in 7 seconds of slot time. [Job bigframes-dev:US.job_SPvbV-t1egpXI-4ABmqjxJjY5FyA details]\n", " " ], "text/plain": [ @@ -320,7 +320,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "abf9a24044f84e47b2726f673c4b5d89", + "model_id": "7c726c80061d4ac39cc1dc6151e5c269", "version_major": 2, "version_minor": 1 }, @@ -356,79 +356,79 @@ " AL\n", " F\n", " 1910\n", - " Cora\n", - " 61\n", + " Hazel\n", + " 51\n", " \n", " \n", " 1\n", " AL\n", " F\n", " 1910\n", - " Anna\n", - " 74\n", + " Lucy\n", + " 76\n", " \n", " \n", " 2\n", " AR\n", " F\n", " 1910\n", - " Willie\n", - " 132\n", + " Nellie\n", + " 39\n", " \n", " \n", " 3\n", - " CO\n", + " AR\n", " F\n", " 1910\n", - " Anna\n", - " 42\n", + " Lena\n", + " 40\n", " \n", " \n", " 4\n", - " FL\n", + " CO\n", " F\n", " 1910\n", - " Louise\n", - " 70\n", + " Thelma\n", + " 36\n", " \n", " \n", " 5\n", - " GA\n", + " CO\n", " F\n", " 1910\n", - " Catherine\n", - " 57\n", + " Ruth\n", + " 68\n", " \n", " \n", " 6\n", - " IL\n", + " CT\n", " F\n", " 1910\n", - " Jessie\n", - " 43\n", + " Elizabeth\n", + " 86\n", " \n", " \n", " 7\n", - " IN\n", + " DC\n", " F\n", " 1910\n", - " Anna\n", - " 100\n", + " Mary\n", + " 80\n", " \n", " \n", " 8\n", - " IN\n", + " FL\n", " F\n", " 1910\n", - " Pauline\n", - " 77\n", + " Annie\n", + " 101\n", " \n", " \n", " 9\n", - " IN\n", + " FL\n", " F\n", " 1910\n", - " Beulah\n", + " Alma\n", " 39\n", " \n", " \n", @@ -438,16 +438,16 @@ ], "text/plain": [ "state gender year name number\n", - " AL F 1910 Cora 61\n", - " AL F 1910 Anna 74\n", - " AR F 1910 Willie 132\n", - " CO F 1910 Anna 42\n", - " FL F 1910 Louise 70\n", - " GA F 1910 Catherine 57\n", - " IL F 1910 Jessie 43\n", - " IN F 1910 Anna 100\n", - " IN F 1910 Pauline 77\n", - " IN F 1910 Beulah 39\n", + " AL F 1910 Hazel 51\n", + " AL F 1910 Lucy 76\n", + " AR F 1910 Nellie 39\n", + " AR F 1910 Lena 40\n", + " CO F 1910 Thelma 36\n", + " CO F 1910 Ruth 68\n", + " CT F 1910 Elizabeth 86\n", + " DC F 1910 Mary 80\n", + " FL F 1910 Annie 101\n", + " FL F 1910 Alma 39\n", "...\n", "\n", "[5552452 rows x 5 columns]" @@ -503,7 +503,7 @@ "data": { "text/html": [ "✅ Completed. \n", - " Query processed 215.9 MB in 5 seconds of slot time. [Job bigframes-dev:US.job_Q_tE-b6K1I_8uzC-oov-X8sVZHB5 details]\n", + " Query processed 215.9 MB in 6 seconds of slot time. [Job bigframes-dev:US.job_NO_gGB-KLVechm-gl9tIvEmtxpo7 details]\n", " " ], "text/plain": [ @@ -517,7 +517,7 @@ "data": { "text/html": [ "✅ Completed. \n", - " Query processed 215.9 MB in 5 seconds of slot time. [Job bigframes-dev:US.job_xs4XXSxBz4wcVlYc7pDQqmMZB-J0 details]\n", + " Query processed 215.9 MB in 9 seconds of slot time. [Job bigframes-dev:US.job_eUF7XO9UzQZwPzxy0w22_hbZGv3g details]\n", " " ], "text/plain": [ @@ -537,12 +537,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "37be40847a41401980b30801e4a0dc9c", + "model_id": "91a96e79860a4dcea06513f14f33c458", "version_major": 2, "version_minor": 1 }, "text/plain": [ - "" + "" ] }, "execution_count": 8, @@ -651,12 +651,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "02dbf8257f4b46b19ed221bc8bdf9260", + "model_id": "8926468cb2034aebb348b17d3f9ba6f6", "version_major": 2, "version_minor": 1 }, "text/plain": [ - "" + "" ] }, "execution_count": 10, @@ -691,7 +691,7 @@ "data": { "text/html": [ "✅ Completed. \n", - " Query processed 0 Bytes in a moment of slot time.\n", + " Query processed 85.9 kB in 19 seconds of slot time.\n", " " ], "text/plain": [ @@ -752,7 +752,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "af06cffd02064b4d92626a91bb992284", + "model_id": "d774ed63f08941b8bf11ec1087609f17", "version_major": 2, "version_minor": 1 }, @@ -795,25 +795,7 @@ " \n", " \n", " 0\n", - " {'application_number': 'DE56', 'class_internat...\n", - " gs://gcs-public-data--labeled-patents/espacene...\n", - " EU\n", - " DE\n", - " 03.10.2018\n", - " A01K 31/00\n", - " <NA>\n", - " 18171005.4\n", - " 05.02.2015\n", - " 05.02.2014\n", - " Stork Bamberger Patentanw√§lte\n", - " Linco Food Systems A/S\n", - " Thrane, Uffe\n", - " MASTH√ÑHNCHENCONTAINER ALS BESTANDTEIL EINER E...\n", - " EP 3 381 276 A1\n", - " \n", - " \n", - " 1\n", - " {'application_number': 'DE73', 'class_internat...\n", + " {'application_number': None, 'class_internatio...\n", " gs://gcs-public-data--labeled-patents/espacene...\n", " EU\n", " DE\n", @@ -830,26 +812,8 @@ " EP 3 383 141 A2\n", " \n", " \n", - " 2\n", - " {'application_number': 'DE5', 'class_internati...\n", - " gs://gcs-public-data--labeled-patents/espacene...\n", - " EU\n", - " DE\n", - " 03.10.2018\n", - " G06F 11/30\n", - " <NA>\n", - " 18157347.8\n", - " 19.02.2018\n", - " 31.03.2017\n", - " Hoffmann Eitle\n", - " FUJITSU LIMITED\n", - " Kukihara, Kensuke\n", - " METHOD EXECUTED BY A COMPUTER, INFORMATION PRO...\n", - " EP 3 382 553 A1\n", - " \n", - " \n", - " 3\n", - " {'application_number': 'DE2', 'class_internati...\n", + " 1\n", + " {'application_number': None, 'class_internatio...\n", " gs://gcs-public-data--labeled-patents/espacene...\n", " EU\n", " DE\n", @@ -866,8 +830,8 @@ " EP 3 366 869 A1\n", " \n", " \n", - " 4\n", - " {'application_number': 'DE70', 'class_internat...\n", + " 2\n", + " {'application_number': None, 'class_internatio...\n", " gs://gcs-public-data--labeled-patents/espacene...\n", " EU\n", " DE\n", @@ -883,6 +847,42 @@ " VORRICHTUNG ZUM BONDEN VON SUBSTRATEN\n", " EP 3 382 744 A1\n", " \n", + " \n", + " 3\n", + " {'application_number': None, 'class_internatio...\n", + " gs://gcs-public-data--labeled-patents/espacene...\n", + " EU\n", + " DE\n", + " 03.10.2018\n", + " G06F 11/30\n", + " <NA>\n", + " 18157347.8\n", + " 19.02.2018\n", + " 31.03.2017\n", + " Hoffmann Eitle\n", + " FUJITSU LIMITED\n", + " Kukihara, Kensuke\n", + " METHOD EXECUTED BY A COMPUTER, INFORMATION PRO...\n", + " EP 3 382 553 A1\n", + " \n", + " \n", + " 4\n", + " {'application_number': None, 'class_internatio...\n", + " gs://gcs-public-data--labeled-patents/espacene...\n", + " EU\n", + " DE\n", + " 03.10.2018\n", + " A01K 31/00\n", + " <NA>\n", + " 18171005.4\n", + " 05.02.2015\n", + " 05.02.2014\n", + " Stork Bamberger Patentanw√§lte\n", + " Linco Food Systems A/S\n", + " Thrane, Uffe\n", + " MASTH√ÑHNCHENCONTAINER ALS BESTANDTEIL EINER E...\n", + " EP 3 381 276 A1\n", + " \n", " \n", "\n", "

5 rows × 15 columns

\n", @@ -890,11 +890,11 @@ ], "text/plain": [ " result \\\n", - "0 {'application_number': 'DE56', 'class_internat... \n", - "1 {'application_number': 'DE73', 'class_internat... \n", - "2 {'application_number': 'DE5', 'class_internati... \n", - "3 {'application_number': 'DE2', 'class_internati... \n", - "4 {'application_number': 'DE70', 'class_internat... \n", + "0 {'application_number': None, 'class_internatio... \n", + "1 {'application_number': None, 'class_internatio... \n", + "2 {'application_number': None, 'class_internatio... \n", + "3 {'application_number': None, 'class_internatio... \n", + "4 {'application_number': None, 'class_internatio... \n", "\n", " gcs_path issuer language \\\n", "0 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", @@ -904,32 +904,32 @@ "4 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", "\n", " publication_date class_international class_us application_number \\\n", - "0 03.10.2018 A01K 31/00 18171005.4 \n", - "1 03.10.2018 H05B 6/12 18165514.3 \n", - "2 03.10.2018 G06F 11/30 18157347.8 \n", - "3 29.08.018 E04H 6/12 18157874.1 \n", - "4 03.10.2018 H01L 21/20 18166536.5 \n", + "0 03.10.2018 H05B 6/12 18165514.3 \n", + "1 29.08.018 E04H 6/12 18157874.1 \n", + "2 03.10.2018 H01L 21/20 18166536.5 \n", + "3 03.10.2018 G06F 11/30 18157347.8 \n", + "4 03.10.2018 A01K 31/00 18171005.4 \n", "\n", " filing_date priority_date_eu representative_line_1_eu \\\n", - "0 05.02.2015 05.02.2014 Stork Bamberger Patentanw√§lte \n", - "1 03.04.2018 30.03.2017 \n", - "2 19.02.2018 31.03.2017 Hoffmann Eitle \n", - "3 21.02.2018 22.02.2017 Liedtke & Partner Patentanw√§lte \n", - "4 16.02.2016 Scheider, Sascha et al \n", + "0 03.04.2018 30.03.2017 \n", + "1 21.02.2018 22.02.2017 Liedtke & Partner Patentanw√§lte \n", + "2 16.02.2016 Scheider, Sascha et al \n", + "3 19.02.2018 31.03.2017 Hoffmann Eitle \n", + "4 05.02.2015 05.02.2014 Stork Bamberger Patentanw√§lte \n", "\n", " applicant_line_1 inventor_line_1 \\\n", - "0 Linco Food Systems A/S Thrane, Uffe \n", - "1 BSH Hausger√§te GmbH Acero Acero, Jesus \n", - "2 FUJITSU LIMITED Kukihara, Kensuke \n", - "3 SHB Hebezeugbau GmbH VOLGER, Alexander \n", - "4 EV Group E. Thallner GmbH Kurz, Florian \n", + "0 BSH Hausger√§te GmbH Acero Acero, Jesus \n", + "1 SHB Hebezeugbau GmbH VOLGER, Alexander \n", + "2 EV Group E. Thallner GmbH Kurz, Florian \n", + "3 FUJITSU LIMITED Kukihara, Kensuke \n", + "4 Linco Food Systems A/S Thrane, Uffe \n", "\n", " title_line_1 number \n", - "0 MASTH√ÑHNCHENCONTAINER ALS BESTANDTEIL EINER E... EP 3 381 276 A1 \n", - "1 VORRICHTUNG ZUR INDUKTIVEN ENERGIE√úBERTRAGUNG EP 3 383 141 A2 \n", - "2 METHOD EXECUTED BY A COMPUTER, INFORMATION PRO... EP 3 382 553 A1 \n", - "3 STEUERUNGSSYSTEM F√úR AUTOMATISCHE PARKH√ÑUSER EP 3 366 869 A1 \n", - "4 VORRICHTUNG ZUM BONDEN VON SUBSTRATEN EP 3 382 744 A1 \n", + "0 VORRICHTUNG ZUR INDUKTIVEN ENERGIE√úBERTRAGUNG EP 3 383 141 A2 \n", + "1 STEUERUNGSSYSTEM F√úR AUTOMATISCHE PARKH√ÑUSER EP 3 366 869 A1 \n", + "2 VORRICHTUNG ZUM BONDEN VON SUBSTRATEN EP 3 382 744 A1 \n", + "3 METHOD EXECUTED BY A COMPUTER, INFORMATION PRO... EP 3 382 553 A1 \n", + "4 MASTH√ÑHNCHENCONTAINER ALS BESTANDTEIL EINER E... EP 3 381 276 A1 \n", "\n", "[5 rows x 15 columns]" ] @@ -951,294 +951,6 @@ " LIMIT 5;\n", "\"\"\")" ] - }, - { - "cell_type": "markdown", - "id": "e89b4784", - "metadata": {}, - "source": [ - "### Displaying Nested Data (STRUCTs and ARRAYs)\n", - "BigQuery DataFrames automatically flattens nested STRUCT and ARRAY columns into separate, more manageable columns when displayed in `anywidget` mode. This approach simplifies interaction and readability, as it avoids deeply nested or collapsible elements.\n", - "\n", - "This flattening ensures that all data is directly visible and sortable, enhancing the interactive table experience.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "4fcebbd4", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "✅ Completed. \n", - " Query processed 0 Bytes in a moment of slot time.\n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "✅ Completed. " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "✅ Completed. " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/display/html.py:259: FutureWarning: errors='ignore' is deprecated and will raise in a future version. Use to_numeric without passing `errors` and catch exceptions explicitly instead\n", - " exploded_df[col] = pd.to_numeric(\n", - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/display/html.py:259: FutureWarning: errors='ignore' is deprecated and will raise in a future version. Use to_numeric without passing `errors` and catch exceptions explicitly instead\n", - " exploded_df[col] = pd.to_numeric(\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "41a82986b609418799d64d6cc6d8cede", - "version_major": 2, - "version_minor": 1 - }, - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idstruct_colarray_colnested_struct_array
01{'name': 'Alice', 'age': 30}[10 20 30][{'item': 'A', 'value': 100} {'item': 'B', 'va...
12{'name': 'Bob', 'age': 25}[40 50][{'item': 'C', 'value': 300}]
23{'name': 'Charlie', 'age': 35}[60 70 80][{'item': 'D', 'value': 400} {'item': 'E', 'va...
34{'name': 'David', 'age': 40}[ 90 100 110][{'item': 'F', 'value': 600} {'item': 'G', 'va...
45{'name': 'Eve', 'age': 45}[120 130 140][{'item': 'H', 'value': 800} {'item': 'I', 'va...
56{'name': 'Frank', 'age': 50}[150 160 170][{'item': 'J', 'value': 1000} {'item': 'K', 'v...
67{'name': 'Grace', 'age': 55}[180 190][{'item': 'L', 'value': 1200}]
78{'name': 'Heidi', 'age': 60}[200 210 220][{'item': 'M', 'value': 1300} {'item': 'N', 'v...
89{'name': 'Ivan', 'age': 65}[230 240 250 260][{'item': 'O', 'value': 1500} {'item': 'P', 'v...
910{'name': 'Judy', 'age': 70}[270 280][{'item': 'Q', 'value': 1700}]
\n", - "

10 rows × 4 columns

\n", - "
[12 rows x 4 columns in total]" - ], - "text/plain": [ - " id struct_col array_col \\\n", - "0 1 {'name': 'Alice', 'age': 30} [10 20 30] \n", - "1 2 {'name': 'Bob', 'age': 25} [40 50] \n", - "2 3 {'name': 'Charlie', 'age': 35} [60 70 80] \n", - "3 4 {'name': 'David', 'age': 40} [ 90 100 110] \n", - "4 5 {'name': 'Eve', 'age': 45} [120 130 140] \n", - "5 6 {'name': 'Frank', 'age': 50} [150 160 170] \n", - "6 7 {'name': 'Grace', 'age': 55} [180 190] \n", - "7 8 {'name': 'Heidi', 'age': 60} [200 210 220] \n", - "8 9 {'name': 'Ivan', 'age': 65} [230 240 250 260] \n", - "9 10 {'name': 'Judy', 'age': 70} [270 280] \n", - "\n", - " nested_struct_array \n", - "0 [{'item': 'A', 'value': 100} {'item': 'B', 'va... \n", - "1 [{'item': 'C', 'value': 300}] \n", - "2 [{'item': 'D', 'value': 400} {'item': 'E', 'va... \n", - "3 [{'item': 'F', 'value': 600} {'item': 'G', 'va... \n", - "4 [{'item': 'H', 'value': 800} {'item': 'I', 'va... \n", - "5 [{'item': 'J', 'value': 1000} {'item': 'K', 'v... \n", - "6 [{'item': 'L', 'value': 1200}] \n", - "7 [{'item': 'M', 'value': 1300} {'item': 'N', 'v... \n", - "8 [{'item': 'O', 'value': 1500} {'item': 'P', 'v... \n", - "9 [{'item': 'Q', 'value': 1700}] \n", - "...\n", - "\n", - "[12 rows x 4 columns]" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "sql_nested_data = \"\"\"\n", - "SELECT\n", - " 1 AS id,\n", - " STRUCT('Alice' AS name, 30 AS age) AS struct_col,\n", - " [10, 20, 30] AS array_col,\n", - " [STRUCT('A' AS item, 100 AS value), STRUCT('B' AS item, 200 AS value)] AS nested_struct_array\n", - "UNION ALL\n", - "SELECT\n", - " 2 AS id,\n", - " STRUCT('Bob' AS name, 25 AS age) AS struct_col,\n", - " [40, 50] AS array_col,\n", - " [STRUCT('C' AS item, 300 AS value)] AS nested_struct_array\n", - "UNION ALL\n", - "SELECT\n", - " 3 AS id,\n", - " STRUCT('Charlie' AS name, 35 AS age) AS struct_col,\n", - " [60, 70, 80] AS array_col,\n", - " [STRUCT('D' AS item, 400 AS value), STRUCT('E' AS item, 500 AS value)] AS nested_struct_array\n", - "UNION ALL\n", - "SELECT\n", - " 4 AS id,\n", - " STRUCT('David' AS name, 40 AS age) AS struct_col,\n", - " [90, 100, 110] AS array_col,\n", - " [STRUCT('F' AS item, 600 AS value), STRUCT('G' AS item, 700 AS value)] AS nested_struct_array\n", - "UNION ALL\n", - "SELECT\n", - " 5 AS id,\n", - " STRUCT('Eve' AS name, 45 AS age) AS struct_col,\n", - " [120, 130, 140] AS array_col,\n", - " [STRUCT('H' AS item, 800 AS value), STRUCT('I' AS item, 900 AS value)] AS nested_struct_array\n", - "UNION ALL\n", - "SELECT\n", - " 6 AS id,\n", - " STRUCT('Frank' AS name, 50 AS age) AS struct_col,\n", - " [150, 160, 170] AS array_col,\n", - " [STRUCT('J' AS item, 1000 AS value), STRUCT('K' AS item, 1100 AS value)] AS nested_struct_array\n", - "UNION ALL\n", - "SELECT\n", - " 7 AS id,\n", - " STRUCT('Grace' AS name, 55 AS age) AS struct_col,\n", - " [180, 190] AS array_col,\n", - " [STRUCT('L' AS item, 1200 AS value)] AS nested_struct_array\n", - "UNION ALL\n", - "SELECT\n", - " 8 AS id,\n", - " STRUCT('Heidi' AS name, 60 AS age) AS struct_col,\n", - " [200, 210, 220] AS array_col,\n", - " [STRUCT('M' AS item, 1300 AS value), STRUCT('N' AS item, 1400 AS value)] AS nested_struct_array\n", - "UNION ALL\n", - "SELECT\n", - " 9 AS id,\n", - " STRUCT('Ivan' AS name, 65 AS age) AS struct_col,\n", - " [230, 240, 250, 260] AS array_col,\n", - " [STRUCT('O' AS item, 1500 AS value), STRUCT('P' AS item, 1600 AS value)] AS nested_struct_array\n", - "UNION ALL\n", - "SELECT\n", - " 10 AS id,\n", - " STRUCT('Judy' AS name, 70 AS age) AS struct_col,\n", - " [270, 280] AS array_col,\n", - " [STRUCT('Q' AS item, 1700 AS value)] AS nested_struct_array\n", - "UNION ALL\n", - "SELECT\n", - " 11 AS id,\n", - " STRUCT('Kevin' AS name, 75 AS age) AS struct_col,\n", - " [290, 300, 310] AS array_col,\n", - " [STRUCT('R' AS item, 1800 AS value), STRUCT('S' AS item, 1900 AS value), STRUCT('T' AS item, 2000 AS value), STRUCT('U' AS item, 2100 AS value)] AS nested_struct_array\n", - "UNION ALL\n", - "SELECT\n", - " 12 AS id,\n", - " STRUCT('Laura' AS name, 80 AS age) AS struct_col,\n", - " [320] AS array_col,\n", - " [STRUCT('V' AS item, 2200 AS value), STRUCT('W' AS item, 2300 AS value), STRUCT('X' AS item, 2400 AS value)] AS nested_struct_array\n", - "\"\"\"\n", - "\n", - "df_from_sql = bpd.read_gbq(sql_nested_data)\n", - "\n", - "# Display this DataFrame. The nested fields will be rendered as flattened elements.\n", - "df_from_sql" - ] } ], "metadata": { From c39293a6ec80fdb5331d720bd06dec9368bb64d4 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 19 Dec 2025 02:45:15 +0000 Subject: [PATCH 05/25] test: update old testcase due to new feature implementation --- tests/unit/test_series_polars.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/unit/test_series_polars.py b/tests/unit/test_series_polars.py index 516a46d4dd..a1aa887f94 100644 --- a/tests/unit/test_series_polars.py +++ b/tests/unit/test_series_polars.py @@ -2020,9 +2020,8 @@ def test_series_small_repr(scalars_dfs): bf_series = scalars_df[col_name] pd_series = scalars_pandas_df[col_name] with bigframes.pandas.option_context("display.repr_mode", "head"): - assert repr(bf_series) == pd_series.to_string( - length=False, dtype=True, name=True - ) + pd_string = pd_series.to_string(length=True, dtype=True, name=True) + assert repr(bf_series) == f"{pd_string}\n\n[{len(pd_series)} rows]" def test_sum(scalars_dfs): From 9dff0f42fb8f58626c26a705ed6fb68e612223c3 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 19 Dec 2025 20:18:20 +0000 Subject: [PATCH 06/25] Revert "test: update old testcase due to new feature implementation" This reverts commit c39293a6ec80fdb5331d720bd06dec9368bb64d4. --- tests/unit/test_series_polars.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/unit/test_series_polars.py b/tests/unit/test_series_polars.py index a1aa887f94..516a46d4dd 100644 --- a/tests/unit/test_series_polars.py +++ b/tests/unit/test_series_polars.py @@ -2020,8 +2020,9 @@ def test_series_small_repr(scalars_dfs): bf_series = scalars_df[col_name] pd_series = scalars_pandas_df[col_name] with bigframes.pandas.option_context("display.repr_mode", "head"): - pd_string = pd_series.to_string(length=True, dtype=True, name=True) - assert repr(bf_series) == f"{pd_string}\n\n[{len(pd_series)} rows]" + assert repr(bf_series) == pd_series.to_string( + length=False, dtype=True, name=True + ) def test_sum(scalars_dfs): From f67e30fd530f123566a0526b48836714f24aad3e Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 19 Dec 2025 20:23:31 +0000 Subject: [PATCH 07/25] feat: only display row count when series is large than the number can be displayed in one page --- bigframes/series.py | 29 +- notebooks/dataframes/anywidget_mode.ipynb | 344 +++++++++++----------- 2 files changed, 186 insertions(+), 187 deletions(-) diff --git a/bigframes/series.py b/bigframes/series.py index c538960adb..9e94f04e24 100644 --- a/bigframes/series.py +++ b/bigframes/series.py @@ -607,26 +607,25 @@ def _create_text_representation( """Create a text representation of the Series.""" opts = bigframes.options.display with display_options.pandas_repr(opts): - import pandas.io.formats - - # safe to mutate this, this dict is owned by this code, and does not affect global config - to_string_kwargs = ( - pandas.io.formats.format.get_series_repr_params() # type: ignore - ) - if len(self._block.index_columns) == 0: - to_string_kwargs.update({"index": False}) # Get the first column since Series DataFrame has only one column pd_series = pandas_df.iloc[:, 0] - repr_string = pd_series.to_string(**to_string_kwargs) + if len(self._block.index_columns) == 0: + repr_string = pd_series.to_string( + length=False, index=False, name=True, dtype=True + ) + else: + repr_string = pd_series.to_string(length=False, name=True, dtype=True) - lines = repr_string.split("\n") + is_truncated = total_rows is not None and total_rows > len(pd_series) - if total_rows is not None and total_rows > len(pd_series): + if is_truncated: + lines = repr_string.split("\n") lines.append("...") - - lines.append("") - lines.append(f"[{total_rows} rows]") - return "\n".join(lines) + lines.append("") + lines.append(f"[{total_rows} rows]") + return "\n".join(lines) + else: + return repr_string def _repr_mimebundle_(self, include=None, exclude=None): """ diff --git a/notebooks/dataframes/anywidget_mode.ipynb b/notebooks/dataframes/anywidget_mode.ipynb index e4f1d9411d..b400fc5f9c 100644 --- a/notebooks/dataframes/anywidget_mode.ipynb +++ b/notebooks/dataframes/anywidget_mode.ipynb @@ -107,16 +107,16 @@ "output_type": "stream", "text": [ "state gender year name number\n", - " AL F 1910 Cora 61\n", - " AL F 1910 Anna 74\n", - " AR F 1910 Willie 132\n", - " CO F 1910 Anna 42\n", - " FL F 1910 Louise 70\n", - " GA F 1910 Catherine 57\n", - " IL F 1910 Jessie 43\n", - " IN F 1910 Anna 100\n", - " IN F 1910 Pauline 77\n", - " IN F 1910 Beulah 39\n", + " AL F 1910 Hazel 51\n", + " AL F 1910 Lucy 76\n", + " AR F 1910 Nellie 39\n", + " AR F 1910 Lena 40\n", + " CO F 1910 Thelma 36\n", + " CO F 1910 Ruth 68\n", + " CT F 1910 Elizabeth 86\n", + " DC F 1910 Mary 80\n", + " FL F 1910 Annie 101\n", + " FL F 1910 Alma 39\n", "...\n", "\n", "[5552452 rows x 5 columns]\n" @@ -146,7 +146,7 @@ "data": { "text/html": [ "✅ Completed. \n", - " Query processed 171.4 MB in 30 seconds of slot time. [Job bigframes-dev:US.a5deda5a-cf92-475e-9596-4f92c7f749b6 details]\n", + " Query processed 171.4 MB in 39 seconds of slot time. [Job bigframes-dev:US.7cbc5c5a-3096-4bfb-9f68-79bdcad9b406 details]\n", " " ], "text/plain": [ @@ -196,7 +196,7 @@ "1910\n", "1910\n", "1910\n", - "Name: year, Length: 10, dtype: Int64\n", + "Name: year, dtype: Int64\n", "...\n", "\n", "[5552452 rows]\n" @@ -208,73 +208,6 @@ "print(test_series)" ] }, - { - "cell_type": "code", - "execution_count": 6, - "id": "da23e0f3", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "✅ Completed. \n", - " Query processed 88.8 MB in 2 seconds of slot time. [Job bigframes-dev:US.job_kS5Cpa0qjt2oaPbsj3Rd5Gsmk5Zf details]\n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "✅ Completed. \n", - " Query processed 88.8 MB in 2 seconds of slot time. [Job bigframes-dev:US.job_QyhgBuHo8HQ-wxsovkW1eWZyATdo details]\n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "3971aa58b8b94313a6c5848c033eae14", - "version_major": 2, - "version_minor": 1 - }, - "text/plain": [ - "1910\n", - "1910\n", - "1910\n", - "1910\n", - "1910\n", - "1910\n", - "1910\n", - "1910\n", - "1910\n", - "1910\n", - "Name: year, Length: 10, dtype: Int64\n", - "...\n", - "\n", - "[5552452 rows]" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "test_series" - ] - }, { "cell_type": "markdown", "id": "7bcf1bb7", @@ -285,15 +218,15 @@ }, { "cell_type": "code", - "execution_count": 7, - "id": "ce250157", + "execution_count": 6, + "id": "71fa52ec", "metadata": {}, "outputs": [ { "data": { "text/html": [ "✅ Completed. \n", - " Query processed 215.9 MB in 8 seconds of slot time. [Job bigframes-dev:US.job_GNgZ21dOBLPqnQdcCRs_kxWHZnLO details]\n", + " Query processed 215.9 MB in 9 seconds of slot time. [Job bigframes-dev:US.job_IU7vod4QK1ou1TlbpmFQbTDG4ITH details]\n", " " ], "text/plain": [ @@ -307,7 +240,7 @@ "data": { "text/html": [ "✅ Completed. \n", - " Query processed 215.9 MB in 7 seconds of slot time. [Job bigframes-dev:US.job_SPvbV-t1egpXI-4ABmqjxJjY5FyA details]\n", + " Query processed 215.9 MB in 8 seconds of slot time. [Job bigframes-dev:US.job_fAljrol-9EGnTEYZEyllaW36FOI6 details]\n", " " ], "text/plain": [ @@ -320,7 +253,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "7c726c80061d4ac39cc1dc6151e5c269", + "model_id": "b007377c75cd46f68924e271074d52ff", "version_major": 2, "version_minor": 1 }, @@ -356,23 +289,23 @@ " AL\n", " F\n", " 1910\n", - " Hazel\n", - " 51\n", + " Sadie\n", + " 40\n", " \n", " \n", " 1\n", " AL\n", " F\n", " 1910\n", - " Lucy\n", - " 76\n", + " Mary\n", + " 875\n", " \n", " \n", " 2\n", " AR\n", " F\n", " 1910\n", - " Nellie\n", + " Vera\n", " 39\n", " \n", " \n", @@ -380,56 +313,56 @@ " AR\n", " F\n", " 1910\n", - " Lena\n", - " 40\n", + " Marie\n", + " 78\n", " \n", " \n", " 4\n", - " CO\n", + " AR\n", " F\n", " 1910\n", - " Thelma\n", - " 36\n", + " Lucille\n", + " 66\n", " \n", " \n", " 5\n", - " CO\n", + " CA\n", " F\n", " 1910\n", - " Ruth\n", - " 68\n", + " Virginia\n", + " 101\n", " \n", " \n", " 6\n", - " CT\n", + " DC\n", " F\n", " 1910\n", - " Elizabeth\n", - " 86\n", + " Margaret\n", + " 72\n", " \n", " \n", " 7\n", - " DC\n", + " GA\n", " F\n", " 1910\n", - " Mary\n", - " 80\n", + " Mildred\n", + " 133\n", " \n", " \n", " 8\n", - " FL\n", + " GA\n", " F\n", " 1910\n", - " Annie\n", - " 101\n", + " Vera\n", + " 51\n", " \n", " \n", " 9\n", - " FL\n", + " GA\n", " F\n", " 1910\n", - " Alma\n", - " 39\n", + " Sallie\n", + " 92\n", " \n", " \n", "\n", @@ -437,23 +370,23 @@ "[5552452 rows x 5 columns in total]" ], "text/plain": [ - "state gender year name number\n", - " AL F 1910 Hazel 51\n", - " AL F 1910 Lucy 76\n", - " AR F 1910 Nellie 39\n", - " AR F 1910 Lena 40\n", - " CO F 1910 Thelma 36\n", - " CO F 1910 Ruth 68\n", - " CT F 1910 Elizabeth 86\n", - " DC F 1910 Mary 80\n", - " FL F 1910 Annie 101\n", - " FL F 1910 Alma 39\n", + "state gender year name number\n", + " AL F 1910 Sadie 40\n", + " AL F 1910 Mary 875\n", + " AR F 1910 Vera 39\n", + " AR F 1910 Marie 78\n", + " AR F 1910 Lucille 66\n", + " CA F 1910 Virginia 101\n", + " DC F 1910 Margaret 72\n", + " GA F 1910 Mildred 133\n", + " GA F 1910 Vera 51\n", + " GA F 1910 Sallie 92\n", "...\n", "\n", "[5552452 rows x 5 columns]" ] }, - "execution_count": 7, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -462,6 +395,73 @@ "df" ] }, + { + "cell_type": "code", + "execution_count": 7, + "id": "da23e0f3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "✅ Completed. \n", + " Query processed 88.8 MB in 2 seconds of slot time. [Job bigframes-dev:US.job_m5DlrymiC8C75Ky06_gYCa1zNOO- details]\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "✅ Completed. \n", + " Query processed 88.8 MB in 3 seconds of slot time. [Job bigframes-dev:US.job_GZiZyMOq33ShjyrL5iHd4-SbvYFP details]\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "4ca161fae40b46f5af9dd05a18a31f8b", + "version_major": 2, + "version_minor": 1 + }, + "text/plain": [ + "1910\n", + "1910\n", + "1910\n", + "1910\n", + "1910\n", + "1910\n", + "1910\n", + "1910\n", + "1910\n", + "1910\n", + "Name: year, dtype: Int64\n", + "...\n", + "\n", + "[5552452 rows]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_series" + ] + }, { "cell_type": "markdown", "id": "sorting-intro", @@ -503,7 +503,7 @@ "data": { "text/html": [ "✅ Completed. \n", - " Query processed 215.9 MB in 6 seconds of slot time. [Job bigframes-dev:US.job_NO_gGB-KLVechm-gl9tIvEmtxpo7 details]\n", + " Query processed 215.9 MB in 9 seconds of slot time. [Job bigframes-dev:US.job_VHargXKi-0r5rTHw8MpLsH9SAHn4 details]\n", " " ], "text/plain": [ @@ -517,7 +517,7 @@ "data": { "text/html": [ "✅ Completed. \n", - " Query processed 215.9 MB in 9 seconds of slot time. [Job bigframes-dev:US.job_eUF7XO9UzQZwPzxy0w22_hbZGv3g details]\n", + " Query processed 215.9 MB in 8 seconds of slot time. [Job bigframes-dev:US.job_u35bTRyfTtiSX6aILqN44S5OJZMh details]\n", " " ], "text/plain": [ @@ -537,12 +537,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "91a96e79860a4dcea06513f14f33c458", + "model_id": "db36d4303b4a47479e2e6a92eba4f814", "version_major": 2, "version_minor": 1 }, "text/plain": [ - "" + "" ] }, "execution_count": 8, @@ -651,12 +651,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "8926468cb2034aebb348b17d3f9ba6f6", + "model_id": "46a644fa01a84ef18de5c7b7379322f1", "version_major": 2, "version_minor": 1 }, "text/plain": [ - "" + "" ] }, "execution_count": 10, @@ -691,7 +691,7 @@ "data": { "text/html": [ "✅ Completed. \n", - " Query processed 85.9 kB in 19 seconds of slot time.\n", + " Query processed 85.9 kB in 18 seconds of slot time.\n", " " ], "text/plain": [ @@ -752,7 +752,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "d774ed63f08941b8bf11ec1087609f17", + "model_id": "10be79ab2b40490baecbcc5ffdd300e0", "version_major": 2, "version_minor": 1 }, @@ -800,16 +800,16 @@ " EU\n", " DE\n", " 03.10.2018\n", - " H05B 6/12\n", + " H01L 21/20\n", " <NA>\n", - " 18165514.3\n", - " 03.04.2018\n", - " 30.03.2017\n", + " 18166536.5\n", + " 16.02.2016\n", " <NA>\n", - " BSH Hausger√§te GmbH\n", - " Acero Acero, Jesus\n", - " VORRICHTUNG ZUR INDUKTIVEN ENERGIE√úBERTRAGUNG\n", - " EP 3 383 141 A2\n", + " Scheider, Sascha et al\n", + " EV Group E. Thallner GmbH\n", + " Kurz, Florian\n", + " VORRICHTUNG ZUM BONDEN VON SUBSTRATEN\n", + " EP 3 382 744 A1\n", " \n", " \n", " 1\n", @@ -836,24 +836,6 @@ " EU\n", " DE\n", " 03.10.2018\n", - " H01L 21/20\n", - " <NA>\n", - " 18166536.5\n", - " 16.02.2016\n", - " <NA>\n", - " Scheider, Sascha et al\n", - " EV Group E. Thallner GmbH\n", - " Kurz, Florian\n", - " VORRICHTUNG ZUM BONDEN VON SUBSTRATEN\n", - " EP 3 382 744 A1\n", - " \n", - " \n", - " 3\n", - " {'application_number': None, 'class_internatio...\n", - " gs://gcs-public-data--labeled-patents/espacene...\n", - " EU\n", - " DE\n", - " 03.10.2018\n", " G06F 11/30\n", " <NA>\n", " 18157347.8\n", @@ -866,7 +848,7 @@ " EP 3 382 553 A1\n", " \n", " \n", - " 4\n", + " 3\n", " {'application_number': None, 'class_internatio...\n", " gs://gcs-public-data--labeled-patents/espacene...\n", " EU\n", @@ -883,6 +865,24 @@ " MASTH√ÑHNCHENCONTAINER ALS BESTANDTEIL EINER E...\n", " EP 3 381 276 A1\n", " \n", + " \n", + " 4\n", + " {'application_number': None, 'class_internatio...\n", + " gs://gcs-public-data--labeled-patents/espacene...\n", + " EU\n", + " DE\n", + " 03.10.2018\n", + " H05B 6/12\n", + " <NA>\n", + " 18165514.3\n", + " 03.04.2018\n", + " 30.03.2017\n", + " <NA>\n", + " BSH Hausger√§te GmbH\n", + " Acero Acero, Jesus\n", + " VORRICHTUNG ZUR INDUKTIVEN ENERGIE√úBERTRAGUNG\n", + " EP 3 383 141 A2\n", + " \n", " \n", "\n", "

5 rows × 15 columns

\n", @@ -904,32 +904,32 @@ "4 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", "\n", " publication_date class_international class_us application_number \\\n", - "0 03.10.2018 H05B 6/12 18165514.3 \n", + "0 03.10.2018 H01L 21/20 18166536.5 \n", "1 29.08.018 E04H 6/12 18157874.1 \n", - "2 03.10.2018 H01L 21/20 18166536.5 \n", - "3 03.10.2018 G06F 11/30 18157347.8 \n", - "4 03.10.2018 A01K 31/00 18171005.4 \n", + "2 03.10.2018 G06F 11/30 18157347.8 \n", + "3 03.10.2018 A01K 31/00 18171005.4 \n", + "4 03.10.2018 H05B 6/12 18165514.3 \n", "\n", " filing_date priority_date_eu representative_line_1_eu \\\n", - "0 03.04.2018 30.03.2017 \n", + "0 16.02.2016 Scheider, Sascha et al \n", "1 21.02.2018 22.02.2017 Liedtke & Partner Patentanw√§lte \n", - "2 16.02.2016 Scheider, Sascha et al \n", - "3 19.02.2018 31.03.2017 Hoffmann Eitle \n", - "4 05.02.2015 05.02.2014 Stork Bamberger Patentanw√§lte \n", + "2 19.02.2018 31.03.2017 Hoffmann Eitle \n", + "3 05.02.2015 05.02.2014 Stork Bamberger Patentanw√§lte \n", + "4 03.04.2018 30.03.2017 \n", "\n", " applicant_line_1 inventor_line_1 \\\n", - "0 BSH Hausger√§te GmbH Acero Acero, Jesus \n", + "0 EV Group E. Thallner GmbH Kurz, Florian \n", "1 SHB Hebezeugbau GmbH VOLGER, Alexander \n", - "2 EV Group E. Thallner GmbH Kurz, Florian \n", - "3 FUJITSU LIMITED Kukihara, Kensuke \n", - "4 Linco Food Systems A/S Thrane, Uffe \n", + "2 FUJITSU LIMITED Kukihara, Kensuke \n", + "3 Linco Food Systems A/S Thrane, Uffe \n", + "4 BSH Hausger√§te GmbH Acero Acero, Jesus \n", "\n", " title_line_1 number \n", - "0 VORRICHTUNG ZUR INDUKTIVEN ENERGIE√úBERTRAGUNG EP 3 383 141 A2 \n", + "0 VORRICHTUNG ZUM BONDEN VON SUBSTRATEN EP 3 382 744 A1 \n", "1 STEUERUNGSSYSTEM F√úR AUTOMATISCHE PARKH√ÑUSER EP 3 366 869 A1 \n", - "2 VORRICHTUNG ZUM BONDEN VON SUBSTRATEN EP 3 382 744 A1 \n", - "3 METHOD EXECUTED BY A COMPUTER, INFORMATION PRO... EP 3 382 553 A1 \n", - "4 MASTH√ÑHNCHENCONTAINER ALS BESTANDTEIL EINER E... EP 3 381 276 A1 \n", + "2 METHOD EXECUTED BY A COMPUTER, INFORMATION PRO... EP 3 382 553 A1 \n", + "3 MASTH√ÑHNCHENCONTAINER ALS BESTANDTEIL EINER E... EP 3 381 276 A1 \n", + "4 VORRICHTUNG ZUR INDUKTIVEN ENERGIE√úBERTRAGUNG EP 3 383 141 A2 \n", "\n", "[5 rows x 15 columns]" ] From 81d1dbeb0c5f0ab1465106ad37e1adbd1373cd7f Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Thu, 18 Dec 2025 23:58:49 -0800 Subject: [PATCH 08/25] refactor: Handle special float values and None consistently in sqlglot _literal (#2337) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change can resolve two doctests failures in #2248: `groupby.GroupBy.rank` and `bigframes.ml.metrics.roc_curve` Fixes internal issue 417774347🦕 --------- Co-authored-by: Shenyang Cai --- bigframes/core/compile/sqlglot/sqlglot_ir.py | 45 +++++++++++-------- .../out.sql | 25 +++++++++++ .../compile/sqlglot/test_compile_readlocal.py | 23 ++++++++++ 3 files changed, 74 insertions(+), 19 deletions(-) create mode 100644 tests/unit/core/compile/sqlglot/snapshots/test_compile_readlocal/test_compile_readlocal_w_special_values/out.sql diff --git a/bigframes/core/compile/sqlglot/sqlglot_ir.py b/bigframes/core/compile/sqlglot/sqlglot_ir.py index 0d568b098b..cbc601ea63 100644 --- a/bigframes/core/compile/sqlglot/sqlglot_ir.py +++ b/bigframes/core/compile/sqlglot/sqlglot_ir.py @@ -21,6 +21,7 @@ from google.cloud import bigquery import numpy as np +import pandas as pd import pyarrow as pa import sqlglot as sg import sqlglot.dialects.bigquery @@ -28,7 +29,7 @@ from bigframes import dtypes from bigframes.core import guid, local_data, schema, utils -from bigframes.core.compile.sqlglot.expressions import typed_expr +from bigframes.core.compile.sqlglot.expressions import constants, typed_expr import bigframes.core.compile.sqlglot.sqlglot_types as sgt # shapely.wkt.dumps was moved to shapely.io.to_wkt in 2.0. @@ -639,12 +640,30 @@ def _select_to_cte(expr: sge.Select, cte_name: sge.Identifier) -> sge.Select: def _literal(value: typing.Any, dtype: dtypes.Dtype) -> sge.Expression: sqlglot_type = sgt.from_bigframes_dtype(dtype) if dtype else None if sqlglot_type is None: - if value is not None: - raise ValueError("Cannot infer SQLGlot type from None dtype.") + if not pd.isna(value): + raise ValueError(f"Cannot infer SQLGlot type from None dtype: {value}") return sge.Null() if value is None: return _cast(sge.Null(), sqlglot_type) + if dtypes.is_struct_like(dtype): + items = [ + _literal(value=value[field_name], dtype=field_dtype).as_( + field_name, quoted=True + ) + for field_name, field_dtype in dtypes.get_struct_fields(dtype).items() + ] + return sge.Struct.from_arg_list(items) + elif dtypes.is_array_like(dtype): + value_type = dtypes.get_array_inner_type(dtype) + values = sge.Array( + expressions=[_literal(value=v, dtype=value_type) for v in value] + ) + return values if len(value) > 0 else _cast(values, sqlglot_type) + elif pd.isna(value): + return _cast(sge.Null(), sqlglot_type) + elif dtype == dtypes.JSON_DTYPE: + return sge.ParseJSON(this=sge.convert(str(value))) elif dtype == dtypes.BYTES_DTYPE: return _cast(str(value), sqlglot_type) elif dtypes.is_time_like(dtype): @@ -658,24 +677,12 @@ def _literal(value: typing.Any, dtype: dtypes.Dtype) -> sge.Expression: elif dtypes.is_geo_like(dtype): wkt = value if isinstance(value, str) else to_wkt(value) return sge.func("ST_GEOGFROMTEXT", sge.convert(wkt)) - elif dtype == dtypes.JSON_DTYPE: - return sge.ParseJSON(this=sge.convert(str(value))) elif dtype == dtypes.TIMEDELTA_DTYPE: return sge.convert(utils.timedelta_to_micros(value)) - elif dtypes.is_struct_like(dtype): - items = [ - _literal(value=value[field_name], dtype=field_dtype).as_( - field_name, quoted=True - ) - for field_name, field_dtype in dtypes.get_struct_fields(dtype).items() - ] - return sge.Struct.from_arg_list(items) - elif dtypes.is_array_like(dtype): - value_type = dtypes.get_array_inner_type(dtype) - values = sge.Array( - expressions=[_literal(value=v, dtype=value_type) for v in value] - ) - return values if len(value) > 0 else _cast(values, sqlglot_type) + elif dtype == dtypes.FLOAT_DTYPE: + if np.isinf(value): + return constants._INF if value > 0 else constants._NEG_INF + return sge.convert(value) else: if isinstance(value, np.generic): value = value.item() diff --git a/tests/unit/core/compile/sqlglot/snapshots/test_compile_readlocal/test_compile_readlocal_w_special_values/out.sql b/tests/unit/core/compile/sqlglot/snapshots/test_compile_readlocal/test_compile_readlocal_w_special_values/out.sql new file mode 100644 index 0000000000..ba5e0c8f1c --- /dev/null +++ b/tests/unit/core/compile/sqlglot/snapshots/test_compile_readlocal/test_compile_readlocal_w_special_values/out.sql @@ -0,0 +1,25 @@ +WITH `bfcte_0` AS ( + SELECT + * + FROM UNNEST(ARRAY, `bfcol_5` STRUCT, `bfcol_6` ARRAY, `bfcol_7` INT64>>[STRUCT( + CAST(NULL AS FLOAT64), + CAST('Infinity' AS FLOAT64), + CAST('-Infinity' AS FLOAT64), + CAST(NULL AS FLOAT64), + CAST(NULL AS STRUCT), + STRUCT(CAST(NULL AS INT64) AS `foo`), + ARRAY[], + 0 + ), STRUCT(1.0, 1.0, 1.0, 1.0, STRUCT(1 AS `foo`), STRUCT(1 AS `foo`), [1, 2], 1), STRUCT(2.0, 2.0, 2.0, 2.0, STRUCT(2 AS `foo`), STRUCT(2 AS `foo`), [3, 4], 2)]) +) +SELECT + `bfcol_0` AS `col_none`, + `bfcol_1` AS `col_inf`, + `bfcol_2` AS `col_neginf`, + `bfcol_3` AS `col_nan`, + `bfcol_4` AS `col_struct_none`, + `bfcol_5` AS `col_struct_w_none`, + `bfcol_6` AS `col_list_none` +FROM `bfcte_0` +ORDER BY + `bfcol_7` ASC NULLS LAST \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/test_compile_readlocal.py b/tests/unit/core/compile/sqlglot/test_compile_readlocal.py index 7307fd9b4e..c5fabd99e6 100644 --- a/tests/unit/core/compile/sqlglot/test_compile_readlocal.py +++ b/tests/unit/core/compile/sqlglot/test_compile_readlocal.py @@ -12,6 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. +import sys + +import numpy as np import pandas as pd import pytest @@ -58,3 +61,23 @@ def test_compile_readlocal_w_json_df( ): bf_df = bpd.DataFrame(json_pandas_df, session=compiler_session_w_json_types) snapshot.assert_match(bf_df.sql, "out.sql") + + +def test_compile_readlocal_w_special_values( + compiler_session: bigframes.Session, snapshot +): + if sys.version_info < (3, 12): + pytest.skip("Skipping test due to inconsistent SQL formatting") + df = pd.DataFrame( + { + "col_none": [None, 1, 2], + "col_inf": [np.inf, 1.0, 2.0], + "col_neginf": [-np.inf, 1.0, 2.0], + "col_nan": [np.nan, 1.0, 2.0], + "col_struct_none": [None, {"foo": 1}, {"foo": 2}], + "col_struct_w_none": [{"foo": None}, {"foo": 1}, {"foo": 2}], + "col_list_none": [None, [1, 2], [3, 4]], + } + ) + bf_df = bpd.DataFrame(df, session=compiler_session) + snapshot.assert_match(bf_df.sql, "out.sql") From fd04e6ad7d052ac08d7ba425c44a76831db3f851 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 19 Dec 2025 22:39:43 +0000 Subject: [PATCH 09/25] refactor: code refactor --- bigframes/dataframe.py | 141 +---------------------- bigframes/display/html.py | 162 ++++++++++++++++++++++++++- bigframes/series.py | 108 +----------------- tests/js/series_widget.test.js | 97 ---------------- tests/js/table_widget.test.js | 53 +++++++++ tests/system/small/test_anywidget.py | 15 ++- 6 files changed, 237 insertions(+), 339 deletions(-) delete mode 100644 tests/js/series_widget.test.js diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index 4d594ddfbc..df447b8bc7 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -23,7 +23,6 @@ import re import sys import textwrap -import traceback import typing from typing import ( Any, @@ -788,44 +787,18 @@ def __repr__(self) -> str: return object.__repr__(self) opts = bigframes.options.display - max_results = opts.max_rows if opts.repr_mode == "deferred": return formatter.repr_query_job(self._compute_dry_run()) - # TODO(swast): pass max_columns and get the true column count back. Maybe - # get 1 more column than we have requested so that pandas can add the - # ... for us? + max_results = opts.max_rows pandas_df, row_count, query_job = self._block.retrieve_repr_request_results( max_results ) self._set_internal_query_job(query_job) + from bigframes.display import html - column_count = len(pandas_df.columns) - - with display_options.pandas_repr(opts): - import pandas.io.formats - - # safe to mutate this, this dict is owned by this code, and does not affect global config - to_string_kwargs = ( - pandas.io.formats.format.get_dataframe_repr_params() # type: ignore - ) - if not self._has_index: - to_string_kwargs.update({"index": False}) - repr_string = pandas_df.to_string(**to_string_kwargs) - - # Modify the end of the string to reflect count. - lines = repr_string.split("\n") - pattern = re.compile("\\[[0-9]+ rows x [0-9]+ columns\\]") - if pattern.match(lines[-1]): - lines = lines[:-2] - - if row_count > len(lines) - 1: - lines.append("...") - - lines.append("") - lines.append(f"[{row_count} rows x {column_count} columns]") - return "\n".join(lines) + return html.create_text_representation(self, pandas_df, row_count) def _get_display_df_and_blob_cols(self) -> tuple[DataFrame, list[str]]: """Process blob columns for display.""" @@ -844,118 +817,14 @@ def _get_display_df_and_blob_cols(self) -> tuple[DataFrame, list[str]]: df[col] = df[col].blob._get_runtime(mode="R", with_metadata=True) return df, blob_cols - def _get_anywidget_bundle( - self, include=None, exclude=None - ) -> tuple[dict[str, Any], dict[str, Any]]: - """ - Helper method to create and return the anywidget mimebundle. - This function encapsulates the logic for anywidget display. - """ - from bigframes import display - - df, blob_cols = self._get_display_df_and_blob_cols() - - # Create and display the widget - widget = display.TableWidget(df) - widget_repr_result = widget._repr_mimebundle_(include=include, exclude=exclude) - - # Handle both tuple (data, metadata) and dict returns - if isinstance(widget_repr_result, tuple): - widget_repr, widget_metadata = widget_repr_result - else: - widget_repr = widget_repr_result - widget_metadata = {} - - widget_repr = dict(widget_repr) - - # At this point, we have already executed the query as part of the - # widget construction. Let's use the information available to render - # the HTML and plain text versions. - widget_repr["text/html"] = self._create_html_representation( - widget._cached_data, - widget.row_count, - len(self.columns), - blob_cols, - ) - - widget_repr["text/plain"] = self._create_text_representation( - widget._cached_data, widget.row_count - ) - - return widget_repr, widget_metadata - - def _create_text_representation( - self, pandas_df: pandas.DataFrame, total_rows: typing.Optional[int] - ) -> str: - """Create a text representation of the DataFrame.""" - opts = bigframes.options.display - with display_options.pandas_repr(opts): - import pandas.io.formats - - # safe to mutate this, this dict is owned by this code, and does not affect global config - to_string_kwargs = ( - pandas.io.formats.format.get_dataframe_repr_params() # type: ignore - ) - if not self._has_index: - to_string_kwargs.update({"index": False}) - - # We add our own dimensions string, so don't want pandas to. - to_string_kwargs.update({"show_dimensions": False}) - repr_string = pandas_df.to_string(**to_string_kwargs) - - lines = repr_string.split("\n") - - if total_rows is not None and total_rows > len(pandas_df): - lines.append("...") - - lines.append("") - column_count = len(self.columns) - lines.append(f"[{total_rows or '?'} rows x {column_count} columns]") - return "\n".join(lines) - def _repr_mimebundle_(self, include=None, exclude=None): """ Custom display method for IPython/Jupyter environments. This is called by IPython's display system when the object is displayed. """ - # TODO(b/467647693): Anywidget integration has been tested in Jupyter, VS Code, and - # BQ Studio, but there is a known compatibility issue with Marimo that needs to be addressed. - opts = bigframes.options.display - # Only handle widget display in anywidget mode - if opts.repr_mode == "anywidget": - try: - return self._get_anywidget_bundle(include=include, exclude=exclude) - - except ImportError: - # Anywidget is an optional dependency, so warn rather than fail. - # TODO(shuowei): When Anywidget becomes the default for all repr modes, - # remove this warning. - warnings.warn( - "Anywidget mode is not available. " - "Please `pip install anywidget traitlets` or `pip install 'bigframes[anywidget]'` to use interactive tables. " - f"Falling back to static HTML. Error: {traceback.format_exc()}" - ) - - # In non-anywidget mode, fetch data once and use it for both HTML - # and plain text representations to avoid multiple queries. - opts = bigframes.options.display - max_results = opts.max_rows - - df, blob_cols = self._get_display_df_and_blob_cols() - - pandas_df, row_count, query_job = df._block.retrieve_repr_request_results( - max_results - ) - self._set_internal_query_job(query_job) - column_count = len(pandas_df.columns) - - html_string = self._create_html_representation( - pandas_df, row_count, column_count, blob_cols - ) - - text_representation = self._create_text_representation(pandas_df, row_count) + from bigframes.display import html - return {"text/html": html_string, "text/plain": text_representation} + return html.repr_mimebundle(self, include=include, exclude=exclude) def _create_html_representation( self, diff --git a/bigframes/display/html.py b/bigframes/display/html.py index 101bd296f1..a2bbc0be8d 100644 --- a/bigframes/display/html.py +++ b/bigframes/display/html.py @@ -17,12 +17,18 @@ from __future__ import annotations import html -from typing import Any +import traceback +import typing +from typing import Any, Union +import warnings import pandas as pd import pandas.api.types -from bigframes._config import options +import bigframes +from bigframes._config import display_options, options +import bigframes.dataframe +import bigframes.series def _is_dtype_numeric(dtype: Any) -> bool: @@ -91,3 +97,155 @@ def render_html( table_html.append("") return "\n".join(table_html) + + +def create_text_representation( + obj: Union[bigframes.dataframe.DataFrame, bigframes.series.Series], + pandas_df: pd.DataFrame, + total_rows: typing.Optional[int], +) -> str: + """Create a text representation of the DataFrame or Series.""" + opts = bigframes.options.display + with display_options.pandas_repr(opts): + if isinstance(obj, bigframes.series.Series): + pd_series = pandas_df.iloc[:, 0] + if len(obj._block.index_columns) == 0: + repr_string = pd_series.to_string( + length=False, index=False, name=True, dtype=True + ) + else: + repr_string = pd_series.to_string(length=False, name=True, dtype=True) + else: + import pandas.io.formats + + to_string_kwargs = ( + pandas.io.formats.format.get_dataframe_repr_params() # type: ignore + ) + if not obj._has_index: + to_string_kwargs.update({"index": False}) + to_string_kwargs.update({"show_dimensions": False}) + repr_string = pandas_df.to_string(**to_string_kwargs) + + lines = repr_string.split("\n") + is_truncated = total_rows is not None and total_rows > len(pandas_df) + + if is_truncated: + lines.append("...") + lines.append("") # Add empty line for spacing only if truncated + if isinstance(obj, bigframes.series.Series): + lines.append(f"[{total_rows} rows]") + else: + column_count = len(obj.columns) + lines.append(f"[{total_rows or '?'} rows x {column_count} columns]") + elif isinstance(obj, bigframes.dataframe.DataFrame): + # For non-truncated DataFrames, we still need to add dimensions if show_dimensions was False + column_count = len(obj.columns) + lines.append("") + lines.append(f"[{total_rows or '?'} rows x {column_count} columns]") + + return "\n".join(lines) + + +def create_html_representation( + obj: Union[bigframes.dataframe.DataFrame, bigframes.series.Series], + pandas_df: pd.DataFrame, + total_rows: int, + total_columns: int, + blob_cols: list[str], +) -> str: + """Create an HTML representation of the DataFrame or Series.""" + if isinstance(obj, bigframes.series.Series): + pd_series = pandas_df.iloc[:, 0] + try: + html_string = pd_series._repr_html_() + except AttributeError: + html_string = f"
{pd_series.to_string()}
" + else: + html_string = obj._create_html_representation( + pandas_df, total_rows, total_columns, blob_cols + ) + return html_string + + +def get_anywidget_bundle( + obj: Union[bigframes.dataframe.DataFrame, bigframes.series.Series], + include=None, + exclude=None, +) -> tuple[dict[str, Any], dict[str, Any]]: + """ + Helper method to create and return the anywidget mimebundle. + This function encapsulates the logic for anywidget display. + """ + from bigframes import display + + if isinstance(obj, bigframes.series.Series): + df = obj.to_frame() + else: + df, blob_cols = obj._get_display_df_and_blob_cols() + + widget = display.TableWidget(df) + widget_repr_result = widget._repr_mimebundle_(include=include, exclude=exclude) + + if isinstance(widget_repr_result, tuple): + widget_repr, widget_metadata = widget_repr_result + else: + widget_repr = widget_repr_result + widget_metadata = {} + + widget_repr = dict(widget_repr) + + # Use cached data from widget to render HTML and plain text versions. + cached_pd = widget._cached_data + total_rows = widget.row_count + total_columns = len(df.columns) + + widget_repr["text/html"] = create_html_representation( + obj, + cached_pd, + total_rows, + total_columns, + blob_cols if "blob_cols" in locals() else [], + ) + widget_repr["text/plain"] = create_text_representation(obj, cached_pd, total_rows) + + return widget_repr, widget_metadata + + +def repr_mimebundle( + obj: Union[bigframes.dataframe.DataFrame, bigframes.series.Series], + include=None, + exclude=None, +): + """ + Custom display method for IPython/Jupyter environments. + """ + opts = bigframes.options.display + if opts.repr_mode == "anywidget": + try: + return get_anywidget_bundle(obj, include=include, exclude=exclude) + except ImportError: + warnings.warn( + "Anywidget mode is not available. " + "Please `pip install anywidget traitlets` or `pip install 'bigframes[anywidget]'` to use interactive tables. " + f"Falling back to static HTML. Error: {traceback.format_exc()}" + ) + + if isinstance(obj, bigframes.series.Series): + df = obj + blob_cols: list[str] = [] + else: + df, blob_cols = obj._get_display_df_and_blob_cols() + + pandas_df, row_count, query_job = df._block.retrieve_repr_request_results( + opts.max_rows + ) + obj._set_internal_query_job(query_job) + column_count = len(pandas_df.columns) + + html_string = create_html_representation( + obj, pandas_df, row_count, column_count, blob_cols + ) + + text_representation = create_text_representation(obj, pandas_df, row_count) + + return {"text/html": html_string, "text/plain": text_representation} diff --git a/bigframes/series.py b/bigframes/series.py index 9e94f04e24..663ee6e5a4 100644 --- a/bigframes/series.py +++ b/bigframes/series.py @@ -22,7 +22,6 @@ import itertools import numbers import textwrap -import traceback import typing from typing import ( Any, @@ -49,7 +48,6 @@ import pyarrow as pa import typing_extensions -import bigframes._config.display_options as display_options import bigframes.core from bigframes.core import agg_expressions, groupby, log_adapter import bigframes.core.block_transforms as block_ops @@ -570,104 +568,14 @@ def reset_index( block = block.assign_label(self._value_column, name) return bigframes.dataframe.DataFrame(block) - def _get_anywidget_bundle( - self, include=None, exclude=None - ) -> tuple[dict[str, Any], dict[str, Any]]: - """ - Helper method to create and return the anywidget mimebundle for Series. - """ - from bigframes import display - - # Convert Series to DataFrame for TableWidget - series_df = self.to_frame() - - # Create and display the widget - widget = display.TableWidget(series_df) - widget_repr_result = widget._repr_mimebundle_(include=include, exclude=exclude) - - # Handle both tuple (data, metadata) and dict returns - if isinstance(widget_repr_result, tuple): - widget_repr, widget_metadata = widget_repr_result - else: - widget_repr = widget_repr_result - widget_metadata = {} - - widget_repr = dict(widget_repr) - - # Add text representation - widget_repr["text/plain"] = self._create_text_representation( - widget._cached_data, widget.row_count - ) - - return widget_repr, widget_metadata - - def _create_text_representation( - self, pandas_df: pandas.DataFrame, total_rows: typing.Optional[int] - ) -> str: - """Create a text representation of the Series.""" - opts = bigframes.options.display - with display_options.pandas_repr(opts): - # Get the first column since Series DataFrame has only one column - pd_series = pandas_df.iloc[:, 0] - if len(self._block.index_columns) == 0: - repr_string = pd_series.to_string( - length=False, index=False, name=True, dtype=True - ) - else: - repr_string = pd_series.to_string(length=False, name=True, dtype=True) - - is_truncated = total_rows is not None and total_rows > len(pd_series) - - if is_truncated: - lines = repr_string.split("\n") - lines.append("...") - lines.append("") - lines.append(f"[{total_rows} rows]") - return "\n".join(lines) - else: - return repr_string - def _repr_mimebundle_(self, include=None, exclude=None): """ Custom display method for IPython/Jupyter environments. This is called by IPython's display system when the object is displayed. """ - opts = bigframes.options.display - - # Only handle widget display in anywidget mode - if opts.repr_mode == "anywidget": - try: - return self._get_anywidget_bundle(include=include, exclude=exclude) - - except ImportError: - # Anywidget is an optional dependency, so warn rather than fail. - warnings.warn( - "Anywidget mode is not available. " - "Please `pip install anywidget traitlets` or `pip install 'bigframes[anywidget]'` to use interactive tables. " - f"Falling back to static HTML. Error: {traceback.format_exc()}" - ) - # Fall back to regular HTML representation - pass - - # Continue with regular HTML rendering for non-anywidget modes - self._cached() - pandas_df, row_count, query_job = self._block.retrieve_repr_request_results( - opts.max_rows - ) - self._set_internal_query_job(query_job) - - pd_series = pandas_df.iloc[:, 0] + from bigframes.display import html - # Use pandas Series _repr_html_ if available, otherwise create basic HTML - try: - html_string = pd_series._repr_html_() - except AttributeError: - # Fallback for pandas versions without _repr_html_ - html_string = f"
{pd_series.to_string()}
" - - text_representation = self._create_text_representation(pandas_df, row_count) - - return {"text/html": html_string, "text/plain": text_representation} + return html.repr_mimebundle(self, include=include, exclude=exclude) def __repr__(self) -> str: # Protect against errors with uninitialized Series. See: @@ -675,24 +583,18 @@ def __repr__(self) -> str: if not hasattr(self, "_block"): return object.__repr__(self) - # TODO(swast): Add a timeout here? If the query is taking a long time, - # maybe we just print the job metadata that we have so far? - # TODO(swast): Avoid downloading the whole series by using job - # metadata, like we do with DataFrame. opts = bigframes.options.display - max_results = opts.max_rows - # anywdiget mode uses the same display logic as the "deferred" mode - # for faster execution if opts.repr_mode == "deferred": return formatter.repr_query_job(self._compute_dry_run()) self._cached() pandas_df, row_count, query_job = self._block.retrieve_repr_request_results( - max_results + opts.max_rows ) self._set_internal_query_job(query_job) + from bigframes.display import html - return self._create_text_representation(pandas_df, row_count) + return html.create_text_representation(self, pandas_df, row_count) def astype( self, diff --git a/tests/js/series_widget.test.js b/tests/js/series_widget.test.js deleted file mode 100644 index 3af2a14b52..0000000000 --- a/tests/js/series_widget.test.js +++ /dev/null @@ -1,97 +0,0 @@ -/** - * @jest-environment jsdom - */ - -// Copyright 2025 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -import { jest } from "@jest/globals"; -import "@testing-library/jest-dom"; - -describe("SeriesWidget", () => { - let model; - let el; - let render; - - beforeEach(async () => { - jest.resetModules(); - document.body.innerHTML = "
"; - el = document.body.querySelector("div"); - - const tableWidget = ( - await import("../../bigframes/display/table_widget.js") - ).default; - render = tableWidget.render; - - model = { - get: jest.fn(), - set: jest.fn(), - save_changes: jest.fn(), - on: jest.fn(), - }; - }); - - it("should render the series as a table with an index and one value column", () => { - // Mock the initial state - model.get.mockImplementation((property) => { - if (property === "table_html") { - return ` -
-
- - - - - - - - - - - - - - - - - -
value
0a
1b
-
-
`; - } - if (property === "orderable_columns") { - return []; - } - return null; - }); - - render({ model, el }); - - // Manually trigger the table_html change handler - const tableHtmlChangeHandler = model.on.mock.calls.find( - (call) => call[0] === "change:table_html", - )[1]; - tableHtmlChangeHandler(); - - // Check that the table has two columns - const headers = el.querySelectorAll( - ".paginated-table-container .col-header-name", - ); - expect(headers).toHaveLength(2); - - // Check that the headers are an empty string (for the index) and "value" - expect(headers[0].textContent).toBe(""); - expect(headers[1].textContent).toBe("value"); - }); -}); diff --git a/tests/js/table_widget.test.js b/tests/js/table_widget.test.js index 77ec7bcdd5..6b5dda48d1 100644 --- a/tests/js/table_widget.test.js +++ b/tests/js/table_widget.test.js @@ -206,4 +206,57 @@ describe("TableWidget", () => { expect(indicator2.textContent).toBe("●"); }); }); + + it("should render the series as a table with an index and one value column", () => { + // Mock the initial state + model.get.mockImplementation((property) => { + if (property === "table_html") { + return ` +
+
+ + + + + + + + + + + + + + + + + +
value
0a
1b
+
+
`; + } + if (property === "orderable_columns") { + return []; + } + return null; + }); + + render({ model, el }); + + // Manually trigger the table_html change handler + const tableHtmlChangeHandler = model.on.mock.calls.find( + (call) => call[0] === "change:table_html", + )[1]; + tableHtmlChangeHandler(); + + // Check that the table has two columns + const headers = el.querySelectorAll( + ".paginated-table-container .col-header-name", + ); + expect(headers).toHaveLength(2); + + // Check that the headers are an empty string (for the index) and "value" + expect(headers[0].textContent).toBe(""); + expect(headers[1].textContent).toBe("value"); + }); }); diff --git a/tests/system/small/test_anywidget.py b/tests/system/small/test_anywidget.py index db40ec8ee8..854e693fdf 100644 --- a/tests/system/small/test_anywidget.py +++ b/tests/system/small/test_anywidget.py @@ -942,6 +942,19 @@ def test_repr_mimebundle_should_return_widget_view_if_anywidget_is_available( assert "text/plain" in data +def test_repr_in_anywidget_mode_should_not_be_deferred( + paginated_bf_df: bf.dataframe.DataFrame, +): + """ + Test that repr(df) is not deferred in anywidget mode. + This is to ensure that print(df) works as expected. + """ + with bigframes.option_context("display.repr_mode", "anywidget"): + representation = repr(paginated_bf_df) + assert "Computation deferred" not in representation + assert "page_1_row_1" in representation + + def test_dataframe_repr_mimebundle_should_return_widget_with_metadata_in_anywidget_mode( monkeypatch: pytest.MonkeyPatch, session: bigframes.Session, # Add session as a fixture @@ -971,7 +984,7 @@ def test_dataframe_repr_mimebundle_should_return_widget_with_metadata_in_anywidg # Patch the class method directly with mock.patch( - "bigframes.dataframe.DataFrame._get_anywidget_bundle", + "bigframes.display.html.get_anywidget_bundle", return_value=mock_get_anywidget_bundle_return_value, ): result = test_df._repr_mimebundle_() From 4825aeba032ed718d9a53b557cb6c07d3178e127 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 19 Dec 2025 22:50:13 +0000 Subject: [PATCH 10/25] fix: fix mypy --- bigframes/display/html.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/bigframes/display/html.py b/bigframes/display/html.py index a2bbc0be8d..85744a8b7f 100644 --- a/bigframes/display/html.py +++ b/bigframes/display/html.py @@ -230,15 +230,18 @@ def repr_mimebundle( f"Falling back to static HTML. Error: {traceback.format_exc()}" ) + blob_cols: list[str] if isinstance(obj, bigframes.series.Series): - df = obj - blob_cols: list[str] = [] + pandas_df, row_count, query_job = obj._block.retrieve_repr_request_results( + opts.max_rows + ) + blob_cols = [] else: df, blob_cols = obj._get_display_df_and_blob_cols() + pandas_df, row_count, query_job = df._block.retrieve_repr_request_results( + opts.max_rows + ) - pandas_df, row_count, query_job = df._block.retrieve_repr_request_results( - opts.max_rows - ) obj._set_internal_query_job(query_job) column_count = len(pandas_df.columns) From d36fc0fe15986a10b579ddda089ecf8ebb129fbc Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Mon, 22 Dec 2025 22:27:07 +0000 Subject: [PATCH 11/25] refactor: move code to plaintext file and add checks --- bigframes/display/plaintext.py | 35 ++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 bigframes/display/plaintext.py diff --git a/bigframes/display/plaintext.py b/bigframes/display/plaintext.py new file mode 100644 index 0000000000..f96e5571dd --- /dev/null +++ b/bigframes/display/plaintext.py @@ -0,0 +1,35 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Plaintext display representations.""" + +from __future__ import annotations + +import typing + +if typing.TYPE_CHECKING: + import pandas as pd + + import bigframes + + +def create_text_representation( + obj: "bigframes.dataframe.DataFrame" | "bigframes.series.Series", + pandas_df: "pd.DataFrame", + total_rows: int | None, +) -> str: + """Create a text representation of the DataFrame or Series.""" + # TODO(swast): This module should probably just be removed and combined + # with the html module. + return obj._create_text_representation(pandas_df, total_rows) From 593f9aedc207192a45c8689740d2960f43841da1 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Mon, 22 Dec 2025 22:29:42 +0000 Subject: [PATCH 12/25] refactor: move code to plaintext file and add checks --- bigframes/dataframe.py | 38 +++++++++++-- bigframes/display/html.py | 109 +++++++++++++++----------------------- bigframes/series.py | 53 ++++++++++++++++-- 3 files changed, 127 insertions(+), 73 deletions(-) diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index df447b8bc7..f24402ee96 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -796,9 +796,7 @@ def __repr__(self) -> str: ) self._set_internal_query_job(query_job) - from bigframes.display import html - - return html.create_text_representation(self, pandas_df, row_count) + return self._create_text_representation(pandas_df, row_count) def _get_display_df_and_blob_cols(self) -> tuple[DataFrame, list[str]]: """Process blob columns for display.""" @@ -826,6 +824,40 @@ def _repr_mimebundle_(self, include=None, exclude=None): return html.repr_mimebundle(self, include=include, exclude=exclude) + def _create_text_representation( + self, + pandas_df: pandas.DataFrame, + total_rows: typing.Optional[int], + ) -> str: + """Create a text representation of the DataFrame.""" + opts = bigframes.options.display + with display_options.pandas_repr(opts): + import pandas.io.formats + + to_string_kwargs = ( + pandas.io.formats.format.get_dataframe_repr_params() # type: ignore + ) + if not self._has_index: + to_string_kwargs.update({"index": False}) + to_string_kwargs.update({"show_dimensions": False}) + repr_string = pandas_df.to_string(**to_string_kwargs) + + lines = repr_string.split("\n") + is_truncated = total_rows is not None and total_rows > len(pandas_df) + + if is_truncated: + lines.append("...") + lines.append("") # Add empty line for spacing only if truncated + column_count = len(self.columns) + lines.append(f"[{total_rows or '?'} rows x {column_count} columns]") + else: + # For non-truncated DataFrames, we still need to add dimensions if show_dimensions was False + column_count = len(self.columns) + lines.append("") + lines.append(f"[{total_rows or '?'} rows x {column_count} columns]") + + return "\n".join(lines) + def _create_html_representation( self, pandas_df: pandas.DataFrame, diff --git a/bigframes/display/html.py b/bigframes/display/html.py index 85744a8b7f..64abb1c1e2 100644 --- a/bigframes/display/html.py +++ b/bigframes/display/html.py @@ -18,7 +18,6 @@ import html import traceback -import typing from typing import Any, Union import warnings @@ -26,8 +25,10 @@ import pandas.api.types import bigframes -from bigframes._config import display_options, options +from bigframes._config import options import bigframes.dataframe +from bigframes.display import plaintext +import bigframes.formatting_helpers import bigframes.series @@ -99,53 +100,6 @@ def render_html( return "\n".join(table_html) -def create_text_representation( - obj: Union[bigframes.dataframe.DataFrame, bigframes.series.Series], - pandas_df: pd.DataFrame, - total_rows: typing.Optional[int], -) -> str: - """Create a text representation of the DataFrame or Series.""" - opts = bigframes.options.display - with display_options.pandas_repr(opts): - if isinstance(obj, bigframes.series.Series): - pd_series = pandas_df.iloc[:, 0] - if len(obj._block.index_columns) == 0: - repr_string = pd_series.to_string( - length=False, index=False, name=True, dtype=True - ) - else: - repr_string = pd_series.to_string(length=False, name=True, dtype=True) - else: - import pandas.io.formats - - to_string_kwargs = ( - pandas.io.formats.format.get_dataframe_repr_params() # type: ignore - ) - if not obj._has_index: - to_string_kwargs.update({"index": False}) - to_string_kwargs.update({"show_dimensions": False}) - repr_string = pandas_df.to_string(**to_string_kwargs) - - lines = repr_string.split("\n") - is_truncated = total_rows is not None and total_rows > len(pandas_df) - - if is_truncated: - lines.append("...") - lines.append("") # Add empty line for spacing only if truncated - if isinstance(obj, bigframes.series.Series): - lines.append(f"[{total_rows} rows]") - else: - column_count = len(obj.columns) - lines.append(f"[{total_rows or '?'} rows x {column_count} columns]") - elif isinstance(obj, bigframes.dataframe.DataFrame): - # For non-truncated DataFrames, we still need to add dimensions if show_dimensions was False - column_count = len(obj.columns) - lines.append("") - lines.append(f"[{total_rows or '?'} rows x {column_count} columns]") - - return "\n".join(lines) - - def create_html_representation( obj: Union[bigframes.dataframe.DataFrame, bigframes.series.Series], pandas_df: pd.DataFrame, @@ -154,17 +108,9 @@ def create_html_representation( blob_cols: list[str], ) -> str: """Create an HTML representation of the DataFrame or Series.""" - if isinstance(obj, bigframes.series.Series): - pd_series = pandas_df.iloc[:, 0] - try: - html_string = pd_series._repr_html_() - except AttributeError: - html_string = f"
{pd_series.to_string()}
" - else: - html_string = obj._create_html_representation( - pandas_df, total_rows, total_columns, blob_cols - ) - return html_string + return obj._create_html_representation( + pandas_df, total_rows, total_columns, blob_cols + ) def get_anywidget_bundle( @@ -206,19 +152,19 @@ def get_anywidget_bundle( total_columns, blob_cols if "blob_cols" in locals() else [], ) - widget_repr["text/plain"] = create_text_representation(obj, cached_pd, total_rows) + widget_repr["text/plain"] = plaintext.create_text_representation( + obj, cached_pd, total_rows + ) return widget_repr, widget_metadata -def repr_mimebundle( +def repr_mimebundle_head( obj: Union[bigframes.dataframe.DataFrame, bigframes.series.Series], include=None, exclude=None, ): - """ - Custom display method for IPython/Jupyter environments. - """ + """Mimebundle display for the start of the data.""" opts = bigframes.options.display if opts.repr_mode == "anywidget": try: @@ -249,6 +195,37 @@ def repr_mimebundle( obj, pandas_df, row_count, column_count, blob_cols ) - text_representation = create_text_representation(obj, pandas_df, row_count) + text_representation = plaintext.create_text_representation( + obj, pandas_df, row_count + ) return {"text/html": html_string, "text/plain": text_representation} + + +def repr_mimebundle_deferred( + obj: Union[bigframes.dataframe.DataFrame, bigframes.series.Series], + include=None, + exclude=None, +): + """Mimebundle display for deferred execution mode.""" + # We don't need the mimetype for the deferred case, but we need to match + # the signature of the other repr_mimebundle_* methods. + # TODO(swast): Add an HTML representation for deferred queries that simply + # prints the SQL. + query_job = obj._compute_dry_run() + text_representation = bigframes.formatting_helpers.repr_query_job(query_job) + return {"text/plain": text_representation} + + +def repr_mimebundle( + obj: Union[bigframes.dataframe.DataFrame, bigframes.series.Series], + include=None, + exclude=None, +): + """ + Custom display method for IPython/Jupyter environments. + """ + if options.display.repr_mode == "deferred": + return repr_mimebundle_deferred(obj, include=include, exclude=exclude) + else: + return repr_mimebundle_head(obj, include=include, exclude=exclude) diff --git a/bigframes/series.py b/bigframes/series.py index 663ee6e5a4..51a6681fd1 100644 --- a/bigframes/series.py +++ b/bigframes/series.py @@ -577,6 +577,54 @@ def _repr_mimebundle_(self, include=None, exclude=None): return html.repr_mimebundle(self, include=include, exclude=exclude) + def _create_text_representation( + self, + pandas_df: pandas.DataFrame, + total_rows: typing.Optional[int], + ) -> str: + """Create a text representation of the Series.""" + opts = bigframes.options.display + with bigframes._config.display_options.pandas_repr(opts): + pd_series = pandas_df.iloc[:, 0] + if len(self._block.index_columns) == 0: + repr_string = pd_series.to_string( + length=False, index=False, name=True, dtype=True + ) + else: + repr_string = pd_series.to_string(length=False, name=True, dtype=True) + + lines = repr_string.split("\n") + is_truncated = total_rows is not None and total_rows > len(pandas_df) + + if is_truncated: + lines.append("...") + lines.append("") # Add empty line for spacing only if truncated + lines.append(f"[{total_rows} rows]") + + return "\n".join(lines) + + def _create_html_representation( + self, + pandas_df: pandas.DataFrame, + total_rows: int, + total_columns: int, + blob_cols: typing.List[str], + ) -> str: + """Create an HTML representation of the Series.""" + pd_series = pandas_df.iloc[:, 0] + try: + # Not all pandas Series have a _repr_html_ method, so we fall back + # to a simple text representation in an HTML
 tag.
+            html_string = pd_series._repr_html_()
+        except AttributeError:
+            html_string = f"
{pd_series.to_string()}
" + + is_truncated = total_rows is not None and total_rows > len(pandas_df) + if is_truncated: + html_string += f"

... [{total_rows} rows in total]

" + + return html_string + def __repr__(self) -> str: # Protect against errors with uninitialized Series. See: # https://github.com/googleapis/python-bigquery-dataframes/issues/728 @@ -587,14 +635,11 @@ def __repr__(self) -> str: if opts.repr_mode == "deferred": return formatter.repr_query_job(self._compute_dry_run()) - self._cached() pandas_df, row_count, query_job = self._block.retrieve_repr_request_results( opts.max_rows ) self._set_internal_query_job(query_job) - from bigframes.display import html - - return html.create_text_representation(self, pandas_df, row_count) + return self._create_text_representation(pandas_df, row_count) def astype( self, From 400ea073b0d9b00fe6974698f153e01ba933b53f Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Mon, 22 Dec 2025 22:37:13 +0000 Subject: [PATCH 13/25] Revert "refactor: move code to plaintext file and add checks" This reverts commit 593f9aedc207192a45c8689740d2960f43841da1. --- bigframes/dataframe.py | 38 ++----------- bigframes/display/html.py | 109 +++++++++++++++++++++++--------------- bigframes/series.py | 53 ++---------------- 3 files changed, 73 insertions(+), 127 deletions(-) diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index f24402ee96..df447b8bc7 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -796,7 +796,9 @@ def __repr__(self) -> str: ) self._set_internal_query_job(query_job) - return self._create_text_representation(pandas_df, row_count) + from bigframes.display import html + + return html.create_text_representation(self, pandas_df, row_count) def _get_display_df_and_blob_cols(self) -> tuple[DataFrame, list[str]]: """Process blob columns for display.""" @@ -824,40 +826,6 @@ def _repr_mimebundle_(self, include=None, exclude=None): return html.repr_mimebundle(self, include=include, exclude=exclude) - def _create_text_representation( - self, - pandas_df: pandas.DataFrame, - total_rows: typing.Optional[int], - ) -> str: - """Create a text representation of the DataFrame.""" - opts = bigframes.options.display - with display_options.pandas_repr(opts): - import pandas.io.formats - - to_string_kwargs = ( - pandas.io.formats.format.get_dataframe_repr_params() # type: ignore - ) - if not self._has_index: - to_string_kwargs.update({"index": False}) - to_string_kwargs.update({"show_dimensions": False}) - repr_string = pandas_df.to_string(**to_string_kwargs) - - lines = repr_string.split("\n") - is_truncated = total_rows is not None and total_rows > len(pandas_df) - - if is_truncated: - lines.append("...") - lines.append("") # Add empty line for spacing only if truncated - column_count = len(self.columns) - lines.append(f"[{total_rows or '?'} rows x {column_count} columns]") - else: - # For non-truncated DataFrames, we still need to add dimensions if show_dimensions was False - column_count = len(self.columns) - lines.append("") - lines.append(f"[{total_rows or '?'} rows x {column_count} columns]") - - return "\n".join(lines) - def _create_html_representation( self, pandas_df: pandas.DataFrame, diff --git a/bigframes/display/html.py b/bigframes/display/html.py index 64abb1c1e2..85744a8b7f 100644 --- a/bigframes/display/html.py +++ b/bigframes/display/html.py @@ -18,6 +18,7 @@ import html import traceback +import typing from typing import Any, Union import warnings @@ -25,10 +26,8 @@ import pandas.api.types import bigframes -from bigframes._config import options +from bigframes._config import display_options, options import bigframes.dataframe -from bigframes.display import plaintext -import bigframes.formatting_helpers import bigframes.series @@ -100,6 +99,53 @@ def render_html( return "\n".join(table_html) +def create_text_representation( + obj: Union[bigframes.dataframe.DataFrame, bigframes.series.Series], + pandas_df: pd.DataFrame, + total_rows: typing.Optional[int], +) -> str: + """Create a text representation of the DataFrame or Series.""" + opts = bigframes.options.display + with display_options.pandas_repr(opts): + if isinstance(obj, bigframes.series.Series): + pd_series = pandas_df.iloc[:, 0] + if len(obj._block.index_columns) == 0: + repr_string = pd_series.to_string( + length=False, index=False, name=True, dtype=True + ) + else: + repr_string = pd_series.to_string(length=False, name=True, dtype=True) + else: + import pandas.io.formats + + to_string_kwargs = ( + pandas.io.formats.format.get_dataframe_repr_params() # type: ignore + ) + if not obj._has_index: + to_string_kwargs.update({"index": False}) + to_string_kwargs.update({"show_dimensions": False}) + repr_string = pandas_df.to_string(**to_string_kwargs) + + lines = repr_string.split("\n") + is_truncated = total_rows is not None and total_rows > len(pandas_df) + + if is_truncated: + lines.append("...") + lines.append("") # Add empty line for spacing only if truncated + if isinstance(obj, bigframes.series.Series): + lines.append(f"[{total_rows} rows]") + else: + column_count = len(obj.columns) + lines.append(f"[{total_rows or '?'} rows x {column_count} columns]") + elif isinstance(obj, bigframes.dataframe.DataFrame): + # For non-truncated DataFrames, we still need to add dimensions if show_dimensions was False + column_count = len(obj.columns) + lines.append("") + lines.append(f"[{total_rows or '?'} rows x {column_count} columns]") + + return "\n".join(lines) + + def create_html_representation( obj: Union[bigframes.dataframe.DataFrame, bigframes.series.Series], pandas_df: pd.DataFrame, @@ -108,9 +154,17 @@ def create_html_representation( blob_cols: list[str], ) -> str: """Create an HTML representation of the DataFrame or Series.""" - return obj._create_html_representation( - pandas_df, total_rows, total_columns, blob_cols - ) + if isinstance(obj, bigframes.series.Series): + pd_series = pandas_df.iloc[:, 0] + try: + html_string = pd_series._repr_html_() + except AttributeError: + html_string = f"
{pd_series.to_string()}
" + else: + html_string = obj._create_html_representation( + pandas_df, total_rows, total_columns, blob_cols + ) + return html_string def get_anywidget_bundle( @@ -152,19 +206,19 @@ def get_anywidget_bundle( total_columns, blob_cols if "blob_cols" in locals() else [], ) - widget_repr["text/plain"] = plaintext.create_text_representation( - obj, cached_pd, total_rows - ) + widget_repr["text/plain"] = create_text_representation(obj, cached_pd, total_rows) return widget_repr, widget_metadata -def repr_mimebundle_head( +def repr_mimebundle( obj: Union[bigframes.dataframe.DataFrame, bigframes.series.Series], include=None, exclude=None, ): - """Mimebundle display for the start of the data.""" + """ + Custom display method for IPython/Jupyter environments. + """ opts = bigframes.options.display if opts.repr_mode == "anywidget": try: @@ -195,37 +249,6 @@ def repr_mimebundle_head( obj, pandas_df, row_count, column_count, blob_cols ) - text_representation = plaintext.create_text_representation( - obj, pandas_df, row_count - ) + text_representation = create_text_representation(obj, pandas_df, row_count) return {"text/html": html_string, "text/plain": text_representation} - - -def repr_mimebundle_deferred( - obj: Union[bigframes.dataframe.DataFrame, bigframes.series.Series], - include=None, - exclude=None, -): - """Mimebundle display for deferred execution mode.""" - # We don't need the mimetype for the deferred case, but we need to match - # the signature of the other repr_mimebundle_* methods. - # TODO(swast): Add an HTML representation for deferred queries that simply - # prints the SQL. - query_job = obj._compute_dry_run() - text_representation = bigframes.formatting_helpers.repr_query_job(query_job) - return {"text/plain": text_representation} - - -def repr_mimebundle( - obj: Union[bigframes.dataframe.DataFrame, bigframes.series.Series], - include=None, - exclude=None, -): - """ - Custom display method for IPython/Jupyter environments. - """ - if options.display.repr_mode == "deferred": - return repr_mimebundle_deferred(obj, include=include, exclude=exclude) - else: - return repr_mimebundle_head(obj, include=include, exclude=exclude) diff --git a/bigframes/series.py b/bigframes/series.py index 51a6681fd1..663ee6e5a4 100644 --- a/bigframes/series.py +++ b/bigframes/series.py @@ -577,54 +577,6 @@ def _repr_mimebundle_(self, include=None, exclude=None): return html.repr_mimebundle(self, include=include, exclude=exclude) - def _create_text_representation( - self, - pandas_df: pandas.DataFrame, - total_rows: typing.Optional[int], - ) -> str: - """Create a text representation of the Series.""" - opts = bigframes.options.display - with bigframes._config.display_options.pandas_repr(opts): - pd_series = pandas_df.iloc[:, 0] - if len(self._block.index_columns) == 0: - repr_string = pd_series.to_string( - length=False, index=False, name=True, dtype=True - ) - else: - repr_string = pd_series.to_string(length=False, name=True, dtype=True) - - lines = repr_string.split("\n") - is_truncated = total_rows is not None and total_rows > len(pandas_df) - - if is_truncated: - lines.append("...") - lines.append("") # Add empty line for spacing only if truncated - lines.append(f"[{total_rows} rows]") - - return "\n".join(lines) - - def _create_html_representation( - self, - pandas_df: pandas.DataFrame, - total_rows: int, - total_columns: int, - blob_cols: typing.List[str], - ) -> str: - """Create an HTML representation of the Series.""" - pd_series = pandas_df.iloc[:, 0] - try: - # Not all pandas Series have a _repr_html_ method, so we fall back - # to a simple text representation in an HTML
 tag.
-            html_string = pd_series._repr_html_()
-        except AttributeError:
-            html_string = f"
{pd_series.to_string()}
" - - is_truncated = total_rows is not None and total_rows > len(pandas_df) - if is_truncated: - html_string += f"

... [{total_rows} rows in total]

" - - return html_string - def __repr__(self) -> str: # Protect against errors with uninitialized Series. See: # https://github.com/googleapis/python-bigquery-dataframes/issues/728 @@ -635,11 +587,14 @@ def __repr__(self) -> str: if opts.repr_mode == "deferred": return formatter.repr_query_job(self._compute_dry_run()) + self._cached() pandas_df, row_count, query_job = self._block.retrieve_repr_request_results( opts.max_rows ) self._set_internal_query_job(query_job) - return self._create_text_representation(pandas_df, row_count) + from bigframes.display import html + + return html.create_text_representation(self, pandas_df, row_count) def astype( self, From a4746063aaf852bb7c29d40fa5058a8d294b52e6 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Tue, 23 Dec 2025 02:11:03 +0000 Subject: [PATCH 14/25] refactor: move create_text_representation to plaintext.py --- bigframes/dataframe.py | 38 +++++++++++++++- bigframes/display/html.py | 82 +++++++++++----------------------- bigframes/display/plaintext.py | 12 ++--- bigframes/series.py | 30 ++++++++++++- 4 files changed, 98 insertions(+), 64 deletions(-) diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index df447b8bc7..26ebade907 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -796,9 +796,9 @@ def __repr__(self) -> str: ) self._set_internal_query_job(query_job) - from bigframes.display import html + from bigframes.display import plaintext - return html.create_text_representation(self, pandas_df, row_count) + return plaintext.create_text_representation(self, pandas_df, row_count) def _get_display_df_and_blob_cols(self) -> tuple[DataFrame, list[str]]: """Process blob columns for display.""" @@ -826,6 +826,40 @@ def _repr_mimebundle_(self, include=None, exclude=None): return html.repr_mimebundle(self, include=include, exclude=exclude) + def _create_text_representation( + self, + pandas_df: pandas.DataFrame, + total_rows: typing.Optional[int], + ) -> str: + """Create a text representation of the DataFrame.""" + opts = bigframes.options.display + with display_options.pandas_repr(opts): + import pandas.io.formats + + to_string_kwargs = ( + pandas.io.formats.format.get_dataframe_repr_params() # type: ignore + ) + if not self._has_index: + to_string_kwargs.update({"index": False}) + to_string_kwargs.update({"show_dimensions": False}) + repr_string = pandas_df.to_string(**to_string_kwargs) + + lines = repr_string.split("\n") + is_truncated = total_rows is not None and total_rows > len(pandas_df) + + if is_truncated: + lines.append("...") + lines.append("") # Add empty line for spacing only if truncated + column_count = len(self.columns) + lines.append(f"[{total_rows or '?'} rows x {column_count} columns]") + else: + # For non-truncated DataFrames, we still need to add dimensions if show_dimensions was False + column_count = len(self.columns) + lines.append("") + lines.append(f"[{total_rows or '?'} rows x {column_count} columns]") + + return "\n".join(lines) + def _create_html_representation( self, pandas_df: pandas.DataFrame, diff --git a/bigframes/display/html.py b/bigframes/display/html.py index 85744a8b7f..fad6d55df5 100644 --- a/bigframes/display/html.py +++ b/bigframes/display/html.py @@ -26,9 +26,12 @@ import pandas.api.types import bigframes -from bigframes._config import display_options, options -import bigframes.dataframe -import bigframes.series +from bigframes._config import options +from bigframes.display import plaintext + +if typing.TYPE_CHECKING: + import bigframes.dataframe + import bigframes.series def _is_dtype_numeric(dtype: Any) -> bool: @@ -99,53 +102,6 @@ def render_html( return "\n".join(table_html) -def create_text_representation( - obj: Union[bigframes.dataframe.DataFrame, bigframes.series.Series], - pandas_df: pd.DataFrame, - total_rows: typing.Optional[int], -) -> str: - """Create a text representation of the DataFrame or Series.""" - opts = bigframes.options.display - with display_options.pandas_repr(opts): - if isinstance(obj, bigframes.series.Series): - pd_series = pandas_df.iloc[:, 0] - if len(obj._block.index_columns) == 0: - repr_string = pd_series.to_string( - length=False, index=False, name=True, dtype=True - ) - else: - repr_string = pd_series.to_string(length=False, name=True, dtype=True) - else: - import pandas.io.formats - - to_string_kwargs = ( - pandas.io.formats.format.get_dataframe_repr_params() # type: ignore - ) - if not obj._has_index: - to_string_kwargs.update({"index": False}) - to_string_kwargs.update({"show_dimensions": False}) - repr_string = pandas_df.to_string(**to_string_kwargs) - - lines = repr_string.split("\n") - is_truncated = total_rows is not None and total_rows > len(pandas_df) - - if is_truncated: - lines.append("...") - lines.append("") # Add empty line for spacing only if truncated - if isinstance(obj, bigframes.series.Series): - lines.append(f"[{total_rows} rows]") - else: - column_count = len(obj.columns) - lines.append(f"[{total_rows or '?'} rows x {column_count} columns]") - elif isinstance(obj, bigframes.dataframe.DataFrame): - # For non-truncated DataFrames, we still need to add dimensions if show_dimensions was False - column_count = len(obj.columns) - lines.append("") - lines.append(f"[{total_rows or '?'} rows x {column_count} columns]") - - return "\n".join(lines) - - def create_html_representation( obj: Union[bigframes.dataframe.DataFrame, bigframes.series.Series], pandas_df: pd.DataFrame, @@ -154,13 +110,22 @@ def create_html_representation( blob_cols: list[str], ) -> str: """Create an HTML representation of the DataFrame or Series.""" - if isinstance(obj, bigframes.series.Series): + # Note: We need to import Series here to avoid circular imports, but only if we use isinstance. + # To check if it is a Series without importing, we can check if it has the _repr_html_ method + # or rely on duck typing. However, the original code used isinstance. + # Let's import inside the function if needed, or rely on attribute checks. + # But wait, type checking imports are not available at runtime. + # We can check __class__.__name__ or similar, or just import locally. + from bigframes.series import Series + + if isinstance(obj, Series): pd_series = pandas_df.iloc[:, 0] try: html_string = pd_series._repr_html_() except AttributeError: html_string = f"
{pd_series.to_string()}
" else: + # It's a DataFrame html_string = obj._create_html_representation( pandas_df, total_rows, total_columns, blob_cols ) @@ -177,8 +142,9 @@ def get_anywidget_bundle( This function encapsulates the logic for anywidget display. """ from bigframes import display + from bigframes.series import Series - if isinstance(obj, bigframes.series.Series): + if isinstance(obj, Series): df = obj.to_frame() else: df, blob_cols = obj._get_display_df_and_blob_cols() @@ -206,7 +172,9 @@ def get_anywidget_bundle( total_columns, blob_cols if "blob_cols" in locals() else [], ) - widget_repr["text/plain"] = create_text_representation(obj, cached_pd, total_rows) + widget_repr["text/plain"] = plaintext.create_text_representation( + obj, cached_pd, total_rows + ) return widget_repr, widget_metadata @@ -219,6 +187,8 @@ def repr_mimebundle( """ Custom display method for IPython/Jupyter environments. """ + from bigframes.series import Series + opts = bigframes.options.display if opts.repr_mode == "anywidget": try: @@ -231,7 +201,7 @@ def repr_mimebundle( ) blob_cols: list[str] - if isinstance(obj, bigframes.series.Series): + if isinstance(obj, Series): pandas_df, row_count, query_job = obj._block.retrieve_repr_request_results( opts.max_rows ) @@ -249,6 +219,8 @@ def repr_mimebundle( obj, pandas_df, row_count, column_count, blob_cols ) - text_representation = create_text_representation(obj, pandas_df, row_count) + text_representation = plaintext.create_text_representation( + obj, pandas_df, row_count + ) return {"text/html": html_string, "text/plain": text_representation} diff --git a/bigframes/display/plaintext.py b/bigframes/display/plaintext.py index f96e5571dd..e378a59d92 100644 --- a/bigframes/display/plaintext.py +++ b/bigframes/display/plaintext.py @@ -1,4 +1,4 @@ -# Copyright 2024 Google LLC +# Copyright 2025 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -17,17 +17,19 @@ from __future__ import annotations import typing +from typing import Union if typing.TYPE_CHECKING: import pandas as pd - import bigframes + import bigframes.dataframe + import bigframes.series def create_text_representation( - obj: "bigframes.dataframe.DataFrame" | "bigframes.series.Series", - pandas_df: "pd.DataFrame", - total_rows: int | None, + obj: Union[bigframes.dataframe.DataFrame, bigframes.series.Series], + pandas_df: pd.DataFrame, + total_rows: typing.Optional[int], ) -> str: """Create a text representation of the DataFrame or Series.""" # TODO(swast): This module should probably just be removed and combined diff --git a/bigframes/series.py b/bigframes/series.py index 663ee6e5a4..8623e2a68f 100644 --- a/bigframes/series.py +++ b/bigframes/series.py @@ -568,6 +568,32 @@ def reset_index( block = block.assign_label(self._value_column, name) return bigframes.dataframe.DataFrame(block) + def _create_text_representation( + self, + pandas_df: pandas.DataFrame, + total_rows: typing.Optional[int], + ) -> str: + """Create a text representation of the Series.""" + opts = bigframes.options.display + with bigframes._config.display_options.pandas_repr(opts): + pd_series = pandas_df.iloc[:, 0] + if len(self._block.index_columns) == 0: + repr_string = pd_series.to_string( + length=False, index=False, name=True, dtype=True + ) + else: + repr_string = pd_series.to_string(length=False, name=True, dtype=True) + + lines = repr_string.split("\n") + is_truncated = total_rows is not None and total_rows > len(pandas_df) + + if is_truncated: + lines.append("...") + lines.append("") # Add empty line for spacing only if truncated + lines.append(f"[{total_rows} rows]") + + return "\n".join(lines) + def _repr_mimebundle_(self, include=None, exclude=None): """ Custom display method for IPython/Jupyter environments. @@ -592,9 +618,9 @@ def __repr__(self) -> str: opts.max_rows ) self._set_internal_query_job(query_job) - from bigframes.display import html + from bigframes.display import plaintext - return html.create_text_representation(self, pandas_df, row_count) + return plaintext.create_text_representation(self, pandas_df, row_count) def astype( self, From bd5699279ad9ae3a2b48a586b978c1cf2ee85352 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Tue, 23 Dec 2025 02:30:17 +0000 Subject: [PATCH 15/25] refactor: move display logic to display/plaintext.py and display/html.py --- bigframes/dataframe.py | 95 ++-------------------------------- bigframes/display/html.py | 70 ++++++++++++++++++++----- bigframes/display/plaintext.py | 60 +++++++++++++++++++-- bigframes/series.py | 30 ++--------- 4 files changed, 122 insertions(+), 133 deletions(-) diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index 26ebade907..a74602620f 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -19,7 +19,6 @@ import datetime import inspect import itertools -import json import re import sys import textwrap @@ -54,7 +53,6 @@ import pyarrow import tabulate -import bigframes._config.display_options as display_options import bigframes.constants import bigframes.core from bigframes.core import agg_expressions, log_adapter @@ -790,6 +788,9 @@ def __repr__(self) -> str: if opts.repr_mode == "deferred": return formatter.repr_query_job(self._compute_dry_run()) + # TODO(swast): pass max_columns and get the true column count back. Maybe + # get 1 more column than we have requested so that pandas can add the + # ... for us? max_results = opts.max_rows pandas_df, row_count, query_job = self._block.retrieve_repr_request_results( max_results @@ -826,96 +827,6 @@ def _repr_mimebundle_(self, include=None, exclude=None): return html.repr_mimebundle(self, include=include, exclude=exclude) - def _create_text_representation( - self, - pandas_df: pandas.DataFrame, - total_rows: typing.Optional[int], - ) -> str: - """Create a text representation of the DataFrame.""" - opts = bigframes.options.display - with display_options.pandas_repr(opts): - import pandas.io.formats - - to_string_kwargs = ( - pandas.io.formats.format.get_dataframe_repr_params() # type: ignore - ) - if not self._has_index: - to_string_kwargs.update({"index": False}) - to_string_kwargs.update({"show_dimensions": False}) - repr_string = pandas_df.to_string(**to_string_kwargs) - - lines = repr_string.split("\n") - is_truncated = total_rows is not None and total_rows > len(pandas_df) - - if is_truncated: - lines.append("...") - lines.append("") # Add empty line for spacing only if truncated - column_count = len(self.columns) - lines.append(f"[{total_rows or '?'} rows x {column_count} columns]") - else: - # For non-truncated DataFrames, we still need to add dimensions if show_dimensions was False - column_count = len(self.columns) - lines.append("") - lines.append(f"[{total_rows or '?'} rows x {column_count} columns]") - - return "\n".join(lines) - - def _create_html_representation( - self, - pandas_df: pandas.DataFrame, - row_count: int, - column_count: int, - blob_cols: list[str], - ) -> str: - """Create an HTML representation of the DataFrame.""" - opts = bigframes.options.display - with display_options.pandas_repr(opts): - # TODO(shuowei, b/464053870): Escaping HTML would be useful, but - # `escape=False` is needed to show images. We may need to implement - # a full-fledged repr module to better support types not in pandas. - if bigframes.options.display.blob_display and blob_cols: - - def obj_ref_rt_to_html(obj_ref_rt) -> str: - obj_ref_rt_json = json.loads(obj_ref_rt) - obj_ref_details = obj_ref_rt_json["objectref"]["details"] - if "gcs_metadata" in obj_ref_details: - gcs_metadata = obj_ref_details["gcs_metadata"] - content_type = typing.cast( - str, gcs_metadata.get("content_type", "") - ) - if content_type.startswith("image"): - size_str = "" - if bigframes.options.display.blob_display_width: - size_str = f' width="{bigframes.options.display.blob_display_width}"' - if bigframes.options.display.blob_display_height: - size_str = ( - size_str - + f' height="{bigframes.options.display.blob_display_height}"' - ) - url = obj_ref_rt_json["access_urls"]["read_url"] - return f'' - - return f'uri: {obj_ref_rt_json["objectref"]["uri"]}, authorizer: {obj_ref_rt_json["objectref"]["authorizer"]}' - - formatters = {blob_col: obj_ref_rt_to_html for blob_col in blob_cols} - - # set max_colwidth so not to truncate the image url - with pandas.option_context("display.max_colwidth", None): - html_string = pandas_df.to_html( - escape=False, - notebook=True, - max_rows=pandas.get_option("display.max_rows"), - max_cols=pandas.get_option("display.max_columns"), - show_dimensions=pandas.get_option("display.show_dimensions"), - formatters=formatters, # type: ignore - ) - else: - # _repr_html_ stub is missing so mypy thinks it's a Series. Ignore mypy. - html_string = pandas_df._repr_html_() # type:ignore - - html_string += f"[{row_count} rows x {column_count} columns in total]" - return html_string - def __delitem__(self, key: str): df = self.drop(columns=[key]) self._set_block(df._get_block()) diff --git a/bigframes/display/html.py b/bigframes/display/html.py index fad6d55df5..675db47bc6 100644 --- a/bigframes/display/html.py +++ b/bigframes/display/html.py @@ -17,6 +17,7 @@ from __future__ import annotations import html +import json import traceback import typing from typing import Any, Union @@ -26,7 +27,7 @@ import pandas.api.types import bigframes -from bigframes._config import options +from bigframes._config import display_options, options from bigframes.display import plaintext if typing.TYPE_CHECKING: @@ -110,12 +111,6 @@ def create_html_representation( blob_cols: list[str], ) -> str: """Create an HTML representation of the DataFrame or Series.""" - # Note: We need to import Series here to avoid circular imports, but only if we use isinstance. - # To check if it is a Series without importing, we can check if it has the _repr_html_ method - # or rely on duck typing. However, the original code used isinstance. - # Let's import inside the function if needed, or rely on attribute checks. - # But wait, type checking imports are not available at runtime. - # We can check __class__.__name__ or similar, or just import locally. from bigframes.series import Series if isinstance(obj, Series): @@ -124,12 +119,63 @@ def create_html_representation( html_string = pd_series._repr_html_() except AttributeError: html_string = f"
{pd_series.to_string()}
" + + # Series doesn't typically show total rows/cols like DF in HTML repr here? + # But let's check what it was doing. + # Original code just returned _repr_html_ or wrapped to_string. + # It didn't append row/col count string for Series (wait, Series usually has length in repr). + return html_string else: # It's a DataFrame - html_string = obj._create_html_representation( - pandas_df, total_rows, total_columns, blob_cols - ) - return html_string + opts = options.display + with display_options.pandas_repr(opts): + # TODO(shuowei, b/464053870): Escaping HTML would be useful, but + # `escape=False` is needed to show images. We may need to implement + # a full-fledged repr module to better support types not in pandas. + if options.display.blob_display and blob_cols: + + def obj_ref_rt_to_html(obj_ref_rt) -> str: + obj_ref_rt_json = json.loads(obj_ref_rt) + obj_ref_details = obj_ref_rt_json["objectref"]["details"] + if "gcs_metadata" in obj_ref_details: + gcs_metadata = obj_ref_details["gcs_metadata"] + content_type = typing.cast( + str, gcs_metadata.get("content_type", "") + ) + if content_type.startswith("image"): + size_str = "" + if options.display.blob_display_width: + size_str = ( + f' width="{options.display.blob_display_width}"' + ) + if options.display.blob_display_height: + size_str = ( + size_str + + f' height="{options.display.blob_display_height}"' + ) + url = obj_ref_rt_json["access_urls"]["read_url"] + return f'' + + return f'uri: {obj_ref_rt_json["objectref"]["uri"]}, authorizer: {obj_ref_rt_json["objectref"]["authorizer"]}' + + formatters = {blob_col: obj_ref_rt_to_html for blob_col in blob_cols} + + # set max_colwidth so not to truncate the image url + with pandas.option_context("display.max_colwidth", None): + html_string = pandas_df.to_html( + escape=False, + notebook=True, + max_rows=pandas.get_option("display.max_rows"), + max_cols=pandas.get_option("display.max_columns"), + show_dimensions=pandas.get_option("display.show_dimensions"), + formatters=formatters, # type: ignore + ) + else: + # _repr_html_ stub is missing so mypy thinks it's a Series. Ignore mypy. + html_string = pandas_df._repr_html_() # type:ignore + + html_string += f"[{total_rows} rows x {total_columns} columns in total]" + return html_string def get_anywidget_bundle( @@ -189,7 +235,7 @@ def repr_mimebundle( """ from bigframes.series import Series - opts = bigframes.options.display + opts = options.display if opts.repr_mode == "anywidget": try: return get_anywidget_bundle(obj, include=include, exclude=exclude) diff --git a/bigframes/display/plaintext.py b/bigframes/display/plaintext.py index e378a59d92..4c175606f2 100644 --- a/bigframes/display/plaintext.py +++ b/bigframes/display/plaintext.py @@ -19,6 +19,11 @@ import typing from typing import Union +import pandas +import pandas.io.formats + +from bigframes._config import display_options, options + if typing.TYPE_CHECKING: import pandas as pd @@ -32,6 +37,55 @@ def create_text_representation( total_rows: typing.Optional[int], ) -> str: """Create a text representation of the DataFrame or Series.""" - # TODO(swast): This module should probably just be removed and combined - # with the html module. - return obj._create_text_representation(pandas_df, total_rows) + from bigframes.series import Series + + opts = options.display + + if isinstance(obj, Series): + with display_options.pandas_repr(opts): + pd_series = pandas_df.iloc[:, 0] + if len(obj._block.index_columns) == 0: + repr_string = pd_series.to_string( + length=False, index=False, name=True, dtype=True + ) + else: + repr_string = pd_series.to_string(length=False, name=True, dtype=True) + + lines = repr_string.split("\n") + is_truncated = total_rows is not None and total_rows > len(pandas_df) + + if is_truncated: + lines.append("...") + lines.append("") # Add empty line for spacing only if truncated + lines.append(f"[{total_rows} rows]") + + return "\n".join(lines) + + else: + # DataFrame + with display_options.pandas_repr(opts): + # safe to mutate this, this dict is owned by this code, and does not affect global config + to_string_kwargs = ( + pandas.io.formats.format.get_dataframe_repr_params() # type: ignore + ) + if not obj._has_index: + to_string_kwargs.update({"index": False}) + + # We add our own dimensions string, so don't want pandas to. + to_string_kwargs.update({"show_dimensions": False}) + repr_string = pandas_df.to_string(**to_string_kwargs) + + lines = repr_string.split("\n") + is_truncated = total_rows is not None and total_rows > len(pandas_df) + + if is_truncated: + lines.append("...") + lines.append("") # Add empty line for spacing only if truncated + column_count = len(obj.columns) + lines.append(f"[{total_rows or '?'} rows x {column_count} columns]") + else: + # For non-truncated DataFrames, we still need to add dimensions if show_dimensions was False + column_count = len(obj.columns) + lines.append("") + lines.append(f"[{total_rows or '?'} rows x {column_count} columns]") + return "\n".join(lines) diff --git a/bigframes/series.py b/bigframes/series.py index 8623e2a68f..82ef6687b7 100644 --- a/bigframes/series.py +++ b/bigframes/series.py @@ -568,32 +568,6 @@ def reset_index( block = block.assign_label(self._value_column, name) return bigframes.dataframe.DataFrame(block) - def _create_text_representation( - self, - pandas_df: pandas.DataFrame, - total_rows: typing.Optional[int], - ) -> str: - """Create a text representation of the Series.""" - opts = bigframes.options.display - with bigframes._config.display_options.pandas_repr(opts): - pd_series = pandas_df.iloc[:, 0] - if len(self._block.index_columns) == 0: - repr_string = pd_series.to_string( - length=False, index=False, name=True, dtype=True - ) - else: - repr_string = pd_series.to_string(length=False, name=True, dtype=True) - - lines = repr_string.split("\n") - is_truncated = total_rows is not None and total_rows > len(pandas_df) - - if is_truncated: - lines.append("...") - lines.append("") # Add empty line for spacing only if truncated - lines.append(f"[{total_rows} rows]") - - return "\n".join(lines) - def _repr_mimebundle_(self, include=None, exclude=None): """ Custom display method for IPython/Jupyter environments. @@ -609,6 +583,10 @@ def __repr__(self) -> str: if not hasattr(self, "_block"): return object.__repr__(self) + # TODO(swast): Add a timeout here? If the query is taking a long time, + # maybe we just print the job metadata that we have so far? + # TODO(swast): Avoid downloading the whole series by using job + # metadata, like we do with DataFrame. opts = bigframes.options.display if opts.repr_mode == "deferred": return formatter.repr_query_job(self._compute_dry_run()) From 971ee3311f9660b843a20ca8c4ab1fb24b010d96 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Tue, 23 Dec 2025 02:48:39 +0000 Subject: [PATCH 16/25] refactor: restore original order of max_results in __repr__ --- bigframes/dataframe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index a74602620f..e528cb1565 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -785,13 +785,13 @@ def __repr__(self) -> str: return object.__repr__(self) opts = bigframes.options.display + max_results = opts.max_rows if opts.repr_mode == "deferred": return formatter.repr_query_job(self._compute_dry_run()) # TODO(swast): pass max_columns and get the true column count back. Maybe # get 1 more column than we have requested so that pandas can add the # ... for us? - max_results = opts.max_rows pandas_df, row_count, query_job = self._block.retrieve_repr_request_results( max_results ) From 1a73628569611cca5849551602906b1f3329be88 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Tue, 23 Dec 2025 02:53:53 +0000 Subject: [PATCH 17/25] docs: add todo back --- bigframes/dataframe.py | 2 ++ bigframes/display/html.py | 5 +++++ bigframes/series.py | 2 ++ 3 files changed, 9 insertions(+) diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index e528cb1565..a77845235f 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -823,6 +823,8 @@ def _repr_mimebundle_(self, include=None, exclude=None): Custom display method for IPython/Jupyter environments. This is called by IPython's display system when the object is displayed. """ + # TODO(b/467647693): Anywidget integration has been tested in Jupyter, VS Code, and + # BQ Studio, but there is a known compatibility issue with Marimo that needs to be addressed. from bigframes.display import html return html.repr_mimebundle(self, include=include, exclude=exclude) diff --git a/bigframes/display/html.py b/bigframes/display/html.py index 675db47bc6..45e7feac9d 100644 --- a/bigframes/display/html.py +++ b/bigframes/display/html.py @@ -233,6 +233,8 @@ def repr_mimebundle( """ Custom display method for IPython/Jupyter environments. """ + # TODO(b/467647693): Anywidget integration has been tested in Jupyter, VS Code, and + # BQ Studio, but there is a known compatibility issue with Marimo that needs to be addressed. from bigframes.series import Series opts = options.display @@ -240,6 +242,9 @@ def repr_mimebundle( try: return get_anywidget_bundle(obj, include=include, exclude=exclude) except ImportError: + # Anywidget is an optional dependency, so warn rather than fail. + # TODO(shuowei): When Anywidget becomes the default for all repr modes, + # remove this warning. warnings.warn( "Anywidget mode is not available. " "Please `pip install anywidget traitlets` or `pip install 'bigframes[anywidget]'` to use interactive tables. " diff --git a/bigframes/series.py b/bigframes/series.py index 82ef6687b7..80020096bb 100644 --- a/bigframes/series.py +++ b/bigframes/series.py @@ -573,6 +573,8 @@ def _repr_mimebundle_(self, include=None, exclude=None): Custom display method for IPython/Jupyter environments. This is called by IPython's display system when the object is displayed. """ + # TODO(b/467647693): Anywidget integration has been tested in Jupyter, VS Code, and + # BQ Studio, but there is a known compatibility issue with Marimo that needs to be addressed. from bigframes.display import html return html.repr_mimebundle(self, include=include, exclude=exclude) From 1b7952b3edc48ef972c725b29225940715df1975 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Tue, 23 Dec 2025 03:03:20 +0000 Subject: [PATCH 18/25] refactor: split repr_mimebundle logic, handle deferred mode in html, and fix mypy errors --- bigframes/display/html.py | 73 +++++++++++++++++++++------------ bigframes/formatting_helpers.py | 46 +++++++++++++++++++-- 2 files changed, 90 insertions(+), 29 deletions(-) diff --git a/bigframes/display/html.py b/bigframes/display/html.py index 45e7feac9d..85be7d1e73 100644 --- a/bigframes/display/html.py +++ b/bigframes/display/html.py @@ -29,6 +29,7 @@ import bigframes from bigframes._config import display_options, options from bigframes.display import plaintext +import bigframes.formatting_helpers as formatter if typing.TYPE_CHECKING: import bigframes.dataframe @@ -114,16 +115,16 @@ def create_html_representation( from bigframes.series import Series if isinstance(obj, Series): + # Fallback to pandas string representation if the object is not a Series. + # This protects against cases where obj might be something else unexpectedly, + # or if the pandas Series implementation changes. pd_series = pandas_df.iloc[:, 0] try: html_string = pd_series._repr_html_() except AttributeError: html_string = f"
{pd_series.to_string()}
" - # Series doesn't typically show total rows/cols like DF in HTML repr here? - # But let's check what it was doing. - # Original code just returned _repr_html_ or wrapped to_string. - # It didn't append row/col count string for Series (wait, Series usually has length in repr). + html_string += f"[{total_rows} rows]" return html_string else: # It's a DataFrame @@ -225,32 +226,21 @@ def get_anywidget_bundle( return widget_repr, widget_metadata -def repr_mimebundle( +def _repr_mimebundle_deferred( obj: Union[bigframes.dataframe.DataFrame, bigframes.series.Series], - include=None, - exclude=None, -): - """ - Custom display method for IPython/Jupyter environments. - """ - # TODO(b/467647693): Anywidget integration has been tested in Jupyter, VS Code, and - # BQ Studio, but there is a known compatibility issue with Marimo that needs to be addressed. +) -> dict[str, str]: + return { + "text/plain": formatter.repr_query_job(obj._compute_dry_run()), + "text/html": formatter.repr_query_job_html(obj._compute_dry_run()), + } + + +def _repr_mimebundle_head( + obj: Union[bigframes.dataframe.DataFrame, bigframes.series.Series], +) -> dict[str, str]: from bigframes.series import Series opts = options.display - if opts.repr_mode == "anywidget": - try: - return get_anywidget_bundle(obj, include=include, exclude=exclude) - except ImportError: - # Anywidget is an optional dependency, so warn rather than fail. - # TODO(shuowei): When Anywidget becomes the default for all repr modes, - # remove this warning. - warnings.warn( - "Anywidget mode is not available. " - "Please `pip install anywidget traitlets` or `pip install 'bigframes[anywidget]'` to use interactive tables. " - f"Falling back to static HTML. Error: {traceback.format_exc()}" - ) - blob_cols: list[str] if isinstance(obj, Series): pandas_df, row_count, query_job = obj._block.retrieve_repr_request_results( @@ -275,3 +265,34 @@ def repr_mimebundle( ) return {"text/html": html_string, "text/plain": text_representation} + + +def repr_mimebundle( + obj: Union[bigframes.dataframe.DataFrame, bigframes.series.Series], + include=None, + exclude=None, +): + """ + Custom display method for IPython/Jupyter environments. + """ + # TODO(b/467647693): Anywidget integration has been tested in Jupyter, VS Code, and + # BQ Studio, but there is a known compatibility issue with Marimo that needs to be addressed. + + opts = options.display + if opts.repr_mode == "deferred": + return _repr_mimebundle_deferred(obj) + + if opts.repr_mode == "anywidget": + try: + return get_anywidget_bundle(obj, include=include, exclude=exclude) + except ImportError: + # Anywidget is an optional dependency, so warn rather than fail. + # TODO(shuowei): When Anywidget becomes the default for all repr modes, + # remove this warning. + warnings.warn( + "Anywidget mode is not available. " + "Please `pip install anywidget traitlets` or `pip install 'bigframes[anywidget]'` to use interactive tables. " + f"Falling back to static HTML. Error: {traceback.format_exc()}" + ) + + return _repr_mimebundle_head(obj) diff --git a/bigframes/formatting_helpers.py b/bigframes/formatting_helpers.py index 55731069a3..3c37a3470d 100644 --- a/bigframes/formatting_helpers.py +++ b/bigframes/formatting_helpers.py @@ -68,7 +68,7 @@ def repr_query_job(query_job: Optional[bigquery.QueryJob]): query_job: The job representing the execution of the query on the server. Returns: - Pywidget html table. + Formatted string. """ if query_job is None: return "No job information available" @@ -94,6 +94,46 @@ def repr_query_job(query_job: Optional[bigquery.QueryJob]): return res +def repr_query_job_html(query_job: Optional[bigquery.QueryJob]): + """Return query job as a formatted html string. + Args: + query_job: + The job representing the execution of the query on the server. + Returns: + Html string. + """ + if query_job is None: + return "No job information available" + if query_job.dry_run: + return f"Computation deferred. Computation will process {get_formatted_bytes(query_job.total_bytes_processed)}" + + # We can reuse the plaintext repr for now or make a nicer table. + # For deferred mode consistency, let's just wrap the text in a pre block or similar, + # but the request implies we want a distinct HTML representation if possible. + # However, existing repr_query_job returns a simple string. + # Let's format it as a simple table or list. + + res = "

Query Job Info

    " + for key, value in query_job_prop_pairs.items(): + job_val = getattr(query_job, value) + if job_val is not None: + if key == "Job Id": # add link to job + url = get_job_url( + project_id=query_job.project, + location=query_job.location, + job_id=query_job.job_id, + ) + res += f'
  • Job: {query_job.job_id}
  • ' + elif key == "Slot Time": + res += f"
  • {key}: {get_formatted_time(job_val)}
  • " + elif key == "Bytes Processed": + res += f"
  • {key}: {get_formatted_bytes(job_val)}
  • " + else: + res += f"
  • {key}: {job_val}
  • " + res += "
" + return res + + current_display: Optional[display.HTML] = None current_display_id: Optional[str] = None previous_display_html: str = "" @@ -296,7 +336,7 @@ def get_job_url( """ if project_id is None or location is None or job_id is None: return None - return f"""https://console.cloud.google.com/bigquery?project={project_id}&j=bq:{location}:{job_id}&page=queryresults""" + return f"""https://console.cloud. google.com/bigquery?project={project_id}&j=bq:{location}:{job_id}&page=queryresults""" def render_bqquery_sent_event_html( @@ -508,7 +548,7 @@ def get_base_job_loading_html(job: GenericJob): Returns: Html string. """ - return f"""{job.job_type.capitalize()} job {job.job_id} is {job.state}. Date: Tue, 23 Dec 2025 03:10:37 +0000 Subject: [PATCH 20/25] style: fix repr_mimebundle docstring formatting --- bigframes/display/html.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/bigframes/display/html.py b/bigframes/display/html.py index 5119bed7b5..1a4cd99cff 100644 --- a/bigframes/display/html.py +++ b/bigframes/display/html.py @@ -272,9 +272,7 @@ def repr_mimebundle( include=None, exclude=None, ): - """ - Custom display method for IPython/Jupyter environments. - """ + """Custom display method for IPython/Jupyter environments.""" # TODO(b/467647693): Anywidget integration has been tested in Jupyter, VS Code, and # BQ Studio, but there is a known compatibility issue with Marimo that needs to be addressed. From 9fea10ea0fe5917ff5ecf1fb0635d0bbcbc66d64 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Tue, 23 Dec 2025 03:13:09 +0000 Subject: [PATCH 21/25] docs: update anywidget demo notebook with series display showcase --- notebooks/dataframes/anywidget_mode.ipynb | 936 +--------------------- 1 file changed, 6 insertions(+), 930 deletions(-) diff --git a/notebooks/dataframes/anywidget_mode.ipynb b/notebooks/dataframes/anywidget_mode.ipynb index b400fc5f9c..26acd57465 100644 --- a/notebooks/dataframes/anywidget_mode.ipynb +++ b/notebooks/dataframes/anywidget_mode.ipynb @@ -27,7 +27,7 @@ "id": "acca43ae", "metadata": {}, "source": [ - "# Demo to Show Anywidget mode" + "# Demo: Interactive Analysis with Anywidget" ] }, { @@ -45,933 +45,9 @@ "id": "04406a4d", "metadata": {}, "source": [ - "This notebook demonstrates the anywidget display mode, which provides an interactive table experience.\n", - "Key features include:\n", - "- **Column Sorting:** Click on column headers to sort data in ascending, descending, or unsorted states.\n", - "- **Adjustable Column Widths:** Drag the dividers between column headers to resize columns." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "1bc5aaf3", - "metadata": {}, - "outputs": [], - "source": [ - "bpd.options.bigquery.ordering_mode = \"partial\"\n", - "bpd.options.display.repr_mode = \"anywidget\"" - ] - }, - { - "cell_type": "markdown", - "id": "0a354c69", - "metadata": {}, - "source": [ - "Load Sample Data" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "f289d250", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "✅ Completed. " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "✅ Completed. \n", - " Query processed 0 Bytes in a moment of slot time.\n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "state gender year name number\n", - " AL F 1910 Hazel 51\n", - " AL F 1910 Lucy 76\n", - " AR F 1910 Nellie 39\n", - " AR F 1910 Lena 40\n", - " CO F 1910 Thelma 36\n", - " CO F 1910 Ruth 68\n", - " CT F 1910 Elizabeth 86\n", - " DC F 1910 Mary 80\n", - " FL F 1910 Annie 101\n", - " FL F 1910 Alma 39\n", - "...\n", - "\n", - "[5552452 rows x 5 columns]\n" - ] - } - ], - "source": [ - "df = bpd.read_gbq(\"bigquery-public-data.usa_names.usa_1910_2013\")\n", - "print(df)" - ] - }, - { - "cell_type": "markdown", - "id": "3a73e472", - "metadata": {}, - "source": [ - "Display Series in anywidget mode" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "42bb02ab", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "✅ Completed. \n", - " Query processed 171.4 MB in 39 seconds of slot time. [Job bigframes-dev:US.7cbc5c5a-3096-4bfb-9f68-79bdcad9b406 details]\n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "✅ Completed. \n", - " Query processed 88.8 MB in a moment of slot time.\n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "✅ Completed. " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1910\n", - "1910\n", - "1910\n", - "1910\n", - "1910\n", - "1910\n", - "1910\n", - "1910\n", - "1910\n", - "1910\n", - "Name: year, dtype: Int64\n", - "...\n", - "\n", - "[5552452 rows]\n" - ] - } - ], - "source": [ - "test_series = df[\"year\"]\n", - "print(test_series)" - ] - }, - { - "cell_type": "markdown", - "id": "7bcf1bb7", - "metadata": {}, - "source": [ - "Display with Pagination" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "71fa52ec", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "✅ Completed. \n", - " Query processed 215.9 MB in 9 seconds of slot time. [Job bigframes-dev:US.job_IU7vod4QK1ou1TlbpmFQbTDG4ITH details]\n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "✅ Completed. \n", - " Query processed 215.9 MB in 8 seconds of slot time. [Job bigframes-dev:US.job_fAljrol-9EGnTEYZEyllaW36FOI6 details]\n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "b007377c75cd46f68924e271074d52ff", - "version_major": 2, - "version_minor": 1 - }, - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
stategenderyearnamenumber
0ALF1910Sadie40
1ALF1910Mary875
2ARF1910Vera39
3ARF1910Marie78
4ARF1910Lucille66
5CAF1910Virginia101
6DCF1910Margaret72
7GAF1910Mildred133
8GAF1910Vera51
9GAF1910Sallie92
\n", - "

10 rows × 5 columns

\n", - "
[5552452 rows x 5 columns in total]" - ], - "text/plain": [ - "state gender year name number\n", - " AL F 1910 Sadie 40\n", - " AL F 1910 Mary 875\n", - " AR F 1910 Vera 39\n", - " AR F 1910 Marie 78\n", - " AR F 1910 Lucille 66\n", - " CA F 1910 Virginia 101\n", - " DC F 1910 Margaret 72\n", - " GA F 1910 Mildred 133\n", - " GA F 1910 Vera 51\n", - " GA F 1910 Sallie 92\n", - "...\n", - "\n", - "[5552452 rows x 5 columns]" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "da23e0f3", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "✅ Completed. \n", - " Query processed 88.8 MB in 2 seconds of slot time. [Job bigframes-dev:US.job_m5DlrymiC8C75Ky06_gYCa1zNOO- details]\n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "✅ Completed. \n", - " Query processed 88.8 MB in 3 seconds of slot time. [Job bigframes-dev:US.job_GZiZyMOq33ShjyrL5iHd4-SbvYFP details]\n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "4ca161fae40b46f5af9dd05a18a31f8b", - "version_major": 2, - "version_minor": 1 - }, - "text/plain": [ - "1910\n", - "1910\n", - "1910\n", - "1910\n", - "1910\n", - "1910\n", - "1910\n", - "1910\n", - "1910\n", - "1910\n", - "Name: year, dtype: Int64\n", - "...\n", - "\n", - "[5552452 rows]" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "test_series" - ] - }, - { - "cell_type": "markdown", - "id": "sorting-intro", - "metadata": {}, - "source": [ - "### Sorting by Single-Column\n", - "You can sort the table by clicking on the headers of columns that have orderable data types (like numbers, strings, and dates). Non-orderable columns (like arrays or structs) do not have sorting controls.\n", + "This notebook demonstrates the **anywidget** display mode for BigQuery DataFrames. This mode provides an interactive table experience for exploring your data directly within the notebook.\n", "\n", - "**Sorting indicators (▲, ▼) are always visible for sorted columns. The unsorted indicator (●) is only visible when you hover over an unsorted column header.** The sorting control cycles through three states:\n", - "- **Unsorted (no indicator by default, ● on hover):** The default state. Click the header to sort in ascending order.\n", - "- **Ascending (▲):** The data is sorted from smallest to largest. Click again to sort in descending order.\n", - "- **Descending (▼):** The data is sorted from largest to smallest. Click again to return to the unsorted state." - ] - }, - { - "cell_type": "markdown", - "id": "adjustable-width-intro", - "metadata": {}, - "source": [ - "### Adjustable Column Widths\n", - "You can easily adjust the width of any column in the table. Simply hover your mouse over the vertical dividers between column headers. When the cursor changes to a resize icon, click and drag to expand or shrink the column to your desired width. This allows for better readability and customization of your table view." - ] - }, - { - "cell_type": "markdown", - "id": "bb15bab6", - "metadata": {}, - "source": [ - "Programmatic Navigation Demo" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "6920d49b", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "✅ Completed. \n", - " Query processed 215.9 MB in 9 seconds of slot time. [Job bigframes-dev:US.job_VHargXKi-0r5rTHw8MpLsH9SAHn4 details]\n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "✅ Completed. \n", - " Query processed 215.9 MB in 8 seconds of slot time. [Job bigframes-dev:US.job_u35bTRyfTtiSX6aILqN44S5OJZMh details]\n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Total pages: 555246\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "db36d4303b4a47479e2e6a92eba4f814", - "version_major": 2, - "version_minor": 1 - }, - "text/plain": [ - "" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from bigframes.display.anywidget import TableWidget\n", - "import math\n", - " \n", - "# Create widget programmatically \n", - "widget = TableWidget(df)\n", - "print(f\"Total pages: {math.ceil(widget.row_count / widget.page_size)}\")\n", - " \n", - "# Display the widget\n", - "widget" - ] - }, - { - "cell_type": "markdown", - "id": "02cbd1be", - "metadata": {}, - "source": [ - "Test Navigation Programmatically" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "12b68f15", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Current page: 0\n", - "After next: 1\n", - "After prev: 0\n" - ] - } - ], - "source": [ - "# Simulate button clicks programmatically\n", - "print(\"Current page:\", widget.page)\n", - "\n", - "# Go to next page\n", - "widget.page = 1\n", - "print(\"After next:\", widget.page)\n", - "\n", - "# Go to previous page\n", - "widget.page = 0\n", - "print(\"After prev:\", widget.page)" - ] - }, - { - "cell_type": "markdown", - "id": "9d310138", - "metadata": {}, - "source": [ - "Edge Case Demonstration" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "a9d5d13a", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "✅ Completed. \n", - " Query processed 215.9 MB in a moment of slot time.\n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "✅ Completed. \n", - " Query processed 215.9 MB in a moment of slot time.\n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Small dataset pages: 1\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "46a644fa01a84ef18de5c7b7379322f1", - "version_major": 2, - "version_minor": 1 - }, - "text/plain": [ - "" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Test with very small dataset\n", - "small_df = df.sort_values([\"name\", \"year\", \"state\"]).head(5)\n", - "small_widget = TableWidget(small_df)\n", - "print(f\"Small dataset pages: {math.ceil(small_widget.row_count / small_widget.page_size)}\")\n", - "small_widget" - ] - }, - { - "cell_type": "markdown", - "id": "added-cell-2", - "metadata": {}, - "source": [ - "### Displaying Generative AI results containing JSON\n", - "The `AI.GENERATE` function in BigQuery returns results in a JSON column. While BigQuery's JSON type is not natively supported by the underlying Arrow `to_pandas_batches()` method used in anywidget mode ([Apache Arrow issue #45262](https://github.com/apache/arrow/issues/45262)), BigQuery Dataframes automatically converts JSON columns to strings for display. This allows you to view the results of generative AI functions seamlessly." - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "added-cell-1", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "✅ Completed. \n", - " Query processed 85.9 kB in 18 seconds of slot time.\n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:987: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" - ] - }, - { - "data": { - "text/html": [ - "✅ Completed. " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "✅ Completed. " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:987: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:987: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "10be79ab2b40490baecbcc5ffdd300e0", - "version_major": 2, - "version_minor": 1 - }, - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
resultgcs_pathissuerlanguagepublication_dateclass_internationalclass_usapplication_numberfiling_datepriority_date_eurepresentative_line_1_euapplicant_line_1inventor_line_1title_line_1number
0{'application_number': None, 'class_internatio...gs://gcs-public-data--labeled-patents/espacene...EUDE03.10.2018H01L 21/20<NA>18166536.516.02.2016<NA>Scheider, Sascha et alEV Group E. Thallner GmbHKurz, FlorianVORRICHTUNG ZUM BONDEN VON SUBSTRATENEP 3 382 744 A1
1{'application_number': None, 'class_internatio...gs://gcs-public-data--labeled-patents/espacene...EUDE29.08.018E04H 6/12<NA>18157874.121.02.201822.02.2017Liedtke & Partner PatentanwälteSHB Hebezeugbau GmbHVOLGER, AlexanderSTEUERUNGSSYSTEM FÜR AUTOMATISCHE PARKHÄUSEREP 3 366 869 A1
2{'application_number': None, 'class_internatio...gs://gcs-public-data--labeled-patents/espacene...EUDE03.10.2018G06F 11/30<NA>18157347.819.02.201831.03.2017Hoffmann EitleFUJITSU LIMITEDKukihara, KensukeMETHOD EXECUTED BY A COMPUTER, INFORMATION PRO...EP 3 382 553 A1
3{'application_number': None, 'class_internatio...gs://gcs-public-data--labeled-patents/espacene...EUDE03.10.2018A01K 31/00<NA>18171005.405.02.201505.02.2014Stork Bamberger PatentanwälteLinco Food Systems A/SThrane, UffeMASTHÄHNCHENCONTAINER ALS BESTANDTEIL EINER E...EP 3 381 276 A1
4{'application_number': None, 'class_internatio...gs://gcs-public-data--labeled-patents/espacene...EUDE03.10.2018H05B 6/12<NA>18165514.303.04.201830.03.2017<NA>BSH Hausger√§te GmbHAcero Acero, JesusVORRICHTUNG ZUR INDUKTIVEN ENERGIE√úBERTRAGUNGEP 3 383 141 A2
\n", - "

5 rows × 15 columns

\n", - "
[5 rows x 15 columns in total]" - ], - "text/plain": [ - " result \\\n", - "0 {'application_number': None, 'class_internatio... \n", - "1 {'application_number': None, 'class_internatio... \n", - "2 {'application_number': None, 'class_internatio... \n", - "3 {'application_number': None, 'class_internatio... \n", - "4 {'application_number': None, 'class_internatio... \n", - "\n", - " gcs_path issuer language \\\n", - "0 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", - "1 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", - "2 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", - "3 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", - "4 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", - "\n", - " publication_date class_international class_us application_number \\\n", - "0 03.10.2018 H01L 21/20 18166536.5 \n", - "1 29.08.018 E04H 6/12 18157874.1 \n", - "2 03.10.2018 G06F 11/30 18157347.8 \n", - "3 03.10.2018 A01K 31/00 18171005.4 \n", - "4 03.10.2018 H05B 6/12 18165514.3 \n", - "\n", - " filing_date priority_date_eu representative_line_1_eu \\\n", - "0 16.02.2016 Scheider, Sascha et al \n", - "1 21.02.2018 22.02.2017 Liedtke & Partner Patentanw√§lte \n", - "2 19.02.2018 31.03.2017 Hoffmann Eitle \n", - "3 05.02.2015 05.02.2014 Stork Bamberger Patentanw√§lte \n", - "4 03.04.2018 30.03.2017 \n", - "\n", - " applicant_line_1 inventor_line_1 \\\n", - "0 EV Group E. Thallner GmbH Kurz, Florian \n", - "1 SHB Hebezeugbau GmbH VOLGER, Alexander \n", - "2 FUJITSU LIMITED Kukihara, Kensuke \n", - "3 Linco Food Systems A/S Thrane, Uffe \n", - "4 BSH Hausger√§te GmbH Acero Acero, Jesus \n", - "\n", - " title_line_1 number \n", - "0 VORRICHTUNG ZUM BONDEN VON SUBSTRATEN EP 3 382 744 A1 \n", - "1 STEUERUNGSSYSTEM F√úR AUTOMATISCHE PARKH√ÑUSER EP 3 366 869 A1 \n", - "2 METHOD EXECUTED BY A COMPUTER, INFORMATION PRO... EP 3 382 553 A1 \n", - "3 MASTH√ÑHNCHENCONTAINER ALS BESTANDTEIL EINER E... EP 3 381 276 A1 \n", - "4 VORRICHTUNG ZUR INDUKTIVEN ENERGIE√úBERTRAGUNG EP 3 383 141 A2 \n", - "\n", - "[5 rows x 15 columns]" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "bpd._read_gbq_colab(\"\"\"\n", - " SELECT\n", - " AI.GENERATE(\n", - " prompt=>(\\\"Extract the values.\\\", OBJ.GET_ACCESS_URL(OBJ.FETCH_METADATA(OBJ.MAKE_REF(gcs_path, \\\"us.conn\\\")), \\\"r\\\")),\n", - " connection_id=>\\\"bigframes-dev.us.bigframes-default-connection\\\",\n", - " output_schema=>\\\"publication_date string, class_international string, application_number string, filing_date string\\\") AS result,\n", - " *\n", - " FROM `bigquery-public-data.labeled_patents.extracted_data`\n", - " LIMIT 5;\n", - "\"\"\")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.13.0" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} + "**Key features:**\n", + "- **Rich DataFrames & Series:** Both DataFrames and Series are displayed as interactive widgets.\n", + "- **Pagination:** Navigate through large datasets page by page without overwhelming the output.\n", + "- **Column Sorting:** Click column headers to toggle between ascending, descending, and unsorted views.\n", From a20a5ee9cb3bf5de00869dac342d9809a98efb3f Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Tue, 23 Dec 2025 03:19:44 +0000 Subject: [PATCH 22/25] docs: update notebook --- notebooks/dataframes/anywidget_mode.ipynb | 963 +++++++++++++++++++++- 1 file changed, 962 insertions(+), 1 deletion(-) diff --git a/notebooks/dataframes/anywidget_mode.ipynb b/notebooks/dataframes/anywidget_mode.ipynb index 26acd57465..0508119f06 100644 --- a/notebooks/dataframes/anywidget_mode.ipynb +++ b/notebooks/dataframes/anywidget_mode.ipynb @@ -27,7 +27,7 @@ "id": "acca43ae", "metadata": {}, "source": [ - "# Demo: Interactive Analysis with Anywidget" + "# Demo to Show Anywidget mode" ] }, { @@ -51,3 +51,964 @@ "- **Rich DataFrames & Series:** Both DataFrames and Series are displayed as interactive widgets.\n", "- **Pagination:** Navigate through large datasets page by page without overwhelming the output.\n", "- **Column Sorting:** Click column headers to toggle between ascending, descending, and unsorted views.\n", + "- **Column Resizing:** Drag the dividers between column headers to adjust their width." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "1bc5aaf3", + "metadata": {}, + "outputs": [], + "source": [ + "bpd.options.bigquery.ordering_mode = \"partial\"\n", + "bpd.options.display.repr_mode = \"anywidget\"" + ] + }, + { + "cell_type": "markdown", + "id": "0a354c69", + "metadata": {}, + "source": [ + "Load Sample Data" + ] + }, + { + "cell_type": "markdown", + "id": "interactive-df-header", + "metadata": {}, + "source": [ + "## 1. Interactive DataFrame Display\n", + "Loading a dataset from BigQuery automatically renders the interactive widget." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "f289d250", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "✅ Completed. " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "✅ Completed. \n", + " Query processed 0 Bytes in a moment of slot time.\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "state gender year name number\n", + " AL F 1910 Vera 71\n", + " AR F 1910 Viola 37\n", + " AR F 1910 Alice 57\n", + " AR F 1910 Edna 95\n", + " AR F 1910 Ollie 40\n", + " CA F 1910 Beatrice 37\n", + " CT F 1910 Marion 36\n", + " CT F 1910 Marie 36\n", + " FL F 1910 Alice 53\n", + " GA F 1910 Thelma 133\n", + "...\n", + "\n", + "[5552452 rows x 5 columns]\n" + ] + } + ], + "source": [ + "df = bpd.read_gbq(\"bigquery-public-data.usa_names.usa_1910_2013\")\n", + "print(df)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "220340b0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "✅ Completed. " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "✅ Completed. " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "7654ec5d46934fb8a60c7b91482c242a", + "version_major": 2, + "version_minor": 1 + }, + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
stategenderyearnamenumber
0ALF1910Sadie40
1ALF1910Mary875
2ARF1910Vera39
3ARF1910Marie78
4ARF1910Lucille66
5CAF1910Virginia101
6DCF1910Margaret72
7GAF1910Mildred133
8GAF1910Vera51
9GAF1910Sallie92
\n", + "

10 rows × 5 columns

\n", + "
[5552452 rows x 5 columns in total]" + ], + "text/plain": [ + "state gender year name number\n", + " AL F 1910 Sadie 40\n", + " AL F 1910 Mary 875\n", + " AR F 1910 Vera 39\n", + " AR F 1910 Marie 78\n", + " AR F 1910 Lucille 66\n", + " CA F 1910 Virginia 101\n", + " DC F 1910 Margaret 72\n", + " GA F 1910 Mildred 133\n", + " GA F 1910 Vera 51\n", + " GA F 1910 Sallie 92\n", + "...\n", + "\n", + "[5552452 rows x 5 columns]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "markdown", + "id": "3a73e472", + "metadata": {}, + "source": [ + "## 2. Interactive Series Display\n", + "BigQuery DataFrames `Series` objects now also support the full interactive widget experience, including pagination and formatting." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "42bb02ab", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "✅ Completed. " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "✅ Completed. " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "e8a4fe22c46848fdabe75f29e381605f", + "version_major": 2, + "version_minor": 1 + }, + "text/html": [ + "
0    1910\n",
+       "1    1910\n",
+       "2    1910\n",
+       "3    1910\n",
+       "4    1910\n",
+       "5    1910\n",
+       "6    1910\n",
+       "7    1910\n",
+       "8    1910\n",
+       "9    1910
[5552452 rows]" + ], + "text/plain": [ + "1910\n", + "1910\n", + "1910\n", + "1910\n", + "1910\n", + "1910\n", + "1910\n", + "1910\n", + "1910\n", + "1910\n", + "Name: year, dtype: Int64\n", + "...\n", + "\n", + "[5552452 rows]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_series = df[\"year\"]\n", + "# Displaying the series triggers the interactive widget\n", + "test_series" + ] + }, + { + "cell_type": "markdown", + "id": "7bcf1bb7", + "metadata": {}, + "source": [ + "Display with Pagination" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "da23e0f3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "✅ Completed. " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "✅ Completed. " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "4876463d54fb4ad897c1f54805003b4e", + "version_major": 2, + "version_minor": 1 + }, + "text/html": [ + "
0    1910\n",
+       "1    1910\n",
+       "2    1910\n",
+       "3    1910\n",
+       "4    1910\n",
+       "5    1910\n",
+       "6    1910\n",
+       "7    1910\n",
+       "8    1910\n",
+       "9    1910
[5552452 rows]" + ], + "text/plain": [ + "1910\n", + "1910\n", + "1910\n", + "1910\n", + "1910\n", + "1910\n", + "1910\n", + "1910\n", + "1910\n", + "1910\n", + "Name: year, dtype: Int64\n", + "...\n", + "\n", + "[5552452 rows]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_series" + ] + }, + { + "cell_type": "markdown", + "id": "sorting-intro", + "metadata": {}, + "source": [ + "### Sorting by Single-Column\n", + "You can sort the table by clicking on the headers of columns that have orderable data types (like numbers, strings, and dates). Non-orderable columns (like arrays or structs) do not have sorting controls.\n", + "\n", + "**Sorting indicators (▲, ▼) are always visible for sorted columns. The unsorted indicator (●) is only visible when you hover over an unsorted column header.** The sorting control cycles through three states:\n", + "- **Unsorted (no indicator by default, ● on hover):** The default state. Click the header to sort in ascending order.\n", + "- **Ascending (▲):** The data is sorted from smallest to largest. Click again to sort in descending order.\n", + "- **Descending (▼):** The data is sorted from largest to smallest. Click again to return to the unsorted state." + ] + }, + { + "cell_type": "markdown", + "id": "adjustable-width-intro", + "metadata": {}, + "source": [ + "### Adjustable Column Widths\n", + "You can easily adjust the width of any column in the table. Simply hover your mouse over the vertical dividers between column headers. When the cursor changes to a resize icon, click and drag to expand or shrink the column to your desired width. This allows for better readability and customization of your table view." + ] + }, + { + "cell_type": "markdown", + "id": "bb15bab6", + "metadata": {}, + "source": [ + "Programmatic Navigation Demo" + ] + }, + { + "cell_type": "markdown", + "id": "programmatic-header", + "metadata": {}, + "source": [ + "## 3. Programmatic Widget Control\n", + "You can also instantiate the `TableWidget` directly for more control, such as checking page counts or driving navigation programmatically." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "6920d49b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "✅ Completed. " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "✅ Completed. " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total pages: 555246\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "38f719f387764fd39404fb87bf5f10cc", + "version_major": 2, + "version_minor": 1 + }, + "text/plain": [ + "" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from bigframes.display.anywidget import TableWidget\n", + "import math\n", + " \n", + "# Create widget programmatically \n", + "widget = TableWidget(df)\n", + "print(f\"Total pages: {math.ceil(widget.row_count / widget.page_size)}\")\n", + " \n", + "# Display the widget\n", + "widget" + ] + }, + { + "cell_type": "markdown", + "id": "02cbd1be", + "metadata": {}, + "source": [ + "Test Navigation Programmatically" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "12b68f15", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Current page: 0\n", + "After next: 1\n", + "After prev: 0\n" + ] + } + ], + "source": [ + "# Simulate button clicks programmatically\n", + "print(\"Current page:\", widget.page)\n", + "\n", + "# Go to next page\n", + "widget.page = 1\n", + "print(\"After next:\", widget.page)\n", + "\n", + "# Go to previous page\n", + "widget.page = 0\n", + "print(\"After prev:\", widget.page)" + ] + }, + { + "cell_type": "markdown", + "id": "9d310138", + "metadata": {}, + "source": [ + "## 4. Edge Cases\n", + "The widget handles small datasets gracefully, disabling unnecessary pagination controls." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "a9d5d13a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "✅ Completed. \n", + " Query processed 171.4 MB in a moment of slot time.\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "✅ Completed. \n", + " Query processed 0 Bytes in a moment of slot time.\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Small dataset pages: 1\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "9b521d2fdd0a4a4bb608be0fb7575ac3", + "version_major": 2, + "version_minor": 1 + }, + "text/plain": [ + "" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Test with very small dataset\n", + "small_df = df.sort_values([\"name\", \"year\", \"state\"]).head(5)\n", + "small_widget = TableWidget(small_df)\n", + "print(f\"Small dataset pages: {math.ceil(small_widget.row_count / small_widget.page_size)}\")\n", + "small_widget" + ] + }, + { + "cell_type": "markdown", + "id": "added-cell-2", + "metadata": {}, + "source": [ + "### Displaying Generative AI results containing JSON\n", + "The `AI.GENERATE` function in BigQuery returns results in a JSON column. While BigQuery's JSON type is not natively supported by the underlying Arrow `to_pandas_batches()` method used in anywidget mode ([Apache Arrow issue #45262](https://github.com/apache/arrow/issues/45262)), BigQuery Dataframes automatically converts JSON columns to strings for display. This allows you to view the results of generative AI functions seamlessly." + ] + }, + { + "cell_type": "markdown", + "id": "ai-header", + "metadata": {}, + "source": [ + "## 5. Advanced Data Types (JSON/Structs)\n", + "The `AI.GENERATE` function in BigQuery returns results in a JSON column. BigQuery Dataframes automatically handles complex types like JSON strings for display, allowing you to view generative AI results seamlessly." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "added-cell-1", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "✅ Completed. \n", + " Query processed 85.9 kB in 18 seconds of slot time.\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:987: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" + ] + }, + { + "data": { + "text/html": [ + "✅ Completed. " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "✅ Completed. " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:987: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:987: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "6dcce05fc322464f9bb76a840a385c02", + "version_major": 2, + "version_minor": 1 + }, + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
resultgcs_pathissuerlanguagepublication_dateclass_internationalclass_usapplication_numberfiling_datepriority_date_eurepresentative_line_1_euapplicant_line_1inventor_line_1title_line_1number
0{'application_number': None, 'class_internatio...gs://gcs-public-data--labeled-patents/espacene...EUDE03.10.2018H05B 6/12<NA>18165514.303.04.201830.03.2017<NA>BSH Hausger√§te GmbHAcero Acero, JesusVORRICHTUNG ZUR INDUKTIVEN ENERGIE√úBERTRAGUNGEP 3 383 141 A2
1{'application_number': None, 'class_internatio...gs://gcs-public-data--labeled-patents/espacene...EUDE03.10.2018H01L 21/20<NA>18166536.516.02.2016<NA>Scheider, Sascha et alEV Group E. Thallner GmbHKurz, FlorianVORRICHTUNG ZUM BONDEN VON SUBSTRATENEP 3 382 744 A1
2{'application_number': None, 'class_internatio...gs://gcs-public-data--labeled-patents/espacene...EUDE03.10.2018G06F 11/30<NA>18157347.819.02.201831.03.2017Hoffmann EitleFUJITSU LIMITEDKukihara, KensukeMETHOD EXECUTED BY A COMPUTER, INFORMATION PRO...EP 3 382 553 A1
3{'application_number': None, 'class_internatio...gs://gcs-public-data--labeled-patents/espacene...EUDE03.10.2018A01K 31/00<NA>18171005.405.02.201505.02.2014Stork Bamberger PatentanwälteLinco Food Systems A/SThrane, UffeMASTHÄHNCHENCONTAINER ALS BESTANDTEIL EINER E...EP 3 381 276 A1
4{'application_number': None, 'class_internatio...gs://gcs-public-data--labeled-patents/espacene...EUDE29.08.018E04H 6/12<NA>18157874.121.02.201822.02.2017Liedtke & Partner PatentanwälteSHB Hebezeugbau GmbHVOLGER, AlexanderSTEUERUNGSSYSTEM FÜR AUTOMATISCHE PARKHÄUSEREP 3 366 869 A1
\n", + "

5 rows × 15 columns

\n", + "
[5 rows x 15 columns in total]" + ], + "text/plain": [ + " result \\\n", + "0 {'application_number': None, 'class_internatio... \n", + "1 {'application_number': None, 'class_internatio... \n", + "2 {'application_number': None, 'class_internatio... \n", + "3 {'application_number': None, 'class_internatio... \n", + "4 {'application_number': None, 'class_internatio... \n", + "\n", + " gcs_path issuer language \\\n", + "0 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", + "1 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", + "2 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", + "3 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", + "4 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", + "\n", + " publication_date class_international class_us application_number \\\n", + "0 03.10.2018 H05B 6/12 18165514.3 \n", + "1 03.10.2018 H01L 21/20 18166536.5 \n", + "2 03.10.2018 G06F 11/30 18157347.8 \n", + "3 03.10.2018 A01K 31/00 18171005.4 \n", + "4 29.08.018 E04H 6/12 18157874.1 \n", + "\n", + " filing_date priority_date_eu representative_line_1_eu \\\n", + "0 03.04.2018 30.03.2017 \n", + "1 16.02.2016 Scheider, Sascha et al \n", + "2 19.02.2018 31.03.2017 Hoffmann Eitle \n", + "3 05.02.2015 05.02.2014 Stork Bamberger Patentanw√§lte \n", + "4 21.02.2018 22.02.2017 Liedtke & Partner Patentanw√§lte \n", + "\n", + " applicant_line_1 inventor_line_1 \\\n", + "0 BSH Hausger√§te GmbH Acero Acero, Jesus \n", + "1 EV Group E. Thallner GmbH Kurz, Florian \n", + "2 FUJITSU LIMITED Kukihara, Kensuke \n", + "3 Linco Food Systems A/S Thrane, Uffe \n", + "4 SHB Hebezeugbau GmbH VOLGER, Alexander \n", + "\n", + " title_line_1 number \n", + "0 VORRICHTUNG ZUR INDUKTIVEN ENERGIE√úBERTRAGUNG EP 3 383 141 A2 \n", + "1 VORRICHTUNG ZUM BONDEN VON SUBSTRATEN EP 3 382 744 A1 \n", + "2 METHOD EXECUTED BY A COMPUTER, INFORMATION PRO... EP 3 382 553 A1 \n", + "3 MASTH√ÑHNCHENCONTAINER ALS BESTANDTEIL EINER E... EP 3 381 276 A1 \n", + "4 STEUERUNGSSYSTEM F√úR AUTOMATISCHE PARKH√ÑUSER EP 3 366 869 A1 \n", + "\n", + "[5 rows x 15 columns]" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bpd._read_gbq_colab(\"\"\"\n", + " SELECT\n", + " AI.GENERATE(\n", + " prompt=>(\\\"Extract the values.\\\", OBJ.GET_ACCESS_URL(OBJ.FETCH_METADATA(OBJ.MAKE_REF(gcs_path, \\\"us.conn\\\")), \\\"r\\\")),\n", + " connection_id=>\\\"bigframes-dev.us.bigframes-default-connection\\\",\n", + " output_schema=>\\\"publication_date string, class_international string, application_number string, filing_date string\\\") AS result,\n", + " *\n", + " FROM `bigquery-public-data.labeled_patents.extracted_data`\n", + " LIMIT 5;\n", + "\"\"\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.0" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From c0f4b4e26615fc0e8f7ae36f06d186a9c45d5a52 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Tue, 23 Dec 2025 16:13:24 +0000 Subject: [PATCH 23/25] refactor: decouple plaintext representation from core objects --- bigframes/dataframe.py | 8 +- bigframes/display/html.py | 22 +++- bigframes/display/plaintext.py | 37 ++++-- bigframes/series.py | 7 +- notebooks/dataframes/anywidget_mode.ipynb | 142 +++++++++++----------- 5 files changed, 128 insertions(+), 88 deletions(-) diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index a77845235f..0456b15ac3 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -799,7 +799,13 @@ def __repr__(self) -> str: self._set_internal_query_job(query_job) from bigframes.display import plaintext - return plaintext.create_text_representation(self, pandas_df, row_count) + return plaintext.create_text_representation( + pandas_df, + row_count, + is_series=False, + has_index=self._has_index, + column_count=len(self.columns), + ) def _get_display_df_and_blob_cols(self) -> tuple[DataFrame, list[str]]: """Process blob columns for display.""" diff --git a/bigframes/display/html.py b/bigframes/display/html.py index 1a4cd99cff..3af8453c1b 100644 --- a/bigframes/display/html.py +++ b/bigframes/display/html.py @@ -219,8 +219,17 @@ def get_anywidget_bundle( total_columns, blob_cols if "blob_cols" in locals() else [], ) + is_series = isinstance(obj, Series) + if is_series: + has_index = len(obj._block.index_columns) > 0 + else: + has_index = obj._has_index widget_repr["text/plain"] = plaintext.create_text_representation( - obj, cached_pd, total_rows + cached_pd, + total_rows, + is_series=is_series, + has_index=has_index, + column_count=len(df.columns) if not is_series else 0, ) return widget_repr, widget_metadata @@ -260,8 +269,17 @@ def repr_mimebundle_head( obj, pandas_df, row_count, column_count, blob_cols ) + is_series = isinstance(obj, Series) + if is_series: + has_index = len(obj._block.index_columns) > 0 + else: + has_index = obj._has_index text_representation = plaintext.create_text_representation( - obj, pandas_df, row_count + pandas_df, + row_count, + is_series=is_series, + has_index=has_index, + column_count=len(pandas_df.columns) if not is_series else 0, ) return {"text/html": html_string, "text/plain": text_representation} diff --git a/bigframes/display/plaintext.py b/bigframes/display/plaintext.py index 4c175606f2..2f7bc1df07 100644 --- a/bigframes/display/plaintext.py +++ b/bigframes/display/plaintext.py @@ -17,7 +17,6 @@ from __future__ import annotations import typing -from typing import Union import pandas import pandas.io.formats @@ -27,24 +26,38 @@ if typing.TYPE_CHECKING: import pandas as pd - import bigframes.dataframe - import bigframes.series - def create_text_representation( - obj: Union[bigframes.dataframe.DataFrame, bigframes.series.Series], pandas_df: pd.DataFrame, total_rows: typing.Optional[int], + is_series: bool, + has_index: bool = True, + column_count: int = 0, ) -> str: - """Create a text representation of the DataFrame or Series.""" - from bigframes.series import Series - + """Create a text representation of the DataFrame or Series. + + Args: + pandas_df: + The pandas DataFrame containing the data to represent. + total_rows: + The total number of rows in the original BigFrames object. + is_series: + Whether the object being represented is a Series. + has_index: + Whether the object has an index to display. + column_count: + The total number of columns in the original BigFrames object. + Only used for DataFrames. + + Returns: + A plaintext string representation. + """ opts = options.display - if isinstance(obj, Series): + if is_series: with display_options.pandas_repr(opts): pd_series = pandas_df.iloc[:, 0] - if len(obj._block.index_columns) == 0: + if not has_index: repr_string = pd_series.to_string( length=False, index=False, name=True, dtype=True ) @@ -68,7 +81,7 @@ def create_text_representation( to_string_kwargs = ( pandas.io.formats.format.get_dataframe_repr_params() # type: ignore ) - if not obj._has_index: + if not has_index: to_string_kwargs.update({"index": False}) # We add our own dimensions string, so don't want pandas to. @@ -81,11 +94,9 @@ def create_text_representation( if is_truncated: lines.append("...") lines.append("") # Add empty line for spacing only if truncated - column_count = len(obj.columns) lines.append(f"[{total_rows or '?'} rows x {column_count} columns]") else: # For non-truncated DataFrames, we still need to add dimensions if show_dimensions was False - column_count = len(obj.columns) lines.append("") lines.append(f"[{total_rows or '?'} rows x {column_count} columns]") return "\n".join(lines) diff --git a/bigframes/series.py b/bigframes/series.py index 80020096bb..606169a8a1 100644 --- a/bigframes/series.py +++ b/bigframes/series.py @@ -600,7 +600,12 @@ def __repr__(self) -> str: self._set_internal_query_job(query_job) from bigframes.display import plaintext - return plaintext.create_text_representation(self, pandas_df, row_count) + return plaintext.create_text_representation( + pandas_df, + row_count, + is_series=True, + has_index=len(self._block.index_columns) > 0, + ) def astype( self, diff --git a/notebooks/dataframes/anywidget_mode.ipynb b/notebooks/dataframes/anywidget_mode.ipynb index 0508119f06..facefc6069 100644 --- a/notebooks/dataframes/anywidget_mode.ipynb +++ b/notebooks/dataframes/anywidget_mode.ipynb @@ -173,7 +173,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "7654ec5d46934fb8a60c7b91482c242a", + "model_id": "424cfa14088641518224b137b5444d58", "version_major": 2, "version_minor": 1 }, @@ -209,80 +209,80 @@ " AL\n", " F\n", " 1910\n", - " Sadie\n", - " 40\n", + " Vera\n", + " 71\n", " \n", " \n", " 1\n", - " AL\n", + " AR\n", " F\n", " 1910\n", - " Mary\n", - " 875\n", + " Viola\n", + " 37\n", " \n", " \n", " 2\n", " AR\n", " F\n", " 1910\n", - " Vera\n", - " 39\n", + " Alice\n", + " 57\n", " \n", " \n", " 3\n", " AR\n", " F\n", " 1910\n", - " Marie\n", - " 78\n", + " Edna\n", + " 95\n", " \n", " \n", " 4\n", " AR\n", " F\n", " 1910\n", - " Lucille\n", - " 66\n", + " Ollie\n", + " 40\n", " \n", " \n", " 5\n", " CA\n", " F\n", " 1910\n", - " Virginia\n", - " 101\n", + " Beatrice\n", + " 37\n", " \n", " \n", " 6\n", - " DC\n", + " CT\n", " F\n", " 1910\n", - " Margaret\n", - " 72\n", + " Marion\n", + " 36\n", " \n", " \n", " 7\n", - " GA\n", + " CT\n", " F\n", " 1910\n", - " Mildred\n", - " 133\n", + " Marie\n", + " 36\n", " \n", " \n", " 8\n", - " GA\n", + " FL\n", " F\n", " 1910\n", - " Vera\n", - " 51\n", + " Alice\n", + " 53\n", " \n", " \n", " 9\n", " GA\n", " F\n", " 1910\n", - " Sallie\n", - " 92\n", + " Thelma\n", + " 133\n", " \n", " \n", "\n", @@ -291,16 +291,16 @@ ], "text/plain": [ "state gender year name number\n", - " AL F 1910 Sadie 40\n", - " AL F 1910 Mary 875\n", - " AR F 1910 Vera 39\n", - " AR F 1910 Marie 78\n", - " AR F 1910 Lucille 66\n", - " CA F 1910 Virginia 101\n", - " DC F 1910 Margaret 72\n", - " GA F 1910 Mildred 133\n", - " GA F 1910 Vera 51\n", - " GA F 1910 Sallie 92\n", + " AL F 1910 Vera 71\n", + " AR F 1910 Viola 37\n", + " AR F 1910 Alice 57\n", + " AR F 1910 Edna 95\n", + " AR F 1910 Ollie 40\n", + " CA F 1910 Beatrice 37\n", + " CT F 1910 Marion 36\n", + " CT F 1910 Marie 36\n", + " FL F 1910 Alice 53\n", + " GA F 1910 Thelma 133\n", "...\n", "\n", "[5552452 rows x 5 columns]" @@ -357,7 +357,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "e8a4fe22c46848fdabe75f29e381605f", + "model_id": "3904868f71114a0c95c8c133a6c29d0b", "version_major": 2, "version_minor": 1 }, @@ -442,7 +442,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "4876463d54fb4ad897c1f54805003b4e", + "model_id": "0fd0bd56db2348a68d5755a045652001", "version_major": 2, "version_minor": 1 }, @@ -564,12 +564,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "38f719f387764fd39404fb87bf5f10cc", + "model_id": "13b063f7ea74473eb18de270c48c6417", "version_major": 2, "version_minor": 1 }, "text/plain": [ - "" + "" ] }, "execution_count": 8, @@ -679,12 +679,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "9b521d2fdd0a4a4bb608be0fb7575ac3", + "model_id": "0918149d2d734296afb3243f283eb2d3", "version_major": 2, "version_minor": 1 }, "text/plain": [ - "" + "" ] }, "execution_count": 10, @@ -728,7 +728,7 @@ "data": { "text/html": [ "✅ Completed. \n", - " Query processed 85.9 kB in 18 seconds of slot time.\n", + " Query processed 85.9 kB in 24 seconds of slot time.\n", " " ], "text/plain": [ @@ -789,7 +789,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "6dcce05fc322464f9bb76a840a385c02", + "model_id": "9543a0ef6eb744f480e49d4876c31b84", "version_major": 2, "version_minor": 1 }, @@ -836,17 +836,17 @@ " gs://gcs-public-data--labeled-patents/espacene...\n", " EU\n", " DE\n", - " 03.10.2018\n", - " H05B 6/12\n", - " <NA>\n", - " 18165514.3\n", - " 03.04.2018\n", - " 30.03.2017\n", + " 29.08.018\n", + " E04H 6/12\n", " <NA>\n", - " BSH Hausger√§te GmbH\n", - " Acero Acero, Jesus\n", - " VORRICHTUNG ZUR INDUKTIVEN ENERGIE√úBERTRAGUNG\n", - " EP 3 383 141 A2\n", + " 18157874.1\n", + " 21.02.2018\n", + " 22.02.2017\n", + " Liedtke & Partner Patentanw√§lte\n", + " SHB Hebezeugbau GmbH\n", + " VOLGER, Alexander\n", + " STEUERUNGSSYSTEM F√úR AUTOMATISCHE PARKH√ÑUSER\n", + " EP 3 366 869 A1\n", " \n", " \n", " 1\n", @@ -908,17 +908,17 @@ " gs://gcs-public-data--labeled-patents/espacene...\n", " EU\n", " DE\n", - " 29.08.018\n", - " E04H 6/12\n", + " 03.10.2018\n", + " H05B 6/12\n", " <NA>\n", - " 18157874.1\n", - " 21.02.2018\n", - " 22.02.2017\n", - " Liedtke & Partner Patentanw√§lte\n", - " SHB Hebezeugbau GmbH\n", - " VOLGER, Alexander\n", - " STEUERUNGSSYSTEM F√úR AUTOMATISCHE PARKH√ÑUSER\n", - " EP 3 366 869 A1\n", + " 18165514.3\n", + " 03.04.2018\n", + " 30.03.2017\n", + " <NA>\n", + " BSH Hausger√§te GmbH\n", + " Acero Acero, Jesus\n", + " VORRICHTUNG ZUR INDUKTIVEN ENERGIE√úBERTRAGUNG\n", + " EP 3 383 141 A2\n", " \n", " \n", "\n", @@ -941,32 +941,32 @@ "4 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", "\n", " publication_date class_international class_us application_number \\\n", - "0 03.10.2018 H05B 6/12 18165514.3 \n", + "0 29.08.018 E04H 6/12 18157874.1 \n", "1 03.10.2018 H01L 21/20 18166536.5 \n", "2 03.10.2018 G06F 11/30 18157347.8 \n", "3 03.10.2018 A01K 31/00 18171005.4 \n", - "4 29.08.018 E04H 6/12 18157874.1 \n", + "4 03.10.2018 H05B 6/12 18165514.3 \n", "\n", " filing_date priority_date_eu representative_line_1_eu \\\n", - "0 03.04.2018 30.03.2017 \n", + "0 21.02.2018 22.02.2017 Liedtke & Partner Patentanw√§lte \n", "1 16.02.2016 Scheider, Sascha et al \n", "2 19.02.2018 31.03.2017 Hoffmann Eitle \n", "3 05.02.2015 05.02.2014 Stork Bamberger Patentanw√§lte \n", - "4 21.02.2018 22.02.2017 Liedtke & Partner Patentanw√§lte \n", + "4 03.04.2018 30.03.2017 \n", "\n", " applicant_line_1 inventor_line_1 \\\n", - "0 BSH Hausger√§te GmbH Acero Acero, Jesus \n", + "0 SHB Hebezeugbau GmbH VOLGER, Alexander \n", "1 EV Group E. Thallner GmbH Kurz, Florian \n", "2 FUJITSU LIMITED Kukihara, Kensuke \n", "3 Linco Food Systems A/S Thrane, Uffe \n", - "4 SHB Hebezeugbau GmbH VOLGER, Alexander \n", + "4 BSH Hausger√§te GmbH Acero Acero, Jesus \n", "\n", " title_line_1 number \n", - "0 VORRICHTUNG ZUR INDUKTIVEN ENERGIE√úBERTRAGUNG EP 3 383 141 A2 \n", + "0 STEUERUNGSSYSTEM F√úR AUTOMATISCHE PARKH√ÑUSER EP 3 366 869 A1 \n", "1 VORRICHTUNG ZUM BONDEN VON SUBSTRATEN EP 3 382 744 A1 \n", "2 METHOD EXECUTED BY A COMPUTER, INFORMATION PRO... EP 3 382 553 A1 \n", "3 MASTH√ÑHNCHENCONTAINER ALS BESTANDTEIL EINER E... EP 3 381 276 A1 \n", - "4 STEUERUNGSSYSTEM F√úR AUTOMATISCHE PARKH√ÑUSER EP 3 366 869 A1 \n", + "4 VORRICHTUNG ZUR INDUKTIVEN ENERGIE√úBERTRAGUNG EP 3 383 141 A2 \n", "\n", "[5 rows x 15 columns]" ] From 38899b7b2699c80f0b596602675a1075a5da419e Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Tue, 23 Dec 2025 16:26:50 +0000 Subject: [PATCH 24/25] refactor: consolidate object metadata extraction for display --- bigframes/display/html.py | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/bigframes/display/html.py b/bigframes/display/html.py index 3af8453c1b..86d21834d7 100644 --- a/bigframes/display/html.py +++ b/bigframes/display/html.py @@ -179,6 +179,19 @@ def obj_ref_rt_to_html(obj_ref_rt) -> str: return html_string +def _get_obj_metadata( + obj: Union[bigframes.dataframe.DataFrame, bigframes.series.Series], +) -> tuple[bool, bool]: + from bigframes.series import Series + + is_series = isinstance(obj, Series) + if is_series: + has_index = len(obj._block.index_columns) > 0 + else: + has_index = obj._has_index + return is_series, has_index + + def get_anywidget_bundle( obj: Union[bigframes.dataframe.DataFrame, bigframes.series.Series], include=None, @@ -219,11 +232,7 @@ def get_anywidget_bundle( total_columns, blob_cols if "blob_cols" in locals() else [], ) - is_series = isinstance(obj, Series) - if is_series: - has_index = len(obj._block.index_columns) > 0 - else: - has_index = obj._has_index + is_series, has_index = _get_obj_metadata(obj) widget_repr["text/plain"] = plaintext.create_text_representation( cached_pd, total_rows, @@ -269,11 +278,7 @@ def repr_mimebundle_head( obj, pandas_df, row_count, column_count, blob_cols ) - is_series = isinstance(obj, Series) - if is_series: - has_index = len(obj._block.index_columns) > 0 - else: - has_index = obj._has_index + is_series, has_index = _get_obj_metadata(obj) text_representation = plaintext.create_text_representation( pandas_df, row_count, From 64230d1cbef7f6bb543c22e1bca03ff5a5611fdf Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Tue, 23 Dec 2025 19:57:32 +0000 Subject: [PATCH 25/25] fix: refactor html display to address review comments --- bigframes/display/html.py | 84 +++++++++++++++++++-------------------- 1 file changed, 40 insertions(+), 44 deletions(-) diff --git a/bigframes/display/html.py b/bigframes/display/html.py index 86d21834d7..3f1667eb9c 100644 --- a/bigframes/display/html.py +++ b/bigframes/display/html.py @@ -104,6 +104,24 @@ def render_html( return "\n".join(table_html) +def _obj_ref_rt_to_html(obj_ref_rt: str) -> str: + obj_ref_rt_json = json.loads(obj_ref_rt) + obj_ref_details = obj_ref_rt_json["objectref"]["details"] + if "gcs_metadata" in obj_ref_details: + gcs_metadata = obj_ref_details["gcs_metadata"] + content_type = typing.cast(str, gcs_metadata.get("content_type", "")) + if content_type.startswith("image"): + size_str = "" + if options.display.blob_display_width: + size_str = f' width="{options.display.blob_display_width}"' + if options.display.blob_display_height: + size_str = size_str + f' height="{options.display.blob_display_height}"' + url = obj_ref_rt_json["access_urls"]["read_url"] + return f'' + + return f'uri: {obj_ref_rt_json["objectref"]["uri"]}, authorizer: {obj_ref_rt_json["objectref"]["authorizer"]}' + + def create_html_representation( obj: Union[bigframes.dataframe.DataFrame, bigframes.series.Series], pandas_df: pd.DataFrame, @@ -114,52 +132,30 @@ def create_html_representation( """Create an HTML representation of the DataFrame or Series.""" from bigframes.series import Series - if isinstance(obj, Series): - # Some pandas objects may not have a _repr_html_ method, or it might - # fail in certain environments. We fall back to a pre-formatted - # string representation to ensure something is always displayed. - pd_series = pandas_df.iloc[:, 0] - try: - html_string = pd_series._repr_html_() - except AttributeError: - html_string = f"
{pd_series.to_string()}
" - - html_string += f"[{total_rows} rows]" - return html_string - else: - # It's a DataFrame - opts = options.display - with display_options.pandas_repr(opts): + opts = options.display + with display_options.pandas_repr(opts): + if isinstance(obj, Series): + # Some pandas objects may not have a _repr_html_ method, or it might + # fail in certain environments. We fall back to a pre-formatted + # string representation to ensure something is always displayed. + pd_series = pandas_df.iloc[:, 0] + try: + # TODO(b/464053870): Support rich display for blob Series. + html_string = pd_series._repr_html_() + except AttributeError: + html_string = f"
{pd_series.to_string()}
" + + is_truncated = total_rows is not None and total_rows > len(pandas_df) + if is_truncated: + html_string += f"

[{total_rows} rows]

" + return html_string + else: + # It's a DataFrame # TODO(shuowei, b/464053870): Escaping HTML would be useful, but # `escape=False` is needed to show images. We may need to implement # a full-fledged repr module to better support types not in pandas. if options.display.blob_display and blob_cols: - - def obj_ref_rt_to_html(obj_ref_rt) -> str: - obj_ref_rt_json = json.loads(obj_ref_rt) - obj_ref_details = obj_ref_rt_json["objectref"]["details"] - if "gcs_metadata" in obj_ref_details: - gcs_metadata = obj_ref_details["gcs_metadata"] - content_type = typing.cast( - str, gcs_metadata.get("content_type", "") - ) - if content_type.startswith("image"): - size_str = "" - if options.display.blob_display_width: - size_str = ( - f' width="{options.display.blob_display_width}"' - ) - if options.display.blob_display_height: - size_str = ( - size_str - + f' height="{options.display.blob_display_height}"' - ) - url = obj_ref_rt_json["access_urls"]["read_url"] - return f'' - - return f'uri: {obj_ref_rt_json["objectref"]["uri"]}, authorizer: {obj_ref_rt_json["objectref"]["authorizer"]}' - - formatters = {blob_col: obj_ref_rt_to_html for blob_col in blob_cols} + formatters = {blob_col: _obj_ref_rt_to_html for blob_col in blob_cols} # set max_colwidth so not to truncate the image url with pandas.option_context("display.max_colwidth", None): @@ -175,8 +171,8 @@ def obj_ref_rt_to_html(obj_ref_rt) -> str: # _repr_html_ stub is missing so mypy thinks it's a Series. Ignore mypy. html_string = pandas_df._repr_html_() # type:ignore - html_string += f"[{total_rows} rows x {total_columns} columns in total]" - return html_string + html_string += f"[{total_rows} rows x {total_columns} columns in total]" + return html_string def _get_obj_metadata(