From dd90de3cbec61daf4129e449524bcbddfb67b23f Mon Sep 17 00:00:00 2001 From: Arwa Date: Fri, 7 Mar 2025 09:50:47 -0600 Subject: [PATCH 01/10] feat: add GeoSeries.difference() --- bigframes/core/compile/scalar_op_compiler.py | 17 ++- bigframes/geopandas/geoseries.py | 9 ++ bigframes/operations/__init__.py | 2 + bigframes/operations/geo_ops.py | 5 +- bigframes/operations/type.py | 14 +++ .../bigframes_vendored/geopandas/geoseries.py | 103 ++++++++++++++++++ 6 files changed, 144 insertions(+), 6 deletions(-) diff --git a/bigframes/core/compile/scalar_op_compiler.py b/bigframes/core/compile/scalar_op_compiler.py index 35a307722f..ce0cd6c37a 100644 --- a/bigframes/core/compile/scalar_op_compiler.py +++ b/bigframes/core/compile/scalar_op_compiler.py @@ -1001,11 +1001,6 @@ def normalize_op_impl(x: ibis_types.Value): # Geo Ops -@scalar_op_compiler.register_unary_op(ops.geo_st_boundary_op, pass_op=False) -def geo_st_boundary_op_impl(x: ibis_types.Value): - return st_boundary(x) - - @scalar_op_compiler.register_unary_op(ops.geo_area_op) def geo_area_op_impl(x: ibis_types.Value): return typing.cast(ibis_types.GeoSpatialValue, x).area() @@ -1016,6 +1011,18 @@ def geo_st_astext_op_impl(x: ibis_types.Value): return typing.cast(ibis_types.GeoSpatialValue, x).as_text() +@scalar_op_compiler.register_unary_op(ops.geo_st_boundary_op, pass_op=False) +def geo_st_boundary_op_impl(x: ibis_types.Value): + return st_boundary(x) + + +@scalar_op_compiler.register_binary_op(ops.geo_st_difference_op, pass_op=False) +def geo_st_difference_op_impl(x: ibis_types.Value, y: ibis_types.Value): + return typing.cast(ibis_types.GeoSpatialValue, x).difference( + typing.cast(ibis_types.GeoSpatialValue, y) + ) + + @scalar_op_compiler.register_unary_op(ops.geo_st_geogfromtext_op) def geo_st_geogfromtext_op_impl(x: ibis_types.Value): # Ibis doesn't seem to provide a dedicated method to cast from string to geography, diff --git a/bigframes/geopandas/geoseries.py b/bigframes/geopandas/geoseries.py index 44018b8c5c..5675519152 100644 --- a/bigframes/geopandas/geoseries.py +++ b/bigframes/geopandas/geoseries.py @@ -13,6 +13,8 @@ # limitations under the License. from __future__ import annotations +from typing import Literal + import bigframes_vendored.constants as constants import bigframes_vendored.geopandas.geoseries as vendored_geoseries import geopandas.array # type: ignore @@ -93,3 +95,10 @@ def to_wkt(self: GeoSeries) -> bigframes.series.Series: series = self._apply_unary_op(ops.geo_st_astext_op) series.name = None return series + + def difference( + self: GeoSeries, other: GeoSeries, alignment: Literal["outer", "left"] = "outer" + ) -> GeoSeries: + return self._apply_binary_op( + other, ops.geo_st_difference_op, alignment=alignment + ) diff --git a/bigframes/operations/__init__.py b/bigframes/operations/__init__.py index 83cefbe6ba..2b4c9ca892 100644 --- a/bigframes/operations/__init__.py +++ b/bigframes/operations/__init__.py @@ -90,6 +90,7 @@ geo_area_op, geo_st_astext_op, geo_st_boundary_op, + geo_st_difference_op, geo_st_geogfromtext_op, geo_st_geogpoint_op, geo_x_op, @@ -366,6 +367,7 @@ # Geo ops "geo_area_op", "geo_st_boundary_op", + "geo_st_difference_op", "geo_st_astext_op", "geo_st_geogfromtext_op", "geo_st_geogpoint_op", diff --git a/bigframes/operations/geo_ops.py b/bigframes/operations/geo_ops.py index 9ef0983e24..3cf248bddb 100644 --- a/bigframes/operations/geo_ops.py +++ b/bigframes/operations/geo_ops.py @@ -37,6 +37,10 @@ ), ) +geo_st_difference_op = base_ops.create_binary_op( + name="geo_st_difference", type_signature=op_typing.BinaryGeo() +) + geo_st_geogfromtext_op = base_ops.create_unary_op( name="geo_st_geogfromtext", type_signature=op_typing.FixedOutputType( @@ -44,7 +48,6 @@ ), ) - geo_st_geogpoint_op = base_ops.create_binary_op( name="geo_st_geogpoint", type_signature=op_typing.BinaryNumericGeo() ) diff --git a/bigframes/operations/type.py b/bigframes/operations/type.py index 0a47cd91f0..b4029d74c7 100644 --- a/bigframes/operations/type.py +++ b/bigframes/operations/type.py @@ -122,6 +122,20 @@ def output_type( @dataclasses.dataclass +@dataclasses.dataclass +class BinaryGeo(BinaryTypeSignature): + """Type signature for geo functions like difference that can map geo to geo.""" + + def output_type( + self, left_type: ExpressionType, right_type: ExpressionType + ) -> ExpressionType: + if (left_type is not None) and not bigframes.dtypes.is_geo_like(left_type): + raise TypeError(f"Type {left_type} is not geo") + if (right_type is not None) and not bigframes.dtypes.is_geo_like(right_type): + raise TypeError(f"Type {right_type} is not numeric") + return bigframes.dtypes.GEO_DTYPE + + class BinaryNumericGeo(BinaryTypeSignature): """Type signature for geo functions like from_xy that can map ints to ints.""" diff --git a/third_party/bigframes_vendored/geopandas/geoseries.py b/third_party/bigframes_vendored/geopandas/geoseries.py index a2e7b74059..68834e8897 100644 --- a/third_party/bigframes_vendored/geopandas/geoseries.py +++ b/third_party/bigframes_vendored/geopandas/geoseries.py @@ -239,3 +239,106 @@ def to_wkt(self) -> bigframes.series.Series: WKT representations of the geometries. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + + def difference(self) -> bigframes.series.Series: + """ + Returns a GeoSeries of the points in each aligned geometry that are not + in other. + + The operation works on a 1-to-1 row-wise manner + + + **Examples:** + + >>> import bigframes as bpd + >>> import bigframes.geopandas + >>> from shapely.geometry import Polygon, LineString, Point + >>> bpd.options.display.progress_bar = None + >>> s = geopandas.GeoSeries( + ... [ + ... Polygon([(0, 0), (2, 2), (0, 2)]), + ... Polygon([(0, 0), (2, 2), (0, 2)]), + ... LineString([(0, 0), (2, 2)]), + ... LineString([(2, 0), (0, 2)]), + ... Point(0, 1), + ... ], + ... ) + >>> s2 = geopandas.GeoSeries( + ... [ + ... Polygon([(0, 0), (1, 1), (0, 1)]), + ... LineString([(1, 0), (1, 3)]), + ... LineString([(2, 0), (0, 2)]), + ... Point(1, 1), + ... Point(0, 1), + ... ], + ... index=range(1, 6), + ... ) + + >>> s + 0 POLYGON ((0 0, 2 2, 0 2, 0 0)) + 1 POLYGON ((0 0, 2 2, 0 2, 0 0)) + 2 LINESTRING (0 0, 2 2) + 3 LINESTRING (2 0, 0 2) + 4 POINT (0 1) + dtype: geometry + + >>> s2 + 1 POLYGON ((0 0, 1 1, 0 1, 0 0)) + 2 LINESTRING (1 0, 1 3) + 3 LINESTRING (2 0, 0 2) + 4 POINT (1 1) + 5 POINT (0 1) + dtype: geometry + + + We can do difference of each geometry and a single shapely geometry: + + >>> s.difference(Polygon([(0, 0), (1, 1), (0, 1)])) + 0 POLYGON ((0.99954 1, 2 2, 0 2, 0 1, 0.99954 1)) + 1 None + 2 None + 3 None + 4 None + dtype: geometry + + We can also check two GeoSeries against each other, row by row. The + GeoSeries above have different indices. We can either align both GeoSeries + based on index values and compare elements with the same index using + align=True or ignore index and compare elements based on their matching + order using align=False: + + >>> s.difference(s2) + 0 None + 1 POLYGON ((0.99954 1, 2 2, 0 2, 0 1, 0.99954 1)) + 2 LINESTRING (0 0, 1 1.00046, 2 2) + 3 GEOMETRYCOLLECTION EMPTY + 4 POINT (0 1) + 5 None + dtype: geometry + + 0 POLYGON ((0.99954 1, 2 2, 0 2, 0 1, 0.99954 1)) + 1 POLYGON ((0 0, 1 1.00046, 2 2, 1 2.0003, 0 2, ... + 2 LINESTRING (0 0, 1 1.00046, 2 2) + 3 LINESTRING (2 0, 0 2) + 4 GEOMETRYCOLLECTION EMPTY + dtype: geometry + + + >>> s.difference(s2, align=True) + + >>> s.difference(s2, align=False) + + + Args: + other (Geoseries or geometric object): + The Geoseries (elementwise) or geometric object to find the difference to. + + align (bool or None (default None)): + The index for the GeoSeries. + + Returns: + bigframes.geopandas.GeoSeries: + A GeoSeries of the points in each aligned geometry that are not + in other. + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) From 85ddc0ea69529cfa3c04933782d0d0da5ec42a77 Mon Sep 17 00:00:00 2001 From: Arwa Date: Tue, 11 Mar 2025 16:21:19 -0500 Subject: [PATCH 02/10] add st_difference method and test cases for geo.difference and st_difference --- bigframes/bigquery/__init__.py | 3 +- bigframes/bigquery/_operations/geo.py | 125 ++++++++ bigframes/geopandas/geoseries.py | 31 +- debugcrs.ipynb | 272 ++++++++++++++++++ tests/system/small/bigquery/test_geo.py | 23 ++ .../system/small/geopandas/test_geoseries.py | 24 ++ .../bigframes_vendored/geopandas/geoseries.py | 2 - 7 files changed, 469 insertions(+), 11 deletions(-) create mode 100644 debugcrs.ipynb diff --git a/bigframes/bigquery/__init__.py b/bigframes/bigquery/__init__.py index 56aee38bfe..a35e898c2d 100644 --- a/bigframes/bigquery/__init__.py +++ b/bigframes/bigquery/__init__.py @@ -27,7 +27,7 @@ unix_millis, unix_seconds, ) -from bigframes.bigquery._operations.geo import st_area +from bigframes.bigquery._operations.geo import st_area, st_difference from bigframes.bigquery._operations.json import ( json_extract, json_extract_array, @@ -48,6 +48,7 @@ "array_to_string", # geo ops "st_area", + "st_difference", # json ops "json_set", "json_extract", diff --git a/bigframes/bigquery/_operations/geo.py b/bigframes/bigquery/_operations/geo.py index 7b8e47e2da..7d66bd69d5 100644 --- a/bigframes/bigquery/_operations/geo.py +++ b/bigframes/bigquery/_operations/geo.py @@ -15,6 +15,7 @@ from __future__ import annotations from bigframes import operations as ops +import bigframes.dtypes import bigframes.geopandas import bigframes.series @@ -91,3 +92,127 @@ def st_area(series: bigframes.series.Series) -> bigframes.series.Series: series = series._apply_unary_op(ops.geo_area_op) series.name = None return series + + +def st_difference( + series: bigframes.series.Series, other: bigframes.series.Series +) -> bigframes.series.Series: + """ + Returns a GEOGRAPHY that represents the point set difference of + `geography_1` and `geography_2`. Therefore, the result consists of the part + of `geography_1` that doesn't intersect with `geography_2`. + + If `geometry_1` is completely contained in `geometry_2`, then ST_DIFFERENCE + returns an empty GEOGRAPHY. + + ..note:: + BigQuery's Geography functions, like `st_difference`, interpret the geometry + data type as a point set on the Earth's surface. A point set is a set + of points, lines, and polygons on the WGS84 reference spheroid, with + geodesic edges. See: https://cloud.google.com/bigquery/docs/geospatial-data + + **Examples:** + + >>> import bigframes as bpd + >>> import bigframes.bigquery as bbq + >>> import bigframes.geopandas + >>> from shapely.geometry import Polygon, LineString, Point + >>> bpd.options.display.progress_bar = None + + We can check two GeoSeries against each other, row by row. + + >>> s1 = bigframes.geopandas.GeoSeries( + ... [ + ... Polygon([(0, 0), (2, 2), (0, 2)]), + ... Polygon([(0, 0), (2, 2), (0, 2)]), + ... LineString([(0, 0), (2, 2)]), + ... LineString([(2, 0), (0, 2)]), + ... Point(0, 1), + ... ], + ... ) + >>> s2 = bigframes.geopandas.GeoSeries( + ... [ + ... Polygon([(0, 0), (1, 1), (0, 1)]), + ... LineString([(1, 0), (1, 3)]), + ... LineString([(2, 0), (0, 2)]), + ... Point(1, 1), + ... Point(0, 1), + ... ], + ... index=range(1, 6), + ... ) + + >>> s1 + 0 POLYGON ((0 0, 2 2, 0 2, 0 0)) + 1 POLYGON ((0 0, 2 2, 0 2, 0 0)) + 2 LINESTRING (0 0, 2 2) + 3 LINESTRING (2 0, 0 2) + 4 POINT (0 1) + dtype: geometry + + >>> s2 + 1 POLYGON ((0 0, 1 1, 0 1, 0 0)) + 2 LINESTRING (1 0, 1 3) + 3 LINESTRING (2 0, 0 2) + 4 POINT (1 1) + 5 POINT (0 1) + dtype: geometry + + >>> bbq.st_difference(s1, s2) + 0 None + 1 POLYGON ((0.99954 1, 2 2, 0 2, 0 1, 0.99954 1)) + 2 LINESTRING (0 0, 1 1.00046, 2 2) + 3 GEOMETRYCOLLECTION EMPTY + 4 POINT (0 1) + 5 None + dtype: geometry + + + We can also check difference of single shapely geometries: + + >>> sbq1 = bigframes.geopandas.GeoSeries( + ... [ + ... Polygon([(0, 0), (10, 0), (10, 10), (0, 0)]) + ... ] + ... ) + >>> sbq1 + 0 POLYGON ((0 0, 10 0, 10 10, 0 0)) + dtype: geometry + + >>> sbq2 = bigframes.geopandas.GeoSeries( + ... [ + ... Polygon([(4, 2), (6, 2), (8, 6), (4, 2)]) + ... ] + ... ) + >>> sbq2 + 0 POLYGON ((4 2, 6 2, 8 6, 4 2)) + dtype: geometry + + >>> bbq.st_difference(sbq1, sbq2) + 0 POLYGON ((0 0, 10 0, 10 10, 0 0), (8 6, 6 2, 4... + dtype: geometry + + Additionally, qe can do difference of a GeoSeries against a single shapely geometry: + + >>> bbq.st_difference(s, sbq2) + 0 POLYGON ((0 0, 2 2, 0 2, 0 0)) + 1 None + 2 None + 3 None + 4 None + dtype: geometry + + Args: + other (bigframes.series.Series or geometric object): + The Geoseries (elementwise) or geometric object to find the difference to. + + Returns: + bigframes.series.Series: + A GeoSeries of the points in each aligned geometry that are not + in other. + """ + series.name = None + + if isinstance(other, bigframes.series.Series): + other.name = None + + return series._apply_binary_op(other, ops.geo_st_difference_op) diff --git a/bigframes/geopandas/geoseries.py b/bigframes/geopandas/geoseries.py index 5675519152..9c5f3d1f4b 100644 --- a/bigframes/geopandas/geoseries.py +++ b/bigframes/geopandas/geoseries.py @@ -13,8 +13,6 @@ # limitations under the License. from __future__ import annotations -from typing import Literal - import bigframes_vendored.constants as constants import bigframes_vendored.geopandas.geoseries as vendored_geoseries import geopandas.array # type: ignore @@ -64,7 +62,7 @@ def area(self, crs=None) -> bigframes.series.Series: # type: ignore Raises: NotImplementedError: - GeoSeries.area is not supported. Use bigframes.bigquery.st_area(series), insetead. + GeoSeries.area is not supported. Use bigframes.bigquery.st_area(series), instead. """ raise NotImplementedError( f"GeoSeries.area is not supported. Use bigframes.bigquery.st_area(series), instead. {constants.FEEDBACK_LINK}" @@ -96,9 +94,26 @@ def to_wkt(self: GeoSeries) -> bigframes.series.Series: series.name = None return series - def difference( - self: GeoSeries, other: GeoSeries, alignment: Literal["outer", "left"] = "outer" - ) -> GeoSeries: - return self._apply_binary_op( - other, ops.geo_st_difference_op, alignment=alignment + def difference(self: GeoSeries, other: GeoSeries) -> GeoSeries: # type: ignore + """ + Returns a GeoSeries of the points in each aligned geometry that are not + in other. + + The operation works on a 1-to-1 row-wise manner + + Args: + other (Geoseries or geometric object): + The Geoseries (elementwise) or geometric object to find the difference to. + + Returns: + bigframes.geopandas.GeoSeries: + A GeoSeries of the points in each aligned geometry that are not + in other. + + Raises: + NotImplementedError: + GeoSeries.difference is not supported. Use bigframes.bigquery.st_difference(series), instead. + """ + raise NotImplementedError( + f"GeoSeries.difference() is not supported. Use bigframes.bigquery.st_difference(series), instead. {constants.FEEDBACK_LINK}" ) diff --git a/debugcrs.ipynb b/debugcrs.ipynb new file mode 100644 index 0000000000..eb2d300752 --- /dev/null +++ b/debugcrs.ipynb @@ -0,0 +1,272 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import bigframes\n", + "import bigframes.geopandas\n", + "import bigframes.pandas as bpd\n", + "import bigframes.bigquery as bbq\n", + "\n", + "import geopandas\n", + "from shapely.geometry import Polygon, LineString, Point\n", + "\n", + "bpd.options.display.progress_bar = None" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# GeoPandas Example" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "s = geopandas.GeoSeries(\n", + " [\n", + " Polygon([(0.000, 0.0), (0.001, 0.001), (0.000, 0.001)]),\n", + " Polygon([(0.0010, 0.004), (0.009, 0.005), (0.0010, 0.005)]),\n", + " Polygon([(0.001, 0.001), (0.002, 0.001), (0.002, 0.002)]),\n", + " LineString([(0, 0), (1, 1), (0, 1)]),\n", + " Point(0, 1),\n", + " ],\n", + " # crs=\"EPSG:4326\"\n", + ")\n", + "# .to_crs(26393)\n", + "s2 = geopandas.GeoSeries(\n", + " [\n", + " Polygon([(0, 0), (10, 0), (10, 10), (0, 0)]),\n", + " Polygon([(0, 0), (1, 1), (0, 1)]),\n", + " LineString([(1, 0), (1, 3)]),\n", + " LineString([(2, 0), (0, 2)]),\n", + " Point(1, 1),\n", + " ],\n", + " # crs=\"EPSG:4326\"\n", + ")\n", + "# .to_crs(26393)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 POLYGON ((0 0, 0.001 0.001, 0 0.001, 0 0))\n", + "1 POLYGON ((0.001 0.004, 0.009 0.005, 0.001 0.00...\n", + "2 POLYGON ((0.001 0.001, 0.002 0.001, 0.002 0.00...\n", + "3 LINESTRING (0 0, 1 1, 0 1)\n", + "4 POINT (0 1)\n", + "dtype: geometry" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 POLYGON ((0 0, 10 0, 10 10, 0 0))\n", + "1 POLYGON ((0 0, 1 1, 0 1, 0 0))\n", + "2 LINESTRING (1 0, 1 3)\n", + "3 LINESTRING (2 0, 0 2)\n", + "4 POINT (1 1)\n", + "dtype: geometry" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s2" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 POLYGON ((0 0.001, 0.001 0.001, 0 0, 0 0.001))\n", + "1 POLYGON ((0.001 0.005, 0.009 0.005, 0.001 0.00...\n", + "2 POLYGON ((0.002 0.002, 0.002 0.001, 0.001 0.00...\n", + "3 MULTILINESTRING ((0 0, 1 1), (1 1, 0 1))\n", + "4 POINT (0 1)\n", + "dtype: geometry" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s.difference(LineString([(2, 0), (0, 2)]),)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# GeoSeries Example" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/arwas/src1/python-bigquery-dataframes/bigframes/core/global_session.py:114: DefaultLocationWarning: No explicit location is set, so using location US for the session.\n", + " return func(get_global_session(), *args, **kwargs)\n" + ] + } + ], + "source": [ + "sb = bigframes.geopandas.GeoSeries(\n", + " [\n", + " Polygon([(0.000, 0.0), (0.001, 0.001), (0.000, 0.001)]),\n", + " Polygon([(0.0010, 0.004), (0.009, 0.005), (0.0010, 0.005)]),\n", + " Polygon([(0.001, 0.001), (0.002, 0.001), (0.002, 0.002)]),\n", + " LineString([(0, 0), (1, 1), (0, 1)]),\n", + " Point(0, 1),\n", + " ],\n", + ")\n", + "sb2 = bigframes.geopandas.GeoSeries(\n", + " [\n", + " Polygon([(0, 0), (10, 0), (10, 10), (0, 0)]),\n", + " Polygon([(0, 0), (1, 1), (0, 1)]),\n", + " LineString([(1, 0), (1, 3)]),\n", + " LineString([(2, 0), (0, 2)]),\n", + " Point(1, 1),\n", + " ],\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 POLYGON ((0 0, 0.001 0.001, 0 0.001, 0 0))\n", + "1 POLYGON ((0.001 0.004, 0.009 0.005, 0.001 0.00...\n", + "2 POLYGON ((0.001 0.001, 0.002 0.001, 0.002 0.00...\n", + "3 LINESTRING (0 0, 1 1, 0 1)\n", + "4 POINT (0 1)\n", + "dtype: geometry" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sb" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 POLYGON ((0 0, 10 0, 10 10, 0 0))\n", + "1 POLYGON ((0 0, 1 1, 0 1, 0 0))\n", + "2 LINESTRING (1 0, 1 3)\n", + "3 LINESTRING (2 0, 0 2)\n", + "4 POINT (1 1)\n", + "dtype: geometry" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sb2" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 POLYGON ((0.00098 0.001, 0 0.001, 0 0, 0.00098...\n", + "1 POLYGON ((0.00443 0.00443, 0.009 0.005, 0.005 ...\n", + "2 POLYGON ((0.001 0.001, 0.002 0.001, 0.002 0.00...\n", + "3 LINESTRING (0 0, 1 1, 0 1)\n", + "4 POINT (0 1)\n", + "dtype: geometry" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bbq.st_difference(sb, sb2)\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.19" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tests/system/small/bigquery/test_geo.py b/tests/system/small/bigquery/test_geo.py index 7d38cd7d91..0d0bba302b 100644 --- a/tests/system/small/bigquery/test_geo.py +++ b/tests/system/small/bigquery/test_geo.py @@ -51,3 +51,26 @@ def test_geo_st_area(): check_exact=False, rtol=1, ) + + +def test_geo_st_difference(): + data1 = [Polygon([(0, 0), (10, 0), (10, 10), (0, 0)])] + + data2 = [Polygon([(4, 2), (6, 2), (8, 6), (4, 2)])] + + geopd_s1 = geopandas.GeoSeries(data=data1) + geopd_s2 = geopandas.GeoSeries(data=data2) + geobf_s1 = bigframes.geopandas.GeoSeries(data=data1) + geobf_s2 = bigframes.geopandas.GeoSeries(data=data2) + + geopd_s_result = geopd_s1.difference(geopd_s2).round(-3) + geobf_s_result = bbq.st_difference(geobf_s1, geobf_s2).to_pandas().round(-3) + + pd.testing.assert_series_equal( + geopd_s_result, + geobf_s_result, + check_series_type=False, + check_dtype=False, + check_index_type=False, + check_exact=False, + ) diff --git a/tests/system/small/geopandas/test_geoseries.py b/tests/system/small/geopandas/test_geoseries.py index d0987dbdaf..181e0b58cf 100644 --- a/tests/system/small/geopandas/test_geoseries.py +++ b/tests/system/small/geopandas/test_geoseries.py @@ -194,3 +194,27 @@ def test_geo_boundary(): check_series_type=False, check_index=False, ) + + +def test_geo_difference_not_supported(): + s1 = bigframes.geopandas.GeoSeries( + [ + Polygon([(0.000, 0.0), (0.001, 0.001), (0.000, 0.001)]), + Polygon([(0.0010, 0.004), (0.009, 0.005), (0.0010, 0.005)]), + Polygon([(0.001, 0.001), (0.002, 0.001), (0.002, 0.002)]), + LineString([(0, 0), (1, 1), (0, 1)]), + Point(0, 1), + ] + ) + + s2 = bigframes.geopandas.GeoSeries([Polygon([(0, 0), (10, 0), (10, 10), (0, 0)])]) + + bf_series1: bigframes.geopandas.GeoSeries = s1.geo + bf_series2: bigframes.geopandas.GeoSeries = s2.geo + with pytest.raises( + NotImplementedError, + match=re.escape( + f"GeoSeries.difference() is not supported. Use bigframes.bigquery.st_difference(series), instead. {constants.FEEDBACK_LINK}" + ), + ): + bf_series1.difference(bf_series2) diff --git a/third_party/bigframes_vendored/geopandas/geoseries.py b/third_party/bigframes_vendored/geopandas/geoseries.py index 68834e8897..71c218cc14 100644 --- a/third_party/bigframes_vendored/geopandas/geoseries.py +++ b/third_party/bigframes_vendored/geopandas/geoseries.py @@ -247,7 +247,6 @@ def difference(self) -> bigframes.series.Series: The operation works on a 1-to-1 row-wise manner - **Examples:** >>> import bigframes as bpd @@ -290,7 +289,6 @@ def difference(self) -> bigframes.series.Series: 5 POINT (0 1) dtype: geometry - We can do difference of each geometry and a single shapely geometry: >>> s.difference(Polygon([(0, 0), (1, 1), (0, 1)])) From 51a234feb36220ce20da29f4048a87f884312cec Mon Sep 17 00:00:00 2001 From: Arwa Date: Wed, 12 Mar 2025 13:33:24 -0500 Subject: [PATCH 03/10] update method and tests --- bigframes/bigquery/_operations/geo.py | 16 +- debugcrs.ipynb | 272 --------------- notebooks/geo/geoseries.ipynb | 322 +++++++++++++----- tests/system/small/bigquery/test_geo.py | 47 ++- .../bigframes_vendored/geopandas/geoseries.py | 101 ------ 5 files changed, 271 insertions(+), 487 deletions(-) delete mode 100644 debugcrs.ipynb diff --git a/bigframes/bigquery/_operations/geo.py b/bigframes/bigquery/_operations/geo.py index 7d66bd69d5..ef86f66ff3 100644 --- a/bigframes/bigquery/_operations/geo.py +++ b/bigframes/bigquery/_operations/geo.py @@ -174,15 +174,16 @@ def st_difference( ... Polygon([(0, 0), (10, 0), (10, 10), (0, 0)]) ... ] ... ) - >>> sbq1 - 0 POLYGON ((0 0, 10 0, 10 10, 0 0)) - dtype: geometry - >>> sbq2 = bigframes.geopandas.GeoSeries( ... [ ... Polygon([(4, 2), (6, 2), (8, 6), (4, 2)]) ... ] ... ) + + >>> sbq1 + 0 POLYGON ((0 0, 10 0, 10 10, 0 0)) + dtype: geometry + >>> sbq2 0 POLYGON ((4 2, 6 2, 8 6, 4 2)) dtype: geometry @@ -193,7 +194,7 @@ def st_difference( Additionally, qe can do difference of a GeoSeries against a single shapely geometry: - >>> bbq.st_difference(s, sbq2) + >>> bbq.st_difference(s1, sbq2) 0 POLYGON ((0 0, 2 2, 0 2, 0 0)) 1 None 2 None @@ -210,9 +211,4 @@ def st_difference( A GeoSeries of the points in each aligned geometry that are not in other. """ - series.name = None - - if isinstance(other, bigframes.series.Series): - other.name = None - return series._apply_binary_op(other, ops.geo_st_difference_op) diff --git a/debugcrs.ipynb b/debugcrs.ipynb deleted file mode 100644 index eb2d300752..0000000000 --- a/debugcrs.ipynb +++ /dev/null @@ -1,272 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import bigframes\n", - "import bigframes.geopandas\n", - "import bigframes.pandas as bpd\n", - "import bigframes.bigquery as bbq\n", - "\n", - "import geopandas\n", - "from shapely.geometry import Polygon, LineString, Point\n", - "\n", - "bpd.options.display.progress_bar = None" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# GeoPandas Example" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "s = geopandas.GeoSeries(\n", - " [\n", - " Polygon([(0.000, 0.0), (0.001, 0.001), (0.000, 0.001)]),\n", - " Polygon([(0.0010, 0.004), (0.009, 0.005), (0.0010, 0.005)]),\n", - " Polygon([(0.001, 0.001), (0.002, 0.001), (0.002, 0.002)]),\n", - " LineString([(0, 0), (1, 1), (0, 1)]),\n", - " Point(0, 1),\n", - " ],\n", - " # crs=\"EPSG:4326\"\n", - ")\n", - "# .to_crs(26393)\n", - "s2 = geopandas.GeoSeries(\n", - " [\n", - " Polygon([(0, 0), (10, 0), (10, 10), (0, 0)]),\n", - " Polygon([(0, 0), (1, 1), (0, 1)]),\n", - " LineString([(1, 0), (1, 3)]),\n", - " LineString([(2, 0), (0, 2)]),\n", - " Point(1, 1),\n", - " ],\n", - " # crs=\"EPSG:4326\"\n", - ")\n", - "# .to_crs(26393)\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 POLYGON ((0 0, 0.001 0.001, 0 0.001, 0 0))\n", - "1 POLYGON ((0.001 0.004, 0.009 0.005, 0.001 0.00...\n", - "2 POLYGON ((0.001 0.001, 0.002 0.001, 0.002 0.00...\n", - "3 LINESTRING (0 0, 1 1, 0 1)\n", - "4 POINT (0 1)\n", - "dtype: geometry" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "s" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 POLYGON ((0 0, 10 0, 10 10, 0 0))\n", - "1 POLYGON ((0 0, 1 1, 0 1, 0 0))\n", - "2 LINESTRING (1 0, 1 3)\n", - "3 LINESTRING (2 0, 0 2)\n", - "4 POINT (1 1)\n", - "dtype: geometry" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "s2" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 POLYGON ((0 0.001, 0.001 0.001, 0 0, 0 0.001))\n", - "1 POLYGON ((0.001 0.005, 0.009 0.005, 0.001 0.00...\n", - "2 POLYGON ((0.002 0.002, 0.002 0.001, 0.001 0.00...\n", - "3 MULTILINESTRING ((0 0, 1 1), (1 1, 0 1))\n", - "4 POINT (0 1)\n", - "dtype: geometry" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "s.difference(LineString([(2, 0), (0, 2)]),)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# GeoSeries Example" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/arwas/src1/python-bigquery-dataframes/bigframes/core/global_session.py:114: DefaultLocationWarning: No explicit location is set, so using location US for the session.\n", - " return func(get_global_session(), *args, **kwargs)\n" - ] - } - ], - "source": [ - "sb = bigframes.geopandas.GeoSeries(\n", - " [\n", - " Polygon([(0.000, 0.0), (0.001, 0.001), (0.000, 0.001)]),\n", - " Polygon([(0.0010, 0.004), (0.009, 0.005), (0.0010, 0.005)]),\n", - " Polygon([(0.001, 0.001), (0.002, 0.001), (0.002, 0.002)]),\n", - " LineString([(0, 0), (1, 1), (0, 1)]),\n", - " Point(0, 1),\n", - " ],\n", - ")\n", - "sb2 = bigframes.geopandas.GeoSeries(\n", - " [\n", - " Polygon([(0, 0), (10, 0), (10, 10), (0, 0)]),\n", - " Polygon([(0, 0), (1, 1), (0, 1)]),\n", - " LineString([(1, 0), (1, 3)]),\n", - " LineString([(2, 0), (0, 2)]),\n", - " Point(1, 1),\n", - " ],\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 POLYGON ((0 0, 0.001 0.001, 0 0.001, 0 0))\n", - "1 POLYGON ((0.001 0.004, 0.009 0.005, 0.001 0.00...\n", - "2 POLYGON ((0.001 0.001, 0.002 0.001, 0.002 0.00...\n", - "3 LINESTRING (0 0, 1 1, 0 1)\n", - "4 POINT (0 1)\n", - "dtype: geometry" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "sb" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 POLYGON ((0 0, 10 0, 10 10, 0 0))\n", - "1 POLYGON ((0 0, 1 1, 0 1, 0 0))\n", - "2 LINESTRING (1 0, 1 3)\n", - "3 LINESTRING (2 0, 0 2)\n", - "4 POINT (1 1)\n", - "dtype: geometry" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "sb2" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 POLYGON ((0.00098 0.001, 0 0.001, 0 0, 0.00098...\n", - "1 POLYGON ((0.00443 0.00443, 0.009 0.005, 0.005 ...\n", - "2 POLYGON ((0.001 0.001, 0.002 0.001, 0.002 0.00...\n", - "3 LINESTRING (0 0, 1 1, 0 1)\n", - "4 POINT (0 1)\n", - "dtype: geometry" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "bbq.st_difference(sb, sb2)\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.19" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/notebooks/geo/geoseries.ipynb b/notebooks/geo/geoseries.ipynb index 7060128bf6..6f411e44a5 100644 --- a/notebooks/geo/geoseries.ipynb +++ b/notebooks/geo/geoseries.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 6, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -30,7 +30,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -49,14 +49,17 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/google/home/arwas/src1/python-bigquery-dataframes/bigframes/session/_io/bigquery/read_gbq_table.py:280: DefaultIndexWarning: Table 'bigquery-public-data.geo_us_boundaries.counties' is clustered and/or partitioned, but BigQuery DataFrames was not able to find a suitable index. To avoid this warning, set at least one of: `index_col` or `filters`.\n", + "/usr/local/google/home/arwas/src1/python-bigquery-dataframes/bigframes/session/_io/bigquery/read_gbq_table.py:280: DefaultIndexWarning: \u001b[93mTable 'bigquery-public-data.geo_us_boundaries.counties' is clustered\n", + "and/or partitioned, but BigQuery DataFrames was not able to find a\n", + "suitable index. To avoid this warning, set at least one of:\n", + "`index_col` or `filters`.\u001b[0m\n", " warnings.warn(msg, category=bfe.DefaultIndexWarning)\n" ] } @@ -74,7 +77,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -97,21 +100,21 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "137 POINT (-86.87338 38.37334)\n", - "164 POINT (-118.48037 46.25461)\n", - "333 POINT (-92.5617 32.30429)\n", - "703 POINT (-83.46189 39.55525)\n", - "846 POINT (-119.46779 47.21363)\n", + "406 POINT (-84.86717 33.92103)\n", + "926 POINT (-82.47974 35.33641)\n", + "37 POINT (-91.19496 39.98605)\n", + "940 POINT (-75.50298 39.09709)\n", + "996 POINT (-92.56434 39.8298)\n", "Name: int_point_geom, dtype: geometry" ] }, - "execution_count": 10, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -130,21 +133,21 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "0 POINT (-86.87338 38.37334)\n", - "1 POINT (-118.48037 46.25461)\n", - "2 POINT (-92.5617 32.30429)\n", - "3 POINT (-83.46189 39.55525)\n", - "4 POINT (-119.46779 47.21363)\n", + "0 POINT (-84.86717 33.92103)\n", + "1 POINT (-82.47974 35.33641)\n", + "2 POINT (-91.19496 39.98605)\n", + "3 POINT (-75.50298 39.09709)\n", + "4 POINT (-92.56434 39.8298)\n", "dtype: geometry" ] }, - "execution_count": 11, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -179,21 +182,21 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "0 -86.873385\n", - "1 -118.48037\n", - "2 -92.5617\n", - "3 -83.461893\n", - "4 -119.467788\n", + "0 -84.867169\n", + "1 -82.479741\n", + "2 -91.194961\n", + "3 -75.502982\n", + "4 -92.56434\n", "dtype: Float64" ] }, - "execution_count": 12, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -211,21 +214,21 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "0 38.373344\n", - "1 46.254606\n", - "2 32.30429\n", - "3 39.555246\n", - "4 47.213633\n", + "0 33.92103\n", + "1 35.336415\n", + "2 39.986053\n", + "3 39.097088\n", + "4 39.829795\n", "dtype: Float64" ] }, - "execution_count": 13, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -250,7 +253,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -284,7 +287,7 @@ "dtype: Float64" ] }, - "execution_count": 14, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -302,7 +305,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -336,7 +339,7 @@ "dtype: Float64" ] }, - "execution_count": 15, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -361,21 +364,21 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "78 POLYGON ((-95.97154 44.6306, -95.97919 44.6305...\n", - "130 POLYGON ((-95.0933 41.77694, -95.09331 41.7764...\n", - "544 POLYGON ((-96.0664 40.43618, -96.06639 40.4352...\n", - "995 POLYGON ((-101.83583 47.49547, -101.83665 47.4...\n", - "1036 POLYGON ((-88.42474 37.15094, -88.42526 37.149...\n", + "103 POLYGON ((-100.6454 30.28817, -100.64936 30.28...\n", + "158 POLYGON ((-84.90231 33.67448, -84.90231 33.674...\n", + "155 POLYGON ((-92.69454 44.68874, -92.69432 44.688...\n", + "478 POLYGON ((-88.36906 32.74258, -88.3692 32.7414...\n", + "305 POLYGON ((-98.43923 46.89264, -98.43924 46.892...\n", "Name: county_geom, dtype: geometry" ] }, - "execution_count": 16, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -394,21 +397,21 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "0 POLYGON ((-95.97154 44.6306, -95.97919 44.6305...\n", - "1 POLYGON ((-95.0933 41.77694, -95.09331 41.7764...\n", - "2 POLYGON ((-96.0664 40.43618, -96.06639 40.4352...\n", - "3 POLYGON ((-101.83583 47.49547, -101.83665 47.4...\n", - "4 POLYGON ((-88.42474 37.15094, -88.42526 37.149...\n", + "0 POLYGON ((-100.6454 30.28817, -100.64936 30.28...\n", + "1 POLYGON ((-84.90231 33.67448, -84.90231 33.674...\n", + "2 POLYGON ((-92.69454 44.68874, -92.69432 44.688...\n", + "3 POLYGON ((-88.36906 32.74258, -88.3692 32.7414...\n", + "4 POLYGON ((-98.43923 46.89264, -98.43924 46.892...\n", "dtype: geometry" ] }, - "execution_count": 17, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -433,7 +436,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 13, "metadata": { "tags": [ "raises-exception" @@ -442,14 +445,14 @@ "outputs": [ { "ename": "NotImplementedError", - "evalue": "GeoSeries.area is not supported. Use bigframes.bigquery.st_area(series), instead. Share your usecase with the BigQuery DataFrames team at the https://bit.ly/bigframes-feedback survey.You are currently running BigFrames version 1.38.0", + "evalue": "GeoSeries.area is not supported. Use bigframes.bigquery.st_area(series), instead. Share your usecase with the BigQuery DataFrames team at the https://bit.ly/bigframes-feedback survey.You are currently running BigFrames version 1.39.0", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mNotImplementedError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[18], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mfive_geom\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43marea\u001b[49m\n", - "File \u001b[0;32m~/src1/python-bigquery-dataframes/bigframes/geopandas/geoseries.py:67\u001b[0m, in \u001b[0;36mGeoSeries.area\u001b[0;34m(self, crs)\u001b[0m\n\u001b[1;32m 48\u001b[0m \u001b[38;5;129m@property\u001b[39m\n\u001b[1;32m 49\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21marea\u001b[39m(\u001b[38;5;28mself\u001b[39m, crs\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m bigframes\u001b[38;5;241m.\u001b[39mseries\u001b[38;5;241m.\u001b[39mSeries: \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[1;32m 50\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Returns a Series containing the area of each geometry in the GeoSeries\u001b[39;00m\n\u001b[1;32m 51\u001b[0m \u001b[38;5;124;03m expressed in the units of the CRS.\u001b[39;00m\n\u001b[1;32m 52\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 65\u001b[0m \u001b[38;5;124;03m GeoSeries.area is not supported. Use bigframes.bigquery.st_area(series), insetead.\u001b[39;00m\n\u001b[1;32m 66\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m---> 67\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mNotImplementedError\u001b[39;00m(\n\u001b[1;32m 68\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mGeoSeries.area is not supported. Use bigframes.bigquery.st_area(series), instead. \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mconstants\u001b[38;5;241m.\u001b[39mFEEDBACK_LINK\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 69\u001b[0m )\n", - "\u001b[0;31mNotImplementedError\u001b[0m: GeoSeries.area is not supported. Use bigframes.bigquery.st_area(series), instead. Share your usecase with the BigQuery DataFrames team at the https://bit.ly/bigframes-feedback survey.You are currently running BigFrames version 1.38.0" + "Cell \u001b[0;32mIn[13], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mfive_geom\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43marea\u001b[49m\n", + "File \u001b[0;32m~/src1/python-bigquery-dataframes/bigframes/geopandas/geoseries.py:67\u001b[0m, in \u001b[0;36mGeoSeries.area\u001b[0;34m(self, crs)\u001b[0m\n\u001b[1;32m 48\u001b[0m \u001b[38;5;129m@property\u001b[39m\n\u001b[1;32m 49\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21marea\u001b[39m(\u001b[38;5;28mself\u001b[39m, crs\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m bigframes\u001b[38;5;241m.\u001b[39mseries\u001b[38;5;241m.\u001b[39mSeries: \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[1;32m 50\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Returns a Series containing the area of each geometry in the GeoSeries\u001b[39;00m\n\u001b[1;32m 51\u001b[0m \u001b[38;5;124;03m expressed in the units of the CRS.\u001b[39;00m\n\u001b[1;32m 52\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 65\u001b[0m \u001b[38;5;124;03m GeoSeries.area is not supported. Use bigframes.bigquery.st_area(series), instead.\u001b[39;00m\n\u001b[1;32m 66\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m---> 67\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mNotImplementedError\u001b[39;00m(\n\u001b[1;32m 68\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mGeoSeries.area is not supported. Use bigframes.bigquery.st_area(series), instead. \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mconstants\u001b[38;5;241m.\u001b[39mFEEDBACK_LINK\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 69\u001b[0m )\n", + "\u001b[0;31mNotImplementedError\u001b[0m: GeoSeries.area is not supported. Use bigframes.bigquery.st_area(series), instead. Share your usecase with the BigQuery DataFrames team at the https://bit.ly/bigframes-feedback survey.You are currently running BigFrames version 1.39.0" ] } ], @@ -461,12 +464,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### 3. Use `bigframes.bigquery.st_area` to retirive the `area` in square meters instead. See: https://cloud.google.com/bigquery/docs/reference/standard-sql/geography_functions#st_area" + "### 3. Use `bigframes.bigquery.st_area` to retrieve the `area` in square meters instead. See: https://cloud.google.com/bigquery/docs/reference/standard-sql/geography_functions#st_area" ] }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -475,21 +478,21 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "0 1865212769.084914\n", - "1 1146753653.723439\n", - "2 1059653048.84506\n", - "3 2873655557.502374\n", - "4 886267772.361455\n", + "0 5496691687.760967\n", + "1 520963166.881344\n", + "2 1531842088.681163\n", + "3 2366936139.372852\n", + "4 3909215045.354443\n", "dtype: Float64" ] }, - "execution_count": 20, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -515,21 +518,21 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "0 POINT (-86.87338 38.37334)\n", - "1 POINT (-118.48037 46.25461)\n", - "2 POINT (-92.5617 32.30429)\n", - "3 POINT (-83.46189 39.55525)\n", - "4 POINT (-119.46779 47.21363)\n", + "0 POINT (-84.86717 33.92103)\n", + "1 POINT (-82.47974 35.33641)\n", + "2 POINT (-91.19496 39.98605)\n", + "3 POINT (-75.50298 39.09709)\n", + "4 POINT (-92.56434 39.8298)\n", "dtype: geometry" ] }, - "execution_count": 21, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -554,21 +557,21 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "0 POINT(-86.8733845 38.3733441)\n", - "1 POINT(-118.4803697 46.2546057)\n", - "2 POINT(-92.5616997 32.3042901)\n", - "3 POINT(-83.4618927 39.5552462)\n", - "4 POINT(-119.467788 47.2136328)\n", + "0 POINT(-84.8671693 33.9210299)\n", + "1 POINT(-82.4797406 35.3364146)\n", + "2 POINT(-91.1949607 39.9860525)\n", + "3 POINT(-75.5029819 39.0970884)\n", + "4 POINT(-92.56434 39.829795)\n", "dtype: string" ] }, - "execution_count": 22, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -594,21 +597,21 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "0 POINT (-86.87338 38.37334)\n", - "1 POINT (-118.48037 46.25461)\n", - "2 POINT (-92.5617 32.30429)\n", - "3 POINT (-83.46189 39.55525)\n", - "4 POINT (-119.46779 47.21363)\n", + "0 POINT (-84.86717 33.92103)\n", + "1 POINT (-82.47974 35.33641)\n", + "2 POINT (-91.19496 39.98605)\n", + "3 POINT (-75.50298 39.09709)\n", + "4 POINT (-92.56434 39.8298)\n", "dtype: geometry" ] }, - "execution_count": 23, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -627,7 +630,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 19, "metadata": {}, "outputs": [ { @@ -641,7 +644,7 @@ "dtype: geometry" ] }, - "execution_count": 24, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -662,7 +665,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -676,7 +679,7 @@ "dtype: geometry" ] }, - "execution_count": 25, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -684,6 +687,145 @@ "source": [ "geom_obj.geo.boundary" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Find the difference between two `GeoSeries` " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Note: GeoSeries.difference raises a `NotImplementedError`." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Reuse `wkts_from_geo` and `geom_obj`" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "ename": "NotImplementedError", + "evalue": "GeoSeries.difference() is not supported. Use bigframes.bigquery.st_difference(series), instead. Share your usecase with the BigQuery DataFrames team at the https://bit.ly/bigframes-feedback survey.You are currently running BigFrames version 1.39.0", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNotImplementedError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[21], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mwkts_from_geo\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdifference\u001b[49m\u001b[43m(\u001b[49m\u001b[43mgeom_obj\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/src1/python-bigquery-dataframes/bigframes/geopandas/geoseries.py:117\u001b[0m, in \u001b[0;36mGeoSeries.difference\u001b[0;34m(self, other)\u001b[0m\n\u001b[1;32m 97\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21mdifference\u001b[39m(\u001b[38;5;28mself\u001b[39m: GeoSeries, other: GeoSeries) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m GeoSeries: \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[1;32m 98\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 99\u001b[0m \u001b[38;5;124;03m Returns a GeoSeries of the points in each aligned geometry that are not\u001b[39;00m\n\u001b[1;32m 100\u001b[0m \u001b[38;5;124;03m in other.\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 115\u001b[0m \u001b[38;5;124;03m GeoSeries.difference is not supported. Use bigframes.bigquery.st_difference(series), instead.\u001b[39;00m\n\u001b[1;32m 116\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 117\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mNotImplementedError\u001b[39;00m(\n\u001b[1;32m 118\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mGeoSeries.difference() is not supported. Use bigframes.bigquery.st_difference(series), instead. \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mconstants\u001b[38;5;241m.\u001b[39mFEEDBACK_LINK\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 119\u001b[0m )\n", + "\u001b[0;31mNotImplementedError\u001b[0m: GeoSeries.difference() is not supported. Use bigframes.bigquery.st_difference(series), instead. Share your usecase with the BigQuery DataFrames team at the https://bit.ly/bigframes-feedback survey.You are currently running BigFrames version 1.39.0" + ] + } + ], + "source": [ + "wkts_from_geo.difference(geom_obj)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Use `BigQuery.st_difference()` to find the difference between two GeSeries. See, https://cloud.google.com/bigquery/docs/reference/standard-sql/geography_functions#st_difference" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 POINT (-84.86717 33.92103)\n", + "1 POINT (-82.47974 35.33641)\n", + "2 GEOMETRYCOLLECTION EMPTY\n", + "3 POINT (-75.50298 39.09709)\n", + "4 POINT (-92.56434 39.8298)\n", + "dtype: geometry" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bbq.st_difference(wkts_from_geo, geom_obj)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Find the difference between a `GeoSeries` and a single geometry shape." + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 POINT (-84.86717 33.92103)\n", + "1 None\n", + "2 None\n", + "3 None\n", + "4 None\n", + "dtype: geometry" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bbq.st_difference(wkts_from_geo, [Polygon([(0, 0), (10, 0), (10, 10), (0, 0)])])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Find the difference in GeoSeries with the same parts" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 GEOMETRYCOLLECTION EMPTY\n", + "1 GEOMETRYCOLLECTION EMPTY\n", + "2 GEOMETRYCOLLECTION EMPTY\n", + "3 GEOMETRYCOLLECTION EMPTY\n", + "4 GEOMETRYCOLLECTION EMPTY\n", + "dtype: geometry" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bbq.st_difference(geom_obj, geom_obj)" + ] } ], "metadata": { diff --git a/tests/system/small/bigquery/test_geo.py b/tests/system/small/bigquery/test_geo.py index 0d0bba302b..2cca8c933a 100644 --- a/tests/system/small/bigquery/test_geo.py +++ b/tests/system/small/bigquery/test_geo.py @@ -54,23 +54,42 @@ def test_geo_st_area(): def test_geo_st_difference(): - data1 = [Polygon([(0, 0), (10, 0), (10, 10), (0, 0)])] + data1 = [ + Polygon([(0, 0), (10, 0), (10, 10), (0, 0)]), + Polygon([(0.001, 0.001), (0.002, 0.001), (0.002, 0.002)]), + Point(0, 1), + ] - data2 = [Polygon([(4, 2), (6, 2), (8, 6), (4, 2)])] + data2 = [ + Polygon([(0.001, 0.001), (0.002, 0.001), (0.002, 0.002)]), + Polygon([(4, 2), (6, 2), (8, 6), (4, 2)]), + LineString([(2, 0), (0, 2)]), + ] - geopd_s1 = geopandas.GeoSeries(data=data1) - geopd_s2 = geopandas.GeoSeries(data=data2) geobf_s1 = bigframes.geopandas.GeoSeries(data=data1) geobf_s2 = bigframes.geopandas.GeoSeries(data=data2) + geobf_s_result = bbq.st_difference(geobf_s1, geobf_s2).to_pandas() - geopd_s_result = geopd_s1.difference(geopd_s2).round(-3) - geobf_s_result = bbq.st_difference(geobf_s1, geobf_s2).to_pandas().round(-3) - - pd.testing.assert_series_equal( - geopd_s_result, - geobf_s_result, - check_series_type=False, - check_dtype=False, - check_index_type=False, - check_exact=False, + assert geobf_s_result.dtype == "geometry" + assert geobf_s_result.iloc[1] == Polygon( + [(0.001, 0.001), (0.002, 0.001), (0.002, 0.002), (0.001, 0.001)] ) + + +def test_geo_st_difference_with_single_geometry_object(): + data1 = [ + Polygon([(0, 0), (10, 0), (10, 10), (0, 0)]), + Polygon([(0.001, 0.001), (0.002, 0.001), (0.002, 0.002)]), + Point(0, 1), + ] + + geobf_s1 = bigframes.geopandas.GeoSeries(data=data1) + geobf_s_result = bbq.st_difference( + geobf_s1, + bigframes.series.Series( + [Polygon([(0.001, 0.001), (0.002, 0.001), (0.002, 0.002)])] + ), + ).to_pandas() + + assert geobf_s_result.dtype == "geometry" + assert geobf_s_result.iloc[1] is None diff --git a/third_party/bigframes_vendored/geopandas/geoseries.py b/third_party/bigframes_vendored/geopandas/geoseries.py index 71c218cc14..a2e7b74059 100644 --- a/third_party/bigframes_vendored/geopandas/geoseries.py +++ b/third_party/bigframes_vendored/geopandas/geoseries.py @@ -239,104 +239,3 @@ def to_wkt(self) -> bigframes.series.Series: WKT representations of the geometries. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) - - def difference(self) -> bigframes.series.Series: - """ - Returns a GeoSeries of the points in each aligned geometry that are not - in other. - - The operation works on a 1-to-1 row-wise manner - - **Examples:** - - >>> import bigframes as bpd - >>> import bigframes.geopandas - >>> from shapely.geometry import Polygon, LineString, Point - >>> bpd.options.display.progress_bar = None - >>> s = geopandas.GeoSeries( - ... [ - ... Polygon([(0, 0), (2, 2), (0, 2)]), - ... Polygon([(0, 0), (2, 2), (0, 2)]), - ... LineString([(0, 0), (2, 2)]), - ... LineString([(2, 0), (0, 2)]), - ... Point(0, 1), - ... ], - ... ) - >>> s2 = geopandas.GeoSeries( - ... [ - ... Polygon([(0, 0), (1, 1), (0, 1)]), - ... LineString([(1, 0), (1, 3)]), - ... LineString([(2, 0), (0, 2)]), - ... Point(1, 1), - ... Point(0, 1), - ... ], - ... index=range(1, 6), - ... ) - - >>> s - 0 POLYGON ((0 0, 2 2, 0 2, 0 0)) - 1 POLYGON ((0 0, 2 2, 0 2, 0 0)) - 2 LINESTRING (0 0, 2 2) - 3 LINESTRING (2 0, 0 2) - 4 POINT (0 1) - dtype: geometry - - >>> s2 - 1 POLYGON ((0 0, 1 1, 0 1, 0 0)) - 2 LINESTRING (1 0, 1 3) - 3 LINESTRING (2 0, 0 2) - 4 POINT (1 1) - 5 POINT (0 1) - dtype: geometry - - We can do difference of each geometry and a single shapely geometry: - - >>> s.difference(Polygon([(0, 0), (1, 1), (0, 1)])) - 0 POLYGON ((0.99954 1, 2 2, 0 2, 0 1, 0.99954 1)) - 1 None - 2 None - 3 None - 4 None - dtype: geometry - - We can also check two GeoSeries against each other, row by row. The - GeoSeries above have different indices. We can either align both GeoSeries - based on index values and compare elements with the same index using - align=True or ignore index and compare elements based on their matching - order using align=False: - - >>> s.difference(s2) - 0 None - 1 POLYGON ((0.99954 1, 2 2, 0 2, 0 1, 0.99954 1)) - 2 LINESTRING (0 0, 1 1.00046, 2 2) - 3 GEOMETRYCOLLECTION EMPTY - 4 POINT (0 1) - 5 None - dtype: geometry - - 0 POLYGON ((0.99954 1, 2 2, 0 2, 0 1, 0.99954 1)) - 1 POLYGON ((0 0, 1 1.00046, 2 2, 1 2.0003, 0 2, ... - 2 LINESTRING (0 0, 1 1.00046, 2 2) - 3 LINESTRING (2 0, 0 2) - 4 GEOMETRYCOLLECTION EMPTY - dtype: geometry - - - >>> s.difference(s2, align=True) - - >>> s.difference(s2, align=False) - - - Args: - other (Geoseries or geometric object): - The Geoseries (elementwise) or geometric object to find the difference to. - - align (bool or None (default None)): - The index for the GeoSeries. - - Returns: - bigframes.geopandas.GeoSeries: - A GeoSeries of the points in each aligned geometry that are not - in other. - """ - raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) From 9a66a6f5d5b1c8ae3ea147275f38d9f27e1052e6 Mon Sep 17 00:00:00 2001 From: Arwa Date: Wed, 12 Mar 2025 13:34:18 -0500 Subject: [PATCH 04/10] update method and tests --- tests/system/small/bigquery/test_geo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/system/small/bigquery/test_geo.py b/tests/system/small/bigquery/test_geo.py index 2cca8c933a..3058d24bb1 100644 --- a/tests/system/small/bigquery/test_geo.py +++ b/tests/system/small/bigquery/test_geo.py @@ -86,7 +86,7 @@ def test_geo_st_difference_with_single_geometry_object(): geobf_s1 = bigframes.geopandas.GeoSeries(data=data1) geobf_s_result = bbq.st_difference( geobf_s1, - bigframes.series.Series( + bigframes.geopandas.GeoSeries( [Polygon([(0.001, 0.001), (0.002, 0.001), (0.002, 0.002)])] ), ).to_pandas() From c8979ee57653e931a4cb9b6b8fbedf417d396a2e Mon Sep 17 00:00:00 2001 From: Arwa Date: Thu, 13 Mar 2025 15:58:14 -0500 Subject: [PATCH 05/10] update test to cover different inputs --- tests/system/small/bigquery/test_geo.py | 75 +++++++++++++++++++++---- 1 file changed, 65 insertions(+), 10 deletions(-) diff --git a/tests/system/small/bigquery/test_geo.py b/tests/system/small/bigquery/test_geo.py index 3058d24bb1..2d3e843181 100644 --- a/tests/system/small/bigquery/test_geo.py +++ b/tests/system/small/bigquery/test_geo.py @@ -14,7 +14,12 @@ import geopandas # type: ignore import pandas as pd -from shapely.geometry import LineString, Point, Polygon # type: ignore +from shapely.geometry import ( # type: ignore + GeometryCollection, + LineString, + Point, + Polygon, +) import bigframes.bigquery as bbq import bigframes.geopandas @@ -53,16 +58,20 @@ def test_geo_st_area(): ) +# The tests for st_difference do not check against geopandas result because +# geopandas BigQuery's geography functions, like st_difference, interpret the +# geometry data as a point set on the Earth's surface while geopandas interprets +# the geometry data as a point set on planer surface def test_geo_st_difference(): data1 = [ Polygon([(0, 0), (10, 0), (10, 10), (0, 0)]), - Polygon([(0.001, 0.001), (0.002, 0.001), (0.002, 0.002)]), + Polygon([(0, 0), (1, 1), (0, 1), (0, 0)]), Point(0, 1), ] data2 = [ - Polygon([(0.001, 0.001), (0.002, 0.001), (0.002, 0.002)]), - Polygon([(4, 2), (6, 2), (8, 6), (4, 2)]), + Polygon([(0, 0), (10, 0), (10, 10), (0, 0)]), + Polygon([(0, 0), (1, 1), (0, 1), (0, 0)]), LineString([(2, 0), (0, 2)]), ] @@ -70,16 +79,26 @@ def test_geo_st_difference(): geobf_s2 = bigframes.geopandas.GeoSeries(data=data2) geobf_s_result = bbq.st_difference(geobf_s1, geobf_s2).to_pandas() + expected = bigframes.series.Series( + [ + GeometryCollection([]), + GeometryCollection([]), + Point(0, 1), + ], + index=[0, 1, 2], + dtype=geopandas.array.GeometryDtype(), + ).to_pandas() + assert geobf_s_result.dtype == "geometry" - assert geobf_s_result.iloc[1] == Polygon( - [(0.001, 0.001), (0.002, 0.001), (0.002, 0.002), (0.001, 0.001)] - ) + assert expected.iloc[0].equals(geobf_s_result.iloc[0]) + assert expected.iloc[1].equals(geobf_s_result.iloc[1]) + assert expected.iloc[2].equals(geobf_s_result.iloc[2]) def test_geo_st_difference_with_single_geometry_object(): data1 = [ Polygon([(0, 0), (10, 0), (10, 10), (0, 0)]), - Polygon([(0.001, 0.001), (0.002, 0.001), (0.002, 0.002)]), + Polygon([(0, 0), (1, 1), (0, 1)]), Point(0, 1), ] @@ -87,9 +106,45 @@ def test_geo_st_difference_with_single_geometry_object(): geobf_s_result = bbq.st_difference( geobf_s1, bigframes.geopandas.GeoSeries( - [Polygon([(0.001, 0.001), (0.002, 0.001), (0.002, 0.002)])] + [ + Polygon([(0, 0), (1, 1), (0, 1)]), + ] ), ).to_pandas() + expected = bigframes.series.Series( + [ + Polygon([(1, 1), (0, 0), (10, 0), (10, 10), (0.98496, 1), (1, 1)]), + None, + None, + ], + index=[0, 1, 2], + dtype=geopandas.array.GeometryDtype(), + ).to_pandas() + + assert geobf_s_result.dtype == "geometry" + assert expected.iloc[1] == geobf_s_result.iloc[1] + assert expected.iloc[2] == geobf_s_result.iloc[2] + assert expected.iloc[0].equals(geobf_s_result.iloc[0]) + + +def test_geo_st_difference_with_similar_geometry_objects(): + data1 = [ + Polygon([(0, 0), (10, 0), (10, 10), (0, 0)]), + Polygon([(0, 0), (1, 1), (0, 1)]), + Point(0, 1), + ] + + geobf_s1 = bigframes.geopandas.GeoSeries(data=data1) + geobf_s_result = bbq.st_difference(geobf_s1, geobf_s1).to_pandas() + + expected = bigframes.series.Series( + [GeometryCollection([]), GeometryCollection([]), GeometryCollection([])], + index=[0, 1, 2], + dtype=geopandas.array.GeometryDtype(), + ).to_pandas() + assert geobf_s_result.dtype == "geometry" - assert geobf_s_result.iloc[1] is None + assert expected.iloc[0].equals(geobf_s_result.iloc[0]) + assert expected.iloc[1].equals(geobf_s_result.iloc[1]) + assert expected.iloc[2].equals(geobf_s_result.iloc[2]) From 9fa9384cb20fc0f7d1f309b2ab6b93165f63eaa5 Mon Sep 17 00:00:00 2001 From: Arwa Date: Thu, 13 Mar 2025 16:08:42 -0500 Subject: [PATCH 06/10] update test assertion --- tests/system/small/bigquery/test_geo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/system/small/bigquery/test_geo.py b/tests/system/small/bigquery/test_geo.py index 2d3e843181..9da83a9e5b 100644 --- a/tests/system/small/bigquery/test_geo.py +++ b/tests/system/small/bigquery/test_geo.py @@ -123,9 +123,9 @@ def test_geo_st_difference_with_single_geometry_object(): ).to_pandas() assert geobf_s_result.dtype == "geometry" + assert expected.iloc[0].equals(geobf_s_result.iloc[0]) assert expected.iloc[1] == geobf_s_result.iloc[1] assert expected.iloc[2] == geobf_s_result.iloc[2] - assert expected.iloc[0].equals(geobf_s_result.iloc[0]) def test_geo_st_difference_with_similar_geometry_objects(): From 51773300bfb9a1d223a8459ca7d0fcc38668c20d Mon Sep 17 00:00:00 2001 From: Arwa Date: Tue, 18 Mar 2025 09:50:34 -0500 Subject: [PATCH 07/10] update testing with single geometry object and add notebook tag to ignore the exception --- notebooks/geo/geoseries.ipynb | 138 ++++++++++++------------ tests/system/small/bigquery/test_geo.py | 11 +- 2 files changed, 77 insertions(+), 72 deletions(-) diff --git a/notebooks/geo/geoseries.ipynb b/notebooks/geo/geoseries.ipynb index 6f411e44a5..0c1371ee0a 100644 --- a/notebooks/geo/geoseries.ipynb +++ b/notebooks/geo/geoseries.ipynb @@ -106,11 +106,11 @@ { "data": { "text/plain": [ - "406 POINT (-84.86717 33.92103)\n", - "926 POINT (-82.47974 35.33641)\n", - "37 POINT (-91.19496 39.98605)\n", - "940 POINT (-75.50298 39.09709)\n", - "996 POINT (-92.56434 39.8298)\n", + "163 POINT (-83.11922 35.9162)\n", + "143 POINT (-84.06423 38.51347)\n", + "96 POINT (-96.9712 28.79637)\n", + "310 POINT (-90.45393 37.11107)\n", + "38 POINT (-106.31668 38.73822)\n", "Name: int_point_geom, dtype: geometry" ] }, @@ -139,11 +139,11 @@ { "data": { "text/plain": [ - "0 POINT (-84.86717 33.92103)\n", - "1 POINT (-82.47974 35.33641)\n", - "2 POINT (-91.19496 39.98605)\n", - "3 POINT (-75.50298 39.09709)\n", - "4 POINT (-92.56434 39.8298)\n", + "0 POINT (-83.11922 35.9162)\n", + "1 POINT (-84.06423 38.51347)\n", + "2 POINT (-96.9712 28.79637)\n", + "3 POINT (-90.45393 37.11107)\n", + "4 POINT (-106.31668 38.73822)\n", "dtype: geometry" ] }, @@ -188,11 +188,11 @@ { "data": { "text/plain": [ - "0 -84.867169\n", - "1 -82.479741\n", - "2 -91.194961\n", - "3 -75.502982\n", - "4 -92.56434\n", + "0 -83.119224\n", + "1 -84.06423\n", + "2 -96.971198\n", + "3 -90.453931\n", + "4 -106.316683\n", "dtype: Float64" ] }, @@ -220,11 +220,11 @@ { "data": { "text/plain": [ - "0 33.92103\n", - "1 35.336415\n", - "2 39.986053\n", - "3 39.097088\n", - "4 39.829795\n", + "0 35.916198\n", + "1 38.513473\n", + "2 28.79637\n", + "3 37.111074\n", + "4 38.738223\n", "dtype: Float64" ] }, @@ -370,11 +370,11 @@ { "data": { "text/plain": [ - "103 POLYGON ((-100.6454 30.28817, -100.64936 30.28...\n", - "158 POLYGON ((-84.90231 33.67448, -84.90231 33.674...\n", - "155 POLYGON ((-92.69454 44.68874, -92.69432 44.688...\n", - "478 POLYGON ((-88.36906 32.74258, -88.3692 32.7414...\n", - "305 POLYGON ((-98.43923 46.89264, -98.43924 46.892...\n", + "115 POLYGON ((-86.69516 40.3012, -86.69515 40.3011...\n", + "28 POLYGON ((-94.76099 39.04366, -94.75875 39.043...\n", + "173 POLYGON ((-76.98439 40.51456, -76.98403 40.513...\n", + "52 POLYGON ((-90.87722 35.44364, -90.87858 35.443...\n", + "3 POLYGON ((-102.57685 39.04068, -102.57696 39.0...\n", "Name: county_geom, dtype: geometry" ] }, @@ -403,11 +403,11 @@ { "data": { "text/plain": [ - "0 POLYGON ((-100.6454 30.28817, -100.64936 30.28...\n", - "1 POLYGON ((-84.90231 33.67448, -84.90231 33.674...\n", - "2 POLYGON ((-92.69454 44.68874, -92.69432 44.688...\n", - "3 POLYGON ((-88.36906 32.74258, -88.3692 32.7414...\n", - "4 POLYGON ((-98.43923 46.89264, -98.43924 46.892...\n", + "0 POLYGON ((-86.69516 40.3012, -86.69515 40.3011...\n", + "1 POLYGON ((-94.76099 39.04366, -94.75875 39.043...\n", + "2 POLYGON ((-76.98439 40.51456, -76.98403 40.513...\n", + "3 POLYGON ((-90.87722 35.44364, -90.87858 35.443...\n", + "4 POLYGON ((-102.57685 39.04068, -102.57696 39.0...\n", "dtype: geometry" ] }, @@ -445,14 +445,14 @@ "outputs": [ { "ename": "NotImplementedError", - "evalue": "GeoSeries.area is not supported. Use bigframes.bigquery.st_area(series), instead. Share your usecase with the BigQuery DataFrames team at the https://bit.ly/bigframes-feedback survey.You are currently running BigFrames version 1.39.0", + "evalue": "GeoSeries.area is not supported. Use bigframes.bigquery.st_area(series), instead. Share your usecase with the BigQuery DataFrames team at the https://bit.ly/bigframes-feedback survey. You are currently running BigFrames version 1.40.0.", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mNotImplementedError\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[13], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mfive_geom\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43marea\u001b[49m\n", "File \u001b[0;32m~/src1/python-bigquery-dataframes/bigframes/geopandas/geoseries.py:67\u001b[0m, in \u001b[0;36mGeoSeries.area\u001b[0;34m(self, crs)\u001b[0m\n\u001b[1;32m 48\u001b[0m \u001b[38;5;129m@property\u001b[39m\n\u001b[1;32m 49\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21marea\u001b[39m(\u001b[38;5;28mself\u001b[39m, crs\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m bigframes\u001b[38;5;241m.\u001b[39mseries\u001b[38;5;241m.\u001b[39mSeries: \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[1;32m 50\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Returns a Series containing the area of each geometry in the GeoSeries\u001b[39;00m\n\u001b[1;32m 51\u001b[0m \u001b[38;5;124;03m expressed in the units of the CRS.\u001b[39;00m\n\u001b[1;32m 52\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 65\u001b[0m \u001b[38;5;124;03m GeoSeries.area is not supported. Use bigframes.bigquery.st_area(series), instead.\u001b[39;00m\n\u001b[1;32m 66\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m---> 67\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mNotImplementedError\u001b[39;00m(\n\u001b[1;32m 68\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mGeoSeries.area is not supported. Use bigframes.bigquery.st_area(series), instead. \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mconstants\u001b[38;5;241m.\u001b[39mFEEDBACK_LINK\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 69\u001b[0m )\n", - "\u001b[0;31mNotImplementedError\u001b[0m: GeoSeries.area is not supported. Use bigframes.bigquery.st_area(series), instead. Share your usecase with the BigQuery DataFrames team at the https://bit.ly/bigframes-feedback survey.You are currently running BigFrames version 1.39.0" + "\u001b[0;31mNotImplementedError\u001b[0m: GeoSeries.area is not supported. Use bigframes.bigquery.st_area(series), instead. Share your usecase with the BigQuery DataFrames team at the https://bit.ly/bigframes-feedback survey. You are currently running BigFrames version 1.40.0." ] } ], @@ -484,11 +484,11 @@ { "data": { "text/plain": [ - "0 5496691687.760967\n", - "1 520963166.881344\n", - "2 1531842088.681163\n", - "3 2366936139.372852\n", - "4 3909215045.354443\n", + "0 1048396641.043185\n", + "1 404195271.072038\n", + "2 1443999689.714014\n", + "3 1611776377.004328\n", + "4 4610240226.664809\n", "dtype: Float64" ] }, @@ -524,11 +524,11 @@ { "data": { "text/plain": [ - "0 POINT (-84.86717 33.92103)\n", - "1 POINT (-82.47974 35.33641)\n", - "2 POINT (-91.19496 39.98605)\n", - "3 POINT (-75.50298 39.09709)\n", - "4 POINT (-92.56434 39.8298)\n", + "0 POINT (-83.11922 35.9162)\n", + "1 POINT (-84.06423 38.51347)\n", + "2 POINT (-96.9712 28.79637)\n", + "3 POINT (-90.45393 37.11107)\n", + "4 POINT (-106.31668 38.73822)\n", "dtype: geometry" ] }, @@ -563,11 +563,11 @@ { "data": { "text/plain": [ - "0 POINT(-84.8671693 33.9210299)\n", - "1 POINT(-82.4797406 35.3364146)\n", - "2 POINT(-91.1949607 39.9860525)\n", - "3 POINT(-75.5029819 39.0970884)\n", - "4 POINT(-92.56434 39.829795)\n", + "0 POINT(-83.1192242 35.916198)\n", + "1 POINT(-84.06423 38.5134727)\n", + "2 POINT(-96.9711977 28.7963699)\n", + "3 POINT(-90.4539307 37.1110737)\n", + "4 POINT(-106.3166833 38.7382234)\n", "dtype: string" ] }, @@ -603,11 +603,11 @@ { "data": { "text/plain": [ - "0 POINT (-84.86717 33.92103)\n", - "1 POINT (-82.47974 35.33641)\n", - "2 POINT (-91.19496 39.98605)\n", - "3 POINT (-75.50298 39.09709)\n", - "4 POINT (-92.56434 39.8298)\n", + "0 POINT (-83.11922 35.9162)\n", + "1 POINT (-84.06423 38.51347)\n", + "2 POINT (-96.9712 28.79637)\n", + "3 POINT (-90.45393 37.11107)\n", + "4 POINT (-106.31668 38.73822)\n", "dtype: geometry" ] }, @@ -712,18 +712,22 @@ { "cell_type": "code", "execution_count": 21, - "metadata": {}, + "metadata": { + "tags": [ + "raises-exception" + ] + }, "outputs": [ { "ename": "NotImplementedError", - "evalue": "GeoSeries.difference() is not supported. Use bigframes.bigquery.st_difference(series), instead. Share your usecase with the BigQuery DataFrames team at the https://bit.ly/bigframes-feedback survey.You are currently running BigFrames version 1.39.0", + "evalue": "GeoSeries.difference() is not supported. Use bigframes.bigquery.st_difference(series), instead. Share your usecase with the BigQuery DataFrames team at the https://bit.ly/bigframes-feedback survey. You are currently running BigFrames version 1.40.0.", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mNotImplementedError\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[21], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mwkts_from_geo\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdifference\u001b[49m\u001b[43m(\u001b[49m\u001b[43mgeom_obj\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/src1/python-bigquery-dataframes/bigframes/geopandas/geoseries.py:117\u001b[0m, in \u001b[0;36mGeoSeries.difference\u001b[0;34m(self, other)\u001b[0m\n\u001b[1;32m 97\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21mdifference\u001b[39m(\u001b[38;5;28mself\u001b[39m: GeoSeries, other: GeoSeries) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m GeoSeries: \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[1;32m 98\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 99\u001b[0m \u001b[38;5;124;03m Returns a GeoSeries of the points in each aligned geometry that are not\u001b[39;00m\n\u001b[1;32m 100\u001b[0m \u001b[38;5;124;03m in other.\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 115\u001b[0m \u001b[38;5;124;03m GeoSeries.difference is not supported. Use bigframes.bigquery.st_difference(series), instead.\u001b[39;00m\n\u001b[1;32m 116\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 117\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mNotImplementedError\u001b[39;00m(\n\u001b[1;32m 118\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mGeoSeries.difference() is not supported. Use bigframes.bigquery.st_difference(series), instead. \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mconstants\u001b[38;5;241m.\u001b[39mFEEDBACK_LINK\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 119\u001b[0m )\n", - "\u001b[0;31mNotImplementedError\u001b[0m: GeoSeries.difference() is not supported. Use bigframes.bigquery.st_difference(series), instead. Share your usecase with the BigQuery DataFrames team at the https://bit.ly/bigframes-feedback survey.You are currently running BigFrames version 1.39.0" + "\u001b[0;31mNotImplementedError\u001b[0m: GeoSeries.difference() is not supported. Use bigframes.bigquery.st_difference(series), instead. Share your usecase with the BigQuery DataFrames team at the https://bit.ly/bigframes-feedback survey. You are currently running BigFrames version 1.40.0." ] } ], @@ -746,11 +750,11 @@ { "data": { "text/plain": [ - "0 POINT (-84.86717 33.92103)\n", - "1 POINT (-82.47974 35.33641)\n", - "2 GEOMETRYCOLLECTION EMPTY\n", - "3 POINT (-75.50298 39.09709)\n", - "4 POINT (-92.56434 39.8298)\n", + "0 POINT (-83.11922 35.9162)\n", + "1 POINT (-84.06423 38.51347)\n", + "2 GEOMETRYCOLLECTION EMPTY\n", + "3 POINT (-90.45393 37.11107)\n", + "4 POINT (-106.31668 38.73822)\n", "dtype: geometry" ] }, @@ -778,11 +782,11 @@ { "data": { "text/plain": [ - "0 POINT (-84.86717 33.92103)\n", - "1 None\n", - "2 None\n", - "3 None\n", - "4 None\n", + "0 POINT (-83.11922 35.9162)\n", + "1 None\n", + "2 None\n", + "3 None\n", + "4 None\n", "dtype: geometry" ] }, @@ -804,7 +808,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 24, "metadata": {}, "outputs": [ { @@ -818,7 +822,7 @@ "dtype: geometry" ] }, - "execution_count": 25, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } diff --git a/tests/system/small/bigquery/test_geo.py b/tests/system/small/bigquery/test_geo.py index 9da83a9e5b..657a2991c4 100644 --- a/tests/system/small/bigquery/test_geo.py +++ b/tests/system/small/bigquery/test_geo.py @@ -98,7 +98,7 @@ def test_geo_st_difference(): def test_geo_st_difference_with_single_geometry_object(): data1 = [ Polygon([(0, 0), (10, 0), (10, 10), (0, 0)]), - Polygon([(0, 0), (1, 1), (0, 1)]), + Polygon([(4, 2), (6, 2), (8, 6), (4, 2)]), Point(0, 1), ] @@ -107,15 +107,16 @@ def test_geo_st_difference_with_single_geometry_object(): geobf_s1, bigframes.geopandas.GeoSeries( [ - Polygon([(0, 0), (1, 1), (0, 1)]), + Polygon([(0, 0), (10, 0), (10, 10), (0, 0)]), + Polygon([(1, 0), (0, 5), (0, 0), (1, 0)]), ] ), ).to_pandas() expected = bigframes.series.Series( [ - Polygon([(1, 1), (0, 0), (10, 0), (10, 10), (0.98496, 1), (1, 1)]), - None, + GeometryCollection([]), + Polygon([(4, 2), (6, 2), (8, 6), (4, 2)]), None, ], index=[0, 1, 2], @@ -123,7 +124,7 @@ def test_geo_st_difference_with_single_geometry_object(): ).to_pandas() assert geobf_s_result.dtype == "geometry" - assert expected.iloc[0].equals(geobf_s_result.iloc[0]) + assert (expected.iloc[0]).equals(geobf_s_result.iloc[0]) assert expected.iloc[1] == geobf_s_result.iloc[1] assert expected.iloc[2] == geobf_s_result.iloc[2] From 1333425fd89f3640d000894100adefd2276bfa12 Mon Sep 17 00:00:00 2001 From: Arwa Date: Tue, 18 Mar 2025 10:05:15 -0500 Subject: [PATCH 08/10] fix docstrings --- bigframes/bigquery/_operations/geo.py | 5 ++--- bigframes/geopandas/geoseries.py | 8 +++++--- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/bigframes/bigquery/_operations/geo.py b/bigframes/bigquery/_operations/geo.py index ef86f66ff3..a41c33f67d 100644 --- a/bigframes/bigquery/_operations/geo.py +++ b/bigframes/bigquery/_operations/geo.py @@ -166,7 +166,6 @@ def st_difference( 5 None dtype: geometry - We can also check difference of single shapely geometries: >>> sbq1 = bigframes.geopandas.GeoSeries( @@ -192,7 +191,7 @@ def st_difference( 0 POLYGON ((0 0, 10 0, 10 10, 0 0), (8 6, 6 2, 4... dtype: geometry - Additionally, qe can do difference of a GeoSeries against a single shapely geometry: + Additionally, we can check difference of a GeoSeries against a single shapely geometry: >>> bbq.st_difference(s1, sbq2) 0 POLYGON ((0 0, 2 2, 0 2, 0 0)) @@ -204,7 +203,7 @@ def st_difference( Args: other (bigframes.series.Series or geometric object): - The Geoseries (elementwise) or geometric object to find the difference to. + The GeoSeries (elementwise) or geometric object to find the difference to. Returns: bigframes.series.Series: diff --git a/bigframes/geopandas/geoseries.py b/bigframes/geopandas/geoseries.py index 9c5f3d1f4b..1e296f0392 100644 --- a/bigframes/geopandas/geoseries.py +++ b/bigframes/geopandas/geoseries.py @@ -102,8 +102,9 @@ def difference(self: GeoSeries, other: GeoSeries) -> GeoSeries: # type: ignore The operation works on a 1-to-1 row-wise manner Args: - other (Geoseries or geometric object): - The Geoseries (elementwise) or geometric object to find the difference to. + other (GeoSeries or geometric object): + The GeoSeries (elementwise) or geometric object to find the + difference to. Returns: bigframes.geopandas.GeoSeries: @@ -112,7 +113,8 @@ def difference(self: GeoSeries, other: GeoSeries) -> GeoSeries: # type: ignore Raises: NotImplementedError: - GeoSeries.difference is not supported. Use bigframes.bigquery.st_difference(series), instead. + GeoSeries.difference is not supported. Use + bigframes.bigquery.st_difference(series), instead. """ raise NotImplementedError( f"GeoSeries.difference() is not supported. Use bigframes.bigquery.st_difference(series), instead. {constants.FEEDBACK_LINK}" From f1535ba5b5081a7331c817c7b90bdf98c3819dd6 Mon Sep 17 00:00:00 2001 From: Arwa Date: Thu, 20 Mar 2025 15:40:56 -0500 Subject: [PATCH 09/10] modify geo_difference to make it available for use and update tests and notebook --- bigframes/geopandas/geoseries.py | 25 +- notebooks/geo/geoseries.ipynb | 230 +++++++++++------- tests/system/small/bigquery/test_geo.py | 6 +- .../system/small/geopandas/test_geoseries.py | 107 ++++++-- .../bigframes_vendored/geopandas/geoseries.py | 113 +++++++++ 5 files changed, 349 insertions(+), 132 deletions(-) diff --git a/bigframes/geopandas/geoseries.py b/bigframes/geopandas/geoseries.py index 1e296f0392..1b2485158e 100644 --- a/bigframes/geopandas/geoseries.py +++ b/bigframes/geopandas/geoseries.py @@ -95,27 +95,4 @@ def to_wkt(self: GeoSeries) -> bigframes.series.Series: return series def difference(self: GeoSeries, other: GeoSeries) -> GeoSeries: # type: ignore - """ - Returns a GeoSeries of the points in each aligned geometry that are not - in other. - - The operation works on a 1-to-1 row-wise manner - - Args: - other (GeoSeries or geometric object): - The GeoSeries (elementwise) or geometric object to find the - difference to. - - Returns: - bigframes.geopandas.GeoSeries: - A GeoSeries of the points in each aligned geometry that are not - in other. - - Raises: - NotImplementedError: - GeoSeries.difference is not supported. Use - bigframes.bigquery.st_difference(series), instead. - """ - raise NotImplementedError( - f"GeoSeries.difference() is not supported. Use bigframes.bigquery.st_difference(series), instead. {constants.FEEDBACK_LINK}" - ) + return self._apply_binary_op(other, ops.geo_st_difference_op) diff --git a/notebooks/geo/geoseries.ipynb b/notebooks/geo/geoseries.ipynb index 0c1371ee0a..e7566f3fa6 100644 --- a/notebooks/geo/geoseries.ipynb +++ b/notebooks/geo/geoseries.ipynb @@ -106,11 +106,11 @@ { "data": { "text/plain": [ - "163 POINT (-83.11922 35.9162)\n", - "143 POINT (-84.06423 38.51347)\n", - "96 POINT (-96.9712 28.79637)\n", - "310 POINT (-90.45393 37.11107)\n", - "38 POINT (-106.31668 38.73822)\n", + "217 POINT (-86.80185 38.70532)\n", + "16 POINT (-83.47042 30.44723)\n", + "40 POINT (-94.33925 38.25722)\n", + "139 POINT (-78.88532 38.50758)\n", + "400 POINT (-95.6191 41.0337)\n", "Name: int_point_geom, dtype: geometry" ] }, @@ -139,11 +139,11 @@ { "data": { "text/plain": [ - "0 POINT (-83.11922 35.9162)\n", - "1 POINT (-84.06423 38.51347)\n", - "2 POINT (-96.9712 28.79637)\n", - "3 POINT (-90.45393 37.11107)\n", - "4 POINT (-106.31668 38.73822)\n", + "0 POINT (-86.80185 38.70532)\n", + "1 POINT (-83.47042 30.44723)\n", + "2 POINT (-94.33925 38.25722)\n", + "3 POINT (-78.88532 38.50758)\n", + "4 POINT (-95.6191 41.0337)\n", "dtype: geometry" ] }, @@ -188,11 +188,11 @@ { "data": { "text/plain": [ - "0 -83.119224\n", - "1 -84.06423\n", - "2 -96.971198\n", - "3 -90.453931\n", - "4 -106.316683\n", + "0 -86.801847\n", + "1 -83.470416\n", + "2 -94.339246\n", + "3 -78.885321\n", + "4 -95.619101\n", "dtype: Float64" ] }, @@ -220,11 +220,11 @@ { "data": { "text/plain": [ - "0 35.916198\n", - "1 38.513473\n", - "2 28.79637\n", - "3 37.111074\n", - "4 38.738223\n", + "0 38.705322\n", + "1 30.447232\n", + "2 38.257217\n", + "3 38.507585\n", + "4 41.033703\n", "dtype: Float64" ] }, @@ -370,11 +370,11 @@ { "data": { "text/plain": [ - "115 POLYGON ((-86.69516 40.3012, -86.69515 40.3011...\n", - "28 POLYGON ((-94.76099 39.04366, -94.75875 39.043...\n", - "173 POLYGON ((-76.98439 40.51456, -76.98403 40.513...\n", - "52 POLYGON ((-90.87722 35.44364, -90.87858 35.443...\n", - "3 POLYGON ((-102.57685 39.04068, -102.57696 39.0...\n", + "214 POLYGON ((-79.36704 34.96248, -79.36696 34.962...\n", + "161 POLYGON ((-89.08844 33.53252, -89.08843 33.532...\n", + "57 POLYGON ((-110.75069 35.50001, -110.75069 35.4...\n", + "46 POLYGON ((-94.6865 39.04405, -94.68764 39.0440...\n", + "260 POLYGON ((-100.53965 34.99391, -100.53966 34.9...\n", "Name: county_geom, dtype: geometry" ] }, @@ -403,11 +403,11 @@ { "data": { "text/plain": [ - "0 POLYGON ((-86.69516 40.3012, -86.69515 40.3011...\n", - "1 POLYGON ((-94.76099 39.04366, -94.75875 39.043...\n", - "2 POLYGON ((-76.98439 40.51456, -76.98403 40.513...\n", - "3 POLYGON ((-90.87722 35.44364, -90.87858 35.443...\n", - "4 POLYGON ((-102.57685 39.04068, -102.57696 39.0...\n", + "0 POLYGON ((-79.36704 34.96248, -79.36696 34.962...\n", + "1 POLYGON ((-89.08844 33.53252, -89.08843 33.532...\n", + "2 POLYGON ((-110.75069 35.50001, -110.75069 35.4...\n", + "3 POLYGON ((-94.6865 39.04405, -94.68764 39.0440...\n", + "4 POLYGON ((-100.53965 34.99391, -100.53966 34.9...\n", "dtype: geometry" ] }, @@ -484,11 +484,11 @@ { "data": { "text/plain": [ - "0 1048396641.043185\n", - "1 404195271.072038\n", - "2 1443999689.714014\n", - "3 1611776377.004328\n", - "4 4610240226.664809\n", + "0 1014426111.476457\n", + "1 1196896004.730286\n", + "2 25794235993.165642\n", + "3 1242002056.351685\n", + "4 2381217221.963739\n", "dtype: Float64" ] }, @@ -524,11 +524,11 @@ { "data": { "text/plain": [ - "0 POINT (-83.11922 35.9162)\n", - "1 POINT (-84.06423 38.51347)\n", - "2 POINT (-96.9712 28.79637)\n", - "3 POINT (-90.45393 37.11107)\n", - "4 POINT (-106.31668 38.73822)\n", + "0 POINT (-86.80185 38.70532)\n", + "1 POINT (-83.47042 30.44723)\n", + "2 POINT (-94.33925 38.25722)\n", + "3 POINT (-78.88532 38.50758)\n", + "4 POINT (-95.6191 41.0337)\n", "dtype: geometry" ] }, @@ -563,11 +563,11 @@ { "data": { "text/plain": [ - "0 POINT(-83.1192242 35.916198)\n", - "1 POINT(-84.06423 38.5134727)\n", - "2 POINT(-96.9711977 28.7963699)\n", - "3 POINT(-90.4539307 37.1110737)\n", - "4 POINT(-106.3166833 38.7382234)\n", + "0 POINT(-86.8018468 38.705322)\n", + "1 POINT(-83.4704159 30.4472325)\n", + "2 POINT(-94.3392459 38.2572171)\n", + "3 POINT(-78.8853213 38.5075848)\n", + "4 POINT(-95.619101 41.0337028)\n", "dtype: string" ] }, @@ -603,11 +603,11 @@ { "data": { "text/plain": [ - "0 POINT (-83.11922 35.9162)\n", - "1 POINT (-84.06423 38.51347)\n", - "2 POINT (-96.9712 28.79637)\n", - "3 POINT (-90.45393 37.11107)\n", - "4 POINT (-106.31668 38.73822)\n", + "0 POINT (-86.80185 38.70532)\n", + "1 POINT (-83.47042 30.44723)\n", + "2 POINT (-94.33925 38.25722)\n", + "3 POINT (-78.88532 38.50758)\n", + "4 POINT (-95.6191 41.0337)\n", "dtype: geometry" ] }, @@ -692,21 +692,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Find the difference between two `GeoSeries` " + "## Find the `difference` between two `GeoSeries` " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Note: GeoSeries.difference raises a `NotImplementedError`." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Reuse `wkts_from_geo` and `geom_obj`" + "#### Reuse `wkts_from_geo` and `geom_obj` to find the difference between the geometry objects" ] }, { @@ -719,16 +712,19 @@ }, "outputs": [ { - "ename": "NotImplementedError", - "evalue": "GeoSeries.difference() is not supported. Use bigframes.bigquery.st_difference(series), instead. Share your usecase with the BigQuery DataFrames team at the https://bit.ly/bigframes-feedback survey. You are currently running BigFrames version 1.40.0.", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNotImplementedError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[21], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mwkts_from_geo\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdifference\u001b[49m\u001b[43m(\u001b[49m\u001b[43mgeom_obj\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/src1/python-bigquery-dataframes/bigframes/geopandas/geoseries.py:117\u001b[0m, in \u001b[0;36mGeoSeries.difference\u001b[0;34m(self, other)\u001b[0m\n\u001b[1;32m 97\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21mdifference\u001b[39m(\u001b[38;5;28mself\u001b[39m: GeoSeries, other: GeoSeries) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m GeoSeries: \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[1;32m 98\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 99\u001b[0m \u001b[38;5;124;03m Returns a GeoSeries of the points in each aligned geometry that are not\u001b[39;00m\n\u001b[1;32m 100\u001b[0m \u001b[38;5;124;03m in other.\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 115\u001b[0m \u001b[38;5;124;03m GeoSeries.difference is not supported. Use bigframes.bigquery.st_difference(series), instead.\u001b[39;00m\n\u001b[1;32m 116\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 117\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mNotImplementedError\u001b[39;00m(\n\u001b[1;32m 118\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mGeoSeries.difference() is not supported. Use bigframes.bigquery.st_difference(series), instead. \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mconstants\u001b[38;5;241m.\u001b[39mFEEDBACK_LINK\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 119\u001b[0m )\n", - "\u001b[0;31mNotImplementedError\u001b[0m: GeoSeries.difference() is not supported. Use bigframes.bigquery.st_difference(series), instead. Share your usecase with the BigQuery DataFrames team at the https://bit.ly/bigframes-feedback survey. You are currently running BigFrames version 1.40.0." - ] + "data": { + "text/plain": [ + "0 POINT (-86.80185 38.70532)\n", + "1 POINT (-83.47042 30.44723)\n", + "2 GEOMETRYCOLLECTION EMPTY\n", + "3 POINT (-78.88532 38.50758)\n", + "4 POINT (-95.6191 41.0337)\n", + "dtype: geometry" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ @@ -739,7 +735,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Use `BigQuery.st_difference()` to find the difference between two GeSeries. See, https://cloud.google.com/bigquery/docs/reference/standard-sql/geography_functions#st_difference" + "### Find the difference between a `GeoSeries` and a single geometry shape." ] }, { @@ -750,11 +746,11 @@ { "data": { "text/plain": [ - "0 POINT (-83.11922 35.9162)\n", - "1 POINT (-84.06423 38.51347)\n", - "2 GEOMETRYCOLLECTION EMPTY\n", - "3 POINT (-90.45393 37.11107)\n", - "4 POINT (-106.31668 38.73822)\n", + "0 POINT (-86.80185 38.70532)\n", + "1 None\n", + "2 None\n", + "3 None\n", + "4 None\n", "dtype: geometry" ] }, @@ -764,14 +760,14 @@ } ], "source": [ - "bbq.st_difference(wkts_from_geo, geom_obj)" + "wkts_from_geo.difference([Polygon([(0, 0), (10, 0), (10, 10), (0, 0)])])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Find the difference between a `GeoSeries` and a single geometry shape." + "### Find the difference in `GeoSeries` with the same shapes" ] }, { @@ -782,11 +778,11 @@ { "data": { "text/plain": [ - "0 POINT (-83.11922 35.9162)\n", - "1 None\n", - "2 None\n", - "3 None\n", - "4 None\n", + "0 GEOMETRYCOLLECTION EMPTY\n", + "1 GEOMETRYCOLLECTION EMPTY\n", + "2 GEOMETRYCOLLECTION EMPTY\n", + "3 GEOMETRYCOLLECTION EMPTY\n", + "4 GEOMETRYCOLLECTION EMPTY\n", "dtype: geometry" ] }, @@ -796,20 +792,84 @@ } ], "source": [ - "bbq.st_difference(wkts_from_geo, [Polygon([(0, 0), (10, 0), (10, 10), (0, 0)])])" + "geom_obj.difference(geom_obj)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Find the difference in GeoSeries with the same parts" + "## You can also use`BigQuery.st_difference()` to find the difference between two `GeoSeries`. See, https://cloud.google.com/bigquery/docs/reference/standard-sql/geography_functions#st_difference" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 POINT (-86.80185 38.70532)\n", + "1 POINT (-83.47042 30.44723)\n", + "2 GEOMETRYCOLLECTION EMPTY\n", + "3 POINT (-78.88532 38.50758)\n", + "4 POINT (-95.6191 41.0337)\n", + "dtype: geometry" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bbq.st_difference(wkts_from_geo, geom_obj)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Find the difference between a `GeoSeries` and a single geometry shape." + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 POINT (-86.80185 38.70532)\n", + "1 None\n", + "2 None\n", + "3 None\n", + "4 None\n", + "dtype: geometry" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bbq.st_difference(wkts_from_geo, [Polygon([(0, 0), (10, 0), (10, 10), (0, 0)])])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Find the difference in GeoSeries with the same shapes" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, "outputs": [ { "data": { @@ -822,7 +882,7 @@ "dtype: geometry" ] }, - "execution_count": 24, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } diff --git a/tests/system/small/bigquery/test_geo.py b/tests/system/small/bigquery/test_geo.py index 657a2991c4..538099e80a 100644 --- a/tests/system/small/bigquery/test_geo.py +++ b/tests/system/small/bigquery/test_geo.py @@ -58,11 +58,7 @@ def test_geo_st_area(): ) -# The tests for st_difference do not check against geopandas result because -# geopandas BigQuery's geography functions, like st_difference, interpret the -# geometry data as a point set on the Earth's surface while geopandas interprets -# the geometry data as a point set on planer surface -def test_geo_st_difference(): +def test_geo_st_difference_with_geometry_objects(): data1 = [ Polygon([(0, 0), (10, 0), (10, 10), (0, 0)]), Polygon([(0, 0), (1, 1), (0, 1), (0, 0)]), diff --git a/tests/system/small/geopandas/test_geoseries.py b/tests/system/small/geopandas/test_geoseries.py index 181e0b58cf..fdd9826468 100644 --- a/tests/system/small/geopandas/test_geoseries.py +++ b/tests/system/small/geopandas/test_geoseries.py @@ -20,7 +20,12 @@ import google.api_core.exceptions import pandas as pd import pytest -from shapely.geometry import LineString, Point, Polygon # type: ignore +from shapely.geometry import ( # type: ignore + GeometryCollection, + LineString, + Point, + Polygon, +) import bigframes.geopandas import bigframes.series @@ -196,25 +201,91 @@ def test_geo_boundary(): ) -def test_geo_difference_not_supported(): - s1 = bigframes.geopandas.GeoSeries( +# the GeoSeries and GeoPandas results are not always the same. +# For example, when the difference between two polygons is empty, +# GeoPandas returns 'POLYGON EMPTY' while GeoSeries returns 'GeometryCollection([])'. +# This is why we are hard-coding the expected results. +def test_geo_difference_with_geometry_objects(): + data1 = [ + Polygon([(0, 0), (10, 0), (10, 10), (0, 0)]), + Polygon([(0, 0), (1, 1), (0, 1), (0, 0)]), + Point(0, 1), + ] + + data2 = [ + Polygon([(0, 0), (10, 0), (10, 10), (0, 0)]), + Polygon([(0, 0), (1, 1), (0, 1), (0, 0)]), + LineString([(2, 0), (0, 2)]), + ] + + bf_s1 = bigframes.geopandas.GeoSeries(data=data1) + bf_s2 = bigframes.geopandas.GeoSeries(data=data2) + + bf_result = bf_s1.difference(bf_s2).to_pandas() + + expected = bigframes.geopandas.GeoSeries( [ - Polygon([(0.000, 0.0), (0.001, 0.001), (0.000, 0.001)]), - Polygon([(0.0010, 0.004), (0.009, 0.005), (0.0010, 0.005)]), - Polygon([(0.001, 0.001), (0.002, 0.001), (0.002, 0.002)]), - LineString([(0, 0), (1, 1), (0, 1)]), + Polygon([]), + Polygon([]), Point(0, 1), - ] - ) + ], + index=[0, 1, 2], + ).to_pandas() - s2 = bigframes.geopandas.GeoSeries([Polygon([(0, 0), (10, 0), (10, 10), (0, 0)])]) + assert bf_result.dtype == "geometry" + assert expected.iloc[0].equals(bf_result.iloc[0]) + assert expected.iloc[1].equals(bf_result.iloc[1]) + assert expected.iloc[2].equals(bf_result.iloc[2]) - bf_series1: bigframes.geopandas.GeoSeries = s1.geo - bf_series2: bigframes.geopandas.GeoSeries = s2.geo - with pytest.raises( - NotImplementedError, - match=re.escape( - f"GeoSeries.difference() is not supported. Use bigframes.bigquery.st_difference(series), instead. {constants.FEEDBACK_LINK}" + +def test_geo_difference_with_single_geometry_object(): + data1 = [ + Polygon([(0, 0), (10, 0), (10, 10), (0, 0)]), + Polygon([(4, 2), (6, 2), (8, 6), (4, 2)]), + Point(0, 1), + ] + + bf_s1 = bigframes.geopandas.GeoSeries(data=data1) + bf_result = bf_s1.difference( + bigframes.geopandas.GeoSeries( + [ + Polygon([(0, 0), (10, 0), (10, 10), (0, 0)]), + Polygon([(1, 0), (0, 5), (0, 0), (1, 0)]), + ] ), - ): - bf_series1.difference(bf_series2) + ).to_pandas() + + expected = bigframes.geopandas.GeoSeries( + [ + GeometryCollection([]), + Polygon([(4, 2), (6, 2), (8, 6), (4, 2)]), + None, + ], + index=[0, 1, 2], + ).to_pandas() + + assert bf_result.dtype == "geometry" + assert (expected.iloc[0]).equals(bf_result.iloc[0]) + assert expected.iloc[1] == bf_result.iloc[1] + assert expected.iloc[2] == bf_result.iloc[2] + + +def test_geo_difference_with_similar_geometry_objects(): + data1 = [ + Polygon([(0, 0), (10, 0), (10, 10), (0, 0)]), + Polygon([(0, 0), (1, 1), (0, 1)]), + Point(0, 1), + ] + + bf_s1 = bigframes.geopandas.GeoSeries(data=data1) + bf_result = bf_s1.difference(bf_s1).to_pandas() + + expected = bigframes.geopandas.GeoSeries( + [GeometryCollection([]), GeometryCollection([]), GeometryCollection([])], + index=[0, 1, 2], + ).to_pandas() + + assert bf_result.dtype == "geometry" + assert expected.iloc[0].equals(bf_result.iloc[0]) + assert expected.iloc[1].equals(bf_result.iloc[1]) + assert expected.iloc[2].equals(bf_result.iloc[2]) diff --git a/third_party/bigframes_vendored/geopandas/geoseries.py b/third_party/bigframes_vendored/geopandas/geoseries.py index a2e7b74059..b00d4220ff 100644 --- a/third_party/bigframes_vendored/geopandas/geoseries.py +++ b/third_party/bigframes_vendored/geopandas/geoseries.py @@ -239,3 +239,116 @@ def to_wkt(self) -> bigframes.series.Series: WKT representations of the geometries. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + + def difference(self: GeoSeries, other: GeoSeries) -> GeoSeries: # type: ignore + """ + Returns a GeoSeries of the points in each aligned geometry that are not + in other. + + The operation works on a 1-to-1 row-wise manner + + **Examples:** + + >>> import bigframes as bpd + >>> import bigframes.geopandas + >>> from shapely.geometry import Polygon, LineString, Point + >>> bpd.options.display.progress_bar = None + + We can check two GeoSeries against each other, row by row. + + >>> s1 = bigframes.geopandas.GeoSeries( + ... [ + ... Polygon([(0, 0), (2, 2), (0, 2)]), + ... Polygon([(0, 0), (2, 2), (0, 2)]), + ... LineString([(0, 0), (2, 2)]), + ... LineString([(2, 0), (0, 2)]), + ... Point(0, 1), + ... ], + ... ) + >>> s2 = bigframes.geopandas.GeoSeries( + ... [ + ... Polygon([(0, 0), (1, 1), (0, 1)]), + ... LineString([(1, 0), (1, 3)]), + ... LineString([(2, 0), (0, 2)]), + ... Point(1, 1), + ... Point(0, 1), + ... ], + ... index=range(1, 6), + ... ) + + >>> s1 + 0 POLYGON ((0 0, 2 2, 0 2, 0 0)) + 1 POLYGON ((0 0, 2 2, 0 2, 0 0)) + 2 LINESTRING (0 0, 2 2) + 3 LINESTRING (2 0, 0 2) + 4 POINT (0 1) + dtype: geometry + + >>> s2 + 1 POLYGON ((0 0, 1 1, 0 1, 0 0)) + 2 LINESTRING (1 0, 1 3) + 3 LINESTRING (2 0, 0 2) + 4 POINT (1 1) + 5 POINT (0 1) + dtype: geometry + + >>> s1.difference(s2) + 0 None + 1 POLYGON ((0.99954 1, 2 2, 0 2, 0 1, 0.99954 1)) + 2 LINESTRING (0 0, 1 1.00046, 2 2) + 3 GEOMETRYCOLLECTION EMPTY + 4 POINT (0 1) + 5 None + dtype: geometry + + We can also check difference of single shapely geometries: + + >>> sbq1 = bigframes.geopandas.GeoSeries( + ... [ + ... Polygon([(0, 0), (10, 0), (10, 10), (0, 0)]) + ... ] + ... ) + >>> sbq2 = bigframes.geopandas.GeoSeries( + ... [ + ... Polygon([(4, 2), (6, 2), (8, 6), (4, 2)]) + ... ] + ... ) + + >>> sbq1 + 0 POLYGON ((0 0, 10 0, 10 10, 0 0)) + dtype: geometry + + >>> sbq2 + 0 POLYGON ((4 2, 6 2, 8 6, 4 2)) + dtype: geometry + + >>> sbq1.difference(sbq2) + 0 POLYGON ((0 0, 10 0, 10 10, 0 0), (8 6, 6 2, 4... + dtype: geometry + + Additionally, we can check difference of a GeoSeries against a single shapely geometry: + + >>> s1.difference(sbq2) + 0 POLYGON ((0 0, 2 2, 0 2, 0 0)) + 1 None + 2 None + 3 None + 4 None + dtype: geometry + + Args: + other (GeoSeries or geometric object): + The GeoSeries (elementwise) or geometric object to find the + difference to. + + Returns: + bigframes.geopandas.GeoSeries: + A GeoSeries of the points in each aligned geometry that are not + in other. + + Raises: + NotImplementedError: + GeoSeries.difference is not supported. Use + bigframes.bigquery.st_difference(series), instead. + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) From a08f6e7d7631fadf94afe77c1b77acfd82629a3c Mon Sep 17 00:00:00 2001 From: Arwa Date: Thu, 20 Mar 2025 15:47:24 -0500 Subject: [PATCH 10/10] fix my py type error --- bigframes/geopandas/geoseries.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigframes/geopandas/geoseries.py b/bigframes/geopandas/geoseries.py index 1b2485158e..6c9cb77a08 100644 --- a/bigframes/geopandas/geoseries.py +++ b/bigframes/geopandas/geoseries.py @@ -94,5 +94,5 @@ def to_wkt(self: GeoSeries) -> bigframes.series.Series: series.name = None return series - def difference(self: GeoSeries, other: GeoSeries) -> GeoSeries: # type: ignore + def difference(self: GeoSeries, other: GeoSeries) -> bigframes.series.Series: # type: ignore return self._apply_binary_op(other, ops.geo_st_difference_op)