Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion bigframes/bigquery/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

import sys

from bigframes.bigquery import ai, ml
from bigframes.bigquery import ai, ml, obj
from bigframes.bigquery._operations.approx_agg import approx_top_count
from bigframes.bigquery._operations.array import (
array_agg,
Expand Down Expand Up @@ -158,4 +158,5 @@
# Modules / SQL namespaces
"ai",
"ml",
"obj",
]
107 changes: 107 additions & 0 deletions bigframes/bigquery/_operations/obj.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


"""
ObjectRef functions defined from
https://cloud.google.com/bigquery/docs/reference/standard-sql/object-ref-functions
"""


from __future__ import annotations

import datetime
from typing import Optional, Union

import numpy as np
import pandas as pd

from bigframes.core import log_adapter
import bigframes.core.utils as utils
import bigframes.operations as ops
import bigframes.series as series


@log_adapter.method_logger(custom_base_name="bigquery_obj")
def fetch_metadata(
objectref: series.Series,
) -> series.Series:
"""The OBJ.FETCH_METADATA function returns Cloud Storage metadata for a partially populated ObjectRef value.

Args:
objectref (bigframes.series.Series):
A partially populated ObjectRef value, in which the uri and authorizer fields are populated and the details field isn't.

Returns:
bigframes.series.Series: A fully populated ObjectRef value. The metadata is provided in the details field of the returned ObjectRef value.
"""
return objectref._apply_unary_op(ops.obj_fetch_metadata_op)


@log_adapter.method_logger(custom_base_name="bigquery_obj")
def get_access_url(
objectref: series.Series,
mode: str,
duration: Optional[
Union[datetime.timedelta, pd.Timedelta, np.timedelta64]
] = None,
) -> series.Series:
"""The OBJ.GET_ACCESS_URL function returns JSON that contains reference information for the input ObjectRef value, and also access URLs that you can use to read or modify the Cloud Storage object.

Args:
objectref (bigframes.series.Series):
An ObjectRef value that represents a Cloud Storage object.
mode (str):
A STRING value that identifies the type of URL that you want to be returned. The following values are supported:
'r': Returns a URL that lets you read the object.
'rw': Returns two URLs, one that lets you read the object, and one that lets you modify the object.
duration (Union[datetime.timedelta, pandas.Timedelta, numpy.timedelta64], optional):
An optional INTERVAL value that specifies how long the generated access URLs remain valid. You can specify a value between 30 minutes and 6 hours. For example, you could specify INTERVAL 2 HOUR to generate URLs that expire after 2 hours. The default value is 6 hours.

Returns:
bigframes.series.Series: A JSON value that contains the Cloud Storage object reference information from the input ObjectRef value, and also one or more URLs that you can use to access the Cloud Storage object.
"""
duration_micros = None
if duration is not None:
duration_micros = utils.timedelta_to_micros(duration)

return objectref._apply_unary_op(
ops.ObjGetAccessUrl(mode=mode, duration=duration_micros)
)


@log_adapter.method_logger(custom_base_name="bigquery_obj")
def make_ref(
uri_or_json: series.Series,
authorizer: Optional[series.Series] = None,
) -> series.Series:
"""Use the OBJ.MAKE_REF function to create an ObjectRef value that contains reference information for a Cloud Storage object.

Args:
uri_or_json (bigframes.series.Series):
A STRING value that contains the URI for the Cloud Storage object, for example, gs://mybucket/flowers/12345.jpg.
OR
A JSON value that represents a Cloud Storage object.
authorizer (bigframes.series.Series, optional):
A STRING value that contains the Cloud Resource connection used to access the Cloud Storage object.
Required if uri_or_json is a URI string.

Returns:
bigframes.series.Series: An ObjectRef value.
"""
if authorizer is not None:
return uri_or_json._apply_binary_op(authorizer, ops.obj_make_ref_op)

# If authorizer is not provided, we assume uri_or_json is a JSON objectref
return uri_or_json._apply_unary_op(ops.obj_make_ref_json_op)
29 changes: 29 additions & 0 deletions bigframes/bigquery/obj.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""This module exposes `BigQuery ObjectRef
<https://cloud.google.com/bigquery/docs/object-table-object-ref-functions>`_ functions.
"""

from bigframes.bigquery._operations.obj import (
fetch_metadata,
get_access_url,
make_ref,
)

__all__ = [
"fetch_metadata",
"get_access_url",
"make_ref",
]
22 changes: 22 additions & 0 deletions bigframes/core/compile/ibis_compiler/scalar_op_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -1247,6 +1247,11 @@ def obj_fetch_metadata_op_impl(obj_ref: ibis_types.Value):

@scalar_op_compiler.register_unary_op(ops.ObjGetAccessUrl, pass_op=True)
def obj_get_access_url_op_impl(obj_ref: ibis_types.Value, op: ops.ObjGetAccessUrl):
if op.duration is not None:
duration_value = ibis_types.literal(op.duration).to_interval("us")
return obj_get_access_url_with_duration(
obj_ref=obj_ref, mode=op.mode, duration=duration_value
)
return obj_get_access_url(obj_ref=obj_ref, mode=op.mode)


Expand Down Expand Up @@ -1807,6 +1812,11 @@ def obj_make_ref_op(x: ibis_types.Value, y: ibis_types.Value):
return obj_make_ref(uri=x, authorizer=y)


@scalar_op_compiler.register_unary_op(ops.obj_make_ref_json_op)
def obj_make_ref_json_op(x: ibis_types.Value):
return obj_make_ref_json(objectref_json=x)


# Ternary Operations
@scalar_op_compiler.register_ternary_op(ops.where_op)
def where_op(
Expand Down Expand Up @@ -2141,11 +2151,23 @@ def obj_make_ref(uri: str, authorizer: str) -> _OBJ_REF_IBIS_DTYPE: # type: ign
"""Make ObjectRef Struct from uri and connection."""


@ibis_udf.scalar.builtin(name="OBJ.MAKE_REF")
def obj_make_ref_json(objectref_json: ibis_dtypes.JSON) -> _OBJ_REF_IBIS_DTYPE: # type: ignore
"""Make ObjectRef Struct from json."""


@ibis_udf.scalar.builtin(name="OBJ.GET_ACCESS_URL")
def obj_get_access_url(obj_ref: _OBJ_REF_IBIS_DTYPE, mode: ibis_dtypes.String) -> ibis_dtypes.JSON: # type: ignore
"""Get access url (as ObjectRefRumtime JSON) from ObjectRef."""


@ibis_udf.scalar.builtin(name="OBJ.GET_ACCESS_URL")
def obj_get_access_url_with_duration(
obj_ref: _OBJ_REF_IBIS_DTYPE, mode: ibis_dtypes.String, duration: ibis_dtypes.Interval(unit="us") # type: ignore
) -> ibis_dtypes.JSON: # type: ignore
"""Get access url (as ObjectRefRumtime JSON) from ObjectRef."""


@ibis_udf.scalar.builtin(name="ltrim")
def str_lstrip_op( # type: ignore[empty-body]
x: ibis_dtypes.String, to_strip: ibis_dtypes.String
Expand Down
19 changes: 16 additions & 3 deletions bigframes/core/compile/sqlglot/expressions/blob_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,24 @@ def _(expr: TypedExpr) -> sge.Expression:
return sge.func("OBJ.FETCH_METADATA", expr.expr)


@register_unary_op(ops.ObjGetAccessUrl)
def _(expr: TypedExpr) -> sge.Expression:
return sge.func("OBJ.GET_ACCESS_URL", expr.expr)
@register_unary_op(ops.ObjGetAccessUrl, pass_op=True)
def _(expr: TypedExpr, op: ops.ObjGetAccessUrl) -> sge.Expression:
args = [expr.expr, sge.Literal.string(op.mode)]
if op.duration is not None:
args.append(
sge.Interval(
this=sge.Literal.number(op.duration),
unit=sge.Var(this="MICROSECOND"),
)
)
return sge.func("OBJ.GET_ACCESS_URL", *args)


@register_binary_op(ops.obj_make_ref_op)
def _(left: TypedExpr, right: TypedExpr) -> sge.Expression:
return sge.func("OBJ.MAKE_REF", left.expr, right.expr)


@register_unary_op(ops.obj_make_ref_json_op)
def _(expr: TypedExpr) -> sge.Expression:
return sge.func("OBJ.MAKE_REF", expr.expr)
2 changes: 2 additions & 0 deletions bigframes/operations/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
)
from bigframes.operations.blob_ops import (
obj_fetch_metadata_op,
obj_make_ref_json_op,
obj_make_ref_op,
ObjGetAccessUrl,
)
Expand Down Expand Up @@ -365,6 +366,7 @@
"ArrayToStringOp",
# Blob ops
"ObjGetAccessUrl",
"obj_make_ref_json_op",
"obj_make_ref_op",
"obj_fetch_metadata_op",
# Struct ops
Expand Down
12 changes: 12 additions & 0 deletions bigframes/operations/blob_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
class ObjGetAccessUrl(base_ops.UnaryOp):
name: typing.ClassVar[str] = "obj_get_access_url"
mode: str # access mode, e.g. R read, W write, RW read & write
duration: typing.Optional[int] = None # duration in microseconds

def output_type(self, *input_types):
return dtypes.JSON_DTYPE
Expand All @@ -46,3 +47,14 @@ def output_type(self, *input_types):


obj_make_ref_op = ObjMakeRef()


@dataclasses.dataclass(frozen=True)
class ObjMakeRefJson(base_ops.UnaryOp):
name: typing.ClassVar[str] = "obj_make_ref_json"

def output_type(self, *input_types):
return dtypes.OBJ_REF_DTYPE


obj_make_ref_json_op = ObjMakeRefJson()
100 changes: 100 additions & 0 deletions tests/unit/bigquery/test_obj.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import datetime
from unittest.mock import MagicMock

import pytest

import bigframes.bigquery.obj as obj
import bigframes.operations as ops
import bigframes.series as series


def test_fetch_metadata_op_structure():
op = ops.obj_fetch_metadata_op
assert op.name == "obj_fetch_metadata"

def test_get_access_url_op_structure():
op = ops.ObjGetAccessUrl(mode="r")
assert op.name == "obj_get_access_url"
assert op.mode == "r"
assert op.duration is None

def test_get_access_url_with_duration_op_structure():
op = ops.ObjGetAccessUrl(mode="rw", duration=3600000000)
assert op.name == "obj_get_access_url"
assert op.mode == "rw"
assert op.duration == 3600000000

def test_make_ref_op_structure():
op = ops.obj_make_ref_op
assert op.name == "obj_make_ref"

def test_make_ref_json_op_structure():
op = ops.obj_make_ref_json_op
assert op.name == "obj_make_ref_json"

def test_fetch_metadata_calls_apply_unary_op():
s = MagicMock(spec=series.Series)

obj.fetch_metadata(s)

s._apply_unary_op.assert_called_once()
args, _ = s._apply_unary_op.call_args
assert args[0] == ops.obj_fetch_metadata_op

def test_get_access_url_calls_apply_unary_op_without_duration():
s = MagicMock(spec=series.Series)

obj.get_access_url(s, mode="r")

s._apply_unary_op.assert_called_once()
args, _ = s._apply_unary_op.call_args
assert isinstance(args[0], ops.ObjGetAccessUrl)
assert args[0].mode == "r"
assert args[0].duration is None

def test_get_access_url_calls_apply_unary_op_with_duration():
s = MagicMock(spec=series.Series)
duration = datetime.timedelta(hours=1)

obj.get_access_url(s, mode="rw", duration=duration)

s._apply_unary_op.assert_called_once()
args, kwargs = s._apply_unary_op.call_args
assert isinstance(args[0], ops.ObjGetAccessUrl)
assert args[0].mode == "rw"
# 1 hour = 3600 seconds = 3600 * 1000 * 1000 microseconds
assert args[0].duration == 3600000000

def test_make_ref_calls_apply_binary_op_with_authorizer():
uri = MagicMock(spec=series.Series)
auth = MagicMock(spec=series.Series)

obj.make_ref(uri, authorizer=auth)

uri._apply_binary_op.assert_called_once()
args, _ = uri._apply_binary_op.call_args
assert args[0] == auth
assert args[1] == ops.obj_make_ref_op

def test_make_ref_calls_apply_unary_op_without_authorizer():
json_val = MagicMock(spec=series.Series)

obj.make_ref(json_val)

json_val._apply_unary_op.assert_called_once()
args, _ = json_val._apply_unary_op.call_args
assert args[0] == ops.obj_make_ref_json_op
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ WITH `bfcte_0` AS (
), `bfcte_2` AS (
SELECT
*,
OBJ.GET_ACCESS_URL(`bfcol_4`) AS `bfcol_7`
OBJ.GET_ACCESS_URL(`bfcol_4`, 'R') AS `bfcol_7`
FROM `bfcte_1`
), `bfcte_3` AS (
SELECT
Expand Down
Loading