@@ -112,6 +112,7 @@ class MaterializationOptions:
112
112
downsampling : sampling_options .SamplingOptions = dataclasses .field (
113
113
default_factory = sampling_options .SamplingOptions
114
114
)
115
+ allow_large_results : Optional [bool ] = None
115
116
ordered : bool = True
116
117
117
118
@@ -479,9 +480,12 @@ def to_arrow(
479
480
self ,
480
481
* ,
481
482
ordered : bool = True ,
483
+ allow_large_results : Optional [bool ] = None ,
482
484
) -> Tuple [pa .Table , bigquery .QueryJob ]:
483
485
"""Run query and download results as a pyarrow Table."""
484
- execute_result = self .session ._executor .execute (self .expr , ordered = ordered )
486
+ execute_result = self .session ._executor .execute (
487
+ self .expr , ordered = ordered , use_explicit_destination = allow_large_results
488
+ )
485
489
pa_table = execute_result .to_arrow_table ()
486
490
487
491
pa_index_labels = []
@@ -503,6 +507,7 @@ def to_pandas(
503
507
random_state : Optional [int ] = None ,
504
508
* ,
505
509
ordered : bool = True ,
510
+ allow_large_results : Optional [bool ] = None ,
506
511
) -> Tuple [pd .DataFrame , Optional [bigquery .QueryJob ]]:
507
512
"""Run query and download results as a pandas DataFrame.
508
513
@@ -545,7 +550,9 @@ def to_pandas(
545
550
546
551
df , query_job = self ._materialize_local (
547
552
materialize_options = MaterializationOptions (
548
- downsampling = sampling , ordered = ordered
553
+ downsampling = sampling ,
554
+ allow_large_results = allow_large_results ,
555
+ ordered = ordered ,
549
556
)
550
557
)
551
558
df .set_axis (self .column_labels , axis = 1 , copy = False )
@@ -563,7 +570,10 @@ def try_peek(
563
570
return None
564
571
565
572
def to_pandas_batches (
566
- self , page_size : Optional [int ] = None , max_results : Optional [int ] = None
573
+ self ,
574
+ page_size : Optional [int ] = None ,
575
+ max_results : Optional [int ] = None ,
576
+ allow_large_results : Optional [bool ] = None ,
567
577
):
568
578
"""Download results one message at a time.
569
579
@@ -572,7 +582,7 @@ def to_pandas_batches(
572
582
execute_result = self .session ._executor .execute (
573
583
self .expr ,
574
584
ordered = True ,
575
- use_explicit_destination = True ,
585
+ use_explicit_destination = allow_large_results ,
576
586
page_size = page_size ,
577
587
max_results = max_results ,
578
588
)
@@ -601,7 +611,10 @@ def _materialize_local(
601
611
"""Run query and download results as a pandas DataFrame. Return the total number of results as well."""
602
612
# TODO(swast): Allow for dry run and timeout.
603
613
execute_result = self .session ._executor .execute (
604
- self .expr , ordered = materialize_options .ordered , get_size_bytes = True
614
+ self .expr ,
615
+ ordered = materialize_options .ordered ,
616
+ use_explicit_destination = materialize_options .allow_large_results ,
617
+ get_size_bytes = True ,
605
618
)
606
619
assert execute_result .total_bytes is not None
607
620
table_mb = execute_result .total_bytes / _BYTES_TO_MEGABYTES
@@ -1698,7 +1711,7 @@ def transpose(
1698
1711
original_row_index = (
1699
1712
original_row_index
1700
1713
if original_row_index is not None
1701
- else self .index .to_pandas (ordered = True )
1714
+ else self .index .to_pandas (ordered = True )[ 0 ]
1702
1715
)
1703
1716
original_row_count = len (original_row_index )
1704
1717
if original_row_count > bigframes .constants .MAX_COLUMNS :
@@ -2657,14 +2670,22 @@ def column_ids(self) -> Sequence[str]:
2657
2670
def is_null (self ) -> bool :
2658
2671
return len (self ._block ._index_columns ) == 0
2659
2672
2660
- def to_pandas (self , * , ordered : Optional [bool ] = None ) -> pd .Index :
2673
+ def to_pandas (
2674
+ self ,
2675
+ * ,
2676
+ ordered : Optional [bool ] = None ,
2677
+ allow_large_results : Optional [bool ] = None ,
2678
+ ) -> Tuple [pd .Index , Optional [bigquery .QueryJob ]]:
2661
2679
"""Executes deferred operations and downloads the results.

Materializes only the index: the underlying block is narrowed to zero
value columns before it is downloaded, and the index of the resulting
frame is returned together with the query job reported by the block.

Args:
    ordered: Forwarded to the block's ``to_pandas``. ``None`` is treated
        as ``True``.
    allow_large_results: Forwarded unchanged to the block's ``to_pandas``.

Returns:
    A ``(index, query_job)`` tuple. Callers that only need the index must
    take element ``[0]``.

Raises:
    bigframes.exceptions.NullIndexError: If this object has no index
        columns.
"""
2662
2680
if len (self .column_ids ) == 0 :
2663
2681
raise bigframes .exceptions .NullIndexError (
2664
2682
"Cannot materialize index, as this object does not have an index. Set index column(s) using set_index."
2665
2683
)
2666
2684
ordered = ordered if ordered is not None else True
2667
- return self ._block .select_columns ([]).to_pandas (ordered = ordered )[0 ].index
2685
+ df , query_job = self ._block .select_columns ([]).to_pandas (
2686
+ ordered = ordered , allow_large_results = allow_large_results
2687
+ )
2688
+ return df .index , query_job
2668
2689
2669
2690
def resolve_level (self , level : LevelsType ) -> typing .Sequence [str ]:
2670
2691
if utils .is_list_like (level ):
0 commit comments