def setup(self, data_size, count_columns=1):
    """Build the benchmark fixture: a random integer DataFrame and the
    list of columns used as the groupby key.

    Parameters
    ----------
    data_size : tuple
        Sizing pair; ``data_size[1]`` and ``data_size[0]`` are forwarded to
        ``generate_dataframe`` as the column and row counts respectively.
    count_columns : int, default 1
        Number of leading DataFrame columns to use as the groupby key.
    """
    self.df = generate_dataframe(
        ASV_USE_IMPL, "int", data_size[1], data_size[0], RAND_LOW, RAND_HIGH
    )
    # Use Index.tolist() directly instead of a redundant list
    # comprehension over the column-Index slice.
    self.groupby_columns = self.df.columns[:count_columns].tolist()
class TimeMultiColumnGroupby (BaseTimeGroupBy ):
@@ -96,34 +96,23 @@ def time_groupby_mean(self, data_size):
96
96
97
97
98
98
class TimeGroupByDictionaryAggregation(BaseTimeGroupBy):
    """Benchmark ``groupby().agg()`` driven by a per-column operation dict.

    Parameterized over the data size and over the operation family
    ("reduction" vs. "aggregation"), replacing the two near-duplicate
    timing methods of the previous revision with a single parameterized one.
    """

    param_names = ["data_size", "operation_type"]
    params = [UNARY_OP_DATA_SIZE, ["reduction", "aggregation"]]
    # Operation families; setup() assigns them round-robin to the
    # aggregated columns.  Names must be exact pandas aggregation names
    # (no stray whitespace), otherwise .agg() would reject them.
    operations = {
        "reduction": ["sum", "count", "prod"],
        "aggregation": ["quantile", "std", "median"],
    }

    def setup(self, data_size, operation_type):
        # BaseTimeGroupBy.setup populates self.df and self.groupby_columns.
        super().setup(data_size)
        self.cols_to_agg = self.df.columns[1:4]
        operations = self.operations[operation_type]
        # Cycle through the family's operations across the selected columns.
        self.agg_dict = {
            c: operations[i % len(operations)] for i, c in enumerate(self.cols_to_agg)
        }

    def time_groupby_dict_agg(self, data_size, operation_type):
        # NOTE(review): execute() presumably forces materialization so lazy
        # backends are timed end-to-end — confirm against the helper's docs.
        execute(self.df.groupby(by=self.groupby_columns).agg(self.agg_dict))
class TimeJoin :
0 commit comments