Commit 2262dc4

FEAT-#2491: addressing comments
Signed-off-by: Dmitry Chigarev <[email protected]>
1 parent 349fe85 commit 2262dc4

File tree

1 file changed: +15 -26 lines
asv_bench/benchmarks/benchmarks.py

Lines changed: 15 additions & 26 deletions
@@ -62,7 +62,7 @@ def setup(self, data_size, count_columns=1):
         self.df = generate_dataframe(
             ASV_USE_IMPL, "int", data_size[1], data_size[0], RAND_LOW, RAND_HIGH
         )
-        self.groupby_columns = [col for col in self.df.columns[:count_columns]]
+        self.groupby_columns = self.df.columns[:count_columns].tolist()


 class TimeMultiColumnGroupby(BaseTimeGroupBy):
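
The first hunk swaps a list comprehension over the column slice for the index's own tolist(); both forms build the same Python list. A minimal illustrative sketch (plain pandas with a made-up frame, not the benchmark's generate_dataframe output):

    import pandas as pd

    # Hypothetical frame, only to show the two forms are equivalent.
    df = pd.DataFrame({"a": [1], "b": [2], "c": [3]})
    count_columns = 2

    assert [col for col in df.columns[:count_columns]] == df.columns[:count_columns].tolist()
    # Both evaluate to ["a", "b"]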
@@ -96,34 +96,23 @@ def time_groupby_mean(self, data_size):


 class TimeGroupByDictionaryAggregation(BaseTimeGroupBy):
-    param_names = ["data_size"]
-    params = [
-        UNARY_OP_DATA_SIZE,
-    ]
-    reduction_operations = ["sum", "count", "prod"]
-    agg_operations = ["quantile", "std", "median"]
-
-    def setup(self, data_size):
+    param_names = ["data_size", "operation_type"]
+    params = [UNARY_OP_DATA_SIZE, ["reduction", "aggregation"]]
+    operations = {
+        "reduction": ["sum", "count", "prod"],
+        "aggregation": ["quantile", "std", "median"],
+    }
+
+    def setup(self, data_size, operation_type):
         super().setup(data_size)
         self.cols_to_agg = self.df.columns[1:4]
+        operations = self.operations[operation_type]
+        self.agg_dict = {
+            c: operations[i % len(operations)] for i, c in enumerate(self.cols_to_agg)
+        }

-    @trigger_execution
-    def time_groupby_dictionary_reduction(self, data_size):
-        return self.df.groupby(by=self.groupby_columns).agg(
-            {
-                c: self.reduction_operations[i % len(self.reduction_operations)]
-                for i, c in enumerate(self.cols_to_agg)
-            }
-        )
-
-    @trigger_execution
-    def time_groupby_dictionary_aggregation(self, data_size):
-        return self.df.groupby(by=self.groupby_columns).agg(
-            {
-                c: self.agg_operations[i % len(self.agg_operations)]
-                for i, c in enumerate(self.cols_to_agg)
-            }
-        )
+    def time_groupby_dict_agg(self, data_size, operation_type):
+        execute(self.df.groupby(by=self.groupby_columns).agg(self.agg_dict))


 class TimeJoin:
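
The second hunk merges the two near-duplicate timing methods into a single time_groupby_dict_agg driven by a new ASV parameter, operation_type, so ASV now times every combination of data_size and operation_type; judging by the diff, the execute(...) call takes over the role of the removed @trigger_execution decorator and forces the (possibly lazy) result inside the timed section. A small standalone sketch of the agg-dict construction, using made-up column names in place of the benchmark's df.columns[1:4]:

    operations = {
        "reduction": ["sum", "count", "prod"],
        "aggregation": ["quantile", "std", "median"],
    }

    cols_to_agg = ["col2", "col3", "col4"]  # hypothetical stand-ins for df.columns[1:4]

    ops = operations["reduction"]
    agg_dict = {c: ops[i % len(ops)] for i, c in enumerate(cols_to_agg)}
    # agg_dict == {"col2": "sum", "col3": "count", "col4": "prod"}

With three columns and three operations per group, each column gets a distinct operation, which keeps the benchmarked .agg(...) call representative of a mixed dictionary aggregation.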
