-
Notifications
You must be signed in to change notification settings - Fork 666
Closed
Description
System information
- OS Platform and Distribution (e.g., Linux Ubuntu 16.04): Ubuntu 20.04
- Modin version : aa818f5
- Python version: 3.8.6
- Code we can use to reproduce:
# Reproduction script setup: configure Modin, then load the benchmark data.
import os
# Engine ("ray") and CPU count ("4") are passed to Modin through environment
# variables; both are set before modin is imported below.
os.environ["MODIN_ENGINE"] = "ray"
os.environ["MODIN_CPUS"] = "4"
import modin.pandas as pd
from timeit import default_timer as timer
# CSV file that is read into the DataFrame used by the benchmark queries.
data_filename = "h2o_10k.csv"
df = pd.read_csv(data_filename)
def q1(df):
    """Sum column ``v1`` within each ``id1`` group.

    Parameters
    ----------
    df : DataFrame
        Frame providing at least the columns ``id1`` and ``v1``.

    Returns
    -------
    DataFrame
        Result of ``df.groupby(["id1"]).agg({"v1": "sum"})``.
    """
    keys = ["id1"]
    aggregations = {"v1": "sum"}
    return df.groupby(keys).agg(aggregations)
def q10(df):
    """Aggregate over the six-column key ``id1`` .. ``id6``.

    For every distinct combination of the six key columns, compute the sum of
    ``v3`` and the count of ``v1``.

    Parameters
    ----------
    df : DataFrame
        Frame providing the columns ``id1`` .. ``id6``, ``v1`` and ``v3``.

    Returns
    -------
    DataFrame
        Result of the groupby aggregation, with columns ``v3`` (sum) and
        ``v1`` (count).
    """
    keys = ["id1", "id2", "id3", "id4", "id5", "id6"]
    aggregations = {"v3": "sum", "v1": "count"}
    return df.groupby(keys).agg(aggregations)
def meas_func(func, iters, *args, **kwargs):
    """Return the smallest wall-clock time of ``func`` over ``iters`` runs.

    Parameters
    ----------
    func : callable
        Function to time; its return value must expose a ``shape`` attribute.
    iters : int
        Number of timed runs.
    *args, **kwargs
        Forwarded unchanged to ``func`` on every run.

    Returns
    -------
    float
        Minimum elapsed time in seconds, or ``inf`` when ``iters`` is 0.
    """
    t_meas = float("inf")
    for _ in range(iters):
        t0 = timer()
        ans = func(*args, **kwargs)
        # Touch the result inside the timed region -- presumably to make a
        # lazily evaluated engine finish the work before the clock stops.
        ans.shape
        t = timer() - t0
        # Drop the result before the next run starts.
        del ans
        t_meas = min(t_meas, t)
    return t_meas
# Best-of-three timing for each query on the same DataFrame.
t_q1 = meas_func(q1, 3, df=df)
t_q10 = meas_func(q10, 3, df=df)
# Timings before vs. after commit ee39d1767efb363fe6c147fc701b41315eee3afa
print("t_q1:", t_q1)  # 0.026 s vs 0.031s
print("t_q10:", t_q10)  # 0.091 s vs 2.09 s
Describe the problem
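A performance regression in groupby aggregation appeared after commit ee39d17: the six-column groupby (q10) slowed from about 0.091 s to about 2.09 s, while the single-column groupby (q1) was nearly unchanged (0.026 s vs 0.031 s).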
Data file: h2o_10k.txt (note that the reproduction script reads a file named h2o_10k.csv)
Source code / logs
Metadata
Metadata
Assignees
Labels
bug 🦗 Something isn't working