Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 11 additions & 9 deletions TraceLens/PerfModel/perf_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,7 @@ def __init__(self, event, arch=None, detail_level=0):
if not os.path.exists(os.environ.get('GEMMOLOGIST_PATH')):
raise ValueError(f"GEMMOLOGIST_PATH does not exist: {os.environ.get('GEMMOLOGIST_PATH')}")
dtype = torch_dtype_map(self.param_details['dtype_A_B'][0])
gemmologist_time = self.get_gemmologist_time(arch, self.M, self.N, self.K, dtype)
self.gemmologist_time = gemmologist_time
self.gemmologist_time, self.gemmologist_cmd = GEMM.get_gemmologist_time(arch, self.M, self.N, self.K, dtype)
else:
# TODO: use naive roofline model
pass
Expand Down Expand Up @@ -161,32 +160,35 @@ def bytes_bwd(self, bytes_per_element):

@staticmethod
def get_gemmologist_time(arch, M, N, K, dtype):
    """Simulate an M x N x K GEMM with gemmologist and return its predicted time.

    The gemmologist checkout is located through the ``GEMMOLOGIST_PATH``
    environment variable; ``./bin/gemmologist.py`` is run with that
    directory as the working directory. Results are memoized in
    ``GEMM.cache_gemm_results``, keyed on the exact command line, so any
    argument that changes the command also changes the cache entry.

    Args:
        arch: Mapping that provides at least ``"name"`` and ``"freq_mhz"``.
        M, N, K: GEMM dimensions, forwarded as ``-m``, ``-n`` and ``-k``.
        dtype: Data-type string forwarded as ``--dtype``.

    Returns:
        A ``(gemmologist_time, command)`` tuple: the first ``Time=<float>``
        value printed by gemmologist, and the space-joined command line
        that produces it. Cache hits and fresh runs return the same shape.

    Raises:
        AssertionError: If gemmologist's stdout contains no ``Time=<float>``
            entry; the command, stdout and stderr are attached as arguments.
    """
    cmd = [
        "./bin/gemmologist.py",
        "-m", str(M),
        "-n", str(N),
        "-k", str(K),
        "--dtype", dtype,
        "-d", "1",
        "-a", arch["name"],
        "--freq_mhz", str(arch["freq_mhz"]),
        "--topn", "1",
    ]
    cmd_str = " ".join(cmd)

    # The command line fully determines the simulation, so it is the cache key.
    cache_key = tuple(cmd)
    if cache_key in GEMM.cache_gemm_results:
        return GEMM.cache_gemm_results[cache_key], cmd_str

    # assume that gemmologist path is given in the environment variable GEMMOLOGIST_PATH
    gemmologist_path = os.environ.get('GEMMOLOGIST_PATH')
    result = subprocess.run(cmd, cwd=gemmologist_path, capture_output=True, text=True)
    stdout = result.stdout
    stderr = result.stderr

    # Only the first reported time is used (the command asks for --topn 1).
    match = re.search(r"Time=(\d+\.\d+)", stdout)
    if match is None:
        raise AssertionError("Not able to simulate in gemmologist", cmd, stdout, stderr)

    gemmologist_time = float(match.group(1))
    # Cache the result
    GEMM.cache_gemm_results[cache_key] = gemmologist_time
    return gemmologist_time, cmd_str

Expand Down
2 changes: 2 additions & 0 deletions TraceLens/TreePerf/tree_perf.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ def compute_perf_metrics(self, event, bwd=False,
if hasattr(perf_model, "gemmologist_time"):
dict_metrics['Gemmologist Time (µs)'] = perf_model.gemmologist_time
dict_metrics['Gemmologist TFLOPS/s'] = (gflops / 1e3) / (perf_model.gemmologist_time / 1e6) if perf_model.gemmologist_time > 0 else float('nan')
dict_metrics['Gemmologist cmd'] = perf_model.gemmologist_cmd

for key, value in perf_model.param_details.items():
dict_metrics[f"param: {key}"] = value
Expand Down Expand Up @@ -239,6 +240,7 @@ def summarize_df_perf_metrics(df_perf_metrics, agg_metrics=['mean', 'std']):
# first since it should be same for the group
dict_agg['Gemmologist TFLOPS/s'] = 'first'
dict_agg['Gemmologist Time (µs)'] = 'first'
dict_agg['Gemmologist cmd'] = 'first'
if 'Non-Data-Mov TFLOPS/s' in df_perf_metrics.columns:
dict_agg['Non-Data-Mov TFLOPS/s'] = agg_metrics
if 'Non-Data-Mov Kernel Time (µs)' in df_perf_metrics.columns:
Expand Down