26
26
the dictionary of submissions.
27
27
"""
28
28
import itertools
29
+ import json
29
30
import operator
30
31
import os
31
32
import re
45
46
# Base workloads, taken as-is from the workloads registry module.
BASE_WORKLOADS = workloads_registry.BASE_WORKLOADS
# Regex with two groups: an arbitrary prefix followed by a `_jax` or
# `_pytorch` suffix.
WORKLOAD_NAME_PATTERN = '(.*)(_jax|_pytorch)'
BASE_WORKLOADS_DIR = 'algorithmic_efficiency/workloads/'
# Read the held-out workloads from a JSON file. The relative path is resolved
# against the current working directory when this statement runs.
# NOTE(review): the file presumably holds a list of workload names, since the
# result is later concatenated with BASE_WORKLOADS -- confirm.
# TODO: This probably shouldn't be hardcoded but passed as an argument.
# JSON text is UTF-8, so decode it explicitly instead of relying on the
# platform's default locale encoding.
with open("held_out_workloads_algoperf_v05.json", "r", encoding="utf-8") as f:
  HELDOUT_WORKLOADS = json.load(f)
48
53
# These global variables have to be set according to the current set of
49
54
# workloads and rules for the scoring to be correct.
50
55
# We do not use the workload registry since it contains test and development
@@ -248,6 +253,9 @@ def filter(x):
248
253
try :
249
254
if x [variant_workload ] == np .inf :
250
255
return np .inf
256
+ # Also check for nan values (e.g. OOMs)
257
+ elif np .isnan (x [variant_workload ]):
258
+ return np .inf
251
259
else :
252
260
return x [base_workload ]
253
261
except KeyError as e :
@@ -306,8 +314,14 @@ def compute_performance_profiles(submissions,
306
314
self_tuning_ruleset ,
307
315
strict ))
308
316
df = pd .concat (dfs )
309
-
310
- # For each held-out workload set to inf if the base workload is inf
317
+ # Restrict to base and sampled held-out workloads
318
+ # (ignore the additional workload variants of the baseline
319
+ # as they cause issues when checking for nans in workload variants).
320
+ df = df [BASE_WORKLOADS + HELDOUT_WORKLOADS ]
321
+ # Sort workloads alphabetically (for better display)
322
+ df = df .reindex (sorted (df .columns ), axis = 1 )
323
+
324
+ # For each held-out workload, set its score to inf if the base workload is inf or nan
311
325
for workload in df .keys ():
312
326
if workload not in BASE_WORKLOADS :
313
327
# If the base workload does not have a finite score, set the variant score to inf
@@ -319,14 +333,13 @@ def compute_performance_profiles(submissions,
319
333
best_scores = df .min (axis = 0 )
320
334
df [df .apply (lambda x : x > 4 * best_scores , axis = 1 )] = np .inf
321
335
322
- # For each held-out workload if variant target was not hit set submission to inf
336
+ # For each base workload, if the variant target was not hit, set the submission to inf
323
337
for workload in df .keys ():
324
338
if workload not in BASE_WORKLOADS :
325
339
# If the variants do not have a finite score, set the base_workload score to inf
326
340
base_workload = get_base_workload_name (workload )
327
341
df [base_workload ] = df .apply (
328
342
variant_criteria_filter (base_workload , workload ), axis = 1 )
329
-
330
343
df = df [BASE_WORKLOADS ]
331
344
332
345
if verbosity > 0 :
0 commit comments