
Commit 6b188ba

Merge pull request #670 from mlcommons/dev
dev -> main
2 parents: f9791d0 + 24632ad

653 files changed: +297,414 −47 lines


CHANGELOG.md

Lines changed: 15 additions & 0 deletions
```diff
@@ -1,5 +1,20 @@
 # Change Log
 
+## algoperf-benchmark-0.1.2 (2024-03-04)
+Workload variant additions and fixes:
+- Add Deepspeech workload variant
+- Fix bugs in Imagenet ResNet, WMT and Criteo1tb variants
+
+Add prize qualification logs for external tuning ruleset.
+Note: FastMRI trials with dropout are not yet added due to https://github.com/mlcommons/algorithmic-efficiency/issues/664.
+
+Add missing functionality to Docker startup script for self_tuning ruleset.
+Add self_tuning ruleset option to script that runs all workloads for scoring.
+
+Dataset setup fixes.
+
+Fix tests that check training differences in PyTorch and JAX on GPU.
+
 ## algoperf-benchmark-0.1.1 (2024-01-19)
 Bug fixes to FastMRI metric calculation and targets.
```

GETTING_STARTED.md

Lines changed: 9 additions & 0 deletions
````diff
@@ -381,4 +381,13 @@ python score_submissions.py --submission_directory <directory_with_submissions>
 
 We provide the scores and performance profiles for the [paper baseline algorithms](/reference_algorithms/paper_baselines/) in the "Baseline Results" section in [Benchmarking Neural Network Training Algorithms](https://arxiv.org/abs/2306.07179).
 
+## Package Submission for Self-Reporting
+To prepare your submission for self-reporting, run:
+
+```
+python3 package_logs.py --experiment_dir <experiment_dir> --destination_dir <destination_dir>
+```
+
+The destination directory will contain the logs packed in studies and trials required for self-reporting.
+
 **Good Luck!**
````

algorithmic_efficiency/workloads/criteo1tb/criteo1tb_jax/models.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -88,7 +88,7 @@ def scaled_init(key, shape, dtype=jnp.float_):
               stddev=jnp.sqrt(1.0 / mlp_top_dims[layer_idx])))(
                   top_mlp_input)
       x = nn.relu(x)
-      if self.dropout_rate > 0.0 and layer_idx == num_layers_top - 2:
+      if self.dropout_rate and layer_idx == num_layers_top - 2:
         x = nn.Dropout(rate=self.dropout_rate, deterministic=not train)(x)
       top_mlp_input += x
     # In the DLRM model the last layer width is always 1. We can hardcode that
```
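A note on this change: the truthiness test also handles a `None` dropout rate, which the explicit comparison does not (presumably the motivation here). In Python 3, ordering comparisons against `None` raise `TypeError`, while `if None:` and `if 0.0:` both simply skip the branch. A minimal standalone sketch:

```python
# Minimal sketch: why `if rate:` is safer than `if rate > 0.0:` when the
# rate may be None. Both None and 0.0 are falsy, so dropout is skipped.
for rate in (None, 0.0, 0.1):
    if rate:
        print(f'rate={rate}: dropout applied')
    else:
        print(f'rate={rate}: dropout skipped')

# The old comparison raises for None in Python 3:
try:
    None > 0.0
except TypeError as err:
    print(f'None > 0.0 fails: {err}')
```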

algorithmic_efficiency/workloads/criteo1tb/workload.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -97,7 +97,7 @@ def max_allowed_runtime_sec(self) -> int:
 
   @property
   def eval_period_time_sec(self) -> int:
-    return 2 * 600  # 20 mins.
+    return 2 * 60  # 2 mins.
 
   def _build_input_queue(
       self,
```
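For reference, the old value worked out to 2 × 600 = 1200 seconds (20 minutes), while the new one is 2 × 60 = 120 seconds, so the comment now matches the returned value.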

algorithmic_efficiency/workloads/fastmri/fastmri_jax/workload.py

Lines changed: 4 additions & 2 deletions
```diff
@@ -33,8 +33,10 @@ def init_model_fn(
         use_tanh=self.use_tanh,
         use_layer_norm=self.use_layer_norm,
         dropout_rate=dropout_rate)
-
-    variables = jax.jit(self._model.init)({'params': rng}, fake_batch)
+    params_rng, dropout_rng = jax.random.split(rng)
+    variables = jax.jit(
+        self._model.init)({'params': params_rng, 'dropout': dropout_rng},
+                          fake_batch)
     params = variables['params']
     self._param_shapes = param_utils.jax_param_shapes(params)
     self._param_types = param_utils.jax_param_types(self._param_shapes)
```
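Context for this change: Flax modules that contain `nn.Dropout` draw randomness from a separate `'dropout'` PRNG collection, so `Module.init` needs both keys when dropout is live at initialization time. A self-contained sketch of the pattern (the `MiniModel` module below is illustrative, not the workload's actual U-Net):

```python
import jax
import jax.numpy as jnp
from flax import linen as nn

class MiniModel(nn.Module):  # illustrative stand-in for the FastMRI model
  dropout_rate: float = 0.1

  @nn.compact
  def __call__(self, x, train=True):
    x = nn.Dense(16)(x)
    # Dropout pulls randomness from the 'dropout' PRNG collection.
    x = nn.Dropout(rate=self.dropout_rate, deterministic=not train)(x)
    return nn.Dense(1)(x)

rng = jax.random.PRNGKey(0)
params_rng, dropout_rng = jax.random.split(rng)
model = MiniModel()
# init needs both streams because dropout is active (train=True) at init.
variables = jax.jit(model.init)(
    {'params': params_rng, 'dropout': dropout_rng}, jnp.ones((2, 8)))
```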

algorithmic_efficiency/workloads/librispeech_conformer/librispeech_jax/workload.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -327,7 +327,7 @@ def _eval_model_on_split(self,
                           global_step: int = 0) -> Dict[str, float]:
     """Run a full evaluation of the model."""
     del global_step
-    if model_state is not None:
+    if model_state is not None and len(model_state) > 0:
       # Sync batch statistics across replicas before evaluating.
       model_state = self.sync_batch_stats(model_state)
```
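Worth noting: the extra guard matters because an empty state dict is not `None`, so the old check alone would still attempt to sync batch statistics that do not exist (see the Deepspeech change below, which can now return `{}` as the model state). A two-line illustration:

```python
model_state = {}
print(model_state is not None)                            # True: old guard would sync
print(model_state is not None and len(model_state) > 0)   # False: new guard skips
```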

algorithmic_efficiency/workloads/librispeech_deepspeech/librispeech_jax/workload.py

Lines changed: 2 additions & 1 deletion
```diff
@@ -47,7 +47,8 @@ def init_model_fn(
     variables = model_init_fn({'params': params_rng, 'dropout': dropout_rng},
                               *fake_input_batch)
 
-    model_state = variables['batch_stats']
+    model_state = variables[
+        'batch_stats'] if not self.layernorm_everywhere else {}
     params = variables['params']
     self._param_shapes = param_utils.jax_param_shapes(params)
     self._param_types = param_utils.jax_param_types(self._param_shapes)
```
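This is where the empty model state handled by the Conformer change above originates: with `layernorm_everywhere` set, the model presumably contains no BatchNorm layers, so `variables` has no `'batch_stats'` collection to index, and an empty dict is returned instead.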

algorithmic_efficiency/workloads/wmt/wmt_jax/workload.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -121,7 +121,7 @@ def predict_step(self,
                    max_decode_len: int,
                    beam_size: int = 4) -> spec.Tensor:
     """Predict translation with fast decoding beam search on a batch."""
-    config = models.TransformerConfig(deterministic=True, decode=True)
+    config = replace(self._eval_model.config, decode=True)
     # Prepare transformer fast-decoder call for beam search: for beam search, we
     # need to set up our decoder model to handle a batch size equal to
     # batch_size * beam_size, where each batch item's data is expanded in-place
```
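Here `replace` is presumably `dataclasses.replace`: rather than constructing a fresh `models.TransformerConfig` (which would silently reset every field other than the two passed in), it copies the eval model's existing config and overrides only `decode`. A minimal sketch of the pattern, using an illustrative stand-in config:

```python
from dataclasses import dataclass, replace

@dataclass(frozen=True)
class TransformerConfig:  # illustrative stand-in, not the repo's class
  deterministic: bool = False
  decode: bool = False
  num_layers: int = 6

eval_config = TransformerConfig(deterministic=True, num_layers=12)
decode_config = replace(eval_config, decode=True)
assert decode_config.num_layers == 12  # non-default field is preserved
assert decode_config.deterministic     # carried over from the eval config
```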

algorithmic_efficiency/workloads/workloads.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -26,7 +26,7 @@
     },
     'criteo1tb_embed_init': {
         'workload_path': 'criteo1tb/criteo1tb',
-        'workload_class_name': 'Criteo1TbDlrmSmallEmbeddingInitWorkload'
+        'workload_class_name': 'Criteo1TbDlrmSmallEmbedInitWorkload'
    },
    'criteo1tb_resnet': {
        'workload_path': 'criteo1tb/criteo1tb',
```

datasets/dataset_setup.py

Lines changed: 5 additions & 5 deletions
```diff
@@ -706,13 +706,13 @@ def main(_):
         'to download the FastMRI dataset.\nSign up for the URLs at '
         'https://fastmri.med.nyu.edu/.')
 
-    updated_data_dir = download_fastmri(data_dir,
-                                        knee_singlecoil_train_url,
-                                        knee_singlecoil_val_url,
-                                        knee_singlecoil_test_url)
+    download_fastmri(data_dir,
+                     knee_singlecoil_train_url,
+                     knee_singlecoil_val_url,
+                     knee_singlecoil_test_url)
 
     logging.info('fastMRI download completed. Extracting...')
-    setup_fastmri(data_dir, updated_data_dir)
+    setup_fastmri(data_dir)
 
   if FLAGS.all or FLAGS.imagenet:
     flags.mark_flag_as_required('imagenet_train_url')
```
