Skip to content

Commit bfedad2

Browse files
3 is the magic number
1 parent 95668eb commit bfedad2

File tree

1 file changed

+9
-7
lines changed

1 file changed

+9
-7
lines changed

tests/recipes/test_eleuther_eval.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -39,12 +39,12 @@ def mocked_import(name, *args, **kwargs):
     @pytest.fixture
     def expected_vision_acc(self):
         return {
-            "Science": 0,
+            "Science": 0.2,
             "Biology": 0,
-            "Chemistry": 0,
+            "Chemistry": 0.3333,
             "Geography": 0,
             "Math": 0,
-            "Physics": 0,
+            "Physics": 0.6667,
         }
4949

5050
@pytest.mark.parametrize(
@@ -212,6 +212,7 @@ def test_eval_recipe_errors_with_qat_quantizer(self, monkeypatch, tmpdir):
         runpy.run_path(TUNE_PATH, run_name="__main__")

     @pytest.mark.integration_test
+    @gpu_test(gpu_count=1)
     def test_meta_eval_vision(self, caplog, monkeypatch, tmpdir, expected_vision_acc):
         ckpt = "llama3_2_vision_meta"
         ckpt_path = Path(CKPT_MODEL_PATHS[ckpt])
@@ -230,9 +231,9 @@ def test_meta_eval_vision(self, caplog, monkeypatch, tmpdir, expected_vision_acc
             checkpointer.model_type=LLAMA3_VISION \
             tokenizer.path=/tmp/test-artifacts/tokenizer_llama3.model \
             tokenizer.prompt_template=null \
-            limit=1 \
+            limit=3 \
             dtype=bf16 \
-            device=cpu \
+            device=cuda \
             """.split()

         model_config = llama3_2_vision_test_config()
@@ -251,6 +252,7 @@ def test_meta_eval_vision(self, caplog, monkeypatch, tmpdir, expected_vision_acc
         assert math.isclose(float(accuracy), expected_vision_acc[task_name])

     @pytest.mark.integration_test
+    @gpu_test(gpu_count=1)
     def test_hf_eval_vision(self, caplog, monkeypatch, tmpdir, expected_vision_acc):
         ckpt = "llama3_2_vision_hf"
         ckpt_path = Path(CKPT_MODEL_PATHS[ckpt])
@@ -272,9 +274,9 @@ def test_hf_eval_vision(self, caplog, monkeypatch, tmpdir, expected_vision_acc):
             checkpointer.model_type=LLAMA3_VISION \
             tokenizer.path=/tmp/test-artifacts/tokenizer_llama3.model \
             tokenizer.prompt_template=null \
-            limit=1 \
+            limit=3 \
             dtype=bf16 \
-            device=cpu \
+            device=cuda \
             """.split()

         model_config = llama3_2_vision_test_config()

0 commit comments

Comments (0)