tests/integration/defs/accuracy/test_llm_api_pytorch.py (1 addition, 0 deletions)

@@ -2408,6 +2408,7 @@ class TestEXAONE4(LlmapiAccuracyTestHarness):
     kv_cache_config = KvCacheConfig(enable_block_reuse=False,
                                     enable_partial_reuse=False)

+    @pytest.mark.skip_less_device_memory(80000)
     def test_auto_dtype(self):
         model_path = f"{llm_models_root()}/EXAONE-4.0-32B"
         with LLM(model_path, kv_cache_config=self.kv_cache_config) as llm:
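The marker added above gates the EXAONE-4.0-32B test on available GPU memory. As a hedged sketch of how a custom skip_less_device_memory marker can be honored at collection time (the repository ships its own handling; the conftest-style hook, the torch.cuda memory query, and the MiB unit below are assumptions for illustration only):

# Illustrative conftest.py sketch, not the repository's actual implementation.
import pytest
import torch


def pytest_collection_modifyitems(config, items):
    """Skip tests whose skip_less_device_memory requirement exceeds GPU 0's memory."""
    if not torch.cuda.is_available():
        return
    # Total memory of GPU 0, converted to MiB (assumed unit of the marker argument).
    total_mib = torch.cuda.get_device_properties(0).total_memory / (1024 ** 2)
    for item in items:
        marker = item.get_closest_marker("skip_less_device_memory")
        if marker is not None and total_mib < marker.args[0]:
            item.add_marker(
                pytest.mark.skip(
                    reason=f"requires >= {marker.args[0]} MiB of GPU memory"))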
tests/integration/defs/test_e2e.py (2 additions, 2 deletions)

@@ -2178,15 +2178,15 @@ def test_ptp_quickstart_advanced_8gpus_chunked_prefill_sq_22k(
 # This test is specifically to be run on 2 GPUs on Blackwell RTX 6000 Pro (SM120) architecture
 # TODO: remove once we have a node with 8 GPUs and reuse test_ptp_quickstart_advanced_8gpus
 @skip_no_sm120
-@pytest.mark.skip_less_device_memory(80000)
+@pytest.mark.skip_less_device_memory(160000)
 @pytest.mark.skip_less_device(2)
 @pytest.mark.parametrize("model_name,model_path", [
     ('Nemotron-Super-49B-v1-BF16',
      'nemotron-nas/Llama-3_3-Nemotron-Super-49B-v1'),
     ("Mixtral-8x7B-BF16", "Mixtral-8x7B-Instruct-v0.1"),
     pytest.param('Llama3.1-70B-BF16',
                  'llama-3.1-model/Meta-Llama-3.1-70B',
                  marks=pytest.mark.skip_less_device_memory(95000)),
+                 marks=pytest.mark.skip_less_device_memory(190000)),
 ])
 def test_ptp_quickstart_advanced_2gpus_sm120(llm_root, llm_venv, model_name,
                                              model_path):
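Here the bump to 160000 raises the baseline memory requirement for the whole parametrized test, while pytest.param(..., marks=...) attaches a stricter 190000 requirement to the Llama3.1-70B-BF16 case only. When both a function-level marker and a per-parameter marker apply, a resolution helper would presumably take the largest value; a hedged sketch under that assumption (hypothetical helper, not the repository's code):

def required_device_memory_mib(item, default=0):
    """Return the strictest (largest) skip_less_device_memory value attached to a test item."""
    # Function-level marks and pytest.param marks both surface via iter_markers().
    requirements = [
        marker.args[0]
        for marker in item.iter_markers(name="skip_less_device_memory")
        if marker.args
    ]
    return max(requirements, default=default)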
tests/integration/test_lists/qa/llm_function_full.txt (0 additions, 1 deletion)

@@ -451,7 +451,6 @@ accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_guided_decoding_
 accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_fp8_tp4
 accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_nvfp4_tp4
 accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_fp8_eagle3_tp8[eagle3_one_model=True]
-accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_fp8_eagle3_tp8[eagle3_one_model=False]
 accuracy/test_llm_api_pytorch.py::TestMistral7B::test_auto_dtype
 accuracy/test_llm_api_pytorch.py::TestGemma3_1BInstruct::test_auto_dtype
 accuracy/test_llm_api_pytorch.py::TestMistralSmall24B::test_auto_dtype