diff --git a/tests/integration/defs/accuracy/test_llm_api_pytorch.py b/tests/integration/defs/accuracy/test_llm_api_pytorch.py
index 0a2f44f35b2..acb810fe66b 100644
--- a/tests/integration/defs/accuracy/test_llm_api_pytorch.py
+++ b/tests/integration/defs/accuracy/test_llm_api_pytorch.py
@@ -2408,6 +2408,7 @@ class TestEXAONE4(LlmapiAccuracyTestHarness):
     kv_cache_config = KvCacheConfig(enable_block_reuse=False,
                                     enable_partial_reuse=False)
 
+    @pytest.mark.skip_less_device_memory(80000)
     def test_auto_dtype(self):
         model_path = f"{llm_models_root()}/EXAONE-4.0-32B"
         with LLM(model_path, kv_cache_config=self.kv_cache_config) as llm:
diff --git a/tests/integration/defs/test_e2e.py b/tests/integration/defs/test_e2e.py
index 95be7fb4c12..fa8f8b38756 100644
--- a/tests/integration/defs/test_e2e.py
+++ b/tests/integration/defs/test_e2e.py
@@ -2178,7 +2178,7 @@ def test_ptp_quickstart_advanced_8gpus_chunked_prefill_sq_22k(
 # This test is specifically to be run on 2 GPUs on Blackwell RTX 6000 Pro (SM120) architecture
 # TODO: remove once we have a node with 8 GPUs and reuse test_ptp_quickstart_advanced_8gpus
 @skip_no_sm120
-@pytest.mark.skip_less_device_memory(80000)
+@pytest.mark.skip_less_device_memory(160000)
 @pytest.mark.skip_less_device(2)
 @pytest.mark.parametrize("model_name,model_path", [
     ('Nemotron-Super-49B-v1-BF16',
@@ -2186,7 +2186,7 @@ def test_ptp_quickstart_advanced_8gpus_chunked_prefill_sq_22k(
     ("Mixtral-8x7B-BF16", "Mixtral-8x7B-Instruct-v0.1"),
     pytest.param('Llama3.1-70B-BF16',
                  'llama-3.1-model/Meta-Llama-3.1-70B',
-                 marks=pytest.mark.skip_less_device_memory(95000)),
+                 marks=pytest.mark.skip_less_device_memory(190000)),
 ])
 def test_ptp_quickstart_advanced_2gpus_sm120(llm_root, llm_venv, model_name,
                                              model_path):
diff --git a/tests/integration/test_lists/qa/llm_function_full.txt b/tests/integration/test_lists/qa/llm_function_full.txt
index 184b91ebafc..6ea09520ea9 100644
--- a/tests/integration/test_lists/qa/llm_function_full.txt
+++ b/tests/integration/test_lists/qa/llm_function_full.txt
@@ -451,7 +451,6 @@ accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_guided_decoding_
 accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_fp8_tp4
 accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_nvfp4_tp4
 accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_fp8_eagle3_tp8[eagle3_one_model=True]
-accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_fp8_eagle3_tp8[eagle3_one_model=False]
 accuracy/test_llm_api_pytorch.py::TestMistral7B::test_auto_dtype
 accuracy/test_llm_api_pytorch.py::TestGemma3_1BInstruct::test_auto_dtype
 accuracy/test_llm_api_pytorch.py::TestMistralSmall24B::test_auto_dtype
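
For context, a minimal sketch of how a custom marker like `skip_less_device_memory` is typically wired up in a pytest `conftest.py`. This is an illustration only, not the actual TensorRT-LLM implementation: the hook bodies, the per-device (as opposed to aggregate) memory check, and the torch-based lookup are all assumptions here; only the marker name comes from the diff.

```python
# Hypothetical conftest.py sketch (not the real TensorRT-LLM code):
# skip a test when the GPU's total memory, in MiB, is below the
# threshold given in @pytest.mark.skip_less_device_memory(...).
import pytest
import torch


def pytest_configure(config):
    # Register the marker so pytest does not warn about it being unknown.
    config.addinivalue_line(
        "markers",
        "skip_less_device_memory(mib): skip if device memory is below mib",
    )


def pytest_runtest_setup(item):
    marker = item.get_closest_marker("skip_less_device_memory")
    if marker is None:
        return
    required_mib = marker.args[0]
    if not torch.cuda.is_available():
        pytest.skip("CUDA device required")
    # Assumption: the threshold is checked per device, on device 0.
    total_mib = torch.cuda.get_device_properties(0).total_memory // (1024 * 1024)
    if total_mib < required_mib:
        pytest.skip(
            f"needs {required_mib} MiB device memory, found {total_mib} MiB")
```

Under this reading, the diff tightens the thresholds for the SM120 2-GPU quickstart tests (80000 to 160000, 95000 to 190000) and gates the EXAONE-4.0-32B accuracy test behind an 80000 MiB minimum, so the tests are skipped rather than failed on smaller-memory devices.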