diff --git a/src/sagemaker/huggingface/llm_utils.py b/src/sagemaker/huggingface/llm_utils.py index 974cffcddf..9927d1d293 100644 --- a/src/sagemaker/huggingface/llm_utils.py +++ b/src/sagemaker/huggingface/llm_utils.py @@ -72,6 +72,13 @@ def get_huggingface_llm_image_uri( version=version, image_scope="inference", ) + if backend == "huggingface-tei-cpu": + return image_uris.retrieve( + "huggingface-tei-cpu", + region=region, + version=version, + image_scope="inference", + ) if backend == "lmi": version = version or "0.24.0" return image_uris.retrieve(framework="djl-deepspeed", region=region, version=version) diff --git a/src/sagemaker/image_uri_config/huggingface-tei-cpu.json b/src/sagemaker/image_uri_config/huggingface-tei-cpu.json new file mode 100644 index 0000000000..d68b0d6307 --- /dev/null +++ b/src/sagemaker/image_uri_config/huggingface-tei-cpu.json @@ -0,0 +1,59 @@ +{ + "inference": { + "processors": [ + "cpu" + ], + "version_aliases": { + "1.2": "1.2.3" + }, + "versions": { + "1.2.3": { + "py_versions": [ + "py310" + ], + "registries": { + "af-south-1": "510948584623", + "ap-east-1": "651117190479", + "ap-northeast-1": "354813040037", + "ap-northeast-2": "366743142698", + "ap-northeast-3": "867004704886", + "ap-south-1": "720646828776", + "ap-south-2": "628508329040", + "ap-southeast-1": "121021644041", + "ap-southeast-2": "783357654285", + "ap-southeast-3": "951798379941", + "ap-southeast-4": "106583098589", + "ca-central-1": "341280168497", + "ca-west-1": "190319476487", + "cn-north-1": "450853457545", + "cn-northwest-1": "451049120500", + "eu-central-1": "492215442770", + "eu-central-2": "680994064768", + "eu-north-1": "662702820516", + "eu-south-1": "978288397137", + "eu-south-2": "104374241257", + "eu-west-1": "141502667606", + "eu-west-2": "764974769150", + "eu-west-3": "659782779980", + "il-central-1": "898809789911", + "me-central-1": "272398656194", + "me-south-1": "801668240914", + "sa-east-1": "737474898029", + "us-east-1": "683313688378", + "us-east-2": "257758044811", + "us-gov-east-1": "237065988967", + "us-gov-west-1": "414596584902", + "us-iso-east-1": "833128469047", + "us-isob-east-1": "281123927165", + "us-west-1": "746614075791", + "us-west-2": "246618743249" + }, + "tag_prefix": "2.0.1-tei1.2.3", + "repository": "tei-cpu", + "container_version": { + "cpu": "ubuntu22.04" + } + } + } + } +} \ No newline at end of file diff --git a/src/sagemaker/image_uris.py b/src/sagemaker/image_uris.py index 1ca73d0af9..be5167dcc7 100644 --- a/src/sagemaker/image_uris.py +++ b/src/sagemaker/image_uris.py @@ -37,7 +37,8 @@ ECR_URI_TEMPLATE = "{registry}.dkr.{hostname}/{repository}" HUGGING_FACE_FRAMEWORK = "huggingface" HUGGING_FACE_LLM_FRAMEWORK = "huggingface-llm" -HUGGING_FACE_TEI_FRAMEWORK = "huggingface-tei" +HUGGING_FACE_TEI_GPU_FRAMEWORK = "huggingface-tei" +HUGGING_FACE_TEI_CPU_FRAMEWORK = "huggingface-tei-cpu" HUGGING_FACE_LLM_NEURONX_FRAMEWORK = "huggingface-llm-neuronx" XGBOOST_FRAMEWORK = "xgboost" SKLEARN_FRAMEWORK = "sklearn" @@ -478,7 +479,8 @@ def _validate_version_and_set_if_needed(version, config, framework): if version is None and framework in [ DATA_WRANGLER_FRAMEWORK, HUGGING_FACE_LLM_FRAMEWORK, - HUGGING_FACE_TEI_FRAMEWORK, + HUGGING_FACE_TEI_GPU_FRAMEWORK, + HUGGING_FACE_TEI_CPU_FRAMEWORK, HUGGING_FACE_LLM_NEURONX_FRAMEWORK, STABILITYAI_FRAMEWORK, ]: diff --git a/tests/unit/sagemaker/image_uris/test_huggingface_llm.py b/tests/unit/sagemaker/image_uris/test_huggingface_llm.py index b1e8e8253e..fa10fd24fe 100644 --- a/tests/unit/sagemaker/image_uris/test_huggingface_llm.py +++ b/tests/unit/sagemaker/image_uris/test_huggingface_llm.py @@ -22,6 +22,9 @@ "gpu": { "1.2.3": "2.0.1-tei1.2.3-gpu-py310-cu122-ubuntu22.04", }, + "cpu": { + "1.2.3": "2.0.1-tei1.2.3-cpu-py310-ubuntu22.04", + }, } HF_VERSIONS_MAPPING = { "gpu": { @@ -73,17 +76,20 @@ def test_huggingface_uris(load_config): assert expected == uri -@pytest.mark.parametrize("load_config", ["huggingface-tei.json"], indirect=True) +@pytest.mark.parametrize( + "load_config", ["huggingface-tei.json", "huggingface-tei-cpu.json"], indirect=True +) def test_huggingface_tei_uris(load_config): VERSIONS = load_config["inference"]["versions"] device = load_config["inference"]["processors"][0] - backend = "huggingface-tei" + backend = "huggingface-tei" if device == "gpu" else "huggingface-tei-cpu" + repo = "tei" if device == "gpu" else "tei-cpu" for version in VERSIONS: ACCOUNTS = load_config["inference"]["versions"][version]["registries"] for region in ACCOUNTS.keys(): uri = get_huggingface_llm_image_uri(backend, region=region, version=version) expected = expected_uris.huggingface_llm_framework_uri( - "tei", + repo, ACCOUNTS[region], version, TEI_VERSIONS_MAPPING[device][version],