diff --git a/src/sagemaker/serve/builder/djl_builder.py b/src/sagemaker/serve/builder/djl_builder.py index e89c1b8e9c..b07d78e142 100644 --- a/src/sagemaker/serve/builder/djl_builder.py +++ b/src/sagemaker/serve/builder/djl_builder.py @@ -265,6 +265,7 @@ def _djl_model_builder_deploy_wrapper(self, *args, **kwargs) -> Type[PredictorBa # if has not been built for local container we must use cache # that hosting has write access to. self.pysdk_model.env["TRANSFORMERS_CACHE"] = "/tmp" + self.pysdk_model.env["HF_HOME"] = "/tmp" self.pysdk_model.env["HUGGINGFACE_HUB_CACHE"] = "/tmp" if "endpoint_logging" not in kwargs: diff --git a/src/sagemaker/serve/builder/tei_builder.py b/src/sagemaker/serve/builder/tei_builder.py index 6aba3c9da2..953d72e39c 100644 --- a/src/sagemaker/serve/builder/tei_builder.py +++ b/src/sagemaker/serve/builder/tei_builder.py @@ -175,6 +175,7 @@ def _tei_model_builder_deploy_wrapper(self, *args, **kwargs) -> Type[PredictorBa # if has not been built for local container we must use cache # that hosting has write access to. self.pysdk_model.env["TRANSFORMERS_CACHE"] = "/tmp" + self.pysdk_model.env["HF_HOME"] = "/tmp" self.pysdk_model.env["HUGGINGFACE_HUB_CACHE"] = "/tmp" if "endpoint_logging" not in kwargs: diff --git a/src/sagemaker/serve/builder/tgi_builder.py b/src/sagemaker/serve/builder/tgi_builder.py index 23cc7e2202..3c199b3f56 100644 --- a/src/sagemaker/serve/builder/tgi_builder.py +++ b/src/sagemaker/serve/builder/tgi_builder.py @@ -214,6 +214,7 @@ def _tgi_model_builder_deploy_wrapper(self, *args, **kwargs) -> Type[PredictorBa # if has not been built for local container we must use cache # that hosting has write access to. self.pysdk_model.env["TRANSFORMERS_CACHE"] = "/tmp" + self.pysdk_model.env["HF_HOME"] = "/tmp" self.pysdk_model.env["HUGGINGFACE_HUB_CACHE"] = "/tmp" if "endpoint_logging" not in kwargs: diff --git a/src/sagemaker/serve/model_server/djl_serving/server.py b/src/sagemaker/serve/model_server/djl_serving/server.py index 8b152e5b81..80214332b0 100644 --- a/src/sagemaker/serve/model_server/djl_serving/server.py +++ b/src/sagemaker/serve/model_server/djl_serving/server.py @@ -19,6 +19,7 @@ _DEFAULT_ENV_VARS = { "SERVING_OPTS": "-Dai.djl.logging.level=debug", "TRANSFORMERS_CACHE": "/opt/ml/model/", + "HF_HOME": "/opt/ml/model/", "HUGGINGFACE_HUB_CACHE": "/opt/ml/model/", } diff --git a/src/sagemaker/serve/model_server/tei/server.py b/src/sagemaker/serve/model_server/tei/server.py index 67fca0e847..25c27e6dda 100644 --- a/src/sagemaker/serve/model_server/tei/server.py +++ b/src/sagemaker/serve/model_server/tei/server.py @@ -18,6 +18,7 @@ _SHM_SIZE = "2G" _DEFAULT_ENV_VARS = { "TRANSFORMERS_CACHE": "/opt/ml/model/", + "HF_HOME": "/opt/ml/model/", "HUGGINGFACE_HUB_CACHE": "/opt/ml/model/", } diff --git a/src/sagemaker/serve/model_server/tgi/server.py b/src/sagemaker/serve/model_server/tgi/server.py index ef39e890c8..75cf3bd402 100644 --- a/src/sagemaker/serve/model_server/tgi/server.py +++ b/src/sagemaker/serve/model_server/tgi/server.py @@ -17,6 +17,7 @@ _SHM_SIZE = "2G" _DEFAULT_ENV_VARS = { "TRANSFORMERS_CACHE": "/opt/ml/model/", + "HF_HOME": "/opt/ml/model/", "HUGGINGFACE_HUB_CACHE": "/opt/ml/model/", } diff --git a/tests/unit/sagemaker/serve/model_server/tei/test_server.py b/tests/unit/sagemaker/serve/model_server/tei/test_server.py index 16dcf12b5a..2344a61fbc 100644 --- a/tests/unit/sagemaker/serve/model_server/tei/test_server.py +++ b/tests/unit/sagemaker/serve/model_server/tei/test_server.py @@ -66,6 +66,7 @@ def test_start_invoke_destroy_local_tei_server(self, mock_requests): volumes={PosixPath("model_path/code"): {"bind": "/opt/ml/model/", "mode": "rw"}}, environment={ "TRANSFORMERS_CACHE": "/opt/ml/model/", + "HF_HOME": "/opt/ml/model/", "HUGGINGFACE_HUB_CACHE": "/opt/ml/model/", "KEY": "VALUE", "SAGEMAKER_SERVE_SECRET_KEY": "secret_key",