From 8f501836b2e939e0b640318b5e104e361d69f620 Mon Sep 17 00:00:00 2001 From: jiapinw Date: Fri, 10 May 2024 10:46:57 -0700 Subject: [PATCH 1/3] Initial commit for telemetry support --- src/sagemaker/serve/builder/model_builder.py | 3 ++ .../serve/model_format/mlflow/utils.py | 9 +++- src/sagemaker/serve/utils/telemetry_logger.py | 17 +++++++ .../model_format/mlflow/test_mlflow_utils.py | 50 +++++++++++++++++++ .../serve/utils/test_telemetry_logger.py | 38 ++++++++++++++ 5 files changed, 116 insertions(+), 1 deletion(-) diff --git a/src/sagemaker/serve/builder/model_builder.py b/src/sagemaker/serve/builder/model_builder.py index 52268ea40c..25f4c1cddc 100644 --- a/src/sagemaker/serve/builder/model_builder.py +++ b/src/sagemaker/serve/builder/model_builder.py @@ -669,6 +669,9 @@ def _initialize_for_mlflow(self) -> None: mlflow_path = self.model_metadata.get(MLFLOW_MODEL_PATH) if not _mlflow_input_is_local_path(mlflow_path): # TODO: extend to package arn, run id and etc. + logger.info( + f"Start downloading model artifacts from {mlflow_path} to {self.model_path}" + ) _download_s3_artifacts(mlflow_path, self.model_path, self.sagemaker_session) else: _copy_directory_contents(mlflow_path, self.model_path) diff --git a/src/sagemaker/serve/model_format/mlflow/utils.py b/src/sagemaker/serve/model_format/mlflow/utils.py index b67de08d78..359ed770c2 100644 --- a/src/sagemaker/serve/model_format/mlflow/utils.py +++ b/src/sagemaker/serve/model_format/mlflow/utils.py @@ -278,7 +278,7 @@ def _download_s3_artifacts(s3_path: str, dst_path: str, session: Session) -> Non os.makedirs(local_file_dir, exist_ok=True) # Download the file - print(f"Downloading {key} to {local_file_path}") + logger.info(f"Downloading {key} to {local_file_path}") s3.download_file(s3_bucket, key, local_file_path) @@ -356,6 +356,13 @@ def _select_container_for_mlflow_model( logger.info("Auto-detected framework to use is %s", framework_to_use) logger.info("Auto-detected framework version is %s", framework_version) + if framework_version is None: + raise ValueError( + ("Unable to auto detect framework version. Please provide framework %s as part of the " + "requirements.txt file for deployment flavor %s") % (framework_to_use, + deployment_flavor) + ) + casted_versions = ( _cast_to_compatible_version(framework_to_use, framework_version) if framework_version diff --git a/src/sagemaker/serve/utils/telemetry_logger.py b/src/sagemaker/serve/utils/telemetry_logger.py index 64cbce03e8..19f6f8c541 100644 --- a/src/sagemaker/serve/utils/telemetry_logger.py +++ b/src/sagemaker/serve/utils/telemetry_logger.py @@ -19,7 +19,11 @@ from sagemaker import Session, exceptions from sagemaker.serve.mode.function_pointers import Mode +from sagemaker.serve.model_format.mlflow.constants import MLFLOW_MODEL_PATH from sagemaker.serve.utils.exceptions import ModelBuilderException +from sagemaker.serve.utils.lineage_constants import MLFLOW_LOCAL_PATH, MLFLOW_S3_PATH, \ + MLFLOW_MODEL_PACKAGE_PATH, MLFLOW_RUN_ID, MLFLOW_REGISTRY_PATH +from sagemaker.serve.utils.lineage_utils import _get_mlflow_model_path_type from sagemaker.serve.utils.types import ModelServer, ImageUriOption from sagemaker.serve.validations.check_image_uri import is_1p_image_uri from sagemaker.user_agent import SDK_VERSION @@ -51,6 +55,14 @@ str(ModelServer.TGI): 6, } +MLFLOW_MODEL_PATH_CODE = { + MLFLOW_LOCAL_PATH: 1, + MLFLOW_S3_PATH: 2, + MLFLOW_MODEL_PACKAGE_PATH: 3, + MLFLOW_RUN_ID: 4, + MLFLOW_REGISTRY_PATH: 5, +} + def _capture_telemetry(func_name: str): """Placeholder docstring""" @@ -78,6 +90,11 @@ def wrapper(self, *args, **kwargs): if self.sagemaker_session and self.sagemaker_session.endpoint_arn: extra += f"&x-endpointArn={self.sagemaker_session.endpoint_arn}" + if getattr(self, "_is_mlflow_model", False): + mlflow_model_path = self.model_metadata[MLFLOW_MODEL_PATH] + mlflow_model_path_type = _get_mlflow_model_path_type(mlflow_model_path) + extra += f"&x-mlflowModelPathType={MLFLOW_MODEL_PATH_CODE[mlflow_model_path_type]}" + start_timer = perf_counter() try: response = func(self, *args, **kwargs) diff --git a/tests/unit/sagemaker/serve/model_format/mlflow/test_mlflow_utils.py b/tests/unit/sagemaker/serve/model_format/mlflow/test_mlflow_utils.py index 27c3a5280f..cabb8b0baf 100644 --- a/tests/unit/sagemaker/serve/model_format/mlflow/test_mlflow_utils.py +++ b/tests/unit/sagemaker/serve/model_format/mlflow/test_mlflow_utils.py @@ -418,6 +418,56 @@ def test_select_container_for_mlflow_model_no_dlc_detected( ) +@patch("sagemaker.image_uris.retrieve") +@patch("sagemaker.serve.model_format.mlflow.utils._cast_to_compatible_version") +@patch("sagemaker.serve.model_format.mlflow.utils._get_framework_version_from_requirements") +@patch( + "sagemaker.serve.model_format.mlflow.utils._get_python_version_from_parsed_mlflow_model_file" +) +@patch("sagemaker.serve.model_format.mlflow.utils._get_all_flavor_metadata") +@patch("sagemaker.serve.model_format.mlflow.utils._generate_mlflow_artifact_path") +def test_select_container_for_mlflow_model_no_framework_version_detected( + mock_generate_mlflow_artifact_path, + mock_get_all_flavor_metadata, + mock_get_python_version_from_parsed_mlflow_model_file, + mock_get_framework_version_from_requirements, + mock_cast_to_compatible_version, + mock_image_uris_retrieve, +): + mlflow_model_src_path = "/path/to/mlflow_model" + deployment_flavor = "pytorch" + region = "us-west-2" + instance_type = "ml.m5.xlarge" + + mock_requirements_path = "/path/to/requirements.txt" + mock_metadata_path = "/path/to/mlmodel" + mock_flavor_metadata = {"pytorch": {"some_key": "some_value"}} + mock_python_version = "3.8.6" + + mock_generate_mlflow_artifact_path.side_effect = lambda path, artifact: ( + mock_requirements_path if artifact == "requirements.txt" else mock_metadata_path + ) + mock_get_all_flavor_metadata.return_value = mock_flavor_metadata + mock_get_python_version_from_parsed_mlflow_model_file.return_value = mock_python_version + mock_get_framework_version_from_requirements.return_value = None + + with pytest.raises(ValueError, match="Unable to auto detect a DLC for framework"): + _select_container_for_mlflow_model( + mlflow_model_src_path, deployment_flavor, region, instance_type + ) + + mock_generate_mlflow_artifact_path.assert_any_call( + mlflow_model_src_path, "requirements.txt" + ) + mock_generate_mlflow_artifact_path.assert_any_call(mlflow_model_src_path, "MLmodel") + mock_get_all_flavor_metadata.assert_called_once_with(mock_metadata_path) + mock_get_framework_version_from_requirements.assert_called_once_with( + deployment_flavor, mock_requirements_path + ) + mock_cast_to_compatible_version.assert_not_called() + mock_image_uris_retrieve.assert_not_called() + + def test_validate_input_for_mlflow(): _validate_input_for_mlflow(ModelServer.TORCHSERVE, "pytorch") diff --git a/tests/unit/sagemaker/serve/utils/test_telemetry_logger.py b/tests/unit/sagemaker/serve/utils/test_telemetry_logger.py index e8273dd9a1..61aa030eb2 100644 --- a/tests/unit/sagemaker/serve/utils/test_telemetry_logger.py +++ b/tests/unit/sagemaker/serve/utils/test_telemetry_logger.py @@ -14,6 +14,7 @@ import unittest from unittest.mock import Mock, patch from sagemaker.serve import Mode, ModelServer +from sagemaker.serve.model_format.mlflow.constants import MLFLOW_MODEL_PATH from sagemaker.serve.utils.telemetry_logger import ( _send_telemetry, _capture_telemetry, @@ -32,9 +33,14 @@ "763104351884.dkr.ecr.us-east-1.amazonaws.com/" "huggingface-pytorch-inference:2.0.0-transformers4.28.1-cpu-py310-ubuntu20.04" ) +MOCK_PYTORCH_CONTAINER = \ + "763104351884.dkr.ecr.us-west-2.amazonaws.com/pytorch-inference:2.0.1-cpu-py310" MOCK_HUGGINGFACE_ID = "meta-llama/Llama-2-7b-hf" MOCK_EXCEPTION = LocalModelOutOfMemoryException("mock raise ex") MOCK_ENDPOINT_ARN = "arn:aws:sagemaker:us-west-2:123456789012:endpoint/test" +MOCK_MODEL_METADATA_FOR_MLFLOW = { + MLFLOW_MODEL_PATH: "s3://some_path" +} class ModelBuilderMock: @@ -239,3 +245,35 @@ def test_construct_url_with_failure_reason_and_extra_info(self): f"&x-extra={mock_extra_info}" ) self.assertEquals(ret_url, expected_base_url) + + + @patch("sagemaker.serve.utils.telemetry_logger._send_telemetry") + def test_capture_telemetry_decorator_mlflow_success(self, mock_send_telemetry): + mock_model_builder = ModelBuilderMock() + mock_model_builder.serve_settings.telemetry_opt_out = False + mock_model_builder.image_uri = MOCK_PYTORCH_CONTAINER + mock_model_builder._is_mlflow_model = True + mock_model_builder.model_metadata = MOCK_MODEL_METADATA_FOR_MLFLOW + mock_model_builder._is_custom_image_uri = False + mock_model_builder.mode = Mode.SAGEMAKER_ENDPOINT + mock_model_builder.model_server = ModelServer.TORCHSERVE + mock_model_builder.sagemaker_session.endpoint_arn = MOCK_ENDPOINT_ARN + + mock_model_builder.mock_deploy() + + args = mock_send_telemetry.call_args.args + latency = str(args[5]).split("latency=")[1] + expected_extra_str = ( + f"{MOCK_FUNC_NAME}" + "&x-modelServer=1" + "&x-imageTag=763104351884.dkr.ecr.us-west-2.amazonaws.com/pytorch-inference:2.0.1-cpu-py310" + f"&x-sdkVersion={SDK_VERSION}" + f"&x-defaultImageUsage={ImageUriOption.DEFAULT_IMAGE.value}" + f"&x-endpointArn={MOCK_ENDPOINT_ARN}" + f"&x-mlflowModelPathType=2" + f"&x-latency={latency}" + ) + + mock_send_telemetry.assert_called_once_with( + "1", 2, MOCK_SESSION, None, None, expected_extra_str + ) From e9f51810995c2b41bec43ba14a6ebdf47ca68f40 Mon Sep 17 00:00:00 2001 From: jiapinw Date: Fri, 10 May 2024 12:10:42 -0700 Subject: [PATCH 2/3] Fix style issues and add more logger messages --- src/sagemaker/serve/builder/model_builder.py | 2 +- src/sagemaker/serve/model_format/mlflow/constants.py | 6 +++--- src/sagemaker/serve/model_format/mlflow/utils.py | 8 +++++--- src/sagemaker/serve/utils/lineage_utils.py | 4 ++-- src/sagemaker/serve/utils/telemetry_logger.py | 9 +++++++-- .../serve/model_format/mlflow/test_mlflow_utils.py | 12 ++++++------ .../sagemaker/serve/utils/test_telemetry_logger.py | 12 +++++------- 7 files changed, 29 insertions(+), 24 deletions(-) diff --git a/src/sagemaker/serve/builder/model_builder.py b/src/sagemaker/serve/builder/model_builder.py index 25f4c1cddc..53c316efab 100644 --- a/src/sagemaker/serve/builder/model_builder.py +++ b/src/sagemaker/serve/builder/model_builder.py @@ -670,7 +670,7 @@ def _initialize_for_mlflow(self) -> None: if not _mlflow_input_is_local_path(mlflow_path): # TODO: extend to package arn, run id and etc. logger.info( - f"Start downloading model artifacts from {mlflow_path} to {self.model_path}" + "Start downloading model artifacts from %s to %s", mlflow_path, self.model_path ) _download_s3_artifacts(mlflow_path, self.model_path, self.sagemaker_session) else: diff --git a/src/sagemaker/serve/model_format/mlflow/constants.py b/src/sagemaker/serve/model_format/mlflow/constants.py index 5d2ac484b5..28a3cbdc8d 100644 --- a/src/sagemaker/serve/model_format/mlflow/constants.py +++ b/src/sagemaker/serve/model_format/mlflow/constants.py @@ -19,12 +19,12 @@ "py39": "1.13.1", "py310": "2.2.0", } -MODEL_PACAKGE_ARN_REGEX = ( - r"^arn:aws:sagemaker:[a-z0-9\-]+:[0-9]{12}:model-package\/[" r"a-zA-Z0-9\-_\/\.]+$" +MODEL_PACKAGE_ARN_REGEX = ( + r"^arn:aws:sagemaker:[a-z0-9\-]+:[0-9]{12}:model-package\/(.*?)(?:/(\d+))?$" ) MLFLOW_RUN_ID_REGEX = r"^runs:/[a-zA-Z0-9]+(/[a-zA-Z0-9]+)*$" MLFLOW_REGISTRY_PATH_REGEX = r"^models:/[a-zA-Z0-9\-_\.]+(/[0-9]+)*$" -S3_PATH_REGEX = r"^s3:\/\/[a-zA-Z0-9\-_\.]+\/[a-zA-Z0-9\-_\/\.]*$" +S3_PATH_REGEX = r"^s3:\/\/[a-zA-Z0-9\-_\.]+(?:\/[a-zA-Z0-9\-_\/\.]*)?$" MLFLOW_MODEL_PATH = "MLFLOW_MODEL_PATH" MLFLOW_METADATA_FILE = "MLmodel" MLFLOW_PIP_DEPENDENCY_FILE = "requirements.txt" diff --git a/src/sagemaker/serve/model_format/mlflow/utils.py b/src/sagemaker/serve/model_format/mlflow/utils.py index 359ed770c2..c92a6a8a27 100644 --- a/src/sagemaker/serve/model_format/mlflow/utils.py +++ b/src/sagemaker/serve/model_format/mlflow/utils.py @@ -358,9 +358,11 @@ def _select_container_for_mlflow_model( if framework_version is None: raise ValueError( - ("Unable to auto detect framework version. Please provide framework %s as part of the " - "requirements.txt file for deployment flavor %s") % (framework_to_use, - deployment_flavor) + ( + "Unable to auto detect framework version. Please provide framework %s as part of the " + "requirements.txt file for deployment flavor %s" + ) + % (framework_to_use, deployment_flavor) ) casted_versions = ( diff --git a/src/sagemaker/serve/utils/lineage_utils.py b/src/sagemaker/serve/utils/lineage_utils.py index b2e28d26c3..3435e138c9 100644 --- a/src/sagemaker/serve/utils/lineage_utils.py +++ b/src/sagemaker/serve/utils/lineage_utils.py @@ -28,7 +28,7 @@ from sagemaker.lineage.query import LineageSourceEnum from sagemaker.serve.model_format.mlflow.constants import ( MLFLOW_RUN_ID_REGEX, - MODEL_PACAKGE_ARN_REGEX, + MODEL_PACKAGE_ARN_REGEX, S3_PATH_REGEX, MLFLOW_REGISTRY_PATH_REGEX, ) @@ -107,7 +107,7 @@ def _get_mlflow_model_path_type(mlflow_model_path: str) -> str: """ mlflow_rub_id_pattern = MLFLOW_RUN_ID_REGEX mlflow_registry_id_pattern = MLFLOW_REGISTRY_PATH_REGEX - sagemaker_arn_pattern = MODEL_PACAKGE_ARN_REGEX + sagemaker_arn_pattern = MODEL_PACKAGE_ARN_REGEX s3_pattern = S3_PATH_REGEX if re.match(mlflow_rub_id_pattern, mlflow_model_path): diff --git a/src/sagemaker/serve/utils/telemetry_logger.py b/src/sagemaker/serve/utils/telemetry_logger.py index 19f6f8c541..8983a4b5c9 100644 --- a/src/sagemaker/serve/utils/telemetry_logger.py +++ b/src/sagemaker/serve/utils/telemetry_logger.py @@ -21,8 +21,13 @@ from sagemaker.serve.mode.function_pointers import Mode from sagemaker.serve.model_format.mlflow.constants import MLFLOW_MODEL_PATH from sagemaker.serve.utils.exceptions import ModelBuilderException -from sagemaker.serve.utils.lineage_constants import MLFLOW_LOCAL_PATH, MLFLOW_S3_PATH, \ - MLFLOW_MODEL_PACKAGE_PATH, MLFLOW_RUN_ID, MLFLOW_REGISTRY_PATH +from sagemaker.serve.utils.lineage_constants import ( + MLFLOW_LOCAL_PATH, + MLFLOW_S3_PATH, + MLFLOW_MODEL_PACKAGE_PATH, + MLFLOW_RUN_ID, + MLFLOW_REGISTRY_PATH, +) from sagemaker.serve.utils.lineage_utils import _get_mlflow_model_path_type from sagemaker.serve.utils.types import ModelServer, ImageUriOption from sagemaker.serve.validations.check_image_uri import is_1p_image_uri diff --git a/tests/unit/sagemaker/serve/model_format/mlflow/test_mlflow_utils.py b/tests/unit/sagemaker/serve/model_format/mlflow/test_mlflow_utils.py index cabb8b0baf..a775e8d981 100644 --- a/tests/unit/sagemaker/serve/model_format/mlflow/test_mlflow_utils.py +++ b/tests/unit/sagemaker/serve/model_format/mlflow/test_mlflow_utils.py @@ -427,12 +427,12 @@ def test_select_container_for_mlflow_model_no_dlc_detected( @patch("sagemaker.serve.model_format.mlflow.utils._get_all_flavor_metadata") @patch("sagemaker.serve.model_format.mlflow.utils._generate_mlflow_artifact_path") def test_select_container_for_mlflow_model_no_framework_version_detected( - mock_generate_mlflow_artifact_path, - mock_get_all_flavor_metadata, - mock_get_python_version_from_parsed_mlflow_model_file, - mock_get_framework_version_from_requirements, - mock_cast_to_compatible_version, - mock_image_uris_retrieve, + mock_generate_mlflow_artifact_path, + mock_get_all_flavor_metadata, + mock_get_python_version_from_parsed_mlflow_model_file, + mock_get_framework_version_from_requirements, + mock_cast_to_compatible_version, + mock_image_uris_retrieve, ): mlflow_model_src_path = "/path/to/mlflow_model" deployment_flavor = "pytorch" diff --git a/tests/unit/sagemaker/serve/utils/test_telemetry_logger.py b/tests/unit/sagemaker/serve/utils/test_telemetry_logger.py index 61aa030eb2..33af575e8f 100644 --- a/tests/unit/sagemaker/serve/utils/test_telemetry_logger.py +++ b/tests/unit/sagemaker/serve/utils/test_telemetry_logger.py @@ -33,14 +33,13 @@ "763104351884.dkr.ecr.us-east-1.amazonaws.com/" "huggingface-pytorch-inference:2.0.0-transformers4.28.1-cpu-py310-ubuntu20.04" ) -MOCK_PYTORCH_CONTAINER = \ +MOCK_PYTORCH_CONTAINER = ( "763104351884.dkr.ecr.us-west-2.amazonaws.com/pytorch-inference:2.0.1-cpu-py310" +) MOCK_HUGGINGFACE_ID = "meta-llama/Llama-2-7b-hf" MOCK_EXCEPTION = LocalModelOutOfMemoryException("mock raise ex") MOCK_ENDPOINT_ARN = "arn:aws:sagemaker:us-west-2:123456789012:endpoint/test" -MOCK_MODEL_METADATA_FOR_MLFLOW = { - MLFLOW_MODEL_PATH: "s3://some_path" -} +MOCK_MODEL_METADATA_FOR_MLFLOW = {MLFLOW_MODEL_PATH: "s3://some_path"} class ModelBuilderMock: @@ -246,7 +245,6 @@ def test_construct_url_with_failure_reason_and_extra_info(self): ) self.assertEquals(ret_url, expected_base_url) - @patch("sagemaker.serve.utils.telemetry_logger._send_telemetry") def test_capture_telemetry_decorator_mlflow_success(self, mock_send_telemetry): mock_model_builder = ModelBuilderMock() @@ -266,7 +264,7 @@ def test_capture_telemetry_decorator_mlflow_success(self, mock_send_telemetry): expected_extra_str = ( f"{MOCK_FUNC_NAME}" "&x-modelServer=1" - "&x-imageTag=763104351884.dkr.ecr.us-west-2.amazonaws.com/pytorch-inference:2.0.1-cpu-py310" + "&x-imageTag=pytorch-inference:2.0.1-cpu-py310" f"&x-sdkVersion={SDK_VERSION}" f"&x-defaultImageUsage={ImageUriOption.DEFAULT_IMAGE.value}" f"&x-endpointArn={MOCK_ENDPOINT_ARN}" @@ -275,5 +273,5 @@ def test_capture_telemetry_decorator_mlflow_success(self, mock_send_telemetry): ) mock_send_telemetry.assert_called_once_with( - "1", 2, MOCK_SESSION, None, None, expected_extra_str + "1", 3, MOCK_SESSION, None, None, expected_extra_str ) From e7d803e7399473914f5813cde53ccc50d140ab29 Mon Sep 17 00:00:00 2001 From: jiapinw Date: Fri, 10 May 2024 15:01:39 -0700 Subject: [PATCH 3/3] fix value error messages in ut --- .../serve/model_format/mlflow/test_mlflow_utils.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/unit/sagemaker/serve/model_format/mlflow/test_mlflow_utils.py b/tests/unit/sagemaker/serve/model_format/mlflow/test_mlflow_utils.py index a775e8d981..23d1315647 100644 --- a/tests/unit/sagemaker/serve/model_format/mlflow/test_mlflow_utils.py +++ b/tests/unit/sagemaker/serve/model_format/mlflow/test_mlflow_utils.py @@ -451,7 +451,12 @@ def test_select_container_for_mlflow_model_no_framework_version_detected( mock_get_python_version_from_parsed_mlflow_model_file.return_value = mock_python_version mock_get_framework_version_from_requirements.return_value = None - with pytest.raises(ValueError, match="Unable to auto detect a DLC for framework"): + with pytest.raises( + ValueError, + match="Unable to auto detect framework version. Please provide framework " + "pytorch as part of the requirements.txt file for deployment flavor " + "pytorch", + ): _select_container_for_mlflow_model( mlflow_model_src_path, deployment_flavor, region, instance_type )