
Commit 06d9eb2

feat: update alt config to work with model packages (#4706)
* feat: update alt config to work with model packages
* format
* remove env vars for model package
* fix tests
1 parent 149edb7 commit 06d9eb2
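
In brief: JumpStart alternative deployment configs can now resolve to a marketplace model package instead of a hosted artifact. A minimal usage sketch of the flow this commit enables (not part of the diff; the model ID and config name are taken from the test fixtures below and are purely illustrative):

from sagemaker.jumpstart.model import JumpStartModel

# Select a JumpStart model, then switch to an alternative deployment
# config that is backed by a model package.
model = JumpStartModel(model_id="pytorch-eqa-bert-base-cased")
model.set_deployment_config(
    config_name="gpu-inference-model-package", instance_type="ml.p2.xlarge"
)
# The resolved config now carries a regional model package ARN, and its
# inference environment variables are dropped (the package brings its own
# serving configuration), so deploy() uses the package directly.
predictor = model.deploy()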

File tree

6 files changed: +200, -10 lines changed


src/sagemaker/jumpstart/artifacts/model_packages.py

Lines changed: 4 additions & 1 deletion
@@ -96,7 +96,10 @@ def _retrieve_model_package_arn(
     if instance_specific_arn is not None:
         return instance_specific_arn
 
-    if model_specs.hosting_model_package_arns is None:
+    if (
+        model_specs.hosting_model_package_arns is None
+        or model_specs.hosting_model_package_arns == {}
+    ):
         return None
 
     regional_arn = model_specs.hosting_model_package_arns.get(region)
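
A minimal sketch (not part of the diff) of what the widened guard does: together with the spec parsing change in types.py below, both a missing key and an empty map now short-circuit before the regional lookup. The helper name is hypothetical:

def _arn_for(hosting_model_package_arns, region):
    # Mirrors the guard above: treat None and {} identically.
    if hosting_model_package_arns is None or hosting_model_package_arns == {}:
        return None
    return hosting_model_package_arns.get(region)

assert _arn_for(None, "us-west-2") is None
assert _arn_for({}, "us-west-2") is None
assert _arn_for({"us-west-2": "arn:example"}, "us-west-2") == "arn:example"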

src/sagemaker/jumpstart/types.py

Lines changed: 10 additions & 1 deletion
@@ -951,7 +951,10 @@ def from_json(self, json_obj: Dict[str, Any]) -> None:
 
         self.hosting_eula_key: Optional[str] = json_obj.get("hosting_eula_key")
 
-        self.hosting_model_package_arns: Optional[Dict] = json_obj.get("hosting_model_package_arns")
+        model_package_arns = json_obj.get("hosting_model_package_arns")
+        self.hosting_model_package_arns: Optional[Dict] = (
+            model_package_arns if model_package_arns is not None else {}
+        )
         self.hosting_use_script_uri: bool = json_obj.get("hosting_use_script_uri", True)
 
         self.hosting_instance_type_variants: Optional[JumpStartInstanceTypeVariants] = (
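
The parsing change in isolation, as a hedged sketch with a hypothetical json_obj: a spec without the key now parses to an empty dict rather than None, so downstream callers can treat "absent" and "empty" uniformly:

json_obj = {}  # spec JSON with no "hosting_model_package_arns" key
model_package_arns = json_obj.get("hosting_model_package_arns")
hosting_model_package_arns = (
    model_package_arns if model_package_arns is not None else {}
)
assert hosting_model_package_arns == {}
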
@@ -1147,6 +1150,12 @@ def resolved_config(self) -> Dict[str, Any]:
                 deepcopy(component.to_json()),
                 component.OVERRIDING_DENY_LIST,
             )
+
+        # Remove environment variables from resolved config if using model packages
+        hosting_model_package_arns = resolved_config.get("hosting_model_package_arns")
+        if hosting_model_package_arns is not None and hosting_model_package_arns != {}:
+            resolved_config["inference_environment_variables"] = []
+
         self.resolved_metadata_config = resolved_config
 
         return resolved_config
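
A sketch of the resolved-config behavior this hunk adds, on illustrative data (the ARN and account number are placeholders): once a config resolves to a non-empty model package ARN map, its inference environment variables are cleared, since the package defines its own container environment:

resolved_config = {
    "hosting_model_package_arns": {
        "us-west-2": "arn:aws:sagemaker:us-west-2:111122223333:model-package/example"
    },
    "inference_environment_variables": [
        {"name": "SAGEMAKER_PROGRAM", "default": "inference.py"}
    ],
}
arns = resolved_config.get("hosting_model_package_arns")
if arns is not None and arns != {}:
    # Model packages ship their own serving environment.
    resolved_config["inference_environment_variables"] = []
assert resolved_config["inference_environment_variables"] == []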

tests/unit/sagemaker/jumpstart/constants.py

Lines changed: 17 additions & 1 deletion
@@ -7357,7 +7357,7 @@
     "training_model_package_artifact_uris": None,
     "deprecate_warn_message": None,
     "deprecated_message": None,
-    "hosting_model_package_arns": None,
+    "hosting_model_package_arns": {},
     "hosting_eula_key": None,
     "model_subscription_link": None,
     "hyperparameters": [
@@ -7692,6 +7692,14 @@
             },
             "component_names": ["gpu-inference"],
         },
+        "gpu-inference-model-package": {
+            "benchmark_metrics": {
+                "ml.p3.2xlarge": [
+                    {"name": "Latency", "value": "100", "unit": "Tokens/S", "concurrency": 1}
+                ]
+            },
+            "component_names": ["gpu-inference-model-package"],
+        },
     },
     "inference_config_components": {
         "neuron-base": {
@@ -7733,6 +7741,14 @@
                 },
             },
         },
+        "gpu-inference-model-package": {
+            "default_inference_instance_type": "ml.p2.xlarge",
+            "supported_inference_instance_types": ["ml.p2.xlarge", "ml.p3.2xlarge"],
+            "hosting_model_package_arns": {
+                "us-west-2": "arn:aws:sagemaker:us-west-2:594846645681:model-package/ll"
+                "ama2-7b-v3-740347e540da35b4ab9f6fc0ab3fed2c"
+            },
+        },
         "gpu-inference-budget": {
             "supported_inference_instance_types": ["ml.p2.xlarge", "ml.p3.2xlarge"],
             "hosting_artifact_key": "artifacts/meta-textgeneration-llama-2-7b/gpu-inference-budget/model/",

tests/unit/sagemaker/jumpstart/model/test_model.py

Lines changed: 69 additions & 6 deletions
@@ -1651,6 +1651,74 @@ def test_model_set_deployment_config(
             endpoint_logging=False,
         )
 
+    @mock.patch(
+        "sagemaker.jumpstart.model.get_jumpstart_configs", side_effect=lambda *args, **kwargs: {}
+    )
+    @mock.patch("sagemaker.jumpstart.accessors.JumpStartModelsAccessor._get_manifest")
+    @mock.patch("sagemaker.jumpstart.factory.model.Session")
+    @mock.patch("sagemaker.jumpstart.accessors.JumpStartModelsAccessor.get_model_specs")
+    @mock.patch("sagemaker.jumpstart.model.Model.deploy")
+    @mock.patch("sagemaker.jumpstart.factory.model.JUMPSTART_DEFAULT_REGION_NAME", region)
+    def test_model_set_deployment_config_model_package(
+        self,
+        mock_model_deploy: mock.Mock,
+        mock_get_model_specs: mock.Mock,
+        mock_session: mock.Mock,
+        mock_get_manifest: mock.Mock,
+        mock_get_jumpstart_configs: mock.Mock,
+    ):
+        mock_get_model_specs.side_effect = get_prototype_spec_with_configs
+        mock_get_manifest.side_effect = (
+            lambda region, model_type, *args, **kwargs: get_prototype_manifest(region, model_type)
+        )
+        mock_model_deploy.return_value = default_predictor
+
+        model_id, _ = "pytorch-eqa-bert-base-cased", "*"
+
+        mock_session.return_value = sagemaker_session
+
+        model = JumpStartModel(model_id=model_id)
+
+        assert model.config_name == "neuron-inference"
+
+        model.deploy()
+
+        mock_model_deploy.assert_called_once_with(
+            initial_instance_count=1,
+            instance_type="ml.inf2.xlarge",
+            tags=[
+                {"Key": JumpStartTag.MODEL_ID, "Value": "pytorch-eqa-bert-base-cased"},
+                {"Key": JumpStartTag.MODEL_VERSION, "Value": "1.0.0"},
+                {"Key": JumpStartTag.INFERENCE_CONFIG_NAME, "Value": "neuron-inference"},
+            ],
+            wait=True,
+            endpoint_logging=False,
+        )
+
+        mock_model_deploy.reset_mock()
+
+        model.set_deployment_config(
+            config_name="gpu-inference-model-package", instance_type="ml.p2.xlarge"
+        )
+
+        assert (
+            model.model_package_arn
+            == "arn:aws:sagemaker:us-west-2:594846645681:model-package/llama2-7b-v3-740347e540da35b4ab9f6fc0ab3fed2c"
+        )
+        model.deploy()
+
+        mock_model_deploy.assert_called_once_with(
+            initial_instance_count=1,
+            instance_type="ml.p2.xlarge",
+            tags=[
+                {"Key": JumpStartTag.MODEL_ID, "Value": "pytorch-eqa-bert-base-cased"},
+                {"Key": JumpStartTag.MODEL_VERSION, "Value": "1.0.0"},
+                {"Key": JumpStartTag.INFERENCE_CONFIG_NAME, "Value": "gpu-inference-model-package"},
+            ],
+            wait=True,
+            endpoint_logging=False,
+        )
+
     @mock.patch(
         "sagemaker.jumpstart.model.get_jumpstart_configs", side_effect=lambda *args, **kwargs: {}
     )
@@ -1706,12 +1774,7 @@ def test_model_set_deployment_config_incompatible_instance_type_or_name(
 
         with pytest.raises(ValueError) as error:
             model.set_deployment_config("neuron-inference-unknown-name", "ml.inf2.32xlarge")
-        assert (
-            "Cannot find Jumpstart config name neuron-inference-unknown-name. "
-            "List of config names that is supported by the model: "
-            "['neuron-inference', 'neuron-inference-budget', 'gpu-inference-budget', 'gpu-inference']"
-            in str(error)
-        )
+        assert "Cannot find Jumpstart config name neuron-inference-unknown-name. " in str(error)
 
     @mock.patch("sagemaker.jumpstart.model.get_init_kwargs")
     @mock.patch("sagemaker.jumpstart.utils.verify_model_region_and_return_specs")

tests/unit/sagemaker/jumpstart/test_types.py

Lines changed: 92 additions & 1 deletion
@@ -17,6 +17,7 @@
 from sagemaker.jumpstart.types import (
     JumpStartBenchmarkStat,
     JumpStartECRSpecs,
+    JumpStartEnvironmentVariable,
     JumpStartHyperparameter,
     JumpStartInstanceTypeVariants,
     JumpStartModelSpecs,
@@ -927,6 +928,7 @@ def test_inference_configs_parsing():
         "neuron-inference",
         "neuron-budget",
         "gpu-inference",
+        "gpu-inference-model-package",
         "gpu-inference-budget",
     ]
 
@@ -1019,6 +1021,80 @@ def test_inference_configs_parsing():
             }
         ),
     ]
+    assert specs1.inference_environment_variables == [
+        JumpStartEnvironmentVariable(
+            {
+                "name": "SAGEMAKER_PROGRAM",
+                "type": "text",
+                "default": "inference.py",
+                "scope": "container",
+                "required_for_model_class": True,
+            }
+        ),
+        JumpStartEnvironmentVariable(
+            {
+                "name": "SAGEMAKER_SUBMIT_DIRECTORY",
+                "type": "text",
+                "default": "/opt/ml/model/code",
+                "scope": "container",
+                "required_for_model_class": False,
+            }
+        ),
+        JumpStartEnvironmentVariable(
+            {
+                "name": "SAGEMAKER_CONTAINER_LOG_LEVEL",
+                "type": "text",
+                "default": "20",
+                "scope": "container",
+                "required_for_model_class": False,
+            }
+        ),
+        JumpStartEnvironmentVariable(
+            {
+                "name": "SAGEMAKER_MODEL_SERVER_TIMEOUT",
+                "type": "text",
+                "default": "3600",
+                "scope": "container",
+                "required_for_model_class": False,
+            }
+        ),
+        JumpStartEnvironmentVariable(
+            {
+                "name": "ENDPOINT_SERVER_TIMEOUT",
+                "type": "int",
+                "default": 3600,
+                "scope": "container",
+                "required_for_model_class": True,
+            }
+        ),
+        JumpStartEnvironmentVariable(
+            {
+                "name": "MODEL_CACHE_ROOT",
+                "type": "text",
+                "default": "/opt/ml/model",
+                "scope": "container",
+                "required_for_model_class": True,
+            }
+        ),
+        JumpStartEnvironmentVariable(
+            {
+                "name": "SAGEMAKER_ENV",
+                "type": "text",
+                "default": "1",
+                "scope": "container",
+                "required_for_model_class": True,
+            }
+        ),
+        JumpStartEnvironmentVariable(
+            {
+                "name": "SAGEMAKER_MODEL_SERVER_WORKERS",
+                "type": "int",
+                "default": 1,
+                "scope": "container",
+                "required_for_model_class": True,
+            }
+        ),
+    ]
 
     # Overrided fields in top config
     assert specs1.supported_inference_instance_types == ["ml.inf2.xlarge", "ml.inf2.2xlarge"]
@@ -1057,6 +1133,20 @@ def test_inference_configs_parsing():
     )
     assert list(config.config_components.keys()) == ["neuron-inference"]
 
+    config = specs1.inference_configs.configs["gpu-inference-model-package"]
+    assert config.config_components["gpu-inference-model-package"] == JumpStartConfigComponent(
+        "gpu-inference-model-package",
+        {
+            "default_inference_instance_type": "ml.p2.xlarge",
+            "supported_inference_instance_types": ["ml.p2.xlarge", "ml.p3.2xlarge"],
+            "hosting_model_package_arns": {
+                "us-west-2": "arn:aws:sagemaker:us-west-2:594846645681:model-package/"
+                "llama2-7b-v3-740347e540da35b4ab9f6fc0ab3fed2c"
+            },
+        },
+    )
+    assert config.resolved_config.get("inference_environment_variables") == []
+
 
     spec = {
         **BASE_SPEC,
         **INFERENCE_CONFIGS,
@@ -1075,6 +1165,7 @@ def test_set_inference_configs():
         "neuron-inference",
         "neuron-budget",
         "gpu-inference",
+        "gpu-inference-model-package",
         "gpu-inference-budget",
     ]
 
@@ -1083,7 +1174,7 @@ def test_set_inference_configs():
     assert "Cannot find Jumpstart config name invalid_name."
     "List of config names that is supported by the model: "
     "['neuron-inference', 'neuron-inference-budget', "
-    "'gpu-inference-budget', 'gpu-inference']" in str(error.value)
+    "'gpu-inference-budget', 'gpu-inference', 'gpu-inference-model-package']" in str(error.value)
 
     assert specs1.supported_inference_instance_types == ["ml.inf2.xlarge", "ml.inf2.2xlarge"]
     specs1.set_config("gpu-inference")

tests/unit/sagemaker/jumpstart/test_utils.py

Lines changed: 8 additions & 0 deletions
@@ -1639,6 +1639,7 @@ def test_get_jumpstart_config_names_success(
         "neuron-inference-budget",
         "gpu-inference-budget",
         "gpu-inference",
+        "gpu-inference-model-package",
     ]
 
 @patch("sagemaker.jumpstart.accessors.JumpStartModelsAccessor.get_model_specs")
@@ -1735,6 +1736,13 @@ def test_get_jumpstart_benchmark_stats_full_list(
                 )
             ]
         },
+        "gpu-inference-model-package": {
+            "ml.p3.2xlarge": [
+                JumpStartBenchmarkStat(
+                    {"name": "Latency", "value": "100", "unit": "Tokens/S", "concurrency": 1}
+                )
+            ]
+        },
     }
 
 @patch("sagemaker.jumpstart.accessors.JumpStartModelsAccessor.get_model_specs")
