Skip to content

Commit

Permalink
Merge branch 'master' into suryans-feaure-store-collection-type
Browse files Browse the repository at this point in the history
  • Loading branch information
suryans-commit authored Feb 6, 2024
2 parents 85e1536 + b7a4792 commit 55b1462
Show file tree
Hide file tree
Showing 12 changed files with 197 additions and 23 deletions.
6 changes: 6 additions & 0 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,12 @@ For example, see the [Processing API reference](https://github.com/aws/sagemaker

To build the Sphinx docs, run the following command in the `doc/` directory:

```shell
# Initial setup, only required for the first run
pip install -r requirements.txt
pip install -e ../
```

```shell
make html
```
Expand Down
3 changes: 1 addition & 2 deletions src/sagemaker/estimator.py
Original file line number Diff line number Diff line change
Expand Up @@ -3285,7 +3285,6 @@ class Framework(EstimatorBase):
UNSUPPORTED_DLC_IMAGE_FOR_SM_PARALLELISM = (
"2.0.1-gpu-py310-cu121",
"2.0-gpu-py310-cu121",
"2.1.0-gpu-py310",
)

def __init__(
Expand Down Expand Up @@ -3959,7 +3958,7 @@ def _distribution_configuration(self, distribution):
for unsupported_image in Framework.UNSUPPORTED_DLC_IMAGE_FOR_SM_PARALLELISM:
if (
unsupported_image in img_uri and not torch_distributed_enabled
): # disabling DLC images with CUDA12
): # disabling DLC images without SMDataParallel or SMModelParallel
raise ValueError(
f"SMDistributed is currently incompatible with DLC image: {img_uri}. "
"(Could be due to CUDA version being greater than 11.)"
Expand Down
107 changes: 104 additions & 3 deletions src/sagemaker/image_uri_config/huggingface.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@
"4.12": "4.12.3",
"4.17": "4.17.0",
"4.26": "4.26.0",
"4.28": "4.28.1"
"4.28": "4.28.1",
"4.36": "4.36.0"
},
"versions": {
"4.4.2": {
Expand Down Expand Up @@ -970,6 +971,53 @@
"gpu": "cu118-ubuntu20.04"
}
}
},
"4.36.0": {
"version_aliases": {
"pytorch2.1": "pytorch2.1.0"
},
"pytorch2.1.0": {
"py_versions": [
"py310"
],
"registries": {
"af-south-1": "626614931356",
"il-central-1": "780543022126",
"ap-east-1": "871362719292",
"ap-northeast-1": "763104351884",
"ap-northeast-2": "763104351884",
"ap-northeast-3": "364406365360",
"ap-south-1": "763104351884",
"ap-southeast-1": "763104351884",
"ap-southeast-2": "763104351884",
"ap-southeast-3": "907027046896",
"ca-central-1": "763104351884",
"cn-north-1": "727897471807",
"cn-northwest-1": "727897471807",
"eu-central-1": "763104351884",
"eu-north-1": "763104351884",
"eu-west-1": "763104351884",
"eu-west-2": "763104351884",
"eu-west-3": "763104351884",
"eu-south-1": "692866216735",
"me-south-1": "217643126080",
"me-central-1": "914824155844",
"sa-east-1": "763104351884",
"us-east-1": "763104351884",
"us-east-2": "763104351884",
"us-gov-east-1": "446045086412",
"us-gov-west-1": "442386744353",
"us-iso-east-1": "886529160074",
"us-isob-east-1": "094389454867",
"us-west-1": "763104351884",
"us-west-2": "763104351884",
"ca-west-1": "204538143572"
},
"repository": "huggingface-pytorch-training",
"container_version": {
"gpu": "cu121-ubuntu20.04"
}
}
}
}
},
Expand All @@ -985,7 +1033,8 @@
"4.12": "4.12.3",
"4.17": "4.17.0",
"4.26": "4.26.0",
"4.28": "4.28.1"
"4.28": "4.28.1",
"4.37": "4.37.0"
},
"versions": {
"4.6.1": {
Expand Down Expand Up @@ -1782,7 +1831,59 @@
"cpu": "ubuntu20.04"
}
}
},
"4.37.0": {
"version_aliases": {
"pytorch2.1": "pytorch2.1.0"
},
"pytorch2.1.0": {
"py_versions": [
"py310"
],
"registries": {
"af-south-1": "626614931356",
"il-central-1": "780543022126",
"ap-east-1": "871362719292",
"ap-northeast-1": "763104351884",
"ap-northeast-2": "763104351884",
"ap-northeast-3": "364406365360",
"ap-south-1": "763104351884",
"ap-south-2": "772153158452",
"ap-southeast-1": "763104351884",
"ap-southeast-2": "763104351884",
"ap-southeast-3": "907027046896",
"ap-southeast-4": "457447274322",
"ca-central-1": "763104351884",
"cn-north-1": "727897471807",
"cn-northwest-1": "727897471807",
"eu-central-1": "763104351884",
"eu-central-2": "380420809688",
"eu-north-1": "763104351884",
"eu-west-1": "763104351884",
"eu-west-2": "763104351884",
"eu-west-3": "763104351884",
"eu-south-1": "692866216735",
"eu-south-2": "503227376785",
"me-south-1": "217643126080",
"me-central-1": "914824155844",
"sa-east-1": "763104351884",
"us-east-1": "763104351884",
"us-east-2": "763104351884",
"us-gov-east-1": "446045086412",
"us-gov-west-1": "442386744353",
"us-iso-east-1": "886529160074",
"us-isob-east-1": "094389454867",
"us-west-1": "763104351884",
"us-west-2": "763104351884",
"ca-west-1": "204538143572"
},
"repository": "huggingface-pytorch-inference",
"container_version": {
"gpu": "cu118-ubuntu20.04",
"cpu": "ubuntu22.04"
}
}
}
}
}
}
}
4 changes: 4 additions & 0 deletions src/sagemaker/serve/builder/djl_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,8 @@ def __init__(self):
self.mode = None
self.model_server = None
self.image_uri = None
self.image_config = None
self.vpc_config = None
self._original_deploy = None
self.secret_key = None
self.engine = None
Expand Down Expand Up @@ -138,6 +140,8 @@ def _create_djl_model(self) -> Type[Model]:
"source_dir": code_dir,
"env": self.env_vars,
"hf_hub_token": self.env_vars.get("HUGGING_FACE_HUB_TOKEN"),
"image_config": self.image_config,
"vpc_config": self.vpc_config,
}

if self.engine == _DjlEngine.DEEPSPEED:
Expand Down
46 changes: 36 additions & 10 deletions src/sagemaker/serve/builder/model_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
from sagemaker.serve.validations.check_image_and_hardware_type import (
validate_image_uri_and_hardware,
)
from sagemaker.workflow.entities import PipelineVariable
from sagemaker.huggingface.llm_utils import get_huggingface_model_metadata

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -81,7 +82,6 @@ class ModelBuilder(Triton, DJL, JumpStart, TGI, Transformers):
* ``Mode.SAGEMAKER_ENDPOINT``: Launch on a SageMaker endpoint
* ``Mode.LOCAL_CONTAINER``: Launch locally with a container
shared_libs (List[str]): Any shared libraries you want to bring into
the model packaging.
dependencies (Optional[Dict[str, Any]]): The dependencies of the model
Expand Down Expand Up @@ -122,6 +122,15 @@ class ModelBuilder(Triton, DJL, JumpStart, TGI, Transformers):
``invoke`` and ``load`` functions.
image_uri (Optional[str]): The container image uri (which is derived from a
SageMaker-based container).
image_config (dict[str, str] or dict[str, PipelineVariable]): Specifies
whether the image of model container is pulled from ECR, or private
registry in your VPC. By default it is set to pull model container
image from ECR. (default: None).
vpc_config (Optional[Dict[str, List[Union[str, PipelineVariable]]]]):
The VpcConfig set on the model (default: None)
* 'Subnets' (List[Union[str, PipelineVariable]]): List of subnet ids.
* 'SecurityGroupIds' (List[Union[str, PipelineVariable]]): List of security group
ids.
model_server (Optional[ModelServer]): The model server to which to deploy.
You need to provide this argument when you specify an ``image_uri``
in order for model builder to build the artifacts correctly (according
Expand Down Expand Up @@ -204,6 +213,23 @@ class ModelBuilder(Triton, DJL, JumpStart, TGI, Transformers):
image_uri: Optional[str] = field(
default=None, metadata={"help": "Define the container image uri"}
)
image_config: Optional[Dict[str, Union[str, PipelineVariable]]] = field(
default=None,
metadata={
"help": "Specifies whether the image of model container is pulled from ECR,"
" or private registry in your VPC. By default it is set to pull model "
"container image from ECR. (default: None)."
},
)
vpc_config: Optional[Dict[str, List[Union[str, PipelineVariable]]]] = field(
default=None,
metadata={
"help": "The VpcConfig set on the model (default: None)."
"* 'Subnets' (List[Union[str, PipelineVariable]]): List of subnet ids."
"* ''SecurityGroupIds'' (List[Union[str, PipelineVariable]]): List of"
" security group ids."
},
)
model_server: Optional[ModelServer] = field(
default=None, metadata={"help": "Define the model server to deploy to."}
)
Expand Down Expand Up @@ -386,6 +412,8 @@ def _create_model(self):
# TODO: we should create model as per the framework
self.pysdk_model = Model(
image_uri=self.image_uri,
image_config=self.image_config,
vpc_config=self.vpc_config,
model_data=self.s3_upload_path,
role=self.serve_settings.role_arn,
env=self.env_vars,
Expand Down Expand Up @@ -543,15 +571,16 @@ def build(
self,
mode: Type[Mode] = None,
role_arn: str = None,
sagemaker_session: str = None,
sagemaker_session: Optional[Session] = None,
) -> Type[Model]:
"""Create a deployable ``Model`` instance with ``ModelBuilder``.
Args:
mode (Type[Mode], optional): The mode. Defaults to ``None``.
role_arn (str, optional): The IAM role arn. Defaults to ``None``.
sagemaker_session (str, optional): The SageMaker session to use
for the execution. Defaults to ``None``.
sagemaker_session (Optional[Session]): Session object which manages interactions
with Amazon SageMaker APIs and any other AWS services needed. If not specified, the
function creates one using the default AWS configuration chain.
Returns:
Type[Model]: A deployable ``Model`` object.
Expand All @@ -562,10 +591,7 @@ def build(
self.mode = mode
if role_arn:
self.role_arn = role_arn
if sagemaker_session:
self.sagemaker_session = sagemaker_session
elif not self.sagemaker_session:
self.sagemaker_session = Session()
self.sagemaker_session = sagemaker_session or Session()

self.sagemaker_session.settings._local_download_dir = self.model_path

Expand Down Expand Up @@ -607,7 +633,7 @@ def save(
self,
save_path: Optional[str] = None,
s3_path: Optional[str] = None,
sagemaker_session: Optional[str] = None,
sagemaker_session: Optional[Session] = None,
role_arn: Optional[str] = None,
) -> Type[Model]:
"""WARNING: This function is experimental and not intended for production use.
Expand All @@ -618,7 +644,7 @@ def save(
save_path (Optional[str]): The path where you want to save resources.
s3_path (Optional[str]): The path where you want to upload resources.
"""
self.sagemaker_session = sagemaker_session if sagemaker_session else Session()
self.sagemaker_session = sagemaker_session or Session()

if role_arn:
self.role_arn = role_arn
Expand Down
4 changes: 4 additions & 0 deletions src/sagemaker/serve/builder/tgi_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ def __init__(self):
self.mode = None
self.model_server = None
self.image_uri = None
self.image_config = None
self.vpc_config = None
self._original_deploy = None
self.hf_model_config = None
self._default_tensor_parallel_degree = None
Expand Down Expand Up @@ -134,6 +136,8 @@ def _create_tgi_model(self) -> Type[Model]:

pysdk_model = HuggingFaceModel(
image_uri=self.image_uri,
image_config=self.image_config,
vpc_config=self.vpc_config,
env=self.env_vars,
role=self.role_arn,
sagemaker_session=self.sagemaker_session,
Expand Down
2 changes: 2 additions & 0 deletions src/sagemaker/serve/model_server/triton/triton_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,8 @@ def _auto_detect_image_for_triton(self):
def _create_triton_model(self) -> Type[Model]:
self.pysdk_model = Model(
image_uri=self.image_uri,
image_config=self.image_config,
vpc_config=self.vpc_config,
model_data=self.s3_upload_path,
role=self.serve_settings.role_arn,
env=self.env_vars,
Expand Down
13 changes: 13 additions & 0 deletions tests/unit/sagemaker/serve/builder/test_djl_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
LocalModelInvocationException,
)
from sagemaker.serve.utils.predictors import DjlLocalModePredictor
from tests.unit.sagemaker.serve.constants import MOCK_IMAGE_CONFIG, MOCK_VPC_CONFIG

mock_model_id = "TheBloke/Llama-2-7b-chat-fp16"
mock_t5_model_id = "google/flan-t5-xxl"
Expand Down Expand Up @@ -113,6 +114,8 @@ def test_build_deploy_for_djl_local_container(
schema_builder=mock_schema_builder,
mode=Mode.LOCAL_CONTAINER,
model_server=ModelServer.DJL_SERVING,
image_config=MOCK_IMAGE_CONFIG,
vpc_config=MOCK_VPC_CONFIG,
)

builder._prepare_for_mode = MagicMock()
Expand All @@ -132,6 +135,8 @@ def test_build_deploy_for_djl_local_container(
assert builder._default_max_new_tokens == 256
assert builder.schema_builder.sample_input["parameters"]["max_new_tokens"] == 256
assert builder.nb_instance_type == "ml.g5.24xlarge"
assert model.image_config == MOCK_IMAGE_CONFIG
assert model.vpc_config == MOCK_VPC_CONFIG
assert "deepspeed" in builder.image_uri

builder.modes[str(Mode.LOCAL_CONTAINER)] = MagicMock()
Expand Down Expand Up @@ -176,6 +181,8 @@ def test_build_for_djl_local_container_faster_transformer(
schema_builder=mock_schema_builder,
mode=Mode.LOCAL_CONTAINER,
model_server=ModelServer.DJL_SERVING,
image_config=MOCK_IMAGE_CONFIG,
vpc_config=MOCK_VPC_CONFIG,
)
model = builder.build()
builder.serve_settings.telemetry_opt_out = True
Expand All @@ -185,6 +192,8 @@ def test_build_for_djl_local_container_faster_transformer(
model.generate_serving_properties()
== mock_expected_fastertransformer_serving_properties
)
assert model.image_config == MOCK_IMAGE_CONFIG
assert model.vpc_config == MOCK_VPC_CONFIG
assert "fastertransformer" in builder.image_uri

@patch(
Expand Down Expand Up @@ -212,11 +221,15 @@ def test_build_for_djl_local_container_deepspeed(
schema_builder=mock_schema_builder,
mode=Mode.LOCAL_CONTAINER,
model_server=ModelServer.DJL_SERVING,
image_config=MOCK_IMAGE_CONFIG,
vpc_config=MOCK_VPC_CONFIG,
)
model = builder.build()
builder.serve_settings.telemetry_opt_out = True

assert isinstance(model, DeepSpeedModel)
assert model.image_config == MOCK_IMAGE_CONFIG
assert model.vpc_config == MOCK_VPC_CONFIG
assert model.generate_serving_properties() == mock_expected_deepspeed_serving_properties
assert "deepspeed" in builder.image_uri

Expand Down
Loading

0 comments on commit 55b1462

Please sign in to comment.