Skip to content

Commit

Permalink
Fix: Updated js mb compression logic - ModelBuilder (aws#4294)
Browse files Browse the repository at this point in the history
Co-authored-by: EC2 Default User <ec2-user@ip-172-16-54-104.us-west-2.compute.internal>
  • Loading branch information
gwang111 and EC2 Default User authored Dec 19, 2023
1 parent d756d4d commit f08be97
Show file tree
Hide file tree
Showing 8 changed files with 568 additions and 29 deletions.
1 change: 1 addition & 0 deletions src/sagemaker/serve/builder/jumpstart_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ def _is_jumpstart_model_id(self) -> bool:
def _create_pre_trained_js_model(self) -> Type[Model]:
"""Placeholder docstring"""
pysdk_model = JumpStartModel(self.model)
pysdk_model.sagemaker_session = self.sagemaker_session

self._original_deploy = pysdk_model.deploy
pysdk_model.deploy = self._js_builder_deploy_wrapper
Expand Down
5 changes: 4 additions & 1 deletion src/sagemaker/serve/builder/tgi_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,10 @@ def _create_tgi_model(self) -> Type[Model]:
logger.info("Auto detected %s. Proceeding with the the deployment.", self.image_uri)

pysdk_model = HuggingFaceModel(
image_uri=self.image_uri, env=self.env_vars, role=self.role_arn
image_uri=self.image_uri,
env=self.env_vars,
role=self.role_arn,
sagemaker_session=self.sagemaker_session,
)

self._original_deploy = pysdk_model.deploy
Expand Down
62 changes: 46 additions & 16 deletions src/sagemaker/serve/model_server/djl_serving/prepare.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,14 @@

from __future__ import absolute_import
import shutil
import tarfile
import subprocess
import json
import tarfile
import logging
from typing import List
from pathlib import Path

from sagemaker.utils import _tmpdir
from sagemaker.s3 import S3Downloader
from sagemaker.djl_inference import DJLModel
from sagemaker.djl_inference.model import _read_existing_serving_properties
from sagemaker.serve.utils.local_hardware import _check_disk_space, _check_docker_disk_usage
Expand All @@ -34,27 +34,57 @@


def _has_serving_properties_file(code_dir: Path) -> bool:
    """Check for existing serving properties in the directory

    Args:
        code_dir (Path): Directory to inspect.

    Returns:
        bool: True when ``code_dir`` contains a regular file named by
            ``_SERVING_PROPERTIES_FILE``; False when that path is missing
            or is not a regular file.
    """
    return code_dir.joinpath(_SERVING_PROPERTIES_FILE).is_file()


def _members(resources: object, depth: int):
    """Yield every archive member with its leading path components removed.

    Each member's ``path`` is split on "/" at most ``depth`` times and only
    the final piece is kept. The member objects are modified in place and
    yielded in the order ``resources.getmembers()`` returns them. A path
    with fewer than ``depth`` separators keeps only its last component.

    Args:
        resources (object): Object exposing ``getmembers()``, whose items
            carry a writable ``path`` attribute.
        depth (int): Maximum number of leading "/"-separated components
            to strip.

    Yields:
        The same member objects, after their ``path`` has been rewritten.
    """
    for entry in resources.getmembers():
        *_, stripped_path = entry.path.split("/", depth)
        entry.path = stripped_path
        yield entry
def _move_to_code_dir(js_model_dir: str, code_dir: Path):
    """Move DJL Jumpstart resources from model to code_dir

    Every entry directly under ``<js_model_dir>/model`` is moved into
    ``code_dir``. An entry whose name is already taken inside ``code_dir``
    is left where it is, so existing content is never overwritten.

    Args:
        js_model_dir (str): Directory containing the ``model`` sub-directory
            whose entries are moved.
        code_dir (Path): Destination directory.

    Raises:
        shutil.Error: If a move fails for any reason other than the
            destination already existing.
    """
    js_model_resources = Path(js_model_dir).joinpath("model")
    for resource in js_model_resources.glob("*"):
        try:
            # Plain strings keep shutil.move working on Python < 3.9, where a
            # Path source fails when the destination is an existing directory.
            shutil.move(str(resource), str(code_dir))
        except shutil.Error as e:
            # Only a name clash is an expected, skippable condition; any
            # other failure must reach the caller instead of being dropped.
            if "already exists" not in str(e):
                raise


def _extract_js_resource(js_model_dir: str, js_id: str):
    """Uncompress the jumpstart resource

    Opens ``infer-prepack-<js_id>.tar.gz`` inside ``js_model_dir`` and
    unpacks it into that same directory. Before anything is written, every
    member is checked so that neither its own path nor, for links, its link
    target resolves outside ``js_model_dir``.

    Args:
        js_model_dir (str): Directory that holds the archive and receives
            the extracted files.
        js_id (str): Identifier used to build the archive file name.

    Raises:
        ValueError: If a member or its link target would land outside
            ``js_model_dir``. Nothing is extracted in that case.
    """
    tmp_sourcedir = Path(js_model_dir).joinpath(f"infer-prepack-{js_id}.tar.gz")
    extract_root = Path(js_model_dir).resolve()
    with tarfile.open(str(tmp_sourcedir)) as resources:
        for member in resources.getmembers():
            member_target = extract_root.joinpath(member.name).resolve()
            candidates = [member_target]
            if member.issym():
                # Symlink targets are relative to the link's own directory.
                candidates.append(member_target.parent.joinpath(member.linkname).resolve())
            elif member.islnk():
                # Hard-link targets are relative to the archive root.
                candidates.append(extract_root.joinpath(member.linkname).resolve())
            for candidate in candidates:
                if candidate != extract_root and extract_root not in candidate.parents:
                    raise ValueError(
                        f"Refusing to extract {member.name!r}: "
                        f"it resolves outside of {extract_root}"
                    )
        resources.extractall(path=js_model_dir)


def _copy_jumpstart_artifacts(model_data: str, js_id: str, code_dir: Path):
"""Placeholder Docstring"""
"""Copy the associated JumpStart Resource into the code directory"""
logger.info("Downloading JumpStart artifacts from S3...")
with _tmpdir(directory=str(code_dir)) as js_model_dir:
subprocess.run(["aws", "s3", "cp", model_data, js_model_dir])

logger.info("Uncompressing JumpStart artifacts for faster loading...")
tmp_sourcedir = Path(js_model_dir).joinpath(f"infer-prepack-{js_id}.tar.gz")
with tarfile.open(str(tmp_sourcedir)) as resources:
resources.extractall(path=code_dir, members=_members(resources, 1))
s3_downloader = S3Downloader()
invalid_model_data_format = False
with _tmpdir(directory=str(code_dir)) as js_model_dir:
if isinstance(model_data, str):
if model_data.endswith(".tar.gz"):
logger.info("Uncompressing JumpStart artifacts for faster loading...")
s3_downloader.download(model_data, js_model_dir)
_extract_js_resource(js_model_dir, js_id)
else:
logger.info("Copying uncompressed JumpStart artifacts...")
s3_downloader.download(model_data, js_model_dir)
elif (
isinstance(model_data, dict)
and model_data.get("S3DataSource")
and model_data.get("S3DataSource").get("S3Uri")
):
logger.info("Copying uncompressed JumpStart artifacts...")
s3_downloader.download(model_data.get("S3DataSource").get("S3Uri"), js_model_dir)
else:
invalid_model_data_format = True
if not invalid_model_data_format:
_move_to_code_dir(js_model_dir, code_dir)

if invalid_model_data_format:
raise ValueError("JumpStart model data compression format is unsupported: %s", model_data)

existing_properties = _read_existing_serving_properties(code_dir)
config_json_file = code_dir.joinpath("config.json")
Expand All @@ -70,7 +100,7 @@ def _copy_jumpstart_artifacts(model_data: str, js_id: str, code_dir: Path):
def _generate_properties_file(
model: DJLModel, code_dir: Path, overwrite_props_from_file: bool, manual_set_props: dict
):
"""Placeholder Docstring"""
"""Construct serving properties file taking into account of overrides or manual specs"""
if _has_serving_properties_file(code_dir):
existing_properties = _read_existing_serving_properties(code_dir)
else:
Expand Down
53 changes: 41 additions & 12 deletions src/sagemaker/serve/model_server/tgi/prepare.py
Original file line number Diff line number Diff line change
@@ -1,37 +1,66 @@
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
# http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
"""Prepare TgiModel for Deployment"""

from __future__ import absolute_import
import tarfile
import subprocess
import logging
from typing import List
from pathlib import Path

from sagemaker.serve.utils.local_hardware import _check_disk_space, _check_docker_disk_usage
from sagemaker.utils import _tmpdir
from sagemaker.s3 import S3Downloader

logger = logging.getLogger(__name__)


def _extract_js_resource(js_model_dir: str, code_dir: Path, js_id: str):
    """Uncompress the jumpstart resource

    Opens ``infer-prepack-<js_id>.tar.gz`` inside ``js_model_dir`` and
    unpacks it into ``code_dir``. Before anything is written, every member
    is checked so that neither its own path nor, for links, its link target
    resolves outside ``code_dir``.

    Args:
        js_model_dir (str): Directory that holds the archive.
        code_dir (Path): Directory that receives the extracted files.
        js_id (str): Identifier used to build the archive file name.

    Raises:
        ValueError: If a member or its link target would land outside
            ``code_dir``. Nothing is extracted in that case.
    """
    tmp_sourcedir = Path(js_model_dir).joinpath(f"infer-prepack-{js_id}.tar.gz")
    extract_root = Path(code_dir).resolve()
    with tarfile.open(str(tmp_sourcedir)) as resources:
        for member in resources.getmembers():
            member_target = extract_root.joinpath(member.name).resolve()
            candidates = [member_target]
            if member.issym():
                # Symlink targets are relative to the link's own directory.
                candidates.append(member_target.parent.joinpath(member.linkname).resolve())
            elif member.islnk():
                # Hard-link targets are relative to the archive root.
                candidates.append(extract_root.joinpath(member.linkname).resolve())
            for candidate in candidates:
                if candidate != extract_root and extract_root not in candidate.parents:
                    raise ValueError(
                        f"Refusing to extract {member.name!r}: "
                        f"it resolves outside of {extract_root}"
                    )
        resources.extractall(path=code_dir)


def _copy_jumpstart_artifacts(model_data: str, js_id: str, code_dir: Path) -> bool:
    """Copy the associated JumpStart Resource into the code directory

    ``model_data`` is validated before any logging or download work starts,
    so an unsupported value fails without side effects.

    Args:
        model_data: Either an S3 URI string, or a dict carrying the URI
            under ``["S3DataSource"]["S3Uri"]``. A string ending in
            ``.tar.gz`` is downloaded to a temporary directory under
            ``code_dir`` and unpacked into ``code_dir``; every other
            accepted value is downloaded straight into ``code_dir``.
        js_id (str): Identifier used to build the archive file name when
            the artifacts are compressed.
        code_dir (Path): Directory that receives the artifacts.

    Returns:
        bool: Always True when the function returns.

    Raises:
        ValueError: If ``model_data`` is neither a string nor a dict with a
            non-empty ``S3DataSource.S3Uri`` entry.
    """
    if isinstance(model_data, str):
        s3_uri = model_data
        is_compressed = model_data.endswith(".tar.gz")
    elif (
        isinstance(model_data, dict)
        and model_data.get("S3DataSource")
        and model_data.get("S3DataSource").get("S3Uri")
    ):
        s3_uri = model_data.get("S3DataSource").get("S3Uri")
        # Dict-style model data is always handled as uncompressed.
        is_compressed = False
    else:
        # Exceptions do not interpolate %-style arguments the way logging
        # calls do, so the offending value is formatted into the message.
        raise ValueError(
            f"JumpStart model data compression format is unsupported: {model_data}"
        )

    logger.info("Downloading JumpStart artifacts from S3...")
    s3_downloader = S3Downloader()
    if is_compressed:
        logger.info("Uncompressing JumpStart artifacts for faster loading...")
        with _tmpdir(directory=str(code_dir)) as js_model_dir:
            s3_downloader.download(s3_uri, js_model_dir)
            _extract_js_resource(js_model_dir, code_dir, js_id)
    else:
        logger.info("Copying uncompressed JumpStart artifacts...")
        s3_downloader.download(s3_uri, code_dir)

    return True


def _create_dir_structure(model_path: str) -> tuple:
"""Placeholder Docstring"""
"""Create the expected model directory structure for the TGI server"""
model_path = Path(model_path)
if not model_path.exists():
model_path.mkdir(parents=True)
Expand Down
9 changes: 9 additions & 0 deletions tests/unit/sagemaker/serve/builder/test_djl_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,10 +114,12 @@ def test_build_deploy_for_djl_local_container(
mode=Mode.LOCAL_CONTAINER,
model_server=ModelServer.DJL_SERVING,
)

builder._prepare_for_mode = MagicMock()
builder._prepare_for_mode.side_effect = None

model = builder.build()
builder.serve_settings.telemetry_opt_out = True

assert isinstance(model, HuggingFaceAccelerateModel)
assert (
Expand Down Expand Up @@ -176,6 +178,7 @@ def test_build_for_djl_local_container_faster_transformer(
model_server=ModelServer.DJL_SERVING,
)
model = builder.build()
builder.serve_settings.telemetry_opt_out = True

assert isinstance(model, FasterTransformerModel)
assert (
Expand Down Expand Up @@ -211,6 +214,7 @@ def test_build_for_djl_local_container_deepspeed(
model_server=ModelServer.DJL_SERVING,
)
model = builder.build()
builder.serve_settings.telemetry_opt_out = True

assert isinstance(model, DeepSpeedModel)
assert model.generate_serving_properties() == mock_expected_deepspeed_serving_properties
Expand Down Expand Up @@ -268,6 +272,7 @@ def test_tune_for_djl_local_container(
builder._djl_model_builder_deploy_wrapper = MagicMock()

model = builder.build()
builder.serve_settings.telemetry_opt_out = True
tuned_model = model.tune()
assert tuned_model.generate_serving_properties() == mock_most_performant_serving_properties

Expand Down Expand Up @@ -317,6 +322,7 @@ def test_tune_for_djl_local_container_deep_ping_ex(
builder._prepare_for_mode.side_effect = None

model = builder.build()
builder.serve_settings.telemetry_opt_out = True
tuned_model = model.tune()
assert (
tuned_model.generate_serving_properties()
Expand Down Expand Up @@ -369,6 +375,7 @@ def test_tune_for_djl_local_container_load_ex(
builder._prepare_for_mode.side_effect = None

model = builder.build()
builder.serve_settings.telemetry_opt_out = True
tuned_model = model.tune()
assert (
tuned_model.generate_serving_properties()
Expand Down Expand Up @@ -421,6 +428,7 @@ def test_tune_for_djl_local_container_oom_ex(
builder._prepare_for_mode.side_effect = None

model = builder.build()
builder.serve_settings.telemetry_opt_out = True
tuned_model = model.tune()
assert (
tuned_model.generate_serving_properties()
Expand Down Expand Up @@ -473,6 +481,7 @@ def test_tune_for_djl_local_container_invoke_ex(
builder._prepare_for_mode.side_effect = None

model = builder.build()
builder.serve_settings.telemetry_opt_out = True
tuned_model = model.tune()
assert (
tuned_model.generate_serving_properties()
Expand Down
33 changes: 33 additions & 0 deletions tests/unit/sagemaker/serve/model_server/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
# http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
from __future__ import absolute_import

# Placeholder paths and identifiers shared by the model server unit tests.
# MOCK_CODE_DIR is the "code" sub-directory of MOCK_MODEL_PATH.
MOCK_MODEL_PATH = "/path/to/mock/model/dir"
MOCK_CODE_DIR = "/path/to/mock/model/dir/code"
MOCK_JUMPSTART_ID = "mock_llm_js_id"
MOCK_TMP_DIR = "tmp123456"
# Model data given as a plain S3 URI string that ends in ".tar.gz".
MOCK_COMPRESSED_MODEL_DATA_STR = (
"s3://jumpstart-cache/to/infer-prepack-huggingface-llm-falcon-7b-bf16.tar.gz"
)
# Model data given as a plain S3 URI string that is a prefix, not a ".tar.gz" archive.
MOCK_UNCOMPRESSED_MODEL_DATA_STR = "s3://jumpstart-cache/to/artifacts/inference-prepack/v1.0.1/"
# Separate prefix used only inside the dict form below, so the string and
# dict cases can be told apart by their URIs.
MOCK_UNCOMPRESSED_MODEL_DATA_STR_FOR_DICT = (
"s3://jumpstart-cache/to/artifacts/inference-prepack/v1.0.1/dict/"
)
# Model data given as a dict with the URI nested under "S3DataSource" -> "S3Uri".
# Note that "CompressionType" is the string "None", not the None object.
MOCK_UNCOMPRESSED_MODEL_DATA_DICT = {
"S3DataSource": {
"S3Uri": MOCK_UNCOMPRESSED_MODEL_DATA_STR_FOR_DICT,
"S3DataType": "S3Prefix",
"CompressionType": "None",
}
}
# Empty dict: carries no "S3DataSource" entry at all.
MOCK_INVALID_MODEL_DATA_DICT = {}
Loading

0 comments on commit f08be97

Please sign in to comment.