Skip to content

Commit

Permalink
change: TGI NeuronX (#4375)
Browse files: browse the repository at this point in the history
* TGI NeuronX

* Update

* Update
  • Loading branch information
amzn-choeric authored Jan 19, 2024
1 parent 34e5196 commit 99d883a
Show file tree
Hide file tree
Showing 4 changed files with 71 additions and 11 deletions.
8 changes: 8 additions & 0 deletions src/sagemaker/huggingface/llm_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,14 @@ def get_huggingface_llm_image_uri(
version=version,
image_scope="inference",
)
if backend == "huggingface-neuronx":
return image_uris.retrieve(
"huggingface-llm-neuronx",
region=region,
version=version,
image_scope="inference",
inference_tool="neuronx",
)
if backend == "lmi":
version = version or "0.24.0"
return image_uris.retrieve(framework="djl-deepspeed", region=region, version=version)
Expand Down
41 changes: 41 additions & 0 deletions src/sagemaker/image_uri_config/huggingface-llm-neuronx.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
{
"inference": {
"processors": [
"inf2"
],
"version_aliases": {
"0.0": "0.0.16"
},
"versions": {
"0.0.16": {
"py_versions": [
"py310"
],
"registries": {
"ap-northeast-1": "763104351884",
"ap-south-1": "763104351884",
"ap-south-2": "772153158452",
"ap-southeast-1": "763104351884",
"ap-southeast-2": "763104351884",
"ap-southeast-4": "457447274322",
"eu-central-1": "763104351884",
"eu-central-2": "380420809688",
"eu-south-2": "503227376785",
"eu-west-1": "763104351884",
"eu-west-3": "763104351884",
"il-central-1": "780543022126",
"sa-east-1": "763104351884",
"us-east-1": "763104351884",
"us-east-2": "763104351884",
"us-west-2": "763104351884",
"ca-west-1": "204538143572"
},
"tag_prefix": "1.13.1-optimum0.0.16",
"repository": "huggingface-pytorch-tgi-inference",
"container_version": {
"inf2": "ubuntu22.04"
}
}
}
}
}
2 changes: 2 additions & 0 deletions src/sagemaker/image_uris.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
ECR_URI_TEMPLATE = "{registry}.dkr.{hostname}/{repository}"
HUGGING_FACE_FRAMEWORK = "huggingface"
HUGGING_FACE_LLM_FRAMEWORK = "huggingface-llm"
HUGGING_FACE_LLM_NEURONX_FRAMEWORK = "huggingface-llm-neuronx"
XGBOOST_FRAMEWORK = "xgboost"
SKLEARN_FRAMEWORK = "sklearn"
TRAINIUM_ALLOWED_FRAMEWORKS = "pytorch"
Expand Down Expand Up @@ -470,6 +471,7 @@ def _validate_version_and_set_if_needed(version, config, framework):
if version is None and framework in [
DATA_WRANGLER_FRAMEWORK,
HUGGING_FACE_LLM_FRAMEWORK,
HUGGING_FACE_LLM_NEURONX_FRAMEWORK,
STABILITYAI_FRAMEWORK,
]:
version = _get_latest_versions(available_versions)
Expand Down
31 changes: 20 additions & 11 deletions tests/unit/sagemaker/image_uris/test_huggingface_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,29 +19,38 @@

LMI_VERSIONS = ["0.24.0"]
HF_VERSIONS_MAPPING = {
"0.6.0": "2.0.0-tgi0.6.0-gpu-py39-cu118-ubuntu20.04",
"0.8.2": "2.0.0-tgi0.8.2-gpu-py39-cu118-ubuntu20.04",
"0.9.3": "2.0.1-tgi0.9.3-gpu-py39-cu118-ubuntu20.04",
"1.0.3": "2.0.1-tgi1.0.3-gpu-py39-cu118-ubuntu20.04",
"1.1.0": "2.0.1-tgi1.1.0-gpu-py39-cu118-ubuntu20.04",
"1.2.0": "2.1.1-tgi1.2.0-gpu-py310-cu121-ubuntu20.04",
"1.3.1": "2.1.1-tgi1.3.1-gpu-py310-cu121-ubuntu20.04",
"1.3.3": "2.1.1-tgi1.3.3-gpu-py310-cu121-ubuntu20.04",
"gpu": {
"0.6.0": "2.0.0-tgi0.6.0-gpu-py39-cu118-ubuntu20.04",
"0.8.2": "2.0.0-tgi0.8.2-gpu-py39-cu118-ubuntu20.04",
"0.9.3": "2.0.1-tgi0.9.3-gpu-py39-cu118-ubuntu20.04",
"1.0.3": "2.0.1-tgi1.0.3-gpu-py39-cu118-ubuntu20.04",
"1.1.0": "2.0.1-tgi1.1.0-gpu-py39-cu118-ubuntu20.04",
"1.2.0": "2.1.1-tgi1.2.0-gpu-py310-cu121-ubuntu20.04",
"1.3.1": "2.1.1-tgi1.3.1-gpu-py310-cu121-ubuntu20.04",
"1.3.3": "2.1.1-tgi1.3.3-gpu-py310-cu121-ubuntu20.04",
},
"inf2": {
"0.0.16": "1.13.1-optimum0.0.16-neuronx-py310-ubuntu22.04",
},
}


@pytest.mark.parametrize("load_config", ["huggingface-llm.json"], indirect=True)
@pytest.mark.parametrize(
"load_config", ["huggingface-llm.json", "huggingface-llm-neuronx.json"], indirect=True
)
def test_huggingface_uris(load_config):
VERSIONS = load_config["inference"]["versions"]
device = load_config["inference"]["processors"][0]
backend = "huggingface-neuronx" if device == "inf2" else "huggingface"
for version in VERSIONS:
ACCOUNTS = load_config["inference"]["versions"][version]["registries"]
for region in ACCOUNTS.keys():
uri = get_huggingface_llm_image_uri("huggingface", region=region, version=version)
uri = get_huggingface_llm_image_uri(backend, region=region, version=version)
expected = expected_uris.huggingface_llm_framework_uri(
"huggingface-pytorch-tgi-inference",
ACCOUNTS[region],
version,
HF_VERSIONS_MAPPING[version],
HF_VERSIONS_MAPPING[device][version],
region=region,
)
assert expected == uri
Expand Down

0 comments on commit 99d883a

Please sign in to comment.