diff --git a/.gitignore b/.gitignore index b7a858e341..ad6e488dbd 100644 --- a/.gitignore +++ b/.gitignore @@ -34,4 +34,4 @@ env/ **/_repack_script_launcher.sh tests/data/**/_repack_model.py tests/data/experiment/sagemaker-dev-1.0.tar.gz -src/sagemaker/serve/tmp_workspace +src/sagemaker/serve/tmp_workspace \ No newline at end of file diff --git a/src/sagemaker/serve/utils/telemetry_logger.py b/src/sagemaker/serve/utils/telemetry_logger.py index cb57a9f0a7..84044c4d96 100644 --- a/src/sagemaker/serve/utils/telemetry_logger.py +++ b/src/sagemaker/serve/utils/telemetry_logger.py @@ -13,12 +13,15 @@ """Placeholder docstring""" from __future__ import absolute_import import logging +from time import perf_counter + import requests -from sagemaker import Session +from sagemaker import Session, exceptions from sagemaker.serve.mode.function_pointers import Mode from sagemaker.serve.utils.exceptions import ModelBuilderException from sagemaker.serve.utils.types import ModelServer +from sagemaker.user_agent import SDK_VERSION logger = logging.getLogger(__name__) @@ -63,13 +66,21 @@ def wrapper(self, *args, **kwargs): f"{func_name}" f"&x-modelServer={MODEL_SERVER_TO_CODE[str(self.model_server)]}" f"&x-imageTag={image_uri_tail}" + f"&x-sdkVersion={SDK_VERSION}" ) if self.model_server == ModelServer.DJL_SERVING or self.model_server == ModelServer.TGI: extra += f"&x-modelName={self.model}" + if self.sagemaker_session and self.sagemaker_session.endpoint_arn: + extra += f"&x-endpointArn={self.sagemaker_session.endpoint_arn}" + + start_timer = perf_counter() try: response = func(self, *args, **kwargs) + stop_timer = perf_counter() + elapsed = stop_timer - start_timer + extra += f"&x-latency={round(elapsed, 2)}" if not self.serve_settings.telemetry_opt_out: _send_telemetry( "1", @@ -79,7 +90,15 @@ def wrapper(self, *args, **kwargs): None, extra, ) - except ModelBuilderException as e: + except ( + ModelBuilderException, + exceptions.CapacityError, + exceptions.UnexpectedStatusException, + exceptions.AsyncInferenceError, + ) as e: + stop_timer = perf_counter() + elapsed = stop_timer - start_timer + extra += f"&x-latency={round(elapsed, 2)}" if not self.serve_settings.telemetry_opt_out: _send_telemetry( "0", diff --git a/src/sagemaker/session.py b/src/sagemaker/session.py index ff5a82a902..14b2e904e8 100644 --- a/src/sagemaker/session.py +++ b/src/sagemaker/session.py @@ -243,6 +243,7 @@ def __init__( # sagemaker_config is validated and initialized inside :func:`_initialize`, # so if default_bucket is None and the sagemaker_config has a default S3 bucket configured, # _default_bucket_name_override will be set again inside :func:`_initialize`. + self.endpoint_arn = None self._default_bucket = None self._default_bucket_name_override = default_bucket # this may also be set again inside :func:`_initialize` if it is None @@ -4284,9 +4285,12 @@ def create_endpoint(self, endpoint_name, config_name, tags=None, wait=True, live tags, "{}.{}.{}".format(SAGEMAKER, ENDPOINT, TAGS) ) - self.sagemaker_client.create_endpoint( + res = self.sagemaker_client.create_endpoint( EndpointName=endpoint_name, EndpointConfigName=config_name, Tags=tags ) + if res: + self.endpoint_arn = res["EndpointArn"] + if wait: self.wait_for_endpoint(endpoint_name, live_logging=live_logging) return endpoint_name @@ -4344,9 +4348,11 @@ def update_endpoint(self, endpoint_name, endpoint_config_name, wait=True): "existing endpoint name".format(endpoint_name) ) - self.sagemaker_client.update_endpoint( + res = self.sagemaker_client.update_endpoint( EndpointName=endpoint_name, EndpointConfigName=endpoint_config_name ) + if res: + self.endpoint_arn = res["EndpointArn"] if wait: self.wait_for_endpoint(endpoint_name) diff --git a/tests/unit/sagemaker/serve/utils/test_telemetry_logger.py b/tests/unit/sagemaker/serve/utils/test_telemetry_logger.py index 7c4b014e8a..73d24c9505 100644 --- a/tests/unit/sagemaker/serve/utils/test_telemetry_logger.py +++ b/tests/unit/sagemaker/serve/utils/test_telemetry_logger.py @@ -20,6 +20,7 @@ _construct_url, ) from sagemaker.serve.utils.exceptions import ModelBuilderException, LocalModelOutOfMemoryException +from sagemaker.user_agent import SDK_VERSION MOCK_SESSION = Mock() MOCK_FUNC_NAME = "Mock.deploy" @@ -32,6 +33,7 @@ ) MOCK_HUGGINGFACE_ID = "meta-llama/Llama-2-7b-hf" MOCK_EXCEPTION = LocalModelOutOfMemoryException("mock raise ex") +MOCK_ENDPOINT_ARN = "arn:aws:sagemaker:us-west-2:123456789012:endpoint/test" class ModelBuilderMock: @@ -72,15 +74,22 @@ def test_capture_telemetry_decorator_djl_success(self, mock_send_telemetry): mock_model_builder.model = MOCK_HUGGINGFACE_ID mock_model_builder.mode = Mode.LOCAL_CONTAINER mock_model_builder.model_server = ModelServer.DJL_SERVING + mock_model_builder.sagemaker_session.endpoint_arn = MOCK_ENDPOINT_ARN mock_model_builder.mock_deploy() + args = mock_send_telemetry.call_args.args + latency = str(args[5]).split("latency=")[1] expected_extra_str = ( f"{MOCK_FUNC_NAME}" "&x-modelServer=4" "&x-imageTag=djl-inference:0.25.0-deepspeed0.11.0-cu118" + f"&x-sdkVersion={SDK_VERSION}" f"&x-modelName={MOCK_HUGGINGFACE_ID}" + f"&x-endpointArn={MOCK_ENDPOINT_ARN}" + f"&x-latency={latency}" ) + mock_send_telemetry.assert_called_once_with( "1", 2, MOCK_SESSION, None, None, expected_extra_str ) @@ -93,15 +102,22 @@ def test_capture_telemetry_decorator_tgi_success(self, mock_send_telemetry): mock_model_builder.model = MOCK_HUGGINGFACE_ID mock_model_builder.mode = Mode.LOCAL_CONTAINER mock_model_builder.model_server = ModelServer.TGI + mock_model_builder.sagemaker_session.endpoint_arn = MOCK_ENDPOINT_ARN mock_model_builder.mock_deploy() + args = mock_send_telemetry.call_args.args + latency = str(args[5]).split("latency=")[1] expected_extra_str = ( f"{MOCK_FUNC_NAME}" "&x-modelServer=6" "&x-imageTag=huggingface-pytorch-inference:2.0.0-transformers4.28.1-cpu-py310-ubuntu20.04" + f"&x-sdkVersion={SDK_VERSION}" f"&x-modelName={MOCK_HUGGINGFACE_ID}" + f"&x-endpointArn={MOCK_ENDPOINT_ARN}" + f"&x-latency={latency}" ) + mock_send_telemetry.assert_called_once_with( "1", 2, MOCK_SESSION, None, None, expected_extra_str ) @@ -126,6 +142,7 @@ def test_capture_telemetry_decorator_handle_exception_success(self, mock_send_te mock_model_builder.model = MOCK_HUGGINGFACE_ID mock_model_builder.mode = Mode.LOCAL_CONTAINER mock_model_builder.model_server = ModelServer.DJL_SERVING + mock_model_builder.sagemaker_session.endpoint_arn = MOCK_ENDPOINT_ARN mock_exception = Mock() mock_exception_obj = MOCK_EXCEPTION @@ -134,12 +151,18 @@ def test_capture_telemetry_decorator_handle_exception_success(self, mock_send_te with self.assertRaises(ModelBuilderException) as _: mock_model_builder.mock_deploy(mock_exception) + args = mock_send_telemetry.call_args.args + latency = str(args[5]).split("latency=")[1] expected_extra_str = ( f"{MOCK_FUNC_NAME}" "&x-modelServer=4" "&x-imageTag=djl-inference:0.25.0-deepspeed0.11.0-cu118" + f"&x-sdkVersion={SDK_VERSION}" f"&x-modelName={MOCK_HUGGINGFACE_ID}" + f"&x-endpointArn={MOCK_ENDPOINT_ARN}" + f"&x-latency={latency}" ) + mock_send_telemetry.assert_called_once_with( "0", 2,