Skip to content

Commit

Permalink
fix: fixed create monitoring schedule failing after validation error (#…
Browse files Browse the repository at this point in the history
…4385)

Co-authored-by: Keshav Chandak <chakesh@amazon.com>
  • Loading branch information
keshav-chandak and Keshav Chandak authored Jan 30, 2024
1 parent 427dec6 commit 8b206ba
Show file tree
Hide file tree
Showing 3 changed files with 114 additions and 24 deletions.
2 changes: 2 additions & 0 deletions src/sagemaker/model_monitor/clarify_model_monitoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -669,6 +669,7 @@ def create_monitoring_schedule(
self.monitoring_schedule_name = monitor_schedule_name
except Exception:
logger.exception("Failed to create monitoring schedule.")
self.monitoring_schedule_name = None
# noinspection PyBroadException
try:
self.sagemaker_session.sagemaker_client.delete_model_bias_job_definition(
Expand Down Expand Up @@ -1109,6 +1110,7 @@ def create_monitoring_schedule(
self.monitoring_schedule_name = monitor_schedule_name
except Exception:
logger.exception("Failed to create monitoring schedule.")
self.monitoring_schedule_name = None
# noinspection PyBroadException
try:
self.sagemaker_session.sagemaker_client.delete_model_explainability_job_definition(
Expand Down
54 changes: 30 additions & 24 deletions src/sagemaker/model_monitor/model_monitoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -415,30 +415,34 @@ def create_monitoring_schedule(
if arguments is not None:
self.arguments = arguments

self.sagemaker_session.create_monitoring_schedule(
monitoring_schedule_name=self.monitoring_schedule_name,
schedule_expression=schedule_cron_expression,
statistics_s3_uri=statistics_s3_uri,
constraints_s3_uri=constraints_s3_uri,
monitoring_inputs=[normalized_monitoring_input],
monitoring_output_config=monitoring_output_config,
instance_count=self.instance_count,
instance_type=self.instance_type,
volume_size_in_gb=self.volume_size_in_gb,
volume_kms_key=self.volume_kms_key,
image_uri=self.image_uri,
entrypoint=self.entrypoint,
arguments=self.arguments,
record_preprocessor_source_uri=None,
post_analytics_processor_source_uri=None,
max_runtime_in_seconds=self.max_runtime_in_seconds,
environment=self.env,
network_config=network_config_dict,
role_arn=self.sagemaker_session.expand_role(self.role),
tags=self.tags,
data_analysis_start_time=data_analysis_start_time,
data_analysis_end_time=data_analysis_end_time,
)
try:
self.sagemaker_session.create_monitoring_schedule(
monitoring_schedule_name=self.monitoring_schedule_name,
schedule_expression=schedule_cron_expression,
statistics_s3_uri=statistics_s3_uri,
constraints_s3_uri=constraints_s3_uri,
monitoring_inputs=[normalized_monitoring_input],
monitoring_output_config=monitoring_output_config,
instance_count=self.instance_count,
instance_type=self.instance_type,
volume_size_in_gb=self.volume_size_in_gb,
volume_kms_key=self.volume_kms_key,
image_uri=self.image_uri,
entrypoint=self.entrypoint,
arguments=self.arguments,
record_preprocessor_source_uri=None,
post_analytics_processor_source_uri=None,
max_runtime_in_seconds=self.max_runtime_in_seconds,
environment=self.env,
network_config=network_config_dict,
role_arn=self.sagemaker_session.expand_role(self.role),
tags=self.tags,
data_analysis_start_time=data_analysis_start_time,
data_analysis_end_time=data_analysis_end_time,
)
except Exception:
self.monitoring_schedule_name = None
raise

def update_monitoring_schedule(
self,
Expand Down Expand Up @@ -2054,6 +2058,7 @@ def create_monitoring_schedule(
self.monitoring_schedule_name = monitor_schedule_name
except Exception:
logger.exception("Failed to create monitoring schedule.")
self.monitoring_schedule_name = None
# noinspection PyBroadException
try:
self.sagemaker_session.sagemaker_client.delete_data_quality_job_definition(
Expand Down Expand Up @@ -3173,6 +3178,7 @@ def create_monitoring_schedule(
self.monitoring_schedule_name = monitor_schedule_name
except Exception:
logger.exception("Failed to create monitoring schedule.")
self.monitoring_schedule_name = None
# noinspection PyBroadException
try:
self.sagemaker_session.sagemaker_client.delete_model_quality_job_definition(
Expand Down
82 changes: 82 additions & 0 deletions tests/integ/test_model_monitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -2488,3 +2488,85 @@ def test_one_time_monitoring_schedule(sagemaker_session):
my_default_monitor.stop_monitoring_schedule()
my_default_monitor.delete_monitoring_schedule()
raise e


def test_create_monitoring_schedule_with_validation_error(sagemaker_session):
    """Check that a monitor stays usable after a failed schedule creation.

    The test first calls ``create_monitoring_schedule`` with a deliberately
    over-long schedule name and expects the raised error to mention
    ``ValidationException``. It then calls ``create_monitoring_schedule``
    again on the *same* monitor object with a valid, unique name; that second
    call must succeed. The schedule created by the second call is always
    stopped and deleted before the test returns.

    Args:
        sagemaker_session: Session fixture used for the monitor, for resolving
            the default bucket, and for loading statistics/constraints.
    """
    my_default_monitor = DefaultModelMonitor(
        role=ROLE,
        instance_count=INSTANCE_COUNT,
        instance_type=INSTANCE_TYPE,
        volume_size_in_gb=VOLUME_SIZE_IN_GB,
        max_runtime_in_seconds=MAX_RUNTIME_IN_SECONDS,
        sagemaker_session=sagemaker_session,
        env=ENVIRONMENT,
        tags=TAGS,
        network_config=NETWORK_CONFIG,
    )

    output_s3_uri = os.path.join(
        "s3://",
        sagemaker_session.default_bucket(),
        "integ-test-monitoring-output-bucket",
        str(uuid.uuid4()),
    )

    data_captured_destination_s3_uri = os.path.join(
        "s3://",
        sagemaker_session.default_bucket(),
        "sagemaker-serving-batch-transform",
        str(uuid.uuid4()),
    )

    batch_transform_input = BatchTransformInput(
        data_captured_destination_s3_uri=data_captured_destination_s3_uri,
        destination="/opt/ml/processing/output",
        dataset_format=MonitoringDatasetFormat.csv(header=False),
    )

    statistics = Statistics.from_file_path(
        statistics_file_path=os.path.join(tests.integ.DATA_DIR, "monitor/statistics.json"),
        sagemaker_session=sagemaker_session,
    )

    constraints = Constraints.from_file_path(
        constraints_file_path=os.path.join(tests.integ.DATA_DIR, "monitor/constraints.json"),
        sagemaker_session=sagemaker_session,
    )

    # First attempt: the schedule name is intentionally invalid, so the call
    # is expected to raise.
    try:
        my_default_monitor.create_monitoring_schedule(
            monitor_schedule_name="schedule-name-more-than-63-characters-to-get-a-validation-exception",
            batch_transform_input=batch_transform_input,
            output_s3_uri=output_s3_uri,
            statistics=statistics,
            constraints=constraints,
            schedule_cron_expression=CronExpressionGenerator.now(),
            data_analysis_start_time="-PT1H",
            data_analysis_end_time="-PT0H",
            enable_cloudwatch_metrics=ENABLE_CLOUDWATCH_METRICS,
        )
    except Exception as e:
        assert "ValidationException" in str(e)
    else:
        # Without this branch the test would carry on silently when no error
        # is raised, and the scenario under test would never be exercised.
        # The failure is raised here (not inside the ``try``) so the broad
        # ``except`` above cannot swallow it.
        raise AssertionError(
            "Expected create_monitoring_schedule to raise a ValidationException "
            "for an over-long schedule name, but no exception was raised."
        )

    # Second attempt on the same monitor object: must succeed despite the
    # earlier failure.
    my_default_monitor.create_monitoring_schedule(
        monitor_schedule_name=unique_name_from_base("valid-schedule-name"),
        batch_transform_input=batch_transform_input,
        output_s3_uri=output_s3_uri,
        statistics=statistics,
        constraints=constraints,
        schedule_cron_expression=CronExpressionGenerator.now(),
        data_analysis_start_time="-PT1H",
        data_analysis_end_time="-PT0H",
        enable_cloudwatch_metrics=ENABLE_CLOUDWATCH_METRICS,
    )
    try:
        _wait_for_schedule_changes_to_apply(monitor=my_default_monitor)
    finally:
        # Always tear the schedule down, whether or not waiting succeeded;
        # any error from the wait propagates with its original traceback.
        my_default_monitor.stop_monitoring_schedule()
        my_default_monitor.delete_monitoring_schedule()

0 comments on commit 8b206ba

Please sign in to comment.