Skip to content

Commit

Permalink
infra: use fixture for Python version in TF integ tests (#1617)
Browse files Browse the repository at this point in the history
  • Loading branch information
laurenyu authored Jun 25, 2020
1 parent acbe02b commit c24e0b5
Show file tree
Hide file tree
Showing 10 changed files with 155 additions and 107 deletions.
46 changes: 28 additions & 18 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import pytest
import tests.integ
from botocore.config import Config
from packaging.version import Version

from sagemaker import Session, utils
from sagemaker.local import LocalSession
Expand Down Expand Up @@ -57,7 +58,6 @@ def pytest_addoption(parser):
"--rl-ray-full-version", action="store", default=RLEstimator.RAY_LATEST_VERSION
)
parser.addoption("--sklearn-full-version", action="store", default="0.20.0")
parser.addoption("--tf-full-version", action="store", default="2.2.0")
parser.addoption("--ei-tf-full-version", action="store")
parser.addoption("--xgboost-full-version", action="store", default="1.0-1")

Expand Down Expand Up @@ -304,35 +304,45 @@ def sklearn_full_version(request):


@pytest.fixture(scope="module")
def tf_full_version(request):
return request.config.getoption("--tf-full-version")
def tf_training_latest_version():
return "2.2.0"


@pytest.fixture(scope="module")
def tf_training_latest_py_version():
    """Return the Python version tag ("py37") paired with the latest TF training version.

    Tests pass this value as ``py_version`` when constructing a TensorFlow
    estimator with ``tf_training_latest_version``.
    """
    return "py37"


@pytest.fixture(scope="module")
def tf_serving_latest_version():
    """Return the latest TensorFlow Serving version string used by the tests ("2.1.0").

    Tests pass this value as ``framework_version`` when building a
    ``TensorFlowModel`` for deployment.
    """
    return "2.1.0"


@pytest.fixture(scope="module")
def tf_full_version(tf_training_latest_version, tf_serving_latest_version):
    """Pick the TF version for tests that cover both training and inference.

    TF training and TF Serving (TFS) can have different latest versions, so
    the lower of the two is used. If both were always the same, this would
    simply be that single latest version.

    Returns:
        str: the string form of the lower of the two parsed versions.
    """
    training = Version(tf_training_latest_version)
    serving = Version(tf_serving_latest_version)
    # Same tie-breaking as min(training, serving): keep the training version
    # unless the serving version is strictly lower.
    lowest = serving if serving < training else training
    return str(lowest)


@pytest.fixture(scope="module")
def tf_full_py_version(tf_full_version):
"""fixture to match tf_full_version
"""Fixture to match tf_full_version
Fixture exists as such, since tf_full_version may be overridden --tf-full-version.
Fixture exists as such, since TF training and TFS have different latest versions.
Otherwise, this would simply be py37 to match the latest version support.
TODO: Evaluate use of --tf-full-version with possible eye to remove and simplify code.
"""
version = [int(val) for val in tf_full_version.split(".")]
if version < [1, 11]:
version = Version(tf_full_version)
if version < Version("1.11"):
return "py2"
if version < [2, 2]:
if version < Version("2.2"):
return "py3"
return "py37"


@pytest.fixture(scope="module")
def tf_serving_version(tf_full_version):
    """Return the TF Serving version to use for a given ``tf_full_version``.

    Versions below 2.2 are passed through unchanged; 2.2 and above map to
    "2.1.0" (presumably the newest TFS version available at the time --
    confirm against the supported serving images).
    """
    parsed = [int(part) for part in tf_full_version.split(".")]
    # Lists of ints compare element-wise, so [2, 1, 0] < [2, 2] <= [2, 2, 0].
    return "2.1.0" if parsed >= [2, 2] else tf_full_version


@pytest.fixture(scope="module", params=["1.15.0", "2.0.0"])
def ei_tf_full_version(request):
tf_ei_version = request.config.getoption("--ei-tf-full-version")
Expand Down
6 changes: 3 additions & 3 deletions tests/integ/test_airflow_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -512,7 +512,7 @@ def test_sklearn_airflow_config_uploads_data_source_to_s3(

@pytest.mark.canary_quick
def test_tf_airflow_config_uploads_data_source_to_s3(
sagemaker_session, cpu_instance_type, tf_full_version, tf_full_py_version
sagemaker_session, cpu_instance_type, tf_training_latest_version, tf_training_latest_py_version
):
with timeout(seconds=AIRFLOW_CONFIG_TIMEOUT_IN_SECONDS):
tf = TensorFlow(
Expand All @@ -524,8 +524,8 @@ def test_tf_airflow_config_uploads_data_source_to_s3(
train_instance_count=SINGLE_INSTANCE_COUNT,
train_instance_type=cpu_instance_type,
sagemaker_session=sagemaker_session,
framework_version=tf_full_version,
py_version=tf_full_py_version,
framework_version=tf_training_latest_version,
py_version=tf_training_latest_py_version,
metric_definitions=[
{"Name": "train:global_steps", "Regex": r"global_step\/sec:\s(.*)"}
],
Expand Down
12 changes: 6 additions & 6 deletions tests/integ/test_data_capture_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@


def test_enabling_data_capture_on_endpoint_shows_correct_data_capture_status(
sagemaker_session, tf_serving_version
sagemaker_session, tf_serving_latest_version
):
endpoint_name = unique_name_from_base("sagemaker-tensorflow-serving")
model_data = sagemaker_session.upload_data(
Expand All @@ -52,7 +52,7 @@ def test_enabling_data_capture_on_endpoint_shows_correct_data_capture_status(
model = TensorFlowModel(
model_data=model_data,
role=ROLE,
framework_version=tf_serving_version,
framework_version=tf_serving_latest_version,
sagemaker_session=sagemaker_session,
)
predictor = model.deploy(
Expand Down Expand Up @@ -98,7 +98,7 @@ def test_enabling_data_capture_on_endpoint_shows_correct_data_capture_status(


def test_disabling_data_capture_on_endpoint_shows_correct_data_capture_status(
sagemaker_session, tf_serving_version
sagemaker_session, tf_serving_latest_version
):
endpoint_name = unique_name_from_base("sagemaker-tensorflow-serving")
model_data = sagemaker_session.upload_data(
Expand All @@ -109,7 +109,7 @@ def test_disabling_data_capture_on_endpoint_shows_correct_data_capture_status(
model = TensorFlowModel(
model_data=model_data,
role=ROLE,
framework_version=tf_serving_version,
framework_version=tf_serving_latest_version,
sagemaker_session=sagemaker_session,
)
destination_s3_uri = os.path.join(
Expand Down Expand Up @@ -184,7 +184,7 @@ def test_disabling_data_capture_on_endpoint_shows_correct_data_capture_status(


def test_updating_data_capture_on_endpoint_shows_correct_data_capture_status(
sagemaker_session, tf_serving_version
sagemaker_session, tf_serving_latest_version
):
endpoint_name = sagemaker.utils.unique_name_from_base("sagemaker-tensorflow-serving")
model_data = sagemaker_session.upload_data(
Expand All @@ -195,7 +195,7 @@ def test_updating_data_capture_on_endpoint_shows_correct_data_capture_status(
model = TensorFlowModel(
model_data=model_data,
role=ROLE,
framework_version=tf_serving_version,
framework_version=tf_serving_latest_version,
sagemaker_session=sagemaker_session,
)
destination_s3_uri = os.path.join(
Expand Down
54 changes: 38 additions & 16 deletions tests/integ/test_horovod.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@
import json
import os
import tarfile
from six.moves.urllib.parse import urlparse

import boto3
import pytest
from six.moves.urllib.parse import urlparse

import sagemaker.utils
import tests.integ as integ
Expand All @@ -28,27 +28,49 @@
horovod_dir = os.path.join(os.path.dirname(__file__), "..", "data", "horovod")


@pytest.fixture(scope="module")
def gpu_instance_type(request):
    """Return the GPU instance type ("ml.p2.xlarge") used by the Horovod GPU test.

    ``request`` is accepted but not used by this fixture.
    """
    return "ml.p2.xlarge"


@pytest.mark.canary_quick
def test_hvd_cpu(sagemaker_session, cpu_instance_type, tmpdir):
_create_and_fit_estimator(sagemaker_session, cpu_instance_type, tmpdir)
def test_hvd_cpu(
sagemaker_session,
tf_training_latest_version,
tf_training_latest_py_version,
cpu_instance_type,
tmpdir,
):
_create_and_fit_estimator(
sagemaker_session,
tf_training_latest_version,
tf_training_latest_py_version,
cpu_instance_type,
tmpdir,
)


@pytest.mark.canary_quick
@pytest.mark.skipif(
integ.test_region() in integ.TRAINING_NO_P2_REGIONS, reason="no ml.p2 instances in this region"
)
def test_hvd_gpu(sagemaker_session, gpu_instance_type, tmpdir):
_create_and_fit_estimator(sagemaker_session, gpu_instance_type, tmpdir)
def test_hvd_gpu(
sagemaker_session, tf_training_latest_version, tf_training_latest_py_version, tmpdir
):
_create_and_fit_estimator(
sagemaker_session,
tf_training_latest_version,
tf_training_latest_py_version,
"ml.p2.xlarge",
tmpdir,
)


@pytest.mark.local_mode
@pytest.mark.parametrize("instances, processes", [[1, 2], (2, 1), (2, 2)])
def test_horovod_local_mode(sagemaker_local_session, instances, processes, tmpdir):
def test_horovod_local_mode(
sagemaker_local_session,
tf_training_latest_version,
tf_training_latest_py_version,
instances,
processes,
tmpdir,
):
output_path = "file://%s" % tmpdir
job_name = sagemaker.utils.unique_name_from_base("tf-horovod")
estimator = TensorFlow(
Expand All @@ -57,9 +79,9 @@ def test_horovod_local_mode(sagemaker_local_session, instances, processes, tmpdi
train_instance_count=2,
train_instance_type="local",
sagemaker_session=sagemaker_local_session,
py_version=integ.PYTHON_VERSION,
output_path=output_path,
framework_version="1.12",
framework_version=tf_training_latest_version,
py_version=tf_training_latest_py_version,
distributions={"mpi": {"enabled": True, "processes_per_host": processes}},
)

Expand Down Expand Up @@ -96,16 +118,16 @@ def extract_files_from_s3(s3_url, tmpdir, sagemaker_session):
tar_file.extractall(tmpdir)


def _create_and_fit_estimator(sagemaker_session, instance_type, tmpdir):
def _create_and_fit_estimator(sagemaker_session, tf_version, py_version, instance_type, tmpdir):
job_name = sagemaker.utils.unique_name_from_base("tf-horovod")
estimator = TensorFlow(
entry_point=os.path.join(horovod_dir, "hvd_basic.py"),
role="SageMakerRole",
train_instance_count=2,
train_instance_type=instance_type,
sagemaker_session=sagemaker_session,
py_version=integ.PYTHON_VERSION,
framework_version="1.12",
py_version=py_version,
framework_version=tf_version,
distributions={"mpi": {"enabled": True}},
)

Expand Down
4 changes: 2 additions & 2 deletions tests/integ/test_model_monitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@


@pytest.fixture(scope="module")
def predictor(sagemaker_session, tf_serving_version):
def predictor(sagemaker_session, tf_serving_latest_version):
endpoint_name = unique_name_from_base("sagemaker-tensorflow-serving")
model_data = sagemaker_session.upload_data(
path=os.path.join(tests.integ.DATA_DIR, "tensorflow-serving-test-model.tar.gz"),
Expand All @@ -100,7 +100,7 @@ def predictor(sagemaker_session, tf_serving_version):
model = TensorFlowModel(
model_data=model_data,
role=ROLE,
framework_version=tf_serving_version,
framework_version=tf_serving_latest_version,
sagemaker_session=sagemaker_session,
)
predictor = model.deploy(
Expand Down
37 changes: 20 additions & 17 deletions tests/integ/test_tf.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from sagemaker.utils import unique_name_from_base, sagemaker_timestamp

import tests.integ
from tests.integ import kms_utils, timeout, PYTHON_VERSION
from tests.integ import kms_utils, timeout
from tests.integ.retry import retries
from tests.integ.s3_utils import assert_s3_files_exist

Expand All @@ -39,7 +39,7 @@


def test_mnist_with_checkpoint_config(
sagemaker_session, instance_type, tf_full_version, tf_full_py_version
sagemaker_session, instance_type, tf_training_latest_version, tf_training_latest_py_version
):
checkpoint_s3_uri = "s3://{}/checkpoints/tf-{}".format(
sagemaker_session.default_bucket(), sagemaker_timestamp()
Expand All @@ -51,8 +51,8 @@ def test_mnist_with_checkpoint_config(
train_instance_count=1,
train_instance_type=instance_type,
sagemaker_session=sagemaker_session,
framework_version=tf_full_version,
py_version=tf_full_py_version,
framework_version=tf_training_latest_version,
py_version=tf_training_latest_py_version,
metric_definitions=[{"Name": "train:global_steps", "Regex": r"global_step\/sec:\s(.*)"}],
checkpoint_s3_uri=checkpoint_s3_uri,
checkpoint_local_path=checkpoint_local_path,
Expand Down Expand Up @@ -82,7 +82,7 @@ def test_mnist_with_checkpoint_config(
assert actual_training_checkpoint_config == expected_training_checkpoint_config


def test_server_side_encryption(sagemaker_session, tf_serving_version):
def test_server_side_encryption(sagemaker_session, tf_full_version, tf_full_py_version):
with kms_utils.bucket_with_encryption(sagemaker_session, ROLE) as (bucket_with_kms, kms_key):
output_path = os.path.join(
bucket_with_kms, "test-server-side-encryption", time.strftime("%y%m%d-%H%M")
Expand All @@ -95,8 +95,8 @@ def test_server_side_encryption(sagemaker_session, tf_serving_version):
train_instance_count=1,
train_instance_type="ml.c5.xlarge",
sagemaker_session=sagemaker_session,
framework_version=tf_serving_version,
py_version=PYTHON_VERSION,
framework_version=tf_full_version,
py_version=tf_full_py_version,
code_location=output_path,
output_path=output_path,
model_dir="/opt/ml/model",
Expand All @@ -123,15 +123,17 @@ def test_server_side_encryption(sagemaker_session, tf_serving_version):


@pytest.mark.canary_quick
def test_mnist_distributed(sagemaker_session, instance_type, tf_full_version, tf_full_py_version):
def test_mnist_distributed(
sagemaker_session, instance_type, tf_training_latest_version, tf_training_latest_py_version
):
estimator = TensorFlow(
entry_point=SCRIPT,
role=ROLE,
train_instance_count=2,
train_instance_type=instance_type,
sagemaker_session=sagemaker_session,
framework_version=tf_full_version,
py_version=tf_full_py_version,
framework_version=tf_training_latest_version,
py_version=tf_training_latest_py_version,
distributions=PARAMETER_SERVER_DISTRIBUTION,
)
inputs = estimator.sagemaker_session.upload_data(
Expand All @@ -147,16 +149,15 @@ def test_mnist_distributed(sagemaker_session, instance_type, tf_full_version, tf
)


def test_mnist_async(sagemaker_session, cpu_instance_type, tf_serving_version):
def test_mnist_async(sagemaker_session, cpu_instance_type, tf_full_version, tf_full_py_version):
estimator = TensorFlow(
entry_point=SCRIPT,
role=ROLE,
train_instance_count=1,
train_instance_type="ml.c5.4xlarge",
py_version=PYTHON_VERSION,
sagemaker_session=sagemaker_session,
# testing py-sdk functionality, no need to run against all TF versions
framework_version=tf_serving_version,
framework_version=tf_full_version,
py_version=tf_full_py_version,
tags=TAGS,
)
inputs = estimator.sagemaker_session.upload_data(
Expand Down Expand Up @@ -188,15 +189,17 @@ def test_mnist_async(sagemaker_session, cpu_instance_type, tf_serving_version):
_assert_model_name_match(sagemaker_session.sagemaker_client, endpoint_name, model_name)


def test_deploy_with_input_handlers(sagemaker_session, instance_type, tf_serving_version):
def test_deploy_with_input_handlers(
sagemaker_session, instance_type, tf_full_version, tf_full_py_version
):
estimator = TensorFlow(
entry_point="training.py",
source_dir=TFS_RESOURCE_PATH,
role=ROLE,
train_instance_count=1,
train_instance_type=instance_type,
framework_version=tf_serving_version,
py_version=PYTHON_VERSION,
framework_version=tf_full_version,
py_version=tf_full_py_version,
sagemaker_session=sagemaker_session,
tags=TAGS,
)
Expand Down
Loading

0 comments on commit c24e0b5

Please sign in to comment.