Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

infra: use fixture for Python version in TF integ tests #1617

Merged
merged 6 commits into from
Jun 25, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 28 additions & 18 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import pytest
import tests.integ
from botocore.config import Config
from packaging.version import Version

from sagemaker import Session, utils
from sagemaker.local import LocalSession
Expand Down Expand Up @@ -57,7 +58,6 @@ def pytest_addoption(parser):
"--rl-ray-full-version", action="store", default=RLEstimator.RAY_LATEST_VERSION
)
parser.addoption("--sklearn-full-version", action="store", default="0.20.0")
parser.addoption("--tf-full-version", action="store", default="2.2.0")
parser.addoption("--ei-tf-full-version", action="store")
parser.addoption("--xgboost-full-version", action="store", default="1.0-1")

Expand Down Expand Up @@ -304,35 +304,45 @@ def sklearn_full_version(request):


@pytest.fixture(scope="module")
def tf_full_version(request):
return request.config.getoption("--tf-full-version")
def tf_training_latest_version():
return "2.2.0"


@pytest.fixture(scope="module")
def tf_training_latest_py_version():
    """Python version tag for tests that run the latest TF training version.

    Returns:
        str: The fixed tag ``"py37"``.
    """
    return "py37"


@pytest.fixture(scope="module")
def tf_serving_latest_version():
    """TensorFlow Serving version for tests that only deploy a model.

    Kept separate from the training version because TF training and TFS
    have different latest versions.

    Returns:
        str: The fixed version ``"2.1.0"``.
    """
    return "2.1.0"


@pytest.fixture(scope="module")
def tf_full_version(tf_training_latest_version, tf_serving_latest_version):
    """Fixture for TF tests that test both training and inference.

    Fixture exists as such, since TF training and TFS have different latest versions.
    Otherwise, this would simply be a single latest version.

    Args:
        tf_training_latest_version (str): Version string from the training fixture.
        tf_serving_latest_version (str): Version string from the serving fixture.

    Returns:
        str: The lower of the two versions, rendered back to a string.
    """
    # Compare parsed versions, not raw strings, so "2.10.0" ranks above "2.2.0".
    lowest = min(Version(tf_training_latest_version), Version(tf_serving_latest_version))
    return str(lowest)


@pytest.fixture(scope="module")
def tf_full_py_version(tf_full_version):
"""fixture to match tf_full_version
"""Fixture to match tf_full_version

Fixture exists as such, since tf_full_version may be overridden --tf-full-version.
Fixture exists as such, since TF training and TFS have different latest versions.
Otherwise, this would simply be py37 to match the latest version support.

TODO: Evaluate use of --tf-full-version with possible eye to remove and simplify code.
"""
version = [int(val) for val in tf_full_version.split(".")]
if version < [1, 11]:
version = Version(tf_full_version)
if version < Version("1.11"):
return "py2"
if version < [2, 2]:
if version < Version("2.2"):
return "py3"
return "py37"


@pytest.fixture(scope="module")
def tf_serving_version(tf_full_version):
    """Pick the TensorFlow Serving version to pair with ``tf_full_version``.

    Args:
        tf_full_version (str): Dotted, all-numeric version string such as ``"2.1.0"``.

    Returns:
        str: ``tf_full_version`` itself when it is below 2.2, otherwise ``"2.1.0"``.
    """
    full_version = [int(val) for val in tf_full_version.split(".")]
    # Lists of ints compare element by element, so [2, 10] ranks above [2, 2].
    if full_version < [2, 2]:
        return tf_full_version
    return "2.1.0"


@pytest.fixture(scope="module", params=["1.15.0", "2.0.0"])
def ei_tf_full_version(request):
tf_ei_version = request.config.getoption("--ei-tf-full-version")
Expand Down
6 changes: 3 additions & 3 deletions tests/integ/test_airflow_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -512,7 +512,7 @@ def test_sklearn_airflow_config_uploads_data_source_to_s3(

@pytest.mark.canary_quick
def test_tf_airflow_config_uploads_data_source_to_s3(
sagemaker_session, cpu_instance_type, tf_full_version, tf_full_py_version
sagemaker_session, cpu_instance_type, tf_training_latest_version, tf_training_latest_py_version
):
with timeout(seconds=AIRFLOW_CONFIG_TIMEOUT_IN_SECONDS):
tf = TensorFlow(
Expand All @@ -524,8 +524,8 @@ def test_tf_airflow_config_uploads_data_source_to_s3(
train_instance_count=SINGLE_INSTANCE_COUNT,
train_instance_type=cpu_instance_type,
sagemaker_session=sagemaker_session,
framework_version=tf_full_version,
py_version=tf_full_py_version,
framework_version=tf_training_latest_version,
py_version=tf_training_latest_py_version,
metric_definitions=[
{"Name": "train:global_steps", "Regex": r"global_step\/sec:\s(.*)"}
],
Expand Down
12 changes: 6 additions & 6 deletions tests/integ/test_data_capture_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@


def test_enabling_data_capture_on_endpoint_shows_correct_data_capture_status(
sagemaker_session, tf_serving_version
sagemaker_session, tf_serving_latest_version
):
endpoint_name = unique_name_from_base("sagemaker-tensorflow-serving")
model_data = sagemaker_session.upload_data(
Expand All @@ -52,7 +52,7 @@ def test_enabling_data_capture_on_endpoint_shows_correct_data_capture_status(
model = TensorFlowModel(
model_data=model_data,
role=ROLE,
framework_version=tf_serving_version,
framework_version=tf_serving_latest_version,
sagemaker_session=sagemaker_session,
)
predictor = model.deploy(
Expand Down Expand Up @@ -98,7 +98,7 @@ def test_enabling_data_capture_on_endpoint_shows_correct_data_capture_status(


def test_disabling_data_capture_on_endpoint_shows_correct_data_capture_status(
sagemaker_session, tf_serving_version
sagemaker_session, tf_serving_latest_version
):
endpoint_name = unique_name_from_base("sagemaker-tensorflow-serving")
model_data = sagemaker_session.upload_data(
Expand All @@ -109,7 +109,7 @@ def test_disabling_data_capture_on_endpoint_shows_correct_data_capture_status(
model = TensorFlowModel(
model_data=model_data,
role=ROLE,
framework_version=tf_serving_version,
framework_version=tf_serving_latest_version,
sagemaker_session=sagemaker_session,
)
destination_s3_uri = os.path.join(
Expand Down Expand Up @@ -184,7 +184,7 @@ def test_disabling_data_capture_on_endpoint_shows_correct_data_capture_status(


def test_updating_data_capture_on_endpoint_shows_correct_data_capture_status(
sagemaker_session, tf_serving_version
sagemaker_session, tf_serving_latest_version
):
endpoint_name = sagemaker.utils.unique_name_from_base("sagemaker-tensorflow-serving")
model_data = sagemaker_session.upload_data(
Expand All @@ -195,7 +195,7 @@ def test_updating_data_capture_on_endpoint_shows_correct_data_capture_status(
model = TensorFlowModel(
model_data=model_data,
role=ROLE,
framework_version=tf_serving_version,
framework_version=tf_serving_latest_version,
sagemaker_session=sagemaker_session,
)
destination_s3_uri = os.path.join(
Expand Down
54 changes: 38 additions & 16 deletions tests/integ/test_horovod.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@
import json
import os
import tarfile
from six.moves.urllib.parse import urlparse

import boto3
import pytest
from six.moves.urllib.parse import urlparse

import sagemaker.utils
import tests.integ as integ
Expand All @@ -28,27 +28,49 @@
horovod_dir = os.path.join(os.path.dirname(__file__), "..", "data", "horovod")


@pytest.fixture(scope="module")
def gpu_instance_type(request):
    """GPU instance type for the Horovod GPU test.

    Args:
        request: pytest request object; not used by this fixture.

    Returns:
        str: The fixed instance type ``"ml.p2.xlarge"``.
    """
    return "ml.p2.xlarge"


@pytest.mark.canary_quick
def test_hvd_cpu(sagemaker_session, cpu_instance_type, tmpdir):
_create_and_fit_estimator(sagemaker_session, cpu_instance_type, tmpdir)
def test_hvd_cpu(
sagemaker_session,
tf_training_latest_version,
tf_training_latest_py_version,
cpu_instance_type,
tmpdir,
):
_create_and_fit_estimator(
sagemaker_session,
tf_training_latest_version,
tf_training_latest_py_version,
cpu_instance_type,
tmpdir,
)


@pytest.mark.canary_quick
@pytest.mark.skipif(
integ.test_region() in integ.TRAINING_NO_P2_REGIONS, reason="no ml.p2 instances in this region"
)
def test_hvd_gpu(sagemaker_session, gpu_instance_type, tmpdir):
_create_and_fit_estimator(sagemaker_session, gpu_instance_type, tmpdir)
def test_hvd_gpu(
sagemaker_session, tf_training_latest_version, tf_training_latest_py_version, tmpdir
):
_create_and_fit_estimator(
sagemaker_session,
tf_training_latest_version,
tf_training_latest_py_version,
"ml.p2.xlarge",
tmpdir,
)


@pytest.mark.local_mode
@pytest.mark.parametrize("instances, processes", [[1, 2], (2, 1), (2, 2)])
def test_horovod_local_mode(sagemaker_local_session, instances, processes, tmpdir):
def test_horovod_local_mode(
sagemaker_local_session,
tf_training_latest_version,
tf_training_latest_py_version,
instances,
processes,
tmpdir,
):
output_path = "file://%s" % tmpdir
job_name = sagemaker.utils.unique_name_from_base("tf-horovod")
estimator = TensorFlow(
Expand All @@ -57,9 +79,9 @@ def test_horovod_local_mode(sagemaker_local_session, instances, processes, tmpdi
train_instance_count=2,
train_instance_type="local",
sagemaker_session=sagemaker_local_session,
py_version=integ.PYTHON_VERSION,
output_path=output_path,
framework_version="1.12",
framework_version=tf_training_latest_version,
py_version=tf_training_latest_py_version,
distributions={"mpi": {"enabled": True, "processes_per_host": processes}},
)

Expand Down Expand Up @@ -96,16 +118,16 @@ def extract_files_from_s3(s3_url, tmpdir, sagemaker_session):
tar_file.extractall(tmpdir)


def _create_and_fit_estimator(sagemaker_session, instance_type, tmpdir):
def _create_and_fit_estimator(sagemaker_session, tf_version, py_version, instance_type, tmpdir):
job_name = sagemaker.utils.unique_name_from_base("tf-horovod")
estimator = TensorFlow(
entry_point=os.path.join(horovod_dir, "hvd_basic.py"),
role="SageMakerRole",
train_instance_count=2,
train_instance_type=instance_type,
sagemaker_session=sagemaker_session,
py_version=integ.PYTHON_VERSION,
framework_version="1.12",
py_version=py_version,
framework_version=tf_version,
distributions={"mpi": {"enabled": True}},
)

Expand Down
4 changes: 2 additions & 2 deletions tests/integ/test_model_monitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@


@pytest.fixture(scope="module")
def predictor(sagemaker_session, tf_serving_version):
def predictor(sagemaker_session, tf_serving_latest_version):
endpoint_name = unique_name_from_base("sagemaker-tensorflow-serving")
model_data = sagemaker_session.upload_data(
path=os.path.join(tests.integ.DATA_DIR, "tensorflow-serving-test-model.tar.gz"),
Expand All @@ -100,7 +100,7 @@ def predictor(sagemaker_session, tf_serving_version):
model = TensorFlowModel(
model_data=model_data,
role=ROLE,
framework_version=tf_serving_version,
framework_version=tf_serving_latest_version,
sagemaker_session=sagemaker_session,
)
predictor = model.deploy(
Expand Down
37 changes: 20 additions & 17 deletions tests/integ/test_tf.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from sagemaker.utils import unique_name_from_base, sagemaker_timestamp

import tests.integ
from tests.integ import kms_utils, timeout, PYTHON_VERSION
from tests.integ import kms_utils, timeout
from tests.integ.retry import retries
from tests.integ.s3_utils import assert_s3_files_exist

Expand All @@ -39,7 +39,7 @@


def test_mnist_with_checkpoint_config(
sagemaker_session, instance_type, tf_full_version, tf_full_py_version
sagemaker_session, instance_type, tf_training_latest_version, tf_training_latest_py_version
):
checkpoint_s3_uri = "s3://{}/checkpoints/tf-{}".format(
sagemaker_session.default_bucket(), sagemaker_timestamp()
Expand All @@ -51,8 +51,8 @@ def test_mnist_with_checkpoint_config(
train_instance_count=1,
train_instance_type=instance_type,
sagemaker_session=sagemaker_session,
framework_version=tf_full_version,
py_version=tf_full_py_version,
framework_version=tf_training_latest_version,
py_version=tf_training_latest_py_version,
metric_definitions=[{"Name": "train:global_steps", "Regex": r"global_step\/sec:\s(.*)"}],
checkpoint_s3_uri=checkpoint_s3_uri,
checkpoint_local_path=checkpoint_local_path,
Expand Down Expand Up @@ -82,7 +82,7 @@ def test_mnist_with_checkpoint_config(
assert actual_training_checkpoint_config == expected_training_checkpoint_config


def test_server_side_encryption(sagemaker_session, tf_serving_version):
def test_server_side_encryption(sagemaker_session, tf_full_version, tf_full_py_version):
with kms_utils.bucket_with_encryption(sagemaker_session, ROLE) as (bucket_with_kms, kms_key):
output_path = os.path.join(
bucket_with_kms, "test-server-side-encryption", time.strftime("%y%m%d-%H%M")
Expand All @@ -95,8 +95,8 @@ def test_server_side_encryption(sagemaker_session, tf_serving_version):
train_instance_count=1,
train_instance_type="ml.c5.xlarge",
sagemaker_session=sagemaker_session,
framework_version=tf_serving_version,
py_version=PYTHON_VERSION,
framework_version=tf_full_version,
py_version=tf_full_py_version,
code_location=output_path,
output_path=output_path,
model_dir="/opt/ml/model",
Expand All @@ -123,15 +123,17 @@ def test_server_side_encryption(sagemaker_session, tf_serving_version):


@pytest.mark.canary_quick
def test_mnist_distributed(sagemaker_session, instance_type, tf_full_version, tf_full_py_version):
def test_mnist_distributed(
sagemaker_session, instance_type, tf_training_latest_version, tf_training_latest_py_version
):
estimator = TensorFlow(
entry_point=SCRIPT,
role=ROLE,
train_instance_count=2,
train_instance_type=instance_type,
sagemaker_session=sagemaker_session,
framework_version=tf_full_version,
py_version=tf_full_py_version,
framework_version=tf_training_latest_version,
py_version=tf_training_latest_py_version,
distributions=PARAMETER_SERVER_DISTRIBUTION,
)
inputs = estimator.sagemaker_session.upload_data(
Expand All @@ -147,16 +149,15 @@ def test_mnist_distributed(sagemaker_session, instance_type, tf_full_version, tf
)


def test_mnist_async(sagemaker_session, cpu_instance_type, tf_serving_version):
def test_mnist_async(sagemaker_session, cpu_instance_type, tf_full_version, tf_full_py_version):
estimator = TensorFlow(
entry_point=SCRIPT,
role=ROLE,
train_instance_count=1,
train_instance_type="ml.c5.4xlarge",
py_version=PYTHON_VERSION,
sagemaker_session=sagemaker_session,
# testing py-sdk functionality, no need to run against all TF versions
framework_version=tf_serving_version,
framework_version=tf_full_version,
py_version=tf_full_py_version,
tags=TAGS,
)
inputs = estimator.sagemaker_session.upload_data(
Expand Down Expand Up @@ -188,15 +189,17 @@ def test_mnist_async(sagemaker_session, cpu_instance_type, tf_serving_version):
_assert_model_name_match(sagemaker_session.sagemaker_client, endpoint_name, model_name)


def test_deploy_with_input_handlers(sagemaker_session, instance_type, tf_serving_version):
def test_deploy_with_input_handlers(
sagemaker_session, instance_type, tf_full_version, tf_full_py_version
):
estimator = TensorFlow(
entry_point="training.py",
source_dir=TFS_RESOURCE_PATH,
role=ROLE,
train_instance_count=1,
train_instance_type=instance_type,
framework_version=tf_serving_version,
py_version=PYTHON_VERSION,
framework_version=tf_full_version,
py_version=tf_full_py_version,
sagemaker_session=sagemaker_session,
tags=TAGS,
)
Expand Down
Loading