Skip to content

Commit

Permalink
infra: use fixture for Python version in TF integ tests
Browse files (browse the repository at this point in the history)
  • Loading branch information
laurenyu committed Jun 23, 2020
1 parent c211417 commit 44a7ba2
Show file tree
Hide file tree
Showing 5 changed files with 50 additions and 68 deletions.
24 changes: 4 additions & 20 deletions tests/conftest.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -58,7 +58,6 @@ def pytest_addoption(parser):
"--rl-ray-full-version", action="store", default=RLEstimator.RAY_LATEST_VERSION
)
parser.addoption("--sklearn-full-version", action="store", default="0.20.0")
parser.addoption("--tf-full-version", action="store", default="2.2.0")
parser.addoption("--ei-tf-full-version", action="store")
parser.addoption("--xgboost-full-version", action="store", default="1.0-1")

Expand Down Expand Up @@ -300,32 +299,17 @@ def sklearn_full_version(request):


@pytest.fixture(scope="module")
def tf_full_version(request):
return request.config.getoption("--tf-full-version")
def tf_full_version():
return "2.2.0"


@pytest.fixture(scope="module")
def tf_full_py_version(tf_full_version):
"""fixture to match tf_full_version
Fixture exists as such, since tf_full_version may be overridden --tf-full-version.
Otherwise, this would simply be py37 to match the latest version support.
TODO: Evaluate use of --tf-full-version with possible eye to remove and simplify code.
"""
version = [int(val) for val in tf_full_version.split(".")]
if version < [1, 11]:
return "py2"
if version < [2, 2]:
return "py3"
def tf_full_py_version():
return "py37"


@pytest.fixture(scope="module")
def tf_serving_version(tf_full_version):
full_version = [int(val) for val in tf_full_version.split(".")]
if full_version < [2, 2]:
return tf_full_version
def tf_serving_version():
return "2.1.0"


Expand Down
33 changes: 17 additions & 16 deletions tests/integ/test_horovod.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -15,10 +15,10 @@
import json
import os
import tarfile
from six.moves.urllib.parse import urlparse

import boto3
import pytest
from six.moves.urllib.parse import urlparse

import sagemaker.utils
import tests.integ as integ
Expand All @@ -28,27 +28,28 @@
horovod_dir = os.path.join(os.path.dirname(__file__), "..", "data", "horovod")


@pytest.fixture(scope="module")
def gpu_instance_type(request):
return "ml.p2.xlarge"


@pytest.mark.canary_quick
def test_hvd_cpu(sagemaker_session, cpu_instance_type, tmpdir):
_create_and_fit_estimator(sagemaker_session, cpu_instance_type, tmpdir)
def test_hvd_cpu(sagemaker_session, tf_full_version, tf_full_py_version, cpu_instance_type, tmpdir):
_create_and_fit_estimator(
sagemaker_session, tf_full_version, tf_full_py_version, cpu_instance_type, tmpdir
)


@pytest.mark.canary_quick
@pytest.mark.skipif(
integ.test_region() in integ.TRAINING_NO_P2_REGIONS, reason="no ml.p2 instances in this region"
)
def test_hvd_gpu(sagemaker_session, gpu_instance_type, tmpdir):
_create_and_fit_estimator(sagemaker_session, gpu_instance_type, tmpdir)
def test_hvd_gpu(sagemaker_session, tf_full_version, tf_full_py_version, tmpdir):
_create_and_fit_estimator(
sagemaker_session, tf_full_version, tf_full_py_version, "ml.p2.xlarge", tmpdir
)


@pytest.mark.local_mode
@pytest.mark.parametrize("instances, processes", [[1, 2], (2, 1), (2, 2)])
def test_horovod_local_mode(sagemaker_local_session, instances, processes, tmpdir):
def test_horovod_local_mode(
sagemaker_local_session, tf_full_version, tf_full_py_version, instances, processes, tmpdir
):
output_path = "file://%s" % tmpdir
job_name = sagemaker.utils.unique_name_from_base("tf-horovod")
estimator = TensorFlow(
Expand All @@ -57,9 +58,9 @@ def test_horovod_local_mode(sagemaker_local_session, instances, processes, tmpdi
train_instance_count=2,
train_instance_type="local",
sagemaker_session=sagemaker_local_session,
py_version=integ.PYTHON_VERSION,
output_path=output_path,
framework_version="1.12",
framework_version=tf_full_version,
py_version=tf_full_py_version,
distributions={"mpi": {"enabled": True, "processes_per_host": processes}},
)

Expand Down Expand Up @@ -96,16 +97,16 @@ def extract_files_from_s3(s3_url, tmpdir, sagemaker_session):
tar_file.extractall(tmpdir)


def _create_and_fit_estimator(sagemaker_session, instance_type, tmpdir):
def _create_and_fit_estimator(sagemaker_session, tf_version, py_version, instance_type, tmpdir):
job_name = sagemaker.utils.unique_name_from_base("tf-horovod")
estimator = TensorFlow(
entry_point=os.path.join(horovod_dir, "hvd_basic.py"),
role="SageMakerRole",
train_instance_count=2,
train_instance_type=instance_type,
sagemaker_session=sagemaker_session,
py_version=integ.PYTHON_VERSION,
framework_version="1.12",
py_version=py_version,
framework_version=tf_version,
distributions={"mpi": {"enabled": True}},
)

Expand Down
17 changes: 9 additions & 8 deletions tests/integ/test_tf.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -22,7 +22,7 @@
from sagemaker.utils import unique_name_from_base, sagemaker_timestamp

import tests.integ
from tests.integ import kms_utils, timeout, PYTHON_VERSION
from tests.integ import kms_utils, timeout
from tests.integ.retry import retries
from tests.integ.s3_utils import assert_s3_files_exist

Expand Down Expand Up @@ -82,7 +82,7 @@ def test_mnist_with_checkpoint_config(
assert actual_training_checkpoint_config == expected_training_checkpoint_config


def test_server_side_encryption(sagemaker_session, tf_serving_version):
def test_server_side_encryption(sagemaker_session, tf_serving_version, tf_full_py_version):
with kms_utils.bucket_with_encryption(sagemaker_session, ROLE) as (bucket_with_kms, kms_key):
output_path = os.path.join(
bucket_with_kms, "test-server-side-encryption", time.strftime("%y%m%d-%H%M")
Expand All @@ -96,7 +96,7 @@ def test_server_side_encryption(sagemaker_session, tf_serving_version):
train_instance_type="ml.c5.xlarge",
sagemaker_session=sagemaker_session,
framework_version=tf_serving_version,
py_version=PYTHON_VERSION,
py_version=tf_full_py_version,
code_location=output_path,
output_path=output_path,
model_dir="/opt/ml/model",
Expand Down Expand Up @@ -147,16 +147,15 @@ def test_mnist_distributed(sagemaker_session, instance_type, tf_full_version, tf
)


def test_mnist_async(sagemaker_session, cpu_instance_type, tf_serving_version):
def test_mnist_async(sagemaker_session, cpu_instance_type, tf_serving_version, tf_full_py_version):
estimator = TensorFlow(
entry_point=SCRIPT,
role=ROLE,
train_instance_count=1,
train_instance_type="ml.c5.4xlarge",
py_version=PYTHON_VERSION,
sagemaker_session=sagemaker_session,
# testing py-sdk functionality, no need to run against all TF versions
framework_version=tf_serving_version,
py_version=tf_full_py_version,
tags=TAGS,
)
inputs = estimator.sagemaker_session.upload_data(
Expand Down Expand Up @@ -188,15 +187,17 @@ def test_mnist_async(sagemaker_session, cpu_instance_type, tf_serving_version):
_assert_model_name_match(sagemaker_session.sagemaker_client, endpoint_name, model_name)


def test_deploy_with_input_handlers(sagemaker_session, instance_type, tf_serving_version):
def test_deploy_with_input_handlers(
sagemaker_session, instance_type, tf_serving_version, tf_full_py_version
):
estimator = TensorFlow(
entry_point="training.py",
source_dir=TFS_RESOURCE_PATH,
role=ROLE,
train_instance_count=1,
train_instance_type=instance_type,
framework_version=tf_serving_version,
py_version=PYTHON_VERSION,
py_version=tf_full_py_version,
sagemaker_session=sagemaker_session,
tags=TAGS,
)
Expand Down
4 changes: 2 additions & 2 deletions tests/integ/test_transformer.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -332,7 +332,7 @@ def test_transform_mxnet_logs(


def test_transform_tf_kms_network_isolation(
sagemaker_session, cpu_instance_type, tmpdir, tf_serving_version
sagemaker_session, cpu_instance_type, tmpdir, tf_serving_version, tf_full_py_version
):
data_path = os.path.join(DATA_DIR, "tensorflow_mnist")

Expand All @@ -342,7 +342,7 @@ def test_transform_tf_kms_network_isolation(
train_instance_count=1,
train_instance_type=cpu_instance_type,
framework_version=tf_serving_version,
py_version=PYTHON_VERSION,
py_version=tf_full_py_version,
sagemaker_session=sagemaker_session,
)

Expand Down
40 changes: 18 additions & 22 deletions tests/integ/test_tuner.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -51,8 +51,6 @@
from tests.integ.record_set import prepare_record_set_from_local_files
from tests.integ.timeout import timeout, timeout_and_delete_endpoint_by_name

DATA_PATH = os.path.join(DATA_DIR, "iris", "data")


@pytest.fixture(scope="module")
def kmeans_train_set(sagemaker_session):
Expand Down Expand Up @@ -588,9 +586,7 @@ def test_tuning_mxnet(sagemaker_session, mxnet_full_version, cpu_instance_type):


@pytest.mark.canary_quick
def test_tuning_tf_script_mode(
sagemaker_session, cpu_instance_type, tf_full_version, tf_full_py_version
):
def test_tuning_tf(sagemaker_session, cpu_instance_type, tf_full_version, tf_full_py_version):
resource_path = os.path.join(DATA_DIR, "tensorflow_mnist")
script_path = os.path.join(resource_path, "mnist.py")

Expand Down Expand Up @@ -622,7 +618,7 @@ def test_tuning_tf_script_mode(
path=os.path.join(resource_path, "data"), key_prefix="scriptmode/mnist"
)

tuning_job_name = unique_name_from_base("tune-tf-script-mode", max_length=32)
tuning_job_name = unique_name_from_base("tune-tf", max_length=32)
tuner.fit(inputs, job_name=tuning_job_name)

print("Started hyperparameter tuning job with name: " + tuning_job_name)
Expand All @@ -631,13 +627,15 @@ def test_tuning_tf_script_mode(
tuner.wait()


@pytest.mark.skipif(PYTHON_VERSION != "py2", reason="TensorFlow image supports only python 2.")
def test_tuning_tf_vpc_multi(sagemaker_session, cpu_instance_type):
def test_tuning_tf_vpc_multi(
sagemaker_session, cpu_instance_type, tf_full_version, tf_full_py_version
):
"""Test Tensorflow multi-instance using the same VpcConfig for training and inference"""
instance_type = cpu_instance_type
instance_count = 2

script_path = os.path.join(DATA_DIR, "iris", "iris-dnn-classifier.py")
resource_path = os.path.join(DATA_DIR, "tensorflow_mnist")
script_path = os.path.join(resource_path, "mnist.py")

ec2_client = sagemaker_session.boto_session.client("ec2")
subnet_ids, security_group_id = vpc_test_utils.get_or_create_vpc_resources(ec2_client)
Expand All @@ -646,41 +644,39 @@ def test_tuning_tf_vpc_multi(sagemaker_session, cpu_instance_type):
estimator = TensorFlow(
entry_point=script_path,
role="SageMakerRole",
training_steps=1,
evaluation_steps=1,
hyperparameters={"input_tensor_name": "inputs"},
framework_version=tf_full_version,
py_version=tf_full_py_version,
train_instance_count=instance_count,
train_instance_type=instance_type,
sagemaker_session=sagemaker_session,
base_job_name="test-vpc-tf",
subnets=subnet_ids,
security_group_ids=[security_group_id],
encrypt_inter_container_traffic=True,
framework_version="1.11",
py_version=PYTHON_VERSION,
)

inputs = sagemaker_session.upload_data(path=DATA_PATH, key_prefix="integ-test-data/tf_iris")
hyperparameter_ranges = {"learning_rate": ContinuousParameter(0.05, 0.2)}

objective_metric_name = "loss"
metric_definitions = [{"Name": "loss", "Regex": "loss = ([0-9\\.]+)"}]
hyperparameter_ranges = {"epochs": IntegerParameter(1, 2)}
objective_metric_name = "accuracy"
metric_definitions = [{"Name": objective_metric_name, "Regex": "accuracy = ([0-9\\.]+)"}]

tuner = HyperparameterTuner(
estimator,
objective_metric_name,
hyperparameter_ranges,
metric_definitions,
objective_type="Minimize",
max_jobs=2,
max_parallel_jobs=2,
)

tuning_job_name = unique_name_from_base("tune-tf", max_length=32)
with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
inputs = estimator.sagemaker_session.upload_data(
path=os.path.join(resource_path, "data"), key_prefix="scriptmode/mnist"
)

tuning_job_name = unique_name_from_base("tune-tf", max_length=32)
tuner.fit(inputs, job_name=tuning_job_name)

print("Started hyperparameter tuning job with name:" + tuning_job_name)
print("Started hyperparameter tuning job with name: " + tuning_job_name)

time.sleep(15)
tuner.wait()
Expand Down

0 comments on commit 44a7ba2

Please sign in to comment.