aws · laurenyu · Jun 25, 2020 · Jun 10, 2020 · Jun 24, 2020 · Jun 24, 2020
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -19,6 +19,7 @@
 import pytest
 import tests.integ
 from botocore.config import Config
+from packaging.version import Version
 
 from sagemaker import Session, utils
 from sagemaker.local import LocalSession
@@ -57,7 +58,6 @@ def pytest_addoption(parser):
         "--rl-ray-full-version", action="store", default=RLEstimator.RAY_LATEST_VERSION
     )
     parser.addoption("--sklearn-full-version", action="store", default="0.20.0")
-    parser.addoption("--tf-full-version", action="store", default="2.2.0")
     parser.addoption("--ei-tf-full-version", action="store")
     parser.addoption("--xgboost-full-version", action="store", default="1.0-1")
 
@@ -304,35 +304,45 @@ def sklearn_full_version(request):
 
 
 @pytest.fixture(scope="module")
-def tf_full_version(request):
-    return request.config.getoption("--tf-full-version")
+def tf_training_latest_version():
+    return "2.2.0"  # hard-coded; the --tf-full-version CLI override was removed
+
+
+@pytest.fixture(scope="module")
+def tf_training_latest_py_version():
+    return "py37"  # the py_version that tf_full_py_version pairs with TF >= 2.2
+
+
+@pytest.fixture(scope="module")
+def tf_serving_latest_version():
+    return "2.1.0"  # TFS latest; differs from the TF training latest version
+
+
+@pytest.fixture(scope="module")
+def tf_full_version(tf_training_latest_version, tf_serving_latest_version):
+    """Return the TF version for tests that cover both training and inference.
+
+    TF training and TFS have different latest versions, so this is the lower of the two.
+    Otherwise, this would simply be a single latest version.
+    """
+    return str(min(Version(tf_training_latest_version), Version(tf_serving_latest_version)))
 
 
 @pytest.fixture(scope="module")
 def tf_full_py_version(tf_full_version):
-    """fixture to match tf_full_version
+    """Return the py_version ("py2", "py3" or "py37") that matches tf_full_version.
 
-    Fixture exists as such, since tf_full_version may be overridden --tf-full-version.
+    Fixture exists as such, since TF training and TFS have different latest versions.
     Otherwise, this would simply be py37 to match the latest version support.
-
-    TODO: Evaluate use of --tf-full-version with possible eye to remove and simplify code.
     """
-    version = [int(val) for val in tf_full_version.split(".")]
-    if version < [1, 11]:
+    version = Version(tf_full_version)
+    if version < Version("1.11"):
         return "py2"
-    if version < [2, 2]:
+    if version < Version("2.2"):
         return "py3"
     return "py37"
 
 
-@pytest.fixture(scope="module")
-def tf_serving_version(tf_full_version):
-    full_version = [int(val) for val in tf_full_version.split(".")]
-    if full_version < [2, 2]:
-        return tf_full_version
-    return "2.1.0"
-
-
 @pytest.fixture(scope="module", params=["1.15.0", "2.0.0"])
 def ei_tf_full_version(request):
     tf_ei_version = request.config.getoption("--ei-tf-full-version")

diff --git a/tests/integ/test_airflow_config.py b/tests/integ/test_airflow_config.py
@@ -512,7 +512,7 @@ def test_sklearn_airflow_config_uploads_data_source_to_s3(
 
 @pytest.mark.canary_quick
 def test_tf_airflow_config_uploads_data_source_to_s3(
-    sagemaker_session, cpu_instance_type, tf_full_version, tf_full_py_version
+    sagemaker_session, cpu_instance_type, tf_training_latest_version, tf_training_latest_py_version
 ):
     with timeout(seconds=AIRFLOW_CONFIG_TIMEOUT_IN_SECONDS):
         tf = TensorFlow(
@@ -524,8 +524,8 @@ def test_tf_airflow_config_uploads_data_source_to_s3(
             train_instance_count=SINGLE_INSTANCE_COUNT,
             train_instance_type=cpu_instance_type,
             sagemaker_session=sagemaker_session,
-            framework_version=tf_full_version,
-            py_version=tf_full_py_version,
+            framework_version=tf_training_latest_version,
+            py_version=tf_training_latest_py_version,
             metric_definitions=[
                 {"Name": "train:global_steps", "Regex": r"global_step\/sec:\s(.*)"}
             ],

diff --git a/tests/integ/test_data_capture_config.py b/tests/integ/test_data_capture_config.py
@@ -41,7 +41,7 @@
 
 
 def test_enabling_data_capture_on_endpoint_shows_correct_data_capture_status(
-    sagemaker_session, tf_serving_version
+    sagemaker_session, tf_serving_latest_version
 ):
     endpoint_name = unique_name_from_base("sagemaker-tensorflow-serving")
     model_data = sagemaker_session.upload_data(
@@ -52,7 +52,7 @@ def test_enabling_data_capture_on_endpoint_shows_correct_data_capture_status(
         model = TensorFlowModel(
             model_data=model_data,
             role=ROLE,
-            framework_version=tf_serving_version,
+            framework_version=tf_serving_latest_version,
             sagemaker_session=sagemaker_session,
         )
         predictor = model.deploy(
@@ -98,7 +98,7 @@ def test_enabling_data_capture_on_endpoint_shows_correct_data_capture_status(
 
 
 def test_disabling_data_capture_on_endpoint_shows_correct_data_capture_status(
-    sagemaker_session, tf_serving_version
+    sagemaker_session, tf_serving_latest_version
 ):
     endpoint_name = unique_name_from_base("sagemaker-tensorflow-serving")
     model_data = sagemaker_session.upload_data(
@@ -109,7 +109,7 @@ def test_disabling_data_capture_on_endpoint_shows_correct_data_capture_status(
         model = TensorFlowModel(
             model_data=model_data,
             role=ROLE,
-            framework_version=tf_serving_version,
+            framework_version=tf_serving_latest_version,
             sagemaker_session=sagemaker_session,
         )
         destination_s3_uri = os.path.join(
@@ -184,7 +184,7 @@ def test_disabling_data_capture_on_endpoint_shows_correct_data_capture_status(
 
 
 def test_updating_data_capture_on_endpoint_shows_correct_data_capture_status(
-    sagemaker_session, tf_serving_version
+    sagemaker_session, tf_serving_latest_version
 ):
     endpoint_name = sagemaker.utils.unique_name_from_base("sagemaker-tensorflow-serving")
     model_data = sagemaker_session.upload_data(
@@ -195,7 +195,7 @@ def test_updating_data_capture_on_endpoint_shows_correct_data_capture_status(
         model = TensorFlowModel(
             model_data=model_data,
             role=ROLE,
-            framework_version=tf_serving_version,
+            framework_version=tf_serving_latest_version,
             sagemaker_session=sagemaker_session,
         )
         destination_s3_uri = os.path.join(

diff --git a/tests/integ/test_horovod.py b/tests/integ/test_horovod.py
@@ -15,10 +15,10 @@
 import json
 import os
 import tarfile
-from six.moves.urllib.parse import urlparse
 
 import boto3
 import pytest
+from six.moves.urllib.parse import urlparse
 
 import sagemaker.utils
 import tests.integ as integ
@@ -28,27 +28,49 @@
 horovod_dir = os.path.join(os.path.dirname(__file__), "..", "data", "horovod")
 
 
-@pytest.fixture(scope="module")
-def gpu_instance_type(request):
-    return "ml.p2.xlarge"
-
-
 @pytest.mark.canary_quick
-def test_hvd_cpu(sagemaker_session, cpu_instance_type, tmpdir):
-    _create_and_fit_estimator(sagemaker_session, cpu_instance_type, tmpdir)
+def test_hvd_cpu(
+    sagemaker_session,
+    tf_training_latest_version,
+    tf_training_latest_py_version,
+    cpu_instance_type,
+    tmpdir,
+):
+    _create_and_fit_estimator(
+        sagemaker_session,
+        tf_training_latest_version,
+        tf_training_latest_py_version,
+        cpu_instance_type,
+        tmpdir,
+    )
 
 
 @pytest.mark.canary_quick
 @pytest.mark.skipif(
     integ.test_region() in integ.TRAINING_NO_P2_REGIONS, reason="no ml.p2 instances in this region"
 )
-def test_hvd_gpu(sagemaker_session, gpu_instance_type, tmpdir):
-    _create_and_fit_estimator(sagemaker_session, gpu_instance_type, tmpdir)
+def test_hvd_gpu(
+    sagemaker_session, tf_training_latest_version, tf_training_latest_py_version, tmpdir
+):
+    _create_and_fit_estimator(
+        sagemaker_session,
+        tf_training_latest_version,
+        tf_training_latest_py_version,
+        "ml.p2.xlarge",  # P2 instance type; regions without ml.p2 are skipped above
+        tmpdir,
+    )
 
 
 @pytest.mark.local_mode
 @pytest.mark.parametrize("instances, processes", [[1, 2], (2, 1), (2, 2)])
-def test_horovod_local_mode(sagemaker_local_session, instances, processes, tmpdir):
+def test_horovod_local_mode(
+    sagemaker_local_session,
+    tf_training_latest_version,
+    tf_training_latest_py_version,
+    instances,
+    processes,
+    tmpdir,
+):
     output_path = "file://%s" % tmpdir
     job_name = sagemaker.utils.unique_name_from_base("tf-horovod")
     estimator = TensorFlow(
@@ -57,9 +79,9 @@ def test_horovod_local_mode(sagemaker_local_session, instances, processes, tmpdi
         train_instance_count=2,
         train_instance_type="local",
         sagemaker_session=sagemaker_local_session,
-        py_version=integ.PYTHON_VERSION,
         output_path=output_path,
-        framework_version="1.12",
+        framework_version=tf_training_latest_version,
+        py_version=tf_training_latest_py_version,
         distributions={"mpi": {"enabled": True, "processes_per_host": processes}},
     )
 
@@ -96,16 +118,16 @@ def extract_files_from_s3(s3_url, tmpdir, sagemaker_session):
         tar_file.extractall(tmpdir)
 
 
-def _create_and_fit_estimator(sagemaker_session, instance_type, tmpdir):
+def _create_and_fit_estimator(sagemaker_session, tf_version, py_version, instance_type, tmpdir):
     job_name = sagemaker.utils.unique_name_from_base("tf-horovod")
     estimator = TensorFlow(
         entry_point=os.path.join(horovod_dir, "hvd_basic.py"),
         role="SageMakerRole",
         train_instance_count=2,
         train_instance_type=instance_type,
         sagemaker_session=sagemaker_session,
-        py_version=integ.PYTHON_VERSION,
-        framework_version="1.12",
+        py_version=py_version,
+        framework_version=tf_version,
         distributions={"mpi": {"enabled": True}},
     )
 

diff --git a/tests/integ/test_model_monitor.py b/tests/integ/test_model_monitor.py
@@ -88,7 +88,7 @@
 
 
 @pytest.fixture(scope="module")
-def predictor(sagemaker_session, tf_serving_version):
+def predictor(sagemaker_session, tf_serving_latest_version):
     endpoint_name = unique_name_from_base("sagemaker-tensorflow-serving")
     model_data = sagemaker_session.upload_data(
         path=os.path.join(tests.integ.DATA_DIR, "tensorflow-serving-test-model.tar.gz"),
@@ -100,7 +100,7 @@ def predictor(sagemaker_session, tf_serving_version):
         model = TensorFlowModel(
             model_data=model_data,
             role=ROLE,
-            framework_version=tf_serving_version,
+            framework_version=tf_serving_latest_version,
             sagemaker_session=sagemaker_session,
         )
         predictor = model.deploy(

diff --git a/tests/integ/test_tf.py b/tests/integ/test_tf.py
@@ -22,7 +22,7 @@
 from sagemaker.utils import unique_name_from_base, sagemaker_timestamp
 
 import tests.integ
-from tests.integ import kms_utils, timeout, PYTHON_VERSION
+from tests.integ import kms_utils, timeout
 from tests.integ.retry import retries
 from tests.integ.s3_utils import assert_s3_files_exist
 
@@ -39,7 +39,7 @@
 
 
 def test_mnist_with_checkpoint_config(
-    sagemaker_session, instance_type, tf_full_version, tf_full_py_version
+    sagemaker_session, instance_type, tf_training_latest_version, tf_training_latest_py_version
 ):
     checkpoint_s3_uri = "s3://{}/checkpoints/tf-{}".format(
         sagemaker_session.default_bucket(), sagemaker_timestamp()
@@ -51,8 +51,8 @@ def test_mnist_with_checkpoint_config(
         train_instance_count=1,
         train_instance_type=instance_type,
         sagemaker_session=sagemaker_session,
-        framework_version=tf_full_version,
-        py_version=tf_full_py_version,
+        framework_version=tf_training_latest_version,
+        py_version=tf_training_latest_py_version,
         metric_definitions=[{"Name": "train:global_steps", "Regex": r"global_step\/sec:\s(.*)"}],
         checkpoint_s3_uri=checkpoint_s3_uri,
         checkpoint_local_path=checkpoint_local_path,
@@ -82,7 +82,7 @@ def test_mnist_with_checkpoint_config(
     assert actual_training_checkpoint_config == expected_training_checkpoint_config
 
 
-def test_server_side_encryption(sagemaker_session, tf_serving_version):
+def test_server_side_encryption(sagemaker_session, tf_full_version, tf_full_py_version):
     with kms_utils.bucket_with_encryption(sagemaker_session, ROLE) as (bucket_with_kms, kms_key):
         output_path = os.path.join(
             bucket_with_kms, "test-server-side-encryption", time.strftime("%y%m%d-%H%M")
@@ -95,8 +95,8 @@ def test_server_side_encryption(sagemaker_session, tf_serving_version):
             train_instance_count=1,
             train_instance_type="ml.c5.xlarge",
             sagemaker_session=sagemaker_session,
-            framework_version=tf_serving_version,
-            py_version=PYTHON_VERSION,
+            framework_version=tf_full_version,
+            py_version=tf_full_py_version,
             code_location=output_path,
             output_path=output_path,
             model_dir="/opt/ml/model",
@@ -123,15 +123,17 @@ def test_server_side_encryption(sagemaker_session, tf_serving_version):
 
 
 @pytest.mark.canary_quick
-def test_mnist_distributed(sagemaker_session, instance_type, tf_full_version, tf_full_py_version):
+def test_mnist_distributed(
+    sagemaker_session, instance_type, tf_training_latest_version, tf_training_latest_py_version
+):
     estimator = TensorFlow(
         entry_point=SCRIPT,
         role=ROLE,
         train_instance_count=2,
         train_instance_type=instance_type,
         sagemaker_session=sagemaker_session,
-        framework_version=tf_full_version,
-        py_version=tf_full_py_version,
+        framework_version=tf_training_latest_version,
+        py_version=tf_training_latest_py_version,
         distributions=PARAMETER_SERVER_DISTRIBUTION,
     )
     inputs = estimator.sagemaker_session.upload_data(
@@ -147,16 +149,15 @@ def test_mnist_distributed(sagemaker_session, instance_type, tf_full_version, tf
     )
 
 
-def test_mnist_async(sagemaker_session, cpu_instance_type, tf_serving_version):
+def test_mnist_async(sagemaker_session, cpu_instance_type, tf_full_version, tf_full_py_version):
     estimator = TensorFlow(
         entry_point=SCRIPT,
         role=ROLE,
         train_instance_count=1,
         train_instance_type="ml.c5.4xlarge",
-        py_version=PYTHON_VERSION,
         sagemaker_session=sagemaker_session,
-        # testing py-sdk functionality, no need to run against all TF versions
-        framework_version=tf_serving_version,
+        framework_version=tf_full_version,
+        py_version=tf_full_py_version,
         tags=TAGS,
     )
     inputs = estimator.sagemaker_session.upload_data(
@@ -188,15 +189,17 @@ def test_mnist_async(sagemaker_session, cpu_instance_type, tf_serving_version):
         _assert_model_name_match(sagemaker_session.sagemaker_client, endpoint_name, model_name)
 
 
-def test_deploy_with_input_handlers(sagemaker_session, instance_type, tf_serving_version):
+def test_deploy_with_input_handlers(
+    sagemaker_session, instance_type, tf_full_version, tf_full_py_version
+):
     estimator = TensorFlow(
         entry_point="training.py",
         source_dir=TFS_RESOURCE_PATH,
         role=ROLE,
         train_instance_count=1,
         train_instance_type=instance_type,
-        framework_version=tf_serving_version,
-        py_version=PYTHON_VERSION,
+        framework_version=tf_full_version,
+        py_version=tf_full_py_version,
         sagemaker_session=sagemaker_session,
         tags=TAGS,
     )