[CI] Explicitly set eval batch size in determinism tests, introduce a new integration test group, and exclude slow tests. #3590

Merged 17 commits on Sep 13, 2023
59 changes: 58 additions & 1 deletion .github/workflows/pytest.yml
@@ -425,7 +425,64 @@ jobs:

- name: Integration Tests (D)
run: |
-        RUN_PRIVATE=$IS_NOT_FORK LUDWIG_TEST_SUITE_TIMEOUT_S=7200 pytest -v --timeout 300 --durations 100 -m "not slow and not combinatorial and not horovod and not llm and not integration_tests_a and not integration_tests_b and not integration_tests_c" --junitxml pytest.xml tests/integration_tests
+        RUN_PRIVATE=$IS_NOT_FORK LUDWIG_TEST_SUITE_TIMEOUT_S=7200 pytest -v --timeout 300 --durations 100 -m "not slow and not combinatorial and not horovod and not llm and integration_tests_d" --junitxml pytest.xml tests/integration_tests

integration-tests-e:
name: Integration Tests (E)
runs-on: ubuntu-latest

env:
AWS_ACCESS_KEY_ID: ${{ secrets.LUDWIG_TESTS_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.LUDWIG_TESTS_AWS_SECRET_ACCESS_KEY }}
KAGGLE_USERNAME: ${{ secrets.KAGGLE_USERNAME }}
KAGGLE_KEY: ${{ secrets.KAGGLE_KEY }}
IS_NOT_FORK: ${{ !(github.event.pull_request.base.repo.full_name == 'ludwig-ai/ludwig' && github.event.pull_request.head.repo.fork) }}

services:
minio:
image: fclairamb/minio-github-actions
env:
MINIO_ACCESS_KEY: minio
MINIO_SECRET_KEY: minio123
ports:
- 9000:9000

timeout-minutes: 90
steps:
- uses: actions/checkout@v2
- name: Set up Python 3.9
uses: actions/setup-python@v2
with:
python-version: 3.9

- name: Setup Linux
if: runner.os == 'linux'
run: |
sudo apt-get install -y cmake libsndfile1

- name: Setup macOS
if: runner.os == 'macOS'
run: |
brew install libuv

- name: Install dependencies
run: |
python --version
pip --version
python -m pip install -U pip

# remove torch and ray from the dependencies so we can add them depending on the matrix args for the job.
cat requirements.txt | sed '/^torch[>=<\b]/d' | sed '/^torchtext/d' | sed '/^torchvision/d' | sed '/^torchaudio/d' > requirements-temp && mv requirements-temp requirements.txt
cat requirements_distributed.txt | sed '/^ray[\[]/d'
pip install torch==2.0.0 torchtext torchvision torchaudio
pip install ray==2.3.0
pip install '.[test]'
pip list
shell: bash

- name: Integration Tests (E)
run: |
RUN_PRIVATE=$IS_NOT_FORK LUDWIG_TEST_SUITE_TIMEOUT_S=7200 pytest -v --timeout 300 --durations 100 -m "not slow and not combinatorial and not horovod and not llm and not integration_tests_a and not integration_tests_b and not integration_tests_c and not integration_tests_d" --junitxml pytest.xml tests/integration_tests

llm-tests:
name: LLM Tests
11 changes: 11 additions & 0 deletions ludwig/api.py
@@ -808,6 +808,17 @@ def train_online(
self.model = self._online_trainer.train_online(training_dataset)

def _tune_batch_size(self, trainer, dataset, random_seed: int = default_random_seed):
"""Sets AUTO batch-size-related parameters based on the trainer, backend type, and number of workers.

Batch-size related parameters that are set:
- trainer.batch_size
- trainer.eval_batch_size
- trainer.gradient_accumulation_steps
- trainer.effective_batch_size

The final batch size selected may be non-deterministic even with a fixed random seed since throughput-based
heuristics may be affected by resources used by other processes running on the machine.
"""
if not self.config_obj.trainer.can_tune_batch_size():
# Models like GBMs don't have batch sizes to be tuned
return
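The non-determinism caveat in this docstring is what the explicit EVAL_BATCH_SIZE settings added to the determinism tests further down address: if eval_batch_size is left on AUTO, two otherwise identical runs can end up evaluating with different batch sizes. A minimal config sketch, with hypothetical feature names and assuming the standard Ludwig trainer schema, that pins evaluation while still letting the training batch size be tuned:

```python
# Sketch only: feature names are hypothetical; assumes the usual Ludwig config layout.
config = {
    "input_features": [{"name": "text", "type": "text"}],
    "output_features": [{"name": "label", "type": "category"}],
    "trainer": {
        "epochs": 2,
        "batch_size": "auto",    # tuned by throughput at train time; may vary between runs
        "eval_batch_size": 128,  # pinned explicitly so evaluation is reproducible
    },
}
```

This mirrors the change made to `_prepare_data` in tests/integration_tests/test_cli.py below.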
18 changes: 18 additions & 0 deletions ludwig/utils/trainer_utils.py
@@ -356,6 +356,24 @@ def get_training_report(


def get_rendered_batch_size_grad_accum(config: "BaseTrainerConfig", num_workers: int) -> Tuple[int, int]:
"""Returns the batch size and gradient accumulation steps to use for training.

For batch_size==AUTO:
1. effective_batch_size is not AUTO and gradient_accumulation_steps is not AUTO:
batch size is set to the effective batch size divided by the gradient accumulation steps, divided by the
number of workers.
2. effective_batch_size is AUTO or gradient_accumulation_steps is AUTO:
batch size remains AUTO.

For gradient_accumulation_steps==AUTO:
1. batch size is AUTO:
gradient accumulation steps remains AUTO.
2. batch_size is not AUTO and effective batch size is not AUTO:
gradient accumulation steps is set to the effective batch size divided by the batch size, divided by the number
of workers.
3. batch size is not AUTO and effective batch size is AUTO:
gradient accumulation steps is set to 1.
"""
effective_batch_size = config.effective_batch_size
batch_size = config.batch_size
gradient_accumulation_steps = config.gradient_accumulation_steps
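Read as code, the resolution rules documented above amount to roughly the following sketch (a paraphrase of the docstring, not the actual Ludwig implementation):

```python
AUTO = "auto"


def render_batch_size_grad_accum(effective_batch_size, batch_size, grad_accum, num_workers):
    """Paraphrase of the documented AUTO resolution; not the real Ludwig code."""
    if batch_size == AUTO:
        if effective_batch_size != AUTO and grad_accum != AUTO:
            # Case 1: back out the per-worker batch size from the effective batch size.
            batch_size = effective_batch_size // grad_accum // num_workers
        # Case 2: otherwise batch_size stays AUTO and is tuned at train time.

    if grad_accum == AUTO:
        if batch_size == AUTO:
            pass  # Case 1: stays AUTO until the batch size is known.
        elif effective_batch_size != AUTO:
            # Case 2: back out the accumulation steps from the effective batch size.
            grad_accum = effective_batch_size // batch_size // num_workers
        else:
            # Case 3: no effective batch size to satisfy, so no extra accumulation.
            grad_accum = 1

    return batch_size, grad_accum
```

For example, with effective_batch_size=256, batch_size=64, and 2 workers, the sketch yields gradient_accumulation_steps=2.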
1 change: 1 addition & 0 deletions pytest.ini
@@ -10,5 +10,6 @@ markers =
integration_tests_a: mark a test to be run as part of integration tests, group A.
integration_tests_b: mark a test to be run as part of integration tests, group B.
integration_tests_c: mark a test to be run as part of integration tests, group C.
integration_tests_d: mark a test to be run as part of integration tests, group D.
filterwarnings =
ignore::DeprecationWarning
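For context, tests are assigned to a group either with a per-test decorator or with a module-level `pytestmark`, as the test modules below do, and the CI jobs select on those markers with `pytest -m`. A minimal illustration (the test body itself is a hypothetical placeholder):

```python
import pytest

# Put every test in this module into integration test group D, as test_experiment.py,
# test_explain.py, and test_hyperopt_ray.py do in this PR.
pytestmark = pytest.mark.integration_tests_d


@pytest.mark.slow  # additionally excluded from the regular CI runs via -m "not slow"
def test_expensive_path():
    assert 1 + 1 == 2  # hypothetical placeholder body
```

Group D is then collected with a marker expression like -m "not slow and ... and integration_tests_d", as in the workflow change above, while the new Integration Tests (E) job picks up whatever is not claimed by groups A through D.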
2 changes: 2 additions & 0 deletions tests/integration_tests/test_automl.py
@@ -280,6 +280,7 @@ def test_autoconfig_preprocessing_text_image(tmpdir):
assert config[INPUT_FEATURES][1][ENCODER][TYPE] == "stacked_cnn"


@pytest.mark.slow
@pytest.mark.distributed
@pytest.mark.parametrize("time_budget", [200, 1], ids=["high", "low"])
def test_train_with_config(time_budget, test_data_tabular_large, ray_cluster_2cpu, tmpdir):
@@ -301,6 +302,7 @@ def test_auto_train(test_data_tabular_large, ray_cluster_2cpu, tmpdir):
assert trial.status != Trial.ERROR, f"Error in trial {trial}"


@pytest.mark.slow
@pytest.mark.parametrize("fs_protocol,bucket", [private_param(("s3", "ludwig-tests"))], ids=["s3"])
def test_train_with_config_remote(fs_protocol, bucket, test_data_tabular_large, ray_cluster_2cpu):
backend = {
3 changes: 3 additions & 0 deletions tests/integration_tests/test_cached_preprocessing.py
@@ -9,6 +9,7 @@
from tests.integration_tests.utils import binary_feature, generate_data, number_feature, run_test_suite, text_feature


@pytest.mark.slow
@pytest.mark.parametrize(
"backend",
[
@@ -29,6 +30,7 @@ def test_onehot_encoding(tmpdir, backend, ray_cluster_2cpu):
run_test_suite(config, dataset, backend)


@pytest.mark.slow
@pytest.mark.parametrize(
"backend",
[
@@ -56,6 +58,7 @@ def test_hf_text_embedding(tmpdir, backend, ray_cluster_2cpu):
run_test_suite(config, dataset, backend)


@pytest.mark.slow
@pytest.mark.parametrize("cache_encoder_embeddings", [True, False, None])
@pytest.mark.parametrize("model_type", [MODEL_ECD, MODEL_GBM])
def test_onehot_encoding_preprocessing(model_type, cache_encoder_embeddings, tmpdir):
13 changes: 11 additions & 2 deletions tests/integration_tests/test_cli.py
@@ -24,7 +24,16 @@
import pytest
import yaml

- from ludwig.constants import BATCH_SIZE, COMBINER, INPUT_FEATURES, NAME, OUTPUT_FEATURES, PREPROCESSING, TRAINER
+ from ludwig.constants import (
+     BATCH_SIZE,
+     COMBINER,
+     EVAL_BATCH_SIZE,
+     INPUT_FEATURES,
+     NAME,
+     OUTPUT_FEATURES,
+     PREPROCESSING,
+     TRAINER,
+ )
from ludwig.types import FeatureConfigDict
from ludwig.utils.data_utils import load_yaml
from tests.integration_tests.utils import category_feature, generate_data, number_feature, sequence_feature
@@ -66,7 +75,7 @@ def _prepare_data(csv_filename, config_filename):
"input_features": input_features,
"output_features": output_features,
"combiner": {"type": "concat", "output_size": 14},
- TRAINER: {"epochs": 2, BATCH_SIZE: 128},
+ TRAINER: {"epochs": 2, BATCH_SIZE: 128, EVAL_BATCH_SIZE: 128},
}

with open(config_filename, "w") as f:
5 changes: 4 additions & 1 deletion tests/integration_tests/test_experiment.py
@@ -63,7 +63,7 @@
vector_feature,
)

- pytestmark = pytest.mark.integration_tests_b
+ pytestmark = pytest.mark.integration_tests_d

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
@@ -717,6 +717,7 @@ def test_experiment_model_resume(tmpdir):
shutil.rmtree(output_dir, ignore_errors=True)


@pytest.mark.slow
@pytest.mark.parametrize(
"dist_strategy",
[
@@ -804,6 +805,7 @@ def test_experiment_model_resume_missing_file(tmpdir, missing_file):
shutil.rmtree(output_dir, ignore_errors=True)


@pytest.mark.slow
@pytest.mark.distributed
def test_experiment_model_resume_before_1st_epoch_distributed(tmpdir, ray_cluster_4cpu):
# Single sequence input, single category output
@@ -853,6 +855,7 @@ def on_resume_training(self, is_coordinator):
)


@pytest.mark.slow
@pytest.mark.distributed
def test_tabnet_with_batch_size_1(tmpdir, ray_cluster_4cpu):
input_features = [number_feature()]
3 changes: 2 additions & 1 deletion tests/integration_tests/test_explain.py
@@ -31,7 +31,7 @@
except ImportError:
RayIntegratedGradientsExplainer = None

- pytestmark = pytest.mark.integration_tests_b
+ pytestmark = pytest.mark.integration_tests_d


def test_explanation_dataclass():
@@ -102,6 +102,7 @@ def test_explainer_api_ray(output_feature, tmpdir, ray_cluster_2cpu):
)


@pytest.mark.slow
@pytest.mark.distributed
def test_explainer_api_ray_minimum_batch_size(tmpdir, ray_cluster_2cpu):
from ludwig.explain.captum_ray import RayIntegratedGradientsExplainer
6 changes: 6 additions & 0 deletions tests/integration_tests/test_gbm.py
@@ -102,6 +102,7 @@ def test_local_gbm_binary(tmpdir, local_backend):
run_test_gbm_binary(tmpdir, local_backend)


@pytest.mark.slow
@pytest.mark.distributed
def test_ray_gbm_binary(tmpdir, ray_backend, ray_cluster_5cpu):
run_test_gbm_binary(tmpdir, ray_backend)
@@ -126,6 +127,7 @@ def test_local_gbm_non_number_inputs(tmpdir, local_backend):
run_test_gbm_non_number_inputs(tmpdir, local_backend)


@pytest.mark.slow
@pytest.mark.distributed
def test_ray_gbm_non_number_inputs(tmpdir, ray_backend, ray_cluster_5cpu):
run_test_gbm_non_number_inputs(tmpdir, ray_backend)
@@ -151,6 +153,7 @@ def test_local_gbm_category(vocab_size, tmpdir, local_backend):
run_test_gbm_category(vocab_size, tmpdir, local_backend)


@pytest.mark.slow
@pytest.mark.distributed
@pytest.mark.parametrize("vocab_size", [2, 3])
def test_ray_gbm_category(vocab_size, tmpdir, ray_backend, ray_cluster_5cpu):
@@ -362,6 +365,7 @@ def test_dart_boosting_type(tmpdir, local_backend):
_train_and_predict_gbm(input_features, output_features, tmpdir, local_backend, boosting_type="dart")


@pytest.mark.slow
@pytest.mark.parametrize(
"backend",
[
@@ -388,6 +392,7 @@ def test_gbm_category_one_hot_encoding(tmpdir, backend, ray_cluster_4cpu):
assert prob_col.apply(sum).mean() == pytest.approx(1.0)


@pytest.mark.slow
@pytest.mark.parametrize(
"backend",
[
@@ -437,6 +442,7 @@ def test_gbm_text_tfidf(tmpdir, backend, ray_cluster_4cpu):
# assert prob_col.apply(sum).mean() == pytest.approx(1.0)


@pytest.mark.slow
@pytest.mark.parametrize("feature_name", ["valid_feature_name", "Unnamed: 0", "{", "}", "[", "]"])
@pytest.mark.parametrize("feature_type", ["input", "output"])
@pytest.mark.parametrize(
3 changes: 3 additions & 0 deletions tests/integration_tests/test_hyperopt.py
@@ -368,6 +368,7 @@ def _run_hyperopt_run_hyperopt(csv_filename, search_space, tmpdir, backend, ray_
assert "model" in os.listdir(path)


@pytest.mark.slow
@pytest.mark.parametrize("search_space", ["random", "grid"])
def test_hyperopt_run_hyperopt(csv_filename, search_space, tmpdir, ray_cluster_7cpu):
_run_hyperopt_run_hyperopt(csv_filename, search_space, tmpdir, "local", ray_cluster_7cpu)
@@ -582,6 +583,7 @@ def test_hyperopt_nested_parameters(csv_filename, tmpdir, ray_cluster_7cpu):
assert trial_config[TRAINER]["learning_rate"] in {0.7, 0.42}


@pytest.mark.slow
def test_hyperopt_without_config_defaults(csv_filename, tmpdir, ray_cluster_7cpu):
input_features = [category_feature(encoder={"vocab_size": 3})]
output_features = [category_feature(decoder={"vocab_size": 3})]
@@ -613,6 +615,7 @@ def test_hyperopt_without_config_defaults(csv_filename, tmpdir, ray_cluster_7cpu
assert hyperopt_results.experiment_analysis.results_df.shape[0] == 10


@pytest.mark.slow
def test_hyperopt_with_time_budget(csv_filename, tmpdir, ray_cluster_7cpu):
"""Tests that incomplete checkpoints created by RayTune when time budget is hit doesn't throw errors because of
missing .tune_metadata files in the checkpoint directories."""
5 changes: 4 additions & 1 deletion tests/integration_tests/test_hyperopt_ray.py
@@ -45,7 +45,7 @@
Trial = None
TuneCallback = object # needed to set up HyperoptTestCallback when not distributed

- pytestmark = pytest.mark.integration_tests_a
+ pytestmark = pytest.mark.integration_tests_d

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
@@ -204,6 +204,7 @@ def run_hyperopt_executor(
hyperopt_executor.execute(config, dataset=rel_path, output_directory=tmpdir, backend=backend)


@pytest.mark.slow
@pytest.mark.distributed
@pytest.mark.parametrize("scenario", SCENARIOS)
def test_hyperopt_executor(scenario, csv_filename, tmpdir, ray_cluster_4cpu):
@@ -214,6 +215,7 @@ def test_hyperopt_executor(scenario, csv_filename, tmpdir, ray_cluster_4cpu):
run_hyperopt_executor(search_alg, executor, epochs, csv_filename, tmpdir)


@pytest.mark.slow
@pytest.mark.distributed
@pytest.mark.parametrize("use_split", [True, False], ids=["split", "no_split"])
def test_hyperopt_executor_with_metric(use_split, csv_filename, tmpdir, ray_cluster_4cpu):
@@ -301,6 +303,7 @@ def on_epoch_start(self, trainer, progress_tracker, save_path: str):
run_hyperopt(config, rel_path, tmpdir, callbacks=[CancelCallback()])


@pytest.mark.slow
@pytest.mark.distributed
def test_hyperopt_ray_mlflow(csv_filename, tmpdir, ray_cluster_4cpu):
mlflow_uri = f"file://{tmpdir}/mlruns"
1 change: 1 addition & 0 deletions tests/integration_tests/test_hyperopt_ray_horovod.py
@@ -236,6 +236,7 @@ def run_hyperopt_executor(
)


@pytest.mark.slow
@pytest.mark.distributed
def test_hyperopt_executor_variant_generator(csv_filename, ray_mock_dir, ray_cluster_7cpu):
search_alg = SCENARIOS[0]["search_alg"]
2 changes: 2 additions & 0 deletions tests/integration_tests/test_postprocessing.py
@@ -51,6 +51,7 @@ def random_set_logits(*args, num_predict_samples, vocab_size, pct_positive, **kw
return torch.tensor(logits, dtype=torch.float32) # simulate torch model output


@pytest.mark.slow
@pytest.mark.parametrize(
"backend",
[
@@ -114,6 +115,7 @@ def test_binary_predictions(tmpdir, backend, distinct_values, ray_cluster_2cpu):
assert np.allclose(prob_0, 1 - prob_1)


@pytest.mark.slow
@pytest.mark.parametrize(
"backend",
[