[CI] Explicitly set eval batch size in determinism tests, introduce a new integration test group, and exclude slow tests. #3590

Merged 17 commits on Sep 13, 2023
59 changes: 58 additions & 1 deletion .github/workflows/pytest.yml
@@ -425,7 +425,64 @@ jobs:

- name: Integration Tests (D)
run: |
-        RUN_PRIVATE=$IS_NOT_FORK LUDWIG_TEST_SUITE_TIMEOUT_S=7200 pytest -v --timeout 300 --durations 100 -m "not slow and not combinatorial and not horovod and not llm and not integration_tests_a and not integration_tests_b and not integration_tests_c" --junitxml pytest.xml tests/integration_tests
+        RUN_PRIVATE=$IS_NOT_FORK LUDWIG_TEST_SUITE_TIMEOUT_S=7200 pytest -v --timeout 300 --durations 100 -m "not slow and not combinatorial and not horovod and not llm and integration_tests_d" --junitxml pytest.xml tests/integration_tests

integration-tests-e:
name: Integration Tests (E)
runs-on: ubuntu-latest

env:
AWS_ACCESS_KEY_ID: ${{ secrets.LUDWIG_TESTS_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.LUDWIG_TESTS_AWS_SECRET_ACCESS_KEY }}
KAGGLE_USERNAME: ${{ secrets.KAGGLE_USERNAME }}
KAGGLE_KEY: ${{ secrets.KAGGLE_KEY }}
IS_NOT_FORK: ${{ !(github.event.pull_request.base.repo.full_name == 'ludwig-ai/ludwig' && github.event.pull_request.head.repo.fork) }}

services:
minio:
image: fclairamb/minio-github-actions
env:
MINIO_ACCESS_KEY: minio
MINIO_SECRET_KEY: minio123
ports:
- 9000:9000

timeout-minutes: 90
steps:
- uses: actions/checkout@v2
- name: Set up Python 3.9
uses: actions/setup-python@v2
with:
python-version: 3.9

- name: Setup Linux
if: runner.os == 'linux'
run: |
sudo apt-get install -y cmake libsndfile1

- name: Setup macOS
if: runner.os == 'macOS'
run: |
brew install libuv

- name: Install dependencies
run: |
python --version
pip --version
python -m pip install -U pip

# remove torch and ray from the dependencies so we can add them depending on the matrix args for the job.
cat requirements.txt | sed '/^torch[>=<\b]/d' | sed '/^torchtext/d' | sed '/^torchvision/d' | sed '/^torchaudio/d' > requirements-temp && mv requirements-temp requirements.txt
cat requirements_distributed.txt | sed '/^ray[\[]/d'
pip install torch==2.0.0 torchtext torchvision torchaudio
pip install ray==2.3.0
pip install '.[test]'
pip list
shell: bash

- name: Integration Tests (E)
run: |
RUN_PRIVATE=$IS_NOT_FORK LUDWIG_TEST_SUITE_TIMEOUT_S=7200 pytest -v --timeout 300 --durations 100 -m "not slow and not combinatorial and not horovod and not llm and not integration_tests_a and not integration_tests_b and not integration_tests_c and not integration_tests_d" --junitxml pytest.xml tests/integration_tests

llm-tests:
name: LLM Tests
11 changes: 11 additions & 0 deletions ludwig/api.py
@@ -808,6 +808,17 @@ def train_online(
self.model = self._online_trainer.train_online(training_dataset)

def _tune_batch_size(self, trainer, dataset, random_seed: int = default_random_seed):
"""Sets AUTO batch-size-related parameters based on the trainer, backend type, and number of workers.

Batch-size related parameters that are set:
- trainer.batch_size
- trainer.eval_batch_size
- trainer.gradient_accumulation_steps
- trainer.effective_batch_size

The final batch size selected may be non-deterministic even with a fixed random seed since throughput-based
heuristics may be affected by resources used by other processes running on the machine.
"""
if not self.config_obj.trainer.can_tune_batch_size():
# Models like GBMs don't have batch sizes to be tuned
return
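The non-determinism caveat in this docstring is what the explicit EVAL_BATCH_SIZE settings added to the determinism tests further down address: if eval_batch_size is left on AUTO, two otherwise identical runs can end up evaluating with different batch sizes. A minimal config sketch, with hypothetical feature names and assuming the standard Ludwig trainer schema, that pins evaluation while still letting the training batch size be tuned:

```python
# Sketch only: feature names are hypothetical; assumes the usual Ludwig config layout.
config = {
    "input_features": [{"name": "text", "type": "text"}],
    "output_features": [{"name": "label", "type": "category"}],
    "trainer": {
        "epochs": 2,
        "batch_size": "auto",    # tuned by throughput at train time; may vary between runs
        "eval_batch_size": 128,  # pinned explicitly so evaluation is reproducible
    },
}
```

This mirrors the change made to `_prepare_data` in tests/integration_tests/test_cli.py below.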
18 changes: 18 additions & 0 deletions ludwig/utils/trainer_utils.py
@@ -356,6 +356,24 @@ def get_training_report(


def get_rendered_batch_size_grad_accum(config: "BaseTrainerConfig", num_workers: int) -> Tuple[int, int]:
"""Returns the batch size and gradient accumulation steps to use for training.

For batch_size==AUTO:
1. effective_batch_size is not AUTO and gradient_accumulation_steps is not AUTO:
batch size is set to the effective batch size divided by the gradient accumulation steps, divided by the
number of workers.
2. effective_batch_size is AUTO or gradient_accumulation_steps is AUTO:
batch size remains AUTO.

For gradient_accumulation_steps==AUTO:
1. batch size is AUTO:
gradient accumulation steps remains AUTO.
2. batch_size is not AUTO and effective batch size is not AUTO:
gradient accumulation steps is set to the effective batch size divided by the batch size, divided by the number
of workers.
3. batch size is not AUTO and effective batch size is AUTO:
gradient accumulation steps is set to 1.
"""
effective_batch_size = config.effective_batch_size
batch_size = config.batch_size
gradient_accumulation_steps = config.gradient_accumulation_steps
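Read as code, the resolution rules documented above amount to roughly the following sketch (a paraphrase of the docstring, not the actual Ludwig implementation):

```python
AUTO = "auto"


def render_batch_size_grad_accum(effective_batch_size, batch_size, grad_accum, num_workers):
    """Paraphrase of the documented AUTO resolution; not the real Ludwig code."""
    if batch_size == AUTO:
        if effective_batch_size != AUTO and grad_accum != AUTO:
            # Case 1: back out the per-worker batch size from the effective batch size.
            batch_size = effective_batch_size // grad_accum // num_workers
        # Case 2: otherwise batch_size stays AUTO and is tuned at train time.

    if grad_accum == AUTO:
        if batch_size == AUTO:
            pass  # Case 1: stays AUTO until the batch size is known.
        elif effective_batch_size != AUTO:
            # Case 2: back out the accumulation steps from the effective batch size.
            grad_accum = effective_batch_size // batch_size // num_workers
        else:
            # Case 3: no effective batch size to satisfy, so no extra accumulation.
            grad_accum = 1

    return batch_size, grad_accum
```

For example, with effective_batch_size=256, batch_size=64, and 2 workers, the sketch yields gradient_accumulation_steps=2.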
1 change: 1 addition & 0 deletions pytest.ini
@@ -10,5 +10,6 @@ markers =
integration_tests_a: mark a test to be run as part of integration tests, group A.
integration_tests_b: mark a test to be run as part of integration tests, group B.
integration_tests_c: mark a test to be run as part of integration tests, group C.
integration_tests_d: mark a test to be run as part of integration tests, group D.
filterwarnings =
ignore::DeprecationWarning
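For context, tests are assigned to a group either with a per-test decorator or with a module-level `pytestmark`, as the test modules below do, and the CI jobs select on those markers with `pytest -m`. A minimal illustration (the test body itself is a hypothetical placeholder):

```python
import pytest

# Put every test in this module into integration test group D, as test_experiment.py,
# test_explain.py, and test_hyperopt_ray.py do in this PR.
pytestmark = pytest.mark.integration_tests_d


@pytest.mark.slow  # additionally excluded from the regular CI runs via -m "not slow"
def test_expensive_path():
    assert 1 + 1 == 2  # hypothetical placeholder body
```

Group D is then collected with a marker expression like -m "not slow and ... and integration_tests_d", as in the workflow change above, while the new Integration Tests (E) job picks up whatever is not claimed by groups A through D.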
2 changes: 2 additions & 0 deletions tests/integration_tests/test_automl.py
@@ -280,6 +280,7 @@ def test_autoconfig_preprocessing_text_image(tmpdir):
assert config[INPUT_FEATURES][1][ENCODER][TYPE] == "stacked_cnn"


@pytest.mark.slow
@pytest.mark.distributed
@pytest.mark.parametrize("time_budget", [200, 1], ids=["high", "low"])
def test_train_with_config(time_budget, test_data_tabular_large, ray_cluster_2cpu, tmpdir):
@@ -301,6 +302,7 @@ def test_auto_train(test_data_tabular_large, ray_cluster_2cpu, tmpdir):
assert trial.status != Trial.ERROR, f"Error in trial {trial}"


@pytest.mark.slow
@pytest.mark.parametrize("fs_protocol,bucket", [private_param(("s3", "ludwig-tests"))], ids=["s3"])
def test_train_with_config_remote(fs_protocol, bucket, test_data_tabular_large, ray_cluster_2cpu):
backend = {
3 changes: 3 additions & 0 deletions tests/integration_tests/test_cached_preprocessing.py
@@ -9,6 +9,7 @@
from tests.integration_tests.utils import binary_feature, generate_data, number_feature, run_test_suite, text_feature


@pytest.mark.slow
@pytest.mark.parametrize(
"backend",
[
@@ -29,6 +30,7 @@ def test_onehot_encoding(tmpdir, backend, ray_cluster_2cpu):
run_test_suite(config, dataset, backend)


@pytest.mark.slow
@pytest.mark.parametrize(
"backend",
[
@@ -56,6 +58,7 @@ def test_hf_text_embedding(tmpdir, backend, ray_cluster_2cpu):
run_test_suite(config, dataset, backend)


@pytest.mark.slow
@pytest.mark.parametrize("cache_encoder_embeddings", [True, False, None])
@pytest.mark.parametrize("model_type", [MODEL_ECD, MODEL_GBM])
def test_onehot_encoding_preprocessing(model_type, cache_encoder_embeddings, tmpdir):
13 changes: 11 additions & 2 deletions tests/integration_tests/test_cli.py
@@ -24,7 +24,16 @@
import pytest
import yaml

- from ludwig.constants import BATCH_SIZE, COMBINER, INPUT_FEATURES, NAME, OUTPUT_FEATURES, PREPROCESSING, TRAINER
+ from ludwig.constants import (
+     BATCH_SIZE,
+     COMBINER,
+     EVAL_BATCH_SIZE,
+     INPUT_FEATURES,
+     NAME,
+     OUTPUT_FEATURES,
+     PREPROCESSING,
+     TRAINER,
+ )
from ludwig.types import FeatureConfigDict
from ludwig.utils.data_utils import load_yaml
from tests.integration_tests.utils import category_feature, generate_data, number_feature, sequence_feature
@@ -66,7 +75,7 @@ def _prepare_data(csv_filename, config_filename):
"input_features": input_features,
"output_features": output_features,
"combiner": {"type": "concat", "output_size": 14},
- TRAINER: {"epochs": 2, BATCH_SIZE: 128},
+ TRAINER: {"epochs": 2, BATCH_SIZE: 128, EVAL_BATCH_SIZE: 128},
}

with open(config_filename, "w") as f:
5 changes: 4 additions & 1 deletion tests/integration_tests/test_experiment.py
@@ -63,7 +63,7 @@
vector_feature,
)

- pytestmark = pytest.mark.integration_tests_b
+ pytestmark = pytest.mark.integration_tests_d

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
@@ -717,6 +717,7 @@ def test_experiment_model_resume(tmpdir):
shutil.rmtree(output_dir, ignore_errors=True)


@pytest.mark.slow
@pytest.mark.parametrize(
"dist_strategy",
[
@@ -804,6 +805,7 @@ def test_experiment_model_resume_missing_file(tmpdir, missing_file):
shutil.rmtree(output_dir, ignore_errors=True)


@pytest.mark.slow
@pytest.mark.distributed
def test_experiment_model_resume_before_1st_epoch_distributed(tmpdir, ray_cluster_4cpu):
# Single sequence input, single category output
@@ -853,6 +855,7 @@ def on_resume_training(self, is_coordinator):
)


@pytest.mark.slow
@pytest.mark.distributed
def test_tabnet_with_batch_size_1(tmpdir, ray_cluster_4cpu):
input_features = [number_feature()]
3 changes: 2 additions & 1 deletion tests/integration_tests/test_explain.py
@@ -31,7 +31,7 @@
except ImportError:
RayIntegratedGradientsExplainer = None

- pytestmark = pytest.mark.integration_tests_b
+ pytestmark = pytest.mark.integration_tests_d


def test_explanation_dataclass():
@@ -102,6 +102,7 @@ def test_explainer_api_ray(output_feature, tmpdir, ray_cluster_2cpu):
)


@pytest.mark.slow
@pytest.mark.distributed
def test_explainer_api_ray_minimum_batch_size(tmpdir, ray_cluster_2cpu):
from ludwig.explain.captum_ray import RayIntegratedGradientsExplainer
6 changes: 6 additions & 0 deletions tests/integration_tests/test_gbm.py
@@ -102,6 +102,7 @@ def test_local_gbm_binary(tmpdir, local_backend):
run_test_gbm_binary(tmpdir, local_backend)


@pytest.mark.slow
@pytest.mark.distributed
def test_ray_gbm_binary(tmpdir, ray_backend, ray_cluster_5cpu):
run_test_gbm_binary(tmpdir, ray_backend)
@@ -126,6 +127,7 @@ def test_local_gbm_non_number_inputs(tmpdir, local_backend):
run_test_gbm_non_number_inputs(tmpdir, local_backend)


@pytest.mark.slow
@pytest.mark.distributed
def test_ray_gbm_non_number_inputs(tmpdir, ray_backend, ray_cluster_5cpu):
run_test_gbm_non_number_inputs(tmpdir, ray_backend)
@@ -151,6 +153,7 @@ def test_local_gbm_category(vocab_size, tmpdir, local_backend):
run_test_gbm_category(vocab_size, tmpdir, local_backend)


@pytest.mark.slow
@pytest.mark.distributed
@pytest.mark.parametrize("vocab_size", [2, 3])
def test_ray_gbm_category(vocab_size, tmpdir, ray_backend, ray_cluster_5cpu):
@@ -362,6 +365,7 @@ def test_dart_boosting_type(tmpdir, local_backend):
_train_and_predict_gbm(input_features, output_features, tmpdir, local_backend, boosting_type="dart")


@pytest.mark.slow
@pytest.mark.parametrize(
"backend",
[
@@ -388,6 +392,7 @@ def test_gbm_category_one_hot_encoding(tmpdir, backend, ray_cluster_4cpu):
assert prob_col.apply(sum).mean() == pytest.approx(1.0)


@pytest.mark.slow
@pytest.mark.parametrize(
"backend",
[
@@ -437,6 +442,7 @@ def test_gbm_text_tfidf(tmpdir, backend, ray_cluster_4cpu):
# assert prob_col.apply(sum).mean() == pytest.approx(1.0)


@pytest.mark.slow
@pytest.mark.parametrize("feature_name", ["valid_feature_name", "Unnamed: 0", "{", "}", "[", "]"])
@pytest.mark.parametrize("feature_type", ["input", "output"])
@pytest.mark.parametrize(
3 changes: 3 additions & 0 deletions tests/integration_tests/test_hyperopt.py
@@ -368,6 +368,7 @@ def _run_hyperopt_run_hyperopt(csv_filename, search_space, tmpdir, backend, ray_
assert "model" in os.listdir(path)


@pytest.mark.slow
@pytest.mark.parametrize("search_space", ["random", "grid"])
def test_hyperopt_run_hyperopt(csv_filename, search_space, tmpdir, ray_cluster_7cpu):
_run_hyperopt_run_hyperopt(csv_filename, search_space, tmpdir, "local", ray_cluster_7cpu)
@@ -582,6 +583,7 @@ def test_hyperopt_nested_parameters(csv_filename, tmpdir, ray_cluster_7cpu):
assert trial_config[TRAINER]["learning_rate"] in {0.7, 0.42}


@pytest.mark.slow
def test_hyperopt_without_config_defaults(csv_filename, tmpdir, ray_cluster_7cpu):
input_features = [category_feature(encoder={"vocab_size": 3})]
output_features = [category_feature(decoder={"vocab_size": 3})]
@@ -613,6 +615,7 @@ def test_hyperopt_without_config_defaults(csv_filename, tmpdir, ray_cluster_7cpu
assert hyperopt_results.experiment_analysis.results_df.shape[0] == 10


@pytest.mark.slow
def test_hyperopt_with_time_budget(csv_filename, tmpdir, ray_cluster_7cpu):
"""Tests that incomplete checkpoints created by RayTune when time budget is hit doesn't throw errors because of
missing .tune_metadata files in the checkpoint directories."""
5 changes: 4 additions & 1 deletion tests/integration_tests/test_hyperopt_ray.py
@@ -45,7 +45,7 @@
Trial = None
TuneCallback = object # needed to set up HyperoptTestCallback when not distributed

- pytestmark = pytest.mark.integration_tests_a
+ pytestmark = pytest.mark.integration_tests_d

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
@@ -204,6 +204,7 @@ def run_hyperopt_executor(
hyperopt_executor.execute(config, dataset=rel_path, output_directory=tmpdir, backend=backend)


@pytest.mark.slow
@pytest.mark.distributed
@pytest.mark.parametrize("scenario", SCENARIOS)
def test_hyperopt_executor(scenario, csv_filename, tmpdir, ray_cluster_4cpu):
@@ -214,6 +215,7 @@ def test_hyperopt_executor(scenario, csv_filename, tmpdir, ray_cluster_4cpu):
run_hyperopt_executor(search_alg, executor, epochs, csv_filename, tmpdir)


@pytest.mark.slow
@pytest.mark.distributed
@pytest.mark.parametrize("use_split", [True, False], ids=["split", "no_split"])
def test_hyperopt_executor_with_metric(use_split, csv_filename, tmpdir, ray_cluster_4cpu):
@@ -301,6 +303,7 @@ def on_epoch_start(self, trainer, progress_tracker, save_path: str):
run_hyperopt(config, rel_path, tmpdir, callbacks=[CancelCallback()])


@pytest.mark.slow
@pytest.mark.distributed
def test_hyperopt_ray_mlflow(csv_filename, tmpdir, ray_cluster_4cpu):
mlflow_uri = f"file://{tmpdir}/mlruns"
1 change: 1 addition & 0 deletions tests/integration_tests/test_hyperopt_ray_horovod.py
@@ -236,6 +236,7 @@ def run_hyperopt_executor(
)


@pytest.mark.slow
@pytest.mark.distributed
def test_hyperopt_executor_variant_generator(csv_filename, ray_mock_dir, ray_cluster_7cpu):
search_alg = SCENARIOS[0]["search_alg"]
2 changes: 2 additions & 0 deletions tests/integration_tests/test_postprocessing.py
@@ -51,6 +51,7 @@ def random_set_logits(*args, num_predict_samples, vocab_size, pct_positive, **kw
return torch.tensor(logits, dtype=torch.float32) # simulate torch model output


@pytest.mark.slow
@pytest.mark.parametrize(
"backend",
[
@@ -114,6 +115,7 @@ def test_binary_predictions(tmpdir, backend, distinct_values, ray_cluster_2cpu):
assert np.allclose(prob_0, 1 - prob_1)


@pytest.mark.slow
@pytest.mark.parametrize(
"backend",
[