diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 6a5a2cff5..09a8fc413 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -65,13 +65,13 @@ jobs:

       - name: PyTest ("not long")
         run: |
-          poetry run pytest tests -v --cov=etna -m "not long" --cov-report=xml
-          poetry run pytest etna -v --doctest-modules
+          poetry run pytest tests -v --cov=etna -m "not long_1 and not long_2" --cov-report=xml --durations=10
+          poetry run pytest etna -v --doctest-modules --durations=10

       - name: Upload coverage
         uses: codecov/codecov-action@v2

-  long-test:
+  long-1-test:
     runs-on: ubuntu-latest

     steps:
@@ -103,7 +103,44 @@ jobs:

       - name: PyTest ("long")
         run: |
-          poetry run pytest tests -v --cov=etna -m "long" --cov-report=xml
+          poetry run pytest tests -v --cov=etna -m "long_1" --cov-report=xml --durations=10
+
+      - name: Upload coverage
+        uses: codecov/codecov-action@v2
+
+  long-2-test:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v2
+
+      - name: Set up Python
+        id: setup-python
+        uses: actions/setup-python@v2
+        with:
+          python-version: 3.8
+
+      - name: Install Poetry
+        uses: snok/install-poetry@v1
+        with:
+          virtualenvs-create: true
+          virtualenvs-in-project: true
+
+      - name: Load cached venv
+        id: cached-poetry-dependencies
+        uses: actions/cache@v2
+        with:
+          path: .venv
+          key: venv-${{ runner.os }}-3.8-${{ hashFiles('**/poetry.lock') }}
+
+      - name: Install dependencies
+        if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
+        run: |
+          poetry install -E "all tests" -vv
+
+      - name: PyTest ("long")
+        run: |
+          poetry run pytest tests -v --cov=etna -m "long_2" --cov-report=xml --durations=10

       - name: Upload coverage
         uses: codecov/codecov-action@v2
@@ -141,8 +178,8 @@ jobs:

       - name: PyTest ("tsdataset transforms")
         run: |
-          poetry run pytest tests/test_datasets -v --cov=etna --cov-report=xml
-          poetry run pytest tests/test_transforms -v --cov=etna --cov-report=xml
+          poetry run pytest tests/test_datasets -v --cov=etna --cov-report=xml --durations=10
+          poetry run pytest tests/test_transforms -v --cov=etna --cov-report=xml --durations=10

       - name: Upload coverage
         uses: codecov/codecov-action@v2
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 38f94105b..2931a57c0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -17,7 +17,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 -
 -
 - Make slicing faster in `TSDataset._merge_exog`, `FilterFeaturesTransform`, `AddConstTransform`, `LambdaTransform`, `LagTransform`, `LogTransform`, `SklearnTransform`, `WindowStatisticsTransform`; make CICD test different pandas versions ([#900](https://github.com/tinkoff-ai/etna/pull/900))
--
+-
+-
+-
+- Mark some tests as long ([#929](https://github.com/tinkoff-ai/etna/pull/929))
 -
 ### Fixed
 -
diff --git a/pyproject.toml b/pyproject.toml
index 8b53cdcad..71e6beec9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -245,7 +245,8 @@ filterwarnings = [
 ]
 markers = [
     "smoke",
-    "long"
+    "long_1",
+    "long_2"
 ]

 [tool.mypy]
diff --git a/tests/test_ensembles/test_stacking_ensemble.py b/tests/test_ensembles/test_stacking_ensemble.py
index 833555d8e..781af2896 100644
--- a/tests/test_ensembles/test_stacking_ensemble.py
+++ b/tests/test_ensembles/test_stacking_ensemble.py
@@ -207,7 +207,7 @@ def test_forecast_prediction_interval_interface(example_tsds, naive_ensemble: St
     assert (segment_slice["target_0.975"] - segment_slice["target_0.025"] >= 0).all()


-@pytest.mark.long
+@pytest.mark.long_1
 def test_multiprocessing_ensembles(
     simple_df: TSDataset,
     catboost_pipeline: Pipeline,
@@ -229,6 +229,7 @@ def test_multiprocessing_ensembles(
     assert (single_jobs_forecast.df == multi_jobs_forecast.df).all().all()


+@pytest.mark.long_1
 @pytest.mark.parametrize("n_jobs", (1, 5))
 def test_backtest(stacking_ensemble_pipeline: StackingEnsemble, example_tsds: TSDataset, n_jobs: int):
     """Check that backtest works with StackingEnsemble."""
diff --git a/tests/test_ensembles/test_voting_ensemble.py b/tests/test_ensembles/test_voting_ensemble.py
index c624821ca..48fd16e41 100644
--- a/tests/test_ensembles/test_voting_ensemble.py
+++ b/tests/test_ensembles/test_voting_ensemble.py
@@ -115,7 +115,7 @@ def test_forecast_prediction_interval_interface(example_tsds, naive_pipeline_1,
     assert (segment_slice["target_0.975"] - segment_slice["target_0.025"] >= 0).all()


-@pytest.mark.long
+@pytest.mark.long_1
 def test_multiprocessing_ensembles(
     simple_df: TSDataset,
     catboost_pipeline: Pipeline,
@@ -137,6 +137,7 @@ def test_multiprocessing_ensembles(
     assert (single_jobs_forecast.df == multi_jobs_forecast.df).all().all()


+@pytest.mark.long_1
 @pytest.mark.parametrize("n_jobs", (1, 5))
 def test_backtest(voting_ensemble_pipeline: VotingEnsemble, example_tsds: TSDataset, n_jobs: int):
     """Check that backtest works with VotingEnsemble."""
diff --git a/tests/test_models/nn/test_deepar.py b/tests/test_models/nn/test_deepar.py
index 63eb2a823..63fa10f0a 100644
--- a/tests/test_models/nn/test_deepar.py
+++ b/tests/test_models/nn/test_deepar.py
@@ -30,7 +30,7 @@ def test_fit_wrong_order_transform(weekly_period_df):
     model.fit(ts)


-@pytest.mark.long
+@pytest.mark.long_2
 @pytest.mark.parametrize("horizon", [8, 21])
 def test_deepar_model_run_weekly_overfit(weekly_period_df, horizon):
     """
@@ -69,7 +69,7 @@ def test_deepar_model_run_weekly_overfit(weekly_period_df, horizon):
     assert mae(ts_test, ts_pred) < 0.2207


-@pytest.mark.long
+@pytest.mark.long_2
 @pytest.mark.parametrize("horizon", [8])
 def test_deepar_model_run_weekly_overfit_with_scaler(ts_dataset_weekly_function_with_horizon, horizon):
     """
diff --git a/tests/test_models/nn/test_rnn.py b/tests/test_models/nn/test_rnn.py
index d8db7a400..b7952cbaa 100644
--- a/tests/test_models/nn/test_rnn.py
+++ b/tests/test_models/nn/test_rnn.py
@@ -9,6 +9,7 @@
 from etna.transforms import StandardScalerTransform


+@pytest.mark.long_2
 @pytest.mark.parametrize("horizon", [8, 13])
 def test_rnn_model_run_weekly_overfit_with_scaler(ts_dataset_weekly_function_with_horizon, horizon):
     """
diff --git a/tests/test_models/nn/test_tft.py b/tests/test_models/nn/test_tft.py
index 5b6ee4601..795980039 100644
--- a/tests/test_models/nn/test_tft.py
+++ b/tests/test_models/nn/test_tft.py
@@ -30,7 +30,7 @@ def test_fit_wrong_order_transform(weekly_period_df):
     model.fit(ts)


-@pytest.mark.long
+@pytest.mark.long_2
 @pytest.mark.parametrize("horizon", [8, 21])
 def test_tft_model_run_weekly_overfit(ts_dataset_weekly_function_with_horizon, horizon):
     """
@@ -64,7 +64,7 @@ def test_tft_model_run_weekly_overfit(ts_dataset_weekly_function_with_horizon, h
     assert mae(ts_test, ts_pred) < 0.24


-@pytest.mark.long
+@pytest.mark.long_2
 @pytest.mark.parametrize("horizon", [8])
 def test_tft_model_run_weekly_overfit_with_scaler(ts_dataset_weekly_function_with_horizon, horizon):
     """
diff --git a/tests/test_models/test_inference.py b/tests/test_models/test_inference.py
index 7f43a484d..81b1c34b3 100644
--- a/tests/test_models/test_inference.py
+++ b/tests/test_models/test_inference.py
@@ -146,6 +146,7 @@ def _test_forecast_mixed_in_out_sample(ts, model, transforms):
     assert_frame_equal(forecast_out_sample_df, forecast_full_df.iloc[-5:])


+@pytest.mark.long_1
 @pytest.mark.parametrize(
     "model, transforms",
     [
@@ -165,6 +166,7 @@ def test_forecast_in_sample_full(model, transforms, example_tsds):
     _test_forecast_in_sample_full(example_tsds, model, transforms)


+@pytest.mark.long_1
 @pytest.mark.xfail(strict=True)
 @pytest.mark.parametrize(
     "model, transforms",
@@ -180,6 +182,7 @@ def test_forecast_in_sample_full_failed(model, transforms, example_tsds):
     _test_forecast_in_sample_full(example_tsds, model, transforms)


+@pytest.mark.long_1
 @pytest.mark.parametrize(
     "model, transforms",
     [
@@ -218,6 +221,7 @@ def test_forecast_in_sample_full_not_implemented(model, transforms, example_tsds
     _test_forecast_in_sample_full(example_tsds, model, transforms)


+@pytest.mark.long_1
 @pytest.mark.parametrize(
     "model, transforms",
     [
@@ -242,6 +246,7 @@ def test_forecast_in_sample_suffix(model, transforms, example_tsds):
     _test_forecast_in_sample_suffix(example_tsds, model, transforms)


+@pytest.mark.long_1
 @pytest.mark.parametrize(
     "model, transforms",
     [
@@ -280,6 +285,7 @@ def test_forecast_in_sample_suffix_not_implemented(model, transforms, example_ts
     _test_forecast_in_sample_suffix(example_tsds, model, transforms)


+@pytest.mark.long_1
 @pytest.mark.parametrize(
     "model, transforms",
     [
@@ -332,6 +338,7 @@ def test_forecast_out_sample_prefix(model, transforms, example_tsds):
     _test_forecast_out_sample_prefix(example_tsds, model, transforms)


+@pytest.mark.long_1
 @pytest.mark.parametrize(
     "model, transforms",
     [
@@ -355,6 +362,7 @@ def test_forecast_out_sample_suffix(model, transforms, example_tsds):
     _test_forecast_out_sample_suffix(example_tsds, model, transforms)


+@pytest.mark.long_1
 @pytest.mark.parametrize(
     "model, transforms",
     [
@@ -391,6 +399,7 @@ def test_forecast_out_sample_suffix_not_implemented(model, transforms, example_t
     _test_forecast_out_sample_suffix(example_tsds, model, transforms)


+@pytest.mark.long_1
 @pytest.mark.xfail(strict=True)
 @pytest.mark.parametrize(
     "model, transforms",
@@ -404,6 +413,7 @@ def test_forecast_out_sample_suffix_failed(model, transforms, example_tsds):
     _test_forecast_out_sample_suffix(example_tsds, model, transforms)


+@pytest.mark.long_1
 @pytest.mark.parametrize(
     "model, transforms",
     [
@@ -425,6 +435,7 @@ def test_forecast_mixed_in_out_sample(model, transforms, example_tsds):
     _test_forecast_mixed_in_out_sample(example_tsds, model, transforms)


+@pytest.mark.long_1
 @pytest.mark.parametrize(
     "model, transforms",
     [
diff --git a/tests/test_models/test_tbats.py b/tests/test_models/test_tbats.py
index 5ccdb4409..4a728162e 100644
--- a/tests/test_models/test_tbats.py
+++ b/tests/test_models/test_tbats.py
@@ -45,7 +45,7 @@ def sinusoid_ts():
     "model_class, model_class_repr",
     ((TBATSModel, "TBATSModel"), (BATSModel, "BATSModel")),
 )
-def test_reper(model_class, model_class_repr):
+def test_repr(model_class, model_class_repr):
     kwargs = {
         "use_box_cox": None,
         "box_cox_bounds": None,
@@ -84,6 +84,7 @@ def test_not_fitted(model, linear_segments_ts_unique):
     model.forecast(to_forecast)


+@pytest.mark.long_2
 @pytest.mark.parametrize("model", [TBATSModel(), BATSModel()])
 def test_format(model, new_format_df):
     df = new_format_df
@@ -96,6 +97,7 @@ def test_format(model, new_format_df):
     assert not future_ts.isnull().values.any()


+@pytest.mark.long_2
 @pytest.mark.parametrize("model", [TBATSModel(), BATSModel()])
 def test_dummy(model, sinusoid_ts):
     train, test = sinusoid_ts
@@ -107,6 +109,7 @@ def test_dummy(model, sinusoid_ts):
     assert value_metric < 0.33


+@pytest.mark.long_2
 @pytest.mark.parametrize("model", [TBATSModel(), BATSModel()])
 def test_prediction_interval(model, example_tsds):
     model.fit(example_tsds)
diff --git a/tests/test_pipeline/test_autoregressive_pipeline.py b/tests/test_pipeline/test_autoregressive_pipeline.py
index 980e996ec..cdc73929f 100644
--- a/tests/test_pipeline/test_autoregressive_pipeline.py
+++ b/tests/test_pipeline/test_autoregressive_pipeline.py
@@ -125,7 +125,7 @@ def test_forecast_raise_error_if_not_fitted():
     _ = pipeline.forecast()


-@pytest.mark.long
+@pytest.mark.long_1
 def test_backtest_with_n_jobs(big_example_tsdf: TSDataset):
     """Check that AutoRegressivePipeline.backtest gives the same results in case of single and multiple jobs modes."""
     # create a pipeline
diff --git a/tests/test_pipeline/test_pipeline.py b/tests/test_pipeline/test_pipeline.py
index d81bab272..bab42be9f 100644
--- a/tests/test_pipeline/test_pipeline.py
+++ b/tests/test_pipeline/test_pipeline.py
@@ -345,7 +345,7 @@ def test_get_fold_info_interface_hours(catboost_pipeline: Pipeline, example_tsdf
     assert expected_columns == sorted(info_df.columns)


-@pytest.mark.long
+@pytest.mark.long_1
 def test_backtest_with_n_jobs(catboost_pipeline: Pipeline, big_example_tsdf: TSDataset):
     """Check that Pipeline.backtest gives the same results in case of single and multiple jobs modes."""
     ts1 = deepcopy(big_example_tsdf)
@@ -530,6 +530,7 @@ def test_sanity_backtest_naive_with_intervals(weekly_period_ts):
     assert f"target_{quantiles[1]}" in features


+@pytest.mark.long_1
 def test_backtest_pass_with_filter_transform(ts_with_feature):
     ts = ts_with_feature
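
Note: to reproduce the new marker split locally, the CI jobs above can be mirrored directly; the commands below are an illustrative sketch taken from this diff (coverage flags omitted), not part of the change itself.

    poetry run pytest tests -v -m "not long_1 and not long_2" --durations=10   # fast suite ("not long" job)
    poetry run pytest tests -v -m "long_1" --durations=10                      # long-1-test job
    poetry run pytest tests -v -m "long_2" --durations=10                      # long-2-test job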