diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 6a5a2cff5..09a8fc413 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -65,13 +65,13 @@ jobs:

       - name: PyTest ("not long")
         run: |
-          poetry run pytest tests -v --cov=etna -m "not long" --cov-report=xml
-          poetry run pytest etna -v --doctest-modules
+          poetry run pytest tests -v --cov=etna -m "not long_1 and not long_2" --cov-report=xml --durations=10
+          poetry run pytest etna -v --doctest-modules --durations=10

       - name: Upload coverage
         uses: codecov/codecov-action@v2

-  long-test:
+  long-1-test:
     runs-on: ubuntu-latest

     steps:
@@ -103,7 +103,44 @@ jobs:

       - name: PyTest ("long")
         run: |
-          poetry run pytest tests -v --cov=etna -m "long" --cov-report=xml
+          poetry run pytest tests -v --cov=etna -m "long_1" --cov-report=xml --durations=10
+
+      - name: Upload coverage
+        uses: codecov/codecov-action@v2
+
+  long-2-test:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v2
+
+      - name: Set up Python
+        id: setup-python
+        uses: actions/setup-python@v2
+        with:
+          python-version: 3.8
+
+      - name: Install Poetry
+        uses: snok/install-poetry@v1
+        with:
+          virtualenvs-create: true
+          virtualenvs-in-project: true
+
+      - name: Load cached venv
+        id: cached-poetry-dependencies
+        uses: actions/cache@v2
+        with:
+          path: .venv
+          key: venv-${{ runner.os }}-3.8-${{ hashFiles('**/poetry.lock') }}
+
+      - name: Install dependencies
+        if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
+        run: |
+          poetry install -E "all tests" -vv
+
+      - name: PyTest ("long")
+        run: |
+          poetry run pytest tests -v --cov=etna -m "long_2" --cov-report=xml --durations=10

       - name: Upload coverage
         uses: codecov/codecov-action@v2
@@ -141,8 +178,8 @@ jobs:

       - name: PyTest ("tsdataset transforms")
         run: |
-          poetry run pytest tests/test_datasets -v --cov=etna --cov-report=xml
-          poetry run pytest tests/test_transforms -v --cov=etna --cov-report=xml
+          poetry run pytest tests/test_datasets -v --cov=etna --cov-report=xml --durations=10
+          poetry run pytest tests/test_transforms -v --cov=etna --cov-report=xml --durations=10

       - name: Upload coverage
         uses: codecov/codecov-action@v2
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 38f94105b..2931a57c0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -17,7 +17,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 -
 -
 - Make slicing faster in `TSDataset._merge_exog`, `FilterFeaturesTransform`, `AddConstTransform`, `LambdaTransform`, `LagTransform`, `LogTransform`, `SklearnTransform`, `WindowStatisticsTransform`; make CICD test different pandas versions ([#900](https://github.com/tinkoff-ai/etna/pull/900))
--
+-
+-
+-
+- Mark some tests as long ([#929](https://github.com/tinkoff-ai/etna/pull/929))
 -
 ### Fixed
 -
diff --git a/pyproject.toml b/pyproject.toml
index 8b53cdcad..71e6beec9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -245,7 +245,8 @@ filterwarnings = [
 ]
 markers = [
     "smoke",
-    "long"
+    "long_1",
+    "long_2"
 ]

 [tool.mypy]
diff --git a/tests/test_ensembles/test_stacking_ensemble.py b/tests/test_ensembles/test_stacking_ensemble.py
index 833555d8e..781af2896 100644
--- a/tests/test_ensembles/test_stacking_ensemble.py
+++ b/tests/test_ensembles/test_stacking_ensemble.py
@@ -207,7 +207,7 @@ def test_forecast_prediction_interval_interface(example_tsds, naive_ensemble: St
     assert (segment_slice["target_0.975"] - segment_slice["target_0.025"] >= 0).all()


-@pytest.mark.long
+@pytest.mark.long_1
 def test_multiprocessing_ensembles(
     simple_df: TSDataset,
     catboost_pipeline: Pipeline,
@@ -229,6 +229,7 @@ def test_multiprocessing_ensembles(
     assert (single_jobs_forecast.df == multi_jobs_forecast.df).all().all()


+@pytest.mark.long_1
 @pytest.mark.parametrize("n_jobs", (1, 5))
 def test_backtest(stacking_ensemble_pipeline: StackingEnsemble, example_tsds: TSDataset, n_jobs: int):
     """Check that backtest works with StackingEnsemble."""
diff --git a/tests/test_ensembles/test_voting_ensemble.py b/tests/test_ensembles/test_voting_ensemble.py
index c624821ca..48fd16e41 100644
--- a/tests/test_ensembles/test_voting_ensemble.py
+++ b/tests/test_ensembles/test_voting_ensemble.py
@@ -115,7 +115,7 @@ def test_forecast_prediction_interval_interface(example_tsds, naive_pipeline_1,
     assert (segment_slice["target_0.975"] - segment_slice["target_0.025"] >= 0).all()


-@pytest.mark.long
+@pytest.mark.long_1
 def test_multiprocessing_ensembles(
     simple_df: TSDataset,
     catboost_pipeline: Pipeline,
@@ -137,6 +137,7 @@ def test_multiprocessing_ensembles(
     assert (single_jobs_forecast.df == multi_jobs_forecast.df).all().all()


+@pytest.mark.long_1
 @pytest.mark.parametrize("n_jobs", (1, 5))
 def test_backtest(voting_ensemble_pipeline: VotingEnsemble, example_tsds: TSDataset, n_jobs: int):
     """Check that backtest works with VotingEnsemble."""
diff --git a/tests/test_models/nn/test_deepar.py b/tests/test_models/nn/test_deepar.py
index 63eb2a823..63fa10f0a 100644
--- a/tests/test_models/nn/test_deepar.py
+++ b/tests/test_models/nn/test_deepar.py
@@ -30,7 +30,7 @@ def test_fit_wrong_order_transform(weekly_period_df):
     model.fit(ts)


-@pytest.mark.long
+@pytest.mark.long_2
 @pytest.mark.parametrize("horizon", [8, 21])
 def test_deepar_model_run_weekly_overfit(weekly_period_df, horizon):
     """
@@ -69,7 +69,7 @@ def test_deepar_model_run_weekly_overfit(weekly_period_df, horizon):
     assert mae(ts_test, ts_pred) < 0.2207


-@pytest.mark.long
+@pytest.mark.long_2
 @pytest.mark.parametrize("horizon", [8])
 def test_deepar_model_run_weekly_overfit_with_scaler(ts_dataset_weekly_function_with_horizon, horizon):
     """
diff --git a/tests/test_models/nn/test_rnn.py b/tests/test_models/nn/test_rnn.py
index d8db7a400..b7952cbaa 100644
--- a/tests/test_models/nn/test_rnn.py
+++ b/tests/test_models/nn/test_rnn.py
@@ -9,6 +9,7 @@
 from etna.transforms import StandardScalerTransform


+@pytest.mark.long_2
 @pytest.mark.parametrize("horizon", [8, 13])
 def test_rnn_model_run_weekly_overfit_with_scaler(ts_dataset_weekly_function_with_horizon, horizon):
     """
diff --git a/tests/test_models/nn/test_tft.py b/tests/test_models/nn/test_tft.py
index 5b6ee4601..795980039 100644
--- a/tests/test_models/nn/test_tft.py
+++ b/tests/test_models/nn/test_tft.py
@@ -30,7 +30,7 @@ def test_fit_wrong_order_transform(weekly_period_df):
     model.fit(ts)


-@pytest.mark.long
+@pytest.mark.long_2
 @pytest.mark.parametrize("horizon", [8, 21])
 def test_tft_model_run_weekly_overfit(ts_dataset_weekly_function_with_horizon, horizon):
     """
@@ -64,7 +64,7 @@ def test_tft_model_run_weekly_overfit(ts_dataset_weekly_function_with_horizon, h
     assert mae(ts_test, ts_pred) < 0.24


-@pytest.mark.long
+@pytest.mark.long_2
 @pytest.mark.parametrize("horizon", [8])
 def test_tft_model_run_weekly_overfit_with_scaler(ts_dataset_weekly_function_with_horizon, horizon):
     """
diff --git a/tests/test_models/test_inference.py b/tests/test_models/test_inference.py
index 7f43a484d..81b1c34b3 100644
--- a/tests/test_models/test_inference.py
+++ b/tests/test_models/test_inference.py
@@ -146,6 +146,7 @@ def _test_forecast_mixed_in_out_sample(ts, model, transforms):
     assert_frame_equal(forecast_out_sample_df, forecast_full_df.iloc[-5:])


+@pytest.mark.long_1
 @pytest.mark.parametrize(
     "model, transforms",
     [
@@ -165,6 +166,7 @@ def test_forecast_in_sample_full(model, transforms, example_tsds):
     _test_forecast_in_sample_full(example_tsds, model, transforms)


+@pytest.mark.long_1
 @pytest.mark.xfail(strict=True)
 @pytest.mark.parametrize(
     "model, transforms",
@@ -180,6 +182,7 @@ def test_forecast_in_sample_full_failed(model, transforms, example_tsds):
     _test_forecast_in_sample_full(example_tsds, model, transforms)


+@pytest.mark.long_1
 @pytest.mark.parametrize(
     "model, transforms",
     [
@@ -218,6 +221,7 @@ def test_forecast_in_sample_full_not_implemented(model, transforms, example_tsds
     _test_forecast_in_sample_full(example_tsds, model, transforms)


+@pytest.mark.long_1
 @pytest.mark.parametrize(
     "model, transforms",
     [
@@ -242,6 +246,7 @@ def test_forecast_in_sample_suffix(model, transforms, example_tsds):
     _test_forecast_in_sample_suffix(example_tsds, model, transforms)


+@pytest.mark.long_1
 @pytest.mark.parametrize(
     "model, transforms",
     [
@@ -280,6 +285,7 @@ def test_forecast_in_sample_suffix_not_implemented(model, transforms, example_ts
     _test_forecast_in_sample_suffix(example_tsds, model, transforms)


+@pytest.mark.long_1
 @pytest.mark.parametrize(
     "model, transforms",
     [
@@ -332,6 +338,7 @@ def test_forecast_out_sample_prefix(model, transforms, example_tsds):
     _test_forecast_out_sample_prefix(example_tsds, model, transforms)


+@pytest.mark.long_1
 @pytest.mark.parametrize(
     "model, transforms",
     [
@@ -355,6 +362,7 @@ def test_forecast_out_sample_suffix(model, transforms, example_tsds):
     _test_forecast_out_sample_suffix(example_tsds, model, transforms)


+@pytest.mark.long_1
 @pytest.mark.parametrize(
     "model, transforms",
     [
@@ -391,6 +399,7 @@ def test_forecast_out_sample_suffix_not_implemented(model, transforms, example_t
     _test_forecast_out_sample_suffix(example_tsds, model, transforms)


+@pytest.mark.long_1
 @pytest.mark.xfail(strict=True)
 @pytest.mark.parametrize(
     "model, transforms",
@@ -404,6 +413,7 @@ def test_forecast_out_sample_suffix_failed(model, transforms, example_tsds):
     _test_forecast_out_sample_suffix(example_tsds, model, transforms)


+@pytest.mark.long_1
 @pytest.mark.parametrize(
     "model, transforms",
     [
@@ -425,6 +435,7 @@ def test_forecast_mixed_in_out_sample(model, transforms, example_tsds):
     _test_forecast_mixed_in_out_sample(example_tsds, model, transforms)


+@pytest.mark.long_1
 @pytest.mark.parametrize(
     "model, transforms",
     [
diff --git a/tests/test_models/test_tbats.py b/tests/test_models/test_tbats.py
index 5ccdb4409..4a728162e 100644
--- a/tests/test_models/test_tbats.py
+++ b/tests/test_models/test_tbats.py
@@ -45,7 +45,7 @@ def sinusoid_ts():
     "model_class, model_class_repr",
     ((TBATSModel, "TBATSModel"), (BATSModel, "BATSModel")),
 )
-def test_reper(model_class, model_class_repr):
+def test_repr(model_class, model_class_repr):
     kwargs = {
         "use_box_cox": None,
         "box_cox_bounds": None,
@@ -84,6 +84,7 @@ def test_not_fitted(model, linear_segments_ts_unique):
     model.forecast(to_forecast)


+@pytest.mark.long_2
 @pytest.mark.parametrize("model", [TBATSModel(), BATSModel()])
 def test_format(model, new_format_df):
     df = new_format_df
@@ -96,6 +97,7 @@ def test_format(model, new_format_df):
     assert not future_ts.isnull().values.any()


+@pytest.mark.long_2
 @pytest.mark.parametrize("model", [TBATSModel(), BATSModel()])
 def test_dummy(model, sinusoid_ts):
     train, test = sinusoid_ts
@@ -107,6 +109,7 @@ def test_dummy(model, sinusoid_ts):
     assert value_metric < 0.33


+@pytest.mark.long_2
 @pytest.mark.parametrize("model", [TBATSModel(), BATSModel()])
 def test_prediction_interval(model, example_tsds):
     model.fit(example_tsds)
diff --git a/tests/test_pipeline/test_autoregressive_pipeline.py b/tests/test_pipeline/test_autoregressive_pipeline.py
index 980e996ec..cdc73929f 100644
--- a/tests/test_pipeline/test_autoregressive_pipeline.py
+++ b/tests/test_pipeline/test_autoregressive_pipeline.py
@@ -125,7 +125,7 @@ def test_forecast_raise_error_if_not_fitted():
     _ = pipeline.forecast()


-@pytest.mark.long
+@pytest.mark.long_1
 def test_backtest_with_n_jobs(big_example_tsdf: TSDataset):
     """Check that AutoRegressivePipeline.backtest gives the same results in case of single and multiple jobs modes."""
     # create a pipeline
diff --git a/tests/test_pipeline/test_pipeline.py b/tests/test_pipeline/test_pipeline.py
index d81bab272..bab42be9f 100644
--- a/tests/test_pipeline/test_pipeline.py
+++ b/tests/test_pipeline/test_pipeline.py
@@ -345,7 +345,7 @@ def test_get_fold_info_interface_hours(catboost_pipeline: Pipeline, example_tsdf
     assert expected_columns == sorted(info_df.columns)


-@pytest.mark.long
+@pytest.mark.long_1
 def test_backtest_with_n_jobs(catboost_pipeline: Pipeline, big_example_tsdf: TSDataset):
     """Check that Pipeline.backtest gives the same results in case of single and multiple jobs modes."""
     ts1 = deepcopy(big_example_tsdf)
@@ -530,6 +530,7 @@ def test_sanity_backtest_naive_with_intervals(weekly_period_ts):
     assert f"target_{quantiles[1]}" in features


+@pytest.mark.long_1
 def test_backtest_pass_with_filter_transform(ts_with_feature):
     ts = ts_with_feature
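
Note: to reproduce the new marker split locally, the CI jobs above can be mirrored directly; the commands below are an illustrative sketch taken from this diff (coverage flags omitted), not part of the change itself.

    poetry run pytest tests -v -m "not long_1 and not long_2" --durations=10   # fast suite ("not long" job)
    poetry run pytest tests -v -m "long_1" --durations=10                      # long-1-test job
    poetry run pytest tests -v -m "long_2" --durations=10                      # long-2-test job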