
Remove version python-3.7 from pyproject.toml, update lock #1183

Merged · 14 commits · Mar 29, 2023
6 changes: 4 additions & 2 deletions .github/workflows/test.yml
@@ -33,7 +33,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
- python-version: [3.7, 3.8, 3.9, '3.10']
+ python-version: [3.8, 3.9, '3.10']
fail-fast: false

steps:
@@ -59,10 +59,12 @@ jobs:
path: .venv
key: venv-${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('**/poetry.lock') }}

+ # TODO: remove pip install after optuna fix
Collaborator:

Am I right that the user should also install the correct version of sqlalchemy? Shouldn't we inform them about it somehow?

Contributor Author:

If they have an incorrect version, they will face errors when working with databases in optuna (it is optuna's problem, not ours).

Collaborator:

Will it be obvious that the problem is in sqlalchemy?
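One way to make the root cause explicit (a hypothetical guard, not something this PR adds) would be a fail-fast version check; `sqlalchemy.__version__` and `packaging.version` are real APIs, and the bound is copied from the pin added below:

```python
# Hypothetical fail-fast check, NOT part of this PR: surface the sqlalchemy
# incompatibility directly instead of as an opaque optuna database error.
import sqlalchemy
from packaging import version

installed = version.parse(sqlalchemy.__version__)
if not (version.parse("1.4") <= installed < version.parse("2")):
    raise RuntimeError(
        f"optuna storage needs sqlalchemy>=1.4,<2, found {sqlalchemy.__version__}"
    )
```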

- name: Install dependencies
if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
run: |
poetry install -E "all tests" -vv
+ poetry run pip install "sqlalchemy>=1.4,<2"

- name: PyTest ("not long")
run: |
@@ -210,7 +212,7 @@ jobs:
- name: Install dependencies
run: |
poetry install -E "all tests" -vv
- pip install "pandas${{ matrix.pandas-version }}"
+ poetry run pip install "pandas${{ matrix.pandas-version }}"

- name: PyTest ("tsdataset transforms")
run: |
7 changes: 5 additions & 2 deletions .gitignore
@@ -76,8 +76,8 @@ celerybeat-schedule
/.env

# virtualenv
- .venv/
- venv/
+ .venv*/
+ venv*/
ENV/

# Spyder project settings
@@ -126,6 +126,9 @@ checkpoints/
# macOS
.DS_Store

+ # poetry configuration
+ poetry.toml

*.html
.devcontainer
/docs/source/api/
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -42,6 +42,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Add more scenarios into tests for models ([#1082](https://github.com/tinkoff-ai/etna/pull/1082))
- Decouple `SeasonalMovingAverageModel` from `PerSegmentModelMixin` ([#1132](https://github.com/tinkoff-ai/etna/pull/1132))
- Decouple `DeadlineMovingAverageModel` from `PerSegmentModelMixin` ([#1140](https://github.com/tinkoff-ai/etna/pull/1140))
+ - Remove version python-3.7 from `pyproject.toml`, update lock ([#1183](https://github.com/tinkoff-ai/etna/pull/1183))
### Fixed
- Fix bug in `GaleShapleyFeatureSelectionTransform` with wrong number of remaining features ([#1110](https://github.com/tinkoff-ai/etna/pull/1110))
- `ProphetModel` fails with additional seasonality set ([#1157](https://github.com/tinkoff-ai/etna/pull/1157))
2 changes: 1 addition & 1 deletion etna/analysis/eda_utils.py
@@ -816,7 +816,7 @@ def seasonal_plot(

# draw ticks if they are not digits
if not np.all(seasonal_df["in_cycle_name"].str.isnumeric()):
- ticks_dict = {key: value for key, value in zip(seasonal_df["in_cycle_num"], seasonal_df["in_cycle_name"])}
+ ticks_dict = dict(zip(seasonal_df["in_cycle_num"], seasonal_df["in_cycle_name"]))
ticks = np.array(list(ticks_dict.keys()))
ticks_labels = np.array(list(ticks_dict.values()))
idx_sort = np.argsort(ticks)
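As a sanity check, a tiny sketch (toy tick data standing in for the `in_cycle_num` / `in_cycle_name` columns) showing the rewritten line builds the same mapping as the comprehension it replaces:

```python
# Toy stand-ins for the seasonal tick columns.
nums = [1, 2, 3]
names = ["Mon", "Tue", "Wed"]
assert dict(zip(nums, names)) == {key: value for key, value in zip(nums, names)}
```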
2 changes: 1 addition & 1 deletion etna/analysis/outliers/median_outliers.py
@@ -36,7 +36,7 @@ def get_anomalies_median(
outliers_per_segment = {}
segments = ts.segments
for seg in segments:
- anomalies = []
+ anomalies: typing.List[int] = []

segment_df = ts.df[seg].reset_index()
values = segment_df[in_column].values
4 changes: 2 additions & 2 deletions etna/clustering/distances/euclidean_distance.py
@@ -11,7 +11,7 @@


@numba.cfunc(numba.float64(numba.float64[:], numba.float64[:]))
- def euclidean_distance(x1: np.ndarray, x2: np.ndarray) -> float:
+ def euclidean_distance(x1: np.ndarray, x2: np.ndarray) -> np.floating:
"""Get euclidean distance between two arrays.

Parameters
@@ -23,7 +23,7 @@ def euclidean_distance(x1: np.ndarray, x2: np.ndarray) -> float:

Returns
-------
- float:
+ :
distance between x1 and x2
"""
return np.linalg.norm(x1 - x2)
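The new annotation matches numpy's behavior: `np.linalg.norm` returns a `np.float64` (a `np.floating` subclass), and numpy's type stubs declare it as `np.floating`, which is presumably why mypy rejected the plain `float` annotation. A quick runtime check:

```python
import numpy as np

# Norm of the difference between (0, 0) and (3, 4) is 5.0.
d = np.linalg.norm(np.array([0.0, 0.0]) - np.array([3.0, 4.0]))
print(d, type(d))  # 5.0 <class 'numpy.float64'>
assert isinstance(d, np.floating)
```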
7 changes: 4 additions & 3 deletions etna/datasets/tsdataset.py
@@ -601,7 +601,7 @@ def to_flatten(df: pd.DataFrame, features: Union[Literal["all"], Sequence[str]]
columns = df.columns.get_level_values("feature").unique()

# flatten dataframe
- df_dict = {}
+ df_dict: Dict[str, Any] = {}
df_dict["timestamp"] = np.tile(df.index, len(segments))
df_dict["segment"] = np.repeat(segments, len(df.index))
if "target" in columns:
@@ -1060,9 +1060,10 @@ def drop_features(self, features: List[str], drop_from_exog: bool = False):
columns_in_df = df.columns.get_level_values("feature")
columns_to_remove = list(set(columns_in_df) & set(features))
unknown_columns = set(features) - set(columns_to_remove)
- if len(unknown_columns) != 0:
+ if len(unknown_columns) > 0:
warnings.warn(f"Features {unknown_columns} are not present in {name}!")
- df.drop(columns=columns_to_remove, level="feature", inplace=True)
+ if len(columns_to_remove) > 0:
+     df.drop(columns=columns_to_remove, level="feature", inplace=True)
self._regressors = list(set(self._regressors) - set(features))

@property
7 changes: 4 additions & 3 deletions etna/experimental/classification/feature_extraction/weasel.py
@@ -110,7 +110,7 @@ def __init__(
self._min_series_len: Optional[int] = None
self._sfa_list: List[SymbolicFourierApproximation] = []
self._vectorizer_list: List[CountVectorizer] = []
- self._relevant_features_list: List[int] = []
+ self._relevant_features_list: List[List[int]] = []
self._vocabulary: Dict[int, str] = {}
self._sfa = SymbolicFourierApproximation(
n_coefs=self.word_size,
@@ -131,7 +131,8 @@ def _windowed_view(
"""Create the samples of length window_size with window_step."""
n_samples = len(x)
n_windows_per_sample = [((len(x[i]) - window_size + window_step) // window_step) for i in range(n_samples)]
- n_windows_per_sample_cum = np.asarray(np.concatenate(([0], np.cumsum(n_windows_per_sample))))
+ # problem with `np.concatenate` typing: `[0]` can't be used instead of `np.array([0])`
+ n_windows_per_sample_cum = np.asarray(np.concatenate((np.array([0]), np.cumsum(n_windows_per_sample))))
x_windowed = np.asarray(
np.concatenate(
[sliding_window_view(series[::-1], window_shape=window_size)[::window_step][::-1, ::-1] for series in x]
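For intuition, a self-contained sketch (toy series, assumed window parameters) of the reversed `sliding_window_view` trick above, which anchors the last window to the end of each series:

```python
import numpy as np
from numpy.lib.stride_tricks import sliding_window_view

series = np.arange(10)
window_size, window_step = 4, 3  # assumed parameters
# Reverse so the stepping is anchored at the END of the series,
# then reverse rows and columns back into chronological order.
windows = sliding_window_view(series[::-1], window_shape=window_size)[::window_step][::-1, ::-1]
print(windows)
# [[0 1 2 3]
#  [3 4 5 6]
#  [6 7 8 9]]
```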
@@ -186,7 +187,7 @@ def fit(self, x: List[np.ndarray], y: Optional[np.ndarray] = None) -> "CustomWEA
for i, idx in enumerate(relevant_features):
self._vocabulary[i + old_length_vocab] = str(window_size) + " " + vocabulary[idx]

- self._relevant_features_list.append(relevant_features)
+ self._relevant_features_list.append(relevant_features.tolist())
self._sfa_list.append(sfa)
self._vectorizer_list.append(vectorizer)

6 changes: 5 additions & 1 deletion etna/transforms/encoders/mean_segment_encoder.py
@@ -3,6 +3,7 @@
from typing import List
from typing import Optional

+ import numpy as np
import pandas as pd

from etna.transforms import IrreversibleTransform
@@ -74,7 +75,10 @@ def _transform(self, df: pd.DataFrame) -> pd.DataFrame:
df = self.mean_encoder._transform(df)
segment = segments[0]
nan_timestamps = df[df.loc[:, self.idx[segment, "target"]].isna()].index
- df.loc[nan_timestamps, self.idx[:, "segment_mean"]] = [self.global_means[x] for x in segments]
+ values_to_set = np.array([self.global_means[x] for x in segments])
+ # repetition isn't necessary for pandas >= 1.2
+ values_to_set = np.repeat(values_to_set[np.newaxis, :], len(nan_timestamps), axis=0)
+ df.loc[nan_timestamps, self.idx[:, "segment_mean"]] = values_to_set
return df
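A shape-only sketch (hypothetical segment means) of why the repeat is needed on older pandas: the block assigned via `.loc` must match the `(n_rows, n_cols)` shape of the selection there, while pandas >= 1.2 broadcasts the 1-d row itself:

```python
import numpy as np

global_means = {"segment_0": 1.5, "segment_1": 2.5}  # hypothetical values
segments = sorted(global_means)
values_to_set = np.array([global_means[s] for s in segments])  # shape (2,)
n_nan_timestamps = 3
# One identical row per NaN timestamp, matching the .loc selection shape.
block = np.repeat(values_to_set[np.newaxis, :], n_nan_timestamps, axis=0)
print(block.shape)  # (3, 2)
```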

def get_regressors_info(self) -> List[str]:
4 changes: 2 additions & 2 deletions etna/transforms/math/differencing.py
@@ -184,7 +184,7 @@ def _reconstruct_train(self, df: pd.DataFrame, columns_to_inverse: Set[str]) ->
cur_series = result_df.loc[:, pd.IndexSlice[current_segment, column]]
cur_series[init_segment.index] = init_segment.values
cur_series = self._make_inv_diff(cur_series)
- result_df.loc[cur_series.index, pd.IndexSlice[current_segment, column]] = cur_series
+ result_df.loc[:, pd.IndexSlice[current_segment, column]] = cur_series
return result_df

def _reconstruct_test(self, df: pd.DataFrame, columns_to_inverse: Set[str]) -> pd.DataFrame:
@@ -212,7 +212,7 @@ def _reconstruct_test(self, df: pd.DataFrame, columns_to_inverse: Set[str]) -> p

# run reconstruction and save the result
to_transform = self._make_inv_diff(to_transform)
- result_df.loc[:, pd.IndexSlice[segments, column]] = to_transform
+ result_df.loc[:, pd.IndexSlice[segments, column]] = to_transform.loc[result_df.index]

return result_df
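A toy illustration of the alignment this fix appears to address: the reconstructed series can carry extra history rows used to invert the difference, so it is sliced down to the target frame's index before assignment (index values here are made up):

```python
import pandas as pd

# Reconstructed series includes history rows the target frame doesn't have.
reconstructed = pd.Series(
    [1.0, 3.0, 6.0, 10.0], index=pd.date_range("2023-01-01", periods=4)
)
target_index = pd.date_range("2023-01-03", periods=2)
print(reconstructed.loc[target_index])  # only rows present in the target frame
```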

6 changes: 3 additions & 3 deletions examples/EDA.ipynb
@@ -167,7 +167,7 @@
"id": "d0e793eb",
"metadata": {},
"source": [
"Our library works with the spacial data structure TSDataset. So, before starting the EDA, we need to convert the classical DataFrame to TSDataset."
"Our library works with the special data structure TSDataset. So, before starting the EDA, we need to convert the classical DataFrame to TSDataset."
]
},
{
@@ -1306,7 +1306,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.9.6 ('etna-r-uJEZHt-py3.9')",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -1320,7 +1320,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.6"
"version": "3.8.6"
},
"vscode": {
"interpreter": {
6 changes: 3 additions & 3 deletions examples/NN_examples.ipynb
@@ -173,7 +173,7 @@
"id": "470802a9",
"metadata": {},
"source": [
"Our library works with the spacial data structure TSDataset. Let's create it as it was done in \"Get started\" notebook."
"Our library works with the special data structure TSDataset. Let's create it as it was done in \"Get started\" notebook."
]
},
{
@@ -2537,7 +2537,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.9.6 ('etna-LkP65DRT-py3.9')",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -2551,7 +2551,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.6"
"version": "3.8.6"
},
"vscode": {
"interpreter": {
8 changes: 4 additions & 4 deletions examples/backtest.ipynb
@@ -209,7 +209,7 @@
"id": "f792fcd5",
"metadata": {},
"source": [
"Our library works with the spacial data structure TSDataset. So, before starting the EDA, we need to convert the classical DataFrame to TSDataset."
"Our library works with the special data structure TSDataset. So, before starting the EDA, we need to convert the classical DataFrame to TSDataset."
]
},
{
@@ -1766,9 +1766,9 @@
"metadata": {
"hide_input": false,
"kernelspec": {
"display_name": "ext.ytarasyuk",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "ext.ytarasyuk"
"name": "python3"
},
"language_info": {
"codemirror_mode": {
@@ -1780,7 +1780,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
"version": "3.8.6"
},
"toc": {
"base_numbering": 1,
2 changes: 1 addition & 1 deletion examples/custom_transform_and_model.ipynb
@@ -81,7 +81,7 @@
"id": "rx0rHUL2_EGJ"
},
"source": [
"Our library works with the spacial data structure TSDataset. So, before starting, we need to convert the classical DataFrame to TSDataset."
"Our library works with the special data structure TSDataset. So, before starting, we need to convert the classical DataFrame to TSDataset."
]
},
{