sdpython · sdpython · Jul 22, 2022 · Jul 22, 2022 · Jul 22, 2022 · Jul 22, 2022
diff --git a/_unittests/ut_timeseries/test_preprocessing_timeseries.py b/_unittests/ut_timeseries/test_preprocessing_timeseries.py
@@ -26,6 +26,22 @@ def test_base_parameters_split0(self):
             self.assertEqualArray(nx, ppx)
             self.assertEqualArray(ny, ppy)
 
+    def test_base_parameters_split0_weight(self):  # same round trip as the test above, but fit receives weights
+        X = numpy.arange(20).reshape((10, 2))
+        y = numpy.arange(10) * 100
+        bs = BaseTimeSeries(past=2)
+        nx, ny, _ = build_ts_X_y(bs, X, y)
+        weights = numpy.ones((nx.shape[0], ), dtype=nx.dtype)  # one unit weight per row of nx
+        for d in range(0, 5):  # d is handed to TimeSeriesDifference (presumably the differencing order)
+            proc = TimeSeriesDifference(d)
+            proc.fit(nx, ny, weights)  # weights go in as the third positional argument
+            px, py = proc.transform(nx, ny)
+            self.assertEqualArray(px[-1, :], nx[-1, :])  # last row of X must come out unchanged
+            rev = proc.get_fct_inv()
+            ppx, ppy = rev.transform(px, py)  # the inverse must give back the inputs
+            self.assertEqualArray(nx, ppx)
+            self.assertEqualArray(ny, ppy)
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/mlinsights/helpers/parameters.py b/mlinsights/helpers/parameters.py
@@ -12,7 +12,7 @@ def format_value(v):
     @param      v           a string
     @return                 a string
     """
-    return ("'{0}'".format(v.replace("'", "\\'"))
+    return ("'%s'" % v.replace("'", "\\'")  # strings stay quoted, as before
             if isinstance(v, str) else f"{v}")
 
 

diff --git a/mlinsights/helpers/pipeline.py b/mlinsights/helpers/pipeline.py
@@ -203,5 +203,6 @@ def decision_function(self, X, *args, **kwargs):
             try:
                 setattr(model, k, MethodType(new_methods[k], model))
             except AttributeError:  # pragma: no cover
-                warnings.warn("Unable to overwrite method '{}' for class "
-                              "{}.".format(k, type(model)))
+                warnings.warn(
+                    f"Unable to overwrite method {k!r} for class "
+                    f"{type(model)!r}.")
diff --git a/mlinsights/mlbatch/pipeline_cache.py b/mlinsights/mlbatch/pipeline_cache.py
@@ -54,12 +54,12 @@ def _get_fit_params_steps(self, fit_params):
             if '__' not in pname:
                 if not isinstance(pval, dict):
                     raise ValueError(  # pragma: no cover
-                        "For scikit-learn < 0.23, "
-                        "Pipeline.fit does not accept the {} parameter. "
-                        "You can pass parameters to specific steps of your "
-                        "pipeline using the stepname__parameter format, e.g. "
-                        "`Pipeline.fit(X, y, logisticregression__sample_weight"
-                        "=sample_weight)`.".format(pname))
+                        f"For scikit-learn < 0.23, "
+                        f"Pipeline.fit does not accept the {pname} parameter. "
+                        f"You can pass parameters to specific steps of your "
+                        f"pipeline using the stepname__parameter format, e.g. "
+                        f"`Pipeline.fit(X, y, logisticregression__sample_weight"
+                        f"=sample_weight)`.")
                 else:
                     fit_params_steps[pname].update(pval)
             else:

diff --git a/mlinsights/mlmodel/_kmeans_022.py b/mlinsights/mlmodel/_kmeans_022.py
@@ -63,8 +63,9 @@ def _assign_labels_csr(X, sample_weight, x_squared_norms, centers,
     if (distances is not None and
             distances.shape != (X.shape[0], )):
         raise ValueError(  # pragma: no cover
-            "Dimension mismatch for distance got {}, expecting {}."
-            "".format(distances.shape, (X.shape[0], centers.shape[0])))
+            f"Dimension mismatch for distance got "
+            f"{distances.shape}, expecting "
+            f"{(X.shape[0], )}.")  # same shape the guard above checks
     n_clusters = centers.shape[0]
     n_samples = X.shape[0]
     store_distances = 0

diff --git a/mlinsights/mlmodel/_kmeans_constraint_.py b/mlinsights/mlmodel/_kmeans_constraint_.py
@@ -550,9 +550,9 @@ def _constraint_kmeans_weights(X, labels, sample_weight, centers, inertia, it,
             X, centers, sw, weights, labels, total_inertia)
         if numpy.isnan(inertia):
             raise RuntimeError(  # pragma: no cover
-                "nanNobs={} Nclus={}\ninertia={}\nweights={}\ndiff={}\nlabels={}".format(
-                    X.shape[0], centers.shape[0], inertia, weights, diff,
-                    set(labels)))
+                f"nanNobs={X.shape[0]} Nclus={centers.shape[0]}\n"
+                f"inertia={inertia}\nweights={weights}\ndiff={diff}\n"
+                f"labels={set(labels)}")
 
         # best option so far?
         if best_inertia is None or inertia < best_inertia:

diff --git a/mlinsights/mlmodel/categories_to_integers.py b/mlinsights/mlmodel/categories_to_integers.py
@@ -152,8 +152,8 @@ def transform(v, vec):
                         lv = lv[:20]
                         lv.append("...")
                     raise ValueError(  # pragma: no cover
-                        "Unable to find category value '{0}' type(v)={2} among\n{1}".format(
-                            v, "\n".join(lv), type(v)))
+                        "Unable to find category value %r type(v)=%r "
+                        "among\n%s" % (v, type(v), '\n'.join(lv)))
                 return numpy.nan
 
             sch, pos, new_vector = self._schema
@@ -184,8 +184,9 @@ def transform(v, vec):
                                 lv = lv[:20]
                                 lv.append("...")
                             raise ValueError(  # pragma: no cover
-                                "unable to find category value '{0}': '{1}' type(v)={3} among\n{2}".format(
-                                    k, v, "\n".join(lv), type(v)))
+                                "Unable to find category value %r: %r "
+                                "type(v)=%r among\n%s" % (
+                                    k, v, type(v), '\n'.join(lv)))
                     else:
                         p = pos[k] + vec[k][v]
                     res[i, p] = 1.0

diff --git a/mlinsights/mlmodel/decision_tree_logreg.py b/mlinsights/mlmodel/decision_tree_logreg.py
@@ -189,8 +189,9 @@ def fit_improve(self, dtlr, total_N, X, y, sample_weight):
             # The classifier is not linear and cannot be improved.
             if dtlr.fit_improve_algo == 'intercept_sort_always':  # pragma: no cover
                 raise RuntimeError(
-                    "The model is not linear ({}), "
-                    "intercept cannot be improved.".format(self.estimator.__class__.__name__))
+                    f"The model is not linear "
+                    f"({self.estimator.__class__.__name__!r}), "
+                    f"intercept cannot be improved.")
             return prob
 
         above = prob[:, 1] > self.threshold
@@ -368,8 +369,8 @@ def __init__(self, estimator=None,
 
         if self.fit_improve_algo not in DecisionTreeLogisticRegression._fit_improve_algo_values:
             raise ValueError(
-                "fit_improve_algo='{}' not in {}".format(
-                    self.fit_improve_algo, DecisionTreeLogisticRegression._fit_improve_algo_values))
+                f"fit_improve_algo={self.fit_improve_algo!r} "
+                f"not in {DecisionTreeLogisticRegression._fit_improve_algo_values}.")
 
     def fit(self, X, y, sample_weight=None):
         """
@@ -401,8 +402,8 @@ def fit(self, X, y, sample_weight=None):
         self.classes_ = numpy.array(sorted(set(y)))
         if len(self.classes_) != 2:
             raise RuntimeError(
-                "The model only supports binary classification but labels are "
-                "{}.".format(self.classes_))
+                f"The model only supports binary classification but labels are "
+                f"{self.classes_}.")
 
         if self.strategy == 'parallel':
             return self._fit_parallel(X, y, sample_weight)

diff --git a/mlinsights/mlmodel/kmeans_l1.py b/mlinsights/mlmodel/kmeans_l1.py
@@ -617,9 +617,10 @@ def _fit_l1(self, X, y=None, sample_weight=None):
         distinct_clusters = len(set(best_labels))
         if distinct_clusters < self.n_clusters:
             warnings.warn(  # pragma no cover
-                "Number of distinct clusters ({}) found smaller than "
-                "n_clusters ({}). Possibly due to duplicate points "
-                "in X.".format(distinct_clusters, self.n_clusters),
+                f"Number of distinct clusters ({distinct_clusters}) "
+                f"found smaller than "
+                f"n_clusters ({self.n_clusters}). Possibly "
+                f"due to duplicate points in X.",
                 ConvergenceWarning, stacklevel=2)
 
         self.cluster_centers_ = best_centers

diff --git a/mlinsights/mlmodel/ml_featurizer.py b/mlinsights/mlmodel/ml_featurizer.py
@@ -43,8 +43,8 @@ def model_featurizer(model, **params):
         return model_featurizer_torch(model, **params)
     tried.append("torch")
     raise FeaturizerTypeError(  # pragma no cover
-        "Unable to process type '{0}', allowed:\n{1}".format(
-            type(model), "\n".join(sorted(str(_) for _ in tried))))
+        "Unable to process type %r, allowed:\n%s" % (
+            type(model), '\n'.join(sorted(str(_) for _ in tried))))
 
 
 def is_vector(X):

diff --git a/mlinsights/mlmodel/piecewise_estimator.py b/mlinsights/mlmodel/piecewise_estimator.py
@@ -20,7 +20,7 @@ def _fit_piecewise_estimator(i, model, X, y, sample_weight, association, nb_clas
     ind = association == i
     if not numpy.any(ind):
         # No training example for this bucket.
-        return None
+        return model  # pragma: no cover
     Xi = X[ind, :]
     yi = y[ind]
     sw = sample_weight[ind] if sample_weight is not None else None
@@ -141,7 +141,7 @@ def _mapping_train(self, X, binner):
                 ind = numpy.asarray(ind.todense()).flatten()
                 if not numpy.any(ind):
                     # No training example for this bucket.
-                    continue
+                    continue  # pragma: no cover
                 mapping[j] = ntree
                 association[ind] = ntree
                 ntree += 1
@@ -278,8 +278,8 @@ def _apply_predict_method(self, X, method, parallelized, dimout):
                 "Estimator was apparently fitted but contains no estimator.")
         if not hasattr(self.estimators_[0], method):
             raise TypeError(  # pragma: no cover
-                "Estimator {} does not have method '{}'.".format(
-                    type(self.estimators_[0]), method))
+                f"Estimator {type(self.estimators_[0])} "
+                f"does not have method {method!r}.")
         if isinstance(X, pandas.DataFrame):
             X = X.values
 

diff --git a/mlinsights/mlmodel/piecewise_tree_regression.py b/mlinsights/mlmodel/piecewise_tree_regression.py
@@ -120,8 +120,8 @@ def _fit_reglin(self, X, y, sample_weight):
                               if tree.children_left[i] <= i and tree.children_right[i] <= i]  # pylint: disable=E1136
         if tree.n_leaves != len(self.leaves_index_):
             raise RuntimeError(  # pragma: no cover
-                "Unexpected number of leaves {} != {}".format(
-                    tree.n_leaves, len(self.leaves_index_)))
+                f"Unexpected number of leaves {tree.n_leaves} "
+                f"!= {len(self.leaves_index_)}.")
         pred_leaves = self.predict_leaves(X)
         self.leaves_mapping_ = {k: i for i, k in enumerate(pred_leaves)}
         self.betas_ = numpy.empty((len(self.leaves_index_), X.shape[1] + 1))

diff --git a/mlinsights/mlmodel/predictable_tsne.py b/mlinsights/mlmodel/predictable_tsne.py
@@ -52,8 +52,8 @@ def __init__(self, normalizer=None, transformer=None, estimator=None,
                 f"normalizer {type(normalizer)} does not have a 'transform' method.")
         if not hasattr(transformer, "fit_transform"):
             raise AttributeError(  # pragma: no cover
-                "transformer {} does not have a 'fit_transform' method.".format(
-                    type(transformer)))
+                f"transformer {type(transformer)} does not have a "
+                f"'fit_transform' method.")
         if not hasattr(estimator, "predict"):
             raise AttributeError(  # pragma: no cover
                 f"estimator {type(estimator)} does not have a 'predict' method.")

diff --git a/mlinsights/mlmodel/sklearn_testing.py b/mlinsights/mlmodel/sklearn_testing.py
@@ -217,8 +217,9 @@ def assert_estimator_equal(esta, estb, ext=None):
         if (att.endswith('_') and not att.endswith('__')) or \
                 (att.startswith('_') and not att.startswith('__')):
             if not hasattr(estb, att):  # pragma no cover
-                raise AssertionError("Missing fitted attribute '{}' class {}\n==1 {}\n==2 {}".format(
-                    att, esta.__class__, list(sorted(esta.__dict__)), list(sorted(estb.__dict__))))
+                raise AssertionError(
+                    "Missing fitted attribute '{}' class {}\n==1 {}\n==2 {}".format(
+                        att, esta.__class__, list(sorted(esta.__dict__)), list(sorted(estb.__dict__))))
             if isinstance(getattr(esta, att), BaseEstimator):
                 assert_estimator_equal(
                     getattr(esta, att), getattr(estb, att), ext)
@@ -227,8 +228,9 @@ def assert_estimator_equal(esta, estb, ext=None):
     for att in estb.__dict__:
         if att.endswith('_') and not att.endswith('__'):
             if not hasattr(esta, att):  # pragma no cover
-                raise AssertionError("Missing fitted attribute\n==1 {}\n==2 {}".format(
-                    list(sorted(esta.__dict__)), list(sorted(estb.__dict__))))
+                raise AssertionError(
+                    "Missing fitted attribute\n==1 {}\n==2 {}".format(
+                        list(sorted(esta.__dict__)), list(sorted(estb.__dict__))))
 
 
 def test_sklearn_grid_search_cv(fct_model, X, y=None, sample_weight=None, **grid_params):

diff --git a/mlinsights/mlmodel/sklearn_transform_inv_fct.py b/mlinsights/mlmodel/sklearn_transform_inv_fct.py
@@ -144,9 +144,8 @@ def fit(self, X=None, y=None, sample_weight=None):
     def _check_is_fitted(self):
         if not hasattr(self, 'permutation_'):
             raise NotFittedError(  # pragma: no cover
-                "This instance {} is not fitted yet. Call 'fit' with "
-                "appropriate arguments before using this method.".format(
-                    type(self)))
+                f"This instance {type(self)} is not fitted yet. Call 'fit' with "
+                f"appropriate arguments before using this method.")
 
     def get_fct_inv(self):
         """
@@ -195,8 +194,9 @@ def transform(self, X, y):
                     if self.closest:
                         cl = self._find_closest(yp[i])
                     else:
-                        raise RuntimeError("Unable to find key '{}' in {}.".format(
-                            yp[i], list(sorted(self.permutation_))))
+                        raise RuntimeError(
+                            f"Unable to find key {yp[i]!r} in "
+                            f"{list(sorted(self.permutation_))!r}.")
                 else:
                     cl = yp[i]
                 yp[i] = self.permutation_[cl]

diff --git a/mlinsights/mlmodel/target_predictors.py b/mlinsights/mlmodel/target_predictors.py
@@ -20,8 +20,9 @@ def _common_get_transform(transformer, is_regression):
             return FunctionReciprocalTransformer(transformer)
     elif isinstance(transformer, BaseReciprocalTransformer):
         return clone(transformer)
-    raise TypeError("Transformer {} must be a string or on object of type "
-                    "BaseReciprocalTransformer.".format(type(transformer)))
+    raise TypeError(
+        f"Transformer {type(transformer)} must be a string or "
+        "an object of type BaseReciprocalTransformer.")
 
 
 class TransformedTargetRegressor2(BaseEstimator, RegressorMixin):
@@ -110,9 +111,8 @@ def predict(self, X):
         """
         if not hasattr(self, 'regressor_'):
             raise NotFittedError(  # pragma: no cover
-                "This instance {} is not fitted yet. Call 'fit' with "
-                "appropriate arguments before using this method.".format(
-                    type(self)))
+                f"This instance {type(self)} is not fitted yet. Call 'fit' with "
+                f"appropriate arguments before using this method.")
         X_trans, _ = self.transformer_.transform(X, None)
         pred = self.regressor_.predict(X_trans)
 
@@ -210,10 +210,9 @@ def fit(self, X, y, sample_weight=None):
 
     def _check_is_fitted(self):
         if not hasattr(self, 'classifier_'):
-            raise NotFittedError(
-                "This instance {} is not fitted yet. Call 'fit' with "
-                "appropriate arguments before using this method.".format(
-                    type(self)))
+            raise NotFittedError(  # pragma: no cover
+                f"This instance {type(self)} is not fitted yet. Call 'fit' with "
+                f"appropriate arguments before using this method.")
 
     @property
     def classes_(self):
@@ -237,8 +236,9 @@ def _apply(self, X, method):
         """
         self._check_is_fitted()
         if not hasattr(self.classifier_, method):
-            raise RuntimeError("Unable to find method '{}' in model {}.".format(
-                method, type(self.classifier_)))
+            raise RuntimeError(  # pragma: no cover
+                f"Unable to find method {method!r} in model "
+                f"{type(self.classifier_)}.")
         meth = getattr(self.classifier_, method)
         X_trans, _ = self.transformer_.transform(X, None)
         pred = meth(X_trans)

diff --git a/mlinsights/mlmodel/transfer_transformer.py b/mlinsights/mlmodel/transfer_transformer.py
@@ -46,8 +46,8 @@ def __init__(self, estimator, method=None, copy_estimator=True,
                 method = "predict"
             else:
                 raise AttributeError(  # pragma: no cover
-                    "Cannot find a method transform, predict_proba, decision_function, "
-                    "predict in object {}".format(type(estimator)))
+                    f"Cannot find a method transform, predict_proba, decision_function, "
+                    f"predict in object {type(estimator)}.")
         if not hasattr(estimator, method):
             raise AttributeError(  # pragma: no cover
                 f"Cannot find method '{method}' in object {type(estimator)}")

diff --git a/mlinsights/plotting/visualize.py b/mlinsights/plotting/visualize.py
@@ -219,8 +219,8 @@ def pipeline2dot(pipe, data, **params):
     elif isinstance(raw_data, numpy.ndarray):
         if len(raw_data.shape) != 2:
             raise NotImplementedError(  # pragma: no cover
-                "Unexpected training data dimension: {}.".format(
-                    data.shape))  # pylint: disable=E1101
+                "Unexpected training data dimension: "
+                f"{raw_data.shape}.")  # the array is raw_data; data is filled by key below
         for i in range(raw_data.shape[1]):
             data['X%d' % i] = 'sch0:f%d' % i
     elif not isinstance(raw_data, list):

diff --git a/mlinsights/search_rank/search_engine_predictions_images.py b/mlinsights/search_rank/search_engine_predictions_images.py
@@ -52,8 +52,8 @@ def _prepare_fit(self, data=None, features=None, metadata=None,
             from keras_preprocessing.image import DirectoryIterator, NumpyArrayIterator  # pylint: disable=E0401,C0415
             if not isinstance(iter_images, (Iterator, DirectoryIterator, NumpyArrayIterator)):
                 raise NotImplementedError(  # pragma: no cover
-                    "iter_images must be a keras Iterator. No option implemented for type {0}."
-                    "".format(type(iter_images)))
+                    f"iter_images must be a keras Iterator. "
+                    f"No option implemented for type {type(iter_images)}.")
             if iter_images.batch_size != 1:
                 raise ValueError(  # pragma: no cover
                     f"batch_size must be 1 not {iter_images.batch_size}")
@@ -160,5 +160,5 @@ def kneighbors(self, iter_images, n_neighbors=None):  # pylint: disable=W0237
                     numpy.vstack([_[2] for _ in res]))
         else:
             raise TypeError(  # pragma: no cover
-                "Unexpected type {0} in SearchEnginePredictionImages.kneighbors".format(
-                    type(iter_images)))
+                f"Unexpected type {type(iter_images)} in "
+                f"SearchEnginePredictionImages.kneighbors")
diff --git a/mlinsights/search_rank/search_engine_vectors.py b/mlinsights/search_rank/search_engine_vectors.py
@@ -105,8 +105,8 @@ def transform(vec, many):
                             f"feature should be of type numpy.array not {type(tradd)}")
                     else:
                         raise TypeError(  # pragma: no cover
-                            "output of method transform ({}) should be of type numpy.array not {}".format(
-                                transform, type(tradd)))
+                            f"output of method transform {transform!r} should be of "
+                            f"type numpy.array not {type(tradd)}.")
                 arrays.append(tradd)
             self.features_ = numpy.vstack(arrays)
             self.metadata_ = pandas.DataFrame(metas)
@@ -225,7 +225,7 @@ def to_zip(self, zipfilename, **kwargs):
         if isinstance(zipfilename, str):
             zf = zipfile.ZipFile(zipfilename, 'w')
             close = True
-        else:
+        else:  # pragma: no cover
             zf = zipfilename
             close = False
         if 'index' not in kwargs:
@@ -252,7 +252,7 @@ def read_zip(zipfilename, **kwargs):
         if isinstance(zipfilename, str):
             zf = zipfile.ZipFile(zipfilename, 'r')
             close = True
-        else:
+        else:  # pragma: no cover
             zf = zipfilename
             close = False
         feat = read_zip(zf, 'SearchEngineVectors-features.npy')