diff --git a/doc/user_manual/generated/generateRomDoc.py b/doc/user_manual/generated/generateRomDoc.py
index 3108709386..57ffd63543 100644
--- a/doc/user_manual/generated/generateRomDoc.py
+++ b/doc/user_manual/generated/generateRomDoc.py
@@ -753,7 +753,8 @@
'KerasMLPRegression',
'KerasConvNetClassifier',
'KerasLSTMClassifier',
- 'KerasLSTMRegression']
+ 'KerasLSTMRegression'
+ ]
validInternalRom = ['NDspline',
'pickledROM',
'GaussPolynomialRom',
diff --git a/doc/user_manual/generated/sklRom.tex b/doc/user_manual/generated/sklRom.tex
index 0474f5cc98..609b0a4eec 100644
--- a/doc/user_manual/generated/sklRom.tex
+++ b/doc/user_manual/generated/sklRom.tex
@@ -363,21 +363,10 @@ \subsubsection{BayesianRidge}
Hyper-parameter : inverse scale parameter (rate parameter) for
the Gamma distribution prior over the lambda parameter.
- \item \xmlNode{alpha\_init}: \xmlDesc{float},
- Initial value for alpha (precision of the noise).
- If not set, alpha\_init is $1/Var(y)$.
-
- \item \xmlNode{lambda\_init}: \xmlDesc{float},
- Initial value for lambda (precision of the weights).
-
\item \xmlNode{compute\_score}: \xmlDesc{[True, Yes, 1, False, No, 0, t, y, 1, f, n, 0]},
If True, compute the objective function at each step of the
model.
- \item \xmlNode{threshold\_lambda}: \xmlDesc{float},
- threshold for removing (pruning) weights with
- shigh precision from the computation..
-
\item \xmlNode{fit\_intercept}: \xmlDesc{[True, Yes, 1, False, No, 0, t, y, 1, f, n, 0]},
Whether to calculate the intercept for this model. Specifies if a constant (a.k.a. bias or
intercept) should be added to the decision
@@ -685,11 +674,6 @@ \subsubsection{Lars}
\item \xmlNode{n\_nonzero\_coefs}: \xmlDesc{integer},
Target number of non-zero coefficients.
- \item \xmlNode{jitter}: \xmlDesc{float},
- Upper bound on a uniform noise parameter to be added to the
- y values, to satisfy the model’s assumption of one-at-a-time computations.
- Might help with stability.
-
\item \xmlNode{verbose}: \xmlDesc{[True, Yes, 1, False, No, 0, t, y, 1, f, n, 0]},
Sets the verbosity amount.
@@ -1088,11 +1072,6 @@ \subsubsection{LassoLars}
\item \xmlNode{positive}: \xmlDesc{[True, Yes, 1, False, No, 0, t, y, 1, f, n, 0]},
When set to True, forces the coefficients to be positive.
- \item \xmlNode{jitter}: \xmlDesc{float},
- Upper bound on a uniform noise parameter to be added to the y values,
- to satisfy the model’s assumption of one-at-a-time computations. Might help
- with stability.
-
\item \xmlNode{verbose}: \xmlDesc{[True, Yes, 1, False, No, 0, t, y, 1, f, n, 0]},
Amount of verbosity.
\end{itemize}
@@ -1352,9 +1331,6 @@ \subsubsection{LinearRegression}
This parameter is ignored when fit\_intercept is set to False. If True,
the regressors X will be normalized before regression by subtracting the mean and
dividing by the l2-norm.
-
- \item \xmlNode{positive}: \xmlDesc{[True, Yes, 1, False, No, 0, t, y, 1, f, n, 0]},
- When set to True, forces the coefficients to be positive.
\end{itemize}
@@ -2321,11 +2297,6 @@ \subsubsection{Perceptron}
\item \xmlNode{alpha}: \xmlDesc{float},
Constant that multiplies the regularization term if regularization is used.
- \item \xmlNode{l1\_ratio}: \xmlDesc{float},
- The Elastic Net mixing parameter, with $0 <= l1\_ratio <= 1.$ $l1\_ratio=0$ corresponds to L2
- penalty, $l1\_ratio=1$ to L1. Only used if
- penalty='elasticnet'.
-
\item \xmlNode{fit\_intercept}: \xmlDesc{[True, Yes, 1, False, No, 0, t, y, 1, f, n, 0]},
Whether the intercept should be estimated or not. If False,
the data is assumed to be already centered.
@@ -2562,7 +2533,7 @@ \subsubsection{RidgeCV}
Determines the cross-validation splitting strategy.
It specifies the number of folds..
- \item \xmlNode{alphas}: \xmlDesc{comma-separated floats},
+ \item \xmlNode{alphas}: \xmlDesc{tuple of comma-separated floats},
Array of alpha values to try. Regularization strength; must be a positive float.
Regularization improves the conditioning of
the problem and reduces the variance of the estimates.
@@ -3191,6 +3162,76 @@ \subsubsection{ComplementNB}
\end{itemize}
+\subsubsection{CategoricalNB}
+  The \textit{CategoricalNB} classifier (Naive Bayes classifier for categorical features)
+ is suitable for classification with discrete features that are categorically distributed.
+ The categories of each feature are drawn from a categorical distribution.
+ \zNormalizationPerformed{CategoricalNB}
+
+ The \xmlNode{CategoricalNB} node recognizes the following parameters:
+ \begin{itemize}
+ \item \xmlAttr{name}: \xmlDesc{string, required},
+ User-defined name to designate this entity in the RAVEN input file.
+ \item \xmlAttr{verbosity}: \xmlDesc{[silent, quiet, all, debug], optional},
+ Desired verbosity of messages coming from this entity
+ \item \xmlAttr{subType}: \xmlDesc{string, required},
+ specify the type of ROM that will be used
+ \end{itemize}
+
+ The \xmlNode{CategoricalNB} node recognizes the following subnodes:
+ \begin{itemize}
+ \item \xmlNode{Features}: \xmlDesc{comma-separated strings},
+ specifies the names of the features of this ROM. \nb These parameters are going to be
+ requested for the training of this object (see Section~\ref{subsec:stepRomTrainer})
+
+ \item \xmlNode{Target}: \xmlDesc{comma-separated strings},
+ contains a comma separated list of the targets of this ROM. These parameters are the
+ Figures of Merit (FOMs) this ROM is supposed to predict. \nb These parameters are
+ going to be requested for the training of this object (see Section
+ \ref{subsec:stepRomTrainer}).
+
+ \item \xmlNode{pivotParameter}: \xmlDesc{string},
+      If a time-dependent ROM is requested, please specify the pivot variable (e.g., time)
+      used in the input HistorySet.
+
+ \item \xmlNode{CV}: \xmlDesc{string},
+ The text portion of this node needs to contain the name of the \xmlNode{PostProcessor} with
+      \xmlAttr{subType} ``CrossValidation''.
+ The \xmlNode{CV} node recognizes the following parameters:
+ \begin{itemize}
+ \item \xmlAttr{class}: \xmlDesc{string, optional},
+ should be set to \xmlString{Model}
+ \item \xmlAttr{type}: \xmlDesc{string, optional},
+ should be set to \xmlString{PostProcessor}
+ \end{itemize}
+
+ \item \xmlNode{alias}: \xmlDesc{string},
+ specifies alias for any variable of interest in the input or output space. These
+ aliases can be used anywhere in the RAVEN input to refer to the variables. In the body
+ of this node the user specifies the name of the variable that the model is going to use
+ (during its execution).
+ The \xmlNode{alias} node recognizes the following parameters:
+ \begin{itemize}
+ \item \xmlAttr{variable}: \xmlDesc{string, required},
+ define the actual alias, usable throughout the RAVEN input
+ \item \xmlAttr{type}: \xmlDesc{[input, output], required},
+ either ``input'' or ``output''.
+ \end{itemize}
+
+ \item \xmlNode{alpha}: \xmlDesc{float},
+ Additive (Laplace and Lidstone) smoothing parameter (0 for no smoothing).
+
+ \item \xmlNode{class\_prior}: \xmlDesc{comma-separated floats},
+ Prior probabilities of the classes. If specified the priors are
+ not adjusted according to the data. \nb the number of elements inputted here must
+ match the number of classes in the data set used in the training stage.
+
+ \item \xmlNode{fit\_prior}: \xmlDesc{[True, Yes, 1, False, No, 0, t, y, 1, f, n, 0]},
+ Whether to learn class prior probabilities or not. If false, a uniform
+ prior will be used.
+ \end{itemize}
+
+
\subsubsection{BernoulliNB}
The \textit{BernoulliNB} classifier implements the naive Bayes training and
classification algorithms for data that is distributed according to multivariate
@@ -5141,12 +5182,6 @@ \subsubsection{NuSVC}
\item \xmlNode{cache\_size}: \xmlDesc{float},
Size of the kernel cache (in MB)
- \item \xmlNode{epsilon}: \xmlDesc{float},
- Epsilon in the epsilon-SVR model. It specifies the epsilon-tube
- within which no penalty is associated in the training loss function
- with points predicted within a distance epsilon from the actual
- value.
-
\item \xmlNode{shrinking}: \xmlDesc{[True, Yes, 1, False, No, 0, t, y, 1, f, n, 0]},
Whether to use the shrinking heuristic.
@@ -5161,13 +5196,6 @@ \subsubsection{NuSVC}
used as multi-class strategy. The
parameter is ignored for binary classification.
- \item \xmlNode{break\_ties}: \xmlDesc{[True, Yes, 1, False, No, 0, t, y, 1, f, n, 0]},
- if true, decision\_function\_shape='ovr', and number of $classes > 2$, predict will
- break ties according to the confidence values of decision\_function; otherwise the first class
- among the tied classes is returned. Please
- note that breaking ties comes at a relatively high computational
- cost compared to a simple predict.
-
\item \xmlNode{verbose}: \xmlDesc{[True, Yes, 1, False, No, 0, t, y, 1, f, n, 0]},
Enable verbose output. Note that this setting takes advantage
of a per-process runtime setting in libsvm that, if enabled, may not
@@ -5365,12 +5393,6 @@ \subsubsection{SVC}
\item \xmlNode{cache\_size}: \xmlDesc{float},
Size of the kernel cache (in MB)
- \item \xmlNode{epsilon}: \xmlDesc{float},
- Epsilon in the epsilon-SVR model. It specifies the epsilon-tube
- within which no penalty is associated in the training loss function
- with points predicted within a distance epsilon from the actual
- value.
-
\item \xmlNode{shrinking}: \xmlDesc{[True, Yes, 1, False, No, 0, t, y, 1, f, n, 0]},
Whether to use the shrinking heuristic.
@@ -5385,13 +5407,6 @@ \subsubsection{SVC}
used as multi-class strategy. The
parameter is ignored for binary classification.
- \item \xmlNode{break\_ties}: \xmlDesc{[True, Yes, 1, False, No, 0, t, y, 1, f, n, 0]},
- if true, decision\_function\_shape='ovr', and number of $classes > 2$, predict will
- break ties according to the confidence values of decision\_function; otherwise the first class
- among the tied classes is returned. Please
- note that breaking ties comes at a relatively high computational
- cost compared to a simple predict.
-
\item \xmlNode{verbose}: \xmlDesc{[True, Yes, 1, False, No, 0, t, y, 1, f, n, 0]},
Enable verbose output. Note that this setting takes advantage
of a per-process runtime setting in libsvm that, if enabled, may not
@@ -5620,11 +5635,6 @@ \subsubsection{DecisionTreeClassifier}
$N$, $N\_t$, $N\_t]\_R$ and $N\_t\_L$ all refer to the weighted sum, if sample\_weight is
passed.
- \item \xmlNode{ccp\_alpha}: \xmlDesc{float},
- Complexity parameter used for Minimal Cost-Complexity Pruning. The subtree with the largest
- cost complexity that is smaller than
- ccp\_alpha will be chosen. By default, no pruning is performed.
-
\item \xmlNode{random\_state}: \xmlDesc{integer},
Controls the randomness of the estimator. The features are
always randomly permuted at each split, even if splitter is set to
@@ -5751,11 +5761,6 @@ \subsubsection{DecisionTreeRegressor}
$N$, $N\_t$, $N\_t]\_R$ and $N\_t\_L$ all refer to the weighted sum, if sample\_weight is
passed.
- \item \xmlNode{ccp\_alpha}: \xmlDesc{float},
- Complexity parameter used for Minimal Cost-Complexity Pruning. The subtree with the largest
- cost complexity that is smaller than
- ccp\_alpha will be chosen. By default, no pruning is performed.
-
\item \xmlNode{random\_state}: \xmlDesc{integer},
Controls the randomness of the estimator. The features are
always randomly permuted at each split, even if splitter is set to
@@ -5881,11 +5886,6 @@ \subsubsection{ExtraTreeClassifier}
$N$, $N\_t$, $N\_t]\_R$ and $N\_t\_L$ all refer to the weighted sum, if sample\_weight is
passed.
- \item \xmlNode{ccp\_alpha}: \xmlDesc{float},
- Complexity parameter used for Minimal Cost-Complexity Pruning. The subtree with the largest
- cost complexity that is smaller than
- ccp\_alpha will be chosen. By default, no pruning is performed.
-
\item \xmlNode{random\_state}: \xmlDesc{integer},
Used to pick randomly the max\_features used at each split.
\end{itemize}
@@ -6014,3 +6014,259 @@ \subsubsection{ExtraTreeRegressor}
\item \xmlNode{random\_state}: \xmlDesc{integer},
Used to pick randomly the max\_features used at each split.
\end{itemize}
+
+
+\subsubsection{VotingRegressor}
+ The \xmlNode{VotingRegressor} is an ensemble meta-estimator that fits several base
+ regressors, each on the whole dataset. Then it averages the individual predictions to form
+ a final prediction.
+
+ The \xmlNode{VotingRegressor} node recognizes the following parameters:
+ \begin{itemize}
+ \item \xmlAttr{name}: \xmlDesc{string, required},
+ User-defined name to designate this entity in the RAVEN input file.
+ \item \xmlAttr{verbosity}: \xmlDesc{[silent, quiet, all, debug], optional},
+ Desired verbosity of messages coming from this entity
+ \item \xmlAttr{subType}: \xmlDesc{string, required},
+ specify the type of ROM that will be used
+ \end{itemize}
+
+ The \xmlNode{VotingRegressor} node recognizes the following subnodes:
+ \begin{itemize}
+ \item \xmlNode{Features}: \xmlDesc{comma-separated strings},
+ specifies the names of the features of this ROM. \nb These parameters are going to be
+ requested for the training of this object (see Section~\ref{subsec:stepRomTrainer})
+
+ \item \xmlNode{Target}: \xmlDesc{comma-separated strings},
+ contains a comma separated list of the targets of this ROM. These parameters are the
+ Figures of Merit (FOMs) this ROM is supposed to predict. \nb These parameters are
+ going to be requested for the training of this object (see Section
+ \ref{subsec:stepRomTrainer}).
+
+ \item \xmlNode{pivotParameter}: \xmlDesc{string},
+      If a time-dependent ROM is requested, please specify the pivot variable (e.g., time)
+      used in the input HistorySet.
+
+ \item \xmlNode{CV}: \xmlDesc{string},
+ The text portion of this node needs to contain the name of the \xmlNode{PostProcessor} with
+      \xmlAttr{subType} ``CrossValidation''.
+ The \xmlNode{CV} node recognizes the following parameters:
+ \begin{itemize}
+ \item \xmlAttr{class}: \xmlDesc{string, optional},
+ should be set to \xmlString{Model}
+ \item \xmlAttr{type}: \xmlDesc{string, optional},
+ should be set to \xmlString{PostProcessor}
+ \end{itemize}
+
+ \item \xmlNode{alias}: \xmlDesc{string},
+ specifies alias for any variable of interest in the input or output space. These
+ aliases can be used anywhere in the RAVEN input to refer to the variables. In the body
+ of this node the user specifies the name of the variable that the model is going to use
+ (during its execution).
+ The \xmlNode{alias} node recognizes the following parameters:
+ \begin{itemize}
+ \item \xmlAttr{variable}: \xmlDesc{string, required},
+ define the actual alias, usable throughout the RAVEN input
+ \item \xmlAttr{type}: \xmlDesc{[input, output], required},
+ either ``input'' or ``output''.
+ \end{itemize}
+
+ \item \xmlNode{estimator}: \xmlDesc{string},
+ name of a ROM that can be used as an estimator
+ The \xmlNode{estimator} node recognizes the following parameters:
+ \begin{itemize}
+ \item \xmlAttr{class}: \xmlDesc{string, required},
+ RAVEN class for this entity (e.g. Samplers, Models, DataObjects)
+ \item \xmlAttr{type}: \xmlDesc{string, required},
+ RAVEN type for this entity; a subtype of the class (e.g. MonteCarlo, Code, PointSet)
+ \end{itemize}
+
+ \item \xmlNode{weights}: \xmlDesc{comma-separated floats},
+ Sequence of weights (float or int) to weight the occurrences of predicted
+ values before averaging. Uses uniform weights if None.
+ \end{itemize}
+
+
+\subsubsection{BaggingRegressor}
+ The \xmlNode{BaggingRegressor} is an ensemble meta-estimator that fits base regressors each on
+  random subsets of the original dataset and then aggregates their
+ individual predictions (either by voting or by averaging) to form a final
+ prediction. Such a meta-estimator can typically be used as a way to reduce the variance of a
+ black-box estimator (e.g., a decision tree), by introducing
+ randomization into its construction procedure and then making an ensemble
+ out of it.
+
+ The \xmlNode{BaggingRegressor} node recognizes the following parameters:
+ \begin{itemize}
+ \item \xmlAttr{name}: \xmlDesc{string, required},
+ User-defined name to designate this entity in the RAVEN input file.
+ \item \xmlAttr{verbosity}: \xmlDesc{[silent, quiet, all, debug], optional},
+ Desired verbosity of messages coming from this entity
+ \item \xmlAttr{subType}: \xmlDesc{string, required},
+ specify the type of ROM that will be used
+ \end{itemize}
+
+ The \xmlNode{BaggingRegressor} node recognizes the following subnodes:
+ \begin{itemize}
+ \item \xmlNode{Features}: \xmlDesc{comma-separated strings},
+ specifies the names of the features of this ROM. \nb These parameters are going to be
+ requested for the training of this object (see Section~\ref{subsec:stepRomTrainer})
+
+ \item \xmlNode{Target}: \xmlDesc{comma-separated strings},
+ contains a comma separated list of the targets of this ROM. These parameters are the
+ Figures of Merit (FOMs) this ROM is supposed to predict. \nb These parameters are
+ going to be requested for the training of this object (see Section
+ \ref{subsec:stepRomTrainer}).
+
+ \item \xmlNode{pivotParameter}: \xmlDesc{string},
+      If a time-dependent ROM is requested, please specify the pivot variable (e.g., time)
+      used in the input HistorySet.
+
+ \item \xmlNode{CV}: \xmlDesc{string},
+ The text portion of this node needs to contain the name of the \xmlNode{PostProcessor} with
+      \xmlAttr{subType} ``CrossValidation''.
+ The \xmlNode{CV} node recognizes the following parameters:
+ \begin{itemize}
+ \item \xmlAttr{class}: \xmlDesc{string, optional},
+ should be set to \xmlString{Model}
+ \item \xmlAttr{type}: \xmlDesc{string, optional},
+ should be set to \xmlString{PostProcessor}
+ \end{itemize}
+
+ \item \xmlNode{alias}: \xmlDesc{string},
+ specifies alias for any variable of interest in the input or output space. These
+ aliases can be used anywhere in the RAVEN input to refer to the variables. In the body
+ of this node the user specifies the name of the variable that the model is going to use
+ (during its execution).
+ The \xmlNode{alias} node recognizes the following parameters:
+ \begin{itemize}
+ \item \xmlAttr{variable}: \xmlDesc{string, required},
+ define the actual alias, usable throughout the RAVEN input
+ \item \xmlAttr{type}: \xmlDesc{[input, output], required},
+ either ``input'' or ``output''.
+ \end{itemize}
+
+ \item \xmlNode{estimator}: \xmlDesc{string},
+ name of a ROM that can be used as an estimator
+ The \xmlNode{estimator} node recognizes the following parameters:
+ \begin{itemize}
+ \item \xmlAttr{class}: \xmlDesc{string, required},
+ RAVEN class for this entity (e.g. Samplers, Models, DataObjects)
+ \item \xmlAttr{type}: \xmlDesc{string, required},
+ RAVEN type for this entity; a subtype of the class (e.g. MonteCarlo, Code, PointSet)
+ \end{itemize}
+
+ \item \xmlNode{n\_estimators}: \xmlDesc{integer},
+ The number of base estimators in the ensemble.
+
+ \item \xmlNode{max\_samples}: \xmlDesc{float},
+ The number of samples to draw from X to train each base estimator
+
+ \item \xmlNode{max\_features}: \xmlDesc{float},
+ The number of features to draw from X to train each base estimator
+
+ \item \xmlNode{bootstrap}: \xmlDesc{[True, Yes, 1, False, No, 0, t, y, 1, f, n, 0]},
+ Whether samples are drawn with replacement. If False, sampling without
+ replacement is performed.
+
+ \item \xmlNode{bootstrap\_features}: \xmlDesc{[True, Yes, 1, False, No, 0, t, y, 1, f, n, 0]},
+ Whether features are drawn with replacement.
+
+ \item \xmlNode{oob\_score}: \xmlDesc{[True, Yes, 1, False, No, 0, t, y, 1, f, n, 0]},
+ Whether to use out-of-bag samples to estimate the generalization error.
+ Only available if bootstrap=True.
+
+ \item \xmlNode{warm\_start}: \xmlDesc{[True, Yes, 1, False, No, 0, t, y, 1, f, n, 0]},
+ When set to True, reuse the solution of the previous call to fit and add more
+ estimators to the ensemble, otherwise, just fit a whole new ensemble.
+
+ \item \xmlNode{random\_state}: \xmlDesc{integer},
+ Controls the random resampling of the original dataset (sample wise and feature wise).
+ \end{itemize}
+
+
+\subsubsection{AdaBoostRegressor}
+ The \xmlNode{AdaBoostRegressor} is a meta-estimator that begins by fitting a regressor on
+ the original dataset and then fits additional copies of the regressor on the same dataset
+ but where the weights of instances are adjusted according to the error of the current
+ prediction. As such, subsequent regressors focus more on difficult cases.
+
+ The \xmlNode{AdaBoostRegressor} node recognizes the following parameters:
+ \begin{itemize}
+ \item \xmlAttr{name}: \xmlDesc{string, required},
+ User-defined name to designate this entity in the RAVEN input file.
+ \item \xmlAttr{verbosity}: \xmlDesc{[silent, quiet, all, debug], optional},
+ Desired verbosity of messages coming from this entity
+ \item \xmlAttr{subType}: \xmlDesc{string, required},
+ specify the type of ROM that will be used
+ \end{itemize}
+
+ The \xmlNode{AdaBoostRegressor} node recognizes the following subnodes:
+ \begin{itemize}
+ \item \xmlNode{Features}: \xmlDesc{comma-separated strings},
+ specifies the names of the features of this ROM. \nb These parameters are going to be
+ requested for the training of this object (see Section~\ref{subsec:stepRomTrainer})
+
+ \item \xmlNode{Target}: \xmlDesc{comma-separated strings},
+ contains a comma separated list of the targets of this ROM. These parameters are the
+ Figures of Merit (FOMs) this ROM is supposed to predict. \nb These parameters are
+ going to be requested for the training of this object (see Section
+ \ref{subsec:stepRomTrainer}).
+
+ \item \xmlNode{pivotParameter}: \xmlDesc{string},
+      If a time-dependent ROM is requested, please specify the pivot variable (e.g., time)
+      used in the input HistorySet.
+
+ \item \xmlNode{CV}: \xmlDesc{string},
+ The text portion of this node needs to contain the name of the \xmlNode{PostProcessor} with
+      \xmlAttr{subType} ``CrossValidation''.
+ The \xmlNode{CV} node recognizes the following parameters:
+ \begin{itemize}
+ \item \xmlAttr{class}: \xmlDesc{string, optional},
+ should be set to \xmlString{Model}
+ \item \xmlAttr{type}: \xmlDesc{string, optional},
+ should be set to \xmlString{PostProcessor}
+ \end{itemize}
+
+ \item \xmlNode{alias}: \xmlDesc{string},
+ specifies alias for any variable of interest in the input or output space. These
+ aliases can be used anywhere in the RAVEN input to refer to the variables. In the body
+ of this node the user specifies the name of the variable that the model is going to use
+ (during its execution).
+ The \xmlNode{alias} node recognizes the following parameters:
+ \begin{itemize}
+ \item \xmlAttr{variable}: \xmlDesc{string, required},
+ define the actual alias, usable throughout the RAVEN input
+ \item \xmlAttr{type}: \xmlDesc{[input, output], required},
+ either ``input'' or ``output''.
+ \end{itemize}
+
+ \item \xmlNode{estimator}: \xmlDesc{string},
+ name of a ROM that can be used as an estimator
+ The \xmlNode{estimator} node recognizes the following parameters:
+ \begin{itemize}
+ \item \xmlAttr{class}: \xmlDesc{string, required},
+ RAVEN class for this entity (e.g. Samplers, Models, DataObjects)
+ \item \xmlAttr{type}: \xmlDesc{string, required},
+ RAVEN type for this entity; a subtype of the class (e.g. MonteCarlo, Code, PointSet)
+ \end{itemize}
+
+ \item \xmlNode{n\_estimators}: \xmlDesc{integer},
+ The maximum number of estimators at which boosting is
+ terminated. In case of perfect fit, the learning procedure is
+ stopped early.
+
+ \item \xmlNode{learning\_rate}: \xmlDesc{float},
+ Weight applied to each regressor at each boosting iteration.
+ A higher learning rate increases the contribution of each regressor.
+ There is a trade-off between the learning\_rate and n\_estimators
+ parameters.
+
+ \item \xmlNode{loss}: \xmlDesc{[linear, square, exponential]},
+ The loss function to use when updating the weights after each
+ boosting iteration.
+
+ \item \xmlNode{random\_state}: \xmlDesc{integer},
+ Controls the random seed given at each estimator at each
+ boosting iteration.
+ \end{itemize}
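
The `CategoricalNB` section above documents the RAVEN-side XML only; for orientation, here is a minimal sketch of the underlying scikit-learn estimator it wraps. The data and parameter values are illustrative, and `sklearn.naive_bayes.CategoricalNB` expects integer-encoded category indices:

```python
# Hedged sketch of the estimator the CategoricalNB ROM wraps; values illustrative.
import numpy as np
from sklearn.naive_bayes import CategoricalNB

X = np.array([[0, 1], [1, 0], [2, 1], [0, 0]])  # integer-encoded categorical features
y = np.array([0, 1, 1, 0])

clf = CategoricalNB(alpha=1.0, fit_prior=True, class_prior=None)
clf.fit(X, y)
print(clf.predict(np.array([[2, 0]])))
```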
diff --git a/framework/Models/PostProcessors/LimitSurface.py b/framework/Models/PostProcessors/LimitSurface.py
index b4384c5f3e..2e80337767 100644
--- a/framework/Models/PostProcessors/LimitSurface.py
+++ b/framework/Models/PostProcessors/LimitSurface.py
@@ -131,7 +131,6 @@ def _initializeLSpp(self, runInfo, inputs, initDict):
self.ROM.initializeModel(settings)
else:
self.ROM = self.assemblerDict['ROM'][0][3]
- self.ROM.reset()
self.indexes = -1
for index, inp in enumerate(self.inputs):
if mathUtils.isAString(inp) or isinstance(inp, bytes):
diff --git a/framework/Models/ROM.py b/framework/Models/ROM.py
index b2a0230985..82a8cb981a 100644
--- a/framework/Models/ROM.py
+++ b/framework/Models/ROM.py
@@ -112,8 +112,8 @@ def __init__(self):
self.printTag = 'ROM MODEL' # label
self.cvInstanceName = None # the name of Cross Validation instance
self.cvInstance = None # Instance of provided cross validation
- self._estimatorName = None # the name of estimator instance
- self._estimator = None # Instance of provided estimator (ROM)
+ self._estimatorNameList = [] # the name list of estimator instance
+ self._estimatorList = [] # List of instances of provided estimators (ROM)
self._interfaceROM = None # Instance of provided ROM
self.pickled = False # True if ROM comes from a pickled rom
@@ -133,7 +133,7 @@ def __init__(self):
self.addAssemblerObject('Classifier', InputData.Quantity.zero_to_one)
self.addAssemblerObject('Metric', InputData.Quantity.zero_to_infinity)
self.addAssemblerObject('CV', InputData.Quantity.zero_to_one)
- self.addAssemblerObject('estimator', InputData.Quantity.zero_to_one)
+ self.addAssemblerObject('estimator', InputData.Quantity.zero_to_infinity)
def __getstate__(self):
"""
@@ -187,8 +187,8 @@ def _readMoreXML(self,xmlNode):
cvNode = paramInput.findFirst('CV')
if cvNode is not None:
self.cvInstanceName = cvNode.value
- estimatorNode = paramInput.findFirst('estimator')
- self._estimatorName = estimatorNode.value if estimatorNode is not None else None
+ estimatorNodeList = paramInput.findAll('estimator')
+ self._estimatorNameList = [estimatorNode.value for estimatorNode in estimatorNodeList] if len(estimatorNodeList) > 0 else []
self._interfaceROM = self.interfaceFactory.returnInstance(self.subType)
segmentNode = paramInput.findFirst('Segment')
@@ -235,9 +235,9 @@ def initialize(self,runInfo,inputs,initDict=None):
self.cvInstance.initialize(runInfo, inputs, initDict)
# only initialize once
- if self._estimator is None and self._estimatorName is not None:
- self._estimator = self.retrieveObjectFromAssemblerDict('estimator', self._estimatorName)
- self._interfaceROM.setEstimator(self._estimator)
+ if len(self._estimatorList) == 0 and len(self._estimatorNameList) > 0:
+ self._estimatorList = [self.retrieveObjectFromAssemblerDict('estimator', estimatorName) for estimatorName in self._estimatorNameList]
+ self._interfaceROM.setEstimator(self._estimatorList)
def reset(self):
"""
diff --git a/framework/SupervisedLearning/Factory.py b/framework/SupervisedLearning/Factory.py
index 5897ee866f..397c71d0bf 100644
--- a/framework/SupervisedLearning/Factory.py
+++ b/framework/SupervisedLearning/Factory.py
@@ -109,6 +109,12 @@
from .ScikitLearn.Tree.DecisionTreeRegressor import DecisionTreeRegressor
from .ScikitLearn.Tree.ExtraTreeClassifier import ExtraTreeClassifier
from .ScikitLearn.Tree.ExtraTreeRegressor import ExtraTreeRegressor
+# Ensemble ROM for Regression
+from .ScikitLearn.Ensemble.VotingRegressor import VotingRegressor
+from .ScikitLearn.Ensemble.BaggingRegressor import BaggingRegressor
+from .ScikitLearn.Ensemble.AdaBoostRegressor import AdaBoostRegressor
+# require sklearn version 0.24 at least
+from .ScikitLearn.Ensemble.StackingRegressor import StackingRegressor
################################################################################
factory = EntityFactory('SupervisedLearning')
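
Since `StackingRegressor` needs scikit-learn 0.24 or newer (per the comment above), one defensive way to gate such an import, sketched here rather than taken from the module, is to compare parsed version tuples:

```python
# Hedged sketch: gate an optional import on the installed sklearn version.
# Integer tuples avoid string-comparison pitfalls ('0.9' > '0.24' as strings);
# release-style version strings are assumed.
import sklearn

version = tuple(int(n) for n in sklearn.__version__.split('.')[:2])
if version >= (0, 24):
    from sklearn.ensemble import StackingRegressor  # stable from 0.24 onward
else:
    StackingRegressor = None  # callers must check before use
```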
diff --git a/framework/SupervisedLearning/ScikitLearn/DiscriminantAnalysis/LinearDiscriminantAnalysis.py b/framework/SupervisedLearning/ScikitLearn/DiscriminantAnalysis/LinearDiscriminantAnalysis.py
index fa1523800c..1175401fce 100644
--- a/framework/SupervisedLearning/ScikitLearn/DiscriminantAnalysis/LinearDiscriminantAnalysis.py
+++ b/framework/SupervisedLearning/ScikitLearn/DiscriminantAnalysis/LinearDiscriminantAnalysis.py
@@ -46,9 +46,7 @@ def __init__(self):
super().__init__()
import sklearn
import sklearn.discriminant_analysis
- import sklearn.multioutput
- # we wrap the model with the multi output classifier (for multitarget)
- self.model = sklearn.multioutput.MultiOutputClassifier(sklearn.discriminant_analysis.LinearDiscriminantAnalysis())
+ self.model = sklearn.discriminant_analysis.LinearDiscriminantAnalysis
@classmethod
def getInputSpecification(cls):
diff --git a/framework/SupervisedLearning/ScikitLearn/DiscriminantAnalysis/QuadraticDiscriminantAnalysis.py b/framework/SupervisedLearning/ScikitLearn/DiscriminantAnalysis/QuadraticDiscriminantAnalysis.py
index bd993768cd..4c20500210 100644
--- a/framework/SupervisedLearning/ScikitLearn/DiscriminantAnalysis/QuadraticDiscriminantAnalysis.py
+++ b/framework/SupervisedLearning/ScikitLearn/DiscriminantAnalysis/QuadraticDiscriminantAnalysis.py
@@ -47,9 +47,7 @@ def __init__(self):
super().__init__()
import sklearn
import sklearn.discriminant_analysis
- import sklearn.multioutput
- # we wrap the model with the multi output classifier (for multitarget)
- self.model = sklearn.multioutput.MultiOutputClassifier(sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis())
+ self.model = sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis
@classmethod
def getInputSpecification(cls):
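
Both discriminant-analysis wrappers now store the estimator class instead of a pre-wrapped instance, leaving the multioutput decision for later. A hedged sketch of that deferred-wrapping idea (the `multioutputWrapper` flag itself lives in the base class, not shown here):

```python
# Hedged sketch: store the estimator class, instantiate with user settings
# later, and only then decide whether to wrap it for multi-target problems.
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.multioutput import MultiOutputClassifier

model_cls = LinearDiscriminantAnalysis   # stored at construction time
settings = {'solver': 'svd'}             # gathered from user input later

estimator = model_cls(**settings)        # instantiated on demand
needs_multioutput = True                 # e.g. more than one target requested
if needs_multioutput:
    estimator = MultiOutputClassifier(estimator)

# the inner estimator stays reachable, which the ensemble wrappers in this
# diff rely on via get_params()['estimator']
print(estimator.get_params()['estimator'])
```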
diff --git a/framework/SupervisedLearning/ScikitLearn/Ensemble/AdaBoostRegressor.py b/framework/SupervisedLearning/ScikitLearn/Ensemble/AdaBoostRegressor.py
new file mode 100644
index 0000000000..21cd1a8146
--- /dev/null
+++ b/framework/SupervisedLearning/ScikitLearn/Ensemble/AdaBoostRegressor.py
@@ -0,0 +1,121 @@
+# Copyright 2017 Battelle Energy Alliance, LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+ Created on Nov. 16, 2021
+
+ @author: wangc
+ AdaBoostRegressor
+  An AdaBoost regressor
+"""
+#Internal Modules (Lazy Importer)--------------------------------------------------------------------
+#Internal Modules (Lazy Importer) End----------------------------------------------------------------
+
+#External Modules------------------------------------------------------------------------------------
+#External Modules End--------------------------------------------------------------------------------
+
+#Internal Modules------------------------------------------------------------------------------------
+from SupervisedLearning.ScikitLearn import ScikitLearnBase
+from utils import InputData, InputTypes
+#Internal Modules End--------------------------------------------------------------------------------
+
+class AdaBoostRegressor(ScikitLearnBase):
+ """
+    An AdaBoost regressor
+ """
+ info = {'problemtype':'regression', 'normalize':False}
+
+ def __init__(self):
+ """
+ Constructor that will appropriately initialize a supervised learning object
+ @ In, None
+ @ Out, None
+ """
+ super().__init__()
+ self.multioutputWrapper = True
+ import sklearn
+ import sklearn.ensemble
+ self.model = sklearn.ensemble.AdaBoostRegressor
+
+ @classmethod
+ def getInputSpecification(cls):
+ """
+ Method to get a reference to a class that specifies the input data for
+ class cls.
+ @ In, cls, the class for which we are retrieving the specification
+ @ Out, inputSpecification, InputData.ParameterInput, class to use for
+ specifying input of cls.
+ """
+ specs = super().getInputSpecification()
+ specs.description = r"""The \xmlNode{AdaBoostRegressor} is a meta-estimator that begins by fitting a regressor on
+ the original dataset and then fits additional copies of the regressor on the same dataset
+ but where the weights of instances are adjusted according to the error of the current
+ prediction. As such, subsequent regressors focus more on difficult cases.
+ """
+ estimatorInput = InputData.assemblyInputFactory("estimator", contentType=InputTypes.StringType,
+ descr=r"""name of a ROM that can be used as an estimator""", default='no-default')
+ specs.addSub(estimatorInput)
+ specs.addSub(InputData.parameterInputFactory("n_estimators", contentType=InputTypes.IntegerType,
+ descr=r"""The maximum number of estimators at which boosting is
+ terminated. In case of perfect fit, the learning procedure is
+ stopped early.""", default=50))
+ specs.addSub(InputData.parameterInputFactory("learning_rate", contentType=InputTypes.FloatType,
+ descr=r"""Weight applied to each regressor at each boosting iteration.
+ A higher learning rate increases the contribution of each regressor.
+ There is a trade-off between the learning\_rate and n\_estimators
+ parameters.""", default=1.0))
+ specs.addSub(InputData.parameterInputFactory("loss", contentType=InputTypes.makeEnumType("loss", "lossType",['linear', 'square', 'exponential']),
+ descr=r"""The loss function to use when updating the weights after each
+ boosting iteration.""", default='linear'))
+ specs.addSub(InputData.parameterInputFactory("random_state", contentType=InputTypes.IntegerType,
+ descr=r"""Controls the random seed given at each estimator at each
+ boosting iteration.""", default=None))
+ return specs
+
+ def _handleInput(self, paramInput):
+ """
+ Function to handle the common parts of the distribution parameter input.
+ @ In, paramInput, ParameterInput, the already parsed input.
+ @ Out, None
+ """
+ super()._handleInput(paramInput)
+ settings, notFound = paramInput.findNodesAndExtractValues(['n_estimators', 'learning_rate', 'loss', 'random_state'])
+ # notFound must be empty
+ assert(not notFound)
+ self.settings = settings
+
+ def setEstimator(self, estimatorList):
+ """
+ Initialization method
+ @ In, estimatorList, list of ROM instances/estimators used by ROM
+ @ Out, None
+ """
+ super().setEstimator(estimatorList)
+    estimator = estimatorList[0]
+    if len(estimatorList) != 1:
+      self.raiseAWarning('ROM', self.name, 'can only accept one estimator, but multiple estimators are provided!',
+                         'Only the first one will be used, i.e.,', estimator.name)
+ interfaceRom = estimator._interfaceROM
+ if interfaceRom.info['problemtype'] != 'regression':
+ self.raiseAnError(IOError, 'estimator:', estimator.name, 'with problem type', interfaceRom.info['problemtype'],
+ 'can not be used for', self.name)
+ # In sklearn, multioutput wrapper can not be used by outer and inner estimator at the same time
+ # If the outer estimator can handle multioutput, the multioutput wrapper of inner can be kept,
+ # otherwise, we need to remove the wrapper for inner estimator.
+ if interfaceRom.multioutputWrapper:
+ sklEstimator = interfaceRom.model.get_params()['estimator']
+ else:
+ sklEstimator = interfaceRom.model
+ settings = {'base_estimator':sklEstimator}
+ self.settings.update(settings)
+ self.initializeModel(self.settings)
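
For a cross-check of what `setEstimator` assembles, here is a hedged sketch of the underlying sklearn estimator used directly. The `base_estimator` keyword matches the sklearn releases this code targets (newer releases rename it to `estimator`), and the data is illustrative:

```python
# Hedged sketch of the sklearn model the AdaBoostRegressor ROM configures.
import numpy as np
from sklearn.ensemble import AdaBoostRegressor
from sklearn.tree import DecisionTreeRegressor

X = np.linspace(0.0, 1.0, 50).reshape(-1, 1)
y = np.sin(2.0 * np.pi * X).ravel()

# base_estimator mirrors the settings dict built in setEstimator above
reg = AdaBoostRegressor(base_estimator=DecisionTreeRegressor(max_depth=3),
                        n_estimators=50, learning_rate=1.0, loss='linear',
                        random_state=0)
reg.fit(X, y)
print(reg.predict(X[:3]))
```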
diff --git a/framework/SupervisedLearning/ScikitLearn/Ensemble/BaggingRegressor.py b/framework/SupervisedLearning/ScikitLearn/Ensemble/BaggingRegressor.py
new file mode 100644
index 0000000000..117f0edc4c
--- /dev/null
+++ b/framework/SupervisedLearning/ScikitLearn/Ensemble/BaggingRegressor.py
@@ -0,0 +1,139 @@
+# Copyright 2017 Battelle Energy Alliance, LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+ Created on Nov. 22, 2021
+
+ @author: wangc
+ BaggingRegressor
+ A Bagging regressor.
+"""
+#Internal Modules (Lazy Importer)--------------------------------------------------------------------
+#Internal Modules (Lazy Importer) End----------------------------------------------------------------
+
+#External Modules------------------------------------------------------------------------------------
+#External Modules End--------------------------------------------------------------------------------
+
+#Internal Modules------------------------------------------------------------------------------------
+from SupervisedLearning.ScikitLearn import ScikitLearnBase
+from utils import InputData, InputTypes
+#Internal Modules End--------------------------------------------------------------------------------
+
+class BaggingRegressor(ScikitLearnBase):
+ """
+ A Bagging Regressor
+ A Bagging regressor is an ensemble meta-estimator that fits base regressors each on random subsets of the original
+  dataset and then aggregates their individual predictions (either by voting or by averaging) to form a final
+ prediction. Such a meta-estimator can typically be used as a way to reduce the variance of a black-box estimator
+ (e.g., a decision tree), by introducing randomization into its construction procedure and then making an ensemble
+ out of it.
+
+ This algorithm encompasses several works from the literature. When random subsets of the dataset are drawn as
+ random subsets of the samples, then this algorithm is known as Pasting. If samples are drawn with replacement,
+ then the method is known as Bagging. When random subsets of the dataset are drawn as random subsets of the
+ features, then the method is known as Random Subspaces. Finally, when base estimators are built on subsets of
+ both samples and features, then the method is known as Random Patches.
+ """
+ info = {'problemtype':'regression', 'normalize':False}
+
+ def __init__(self):
+ """
+ Constructor that will appropriately initialize a supervised learning object
+ @ In, None
+ @ Out, None
+ """
+ super().__init__()
+ self.multioutputWrapper = True
+ import sklearn
+ import sklearn.ensemble
+ self.model = sklearn.ensemble.BaggingRegressor
+
+ @classmethod
+ def getInputSpecification(cls):
+ """
+ Method to get a reference to a class that specifies the input data for
+ class cls.
+ @ In, cls, the class for which we are retrieving the specification
+ @ Out, inputSpecification, InputData.ParameterInput, class to use for
+ specifying input of cls.
+ """
+ specs = super().getInputSpecification()
+ specs.description = r"""The \xmlNode{BaggingRegressor} is an ensemble meta-estimator that fits base regressors each on random subsets of the original
+                           dataset and then aggregates their individual predictions (either by voting or by averaging) to form a final
+ prediction. Such a meta-estimator can typically be used as a way to reduce the variance of a black-box estimator
+ (e.g., a decision tree), by introducing randomization into its construction procedure and then making an ensemble
+ out of it.
+ """
+ estimatorInput = InputData.assemblyInputFactory("estimator", contentType=InputTypes.StringType,
+ descr=r"""name of a ROM that can be used as an estimator""", default='no-default')
+ specs.addSub(estimatorInput)
+ specs.addSub(InputData.parameterInputFactory("n_estimators", contentType=InputTypes.IntegerType,
+ descr=r"""The number of base estimators in the ensemble.""", default=10))
+ specs.addSub(InputData.parameterInputFactory("max_samples", contentType=InputTypes.FloatType,
+ descr=r"""The number of samples to draw from X to train each base estimator""", default=1.0))
+ specs.addSub(InputData.parameterInputFactory("max_features", contentType=InputTypes.FloatType,
+ descr=r"""The number of features to draw from X to train each base estimator """, default=1.0))
+ specs.addSub(InputData.parameterInputFactory("bootstrap", contentType=InputTypes.BoolType,
+ descr=r"""Whether samples are drawn with replacement. If False, sampling without
+ replacement is performed.""", default=True))
+ specs.addSub(InputData.parameterInputFactory("bootstrap_features", contentType=InputTypes.BoolType,
+ descr=r"""Whether features are drawn with replacement.""", default=False))
+ specs.addSub(InputData.parameterInputFactory("oob_score", contentType=InputTypes.BoolType,
+ descr=r"""Whether to use out-of-bag samples to estimate the generalization error.
+ Only available if bootstrap=True.""", default=False))
+ specs.addSub(InputData.parameterInputFactory("warm_start", contentType=InputTypes.BoolType,
+ descr=r"""When set to True, reuse the solution of the previous call to fit and add more
+ estimators to the ensemble, otherwise, just fit a whole new ensemble.""", default=False))
+ specs.addSub(InputData.parameterInputFactory("random_state", contentType=InputTypes.IntegerType,
+ descr=r"""Controls the random resampling of the original dataset (sample wise and feature wise). """,
+ default=None))
+ return specs
+
+ def _handleInput(self, paramInput):
+ """
+ Function to handle the common parts of the distribution parameter input.
+ @ In, paramInput, ParameterInput, the already parsed input.
+ @ Out, None
+ """
+ super()._handleInput(paramInput)
+ settings, notFound = paramInput.findNodesAndExtractValues(['n_estimators', 'max_samples', 'max_features', 'bootstrap', 'bootstrap_features',
+ 'oob_score', 'warm_start', 'random_state'])
+ # notFound must be empty
+ assert(not notFound)
+ self.settings = settings
+
+ def setEstimator(self, estimatorList):
+ """
+ Initialization method
+ @ In, estimatorList, list of ROM instances/estimators used by ROM
+ @ Out, None
+ """
+ super().setEstimator(estimatorList)
+    estimator = estimatorList[0]
+    if len(estimatorList) != 1:
+      self.raiseAWarning('ROM', self.name, 'can only accept one estimator, but multiple estimators are provided!',
+                         'Only the first one will be used, i.e.,', estimator.name)
+ interfaceRom = estimator._interfaceROM
+ if interfaceRom.info['problemtype'] != 'regression':
+ self.raiseAnError(IOError, 'estimator:', estimator.name, 'with problem type', interfaceRom.info['problemtype'],
+ 'can not be used for', self.name)
+ # In sklearn, multioutput wrapper can not be used by outer and inner estimator at the same time
+ # If the outer estimator can handle multioutput, the multioutput wrapper of inner can be kept,
+ # otherwise, we need to remove the wrapper for inner estimator.
+ if interfaceRom.multioutputWrapper:
+ sklEstimator = interfaceRom.model.get_params()['estimator']
+ else:
+ sklEstimator = interfaceRom.model
+ settings = {'base_estimator':sklEstimator}
+ self.settings.update(settings)
+ self.initializeModel(self.settings)
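
The equivalent direct sklearn usage, sketched with illustrative data, shows the same `base_estimator` hand-off plus the out-of-bag option exposed through `oob_score`:

```python
# Hedged sketch of the sklearn model the BaggingRegressor ROM configures.
import numpy as np
from sklearn.ensemble import BaggingRegressor
from sklearn.svm import SVR

rng = np.random.RandomState(0)
X = rng.uniform(size=(100, 2))
y = X[:, 0] + 2.0 * X[:, 1]

reg = BaggingRegressor(base_estimator=SVR(), n_estimators=10,
                       max_samples=1.0, max_features=1.0,
                       bootstrap=True, oob_score=True, random_state=0)
reg.fit(X, y)
print(reg.oob_score_)  # out-of-bag estimate, available because bootstrap=True
```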
diff --git a/framework/SupervisedLearning/ScikitLearn/Ensemble/StackingRegressor.py b/framework/SupervisedLearning/ScikitLearn/Ensemble/StackingRegressor.py
new file mode 100644
index 0000000000..afb65ab51f
--- /dev/null
+++ b/framework/SupervisedLearning/ScikitLearn/Ensemble/StackingRegressor.py
@@ -0,0 +1,124 @@
+# Copyright 2017 Battelle Energy Alliance, LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+ Created on Nov. 22, 2021
+
+ @author: wangc
+ StackingRegressor
+  Stack of estimators with a final regressor.
+"""
+#Internal Modules (Lazy Importer)--------------------------------------------------------------------
+#Internal Modules (Lazy Importer) End----------------------------------------------------------------
+
+#External Modules------------------------------------------------------------------------------------
+#External Modules End--------------------------------------------------------------------------------
+
+#Internal Modules------------------------------------------------------------------------------------
+from SupervisedLearning.ScikitLearn import ScikitLearnBase
+from utils import InputData, InputTypes
+#Internal Modules End--------------------------------------------------------------------------------
+
+class StackingRegressor(ScikitLearnBase):
+ """
+ Stack of estimators with a final regressor.
+ """
+ info = {'problemtype':'regression', 'normalize':False}
+
+ def __init__(self):
+ """
+ Constructor that will appropriately initialize a supervised learning object
+ @ In, None
+ @ Out, None
+ """
+ super().__init__()
+ self.multioutputWrapper = True
+ import sklearn
+ import sklearn.ensemble
+ # check sklearn version, StackingRegressor is stable in sklearn version >= 0.24
+ version = [int(n) for n in sklearn.__version__.split('.')]
+    if version[0] == 0 and version[1] < 24:
+ self.raiseAnError(IOError, 'StackingRegressor is not available in current sklearn version', sklearn.__version__,
+ 'Please try to update sklearn version to 0.24 or newer!')
+ self.model = sklearn.ensemble.StackingRegressor
+
+ @classmethod
+ def getInputSpecification(cls):
+ """
+ Method to get a reference to a class that specifies the input data for
+ class cls.
+ @ In, cls, the class for which we are retrieving the specification
+ @ Out, inputSpecification, InputData.ParameterInput, class to use for
+ specifying input of cls.
+ """
+ specs = super().getInputSpecification()
+    specs.description = r"""The \xmlNode{StackingRegressor} stacks the outputs of the individual estimators and
+                        uses a regressor to compute the final prediction. Stacking exploits the strength of each
+                        individual estimator by using their outputs as input of a final estimator.
+ """
+ estimatorInput = InputData.assemblyInputFactory("estimator", contentType=InputTypes.StringType,
+ descr=r"""name of a ROM that can be used as an estimator""", default='no-default')
+ specs.addSub(estimatorInput)
+ specs.addSub(InputData.parameterInputFactory("final_estimator", contentType=InputTypes.StringType,
+ descr=r"""The name of estimator which will be used to combine the base estimators.""", default='no-default'))
+ specs.addSub(InputData.parameterInputFactory("cv", contentType=InputTypes.IntegerType,
+ descr=r"""specify the number of folds in a (Stratified) KFold,""", default=5))
+ specs.addSub(InputData.parameterInputFactory("passthrough", contentType=InputTypes.BoolType,
+ descr=r"""When False, only the predictions of estimators will be used as training
+ data for final\_estimator. When True, the final\_estimator is trained on the predictions
+ as well as the original training data.""", default=False))
+ return specs
+
+ def _handleInput(self, paramInput):
+ """
+ Function to handle the common parts of the distribution parameter input.
+ @ In, paramInput, ParameterInput, the already parsed input.
+ @ Out, None
+ """
+ super()._handleInput(paramInput)
+ settings, notFound = paramInput.findNodesAndExtractValues(['final_estimator', 'cv', 'passthrough'])
+ # notFound must be empty
+ assert(not notFound)
+ self.settings = settings
+
+ def setEstimator(self, estimatorList):
+ """
+ Initialization method
+ @ In, estimatorList, list of ROM instances/estimators used by ROM
+ @ Out, None
+ """
+ super().setEstimator(estimatorList)
+ estimators = []
+ foundFinalEstimator = False
+ for estimator in estimatorList:
+ interfaceRom = estimator._interfaceROM
+ if interfaceRom.info['problemtype'] != 'regression':
+ self.raiseAnError(IOError, 'estimator:', estimator.name, 'with problem type', interfaceRom.info['problemtype'],
+ 'can not be used for', self.name)
+ # In sklearn, multioutput wrapper can not be used by outer and inner estimator at the same time
+ # If the outer estimator can handle multioutput, the multioutput wrapper of inner can be kept,
+ # otherwise, we need to remove the wrapper for inner estimator.
+ if interfaceRom.multioutputWrapper:
+ sklEstimator = interfaceRom.model.get_params()['estimator']
+ else:
+ sklEstimator = interfaceRom.model
+ if estimator.name == self.settings['final_estimator']:
+ self.settings['final_estimator'] = sklEstimator
+ foundFinalEstimator = True
+ continue
+ estimators.append((estimator.name, sklEstimator))
+ self.settings['estimators'] = estimators
+ if not foundFinalEstimator:
+      self.raiseAnError(IOError, 'final_estimator:', self.settings['final_estimator'], 'is not found among provided estimators:',
+ ','.join([name for name,_ in estimators]))
+ self.initializeModel(self.settings)
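
A hedged sketch of the sklearn model this wrapper assembles: named base estimators plus a separate `final_estimator`, the same shape of configuration that `setEstimator` builds when it matches the final estimator by name (data illustrative):

```python
# Hedged sketch of the sklearn model the StackingRegressor ROM configures.
import numpy as np
from sklearn.ensemble import StackingRegressor
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.svm import SVR

rng = np.random.RandomState(0)
X = rng.uniform(size=(60, 2))
y = X[:, 0] - X[:, 1]

reg = StackingRegressor(estimators=[('ridge', Ridge()), ('svr', SVR())],
                        final_estimator=LinearRegression(),
                        cv=5, passthrough=False)
reg.fit(X, y)
print(reg.predict(X[:2]))
```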
diff --git a/framework/SupervisedLearning/ScikitLearn/Ensemble/VotingRegressor.py b/framework/SupervisedLearning/ScikitLearn/Ensemble/VotingRegressor.py
new file mode 100644
index 0000000000..3f85ab34fa
--- /dev/null
+++ b/framework/SupervisedLearning/ScikitLearn/Ensemble/VotingRegressor.py
@@ -0,0 +1,109 @@
+# Copyright 2017 Battelle Energy Alliance, LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+ Created on Nov. 16, 2021
+
+ @author: wangc
+ VotingRegressor
+ A voting regressor is an ensemble meta-estimator that fits several base regressors
+"""
+#Internal Modules (Lazy Importer)--------------------------------------------------------------------
+#Internal Modules (Lazy Importer) End----------------------------------------------------------------
+
+#External Modules------------------------------------------------------------------------------------
+#External Modules End--------------------------------------------------------------------------------
+
+#Internal Modules------------------------------------------------------------------------------------
+from SupervisedLearning.ScikitLearn import ScikitLearnBase
+from utils import InputData, InputTypes
+#Internal Modules End--------------------------------------------------------------------------------
+
+class VotingRegressor(ScikitLearnBase):
+ """
+ Prediction voting regressor for unfitted estimators.
+
+ A voting regressor is an ensemble meta-estimator that fits several base regressors, each on the whole dataset.
+  Then it averages the individual predictions to form a final prediction.
+ """
+ info = {'problemtype':'regression', 'normalize':False}
+
+ def __init__(self):
+ """
+ Constructor that will appropriately initialize a supervised learning object
+ @ In, None
+ @ Out, None
+ """
+ super().__init__()
+ self.multioutputWrapper = True
+ import sklearn
+ import sklearn.ensemble
+ self.model = sklearn.ensemble.VotingRegressor
+
+ @classmethod
+ def getInputSpecification(cls):
+ """
+ Method to get a reference to a class that specifies the input data for
+ class cls.
+ @ In, cls, the class for which we are retrieving the specification
+ @ Out, inputSpecification, InputData.ParameterInput, class to use for
+ specifying input of cls.
+ """
+ specs = super().getInputSpecification()
+ specs.description = r"""The \xmlNode{VotingRegressor} is an ensemble meta-estimator that fits several base
+ regressors, each on the whole dataset. Then it averages the individual predictions to form
+ a final prediction.
+ """
+ estimatorInput = InputData.assemblyInputFactory("estimator", contentType=InputTypes.StringType,
+ descr=r"""name of a ROM that can be used as an estimator""", default='no-default')
+ specs.addSub(estimatorInput)
+ specs.addSub(InputData.parameterInputFactory("weights", contentType=InputTypes.FloatListType,
+ descr=r"""Sequence of weights (float or int) to weight the occurrences of predicted
+ values before averaging. Uses uniform weights if None.""", default=None))
+ return specs
+
+ def _handleInput(self, paramInput):
+ """
+ Function to handle the common parts of the distribution parameter input.
+ @ In, paramInput, ParameterInput, the already parsed input.
+ @ Out, None
+ """
+ super()._handleInput(paramInput)
+ settings, notFound = paramInput.findNodesAndExtractValues(['weights'])
+ # notFound must be empty
+ assert(not notFound)
+ self.settings = settings
+
+ def setEstimator(self, estimatorList):
+ """
+ Initialization method
+ @ In, estimatorList, list of ROM instances/estimators used by ROM
+ @ Out, None
+ """
+ super().setEstimator(estimatorList)
+ estimators = []
+ for estimator in estimatorList:
+ interfaceRom = estimator._interfaceROM
+ if interfaceRom.info['problemtype'] != 'regression':
+ self.raiseAnError(IOError, 'estimator:', estimator.name, 'with problem type', interfaceRom.info['problemtype'],
+ 'can not be used for', self.name)
+ # In sklearn, multioutput wrapper can not be used by outer and inner estimator at the same time
+ # If the outer estimator can handle multioutput, the multioutput wrapper of inner can be kept,
+ # otherwise, we need to remove the wrapper for inner estimator.
+ if interfaceRom.multioutputWrapper:
+ sklEstimator = interfaceRom.model.get_params()['estimator']
+ else:
+ sklEstimator = interfaceRom.model
+ estimators.append((estimator.name, sklEstimator))
+ self.settings['estimators'] = estimators
+ self.initializeModel(self.settings)
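
Two things above are worth a concrete sketch: the weighted averaging that `VotingRegressor` performs, and the `get_params()['estimator']` unwrap used to strip an inner multioutput wrapper. All estimator choices and data below are illustrative:

```python
# Hedged sketch of the sklearn model the VotingRegressor ROM configures.
import numpy as np
from sklearn.ensemble import VotingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.multioutput import MultiOutputRegressor
from sklearn.tree import DecisionTreeRegressor

# the unwrap: MultiOutputRegressor exposes its inner estimator as a parameter
wrapped = MultiOutputRegressor(LinearRegression())
inner = wrapped.get_params()['estimator']  # -> the LinearRegression instance

rng = np.random.RandomState(0)
X = rng.uniform(size=(40, 1))
y = 3.0 * X.ravel()

reg = VotingRegressor(estimators=[('lr', inner), ('dt', DecisionTreeRegressor())],
                      weights=[2.0, 1.0])  # weighted average of predictions
reg.fit(X, y)
print(reg.predict(X[:2]))
```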
diff --git a/framework/SupervisedLearning/ScikitLearn/Ensemble/__init__.py b/framework/SupervisedLearning/ScikitLearn/Ensemble/__init__.py
new file mode 100644
index 0000000000..5abd07b644
--- /dev/null
+++ b/framework/SupervisedLearning/ScikitLearn/Ensemble/__init__.py
@@ -0,0 +1,17 @@
+# Copyright 2017 Battelle Energy Alliance, LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+  The Ensemble folder includes the different types of scikit-learn ensemble-based ROMs
+  that are available via RAVEN
+"""
diff --git a/framework/SupervisedLearning/ScikitLearn/GaussianProcess/GaussianProcessClassifier.py b/framework/SupervisedLearning/ScikitLearn/GaussianProcess/GaussianProcessClassifier.py
index 570ca0cf74..df9c2e2154 100644
--- a/framework/SupervisedLearning/ScikitLearn/GaussianProcess/GaussianProcessClassifier.py
+++ b/framework/SupervisedLearning/ScikitLearn/GaussianProcess/GaussianProcessClassifier.py
@@ -46,9 +46,7 @@ def __init__(self):
super().__init__()
import sklearn
import sklearn.gaussian_process
- import sklearn.multioutput
- # we wrap the model with the multi output classifier (for multitarget)
- self.model = sklearn.multioutput.MultiOutputClassifier(sklearn.gaussian_process.GaussianProcessClassifier())
+ self.model = sklearn.gaussian_process.GaussianProcessClassifier
@classmethod
def getInputSpecification(cls):
diff --git a/framework/SupervisedLearning/ScikitLearn/GaussianProcess/GaussianProcessRegressor.py b/framework/SupervisedLearning/ScikitLearn/GaussianProcess/GaussianProcessRegressor.py
index d6ed096b3b..67fcb27178 100644
--- a/framework/SupervisedLearning/ScikitLearn/GaussianProcess/GaussianProcessRegressor.py
+++ b/framework/SupervisedLearning/ScikitLearn/GaussianProcess/GaussianProcessRegressor.py
@@ -46,9 +46,7 @@ def __init__(self):
super().__init__()
import sklearn
import sklearn.gaussian_process
- import sklearn.multioutput
- # we wrap the model with the multi output regressor (for multitarget)
- self.model = sklearn.multioutput.MultiOutputRegressor(sklearn.gaussian_process.GaussianProcessRegressor())
+ self.model = sklearn.gaussian_process.GaussianProcessRegressor
@classmethod
def getInputSpecification(cls):
diff --git a/framework/SupervisedLearning/ScikitLearn/LinearModel/ARDRegression.py b/framework/SupervisedLearning/ScikitLearn/LinearModel/ARDRegression.py
index 6db1793202..5085c96859 100644
--- a/framework/SupervisedLearning/ScikitLearn/LinearModel/ARDRegression.py
+++ b/framework/SupervisedLearning/ScikitLearn/LinearModel/ARDRegression.py
@@ -44,9 +44,7 @@ def __init__(self):
super().__init__()
import sklearn
import sklearn.linear_model
- import sklearn.multioutput
- # we wrap the model with the multi output regressor (for multitarget)
- self.model = sklearn.multioutput.MultiOutputRegressor(sklearn.linear_model.ARDRegression())
+ self.model = sklearn.linear_model.ARDRegression
@classmethod
def getInputSpecification(cls):
diff --git a/framework/SupervisedLearning/ScikitLearn/LinearModel/BayesianRidge.py b/framework/SupervisedLearning/ScikitLearn/LinearModel/BayesianRidge.py
index 13a2bbab32..1aed3652ed 100644
--- a/framework/SupervisedLearning/ScikitLearn/LinearModel/BayesianRidge.py
+++ b/framework/SupervisedLearning/ScikitLearn/LinearModel/BayesianRidge.py
@@ -44,9 +44,7 @@ def __init__(self):
super().__init__()
import sklearn
import sklearn.linear_model
- import sklearn.multioutput
- # we wrap the model with the multi output regressor (for multitarget)
- self.model = sklearn.multioutput.MultiOutputRegressor(sklearn.linear_model.BayesianRidge())
+ self.model = sklearn.linear_model.BayesianRidge
@classmethod
def getInputSpecification(cls):
@@ -84,17 +82,15 @@ class cls.
specs.addSub(InputData.parameterInputFactory("lambda_2", contentType=InputTypes.FloatType,
descr=r"""Hyper-parameter : inverse scale parameter (rate parameter) for
the Gamma distribution prior over the lambda parameter.""", default=1e-6))
- specs.addSub(InputData.parameterInputFactory("alpha_init", contentType=InputTypes.FloatType,
- descr=r"""Initial value for alpha (precision of the noise).
- If not set, alpha_init is $1/Var(y)$.""", default=None))
- specs.addSub(InputData.parameterInputFactory("lambda_init", contentType=InputTypes.FloatType,
- descr=r"""Initial value for lambda (precision of the weights).""", default='1.'))
+ # new in sklearn version 0.22
+ # specs.addSub(InputData.parameterInputFactory("alpha_init", contentType=InputTypes.FloatType,
+ # descr=r"""Initial value for alpha (precision of the noise).
+ # If not set, alpha_init is $1/Var(y)$.""", default=None))
+ # specs.addSub(InputData.parameterInputFactory("lambda_init", contentType=InputTypes.FloatType,
+ # descr=r"""Initial value for lambda (precision of the weights).""", default='1.'))
specs.addSub(InputData.parameterInputFactory("compute_score", contentType=InputTypes.BoolType,
descr=r"""If True, compute the objective function at each step of the
model.""", default=False))
- specs.addSub(InputData.parameterInputFactory("threshold_lambda", contentType=InputTypes.FloatType,
- descr=r"""threshold for removing (pruning) weights with
- shigh precision from the computation..""", default=10000))
specs.addSub(InputData.parameterInputFactory("fit_intercept", contentType=InputTypes.BoolType,
descr=r"""Whether to calculate the intercept for this model. Specifies if a constant (a.k.a. bias or intercept)
should be added to the decision function.""", default=True))
@@ -114,8 +110,8 @@ def _handleInput(self, paramInput):
"""
super()._handleInput(paramInput)
settings, notFound = paramInput.findNodesAndExtractValues(['tol', 'alpha_1','alpha_2','lambda_1','lambda_2',
- 'compute_score', 'threshold_lambda', 'fit_intercept',
- 'n_iter', 'normalize','alpha_init','lambda_init', 'verbose'])
+ 'compute_score', 'fit_intercept',
+ 'n_iter', 'normalize', 'verbose'])
# notFound must be empty
assert(not notFound)
self.initializeModel(settings)
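
The commented-out nodes above exist only in newer scikit-learn releases (alpha_init and lambda_init appeared in 0.22). A hypothetical alternative, sketched here and not part of the patch, would be to gate such nodes on the installed version:

import sklearn

# parse major/minor from e.g. '0.21.3' or '0.24.2'
version = tuple(int(v) for v in sklearn.__version__.split('.')[:2])
# alpha_init and lambda_init are only accepted by BayesianRidge in sklearn >= 0.22
supportsInitValues = version >= (0, 22)
print(supportsInitValues)
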
diff --git a/framework/SupervisedLearning/ScikitLearn/LinearModel/ElasticNet.py b/framework/SupervisedLearning/ScikitLearn/LinearModel/ElasticNet.py
index 0f5042e1ff..e74b0d60c3 100644
--- a/framework/SupervisedLearning/ScikitLearn/LinearModel/ElasticNet.py
+++ b/framework/SupervisedLearning/ScikitLearn/LinearModel/ElasticNet.py
@@ -44,9 +44,7 @@ def __init__(self):
super().__init__()
import sklearn
import sklearn.linear_model
- import sklearn.multioutput
- # we wrap the model with the multi output regressor (for multitarget)
- self.model = sklearn.multioutput.MultiOutputRegressor(sklearn.linear_model.ElasticNet())
+ self.model = sklearn.linear_model.ElasticNet
@classmethod
def getInputSpecification(cls):
diff --git a/framework/SupervisedLearning/ScikitLearn/LinearModel/ElasticNetCV.py b/framework/SupervisedLearning/ScikitLearn/LinearModel/ElasticNetCV.py
index ebb7a97329..d2155bf217 100644
--- a/framework/SupervisedLearning/ScikitLearn/LinearModel/ElasticNetCV.py
+++ b/framework/SupervisedLearning/ScikitLearn/LinearModel/ElasticNetCV.py
@@ -44,9 +44,7 @@ def __init__(self):
super().__init__()
import sklearn
import sklearn.linear_model
- import sklearn.multioutput
- # we wrap the model with the multi output regressor (for multitarget)
- self.model = sklearn.multioutput.MultiOutputRegressor(sklearn.linear_model.ElasticNetCV())
+ self.model = sklearn.linear_model.ElasticNetCV
@classmethod
def getInputSpecification(cls):
diff --git a/framework/SupervisedLearning/ScikitLearn/LinearModel/Lars.py b/framework/SupervisedLearning/ScikitLearn/LinearModel/Lars.py
index 336b4d2092..cebf7b1011 100644
--- a/framework/SupervisedLearning/ScikitLearn/LinearModel/Lars.py
+++ b/framework/SupervisedLearning/ScikitLearn/LinearModel/Lars.py
@@ -45,9 +45,7 @@ def __init__(self):
super().__init__()
import sklearn
import sklearn.linear_model
- import sklearn.multioutput
- # we wrap the model with the multi output regressor (for multitarget)
- self.model = sklearn.multioutput.MultiOutputRegressor(sklearn.linear_model.Lars())
+ self.model = sklearn.linear_model.Lars
@classmethod
def getInputSpecification(cls):
@@ -83,10 +81,11 @@ class cls.
dividing by the l2-norm.""", default=True))
specs.addSub(InputData.parameterInputFactory("n_nonzero_coefs", contentType=InputTypes.IntegerType,
descr=r"""Target number of non-zero coefficients.""", default=500))
- specs.addSub(InputData.parameterInputFactory("jitter", contentType=InputTypes.FloatType,
- descr=r"""Upper bound on a uniform noise parameter to be added to the
- y values, to satisfy the model’s assumption of one-at-a-time computations.
- Might help with stability.""", default=None))
+ # new in sklearn version 0.23
+ # specs.addSub(InputData.parameterInputFactory("jitter", contentType=InputTypes.FloatType,
+ # descr=r"""Upper bound on a uniform noise parameter to be added to the
+ # y values, to satisfy the model’s assumption of one-at-a-time computations.
+ # Might help with stability.""", default=None))
specs.addSub(InputData.parameterInputFactory("verbose", contentType=InputTypes.BoolType,
descr=r"""Sets the verbosity amount.""", default=False))
specs.addSub(InputData.parameterInputFactory("fit_path", contentType=InputTypes.BoolType,
@@ -104,7 +103,7 @@ def _handleInput(self, paramInput):
"""
super()._handleInput(paramInput)
settings, notFound = paramInput.findNodesAndExtractValues(['eps','precompute', 'fit_intercept',
- 'normalize','n_nonzero_coefs','jitter', 'verbose',
+ 'normalize','n_nonzero_coefs', 'verbose',
'fit_path'])
# notFound must be empty
assert(not notFound)
diff --git a/framework/SupervisedLearning/ScikitLearn/LinearModel/LarsCV.py b/framework/SupervisedLearning/ScikitLearn/LinearModel/LarsCV.py
index 3fbfdcb195..88d5edb657 100644
--- a/framework/SupervisedLearning/ScikitLearn/LinearModel/LarsCV.py
+++ b/framework/SupervisedLearning/ScikitLearn/LinearModel/LarsCV.py
@@ -45,9 +45,7 @@ def __init__(self):
super().__init__()
import sklearn
import sklearn.linear_model
- import sklearn.multioutput
- # we wrap the model with the multi output regressor (for multitarget)
- self.model = sklearn.multioutput.MultiOutputRegressor(sklearn.linear_model.LarsCV())
+ self.model = sklearn.linear_model.LarsCV
@classmethod
def getInputSpecification(cls):
diff --git a/framework/SupervisedLearning/ScikitLearn/LinearModel/Lasso.py b/framework/SupervisedLearning/ScikitLearn/LinearModel/Lasso.py
index b5190af2fb..938131d1b0 100644
--- a/framework/SupervisedLearning/ScikitLearn/LinearModel/Lasso.py
+++ b/framework/SupervisedLearning/ScikitLearn/LinearModel/Lasso.py
@@ -44,9 +44,7 @@ def __init__(self):
super().__init__()
import sklearn
import sklearn.linear_model
- import sklearn.multioutput
- # we wrap the model with the multi output regressor (for multitarget)
- self.model = sklearn.multioutput.MultiOutputRegressor(sklearn.linear_model.Lasso())
+ self.model = sklearn.linear_model.Lasso
@classmethod
def getInputSpecification(cls):
diff --git a/framework/SupervisedLearning/ScikitLearn/LinearModel/LassoCV.py b/framework/SupervisedLearning/ScikitLearn/LinearModel/LassoCV.py
index 18556d65fe..ce93924b2d 100644
--- a/framework/SupervisedLearning/ScikitLearn/LinearModel/LassoCV.py
+++ b/framework/SupervisedLearning/ScikitLearn/LinearModel/LassoCV.py
@@ -44,9 +44,7 @@ def __init__(self):
super().__init__()
import sklearn
import sklearn.linear_model
- import sklearn.multioutput
- # we wrap the model with the multi output regressor (for multitarget)
- self.model = sklearn.multioutput.MultiOutputRegressor(sklearn.linear_model.LassoCV())
+ self.model = sklearn.linear_model.LassoCV
@classmethod
def getInputSpecification(cls):
diff --git a/framework/SupervisedLearning/ScikitLearn/LinearModel/LassoLars.py b/framework/SupervisedLearning/ScikitLearn/LinearModel/LassoLars.py
index 2d430dee1c..7a91af179a 100644
--- a/framework/SupervisedLearning/ScikitLearn/LinearModel/LassoLars.py
+++ b/framework/SupervisedLearning/ScikitLearn/LinearModel/LassoLars.py
@@ -45,9 +45,7 @@ def __init__(self):
super().__init__()
import sklearn
import sklearn.linear_model
- import sklearn.multioutput
- # we wrap the model with the multi output regressor (for multitarget)
- self.model = sklearn.multioutput.MultiOutputRegressor(sklearn.linear_model.LassoLars())
+ self.model = sklearn.linear_model.LassoLars
@classmethod
def getInputSpecification(cls):
@@ -91,10 +89,11 @@ class cls.
control the tolerance of the optimization.""", default=finfo(float).eps))
specs.addSub(InputData.parameterInputFactory("positive", contentType=InputTypes.BoolType,
descr=r"""When set to True, forces the coefficients to be positive.""", default=False))
- specs.addSub(InputData.parameterInputFactory("jitter", contentType=InputTypes.FloatType,
- descr=r"""Upper bound on a uniform noise parameter to be added to the y values,
- to satisfy the model’s assumption of one-at-a-time computations. Might help
- with stability.""", default=None))
+ # New in sklearn version 0.23
+ # specs.addSub(InputData.parameterInputFactory("jitter", contentType=InputTypes.FloatType,
+ # descr=r"""Upper bound on a uniform noise parameter to be added to the y values,
+ # to satisfy the model’s assumption of one-at-a-time computations. Might help
+ # with stability.""", default=None))
specs.addSub(InputData.parameterInputFactory("verbose", contentType=InputTypes.BoolType,
descr=r"""Amount of verbosity.""", default=False))
return specs
@@ -107,7 +106,7 @@ def _handleInput(self, paramInput):
"""
super()._handleInput(paramInput)
settings, notFound = paramInput.findNodesAndExtractValues(['alpha','fit_intercept', 'normalize', 'precompute',
- 'max_iter','eps','positive','jitter', 'verbose'])
+ 'max_iter','eps','positive', 'verbose'])
# notFound must be empty
assert(not notFound)
self.initializeModel(settings)
diff --git a/framework/SupervisedLearning/ScikitLearn/LinearModel/LassoLarsCV.py b/framework/SupervisedLearning/ScikitLearn/LinearModel/LassoLarsCV.py
index 10f7d25524..c07188b874 100644
--- a/framework/SupervisedLearning/ScikitLearn/LinearModel/LassoLarsCV.py
+++ b/framework/SupervisedLearning/ScikitLearn/LinearModel/LassoLarsCV.py
@@ -45,9 +45,7 @@ def __init__(self):
super().__init__()
import sklearn
import sklearn.linear_model
- import sklearn.multioutput
- # we wrap the model with the multi output regressor (for multitarget)
- self.model = sklearn.multioutput.MultiOutputRegressor(sklearn.linear_model.LassoLarsCV())
+ self.model = sklearn.linear_model.LassoLarsCV
@classmethod
def getInputSpecification(cls):
diff --git a/framework/SupervisedLearning/ScikitLearn/LinearModel/LassoLarsIC.py b/framework/SupervisedLearning/ScikitLearn/LinearModel/LassoLarsIC.py
index 863e6327c9..843f5e16de 100644
--- a/framework/SupervisedLearning/ScikitLearn/LinearModel/LassoLarsIC.py
+++ b/framework/SupervisedLearning/ScikitLearn/LinearModel/LassoLarsIC.py
@@ -45,9 +45,7 @@ def __init__(self):
super().__init__()
import sklearn
import sklearn.linear_model
- import sklearn.multioutput
- # we wrap the model with the multi output regressor (for multitarget)
- self.model = sklearn.multioutput.MultiOutputRegressor(sklearn.linear_model.LassoLarsIC())
+ self.model = sklearn.linear_model.LassoLarsIC
@classmethod
def getInputSpecification(cls):
diff --git a/framework/SupervisedLearning/ScikitLearn/LinearModel/LinearRegression.py b/framework/SupervisedLearning/ScikitLearn/LinearModel/LinearRegression.py
index c1b872dc45..578842980c 100644
--- a/framework/SupervisedLearning/ScikitLearn/LinearModel/LinearRegression.py
+++ b/framework/SupervisedLearning/ScikitLearn/LinearModel/LinearRegression.py
@@ -44,9 +44,7 @@ def __init__(self):
super().__init__()
import sklearn
import sklearn.linear_model
- import sklearn.multioutput
- # we wrap the model with the multi output regressor (for multitarget)
- self.model = sklearn.multioutput.MultiOutputRegressor(sklearn.linear_model.LinearRegression())
+ self.model = sklearn.linear_model.LinearRegression
@classmethod
def getInputSpecification(cls):
@@ -72,8 +70,9 @@ class cls.
descr=r"""This parameter is ignored when fit_intercept is set to False. If True,
the regressors X will be normalized before regression by subtracting the mean and
dividing by the l2-norm.""", default=False))
- specs.addSub(InputData.parameterInputFactory("positive", contentType=InputTypes.BoolType,
- descr=r"""When set to True, forces the coefficients to be positive.""", default=False))
+ # New in sklearn version 0.24
+ # specs.addSub(InputData.parameterInputFactory("positive", contentType=InputTypes.BoolType,
+ # descr=r"""When set to True, forces the coefficients to be positive.""", default=False))
return specs
def _handleInput(self, paramInput):
@@ -83,7 +82,7 @@ def _handleInput(self, paramInput):
@ Out, None
"""
super()._handleInput(paramInput)
- settings, notFound = paramInput.findNodesAndExtractValues(['fit_intercept','normalize','positive'])
+ settings, notFound = paramInput.findNodesAndExtractValues(['fit_intercept','normalize'])
# notFound must be empty
assert(not notFound)
self.initializeModel(settings)
diff --git a/framework/SupervisedLearning/ScikitLearn/LinearModel/LogisticRegression.py b/framework/SupervisedLearning/ScikitLearn/LinearModel/LogisticRegression.py
index 38500b5e07..ed47f71719 100644
--- a/framework/SupervisedLearning/ScikitLearn/LinearModel/LogisticRegression.py
+++ b/framework/SupervisedLearning/ScikitLearn/LinearModel/LogisticRegression.py
@@ -44,9 +44,7 @@ def __init__(self):
super().__init__()
import sklearn
import sklearn.linear_model
- import sklearn.multioutput
- # we wrap the model with the multi output regressor (for multitarget)
- self.model = sklearn.multioutput.MultiOutputRegressor(sklearn.linear_model.LogisticRegression())
+ self.model = sklearn.linear_model.LogisticRegression
@classmethod
def getInputSpecification(cls):
diff --git a/framework/SupervisedLearning/ScikitLearn/LinearModel/MultiTaskElasticNet.py b/framework/SupervisedLearning/ScikitLearn/LinearModel/MultiTaskElasticNet.py
index 87d9a37952..5f8eecf333 100644
--- a/framework/SupervisedLearning/ScikitLearn/LinearModel/MultiTaskElasticNet.py
+++ b/framework/SupervisedLearning/ScikitLearn/LinearModel/MultiTaskElasticNet.py
@@ -45,8 +45,7 @@ def __init__(self):
self.multioutputWrapper = False
import sklearn
import sklearn.linear_model
- import sklearn.multioutput
- self.model = sklearn.linear_model.MultiTaskElasticNet()
+ self.model = sklearn.linear_model.MultiTaskElasticNet
@classmethod
def getInputSpecification(cls):
diff --git a/framework/SupervisedLearning/ScikitLearn/LinearModel/MultiTaskElasticNetCV.py b/framework/SupervisedLearning/ScikitLearn/LinearModel/MultiTaskElasticNetCV.py
index 313abbc081..bf43157575 100644
--- a/framework/SupervisedLearning/ScikitLearn/LinearModel/MultiTaskElasticNetCV.py
+++ b/framework/SupervisedLearning/ScikitLearn/LinearModel/MultiTaskElasticNetCV.py
@@ -44,9 +44,7 @@ def __init__(self):
super().__init__()
import sklearn
import sklearn.linear_model
- import sklearn.multioutput
- # we wrap the model with the multi output regressor (for multitarget)
- self.model = sklearn.multioutput.MultiOutputRegressor(sklearn.linear_model.MultiTaskElasticNetCV())
+ self.model = sklearn.linear_model.MultiTaskElasticNetCV
@classmethod
def getInputSpecification(cls):
diff --git a/framework/SupervisedLearning/ScikitLearn/LinearModel/MultiTaskLasso.py b/framework/SupervisedLearning/ScikitLearn/LinearModel/MultiTaskLasso.py
index 2f32712376..96d415226e 100644
--- a/framework/SupervisedLearning/ScikitLearn/LinearModel/MultiTaskLasso.py
+++ b/framework/SupervisedLearning/ScikitLearn/LinearModel/MultiTaskLasso.py
@@ -42,10 +42,10 @@ def __init__(self):
@ Out, None
"""
super().__init__()
+ self.multioutputWrapper = False
import sklearn
import sklearn.linear_model
- import sklearn.multioutput
- self.model = sklearn.linear_model.MultiTaskLasso()
+ self.model = sklearn.linear_model.MultiTaskLasso
@classmethod
def getInputSpecification(cls):
diff --git a/framework/SupervisedLearning/ScikitLearn/LinearModel/MultiTaskLassoCV.py b/framework/SupervisedLearning/ScikitLearn/LinearModel/MultiTaskLassoCV.py
index ea92d93360..1fe1287d5f 100644
--- a/framework/SupervisedLearning/ScikitLearn/LinearModel/MultiTaskLassoCV.py
+++ b/framework/SupervisedLearning/ScikitLearn/LinearModel/MultiTaskLassoCV.py
@@ -44,9 +44,7 @@ def __init__(self):
super().__init__()
import sklearn
import sklearn.linear_model
- import sklearn.multioutput
- # we wrap the model with the multi output regressor (for multitarget)
- self.model = sklearn.multioutput.MultiOutputRegressor(sklearn.linear_model.MultiTaskLassoCV())
+ self.model = sklearn.linear_model.MultiTaskLassoCV
@classmethod
def getInputSpecification(cls):
diff --git a/framework/SupervisedLearning/ScikitLearn/LinearModel/OrthogonalMatchingPursuit.py b/framework/SupervisedLearning/ScikitLearn/LinearModel/OrthogonalMatchingPursuit.py
index 1107bf43a6..f9cd6bcad6 100644
--- a/framework/SupervisedLearning/ScikitLearn/LinearModel/OrthogonalMatchingPursuit.py
+++ b/framework/SupervisedLearning/ScikitLearn/LinearModel/OrthogonalMatchingPursuit.py
@@ -44,9 +44,7 @@ def __init__(self):
super().__init__()
import sklearn
import sklearn.linear_model
- import sklearn.multioutput
- # we wrap the model with the multi output regressor (for multitarget)
- self.model = sklearn.multioutput.MultiOutputRegressor(sklearn.linear_model.OrthogonalMatchingPursuit())
+ self.model = sklearn.linear_model.OrthogonalMatchingPursuit
@classmethod
def getInputSpecification(cls):
diff --git a/framework/SupervisedLearning/ScikitLearn/LinearModel/OrthogonalMatchingPursuitCV.py b/framework/SupervisedLearning/ScikitLearn/LinearModel/OrthogonalMatchingPursuitCV.py
index 4193a253f7..1b5e05e2fd 100644
--- a/framework/SupervisedLearning/ScikitLearn/LinearModel/OrthogonalMatchingPursuitCV.py
+++ b/framework/SupervisedLearning/ScikitLearn/LinearModel/OrthogonalMatchingPursuitCV.py
@@ -44,9 +44,7 @@ def __init__(self):
super().__init__()
import sklearn
import sklearn.linear_model
- import sklearn.multioutput
- # we wrap the model with the multi output regressor (for multitarget)
- self.model = sklearn.multioutput.MultiOutputRegressor(sklearn.linear_model.OrthogonalMatchingPursuitCV())
+ self.model = sklearn.linear_model.OrthogonalMatchingPursuitCV
@classmethod
def getInputSpecification(cls):
diff --git a/framework/SupervisedLearning/ScikitLearn/LinearModel/PassiveAggressiveClassifier.py b/framework/SupervisedLearning/ScikitLearn/LinearModel/PassiveAggressiveClassifier.py
index 1bb4b5cc67..b85da00ca4 100644
--- a/framework/SupervisedLearning/ScikitLearn/LinearModel/PassiveAggressiveClassifier.py
+++ b/framework/SupervisedLearning/ScikitLearn/LinearModel/PassiveAggressiveClassifier.py
@@ -44,9 +44,7 @@ def __init__(self):
super().__init__()
import sklearn
import sklearn.linear_model
- import sklearn.multioutput
- # we wrap the model with the multi output regressor (for multitarget)
- self.model = sklearn.multioutput.MultiOutputClassifier(sklearn.linear_model.PassiveAggressiveClassifier())
+ self.model = sklearn.linear_model.PassiveAggressiveClassifier
@classmethod
def getInputSpecification(cls):
diff --git a/framework/SupervisedLearning/ScikitLearn/LinearModel/PassiveAggressiveRegressor.py b/framework/SupervisedLearning/ScikitLearn/LinearModel/PassiveAggressiveRegressor.py
index e5e2e2a1db..9cc57933a0 100644
--- a/framework/SupervisedLearning/ScikitLearn/LinearModel/PassiveAggressiveRegressor.py
+++ b/framework/SupervisedLearning/ScikitLearn/LinearModel/PassiveAggressiveRegressor.py
@@ -44,9 +44,7 @@ def __init__(self):
super().__init__()
import sklearn
import sklearn.linear_model
- import sklearn.multioutput
- # we wrap the model with the multi output regressor (for multitarget)
- self.model = sklearn.multioutput.MultiOutputRegressor(sklearn.linear_model.PassiveAggressiveRegressor())
+ self.model = sklearn.linear_model.PassiveAggressiveRegressor
@classmethod
def getInputSpecification(cls):
diff --git a/framework/SupervisedLearning/ScikitLearn/LinearModel/Perceptron.py b/framework/SupervisedLearning/ScikitLearn/LinearModel/Perceptron.py
index 85d2252d53..4baf776db4 100644
--- a/framework/SupervisedLearning/ScikitLearn/LinearModel/Perceptron.py
+++ b/framework/SupervisedLearning/ScikitLearn/LinearModel/Perceptron.py
@@ -44,9 +44,7 @@ def __init__(self):
super().__init__()
import sklearn
import sklearn.linear_model
- import sklearn.multioutput
- # we wrap the model with the multi output regressor (for multitarget)
- self.model = sklearn.multioutput.MultiOutputClassifier(sklearn.linear_model.Perceptron())
+ self.model = sklearn.linear_model.Perceptron
@classmethod
def getInputSpecification(cls):
@@ -72,9 +70,10 @@ class cls.
descr=r"""The penalty (aka regularization term) to be used.""", default=None))
specs.addSub(InputData.parameterInputFactory("alpha", contentType=InputTypes.FloatType,
descr=r"""Constant that multiplies the regularization term if regularization is used.""", default=0.0001))
- specs.addSub(InputData.parameterInputFactory("l1_ratio", contentType=InputTypes.FloatType,
- descr=r"""The Elastic Net mixing parameter, with $0 <= l1_ratio <= 1.$ $l1_ratio=0$ corresponds to L2 penalty,
- $l1_ratio=1$ to L1. Only used if penalty='elasticnet'.""", default=0.15))
+ # new in sklearn version 0.24
+ # specs.addSub(InputData.parameterInputFactory("l1_ratio", contentType=InputTypes.FloatType,
+ # descr=r"""The Elastic Net mixing parameter, with $0 <= l1_ratio <= 1.$ $l1_ratio=0$ corresponds to L2 penalty,
+ # $l1_ratio=1$ to L1. Only used if penalty='elasticnet'.""", default=0.15))
specs.addSub(InputData.parameterInputFactory("fit_intercept", contentType=InputTypes.BoolType,
descr=r"""Whether the intercept should be estimated or not. If False,
the data is assumed to be already centered.""", default=True))
@@ -118,7 +117,7 @@ def _handleInput(self, paramInput):
@ Out, None
"""
super()._handleInput(paramInput)
- settings, notFound = paramInput.findNodesAndExtractValues(['penalty','alpha','l1_ratio','early_stopping',
+ settings, notFound = paramInput.findNodesAndExtractValues(['penalty','alpha', 'early_stopping',
'fit_intercept','max_iter','tol','validation_fraction',
'n_iter_no_change','shuffle','eta0', 'class_weight',
'random_state', 'verbose', 'warm_start'])
diff --git a/framework/SupervisedLearning/ScikitLearn/LinearModel/Ridge.py b/framework/SupervisedLearning/ScikitLearn/LinearModel/Ridge.py
index c710414d66..ad782bebfb 100644
--- a/framework/SupervisedLearning/ScikitLearn/LinearModel/Ridge.py
+++ b/framework/SupervisedLearning/ScikitLearn/LinearModel/Ridge.py
@@ -46,7 +46,7 @@ def __init__(self):
self.multioutputWrapper = False
import sklearn
import sklearn.linear_model
- self.model = sklearn.linear_model.Ridge()
+ self.model = sklearn.linear_model.Ridge
@classmethod
def getInputSpecification(cls):
diff --git a/framework/SupervisedLearning/ScikitLearn/LinearModel/RidgeCV.py b/framework/SupervisedLearning/ScikitLearn/LinearModel/RidgeCV.py
index b8b006a345..56de56858f 100644
--- a/framework/SupervisedLearning/ScikitLearn/LinearModel/RidgeCV.py
+++ b/framework/SupervisedLearning/ScikitLearn/LinearModel/RidgeCV.py
@@ -45,7 +45,7 @@ def __init__(self):
self.multioutputWrapper = False
import sklearn
import sklearn.linear_model
- self.model = sklearn.linear_model.RidgeCV()
+ self.model = sklearn.linear_model.RidgeCV
@classmethod
def getInputSpecification(cls):
@@ -89,11 +89,11 @@ class cls.
specs.addSub(InputData.parameterInputFactory("cv", contentType=InputTypes.IntegerType,
descr=r"""Determines the cross-validation splitting strategy.
                                                 It specifies the number of folds.""", default=None))
- specs.addSub(InputData.parameterInputFactory("alphas", contentType=InputTypes.FloatListType,
+ specs.addSub(InputData.parameterInputFactory("alphas", contentType=InputTypes.FloatTupleType,
descr=r"""Array of alpha values to try. Regularization strength; must be a positive float. Regularization
improves the conditioning of the problem and reduces the variance of the estimates.
Larger values specify stronger regularization. Alpha corresponds to $1 / (2C)$ in other
- linear models such as LogisticRegression or LinearSVC.""", default=[0.1, 1.0, 10.0]))
+ linear models such as LogisticRegression or LinearSVC.""", default=(0.1, 1.0, 10.0)))
specs.addSub(InputData.parameterInputFactory("scoring", contentType=InputTypes.StringType,
descr=r"""A string (see model evaluation documentation) or a scorer
callable object / function with signature.""", default=None))
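
Since RidgeCV treats alphas as a generic array-like, the switch from a list to a tuple default is behavior-preserving; a quick standalone check (sketch, assuming scikit-learn and numpy are available):

import numpy as np
import sklearn.linear_model

X = np.random.rand(20, 3)
y = np.random.rand(20)
# a tuple of candidate regularization strengths works the same as a list
model = sklearn.linear_model.RidgeCV(alphas=(0.1, 1.0, 10.0)).fit(X, y)
print(model.alpha_)  # the alpha selected by cross-validation
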
diff --git a/framework/SupervisedLearning/ScikitLearn/LinearModel/RidgeClassifier.py b/framework/SupervisedLearning/ScikitLearn/LinearModel/RidgeClassifier.py
index f5e5e380ec..45643e7023 100644
--- a/framework/SupervisedLearning/ScikitLearn/LinearModel/RidgeClassifier.py
+++ b/framework/SupervisedLearning/ScikitLearn/LinearModel/RidgeClassifier.py
@@ -45,9 +45,7 @@ def __init__(self):
super().__init__()
import sklearn
import sklearn.linear_model
- import sklearn.multioutput
- # we wrap the model with the multi output classifier (for multitarget)
- self.model = sklearn.multioutput.MultiOutputClassifier(sklearn.linear_model.RidgeClassifier())
+ self.model = sklearn.linear_model.RidgeClassifier
@classmethod
def getInputSpecification(cls):
diff --git a/framework/SupervisedLearning/ScikitLearn/LinearModel/RidgeClassifierCV.py b/framework/SupervisedLearning/ScikitLearn/LinearModel/RidgeClassifierCV.py
index 61e08596e7..70c3a3b4f4 100644
--- a/framework/SupervisedLearning/ScikitLearn/LinearModel/RidgeClassifierCV.py
+++ b/framework/SupervisedLearning/ScikitLearn/LinearModel/RidgeClassifierCV.py
@@ -44,9 +44,7 @@ def __init__(self):
super().__init__()
import sklearn
import sklearn.linear_model
- import sklearn.multioutput
- # we wrap the model with the multi output classifier (for multitarget)
- self.model = sklearn.multioutput.MultiOutputClassifier(sklearn.linear_model.RidgeClassifierCV())
+ self.model = sklearn.linear_model.RidgeClassifierCV
@classmethod
def getInputSpecification(cls):
@@ -101,7 +99,7 @@ def _handleInput(self, paramInput):
@ Out, None
"""
super()._handleInput(paramInput)
- settings, notFound = paramInput.findNodesAndExtractValues(['normalize','fit_intercept','cv', 'alphas'
+ settings, notFound = paramInput.findNodesAndExtractValues(['normalize','fit_intercept','cv', 'alphas',
'scoring', 'class_weight', 'store_cv_values'])
# notFound must be empty
assert(not notFound)
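
The missing comma fixed above matters because adjacent Python string literals concatenate implicitly, so the old list silently fused two parameter names; a tiny illustration (the same bug class is fixed in the MLPClassifier and MLPRegressor hunks below):

# without the comma, 'alphas' and 'scoring' merge into one bogus name
names = ['normalize', 'fit_intercept', 'cv', 'alphas'
         'scoring', 'class_weight', 'store_cv_values']
assert 'alphasscoring' in names and 'alphas' not in names
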
diff --git a/framework/SupervisedLearning/ScikitLearn/LinearModel/SGDClassifier.py b/framework/SupervisedLearning/ScikitLearn/LinearModel/SGDClassifier.py
index 997ad86ae7..dc4a34e827 100644
--- a/framework/SupervisedLearning/ScikitLearn/LinearModel/SGDClassifier.py
+++ b/framework/SupervisedLearning/ScikitLearn/LinearModel/SGDClassifier.py
@@ -44,9 +44,7 @@ def __init__(self):
super().__init__()
import sklearn
import sklearn.linear_model
- import sklearn.multioutput
- # we wrap the model with the multi output classifier (for multitarget)
- self.model = sklearn.multioutput.MultiOutputClassifier(sklearn.linear_model.SGDClassifier())
+ self.model = sklearn.linear_model.SGDClassifier
@classmethod
def getInputSpecification(cls):
diff --git a/framework/SupervisedLearning/ScikitLearn/LinearModel/SGDRegressor.py b/framework/SupervisedLearning/ScikitLearn/LinearModel/SGDRegressor.py
index 23316f22db..b8a86b4a65 100644
--- a/framework/SupervisedLearning/ScikitLearn/LinearModel/SGDRegressor.py
+++ b/framework/SupervisedLearning/ScikitLearn/LinearModel/SGDRegressor.py
@@ -44,9 +44,7 @@ def __init__(self):
super().__init__()
import sklearn
import sklearn.linear_model
- import sklearn.multioutput
- # we wrap the model with the multi output classifier (for multitarget)
- self.model = sklearn.multioutput.MultiOutputRegressor(sklearn.linear_model.SGDRegressor())
+ self.model = sklearn.linear_model.SGDRegressor
@classmethod
def getInputSpecification(cls):
diff --git a/framework/SupervisedLearning/ScikitLearn/MultiClass/OneVsOneClassifier.py b/framework/SupervisedLearning/ScikitLearn/MultiClass/OneVsOneClassifier.py
index 9302961acf..e6dff5b6bf 100644
--- a/framework/SupervisedLearning/ScikitLearn/MultiClass/OneVsOneClassifier.py
+++ b/framework/SupervisedLearning/ScikitLearn/MultiClass/OneVsOneClassifier.py
@@ -32,7 +32,7 @@ class OneVsOneClassifier(ScikitLearnBase):
"""
    One-vs-one multiclass strategy classifier
"""
- info = {'problemtype':'classifer', 'normalize':False}
+ info = {'problemtype':'classification', 'normalize':False}
def __init__(self):
"""
@@ -43,10 +43,7 @@ def __init__(self):
super().__init__()
import sklearn
import sklearn.multiclass
- import sklearn.multioutput
- from sklearn.svm import LinearSVC
- # we wrap the model with the multi output regressor (for multitarget)
- self.model = sklearn.multioutput.MultiOutputClassifier(sklearn.multiclass.OneVsOneClassifier(LinearSVC()))
+ self.model = sklearn.multiclass.OneVsOneClassifier
@classmethod
def getInputSpecification(cls):
@@ -84,14 +81,18 @@ def _handleInput(self, paramInput):
settings, notFound = paramInput.findNodesAndExtractValues(['n_jobs'])
# notFound must be empty
assert(not notFound)
- self.initializeModel(settings)
+ self.settings = settings
- def setEstimator(self, estimator):
+ def setEstimator(self, estimatorList):
"""
Initialization method
- @ In, estimator, ROM instance, estimator used by ROM
+      @ In, estimatorList, list, list of ROM instances (estimators) used by this ROM
@ Out, None
"""
+ if len(estimatorList) != 1:
+ self.raiseAWarning('ROM', self.name, 'can only accept one estimator, but multiple estimators are provided!',
+                         'Only the first one will be used, i.e.,', estimatorList[0].name)
+ estimator = estimatorList[0]
if estimator._interfaceROM.multioutputWrapper:
sklEstimator = estimator._interfaceROM.model.get_params()['estimator']
else:
@@ -103,8 +104,6 @@ def setEstimator(self, estimator):
# self.raiseAnError(IOError, 'estimator:', estimator.name, 'can not be used! Please change to a different estimator')
else:
self.raiseADebug('A valid estimator', estimator.name, 'is provided!')
- if self.multioutputWrapper:
- settings = {'estimator__estimator':sklEstimator}
- else:
- settings = {'estimator':sklEstimator}
- self.model.set_params(**settings)
+ settings = {'estimator':sklEstimator}
+ self.settings.update(settings)
+ self.initializeModel(self.settings)
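
For reference, a minimal standalone sketch (not RAVEN code) of what the refactored flow builds: the n_jobs setting and the unwrapped inner estimator both become constructor arguments of sklearn's OneVsOneClassifier; LinearSVC and the toy data are arbitrary examples:

import numpy as np
import sklearn.multiclass
from sklearn.svm import LinearSVC

settings = {'n_jobs': 1}                     # collected in _handleInput
settings.update({'estimator': LinearSVC()})  # added in setEstimator
model = sklearn.multiclass.OneVsOneClassifier(**settings)
X = np.array([[0., 0.], [1., 1.], [2., 2.], [0., 1.], [1., 2.], [2., 3.]])
y = np.array([0, 1, 2, 0, 1, 2])
print(model.fit(X, y).predict(X))
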
diff --git a/framework/SupervisedLearning/ScikitLearn/MultiClass/OneVsRestClassifier.py b/framework/SupervisedLearning/ScikitLearn/MultiClass/OneVsRestClassifier.py
index 59418db158..139d6f1536 100644
--- a/framework/SupervisedLearning/ScikitLearn/MultiClass/OneVsRestClassifier.py
+++ b/framework/SupervisedLearning/ScikitLearn/MultiClass/OneVsRestClassifier.py
@@ -32,7 +32,7 @@ class OneVsRestClassifier(ScikitLearnBase):
"""
One-vs-the-rest (OvR) multiclass strategy classifer
"""
- info = {'problemtype':'classifer', 'normalize':False}
+ info = {'problemtype':'classification', 'normalize':False}
def __init__(self):
"""
@@ -43,10 +43,7 @@ def __init__(self):
super().__init__()
import sklearn
import sklearn.multiclass
- import sklearn.multioutput
- from sklearn.svm import SVC
- # we wrap the model with the multi output regressor (for multitarget)
- self.model = sklearn.multioutput.MultiOutputClassifier(sklearn.multiclass.OneVsRestClassifier(SVC()))
+ self.model = sklearn.multiclass.OneVsRestClassifier
@classmethod
def getInputSpecification(cls):
@@ -88,14 +85,19 @@ def _handleInput(self, paramInput):
settings, notFound = paramInput.findNodesAndExtractValues(['n_jobs'])
# notFound must be empty
assert(not notFound)
- self.initializeModel(settings)
+ self.settings = settings
- def setEstimator(self, estimator):
+
+ def setEstimator(self, estimatorList):
"""
Initialization method
- @ In, estimator, ROM instance, estimator used by ROM
+      @ In, estimatorList, list, list of ROM instances (estimators) used by this ROM
@ Out, None
"""
+ if len(estimatorList) != 1:
+ self.raiseAWarning('ROM', self.name, 'can only accept one estimator, but multiple estimators are provided!',
+                         'Only the first one will be used, i.e.,', estimatorList[0].name)
+ estimator = estimatorList[0]
if estimator._interfaceROM.multioutputWrapper:
sklEstimator = estimator._interfaceROM.model.get_params()['estimator']
else:
@@ -107,8 +109,6 @@ def setEstimator(self, estimator):
# self.raiseAnError(IOError, 'estimator:', estimator.name, 'can not be used! Please change to a different estimator')
else:
self.raiseADebug('A valid estimator', estimator.name, 'is provided!')
- if self.multioutputWrapper:
- settings = {'estimator__estimator':sklEstimator}
- else:
- settings = {'estimator':sklEstimator}
- self.model.set_params(**settings)
+ settings = {'estimator':sklEstimator}
+ self.settings.update(settings)
+ self.initializeModel(self.settings)
diff --git a/framework/SupervisedLearning/ScikitLearn/MultiClass/OutputCodeClassifier.py b/framework/SupervisedLearning/ScikitLearn/MultiClass/OutputCodeClassifier.py
index 0809fee1c1..1d7e5b85bf 100644
--- a/framework/SupervisedLearning/ScikitLearn/MultiClass/OutputCodeClassifier.py
+++ b/framework/SupervisedLearning/ScikitLearn/MultiClass/OutputCodeClassifier.py
@@ -32,7 +32,7 @@ class OutputCodeClassifier(ScikitLearnBase):
"""
(Error-Correcting) Output-Code multiclass strategy classifer
"""
- info = {'problemtype':'classifer', 'normalize':False}
+ info = {'problemtype':'classification', 'normalize':False}
def __init__(self):
"""
@@ -43,10 +43,7 @@ def __init__(self):
super().__init__()
import sklearn
import sklearn.multiclass
- import sklearn.multioutput
- from sklearn.svm import SVC
- # we wrap the model with the multi output regressor (for multitarget)
- self.model = sklearn.multioutput.MultiOutputClassifier(sklearn.multiclass.OutputCodeClassifier(SVC()))
+ self.model = sklearn.multiclass.OutputCodeClassifier
@classmethod
def getInputSpecification(cls):
@@ -96,14 +93,18 @@ def _handleInput(self, paramInput):
settings, notFound = paramInput.findNodesAndExtractValues(['code_size', 'random_state', 'n_jobs'])
# notFound must be empty
assert(not notFound)
- self.initializeModel(settings)
+ self.settings = settings
- def setEstimator(self, estimator):
+ def setEstimator(self, estimatorList):
"""
Initialization method
- @ In, estimator, ROM instance, estimator used by ROM
+      @ In, estimatorList, list, list of ROM instances (estimators) used by this ROM
@ Out, None
"""
+ if len(estimatorList) != 1:
+ self.raiseAWarning('ROM', self.name, 'can only accept one estimator, but multiple estimators are provided!',
+                         'Only the first one will be used, i.e.,', estimatorList[0].name)
+ estimator = estimatorList[0]
if estimator._interfaceROM.multioutputWrapper:
sklEstimator = estimator._interfaceROM.model.get_params()['estimator']
else:
@@ -115,8 +116,6 @@ def setEstimator(self, estimator):
# self.raiseAnError(IOError, 'estimator:', estimator.name, 'can not be used! Please change to a different estimator')
else:
self.raiseADebug('A valid estimator', estimator.name, 'is provided!')
- if self.multioutputWrapper:
- settings = {'estimator__estimator':sklEstimator}
- else:
- settings = {'estimator':sklEstimator}
- self.model.set_params(**settings)
+ settings = {'estimator':sklEstimator}
+ self.settings.update(settings)
+ self.initializeModel(self.settings)
diff --git a/framework/SupervisedLearning/ScikitLearn/NaiveBayes/BernoulliNBClassifier.py b/framework/SupervisedLearning/ScikitLearn/NaiveBayes/BernoulliNBClassifier.py
index c3e2e10cec..c6c8fceba2 100644
--- a/framework/SupervisedLearning/ScikitLearn/NaiveBayes/BernoulliNBClassifier.py
+++ b/framework/SupervisedLearning/ScikitLearn/NaiveBayes/BernoulliNBClassifier.py
@@ -46,9 +46,7 @@ def __init__(self):
super().__init__()
import sklearn
import sklearn.naive_bayes
- import sklearn.multioutput
- # we wrap the model with the multi output classifier (for multitarget)
- self.model = sklearn.multioutput.MultiOutputClassifier(sklearn.naive_bayes.BernoulliNB())
+ self.model = sklearn.naive_bayes.BernoulliNB
@classmethod
def getInputSpecification(cls):
diff --git a/framework/SupervisedLearning/ScikitLearn/NaiveBayes/CategoricalNBClassifier.py b/framework/SupervisedLearning/ScikitLearn/NaiveBayes/CategoricalNBClassifier.py
index 46a1ba9e6e..f253fe85cc 100644
--- a/framework/SupervisedLearning/ScikitLearn/NaiveBayes/CategoricalNBClassifier.py
+++ b/framework/SupervisedLearning/ScikitLearn/NaiveBayes/CategoricalNBClassifier.py
@@ -46,9 +46,7 @@ def __init__(self):
super().__init__()
import sklearn
import sklearn.naive_bayes
- import sklearn.multioutput
- # we wrap the model with the multi output classifier (for multitarget)
- self.model = sklearn.multioutput.MultiOutputClassifier(sklearn.naive_bayes.CategoricalNB())
+ self.model = sklearn.naive_bayes.CategoricalNB
@classmethod
def getInputSpecification(cls):
diff --git a/framework/SupervisedLearning/ScikitLearn/NaiveBayes/ComplementNBClassifier.py b/framework/SupervisedLearning/ScikitLearn/NaiveBayes/ComplementNBClassifier.py
index 5361410633..41fad7749b 100644
--- a/framework/SupervisedLearning/ScikitLearn/NaiveBayes/ComplementNBClassifier.py
+++ b/framework/SupervisedLearning/ScikitLearn/NaiveBayes/ComplementNBClassifier.py
@@ -46,9 +46,7 @@ def __init__(self):
super().__init__()
import sklearn
import sklearn.naive_bayes
- import sklearn.multioutput
- # we wrap the model with the multi output classifier (for multitarget)
- self.model = sklearn.multioutput.MultiOutputClassifier(sklearn.naive_bayes.ComplementNB())
+ self.model = sklearn.naive_bayes.ComplementNB
@classmethod
def getInputSpecification(cls):
diff --git a/framework/SupervisedLearning/ScikitLearn/NaiveBayes/GaussianNBClassifier.py b/framework/SupervisedLearning/ScikitLearn/NaiveBayes/GaussianNBClassifier.py
index 7ddf940aff..56cde0651d 100644
--- a/framework/SupervisedLearning/ScikitLearn/NaiveBayes/GaussianNBClassifier.py
+++ b/framework/SupervisedLearning/ScikitLearn/NaiveBayes/GaussianNBClassifier.py
@@ -46,9 +46,7 @@ def __init__(self):
super().__init__()
import sklearn
import sklearn.naive_bayes
- import sklearn.multioutput
- # we wrap the model with the multi output classifier (for multitarget)
- self.model = sklearn.multioutput.MultiOutputClassifier(sklearn.naive_bayes.GaussianNB())
+ self.model = sklearn.naive_bayes.GaussianNB
@classmethod
def getInputSpecification(cls):
diff --git a/framework/SupervisedLearning/ScikitLearn/NaiveBayes/MultinomialNBClassifier.py b/framework/SupervisedLearning/ScikitLearn/NaiveBayes/MultinomialNBClassifier.py
index e015c47c4c..9a57a239a0 100644
--- a/framework/SupervisedLearning/ScikitLearn/NaiveBayes/MultinomialNBClassifier.py
+++ b/framework/SupervisedLearning/ScikitLearn/NaiveBayes/MultinomialNBClassifier.py
@@ -45,9 +45,7 @@ def __init__(self):
super().__init__()
import sklearn
import sklearn.naive_bayes
- import sklearn.multioutput
- # we wrap the model with the multi output classifier (for multitarget)
- self.model = sklearn.multioutput.MultiOutputClassifier(sklearn.naive_bayes.MultinomialNB())
+ self.model = sklearn.naive_bayes.MultinomialNB
@classmethod
def getInputSpecification(cls):
diff --git a/framework/SupervisedLearning/ScikitLearn/Neighbors/KNeighborsClassifier.py b/framework/SupervisedLearning/ScikitLearn/Neighbors/KNeighborsClassifier.py
index 435545ff6e..f5ee9f4510 100644
--- a/framework/SupervisedLearning/ScikitLearn/Neighbors/KNeighborsClassifier.py
+++ b/framework/SupervisedLearning/ScikitLearn/Neighbors/KNeighborsClassifier.py
@@ -46,9 +46,7 @@ def __init__(self):
super().__init__()
import sklearn
import sklearn.neighbors
- import sklearn.multioutput
- # we wrap the model with the multi output classifier (for multitarget)
- self.model = sklearn.multioutput.MultiOutputClassifier(sklearn.neighbors.KNeighborsClassifier())
+ self.model = sklearn.neighbors.KNeighborsClassifier
@classmethod
def getInputSpecification(cls):
diff --git a/framework/SupervisedLearning/ScikitLearn/Neighbors/KNeighborsRegressor.py b/framework/SupervisedLearning/ScikitLearn/Neighbors/KNeighborsRegressor.py
index 6bdfaa392c..3bac0cea27 100644
--- a/framework/SupervisedLearning/ScikitLearn/Neighbors/KNeighborsRegressor.py
+++ b/framework/SupervisedLearning/ScikitLearn/Neighbors/KNeighborsRegressor.py
@@ -46,9 +46,7 @@ def __init__(self):
super().__init__()
import sklearn
import sklearn.neighbors
- import sklearn.multioutput
- # we wrap the model with the multi output regressor (for multitarget)
- self.model = sklearn.multioutput.MultiOutputRegressor(sklearn.neighbors.KNeighborsRegressor())
+ self.model = sklearn.neighbors.KNeighborsRegressor
@classmethod
def getInputSpecification(cls):
diff --git a/framework/SupervisedLearning/ScikitLearn/Neighbors/NearestCentroidClassifier.py b/framework/SupervisedLearning/ScikitLearn/Neighbors/NearestCentroidClassifier.py
index 31fa311581..ed1b9e5594 100644
--- a/framework/SupervisedLearning/ScikitLearn/Neighbors/NearestCentroidClassifier.py
+++ b/framework/SupervisedLearning/ScikitLearn/Neighbors/NearestCentroidClassifier.py
@@ -44,9 +44,7 @@ def __init__(self):
super().__init__()
import sklearn
import sklearn.neighbors
- import sklearn.multioutput
- # we wrap the model with the multi output classifier (for multitarget)
- self.model = sklearn.multioutput.MultiOutputClassifier(sklearn.neighbors.NearestCentroid())
+ self.model = sklearn.neighbors.NearestCentroid
@classmethod
def getInputSpecification(cls):
diff --git a/framework/SupervisedLearning/ScikitLearn/Neighbors/RadiusNeighborsClassifier.py b/framework/SupervisedLearning/ScikitLearn/Neighbors/RadiusNeighborsClassifier.py
index 6c12502993..d5ebdf3df4 100644
--- a/framework/SupervisedLearning/ScikitLearn/Neighbors/RadiusNeighborsClassifier.py
+++ b/framework/SupervisedLearning/ScikitLearn/Neighbors/RadiusNeighborsClassifier.py
@@ -46,9 +46,7 @@ def __init__(self):
super().__init__()
import sklearn
import sklearn.neighbors
- import sklearn.multioutput
- # we wrap the model with the multi output classifier (for multitarget)
- self.model = sklearn.multioutput.MultiOutputClassifier(sklearn.neighbors.RadiusNeighborsClassifier())
+ self.model = sklearn.neighbors.RadiusNeighborsClassifier
@classmethod
def getInputSpecification(cls):
diff --git a/framework/SupervisedLearning/ScikitLearn/Neighbors/RadiusNeighborsRegressor.py b/framework/SupervisedLearning/ScikitLearn/Neighbors/RadiusNeighborsRegressor.py
index 40cba76963..d08a53120d 100644
--- a/framework/SupervisedLearning/ScikitLearn/Neighbors/RadiusNeighborsRegressor.py
+++ b/framework/SupervisedLearning/ScikitLearn/Neighbors/RadiusNeighborsRegressor.py
@@ -46,9 +46,7 @@ def __init__(self):
super().__init__()
import sklearn
import sklearn.neighbors
- import sklearn.multioutput
- # we wrap the model with the multi output regressor (for multitarget)
- self.model = sklearn.multioutput.MultiOutputRegressor(sklearn.neighbors.RadiusNeighborsRegressor())
+ self.model = sklearn.neighbors.RadiusNeighborsRegressor
@classmethod
def getInputSpecification(cls):
diff --git a/framework/SupervisedLearning/ScikitLearn/NeuralNetwork/MLPClassifier.py b/framework/SupervisedLearning/ScikitLearn/NeuralNetwork/MLPClassifier.py
index 065064e1a5..37c5868bb2 100644
--- a/framework/SupervisedLearning/ScikitLearn/NeuralNetwork/MLPClassifier.py
+++ b/framework/SupervisedLearning/ScikitLearn/NeuralNetwork/MLPClassifier.py
@@ -42,11 +42,10 @@ def __init__(self):
@ Out, None
"""
super().__init__()
+ self.multioutputWrapper = False
import sklearn
import sklearn.neural_network
- import sklearn.multioutput
- # we wrap the model with the multi output classifier (for multitarget)
- self.model = sklearn.multioutput.MultiOutputClassifier(sklearn.neural_network.MLPClassifier())
+ self.model = sklearn.neural_network.MLPClassifier
@classmethod
def getInputSpecification(cls):
@@ -162,7 +161,7 @@ def _handleInput(self, paramInput):
super()._handleInput(paramInput)
settings, notFound = paramInput.findNodesAndExtractValues(['hidden_layer_sizes','activation','solver','alpha','batch_size',
'learning_rate','learning_rate_init','power_t','max_iter', 'shuffle',
- 'random_state','tol','verbose','warm_start','momentum','nesterovs_momentum'
+ 'random_state','tol','verbose','warm_start','momentum','nesterovs_momentum',
'early_stopping','validation_fraction','beta_1','beta_2','epsilon',
'n_iter_no_change'])
# notFound must be empty
diff --git a/framework/SupervisedLearning/ScikitLearn/NeuralNetwork/MLPRegressor.py b/framework/SupervisedLearning/ScikitLearn/NeuralNetwork/MLPRegressor.py
index 1e6016a81f..cd119eb163 100644
--- a/framework/SupervisedLearning/ScikitLearn/NeuralNetwork/MLPRegressor.py
+++ b/framework/SupervisedLearning/ScikitLearn/NeuralNetwork/MLPRegressor.py
@@ -42,9 +42,10 @@ def __init__(self):
@ Out, None
"""
super().__init__()
+ self.multioutputWrapper = False
import sklearn
import sklearn.neural_network
- self.model = sklearn.neural_network.MLPRegressor()
+ self.model = sklearn.neural_network.MLPRegressor
@classmethod
def getInputSpecification(cls):
@@ -159,7 +160,7 @@ def _handleInput(self, paramInput):
super()._handleInput(paramInput)
settings, notFound = paramInput.findNodesAndExtractValues(['hidden_layer_sizes','activation','solver','alpha','batch_size',
'learning_rate','learning_rate_init','power_t','max_iter', 'shuffle',
- 'random_state','tol','verbose','warm_start','momentum','nesterovs_momentum'
+ 'random_state','tol','verbose','warm_start','momentum','nesterovs_momentum',
'early_stopping','validation_fraction','beta_1','beta_2','epsilon',
'n_iter_no_change'])
# notFound must be empty
diff --git a/framework/SupervisedLearning/ScikitLearn/SVM/LinearSVC.py b/framework/SupervisedLearning/ScikitLearn/SVM/LinearSVC.py
index 2a51f3e1e9..746a348cf1 100644
--- a/framework/SupervisedLearning/ScikitLearn/SVM/LinearSVC.py
+++ b/framework/SupervisedLearning/ScikitLearn/SVM/LinearSVC.py
@@ -44,9 +44,7 @@ def __init__(self):
super().__init__()
import sklearn
import sklearn.svm
- import sklearn.multioutput
- # we wrap the model with the multi output regressor (for multitarget)
- self.model = sklearn.multioutput.MultiOutputClassifier(sklearn.svm.LinearSVC())
+ self.model = sklearn.svm.LinearSVC
@classmethod
def getInputSpecification(cls):
diff --git a/framework/SupervisedLearning/ScikitLearn/SVM/LinearSVR.py b/framework/SupervisedLearning/ScikitLearn/SVM/LinearSVR.py
index def364f0fb..78afa0e4c9 100644
--- a/framework/SupervisedLearning/ScikitLearn/SVM/LinearSVR.py
+++ b/framework/SupervisedLearning/ScikitLearn/SVM/LinearSVR.py
@@ -44,9 +44,7 @@ def __init__(self):
super().__init__()
import sklearn
import sklearn.svm
- import sklearn.multioutput
- # we wrap the model with the multi output regressor (for multitarget)
- self.model = sklearn.multioutput.MultiOutputRegressor(sklearn.svm.LinearSVR())
+ self.model = sklearn.svm.LinearSVR
@classmethod
def getInputSpecification(cls):
diff --git a/framework/SupervisedLearning/ScikitLearn/SVM/NuSVC.py b/framework/SupervisedLearning/ScikitLearn/SVM/NuSVC.py
index 771229de9c..6cd91c54d6 100644
--- a/framework/SupervisedLearning/ScikitLearn/SVM/NuSVC.py
+++ b/framework/SupervisedLearning/ScikitLearn/SVM/NuSVC.py
@@ -44,9 +44,7 @@ def __init__(self):
super().__init__()
import sklearn
import sklearn.svm
- import sklearn.multioutput
- # we wrap the model with the multi output regressor (for multitarget)
- self.model = sklearn.multioutput.MultiOutputClassifier(sklearn.svm.NuSVC())
+ self.model = sklearn.svm.NuSVC
@classmethod
def getInputSpecification(cls):
@@ -83,11 +81,6 @@ class cls.
descr=r"""Tolerance for stopping criterion""", default=1e-3))
specs.addSub(InputData.parameterInputFactory("cache_size", contentType=InputTypes.FloatType,
descr=r"""Size of the kernel cache (in MB)""", default=200.))
- specs.addSub(InputData.parameterInputFactory("epsilon", contentType=InputTypes.FloatType,
- descr=r"""Epsilon in the epsilon-SVR model. It specifies the epsilon-tube
- within which no penalty is associated in the training loss function
- with points predicted within a distance epsilon from the actual
- value.""", default=0.1))
specs.addSub(InputData.parameterInputFactory("shrinking", contentType=InputTypes.BoolType,
descr=r"""Whether to use the shrinking heuristic.""", default=True))
specs.addSub(InputData.parameterInputFactory("max_iter", contentType=InputTypes.IntegerType,
@@ -97,11 +90,12 @@ class cls.
all other classifiers, or the original one-vs-one (``ovo'') decision function of libsvm which has
shape $(n_samples, n_classes * (n_classes - 1) / 2)$. However, one-vs-one (``ovo'') is always used as
multi-class strategy. The parameter is ignored for binary classification.""", default='ovr'))
- specs.addSub(InputData.parameterInputFactory("break_ties", contentType=InputTypes.BoolType,
- descr=r"""if true, decision_function_shape='ovr', and number of $classes > 2$, predict will
- break ties according to the confidence values of decision_function; otherwise the first class among
- the tied classes is returned. Please note that breaking ties comes at a relatively high computational
- cost compared to a simple predict.""", default=False))
+    # New in sklearn version 0.22
+ # specs.addSub(InputData.parameterInputFactory("break_ties", contentType=InputTypes.BoolType,
+ # descr=r"""if true, decision_function_shape='ovr', and number of $classes > 2$, predict will
+ # break ties according to the confidence values of decision_function; otherwise the first class among
+ # the tied classes is returned. Please note that breaking ties comes at a relatively high computational
+ # cost compared to a simple predict.""", default=False))
specs.addSub(InputData.parameterInputFactory("verbose", contentType=InputTypes.BoolType,
descr=r"""Enable verbose output. Note that this setting takes advantage
of a per-process runtime setting in libsvm that, if enabled, may not
@@ -127,8 +121,8 @@ def _handleInput(self, paramInput):
"""
super()._handleInput(paramInput)
settings, notFound = paramInput.findNodesAndExtractValues(['nu', 'kernel', 'degree', 'gamma', 'coef0',
- 'tol', 'cache_size', 'epsilon', 'shrinking', 'max_iter',
- 'decision_function_shape', 'break_ties', 'verbose',
+ 'tol', 'cache_size', 'shrinking', 'max_iter',
+ 'decision_function_shape', 'verbose',
'probability', 'class_weight', 'random_state'])
# notFound must be empty
assert(not notFound)
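
Dropping the epsilon node is consistent with scikit-learn's API: epsilon belongs to the epsilon-SVR loss, and NuSVC (like SVC below) does not accept it, so under the new deferred-instantiation scheme it would fail when the model is built from settings; a quick standalone check (sketch):

import sklearn.svm

try:
  sklearn.svm.NuSVC(epsilon=0.1)
except TypeError as err:
  # __init__() got an unexpected keyword argument 'epsilon'
  print(err)
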
diff --git a/framework/SupervisedLearning/ScikitLearn/SVM/NuSVR.py b/framework/SupervisedLearning/ScikitLearn/SVM/NuSVR.py
index 884a7820b4..8f81177f6a 100644
--- a/framework/SupervisedLearning/ScikitLearn/SVM/NuSVR.py
+++ b/framework/SupervisedLearning/ScikitLearn/SVM/NuSVR.py
@@ -44,9 +44,7 @@ def __init__(self):
super().__init__()
import sklearn
import sklearn.svm
- import sklearn.multioutput
- # we wrap the model with the multi output regressor (for multitarget)
- self.model = sklearn.multioutput.MultiOutputRegressor(sklearn.svm.NuSVR())
+ self.model = sklearn.svm.NuSVR
@classmethod
def getInputSpecification(cls):
diff --git a/framework/SupervisedLearning/ScikitLearn/SVM/SVC.py b/framework/SupervisedLearning/ScikitLearn/SVM/SVC.py
index 6236058543..82b3981132 100644
--- a/framework/SupervisedLearning/ScikitLearn/SVM/SVC.py
+++ b/framework/SupervisedLearning/ScikitLearn/SVM/SVC.py
@@ -44,9 +44,7 @@ def __init__(self):
super().__init__()
import sklearn
import sklearn.svm
- import sklearn.multioutput
- # we wrap the model with the multi output regressor (for multitarget)
- self.model = sklearn.multioutput.MultiOutputClassifier(sklearn.svm.SVC())
+ self.model = sklearn.svm.SVC
@classmethod
def getInputSpecification(cls):
@@ -86,11 +84,6 @@ class cls.
descr=r"""Tolerance for stopping criterion""", default=1e-3))
specs.addSub(InputData.parameterInputFactory("cache_size", contentType=InputTypes.FloatType,
descr=r"""Size of the kernel cache (in MB)""", default=200.))
- specs.addSub(InputData.parameterInputFactory("epsilon", contentType=InputTypes.FloatType,
- descr=r"""Epsilon in the epsilon-SVR model. It specifies the epsilon-tube
- within which no penalty is associated in the training loss function
- with points predicted within a distance epsilon from the actual
- value.""", default=0.1))
specs.addSub(InputData.parameterInputFactory("shrinking", contentType=InputTypes.BoolType,
descr=r"""Whether to use the shrinking heuristic.""", default=True))
specs.addSub(InputData.parameterInputFactory("max_iter", contentType=InputTypes.IntegerType,
@@ -100,11 +93,12 @@ class cls.
all other classifiers, or the original one-vs-one (``ovo'') decision function of libsvm which has
shape $(n_samples, n_classes * (n_classes - 1) / 2)$. However, one-vs-one (``ovo'') is always used as
multi-class strategy. The parameter is ignored for binary classification.""", default='ovr'))
- specs.addSub(InputData.parameterInputFactory("break_ties", contentType=InputTypes.BoolType,
- descr=r"""if true, decision_function_shape='ovr', and number of $classes > 2$, predict will
- break ties according to the confidence values of decision_function; otherwise the first class among
- the tied classes is returned. Please note that breaking ties comes at a relatively high computational
- cost compared to a simple predict.""", default=False))
+    # New in sklearn version 0.22
+ # specs.addSub(InputData.parameterInputFactory("break_ties", contentType=InputTypes.BoolType,
+ # descr=r"""if true, decision_function_shape='ovr', and number of $classes > 2$, predict will
+ # break ties according to the confidence values of decision_function; otherwise the first class among
+ # the tied classes is returned. Please note that breaking ties comes at a relatively high computational
+ # cost compared to a simple predict.""", default=False))
specs.addSub(InputData.parameterInputFactory("verbose", contentType=InputTypes.BoolType,
descr=r"""Enable verbose output. Note that this setting takes advantage
of a per-process runtime setting in libsvm that, if enabled, may not
@@ -130,8 +124,8 @@ def _handleInput(self, paramInput):
"""
super()._handleInput(paramInput)
settings, notFound = paramInput.findNodesAndExtractValues(['C', 'kernel', 'degree', 'gamma', 'coef0',
- 'tol', 'cache_size', 'epsilon', 'shrinking', 'max_iter',
- 'decision_function_shape', 'break_ties', 'verbose', 'probability',
+ 'tol', 'cache_size', 'shrinking', 'max_iter',
+ 'decision_function_shape', 'verbose', 'probability',
'class_weight', 'random_state'])
# notFound must be empty
assert(not notFound)
diff --git a/framework/SupervisedLearning/ScikitLearn/SVM/SVR.py b/framework/SupervisedLearning/ScikitLearn/SVM/SVR.py
index dfda27653b..62adc04ec8 100644
--- a/framework/SupervisedLearning/ScikitLearn/SVM/SVR.py
+++ b/framework/SupervisedLearning/ScikitLearn/SVM/SVR.py
@@ -48,9 +48,7 @@ def __init__(self):
super().__init__()
import sklearn
import sklearn.svm
- import sklearn.multioutput
- # we wrap the model with the multi output regressor (for multitarget)
- self.model = sklearn.multioutput.MultiOutputRegressor(sklearn.svm.SVR())
+ self.model = sklearn.svm.SVR
@classmethod
def getInputSpecification(cls):
diff --git a/framework/SupervisedLearning/ScikitLearn/ScikitLearnBase.py b/framework/SupervisedLearning/ScikitLearn/ScikitLearnBase.py
index b2b7dcc4b0..d61ffcb8e8 100644
--- a/framework/SupervisedLearning/ScikitLearn/ScikitLearnBase.py
+++ b/framework/SupervisedLearning/ScikitLearn/ScikitLearnBase.py
@@ -36,7 +36,7 @@ class ScikitLearnBase(SupervisedLearning):
"""
Base Class for Scikitlearn-based surrogate models (classifiers and regressors)
"""
- info = {'datatype':None, 'normalize':None}
+ info = {'problemtype':None, 'normalize':None}
def __init__(self):
"""
@@ -45,10 +45,11 @@ def __init__(self):
@ Out, None
"""
super().__init__()
- self.uniqueVals = None
- self.settings = None
- self.model = None
- self.multioutputWrapper = True
+ self.uniqueVals = None # set when every target has only a single unique value; stores those values
+ self.settings = None # initial settings for the ROM
+ self.model = None # scikit-learn estimator/model
+ self.multioutputWrapper = True # if True, wrap self.model in MultiOutputRegressor/MultiOutputClassifier;
+ # otherwise self.model is assumed to handle multi-target prediction natively
def updateSettings(self, settings):
"""
@@ -74,17 +75,41 @@ def initializeModel(self, settings):
@ In, settings, dict, the dictionary containing the parameters/settings to instantiate the model
@ Out, None
"""
- settings = self.updateSettings(settings)
- self.settings = settings
- self.model.set_params(**settings)
+ if self.settings is None:
+ self.settings = settings
+ self.model = self.model(**settings)
+ if self.multioutputWrapper:
+ self.multioutput(self.info['problemtype'])
- def setEstimator(self, estimator):
+ def multioutput(self, type='regression'):
+ """
+ Method to extend scikit-learn ROMs that do not natively support multi-target regression/classification
+ @ In, type, str, either regression or classification
+ @ Out, None
+ """
+ import sklearn.multioutput
+ if type == 'regression':
+ self.model = sklearn.multioutput.MultiOutputRegressor(self.model)
+ elif type == 'classification':
+ self.model = sklearn.multioutput.MultiOutputClassifier(self.model)
+ else:
+ self.raiseAnError(IOError, 'The "type" param for function "multioutput" should be either "regression" or "classification"! but got',
+ type)
+
+ def setEstimator(self, estimatorList):
"""
Initialization method
- @ In, estimator, ROM instance, estimator used by ROM
+ @ In, estimatorList, list, ROM instances/estimators used by the ROM
@ Out, None
"""
- pass
+ for estimator in estimatorList:
+ interfaceRom = estimator._interfaceROM
+ if not isinstance(interfaceRom, ScikitLearnBase):
+ self.raiseAnError(IOError, 'ROM', estimator.name, 'cannot be used as an estimator for ROM', self.name)
+ if not callable(getattr(interfaceRom.model, "fit", None)):
+ self.raiseAnError(IOError, 'estimator:', estimator.name, 'cannot be used! Please provide a different estimator')
+ else:
+ self.raiseADebug('A valid estimator', estimator.name, 'is provided!')
def __trainLocal__(self,featureVals,targetVals):
"""
@@ -96,11 +121,11 @@ def __trainLocal__(self,featureVals,targetVals):
associated with the corresponding points in featureVals
"""
# check if all targets only have a single unique value, just store that value, no need to fit/train
- self.uniqueVals = None
if all([len(np.unique(targetVals[:,index])) == 1 for index in range(targetVals.shape[1])]):
self.uniqueVals = [np.unique(targetVals[:,index])[0] for index in range(targetVals.shape[1]) ]
else:
# the multi-target is handled by the internal wrapper
+ self.uniqueVals = None
self.model.fit(featureVals,targetVals)
def __confidenceLocal__(self,featureVals):
@@ -140,7 +165,8 @@ def __resetLocal__(self):
@ In, None
@ Out, None
"""
- self.model.set_params(**self.settings)
+ settings = self.updateSettings(self.settings)
+ self.model.set_params(**settings)
def __returnInitialParametersLocal__(self):
"""
@@ -148,7 +174,7 @@ def __returnInitialParametersLocal__(self):
@ In, None
@ Out, params, dict, dictionary of parameter names and initial values
"""
- params = self.model.get_params()
+ params = self.settings
return params
def __returnCurrentSettingLocal__(self):
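
Editorial aside: taken together, the ScikitLearnBase changes above replace eagerly-built, pre-wrapped estimators with a deferred pattern: subclasses store the estimator class, initializeModel instantiates it with the user settings, and multioutput() wraps it only when multioutputWrapper is True. A standalone sketch of that flow using plain scikit-learn (names and settings illustrative, not the RAVEN API):

import numpy as np
import sklearn.svm
import sklearn.multioutput

modelClass = sklearn.svm.SVR                 # stored unbuilt, as in the new SVR.__init__
settings = {'C': 2.0, 'kernel': 'rbf'}       # stands in for the values _handleInput collects

model = modelClass(**settings)               # what initializeModel now does
model = sklearn.multioutput.MultiOutputRegressor(model)  # what multioutput('regression') now does

X = np.random.rand(50, 2)                    # 50 samples, 2 features
Y = np.random.rand(50, 3)                    # 3 targets
model.fit(X, Y)
print(model.predict(X[:2]).shape)            # (2, 3): one column per target
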
diff --git a/framework/SupervisedLearning/ScikitLearn/Tree/DecisionTreeClassifier.py b/framework/SupervisedLearning/ScikitLearn/Tree/DecisionTreeClassifier.py
index 2d0966e1eb..4a108a0c5e 100644
--- a/framework/SupervisedLearning/ScikitLearn/Tree/DecisionTreeClassifier.py
+++ b/framework/SupervisedLearning/ScikitLearn/Tree/DecisionTreeClassifier.py
@@ -46,9 +46,7 @@ def __init__(self):
super().__init__()
import sklearn
import sklearn.tree
- import sklearn.multioutput
- # we wrap the model with the multi output classifier (for multitarget)
- self.model = sklearn.multioutput.MultiOutputClassifier(sklearn.tree.DecisionTreeClassifier())
+ self.model = sklearn.tree.DecisionTreeClassifier
@classmethod
def getInputSpecification(cls):
@@ -102,9 +100,10 @@ class cls.
where $N$ is the total number of samples, $N\_t$ is the number of samples at the current node, $N\_t\_L$ is the number
of samples in the left child, and $N\_t\_R$ is the number of samples in the right child.
$N$, $N\_t$, $N\_t\_R$ and $N\_t\_L$ all refer to the weighted sum if sample\_weight is passed.""", default=0.0))
- specs.addSub(InputData.parameterInputFactory("ccp_alpha", contentType=InputTypes.FloatType,
- descr=r"""Complexity parameter used for Minimal Cost-Complexity Pruning. The subtree with the largest cost
- complexity that is smaller than ccp_alpha will be chosen. By default, no pruning is performed. """, default=0.0))
+ # new in sklearn 0.22
+ # specs.addSub(InputData.parameterInputFactory("ccp_alpha", contentType=InputTypes.FloatType,
+ # descr=r"""Complexity parameter used for Minimal Cost-Complexity Pruning. The subtree with the largest cost
+ # complexity that is smaller than ccp_alpha will be chosen. By default, no pruning is performed. """, default=0.0))
specs.addSub(InputData.parameterInputFactory("random_state", contentType=InputTypes.IntegerType,
descr=r"""Controls the randomness of the estimator. The features are
always randomly permuted at each split, even if splitter is set to
@@ -127,7 +126,7 @@ def _handleInput(self, paramInput):
super()._handleInput(paramInput)
settings, notFound = paramInput.findNodesAndExtractValues(['criterion', 'splitter', 'max_depth','min_samples_split',
'min_samples_leaf','min_weight_fraction_leaf','max_features',
- 'max_leaf_nodes','min_impurity_decrease','ccp_alpha',
+ 'max_leaf_nodes','min_impurity_decrease',
'random_state'])
# notFound must be empty
assert(not notFound)
diff --git a/framework/SupervisedLearning/ScikitLearn/Tree/DecisionTreeRegressor.py b/framework/SupervisedLearning/ScikitLearn/Tree/DecisionTreeRegressor.py
index 62cde495ea..0ae624557e 100644
--- a/framework/SupervisedLearning/ScikitLearn/Tree/DecisionTreeRegressor.py
+++ b/framework/SupervisedLearning/ScikitLearn/Tree/DecisionTreeRegressor.py
@@ -46,9 +46,7 @@ def __init__(self):
super().__init__()
import sklearn
import sklearn.tree
- import sklearn.multioutput
- # we wrap the model with the multi output classifier (for multitarget)
- self.model = sklearn.multioutput.MultiOutputRegressor(sklearn.tree.DecisionTreeRegressor())
+ self.model = sklearn.tree.DecisionTreeRegressor
@classmethod
def getInputSpecification(cls):
@@ -106,9 +104,10 @@ class cls.
where $N$ is the total number of samples, $N\_t$ is the number of samples at the current node, $N\_t\_L$ is the number
of samples in the left child, and $N\_t\_R$ is the number of samples in the right child.
$N$, $N\_t$, $N\_t\_R$ and $N\_t\_L$ all refer to the weighted sum if sample\_weight is passed.""", default=0.0))
- specs.addSub(InputData.parameterInputFactory("ccp_alpha", contentType=InputTypes.FloatType,
- descr=r"""Complexity parameter used for Minimal Cost-Complexity Pruning. The subtree with the largest cost
- complexity that is smaller than ccp_alpha will be chosen. By default, no pruning is performed. """, default=0.0))
+ # New in sklearn version 0.22
+ # specs.addSub(InputData.parameterInputFactory("ccp_alpha", contentType=InputTypes.FloatType,
+ # descr=r"""Complexity parameter used for Minimal Cost-Complexity Pruning. The subtree with the largest cost
+ # complexity that is smaller than ccp_alpha will be chosen. By default, no pruning is performed. """, default=0.0))
specs.addSub(InputData.parameterInputFactory("random_state", contentType=InputTypes.IntegerType,
descr=r"""Controls the randomness of the estimator. The features are
always randomly permuted at each split, even if splitter is set to
@@ -131,7 +130,7 @@ def _handleInput(self, paramInput):
super()._handleInput(paramInput)
settings, notFound = paramInput.findNodesAndExtractValues(['criterion', 'splitter', 'max_depth','min_samples_split',
'min_samples_leaf','min_weight_fraction_leaf','max_features',
- 'max_leaf_nodes','min_impurity_decrease','ccp_alpha',
+ 'max_leaf_nodes','min_impurity_decrease',
'random_state'])
# notFound must be empty
assert(not notFound)
diff --git a/framework/SupervisedLearning/ScikitLearn/Tree/ExtraTreeClassifier.py b/framework/SupervisedLearning/ScikitLearn/Tree/ExtraTreeClassifier.py
index e49a53f98a..0a06f7e8e7 100644
--- a/framework/SupervisedLearning/ScikitLearn/Tree/ExtraTreeClassifier.py
+++ b/framework/SupervisedLearning/ScikitLearn/Tree/ExtraTreeClassifier.py
@@ -46,9 +46,7 @@ def __init__(self):
super().__init__()
import sklearn
import sklearn.tree
- import sklearn.multioutput
- # we wrap the model with the multi output classifier (for multitarget)
- self.model = sklearn.multioutput.MultiOutputClassifier(sklearn.tree.ExtraTreeClassifier())
+ self.model = sklearn.tree.ExtraTreeClassifier
@classmethod
def getInputSpecification(cls):
@@ -106,9 +104,10 @@ class cls.
where $N$ is the total number of samples, $N\_t$ is the number of samples at the current node, $N\_t\_L$ is the number
of samples in the left child, and $N\_t\_R$ is the number of samples in the right child.
$N$, $N\_t$, $N\_t\_R$ and $N\_t\_L$ all refer to the weighted sum if sample\_weight is passed.""", default=0.0))
- specs.addSub(InputData.parameterInputFactory("ccp_alpha", contentType=InputTypes.FloatType,
- descr=r"""Complexity parameter used for Minimal Cost-Complexity Pruning. The subtree with the largest cost
- complexity that is smaller than ccp_alpha will be chosen. By default, no pruning is performed. """, default=0.0))
+ # new in sklearn version 0.22
+ # specs.addSub(InputData.parameterInputFactory("ccp_alpha", contentType=InputTypes.FloatType,
+ # descr=r"""Complexity parameter used for Minimal Cost-Complexity Pruning. The subtree with the largest cost
+ # complexity that is smaller than ccp_alpha will be chosen. By default, no pruning is performed. """, default=0.0))
specs.addSub(InputData.parameterInputFactory("random_state", contentType=InputTypes.IntegerType,
descr=r"""Used to pick randomly the max\_features used at each split.""",
default=None))
@@ -123,7 +122,7 @@ def _handleInput(self, paramInput):
super()._handleInput(paramInput)
settings, notFound = paramInput.findNodesAndExtractValues(['criterion', 'splitter', 'max_depth','min_samples_split',
'min_samples_leaf','min_weight_fraction_leaf','max_features',
- 'max_leaf_nodes','min_impurity_decrease','ccp_alpha',
+ 'max_leaf_nodes','min_impurity_decrease',
'random_state'])
# notFound must be empty
assert(not notFound)
diff --git a/framework/SupervisedLearning/ScikitLearn/Tree/ExtraTreeRegressor.py b/framework/SupervisedLearning/ScikitLearn/Tree/ExtraTreeRegressor.py
index 872054f45a..88a37a5d9c 100644
--- a/framework/SupervisedLearning/ScikitLearn/Tree/ExtraTreeRegressor.py
+++ b/framework/SupervisedLearning/ScikitLearn/Tree/ExtraTreeRegressor.py
@@ -46,9 +46,7 @@ def __init__(self):
super().__init__()
import sklearn
import sklearn.tree
- import sklearn.multioutput
- # we wrap the model with the multi output classifier (for multitarget)
- self.model = sklearn.multioutput.MultiOutputRegressor(sklearn.tree.ExtraTreeRegressor())
+ self.model = sklearn.tree.ExtraTreeRegressor
@classmethod
def getInputSpecification(cls):
diff --git a/framework/SupervisedLearning/SupervisedLearning.py b/framework/SupervisedLearning/SupervisedLearning.py
index 741ff26f2b..bc7f41399e 100644
--- a/framework/SupervisedLearning/SupervisedLearning.py
+++ b/framework/SupervisedLearning/SupervisedLearning.py
@@ -181,10 +181,10 @@ def __setstate__(self, d):
if 'dynamicFeatures' not in d:
self.dynamicFeatures = False
- def setEstimator(self, estimator):
+ def setEstimator(self, estimatorList):
"""
Initialization method
- @ In, estimator, ROM instance, estimator used by ROM
+ @ In, estimatorList, list, ROM instances/estimators used by the ROM
@ Out, None
"""
pass
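
Editorial aside: the estimator validation in ScikitLearnBase.setEstimator above is a duck-typing check, where anything exposing a callable fit method qualifies. A minimal standalone sketch of the same test (helper name illustrative):

import sklearn.svm

def isValidEstimator(model):
  """
  Returns True if model exposes a callable fit method, i.e. looks like an estimator.
  """
  return callable(getattr(model, "fit", None))

print(isValidEstimator(sklearn.svm.SVR()))  # True
print(isValidEstimator("not a model"))      # False
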
diff --git a/framework/utils/InputTypes.py b/framework/utils/InputTypes.py
index e7a3ada6be..9a79c045d3 100644
--- a/framework/utils/InputTypes.py
+++ b/framework/utils/InputTypes.py
@@ -494,6 +494,39 @@ def generateLatexType(cls):
return 'comma-separated list of comma separated integer tuples'
IntegerTupleListType.createClass("integer_tuple_list", "xsd:string")
+
+#
+#
+#
+#
+class FloatTupleType(BaseListType):
+ """
+ A type for a float tuple, e.g. "1.1, 2.0, 3.4" -> (1.1, 2.0, 3.4)
+ """
+
+ @classmethod
+ def convert(cls, value):
+ """
+ Converts value from string to a float tuple.
+ @ In, value, string, the value to convert
+ @ Out, convert, tuple, the converted value
+ """
+ # prefer commas, but allow spaces, to divide
+ delim = ',' if ',' in value else None
+ return tuple(float(x.strip()) for x in value.split(delim) if x.strip())
+
+ @classmethod
+ def generateLatexType(cls):
+ """
+ Generates LaTeX representing this type's type
+ @ In, None
+ @ Out, msg, string, representation
+ """
+ return 'tuple of comma-separated floats'
+
+#Note, XSD's list type is split by spaces, not commas, so using xsd:string
+FloatTupleType.createClass("float_tuple", "xsd:string")
+
#
#
#
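
Editorial aside: a quick behavioral sketch of FloatTupleType.convert as defined above, showing the comma-first, whitespace-fallback parsing; the trailing-comma case mirrors inputs like the RidgeCV alphas strings used in the new tests:

def convert(value):
  # same logic as FloatTupleType.convert above
  delim = ',' if ',' in value else None
  return tuple(float(x.strip()) for x in value.split(delim) if x.strip())

print(convert("1.1, 2.0, 3.4"))  # (1.1, 2.0, 3.4)
print(convert("1.1 2.0 3.4"))    # (1.1, 2.0, 3.4) -- whitespace fallback
print(convert("0.1,1.0,10.0,"))  # (0.1, 1.0, 10.0) -- empty trailing entry dropped
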
diff --git a/tests/framework/ROM/SKLearn/adaBoostRegressor_plotting.xml b/tests/framework/ROM/SKLearn/adaBoostRegressor_plotting.xml
new file mode 100644
index 0000000000..2ef2366f94
--- /dev/null
+++ b/tests/framework/ROM/SKLearn/adaBoostRegressor_plotting.xml
@@ -0,0 +1,214 @@
+
+
+
+ framework/ROM/SKLearn.AdaBoostRegressor
+ wangc
+ 2021-11-23
+ SupervisedLearning.SciKitLearn.AdaBoostRegressor
+
+ An example exercising AdaBoost regressor methods, comparing the AdaBoost regressor
+ with an individual DecisionTreeRegressor.
+ A simple attenuate model is used for the comparison.
+
+
+ Compare AdaBoost Regressor with individual DecisionTreeRegressor regressor
+
+
+
+
+ data
+
+ sample,
+ trainAdaBoost,
+ trainDecisionTree,
+ resample,
+ resampleAdaBoost,
+ resampleDecisionTree,
+ plot
+
+
+
+
+
+ X, Y
+ ans
+
+
+ X,Y
+ ans
+ 50
+ 0.1
+ linear
+ dtr
+
+
+ X,Y
+ ans
+ mse
+ best
+ 2
+ 1
+ 1
+
+
+
+
+
+ dummyIN
+ foo
+ mcSampler
+
+
+
+ dummyIN
+ foo
+ mcReSampler
+
+
+
+ dummyIN
+ abr
+ mcReSampler
+
+
+
+ dummyIN
+ dtr
+ mcReSampler
+
+
+
+ trainingData
+
+
+
+ trainingData
+
+
+
+ outData
+ outDataAdaBoost
+ outDataDTR
+
+
+
+
+
+
+
+
+ 50
+ 888
+
+
+ smallUniformDist
+
+
+ largeUniformDist
+
+
+
+
+ 20
+ 1301
+
+
+ smallUniformDist
+
+
+ largeUniformDist
+
+
+
+
+
+
+ 2
+ 3
+
+
+ -1
+ 6
+
+
+
+
+
+
+ csv
+
+ outAdaBoostRegressor_plotting
+
+
+
+
+ scatter
+ outData|Input|prefix
+ outData|Output|ans
+ *
+ r
+
+
+
+
+
+ scatter
+ outDataAdaBoost|Input|prefix
+ outDataAdaBoost|Output|ans
+ d
+ b
+
+
+
+
+
+ scatter
+ outDataDTR|Input|prefix
+ outDataDTR|Output|ans
+ ^
+ g
+
+
+
+
+ Test Samples
+ Predicted
+
+
+
+ png
+
+ AdaBoost Regressor Prediction
+
+
+
+
+
+
+
+
+ X,Y
+
+
+
+ X,Y
+
+
+
+
+ prefix, X,Y
+
+
+
+ prefix, X,Y
+
+
+
+ prefix, X,Y
+
+
+
+
+
diff --git a/tests/framework/ROM/SKLearn/baggingRegressor_plotting.xml b/tests/framework/ROM/SKLearn/baggingRegressor_plotting.xml
new file mode 100644
index 0000000000..f4470bc164
--- /dev/null
+++ b/tests/framework/ROM/SKLearn/baggingRegressor_plotting.xml
@@ -0,0 +1,217 @@
+
+
+
+ framework/ROM/SKLearn.BaggingRegressor
+ wangc
+ 2021-11-23
+ SupervisedLearning.SciKitLearn.BaggingRegressor
+
+ An example exercising bagging regressor methods, comparing the Bagging regressor
+ with an individual SVR regressor.
+ A simple attenuate model is used for the comparison.
+
+
+ Compare Bagging Regressor with individual SVR regressor
+
+
+
+
+ data
+
+ sample,
+ trainBagging,
+ trainSVR,
+ resample,
+ resampleBagging,
+ resampleSVR,
+ plot
+
+
+
+
+
+ X, Y
+ ans
+
+
+ X,Y
+ ans
+ 0.8
+ svr
+
+
+ X,Y
+ ans
+ 2.0
+ 0.02
+ rbf
+ 3
+ 0.0
+ True
+ 1e-3
+ 200
+ False
+ -1
+
+
+
+
+
+ dummyIN
+ foo
+ mcSampler
+
+
+
+ dummyIN
+ foo
+ mcReSampler
+
+
+
+ dummyIN
+ br
+ mcReSampler
+
+
+
+ dummyIN
+ svr
+ mcReSampler
+
+
+
+ trainingData
+
+
+
+ trainingData
+
+
+
+ outData
+ outDataBagging
+ outDataSVR
+
+
+
+
+
+
+
+
+ 200
+ 888
+
+
+ smallUniformDist
+
+
+ largeUniformDist
+
+
+
+
+ 20
+ 1301
+
+
+ smallUniformDist
+
+
+ largeUniformDist
+
+
+
+
+
+
+ 2
+ 3
+
+
+ -1
+ 6
+
+
+
+
+
+
+ csv
+
+ outBaggingRegressor_plotting
+
+
+
+
+ scatter
+ outData|Input|prefix
+ outData|Output|ans
+ *
+ r
+
+
+
+
+
+ scatter
+ outDataBagging|Input|prefix
+ outDataBagging|Output|ans
+ d
+ b
+
+
+
+
+
+ scatter
+ outDataSVR|Input|prefix
+ outDataSVR|Output|ans
+ ^
+ g
+
+
+
+
+ Test Samples
+ Predicted
+
+
+
+ png
+
+ Bagging Regressor Prediction
+
+
+
+
+
+
+
+
+ X,Y
+
+
+
+ X,Y
+
+
+
+
+ prefix, X,Y
+
+
+
+ prefix, X,Y
+
+
+
+ prefix, X,Y
+
+
+
+
+
diff --git a/tests/framework/ROM/SKLearn/gold/data/AdaBoostRegressor_scatter-scatter-scatter.png b/tests/framework/ROM/SKLearn/gold/data/AdaBoostRegressor_scatter-scatter-scatter.png
new file mode 100644
index 0000000000..b4c7ce20aa
Binary files /dev/null and b/tests/framework/ROM/SKLearn/gold/data/AdaBoostRegressor_scatter-scatter-scatter.png differ
diff --git a/tests/framework/ROM/SKLearn/gold/data/BaggingRegressor_scatter-scatter-scatter.png b/tests/framework/ROM/SKLearn/gold/data/BaggingRegressor_scatter-scatter-scatter.png
new file mode 100644
index 0000000000..9d0e6b41cb
Binary files /dev/null and b/tests/framework/ROM/SKLearn/gold/data/BaggingRegressor_scatter-scatter-scatter.png differ
diff --git a/tests/framework/ROM/SKLearn/gold/data/outAdaBoostRegressor_plotting.csv b/tests/framework/ROM/SKLearn/gold/data/outAdaBoostRegressor_plotting.csv
new file mode 100644
index 0000000000..b6ff641366
--- /dev/null
+++ b/tests/framework/ROM/SKLearn/gold/data/outAdaBoostRegressor_plotting.csv
@@ -0,0 +1,21 @@
+prefix,X,Y,ans,ProbabilityWeight-Y,PointProbability,ProbabilityWeight,ProbabilityWeight-X
+1,2.37395064053,1.2592203627,0.162579933188,1.0,0.142857142857,1.0,1.0
+2,2.82158140578,5.97995419753,0.0122679169591,1.0,0.142857142857,1.0,1.0
+3,2.56694523654,1.87276302135,0.108624952906,1.0,0.142857142857,1.0,1.0
+4,2.80341326487,2.78625752399,0.0611249350091,1.0,0.142857142857,1.0,1.0
+5,2.71364057476,3.65586515904,0.0413884729987,1.0,0.142857142857,1.0,1.0
+6,2.04760532874,4.77861658711,0.0329385703134,1.0,0.142857142857,1.0,1.0
+7,2.31492473006,5.25335494272,0.0227284045175,1.0,0.142857142857,1.0,1.0
+8,2.02858035267,2.64979941879,0.0964057062427,1.0,0.142857142857,1.0,1.0
+9,2.78864469607,2.09761066923,0.0868886658559,1.0,0.142857142857,1.0,1.0
+10,2.57392735676,2.35865395641,0.0848991959226,1.0,0.142857142857,1.0,1.0
+11,2.30584621879,0.386827545796,0.26019162966,1.0,0.142857142857,1.0,1.0
+12,2.44849664589,-0.763326524003,0.430595966636,1.0,0.142857142857,1.0,1.0
+13,2.34505834555,0.506639431349,0.240304384984,1.0,0.142857142857,1.0,1.0
+14,2.54703313241,-0.571748185105,0.372453726271,1.0,0.142857142857,1.0,1.0
+15,2.90837015093,1.94612946849,0.0882792838653,1.0,0.142857142857,1.0,1.0
+16,2.39641788308,-0.146018058328,0.324587571799,1.0,0.142857142857,1.0,1.0
+17,2.18106394498,5.16718110679,0.0253716587318,1.0,0.142857142857,1.0,1.0
+18,2.03877198557,-0.438499440541,0.449267737101,1.0,0.142857142857,1.0,1.0
+19,2.68708064726,5.48527552921,0.0168033318433,1.0,0.142857142857,1.0,1.0
+20,2.76851823734,5.12613888647,0.0193062082868,1.0,0.142857142857,1.0,1.0
diff --git a/tests/framework/ROM/SKLearn/gold/data/outBaggingRegressor_plotting.csv b/tests/framework/ROM/SKLearn/gold/data/outBaggingRegressor_plotting.csv
new file mode 100644
index 0000000000..28765e4e03
--- /dev/null
+++ b/tests/framework/ROM/SKLearn/gold/data/outBaggingRegressor_plotting.csv
@@ -0,0 +1,21 @@
+prefix,X,Y,ans,ProbabilityWeight-X,PointProbability,ProbabilityWeight-Y,ProbabilityWeight
+1,2.37395064053,1.2592203627,0.162579933188,1.0,0.142857142857,1.0,1.0
+2,2.82158140578,5.97995419753,0.0122679169591,1.0,0.142857142857,1.0,1.0
+3,2.56694523654,1.87276302135,0.108624952906,1.0,0.142857142857,1.0,1.0
+4,2.80341326487,2.78625752399,0.0611249350091,1.0,0.142857142857,1.0,1.0
+5,2.71364057476,3.65586515904,0.0413884729987,1.0,0.142857142857,1.0,1.0
+6,2.04760532874,4.77861658711,0.0329385703134,1.0,0.142857142857,1.0,1.0
+7,2.31492473006,5.25335494272,0.0227284045175,1.0,0.142857142857,1.0,1.0
+8,2.02858035267,2.64979941879,0.0964057062427,1.0,0.142857142857,1.0,1.0
+9,2.78864469607,2.09761066923,0.0868886658559,1.0,0.142857142857,1.0,1.0
+10,2.57392735676,2.35865395641,0.0848991959226,1.0,0.142857142857,1.0,1.0
+11,2.30584621879,0.386827545796,0.26019162966,1.0,0.142857142857,1.0,1.0
+12,2.44849664589,-0.763326524003,0.430595966636,1.0,0.142857142857,1.0,1.0
+13,2.34505834555,0.506639431349,0.240304384984,1.0,0.142857142857,1.0,1.0
+14,2.54703313241,-0.571748185105,0.372453726271,1.0,0.142857142857,1.0,1.0
+15,2.90837015093,1.94612946849,0.0882792838653,1.0,0.142857142857,1.0,1.0
+16,2.39641788308,-0.146018058328,0.324587571799,1.0,0.142857142857,1.0,1.0
+17,2.18106394498,5.16718110679,0.0253716587318,1.0,0.142857142857,1.0,1.0
+18,2.03877198557,-0.438499440541,0.449267737101,1.0,0.142857142857,1.0,1.0
+19,2.68708064726,5.48527552921,0.0168033318433,1.0,0.142857142857,1.0,1.0
+20,2.76851823734,5.12613888647,0.0193062082868,1.0,0.142857142857,1.0,1.0
diff --git a/tests/framework/ROM/SKLearn/gold/data/outStackingRegressor_plotting.csv b/tests/framework/ROM/SKLearn/gold/data/outStackingRegressor_plotting.csv
new file mode 100644
index 0000000000..0eeb150def
--- /dev/null
+++ b/tests/framework/ROM/SKLearn/gold/data/outStackingRegressor_plotting.csv
@@ -0,0 +1,21 @@
+prefix,X,Y,ans,PointProbability,ProbabilityWeight,ProbabilityWeight-X,ProbabilityWeight-Y
+1,2.37395064053,1.2592203627,0.162579933188,0.142857142857,1.0,1.0,1.0
+2,2.82158140578,5.97995419753,0.0122679169591,0.142857142857,1.0,1.0,1.0
+3,2.56694523654,1.87276302135,0.108624952906,0.142857142857,1.0,1.0,1.0
+4,2.80341326487,2.78625752399,0.0611249350091,0.142857142857,1.0,1.0,1.0
+5,2.71364057476,3.65586515904,0.0413884729987,0.142857142857,1.0,1.0,1.0
+6,2.04760532874,4.77861658711,0.0329385703134,0.142857142857,1.0,1.0,1.0
+7,2.31492473006,5.25335494272,0.0227284045175,0.142857142857,1.0,1.0,1.0
+8,2.02858035267,2.64979941879,0.0964057062427,0.142857142857,1.0,1.0,1.0
+9,2.78864469607,2.09761066923,0.0868886658559,0.142857142857,1.0,1.0,1.0
+10,2.57392735676,2.35865395641,0.0848991959226,0.142857142857,1.0,1.0,1.0
+11,2.30584621879,0.386827545796,0.26019162966,0.142857142857,1.0,1.0,1.0
+12,2.44849664589,-0.763326524003,0.430595966636,0.142857142857,1.0,1.0,1.0
+13,2.34505834555,0.506639431349,0.240304384984,0.142857142857,1.0,1.0,1.0
+14,2.54703313241,-0.571748185105,0.372453726271,0.142857142857,1.0,1.0,1.0
+15,2.90837015093,1.94612946849,0.0882792838653,0.142857142857,1.0,1.0,1.0
+16,2.39641788308,-0.146018058328,0.324587571799,0.142857142857,1.0,1.0,1.0
+17,2.18106394498,5.16718110679,0.0253716587318,0.142857142857,1.0,1.0,1.0
+18,2.03877198557,-0.438499440541,0.449267737101,0.142857142857,1.0,1.0,1.0
+19,2.68708064726,5.48527552921,0.0168033318433,0.142857142857,1.0,1.0,1.0
+20,2.76851823734,5.12613888647,0.0193062082868,0.142857142857,1.0,1.0,1.0
diff --git a/tests/framework/ROM/SKLearn/gold/data/outVotingRegressor.csv b/tests/framework/ROM/SKLearn/gold/data/outVotingRegressor.csv
new file mode 100644
index 0000000000..28cc34a33d
--- /dev/null
+++ b/tests/framework/ROM/SKLearn/gold/data/outVotingRegressor.csv
@@ -0,0 +1,122 @@
+X,Y,Z
+2.0,-1000.0,0.28045086201
+2.0,-800.0,0.247013620151
+2.0,-600.0,0.211804145454
+2.0,-400.0,0.187266913785
+2.0,-200.0,0.17131481152
+2.0,0.0,0.160608069765
+2.0,200.0,0.156410730326
+2.0,400.0,0.159609324073
+2.0,600.0,0.170635474546
+2.0,800.0,0.186514754541
+2.0,1000.0,0.197002326745
+2.1,-1000.0,0.277762921536
+2.1,-800.0,0.244325695888
+2.1,-600.0,0.209116277322
+2.1,-400.0,0.184579094746
+2.1,-200.0,0.168626987673
+2.1,0.0,0.157920187621
+2.1,200.0,0.153722780213
+2.1,400.0,0.156921339647
+2.1,600.0,0.1679475024
+2.1,800.0,0.183826827107
+2.1,1000.0,0.194314452687
+2.2,-1000.0,0.275074978868
+2.2,-800.0,0.241637770982
+2.2,-600.0,0.206428410202
+2.2,-400.0,0.181891277751
+2.2,-200.0,0.165939166399
+2.2,0.0,0.155232308272
+2.2,200.0,0.151034832738
+2.2,400.0,0.154233357268
+2.2,600.0,0.165259531252
+2.2,800.0,0.181138899339
+2.2,1000.0,0.191626577279
+2.3,-1000.0,0.272387034006
+2.3,-800.0,0.238949845433
+2.3,-600.0,0.203740544095
+2.3,-400.0,0.1792034628
+2.3,-200.0,0.163251347698
+2.3,0.0,0.152544431719
+2.3,200.0,0.148346887901
+2.3,400.0,0.151545376935
+2.3,600.0,0.162571561103
+2.3,800.0,0.178450971239
+2.3,1000.0,0.188938700522
+2.4,-1000.0,0.269699086949
+2.4,-800.0,0.236261919242
+2.4,-600.0,0.201052679
+2.4,-400.0,0.176515649892
+2.4,-200.0,0.16056353157
+2.4,0.0,0.149856557961
+2.4,200.0,0.145658945701
+2.4,400.0,0.14885739865
+2.4,600.0,0.159883591953
+2.4,800.0,0.175763042806
+2.4,1000.0,0.186250822415
+2.5,-1000.0,0.267011137698
+2.5,-800.0,0.233573992408
+2.5,-600.0,0.198364814917
+2.5,-400.0,0.173827839028
+2.5,-200.0,0.157875718015
+2.5,0.0,0.147168686999
+2.5,200.0,0.142971006138
+2.5,400.0,0.146169422412
+2.5,600.0,0.157195623801
+2.5,800.0,0.173075114039
+2.5,1000.0,0.183562942959
+2.6,-1000.0,0.264323186253
+2.6,-800.0,0.230886064931
+2.6,-600.0,0.195676951847
+2.6,-400.0,0.171140030208
+2.6,-200.0,0.155187907033
+2.6,0.0,0.144480818832
+2.6,200.0,0.140283069213
+2.6,400.0,0.14348144822
+2.6,600.0,0.154507656649
+2.6,800.0,0.17038718494
+2.6,1000.0,0.180875062153
+2.7,-1000.0,0.261635232613
+2.7,-800.0,0.228198136812
+2.7,-600.0,0.19298908979
+2.7,-400.0,0.168452223431
+2.7,-200.0,0.152500098623
+2.7,0.0,0.141792953461
+2.7,200.0,0.137595134925
+2.7,400.0,0.140793476076
+2.7,600.0,0.151819690496
+2.7,800.0,0.167699255508
+2.7,1000.0,0.178187179997
+2.8,-1000.0,0.25894727678
+2.8,-800.0,0.22551020805
+2.8,-600.0,0.190301228745
+2.8,-400.0,0.165764418698
+2.8,-200.0,0.149812292787
+2.8,0.0,0.139105090885
+2.8,200.0,0.134907203275
+2.8,400.0,0.138105505979
+2.8,600.0,0.149131725342
+2.8,800.0,0.165011325743
+2.8,1000.0,0.175499296493
+2.9,-1000.0,0.256259318752
+2.9,-800.0,0.222822278645
+2.9,-600.0,0.187613368712
+2.9,-400.0,0.163076616009
+2.9,-200.0,0.147124489524
+2.9,0.0,0.136417231105
+2.9,200.0,0.132219274262
+2.9,400.0,0.135417537929
+2.9,600.0,0.146443761186
+2.9,800.0,0.162323395645
+2.9,1000.0,0.172811411638
+3.0,-1000.0,0.25357135853
+3.0,-800.0,0.220134348598
+3.0,-600.0,0.184925509693
+3.0,-400.0,0.160388815363
+3.0,-200.0,0.144436688833
+3.0,0.0,0.13372937412
+3.0,200.0,0.129531347887
+3.0,400.0,0.132729571926
+3.0,600.0,0.14375579803
+3.0,800.0,0.159635465214
+3.0,1000.0,0.170123525434
diff --git a/tests/framework/ROM/SKLearn/gold/data/outVotingRegressor_plotting.csv b/tests/framework/ROM/SKLearn/gold/data/outVotingRegressor_plotting.csv
new file mode 100644
index 0000000000..255dfee0c3
--- /dev/null
+++ b/tests/framework/ROM/SKLearn/gold/data/outVotingRegressor_plotting.csv
@@ -0,0 +1,21 @@
+prefix,X,Y,ans,PointProbability,ProbabilityWeight-Y,ProbabilityWeight,ProbabilityWeight-X
+1,2.37395064053,1.2592203627,0.162579933188,0.142857142857,1.0,1.0,1.0
+2,2.82158140578,5.97995419753,0.0122679169591,0.142857142857,1.0,1.0,1.0
+3,2.56694523654,1.87276302135,0.108624952906,0.142857142857,1.0,1.0,1.0
+4,2.80341326487,2.78625752399,0.0611249350091,0.142857142857,1.0,1.0,1.0
+5,2.71364057476,3.65586515904,0.0413884729987,0.142857142857,1.0,1.0,1.0
+6,2.04760532874,4.77861658711,0.0329385703134,0.142857142857,1.0,1.0,1.0
+7,2.31492473006,5.25335494272,0.0227284045175,0.142857142857,1.0,1.0,1.0
+8,2.02858035267,2.64979941879,0.0964057062427,0.142857142857,1.0,1.0,1.0
+9,2.78864469607,2.09761066923,0.0868886658559,0.142857142857,1.0,1.0,1.0
+10,2.57392735676,2.35865395641,0.0848991959226,0.142857142857,1.0,1.0,1.0
+11,2.30584621879,0.386827545796,0.26019162966,0.142857142857,1.0,1.0,1.0
+12,2.44849664589,-0.763326524003,0.430595966636,0.142857142857,1.0,1.0,1.0
+13,2.34505834555,0.506639431349,0.240304384984,0.142857142857,1.0,1.0,1.0
+14,2.54703313241,-0.571748185105,0.372453726271,0.142857142857,1.0,1.0,1.0
+15,2.90837015093,1.94612946849,0.0882792838653,0.142857142857,1.0,1.0,1.0
+16,2.39641788308,-0.146018058328,0.324587571799,0.142857142857,1.0,1.0,1.0
+17,2.18106394498,5.16718110679,0.0253716587318,0.142857142857,1.0,1.0,1.0
+18,2.03877198557,-0.438499440541,0.449267737101,0.142857142857,1.0,1.0,1.0
+19,2.68708064726,5.48527552921,0.0168033318433,0.142857142857,1.0,1.0,1.0
+20,2.76851823734,5.12613888647,0.0193062082868,0.142857142857,1.0,1.0,1.0
diff --git a/tests/framework/ROM/SKLearn/gold/data/stackingRegressor_scatter-scatter-scatter-scatter-scatter.png b/tests/framework/ROM/SKLearn/gold/data/stackingRegressor_scatter-scatter-scatter-scatter-scatter.png
new file mode 100644
index 0000000000..21b5c69273
Binary files /dev/null and b/tests/framework/ROM/SKLearn/gold/data/stackingRegressor_scatter-scatter-scatter-scatter-scatter.png differ
diff --git a/tests/framework/ROM/SKLearn/gold/data/votingRegressor_scatter-scatter-scatter-scatter-scatter.png b/tests/framework/ROM/SKLearn/gold/data/votingRegressor_scatter-scatter-scatter-scatter-scatter.png
new file mode 100644
index 0000000000..ed6189317f
Binary files /dev/null and b/tests/framework/ROM/SKLearn/gold/data/votingRegressor_scatter-scatter-scatter-scatter-scatter.png differ
diff --git a/tests/framework/ROM/SKLearn/stackingRegressor_plotting.xml b/tests/framework/ROM/SKLearn/stackingRegressor_plotting.xml
new file mode 100644
index 0000000000..bf34ebe33a
--- /dev/null
+++ b/tests/framework/ROM/SKLearn/stackingRegressor_plotting.xml
@@ -0,0 +1,288 @@
+
+
+
+ framework/ROM/SKLearn.StackingRegressor
+ wangc
+ 2021-11-24
+ SupervisedLearning.SciKitLearn.StackingRegressor
+
+ An example exercising stacking regressor methods, comparing the Stacking regressor
+ with individual LinearRegression, SVR, and RidgeCV regressors.
+ A simple attenuate model is used for the comparison.
+
+
+ Compare Stacking Regressor with individual regressors
+
+
+
+
+ data
+
+ sample,
+ trainSR,
+ trainLR,
+ trainSVR,
+ trainRidgeCV,
+ resample,
+ resampleSR,
+ resampleLR,
+ resampleSVR,
+ resampleRidgeCV,
+ plot
+
+
+
+
+
+ X, Y
+ ans
+
+
+ X,Y
+ ans
+ lr
+ svr
+ ridgeCV
+ ridgeCV
+
+
+ X,Y
+ ans
+ True
+ True
+
+
+ X,Y
+ ans
+ 1.0
+ 0.02
+ rbf
+ 3
+ 0.0
+ True
+ 1e-3
+ 200
+ False
+ -1
+
+
+
+ X,Y
+ ans
+ 0.1,1.0,10.0
+ True
+ True
+ False
+
+
+
+
+
+ dummyIN
+ foo
+ mcSampler
+
+
+
+ dummyIN
+ foo
+ mcReSampler
+
+
+
+ dummyIN
+ sr
+ mcReSampler
+
+
+
+ dummyIN
+ lr
+ mcReSampler
+
+
+
+ dummyIN
+ svr
+ mcReSampler
+
+
+
+ dummyIN
+ ridgeCV
+ mcReSampler
+
+
+
+ trainingData
+
+
+
+ trainingData
+
+
+
+ trainingData
+
+
+
+ trainingData
+
+
+
+ outData
+ outDataSR
+ outDataSVR
+ outDataLR
+ outDataRidgeCV
+
+
+
+
+
+
+
+
+ 100
+ 888
+
+
+ smallUniformDist
+
+
+ largeUniformDist
+
+
+
+
+ 20
+ 1301
+
+
+ smallUniformDist
+
+
+ largeUniformDist
+
+
+
+
+
+
+ 2
+ 3
+
+
+ -1
+ 6
+
+
+
+
+
+
+ csv
+
+ outStackingRegressor_plotting
+
+
+
+
+ scatter
+ outData|Input|prefix
+ outData|Output|ans
+ *
+ r
+
+
+
+
+
+ scatter
+ outDataSR|Input|prefix
+ outDataSR|Output|ans
+ d
+ b
+
+
+
+
+
+ scatter
+ outDataLR|Input|prefix
+ outDataLR|Output|ans
+ s
+ y
+
+
+
+
+
+ scatter
+ outDataSVR|Input|prefix
+ outDataSVR|Output|ans
+ ^
+ g
+
+
+
+
+
+ scatter
+ outDataRidgeCV|Input|prefix
+ outDataRidgeCV|Output|ans
+ o
+ m
+
+
+
+
+ Test Samples
+ Predicted
+
+
+
+ png
+
+ Regressor Predictions and Their Stacking
+
+
+
+
+
+
+
+
+ X,Y
+
+
+
+ X,Y
+
+
+
+
+ prefix, X,Y
+
+
+
+ prefix, X,Y
+
+
+
+ prefix, X,Y
+
+
+
+ prefix, X,Y
+
+
+
+ prefix, X,Y
+
+
+
+
+
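
Editorial aside: for orientation, a rough scikit-learn equivalent (>= 0.24, per the test's skip note below) of what this XML wires up: LinearRegression, SVR, and RidgeCV stacked under a RidgeCV final estimator. The data here is a synthetic stand-in, not the test's attenuate model, and the hyperparameters are illustrative:

import numpy as np
from sklearn.ensemble import StackingRegressor
from sklearn.linear_model import LinearRegression, RidgeCV
from sklearn.svm import SVR

estimators = [('lr', LinearRegression()),
              ('svr', SVR(C=1.0, gamma=0.02, kernel='rbf')),
              ('ridgeCV', RidgeCV(alphas=(0.1, 1.0, 10.0)))]
stack = StackingRegressor(estimators=estimators,
                          final_estimator=RidgeCV(alphas=(0.1, 1.0, 10.0)))

rng = np.random.default_rng(888)
X = np.column_stack([rng.uniform(2, 3, 100), rng.uniform(-1, 6, 100)])
y = np.exp(-X.sum(axis=1))  # synthetic attenuation-like response
stack.fit(X, y)
print(stack.predict(X[:3]))
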
diff --git a/tests/framework/ROM/SKLearn/tests b/tests/framework/ROM/SKLearn/tests
index 4d161d9713..4ee60714d9 100644
--- a/tests/framework/ROM/SKLearn/tests
+++ b/tests/framework/ROM/SKLearn/tests
@@ -292,6 +292,46 @@
output = 'data/outMLPRegressor.xml'
rel_err = 1.0e-8
[../]
+ [./votingRegressor]
+ type = 'RavenFramework'
+ input = 'votingRegressor.xml'
+ csv = 'data/outVotingRegressor.csv'
+ output = 'data/outVotingRegressor.xml'
+ rel_err = 1.0e-8
+ [../]
+ [./votingRegressor_plotting]
+ type = 'RavenFramework'
+ input = 'votingRegressor_plotting.xml'
+ csv = 'data/outVotingRegressor_plotting.csv'
+ image = 'data/votingRegressor_scatter-scatter-scatter-scatter-scatter.png'
+ required_libraries = 'imageio'
+ rel_err = 0.1
+ [../]
+ [./baggingRegressor_plotting]
+ type = 'RavenFramework'
+ input = 'baggingRegressor_plotting.xml'
+ csv = 'data/outBaggingRegressor_plotting.csv'
+ image = 'data/BaggingRegressor_scatter-scatter-scatter.png'
+ required_libraries = 'imageio'
+ rel_err = 0.1
+ [../]
+ [./adaBoostRegressor_plotting]
+ type = 'RavenFramework'
+ input = 'adaBoostRegressor_plotting.xml'
+ csv = 'data/outAdaBoostRegressor_plotting.csv'
+ image = 'data/AdaBoostRegressor_scatter-scatter-scatter.png'
+ required_libraries = 'imageio'
+ rel_err = 0.1
+ [../]
+ [./stackingRegressor_plotting]
+ type = 'RavenFramework'
+ input = 'stackingRegressor_plotting.xml'
+ csv = 'data/outStackingRegressor_plotting.csv'
+ image = 'data/stackingRegressor_scatter-scatter-scatter-scatter-scatter.png'
+ required_libraries = 'imageio'
+ skip = "require sklearn version 0.24"
+ rel_err = 0.0001
+ [../]
##############################################################################
## Inconsistent Results
#[./DTR]
diff --git a/tests/framework/ROM/SKLearn/votingRegressor.xml b/tests/framework/ROM/SKLearn/votingRegressor.xml
new file mode 100644
index 0000000000..7bb4be4bc2
--- /dev/null
+++ b/tests/framework/ROM/SKLearn/votingRegressor.xml
@@ -0,0 +1,86 @@
+
+
+
+ framework/ROM/SKLearn.VotingRegressor
+ wangc
+ 2021-11-16
+ SupervisedLearning.SciKitLearn.VotingRegressor
+
+ An example exercising voting regressor methods.
+ Note, all of the tests in SKLearn operate on a 2D input domain with
+ the goal of fitting a paraboloid function. The input dimensions are
+ of largely different scales and one dimension is off-centered from
+ the origin to ensure that normalization is being handled correctly.
+
+
+ Set up test for Voting Regressor
+
+
+
+
+ data
+
+ sample,
+ train,
+ resample
+
+
+
+
+
+ X, Y
+ Z
+
+
+ X,Y
+ Z
+ lr
+ svr
+ ridgeCV
+
+
+ X,Y
+ Z
+ True
+ True
+
+
+ X,Y
+ Z
+ 1.0
+ 0.1
+ rbf
+ 3
+ 0.0
+ True
+ 1e-3
+ 200
+ False
+ -1
+
+
+
+ X,Y
+ Z
+ 0.1,1.0,10.0
+ True
+ True
+ False
+
+
+
+
+
+
+
+
+
+ csv
+
+ outVotingRegressor
+ input,output
+
+
+
+
+
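
Editorial aside: likewise, a rough scikit-learn analogue of the votingRegressor test above, where predictions from the three fitted regressors are averaged; training data is a synthetic stand-in for the test's model:

import numpy as np
from sklearn.ensemble import VotingRegressor
from sklearn.linear_model import LinearRegression, RidgeCV
from sklearn.svm import SVR

vote = VotingRegressor([('lr', LinearRegression()),
                        ('svr', SVR(C=1.0, gamma=0.1, kernel='rbf')),
                        ('ridgeCV', RidgeCV(alphas=(0.1, 1.0, 10.0)))])

rng = np.random.default_rng(888)
X = np.column_stack([rng.uniform(2, 3, 100), rng.uniform(-1000, 1000, 100)])
Z = np.exp(-np.abs(X[:, 1]) / 5000.0) * 0.3  # synthetic stand-in response
vote.fit(X, Z)
print(vote.predict([[2.0, 0.0], [3.0, 1000.0]]))  # mean of the three fitted regressors
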
diff --git a/tests/framework/ROM/SKLearn/votingRegressor_plotting.xml b/tests/framework/ROM/SKLearn/votingRegressor_plotting.xml
new file mode 100644
index 0000000000..ec3906f06e
--- /dev/null
+++ b/tests/framework/ROM/SKLearn/votingRegressor_plotting.xml
@@ -0,0 +1,287 @@
+
+
+
+ framework/ROM/SKLearn.VotingRegressor
+ wangc
+ 2021-11-22
+ SupervisedLearning.SciKitLearn.VotingRegressor
+
+ An example exercising voting regressor methods, comparing the Voting regressor
+ with individual LinearRegression, SVR, and RidgeCV regressors.
+ A simple attenuate model is used for the comparison.
+
+
+ Compare Voting Regressor with individual regressors
+
+
+
+
+ data
+
+ sample,
+ trainVR,
+ trainLR,
+ trainSVR,
+ trainRidgeCV,
+ resample,
+ resampleVR,
+ resampleLR,
+ resampleSVR,
+ resampleRidgeCV,
+ plot
+
+
+
+
+
+ X, Y
+ ans
+
+
+ X,Y
+ ans
+ lr
+ svr
+ ridgeCV
+
+
+ X,Y
+ ans
+ True
+ True
+
+
+ X,Y
+ ans
+ 1.0
+ 0.1
+ rbf
+ 3
+ 0.0
+ True
+ 1e-3
+ 200
+ False
+ -1
+
+
+
+ X,Y
+ ans
+ 0.1,1.0,10.0
+ True
+ True
+ False
+
+
+
+
+
+ dummyIN
+ foo
+ mcSampler
+
+
+
+ dummyIN
+ foo
+ mcReSampler
+
+
+
+ dummyIN
+ vr
+ mcReSampler
+
+
+
+ dummyIN
+ lr
+ mcReSampler
+
+
+
+ dummyIN
+ svr
+ mcReSampler
+
+
+
+ dummyIN
+ ridgeCV
+ mcReSampler
+
+
+
+ trainingData
+
+
+
+ trainingData
+
+
+
+ trainingData
+
+
+
+ trainingData
+
+
+
+ outData
+ outDataVR
+ outDataSVR
+ outDataLR
+ outDataRidgeCV
+
+
+
+
+
+
+
+
+ 100
+ 888
+
+
+ smallUniformDist
+
+
+ largeUniformDist
+
+
+
+
+ 20
+ 1301
+
+
+ smallUniformDist
+
+
+ largeUniformDist
+
+
+
+
+
+
+ 2
+ 3
+
+
+ -1
+ 6
+
+
+
+
+
+
+ csv
+
+ outVotingRegressor_plotting
+
+
+
+
+ scatter
+ outData|Input|prefix
+ outData|Output|ans
+ *
+ r
+
+
+
+
+
+ scatter
+ outDataVR|Input|prefix
+ outDataVR|Output|ans
+ d
+ b
+
+
+
+
+
+ scatter
+ outDataLR|Input|prefix
+ outDataLR|Output|ans
+ s
+ y
+
+
+
+
+
+ scatter
+ outDataSVR|Input|prefix
+ outDataSVR|Output|ans
+ ^
+ g
+
+
+
+
+
+ scatter
+ outDataRidgeCV|Input|prefix
+ outDataRidgeCV|Output|ans
+ o
+ m
+
+
+
+
+ Test Samples
+ Predicted
+
+
+
+ png
+
+ Regressor Predictions and Their Average
+
+
+
+
+
+
+
+
+ X,Y
+
+
+
+ X,Y
+
+
+
+
+ prefix, X,Y
+
+
+
+ prefix, X,Y
+
+
+
+ prefix, X,Y
+
+
+
+ prefix, X,Y
+
+
+
+ prefix, X,Y
+
+
+
+
+