Ensemble Rom #1720

Merged
30 commits, merged Jan 17, 2022

Commits
eab212f
add voting regressor
wangcj05 Nov 16, 2021
b439252
enable voting regressor
wangcj05 Nov 17, 2021
b501791
adjust RidgeCV to be allowed in voting regression
wangcj05 Nov 18, 2021
1ecd7f5
update ScikitLearnBase the way to initialize the model
wangcj05 Nov 18, 2021
cac2c5b
clean up sklearn rom, move multioutput wrapper to base class
wangcj05 Nov 18, 2021
082bbc7
fix couple of bugs, especially uniqueVals reset in trainLocal
wangcj05 Nov 19, 2021
7ff0d61
clean up
wangcj05 Nov 19, 2021
5e1ab5a
add gold file for VotingRegressor
wangcj05 Nov 19, 2021
9e58711
fix multiclass classifier
wangcj05 Nov 19, 2021
f2a172f
update voting regressor
wangcj05 Nov 20, 2021
922bd94
remove whitespace
wangcj05 Nov 20, 2021
3970942
fix settings
wangcj05 Nov 21, 2021
0752953
fix problem in voting regression, add tests to testing the voting reg…
wangcj05 Nov 23, 2021
26c2a3b
add voting regressor plot file
wangcj05 Nov 23, 2021
93db6df
update info
wangcj05 Nov 23, 2021
f360dcc
clean up
wangcj05 Nov 23, 2021
f7befda
initial implementation for Bagging Regressor
wangcj05 Nov 23, 2021
feb3926
remove the multioutput layer for inner estimator, which will allow th…
wangcj05 Nov 23, 2021
45937ec
add bagging regressor test
wangcj05 Nov 23, 2021
7934b08
move some checks to ScikitLearnBase
wangcj05 Nov 23, 2021
998e949
add AdaBoost Regressor
wangcj05 Nov 23, 2021
b41e067
update scikitlearn from version 0.21 to 0.22
wangcj05 Nov 24, 2021
903e650
delete whitespace
wangcj05 Nov 24, 2021
09fd2e0
add StackingRegressor
wangcj05 Nov 24, 2021
3a48993
add sklearn version check in StackingRegressor
wangcj05 Nov 24, 2021
bae6738
add user manual for Ensemble ROMs
wangcj05 Nov 24, 2021
b4d2061
address comments
wangcj05 Jan 17, 2022
8eabfa9
Merge branch 'devel' into wangc/ensemble_rom
wangcj05 Jan 17, 2022
01b2ada
update plots
wangcj05 Jan 17, 2022
7862f6a
update
wangcj05 Jan 17, 2022
3 changes: 2 additions & 1 deletion doc/user_manual/generated/generateRomDoc.py
@@ -753,7 +753,8 @@
'KerasMLPRegression',
'KerasConvNetClassifier',
'KerasLSTMClassifier',
'KerasLSTMRegression']
'KerasLSTMRegression'
]
validInternalRom = ['NDspline',
'pickledROM',
'GaussPolynomialRom',
398 changes: 327 additions & 71 deletions doc/user_manual/generated/sklRom.tex

Large diffs are not rendered by default.

1 change: 0 additions & 1 deletion framework/Models/PostProcessors/LimitSurface.py
@@ -131,7 +131,6 @@ def _initializeLSpp(self, runInfo, inputs, initDict):
self.ROM.initializeModel(settings)
else:
self.ROM = self.assemblerDict['ROM'][0][3]
self.ROM.reset()
Collaborator:

This change makes me pretty nervous. What else changed that supports removing this reset?

Collaborator Author:

I just do not see the value of the reset here. This is the initialization stage and the ROM is initialized during it, so why would we need to reset it?

Collaborator:

Hm, I don't disagree. I wonder why it was added in the first place.

self.indexes = -1
for index, inp in enumerate(self.inputs):
if mathUtils.isAString(inp) or isinstance(inp, bytes):
16 changes: 8 additions & 8 deletions framework/Models/ROM.py
@@ -112,8 +112,8 @@ def __init__(self):
self.printTag = 'ROM MODEL' # label
self.cvInstanceName = None # the name of Cross Validation instance
self.cvInstance = None # Instance of provided cross validation
self._estimatorName = None # the name of estimator instance
self._estimator = None # Instance of provided estimator (ROM)
self._estimatorNameList = [] # the name list of estimator instance
self._estimatorList = [] # List of instances of provided estimators (ROM)
self._interfaceROM = None # Instance of provided ROM

self.pickled = False # True if ROM comes from a pickled rom
@@ -133,7 +133,7 @@ def __init__(self):
self.addAssemblerObject('Classifier', InputData.Quantity.zero_to_one)
self.addAssemblerObject('Metric', InputData.Quantity.zero_to_infinity)
self.addAssemblerObject('CV', InputData.Quantity.zero_to_one)
self.addAssemblerObject('estimator', InputData.Quantity.zero_to_one)
self.addAssemblerObject('estimator', InputData.Quantity.zero_to_infinity)

def __getstate__(self):
"""
@@ -187,8 +187,8 @@ def _readMoreXML(self,xmlNode):
cvNode = paramInput.findFirst('CV')
if cvNode is not None:
self.cvInstanceName = cvNode.value
estimatorNode = paramInput.findFirst('estimator')
self._estimatorName = estimatorNode.value if estimatorNode is not None else None
estimatorNodeList = paramInput.findAll('estimator')
self._estimatorNameList = [estimatorNode.value for estimatorNode in estimatorNodeList] if len(estimatorNodeList) > 0 else []

self._interfaceROM = self.interfaceFactory.returnInstance(self.subType)
segmentNode = paramInput.findFirst('Segment')
@@ -235,9 +235,9 @@ def initialize(self,runInfo,inputs,initDict=None):
self.cvInstance.initialize(runInfo, inputs, initDict)

# only initialize once
if self._estimator is None and self._estimatorName is not None:
self._estimator = self.retrieveObjectFromAssemblerDict('estimator', self._estimatorName)
self._interfaceROM.setEstimator(self._estimator)
if len(self._estimatorList) == 0 and len(self._estimatorNameList) > 0:
self._estimatorList = [self.retrieveObjectFromAssemblerDict('estimator', estimatorName) for estimatorName in self._estimatorNameList]
self._interfaceROM.setEstimator(self._estimatorList)

def reset(self):
"""
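For context on the diff above: the ROM entity previously read at most one <estimator> node and stored a single instance; it now collects every <estimator> node and hands the whole list to the interface ROM. A minimal sketch of the new flow, assuming the same paramInput and retrieval helpers used in the diff (the standalone function names here are hypothetical):

# Sketch of the one-to-many estimator flow introduced above; helper names are hypothetical.
def readEstimatorNames(paramInput):
  # collect the names of all <estimator> nodes (zero or more)
  return [node.value for node in paramInput.findAll('estimator')]

def initializeEstimators(rom, estimatorNames, retrieve):
  # resolve each name exactly once, then pass the full list along
  if estimatorNames and not rom._estimatorList:
    rom._estimatorList = [retrieve('estimator', name) for name in estimatorNames]
    rom._interfaceROM.setEstimator(rom._estimatorList)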
6 changes: 6 additions & 0 deletions framework/SupervisedLearning/Factory.py
@@ -109,6 +109,12 @@
from .ScikitLearn.Tree.DecisionTreeRegressor import DecisionTreeRegressor
from .ScikitLearn.Tree.ExtraTreeClassifier import ExtraTreeClassifier
from .ScikitLearn.Tree.ExtraTreeRegressor import ExtraTreeRegressor
# Ensemble ROM for Regression
from .ScikitLearn.Ensemble.VotingRegressor import VotingRegressor
from .ScikitLearn.Ensemble.BaggingRegressor import BaggingRegressor
from .ScikitLearn.Ensemble.AdaBoostRegressor import AdaBoostRegressor
# require sklearn version 0.24 at least
from .ScikitLearn.Ensemble.StackingRegressor import StackingRegressor
################################################################################

factory = EntityFactory('SupervisedLearning')
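The comment above flags that StackingRegressor needs a newer scikit-learn (at least 0.24, per the comment) than the 0.22 floor adopted elsewhere in this PR. One way to keep the factory importable on older installs is a guarded import; a minimal sketch, assuming the 0.24 floor stated above:

# Sketch: register StackingRegressor only when scikit-learn is new enough.
import sklearn

def _versionTuple(versionString):
  # turn '0.24.2' into (0, 24, 2) for a simple tuple comparison
  return tuple(int(part) for part in versionString.split('.')[:3])

if _versionTuple(sklearn.__version__) >= (0, 24):
  from .ScikitLearn.Ensemble.StackingRegressor import StackingRegressor
else:
  StackingRegressor = None  # skipped; the input parser can then report a clear error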
@@ -46,9 +46,7 @@ def __init__(self):
super().__init__()
import sklearn
import sklearn.discriminant_analysis
import sklearn.multioutput
# we wrap the model with the multi output classifier (for multitarget)
self.model = sklearn.multioutput.MultiOutputClassifier(sklearn.discriminant_analysis.LinearDiscriminantAnalysis())
self.model = sklearn.discriminant_analysis.LinearDiscriminantAnalysis

@classmethod
def getInputSpecification(cls):
@@ -47,9 +47,7 @@ def __init__(self):
super().__init__()
import sklearn
import sklearn.discriminant_analysis
import sklearn.multioutput
# we wrap the model with the multi output classifier (for multitarget)
self.model = sklearn.multioutput.MultiOutputClassifier(sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis())
self.model = sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis

@classmethod
def getInputSpecification(cls):
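Both discriminant-analysis diffs above make the same change: __init__ now stores the bare sklearn class instead of a MultiOutputClassifier-wrapped instance, deferring instantiation (and any multioutput wrapping) to the base class. A minimal sketch of that deferred pattern, assuming a multioutputWrapper flag and an initializeModel(settings) hook like those used elsewhere in this PR:

# Sketch of the deferred-instantiation pattern; not the actual ScikitLearnBase code.
import sklearn.discriminant_analysis
import sklearn.multioutput

class SketchBase:
  def __init__(self):
    # store the class itself; settings are not known yet at construction time
    self.model = sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis
    self.multioutputWrapper = True

  def initializeModel(self, settings):
    model = self.model(**settings)  # instantiate with the user's settings
    if self.multioutputWrapper:
      # wrap only now, so ensemble ROMs can still grab the bare estimator first
      model = sklearn.multioutput.MultiOutputClassifier(model)
    self.model = model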
121 changes: 121 additions & 0 deletions framework/SupervisedLearning/ScikitLearn/Ensemble/AdaBoostRegressor.py
@@ -0,0 +1,121 @@
# Copyright 2017 Battelle Energy Alliance, LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Created on Nov. 16, 2021

@author: wangc
AdaBoostRegressor
An AdaBoost regressor
"""
#Internal Modules (Lazy Importer)--------------------------------------------------------------------
#Internal Modules (Lazy Importer) End----------------------------------------------------------------

#External Modules------------------------------------------------------------------------------------
#External Modules End--------------------------------------------------------------------------------

#Internal Modules------------------------------------------------------------------------------------
from SupervisedLearning.ScikitLearn import ScikitLearnBase
from utils import InputData, InputTypes
#Internal Modules End--------------------------------------------------------------------------------

class AdaBoostRegressor(ScikitLearnBase):
"""
An AdaBoost regressor
"""
info = {'problemtype':'regression', 'normalize':False}

def __init__(self):
"""
Constructor that will appropriately initialize a supervised learning object
@ In, None
@ Out, None
"""
super().__init__()
self.multioutputWrapper = True
import sklearn
import sklearn.ensemble
self.model = sklearn.ensemble.AdaBoostRegressor

@classmethod
def getInputSpecification(cls):
"""
Method to get a reference to a class that specifies the input data for
class cls.
@ In, cls, the class for which we are retrieving the specification
@ Out, inputSpecification, InputData.ParameterInput, class to use for
specifying input of cls.
"""
specs = super().getInputSpecification()
specs.description = r"""The \xmlNode{AdaBoostRegressor} is a meta-estimator that begins by fitting a regressor on
the original dataset and then fits additional copies of the regressor on the same dataset
but where the weights of instances are adjusted according to the error of the current
prediction. As such, subsequent regressors focus more on difficult cases.
"""
estimatorInput = InputData.assemblyInputFactory("estimator", contentType=InputTypes.StringType,
descr=r"""name of a ROM that can be used as an estimator""", default='no-default')
specs.addSub(estimatorInput)
specs.addSub(InputData.parameterInputFactory("n_estimators", contentType=InputTypes.IntegerType,
descr=r"""The maximum number of estimators at which boosting is
terminated. In case of perfect fit, the learning procedure is
stopped early.""", default=50))
specs.addSub(InputData.parameterInputFactory("learning_rate", contentType=InputTypes.FloatType,
descr=r"""Weight applied to each regressor at each boosting iteration.
A higher learning rate increases the contribution of each regressor.
There is a trade-off between the learning\_rate and n\_estimators
parameters.""", default=1.0))
specs.addSub(InputData.parameterInputFactory("loss", contentType=InputTypes.makeEnumType("loss", "lossType",['linear', 'square', 'exponential']),
descr=r"""The loss function to use when updating the weights after each
boosting iteration.""", default='linear'))
specs.addSub(InputData.parameterInputFactory("random_state", contentType=InputTypes.IntegerType,
descr=r"""Controls the random seed given at each estimator at each
boosting iteration.""", default=None))
return specs

def _handleInput(self, paramInput):
"""
Function to handle the common parts of the distribution parameter input.
@ In, paramInput, ParameterInput, the already parsed input.
@ Out, None
"""
super()._handleInput(paramInput)
settings, notFound = paramInput.findNodesAndExtractValues(['n_estimators', 'learning_rate', 'loss', 'random_state'])
# notFound must be empty
assert(not notFound)
self.settings = settings

def setEstimator(self, estimatorList):
"""
Initialization method
@ In, estimatorList, list of ROM instances/estimators used by ROM
@ Out, None
"""
super().setEstimator(estimatorList)
if len(estimatorList) != 1:
self.raiseAWarning('ROM', self.name, 'can only accept one estimator, but multiple estimators are provided!',
'Only the first one will be used, i.e.,', estimatorList[0].name)
estimator = estimatorList[0]
interfaceRom = estimator._interfaceROM
if interfaceRom.info['problemtype'] != 'regression':
self.raiseAnError(IOError, 'estimator:', estimator.name, 'with problem type', interfaceRom.info['problemtype'],
'can not be used for', self.name)
# In sklearn, the multioutput wrapper can not be used by the outer and inner estimator at the same time.
# If the outer estimator can handle multioutput, the multioutput wrapper of the inner estimator can be kept;
# otherwise, we need to remove the wrapper from the inner estimator.
if interfaceRom.multioutputWrapper:
sklEstimator = interfaceRom.model.get_params()['estimator']
else:
sklEstimator = interfaceRom.model
settings = {'base_estimator':sklEstimator}
self.settings.update(settings)
self.initializeModel(self.settings)
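To illustrate the unwrapping comment in setEstimator above with plain scikit-learn: a MultiOutputRegressor exposes its inner estimator through get_params()['estimator'], and that bare estimator is what base_estimator expects (argument name per the 0.22-era scikit-learn this PR targets). A standalone sketch, not RAVEN code:

# Standalone sketch of the wrapper-unwrapping logic used by setEstimator above.
import numpy as np
from sklearn.ensemble import AdaBoostRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.tree import DecisionTreeRegressor

wrapped = MultiOutputRegressor(DecisionTreeRegressor(max_depth=3))
inner = wrapped.get_params()['estimator']  # the bare DecisionTreeRegressor

# the inner estimator feeds the booster; multioutput handling is re-applied
# around the outer AdaBoostRegressor instead of the inner estimator
booster = MultiOutputRegressor(AdaBoostRegressor(base_estimator=inner, n_estimators=50))

X = np.random.rand(100, 2)
Y = np.column_stack([X.sum(axis=1), X.prod(axis=1)])  # two targets
booster.fit(X, Y)
print(booster.predict(X[:3]))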
139 changes: 139 additions & 0 deletions framework/SupervisedLearning/ScikitLearn/Ensemble/BaggingRegressor.py
@@ -0,0 +1,139 @@
# Copyright 2017 Battelle Energy Alliance, LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Created on Nov. 22, 2021

@author: wangc
BaggingRegressor
A Bagging regressor.
"""
#Internal Modules (Lazy Importer)--------------------------------------------------------------------
#Internal Modules (Lazy Importer) End----------------------------------------------------------------

#External Modules------------------------------------------------------------------------------------
#External Modules End--------------------------------------------------------------------------------

#Internal Modules------------------------------------------------------------------------------------
from SupervisedLearning.ScikitLearn import ScikitLearnBase
from utils import InputData, InputTypes
#Internal Modules End--------------------------------------------------------------------------------

class BaggingRegressor(ScikitLearnBase):
"""
A Bagging Regressor
A Bagging regressor is an ensemble meta-estimator that fits base regressors each on random subsets of the original
dataset and then aggregates their individual predictions (either by voting or by averaging) to form a final
prediction. Such a meta-estimator can typically be used as a way to reduce the variance of a black-box estimator
(e.g., a decision tree), by introducing randomization into its construction procedure and then making an ensemble
out of it.

This algorithm encompasses several works from the literature. When random subsets of the dataset are drawn as
random subsets of the samples, then this algorithm is known as Pasting. If samples are drawn with replacement,
then the method is known as Bagging. When random subsets of the dataset are drawn as random subsets of the
features, then the method is known as Random Subspaces. Finally, when base estimators are built on subsets of
both samples and features, then the method is known as Random Patches.
"""
info = {'problemtype':'regression', 'normalize':False}

def __init__(self):
"""
Constructor that will appropriately initialize a supervised learning object
@ In, None
@ Out, None
"""
super().__init__()
self.multioutputWrapper = True
import sklearn
import sklearn.ensemble
self.model = sklearn.ensemble.BaggingRegressor

@classmethod
def getInputSpecification(cls):
"""
Method to get a reference to a class that specifies the input data for
class cls.
@ In, cls, the class for which we are retrieving the specification
@ Out, inputSpecification, InputData.ParameterInput, class to use for
specifying input of cls.
"""
specs = super().getInputSpecification()
specs.description = r"""The \xmlNode{BaggingRegressor} is an ensemble meta-estimator that fits base regressors each on random subsets of the original
dataset and then aggregates their individual predictions (either by voting or by averaging) to form a final
prediction. Such a meta-estimator can typically be used as a way to reduce the variance of a black-box estimator
(e.g., a decision tree), by introducing randomization into its construction procedure and then making an ensemble
out of it.
"""
estimatorInput = InputData.assemblyInputFactory("estimator", contentType=InputTypes.StringType,
descr=r"""name of a ROM that can be used as an estimator""", default='no-default')
specs.addSub(estimatorInput)
specs.addSub(InputData.parameterInputFactory("n_estimators", contentType=InputTypes.IntegerType,
descr=r"""The number of base estimators in the ensemble.""", default=10))
specs.addSub(InputData.parameterInputFactory("max_samples", contentType=InputTypes.FloatType,
descr=r"""The number of samples to draw from X to train each base estimator""", default=1.0))
specs.addSub(InputData.parameterInputFactory("max_features", contentType=InputTypes.FloatType,
descr=r"""The number of features to draw from X to train each base estimator """, default=1.0))
specs.addSub(InputData.parameterInputFactory("bootstrap", contentType=InputTypes.BoolType,
descr=r"""Whether samples are drawn with replacement. If False, sampling without
replacement is performed.""", default=True))
specs.addSub(InputData.parameterInputFactory("bootstrap_features", contentType=InputTypes.BoolType,
descr=r"""Whether features are drawn with replacement.""", default=False))
specs.addSub(InputData.parameterInputFactory("oob_score", contentType=InputTypes.BoolType,
descr=r"""Whether to use out-of-bag samples to estimate the generalization error.
Only available if bootstrap=True.""", default=False))
specs.addSub(InputData.parameterInputFactory("warm_start", contentType=InputTypes.BoolType,
descr=r"""When set to True, reuse the solution of the previous call to fit and add more
estimators to the ensemble, otherwise, just fit a whole new ensemble.""", default=False))
specs.addSub(InputData.parameterInputFactory("random_state", contentType=InputTypes.IntegerType,
descr=r"""Controls the random resampling of the original dataset (sample wise and feature wise). """,
default=None))
return specs

def _handleInput(self, paramInput):
"""
Function to handle the common parts of the distribution parameter input.
@ In, paramInput, ParameterInput, the already parsed input.
@ Out, None
"""
super()._handleInput(paramInput)
settings, notFound = paramInput.findNodesAndExtractValues(['n_estimators', 'max_samples', 'max_features', 'bootstrap', 'bootstrap_features',
'oob_score', 'warm_start', 'random_state'])
# notFound must be empty
assert(not notFound)
self.settings = settings

def setEstimator(self, estimatorList):
"""
Initialization method
@ In, estimatorList, list of ROM instances/estimators used by ROM
@ Out, None
"""
super().setEstimator(estimatorList)
if len(estimatorList) != 1:
self.raiseAWarning('ROM', self.name, 'can only accept one estimator, but multiple estimators are provided!',
'Only the first one will be used, i.e.,', estimatorList[0].name)
estimator = estimatorList[0]
interfaceRom = estimator._interfaceROM
if interfaceRom.info['problemtype'] != 'regression':
self.raiseAnError(IOError, 'estimator:', estimator.name, 'with problem type', interfaceRom.info['problemtype'],
'can not be used for', self.name)
# In sklearn, the multioutput wrapper can not be used by the outer and inner estimator at the same time.
# If the outer estimator can handle multioutput, the multioutput wrapper of the inner estimator can be kept;
# otherwise, we need to remove the wrapper from the inner estimator.
if interfaceRom.multioutputWrapper:
sklEstimator = interfaceRom.model.get_params()['estimator']
else:
sklEstimator = interfaceRom.model
settings = {'base_estimator':sklEstimator}
self.settings.update(settings)
self.initializeModel(self.settings)
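The docstring above names four sampling regimes (Pasting, Bagging, Random Subspaces, Random Patches); they map directly onto the max_samples, max_features, and bootstrap parameters exposed here. A plain scikit-learn sketch (0.22-era base_estimator argument):

# Sketch: the four ensemble variants from the docstring, as BaggingRegressor settings.
from sklearn.ensemble import BaggingRegressor
from sklearn.tree import DecisionTreeRegressor

tree = DecisionTreeRegressor()
pasting   = BaggingRegressor(base_estimator=tree, max_samples=0.5, bootstrap=False)   # sample subsets, no replacement
bagging   = BaggingRegressor(base_estimator=tree, max_samples=0.5, bootstrap=True)    # samples drawn with replacement
subspaces = BaggingRegressor(base_estimator=tree, max_features=0.5, bootstrap=False)  # feature subsets
patches   = BaggingRegressor(base_estimator=tree, max_samples=0.5, max_features=0.5)  # subsets of both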