convert basic statistics pp to use the new data object (#460)
* convert basicStatistics to use the new data objects

* convert tests of basicStatistics

* convert more tests

* clean up

* move addMetaKeys to localInputAndChecks

* resolve comments

* fix checkIndexAlignment in DataSet

* add unit test for checkIndexAlignment
wangcj05 authored and PaulTalbot-INL committed Dec 7, 2017
1 parent ae32b72 commit 059d7eb
Showing 122 changed files with 10,690 additions and 40,803 deletions.
21 changes: 18 additions & 3 deletions framework/DataObjects/TestXDataSet.py
@@ -531,10 +531,25 @@ def formatRealization(rlz):
checkFails('Metadata get missing general','Some requested keys could not be found in the requested metadata: set([u\'prefix\'])',data.getMeta,kwargs=dict(keys=['prefix'],general=True))
# fail to find general in pointwise
checkFails('Metadata get missing general','Some requested keys could not be found in the requested metadata: set([u\'DataSet\'])',data.getMeta,kwargs=dict(keys=['DataSet'],pointwise=True))

# check that poorly-aligned set checks out as such
-  checkSame('Check misaligned data is not aligned',False,data.checkIndexAlignment())
-  # TODO check aligned data too
+  checkTrue('Check misaligned data is not aligned',not data.checkIndexAlignment())
+  # check aligned data too
+  xml = createElement('DataSet',attrib={'name':'test'})
+  xml.append(createElement('Input',text='a'))
+  xml.append(createElement('Output',text='b'))
+  xml.append(createElement('Index',attrib={'var':'t'},text='b'))
+  dataAlign = XDataSet.DataSet()
+  dataAlign.messageHandler = mh
+  dataAlign._readMoreXML(xml)
+  rlz = {'a':np.array([1.9]),
+         'b':np.array([3.4, 2.4, 6.5]),
+         't':np.array([0.4, 0.9, 10])}
+  dataAlign.addRealization(rlz)
+  rlz = {'a':np.array([7.9]),
+         'b':np.array([0.3, -0.8, 9.7]),
+         't':np.array([0.4, 0.9, 10])}
+  dataAlign.addRealization(rlz)
+  checkTrue('Check aligned data is aligned', dataAlign.checkIndexAlignment('t'))

######################################
# READ/WRITE FROM FILE #
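The createElement calls in the hunk above come from a helper defined in the test module itself. As a rough sketch of what that helper is assumed to do (a thin wrapper over xml.etree.ElementTree; the real definition lives in the test file):

import xml.etree.ElementTree as ET

def createElement(tag, attrib=None, text=None):
  """
    Sketch of the assumed test helper: builds an ET element with optional attributes and text.
    @ In, tag, string, the element tag
    @ In, attrib, dict, optional, attributes for the element
    @ In, text, string, optional, text content for the element
    @ Out, element, ET.Element, the constructed element
  """
  element = ET.Element(tag, attrib if attrib is not None else {})
  element.text = text
  return element
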
19 changes: 11 additions & 8 deletions framework/DataObjects/XDataSet.py
@@ -256,14 +256,17 @@ def checkIndexAlignment(self,indexesToCheck=None):
for index in indexesToCheck:
  # check that index is indeed an index
  assert(index in self.indexes)
-   # get number of slices
-   numSlices = len(data[index].values)
-   for i in range(numSlices):
-     # if any entries are null ...
-     if data.where(data.isel(**{index:i}).isnull()).sum > 0:
-       # don't print out statements, but useful if debugging during development. Comment again afterward.
-       #self.raiseADebug('Found misalignment in index "{}" entry "{}" (value "{}")'.format(index,i,data[index][i].values))
-       return False
+   # get a typical variable from the set to look at
+   ## NB we can do this because each variable within one realization must be aligned with the rest
+   ##    of the variables in that same realization, so checking one variable that depends on "index"
+   ##    is as good as checking all of them.
+   ## TODO: this approach only works for our current data structure; for the ND case, it should be
+   ##    improved.
+   data = data[self._pivotParams[index][-1]]
+   # if any nulls exist in this data, this suggests missing data and therefore misalignment
+   if data.isnull().sum() > 0:
+     self.raiseADebug('Found misalignment in index variable "{}".'.format(index))
+     return False
# if you haven't returned False by now, you must be aligned
return True

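To see the idea behind the new null-count check in isolation: when realizations with mismatched index coordinates are merged into one xarray object, the outer join pads the shorter one with NaN, so a nonzero null count signals misalignment. A minimal standalone sketch (not RAVEN code; values borrowed from the test above):

import xarray as xr

# two "realizations" of a variable b over index t, with disagreeing t grids
b0 = xr.DataArray([3.4, 2.4, 6.5], coords={'t': [0.4, 0.9, 10.0]}, dims=['t'])
b1 = xr.DataArray([0.3, -0.8], coords={'t': [0.4, 0.9]}, dims=['t'])
# concatenating along a new sample dimension outer-joins on t, padding b1 with NaN
merged = xr.concat([b0, b1], dim='samples')
print(bool(merged.isnull().sum() == 0))  # False -> misaligned
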
2 changes: 1 addition & 1 deletion framework/Models/ROM.py
@@ -398,7 +398,7 @@ def evaluateSample(self, myInput, samplerType, kwargs):
result = self._externalRun(inRun)
# build realization
# assure rlz has all metadata
- rlz = dict((var,np.atleast_1d(kwargs[var]) for var in kwargs.keys())
+ rlz = dict((var,np.atleast_1d(kwargs[var])) for var in kwargs.keys())
# update rlz with input space from inRun and output space from result
rlz.update(dict((var,np.atleast_1d(inRun[var] if var in kwargs['SampledVars'] else result[var])) for var in set(result.keys()+inRun.keys())))
return rlz
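The np.atleast_1d calls above normalize scalar sample values to 1-d arrays, since the new data objects expect every variable in a realization to be array-valued. A small standalone illustration (variable names invented):

import numpy as np

kwargs = {'x': 1.5, 'y': 2.5}  # a hypothetical sampled point
rlz = dict((var, np.atleast_1d(kwargs[var])) for var in kwargs.keys())
print(rlz['x'], rlz['x'].shape)  # [1.5] (1,)
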
627 changes: 187 additions & 440 deletions framework/PostProcessors/BasicStatistics.py

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions framework/Samplers/CustomSampler.py
@@ -171,4 +171,7 @@ def localGenerateInput(self,model,myInput):
self.inputInfo['PointProbability'] = self.infoFromCustom['PointProbability'][self.counter-1]
if 'ProbabilityWeight' in self.infoFromCustom.keys():
  self.inputInfo['ProbabilityWeight'] = self.infoFromCustom['ProbabilityWeight'][self.counter-1]
else:
  self.inputInfo['ProbabilityWeight'] = 1.0

self.inputInfo['SamplerType'] = 'Custom'
7 changes: 7 additions & 0 deletions framework/Samplers/Grid.py
@@ -103,6 +103,13 @@ def localInputAndChecks(self,xmlNode, paramInput):
self.raiseAnError(IOError,'inconsistency between number of variables and grid specification')
self.axisName = list(grdInfo.keys())
self.axisName.sort()
for i in range(len(self.axisName)):
  varName = self.axisName[i]
  if ("<distribution>" in varName) or (self.variables2distributionsMapping[varName]['totDim']==1):
    self.addMetaKeys(*['ProbabilityWeight-'+varName.replace(",","-")])
  else:
    if self.variables2distributionsMapping[varName]['reducedDim']==1:
      self.addMetaKeys(*['ProbabilityWeight-'+varName.replace(",","!")])

def localGetInitParams(self):
"""
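The meta-key naming registered above can be seen in isolation: variables tied to a univariate distribution get commas replaced with '-', while the first (reduced) dimension of a multivariate distribution replaces commas with '!'. A hypothetical illustration (variable names invented):

# univariate variable
print('ProbabilityWeight-' + 'x'.replace(',', '-'))    # ProbabilityWeight-x
# comma-joined variable belonging to a multivariate distribution, reducedDim == 1
print('ProbabilityWeight-' + 'y,z'.replace(',', '!'))  # ProbabilityWeight-y!z
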
1 change: 1 addition & 0 deletions framework/Samplers/LimitSurfaceSearch.py
@@ -714,6 +714,7 @@ def localGenerateInput(self,model,oldInput):
self.inputInfo['distributionType'][key] = self.distDict[key].type
self.inputInfo['SampledVarsPb' ][key] = self.distDict[key].pdf(self.values[key])
self.inputInfo['ProbabilityWeight-'+key] = self.distDict[key].pdf(self.values[key])
self.addMetaKeys(*['ProbabilityWeight-'+key])
self.inputInfo['PointProbability' ] = reduce(mul, self.inputInfo['SampledVarsPb'].values())
# the probability weight here is not used, the post processor is going to recreate the grid associated and use a ROM for the probability evaluation
self.inputInfo['ProbabilityWeight'] = self.inputInfo['PointProbability']
2 changes: 2 additions & 0 deletions framework/Samplers/Sampler.py
@@ -600,6 +600,8 @@ def _constantVariables(self):
self.inputInfo['SampledVars' ].update(self.constants)
# we consider that CDF of the constant variables is equal to 1 (same as its Pb Weight)
self.inputInfo['SampledVarsPb'].update(dict.fromkeys(self.constants.keys(),1.0))
pbKey = ['ProbabilityWeight-'+key for key in self.constants.keys()]
self.addMetaKeys(*pbKey)
self.inputInfo.update(dict.fromkeys(['ProbabilityWeight-'+key for key in self.constants.keys()],1.0))

def amIreadyToProvideAnInput(self): #inLastOutput=None):
8 changes: 8 additions & 0 deletions framework/Samplers/Stratified.py
@@ -106,6 +106,14 @@ def localInputAndChecks(self,xmlNode, paramInput):
self.inputInfo['upper'] = {}
self.inputInfo['lower'] = {}

for varName in self.axisName:
  if not "<distribution>" in varName:
    if self.variables2distributionsMapping[varName]['totDim']>1 and self.variables2distributionsMapping[varName]['reducedDim'] == 1:
      self.addMetaKeys(*['ProbabilityWeight-'+varName.replace(",","!")])
  if ("<distribution>" in varName) or self.variables2distributionsMapping[varName]['totDim']==1:
    self.addMetaKeys(*['ProbabilityWeight-'+varName.replace(",","-")])


def localInitialize(self):
"""
Will perform all initialization specific to this Sampler. For instance,
227 changes: 227 additions & 0 deletions scripts/conversionScripts/toBasicStatsPP.py
@@ -0,0 +1,227 @@
# Copyright 2017 Battelle Energy Alliance, LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import xml.etree.ElementTree as ET
import xml.dom.minidom as pxml
import os

def convert(tree,fileName=None):
  """
    Converts input files to be compatible with merge request #460
    @ In, tree, xml.etree.ElementTree.ElementTree object, the contents of a RAVEN input file
    @ In, fileName, string, optional, the name for the RAVEN input file
    @ Out, tree, xml.etree.ElementTree.ElementTree object, the modified RAVEN input file
  """
  simulation = tree.getroot()
  models = simulation.find('Models')

  hasVariableGroups = True
  hasDataObjects = True
  hasOutStreams = True

  variableGroups = simulation.find('VariableGroups')
  if variableGroups is None:
    variableGroups = ET.Element('VariableGroups')
    hasVariableGroups = False
  dataObjects = simulation.find('DataObjects')
  if dataObjects is None:
    dataObjects = ET.Element('DataObjects')
    hasDataObjects = False
  outStreams = simulation.find('OutStreams')
  if outStreams is None:
    outStreams = ET.Element('OutStreams')
    hasOutStreams = False

  steps = simulation.find('Steps')
  postProcess = steps.findall('PostProcess')

  if models is None: return tree # no models, no BasicStats
  timeDep = {}
  for model in models:
    if model.tag == 'PostProcessor' and model.attrib['subType'] == 'BasicStatistics':
      # note that this converts exactly: it asks for everything with respect to everything
      params = []

      timeDep[model.attrib['name']] = model.find('pivotParameter')
      if model.find('all') is not None:
        anode = model.find('all')
        targNode = anode.find('targets')
        featNode = anode.find('features')
        targs = targNode.text
        feats = featNode.text

        model.remove(model.find('all'))

        metricDict = {'expectedValue':'mean',
                      'minimum':'min',
                      'maximum':'max',
                      'median':'median',
                      'variance':'var',
                      'sigma':'sigma',
                      'percentile':'percentile',
                      'variationCoefficient':'vc',
                      'skewness':'skew',
                      'kurtosis':'kurt',
                      'samples':'samp'}
        for metric, prefix in metricDict.items():
          node = ET.Element(metric)
          node.text = targs
          node.attrib['prefix'] = prefix
          model.append(node)
          for targ in targs.split(','):
            if metric != 'percentile':
              params.append(prefix+'_'+targ.strip())
            else:
              params.append(prefix+'_5_'+targ.strip())
              params.append(prefix+'_95_'+targ.strip())

        metricDict = {'sensitivity':'sen',
                      'covariance':'cov',
                      'pearson':'pear',
                      'NormalizedSensitivity':'nsen',
                      'VarianceDependentSensitivity':'vsen'}

        for metric, prefix in metricDict.items():
          node = ET.Element(metric)
          node.attrib['prefix'] = prefix
          node.append(targNode)
          node.append(featNode)
          model.append(node)
          for targ in targs.split(','):
            for feat in feats.split(','):
              params.append(prefix+'_'+targ.strip()+'_'+feat.strip())

      else:
        metricDict = {'expectedValue':'mean',
                      'minimum':'min',
                      'maximum':'max',
                      'median':'median',
                      'variance':'var',
                      'sigma':'sigma',
                      'percentile':'percentile',
                      'variationCoefficient':'vc',
                      'skewness':'skew',
                      'kurtosis':'kurt',
                      'samples':'samp',
                      'sensitivity':'sen',
                      'covariance':'cov',
                      'pearson':'pear',
                      'NormalizedSensitivity':'nsen',
                      'VarianceDependentSensitivity':'vsen'}

        metricDict1 = {'expectedValue':'mean',
                       'minimum':'min',
                       'maximum':'max',
                       'median':'median',
                       'variance':'var',
                       'sigma':'sigma',
                       'percentile':'percentile',
                       'variationCoefficient':'vc',
                       'skewness':'skew',
                       'kurtosis':'kurt',
                       'samples':'samp'}

        for child in model:
          if child.tag in metricDict.keys():
            child.attrib['prefix'] = metricDict[child.tag]
            if child.tag in metricDict1.keys():
              for var in child.text.split(','):
                if child.tag != 'percentile':
                  params.append(metricDict[child.tag]+'_'+var.strip())
                else:
                  if 'percent' in child.attrib.keys():
                    params.append(metricDict[child.tag]+'_'+child.attrib['percent']+'_'+var.strip())
                  else:
                    params.append(metricDict[child.tag]+'_5_'+var.strip())
                    params.append(metricDict[child.tag]+'_95_'+var.strip())
            else:
              targNode = child.find('targets')
              featNode = child.find('features')
              for targ in targNode.text.split(','):
                for feat in featNode.text.split(','):
                  params.append(metricDict[child.tag]+'_'+targ.strip()+'_'+feat.strip())

      # add variable groups
      group = ET.Element('Group')
      group.attrib['name'] = model.attrib['name'] + '_vars'
      group.text = ',\n '.join(params)
      variableGroups.append(group)

  if variableGroups.find('Group') is not None:
    if not hasVariableGroups:
      simulation.append(variableGroups)
    for modelName, pivotParam in timeDep.items():
      dataSetName = modelName + '_basicStatPP'
      if pivotParam is None:
        dataSet = ET.Element('PointSet')
      else:
        dataSet = ET.Element('HistorySet')
        option = ET.SubElement(dataSet, 'options')
        pivotNode = ET.SubElement(option,'pivotParameter')
        pivotNode.text = pivotParam.text

      dataSet.attrib['name'] = dataSetName
      outNode = ET.SubElement(dataSet,'Output')
      outNode.text = modelName + '_vars'
      dataObjects.append(dataSet)
      if not hasDataObjects:
        simulation.append(dataObjects)

      printNode = ET.Element('Print')
      printNode.attrib['name'] = dataSetName + '_dump'
      typeNode = ET.SubElement(printNode,'type')
      typeNode.text = 'csv'
      sourceNode = ET.SubElement(printNode,'source')
      sourceNode.text = dataSetName
      outStreams.append(printNode)
      if not hasOutStreams:
        simulation.append(outStreams)

      for pp in postProcess:
        if modelName == pp.find('Model').text.strip():
          outputs = pp.findall('Output')
          remove = False
          hasPrint = False
          for output in outputs:
            if output.attrib['class'] == 'Files':
              output.attrib['class'] = 'DataObjects'
              output.attrib['type'] = 'PointSet' if pivotParam is None else 'HistorySet'
              output.text = dataSetName
              if remove:
                pp.remove(output)
              else:
                remove = True
            elif output.attrib['class'] == 'OutStreams' and output.attrib['type'] == 'Print':
              output.text = dataSetName + '_dump'
              hasPrint = True
            elif output.attrib['class'] == 'DataObjects':
              pp.remove(output)
          if not hasPrint:
            printNode = ET.SubElement(pp, 'Output')
            printNode.attrib['class'] = 'OutStreams'
            printNode.attrib['type'] = 'Print'
            printNode.text = dataSetName + '_dump'

  return tree

if __name__=='__main__':
  import convert_utils
  import sys
  convert_utils.standardMain(sys.argv,convert)
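As a usage sketch, convert can be exercised on a minimal hand-built tree; the input XML below is invented for demonstration and only exercises the 'all' branch:

import xml.etree.ElementTree as ET

src = """<Simulation>
  <Models>
    <PostProcessor name="stats" subType="BasicStatistics">
      <all><targets>x,y</targets><features>x,y</features></all>
    </PostProcessor>
  </Models>
  <Steps>
    <PostProcess name="pp">
      <Model class="Models" type="PostProcessor">stats</Model>
      <Output class="Files" type="">stats.csv</Output>
    </PostProcess>
  </Steps>
</Simulation>"""
tree = ET.ElementTree(ET.fromstring(src))
convert(tree)
# the converted tree now carries per-metric nodes with prefix attributes, a
# variable group named stats_vars, a PointSet named stats_basicStatPP, and a
# Print OutStream named stats_basicStatPP_dump wired into the PostProcess step
print(ET.tostring(tree.getroot()).decode())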