convert basic statistics pp to use the new data object (#460)
* convert basicStatistics to use the new data objects

* convert tests of basicStatistics

* convert more tests

* clean up

* move addMetaKeys to localInputAndChecks

* resolve comments

* fix checkIndexAlignment in DataSet

* add unit test for checkIndexAlignment
wangcj05 authored and PaulTalbot-INL committed Dec 7, 2017
1 parent ae32b72 commit 059d7eb
Showing 122 changed files with 10,690 additions and 40,803 deletions.
21 changes: 18 additions & 3 deletions framework/DataObjects/TestXDataSet.py
@@ -531,10 +531,25 @@ def formatRealization(rlz):
checkFails('Metadata get missing general','Some requested keys could not be found in the requested metadata: set([u\'prefix\'])',data.getMeta,kwargs=dict(keys=['prefix'],general=True))
# fail to find general in pointwise
checkFails('Metadata get missing general','Some requested keys could not be found in the requested metadata: set([u\'DataSet\'])',data.getMeta,kwargs=dict(keys=['DataSet'],pointwise=True))

# check that poorly-aligned set checks out as such
-  checkSame('Check misaligned data is not aligned',False,data.checkIndexAlignment())
-  # TODO check aligned data too
+  checkTrue('Check misaligned data is not aligned',not data.checkIndexAlignment())
+  # check aligned data too
+  xml = createElement('DataSet',attrib={'name':'test'})
+  xml.append(createElement('Input',text='a'))
+  xml.append(createElement('Output',text='b'))
+  xml.append(createElement('Index',attrib={'var':'t'},text='b'))
+  dataAlign = XDataSet.DataSet()
+  dataAlign.messageHandler = mh
+  dataAlign._readMoreXML(xml)
+  rlz = {'a':np.array([1.9]),
+         'b':np.array([3.4, 2.4, 6.5]),
+         't':np.array([0.4, 0.9, 10])}
+  dataAlign.addRealization(rlz)
+  rlz = {'a':np.array([7.9]),
+         'b':np.array([0.3, -0.8, 9.7]),
+         't':np.array([0.4, 0.9, 10])}
+  dataAlign.addRealization(rlz)
+  checkTrue('Check aligned data is aligned', dataAlign.checkIndexAlignment('t'))

######################################
# READ/WRITE FROM FILE #
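The createElement calls in the hunk above come from a helper defined in the test module itself. As a rough sketch of what that helper is assumed to do (a thin wrapper over xml.etree.ElementTree; the real definition lives in the test file):

import xml.etree.ElementTree as ET

def createElement(tag, attrib=None, text=None):
  """
    Sketch of the assumed test helper: builds an ET element with optional attributes and text.
    @ In, tag, string, the element tag
    @ In, attrib, dict, optional, attributes for the element
    @ In, text, string, optional, text content for the element
    @ Out, element, ET.Element, the constructed element
  """
  element = ET.Element(tag, attrib if attrib is not None else {})
  element.text = text
  return element
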
19 changes: 11 additions & 8 deletions framework/DataObjects/XDataSet.py
@@ -256,14 +256,17 @@ def checkIndexAlignment(self,indexesToCheck=None):
for index in indexesToCheck:
  # check that index is indeed an index
  assert(index in self.indexes)
-   # get number of slices
-   numSlices = len(data[index].values)
-   for i in range(numSlices):
-     # if any entries are null ...
-     if data.where(data.isel(**{index:i}).isnull()).sum > 0:
-       # don't print out statements, but useful if debugging during development. Comment again afterward.
-       #self.raiseADebug('Found misalignment in index "{}" entry "{}" (value "{}")'.format(index,i,data[index][i].values))
-       return False
+   # get a typical variable from the set to look at
+   ## NB we can do this because each variable within one realization must be aligned with the rest
+   ##    of the variables in that same realization, so checking one variable that depends on "index"
+   ##    is as good as checking all of them.
+   ## TODO: this approach only works for our current data structure; for the ND case, it should be
+   ##    improved.
+   data = data[self._pivotParams[index][-1]]
+   # if any nulls exist in this data, this suggests missing data and therefore misalignment
+   if data.isnull().sum() > 0:
+     self.raiseADebug('Found misalignment in index variable "{}".'.format(index))
+     return False
# if you haven't returned False by now, you must be aligned
return True

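To see the idea behind the new null-count check in isolation: when realizations with mismatched index coordinates are merged into one xarray object, the outer join pads the shorter one with NaN, so a nonzero null count signals misalignment. A minimal standalone sketch (not RAVEN code; values borrowed from the test above):

import xarray as xr

# two "realizations" of a variable b over index t, with disagreeing t grids
b0 = xr.DataArray([3.4, 2.4, 6.5], coords={'t': [0.4, 0.9, 10.0]}, dims=['t'])
b1 = xr.DataArray([0.3, -0.8], coords={'t': [0.4, 0.9]}, dims=['t'])
# concatenating along a new sample dimension outer-joins on t, padding b1 with NaN
merged = xr.concat([b0, b1], dim='samples')
print(bool(merged.isnull().sum() == 0))  # False -> misaligned
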
2 changes: 1 addition & 1 deletion framework/Models/ROM.py
@@ -398,7 +398,7 @@ def evaluateSample(self, myInput, samplerType, kwargs):
result = self._externalRun(inRun)
# build realization
# assure rlz has all metadata
- rlz = dict((var,np.atleast_1d(kwargs[var]) for var in kwargs.keys())
+ rlz = dict((var,np.atleast_1d(kwargs[var])) for var in kwargs.keys())
# update rlz with input space from inRun and output space from result
rlz.update(dict((var,np.atleast_1d(inRun[var] if var in kwargs['SampledVars'] else result[var])) for var in set(result.keys()+inRun.keys())))
return rlz
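The np.atleast_1d calls above normalize scalar sample values to 1-d arrays, since the new data objects expect every variable in a realization to be array-valued. A small standalone illustration (variable names invented):

import numpy as np

kwargs = {'x': 1.5, 'y': 2.5}  # a hypothetical sampled point
rlz = dict((var, np.atleast_1d(kwargs[var])) for var in kwargs.keys())
print(rlz['x'], rlz['x'].shape)  # [1.5] (1,)
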
627 changes: 187 additions & 440 deletions framework/PostProcessors/BasicStatistics.py

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions framework/Samplers/CustomSampler.py
@@ -171,4 +171,7 @@ def localGenerateInput(self,model,myInput):
self.inputInfo['PointProbability'] = self.infoFromCustom['PointProbability'][self.counter-1]
if 'ProbabilityWeight' in self.infoFromCustom.keys():
  self.inputInfo['ProbabilityWeight'] = self.infoFromCustom['ProbabilityWeight'][self.counter-1]
else:
  self.inputInfo['ProbabilityWeight'] = 1.0

self.inputInfo['SamplerType'] = 'Custom'
7 changes: 7 additions & 0 deletions framework/Samplers/Grid.py
@@ -103,6 +103,13 @@ def localInputAndChecks(self,xmlNode, paramInput):
self.raiseAnError(IOError,'inconsistency between number of variables and grid specification')
self.axisName = list(grdInfo.keys())
self.axisName.sort()
for i in range(len(self.axisName)):
  varName = self.axisName[i]
  if ("<distribution>" in varName) or (self.variables2distributionsMapping[varName]['totDim']==1):
    self.addMetaKeys(*['ProbabilityWeight-'+varName.replace(",","-")])
  else:
    if self.variables2distributionsMapping[varName]['reducedDim']==1:
      self.addMetaKeys(*['ProbabilityWeight-'+varName.replace(",","!")])

def localGetInitParams(self):
"""
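The meta-key naming registered above can be seen in isolation: variables tied to a univariate distribution get commas replaced with '-', while the first (reduced) dimension of a multivariate distribution replaces commas with '!'. A hypothetical illustration (variable names invented):

# univariate variable
print('ProbabilityWeight-' + 'x'.replace(',', '-'))    # ProbabilityWeight-x
# comma-joined variable belonging to a multivariate distribution, reducedDim == 1
print('ProbabilityWeight-' + 'y,z'.replace(',', '!'))  # ProbabilityWeight-y!z
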
1 change: 1 addition & 0 deletions framework/Samplers/LimitSurfaceSearch.py
@@ -714,6 +714,7 @@ def localGenerateInput(self,model,oldInput):
self.inputInfo['distributionType'][key] = self.distDict[key].type
self.inputInfo['SampledVarsPb' ][key] = self.distDict[key].pdf(self.values[key])
self.inputInfo['ProbabilityWeight-'+key] = self.distDict[key].pdf(self.values[key])
self.addMetaKeys(*['ProbabilityWeight-'+key])
self.inputInfo['PointProbability' ] = reduce(mul, self.inputInfo['SampledVarsPb'].values())
# the probability weight here is not used, the post processor is going to recreate the grid associated and use a ROM for the probability evaluation
self.inputInfo['ProbabilityWeight'] = self.inputInfo['PointProbability']
2 changes: 2 additions & 0 deletions framework/Samplers/Sampler.py
@@ -600,6 +600,8 @@ def _constantVariables(self):
self.inputInfo['SampledVars' ].update(self.constants)
# we consider that CDF of the constant variables is equal to 1 (same as its Pb Weight)
self.inputInfo['SampledVarsPb'].update(dict.fromkeys(self.constants.keys(),1.0))
pbKey = ['ProbabilityWeight-'+key for key in self.constants.keys()]
self.addMetaKeys(*pbKey)
self.inputInfo.update(dict.fromkeys(['ProbabilityWeight-'+key for key in self.constants.keys()],1.0))

def amIreadyToProvideAnInput(self): #inLastOutput=None):
8 changes: 8 additions & 0 deletions framework/Samplers/Stratified.py
@@ -106,6 +106,14 @@ def localInputAndChecks(self,xmlNode, paramInput):
self.inputInfo['upper'] = {}
self.inputInfo['lower'] = {}

for varName in self.axisName:
  if not "<distribution>" in varName:
    if self.variables2distributionsMapping[varName]['totDim']>1 and self.variables2distributionsMapping[varName]['reducedDim'] == 1:
      self.addMetaKeys(*['ProbabilityWeight-'+varName.replace(",","!")])
  if ("<distribution>" in varName) or self.variables2distributionsMapping[varName]['totDim']==1:
    self.addMetaKeys(*['ProbabilityWeight-'+varName.replace(",","-")])


def localInitialize(self):
"""
Will perform all initialization specific to this Sampler. For instance,
227 changes: 227 additions & 0 deletions scripts/conversionScripts/toBasicStatsPP.py
@@ -0,0 +1,227 @@
# Copyright 2017 Battelle Energy Alliance, LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import xml.etree.ElementTree as ET
import xml.dom.minidom as pxml
import os

def convert(tree,fileName=None):
  """
    Converts input files to be compatible with merge request #460
    @ In, tree, xml.etree.ElementTree.ElementTree object, the contents of a RAVEN input file
    @ In, fileName, string, optional, the name for the RAVEN input file
    @ Out, tree, xml.etree.ElementTree.ElementTree object, the modified RAVEN input file
  """
  simulation = tree.getroot()
  models = simulation.find('Models')

  hasVariableGroups = True
  hasDataObjects = True
  hasOutStreams = True

  variableGroups = simulation.find('VariableGroups')
  if variableGroups is None:
    variableGroups = ET.Element('VariableGroups')
    hasVariableGroups = False
  dataObjects = simulation.find('DataObjects')
  if dataObjects is None:
    dataObjects = ET.Element('DataObjects')
    hasDataObjects = False
  outStreams = simulation.find('OutStreams')
  if outStreams is None:
    outStreams = ET.Element('OutStreams')
    hasOutStreams = False

  steps = simulation.find('Steps')
  postProcess = steps.findall('PostProcess')

  if models is None: return tree # no models, no BasicStats
  timeDep = {}
  for model in models:
    if model.tag == 'PostProcessor' and model.attrib['subType'] == 'BasicStatistics':
      # note that this converts exactly: it asks for everything with respect to everything
      params = []

      timeDep[model.attrib['name']] = model.find('pivotParameter')
      if model.find('all') is not None:
        anode = model.find('all')
        targNode = anode.find('targets')
        featNode = anode.find('features')
        targs = targNode.text
        feats = featNode.text

        model.remove(model.find('all'))

        metricDict = {'expectedValue':'mean',
                      'minimum':'min',
                      'maximum':'max',
                      'median':'median',
                      'variance':'var',
                      'sigma':'sigma',
                      'percentile':'percentile',
                      'variationCoefficient':'vc',
                      'skewness':'skew',
                      'kurtosis':'kurt',
                      'samples':'samp'}
        for metric, prefix in metricDict.items():
          node = ET.Element(metric)
          node.text = targs
          node.attrib['prefix'] = prefix
          model.append(node)
          for targ in targs.split(','):
            if metric != 'percentile':
              params.append(prefix+'_'+targ.strip())
            else:
              params.append(prefix+'_5_'+targ.strip())
              params.append(prefix+'_95_'+targ.strip())

        metricDict = {'sensitivity':'sen',
                      'covariance':'cov',
                      'pearson':'pear',
                      'NormalizedSensitivity':'nsen',
                      'VarianceDependentSensitivity':'vsen'}

        for metric, prefix in metricDict.items():
          node = ET.Element(metric)
          node.attrib['prefix'] = prefix
          node.append(targNode)
          node.append(featNode)
          model.append(node)
          for targ in targs.split(','):
            for feat in feats.split(','):
              params.append(prefix+'_'+targ.strip()+'_'+feat.strip())

      else:
        metricDict = {'expectedValue':'mean',
                      'minimum':'min',
                      'maximum':'max',
                      'median':'median',
                      'variance':'var',
                      'sigma':'sigma',
                      'percentile':'percentile',
                      'variationCoefficient':'vc',
                      'skewness':'skew',
                      'kurtosis':'kurt',
                      'samples':'samp',
                      'sensitivity':'sen',
                      'covariance':'cov',
                      'pearson':'pear',
                      'NormalizedSensitivity':'nsen',
                      'VarianceDependentSensitivity':'vsen'}

        metricDict1 = {'expectedValue':'mean',
                       'minimum':'min',
                       'maximum':'max',
                       'median':'median',
                       'variance':'var',
                       'sigma':'sigma',
                       'percentile':'percentile',
                       'variationCoefficient':'vc',
                       'skewness':'skew',
                       'kurtosis':'kurt',
                       'samples':'samp'}

        for child in model:
          if child.tag in metricDict.keys():
            child.attrib['prefix'] = metricDict[child.tag]
            if child.tag in metricDict1.keys():
              for var in child.text.split(','):
                if child.tag != 'percentile':
                  params.append(metricDict[child.tag]+'_'+var.strip())
                else:
                  if 'percent' in child.attrib.keys():
                    params.append(metricDict[child.tag]+'_'+child.attrib['percent']+'_'+var.strip())
                  else:
                    params.append(metricDict[child.tag]+'_5_'+var.strip())
                    params.append(metricDict[child.tag]+'_95_'+var.strip())
            else:
              targNode = child.find('targets')
              featNode = child.find('features')
              for targ in targNode.text.split(','):
                for feat in featNode.text.split(','):
                  params.append(metricDict[child.tag]+'_'+targ.strip()+'_'+feat.strip())

      # add variable groups
      group = ET.Element('Group')
      group.attrib['name'] = model.attrib['name'] + '_vars'
      group.text = ',\n '.join(params)
      variableGroups.append(group)

  if variableGroups.find('Group') is not None:
    if not hasVariableGroups:
      simulation.append(variableGroups)
    for modelName, pivotParam in timeDep.items():
      dataSetName = modelName + '_basicStatPP'
      if pivotParam is None:
        dataSet = ET.Element('PointSet')
      else:
        dataSet = ET.Element('HistorySet')
        option = ET.SubElement(dataSet, 'options')
        pivotNode = ET.SubElement(option,'pivotParameter')
        pivotNode.text = pivotParam.text

      dataSet.attrib['name'] = dataSetName
      outNode = ET.SubElement(dataSet,'Output')
      outNode.text = modelName + '_vars'
      dataObjects.append(dataSet)
      if not hasDataObjects:
        simulation.append(dataObjects)

      printNode = ET.Element('Print')
      printNode.attrib['name'] = dataSetName + '_dump'
      typeNode = ET.SubElement(printNode,'type')
      typeNode.text = 'csv'
      sourceNode = ET.SubElement(printNode,'source')
      sourceNode.text = dataSetName
      outStreams.append(printNode)
      if not hasOutStreams:
        simulation.append(outStreams)

      for pp in postProcess:
        if modelName == pp.find('Model').text.strip():
          outputs = pp.findall('Output')
          remove = False
          hasPrint = False
          for output in outputs:
            if output.attrib['class'] == 'Files':
              output.attrib['class'] = 'DataObjects'
              output.attrib['type'] = 'PointSet' if pivotParam is None else 'HistorySet'
              output.text = dataSetName
              if remove:
                pp.remove(output)
              else:
                remove = True
            elif output.attrib['class'] == 'OutStreams' and output.attrib['type'] == 'Print':
              output.text = dataSetName + '_dump'
              hasPrint = True
            elif output.attrib['class'] == 'DataObjects':
              pp.remove(output)
          if not hasPrint:
            printNode = ET.SubElement(pp, 'Output')
            printNode.attrib['class'] = 'OutStreams'
            printNode.attrib['type'] = 'Print'
            printNode.text = dataSetName + '_dump'

  return tree

if __name__=='__main__':
  import convert_utils
  import sys
  convert_utils.standardMain(sys.argv,convert)
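As a usage sketch, convert can be exercised on a minimal hand-built tree; the input XML below is invented for demonstration and only exercises the 'all' branch:

import xml.etree.ElementTree as ET

src = """<Simulation>
  <Models>
    <PostProcessor name="stats" subType="BasicStatistics">
      <all><targets>x,y</targets><features>x,y</features></all>
    </PostProcessor>
  </Models>
  <Steps>
    <PostProcess name="pp">
      <Model class="Models" type="PostProcessor">stats</Model>
      <Output class="Files" type="">stats.csv</Output>
    </PostProcess>
  </Steps>
</Simulation>"""
tree = ET.ElementTree(ET.fromstring(src))
convert(tree)
# the converted tree now carries per-metric nodes with prefix attributes, a
# variable group named stats_vars, a PointSet named stats_basicStatPP, and a
# Print OutStream named stats_basicStatPP_dump wired into the PostProcess step
print(ET.tostring(tree.getroot()).decode())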