rework: raven running raven (#522)
* Script now offers a flag to change ignored files (#508)

-e can be followed by a comma-separated list of directories or files to ignore.
The old behavior ignored only .git; the new default ignores both .git and raven.

* test added

* analytic test documentation improved
PaulTalbot-INL authored and wangcj05 committed Jan 18, 2018
1 parent bde0519 commit b0070db
Showing 13 changed files with 478 additions and 29 deletions.
38 changes: 38 additions & 0 deletions doc/tests/attenuate.tex
@@ -96,3 +96,41 @@ \subsubsection{numeric values}
\begin{equation}
\text{var}{u(Y)} = 0.063779804051749989
\end{equation}

\subsection{Changing lower and upper bounds}
A parametric study can be made by changing the lower and upper bounds of the material opacities.

The objective is to determine the effect on the exit strength $u$ of a beam impinging on a
unit-length material subdivided into two materials with opacities $y_1, y_2$. Each opacity
ranges from a lower bound $y_\ell$ to an upper bound $y_h$, and the bounds are the same for
both opacities.

We evaluate the lower and upper bounds on a grid, and determine the expected values of the
opacity means and of the exit strength.

The analytic expected value of the exit strength depends on the lower and upper bounds as
follows:
\begin{align}
\bar u(y_\ell,y_h) &= \int_{y_\ell}^{y_h}\int_{y_\ell}^{y_h} \left(\frac{1}{y_h-y_\ell}\right)^2
e^{-(y_1+y_2)/2}\, dy_1\, dy_2, \\
&= \left(\frac{2\left(e^{-y_\ell/2}-e^{-y_h/2}\right)}{y_h-y_\ell}\right)^2, \\
&= \frac{4e^{-y_h-y_\ell}\left(e^{y_h/2}-e^{y_\ell/2}\right)^2}{(y_h-y_\ell)^2}.
\end{align}

Numerically, the grid points below result in the following expected values:

\begin{table}[h!]
\centering
\begin{tabular}{c c|c|c}
$y_\ell$ & $y_h$ & $\bar y_1=\bar y_2$ & $\bar u$ \\ \hline
0.00 & 0.50 & 0.250 & 0.782865 \\
0.00 & 0.75 & 0.375 & 0.695381 \\
0.00 & 1.00 & 0.500 & 0.619272 \\
0.25 & 0.50 & 0.375 & 0.688185 \\
0.25 & 0.75 & 0.500 & 0.609696 \\
0.25 & 1.00 & 0.625 & 0.541564 \\
0.50 & 0.50 & 0.500 & 0.606531 \\
0.50 & 0.75 & 0.625 & 0.535959 \\
0.50 & 1.00 & 0.750 & 0.474832
\end{tabular}
\end{table}
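As a quick numerical check of the closed form, take $y_\ell=0$ and $y_h=1$:
\begin{equation}
\bar u(0,1) = \frac{4e^{-1}\left(e^{1/2}-1\right)^2}{1^2} \approx 0.619272,
\end{equation}
which reproduces the third row of the table above.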

33 changes: 29 additions & 4 deletions framework/CodeInterfaces/RAVEN/RAVENInterface.py
@@ -293,13 +293,38 @@ def finalizeCodeOutput(self,command,output,workingDir):
(internally we check if the return variable is a dict and if it is returned by RAVEN (if not, we error out))
"""

##### TODO This is an exception to the way CodeInterfaces usually run.
# The return dict for this CodeInterface is a dictionary of data objects (one or two of them: up to one point set and up to one history set).
# Normally, the end result of this method is producing a CSV file with the data to load.
# When data objects are returned, this triggers an "if" path in Models/Code.py in evaluateSample(). There's an "else"
# that can be found by searching for the comment "we have the DataObjects -> raven-runs-raven case only so far".
# See more details there.
#####
dataObjectsToReturn = {}
numRlz = None
for filename in self.linkedDataObjectOutStreamsNames:
# load the output CSV into a data object, so we can return that
## load the XML initialization information and type
dataObjectInfo = self.outStreamsNamesAndType[filename]
dataObjectsToReturn[dataObjectInfo[0]] = DataObjects.returnInstance(dataObjectInfo[1],None)
dataObjectsToReturn[dataObjectInfo[0]]._readMoreXML(dataObjectInfo[2])
dataObjectsToReturn[dataObjectInfo[0]].name = filename
dataObjectsToReturn[dataObjectInfo[0]].loadXMLandCSV(os.path.join(workingDir,self.innerWorkingDir))
# create an instance of the correct data object type
data = DataObjects.returnInstance(dataObjectInfo[1],None)
# dummy message handler to handle message parsing, TODO this stinks and should be fixed.
data.messageHandler = DataObjects.XDataObject.MessageCourier()
# initialize the data object by reading the XML
data._readMoreXML(dataObjectInfo[2])
# set the name, then load the data
data.name = filename
data.load(os.path.join(workingDir,self.innerWorkingDir,filename),style='csv')
# check consistency of data object number of realizations
if numRlz is None:
# set the standard if you're the first data object
numRlz = len(data)
else:
# otherwise, check that the number of realizations is appropriate
if len(data) != numRlz:
raise IOError('The number of realizations in output CSVs from the inner RAVEN run is not consistent! In "{}" received "{}" realization(s), but other data objects had "{}" realization(s)!'.format(data.name,len(data),numRlz))
# store the object to return
dataObjectsToReturn[dataObjectInfo[0]] = data
return dataObjectsToReturn
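For orientation, a minimal sketch of the structure handed back here; the data object names are hypothetical, and in practice each value is a PointSet or HistorySet instance loaded from the inner run's CSVs:

# hypothetical illustration of the returned mapping, not RAVEN source
pointSetInstance = object()    # stands in for a loaded DataObjects point set
historySetInstance = object()  # stands in for a loaded DataObjects history set
dataObjectsToReturn = {'innerPointSet': pointSetInstance,
                       'innerHistorySet': historySetInstance}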


79 changes: 79 additions & 0 deletions framework/DataObjects/TestXDataSet.py
Expand Up @@ -26,6 +26,7 @@
import pickle as pk
import numpy as np
import xarray as xr
import copy
frameworkDir = os.path.dirname(os.path.abspath(os.path.join(sys.argv[0],'..')))

sys.path.append(frameworkDir)
@@ -956,6 +957,84 @@ def formatRealization(rlz):

data.addRealization(rlz2)

######################################
# DATA RENAMING #
######################################
# use the renaming, needed for alias operations
xml = createElement('DataSet',attrib={'name':'test'})
xml.append(createElement('Input', text='a,b'))
xml.append(createElement('Output',text='c,d'))
xml.append(createElement('Index',attrib={'var':'t'},text='b,d'))
data = XDataSet.DataSet()
data.messageHandler = mh
data._readMoreXML(xml)
data.addExpectedMeta(['prefix'])
rlz0 = {'a':np.array([0.0]),
'b':np.array([0.0, 0.1, 0.2]),
'c':np.array([0.5]),
'd':np.array([0.5, 0.6, 0.7]),
'prefix':np.array(['0']),
't':np.array([0.01, 0.02, 0.03])}
data.addRealization(rlz0)
# make a copy, to test renaming with just collector
data2 = copy.deepcopy(data)
data2.renameVariable('a','alpha')
data2.renameVariable('b','beta')
data2.renameVariable('c','gamma')
data2.renameVariable('d','delta')
data2.renameVariable('prefix','jobID')
data2.renameVariable('t','timelike')
# check everything was changed
correct0 = {'alpha':np.array([0.0]),
'beta':np.array([0.0, 0.1, 0.2]),
'gamma':np.array([0.5]),
'delta':np.array([0.5, 0.6, 0.7]),
'jobID':np.array(['0']),
'timelike':np.array([0.01, 0.02, 0.03])}
checkRlz('Rename in collector: variables',data2.realization(index=0),correct0,skip='timelike')
checkArray('Rename in collector: index',data2.indexes,['timelike'],str)

# now asDataset(), then rename
data3 = copy.deepcopy(data)
data3.asDataset()
data3.renameVariable('a','alpha')
data3.renameVariable('b','beta')
data3.renameVariable('c','gamma')
data3.renameVariable('d','delta')
data3.renameVariable('prefix','jobID')
data3.renameVariable('t','timelike')
checkRlz('Rename in dataset: variables',data3.realization(index=0),correct0,skip='timelike')
checkArray('Rename in dataset: index',data3.indexes,['timelike'],str)

# now asDataset, then append, then rename
data.asDataset()
rlz1 = {'a':np.array([1.0]),
'b':np.array([1.0, 1.1, 1.2]),
'c':np.array([1.5]),
'd':np.array([1.5, 1.6, 1.7]),
'prefix':np.array(['1']),
't':np.array([0.01, 0.02, 0.03])}
data.addRealization(rlz1)
data.renameVariable('a','alpha')
data.renameVariable('b','beta')
data.renameVariable('c','gamma')
data.renameVariable('d','delta')
data.renameVariable('prefix','jobID')
data.renameVariable('t','timelike')
correct1 = {'alpha':np.array([1.0]),
'beta':np.array([1.0, 1.1, 1.2]),
'gamma':np.array([1.5]),
'delta':np.array([1.5, 1.6, 1.7]),
'jobID':np.array(['1']),
'timelike':np.array([0.01, 0.02, 0.03])}
checkRlz('Rename in both: variables[0]',data.realization(index=0),correct0,skip='timelike')
checkRlz('Rename in both: variables[1]',data.realization(index=1),correct1,skip='timelike')
checkArray('Rename in both: index',data.indexes,['timelike'],str)
# make sure adding a new realization without the renaming fails
checkFails('Add old-named data after renaming variables','Provided realization does not have all requisite values for object \"DataSet\": \"alpha\"',data.addRealization,args=[rlz1])



print(results)

sys.exit(results["fail"])
44 changes: 40 additions & 4 deletions framework/DataObjects/XDataSet.py
@@ -141,7 +141,7 @@ def addRealization(self,rlz):
Before actually adding the realization, data is formatted for this data object.
@ In, rlz, dict, {var:val} format where
"var" is the variable name as a string,
"val" is either a float or a np.ndarray of values.
"val" is a np.ndarray of values.
@ Out, None
"""
# protect against back-changing realization
@@ -420,9 +420,7 @@ def realization(self,index=None,matchDict=None,tol=1e-15, unpackXArray=False):
@ Out, index, int, optional, index where found (or len(self) if not found), only returned if matchDict
@ Out, rlz, dict, realization requested (None if not found)
"""
# FIXME the caller should have no idea whether to read the collector or not.
# TODO convert input space to KD tree for faster searching -> XArray.DataArray has this built in
# TODO option to read both collector and data for matches/indices
# TODO convert input space to KD tree for faster searching -> XArray.DataArray has this built in?
## first, check that some direction was given, either an index or a match to find
if (index is None and matchDict is None) or (index is not None and matchDict is not None):
self.raiseAnError(TypeError,'Either "index" OR "matchDict" (not both) must be specified to use "realization!"')
@@ -526,6 +524,41 @@ def remove(self,realization=None,variable=None):
#either way reset kdtree
self.inputKDTree = None

def renameVariable(self,old,new):
"""
Changes the name of a variable from "old" to "new".
@ In, old, str, old name
@ In, new, str, new name
@ Out, None
"""
# determine where the old variable was
isInput = old in self._inputs
isOutput = old in self._outputs
isMeta = old in self._metavars
isIndex = old in self.indexes
# make the changes to the variable listings
if isInput:
self._inputs = list(a if (a != old) else new for a in self._inputs)
if isOutput:
self._outputs = list(a if (a != old) else new for a in self._outputs)
if isMeta:
self._metavars = list(a if (a != old) else new for a in self._metavars)
if isIndex:
# change the pivotParameters listing, as well as the sync/unsynced listings
self._pivotParams[new] = self._pivotParams.pop(old)
if old in self._alignedIndexes.keys():
self._alignedIndexes[new] = self._alignedIndexes.pop(old)
else:
self._orderedVars = list(a if a != old else new for a in self._orderedVars)
# if in/out/meta, change allvars (TODO wastefully already done if an unaligned index)
if isInput or isOutput or isMeta:
self._orderedVars = list(a if a != old else new for a in self._orderedVars)
# change scaling factor entry
if old in self._scaleFactors:
self._scaleFactors[new] = self._scaleFactors.pop(old)
if self._data is not None:
self._data.rename({old:new},inplace=True)
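One portability caveat on the final line above: the inplace keyword was deprecated and later removed from xarray, so on newer xarray releases the rename would need the reassigning form instead (a sketch, assuming a recent xarray):

# equivalent rename for xarray versions without the inplace keyword
self._data = self._data.rename({old: new})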

def reset(self):
"""
Sets this object back to its initial state.
@@ -1144,6 +1177,9 @@ def _fromDict(self,source,dims=None,**kwargs):
@ In, kwargs, dict, optional, additional arguments
@ Out, None
"""
# if anything is in the collector, collapse it first
if self._collector is not None:
self.asDataset()
# not safe to default to dict, so if "dims" not specified set it here
if dims is None:
dims = {}
55 changes: 42 additions & 13 deletions framework/Models/Code.py
@@ -544,13 +544,41 @@ def evaluateSample(self, myInput, samplerType, kwargs):
returnDict[header] = data
if not ravenCase:
self._replaceVariablesNamesWithAliasSystem(returnDict, 'inout', True)
returnDict.update(kwargs)
returnValue = (kwargs['SampledVars'],returnDict)
exportDict = self.createExportDictionary(returnValue)
else:
# we have the DataObjects
for dataObj in finalCodeOutputFile.values():
self._replaceVariablesNamesWithAliasSystem(dataObj.getParametersValues('inputs',nodeId='RecontructEnding'), 'input', True)
self._replaceVariablesNamesWithAliasSystem(dataObj.getParametersValues('unstructuredinputs',nodeId='RecontructEnding'), 'input', True)
self._replaceVariablesNamesWithAliasSystem(dataObj.getParametersValues('outputs',nodeId='RecontructEnding'), 'output', True)
returnDict = finalCodeOutputFile
# we have the DataObjects -> raven-runs-raven case only so far
# we have two tasks to do: collect the input/output/meta/indexes from the INNER raven run, and ALSO the input from the OUTER raven run.
# -> in addition, we have to fix the probability weights.
## get the number of realizations
### we already checked consistency in the CodeInterface, so just get the length of the first data object
numRlz = len(finalCodeOutputFile.values()[0])
## set up the return container
exportDict = {'RAVEN_isBatch':True,'realizations':[]}
## set up each realization
for n in range(numRlz):
rlz = {}
## collect the results from INNER, both point set and history set
for dataObj in finalCodeOutputFile.values():
# TODO FIXME check for overwriting data. For now just replace data if it's duplicate!
new = dict((var,np.atleast_1d(val)) for var,val in dataObj.realization(index=n,unpackXArray=True).items())
rlz.update( new )
## add OUTER input space
# TODO FIXME check for overwriting data. For now just replace data if it's duplicate!
new = dict((var,np.atleast_1d(val)) for var,val in kwargs['SampledVars'].items())
rlz.update( new )
## combine ProbabilityWeights # TODO FIXME these are a rough attempt at getting it right!
rlz['ProbabilityWeight'] = np.atleast_1d(rlz.get('ProbabilityWeight',1.0) * kwargs.get('ProbabilityWeight',1.0))
rlz['PointProbability'] = np.atleast_1d(rlz.get('PointProbability',1.0) * kwargs.get('PointProbability',1.0))
rlz['prefix'] = np.atleast_1d(kwargs['prefix']+'_'+str(n))
## add the rest of the metadata # TODO slow
for var,val in kwargs.items():
if var not in rlz.keys():
rlz[var] = np.atleast_1d(val)
self._replaceVariablesNamesWithAliasSystem(rlz,'inout',True)
exportDict['realizations'].append(rlz)
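Concretely, the batch container built above takes the following shape; this is a hypothetical two-realization example with made-up variable names, not output from an actual run:

import numpy as np
# outer prefix '3' wrapping an inner run that produced 2 realizations
exportDict = {'RAVEN_isBatch': True,
              'realizations': [
                {'x': np.atleast_1d(0.1), 'ans': np.atleast_1d(0.91),
                 'ProbabilityWeight': np.atleast_1d(1.0), 'prefix': np.atleast_1d('3_0')},
                {'x': np.atleast_1d(0.4), 'ans': np.atleast_1d(0.72),
                 'ProbabilityWeight': np.atleast_1d(1.0), 'prefix': np.atleast_1d('3_1')}]}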

## The last thing before returning should be to delete the temporary log
## file and any other file the user requests to be cleared
if deleteSuccessfulLogFiles:
@@ -563,16 +591,10 @@ def evaluateSample(self, myInput, samplerType, kwargs):
for fileExt in fileExtensionsToDelete:
if not fileExt.startswith("."):
fileExt = "." + fileExt

fileList = [ os.path.join(metaData['subDirectory'],f) for f in os.listdir(metaData['subDirectory']) if f.endswith(fileExt) ]

for f in fileList:
os.remove(f)

returnDict.update(kwargs)
returnValue = (kwargs['SampledVars'],returnDict)
exportDict = self.createExportDictionary(returnValue)

return exportDict

else:
@@ -625,7 +647,14 @@ def collectOutput(self,finishedJob,output,options=None):
if isinstance(evaluation, Runners.Error):
self.raiseAnError(AttributeError,"No available Output to collect")

output.addRealization(evaluation)

# in the event a batch is run, the evaluation will be a dict as {'RAVEN_isBatch':True, 'realizations': [...]}
if evaluation.get('RAVEN_isBatch',False):
for rlz in evaluation['realizations']:
output.addRealization(rlz)
# otherwise, we received a single realization
else:
output.addRealization(evaluation)

##TODO How to handle restart?
##TODO How to handle collectOutputFromDataObject
2 changes: 1 addition & 1 deletion framework/Samplers/MonteCarlo.py
@@ -181,7 +181,7 @@ def localGenerateInput(self,model,myInput):
if self.samplingType == 'uniform':
self.inputInfo['ProbabilityWeight' ] = weight
else:
self.inputInfo['ProbabilityWeight' ] = 1. #MC weight is 1/N => weight is one
self.inputInfo['ProbabilityWeight' ] = 1.0 #MC weight is 1/N => weight is one
# reassign SampledVarsPb to fully correlated variables
self._reassignSampledVarsPbToFullyCorrVars()
self.inputInfo['SamplerType'] = 'MonteCarlo'
@@ -0,0 +1,34 @@
# Copyright 2017 Battelle Energy Alliance, LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#***************************************
#* Simple analytic test ExternalModule *
#***************************************
#
# Simulates the attenuation of a beam through a purely-scattering medium with N distinct materials and unit length.
# The uncertain inputs are the opacities.
#
import numpy as np

def evaluate(inp):
if len(inp)>0:
return np.exp(-sum(inp)/len(inp))
else:
return 1.0

def run(self,Input):
self.ans = evaluate(Input.values())

#
# This model has analytic mean and variance documented in raven/doc/tests
#
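To tie this model back to the table in doc/tests/attenuate.tex, a brute-force Monte Carlo mean of evaluate() can be compared against the closed-form expected value. The following is a standalone sketch, not part of the commit; the sample count and seed are arbitrary:

import numpy as np

def evaluate(inp):
  # same model as above: exponential of the negative average opacity
  return np.exp(-sum(inp)/len(inp)) if len(inp) > 0 else 1.0

def exactMean(yl, yh):
  # closed form derived in doc/tests/attenuate.tex (degenerate limit when yl == yh)
  if yl == yh:
    return np.exp(-yl)
  return 4.0*np.exp(-yh - yl)*(np.exp(yh/2.0) - np.exp(yl/2.0))**2/(yh - yl)**2

rng = np.random.RandomState(42)              # arbitrary seed
yl, yh = 0.25, 0.75
samples = rng.uniform(yl, yh, size=(100000, 2))
mcMean = np.mean([evaluate(s) for s in samples])
print(mcMean, exactMean(yl, yh))             # both close to 0.609696 from the table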