Skip to content

Commit

Permalink
convert external post processor to use new DataObjects (#479)
Browse files Browse the repository at this point in the history
* convert external pp to use the new data objects

* regold tests

* fix the DataObjects

* address comments
  • Loading branch information
wangcj05 authored and alfoa committed Dec 16, 2017
1 parent c5a8425 commit 85fe8f3
Show file tree
Hide file tree
Showing 12 changed files with 134 additions and 364 deletions.
219 changes: 60 additions & 159 deletions framework/PostProcessors/ExternalPostProcessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

#External Modules---------------------------------------------------------------
import numpy as np
import copy
#External Modules End-----------------------------------------------------------

#Internal Modules---------------------------------------------------------------
Expand Down Expand Up @@ -90,52 +91,41 @@ def inputToInternal(self, currentInput):
if type(currentInput) == dict and 'targets' in currentInput.keys():
return

if type(currentInput) != list:
currentInput = [currentInput]

inputDict = {'targets':{}, 'metadata':{}}
metadata = []
for item in currentInput:
inType = None
if hasattr(item, 'type'):
inType = item.type
elif type(item) in [list]:
inType = "list"

if isinstance(item,Files.File):
if currentInput.subtype == 'csv':
self.raiseAWarning(self, 'Input type ' + inType + ' not yet implemented. I am going to skip it.')
elif inType == 'HDF5':
# TODO
self.raiseAWarning(self, 'Input type ' + inType + ' not yet implemented. I am going to skip it.')
elif inType == 'PointSet':
for param in item.getParaKeys('input'):
inputDict['targets'][param] = item.getParam('input', param)
for param in item.getParaKeys('output'):
inputDict['targets'][param] = item.getParam('output', param)
metadata.append(item.getAllMetadata())
elif inType =='HistorySet':
outs, ins = item.getOutParametersValues(nodeId = 'ending'), item.getInpParametersValues(nodeId = 'ending')
for param in item.getParaKeys('output'):
inputDict['targets'][param] = [value[param] for value in outs.values()]
for param in item.getParaKeys('input'):
inputDict['targets'][param] = [value[param] for value in ins.values()]
metadata.append(item.getAllMetadata())
elif inType != 'list':
self.raiseAWarning(self, 'Input type ' + type(item).__name__ + ' not recognized. I am going to skip it.')

# Not sure if we need it, but keep a copy of every inputs metadata
inputDict['metadata'] = metadata

if len(inputDict['targets'].keys()) == 0:
self.raiseAnError(IOError, 'No input variables have been found in the input objects!')
if type(currentInput) == list:
if len(currentInput) != 1:
self.raiseAnError(IOError, "The postprocessor ", self.name, "only allows one input DataObjects,"
+ " but multiple inputs are provided!")
else:
currentInput = currentInput[-1]
assert(hasattr(currentInput, 'type'), "The type is missing for input object! We should always associate a type with it.")
inType = currentInput.type
if inType in ['PointSet', 'HistorySet']:
dataSet = currentInput.asDataset()
else:
self.raiseAnError(IOError, "Input type ", inType, ' is not yet implemented!')

if len(currentInput) == 0:
self.raiseAnError(IOError, 'The Input object ', currentInput.name, ' is empty!')
inputDict = {}
if inType == 'PointSet':
for param in currentInput.getVars():
inputDict[param] = copy.copy(dataSet[param].values)
elif inType == 'HistorySet':
sliceList = currentInput.sliceByIndex('RAVEN_sample_ID')
indexes = currentInput.indexes
for param in currentInput.getVars('output'):
inputDict[param] = [sliceData[param].dropna(indexes[-1]).values for sliceData in sliceList]
for param in currentInput.getVars('input'):
inputDict[param] = [sliceData[param].values for sliceData in sliceList]
for param in indexes:
inputDict[param] = [sliceData[param].dropna(indexes[-1]).values for sliceData in sliceList]

for interface in self.externalInterfaces:
for _ in self.methodsToRun:
# The function should reference self and use the same variable names
# as the xml file
for param in interface.parameterNames():
if param not in inputDict['targets']:
if param not in inputDict.keys():
self.raiseAnError(IOError, self, 'variable \"' + param
+ '\" unknown. Please verify your '
+ 'external script ('
Expand Down Expand Up @@ -198,113 +188,11 @@ def collectOutput(self, finishedJob, output):

if isinstance(output,Files.File):
self.raiseAWarning('Output type File not yet implemented. I am going to skip it.')
elif output.type == 'DataObjects':
self.raiseAWarning('Output type ' + type(output).__name__
+ ' not yet implemented. I am going to skip it.')
elif output.type == 'HDF5':
self.raiseAWarning('Output type ' + type(output).__name__
+ ' not yet implemented. I am going to skip it.')
elif output.type in ['PointSet','HistorySet'] :
requestedInput = output.getParaKeys('input')
## If you want to be able to dynamically add columns to your data, then
## you should use this commented line, otherwise only the information
## asked for by the user in the output data object will be available

# requestedOutput = list(set(output.getParaKeys('output') + self.methodsToRun))
requestedOutput = output.getParaKeys('output')

## The user can simply ask for a computation that may exist in multiple
## interfaces, in that case, we will need to qualify their names for the
## output. The names should already be qualified from the outputDict.
## However, the user may have already qualified the name, so make sure and
## test whether the unqualified name exists in the requestedOutput before
## replacing it.
for key, replacements in outputDict['qualifiedNames'].iteritems():
if key in requestedOutput:
requestedOutput.remove(key)
requestedOutput.extend(replacements)

## Grab all data from the outputDict and anything else requested not
## present in the outputDict will be copied from the input data.
## TODO: User may want to specify which dataset the parameter comes from.
## For now, we assume that if we find more than one an error will
## occur.
## FIXME: There is an issue that the data size should be determined before
## entering this loop, otherwise if say a scalar is first added,
## then dataLength will be 1 and everything longer will be placed
## in the Metadata.
## How do we know what size the output data should be?
dataLength = None
for key in requestedInput + requestedOutput:
storeInOutput = True
value = []
if key in outputDict:
value = outputDict[key]
else:
foundCount = 0
if key in requestedInput:
for inputData in inputList:
if key in inputData.getParametersValues('input',nodeId = 'ending').keys() if inputData.type == 'PointSet' else inputData.getParametersValues('input',nodeId = 'ending').values()[-1].keys():
if inputData.type == 'PointSet':
value = inputData.getParametersValues('input',nodeId = 'ending')[key]
else:
value = [value[key] for value in inputData.getParametersValues('input',nodeId = 'ending').values()]
foundCount += 1
else:
for inputData in inputList:
if key in inputData.getParametersValues('output',nodeId = 'ending').keys() if inputData.type == 'PointSet' else inputData.getParametersValues('output',nodeId = 'ending').values()[-1].keys():
if inputData.type == 'PointSet':
value = inputData.getParametersValues('output',nodeId = 'ending')[key]
else:
value = [value[key] for value in inputData.getParametersValues('output',nodeId = 'ending').values()]
foundCount += 1

if foundCount == 0:
self.raiseAnError(IOError, key + ' not found in the input '
+ 'object or the computed output '
+ 'object.')
elif foundCount > 1:
self.raiseAnError(IOError, key + ' is ambiguous since it occurs'
+ ' in multiple input objects.')

## We need the size to ensure the data size is consistent, but there
## is no guarantee the data is not scalar, so this check is necessary
myLength = 1
if not hasattr(value, "__iter__"):
value = [value]
myLength = len(value)

if dataLength is None:
dataLength = myLength
elif dataLength != myLength:
self.raiseAWarning('Requested output for ' + key + ' has a'
+ ' non-conformant data size ('
+ str(dataLength) + ' vs ' + str(myLength)
+ '), it is being placed in the metadata.')
storeInOutput = False

## Finally, no matter what, place the requested data somewhere
## accessible
if storeInOutput:
if key in requestedInput:
for histNum, val in enumerate(value):
param = key if output.type == 'PointSet' else [histNum+1,key]
output.updateInputValue(param, val)
else:
for histNum, val in enumerate(value):
if output.type == 'HistorySet':
if histNum+1 in dataLenghtHistory.keys():
if dataLenghtHistory[histNum+1] != len(val):
self.raiseAnError(IOError, key + ' the size of the arrays for history '+str(histNum+1)+' are different!')
else:
dataLenghtHistory[histNum+1] = len(val)
param = key if output.type == 'PointSet' else [histNum+1,key]
output.updateOutputValue(param, val)
else:
if not hasattr(value, "__iter__"):
value = [value]
for val in value:
output.updateMetadata(key, val)
elif output.type in ['PointSet', 'HistorySet']:
output.load(outputDict, style='dict', dims=output.getDimensions())
else:
self.raiseAnError(IOError, 'Unknown output type: ' + str(output.type))

Expand All @@ -315,8 +203,8 @@ def run(self, inputIn):
@ In, inputIn, dict, dictionary of data to process
@ Out, outputDict, dict, Dictionary containing the post-processed results
"""
input = self.inputToInternal(inputIn)
outputDict = {'qualifiedNames' : {}}
inputDict = self.inputToInternal(inputIn)
outputDict = {}
## This will map the name to its appropriate interface and method
## in the case of a function being defined in two separate files, we
## qualify the output by appending the name of the interface from which it
Expand All @@ -332,42 +220,55 @@ def run(self, inputIn):
matchingInterfaces.append(interface)
if len(matchingInterfaces) == 0:
self.raiseAWarning(method + ' not found. I will skip it.')
elif len(matchingInterfaces) == 1:
methodMap[method] = (matchingInterfaces[0], method)
#elif len(matchingInterfaces) == 1:
# methodMap[method] = (matchingInterfaces[0], method)
else:
outputDict['qualifiedNames'][method] = []
for interface in matchingInterfaces:
methodName = interface.name + '.' + method
methodName = interface.name + '_' + method
methodMap[methodName] = (interface, method)
outputDict['qualifiedNames'][method].append(methodName)

## Evaluate the method and add it to the outputDict, also if the method
## adjusts the input data, then you should update it as well.
warningMessages = []
for methodName, (interface, method) in methodMap.iteritems():
outputDict[methodName] = interface.evaluate(method, input['targets'])
# The deep copy is needed since the interface postprocesor will change the values of inputDict
tempInputDict = copy.deepcopy(inputDict)
outputDict[methodName] = np.atleast_1d(copy.copy(interface.evaluate(method, tempInputDict)))
if outputDict[methodName] is None:
self.raiseAnError(Exception,"the method "+methodName+" has not produced any result. It needs to return a result!")
for target in input['targets']:
for target in tempInputDict.keys():
if hasattr(interface, target):
#if target not in outputDict.keys():
if target not in methodMap.keys():
attributeInSelf = getattr(interface, target)
if len(np.atleast_1d(attributeInSelf)) != len(np.atleast_1d(input['targets'][target])) or (np.atleast_1d(attributeInSelf) - np.atleast_1d(input['targets'][target])).all():
if (np.atleast_1d(attributeInSelf)).shape != (np.atleast_1d(inputDict[target])).shape or (np.atleast_1d(attributeInSelf) - np.atleast_1d(inputDict[target])).all():
if target in outputDict.keys():
self.raiseAWarning("In Post-Processor "+ self.name +" the modified variable "+target+
" has the same name of a one already modified throuhg another Function method." +
" has the same name of a one already modified through another Function method." +
" This method overwrites the input DataObject variable value")
outputDict[target] = attributeInSelf
outputDict[target] = np.atleast_1d(attributeInSelf)
else:
warningMessages.append("In Post-Processor "+ self.name +" the method "+method+
" has the same name of a variable contained in the input DataObject." +
" This method overwrites the input DataObject variable value")
for msg in list(set(warningMessages)):
self.raiseAWarning(msg)

for target in input['targets'].keys():
if target not in outputDict.keys() and target in input['targets'].keys():
outputDict[target] = input['targets'][target]
# TODO: We assume the structure of input to the external pp is the same as the structure of output to this external pp
# An interface pp should be used if the user wants to merge two data objects, or change the structures of input data
# objects.
numRlz = len(outputDict.values()[0])
for val in outputDict.values():
if len(val) != numRlz:
self.raiseAnError(IOError, "The return results from the external functions have different number of realizations!"
+ " This postpocessor ", self.name, " requests all the returned values should have the same number of realizations.")
for target in inputDict.keys():
if target not in outputDict.keys():
if len(inputDict[target]) != numRlz:
self.raiseAWarning("Parameter ", target, " is available in the provided input DataObjects,"
+ " but it has different length from the returned values from the external functions."
+ " Thus this parameter will not be accessible by the output DataObjects!")
else:
outputDict[target] = np.atleast_1d(inputDict[target])

return outputDict
Original file line number Diff line number Diff line change
@@ -1,37 +1,37 @@
Y,X,Sum,Z,Product
0,0,0,0.00098783,0
1,0,1,0.00990705,0
2,0,2,0.0373733,0
3,0,3,0.0802736,0
4,0,4,0.0951609,0
5,0,5,0.0639337,0
0,1,1,0.00616933,0
1,1,2,0.0373733,1
2,1,3,0.146858,2
3,1,4,0.321486,3
4,1,5,0.377879,4
5,1,6,0.239638,5
0,2,2,0.013279,0
1,2,3,0.0802736,2
2,2,4,0.321486,4
3,2,5,0.707676,6
4,2,6,0.830731,8
5,2,7,0.520617,10
0,3,3,0.016889,0
1,3,4,0.0951609,3
2,3,5,0.377879,6
3,3,6,0.830731,9
4,3,7,0.974789,12
5,3,8,0.610521,15
0,4,4,0.0147379,0
1,4,5,0.0639337,4
2,4,6,0.239638,8
3,4,7,0.520617,12
4,4,8,0.610521,16
5,4,9,0.385879,20
0,5,5,0.0121985,0
1,5,6,0.0295975,5
2,5,7,0.0882736,10
3,5,8,0.181299,15
4,5,9,0.21198,20
5,5,10,0.140032,25
X,Y,Z,basicOps_Sum,basicOps_Product
0,0,0.00098783,0,0
0,1,0.00990705,1,0
0,2,0.0373733,2,0
0,3,0.0802736,3,0
0,4,0.0951609,4,0
0,5,0.0639337,5,0
1,0,0.00616933,1,0
1,1,0.0373733,2,1
1,2,0.146858,3,2
1,3,0.321486,4,3
1,4,0.377879,5,4
1,5,0.239638,6,5
2,0,0.013279,2,0
2,1,0.0802736,3,2
2,2,0.321486,4,4
2,3,0.707676,5,6
2,4,0.830731,6,8
2,5,0.520617,7,10
3,0,0.016889,3,0
3,1,0.0951609,4,3
3,2,0.377879,5,6
3,3,0.830731,6,9
3,4,0.974789,7,12
3,5,0.610521,8,15
4,0,0.0147379,4,0
4,1,0.0639337,5,4
4,2,0.239638,6,8
4,3,0.520617,7,12
4,4,0.610521,8,16
4,5,0.385879,9,20
5,0,0.0121985,5,0
5,1,0.0295975,6,5
5,2,0.0882736,7,10
5,3,0.181299,8,15
5,4,0.21198,9,20
5,5,0.140032,10,25
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
X,Y,Z,Delta,Product
X,Y,Z,basicOps_Delta,basicOps_Product
0,0,0.00098783,0,0
0,1,0.00990705,-1,0
0,2,0.0373733,-2,0
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
y,x,z,time
4.28529174363,3.33116167882,3.00851255432,0.01
5.07494567895,3.4265746853,3.07103554926,0.02
partitionHistorySet_time,partitionHistorySet_x,partitionHistorySet_y,partitionHistorySet_z
0.01,3.33116167882,4.28529174363,3.00851255432
0.02,3.4265746853,5.07494567895,3.07103554926
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
y,x,z,time
4.68682374261,3.9478454019,5.01449706314,0.01
5.54738762598,4.02174323597,5.06580569707,0.02
partitionHistorySet_time,partitionHistorySet_x,partitionHistorySet_y,partitionHistorySet_z
0.01,3.9478454019,4.68682374261,5.01449706314
0.02,4.02174323597,5.54738762598,5.06580569707
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
y,x,z,time
6.45211956713,4.93285032932,4.05861544239,0.01
7.56859103846,5.0847772531,4.26865909858,0.02
partitionHistorySetAndRemoveSomeHistories_time,partitionHistorySetAndRemoveSomeHistories_x,partitionHistorySetAndRemoveSomeHistories_y,partitionHistorySetAndRemoveSomeHistories_z
0.01,4.93285032932,6.45211956713,4.05861544239
0.02,5.0847772531,7.56859103846,4.26865909858
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
y,x,z,time
5.34944720817,5.79389328809,3.62650547116,0.01
6.70812699967,5.7494486801,3.83973992134,0.02
partitionHistorySetAndRemoveSomeHistories_time,partitionHistorySetAndRemoveSomeHistories_x,partitionHistorySetAndRemoveSomeHistories_y,partitionHistorySetAndRemoveSomeHistories_z
0.01,5.79389328809,5.34944720817,3.62650547116
0.02,5.7494486801,6.70812699967,3.83973992134
Loading

0 comments on commit 85fe8f3

Please sign in to comment.