Reworked cNDarray, no more replicating indexes (#484)

* added a sync check that is too slow * stash * stash before merging in develop branch * stash * moving back to desktop * got types back * all types including histories now preserved * point set now fixed * dummy fix, mergefix * whitespace * review comment cleanup
idaholab · Dec 19, 2017 · 3467d7b · 3467d7b
1 parent be18e77
commit 3467d7b
Show file tree

Hide file tree

Showing 10 changed files with 193 additions and 164 deletions.
diff --git a/framework/DataObjects/TestXDataSet.py b/framework/DataObjects/TestXDataSet.py
@@ -278,7 +278,7 @@ def formatRealization(rlz):
 # NOTE histories are currently disabled pending future work (c,y are history vars)
 checkArray('DataSet __init__ inp',data._inputs,['a','b','c'],str)
 checkArray('DataSet __init__ out',data._outputs,['x','y','z'],str)
-checkArray('DataSet __init__ all',data._allvars,['a','b','c','x','y','z'],str)
+checkArray('DataSet __init__ all',data.vars,['a','b','c','x','y','z'],str)
 checkNone('DataSet __init__ _data',data._data)
 checkNone('DataSet __init__ _collector',data._collector)
 
@@ -619,6 +619,7 @@ def formatRealization(rlz):
 dataCSV.messageHandler = mh
 dataCSV._readMoreXML(xml)
 dataCSV.load(csvname,style='CSV')
+
 for var in data.getVars():
   if var == 'z':
     # not included in XML input specs, so should be left out
@@ -766,7 +767,6 @@ def formatRealization(rlz):
 checkArray('Remove variable remaining vars',data.getVars(),['a'],str)
 checkRlz('Remove variable rlz -1',data.realization(index=-1),rlz)
 # collapse and re-check
-print('PRE:',data._data)
 data.asDataset()
 checkArray('Remove variable remaining vars',data.getVars(),['a'],str)
 checkRlz('Remove variable rlz -1',data.realization(index=-1),rlz)
@@ -824,7 +824,7 @@ def formatRealization(rlz):
                'y': np.array([200.1]),
           'varsUpdate': np.array([1])}
 data.addRealization(rlz1_1)
-tid = data._collector[-1,data._allvars.index('trajID')]
+tid = data._collector[-1,data._orderedVars.index('trajID')]
 checkRlz('Cluster extend traj 2[1]',data.realization(matchDict={'trajID':2,'varsUpdate':1})[1],rlz1_1,skip='varsUpdate')
 # print it
 fname = 'XDataUnitTestClusterLabels'
@@ -916,7 +916,7 @@ def formatRealization(rlz):
        'dbo':np.array([ False,  True, False]),
          't':np.array(['one','two','manystringchars'])}
 data.addRealization(rlz)
-#print('DEBUGG first',data.asDataset())
+data.asDataset()
 # check types
 for var in rlz.keys():
   correct = rlz[var].dtype
@@ -925,7 +925,6 @@ def formatRealization(rlz):
   checkSame('dtype checking "{}"'.format(var),data.asDataset()[var].dtype,correct)
 
 data.addRealization(rlz2)
-#print('DEBUGG second',data.asDataset())
 
 print(results)
 

diff --git a/framework/DataObjects/TestXHistorySet.py b/framework/DataObjects/TestXHistorySet.py
@@ -280,7 +280,7 @@ def formatRealization(rlz):
 # NOTE histories are currently disabled pending future work (c,y are history vars)
 checkArray('HistorySet __init__ inp',data._inputs,['a','b'],str)
 checkArray('HistorySet __init__ out',data._outputs,['x','y'],str)
-checkArray('HistorySet __init__ all',data._allvars,['a','b','x','y'],str)
+checkArray('HistorySet __init__ all',data._orderedVars,['a','b','x','y'],str)
 checkNone('HistorySet __init__ _data',data._data)
 checkNone('HistorySet __init__ _collector',data._collector)
 

diff --git a/framework/DataObjects/TestXPointSet.py b/framework/DataObjects/TestXPointSet.py
@@ -268,7 +268,7 @@ def formatRealization(rlz):
 data._readMoreXML(xml)
 checkArray('DataSet __init__ inp',data._inputs,['a','b'],str)
 checkArray('DataSet __init__ out',data._outputs,['x','z'],str)
-checkArray('DataSet __init__ all',data._allvars,['a','b','x','z'],str)
+checkArray('DataSet __init__ all',data._orderedVars,['a','b','x','z'],str)
 checkNone('DataSet __init__ _data',data._data)
 checkNone('DataSet __init__ _collector',data._collector)
 

diff --git a/framework/DataObjects/XDataObject.py b/framework/DataObjects/XDataObject.py
@@ -108,7 +108,7 @@ def __init__(self):
     self._inputs   = []     # list(str) of input variables
     self._outputs  = []     # list(str) of output variables
     self._metavars = []     # list(str) of POINTWISE metadata variables
-    self._allvars  = []     # list(str) of vars IN ORDER of their index
+    self._orderedVars = []     # list(str) of vars IN ORDER of their index
 
     self._meta         = {}     # dictionary to collect meta until data is collapsed
     self._heirarchal   = False  # if True, non-traditional format (not yet implemented)
@@ -201,7 +201,7 @@ def _readMoreXML(self,xmlNode):
         self._inputs.remove(index)
       except ValueError:
         pass #not requested as input anyway
-    self._allvars = self._inputs + self._outputs
+    self._orderedVars = self._inputs + self._outputs
     if self.messageHandler is None:
       self.messageHandler = MessageCourier()
 

diff --git a/framework/DataObjects/XDataSet.py b/framework/DataObjects/XDataSet.py
diff --git a/framework/DataObjects/XHistorySet.py b/framework/DataObjects/XHistorySet.py
@@ -95,34 +95,7 @@ def _setDefaultPivotParams(self):
     self._pivotParams = {self._tempPivotParam:self._outputs[:]}
 
   ### INTERNAL USE FUNCTIONS ###
-  def _collapseNDtoDataArray(self,data,var,labels=None):
-    """
-      Converts a row of numpy samples into a single DataArray suitable for a xr.Dataset.
-      @ In, data, np.ndarray, array of either float or xr.DataArray; array must be single-dimension
-      @ In, var, str, name of the variable being acted on
-      @ In, labels, list, list of labels to use for collapsed array under self.sampleTag title
-      @ Out, DataArray, xr.DataArray, single dataarray object
-    """
-    # TODO this is only type-checking before using the base class implementation.
-    ## TODO these assertions are identical to the base class right now; should abstract
-    assert(isinstance(data,np.ndarray))
-    assert(len(data.shape) == 1)
-    if labels is None:
-      labels = range(len(data))
-    else:
-      assert(len(labels) == len(data))
-    ## these assertions are specific to history sets -> should they be in addRealization instead?
-    # Inputs and meta should all be single entries, outputs should all be xr.DataArray that depend only on pivotParam
-    if var in self._inputs:
-      assert(isinstance(data[0],(float,str,unicode,int)))
-    elif var in self._outputs:
-      # all outputs are xr.DataArrays
-      assert(isinstance(data[0],xr.DataArray))
-      # all outputs have a single independent coordinate
-      assert(len(data[0].dims) == 1)
-      # all outputs depend only on the pivot parameter
-      assert(data[0].dims[0] == self._pivotParams.keys()[0])
-    return DataSet._collapseNDtoDataArray(self,data,var,labels)
+
 
   def _fromCSV(self,fileName,**kwargs):
     """
@@ -217,7 +190,7 @@ def _toCSV(self,fileName,start=0,**kwargs):
     else:
       data = self._data
       mode = 'w'
-    toDrop = list(var for var in self._allvars if var not in keep)
+    toDrop = list(var for var in self._orderedVars if var not in keep)
     data = data.drop(toDrop)
     self.raiseADebug('Printing data to CSV: "{}"'.format(fileName+'.csv'))
     # specific implementation

diff --git a/framework/DataObjects/XPointSet.py b/framework/DataObjects/XPointSet.py
@@ -93,34 +93,6 @@ def _readMoreXML(self,xmlNode):
       self._selectOutput = ('outputRow',-1)
 
   ### INTERNAL USE FUNCTIONS ###
-  def _collapseNDtoDataArray(self,data,var,labels=None):
-    """
-      Converts a row of numpy samples into a single DataArray suitable for a xr.Dataset.
-      @ In, data, np.ndarray, array of either float or xr.DataArray; array must be single-dimension
-      @ In, var, str, name of the variable being acted on
-      @ In, labels, list, list of labels to use for collapsed array under self.sampleTag title
-      @ Out, DataArray, xr.DataArray, single dataarray object
-    """
-    # TODO this is slightly different but quite similar to the base class.  Should it be separate?
-    assert(isinstance(data,np.ndarray))
-    assert(len(data.shape) == 1)
-    if labels is None:
-      labels = range(len(data))
-    else:
-      assert(len(labels) == len(data))
-    # ALL should be floats or otherwise 1d
-    #assert(isinstance(data[0],(float,str,unicode,int,type(None)))) # --> in LimitSurfaceSearch, first can be "None", floats come later
-    try:
-      assert(isinstance(data[0],(float,str,unicode,int,))) # --> in LimitSurfaceSearch, first can be "None", floats come later
-    except AssertionError as e:
-      raise e
-    array = xr.DataArray(data,
-                         dims=[self.sampleTag],
-                         coords={self.sampleTag:labels},
-                         name=var)
-    array.rename(var)
-    return array
-
   def _convertFinalizedDataRealizationToDict(self,rlz, unpackXArray=False):
     """
       After collapsing into xr.Dataset, all entries are stored as xr.DataArrays.
@@ -157,12 +129,9 @@ def _selectiveRealization(self,rlz):
         elif var in self._outputs or var in self._metavars:
           # TODO where does metadata get picked from?  Seems like output fits best?
           method,indic = self._selectOutput
-        # pivot variables might be included here; try removing them
-        elif var in self.indexes:
-          continue # don't need to handle coordinate dimensions, they come with values
+        # pivot variables are included here in "else"; remove them after they're used in operators
         else:
           toRemove.append(var)
-          print('DEBUGG unhandled:',var)
           continue
         if method in ['inputRow','outputRow']:
           # zero-d xarrays give false behavior sometimes
@@ -180,7 +149,11 @@ def _selectiveRealization(self,rlz):
           pivotParam = self.getDimensions(var)
           assert(len(pivotParam) == 1) # TODO only handle History for now
           pivotParam = pivotParam[var][0]
-          rlz[var] = float(val.sel(**{pivotParam:indic, 'method':b'nearest'})) #casting as str not unicode
+          idx = (np.abs(rlz[pivotParam] - indic)).argmin()
+          rlz[var] = rlz[var][idx]
+          # if history is dataarray -> not currently possible, but keep for when it's needed
+          #if type(rlz[var]).__name__ == 'DataArray':
+          #  rlz[var] = float(val.sel(**{pivotParam:indic, 'method':b'nearest'})) #casting as str not unicode
           # TODO allowing inexact matches; it's finding the nearest
         elif method == 'operator':
           if indic == 'max':

diff --git a/framework/Models/Dummy.py b/framework/Models/Dummy.py
@@ -104,7 +104,7 @@ def _inputToInternal(self,dataIN):
             for entries in dataIN.getVars('input'):
               if localInput[entries] is None:
                 localInput[entries] = []
-              value = dataSet.isel(**{dataSet.sampleTag:hist})[entries].values
+              value = dataSet.isel(**{dataIn.sampleTag:hist})[entries].values
               localInput[entries].append(np.full((sizeIndex,),value,dtype=value.dtype))
       #Now if an OutputPlaceHolder is used it is removed, this happens when the input data is not representing is internally manufactured
       if 'OutputPlaceHolder' in dataIN.getVars('output'):

diff --git a/framework/utils/cached_ndarray.py b/framework/utils/cached_ndarray.py
@@ -339,41 +339,36 @@ def append(self,entry):
     if type(entry) not in [np.ndarray]:
       raise IOError('Tried to add new data to cNDarray.  Can only accept np.ndarray, but got '+type(entry).__name__)
     # for now require full correct shape, later handle the single entry case
-    if len(entry.shape)!=2:
+    if len(entry.shape)!=1:
       # TODO single entry case
-      raise IOError('Tried to add new data to cNDarray.  Need shape (#,{}) but got "{}"!'.format(self.width,entry.shape))
+      raise IOError('Tried to add new data to cNDarray.  Need shape ({},) but got "{}"!'.format(self.width,entry.shape))
     # must have matching width (fix for single entry case)
-    if entry.shape[1] != self.width:
-      raise IOError('Tried to add new data to cNDarray.  Need {} entities per entry, but got '.format(self.width)+str(entry.shape[1]))
+    if entry.shape[0] != self.width:
+      raise IOError('Tried to add new data to cNDarray.  Need {} entries in array, but got '.format(self.width)+str(entry.shape[0]))
     # check if there's enough space in cache to append the new entries
-    if self.size + entry.shape[0] > self.capacity:
+    if self.size + 1 > self.capacity:
       # since there's not enough space, quadruple available space # TODO change growth parameter to be variable?
-      self.capacity += max(self.capacity*4,entry.shape[0])
+      self.capacity += self.capacity*3
       newdata = np.zeros((self.capacity,self.width),dtype=self.values.dtype)
       newdata[:self.size] = self.values[:self.size]
       self.values = newdata
-    self.values[self.size:self.size+entry.shape[0]][:] = entry[:]
-    self.size += entry.shape[0]
+    self.values[self.size] = entry[:]
+    self.size += 1
 
   def addEntity(self,vals,firstEver=False):
     """
       Adds a column to the dataset.
-      @ In, vals, list of np.array([ [#],[#],[#] ], dtype = float or xr.DataArray), fill values (each entry must be shape==(self.size,num new entites))
+      @ In, vals, list, as list(#,#,#) where # is either single-valued or numpy array
       @ Out, None
     """
-    # example 1: for 1 new entity with sample values [1,2,3], "vals" should be:
-    # [ np.array([[1],[2],[3]]) ] (note expecially the outermost list)
-    # example 2: for 2 new entities with sample values [1,2,3] and [4,5,6], "vals" should be:
-    # [ np.array([[1],[2],[3]]), np.array([[4],[5],[6]]) ]
-    for i,v in enumerate(vals):
-      # FIXME slow assertion check
-      if len(v) != self.size:
-        raise IOError('Wrong number ({}) of initial values passed to add entity!  Need {}.'.format(len(v),self.size))
-      # FIXME slow reshaping
-      new = np.ndarray((self.capacity,1),dtype=object)
-      new[:self.size] = v[:]
-      vals[i] = new
-    self.values = np.hstack([self.values] + vals)
+    # create a new column with up to the cached capacity
+    new = np.ndarray(self.capacity,dtype=object)
+    # fill up to current filled size with the values
+    new[:self.size] = vals
+    # reshape so it can be stacked onto the existing data
+    new = new.reshape(self.capacity,1)
+    # "hstack" stacks along the second dimension, or columns for us
+    self.values = np.hstack((self.values,new))
     self.width += 1
 
   def getData(self):

diff --git a/tests/framework/utils/testCachedNDArray.py b/tests/framework/utils/testCachedNDArray.py
@@ -89,6 +89,7 @@ def checkAnswer(comment,value,expected,tol=1e-10,updateResults=True):
 #test min
 checkAnswer('index min',testArray.returnIndexMin(),5)
 
+
 #test repr
 msg = str(testArray)
 right = 'array([ -3.14   ,   2.99792,   2.718  ,   8.987  ,   0.618  ,  -6.626  ,\n        12.56   ,   6.67   ])'