dataobject rework: Optimizer #461

Merged Dec 12, 2017 (30 commits).

Commits:
5c170c8: adaptive sparse grid, sobol working (PaulTalbot-INL, Dec 5, 2017)
e9c9347: Adaptive sampling, plus Dummy-based rlz updates from input, output (PaulTalbot-INL, Dec 5, 2017)
bbdbbf9: cleanup (PaulTalbot-INL, Dec 5, 2017)
27699ed: added cluster labeling to outstream print, will be tested with optimizer (PaulTalbot-INL, Dec 6, 2017)
ac9feb3: optimizers working, but need to fix reading from CSV before merging (PaulTalbot-INL, Dec 6, 2017)
80ceb1b: stash (PaulTalbot-INL, Dec 6, 2017)
f51566d: fixed prefix as numpy array (PaulTalbot-INL, Dec 6, 2017)
c60f110: loading CSV correctly now, so optimizer can find optimal solns (PaulTalbot-INL, Dec 6, 2017)
cf9aab1: cleanup (PaulTalbot-INL, Dec 6, 2017)
4465ee1: now handling infs and nans (PaulTalbot-INL, Dec 6, 2017)
58b9fdb: Merge branch 'talbpaul/csv-read-fix' into talbpaul/optimizer (PaulTalbot-INL, Dec 6, 2017)
1583e10: mergefixes (PaulTalbot-INL, Dec 6, 2017)
5e5a150: cleanup (PaulTalbot-INL, Dec 7, 2017)
e563371: added the option to avoid to reprint the files in case their content … (alfoa, Dec 8, 2017)
97aac72: reverted 101 tests modified by the conversion scripts (by mistake) (alfoa, Dec 8, 2017)
6efb5ad: reverted other tests (alfoa, Dec 8, 2017)
f384661: reverted tests user guide (alfoa, Dec 8, 2017)
8db4e86: reverted all the other tests (alfoa, Dec 8, 2017)
5dc8ea2: Update beale.xml (alfoa, Dec 8, 2017)
068ecb9: removed " (alfoa, Dec 8, 2017)
a39aec0: Merge branch 'talbpaul/optimizer' of https://github.com/idaholab/rave… (alfoa, Dec 8, 2017)
0f2eab5: Merge branch 'dataobject-rework' of https://github.com/idaholab/raven… (alfoa, Dec 8, 2017)
82af4fc: removed abstract method that has been removed from the XDataSet (alfoa, Dec 8, 2017)
b3a8a6a: fixed with respect to the new prefix strategy (alfoa, Dec 8, 2017)
b9d437a: fixed loading of dataset in case no metadata are found in the xml (fi… (alfoa, Dec 8, 2017)
e1bd4d4: fixed type in ROM causing a failure (alfoa, Dec 8, 2017)
7336602: fixed another typo that was making the Dummy.py model to fail (alfoa, Dec 8, 2017)
480ce33: removed whitespace that was added from another commit (PaulTalbot-INL, Dec 11, 2017)
c1609c1: updated to use pd.isnull instead of np.isnan in unordered CSV differ (PaulTalbot-INL, Dec 11, 2017; see the note after this list)
70a2a99: test files for differ (PaulTalbot-INL, Dec 11, 2017)
framework/DataObjects/TestXDataSet.py: 131 changes (124 additions, 7 deletions)
@@ -37,7 +37,7 @@
 import MessageHandler
 
 mh = MessageHandler.MessageHandler()
-mh.initialize({'verbosity':'debug', 'callerLength':10, 'tagLength':10})
+mh.initialize({'verbosity':'silent', 'callerLength':10, 'tagLength':10})
 
 print('Module undergoing testing:')
 print (XDataSet )
@@ -169,7 +169,7 @@ def checkRlz(comment,first,second,tol=1e-10,update=True,skip=None):
   for key,val in first.items():
     if key in skip:
       continue
-    if isinstance(val,float):
+    if isinstance(val,(float,int)):
       pres = checkFloat('',val,second[key][0],tol,update=False)
     elif isinstance(val,(str,unicode)):
       pres = checkSame('',val,second[key][0],update=False)
@@ -330,11 +330,11 @@ def formatRealization(rlz):
 rlzMissing = dict(rlz0)
 rlz0['z'] = 6.0
 formatRealization(rlz0)
-checkFails('DataSet addRealization err missing','Provided realization does not have all requisite values: \"z\"',data.addRealization,args=[rlzMissing])
+checkFails('DataSet addRealization err missing','Provided realization does not have all requisite values for object \"DataSet\": \"z\"',data.addRealization,args=[rlzMissing])
 # bad formatting
 rlzFormat = dict(rlz0)
 rlzFormat['c'] = list(rlzFormat['c'])
-checkFails('DataSet addRealization err format','Realization was not formatted correctly! See warnings above.',data.addRealization,args=[rlzFormat])
+checkFails('DataSet addRealization err format','Realization was not formatted correctly for "DataSet"! See warnings above.',data.addRealization,args=[rlzFormat])
 # test appending
 data.addRealization(dict(rlz0))
 
@@ -610,11 +610,20 @@ def formatRealization(rlz):
 # check
 checkArray('CSV XML',lines,correct,str)
+## read from CSV/XML
+xml = createElement('DataSet',attrib={'name':'csv'})
+# scramble the IO space, also skip 'z' for testing
+xml.append(createElement('Input',text='a,x,y'))
+xml.append(createElement('Output',text='c,b'))
+xml.append(createElement('Index',attrib={'var':'t'},text='y,c'))
 dataCSV = XDataSet.DataSet()
 dataCSV.messageHandler = mh
 dataCSV._readMoreXML(xml)
 dataCSV.load(csvname,style='CSV')
 for var in data.getVars():
-  if isinstance(data.getVarValues(var).item(0),(float,int)):
+  if var == 'z':
+    # not included in XML input specs, so should be left out
+    checkFails('CSV var z','z',dataCSV.getVarValues,args=var)
+  elif isinstance(data.getVarValues(var).item(0),(float,int)):
     checkTrue('CSV var {}'.format(var),(dataCSV._data[var] - data._data[var]).sum()<1e-20) #necessary due to roundoff
   else:
     checkTrue('CSV var {}'.format(var),bool((dataCSV._data[var] == data._data[var]).prod()))
@@ -631,7 +640,7 @@ def formatRealization(rlz):
 # by index
 checkRlz('Dataset full origin idx 1',data.realization(index=1),rlz1,skip=['time'])
 checkRlz('Dataset full netcdf idx 1',dataNET.realization(index=1),rlz1,skip=['time'])
-checkRlz('Dataset full csvxml idx 1',dataCSV.realization(index=1),rlz1,skip=['time'])
+checkRlz('Dataset full csvxml idx 1',dataCSV.realization(index=1),rlz1,skip=['time','z'])
 # by match
 idx,rlz = data.realization(matchDict={'prefix':'third'})
 checkSame('Dataset full origin match idx',idx,2)
@@ -641,7 +650,7 @@ def formatRealization(rlz):
 checkRlz('Dataset full netCDF match',rlz,rlz2,skip=['time'])
 idx,rlz = dataCSV.realization(matchDict={'prefix':'third'})
 checkSame('Dataset full csvxml match idx',idx,2)
-checkRlz('Dataset full csvxml match',rlz,rlz2,skip=['time'])
+checkRlz('Dataset full csvxml match',rlz,rlz2,skip=['time','z'])
 # TODO metadata checks?
 
 
@@ -767,6 +776,114 @@ def formatRealization(rlz):
 data.addRealization({'a':np.array([2.1]), 't':np.array([0])})
 
 
+######################################
+#          CLUSTER LABELING          #
+######################################
+# as used by the Optimizer, for example. We store as a flat point set, then
+# divide up by cluster label for printing.
+# create data object
+xml = createElement('PointSet',attrib={'name':'test'})
+xml.append(createElement('Input',text='a,b'))
+xml.append(createElement('Output',text='x,y'))
+data = XDataSet.DataSet()
+data.messageHandler = mh
+data._readMoreXML(xml)
+# register "trajID" (cluster label) and "varsUpdate" (iteration number/monotonically increasing var) as meta
+data.addExpectedMeta(['trajID','varsUpdate'])
+# add two trajectories to get started, like starting two trajectories
+rlz0_0 = {'trajID': np.atleast_1d(1),
+          'a': np.atleast_1d( 1.0),
+          'b': np.atleast_1d( 5.0),
+          'x': np.atleast_1d( 10.0),
+          'y': np.atleast_1d(100.0),
+          'varsUpdate': np.atleast_1d(0)}
+rlz1_0 = {'trajID': np.atleast_1d(2),
+          'a': np.atleast_1d( 2.0),
+          'b': np.atleast_1d( 6.0),
+          'x': np.atleast_1d( 20.0),
+          'y': np.atleast_1d(200.0),
+          'varsUpdate': np.atleast_1d(0)}
+data.addRealization(rlz0_0)
+data.addRealization(rlz1_0)
+checkRlz('Cluster initial traj 1',data.realization(index=0),rlz0_0,skip='varsUpdate')
+checkRlz('Cluster initial traj 2',data.realization(index=1),rlz1_0,skip='varsUpdate')
+# now sample a new trajectory point, going into the collector
+rlz0_1 = {'trajID': np.atleast_1d(1),
+          'a': np.atleast_1d( 1.1),
+          'b': np.atleast_1d( 5.1),
+          'x': np.atleast_1d( 10.1),
+          'y': np.atleast_1d(100.1),
+          'varsUpdate': np.atleast_1d(1)}
+data.addRealization(rlz0_1)
+checkRlz('Cluster extend traj 1[0]',data.realization(matchDict={'trajID':1,'varsUpdate':0})[1],rlz0_0,skip='varsUpdate')
+checkRlz('Cluster extend traj 1[1]',data.realization(matchDict={'trajID':1,'varsUpdate':1})[1],rlz0_1,skip='varsUpdate')
+checkRlz('Cluster extend traj 2[0]',data.realization(matchDict={'trajID':2,'varsUpdate':0})[1],rlz1_0,skip='varsUpdate')
+# now collapse and then append to the data
+data.asDataset()
+rlz1_1 = {'trajID': np.atleast_1d(2),
+          'a': np.atleast_1d( 2.1),
+          'b': np.atleast_1d( 6.1),
+          'x': np.atleast_1d( 20.1),
+          'y': np.atleast_1d(200.1),
+          'varsUpdate': np.atleast_1d(1)}
+data.addRealization(rlz1_1)
+checkRlz('Cluster extend traj 2[1]',data.realization(matchDict={'trajID':2,'varsUpdate':1})[1],rlz1_1,skip='varsUpdate')
+# print it
+fname = 'XDataUnitTestClusterLabels'
+data.write(fname,style='csv',clusterLabel='trajID')
+# manually check contents
+for l,line in enumerate(open(fname+'.csv','r')):
+  if l == 0:
+    checkSame('Cluster CSV main [0]',line.strip(),'trajID,filename')
+  elif l == 1:
+    checkSame('Cluster CSV main [1]',line.strip(),'1,{}_1.csv'.format(fname))
+  elif l == 2:
+    checkSame('Cluster CSV main [2]',line.strip(),'2,{}_2.csv'.format(fname))
+for l,line in enumerate(open(fname+'_1.csv','r')):
+  if l == 0:
+    checkSame('Cluster CSV id1 [0]',line.strip(),'a,b,x,y,varsUpdate')
+  elif l == 1:
+    line = list(float(x) for x in line.split(','))
+    checkArray('Cluster CSV id1 [1]',line,[1.0,5.0,10.0,100.0,0],float)
+  elif l == 2:
+    line = list(float(x) for x in line.split(','))
+    checkArray('Cluster CSV id1 [1]',line,[1.1,5.1,10.1,100.1,1],float)
+for l,line in enumerate(open(fname+'_2.csv','r')):
+  if l == 0:
+    checkSame('Cluster CSV id1 [0]',line.strip(),'a,b,x,y,varsUpdate')
+  elif l == 1:
+    line = list(float(x) for x in line.split(','))
+    checkArray('Cluster CSV id1 [1]',line,[2.0,6.0,20.0,200.0,0],float)
+  elif l == 2:
+    line = list(float(x) for x in line.split(','))
+    checkArray('Cluster CSV id1 [1]',line,[2.1,6.1,20.1,200.1,1],float)
+# load it as a history # TODO first, loading needs to be fixed to use DataObject params instead of XML params
+from XHistorySet import HistorySet
+xml = createElement('HistorySet',attrib={'name':'test'})
+xml.append(createElement('Input',text='trajID'))
+xml.append(createElement('Output',text='a,b,x,y'))
+options = createElement('options')
+options.append(createElement('pivotParameter',text='varsUpdate'))
+xml.append(options)
+data2 = HistorySet()
+data2.messageHandler = mh
+data2._readMoreXML(xml)
+data2.load(fname,style='csv')
+# check data is correct by realization
+correct = {'a':np.array([ 1.0, 1.1]),
+           'b':np.array([ 5.0, 5.1]),
+           'x':np.array([ 10.0, 10.1]),
+           'y':np.array([100.0,100.1]),
+           'trajID':np.array([1])}
+checkRlz('Cluster read [0]',data2.realization(index=0),correct)
+correct = {'a':np.array([ 2.0, 2.1]),
+           'b':np.array([ 6.0, 6.1]),
+           'x':np.array([ 20.0, 20.1]),
+           'y':np.array([200.0,200.1]),
+           'trajID':np.array([2])}
+checkRlz('Cluster read [1]',data2.realization(index=1),correct)
+
+
 print(results)
 
 sys.exit(results["fail"])
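
The manual content checks above pin down the on-disk layout that write(fname,style='csv',clusterLabel='trajID') produces: a master CSV mapping each cluster label to a per-cluster file holding that trajectory's samples in sampling order. A standalone sketch of that layout, reusing the file name and variables from the test (an illustration of the format, not the DataSet writer's implementation):

# flat point set from the test: two trajectories, two iterations each
samples = [
  {'trajID':1, 'a':1.0, 'b':5.0, 'x':10.0, 'y':100.0, 'varsUpdate':0},
  {'trajID':2, 'a':2.0, 'b':6.0, 'x':20.0, 'y':200.0, 'varsUpdate':0},
  {'trajID':1, 'a':1.1, 'b':5.1, 'x':10.1, 'y':100.1, 'varsUpdate':1},
  {'trajID':2, 'a':2.1, 'b':6.1, 'x':20.1, 'y':200.1, 'varsUpdate':1},
]
fname = 'XDataUnitTestClusterLabels'
cols = ['a','b','x','y','varsUpdate']
labels = sorted(set(s['trajID'] for s in samples))
# master file maps each cluster label to the sub-CSV holding its samples
with open(fname+'.csv','w') as master:
  master.write('trajID,filename\n')
  for label in labels:
    sub = '{}_{}.csv'.format(fname,label)
    master.write('{},{}\n'.format(label,sub))
    # per-cluster file holds only that trajectory's rows, in sample order
    with open(sub,'w') as f:
      f.write(','.join(cols)+'\n')
      for s in samples:
        if s['trajID'] == label:
          f.write(','.join(str(s[c]) for c in cols)+'\n')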
framework/DataObjects/TestXHistorySet.py: 3 changes (1 addition, 2 deletions)
@@ -169,7 +169,7 @@ def checkRlz(comment,first,second,tol=1e-10,update=True,skip=None):
   for key,val in first.items():
     if key in skip:
       continue
-    if isinstance(val,float):
+    if isinstance(val,(float,int)):
      pres = checkFloat('',val,second[key][0],tol,update=False)
     elif isinstance(val,(str,unicode)):
       pres = checkSame('',val,second[key][0],update=False)
@@ -625,7 +625,6 @@ def formatRealization(rlz):
 rlz0 = data.realization(index=0)
 checkRlz('No input space',rlz0,rlz,skip='Timelike')
 
-# TODO more exhaustive tests are needed, but this is sufficient for initial work.
 
 print(results)
 
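
The float-to-(float,int) widening in checkRlz here (and in TestXDataSet.py above) is needed because .item() on an integer numpy array returns a plain Python int, which an isinstance(val,float) test rejects; a quick illustration, assuming that is the motivating case:

import numpy as np

print(type(np.array([0]).item(0)))    # <class 'int'>
print(type(np.array([1.5]).item(0)))  # <class 'float'>
val = np.array([0]).item(0)
print(isinstance(val,float))          # False: the old check skipped ints
print(isinstance(val,(float,int)))    # True: the new check catches both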
framework/DataObjects/XDataObject.py: 10 changes (0 additions, 10 deletions)
@@ -344,16 +344,6 @@ def constructNDSample(self,vals,dims,coords,name=None):
     """
     pass
 
-  @abc.abstractmethod
-  def extendExistingEntry(self,rlz):
-    """
-      Extends an ND sample to include more data.
-      Probably only useful for the hacky way the Optimizer stores Trajectories.
-      @ In, rlz, dict, {name:value} as {str:float} of the variables to extend
-      @ Out, None
-    """
-    pass
-
   @abc.abstractmethod
   def getDimensions(self,var):
     """
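
With extendExistingEntry gone, nothing in the DataObject API grows an existing sample in place; the cluster-labeling test above shows the replacement pattern, where each optimizer iteration is stored as its own realization tagged with trajID/varsUpdate metadata and recovered by match. A condensed sketch of that pattern, using only calls that appear in the test (createElement, XDataSet, and mh as set up in TestXDataSet.py):

import numpy as np
# createElement, XDataSet, and mh as defined in TestXDataSet.py above
xml = createElement('PointSet',attrib={'name':'test'})
xml.append(createElement('Input',text='a,b'))
xml.append(createElement('Output',text='x,y'))
data = XDataSet.DataSet()
data.messageHandler = mh
data._readMoreXML(xml)
data.addExpectedMeta(['trajID','varsUpdate'])
# each optimizer step is a fresh realization; no in-place extension needed
data.addRealization({'trajID':np.atleast_1d(1), 'varsUpdate':np.atleast_1d(0),
                     'a':np.atleast_1d(1.0), 'b':np.atleast_1d(5.0),
                     'x':np.atleast_1d(10.0), 'y':np.atleast_1d(100.0)})
data.addRealization({'trajID':np.atleast_1d(1), 'varsUpdate':np.atleast_1d(1),
                     'a':np.atleast_1d(1.1), 'b':np.atleast_1d(5.1),
                     'x':np.atleast_1d(10.1), 'y':np.atleast_1d(100.1)})
# recover a specific step of trajectory 1 by metadata match
idx,rlz = data.realization(matchDict={'trajID':1,'varsUpdate':1})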