-
Notifications
You must be signed in to change notification settings - Fork 10
/
cdscan.py
1854 lines (1645 loc) · 75.1 KB
/
cdscan.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/env python
from __future__ import print_function
import sys
import getopt
import cdms2
from cdms2.grid import lookupArray
from cdms2.axis import calendarToTag, tagToCalendar
from cdms2.cdmsobj import CdFromObject, CdString, CdScalar, CdFloat, CdDouble, CdShort, CdInt, CdLong
import numpy
import string
import cdtime
import os.path
import copy
from cdms2 import cdmsNode
import re
from functools import reduce
from cdms2.error import CDMSError
from collections import OrderedDict
from six import string_types
from cdms2.util import getenv_bool
usage = """Usage:
cdscan [options] <files>
Scan a list of files producing a CDMS dataset in XML representation. See Notes below
for a more complete explanation.
Arguments:
<files> is a list of file paths to scan. The files can be listed in any order, and may
be in multiple directories. A file may also be a CDML dataset (.xml or .cdml), in
which case the dataset(s) and files are combined into a new dataset.
Options:
-a alias_file: change variable names to the aliases defined in an alias file.
Each line of the alias file consists of two blank separated
fields: variable_id alias. 'variable_id' is the ID of the variable
in the file, and 'alias' is the name that will be substituted for
it in the output dataset. Only variables with entries in the alias_file
are renamed.
-c calendar: either "gregorian", "proleptic_gregorian", "julian", "noleap", or "360_day". Default:
"gregorian". This option should be used with caution, as it will
override any calendar information in the files.
-d dataset_id: dataset identifier. Default: "none"
-e newattr: Add or modify attributes of a file, variable, or
axis. The form of 'newattr' is either:
'var.attr = value' to modify a variable or attribute, or
'.attr = value' to modify a global (file) attribute.
In either case, 'value' may be quoted to preserve spaces
or force the attribute to be treated as a string. If
'value' is not quoted and the first character is a
digit, it is converted to integer or
floating-point. This option does not modify the input
datafiles. See notes and examples below.
--exclude var,var,...
Exclude specified variables. The argument
is a comma-separated list of variables containing no blanks.
In contrast to --exclude-file, this skips the variables regardless
of the file(s) in which they are contained, but processes other
variables in the files.
Also see --include.
--exclude-file pattern
Exclude files with a basename matching the regular expression pattern.
In contrast to --exclude, this skips the file entirely. Multiple patterns
may be listed by separating with vertical bars (e.g. abc|def ). Note
that the match is to the initial part of the basename. For example, the
pattern 'st' matches any basename starting with 'st'.
-f file_list: file containing a list of absolute data file names, one per
line. <files> arguments are ignored.
--forecast generate a description of a forecast dataset.
This is not compatible with the -i, -r, -t, or -l options.
A file can contain data for exactly one forecast; its
forecast_reference_time (aka run time, analysis time, starting time,
generating time, tau=0 time) is specified by the nbdate,nbsec variables.
Each file's time axis will be interpreted as the forecast_period (aka
tau, the interval from the forecast_reference_time to the current time)
regardless of its units, standard_name, or other attributes.
-h: print a help message.
-i time_delta: scan time as a 'linear' dimension. This is useful if the time dimension
is very long. The argument is the time delta, a float or integer. For
example, if the time delta is 6 hours, and the reference units are
"hours since xxxx", set the interval delta to 6. The default value is
the difference of the first two timepoints.
--ignore-open-error:
Ignore open errors. Print a warning and continue.
--include var,var,...
Only include specified variables in the output. The argument
is a comma-separated list of variables containing no blanks.
Also see --exclude.
--include-file pattern
Only include files with a basename matching the regular expression pattern.
In contrast to --include, this skips files entirely if they do not
match the pattern. Multiple patterns
may be listed by separating with vertical bars (e.g. abc|def ). Note
that the match is to the initial part of the basename. For example, the
pattern 'st' matches any basename starting with 'st'.
-j: scan time as a vector dimension. Time values are listed
individually. Turns off the -i option.
-l levels: list of levels, comma-separated. Only specify if files are partitioned by
levels.
-m levelid: name of the vertical level dimension. The default is the name of the
vertical level dimension
--notrim-lat: Don't trim latitude values (in degrees) to the range [-90..90]. By default
latitude values are trimmed.
-p template: Compatibility with pre-V3.0 datasets. 'cdimport -h' describes template strings.
-q: quiet mode
-r time_units: time units of the form "<units> since yyyy-mm-dd hh:mi:ss", where
<units> is one of "year", "month", "day", "hour", "minute", "second".
Trailing fields may be omitted. The default is the units of the first
time dimension found.
-s suffix_file: Append a suffix to variable names, depending on the directory
containing the data file. This can be used to distinguish variables
having the same name but generated by different models or ensemble
runs. 'suffix_file' is the name of a file describing a mapping between
directories and suffixes. Each line consists of two blank-separated
fields: 'directory' 'suffix'. Each file path is compared to the
directories in the suffix file. If the file path is in that directory
or a subdirectory, the corresponding suffix is appended to the variable
IDs in the file. If more than one such directory is found, the first
directory found is used. If no match is made, the variable ids are not
altered. Regular expressions can be used: see the example in the Notes
section.
-t timeid: id of the partitioned time dimension. The default is the name of the time
dimension.
--time-linear tzero,delta,units[,calendar]
Override the time dimensions(s) with a linear time dimension. The arguments are
a comma-separated list:
tzero is the initial time point, a floating-point value.
delta is the time delta, floating-point.
units are time units as specified in the [-r] option.
calendar is optional, and is specified as in the [-c] option. If omitted, it
defaults to the value specified by [-c], otherwise as specified in the file.
Example: --time-linear '0,1,months since 1980,noleap'
Note (6) compares this option with [-i] and [-r]
--var-locate 'var,file_pattern':
Only scan a variable if the basename of the file matches the pattern. This
may be used to resolve duplicate variable errors. var and file_pattern are
separated by a comma, with no blanks.
var is the name of the variable
file_pattern is a regular expression following the Python re module syntax.e
Example: to scan variable ps from files starting with the string 'ps_':
--var-locate 'ps,ps_.*'
-x xmlfile: XML filename. By default, output is written to standard output.
Example:
cdscan -c noleap -d test -x test.xml [uv]*.nc
cdscan -d pcmdi_6h -i 0.25 -r 'days since 1979-1-1' *6h*.ctl
Notes:
(1) The files can be in netCDF, GrADS/GRIB, HDF, or DRS format, and can be listed in
any order. Most commonly, the files are the result of a single experiment, and the
'partitioned' dimension is time. The time dimension of a variable is the coordinate
variable having a name that starts with 'time' or having an attribute "axis='T'". If
this is not the case, specify the time dimension with the -t option. The time
dimension should be in the form supported by cdtime. If this is not the case (or to
override them) use the -r option.
(2) The basic form of the command is 'cdscan <files>'. By default, the time values are
listed explicitly in the output XML. This can cause a problem if the time dimension is
very long, say for 6-hourly data. To handle this the form 'cdscan -i delta <files>'
may be used. This generates a compact time representation of the form <start, length,
delta>. An exception is raised if the time dimension for a given file is not linear.
(3) Another form of the command is 'cdscan -l lev1,lev2,..,levn <files>'. This asserts
that the dataset is partitioned in both time and vertical level dimensions. The level
dimension of a variable is the dimension having a name that starts with "lev", or
having an attribute "axis=Z". If this is not the case, set the level name with the -m
option.
(4) An example of a suffix file:
/exp/pr/ncar-a _ncar-a
/exp/pr/ecm-a _ecm-a
/exp/ta/ncar-a _ncar-a
/exp/ta/ecm-a _ecm-a
For all files in directory /exp/pr/ncar-a or a subdirectory, the corresponding
variable ids will be appended with the suffix '_ncar-a'. Regular expressions can be
used, as defined in the Python 're' module. For example, The previous example can be
replaced with the single line:
/exp/[^/]*/([^/]*) _\g<1>
Note the use of parentheses to delimit a group. The syntax \g<n> refers to the n-th
group matched in the regular expression, with the first group being n=1. The string
[^/]* matches any sequence of characters other than a forward slash.
(5) Adding or modifying attributes with the -e option:
time.units = "days since 1979-1-1"
sets the units of all variables/axes to "Days since 1979-1-1". Note
that since this is done before any other processing is done, it allows
overriding of non-COARDS time units.
.newattr=newvalue
Set the global file attribute 'newattr' to 'newvalue'.
(6) The [--time-linear] option overrides the time values in the file(s). The resulting
dimension does not have any gaps. In contrast, the [-i], [-r] options use the specified
time units (from [-r]), and calendar from [-c] if specified, to convert the file times
to the new units. The resulting linear dimension may have gaps.
In either case, the files are ordered by the time values in the files.
The [--time-linear] option should be used with caution, as it is applied to all the time
dimensions found.
"""
# Ensure that arrays are fully printed to XML files
numpy.set_printoptions(threshold=numpy.inf)
# Forward and reverse maps between calendar tag strings (e.g. "noleap")
# and cdtime calendar constants, re-exported under cdscan's historical names.
calendarMap = tagToCalendar
reverseCalendarMap = calendarToTag
# Parses "-e" option expressions of the form "[var].attr = value" into
# groups (var, attr, value); var may be empty for a global attribute.
attrPattern = re.compile(r'\s*(\w*)\.(\w+)\s*=\s*(.*)$')
# Turn off the netCDF use-parallel flag before any files are opened.
cdms2.setNetcdfUseParallelFlag(0)
def timestamp():
    """Return the current UTC time formatted as "Y-M-D H:M:S" (fields not zero-padded)."""
    import time
    fields = time.gmtime(time.time())
    # struct_time's first six fields are (year, month, day, hour, minute, second).
    return "%d-%d-%d %d:%d:%d" % tuple(fields[:6])
def timeindex(value, units, basetime, delta, calendar):
    """ Calculate (t - basetime)/delu
        where t = reltime(value, units)
        and delu is the time interval (delta, delunits) (e.g., 1 month).

        If delta is None the raw relative value is returned undivided.
    """
    # Units containing " as " denote an absolute time; anything else is relative.
    if " as " in units:
        tval = cdtime.abstime(value, units)
    else:
        tval = cdtime.reltime(value, units)
    relative = tval.torel(basetime, calendar)
    if delta is None:
        return relative.value
    return relative.value / delta
def combineKeys(mydict, typedict, timeIsLinear=0,
                referenceDelta=None, forecast=None):
    """ Combine dictionary keys into an axis.
        dict: (i,j) => (path, axisname)
        typedict is either timedict or levdict or fcdict.
        timeIsLinear is true iff time has a linear representation.
        referenceDelta is only used for error checks if timeIsLinear is true.

        Returns the tuple (fullaxis, name0, compressPart, coordToInd,
        firstunits, partition, linCoordToInd, errorOccurred) where fullaxis
        is the concatenation of all per-file axis segments, name0 the axis
        name from the first key, and errorOccurred is 1 if a linear-time
        consistency check failed.
    """
    global verbose
    # Sort the projected time, level indices
    keys = list(OrderedDict(sorted(mydict.items())).keys())
    axislist = []
    # prevend/prevpath/prevvals describe the previously accepted segment so
    # the current one can be checked for containment or partial overlap.
    prevend = None
    prevpath = None
    name0 = None
    compressPart = []
    partition = []
    previ = 0
    firstunits = ""
    prevvals = None
    # Sentinel entries so lookups with (None, None) coordinates succeed.
    coordToInd = {(None, None): (None, None)}
    linCoordToInd = {(None, None): (None, None)}
    iadj = None
    errorOccurred = 0
    for i0, i1 in keys:
        path, name = mydict[(i0, i1)]
        if name0 is None:
            name0 = name
        values, units, dummy = typedict[(path, name)]
        if firstunits == "":
            firstunits = units
        # Overlap with the previous segment?
        if prevend is not None and prevend >= i0:
            if prevend >= i1:
                # Full containment: the current file's range lies entirely
                # inside the previous one; map its coordinates into the
                # previous segment and skip appending anything.
                if verbose:
                    print('Warning, file %s, dimension %s contains values in file %s' % (
                        prevpath, name, path), file=sys.stderr)
                if timeIsLinear:
                    iind = lookupArray(prevvals, values[0])
                    jind = lookupArray(prevvals, values[-1])
                else:
                    iind = lookupArray(prevvals, i0)
                    jind = lookupArray(prevvals, i1)
                if len(values) != (jind - iind + 1):
                    raise RuntimeError(
                        'Dimension %s in files %s [len(%s)=%d], %s [len(%s)=%d], is inconsistent' %
                        (name, prevpath, name, (jind - iind + 1), path, name, len(values)))
                coordToInd[(i0, i1)] = (iind, jind)
                prevspart, prevepart = partition[-1]
                linCoordToInd[(i0, i1)] = (
                    prevspart + iind, prevspart + jind + 1)
                continue
            else:  # Fix partial overlap
                # Truncate the previously stored segment at the point where
                # the current file takes over, updating all bookkeeping
                # structures (axislist, compressPart, partition, maps).
                if timeIsLinear:
                    jind = lookupArray(prevvals, values[0])
                else:
                    jind = lookupArray(prevvals, i0)
                if verbose:
                    print('Warning, file %s, dimension %s overlaps file %s, value=%f' % (
                        prevpath, name, path, prevvals[jind]), file=sys.stderr)
                previ, prevj = compressPart[-1]
                prevj = previ + jind
                axislist[-1] = prevvals[0:jind]
                compressPart[-1] = (previ, prevj)
                coordToInd[(prevvals[0], prevvals[-1])] = (previ, prevj)
                previ = prevj
                prevspart, prevepart = partition[-1]
                prevepart = prevspart + jind
                partition[-1] = (prevspart, prevepart)
                linCoordToInd[(prevvals[0], prevvals[-1])
                              ] = (prevspart, prevepart)
        # Accept the current segment and record its index ranges.
        axislist.append(values)
        prevend = i1
        prevpath = path
        prevj = previ + len(values)
        compressPart.append((previ, prevj))
        coordToInd[(i0, i1)] = (previ, prevj)
        if iadj is None:  # partition has to start with 0
            iadj = int(i0)
        spart = int(i0) - iadj
        epart = int(i1) + 1 - iadj
        partition.append((spart, epart))
        linCoordToInd[(i0, i1)] = (spart, epart)
        # For linear time, a mismatch between the value count and the
        # partition width means timepoints are missing in this file.
        if timeIsLinear and len(values) != (epart - spart):
            # Find the bad values
            diffs = values[1:] - values[:-1]
            badindices = numpy.compress(
                numpy.not_equal(diffs, referenceDelta), list(range(len(values))))
            badvalues = numpy.take(values, badindices)
            if verbose:
                print("Error: Missing values in %s after times: %s." % (path, str(badvalues)) +
                      " Set delta with the -i option or turn off linear mode with the -j option.",
                      file=sys.stderr)
            errorOccurred = 1
        prevvals = values
        previ = prevj
    fullaxis = numpy.ma.concatenate(axislist)
    return fullaxis, name0, compressPart, coordToInd, firstunits, partition, linCoordToInd, errorOccurred
def useKeys(mydict, typedict, timeIsLinear=0,
            referenceDelta=None, forecast=None):
    """ Use dictionary keys for an axis. This is like combineKeys (same arguments, same return values,
        was written by simplifying combineKeys), but this doesn't do nearly so much because this is
        for an axis where there is no splitting across files, hence partitions are not needed.
        dict: (i,j) => (path, axisname)
        typedict is either timedict or levdict or fcdict.
        timeIsLinear is true iff time has a linear representation.
        referenceDelta is only used for error checks if timeIsLinear is true.
    """
    # Walk the (i,j) keys in sorted order, concatenating each file's values.
    segments = []
    axisname = None
    firstunits = None
    for key in sorted(mydict.keys()):
        path, name = mydict[key]
        if axisname is None:
            axisname = name
        values, units, _ = typedict[(path, name)]
        if firstunits is None:
            firstunits = units
        segments.append(values)
    fullaxis = numpy.ma.concatenate(segments)
    # No partitioning across files: compressPart, coordToInd, partition and
    # linCoordToInd have no meaning here and are returned as None.
    return fullaxis, axisname, None, None, firstunits, None, None, 0
def copyDict(mydict):
    """Copy a dictionary-like object dict to a true dictionary"""
    # Works for any object exposing keys() and __getitem__ (e.g. cdms2's
    # 'fake' attribute dictionaries, which copy.copy cannot handle).
    return {key: mydict[key] for key in mydict.keys()}
def disambig(name, mydict, num, comparator, value):
    """ Make an unique name from name, wrt to the keys in dictionary dict.
    Try using num first. comparator(value,dict[name]) returns 0 if equal, 1 if not.

    Raises BaseException if name, name_<num>, and name_<num>_a..z are all taken
    by non-equal entries.
    """
    if name not in mydict or not comparator(value, mydict[name]):
        uniqname = name
    else:
        # First fallback: append the numeric suffix.
        uniqname = '%s_%d' % (name, num)
        if uniqname in mydict and comparator(value, mydict[uniqname]):
            # Second fallback: append letters a..z until an unused (or equal)
            # name is found.
            trial_name = uniqname
            # BUG FIX: string.lowercase is Python 2 only and raises
            # AttributeError on Python 3; ascii_lowercase exists in both.
            for letter in string.ascii_lowercase:
                uniqname = '%s_%s' % (trial_name, letter)
                if uniqname not in mydict or not comparator(
                        value, mydict[uniqname]):
                    break
            else:
                raise BaseException(
                    'Cannot make axis name unique: ' + str(name))
    return uniqname
def compareaxes(axis1, axis2):
    """Return 0 if equal, 1 if not"""
    # Different lengths can never be equal; otherwise compare values
    # numerically (masked-array aware, tolerant of float rounding).
    if len(axis1) != len(axis2):
        return True
    return not numpy.ma.allclose(axis1[:], axis2[:])
def comparedomains(domain1, domain2):
    """Return 0 if equal, 1 if not"""
    if len(domain1) != len(domain2):
        return 1
    for item1, item2 in zip(domain1, domain2):
        if not isinstance(item1, type(item2)):
            return 1
        if isinstance(item1, string_types):
            # NOTE: returns immediately on the first string item, without
            # examining the remaining items — preserved from the original.
            return item1 != item2
        if compareaxes(item1, item2):
            return 1
    return 0
def compareVarDictValues(val1, val2):
    """Return 0 if the domains (first element) of two vardict entries match, 1 if not."""
    domain_a = val1[0]
    domain_b = val2[0]
    return comparedomains(domain_a, domain_b)
def cleanupAttrs(attrs):
    """Normalize attribute values in place.

    Collapses 1-element ndarrays to scalars, stringifies longer ndarrays,
    leaves 0-d arrays and non-array values untouched, and removes the
    'missing_value' entry when it is None as well as any 'ndim' entry.
    """
    for attname in list(attrs.keys()):
        attval = attrs[attname]
        if not isinstance(attval, numpy.ndarray):
            continue
        if attval.ndim == 0:
            continue
        if len(attval) == 1:
            attrs[attname] = attval[0]
        else:
            attrs[attname] = str(attval)
    if attrs.get('missing_value', 0) is None:
        del attrs['missing_value']
    if 'ndim' in attrs:
        del attrs['ndim']
def validateAttrs(node):
    """Compare attributes against DTD.

    For each external attribute whose DTD constraint requires a numeric
    scalar but whose XML value is a string, attempt to coerce it. On
    failure a warning is printed and a secondary coercion is attempted;
    'modulo', 'add_offset' and 'scale_factor' are dropped entirely when
    they cannot be made into floats.
    """
    global verbose
    # The node's own datatype stands in for CdFromObject-typed attributes.
    if hasattr(node, 'datatype'):
        parenttype = node.datatype
    else:
        parenttype = None
    atts = node.getExternalDict()
    for attname in atts.keys():
        (attval, datatype) = atts[attname]  # (XML value, datatype)
        constraint = node.extra.get(attname)
        if constraint is not None:
            # (CdScalar|CdArray, required type)
            (scaletype, reqtype) = constraint
            if reqtype == CdFromObject:
                reqtype = parenttype
            # Only try to fix scalar string values whose required type differs.
            if reqtype != datatype and datatype == CdString and scaletype == CdScalar:
                if reqtype in (CdFloat, CdDouble) and not isinstance(
                        attval, float):
                    try:
                        # NOTE(review): when this float() succeeds the converted
                        # value is not written back via setExternalAttr — only
                        # the failure path updates the node. Confirm intended.
                        attval = float(attval)
                    except BaseException:
                        if verbose:
                            print("Warning: %s=%s should be a float, id=%s" % (
                                attname, attval, node.id), end=' ', file=sys.stderr)
                        # Second chance: strings like "3" parse as int first.
                        try:
                            attval = int(attval)
                            attval = float(attval)
                            if verbose:
                                print("(Recasting)")
                            node.setExternalAttr(attname, attval)
                        except BaseException:
                            if attname in [
                                    'modulo', 'add_offset', 'scale_factor']:
                                if verbose:
                                    print("(Removing)")
                                # NOTE(review): assumes getExternalDict returns
                                # the live dict so this del mutates the node —
                                # verify against cdmsNode's implementation.
                                attdict = node.getExternalDict()
                                del attdict[attname]
                            else:
                                if verbose:
                                    print("")
                elif reqtype in (CdShort, CdInt, CdLong) and not isinstance(attval, int):
                    try:
                        attval = int(attval)
                    except BaseException:
                        if verbose:
                            print("Warning: %s=%s should be an integer, id=%s" % (
                                attname, attval, node.id), end=' ', file=sys.stderr)
                        # Second chance: strings like "3.0" parse as float first.
                        try:
                            attval = float(attval)
                            attval = int(attval)
                            if verbose:
                                print("(Recasting)")
                            node.setExternalAttr(attname, attval)
                        except BaseException:
                            if verbose:
                                print("")
def cloneWithLatCheck(axis):
    """Clone an axis, ensuring that latitudes (in degrees) are in the range [-90:90]

    Trimming is skipped when the module flag ``notrimlat`` is nonzero.
    Bounds are copied when compatible; otherwise generic bounds are generated.
    """
    global verbose
    global notrimlat
    origvals = axis[:]
    axisvals = origvals
    # Only latitude axes whose units begin with "degree" are candidates.
    if axis.isLatitude() and hasattr(
            axis, "units") and axis.units[0:6].lower() == "degree":
        if notrimlat == 0:
            axisvals = numpy.maximum(-90.0, numpy.minimum(90.0, axisvals))
        if not numpy.ma.allclose(axisvals, origvals) and verbose:
            print(
                "Warning: resetting latitude values: ",
                origvals,
                " to: ",
                axisvals,
                file=sys.stderr)
    savedbounds = axis.getBounds()
    clone = cdms2.createAxis(copy.copy(axisvals))
    clone.id = axis.id
    try:
        clone.setBounds(savedbounds)
    except CDMSError:
        # Original bounds don't fit the (possibly trimmed) values;
        # fall back to generated generic bounds.
        clone.setBounds(clone.genGenericBounds())
    for attname, attval in axis.attributes.items():
        setattr(clone, attname, attval)
    return clone
def addAttrs(fobj, eattrs):
    """Add extra attributes to file/dataset fobj.
    eattrs has the form [(varid,attr,value), (varid,attr,value), ...]
    where if varid is '', set the global attribute."""
    for varid, attname, attval in eattrs:
        if varid == '':
            target = fobj
        else:
            target = fobj[varid]
            # Silently skip variables not present in this file.
            if target is None:
                continue
        target.__dict__[attname] = attval
def setNodeDict(node, mydict):
    """Copy each dict entry onto node.attribute, tagging it with a CDMS datatype.

    Integers (including numpy integer scalars) map to CdLong, floats to
    CdDouble, and everything else to CdString.
    """
    for key, value in mydict.items():
        if isinstance(value, (numpy.integer, int)):
            datatype = CdLong
        elif isinstance(value, (numpy.floating, float)):
            datatype = CdDouble
        else:
            datatype = CdString
        node.attribute[key] = (value, datatype)
def initialize_filemap(filemap, timedict, levdict, timeid, extendDset, splitOnTime,
                       referenceTime, timeIsLinear, referenceDelta, splitOnLevel,
                       dirlen, overrideCalendar):
    """Populate filemap, timedict and levdict from an existing CDMS dataset
    (extendDset) so that newly scanned files can be merged with it.
    Mutates the three dictionary arguments in place; returns None.
    """
    # This function was formerly part of the body of "main".
    # Initialize filemap : varid => (tc0, tc1, lc0, lc1, path, timeid, levid)
    # where tc0 is the first time index relative to the reference time, tc1 the last,
    # lc0 is the first level, lc1 the last, path is the filename, timeid is the id
    # of the time dimension of the variable, levid is the id of the level dimension
    #
    # timedict : (path, timeid) => (timearray, timeunits, calendar)
    #
    # levdict : (path, levelid) => (levelarray, levelunits, None)
    #
    initfilemap = cdms2.dataset.parseFileMap(extendDset.cdms_filemap)
    dsetdirec = extendDset.directory
    for namelist, slicelist in initfilemap:
        for name in namelist:
            var = extendDset[name]
            timeaxis = var.getTime()
            # NOTE(review): 'calendar' is assigned only on this branch; if no
            # variable has a time axis, or the calendar is overridden, later
            # references below may raise NameError — confirm against callers.
            if timeaxis is not None and not overrideCalendar:
                calendar = timeaxis.getCalendar()
            if splitOnTime and timeaxis is not None:
                # Prefer the on-disk dimension name when it differs from the id.
                if hasattr(timeaxis, 'name_in_file'):
                    timeid = timeaxis.name_in_file
                else:
                    timeid = timeaxis.id
                if referenceTime is None:
                    referenceTime = timeaxis.units
                # Derive linearity and the reference delta from the first
                # time axis encountered when not already fixed by options.
                if timeIsLinear in [None, 1]:
                    timeIsLinear = timeaxis.isLinear()
                    if timeIsLinear:
                        if len(timeaxis) > 1:
                            referenceDelta = timeaxis[1] - timeaxis[0]
                        else:
                            referenceDelta = 1.0
                    else:
                        referenceDelta = None
            else:
                timeid = None
            levelaxis = var.getLevel()
            if splitOnLevel and levelaxis is not None:
                if hasattr(levelaxis, 'name_in_file'):
                    levid = levelaxis.name_in_file
                else:
                    levid = levelaxis.id
            else:
                levid = None
            varmaplist = []
            for t0, t1, lev0, lev1, path in slicelist:
                # Paths in the filemap are stored relative to the common directory.
                fullpath = os.path.join(dsetdirec, path)
                basepath = fullpath[dirlen:]
                if t0 is not None:
                    # Project the first/last file time onto the reference time line.
                    tc0 = timeindex(
                        timeaxis[t0],
                        timeaxis.units,
                        referenceTime,
                        referenceDelta,
                        calendar)
                    tc1 = timeindex(timeaxis[t1 - 1],
                                    timeaxis.units,
                                    referenceTime,
                                    referenceDelta,
                                    calendar)
                    # NOTE(review): timedict is keyed by 2-tuples
                    # (basepath, timeid), so this 3-tuple membership test can
                    # never match and the entry is recomputed every pass —
                    # presumably should test (basepath, timeid); confirm.
                    if (basepath, timeid, calendar) not in timedict:
                        values = timeaxis[t0:t1]
                        timedict[(basepath, timeid)] = (
                            values, timeaxis.units, calendar)
                else:
                    tc0 = tc1 = None
                if lev0 is not None:
                    lc0 = levelaxis[lev0]
                    lc1 = levelaxis[lev1 - 1]
                    # NOTE(review): same 2-tuple vs 3-tuple key mismatch as
                    # for timedict above; the guard never matches.
                    if (basepath, levid, None) not in levdict:
                        values = levelaxis[lev0:lev1]
                        levdict[(basepath, levid)] = (
                            values, levelaxis.units, None)
                else:
                    lc0 = lc1 = None
                varmaplist.append(
                    (tc0, tc1, lc0, lc1, basepath, timeid, levid, calendar))
            if name in filemap:
                filemap[name].extend(varmaplist)
            else:
                filemap[name] = varmaplist
# ------------------------------------------------------------------------
# Module-wide verbosity flag: 1 = print progress and warnings (default);
# set to 0 by the -q (quiet) command-line option.
verbose = 1
def main(argv):
global verbose
global notrimlat
try:
args, lastargs = getopt.getopt(
argv[1:], "a:c:d:e:f:hi:jl:m:p:qr:s:t:x:",
["include=", "include-file=", "exclude=", "exclude-file=", "forecast", "time-linear=",
"notrim-lat", "var-locate=", "ignore-open-error"])
except getopt.error:
print(sys.exc_info()[1], file=sys.stderr)
print(usage, file=sys.stderr)
sys.exit(0)
calendar = None
calenkey = None
timeid = None
levelid = None
notrimlat = 0
referenceTime = None
referenceDelta = None
readFromFile = 0
splitOnTime = 1
splitOnLevel = 0
datasetid = "none"
timeIsLinear = None
writeToStdout = 1
templatestr = None
timeIsVector = None
modelMapFile = None
aliasMapFile = None
overrideCalendar = 0
extraAttrs = []
extraDict = {}
includeList = None
excludeList = None
overrideTimeLinear = None
varLocate = None
ignoreOpenError = False
excludePattern = None
includePattern = None
forecast = False
for flag, arg in args:
if flag == '-a':
aliasMapFile = arg
elif flag == '-c':
calenkey = arg.lower()
calendar = calendarMap[calenkey]
overrideCalendar = 1
elif flag == '-d':
datasetid = arg
elif flag == '-e':
matchObj = attrPattern.match(arg)
if matchObj is None:
raise RuntimeError(
"Expression must have form '[var].attr=value': %s" % arg)
matchGroups = matchObj.groups()
if len(matchGroups) != 3:
raise RuntimeError(
"Expression must have form '[var].attr=value': %s" % arg)
matchValue = matchGroups[2]
if len(matchValue) > 0 and (matchValue[0].isdigit(
) or matchValue[0] in ['"', "'", "-", "+"]): # "
matcheval = eval(matchValue)
else:
matcheval = str(matchValue)
extraAttrs.append((matchGroups[0], matchGroups[1], matcheval))
elif flag == '--exclude':
if arg[0] == '-':
raise RuntimeError("--exclude option requires an argument")
excludeList = arg.split(',')
elif flag == '--exclude-file':
excludePattern = arg
elif flag == '-f':
readFromFile = 1
filelistpath = arg
elif flag == '--forecast': # experimental forecast mode
forecast = True
splitOnTime = 0
splitOnLevel = 0
elif flag == '-h':
print(usage)
sys.exit(0)
elif flag == '-i':
splitOnTime = 1
referenceDelta = float(arg)
timeIsLinear = 1
timeIsVector = None
elif flag == '--ignore-open-error':
ignoreOpenError = True
elif flag == '--include':
if arg[0] == '-':
raise RuntimeError("--include option requires an argument")
includeList = arg.split(',')
elif flag == '--include-file':
includePattern = arg
elif flag == '-j':
timeIsVector = 1
timeIsLinear = None
elif flag == '-l':
splitOnLevel = 1
levelstr = arg.split(',')
levellist = list(map(float, levelstr))
levels = numpy.array(levellist)
levels = numpy.sort(levels)
elif flag == '-m':
levelid = arg
args.append(('-e', '%s.axis=Z' % levelid)) # Add axis=Z attribute
elif flag == '--notrim-lat':
notrimlat = 1
elif flag == '-p':
templatestr = arg
elif flag == '-q':
verbose = 0
elif flag == '-r':
splitOnTime = 1
referenceTime = arg
elif flag == '-s':
modelMapFile = arg
elif flag == '-t':
splitOnTime = 1
timeid = arg
args.append(('-e', '%s.axis=T' % timeid)) # Add axis=T attribute
elif flag == '--time-linear':
targlist = arg.split(',')
ttzero = float(targlist[0])
tdelta = float(targlist[1])
tunits = targlist[2].strip()
if len(targlist) == 4:
tcalendar = targlist[3].strip()
else:
tcalendar = None
overrideTimeLinear = [ttzero, tdelta, tunits, tcalendar]
elif flag == '--var-locate':
if varLocate is None:
varLocate = {}
vname, pattern = arg.split(',')
varLocate[vname] = pattern
elif flag == '-x':
writeToStdout = 0
xmlpath = arg
# If overriding time, process time as vector so that no gaps result
if overrideTimeLinear is not None:
timeIsVector = 1
timeIsLinear = None
if overrideCalendar == 1:
overrideTimeLinear[3] = calenkey
if verbose:
print('Finding common directory ...')
if readFromFile:
f = open(filelistpath)
lastargs = f.readlines()
f.close()
# Ignore blank paths
realargs = []
for arg in lastargs:
sarg = arg.strip()
if len(sarg) > 0:
realargs.append(sarg)
lastargs = realargs
# Split lastargs into files and datasets
fileargs = []
dsetargs = []
for arg in lastargs:
base, suffix = os.path.splitext(arg)
if suffix.lower() in ['.xml', '.cdml']:
dsetargs.append(arg)
else:
fileargs.append(arg)
# Generate a list of pathnames for datasets
dsetfiles = []
for path in dsetargs:
dset = cdms2.open(path)
if not hasattr(dset, 'cdms_filemap'):
raise RuntimeError(
'Dataset must have a cdms_filemap attribute: ' + path)
if not hasattr(dset, 'directory'):
raise RuntimeError(
'Dataset must have a directory attribute: ' + path)
dsetdirec = dset.directory
initfilemap = cdms2.dataset.parseFileMap(dset.cdms_filemap)
for namelist, slicelist in initfilemap:
for t0, t1, lev0, lev1, path in slicelist:
dsetfiles.append(os.path.join(dsetdirec, path))
augmentedArgs = fileargs + dsetfiles
# Find the common directory
directory = os.path.commonprefix(augmentedArgs)
firstpath = augmentedArgs[0][len(directory):]
if not os.path.isfile(os.path.join(directory, firstpath)):
dnew = os.path.dirname(directory)
if len(dnew) > 0 and directory[len(dnew)] == '/':
directory = dnew + '/'
else:
directory = dnew
if verbose:
print('Common directory:', directory)
dirlen = len(directory)
if templatestr is not None:
if os.path.isabs(templatestr):
templatestr = templatestr[dirlen:]
templatere, ignore = cdms2.cdmsobj.templateToRegex(templatestr)
template = re.compile(templatere + '$')
else:
template = None
axisdict = {}
vardict = {}
filemap = {}
timedict = {}
levdict = {}
fcdict = {}
global_attrs = None
fctau0 = None
if modelMapFile is not None:
mfile = open(modelMapFile)
modelMap = {}
modelDirs = []
for line in mfile.readlines():
mdirec, model = line.split()
modelMap[mdirec] = model
modelDirs.append(mdirec)
mfile.close()
if aliasMapFile is not None:
afile = open(aliasMapFile)
aliasMap = {}
for line in afile.readlines():
if line[0] not in ["'", '"']: # "
varid, alias = line.split()
else:
dummy, varid, alias = line.split(line[0])
alias = alias.strip()
aliasMap[varid] = alias
afile.close()
# Save extra attribute information for new axes
for evar, eattr, evalue in extraAttrs:
if evar == '':
continue
if evar in extraDict:
curval = extraDict[evar]
curval.append((eattr, evalue))
else:
extraDict[evar] = [(eattr, evalue)]
# ------------------------------------------------------------------------
# Initialize dictionaries if adding to an existing dataset
if verbose and len(dsetargs) > 0:
print('Scanning datasets ...')
for extendPath in dsetargs:
if verbose:
print(extendPath)
extendDset = cdms2.open(extendPath)
# Add/modify attributes
addAttrs(extendDset, extraAttrs)
# Copy the global attribute dictionary if necessary. Note that copy.copy
# can't be used here, since .attributes is now a 'fake' dictionary.
if global_attrs is None:
global_attrs = copyDict(extendDset.attributes)
# Initialize filemap : varid => (tc0, tc1, lc0, lc1, path, timeid, levid)
# where tc0 is the first time index relative to the reference time, tc1 the last,
# lc0 is the first level, lc1 the last, path is the filename, timeid is the id
# of the time dimension of the variable, levid is the id of the level dimension
#
# timedict : (path, timeid) => (timearray, timeunits, calendar)
#
# levdict : (path, levelid) => (levelarray, levelunits, None)
#
initialize_filemap(filemap, timedict, levdict, timeid, extendDset, splitOnTime,
referenceTime, timeIsLinear, referenceDelta, splitOnLevel,
dirlen, overrideCalendar)
# axisdict : id => transient_axis