Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Support for '+=' in Namelists #1389

Merged
merged 9 commits into from
Apr 28, 2017
146 changes: 116 additions & 30 deletions scripts/lib/CIME/namelist.py
Original file line number Diff line number Diff line change
Expand Up @@ -987,9 +987,12 @@ def set_variable_value(self, group_name, variable_name, value, var_size=1):
>>> x.set_variable_value('foo', 'bar(2)', [u'3'], var_size=4)
>>> x.get_variable_value('foo', 'bar')
[u'1', u'3']
>>> x.set_variable_value('foo', 'bar', [u'2'])
>>> x.set_variable_value('foo', 'bar(1)', [u'2'])
>>> x.get_variable_value('foo', 'bar')
[u'2', u'3']
>>> x.set_variable_value('foo', 'bar', [u'1'])
>>> x.get_variable_value('foo', 'bar')
[u'1', u'3']
>>> x.set_variable_value('foo', 'bazz', [u'3'])
>>> x.set_variable_value('Brack', 'baR', [u'4'])
>>> x.get_variable_value('foo', 'bazz')
Expand All @@ -1003,6 +1006,7 @@ def set_variable_value(self, group_name, variable_name, value, var_size=1):
group_name = group_name.lower()

minindex, maxindex, step = get_fortran_variable_indices(variable_name, var_size)
original_var = variable_name
variable_name = get_fortran_name_only(variable_name.lower())

expect(minindex > 0, "Indices < 1 not supported in CIME interface to fortran namelists... lower bound=%s"%minindex)
Expand Down Expand Up @@ -1534,9 +1538,11 @@ def _parse_variable_name(self, allow_equals=True):
Traceback (most recent call last):
...
_NamelistParseError: Error in parsing namelist: '' is not a valid variable name
>>> _NamelistParser('foo+= ')._parse_variable_name()
u'foo'
"""
old_pos = self._pos
separators = (' ', '\n', '=') if allow_equals else (' ', '\n')
separators = (' ', '\n', '=', '+') if allow_equals else (' ', '\n')
while self._curr() not in separators:
self._advance()
text = self._text[old_pos:self._pos]
Expand Down Expand Up @@ -1659,6 +1665,29 @@ def _look_ahead_for_equals(self, pos):
break
return False

def _look_ahead_for_plusequals(self, pos):
r"""Look ahead to see if the next two non-whitespace character are '+='.

The `pos` argument is the position in the text to start from while
looking. This function returns a boolean.

>>> _NamelistParser('+=')._look_ahead_for_plusequals(0)
True
>>> _NamelistParser('a \n+=')._look_ahead_for_plusequals(1)
True
>>> _NamelistParser('')._look_ahead_for_plusequals(0)
False
>>> _NamelistParser('a+=')._look_ahead_for_plusequals(0)
False
"""
for test_pos in range(pos, self._len):
if self._text[test_pos] not in (' ', '\n'):
if self._text[test_pos] == '+':
return self._look_ahead_for_equals(test_pos + 1)
else:
break
return False

def _parse_literal(self, allow_name=False, allow_eof_end=False):
r"""Parse and return a variable value at the current position.

Expand Down Expand Up @@ -1734,16 +1763,28 @@ def _parse_literal(self, allow_name=False, allow_eof_end=False):
Traceback (most recent call last):
...
_NamelistParseError: Error in parsing namelist: expected literal value, but got 'foo='
>>> _NamelistParser('foo+= ')._parse_literal()
Traceback (most recent call last):
...
_NamelistParseError: Error in parsing namelist: expected literal value, but got 'foo+='
>>> _NamelistParser('5,')._parse_literal(allow_name=True)
u'5'
>>> x = _NamelistParser('foo= ')
>>> x._parse_literal(allow_name=True)
>>> x._curr()
u'f'
>>> x = _NamelistParser('foo+= ')
>>> x._parse_literal(allow_name=True)
>>> x._curr()
u'f'
>>> _NamelistParser('6*foo= ')._parse_literal(allow_name=True)
Traceback (most recent call last):
...
_NamelistParseError: Error in parsing namelist: expected literal value, but got '6*foo='
>>> _NamelistParser('6*foo+= ')._parse_literal(allow_name=True)
Traceback (most recent call last):
...
_NamelistParseError: Error in parsing namelist: expected literal value, but got '6*foo+='
>>> x = _NamelistParser('foo = ')
>>> x._parse_literal(allow_name=True)
>>> x._curr()
Expand Down Expand Up @@ -1782,6 +1823,7 @@ def _parse_literal(self, allow_name=False, allow_eof_end=False):
separators = [' ', '\n', ',', '/']
if allow_name:
separators.append('=')
separators.append('+')
while new_pos != self._len and self._text[new_pos] not in separators:
# allow commas if they are inside ()
if self._text[new_pos] == '(':
Expand All @@ -1794,9 +1836,12 @@ def _parse_literal(self, allow_name=False, allow_eof_end=False):
# At the end of the file, give up by throwing an EOF.
self._advance(self._len)
# If `allow_name` is set, we need to check and see if the next non-blank
# character is '=', and return `None` if so.
# character is '=' or the next two are '+=', and return `None` if so.
if allow_name and self._look_ahead_for_equals(new_pos):
return
elif allow_name and self._look_ahead_for_plusequals(new_pos):
return

self._advance(new_pos - self._pos, check_eof=allow_eof_end)
text = self._text[old_pos:self._pos]
if not any(is_valid_fortran_namelist_literal(type_, text)
Expand Down Expand Up @@ -1911,71 +1956,79 @@ def _parse_name_and_values(self, allow_eof_end=False):
r"""Parse and return a variable name and values assigned to that name.

The return value of this function is a tuple containing (a) the name of
the variable in a string, and (b) a list of the variable's values. Null
the variable in a string, (b) a list of the variable's values, and
(c) whether or not to add the found value to existing variable. Null
values are represented by the empty string.

If `allow_eof_end=True`, the end of the sequence of values might come
from an empty string rather than a slash. (This is used for the
alternate file format in "groupless" mode.)

>>> _NamelistParser("foo='bar' /")._parse_name_and_values()
(u'foo', [u"'bar'"])
(u'foo', [u"'bar'"], False)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please modify the documentation of this function to indicate the new return value in the tuple.

>>> _NamelistParser("foo(3)='bar' /")._parse_name_and_values()
(u'foo(3)', [u"'bar'"])
(u'foo(3)', [u"'bar'"], False)
>>> _NamelistParser("foo ='bar' /")._parse_name_and_values()
(u'foo', [u"'bar'"])
(u'foo', [u"'bar'"], False)
>>> _NamelistParser("foo=\n'bar' /")._parse_name_and_values()
(u'foo', [u"'bar'"])
(u'foo', [u"'bar'"], False)
>>> _NamelistParser("foo 'bar' /")._parse_name_and_values()
Traceback (most recent call last):
...
_NamelistParseError: Error in parsing namelist: expected '=' but found "'"
>>> _NamelistParser("foo='bar','bazz' /")._parse_name_and_values()
(u'foo', [u"'bar'", u"'bazz'"])
(u'foo', [u"'bar'", u"'bazz'"], False)
>>> _NamelistParser("foo=,,'bazz',6*/")._parse_name_and_values()
(u'foo', [u'', u'', u"'bazz'", u'6*'])
(u'foo', [u'', u'', u"'bazz'", u'6*'], False)
>>> _NamelistParser("foo='bar' 'bazz' foo2='ban'")._parse_name_and_values()
(u'foo', [u"'bar'", u"'bazz'"])
(u'foo', [u"'bar'", u"'bazz'"], False)
>>> _NamelistParser("foo='bar' 'bazz' foo2(2)='ban'")._parse_name_and_values()
(u'foo', [u"'bar'", u"'bazz'"])
(u'foo', [u"'bar'", u"'bazz'"], False)
>>> _NamelistParser("foo= foo2='ban' ")._parse_name_and_values()
Traceback (most recent call last):
...
_NamelistParseError: Error in parsing namelist: expected literal value, but got "foo2='ban'"
>>> _NamelistParser("foo=,,'bazz',6* ")._parse_name_and_values(allow_eof_end=True)
(u'foo', [u'', u'', u"'bazz'", u'6*'])
(u'foo', [u'', u'', u"'bazz'", u'6*'], False)
>>> _NamelistParser("foo(3)='bazz'")._parse_name_and_values(allow_eof_end=True)
(u'foo(3)', [u"'bazz'"])
(u'foo(3)', [u"'bazz'"], False)
>>> _NamelistParser("foo=")._parse_name_and_values()
Traceback (most recent call last):
...
_NamelistEOF: Unexpected end of file encountered in namelist.
>>> _NamelistParser("foo=")._parse_name_and_values(allow_eof_end=True)
(u'foo', [u''])
(u'foo', [u''], False)
>>> _NamelistParser("foo= ")._parse_name_and_values(allow_eof_end=True)
(u'foo', [u''])
(u'foo', [u''], False)
>>> _NamelistParser("foo=2")._parse_name_and_values(allow_eof_end=True)
(u'foo', [u'2'])
(u'foo', [u'2'], False)
>>> _NamelistParser("foo=1,2")._parse_name_and_values(allow_eof_end=True)
(u'foo', [u'1', u'2'])
(u'foo', [u'1', u'2'], False)
>>> _NamelistParser("foo(1:2)=1,2,3 ")._parse_name_and_values(allow_eof_end=True)
Traceback (most recent call last):
...
SystemExit: ERROR: Too many values for array foo(1:2)
>>> _NamelistParser("foo=1,")._parse_name_and_values(allow_eof_end=True)
(u'foo', [u'1', u''])
(u'foo', [u'1', u''], False)
>>> _NamelistParser("foo+=1")._parse_name_and_values(allow_eof_end=True)
(u'foo', [u'1'], True)
"""
name = self._parse_variable_name()
addto = False # This keeps track of whether += existed

self._eat_whitespace()
# check to see if we have a "+="
if self._curr() == '+':
self._advance()
addto=True # tell parser that we want to add to dictionary values
self._expect_char("=")
try:
self._advance()
self._eat_whitespace()
except _NamelistEOF:
# If we hit the end of file, return a name assigned to a null value.
if allow_eof_end:
return name, [u'']
return name, [u''], addto
else:
raise
# Expect at least one literal, even if it's a null value.
Expand All @@ -1994,7 +2047,7 @@ def _parse_name_and_values(self, allow_eof_end=False):
arraylen =max(0,1 + ((maxindex - minindex)/step))
expect(len(values) <= arraylen, "Too many values for array %s"%(name))

return name, values
return name, values, addto

def _parse_namelist_group(self):
r"""Parse an entire namelist group, adding info to `self._settings`.
Expand Down Expand Up @@ -2037,6 +2090,22 @@ def _parse_namelist_group(self):
>>> x._parse_namelist_group()
>>> x._settings
OrderedDict([(u'foo', [u"'bar'"])])
>>> x = _NamelistParser("&group foo='bar', foo+='baz' /", groupless=True)
>>> x._parse_namelist_group()
>>> x._settings
OrderedDict([(u'foo', [u"'bar'", u"'baz'"])])
>>> x = _NamelistParser("&group foo+='bar' /", groupless=True)
>>> x._parse_namelist_group()
>>> x._settings
OrderedDict([(u'foo', [u"'bar'"])])
>>> x = _NamelistParser("&group foo='bar', foo+='baz' /")
>>> x._parse_namelist_group()
>>> x._settings
OrderedDict([(u'group', {u'foo': [u"'bar'", u"'baz'"]})])
>>> x = _NamelistParser("&group foo+='bar' /")
>>> x._parse_namelist_group()
>>> x._settings
OrderedDict([(u'group', {u'foo': [u"'bar'"]})])
"""
group_name = self._parse_namelist_group_name()
if not self._groupless:
Expand All @@ -2047,18 +2116,24 @@ def _parse_namelist_group(self):
self._settings[group_name] = {}
self._eat_whitespace()
while self._curr() != '/':
name, values = self._parse_name_and_values()
name, values, addto = self._parse_name_and_values()
dsettings = []
if self._groupless:
if name in self._settings:
dsettings = self._settings[name]
values = merge_literal_lists(dsettings, values)
if addto:
values = self._settings[name] + values
if not addto:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks like it works correctly now, but I'm still wondering why it looks different from the analogous code below:

                if name in self._settings:
                    if addto:
                        values = self._settings[name] + values
                    else:
                        values = merge_literal_lists(self._settings[name], values)

My understanding is that the two blocks of code are doing the same thing, so I'd find it easier to read & understand this code if they looked identical (or called some shared function). I realize that they looked a little different in the baseline code, but now that this code block is getting more complicated, it would be good to make them uniform, so that the true differences between the two don't get lost in accidental differences like this.

This would amount to a small logic change, in that the 'if not addto' block would now only be executed if name in self._settings is true; but if I understand the code correctly, that shouldn't matter, because: if (not addto) is true but (name in self._settings) is false, then you're calling merge_literal_lists with dsettings = [], which looks to me like a no-op.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Beats the heck out of me why it is different. I don't seem to understand it well enough, but I do know that it works.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, I believe I addressed your issues in the latest push.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, I guess I'm okay with your leaving this as is. However, particularly since the two versions look different, I feel that all of the += tests that you have for parse_namelist should be duplicated for _parse_namelist_group (because knowing that this code works right in one doesn't tell you that it works right in the other).

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

_parse_namelist_group only parses groups. If you look at the tests in that function there is only group testing without variable testing.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think that's right. My understanding from a quick read of the code is that _parse_namelist_group is for parsing variables that belong to a namelist group, which is actually the case for most variables. Indeed, I have confirmed that, when I remove these lines:

--- a/scripts/lib/CIME/namelist.py
+++ b/scripts/lib/CIME/namelist.py
@@ -2105,8 +2105,6 @@ def _parse_namelist_group(self):
             if self._groupless:
                 if name in self._settings:
                     dsettings = self._settings[name]
-                    if addto:
-                        values = self._settings[name] + values
                 if not addto:
                     values = merge_literal_lists(dsettings, values)
                 self._settings[name] = values

all doctests still pass, yet the behavior is incorrect when I have this in user_nl_cpl:

cplflds_custom = 'foo'
cplflds_custom += 'bar'

In this case, I get:

$ grep cplflds_custom CaseDocs/drv_in
  cplflds_custom = 'bar'

As far as I can tell, the critical code in _parse_namelist_group is doing the same thing as the analogous code in parse_namelist - it's just that one is used for variables that belong to a group and the other is used for variables that don't belong to a group (I didn't know there were any variables in the latter category, actually). This was the motivation for my suggestion of making them share code, or at least look identical. I still understand if you don't want to do that out of fear of breaking something, but if they don't share code, then it's important that there be tests that cover both blocks of nearly-identical code.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that I can explain the group vs no-group thing: variables read from namelist definition xml files always have a group associated with them as it is defined in the xml file. However variables read from user_nl_* do not immediately have a group associated with them and so fall into that no group category. I was not the author of this code and admit to finding it less than intuitive...

I don't think that's completely right. I just tried introducing these diffs:

--- a/scripts/lib/CIME/namelist.py
+++ b/scripts/lib/CIME/namelist.py
@@ -2102,6 +2102,7 @@ def _parse_namelist_group(self):
         while self._curr() != '/':
             name, values, addto = self._parse_name_and_values()
             dsettings = []
+            print("Parsing a group: name, groupless = %s, %s"%(name, self._groupless))
             if self._groupless:
                 if name in self._settings:
                     dsettings = self._settings[name]
@@ -2173,6 +2174,7 @@ def parse_namelist(self):
         if self._groupless and self._curr() != '&':
             while self._pos < self._len:
                 name, values, addto = self._parse_name_and_values(allow_eof_end=True)
+                print("Parsing groupless: %s"%(name))
                 if name in self._settings:
                     if addto:
                         values = self._settings[name] + values

and when I ran ./preview_namelists with a user_nl_cpl file that sets cplflds_custom, I got:

Parsing a group: name, groupless = cplflds_custom, True
Parsing a group: name, groupless = cplflds_custom, True

I don't fully understand the operation of this code, but I am confident that _parse_namelist_group is being called to parse at least some entries in user_nl files, and so needs to be covered by tests.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, I was thinking about _parse_namelist_group_name. I have now pushed that last change.

values = merge_literal_lists(dsettings, values)
self._settings[name] = values
else:
group = self._settings[group_name]
if name in group:
dsettings = group[name]
values = merge_literal_lists(dsettings, values)
if addto:
values = group[name] + values
if not addto:
values = merge_literal_lists(dsettings, values)
group[name] = values

def parse_namelist(self):
Expand All @@ -2085,16 +2160,24 @@ def parse_namelist(self):
OrderedDict([(u'foo', [u"'bar'", u"'bazz'", u'']), (u'foo2', [u'2*5', u'6'])])
>>> _NamelistParser("!blah \n foo='bar'", groupless=True).parse_namelist()
OrderedDict([(u'foo', [u"'bar'"])])
>>> _NamelistParser("foo='bar', foo='bazz'", groupless=True).parse_namelist()
OrderedDict([(u'foo', [u"'bazz'"])])
>>> _NamelistParser("foo='bar', foo=", groupless=True).parse_namelist()
OrderedDict([(u'foo', [u"'bar'"])])
>>> _NamelistParser("foo='bar', foo(3)='bazz'", groupless=True).parse_namelist()
OrderedDict([(u'foo', [u"'bar'"]), (u'foo(3)', [u"'bazz'"])])
>>> _NamelistParser("foo(2)='bar'", groupless=True).parse_namelist()
OrderedDict([(u'foo(2)', [u"'bar'"])])
>>> _NamelistParser("foo(2)='bar', foo(3)='bazz'", groupless=True).parse_namelist()
OrderedDict([(u'foo(2)', [u"'bar'"]), (u'foo(3)', [u"'bazz'"])])
>>> _NamelistParser("foo='bar', foo='bazz'", groupless=True).parse_namelist()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I understand the code correctly, the tests you added here cover the case where groupless is True, where the implementation is below. But I don't see similar tests for the implementation in _parse_namelist_group. Tests should be added to cover that code, too. Or, better yet, the duplicate code should be extracted into a shared routine called from both places; then it could just be covered by one set of tests.

OrderedDict([(u'foo', [u"'bazz'"])])
>>> _NamelistParser("foo='bar'\n foo+='bazz'", groupless=True).parse_namelist()
OrderedDict([(u'foo', [u"'bar'", u"'bazz'"])])
>>> _NamelistParser("foo='bar', foo='bazz'", groupless=True).parse_namelist()
OrderedDict([(u'foo', [u"'bazz'"])])
>>> _NamelistParser("foo='bar', foo=", groupless=True).parse_namelist()
OrderedDict([(u'foo', [u"'bar'"])])
>>> _NamelistParser("foo='bar', 'bazz'\n foo+='ban'", groupless=True).parse_namelist()
OrderedDict([(u'foo', [u"'bar'", u"'bazz'", u"'ban'"])])
>>> _NamelistParser("foo+='bar'", groupless=True).parse_namelist()
OrderedDict([(u'foo', [u"'bar'"])])
"""
# Return empty dictionary for empty files.
if self._len == 0:
Expand All @@ -2108,9 +2191,12 @@ def parse_namelist(self):
# Handle case with no namelist groups.
if self._groupless and self._curr() != '&':
while self._pos < self._len:
name, values = self._parse_name_and_values(allow_eof_end=True)
name, values, addto = self._parse_name_and_values(allow_eof_end=True)
if name in self._settings:
values = merge_literal_lists(self._settings[name], values)
if addto:
values = self._settings[name] + values
else:
values = merge_literal_lists(self._settings[name], values)
self._settings[name] = values
return self._settings
# Loop over namelist groups in the file.
Expand Down