Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimise cycling objects #2322

Merged
merged 7 commits into from
Jun 13, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion lib/cylc/cycling/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,8 @@ class PointBase(object):
_TYPE = None
_TYPE_SORT_KEY = None

__slots__ = ('value')

@abstractproperty
def TYPE(self):
return self._TYPE
Expand Down Expand Up @@ -201,6 +203,8 @@ class IntervalBase(object):
_TYPE = None
_TYPE_SORT_KEY = None

__slots__ = ('value')

@abstractproperty
def TYPE(self):
return self._TYPE
Expand Down Expand Up @@ -325,6 +329,8 @@ class SequenceBase(object):
_TYPE = None
_TYPE_SORT_KEY = None

__slots__ = ()

@abstractproperty
def TYPE(self):
return self._TYPE
Expand Down Expand Up @@ -413,13 +419,16 @@ def __eq__(self, other):
class ExclusionBase(object):
"""A collection of points or sequences that are treated in an
exclusionary manner"""

__metaclass__ = ABCMeta
__slots__ = ('exclusion_sequences', 'exclusion_points',
'exclusion_start_point', 'exclusion_end_point')

def __init__(self, start_point, end_point=None):
"""creates an exclusions object that can contain integer points
or integer sequences to be used as excluded points."""
self.exclusion_sequences = []
self.exclusion_points = set()
self.exclusion_points = []
self.exclusion_start_point = start_point
self.exclusion_end_point = end_point

Expand Down
14 changes: 12 additions & 2 deletions lib/cylc/cycling/integer.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,8 @@ class IntegerPoint(PointBase):
TYPE = CYCLER_TYPE_INTEGER
TYPE_SORT_KEY = CYCLER_TYPE_SORT_KEY_INTEGER

__slots__ = ('value')

def __init__(self, value):
if isinstance(value, int):
value = str(value)
Expand Down Expand Up @@ -163,6 +165,8 @@ class IntegerInterval(IntervalBase):
TYPE = CYCLER_TYPE_INTEGER
TYPE_SORT_KEY = CYCLER_TYPE_SORT_KEY_INTEGER

__slots__ = ('value')

@classmethod
def from_integer(cls, integer):
"""Return an instance of this class using integer."""
Expand Down Expand Up @@ -230,6 +234,8 @@ class IntegerExclusions(ExclusionBase):
"""A collection of integer exclusion points, or sequences of
integers that are treated in an exclusionary manner."""

__slots__ = ExclusionBase.__slots__

def __init__(self, excl_points, start_point, end_point=None):
"""creates an exclusions object that can contain integer points
or integer sequences to be used as excluded points."""
Expand All @@ -244,8 +250,9 @@ def build_exclusions(self, excl_points):
integer_point = get_point_from_expression(
point,
None,
is_required=False)
self.exclusion_points.add(integer_point.standardise())
is_required=False).standardise()
if integer_point not in self.exclusion_points:
self.exclusion_points.append(integer_point)
except PointParsingError:
# Try making an integer sequence
integer_exclusion_sequence = (IntegerSequence(
Expand All @@ -260,6 +267,9 @@ class IntegerSequence(SequenceBase):
TYPE = CYCLER_TYPE_INTEGER
TYPE_SORT_KEY = CYCLER_TYPE_SORT_KEY_INTEGER

__slots__ = ('p_context_start', 'p_context_stop', 'p_start', 'p_stop',
'i_step', 'i_offset', 'exclusions')

@classmethod
def get_async_expr(cls, start_point=None):
"""Express a one-off sequence at the initial cycle point."""
Expand Down
62 changes: 40 additions & 22 deletions lib/cylc/cycling/iso8601.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ class SuiteSpecifics(object):
abbrev_util = None
interval_parser = None
point_parser = None
iso8601_parsers = None


def memoize(function):
Expand Down Expand Up @@ -89,6 +90,8 @@ class ISO8601Point(PointBase):
TYPE = CYCLER_TYPE_ISO8601
TYPE_SORT_KEY = CYCLER_TYPE_SORT_KEY_ISO8601

__slots__ = ('value')

@classmethod
def from_nonstandard_string(cls, point_string):
"""Standardise a date-time string."""
Expand Down Expand Up @@ -173,6 +176,8 @@ class ISO8601Interval(IntervalBase):
TYPE = CYCLER_TYPE_ISO8601
TYPE_SORT_KEY = CYCLER_TYPE_SORT_KEY_ISO8601

__slots__ = ('value')

@classmethod
def get_null(cls):
"""Return a null interval."""
Expand Down Expand Up @@ -288,9 +293,12 @@ class ISO8601Exclusions(ExclusionBase):
grouped exclusion sequences. The Python ``in`` and ``not in`` operators
may be used on this object to determine if a point is in the collection
of exclusion sequences."""

__slots__ = ExclusionBase.__slots__ + ('p_iso_exclusions',)

def __init__(self, excl_points, start_point, end_point=None):
super(ISO8601Exclusions, self).__init__(start_point, end_point)
self.p_iso_exclusions = set()
self.p_iso_exclusions = []
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the rationale behind this change? I would have thought set was better, with the need for membership testing below...

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(this comment was meant to apply to self.exclusion_points)

Copy link
Member

@hjoliver hjoliver Jun 13, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, from a bit of googling, I guess set is implemented rather like dict, hence much higher memory consumption than list (makes sense)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(Answered my own question, will merge ...)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The reason I changed this is because an empty set is much larger than an empty list:

Size in bytes for collections of None elements:

elements list set
0 72 232
1 96 248
2 104 248
3 112 248
4 120 248
5 128 248
6 136 248
7 144 248
8 152 248
9 160 248

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This suite doesn't test effect on performance when exclusions are used, unless I'm missing something? (Although it tests 'empty'/no exclusions, which I suppose would be majority of the use cases?)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no exclusions, which I suppose would be majority of the use cases

The vast majority of sequence objects will not have exclusions. In these cases we have sequence.exclusions = [] so the memory impact is minimal and p_iso_exclusions doesn't even come into it.

The reason for changing to a set was to make exclusion objects a little slimmer - it's pretty marginal I'll admit but many of these changes are, a few bytes saved here, a few there.

self.build_exclusions(excl_points)

def build_exclusions(self, excl_points):
Expand All @@ -299,8 +307,9 @@ def build_exclusions(self, excl_points):
# Try making an ISO8601Point
exclusion_point = ISO8601Point.from_nonstandard_string(
str(point)) if point else None
self.exclusion_points.add(exclusion_point)
self.p_iso_exclusions.add(str(exclusion_point))
if exclusion_point not in self.exclusion_points:
self.exclusion_points.append(exclusion_point)
self.p_iso_exclusions.append(str(exclusion_point))
except (AttributeError, TypeError, ValueError):
# Try making an ISO8601Sequence
exclusion = ISO8601Sequence(point, self.exclusion_start_point,
Expand All @@ -318,6 +327,12 @@ class ISO8601Sequence(SequenceBase):
TYPE_SORT_KEY = CYCLER_TYPE_SORT_KEY_ISO8601
_MAX_CACHED_POINTS = 100

__slots__ = ('dep_section', 'context_start_point', 'context_end_point',
'offset', '_cached_first_point_values',
'_cached_next_point_values', '_cached_valid_point_booleans',
'_cached_recent_valid_points', 'spec', 'abbrev_util',
'recurrence', 'exclusions', 'step', 'value')

@classmethod
def get_async_expr(cls, start_point=None):
"""Express a one-off sequence at the initial cycle point."""
Expand Down Expand Up @@ -353,39 +368,36 @@ def __init__(self, dep_section, context_start_point=None,
self._cached_recent_valid_points = []

self.spec = dep_section
self.abbrev_util = CylcTimeParser(
self.context_start_point, self.context_end_point,
num_expanded_year_digits=SuiteSpecifics.NUM_EXPANDED_YEAR_DIGITS,
dump_format=SuiteSpecifics.DUMP_FORMAT,
assumed_time_zone=SuiteSpecifics.ASSUMED_TIME_ZONE
)
self.abbrev_util = CylcTimeParser(self.context_start_point,
self.context_end_point,
SuiteSpecifics.iso8601_parsers)
# Parse_recurrence returns an isodatetime TimeRecurrence object
# and a list of exclusion strings.
self.recurrence, excl_points = self.abbrev_util.parse_recurrence(
dep_section)

# Determine the exclusion start point and end point
try:
self.exclusion_start_point = ISO8601Point.from_nonstandard_string(
exclusion_start_point = ISO8601Point.from_nonstandard_string(
str(self.recurrence.start_point))
except ValueError:
self.exclusion_start_point = self.context_start_point
exclusion_start_point = self.context_start_point

try:
self.exclusion_end_point = ISO8601Point.from_nonstandard_string(
exclusion_end_point = ISO8601Point.from_nonstandard_string(
str(self.recurrence.end_point))
except ValueError:
self.exclusion_end_point = self.context_end_point
exclusion_end_point = self.context_end_point

self.exclusions = []

# Creating an exclusions object instead
if excl_points is not None:
if excl_points:
try:
self.exclusions = ISO8601Exclusions(
excl_points,
self.exclusion_start_point,
self.exclusion_end_point)
exclusion_start_point,
exclusion_end_point)
except AttributeError:
pass

Expand Down Expand Up @@ -477,9 +489,11 @@ def get_nearest_prev_point(self, point):

for recurrence_iso_point in self.recurrence:
# Is recurrence point greater than aribitrary point?
if (recurrence_iso_point > p_iso_point or
(recurrence_iso_point in
self.exclusions.p_iso_exclusions)):
if (
recurrence_iso_point > p_iso_point or
(self.exclusions and
recurrence_iso_point in self.exclusions.p_iso_exclusions)
):
break
prev_iso_point = recurrence_iso_point
if prev_iso_point is None:
Expand Down Expand Up @@ -695,13 +709,17 @@ def init(num_expanded_year_digits=0, custom_dump_format=None, time_zone=None,
dump_format=SuiteSpecifics.DUMP_FORMAT,
assumed_time_zone=time_zone_hours_minutes
)
SuiteSpecifics.abbrev_util = CylcTimeParser(
None, None,
num_expanded_year_digits=SuiteSpecifics.NUM_EXPANDED_YEAR_DIGITS,

SuiteSpecifics.iso8601_parsers = CylcTimeParser.initiate_parsers(
dump_format=SuiteSpecifics.DUMP_FORMAT,
num_expanded_year_digits=num_expanded_year_digits,
assumed_time_zone=SuiteSpecifics.ASSUMED_TIME_ZONE
)

SuiteSpecifics.abbrev_util = CylcTimeParser(
None, None, SuiteSpecifics.iso8601_parsers
)


def get_point_relative(offset_string, base_point):
"""Create a point from offset_string applied to base_point."""
Expand Down
Loading