Skip to content

Commit

Permalink
Optimise time taken to get raw graph
Browse files Browse the repository at this point in the history
The main change is the restructure of the variable
`SuiteConfig().edges`. It is now a dict. Each key is a unique sequence,
and each value is a set of `(left, right, suicide, conditional)` tuples.
This change allows `get_graph_raw` to look at each unique sequence once
instead of multiple times. There is also caching at various points to
reduce the number of expensive calls to regular expression matching and
date-time functionality.

Other minor changes include:
* Remove unused variables.
* Use compiled regular expressions where possible.
* Reduce usage of complex regular expressions.
* Remove edge and graphnode objects, which were not cheap to initialise.
  • Loading branch information
matthewrmshin committed Jun 1, 2017
1 parent 7162266 commit 08c0c3a
Show file tree
Hide file tree
Showing 8 changed files with 258 additions and 368 deletions.
318 changes: 162 additions & 156 deletions lib/cylc/config.py

Large diffs are not rendered by default.

11 changes: 11 additions & 0 deletions lib/cylc/cycling/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -446,6 +446,17 @@ def __getitem__(self, key):
"""Allows indexing of the exclusion object"""
return self.exclusion_sequences[key]

def __str__(self):
returns = []
for point in sorted(self.exclusion_points):
returns.append(str(point))
for sequence in self.exclusion_sequences:
returns.append(str(sequence))
ret = ','.join(returns)
if ',' in ret:
ret = '(' + ret + ')'
return ret


if __name__ == "__main__":
import unittest
Expand Down
2 changes: 1 addition & 1 deletion lib/cylc/cycling/iso8601.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,7 +392,7 @@ def __init__(self, dep_section, context_start_point=None,
self.step = ISO8601Interval(str(self.recurrence.duration))
self.value = str(self.recurrence)
# Concatenate the strings in exclusion list
if self.exclusions:
if str(self.exclusions):
self.value += '!' + str(self.exclusions)

def get_interval(self):
Expand Down
133 changes: 34 additions & 99 deletions lib/cylc/graphing.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,8 @@
"""Cylc suite graphing module. Modules relying on this should test for
ImportError due to pygraphviz/graphviz not being installed."""

import re
import pygraphviz
from cylc.task_id import TaskID
from cycling.loader import get_point, get_point_relative, get_interval
from graphnode import graphnode

# TODO: Do we still need autoURL below?


class CGraphPlain(pygraphviz.AGraph):
Expand Down Expand Up @@ -55,7 +50,7 @@ def node_attr_by_taskname(self, node_string):
def style_edge(self, left, right):
pass

def style_node(self, node_string, autoURL, base=False):
def style_node(self, node_string, base=False):
node = self.get_node(node_string)
try:
name, point_string = TaskID.split(node_string)
Expand All @@ -71,30 +66,12 @@ def style_node(self, node_string, autoURL, base=False):
label += "\\n" + self.suite_polling_tasks[name][3]
label += "\\n" + point_string
node.attr['label'] = label
if autoURL:
if base:
# TODO - This is only called from cylc_add_edge in this
# base class ... should it also be called from add_node?
node.attr['URL'] = 'base:' + node_string
else:
node.attr['URL'] = node_string

def cylc_add_node(self, node_string, autoURL, **attr):
pygraphviz.AGraph.add_node(self, node_string, **attr)
self.style_node(node_string, autoURL)

def cylc_add_edge(self, left, right, autoURL, **attr):
if left is None and right is None:
pass
elif left is None:
self.cylc_add_node(right, autoURL)
elif right is None:
self.cylc_add_node(left, autoURL)
if base:
# TODO - This is only called from add_edges in this
# base class ... should it also be called from add_node?
node.attr['URL'] = 'base:' + node_string
else:
pygraphviz.AGraph.add_edge(self, left, right, **attr)
self.style_node(left, autoURL, base=True)
self.style_node(right, autoURL, base=True)
self.style_edge(left, right)
node.attr['URL'] = node_string

def cylc_remove_nodes_from(self, nodes):
"""Remove nodes, returning extra edge structure if possible.
Expand All @@ -107,7 +84,6 @@ def cylc_remove_nodes_from(self, nodes):
if not nodes:
return
existing_nodes = set(self.nodes())
add_edges = set()
remove_nodes = set(nodes)
remove_node_groups = {}
groups = {}
Expand Down Expand Up @@ -221,39 +197,36 @@ def cylc_remove_nodes_from(self, nodes):
new_edges.add((l_node, new_r_node, True, False, False))

self.remove_nodes_from(nodes)
self.add_edges(list(new_edges))
self.add_edges(sorted(new_edges))

def add_edges(self, edges, ignore_suicide=False):
edges.sort() # TODO: does sorting help layout stability?
"""Add edges and nodes connected by the edges."""
for edge in edges:
left, right, skipped, suicide, conditional = edge
if suicide and ignore_suicide:
if left is None and right is None or suicide and ignore_suicide:
continue
attrs = {}
if conditional:
if suicide:
attrs['style'] = 'dashed'
attrs['arrowhead'] = 'odot'
else:
attrs['style'] = 'solid'
attrs['arrowhead'] = 'onormal'
if left is None:
pygraphviz.AGraph.add_node(self, right)
self.style_node(right)
elif right is None:
pygraphviz.AGraph.add_node(self, left)
self.style_node(left)
else:
if suicide:
attrs['style'] = 'dashed'
attrs['arrowhead'] = 'dot'
attrs = {'penwidth': 2}
if skipped:
attrs.update({'style': 'dotted', 'arrowhead': 'oinv'})
elif conditional and suicide:
attrs.update({'style': 'dashed', 'arrowhead': 'odot'})
elif conditional:
attrs.update({'style': 'solid', 'arrowhead': 'onormal'})
elif suicide:
attrs.update({'style': 'dashed', 'arrowhead': 'dot'})
else:
attrs['style'] = 'solid'
attrs['arrowhead'] = 'normal'
if skipped:
# override
attrs['style'] = 'dotted'
attrs['arrowhead'] = 'oinv'

attrs['penwidth'] = 2

self.cylc_add_edge(
left, right, True, **attrs
)
attrs.update({'style': 'solid', 'arrowhead': 'normal'})
pygraphviz.AGraph.add_edge(self, left, right, **attrs)
self.style_node(left, base=True)
self.style_node(right, base=True)
self.style_edge(left, right)

def add_cycle_point_subgraphs(self, edges):
"""Draw nodes within cycle point groups (subgraphs)."""
Expand Down Expand Up @@ -314,11 +287,11 @@ def __init__(self, title, suite_polling_tasks={}, vizconfig={}):
# graph attributes
# - default node attributes
for item in vizconfig['default node attributes']:
attr, value = re.split('\s*=\s*', item)
attr, value = [val.strip() for val in item.split('=', 1)]
self.node_attr[attr] = value
# - default edge attributes
for item in vizconfig['default edge attributes']:
attr, value = re.split('\s*=\s*', item)
attr, value = [val.strip() for val in item.split('=', 1)]
self.edge_attr[attr] = value

# non-default node attributes by task name
Expand All @@ -341,12 +314,11 @@ def __init__(self, title, suite_polling_tasks={}, vizconfig={}):
self.task_attr[item] = []
self.task_attr[item].append(attr)

def style_node(self, node_string, autoURL, base=False):
super(self.__class__, self).style_node(
node_string, autoURL, False)
def style_node(self, node_string, base=False):
super(self.__class__, self).style_node(node_string, base)
node = self.get_node(node_string)
for item in self.node_attr_by_taskname(node_string):
attr, value = re.split('\s*=\s*', item)
attr, value = [val.strip() for val in item.split('=', 1)]
node.attr[attr] = value
if self.vizconfig['use node color for labels']:
node.attr['fontcolor'] = node.attr['color']
Expand All @@ -362,40 +334,3 @@ def style_edge(self, left, right):
if left_node.attr['style'] == 'filled':
edge.attr['color'] = left_node.attr['fillcolor']
edge.attr['penwidth'] = self.vizconfig['edge penwidth']


class edge(object):
def __init__(self, left, right, sequence, suicide=False,
conditional=False):
"""contains qualified node names, e.g. 'foo[T-6]:out1'"""
self.left = left
self.right = right
self.suicide = suicide
self.sequence = sequence
self.conditional = conditional

def get_right(self, inpoint, start_point):
inpoint_string = str(inpoint)
if self.right is None:
return None

# strip off special outputs
self.right = re.sub(':\w+', '', self.right)

return TaskID.get(self.right, inpoint_string)

def get_left(self, inpoint, start_point, base_interval):
# strip off special outputs
left = re.sub(':[\w-]+', '', self.left)

left_graphnode = graphnode(left, base_interval=base_interval)
if left_graphnode.offset_is_from_ict:
point = get_point_relative(left_graphnode.offset_string,
start_point)
elif left_graphnode.offset_string:
point = get_point_relative(left_graphnode.offset_string, inpoint)
else:
point = inpoint
name = left_graphnode.name

return TaskID.get(name, point)
132 changes: 37 additions & 95 deletions lib/cylc/graphnode.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,31 +20,22 @@
from cylc.task_id import TaskID
import re

# Cylc's ISO 8601 format.
NODE_ISO_RE = re.compile(
# Match a graph node string
NODE_REC = re.compile(
r"^" +
r"(" + TaskID.NAME_RE + r")" +
r"""(?:\[ # Begin optional [offset] syntax
(?!T[+-]) # Do not match a 'T-' or 'T+' (this is the old format)
([^\]]+) # Continue until next ']'
\] # Stop at next ']'
)? # End optional [offset] syntax]
(:[\w-]+|)$ # Optional type (e.g. :succeed)
""", re.X)

# Cylc's ISO 8601 initial cycle point based format
NODE_ISO_ICT_RE = re.compile(
r"^" +
r"(" + TaskID.NAME_RE + r")" +
r"""\[ # Begin square bracket syntax
\^ # Initial cycle point offset marker
([^\]]*) # Optional ^offset syntax
\] # End square bracket syntax
(:[\w-]+|)$ # Optional type (e.g. :succeed)
r"""(?:\[ # Begin optional [offset] syntax
(?!T[+-]) # Do not match a 'T-' or 'T+' (this is the old format)
(\^?) # Initial cycle point offset marker
([^\]]*) # Continue until next ']'
\] # Stop at next ']'
)? # End optional [offset] syntax]
(?::([\w-]+))? # Optional type (e.g. :succeed)
$
""", re.X)

# A potentially non-regular offset, such as foo[01T+P1W].
IRREGULAR_OFFSET_RE = re.compile(
IRREGULAR_OFFSET_REC = re.compile(
r"""^ # Start of string
( # Begin group
..+ # EITHER: Two or more characters
Expand All @@ -58,85 +49,36 @@


class GraphNodeError(Exception):
"""
Attributes:
message - what the problem is.
"""
def __init__(self, msg):
self.msg = msg

def __str__(self):
return repr(self.msg)

pass

class graphnode(object):
"""A node in the cycle suite.rc dependency graph."""

# Memory optimization - constrain possible attributes to this list.
__slots__ = ["offset_is_from_ict", "offset_is_irregular",
"is_absolute", "special_output", "output",
"name", "intercycle", "offset_string"]
def parse_graph_node(node):
"""Parse node string.
def __init__(self, node, base_interval=None):
node_in = node
# Get task name and properties from a graph node name.

# Graph node name is task name optionally followed by:
# - output label: foo:m1
# - intercycle dependence: foo[T-6]
# These may be combined: foo[T-6]:m1
# Task may be defined at initial cycle point: foo[^]
# or relative to initial cycle point: foo[^+P1D]

self.offset_is_from_ict = False
self.offset_is_irregular = False
self.is_absolute = False

m = re.match(NODE_ISO_ICT_RE, node)
if m:
# node looks like foo[^], foo[^-P4D], foo[^]:fail, etc.
self.is_absolute = True
name, offset_string, outp = m.groups()
self.offset_is_from_ict = True
sign = ""
prev_format = False
# Can't always set syntax here, as we use [^] for backwards comp.
Return (node name, offset_is_from_icp, offset, output).
Raise GraphNodeError on illegal syntax.
"""
name, offset_is_from_icp, offset, output = parse_graph_node_basic(node)

if offset_is_from_icp and not offset:
offset = str(get_interval_cls().get_null_offset())
offset_is_irregular = False
if offset:
if IRREGULAR_OFFSET_REC.search(offset):
offset = offset
offset_is_irregular = True
else:
m = re.match(NODE_ISO_RE, node)
if m:
# node looks like foo, foo:fail, foo[-PT6H], foo[-P4D]:fail...
name, offset_string, outp = m.groups()
sign = ""
prev_format = False
else:
raise GraphNodeError('Illegal graph node: ' + node)
offset = str(get_interval(offset).standardise())
return name, offset_is_from_icp, offset_is_irregular, offset, output

if outp:
self.special_output = True
self.output = outp[1:] # strip ':'
else:
self.special_output = False
self.output = None

if name:
self.name = name
else:
raise GraphNodeError('Illegal graph node: ' + node)
def parse_graph_node_basic(node):
"""Parse node string, but don't look at offset irregularity.
if self.offset_is_from_ict and not offset_string:
offset_string = str(get_interval_cls().get_null_offset())
if offset_string:
self.intercycle = True
if prev_format:
self.offset_string = str(
base_interval.get_inferred_child(offset_string))
else:
if IRREGULAR_OFFSET_RE.search(offset_string):
self.offset_string = offset_string
self.offset_is_irregular = True
else:
self.offset_string = str(
(get_interval(offset_string)).standardise())
else:
self.intercycle = False
self.offset_string = None
Return (node name, offset_is_from_icp, offset_raw, output).
Raise GraphNodeError on illegal syntax.
"""
match = NODE_REC.match(node)
if not match:
raise GraphNodeError('Illegal graph node: %s' % node)
return match.groups()
Loading

0 comments on commit 08c0c3a

Please sign in to comment.