Skip to content

Commit

Permalink
Changed merlin python topology builders so that they call pushNamePre…
Browse files Browse the repository at this point in the history
…fix() with the network name and added deferred build to System object so you can build without endpoints and get the links later to connect.
  • Loading branch information
feldergast committed Jan 19, 2024
1 parent 8841f88 commit 5260cc4
Show file tree
Hide file tree
Showing 16 changed files with 1,255 additions and 60 deletions.
2 changes: 2 additions & 0 deletions src/sst/elements/merlin/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -107,12 +107,14 @@ EXTRA_DIST = \
tests/dragon_128_platform_test.py \
tests/dragon_128_platform_test_cm.py \
tests/platform_file_dragon_128.py \
tests/dragon_128_test_deferred.py \
tests/polarfly_455_test.py \
tests/polarstar_504_test.py \
tests/refFiles/test_merlin_dragon_128_platform_test.out \
tests/refFiles/test_merlin_dragon_128_platform_test_cm.out \
tests/refFiles/test_merlin_dragon_128_test.out \
tests/refFiles/test_merlin_dragon_128_test_fl.out \
tests/refFiles/test_merlin_dragon_128_test_deferred.out \
tests/refFiles/test_merlin_dragon_72_test.out \
tests/refFiles/test_merlin_fattree_128_test.out \
tests/refFiles/test_merlin_fattree_256_test.out \
Expand Down
16 changes: 12 additions & 4 deletions src/sst/elements/merlin/interfaces/pymerlin-interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def __init__(self):
self._subscribeToPlatformParamSet("network_interface")

# returns subcomp, port_name
def build(self,comp,slot,slot_num,job_id,job_size,logical_nid,use_nid_remap = False):
def build(self,comp,slot,slot_num,job_id,job_size,logical_nid,use_nid_remap = False, link=None):
if self._check_first_build():
set_name = "params_%s"%self._instance_name
sst.addGlobalParams(set_name, self._getGroupParams("params"))
Expand All @@ -39,7 +39,12 @@ def build(self,comp,slot,slot_num,job_id,job_size,logical_nid,use_nid_remap = Fa
self._applyStatisticsSettings(sub)
sub.addGlobalParamSet("params_%s"%self._instance_name)
sub.addParam("logical_nid",logical_nid)
return sub,"rtr_port"

if link:
sub.addLink(link, "rtr_port");
return True
else:
return sub,"rtr_port"


class ReorderLinkControl(NetworkInterface):
Expand All @@ -62,11 +67,14 @@ def _network_interface_callback(self, variable_name, value):
def setNetworkInterface(self,interface):
self.network_interface = interface

def build(self,comp,slot,slot_num,job_id,job_size,nid,use_nid_map = False):
def build(self,comp,slot,slot_num,job_id,job_size,nid,use_nid_map = False, link = None):
sub = comp.setSubComponent(slot,"merlin.reorderlinkcontrol",slot_num)
#self._applyStatisticsSettings(sub)
#sub.addParams(self._params)
return self.network_interface.build(sub,"networkIF",0,job_id,job_size,nid,use_nid_map)

return NetworkInterface._instanceNetworkInterfaceBackCompat(
self.network_interface,sub,"networkIF",0,job_id,job_size,nid,use_nid_map,link)


# Functions to enable statistics
def enableAllStatistics(self,stat_params,apply_to_children=False):
Expand Down
219 changes: 197 additions & 22 deletions src/sst/elements/merlin/pymerlin-base.py
Original file line number Diff line number Diff line change
Expand Up @@ -599,10 +599,9 @@ def findClass(base,name,_seen):
class Topology(TemplateBase):
def __init__(self):
TemplateBase.__init__(self)
self._declareClassVariables(["network_name","endPointLinks","built","router","_prefix"])
self._declareClassVariables(["network_name","endPointLinks","built","router"])

self._prefix = ""
self._lockVariable("_prefix")
self.network_name = ""
self._setCallbackOnWrite("network_name",self._network_name_callback)

self._setCallbackOnWrite("router",self._router_callback)
Expand All @@ -619,26 +618,30 @@ def __init__(self):

def _network_name_callback(self, variable_name, value):
self._lockVariable(variable_name)
if value:
self._unlockVariable("_prefix")
self._prefix = "%s."%value
self._lockVariable("_prefix")

def _router_callback(self, variable_name, value):
# Lock variable if it wasn't set to None
if value: self._lockVariable(variable_name)


def getName(self):
return "NoName"
# build() will automatically apply the network_name as a name
# prefix before calling _build_impl(). If you want to have the
# prefix automatically applied, override the _build_impl()
# function. If you want to control this yourself, overload the
# build() function.
def build(self, endpoint):
sst.pushNamePrefix(self.network_name)
self._build_impl(endpoint)
sst.popNamePrefix()
def _build_impl(self, endpoint):
pass
def getEndPointLinks(self):
pass
def getNumNodes(self):
pass
def getRouterNameForId(self,rtr_id):
return "%srtr%d"%(self._prefix,rtr_id)
return "rtr%d"%(rtr_id)
def findRouterById(self,rtr_id):
return sst.findComponentByName(self.getRouterNameForId(rtr_id))
def _instanceRouter(self,radix,rtr_id):
Expand All @@ -648,21 +651,137 @@ class NetworkInterface(TemplateBase):
def __init__(self):
TemplateBase.__init__(self)

# returns subcomp, port_name
def build(self,comp,slot,slot_num,link,job_id,job_size,logical_nid,use_nid_remap=False):

# NetworkInterface.build() has two possible implementations:

# OLD: Takes no link and returns an sst.SubComponent and port name

# NEW: Take a link and returns True if something was connected to
# it.

# This is the new method for instancing endpoints. You need to
# create the Link object first, then pass it in. If False is
# returned, then nothing was hooked up to the Link by build(). If
# True is returned, then something was connected and the parent
# needs to hook up its end of the Link. Note that the python Link
# class does not add anything to the ConfigGraph until a call to
# Link.connect or (Sub)Component.addLink is called, so it won't
# create strange artificts in the ConfigGraph if a Link is created
# by not used.

# For backward compatibility, use_nid_map and link should be
# optional arguments, in that order. Once we transition
# completely over to the new way, both will become required.

# To maintain backward compatibility with the old method, the new
# implemenation will need to check to see if link is None, if so,
# then return subcomp, port_name. Otherwise, return True or False
# depending on if the passed in link was connected or not.

# Objects that want to use the new Link version of the function
# will need to do so in a try block and catch the TypeError
# exception. If no exception is thrown the implementation
# supports the Link method, otherwise the object needs to fallback
# to the old method. Once the old version is no longer supported,
# the try/except block can be removed. This functionality is
# encapsulated in the _instanceNetworkInterfaceBackCompat()
def build(self,comp,slot,slot_num,job_id,job_size,logical_nid,use_nid_remap=False,link=None):
return None


# Convenience function to loada NetworkInterface in a backward
# compatibile manner. Adheres to the rules for backward
# compatilbility described in the comment to build()
@staticmethod
def _instanceNetworkInterfaceBackCompat(
netif,comp,slot,slot_num,job_id,job_size,logical_nid,use_nid_remap,link):
if not link:
# If link is None, then we are using the old method.
# This will return (subcomp, port_name)
return netif.build(comp,slot,slot_num,job_id,job_size,logical_nid,use_nid_remap)

# Using the new method, need to do a try/except to make sure
# the underlying networkif supports the link method
try:
built = netif.build(comp,slot,slot_num,job_id,job_size,logical_nid,use_nid_remap,link)
return built
except TypeError:
# Underlying networkif doesn't support the Link method.
# Need to call the old method and connect the link
# manually
subcomp, port_name = netif.build(comp,slot,slot_num,job_id,job_size,logical_nid,use_nid_remap)
if not subcomp:
# Nothing was hooked up, return False
return False
# Need to hookup the subcomponent to the link that was
# passed in
subcomp.addLink(link, port_name)
return True


# Base class that is used to build endpoints
class Buildable(TemplateBase):
def __init__(self):
TemplateBase.__init__(self)

# build() returns an sst.SubComponent and port name
def build(self, nID, extraKeys):
def name(self):
return "Buildable"

# build() has two possible implemenations.

# OLD: Takes no link and returns an sst.SubComponent and port name

# NEW: Take a link and returns True if something was connected to
# it.

# This is the new method for instancing endpoints. You need to
# create the Link object first, then pass it in. If False is
# returned, then nothing was hooked up to the Link by build(). If
# True is returned, then something was connected and the parent
# needs to hook up its end of the Link. Note that the python Link
# class does not add anything to the ConfigGraph until a call to
# Link.connect or (Sub)Component.addLink is called, so it won't
# create strange artificts in the ConfigGraph if a Link is created
# by not used.

# For backward compatibility, link should be an optional
# argumentr. Once we transition completely over to the new way,
# link will become required.

# To maintain backward compatibility with the old method, the new
# implemenation will need to check to see if link is None, if so,
# then return subcomp, port_name. Otherwise, return True or False
# depending on if the passed in link was connected or not.

# Objects that want to use the new Link version of the function
# will need to do so in a try block and catch the TypeError
# exception. If no exception is thrown the implementation
# supports the Link method, otherwise the object needs to fallback
# to the old method. Once the old version is no longer supported,
# the try/except block can be removed. This functionality is
# encapsulated in the _instanceNetworkInterfaceBackCompat()
def build(self, nID, extraKeys, link=None):
return None


# Convenience function to load a Buildable in a backward
# compatibile manner. Adheres to the rules for backward
# compatilbility described in the comment to build()
@staticmethod
def _instanceBuildableBackCompat(endpoint, comp, comp_port, nID, extraKeys, link):
try:
# Try the new Link-based method first
built = endpoint.build(nID, extraKeys, link)
if built:
comp.addLink(link, comp_port)
except TypeError as error:
# If it doesn't support the Link-based method,
# fall-back to old method
(nic, port_name) = endpoint.build(nID, extraKeys)
if nic:
link.connect( (nic, port_name), (comp, comp_port) )


# Classes that define endpoints
class Job(Buildable):
def __init__(self,job_id,size):
Expand All @@ -681,6 +800,10 @@ def getName(self):
def getSize(self):
return self.size

# This will create a linear nid map in the case where you build
# this using a deferred build
def createLinearNidMap(self):
self._nid_map = list(range(self.size))


class RouterTemplate(TemplateBase):
Expand Down Expand Up @@ -735,20 +858,57 @@ def instanceRouter(self, name, radix, rtr_id):
def getTopologySlotName(self):
return "topology"


class SystemEndpoint(Buildable):
def __init__(self,system):
Buildable.__init__(self)
self._declareClassVariables(["_system"])
self._system = system

# build() returns an sst.Component and port name
def build(self, nID, extraKeys):
# build() returns an sst.Component and port name if no link is
# passed in. Otherwise, it returns True if an endpoint was
# connected and False otherwise.
def build(self, nID, extraKeys, link=None):
# Just get the proper job object for this nID and call build
if self._system._endpoints[nID]:
return self._system._endpoints[nID].build(nID, extraKeys)
# There is an endpoint, instance it
if link:
# Need to try the new Link method
try:
return self._system._endpoints[nID].build(nID, extraKeys, link)
except TypeError:
# Link method not support, so will need to hook up
# the link manually
comp, port = self._system._endpoints[nID].build(nID, extraKeys)
comp.addLink(link, port)
return True
else:
# Using old method
return self._system._endpoints[nID].build(nID, extraKeys)
else:
return (None, None)
if link:
return False
else:
return (None, None)

# This is a endpoint class used to get all the links in a topology
# build. This info is then used to create the endpoints in the
# multiplane system
class DeferredEndpoint(Buildable):
def __init__(self):
Buildable.__init__(self)
self._declareClassVariables(["_link_map"])
# Dictionary that holds a map from network id to (extraKeys, Link)
self._link_map = {}


def build(self, nID, extraKeys, link):
self._link_map[nID] = link
return True

# Return the link associated with the specified endpoint ID. Will
# throw an exception if passed an invalid nID.
def getLink(self, nID):
return self._link_map[nID]


class EmptyJob(Job):
Expand All @@ -758,13 +918,13 @@ def __init__(self,job_id,size):
def getName(self):
return "Empty Job"

def build(self, nID, extraKeys):
def build(self, nID, extraKeys, link=None):
nic = sst.Component("empty_node_%d"%nID, "merlin.simple_patterns.empty")
id = self._nid_map[nID]

# Add the linkcontrol
networkif, port_name = self.network_interface.build(nic,"networkIF",0,self.job_id,self.size,id)
return (networkif, port_name)
return NetworkInterface._instanceNetworkInterfaceBackCompat(
self.network_interface,nic,"networkIF",0,self.job_id,self.size,id,False,link)


class System(TemplateBase):
Expand All @@ -779,12 +939,13 @@ def addAllocationFunction(cls,name,func):

def __init__(self):
TemplateBase.__init__(self)
self._declareClassVariables(["topology","allocation_block_size","_available_nodes","_num_nodes","_endpoints"])
self._declareClassVariables(["topology","allocation_block_size","_available_nodes","_num_nodes","_endpoints","_deferred_endpoint"])
self._setCallbackOnWrite("topology",self._topology_config_callback)
self._setCallbackOnWrite("allocation_block_size",self._block_size_config_callback)

self._subscribeToPlatformParamSet("system")
self.topology = PlatformDefinition.getPlatformDefinedClassInstance("topology")
self._deferred_endpoint = None
#self.allocation_block_size = 1

def setTopology(self, topo, allocation_block_size = 1):
Expand All @@ -802,6 +963,20 @@ def build(self):
system_ep = SystemEndpoint(self)
self.topology.build(system_ep)

# Will build the topology using the DeferredEndpoint, which means
# all allocations will be ignored. After this is called, use the
# getLinkForEndpoint(nID) call to get the link associated with
# that endpoint.
def deferred_build(self):
self._deferred_endpoint = DeferredEndpoint()
self.topology.build(self._deferred_endpoint)

def getLinkForEndpoint(self, nID):
if not self._deferred_endpoint:
# throw exception
pass
return self._deferred_endpoint.getLink(nID)

def _topology_config_callback(self, variable_name, value):
if not value: return
self._lockVariable(variable_name)
Expand Down
7 changes: 3 additions & 4 deletions src/sst/elements/merlin/pymerlin-endpoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def __init__(self,job_id,size):
def getName(self):
return "TestJob"

def build(self, nID, extraKeys):
def build(self, nID, extraKeys, link = None):
nic = sst.Component("testNic_%d"%nID, "merlin.test_nic")
self._applyStatisticsSettings(nic)
nic.addParams(self._getGroupParams("main"))
Expand All @@ -38,9 +38,8 @@ def build(self, nID, extraKeys):
nic.addParam("id", id)

# Add the linkcontrol
networkif, port_name = self.network_interface.build(nic,"networkIF",0,self.job_id,self.size,id,True)

return (networkif,port_name)
return NetworkInterface._instanceNetworkInterfaceBackCompat(
self.network_interface,nic,"networkIF",0,self.job_id,self.size,id,True,link)


class OfferedLoadJob(Job):
Expand Down
Loading

0 comments on commit 5260cc4

Please sign in to comment.