diff --git a/src/sst/elements/memHierarchy/Makefile.am b/src/sst/elements/memHierarchy/Makefile.am index 16c328c566..83dc247e77 100644 --- a/src/sst/elements/memHierarchy/Makefile.am +++ b/src/sst/elements/memHierarchy/Makefile.am @@ -198,6 +198,7 @@ EXTRA_DIST = \ tests/testNoninclusive-2.py \ tests/testPrefetchParams.py \ tests/testThroughputThrottling.py \ + tests/testRangeCheck.py \ tests/testScratchCache-1.py \ tests/testScratchCache-2.py \ tests/testScratchCache-3.py \ @@ -253,6 +254,7 @@ EXTRA_DIST = \ tests/refFiles/test_memHA_Noninclusive_1.out \ tests/refFiles/test_memHA_Noninclusive_2.out \ tests/refFiles/test_memHA_PrefetchParams.out \ + tests/refFiles/test_memHA_RangeCheck.out \ tests/refFiles/test_memHA_ScratchCache_1.out \ tests/refFiles/test_memHA_ScratchCache_2.out \ tests/refFiles/test_memHA_ScratchCache_3.out \ diff --git a/src/sst/elements/memHierarchy/memNICBase.h b/src/sst/elements/memHierarchy/memNICBase.h index 6ab96ecbd1..a816c91436 100644 --- a/src/sst/elements/memHierarchy/memNICBase.h +++ b/src/sst/elements/memHierarchy/memNICBase.h @@ -41,7 +41,8 @@ class MemNICBase : public MemLinkBase { #define MEMNICBASE_ELI_PARAMS MEMLINKBASE_ELI_PARAMS, \ { "group", "(int) Group ID. See params 'sources' and 'destinations'. If not specified, the parent component will guess.", "1"},\ { "sources", "(comma-separated list of ints) List of group IDs that serve as sources for this component. If not specified, defaults to 'group - 1'.", "group-1"},\ - { "destinations", "(comma-separated list of ints) List of group IDs that serve as destinations for this component. If not specified, defaults to 'group + 1'.", "group+1"} + { "destinations", "(comma-separated list of ints) List of group IDs that serve as destinations for this component. If not specified, defaults to 'group + 1'.", "group+1"},\ + { "range_check", "(int) Enable initial check for overlapping memory ranges. 
0=Disabled 1=Enabled", "1"} SST_ELI_REGISTER_SUBCOMPONENT_DERIVED_API(SST::MemHierarchy::MemNICBase, SST::MemHierarchy::MemLinkBase) @@ -402,30 +403,33 @@ class MemNICBase : public MemLinkBase { } } destEndpointInfo = newDests; - - int stopAfter = 20; // This is error checking, if it takes too long, stop - for (auto et = destEndpointInfo.begin(); et != destEndpointInfo.end(); et++) { - for (auto it = std::next(et,1); it != destEndpointInfo.end(); it++) { - if (it->name == et->name) continue; // Not a problem - if ((it->region).doesIntersect(et->region)) { - dbg.fatal(CALL_INFO, -1, "%s, Error: Found destinations on the network with overlapping address regions. Cannot generate routing table." - "\n Destination 1: %s\n Destination 2: %s\n", - getName().c_str(), it->toString().c_str(), et->toString().c_str()); + + // This algorithm can take an extremely long time for some memory configurations. + if (range_check > 0) { + int stopAfter = 20; // This is error checking, if it takes too long, stop + for (auto et = destEndpointInfo.begin(); et != destEndpointInfo.end(); et++) { + for (auto it = std::next(et,1); it != destEndpointInfo.end(); it++) { + if (it->name == et->name) continue; // Not a problem + if ((it->region).doesIntersect(et->region)) { + dbg.fatal(CALL_INFO, -1, "%s, Error: Found destinations on the network with overlapping address regions. Cannot generate routing table." + "\n Destination 1: %s\n Destination 2: %s\n", + getName().c_str(), it->toString().c_str(), et->toString().c_str()); + } + stopAfter--; + if (stopAfter == 0) { + stopAfter = -1; + break; + } } - stopAfter--; - if (stopAfter == 0) { + if (stopAfter <= 0) { stopAfter = -1; break; } } - if (stopAfter <= 0) { - stopAfter = -1; - break; - } + if (stopAfter == -1) + dbg.debug(_L2_, "%s, Notice: Too many regions to complete error check for overlapping destination regions. Checked first 20 pairs. 
To disable this check set range_check parameter to 0\n", + getName().c_str()); } - if (stopAfter == -1) - dbg.debug(_L2_, "%s, Notice: Too many regions to complete error check for overlapping destination regions. Checked first 20 pairs.\n", - getName().c_str()); for (auto it = networkAddressMap.begin(); it != networkAddressMap.end(); it++) { dbg.debug(_L10_, " Address: %s -> %" PRIu64 "\n", it->first.c_str(), it->second); @@ -536,6 +540,7 @@ class MemNICBase : public MemLinkBase { // Other parameters std::unordered_set sourceIDs, destIDs; // IDs which this endpoint cares about + uint32_t range_check = true; // Enable overlapping range check private: @@ -561,6 +566,10 @@ class MemNICBase : public MemLinkBase { params.find_array("destinations", dstArr); destIDs = std::unordered_set(dstArr.begin(), dstArr.end()); } + + // range_check is currently off(0) or on(1) but uses a uint32_t to + // allow for future selection of different algorithms. + range_check=params.find("range_check", 1); std::stringstream sources, destinations; uint32_t id; diff --git a/src/sst/elements/memHierarchy/tests/refFiles/test_memHA_RangeCheck.out b/src/sst/elements/memHierarchy/tests/refFiles/test_memHA_RangeCheck.out new file mode 100644 index 0000000000..1726b155be --- /dev/null +++ b/src/sst/elements/memHierarchy/tests/refFiles/test_memHA_RangeCheck.out @@ -0,0 +1,65 @@ +memory0 start=0x0 end=0x3FE07FFFFFF size=0x1000000000 interleave_size=128MiB interleave_step=8192MiB +memory1 start=0x8000000 end=0x3FE0FFFFFFF size=0x1000000000 interleave_size=128MiB interleave_step=8192MiB +memory2 start=0x10000000 end=0x3FE17FFFFFF size=0x1000000000 interleave_size=128MiB interleave_step=8192MiB +memory3 start=0x18000000 end=0x3FE1FFFFFFF size=0x1000000000 interleave_size=128MiB interleave_step=8192MiB +memory4 start=0x20000000 end=0x3FE27FFFFFF size=0x1000000000 interleave_size=128MiB interleave_step=8192MiB +memory5 start=0x28000000 end=0x3FE2FFFFFFF size=0x1000000000 interleave_size=128MiB 
interleave_step=8192MiB +memory6 start=0x30000000 end=0x3FE37FFFFFF size=0x1000000000 interleave_size=128MiB interleave_step=8192MiB +memory7 start=0x38000000 end=0x3FE3FFFFFFF size=0x1000000000 interleave_size=128MiB interleave_step=8192MiB +memory8 start=0x40000000 end=0x3FE47FFFFFF size=0x1000000000 interleave_size=128MiB interleave_step=8192MiB +memory9 start=0x48000000 end=0x3FE4FFFFFFF size=0x1000000000 interleave_size=128MiB interleave_step=8192MiB +memory10 start=0x50000000 end=0x3FE57FFFFFF size=0x1000000000 interleave_size=128MiB interleave_step=8192MiB +memory11 start=0x58000000 end=0x3FE5FFFFFFF size=0x1000000000 interleave_size=128MiB interleave_step=8192MiB +memory12 start=0x60000000 end=0x3FE67FFFFFF size=0x1000000000 interleave_size=128MiB interleave_step=8192MiB +memory13 start=0x68000000 end=0x3FE6FFFFFFF size=0x1000000000 interleave_size=128MiB interleave_step=8192MiB +memory14 start=0x70000000 end=0x3FE77FFFFFF size=0x1000000000 interleave_size=128MiB interleave_step=8192MiB +memory15 start=0x78000000 end=0x3FE7FFFFFFF size=0x1000000000 interleave_size=128MiB interleave_step=8192MiB +memory16 start=0x80000000 end=0x3FE87FFFFFF size=0x1000000000 interleave_size=128MiB interleave_step=8192MiB +memory17 start=0x88000000 end=0x3FE8FFFFFFF size=0x1000000000 interleave_size=128MiB interleave_step=8192MiB +memory18 start=0x90000000 end=0x3FE97FFFFFF size=0x1000000000 interleave_size=128MiB interleave_step=8192MiB +memory19 start=0x98000000 end=0x3FE9FFFFFFF size=0x1000000000 interleave_size=128MiB interleave_step=8192MiB +memory20 start=0xA0000000 end=0x3FEA7FFFFFF size=0x1000000000 interleave_size=128MiB interleave_step=8192MiB +memory21 start=0xA8000000 end=0x3FEAFFFFFFF size=0x1000000000 interleave_size=128MiB interleave_step=8192MiB +memory22 start=0xB0000000 end=0x3FEB7FFFFFF size=0x1000000000 interleave_size=128MiB interleave_step=8192MiB +memory23 start=0xB8000000 end=0x3FEBFFFFFFF size=0x1000000000 interleave_size=128MiB interleave_step=8192MiB 
+memory24 start=0xC0000000 end=0x3FEC7FFFFFF size=0x1000000000 interleave_size=128MiB interleave_step=8192MiB +memory25 start=0xC8000000 end=0x3FECFFFFFFF size=0x1000000000 interleave_size=128MiB interleave_step=8192MiB +memory26 start=0xD0000000 end=0x3FED7FFFFFF size=0x1000000000 interleave_size=128MiB interleave_step=8192MiB +memory27 start=0xD8000000 end=0x3FEDFFFFFFF size=0x1000000000 interleave_size=128MiB interleave_step=8192MiB +memory28 start=0xE0000000 end=0x3FEE7FFFFFF size=0x1000000000 interleave_size=128MiB interleave_step=8192MiB +memory29 start=0xE8000000 end=0x3FEEFFFFFFF size=0x1000000000 interleave_size=128MiB interleave_step=8192MiB +memory30 start=0xF0000000 end=0x3FEF7FFFFFF size=0x1000000000 interleave_size=128MiB interleave_step=8192MiB +memory31 start=0xF8000000 end=0x3FEFFFFFFFF size=0x1000000000 interleave_size=128MiB interleave_step=8192MiB +memory32 start=0x100000000 end=0x3FF07FFFFFF size=0x1000000000 interleave_size=128MiB interleave_step=8192MiB +memory33 start=0x108000000 end=0x3FF0FFFFFFF size=0x1000000000 interleave_size=128MiB interleave_step=8192MiB +memory34 start=0x110000000 end=0x3FF17FFFFFF size=0x1000000000 interleave_size=128MiB interleave_step=8192MiB +memory35 start=0x118000000 end=0x3FF1FFFFFFF size=0x1000000000 interleave_size=128MiB interleave_step=8192MiB +memory36 start=0x120000000 end=0x3FF27FFFFFF size=0x1000000000 interleave_size=128MiB interleave_step=8192MiB +memory37 start=0x128000000 end=0x3FF2FFFFFFF size=0x1000000000 interleave_size=128MiB interleave_step=8192MiB +memory38 start=0x130000000 end=0x3FF37FFFFFF size=0x1000000000 interleave_size=128MiB interleave_step=8192MiB +memory39 start=0x138000000 end=0x3FF3FFFFFFF size=0x1000000000 interleave_size=128MiB interleave_step=8192MiB +memory40 start=0x140000000 end=0x3FF47FFFFFF size=0x1000000000 interleave_size=128MiB interleave_step=8192MiB +memory41 start=0x148000000 end=0x3FF4FFFFFFF size=0x1000000000 interleave_size=128MiB interleave_step=8192MiB +memory42 
start=0x150000000 end=0x3FF57FFFFFF size=0x1000000000 interleave_size=128MiB interleave_step=8192MiB +memory43 start=0x158000000 end=0x3FF5FFFFFFF size=0x1000000000 interleave_size=128MiB interleave_step=8192MiB +memory44 start=0x160000000 end=0x3FF67FFFFFF size=0x1000000000 interleave_size=128MiB interleave_step=8192MiB +memory45 start=0x168000000 end=0x3FF6FFFFFFF size=0x1000000000 interleave_size=128MiB interleave_step=8192MiB +memory46 start=0x170000000 end=0x3FF77FFFFFF size=0x1000000000 interleave_size=128MiB interleave_step=8192MiB +memory47 start=0x178000000 end=0x3FF7FFFFFFF size=0x1000000000 interleave_size=128MiB interleave_step=8192MiB +memory48 start=0x180000000 end=0x3FF87FFFFFF size=0x1000000000 interleave_size=128MiB interleave_step=8192MiB +memory49 start=0x188000000 end=0x3FF8FFFFFFF size=0x1000000000 interleave_size=128MiB interleave_step=8192MiB +memory50 start=0x190000000 end=0x3FF97FFFFFF size=0x1000000000 interleave_size=128MiB interleave_step=8192MiB +memory51 start=0x198000000 end=0x3FF9FFFFFFF size=0x1000000000 interleave_size=128MiB interleave_step=8192MiB +memory52 start=0x1A0000000 end=0x3FFA7FFFFFF size=0x1000000000 interleave_size=128MiB interleave_step=8192MiB +memory53 start=0x1A8000000 end=0x3FFAFFFFFFF size=0x1000000000 interleave_size=128MiB interleave_step=8192MiB +memory54 start=0x1B0000000 end=0x3FFB7FFFFFF size=0x1000000000 interleave_size=128MiB interleave_step=8192MiB +memory55 start=0x1B8000000 end=0x3FFBFFFFFFF size=0x1000000000 interleave_size=128MiB interleave_step=8192MiB +memory56 start=0x1C0000000 end=0x3FFC7FFFFFF size=0x1000000000 interleave_size=128MiB interleave_step=8192MiB +memory57 start=0x1C8000000 end=0x3FFCFFFFFFF size=0x1000000000 interleave_size=128MiB interleave_step=8192MiB +memory58 start=0x1D0000000 end=0x3FFD7FFFFFF size=0x1000000000 interleave_size=128MiB interleave_step=8192MiB +memory59 start=0x1D8000000 end=0x3FFDFFFFFFF size=0x1000000000 interleave_size=128MiB interleave_step=8192MiB +memory60 
start=0x1E0000000 end=0x3FFE7FFFFFF size=0x1000000000 interleave_size=128MiB interleave_step=8192MiB +memory61 start=0x1E8000000 end=0x3FFEFFFFFFF size=0x1000000000 interleave_size=128MiB interleave_step=8192MiB +memory62 start=0x1F0000000 end=0x3FFF7FFFFFF size=0x1000000000 interleave_size=128MiB interleave_step=8192MiB +memory63 start=0x1F8000000 end=0x3FFFFFFFFFF size=0x1000000000 interleave_size=128MiB interleave_step=8192MiB +Simulation is complete, simulated time: 175.616 us diff --git a/src/sst/elements/memHierarchy/tests/testRangeCheck.py b/src/sst/elements/memHierarchy/tests/testRangeCheck.py new file mode 100644 index 0000000000..8fc16ba6c3 --- /dev/null +++ b/src/sst/elements/memHierarchy/tests/testRangeCheck.py @@ -0,0 +1,218 @@ +import os +import sst +from mhlib import componentlist + +RANGE_CHECK = 0 # set to 1 to demonstrate long initialization issue +NODES = 64 +NUM_CPUS = 1 + +TOTAL_GB = 8*512 +mem_info = { + "gb" : TOTAL_GB, + "sz_str" : f"{TOTAL_GB}GiB", + "sz" : TOTAL_GB * 1024 * 1024 * 1024, + "last" : TOTAL_GB * 1024 * 1024 * 1024 - 1, + "sz_rev" : TOTAL_GB * 1024 * 1024 * 1024, +} +MEM_PER_NODE = int(mem_info['sz'] / NODES) +MiB_PER_NODE = f"{(1024*1024)}MiB" + +# NIC Groups +cpu_group = 1 # L2 cache level +node_group = 2 # Memory Controllers + +class CPU(): + def __init__(self, cpu_num): + self.comp = sst.Component(f"cpu{cpu_num}", "miranda.BaseCPU") + self.comp.addParams({ + "verbose" : 0, + "max_reqs_cycle" : 2, + "cache_line_size" : 64, + "maxmemreqpending" : 1, + "clock" : "2.0GHz", + "pagecount" : 1, + "pagesize" : mem_info['sz'] + }) + self.gen = self.comp.setSubComponent("generator", "miranda.RandomGenerator") + self.gen.addParams({ + "verbose" : 0, + "count" : 1024, + "max_address" : mem_info['sz'] - 64 + }) + # L1 Cache + self.l1 = sst.Component(f"l1_{cpu_num}", "memHierarchy.Cache") + self.l1.addParams({ + "node" : cpu_num, + "verbose" : 0, + "cache_frequency" : "2.0GHz", + "cache_size" : "16KiB", + "associativity" : "4", + 
"access_latency_cycles" : "5", + "L1" : "1", + "cache_line_size" : "64", + "coherence_protocol" : "MESI", + "cache_type" : "inclusive", + "force_noncacheable_reqs" : 1, + "replacement_policy" : "lru", + }) + # connect CPU to L1 + self.link_cpu_l1 = sst.Link(f"link_cpu_l1_{cpu_num}") + self.link_cpu_l1.connect( (self.comp, "cache_link", "1ns"), (self.l1, "high_network_0", "1ns") ) + + +class CPU_COMPLEX(): + def __init__(self,node): + + # CPU vector + self.cpu = [] + for i in range(NUM_CPUS): + self.cpu.append(CPU(i)) + + # Bus connecting CPU L1 to next level of the memory hierarchy + self.cpubus = sst.Component("cpubus", "memHierarchy.Bus") + self.cpubus.addParams({ + "bus_frequency" : "2.0GHz", + }) + + # Level2 Cache + self.l2cache = sst.Component("l2cache", "memHierarchy.Cache") + self.l2cache.addParams({ + "node" : node, + "verbose" : 0, + "cache_frequency" : "2.0GHz", + "cache_size" : "64 KiB", + "associativity" : "16", + "access_latency_cycles" : 32, + "cache_line_size" : "64", + "coherence_protocol" : "MESI", + "mshr_num_entries" : 8, + "mshr_latency_cycles" : 16 + }) + + # L2 Cache interface to cpu bus + self.cpulink = self.l2cache.setSubComponent("cpulink","memHierarchy.MemLink") + + # L2 Cache interface to nic + self.nic = self.l2cache.setSubComponent("memlink","memHierarchy.MemNIC") + self.nic.addParams({ + "group" : cpu_group, + "network_bw" : "8800GiB/s", + "network_input_buffer_size" : "1KiB", + "network_output_buffer_size" : "1KiB", + "range_check" : RANGE_CHECK, + }) + + # Connect cpubus to L2 + self.link_cpubus_l2 = sst.Link("link_cpubus_l2") + self.link_cpubus_l2.connect( + (self.cpubus, "low_network_0", "1ns"), + (self.cpulink, "port", "1ns") ) + + # Connect CPUs (L1) to cpubus + self.link_l1_cpubus = [] + for i in range(NUM_CPUS): + self.link_l1_cpubus.append(sst.Link(f"link_l1_{i}_cpubus")) + self.link_l1_cpubus[i].connect( + (self.cpu[i].l1, "low_network_0", "1ns"), + (self.cpubus, f"high_network_{i}", "1ns") + ) + +class NODE(): + def 
__init__(self,node): + node_mem_params = {} + if NODES==1: + memBot = 0; + memTop = mem_info['sz'] + node_mem_params = { + "node_id" : 0, + "backend.mem_size" : MiB_PER_NODE, + "addr_range_start" : f"{memBot}", + "addr_range_end" : f"{memTop-1}" + } + print(f"memory{node} start=0x{memBot:X} end=0x{memTop:X} size=0x{MEM_PER_NODE:X}") + else: + # every 128MB switch memories + istride = 0x8000000 + isize = "128MiB" + istep = f"{128*NODES}MiB" + memBot = (node%NODES) * istride + memTop = mem_info['sz'] - ((NODES-node-1)*istride) - 1 + print(f"memory{node} start=0x{memBot:X} end=0x{(memTop):X} size=0x{MEM_PER_NODE:X} interleave_size={isize} interleave_step={istep}") + + self.memctrl = sst.Component(f"memory{node}", "memHierarchy.MemController") + self.memctrl.addParams({ + "verbose" : 0, + "node_id" : node, + "backend.mem_size" : MiB_PER_NODE, + "addr_range_start" : f"{memBot}", + "addr_range_end" : f"{memTop}", + "interleave_size" : isize, + "interleave_step" : istep, + "clock" : "2.0GHz", + "request_width" : 64, + "backing" : "malloc", + }) + self.memory = self.memctrl.setSubComponent("backend", "memHierarchy.simpleMem") + self.memory.addParams({ + "verbose" : 0, + "access_time" : "100ns", + "mem_size" : mem_info['sz_str'] + }) + + # The memory controller NIC + self.memNIC = self.memctrl.setSubComponent("cpulink", "memHierarchy.MemNIC") + self.memNIC.addParams({ + "group" : node_group, + "sources" : [cpu_group, node_group], + "destinations" : [node_group], + "network_bw" : "8GiB/s", + "network_input_buffer_size" : "1KiB", + "network_output_buffer_size" : "1KiB", + "range_check" : RANGE_CHECK, + }) + +if __name__ == "__main__": + + cpucomplex = CPU_COMPLEX(0) + node = [] + for n in range(NODES): + node.append(NODE(n)) + + local_network = sst.Component("local_network", "merlin.hr_router") + local_network.addParams( { + "id" : 0, + "num_ports" : f"{1+NODES}", + "topology" : "merlin.singlerouter", + "link_bw" : "10TiB/s", + "xbar_bw" : "10TiB/s", + "flit_size" : "72B", + 
"input_latency" : "10ns", + "output_latency" : "10ns", + "input_buf_size" : "1KB", + "output_buf_size" : "1KB", + }); + local_network.setSubComponent("topology", "merlin.singlerouter") + + # connect L2 NIC to Local Interconnect Network + link_cache_net_0 = sst.Link("link_cache_net_0") + link_cache_net_0.connect( + (cpucomplex.nic, "port", "1ns" ), + (local_network, "port0", "1ns" ) + ) + + # connect local_network to memory controllers + link_dir_net = [] + for n in range(NODES): + link_dir_net.append(sst.Link(f"link_dir_net_{n}")) + link_dir_net[n].connect( + (local_network, f"port{n+1}", "1ns"), + (node[n].memNIC, "port", "1ns") + ) + +# Enable statistics +# sst.setStatisticLoadLevel(7) +# sst.setStatisticOutput("sst.statOutputConsole") +# for a in componentlist: +# sst.enableAllStatisticsForComponentType(a) + +# EOF diff --git a/src/sst/elements/memHierarchy/tests/testsuite_default_memHierarchy_memHA.py b/src/sst/elements/memHierarchy/tests/testsuite_default_memHierarchy_memHA.py index 6f898d8ded..2da5ad3bba 100644 --- a/src/sst/elements/memHierarchy/tests/testsuite_default_memHierarchy_memHA.py +++ b/src/sst/elements/memHierarchy/tests/testsuite_default_memHierarchy_memHA.py @@ -187,6 +187,9 @@ def test_memHA_StdMem_mmio2(self): def test_memHA_StdMem_mmio3(self): self.memHA_Template("StdMem_mmio3") + + def test_memHA_RangeCheck(self): + self.memHA_Template("RangeCheck", testtimeout=60) ##### def memHA_Template(self, testcase,