diff --git a/src/sst/elements/ariel/api/arielapi.h b/src/sst/elements/ariel/api/arielapi.h index b6f4ea311b..e3704659bc 100644 --- a/src/sst/elements/ariel/api/arielapi.h +++ b/src/sst/elements/ariel/api/arielapi.h @@ -17,8 +17,6 @@ #ifndef _H_ARIEL_CLIENT_API #define _H_ARIEL_CLIENT_API -#include "arielapi.c" - void ariel_enable(); void ariel_fence(); diff --git a/src/sst/elements/ariel/arielcore.cc b/src/sst/elements/ariel/arielcore.cc index 8009194bd2..9caaba0ec9 100644 --- a/src/sst/elements/ariel/arielcore.cc +++ b/src/sst/elements/ariel/arielcore.cc @@ -62,6 +62,8 @@ ArielCore::ArielCore(ArielTunnel *tunnel, SimpleMem* coreToCacheLink, statWriteRequestSizes = own->registerStatistic( "write_request_sizes", subID ); statSplitReadRequests = own->registerStatistic( "split_read_requests", subID ); statSplitWriteRequests = own->registerStatistic( "split_write_requests", subID ); + statFlushRequests = own->registerStatistic( "flush_requests", subID); + statFenceRequests = own->registerStatistic( "fence_requests", subID); statNoopCount = own->registerStatistic( "no_ops", subID ); statInstructionCount = own->registerStatistic( "instruction_count", subID ); statCycles = own->registerStatistic( "cycles", subID ); @@ -213,6 +215,7 @@ void ArielCore::commitFlushEvent(const uint64_t address, pendingTransactions->insert( std::pair(req->id, req) ); cacheLink->sendRequest(req); + statFlushRequests->addData(1); } } @@ -257,8 +260,11 @@ void ArielCore::stall() { void ArielCore::fence(){ ARIEL_CORE_VERBOSE(4, output->verbose(CALL_INFO, 4, 0, "Core: %" PRIu32 " FENCE: Current pending transaction count: %" PRIu32 " (%" PRIu32 ")\n", coreID, pending_transaction_count, maxPendingTransactions)); - isFenced = true; - isStalled = true; + + if( pending_transaction_count > 0 ) { + isFenced = true; + isStalled = true; + } } void ArielCore::unfence() @@ -721,6 +727,7 @@ void ArielCore::handleFenceEvent(ArielFenceEvent *fEv) { fence(); // Possibility B: // commitFenceEvent(); + statFenceRequests->addData(1); } void ArielCore::printCoreStatistics() { diff --git a/src/sst/elements/ariel/arielcore.h b/src/sst/elements/ariel/arielcore.h index ef2ca9c8a1..c90e0d26c7 100644 --- a/src/sst/elements/ariel/arielcore.h +++ b/src/sst/elements/ariel/arielcore.h @@ -155,6 +155,8 @@ class ArielCore { Statistic* statReadRequests; Statistic* statWriteRequests; + Statistic* statFlushRequests; + Statistic* statFenceRequests; Statistic* statReadRequestSizes; Statistic* statWriteRequestSizes; Statistic* statSplitReadRequests; @@ -173,6 +175,7 @@ class ArielCore { Statistic* statFPSPScalarIns; Statistic* statFPSPOps; + uint64_t pending_transaction_count; }; diff --git a/src/sst/elements/ariel/arielcpu.h b/src/sst/elements/ariel/arielcpu.h index 7553e1b6a5..c53451c743 100644 --- a/src/sst/elements/ariel/arielcpu.h +++ b/src/sst/elements/ariel/arielcpu.h @@ -89,6 +89,8 @@ class ArielCPU : public SST::Component { { "split_read_requests", "Statistic counts number of split read requests (requests which come from multiple lines)", "requests", 1}, { "split_write_requests", "Statistic counts number of split write requests (requests which are split over multiple lines)", "requests", 1}, { "no_ops", "Statistic counts instructions which do not execute a memory operation", "instructions", 1}, + { "flush_requests", "Statistic counts instructions which perform flushes", "requests", 1}, + { "fence_requests", "Statistic counts instructions which perform fences", "requests", 1}, { "instruction_count", "Statistic for counting instructions", 
"instructions", 1 }, { "max_insts", "Maximum number of instructions reached by a thread", "instructions", 0}, { "fp_dp_ins", "Statistic for counting DP-floating point instructions", "instructions", 1 }, diff --git a/src/sst/elements/cassini/palaprefetch.h b/src/sst/elements/cassini/palaprefetch.h index cbf0daf163..2025b7a21c 100644 --- a/src/sst/elements/cassini/palaprefetch.h +++ b/src/sst/elements/cassini/palaprefetch.h @@ -65,7 +65,7 @@ class PalaPrefetcher : public SST::MemHierarchy::CacheListener "cassini", "PalaPrefetcher", SST_ELI_ELEMENT_VERSION(1,0,0), - "Prefetcher", + "Stride Prefetcher [Palacharla 1994]", "SST::Cassini::CacheListener" ) diff --git a/src/sst/elements/ember/mpi/motifs/emberfft3d.cc b/src/sst/elements/ember/mpi/motifs/emberfft3d.cc index bdc6499792..4265f8c915 100644 --- a/src/sst/elements/ember/mpi/motifs/emberfft3d.cc +++ b/src/sst/elements/ember/mpi/motifs/emberfft3d.cc @@ -34,26 +34,26 @@ EmberFFT3DGenerator::EmberFFT3DGenerator(SST::Component* owner, Params& params) m_backwardTotal(0), m_transCostPer(6) { - m_data.np0 = (uint32_t) params.find("arg.nx", 100); - m_data.np1 = (uint32_t) params.find("arg.ny", 100); - m_data.np2 = (uint32_t) params.find("arg.nz", 100); + m_data.np0 = params.find("arg.nx", 100); + m_data.np1 = params.find("arg.ny", 100); + m_data.np2 = params.find("arg.nz", 100); assert( m_data.np0 == m_data.np1 ); assert( m_data.np1 == m_data.np2 ); - m_data.nprow = (uint32_t) params.find("arg.npRow", 0); + m_data.nprow = params.find("arg.npRow", 0); assert( 0 < m_data.nprow ); - m_iterations = (uint32_t) params.find("arg.iterations", 1); + m_iterations = params.find("arg.iterations", 1); - m_nsPerElement = (float) params.find("arg.nsPerElement",1); + m_nsPerElement = params.find("arg.nsPerElement",1); - m_transCostPer[0] = (float) params.find("arg.fwd_fft1",1); - m_transCostPer[1] = (float) params.find("arg.fwd_fft2",1); - m_transCostPer[2] = (float) params.find("arg.fwd_fft3",1); - m_transCostPer[3] = (float) params.find("arg.bwd_fft1",1); - m_transCostPer[4] = (float) params.find("arg.bwd_fft2",1); - m_transCostPer[5] = (float) params.find("arg.bwd_fft3",1); + m_transCostPer[0] = params.find("arg.fwd_fft1",1); + m_transCostPer[1] = params.find("arg.fwd_fft2",1); + m_transCostPer[2] = params.find("arg.fwd_fft3",1); + m_transCostPer[3] = params.find("arg.bwd_fft1",1); + m_transCostPer[4] = params.find("arg.bwd_fft2",1); + m_transCostPer[5] = params.find("arg.bwd_fft3",1); configure(); } diff --git a/src/sst/elements/ember/mpi/motifs/emberhalo3d.cc b/src/sst/elements/ember/mpi/motifs/emberhalo3d.cc index 3665ed058f..d7f7f9c969 100644 --- a/src/sst/elements/ember/mpi/motifs/emberhalo3d.cc +++ b/src/sst/elements/ember/mpi/motifs/emberhalo3d.cc @@ -23,30 +23,30 @@ EmberHalo3DGenerator::EmberHalo3DGenerator(SST::Component* owner, Params& params EmberMessagePassingGenerator(owner, params, "Halo3D"), m_loopIndex(0) { - nx = (uint32_t) params.find("arg.nx", 100); - ny = (uint32_t) params.find("arg.ny", 100); - nz = (uint32_t) params.find("arg.nz", 100); + nx = params.find("arg.nx", 100); + ny = params.find("arg.ny", 100); + nz = params.find("arg.nz", 100); - peX = (uint32_t) params.find("arg.pex", 0); - peY = (uint32_t) params.find("arg.pey", 0); - peZ = (uint32_t) params.find("arg.pez", 0); + peX = params.find("arg.pex", 0); + peY = params.find("arg.pey", 0); + peZ = params.find("arg.pez", 0); - items_per_cell = (uint32_t) params.find("arg.fields_per_cell", 1); - performReduction = (params.find("arg.doreduce", 1) == 1); - sizeof_cell = (uint32_t) 
params.find("arg.datatype_width", 8); + items_per_cell = params.find("arg.fields_per_cell", 1); + performReduction = (params.find("arg.doreduce", 1) == 1); + sizeof_cell = params.find("arg.datatype_width", 8); - uint64_t pe_flops = (uint64_t) params.find("arg.peflops", 10000000000); - uint64_t flops_per_cell = (uint64_t) params.find("arg.flopspercell", 26); + uint64_t pe_flops = params.find("arg.peflops", 10000000000); + uint64_t flops_per_cell = params.find("arg.flopspercell", 26); const uint64_t total_grid_points = (uint64_t) (nx * ny * nz); const uint64_t total_flops = total_grid_points * ((uint64_t) items_per_cell) * ((uint64_t) flops_per_cell); // Converts FLOP/s into nano seconds of compute const double compute_seconds = ( (double) total_flops / ( (double) pe_flops / 1000000000.0 ) ); - nsCompute = (uint64_t) params.find("arg.computetime", (uint64_t) compute_seconds); - nsCopyTime = (uint32_t) params.find("arg.copytime", 0); + nsCompute = params.find("arg.computetime", (uint64_t) compute_seconds); + nsCopyTime = params.find("arg.copytime", 0); - iterations = (uint32_t) params.find("arg.iterations", 1); + iterations = params.find("arg.iterations", 1); x_down = -1; x_up = -1; @@ -55,7 +55,7 @@ EmberHalo3DGenerator::EmberHalo3DGenerator(SST::Component* owner, Params& params z_down = -1; z_up = -1; - jobId = (int) params.find("_jobId"); //NetworkSim + jobId = params.find("_jobId"); //NetworkSim configure(); } diff --git a/src/sst/elements/ember/shmem/motifs/emberShmemAtomicInc.h b/src/sst/elements/ember/shmem/motifs/emberShmemAtomicInc.h index 8938886562..adc87a44f2 100644 --- a/src/sst/elements/ember/shmem/motifs/emberShmemAtomicInc.h +++ b/src/sst/elements/ember/shmem/motifs/emberShmemAtomicInc.h @@ -34,12 +34,13 @@ namespace Ember { template < class TYPE, int VAL > class EmberShmemAtomicIncBaseGenerator : public EmberShmemGenerator { - enum { Add, Fadd, Putv } m_op; + enum { Add, Fadd, Putv, Getv } m_op; std::string m_opStr; public: EmberShmemAtomicIncBaseGenerator(SST::Component* owner, Params& params, std::string name) : EmberShmemGenerator(owner, params, name ), m_phase(-3), m_one(1) { + m_computeTime = params.find("arg.computeTime", 50 ); m_dataSize = params.find("arg.dataSize", 32*1024*1024 ); m_updates = params.find("arg.updates", 4096); m_iterations = params.find("arg.iterations", 1); @@ -51,6 +52,8 @@ class EmberShmemAtomicIncBaseGenerator : public EmberShmemGenerator { m_op = Fadd; } else if ( m_opStr.compare("putv") == 0 ) { m_op = Putv; + } else if ( m_opStr.compare("getv") == 0 ) { + m_op = Getv; } else { assert(0); } @@ -59,6 +62,7 @@ class EmberShmemAtomicIncBaseGenerator : public EmberShmemGenerator { m_backed = params.find("arg.backed", false); m_outLoop = params.find("arg.outLoop", 1); m_num_nodes = params.find("arg.numNodes", -1); + m_randAddr = params.find("arg.randAddr", 1); m_times.resize(m_outLoop); m_miscLib = static_cast(getLib("HadesMisc")); @@ -106,9 +110,16 @@ class EmberShmemAtomicIncBaseGenerator : public EmberShmemGenerator { enQ_getTime( evQ, &m_startTime ); } else if ( m_phase < m_iterations * m_updates ) { - int dest = calcDestPe(); - Hermes::MemAddr addr = m_dest.offset( genRand() % m_dataSize ); + int dest = calcDestPe(); + + Hermes::MemAddr addr; + if ( m_randAddr ) { + addr = m_dest.offset( genRand() % m_dataSize ); + } else { + addr = m_dest.offset( 0 ); + } + enQ_compute( evQ, m_computeTime ); switch ( m_op ) { case Fadd: @@ -119,6 +130,9 @@ class EmberShmemAtomicIncBaseGenerator : public EmberShmemGenerator { break; case Putv: enQ_putv( evQ, 
addr, &m_one, dest ); + break; + case Getv: + enQ_getv( evQ, &m_one, addr, dest ); break; } if ( m_phase + 1 == m_iterations * m_updates ) { @@ -136,6 +150,9 @@ class EmberShmemAtomicIncBaseGenerator : public EmberShmemGenerator { } if ( m_outLoop > 0 ) { + if ( m_backed ) { + bzero( &m_dest.at(0), sizeof(TYPE) * m_dataSize); + } m_phase = -1; } else { ++m_phase; @@ -163,11 +180,11 @@ class EmberShmemAtomicIncBaseGenerator : public EmberShmemGenerator { } } - printf("%s: GUpdates = %.9lf\n", getMotifName().c_str(), Gupdates ); - printf("%s: Min Time = %.9lf\n", getMotifName().c_str(), minTime ); - printf("%s: Max Time = %.9lf\n", getMotifName().c_str(), maxTime ); - printf("%s: Min GUP/s = %.9lf\n", getMotifName().c_str(), Gupdates / maxTime); - printf("%s: Max GUP/s = %.9lf\n", getMotifName().c_str(), Gupdates / minTime ); + printf("%s:GUpdates = %.9lf\n", getMotifName().c_str(), Gupdates ); + printf("%s:MinTime = %.9lf\n", getMotifName().c_str(), minTime ); + printf("%s:MaxTime = %.9lf\n", getMotifName().c_str(), maxTime ); + printf("%s:MinGUP/s = %.9lf\n", getMotifName().c_str(), Gupdates / maxTime); + printf("%s:MaxGUP/s = %.9lf\n", getMotifName().c_str(), Gupdates / minTime ); } @@ -227,6 +244,8 @@ class EmberShmemAtomicIncBaseGenerator : public EmberShmemGenerator { unsigned int m_randSeed; #endif + int m_computeTime; + bool m_randAddr; bool m_backed; bool m_printTotals; TYPE m_one; @@ -289,9 +308,14 @@ class EmberShmemAtomicIncGenerator : public EmberShmemAtomicIncBaseGener if( this->m_my_pe == this->m_num_pes - 1 ) { pe = this->genRand() % (this->m_num_pes - 1); } else { + int pecountHS = this->m_num_pes + this->m_hotMult; pe = this->genRand() % pecountHS; + while( pe == this->m_my_pe ) { + pe = this->genRand() % pecountHS; + } + // If we generate a PE higher than we have // clamp ourselves to the highest PE if( pe >= this->m_num_pes) { @@ -335,7 +359,7 @@ class EmberShmemNSAtomicIncIntGenerator : public EmberShmemAtomicIncGenerator { diff --git a/src/sst/elements/ember/shmem/motifs/emberShmemRing.h b/src/sst/elements/ember/shmem/motifs/emberShmemRing.h index cb6f38036d..91bb7a3fe1 100644 --- a/src/sst/elements/ember/shmem/motifs/emberShmemRing.h +++ b/src/sst/elements/ember/shmem/motifs/emberShmemRing.h @@ -27,19 +27,6 @@ namespace Ember { template< class TYPE > class EmberShmemRingGenerator : public EmberShmemGenerator { -public: - SST_ELI_REGISTER_SUBCOMPONENT( - EmberShmemRingGenerator, - "ember", - "ShmemRingMotif", - SST_ELI_ELEMENT_VERSION(1,0,0), - "SHMEM ring", - "SST::Ember::EmberGenerator" - ) - - SST_ELI_DOCUMENT_PARAMS( - ) - public: EmberShmemRingGenerator(SST::Component* owner, Params& params) : EmberShmemGenerator(owner, params, "ShmemRing" ), m_phase(-2) @@ -118,6 +105,25 @@ class EmberShmemRingGenerator : public EmberShmemGenerator { int m_my_pe; int m_num_pes; }; +class EmberShmemRingIntGenerator : public EmberShmemRingGenerator { +public: + SST_ELI_REGISTER_SUBCOMPONENT( + EmberShmemRingIntGenerator, + "ember", + "ShmemRingIntMotif", + SST_ELI_ELEMENT_VERSION(1,0,0), + "SHMEM ring2 int", + "SST::Ember::EmberGenerator" + + ) + + SST_ELI_DOCUMENT_PARAMS( + ) + +public: + EmberShmemRingIntGenerator( SST::Component* owner, Params& params ) : + EmberShmemRingGenerator(owner, params) { } +}; } } diff --git a/src/sst/elements/ember/shmem/motifs/emberShmemRing2.h b/src/sst/elements/ember/shmem/motifs/emberShmemRing2.h index 9f288e575b..4fd8adf63e 100644 --- a/src/sst/elements/ember/shmem/motifs/emberShmemRing2.h +++ 
b/src/sst/elements/ember/shmem/motifs/emberShmemRing2.h @@ -26,18 +26,6 @@ namespace Ember { template< class TYPE > class EmberShmemRing2Generator : public EmberShmemGenerator { -public: - SST_ELI_REGISTER_SUBCOMPONENT( - EmberShmemRing2Generator, - "ember", - "ShmemRing2Motif", - SST_ELI_ELEMENT_VERSION(1,0,0), - "SHMEM ring", - "SST::Ember::EmberGenerator" - ) - - SST_ELI_DOCUMENT_PARAMS( - ) public: EmberShmemRing2Generator(SST::Component* owner, Params& params) : @@ -119,6 +107,27 @@ class EmberShmemRing2Generator : public EmberShmemGenerator { int m_my_pe; int m_num_pes; }; + +class EmberShmemRing2IntGenerator : public EmberShmemRing2Generator { +public: + SST_ELI_REGISTER_SUBCOMPONENT( + EmberShmemRing2IntGenerator, + "ember", + "ShmemRing2IntMotif", + SST_ELI_ELEMENT_VERSION(1,0,0), + "SHMEM ring2 int", + "SST::Ember::EmberGenerator" + + ) + + SST_ELI_DOCUMENT_PARAMS( + ) + +public: + EmberShmemRing2IntGenerator( SST::Component* owner, Params& params ) : + EmberShmemRing2Generator(owner, params) { } +}; + } } diff --git a/src/sst/elements/ember/test/emberLoad.py b/src/sst/elements/ember/test/emberLoad.py index 1a26e83131..609c9c89a4 100644 --- a/src/sst/elements/ember/test/emberLoad.py +++ b/src/sst/elements/ember/test/emberLoad.py @@ -1,5 +1,6 @@ import sys,getopt +import pprint import sst from sst.merlin import * @@ -34,6 +35,7 @@ netPktSize = '' netTopo = '' netShape = '' +netHostsPerRtr = 1 netInspect = '' rtrArb = '' @@ -54,7 +56,8 @@ 'network': [], 'nic': [], 'ember': [], -'hermes': [] +'hermes': [], +'merlin': [], } motifAPI='HadesMP' @@ -68,7 +71,7 @@ try: - opts, args = getopt.getopt(sys.argv[1:], "", ["topo=", "shape=", + opts, args = getopt.getopt(sys.argv[1:], "", ["topo=", "shape=","hostsPerRtr=", "simConfig=","platParams=",",debug=","platform=","numNodes=", "numCores=","loadFile=","cmdLine=","printStats=","randomPlacement=", "emberVerbose=","netBW=","netPktSize=","netFlitSize=", @@ -84,6 +87,8 @@ for o, a in opts: if o in ("--shape"): netShape = a + elif o in ("--hostsPerRtr"): + netHostsPerRtr = int(a) elif o in ("--platform"): platform = a elif o in ("--numCores"): @@ -143,7 +148,7 @@ elif o in ("--platParams"): platParams = a elif o in ("--param"): - key,value = a.split(":") + key,value = a.split(":",1) params[key] += [value] elif o in ("--useSimpleMemoryModel"): useSimpleMemoryModel=True @@ -225,7 +230,7 @@ if "torus" == netTopo: - topoInfo = TorusInfo(netShape) + topoInfo = TorusInfo(netShape, netHostsPerRtr) topo = topoTorus() elif "fattree" == netTopo: @@ -233,15 +238,20 @@ topoInfo = FattreeInfo(netShape) topo = topoFatTree() -elif "dragonfly" == netTopo: +elif "dragonfly" == netTopo or "dragonfly2" == netTopo: topoInfo = DragonFlyInfo(netShape) topo = topoDragonFly() -elif "dragonfly2" == netTopo: +elif "dragonflyLegacy" == netTopo: - topoInfo = DragonFly2Info(netShape) - topo = topoDragonFly2() + topoInfo = DragonFlyLegacyInfo(netShape) + topo = topoDragonFlyLegacy() + +elif "hyperx" == netTopo: + + topoInfo = HyperXInfo(netShape, netHostsPerRtr) + topo = topoHyperX() else: sys.exit("how did we get here") @@ -332,6 +342,8 @@ hermesParams['hermesParams.nicParams.verboseLevel'] = debug hermesParams['hermesParams.functionSM.verboseLevel'] = debug hermesParams['hermesParams.ctrlMsg.verboseLevel'] = debug +hermesParams['hermesParams.ctrlMsg.pqs.verboseLevel'] = debug +hermesParams['hermesParams.ctrlMsg.pqs.verboseMask'] = 1 emberParams['verbose'] = emberVerbose hermesParams['hermesParams.numNodes'] = topoInfo.getNumNodes() @@ -372,6 +384,15 @@ print "set 
hermesParams {}={}".format( key, value ) hermesParams[key] = value +for a in params['merlin']: + key, value = a.split("=") + if key in sst.merlin._params: + print "override hermesParams {}={} with {}".format( key, sst.merlin._params[key], value ) + else: + print "set merlin {}={}".format( key, value ) + sst.merlin._params[key] = value + + nicParams["packetSize"] = networkParams['packetSize'] nicParams["link_bw"] = networkParams['link_bw'] sst.merlin._params["link_lat"] = networkParams['link_lat'] @@ -393,12 +414,17 @@ print "EMBER: network: BW={0} pktSize={1} flitSize={2}".format( networkParams['link_bw'], networkParams['packetSize'], networkParams['flitSize']) -sst.merlin._params.update( topoInfo.getNetworkParams() ) +if len(params['merlin']) == 0: + sst.merlin._params.update( topoInfo.getNetworkParams() ) epParams = {} epParams.update(emberParams) epParams.update(hermesParams) +#pprint.pprint( networkParams, width=1) +#pprint.pprint( nicParams, width=1) +#pprint.pprint( sst.merlin._params, width=1) + loadInfo = LoadInfo( nicParams, epParams, numNodes, numCores, topoInfo.getNumNodes(), model ) diff --git a/src/sst/elements/ember/test/loadInfo.py b/src/sst/elements/ember/test/loadInfo.py index e58d4b57fa..ba6fed6248 100644 --- a/src/sst/elements/ember/test/loadInfo.py +++ b/src/sst/elements/ember/test/loadInfo.py @@ -228,7 +228,7 @@ def foo( self, jobId, x, statNodes, detailedModel = None ): numNodes = calcMaxNode( nidList ) if numNodes > self.numNics: sys.exit('Error: Requested max nodes ' + str(numNodes) +\ - ' is greater than available nodes ' + str(self.numNodes) ) + ' is greater than available nodes ' + str(self.numNics) ) params.update( self.epParams ) ep = EmberEP( jobId, params, self.nicParams, self.numCores, ranksPerNode, statNodes, nidList, motifLogNodes, detailedModel ) # added motifLogNodes here diff --git a/src/sst/elements/ember/test/networkConfig.py b/src/sst/elements/ember/test/networkConfig.py index 5cfa0217ab..feb46fa9bd 100644 --- a/src/sst/elements/ember/test/networkConfig.py +++ b/src/sst/elements/ember/test/networkConfig.py @@ -7,19 +7,10 @@ def getNetworkParams(self): pass class TorusInfo(TopoInfo): - def __init__( self, config ): + def __init__( self, shape, local_ports ): - args = config.split(':') - shape = args[0] width = 1 - local_ports = 1 - if len( args ) > 1: - local_ports = int( args[1] ) - - if len( args ) > 2: - width = int( args[2] ) - self.params = {} self.params["num_dims"] = self.calcNumDim(shape) self.params["torus:shape"] = shape @@ -52,6 +43,43 @@ def calcWidth(self,shape,width): count += 1 return retval +class HyperXInfo(TopoInfo): + def __init__( self, shape, local_ports ): + + width = 1 + + self.params = {} + self.params["num_dims"] = self.calcNumDim(shape) + self.params["hyperx:shape"] = shape + self.params["hyperx:width"] = self.calcWidth(shape,width) + self.params["hyperx:local_ports"] = local_ports + self.numNodes = self.calcNumNodes( shape ) * local_ports + + def getNetworkParams(self): + return self.params + + def getNumNodes(self): + return self.numNodes + + def calcNumDim(self,shape): + return len( shape.split( 'x' ) ) + + def calcNumNodes(self,shape): + tmp = shape.split( 'x' ) + num = 1 + for d in tmp: + num = num * int(d) + return num + + def calcWidth(self,shape,width): + tmp = len( shape.split( 'x' ) ) - 1 + retval = str(width) + count = 0 + while ( count < tmp ): + retval += "x" + str(width) + count += 1 + return retval + class FattreeInfo(TopoInfo): def __init__( self, shape ): @@ -75,7 +103,7 @@ def calcNumNodes(self, shape): 
return total_hosts -class DragonFlyInfo(TopoInfo): +class DragonFlyLegacyInfo(TopoInfo): def __init__( self, shape ): radix, lcl, glbl, nRtrs = shape.split(':') self.params = {} @@ -97,7 +125,7 @@ def getNetworkParams(self): def getNumNodes(self): return self.numNodes -class DragonFly2Info(TopoInfo): +class DragonFlyInfo(TopoInfo): def __init__( self, shape ): lcl, nRtrs, glbl, nGrp = shape.split(':') self.params = {} @@ -110,7 +138,6 @@ def __init__( self, shape ): self.params["dragonfly:algorithm"] = "minimal" self.numNodes = int(nGrp) * hostsPerGroup - print self.numNodes def getNetworkParams(self): return self.params diff --git a/src/sst/elements/ember/tests/refFiles/ESshmem_cumulative.out b/src/sst/elements/ember/tests/refFiles/ESshmem_cumulative.out index e3fd8a15a8..1a39da1541 100644 --- a/src/sst/elements/ember/tests/refFiles/ESshmem_cumulative.out +++ b/src/sst/elements/ember/tests/refFiles/ESshmem_cumulative.out @@ -1,126 +1,126 @@ -20e886bfb7e3fea793bf0029e431d1a7 Simulation is complete, simulated time: 240.273 us -c4a74c672da38bc35a2cb3480313498d Simulation is complete, simulated time: 613.697 us -5f6021f216b371bf4d4834218ce3f1bb Simulation is complete, simulated time: 186.149 us -6eec970b3269d9eb2be6c4f09a9d9080 Simulation is complete, simulated time: 413.141 us -e8cdf1a1a4aa5a7749048c7dcf67ca6a Simulation is complete, simulated time: 9.074 us -f4b15be14bba163dcb2e4b642a8e3c6b Simulation is complete, simulated time: 46.368 us -4235171596788b842de423b0460c86bf Simulation is complete, simulated time: 56.91 us -50014f8db405b6e51a00abfe86d18a1d Simulation is complete, simulated time: 9.074 us -1ac054980eba5eb13f6207cbeb051ec1 Simulation is complete, simulated time: 46.968 us -99228b3dadbc45188e37e3b75ca6ce2c Simulation is complete, simulated time: 58.906 us -56897b7251e2e665f33199f82e1e266f Simulation is complete, simulated time: 10.85 us -23059a7933b94bb77d0d8cfa22e0aa4e Simulation is complete, simulated time: 9.074 us -063af3e51e2f826e19f23ff10b4c5f18 Simulation is complete, simulated time: 46.368 us -956cd31d0521f9dcaba85af469a68d12 Simulation is complete, simulated time: 3.33517 ms -d1beb5787ff77daf730c9daae83ede51 Simulation is complete, simulated time: 3.39472 ms -05ae92ecd0fa12d89b96b8fbfffbe178 Simulation is complete, simulated time: 3.39472 ms -b13fd8d32deb6af396771612e8dcbc6a Simulation is complete, simulated time: 341.704 us -bfcbcc3439246ee8bb3b9e1416b023b5 Simulation is complete, simulated time: 9.074 us -96ef09125604ac671e202d0381740c25 Simulation is complete, simulated time: 46.968 us -63c2acc33f4874da276c95f2347fac8d Simulation is complete, simulated time: 3.39472 ms -523b7c722410ba2b98dcc36846ff5314 Simulation is complete, simulated time: 3.51373 ms -1d9cb27f2056df62e8aedc95fdf7796c Simulation is complete, simulated time: 3.51373 ms -ab1add1f35f90727eea910bf7e0f81b9 Simulation is complete, simulated time: 373.42 us -2de75935ea9979e9394f939a035740ce Simulation is complete, simulated time: 7.683 us -0deb75644ef4c5b55526ea13acac9ddf Simulation is complete, simulated time: 17.869 us -d6c2fbe70743ecc9031e38e09cafaa6e Simulation is complete, simulated time: 7.679 us -caeb6c8c3f6fd743b585740f6e3d9061 Simulation is complete, simulated time: 17.853 us -7fca3bd3526e00cdad7d54637acf2f39 Simulation is complete, simulated time: 11.774 us -392adc0ace3695154d667a5058987354 Simulation is complete, simulated time: 109.216 us -11fa136c6e5730f03f97c2df2ac025ce Simulation is complete, simulated time: 110.244 us -1f0d8f7ed71092af5eb66728d370ae38 Simulation is complete, simulated time: 
11.774 us -b357e0efaecab3d206b4f8d3cb2c7dd4 Simulation is complete, simulated time: 109.216 us -bf02d08d8b5b22aa1099cd3467706e85 Simulation is complete, simulated time: 110.896 us -90c12d7662800886091a47fbdfde7163 Simulation is complete, simulated time: 11.499 us -3baeef25ec6a74a92ea77394b2352c73 Simulation is complete, simulated time: 108.211 us -069e097dc752bdd149b0030377dcd9cc Simulation is complete, simulated time: 108.977 us -7c2b015c842bda7609bdd3cd3c39d0ba Simulation is complete, simulated time: 11.499 us -f55df8a1f4a9fcd6e2b04035137cf354 Simulation is complete, simulated time: 108.211 us -5dd729022a960da0a9ac163d18f88184 Simulation is complete, simulated time: 109.747 us -c7baae57caeb4b68703225f0a81f71a8 Simulation is complete, simulated time: 6.999 us -15d334d388a4ce0b4c6ad5a744291707 Simulation is complete, simulated time: 6.999 us -e3839497ee5efcbc0e2c02f5aff5e9fa Simulation is complete, simulated time: 6.999 us -ce495bcfcdec30c37bf956ac9237b5bb Simulation is complete, simulated time: 6.999 us -10b8a688bf73c255be048643c466d53c Simulation is complete, simulated time: 6.999 us -220a2bbb0022e7a7edfe41f7381109cc Simulation is complete, simulated time: 6.999 us -24aa3f67dc3169fdeb12b24133540c40 Simulation is complete, simulated time: 6.999 us -65457bdb91c90a0d5994093f1a032f42 Simulation is complete, simulated time: 6.999 us -f0cad1d67e4c255cb2d1f426b6806371 Simulation is complete, simulated time: 6.999 us -8b99377ee1e99b887a79e8dd95ba0e14 Simulation is complete, simulated time: 7.157 us -762328d5d6adafa312ab9c649cfd1aea Simulation is complete, simulated time: 9.501 us -8a1e3d64ded1d806a170e903521591f4 Simulation is complete, simulated time: 7.465 us -5e38fd7d5d594679d887fee9728e4435 Simulation is complete, simulated time: 5.045 us -641335334c8a3f637d657906f3a7c10e Simulation is complete, simulated time: 5.045 us -166e5a405187a3cd2672bd9f9e230636 Simulation is complete, simulated time: 5.045 us -bbf096f5c13274e454d1071a21be1ff0 Simulation is complete, simulated time: 5.045 us -03b39041a6953d56d4ab3ed44d3d808f Simulation is complete, simulated time: 5.467 us -43349e16f8d84e6d6be356c0e295dcc2 Simulation is complete, simulated time: 5.467 us -1d4b31a6677748ed649a0dad981f44f1 Simulation is complete, simulated time: 5.813 us -5889dd20792ae804f6621a40ce618990 Simulation is complete, simulated time: 5.467 us -dd79e49a68939928072188bba6f57249 Simulation is complete, simulated time: 6.471 us -fed6c149bcf87264b06fe224e59e1e9d Simulation is complete, simulated time: 6.471 us -ca0d18bb4120ac55076dc8e06162eace Simulation is complete, simulated time: 6.471 us -67512825a65816363335bd86cd23044f Simulation is complete, simulated time: 6.471 us -cf6baf07c1dc268f5a18c56cea05a7c0 Simulation is complete, simulated time: 12.116 us -e800535bfb11bef3874730b4031b2f5a Simulation is complete, simulated time: 12.116 us -7dc7594c195ea2175045f2cde4305e03 Simulation is complete, simulated time: 12.116 us -447d697538f54ab583eaf945c6fcec67 Simulation is complete, simulated time: 12.116 us -380531d42f55da530db1700f3ca56e53 Simulation is complete, simulated time: 12.116 us -5999458e5744c2ef1c5e093052167909 Simulation is complete, simulated time: 12.116 us -dce10c52a0044b4bbe8b8156a54f9abb Simulation is complete, simulated time: 12.116 us -0ec0a744056ca006c3c32d1713e37bc4 Simulation is complete, simulated time: 12.122 us -79bb3f25e52aa82a26a5c8f0f2220f78 Simulation is complete, simulated time: 12.122 us -9568ccf105c1194097791b215b5f9ce3 Simulation is complete, simulated time: 12.122 us -db2fd0c64381e8be1bb1c5833c5565a9 
Simulation is complete, simulated time: 12.122 us -d9aa42f978cd1b02745830c0278eb1ac Simulation is complete, simulated time: 12.122 us -bfcc8f71df02f5fcff4dbf5884502821 Simulation is complete, simulated time: 12.122 us -4a11045ae8800841eb55287da0c1a526 Simulation is complete, simulated time: 12.122 us -5ae0e0bbb821c1d28bb75cf85016a3aa Simulation is complete, simulated time: 17.485 us -e016c98b90ae3b39114dd9b6a7db202a Simulation is complete, simulated time: 17.485 us -03f2b1fed5dcd9847a701237c754249a Simulation is complete, simulated time: 17.485 us -b5107315bd48cb43a7823762ebb9531b Simulation is complete, simulated time: 17.485 us -ccf9f70aca6488302d4eed5048e105b1 Simulation is complete, simulated time: 15.13 us -5de6e46cda20c5f0703812b2ad16ce00 Simulation is complete, simulated time: 15.13 us -313eaa178de1b726836e6607cf248d41 Simulation is complete, simulated time: 15.13 us -719e71be0c765f6ec685a849231a7e28 Simulation is complete, simulated time: 15.13 us -825f62f6f600ba298326bdf69b5f52a4 Simulation is complete, simulated time: 6.999 us -4235c525fdbab9153eb7a4056f9aab07 Simulation is complete, simulated time: 6.999 us -31e15481b0a69e931f817a611bd186b4 Simulation is complete, simulated time: 4.065 us -864398304944133f57f3301b923e64c2 Simulation is complete, simulated time: 4.065 us -125a174d8ec68607a400ccd08a106912 Simulation is complete, simulated time: 6.165 us -83da2374016bc103d7e1c476381780ab Simulation is complete, simulated time: 6.165 us -1adfd7ab5d105f65f194978755fa4008 Simulation is complete, simulated time: 6.165 us -94ce7bc42c41226c3a3620a7196c34b6 Simulation is complete, simulated time: 6.165 us -d976a31edc85f5f668c3e8c930a7444f Simulation is complete, simulated time: 6.165 us -61c31e462ae319be2afe7917c2cf8261 Simulation is complete, simulated time: 6.165 us -1876d545867933035e3edae51ebb6922 Simulation is complete, simulated time: 6.165 us -908a279f134958d01d357c1816c62ee7 Simulation is complete, simulated time: 6.165 us -4a7ea790ab15c1d2c9298849fdc282cb Simulation is complete, simulated time: 6.165 us -99a9a8ef68701329f999b0b0850e1400 Simulation is complete, simulated time: 6.165 us -4e01cc0a88838054e775aa7a4141af13 Simulation is complete, simulated time: 6.165 us -56133053efeae3d7ef74bc7aa66a0d06 Simulation is complete, simulated time: 6.165 us -7d42508b20ce6b4391751a06df3df00a Simulation is complete, simulated time: 1.825 us -1b34b05061c5058070a612445bc899d8 Simulation is complete, simulated time: 1.841 us -67b4ce7286191bee72842b4950479a2c Simulation is complete, simulated time: 1.841 us -a674ea20bd174b74dedee21fd43e16bc Simulation is complete, simulated time: 1.414 us -cd86b5412ea0b0890cd0f92ebfcd83a6 Simulation is complete, simulated time: 1.991 us -bc36733b69d250348d7e63a1b193444f Simulation is complete, simulated time: 1.991 us -3c0038c4be9eda97bc85469641f57aaf Simulation is complete, simulated time: 1.991 us -1a26ade0ee4de6663481765f67529f2d Simulation is complete, simulated time: 1.563 us -4ec46ab2f1d6e794b61664833c257f37 Simulation is complete, simulated time: 1.99 us -bed45b3470382f1633ba6b99cf77d9be Simulation is complete, simulated time: 43.497 us -57a9aee6e6521cac86fdc4507a903307 Simulation is complete, simulated time: 629.553 us -63c7271b0edc55185ef52bb6f848c547 Simulation is complete, simulated time: 36.002 us -4b2fd1f1d9a59ff64aaa4510649525eb Simulation is complete, simulated time: 441.323 us -d6dd2c35563417f4fb945ad3362ecb9c Simulation is complete, simulated time: 2.447 us -afa5982dcb8f084e23018629634732e0 Simulation is complete, simulated time: 24.312 us 
-3c1a798313220a6f398fa72fb7663254 Simulation is complete, simulated time: 2.324 us -c1ca0f5826a8e5131964e070672cab0f Simulation is complete, simulated time: 180.82 us -cec2296f0ac1370617d72b0db25e346b Simulation is complete, simulated time: 11.798 us -5a845c05ec78a9ac364731f4a22f30fb Simulation is complete, simulated time: 8.87541 ms -80279cde7618ffd8b5073c4e05da2dee Simulation is complete, simulated time: 2.06 us -8609806b1ae1fe8e47f90ca7e608785f Simulation is complete, simulated time: 25.937 us -07c8fcaf8265aae176cdd48c226f5719 Simulation is complete, simulated time: 2.536 us -f45cfd58a09cb512bec42a1bb30ca347 Simulation is complete, simulated time: 127.13 us -56d186afacb520f350c17bb40e155a99 Simulation is complete, simulated time: 2.729 us -23b2e8caf4b9ebd8087bbc701937f596 Simulation is complete, simulated time: 53.821 us +20e886bfb7e3fea793bf0029e431d1a7 Simulation is complete, simulated time: 234.54 us +c4a74c672da38bc35a2cb3480313498d Simulation is complete, simulated time: 619.538 us +5f6021f216b371bf4d4834218ce3f1bb Simulation is complete, simulated time: 185.008 us +6eec970b3269d9eb2be6c4f09a9d9080 Simulation is complete, simulated time: 431.626 us +e8cdf1a1a4aa5a7749048c7dcf67ca6a Simulation is complete, simulated time: 8.961 us +f4b15be14bba163dcb2e4b642a8e3c6b Simulation is complete, simulated time: 46.929 us +4235171596788b842de423b0460c86bf Simulation is complete, simulated time: 57.607 us +50014f8db405b6e51a00abfe86d18a1d Simulation is complete, simulated time: 9.003 us +1ac054980eba5eb13f6207cbeb051ec1 Simulation is complete, simulated time: 47.529 us +99228b3dadbc45188e37e3b75ca6ce2c Simulation is complete, simulated time: 61.169 us +56897b7251e2e665f33199f82e1e266f Simulation is complete, simulated time: 10.741 us +23059a7933b94bb77d0d8cfa22e0aa4e Simulation is complete, simulated time: 8.961 us +063af3e51e2f826e19f23ff10b4c5f18 Simulation is complete, simulated time: 46.929 us +956cd31d0521f9dcaba85af469a68d12 Simulation is complete, simulated time: 3.33598 ms +d1beb5787ff77daf730c9daae83ede51 Simulation is complete, simulated time: 3.39544 ms +05ae92ecd0fa12d89b96b8fbfffbe178 Simulation is complete, simulated time: 3.39544 ms +b13fd8d32deb6af396771612e8dcbc6a Simulation is complete, simulated time: 341.875 us +bfcbcc3439246ee8bb3b9e1416b023b5 Simulation is complete, simulated time: 9.003 us +96ef09125604ac671e202d0381740c25 Simulation is complete, simulated time: 47.529 us +63c2acc33f4874da276c95f2347fac8d Simulation is complete, simulated time: 3.39544 ms +523b7c722410ba2b98dcc36846ff5314 Simulation is complete, simulated time: 3.51416 ms +1d9cb27f2056df62e8aedc95fdf7796c Simulation is complete, simulated time: 3.51456 ms +ab1add1f35f90727eea910bf7e0f81b9 Simulation is complete, simulated time: 374.229 us +2de75935ea9979e9394f939a035740ce Simulation is complete, simulated time: 7.448 us +0deb75644ef4c5b55526ea13acac9ddf Simulation is complete, simulated time: 18.248 us +d6c2fbe70743ecc9031e38e09cafaa6e Simulation is complete, simulated time: 7.444 us +caeb6c8c3f6fd743b585740f6e3d9061 Simulation is complete, simulated time: 18.232 us +7fca3bd3526e00cdad7d54637acf2f39 Simulation is complete, simulated time: 11.215 us +392adc0ace3695154d667a5058987354 Simulation is complete, simulated time: 101.127 us +11fa136c6e5730f03f97c2df2ac025ce Simulation is complete, simulated time: 101.901 us +1f0d8f7ed71092af5eb66728d370ae38 Simulation is complete, simulated time: 11.215 us +b357e0efaecab3d206b4f8d3cb2c7dd4 Simulation is complete, simulated time: 101.127 us 
+bf02d08d8b5b22aa1099cd3467706e85 Simulation is complete, simulated time: 102.765 us +90c12d7662800886091a47fbdfde7163 Simulation is complete, simulated time: 11.352 us +3baeef25ec6a74a92ea77394b2352c73 Simulation is complete, simulated time: 105.71 us +069e097dc752bdd149b0030377dcd9cc Simulation is complete, simulated time: 106.352 us +7c2b015c842bda7609bdd3cd3c39d0ba Simulation is complete, simulated time: 11.352 us +f55df8a1f4a9fcd6e2b04035137cf354 Simulation is complete, simulated time: 105.71 us +5dd729022a960da0a9ac163d18f88184 Simulation is complete, simulated time: 107.246 us +c7baae57caeb4b68703225f0a81f71a8 Simulation is complete, simulated time: 7.176 us +15d334d388a4ce0b4c6ad5a744291707 Simulation is complete, simulated time: 7.176 us +e3839497ee5efcbc0e2c02f5aff5e9fa Simulation is complete, simulated time: 7.176 us +ce495bcfcdec30c37bf956ac9237b5bb Simulation is complete, simulated time: 7.176 us +10b8a688bf73c255be048643c466d53c Simulation is complete, simulated time: 7.176 us +220a2bbb0022e7a7edfe41f7381109cc Simulation is complete, simulated time: 7.176 us +24aa3f67dc3169fdeb12b24133540c40 Simulation is complete, simulated time: 7.176 us +65457bdb91c90a0d5994093f1a032f42 Simulation is complete, simulated time: 7.176 us +f0cad1d67e4c255cb2d1f426b6806371 Simulation is complete, simulated time: 7.358 us +8b99377ee1e99b887a79e8dd95ba0e14 Simulation is complete, simulated time: 7.464 us +762328d5d6adafa312ab9c649cfd1aea Simulation is complete, simulated time: 9.808 us +8a1e3d64ded1d806a170e903521591f4 Simulation is complete, simulated time: 7.772 us +5e38fd7d5d594679d887fee9728e4435 Simulation is complete, simulated time: 5.41 us +641335334c8a3f637d657906f3a7c10e Simulation is complete, simulated time: 5.41 us +166e5a405187a3cd2672bd9f9e230636 Simulation is complete, simulated time: 5.41 us +bbf096f5c13274e454d1071a21be1ff0 Simulation is complete, simulated time: 5.41 us +03b39041a6953d56d4ab3ed44d3d808f Simulation is complete, simulated time: 5.352 us +43349e16f8d84e6d6be356c0e295dcc2 Simulation is complete, simulated time: 5.352 us +1d4b31a6677748ed649a0dad981f44f1 Simulation is complete, simulated time: 5.726 us +5889dd20792ae804f6621a40ce618990 Simulation is complete, simulated time: 5.352 us +dd79e49a68939928072188bba6f57249 Simulation is complete, simulated time: 6.206 us +fed6c149bcf87264b06fe224e59e1e9d Simulation is complete, simulated time: 6.206 us +ca0d18bb4120ac55076dc8e06162eace Simulation is complete, simulated time: 6.206 us +67512825a65816363335bd86cd23044f Simulation is complete, simulated time: 6.206 us +cf6baf07c1dc268f5a18c56cea05a7c0 Simulation is complete, simulated time: 11.733 us +e800535bfb11bef3874730b4031b2f5a Simulation is complete, simulated time: 11.733 us +7dc7594c195ea2175045f2cde4305e03 Simulation is complete, simulated time: 11.733 us +447d697538f54ab583eaf945c6fcec67 Simulation is complete, simulated time: 11.733 us +380531d42f55da530db1700f3ca56e53 Simulation is complete, simulated time: 11.733 us +5999458e5744c2ef1c5e093052167909 Simulation is complete, simulated time: 11.733 us +dce10c52a0044b4bbe8b8156a54f9abb Simulation is complete, simulated time: 11.733 us +0ec0a744056ca006c3c32d1713e37bc4 Simulation is complete, simulated time: 11.737 us +79bb3f25e52aa82a26a5c8f0f2220f78 Simulation is complete, simulated time: 11.737 us +9568ccf105c1194097791b215b5f9ce3 Simulation is complete, simulated time: 11.737 us +db2fd0c64381e8be1bb1c5833c5565a9 Simulation is complete, simulated time: 11.737 us +d9aa42f978cd1b02745830c0278eb1ac Simulation is 
complete, simulated time: 11.737 us +bfcc8f71df02f5fcff4dbf5884502821 Simulation is complete, simulated time: 11.737 us +4a11045ae8800841eb55287da0c1a526 Simulation is complete, simulated time: 11.737 us +5ae0e0bbb821c1d28bb75cf85016a3aa Simulation is complete, simulated time: 17.345 us +e016c98b90ae3b39114dd9b6a7db202a Simulation is complete, simulated time: 17.345 us +03f2b1fed5dcd9847a701237c754249a Simulation is complete, simulated time: 17.345 us +b5107315bd48cb43a7823762ebb9531b Simulation is complete, simulated time: 17.345 us +ccf9f70aca6488302d4eed5048e105b1 Simulation is complete, simulated time: 16.161 us +5de6e46cda20c5f0703812b2ad16ce00 Simulation is complete, simulated time: 16.161 us +313eaa178de1b726836e6607cf248d41 Simulation is complete, simulated time: 16.161 us +719e71be0c765f6ec685a849231a7e28 Simulation is complete, simulated time: 16.161 us +825f62f6f600ba298326bdf69b5f52a4 Simulation is complete, simulated time: 7.176 us +4235c525fdbab9153eb7a4056f9aab07 Simulation is complete, simulated time: 7.176 us +31e15481b0a69e931f817a611bd186b4 Simulation is complete, simulated time: 3.858 us +864398304944133f57f3301b923e64c2 Simulation is complete, simulated time: 3.858 us +125a174d8ec68607a400ccd08a106912 Simulation is complete, simulated time: 5.9 us +83da2374016bc103d7e1c476381780ab Simulation is complete, simulated time: 5.9 us +1adfd7ab5d105f65f194978755fa4008 Simulation is complete, simulated time: 5.9 us +94ce7bc42c41226c3a3620a7196c34b6 Simulation is complete, simulated time: 5.9 us +d976a31edc85f5f668c3e8c930a7444f Simulation is complete, simulated time: 5.9 us +61c31e462ae319be2afe7917c2cf8261 Simulation is complete, simulated time: 5.9 us +1876d545867933035e3edae51ebb6922 Simulation is complete, simulated time: 5.9 us +908a279f134958d01d357c1816c62ee7 Simulation is complete, simulated time: 5.9 us +4a7ea790ab15c1d2c9298849fdc282cb Simulation is complete, simulated time: 5.9 us +99a9a8ef68701329f999b0b0850e1400 Simulation is complete, simulated time: 5.9 us +4e01cc0a88838054e775aa7a4141af13 Simulation is complete, simulated time: 5.9 us +56133053efeae3d7ef74bc7aa66a0d06 Simulation is complete, simulated time: 5.9 us +7d42508b20ce6b4391751a06df3df00a Simulation is complete, simulated time: 1.62 us +1b34b05061c5058070a612445bc899d8 Simulation is complete, simulated time: 1.563 us +67b4ce7286191bee72842b4950479a2c Simulation is complete, simulated time: 1.986 us +a674ea20bd174b74dedee21fd43e16bc Simulation is complete, simulated time: 1.737 us +cd86b5412ea0b0890cd0f92ebfcd83a6 Simulation is complete, simulated time: 2.443 us +bc36733b69d250348d7e63a1b193444f Simulation is complete, simulated time: 3.586 us +3c0038c4be9eda97bc85469641f57aaf Simulation is complete, simulated time: 2.443 us +1a26ade0ee4de6663481765f67529f2d Simulation is complete, simulated time: 1.314 us +4ec46ab2f1d6e794b61664833c257f37 Simulation is complete, simulated time: 1.563 us +bed45b3470382f1633ba6b99cf77d9be Simulation is complete, simulated time: 26.027 us +57a9aee6e6521cac86fdc4507a903307 Simulation is complete, simulated time: 630.332 us +63c7271b0edc55185ef52bb6f848c547 Simulation is complete, simulated time: 28.533 us +4b2fd1f1d9a59ff64aaa4510649525eb Simulation is complete, simulated time: 448.23 us +d6dd2c35563417f4fb945ad3362ecb9c Simulation is complete, simulated time: 2.416 us +afa5982dcb8f084e23018629634732e0 Simulation is complete, simulated time: 24.205 us +3c1a798313220a6f398fa72fb7663254 Simulation is complete, simulated time: 3.902 us +c1ca0f5826a8e5131964e070672cab0f 
Simulation is complete, simulated time: 141.661 us +cec2296f0ac1370617d72b0db25e346b Simulation is complete, simulated time: 19.311 us +5a845c05ec78a9ac364731f4a22f30fb Simulation is complete, simulated time: 8.87644 ms +80279cde7618ffd8b5073c4e05da2dee Simulation is complete, simulated time: 1.932 us +8609806b1ae1fe8e47f90ca7e608785f Simulation is complete, simulated time: 26.33 us +07c8fcaf8265aae176cdd48c226f5719 Simulation is complete, simulated time: 2.382 us +f45cfd58a09cb512bec42a1bb30ca347 Simulation is complete, simulated time: 114.565 us +56d186afacb520f350c17bb40e155a99 Simulation is complete, simulated time: 3.191 us +23b2e8caf4b9ebd8087bbc701937f596 Simulation is complete, simulated time: 60.468 us diff --git a/src/sst/elements/firefly/ctrlMsgProcessQueuesState.cc b/src/sst/elements/firefly/ctrlMsgProcessQueuesState.cc index 95d0b1ff91..1c509bbf11 100644 --- a/src/sst/elements/firefly/ctrlMsgProcessQueuesState.cc +++ b/src/sst/elements/firefly/ctrlMsgProcessQueuesState.cc @@ -33,10 +33,12 @@ ProcessQueuesState::ProcessQueuesState( Component* owner, Params& params ) : m_numRecvLooped(0), m_missedInt( false ), m_intCtx(NULL), - m_simVAddrs(NULL) + m_simVAddrs(NULL), + m_numSent(0), + m_numRecv(0) { - int level = params.find("verboseLevel",0); - int mask = params.find("verboseMask",-1); + int level = params.find("pqs.verboseLevel",0); + int mask = params.find("pqs.verboseMask",-1); m_dbg.init("", level, mask, Output::STDOUT ); @@ -88,8 +90,8 @@ void ProcessQueuesState::setVars( VirtNic* nic, Info* info, MemoryBase* mem, } void ProcessQueuesState:: finish() { - dbg().debug(CALL_INFO,1,0,"pstdRcvQ=%lu recvdMsgQ=%lu loopResp=%lu funcStack=%lu\n", - m_pstdRcvQ.size(), m_recvdMsgQ.size(), m_loopResp.size(), m_funcStack.size() ); + dbg().debug(CALL_INFO,1,1,"pstdRcvQ=%lu recvdMsgQ=%lu loopResp=%lu funcStack=%lu sent=%d recv=%d\n", + m_pstdRcvQ.size(), m_recvdMsgQ.size(), m_loopResp.size(), m_funcStack.size(), m_numSent, m_numRecv+m_recvdMsgQ.size() ); } void ProcessQueuesState::enterInit( bool haveGlobalMemHeap ) @@ -129,7 +131,7 @@ void ProcessQueuesState::enterSend( _CommReq* req, uint64_t exitDelay ) { m_exitDelay = exitDelay; req->setSrcRank( getMyRank( req ) ); - dbg().debug(CALL_INFO,1,1,"req=%p delay=%" PRIu64 " destRank=%d\n", req, exitDelay, + dbg().debug(CALL_INFO,1,DBG_MSK_PQS_APP_SIDE,"req=%p delay=%" PRIu64 " destRank=%d\n", req, exitDelay, req->getDestRank() ); uint64_t delay = txDelay( req->getLength() ); @@ -191,13 +193,14 @@ void ProcessQueuesState::processSend_2( _CommReq* req ) nid_t nid = calcNid( req, req->getDestRank() ); if ( length <= shortMsgLength() ) { - dbg().debug(CALL_INFO,1,1,"Short %lu bytes dest %#x\n",length,nid); + dbg().debug(CALL_INFO,2,DBG_MSK_PQS_APP_SIDE,"Short %lu bytes dest %#x\n",length,nid); vec.insert( vec.begin() + 1, req->ioVec().begin(), req->ioVec().end() ); req->setDone( sendReqFiniDelay( length ) ); + ++m_numSent; } else { - dbg().debug(CALL_INFO,1,1,"sending long message %lu bytes\n",length); + dbg().debug(CALL_INFO,2,DBG_MSK_PQS_APP_SIDE,"sending long message %lu bytes\n",length); req->hdr().key = genGetKey(); GetInfo* info = new GetInfo; @@ -237,7 +240,7 @@ void ProcessQueuesState::processSend_2( _CommReq* req ) void ProcessQueuesState::processSendLoop( _CommReq* req ) { - dbg().debug(CALL_INFO,2,2,"key=%p\n", req); + dbg().debug(CALL_INFO,2,DBG_MSK_PQS_APP_SIDE,"key=%p\n", req); IoVec hdrVec; hdrVec.len = sizeof( req->hdr() ); @@ -260,7 +263,7 @@ void ProcessQueuesState::processSendLoop( _CommReq* req ) void 
ProcessQueuesState::enterRecv( _CommReq* req, uint64_t exitDelay ) { - dbg().debug(CALL_INFO,1,1,"req=%p$ delay=%" PRIu64 " rank=%d\n", req, exitDelay, req->hdr().rank ); + dbg().debug(CALL_INFO,1,DBG_MSK_PQS_APP_SIDE,"req=%p$ delay=%" PRIu64 " rank=%d\n", req, exitDelay, req->hdr().rank ); m_exitDelay = exitDelay; if ( m_postedShortBuffers.size() < MaxPostedShortBuffers ) { @@ -294,7 +297,7 @@ void ProcessQueuesState::enterRecv( _CommReq* req, uint64_t exitDelay ) void ProcessQueuesState::processRecv_0( _CommReq* req ) { - dbg().debug(CALL_INFO,1,1,"\n"); + dbg().debug(CALL_INFO,2,DBG_MSK_PQS_APP_SIDE,"\n"); m_mem->pin( std::bind( &ProcessQueuesState::processRecv_1, this, req ), @@ -313,7 +316,7 @@ void ProcessQueuesState::processRecv_1( _CommReq* req ) void ProcessQueuesState::enterMakeProgress( uint64_t exitDelay ) { - dbg().debug(CALL_INFO,1,1,"num pstd %lu, num short %lu\n", + dbg().debug(CALL_INFO,1,DBG_MSK_PQS_APP_SIDE,"num pstd %lu, num short %lu\n", m_pstdRcvQ.size(), m_recvdMsgQ.size() ); m_exitDelay = exitDelay; @@ -329,8 +332,8 @@ void ProcessQueuesState::enterMakeProgress( uint64_t exitDelay ) void ProcessQueuesState::processMakeProgress( Stack* stack ) { - dbg().debug(CALL_INFO,2,1,"stack.size()=%lu\n", stack->size()); - dbg().debug(CALL_INFO,1,1,"num pstd %lu, num short %lu\n", + dbg().debug(CALL_INFO,2,DBG_MSK_PQS_APP_SIDE,"stack.size()=%lu\n", stack->size()); + dbg().debug(CALL_INFO,2,DBG_MSK_PQS_APP_SIDE,"num pstd %lu, num short %lu\n", m_pstdRcvQ.size(), m_recvdMsgQ.size() ); WaitCtx* ctx = static_cast( stack->back() ); @@ -344,7 +347,7 @@ void ProcessQueuesState::processMakeProgress( Stack* stack ) void ProcessQueuesState::enterWait( WaitReq* req, uint64_t exitDelay ) { - dbg().debug(CALL_INFO,1,1,"num pstd %lu, num short %lu\n", + dbg().debug(CALL_INFO,1,DBG_MSK_PQS_APP_SIDE,"num pstd %lu, num short %lu\n", m_pstdRcvQ.size(), m_recvdMsgQ.size() ); m_exitDelay = exitDelay; @@ -360,8 +363,8 @@ void ProcessQueuesState::enterWait( WaitReq* req, uint64_t exitDelay ) void ProcessQueuesState::processWait_0( Stack* stack ) { - dbg().debug(CALL_INFO,2,1,"stack.size()=%lu\n", stack->size()); - dbg().debug(CALL_INFO,1,1,"num pstd %lu, num short %lu\n", + dbg().debug(CALL_INFO,2,DBG_MSK_PQS_APP_SIDE,"stack.size()=%lu\n", stack->size()); + dbg().debug(CALL_INFO,2,DBG_MSK_PQS_APP_SIDE,"num pstd %lu, num short %lu\n", m_pstdRcvQ.size(), m_recvdMsgQ.size() ); WaitCtx* ctx = static_cast( stack->back() ); @@ -380,7 +383,7 @@ void ProcessQueuesState::processWait_0( Stack* stack ) void ProcessQueuesState::processWaitCtx_0( WaitCtx* ctx, _CommReq* req ) { - dbg().debug(CALL_INFO,1,1,"\n"); + dbg().debug(CALL_INFO,2,DBG_MSK_PQS_APP_SIDE,"\n"); schedCallback( std::bind( &ProcessQueuesState::processWaitCtx_1, this, ctx, req ), req->getFiniDelay() @@ -391,7 +394,7 @@ void ProcessQueuesState::processWaitCtx_1( WaitCtx* ctx, _CommReq* req ) { size_t length = req->getLength(); - dbg().debug(CALL_INFO,1,1,"\n"); + dbg().debug(CALL_INFO,2,DBG_MSK_PQS_APP_SIDE,"\n"); if ( length > shortMsgLength() ) { // this is a hack, to get point to point latencies to match Chama @@ -415,7 +418,7 @@ void ProcessQueuesState::processWaitCtx_1( WaitCtx* ctx, _CommReq* req ) void ProcessQueuesState::processWaitCtx_2( WaitCtx* ctx ) { _CommReq* req = ctx->req->getFiniReq(); - dbg().debug(CALL_INFO,1,1,"\n"); + dbg().debug(CALL_INFO,2,DBG_MSK_PQS_APP_SIDE,"\n"); if ( req ) { processWaitCtx_0( ctx, req ); @@ -429,8 +432,8 @@ void ProcessQueuesState::processWaitCtx_2( WaitCtx* ctx ) void 
ProcessQueuesState::processQueues( Stack* stack ) { - dbg().debug(CALL_INFO,2,1,"shortMsgV.size=%lu\n", m_recvdMsgQ.size() ); - dbg().debug(CALL_INFO,2,1,"stack.size()=%lu\n", stack->size()); + dbg().debug(CALL_INFO,1,DBG_MSK_PQS_Q,"shortMsgV.size=%lu\n", m_recvdMsgQ.size() ); + dbg().debug(CALL_INFO,1,DBG_MSK_PQS_Q,"stack.size()=%lu\n", stack->size()); // this does not cost time while ( m_needRecv ) { @@ -477,7 +480,7 @@ void ProcessQueuesState::processQueues( Stack* stack ) void ProcessQueuesState::processQueues0( Stack* stack ) { - dbg().debug(CALL_INFO,2,1,"stack.size()=%lu recvdMsgQ.size()=%lu\n", + dbg().debug(CALL_INFO,2,DBG_MSK_PQS_Q,"stack.size()=%lu recvdMsgQ.size()=%lu\n", stack->size(), m_recvdMsgQ.size() ); delete stack->back(); @@ -487,7 +490,7 @@ void ProcessQueuesState::processQueues0( Stack* stack ) void ProcessQueuesState::processShortList_0( Stack* stack ) { - dbg().debug(CALL_INFO,2,1,"stack.size()=%lu recvdMsgQ.size()=%lu\n", + dbg().debug(CALL_INFO,2,DBG_MSK_PQS_Q,"stack.size()=%lu recvdMsgQ.size()=%lu\n", stack->size(), m_recvdMsgQ.size() ); ProcessShortListCtx* ctx = new ProcessShortListCtx( m_recvdMsgQ ); @@ -499,7 +502,7 @@ void ProcessQueuesState::processShortList_0( Stack* stack ) void ProcessQueuesState::processShortList_1( Stack* stack ) { - dbg().debug(CALL_INFO,2,1,"stack.size()=%lu recvdMsgQ.size()=%lu\n", + dbg().debug(CALL_INFO,2,DBG_MSK_PQS_Q,"stack.size()=%lu recvdMsgQ.size()=%lu\n", stack->size(), m_recvdMsgQ.size() ); ProcessShortListCtx* ctx = @@ -518,7 +521,7 @@ void ProcessQueuesState::processShortList_2( Stack* stack ) { ProcessShortListCtx* ctx = static_cast( stack->back() ); - dbg().debug(CALL_INFO,2,1,"stack.size()=%lu\n", stack->size()); + dbg().debug(CALL_INFO,2,DBG_MSK_PQS_Q,"stack.size()=%lu\n", stack->size()); if ( ctx->req ) { schedCallback( @@ -534,7 +537,7 @@ void ProcessQueuesState::processShortList_2( Stack* stack ) void ProcessQueuesState::processShortList_3( Stack* stack ) { - dbg().debug(CALL_INFO,2,1,"stack.size()=%lu\n", stack->size()); + dbg().debug(CALL_INFO,2,DBG_MSK_PQS_Q,"stack.size()=%lu\n", stack->size()); ProcessShortListCtx* ctx = static_cast(stack->back()); @@ -547,7 +550,7 @@ void ProcessQueuesState::processShortList_3( Stack* stack ) if ( length <= shortMsgLength() || dynamic_cast( ctx->msg() ) ) { - dbg().debug(CALL_INFO,2,1,"copyIoVec() short|loop message\n"); + dbg().debug(CALL_INFO,2,DBG_MSK_PQS_Q,"copyIoVec() short|loop message\n"); copyIoVec( req->ioVec(), ctx->ioVec(), length ); @@ -566,7 +569,7 @@ void ProcessQueuesState::processShortList_4( Stack* stack ) { ProcessShortListCtx* ctx = static_cast(stack->back()); - dbg().debug(CALL_INFO,2,1,"stack.size()=%lu\n", stack->size()); + dbg().debug(CALL_INFO,2,DBG_MSK_PQS_Q,"stack.size()=%lu\n", stack->size()); _CommReq* req = ctx->req; @@ -574,17 +577,19 @@ void ProcessQueuesState::processShortList_4( Stack* stack ) LoopReq* loopReq; if ( ( loopReq = dynamic_cast( ctx->msg() ) ) ) { - dbg().debug(CALL_INFO,1,2,"loop message key=%p srcCore=%d " + dbg().debug(CALL_INFO,2,DBG_MSK_PQS_Q,"loop message key=%p srcCore=%d " "srcRank=%d\n", loopReq->key, loopReq->srcCore, ctx->hdr().rank); req->setDone(); + ++m_numRecv; loopSendResp( loopReq->srcCore , loopReq->key ); } else if ( length <= shortMsgLength() ) { - dbg().debug(CALL_INFO,1,1,"short\n"); + dbg().debug(CALL_INFO,2,DBG_MSK_PQS_Q,"short\n"); req->setDone( recvReqFiniDelay( length ) ); + ++m_numRecv; } else { - dbg().debug(CALL_INFO,1,1,"long\n"); + dbg().debug(CALL_INFO,2,DBG_MSK_PQS_Q,"long\n"); VoidFunction* 
callback = new VoidFunction; *callback = std::bind( &ProcessQueuesState::getFini,this, req ); @@ -604,10 +609,10 @@ void ProcessQueuesState::processShortList_5( Stack* stack ) { ProcessShortListCtx* ctx = static_cast(stack->back()); - dbg().debug(CALL_INFO,2,1,"stack.size()=%lu\n", stack->size()); + dbg().debug(CALL_INFO,2,DBG_MSK_PQS_Q,"stack.size()=%lu\n", stack->size()); if ( ctx->isDone() ) { - dbg().debug(CALL_INFO,2,1,"return up the stack\n"); + dbg().debug(CALL_INFO,2,DBG_MSK_PQS_Q,"return up the stack\n"); if ( ! ctx->msgQempty() ) { m_recvdMsgQ.insert( m_recvdMsgQ.begin(), ctx->getMsgQ().begin(), @@ -617,7 +622,7 @@ void ProcessQueuesState::processShortList_5( Stack* stack ) stack->pop_back(); schedCallback( stack->back()->getCallback() ); } else { - dbg().debug(CALL_INFO,1,1,"work on next Msg\n"); + dbg().debug(CALL_INFO,2,DBG_MSK_PQS_Q,"work on next Msg\n"); processShortList_1( stack ); } @@ -625,15 +630,16 @@ void ProcessQueuesState::processShortList_5( Stack* stack ) void ProcessQueuesState::processLoopResp( LoopResp* resp ) { - dbg().debug(CALL_INFO,1,2,"srcCore=%d\n",resp->srcCore ); + dbg().debug(CALL_INFO,2,DBG_MSK_PQS_Q,"srcCore=%d\n",resp->srcCore ); _CommReq* req = (_CommReq*)resp->key; + ++m_numSent; req->setDone(); delete resp; } void ProcessQueuesState::getFini( _CommReq* req ) { - dbg().debug(CALL_INFO,1,1,"\n"); + dbg().debug(CALL_INFO,2,DBG_MSK_PQS_CB,"\n"); m_longGetFiniQ.push_back( req ); runInterruptCtx(); @@ -642,7 +648,7 @@ void ProcessQueuesState::getFini( _CommReq* req ) void ProcessQueuesState::dmaRecvFiniGI( GetInfo* info, uint64_t simVAddr, nid_t nid, uint32_t tag, size_t length ) { - dbg().debug(CALL_INFO,1,1,"nid=%d tag=%#x length=%lu\n", + dbg().debug(CALL_INFO,2,DBG_MSK_PQS_CB,"nid=%d tag=%#x length=%lu\n", nid, tag, length ); assert( ( tag & LongAckKey ) == LongAckKey ); m_simVAddrs->free( simVAddr ); @@ -657,9 +663,9 @@ void ProcessQueuesState::dmaRecvFiniGI( GetInfo* info, uint64_t simVAddr, nid_t void ProcessQueuesState::dmaRecvFiniSRB( ShortRecvBuffer* buf, nid_t nid, uint32_t tag, size_t length ) { - dbg().debug(CALL_INFO,1,1,"ShortMsgQ nid=%#x tag=%#x length=%lu\n", + dbg().debug(CALL_INFO,2,DBG_MSK_PQS_CB,"ShortMsgQ nid=%#x tag=%#x length=%lu\n", nid, tag, length ); - dbg().debug(CALL_INFO,1,1,"ShortMsgQ rank=%d tag=%#" PRIx64 " count=%d " + dbg().debug(CALL_INFO,2,DBG_MSK_PQS_CB,"ShortMsgQ rank=%d tag=%#" PRIx64 " count=%d " "dtypeSize=%d\n", buf->hdr.rank, buf->hdr.tag, buf->hdr.count, buf->hdr.dtypeSize ); @@ -669,17 +675,17 @@ void ProcessQueuesState::dmaRecvFiniSRB( ShortRecvBuffer* buf, nid_t nid, runInterruptCtx(); m_postedShortBuffers.erase(buf); - dbg().debug(CALL_INFO,1,1,"num postedShortRecvBuffers %lu\n", + dbg().debug(CALL_INFO,2,DBG_MSK_PQS_CB,"num postedShortRecvBuffers %lu\n", m_postedShortBuffers.size()); } void ProcessQueuesState::enableInt( FuncCtxBase* ctx, void (ProcessQueuesState::*funcPtr)( Stack* ) ) { - dbg().debug(CALL_INFO,2,1,"ctx=%p\n",ctx); + dbg().debug(CALL_INFO,1,DBG_MSK_PQS_INT,"ctx=%p\n",ctx); assert( m_funcStack.empty() ); if ( m_intCtx ) { - dbg().debug(CALL_INFO,2,1,"already have a return ctx\n"); + dbg().debug(CALL_INFO,1,DBG_MSK_PQS_INT,"already have a return ctx\n"); return; } @@ -695,7 +701,7 @@ void ProcessQueuesState::enableInt( FuncCtxBase* ctx, void ProcessQueuesState::runInterruptCtx( ) { if ( ! m_intCtx || ! 
m_intStack.empty() ) { - dbg().debug(CALL_INFO,2,1,"missed interrupt\n"); + dbg().debug(CALL_INFO,1,DBG_MSK_PQS_INT,"missed interrupt\n"); m_missedInt = true; return; } @@ -706,6 +712,7 @@ void ProcessQueuesState::runInterruptCtx( ) m_intStack.push_back( ctx ); + dbg().debug(CALL_INFO,1,DBG_MSK_PQS_INT,"call processQueues\n" ); processQueues( &m_intStack ); } @@ -715,7 +722,7 @@ void ProcessQueuesState::leaveInterruptCtx( Stack* stack ) delete stack->back(); stack->pop_back(); - dbg().debug(CALL_INFO,2,1,"\n" ); + dbg().debug(CALL_INFO,1,DBG_MSK_PQS_INT,"\n" ); VoidFunction callback = m_intCtx->getCallback(); @@ -731,7 +738,7 @@ void ProcessQueuesState::leaveInterruptCtx( Stack* stack ) void ProcessQueuesState::pioSendFiniVoid( void* hdr, uint64_t simVAddr ) { - dbg().debug(CALL_INFO,1,1,"hdr=%p\n", hdr ); + dbg().debug(CALL_INFO,1,DBG_MSK_PQS_CB,"hdr=%p\n", hdr ); if ( hdr ) { free( hdr ); } @@ -740,7 +747,7 @@ void ProcessQueuesState::pioSendFiniVoid( void* hdr, uint64_t simVAddr ) void ProcessQueuesState::pioSendFiniCtrlHdr( CtrlHdr* hdr, uint64_t simVAddr ) { - dbg().debug(CALL_INFO,1,1,"MsgHdr, Ack sent key=%#x\n", hdr->key); + dbg().debug(CALL_INFO,1,DBG_MSK_PQS_CB,"MsgHdr, Ack sent key=%#x\n", hdr->key); m_simVAddrs->free(simVAddr); delete hdr; runInterruptCtx(); @@ -749,7 +756,7 @@ void ProcessQueuesState::pioSendFiniCtrlHdr( CtrlHdr* hdr, uint64_t simVAddr ) void ProcessQueuesState::processLongGetFini( Stack* stack, _CommReq* req ) { ProcessLongGetFiniCtx* ctx = new ProcessLongGetFiniCtx( req ); - dbg().debug(CALL_INFO,1,1,"\n"); + dbg().debug(CALL_INFO,1,DBG_MSK_PQS_CB,"\n"); stack->push_back( ctx ); @@ -763,11 +770,12 @@ void ProcessQueuesState::processLongGetFini0( Stack* stack ) { _CommReq* req = static_cast(stack->back())->req; - dbg().debug(CALL_INFO,1,1,"\n"); + dbg().debug(CALL_INFO,1,DBG_MSK_PQS_CB,"\n"); delete stack->back(); stack->pop_back(); - + + ++m_numRecv; req->setDone( recvReqFiniDelay( req->getLength() ) ); IoVec hdrVec; @@ -783,7 +791,7 @@ void ProcessQueuesState::processLongGetFini0( Stack* stack ) std::vector vec; vec.insert( vec.begin(), hdrVec ); - dbg().debug(CALL_INFO,1,1,"send long msg Ack to nid=%d key=%#x\n", + dbg().debug(CALL_INFO,1,DBG_MSK_PQS_CB,"send long msg Ack to nid=%d key=%#x\n", req->m_ackNid, req->m_ackKey ); m_nic->pioSend( req->m_ackNid, req->m_ackKey, vec, callback ); @@ -794,7 +802,8 @@ void ProcessQueuesState::processLongGetFini0( Stack* stack ) void ProcessQueuesState::processLongAck( GetInfo* info ) { - dbg().debug(CALL_INFO,1,1,"acked\n"); + ++m_numSent; + dbg().debug(CALL_INFO,2,DBG_MSK_PQS_Q,"acked\n"); info->req->setDone( sendReqFiniDelay( info->req->getLength() ) ); delete info; return; @@ -802,7 +811,7 @@ void ProcessQueuesState::processLongAck( GetInfo* info ) void ProcessQueuesState::needRecv( int nid, size_t length ) { - dbg().debug(CALL_INFO,1,1,"nid=%d length=%lu\n",nid,length); + dbg().debug(CALL_INFO,1,DBG_MSK_PQS_NEED_RECV,"nid=%d length=%lu\n",nid,length); ++m_needRecv; runInterruptCtx(); @@ -810,7 +819,7 @@ void ProcessQueuesState::needRecv( int nid, size_t length ) void ProcessQueuesState::loopHandler( int srcCore, void* key ) { - dbg().debug(CALL_INFO,1,2,"resp: srcCore=%d key=%p \n",srcCore,key); + dbg().debug(CALL_INFO,2,DBG_MSK_PQS_CB,"resp: srcCore=%d key=%p \n",srcCore,key); m_loopResp.push_back( new LoopResp( srcCore, key ) ); @@ -822,7 +831,7 @@ void ProcessQueuesState::loopHandler( int srcCore, std::vector& vec, void MatchHdr* hdr = (MatchHdr*) vec[0].addr.getBacking(); - dbg().debug(CALL_INFO,1,2,"req: 
srcCore=%d key=%p vec.size()=%lu srcRank=%d\n", + dbg().debug(CALL_INFO,2,DBG_MSK_PQS_CB,"req: srcCore=%d key=%p vec.size()=%lu srcRank=%d\n", srcCore, key, vec.size(), hdr->rank); ++m_numRecvLooped; @@ -835,7 +844,7 @@ void ProcessQueuesState::loopHandler( int srcCore, std::vector& vec, void _CommReq* ProcessQueuesState::searchPostedRecv( MatchHdr& hdr, int& count ) { _CommReq* req = NULL; - dbg().debug(CALL_INFO,1,1,"posted size %lu\n",m_pstdRcvQ.size()); + dbg().debug(CALL_INFO,2,DBG_MSK_PQS_Q,"posted size %lu\n",m_pstdRcvQ.size()); std::deque< _CommReq* >:: iterator iter = m_pstdRcvQ.begin(); for ( ; iter != m_pstdRcvQ.end(); ++iter ) { @@ -849,7 +858,7 @@ _CommReq* ProcessQueuesState::searchPostedRecv( MatchHdr& hdr, int& count ) m_pstdRcvQ.erase(iter); break; } - dbg().debug(CALL_INFO,2,1,"req=%p\n",req); + dbg().debug(CALL_INFO,2,DBG_MSK_PQS_Q,"req=%p\n",req); return req; } @@ -857,42 +866,42 @@ _CommReq* ProcessQueuesState::searchPostedRecv( MatchHdr& hdr, int& count ) bool ProcessQueuesState::checkMatchHdr( MatchHdr& hdr, MatchHdr& wantHdr, uint64_t ignore ) { - dbg().debug(CALL_INFO,1,1,"posted tag %#" PRIx64 ", msg tag %#" PRIx64 "\n", wantHdr.tag, hdr.tag ); + dbg().debug(CALL_INFO,2,DBG_MSK_PQS_Q,"posted tag %#" PRIx64 ", msg tag %#" PRIx64 "\n", wantHdr.tag, hdr.tag ); if ( ( AnyTag != wantHdr.tag ) && ( ( wantHdr.tag & ~ignore) != ( hdr.tag & ~ignore ) ) ) { return false; } - dbg().debug(CALL_INFO,1,1,"want rank %d %d\n", wantHdr.rank, hdr.rank ); + dbg().debug(CALL_INFO,2,DBG_MSK_PQS_Q,"want rank %d %d\n", wantHdr.rank, hdr.rank ); if ( ( MP::AnySrc != wantHdr.rank ) && ( wantHdr.rank != hdr.rank ) ) { return false; } - dbg().debug(CALL_INFO,1,1,"want group %d %d\n", wantHdr.group,hdr.group); + dbg().debug(CALL_INFO,2,DBG_MSK_PQS_Q,"want group %d %d\n", wantHdr.group,hdr.group); if ( wantHdr.group != hdr.group ) { return false; } - dbg().debug(CALL_INFO,1,1,"want count %d %d\n", wantHdr.count, + dbg().debug(CALL_INFO,2,DBG_MSK_PQS_Q,"want count %d %d\n", wantHdr.count, hdr.count); if ( wantHdr.count != hdr.count ) { return false; } - dbg().debug(CALL_INFO,1,1,"want dtypeSize %d %d\n", wantHdr.dtypeSize, + dbg().debug(CALL_INFO,2,DBG_MSK_PQS_Q,"want dtypeSize %d %d\n", wantHdr.dtypeSize, hdr.dtypeSize); if ( wantHdr.dtypeSize != hdr.dtypeSize ) { return false; } - dbg().debug(CALL_INFO,1,1,"matched\n"); + dbg().debug(CALL_INFO,1,DBG_MSK_PQS_Q,"matched\n"); return true; } void ProcessQueuesState::copyIoVec( std::vector& dst, std::vector& src, size_t len ) { - dbg().debug(CALL_INFO,1,1,"dst.size()=%lu src.size()=%lu wantLen=%lu\n", + dbg().debug(CALL_INFO,2,DBG_MSK_PQS_Q,"dst.size()=%lu src.size()=%lu wantLen=%lu\n", dst.size(), src.size(), len ); size_t copied = 0; @@ -900,10 +909,10 @@ void ProcessQueuesState::copyIoVec( for ( unsigned int i=0; i < src.size() && copied < len; i++ ) { assert( rV < dst.size() ); - dbg().debug(CALL_INFO,3,1,"src[%d].len %lu\n", i, src[i].len); + dbg().debug(CALL_INFO,3,DBG_MSK_PQS_Q,"src[%d].len %lu\n", i, src[i].len); for ( unsigned int j=0; j < src[i].len && copied < len ; j++ ) { - dbg().debug(CALL_INFO,3,1,"copied=%lu rV=%lu rP=%lu\n", + dbg().debug(CALL_INFO,3,DBG_MSK_PQS_Q,"copied=%lu rV=%lu rP=%lu\n", copied,rV,rP); if ( dst[rV].addr.getBacking() && src[i].addr.getBacking() ) { @@ -937,28 +946,28 @@ void ProcessQueuesState::postShortRecvBuffer( ) // save this info so we can cleanup in the destructor m_postedShortBuffers[buf ] = callback; - dbg().debug(CALL_INFO,1,1,"num postedShortRecvBuffers %lu\n", + 
dbg().debug(CALL_INFO,2,DBG_MSK_PQS_POST_SHORT,"num postedShortRecvBuffers %lu\n", m_postedShortBuffers.size()); m_nic->dmaRecv( -1, ShortMsgQ, buf->ioVec, callback ); } void ProcessQueuesState::loopSendReq( std::vector& vec, int core, void* key ) { - m_dbg.debug(CALL_INFO,1,1,"dest core=%d key=%p\n",core,key); + m_dbg.debug(CALL_INFO,2,DBG_MSK_PQS_LOOP,"dest core=%d key=%p\n",core,key); m_loopLink->send(0, new LoopBackEvent( vec, core, key ) ); } void ProcessQueuesState::loopSendResp( int core, void* key ) { - m_dbg.debug(CALL_INFO,1,1,"dest core=%d key=%p\n",core,key); + m_dbg.debug(CALL_INFO,2,DBG_MSK_PQS_LOOP,"dest core=%d key=%p\n",core,key); m_loopLink->send(0, new LoopBackEvent( core, key ) ); } void ProcessQueuesState::loopHandler( Event* ev ) { LoopBackEvent* event = static_cast< LoopBackEvent* >(ev); - m_dbg.debug(CALL_INFO,1,1,"%s key=%p\n", + m_dbg.debug(CALL_INFO,1,DBG_MSK_PQS_LOOP,"%s key=%p\n", event->vec.empty() ? "Response" : "Request", event->key); if ( event->vec.empty() ) { @@ -973,7 +982,7 @@ void ProcessQueuesState::delayHandler( SST::Event* e ) { DelayEvent* event = static_cast(e); - m_dbg.debug(CALL_INFO,2,1,"execute callback\n"); + m_dbg.debug(CALL_INFO,1,DBG_MSK_PQS_CB,"execute callback\n"); event->callback(); delete e; diff --git a/src/sst/elements/firefly/ctrlMsgProcessQueuesState.h b/src/sst/elements/firefly/ctrlMsgProcessQueuesState.h index 5b8d50303b..9cecd579d5 100644 --- a/src/sst/elements/firefly/ctrlMsgProcessQueuesState.h +++ b/src/sst/elements/firefly/ctrlMsgProcessQueuesState.h @@ -30,6 +30,15 @@ #include "ctrlMsgCommReq.h" #include "ctrlMsgWaitReq.h" +#define DBG_MSK_PQS_APP_SIDE 1 << 0 +#define DBG_MSK_PQS_INT 1 << 1 +#define DBG_MSK_PQS_Q 1 << 2 +#define DBG_MSK_PQS_CB 1 << 3 +#define DBG_MSK_PQS_LOOP 1<< 4 +#define DBG_MSK_PQS_NEED_RECV 1<< 5 +#define DBG_MSK_PQS_POST_SHORT 1<< 6 + + namespace SST { namespace Firefly { namespace CtrlMsg { @@ -370,7 +379,7 @@ class ProcessQueuesState : public SubComponent _CommReq* searchPostedRecv( MatchHdr& hdr, int& delay ); void exit( int delay = 0 ) { - dbg().debug(CALL_INFO,2,1,"exit ProcessQueuesState\n"); + dbg().debug(CALL_INFO,2,DBG_MSK_PQS_APP_SIDE,"exit ProcessQueuesState\n"); passCtrlToFunction( m_exitDelay + delay ); m_exitDelay = 0; } @@ -433,6 +442,7 @@ class ProcessQueuesState : public SubComponent m_delayLink->send( delay, new DelayEvent(callback) ); } void passCtrlToFunction( uint64_t delay = 0 ) { + dbg().debug(CALL_INFO,1,DBG_MSK_PQS_APP_SIDE,"\n"); m_returnToCaller->send( delay, NULL ); } @@ -483,6 +493,8 @@ class ProcessQueuesState : public SubComponent Statistic* m_statRcvdMsg; Statistic* m_statPstdRcv; + int m_numSent; + int m_numRecv; }; } diff --git a/src/sst/elements/firefly/hadesSHMEM.cc b/src/sst/elements/firefly/hadesSHMEM.cc index 48abdf77bc..2750b0c20e 100644 --- a/src/sst/elements/firefly/hadesSHMEM.cc +++ b/src/sst/elements/firefly/hadesSHMEM.cc @@ -15,6 +15,8 @@ #include +#define CALL_INFO_LAMBDA __LINE__, __FILE__ + #include "hadesSHMEM.h" #include "functionSM.h" #include "funcSM/event.h" @@ -37,6 +39,7 @@ HadesSHMEM::HadesSHMEM(Component* owner, Params& params) : m_enterLat_ns = params.find("enterLat_ns",30); m_returnLat_ns = params.find("returnLat_ns",30); + m_blockingReturnLat_ns = params.find("blockingReturnLat_ns",300); } HadesSHMEM::~HadesSHMEM() { @@ -57,7 +60,7 @@ void HadesSHMEM::setup() { char buffer[100]; snprintf(buffer,100,"@t:%d:%d:HadesSHMEM::@p():@l ", - m_os->getNic()->getNodeId(), m_os->getInfo()->worldRank()); + m_os->getNic()->getRealNodeId(), 
m_os->getInfo()->worldRank()); m_dbg.setPrefix(buffer); } @@ -180,7 +183,11 @@ void HadesSHMEM::my_pe2(int* val, Shmem::Callback callback ) void HadesSHMEM::quiet(Shmem::Callback callback) { dbg().debug(CALL_INFO,1,SHMEM_BASE,"\n"); - wait_until( m_pendingRemoteOps.getSimVAddr(), Shmem::EQ, m_zero, callback ); + wait_until( m_pendingRemoteOps.getSimVAddr(), Shmem::EQ, m_zero, + [=](int) { + dbg().debug(CALL_INFO_LAMBDA,"quiet",1,SHMEM_BASE,"returning\n"); + callback(0); } + ); } void HadesSHMEM::quiet2(Shmem::Callback callback) @@ -438,8 +445,8 @@ void HadesSHMEM::get2(Hermes::Vaddr dest, Hermes::Vaddr src, size_t length, int m_os->getNic()->shmemGet( calcNetPE(pe), dest, src, length, blocking, [=]() { - this->dbg().debug(CALL_INFO,1,SHMEM_BASE,"\n"); - this->delayReturn( callback ); + this->dbg().debug(CALL_INFO_LAMBDA,"get",1,SHMEM_BASE,"returning\n"); + this->delayReturn( callback, m_blockingReturnLat_ns ); } ); } @@ -463,12 +470,12 @@ void HadesSHMEM::getv2( Hermes::Value& value, Hermes::Vaddr src, int pe, Shmem:: m_os->getNic()->shmemGetv( calcNetPE(pe), src, type, [=]( Hermes::Value& newValue ) { - this->dbg().debug(CALL_INFO,1,SHMEM_BASE,"\n"); + this->dbg().debug(CALL_INFO_LAMBDA,"getv2",1,SHMEM_BASE,"returning\n"); Hermes::Value _value = value; ::memcpy( _value.getPtr(), newValue.getPtr(), _value.getLength() ); - this->delayReturn( callback ); + this->delayReturn( callback, m_blockingReturnLat_ns ); } ); } @@ -506,7 +513,7 @@ void HadesSHMEM::put2(Hermes::Vaddr dest, Hermes::Vaddr src, size_t length, int dbg().debug(CALL_INFO,1,SHMEM_BASE,"\n"); m_os->getNic()->shmemPut( calcNetPE(pe), dest, src, length, blocking, [=]() { - this->dbg().debug(CALL_INFO,1,SHMEM_BASE,"\n"); + this->dbg().debug(CALL_INFO_LAMBDA,"put2",1,SHMEM_BASE,"returning\n"); this->delayReturn( callback ); } ); @@ -528,7 +535,7 @@ void HadesSHMEM::putOp2(Hermes::Vaddr dest, Hermes::Vaddr src, size_t length, in dbg().debug(CALL_INFO,1,SHMEM_BASE,"\n"); m_os->getNic()->shmemPutOp( calcNetPE(pe), dest, src, length, op, dataType, [=]() { - this->dbg().debug(CALL_INFO,1,SHMEM_BASE,"\n"); + this->dbg().debug(CALL_INFO_LAMBDA,"putOp2",1,SHMEM_BASE,"returning\n"); this->delayReturn( callback ); } ); @@ -552,7 +559,7 @@ void HadesSHMEM::putv2(Hermes::Vaddr dest, Hermes::Value& value, int pe, Shmem:: m_os->getNic()->shmemPutv( calcNetPE(pe), dest, value, [=]() { - this->dbg().debug(CALL_INFO,1,SHMEM_BASE,"\n"); + this->dbg().debug(CALL_INFO_LAMBDA,"putv2",1,SHMEM_BASE,"returning\n"); this->delayReturn( callback ); } ); @@ -575,7 +582,7 @@ void HadesSHMEM::wait_until2(Hermes::Vaddr addr, Hermes::Shmem::WaitOp op, Herme m_os->getNic()->shmemWait( addr, op, value, [=]() { - this->dbg().debug(CALL_INFO,1,SHMEM_BASE,"wait_until2() addr=%#" PRIx64 " done\n",addr); + this->dbg().debug(CALL_INFO_LAMBDA,"wait_until2",1,SHMEM_BASE,"addr=%#" PRIx64 " returning\n",addr); this->delayReturn( callback ); } ); @@ -601,12 +608,12 @@ void HadesSHMEM::swap2(Hermes::Value& result, Hermes::Vaddr addr, Hermes::Value& m_os->getNic()->shmemSwap( calcNetPE(pe), addr, value, [=]( Hermes::Value& newValue ) { - this->dbg().debug(CALL_INFO,1,SHMEM_BASE,"\n"); + this->dbg().debug(CALL_INFO_LAMBDA,"swap2",1,SHMEM_BASE,"returning\n"); Hermes::Value _result = result; ::memcpy( _result.getPtr(), newValue.getPtr(), value.getLength() ); - this->delayReturn( callback ); + this->delayReturn( callback, m_blockingReturnLat_ns ); } ); } @@ -634,12 +641,12 @@ void HadesSHMEM::cswap2(Hermes::Value& result, Hermes::Vaddr addr, Hermes::Value 
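// Aside, not part of the original patch: the new blockingReturnLat_ns parameter lets SHMEM calls
// that wait on a remote reply (get, getv, swap, cswap, fadd) charge a longer return latency than
// calls that complete locally. A minimal sketch of that default-argument pattern, assuming the
// 30/300 ns defaults seen above; schedule_ns() is a hypothetical stand-in for the event
// scheduling the real delayReturn() performs.
#include <cstdint>
#include <functional>

typedef uint64_t SimTime_t;
typedef std::function<void(int)> Callback;

struct ShmemReturnModel {
    SimTime_t m_returnLat_ns;
    SimTime_t m_blockingReturnLat_ns;

    ShmemReturnModel() : m_returnLat_ns(30), m_blockingReturnLat_ns(300) {}

    // stand-in for the component's event scheduling; fires immediately in this sketch
    void schedule_ns( SimTime_t ns, Callback cb ) { (void) ns; cb(0); }

    // delay == 0 means "use the normal return latency", mirroring
    // delayReturn( Shmem::Callback callback, SimTime_t delay = 0 ) in the patch
    void delayReturn( Callback cb, SimTime_t delay = 0 ) {
        schedule_ns( delay ? delay : m_returnLat_ns, cb );
    }
};
// a blocking completion path would call: model.delayReturn( cb, model.m_blockingReturnLat_ns );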
m_os->getNic()->shmemCswap( calcNetPE(pe), addr, cond, value, [=]( Hermes::Value& newValue ) { - this->dbg().debug(CALL_INFO,1,SHMEM_BASE,"\n"); + this->dbg().debug(CALL_INFO_LAMBDA,"cswap2",1,SHMEM_BASE,"returning\n"); Hermes::Value _result = result; ::memcpy( _result.getPtr(), newValue.getPtr(), value.getLength() ); - this->delayReturn( callback ); + this->delayReturn( callback, m_blockingReturnLat_ns ); } ); } @@ -662,7 +669,7 @@ void HadesSHMEM::add2( Hermes::Vaddr addr, Hermes::Value& value, int pe, Shmem:: m_os->getNic()->shmemAdd( calcNetPE(pe), addr, value, [=]( ) { - this->dbg().debug(CALL_INFO,1,SHMEM_BASE,"add2() done\n"); + this->dbg().debug(CALL_INFO_LAMBDA,"add2",1,SHMEM_BASE,"returning\n"); this->delayReturn( callback ); } @@ -688,12 +695,12 @@ void HadesSHMEM::fadd2(Hermes::Value& result, Hermes::Vaddr addr, Hermes::Value& m_os->getNic()->shmemFadd( calcNetPE(pe), addr, value, [=]( Hermes::Value& newValue ) { - this->dbg().debug(CALL_INFO,1,SHMEM_BASE,"\n"); + this->dbg().debug(CALL_INFO_LAMBDA,"fadd2",1,SHMEM_BASE,"returning\n"); Hermes::Value _result = result; ::memcpy( _result.getPtr(), newValue.getPtr(), _result.getLength() ); - this->delayReturn( callback ); + this->delayReturn( callback, m_blockingReturnLat_ns ); } ); } diff --git a/src/sst/elements/firefly/hadesSHMEM.h b/src/sst/elements/firefly/hadesSHMEM.h index 5647519607..3a83176ab8 100644 --- a/src/sst/elements/firefly/hadesSHMEM.h +++ b/src/sst/elements/firefly/hadesSHMEM.h @@ -57,6 +57,7 @@ class HadesSHMEM : public Shmem::Interface {"verboseMask","Sets the debug mask",""}, {"enterLat_ns","Sets the latency of entering a SHMEM call","" }, {"returnLat_ns","Sets the latency of returning from a SHMEM call","" }, + {"blockingReturnLat_ns","Sets the latency of returning from a SHMEM call that blocked on response","" }, ) typedef std::function Callback; @@ -266,6 +267,7 @@ class HadesSHMEM : public Shmem::Interface void delayReturn( Shmem::Callback callback, SimTime_t delay = 0 ); void handleToDriver(SST::Event* e) { + dbg().debug(CALL_INFO,1,SHMEM_BASE,"\n"); DelayEvent* event = static_cast(e); if ( DelayEvent::One == event->type ) { event->m_callback1(); @@ -286,6 +288,7 @@ class HadesSHMEM : public Shmem::Interface SimTime_t m_returnLat_ns; SimTime_t m_enterLat_ns; + SimTime_t m_blockingReturnLat_ns; ShmemCommon* m_common; ShmemBarrier* m_barrier; diff --git a/src/sst/elements/firefly/memoryModel/busBridgeUnit.h b/src/sst/elements/firefly/memoryModel/busBridgeUnit.h index feb02b2946..189734a692 100644 --- a/src/sst/elements/firefly/memoryModel/busBridgeUnit.h +++ b/src/sst/elements/firefly/memoryModel/busBridgeUnit.h @@ -44,13 +44,13 @@ class BusBridgeUnit : public Unit { void resume( UnitBase* unit = 0 ) { if ( unit == m_loadWidget ) { - m_dbg.verbosePrefix(prefix(),CALL_INFO,1,BUS_BRIDGE_MASK,"load\n"); + m_dbg.verbosePrefix(prefix(),CALL_INFO,2,BUS_BRIDGE_MASK,"load\n"); if ( m_blocked[0] ) { m_model.schedResume( 0, m_blocked[0] ); m_blocked[0] = NULL; } } else { - m_dbg.verbosePrefix(prefix(),CALL_INFO,1,BUS_BRIDGE_MASK,"store\n" ); + m_dbg.verbosePrefix(prefix(),CALL_INFO,2,BUS_BRIDGE_MASK,"store\n" ); if ( m_blocked[1] ) { m_model.schedResume( 0, m_blocked[1] ); m_blocked[1] = NULL; @@ -71,10 +71,12 @@ class BusBridgeUnit : public Unit { bool write( UnitBase* src, MemReq* req, Callback callback ) { Entry* entry = new Entry( src, req, callback ); entry->qd = m_model.getCurrentSimTimeNano(); - m_dbg.verbosePrefix(prefix(),CALL_INFO,1,BUS_BRIDGE_MASK,"entry=%p addr=%#" PRIx64 " 
length=%lu\n",entry,req->addr,req->length); src->incPendingWrites(); m_respBus.addReq( entry ); - return src->numPendingWrites() == 10; + bool ret = src->numPendingWrites() == 10; + m_dbg.verbosePrefix(prefix(),CALL_INFO,1,BUS_BRIDGE_MASK,"entry=%p addr=%#" PRIx64 " length=%lu %s %p\n", + entry,req->addr,req->length, ret ? "blocked":"",src); + return ret; } bool store( UnitBase* src, MemReq* req ) { @@ -101,12 +103,12 @@ class BusBridgeUnit : public Unit { void addReq( Entry* req ) { m_pendingReqQ.push_back(req); - m_unit.m_dbg.verbosePrefix(prefix(),CALL_INFO,1,BUS_BRIDGE_MASK,"src=%p %s %#" PRIx64 " q size=%lu\n", + m_unit.m_dbg.verbosePrefix(prefix(),CALL_INFO,2,BUS_BRIDGE_MASK,"src=%p %s %#" PRIx64 " q size=%lu\n", req->src, req->callback?"load":"store", req->addr, m_pendingReqQ.size()); process(); } void addDLL( int bytes ) { - m_unit.m_dbg.verbosePrefix(prefix(),CALL_INFO,1,BUS_BRIDGE_MASK,"q size=%lu\n",m_pendingDLLQ.size()); + m_unit.m_dbg.verbosePrefix(prefix(),CALL_INFO,2,BUS_BRIDGE_MASK,"q size=%lu\n",m_pendingDLLQ.size()); m_pendingDLLQ.push_back(bytes); process(); } @@ -150,7 +152,7 @@ class BusBridgeUnit : public Unit { entry->xmit = m_unit.m_model.getCurrentSimTimeNano(); SimTime_t now = m_unit.m_model.getCurrentSimTimeNano(); - m_unit.m_dbg.verbosePrefix(prefix(),CALL_INFO,1,BUS_BRIDGE_MASK,"entry=%p addr=%#" PRIx64 " length=%lu delay=%" PRIu64 " Time=%" PRIu64 "\n", + m_unit.m_dbg.verbosePrefix(prefix(),CALL_INFO,2,BUS_BRIDGE_MASK,"entry=%p addr=%#" PRIx64 " length=%lu delay=%" PRIu64 " Time=%" PRIu64 "\n", entry,entry->addr, entry->length, delay, now - entry->qd ); m_unit.m_model.schedCallback( delay, std::bind( &Bus::reqArrived, this, entry ) ); } @@ -173,6 +175,7 @@ class BusBridgeUnit : public Unit { } if( 0 == entry->addr ) { if ( entry->src->numPendingWrites() == 10 ) { + m_dbg.verbosePrefix(prefix(),CALL_INFO,2,BUS_BRIDGE_MASK,"unblock src\n"); m_model.schedResume( 0, entry->src ); } entry->src->decPendingWrites(); @@ -185,7 +188,7 @@ class BusBridgeUnit : public Unit { void processReq( Entry* entry ) { SimTime_t now = m_model.getCurrentSimTimeNano(); - m_dbg.verbosePrefix(prefix(),CALL_INFO,1,BUS_BRIDGE_MASK,"entry=%p qdTime=%" PRIu64 " xmitTime=%" PRIu64 "\n", + m_dbg.verbosePrefix(prefix(),CALL_INFO,2,BUS_BRIDGE_MASK,"entry=%p qdTime=%" PRIu64 " xmitTime=%" PRIu64 "\n", entry, entry->xmit - entry->qd, now - entry->xmit); SimTime_t issueTime = m_model.getCurrentSimTimeNano(); @@ -198,7 +201,7 @@ class BusBridgeUnit : public Unit { size_t length = entry->req->length; if ( m_loadWidget->load( this, entry->req, [=]() { - m_dbg.verbosePrefix(prefix(),CALL_INFO,1,BUS_BRIDGE_MASK,"load done entry=%p addr=%#" PRIx64 " length=%lu latency=%" PRIu64 "\n", + m_dbg.verbosePrefix(prefix(),CALL_INFO_LAMBDA,"processReq",1,BUS_BRIDGE_MASK,"load done entry=%p addr=%#" PRIx64 " length=%lu latency=%" PRIu64 "\n", entry, addr, length, m_model.getCurrentSimTimeNano() - now ); m_respBus.addReq( entry ); }) ) @@ -220,17 +223,17 @@ class BusBridgeUnit : public Unit { } if ( resumeSrc ) { - m_dbg.verbosePrefix(prefix(),CALL_INFO,1,BUS_BRIDGE_MASK,"resume %p\n",resumeSrc); + m_dbg.verbosePrefix(prefix(),CALL_INFO,2,BUS_BRIDGE_MASK,"resume %p\n",resumeSrc); m_model.schedResume( 0, resumeSrc ); } else { - m_dbg.verbosePrefix(prefix(),CALL_INFO,1,BUS_BRIDGE_MASK,"blocked\n"); + m_dbg.verbosePrefix(prefix(),CALL_INFO,2,BUS_BRIDGE_MASK,"blocked\n"); } } SimTime_t calcByteDelay( size_t numBytes ) { double delay = (numBytes/(m_numLinks/8))/m_bandwidth_GB; - 
m_dbg.verbosePrefix(prefix(),CALL_INFO,1,BUS_BRIDGE_MASK,"bytes=%lu delay=%f\n",numBytes, (float) delay ); + m_dbg.verbosePrefix(prefix(),CALL_INFO,2,BUS_BRIDGE_MASK,"bytes=%lu delay=%f\n",numBytes, (float) delay ); return round(delay); } diff --git a/src/sst/elements/firefly/memoryModel/busWidget.h b/src/sst/elements/firefly/memoryModel/busWidget.h index 20f0ce3bf0..bf296b601d 100644 --- a/src/sst/elements/firefly/memoryModel/busWidget.h +++ b/src/sst/elements/firefly/memoryModel/busWidget.h @@ -110,23 +110,23 @@ class BusLoadWidget : public Unit { SimTime_t latency = m_model.getCurrentSimTimeNano() - entry.issueTime; --m_numPending; - m_dbg.verbosePrefix(prefix(),CALL_INFO,1,BUS_WIDGET_MASK,"addr=%#" PRIx64 " complete, latency=%" PRIu64 "\n", + m_dbg.verbosePrefix(prefix(),CALL_INFO_LAMBDA,"process",1,BUS_WIDGET_MASK,"addr=%#" PRIx64 " complete, latency=%" PRIu64 "\n", entry.addr,latency); if ( entry.callback ) { - m_dbg.verbosePrefix(prefix(),CALL_INFO,1,BUS_WIDGET_MASK,"tell src load is complete\n"); + m_dbg.verbosePrefix(prefix(),CALL_INFO_LAMBDA,"process",1,BUS_WIDGET_MASK,"tell src load is complete\n"); m_model.schedCallback( 0, entry.callback ); } if ( m_blockedSrc ) { - m_dbg.verbosePrefix(prefix(),CALL_INFO,1,BUS_WIDGET_MASK,"unblock src\n"); + m_dbg.verbosePrefix(prefix(),CALL_INFO_LAMBDA,"process",1,BUS_WIDGET_MASK,"unblock src\n"); m_model.schedResume( 0, m_blockedSrc, this ); m_blockedSrc = NULL; } - m_dbg.verbosePrefix(prefix(),CALL_INFO,1,BUS_WIDGET_MASK,"%s\n",m_blocked? "blocked" : "not blocked"); + m_dbg.verbosePrefix(prefix(),CALL_INFO_LAMBDA,"process",1,BUS_WIDGET_MASK,"%s\n",m_blocked? "blocked" : "not blocked"); if ( ! m_blocked && ! m_scheduled && ! m_pendingQ.empty() ) { - m_dbg.verbosePrefix(prefix(),CALL_INFO,1,BUS_WIDGET_MASK,"schedule process()\n"); + m_dbg.verbosePrefix(prefix(),CALL_INFO_LAMBDA,"process",1,BUS_WIDGET_MASK,"schedule process()\n"); m_model.schedCallback( 0, std::bind( &BusLoadWidget::process, this ) ); m_scheduled = true; } @@ -136,10 +136,10 @@ class BusLoadWidget : public Unit { } else { callback = [=](){ - m_dbg.verbosePrefix(prefix(),CALL_INFO,1,BUS_WIDGET_MASK,"%s\n",m_blocked? "blocked" : "not blocked"); + m_dbg.verbosePrefix(prefix(),CALL_INFO_LAMBDA,"process",1,BUS_WIDGET_MASK,"%s\n",m_blocked? "blocked" : "not blocked"); if ( ! m_blocked && ! m_scheduled && ! 
m_pendingQ.empty() ) { - m_dbg.verbosePrefix(prefix(),CALL_INFO,1,BUS_WIDGET_MASK,"schedule process()\n"); + m_dbg.verbosePrefix(prefix(),CALL_INFO_LAMBDA,"process",1,BUS_WIDGET_MASK,"schedule process()\n"); m_model.schedCallback( 0, std::bind( &BusLoadWidget::process, this ) ); m_scheduled = true; } diff --git a/src/sst/elements/firefly/memoryModel/cacheUnit.h b/src/sst/elements/firefly/memoryModel/cacheUnit.h index 836d50963d..601868748c 100644 --- a/src/sst/elements/firefly/memoryModel/cacheUnit.h +++ b/src/sst/elements/firefly/memoryModel/cacheUnit.h @@ -21,7 +21,7 @@ enum Op { Load, Store } op; Entry( Op op, UnitBase* src, MemReq* req, SimTime_t time, Callback callback=NULL ) : op(op), src(src), req(req), time(time), callback(callback) {} - ~Entry() { delete req; } + ~Entry() { if ( req) delete req; } UnitBase* src; Callback callback; MemReq* req; @@ -31,8 +31,8 @@ public: CacheUnit( SimpleMemoryModel& model, Output& dbg, int id, Unit* memory, int cacheSize, int cacheLineSize, int numMSHR, std::string name ) : Unit( model, dbg ), m_memory(memory), m_numPending(0), m_blockedSrc(NULL), m_numMSHR(numMSHR), m_scheduled(false), - m_cacheLineSize(cacheLineSize), m_qSize(m_numMSHR), m_numIssuedLoads(0), m_state( Idle ), m_missEntry(NULL), - m_mshrEntry(NULL), m_cache( cacheSize ), m_hitCnt(0), m_total(0) + m_cacheLineSize(cacheLineSize), m_qSize(numMSHR), m_numIssuedLoads(0), + m_cache( cacheSize ), m_hitCnt(0), m_total(0), m_blockedOnStore(false), m_blockedOnLoad(false) { m_prefix = "@t:" + std::to_string(id) + ":SimpleMemoryModel::" + name + "CacheUnit::@p():@l "; stats = std::to_string(id) + ":SimpleMemoryModel::" + name + "CacheUnit:: "; @@ -40,16 +40,19 @@ assert( m_numMSHR <= cacheSize ); } ~CacheUnit() { +#if 0 if ( m_total ) { m_dbg.output("%s total requests %" PRIu64 " %f percent hits\n", stats.c_str(), m_total, (float)m_hitCnt/(float)m_total); } +#endif } uint64_t m_hitCnt; uint64_t m_total; - enum State { Idle, BlockedStore, BlockedLoad, BlockedMSHR } m_state; + bool m_blockedOnStore; + bool m_blockedOnLoad; int m_numIssuedLoads; int m_numMSHR; int m_qSize; @@ -57,61 +60,51 @@ bool m_scheduled; UnitBase* m_blockedSrc; - Entry* m_missEntry; - Entry* m_mshrEntry; std::deque m_blockedQ; bool store( UnitBase* src, MemReq* req ) { - m_dbg.verbosePrefix(prefix(),CALL_INFO,1,CACHE_MASK,"addr=%#" PRIx64 " length=%lu\n",req->addr,req->length); + //m_dbg.verbosePrefix(prefix(),CALL_INFO,1,CACHE_MASK,"addr=%#" PRIx64 " length=%lu\n",req->addr,req->length); //assert( (req->addr & (m_cacheLineSize - 1) ) == 0 ); return addEntry( new Entry( Entry::Store, src, req, m_model.getCurrentSimTimeNano() ) ); } bool load( UnitBase* src, MemReq* req, Callback callback ) { - m_dbg.verbosePrefix(prefix(),CALL_INFO,1,CACHE_MASK,"addr=%#" PRIx64 " length=%lu\n",req->addr,req->length); + //m_dbg.verbosePrefix(prefix(),CALL_INFO,1,CACHE_MASK,"addr=%#" PRIx64 " length=%lu\n",req->addr,req->length); //assert( (req->addr & (m_cacheLineSize - 1) ) == 0 ); return addEntry( new Entry( Entry::Load, src, req, m_model.getCurrentSimTimeNano(), callback ) ); } void resume( UnitBase* src = NULL ) { - m_dbg.verbosePrefix(prefix(),CALL_INFO,1,CACHE_MASK,"state=%d blocked=%lu\n", m_state, m_blockedQ.size()); - switch (m_state) { - case BlockedStore: - assert( m_missEntry ); - m_state = Idle; - miss2( m_missEntry ); - m_missEntry = NULL; - break; - - case BlockedLoad: - m_state = Idle; - break; - default: - assert(0); - } - - m_dbg.verbosePrefix(prefix(),CALL_INFO,1,CACHE_MASK,"state=%d blocked=%lu\n", m_state, 
m_blockedQ.size()); + const char* ptr = (const char*) src; + if ( ptr[0] == 'R' ) { + m_dbg.verbosePrefix(prefix(),CALL_INFO,1,CACHE_MASK,"Load\n"); + m_blockedOnLoad = false; + } else if ( ptr[0] == 'W' ) { + m_dbg.verbosePrefix(prefix(),CALL_INFO,1,CACHE_MASK,"Store\n"); + m_blockedOnStore = false; + } else { + assert(0); + } schedule(); } private: bool addEntry( Entry* entry ) { + entry->req->addr = alignAddr( entry->req->addr ); m_dbg.verbosePrefix(prefix(),CALL_INFO,1,CACHE_MASK,"%s addr=%#" PRIx64 " pending=%d\n", entry->op == Entry::Load?"Load":"Store",entry->req->addr,m_numPending); - entry->req->addr = alignAddr( entry->req->addr ); - - ++m_numPending; - - assert( m_numPending <= m_qSize ); + incNumPending(); - m_model.schedCallback( 0, std::bind( &CacheUnit::checkHit, this, entry ) ); + //m_model.schedCallback( 0, std::bind( &CacheUnit::checkHit, this, entry ) ); + checkHit( entry ); + //if ( m_blockedQ.size() == m_qSize ) { if ( m_numPending == m_qSize ) { - m_dbg.verbosePrefix(prefix(),CALL_INFO,1,CACHE_MASK,"pending Q is full\n"); + m_dbg.verbosePrefix(prefix(),CALL_INFO,2,CACHE_MASK,"pending Q is full\n"); m_blockedSrc = entry->src; return true; } else { @@ -120,43 +113,44 @@ } void checkHit( Entry* entry ) { - m_dbg.verbosePrefix(prefix(),CALL_INFO,1,CACHE_MASK,"%s addr=%#" PRIx64 "\n", entry->op == Entry::Load?"Load":"Store",entry->req->addr); + m_dbg.verbosePrefix(prefix(),CALL_INFO,2,CACHE_MASK,"\n"); + if ( checkHit2( entry, false ) ) { + m_dbg.verbosePrefix(prefix(),CALL_INFO,2,CACHE_MASK,"queue blocked %s addr=%#" PRIx64 "\n", entry->op == Entry::Load?"Load":"Store",entry->req->addr); + m_blockedQ.push_back( entry ); + } + } + + bool checkHit2( Entry* entry, bool flag ) { + if ( flag ) { + m_dbg.verbosePrefix(prefix(),CALL_INFO,2,CACHE_MASK,"retry\n"); + } + m_dbg.verbosePrefix(prefix(),CALL_INFO,2,CACHE_MASK,"%s addr=%#" PRIx64 "\n", entry->op == Entry::Load?"Load":"Store",entry->req->addr); + m_scheduled = false; ++m_total; if ( m_cache.isValid( entry->req->addr ) ) { + m_dbg.verbosePrefix(prefix(),CALL_INFO,1,CACHE_MASK,"done, hit %s addr=%#" PRIx64 "\n", + entry->op == Entry::Load?"Load":"Store",entry->req->addr); ++m_hitCnt; hit( entry ); } else if ( isPending(entry->req->addr ) ) { - m_dbg.verbosePrefix(prefix(),CALL_INFO,1,CACHE_MASK,"pending addr=%#" PRIx64 "\n",entry->req->addr); + m_dbg.verbosePrefix(prefix(),CALL_INFO,2,CACHE_MASK,"pending addr=%#" PRIx64 "\n",entry->req->addr); m_pendingMap[entry->req->addr].push_back( entry ); } else { - if ( ! blocked() && m_blockedQ.empty() ) { + if ( ! blocked() && ( m_blockedQ.empty() || flag ) ) { + m_pendingMap[entry->req->addr]; miss( entry ); } else { - m_blockedQ.push_back( entry ); + assert( ! flag ); + --m_total; + return true; } } - } - - void checkHit2( ) { - m_scheduled = false; - assert( ! 
m_blockedQ.empty() ); - Entry* entry = m_blockedQ.front(); - m_dbg.verbosePrefix(prefix(),CALL_INFO,1,CACHE_MASK,"%s addr=%#" PRIx64 "\n", entry->op == Entry::Load?"Load":"Store",entry->req->addr); - m_blockedQ.pop_front(); - if ( m_cache.isValid( entry->req->addr ) ) { - hit( entry ); - } else if ( isPending(entry->req->addr ) ) { - m_dbg.verbosePrefix(prefix(),CALL_INFO,1,CACHE_MASK,"pending addr=%#" PRIx64 "\n",entry->req->addr); - m_pendingMap[entry->req->addr].push_back( entry ); - } else { - miss( entry ); - } - schedule(); + return false; } void hit( Entry* entry ) { - m_dbg.verbosePrefix(prefix(),CALL_INFO,1,CACHE_MASK,"addr=%#" PRIx64 "\n",entry->req->addr); + //m_dbg.verbosePrefix(prefix(),CALL_INFO,2,CACHE_MASK,"addr=%#" PRIx64 "\n",entry->req->addr); decNumPending(); @@ -167,84 +161,39 @@ delete entry; } - void setState( State state ) { - m_state = state; - } - bool blocked() { - return m_state != Idle; + return m_blockedOnStore || m_blockedOnLoad || m_numIssuedLoads == m_numMSHR; } void miss( Entry* entry ) { - m_dbg.verbosePrefix(prefix(),CALL_INFO,1,CACHE_MASK,"addr=%#" PRIx64 "\n",entry->req->addr); + m_dbg.verbosePrefix(prefix(),CALL_INFO,2,CACHE_MASK,"addr=%#" PRIx64 "\n",entry->req->addr); - if ( ! isPending(entry->req->addr) ) { - Hermes::Vaddr evictAddr = m_cache.evict(); - m_dbg.verbosePrefix(prefix(),CALL_INFO,1,CACHE_MASK,"evict addr=%#" PRIx64 "\n", evictAddr ); - - if ( m_memory->store( this, new MemReq( evictAddr, m_cacheLineSize ) ) ) { - m_missEntry = entry; - setState( BlockedStore ); - return; - } - } - miss2( entry ); + m_dbg.verbosePrefix(prefix(),CALL_INFO,2,CACHE_MASK,"%p\n",entry); + m_blockedOnLoad = m_memory->load( this, entry->req, + std::bind(&CacheUnit::loadDone, this, entry, entry->req->addr, m_model.getCurrentSimTimeNano() ) ); + // Note that the load deletes the request, so the req pointer is no longer valid + entry->req=NULL; + assert( m_numIssuedLoads < m_numMSHR); + ++m_numIssuedLoads; } - void miss2( Entry* entry ) { - - m_dbg.verbosePrefix(prefix(),CALL_INFO,1,CACHE_MASK,"addr=%#" PRIx64 "\n",entry->req->addr); - if ( entry->op == Entry::Store ) { - store( entry ); - } else { - load( entry ); - } - m_dbg.verbosePrefix(prefix(),CALL_INFO,1,CACHE_MASK,"state=%d blocked=%lu\n", m_state, m_blockedQ.size()); - } + void loadDone( Entry* entry, Hermes::Vaddr addr, SimTime_t startTime ) + { + m_dbg.verbosePrefix(prefix(),CALL_INFO,1,CACHE_MASK,"done, op=%s addr=%#" PRIx64 " latency=%" PRIu64 " numIssued=%d\n", + entry->op == Entry::Load ? "load" : "store", + addr,m_model.getCurrentSimTimeNano()-startTime, m_numIssuedLoads); - void store( Entry *entry ) { - m_dbg.verbosePrefix(prefix(),CALL_INFO,1,CACHE_MASK,"addr=%#" PRIx64 "\n",entry->req->addr); - decNumPending(); - if ( ! 
isPending(entry->req->addr) ) { - m_cache.insert( entry->req->addr ); - } - if ( entry->callback ) { - m_model.schedCallback( 0, entry->callback ); - } - m_dbg.verbosePrefix(prefix(),CALL_INFO,1,CACHE_MASK,"state=%d blocked=%lu\n", m_state, m_blockedQ.size()); - delete entry; - } + Hermes::Vaddr evictAddr = m_cache.evict(); - void load( Entry *entry ) { - m_dbg.verbosePrefix(prefix(),CALL_INFO,1,CACHE_MASK,"addr=%#" PRIx64 " numIssued=%d\n",entry->req->addr, m_numIssuedLoads); + m_blockedOnStore = m_memory->store( this, new MemReq( evictAddr, m_cacheLineSize ) ); - if ( m_numIssuedLoads == m_numMSHR ) { - m_dbg.verbosePrefix(prefix(),CALL_INFO,1,CACHE_MASK,"addr=%#" PRIx64 " blocked MSHR\n",entry->req->addr); - m_mshrEntry = entry; - setState( BlockedMSHR ); - return; - } - ++m_numIssuedLoads; - - m_dbg.verbosePrefix(prefix(),CALL_INFO,1,CACHE_MASK,"addr=%#" PRIx64 " issue load\n",entry->req->addr); - MemReq* req = new MemReq( entry->req->addr, m_cacheLineSize ); - if ( m_memory->load( this, req, std::bind(&CacheUnit::loadDone, this, entry->req->addr, m_model.getCurrentSimTimeNano() ) ) ) { - setState( BlockedLoad ); - }else{ - m_state = Idle; + if ( entry->callback ) { + m_model.schedCallback( 0, entry->callback ); } - - m_pendingMap[entry->req->addr].push_back( entry ); - - m_dbg.verbosePrefix(prefix(),CALL_INFO,1,CACHE_MASK,"state=%d blocked=%lu\n", m_state, m_blockedQ.size()); - } - - void loadDone( Hermes::Vaddr addr, SimTime_t startTime ) - { - m_dbg.verbosePrefix(prefix(),CALL_INFO,1,CACHE_MASK,"addr=%#" PRIx64 " latency=%" PRIu64 " numIssued=%d\n", - addr,m_model.getCurrentSimTimeNano()-startTime, m_numIssuedLoads); + decNumPending(); while ( ! m_pendingMap[addr].empty() ) { + m_dbg.verbosePrefix(prefix(),CALL_INFO,1,CACHE_MASK,"done\n"); decNumPending(); if ( m_pendingMap[addr].front()->callback ) { m_model.schedCallback( 0, m_pendingMap[addr].front()->callback ); @@ -253,34 +202,40 @@ m_pendingMap[addr].pop_front(); } m_pendingMap.erase( addr ); + delete entry; + m_dbg.verbosePrefix(prefix(),CALL_INFO,2,CACHE_MASK,"insert addr=%#" PRIx64 "\n",addr); m_cache.insert( addr ); --m_numIssuedLoads; - if ( BlockedMSHR == m_state ) { - assert( m_mshrEntry ); - m_state = Idle; - load( m_mshrEntry ); - m_mshrEntry = NULL; - } - schedule(); } void schedule() { - if ( ! m_scheduled && Idle == m_state && ! m_blockedQ.empty() ) { - m_model.schedCallback( 0, std::bind( &CacheUnit::checkHit2, this ) ); + m_dbg.verbosePrefix(prefix(),CALL_INFO,2,CACHE_MASK,"scheduled=%d blocked=%d numBlocked=%zu`\n", + m_scheduled, blocked(), m_blockedQ.size()); + if ( ! m_scheduled && ! blocked() && ! 
m_blockedQ.empty() ) { + Entry* entry = m_blockedQ.front(); + m_blockedQ.pop_front(); + m_dbg.verbosePrefix(prefix(),CALL_INFO,1,CACHE_MASK,"scheduled checkHit\n"); + m_model.schedCallback( 0, std::bind( &CacheUnit::checkHit2, this, entry, true ) ); m_scheduled = true; } } + void incNumPending() { + ++m_numPending; + assert( m_numPending <= m_qSize ); + m_dbg.verbosePrefix(prefix(),CALL_INFO,2,CACHE_MASK,"numPending=%d\n",m_numPending); + } void decNumPending() { assert(m_numPending); --m_numPending; - m_dbg.verbosePrefix(prefix(),CALL_INFO,1,CACHE_MASK,"numPending=%d\n",m_numPending); + assert( m_numPending >=0 ); + m_dbg.verbosePrefix(prefix(),CALL_INFO,2,CACHE_MASK,"numPending=%d\n",m_numPending); if ( m_blockedSrc ) { - m_dbg.verbosePrefix(prefix(),CALL_INFO,1,CACHE_MASK,"unblock src schedResume\n"); + m_dbg.verbosePrefix(prefix(),CALL_INFO,2,CACHE_MASK,"unblock src schedResume\n"); m_model.schedResume( 0, m_blockedSrc ); m_blockedSrc = NULL; } diff --git a/src/sst/elements/firefly/memoryModel/loadUnit.h b/src/sst/elements/firefly/memoryModel/loadUnit.h index ec4f9ae87e..2a0a083d8c 100644 --- a/src/sst/elements/firefly/memoryModel/loadUnit.h +++ b/src/sst/elements/firefly/memoryModel/loadUnit.h @@ -27,6 +27,11 @@ class LoadUnit : public Unit { m_blockedSrc(NULL) , m_numPending(0), m_name(name) { m_prefix = "@t:" + std::to_string(id) + ":SimpleMemoryModel::" + name + "LoadUnit::@p():@l "; + m_dbg.verbosePrefix(prefix(),CALL_INFO,1,LOAD_MASK,"maxPending=%d\n",m_qSize); + } + + void printStatus( Output& out, int id ) { + out.output("NIC %d: %s pending=%d\n",id, m_name.c_str(), m_numPending ); } std::string& name() { return m_name; } @@ -46,7 +51,7 @@ class LoadUnit : public Unit { } if ( m_numPending == m_qSize ) { - m_dbg.verbosePrefix(prefix(),CALL_INFO,1,LOAD_MASK,"blocking src\n"); + m_dbg.verbosePrefix(prefix(),CALL_INFO,2,LOAD_MASK,"blocking src\n"); m_blockedSrc = src; return true; } else { @@ -58,7 +63,7 @@ class LoadUnit : public Unit { void process() { assert( ! m_pendingQ.empty() ); Entry& entry = m_pendingQ.front(); - m_dbg.verbosePrefix(prefix(),CALL_INFO,1,LOAD_MASK,"addr=%#" PRIx64 " length=%lu pending=%lu\n",entry.req->addr,entry.req->length,m_pendingQ.size() ); + m_dbg.verbosePrefix(prefix(),CALL_INFO,3,LOAD_MASK,"addr=%#" PRIx64 " length=%lu pending=%lu\n",entry.req->addr,entry.req->length,m_pendingQ.size() ); assert( m_blocked == false ); m_scheduled = false; @@ -72,22 +77,22 @@ class LoadUnit : public Unit { SimTime_t latency = m_model.getCurrentSimTimeNano() - issueTime; - m_dbg.verbosePrefix(prefix(),CALL_INFO,1,LOAD_MASK,"latency=%" PRIu64 " addr=%#" PRIx64 " length=%lu pending=%lu\n", + m_dbg.verbosePrefix(prefix(),CALL_INFO_LAMBDA,"process",1,LOAD_MASK," complete, latency=%" PRIu64 " addr=%#" PRIx64 " length=%lu pending=%lu\n", latency,addr,length,m_pendingQ.size() ); --m_numPending; if ( entry.callback ) { - m_dbg.verbosePrefix(prefix(),CALL_INFO,1,LOAD_MASK,"tell src load is complete\n"); + m_dbg.verbosePrefix(prefix(),CALL_INFO_LAMBDA,"process",3,LOAD_MASK,"tell src load is complete\n"); m_model.schedCallback( 0, entry.callback ); } if ( m_blockedSrc ) { - m_dbg.verbosePrefix(prefix(),CALL_INFO,1,LOAD_MASK,"unblock src\n"); + m_dbg.verbosePrefix(prefix(),CALL_INFO_LAMBDA,"process",2,LOAD_MASK,"unblock src\n"); m_model.schedResume( 0, m_blockedSrc, this ); m_blockedSrc = NULL; } - m_dbg.verbosePrefix(prefix(),CALL_INFO,1,LOAD_MASK,"%s\n",m_blocked? "blocked" : "not blocked"); + m_dbg.verbosePrefix(prefix(),CALL_INFO_LAMBDA,"process",3,LOAD_MASK,"%s\n",m_blocked? 
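// Aside, simplified sketch rather than the patch code: the reworked CacheUnit above replaces its
// explicit state machine with two booleans plus an MSHR count, and a single blocked() predicate
// decides whether a queued miss may be retried from schedule(). MiniCache and retryCheckHit()
// are illustrative names, and the MSHR count of 4 is an assumption.
#include <deque>

struct Entry;   // request bookkeeping, as in the patch

struct MiniCache {
    bool m_blockedOnStore;      // evicted line still draining to memory
    bool m_blockedOnLoad;       // fill request back-pressured
    int  m_numIssuedLoads;
    int  m_numMSHR;
    std::deque<Entry*> m_blockedQ;   // misses waiting for an MSHR / unblock

    MiniCache( int numMSHR ) :
        m_blockedOnStore(false), m_blockedOnLoad(false),
        m_numIssuedLoads(0), m_numMSHR(numMSHR) {}

    bool blocked() const {
        return m_blockedOnStore || m_blockedOnLoad || m_numIssuedLoads == m_numMSHR;
    }

    void schedule() {   // called after resume()/loadDone(), as in the patch
        if ( ! blocked() && ! m_blockedQ.empty() ) {
            Entry* entry = m_blockedQ.front();
            m_blockedQ.pop_front();
            retryCheckHit( entry );   // plays the role of checkHit2( entry, true )
        }
    }

    void retryCheckHit( Entry* ) { /* hit/miss lookup omitted in this sketch */ }
};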
"blocked" : "not blocked"); if ( ! m_blocked && ! m_scheduled && ! m_pendingQ.empty() ) { m_model.schedCallback( 0, std::bind( &LoadUnit::process, this ) ); @@ -95,13 +100,13 @@ class LoadUnit : public Unit { } } ); - m_dbg.verbosePrefix(prefix(),CALL_INFO,1,LOAD_MASK,"%s\n",m_blocked? "blocked" : " not blocked"); + m_dbg.verbosePrefix(prefix(),CALL_INFO,3,LOAD_MASK,"%s\n",m_blocked? "blocked" : " not blocked"); assert( ! m_pendingQ.empty() ); m_pendingQ.pop_front(); } void resume( UnitBase* src = NULL ) { - m_dbg.verbosePrefix(prefix(),CALL_INFO,1,LOAD_MASK,"pending=%lu\n",m_pendingQ.size()); + m_dbg.verbosePrefix(prefix(),CALL_INFO,3,LOAD_MASK,"pending=%lu\n",m_pendingQ.size()); assert( m_blocked == true ); m_blocked = false; diff --git a/src/sst/elements/firefly/memoryModel/memUnit.h b/src/sst/elements/firefly/memoryModel/memUnit.h index 5c1b8d1a33..c70494bd7f 100644 --- a/src/sst/elements/firefly/memoryModel/memUnit.h +++ b/src/sst/elements/firefly/memoryModel/memUnit.h @@ -79,7 +79,7 @@ XXX& xxx = m_blocked.front( ); work( xxx.delay, xxx.op, xxx.memReq, xxx.src, xxx.qTime, xxx.callback ); - m_model.schedResume( 0, xxx.src ); + m_model.schedResume( 0, xxx.src, (UnitBase*) ( xxx.op == Read ? "R" : "W" ) ); m_blocked.pop_front(); } } diff --git a/src/sst/elements/firefly/memoryModel/muxUnit.h b/src/sst/elements/firefly/memoryModel/muxUnit.h index 56a53b2452..e188662018 100644 --- a/src/sst/elements/firefly/memoryModel/muxUnit.h +++ b/src/sst/elements/firefly/memoryModel/muxUnit.h @@ -32,12 +32,15 @@ m_dbg.verbosePrefix(prefix(),CALL_INFO,1,MUX_MASK,"%s addr=%#" PRIx64 " length=%lu\n",src->name().c_str(), req->addr,req->length); if ( ! m_blockedSrc && ! m_scheduled ) { if ( m_unit->store( this, req ) ) { + + m_dbg.verbosePrefix(prefix(),CALL_INFO,1,MUX_MASK,"blocking\n"); m_blockedSrc = src; return true; } else { return false; } } else { + m_dbg.verbosePrefix(prefix(),CALL_INFO,1,MUX_MASK,"blocking\n"); m_blockedQ.push_back( Entry( Entry::Store, src, req ) ); return true; } @@ -49,12 +52,14 @@ if ( ! m_blockedSrc && ! m_scheduled ) { if ( m_unit->load( this, req, callback ) ) { + m_dbg.verbosePrefix(prefix(),CALL_INFO,1,MUX_MASK,"blocking\n"); m_blockedSrc = src; return true; } else { return false; } } else { + m_dbg.verbosePrefix(prefix(),CALL_INFO,1,MUX_MASK,"blocking\n"); m_blockedQ.push_back( Entry( Entry::Load, src, req, callback ) ); return true; } @@ -74,6 +79,7 @@ } if ( ! blocked ) { + m_dbg.verbosePrefix(prefix(),CALL_INFO,1,MUX_MASK,"unblocking\n"); m_model.schedResume( 0, entry.src ); if ( m_blockedQ.size() > 1 ) { m_scheduled = true; @@ -88,9 +94,11 @@ void resume( UnitBase* src = NULL ) { m_dbg.verbosePrefix(prefix(),CALL_INFO,2,MUX_MASK,"\n"); if ( m_blockedSrc ) { + m_dbg.verbosePrefix(prefix(),CALL_INFO,1,MUX_MASK,"unblocking\n"); m_model.schedResume( 0, m_blockedSrc ); m_blockedSrc = NULL; } + m_dbg.verbosePrefix(prefix(),CALL_INFO,2,MUX_MASK,"scheduled=%d numBlocked=%zu\n",m_scheduled, m_blockedQ.size()); if ( !m_scheduled && ! 
m_blockedQ.empty() ) { processQ(); diff --git a/src/sst/elements/firefly/memoryModel/nWayCache.h b/src/sst/elements/firefly/memoryModel/nWayCache.h index d42f19b239..fb404f93ec 100644 --- a/src/sst/elements/firefly/memoryModel/nWayCache.h +++ b/src/sst/elements/firefly/memoryModel/nWayCache.h @@ -26,7 +26,7 @@ class NWayCache { m_pageShift = calcPow( pageSize ); m_setShift = calcPow( nSets ); - printf("%s():%d m_setMask=%d m_pageShift=%d m_setShift=%d\n",__func__,__LINE__,m_setMask,m_pageShift,m_setShift); + printf("%s():%d m_setMask=%" PRIx64 " m_pageShift=%d m_setShift=%d\n",__func__,__LINE__,m_setMask,m_pageShift,m_setShift); } bool isValid( Hermes::Vaddr addr ) { diff --git a/src/sst/elements/firefly/memoryModel/sharedTlbUnit.h b/src/sst/elements/firefly/memoryModel/sharedTlbUnit.h index 3ca26c67f8..1c94c5b589 100644 --- a/src/sst/elements/firefly/memoryModel/sharedTlbUnit.h +++ b/src/sst/elements/firefly/memoryModel/sharedTlbUnit.h @@ -23,9 +23,11 @@ class SharedTlbUnit : public Unit { Callback callback; }; + std::string m_name; public: SharedTlbUnit( SimpleMemoryModel& model, Output& dbg, int id, std::string name, SharedTlb* tlb, Unit* load, Unit* store, int maxStores, int maxLoads ) : Unit( model, dbg ), + m_name(name), m_tlb(tlb), m_load(load), m_store(store), @@ -44,15 +46,19 @@ class SharedTlbUnit : public Unit { ~SharedTlbUnit() { } + + void printStatus( Output& out, int id ) { + out.output("NIC %d: %s pending=%d %p %p\n",id, m_name.c_str(), m_pendingLookups, m_blockedStoreSrc, m_blockedLoadSrc ); + } + void resume( UnitBase* unit ) { - m_dbg.verbosePrefix(prefix(),CALL_INFO,1,TLB_MASK,"\n"); if ( unit == m_store ) { m_dbg.verbosePrefix(prefix(),CALL_INFO,1,TLB_MASK,"store unblocked\n"); assert( m_storeBlocked ); m_storeBlocked = false; while( ! m_storeBlocked && ! m_readyStores.empty() ) { - m_loadBlocked = passUpLoad( m_readyLoads.front() ); + m_storeBlocked = passUpStore( m_readyStores.front() ); m_readyStores.pop_front(); } } @@ -83,6 +89,7 @@ class SharedTlbUnit : public Unit { if ( blockedStore() ) { m_dbg.verbosePrefix(prefix(),CALL_INFO,1,TLB_MASK,"Blocking source, pid %d, req Addr %#" PRIx64 "\n", req->pid, req->addr ); + assert( ! m_blockedStoreSrc ); m_blockedStoreSrc = src; return true; } else { @@ -104,6 +111,7 @@ class SharedTlbUnit : public Unit { } if ( blockedLoad() ) { m_dbg.verbosePrefix(prefix(),CALL_INFO,1,TLB_MASK,"Blocking source, pid %d, req Addr %#" PRIx64 "\n", req->pid, req->addr ); + assert( ! 
m_blockedLoadSrc ); m_blockedLoadSrc = src; return true; } else { @@ -118,8 +126,8 @@ class SharedTlbUnit : public Unit { bool retval = m_load->load( this, entry->req, entry->callback ); delete entry; - if ( m_readyStores.size() == m_maxPendingLoads ) { - assert( m_blockedLoadSrc ); + if ( m_blockedLoadSrc && m_readyLoads.size() == m_maxPendingLoads ) { + m_dbg.verbosePrefix(prefix(),CALL_INFO,1,TLB_MASK,"schedule resume\n"); m_model.schedResume( 0, m_blockedLoadSrc ); m_blockedLoadSrc = NULL; } @@ -132,11 +140,12 @@ class SharedTlbUnit : public Unit { bool retval = m_store->storeCB( this, entry->req, entry->callback ); delete entry; - if ( m_readyLoads.size() == m_maxPendingStores ) { - assert( m_blockedStoreSrc ); + if ( m_blockedStoreSrc && m_readyStores.size() == m_maxPendingStores ) { + m_dbg.verbosePrefix(prefix(),CALL_INFO,1,TLB_MASK,"schedule resume\n"); m_model.schedResume( 0, m_blockedStoreSrc ); m_blockedStoreSrc = NULL; } + return retval; } @@ -152,16 +161,32 @@ class SharedTlbUnit : public Unit { return blockedTlb() || m_readyLoads.size() >= m_maxPendingLoads; } - void storeAddrResolved( Callback callback, MemReq* req, uint64_t addr ) { + void checkBlockedSrcs() { + bool flag = true; + if ( m_blockedLoadSrc ) { + if ( m_readyLoads.size() < m_maxPendingLoads ) { + m_dbg.verbosePrefix(prefix(),CALL_INFO,1,TLB_MASK,"schedule resume\n"); + m_model.schedResume( 0, m_blockedLoadSrc ); + m_blockedLoadSrc = NULL; + flag = false; + } + } - m_dbg.verbosePrefix(prefix(),CALL_INFO,1,TLB_MASK,"addr=%#" PRIx64 " -> %#" PRIx64 " pendingLookups=%d\n", - req->addr, addr, m_pendingLookups); - if ( m_blockedStoreSrc ) { - if ( m_readyStores.size() < m_maxPendingStores && blockedTlb() ) { + if ( flag && m_blockedStoreSrc ) { + if ( m_readyStores.size() < m_maxPendingStores ) { + m_dbg.verbosePrefix(prefix(),CALL_INFO,1,TLB_MASK,"schedule resume\n"); m_model.schedResume( 0, m_blockedStoreSrc ); m_blockedStoreSrc = NULL; } } + } + + void storeAddrResolved( Callback callback, MemReq* req, uint64_t addr ) { + + m_dbg.verbosePrefix(prefix(),CALL_INFO,1,TLB_MASK,"addr=%#" PRIx64 " -> %#" PRIx64 " pendingLookups=%d\n", + req->addr, addr, m_pendingLookups); + + checkBlockedSrcs(); assert( m_pendingLookups > 0 ); --m_pendingLookups; @@ -184,12 +209,7 @@ class SharedTlbUnit : public Unit { m_dbg.verbosePrefix(prefix(),CALL_INFO,1,TLB_MASK,"addr=%#" PRIx64 " -> %#" PRIx64 " pendingLookups=%d\n", req->addr, addr, m_pendingLookups); - if ( m_blockedLoadSrc ) { - if ( m_readyLoads.size() < m_maxPendingLoads && blockedTlb() ) { - m_model.schedResume( 0, m_blockedLoadSrc ); - m_blockedLoadSrc = NULL; - } - } + checkBlockedSrcs(); assert( m_pendingLookups > 0 ); --m_pendingLookups; diff --git a/src/sst/elements/firefly/memoryModel/simpleMemoryModel.h b/src/sst/elements/firefly/memoryModel/simpleMemoryModel.h index 13cbe3bde6..7ed397f053 100644 --- a/src/sst/elements/firefly/memoryModel/simpleMemoryModel.h +++ b/src/sst/elements/firefly/memoryModel/simpleMemoryModel.h @@ -72,7 +72,7 @@ class SimpleMemoryModel : SubComponent { enum NIC_Thread { Send, Recv }; SimpleMemoryModel( Component* comp, Params& params, int id, int numCores, int numNicUnits ) : - SubComponent( comp ), m_numNicThreads(numNicUnits) + SubComponent( comp ), m_numNicThreads(numNicUnits), m_hostCacheUnit(NULL) { char buffer[100]; snprintf(buffer,100,"@t:%d:SimpleMemoryModel::@p():@l ",id); @@ -106,14 +106,20 @@ class SimpleMemoryModel : SubComponent { int numWalkers = params.find( "numWalkers", 1 ); int numTlbSlots = params.find( "numTlbSlots", 1 
); int nicToHostMTU = params.find( "nicToHostMTU", 256 ); + bool useHostCache = params.find( "useHostCache", true ); m_memUnit = new MemUnit( *this, m_dbg, id, memReadLat_ns, memWriteLat_ns, memNumSlots ); - m_hostCacheUnit = new CacheUnit( *this, m_dbg, id, m_memUnit, hostCacheUnitSize, hostCacheLineSize, hostCacheNumMSHR, "Host" ); - m_muxUnit = new MuxUnit( *this, m_dbg, id, m_hostCacheUnit, "HostCache" ); + if ( useHostCache ) { + m_hostCacheUnit = new CacheUnit( *this, m_dbg, id, m_memUnit, hostCacheUnitSize, hostCacheLineSize, hostCacheNumMSHR, "Host" ); + m_muxUnit = new MuxUnit( *this, m_dbg, id, m_hostCacheUnit, "HostCache" ); + } else { + m_muxUnit = new MuxUnit( *this, m_dbg, id, m_memUnit, "HostCache" ); + } m_busBridgeUnit = new BusBridgeUnit( *this, m_dbg, id, m_muxUnit, busBandwidth, busNumLinks, busLatency, TLP_overhead, DLL_bytes, hostCacheLineSize, widgetSlots ); + MuxUnit* muxUnit = new MuxUnit( *this, m_dbg, id, m_busBridgeUnit, "Nic" ); m_sharedTlb = new SharedTlb( *this, m_dbg, id, tlbSize, tlbPageSize, tlbMissLat_ns, numWalkers ); @@ -130,26 +136,14 @@ class SimpleMemoryModel : SubComponent { SharedTlbUnit* tlb = new SharedTlbUnit( *this, m_dbg, id, unitName.str().c_str(), m_sharedTlb, new LoadUnit( *this, m_dbg, id, - m_busBridgeUnit, + muxUnit, nicNumLoadSlots, unitName.str().c_str() ), new StoreUnit( *this, m_dbg, id, - m_busBridgeUnit, + muxUnit, nicNumStoreSlots, unitName.str().c_str() ), numTlbSlots, numTlbSlots ); -#if 0 - Tlb* tlb = new Tlb( *this, m_dbg, id, unitName.str().c_str(), - new LoadUnit( *this, m_dbg, id, - m_busBridgeUnit, - nicNumLoadSlots, unitName.str().c_str() ), - - new StoreUnit( *this, m_dbg, id, - m_busBridgeUnit, - nicNumStoreSlots, unitName.str().c_str() ), - tlbSize, tlbPageSize, tlbMissLat_ns, numWalkers, numTlbSlots, numTlbSlots - ); -#endif m_threads.push_back( new Thread( *this, unitName.str(), m_dbg, id, nicToHostMTU, tlb, tlb ) @@ -177,7 +171,9 @@ class SimpleMemoryModel : SubComponent { virtual ~SimpleMemoryModel() { m_sharedTlb->printStats(); - delete m_hostCacheUnit; + if ( m_hostCacheUnit ) { + delete m_hostCacheUnit; + } for ( unsigned i = 0; i < m_threads.size(); i++ ) { delete m_threads[i]; } @@ -242,6 +238,13 @@ class SimpleMemoryModel : SubComponent { NicUnit& nicUnit() { return *m_nicUnit; } BusBridgeUnit& busUnit() { return *m_busBridgeUnit; } + void printStatus( Output& out, int id ) { + for ( unsigned i = 0; i < m_threads.size(); i++ ) { + m_threads[i]->printStatus( out, id ); + } + } + + private: Link* m_selfLink; diff --git a/src/sst/elements/firefly/memoryModel/storeUnit.h b/src/sst/elements/firefly/memoryModel/storeUnit.h index b3930ce2cb..0562ad97a8 100644 --- a/src/sst/elements/firefly/memoryModel/storeUnit.h +++ b/src/sst/elements/firefly/memoryModel/storeUnit.h @@ -21,7 +21,12 @@ class StoreUnit : public Unit { m_prefix = "@t:" + std::to_string(id) + ":SimpleMemoryModel::"+ name + "StoreUnit::@p():@l "; } + std::string& name() { return m_name; } + + void printStatus( Output& out, int id ) { + out.output("NIC %d: %s pending=%zu\n",id, m_name.c_str(), m_pendingQ.size() ); + } bool storeCB( UnitBase* src, MemReq* req, Callback callback = NULL ) { diff --git a/src/sst/elements/firefly/memoryModel/thread.h b/src/sst/elements/firefly/memoryModel/thread.h index fe56632f75..21065c2d2b 100644 --- a/src/sst/elements/firefly/memoryModel/thread.h +++ b/src/sst/elements/firefly/memoryModel/thread.h @@ -75,7 +75,7 @@ class Work { void print( Output& dbg, const char* prefix ) { for ( unsigned i = 0; i < m_ops->size(); i++ ) 
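// Aside, heavily simplified (types reduced to empty structs): the new "useHostCache" parameter
// above makes the host-side cache optional; when it is off, the mux is wired straight to the
// memory unit and m_hostCacheUnit stays NULL, which is why the destructor now guards the delete.
// The shape of that wiring, under those assumptions:
struct Unit      { virtual ~Unit() {} };
struct MemUnit   : Unit {};
struct CacheUnit : Unit { explicit CacheUnit( Unit* /*backing*/ ) {} };
struct MuxUnit   : Unit { explicit MuxUnit( Unit* /*target*/ ) {} };

struct MiniModel {
    MemUnit*   m_memUnit;
    CacheUnit* m_hostCacheUnit;   // may legitimately stay NULL now
    MuxUnit*   m_muxUnit;

    explicit MiniModel( bool useHostCache ) :
        m_memUnit(new MemUnit), m_hostCacheUnit(nullptr), m_muxUnit(nullptr)
    {
        if ( useHostCache ) {
            m_hostCacheUnit = new CacheUnit( m_memUnit );
            m_muxUnit       = new MuxUnit( m_hostCacheUnit );
        } else {
            m_muxUnit       = new MuxUnit( m_memUnit );   // bypass the cache entirely
        }
    }

    ~MiniModel() {
        if ( m_hostCacheUnit ) { delete m_hostCacheUnit; }   // mirrors the guarded delete
        delete m_muxUnit;
        delete m_memUnit;
    }
};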
{ - dbg.verbosePrefix(prefix,CALL_INFO,1,THREAD_MASK,"%s %#" PRIx64 " %lu\n",(*m_ops)[i].getName(), (*m_ops)[i].addr, (*m_ops)[i].length ); + dbg.verbosePrefix(prefix,CALL_INFO,2,THREAD_MASK,"%s %#" PRIx64 " %lu\n",(*m_ops)[i].getName(), (*m_ops)[i].addr, (*m_ops)[i].length ); } } int m_workNum; @@ -91,9 +91,11 @@ class Work { class Thread : public UnitBase { + std::string m_name; + public: Thread( SimpleMemoryModel& model, std::string name, Output& output, int id, int accessSize, Unit* load, Unit* store ) : - m_model(model), m_dbg(output), m_loadUnit(load), m_storeUnit(store), + m_model(model), m_name(name), m_dbg(output), m_id(id), m_loadUnit(load), m_storeUnit(store), m_maxAccessSize( accessSize ), m_nextOp(NULL), m_waitingOnOp(NULL), m_blocked(false), m_curWorkNum(0),m_lastDelete(0) { m_prefix = "@t:" + std::to_string(id) + ":SimpleMemoryModel::" + name +"::@p():@l "; @@ -107,6 +109,23 @@ class Thread : public UnitBase { } } + void printStatus( Output& out, int id ) { + out.output( "NIC %d: %s cur=%d last=%d blocked=%d %p %p\n",id, m_name.c_str(), m_curWorkNum, m_lastDelete, m_blocked, m_nextOp, m_waitingOnOp ); + if ( m_workQ.size() ) { + out.output( "NIC %d: %s work.size=%zu\n", id, m_name.c_str(), m_workQ.size() ); + std::deque::iterator iter = m_workQ.begin(); + + for ( ; iter != m_workQ.end(); ++iter) { + (*iter)->print(out,""); + } + } + if ( m_OOOwork.size() ) { + out.output( "NIC %d: %s OOOwork.size: %zu \n", id, m_name.c_str(), m_OOOwork.size() ); + } + m_loadUnit->printStatus( out, id ); + m_storeUnit->printStatus( out, id ); + } + bool isIdle() { return !m_workQ.size(); } @@ -191,12 +210,14 @@ class Thread : public UnitBase { case MemOp::HostStore: case MemOp::BusStore: case MemOp::BusDmaToHost: + addr |= (uint64_t) pid << 56; m_blocked = m_storeUnit->storeCB( this, new MemReq( addr, length, pid ), callback ); break; case MemOp::HostLoad: case MemOp::BusLoad: case MemOp::BusDmaFromHost: + addr |= (uint64_t) pid << 56; m_blocked = m_loadUnit->load( this, new MemReq( addr, length, pid ), callback ); break; @@ -253,10 +274,10 @@ class Thread : public UnitBase { delete work; ++m_lastDelete; while ( ! m_OOOwork.empty() ) { - m_dbg.verbosePrefix(prefix(),CALL_INFO,1,THREAD_MASK,"check OOO, looking for %d\n",m_lastDelete); + m_dbg.verbosePrefix(prefix(),CALL_INFO,2,THREAD_MASK,"check OOO, looking for %d\n",m_lastDelete); if ( m_OOOwork.find( m_lastDelete ) != m_OOOwork.end() ) { work = m_OOOwork[ m_lastDelete ]; - m_dbg.verbosePrefix(prefix(),CALL_INFO,1,THREAD_MASK,"retire OOO work %p\n",m_OOOwork[m_lastDelete]); + m_dbg.verbosePrefix(prefix(),CALL_INFO,1,THREAD_MASK,"delete OOO work %p\n",m_OOOwork[m_lastDelete]); while ( ! 
work->m_pendingCallbacks.empty() ) { work->m_pendingCallbacks.front()(); work->m_pendingCallbacks.pop_front(); @@ -304,6 +325,7 @@ class Thread : public UnitBase { int m_maxAccessSize; int m_curWorkNum; int m_lastDelete; + int m_id; std::map m_OOOwork; }; diff --git a/src/sst/elements/firefly/memoryModel/unit.h b/src/sst/elements/firefly/memoryModel/unit.h index 9553d0fd46..498b1f51d4 100644 --- a/src/sst/elements/firefly/memoryModel/unit.h +++ b/src/sst/elements/firefly/memoryModel/unit.h @@ -42,6 +42,7 @@ int numPendingWrites() { return m_pendingWrite; } + virtual void printStatus( Output&, int ) {assert(0);} private: int m_pendingWrite; diff --git a/src/sst/elements/firefly/nic.cc b/src/sst/elements/firefly/nic.cc index 1a7da63e14..4b3bcec741 100644 --- a/src/sst/elements/firefly/nic.cc +++ b/src/sst/elements/firefly/nic.cc @@ -40,7 +40,8 @@ Nic::Nic(ComponentId_t id, Params ¶ms) : m_getKey(10), m_simpleMemoryModel(NULL), m_respKey(1), - m_curNetworkSrc(-1) + m_curNetworkSrc(-1), + m_sentPkts(0) { m_myNodeId = params.find("nid", -1); assert( m_myNodeId != -1 ); @@ -69,6 +70,7 @@ Nic::Nic(ComponentId_t id, Params ¶ms) : m_tracedNode = params.find( "tracedNode", -1 ); m_tracedPkt = params.find( "tracedPkt", -1 ); + SimTime_t shmemSendSetupLat = params.find( "shmemSendSetupLat", 100 ) ; int numShmemCmdSlots = params.find( "numShmemCmdSlots", 32 ); int maxSendMachineQsize = params.find( "maxSendMachineQsize", 1 ); int maxRecvMachineQsize = params.find( "maxRecvMachineQsize", 1 ); @@ -79,10 +81,11 @@ Nic::Nic(ComponentId_t id, Params ¶ms) : } ++numSendMachines; + int numRecvNicUnits = params.find( "numRecvNicUnits", 1 ); m_unitPool = new UnitPool( m_dbg, params.find( "nicAllocationPolicy", "RoundRobin" ), - params.find( "numRecvNicUnits", 1 ), + numRecvNicUnits, numSendMachines, 1, m_num_vNics @@ -99,7 +102,7 @@ Nic::Nic(ComponentId_t id, Params ¶ms) : assert(0); } - int minPktPayload = 64; + int minPktPayload = 32; assert( ( packetSizeInBytes - packetOverhead ) >= minPktPayload ); UnitAlgebra input_buf_size = params.find("input_buf_size" ); @@ -157,18 +160,19 @@ Nic::Nic(ComponentId_t id, Params ¶ms) : m_sendEntryQ[i].first = false; } - m_shmem = new Shmem( *this, m_myNodeId, m_num_vNics, m_dbg, numShmemCmdSlots, getDelay_ns(), getDelay_ns() ); + m_shmem = new Shmem( *this, m_myNodeId, m_num_vNics, m_dbg, numShmemCmdSlots, getDelay_ns(), getDelay_ns(), shmemSendSetupLat ); if ( params.find( "useSimpleMemoryModel", 0 ) ) { Params smmParams = params.find_prefix_params( "simpleMemoryModel." 
); smmParams.insert( "busLatency", std::to_string(m_nic2host_lat_ns), false ); m_simpleMemoryModel = new SimpleMemoryModel( this, smmParams, m_myNodeId, m_num_vNics, m_unitPool->getTotal() ); } - + m_recvMachine = new RecvMachine( *this, 0, m_vNicV.size(), m_myNodeId, params.find("verboseLevel",0), params.find("verboseMask",-1), - rxMatchDelay, hostReadDelay, maxRecvMachineQsize ); + rxMatchDelay, hostReadDelay, maxRecvMachineQsize, + params.find<>( "maxActiveRecvStreams", 1024*1024) ); m_sendMachineV.resize(numSendMachines); for ( int i = 0; i < numSendMachines - 1; i++ ) { @@ -230,10 +234,14 @@ Nic::~Nic() delete m_shmem; delete m_linkControl; + int numRcvd = m_recvMachine->getNumReceived(); + int numSent=0; delete m_recvMachine; for ( int i = 0; i < m_sendMachineV.size(); i++ ) { + numSent += m_sendMachineV[i]->getNumSent(); delete m_sendMachineV[i]; } + m_dbg.debug(CALL_INFO,1,1," finish numSent=%d numRcvd=%d\n",numSent,numRcvd); if ( m_recvNotifyFunctor ) delete m_recvNotifyFunctor; if ( m_sendNotifyFunctor ) delete m_sendNotifyFunctor; @@ -415,7 +423,7 @@ void Nic::regMemRgn( NicCmdEvent *e, int vNicNum ) void Nic::qSendEntry( SendEntryBase* entry ) { - m_dbg.debug(CALL_INFO,1,NIC_DBG_SEND_MACHINE, "myPid=%d destNode=%d destPid=%d size=%zu %s\n", + m_dbg.debug(CALL_INFO,2,NIC_DBG_SEND_MACHINE, "myPid=%d destNode=%d destPid=%d size=%zu %s\n", entry->local_vNic(), entry->dest(), entry->dst_vNic(), entry->totalBytes(), entry->isCtrl() ? "Ctrl" : entry->isAck() ? "Ack" : "Std"); @@ -443,7 +451,7 @@ void Nic::qSendEntry( SendEntryBase* entry ) { void Nic::notifySendDone( SendMachine* mach, SendEntryBase* entry ) { int pid = entry->local_vNic(); - m_dbg.debug(CALL_INFO,1,NIC_DBG_SEND_MACHINE,"machine=%d pid=%d\n", mach->getId(), pid ); + m_dbg.debug(CALL_INFO,2,NIC_DBG_SEND_MACHINE,"machine=%d pid=%d\n", mach->getId(), pid ); m_sendEntryQ[pid].first = false; @@ -452,7 +460,7 @@ void Nic::notifySendDone( SendMachine* mach, SendEntryBase* entry ) { for ( unsigned i = 0; i < m_sendEntryQ.size(); i++ ) { - m_dbg.debug(CALL_INFO,1,NIC_DBG_SEND_MACHINE,"check pid=%d size=%zu\n", i, m_sendEntryQ[i].second.size( )); + m_dbg.debug(CALL_INFO,2,NIC_DBG_SEND_MACHINE,"check pid=%d size=%zu\n", i, m_sendEntryQ[i].second.size( )); if ( ! m_sendEntryQ[i].first && ! 
m_sendEntryQ[i].second.empty() && m_sendEntryQ[i].second.front( ).first < cur ) { @@ -461,12 +469,12 @@ void Nic::notifySendDone( SendMachine* mach, SendEntryBase* entry ) { } if ( next > -1 ) { - m_dbg.debug(CALL_INFO,1,NIC_DBG_SEND_MACHINE,"run pid=%d \n", next); + m_dbg.debug(CALL_INFO,2,NIC_DBG_SEND_MACHINE,"run pid=%d \n", next); m_sendEntryQ[next].first = false; mach->run(m_sendEntryQ[ next ].second.front().second ); m_sendEntryQ[ next ].second.pop_front(); } else { - m_sendMachineQ.push_back(mach); + m_sendMachineQ.push_back(mach); } } @@ -514,6 +522,7 @@ void Nic::sendPkt( std::pair< FireflyNetworkEvent*, int>& entry, int vc ) FireflyNetworkEvent* ev = entry.first; assert( ev->bufSize() ); + ++m_sentPkts; SimpleNetwork::Request* req = new SimpleNetwork::Request(); req->dest = IdToNet( entry.second ); req->src = IdToNet( m_myNodeId ); @@ -528,8 +537,10 @@ void Nic::sendPkt( std::pair< FireflyNetworkEvent*, int>& entry, int vc ) ++m_packetId; } m_dbg.debug(CALL_INFO,3,NIC_DBG_SEND_NETWORK, - "dst=%" PRIu64 " sending event with %zu bytes packetId=%" PRIu64 "\n",req->dest, - ev->bufSize(), (uint64_t)m_packetId); + "dst=%" PRIu64 " sending event with %zu bytes packetId=%" PRIu64 " %s %s\n",req->dest, + ev->bufSize(), (uint64_t)m_packetId, + ev->isHdr() ? "Hdr":"", + ev->isTail() ? "Tail":"" ); bool sent = m_linkControl->send( req, vc ); assert( sent ); } diff --git a/src/sst/elements/firefly/nic.h b/src/sst/elements/firefly/nic.h index dd7f2e9a75..a3ed5daab3 100644 --- a/src/sst/elements/firefly/nic.h +++ b/src/sst/elements/firefly/nic.h @@ -282,7 +282,15 @@ class Nic : public SST::Component { void init( unsigned int phase ); int getNodeId() { return m_myNodeId; } int getNum_vNics() { return m_num_vNics; } - void printStatus(Output &out) {} + void printStatus(Output &out) { + out.output("NIC %d: start time=%zu\n", m_myNodeId, (size_t) getCurrentSimTimeNano() ); + out.output("NIC %d: Received packets: %d\n", m_myNodeId, m_recvMachine->getNumReceivedPkts()); + out.output("NIC %d: Sent packets: %d\n", m_myNodeId, m_sentPkts); + m_simpleMemoryModel->printStatus( out, m_myNodeId ); + out.output("NIC %d: done\n", m_myNodeId ); + } + + int m_sentPkts; void detailedMemOp( Thornhill::DetailedCompute* detailed, std::vector& vec, std::string op, Callback callback ); diff --git a/src/sst/elements/firefly/nicEvents.h b/src/sst/elements/firefly/nicEvents.h index 83aea915a5..4a1723c654 100644 --- a/src/sst/elements/firefly/nicEvents.h +++ b/src/sst/elements/firefly/nicEvents.h @@ -54,6 +54,7 @@ class NicCmdBaseEvent : public Event { NicCmdBaseEvent( Type type ) : Event(), base_type(type) {} + virtual int getPid() { return -1; } NotSerializable(NicCmdBaseEvent) }; @@ -131,6 +132,7 @@ class NicShmemSendCmdEvent : public NicShmemCmdEvent { NicShmemCmdEvent( type ), vnic(vnic), node(node) {} virtual ~NicShmemSendCmdEvent() {} + virtual int getPid() { return vnic; } int getVnic() { return vnic; } int getNode() { return node; } virtual Hermes::Vaddr getMyAddr() { assert(0); } @@ -357,7 +359,7 @@ class NicShmemOpCmdEvent : public NicShmemCmdEvent { NicShmemCmdEvent( Wait ), addr(addr), op(op), value(value), callback(callback) {} Hermes::Vaddr getAddr() { return addr; } - virtual int getNode() { return -2; } + virtual int getNode() { return -1; } Hermes::Shmem::WaitOp op; Hermes::Vaddr addr; diff --git a/src/sst/elements/firefly/nicMsgStream.h b/src/sst/elements/firefly/nicMsgStream.h index a77e8b0855..26056647d7 100644 --- a/src/sst/elements/firefly/nicMsgStream.h +++ 
b/src/sst/elements/firefly/nicMsgStream.h @@ -16,6 +16,9 @@ class MsgStream : public StreamBase { public: MsgStream( Output&, Ctx*, int srcNode, int srcPid, int destPid, FireflyNetworkEvent* ); + ~MsgStream() { + m_dbg.debug(CALL_INFO,1,NIC_DBG_RECV_STREAM,"\n"); + } bool isBlocked() { return m_recvEntry == NULL || m_blocked; } diff --git a/src/sst/elements/firefly/nicRecvCtx.cc b/src/sst/elements/firefly/nicRecvCtx.cc index 228c04f6f6..576ec70258 100644 --- a/src/sst/elements/firefly/nicRecvCtx.cc +++ b/src/sst/elements/firefly/nicRecvCtx.cc @@ -47,10 +47,10 @@ bool Nic::RecvMachine::Ctx::processStdPkt( FireflyNetworkEvent* ev ) { StreamBase* stream; if ( ev->isHdr() ) { - assert ( m_streamMap.find(srcKey) == m_streamMap.end() ); stream = newStream( ev ); - m_dbg.verbosePrefix(prefix(),CALL_INFO,1,NIC_DBG_RECV_CTX,"new stream %p %s\n",stream, ev->isTail()? "single packet stream":""); + m_dbg.verbosePrefix(prefix(),CALL_INFO,2,NIC_DBG_RECV_CTX,"new stream %p %s\n",stream, ev->isTail()? "single packet stream":""); + assert ( m_streamMap.find(srcKey) == m_streamMap.end() ); if ( ! ev->isTail() ) { m_streamMap[srcKey] = stream; @@ -62,16 +62,18 @@ bool Nic::RecvMachine::Ctx::processStdPkt( FireflyNetworkEvent* ev ) { stream = m_streamMap[srcKey]; if ( ev->isTail() ) { - m_dbg.verbosePrefix(prefix(),CALL_INFO,1,NIC_DBG_RECV_CTX,"tail packet %p\n",stream ); + m_dbg.verbosePrefix(prefix(),CALL_INFO,2,NIC_DBG_RECV_CTX,"tail packet %p\n",stream ); m_streamMap.erase(srcKey); + } else { + m_dbg.verbosePrefix(prefix(),CALL_INFO,2,NIC_DBG_RECV_CTX,"body packet %p\n",stream ); } } if ( stream->isBlocked() ) { - m_dbg.verbosePrefix(prefix(),CALL_INFO,1,NIC_DBG_RECV_CTX,"stream is blocked %p\n",stream ); + m_dbg.verbosePrefix(prefix(),CALL_INFO,2,NIC_DBG_RECV_CTX,"stream is blocked %p\n",stream ); stream->setWakeup( [=]() { - m_dbg.verbosePrefix(prefix(),CALL_INFO_LAMBDA,"processStdPkt",1,NIC_DBG_RECV_CTX,"stream is unblocked %p\n",stream ); + m_dbg.verbosePrefix(prefix(),CALL_INFO_LAMBDA,"processStdPkt",2,NIC_DBG_RECV_CTX,"stream is unblocked %p\n",stream ); stream->processPkt( ev ); m_rm.checkNetworkForData(); } diff --git a/src/sst/elements/firefly/nicRecvCtx.h b/src/sst/elements/firefly/nicRecvCtx.h index 607470ccf3..a576617c49 100644 --- a/src/sst/elements/firefly/nicRecvCtx.h +++ b/src/sst/elements/firefly/nicRecvCtx.h @@ -72,7 +72,7 @@ } void checkWaitOps( int core, Hermes::Vaddr addr, size_t length ) { - m_dbg.verbosePrefix(prefix(),CALL_INFO,1,NIC_DBG_RECV_CTX,"\n"); + m_dbg.verbosePrefix(prefix(),CALL_INFO,2,NIC_DBG_RECV_CTX,"\n"); m_rm.m_nic.m_shmem->checkWaitOps( core, addr, length ); } @@ -94,7 +94,9 @@ void deleteStream( StreamBase* stream ) { m_rm.nic().schedCallback( [=]() { - m_dbg.verbosePrefix(prefix(),CALL_INFO,1,NIC_DBG_RECV_CTX,"deleteStream( %p )\n",stream); + + m_dbg.verbosePrefix(prefix(),CALL_INFO_LAMBDA,"deleteStream",1,NIC_DBG_RECV_CTX,"%p\n",stream); + m_rm.decActiveStream(); delete stream; } ); diff --git a/src/sst/elements/firefly/nicRecvMachine.h b/src/sst/elements/firefly/nicRecvMachine.h index f5a112b334..c88189df58 100644 --- a/src/sst/elements/firefly/nicRecvMachine.h +++ b/src/sst/elements/firefly/nicRecvMachine.h @@ -33,12 +33,17 @@ class RecvMachine { RecvMachine( Nic& nic, int vc, int numVnics, int nodeId, int verboseLevel, int verboseMask, - int rxMatchDelay, int hostReadDelay, int maxQsize ) : + int rxMatchDelay, int hostReadDelay, int maxQsize, int maxActiveStreams ) : m_nic(nic), m_vc(vc), m_rxMatchDelay( rxMatchDelay ), m_hostReadDelay( hostReadDelay ), - 
m_notifyCallback( false ) + m_notifyCallback( false ), + m_numActiveStreams( 0 ), + m_maxActiveStreams( maxActiveStreams ), + m_blockedPkt(NULL), + m_receivedPkts(0), + m_numMsgRcvd(0) { char buffer[100]; snprintf(buffer,100,"@t:%d:Nic::RecvMachine::@p():@l vc=%d ",nodeId,m_vc); @@ -50,7 +55,8 @@ class RecvMachine { } } - virtual ~RecvMachine(){} + int getNumReceived() { return m_numMsgRcvd; } + virtual ~RecvMachine(){ } void regMemRgn( int pid, int rgnNum, MemRgnEntry* entry ) { m_ctxMap[pid]->regMemRgn( rgnNum, entry ); @@ -65,6 +71,19 @@ class RecvMachine { Nic& nic() { return m_nic; } void printStatus( Output& out ); + void decActiveStream() { + --m_numActiveStreams; + assert( m_numActiveStreams >= 0 ); + + if ( m_blockedPkt ) { + m_dbg.debug(CALL_INFO,2,NIC_DBG_RECV_MACHINE,"unblocked\n"); + assert( m_blockedPkt ); + processPkt2( m_blockedPkt ); + m_blockedPkt = NULL; + } + } + int getNumReceivedPkts() { return m_receivedPkts; } + protected: Nic& m_nic; Output m_dbg; @@ -74,10 +93,10 @@ class RecvMachine { void checkNetworkForData() { FireflyNetworkEvent* ev = getNetworkEvent( m_vc ); if ( ev ) { - m_dbg.debug(CALL_INFO,2,NIC_DBG_RECV_MACHINE,"packet available\n"); - m_nic.schedCallback( std::bind( &Nic::RecvMachine::processPkt, this, ev ), 0); + m_dbg.debug(CALL_INFO,1,NIC_DBG_RECV_MACHINE,"packet available\n"); + m_nic.schedCallback( std::bind( &Nic::RecvMachine::processPkt, this, ev )); } else { - m_dbg.debug(CALL_INFO,2,NIC_DBG_RECV_MACHINE,"network idle\n"); + m_dbg.debug(CALL_INFO,1,NIC_DBG_RECV_MACHINE,"network idle\n"); setNotify(); } } @@ -94,7 +113,7 @@ class RecvMachine { } void processNetworkData() { - m_dbg.debug(CALL_INFO,2,NIC_DBG_RECV_MACHINE, "\n"); + m_dbg.debug(CALL_INFO,1,NIC_DBG_RECV_MACHINE, "\n"); // this notifier was called by the LinkControl object, the RecvMachine may // re-install the LinkControl notifier, if it does there would be a cycle @@ -103,18 +122,38 @@ class RecvMachine { m_notifyCallback = false; } + + int m_maxActiveStreams; + int m_numActiveStreams; + FireflyNetworkEvent* m_blockedPkt; virtual void processPkt( FireflyNetworkEvent* ev ) { + + if ( ev->isHdr() ) { + ++m_numActiveStreams; + } + + if ( m_numActiveStreams == m_maxActiveStreams + 1) { + m_dbg.debug(CALL_INFO,1,NIC_DBG_RECV_MACHINE,"blocked on available streams\n"); + assert( ! m_blockedPkt ); + m_blockedPkt = ev; + } else { + processPkt2( ev ); + } + } + + virtual void processPkt2( FireflyNetworkEvent* ev ) { // if the event was consumed, we can can check for the next if ( ! m_ctxMap[ ev->getDestPid() ]->processPkt( ev ) ) { checkNetworkForData(); } else { - m_dbg.debug(CALL_INFO,2,NIC_DBG_RECV_MACHINE,"blocked\n"); + m_dbg.debug(CALL_INFO,2,NIC_DBG_RECV_MACHINE,"blocked by context\n"); } } FireflyNetworkEvent* getNetworkEvent(int vc ) { SST::Interfaces::SimpleNetwork::Request* req = m_nic.m_linkControl->recv(vc); + ++m_receivedPkts; if ( req ) { Event* payload = req->takePayload(); if ( NULL == payload ) return NULL; @@ -122,12 +161,17 @@ class RecvMachine { static_cast(payload); event->setSrcNode( m_nic.NetToId( req->src ) ); delete req; + if ( ! 
event->isCtrl() && event->isHdr() ) { + ++m_numMsgRcvd; + } return event; } else { return NULL; } } + int m_numMsgRcvd; + int m_receivedPkts; int m_vc; int m_rxMatchDelay; bool m_notifyCallback; diff --git a/src/sst/elements/firefly/nicRecvStream.cc b/src/sst/elements/firefly/nicRecvStream.cc index dc1dfa0d4d..4d60163a48 100644 --- a/src/sst/elements/firefly/nicRecvStream.cc +++ b/src/sst/elements/firefly/nicRecvStream.cc @@ -37,6 +37,8 @@ Nic::RecvMachine::StreamBase::~StreamBase() { delete m_recvEntry; } if ( m_sendEntry ) { + m_dbg.verbosePrefix(prefix(),CALL_INFO,1,NIC_DBG_RECV_STREAM,"core=%d targetNode=%d targetCore=%d\n", + getMyPid(), m_sendEntry->dest(), m_sendEntry->dst_vNic() ); m_ctx->nic().qSendEntry( m_sendEntry ); } } diff --git a/src/sst/elements/firefly/nicSendMachine.cc b/src/sst/elements/firefly/nicSendMachine.cc index 5594184ff2..584a729e14 100644 --- a/src/sst/elements/firefly/nicSendMachine.cc +++ b/src/sst/elements/firefly/nicSendMachine.cc @@ -72,7 +72,8 @@ void Nic::SendMachine::getPayload( SendEntryBase* entry, FireflyNetworkEvent* ev } void Nic::SendMachine::streamFini( SendEntryBase* entry ) { - m_dbg.debug(CALL_INFO,1,NIC_DBG_SEND_MACHINE, "%p pid=%d\n",entry,m_id); + m_dbg.debug(CALL_INFO,1,NIC_DBG_SEND_MACHINE, "%p sendMachine=%d pid=%d\n",entry,m_id, entry->local_vNic()); + ++m_numSent; if ( m_I_manage ) { m_sendQ.pop_front(); if ( ! m_sendQ.empty() ) { @@ -84,7 +85,7 @@ void Nic::SendMachine::streamFini( SendEntryBase* entry ) } if ( entry->shouldDelete() ) { - m_dbg.debug(CALL_INFO,1,NIC_DBG_SEND_MACHINE, "%p delete SendEntry entry, pid=%d\n",entry, entry->local_vNic()); + m_dbg.debug(CALL_INFO,2,NIC_DBG_SEND_MACHINE, "%p delete SendEntry entry, pid=%d\n",entry, entry->local_vNic()); delete entry; } } @@ -123,10 +124,8 @@ void Nic::SendMachine::InQ::ready2( FireflyNetworkEvent* ev, int dest, Callback { m_dbg.verbosePrefix(prefix(),CALL_INFO,2,NIC_DBG_SEND_MACHINE, "pass packet to OutQ numBytes=%lu\n", ev->bufSize() ); --m_numPending; - m_outQ->enque( ev, dest ); - if ( callback ) { - callback(); - } + m_outQ->enque( ev, dest, callback ); + if ( m_callback ) { m_dbg.verbosePrefix(prefix(),CALL_INFO,2,NIC_DBG_SEND_MACHINE, "wakeup send machine\n"); m_callback( ); @@ -152,9 +151,9 @@ void Nic::SendMachine::InQ::processPending( ) } } -void Nic::SendMachine::OutQ::enque( FireflyNetworkEvent* ev, int dest ) +void Nic::SendMachine::OutQ::enque( FireflyNetworkEvent* ev, int dest, Callback callback ) { m_dbg.verbosePrefix(prefix(),CALL_INFO,2,NIC_DBG_SEND_MACHINE, "size=%lu\n", m_queue.size()); - m_queue.push_back( std::make_pair(ev,dest) ); + m_queue.push_back( Entry( std::make_pair(ev,dest), callback ) ); m_nic.notifyHavePkt(m_id); } diff --git a/src/sst/elements/firefly/nicSendMachine.h b/src/sst/elements/firefly/nicSendMachine.h index 671e4ad7fa..d865e7095a 100644 --- a/src/sst/elements/firefly/nicSendMachine.h +++ b/src/sst/elements/firefly/nicSendMachine.h @@ -20,6 +20,13 @@ class SendMachine { class OutQ { typedef std::function Callback; + + struct Entry { + Entry( std::pair< FireflyNetworkEvent*, int> data, Callback callback = NULL ) : data(data), callback(callback) {} + std::pair< FireflyNetworkEvent*, int> data; + Callback callback; + }; + std::string m_prefix; const char* prefix() { return m_prefix.c_str(); } public: @@ -31,7 +38,7 @@ class SendMachine { m_prefix = "@t:"+ std::to_string(nic.getNodeId()) +":Nic::SendMachine" + std::to_string(myId) + "::OutQ::@p():@l "; } - void enque( FireflyNetworkEvent* ev, int dest ); + void enque( FireflyNetworkEvent* 
ev, int dest, Callback ); bool isFull() { return m_queue.size() == m_maxQsize; @@ -47,7 +54,7 @@ class SendMachine { return m_queue.empty(); } std::pair< FireflyNetworkEvent*, int>& front() { - return m_queue.front(); + return m_queue.front().data; } void pop() { if ( m_wakeUpCallback ) { @@ -55,7 +62,12 @@ class SendMachine { m_nic.schedCallback( m_wakeUpCallback, 0); m_wakeUpCallback = NULL; } - return m_queue.pop_front(); + + if ( m_queue.front().callback ) { + m_queue.front().callback(); + } + + m_queue.pop_front(); } private: @@ -66,7 +78,8 @@ class SendMachine { int m_maxQsize; Callback m_wakeUpCallback; - std::deque< std::pair< FireflyNetworkEvent*, int> > m_queue; + //std::deque< std::pair< FireflyNetworkEvent*, int> > m_queue; + std::deque< Entry > m_queue; }; class InQ { @@ -124,7 +137,7 @@ class SendMachine { SendMachine( Nic& nic, int nodeId, int verboseLevel, int verboseMask, int myId, int packetSizeInBytes, int pktOverhead, int maxQsize, int unit, bool flag = false ) : m_nic(nic), m_id(myId), m_packetSizeInBytes( packetSizeInBytes - pktOverhead ), - m_unit(unit), m_pktOverhead(pktOverhead), m_activeEntry(NULL), m_I_manage( flag ) + m_unit(unit), m_pktOverhead(pktOverhead), m_activeEntry(NULL), m_I_manage( flag ), m_numSent(0) { char buffer[100]; snprintf(buffer,100,"@t:%d:Nic::SendMachine%d::@p():@l ",nodeId,myId); @@ -136,19 +149,21 @@ class SendMachine { ~SendMachine() { } + int getNumSent() { return m_numSent; } + bool isBusy() { return m_activeEntry; } void run( SendEntryBase* entry ) { - m_dbg.debug(CALL_INFO,1,NIC_DBG_SEND_MACHINE, "new stream\n"); + m_dbg.debug(CALL_INFO,2,NIC_DBG_SEND_MACHINE, "new stream\n"); assert( ! m_I_manage ); m_activeEntry = entry; streamInit( entry ); } void qSendEntry( SendEntryBase* entry ) { - m_dbg.debug(CALL_INFO,1,NIC_DBG_SEND_MACHINE, "new stream\n"); + m_dbg.debug(CALL_INFO,2,NIC_DBG_SEND_MACHINE, "new stream\n"); assert( m_I_manage ); m_sendQ.push_back( entry ); if ( m_sendQ.size() == 1 ) { @@ -158,7 +173,7 @@ class SendMachine { int getId() { return m_id; } bool netPktQ_empty() { return m_outQ->empty(); } - void netPktQ_pop() { return m_outQ->pop(); } + void netPktQ_pop() { m_outQ->pop(); } std::pair< FireflyNetworkEvent*, int>& netPktQ_front() { return m_outQ->front(); } private: @@ -178,4 +193,6 @@ class SendMachine { bool m_I_manage; SendEntryBase* m_activeEntry; std::deque< SendEntryBase* > m_sendQ; + + int m_numSent; }; diff --git a/src/sst/elements/firefly/nicShmem.cc b/src/sst/elements/firefly/nicShmem.cc index 4dc5056a06..33ac43d273 100644 --- a/src/sst/elements/firefly/nicShmem.cc +++ b/src/sst/elements/firefly/nicShmem.cc @@ -22,20 +22,103 @@ using namespace SST::Firefly; void Nic::Shmem::handleEvent( NicShmemCmdEvent* event, int id ) { - m_dbg.verbosePrefix( prefix(),CALL_INFO,1,NIC_DBG_SHMEM,"node=%d core=%d %s event=%p\n", event->getNode(), id, - event->getTypeStr().c_str(), event ); + m_dbg.verbosePrefix( prefix(),CALL_INFO,1,NIC_DBG_SHMEM,"core=%d `%s` targetNode=%d targetCore=%d\n", + id, event->getTypeStr().c_str(), event->getNode(), event->getPid() ); - if( event->getNode() == m_nic.getNodeId() || -2 == event->getNode() ) { - handleHostEvent( event, id ); - } else { - handleNicEvent( event, id ); - } + switch (event->type) { + + // operations that will take place only from the NIC + case NicShmemCmdEvent::Init: + case NicShmemCmdEvent::RegMem: + case NicShmemCmdEvent::Cswap: + case NicShmemCmdEvent::Swap: + handleNicEvent( event, id ); + break; + + case NicShmemCmdEvent::Fadd: + if ( event->getNode() == 
m_nic.getNodeId() ) { + m_nic.schedCallback( + [=]() { + hostFadd( static_cast(event), id ); + }, + 1600 ); + } else { + handleNicEvent( event, id ); + } + break; + + + case NicShmemCmdEvent::Add: + if ( event->getNode() == m_nic.getNodeId() ) { + m_nic.schedCallback( + [=]() { + hostAdd( static_cast(event), id ); + }, + 40 ); + } else { + m_nic.schedCallback( + [=]() { + handleNicEvent( event, id ); + }, + m_sendSetupLatency ); + } + break; + + // operations that could take place in either the NIC or Host + case NicShmemCmdEvent::Put: + if ( event->getNode() == m_nic.getNodeId() ) { + hostPut( static_cast(event), id ); + } else { + handleNicEvent( event, id ); + } + break; + + + case NicShmemCmdEvent::Putv: + if ( event->getNode() == m_nic.getNodeId() ) { + + m_nic.schedCallback( + [=]() { + hostPutv( static_cast(event), id ); + }, + 20 ); + } else { + handleNicEvent( event, id ); + } + break; + + case NicShmemCmdEvent::Get: + if ( event->getNode() == m_nic.getNodeId() ) { + hostGet( static_cast(event), id ); + } else { + handleNicEvent( event, id ); + } + break; + + case NicShmemCmdEvent::Getv: + + if ( event->getNode() == m_nic.getNodeId() ) { + hostGetv( static_cast(event), id ); + } else { + handleNicEvent( event, id ); + } + break; + + + // operations that take place only from the Host + case NicShmemCmdEvent::Fence: + break; + case NicShmemCmdEvent::Wait: + hostWait( static_cast< NicShmemOpCmdEvent*>(event), id ); + break; + } } void Nic::Shmem::handleNicEvent( NicShmemCmdEvent* event, int id ) { - m_dbg.verbosePrefix( prefix(),CALL_INFO,1,NIC_DBG_SHMEM,"node=%d core=%d %s event=%p freeCmdSlots=%d\n", event->getNode(), id, - event->getTypeStr().c_str(), event, m_freeCmdSlots ); + m_dbg.verbosePrefix( prefix(),CALL_INFO,1,NIC_DBG_SHMEM,"core=%d `%s` targetNode=%d targetCore=%d freeCmdSlots=%d\n", + id, event->getTypeStr().c_str(), event->getNode(), event->getPid(), m_freeCmdSlots ); + if( m_freeCmdSlots == 0 ) { m_dbg.verbosePrefix( prefix(),CALL_INFO,1,NIC_DBG_SHMEM,"sendIsBusy\n"); m_pendingCmds.push_back( std::make_pair(event,id ) ); @@ -86,7 +169,7 @@ void Nic::Shmem::handleNicEvent( NicShmemCmdEvent* event, int id ) std::vector* vec = new std::vector; vec->push_back( MemOp( 0, 16, MemOp::Op::HostBusWrite, [=]() { - m_dbg.verbosePrefix( prefix(),CALL_INFO_LAMBDA,__func__,1,NIC_DBG_SHMEM,"handleNicEvent latency=%" PRIu64 "\n", + m_dbg.verbosePrefix( prefix(),CALL_INFO_LAMBDA,"handleNicEvent",1,NIC_DBG_SHMEM,"latency=%" PRIu64 "\n", m_nic.getCurrentSimTimeNano() - start); handleEvent2( event, id ); } @@ -100,8 +183,11 @@ void Nic::Shmem::handleEvent2( NicShmemCmdEvent* event, int id ) m_dbg.verbosePrefix( prefix(),CALL_INFO,1,NIC_DBG_SHMEM,"core=%d %s\n",id,event->getTypeStr().c_str()); switch (event->type) { - case NicShmemCmdEvent::Add: - add( static_cast< NicShmemAddCmdEvent*>(event), id ); + case NicShmemCmdEvent::Init: + init( static_cast< NicShmemInitCmdEvent*>(event), id ); + break; + case NicShmemCmdEvent::RegMem: + regMem( static_cast< NicShmemRegMemCmdEvent*>(event), id ); break; case NicShmemCmdEvent::Put: put( static_cast(event), id ); @@ -109,64 +195,31 @@ void Nic::Shmem::handleEvent2( NicShmemCmdEvent* event, int id ) case NicShmemCmdEvent::Putv: putv( static_cast(event), id ); break; - case NicShmemCmdEvent::Init: - init( static_cast< NicShmemInitCmdEvent*>(event), id ); - break; case NicShmemCmdEvent::Get: get( static_cast(event), id ); break; case NicShmemCmdEvent::Getv: getv( static_cast(event), id ); break; - case NicShmemCmdEvent::RegMem: - regMem( static_cast< 
NicShmemRegMemCmdEvent*>(event), id ); - break; - case NicShmemCmdEvent::Fadd: - fadd( static_cast< NicShmemFaddCmdEvent*>(event), id ); - break; - case NicShmemCmdEvent::Cswap: - cswap( static_cast< NicShmemCswapCmdEvent*>(event), id ); - break; - case NicShmemCmdEvent::Swap: - swap( static_cast< NicShmemSwapCmdEvent*>(event), id ); - break; - - default: - assert(0); - } -} - -void Nic::Shmem::handleHostEvent( NicShmemCmdEvent* event, int id ) -{ - m_dbg.verbosePrefix( prefix(),CALL_INFO,1,NIC_DBG_SHMEM,"core=%d %s\n",id, event->getTypeStr().c_str() ); - switch (event->type) { - - case NicShmemCmdEvent::Wait: - hostWait( static_cast< NicShmemOpCmdEvent*>(event), id ); - break; case NicShmemCmdEvent::Add: - hostAdd( static_cast< NicShmemAddCmdEvent*>(event), id ); - break; - case NicShmemCmdEvent::Put: - hostPut( static_cast(event), id ); - break; - case NicShmemCmdEvent::Putv: - hostPutv( static_cast(event), id ); - break; - case NicShmemCmdEvent::Get: - hostGet( static_cast(event), id ); - break; - case NicShmemCmdEvent::Getv: - hostGetv( static_cast(event), id ); + add( static_cast< NicShmemAddCmdEvent*>(event), id ); break; case NicShmemCmdEvent::Fadd: - hostFadd( static_cast< NicShmemFaddCmdEvent*>(event), id ); + fadd( static_cast< NicShmemFaddCmdEvent*>(event), id ); break; case NicShmemCmdEvent::Cswap: - hostCswap( static_cast< NicShmemCswapCmdEvent*>(event), id ); + if ( event->getNode() == m_nic.getNodeId() ) { + sameNodeCswap( static_cast< NicShmemCswapCmdEvent*>(event), id ); + } else { + cswap( static_cast< NicShmemCswapCmdEvent*>(event), id ); + } break; case NicShmemCmdEvent::Swap: - hostSwap( static_cast< NicShmemSwapCmdEvent*>(event), id ); + if ( event->getNode() == m_nic.getNodeId() ) { + sameNodeSwap( static_cast< NicShmemSwapCmdEvent*>(event), id ); + } else { + swap( static_cast< NicShmemSwapCmdEvent*>(event), id ); + } break; default: @@ -174,7 +227,6 @@ void Nic::Shmem::handleHostEvent( NicShmemCmdEvent* event, int id ) } } - void Nic::Shmem::init( NicShmemInitCmdEvent* event, int id ) { m_dbg.verbosePrefix( prefix(),CALL_INFO,1,NIC_DBG_SHMEM,"core=%d simVAddr=%" PRIx64 "\n", @@ -201,7 +253,7 @@ void Nic::Shmem::regMem( NicShmemRegMemCmdEvent* event, int id ) void Nic::Shmem::put( NicShmemPutCmdEvent* event, int id ) { - m_dbg.verbosePrefix( prefix(),CALL_INFO,1,NIC_DBG_SHMEM,"core=%d farNode=%d farAddr=%" PRIx64" len=%lu\n", + m_dbg.verbosePrefix( prefix(),CALL_INFO,1,NIC_DBG_SHMEM,"core=%d targetNode=%d farAddr=%" PRIx64" len=%lu\n", id, event->getNode(), event->getFarAddr(), event->getLength() ); std::stringstream tmp; @@ -211,7 +263,7 @@ void Nic::Shmem::put( NicShmemPutCmdEvent* event, int id ) NicShmemRespEvent::Callback callback = static_cast< NicShmemPutCmdEvent*>(event)->getCallback(); ShmemPutSendEntry* entry = new ShmemPutbSendEntry( id, event, getBacking( id, event->getMyAddr(), event->getLength() ), [=]() { - m_dbg.verbosePrefix( prefix(),CALL_INFO_LAMBDA,__func__,1,NIC_DBG_SHMEM,"fini\n"); + m_dbg.verbosePrefix( prefix(),CALL_INFO_LAMBDA,"put",1,NIC_DBG_SHMEM,"finished\n"); if ( event->isBlocking() ) { m_nic.getVirtNic(id)->notifyShmem( 0, callback ); } else { @@ -226,7 +278,7 @@ void Nic::Shmem::put( NicShmemPutCmdEvent* event, int id ) void Nic::Shmem::putv( NicShmemPutvCmdEvent* event, int id ) { - m_dbg.verbosePrefix( prefix(),CALL_INFO,1,NIC_DBG_SHMEM,"core=%d farCore=%d far=%" PRIx64" len=%lu\n", + m_dbg.verbosePrefix( prefix(),CALL_INFO,1,NIC_DBG_SHMEM,"core=%d targetCore=%d far=%" PRIx64" len=%lu\n", id, event->getVnic(), event->getFarAddr(), 
event->getLength() ); std::stringstream tmp; @@ -240,18 +292,17 @@ void Nic::Shmem::putv( NicShmemPutvCmdEvent* event, int id ) ); m_nic.qSendEntry( entry ); - } void Nic::Shmem::getv( NicShmemGetvCmdEvent* event, int id ) { - m_dbg.verbosePrefix( prefix(),CALL_INFO,1,NIC_DBG_SHMEM,"core+%d far=%" PRIx64" len=%lu\n", id, event->getFarAddr(), event->getLength() ); + m_dbg.verbosePrefix( prefix(),CALL_INFO,1,NIC_DBG_SHMEM,"core=%d far=%" PRIx64" len=%lu\n", id, event->getFarAddr(), event->getLength() ); NicShmemValueRespEvent::Callback callback = event->callback; ShmemGetvSendEntry* entry = new ShmemGetvSendEntry( id, event, [=](Hermes::Value& value) { - m_dbg.verbosePrefix( prefix(),CALL_INFO_LAMBDA,__func__,1,NIC_DBG_SHMEM,"fini\n"); + m_dbg.verbosePrefix( prefix(),CALL_INFO_LAMBDA,"getv",1,NIC_DBG_SHMEM,"finished\n"); m_nic.getVirtNic(id)->notifyShmem( getNic2HostDelay_ns(), callback, value ); } ); @@ -272,7 +323,7 @@ void Nic::Shmem::get( NicShmemGetCmdEvent* event, int id ) NicShmemRespEvent::Callback callback = event->getCallback(); ShmemGetbSendEntry* entry = new ShmemGetbSendEntry( id, event, [=]() { - m_dbg.verbosePrefix( prefix(),CALL_INFO_LAMBDA,__func__,1,NIC_DBG_SHMEM,"fini\n"); + m_dbg.verbosePrefix( prefix(),CALL_INFO_LAMBDA,"get",1,NIC_DBG_SHMEM,"finished\n"); if ( event->isBlocking() ) { m_nic.getVirtNic(id)->notifyShmem( getNic2HostDelay_ns(), callback ); } else { @@ -289,7 +340,7 @@ void Nic::Shmem::get( NicShmemGetCmdEvent* event, int id ) void Nic::Shmem::add( NicShmemAddCmdEvent* event, int id ) { - m_dbg.verbosePrefix( prefix(),CALL_INFO,1,NIC_DBG_SHMEM,"core=%d farNode=%d farCore=%d far=%" PRIx64" len=%lu event=%p\n", id, + m_dbg.verbosePrefix( prefix(),CALL_INFO,1,NIC_DBG_SHMEM,"core=%d targetNode=%d targetCore=%d far=%" PRIx64" len=%lu event=%p\n", id, event->getNode(), event->getVnic(), event->getFarAddr(), event->getLength(), event ); std::stringstream tmp; @@ -313,7 +364,7 @@ void Nic::Shmem::fadd( NicShmemFaddCmdEvent* event, int id ) NicShmemValueRespEvent::Callback callback = event->callback; ShmemFaddSendEntry* entry = new ShmemFaddSendEntry( id, event, [=](Hermes::Value& value) { - m_dbg.verbosePrefix( prefix(),CALL_INFO_LAMBDA,__func__,1,NIC_DBG_SHMEM,"fini\n"); + m_dbg.verbosePrefix( prefix(),CALL_INFO_LAMBDA,"fadd",1,NIC_DBG_SHMEM,"finished\n"); m_nic.getVirtNic(id)->notifyShmem( getNic2HostDelay_ns(), callback, value ); } ); @@ -330,7 +381,7 @@ void Nic::Shmem::cswap( NicShmemCswapCmdEvent* event, int id ) NicShmemValueRespEvent::Callback callback = event->callback; ShmemCswapSendEntry* entry = new ShmemCswapSendEntry( id, event, [=](Hermes::Value& value) { - m_dbg.verbosePrefix( prefix(),CALL_INFO_LAMBDA,__func__,1,NIC_DBG_SHMEM,"fini\n"); + m_dbg.verbosePrefix( prefix(),CALL_INFO_LAMBDA,"cswap",1,NIC_DBG_SHMEM,"finished\n"); m_nic.getVirtNic(id)->notifyShmem( getNic2HostDelay_ns(), callback, value ); } ); @@ -347,7 +398,7 @@ void Nic::Shmem::swap( NicShmemSwapCmdEvent* event, int id ) NicShmemValueRespEvent::Callback callback = event->callback; ShmemSwapSendEntry* entry = new ShmemSwapSendEntry( id, event, [=](Hermes::Value& value) { - m_dbg.verbosePrefix( prefix(),CALL_INFO_LAMBDA,__func__,1,NIC_DBG_SHMEM,"fini\n"); + m_dbg.verbosePrefix( prefix(),CALL_INFO_LAMBDA,"swap",1,NIC_DBG_SHMEM,"finished\n"); m_nic.getVirtNic(id)->notifyShmem( getNic2HostDelay_ns(), callback, value ); } ); @@ -365,27 +416,27 @@ void Nic::Shmem::hostWait( NicShmemOpCmdEvent* event, int id ) Hermes::Vaddr addr = event->addr; Callback callback = event->callback; - Op* op = new 
WaitOp( event, getBacking( id, event->addr, event->value.getLength() ), + Op* op = new WaitOp( event, getBacking( id, event->addr, event->value.getLength() ), [=]() { //std::vector vec; - m_dbg.verbosePrefix( prefix(),CALL_INFO_LAMBDA,__func__,1,NIC_DBG_SHMEM,"fini addr=%" PRIx64 "\n",addr); + m_dbg.verbosePrefix( prefix(),CALL_INFO_LAMBDA,"hostWait",1,NIC_DBG_SHMEM,"core=%d addr=%" PRIx64 " finished\n",id,addr); m_nic.getVirtNic(id)->notifyShmem( 0, callback ); //m_nic.getVirtNic(id)->notifyShmem( m_nic.calcHostMemDelay(vec), event->callback ); } ); - m_dbg.verbosePrefix( prefix(),CALL_INFO,1,NIC_DBG_SHMEM,"\n"); - if ( ! op->checkOp( m_dbg ) ) { + if ( ! op->checkOp( m_dbg, id ) ) { m_pendingOps[id].push_back( op); } else { + m_dbg.verbosePrefix( prefix(),CALL_INFO,1,NIC_DBG_SHMEM,"core=%d wait satisfied\n",id); m_nic.getVirtNic(id)->notifyShmem( 0, op->callback() ); delete op; } - m_dbg.verbosePrefix( prefix(),CALL_INFO,1,NIC_DBG_SHMEM,"\n"); } void Nic::Shmem::hostPut( NicShmemPutCmdEvent* event, int id ) { + m_dbg.verbosePrefix( prefix(),CALL_INFO,1,NIC_DBG_SHMEM,"core=%d\n",id); std::vector* vec = new std::vector; if ( event->getOp() == Hermes::Shmem::ReduOp::MOVE ) { @@ -405,8 +456,12 @@ void Nic::Shmem::hostPut( NicShmemPutCmdEvent* event, int id ) checkWaitOps( event->getVnic(), event->getFarAddr(), event->getLength() ); + SimTime_t start = m_nic.getCurrentSimTimeNano(); + m_nic.calcHostMemDelay(id, vec, [=]() { + m_dbg.verbosePrefix( prefix(),CALL_INFO_LAMBDA,"hostPut",1,NIC_DBG_SHMEM,"core=%d finished latency=%" PRIu64 "\n",id, + m_nic.getCurrentSimTimeNano()-start); m_nic.getVirtNic(id)->notifyShmem( 0, event->getCallback() ); delete event; } @@ -415,6 +470,7 @@ void Nic::Shmem::hostPut( NicShmemPutCmdEvent* event, int id ) void Nic::Shmem::hostPutv( NicShmemPutvCmdEvent* event, int id ) { + m_dbg.verbosePrefix( prefix(),CALL_INFO,1,NIC_DBG_SHMEM,"core=%d\n",id); Hermes::Value local( event->getDataType(), getBacking( event->getVnic(), event->getFarAddr(), event->getLength() ) ); @@ -428,8 +484,11 @@ void Nic::Shmem::hostPutv( NicShmemPutvCmdEvent* event, int id ) vec->push_back( MemOp( event->getFarAddr(), event->getLength(), MemOp::Op::HostStore )); + SimTime_t start = m_nic.getCurrentSimTimeNano(); m_nic.calcHostMemDelay(id, vec, [=]() { + m_dbg.verbosePrefix( prefix(),CALL_INFO_LAMBDA,"hostPutv",1,NIC_DBG_SHMEM,"core=%d finished latency=%" PRIu64 "\n",id, + m_nic.getCurrentSimTimeNano()-start); m_nic.getVirtNic(id)->notifyShmem( 0, event->getCallback() ); delete event; } @@ -439,6 +498,7 @@ void Nic::Shmem::hostPutv( NicShmemPutvCmdEvent* event, int id ) void Nic::Shmem::hostGetv( NicShmemGetvCmdEvent* event, int id ) { + m_dbg.verbosePrefix( prefix(),CALL_INFO,1,NIC_DBG_SHMEM,"core=%d\n",id); Hermes::Value local( event->getDataType(), getBacking( event->getVnic(), event->getFarAddr(), event->getLength() ) ); std::vector* vec = new std::vector; @@ -448,6 +508,7 @@ void Nic::Shmem::hostGetv( NicShmemGetvCmdEvent* event, int id ) m_nic.calcHostMemDelay(id, vec, [=]() { Hermes::Value _local = local; + m_dbg.verbosePrefix( prefix(),CALL_INFO_LAMBDA,"hostGetv",1,NIC_DBG_SHMEM,"core=%d finished\n",id); m_nic.getVirtNic(id)->notifyShmem( 0, event->getCallback(), _local ); delete event; } @@ -458,6 +519,7 @@ void Nic::Shmem::hostGetv( NicShmemGetvCmdEvent* event, int id ) void Nic::Shmem::hostGet( NicShmemGetCmdEvent* event, int id ) { + m_dbg.verbosePrefix( prefix(),CALL_INFO,1,NIC_DBG_SHMEM,"core=%d\n",id); std::vector* vec =new std::vector; assert( event->getOp() == 
Hermes::Shmem::ReduOp::MOVE ); @@ -471,6 +533,7 @@ void Nic::Shmem::hostGet( NicShmemGetCmdEvent* event, int id ) m_nic.calcHostMemDelay(id, vec, [=]() { + m_dbg.verbosePrefix( prefix(),CALL_INFO_LAMBDA,"hostGet",1,NIC_DBG_SHMEM,"core=%d finished\n",id); m_nic.getVirtNic(id)->notifyShmem( 0, event->getCallback() ); delete event; } @@ -480,6 +543,7 @@ void Nic::Shmem::hostGet( NicShmemGetCmdEvent* event, int id ) void Nic::Shmem::hostAdd( NicShmemAddCmdEvent* event, int id ) { + m_dbg.verbosePrefix( prefix(),CALL_INFO,1,NIC_DBG_SHMEM,"core=%d\n",id); Hermes::Value local( event->getDataType(), getBacking( event->getVnic(), event->getFarAddr(), event->getLength() ) ); @@ -494,9 +558,15 @@ void Nic::Shmem::hostAdd( NicShmemAddCmdEvent* event, int id ) vec->push_back( MemOp( event->getFarAddr(), event->getLength(), MemOp::Op::HostLoad )); vec->push_back( MemOp( event->getFarAddr(), event->getLength(), MemOp::Op::HostStore )); - m_nic.calcHostMemDelay(id, vec, + SimTime_t start = m_nic.getCurrentSimTimeNano(); + + m_nic.calcHostMemDelay( id, vec, [=]() { - m_nic.getVirtNic(id)->notifyShmem( 0, event->getCallback() ); + m_dbg.verbosePrefix( prefix(),CALL_INFO_LAMBDA,"hostAdd",1,NIC_DBG_SHMEM,"core=%d finished latency=%" PRIu64 "\n",id, + m_nic.getCurrentSimTimeNano()-start); + //decPending( id ); + //incFreeCmdSlots(); + m_nic.getVirtNic(id)->notifyShmem( 0, event->getCallback() ); delete event; } ); @@ -505,6 +575,7 @@ void Nic::Shmem::hostAdd( NicShmemAddCmdEvent* event, int id ) void Nic::Shmem::hostFadd( NicShmemFaddCmdEvent* event, int id ) { + m_dbg.verbosePrefix( prefix(),CALL_INFO,1,NIC_DBG_SHMEM,"core=%d\n",id); Hermes::Value local( event->getDataType(), getBacking( event->getVnic(), event->getFarAddr(), event->getLength() ) ); std::vector* vec = new std::vector; @@ -522,18 +593,21 @@ void Nic::Shmem::hostFadd( NicShmemFaddCmdEvent* event, int id ) vec->push_back( MemOp( event->getFarAddr(), event->getLength(), MemOp::Op::HostLoad )); vec->push_back( MemOp( event->getFarAddr(), event->getLength(), MemOp::Op::HostStore )); - m_nic.calcHostMemDelay(id, vec, + m_nic.calcHostMemDelay( id, vec, [=]() { Hermes::Value _save = save; + m_dbg.verbosePrefix( prefix(),CALL_INFO_LAMBDA,"hostFadd",1,NIC_DBG_SHMEM,"core=%d finished\n",id); m_nic.getVirtNic(id)->notifyShmem( 0, event->getCallback(), _save ); delete event; + } ); } -void Nic::Shmem::hostCswap( NicShmemCswapCmdEvent* event, int id ) +void Nic::Shmem::sameNodeCswap( NicShmemCswapCmdEvent* event, int id ) { + m_dbg.verbosePrefix( prefix(),CALL_INFO,1,NIC_DBG_SHMEM,"core=%d\n",id); Hermes::Value local( event->getDataType(), getBacking( event->getVnic(), event->getFarAddr(), event->getLength() ) ); std::vector* vec = new std::vector; @@ -544,17 +618,18 @@ void Nic::Shmem::hostCswap( NicShmemCswapCmdEvent* event, int id ) save = local; - vec->push_back( MemOp( event->getFarAddr(), event->getLength(), MemOp::Op::HostLoad )); + vec->push_back( MemOp( event->getFarAddr(), event->getLength(), MemOp::Op::BusLoad )); if ( local == event->getCond() ) { local = event->getValue(); checkWaitOps( event->getVnic(), event->getFarAddr(), local.getLength() ); - vec->push_back( MemOp( event->getFarAddr(), event->getLength(), MemOp::Op::HostStore )); + vec->push_back( MemOp( event->getFarAddr(), event->getLength(), MemOp::Op::BusStore )); } - m_nic.calcHostMemDelay(id, vec, + m_nic.calcNicMemDelay( m_nic.allocNicRecvUnit(id), id, vec, [=]() { Hermes::Value _save = save; + m_dbg.verbosePrefix( prefix(),CALL_INFO_LAMBDA,"sameNodeCswap",1,NIC_DBG_SHMEM,"core=%d 
finished\n",id); m_nic.getVirtNic(id)->notifyShmem( 0, event->getCallback(), _save ); delete event; } @@ -562,8 +637,9 @@ void Nic::Shmem::hostCswap( NicShmemCswapCmdEvent* event, int id ) } -void Nic::Shmem::hostSwap( NicShmemSwapCmdEvent* event, int id ) +void Nic::Shmem::sameNodeSwap( NicShmemSwapCmdEvent* event, int id ) { + m_dbg.verbosePrefix( prefix(),CALL_INFO,1,NIC_DBG_SHMEM,"core=%d\n",id); Hermes::Value local( event->getDataType(), getBacking( event->getVnic(), event->getFarAddr(), event->getLength() ) ); @@ -577,13 +653,13 @@ void Nic::Shmem::hostSwap( NicShmemSwapCmdEvent* event, int id ) checkWaitOps( event->getVnic(), event->getFarAddr(), local.getLength() ); - vec->push_back( MemOp( event->getFarAddr(), event->getLength(), MemOp::Op::HostLoad )); - vec->push_back( MemOp( event->getFarAddr(), event->getLength(), MemOp::Op::HostStore )); - + vec->push_back( MemOp( event->getFarAddr(), event->getLength(), MemOp::Op::BusLoad )); + vec->push_back( MemOp( event->getFarAddr(), event->getLength(), MemOp::Op::BusStore )); - m_nic.calcHostMemDelay(id, vec, + m_nic.calcNicMemDelay(m_nic.allocNicRecvUnit(id), id, vec, [=](){ Hermes::Value _save = save; + m_dbg.verbosePrefix( prefix(),CALL_INFO_LAMBDA,"sameNodeSwap",1,NIC_DBG_SHMEM,"core=%d finished\n",id); m_nic.getVirtNic(id)->notifyShmem( 0, event->getCallback(), _save ); delete event; } @@ -649,11 +725,10 @@ void Nic::Shmem::checkWaitOps( int core, Hermes::Vaddr addr, size_t length ) m_dbg.verbosePrefix( prefix(),CALL_INFO,3,NIC_DBG_SHMEM,"check op\n" ); Op* op = *iter; - if ( op->inRange( addr, length ) && op->checkOp( m_dbg ) ) { + if ( op->inRange( addr, length ) && op->checkOp( m_dbg, core ) ) { m_dbg.verbosePrefix( prefix(),CALL_INFO,1,NIC_DBG_SHMEM,"op valid, notify\n"); - m_nic.schedCallback( op->callback(), m_nic2HostDelay_ns ); - //m_nic.schedCallback( op->callback() ); + m_nic.schedCallback( op->callback() ); delete op; iter = m_pendingOps[core].erase(iter); } else { diff --git a/src/sst/elements/firefly/nicShmem.h b/src/sst/elements/firefly/nicShmem.h index aa334796ae..a922a9d384 100644 --- a/src/sst/elements/firefly/nicShmem.h +++ b/src/sst/elements/firefly/nicShmem.h @@ -26,7 +26,7 @@ class Shmem { delete m_cmd; } Callback& callback() { return m_callback; } - virtual bool checkOp( Output& ) = 0; + virtual bool checkOp( Output&, int core ) = 0; bool inRange( Hermes::Vaddr addr, size_t length ) { //printf("%s() addr=%lu length=%lu\n",__func__,addr, length); return ( m_cmd->addr >= addr && m_cmd->addr + m_cmd->value.getLength() <= addr + length ); @@ -45,10 +45,10 @@ class Shmem { m_value( cmd->value.getType(), backing ) {} - bool checkOp( Output& dbg ) { + bool checkOp( Output& dbg, int core ) { std::stringstream tmp; tmp << "op=" << WaitOpName(m_cmd->op) << " testValue=" << m_cmd->value << " memValue=" << m_value; - dbg.debug( CALL_INFO,1,NIC_DBG_SHMEM,"%s %s\n",__func__,tmp.str().c_str()); + dbg.debug( CALL_INFO,1,NIC_DBG_SHMEM,"%s core=%d %s\n",__func__,core,tmp.str().c_str()); switch ( m_cmd->op ) { case Hermes::Shmem::NE: return m_value != m_cmd->value; @@ -81,9 +81,10 @@ class Shmem { const char* prefix() { return m_prefix.c_str(); } public: - Shmem( Nic& nic, int id, int numVnics, Output& output, int numCmdSlots, SimTime_t nic2HostDelay_ns, SimTime_t host2NicDelay_ns ) : + Shmem( Nic& nic, int id, int numVnics, Output& output, int numCmdSlots, SimTime_t nic2HostDelay_ns, SimTime_t host2NicDelay_ns, + SimTime_t sendSetupLatency ) : m_nic( nic ), m_dbg(output), m_one( (long) 1 ), m_freeCmdSlots( numCmdSlots ), - 
m_nic2HostDelay_ns(nic2HostDelay_ns), m_host2NicDelay_ns(host2NicDelay_ns) + m_nic2HostDelay_ns(nic2HostDelay_ns), m_host2NicDelay_ns(host2NicDelay_ns), m_sendSetupLatency( sendSetupLatency ) { m_prefix = "@t:" + std::to_string(id) + ":Nic::Shmem::@p():@l "; m_dbg.verbosePrefix( prefix(), CALL_INFO,1,NIC_DBG_SHMEM,"this=%p\n",this ); @@ -104,12 +105,12 @@ class Shmem { } void incPending( int core ) { long value = m_pendingRemoteOps[core].second.get(); - m_dbg.verbosePrefix( prefix(), CALL_INFO,1,NIC_DBG_SHMEM,"pid=%d count=%lu\n", core, value ); + m_dbg.verbosePrefix( prefix(), CALL_INFO,1,NIC_DBG_SHMEM,"core=%d count=%lu\n", core, value ); m_pendingRemoteOps[core].second += m_one; } void decPending( int core ) { long value = m_pendingRemoteOps[core].second.get(); - m_dbg.verbosePrefix( prefix(), CALL_INFO,1,NIC_DBG_SHMEM,"pid=%d count=%lu\n", core, value ); + m_dbg.verbosePrefix( prefix(), CALL_INFO,1,NIC_DBG_SHMEM,"core=%d count=%lu\n", core, value ); assert(value>0); m_pendingRemoteOps[core].second -= m_one; checkWaitOps( core, m_pendingRemoteOps[core].first, m_pendingRemoteOps[core].second.getLength() ); @@ -138,10 +139,11 @@ class Shmem { void hostPutv( NicShmemPutvCmdEvent*, int id ); void hostGet( NicShmemGetCmdEvent*, int id ); void hostGetv( NicShmemGetvCmdEvent*, int id ); + void hostAdd( NicShmemAddCmdEvent*, int id ); void hostFadd( NicShmemFaddCmdEvent*, int id ); - void hostCswap( NicShmemCswapCmdEvent*, int id ); - void hostSwap( NicShmemSwapCmdEvent*, int id ); + void sameNodeCswap( NicShmemCswapCmdEvent*, int id ); + void sameNodeSwap( NicShmemSwapCmdEvent*, int id ); void put( NicShmemPutCmdEvent*, int id ); void putv( NicShmemPutvCmdEvent*, int id ); @@ -179,4 +181,5 @@ class Shmem { std::vector > > m_regMem; SimTime_t m_nic2HostDelay_ns; SimTime_t m_host2NicDelay_ns; + SimTime_t m_sendSetupLatency; }; diff --git a/src/sst/elements/firefly/nicShmemMove.cc b/src/sst/elements/firefly/nicShmemMove.cc index 7c2da8c73a..d84961523e 100644 --- a/src/sst/elements/firefly/nicShmemMove.cc +++ b/src/sst/elements/firefly/nicShmemMove.cc @@ -107,6 +107,7 @@ bool Nic::ShmemRecvMoveMem::copyIn( Output& dbg, FireflyNetworkEvent& event, std size_t tmpOffset = m_addr + m_offset; int tmpCore = m_core; + vec.push_back( MemOp( m_addr + m_offset, length, MemOp::Op::BusDmaToHost, [=] () { m_shmem->checkWaitOps( tmpCore, tmpOffset, length ); diff --git a/src/sst/elements/firefly/nicShmemStream.cc b/src/sst/elements/firefly/nicShmemStream.cc index d307d802f3..c9101e191d 100644 --- a/src/sst/elements/firefly/nicShmemStream.cc +++ b/src/sst/elements/firefly/nicShmemStream.cc @@ -28,15 +28,18 @@ Nic::RecvMachine::ShmemStream::ShmemStream( Output& output, Ctx* ctx, ev->bufPop(sizeof(MsgHdr) + sizeof(m_shmemHdr) ); - m_dbg.debug(CALL_INFO,1,NIC_DBG_RECV_STREAM,"srcNode=%d srcPid=%d\n", m_srcNode, m_srcPid ); + m_dbg.debug(CALL_INFO,1,NIC_DBG_RECV_STREAM,"core=%d %s srcNode=%d srcCore=%d this=%p\n", + m_myPid, m_shmemHdr.getOpStr().c_str(), ev->getSrcNode(),m_srcPid, this); } void Nic::RecvMachine::ShmemStream::processPktHdr( FireflyNetworkEvent* ev ) { m_dbg.debug(CALL_INFO,1,NIC_DBG_RECV_STREAM,"srcNode=%d srcPid=%d %s\n", m_srcNode, m_srcPid, m_shmemHdr.getOpStr().c_str() ); + SimTime_t latency = m_shmemHdr.op == ShmemMsgHdr::Ack ? 
0 : m_ctx->nic().getShmemRxDelay_ns(); m_ctx->nic().schedCallback( std::bind( &Nic::RecvMachine::ShmemStream::processOp, this, ev ), - m_ctx->nic().getShmemRxDelay_ns() ); + latency ); + //m_ctx->nic().getShmemRxDelay_ns() ); } void Nic::RecvMachine::ShmemStream::processOp( FireflyNetworkEvent* ev ) @@ -47,6 +50,9 @@ void Nic::RecvMachine::ShmemStream::processOp( FireflyNetworkEvent* ev ) m_unit = m_ctx->allocRecvUnit(); } + m_dbg.debug(CALL_INFO,1,NIC_DBG_RECV_STREAM,"core=%d %s srcNode=%d srcCore=%d\n", + m_myPid, m_shmemHdr.getOpStr().c_str(), ev->getSrcNode(),m_srcPid); + switch ( m_shmemHdr.op ) { case ShmemMsgHdr::Put: @@ -78,7 +84,7 @@ void Nic::RecvMachine::ShmemStream::processOp( FireflyNetworkEvent* ev ) break; case ShmemMsgHdr::Ack: - processAck( m_shmemHdr, ev, m_myPid ); + processAck( m_shmemHdr, ev, m_myPid, m_srcPid ); break; default: @@ -87,9 +93,8 @@ void Nic::RecvMachine::ShmemStream::processOp( FireflyNetworkEvent* ev ) } } -void Nic::RecvMachine::ShmemStream::processAck( ShmemMsgHdr& hdr, FireflyNetworkEvent* ev, int pid ) +void Nic::RecvMachine::ShmemStream::processAck( ShmemMsgHdr& hdr, FireflyNetworkEvent* ev, int pid, int srcPid ) { - m_dbg.debug(CALL_INFO,1,NIC_DBG_RECV_STREAM,"srcNode=%d\n",ev->getSrcNode()); m_ctx->nic().shmemDecPending( pid ); m_ctx->deleteStream(this); diff --git a/src/sst/elements/firefly/nicShmemStream.h b/src/sst/elements/firefly/nicShmemStream.h index 050269d524..9f013c0c30 100644 --- a/src/sst/elements/firefly/nicShmemStream.h +++ b/src/sst/elements/firefly/nicShmemStream.h @@ -22,7 +22,7 @@ class ShmemStream : public StreamBase { void processPktHdr( FireflyNetworkEvent* ev ); private: void processOp( FireflyNetworkEvent* ev ); - void processAck( ShmemMsgHdr&, FireflyNetworkEvent*, int ); + void processAck( ShmemMsgHdr&, FireflyNetworkEvent*, int, int ); void processPut( ShmemMsgHdr&, FireflyNetworkEvent*, int, int ); void processGetResp( ShmemMsgHdr&, FireflyNetworkEvent*, int, int ); void processGet( ShmemMsgHdr&, FireflyNetworkEvent*, int, int ); diff --git a/src/sst/elements/memHierarchy/coherencemgr/L1CoherenceController.cc b/src/sst/elements/memHierarchy/coherencemgr/L1CoherenceController.cc index d37afcd46a..63905ad9fa 100644 --- a/src/sst/elements/memHierarchy/coherencemgr/L1CoherenceController.cc +++ b/src/sst/elements/memHierarchy/coherencemgr/L1CoherenceController.cc @@ -325,7 +325,7 @@ CacheAction L1CoherenceController::handleGetXRequest(MemEvent* event, CacheLine* case M: if (cmd == Command::GetX) { /* L1s write back immediately */ - if (!event->isStoreConditional() | atomic) { + if (!event->isStoreConditional() || atomic) { cacheLine->setData(event->getPayload(), event->getAddr() - event->getBaseAddr()); if (is_debug_addr(cacheLine->getBaseAddr())) { diff --git a/src/sst/elements/scheduler/simulations/emberLoad.py b/src/sst/elements/scheduler/simulations/emberLoad.py index de1b44f08d..dffb459a97 100644 --- a/src/sst/elements/scheduler/simulations/emberLoad.py +++ b/src/sst/elements/scheduler/simulations/emberLoad.py @@ -195,8 +195,9 @@ topo = topoFatTree() elif "dragonfly" == netTopo: - # Only supports legacy dragonfly - topoInfo = DragonFlyInfo(netShape) + + # Only supports legacy dragonfly + topoInfo = DragonFlyLegacyInfo(netShape) topo = topoDragonFlyLegacy() else:
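
Note on the receive-side change above: the new maxActiveRecvStreams parameter bounds how many message streams the RecvMachine tracks at once, parking at most one header packet until an active stream retires (decActiveStream) and then replaying it. Below is a minimal standalone sketch of that back-pressure logic; the Pkt/RecvThrottle names and the driver in main() are illustrative stand-ins under assumption, not the firefly classes.

    // Sketch only: mirrors the processPkt()/processPkt2()/decActiveStream()
    // shape added in nicRecvMachine.h, with simplified types.
    #include <cassert>
    #include <cstdio>

    struct Pkt { bool isHdr; int id; };

    class RecvThrottle {
    public:
        explicit RecvThrottle(int maxActive) : m_max(maxActive) {}

        // A header packet opens a stream; if the limit would be exceeded,
        // park exactly one packet instead of processing it.
        void deliver(Pkt p) {
            if (p.isHdr) ++m_active;
            if (m_active == m_max + 1) {
                assert(!m_blocked);            // only one packet can be parked
                m_blockedPkt = p;
                m_blocked = true;
                std::printf("blocked pkt %d on available streams\n", p.id);
            } else {
                process(p);
            }
        }

        // A stream finished: drop the count and replay the parked packet.
        void streamRetired() {
            --m_active;
            assert(m_active >= 0);
            if (m_blocked) {
                m_blocked = false;
                process(m_blockedPkt);
            }
        }

    private:
        void process(const Pkt& p) { std::printf("processing pkt %d\n", p.id); }

        int  m_max;
        int  m_active  = 0;
        bool m_blocked = false;
        Pkt  m_blockedPkt{};
    };

    int main() {
        RecvThrottle rt(1);
        rt.deliver({true, 0});   // first stream, processed
        rt.deliver({true, 1});   // second header exceeds the limit, parked
        rt.streamRetired();      // retiring stream 0 replays packet 1
    }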
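
Note on the SendMachine::OutQ change: the queue now stores an Entry that pairs each packet with an optional completion callback, and the callback fires when the packet is popped toward the link rather than at enqueue time. A hedged sketch of that pattern with simplified stand-in types (NetEvent, OutQ here are assumptions, not the firefly API):

    #include <cstdio>
    #include <deque>
    #include <functional>
    #include <utility>

    struct NetEvent { int bytes; };
    using Callback = std::function<void()>;

    class OutQ {
        struct Entry {
            Entry(std::pair<NetEvent*, int> d, Callback cb = nullptr)
                : data(d), callback(std::move(cb)) {}
            std::pair<NetEvent*, int> data;   // (packet, destination)
            Callback callback;                // fired when the packet leaves the queue
        };
        std::deque<Entry> m_queue;

    public:
        void enque(NetEvent* ev, int dest, Callback cb = nullptr) {
            m_queue.emplace_back(std::make_pair(ev, dest), std::move(cb));
        }
        std::pair<NetEvent*, int>& front() { return m_queue.front().data; }
        void pop() {
            // Completion is reported only when the packet actually departs.
            if (m_queue.front().callback) m_queue.front().callback();
            m_queue.pop_front();
        }
        bool empty() const { return m_queue.empty(); }
    };

    int main() {
        OutQ q;
        NetEvent ev{64};
        q.enque(&ev, 3, [] { std::printf("packet handed to the link\n"); });
        while (!q.empty()) { q.front(); q.pop(); }
    }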
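
Note on the Shmem dispatch rework: handleEvent() now switches on the command type, routing same-node operations down a host path (some behind a fixed setup delay) and everything else to the NIC path. The sketch below only illustrates that routing shape; the delay constants echo the ones in the diff, but the class, enum, and function names are assumptions rather than the firefly API.

    #include <cstdio>

    enum class Cmd { Init, RegMem, Cswap, Swap, Fadd, Add, Put, Putv, Get, Getv, Fence, Wait };

    struct Event { Cmd type; int node; };

    class ShmemDispatch {
    public:
        explicit ShmemDispatch(int myNode) : m_myNode(myNode) {}

        void handle(const Event& ev) {
            switch (ev.type) {
            // operations that run only on the NIC
            case Cmd::Init: case Cmd::RegMem: case Cmd::Cswap: case Cmd::Swap:
                nicPath(ev, 0);
                break;
            // host path only when the target is the local node
            case Cmd::Fadd:
                ev.node == m_myNode ? hostPath(ev, 1600) : nicPath(ev, 0);
                break;
            case Cmd::Add:
                ev.node == m_myNode ? hostPath(ev, 40) : nicPath(ev, 0);
                break;
            case Cmd::Putv:
                ev.node == m_myNode ? hostPath(ev, 20) : nicPath(ev, 0);
                break;
            case Cmd::Put: case Cmd::Get: case Cmd::Getv:
                ev.node == m_myNode ? hostPath(ev, 0) : nicPath(ev, 0);
                break;
            // host-only operations
            case Cmd::Fence: break;
            case Cmd::Wait:  hostPath(ev, 0); break;
            }
        }

    private:
        void hostPath(const Event&, long delayNs) { std::printf("host path, %ldns setup\n", delayNs); }
        void nicPath(const Event&, long delayNs)  { std::printf("nic path, %ldns setup\n", delayNs); }
        int m_myNode;
    };

    int main() {
        ShmemDispatch d(7);
        d.handle({Cmd::Add, 7});   // local node: host path after the Add delay
        d.handle({Cmd::Add, 3});   // remote node: NIC path
    }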