diff --git a/src/alpaka/AlpakaCore/backend.h b/src/alpaka/AlpakaCore/backend.h
index 387154013..8d58953cc 100644
--- a/src/alpaka/AlpakaCore/backend.h
+++ b/src/alpaka/AlpakaCore/backend.h
@@ -3,4 +3,15 @@
 
 enum class Backend { SERIAL, TBB, CUDA, HIP };
 
+inline std::string const& name(Backend backend) {
+  static const std::string names[] = {"serial_sync", "tbb_async", "cuda_async", "rocm_async"};
+  return names[static_cast<int>(backend)];
+}
+
+template <typename T>
+inline T& operator<<(T& out, Backend backend) {
+  out << name(backend);
+  return out;
+}
+
 #endif  // AlpakaCore_backend_h
diff --git a/src/alpaka/bin/EventProcessor.cc b/src/alpaka/bin/EventProcessor.cc
index 3d7a3546f..7e84f5114 100644
--- a/src/alpaka/bin/EventProcessor.cc
+++ b/src/alpaka/bin/EventProcessor.cc
@@ -1,3 +1,4 @@
+#include <cmath>
 #include 
 #include 
 #include 
@@ -13,7 +14,7 @@ namespace edm {
   EventProcessor::EventProcessor(int maxEvents,
                                  int runForMinutes,
                                  int numberOfStreams,
-                                 std::vector<std::string> const& path,
+                                 Alternatives alternatives,
                                  std::vector<std::string> const& esproducers,
                                  std::filesystem::path const& datadir,
                                  bool validation)
@@ -24,9 +25,23 @@
       esp->produce(eventSetup_);
     }
 
+    // normalise the total weight to the number of streams
+    float total = 0.;
+    for (auto const& alternative : alternatives) {
+      total += alternative.weight;
+    }
     //schedules_.reserve(numberOfStreams);
-    for (int i = 0; i < numberOfStreams; ++i) {
-      schedules_.emplace_back(registry_, pluginManager_, &source_, &eventSetup_, i, path);
+    float cumulative = 0.;
+    int lower_range = 0;
+    int upper_range = 0;
+    for (auto& alternative : alternatives) {
+      cumulative += alternative.weight;
+      lower_range = upper_range;
+      upper_range = static_cast<int>(std::round(cumulative * numberOfStreams / total));
+      for (int i = lower_range; i < upper_range; ++i) {
+        schedules_.emplace_back(registry_, pluginManager_, &source_, &eventSetup_, i, alternative.path);
+      }
+      streamsPerBackend_.emplace_back(alternative.backend, upper_range - lower_range);
     }
   }
 
diff --git a/src/alpaka/bin/EventProcessor.h b/src/alpaka/bin/EventProcessor.h
index 5fb20f0b9..e1ca9fe01 100644
--- a/src/alpaka/bin/EventProcessor.h
+++ b/src/alpaka/bin/EventProcessor.h
@@ -5,6 +5,7 @@
 #include 
 #include 
 
+#include "AlpakaCore/backend.h"
 #include "Framework/EventSetup.h"
 
 #include "PluginManager.h"
@@ -12,18 +13,31 @@
 #include "Source.h"
 
 namespace edm {
+  struct Alternative {
+    Alternative() = default;
+    Alternative(Backend backend, float weight, std::vector<std::string> path)
+        : backend{backend}, weight{weight}, path{std::move(path)} {}
+
+    Backend backend;
+    float weight;
+    std::vector<std::string> path;
+  };
+
+  using Alternatives = std::vector<Alternative>;
+
   class EventProcessor {
   public:
     explicit EventProcessor(int maxEvents,
                             int runForMinutes,
                             int numberOfStreams,
-                            std::vector<std::string> const& path,
+                            Alternatives alternatives,
                             std::vector<std::string> const& esproducers,
                             std::filesystem::path const& datadir,
                             bool validation);
 
     int maxEvents() const { return source_.maxEvents(); }
     int processedEvents() const { return source_.processedEvents(); }
+    std::vector<std::pair<Backend, int>> const& backends() const { return streamsPerBackend_; }
 
     void runToCompletion();
@@ -35,6 +49,7 @@ namespace edm {
     Source source_;
     EventSetup eventSetup_;
     std::vector<StreamSchedule> schedules_;
+    std::vector<std::pair<Backend, int>> streamsPerBackend_;
   };
 
 }  // namespace edm
diff --git a/src/alpaka/bin/main.cc b/src/alpaka/bin/main.cc
index e2f625fde..9c997f4af 100644
--- a/src/alpaka/bin/main.cc
+++ b/src/alpaka/bin/main.cc
@@ -6,6 +6,7 @@
 #include 
 #include 
 #include 
+#include <unordered_map>
 #include 
 #include 
 #include 
@@ -38,7 +39,7 @@ namespace {
               << "[--hip] "
 #endif
               << "[--numberOfThreads NT] [--numberOfStreams NS] [--maxEvents ME] [--data PATH] "
-                 "[--transfer] [--validation]\n\n"
+                 "[--transfer] [--validation] [--histogram]\n\n"
               << "Options\n"
 #ifdef ALPAKA_ACC_CPU_B_SEQ_T_SEQ_PRESENT
               << "  --serial            Use CPU Serial backend\n"
@@ -64,13 +65,62 @@
               << "  --empty             Ignore all producers (for testing only)\n"
               << std::endl;
   }
-
 }  // namespace
 
+bool getOptionalArgument(std::vector<std::string> const& args, std::vector<std::string>::iterator& i, int& value) {
+  auto it = i;
+  ++it;
+  if (it == args.end()) {
+    return false;
+  }
+  try {
+    value = std::stoi(*it);
+    ++i;
+    return true;
+  } catch (...) {
+    return false;
+  }
+}
+
+bool getOptionalArgument(std::vector<std::string> const& args, std::vector<std::string>::iterator& i, float& value) {
+  auto it = i;
+  ++it;
+  if (it == args.end()) {
+    return false;
+  }
+  try {
+    value = std::stof(*it);
+    ++i;
+    return true;
+  } catch (...) {
+    return false;
+  }
+}
+
+bool getOptionalArgument(std::vector<std::string> const& args,
+                         std::vector<std::string>::iterator& i,
+                         std::filesystem::path& value) {
+  auto it = i;
+  ++it;
+  if (it == args.end()) {
+    return false;
+  }
+  value = *it;
+  return true;
+}
+
+template <typename T>
+void getArgument(std::vector<std::string> const& args, std::vector<std::string>::iterator& i, T& value) {
+  if (not getOptionalArgument(args, i, value)) {
+    std::cerr << "error: " << *i << " expects an argument" << std::endl;
+    exit(EXIT_FAILURE);
+  }
+}
+
 int main(int argc, char** argv) {
   // Parse command line arguments
   std::vector<std::string> args(argv, argv + argc);
-  std::vector<Backend> backends;
+  std::unordered_map<Backend, float> backends;
   int numberOfThreads = 1;
   int numberOfStreams = 0;
   int maxEvents = -1;
@@ -86,35 +136,38 @@
       return EXIT_SUCCESS;
 #ifdef ALPAKA_ACC_CPU_B_SEQ_T_SEQ_PRESENT
     } else if (*i == "--serial") {
-      backends.emplace_back(Backend::SERIAL);
+      float weight = 1.;
+      getOptionalArgument(args, i, weight);
+      backends.insert_or_assign(Backend::SERIAL, weight);
 #endif
 #ifdef ALPAKA_ACC_CPU_B_TBB_T_SEQ_PRESENT
     } else if (*i == "--tbb") {
-      backends.emplace_back(Backend::TBB);
+      float weight = 1.;
+      getOptionalArgument(args, i, weight);
+      backends.insert_or_assign(Backend::TBB, weight);
 #endif
 #ifdef ALPAKA_ACC_GPU_CUDA_PRESENT
     } else if (*i == "--cuda") {
-      backends.emplace_back(Backend::CUDA);
+      float weight = 1.;
+      getOptionalArgument(args, i, weight);
+      backends.insert_or_assign(Backend::CUDA, weight);
 #endif
 #ifdef ALPAKA_ACC_GPU_HIP_PRESENT
     } else if (*i == "--hip") {
-      backends.emplace_back(Backend::HIP);
+      float weight = 1.;
+      getOptionalArgument(args, i, weight);
+      backends.insert_or_assign(Backend::HIP, weight);
 #endif
     } else if (*i == "--numberOfThreads") {
-      ++i;
-      numberOfThreads = std::stoi(*i);
+      getArgument(args, i, numberOfThreads);
     } else if (*i == "--numberOfStreams") {
-      ++i;
-      numberOfStreams = std::stoi(*i);
+      getArgument(args, i, numberOfStreams);
    } else if (*i == "--maxEvents") {
-      ++i;
-      maxEvents = std::stoi(*i);
+      getArgument(args, i, maxEvents);
    } else if (*i == "--runForMinutes") {
-      ++i;
-      runForMinutes = std::stoi(*i);
+      getArgument(args, i, runForMinutes);
    } else if (*i == "--data") {
-      ++i;
-      datadir = *i;
+      getArgument(args, i, datadir);
    } else if (*i == "--transfer") {
      transfer = true;
    } else if (*i == "--validation") {
@@ -151,75 +204,77 @@
   // Initialiase the selected backends
 #ifdef ALPAKA_ACC_CPU_B_SEQ_T_SEQ_PRESENT
-  if (std::find(backends.begin(), backends.end(), Backend::SERIAL) != backends.end()) {
+  if (backends.find(Backend::SERIAL) != backends.end()) {
     cms::alpakatools::initialise();
   }
 #endif
 #ifdef ALPAKA_ACC_CPU_B_TBB_T_SEQ_PRESENT
-  if (std::find(backends.begin(), backends.end(), Backend::TBB) != backends.end()) {
+  if (backends.find(Backend::TBB) != backends.end()) {
     cms::alpakatools::initialise();
   }
 #endif
 #ifdef ALPAKA_ACC_GPU_CUDA_PRESENT
-  if (std::find(backends.begin(), backends.end(), Backend::CUDA) != backends.end()) {
+  if (backends.find(Backend::CUDA) != backends.end()) {
     cms::alpakatools::initialise();
   }
 #endif
 #ifdef ALPAKA_ACC_GPU_HIP_PRESENT
-  if (std::find(backends.begin(), backends.end(), Backend::HIP) != backends.end()) {
+  if (backends.find(Backend::HIP) != backends.end()) {
     cms::alpakatools::initialise();
   }
 #endif
 
   // Initialize EventProcessor
-  std::vector<std::string> edmodules;
   std::vector<std::string> esmodules;
+  edm::Alternatives alternatives;
   if (not empty) {
+    // host-only ESModules
     esmodules = {"BeamSpotESProducer", "SiPixelFedIdsESProducer"};
-    auto addModules = [&](std::string const& accelerator_namespace, Backend backend) {
-      if (std::find(backends.begin(), backends.end(), backend) != backends.end()) {
-        edmodules.emplace_back(accelerator_namespace + "::" + "BeamSpotToAlpaka");
-        edmodules.emplace_back(accelerator_namespace + "::" + "SiPixelRawToCluster");
-        edmodules.emplace_back(accelerator_namespace + "::" + "SiPixelRecHitAlpaka");
-        edmodules.emplace_back(accelerator_namespace + "::" + "CAHitNtupletAlpaka");
-        edmodules.emplace_back(accelerator_namespace + "::" + "PixelVertexProducerAlpaka");
-        if (transfer) {
-          edmodules.emplace_back(accelerator_namespace + "::" + "PixelTrackSoAFromAlpaka");
-          edmodules.emplace_back(accelerator_namespace + "::" + "PixelVertexSoAFromAlpaka");
-        }
-        if (validation) {
-          edmodules.emplace_back(accelerator_namespace + "::" + "CountValidator");
-        }
-        if (histogram) {
-          edmodules.emplace_back(accelerator_namespace + "::" + "HistoValidator");
-        }
-        esmodules.emplace_back(accelerator_namespace + "::" + "SiPixelFedCablingMapESProducer");
-        esmodules.emplace_back(accelerator_namespace + "::" + "SiPixelGainCalibrationForHLTESProducer");
-        esmodules.emplace_back(accelerator_namespace + "::" + "PixelCPEFastESProducer");
+    for (auto const& [backend, weight] : backends) {
+      std::string prefix = "alpaka_" + name(backend) + "::";
+      // "portable" ESModules
+      esmodules.emplace_back(prefix + "SiPixelFedCablingMapESProducer");
+      esmodules.emplace_back(prefix + "SiPixelGainCalibrationForHLTESProducer");
+      esmodules.emplace_back(prefix + "PixelCPEFastESProducer");
+      // "portable" EDModules
+      std::vector<std::string> edmodules;
+      edmodules.emplace_back(prefix + "BeamSpotToAlpaka");
+      edmodules.emplace_back(prefix + "SiPixelRawToCluster");
+      edmodules.emplace_back(prefix + "SiPixelRecHitAlpaka");
+      edmodules.emplace_back(prefix + "CAHitNtupletAlpaka");
+      edmodules.emplace_back(prefix + "PixelVertexProducerAlpaka");
+      if (transfer) {
+        edmodules.emplace_back(prefix + "PixelTrackSoAFromAlpaka");
+        edmodules.emplace_back(prefix + "PixelVertexSoAFromAlpaka");
       }
-    };
-#ifdef ALPAKA_ACC_CPU_B_SEQ_T_SEQ_PRESENT
-    addModules("alpaka_serial_sync", Backend::SERIAL);
-#endif
-#ifdef ALPAKA_ACC_CPU_B_TBB_T_SEQ_PRESENT
-    addModules("alpaka_tbb_async", Backend::TBB);
-#endif
-#ifdef ALPAKA_ACC_GPU_CUDA_PRESENT
-    addModules("alpaka_cuda_async", Backend::CUDA);
-#endif
-#ifdef ALPAKA_ACC_GPU_HIP_PRESENT
-    addModules("alpaka_rocm_async", Backend::HIP);
-#endif
+      if (validation) {
+        edmodules.emplace_back(prefix + "CountValidator");
+      }
+      if (histogram) {
+        edmodules.emplace_back(prefix + "HistoValidator");
+      }
+      alternatives.emplace_back(backend, weight, std::move(edmodules));
+    }
   }
 
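Aside: the prefix built in the loop above derives the per-backend plugin namespace from the Backend enum through the name() helper added in AlpakaCore/backend.h, replacing the hard-coded "alpaka_serial_sync"/"alpaka_cuda_async" strings of the removed addModules calls. A standalone sketch, not part of the patch, of the module labels this produces:

// Standalone illustration of the "alpaka_<backend>::Module" labels built by the loop above.
#include <iostream>
#include <string>
#include <vector>

enum class Backend { SERIAL, TBB, CUDA, HIP };

// same helper as the one added in AlpakaCore/backend.h
inline std::string const& name(Backend backend) {
  static const std::string names[] = {"serial_sync", "tbb_async", "cuda_async", "rocm_async"};
  return names[static_cast<int>(backend)];
}

int main() {
  Backend backend = Backend::CUDA;
  std::string prefix = "alpaka_" + name(backend) + "::";
  std::vector<std::string> edmodules;
  edmodules.emplace_back(prefix + "BeamSpotToAlpaka");
  edmodules.emplace_back(prefix + "SiPixelRawToCluster");
  for (auto const& label : edmodules) {
    std::cout << label << '\n';  // alpaka_cuda_async::BeamSpotToAlpaka, alpaka_cuda_async::SiPixelRawToCluster
  }
}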
   edm::EventProcessor processor(
-      maxEvents, runForMinutes, numberOfStreams, std::move(edmodules), std::move(esmodules), datadir, validation);
+      maxEvents, runForMinutes, numberOfStreams, std::move(alternatives), std::move(esmodules), datadir, validation);
   if (runForMinutes < 0) {
-    std::cout << "Processing " << processor.maxEvents() << " events, of which " << numberOfStreams
-              << " concurrently, with " << numberOfThreads << " threads." << std::endl;
+    std::cout << "Processing " << processor.maxEvents() << " events,";
   } else {
-    std::cout << "Processing for about " << runForMinutes << " minutes with " << numberOfStreams
-              << " concurrent events and " << numberOfThreads << " threads." << std::endl;
+    std::cout << "Processing for about " << runForMinutes << " minutes,";
+  }
+  {
+    std::cout << " with " << numberOfStreams << " concurrent events (";
+    bool need_comma = false;
+    for (auto const& [backend, streams] : processor.backends()) {
+      if (need_comma) {
+        std::cout << ", ";
+      }
+      std::cout << streams << " on " << backend;
+      need_comma = true;
+    }
+    std::cout << ") and " << numberOfThreads << " threads." << std::endl;
   }
 
   // Initialize the TBB thread pool
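Aside: the summary printed above reflects the weighted partitioning performed in the EventProcessor constructor earlier in this patch. A standalone sketch, not part of the patch, of that arithmetic and of the resulting output line; the weights, stream count and event count are made-up values:

// Standalone illustration: streams are split across the weighted alternatives with a
// rounded cumulative sum, and the start-up summary lists the per-backend counts.
// Hypothetical configuration: --serial 1 --cuda 3, 8 streams, 1000 events, 1 thread.
#include <cmath>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

int main() {
  std::vector<std::pair<std::string, float>> alternatives{{"serial_sync", 1.f}, {"cuda_async", 3.f}};
  int numberOfStreams = 8;

  float total = 0.;
  for (auto const& alternative : alternatives) {
    total += alternative.second;
  }

  std::vector<std::pair<std::string, int>> streamsPerBackend;
  float cumulative = 0.;
  int upper_range = 0;
  for (auto const& [backend, weight] : alternatives) {
    cumulative += weight;
    int lower_range = upper_range;
    upper_range = static_cast<int>(std::round(cumulative * numberOfStreams / total));
    streamsPerBackend.emplace_back(backend, upper_range - lower_range);
  }

  std::cout << "Processing 1000 events, with " << numberOfStreams << " concurrent events (";
  bool need_comma = false;
  for (auto const& [backend, streams] : streamsPerBackend) {
    if (need_comma) {
      std::cout << ", ";
    }
    std::cout << streams << " on " << backend;
    need_comma = true;
  }
  std::cout << ") and 1 threads." << std::endl;
  // prints: Processing 1000 events, with 8 concurrent events (2 on serial_sync, 6 on cuda_async) and 1 threads.
}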