diff --git a/sycl/include/sycl/memory_enums.hpp b/sycl/include/sycl/memory_enums.hpp index 456f07b98800e..9826053554aa5 100644 --- a/sycl/include/sycl/memory_enums.hpp +++ b/sycl/include/sycl/memory_enums.hpp @@ -95,6 +95,10 @@ static constexpr std::memory_order getStdMemoryOrder(sycl::memory_order order) { case memory_order::seq_cst: return std::memory_order_seq_cst; } + // Return default value here to avoid compiler warnings. + // default case in switch doesn't help because some compiler warn about + // having a default case while all values of enum are handled. + return std::memory_order_acq_rel; } #endif // __SYCL_DEVICE_ONLY__ diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index 6f8bd84337c27..9ea1eac1867ca 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -60,20 +60,22 @@ event_impl::~event_impl() { void event_impl::waitInternal() { if (!MHostEvent && MEvent) { + // Wait for the native event getPlugin().call(1, &MEvent); - return; - } - - if (MState == HES_Discarded) + } else if (MState == HES_Discarded) { + // Waiting for the discarded event is invalid throw sycl::exception( make_error_code(errc::invalid), "waitInternal method cannot be used for a discarded event."); + } else if (MState != HES_Complete) { + // Wait for the host event + std::unique_lock lock(MMutex); + cv.wait(lock, [this] { return MState == HES_Complete; }); + } - if (MState == HES_Complete) - return; - - std::unique_lock lock(MMutex); - cv.wait(lock, [this] { return MState == HES_Complete; }); + // Wait for connected events(e.g. streams prints) + for (const EventImplPtr &Event : MPostCompleteEvents) + Event->wait(Event); } void event_impl::setComplete() { @@ -236,27 +238,10 @@ void event_impl::wait(std::shared_ptr Self) { void event_impl::wait_and_throw( std::shared_ptr Self) { - Scheduler &Sched = Scheduler::getInstance(); - - QueueImplPtr submittedQueue = nullptr; - { - Scheduler::ReadLockT Lock(Sched.MGraphLock); - Command *Cmd = static_cast(Self->getCommand()); - if (Cmd) - submittedQueue = Cmd->getSubmittedQueue(); - } wait(Self); - { - Scheduler::ReadLockT Lock(Sched.MGraphLock); - for (auto &EventImpl : getWaitList()) { - Command *Cmd = (Command *)EventImpl->getCommand(); - if (Cmd) - Cmd->getSubmittedQueue()->throw_asynchronous(); - } - } - if (submittedQueue) - submittedQueue->throw_asynchronous(); + if (QueueImplPtr SubmittedQueue = MSubmittedQueue.lock()) + SubmittedQueue->throw_asynchronous(); } void event_impl::cleanupCommand( diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index d33da055a4f8e..e689270c5abe8 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -225,6 +225,15 @@ class event_impl { MWorkerQueue = WorkerQueue; }; + /// Sets original queue used for submission. + /// + /// @return + void setSubmittedQueue(const QueueImplPtr &SubmittedQueue) { + MSubmittedQueue = SubmittedQueue; + }; + + QueueImplPtr getSubmittedQueue() const { return MSubmittedQueue.lock(); }; + /// Checks if an event is in a fully intialized state. Default-constructed /// events will return true only after having initialized its native event, /// while other events will assume that they are fully initialized at @@ -234,7 +243,12 @@ class event_impl { /// state. bool isInitialized() const noexcept { return MIsInitialized; } -private: + void attachEventToComplete(const EventImplPtr &Event) { + std::lock_guard Lock(MMutex); + MPostCompleteEvents.push_back(Event); + } + +protected: // When instrumentation is enabled emits trace event for event wait begin and // returns the telemetry event generated for the wait void *instrumentationProlog(std::string &Name, int32_t StreamID, @@ -257,11 +271,14 @@ class event_impl { const bool MIsProfilingEnabled = false; std::weak_ptr MWorkerQueue; + std::weak_ptr MSubmittedQueue; /// Dependency events prepared for waiting by backend. std::vector MPreparedDepsEvents; std::vector MPreparedHostDepsEvents; + std::vector MPostCompleteEvents; + /// Indicates that the task associated with this event has been submitted by /// the queue to the device. std::atomic MIsFlushed = false; diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 9b88f83789b16..3822248c30920 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -377,6 +377,15 @@ void queue_impl::wait(const detail::code_location &CodeLoc) { for (event &Event : SharedEvents) Event.wait(); } + + std::vector StreamsServiceEvents; + { + std::lock_guard Lock(MMutex); + StreamsServiceEvents.swap(MStreamsServiceEvents); + } + for (const EventImplPtr &Event : StreamsServiceEvents) + Event->wait(Event); + #ifdef XPTI_ENABLE_INSTRUMENTATION instrumentationEpilog(TelemetryEvent, Name, StreamID, IId); #endif diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index a1b74927b7992..a58d19c3efb10 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -446,6 +446,11 @@ class queue_impl { return MAssertHappenedBuffer; } + void registerStreamServiceEvent(const EventImplPtr &Event) { + std::lock_guard Lock(MMutex); + MStreamsServiceEvents.push_back(Event); + } + protected: // template is needed for proper unit testing template @@ -480,7 +485,7 @@ class queue_impl { EventRet = Handler.finalize(); } -private: +protected: /// Helper function for checking whether a device is either a member of a /// context or a descendnant of its member. /// \return True iff the device or its parent is a member of the context. @@ -610,12 +615,14 @@ class queue_impl { const bool MIsInorder; + std::vector MStreamsServiceEvents; + public: // Queue constructed with the discard_events property const bool MDiscardEvents; const bool MIsProfilingEnabled; -private: +protected: // This flag says if we can discard events based on a queue "setup" which will // be common for all operations submitted to the queue. This is a must // condition for discarding, but even if it's true, in some cases, we won't be diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index faf2085e6e2e1..89085a512838c 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -394,9 +394,9 @@ Command::Command(CommandType Type, QueueImplPtr Queue) MPreparedDepsEvents(MEvent->getPreparedDepsEvents()), MPreparedHostDepsEvents(MEvent->getPreparedHostDepsEvents()), MType(Type) { - MSubmittedQueue = MQueue; MWorkerQueue = MQueue; MEvent->setWorkerQueue(MWorkerQueue); + MEvent->setSubmittedQueue(MWorkerQueue); MEvent->setCommand(this); MEvent->setContextImpl(MQueue->getContextImplPtr()); MEvent->setStateIncomplete(); @@ -1712,8 +1712,8 @@ ExecCGCommand::ExecCGCommand(std::unique_ptr CommandGroup, : Command(CommandType::RUN_CG, std::move(Queue)), MCommandGroup(std::move(CommandGroup)) { if (MCommandGroup->getType() == detail::CG::CodeplayHostTask) { - MSubmittedQueue = - static_cast(MCommandGroup.get())->MQueue; + MEvent->setSubmittedQueue( + static_cast(MCommandGroup.get())->MQueue); MEvent->setNeedsCleanupAfterWait(true); } else if (MCommandGroup->getType() == CG::CGTYPE::Kernel && (static_cast(MCommandGroup.get())->hasStreams() || diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index 77afa4936bc0a..d9ad883346ba8 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -148,8 +148,6 @@ class Command { const QueueImplPtr &getQueue() const { return MQueue; } - const QueueImplPtr &getSubmittedQueue() const { return MSubmittedQueue; } - const EventImplPtr &getEvent() const { return MEvent; } // Methods needed to support SYCL instrumentation @@ -216,7 +214,6 @@ class Command { protected: QueueImplPtr MQueue; - QueueImplPtr MSubmittedQueue; EventImplPtr MEvent; QueueImplPtr MWorkerQueue; diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 788832f688558..336835d6198be 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -161,7 +161,7 @@ EventImplPtr Scheduler::addCG(std::unique_ptr CommandGroup, cleanupCommands(ToCleanUp); for (auto StreamImplPtr : Streams) { - StreamImplPtr->flush(); + StreamImplPtr->flush(NewEvent); } return NewEvent; diff --git a/sycl/source/detail/stream_impl.cpp b/sycl/source/detail/stream_impl.cpp index 70e74d287eba7..556edd51556f2 100644 --- a/sycl/source/detail/stream_impl.cpp +++ b/sycl/source/detail/stream_impl.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// +#include #include #include #include @@ -68,13 +69,13 @@ size_t stream_impl::get_size() const { return BufferSize_; } size_t stream_impl::get_max_statement_size() const { return MaxStatementSize_; } -void stream_impl::flush() { +void stream_impl::flush(const EventImplPtr &LeadEvent) { // We don't want stream flushing to be blocking operation that is why submit a // host task to print stream buffer. It will fire up as soon as the kernel // finishes execution. auto Q = detail::createSyclObjFromImpl( sycl::detail::Scheduler::getInstance().getDefaultHostQueue()); - Q.submit([&](handler &cgh) { + event Event = Q.submit([&](handler &cgh) { auto BufHostAcc = detail::Scheduler::getInstance() .StreamBuffersPool.find(this) @@ -96,7 +97,14 @@ void stream_impl::flush() { fflush(stdout); }); }); + if (LeadEvent) { + LeadEvent->attachEventToComplete(detail::getSyclObjImpl(Event)); + LeadEvent->getSubmittedQueue()->registerStreamServiceEvent( + detail::getSyclObjImpl(Event)); + } } + +void stream_impl::flush() { flush(nullptr); } } // namespace detail } // __SYCL_INLINE_VER_NAMESPACE(_V1) } // namespace sycl diff --git a/sycl/source/detail/stream_impl.hpp b/sycl/source/detail/stream_impl.hpp index 2c1c5ed63f509..2ed2c8ed2469b 100644 --- a/sycl/source/detail/stream_impl.hpp +++ b/sycl/source/detail/stream_impl.hpp @@ -42,6 +42,12 @@ class __SYCL_EXPORT stream_impl { GlobalOffsetAccessorT accessGlobalOffset(handler &CGH); // Enqueue task to copy stream buffer to the host and print the contents + // The host task event is then registered for post processing in the + // LeadEvent as well as in queue LeadEvent associated with. + void flush(const EventImplPtr &LeadEvent); + + // Enqueue task to copy stream buffer to the host and print the contents + // Remove during next ABI breaking window void flush(); size_t get_size() const; diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 93dea77c9101c..ccdb7366edb9d 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3756,6 +3756,7 @@ _ZN4sycl3_V16detail11make_kernelEmRKNS0_7contextENS0_7backendE _ZN4sycl3_V16detail11stream_impl15accessGlobalBufERNS0_7handlerE _ZN4sycl3_V16detail11stream_impl18accessGlobalOffsetERNS0_7handlerE _ZN4sycl3_V16detail11stream_impl20accessGlobalFlushBufERNS0_7handlerE +_ZN4sycl3_V16detail11stream_impl5flushERKSt10shared_ptrINS1_10event_implEE _ZN4sycl3_V16detail11stream_impl5flushEv _ZN4sycl3_V16detail11stream_implC1EmmRKNS0_13property_listE _ZN4sycl3_V16detail11stream_implC1EmmRNS0_7handlerE diff --git a/sycl/unittests/scheduler/CommandsWaitForEvents.cpp b/sycl/unittests/scheduler/CommandsWaitForEvents.cpp index 861f28f6d5c5a..024bb60120a69 100644 --- a/sycl/unittests/scheduler/CommandsWaitForEvents.cpp +++ b/sycl/unittests/scheduler/CommandsWaitForEvents.cpp @@ -73,6 +73,137 @@ pi_result getEventInfoFunc(pi_event Event, pi_event_info PName, size_t PVSize, return PI_SUCCESS; } +static bool GpiEventsWaitRedefineCalled = false; +pi_result piEventsWaitRedefine(pi_uint32 num_events, + const pi_event *event_list) { + GpiEventsWaitRedefineCalled = true; + return PI_SUCCESS; +} + +class StreamAUXCmdsWait_TestKernel; + +namespace sycl { +__SYCL_INLINE_VER_NAMESPACE(_V1) { +namespace detail { +template <> struct KernelInfo { + static constexpr unsigned getNumParams() { return 0; } + static const kernel_param_desc_t &getParamDesc(int) { + static kernel_param_desc_t Dummy; + return Dummy; + } + static constexpr const char *getName() { + return "StreamAUXCmdsWait_TestKernel"; + } + static constexpr bool isESIMD() { return true; } + static constexpr bool callsThisItem() { return false; } + static constexpr bool callsAnyThisFreeFunction() { return false; } + static constexpr int64_t getKernelSize() { return sizeof(sycl::stream); } +}; + +} // namespace detail +} // __SYCL_INLINE_VER_NAMESPACE(_V1) +} // namespace sycl + +static sycl::unittest::PiImage generateDefaultImage() { + using namespace sycl::unittest; + + PiPropertySet PropSet; + addESIMDFlag(PropSet); + std::vector Bin{0, 1, 2, 3, 4, 5}; // Random data + + PiArray Entries = + makeEmptyKernels({"StreamAUXCmdsWait_TestKernel"}); + + PiImage Img{PI_DEVICE_BINARY_TYPE_SPIRV, // Format + __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64, // DeviceTargetSpec + "", // Compile options + "", // Link options + std::move(Bin), + std::move(Entries), + std::move(PropSet)}; + + return Img; +} + +sycl::unittest::PiImage Img = generateDefaultImage(); +sycl::unittest::PiImageArray<1> ImgArray{&Img}; + +class EventImplProxyT : public sycl::detail::event_impl { +public: + using sycl::detail::event_impl::MPostCompleteEvents; + using sycl::detail::event_impl::MState; +}; + +class QueueImplProxyT : public sycl::detail::queue_impl { +public: + using sycl::detail::queue_impl::MStreamsServiceEvents; +}; + +TEST_F(SchedulerTest, StreamAUXCmdsWait) { + + { + sycl::unittest::PiMock Mock; + sycl::platform Plt = Mock.getPlatform(); + sycl::queue Q(Plt.get_devices()[0]); + std::shared_ptr QueueImpl = + detail::getSyclObjImpl(Q); + + auto QueueImplProxy = std::static_pointer_cast(QueueImpl); + + ASSERT_TRUE(QueueImplProxy->MStreamsServiceEvents.empty()) + << "No stream service events are expected at the beggining"; + + event Event = Q.submit([&](handler &CGH) { + stream Out(1024, 80, CGH); + CGH.single_task( + [=]() { Out << "Hello, World!" << endl; }); + }); + + ASSERT_TRUE(QueueImplProxy->MStreamsServiceEvents.size() == 1) + << "Expected 1 service stream event"; + + std::shared_ptr EventImpl = + detail::getSyclObjImpl(Event); + + auto EventImplProxy = std::static_pointer_cast(EventImpl); + + ASSERT_TRUE(EventImplProxy->MPostCompleteEvents.size() == 1) + << "Expected 1 post complete event"; + + Q.wait(); + + ASSERT_TRUE(QueueImplProxy->MStreamsServiceEvents.empty()) + << "No stream service events are expected to left after wait"; + } + + { + sycl::unittest::PiMock Mock; + sycl::platform Plt = Mock.getPlatform(); + sycl::queue Q(Plt.get_devices()[0]); + std::shared_ptr QueueImpl = + detail::getSyclObjImpl(Q); + + Mock.redefineBefore(piEventsWaitRedefine); + + auto QueueImplProxy = std::static_pointer_cast(QueueImpl); + + pi_event PIEvent = nullptr; + pi_result Res = + mock_piEventCreate(/*context = */ (pi_context)0x1, &PIEvent); + assert(PI_SUCCESS == Res); + + auto EventImpl = std::make_shared(QueueImpl); + EventImpl->getHandleRef() = PIEvent; + + QueueImplProxy->registerStreamServiceEvent(EventImpl); + + QueueImplProxy->wait(); + + ASSERT_TRUE(GpiEventsWaitRedefineCalled) + << "No stream service events are expected to left after wait"; + } +} + TEST_F(SchedulerTest, CommandsWaitForEvents) { sycl::unittest::PiMock Mock; sycl::platform Plt = Mock.getPlatform();