From 01208a1f78cdfa672e8d212a95ee3195f9402dc1 Mon Sep 17 00:00:00 2001 From: Paulius Velesko Date: Sun, 18 Feb 2024 06:02:06 +0200 Subject: [PATCH 01/19] Rename Associate -> Assign --- src/backend/Level0/CHIPBackendLevel0.cc | 34 ++++++++++++------------- src/backend/Level0/CHIPBackendLevel0.hh | 12 ++++----- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/src/backend/Level0/CHIPBackendLevel0.cc b/src/backend/Level0/CHIPBackendLevel0.cc index f6f47b9a0..f0f1dd5fb 100644 --- a/src/backend/Level0/CHIPBackendLevel0.cc +++ b/src/backend/Level0/CHIPBackendLevel0.cc @@ -220,22 +220,22 @@ void CHIPEventLevel0::assignCmdList(CHIPContextLevel0 *ChipContext, ze_command_list_handle_t CmdList) { logTrace("CHIPEventLevel0({})::assignCmdList({})", (void *)this, (void *)CmdList); - assert(AssocCmdList_ == nullptr && "command list already assigned!"); - assert(AssocContext_ == nullptr && "queue already assigned!"); - AssocCmdList_ = CmdList; - AssocContext_ = ChipContext; + assert(AssignedCmdList_ == nullptr && "command list already assigned!"); + assert(AssignedContext_ == nullptr && "queue already assigned!"); + AssignedCmdList_ = CmdList; + AssignedContext_ = ChipContext; } void CHIPEventLevel0::unassignCmdList() { - assert(AssocCmdList_ != nullptr && "command list not assigned!"); - assert(AssocContext_ != nullptr && "queue not assigned!"); + assert(AssignedCmdList_ != nullptr && "command list not assigned!"); + assert(AssignedContext_ != nullptr && "queue not assigned!"); logTrace("CHIPEventLevel0({})::unassignCmdList({})", (void *)this, - (void *)AssocCmdList_); - auto Status = zeCommandListReset(AssocCmdList_); + (void *)AssignedCmdList_); + auto Status = zeCommandListReset(AssignedCmdList_); CHIPERR_CHECK_LOG_AND_THROW(Status, ZE_RESULT_SUCCESS, hipErrorTbd); - AssocContext_->returnCmdList(AssocCmdList_); - AssocCmdList_ = nullptr; - AssocContext_ = nullptr; + AssignedContext_->returnCmdList(AssignedCmdList_); + AssignedCmdList_ = nullptr; + 
AssignedContext_ = nullptr; } void CHIPEventLevel0::reset() { @@ -271,11 +271,11 @@ CHIPEventLevel0::~CHIPEventLevel0() { wait(); } - if (AssocCmdList_ || AssocContext_) { + if (AssignedCmdList_ || AssignedContext_) { logError("~CHIPEventLevel0({}) disassociating command list {}", - (void *)this, (void *)AssocCmdList_); + (void *)this, (void *)AssignedCmdList_); logError("~CHIPEventLevel0({}) disassociating queue {}", (void *)this, - (void *)AssocContext_); + (void *)AssignedContext_); // assert(false); } @@ -658,7 +658,7 @@ void CHIPStaleEventMonitorLevel0::checkEvents() { if (ChipEventLz->updateFinishStatus(false)) { ChipEventLz->releaseDependencies(); Backend->Events.erase(Backend->Events.begin() + EventIdx); - if (ChipEventLz->getAssocCmdList()) + if (ChipEventLz->getAssignedCmdList()) ChipEventLz->unassignCmdList(); ChipEventLz->doActions(); } @@ -709,7 +709,7 @@ void CHIPStaleEventMonitorLevel0::exitChecks() { auto EventLz = std::static_pointer_cast(Event); logError("Uncollected Backend->Events: {} {} AssocCmdList {}", (void *)Event.get(), Event->Msg, - (void *)EventLz->getAssocCmdList()); + (void *)EventLz->getAssignedCmdList()); } pthread_exit(0); } @@ -1680,7 +1680,7 @@ CHIPBackendLevel0::createEventShared(chipstar::Context *ChipCtx, Event = ZeCtx->getEventFromPool(); std::static_pointer_cast(Event)->reset(); - assert(!std::static_pointer_cast(Event)->getAssocCmdList()); + assert(!std::static_pointer_cast(Event)->getAssignedCmdList()); logDebug("CHIPBackendLevel0::createEventShared: Context {} Event {}", (void *)ChipCtx, (void *)Event.get()); return Event; diff --git a/src/backend/Level0/CHIPBackendLevel0.hh b/src/backend/Level0/CHIPBackendLevel0.hh index dbefb6eb4..19ae8f9b0 100644 --- a/src/backend/Level0/CHIPBackendLevel0.hh +++ b/src/backend/Level0/CHIPBackendLevel0.hh @@ -79,8 +79,8 @@ public: using ActionFn = std::function; private: - ze_command_list_handle_t AssocCmdList_ = nullptr; - CHIPContextLevel0 *AssocContext_ = nullptr; + 
ze_command_list_handle_t AssignedCmdList_ = nullptr; + CHIPContextLevel0 *AssignedContext_ = nullptr; /// Device timestamp gets ultimately stored here uint64_t Timestamp_ = 0; /// Since device counters can overflow resulting in a negative time between @@ -98,23 +98,23 @@ public: uint64_t &getTimestamp() { return Timestamp_; } uint64_t &getDeviceTimestamp() { return DeviceTimestamp_; } uint64_t &getHostTimestamp() { return HostTimestamp_; } - ze_command_list_handle_t getAssocCmdList() { return AssocCmdList_; } + ze_command_list_handle_t getAssignedCmdList() { return AssignedCmdList_; } /** - * @brief Associate a command list with this event. When this event completes, + * @brief Assign a command list with this event. When this event completes, * the EventMonitor thread will return the command list handle back to the * queue stack where it came from. * * @param ChipQueue queue where the event was created (and where the command * list stack resides) - * @param CmdList command list to associate with this event + * @param CmdList command list to Assign with this event */ void assignCmdList(CHIPContextLevel0 *ChipContext, ze_command_list_handle_t CmdList); /** * @brief Reset and then return the command list handle back to the context - * pointed by AssocContext_ + * pointed by AssignedContext_ */ void unassignCmdList(); From 0b719624c4c362ff6ad1de1e9fa2cb0aeddbb288 Mon Sep 17 00:00:00 2001 From: Paulius Velesko Date: Wed, 7 Feb 2024 09:40:33 +0200 Subject: [PATCH 02/19] switch to using event stack inside pool --- src/backend/Level0/CHIPBackendLevel0.cc | 39 ++++++++++--------------- src/backend/Level0/CHIPBackendLevel0.hh | 8 ++--- 2 files changed, 18 insertions(+), 29 deletions(-) diff --git a/src/backend/Level0/CHIPBackendLevel0.cc b/src/backend/Level0/CHIPBackendLevel0.cc index f0f1dd5fb..06813761b 100644 --- a/src/backend/Level0/CHIPBackendLevel0.cc +++ b/src/backend/Level0/CHIPBackendLevel0.cc @@ -666,7 +666,7 @@ void CHIPStaleEventMonitorLevel0::checkEvents() { // 
delete the event if refcount reached 1 (this->ChipEvent) if (ChipEventLz.use_count() == 1) { if (ChipEventLz->EventPool) { - ChipEventLz->EventPool->returnSlot(ChipEventLz->EventPoolIndex); + ChipEventLz->EventPool->returnEvent(ChipEventLz); } #ifndef NDEBUG ChipEventLz->markDeleted(); @@ -1607,9 +1607,10 @@ LZEventPool::LZEventPool(CHIPContextLevel0 *Ctx, unsigned int Size) for (unsigned i = 0; i < Size_; i++) { chipstar::EventFlags Flags; - Events_.push_back(std::shared_ptr( - new CHIPEventLevel0(Ctx_, this, i, Flags))); - FreeSlots_.push(i); + auto NewEvent = std::shared_ptr( + new CHIPEventLevel0(Ctx_, this, i, Flags)); + Events_.push_back(NewEvent); + AvailableEvents_.push(NewEvent); } }; @@ -1621,6 +1622,8 @@ LZEventPool::~LZEventPool() { logWarn("CHIPUserEventLevel0 objects still exist at the time of EventPool " "destruction"); + while (AvailableEvents_.size()) + AvailableEvents_.pop(); Events_.clear(); // shared_ptr's will be deleted // The application must not call this function from // simultaneous threads with the same event pool handle. 
@@ -1632,30 +1635,18 @@ LZEventPool::~LZEventPool() { std::shared_ptr LZEventPool::getEvent() { std::shared_ptr Event; - { - int PoolIndex = getFreeSlot(); - if (PoolIndex == -1) - return nullptr; - Event = Events_[PoolIndex]; - } + if (!AvailableEvents_.size()) + return nullptr; + + Event = AvailableEvents_.top(); + AvailableEvents_.pop(); return Event; }; -int LZEventPool::getFreeSlot() { - if (FreeSlots_.size() == 0) - return -1; - - auto Slot = FreeSlots_.top(); - FreeSlots_.pop(); - - return Slot; -} - -void LZEventPool::returnSlot(int Slot) { - LOCK(EventPoolMtx); // LZEventPool::FreeSlots_ - FreeSlots_.push(Slot); - return; +void LZEventPool::returnEvent(std::shared_ptr Event) { + LOCK(EventPoolMtx); + AvailableEvents_.push(Event); } // End EventPool diff --git a/src/backend/Level0/CHIPBackendLevel0.hh b/src/backend/Level0/CHIPBackendLevel0.hh index 19ae8f9b0..1edf5dde8 100644 --- a/src/backend/Level0/CHIPBackendLevel0.hh +++ b/src/backend/Level0/CHIPBackendLevel0.hh @@ -208,19 +208,17 @@ private: CHIPContextLevel0 *Ctx_; ze_event_pool_handle_t EventPool_; unsigned int Size_; - std::stack FreeSlots_; std::vector> Events_; - - int getFreeSlot(); + std::stack> AvailableEvents_; public: std::mutex EventPoolMtx; LZEventPool(CHIPContextLevel0 *Ctx, unsigned int Size); ~LZEventPool(); - bool EventAvailable() { return FreeSlots_.size() > 0; } + bool EventAvailable() { return AvailableEvents_.size() > 0; } ze_event_pool_handle_t get() { return EventPool_; } - void returnSlot(int Slot); + void returnEvent(std::shared_ptr Event); std::shared_ptr getEvent(); }; From bddeec5e0402d88acc473f369dafffb8d8da59a2 Mon Sep 17 00:00:00 2001 From: Paulius Velesko Date: Wed, 7 Feb 2024 11:57:25 +0200 Subject: [PATCH 03/19] Event refactor - asserts & logDebug ~Event skip nullptr check expand event logging hipEvent Dependency debug move addDependency .cc logTrace & debug event typo Add null event assertion in createEventShared function --- src/CHIPBackend.cc | 11 +++++++++++ 
src/CHIPBackend.hh | 9 +++++---- src/backend/Level0/CHIPBackendLevel0.cc | 22 +++++++++++----------- 3 files changed, 27 insertions(+), 15 deletions(-) diff --git a/src/CHIPBackend.cc b/src/CHIPBackend.cc index 2875f1946..e1acbbc88 100644 --- a/src/CHIPBackend.cc +++ b/src/CHIPBackend.cc @@ -212,9 +212,20 @@ chipstar::Event::Event(chipstar::Context *Ctx, chipstar::EventFlags Flags) : EventStatus_(EVENT_STATUS_INIT), Flags_(Flags), ChipContext_(Ctx), Msg("") {} +void chipstar::Event::addDependency( + const std::shared_ptr &Event) { + assert(!Deleted_ && "Event use after delete!"); + logDebug("Event {} Msg {} now depends on event {} msg:{}", (void *)this, Msg, + (void *)Event.get(), Event->Msg); + DependsOnList.push_back(Event); +} + void chipstar::Event::releaseDependencies() { assert(!Deleted_ && "chipstar::Event use after delete!"); LOCK(EventMtx); // chipstar::Event::DependsOnList + for (auto &Dep : DependsOnList) + logDebug("Event {} msg: {} no longer depends on event {}", (void *)this, + Msg, (void *)Dep.get(), Dep->Msg); DependsOnList.clear(); } diff --git a/src/CHIPBackend.hh b/src/CHIPBackend.hh index 82bd348c4..bde4892a8 100644 --- a/src/CHIPBackend.hh +++ b/src/CHIPBackend.hh @@ -668,10 +668,11 @@ public: void setTrackCalled(bool Val) { TrackCalled_ = Val; } bool isUserEvent() { return UserEvent_; } void setUserEvent(bool Val) { UserEvent_ = Val; } - void addDependency(const std::shared_ptr &Event) { - assert(!Deleted_ && "Event use after delete!"); - DependsOnList.push_back(Event); - } + /// @brief Add an event on which this event depends, preventing that event + /// from getting recycled + /// @param Event + void addDependency(const std::shared_ptr &Event); + /// @brief Release dependencies, allowing them to be recycled void releaseDependencies(); chipstar::EventFlags getFlags() { return Flags_; } std::mutex EventMtx; diff --git a/src/backend/Level0/CHIPBackendLevel0.cc b/src/backend/Level0/CHIPBackendLevel0.cc index 06813761b..86a283068 100644 --- 
a/src/backend/Level0/CHIPBackendLevel0.cc +++ b/src/backend/Level0/CHIPBackendLevel0.cc @@ -239,6 +239,8 @@ void CHIPEventLevel0::unassignCmdList() { } void CHIPEventLevel0::reset() { + logTrace("CHIPEventLevel0::reset() {} msg: {} handle: {}", (void *)this, Msg, + (void *)Event_); auto Status = zeEventHostReset(Event_); CHIPERR_CHECK_LOG_AND_THROW(Status, ZE_RESULT_SUCCESS, hipErrorTbd); { @@ -264,7 +266,8 @@ ze_event_handle_t CHIPEventLevel0::peek() { } CHIPEventLevel0::~CHIPEventLevel0() { - logTrace("~CHIPEventLevel0({})", (void *)this); + logTrace("~CHIPEventLevel0() {} msg: {} handle: {}", (void *)this, Msg, + (void *)Event_); // if in RECORDING state, wait to finish if (EventStatus_ == EVENT_STATUS_RECORDING) { logTrace("~CHIPEventLevel0({}) waiting for event to finish", (void *)this); @@ -279,10 +282,8 @@ CHIPEventLevel0::~CHIPEventLevel0() { // assert(false); } - if (Event_) { - auto Status = zeEventDestroy(Event_); - assert(Status == ZE_RESULT_SUCCESS); - } + auto Status = zeEventDestroy(Event_); + assert(Status == ZE_RESULT_SUCCESS); if (isUserEvent()) { assert(!TrackCalled_ && @@ -454,10 +455,7 @@ bool CHIPEventLevel0::updateFinishStatus(bool ThrowErrorIfNotReady) { EventStatusNew = getEventStatusStr(); } - // logTrace("CHIPEventLevel0::updateFinishStatus() {} Refc: {} {}: {} -> - // {}", - // (void *)this, getCHIPRefc(), Msg, EventStatusOld, - // EventStatusNew); + if (EventStatusNew != EventStatusOld) { return true; } @@ -546,7 +544,8 @@ float CHIPEventLevel0::getElapsedTime(chipstar::Event *OtherIn) { void CHIPEventLevel0::hostSignal() { assert(!Deleted_ && "chipstar::Event use after delete!"); - logTrace("CHIPEventLevel0::hostSignal()"); + logTrace("CHIPEventLevel0::hostSignal() {} Msg: {} Handle: {}", (void *)this, + Msg, (void *)Event_); auto Status = zeEventHostSignal(Event_); CHIPERR_CHECK_LOG_AND_THROW(Status, ZE_RESULT_SUCCESS, hipErrorTbd); @@ -1669,6 +1668,7 @@ CHIPBackendLevel0::createEventShared(chipstar::Context *ChipCtx, auto ZeCtx = 
(CHIPContextLevel0 *)ChipCtx; Event = ZeCtx->getEventFromPool(); + assert(Event && "LZEventPool returned a null event"); std::static_pointer_cast(Event)->reset(); assert(!std::static_pointer_cast(Event)->getAssignedCmdList()); @@ -1681,7 +1681,7 @@ chipstar::Event *CHIPBackendLevel0::createEvent(chipstar::Context *ChipCtx, chipstar::EventFlags Flags) { auto Event = new CHIPEventLevel0((CHIPContextLevel0 *)ChipCtx, Flags); Event->setUserEvent(true); - logDebug("CHIPBackendLevel0::createEventd: Context {} Event {}", + logDebug("CHIPBackendLevel0::createEvent: Context {} Event {}", (void *)ChipCtx, (void *)Event); return Event; } From adb36f8e0f16adf97de3c34797cc41109a64e9df Mon Sep 17 00:00:00 2001 From: Paulius Velesko Date: Mon, 12 Feb 2024 16:13:22 +0200 Subject: [PATCH 04/19] LZEventPool::returnEvent logDebug --- src/backend/Level0/CHIPBackendLevel0.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/backend/Level0/CHIPBackendLevel0.cc b/src/backend/Level0/CHIPBackendLevel0.cc index 86a283068..c473ab287 100644 --- a/src/backend/Level0/CHIPBackendLevel0.cc +++ b/src/backend/Level0/CHIPBackendLevel0.cc @@ -1645,6 +1645,8 @@ std::shared_ptr LZEventPool::getEvent() { void LZEventPool::returnEvent(std::shared_ptr Event) { LOCK(EventPoolMtx); + logTrace("Returning event {} handle {}", (void *)Event.get(), + (void *)Event.get()->get()); AvailableEvents_.push(Event); } From 9478f9278a8779ed2dadc615d21041ff835ebc6a Mon Sep 17 00:00:00 2001 From: Paulius Velesko Date: Wed, 7 Feb 2024 10:11:55 +0200 Subject: [PATCH 05/19] fix reference count bug --- src/backend/Level0/CHIPBackendLevel0.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/backend/Level0/CHIPBackendLevel0.cc b/src/backend/Level0/CHIPBackendLevel0.cc index c473ab287..f3102adef 100644 --- a/src/backend/Level0/CHIPBackendLevel0.cc +++ b/src/backend/Level0/CHIPBackendLevel0.cc @@ -662,8 +662,9 @@ void CHIPStaleEventMonitorLevel0::checkEvents() { ChipEventLz->doActions(); } - // 
delete the event if refcount reached 1 (this->ChipEvent) - if (ChipEventLz.use_count() == 1) { + // delete the event if refcount reached 2 + // this->ChipEvent and LZEventPool::Events_ + if (ChipEventLz.use_count() == 2) { if (ChipEventLz->EventPool) { ChipEventLz->EventPool->returnEvent(ChipEventLz); } From fd0a54b1b1ae4b81c5af1cc53b1c1c4012f02d3c Mon Sep 17 00:00:00 2001 From: Paulius Velesko Date: Wed, 7 Feb 2024 14:53:43 +0200 Subject: [PATCH 06/19] Add eventRecord dependencies + clear --- src/backend/Level0/CHIPBackendLevel0.cc | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/backend/Level0/CHIPBackendLevel0.cc b/src/backend/Level0/CHIPBackendLevel0.cc index f3102adef..249d92517 100644 --- a/src/backend/Level0/CHIPBackendLevel0.cc +++ b/src/backend/Level0/CHIPBackendLevel0.cc @@ -241,6 +241,11 @@ void CHIPEventLevel0::unassignCmdList() { void CHIPEventLevel0::reset() { logTrace("CHIPEventLevel0::reset() {} msg: {} handle: {}", (void *)this, Msg, (void *)Event_); + if (DependsOnList.size() > 0) + logWarn("CHIPEventLevel0::reset() called while event has dependencies"); + DependsOnList.clear(); + // assert(DependsOnList.empty() && "CHIPEventLevel0::reset() called while " + // "event has dependencies"); auto Status = zeEventHostReset(Event_); CHIPERR_CHECK_LOG_AND_THROW(Status, ZE_RESULT_SUCCESS, hipErrorTbd); { @@ -385,6 +390,8 @@ void CHIPQueueLevel0::recordEvent(chipstar::Event *ChipEvent) { ze_command_list_handle_t CommandList = ChipCtxLz_->getCmdListReg(); auto EventsToWaitOn = getSyncQueuesLastEvents(); + for (auto &Event : EventsToWaitOn) + ChipEventLz->addDependency(Event); auto EventToWaitOnHandles = getEventListHandles(EventsToWaitOn); // create an Event for making a dependency chain @@ -402,6 +409,7 @@ void CHIPQueueLevel0::recordEvent(chipstar::Event *ChipEvent) { EventToWaitOnHandles.data()); CHIPERR_CHECK_LOG_AND_THROW(Status, ZE_RESULT_SUCCESS, hipErrorTbd); + ChipEventLz->addDependency(TimestampWriteComplete); // The 
application must not call this function from // simultaneous threads with the same command list handle. Status = zeCommandListAppendMemoryCopy( @@ -663,7 +671,7 @@ void CHIPStaleEventMonitorLevel0::checkEvents() { } // delete the event if refcount reached 2 - // this->ChipEvent and LZEventPool::Events_ + // this->ChipEvent and LZEventPool::Events_ if (ChipEventLz.use_count() == 2) { if (ChipEventLz->EventPool) { ChipEventLz->EventPool->returnEvent(ChipEventLz); From 9ab8ef666af76ada8f93a03b836dff1e4f295f64 Mon Sep 17 00:00:00 2001 From: Paulius Velesko Date: Wed, 7 Feb 2024 14:54:03 +0200 Subject: [PATCH 07/19] memFillAsync3D memFillAsyncImpl --- src/CHIPBackend.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/CHIPBackend.cc b/src/CHIPBackend.cc index e1acbbc88..1b5ebd0a1 100644 --- a/src/CHIPBackend.cc +++ b/src/CHIPBackend.cc @@ -1680,7 +1680,7 @@ void chipstar::Queue::memFillAsync3D(hipPitchedPtr PitchedDevPtr, int Value, ChipEvent->Msg = "memFillAsync3D"; ::Backend->trackEvent(ChipEvent); } else - memFillAsync(DstP + Offset, SizeBytes, &Value, 1); + memFillAsyncImpl(DstP + Offset, SizeBytes, &Value, 1); } } From dbbf893179e58af78dfda0ee408a7c39707e8a4f Mon Sep 17 00:00:00 2001 From: Paulius Velesko Date: Thu, 8 Feb 2024 10:35:16 +0200 Subject: [PATCH 08/19] bugfix - missing deps in enqueueBarrierImpl --- src/backend/Level0/CHIPBackendLevel0.cc | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/src/backend/Level0/CHIPBackendLevel0.cc b/src/backend/Level0/CHIPBackendLevel0.cc index 249d92517..4954e5b42 100644 --- a/src/backend/Level0/CHIPBackendLevel0.cc +++ b/src/backend/Level0/CHIPBackendLevel0.cc @@ -1403,9 +1403,9 @@ std::shared_ptr CHIPQueueLevel0::enqueueBarrierImpl( static_cast(Backend)->createEventShared( ChipContext_); BarrierEvent->Msg = "barrier"; - size_t NumEventsToWaitFor = 0; - NumEventsToWaitFor = EventsToWaitFor.size(); + auto QueueSyncEvents = addDependenciesQueueSync(BarrierEvent); + 
size_t NumEventsToWaitFor = QueueSyncEvents.size() + EventsToWaitFor.size(); ze_event_handle_t *EventHandles = nullptr; ze_event_handle_t SignalEventHandle = nullptr; @@ -1415,7 +1415,7 @@ std::shared_ptr CHIPQueueLevel0::enqueueBarrierImpl( if (NumEventsToWaitFor > 0) { EventHandles = new ze_event_handle_t[NumEventsToWaitFor]; - for (size_t i = 0; i < NumEventsToWaitFor; i++) { + for (size_t i = 0; i < EventsToWaitFor.size(); i++) { std::shared_ptr ChipEvent = EventsToWaitFor[i]; std::shared_ptr ChipEventLz = std::static_pointer_cast(ChipEvent); @@ -1423,6 +1423,10 @@ std::shared_ptr CHIPQueueLevel0::enqueueBarrierImpl( EventHandles[i] = ChipEventLz->peek(); BarrierEvent->addDependency(ChipEventLz); } + + for (size_t i = 0; i < QueueSyncEvents.size(); i++) { + EventHandles[i + EventsToWaitFor.size()] = QueueSyncEvents[i]; + } } // done gather Event_ handles to wait on // TODO Should this be memory or compute? @@ -1448,9 +1452,9 @@ std::shared_ptr CHIPQueueLevel0::enqueueBarrierImplReg( static_cast(Backend)->createEventShared( ChipContext_); BarrierEvent->Msg = "barrier"; - size_t NumEventsToWaitFor = 0; - NumEventsToWaitFor = EventsToWaitFor.size(); + auto QueueSyncEvents = addDependenciesQueueSync(BarrierEvent); + size_t NumEventsToWaitFor = QueueSyncEvents.size() + EventsToWaitFor.size(); ze_event_handle_t *EventHandles = nullptr; ze_event_handle_t SignalEventHandle = nullptr; @@ -1460,7 +1464,7 @@ std::shared_ptr CHIPQueueLevel0::enqueueBarrierImplReg( if (NumEventsToWaitFor > 0) { EventHandles = new ze_event_handle_t[NumEventsToWaitFor]; - for (size_t i = 0; i < NumEventsToWaitFor; i++) { + for (size_t i = 0; i < EventsToWaitFor.size(); i++) { std::shared_ptr ChipEvent = EventsToWaitFor[i]; std::shared_ptr ChipEventLz = std::static_pointer_cast(ChipEvent); @@ -1468,6 +1472,10 @@ std::shared_ptr CHIPQueueLevel0::enqueueBarrierImplReg( EventHandles[i] = ChipEventLz->peek(); BarrierEvent->addDependency(ChipEventLz); } + + for (size_t i = 0; i < 
QueueSyncEvents.size(); i++) { + EventHandles[i + EventsToWaitFor.size()] = QueueSyncEvents[i]; + } } // done gather Event_ handles to wait on // TODO Should this be memory or compute? From 83729b19ebad29d8b85e1725a9c861b73c789059 Mon Sep 17 00:00:00 2001 From: Paulius Velesko Date: Thu, 8 Feb 2024 18:11:27 +0200 Subject: [PATCH 09/19] move event callback checker into EventMonitor checkCallbacks() inside event monitor bugfix --- src/backend/Level0/CHIPBackendLevel0.cc | 141 +++++++++++++++++------- src/backend/Level0/CHIPBackendLevel0.hh | 2 + 2 files changed, 101 insertions(+), 42 deletions(-) diff --git a/src/backend/Level0/CHIPBackendLevel0.cc b/src/backend/Level0/CHIPBackendLevel0.cc index 4954e5b42..e1c9976e7 100644 --- a/src/backend/Level0/CHIPBackendLevel0.cc +++ b/src/backend/Level0/CHIPBackendLevel0.cc @@ -599,54 +599,103 @@ CHIPCallbackDataLevel0::CHIPCallbackDataLevel0(hipStreamCallback_t CallbackF, // *********************************************************************** void CHIPCallbackEventMonitorLevel0::monitor() { + // CHIPCallbackDataLevel0 *CbData; + // while (true) { + // usleep(200); + // LOCK(EventMonitorMtx); // chipstar::EventMonitor::Stop + // { + + // if (Stop) { + // logTrace("CHIPCallbackEventMonitorLevel0 out of callbacks. 
Exiting " + // "thread"); + // if (Backend->CallbackQueue.size()) + // logError("Callback thread exiting while there are still active " + // "callbacks in the queue"); + // pthread_exit(0); + // } + + // LOCK(Backend->CallbackQueueMtx); // Backend::CallbackQueue + + // if ((Backend->CallbackQueue.size() == 0)) + // continue; + + // // get the callback item + // CbData = (CHIPCallbackDataLevel0 *)Backend->CallbackQueue.front(); + + // // Lock the item and members + // assert(CbData); + // LOCK( // Backend::CallbackQueue + // CbData->CallbackDataMtx); + // Backend->CallbackQueue.pop(); + + // // Update Status + // logTrace("CHIPCallbackEventMonitorLevel0::monitor() checking event " + // "status for {}", + // static_cast(CbData->GpuReady.get())); + // CbData->GpuReady->updateFinishStatus(false); + // if (CbData->GpuReady->getEventStatus() != EVENT_STATUS_RECORDED) { + // // if not ready, push to the back + // Backend->CallbackQueue.push(CbData); + // continue; + // } + // } + + // CbData->execute(hipSuccess); + // CbData->CpuCallbackComplete->hostSignal(); + // CbData->GpuAck->wait(); + + // delete CbData; + // pthread_yield(); + // } +} + +void CHIPStaleEventMonitorLevel0::checkCallbacks() { CHIPCallbackDataLevel0 *CbData; - while (true) { - usleep(20000); - LOCK(EventMonitorMtx); // chipstar::EventMonitor::Stop - { - - if (Stop) { - logTrace("CHIPCallbackEventMonitorLevel0 out of callbacks. 
Exiting " - "thread"); - if (Backend->CallbackQueue.size()) - logError("Callback thread exiting while there are still active " - "callbacks in the queue"); - pthread_exit(0); - } + // usleep(200); + LOCK(EventMonitorMtx); // chipstar::EventMonitor::Stop + { - LOCK(Backend->CallbackQueueMtx); // Backend::CallbackQueue - - if ((Backend->CallbackQueue.size() == 0)) - continue; - - // get the callback item - CbData = (CHIPCallbackDataLevel0 *)Backend->CallbackQueue.front(); - - // Lock the item and members - assert(CbData); - LOCK( // Backend::CallbackQueue - CbData->CallbackDataMtx); - Backend->CallbackQueue.pop(); - - // Update Status - logTrace("CHIPCallbackEventMonitorLevel0::monitor() checking event " - "status for {}", - static_cast(CbData->GpuReady.get())); - CbData->GpuReady->updateFinishStatus(false); - if (CbData->GpuReady->getEventStatus() != EVENT_STATUS_RECORDED) { - // if not ready, push to the back - Backend->CallbackQueue.push(CbData); - continue; - } + if (Stop) { + logTrace("CHIPCallbackEventMonitorLevel0 out of callbacks. 
Exiting " + "thread"); + if (Backend->CallbackQueue.size()) + logError("Callback thread exiting while there are still active " + "callbacks in the queue"); + pthread_exit(0); } - CbData->execute(hipSuccess); - CbData->CpuCallbackComplete->hostSignal(); - CbData->GpuAck->wait(); + LOCK(Backend->CallbackQueueMtx); // Backend::CallbackQueue - delete CbData; - pthread_yield(); + if ((Backend->CallbackQueue.size() == 0)) + return; + + // get the callback item + CbData = (CHIPCallbackDataLevel0 *)Backend->CallbackQueue.front(); + + // Lock the item and members + assert(CbData); + LOCK( // Backend::CallbackQueue + CbData->CallbackDataMtx); + Backend->CallbackQueue.pop(); + + // Update Status + logTrace("CHIPCallbackEventMonitorLevel0::monitor() checking event " + "status for {}", + static_cast(CbData->GpuReady.get())); + CbData->GpuReady->updateFinishStatus(false); + if (CbData->GpuReady->getEventStatus() != EVENT_STATUS_RECORDED) { + // if not ready, push to the back + Backend->CallbackQueue.push(CbData); + return; + } } + + CbData->execute(hipSuccess); + CbData->CpuCallbackComplete->hostSignal(); + CbData->GpuAck->wait(); + + delete CbData; + pthread_yield(); } void CHIPStaleEventMonitorLevel0::checkEvents() { @@ -738,6 +787,7 @@ void CHIPStaleEventMonitorLevel0::monitor() { // Stop is false and I have more events while (true) { usleep(200); + checkCallbacks(); checkEvents(); exitChecks(); } // endless loop @@ -1951,6 +2001,13 @@ CHIPContextLevel0::~CHIPContextLevel0() { // delete all event pools for (LZEventPool *Pool : EventPools_) delete Pool; + + if (Backend->Events.size()) { + logWarn("Backend->Events still exist at the time of Context " + "destruction..."); + Backend->Events.clear(); + } + EventPools_.clear(); // delete all devicesA diff --git a/src/backend/Level0/CHIPBackendLevel0.hh b/src/backend/Level0/CHIPBackendLevel0.hh index 1edf5dde8..6015a112c 100644 --- a/src/backend/Level0/CHIPBackendLevel0.hh +++ b/src/backend/Level0/CHIPBackendLevel0.hh @@ -195,6 
+195,8 @@ class CHIPStaleEventMonitorLevel0 : public chipstar::EventMonitor { */ void exitChecks(); + void checkCallbacks(); + public: ~CHIPStaleEventMonitorLevel0() { logTrace("CHIPStaleEventMonitorLevel0 DEST"); From 62e572c82c344b428b06bb31d22dd66f9040e661 Mon Sep 17 00:00:00 2001 From: Paulius Velesko Date: Mon, 12 Feb 2024 16:11:36 +0200 Subject: [PATCH 10/19] addDependenciesQueueSync reversed order on deps --- src/backend/Level0/CHIPBackendLevel0.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/backend/Level0/CHIPBackendLevel0.cc b/src/backend/Level0/CHIPBackendLevel0.cc index e1c9976e7..db300d69d 100644 --- a/src/backend/Level0/CHIPBackendLevel0.cc +++ b/src/backend/Level0/CHIPBackendLevel0.cc @@ -895,8 +895,8 @@ std::vector CHIPQueueLevel0::addDependenciesQueueSync( // that they don't get destroyed before MemCopyEvent for (auto &Event : EventsToWaitOn) { LOCK(Event->EventMtx); - std::static_pointer_cast(Event)->addDependency( - TargetEvent); + std::static_pointer_cast(TargetEvent) + ->addDependency(Event); } std::vector EventHandles = From 61000fdb636db09e3d5566cfc2bb02194551f6e1 Mon Sep 17 00:00:00 2001 From: Paulius Velesko Date: Mon, 12 Feb 2024 16:14:07 +0200 Subject: [PATCH 11/19] getSyncQueuesLastEvents getLastEvent() fix --- src/CHIPBackend.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/CHIPBackend.cc b/src/CHIPBackend.cc index 1b5ebd0a1..c02c73462 100644 --- a/src/CHIPBackend.cc +++ b/src/CHIPBackend.cc @@ -1494,8 +1494,9 @@ chipstar::Queue::getSyncQueuesLastEvents() { LOCK(Dev->DeviceMtx); // chipstar::Device::ChipQueues_ via getQueuesNoLock() std::vector> EventsToWaitOn; - if (this->getLastEvent()) - EventsToWaitOn.push_back(this->getLastEvent()); + auto thisLastEvent = this->getLastEvent(); + if (thisLastEvent) + EventsToWaitOn.push_back(thisLastEvent); // If this stream is default legacy stream, sync with all other streams on // this device From e95462d6ad5e74257e81c4d3603aa26a06e733ff 
Mon Sep 17 00:00:00 2001 From: Paulius Velesko Date: Tue, 13 Feb 2024 06:37:22 +0200 Subject: [PATCH 12/19] 2D ops - capture all events --- src/CHIPBackend.cc | 33 +++++++++------------------------ 1 file changed, 9 insertions(+), 24 deletions(-) diff --git a/src/CHIPBackend.cc b/src/CHIPBackend.cc index c02c73462..d0384adb8 100644 --- a/src/CHIPBackend.cc +++ b/src/CHIPBackend.cc @@ -1597,13 +1597,9 @@ void chipstar::Queue::memCopyAsync2D(void *Dst, size_t DPitch, const void *Src, // perform the copy for (size_t i = 0; i < Height; ++i) { - // capture the event on last iteration - if (i == Height - 1) { - ChipEvent = memCopyAsyncImpl(Dst, Src, Width); - ChipEvent->Msg = "memCopyAsync2D"; - } else { - memCopyAsyncImpl(Dst, Src, Width); - } + ChipEvent = memCopyAsyncImpl(Dst, Src, Width); + ChipEvent->Msg = "memCopyAsync2D"; + ::Backend->trackEvent(ChipEvent); Src = (char *)Src + SPitch; Dst = (char *)Dst + DPitch; } @@ -1612,9 +1608,6 @@ void chipstar::Queue::memCopyAsync2D(void *Dst, size_t DPitch, const void *Src, this->MemMap(AllocInfoDst, chipstar::Queue::MEM_MAP_TYPE::HOST_READ_WRITE); if (AllocInfoSrc && AllocInfoSrc->MemoryType == hipMemoryTypeHost) this->MemMap(AllocInfoSrc, chipstar::Queue::MEM_MAP_TYPE::HOST_READ_WRITE); - - if (ChipEvent) - ::Backend->trackEvent(ChipEvent); } void chipstar::Queue::memFill(void *Dst, size_t Size, const void *Pattern, @@ -1649,13 +1642,9 @@ void chipstar::Queue::memFillAsync2D(void *Dst, size_t Pitch, int Value, for (size_t i = 0; i < Height; i++) { auto Offset = Pitch * i; char *DstP = (char *)Dst; - // capture the returned event on last iteration, otherwise don't - if (i == Height - 1) { - ChipEvent = memFillAsyncImpl(DstP + Offset, SizeBytes, &Value, 1); - ChipEvent->Msg = "memFillAsync2D"; - ::Backend->trackEvent(ChipEvent); - } else - memFillAsyncImpl(DstP + Offset, SizeBytes, &Value, 1); + ChipEvent = memFillAsyncImpl(DstP + Offset, SizeBytes, &Value, 1); + ChipEvent->Msg = "memFillAsync2D"; + 
::Backend->trackEvent(ChipEvent); } } @@ -1675,13 +1664,9 @@ void chipstar::Queue::memFillAsync3D(hipPitchedPtr PitchedDevPtr, int Value, size_t SizeBytes = Width; auto Offset = i * (Pitch * PitchedDevPtr.ysize) + j * Pitch; char *DstP = (char *)Dst; - // capture the returned event on last iteration, otherwise don't - if (i == Depth - 1 && j == Height - 1) { - ChipEvent = memFillAsyncImpl(DstP + Offset, SizeBytes, &Value, 1); - ChipEvent->Msg = "memFillAsync3D"; - ::Backend->trackEvent(ChipEvent); - } else - memFillAsyncImpl(DstP + Offset, SizeBytes, &Value, 1); + ChipEvent = memFillAsyncImpl(DstP + Offset, SizeBytes, &Value, 1); + ChipEvent->Msg = "memFillAsync3D"; + ::Backend->trackEvent(ChipEvent); } } From bf6f3f56bfc0953c946466afe675f552585ddda3 Mon Sep 17 00:00:00 2001 From: Paulius Velesko Date: Tue, 13 Feb 2024 07:11:09 +0200 Subject: [PATCH 13/19] event pool size double every time --- src/backend/Level0/CHIPBackendLevel0.hh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/backend/Level0/CHIPBackendLevel0.hh b/src/backend/Level0/CHIPBackendLevel0.hh index 6015a112c..d734bf594 100644 --- a/src/backend/Level0/CHIPBackendLevel0.hh +++ b/src/backend/Level0/CHIPBackendLevel0.hh @@ -23,8 +23,6 @@ #ifndef CHIP_BACKEND_LEVEL0_H #define CHIP_BACKEND_LEVEL0_H -// TODO: Should this be a cmake parameter? env? What is max size? -#define EVENT_POOL_SIZE 1000 #define L0_DEFAULT_QUEUE_PRIORITY ZE_COMMAND_QUEUE_PRIORITY_NORMAL #include "../../CHIPBackend.hh" @@ -364,6 +362,7 @@ class CHIPContextLevel0 : public chipstar::Context { size_t EventsRequested_ = 0; size_t EventsReused_ = 0; std::stack ZeCmdListRegPool_; + size_t EventPoolSize_ = 1; public: /** @@ -398,7 +397,8 @@ public: logTrace("No available events found in {} event pools. 
Creating a new " "event pool", EventPools_.size()); - auto NewEventPool = new LZEventPool(this, EVENT_POOL_SIZE); + auto NewEventPool = new LZEventPool(this, EventPoolSize_); + EventPoolSize_ *= 2; Event = NewEventPool->getEvent(); EventPools_.push_back(NewEventPool); return Event; From c7354d9e2e8be9559da5acda652469c9e6a71441 Mon Sep 17 00:00:00 2001 From: Paulius Velesko Date: Thu, 15 Feb 2024 20:33:14 +0200 Subject: [PATCH 14/19] isDeletedSanityCheck() + event reset --- src/CHIPBackend.cc | 12 ++++----- src/CHIPBackend.hh | 28 +++++++++++---------- src/backend/Level0/CHIPBackendLevel0.cc | 33 ++++++++++++++++--------- 3 files changed, 43 insertions(+), 30 deletions(-) diff --git a/src/CHIPBackend.cc b/src/CHIPBackend.cc index d0384adb8..99c556fe1 100644 --- a/src/CHIPBackend.cc +++ b/src/CHIPBackend.cc @@ -214,14 +214,14 @@ chipstar::Event::Event(chipstar::Context *Ctx, chipstar::EventFlags Flags) void chipstar::Event::addDependency( const std::shared_ptr &Event) { - assert(!Deleted_ && "Event use after delete!"); + isDeletedSanityCheck(); logDebug("Event {} Msg {} now depends on event {} msg:{}", (void *)this, Msg, (void *)Event.get(), Event->Msg); DependsOnList.push_back(Event); } void chipstar::Event::releaseDependencies() { - assert(!Deleted_ && "chipstar::Event use after delete!"); + isDeletedSanityCheck(); LOCK(EventMtx); // chipstar::Event::DependsOnList for (auto &Dep : DependsOnList) logDebug("Event {} msg: {} no longer depends on event {}", (void *)this, @@ -1184,9 +1184,7 @@ void chipstar::Backend::trackEvent( const std::shared_ptr &Event) { LOCK(::Backend->EventsMtx); // trackImpl Backend::Events LOCK(Event->EventMtx); // writing bool chipstar::Event::TrackCalled_ - // assert(!isUserEvent() && "Attemped to track a user event!"); - // assert(!Deleted_ && "chipstar::Event use after delete!"); - // assert(!TrackCalled_ && "chipstar::Event already tracked!"); + Event->isDeletedSanityCheck(); logDebug("Tracking chipstar::Event {} in Backend::Events", 
(void *)this); assert(!Event->isTrackCalled()); @@ -1495,8 +1493,10 @@ chipstar::Queue::getSyncQueuesLastEvents() { std::vector> EventsToWaitOn; auto thisLastEvent = this->getLastEvent(); - if (thisLastEvent) + if (thisLastEvent) { + thisLastEvent->isDeletedSanityCheck(); EventsToWaitOn.push_back(thisLastEvent); + } // If this stream is default legacy stream, sync with all other streams on // this device diff --git a/src/CHIPBackend.hh b/src/CHIPBackend.hh index bde4892a8..8df5affab 100644 --- a/src/CHIPBackend.hh +++ b/src/CHIPBackend.hh @@ -639,10 +639,7 @@ protected: chipstar::EventFlags Flags_; std::vector> DependsOnList; -#ifndef NDEBUG - // A debug flag for cathing use-after-delete. bool Deleted_ = false; -#endif /** * @brief Events are always created with a context @@ -660,7 +657,7 @@ protected: public: void setRecording() { - assert(!Deleted_ && "chipstar::Event use after delete!"); + isDeletedSanityCheck(); EventStatus_ = EVENT_STATUS_RECORDING; } void markTracked() { TrackCalled_ = true; } @@ -691,7 +688,7 @@ public: * @return Context* pointer to context on which this event was created */ chipstar::Context *getContext() { - assert(!Deleted_ && "chipstar::Event use after delete!"); + isDeletedSanityCheck(); return ChipContext_; } @@ -712,7 +709,7 @@ public: * @return false event is in init or invalid state */ bool isRecordingOrRecorded() { - assert(!Deleted_ && "chipstar::Event use after delete!"); + isDeletedSanityCheck(); return EventStatus_ >= EVENT_STATUS_RECORDING; } @@ -723,7 +720,7 @@ public: * @return false not recorded */ bool isFinished() { - assert(!Deleted_ && "chipstar::Event use after delete!"); + isDeletedSanityCheck(); return (EventStatus_ == EVENT_STATUS_RECORDED); } @@ -770,16 +767,19 @@ public: */ virtual void hostSignal() = 0; -#ifndef NDEBUG void markDeleted(bool State = true) { - LOCK(EventMtx); // Deleted_ +#ifndef NDEBUG Deleted_ = State; +#endif } - bool isDeleted() { - LOCK(EventMtx); // Deleted_ - return Deleted_; - } + void 
isDeletedSanityCheck() { +#ifndef NDEBUG + if (Deleted_) { + logError("chipstar::Event use after delete!"); + std::abort(); + } #endif + } }; class Program { @@ -2128,6 +2128,8 @@ public: virtual std::shared_ptr getLastEvent() { LOCK(LastEventMtx); // Queue::LastEvent_ + if (LastEvent_) + LastEvent_->isDeletedSanityCheck(); return LastEvent_; } diff --git a/src/backend/Level0/CHIPBackendLevel0.cc b/src/backend/Level0/CHIPBackendLevel0.cc index db300d69d..42fea0e91 100644 --- a/src/backend/Level0/CHIPBackendLevel0.cc +++ b/src/backend/Level0/CHIPBackendLevel0.cc @@ -218,6 +218,7 @@ createSampler(CHIPDeviceLevel0 *ChipDev, const hipResourceDesc *PResDesc, void CHIPEventLevel0::assignCmdList(CHIPContextLevel0 *ChipContext, ze_command_list_handle_t CmdList) { + isDeletedSanityCheck(); logTrace("CHIPEventLevel0({})::assignCmdList({})", (void *)this, (void *)CmdList); assert(AssignedCmdList_ == nullptr && "command list already assigned!"); @@ -244,8 +245,6 @@ void CHIPEventLevel0::reset() { if (DependsOnList.size() > 0) logWarn("CHIPEventLevel0::reset() called while event has dependencies"); DependsOnList.clear(); - // assert(DependsOnList.empty() && "CHIPEventLevel0::reset() called while " - // "event has dependencies"); auto Status = zeEventHostReset(Event_); CHIPERR_CHECK_LOG_AND_THROW(Status, ZE_RESULT_SUCCESS, hipErrorTbd); { @@ -259,14 +258,12 @@ void CHIPEventLevel0::reset() { Timestamp_ = 0; HostTimestamp_ = 0; DeviceTimestamp_ = 0; -#ifndef NDEBUG - Deleted_ = false; -#endif + markDeleted(false); } } ze_event_handle_t CHIPEventLevel0::peek() { - assert(!Deleted_ && "chipstar::Event use after delete!"); + isDeletedSanityCheck(); return Event_; } @@ -425,7 +422,7 @@ void CHIPQueueLevel0::recordEvent(chipstar::Event *ChipEvent) { } bool CHIPEventLevel0::wait() { - assert(!Deleted_ && "chipstar::Event use after delete!"); + isDeletedSanityCheck(); logTrace("CHIPEventLevel0::wait(timeout: {}) {} Msg: {} Handle: {}", ChipEnvVars.getL0EventTimeout(), (void *)this, Msg, 
(void *)Event_); @@ -447,7 +444,7 @@ bool CHIPEventLevel0::wait() { } bool CHIPEventLevel0::updateFinishStatus(bool ThrowErrorIfNotReady) { - assert(!Deleted_ && "chipstar::Event use after delete!"); + isDeletedSanityCheck(); std::string EventStatusOld, EventStatusNew; { LOCK(EventMtx); // chipstar::Event::EventStatus_ @@ -472,6 +469,7 @@ bool CHIPEventLevel0::updateFinishStatus(bool ThrowErrorIfNotReady) { } uint32_t CHIPEventLevel0::getValidTimestampBits() { + isDeletedSanityCheck(); CHIPContextLevel0 *ChipCtxLz = (CHIPContextLevel0 *)ChipContext_; CHIPDeviceLevel0 *ChipDevLz = (CHIPDeviceLevel0 *)ChipCtxLz->getDevice(); auto Props = ChipDevLz->getDeviceProps(); @@ -479,6 +477,7 @@ uint32_t CHIPEventLevel0::getValidTimestampBits() { } unsigned long CHIPEventLevel0::getFinishTime() { + isDeletedSanityCheck(); CHIPContextLevel0 *ChipCtxLz = (CHIPContextLevel0 *)ChipContext_; CHIPDeviceLevel0 *ChipDevLz = (CHIPDeviceLevel0 *)ChipCtxLz->getDevice(); auto Props = ChipDevLz->getDeviceProps(); @@ -551,7 +550,7 @@ float CHIPEventLevel0::getElapsedTime(chipstar::Event *OtherIn) { } void CHIPEventLevel0::hostSignal() { - assert(!Deleted_ && "chipstar::Event use after delete!"); + isDeletedSanityCheck(); logTrace("CHIPEventLevel0::hostSignal() {} Msg: {} Handle: {}", (void *)this, Msg, (void *)Event_); auto Status = zeEventHostSignal(Event_); @@ -705,6 +704,7 @@ void CHIPStaleEventMonitorLevel0::checkEvents() { for (size_t EventIdx = 0; EventIdx < Backend->Events.size(); EventIdx++) { std::shared_ptr ChipEventLz = std::static_pointer_cast(Backend->Events[EventIdx]); + ChipEventLz->isDeletedSanityCheck(); assert(ChipEventLz); assert(!ChipEventLz->isUserEvent() && @@ -719,15 +719,16 @@ void CHIPStaleEventMonitorLevel0::checkEvents() { ChipEventLz->doActions(); } + ChipEventLz->isDeletedSanityCheck(); + // delete the event if refcount reached 2 // this->ChipEvent and LZEventPool::Events_ if (ChipEventLz.use_count() == 2) { if (ChipEventLz->EventPool) { + 
ChipEventLz->isDeletedSanityCheck(); ChipEventLz->EventPool->returnEvent(ChipEventLz); } -#ifndef NDEBUG ChipEventLz->markDeleted(); -#endif } } // done collecting events to delete @@ -891,6 +892,9 @@ CHIPQueueLevel0::~CHIPQueueLevel0() { std::vector CHIPQueueLevel0::addDependenciesQueueSync( std::shared_ptr TargetEvent) { auto EventsToWaitOn = getSyncQueuesLastEvents(); + for (auto &Event : EventsToWaitOn) + Event->isDeletedSanityCheck(); + // Every event in EventsToWaitOn should have a dependency on MemCopyEvent so // that they don't get destroyed before MemCopyEvent for (auto &Event : EventsToWaitOn) { @@ -899,6 +903,10 @@ std::vector CHIPQueueLevel0::addDependenciesQueueSync( ->addDependency(Event); } + for (auto &Event : EventsToWaitOn) { + Event->isDeletedSanityCheck(); + } + std::vector EventHandles = getEventListHandles(EventsToWaitOn); return EventHandles; @@ -1711,6 +1719,8 @@ std::shared_ptr LZEventPool::getEvent() { }; void LZEventPool::returnEvent(std::shared_ptr Event) { + Event->isDeletedSanityCheck(); + Event->markDeleted(); LOCK(EventPoolMtx); logTrace("Returning event {} handle {}", (void *)Event.get(), (void *)Event.get()->get()); @@ -1743,6 +1753,7 @@ CHIPBackendLevel0::createEventShared(chipstar::Context *ChipCtx, assert(!std::static_pointer_cast(Event)->getAssignedCmdList()); logDebug("CHIPBackendLevel0::createEventShared: Context {} Event {}", (void *)ChipCtx, (void *)Event.get()); + Event->isDeletedSanityCheck(); return Event; } From b9a65f13d877ca71e540aa7861230ce21f762aa8 Mon Sep 17 00:00:00 2001 From: Paulius Velesko Date: Fri, 16 Feb 2024 08:09:21 +0200 Subject: [PATCH 15/19] remove unnecessary markDeleted --- src/backend/Level0/CHIPBackendLevel0.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/src/backend/Level0/CHIPBackendLevel0.cc b/src/backend/Level0/CHIPBackendLevel0.cc index 42fea0e91..9d62df0d9 100644 --- a/src/backend/Level0/CHIPBackendLevel0.cc +++ b/src/backend/Level0/CHIPBackendLevel0.cc @@ -728,7 +728,6 @@ void 
CHIPStaleEventMonitorLevel0::checkEvents() { ChipEventLz->isDeletedSanityCheck(); ChipEventLz->EventPool->returnEvent(ChipEventLz); } - ChipEventLz->markDeleted(); } } // done collecting events to delete From b8e96a3f146affaaf87fd0c18d4db1cf96179bb3 Mon Sep 17 00:00:00 2001 From: Paulius Velesko Date: Sat, 17 Feb 2024 08:42:57 +0200 Subject: [PATCH 16/19] less verbose hipMultiThreadAddCallback --- samples/hipMultiThreadAddCallback/hipMultiThreadAddCallback.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/samples/hipMultiThreadAddCallback/hipMultiThreadAddCallback.cc b/samples/hipMultiThreadAddCallback/hipMultiThreadAddCallback.cc index 16f6caec0..2c5b356a5 100644 --- a/samples/hipMultiThreadAddCallback/hipMultiThreadAddCallback.cc +++ b/samples/hipMultiThreadAddCallback/hipMultiThreadAddCallback.cc @@ -158,7 +158,6 @@ int main() { // Wait until all the threads finish their execution for (int i = 0; i < numThreads; i++) { - std::cout << "Joining Tid#" << i << "\n"; T[i].join(); } From 07375a747bbc7db44d7731767087bd0f10f7c2c6 Mon Sep 17 00:00:00 2001 From: Paulius Velesko Date: Sun, 18 Feb 2024 05:48:32 +0200 Subject: [PATCH 17/19] LZEventPool finish --- src/backend/Level0/CHIPBackendLevel0.cc | 21 +++++++++------------ src/backend/Level0/CHIPBackendLevel0.hh | 5 ++--- 2 files changed, 11 insertions(+), 15 deletions(-) diff --git a/src/backend/Level0/CHIPBackendLevel0.cc b/src/backend/Level0/CHIPBackendLevel0.cc index 9d62df0d9..5a796265c 100644 --- a/src/backend/Level0/CHIPBackendLevel0.cc +++ b/src/backend/Level0/CHIPBackendLevel0.cc @@ -721,9 +721,8 @@ void CHIPStaleEventMonitorLevel0::checkEvents() { ChipEventLz->isDeletedSanityCheck(); - // delete the event if refcount reached 2 - // this->ChipEvent and LZEventPool::Events_ - if (ChipEventLz.use_count() == 2) { + // delete the event if refcount reached 1 (this->ChipEventLz) + if (ChipEventLz.use_count() == 1) { if (ChipEventLz->EventPool) { ChipEventLz->isDeletedSanityCheck(); 
ChipEventLz->EventPool->returnEvent(ChipEventLz); @@ -1682,8 +1681,7 @@ LZEventPool::LZEventPool(CHIPContextLevel0 *Ctx, unsigned int Size) chipstar::EventFlags Flags; auto NewEvent = std::shared_ptr( new CHIPEventLevel0(Ctx_, this, i, Flags)); - Events_.push_back(NewEvent); - AvailableEvents_.push(NewEvent); + Events_.push(NewEvent); } }; @@ -1695,9 +1693,8 @@ LZEventPool::~LZEventPool() { logWarn("CHIPUserEventLevel0 objects still exist at the time of EventPool " "destruction"); - while (AvailableEvents_.size()) - AvailableEvents_.pop(); - Events_.clear(); // shared_ptr's will be deleted + while (Events_.size()) + Events_.pop(); // The application must not call this function from // simultaneous threads with the same event pool handle. // Done via destructor should not be called from multiple threads @@ -1708,11 +1705,11 @@ LZEventPool::~LZEventPool() { std::shared_ptr LZEventPool::getEvent() { std::shared_ptr Event; - if (!AvailableEvents_.size()) + if (!Events_.size()) return nullptr; - Event = AvailableEvents_.top(); - AvailableEvents_.pop(); + Event = Events_.top(); + Events_.pop(); return Event; }; @@ -1723,7 +1720,7 @@ void LZEventPool::returnEvent(std::shared_ptr Event) { LOCK(EventPoolMtx); logTrace("Returning event {} handle {}", (void *)Event.get(), (void *)Event.get()->get()); - AvailableEvents_.push(Event); + Events_.push(Event); } // End EventPool diff --git a/src/backend/Level0/CHIPBackendLevel0.hh b/src/backend/Level0/CHIPBackendLevel0.hh index d734bf594..55854585d 100644 --- a/src/backend/Level0/CHIPBackendLevel0.hh +++ b/src/backend/Level0/CHIPBackendLevel0.hh @@ -208,14 +208,13 @@ private: CHIPContextLevel0 *Ctx_; ze_event_pool_handle_t EventPool_; unsigned int Size_; - std::vector> Events_; - std::stack> AvailableEvents_; + std::stack> Events_; public: std::mutex EventPoolMtx; LZEventPool(CHIPContextLevel0 *Ctx, unsigned int Size); ~LZEventPool(); - bool EventAvailable() { return AvailableEvents_.size() > 0; } + bool EventAvailable() { return 
Events_.size() > 0; } ze_event_pool_handle_t get() { return EventPool_; } void returnEvent(std::shared_ptr Event); From fc4b654dbabc44969ead564f0a829e138ea4cb06 Mon Sep 17 00:00:00 2001 From: Paulius Velesko Date: Tue, 20 Feb 2024 08:02:17 +0200 Subject: [PATCH 18/19] check.py add --total-runtime --- scripts/check.py | 67 +++++++++++++++++++++++++++++++----------------- 1 file changed, 44 insertions(+), 23 deletions(-) diff --git a/scripts/check.py b/scripts/check.py index 341c9864b..dc78e9837 100755 --- a/scripts/check.py +++ b/scripts/check.py @@ -3,6 +3,7 @@ import argparse import subprocess import hashlib +import time parser = argparse.ArgumentParser( @@ -14,13 +15,17 @@ parser.add_argument('device_type', type=str, choices=['cpu', 'igpu', 'dgpu'], help='Device type') parser.add_argument('backend', type=str, choices=['opencl', 'level0-reg', 'level0-imm', 'pocl'], help='Backend to use') parser.add_argument('--num-threads', type=int, nargs='?', default=os.cpu_count(), help='Number of threads to use (default: number of cores on the system)') -parser.add_argument('--num-tries', type=int, nargs='?', default=1, help='Number of tries (default: 1)') parser.add_argument('--timeout', type=int, nargs='?', default=200, help='Timeout in seconds (default: 200)') parser.add_argument('-m', '--modules', type=str, choices=['on', 'off'], default="off", help='load modulefiles automatically (default: off)') parser.add_argument('-v', '--verbose', action='store_true', help='verbose output') parser.add_argument('-d', '--dry-run', '-N', action='store_true', help='dry run') parser.add_argument('-c', '--categories', action='store_true', help='run tests by categories, including running a set of tests in a single thread') +# --total-runtime cannot be used with --num-tries +group = parser.add_mutually_exclusive_group() +group.add_argument('--total-runtime', type=int, nargs='?', default=0, help='Set --num-tries such that the total runtime is approximately this value in minutes') 
+group.add_argument('--num-tries', type=int, nargs='?', default=1, help='Number of tries (default: 1)') + args = parser.parse_args() # execute a command and return the output along with the return code @@ -110,28 +115,44 @@ def run_cmd(cmd): else: texture_cmd = "" - - -if args.categories: - cmd_deviceFunc = f"{modules} {env_vars} ctest --output-on-failure --timeout {args.timeout} --repeat until-fail:{args.num_tries} -j 100 -E \"`cat ./test_lists/{args.device_type}_{args.backend}_failed_{level0_cmd_list}tests.txt`{texture_cmd}{double_cmd}\" -R deviceFunc -O checkpy_{args.device_type}_{args.backend}_device.txt" - cmd_graph = f"{modules} {env_vars} ctest --output-on-failure --timeout {args.timeout} --repeat until-fail:{args.num_tries} -j 100 -E \"`cat ./test_lists/{args.device_type}_{args.backend}_failed_{level0_cmd_list}tests.txt`{texture_cmd}{double_cmd}\" -R \"[Gg]raph\" -O checkpy_{args.device_type}_{args.backend}_graph.txt" - cmd_single = f"{modules} {env_vars} ctest --output-on-failure --timeout {args.timeout} --repeat until-fail:{args.num_tries} -j 1 -E \"`cat ./test_lists/{args.device_type}_{args.backend}_failed_{level0_cmd_list}tests.txt`{texture_cmd}{double_cmd}\" -R \"`cat ./test_lists/non_parallel_tests.txt`\" -O checkpy_{args.device_type}_{args.backend}_single.txt" - cmd_other = f"{modules} {env_vars} ctest --output-on-failure --timeout {args.timeout} --repeat until-fail:{args.num_tries} -j {args.num_threads} -E \"`cat ./test_lists/{args.device_type}_{args.backend}_failed_{level0_cmd_list}tests.txt`{texture_cmd}{double_cmd}|deviceFunc|[Gg]raph|`cat ./test_lists/non_parallel_tests.txt`\" -O checkpy_{args.device_type}_{args.backend}_other.txt" - - res_deviceFunc, err = run_cmd(cmd_deviceFunc) - res_graph, err = run_cmd(cmd_graph) - res_single, err = run_cmd(cmd_single) - res_other, err = run_cmd(cmd_other) - - if "0 tests failed" in res_deviceFunc and "0 tests failed" in res_graph and "0 tests failed" in res_single and "0 tests failed" in res_other: - 
exit(0) +def run_tests(num_tries): + if args.categories: + cmd_deviceFunc = f"{modules} {env_vars} ctest --output-on-failure --timeout {args.timeout} --repeat until-fail:{num_tries} -j 100 -E \"`cat ./test_lists/{args.device_type}_{args.backend}_failed_{level0_cmd_list}tests.txt`{texture_cmd}{double_cmd}\" -R deviceFunc -O checkpy_{args.device_type}_{args.backend}_device.txt" + cmd_graph = f"{modules} {env_vars} ctest --output-on-failure --timeout {args.timeout} --repeat until-fail:{num_tries} -j 100 -E \"`cat ./test_lists/{args.device_type}_{args.backend}_failed_{level0_cmd_list}tests.txt`{texture_cmd}{double_cmd}\" -R \"[Gg]raph\" -O checkpy_{args.device_type}_{args.backend}_graph.txt" + cmd_single = f"{modules} {env_vars} ctest --output-on-failure --timeout {args.timeout} --repeat until-fail:{num_tries} -j 1 -E \"`cat ./test_lists/{args.device_type}_{args.backend}_failed_{level0_cmd_list}tests.txt`{texture_cmd}{double_cmd}\" -R \"`cat ./test_lists/non_parallel_tests.txt`\" -O checkpy_{args.device_type}_{args.backend}_single.txt" + cmd_other = f"{modules} {env_vars} ctest --output-on-failure --timeout {args.timeout} --repeat until-fail:{num_tries} -j {args.num_threads} -E \"`cat ./test_lists/{args.device_type}_{args.backend}_failed_{level0_cmd_list}tests.txt`{texture_cmd}{double_cmd}|deviceFunc|[Gg]raph|`cat ./test_lists/non_parallel_tests.txt`\" -O checkpy_{args.device_type}_{args.backend}_other.txt" + + res_deviceFunc, err = run_cmd(cmd_deviceFunc) + res_graph, err = run_cmd(cmd_graph) + res_single, err = run_cmd(cmd_single) + res_other, err = run_cmd(cmd_other) + + if "0 tests failed" in res_deviceFunc and "0 tests failed" in res_graph and "0 tests failed" in res_single and "0 tests failed" in res_other: + exit(0) + else: + exit(1) else: - exit(1) + cmd = f"{modules} {env_vars} ctest --output-on-failure --timeout {args.timeout} --repeat until-fail:{num_tries} -j {args.num_threads} -E \"`cat 
./test_lists/{args.device_type}_{args.backend}_failed_{level0_cmd_list}tests.txt`{texture_cmd}\" -O checkpy_{args.device_type}_{args.backend}.txt" + res, err = run_cmd(cmd) + return res, err + + +# if --total-runtime is set, calculate the number of tries by running run_tests and checking the time +num_tries = 1 +if args.total_runtime: + t_start = time.time() + run_tests(1) + t_end = time.time() + # calculate the total time + total_time = t_end - t_start + # calculate the number of tries + num_tries = int(args.total_runtime * 60 / total_time) + print(f"Running tests {num_tries} times to get a total runtime of {args.total_runtime} minutes") else: - cmd = f"{modules} {env_vars} ctest --output-on-failure --timeout {args.timeout} --repeat until-fail:{args.num_tries} -j {args.num_threads} -E \"`cat ./test_lists/{args.device_type}_{args.backend}_failed_{level0_cmd_list}tests.txt`{texture_cmd}\" -O checkpy_{args.device_type}_{args.backend}.txt" + num_tries = args.num_tries - res, err = run_cmd(cmd) - if "0 tests failed" in res: - exit(0) - else: - exit(1) +res, err = run_tests(num_tries) +if "0 tests failed" in res: + exit(0) +else: + exit(1) From 6e1382d92da2d15b782412b474703639bd7d2f38 Mon Sep 17 00:00:00 2001 From: Paulius Velesko Date: Tue, 20 Feb 2024 08:03:52 +0200 Subject: [PATCH 19/19] Leave only a single thread EventMonitor as opposed to using multiple threads for calblacks and event monitoring --- src/CHIPBackend.hh | 6 +- src/backend/Level0/CHIPBackendLevel0.cc | 131 +++++------------------- src/backend/Level0/CHIPBackendLevel0.hh | 27 ++--- src/backend/OpenCL/CHIPBackendOpenCL.cc | 8 +- src/backend/OpenCL/CHIPBackendOpenCL.hh | 3 +- 5 files changed, 33 insertions(+), 142 deletions(-) diff --git a/src/CHIPBackend.hh b/src/CHIPBackend.hh index 8df5affab..b039f17e1 100644 --- a/src/CHIPBackend.hh +++ b/src/CHIPBackend.hh @@ -1773,8 +1773,7 @@ public: */ class Backend { protected: - chipstar::EventMonitor *CallbackEventMonitor_ = nullptr; - chipstar::EventMonitor 
*StaleEventMonitor_ = nullptr; + chipstar::EventMonitor *EventMonitor_ = nullptr; int MinQueuePriority_; int MaxQueuePriority_ = 0; @@ -2024,8 +2023,7 @@ public: createCallbackData(hipStreamCallback_t Callback, void *UserData, chipstar::Queue *ChipQ) = 0; - virtual chipstar::EventMonitor *createCallbackEventMonitor_() = 0; - virtual chipstar::EventMonitor *createStaleEventMonitor_() = 0; + virtual chipstar::EventMonitor *createEventMonitor_() = 0; /* event interop */ virtual hipEvent_t getHipEvent(void *NativeEvent) = 0; diff --git a/src/backend/Level0/CHIPBackendLevel0.cc b/src/backend/Level0/CHIPBackendLevel0.cc index 5a796265c..3fb175ab1 100644 --- a/src/backend/Level0/CHIPBackendLevel0.cc +++ b/src/backend/Level0/CHIPBackendLevel0.cc @@ -597,65 +597,13 @@ CHIPCallbackDataLevel0::CHIPCallbackDataLevel0(hipStreamCallback_t CallbackF, // EventMonitorLevel0 // *********************************************************************** -void CHIPCallbackEventMonitorLevel0::monitor() { - // CHIPCallbackDataLevel0 *CbData; - // while (true) { - // usleep(200); - // LOCK(EventMonitorMtx); // chipstar::EventMonitor::Stop - // { - - // if (Stop) { - // logTrace("CHIPCallbackEventMonitorLevel0 out of callbacks. 
Exiting " - // "thread"); - // if (Backend->CallbackQueue.size()) - // logError("Callback thread exiting while there are still active " - // "callbacks in the queue"); - // pthread_exit(0); - // } - - // LOCK(Backend->CallbackQueueMtx); // Backend::CallbackQueue - - // if ((Backend->CallbackQueue.size() == 0)) - // continue; - - // // get the callback item - // CbData = (CHIPCallbackDataLevel0 *)Backend->CallbackQueue.front(); - - // // Lock the item and members - // assert(CbData); - // LOCK( // Backend::CallbackQueue - // CbData->CallbackDataMtx); - // Backend->CallbackQueue.pop(); - - // // Update Status - // logTrace("CHIPCallbackEventMonitorLevel0::monitor() checking event " - // "status for {}", - // static_cast(CbData->GpuReady.get())); - // CbData->GpuReady->updateFinishStatus(false); - // if (CbData->GpuReady->getEventStatus() != EVENT_STATUS_RECORDED) { - // // if not ready, push to the back - // Backend->CallbackQueue.push(CbData); - // continue; - // } - // } - - // CbData->execute(hipSuccess); - // CbData->CpuCallbackComplete->hostSignal(); - // CbData->GpuAck->wait(); - - // delete CbData; - // pthread_yield(); - // } -} - -void CHIPStaleEventMonitorLevel0::checkCallbacks() { +void CHIPEventMonitorLevel0::checkCallbacks() { CHIPCallbackDataLevel0 *CbData; - // usleep(200); LOCK(EventMonitorMtx); // chipstar::EventMonitor::Stop { if (Stop) { - logTrace("CHIPCallbackEventMonitorLevel0 out of callbacks. Exiting " + logTrace("checkCallbacks: out of callbacks. 
Exiting " "thread"); if (Backend->CallbackQueue.size()) logError("Callback thread exiting while there are still active " @@ -678,7 +626,7 @@ void CHIPStaleEventMonitorLevel0::checkCallbacks() { Backend->CallbackQueue.pop(); // Update Status - logTrace("CHIPCallbackEventMonitorLevel0::monitor() checking event " + logTrace("checkCallbacks: checking event " "status for {}", static_cast(CbData->GpuReady.get())); CbData->GpuReady->updateFinishStatus(false); @@ -697,7 +645,7 @@ void CHIPStaleEventMonitorLevel0::checkCallbacks() { pthread_yield(); } -void CHIPStaleEventMonitorLevel0::checkEvents() { +void CHIPEventMonitorLevel0::checkEvents() { CHIPBackendLevel0 *BackendZe = static_cast(Backend); LOCK(Backend->EventsMtx); // Backend::Events LOCK(BackendZe->CommandListsMtx); // CHIPBackendLevel0::EventCommandListMapk @@ -732,7 +680,7 @@ void CHIPStaleEventMonitorLevel0::checkEvents() { } // done collecting events to delete } -void CHIPStaleEventMonitorLevel0::exitChecks() { +void CHIPEventMonitorLevel0::exitChecks() { LOCK(EventMonitorMtx); // chipstar::EventMonitor::Stop /** * In the case that a user doesn't destroy all the @@ -756,7 +704,7 @@ void CHIPStaleEventMonitorLevel0::exitChecks() { pthread_exit(0); if (EpasedTime > ChipEnvVars.getL0CollectEventsTimeout()) { - logError("CHIPStaleEventMonitorLevel0 stop was called but not all events " + logError("CHIPEventMonitorLevel0 stop was called but not all events " "have been cleared. Timeout of {} seconds has been reached.", ChipEnvVars.getL0CollectEventsTimeout()); size_t MaxPrintEntries = std::min(Backend->Events.size(), size_t(10)); @@ -773,7 +721,7 @@ void CHIPStaleEventMonitorLevel0::exitChecks() { // print only once a second to avoid saturating stdout with logs if (CurrTime - LastPrint_ >= 1) { LastPrint_ = CurrTime; - logDebug("CHIPStaleEventMonitorLevel0 stop was called but not all " + logDebug("CHIPEventMonitorLevel0 stop was called but not all " "events have been cleared. 
Timeout of {} seconds has not " "been reached yet. Elapsed time: {} seconds", ChipEnvVars.getL0CollectEventsTimeout(), EpasedTime); @@ -781,7 +729,7 @@ void CHIPStaleEventMonitorLevel0::exitChecks() { } } -void CHIPStaleEventMonitorLevel0::monitor() { +void CHIPEventMonitorLevel0::monitor() { // Stop is false and I have more events while (true) { @@ -865,14 +813,13 @@ CHIPQueueLevel0::~CHIPQueueLevel0() { finish(); // must finish the queue because it's possible that that there are // outstanding operations which have an associated // chipstar::Event. If we do not finish we risk the chance of - // StaleEventMonitor of deadlocking while waiting for queue + // EventMonitor of deadlocking while waiting for queue // completion and subsequent event status change } - updateLastEvent( - nullptr); // Just in case that unique_ptr destructor calls this, the - // generic ~Queue() (which calls updateLastEvent(nullptr)) - // hasn't been called yet, and the stale event monitor ends up - // waiting forever. + updateLastEvent(nullptr); // Just in case that unique_ptr destructor calls + // this, the generic ~Queue() (which calls + // updateLastEvent(nullptr)) hasn't been called yet, + // and the event monitor ends up waiting forever. // The application must not call this function from // simultaneous threads with the same command queue handle. 
@@ -1746,7 +1693,8 @@ CHIPBackendLevel0::createEventShared(chipstar::Context *ChipCtx, assert(Event && "LZEventPool returned a null event"); std::static_pointer_cast(Event)->reset(); - assert(!std::static_pointer_cast(Event)->getAssignedCmdList()); + assert( + !std::static_pointer_cast(Event)->getAssignedCmdList()); logDebug("CHIPBackendLevel0::createEventShared: Context {} Event {}", (void *)ChipCtx, (void *)Event.get()); Event->isDeletedSanityCheck(); @@ -1764,7 +1712,7 @@ chipstar::Event *CHIPBackendLevel0::createEvent(chipstar::Context *ChipCtx, void CHIPBackendLevel0::uninitialize() { /** - * Stale chipstar::Event Monitor expects to collect all events. To do this, + * chipstar::Event Monitor expects to collect all events. To do this, * all events must reach the refcount of 0. At this point, all queues should * have their LastEvent as nullptr but in case a user didn't sync and destroy * a user-created stream, such stream might not have its LastEvent as nullptr. @@ -1775,38 +1723,12 @@ void CHIPBackendLevel0::uninitialize() { logTrace("Backend::uninitialize(): Setting the LastEvent to null for all " "user-created queues"); - if (CallbackEventMonitor_) { - logTrace("Backend::uninitialize(): Killing CallbackEventMonitor"); - LOCK( - CallbackEventMonitor_->EventMonitorMtx); // chipstar::EventMonitor::Stop - CallbackEventMonitor_->Stop = true; - } - CallbackEventMonitor_->join(); - { - logTrace("Backend::uninitialize(): Killing StaleEventMonitor"); - LOCK(StaleEventMonitor_->EventMonitorMtx); // chipstar::EventMonitor::Stop - StaleEventMonitor_->Stop = true; + logTrace("Backend::uninitialize(): Killing EventMonitor"); + LOCK(EventMonitor_->EventMonitorMtx); // chipstar::EventMonitor::Stop + EventMonitor_->Stop = true; } - StaleEventMonitor_->join(); - - // if (Backend->Events.size()) { - // logTrace("Remaining {} events that haven't been collected:", - // Backend->Events.size()); - // for (auto *E : Backend->Events) { - // logTrace("{} status= {} refc={}", E->Msg, 
E->getEventStatusStr(), - // E->getCHIPRefc()); - // if (!E->isUserEvent()) { - // // A strong indicator that we are missing decreaseRefCount() call - // // for events which are solely managed by the chipStar. - // assert(!(E->isFinished() && E->getCHIPRefc() > 0) && - // "Missed decreaseRefCount()?"); - // assert(E->isFinished() && "Uncollected non-user events!"); - // } - // } - // logTrace("Remaining {} command lists that haven't been collected:", - // static_cast(Backend)->EventCommandListMap.size()); - // } + EventMonitor_->join(); return; } @@ -1924,11 +1846,7 @@ void CHIPBackendLevel0::initializeImpl() { ChipL0Ctx->setDevice(ChipL0Dev); } - StaleEventMonitor_ = - (CHIPStaleEventMonitorLevel0 *)::Backend->createStaleEventMonitor_(); - CallbackEventMonitor_ = (CHIPCallbackEventMonitorLevel0 *)::Backend - ->createCallbackEventMonitor_(); - + EventMonitor_ = (CHIPEventMonitorLevel0 *)::Backend->createEventMonitor_(); // Run these lasts, as they may depend on the device properties being // populated setUseImmCmdLists(DeviceName); @@ -1952,13 +1870,10 @@ void CHIPBackendLevel0::initializeFromNative(const uintptr_t *NativeHandles, CHIPDeviceLevel0 *ChipDev = CHIPDeviceLevel0::create(Dev, ChipCtx, 0); ChipCtx->setDevice(ChipDev); - LOCK(::Backend->BackendMtx); // CHIPBackendLevel0::StaleEventMonitor + LOCK(::Backend->BackendMtx); // CHIPBackendLevel0::EventMonitor ChipDev->LegacyDefaultQueue = ChipDev->createQueue(NativeHandles, NumHandles); - StaleEventMonitor_ = - (CHIPStaleEventMonitorLevel0 *)::Backend->createStaleEventMonitor_(); - CallbackEventMonitor_ = (CHIPCallbackEventMonitorLevel0 *)::Backend - ->createCallbackEventMonitor_(); + EventMonitor_ = (CHIPEventMonitorLevel0 *)::Backend->createEventMonitor_(); setActiveDevice(ChipDev); } diff --git a/src/backend/Level0/CHIPBackendLevel0.hh b/src/backend/Level0/CHIPBackendLevel0.hh index 55854585d..84a3ef154 100644 --- a/src/backend/Level0/CHIPBackendLevel0.hh +++ b/src/backend/Level0/CHIPBackendLevel0.hh @@ 
-108,7 +108,7 @@ public: * @param CmdList command list to Assign with this event */ void assignCmdList(CHIPContextLevel0 *ChipContext, - ze_command_list_handle_t CmdList); + ze_command_list_handle_t CmdList); /** * @brief Reset and then return the command list handle back to the context @@ -167,16 +167,7 @@ public: virtual ~CHIPCallbackDataLevel0() override {} }; -class CHIPCallbackEventMonitorLevel0 : public chipstar::EventMonitor { -public: - ~CHIPCallbackEventMonitorLevel0() { - logTrace("CHIPCallbackEventMonitorLevel0 DEST"); - join(); - }; - virtual void monitor() override; -}; - -class CHIPStaleEventMonitorLevel0 : public chipstar::EventMonitor { +class CHIPEventMonitorLevel0 : public chipstar::EventMonitor { // variable for storing the how much time has passed since trying to exit // the monitor loop int TimeSinceStopRequested_ = 0; @@ -196,8 +187,8 @@ class CHIPStaleEventMonitorLevel0 : public chipstar::EventMonitor { void checkCallbacks(); public: - ~CHIPStaleEventMonitorLevel0() { - logTrace("CHIPStaleEventMonitorLevel0 DEST"); + ~CHIPEventMonitorLevel0() { + logTrace("CHIPEventMonitorLevel0 DEST"); join(); }; virtual void monitor() override; @@ -688,14 +679,8 @@ public: return new CHIPCallbackDataLevel0(Callback, UserData, ChipQueue); } - virtual chipstar::EventMonitor *createCallbackEventMonitor_() override { - auto Evm = new CHIPCallbackEventMonitorLevel0(); - Evm->start(); - return Evm; - } - - virtual chipstar::EventMonitor *createStaleEventMonitor_() override { - auto Evm = new CHIPStaleEventMonitorLevel0(); + virtual chipstar::EventMonitor *createEventMonitor_() override { + auto Evm = new CHIPEventMonitorLevel0(); Evm->start(); return Evm; } diff --git a/src/backend/OpenCL/CHIPBackendOpenCL.cc b/src/backend/OpenCL/CHIPBackendOpenCL.cc index 0c0fd3b8d..bf46764b2 100644 --- a/src/backend/OpenCL/CHIPBackendOpenCL.cc +++ b/src/backend/OpenCL/CHIPBackendOpenCL.cc @@ -1628,13 +1628,7 @@ chipstar::CallbackData *CHIPBackendOpenCL::createCallbackData( 
UNIMPLEMENTED(nullptr); } -chipstar::EventMonitor *CHIPBackendOpenCL::createCallbackEventMonitor_() { - auto Evm = new EventMonitorOpenCL(); - Evm->start(); - return Evm; -} - -chipstar::EventMonitor *CHIPBackendOpenCL::createStaleEventMonitor_() { +chipstar::EventMonitor *CHIPBackendOpenCL::createEventMonitor_() { UNIMPLEMENTED(nullptr); } diff --git a/src/backend/OpenCL/CHIPBackendOpenCL.hh b/src/backend/OpenCL/CHIPBackendOpenCL.hh index b02868b95..de7b6c09f 100644 --- a/src/backend/OpenCL/CHIPBackendOpenCL.hh +++ b/src/backend/OpenCL/CHIPBackendOpenCL.hh @@ -405,8 +405,7 @@ public: virtual chipstar::CallbackData * createCallbackData(hipStreamCallback_t Callback, void *UserData, chipstar::Queue *ChipQueue) override; - virtual chipstar::EventMonitor *createCallbackEventMonitor_() override; - virtual chipstar::EventMonitor *createStaleEventMonitor_() override; + virtual chipstar::EventMonitor *createEventMonitor_() override; virtual hipEvent_t getHipEvent(void *NativeEvent) override; virtual void *getNativeEvent(hipEvent_t HipEvent) override;