diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index b596708d27..d53a5839cf 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -46,6 +46,7 @@ #include "level_zero/core/source/driver/driver_handle_imp.h" #include "level_zero/core/source/event/event.h" #include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.h" +#include "level_zero/core/source/helpers/in_order_cmd_helpers.h" #include "level_zero/core/source/image/image.h" #include "level_zero/core/source/kernel/kernel.h" #include "level_zero/core/source/kernel/kernel_imp.h" @@ -141,8 +142,9 @@ ze_result_t CommandListCoreFamily::reset() { inOrderDependencyCounter = 0; inOrderAllocationOffset = 0; - if (inOrderDependencyCounterAllocation) { - memset(inOrderDependencyCounterAllocation->getUnderlyingBuffer(), 0, inOrderDependencyCounterAllocation->getUnderlyingBufferSize()); + if (inOrderExecInfo) { + auto &inOrderDependencyCounterAllocation = inOrderExecInfo->inOrderDependencyCounterAllocation; + memset(inOrderDependencyCounterAllocation.getUnderlyingBuffer(), 0, inOrderDependencyCounterAllocation.getUnderlyingBufferSize()); } return ZE_RESULT_SUCCESS; @@ -151,7 +153,7 @@ ze_result_t CommandListCoreFamily::reset() { template void CommandListCoreFamily::handleInOrderDependencyCounter() { if (!isQwordInOrderCounter() && ((inOrderDependencyCounter + 1) == std::numeric_limits::max())) { - CommandListCoreFamily::appendWaitOnInOrderDependency(inOrderDependencyCounterAllocation, inOrderDependencyCounter + 1, inOrderAllocationOffset, false, true); + CommandListCoreFamily::appendWaitOnInOrderDependency(&inOrderExecInfo->inOrderDependencyCounterAllocation, inOrderDependencyCounter + 1, inOrderAllocationOffset, false, true); inOrderDependencyCounter = 0; @@ -160,14 +162,14 @@ void CommandListCoreFamily::handleInOrderDependencyCounter() { inOrderAllocationOffset += offset; - UNRECOVERABLE_IF(inOrderAllocationOffset + offset >= inOrderDependencyCounterAllocation->getUnderlyingBufferSize()); + UNRECOVERABLE_IF(inOrderAllocationOffset + offset >= inOrderExecInfo->inOrderDependencyCounterAllocation.getUnderlyingBufferSize()); CommandListCoreFamily::appendSignalInOrderDependencyCounter(); // write 1 on new offset } inOrderDependencyCounter++; - this->commandContainer.addToResidencyContainer(this->inOrderDependencyCounterAllocation); + this->commandContainer.addToResidencyContainer(&inOrderExecInfo->inOrderDependencyCounterAllocation); } template @@ -395,7 +397,7 @@ ze_result_t CommandListCoreFamily::appendLaunchCooperativeKernel( event, launchParams); addToMappedEventList(event); - if (this->inOrderExecutionEnabled) { + if (this->isInOrderExecutionEnabled()) { handleInOrderDependencyCounter(); } return ret; @@ -497,7 +499,7 @@ ze_result_t CommandListCoreFamily::appendEventReset(ze_event_hand callId = neoDevice->getRootDeviceEnvironment().tagsManager->currentCallCount; } - if (this->inOrderExecutionEnabled) { + if (this->isInOrderExecutionEnabled()) { handleInOrderImplicitDependencies(isRelaxedOrderingDispatchAllowed(0)); } @@ -517,7 +519,7 @@ ze_result_t CommandListCoreFamily::appendEventReset(ze_event_hand } } - if (this->inOrderExecutionEnabled) { + if (this->isInOrderExecutionEnabled()) { appendSignalInOrderDependencyCounter(); handleInOrderDependencyCounter(); } @@ -556,7 +558,7 @@ ze_result_t CommandListCoreFamily::appendMemoryRangesBarrier(uint appendSignalEventPostWalker(signalEvent, false); addToMappedEventList(signalEvent); - if (this->inOrderExecutionEnabled) { + if (this->isInOrderExecutionEnabled()) { appendSignalInOrderDependencyCounter(); handleInOrderDependencyCounter(); } @@ -1316,7 +1318,7 @@ ze_result_t CommandListCoreFamily::appendMemoryCopy(void *dstptr, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch, bool forceDisableCopyOnlyInOrderSignaling) { - const bool inOrderCopyOnlySignalingAllowed = this->inOrderExecutionEnabled && !forceDisableCopyOnlyInOrderSignaling && isCopyOnly(); + const bool inOrderCopyOnlySignalingAllowed = this->isInOrderExecutionEnabled() && !forceDisableCopyOnlyInOrderSignaling && isCopyOnly(); NEO::Device *neoDevice = device->getNEODevice(); uint32_t callId = 0; @@ -1458,7 +1460,7 @@ ze_result_t CommandListCoreFamily::appendMemoryCopy(void *dstptr, addFlushRequiredCommand(dstAllocationStruct.needsFlush, signalEvent); addToMappedEventList(signalEvent); - if (this->inOrderExecutionEnabled) { + if (this->isInOrderExecutionEnabled()) { if (launchParams.isKernelSplitOperation || inOrderCopyOnlySignalingAllowed) { if (!signalEvent && !isCopyOnly()) { NEO::PipeControlArgs args; @@ -1497,7 +1499,7 @@ ze_result_t CommandListCoreFamily::appendMemoryCopyRegion(void *d ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch, bool forceDisableCopyOnlyInOrderSignaling) { - const bool inOrderCopyOnlySignalingAllowed = this->inOrderExecutionEnabled && !forceDisableCopyOnlyInOrderSignaling && isCopyOnly(); + const bool inOrderCopyOnlySignalingAllowed = this->isInOrderExecutionEnabled() && !forceDisableCopyOnlyInOrderSignaling && isCopyOnly(); NEO::Device *neoDevice = device->getNEODevice(); uint32_t callId = 0; @@ -1557,7 +1559,7 @@ ze_result_t CommandListCoreFamily::appendMemoryCopyRegion(void *d addToMappedEventList(signalEvent); addFlushRequiredCommand(dstAllocationStruct.needsFlush, signalEvent); - if (this->inOrderExecutionEnabled) { + if (this->isInOrderExecutionEnabled()) { if (inOrderCopyOnlySignalingAllowed) { appendSignalInOrderDependencyCounter(); } @@ -1986,7 +1988,7 @@ ze_result_t CommandListCoreFamily::appendMemoryFill(void *ptr, appendEventForProfilingAllWalkers(signalEvent, false, singlePipeControlPacket); addFlushRequiredCommand(hostPointerNeedsFlush, signalEvent); - if (this->inOrderExecutionEnabled) { + if (this->isInOrderExecutionEnabled()) { if (launchParams.isKernelSplitOperation) { if (!signalEvent) { NEO::PipeControlArgs args; @@ -2225,7 +2227,7 @@ void CommandListCoreFamily::handleInOrderImplicitDependencies(boo NEO::RelaxedOrderingHelper::encodeRegistersBeforeDependencyCheckers(*commandContainer.getCommandStream()); } - CommandListCoreFamily::appendWaitOnInOrderDependency(this->inOrderDependencyCounterAllocation, this->inOrderDependencyCounter, this->inOrderAllocationOffset, relaxedOrderingAllowed, true); + CommandListCoreFamily::appendWaitOnInOrderDependency(&inOrderExecInfo->inOrderDependencyCounterAllocation, this->inOrderDependencyCounter, this->inOrderAllocationOffset, relaxedOrderingAllowed, true); } } @@ -2252,7 +2254,7 @@ inline ze_result_t CommandListCoreFamily::addEventsToCmdList(uint template ze_result_t CommandListCoreFamily::appendSignalEvent(ze_event_handle_t hEvent) { - if (this->inOrderExecutionEnabled) { + if (this->isInOrderExecutionEnabled()) { handleInOrderImplicitDependencies(isRelaxedOrderingDispatchAllowed(0)); } @@ -2275,7 +2277,7 @@ ze_result_t CommandListCoreFamily::appendSignalEvent(ze_event_han bool appendPipeControlWithPostSync = (!isCopyOnly()) && (event->isSignalScope() || event->isEventTimestampFlagSet()); dispatchEventPostSyncOperation(event, Event::STATE_SIGNALED, false, false, appendPipeControlWithPostSync, false); - if (this->inOrderExecutionEnabled) { + if (this->isInOrderExecutionEnabled()) { appendSignalInOrderDependencyCounter(); handleInOrderDependencyCounter(); } @@ -2328,12 +2330,12 @@ void CommandListCoreFamily::appendWaitOnInOrderDependency(NEO::Gr template bool CommandListCoreFamily::isInOrderEventWaitRequired(const Event &event) const { - return (event.getInOrderExecDataAllocation() != this->inOrderDependencyCounterAllocation); + return (event.getInOrderExecDataAllocation() != &inOrderExecInfo->inOrderDependencyCounterAllocation); } template ze_result_t CommandListCoreFamily::appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, bool relaxedOrderingAllowed, bool trackDependencies, bool signalInOrderCompletion) { - signalInOrderCompletion &= this->inOrderExecutionEnabled; + signalInOrderCompletion &= this->isInOrderExecutionEnabled(); NEO::Device *neoDevice = device->getNEODevice(); uint32_t callId = 0; @@ -2422,7 +2424,7 @@ void CommandListCoreFamily::appendSignalInOrderDependencyCounter( uint64_t signalValue = this->inOrderDependencyCounter + 1; - uint64_t gpuVa = this->inOrderDependencyCounterAllocation->getGpuAddress() + this->inOrderAllocationOffset; + uint64_t gpuVa = inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress() + this->inOrderAllocationOffset; auto miStoreCmd = reinterpret_cast(commandContainer.getCommandStream()->getSpace(sizeof(MI_STORE_DATA_IMM))); @@ -2568,7 +2570,7 @@ ze_result_t CommandListCoreFamily::appendWriteGlobalTimestamp( appendSignalEventPostWalker(signalEvent, false); - if (this->inOrderExecutionEnabled) { + if (this->isInOrderExecutionEnabled()) { appendSignalInOrderDependencyCounter(); handleInOrderDependencyCounter(); } @@ -3055,7 +3057,7 @@ ze_result_t CommandListCoreFamily::appendBarrier(ze_event_handle_ appendEventForProfiling(signalEvent, true, false); - if (this->inOrderExecutionEnabled) { + if (this->isInOrderExecutionEnabled()) { appendSignalInOrderDependencyCounter(); } else if (isCopyOnly()) { NEO::MiFlushArgs args{this->dummyBlitWa}; @@ -3075,7 +3077,7 @@ ze_result_t CommandListCoreFamily::appendBarrier(ze_event_handle_ } addToMappedEventList(signalEvent); - appendSignalEventPostWalker(signalEvent, this->inOrderExecutionEnabled); + appendSignalEventPostWalker(signalEvent, this->isInOrderExecutionEnabled()); if (isInOrderExecutionEnabled()) { handleInOrderDependencyCounter(); @@ -3204,7 +3206,7 @@ ze_result_t CommandListCoreFamily::appendWaitOnMemory(void *desc, appendEventForProfiling(signalEvent, true, false); - if (this->inOrderExecutionEnabled) { + if (this->isInOrderExecutionEnabled()) { handleInOrderImplicitDependencies(false); } @@ -3223,7 +3225,7 @@ ze_result_t CommandListCoreFamily::appendWaitOnMemory(void *desc, appendSignalEventPostWalker(signalEvent, false); - if (this->inOrderExecutionEnabled) { + if (this->isInOrderExecutionEnabled()) { appendSignalInOrderDependencyCounter(); handleInOrderDependencyCounter(); } @@ -3245,7 +3247,7 @@ ze_result_t CommandListCoreFamily::appendWriteToMemory(void *desc UNRECOVERABLE_IF(dstAllocationStruct.alloc == nullptr); commandContainer.addToResidencyContainer(dstAllocationStruct.alloc); - if (this->inOrderExecutionEnabled) { + if (this->isInOrderExecutionEnabled()) { handleInOrderImplicitDependencies(false); } @@ -3271,7 +3273,7 @@ ze_result_t CommandListCoreFamily::appendWriteToMemory(void *desc args); } - if (this->inOrderExecutionEnabled) { + if (this->isInOrderExecutionEnabled()) { appendSignalInOrderDependencyCounter(); handleInOrderDependencyCounter(); } @@ -3460,8 +3462,8 @@ void CommandListCoreFamily::addCmdForPatching(void *cmd, uint64_t template void CommandListCoreFamily::patchInOrderCmds() { - if (this->regularCmdListSubmissionCounter > 0) { - auto appendCounter = this->regularCmdListSubmissionCounter * inOrderDependencyCounter; + if (isInOrderExecutionEnabled() && inOrderExecInfo->regularCmdListSubmissionCounter > 0) { + auto appendCounter = inOrderExecInfo->regularCmdListSubmissionCounter * inOrderDependencyCounter; for (auto &cmd : inOrderPatchCmds) { cmd.patch(appendCounter); diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h index 2cc86ae874..8ade76632d 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h @@ -183,7 +183,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily::appendBarrier(ze_even if (isInOrderExecutionEnabled()) { if (isSkippingInOrderBarrierAllowed(hSignalEvent, numWaitEvents, phWaitEvents)) { if (hSignalEvent) { - Event::fromHandle(hSignalEvent)->updateInOrderExecState(*this->inOrderDependencyCounterAllocation, this->inOrderDependencyCounter, this->inOrderAllocationOffset); + Event::fromHandle(hSignalEvent)->updateInOrderExecState(inOrderExecInfo, this->inOrderDependencyCounter, this->inOrderAllocationOffset); } return ZE_RESULT_SUCCESS; @@ -923,7 +924,7 @@ ze_result_t CommandListCoreFamilyImmediate::flushImmediate(ze_res this->latestFlushIsHostVisible = signalEvent->isSignalScope(ZE_EVENT_SCOPE_FLAG_HOST); if (isInOrderExecutionEnabled() && signalEvent->isInOrderExecEvent()) { - signalEvent->updateInOrderExecState(*this->inOrderDependencyCounterAllocation, this->inOrderDependencyCounter, this->inOrderAllocationOffset); + signalEvent->updateInOrderExecState(inOrderExecInfo, this->inOrderDependencyCounter, this->inOrderAllocationOffset); } } else { this->latestFlushIsHostVisible = false; @@ -1266,11 +1267,11 @@ ze_result_t CommandListCoreFamilyImmediate::synchronizeInOrderExe waitStartTime = lastHangCheckTime; do { - this->csr->downloadAllocation(*this->inOrderDependencyCounterAllocation); + this->csr->downloadAllocation(inOrderExecInfo->inOrderDependencyCounterAllocation); bool signaled = true; - auto hostAddress = static_cast(ptrOffset(this->inOrderDependencyCounterAllocation->getUnderlyingBuffer(), this->inOrderAllocationOffset)); + auto hostAddress = static_cast(ptrOffset(inOrderExecInfo->inOrderDependencyCounterAllocation.getUnderlyingBuffer(), this->inOrderAllocationOffset)); for (uint32_t i = 0; i < this->partitionCount; i++) { if (!NEO::WaitUtils::waitFunctionWithPredicate(hostAddress, waitValue, std::greater_equal())) { diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_skl_to_tgllp.inl b/level_zero/core/source/cmdlist/cmdlist_hw_skl_to_tgllp.inl index dcf8077e7d..8c1a957662 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_skl_to_tgllp.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_skl_to_tgllp.inl @@ -261,7 +261,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K event->setKernelForPrintf(kernel); } - if (this->inOrderExecutionEnabled && !launchParams.isKernelSplitOperation) { + if (this->isInOrderExecutionEnabled() && !launchParams.isKernelSplitOperation) { NEO::PipeControlArgs args; NEO::MemorySynchronizationCommands::addSingleBarrier(*commandContainer.getCommandStream(), args); diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl index acb9725e26..8522d60e39 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl @@ -296,14 +296,14 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K this->dcFlushSupport // dcFlushEnable }; - bool inOrderExecSignalRequired = (this->inOrderExecutionEnabled && !launchParams.isKernelSplitOperation); + bool inOrderExecSignalRequired = (this->isInOrderExecutionEnabled() && !launchParams.isKernelSplitOperation); bool inOrderNonWalkerSignalling = isInOrderNonWalkerSignalingRequired(event); if (inOrderExecSignalRequired) { if (inOrderNonWalkerSignalling) { dispatchEventPostSyncOperation(event, Event::STATE_CLEARED, false, false, false, false); } else { - dispatchKernelArgs.eventAddress = this->inOrderDependencyCounterAllocation->getGpuAddress() + this->inOrderAllocationOffset; + dispatchKernelArgs.eventAddress = inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress() + this->inOrderAllocationOffset; dispatchKernelArgs.postSyncImmValue = this->inOrderDependencyCounter + 1; } } diff --git a/level_zero/core/source/cmdlist/cmdlist_imp.cpp b/level_zero/core/source/cmdlist/cmdlist_imp.cpp index 530b1224ce..0ac7f9d0ac 100644 --- a/level_zero/core/source/cmdlist/cmdlist_imp.cpp +++ b/level_zero/core/source/cmdlist/cmdlist_imp.cpp @@ -74,8 +74,6 @@ ze_result_t CommandListImp::destroy() { } } - device->getNEODevice()->getMemoryManager()->freeGraphicsMemory(inOrderDependencyCounterAllocation); - delete this; return ZE_RESULT_SUCCESS; } @@ -226,19 +224,19 @@ void CommandListImp::setStreamPropertiesDefaultSettings(NEO::StreamProperties &s } void CommandListImp::enableInOrderExecution() { - UNRECOVERABLE_IF(inOrderExecutionEnabled); + UNRECOVERABLE_IF(inOrderExecInfo.get()); auto device = this->device->getNEODevice(); NEO::AllocationProperties allocationProperties{device->getRootDeviceIndex(), MemoryConstants::pageSize64k, NEO::AllocationType::TIMESTAMP_PACKET_TAG_BUFFER, device->getDeviceBitfield()}; - inOrderDependencyCounterAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties(allocationProperties); + auto inOrderDependencyCounterAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties(allocationProperties); UNRECOVERABLE_IF(!inOrderDependencyCounterAllocation); memset(inOrderDependencyCounterAllocation->getUnderlyingBuffer(), 0, inOrderDependencyCounterAllocation->getUnderlyingBufferSize()); - inOrderExecutionEnabled = true; + inOrderExecInfo = std::make_shared(*inOrderDependencyCounterAllocation, *device->getMemoryManager(), (this->cmdListType == TYPE_REGULAR)); } void CommandListImp::storeReferenceTsToMappedEvents(bool isClearEnabled) { @@ -263,4 +261,10 @@ void CommandListImp::addToMappedEventList(Event *event) { } } +void CommandListImp::incRegularCmdListSubmissionCounter() { + if (isInOrderExecutionEnabled()) { + inOrderExecInfo->regularCmdListSubmissionCounter++; + } +} + } // namespace L0 diff --git a/level_zero/core/source/cmdlist/cmdlist_imp.h b/level_zero/core/source/cmdlist/cmdlist_imp.h index cd6433ef70..9022dca805 100644 --- a/level_zero/core/source/cmdlist/cmdlist_imp.h +++ b/level_zero/core/source/cmdlist/cmdlist_imp.h @@ -9,7 +9,7 @@ #include "shared/source/os_interface/os_time.h" #include "level_zero/core/source/cmdlist/cmdlist.h" -#include "level_zero/core/source/helpers/in_order_patch_cmds.h" +#include "level_zero/core/source/helpers/in_order_cmd_helpers.h" #include @@ -31,19 +31,17 @@ struct CommandListImp : CommandList { void setStreamPropertiesDefaultSettings(NEO::StreamProperties &streamProperties); void enableInOrderExecution(); - bool isInOrderExecutionEnabled() const { return inOrderExecutionEnabled; } + bool isInOrderExecutionEnabled() const { return inOrderExecInfo.get(); } void storeReferenceTsToMappedEvents(bool clear); void addToMappedEventList(Event *event); const std::vector &peekMappedEventList() { return mappedTsEventList; } - void incRegularCmdListSubmissionCounter() { regularCmdListSubmissionCounter++; } + void incRegularCmdListSubmissionCounter(); virtual void patchInOrderCmds() = 0; protected: - NEO::GraphicsAllocation *inOrderDependencyCounterAllocation = nullptr; - uint64_t regularCmdListSubmissionCounter = 0; + std::shared_ptr inOrderExecInfo; uint64_t inOrderDependencyCounter = 0; uint32_t inOrderAllocationOffset = 0; - bool inOrderExecutionEnabled = false; ~CommandListImp() override = default; diff --git a/level_zero/core/source/event/event.cpp b/level_zero/core/source/event/event.cpp index cca7dc2f3d..06607546f0 100644 --- a/level_zero/core/source/event/event.cpp +++ b/level_zero/core/source/event/event.cpp @@ -335,8 +335,6 @@ ze_result_t EventPool::openEventPoolIpcHandle(const ze_ipc_event_pool_handle_t & } ze_result_t Event::destroy() { - freeInOrderExecAllocation(); - delete this; return ZE_RESULT_SUCCESS; } @@ -394,21 +392,15 @@ void Event::setIsCompleted() { } void Event::freeInOrderExecAllocation() { - if (inOrderExecDataAllocation) { - this->device->getNEODevice()->getMemoryManager()->freeGraphicsMemory(inOrderExecDataAllocation); - - inOrderExecDataAllocation = nullptr; - } + inOrderExecInfo.reset(); } -void Event::updateInOrderExecState(NEO::GraphicsAllocation &inOrderDependenciesAllocation, uint64_t signalValue, uint32_t allocationOffset) { - if (this->inOrderExecDataAllocation != &inOrderDependenciesAllocation) { - freeInOrderExecAllocation(); - inOrderDependenciesAllocation.incNumOwners(); +void Event::updateInOrderExecState(std::shared_ptr &newInOrderExecInfo, uint64_t signalValue, uint32_t allocationOffset) { + if (this->inOrderExecInfo.get() != newInOrderExecInfo.get()) { + inOrderExecInfo = newInOrderExecInfo; } inOrderExecSignalValue = signalValue; - inOrderExecDataAllocation = &inOrderDependenciesAllocation; inOrderAllocationOffset = allocationOffset; } @@ -432,4 +424,6 @@ void Event::setReferenceTs(uint64_t currentCpuTimeStamp) { } } +NEO::GraphicsAllocation *Event::getInOrderExecDataAllocation() const { return inOrderExecInfo.get() ? &inOrderExecInfo->inOrderDependencyCounterAllocation : nullptr; } + } // namespace L0 diff --git a/level_zero/core/source/event/event.h b/level_zero/core/source/event/event.h index fdbcce8616..778f65af24 100644 --- a/level_zero/core/source/event/event.h +++ b/level_zero/core/source/event/event.h @@ -42,6 +42,7 @@ struct DriverHandle; struct DriverHandleImp; struct Device; struct Kernel; +struct InOrderExecInfo; #pragma pack(1) struct IpcEventPoolData { @@ -215,10 +216,10 @@ struct Event : _ze_event_handle_t { void setMetricStreamer(MetricStreamer *metricStreamer) { this->metricStreamer = metricStreamer; } - void updateInOrderExecState(NEO::GraphicsAllocation &inOrderDependenciesAllocation, uint64_t signalValue, uint32_t allocationOffset); + void updateInOrderExecState(std::shared_ptr &newInOrderExecInfo, uint64_t signalValue, uint32_t allocationOffset); bool isInOrderExecEvent() const { return inOrderExecEvent; } void enableInOrderMode() { this->inOrderExecEvent = true; } - NEO::GraphicsAllocation *getInOrderExecDataAllocation() const { return inOrderExecDataAllocation; } + NEO::GraphicsAllocation *getInOrderExecDataAllocation() const; uint64_t getInOrderExecSignalValue() const { return inOrderExecSignalValue; } uint32_t getInOrderAllocationOffset() const { return inOrderAllocationOffset; } void setLatestUsedCmdQueue(CommandQueue *newCmdQ); @@ -265,7 +266,7 @@ struct Event : _ze_event_handle_t { Device *device = nullptr; EventPool *eventPool = nullptr; Kernel *kernelWithPrintf = nullptr; - NEO::GraphicsAllocation *inOrderExecDataAllocation = nullptr; + std::shared_ptr inOrderExecInfo; CommandQueue *latestUsedCmdQueue = nullptr; uint32_t maxKernelCount = 0; diff --git a/level_zero/core/source/event/event_impl.inl b/level_zero/core/source/event/event_impl.inl index b2ff4ce6fe..ae0612911b 100644 --- a/level_zero/core/source/event/event_impl.inl +++ b/level_zero/core/source/event/event_impl.inl @@ -152,11 +152,11 @@ void EventImp::assignKernelEventCompletionData(void *address) { template ze_result_t EventImp::queryInOrderEventStatus() { - if (!this->inOrderExecDataAllocation) { + if (!this->inOrderExecInfo.get()) { return ZE_RESULT_NOT_READY; } - auto hostAddress = static_cast(ptrOffset(this->inOrderExecDataAllocation->getUnderlyingBuffer(), this->inOrderAllocationOffset)); + auto hostAddress = static_cast(ptrOffset(inOrderExecInfo->inOrderDependencyCounterAllocation.getUnderlyingBuffer(), this->inOrderAllocationOffset)); bool signaled = true; for (uint32_t i = 0; i < this->getPacketsInUse(); i++) { @@ -247,8 +247,8 @@ bool EventImp::handlePreQueryStatusOperationsAndCheckCompletion() { csr->downloadAllocation(alloc); } - if (inOrderExecEvent) { - if (auto &alloc = *this->inOrderExecDataAllocation; alloc.isUsedByOsContext(csr->getOsContext().getContextId())) { + if (inOrderExecInfo) { + if (auto &alloc = inOrderExecInfo->inOrderDependencyCounterAllocation; alloc.isUsedByOsContext(csr->getOsContext().getContextId())) { csr->downloadAllocation(alloc); } } @@ -385,7 +385,11 @@ ze_result_t EventImp::waitForUserFence(uint64_t timeout) { return ZE_RESULT_SUCCESS; } - uint64_t waitAddress = castToUint64(ptrOffset(this->inOrderExecDataAllocation->getUnderlyingBuffer(), this->inOrderAllocationOffset)); + if (!inOrderExecInfo) { + return ZE_RESULT_NOT_READY; + } + + uint64_t waitAddress = castToUint64(ptrOffset(inOrderExecInfo->inOrderDependencyCounterAllocation.getUnderlyingBuffer(), this->inOrderAllocationOffset)); if (!csrs[0]->waitUserFence(this->inOrderExecSignalValue, waitAddress, timeout)) { return ZE_RESULT_NOT_READY; diff --git a/level_zero/core/source/helpers/CMakeLists.txt b/level_zero/core/source/helpers/CMakeLists.txt index a9cbeef44a..3a21f98426 100644 --- a/level_zero/core/source/helpers/CMakeLists.txt +++ b/level_zero/core/source/helpers/CMakeLists.txt @@ -11,7 +11,8 @@ target_sources(${L0_STATIC_LIB_NAME} ${CMAKE_CURRENT_SOURCE_DIR}/error_code_helper_l0.cpp ${CMAKE_CURRENT_SOURCE_DIR}/error_code_helper_l0.h ${CMAKE_CURRENT_SOURCE_DIR}/implicit_scaling_l0.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/in_order_patch_cmds.h + ${CMAKE_CURRENT_SOURCE_DIR}/in_order_cmd_helpers.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/in_order_cmd_helpers.h ${CMAKE_CURRENT_SOURCE_DIR}/l0_gfx_core_helper_factory_init.inl ${CMAKE_CURRENT_SOURCE_DIR}/l0_populate_factory.h ${CMAKE_CURRENT_SOURCE_DIR}/properties_parser.h diff --git a/level_zero/core/source/helpers/in_order_cmd_helpers.cpp b/level_zero/core/source/helpers/in_order_cmd_helpers.cpp new file mode 100644 index 0000000000..6d36b8548a --- /dev/null +++ b/level_zero/core/source/helpers/in_order_cmd_helpers.cpp @@ -0,0 +1,25 @@ +/* + * Copyright (C) 2023 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "level_zero/core/source/helpers/in_order_cmd_helpers.h" + +#include "shared/source/memory_manager/memory_manager.h" + +#include +#include + +namespace L0 { + +InOrderExecInfo::~InOrderExecInfo() { + memoryManager.freeGraphicsMemory(&inOrderDependencyCounterAllocation); +} + +InOrderExecInfo::InOrderExecInfo(NEO::GraphicsAllocation &inOrderDependencyCounterAllocation, NEO::MemoryManager &memoryManager, bool isRegularCmdList) + : inOrderDependencyCounterAllocation(inOrderDependencyCounterAllocation), memoryManager(memoryManager), isRegularCmdList(isRegularCmdList) { +} + +} // namespace L0 diff --git a/level_zero/core/source/helpers/in_order_patch_cmds.h b/level_zero/core/source/helpers/in_order_cmd_helpers.h similarity index 79% rename from level_zero/core/source/helpers/in_order_patch_cmds.h rename to level_zero/core/source/helpers/in_order_cmd_helpers.h index b50af5fbd1..e1bd0786eb 100644 --- a/level_zero/core/source/helpers/in_order_patch_cmds.h +++ b/level_zero/core/source/helpers/in_order_cmd_helpers.h @@ -7,12 +7,32 @@ #pragma once +#include "shared/source/helpers/non_copyable_or_moveable.h" #include "shared/source/helpers/ptr_math.h" #include #include +namespace NEO { +class GraphicsAllocation; +class MemoryManager; +} // namespace NEO + namespace L0 { + +struct InOrderExecInfo : public NEO::NonCopyableClass { + ~InOrderExecInfo(); + + InOrderExecInfo() = delete; + + InOrderExecInfo(NEO::GraphicsAllocation &inOrderDependencyCounterAllocation, NEO::MemoryManager &memoryManager, bool isRegularCmdList); + + NEO::GraphicsAllocation &inOrderDependencyCounterAllocation; + NEO::MemoryManager &memoryManager; + uint64_t regularCmdListSubmissionCounter = 0; + bool isRegularCmdList = false; +}; + namespace InOrderPatchCommandTypes { enum class CmdType { None, diff --git a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h index 92177c261b..730b32d18d 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h +++ b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h @@ -78,7 +78,7 @@ struct WhiteBox<::L0::CommandListCoreFamily> using BaseClass::initialize; using BaseClass::inOrderAllocationOffset; using BaseClass::inOrderDependencyCounter; - using BaseClass::inOrderDependencyCounterAllocation; + using BaseClass::inOrderExecInfo; using BaseClass::inOrderPatchCmds; using BaseClass::isFlushTaskSubmissionEnabled; using BaseClass::isQwordInOrderCounter; @@ -90,7 +90,6 @@ struct WhiteBox<::L0::CommandListCoreFamily> using BaseClass::patternAllocations; using BaseClass::pipeControlMultiKernelEventSync; using BaseClass::pipelineSelectStateTracking; - using BaseClass::regularCmdListSubmissionCounter; using BaseClass::requiredStreamState; using BaseClass::requiresQueueUncachedMocs; using BaseClass::setupTimestampEventForMultiTile; @@ -172,7 +171,7 @@ struct WhiteBox> using BaseClass::hostSynchronize; using BaseClass::immediateCmdListHeapSharing; using BaseClass::inOrderDependencyCounter; - using BaseClass::inOrderDependencyCounterAllocation; + using BaseClass::inOrderExecInfo; using BaseClass::inOrderPatchCmds; using BaseClass::isBcsSplitNeeded; using BaseClass::isFlushTaskSubmissionEnabled; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp index 4dae26b55c..fe41ea6c8f 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp @@ -1769,7 +1769,7 @@ HWTEST2_F(CommandListCreate, givenInOrderExecutionWhenDispatchingRelaxedOrdering lrrCmd++; lrrCmd++; - EXPECT_TRUE(RelaxedOrderingCommandsHelper::verifyConditionalDataMemBbStart(lrrCmd, 0, cmdList->inOrderDependencyCounterAllocation->getGpuAddress(), 2, + EXPECT_TRUE(RelaxedOrderingCommandsHelper::verifyConditionalDataMemBbStart(lrrCmd, 0, cmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress(), 2, NEO::CompareOperation::Less, true, cmdList->isQwordInOrderCounter())); } diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp index 5bbe8f5ee0..ad8fa120c1 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp @@ -667,7 +667,7 @@ struct InOrderCmdListTests : public CommandListAppendLaunchKernel { struct MockEvent : public EventImp { using EventImp::inOrderExecEvent; using EventImp::maxPacketCount; - using EventImp::inOrderExecDataAllocation; + using EventImp::inOrderExecInfo; using EventImp::inOrderExecSignalValue; using EventImp::inOrderAllocationOffset; using EventImp::csrs; @@ -909,11 +909,13 @@ HWTEST2_F(InOrderCmdListTests, givenDebugFlagSetWhenEventHostSyncCalledThenCallW auto eventPool = createEvents(2, false); + EXPECT_EQ(ZE_RESULT_NOT_READY, events[0]->hostSynchronize(2)); + immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, events[0]->toHandle(), 0, nullptr, launchParams, false); events[0]->inOrderAllocationOffset = 123; - auto hostAddress = castToUint64(ptrOffset(events[0]->inOrderExecDataAllocation->getUnderlyingBuffer(), events[0]->inOrderAllocationOffset)); + auto hostAddress = castToUint64(ptrOffset(events[0]->inOrderExecInfo->inOrderDependencyCounterAllocation.getUnderlyingBuffer(), events[0]->inOrderAllocationOffset)); auto ultCsr = static_cast *>(device->getNEODevice()->getDefaultEngine().commandStreamReceiver); @@ -954,18 +956,18 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenResetEventCalledThenResetEven immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, events[0]->toHandle(), 0, nullptr, launchParams, false); - EXPECT_EQ(MemoryConstants::pageSize64k, immCmdList->inOrderDependencyCounterAllocation->getUnderlyingBufferSize()); + EXPECT_EQ(MemoryConstants::pageSize64k, immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getUnderlyingBufferSize()); EXPECT_TRUE(events[0]->inOrderExecEvent); EXPECT_EQ(events[0]->inOrderExecSignalValue, immCmdList->inOrderDependencyCounter); - EXPECT_EQ(events[0]->inOrderExecDataAllocation, immCmdList->inOrderDependencyCounterAllocation); + EXPECT_EQ(&events[0]->inOrderExecInfo->inOrderDependencyCounterAllocation, &immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation); EXPECT_EQ(events[0]->inOrderAllocationOffset, 0u); events[0]->inOrderAllocationOffset = 123; events[0]->reset(); EXPECT_EQ(events[0]->inOrderExecSignalValue, 0u); - EXPECT_EQ(events[0]->inOrderExecDataAllocation, nullptr); + EXPECT_EQ(events[0]->inOrderExecInfo, nullptr); EXPECT_EQ(events[0]->inOrderAllocationOffset, 0u); } @@ -979,7 +981,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWheUsingRegularEventThenDontSetIn EXPECT_FALSE(events[0]->inOrderExecEvent); EXPECT_EQ(events[0]->inOrderExecSignalValue, 0u); - EXPECT_EQ(events[0]->inOrderExecDataAllocation, nullptr); + EXPECT_EQ(events[0]->inOrderExecInfo.get(), nullptr); EXPECT_EQ(events[0]->inOrderAllocationOffset, 0u); } @@ -1012,7 +1014,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenSubmittingThenProgramSemaphor auto semaphoreCmd = genCmdCast(*itor); EXPECT_EQ(1u, semaphoreCmd->getSemaphoreDataDword()); - EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress() + counterOffset, semaphoreCmd->getSemaphoreGraphicsAddress()); + EXPECT_EQ(immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress() + counterOffset, semaphoreCmd->getSemaphoreGraphicsAddress()); EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, semaphoreCmd->getCompareOperation()); } @@ -1113,7 +1115,7 @@ HWTEST2_F(InOrderCmdListTests, givenDebugFlagSetWhenDispatchingStoreDataImmThenP auto sdiCmd = genCmdCast(*itor); ASSERT_NE(nullptr, sdiCmd); - EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), sdiCmd->getAddress()); + EXPECT_EQ(immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress(), sdiCmd->getAddress()); auto userInterruptCmd = genCmdCast(*(++itor)); ASSERT_NE(nullptr, userInterruptCmd); @@ -1218,42 +1220,6 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenWaitingForEventFromAfterReset EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, retValue); } -HWTEST2_F(InOrderCmdListTests, givenMultipleAllocationOwnerWhenUsingEventsThenSetCorrectOwnersCount, IsAtLeastSkl) { - using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; - - auto immCmdList0 = createImmCmdList(); - auto immCmdList1 = createImmCmdList(); - - auto eventPool = createEvents(2, false); - auto eventHandle0 = events[0]->toHandle(); - auto eventHandle1 = events[1]->toHandle(); - - auto inOrderAlloc0 = immCmdList0->inOrderDependencyCounterAllocation; - auto validateNumOwners = [&inOrderAlloc0](uint32_t expectedValue) { - inOrderAlloc0->incNumOwners(); - auto fetchValue = inOrderAlloc0->fetchDecNumOwners(); - - EXPECT_EQ(expectedValue, fetchValue - 1); - }; - - validateNumOwners(1); - - immCmdList0->appendLaunchKernel(kernel->toHandle(), groupCount, eventHandle0, 0, nullptr, launchParams, false); - validateNumOwners(2); - - immCmdList0->appendLaunchKernel(kernel->toHandle(), groupCount, eventHandle0, 0, nullptr, launchParams, false); - validateNumOwners(2); - - immCmdList0->appendLaunchKernel(kernel->toHandle(), groupCount, eventHandle1, 0, nullptr, launchParams, false); - validateNumOwners(3); - - immCmdList1->appendLaunchKernel(kernel->toHandle(), groupCount, eventHandle0, 0, nullptr, launchParams, false); - validateNumOwners(2); - - events[1]->reset(); - validateNumOwners(1); -} - HWTEST2_F(InOrderCmdListTests, givenInOrderEventModeWhenSubmittingThenProgramSemaphoreOnlyForExternalEvent, IsAtLeastXeHpCore) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; @@ -1301,7 +1267,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderEventModeWhenSubmittingThenProgramSem ASSERT_NE(nullptr, semaphoreCmd); EXPECT_EQ(1u, semaphoreCmd->getSemaphoreDataDword()); - EXPECT_EQ(immCmdList2->inOrderDependencyCounterAllocation->getGpuAddress() + counterOffset2, semaphoreCmd->getSemaphoreGraphicsAddress()); + EXPECT_EQ(immCmdList2->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress() + counterOffset2, semaphoreCmd->getSemaphoreGraphicsAddress()); EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, semaphoreCmd->getCompareOperation()); itor++; @@ -1360,11 +1326,11 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderEventModeWhenSubmittingFromDifferentC immCmdList1->appendLaunchKernel(kernel->toHandle(), groupCount, event0Handle, 0, nullptr, launchParams, false); - EXPECT_EQ(1u, ultCsr->makeResidentAllocations[immCmdList1->inOrderDependencyCounterAllocation]); + EXPECT_EQ(1u, ultCsr->makeResidentAllocations[&immCmdList1->inOrderExecInfo->inOrderDependencyCounterAllocation]); immCmdList2->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 1, &event0Handle, launchParams, false); - EXPECT_EQ(2u, ultCsr->makeResidentAllocations[immCmdList1->inOrderDependencyCounterAllocation]); + EXPECT_EQ(2u, ultCsr->makeResidentAllocations[&immCmdList1->inOrderExecInfo->inOrderDependencyCounterAllocation]); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, cmdStream->getCpuBase(), cmdStream->getUsed())); @@ -1376,8 +1342,8 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderEventModeWhenSubmittingFromDifferentC auto semaphoreCmd = genCmdCast(*itor); EXPECT_EQ(1u, semaphoreCmd->getSemaphoreDataDword()); - EXPECT_NE(immCmdList1->inOrderDependencyCounterAllocation->getGpuAddress(), immCmdList2->inOrderDependencyCounterAllocation->getGpuAddress()); - EXPECT_EQ(immCmdList1->inOrderDependencyCounterAllocation->getGpuAddress(), semaphoreCmd->getSemaphoreGraphicsAddress()); + EXPECT_NE(immCmdList1->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress(), immCmdList2->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress()); + EXPECT_EQ(immCmdList1->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress(), semaphoreCmd->getSemaphoreGraphicsAddress()); EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, semaphoreCmd->getCompareOperation()); } @@ -1400,8 +1366,8 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderEventModeWhenSubmittingThenClearEvent HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenDispatchingThenHandleDependencyCounter, IsAtLeastXeHpCore) { auto immCmdList = createImmCmdList(); - EXPECT_NE(nullptr, immCmdList->inOrderDependencyCounterAllocation); - EXPECT_EQ(AllocationType::TIMESTAMP_PACKET_TAG_BUFFER, immCmdList->inOrderDependencyCounterAllocation->getAllocationType()); + EXPECT_NE(nullptr, immCmdList->inOrderExecInfo.get()); + EXPECT_EQ(AllocationType::TIMESTAMP_PACKET_TAG_BUFFER, immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getAllocationType()); EXPECT_EQ(0u, immCmdList->inOrderDependencyCounter); @@ -1410,11 +1376,11 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenDispatchingThenHandleDependen immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false); EXPECT_EQ(1u, immCmdList->inOrderDependencyCounter); - EXPECT_EQ(1u, ultCsr->makeResidentAllocations[immCmdList->inOrderDependencyCounterAllocation]); + EXPECT_EQ(1u, ultCsr->makeResidentAllocations[&immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation]); immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false); EXPECT_EQ(2u, immCmdList->inOrderDependencyCounter); - EXPECT_EQ(2u, ultCsr->makeResidentAllocations[immCmdList->inOrderDependencyCounterAllocation]); + EXPECT_EQ(2u, ultCsr->makeResidentAllocations[&immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation]); } HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenAddingRelaxedOrderingEventsThenConfigureRegistersFirst, IsAtLeastXeHpCore) { @@ -1476,7 +1442,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingWalkerThenSignalSy EXPECT_EQ(POSTSYNC_DATA::OPERATION_WRITE_IMMEDIATE_DATA, postSync.getOperation()); EXPECT_EQ(1u, postSync.getImmediateData()); - EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress() + counterOffset, postSync.getDestinationAddress()); + EXPECT_EQ(immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress() + counterOffset, postSync.getDestinationAddress()); } auto offset = cmdStream->getUsed(); @@ -1498,10 +1464,10 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingWalkerThenSignalSy EXPECT_EQ(POSTSYNC_DATA::OPERATION_WRITE_IMMEDIATE_DATA, postSync.getOperation()); EXPECT_EQ(2u, postSync.getImmediateData()); - EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress() + counterOffset, postSync.getDestinationAddress()); + EXPECT_EQ(immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress() + counterOffset, postSync.getDestinationAddress()); } - auto hostAddress = static_cast(ptrOffset(immCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer(), counterOffset)); + auto hostAddress = static_cast(ptrOffset(immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getUnderlyingBuffer(), counterOffset)); *hostAddress = 1; EXPECT_EQ(ZE_RESULT_NOT_READY, events[0]->hostSynchronize(1)); @@ -1563,7 +1529,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingTimestampEventThen sdiCmd = genCmdCast(++semaphoreCmd); ASSERT_NE(nullptr, sdiCmd); - EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), sdiCmd->getAddress()); + EXPECT_EQ(immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress(), sdiCmd->getAddress()); EXPECT_EQ(immCmdList->isQwordInOrderCounter(), sdiCmd->getStoreQword()); EXPECT_EQ(1u, sdiCmd->getDataDword0()); } @@ -1678,7 +1644,7 @@ HWTEST2_F(InOrderCmdListTests, givenRelaxedOrderingWhenProgrammingTimestampEvent auto sdiCmd = genCmdCast(ptrOffset(lrrCmd, EncodeBatchBufferStartOrEnd::getCmdSizeConditionalDataMemBatchBufferStart(false))); ASSERT_NE(nullptr, sdiCmd); - EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), sdiCmd->getAddress()); + EXPECT_EQ(immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress(), sdiCmd->getAddress()); EXPECT_EQ(immCmdList->isQwordInOrderCounter(), sdiCmd->getStoreQword()); EXPECT_EQ(2u, sdiCmd->getDataDword0()); } @@ -1778,7 +1744,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingRegularEventThenCl sdiCmd = genCmdCast(++semaphoreCmd); ASSERT_NE(nullptr, sdiCmd); - EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), sdiCmd->getAddress()); + EXPECT_EQ(immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress(), sdiCmd->getAddress()); EXPECT_EQ(immCmdList->isQwordInOrderCounter(), sdiCmd->getStoreQword()); EXPECT_EQ(1u, sdiCmd->getDataDword0()); } @@ -1874,7 +1840,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingWalkerThenProgramP auto sdiCmd = genCmdCast(*sdiItor); - uint64_t expectedAddress = immCmdList->inOrderDependencyCounterAllocation->getGpuAddress() + immCmdList->inOrderAllocationOffset; + uint64_t expectedAddress = immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress() + immCmdList->inOrderAllocationOffset; EXPECT_EQ(expectedAddress, sdiCmd->getAddress()); EXPECT_EQ(immCmdList->isQwordInOrderCounter(), sdiCmd->getStoreQword()); @@ -1922,7 +1888,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingKernelSplitThenPro auto sdiCmd = genCmdCast(*sdiItor); - uint64_t expectedAddress = immCmdList->inOrderDependencyCounterAllocation->getGpuAddress() + immCmdList->inOrderAllocationOffset; + uint64_t expectedAddress = immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress() + immCmdList->inOrderAllocationOffset; EXPECT_EQ(expectedAddress, sdiCmd->getAddress()); EXPECT_EQ(immCmdList->isQwordInOrderCounter(), sdiCmd->getStoreQword()); @@ -1947,7 +1913,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingAppendSignalEventT immCmdList->appendSignalEvent(events[0]->toHandle()); - uint64_t inOrderSyncVa = immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(); + uint64_t inOrderSyncVa = immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress(); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, @@ -1995,7 +1961,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingNonKernelAppendThe auto eventPool = createEvents(1, true); - uint64_t inOrderSyncVa = immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(); + uint64_t inOrderSyncVa = immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress(); uint8_t ptr[64] = {}; @@ -2087,7 +2053,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderRegularCmdListWhenProgrammingNonKerne uint8_t ptr[64] = {}; - uint64_t inOrderSyncVa = regularCmdList->inOrderDependencyCounterAllocation->getGpuAddress(); + uint64_t inOrderSyncVa = regularCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress(); regularCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false); @@ -2228,7 +2194,7 @@ HWTEST2_F(InOrderCmdListTests, givenImmediateEventWhenWaitingFromRegularCmdListT auto semaphoreCmd = genCmdCast(*semaphoreItor); ASSERT_NE(nullptr, semaphoreCmd); - EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), semaphoreCmd->getSemaphoreGraphicsAddress()); + EXPECT_EQ(immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress(), semaphoreCmd->getSemaphoreGraphicsAddress()); auto walkerItor = find(semaphoreItor, cmdList.end()); EXPECT_NE(cmdList.end(), walkerItor); @@ -2299,7 +2265,7 @@ HWTEST2_F(InOrderCmdListTests, givenCopyOnlyInOrderModeWhenProgrammingCopyThenSi auto sdiCmd = genCmdCast(*sdiItor); - uint64_t syncVa = immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(); + uint64_t syncVa = immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress(); EXPECT_EQ(syncVa, sdiCmd->getAddress()); EXPECT_EQ(immCmdList->isQwordInOrderCounter(), sdiCmd->getStoreQword()); @@ -2328,7 +2294,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingComputeCopyThenDon auto &postSync = walkerCmd->getPostSync(); - EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), postSync.getDestinationAddress()); + EXPECT_EQ(immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress(), postSync.getDestinationAddress()); auto sdiItor = find(walkerItor, cmdList.end()); EXPECT_EQ(cmdList.end(), sdiItor); @@ -2364,7 +2330,7 @@ HWTEST2_F(InOrderCmdListTests, givenCopyOnlyInOrderModeWhenProgrammingFillThenSi auto sdiCmd = genCmdCast(*sdiItor); - uint64_t syncVa = immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(); + uint64_t syncVa = immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress(); EXPECT_EQ(syncVa, sdiCmd->getAddress()); EXPECT_EQ(immCmdList->isQwordInOrderCounter(), sdiCmd->getStoreQword()); @@ -2415,7 +2381,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingFillWithSplitAndOu auto sdiCmd = genCmdCast(*sdiItor); ASSERT_NE(nullptr, sdiCmd); - uint64_t syncVa = immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(); + uint64_t syncVa = immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress(); EXPECT_EQ(syncVa, sdiCmd->getAddress()); EXPECT_EQ(immCmdList->isQwordInOrderCounter(), sdiCmd->getStoreQword()); @@ -2456,7 +2422,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingFillWithSplitAndWi auto sdiCmd = genCmdCast(*sdiItor); ASSERT_NE(nullptr, sdiCmd); - uint64_t syncVa = immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(); + uint64_t syncVa = immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress(); EXPECT_EQ(syncVa, sdiCmd->getAddress()); EXPECT_EQ(immCmdList->isQwordInOrderCounter(), sdiCmd->getStoreQword()); @@ -2492,7 +2458,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingFillWithoutSplitTh EXPECT_EQ(POSTSYNC_DATA::OPERATION_WRITE_IMMEDIATE_DATA, postSync.getOperation()); EXPECT_EQ(1u, postSync.getImmediateData()); - EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), postSync.getDestinationAddress()); + EXPECT_EQ(immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress(), postSync.getDestinationAddress()); auto sdiItor = find(walkerItor, cmdList.end()); EXPECT_EQ(cmdList.end(), sdiItor); @@ -2529,7 +2495,7 @@ HWTEST2_F(InOrderCmdListTests, givenCopyOnlyInOrderModeWhenProgrammingCopyRegion auto sdiCmd = genCmdCast(*sdiItor); - uint64_t syncVa = immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(); + uint64_t syncVa = immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress(); EXPECT_EQ(syncVa, sdiCmd->getAddress()); EXPECT_EQ(immCmdList->isQwordInOrderCounter(), sdiCmd->getStoreQword()); @@ -2568,14 +2534,14 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingAppendWaitOnEvents ASSERT_NE(nullptr, semaphoreCmd); EXPECT_EQ(2u, semaphoreCmd->getSemaphoreDataDword()); - EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), semaphoreCmd->getSemaphoreGraphicsAddress()); + EXPECT_EQ(immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress(), semaphoreCmd->getSemaphoreGraphicsAddress()); auto sdiItor = find(semaphoreItor, cmdList.end()); ASSERT_NE(cmdList.end(), sdiItor); auto sdiCmd = genCmdCast(*sdiItor); - EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), sdiCmd->getAddress()); + EXPECT_EQ(immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress(), sdiCmd->getAddress()); EXPECT_EQ(immCmdList->isQwordInOrderCounter(), sdiCmd->getStoreQword()); EXPECT_EQ(3u, sdiCmd->getDataDword0()); } @@ -2614,7 +2580,7 @@ HWTEST2_F(InOrderCmdListTests, givenRegularInOrderCmdListWhenProgrammingAppendWa auto sdiCmd = genCmdCast(*sdiItor); ASSERT_NE(nullptr, sdiCmd); - uint64_t syncVa = regularCmdList->inOrderDependencyCounterAllocation->getGpuAddress(); + uint64_t syncVa = regularCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress(); EXPECT_EQ(syncVa, sdiCmd->getAddress()); EXPECT_EQ(regularCmdList->isQwordInOrderCounter(), sdiCmd->getStoreQword()); @@ -2637,7 +2603,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingCounterWithOverflo auto eventHandle = events[0]->toHandle(); - uint64_t baseGpuVa = immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(); + uint64_t baseGpuVa = immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress(); immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, eventHandle, 0, nullptr, launchParams, false); @@ -2651,7 +2617,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingCounterWithOverflo auto &postSync = walkerCmd->getPostSync(); EXPECT_EQ(std::numeric_limits::max(), postSync.getImmediateData()); - EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), postSync.getDestinationAddress()); + EXPECT_EQ(immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress(), postSync.getDestinationAddress()); auto semaphoreItor = find(walkerItor, cmdList.end()); @@ -2668,7 +2634,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingCounterWithOverflo EXPECT_EQ(POSTSYNC_DATA::OPERATION_WRITE_IMMEDIATE_DATA, postSync.getOperation()); EXPECT_EQ(expectedCounter, postSync.getImmediateData()); - EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), postSync.getDestinationAddress()); + EXPECT_EQ(immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress(), postSync.getDestinationAddress()); } else { ASSERT_NE(cmdList.end(), semaphoreItor); @@ -2724,7 +2690,7 @@ HWTEST2_F(InOrderCmdListTests, givenCopyOnlyInOrderModeWhenProgrammingBarrierThe auto sdiCmd = genCmdCast(*sdiItor); - EXPECT_EQ(immCmdList2->inOrderDependencyCounterAllocation->getGpuAddress(), sdiCmd->getAddress()); + EXPECT_EQ(immCmdList2->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress(), sdiCmd->getAddress()); EXPECT_EQ(immCmdList2->isQwordInOrderCounter(), sdiCmd->getStoreQword()); EXPECT_EQ(1u, sdiCmd->getDataDword0()); EXPECT_EQ(0u, sdiCmd->getDataDword1()); @@ -2762,7 +2728,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingAppendBarrierWithW auto sdiCmd = genCmdCast(*sdiItor); - EXPECT_EQ(immCmdList2->inOrderDependencyCounterAllocation->getGpuAddress(), sdiCmd->getAddress()); + EXPECT_EQ(immCmdList2->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress(), sdiCmd->getAddress()); EXPECT_EQ(immCmdList2->isQwordInOrderCounter(), sdiCmd->getStoreQword()); EXPECT_EQ(1u, sdiCmd->getDataDword0()); EXPECT_EQ(0u, sdiCmd->getDataDword1()); @@ -2859,7 +2825,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingAppendBarrierWitho auto sdiCmd = genCmdCast(*sdiItor); - EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), sdiCmd->getAddress()); + EXPECT_EQ(immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress(), sdiCmd->getAddress()); EXPECT_EQ(immCmdList->isQwordInOrderCounter(), sdiCmd->getStoreQword()); EXPECT_EQ(2u, sdiCmd->getDataDword0()); EXPECT_EQ(0u, sdiCmd->getDataDword1()); @@ -2896,7 +2862,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingAppendBarrierWitho auto sdiCmd = genCmdCast(*sdiItor); - EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), sdiCmd->getAddress()); + EXPECT_EQ(immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress(), sdiCmd->getAddress()); EXPECT_EQ(immCmdList->isQwordInOrderCounter(), sdiCmd->getStoreQword()); EXPECT_EQ(2u, sdiCmd->getDataDword0()); EXPECT_EQ(0u, sdiCmd->getDataDword1()); @@ -2914,7 +2880,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenCallingSyncThenHandleCompleti immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, events[0]->toHandle(), 0, nullptr, launchParams, false); - auto hostAddress = static_cast(ptrOffset(immCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer(), counterOffset)); + auto hostAddress = static_cast(ptrOffset(immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getUnderlyingBuffer(), counterOffset)); *hostAddress = 0; const uint32_t failCounter = 3; @@ -2988,7 +2954,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenDoingCpuCopyThenSynchronize, auto eventHandle = events[0]->toHandle(); - auto hostAddress = static_cast(immCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer()); + auto hostAddress = static_cast(immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getUnderlyingBuffer()); *hostAddress = 0; const uint32_t failCounter = 3; @@ -3043,7 +3009,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenGpuHangDetectedInCpuCopyPathT auto ultCsr = static_cast *>(device->getNEODevice()->getDefaultEngine().commandStreamReceiver); - auto hostAddress = static_cast(immCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer()); + auto hostAddress = static_cast(immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getUnderlyingBuffer()); *hostAddress = 0; immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, events[0]->toHandle(), 0, nullptr, launchParams, false); @@ -3098,7 +3064,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingKernelSplitWithout ASSERT_NE(nullptr, sdiCmd); - EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), sdiCmd->getAddress()); + EXPECT_EQ(immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress(), sdiCmd->getAddress()); EXPECT_EQ(immCmdList->isQwordInOrderCounter(), sdiCmd->getStoreQword()); EXPECT_EQ(1u, sdiCmd->getDataDword0()); @@ -3148,7 +3114,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingKernelSplitWithEve ASSERT_NE(nullptr, sdiCmd); - EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), sdiCmd->getAddress()); + EXPECT_EQ(immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress(), sdiCmd->getAddress()); EXPECT_EQ(immCmdList->isQwordInOrderCounter(), sdiCmd->getStoreQword()); EXPECT_EQ(1u, sdiCmd->getDataDword0()); @@ -3198,7 +3164,7 @@ HWTEST2_F(MultiTileInOrderCmdListTests, givenMultiTileInOrderModeWhenProgramming auto semaphoreCmd = genCmdCast(*cmdList.begin()); ASSERT_NE(nullptr, semaphoreCmd); - auto gpuAddress = immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(); + auto gpuAddress = immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress(); EXPECT_EQ(1u, semaphoreCmd->getSemaphoreDataDword()); EXPECT_EQ(gpuAddress, semaphoreCmd->getSemaphoreGraphicsAddress()); @@ -3225,7 +3191,7 @@ HWTEST2_F(MultiTileInOrderCmdListTests, givenMultiTileInOrderModeWhenSignalingSy auto sdiCmd = genCmdCast(*cmdList.begin()); ASSERT_NE(nullptr, sdiCmd); - auto gpuAddress = immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(); + auto gpuAddress = immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress(); EXPECT_EQ(gpuAddress, sdiCmd->getAddress()); EXPECT_TRUE(sdiCmd->getWorkloadPartitionIdOffsetEnable()); @@ -3240,7 +3206,7 @@ HWTEST2_F(MultiTileInOrderCmdListTests, givenMultiTileInOrderModeWhenCallingSync immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, events[0]->toHandle(), 0, nullptr, launchParams, false); - auto hostAddress0 = static_cast(immCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer()); + auto hostAddress0 = static_cast(immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getUnderlyingBuffer()); auto hostAddress1 = ptrOffset(hostAddress0, sizeof(uint64_t)); *hostAddress0 = 0; @@ -3521,7 +3487,7 @@ HWTEST2_F(BcsSplitInOrderCmdListTests, givenBcsSplitEnabledWhenDispatchingCopyTh ASSERT_NE(nullptr, sdiCmd); - auto gpuAddress = immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(); + auto gpuAddress = immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress(); EXPECT_EQ(gpuAddress, sdiCmd->getAddress()); EXPECT_EQ(immCmdList->isQwordInOrderCounter(), sdiCmd->getStoreQword()); @@ -3561,7 +3527,7 @@ HWTEST2_F(BcsSplitInOrderCmdListTests, givenBcsSplitEnabledWhenDispatchingCopyRe ASSERT_NE(nullptr, sdiCmd); - auto gpuAddress = immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(); + auto gpuAddress = immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress(); EXPECT_EQ(gpuAddress, sdiCmd->getAddress()); EXPECT_EQ(immCmdList->isQwordInOrderCounter(), sdiCmd->getStoreQword()); @@ -3791,7 +3757,7 @@ HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdL EXPECT_EQ(POSTSYNC_DATA::OPERATION_WRITE_IMMEDIATE_DATA, postSync.getOperation()); EXPECT_EQ(1u, postSync.getImmediateData()); - EXPECT_EQ(regularCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), postSync.getDestinationAddress()); + EXPECT_EQ(regularCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress(), postSync.getDestinationAddress()); auto sdiItor = find(cmdList.begin(), cmdList.end()); EXPECT_EQ(cmdList.end(), sdiItor); @@ -3817,14 +3783,14 @@ HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdL EXPECT_EQ(POSTSYNC_DATA::OPERATION_WRITE_IMMEDIATE_DATA, postSync.getOperation()); EXPECT_EQ(2u, postSync.getImmediateData()); - EXPECT_EQ(regularCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), postSync.getDestinationAddress()); + EXPECT_EQ(regularCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress(), postSync.getDestinationAddress()); auto sdiItor = find(cmdList.begin(), cmdList.end()); EXPECT_EQ(cmdList.end(), sdiItor); } regularCmdList->inOrderAllocationOffset = 123; - auto hostAddr = static_cast(regularCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer()); + auto hostAddr = static_cast(regularCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getUnderlyingBuffer()); *hostAddr = 0x1234; regularCmdList->reset(); @@ -3849,7 +3815,7 @@ HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdL size_t offset = cmdStream->getUsed(); EXPECT_EQ(0u, regularCmdList->inOrderDependencyCounter); - EXPECT_NE(nullptr, regularCmdList->inOrderDependencyCounterAllocation); + EXPECT_NE(nullptr, regularCmdList->inOrderExecInfo.get()); constexpr size_t size = 128 * sizeof(uint32_t); auto data = allocHostMem(size); @@ -3920,7 +3886,7 @@ HWTEST2_F(InOrderRegularCopyOnlyCmdListTests, givenInOrderModeWhenDispatchingReg ASSERT_NE(nullptr, sdiCmd); - auto gpuAddress = regularCmdList->inOrderDependencyCounterAllocation->getGpuAddress(); + auto gpuAddress = regularCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress(); EXPECT_EQ(gpuAddress, sdiCmd->getAddress()); EXPECT_EQ(regularCmdList->isQwordInOrderCounter(), sdiCmd->getStoreQword()); @@ -3953,7 +3919,7 @@ HWTEST2_F(InOrderRegularCopyOnlyCmdListTests, givenInOrderModeWhenDispatchingReg ASSERT_NE(nullptr, sdiCmd); - auto gpuAddress = regularCmdList->inOrderDependencyCounterAllocation->getGpuAddress(); + auto gpuAddress = regularCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress(); EXPECT_EQ(gpuAddress, sdiCmd->getAddress()); EXPECT_EQ(regularCmdList->isQwordInOrderCounter(), sdiCmd->getStoreQword()); diff --git a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_1.cpp b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_1.cpp index 68853ed992..472bcfc899 100644 --- a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_1.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_1.cpp @@ -1137,28 +1137,32 @@ HWTEST2_F(ExecuteCommandListTests, givenRegularCmdListWhenExecutionThenIncSubmis { auto computeCmdList = makeZeUniquePtr>>(); computeCmdList->initialize(device, NEO::EngineGroupType::Compute, 0u); + computeCmdList->enableInOrderExecution(); + auto commandListHandle = computeCmdList->toHandle(); computeCmdList->close(); commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false); - EXPECT_EQ(1u, computeCmdList->regularCmdListSubmissionCounter); + EXPECT_EQ(1u, computeCmdList->inOrderExecInfo->regularCmdListSubmissionCounter); commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false); - EXPECT_EQ(2u, computeCmdList->regularCmdListSubmissionCounter); + EXPECT_EQ(2u, computeCmdList->inOrderExecInfo->regularCmdListSubmissionCounter); } { auto copyCmdList = makeZeUniquePtr>>(); copyCmdList->initialize(device, NEO::EngineGroupType::Copy, 0u); + copyCmdList->enableInOrderExecution(); + auto commandListHandle = copyCmdList->toHandle(); copyCmdList->close(); commandQueue->isCopyOnlyCommandQueue = true; commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false); - EXPECT_EQ(1u, copyCmdList->regularCmdListSubmissionCounter); + EXPECT_EQ(1u, copyCmdList->inOrderExecInfo->regularCmdListSubmissionCounter); commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false); - EXPECT_EQ(2u, copyCmdList->regularCmdListSubmissionCounter); + EXPECT_EQ(2u, copyCmdList->inOrderExecInfo->regularCmdListSubmissionCounter); } } diff --git a/level_zero/core/test/unit_tests/sources/event/test_event.cpp b/level_zero/core/test/unit_tests/sources/event/test_event.cpp index f70c3136ed..9ba266f4db 100644 --- a/level_zero/core/test/unit_tests/sources/event/test_event.cpp +++ b/level_zero/core/test/unit_tests/sources/event/test_event.cpp @@ -3195,23 +3195,25 @@ HWTEST_F(EventTests, givenInOrderEventWhenHostSynchronizeIsCalledThenAllocationI uint64_t storage[2] = {1, 1}; - NEO::MockGraphicsAllocation syncAllocation(&storage, sizeof(storage)); + auto syncAllocation = new NEO::MockGraphicsAllocation(&storage, sizeof(storage)); + + auto inOrderExecInfo = std::make_shared(*syncAllocation, *neoDevice->getMemoryManager(), false); event->inOrderExecEvent = true; - event->updateInOrderExecState(syncAllocation, 1, 0); + event->updateInOrderExecState(inOrderExecInfo, 1, 0); constexpr uint64_t timeout = std::numeric_limits::max(); auto result = event->hostSynchronize(timeout); EXPECT_EQ(ZE_RESULT_SUCCESS, result); - EXPECT_EQ(0u, downloadAllocationTrack[&syncAllocation]); + EXPECT_EQ(0u, downloadAllocationTrack[syncAllocation]); EXPECT_EQ(1u, ultCsr->downloadAllocationsCalledCount); auto event2 = zeUniquePtr(whiteboxCast(getHelper().createEvent(eventPool.get(), &eventDesc, device))); event2->inOrderExecEvent = true; - event2->updateInOrderExecState(syncAllocation, 1, 0); - syncAllocation.updateTaskCount(0u, ultCsr->getOsContext().getContextId()); + event2->updateInOrderExecState(inOrderExecInfo, 1, 0); + syncAllocation->updateTaskCount(0u, ultCsr->getOsContext().getContextId()); ultCsr->downloadAllocationsCalledCount = 0; eventAddress = static_cast(event->getHostAddress()); *eventAddress = Event::STATE_SIGNALED; @@ -3219,7 +3221,7 @@ HWTEST_F(EventTests, givenInOrderEventWhenHostSynchronizeIsCalledThenAllocationI result = event2->hostSynchronize(timeout); EXPECT_EQ(ZE_RESULT_SUCCESS, result); - EXPECT_NE(0u, downloadAllocationTrack[&syncAllocation]); + EXPECT_NE(0u, downloadAllocationTrack[syncAllocation]); EXPECT_EQ(1u, ultCsr->downloadAllocationsCalledCount); } diff --git a/shared/source/memory_manager/graphics_allocation.h b/shared/source/memory_manager/graphics_allocation.h index e8f4795a99..23b9465b84 100644 --- a/shared/source/memory_manager/graphics_allocation.h +++ b/shared/source/memory_manager/graphics_allocation.h @@ -115,9 +115,6 @@ class GraphicsAllocation : public IDNode { return gpuAddress + allocationOffset - gpuBaseAddress; } - void incNumOwners() { numOwners++; } - uint32_t fetchDecNumOwners() { return numOwners.fetch_sub(1); } - void lock(void *ptr) { lockedPtr = ptr; } void unlock() { lockedPtr = nullptr; } bool isLocked() const { return lockedPtr != nullptr; } @@ -383,7 +380,6 @@ class GraphicsAllocation : public IDNode { StackVec usageInfos; std::atomic registeredContextsNum{0}; - std::atomic numOwners{1}; StackVec gmms; ResidencyData residency; }; diff --git a/shared/source/memory_manager/memory_manager.cpp b/shared/source/memory_manager/memory_manager.cpp index c91f4920e3..0055f6e0cd 100644 --- a/shared/source/memory_manager/memory_manager.cpp +++ b/shared/source/memory_manager/memory_manager.cpp @@ -245,10 +245,6 @@ void MemoryManager::freeGraphicsMemory(GraphicsAllocation *gfxAllocation, bool i return; } - if (gfxAllocation->fetchDecNumOwners() > 1) { - return; - } - if (ApiSpecificConfig::getGlobalBindlessHeapConfiguration() && executionEnvironment.rootDeviceEnvironments[gfxAllocation->getRootDeviceIndex()]->getBindlessHeapsHelper() != nullptr) { executionEnvironment.rootDeviceEnvironments[gfxAllocation->getRootDeviceIndex()]->getBindlessHeapsHelper()->releaseSSToReusePool(gfxAllocation->getBindlessInfo()); } diff --git a/shared/test/unit_test/memory_manager/memory_manager_tests.cpp b/shared/test/unit_test/memory_manager/memory_manager_tests.cpp index 1b7cbf42f9..ec022beb60 100644 --- a/shared/test/unit_test/memory_manager/memory_manager_tests.cpp +++ b/shared/test/unit_test/memory_manager/memory_manager_tests.cpp @@ -72,23 +72,6 @@ TEST(MemoryManagerTest, givenDefaultMemoryManagerWhenGraphicsAllocationContainsE memoryManager.freeGraphicsMemory(graphicsAllocation); } -TEST(MemoryManagerTest, givenMultipleOwnersWhenReleasingAllocationThenFreeOnlyWhenNoActiveOwners) { - MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); - OsAgnosticMemoryManager memoryManager(executionEnvironment); - - auto graphicsAllocation = memoryManager.allocateGraphicsMemoryWithProperties(MockAllocationProperties{0, MemoryConstants::pageSize}); - - graphicsAllocation->incNumOwners(); - graphicsAllocation->incNumOwners(); - EXPECT_EQ(3u, graphicsAllocation->fetchDecNumOwners()); - - graphicsAllocation->incNumOwners(); - memoryManager.freeGraphicsMemory(graphicsAllocation); - EXPECT_EQ(2u, graphicsAllocation->fetchDecNumOwners()); - - memoryManager.freeGraphicsMemory(graphicsAllocation); -} - TEST(MemoryManagerTest, whenGettingPreferredAllocationMethodThenNotDefinedIsReturned) { MockMemoryManager memoryManager; for (auto i = 0; i < static_cast(AllocationType::COUNT); i++) {