From 61fb19caab4aedd815baa84ab68791e29bddbb37 Mon Sep 17 00:00:00 2001 From: "Dunajski, Bartosz" Date: Wed, 7 Jun 2023 18:44:13 +0000 Subject: [PATCH] feature: bring back counter based in-order tracking Related-To: NEO-7966 Signed-off-by: Dunajski, Bartosz --- level_zero/core/source/cmdlist/cmdlist_hw.h | 5 +- level_zero/core/source/cmdlist/cmdlist_hw.inl | 81 ++-- .../source/cmdlist/cmdlist_hw_immediate.h | 4 +- .../source/cmdlist/cmdlist_hw_immediate.inl | 36 +- .../cmdlist/cmdlist_hw_skl_to_tgllp.inl | 2 +- .../cmdlist/cmdlist_hw_xehp_and_later.inl | 20 +- .../core/source/cmdlist/cmdlist_imp.cpp | 13 +- level_zero/core/source/cmdlist/cmdlist_imp.h | 5 +- level_zero/core/source/device/bcs_split.h | 3 +- level_zero/core/source/event/event.cpp | 9 +- level_zero/core/source/event/event.h | 9 +- level_zero/core/source/event/event_impl.inl | 17 +- .../core/test/unit_tests/mocks/mock_cmdlist.h | 5 +- .../sources/cmdlist/test_cmdlist_1.cpp | 8 +- .../test_cmdlist_append_launch_kernel_3.cpp | 459 +++++++----------- .../unit_tests/sources/event/test_event.cpp | 14 +- .../command_container/command_encoder.h | 2 +- 17 files changed, 285 insertions(+), 407 deletions(-) diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.h b/level_zero/core/source/cmdlist/cmdlist_hw.h index b9e0f5473c..aca1164536 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw.h @@ -169,8 +169,8 @@ struct CommandListCoreFamily : CommandListImp { ze_result_t appendSignalEvent(ze_event_handle_t hEvent) override; ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, bool relaxedOrderingAllowed, bool trackDependencies, bool signalInOrderCompletion) override; - void appendWaitOnInOrderDependency(bool relaxedOrderingAllowed); - void appendSignalInOrderDependencyTimestampPacket(); + void appendWaitOnInOrderDependency(NEO::GraphicsAllocation *dependencyCounterAllocation, uint64_t waitValue, bool relaxedOrderingAllowed); + void 
appendSignalInOrderDependencyCounter(); ze_result_t appendWriteGlobalTimestamp(uint64_t *dstptr, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override; ze_result_t appendMemoryCopyFromContext(void *dstptr, ze_context_handle_t hContextSrc, const void *srcptr, @@ -186,7 +186,6 @@ struct CommandListCoreFamily : CommandListImp { ze_result_t executeCommandListImmediate(bool performMigration) override; ze_result_t executeCommandListImmediateImpl(bool performMigration, L0::CommandQueue *cmdQImmediate); size_t getReserveSshSize(); - void obtainNewTimestampPacketNode(); protected: MOCKABLE_VIRTUAL ze_result_t appendMemoryCopyKernelWithGA(void *dstPtr, NEO::GraphicsAllocation *dstPtrAlloc, diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 5a6a4e93af..9d3e5e9da8 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -24,7 +24,6 @@ #include "shared/source/helpers/preamble.h" #include "shared/source/helpers/register_offsets.h" #include "shared/source/helpers/surface_format_info.h" -#include "shared/source/helpers/timestamp_packet.h" #include "shared/source/indirect_heap/indirect_heap.h" #include "shared/source/memory_manager/allocation_properties.h" #include "shared/source/memory_manager/graphics_allocation.h" @@ -1402,13 +1401,11 @@ ze_result_t CommandListCoreFamily::appendMemoryCopy(void *dstptr, addToMappedEventList(signalEvent); if (this->inOrderExecutionEnabled && (launchParams.isKernelSplitOperation || inOrderCopyOnlySignalingAllowed)) { - obtainNewTimestampPacketNode(); - if (!signalEvent && !isCopyOnly()) { NEO::PipeControlArgs args; NEO::MemorySynchronizationCommands::addSingleBarrier(*commandContainer.getCommandStream(), args); } - appendSignalInOrderDependencyTimestampPacket(); + appendSignalInOrderDependencyCounter(); } if (NEO::DebugManager.flags.EnableSWTags.get()) { @@ -1497,8 +1494,7 @@ ze_result_t 
CommandListCoreFamily::appendMemoryCopyRegion(void *d addFlushRequiredCommand(dstAllocationStruct.needsFlush, signalEvent); if (this->inOrderExecutionEnabled && isCopyOnly() && inOrderCopyOnlySignalingAllowed) { - obtainNewTimestampPacketNode(); - appendSignalInOrderDependencyTimestampPacket(); + appendSignalInOrderDependencyCounter(); } if (NEO::DebugManager.flags.EnableSWTags.get()) { @@ -2128,14 +2124,14 @@ inline uint32_t CommandListCoreFamily::getRegionOffsetForAppendMe template inline ze_result_t CommandListCoreFamily::addEventsToCmdList(uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingAllowed, bool trackDependencies) { - auto hasInOrderDependencies = this->timestampPacketContainer.get() && (this->timestampPacketContainer->peekNodes().size() > 0); + auto hasInOrderDependencies = (inOrderDependencyCounter > 0); if (relaxedOrderingAllowed && (numWaitEvents > 0 || hasInOrderDependencies)) { NEO::RelaxedOrderingHelper::encodeRegistersBeforeDependencyCheckers(*commandContainer.getCommandStream()); } if (hasInOrderDependencies) { - CommandListCoreFamily::appendWaitOnInOrderDependency(relaxedOrderingAllowed); + CommandListCoreFamily::appendWaitOnInOrderDependency(this->inOrderDependencyCounterAllocation, this->inOrderDependencyCounter, relaxedOrderingAllowed); } if (numWaitEvents > 0) { @@ -2175,11 +2171,7 @@ ze_result_t CommandListCoreFamily::appendSignalEvent(ze_event_han dispatchEventPostSyncOperation(event, Event::STATE_SIGNALED, false, false, appendPipeControlWithPostSync); if (this->inOrderExecutionEnabled) { - obtainNewTimestampPacketNode(); - - CommandListCoreFamily::appendWaitOnEvents(1, &hEvent, false, false, false); - - appendSignalInOrderDependencyTimestampPacket(); + appendSignalInOrderDependencyCounter(); } if (NEO::DebugManager.flags.EnableSWTags.get()) { @@ -2194,15 +2186,28 @@ ze_result_t CommandListCoreFamily::appendSignalEvent(ze_event_han } template -void CommandListCoreFamily::appendWaitOnInOrderDependency(bool 
relaxedOrderingAllowed) { - auto node = this->timestampPacketContainer->peekNodes()[0]; +void CommandListCoreFamily::appendWaitOnInOrderDependency(NEO::GraphicsAllocation *dependencyCounterAllocation, uint64_t waitValue, bool relaxedOrderingAllowed) { + using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION; - commandContainer.addToResidencyContainer(node->getBaseGraphicsAllocation()->getGraphicsAllocation(device->getRootDeviceIndex())); + UNRECOVERABLE_IF(waitValue >= std::numeric_limits::max()); - if (relaxedOrderingAllowed) { - NEO::TimestampPacketHelper::programConditionalBbStartForRelaxedOrdering(*commandContainer.getCommandStream(), *node); - } else { - NEO::TimestampPacketHelper::programSemaphore(*commandContainer.getCommandStream(), *node); + commandContainer.addToResidencyContainer(dependencyCounterAllocation); + + uint64_t gpuAddress = dependencyCounterAllocation->getGpuAddress(); + + for (uint32_t i = 0; i < this->partitionCount; i++) { + if (relaxedOrderingAllowed) { + NEO::EncodeBatchBufferStartOrEnd::programConditionalDataMemBatchBufferStart(*commandContainer.getCommandStream(), 0, gpuAddress, static_cast(waitValue), + NEO::CompareOperation::Less, true); + + } else { + NEO::EncodeSemaphore::addMiSemaphoreWaitCommand(*commandContainer.getCommandStream(), + gpuAddress, + static_cast(waitValue), + COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD); + } + + gpuAddress += sizeof(uint64_t); } } @@ -2251,13 +2256,14 @@ ze_result_t CommandListCoreFamily::appendWaitOnEvents(uint32_t nu continue; } - if (isInOrderExecutionEnabled() && event->isInOrderExecEvent()) { - auto const cmdListNode = this->timestampPacketContainer->peekNodes()[0]; - auto const eventNode = event->getInOrderTimestampPacket()->peekNodes()[0]; + if (event->isInOrderExecEvent()) { + bool eventFromPreviousAppend = (event->getInOrderExecDataAllocation() == this->inOrderDependencyCounterAllocation) && + (event->getInOrderExecSignalValue() == 
this->inOrderDependencyCounter); - if (cmdListNode == eventNode) { - continue; + if (!eventFromPreviousAppend) { + CommandListCoreFamily::appendWaitOnInOrderDependency(event->getInOrderExecDataAllocation(), event->getInOrderExecSignalValue(), relaxedOrderingAllowed); } + continue; } commandContainer.addToResidencyContainer(&event->getAllocation(this->device)); @@ -2289,9 +2295,7 @@ ze_result_t CommandListCoreFamily::appendWaitOnEvents(uint32_t nu } if (signalInOrderCompletion) { - obtainNewTimestampPacketNode(); - - appendSignalInOrderDependencyTimestampPacket(); + appendSignalInOrderDependencyCounter(); } makeResidentDummyAllocation(); @@ -2308,8 +2312,13 @@ ze_result_t CommandListCoreFamily::appendWaitOnEvents(uint32_t nu } template -void CommandListCoreFamily::appendSignalInOrderDependencyTimestampPacket() { - NEO::TimestampPacketHelper::nonStallingContextEndNodeSignal(*commandContainer.getCommandStream(), *this->timestampPacketContainer->peekNodes()[0], (this->partitionCount > 1)); +void CommandListCoreFamily::appendSignalInOrderDependencyCounter() { + uint64_t signalValue = this->inOrderDependencyCounter + 1; + auto lowPart = static_cast(signalValue & 0x0000FFFFFFFFULL); + auto highPart = static_cast(signalValue >> 32); + + NEO::EncodeStoreMemory::programStoreDataImm(*commandContainer.getCommandStream(), this->inOrderDependencyCounterAllocation->getGpuAddress(), + lowPart, highPart, true, (this->partitionCount > 1)); } template @@ -3234,16 +3243,4 @@ void CommandListCoreFamily::dispatchEventRemainingPacketsPostSync } } -template -void CommandListCoreFamily::obtainNewTimestampPacketNode() { - auto allocator = this->csr->getTimestampPacketAllocator(); - - timestampPacketContainer->moveNodesToNewContainer(*deferredTimestampPackets); - - auto tag = allocator->getTag(); - tag->setPacketsUsed(this->partitionCount); - - timestampPacketContainer->add(tag); -} - } // namespace L0 diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h 
b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h index 9696391128..812528d41f 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h @@ -177,8 +177,8 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily::flushImmediate(ze_res bool hasRelaxedOrderingDependencies, ze_event_handle_t hSignalEvent) { if (inputRet == ZE_RESULT_SUCCESS) { if (isInOrderExecutionEnabled()) { - auto node = this->timestampPacketContainer->peekNodes()[0]; - auto allocation = node->getBaseGraphicsAllocation()->getGraphicsAllocation(this->device->getRootDeviceIndex()); - this->commandContainer.addToResidencyContainer(allocation); + inOrderDependencyCounter++; + + this->commandContainer.addToResidencyContainer(this->inOrderDependencyCounterAllocation); } if (this->isFlushTaskSubmissionEnabled) { @@ -747,7 +747,7 @@ ze_result_t CommandListCoreFamilyImmediate::flushImmediate(ze_res signalEvent->setCsr(this->csr); if (isInOrderExecutionEnabled()) { - signalEvent->enableInOrderExecMode(*this->timestampPacketContainer); + signalEvent->enableInOrderExecMode(*this->inOrderDependencyCounterAllocation, this->inOrderDependencyCounter); } } @@ -829,7 +829,7 @@ ze_result_t CommandListCoreFamilyImmediate::performCpuMemcpy(cons } if (isInOrderExecutionEnabled()) { - this->dependenciesPresent = false; // wait only for waitlist and in-order TimestampPacket value + this->dependenciesPresent = false; // wait only for waitlist and in-order sync value } if (numWaitEvents > 0) { @@ -1077,43 +1077,37 @@ void CommandListCoreFamilyImmediate::checkAssert() { template bool CommandListCoreFamilyImmediate::isRelaxedOrderingDispatchAllowed(uint32_t numWaitEvents) const { - auto numEvents = numWaitEvents; - if (this->isInOrderExecutionEnabled()) { - numEvents += static_cast(this->timestampPacketContainer->peekNodes().size()); - } + auto numEvents = numWaitEvents + ((inOrderDependencyCounter > 0) ? 
1 : 0); return NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*this->csr, numEvents); } template ze_result_t CommandListCoreFamilyImmediate::synchronizeInOrderExecution(uint64_t timeout) const { - using TSPacketType = typename GfxFamily::TimestampPacketType; - - NEO::TimestampPacketContainer nodesToRelease; - nodesToRelease.swapNodes(*this->deferredTimestampPackets); - std::chrono::high_resolution_clock::time_point waitStartTime, lastHangCheckTime, now; uint64_t timeDiff = 0; ze_result_t status = ZE_RESULT_NOT_READY; - auto node = this->timestampPacketContainer->peekNodes()[0]; + auto waitValue = this->inOrderDependencyCounter; lastHangCheckTime = std::chrono::high_resolution_clock::now(); waitStartTime = lastHangCheckTime; do { - this->csr->downloadAllocation(*node->getBaseGraphicsAllocation()->getGraphicsAllocation(this->device->getRootDeviceIndex())); + this->csr->downloadAllocation(*this->inOrderDependencyCounterAllocation); bool signaled = true; - for (uint32_t i = 0; i < this->partitionCount; i++) { - auto hostAddress = static_cast(node->getContextEndAddress(i)); + auto hostAddress = static_cast(this->inOrderDependencyCounterAllocation->getUnderlyingBuffer()); - if (!NEO::WaitUtils::waitFunctionWithPredicate(hostAddress, NEO::TimestampPacketConstants::initValue, std::not_equal_to())) { + for (uint32_t i = 0; i < this->partitionCount; i++) { + if (!NEO::WaitUtils::waitFunctionWithPredicate(hostAddress, waitValue, std::greater_equal())) { signaled = false; break; } + + hostAddress = ptrOffset(hostAddress, sizeof(uint64_t)); } if (signaled) { @@ -1136,10 +1130,6 @@ ze_result_t CommandListCoreFamilyImmediate::synchronizeInOrderExe timeDiff = std::chrono::duration_cast(now - waitStartTime).count(); } while (timeDiff < timeout); - if (status == ZE_RESULT_NOT_READY) { - nodesToRelease.moveNodesToNewContainer(*this->deferredTimestampPackets); - } - return status; } diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_skl_to_tgllp.inl 
b/level_zero/core/source/cmdlist/cmdlist_hw_skl_to_tgllp.inl index 61a0e47ac7..346b4400d4 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_skl_to_tgllp.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_skl_to_tgllp.inl @@ -166,7 +166,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K &additionalCommands, // additionalCommands commandListPreemptionMode, // preemptionMode 0, // partitionCount - static_cast(Event::STATE_SIGNALED), // postSyncImmValue + static_cast(Event::STATE_SIGNALED), // postSyncImmValue launchParams.isIndirect, // isIndirect launchParams.isPredicate, // isPredicate false, // isTimestampEvent diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl index 43d2368c58..80da34b3b6 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl @@ -14,7 +14,6 @@ #include "shared/source/helpers/pause_on_gpu_properties.h" #include "shared/source/helpers/pipeline_select_helper.h" #include "shared/source/helpers/simd_helper.h" -#include "shared/source/helpers/timestamp_packet.h" #include "shared/source/indirect_heap/indirect_heap.h" #include "shared/source/kernel/grf_config.h" #include "shared/source/memory_manager/memory_manager.h" @@ -262,7 +261,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K &additionalCommands, // additionalCommands kernelPreemptionMode, // preemptionMode this->partitionCount, // partitionCount - static_cast(Event::STATE_SIGNALED), // postSyncImmValue + static_cast(Event::STATE_SIGNALED), // postSyncImmValue launchParams.isIndirect, // isIndirect launchParams.isPredicate, // isPredicate isTimestampEvent, // isTimestampEvent @@ -279,13 +278,9 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K bool inOrderExecSignalRequired = (this->inOrderExecutionEnabled && !launchParams.isKernelSplitOperation); - if 
(inOrderExecSignalRequired) { - obtainNewTimestampPacketNode(); - } - if (inOrderExecSignalRequired && !event) { - dispatchKernelArgs.isTimestampEvent = true; - dispatchKernelArgs.eventAddress = this->timestampPacketContainer->peekNodes()[0]->getGpuAddress(); + dispatchKernelArgs.eventAddress = this->inOrderDependencyCounterAllocation->getGpuAddress(); + dispatchKernelArgs.postSyncImmValue = this->inOrderDependencyCounter + 1; } NEO::EncodeDispatchKernel::encode(commandContainer, dispatchKernelArgs, getLogicalStateHelper()); @@ -310,7 +305,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K auto eventHandle = event->toHandle(); CommandListCoreFamily::appendWaitOnEvents(1, &eventHandle, false, false, false); - appendSignalInOrderDependencyTimestampPacket(); + appendSignalInOrderDependencyCounter(); } if (neoDevice->getDebugger() && !this->immediateCmdListHeapSharing) { @@ -401,12 +396,9 @@ void CommandListCoreFamily::appendComputeBarrierCommand() { uint64_t writeValue = 0; if (this->inOrderExecutionEnabled) { - obtainNewTimestampPacketNode(); - auto node = this->timestampPacketContainer->peekNodes()[0]; - postSyncMode = NEO::PostSyncMode::ImmediateData; - gpuWriteAddress = node->getGpuAddress() + node->getContextEndOffset(); - writeValue = 0; + gpuWriteAddress = this->inOrderDependencyCounterAllocation->getGpuAddress(); + writeValue = this->inOrderDependencyCounter + 1; } NEO::MemorySynchronizationCommands::addSingleBarrier(*commandContainer.getCommandStream(), postSyncMode, gpuWriteAddress, writeValue, args); diff --git a/level_zero/core/source/cmdlist/cmdlist_imp.cpp b/level_zero/core/source/cmdlist/cmdlist_imp.cpp index 21c814d44a..432493777f 100644 --- a/level_zero/core/source/cmdlist/cmdlist_imp.cpp +++ b/level_zero/core/source/cmdlist/cmdlist_imp.cpp @@ -75,6 +75,8 @@ ze_result_t CommandListImp::destroy() { } } + device->getNEODevice()->getMemoryManager()->freeGraphicsMemory(inOrderDependencyCounterAllocation); + delete this; return 
ZE_RESULT_SUCCESS; } @@ -232,8 +234,15 @@ void CommandListImp::setStreamPropertiesDefaultSettings(NEO::StreamProperties &s void CommandListImp::enableInOrderExecution() { UNRECOVERABLE_IF(inOrderExecutionEnabled); - timestampPacketContainer = std::make_unique(); - deferredTimestampPackets = std::make_unique(); + auto device = this->device->getNEODevice(); + + NEO::AllocationProperties allocationProperties{device->getRootDeviceIndex(), sizeof(uint32_t), NEO::AllocationType::TIMESTAMP_PACKET_TAG_BUFFER, device->getDeviceBitfield()}; + + inOrderDependencyCounterAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties(allocationProperties); + + UNRECOVERABLE_IF(!inOrderDependencyCounterAllocation); + + memset(inOrderDependencyCounterAllocation->getUnderlyingBuffer(), 0, inOrderDependencyCounterAllocation->getUnderlyingBufferSize()); inOrderExecutionEnabled = true; } diff --git a/level_zero/core/source/cmdlist/cmdlist_imp.h b/level_zero/core/source/cmdlist/cmdlist_imp.h index 2043005aea..9308242a52 100644 --- a/level_zero/core/source/cmdlist/cmdlist_imp.h +++ b/level_zero/core/source/cmdlist/cmdlist_imp.h @@ -6,7 +6,6 @@ */ #pragma once -#include "shared/source/helpers/timestamp_packet_container.h" #include "shared/source/os_interface/os_time.h" #include "level_zero/core/source/cmdlist/cmdlist.h" @@ -43,8 +42,8 @@ struct CommandListImp : CommandList { protected: std::unique_ptr nonImmediateLogicalStateHelper; - std::unique_ptr deferredTimestampPackets; - std::unique_ptr timestampPacketContainer; + NEO::GraphicsAllocation *inOrderDependencyCounterAllocation = nullptr; + uint64_t inOrderDependencyCounter = 0; bool inOrderExecutionEnabled = false; ~CommandListImp() override = default; diff --git a/level_zero/core/source/device/bcs_split.h b/level_zero/core/source/device/bcs_split.h index f6e3d08cd9..bba6d7c573 100644 --- a/level_zero/core/source/device/bcs_split.h +++ b/level_zero/core/source/device/bcs_split.h @@ -129,8 +129,7 @@ struct BcsSplit { 
cmdList->appendEventForProfilingAllWalkers(this->events.marker[markerEventIndex], false, true); if (cmdList->isInOrderExecutionEnabled()) { - cmdList->obtainNewTimestampPacketNode(); - cmdList->appendSignalInOrderDependencyTimestampPacket(); + cmdList->appendSignalInOrderDependencyCounter(); } return result; diff --git a/level_zero/core/source/event/event.cpp b/level_zero/core/source/event/event.cpp index 812491ba7f..2256faafa2 100644 --- a/level_zero/core/source/event/event.cpp +++ b/level_zero/core/source/event/event.cpp @@ -390,14 +390,11 @@ void Event::setIsCompleted() { } } -void Event::enableInOrderExecMode(const NEO::TimestampPacketContainer &inOrderSyncNodes) { +void Event::enableInOrderExecMode(NEO::GraphicsAllocation &inOrderDependenciesAllocation, uint64_t signalValue) { inOrderExecEvent = true; - if (!inOrderTimestampPacket) { - inOrderTimestampPacket = std::make_unique(); - } - - inOrderTimestampPacket->assignAndIncrementNodesRefCounts(inOrderSyncNodes); + inOrderExecSignalValue = signalValue; + inOrderExecDataAllocation = &inOrderDependenciesAllocation; } } // namespace L0 diff --git a/level_zero/core/source/event/event.h b/level_zero/core/source/event/event.h index aa1cdb4dc1..2e844ecab8 100644 --- a/level_zero/core/source/event/event.h +++ b/level_zero/core/source/event/event.h @@ -210,9 +210,10 @@ struct Event : _ze_event_handle_t { void setMetricStreamer(MetricStreamer *metricStreamer) { this->metricStreamer = metricStreamer; } - void enableInOrderExecMode(const NEO::TimestampPacketContainer &inOrderSyncNodes); + void enableInOrderExecMode(NEO::GraphicsAllocation &inOrderDependenciesAllocation, uint64_t signalValue); bool isInOrderExecEvent() const { return inOrderExecEvent; } - const NEO::TimestampPacketContainer *getInOrderTimestampPacket() const { return inOrderTimestampPacket.get(); } + NEO::GraphicsAllocation *getInOrderExecDataAllocation() const { return inOrderExecDataAllocation; } + uint64_t getInOrderExecSignalValue() const { return 
inOrderExecSignalValue; } void setReferenceTs(NEO::TimeStampData ×tamp) { referenceTs = timestamp; } @@ -227,6 +228,8 @@ struct Event : _ze_event_handle_t { uint64_t contextEndTS = 1; NEO::TimeStampData referenceTs{}; + uint64_t inOrderExecSignalValue = 0; + std::chrono::microseconds gpuHangCheckPeriod{500'000}; std::bitset l3FlushAppliedOnKernel; @@ -249,7 +252,7 @@ struct Event : _ze_event_handle_t { Device *device = nullptr; EventPool *eventPool = nullptr; Kernel *kernelWithPrintf = nullptr; - std::unique_ptr inOrderTimestampPacket; + NEO::GraphicsAllocation *inOrderExecDataAllocation = nullptr; uint32_t maxKernelCount = 0; uint32_t kernelCount = 1u; diff --git a/level_zero/core/source/event/event_impl.inl b/level_zero/core/source/event/event_impl.inl index 76249df11b..7b166f85b0 100644 --- a/level_zero/core/source/event/event_impl.inl +++ b/level_zero/core/source/event/event_impl.inl @@ -134,17 +134,16 @@ void EventImp::assignKernelEventCompletionData(void *address) { template ze_result_t EventImp::queryInOrderEventStatus() { - auto node = this->inOrderTimestampPacket->peekNodes()[0]; - + auto hostAddress = static_cast(this->inOrderExecDataAllocation->getUnderlyingBuffer()); bool signaled = true; for (uint32_t i = 0; i < this->getPacketsInUse(); i++) { - auto hostAddress = static_cast(node->getContextEndAddress(i)); - - if (!NEO::WaitUtils::waitFunctionWithPredicate(hostAddress, NEO::TimestampPacketConstants::initValue, std::not_equal_to())) { + if (!NEO::WaitUtils::waitFunctionWithPredicate(hostAddress, this->inOrderExecSignalValue, std::greater_equal())) { signaled = false; break; } + + hostAddress = ptrOffset(hostAddress, sizeof(uint64_t)); } if (!signaled) { @@ -222,10 +221,7 @@ ze_result_t EventImp::queryStatus() { for (auto &csr : csrs) { csr->downloadAllocation(this->getAllocation(this->device)); if (inOrderExecEvent) { - auto node = this->inOrderTimestampPacket->peekNodes()[0]; - auto nodeAlloc = 
node->getBaseGraphicsAllocation()->getGraphicsAllocation(this->device->getRootDeviceIndex()); - - csr->downloadAllocation(*nodeAlloc); + csr->downloadAllocation(*this->inOrderExecDataAllocation); } } } @@ -397,7 +393,8 @@ template ze_result_t EventImp::reset() { if (inOrderExecEvent) { inOrderExecEvent = false; - inOrderTimestampPacket->releaseNodes(); + inOrderExecDataAllocation = nullptr; + inOrderExecSignalValue = 0; } this->resetCompletionStatus(); this->resetDeviceCompletionData(false); diff --git a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h index 2bca9dd8f8..e0b7c56299 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h +++ b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h @@ -155,7 +155,6 @@ struct WhiteBox> using BaseClass::commandsToPatch; using BaseClass::compactL3FlushEventPacket; using BaseClass::csr; - using BaseClass::deferredTimestampPackets; using BaseClass::device; using BaseClass::doubleSbaWa; using BaseClass::engineGroupType; @@ -164,11 +163,12 @@ struct WhiteBox> using BaseClass::getDcFlushRequired; using BaseClass::getHostPtrAlloc; using BaseClass::immediateCmdListHeapSharing; + using BaseClass::inOrderDependencyCounter; + using BaseClass::inOrderDependencyCounterAllocation; using BaseClass::isBcsSplitNeeded; using BaseClass::isFlushTaskSubmissionEnabled; using BaseClass::isSyncModeQueue; using BaseClass::isTbxMode; - using BaseClass::obtainNewTimestampPacketNode; using BaseClass::partitionCount; using BaseClass::pipeControlMultiKernelEventSync; using BaseClass::pipelineSelectStateTracking; @@ -178,7 +178,6 @@ struct WhiteBox> using BaseClass::stateBaseAddressTracking; using BaseClass::stateComputeModeTracking; using BaseClass::synchronizeInOrderExecution; - using BaseClass::timestampPacketContainer; WhiteBox() : BaseClass(BaseClass::defaultNumIddsPerBlock) {} }; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp 
b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp index 11123e885b..97994c2042 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp @@ -1315,10 +1315,6 @@ HWTEST2_F(CommandListCreate, givenInOrderExecutionWhenDispatchingRelaxedOrdering size_t offset = cmdStream->getUsed(); - ASSERT_EQ(1u, cmdList->timestampPacketContainer->peekNodes().size()); - - auto previousNode = cmdList->timestampPacketContainer->peekNodes()[0]; - cmdList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); GenCmdList genCmdList; @@ -1333,9 +1329,7 @@ HWTEST2_F(CommandListCreate, givenInOrderExecutionWhenDispatchingRelaxedOrdering lrrCmd++; lrrCmd++; - auto compareAddress = NEO::TimestampPacketHelper::getContextEndGpuAddress(*previousNode); - - EXPECT_TRUE(RelaxedOrderingCommandsHelper::verifyConditionalDataMemBbStart(lrrCmd, 0, compareAddress, 1, NEO::CompareOperation::Equal, true)); + EXPECT_TRUE(RelaxedOrderingCommandsHelper::verifyConditionalDataMemBbStart(lrrCmd, 0, cmdList->inOrderDependencyCounterAllocation->getGpuAddress(), 2, NEO::CompareOperation::Less, true)); } TEST_F(CommandListCreate, GivenGpuHangWhenCreatingImmCmdListWithSyncModeAndAppendBarrierThenAppendBarrierReturnsDeviceLost) { diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp index cb4fad5f70..df522aa103 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp @@ -7,6 +7,7 @@ #include "shared/source/command_container/command_encoder.h" #include "shared/source/command_container/encode_surface_state.h" +#include "shared/source/command_container/implicit_scaling.h" #include 
"shared/source/helpers/api_specific_config.h" #include "shared/source/helpers/bindless_heaps_helper.h" #include "shared/source/helpers/constants.h" @@ -671,9 +672,10 @@ HWTEST_F(CommandListAppendLaunchKernel, givenInvalidKernelWhenAppendingThenRetur struct InOrderCmdListTests : public CommandListAppendLaunchKernel { struct MockEvent : public EventImp { - using EventImp::inOrderTimestampPacket; using EventImp::inOrderExecEvent; using EventImp::maxPacketCount; + using EventImp::inOrderExecDataAllocation; + using EventImp::inOrderExecSignalValue; }; void SetUp() override { @@ -746,18 +748,6 @@ struct InOrderCmdListTests : public CommandListAppendLaunchKernel { return cmdList; } - template - void setTimestampPacketContextEndValue(TagNodeBase *node, uint32_t packetId, typename GfxFamily::TimestampPacketType contextEndValue) { - typename GfxFamily::TimestampPacketType data[] = {1, 1, contextEndValue, 1}; - - node->assignDataToAllTimestamps(packetId, data); - } - - template - TagNodeBase *getLatestTsNode(WhiteBox> *immCmdList) { - return immCmdList->timestampPacketContainer->peekNodes()[0]; - } - DebugManagerStateRestore restorer; std::unique_ptr mockCopyOsContext; @@ -774,19 +764,18 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenResetEventCalledThenResetEven auto eventPool = createEvents(3, false); - EXPECT_EQ(nullptr, events[0]->inOrderTimestampPacket.get()); - immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, events[0]->toHandle(), 0, nullptr, launchParams, false); EXPECT_TRUE(events[0]->inOrderExecEvent); - ASSERT_NE(nullptr, events[0]->inOrderTimestampPacket.get()); - EXPECT_EQ(1u, events[0]->inOrderTimestampPacket->peekNodes().size()); + EXPECT_EQ(events[0]->inOrderExecSignalValue, immCmdList->inOrderDependencyCounter); + EXPECT_EQ(events[0]->inOrderExecDataAllocation, immCmdList->inOrderDependencyCounterAllocation); events[0]->reset(); EXPECT_FALSE(events[0]->inOrderExecEvent); - ASSERT_NE(nullptr, events[0]->inOrderTimestampPacket.get()); - 
EXPECT_EQ(0u, events[0]->inOrderTimestampPacket->peekNodes().size()); + + EXPECT_EQ(events[0]->inOrderExecSignalValue, 0u); + EXPECT_EQ(events[0]->inOrderExecDataAllocation, nullptr); } HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenSubmittingThenProgramSemaphoreForPreviousDispatch, IsAtLeastXeHpCore) { @@ -800,8 +789,6 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenSubmittingThenProgramSemaphor auto offset = cmdStream->getUsed(); - auto previousNode = getLatestTsNode(immCmdList.get()); - immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); GenCmdList cmdList; @@ -816,11 +803,9 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenSubmittingThenProgramSemaphor auto semaphoreCmd = genCmdCast(*itor); - auto compareAddress = NEO::TimestampPacketHelper::getContextEndGpuAddress(*previousNode); - EXPECT_EQ(1u, semaphoreCmd->getSemaphoreDataDword()); - EXPECT_EQ(compareAddress, semaphoreCmd->getSemaphoreGraphicsAddress()); - EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD, semaphoreCmd->getCompareOperation()); + EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), semaphoreCmd->getSemaphoreGraphicsAddress()); + EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, semaphoreCmd->getCompareOperation()); } HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenWaitingForEventFromPreviousAppendThenSkip, IsAtLeastXeHpCore) { @@ -921,38 +906,101 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderEventModeWhenSubmittingThenProgramSem auto semaphoreCmd = genCmdCast(*itor); - auto gpuAddr = events[0]->getCompletionFieldGpuAddress(this->device); - - EXPECT_EQ(static_cast(Event::State::STATE_CLEARED), semaphoreCmd->getSemaphoreDataDword()); - EXPECT_EQ(gpuAddr, semaphoreCmd->getSemaphoreGraphicsAddress()); - EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD, semaphoreCmd->getCompareOperation()); 
+ EXPECT_EQ(2u, semaphoreCmd->getSemaphoreDataDword()); + EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), semaphoreCmd->getSemaphoreGraphicsAddress()); + EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, semaphoreCmd->getCompareOperation()); } -HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenDispatchingThenHandleTimestampPacketResidency, IsAtLeastXeHpCore) { - DebugManager.flags.DisableTimestampPacketOptimizations.set(1); // Create new allocation for node each time, to test residency handling +HWTEST2_F(InOrderCmdListTests, givenInOrderEventModeWhenWaitingForEventFromPreviousAppendThenSkip, IsAtLeastXeHpCore) { + using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; auto immCmdList = createImmCmdList(); + auto eventPool = createEvents(1, false); + + auto cmdStream = immCmdList->getCmdContainer().getCommandStream(); + + auto event0Handle = events[0]->toHandle(); + + immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); + + immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, event0Handle, 0, nullptr, launchParams, false); + + auto offset = cmdStream->getUsed(); + + immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 1, &event0Handle, launchParams, false); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, + ptrOffset(cmdStream->getCpuBase(), offset), + cmdStream->getUsed() - offset)); + + auto itor = find(cmdList.begin(), cmdList.end()); + + ASSERT_NE(cmdList.end(), itor); + + itor = find(++itor, cmdList.end()); + + EXPECT_EQ(cmdList.end(), itor); +} + +HWTEST2_F(InOrderCmdListTests, givenInOrderEventModeWhenSubmittingFromDifferentCmdListThenProgramSemaphoreForEvent, IsAtLeastSkl) { + using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; + + auto immCmdList1 = createImmCmdList(); + auto immCmdList2 = createImmCmdList(); + + auto eventPool = 
createEvents(1, false); + + auto cmdStream = immCmdList2->getCmdContainer().getCommandStream(); + + auto event0Handle = events[0]->toHandle(); + + auto ultCsr = static_cast *>(device->getNEODevice()->getDefaultEngine().commandStreamReceiver); + ultCsr->storeMakeResidentAllocations = true; + + immCmdList1->appendLaunchKernel(kernel->toHandle(), &groupCount, event0Handle, 0, nullptr, launchParams, false); + + EXPECT_EQ(1u, ultCsr->makeResidentAllocations[immCmdList1->inOrderDependencyCounterAllocation]); + + immCmdList2->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 1, &event0Handle, launchParams, false); + + EXPECT_EQ(2u, ultCsr->makeResidentAllocations[immCmdList1->inOrderDependencyCounterAllocation]); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, cmdStream->getCpuBase(), cmdStream->getUsed())); + + auto itor = find(cmdList.begin(), cmdList.end()); + + ASSERT_NE(cmdList.end(), itor); + + auto semaphoreCmd = genCmdCast(*itor); + + EXPECT_EQ(1u, semaphoreCmd->getSemaphoreDataDword()); + EXPECT_NE(immCmdList1->inOrderDependencyCounterAllocation->getGpuAddress(), immCmdList2->inOrderDependencyCounterAllocation->getGpuAddress()); + EXPECT_EQ(immCmdList1->inOrderDependencyCounterAllocation->getGpuAddress(), semaphoreCmd->getSemaphoreGraphicsAddress()); + EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, semaphoreCmd->getCompareOperation()); +} + +HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenDispatchingThenHandleDependencyCounter, IsAtLeastXeHpCore) { + auto immCmdList = createImmCmdList(); + + EXPECT_NE(nullptr, immCmdList->inOrderDependencyCounterAllocation); + EXPECT_EQ(AllocationType::TIMESTAMP_PACKET_TAG_BUFFER, immCmdList->inOrderDependencyCounterAllocation->getAllocationType()); + + EXPECT_EQ(0u, immCmdList->inOrderDependencyCounter); + auto ultCsr = static_cast *>(device->getNEODevice()->getDefaultEngine().commandStreamReceiver); 
ultCsr->storeMakeResidentAllocations = true; immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); - auto node0 = getLatestTsNode(immCmdList.get()); - ultCsr->getTimestampPacketAllocator()->getTag(); - EXPECT_EQ(1u, ultCsr->makeResidentAllocations[node0->getBaseGraphicsAllocation()->getGraphicsAllocation(0)]); + EXPECT_EQ(1u, immCmdList->inOrderDependencyCounter); + EXPECT_EQ(1u, ultCsr->makeResidentAllocations[immCmdList->inOrderDependencyCounterAllocation]); immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); - auto node1 = getLatestTsNode(immCmdList.get()); - ultCsr->getTimestampPacketAllocator()->getTag(); - - EXPECT_EQ(2u, ultCsr->makeResidentAllocations[node0->getBaseGraphicsAllocation()->getGraphicsAllocation(0)]); - EXPECT_EQ(1u, ultCsr->makeResidentAllocations[node1->getBaseGraphicsAllocation()->getGraphicsAllocation(0)]); - immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); - auto node2 = getLatestTsNode(immCmdList.get()); - ultCsr->getTimestampPacketAllocator()->getTag(); - EXPECT_EQ(2u, ultCsr->makeResidentAllocations[node0->getBaseGraphicsAllocation()->getGraphicsAllocation(0)]); // not used anymore - EXPECT_EQ(2u, ultCsr->makeResidentAllocations[node1->getBaseGraphicsAllocation()->getGraphicsAllocation(0)]); - EXPECT_EQ(1u, ultCsr->makeResidentAllocations[node2->getBaseGraphicsAllocation()->getGraphicsAllocation(0)]); + EXPECT_EQ(2u, immCmdList->inOrderDependencyCounter); + EXPECT_EQ(2u, ultCsr->makeResidentAllocations[immCmdList->inOrderDependencyCounterAllocation]); } HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenAddingRelaxedOrderingEventsThenConfigureRegistersFirst, IsAtLeastXeHpCore) { @@ -984,41 +1032,6 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenAddingRelaxedOrderingEventsTh EXPECT_EQ(CS_GPR_R0 + 4, lrrCmd->getDestinationRegisterAddress()); } 
-HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenGettingNewNodeThenSwapWithDeferred, IsAtLeastXeHpCore) { - using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER; - using POSTSYNC_DATA = typename FamilyType::POSTSYNC_DATA; - using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; - using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM; - - auto immCmdList = createImmCmdList(); - - immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); - - EXPECT_EQ(1u, immCmdList->timestampPacketContainer->peekNodes().size()); - EXPECT_EQ(0u, immCmdList->deferredTimestampPackets->peekNodes().size()); - - uint64_t nodeGpuVa0 = getLatestTsNode(immCmdList.get())->getGpuAddress(); - - immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); - - EXPECT_EQ(1u, immCmdList->timestampPacketContainer->peekNodes().size()); - EXPECT_EQ(1u, immCmdList->deferredTimestampPackets->peekNodes().size()); - - uint64_t nodeGpuVa1 = getLatestTsNode(immCmdList.get())->getGpuAddress(); - - EXPECT_NE(nodeGpuVa0, nodeGpuVa1); - - immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); - - EXPECT_EQ(1u, immCmdList->timestampPacketContainer->peekNodes().size()); - EXPECT_EQ(2u, immCmdList->deferredTimestampPackets->peekNodes().size()); - - uint64_t nodeGpuVa2 = getLatestTsNode(immCmdList.get())->getGpuAddress(); - - EXPECT_NE(nodeGpuVa0, nodeGpuVa2); - EXPECT_NE(nodeGpuVa1, nodeGpuVa2); -} - HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingWalkerThenSignalSyncAllocation, IsAtLeastXeHpCore) { using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER; using POSTSYNC_DATA = typename FamilyType::POSTSYNC_DATA; @@ -1033,7 +1046,6 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingWalkerThenSignalSy immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); - 
uint64_t nodeGpuVa0 = 0; { GenCmdList cmdList; @@ -1045,13 +1057,9 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingWalkerThenSignalSy auto walkerCmd = genCmdCast(*walkerItor); auto &postSync = walkerCmd->getPostSync(); - EXPECT_EQ(1u, immCmdList->timestampPacketContainer->peekNodes().size()); - EXPECT_EQ(0u, immCmdList->deferredTimestampPackets->peekNodes().size()); - - nodeGpuVa0 = getLatestTsNode(immCmdList.get())->getGpuAddress(); - - EXPECT_EQ(POSTSYNC_DATA::OPERATION_WRITE_TIMESTAMP, postSync.getOperation()); - EXPECT_EQ(nodeGpuVa0, postSync.getDestinationAddress()); + EXPECT_EQ(POSTSYNC_DATA::OPERATION_WRITE_IMMEDIATE_DATA, postSync.getOperation()); + EXPECT_EQ(1u, postSync.getImmediateData()); + EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), postSync.getDestinationAddress()); } auto offset = cmdStream->getUsed(); @@ -1071,14 +1079,6 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingWalkerThenSignalSy auto walkerCmd = genCmdCast(*walkerItor); auto &postSync = walkerCmd->getPostSync(); - EXPECT_EQ(1u, immCmdList->timestampPacketContainer->peekNodes().size()); - EXPECT_EQ(1u, immCmdList->deferredTimestampPackets->peekNodes().size()); - - auto node = getLatestTsNode(immCmdList.get()); - uint64_t nodeGpuVa1 = getLatestTsNode(immCmdList.get())->getGpuAddress(); - EXPECT_NE(nodeGpuVa0, nodeGpuVa1); - EXPECT_EQ(nodeGpuVa0, immCmdList->deferredTimestampPackets->peekNodes()[0]->getGpuAddress()); - EXPECT_EQ(POSTSYNC_DATA::OPERATION_WRITE_IMMEDIATE_DATA, postSync.getOperation()); EXPECT_EQ(static_cast(Event::STATE_SIGNALED), postSync.getImmediateData()); EXPECT_EQ(events[0]->getPacketAddress(device), postSync.getDestinationAddress()); @@ -1096,17 +1096,21 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingWalkerThenSignalSy auto sdiCmd = genCmdCast(++semaphoreCmd); ASSERT_NE(nullptr, sdiCmd); - EXPECT_EQ(nodeGpuVa1 + node->getContextEndOffset(), sdiCmd->getAddress()); - EXPECT_EQ(0u, 
sdiCmd->getStoreQword()); - EXPECT_EQ(0u, sdiCmd->getDataDword0()); + EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), sdiCmd->getAddress()); + EXPECT_EQ(1u, sdiCmd->getStoreQword()); + EXPECT_EQ(2u, sdiCmd->getDataDword0()); + EXPECT_EQ(0u, sdiCmd->getDataDword1()); } - auto node = getLatestTsNode(immCmdList.get()); + auto hostAddress = static_cast(immCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer()); - setTimestampPacketContextEndValue(node, 0, 1); + *hostAddress = 1; EXPECT_EQ(ZE_RESULT_NOT_READY, events[0]->hostSynchronize(1)); - setTimestampPacketContextEndValue(node, 0, 0x12345); + *hostAddress = 2; + EXPECT_EQ(ZE_RESULT_SUCCESS, events[0]->hostSynchronize(1)); + + *hostAddress = 3; EXPECT_EQ(ZE_RESULT_SUCCESS, events[0]->hostSynchronize(1)); } @@ -1149,15 +1153,9 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingTimestampEventThen auto sdiCmd = genCmdCast(++semaphoreCmd); ASSERT_NE(nullptr, sdiCmd); - EXPECT_EQ(1u, immCmdList->timestampPacketContainer->peekNodes().size()); - EXPECT_EQ(0u, immCmdList->deferredTimestampPackets->peekNodes().size()); - - auto node = getLatestTsNode(immCmdList.get()); - uint64_t nodeGpuVa0 = node->getGpuAddress() + node->getContextEndOffset(); - - EXPECT_EQ(nodeGpuVa0, sdiCmd->getAddress()); - EXPECT_EQ(0u, sdiCmd->getStoreQword()); - EXPECT_EQ(0u, sdiCmd->getDataDword0()); + EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), sdiCmd->getAddress()); + EXPECT_EQ(1u, sdiCmd->getStoreQword()); + EXPECT_EQ(1u, sdiCmd->getDataDword0()); } HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingAppendSignalEventThenSignalSyncAllocation, IsAtLeastXeHpCore) { @@ -1176,8 +1174,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingAppendSignalEventT immCmdList->appendSignalEvent(events[0]->toHandle()); - EXPECT_EQ(1u, immCmdList->timestampPacketContainer->peekNodes().size()); - EXPECT_EQ(1u, 
immCmdList->deferredTimestampPackets->peekNodes().size()); + uint64_t inOrderSyncVa = immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, @@ -1189,12 +1186,9 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingAppendSignalEventT ASSERT_NE(nullptr, semaphoreCmd); - auto previousNode = immCmdList->deferredTimestampPackets->peekNodes()[0]; - uint64_t nodeGpuVa = previousNode->getGpuAddress() + previousNode->getContextEndOffset(); - - EXPECT_EQ(TimestampPacketConstants::initValue, semaphoreCmd->getSemaphoreDataDword()); - EXPECT_EQ(nodeGpuVa, semaphoreCmd->getSemaphoreGraphicsAddress()); - EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD, semaphoreCmd->getCompareOperation()); + EXPECT_EQ(1u, semaphoreCmd->getSemaphoreDataDword()); + EXPECT_EQ(inOrderSyncVa, semaphoreCmd->getSemaphoreGraphicsAddress()); + EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, semaphoreCmd->getCompareOperation()); } { @@ -1211,21 +1205,10 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingAppendSignalEventT ASSERT_NE(nullptr, sdiCmd); - auto node = getLatestTsNode(immCmdList.get()); - uint64_t nodeGpuVa = node->getGpuAddress() + node->getContextEndOffset(); - - EXPECT_EQ(nodeGpuVa, sdiCmd->getAddress()); - EXPECT_EQ(0u, sdiCmd->getStoreQword()); - EXPECT_EQ(0u, sdiCmd->getDataDword0()); - - auto semaphoreCmd = genCmdCast(*(++rbeginItor)); - ASSERT_NE(nullptr, semaphoreCmd); - - auto eventEndGpuVa = events[0]->getCompletionFieldGpuAddress(device); - - EXPECT_EQ(static_cast(Event::State::STATE_CLEARED), semaphoreCmd->getSemaphoreDataDword()); - EXPECT_EQ(eventEndGpuVa, semaphoreCmd->getSemaphoreGraphicsAddress()); - EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD, semaphoreCmd->getCompareOperation()); + EXPECT_EQ(inOrderSyncVa, sdiCmd->getAddress()); + 
EXPECT_EQ(1u, sdiCmd->getStoreQword()); + EXPECT_EQ(2u, sdiCmd->getDataDword0()); + EXPECT_EQ(0u, sdiCmd->getDataDword1()); } } @@ -1294,16 +1277,12 @@ HWTEST2_F(InOrderCmdListTests, givenCopyOnlyInOrderModeWhenProgrammingCopyThenSi auto sdiCmd = genCmdCast(*sdiItor); - EXPECT_EQ(1u, immCmdList->timestampPacketContainer->peekNodes().size()); - EXPECT_EQ(1u, immCmdList->deferredTimestampPackets->peekNodes().size()); + uint64_t syncVa = immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(); - auto node = getLatestTsNode(immCmdList.get()); - - uint64_t nodeGpuVa = node->getGpuAddress() + node->getContextEndOffset(); - - EXPECT_EQ(nodeGpuVa, sdiCmd->getAddress()); - EXPECT_EQ(0u, sdiCmd->getStoreQword()); - EXPECT_EQ(0u, sdiCmd->getDataDword0()); + EXPECT_EQ(syncVa, sdiCmd->getAddress()); + EXPECT_EQ(1u, sdiCmd->getStoreQword()); + EXPECT_EQ(2u, sdiCmd->getDataDword0()); + EXPECT_EQ(0u, sdiCmd->getDataDword1()); } HWTEST2_F(InOrderCmdListTests, givenCopyOnlyInOrderModeWhenProgrammingCopyRegionThenSignalInOrderAllocation, IsAtLeastXeHpCore) { @@ -1335,36 +1314,12 @@ HWTEST2_F(InOrderCmdListTests, givenCopyOnlyInOrderModeWhenProgrammingCopyRegion auto sdiCmd = genCmdCast(*sdiItor); - EXPECT_EQ(1u, immCmdList->timestampPacketContainer->peekNodes().size()); - EXPECT_EQ(1u, immCmdList->deferredTimestampPackets->peekNodes().size()); + uint64_t syncVa = immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(); - auto node = getLatestTsNode(immCmdList.get()); - - uint64_t nodeGpuVa = node->getGpuAddress() + node->getContextEndOffset(); - - EXPECT_EQ(nodeGpuVa, sdiCmd->getAddress()); - EXPECT_EQ(0u, sdiCmd->getStoreQword()); - EXPECT_EQ(0u, sdiCmd->getDataDword0()); -} - -HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingCopyRegionThenObtainSingleSyncAlloc, IsAtLeastXeHpCore) { - using XY_COPY_BLT = typename std::remove_const::type; - using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM; - - auto immCmdList = createImmCmdList(); - - 
uint32_t copyData = 0; - ze_copy_region_t region = {0, 0, 0, 1, 1, 1}; - - immCmdList->appendMemoryCopyRegion(&copyData, &region, 1, 1, &copyData, &region, 1, 1, nullptr, 0, nullptr, false, false); - - EXPECT_EQ(1u, immCmdList->timestampPacketContainer->peekNodes().size()); - EXPECT_EQ(0u, immCmdList->deferredTimestampPackets->peekNodes().size()); - - immCmdList->appendMemoryCopyRegion(&copyData, &region, 1, 1, &copyData, &region, 1, 1, nullptr, 0, nullptr, false, false); - - EXPECT_EQ(1u, immCmdList->timestampPacketContainer->peekNodes().size()); - EXPECT_EQ(1u, immCmdList->deferredTimestampPackets->peekNodes().size()); + EXPECT_EQ(syncVa, sdiCmd->getAddress()); + EXPECT_EQ(1u, sdiCmd->getStoreQword()); + EXPECT_EQ(2u, sdiCmd->getDataDword0()); + EXPECT_EQ(0u, sdiCmd->getDataDword1()); } HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingAppendWaitOnEventsThenSignalSyncAllocation, IsAtLeastXeHpCore) { @@ -1380,9 +1335,6 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingAppendWaitOnEvents immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, eventHandle, 0, nullptr, launchParams, false); - EXPECT_EQ(1u, immCmdList->timestampPacketContainer->peekNodes().size()); - EXPECT_EQ(0u, immCmdList->deferredTimestampPackets->peekNodes().size()); - auto offset = cmdStream->getUsed(); zeCommandListAppendWaitOnEvents(immCmdList->toHandle(), 1, &eventHandle); @@ -1397,16 +1349,9 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingAppendWaitOnEvents auto sdiCmd = genCmdCast(*sdiItor); - EXPECT_EQ(1u, immCmdList->timestampPacketContainer->peekNodes().size()); - EXPECT_EQ(1u, immCmdList->deferredTimestampPackets->peekNodes().size()); - - auto node = getLatestTsNode(immCmdList.get()); - - uint64_t nodeGpuVa = node->getGpuAddress() + node->getContextEndOffset(); - - EXPECT_EQ(nodeGpuVa, sdiCmd->getAddress()); - EXPECT_EQ(0u, sdiCmd->getStoreQword()); - EXPECT_EQ(0u, sdiCmd->getDataDword0()); + 
EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), sdiCmd->getAddress()); + EXPECT_EQ(1u, sdiCmd->getStoreQword()); + EXPECT_EQ(2u, sdiCmd->getDataDword0()); } HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingAppendBarrierThenSignalSyncAllocation, IsAtLeastXeHpCore) { @@ -1432,16 +1377,14 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingAppendBarrierThenS auto pcCmd = genCmdCast(*pcItor); - auto node = getLatestTsNode(immCmdList.get()); - - auto gpuAddress = node->getGpuAddress() + node->getContextEndOffset(); + auto gpuAddress = immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(); auto lowAddress = static_cast(gpuAddress & 0x0000FFFFFFFFULL); auto highAddress = static_cast(gpuAddress >> 32); EXPECT_EQ(lowAddress, pcCmd->getAddress()); EXPECT_EQ(highAddress, pcCmd->getAddressHigh()); EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, pcCmd->getPostSyncOperation()); - EXPECT_EQ(0u, pcCmd->getImmediateData()); + EXPECT_EQ(2u, pcCmd->getImmediateData()); } HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenCallingSyncThenHandleCompletion, IsAtLeastXeHpCore) { @@ -1451,9 +1394,8 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenCallingSyncThenHandleCompleti immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); - auto node = getLatestTsNode(immCmdList.get()); - - setTimestampPacketContextEndValue(node, 0, 1); + auto hostAddress = static_cast(immCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer()); + *hostAddress = 0; const uint32_t failCounter = 3; uint32_t callCounter = 0; @@ -1462,7 +1404,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenCallingSyncThenHandleCompleti ultCsr->downloadAllocationImpl = [&](GraphicsAllocation &graphicsAllocation) { callCounter++; if (callCounter >= failCounter && !forceFail) { - setTimestampPacketContextEndValue(node, 0, 0x123); + (*hostAddress)++; } }; @@ 
-1472,7 +1414,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenCallingSyncThenHandleCompleti EXPECT_EQ(1u, callCounter); EXPECT_EQ(1u, ultCsr->checkGpuHangDetectedCalled); - EXPECT_EQ(1u, node->getContextEndValue(0)); + EXPECT_EQ(0u, *hostAddress); } // timeout - not ready @@ -1482,7 +1424,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenCallingSyncThenHandleCompleti EXPECT_TRUE(callCounter > 1); EXPECT_TRUE(ultCsr->checkGpuHangDetectedCalled > 1); - EXPECT_EQ(1u, node->getContextEndValue(0)); + EXPECT_EQ(0u, *hostAddress); } // gpu hang @@ -1493,7 +1435,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenCallingSyncThenHandleCompleti EXPECT_TRUE(callCounter > 1); EXPECT_TRUE(ultCsr->checkGpuHangDetectedCalled > 1); - EXPECT_EQ(1u, node->getContextEndValue(0)); + EXPECT_EQ(0u, *hostAddress); } // success @@ -1506,37 +1448,10 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenCallingSyncThenHandleCompleti EXPECT_EQ(failCounter, callCounter); EXPECT_EQ(failCounter - 1, ultCsr->checkGpuHangDetectedCalled); - EXPECT_EQ(0x123u, node->getContextEndValue(0)); + EXPECT_EQ(1u, *hostAddress); } } -HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenCallingSyncThenHandleDeferredNodesLifeCycle, IsAtLeastXeHpCore) { - auto immCmdList = createImmCmdList(); - - immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); - immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); - immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); - - auto node = getLatestTsNode(immCmdList.get()); - - setTimestampPacketContextEndValue(node, 0, 1); - - EXPECT_EQ(1u, immCmdList->timestampPacketContainer->peekNodes().size()); - EXPECT_EQ(2u, immCmdList->deferredTimestampPackets->peekNodes().size()); - - EXPECT_EQ(ZE_RESULT_NOT_READY, immCmdList->hostSynchronize(1)); - - EXPECT_EQ(1u, 
immCmdList->timestampPacketContainer->peekNodes().size()); - EXPECT_EQ(2u, immCmdList->deferredTimestampPackets->peekNodes().size()); - - setTimestampPacketContextEndValue(node, 0, 0x1234); - - EXPECT_EQ(ZE_RESULT_SUCCESS, immCmdList->hostSynchronize(std::numeric_limits::max())); - - EXPECT_EQ(1u, immCmdList->timestampPacketContainer->peekNodes().size()); - EXPECT_EQ(0u, immCmdList->deferredTimestampPackets->peekNodes().size()); -} - HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenDoingCpuCopyThenSynchronize, IsAtLeastXeHpCore) { auto immCmdList = createImmCmdList(); immCmdList->copyThroughLockedPtrEnabled = true; @@ -1546,14 +1461,16 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenDoingCpuCopyThenSynchronize, auto eventHandle = events[0]->toHandle(); + auto hostAddress = static_cast(immCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer()); + *hostAddress = 0; + const uint32_t failCounter = 3; uint32_t callCounter = 0; ultCsr->downloadAllocationImpl = [&](GraphicsAllocation &graphicsAllocation) { callCounter++; if (callCounter >= failCounter) { - auto node = getLatestTsNode(immCmdList.get()); - setTimestampPacketContextEndValue(node, 0, 0x123); + (*hostAddress)++; } }; @@ -1572,10 +1489,8 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenDoingCpuCopyThenSynchronize, immCmdList->appendMemoryCopy(deviceAlloc, &hostCopyData, 1, nullptr, 1, &eventHandle, false, false); - auto node = getLatestTsNode(immCmdList.get()); - EXPECT_EQ(3u, callCounter); - EXPECT_EQ(0x123u, node->getContextEndValue(0)); + EXPECT_EQ(1u, *hostAddress); EXPECT_EQ(2u, ultCsr->checkGpuHangDetectedCalled); EXPECT_EQ(0u, ultCsr->waitForCompletionWithTimeoutTaskCountCalled); EXPECT_FALSE(ultCsr->flushTagUpdateCalled); @@ -1589,6 +1504,9 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenGpuHangDetectedInCpuCopyPathT auto ultCsr = static_cast *>(device->getNEODevice()->getDefaultEngine().commandStreamReceiver); + auto hostAddress = 
static_cast(immCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer()); + *hostAddress = 0; + immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); void *deviceAlloc = nullptr; @@ -1641,15 +1559,9 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingKernelSplitWithout ASSERT_NE(nullptr, sdiCmd); - EXPECT_EQ(1u, immCmdList->timestampPacketContainer->peekNodes().size()); - EXPECT_EQ(0u, immCmdList->deferredTimestampPackets->peekNodes().size()); - - auto node = getLatestTsNode(immCmdList.get()); - uint64_t nodeGpuVa = node->getGpuAddress() + node->getContextEndOffset(); - - EXPECT_EQ(nodeGpuVa, sdiCmd->getAddress()); - EXPECT_EQ(0u, sdiCmd->getStoreQword()); - EXPECT_EQ(0u, sdiCmd->getDataDword0()); + EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), sdiCmd->getAddress()); + EXPECT_EQ(1u, sdiCmd->getStoreQword()); + EXPECT_EQ(1u, sdiCmd->getDataDword0()); alignedFree(alignedPtr); } @@ -1697,15 +1609,9 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingKernelSplitWithEve ASSERT_NE(nullptr, sdiCmd); - EXPECT_EQ(1u, immCmdList->timestampPacketContainer->peekNodes().size()); - EXPECT_EQ(0u, immCmdList->deferredTimestampPackets->peekNodes().size()); - - auto node = getLatestTsNode(immCmdList.get()); - uint64_t nodeGpuVa = node->getGpuAddress() + node->getContextEndOffset(); - - EXPECT_EQ(nodeGpuVa, sdiCmd->getAddress()); - EXPECT_EQ(0u, sdiCmd->getStoreQword()); - EXPECT_EQ(0u, sdiCmd->getDataDword0()); + EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), sdiCmd->getAddress()); + EXPECT_EQ(1u, sdiCmd->getStoreQword()); + EXPECT_EQ(1u, sdiCmd->getDataDword0()); alignedFree(alignedPtr); } @@ -1753,17 +1659,16 @@ HWTEST2_F(MultiTileInOrderCmdListTests, givenMultiTileInOrderModeWhenProgramming auto semaphoreCmd = genCmdCast(*cmdList.begin()); ASSERT_NE(nullptr, semaphoreCmd); - auto node = events[0]->inOrderTimestampPacket->peekNodes()[0]; 
- uint64_t nodeGpuVa = node->getGpuAddress() + node->getContextEndOffset(); + auto gpuAddress = immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(); EXPECT_EQ(1u, semaphoreCmd->getSemaphoreDataDword()); - EXPECT_EQ(nodeGpuVa, semaphoreCmd->getSemaphoreGraphicsAddress()); + EXPECT_EQ(gpuAddress, semaphoreCmd->getSemaphoreGraphicsAddress()); semaphoreCmd = genCmdCast(++semaphoreCmd); ASSERT_NE(nullptr, semaphoreCmd); EXPECT_EQ(1u, semaphoreCmd->getSemaphoreDataDword()); - EXPECT_EQ(nodeGpuVa + NEO::TimestampPackets::getSinglePacketSize(), semaphoreCmd->getSemaphoreGraphicsAddress()); + EXPECT_EQ(gpuAddress + sizeof(uint64_t), semaphoreCmd->getSemaphoreGraphicsAddress()); } HWTEST2_F(MultiTileInOrderCmdListTests, givenMultiTileInOrderModeWhenSignalingSyncAllocationThenEnablePartitionOffset, IsAtLeastXeHpCore) { @@ -1773,8 +1678,7 @@ HWTEST2_F(MultiTileInOrderCmdListTests, givenMultiTileInOrderModeWhenSignalingSy auto cmdStream = immCmdList->getCmdContainer().getCommandStream(); - immCmdList->obtainNewTimestampPacketNode(); - immCmdList->appendSignalInOrderDependencyTimestampPacket(); + immCmdList->appendSignalInOrderDependencyCounter(); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, cmdStream->getCpuBase(), cmdStream->getUsed())); @@ -1782,11 +1686,9 @@ HWTEST2_F(MultiTileInOrderCmdListTests, givenMultiTileInOrderModeWhenSignalingSy auto sdiCmd = genCmdCast(*cmdList.begin()); ASSERT_NE(nullptr, sdiCmd); - auto node = getLatestTsNode(immCmdList.get()); - uint64_t nodeGpuVa = node->getGpuAddress() + node->getContextEndOffset(); + auto gpuAddress = immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(); - EXPECT_EQ(immCmdList->partitionCount, node->getPacketsUsed()); - EXPECT_EQ(nodeGpuVa, sdiCmd->getAddress()); + EXPECT_EQ(gpuAddress, sdiCmd->getAddress()); EXPECT_TRUE(sdiCmd->getWorkloadPartitionIdOffsetEnable()); } @@ -1797,23 +1699,30 @@ HWTEST2_F(MultiTileInOrderCmdListTests, 
givenMultiTileInOrderModeWhenCallingSync immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, events[0]->toHandle(), 0, nullptr, launchParams, false); - auto node = getLatestTsNode(immCmdList.get()); + auto hostAddress0 = static_cast(immCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer()); + auto hostAddress1 = hostAddress0++; - setTimestampPacketContextEndValue(node, 0, 1); - setTimestampPacketContextEndValue(node, 1, 1); + *hostAddress0 = 0; + *hostAddress1 = 0; EXPECT_EQ(ZE_RESULT_NOT_READY, immCmdList->hostSynchronize(0)); EXPECT_EQ(ZE_RESULT_NOT_READY, events[0]->hostSynchronize(0)); - setTimestampPacketContextEndValue(node, 0, 0x1234); + *hostAddress0 = 1; EXPECT_EQ(ZE_RESULT_NOT_READY, immCmdList->hostSynchronize(0)); EXPECT_EQ(ZE_RESULT_NOT_READY, events[0]->hostSynchronize(0)); - setTimestampPacketContextEndValue(node, 0, 1); - setTimestampPacketContextEndValue(node, 1, 0x456); + *hostAddress0 = 0; + *hostAddress1 = 1; EXPECT_EQ(ZE_RESULT_NOT_READY, immCmdList->hostSynchronize(0)); EXPECT_EQ(ZE_RESULT_NOT_READY, events[0]->hostSynchronize(0)); - setTimestampPacketContextEndValue(node, 0, 0x789); + *hostAddress0 = 1; + *hostAddress1 = 1; + EXPECT_EQ(ZE_RESULT_SUCCESS, immCmdList->hostSynchronize(0)); + EXPECT_EQ(ZE_RESULT_SUCCESS, events[0]->hostSynchronize(0)); + + *hostAddress0 = 3; + *hostAddress1 = 3; EXPECT_EQ(ZE_RESULT_SUCCESS, immCmdList->hostSynchronize(0)); EXPECT_EQ(ZE_RESULT_SUCCESS, events[0]->hostSynchronize(0)); } @@ -2000,12 +1909,12 @@ HWTEST2_F(BcsSplitInOrderCmdListTests, givenBcsSplitEnabledWhenDispatchingCopyTh ASSERT_NE(nullptr, sdiCmd); - auto node = getLatestTsNode(immCmdList.get()); - uint64_t nodeGpuVa = node->getGpuAddress() + node->getContextEndOffset(); + auto gpuAddress = immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(); - EXPECT_EQ(nodeGpuVa, sdiCmd->getAddress()); - EXPECT_EQ(0u, sdiCmd->getStoreQword()); - EXPECT_EQ(0u, sdiCmd->getDataDword0()); + EXPECT_EQ(gpuAddress, 
sdiCmd->getAddress()); + EXPECT_EQ(1u, sdiCmd->getStoreQword()); + EXPECT_EQ(1u, sdiCmd->getDataDword0()); + EXPECT_EQ(0u, sdiCmd->getDataDword1()); } HWTEST2_F(BcsSplitInOrderCmdListTests, givenBcsSplitEnabledWhenDispatchingCopyRegionThenHandleInOrderSignaling, IsAtLeastXeHpcCore) { @@ -2040,12 +1949,12 @@ HWTEST2_F(BcsSplitInOrderCmdListTests, givenBcsSplitEnabledWhenDispatchingCopyRe ASSERT_NE(nullptr, sdiCmd); - auto node = getLatestTsNode(immCmdList.get()); - uint64_t nodeGpuVa = node->getGpuAddress() + node->getContextEndOffset(); + auto gpuAddress = immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(); - EXPECT_EQ(nodeGpuVa, sdiCmd->getAddress()); - EXPECT_EQ(0u, sdiCmd->getStoreQword()); - EXPECT_EQ(0u, sdiCmd->getDataDword0()); + EXPECT_EQ(gpuAddress, sdiCmd->getAddress()); + EXPECT_EQ(1u, sdiCmd->getStoreQword()); + EXPECT_EQ(1u, sdiCmd->getDataDword0()); + EXPECT_EQ(0u, sdiCmd->getDataDword1()); } struct CommandListAppendLaunchKernelWithImplicitArgs : CommandListAppendLaunchKernel { diff --git a/level_zero/core/test/unit_tests/sources/event/test_event.cpp b/level_zero/core/test/unit_tests/sources/event/test_event.cpp index 3908c6e6ac..bf99a324e6 100644 --- a/level_zero/core/test/unit_tests/sources/event/test_event.cpp +++ b/level_zero/core/test/unit_tests/sources/event/test_event.cpp @@ -3082,23 +3082,17 @@ HWTEST_F(EventTests, givenInOrderEventWhenHostEventSyncThenExpectDownloadEventAl downloadAllocationTrack[&gfxAllocation]++; }; - auto node = ultCsr->getTimestampPacketAllocator()->getTag(); - NEO::TimestampPacketContainer container; - container.add(node); + uint64_t storage[2] = {1, 1}; - typename FamilyType::TimestampPacketType data[] = {0, 0, 0, 0}; + NEO::MockGraphicsAllocation syncAllocation(&storage, sizeof(storage)); - node->assignDataToAllTimestamps(0, data); - - event->enableInOrderExecMode(container); - - auto allocation = node->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation(); + 
event->enableInOrderExecMode(syncAllocation, 1); constexpr uint64_t timeout = std::numeric_limits::max(); auto result = event->hostSynchronize(timeout); EXPECT_EQ(ZE_RESULT_SUCCESS, result); - EXPECT_NE(0u, downloadAllocationTrack[allocation]); + EXPECT_NE(0u, downloadAllocationTrack[&syncAllocation]); EXPECT_EQ(1u, ultCsr->downloadAllocationsCalledCount); } diff --git a/shared/source/command_container/command_encoder.h b/shared/source/command_container/command_encoder.h index 0310fa677e..2422ce0838 100644 --- a/shared/source/command_container/command_encoder.h +++ b/shared/source/command_container/command_encoder.h @@ -52,7 +52,7 @@ struct EncodeDispatchKernelArgs { std::list *additionalCommands = nullptr; PreemptionMode preemptionMode = PreemptionMode::Initial; uint32_t partitionCount = 0u; - uint32_t postSyncImmValue = 0; + uint64_t postSyncImmValue = 0; bool isIndirect = false; bool isPredicate = false; bool isTimestampEvent = false;