diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.h b/level_zero/core/source/cmdlist/cmdlist_hw.h index a8b9b25eeb..b3d3135371 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw.h @@ -169,7 +169,7 @@ struct CommandListCoreFamily : CommandListImp { ze_result_t appendSignalEvent(ze_event_handle_t hEvent) override; ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, bool relaxedOrderingAllowed, bool trackDependencies, bool signalInOrderCompletion) override; - void appendWaitOnInOrderDependency(NEO::GraphicsAllocation *dependencyCounterAllocation, uint64_t waitValue, uint32_t offset, bool relaxedOrderingAllowed); + void appendWaitOnInOrderDependency(NEO::GraphicsAllocation *dependencyCounterAllocation, uint32_t waitValue, uint32_t offset, bool relaxedOrderingAllowed); void appendSignalInOrderDependencyCounter(); ze_result_t appendWriteGlobalTimestamp(uint64_t *dstptr, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override; diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index e09339052e..7121aa3c35 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -2187,7 +2187,7 @@ ze_result_t CommandListCoreFamily::appendSignalEvent(ze_event_han } template -void CommandListCoreFamily::appendWaitOnInOrderDependency(NEO::GraphicsAllocation *dependencyCounterAllocation, uint64_t waitValue, uint32_t offset, bool relaxedOrderingAllowed) { +void CommandListCoreFamily::appendWaitOnInOrderDependency(NEO::GraphicsAllocation *dependencyCounterAllocation, uint32_t waitValue, uint32_t offset, bool relaxedOrderingAllowed) { using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION; UNRECOVERABLE_IF(waitValue > std::numeric_limits::max()); @@ -2198,13 +2198,11 @@ void CommandListCoreFamily::appendWaitOnInOrderDependency(NEO::Gr for (uint32_t i = 0; i < this->partitionCount; i++) { if (relaxedOrderingAllowed) { - NEO::EncodeBatchBufferStartOrEnd::programConditionalDataMemBatchBufferStart(*commandContainer.getCommandStream(), 0, gpuAddress, static_cast(waitValue), - NEO::CompareOperation::Less, true); + NEO::EncodeBatchBufferStartOrEnd::programConditionalDataMemBatchBufferStart(*commandContainer.getCommandStream(), 0, gpuAddress, waitValue, NEO::CompareOperation::Less, true); } else { NEO::EncodeSemaphore::addMiSemaphoreWaitCommand(*commandContainer.getCommandStream(), - gpuAddress, - static_cast(waitValue), + gpuAddress, waitValue, COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD); } @@ -2314,14 +2312,12 @@ ze_result_t CommandListCoreFamily::appendWaitOnEvents(uint32_t nu template void CommandListCoreFamily::appendSignalInOrderDependencyCounter() { - uint64_t signalValue = this->inOrderDependencyCounter + 1; - auto lowPart = static_cast(signalValue & 0x0000FFFFFFFFULL); - auto highPart = static_cast(signalValue >> 32); + uint32_t signalValue = this->inOrderDependencyCounter + 1; uint64_t gpuVa = this->inOrderDependencyCounterAllocation->getGpuAddress() + this->inOrderAllocationOffset; NEO::EncodeStoreMemory::programStoreDataImm(*commandContainer.getCommandStream(), gpuVa, - lowPart, highPart, true, (this->partitionCount > 1)); + signalValue, 0, false, (this->partitionCount > 1)); } template diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl index ac44c8cf03..76966d6a12 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl @@ -1156,10 +1156,10 @@ ze_result_t CommandListCoreFamilyImmediate::synchronizeInOrderExe bool signaled = true; - auto hostAddress = static_cast(ptrOffset(this->inOrderDependencyCounterAllocation->getUnderlyingBuffer(), this->inOrderAllocationOffset)); + auto hostAddress = static_cast(ptrOffset(this->inOrderDependencyCounterAllocation->getUnderlyingBuffer(), this->inOrderAllocationOffset)); for (uint32_t i = 0; i < this->partitionCount; i++) { - if (!NEO::WaitUtils::waitFunctionWithPredicate(hostAddress, waitValue, std::greater_equal())) { + if (!NEO::WaitUtils::waitFunctionWithPredicate(hostAddress, waitValue, std::greater_equal())) { signaled = false; break; } diff --git a/level_zero/core/source/cmdlist/cmdlist_imp.h b/level_zero/core/source/cmdlist/cmdlist_imp.h index 470e15cc3e..fdabf91f8f 100644 --- a/level_zero/core/source/cmdlist/cmdlist_imp.h +++ b/level_zero/core/source/cmdlist/cmdlist_imp.h @@ -43,7 +43,7 @@ struct CommandListImp : CommandList { protected: std::unique_ptr nonImmediateLogicalStateHelper; NEO::GraphicsAllocation *inOrderDependencyCounterAllocation = nullptr; - uint64_t inOrderDependencyCounter = 0; + uint32_t inOrderDependencyCounter = 0; uint32_t inOrderAllocationOffset = 0; bool inOrderExecutionEnabled = false; diff --git a/level_zero/core/source/event/event.cpp b/level_zero/core/source/event/event.cpp index 3fee874ca5..0708ade1a4 100644 --- a/level_zero/core/source/event/event.cpp +++ b/level_zero/core/source/event/event.cpp @@ -391,7 +391,7 @@ void Event::setIsCompleted() { unsetCmdQueue(true); } -void Event::enableInOrderExecMode(NEO::GraphicsAllocation &inOrderDependenciesAllocation, uint64_t signalValue, uint32_t allocationOffset) { +void Event::enableInOrderExecMode(NEO::GraphicsAllocation &inOrderDependenciesAllocation, uint32_t signalValue, uint32_t allocationOffset) { inOrderExecEvent = true; inOrderExecSignalValue = signalValue; diff --git a/level_zero/core/source/event/event.h b/level_zero/core/source/event/event.h index 53af86ecfb..c9917dff15 100644 --- a/level_zero/core/source/event/event.h +++ b/level_zero/core/source/event/event.h @@ -211,10 +211,10 @@ struct Event : _ze_event_handle_t { void setMetricStreamer(MetricStreamer *metricStreamer) { this->metricStreamer = metricStreamer; } - void enableInOrderExecMode(NEO::GraphicsAllocation &inOrderDependenciesAllocation, uint64_t signalValue, uint32_t allocationOffset); + void enableInOrderExecMode(NEO::GraphicsAllocation &inOrderDependenciesAllocation, uint32_t signalValue, uint32_t allocationOffset); bool isInOrderExecEvent() const { return inOrderExecEvent; } NEO::GraphicsAllocation *getInOrderExecDataAllocation() const { return inOrderExecDataAllocation; } - uint64_t getInOrderExecSignalValue() const { return inOrderExecSignalValue; } + uint32_t getInOrderExecSignalValue() const { return inOrderExecSignalValue; } uint32_t getInOrderAllocationOffset() const { return inOrderAllocationOffset; } void setLatestUsedCmdQueue(CommandQueue *newCmdQ); void setReferenceTs(NEO::TimeStampData ×tamp) { @@ -233,7 +233,7 @@ struct Event : _ze_event_handle_t { uint64_t contextEndTS = 1; NEO::TimeStampData referenceTs{}; - uint64_t inOrderExecSignalValue = 0; + uint32_t inOrderExecSignalValue = 0; uint32_t inOrderAllocationOffset = 0; std::chrono::microseconds gpuHangCheckPeriod{500'000}; diff --git a/level_zero/core/source/event/event_impl.inl b/level_zero/core/source/event/event_impl.inl index 1855ed2ece..ae71d0ac98 100644 --- a/level_zero/core/source/event/event_impl.inl +++ b/level_zero/core/source/event/event_impl.inl @@ -134,11 +134,11 @@ void EventImp::assignKernelEventCompletionData(void *address) { template ze_result_t EventImp::queryInOrderEventStatus() { - auto hostAddress = static_cast(ptrOffset(this->inOrderExecDataAllocation->getUnderlyingBuffer(), this->inOrderAllocationOffset)); + auto hostAddress = static_cast(ptrOffset(this->inOrderExecDataAllocation->getUnderlyingBuffer(), this->inOrderAllocationOffset)); bool signaled = true; for (uint32_t i = 0; i < this->getPacketsInUse(); i++) { - if (!NEO::WaitUtils::waitFunctionWithPredicate(hostAddress, this->inOrderExecSignalValue, std::greater_equal())) { + if (!NEO::WaitUtils::waitFunctionWithPredicate(hostAddress, this->inOrderExecSignalValue, std::greater_equal())) { signaled = false; break; } diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp index 9696391fe6..601d08dcd8 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp @@ -1112,12 +1112,11 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingWalkerThenSignalSy ASSERT_NE(nullptr, sdiCmd); EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress() + counterOffset, sdiCmd->getAddress()); - EXPECT_EQ(1u, sdiCmd->getStoreQword()); + EXPECT_EQ(0u, sdiCmd->getStoreQword()); EXPECT_EQ(2u, sdiCmd->getDataDword0()); - EXPECT_EQ(0u, sdiCmd->getDataDword1()); } - auto hostAddress = static_cast(ptrOffset(immCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer(), counterOffset)); + auto hostAddress = static_cast(ptrOffset(immCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer(), counterOffset)); *hostAddress = 1; EXPECT_EQ(ZE_RESULT_NOT_READY, events[0]->hostSynchronize(1)); @@ -1169,7 +1168,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingTimestampEventThen ASSERT_NE(nullptr, sdiCmd); EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), sdiCmd->getAddress()); - EXPECT_EQ(1u, sdiCmd->getStoreQword()); + EXPECT_EQ(0u, sdiCmd->getStoreQword()); EXPECT_EQ(1u, sdiCmd->getDataDword0()); } @@ -1221,7 +1220,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingAppendSignalEventT ASSERT_NE(nullptr, sdiCmd); EXPECT_EQ(inOrderSyncVa, sdiCmd->getAddress()); - EXPECT_EQ(1u, sdiCmd->getStoreQword()); + EXPECT_EQ(0u, sdiCmd->getStoreQword()); EXPECT_EQ(2u, sdiCmd->getDataDword0()); EXPECT_EQ(0u, sdiCmd->getDataDword1()); } @@ -1295,7 +1294,7 @@ HWTEST2_F(InOrderCmdListTests, givenCopyOnlyInOrderModeWhenProgrammingCopyThenSi uint64_t syncVa = immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(); EXPECT_EQ(syncVa, sdiCmd->getAddress()); - EXPECT_EQ(1u, sdiCmd->getStoreQword()); + EXPECT_EQ(0u, sdiCmd->getStoreQword()); EXPECT_EQ(2u, sdiCmd->getDataDword0()); EXPECT_EQ(0u, sdiCmd->getDataDword1()); } @@ -1332,7 +1331,7 @@ HWTEST2_F(InOrderCmdListTests, givenCopyOnlyInOrderModeWhenProgrammingCopyRegion uint64_t syncVa = immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(); EXPECT_EQ(syncVa, sdiCmd->getAddress()); - EXPECT_EQ(1u, sdiCmd->getStoreQword()); + EXPECT_EQ(0u, sdiCmd->getStoreQword()); EXPECT_EQ(2u, sdiCmd->getDataDword0()); EXPECT_EQ(0u, sdiCmd->getDataDword1()); } @@ -1365,7 +1364,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingAppendWaitOnEvents auto sdiCmd = genCmdCast(*sdiItor); EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), sdiCmd->getAddress()); - EXPECT_EQ(1u, sdiCmd->getStoreQword()); + EXPECT_EQ(0u, sdiCmd->getStoreQword()); EXPECT_EQ(2u, sdiCmd->getDataDword0()); } @@ -1448,7 +1447,7 @@ HWTEST2_F(InOrderCmdListTests, givenCopyOnlyInOrderModeWhenProgrammingBarrierThe auto sdiCmd = genCmdCast(*sdiItor); EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), sdiCmd->getAddress()); - EXPECT_EQ(1u, sdiCmd->getStoreQword()); + EXPECT_EQ(0u, sdiCmd->getStoreQword()); EXPECT_EQ(2u, sdiCmd->getDataDword0()); EXPECT_EQ(0u, sdiCmd->getDataDword1()); } @@ -1485,7 +1484,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingAppendBarrierWithW auto sdiCmd = genCmdCast(*sdiItor); EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), sdiCmd->getAddress()); - EXPECT_EQ(1u, sdiCmd->getStoreQword()); + EXPECT_EQ(0u, sdiCmd->getStoreQword()); EXPECT_EQ(2u, sdiCmd->getDataDword0()); EXPECT_EQ(0u, sdiCmd->getDataDword1()); } @@ -1547,7 +1546,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingAppendBarrierWitho auto sdiCmd = genCmdCast(*sdiItor); EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), sdiCmd->getAddress()); - EXPECT_EQ(1u, sdiCmd->getStoreQword()); + EXPECT_EQ(0u, sdiCmd->getStoreQword()); EXPECT_EQ(2u, sdiCmd->getDataDword0()); EXPECT_EQ(0u, sdiCmd->getDataDword1()); } @@ -1562,7 +1561,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenCallingSyncThenHandleCompleti immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); - auto hostAddress = static_cast(ptrOffset(immCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer(), counterOffset)); + auto hostAddress = static_cast(ptrOffset(immCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer(), counterOffset)); *hostAddress = 0; const uint32_t failCounter = 3; @@ -1629,7 +1628,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenDoingCpuCopyThenSynchronize, auto eventHandle = events[0]->toHandle(); - auto hostAddress = static_cast(immCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer()); + auto hostAddress = static_cast(immCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer()); *hostAddress = 0; const uint32_t failCounter = 3; @@ -1672,7 +1671,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenGpuHangDetectedInCpuCopyPathT auto ultCsr = static_cast *>(device->getNEODevice()->getDefaultEngine().commandStreamReceiver); - auto hostAddress = static_cast(immCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer()); + auto hostAddress = static_cast(immCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer()); *hostAddress = 0; immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); @@ -1728,7 +1727,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingKernelSplitWithout ASSERT_NE(nullptr, sdiCmd); EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), sdiCmd->getAddress()); - EXPECT_EQ(1u, sdiCmd->getStoreQword()); + EXPECT_EQ(0u, sdiCmd->getStoreQword()); EXPECT_EQ(1u, sdiCmd->getDataDword0()); alignedFree(alignedPtr); @@ -1778,7 +1777,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingKernelSplitWithEve ASSERT_NE(nullptr, sdiCmd); EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), sdiCmd->getAddress()); - EXPECT_EQ(1u, sdiCmd->getStoreQword()); + EXPECT_EQ(0u, sdiCmd->getStoreQword()); EXPECT_EQ(1u, sdiCmd->getDataDword0()); alignedFree(alignedPtr); @@ -1867,8 +1866,8 @@ HWTEST2_F(MultiTileInOrderCmdListTests, givenMultiTileInOrderModeWhenCallingSync immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, events[0]->toHandle(), 0, nullptr, launchParams, false); - auto hostAddress0 = static_cast(immCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer()); - auto hostAddress1 = hostAddress0++; + auto hostAddress0 = static_cast(immCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer()); + auto hostAddress1 = ptrOffset(hostAddress0, sizeof(uint64_t)); *hostAddress0 = 0; *hostAddress1 = 0; @@ -2080,7 +2079,7 @@ HWTEST2_F(BcsSplitInOrderCmdListTests, givenBcsSplitEnabledWhenDispatchingCopyTh auto gpuAddress = immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(); EXPECT_EQ(gpuAddress, sdiCmd->getAddress()); - EXPECT_EQ(1u, sdiCmd->getStoreQword()); + EXPECT_EQ(0u, sdiCmd->getStoreQword()); EXPECT_EQ(1u, sdiCmd->getDataDword0()); EXPECT_EQ(0u, sdiCmd->getDataDword1()); } @@ -2120,7 +2119,7 @@ HWTEST2_F(BcsSplitInOrderCmdListTests, givenBcsSplitEnabledWhenDispatchingCopyRe auto gpuAddress = immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(); EXPECT_EQ(gpuAddress, sdiCmd->getAddress()); - EXPECT_EQ(1u, sdiCmd->getStoreQword()); + EXPECT_EQ(0u, sdiCmd->getStoreQword()); EXPECT_EQ(1u, sdiCmd->getDataDword0()); EXPECT_EQ(0u, sdiCmd->getDataDword1()); }