mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-30 09:58:55 +08:00
feature: change in-order allocation type to uint32_t
Related-To: NEO-7966
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
0f47295a23
commit
c18198ebd8
@@ -169,7 +169,7 @@ struct CommandListCoreFamily : CommandListImp {
|
||||
|
||||
ze_result_t appendSignalEvent(ze_event_handle_t hEvent) override;
|
||||
ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, bool relaxedOrderingAllowed, bool trackDependencies, bool signalInOrderCompletion) override;
|
||||
void appendWaitOnInOrderDependency(NEO::GraphicsAllocation *dependencyCounterAllocation, uint64_t waitValue, uint32_t offset, bool relaxedOrderingAllowed);
|
||||
void appendWaitOnInOrderDependency(NEO::GraphicsAllocation *dependencyCounterAllocation, uint32_t waitValue, uint32_t offset, bool relaxedOrderingAllowed);
|
||||
void appendSignalInOrderDependencyCounter();
|
||||
ze_result_t appendWriteGlobalTimestamp(uint64_t *dstptr, ze_event_handle_t hSignalEvent,
|
||||
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override;
|
||||
|
||||
@@ -2187,7 +2187,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent(ze_event_han
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(NEO::GraphicsAllocation *dependencyCounterAllocation, uint64_t waitValue, uint32_t offset, bool relaxedOrderingAllowed) {
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(NEO::GraphicsAllocation *dependencyCounterAllocation, uint32_t waitValue, uint32_t offset, bool relaxedOrderingAllowed) {
|
||||
using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION;
|
||||
|
||||
UNRECOVERABLE_IF(waitValue > std::numeric_limits<uint32_t>::max());
|
||||
@@ -2198,13 +2198,11 @@ void CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(NEO::Gr
|
||||
|
||||
for (uint32_t i = 0; i < this->partitionCount; i++) {
|
||||
if (relaxedOrderingAllowed) {
|
||||
NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataMemBatchBufferStart(*commandContainer.getCommandStream(), 0, gpuAddress, static_cast<uint32_t>(waitValue),
|
||||
NEO::CompareOperation::Less, true);
|
||||
NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataMemBatchBufferStart(*commandContainer.getCommandStream(), 0, gpuAddress, waitValue, NEO::CompareOperation::Less, true);
|
||||
|
||||
} else {
|
||||
NEO::EncodeSemaphore<GfxFamily>::addMiSemaphoreWaitCommand(*commandContainer.getCommandStream(),
|
||||
gpuAddress,
|
||||
static_cast<uint32_t>(waitValue),
|
||||
gpuAddress, waitValue,
|
||||
COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD);
|
||||
}
|
||||
|
||||
@@ -2314,14 +2312,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendSignalInOrderDependencyCounter() {
|
||||
uint64_t signalValue = this->inOrderDependencyCounter + 1;
|
||||
auto lowPart = static_cast<uint32_t>(signalValue & 0x0000FFFFFFFFULL);
|
||||
auto highPart = static_cast<uint32_t>(signalValue >> 32);
|
||||
uint32_t signalValue = this->inOrderDependencyCounter + 1;
|
||||
|
||||
uint64_t gpuVa = this->inOrderDependencyCounterAllocation->getGpuAddress() + this->inOrderAllocationOffset;
|
||||
|
||||
NEO::EncodeStoreMemory<GfxFamily>::programStoreDataImm(*commandContainer.getCommandStream(), gpuVa,
|
||||
lowPart, highPart, true, (this->partitionCount > 1));
|
||||
signalValue, 0, false, (this->partitionCount > 1));
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
|
||||
@@ -1156,10 +1156,10 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::synchronizeInOrderExe
|
||||
|
||||
bool signaled = true;
|
||||
|
||||
auto hostAddress = static_cast<uint64_t *>(ptrOffset(this->inOrderDependencyCounterAllocation->getUnderlyingBuffer(), this->inOrderAllocationOffset));
|
||||
auto hostAddress = static_cast<uint32_t *>(ptrOffset(this->inOrderDependencyCounterAllocation->getUnderlyingBuffer(), this->inOrderAllocationOffset));
|
||||
|
||||
for (uint32_t i = 0; i < this->partitionCount; i++) {
|
||||
if (!NEO::WaitUtils::waitFunctionWithPredicate<const uint64_t>(hostAddress, waitValue, std::greater_equal<uint64_t>())) {
|
||||
if (!NEO::WaitUtils::waitFunctionWithPredicate<const uint32_t>(hostAddress, waitValue, std::greater_equal<uint32_t>())) {
|
||||
signaled = false;
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -43,7 +43,7 @@ struct CommandListImp : CommandList {
|
||||
protected:
|
||||
std::unique_ptr<NEO::LogicalStateHelper> nonImmediateLogicalStateHelper;
|
||||
NEO::GraphicsAllocation *inOrderDependencyCounterAllocation = nullptr;
|
||||
uint64_t inOrderDependencyCounter = 0;
|
||||
uint32_t inOrderDependencyCounter = 0;
|
||||
uint32_t inOrderAllocationOffset = 0;
|
||||
bool inOrderExecutionEnabled = false;
|
||||
|
||||
|
||||
@@ -391,7 +391,7 @@ void Event::setIsCompleted() {
|
||||
unsetCmdQueue(true);
|
||||
}
|
||||
|
||||
void Event::enableInOrderExecMode(NEO::GraphicsAllocation &inOrderDependenciesAllocation, uint64_t signalValue, uint32_t allocationOffset) {
|
||||
void Event::enableInOrderExecMode(NEO::GraphicsAllocation &inOrderDependenciesAllocation, uint32_t signalValue, uint32_t allocationOffset) {
|
||||
inOrderExecEvent = true;
|
||||
|
||||
inOrderExecSignalValue = signalValue;
|
||||
|
||||
@@ -211,10 +211,10 @@ struct Event : _ze_event_handle_t {
|
||||
void setMetricStreamer(MetricStreamer *metricStreamer) {
|
||||
this->metricStreamer = metricStreamer;
|
||||
}
|
||||
void enableInOrderExecMode(NEO::GraphicsAllocation &inOrderDependenciesAllocation, uint64_t signalValue, uint32_t allocationOffset);
|
||||
void enableInOrderExecMode(NEO::GraphicsAllocation &inOrderDependenciesAllocation, uint32_t signalValue, uint32_t allocationOffset);
|
||||
bool isInOrderExecEvent() const { return inOrderExecEvent; }
|
||||
NEO::GraphicsAllocation *getInOrderExecDataAllocation() const { return inOrderExecDataAllocation; }
|
||||
uint64_t getInOrderExecSignalValue() const { return inOrderExecSignalValue; }
|
||||
uint32_t getInOrderExecSignalValue() const { return inOrderExecSignalValue; }
|
||||
uint32_t getInOrderAllocationOffset() const { return inOrderAllocationOffset; }
|
||||
void setLatestUsedCmdQueue(CommandQueue *newCmdQ);
|
||||
void setReferenceTs(NEO::TimeStampData &timestamp) {
|
||||
@@ -233,7 +233,7 @@ struct Event : _ze_event_handle_t {
|
||||
uint64_t contextEndTS = 1;
|
||||
NEO::TimeStampData referenceTs{};
|
||||
|
||||
uint64_t inOrderExecSignalValue = 0;
|
||||
uint32_t inOrderExecSignalValue = 0;
|
||||
uint32_t inOrderAllocationOffset = 0;
|
||||
|
||||
std::chrono::microseconds gpuHangCheckPeriod{500'000};
|
||||
|
||||
@@ -134,11 +134,11 @@ void EventImp<TagSizeT>::assignKernelEventCompletionData(void *address) {
|
||||
|
||||
template <typename TagSizeT>
|
||||
ze_result_t EventImp<TagSizeT>::queryInOrderEventStatus() {
|
||||
auto hostAddress = static_cast<uint64_t *>(ptrOffset(this->inOrderExecDataAllocation->getUnderlyingBuffer(), this->inOrderAllocationOffset));
|
||||
auto hostAddress = static_cast<uint32_t *>(ptrOffset(this->inOrderExecDataAllocation->getUnderlyingBuffer(), this->inOrderAllocationOffset));
|
||||
bool signaled = true;
|
||||
|
||||
for (uint32_t i = 0; i < this->getPacketsInUse(); i++) {
|
||||
if (!NEO::WaitUtils::waitFunctionWithPredicate<const uint64_t>(hostAddress, this->inOrderExecSignalValue, std::greater_equal<uint64_t>())) {
|
||||
if (!NEO::WaitUtils::waitFunctionWithPredicate<const uint32_t>(hostAddress, this->inOrderExecSignalValue, std::greater_equal<uint32_t>())) {
|
||||
signaled = false;
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -1112,12 +1112,11 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingWalkerThenSignalSy
|
||||
ASSERT_NE(nullptr, sdiCmd);
|
||||
|
||||
EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress() + counterOffset, sdiCmd->getAddress());
|
||||
EXPECT_EQ(1u, sdiCmd->getStoreQword());
|
||||
EXPECT_EQ(0u, sdiCmd->getStoreQword());
|
||||
EXPECT_EQ(2u, sdiCmd->getDataDword0());
|
||||
EXPECT_EQ(0u, sdiCmd->getDataDword1());
|
||||
}
|
||||
|
||||
auto hostAddress = static_cast<uint64_t *>(ptrOffset(immCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer(), counterOffset));
|
||||
auto hostAddress = static_cast<uint32_t *>(ptrOffset(immCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer(), counterOffset));
|
||||
|
||||
*hostAddress = 1;
|
||||
EXPECT_EQ(ZE_RESULT_NOT_READY, events[0]->hostSynchronize(1));
|
||||
@@ -1169,7 +1168,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingTimestampEventThen
|
||||
ASSERT_NE(nullptr, sdiCmd);
|
||||
|
||||
EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), sdiCmd->getAddress());
|
||||
EXPECT_EQ(1u, sdiCmd->getStoreQword());
|
||||
EXPECT_EQ(0u, sdiCmd->getStoreQword());
|
||||
EXPECT_EQ(1u, sdiCmd->getDataDword0());
|
||||
}
|
||||
|
||||
@@ -1221,7 +1220,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingAppendSignalEventT
|
||||
ASSERT_NE(nullptr, sdiCmd);
|
||||
|
||||
EXPECT_EQ(inOrderSyncVa, sdiCmd->getAddress());
|
||||
EXPECT_EQ(1u, sdiCmd->getStoreQword());
|
||||
EXPECT_EQ(0u, sdiCmd->getStoreQword());
|
||||
EXPECT_EQ(2u, sdiCmd->getDataDword0());
|
||||
EXPECT_EQ(0u, sdiCmd->getDataDword1());
|
||||
}
|
||||
@@ -1295,7 +1294,7 @@ HWTEST2_F(InOrderCmdListTests, givenCopyOnlyInOrderModeWhenProgrammingCopyThenSi
|
||||
uint64_t syncVa = immCmdList->inOrderDependencyCounterAllocation->getGpuAddress();
|
||||
|
||||
EXPECT_EQ(syncVa, sdiCmd->getAddress());
|
||||
EXPECT_EQ(1u, sdiCmd->getStoreQword());
|
||||
EXPECT_EQ(0u, sdiCmd->getStoreQword());
|
||||
EXPECT_EQ(2u, sdiCmd->getDataDword0());
|
||||
EXPECT_EQ(0u, sdiCmd->getDataDword1());
|
||||
}
|
||||
@@ -1332,7 +1331,7 @@ HWTEST2_F(InOrderCmdListTests, givenCopyOnlyInOrderModeWhenProgrammingCopyRegion
|
||||
uint64_t syncVa = immCmdList->inOrderDependencyCounterAllocation->getGpuAddress();
|
||||
|
||||
EXPECT_EQ(syncVa, sdiCmd->getAddress());
|
||||
EXPECT_EQ(1u, sdiCmd->getStoreQword());
|
||||
EXPECT_EQ(0u, sdiCmd->getStoreQword());
|
||||
EXPECT_EQ(2u, sdiCmd->getDataDword0());
|
||||
EXPECT_EQ(0u, sdiCmd->getDataDword1());
|
||||
}
|
||||
@@ -1365,7 +1364,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingAppendWaitOnEvents
|
||||
auto sdiCmd = genCmdCast<MI_STORE_DATA_IMM *>(*sdiItor);
|
||||
|
||||
EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), sdiCmd->getAddress());
|
||||
EXPECT_EQ(1u, sdiCmd->getStoreQword());
|
||||
EXPECT_EQ(0u, sdiCmd->getStoreQword());
|
||||
EXPECT_EQ(2u, sdiCmd->getDataDword0());
|
||||
}
|
||||
|
||||
@@ -1448,7 +1447,7 @@ HWTEST2_F(InOrderCmdListTests, givenCopyOnlyInOrderModeWhenProgrammingBarrierThe
|
||||
auto sdiCmd = genCmdCast<MI_STORE_DATA_IMM *>(*sdiItor);
|
||||
|
||||
EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), sdiCmd->getAddress());
|
||||
EXPECT_EQ(1u, sdiCmd->getStoreQword());
|
||||
EXPECT_EQ(0u, sdiCmd->getStoreQword());
|
||||
EXPECT_EQ(2u, sdiCmd->getDataDword0());
|
||||
EXPECT_EQ(0u, sdiCmd->getDataDword1());
|
||||
}
|
||||
@@ -1485,7 +1484,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingAppendBarrierWithW
|
||||
auto sdiCmd = genCmdCast<MI_STORE_DATA_IMM *>(*sdiItor);
|
||||
|
||||
EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), sdiCmd->getAddress());
|
||||
EXPECT_EQ(1u, sdiCmd->getStoreQword());
|
||||
EXPECT_EQ(0u, sdiCmd->getStoreQword());
|
||||
EXPECT_EQ(2u, sdiCmd->getDataDword0());
|
||||
EXPECT_EQ(0u, sdiCmd->getDataDword1());
|
||||
}
|
||||
@@ -1547,7 +1546,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingAppendBarrierWitho
|
||||
auto sdiCmd = genCmdCast<MI_STORE_DATA_IMM *>(*sdiItor);
|
||||
|
||||
EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), sdiCmd->getAddress());
|
||||
EXPECT_EQ(1u, sdiCmd->getStoreQword());
|
||||
EXPECT_EQ(0u, sdiCmd->getStoreQword());
|
||||
EXPECT_EQ(2u, sdiCmd->getDataDword0());
|
||||
EXPECT_EQ(0u, sdiCmd->getDataDword1());
|
||||
}
|
||||
@@ -1562,7 +1561,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenCallingSyncThenHandleCompleti
|
||||
|
||||
immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
|
||||
|
||||
auto hostAddress = static_cast<uint64_t *>(ptrOffset(immCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer(), counterOffset));
|
||||
auto hostAddress = static_cast<uint32_t *>(ptrOffset(immCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer(), counterOffset));
|
||||
*hostAddress = 0;
|
||||
|
||||
const uint32_t failCounter = 3;
|
||||
@@ -1629,7 +1628,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenDoingCpuCopyThenSynchronize,
|
||||
|
||||
auto eventHandle = events[0]->toHandle();
|
||||
|
||||
auto hostAddress = static_cast<uint64_t *>(immCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer());
|
||||
auto hostAddress = static_cast<uint32_t *>(immCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer());
|
||||
*hostAddress = 0;
|
||||
|
||||
const uint32_t failCounter = 3;
|
||||
@@ -1672,7 +1671,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenGpuHangDetectedInCpuCopyPathT
|
||||
|
||||
auto ultCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(device->getNEODevice()->getDefaultEngine().commandStreamReceiver);
|
||||
|
||||
auto hostAddress = static_cast<uint64_t *>(immCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer());
|
||||
auto hostAddress = static_cast<uint32_t *>(immCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer());
|
||||
*hostAddress = 0;
|
||||
|
||||
immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
|
||||
@@ -1728,7 +1727,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingKernelSplitWithout
|
||||
ASSERT_NE(nullptr, sdiCmd);
|
||||
|
||||
EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), sdiCmd->getAddress());
|
||||
EXPECT_EQ(1u, sdiCmd->getStoreQword());
|
||||
EXPECT_EQ(0u, sdiCmd->getStoreQword());
|
||||
EXPECT_EQ(1u, sdiCmd->getDataDword0());
|
||||
|
||||
alignedFree(alignedPtr);
|
||||
@@ -1778,7 +1777,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingKernelSplitWithEve
|
||||
ASSERT_NE(nullptr, sdiCmd);
|
||||
|
||||
EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), sdiCmd->getAddress());
|
||||
EXPECT_EQ(1u, sdiCmd->getStoreQword());
|
||||
EXPECT_EQ(0u, sdiCmd->getStoreQword());
|
||||
EXPECT_EQ(1u, sdiCmd->getDataDword0());
|
||||
|
||||
alignedFree(alignedPtr);
|
||||
@@ -1867,8 +1866,8 @@ HWTEST2_F(MultiTileInOrderCmdListTests, givenMultiTileInOrderModeWhenCallingSync
|
||||
|
||||
immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, events[0]->toHandle(), 0, nullptr, launchParams, false);
|
||||
|
||||
auto hostAddress0 = static_cast<uint64_t *>(immCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer());
|
||||
auto hostAddress1 = hostAddress0++;
|
||||
auto hostAddress0 = static_cast<uint32_t *>(immCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer());
|
||||
auto hostAddress1 = ptrOffset(hostAddress0, sizeof(uint64_t));
|
||||
|
||||
*hostAddress0 = 0;
|
||||
*hostAddress1 = 0;
|
||||
@@ -2080,7 +2079,7 @@ HWTEST2_F(BcsSplitInOrderCmdListTests, givenBcsSplitEnabledWhenDispatchingCopyTh
|
||||
auto gpuAddress = immCmdList->inOrderDependencyCounterAllocation->getGpuAddress();
|
||||
|
||||
EXPECT_EQ(gpuAddress, sdiCmd->getAddress());
|
||||
EXPECT_EQ(1u, sdiCmd->getStoreQword());
|
||||
EXPECT_EQ(0u, sdiCmd->getStoreQword());
|
||||
EXPECT_EQ(1u, sdiCmd->getDataDword0());
|
||||
EXPECT_EQ(0u, sdiCmd->getDataDword1());
|
||||
}
|
||||
@@ -2120,7 +2119,7 @@ HWTEST2_F(BcsSplitInOrderCmdListTests, givenBcsSplitEnabledWhenDispatchingCopyRe
|
||||
auto gpuAddress = immCmdList->inOrderDependencyCounterAllocation->getGpuAddress();
|
||||
|
||||
EXPECT_EQ(gpuAddress, sdiCmd->getAddress());
|
||||
EXPECT_EQ(1u, sdiCmd->getStoreQword());
|
||||
EXPECT_EQ(0u, sdiCmd->getStoreQword());
|
||||
EXPECT_EQ(1u, sdiCmd->getDataDword0());
|
||||
EXPECT_EQ(0u, sdiCmd->getDataDword1());
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user