feature: change in-order dependency counter type from uint64_t to uint32_t

Related-To: NEO-7966

Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
Dunajski, Bartosz
2023-07-04 13:45:29 +00:00
committed by Compute-Runtime-Automation
parent 0f47295a23
commit c18198ebd8
8 changed files with 34 additions and 39 deletions

View File

@@ -169,7 +169,7 @@ struct CommandListCoreFamily : CommandListImp {
ze_result_t appendSignalEvent(ze_event_handle_t hEvent) override;
ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, bool relaxedOrderingAllowed, bool trackDependencies, bool signalInOrderCompletion) override;
void appendWaitOnInOrderDependency(NEO::GraphicsAllocation *dependencyCounterAllocation, uint64_t waitValue, uint32_t offset, bool relaxedOrderingAllowed);
void appendWaitOnInOrderDependency(NEO::GraphicsAllocation *dependencyCounterAllocation, uint32_t waitValue, uint32_t offset, bool relaxedOrderingAllowed);
void appendSignalInOrderDependencyCounter();
ze_result_t appendWriteGlobalTimestamp(uint64_t *dstptr, ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override;

View File

@@ -2187,7 +2187,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent(ze_event_han
}
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(NEO::GraphicsAllocation *dependencyCounterAllocation, uint64_t waitValue, uint32_t offset, bool relaxedOrderingAllowed) {
void CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(NEO::GraphicsAllocation *dependencyCounterAllocation, uint32_t waitValue, uint32_t offset, bool relaxedOrderingAllowed) {
using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION;
UNRECOVERABLE_IF(waitValue > std::numeric_limits<uint32_t>::max());
@@ -2198,13 +2198,11 @@ void CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(NEO::Gr
for (uint32_t i = 0; i < this->partitionCount; i++) {
if (relaxedOrderingAllowed) {
NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataMemBatchBufferStart(*commandContainer.getCommandStream(), 0, gpuAddress, static_cast<uint32_t>(waitValue),
NEO::CompareOperation::Less, true);
NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataMemBatchBufferStart(*commandContainer.getCommandStream(), 0, gpuAddress, waitValue, NEO::CompareOperation::Less, true);
} else {
NEO::EncodeSemaphore<GfxFamily>::addMiSemaphoreWaitCommand(*commandContainer.getCommandStream(),
gpuAddress,
static_cast<uint32_t>(waitValue),
gpuAddress, waitValue,
COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD);
}
@@ -2314,14 +2312,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::appendSignalInOrderDependencyCounter() {
uint64_t signalValue = this->inOrderDependencyCounter + 1;
auto lowPart = static_cast<uint32_t>(signalValue & 0x0000FFFFFFFFULL);
auto highPart = static_cast<uint32_t>(signalValue >> 32);
uint32_t signalValue = this->inOrderDependencyCounter + 1;
uint64_t gpuVa = this->inOrderDependencyCounterAllocation->getGpuAddress() + this->inOrderAllocationOffset;
NEO::EncodeStoreMemory<GfxFamily>::programStoreDataImm(*commandContainer.getCommandStream(), gpuVa,
lowPart, highPart, true, (this->partitionCount > 1));
signalValue, 0, false, (this->partitionCount > 1));
}
template <GFXCORE_FAMILY gfxCoreFamily>

View File

@@ -1156,10 +1156,10 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::synchronizeInOrderExe
bool signaled = true;
auto hostAddress = static_cast<uint64_t *>(ptrOffset(this->inOrderDependencyCounterAllocation->getUnderlyingBuffer(), this->inOrderAllocationOffset));
auto hostAddress = static_cast<uint32_t *>(ptrOffset(this->inOrderDependencyCounterAllocation->getUnderlyingBuffer(), this->inOrderAllocationOffset));
for (uint32_t i = 0; i < this->partitionCount; i++) {
if (!NEO::WaitUtils::waitFunctionWithPredicate<const uint64_t>(hostAddress, waitValue, std::greater_equal<uint64_t>())) {
if (!NEO::WaitUtils::waitFunctionWithPredicate<const uint32_t>(hostAddress, waitValue, std::greater_equal<uint32_t>())) {
signaled = false;
break;
}

View File

@@ -43,7 +43,7 @@ struct CommandListImp : CommandList {
protected:
std::unique_ptr<NEO::LogicalStateHelper> nonImmediateLogicalStateHelper;
NEO::GraphicsAllocation *inOrderDependencyCounterAllocation = nullptr;
uint64_t inOrderDependencyCounter = 0;
uint32_t inOrderDependencyCounter = 0;
uint32_t inOrderAllocationOffset = 0;
bool inOrderExecutionEnabled = false;

View File

@@ -391,7 +391,7 @@ void Event::setIsCompleted() {
unsetCmdQueue(true);
}
void Event::enableInOrderExecMode(NEO::GraphicsAllocation &inOrderDependenciesAllocation, uint64_t signalValue, uint32_t allocationOffset) {
void Event::enableInOrderExecMode(NEO::GraphicsAllocation &inOrderDependenciesAllocation, uint32_t signalValue, uint32_t allocationOffset) {
inOrderExecEvent = true;
inOrderExecSignalValue = signalValue;

View File

@@ -211,10 +211,10 @@ struct Event : _ze_event_handle_t {
void setMetricStreamer(MetricStreamer *metricStreamer) {
this->metricStreamer = metricStreamer;
}
void enableInOrderExecMode(NEO::GraphicsAllocation &inOrderDependenciesAllocation, uint64_t signalValue, uint32_t allocationOffset);
void enableInOrderExecMode(NEO::GraphicsAllocation &inOrderDependenciesAllocation, uint32_t signalValue, uint32_t allocationOffset);
bool isInOrderExecEvent() const { return inOrderExecEvent; }
NEO::GraphicsAllocation *getInOrderExecDataAllocation() const { return inOrderExecDataAllocation; }
uint64_t getInOrderExecSignalValue() const { return inOrderExecSignalValue; }
uint32_t getInOrderExecSignalValue() const { return inOrderExecSignalValue; }
uint32_t getInOrderAllocationOffset() const { return inOrderAllocationOffset; }
void setLatestUsedCmdQueue(CommandQueue *newCmdQ);
void setReferenceTs(NEO::TimeStampData &timestamp) {
@@ -233,7 +233,7 @@ struct Event : _ze_event_handle_t {
uint64_t contextEndTS = 1;
NEO::TimeStampData referenceTs{};
uint64_t inOrderExecSignalValue = 0;
uint32_t inOrderExecSignalValue = 0;
uint32_t inOrderAllocationOffset = 0;
std::chrono::microseconds gpuHangCheckPeriod{500'000};

View File

@@ -134,11 +134,11 @@ void EventImp<TagSizeT>::assignKernelEventCompletionData(void *address) {
template <typename TagSizeT>
ze_result_t EventImp<TagSizeT>::queryInOrderEventStatus() {
auto hostAddress = static_cast<uint64_t *>(ptrOffset(this->inOrderExecDataAllocation->getUnderlyingBuffer(), this->inOrderAllocationOffset));
auto hostAddress = static_cast<uint32_t *>(ptrOffset(this->inOrderExecDataAllocation->getUnderlyingBuffer(), this->inOrderAllocationOffset));
bool signaled = true;
for (uint32_t i = 0; i < this->getPacketsInUse(); i++) {
if (!NEO::WaitUtils::waitFunctionWithPredicate<const uint64_t>(hostAddress, this->inOrderExecSignalValue, std::greater_equal<uint64_t>())) {
if (!NEO::WaitUtils::waitFunctionWithPredicate<const uint32_t>(hostAddress, this->inOrderExecSignalValue, std::greater_equal<uint32_t>())) {
signaled = false;
break;
}

View File

@@ -1112,12 +1112,11 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingWalkerThenSignalSy
ASSERT_NE(nullptr, sdiCmd);
EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress() + counterOffset, sdiCmd->getAddress());
EXPECT_EQ(1u, sdiCmd->getStoreQword());
EXPECT_EQ(0u, sdiCmd->getStoreQword());
EXPECT_EQ(2u, sdiCmd->getDataDword0());
EXPECT_EQ(0u, sdiCmd->getDataDword1());
}
auto hostAddress = static_cast<uint64_t *>(ptrOffset(immCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer(), counterOffset));
auto hostAddress = static_cast<uint32_t *>(ptrOffset(immCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer(), counterOffset));
*hostAddress = 1;
EXPECT_EQ(ZE_RESULT_NOT_READY, events[0]->hostSynchronize(1));
@@ -1169,7 +1168,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingTimestampEventThen
ASSERT_NE(nullptr, sdiCmd);
EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), sdiCmd->getAddress());
EXPECT_EQ(1u, sdiCmd->getStoreQword());
EXPECT_EQ(0u, sdiCmd->getStoreQword());
EXPECT_EQ(1u, sdiCmd->getDataDword0());
}
@@ -1221,7 +1220,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingAppendSignalEventT
ASSERT_NE(nullptr, sdiCmd);
EXPECT_EQ(inOrderSyncVa, sdiCmd->getAddress());
EXPECT_EQ(1u, sdiCmd->getStoreQword());
EXPECT_EQ(0u, sdiCmd->getStoreQword());
EXPECT_EQ(2u, sdiCmd->getDataDword0());
EXPECT_EQ(0u, sdiCmd->getDataDword1());
}
@@ -1295,7 +1294,7 @@ HWTEST2_F(InOrderCmdListTests, givenCopyOnlyInOrderModeWhenProgrammingCopyThenSi
uint64_t syncVa = immCmdList->inOrderDependencyCounterAllocation->getGpuAddress();
EXPECT_EQ(syncVa, sdiCmd->getAddress());
EXPECT_EQ(1u, sdiCmd->getStoreQword());
EXPECT_EQ(0u, sdiCmd->getStoreQword());
EXPECT_EQ(2u, sdiCmd->getDataDword0());
EXPECT_EQ(0u, sdiCmd->getDataDword1());
}
@@ -1332,7 +1331,7 @@ HWTEST2_F(InOrderCmdListTests, givenCopyOnlyInOrderModeWhenProgrammingCopyRegion
uint64_t syncVa = immCmdList->inOrderDependencyCounterAllocation->getGpuAddress();
EXPECT_EQ(syncVa, sdiCmd->getAddress());
EXPECT_EQ(1u, sdiCmd->getStoreQword());
EXPECT_EQ(0u, sdiCmd->getStoreQword());
EXPECT_EQ(2u, sdiCmd->getDataDword0());
EXPECT_EQ(0u, sdiCmd->getDataDword1());
}
@@ -1365,7 +1364,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingAppendWaitOnEvents
auto sdiCmd = genCmdCast<MI_STORE_DATA_IMM *>(*sdiItor);
EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), sdiCmd->getAddress());
EXPECT_EQ(1u, sdiCmd->getStoreQword());
EXPECT_EQ(0u, sdiCmd->getStoreQword());
EXPECT_EQ(2u, sdiCmd->getDataDword0());
}
@@ -1448,7 +1447,7 @@ HWTEST2_F(InOrderCmdListTests, givenCopyOnlyInOrderModeWhenProgrammingBarrierThe
auto sdiCmd = genCmdCast<MI_STORE_DATA_IMM *>(*sdiItor);
EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), sdiCmd->getAddress());
EXPECT_EQ(1u, sdiCmd->getStoreQword());
EXPECT_EQ(0u, sdiCmd->getStoreQword());
EXPECT_EQ(2u, sdiCmd->getDataDword0());
EXPECT_EQ(0u, sdiCmd->getDataDword1());
}
@@ -1485,7 +1484,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingAppendBarrierWithW
auto sdiCmd = genCmdCast<MI_STORE_DATA_IMM *>(*sdiItor);
EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), sdiCmd->getAddress());
EXPECT_EQ(1u, sdiCmd->getStoreQword());
EXPECT_EQ(0u, sdiCmd->getStoreQword());
EXPECT_EQ(2u, sdiCmd->getDataDword0());
EXPECT_EQ(0u, sdiCmd->getDataDword1());
}
@@ -1547,7 +1546,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingAppendBarrierWitho
auto sdiCmd = genCmdCast<MI_STORE_DATA_IMM *>(*sdiItor);
EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), sdiCmd->getAddress());
EXPECT_EQ(1u, sdiCmd->getStoreQword());
EXPECT_EQ(0u, sdiCmd->getStoreQword());
EXPECT_EQ(2u, sdiCmd->getDataDword0());
EXPECT_EQ(0u, sdiCmd->getDataDword1());
}
@@ -1562,7 +1561,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenCallingSyncThenHandleCompleti
immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
auto hostAddress = static_cast<uint64_t *>(ptrOffset(immCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer(), counterOffset));
auto hostAddress = static_cast<uint32_t *>(ptrOffset(immCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer(), counterOffset));
*hostAddress = 0;
const uint32_t failCounter = 3;
@@ -1629,7 +1628,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenDoingCpuCopyThenSynchronize,
auto eventHandle = events[0]->toHandle();
auto hostAddress = static_cast<uint64_t *>(immCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer());
auto hostAddress = static_cast<uint32_t *>(immCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer());
*hostAddress = 0;
const uint32_t failCounter = 3;
@@ -1672,7 +1671,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenGpuHangDetectedInCpuCopyPathT
auto ultCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(device->getNEODevice()->getDefaultEngine().commandStreamReceiver);
auto hostAddress = static_cast<uint64_t *>(immCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer());
auto hostAddress = static_cast<uint32_t *>(immCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer());
*hostAddress = 0;
immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
@@ -1728,7 +1727,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingKernelSplitWithout
ASSERT_NE(nullptr, sdiCmd);
EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), sdiCmd->getAddress());
EXPECT_EQ(1u, sdiCmd->getStoreQword());
EXPECT_EQ(0u, sdiCmd->getStoreQword());
EXPECT_EQ(1u, sdiCmd->getDataDword0());
alignedFree(alignedPtr);
@@ -1778,7 +1777,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingKernelSplitWithEve
ASSERT_NE(nullptr, sdiCmd);
EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), sdiCmd->getAddress());
EXPECT_EQ(1u, sdiCmd->getStoreQword());
EXPECT_EQ(0u, sdiCmd->getStoreQword());
EXPECT_EQ(1u, sdiCmd->getDataDword0());
alignedFree(alignedPtr);
@@ -1867,8 +1866,8 @@ HWTEST2_F(MultiTileInOrderCmdListTests, givenMultiTileInOrderModeWhenCallingSync
immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, events[0]->toHandle(), 0, nullptr, launchParams, false);
auto hostAddress0 = static_cast<uint64_t *>(immCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer());
auto hostAddress1 = hostAddress0++;
auto hostAddress0 = static_cast<uint32_t *>(immCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer());
auto hostAddress1 = ptrOffset(hostAddress0, sizeof(uint64_t));
*hostAddress0 = 0;
*hostAddress1 = 0;
@@ -2080,7 +2079,7 @@ HWTEST2_F(BcsSplitInOrderCmdListTests, givenBcsSplitEnabledWhenDispatchingCopyTh
auto gpuAddress = immCmdList->inOrderDependencyCounterAllocation->getGpuAddress();
EXPECT_EQ(gpuAddress, sdiCmd->getAddress());
EXPECT_EQ(1u, sdiCmd->getStoreQword());
EXPECT_EQ(0u, sdiCmd->getStoreQword());
EXPECT_EQ(1u, sdiCmd->getDataDword0());
EXPECT_EQ(0u, sdiCmd->getDataDword1());
}
@@ -2120,7 +2119,7 @@ HWTEST2_F(BcsSplitInOrderCmdListTests, givenBcsSplitEnabledWhenDispatchingCopyRe
auto gpuAddress = immCmdList->inOrderDependencyCounterAllocation->getGpuAddress();
EXPECT_EQ(gpuAddress, sdiCmd->getAddress());
EXPECT_EQ(1u, sdiCmd->getStoreQword());
EXPECT_EQ(0u, sdiCmd->getStoreQword());
EXPECT_EQ(1u, sdiCmd->getDataDword0());
EXPECT_EQ(0u, sdiCmd->getDataDword1());
}