mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-20 08:53:55 +08:00
feature: support to use mi_atomic for signalling in-order counter
Related-To: NEO-7966 Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
35ead06765
commit
2c921ec940
@@ -340,6 +340,9 @@ struct CommandListCoreFamily : public CommandListImp {
|
||||
|
||||
void addCmdForPatching(std::shared_ptr<InOrderExecInfo> *externalInOrderExecInfo, void *cmd1, void *cmd2, uint64_t counterValue, InOrderPatchCommandHelpers::PatchCmdType patchCmdType);
|
||||
|
||||
bool inOrderAtomicSignallingEnabled() const;
|
||||
uint64_t getInOrderIncrementValue() const;
|
||||
|
||||
InOrderPatchCommandsContainer<GfxFamily> inOrderPatchCmds;
|
||||
|
||||
uint64_t latestHostWaitedInOrderSyncValue = 0;
|
||||
|
||||
@@ -174,10 +174,10 @@ void CommandListCoreFamily<gfxCoreFamily>::handleInOrderDependencyCounter(Event
|
||||
|
||||
UNRECOVERABLE_IF(inOrderAllocationOffset + offset >= inOrderExecInfo->getDeviceCounterAllocation().getUnderlyingBufferSize());
|
||||
|
||||
CommandListCoreFamily<gfxCoreFamily>::appendSignalInOrderDependencyCounter(nullptr); // write 1 on new offset
|
||||
CommandListCoreFamily<gfxCoreFamily>::appendSignalInOrderDependencyCounter(nullptr); // signal counter on new offset
|
||||
}
|
||||
|
||||
inOrderExecInfo->addCounterValue(1);
|
||||
inOrderExecInfo->addCounterValue(getInOrderIncrementValue());
|
||||
|
||||
this->commandContainer.addToResidencyContainer(&inOrderExecInfo->getDeviceCounterAllocation());
|
||||
|
||||
@@ -2554,21 +2554,30 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendSignalInOrderDependencyCounter(Event *signalEvent) {
|
||||
using MI_STORE_DATA_IMM = typename GfxFamily::MI_STORE_DATA_IMM;
|
||||
|
||||
uint64_t signalValue = inOrderExecInfo->getCounterValue() + 1;
|
||||
|
||||
uint64_t gpuVa = inOrderExecInfo->getDeviceCounterAllocation().getGpuAddress() + this->inOrderAllocationOffset;
|
||||
auto cmdStream = commandContainer.getCommandStream();
|
||||
|
||||
auto miStoreCmd = reinterpret_cast<MI_STORE_DATA_IMM *>(commandContainer.getCommandStream()->getSpace(sizeof(MI_STORE_DATA_IMM)));
|
||||
if (inOrderAtomicSignallingEnabled()) {
|
||||
using ATOMIC_OPCODES = typename GfxFamily::MI_ATOMIC::ATOMIC_OPCODES;
|
||||
using DATA_SIZE = typename GfxFamily::MI_ATOMIC::DATA_SIZE;
|
||||
|
||||
NEO::EncodeStoreMemory<GfxFamily>::programStoreDataImm(miStoreCmd, gpuVa, getLowPart(signalValue), getHighPart(signalValue),
|
||||
isQwordInOrderCounter(), (this->partitionCount > 1));
|
||||
NEO::EncodeAtomic<GfxFamily>::programMiAtomic(*cmdStream, gpuVa, ATOMIC_OPCODES::ATOMIC_8B_INCREMENT,
|
||||
DATA_SIZE::DATA_SIZE_QWORD, 0, 0, 0, 0);
|
||||
} else {
|
||||
using MI_STORE_DATA_IMM = typename GfxFamily::MI_STORE_DATA_IMM;
|
||||
|
||||
addCmdForPatching(nullptr, miStoreCmd, nullptr, signalValue, InOrderPatchCommandHelpers::PatchCmdType::Sdi);
|
||||
uint64_t signalValue = inOrderExecInfo->getCounterValue() + 1;
|
||||
|
||||
auto miStoreCmd = reinterpret_cast<MI_STORE_DATA_IMM *>(cmdStream->getSpace(sizeof(MI_STORE_DATA_IMM)));
|
||||
|
||||
NEO::EncodeStoreMemory<GfxFamily>::programStoreDataImm(miStoreCmd, gpuVa, getLowPart(signalValue), getHighPart(signalValue),
|
||||
isQwordInOrderCounter(), (this->partitionCount > 1));
|
||||
|
||||
addCmdForPatching(nullptr, miStoreCmd, nullptr, signalValue, InOrderPatchCommandHelpers::PatchCmdType::Sdi);
|
||||
}
|
||||
|
||||
if ((NEO::debugManager.flags.ProgramUserInterruptOnResolvedDependency.get() == 1) && signalEvent && signalEvent->isKmdWaitModeEnabled()) {
|
||||
NEO::EnodeUserInterrupt<GfxFamily>::encode(*commandContainer.getCommandStream());
|
||||
NEO::EnodeUserInterrupt<GfxFamily>::encode(*cmdStream);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3661,4 +3670,18 @@ bool CommandListCoreFamily<gfxCoreFamily>::handleCounterBasedEventOperations(Eve
|
||||
return true;
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
bool CommandListCoreFamily<gfxCoreFamily>::inOrderAtomicSignallingEnabled() const {
|
||||
if (NEO::debugManager.flags.InOrderAtomicSignallingEnabled.get() == 1) {
|
||||
return (this->getPartitionCount() > 1);
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
uint64_t CommandListCoreFamily<gfxCoreFamily>::getInOrderIncrementValue() const {
|
||||
return (inOrderAtomicSignallingEnabled() ? this->getPartitionCount() : 1);
|
||||
}
|
||||
|
||||
} // namespace L0
|
||||
|
||||
@@ -75,11 +75,13 @@ struct WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>
|
||||
using BaseClass::getAllocationFromHostPtrMap;
|
||||
using BaseClass::getDcFlushRequired;
|
||||
using BaseClass::getHostPtrAlloc;
|
||||
using BaseClass::getInOrderIncrementValue;
|
||||
using BaseClass::hostPtrMap;
|
||||
using BaseClass::immediateCmdListHeapSharing;
|
||||
using BaseClass::indirectAllocationsAllowed;
|
||||
using BaseClass::initialize;
|
||||
using BaseClass::inOrderAllocationOffset;
|
||||
using BaseClass::inOrderAtomicSignallingEnabled;
|
||||
using BaseClass::inOrderExecInfo;
|
||||
using BaseClass::inOrderPatchCmds;
|
||||
using BaseClass::isFlushTaskSubmissionEnabled;
|
||||
@@ -181,8 +183,10 @@ struct WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>>
|
||||
using BaseClass::frontEndStateTracking;
|
||||
using BaseClass::getDcFlushRequired;
|
||||
using BaseClass::getHostPtrAlloc;
|
||||
using BaseClass::getInOrderIncrementValue;
|
||||
using BaseClass::hostSynchronize;
|
||||
using BaseClass::immediateCmdListHeapSharing;
|
||||
using BaseClass::inOrderAtomicSignallingEnabled;
|
||||
using BaseClass::inOrderExecInfo;
|
||||
using BaseClass::inOrderPatchCmds;
|
||||
using BaseClass::isBcsSplitNeeded;
|
||||
|
||||
@@ -1204,6 +1204,18 @@ HWTEST2_F(InOrderCmdListTests, givenRegularEventWithInOrderExecInfoWhenReusedOnR
|
||||
EXPECT_EQ(nullptr, events[0]->inOrderExecInfo.get());
|
||||
}
|
||||
|
||||
// Verifies that an immediate command list created without multi-tile setup never reports
// atomic signalling, and keeps an increment value of 1, both before and after the debug flag is set.
HWTEST2_F(InOrderCmdListTests, givenDebugFlagSetAndSingleTileCmdListWhenAskingForAtomicSignallingThenReturnFalse, IsAtLeastSkl) {
|
||||
auto immCmdList = createImmCmdList<gfxCoreFamily>();
|
||||
|
||||
// Flag not yet set by this test: atomic signalling is off.
EXPECT_FALSE(immCmdList->inOrderAtomicSignallingEnabled());
|
||||
EXPECT_EQ(1u, immCmdList->getInOrderIncrementValue());
|
||||
|
||||
// Request atomic signalling through the debug flag.
debugManager.flags.InOrderAtomicSignallingEnabled.set(1);
|
||||
|
||||
// Still expected to be disabled for this command list despite the flag.
EXPECT_FALSE(immCmdList->inOrderAtomicSignallingEnabled());
|
||||
EXPECT_EQ(1u, immCmdList->getInOrderIncrementValue());
|
||||
}
|
||||
|
||||
HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenSubmittingThenProgramSemaphoreForPreviousDispatch, IsAtLeastXeHpCore) {
|
||||
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
|
||||
|
||||
@@ -4075,8 +4087,8 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingKernelSplitWithEve
|
||||
|
||||
struct MultiTileInOrderCmdListTests : public InOrderCmdListTests {
|
||||
void SetUp() override {
|
||||
NEO::debugManager.flags.CreateMultipleSubDevices.set(2);
|
||||
NEO::debugManager.flags.EnableImplicitScaling.set(1);
|
||||
NEO::debugManager.flags.CreateMultipleSubDevices.set(partitionCount);
|
||||
NEO::debugManager.flags.EnableImplicitScaling.set(4);
|
||||
|
||||
InOrderCmdListTests::SetUp();
|
||||
}
|
||||
@@ -4085,12 +4097,71 @@ struct MultiTileInOrderCmdListTests : public InOrderCmdListTests {
|
||||
DestroyableZeUniquePtr<WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>>> createMultiTileImmCmdList() {
|
||||
auto cmdList = createImmCmdList<gfxCoreFamily>();
|
||||
|
||||
cmdList->partitionCount = 2;
|
||||
cmdList->partitionCount = partitionCount;
|
||||
|
||||
return cmdList;
|
||||
}
|
||||
|
||||
const uint32_t partitionCount = 2;
|
||||
};
|
||||
|
||||
// Verifies that a multi-tile immediate command list reports atomic signalling only after the
// debug flag is set, at which point the increment value equals the fixture's partitionCount.
HWTEST2_F(MultiTileInOrderCmdListTests, givenDebugFlagSetWhenAskingForAtomicSignallingThenReturnTrue, IsAtLeastXeHpCore) {
|
||||
auto immCmdList = createMultiTileImmCmdList<gfxCoreFamily>();
|
||||
|
||||
// Flag not yet set by this test: atomic signalling is off and the increment is 1.
EXPECT_FALSE(immCmdList->inOrderAtomicSignallingEnabled());
|
||||
EXPECT_EQ(1u, immCmdList->getInOrderIncrementValue());
|
||||
|
||||
// Request atomic signalling through the debug flag.
debugManager.flags.InOrderAtomicSignallingEnabled.set(1);
|
||||
|
||||
// With the flag set, the multi-tile command list switches to atomic signalling.
EXPECT_TRUE(immCmdList->inOrderAtomicSignallingEnabled());
|
||||
EXPECT_EQ(partitionCount, immCmdList->getInOrderIncrementValue());
|
||||
}
|
||||
|
||||
// Verifies that, with atomic signalling enabled on a multi-tile immediate command list,
// the in-order counter advances by partitionCount per append and the commands emitted by
// appendWaitOnEvents contain exactly one MI_ATOMIC (8-byte increment, QWORD data size)
// targeting the device counter allocation.
HWTEST2_F(MultiTileInOrderCmdListTests, givenAtomicSignallingEnabledWhenSignallingCounterThenUseMiAtomicCmd, IsAtLeastXeHpCore) {
    using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
    using ATOMIC_OPCODES = typename FamilyType::MI_ATOMIC::ATOMIC_OPCODES;
    using DATA_SIZE = typename FamilyType::MI_ATOMIC::DATA_SIZE;

    debugManager.flags.InOrderAtomicSignallingEnabled.set(1);

    auto immCmdList = createMultiTileImmCmdList<gfxCoreFamily>();

    auto eventPool = createEvents<FamilyType>(1, false);

    auto cmdStream = immCmdList->getCmdContainer().getCommandStream();

    EXPECT_EQ(0u, immCmdList->inOrderExecInfo->getCounterValue());

    auto handle = events[0]->toHandle();

    immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, handle, 0, nullptr, launchParams, false);

    EXPECT_EQ(partitionCount, immCmdList->inOrderExecInfo->getCounterValue());

    // Only commands emitted after this point are parsed below.
    size_t offset = cmdStream->getUsed();

    immCmdList->appendWaitOnEvents(1, &handle, false, false, true);

    EXPECT_EQ(partitionCount * 2, immCmdList->inOrderExecInfo->getCounterValue());

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, ptrOffset(cmdStream->getCpuBase(), offset), (cmdStream->getUsed() - offset)));

    auto miAtomics = findAll<MI_ATOMIC *>(cmdList.begin(), cmdList.end());
    // Fatal assertion: miAtomics[0] is dereferenced right below, so an empty result must
    // abort the test instead of falling through to an out-of-bounds access.
    ASSERT_EQ(1u, miAtomics.size());

    auto atomicCmd = genCmdCast<MI_ATOMIC *>(*miAtomics[0]);
    ASSERT_NE(nullptr, atomicCmd);

    auto gpuAddress = immCmdList->inOrderExecInfo->getDeviceCounterAllocation().getGpuAddress();

    EXPECT_EQ(gpuAddress, NEO::UnitTestHelper<FamilyType>::getAtomicMemoryAddress(*atomicCmd));
    EXPECT_EQ(ATOMIC_OPCODES::ATOMIC_8B_INCREMENT, atomicCmd->getAtomicOpcode());
    EXPECT_EQ(DATA_SIZE::DATA_SIZE_QWORD, atomicCmd->getDataSize());
    EXPECT_EQ(0u, atomicCmd->getReturnDataControl());
    EXPECT_EQ(0u, atomicCmd->getCsStall());
}
|
||||
|
||||
HWTEST2_F(MultiTileInOrderCmdListTests, givenMultiTileInOrderModeWhenProgrammingWaitOnEventsThenHandleAllEventPackets, IsAtLeastXeHpCore) {
|
||||
using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
|
||||
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
|
||||
|
||||
@@ -262,6 +262,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, DisableSystemPointerKernelArgument, -1, "-1: def
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, ProgramUserInterruptOnResolvedDependency, -1, "-1: default, 0: Disabled, 1: On signaling append completion (if possible) - for example in-order counter update")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, EnableInOrderRegularCmdListPatching, -1, "-1: default, 0: Disabled, 1: If set, patch counter value on execute call")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, EnableInOrderRelaxedOrderingForEventsChaining, -1, "-1: default, 0: Disabled, 1: If set, send 2 immediate flushes to avoid stalling RelaxedOrdering Scheduler.")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, InOrderAtomicSignallingEnabled, -1, "-1: default, 0: disabled, 1: Use atomic GPU operations to increment the counter. Otherwise use non-atomic commands like SDI.")
|
||||
|
||||
/*LOGGING FLAGS*/
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, PrintDriverDiagnostics, -1, "prints driver diagnostics messages to standard output, value corresponds to hint level")
|
||||
|
||||
@@ -565,4 +565,5 @@ EnableImplicitConvertionToCounterBasedEvents = -1
|
||||
SetAmountOfInternalHeapsToPreallocate = -1
|
||||
DoNotUseProductConfigForValidationWa = 0
|
||||
EnableDeviceStateVerificationAfterFailedSubmission = -1
|
||||
InOrderAtomicSignallingEnabled = -1
|
||||
# Please don't edit below this line
|
||||
|
||||
Reference in New Issue
Block a user