mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-26 23:33:20 +08:00
performance: Signal inOrder counter with pipe control, part 5
On platforms with DC flush support, resolve the implicit in-order dependency with a pipe control instead of a semaphore wait. Related-To: NEO-13441 Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
112abeeeef
commit
3735ccaed7
@@ -2623,75 +2623,88 @@ void CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(std::sh
|
||||
NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataMemBatchBufferStart(*commandContainer.getCommandStream(), 0, gpuAddress, waitValue, NEO::CompareOperation::less, true, isQwordInOrderCounter(), isCopyOnly(copyOffloadOperation));
|
||||
|
||||
} else {
|
||||
using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT;
|
||||
using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM;
|
||||
auto resolveDependenciesViaPipeControls = !this->isCopyOnly(copyOffloadOperation) && !this->asMutable() && implicitDependency && this->dcFlushSupport;
|
||||
|
||||
bool indirectMode = false;
|
||||
if (NEO::debugManager.flags.ResolveDependenciesViaPipeControls.get() != -1) {
|
||||
resolveDependenciesViaPipeControls = NEO::debugManager.flags.ResolveDependenciesViaPipeControls.get();
|
||||
}
|
||||
|
||||
size_t inOrderPatchListIndex = std::numeric_limits<size_t>::max();
|
||||
if (isQwordInOrderCounter()) {
|
||||
indirectMode = true;
|
||||
if (resolveDependenciesViaPipeControls) {
|
||||
NEO::PipeControlArgs args;
|
||||
args.csStallOnly = true;
|
||||
NEO::MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(*commandContainer.getCommandStream(), args);
|
||||
break;
|
||||
} else {
|
||||
using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT;
|
||||
using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM;
|
||||
|
||||
constexpr uint32_t firstRegister = RegisterOffsets::csGprR0;
|
||||
constexpr uint32_t secondRegister = RegisterOffsets::csGprR0 + 4;
|
||||
bool indirectMode = false;
|
||||
|
||||
auto lri1 = commandContainer.getCommandStream()->template getSpaceForCmd<MI_LOAD_REGISTER_IMM>();
|
||||
auto lri2 = commandContainer.getCommandStream()->template getSpaceForCmd<MI_LOAD_REGISTER_IMM>();
|
||||
size_t inOrderPatchListIndex = std::numeric_limits<size_t>::max();
|
||||
if (isQwordInOrderCounter()) {
|
||||
indirectMode = true;
|
||||
|
||||
if (!noopDispatch) {
|
||||
NEO::LriHelper<GfxFamily>::program(lri1, firstRegister, getLowPart(waitValue), true, isCopyOnly(copyOffloadOperation));
|
||||
NEO::LriHelper<GfxFamily>::program(lri2, secondRegister, getHighPart(waitValue), true, isCopyOnly(copyOffloadOperation));
|
||||
} else {
|
||||
memset(lri1, 0, sizeof(MI_LOAD_REGISTER_IMM));
|
||||
memset(lri2, 0, sizeof(MI_LOAD_REGISTER_IMM));
|
||||
constexpr uint32_t firstRegister = RegisterOffsets::csGprR0;
|
||||
constexpr uint32_t secondRegister = RegisterOffsets::csGprR0 + 4;
|
||||
|
||||
auto lri1 = commandContainer.getCommandStream()->template getSpaceForCmd<MI_LOAD_REGISTER_IMM>();
|
||||
auto lri2 = commandContainer.getCommandStream()->template getSpaceForCmd<MI_LOAD_REGISTER_IMM>();
|
||||
|
||||
if (!noopDispatch) {
|
||||
NEO::LriHelper<GfxFamily>::program(lri1, firstRegister, getLowPart(waitValue), true, isCopyOnly(copyOffloadOperation));
|
||||
NEO::LriHelper<GfxFamily>::program(lri2, secondRegister, getHighPart(waitValue), true, isCopyOnly(copyOffloadOperation));
|
||||
} else {
|
||||
memset(lri1, 0, sizeof(MI_LOAD_REGISTER_IMM));
|
||||
memset(lri2, 0, sizeof(MI_LOAD_REGISTER_IMM));
|
||||
}
|
||||
|
||||
if (inOrderExecInfo->isRegularCmdList()) {
|
||||
inOrderPatchListIndex = addCmdForPatching((implicitDependency ? nullptr : &inOrderExecInfo), lri1, lri2, waitValue, NEO::InOrderPatchCommandHelpers::PatchCmdType::lri64b);
|
||||
if (noopDispatch) {
|
||||
disablePatching(inOrderPatchListIndex);
|
||||
}
|
||||
}
|
||||
if (outListCommands != nullptr) {
|
||||
auto &lri1ToPatch = outListCommands->emplace_back();
|
||||
lri1ToPatch.type = CommandToPatch::CbWaitEventLoadRegisterImm;
|
||||
lri1ToPatch.pDestination = lri1;
|
||||
lri1ToPatch.inOrderPatchListIndex = inOrderPatchListIndex;
|
||||
lri1ToPatch.offset = firstRegister;
|
||||
|
||||
auto &lri2ToPatch = outListCommands->emplace_back();
|
||||
lri2ToPatch.type = CommandToPatch::CbWaitEventLoadRegisterImm;
|
||||
lri2ToPatch.pDestination = lri2;
|
||||
lri2ToPatch.inOrderPatchListIndex = inOrderPatchListIndex;
|
||||
lri2ToPatch.offset = secondRegister;
|
||||
}
|
||||
}
|
||||
|
||||
if (inOrderExecInfo->isRegularCmdList()) {
|
||||
inOrderPatchListIndex = addCmdForPatching((implicitDependency ? nullptr : &inOrderExecInfo), lri1, lri2, waitValue, NEO::InOrderPatchCommandHelpers::PatchCmdType::lri64b);
|
||||
auto semaphoreCommand = reinterpret_cast<MI_SEMAPHORE_WAIT *>(commandContainer.getCommandStream()->getSpace(sizeof(MI_SEMAPHORE_WAIT)));
|
||||
|
||||
if (!noopDispatch) {
|
||||
NEO::EncodeSemaphore<GfxFamily>::programMiSemaphoreWait(semaphoreCommand, gpuAddress, waitValue, COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD,
|
||||
false, true, isQwordInOrderCounter(), indirectMode, false);
|
||||
} else {
|
||||
memset(semaphoreCommand, 0, sizeof(MI_SEMAPHORE_WAIT));
|
||||
}
|
||||
|
||||
if (inOrderExecInfo->isRegularCmdList() && !isQwordInOrderCounter()) {
|
||||
inOrderPatchListIndex = addCmdForPatching((implicitDependency ? nullptr : &inOrderExecInfo), semaphoreCommand, nullptr, waitValue, NEO::InOrderPatchCommandHelpers::PatchCmdType::semaphore);
|
||||
if (noopDispatch) {
|
||||
disablePatching(inOrderPatchListIndex);
|
||||
}
|
||||
} else {
|
||||
inOrderPatchListIndex = std::numeric_limits<size_t>::max();
|
||||
}
|
||||
|
||||
if (outListCommands != nullptr) {
|
||||
auto &lri1ToPatch = outListCommands->emplace_back();
|
||||
lri1ToPatch.type = CommandToPatch::CbWaitEventLoadRegisterImm;
|
||||
lri1ToPatch.pDestination = lri1;
|
||||
lri1ToPatch.inOrderPatchListIndex = inOrderPatchListIndex;
|
||||
lri1ToPatch.offset = firstRegister;
|
||||
|
||||
auto &lri2ToPatch = outListCommands->emplace_back();
|
||||
lri2ToPatch.type = CommandToPatch::CbWaitEventLoadRegisterImm;
|
||||
lri2ToPatch.pDestination = lri2;
|
||||
lri2ToPatch.inOrderPatchListIndex = inOrderPatchListIndex;
|
||||
lri2ToPatch.offset = secondRegister;
|
||||
auto &semaphoreWaitPatch = outListCommands->emplace_back();
|
||||
semaphoreWaitPatch.type = CommandToPatch::CbWaitEventSemaphoreWait;
|
||||
semaphoreWaitPatch.pDestination = semaphoreCommand;
|
||||
semaphoreWaitPatch.offset = i * immWriteOffset;
|
||||
semaphoreWaitPatch.inOrderPatchListIndex = inOrderPatchListIndex;
|
||||
}
|
||||
}
|
||||
|
||||
auto semaphoreCommand = reinterpret_cast<MI_SEMAPHORE_WAIT *>(commandContainer.getCommandStream()->getSpace(sizeof(MI_SEMAPHORE_WAIT)));
|
||||
|
||||
if (!noopDispatch) {
|
||||
NEO::EncodeSemaphore<GfxFamily>::programMiSemaphoreWait(semaphoreCommand, gpuAddress, waitValue, COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD,
|
||||
false, true, isQwordInOrderCounter(), indirectMode, false);
|
||||
} else {
|
||||
memset(semaphoreCommand, 0, sizeof(MI_SEMAPHORE_WAIT));
|
||||
}
|
||||
|
||||
if (inOrderExecInfo->isRegularCmdList() && !isQwordInOrderCounter()) {
|
||||
inOrderPatchListIndex = addCmdForPatching((implicitDependency ? nullptr : &inOrderExecInfo), semaphoreCommand, nullptr, waitValue, NEO::InOrderPatchCommandHelpers::PatchCmdType::semaphore);
|
||||
if (noopDispatch) {
|
||||
disablePatching(inOrderPatchListIndex);
|
||||
}
|
||||
} else {
|
||||
inOrderPatchListIndex = std::numeric_limits<size_t>::max();
|
||||
}
|
||||
|
||||
if (outListCommands != nullptr) {
|
||||
auto &semaphoreWaitPatch = outListCommands->emplace_back();
|
||||
semaphoreWaitPatch.type = CommandToPatch::CbWaitEventSemaphoreWait;
|
||||
semaphoreWaitPatch.pDestination = semaphoreCommand;
|
||||
semaphoreWaitPatch.offset = i * immWriteOffset;
|
||||
semaphoreWaitPatch.inOrderPatchListIndex = inOrderPatchListIndex;
|
||||
}
|
||||
}
|
||||
|
||||
gpuAddress += immWriteOffset;
|
||||
|
||||
@@ -70,6 +70,7 @@ struct InOrderCmdListFixture : public ::Test<ModuleFixture> {
|
||||
|
||||
void SetUp() override {
|
||||
NEO::debugManager.flags.ForcePreemptionMode.set(static_cast<int32_t>(NEO::PreemptionMode::Disabled));
|
||||
NEO::debugManager.flags.ResolveDependenciesViaPipeControls.set(0u);
|
||||
|
||||
::Test<ModuleFixture>::SetUp();
|
||||
createKernel();
|
||||
|
||||
@@ -977,6 +977,33 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenSubmittingThenProgramSemaphor
|
||||
ASSERT_TRUE(verifyInOrderDependency<FamilyType>(itor, 1, immCmdList->inOrderExecInfo->getBaseDeviceAddress() + counterOffset, immCmdList->isQwordInOrderCounter(), false));
|
||||
}
|
||||
|
||||
// Verifies that, with the ResolveDependenciesViaPipeControls debug flag forced to 1,
// the commands emitted by a second kernel launch on an in-order immediate command
// list contain at least one PIPE_CONTROL.
HWTEST2_F(InOrderCmdListTests, givenResolveDependenciesViaPipeControlsForInOrderModeWhenSubmittingThenProgramPipeControlInBetweenDispatches, IsAtLeastXeHpCore) {
|
||||
// NOTE(review): presumably restores debug flags when the test scope ends - confirm against DebugManagerStateRestore.
DebugManagerStateRestore restorer;
|
||||
// Override the value 0 that the fixture's SetUp assigns to this flag.
NEO::debugManager.flags.ResolveDependenciesViaPipeControls.set(1);
|
||||
|
||||
// Offset applied to the in-order counter allocation; it is not referenced by the assertions below.
uint32_t counterOffset = 64;
|
||||
|
||||
auto immCmdList = createImmCmdList<gfxCoreFamily>();
|
||||
immCmdList->inOrderExecInfo->setAllocationOffset(counterOffset);
|
||||
|
||||
auto cmdStream = immCmdList->getCmdContainer().getCommandStream();
|
||||
|
||||
// First dispatch: nothing from it is parsed; it only precedes the second launch.
immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false);
|
||||
|
||||
// Remember where the first dispatch ended so only the second dispatch is parsed.
auto offset = cmdStream->getUsed();
|
||||
|
||||
// Second dispatch: the commands under test.
immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false);
|
||||
|
||||
GenCmdList cmdList;
|
||||
// Parse only the bytes written after `offset`, i.e. those added by the second launch.
ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(
|
||||
cmdList,
|
||||
ptrOffset(cmdStream->getCpuBase(), offset),
|
||||
cmdStream->getUsed() - offset));
|
||||
|
||||
// The test only requires that some PIPE_CONTROL exists in the parsed range;
// it does not check the command's fields or its position.
auto itor = find<typename FamilyType::PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
}
|
||||
|
||||
HWTEST2_F(InOrderCmdListTests, givenDependencyFromDifferentRootDeviceWhenAppendCalledThenCreatePeerAllocation, MatchAny) {
|
||||
NEO::UltDeviceFactory deviceFactory{2, 0};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user