fix: Add infrastructure to force a DC flush when DC flush is mitigated

- force a DC flush on the next tag update after an RT kernel
- force a DC flush when releasing a shared object

Related-To: NEO-10556

Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
Lukasz Jobczyk
2024-06-21 12:00:59 +00:00
committed by Compute-Runtime-Automation
parent 773da10099
commit fed90f5c8e
13 changed files with 174 additions and 3 deletions

View File

@@ -89,6 +89,13 @@ void CommandList::removeMemoryPrefetchAllocations() {
}
}
// Hands a pending DC-flush request of this command list over to `csr`.
// If requiresDcFlushForDcMitigation is set, the request is registered on the
// CSR and the local flag is cleared afterwards, so the same request is not
// registered again on a later call. Does nothing when no request is pending.
void CommandList::registerCsrDcFlushForDcMitigation(NEO::CommandStreamReceiver &csr) {
    if (!this->requiresDcFlushForDcMitigation) {
        return;
    }

    csr.registerDcFlushForDcMitigation();
    this->requiresDcFlushForDcMitigation = false;
}
NEO::GraphicsAllocation *CommandList::getAllocationFromHostPtrMap(const void *buffer, uint64_t bufferSize, bool copyOffload) {
auto allocation = hostPtrMap.lower_bound(buffer);
if (allocation != hostPtrMap.end()) {

View File

@@ -369,6 +369,8 @@ struct CommandList : _ze_command_list_handle_t {
return taskCountUpdateFenceRequired;
}
// Registers a DC flush on `csr` when this command list has a pending request
// (requiresDcFlushForDcMitigation), then clears that request.
void registerCsrDcFlushForDcMitigation(NEO::CommandStreamReceiver &csr);
protected:
NEO::GraphicsAllocation *getAllocationFromHostPtrMap(const void *buffer, uint64_t bufferSize, bool copyOffload);
NEO::GraphicsAllocation *getHostPtrAlloc(const void *buffer, uint64_t bufferSize, bool hostCopyAllowed, bool copyOffload);
@@ -455,6 +457,7 @@ struct CommandList : _ze_command_list_handle_t {
bool heaplessStateInitEnabled = false;
bool scratchAddressPatchingEnabled = false;
bool taskCountUpdateFenceRequired = false;
// Pending DC-flush request: set when a ray-tracing kernel is appended on a product
// whose helper reports DC flush mitigation; reset by registerCsrDcFlushForDcMitigation().
bool requiresDcFlushForDcMitigation = false;
};
using CommandListAllocatorFn = CommandList *(*)(uint32_t);

View File

@@ -434,6 +434,7 @@ inline ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::executeCommand
if (cmdQ->peekIsCopyOnlyCommandQueue()) {
completionStamp = flushBcsTask(*commandStream, commandStreamStart, hasStallingCmds, hasRelaxedOrderingDependencies, csr);
} else {
this->registerCsrDcFlushForDcMitigation(*csr);
completionStamp = (this->*computeFlushMethod)(*commandStream, commandStreamStart, hasStallingCmds, hasRelaxedOrderingDependencies, kernelOperation);
}

View File

@@ -454,6 +454,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
}
if (kernelImp->usesRayTracing()) {
if (this->device->getProductHelper().isDcFlushMitigated()) {
this->requiresDcFlushForDcMitigation = true;
}
NEO::PipeControlArgs args{};
args.stateCacheInvalidationEnable = true;
NEO::MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(*commandContainer.getCommandStream(), args);

View File

@@ -764,6 +764,8 @@ void CommandQueueHw<gfxCoreFamily>::setupCmdListsAndContextParams(
this->partitionCount = std::max(this->partitionCount, commandList->getPartitionCount());
ctx.cmdListScratchAddressPatchingEnabled |= commandList->getCmdListScratchAddressPatchingEnabled();
commandList->registerCsrDcFlushForDcMitigation(*this->getCsr());
}
makeResidentAndMigrate(ctx.isMigrationRequested, commandContainer.getResidencyContainer());
@@ -1307,6 +1309,7 @@ void CommandQueueHw<gfxCoreFamily>::dispatchTaskCountPostSyncRegular(
NEO::PipeControlArgs args;
args.dcFlushEnable = this->csr->getDcFlushSupport();
args.dcFlushEnable |= this->csr->checkDcFlushRequiredForDcMitigationAndReset();
args.workloadPartitionOffset = this->partitionCount > 1;
args.notifyEnable = this->csr->isUsedNotifyEnableForPostSync();
NEO::MemorySynchronizationCommands<GfxFamily>::addBarrierWithPostSyncOperation(

View File

@@ -105,6 +105,7 @@ struct WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>
using BaseClass::pipeControlMultiKernelEventSync;
using BaseClass::pipelineSelectStateTracking;
using BaseClass::requiredStreamState;
using BaseClass::requiresDcFlushForDcMitigation;
using BaseClass::requiresQueueUncachedMocs;
using BaseClass::scratchAddressPatchingEnabled;
using BaseClass::setupTimestampEventForMultiTile;

View File

@@ -1576,6 +1576,31 @@ HWTEST2_F(CommandListAppendLaunchRayTracingKernelTest, givenKernelUsingRayTracin
neoDevice->rtMemoryBackedBuffer = nullptr;
}
// After appending a kernel flagged with RT calls, the command list's
// requiresDcFlushForDcMitigation flag must equal what the product helper
// reports through isDcFlushMitigated().
HWTEST2_F(CommandListAppendLaunchRayTracingKernelTest, givenDcFlushMitigationWhenAppendLaunchKernelWithRayTracingIsCalledThenRequireDcFlush, RayTracingMatcher) {
    DebugManagerStateRestore restorer;
    debugManager.flags.AllowDcFlush.set(0);

    // Mock kernel backed by a mock module, with a 4x1x1 group size.
    Mock<::L0::KernelImp> kernel;
    std::unique_ptr<Module> mockModule(new Mock<Module>(device, nullptr));
    kernel.module = mockModule.get();
    kernel.setGroupSize(4, 1, 1);
    ze_group_count_t dispatchSize{8, 1, 1};

    auto cmdList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    ASSERT_EQ(ZE_RESULT_SUCCESS, cmdList->initialize(device, NEO::EngineGroupType::compute, 0));

    // Mark the kernel as a ray-tracing one and give the device an RT backing buffer.
    kernel.immutableData.kernelDescriptor->kernelAttributes.flags.hasRTCalls = true;
    neoDevice->rtMemoryBackedBuffer = buffer1;

    CmdListKernelLaunchParams launchParams = {};
    const auto appendResult = cmdList->appendLaunchKernel(kernel.toHandle(), dispatchSize, nullptr, 0, nullptr, launchParams, false);
    EXPECT_EQ(ZE_RESULT_SUCCESS, appendResult);

    EXPECT_EQ(cmdList->requiresDcFlushForDcMitigation, device->getProductHelper().isDcFlushMitigated());

    // Detach the buffer from the device again before the test ends.
    neoDevice->rtMemoryBackedBuffer = nullptr;
}
using RayTracingCmdListTest = Test<RayTracingCmdListFixture>;
template <typename FamilyType>
@@ -1694,6 +1719,28 @@ HWTEST2_F(RayTracingCmdListTest,
ultCsr->isMadeResident(rtAllocation, residentCount);
}
// Regular command list path: append a kernel, close the list and execute it on
// the queue. The CSR's registeredDcFlushForDcFlushMitigation must then equal
// what the product helper reports through isDcFlushMitigated().
HWTEST2_F(RayTracingCmdListTest,
          givenDcFlushMitigationWhenRegularAppendLaunchKernelAndExecuteThenRegisterDcFlushForDcFlushMitigation,
          RayTracingMatcher) {
    DebugManagerStateRestore restorer;
    debugManager.flags.AllowDcFlush.set(0);

    auto csr = static_cast<UltCommandStreamReceiver<FamilyType> *>(commandQueue->getCsr());

    ze_group_count_t dispatchSize{1, 1, 1};
    CmdListKernelLaunchParams launchParams = {};
    const auto appendResult = commandList->appendLaunchKernel(kernel->toHandle(), dispatchSize, nullptr, 0, nullptr, launchParams, false);
    EXPECT_EQ(ZE_RESULT_SUCCESS, appendResult);

    const auto closeResult = commandList->close();
    EXPECT_EQ(ZE_RESULT_SUCCESS, closeResult);

    // Submit the closed list to the queue.
    ze_command_list_handle_t handle = commandList->toHandle();
    const auto executeResult = commandQueue->executeCommandLists(1, &handle, nullptr, true, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, executeResult);

    EXPECT_EQ(csr->registeredDcFlushForDcFlushMitigation, device->getProductHelper().isDcFlushMitigated());
}
HWTEST2_F(RayTracingCmdListTest,
givenRayTracingKernelWhenRegularCmdListExecutedAndImmediateExecutedAgainThenDispatch3dBtdCommandOnceMakeResidentTwiceAndPipeControlWithStateCacheFlushAfterWalker,
RayTracingMatcher) {
@@ -1796,6 +1843,22 @@ HWTEST2_F(RayTracingCmdListTest,
ultCsr->isMadeResident(rtAllocation, residentCount);
}
// Immediate command list path: a single append, with the list's isSyncModeQueue
// set. The CSR's registeredDcFlushForDcFlushMitigation must then equal what the
// product helper reports through isDcFlushMitigated().
HWTEST2_F(RayTracingCmdListTest,
          givenDcFlushMitigationWhenImmediateAppendLaunchKernelThenRegisterDcFlushForDcFlushMitigation,
          RayTracingMatcher) {
    DebugManagerStateRestore restorer;
    debugManager.flags.AllowDcFlush.set(0);

    commandListImmediate->isSyncModeQueue = true;
    auto csr = static_cast<UltCommandStreamReceiver<FamilyType> *>(commandQueue->getCsr());

    ze_group_count_t dispatchSize{1, 1, 1};
    CmdListKernelLaunchParams launchParams = {};
    const auto appendResult = commandListImmediate->appendLaunchKernel(kernel->toHandle(), dispatchSize, nullptr, 0, nullptr, launchParams, false);
    EXPECT_EQ(ZE_RESULT_SUCCESS, appendResult);

    EXPECT_EQ(csr->registeredDcFlushForDcFlushMitigation, device->getProductHelper().isDcFlushMitigated());
}
HWTEST2_F(RayTracingCmdListTest,
givenRayTracingKernelWhenImmediateCmdListExecutedAndRegularExecutedAgainThenDispatch3dBtdCommandOnceMakeResidentTwiceAndPipeControlWithStateCacheFlushAfterWalker,
RayTracingMatcher) {