mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-18 22:08:53 +08:00
fix: Add infrastructure to force dc flush when mitigate dc

- force dc on next tag update after RT kernel
- force dc when release shared object

Related-To: NEO-10556
Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
773da10099
commit
fed90f5c8e
@@ -89,6 +89,13 @@ void CommandList::removeMemoryPrefetchAllocations() {
|
||||
}
|
||||
}
|
||||
|
||||
// Forwards this command list's pending DC-flush request to the given command
// stream receiver. When requiresDcFlushForDcMitigation is not set the call is a
// no-op; otherwise the request is registered on `csr` and the local flag is
// cleared, so a later call does not register the same request again.
void CommandList::registerCsrDcFlushForDcMitigation(NEO::CommandStreamReceiver &csr) {
    if (!this->requiresDcFlushForDcMitigation) {
        return;
    }

    csr.registerDcFlushForDcMitigation();
    this->requiresDcFlushForDcMitigation = false;
}
|
||||
|
||||
NEO::GraphicsAllocation *CommandList::getAllocationFromHostPtrMap(const void *buffer, uint64_t bufferSize, bool copyOffload) {
|
||||
auto allocation = hostPtrMap.lower_bound(buffer);
|
||||
if (allocation != hostPtrMap.end()) {
|
||||
|
||||
@@ -369,6 +369,8 @@ struct CommandList : _ze_command_list_handle_t {
|
||||
return taskCountUpdateFenceRequired;
|
||||
}
|
||||
|
||||
void registerCsrDcFlushForDcMitigation(NEO::CommandStreamReceiver &csr);
|
||||
|
||||
protected:
|
||||
NEO::GraphicsAllocation *getAllocationFromHostPtrMap(const void *buffer, uint64_t bufferSize, bool copyOffload);
|
||||
NEO::GraphicsAllocation *getHostPtrAlloc(const void *buffer, uint64_t bufferSize, bool hostCopyAllowed, bool copyOffload);
|
||||
@@ -455,6 +457,7 @@ struct CommandList : _ze_command_list_handle_t {
|
||||
bool heaplessStateInitEnabled = false;
|
||||
bool scratchAddressPatchingEnabled = false;
|
||||
bool taskCountUpdateFenceRequired = false;
|
||||
bool requiresDcFlushForDcMitigation = false;
|
||||
};
|
||||
|
||||
using CommandListAllocatorFn = CommandList *(*)(uint32_t);
|
||||
|
||||
@@ -434,6 +434,7 @@ inline ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::executeCommand
|
||||
if (cmdQ->peekIsCopyOnlyCommandQueue()) {
|
||||
completionStamp = flushBcsTask(*commandStream, commandStreamStart, hasStallingCmds, hasRelaxedOrderingDependencies, csr);
|
||||
} else {
|
||||
this->registerCsrDcFlushForDcMitigation(*csr);
|
||||
completionStamp = (this->*computeFlushMethod)(*commandStream, commandStreamStart, hasStallingCmds, hasRelaxedOrderingDependencies, kernelOperation);
|
||||
}
|
||||
|
||||
|
||||
@@ -454,6 +454,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
|
||||
}
|
||||
|
||||
if (kernelImp->usesRayTracing()) {
|
||||
if (this->device->getProductHelper().isDcFlushMitigated()) {
|
||||
this->requiresDcFlushForDcMitigation = true;
|
||||
}
|
||||
|
||||
NEO::PipeControlArgs args{};
|
||||
args.stateCacheInvalidationEnable = true;
|
||||
NEO::MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(*commandContainer.getCommandStream(), args);
|
||||
|
||||
@@ -764,6 +764,8 @@ void CommandQueueHw<gfxCoreFamily>::setupCmdListsAndContextParams(
|
||||
this->partitionCount = std::max(this->partitionCount, commandList->getPartitionCount());
|
||||
|
||||
ctx.cmdListScratchAddressPatchingEnabled |= commandList->getCmdListScratchAddressPatchingEnabled();
|
||||
|
||||
commandList->registerCsrDcFlushForDcMitigation(*this->getCsr());
|
||||
}
|
||||
|
||||
makeResidentAndMigrate(ctx.isMigrationRequested, commandContainer.getResidencyContainer());
|
||||
@@ -1307,6 +1309,7 @@ void CommandQueueHw<gfxCoreFamily>::dispatchTaskCountPostSyncRegular(
|
||||
|
||||
NEO::PipeControlArgs args;
|
||||
args.dcFlushEnable = this->csr->getDcFlushSupport();
|
||||
args.dcFlushEnable |= this->csr->checkDcFlushRequiredForDcMitigationAndReset();
|
||||
args.workloadPartitionOffset = this->partitionCount > 1;
|
||||
args.notifyEnable = this->csr->isUsedNotifyEnableForPostSync();
|
||||
NEO::MemorySynchronizationCommands<GfxFamily>::addBarrierWithPostSyncOperation(
|
||||
|
||||
@@ -105,6 +105,7 @@ struct WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>
|
||||
using BaseClass::pipeControlMultiKernelEventSync;
|
||||
using BaseClass::pipelineSelectStateTracking;
|
||||
using BaseClass::requiredStreamState;
|
||||
using BaseClass::requiresDcFlushForDcMitigation;
|
||||
using BaseClass::requiresQueueUncachedMocs;
|
||||
using BaseClass::scratchAddressPatchingEnabled;
|
||||
using BaseClass::setupTimestampEventForMultiTile;
|
||||
|
||||
@@ -1576,6 +1576,31 @@ HWTEST2_F(CommandListAppendLaunchRayTracingKernelTest, givenKernelUsingRayTracin
|
||||
neoDevice->rtMemoryBackedBuffer = nullptr;
|
||||
}
|
||||
|
||||
// Appends a kernel flagged with hasRTCalls to a regular command list and checks
// that the list's requiresDcFlushForDcMitigation flag ends up equal to
// ProductHelper::isDcFlushMitigated() for the tested device.
HWTEST2_F(CommandListAppendLaunchRayTracingKernelTest, givenDcFlushMitigationWhenAppendLaunchKernelWithRayTracingIsCalledThenRequireDcFlush, RayTracingMatcher) {
    // The test runs with the AllowDcFlush debug flag forced to 0.
    // NOTE(review): `restorer` is presumably what puts the flags back at scope exit - confirm.
    DebugManagerStateRestore restorer;
    debugManager.flags.AllowDcFlush.set(0);

    Mock<::L0::KernelImp> rtKernel;
    std::unique_ptr<Module> mockModule{new Mock<Module>(device, nullptr)};
    rtKernel.module = mockModule.get();
    rtKernel.setGroupSize(4, 1, 1);

    ze_group_count_t dispatchSize{8, 1, 1};

    auto cmdList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    const auto initResult = cmdList->initialize(device, NEO::EngineGroupType::compute, 0);
    ASSERT_EQ(ZE_RESULT_SUCCESS, initResult);

    // Mark the kernel as using ray tracing and give the device an RT backing buffer.
    rtKernel.immutableData.kernelDescriptor->kernelAttributes.flags.hasRTCalls = true;
    neoDevice->rtMemoryBackedBuffer = buffer1;

    CmdListKernelLaunchParams launchParams = {};
    const auto appendResult = cmdList->appendLaunchKernel(rtKernel.toHandle(), dispatchSize, nullptr, 0, nullptr, launchParams, false);
    EXPECT_EQ(ZE_RESULT_SUCCESS, appendResult);

    EXPECT_EQ(cmdList->requiresDcFlushForDcMitigation, device->getProductHelper().isDcFlushMitigated());

    // Detach the buffer from the device again before the test ends.
    neoDevice->rtMemoryBackedBuffer = nullptr;
}
|
||||
|
||||
using RayTracingCmdListTest = Test<RayTracingCmdListFixture>;
|
||||
|
||||
template <typename FamilyType>
|
||||
@@ -1694,6 +1719,28 @@ HWTEST2_F(RayTracingCmdListTest,
|
||||
ultCsr->isMadeResident(rtAllocation, residentCount);
|
||||
}
|
||||
|
||||
// Regular command list path: append the fixture's kernel, close the list,
// execute it on the fixture's queue, then check that the CSR mock recorded a
// DC-flush request exactly when ProductHelper::isDcFlushMitigated() is true.
HWTEST2_F(RayTracingCmdListTest,
          givenDcFlushMitigationWhenRegularAppendLaunchKernelAndExecuteThenRegisterDcFlushForDcFlushMitigation,
          RayTracingMatcher) {
    // The test runs with the AllowDcFlush debug flag forced to 0.
    DebugManagerStateRestore restorer;
    debugManager.flags.AllowDcFlush.set(0);

    auto *csrMock = static_cast<UltCommandStreamReceiver<FamilyType> *>(commandQueue->getCsr());

    CmdListKernelLaunchParams launchParams = {};
    ze_group_count_t singleGroup{1, 1, 1};

    const auto appendResult = commandList->appendLaunchKernel(kernel->toHandle(), singleGroup, nullptr, 0, nullptr, launchParams, false);
    EXPECT_EQ(ZE_RESULT_SUCCESS, appendResult);

    const auto closeResult = commandList->close();
    EXPECT_EQ(ZE_RESULT_SUCCESS, closeResult);

    ze_command_list_handle_t listHandle = commandList->toHandle();
    const auto executeResult = commandQueue->executeCommandLists(1, &listHandle, nullptr, true, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, executeResult);

    EXPECT_EQ(csrMock->registeredDcFlushForDcFlushMitigation, device->getProductHelper().isDcFlushMitigated());
}
|
||||
|
||||
HWTEST2_F(RayTracingCmdListTest,
|
||||
givenRayTracingKernelWhenRegularCmdListExecutedAndImmediateExecutedAgainThenDispatch3dBtdCommandOnceMakeResidentTwiceAndPipeControlWithStateCacheFlushAfterWalker,
|
||||
RayTracingMatcher) {
|
||||
@@ -1796,6 +1843,22 @@ HWTEST2_F(RayTracingCmdListTest,
|
||||
ultCsr->isMadeResident(rtAllocation, residentCount);
|
||||
}
|
||||
|
||||
// Immediate command list path: append the fixture's kernel to the immediate
// list (with isSyncModeQueue set) and check that the CSR mock recorded a
// DC-flush request exactly when ProductHelper::isDcFlushMitigated() is true.
HWTEST2_F(RayTracingCmdListTest,
          givenDcFlushMitigationWhenImmediateAppendLaunchKernelThenRegisterDcFlushForDcFlushMitigation,
          RayTracingMatcher) {
    // The test runs with the AllowDcFlush debug flag forced to 0.
    DebugManagerStateRestore restorer;
    debugManager.flags.AllowDcFlush.set(0);

    commandListImmediate->isSyncModeQueue = true;
    auto *csrMock = static_cast<UltCommandStreamReceiver<FamilyType> *>(commandQueue->getCsr());

    CmdListKernelLaunchParams launchParams = {};
    ze_group_count_t singleGroup{1, 1, 1};

    const auto appendResult = commandListImmediate->appendLaunchKernel(kernel->toHandle(), singleGroup, nullptr, 0, nullptr, launchParams, false);
    EXPECT_EQ(ZE_RESULT_SUCCESS, appendResult);

    EXPECT_EQ(csrMock->registeredDcFlushForDcFlushMitigation, device->getProductHelper().isDcFlushMitigated());
}
|
||||
|
||||
HWTEST2_F(RayTracingCmdListTest,
|
||||
givenRayTracingKernelWhenImmediateCmdListExecutedAndRegularExecutedAgainThenDispatch3dBtdCommandOnceMakeResidentTwiceAndPipeControlWithStateCacheFlushAfterWalker,
|
||||
RayTracingMatcher) {
|
||||
|
||||
@@ -633,8 +633,13 @@ cl_int CommandQueue::enqueueReleaseSharedObjects(cl_uint numObjects, const cl_me
|
||||
memObject->acquireCount--;
|
||||
}
|
||||
|
||||
if (isImageReleased && this->getGpgpuCommandStreamReceiver().isDirectSubmissionEnabled()) {
|
||||
if (this->getGpgpuCommandStreamReceiver().isDirectSubmissionEnabled()) {
|
||||
if (this->getDevice().getProductHelper().isDcFlushMitigated()) {
|
||||
this->getGpgpuCommandStreamReceiver().registerDcFlushForDcMitigation();
|
||||
this->getGpgpuCommandStreamReceiver().sendRenderStateCacheFlush();
|
||||
} else if (isImageReleased) {
|
||||
this->getGpgpuCommandStreamReceiver().sendRenderStateCacheFlush();
|
||||
}
|
||||
}
|
||||
|
||||
auto status = enqueueMarkerWithWaitList(
|
||||
|
||||
@@ -1368,6 +1368,38 @@ HWTEST_F(CommandQueueTests, givenDirectSubmissionAndSharedImageWhenReleasingShar
|
||||
result = cmdQ.enqueueReleaseSharedObjects(numObjects, memObjects, 0, nullptr, nullptr, 0);
|
||||
EXPECT_EQ(result, CL_SUCCESS);
|
||||
EXPECT_TRUE(ultCsr->renderStateCacheFlushed);
|
||||
EXPECT_EQ(ultCsr->renderStateCacheDcFlushForced, context.getDevice(0)->getProductHelper().isDcFlushMitigated());
|
||||
}
|
||||
|
||||
// Acquires and then releases a shared *buffer* (not an image) on a queue whose
// CSR mock reports direct submission as available. After the release, both
// renderStateCacheFlushed and renderStateCacheDcFlushForced on the CSR mock are
// expected to equal ProductHelper::isDcFlushMitigated().
HWTEST_F(CommandQueueTests, givenDcFlushMitigationAndDirectSubmissionAndBufferWhenReleasingSharedObjectThenFlushRenderStateCacheAndForceDcFlush) {
    // The test runs with the AllowDcFlush debug flag forced to 0.
    DebugManagerStateRestore restorer;
    debugManager.flags.AllowDcFlush.set(0);

    MockContext context;
    MockCommandQueue cmdQ(&context, context.getDevice(0), 0, false);

    // NOTE(review): the handler is presumably owned by the buffer once setSharingHandler() is called - confirm.
    auto *sharingHandler = new MockSharingHandler;

    std::unique_ptr<Buffer> sharedBuffer{BufferHelper<>::create(&context)};
    sharedBuffer->setSharingHandler(sharingHandler);
    sharedBuffer->getGraphicsAllocation(0u)->setAllocationType(AllocationType::sharedBuffer);

    cl_mem memObject = sharedBuffer.get();
    cl_mem *memObjects = &memObject;
    cl_uint numObjects = 1;

    cl_int status = cmdQ.enqueueAcquireSharedObjects(numObjects, memObjects, 0, nullptr, nullptr, 0);
    EXPECT_EQ(status, CL_SUCCESS);

    // Configure the CSR mock: direct submission on, base flush implementation bypassed.
    auto *csrMock = static_cast<UltCommandStreamReceiver<FamilyType> *>(&cmdQ.getGpgpuCommandStreamReceiver());
    csrMock->directSubmissionAvailable = true;
    csrMock->callBaseSendRenderStateCacheFlush = false;
    csrMock->flushReturnValue = SubmissionStatus::success;
    EXPECT_FALSE(csrMock->renderStateCacheFlushed);

    status = cmdQ.enqueueReleaseSharedObjects(numObjects, memObjects, 0, nullptr, nullptr, 0);
    EXPECT_EQ(status, CL_SUCCESS);

    EXPECT_EQ(csrMock->renderStateCacheFlushed, context.getDevice(0)->getProductHelper().isDcFlushMitigated());
    EXPECT_EQ(csrMock->renderStateCacheDcFlushForced, context.getDevice(0)->getProductHelper().isDcFlushMitigated());
}
|
||||
|
||||
TEST(CommandQueue, givenEnqueuesForSharedObjectsWithImageWhenUsingSharingHandlerWithEventThenReturnSuccess) {
|
||||
|
||||
@@ -329,6 +329,16 @@ class CommandStreamReceiver {
|
||||
requiresInstructionCacheFlush = true;
|
||||
}
|
||||
|
||||
// Returns whether a DC flush has been requested through
// registerDcFlushForDcMitigation() and clears the request, so the next call
// returns false until a new request is registered.
MOCKABLE_VIRTUAL bool checkDcFlushRequiredForDcMitigationAndReset() {
    const bool flushWasRequested = requiresDcFlush;
    requiresDcFlush = false;
    return flushWasRequested;
}
|
||||
|
||||
void registerDcFlushForDcMitigation() {
|
||||
this->requiresDcFlush = true;
|
||||
}
|
||||
|
||||
bool isLocalMemoryEnabled() const { return localMemoryEnabled; }
|
||||
|
||||
uint32_t getRootDeviceIndex() const { return rootDeviceIndex; }
|
||||
@@ -637,6 +647,7 @@ class CommandStreamReceiver {
|
||||
bool nTo1SubmissionModelEnabled = false;
|
||||
bool lastSystolicPipelineSelectMode = false;
|
||||
bool requiresInstructionCacheFlush = false;
|
||||
bool requiresDcFlush = false;
|
||||
|
||||
bool localMemoryEnabled = false;
|
||||
bool pageTableManagerInitialized = false;
|
||||
|
||||
@@ -1206,7 +1206,7 @@ SubmissionStatus CommandStreamReceiverHw<GfxFamily>::flushPipeControl(bool state
|
||||
auto lock = obtainUniqueOwnership();
|
||||
|
||||
PipeControlArgs args;
|
||||
args.dcFlushEnable = this->dcFlushSupport;
|
||||
args.dcFlushEnable = this->dcFlushSupport || this->checkDcFlushRequiredForDcMitigationAndReset();
|
||||
args.notifyEnable = isUsedNotifyEnableForPostSync();
|
||||
args.workloadPartitionOffset = isMultiTileOperationEnabled();
|
||||
|
||||
@@ -1794,6 +1794,7 @@ inline void CommandStreamReceiverHw<GfxFamily>::processBarrierWithPostSync(Linea
|
||||
auto &rootDeviceEnvironment = this->peekRootDeviceEnvironment();
|
||||
|
||||
args.dcFlushEnable = getDcFlushRequired(dispatchFlags.dcFlush);
|
||||
args.dcFlushEnable |= this->checkDcFlushRequiredForDcMitigationAndReset();
|
||||
args.notifyEnable = isUsedNotifyEnableForPostSync();
|
||||
args.tlbInvalidation |= dispatchFlags.memoryMigrationRequired;
|
||||
args.textureCacheInvalidationEnable |= dispatchFlags.textureCacheFlush;
|
||||
@@ -2153,6 +2154,7 @@ void CommandStreamReceiverHw<GfxFamily>::dispatchImmediateFlushClientBufferComma
|
||||
|
||||
PipeControlArgs args = {};
|
||||
args.dcFlushEnable = this->dcFlushSupport;
|
||||
args.dcFlushEnable |= this->checkDcFlushRequiredForDcMitigationAndReset();
|
||||
args.notifyEnable = isUsedNotifyEnableForPostSync();
|
||||
args.workloadPartitionOffset = isMultiTileOperationEnabled();
|
||||
MemorySynchronizationCommands<GfxFamily>::addBarrierWithPostSyncOperation(
|
||||
|
||||
@@ -256,6 +256,11 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
|
||||
downloadAllocationCalled = true;
|
||||
}
|
||||
|
||||
bool checkDcFlushRequiredForDcMitigationAndReset() override {
|
||||
this->registeredDcFlushForDcFlushMitigation = this->requiresDcFlush;
|
||||
return BaseClass::checkDcFlushRequiredForDcMitigationAndReset();
|
||||
}
|
||||
|
||||
WaitStatus waitForCompletionWithTimeout(const WaitParams &params, TaskCountType taskCountToWait) override {
|
||||
std::lock_guard<std::mutex> guard(mutex);
|
||||
latestWaitForCompletionWithTimeoutTaskCount.store(taskCountToWait);
|
||||
@@ -457,6 +462,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
|
||||
|
||||
SubmissionStatus sendRenderStateCacheFlush() override {
|
||||
this->renderStateCacheFlushed = true;
|
||||
this->renderStateCacheDcFlushForced = this->requiresDcFlush;
|
||||
if (callBaseSendRenderStateCacheFlush) {
|
||||
return BaseClass::sendRenderStateCacheFlush();
|
||||
}
|
||||
@@ -523,6 +529,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
|
||||
std::atomic<uint32_t> downloadAllocationsCalledCount = 0;
|
||||
|
||||
bool renderStateCacheFlushed = false;
|
||||
bool renderStateCacheDcFlushForced = false;
|
||||
bool cpuCopyForHostPtrSurfaceAllowed = false;
|
||||
bool createPageTableManagerCalled = false;
|
||||
bool recordFlusheBatchBuffer = false;
|
||||
@@ -551,6 +558,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
|
||||
bool isKmdWaitOnTaskCountAllowedValue = false;
|
||||
bool stopDirectSubmissionCalled = false;
|
||||
bool stopDirectSubmissionCalledBlocking = false;
|
||||
bool registeredDcFlushForDcFlushMitigation = false;
|
||||
};
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -3440,6 +3440,37 @@ HWTEST_F(CommandStreamReceiverHwTest, givenFlushPipeControlWhenFlushWithStateCac
|
||||
EXPECT_TRUE(UnitTestHelper<FamilyType>::findStateCacheFlushPipeControl(commandStreamReceiver, commandStreamReceiver.commandStream));
|
||||
}
|
||||
|
||||
// Registers a DC-flush request on the CSR, calls sendRenderStateCacheFlush(),
// and then scans the CSR command stream for a PIPE_CONTROL that has DC flush,
// render-target cache flush, state-cache invalidation and texture-cache
// invalidation all enabled, with TLB invalidation matching
// isTlbFlushRequiredForStateCacheFlush().
HWTEST_F(CommandStreamReceiverHwTest, givenDcFlushForcedWhenSendRenderStateCacheFlushThenExpectDcFlush) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();

    commandStreamReceiver.registerDcFlushForDcMitigation();
    commandStreamReceiver.sendRenderStateCacheFlush();

    HardwareParse hwParserCsr;
    hwParserCsr.parsePipeControl = true;
    hwParserCsr.parseCommands<FamilyType>(commandStreamReceiver.commandStream, 0);
    hwParserCsr.findHardwareCommands<FamilyType>();

    bool stateCacheFlushFound = false;
    for (auto rawCommand : hwParserCsr.pipeControlList) {
        auto pipeControl = reinterpret_cast<PIPE_CONTROL *>(rawCommand);

        const bool cacheBitsEnabled = pipeControl->getDcFlushEnable() &&
                                      pipeControl->getRenderTargetCacheFlushEnable() &&
                                      pipeControl->getStateCacheInvalidationEnable() &&
                                      pipeControl->getTextureCacheInvalidationEnable();
        if (!cacheBitsEnabled) {
            continue;
        }

        // TLB invalidation must be present exactly when the CSR requires it.
        const bool tlbFlushRequired = commandStreamReceiver.isTlbFlushRequiredForStateCacheFlush();
        const bool tlbInvalidated = pipeControl->getTlbInvalidate();
        if (tlbFlushRequired == tlbInvalidated) {
            stateCacheFlushFound = true;
            break;
        }
    }

    EXPECT_TRUE(stateCacheFlushFound);
}
|
||||
|
||||
HWTEST2_F(CommandStreamReceiverHwTest,
|
||||
givenRayTracingAllocationPresentWhenFlushingTaskThenDispatchBtdStateCommandOnceAndResidentAlways,
|
||||
IsAtLeastXeHpCore) {
|
||||
|
||||
Reference in New Issue
Block a user