fix: Add texture cache flush after kernels writing to images on regular cmdlist

- Add `needsTextureCacheFlushOnBarrier` flag to CommandList to track if
a texture cache flush is needed
- Set the flag when appending such a kernel to a regular (out-of-order)
command list
- Program texture cache flush in the next barrier and clear the flag
after the flush is programmed
- Clear the flag on command list reset
- Add unit tests

Related-To: NEO-14713
Signed-off-by: Fabian Zwoliński <fabian.zwolinski@intel.com>
This commit is contained in:
Fabian Zwoliński
2025-04-30 14:07:27 +00:00
committed by Compute-Runtime-Automation
parent aaaea67557
commit 7db6402df9
4 changed files with 83 additions and 3 deletions

View File

@@ -416,6 +416,14 @@ struct CommandList : _ze_command_list_handle_t {
return statelessBuiltinsEnabled;
}
// Records whether the next barrier programmed on this command list has to
// carry a texture cache flush.
// @param value true to request the flush, false to clear a pending request.
void setNeedsTextureCacheFlushOnBarrier(bool value) {
needsTextureCacheFlushOnBarrier = value;
}
// Reports the current value of the needsTextureCacheFlushOnBarrier flag.
// @return true when a texture cache flush request is still pending.
bool isTextureCacheFlushOnBarrierNeeded() const {
return needsTextureCacheFlushOnBarrier;
}
void registerCsrDcFlushForDcMitigation(NEO::CommandStreamReceiver &csr);
NEO::EngineGroupType getEngineGroupType() const {
@@ -525,6 +533,7 @@ struct CommandList : _ze_command_list_handle_t {
bool statelessBuiltinsEnabled = false;
bool localDispatchSupport = false;
bool l3FlushAfterPostSyncRequired = false;
bool needsTextureCacheFlushOnBarrier = false;
bool closedCmdList = false;
};

View File

@@ -153,6 +153,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::reset() {
latestOperationRequiredNonWalkerInOrderCmdsChaining = false;
taskCountUpdateFenceRequired = false;
needsTextureCacheFlushOnBarrier = false;
closedCmdList = false;
this->inOrderPatchCmds.clear();

View File

@@ -459,9 +459,15 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
}
}
bool textureFlushRequired = this->device->getProductHelper().isPostImageWriteFlushRequired() &&
this->isImmediateType() &&
kernelInfo->kernelDescriptor.kernelAttributes.hasImageWriteArg;
bool textureFlushRequired = false;
if (this->device->getProductHelper().isPostImageWriteFlushRequired() &&
kernelInfo->kernelDescriptor.kernelAttributes.hasImageWriteArg) {
if (this->isImmediateType()) {
textureFlushRequired = true;
} else if (!this->inOrderExecInfo) {
this->setNeedsTextureCacheFlushOnBarrier(true);
}
}
if (inOrderExecSignalRequired) {
if (inOrderNonWalkerSignalling) {
@@ -605,6 +611,12 @@ NEO::PipeControlArgs CommandListCoreFamily<gfxCoreFamily>::createBarrierFlags()
NEO::PipeControlArgs args;
args.hdcPipelineFlush = true;
args.unTypedDataPortCacheFlush = true;
if (this->isTextureCacheFlushOnBarrierNeeded()) {
args.textureCacheInvalidationEnable = true;
this->setNeedsTextureCacheFlushOnBarrier(false);
}
return args;
}

View File

@@ -1545,6 +1545,64 @@ HWTEST_F(CommandListAppendLaunchKernel, GivenImmCmdListAndKernelWithImageWriteAr
EXPECT_TRUE(cmd->getTextureCacheInvalidationEnable());
}
// Appends a kernel flagged with hasImageWriteArg to a command list obtained from
// CommandList::create and checks that:
//  - the launch leaves the texture-cache-flush-on-barrier request pending,
//  - the following barrier clears the request,
//  - the PIPE_CONTROL emitted for that barrier has texture cache invalidation enabled.
HWTEST2_F(CommandListAppendLaunchKernel, GivenRegularCommandListAndOutOfOrderExecutionWhenKernelWithImageWriteIsAppendedThenBarrierContainsTextureCacheFlush, IsXeHpgCore) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    auto kernel = std::make_unique<Mock<KernelImp>>();
    kernel->module = module.get();
    kernel->immutableData.kernelInfo->kernelDescriptor.kernelAttributes.hasImageWriteArg = true;

    ze_group_count_t groupCount{1, 1, 1};
    ze_result_t returnValue = ZE_RESULT_SUCCESS;
    ze_command_list_flags_t flags = ZE_COMMAND_LIST_FLAG_RELAXED_ORDERING;
    std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::renderCompute, flags, returnValue, false));
    // Stop here on a failed creation instead of dereferencing a null command list below.
    ASSERT_NE(nullptr, commandList.get());
    ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue);

    auto usedSpaceBefore = commandList->getCmdContainer().getCommandStream()->getUsed();
    CmdListKernelLaunchParams launchParams = {};
    auto result = commandList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_TRUE(commandList->isTextureCacheFlushOnBarrierNeeded());

    auto usedSpaceAfter = commandList->getCmdContainer().getCommandStream()->getUsed();
    EXPECT_GT(usedSpaceAfter, usedSpaceBefore);

    usedSpaceBefore = commandList->getCmdContainer().getCommandStream()->getUsed();
    result = commandList->appendBarrier(nullptr, 0, nullptr, false);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_FALSE(commandList->isTextureCacheFlushOnBarrierNeeded());

    usedSpaceAfter = commandList->getCmdContainer().getCommandStream()->getUsed();
    ASSERT_GT(usedSpaceAfter, usedSpaceBefore);

    // Parse only the bytes written by appendBarrier, so the inspected PIPE_CONTROL
    // cannot be one that was emitted as part of the kernel launch.
    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(
        cmdList, ptrOffset(commandList->getCmdContainer().getCommandStream()->getCpuBase(), usedSpaceBefore), usedSpaceAfter - usedSpaceBefore));

    auto itorPC = findAll<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(0u, itorPC.size());

    PIPE_CONTROL *cmd = genCmdCast<PIPE_CONTROL *>(*itorPC[itorPC.size() - 1]);
    EXPECT_TRUE(cmd->getTextureCacheInvalidationEnable());
}
// Appends a kernel flagged with hasImageWriteArg so the texture-cache-flush-on-barrier
// request becomes pending, then checks that reset() clears the request.
HWTEST2_F(CommandListAppendLaunchKernel, whenResettingRegularCommandListThenTextureCacheFlushOnBarrierNeededStateIsCleared, IsXeHpgCore) {
    auto kernel = std::make_unique<Mock<KernelImp>>();
    kernel->module = module.get();
    kernel->immutableData.kernelInfo->kernelDescriptor.kernelAttributes.hasImageWriteArg = true;

    ze_group_count_t groupCount{1, 1, 1};
    ze_result_t returnValue = ZE_RESULT_SUCCESS;
    ze_command_list_flags_t flags = ZE_COMMAND_LIST_FLAG_RELAXED_ORDERING;
    std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::renderCompute, flags, returnValue, false));
    // Stop here on a failed creation instead of dereferencing a null command list below.
    ASSERT_NE(nullptr, commandList.get());
    ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue);

    CmdListKernelLaunchParams launchParams = {};
    auto result = commandList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_TRUE(commandList->isTextureCacheFlushOnBarrierNeeded());

    // A failing reset would make the state check below meaningless, so verify it succeeded.
    result = commandList->reset();
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_FALSE(commandList->isTextureCacheFlushOnBarrierNeeded());
}
template <GFXCORE_FAMILY gfxCoreFamily>
struct MockCommandListCoreFamilyWithoutHeapSupport : public WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>> {
using BaseClass = WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>;