fix: invalidate texture cache for BCS image write

Related-To: NEO-15029

If BCS is writing new data to an image, texture cache
should be invalidated as it might contain stale data for
that image.

Signed-off-by: Szymon Morek <szymon.morek@intel.com>
This commit is contained in:
Szymon Morek
2025-06-13 11:39:26 +00:00
committed by Compute-Runtime-Automation
parent ff380058de
commit ac7d936942
11 changed files with 268 additions and 24 deletions

View File

@@ -1596,6 +1596,166 @@ HWTEST_F(CommandQueueHwTest, GivenBuiltinKernelWhenBuiltinDispatchInfoBuilderIsP
EXPECT_EQ(builder.paramsToUse.kernel, dispatchInfo->getKernel());
}
// Fixture for the texture-cache-flush tests around image enqueues on a blit-enabled queue.
//
// SetUp() skips the test when images are not supported, or when the product helper reports
// that the blitter is not supported for images or blit enqueue is not preferred.
struct ImageTextureCacheFlushTest : public CommandQueueHwBlitTest<false> {
    void SetUp() override {
        REQUIRE_IMAGES_OR_SKIP(defaultHwInfo);

        MockExecutionEnvironment mockExecutionEnvironment{};
        auto &productHelper = mockExecutionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHelper<ProductHelper>();
        if (!productHelper.isBlitterForImagesSupported() || !productHelper.blitEnqueuePreferred(false)) {
            GTEST_SKIP();
        }

        CommandQueueHwBlitTest<false>::SetUp();
        // Remember whether the base fixture was brought up without a skip being raised.
        // TearDown() must key off this rather than IsSkipped(), see below.
        baseSetUpCompleted = !IsSkipped();
        debugManager.flags.ForceCacheFlushForBcs.set(0);
    }

    void TearDown() override {
        // IsSkipped() alone is not a reliable signal here: it is also true when the test
        // body calls GTEST_SKIP() after a successful SetUp(). Bailing out in that case
        // would bypass the base TearDown() and leave the fixture's resources unreleased.
        if (!baseSetUpCompleted) {
            return;
        }
        CommandQueueHwBlitTest<false>::TearDown();
    }

    // Enqueues an NDRange kernel on the fixture queue and expects CL_SUCCESS.
    // usingImages is written to the mock kernel's usingImages field before the enqueue.
    // NOTE(review): assumes pCmdQ really is a MockCommandQueueHw<FamilyType> - confirm against the base fixture.
    template <typename FamilyType>
    void submitKernel(bool usingImages) {
        MockKernelWithInternals kernelInternals(*pClDevice, context);
        kernelInternals.mockKernel->usingImages = usingImages;
        Kernel *kernel = kernelInternals.mockKernel;
        MockMultiDispatchInfo multiDispatchInfo(pClDevice, kernel);

        auto mockCmdQ = static_cast<MockCommandQueueHw<FamilyType> *>(this->pCmdQ);
        auto enqueueResult = mockCmdQ->template enqueueHandler<CL_COMMAND_NDRANGE_KERNEL>(nullptr, 0, false, multiDispatchInfo, 0, nullptr, nullptr);
        EXPECT_EQ(CL_SUCCESS, enqueueResult);
    }

    DebugManagerStateRestore restorer;
    // True only when CommandQueueHwBlitTest<false>::SetUp() ran and no skip had been raised by then.
    bool baseSetUpCompleted = false;
};
// A kernel with usingImages == false is submitted first; the commands appended to the
// queue's command stream (getCS(0)) by the following non-blocking enqueueWriteImage
// must not contain any PIPE_CONTROL.
HWTEST_F(ImageTextureCacheFlushTest, givenTextureCacheFlushNotRequiredWhenEnqueueWriteImageThenNoCacheFlushSubmitted) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    if (nullptr == pCmdQ->getTimestampPacketContainer()) {
        GTEST_SKIP();
    }

    std::unique_ptr<Image> image(ImageHelperUlt<ImageUseHostPtr<Image2dDefaults>>::create(context));
    auto desc = image->getImageDesc();
    size_t writeOrigin[] = {0, 0, 0};
    size_t writeRegion[] = {desc.image_width, desc.image_height, 0};
    char hostData[1] = {};

    submitKernel<FamilyType>(false);

    // Only commands recorded after this offset belong to the image write.
    auto parseOffset = pCmdQ->getCS(0).getUsed();
    auto writeStatus = pCmdQ->enqueueWriteImage(image.get(), CL_FALSE, writeOrigin, writeRegion,
                                                0, 0, hostData, nullptr, 0, 0, nullptr);
    EXPECT_EQ(CL_SUCCESS, writeStatus);

    LinearStream &stream = pCmdQ->getCS(0);
    HardwareParse parser;
    parser.parseCommands<FamilyType>(stream, parseOffset);

    auto foundPipeControls = findAll<PIPE_CONTROL *>(parser.cmdList.begin(), parser.cmdList.end());
    EXPECT_TRUE(foundPipeControls.empty());

    EXPECT_EQ(CL_SUCCESS, pCmdQ->finish());
}
// A kernel with usingImages == true is submitted first; a following non-blocking
// enqueueReadImage must still add no PIPE_CONTROL to the queue's command stream (getCS(0)).
HWTEST_F(ImageTextureCacheFlushTest, givenTextureCacheFlushRequiredWhenEnqueueReadImageThenNoCacheFlushSubmitted) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    if (nullptr == pCmdQ->getTimestampPacketContainer()) {
        GTEST_SKIP();
    }

    std::unique_ptr<Image> image(ImageHelperUlt<ImageUseHostPtr<Image2dDefaults>>::create(context));
    auto desc = image->getImageDesc();
    size_t readOrigin[] = {0, 0, 0};
    size_t readRegion[] = {desc.image_width, desc.image_height, 0};
    char hostData[1] = {};

    submitKernel<FamilyType>(true);

    // Only commands recorded after this offset belong to the image read.
    auto parseOffset = pCmdQ->getCS(0).getUsed();
    auto readStatus = pCmdQ->enqueueReadImage(image.get(), CL_FALSE, readOrigin, readRegion,
                                              0, 0, hostData, nullptr, 0, 0, nullptr);
    EXPECT_EQ(CL_SUCCESS, readStatus);

    LinearStream &stream = pCmdQ->getCS(0);
    HardwareParse parser;
    parser.parseCommands<FamilyType>(stream, parseOffset);

    auto foundPipeControls = findAll<PIPE_CONTROL *>(parser.cmdList.begin(), parser.cmdList.end());
    EXPECT_TRUE(foundPipeControls.empty());

    EXPECT_EQ(CL_SUCCESS, pCmdQ->finish());
}
// A kernel with usingImages == true is submitted first; the following non-blocking
// enqueueWriteImage must add, to the queue's command stream (getCS(0)), a PIPE_CONTROL
// that writes immediate data 0 as post-sync operation and has texture cache invalidation enabled.
HWTEST_F(ImageTextureCacheFlushTest, givenTextureCacheFlushRequiredWhenEnqueueWriteImageThenCacheFlushSubmitted) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    if (nullptr == pCmdQ->getTimestampPacketContainer()) {
        GTEST_SKIP();
    }

    std::unique_ptr<Image> image(ImageHelperUlt<ImageUseHostPtr<Image2dDefaults>>::create(context));
    auto desc = image->getImageDesc();
    size_t writeOrigin[] = {0, 0, 0};
    size_t writeRegion[] = {desc.image_width, desc.image_height, 0};
    char hostData[1] = {};

    submitKernel<FamilyType>(true);

    // Only commands recorded after this offset belong to the image write.
    auto parseOffset = pCmdQ->getCS(0).getUsed();
    auto writeStatus = pCmdQ->enqueueWriteImage(image.get(), CL_FALSE, writeOrigin, writeRegion,
                                                0, 0, hostData, nullptr, 0, 0, nullptr);
    EXPECT_EQ(CL_SUCCESS, writeStatus);

    LinearStream &stream = pCmdQ->getCS(0);
    HardwareParse parser;
    parser.parseCommands<FamilyType>(stream, parseOffset);

    bool textureCacheFlushFound = false;
    auto foundPipeControls = findAll<PIPE_CONTROL *>(parser.cmdList.begin(), parser.cmdList.end());
    EXPECT_FALSE(foundPipeControls.empty());

    // Scan until the first PIPE_CONTROL satisfying all three conditions.
    for (auto cmdIt : foundPipeControls) {
        auto pipeControl = genCmdCast<PIPE_CONTROL *>(*cmdIt);
        if (0u != pipeControl->getImmediateData()) {
            continue;
        }
        if (PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA != pipeControl->getPostSyncOperation()) {
            continue;
        }
        if (pipeControl->getTextureCacheInvalidationEnable()) {
            textureCacheFlushFound = true;
            break;
        }
    }
    EXPECT_TRUE(textureCacheFlushFound);

    EXPECT_EQ(CL_SUCCESS, pCmdQ->finish());
}
HWTEST_F(IoqCommandQueueHwBlitTest, givenImageWithHostPtrWhenCreateImageThenStopRegularBcs) {
REQUIRE_IMAGES_OR_SKIP(defaultHwInfo);
auto &engine = pDevice->getEngine(aub_stream::EngineType::ENGINE_BCS, EngineUsage::regular);

View File

@@ -450,7 +450,7 @@ HWTEST_F(CommandQueueCommandStreamTest, WhenCheckIsTextureCacheFlushNeededThenRe
MockCommandQueue cmdQ(&context, mockDevice.get(), 0, false);
auto &commandStreamReceiver = mockDevice->getUltCommandStreamReceiver<FamilyType>();
std::set<cl_command_type> typesToFlush = {CL_COMMAND_COPY_IMAGE, CL_COMMAND_WRITE_IMAGE, CL_COMMAND_FILL_IMAGE,
std::set<cl_command_type> typesToFlush = {CL_COMMAND_COPY_IMAGE, CL_COMMAND_WRITE_IMAGE, CL_COMMAND_FILL_IMAGE, CL_COMMAND_COPY_BUFFER_TO_IMAGE,
CL_COMMAND_READ_IMAGE, CL_COMMAND_COPY_IMAGE_TO_BUFFER};
for (auto operation = CL_COMMAND_NDRANGE_KERNEL; operation < CL_COMMAND_SVM_MIGRATE_MEM; operation++) {
if (typesToFlush.find(operation) != typesToFlush.end()) {
@@ -3480,11 +3480,11 @@ HWTEST_F(CommandQueueTests, GivenOOQCommandQueueWhenIsGpgpuSubmissionForBcsRequi
mockCmdQ->overrideIsCacheFlushForBcsRequired.returnValue = true;
TimestampPacketDependencies dependencies{};
auto containsCrossEngineDependency = false;
EXPECT_TRUE(mockCmdQ->isGpgpuSubmissionForBcsRequired(false, dependencies, containsCrossEngineDependency));
EXPECT_TRUE(mockCmdQ->isGpgpuSubmissionForBcsRequired(false, dependencies, containsCrossEngineDependency, false));
mockCmdQ->setOoqEnabled();
EXPECT_FALSE(mockCmdQ->isGpgpuSubmissionForBcsRequired(false, dependencies, containsCrossEngineDependency));
EXPECT_FALSE(mockCmdQ->isGpgpuSubmissionForBcsRequired(false, dependencies, containsCrossEngineDependency, false));
containsCrossEngineDependency = true;
EXPECT_TRUE(mockCmdQ->isGpgpuSubmissionForBcsRequired(false, dependencies, containsCrossEngineDependency));
EXPECT_TRUE(mockCmdQ->isGpgpuSubmissionForBcsRequired(false, dependencies, containsCrossEngineDependency, false));
}

View File

@@ -687,6 +687,72 @@ HWTEST_F(EnqueueHandlerTest, givenEnqueueHandlerWhenNotSupportedPolicyChangeThen
mockCmdQ->release();
}
// Enqueues an NDRange kernel whose mock kernel keeps its default usingImages value and
// expects isCacheFlushForImageRequired(CL_COMMAND_WRITE_IMAGE) to report false afterwards.
HWTEST_F(EnqueueHandlerTest, givenEnqueueHandlerWhenImageNotUsedInKernelThenFlagCleared) {
    MockKernelWithInternals mockKernelBundle(*pClDevice, context);
    Kernel *plainKernel = mockKernelBundle.mockKernel;
    MockMultiDispatchInfo dispatchInfo(pClDevice, plainKernel);

    std::unique_ptr<MockCommandQueueHw<FamilyType>> queue{new MockCommandQueueHw<FamilyType>(context, pClDevice, 0)};

    const auto status = queue->template enqueueHandler<CL_COMMAND_NDRANGE_KERNEL>(nullptr, 0, false, dispatchInfo, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, status);

    EXPECT_FALSE(queue->isCacheFlushForImageRequired(CL_COMMAND_WRITE_IMAGE));
}
// Enqueues an NDRange kernel whose mock kernel has usingImages set to true and expects
// isCacheFlushForImageRequired(CL_COMMAND_WRITE_IMAGE) to report true afterwards.
HWTEST_F(EnqueueHandlerTest, givenEnqueueHandlerWhenImageUsedInKernelThenFlagSet) {
    MockKernelWithInternals mockKernelBundle(*pClDevice, context);
    mockKernelBundle.mockKernel->usingImages = true;
    Kernel *imageKernel = mockKernelBundle.mockKernel;
    MockMultiDispatchInfo dispatchInfo(pClDevice, imageKernel);

    std::unique_ptr<MockCommandQueueHw<FamilyType>> queue{new MockCommandQueueHw<FamilyType>(context, pClDevice, 0)};

    const auto status = queue->template enqueueHandler<CL_COMMAND_NDRANGE_KERNEL>(nullptr, 0, false, dispatchInfo, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, status);

    EXPECT_TRUE(queue->isCacheFlushForImageRequired(CL_COMMAND_WRITE_IMAGE));
}
// With ForceCacheFlushForBcs set to 0, compares getTotalSizeRequiredCS for CL_COMMAND_WRITE_IMAGE
// before and after enqueuing a kernel with usingImages == true: the size is expected to grow by
// the size of a barrier with an immediate-data post-sync operation.
HWTEST_F(EnqueueHandlerTest, givenEnqueueHandlerWhenImageUsedInKernelThenGetTotalSizeAdjusted) {
    DebugManagerStateRestore stateRestore;
    debugManager.flags.ForceCacheFlushForBcs.set(0);

    MockKernelWithInternals mockKernelBundle(*pClDevice, context);
    mockKernelBundle.mockKernel->usingImages = true;
    Kernel *imageKernel = mockKernelBundle.mockKernel;
    MockMultiDispatchInfo dispatchInfo(pClDevice, imageKernel);

    auto baselineSize = TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependencyWithBlitEnqueue<FamilyType>();
    auto barrierSize = MemorySynchronizationCommands<FamilyType>::getSizeForBarrierWithPostSyncOperation(pClDevice->getRootDeviceEnvironment(), NEO::PostSyncMode::immediateData);

    std::unique_ptr<MockCommandQueueHw<FamilyType>> queue{new MockCommandQueueHw<FamilyType>(context, pClDevice, 0)};

    // Same size query is issued before and after the kernel enqueue.
    auto queryWriteImageSize = [&]() {
        return EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_WRITE_IMAGE, {}, false, false, true, *queue,
                                                                    dispatchInfo, false, false, false, nullptr);
    };

    auto reportedSize = queryWriteImageSize();
    EXPECT_EQ(baselineSize, reportedSize);

    const auto status = queue->template enqueueHandler<CL_COMMAND_NDRANGE_KERNEL>(nullptr, 0, false, dispatchInfo, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, status);

    reportedSize = queryWriteImageSize();
    EXPECT_EQ(baselineSize + barrierSize, reportedSize);
}
HEAPFUL_HWTEST_F(EnqueueHandlerTest, givenKernelUsingSyncBufferWhenEnqueuingKernelThenSshIsCorrectlyProgrammed) {
using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE;
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;