fix: don't flush texture cache if not required

Related-To: NEO-14645

Signed-off-by: Szymon Morek <szymon.morek@intel.com>
This commit is contained in:
Szymon Morek
2025-04-15 13:39:15 +00:00
committed by Compute-Runtime-Automation
parent b423084861
commit 4d60465a9e
7 changed files with 41 additions and 7 deletions

View File

@@ -994,13 +994,15 @@ TaskCountType CommandQueue::peekBcsTaskCount(aub_stream::EngineType bcsEngineTyp
}
bool CommandQueue::isTextureCacheFlushNeeded(uint32_t commandType) const {
auto isDirectSubmissionEnabled = getGpgpuCommandStreamReceiver().isDirectSubmissionEnabled();
switch (commandType) {
case CL_COMMAND_COPY_IMAGE:
case CL_COMMAND_WRITE_IMAGE:
case CL_COMMAND_FILL_IMAGE:
return isDirectSubmissionEnabled;
case CL_COMMAND_READ_IMAGE:
case CL_COMMAND_COPY_IMAGE_TO_BUFFER:
return getGpgpuCommandStreamReceiver().isDirectSubmissionEnabled();
return isDirectSubmissionEnabled && getDevice().getGfxCoreHelper().isCacheFlushPriorImageReadRequired();
default:
return false;
}

View File

@@ -451,17 +451,24 @@ HWTEST_F(CommandQueueCommandStreamTest, WhenCheckIsTextureCacheFlushNeededThenRe
std::set<cl_command_type> typesToFlush = {CL_COMMAND_COPY_IMAGE, CL_COMMAND_WRITE_IMAGE, CL_COMMAND_FILL_IMAGE,
CL_COMMAND_READ_IMAGE, CL_COMMAND_COPY_IMAGE_TO_BUFFER};
for (auto i = CL_COMMAND_NDRANGE_KERNEL; i < CL_COMMAND_SVM_MIGRATE_MEM; i++) {
if (typesToFlush.find(i) != typesToFlush.end()) {
for (auto operation = CL_COMMAND_NDRANGE_KERNEL; operation < CL_COMMAND_SVM_MIGRATE_MEM; operation++) {
if (typesToFlush.find(operation) != typesToFlush.end()) {
commandStreamReceiver.directSubmissionAvailable = true;
EXPECT_TRUE(cmdQ.isTextureCacheFlushNeeded(i));
if (operation == CL_COMMAND_READ_IMAGE || operation == CL_COMMAND_COPY_IMAGE_TO_BUFFER) {
auto isCacheFlushPriorImageReadRequired = mockDevice->getGfxCoreHelper().isCacheFlushPriorImageReadRequired();
EXPECT_EQ(isCacheFlushPriorImageReadRequired, cmdQ.isTextureCacheFlushNeeded(operation));
} else {
EXPECT_TRUE(cmdQ.isTextureCacheFlushNeeded(operation));
}
commandStreamReceiver.directSubmissionAvailable = false;
EXPECT_FALSE(cmdQ.isTextureCacheFlushNeeded(i));
EXPECT_FALSE(cmdQ.isTextureCacheFlushNeeded(operation));
} else {
commandStreamReceiver.directSubmissionAvailable = true;
EXPECT_FALSE(cmdQ.isTextureCacheFlushNeeded(i));
EXPECT_FALSE(cmdQ.isTextureCacheFlushNeeded(operation));
commandStreamReceiver.directSubmissionAvailable = false;
EXPECT_FALSE(cmdQ.isTextureCacheFlushNeeded(i));
EXPECT_FALSE(cmdQ.isTextureCacheFlushNeeded(operation));
}
}
}

View File

@@ -203,6 +203,8 @@ class GfxCoreHelper {
virtual bool getSipBinaryFromExternalLib() const = 0;
virtual uint32_t getImplicitArgsVersion() const = 0;
// Reports whether a cache flush is required prior to an image read.
// CommandQueue::isTextureCacheFlushNeeded combines this with the direct-submission state
// for CL_COMMAND_READ_IMAGE and CL_COMMAND_COPY_IMAGE_TO_BUFFER.
virtual bool isCacheFlushPriorImageReadRequired() const = 0;
virtual ~GfxCoreHelper() = default;
protected:
@@ -450,6 +452,8 @@ class GfxCoreHelperHw : public GfxCoreHelper {
bool getSipBinaryFromExternalLib() const override;
// Per-Family override of GfxCoreHelper::isCacheFlushPriorImageReadRequired.
bool isCacheFlushPriorImageReadRequired() const override;
~GfxCoreHelperHw() override = default;
protected:

View File

@@ -848,4 +848,9 @@ uint32_t GfxCoreHelperHw<Family>::getImplicitArgsVersion() const {
return 0;
}
// Generic implementation, used by every Family that does not provide its own
// specialization: no cache flush is required prior to an image read.
template <typename Family>
bool GfxCoreHelperHw<Family>::isCacheFlushPriorImageReadRequired() const {
    constexpr bool flushPriorImageRead = false;
    return flushPriorImageRead;
}
} // namespace NEO

View File

@@ -91,4 +91,9 @@ bool GfxCoreHelperHw<Family>::usmCompressionSupported(const NEO::HardwareInfo &h
return hwInfo.capabilityTable.ftrRenderCompressedBuffers;
}
// Explicit specialization for the Family this file is built for:
// a cache flush is required prior to an image read.
template <>
bool GfxCoreHelperHw<Family>::isCacheFlushPriorImageReadRequired() const {
    constexpr bool flushPriorImageRead = true;
    return flushPriorImageRead;
}
} // namespace NEO

View File

@@ -1932,3 +1932,8 @@ HWTEST_F(GfxCoreHelperTest, givenDebugFlagForceUseOnlyGlobalTimestampsSetWhenCal
auto &gfxCoreHelper = getHelper<GfxCoreHelper>();
EXPECT_TRUE(gfxCoreHelper.useOnlyGlobalTimestamps());
}
// Runs only for cores matched by IsBeforeXe2HpgCore; the helper must report
// that no cache flush is required prior to an image read.
HWTEST2_F(GfxCoreHelperTest, whenIsCacheFlushPriorImageReadRequiredCalledThenFalseIsReturned, IsBeforeXe2HpgCore) {
    auto &gfxCoreHelper = getHelper<GfxCoreHelper>();
    const bool flushRequired = gfxCoreHelper.isCacheFlushPriorImageReadRequired();
    EXPECT_FALSE(flushRequired);
}

View File

@@ -106,3 +106,9 @@ HWTEST2_F(GfxCoreHelperXe2AndLaterTests, givenAtLeastXe2HpgWhenEncodeAdditionalT
EXPECT_EQ(storeRegMem->getRegisterAddress(), RegisterOffsets::globalTimestampUn);
EXPECT_EQ(storeRegMem->getMemoryAddress(), sndAddress + sizeof(uint32_t));
}
// Runs only for cores matched by IsAtLeastXe2HpgCore; the helper obtained from a
// mock execution environment must report that a cache flush is required prior to an image read.
HWTEST2_F(GfxCoreHelperXe2AndLaterTests, givenAtLeastXe2HpgWhenIsCacheFlushPriorImageReadRequiredThenTrueIsReturned, IsAtLeastXe2HpgCore) {
    MockExecutionEnvironment executionEnvironment{};
    auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[0];
    auto &helper = rootDeviceEnvironment.getHelper<GfxCoreHelper>();
    const bool flushRequired = helper.isCacheFlushPriorImageReadRequired();
    EXPECT_TRUE(flushRequired);
}