fix: use mfence instead of sfence on discrete devices

Related-To: NEO-14642

Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
Lukasz Jobczyk
2025-04-25 11:45:03 +00:00
committed by Compute-Runtime-Automation
parent 41efee1e7c
commit 97358acabe
5 changed files with 37 additions and 7 deletions

View File

@@ -437,7 +437,11 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::allocateOsResources() {
template <typename GfxFamily, typename Dispatcher>
inline void DirectSubmissionHw<GfxFamily, Dispatcher>::unblockGpu() {
if (sfenceMode >= DirectSubmissionSfenceMode::beforeSemaphoreOnly) {
CpuIntrinsics::sfence();
if (!this->miMemFenceRequired && !this->pciBarrierPtr && !this->hwInfo->capabilityTable.isIntegratedDevice) {
CpuIntrinsics::mfence();
} else {
CpuIntrinsics::sfence();
}
}
if (this->pciBarrierPtr) {

View File

@@ -45,6 +45,10 @@ void sfence() {
_mm_sfence();
}
// Thin wrapper around the `_mm_mfence` compiler intrinsic, exposed as a
// free function so callers go through CpuIntrinsics::mfence() rather than
// the intrinsic directly. Per the Intel intrinsics documentation,
// `_mm_mfence` orders both loads and stores.
void mfence() {
_mm_mfence();
}
// Thin wrapper around the `_mm_pause` compiler intrinsic.
void pause() {
_mm_pause();
}

View File

@@ -14,6 +14,8 @@ namespace CpuIntrinsics {
void sfence();
void mfence();
void clFlush(void const *ptr);
void clFlushOpt(void *ptr);

View File

@@ -20,6 +20,7 @@ std::atomic<uintptr_t> lastClFlushedPtr(0u);
std::atomic<uint32_t> clFlushCounter(0u);
std::atomic<uint32_t> pauseCounter(0u);
std::atomic<uint32_t> sfenceCounter(0u);
std::atomic<uint32_t> mfenceCounter(0u);
std::atomic<uint64_t> lastUmwaitCounter(0u);
std::atomic<unsigned int> lastUmwaitControl(0u);
@@ -58,6 +59,10 @@ void sfence() {
CpuIntrinsicsTests::sfenceCounter++;
}
// Test stand-in for mfence(): only increments
// CpuIntrinsicsTests::mfenceCounter, so tests can compare the counter
// before and after an operation to see how many mfence() calls it made.
void mfence() {
CpuIntrinsicsTests::mfenceCounter++;
}
void pause() {
CpuIntrinsicsTests::pauseCounter++;
if (CpuIntrinsicsTests::pauseAddress != nullptr) {

View File

@@ -37,6 +37,7 @@
// Call counters declared `extern` here (their definitions are not in this
// translation unit). Tests snapshot them with load() and compare afterwards.
namespace CpuIntrinsicsTests {
extern std::atomic<uint32_t> sfenceCounter; // incremented by sfence()
extern std::atomic<uint32_t> mfenceCounter; // incremented by mfence()
} // namespace CpuIntrinsicsTests
using DirectSubmissionTest = Test<DirectSubmissionFixture>;
@@ -1079,13 +1080,20 @@ HWTEST_F(DirectSubmissionDispatchBufferTest, givenDebugFlagSetWhenDispatchingWor
MockDirectSubmissionHw<FamilyType, Dispatcher> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
EXPECT_TRUE(directSubmission.initialize(true));
auto initialCounterValue = CpuIntrinsicsTests::sfenceCounter.load();
auto initialSfenceCounterValue = CpuIntrinsicsTests::sfenceCounter.load();
auto initialMfenceCounterValue = CpuIntrinsicsTests::mfenceCounter.load();
EXPECT_TRUE(directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp));
uint32_t expectedCount = (debugFlag == -1) ? 2 : static_cast<uint32_t>(debugFlag);
uint32_t expectedSfenceCount = (debugFlag == -1) ? 2 : static_cast<uint32_t>(debugFlag);
uint32_t expectedMfenceCount = 0u;
if (!pDevice->getHardwareInfo().capabilityTable.isIntegratedDevice && !pDevice->getProductHelper().isGlobalFenceInDirectSubmissionRequired(pDevice->getHardwareInfo()) && expectedSfenceCount > 0u) {
--expectedSfenceCount;
++expectedMfenceCount;
}
EXPECT_EQ(initialCounterValue + expectedCount, CpuIntrinsicsTests::sfenceCounter);
EXPECT_EQ(initialSfenceCounterValue + expectedSfenceCount, CpuIntrinsicsTests::sfenceCounter);
EXPECT_EQ(initialMfenceCounterValue + expectedMfenceCount, CpuIntrinsicsTests::mfenceCounter);
}
}
@@ -1102,13 +1110,20 @@ HWTEST_F(DirectSubmissionDispatchBufferTest, givenDebugFlagSetWhenStoppingRingbu
MockDirectSubmissionHw<FamilyType, Dispatcher> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
EXPECT_TRUE(directSubmission.initialize(true));
auto initialCounterValue = CpuIntrinsicsTests::sfenceCounter.load();
auto initialSfenceCounterValue = CpuIntrinsicsTests::sfenceCounter.load();
auto initialMfenceCounterValue = CpuIntrinsicsTests::mfenceCounter.load();
EXPECT_TRUE(directSubmission.stopRingBuffer(false));
uint32_t expectedCount = (debugFlag == -1) ? 2 : static_cast<uint32_t>(debugFlag);
uint32_t expectedSfenceCount = (debugFlag == -1) ? 2 : static_cast<uint32_t>(debugFlag);
uint32_t expectedMfenceCount = 0u;
if (!pDevice->getHardwareInfo().capabilityTable.isIntegratedDevice && !directSubmission.pciBarrierPtr && !pDevice->getProductHelper().isGlobalFenceInDirectSubmissionRequired(pDevice->getHardwareInfo()) && expectedSfenceCount > 0u) {
--expectedSfenceCount;
++expectedMfenceCount;
}
EXPECT_EQ(initialCounterValue + expectedCount, CpuIntrinsicsTests::sfenceCounter);
EXPECT_EQ(initialSfenceCounterValue + expectedSfenceCount, CpuIntrinsicsTests::sfenceCounter);
EXPECT_EQ(initialMfenceCounterValue + expectedMfenceCount, CpuIntrinsicsTests::mfenceCounter);
}
}