mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-25 05:24:02 +08:00
fix: use mfence instead of sfence on discrete devices
Related-To: NEO-14642
Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
41efee1e7c
commit
97358acabe
@@ -437,7 +437,11 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::allocateOsResources() {
|
||||
template <typename GfxFamily, typename Dispatcher>
|
||||
inline void DirectSubmissionHw<GfxFamily, Dispatcher>::unblockGpu() {
|
||||
if (sfenceMode >= DirectSubmissionSfenceMode::beforeSemaphoreOnly) {
|
||||
CpuIntrinsics::sfence();
|
||||
if (!this->miMemFenceRequired && !this->pciBarrierPtr && !this->hwInfo->capabilityTable.isIntegratedDevice) {
|
||||
CpuIntrinsics::mfence();
|
||||
} else {
|
||||
CpuIntrinsics::sfence();
|
||||
}
|
||||
}
|
||||
|
||||
if (this->pciBarrierPtr) {
|
||||
|
||||
@@ -45,6 +45,10 @@ void sfence() {
|
||||
_mm_sfence();
|
||||
}
|
||||
|
||||
void mfence() {
|
||||
_mm_mfence();
|
||||
}
|
||||
|
||||
void pause() {
|
||||
_mm_pause();
|
||||
}
|
||||
|
||||
@@ -14,6 +14,8 @@ namespace CpuIntrinsics {
|
||||
|
||||
void sfence();
|
||||
|
||||
void mfence();
|
||||
|
||||
void clFlush(void const *ptr);
|
||||
|
||||
void clFlushOpt(void *ptr);
|
||||
|
||||
@@ -20,6 +20,7 @@ std::atomic<uintptr_t> lastClFlushedPtr(0u);
|
||||
std::atomic<uint32_t> clFlushCounter(0u);
|
||||
std::atomic<uint32_t> pauseCounter(0u);
|
||||
std::atomic<uint32_t> sfenceCounter(0u);
|
||||
std::atomic<uint32_t> mfenceCounter(0u);
|
||||
|
||||
std::atomic<uint64_t> lastUmwaitCounter(0u);
|
||||
std::atomic<unsigned int> lastUmwaitControl(0u);
|
||||
@@ -58,6 +59,10 @@ void sfence() {
|
||||
CpuIntrinsicsTests::sfenceCounter++;
|
||||
}
|
||||
|
||||
void mfence() {
|
||||
CpuIntrinsicsTests::mfenceCounter++;
|
||||
}
|
||||
|
||||
void pause() {
|
||||
CpuIntrinsicsTests::pauseCounter++;
|
||||
if (CpuIntrinsicsTests::pauseAddress != nullptr) {
|
||||
|
||||
@@ -37,6 +37,7 @@
|
||||
|
||||
namespace CpuIntrinsicsTests {
|
||||
extern std::atomic<uint32_t> sfenceCounter;
|
||||
extern std::atomic<uint32_t> mfenceCounter;
|
||||
} // namespace CpuIntrinsicsTests
|
||||
|
||||
using DirectSubmissionTest = Test<DirectSubmissionFixture>;
|
||||
@@ -1079,13 +1080,20 @@ HWTEST_F(DirectSubmissionDispatchBufferTest, givenDebugFlagSetWhenDispatchingWor
|
||||
MockDirectSubmissionHw<FamilyType, Dispatcher> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
||||
EXPECT_TRUE(directSubmission.initialize(true));
|
||||
|
||||
auto initialCounterValue = CpuIntrinsicsTests::sfenceCounter.load();
|
||||
auto initialSfenceCounterValue = CpuIntrinsicsTests::sfenceCounter.load();
|
||||
auto initialMfenceCounterValue = CpuIntrinsicsTests::mfenceCounter.load();
|
||||
|
||||
EXPECT_TRUE(directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp));
|
||||
|
||||
uint32_t expectedCount = (debugFlag == -1) ? 2 : static_cast<uint32_t>(debugFlag);
|
||||
uint32_t expectedSfenceCount = (debugFlag == -1) ? 2 : static_cast<uint32_t>(debugFlag);
|
||||
uint32_t expectedMfenceCount = 0u;
|
||||
if (!pDevice->getHardwareInfo().capabilityTable.isIntegratedDevice && !pDevice->getProductHelper().isGlobalFenceInDirectSubmissionRequired(pDevice->getHardwareInfo()) && expectedSfenceCount > 0u) {
|
||||
--expectedSfenceCount;
|
||||
++expectedMfenceCount;
|
||||
}
|
||||
|
||||
EXPECT_EQ(initialCounterValue + expectedCount, CpuIntrinsicsTests::sfenceCounter);
|
||||
EXPECT_EQ(initialSfenceCounterValue + expectedSfenceCount, CpuIntrinsicsTests::sfenceCounter);
|
||||
EXPECT_EQ(initialMfenceCounterValue + expectedMfenceCount, CpuIntrinsicsTests::mfenceCounter);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1102,13 +1110,20 @@ HWTEST_F(DirectSubmissionDispatchBufferTest, givenDebugFlagSetWhenStoppingRingbu
|
||||
MockDirectSubmissionHw<FamilyType, Dispatcher> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
||||
EXPECT_TRUE(directSubmission.initialize(true));
|
||||
|
||||
auto initialCounterValue = CpuIntrinsicsTests::sfenceCounter.load();
|
||||
auto initialSfenceCounterValue = CpuIntrinsicsTests::sfenceCounter.load();
|
||||
auto initialMfenceCounterValue = CpuIntrinsicsTests::mfenceCounter.load();
|
||||
|
||||
EXPECT_TRUE(directSubmission.stopRingBuffer(false));
|
||||
|
||||
uint32_t expectedCount = (debugFlag == -1) ? 2 : static_cast<uint32_t>(debugFlag);
|
||||
uint32_t expectedSfenceCount = (debugFlag == -1) ? 2 : static_cast<uint32_t>(debugFlag);
|
||||
uint32_t expectedMfenceCount = 0u;
|
||||
if (!pDevice->getHardwareInfo().capabilityTable.isIntegratedDevice && !directSubmission.pciBarrierPtr && !pDevice->getProductHelper().isGlobalFenceInDirectSubmissionRequired(pDevice->getHardwareInfo()) && expectedSfenceCount > 0u) {
|
||||
--expectedSfenceCount;
|
||||
++expectedMfenceCount;
|
||||
}
|
||||
|
||||
EXPECT_EQ(initialCounterValue + expectedCount, CpuIntrinsicsTests::sfenceCounter);
|
||||
EXPECT_EQ(initialSfenceCounterValue + expectedSfenceCount, CpuIntrinsicsTests::sfenceCounter);
|
||||
EXPECT_EQ(initialMfenceCounterValue + expectedMfenceCount, CpuIntrinsicsTests::mfenceCounter);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user