performance: Mitigate dc flush on LNL Windows

Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
Lukasz Jobczyk 2024-08-28 10:11:54 +00:00 committed by Compute-Runtime-Automation
parent c9dd2b630a
commit 5aa5d40937
19 changed files with 84 additions and 16 deletions

View File

@ -17,4 +17,8 @@ bool CommandQueue::isTimestampWaitEnabled() {
return true;
}
// Tells whether an allocation of the given type needs a GPU copy as part of
// DC flush mitigation. Every type except bufferHostMemory requires one.
bool checkIsGpuCopyRequiredForDcFlushMitigation(AllocationType type) {
    const bool isHostMemoryBuffer = (type == AllocationType::bufferHostMemory);
    return !isHostMemoryBuffer;
}
} // namespace NEO

View File

@ -184,6 +184,8 @@ Buffer *Buffer::create(Context *context,
flags, 0, size, hostPtr, bufferCreateArgs, errcodeRet);
}
// Declared here, defined elsewhere. copyHostPointer() combines this with
// productHelper.isDcFlushMitigated() to decide whether a GPU copy is required.
extern bool checkIsGpuCopyRequiredForDcFlushMitigation(AllocationType type);
bool inline copyHostPointer(Buffer *buffer,
Device &device,
size_t size,
@ -195,7 +197,8 @@ bool inline copyHostPointer(Buffer *buffer,
auto memory = buffer->getGraphicsAllocation(rootDeviceIndex);
auto isCompressionEnabled = memory->isCompressionEnabled();
const bool isLocalMemory = !MemoryPoolHelper::isSystemMemoryPool(memory->getMemoryPool());
const bool gpuCopyRequired = isCompressionEnabled || isLocalMemory || productHelper.isDcFlushMitigated();
const bool isGpuCopyRequiredForDcFlushMitigation = productHelper.isDcFlushMitigated() && checkIsGpuCopyRequiredForDcFlushMitigation(memory->getAllocationType());
const bool gpuCopyRequired = isCompressionEnabled || isLocalMemory || isGpuCopyRequiredForDcFlushMitigation;
if (gpuCopyRequired) {
auto &hwInfo = device.getHardwareInfo();
@ -210,7 +213,7 @@ bool inline copyHostPointer(Buffer *buffer,
isCompressionEnabled == false &&
productHelper.getLocalMemoryAccessMode(hwInfo) != LocalMemoryAccessMode::cpuAccessDisallowed &&
isLockable &&
!productHelper.isDcFlushMitigated();
!isGpuCopyRequiredForDcFlushMitigation;
if (debugManager.flags.CopyHostPtrOnCpu.get() != -1) {
copyOnCpuAllowed = debugManager.flags.CopyHostPtrOnCpu.get() == 1;
@ -223,7 +226,7 @@ bool inline copyHostPointer(Buffer *buffer,
} else {
auto blitMemoryToAllocationResult = BlitOperationResult::unsupported;
if (productHelper.isBlitterFullySupported(hwInfo) && (isLocalMemory || productHelper.isDcFlushMitigated())) {
if (productHelper.isBlitterFullySupported(hwInfo) && (isLocalMemory || isGpuCopyRequiredForDcFlushMitigation)) {
blitMemoryToAllocationResult = BlitHelperFunctions::blitMemoryToAllocation(device, memory, buffer->getOffset(), hostPtr, {size, 1, 1});
}

View File

@ -403,7 +403,7 @@ void CopyEngineXeHPAndLater<numTiles, testLocalMemory>::givenSrcCompressedBuffer
template <uint32_t numTiles, bool testLocalMemory>
template <typename FamilyType>
void CopyEngineXeHPAndLater<numTiles, testLocalMemory>::givenCompressedBufferWhenAuxTranslationCalledThenResolveAndCompressImpl() {
if (this->context->getDevice(0u)->areSharedSystemAllocationsAllowed() || !compressionSupported()) {
if (this->context->getDevice(0u)->areSharedSystemAllocationsAllowed() || !compressionSupported() || this->context->getDevice(0u)->getProductHelper().isDcFlushMitigated()) {
// no support for scenarios where stateless is mixed with blitter compression
GTEST_SKIP();
}

View File

@ -469,7 +469,7 @@ TEST_P(PerformanceHintEnqueueMapTest, GivenZeroCopyFlagWhenEnqueueMapBufferIsCal
EXPECT_EQ(zeroCopyBuffer, containsHint(expectedHint, userData));
snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_MAP_BUFFER_REQUIRES_COPY_DATA], static_cast<cl_mem>(buffer));
EXPECT_EQ(!zeroCopyBuffer, containsHint(expectedHint, userData));
EXPECT_EQ(!zeroCopyBuffer && !pCmdQ->getDevice().getProductHelper().isDcFlushMitigated(), containsHint(expectedHint, userData));
alignedFree(address);
delete buffer;
@ -499,7 +499,7 @@ TEST_P(PerformanceHintEnqueueMapTest, GivenZeroCopyFlagAndBlockingEventWhenEnque
EXPECT_EQ(zeroCopyBuffer, containsHint(expectedHint, userData));
snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_MAP_BUFFER_REQUIRES_COPY_DATA], static_cast<cl_mem>(buffer.get()));
EXPECT_EQ(!zeroCopyBuffer, containsHint(expectedHint, userData));
EXPECT_EQ(!zeroCopyBuffer && !pCmdQ->getDevice().getProductHelper().isDcFlushMitigated(), containsHint(expectedHint, userData));
alignedFree(address);
}
@ -597,7 +597,7 @@ TEST_P(PerformanceHintEnqueueMapTest, GivenZeroCopyFlagWhenEnqueueUnmapIsCalling
pCmdQ->enqueueUnmapMemObject(buffer, mapPtr, 0, nullptr, nullptr);
snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_UNMAP_MEM_OBJ_REQUIRES_COPY_DATA], mapPtr, static_cast<cl_mem>(buffer));
EXPECT_EQ(!zeroCopyBuffer, containsHint(expectedHint, userData));
EXPECT_EQ(!zeroCopyBuffer && !pCmdQ->getDevice().getProductHelper().isDcFlushMitigated(), containsHint(expectedHint, userData));
snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_UNMAP_MEM_OBJ_DOESNT_REQUIRE_COPY_DATA], mapPtr);
EXPECT_EQ(zeroCopyBuffer, containsHint(expectedHint, userData));
@ -629,7 +629,7 @@ TEST_P(PerformanceHintEnqueueMapTest, GivenZeroCopyAndBlockedEventFlagWhenEnqueu
EXPECT_FALSE(pCmdQ->isQueueBlocked());
snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_UNMAP_MEM_OBJ_REQUIRES_COPY_DATA], mapPtr, static_cast<cl_mem>(buffer.get()));
EXPECT_EQ(!zeroCopyBuffer, containsHint(expectedHint, userData));
EXPECT_EQ(!zeroCopyBuffer && !pCmdQ->getDevice().getProductHelper().isDcFlushMitigated(), containsHint(expectedHint, userData));
snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_ENQUEUE_UNMAP_MEM_OBJ_DOESNT_REQUIRE_COPY_DATA], mapPtr);
EXPECT_EQ(zeroCopyBuffer, containsHint(expectedHint, userData));

View File

@ -19,4 +19,8 @@ bool CommandQueue::isTimestampWaitEnabled() {
return ultHwConfig.useWaitForTimestamps;
}
// Variant that never requests a GPU copy for DC flush mitigation, whatever the
// allocation type passed in.
bool checkIsGpuCopyRequiredForDcFlushMitigation(AllocationType type) {
    constexpr bool gpuCopyRequired = false;
    return gpuCopyRequired;
}
} // namespace NEO

View File

@ -1893,6 +1893,10 @@ HWTEST_F(BufferCreateTests, givenClMemCopyHostPointerPassedToBufferCreateWhenAll
auto memoryManager = new MockMemoryManager(true, *executionEnvironment);
executionEnvironment->memoryManager.reset(memoryManager);
if (executionEnvironment->rootDeviceEnvironments[0]->getProductHelper().isDcFlushMitigated()) {
debugManager.flags.AllowDcFlush.set(1);
}
MockClDevice device(new MockDevice(executionEnvironment, mockRootDeviceIndex));
ASSERT_TRUE(device.createEngines());
DeviceFactory::prepareDeviceEnvironments(*device.getExecutionEnvironment());

View File

@ -115,6 +115,10 @@ HWTEST2_F(PvcAndLaterBufferTests, givenCompressedBufferInSystemAndBlitterSupport
auto pDevice = deviceFactory.rootDevices[0];
auto pMockContext = std::make_unique<MockContext>(pDevice);
if (pDevice->getProductHelper().isDcFlushMitigated()) {
debugManager.flags.AllowDcFlush.set(1);
}
static_cast<MockMemoryManager *>(pDevice->getExecutionEnvironment()->memoryManager.get())->enable64kbpages[0] = true;
static_cast<MockMemoryManager *>(pDevice->getExecutionEnvironment()->memoryManager.get())->localMemorySupported[0] = false;

View File

@ -152,6 +152,10 @@ HWTEST_F(BufferCreateWindowsTests, givenClMemCopyHostPointerPassedToBufferCreate
auto memoryManager = new MockMemoryManager(true, *executionEnvironment);
executionEnvironment->memoryManager.reset(memoryManager);
if (executionEnvironment->rootDeviceEnvironments[0]->getProductHelper().isDcFlushMitigated()) {
debugManager.flags.AllowDcFlush.set(1);
}
MockClDevice device(new MockDevice(executionEnvironment, mockRootDeviceIndex));
ASSERT_TRUE(device.createEngines());
DeviceFactory::prepareDeviceEnvironments(*device.getExecutionEnvironment());

View File

@ -134,6 +134,7 @@ class ProductHelper {
virtual bool isTile64With3DSurfaceOnBCSSupported(const HardwareInfo &hwInfo) const = 0;
virtual bool isDcFlushAllowed() const = 0;
virtual bool isDcFlushMitigated() const = 0;
// DC flush mitigation switch. ProductHelperHw::isDcFlushAllowed() turns DC flush
// off by default when this returns true (the AllowDcFlush debug flag can still override).
virtual bool mitigateDcFlush() const = 0;
virtual bool overridePatAndUsageForDcFlushMitigation(AllocationType allocationType) const = 0;
virtual bool overrideCacheableForDcFlushMitigation(AllocationType allocationType) const = 0;
virtual uint32_t computeMaxNeededSubSliceSpace(const HardwareInfo &hwInfo) const = 0;

View File

@ -394,7 +394,7 @@ bool ProductHelperHw<gfxProduct>::isDisableScratchPagesSupported() const {
template <PRODUCT_FAMILY gfxProduct>
bool ProductHelperHw<gfxProduct>::isDcFlushAllowed() const {
using GfxProduct = typename HwMapper<gfxProduct>::GfxProduct;
bool dcFlushAllowed = GfxProduct::isDcFlushAllowed;
bool dcFlushAllowed = GfxProduct::isDcFlushAllowed && !this->mitigateDcFlush();
if (debugManager.flags.AllowDcFlush.get() != -1) {
dcFlushAllowed = debugManager.flags.AllowDcFlush.get();
@ -403,6 +403,11 @@ bool ProductHelperHw<gfxProduct>::isDcFlushAllowed() const {
return dcFlushAllowed;
}
// Generic implementation: DC flush mitigation is not requested. Products that
// need it provide their own specialization.
template <PRODUCT_FAMILY gfxProduct>
bool ProductHelperHw<gfxProduct>::mitigateDcFlush() const {
    constexpr bool mitigationRequested = false;
    return mitigationRequested;
}
template <PRODUCT_FAMILY gfxProduct>
bool ProductHelperHw<gfxProduct>::isDcFlushMitigated() const {
using GfxProduct = typename HwMapper<gfxProduct>::GfxProduct;

View File

@ -76,6 +76,7 @@ class ProductHelperHw : public ProductHelper {
bool isTile64With3DSurfaceOnBCSSupported(const HardwareInfo &hwInfo) const override;
bool isDcFlushAllowed() const override;
bool isDcFlushMitigated() const override;
// The generic template definition returns false; a product-specific
// specialization can return true to request DC flush mitigation.
bool mitigateDcFlush() const override;
bool overridePatAndUsageForDcFlushMitigation(AllocationType allocationType) const override;
bool overrideCacheableForDcFlushMitigation(AllocationType allocationType) const override;
uint32_t computeMaxNeededSubSliceSpace(const HardwareInfo &hwInfo) const override;

View File

@ -40,6 +40,11 @@ bool ProductHelperHw<gfxProduct>::isDirectSubmissionSupported(ReleaseHelper *rel
return true;
}
// Specialization for this file's gfxProduct: DC flush mitigation is requested.
template <>
bool ProductHelperHw<gfxProduct>::mitigateDcFlush() const {
    constexpr bool mitigationRequested = true;
    return mitigationRequested;
}
template <>
bool ProductHelperHw<gfxProduct>::restartDirectSubmissionForHostptrFree() const {
return true;

View File

@ -281,6 +281,11 @@ bool ProductHelperHw<IGFX_UNKNOWN>::isDcFlushMitigated() const {
return false;
}
// IGFX_UNKNOWN specialization: DC flush mitigation is never requested.
template <>
bool ProductHelperHw<IGFX_UNKNOWN>::mitigateDcFlush() const {
    constexpr bool mitigationRequested = false;
    return mitigationRequested;
}
template <>
bool ProductHelperHw<IGFX_UNKNOWN>::overridePatAndUsageForDcFlushMitigation(AllocationType allocationType) const {
return false;

View File

@ -685,6 +685,11 @@ TEST(GmmTest, givenHwInfoWhenDeviceIsCreatedThenSetThisHwInfoToGmmHelper) {
TEST(GmmTest, givenAllocationTypeWhenGettingUsageTypeThenReturnCorrectValue) {
MockExecutionEnvironment mockExecutionEnvironment{};
const auto &productHelper = mockExecutionEnvironment.rootDeviceEnvironments[0]->getHelper<ProductHelper>();
if (productHelper.isDcFlushMitigated()) {
GTEST_SKIP();
}
for (uint32_t i = 0; i < static_cast<uint32_t>(AllocationType::count); i++) {
auto allocationType = static_cast<AllocationType>(i);
auto uncachedGmmUsageType = productHelper.isNewCoherencyModelSupported() ? GMM_RESOURCE_USAGE_OCL_BUFFER_CSR_UC : GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED;
@ -883,6 +888,10 @@ TEST(GmmTest, givenUncachedDebugFlagMaskSetWhenAskingForUsageTypeThenReturnUncac
MockExecutionEnvironment mockExecutionEnvironment{};
const auto &productHelper = mockExecutionEnvironment.rootDeviceEnvironments[0]->getHelper<ProductHelper>();
if (productHelper.isDcFlushMitigated()) {
GTEST_SKIP();
}
constexpr int64_t bufferMask = 1 << (static_cast<int64_t>(AllocationType::buffer) - 1);
constexpr int64_t imageMask = 1 << (static_cast<int64_t>(AllocationType::image) - 1);

View File

@ -312,9 +312,11 @@ HWTEST_F(ProductHelperTest, givenVariousValuesWhenGettingAubStreamSteppingFromHw
HWTEST_F(ProductHelperTest, givenDcFlushMitigationWhenOverridePatAndUsageForDcFlushMitigationThenReturnCorrectValue) {
DebugManagerStateRestore restorer;
for (auto i = 0; i < static_cast<int>(AllocationType::count); ++i) {
auto allocationType = static_cast<AllocationType>(i);
EXPECT_FALSE(productHelper->overridePatAndUsageForDcFlushMitigation(allocationType));
if (!productHelper->isDcFlushMitigated()) {
for (auto i = 0; i < static_cast<int>(AllocationType::count); ++i) {
auto allocationType = static_cast<AllocationType>(i);
EXPECT_FALSE(productHelper->overridePatAndUsageForDcFlushMitigation(allocationType));
}
}
debugManager.flags.AllowDcFlush.set(0);
for (auto i = 0; i < static_cast<int>(AllocationType::count); ++i) {

View File

@ -2899,7 +2899,11 @@ HWTEST_F(WddmMemoryManagerTest, givenInternalHeapOrLinearStreamTypeWhenAllocatin
ASSERT_NE(nullptr, allocation);
EXPECT_TRUE(allocation->getDefaultGmm()->resourceParams.Usage == GMM_RESOURCE_USAGE_TYPE::GMM_RESOURCE_USAGE_OCL_STATE_HEAP_BUFFER);
if (rootDeviceEnvironment->getProductHelper().isDcFlushMitigated()) {
EXPECT_TRUE(allocation->getDefaultGmm()->resourceParams.Usage == GMM_RESOURCE_USAGE_TYPE::GMM_RESOURCE_USAGE_OCL_SYSTEM_MEMORY_BUFFER_CACHELINE_MISALIGNED);
} else {
EXPECT_TRUE(allocation->getDefaultGmm()->resourceParams.Usage == GMM_RESOURCE_USAGE_TYPE::GMM_RESOURCE_USAGE_OCL_STATE_HEAP_BUFFER);
}
memoryManager->freeGraphicsMemory(allocation);
}
@ -2911,7 +2915,11 @@ HWTEST_F(WddmMemoryManagerTest, givenInternalHeapOrLinearStreamTypeWhenAllocatin
ASSERT_NE(nullptr, allocation);
EXPECT_TRUE(allocation->getDefaultGmm()->resourceParams.Usage == GMM_RESOURCE_USAGE_TYPE::GMM_RESOURCE_USAGE_OCL_STATE_HEAP_BUFFER);
if (rootDeviceEnvironment->getProductHelper().isDcFlushMitigated()) {
EXPECT_TRUE(allocation->getDefaultGmm()->resourceParams.Usage == GMM_RESOURCE_USAGE_TYPE::GMM_RESOURCE_USAGE_OCL_SYSTEM_MEMORY_BUFFER_CACHELINE_MISALIGNED);
} else {
EXPECT_TRUE(allocation->getDefaultGmm()->resourceParams.Usage == GMM_RESOURCE_USAGE_TYPE::GMM_RESOURCE_USAGE_OCL_STATE_HEAP_BUFFER);
}
memoryManager->freeGraphicsMemory(allocation);
}

View File

@ -29,5 +29,5 @@ LNLTEST_F(GfxCoreHelperTestsLnl, givenCommandBufferAllocationTypeWhenGetAllocati
}
// Checks the DC flush enable value reported by MemorySynchronizationCommands for this device.
// NOTE(review): as listed, the unconditional EXPECT_TRUE and the EXPECT_NE against
// isDcFlushMitigated() can only both pass when mitigation is off — confirm which
// expectation is meant to remain.
LNLTEST_F(GfxCoreHelperTestsLnl, WhenAskingForDcFlushThenReturnTrue) {
EXPECT_TRUE(MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, this->pDevice->getRootDeviceEnvironment()));
EXPECT_NE(MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, this->pDevice->getRootDeviceEnvironment()), this->pDevice->getRootDeviceEnvironment().getProductHelper().isDcFlushMitigated());
}

View File

@ -89,6 +89,7 @@ LNLTEST_F(LnlProductHelper, givenCompilerProductHelperWhenGetDefaultHwIpVersionT
LNLTEST_F(LnlProductHelper, whenCheckPreferredAllocationMethodThenAllocateByKmdIsReturnedExceptTagBufferAndTimestampPacketTagBuffer) {
DebugManagerStateRestore restorer;
debugManager.flags.AllowDcFlush.set(1);
for (auto i = 0; i < static_cast<int>(AllocationType::count); ++i) {
auto allocationType = static_cast<AllocationType>(i);
auto preferredAllocationMethod = productHelper->getPreferredAllocationMethod(allocationType);
@ -126,11 +127,13 @@ LNLTEST_F(LnlProductHelper, givenProductHelperWhenCheckOverrideAllocationCacheab
}
LNLTEST_F(LnlProductHelper, givenExternalHostPtrWhenMitigateDcFlushThenOverrideCacheable) {
DebugManagerStateRestore restorer;
debugManager.flags.AllowDcFlush.set(1);
AllocationData allocationData{};
allocationData.type = AllocationType::externalHostPtr;
EXPECT_FALSE(productHelper->overrideAllocationCacheable(allocationData));
DebugManagerStateRestore restorer;
debugManager.flags.AllowDcFlush.set(0);
for (auto i = 0; i < static_cast<int>(AllocationType::count); ++i) {

View File

@ -23,8 +23,14 @@ LNLTEST_F(LnlProductHelperWindows, givenProductHelperWhenCheckDirectSubmissionSu
EXPECT_TRUE(productHelper->isDirectSubmissionSupported(releaseHelper));
}
// Expects the product helper to both request DC flush mitigation and report it
// as active.
LNLTEST_F(LnlProductHelperWindows, givenProductHelperWhenDcFlushMitigationThenReturnTrue) {
    const bool mitigationRequested = productHelper->mitigateDcFlush();
    EXPECT_TRUE(mitigationRequested);

    const bool mitigationActive = productHelper->isDcFlushMitigated();
    EXPECT_TRUE(mitigationActive);
}
LNLTEST_F(LnlProductHelperWindows, givenProductHelperWhenOverridePatIndexCalledThenCorrectValueIsReturned) {
DebugManagerStateRestore restorer;
debugManager.flags.AllowDcFlush.set(1);
uint64_t expectedPatIndex = 6u;
EXPECT_EQ(expectedPatIndex, productHelper->overridePatIndex(0u, expectedPatIndex, AllocationType::bufferHostMemory));