From bb1029082814e79a598a99d1763a1a71622156c8 Mon Sep 17 00:00:00 2001 From: Szymon Morek Date: Thu, 20 Mar 2025 10:00:35 +0000 Subject: [PATCH] fix: make misaligned user memory 2-Way coherent Related-To: NEO-9004 Signed-off-by: Szymon Morek --- .../gmm_helper/cache_settings_helper.cpp | 10 ++++++ .../source/gmm_helper/cache_settings_helper.h | 3 +- .../os_interface/linux/drm_memory_manager.cpp | 7 +++- shared/source/os_interface/product_helper.h | 1 + shared/source/os_interface/product_helper.inl | 5 +++ .../source/os_interface/product_helper_hw.h | 1 + .../windows/wddm_memory_manager.cpp | 2 +- .../sku_info/operations/sku_info_transfer.h | 5 +-- .../operations/windows/sku_info_receiver.h | 4 ++- shared/source/sku_info/sku_info_base.h | 6 ++-- shared/source/xe2_hpg_core/hw_info_lnl.cpp | 2 ++ .../lnl/os_agnostic_product_helper_lnl.inl | 5 +++ .../common/mocks/mock_gmm_client_context.cpp | 6 +++- .../mocks/mock_gmm_client_context_base.h | 3 +- .../linux/drm_memory_manager_tests.cpp | 34 ++++++++++++++++--- .../windows/wddm_memory_manager_tests.cpp | 10 +++++- .../sku_info/sku_info_base_reference.h | 6 +++- 17 files changed, 93 insertions(+), 17 deletions(-) diff --git a/shared/source/gmm_helper/cache_settings_helper.cpp b/shared/source/gmm_helper/cache_settings_helper.cpp index 88c92373a4..7bc7b8b3e5 100644 --- a/shared/source/gmm_helper/cache_settings_helper.cpp +++ b/shared/source/gmm_helper/cache_settings_helper.cpp @@ -9,6 +9,7 @@ #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/execution_environment/root_device_environment.h" +#include "shared/source/helpers/cache_policy.h" #include "shared/source/helpers/gfx_core_helper.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/memory_manager/allocation_type.h" @@ -124,4 +125,13 @@ GMM_RESOURCE_USAGE_TYPE_ENUM CacheSettingsHelper::getDefaultUsageTypeWithCaching } } +// Set 2-way coherency for allocations which are not aligned to cacheline +GMM_RESOURCE_USAGE_TYPE_ENUM CacheSettingsHelper::getGmmUsageTypeForUserPtr(const void *userPtr, size_t size, const ProductHelper &productHelper) { + if (!isL3Capable(userPtr, size) && productHelper.isMisalignedUserPtr2WayCoherent()) { + return GMM_RESOURCE_USAGE_HW_CONTEXT; + } else { + return GMM_RESOURCE_USAGE_OCL_SYSTEM_MEMORY_BUFFER; + } +} + } // namespace NEO diff --git a/shared/source/gmm_helper/cache_settings_helper.h b/shared/source/gmm_helper/cache_settings_helper.h index c7684e6b4b..d9c34a375a 100644 --- a/shared/source/gmm_helper/cache_settings_helper.h +++ b/shared/source/gmm_helper/cache_settings_helper.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2022-2024 Intel Corporation + * Copyright (C) 2022-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -20,6 +20,7 @@ struct RootDeviceEnvironment; struct CacheSettingsHelper { static GMM_RESOURCE_USAGE_TYPE_ENUM getGmmUsageType(AllocationType allocationType, bool forceUncached, const ProductHelper &productHelper); + static GMM_RESOURCE_USAGE_TYPE_ENUM getGmmUsageTypeForUserPtr(const void *userPtr, size_t size, const ProductHelper &productHelper); static constexpr bool isUncachedType(GMM_RESOURCE_USAGE_TYPE_ENUM gmmResourceUsageType) { return ((gmmResourceUsageType == GMM_RESOURCE_USAGE_OCL_BUFFER_CSR_UC) || diff --git a/shared/source/os_interface/linux/drm_memory_manager.cpp b/shared/source/os_interface/linux/drm_memory_manager.cpp index 4a7a8417f2..6b28f3620f 100644 --- a/shared/source/os_interface/linux/drm_memory_manager.cpp +++ b/shared/source/os_interface/linux/drm_memory_manager.cpp @@ -606,7 +606,8 @@ GraphicsAllocation *DrmMemoryManager::allocateGraphicsMemoryWithGpuVa(const Allo GraphicsAllocation *DrmMemoryManager::allocateGraphicsMemoryForNonSvmHostPtr(const AllocationData &allocationData) { if (allocationData.size == 0 || !allocationData.hostPtr) return nullptr; - + auto rootDeviceEnvironment = executionEnvironment.rootDeviceEnvironments[allocationData.rootDeviceIndex].get(); + auto &productHelper = rootDeviceEnvironment->getHelper(); auto alignedPtr = alignDown(allocationData.hostPtr, MemoryConstants::pageSize); auto alignedSize = alignSizeWholePage(allocationData.hostPtr, allocationData.size); auto realAllocationSize = alignedSize; @@ -632,6 +633,10 @@ GraphicsAllocation *DrmMemoryManager::allocateGraphicsMemoryForNonSvmHostPtr(con bo->setAddress(gpuVirtualAddress); + auto usageType = CacheSettingsHelper::getGmmUsageTypeForUserPtr(allocationData.hostPtr, allocationData.size, productHelper); + auto patIndex = rootDeviceEnvironment->getGmmClientContext()->cachePolicyGetPATIndex(nullptr, usageType, false, true); + bo->setPatIndex(patIndex); + if (validateHostPtrMemory) { auto boPtr = bo.get(); auto vmHandleId = Math::getMinLsbSet(static_cast(allocationData.storageInfo.subDeviceBitfield.to_ulong())); diff --git a/shared/source/os_interface/product_helper.h b/shared/source/os_interface/product_helper.h index 9111cd53e3..250c8e0b82 100644 --- a/shared/source/os_interface/product_helper.h +++ b/shared/source/os_interface/product_helper.h @@ -267,6 +267,7 @@ class ProductHelper { virtual bool isSharingWith3dOrMediaAllowed() const = 0; virtual bool isL3FlushAfterPostSyncRequired(bool heaplessEnabled) const = 0; virtual void overrideDirectSubmissionTimeouts(std::chrono::microseconds &timeout, std::chrono::microseconds &maxTimeout) const = 0; + virtual bool isMisalignedUserPtr2WayCoherent() const = 0; virtual ~ProductHelper() = default; diff --git a/shared/source/os_interface/product_helper.inl b/shared/source/os_interface/product_helper.inl index 480b8da3bf..0e6faa3c97 100644 --- a/shared/source/os_interface/product_helper.inl +++ b/shared/source/os_interface/product_helper.inl @@ -1029,6 +1029,11 @@ template void ProductHelperHw::overrideDirectSubmissionTimeouts(std::chrono::microseconds &timeout, std::chrono::microseconds &maxTimeout) const { } +template +bool ProductHelperHw::isMisalignedUserPtr2WayCoherent() const { + return false; +} + template bool ProductHelperHw::isTimestampWaitSupportedForQueues(bool heaplessEnabled) const { return false; diff --git a/shared/source/os_interface/product_helper_hw.h b/shared/source/os_interface/product_helper_hw.h index 64b8d09fc8..bebccef8da 100644 --- a/shared/source/os_interface/product_helper_hw.h +++ b/shared/source/os_interface/product_helper_hw.h @@ -204,6 +204,7 @@ class ProductHelperHw : public ProductHelper { bool isSharingWith3dOrMediaAllowed() const override; bool isL3FlushAfterPostSyncRequired(bool heaplessEnabled) const override; void overrideDirectSubmissionTimeouts(std::chrono::microseconds &timeout, std::chrono::microseconds &maxTimeout) const override; + bool isMisalignedUserPtr2WayCoherent() const override; ~ProductHelperHw() override = default; diff --git a/shared/source/os_interface/windows/wddm_memory_manager.cpp b/shared/source/os_interface/windows/wddm_memory_manager.cpp index 6798ee82b2..031603b947 100644 --- a/shared/source/os_interface/windows/wddm_memory_manager.cpp +++ b/shared/source/os_interface/windows/wddm_memory_manager.cpp @@ -474,7 +474,7 @@ GraphicsAllocation *WddmMemoryManager::allocateGraphicsMemoryForNonSvmHostPtr(co } auto gmm = new Gmm(executionEnvironment.rootDeviceEnvironments[allocationData.rootDeviceIndex]->getGmmHelper(), alignedPtr, alignedSize, 0u, - CacheSettingsHelper::getGmmUsageType(wddmAllocation->getAllocationType(), !!allocationData.flags.uncacheable, productHelper), {}, gmmRequirements); + CacheSettingsHelper::getGmmUsageTypeForUserPtr(allocationData.hostPtr, allocationData.size, productHelper), {}, gmmRequirements); wddmAllocation->setDefaultGmm(gmm); diff --git a/shared/source/sku_info/operations/sku_info_transfer.h b/shared/source/sku_info/operations/sku_info_transfer.h index 99c237fc5c..93a02b0220 100644 --- a/shared/source/sku_info/operations/sku_info_transfer.h +++ b/shared/source/sku_info/operations/sku_info_transfer.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2024 Intel Corporation + * Copyright (C) 2018-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -68,7 +68,8 @@ class SkuInfoTransfer { TRANSFER_WA_TO_GMM(UntypedBufferCompression); TRANSFER_WA_TO_GMM(AuxTable64KGranular); TRANSFER_WA_TO_GMM(_15010089951); - + TRANSFER_WA_TO_GMM(_14018976079); + TRANSFER_WA_TO_GMM(_14018984349); #undef TRANSFER_WA_TO_GMM } }; diff --git a/shared/source/sku_info/operations/windows/sku_info_receiver.h b/shared/source/sku_info/operations/windows/sku_info_receiver.h index 50ca82dde4..1aab280bd3 100644 --- a/shared/source/sku_info/operations/windows/sku_info_receiver.h +++ b/shared/source/sku_info/operations/windows/sku_info_receiver.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2024 Intel Corporation + * Copyright (C) 2018-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -103,6 +103,8 @@ class SkuInfoReceiver { RECEIVE_WA(DisableFusedThreadScheduling); RECEIVE_WA(AuxTable64KGranular); RECEIVE_WA(_15010089951); + RECEIVE_WA(_14018984349); + RECEIVE_WA(_14018976079); #undef RECEIVE_WA } diff --git a/shared/source/sku_info/sku_info_base.h b/shared/source/sku_info/sku_info_base.h index d2e903c290..aca770c9a3 100644 --- a/shared/source/sku_info/sku_info_base.h +++ b/shared/source/sku_info/sku_info_base.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2024 Intel Corporation + * Copyright (C) 2018-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -110,7 +110,9 @@ struct WorkaroundTableBase { uint32_t waDisableFusedThreadScheduling : 1; uint32_t waAuxTable64KGranular : 1; uint32_t wa_15010089951 : 1; // NOLINT(readability-identifier-naming) - uint32_t reserved : 10; + uint32_t wa_14018976079 : 1; // NOLINT(readability-identifier-naming) + uint32_t wa_14018984349 : 1; // NOLINT(readability-identifier-naming) + uint32_t reserved : 8; }; union { diff --git a/shared/source/xe2_hpg_core/hw_info_lnl.cpp b/shared/source/xe2_hpg_core/hw_info_lnl.cpp index a8d55a825d..504117dfb4 100644 --- a/shared/source/xe2_hpg_core/hw_info_lnl.cpp +++ b/shared/source/xe2_hpg_core/hw_info_lnl.cpp @@ -90,6 +90,8 @@ void LNL::setupFeatureAndWorkaroundTable(HardwareInfo *hwInfo, const ReleaseHelp featureTable->flags.ftrPml5Support = true; featureTable->ftrBcsInfo = 1; + hwInfo->workaroundTable.flags.wa_14018976079 = true; + hwInfo->workaroundTable.flags.wa_14018984349 = true; } FeatureTable LNL::featureTable{}; diff --git a/shared/source/xe2_hpg_core/lnl/os_agnostic_product_helper_lnl.inl b/shared/source/xe2_hpg_core/lnl/os_agnostic_product_helper_lnl.inl index 90f1f6d94f..249d28b300 100644 --- a/shared/source/xe2_hpg_core/lnl/os_agnostic_product_helper_lnl.inl +++ b/shared/source/xe2_hpg_core/lnl/os_agnostic_product_helper_lnl.inl @@ -93,4 +93,9 @@ bool ProductHelperHw::isDeviceUsmAllocationReuseSupported() const { return true; } +template <> +bool ProductHelperHw::isMisalignedUserPtr2WayCoherent() const { + return true; +} + } // namespace NEO diff --git a/shared/test/common/mocks/mock_gmm_client_context.cpp b/shared/test/common/mocks/mock_gmm_client_context.cpp index 5269bdf279..fd50a60e71 100644 --- a/shared/test/common/mocks/mock_gmm_client_context.cpp +++ b/shared/test/common/mocks/mock_gmm_client_context.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2024 Intel Corporation + * Copyright (C) 2018-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -48,6 +48,10 @@ uint32_t MockGmmClientContextBase::cachePolicyGetPATIndex(GMM_RESOURCE_INFO *gmm return MockPatIndex::uncached; } + if (usage == GMM_RESOURCE_USAGE_HW_CONTEXT) { + return MockPatIndex::TwoWayCoherent; + } + return MockPatIndex::cached; } diff --git a/shared/test/common/mocks/mock_gmm_client_context_base.h b/shared/test/common/mocks/mock_gmm_client_context_base.h index 7313402f5b..f09e0a2b97 100644 --- a/shared/test/common/mocks/mock_gmm_client_context_base.h +++ b/shared/test/common/mocks/mock_gmm_client_context_base.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2024 Intel Corporation + * Copyright (C) 2018-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -14,6 +14,7 @@ class MockGmmClientContextBase : public GmmClientContext { struct MockPatIndex { static constexpr uint32_t uncached = 1; static constexpr uint32_t cached = 2; + static constexpr uint32_t TwoWayCoherent = 3; static constexpr uint32_t error = GMM_PAT_ERROR; }; diff --git a/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp b/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp index 99475ac626..ccbe8140ea 100644 --- a/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp +++ b/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp @@ -3803,10 +3803,7 @@ TEST_F(DrmMemoryManagerBasic, givenSpecificAddressSpaceWhenInitializingMemoryMan EXPECT_EQ(maxNBitValue(48 - 1), limit); } -TEST_F(DrmMemoryManagerBasic, givenDisabledHostPtrTrackingWhenAllocateGraphicsMemoryForNonSvmHostPtrIsCalledWithNotAlignedPtrIsPassedThenAllocationIsCreated) { - DebugManagerStateRestore restore; - debugManager.flags.EnableHostPtrTracking.set(false); - +TEST_F(DrmMemoryManagerBasic, givenUnalignedHostPtrWhenAllocateGraphicsMemoryThenSetCorrectPatIndex) { AllocationData allocationData; std::unique_ptr memoryManager(new (std::nothrow) TestedDrmMemoryManager(false, false, false, executionEnvironment)); @@ -3815,12 +3812,39 @@ TEST_F(DrmMemoryManagerBasic, givenDisabledHostPtrTrackingWhenAllocateGraphicsMe allocationData.size = 13; allocationData.hostPtr = reinterpret_cast(0x5001); allocationData.rootDeviceIndex = rootDeviceIndex; - auto allocation = memoryManager->allocateGraphicsMemoryForNonSvmHostPtr(allocationData); + auto allocation = static_cast(memoryManager->allocateGraphicsMemoryForNonSvmHostPtr(allocationData)); EXPECT_NE(nullptr, allocation); EXPECT_EQ(0x5001u, reinterpret_cast(allocation->getUnderlyingBuffer())); EXPECT_EQ(13u, allocation->getUnderlyingBufferSize()); EXPECT_EQ(1u, allocation->getAllocationOffset()); + auto &productHelper = executionEnvironment.rootDeviceEnvironments[0]->getHelper(); + if (productHelper.isMisalignedUserPtr2WayCoherent()) { + EXPECT_EQ(MockGmmClientContextBase::MockPatIndex::TwoWayCoherent, allocation->getBO()->peekPatIndex()); + } else { + EXPECT_EQ(MockGmmClientContextBase::MockPatIndex::cached, allocation->getBO()->peekPatIndex()); + } + + memoryManager->freeGraphicsMemory(allocation); +} + +TEST_F(DrmMemoryManagerBasic, givenAlignedHostPtrWhenAllocateGraphicsMemoryThenSetCorrectPatIndex) { + AllocationData allocationData; + std::unique_ptr memoryManager(new (std::nothrow) TestedDrmMemoryManager(false, false, false, executionEnvironment)); + + memoryManager->forceLimitedRangeAllocator(MemoryConstants::max48BitAddress); + + allocationData.size = MemoryConstants::cacheLineSize; + allocationData.hostPtr = reinterpret_cast(MemoryConstants::pageSize); + allocationData.rootDeviceIndex = rootDeviceIndex; + auto allocation = static_cast(memoryManager->allocateGraphicsMemoryForNonSvmHostPtr(allocationData)); + + EXPECT_NE(nullptr, allocation); + EXPECT_EQ(MemoryConstants::pageSize, reinterpret_cast(allocation->getUnderlyingBuffer())); + EXPECT_EQ(MemoryConstants::cacheLineSize, allocation->getUnderlyingBufferSize()); + EXPECT_EQ(0u, allocation->getAllocationOffset()); + + EXPECT_EQ(MockGmmClientContextBase::MockPatIndex::cached, allocation->getBO()->peekPatIndex()); memoryManager->freeGraphicsMemory(allocation); } diff --git a/shared/test/unit_test/os_interface/windows/wddm_memory_manager_tests.cpp b/shared/test/unit_test/os_interface/windows/wddm_memory_manager_tests.cpp index 47e4508fd6..63bde8387d 100644 --- a/shared/test/unit_test/os_interface/windows/wddm_memory_manager_tests.cpp +++ b/shared/test/unit_test/os_interface/windows/wddm_memory_manager_tests.cpp @@ -1050,7 +1050,7 @@ TEST_F(WddmMemoryManagerSimpleTest, whenCreateAllocationFromHandleAndMapCallFail EXPECT_EQ(1u, memoryManager->freeGraphicsMemoryImplCalled); } -TEST_F(WddmMemoryManagerSimpleTest, givenAllocateGraphicsMemoryForNonSvmHostPtrIsCalledWhenNotAlignedPtrIsPassedThenAlignedGraphicsAllocationIsCreated) { +TEST_F(WddmMemoryManagerSimpleTest, givenAllocateGraphicsMemoryForNonSvmHostPtrIsCalledWhenNotAlignedPtrIsPassedThenAlignedGraphicsAllocationIsCreatedWithCorrectGmmResource) { memoryManager.reset(new MockWddmMemoryManager(false, false, executionEnvironment)); auto size = 13u; auto hostPtr = reinterpret_cast(0x10001); @@ -1063,6 +1063,14 @@ TEST_F(WddmMemoryManagerSimpleTest, givenAllocateGraphicsMemoryForNonSvmHostPtrI EXPECT_EQ(hostPtr, allocation->getUnderlyingBuffer()); EXPECT_EQ(size, allocation->getUnderlyingBufferSize()); EXPECT_EQ(1u, allocation->getAllocationOffset()); + + const auto &productHelper = rootDeviceEnvironment->getHelper(); + auto expectedUsage = GMM_RESOURCE_USAGE_OCL_SYSTEM_MEMORY_BUFFER; + if (productHelper.isMisalignedUserPtr2WayCoherent()) { + expectedUsage = GMM_RESOURCE_USAGE_HW_CONTEXT; + } + EXPECT_EQ(expectedUsage, allocation->getGmm(0)->resourceParams.Usage); + memoryManager->freeGraphicsMemory(allocation); } diff --git a/shared/test/unit_test/sku_info/sku_info_base_reference.h b/shared/test/unit_test/sku_info/sku_info_base_reference.h index bfa072b767..637ec77589 100644 --- a/shared/test/unit_test/sku_info/sku_info_base_reference.h +++ b/shared/test/unit_test/sku_info/sku_info_base_reference.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2024 Intel Corporation + * Copyright (C) 2018-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -62,6 +62,8 @@ struct SkuInfoBaseReference { refWaTable.WaAuxTable16KGranular = 1; refWaTable.WaAuxTable64KGranular = 1; refWaTable.Wa_15010089951 = 1; + refWaTable.Wa_14018976079 = 1; + refWaTable.Wa_14018984349 = 1; } static void fillReferenceFtrToReceive(FeatureTable &refFtrTable) { @@ -149,6 +151,8 @@ struct SkuInfoBaseReference { refWaTable.flags.waDisableFusedThreadScheduling = true; refWaTable.flags.waAuxTable64KGranular = true; refWaTable.flags.wa_15010089951 = true; + refWaTable.flags.wa_14018976079 = true; + refWaTable.flags.wa_14018984349 = true; } }; // namespace SkuInfoBaseReference } // namespace NEO