From ed011de03ebe4082fec7e0e2f7793c42d0ba9433 Mon Sep 17 00:00:00 2001 From: Dominik Dabek Date: Thu, 15 Feb 2024 15:45:12 +0000 Subject: [PATCH] performance: program pat index on mtl linux Enable programming pat indexes on mtl linux for device buffers. Change DrmMemoryManager::allocateMemoryByKMD to use gemCreateExt. Changes currently disabled, can be enabled with flag DisableGemCreateExtSetPat=0 Related-To: NEO-7896 Signed-off-by: Dominik Dabek --- ...te_command_queue_with_properties_tests.cpp | 3 ++- .../os_interface/linux/drm_memory_manager.cpp | 21 ++++++++++++++----- shared/source/os_interface/linux/drm_neo.cpp | 2 +- shared/source/os_interface/product_helper.h | 2 +- shared/source/os_interface/product_helper.inl | 3 ++- .../source/os_interface/product_helper_hw.h | 2 +- .../xe_hpg_core/linux/product_helper_mtl.cpp | 12 +++++++++++ .../linux/device_command_stream_fixture.h | 6 ++++-- .../linux/drm_memory_manager_tests.cpp | 6 ++++++ .../drm_residency_handler_prelim_tests.cpp | 4 ++-- .../os_interface/product_helper_tests.cpp | 6 +++--- .../pvc/linux/product_helper_tests_pvc.cpp | 1 - .../mtl/excludes_xe_hpg_core_mtl.cpp | 1 + .../linux/product_helper_tests_mtl_linux.cpp | 14 ++++++++++++- 14 files changed, 64 insertions(+), 19 deletions(-) diff --git a/opencl/test/unit_test/os_interface/linux/linux_create_command_queue_with_properties_tests.cpp b/opencl/test/unit_test/os_interface/linux/linux_create_command_queue_with_properties_tests.cpp index 2a85bba4f6..ba8a968f97 100644 --- a/opencl/test/unit_test/os_interface/linux/linux_create_command_queue_with_properties_tests.cpp +++ b/opencl/test/unit_test/os_interface/linux/linux_create_command_queue_with_properties_tests.cpp @@ -10,6 +10,7 @@ #include "shared/test/common/libult/linux/drm_mock.h" #include "shared/test/common/mocks/linux/mock_drm_command_stream_receiver.h" #include "shared/test/common/mocks/linux/mock_drm_memory_manager.h" +#include "shared/test/common/mocks/linux/mock_ioctl_helper.h" #include "shared/test/common/test_macros/hw_test.h" #include "opencl/source/command_queue/command_queue_hw.h" @@ -32,7 +33,7 @@ struct ClCreateCommandQueueWithPropertiesLinux : public UltCommandStreamReceiver executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->memoryOperationsInterface = DrmMemoryOperationsHandler::create(*drm, rootDeviceIndex, false); executionEnvironment->memoryManager.reset(new TestedDrmMemoryManager(*executionEnvironment)); mdevice = std::make_unique(MockDevice::create(executionEnvironment, rootDeviceIndex)); - + ASSERT_NE(nullptr, mdevice.get()); clDevice = mdevice.get(); retVal = CL_SUCCESS; context = std::unique_ptr(Context::create(nullptr, ClDeviceVector(&clDevice, 1), nullptr, nullptr, retVal)); diff --git a/shared/source/os_interface/linux/drm_memory_manager.cpp b/shared/source/os_interface/linux/drm_memory_manager.cpp index c7625d554c..f29b6a5c49 100644 --- a/shared/source/os_interface/linux/drm_memory_manager.cpp +++ b/shared/source/os_interface/linux/drm_memory_manager.cpp @@ -635,14 +635,25 @@ GraphicsAllocation *DrmMemoryManager::allocateMemoryByKMD(const AllocationData & auto gmm = std::make_unique(executionEnvironment.rootDeviceEnvironments[allocationData.rootDeviceIndex]->getGmmHelper(), allocationData.hostPtr, allocationData.size, 0u, CacheSettingsHelper::getGmmUsageType(allocationData.type, allocationData.flags.uncacheable, productHelper), systemMemoryStorageInfo, gmmRequirements); size_t bufferSize = allocationData.size; - uint64_t gpuRange = acquireGpuRangeWithCustomAlignment(bufferSize, allocationData.rootDeviceIndex, HeapIndex::heapStandard64KB, allocationData.alignment); + auto alignment = allocationData.alignment; + if (bufferSize >= 2 * MemoryConstants::megaByte) { + alignment = MemoryConstants::pageSize2M; + } + uint64_t gpuRange = acquireGpuRangeWithCustomAlignment(bufferSize, allocationData.rootDeviceIndex, HeapIndex::heapStandard64KB, alignment); auto &drm = getDrm(allocationData.rootDeviceIndex); - auto ioctlHelper = drm.getIoctlHelper(); - - uint32_t handle = ioctlHelper->createGem(bufferSize, static_cast(allocationData.storageInfo.memoryBanks.to_ulong())); - + int ret = -1; + uint32_t handle; auto patIndex = drm.getPatIndex(gmm.get(), allocationData.type, CacheRegion::defaultRegion, CachePolicy::writeBack, false, MemoryPoolHelper::isSystemMemoryPool(memoryPool)); + const bool tryToUseGemCreateExt = !debugManager.flags.DisableGemCreateExtSetPat.get(); + if (tryToUseGemCreateExt && drm.getMemoryInfo()) { + ret = drm.getMemoryInfo()->createGemExtWithSingleRegion(allocationData.storageInfo.getMemoryBanks(), bufferSize, handle, patIndex, -1, allocationData.flags.isUSMHostAllocation); + } + + if (0 != ret) { + auto ioctlHelper = drm.getIoctlHelper(); + handle = ioctlHelper->createGem(bufferSize, static_cast(allocationData.storageInfo.memoryBanks.to_ulong())); + } std::unique_ptr bo(new BufferObject(allocationData.rootDeviceIndex, &drm, patIndex, handle, bufferSize, maxOsContextCount)); bo->setAddress(gpuRange); diff --git a/shared/source/os_interface/linux/drm_neo.cpp b/shared/source/os_interface/linux/drm_neo.cpp index d4c75b7545..7e137e33a3 100644 --- a/shared/source/os_interface/linux/drm_neo.cpp +++ b/shared/source/os_interface/linux/drm_neo.cpp @@ -1231,7 +1231,7 @@ uint64_t Drm::getPatIndex(Gmm *gmm, AllocationType allocationType, CacheRegion c } uint64_t patIndex = rootDeviceEnvironment.getGmmClientContext()->cachePolicyGetPATIndex(resourceInfo, usageType, compressed, cachable); - patIndex = productHelper.overridePatIndex(isUncachedType, patIndex); + patIndex = productHelper.overridePatIndex(isUncachedType, patIndex, allocationType); UNRECOVERABLE_IF(patIndex == static_cast(GMM_PAT_ERROR)); diff --git a/shared/source/os_interface/product_helper.h b/shared/source/os_interface/product_helper.h index b6687e18ee..1e3b6bdbe9 100644 --- a/shared/source/os_interface/product_helper.h +++ b/shared/source/os_interface/product_helper.h @@ -217,7 +217,7 @@ class ProductHelper { virtual bool isSkippingStatefulInformationRequired(const KernelDescriptor &kernelDescriptor) const = 0; virtual bool getMediaFrequencyTileIndex(const ReleaseHelper *releaseHelper, uint32_t &tileIndex) const = 0; virtual bool isResolvingSubDeviceIDNeeded(const ReleaseHelper *releaseHelper) const = 0; - virtual uint64_t overridePatIndex(bool isUncachedType, uint64_t patIndex) const = 0; + virtual uint64_t overridePatIndex(bool isUncachedType, uint64_t patIndex, AllocationType allocationType) const = 0; virtual std::vector getSupportedNumGrfs(const ReleaseHelper *releaseHelper) const = 0; virtual aub_stream::EngineType getDefaultCopyEngine() const = 0; virtual void adjustEngineGroupType(EngineGroupType &engineGroupType) const = 0; diff --git a/shared/source/os_interface/product_helper.inl b/shared/source/os_interface/product_helper.inl index d1b1b26105..fa3682a116 100644 --- a/shared/source/os_interface/product_helper.inl +++ b/shared/source/os_interface/product_helper.inl @@ -16,6 +16,7 @@ #include "shared/source/helpers/local_memory_access_modes.h" #include "shared/source/helpers/preamble.h" #include "shared/source/kernel/kernel_properties.h" +#include "shared/source/memory_manager/allocation_properties.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/os_interface/product_helper.h" @@ -838,7 +839,7 @@ bool ProductHelperHw::isResolvingSubDeviceIDNeeded(const ReleaseHelp } template -uint64_t ProductHelperHw::overridePatIndex(bool isUncachedType, uint64_t patIndex) const { +uint64_t ProductHelperHw::overridePatIndex(bool isUncachedType, uint64_t patIndex, AllocationType allocationType) const { return patIndex; } diff --git a/shared/source/os_interface/product_helper_hw.h b/shared/source/os_interface/product_helper_hw.h index 5244a1acdb..804fe947bc 100644 --- a/shared/source/os_interface/product_helper_hw.h +++ b/shared/source/os_interface/product_helper_hw.h @@ -164,7 +164,7 @@ class ProductHelperHw : public ProductHelper { bool isSkippingStatefulInformationRequired(const KernelDescriptor &kernelDescriptor) const override; bool getMediaFrequencyTileIndex(const ReleaseHelper *releaseHelper, uint32_t &tileIndex) const override; bool isResolvingSubDeviceIDNeeded(const ReleaseHelper *releaseHelper) const override; - uint64_t overridePatIndex(bool isUncachedType, uint64_t patIndex) const override; + uint64_t overridePatIndex(bool isUncachedType, uint64_t patIndex, AllocationType allocationType) const override; std::vector getSupportedNumGrfs(const ReleaseHelper *releaseHelper) const override; aub_stream::EngineType getDefaultCopyEngine() const override; void adjustEngineGroupType(EngineGroupType &engineGroupType) const override; diff --git a/shared/source/xe_hpg_core/linux/product_helper_mtl.cpp b/shared/source/xe_hpg_core/linux/product_helper_mtl.cpp index 793b00b2d1..99e6702a23 100644 --- a/shared/source/xe_hpg_core/linux/product_helper_mtl.cpp +++ b/shared/source/xe_hpg_core/linux/product_helper_mtl.cpp @@ -11,5 +11,17 @@ constexpr static auto gfxProduct = IGFX_METEORLAKE; #include "shared/source/xe_hpg_core/xe_lpg/linux/product_helper_xe_lpg_linux.inl" #include "shared/source/xe_hpg_core/xe_lpg/os_agnostic_product_helper_xe_lpg.inl" +namespace NEO { +template <> +uint64_t ProductHelperHw::overridePatIndex(bool isUncachedType, uint64_t patIndex, AllocationType allocationType) const { + switch (allocationType) { + case NEO::AllocationType::buffer: + return 0u; + default: + return 3u; + } +} template class NEO::ProductHelperHw; + +} // namespace NEO \ No newline at end of file diff --git a/shared/test/common/os_interface/linux/device_command_stream_fixture.h b/shared/test/common/os_interface/linux/device_command_stream_fixture.h index 0bf3e55543..379e59e197 100644 --- a/shared/test/common/os_interface/linux/device_command_stream_fixture.h +++ b/shared/test/common/os_interface/linux/device_command_stream_fixture.h @@ -77,8 +77,10 @@ class DrmMockTime : public DrmMockSuccess { public: using DrmMockSuccess::DrmMockSuccess; int ioctl(DrmIoctl request, void *arg) override { - auto *reg = reinterpret_cast(arg); - reg->value = getVal() << 32 | 0x1; + if (DrmIoctl::regRead == request) { + auto *reg = reinterpret_cast(arg); + reg->value = getVal() << 32 | 0x1; + } return 0; }; diff --git a/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp b/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp index b31cebe5f2..804382e38c 100644 --- a/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp +++ b/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp @@ -1756,6 +1756,9 @@ TEST_F(DrmMemoryManagerTest, givenRequiresStandard2MBHeapThenStandard2MBHeapIsAc } TEST_F(DrmMemoryManagerTest, GivenShareableEnabledWhenAskedToCreateGraphicsAllocationThenValidAllocationIsReturnedAndStandard64KBHeapIsUsed) { + mock->ioctlHelper.reset(new MockIoctlHelper(*mock)); + mock->queryMemoryInfo(); + EXPECT_NE(nullptr, mock->getMemoryInfo()); mock->ioctlExpected.gemWait = 1; mock->ioctlExpected.gemCreate = 1; mock->ioctlExpected.gemClose = 1; @@ -1774,6 +1777,9 @@ TEST_F(DrmMemoryManagerTest, GivenShareableEnabledWhenAskedToCreateGraphicsAlloc } TEST_F(DrmMemoryManagerTest, GivenSizeAndAlignmentWhenAskedToCreateGraphicsAllocationThenValidAllocationIsReturnedAndMemoryIsAligned) { + mock->ioctlHelper.reset(new MockIoctlHelper(*mock)); + mock->queryMemoryInfo(); + EXPECT_NE(nullptr, mock->getMemoryInfo()); allocationData.size = 1; int ioctlCnt = 0; size_t alignment = 8 * MemoryConstants::megaByte; diff --git a/shared/test/unit_test/os_interface/linux/drm_residency_handler_prelim_tests.cpp b/shared/test/unit_test/os_interface/linux/drm_residency_handler_prelim_tests.cpp index b0b5b684fd..1351102df8 100644 --- a/shared/test/unit_test/os_interface/linux/drm_residency_handler_prelim_tests.cpp +++ b/shared/test/unit_test/os_interface/linux/drm_residency_handler_prelim_tests.cpp @@ -1126,7 +1126,7 @@ HWTEST_F(DrmMemoryOperationsHandlerBindTest, givenPatIndexProgrammingEnabledWhen } if (debugFlag == 0 || !closSupported || debugFlag == -1) { - auto expectedIndex = productHelper.overridePatIndex(false, static_cast(MockGmmClientContextBase::MockPatIndex::cached)); + auto expectedIndex = productHelper.overridePatIndex(false, static_cast(MockGmmClientContextBase::MockPatIndex::cached), allocation.getAllocationType()); EXPECT_EQ(expectedIndex, mock->context.receivedVmBindPatIndex.value()); @@ -1187,7 +1187,7 @@ HWTEST_F(DrmMemoryOperationsHandlerBindTest, givenUncachedDebugFlagSetWhenVmBind auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), MemoryConstants::pageSize}); operationHandler->makeResident(device, ArrayRef(&allocation, 1)); - auto expectedIndex = productHelper.overridePatIndex(true, static_cast(MockGmmClientContextBase::MockPatIndex::uncached)); + auto expectedIndex = productHelper.overridePatIndex(true, static_cast(MockGmmClientContextBase::MockPatIndex::uncached), allocation->getAllocationType()); EXPECT_EQ(expectedIndex, mock->context.receivedVmBindPatIndex.value()); diff --git a/shared/test/unit_test/os_interface/product_helper_tests.cpp b/shared/test/unit_test/os_interface/product_helper_tests.cpp index 79548ba13f..daa02d6735 100644 --- a/shared/test/unit_test/os_interface/product_helper_tests.cpp +++ b/shared/test/unit_test/os_interface/product_helper_tests.cpp @@ -11,6 +11,7 @@ #include "shared/source/helpers/gfx_core_helper.h" #include "shared/source/helpers/local_memory_access_modes.h" #include "shared/source/kernel/kernel_descriptor.h" +#include "shared/source/memory_manager/allocation_type.h" #include "shared/source/os_interface/product_helper.h" #include "shared/source/release_helper/release_helper.h" #include "shared/source/unified_memory/usm_memory_support.h" @@ -445,7 +446,6 @@ HWTEST_F(ProductHelperTest, givenProductHelperWhenAskedIfTile64With3DSurfaceOnBC } HWTEST_F(ProductHelperTest, givenProductHelperWhenAskedIfPatIndexProgrammingSupportedThenReturnFalse) { - EXPECT_FALSE(productHelper->isVmBindPatIndexProgrammingSupported()); } @@ -841,10 +841,10 @@ HWTEST_F(ProductHelperTest, whenDisableL3ForDebugCalledThenFalseIsReturned) { HWTEST_F(ProductHelperTest, givenBooleanUncachedWhenCallOverridePatIndexThenProperPatIndexIsReturned) { uint64_t patIndex = 1u; bool isUncached = true; - EXPECT_EQ(patIndex, productHelper->overridePatIndex(isUncached, patIndex)); + EXPECT_EQ(patIndex, productHelper->overridePatIndex(isUncached, patIndex, AllocationType::buffer)); isUncached = false; - EXPECT_EQ(patIndex, productHelper->overridePatIndex(isUncached, patIndex)); + EXPECT_EQ(patIndex, productHelper->overridePatIndex(isUncached, patIndex, AllocationType::buffer)); } HWTEST_F(ProductHelperTest, givenProductHelperWhenGettingSupportedNumGrfsThenCorrectValueIsReturned) { diff --git a/shared/test/unit_test/xe_hpc_core/pvc/linux/product_helper_tests_pvc.cpp b/shared/test/unit_test/xe_hpc_core/pvc/linux/product_helper_tests_pvc.cpp index 2487b874ea..d9bf5cf750 100644 --- a/shared/test/unit_test/xe_hpc_core/pvc/linux/product_helper_tests_pvc.cpp +++ b/shared/test/unit_test/xe_hpc_core/pvc/linux/product_helper_tests_pvc.cpp @@ -68,7 +68,6 @@ PVCTEST_F(PvcProductHelperLinux, GivenPvcWhenConfigureHardwareCustomThenKmdNotif } PVCTEST_F(PvcProductHelperLinux, givenProductHelperWhenAskedIfPatIndexProgrammingSupportedThenReturnTrue) { - EXPECT_TRUE(productHelper->isVmBindPatIndexProgrammingSupported()); } diff --git a/shared/test/unit_test/xe_hpg_core/mtl/excludes_xe_hpg_core_mtl.cpp b/shared/test/unit_test/xe_hpg_core/mtl/excludes_xe_hpg_core_mtl.cpp index 55bb8fd744..871863f569 100644 --- a/shared/test/unit_test/xe_hpg_core/mtl/excludes_xe_hpg_core_mtl.cpp +++ b/shared/test/unit_test/xe_hpg_core/mtl/excludes_xe_hpg_core_mtl.cpp @@ -21,3 +21,4 @@ HWTEST_EXCLUDE_PRODUCT(ProductHelperTest, givenProductHelperWhenAskedIfPatIndexP HWTEST_EXCLUDE_PRODUCT(ProductHelperTest, givenProductHelperWhenIsAdjustWalkOrderAvailableCallThenFalseReturn, IGFX_METEORLAKE); HWTEST_EXCLUDE_PRODUCT(ProductHelperTest, givenProductHelperWhenCheckBlitEnqueueAllowedThenReturnTrue, IGFX_METEORLAKE); HWTEST_EXCLUDE_PRODUCT(ProductHelperTest, whenGettingPreferredAllocationMethodThenNoPreferenceIsReturned, IGFX_METEORLAKE); +HWTEST_EXCLUDE_PRODUCT(ProductHelperTest, givenBooleanUncachedWhenCallOverridePatIndexThenProperPatIndexIsReturned, IGFX_METEORLAKE); diff --git a/shared/test/unit_test/xe_hpg_core/mtl/linux/product_helper_tests_mtl_linux.cpp b/shared/test/unit_test/xe_hpg_core/mtl/linux/product_helper_tests_mtl_linux.cpp index 3cd58b8e07..e97e318311 100644 --- a/shared/test/unit_test/xe_hpg_core/mtl/linux/product_helper_tests_mtl_linux.cpp +++ b/shared/test/unit_test/xe_hpg_core/mtl/linux/product_helper_tests_mtl_linux.cpp @@ -5,6 +5,7 @@ * */ +#include "shared/source/memory_manager/allocation_properties.h" #include "shared/source/os_interface/os_interface.h" #include "shared/source/xe_hpg_core/hw_info_xe_hpg_core.h" #include "shared/test/common/helpers/default_hw_info.h" @@ -56,4 +57,15 @@ MTLTEST_F(MtlProductHelperLinux, givenProductHelperWhenAskedIsKmdMigrationsSuppo MTLTEST_F(MtlProductHelperLinux, whenCheckingIsTimestampWaitSupportedForEventsThenReturnTrue) { EXPECT_FALSE(productHelper->isTimestampWaitSupportedForEvents()); -} \ No newline at end of file +} + +MTLTEST_F(MtlProductHelperLinux, givenBooleanUncachedWhenCallOverridePatIndexThenProperPatIndexIsReturned) { + uint64_t patIndex = 1u; + bool isUncached = true; + EXPECT_EQ(0u, productHelper->overridePatIndex(isUncached, patIndex, AllocationType::buffer)); + EXPECT_EQ(3u, productHelper->overridePatIndex(isUncached, patIndex, AllocationType::commandBuffer)); + + isUncached = false; + EXPECT_EQ(0u, productHelper->overridePatIndex(isUncached, patIndex, AllocationType::buffer)); + EXPECT_EQ(3u, productHelper->overridePatIndex(isUncached, patIndex, AllocationType::commandBuffer)); +}