From 83637404bf3743e2b03387ec13e43f84db1c02df Mon Sep 17 00:00:00 2001 From: Lukasz Jobczyk Date: Tue, 11 Mar 2025 11:57:35 +0000 Subject: [PATCH] performance: Cache timestamps on CPU Rename ProductHelper::overrideAllocationCacheable to overrideAllocationCpuCacheable and report timestampPacketTagBuffer allocations as CPU-cacheable on LNL and PTL. Signed-off-by: Lukasz Jobczyk --- .../os_interface/linux/drm_memory_manager.cpp | 2 +- shared/source/os_interface/product_helper.h | 2 +- shared/source/os_interface/product_helper.inl | 2 +- shared/source/os_interface/product_helper_hw.h | 2 +- .../os_interface/windows/wddm_memory_manager.cpp | 4 ++-- .../lnl/os_agnostic_product_helper_lnl.inl | 4 ++-- .../ptl/os_agnostic_product_helper_ptl.inl | 4 ++-- .../xe_lpg/os_agnostic_product_helper_xe_lpg.inl | 2 +- shared/test/common/mocks/mock_product_helper.h | 2 +- .../windows/wddm_memory_manager_tests.cpp | 6 +++--- .../xe2_hpg_core/lnl/product_helper_tests_lnl.cpp | 15 ++++++++------- .../xe3_core/ptl/product_helper_tests_ptl.cpp | 15 ++++++++------- .../xe_hpg_core/mtl/product_helper_tests_mtl.cpp | 8 ++++---- 13 files changed, 35 insertions(+), 33 deletions(-) diff --git a/shared/source/os_interface/linux/drm_memory_manager.cpp b/shared/source/os_interface/linux/drm_memory_manager.cpp index bdd69e3aa7..231e42d4e5 100644 --- a/shared/source/os_interface/linux/drm_memory_manager.cpp +++ b/shared/source/os_interface/linux/drm_memory_manager.cpp @@ -1980,7 +1980,7 @@ inline std::unique_ptr DrmMemoryManager::makeGmmIfSingleHandle(const Alloca gmmRequirements.allowLargePages = true; gmmRequirements.preferCompressed = allocationData.flags.preferCompressed; - if (productHelper.overrideAllocationCacheable(allocationData)) { + if (productHelper.overrideAllocationCpuCacheable(allocationData)) { gmmRequirements.overriderCacheable.enableOverride = true; gmmRequirements.overriderCacheable.value = true; } diff --git a/shared/source/os_interface/product_helper.h b/shared/source/os_interface/product_helper.h index 1235809a9f..39692990b4 100644 --- a/shared/source/os_interface/product_helper.h +++ b/shared/source/os_interface/product_helper.h @@ -196,7
+196,7 @@ class ProductHelper { virtual uint32_t getMaxNumSamplers() const = 0; virtual uint32_t getCommandBuffersPreallocatedPerCommandQueue() const = 0; virtual uint32_t getInternalHeapsPreallocated() const = 0; - virtual bool overrideAllocationCacheable(const AllocationData &allocationData) const = 0; + virtual bool overrideAllocationCpuCacheable(const AllocationData &allocationData) const = 0; virtual bool is2MBLocalMemAlignmentEnabled() const = 0; virtual bool getFrontEndPropertyScratchSizeSupport() const = 0; diff --git a/shared/source/os_interface/product_helper.inl b/shared/source/os_interface/product_helper.inl index 9f640e154c..3b6b2bce75 100644 --- a/shared/source/os_interface/product_helper.inl +++ b/shared/source/os_interface/product_helper.inl @@ -219,7 +219,7 @@ uint64_t ProductHelperHw::getDeviceMemoryMaxBandWidthInBytesPerSecon } template -bool ProductHelperHw::overrideAllocationCacheable(const AllocationData &allocationData) const { +bool ProductHelperHw::overrideAllocationCpuCacheable(const AllocationData &allocationData) const { return false; } diff --git a/shared/source/os_interface/product_helper_hw.h b/shared/source/os_interface/product_helper_hw.h index 149272c89b..99655adc4e 100644 --- a/shared/source/os_interface/product_helper_hw.h +++ b/shared/source/os_interface/product_helper_hw.h @@ -137,7 +137,7 @@ class ProductHelperHw : public ProductHelper { uint32_t getMaxNumSamplers() const override; uint32_t getCommandBuffersPreallocatedPerCommandQueue() const override; uint32_t getInternalHeapsPreallocated() const override; - bool overrideAllocationCacheable(const AllocationData &allocationData) const override; + bool overrideAllocationCpuCacheable(const AllocationData &allocationData) const override; bool is2MBLocalMemAlignmentEnabled() const override; bool getFrontEndPropertyScratchSizeSupport() const override; diff --git a/shared/source/os_interface/windows/wddm_memory_manager.cpp b/shared/source/os_interface/windows/wddm_memory_manager.cpp 
index c69c0852a5..723d01a04f 100644 --- a/shared/source/os_interface/windows/wddm_memory_manager.cpp +++ b/shared/source/os_interface/windows/wddm_memory_manager.cpp @@ -234,7 +234,7 @@ GraphicsAllocation *WddmMemoryManager::allocateGraphicsMemoryUsingKmdAndMapItToC GmmRequirements gmmRequirements{}; gmmRequirements.allowLargePages = allowLargePages; gmmRequirements.preferCompressed = allocationData.flags.preferCompressed; - if (productHelper.overrideAllocationCacheable(allocationData)) { + if (productHelper.overrideAllocationCpuCacheable(allocationData)) { gmmRequirements.overriderCacheable.enableOverride = true; gmmRequirements.overriderCacheable.value = true; } @@ -464,7 +464,7 @@ GraphicsAllocation *WddmMemoryManager::allocateGraphicsMemoryForNonSvmHostPtr(co GmmRequirements gmmRequirements{}; gmmRequirements.allowLargePages = true; gmmRequirements.preferCompressed = false; - if (productHelper.overrideAllocationCacheable(allocationData)) { + if (productHelper.overrideAllocationCpuCacheable(allocationData)) { gmmRequirements.overriderCacheable.enableOverride = true; gmmRequirements.overriderCacheable.value = true; } diff --git a/shared/source/xe2_hpg_core/lnl/os_agnostic_product_helper_lnl.inl b/shared/source/xe2_hpg_core/lnl/os_agnostic_product_helper_lnl.inl index 90f1f6d94f..2f7c533afd 100644 --- a/shared/source/xe2_hpg_core/lnl/os_agnostic_product_helper_lnl.inl +++ b/shared/source/xe2_hpg_core/lnl/os_agnostic_product_helper_lnl.inl @@ -12,8 +12,8 @@ namespace NEO { template <> -bool ProductHelperHw::overrideAllocationCacheable(const AllocationData &allocationData) const { - return allocationData.type == AllocationType::commandBuffer || this->overrideCacheableForDcFlushMitigation(allocationData.type); +bool ProductHelperHw::overrideAllocationCpuCacheable(const AllocationData &allocationData) const { + return allocationData.type == AllocationType::commandBuffer || allocationData.type == AllocationType::timestampPacketTagBuffer || 
this->overrideCacheableForDcFlushMitigation(allocationData.type); } template <> diff --git a/shared/source/xe3_core/ptl/os_agnostic_product_helper_ptl.inl b/shared/source/xe3_core/ptl/os_agnostic_product_helper_ptl.inl index 0c68776f45..56684c3cea 100644 --- a/shared/source/xe3_core/ptl/os_agnostic_product_helper_ptl.inl +++ b/shared/source/xe3_core/ptl/os_agnostic_product_helper_ptl.inl @@ -10,8 +10,8 @@ namespace NEO { template <> -bool ProductHelperHw::overrideAllocationCacheable(const AllocationData &allocationData) const { - return allocationData.type == AllocationType::commandBuffer || this->overrideCacheableForDcFlushMitigation(allocationData.type); +bool ProductHelperHw::overrideAllocationCpuCacheable(const AllocationData &allocationData) const { + return allocationData.type == AllocationType::commandBuffer || allocationData.type == AllocationType::timestampPacketTagBuffer || this->overrideCacheableForDcFlushMitigation(allocationData.type); } template <> diff --git a/shared/source/xe_hpg_core/xe_lpg/os_agnostic_product_helper_xe_lpg.inl b/shared/source/xe_hpg_core/xe_lpg/os_agnostic_product_helper_xe_lpg.inl index d66a5632d9..5ee17f41a4 100644 --- a/shared/source/xe_hpg_core/xe_lpg/os_agnostic_product_helper_xe_lpg.inl +++ b/shared/source/xe_hpg_core/xe_lpg/os_agnostic_product_helper_xe_lpg.inl @@ -73,7 +73,7 @@ bool ProductHelperHw::isDeviceUsmPoolAllocatorSupported() const { } template <> -bool ProductHelperHw::overrideAllocationCacheable(const AllocationData &allocationData) const { +bool ProductHelperHw::overrideAllocationCpuCacheable(const AllocationData &allocationData) const { return allocationData.type == AllocationType::commandBuffer; } diff --git a/shared/test/common/mocks/mock_product_helper.h b/shared/test/common/mocks/mock_product_helper.h index a495b8050a..249e22a545 100644 --- a/shared/test/common/mocks/mock_product_helper.h +++ b/shared/test/common/mocks/mock_product_helper.h @@ -17,7 +17,7 @@ struct MockProductHelper : ProductHelperHw { 
MockProductHelper() = default; ADDMETHOD_CONST_NOBASE(is48bResourceNeededForRayTracing, bool, true, ()); - ADDMETHOD_CONST_NOBASE(overrideAllocationCacheable, bool, false, (const AllocationData &allocationData)); + ADDMETHOD_CONST_NOBASE(overrideAllocationCpuCacheable, bool, false, (const AllocationData &allocationData)); ADDMETHOD_NOBASE(configureHwInfoWddm, int, 0, (const HardwareInfo *inHwInfo, HardwareInfo *outHwInfo, const RootDeviceEnvironment &rootDeviceEnvironment)); ADDMETHOD_CONST_NOBASE(supportReadOnlyAllocations, bool, false, ()); ADDMETHOD_CONST_NOBASE(isBlitCopyRequiredForLocalMemory, bool, true, (const RootDeviceEnvironment &rootDeviceEnvironment, const GraphicsAllocation &allocation)); diff --git a/shared/test/unit_test/os_interface/windows/wddm_memory_manager_tests.cpp b/shared/test/unit_test/os_interface/windows/wddm_memory_manager_tests.cpp index cdc093c8fb..b5436acd92 100644 --- a/shared/test/unit_test/os_interface/windows/wddm_memory_manager_tests.cpp +++ b/shared/test/unit_test/os_interface/windows/wddm_memory_manager_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2022-2024 Intel Corporation + * Copyright (C) 2022-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -142,11 +142,11 @@ TEST_F(WddmMemoryManagerTests, GivenNotCompressedAndNotLockableAllocationTypeWhe memoryManager->freeGraphicsMemory(graphicsAllocation); } -TEST_F(WddmMemoryManagerTests, GivenOverrideAllocationCacheableWhenAllocateUsingKmdAndMapToCpuVaThenOverrideAllocationCacheable) { +TEST_F(WddmMemoryManagerTests, GivenOverrideAllocationCpuCacheableWhenAllocateUsingKmdAndMapToCpuVaThenOverrideAllocationCpuCacheable) { NEO::AllocationData allocData = {}; allocData.type = NEO::AllocationType::commandBuffer; auto mockProductHelper = std::make_unique(); - mockProductHelper->overrideAllocationCacheableResult = true; + mockProductHelper->overrideAllocationCpuCacheableResult = true;
executionEnvironment->rootDeviceEnvironments[0]->productHelper.reset(mockProductHelper.release()); memoryManager->callBaseAllocateGraphicsMemoryUsingKmdAndMapItToCpuVA = true; diff --git a/shared/test/unit_test/xe2_hpg_core/lnl/product_helper_tests_lnl.cpp b/shared/test/unit_test/xe2_hpg_core/lnl/product_helper_tests_lnl.cpp index 54a267bbf1..3bdde8ccc1 100644 --- a/shared/test/unit_test/xe2_hpg_core/lnl/product_helper_tests_lnl.cpp +++ b/shared/test/unit_test/xe2_hpg_core/lnl/product_helper_tests_lnl.cpp @@ -108,13 +108,13 @@ LNLTEST_F(LnlProductHelper, givenProductHelperWhenCallIsCachingOnCpuAvailableThe EXPECT_FALSE(productHelper->isCachingOnCpuAvailable()); } -LNLTEST_F(LnlProductHelper, givenProductHelperWhenCheckOverrideAllocationCacheableThenTrueIsReturnedForCommandBuffer) { +LNLTEST_F(LnlProductHelper, givenProductHelperWhenCheckOverrideAllocationCpuCacheableThenTrueIsReturnedForCommandBuffer) { AllocationData allocationData{}; allocationData.type = AllocationType::commandBuffer; - EXPECT_TRUE(productHelper->overrideAllocationCacheable(allocationData)); + EXPECT_TRUE(productHelper->overrideAllocationCpuCacheable(allocationData)); allocationData.type = AllocationType::buffer; - EXPECT_FALSE(productHelper->overrideAllocationCacheable(allocationData)); + EXPECT_FALSE(productHelper->overrideAllocationCpuCacheable(allocationData)); } LNLTEST_F(LnlProductHelper, givenExternalHostPtrWhenMitigateDcFlushThenOverrideCacheable) { @@ -123,7 +123,7 @@ LNLTEST_F(LnlProductHelper, givenExternalHostPtrWhenMitigateDcFlushThenOverrideC AllocationData allocationData{}; allocationData.type = AllocationType::externalHostPtr; - EXPECT_FALSE(productHelper->overrideAllocationCacheable(allocationData)); + EXPECT_FALSE(productHelper->overrideAllocationCpuCacheable(allocationData)); debugManager.flags.AllowDcFlush.set(0); @@ -132,7 +132,8 @@ LNLTEST_F(LnlProductHelper, givenExternalHostPtrWhenMitigateDcFlushThenOverrideC allocationData.type = allocationType; switch
(allocationData.type) { case AllocationType::commandBuffer: - EXPECT_TRUE(productHelper->overrideAllocationCacheable(allocationData)); + case AllocationType::timestampPacketTagBuffer: + EXPECT_TRUE(productHelper->overrideAllocationCpuCacheable(allocationData)); break; case AllocationType::externalHostPtr: case AllocationType::bufferHostMemory: @@ -141,11 +142,11 @@ LNLTEST_F(LnlProductHelper, givenExternalHostPtrWhenMitigateDcFlushThenOverrideC case AllocationType::svmZeroCopy: case AllocationType::internalHostMemory: case AllocationType::printfSurface: - EXPECT_TRUE(productHelper->overrideAllocationCacheable(allocationData)); + EXPECT_TRUE(productHelper->overrideAllocationCpuCacheable(allocationData)); EXPECT_TRUE(productHelper->overrideCacheableForDcFlushMitigation(allocationData.type)); break; default: - EXPECT_FALSE(productHelper->overrideAllocationCacheable(allocationData)); + EXPECT_FALSE(productHelper->overrideAllocationCpuCacheable(allocationData)); EXPECT_FALSE(productHelper->overrideCacheableForDcFlushMitigation(allocationData.type)); break; } diff --git a/shared/test/unit_test/xe3_core/ptl/product_helper_tests_ptl.cpp b/shared/test/unit_test/xe3_core/ptl/product_helper_tests_ptl.cpp index 4639ddd843..a75d6f54e4 100644 --- a/shared/test/unit_test/xe3_core/ptl/product_helper_tests_ptl.cpp +++ b/shared/test/unit_test/xe3_core/ptl/product_helper_tests_ptl.cpp @@ -46,13 +46,13 @@ PTLTEST_F(PtlProductHelper, givenProductHelperWhenCheckDirectSubmissionSupported EXPECT_TRUE(productHelper->isDirectSubmissionSupported(releaseHelper)); } -PTLTEST_F(PtlProductHelper, givenProductHelperWhenCheckOverrideAllocationCacheableThenTrueIsReturnedForCommandBuffer) { +PTLTEST_F(PtlProductHelper, givenProductHelperWhenCheckOverrideAllocationCpuCacheableThenTrueIsReturnedForCommandBuffer) { AllocationData allocationData{}; allocationData.type = AllocationType::commandBuffer; - EXPECT_TRUE(productHelper->overrideAllocationCacheable(allocationData)); +
EXPECT_TRUE(productHelper->overrideAllocationCpuCacheable(allocationData)); allocationData.type = AllocationType::buffer; - EXPECT_FALSE(productHelper->overrideAllocationCacheable(allocationData)); + EXPECT_FALSE(productHelper->overrideAllocationCpuCacheable(allocationData)); } PTLTEST_F(PtlProductHelper, givenExternalHostPtrWhenMitigateDcFlushThenOverrideCacheable) { @@ -61,7 +61,7 @@ PTLTEST_F(PtlProductHelper, givenExternalHostPtrWhenMitigateDcFlushThenOverrideC AllocationData allocationData{}; allocationData.type = AllocationType::externalHostPtr; - EXPECT_FALSE(productHelper->overrideAllocationCacheable(allocationData)); + EXPECT_FALSE(productHelper->overrideAllocationCpuCacheable(allocationData)); debugManager.flags.AllowDcFlush.set(0); @@ -70,7 +70,8 @@ PTLTEST_F(PtlProductHelper, givenExternalHostPtrWhenMitigateDcFlushThenOverrideC allocationData.type = allocationType; switch (allocationData.type) { case AllocationType::commandBuffer: - EXPECT_TRUE(productHelper->overrideAllocationCacheable(allocationData)); + case AllocationType::timestampPacketTagBuffer: + EXPECT_TRUE(productHelper->overrideAllocationCpuCacheable(allocationData)); break; case AllocationType::externalHostPtr: case AllocationType::bufferHostMemory: @@ -79,11 +80,11 @@ PTLTEST_F(PtlProductHelper, givenExternalHostPtrWhenMitigateDcFlushThenOverrideC case AllocationType::svmZeroCopy: case AllocationType::internalHostMemory: case AllocationType::printfSurface: - EXPECT_TRUE(productHelper->overrideAllocationCacheable(allocationData)); + EXPECT_TRUE(productHelper->overrideAllocationCpuCacheable(allocationData)); EXPECT_TRUE(productHelper->overrideCacheableForDcFlushMitigation(allocationData.type)); break; default: - EXPECT_FALSE(productHelper->overrideAllocationCacheable(allocationData)); + EXPECT_FALSE(productHelper->overrideAllocationCpuCacheable(allocationData)); EXPECT_FALSE(productHelper->overrideCacheableForDcFlushMitigation(allocationData.type)); break; } diff --git 
a/shared/test/unit_test/xe_hpg_core/mtl/product_helper_tests_mtl.cpp b/shared/test/unit_test/xe_hpg_core/mtl/product_helper_tests_mtl.cpp index f1c3e94aa2..ac5acfbb04 100644 --- a/shared/test/unit_test/xe_hpg_core/mtl/product_helper_tests_mtl.cpp +++ b/shared/test/unit_test/xe_hpg_core/mtl/product_helper_tests_mtl.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2022-2024 Intel Corporation + * Copyright (C) 2022-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -72,11 +72,11 @@ MTLTEST_F(MtlProductHelper, givenMtlWithoutHwIpVersionInHwInfoWhenGettingIpVersi EXPECT_EQ(compilerProductHelper->getDefaultHwIpVersion(), compilerProductHelper->getHwIpVersion(hwInfo)); } -MTLTEST_F(MtlProductHelper, givenProductHelperWhenCheckOverrideAllocationCacheableThenTrueIsReturnedForCommandBuffer) { +MTLTEST_F(MtlProductHelper, givenProductHelperWhenCheckOverrideAllocationCpuCacheableThenTrueIsReturnedForCommandBuffer) { AllocationData allocationData{}; allocationData.type = AllocationType::commandBuffer; - EXPECT_TRUE(productHelper->overrideAllocationCacheable(allocationData)); + EXPECT_TRUE(productHelper->overrideAllocationCpuCacheable(allocationData)); allocationData.type = AllocationType::buffer; - EXPECT_FALSE(productHelper->overrideAllocationCacheable(allocationData)); + EXPECT_FALSE(productHelper->overrideAllocationCpuCacheable(allocationData)); }