From 77d70fd4a749615de8e67d5836ad398a88677785 Mon Sep 17 00:00:00 2001 From: Dominik Dabek Date: Thu, 30 Mar 2023 15:48:02 +0000 Subject: [PATCH] feature: pvc, cpu copy in program init Use cpu copy for globals surface when allocated through svm, allocation not set as lockable but locking allocation succeeds. Make sure gfx allocations is unlocked after copy. Related-To: NEO-7796 Signed-off-by: Dominik Dabek --- shared/source/os_interface/product_helper.h | 1 + shared/source/os_interface/product_helper.inl | 7 ++- .../source/os_interface/product_helper_hw.h | 1 + .../source/program/program_initialization.cpp | 8 +++- .../pvc/os_agnostic_product_helper_pvc.inl | 9 +++- .../os_interface/product_helper_tests.cpp | 12 +++++ .../program/program_initialization_tests.cpp | 4 ++ .../pvc/engine_node_helper_tests_pvc.cpp | 48 +++++++++++++++++++ .../pvc/excludes_xe_hpc_core_pvc.cpp | 1 + 9 files changed, 86 insertions(+), 5 deletions(-) diff --git a/shared/source/os_interface/product_helper.h b/shared/source/os_interface/product_helper.h index ece582a1d9..7a3d72c908 100644 --- a/shared/source/os_interface/product_helper.h +++ b/shared/source/os_interface/product_helper.h @@ -149,6 +149,7 @@ class ProductHelper { virtual bool isBcsReportWaRequired(const HardwareInfo &hwInfo) const = 0; virtual bool isBlitSplitEnqueueWARequired(const HardwareInfo &hwInfo) const = 0; virtual bool isBlitCopyRequiredForLocalMemory(const RootDeviceEnvironment &rootDeviceEnvironment, const GraphicsAllocation &allocation) const = 0; + virtual bool isBlitCopyRequiredForLocalMemory(const RootDeviceEnvironment &rootDeviceEnvironment, const GraphicsAllocation &allocation, bool lockingSucceeded) const = 0; virtual bool isInitDeviceWithFirstSubmissionRequired(const HardwareInfo &hwInfo) const = 0; virtual bool isImplicitScalingSupported(const HardwareInfo &hwInfo) const = 0; virtual bool isCpuCopyNecessary(const void *ptr, MemoryManager *memoryManager) const = 0; diff --git a/shared/source/os_interface/product_helper.inl b/shared/source/os_interface/product_helper.inl index a1c0afd0df..03c25cee4e 100644 --- a/shared/source/os_interface/product_helper.inl +++ b/shared/source/os_interface/product_helper.inl @@ -485,7 +485,7 @@ bool ProductHelperHw::isBlitSplitEnqueueWARequired(const HardwareInf } template -bool ProductHelperHw::isBlitCopyRequiredForLocalMemory(const RootDeviceEnvironment &rootDeviceEnvironment, const GraphicsAllocation &allocation) const { +bool ProductHelperHw::isBlitCopyRequiredForLocalMemory(const RootDeviceEnvironment &rootDeviceEnvironment, const GraphicsAllocation &allocation, bool lockingSucceeded) const { auto &productHelper = rootDeviceEnvironment.getHelper(); auto &hwInfo = *rootDeviceEnvironment.getHardwareInfo(); return allocation.isAllocatedInLocalMemoryPool() && @@ -493,6 +493,11 @@ bool ProductHelperHw::isBlitCopyRequiredForLocalMemory(const RootDev !allocation.isAllocationLockable()); } +template +bool ProductHelperHw::isBlitCopyRequiredForLocalMemory(const RootDeviceEnvironment &rootDeviceEnvironment, const GraphicsAllocation &allocation) const { + return this->isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, allocation, false); +} + template bool ProductHelperHw::isInitDeviceWithFirstSubmissionRequired(const HardwareInfo &hwInfo) const { return false; diff --git a/shared/source/os_interface/product_helper_hw.h b/shared/source/os_interface/product_helper_hw.h index 408fb92705..bb16c48b15 100644 --- a/shared/source/os_interface/product_helper_hw.h +++ b/shared/source/os_interface/product_helper_hw.h @@ -101,6 +101,7 @@ class ProductHelperHw : public ProductHelper { bool allowMemoryPrefetch(const HardwareInfo &hwInfo) const override; bool isBcsReportWaRequired(const HardwareInfo &hwInfo) const override; bool isBlitCopyRequiredForLocalMemory(const RootDeviceEnvironment &rootDeviceEnvironment, const GraphicsAllocation &allocation) const override; + bool isBlitCopyRequiredForLocalMemory(const RootDeviceEnvironment &rootDeviceEnvironment, const GraphicsAllocation &allocation, bool lockingSucceeded) const override; bool isImplicitScalingSupported(const HardwareInfo &hwInfo) const override; bool isCpuCopyNecessary(const void *ptr, MemoryManager *memoryManager) const override; bool isUnlockingLockedPtrNecessary(const HardwareInfo &hwInfo) const override; diff --git a/shared/source/program/program_initialization.cpp b/shared/source/program/program_initialization.cpp index 207b5c6fee..336114f53f 100644 --- a/shared/source/program/program_initialization.cpp +++ b/shared/source/program/program_initialization.cpp @@ -29,7 +29,7 @@ GraphicsAllocation *allocateGlobalsSurface(NEO::SVMAllocsManager *const svmAlloc if (linkerInput != nullptr) { globalsAreExported = constant ? linkerInput->getTraits().exportsGlobalConstants : linkerInput->getTraits().exportsGlobalVariables; } - + bool lockingSucceeded = false; if (globalsAreExported && (svmAllocManager != nullptr)) { RootDeviceIndicesContainer rootDeviceIndices; rootDeviceIndices.push_back(rootDeviceIndex); @@ -45,6 +45,7 @@ GraphicsAllocation *allocateGlobalsSurface(NEO::SVMAllocsManager *const svmAlloc auto usmAlloc = svmAllocManager->getSVMAlloc(ptr); UNRECOVERABLE_IF(usmAlloc == nullptr); gpuAllocation = usmAlloc->gpuAllocations.getGraphicsAllocation(rootDeviceIndex); + lockingSucceeded = device.getMemoryManager()->lockResource(gpuAllocation) != nullptr; } else { auto allocationType = constant ? AllocationType::CONSTANT_SURFACE : AllocationType::GLOBAL_SURFACE; gpuAllocation = device.getMemoryManager()->allocateGraphicsMemoryWithProperties({rootDeviceIndex, @@ -64,9 +65,12 @@ GraphicsAllocation *allocateGlobalsSurface(NEO::SVMAllocsManager *const svmAlloc bool isOnlyBssData = (totalSize == zeroInitSize); if (false == isOnlyBssData) { auto initSize = totalSize - zeroInitSize; - auto success = MemoryTransferHelper::transferMemoryToAllocation(productHelper.isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, *gpuAllocation), + auto success = MemoryTransferHelper::transferMemoryToAllocation(productHelper.isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, *gpuAllocation, lockingSucceeded), device, gpuAllocation, 0, initData, initSize); UNRECOVERABLE_IF(!success); + if (lockingSucceeded) { + gpuAllocation->unlock(); + } } return gpuAllocation; } diff --git a/shared/source/xe_hpc_core/pvc/os_agnostic_product_helper_pvc.inl b/shared/source/xe_hpc_core/pvc/os_agnostic_product_helper_pvc.inl index 14c91dfc5a..dd0939cbaa 100644 --- a/shared/source/xe_hpc_core/pvc/os_agnostic_product_helper_pvc.inl +++ b/shared/source/xe_hpc_core/pvc/os_agnostic_product_helper_pvc.inl @@ -141,7 +141,7 @@ bool ProductHelperHw::isBcsReportWaRequired(const HardwareInfo &hwIn } template <> -bool ProductHelperHw::isBlitCopyRequiredForLocalMemory(const RootDeviceEnvironment &rootDeviceEnvironment, const GraphicsAllocation &allocation) const { +bool ProductHelperHw::isBlitCopyRequiredForLocalMemory(const RootDeviceEnvironment &rootDeviceEnvironment, const GraphicsAllocation &allocation, bool lockingSucceeded) const { if (!allocation.isAllocatedInLocalMemoryPool()) { return false; } @@ -152,7 +152,7 @@ bool ProductHelperHw::isBlitCopyRequiredForLocalMemory(const RootDev return true; } - if (!allocation.isAllocationLockable()) { + if (!allocation.isAllocationLockable() && !lockingSucceeded) { return true; } @@ -166,6 +166,11 @@ bool ProductHelperHw::isBlitCopyRequiredForLocalMemory(const RootDev return false; } +template <> +bool ProductHelperHw::isBlitCopyRequiredForLocalMemory(const RootDeviceEnvironment &rootDeviceEnvironment, const GraphicsAllocation &allocation) const { + return this->isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, allocation, false); +} + template <> bool ProductHelperHw::isTlbFlushRequired() const { return false; diff --git a/shared/test/unit_test/os_interface/product_helper_tests.cpp b/shared/test/unit_test/os_interface/product_helper_tests.cpp index ba30414cf5..e44b252ff8 100644 --- a/shared/test/unit_test/os_interface/product_helper_tests.cpp +++ b/shared/test/unit_test/os_interface/product_helper_tests.cpp @@ -433,18 +433,24 @@ HWTEST_F(ProductHelperTest, givenLockableAllocationWhenGettingIsBlitCopyRequired DebugManager.flags.ForceLocalMemoryAccessMode.set(0); EXPECT_FALSE(productHelper->isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, graphicsAllocation)); + EXPECT_FALSE(productHelper->isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, graphicsAllocation, true)); DebugManager.flags.ForceLocalMemoryAccessMode.set(1); EXPECT_FALSE(productHelper->isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, graphicsAllocation)); + EXPECT_FALSE(productHelper->isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, graphicsAllocation, true)); DebugManager.flags.ForceLocalMemoryAccessMode.set(3); EXPECT_TRUE(productHelper->isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, graphicsAllocation)); + EXPECT_TRUE(productHelper->isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, graphicsAllocation, true)); pInHwInfo.capabilityTable.blitterOperationsSupported = false; EXPECT_TRUE(productHelper->isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, graphicsAllocation)); + EXPECT_TRUE(productHelper->isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, graphicsAllocation, true)); graphicsAllocation.overrideMemoryPool(MemoryPool::System64KBPages); EXPECT_FALSE(productHelper->isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, graphicsAllocation)); + EXPECT_FALSE(productHelper->isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, graphicsAllocation, true)); pInHwInfo.capabilityTable.blitterOperationsSupported = true; EXPECT_FALSE(productHelper->isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, graphicsAllocation)); + EXPECT_FALSE(productHelper->isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, graphicsAllocation, true)); } HWTEST_F(ProductHelperTest, givenNotLockableAllocationWhenGettingIsBlitCopyRequiredForLocalMemoryThenCorrectValuesAreReturned) { @@ -473,18 +479,24 @@ HWTEST_F(ProductHelperTest, givenNotLockableAllocationWhenGettingIsBlitCopyRequi DebugManager.flags.ForceLocalMemoryAccessMode.set(0); EXPECT_TRUE(productHelper->isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, graphicsAllocation)); + EXPECT_TRUE(productHelper->isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, graphicsAllocation, true)); DebugManager.flags.ForceLocalMemoryAccessMode.set(1); EXPECT_TRUE(productHelper->isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, graphicsAllocation)); + EXPECT_TRUE(productHelper->isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, graphicsAllocation, true)); DebugManager.flags.ForceLocalMemoryAccessMode.set(3); EXPECT_TRUE(productHelper->isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, graphicsAllocation)); + EXPECT_TRUE(productHelper->isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, graphicsAllocation, true)); hwInfo.capabilityTable.blitterOperationsSupported = false; EXPECT_TRUE(productHelper->isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, graphicsAllocation)); + EXPECT_TRUE(productHelper->isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, graphicsAllocation, true)); graphicsAllocation.overrideMemoryPool(MemoryPool::System64KBPages); EXPECT_FALSE(productHelper->isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, graphicsAllocation)); + EXPECT_FALSE(productHelper->isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, graphicsAllocation, true)); hwInfo.capabilityTable.blitterOperationsSupported = true; EXPECT_FALSE(productHelper->isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, graphicsAllocation)); + EXPECT_FALSE(productHelper->isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, graphicsAllocation, true)); } HWTEST2_F(ProductHelperTest, givenProductHelperWhenGettingIsBlitCopyRequiredForLocalMemoryThenFalseIsReturned, IsAtMostGen11) { diff --git a/shared/test/unit_test/program/program_initialization_tests.cpp b/shared/test/unit_test/program/program_initialization_tests.cpp index 13add0c45d..9910ee27ca 100644 --- a/shared/test/unit_test/program/program_initialization_tests.cpp +++ b/shared/test/unit_test/program/program_initialization_tests.cpp @@ -85,6 +85,7 @@ TEST(AllocateGlobalSurfaceTest, GivenSvmAllocsManagerWhenGlobalsAreExportedThenM ASSERT_NE(nullptr, svmAllocsManager.getSVMAlloc(reinterpret_cast(static_cast(alloc->getGpuAddress())))); EXPECT_TRUE(alloc->isMemObjectsAllocationWithWritableFlags()); EXPECT_EQ(DEVICE_UNIFIED_MEMORY, svmAllocsManager.getSVMAlloc(reinterpret_cast(alloc->getGpuAddress()))->memoryType); + EXPECT_FALSE(alloc->isLocked()); svmAllocsManager.freeSVMAlloc(reinterpret_cast(static_cast(alloc->getGpuAddress()))); alloc = allocateGlobalsSurface(&svmAllocsManager, device, initData.size(), 0u, true /* constant */, &linkerInputExportGlobalVariables, initData.data()); @@ -92,6 +93,7 @@ TEST(AllocateGlobalSurfaceTest, GivenSvmAllocsManagerWhenGlobalsAreExportedThenM ASSERT_EQ(initData.size(), alloc->getUnderlyingBufferSize()); EXPECT_EQ(0, memcmp(alloc->getUnderlyingBuffer(), initData.data(), initData.size())); EXPECT_EQ(nullptr, svmAllocsManager.getSVMAlloc(reinterpret_cast(static_cast(alloc->getGpuAddress())))); + EXPECT_FALSE(alloc->isLocked()); device.getMemoryManager()->freeGraphicsMemory(alloc); alloc = allocateGlobalsSurface(&svmAllocsManager, device, initData.size(), 0u, false /* constant */, &linkerInputExportGlobalConstants, initData.data()); @@ -99,6 +101,7 @@ TEST(AllocateGlobalSurfaceTest, GivenSvmAllocsManagerWhenGlobalsAreExportedThenM ASSERT_EQ(initData.size(), alloc->getUnderlyingBufferSize()); EXPECT_EQ(0, memcmp(alloc->getUnderlyingBuffer(), initData.data(), initData.size())); EXPECT_EQ(nullptr, svmAllocsManager.getSVMAlloc(reinterpret_cast(static_cast(alloc->getGpuAddress())))); + EXPECT_FALSE(alloc->isLocked()); device.getMemoryManager()->freeGraphicsMemory(alloc); alloc = allocateGlobalsSurface(&svmAllocsManager, device, initData.size(), 0u, false /* constant */, &linkerInputExportGlobalVariables, initData.data()); @@ -108,6 +111,7 @@ TEST(AllocateGlobalSurfaceTest, GivenSvmAllocsManagerWhenGlobalsAreExportedThenM EXPECT_NE(nullptr, svmAllocsManager.getSVMAlloc(reinterpret_cast(static_cast(alloc->getGpuAddress())))); EXPECT_TRUE(alloc->isMemObjectsAllocationWithWritableFlags()); EXPECT_EQ(DEVICE_UNIFIED_MEMORY, svmAllocsManager.getSVMAlloc(reinterpret_cast(alloc->getGpuAddress()))->memoryType); + EXPECT_FALSE(alloc->isLocked()); svmAllocsManager.freeSVMAlloc(reinterpret_cast(static_cast(alloc->getGpuAddress()))); } diff --git a/shared/test/unit_test/xe_hpc_core/pvc/engine_node_helper_tests_pvc.cpp b/shared/test/unit_test/xe_hpc_core/pvc/engine_node_helper_tests_pvc.cpp index c6d8960fb2..f772d60277 100644 --- a/shared/test/unit_test/xe_hpc_core/pvc/engine_node_helper_tests_pvc.cpp +++ b/shared/test/unit_test/xe_hpc_core/pvc/engine_node_helper_tests_pvc.cpp @@ -11,6 +11,8 @@ #include "shared/test/common/fixtures/device_fixture.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_device.h" +#include "shared/test/common/mocks/mock_execution_environment.h" +#include "shared/test/common/mocks/mock_gmm.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/test_macros/header/per_product_test_definitions.h" #include "shared/test/common/test_macros/test.h" @@ -540,3 +542,49 @@ PVCTEST_F(EngineNodeHelperPvcTests, givenNonTile0AccessWhenGettingIsBlitCopyRequ EXPECT_FALSE(productHelper.isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, graphicsAllocation)); } } + +PVCTEST_F(EngineNodeHelperPvcTests, givenNotLockableAllocationWhenGettingIsBlitCopyRequiredForLocalMemoryThenCorrectValuesAreReturned) { + DebugManagerStateRestore restore{}; + auto &rootDeviceEnvironment = pDevice->getRootDeviceEnvironment(); + auto &hwInfo = *rootDeviceEnvironment.getMutableHardwareInfo(); + + hwInfo.capabilityTable.blitterOperationsSupported = true; + + MockGraphicsAllocation graphicsAllocation; + graphicsAllocation.setAllocationType(AllocationType::SVM_GPU); + EXPECT_FALSE(GraphicsAllocation::isLockable(graphicsAllocation.getAllocationType())); + graphicsAllocation.overrideMemoryPool(MemoryPool::LocalMemory); + + MockExecutionEnvironment executionEnvironment(&hwInfo); + executionEnvironment.initGmm(); + executionEnvironment.prepareRootDeviceEnvironments(1); + auto gmmHelper = executionEnvironment.rootDeviceEnvironments[0]->getGmmHelper(); + + MockGmm mockGmm(gmmHelper, nullptr, 100, 100, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true); + mockGmm.resourceParams.Flags.Info.NotLockable = true; + graphicsAllocation.setDefaultGmm(&mockGmm); + + auto &productHelper = getHelper(); + EXPECT_TRUE(productHelper.isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, graphicsAllocation)); + + DebugManager.flags.ForceLocalMemoryAccessMode.set(0); + EXPECT_TRUE(productHelper.isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, graphicsAllocation)); + EXPECT_FALSE(productHelper.isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, graphicsAllocation, true)); + DebugManager.flags.ForceLocalMemoryAccessMode.set(1); + EXPECT_TRUE(productHelper.isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, graphicsAllocation)); + EXPECT_FALSE(productHelper.isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, graphicsAllocation, true)); + + DebugManager.flags.ForceLocalMemoryAccessMode.set(3); + EXPECT_TRUE(productHelper.isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, graphicsAllocation)); + EXPECT_TRUE(productHelper.isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, graphicsAllocation, true)); + hwInfo.capabilityTable.blitterOperationsSupported = false; + EXPECT_TRUE(productHelper.isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, graphicsAllocation)); + EXPECT_TRUE(productHelper.isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, graphicsAllocation, true)); + + graphicsAllocation.overrideMemoryPool(MemoryPool::System64KBPages); + EXPECT_FALSE(productHelper.isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, graphicsAllocation)); + EXPECT_FALSE(productHelper.isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, graphicsAllocation, true)); + hwInfo.capabilityTable.blitterOperationsSupported = true; + EXPECT_FALSE(productHelper.isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, graphicsAllocation)); + EXPECT_FALSE(productHelper.isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, graphicsAllocation, true)); +} \ No newline at end of file diff --git a/shared/test/unit_test/xe_hpc_core/pvc/excludes_xe_hpc_core_pvc.cpp b/shared/test/unit_test/xe_hpc_core/pvc/excludes_xe_hpc_core_pvc.cpp index 38e5942f89..3136d53e6d 100644 --- a/shared/test/unit_test/xe_hpc_core/pvc/excludes_xe_hpc_core_pvc.cpp +++ b/shared/test/unit_test/xe_hpc_core/pvc/excludes_xe_hpc_core_pvc.cpp @@ -8,5 +8,6 @@ #include "shared/test/common/test_macros/hw_test_base.h" HWTEST_EXCLUDE_PRODUCT(ProductHelperTest, givenProductHelperWhenAskedIfIsBlitSplitEnqueueWARequiredThenReturnFalse, IGFX_PVC); +HWTEST_EXCLUDE_PRODUCT(ProductHelperTest, givenNotLockableAllocationWhenGettingIsBlitCopyRequiredForLocalMemoryThenCorrectValuesAreReturned, IGFX_PVC); HWTEST_EXCLUDE_PRODUCT(BlitTests, GivenCpuAccessToLocalMemoryWhenGettingMaxBlitSizeThenValuesAreOverriden_BlitPlatforms, IGFX_PVC); HWTEST_EXCLUDE_PRODUCT(GfxCoreHelperTest, GivenCooperativeEngineSupportedAndNotUsedWhenAdjustMaxWorkGroupCountIsCalledThenSmallerValueIsReturned, IGFX_PVC);