From c54c3d796cc513781e785b040eb4fc35b97513a7 Mon Sep 17 00:00:00 2001 From: Compute-Runtime-Validation Date: Sat, 10 Dec 2022 05:25:44 +0100 Subject: [PATCH] Revert "Set isLockable if size small enough for cpu memcpy" This reverts commit 41a80072b905d0703c7d37e1053f9329b1fc55b2. Signed-off-by: Compute-Runtime-Validation --- .../source/cmdlist/cmdlist_hw_immediate.inl | 14 ++++-- .../core/source/context/context_imp.cpp | 1 + .../sources/cmdlist/test_cmdlist_7.cpp | 46 +++---------------- shared/source/helpers/constants.h | 5 -- shared/source/helpers/hw_helper.cpp | 13 ------ shared/source/helpers/hw_helper.h | 2 - .../memory_manager/unified_memory_manager.cpp | 7 --- .../unit_test/helpers/hw_helper_tests.cpp | 20 -------- .../unified_memory_manager_tests.cpp | 33 ------------- 9 files changed, 17 insertions(+), 124 deletions(-) diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl index a8d6beee64..ead2c5c14a 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl @@ -623,9 +623,14 @@ bool CommandListCoreFamilyImmediate::preferCopyThroughLockedPtr(N return true; } - size_t h2DThreshold = 0; - size_t d2HThreshold = 0; - NEO::GfxCoreHelper::getCpuCopyThresholds(h2DThreshold, d2HThreshold); + size_t h2DThreshold = 2 * MemoryConstants::megaByte; + size_t d2HThreshold = 1 * MemoryConstants::kiloByte; + if (NEO::DebugManager.flags.ExperimentalH2DCpuCopyThreshold.get() != -1) { + h2DThreshold = NEO::DebugManager.flags.ExperimentalH2DCpuCopyThreshold.get(); + } + if (NEO::DebugManager.flags.ExperimentalD2HCpuCopyThreshold.get() != -1) { + d2HThreshold = NEO::DebugManager.flags.ExperimentalD2HCpuCopyThreshold.get(); + } if (NEO::GfxCoreHelper::get(this->device->getHwInfo().platform.eRenderCoreFamily).copyThroughLockedPtrEnabled(this->device->getHwInfo())) { return (!srcFound && isSuitableUSMDeviceAlloc(dstAlloc, dstFound) && size <= h2DThreshold) || (!dstFound && isSuitableUSMDeviceAlloc(srcAlloc, srcFound) && size <= d2HThreshold); @@ -636,8 +641,7 @@ bool CommandListCoreFamilyImmediate::preferCopyThroughLockedPtr(N template bool CommandListCoreFamilyImmediate::isSuitableUSMDeviceAlloc(NEO::SvmAllocationData *alloc, bool allocFound) { return allocFound && (alloc->memoryType == InternalMemoryType::DEVICE_UNIFIED_MEMORY) && - alloc->gpuAllocations.getGraphicsAllocation(this->device->getRootDeviceIndex())->storageInfo.getNumBanks() == 1 && - alloc->gpuAllocations.getGraphicsAllocation(this->device->getRootDeviceIndex())->storageInfo.isLockable; + alloc->gpuAllocations.getGraphicsAllocation(this->device->getRootDeviceIndex())->storageInfo.getNumBanks() == 1; } template diff --git a/level_zero/core/source/context/context_imp.cpp b/level_zero/core/source/context/context_imp.cpp index be3fda2400..43c58fa17e 100644 --- a/level_zero/core/source/context/context_imp.cpp +++ b/level_zero/core/source/context/context_imp.cpp @@ -172,6 +172,7 @@ ze_result_t ContextImp::allocDeviceMem(ze_device_handle_t hDevice, unifiedMemoryProperties.allocationFlags.flags.shareable = isShareableMemory(deviceDesc->pNext, static_cast(lookupTable.exportMemory), neoDevice); unifiedMemoryProperties.device = neoDevice; unifiedMemoryProperties.allocationFlags.flags.compressedHint = isAllocationSuitableForCompression(lookupTable, *device, size); + if (deviceDesc->flags & ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED) { unifiedMemoryProperties.allocationFlags.flags.locallyUncachedResource = 1; } diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp index b11f5bf392..0b88734e65 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp @@ -1883,9 +1883,8 @@ HWTEST_F(CommandListCreate, givenCommandListWhenRemoveDeallocationContainerDataT struct AppendMemoryLockedCopyFixture : public DeviceFixture { void setUp() { - DebugManager.flags.EnableLocalMemory.set(1); DebugManager.flags.ExperimentalCopyThroughLock.set(1); - DebugManager.flags.ForceLocalMemoryAccessMode.set(0); + DebugManager.flags.EnableLocalMemory.set(1); DeviceFixture::setUp(); nonUsmHostPtr = new char[sz]; @@ -1901,7 +1900,7 @@ struct AppendMemoryLockedCopyFixture : public DeviceFixture { DebugManagerStateRestore restore; char *nonUsmHostPtr; void *devicePtr; - size_t sz = 2 * MemoryConstants::megaByte; + size_t sz = 4 * MemoryConstants::megaByte; }; using AppendMemoryLockedCopyTest = Test; @@ -1925,9 +1924,6 @@ HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListWhenIsSuitableUSM auto dstFound = device->getDriverHandle()->findAllocationDataForRange(devicePtr, 1024, &dstAllocData); EXPECT_FALSE(cmdList.isSuitableUSMDeviceAlloc(srcAllocData, srcFound)); EXPECT_TRUE(cmdList.isSuitableUSMDeviceAlloc(dstAllocData, dstFound)); - - dstAllocData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex())->storageInfo.isLockable = 0; - EXPECT_FALSE(cmdList.isSuitableUSMDeviceAlloc(dstAllocData, dstFound)); } struct LocalMemoryMultiSubDeviceFixture : public SingleRootMultiSubDeviceFixture { @@ -2290,50 +2286,22 @@ HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndNonUsmDstHostP } HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndD2HCopyWhenSizeTooLargeButFlagSetThenUseCpuMemcpy, IsAtLeastSkl) { - constexpr size_t largeSize = 3 * MemoryConstants::megaByte; - DebugManager.flags.ExperimentalD2HCpuCopyThreshold.set(largeSize); + DebugManager.flags.ExperimentalD2HCpuCopyThreshold.set(2048); MockAppendMemoryLockedCopyTestImmediateCmdList cmdList; cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); cmdList.csr = device->getNEODevice()->getInternalEngine().commandStreamReceiver; - ze_device_mem_alloc_desc_t deviceDesc = {}; - void *deviceAlloc; - char *hostAlloc = new char[largeSize]; - context->allocDeviceMem(device->toHandle(), &deviceDesc, largeSize, 1u, &deviceAlloc); - cmdList.appendMemoryCopy(hostAlloc, deviceAlloc, largeSize, nullptr, 0, nullptr); - EXPECT_EQ(cmdList.appendMemoryCopyKernelWithGACalled, 0u); - context->freeMem(deviceAlloc); - delete[] hostAlloc; -} -HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndH2DCopyWhenSizeTooLargeThenUseGpuMemcpy, IsAtLeastSkl) { - constexpr size_t largeSize = 3 * MemoryConstants::megaByte; - MockAppendMemoryLockedCopyTestImmediateCmdList cmdList; - cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); - cmdList.csr = device->getNEODevice()->getInternalEngine().commandStreamReceiver; - ze_device_mem_alloc_desc_t deviceDesc = {}; - void *deviceAlloc; - char *hostAlloc = new char[largeSize]; - context->allocDeviceMem(device->toHandle(), &deviceDesc, largeSize, 1u, &deviceAlloc); - cmdList.appendMemoryCopy(deviceAlloc, hostAlloc, largeSize, nullptr, 0, nullptr); - EXPECT_EQ(cmdList.appendMemoryCopyKernelWithGACalled, 1u); - context->freeMem(deviceAlloc); - delete[] hostAlloc; + cmdList.appendMemoryCopy(nonUsmHostPtr, devicePtr, 2 * MemoryConstants::kiloByte, nullptr, 0, nullptr); + EXPECT_EQ(cmdList.appendMemoryCopyKernelWithGACalled, 0u); } HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndH2DCopyWhenSizeTooLargeButFlagSetThenUseCpuMemcpy, IsAtLeastSkl) { - constexpr size_t largeSize = 3 * MemoryConstants::megaByte; - DebugManager.flags.ExperimentalH2DCpuCopyThreshold.set(largeSize); + DebugManager.flags.ExperimentalH2DCpuCopyThreshold.set(3 * MemoryConstants::megaByte); MockAppendMemoryLockedCopyTestImmediateCmdList cmdList; cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); cmdList.csr = device->getNEODevice()->getInternalEngine().commandStreamReceiver; - ze_device_mem_alloc_desc_t deviceDesc = {}; - void *deviceAlloc; - char *hostAlloc = new char[largeSize]; - context->allocDeviceMem(device->toHandle(), &deviceDesc, largeSize, 1u, &deviceAlloc); - cmdList.appendMemoryCopy(deviceAlloc, hostAlloc, largeSize, nullptr, 0, nullptr); + cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 3 * MemoryConstants::megaByte, nullptr, 0, nullptr); EXPECT_EQ(cmdList.appendMemoryCopyKernelWithGACalled, 0u); - context->freeMem(deviceAlloc); - delete[] hostAlloc; } HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndCpuMemcpyWithDependencyThenAppendBarrierCalled, IsAtLeastSkl) { diff --git a/shared/source/helpers/constants.h b/shared/source/helpers/constants.h index 3fd88506c1..66d8bc0fe6 100644 --- a/shared/source/helpers/constants.h +++ b/shared/source/helpers/constants.h @@ -93,8 +93,3 @@ inline constexpr uint32_t maximalSimdSize = 32; inline constexpr uint32_t maximalSizeOfAtomicType = 8; inline constexpr uint32_t engineGroupCount = static_cast(NEO::EngineGroupType::MaxEngineGroups); } // namespace CommonConstants - -namespace NonUsmCpuCopyConstants { -constexpr size_t h2DThreshold = 2 * MemoryConstants::megaByte; -constexpr size_t d2HThreshold = 1 * MemoryConstants::kiloByte; -} // namespace NonUsmCpuCopyConstants \ No newline at end of file diff --git a/shared/source/helpers/hw_helper.cpp b/shared/source/helpers/hw_helper.cpp index e5a534c91f..bc0178233e 100644 --- a/shared/source/helpers/hw_helper.cpp +++ b/shared/source/helpers/hw_helper.cpp @@ -8,7 +8,6 @@ #include "shared/source/helpers/hw_helper.h" #include "shared/source/debug_settings/debug_settings_manager.h" -#include "shared/source/helpers/constants.h" #include "shared/source/helpers/hw_info.h" #include @@ -81,16 +80,4 @@ uint32_t GfxCoreHelper::getHighestEnabledSlice(const HardwareInfo &hwInfo) { return highestEnabledSlice; } -void HwHelper::getCpuCopyThresholds(size_t &h2DThreshold, size_t &d2HThreshold) { - h2DThreshold = NonUsmCpuCopyConstants::h2DThreshold; - d2HThreshold = NonUsmCpuCopyConstants::d2HThreshold; - - if (NEO::DebugManager.flags.ExperimentalH2DCpuCopyThreshold.get() != -1) { - h2DThreshold = NEO::DebugManager.flags.ExperimentalH2DCpuCopyThreshold.get(); - } - if (NEO::DebugManager.flags.ExperimentalD2HCpuCopyThreshold.get() != -1) { - d2HThreshold = NEO::DebugManager.flags.ExperimentalD2HCpuCopyThreshold.get(); - } -} - } // namespace NEO diff --git a/shared/source/helpers/hw_helper.h b/shared/source/helpers/hw_helper.h index 36a1234349..2515a1a046 100644 --- a/shared/source/helpers/hw_helper.h +++ b/shared/source/helpers/hw_helper.h @@ -171,8 +171,6 @@ class GfxCoreHelper { virtual bool isTimestampShiftRequired() const = 0; virtual bool isRelaxedOrderingSupported() const = 0; - static void getCpuCopyThresholds(size_t &h2DThreshold, size_t &d2HThreshold); - protected: GfxCoreHelper() = default; }; diff --git a/shared/source/memory_manager/unified_memory_manager.cpp b/shared/source/memory_manager/unified_memory_manager.cpp index ef7360acf4..3051ee46bc 100644 --- a/shared/source/memory_manager/unified_memory_manager.cpp +++ b/shared/source/memory_manager/unified_memory_manager.cpp @@ -10,7 +10,6 @@ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/api_specific_config.h" -#include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/memory_properties_helpers.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/os_interface/hw_info_config.h" @@ -260,12 +259,6 @@ void *SVMAllocsManager::createUnifiedMemoryAllocation(size_t size, return allocationFromCache; } } - size_t h2DThreshold = 0; - size_t d2HThreshold = 0; - GfxCoreHelper::getCpuCopyThresholds(h2DThreshold, d2HThreshold); - if (size <= std::max(h2DThreshold, d2HThreshold)) { - unifiedMemoryProperties.makeDeviceBufferLockable = 1; - } } else if (memoryProperties.memoryType == InternalMemoryType::HOST_UNIFIED_MEMORY) { unifiedMemoryProperties.flags.isUSMHostAllocation = true; } diff --git a/shared/test/unit_test/helpers/hw_helper_tests.cpp b/shared/test/unit_test/helpers/hw_helper_tests.cpp index 5843a768b2..403a5cdd83 100644 --- a/shared/test/unit_test/helpers/hw_helper_tests.cpp +++ b/shared/test/unit_test/helpers/hw_helper_tests.cpp @@ -1433,23 +1433,3 @@ TEST(GfxCoreHelperTests, whenIsDynamicallyPopulatedisTrueThengetHighestEnabledSl auto maxSlice = gfxCoreHelper.getHighestEnabledSlice(hwInfo); EXPECT_EQ(maxSlice, 7u); } - -TEST(GfxCoreHelperTests, whenGetCpuCopyThresholdsThenCorrectValueSet) { - size_t h2DThreshold = 0; - size_t d2HThreshold = 0; - GfxCoreHelper::getCpuCopyThresholds(h2DThreshold, d2HThreshold); - EXPECT_EQ(h2DThreshold, NonUsmCpuCopyConstants::h2DThreshold); - EXPECT_EQ(d2HThreshold, NonUsmCpuCopyConstants::d2HThreshold); -} - -TEST(GfxCoreHelperTests, givenThresholdChangedWhenGetCpuCopyThresholdsThenCorrectValueSet) { - DebugManagerStateRestore restore; - DebugManager.flags.ExperimentalH2DCpuCopyThreshold.set(2048); - DebugManager.flags.ExperimentalD2HCpuCopyThreshold.set(2048); - - size_t h2DThreshold = 0; - size_t d2HThreshold = 0; - GfxCoreHelper::getCpuCopyThresholds(h2DThreshold, d2HThreshold); - EXPECT_EQ(h2DThreshold, 2048u); - EXPECT_EQ(d2HThreshold, 2048u); -} \ No newline at end of file diff --git a/shared/test/unit_test/memory_manager/unified_memory_manager_tests.cpp b/shared/test/unit_test/memory_manager/unified_memory_manager_tests.cpp index 3498bbb11c..9d1b93a164 100644 --- a/shared/test/unit_test/memory_manager/unified_memory_manager_tests.cpp +++ b/shared/test/unit_test/memory_manager/unified_memory_manager_tests.cpp @@ -92,36 +92,3 @@ TEST_F(SVMLocalMemoryAllocatorTest, givenKmdMigratedSharedAllocationWhenPrefetch svmManager->freeSVMAlloc(ptr); } - -TEST_F(SVMLocalMemoryAllocatorTest, whenCreateUnifiedMemoryAllocationWithSmallSizeThenSetLockable) { - DebugManagerStateRestore restore; - DebugManager.flags.ForceLocalMemoryAccessMode.set(0); - DebugManager.flags.EnableLocalMemory.set(1); - std::unique_ptr deviceFactory(new UltDeviceFactory(1, 2)); - auto device = deviceFactory->rootDevices[0]; - SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::DEVICE_UNIFIED_MEMORY, rootDeviceIndices, deviceBitfields); - unifiedMemoryProperties.device = device; - auto ptr = svmManager->createUnifiedMemoryAllocation(4096u, unifiedMemoryProperties); - EXPECT_NE(nullptr, ptr); - EXPECT_TRUE(svmManager->getSVMAlloc(ptr)->gpuAllocations.getGraphicsAllocation(mockRootDeviceIndex)->storageInfo.isLockable); - svmManager->freeSVMAlloc(ptr); -} - -TEST_F(SVMLocalMemoryAllocatorTest, whenCreateUnifiedMemoryAllocationWithLargeSizeThenSetLockable) { - if (HwInfoConfig::get(defaultHwInfo->platform.eProductFamily)->isStorageInfoAdjustmentRequired()) { - GTEST_SKIP(); - } - DebugManagerStateRestore restore; - DebugManager.flags.ForceLocalMemoryAccessMode.set(0); - DebugManager.flags.EnableLocalMemory.set(1); - std::unique_ptr deviceFactory(new UltDeviceFactory(1, 2)); - auto device = deviceFactory->rootDevices[0]; - size_t largeSize = std::max(NonUsmCpuCopyConstants::d2HThreshold, NonUsmCpuCopyConstants::h2DThreshold) + 1; - SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::DEVICE_UNIFIED_MEMORY, rootDeviceIndices, deviceBitfields); - unifiedMemoryProperties.device = device; - auto ptr = svmManager->createUnifiedMemoryAllocation(largeSize, unifiedMemoryProperties); - EXPECT_NE(nullptr, ptr); - - EXPECT_FALSE(svmManager->getSVMAlloc(ptr)->gpuAllocations.getGraphicsAllocation(mockRootDeviceIndex)->storageInfo.isLockable); - svmManager->freeSVMAlloc(ptr); -} \ No newline at end of file