From 2fe3804cc23c6cf2d545768e4f74346060bb1baa Mon Sep 17 00:00:00 2001 From: Dominik Dabek Date: Fri, 22 Dec 2023 14:26:30 +0000 Subject: [PATCH] performance(ocl): add usm allocation pooling flag EnableDeviceUsmAllocationPool and EnableHostUsmAllocationPool for device and host allocations respectively. Pool size will be set to flag value * MB. Allocation size threshold to be pooled is 1MB. Pools are created per context. Related-To: NEO-9700 Signed-off-by: Dominik Dabek --- .../unit_tests/sources/kernel/test_kernel.cpp | 13 +- .../unit_tests/sources/memory/test_memory.cpp | 10 +- opencl/source/api/api.cpp | 22 ++- opencl/source/context/context.cpp | 46 ++++++ opencl/source/context/context.h | 17 +++ .../cl_set_kernel_arg_svm_pointer_tests.inl | 25 ++-- .../system_memfence_aub_tests_xe_hpc_core.cpp | 2 +- opencl/test/unit_test/context/CMakeLists.txt | 3 +- .../context/context_negative_tests.cpp | 1 + .../test/unit_test/context/context_tests.cpp | 3 +- .../context/context_usm_memory_pool_tests.cpp | 75 ++++++++++ .../kernel/kernel_arg_buffer_tests.cpp | 1 + .../mem_obj/buffer_pool_alloc_tests.cpp | 1 + .../debug_settings/debug_variables_base.inl | 2 + shared/source/memory_manager/CMakeLists.txt | 2 + .../memory_manager/unified_memory_manager.cpp | 9 +- .../memory_manager/unified_memory_pooling.cpp | 96 +++++++++++++ .../memory_manager/unified_memory_pooling.h | 56 ++++++++ shared/source/utilities/sorted_vector.h | 31 +++- shared/test/common/mocks/CMakeLists.txt | 1 + .../test/common/mocks/mock_usm_memory_pool.h | 18 +++ shared/test/common/test_files/igdrcl.config | 2 + .../unit_test/memory_manager/CMakeLists.txt | 3 +- .../unified_memory_pooling_tests.cpp | 136 ++++++++++++++++++ .../utilities/sorted_vector_tests.cpp | 22 ++- 25 files changed, 562 insertions(+), 35 deletions(-) create mode 100644 opencl/test/unit_test/context/context_usm_memory_pool_tests.cpp create mode 100644 shared/source/memory_manager/unified_memory_pooling.cpp create mode 100644 
shared/source/memory_manager/unified_memory_pooling.h create mode 100644 shared/test/common/mocks/mock_usm_memory_pool.h create mode 100644 shared/test/unit_test/memory_manager/unified_memory_pooling_tests.cpp diff --git a/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp b/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp index 2a7b73f8c5..79b9ab028b 100644 --- a/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp +++ b/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp @@ -207,7 +207,7 @@ TEST_F(SetKernelArgCacheTest, givenValidBufferArgumentWhenSetMultipleTimesThenSe auto allocData = svmAllocsManager->getSVMAlloc(svmAllocation); size_t callCounter = 0u; - + svmAllocsManager->allocationsCounter = 0u; // first setArg - called EXPECT_EQ(ZE_RESULT_SUCCESS, mockKernel.setArgBuffer(0, sizeof(svmAllocation), &svmAllocation)); EXPECT_EQ(++callCounter, mockKernel.setArgBufferWithAllocCalled); @@ -217,15 +217,16 @@ TEST_F(SetKernelArgCacheTest, givenValidBufferArgumentWhenSetMultipleTimesThenSe EXPECT_EQ(ZE_RESULT_SUCCESS, mockKernel.setArgBuffer(0, sizeof(svmAllocation), &svmAllocation)); EXPECT_EQ(++callCounter, mockKernel.setArgBufferWithAllocCalled); - ++svmAllocsManager->allocationsCounter; + // same setArg and allocId matches - not called + svmAllocsManager->allocationsCounter = 2u; ASSERT_EQ(mockKernel.kernelArgInfos[0].allocIdMemoryManagerCounter, 0u); EXPECT_EQ(ZE_RESULT_SUCCESS, mockKernel.setArgBuffer(0, sizeof(svmAllocation), &svmAllocation)); - EXPECT_EQ(++callCounter, mockKernel.setArgBufferWithAllocCalled); - EXPECT_EQ(mockKernel.kernelArgInfos[0].allocIdMemoryManagerCounter, 1u); + EXPECT_EQ(callCounter, mockKernel.setArgBufferWithAllocCalled); + EXPECT_EQ(mockKernel.kernelArgInfos[0].allocIdMemoryManagerCounter, 2u); allocData->setAllocId(1u); // same setArg but allocId is uninitialized - called - ASSERT_EQ(mockKernel.kernelArgInfos[0].allocId, SvmAllocationData::uninitializedAllocId); + 
mockKernel.kernelArgInfos[0].allocId = SvmAllocationData::uninitializedAllocId; ASSERT_EQ(mockKernel.kernelArgInfos[0].allocIdMemoryManagerCounter, svmAllocsManager->allocationsCounter); EXPECT_EQ(ZE_RESULT_SUCCESS, mockKernel.setArgBuffer(0, sizeof(svmAllocation), &svmAllocation)); EXPECT_EQ(++callCounter, mockKernel.setArgBufferWithAllocCalled); @@ -233,7 +234,7 @@ TEST_F(SetKernelArgCacheTest, givenValidBufferArgumentWhenSetMultipleTimesThenSe ++svmAllocsManager->allocationsCounter; // same setArg - not called and argInfo.allocationCounter is updated - EXPECT_EQ(1u, mockKernel.kernelArgInfos[0].allocIdMemoryManagerCounter); + EXPECT_EQ(2u, mockKernel.kernelArgInfos[0].allocIdMemoryManagerCounter); EXPECT_EQ(ZE_RESULT_SUCCESS, mockKernel.setArgBuffer(0, sizeof(svmAllocation), &svmAllocation)); EXPECT_EQ(callCounter, mockKernel.setArgBufferWithAllocCalled); EXPECT_EQ(svmAllocsManager->allocationsCounter, mockKernel.kernelArgInfos[0].allocIdMemoryManagerCounter); diff --git a/level_zero/core/test/unit_tests/sources/memory/test_memory.cpp b/level_zero/core/test/unit_tests/sources/memory/test_memory.cpp index a57b9beacb..067423bca4 100644 --- a/level_zero/core/test/unit_tests/sources/memory/test_memory.cpp +++ b/level_zero/core/test/unit_tests/sources/memory/test_memory.cpp @@ -682,7 +682,7 @@ TEST_F(MemoryTest, givenDevicePointerThenDriverGetAllocPropertiesReturnsExpected EXPECT_EQ(memoryProperties.type, ZE_MEMORY_TYPE_DEVICE); EXPECT_EQ(deviceHandle, device->toHandle()); EXPECT_EQ(memoryProperties.id, - context->getDriverHandle()->getSvmAllocsManager()->allocationsCounter - 1); + context->getDriverHandle()->getSvmAllocsManager()->allocationsCounter); auto alloc = context->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(ptr); EXPECT_NE(alloc, nullptr); @@ -712,7 +712,7 @@ TEST_F(MemoryTest, givenHostPointerThenDriverGetAllocPropertiesReturnsExpectedPr EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(memoryProperties.type, ZE_MEMORY_TYPE_HOST); 
EXPECT_EQ(memoryProperties.id, - context->getDriverHandle()->getSvmAllocsManager()->allocationsCounter - 1); + context->getDriverHandle()->getSvmAllocsManager()->allocationsCounter); auto alloc = context->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(ptr); EXPECT_NE(alloc, nullptr); @@ -746,7 +746,7 @@ TEST_F(MemoryTest, givenSharedPointerThenDriverGetAllocPropertiesReturnsExpected EXPECT_EQ(memoryProperties.type, ZE_MEMORY_TYPE_SHARED); EXPECT_EQ(deviceHandle, device->toHandle()); EXPECT_EQ(memoryProperties.id, - context->getDriverHandle()->getSvmAllocsManager()->allocationsCounter - 1); + context->getDriverHandle()->getSvmAllocsManager()->allocationsCounter); auto alloc = context->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(ptr); EXPECT_NE(alloc, nullptr); @@ -835,7 +835,7 @@ TEST_F(MemoryTest, givenHostPointerThenDriverGetAllocPropertiesReturnsMemoryId) EXPECT_EQ(memoryProperties.type, ZE_MEMORY_TYPE_HOST); EXPECT_EQ(deviceHandle, nullptr); EXPECT_EQ(memoryProperties.id, - context->getDriverHandle()->getSvmAllocsManager()->allocationsCounter - 1); + context->getDriverHandle()->getSvmAllocsManager()->allocationsCounter); result = context->freeMem(ptr); ASSERT_EQ(result, ZE_RESULT_SUCCESS); @@ -862,7 +862,7 @@ TEST_F(MemoryTest, givenSharedPointerThenDriverGetAllocPropertiesReturnsMemoryId EXPECT_EQ(memoryProperties.type, ZE_MEMORY_TYPE_SHARED); EXPECT_EQ(deviceHandle, device->toHandle()); EXPECT_EQ(memoryProperties.id, - context->getDriverHandle()->getSvmAllocsManager()->allocationsCounter - 1); + context->getDriverHandle()->getSvmAllocsManager()->allocationsCounter); result = context->freeMem(ptr); ASSERT_EQ(result, ZE_RESULT_SUCCESS); diff --git a/opencl/source/api/api.cpp b/opencl/source/api/api.cpp index b806d7a7ab..4c61abeac1 100644 --- a/opencl/source/api/api.cpp +++ b/opencl/source/api/api.cpp @@ -3903,6 +3903,11 @@ CL_API_ENTRY void *CL_API_CALL clHostMemAllocINTEL( return nullptr; } + auto allocationFromPool = 
neoContext->getHostMemAllocPool().createUnifiedMemoryAllocation(size, unifiedMemoryProperties); + if (allocationFromPool) { + return allocationFromPool; + } + return neoContext->getSVMAllocsManager()->createHostUnifiedMemoryAllocation(size, unifiedMemoryProperties); } @@ -3952,6 +3957,11 @@ CL_API_ENTRY void *CL_API_CALL clDeviceMemAllocINTEL( unifiedMemoryProperties.device = &neoDevice->getDevice(); + auto allocationFromPool = neoContext->getDeviceMemAllocPool().createUnifiedMemoryAllocation(size, unifiedMemoryProperties); + if (allocationFromPool) { + return allocationFromPool; + } + return neoContext->getSVMAllocsManager()->createUnifiedMemoryAllocation(size, unifiedMemoryProperties); } @@ -4025,6 +4035,14 @@ CL_API_ENTRY cl_int CL_API_CALL clMemFreeCommon(cl_context context, return retVal; } + if (ptr && neoContext->getDeviceMemAllocPool().freeSVMAlloc(const_cast(ptr), blocking)) { + return CL_SUCCESS; + } + + if (ptr && neoContext->getHostMemAllocPool().freeSVMAlloc(const_cast(ptr), blocking)) { + return CL_SUCCESS; + } + if (ptr && !neoContext->getSVMAllocsManager()->freeSVMAlloc(const_cast(ptr), blocking)) { return CL_INVALID_VALUE; } @@ -4978,6 +4996,9 @@ cl_int CL_API_CALL clSetKernelArgSVMPointer(cl_kernel kernel, const auto allocationsCounter = svmManager->allocationsCounter.load(); if (allocationsCounter > 0) { if (allocationsCounter == multiDeviceKernel->getKernelArguments()[argIndex].allocIdMemoryManagerCounter) { + // manager count is not being incremented when allocation is from pool + // 1) add check for allocation from pool + // 2) increment when allocation is from pool reuseFromCache = true; } else { const auto svmData = svmManager->getSVMAlloc(argValue); @@ -5041,7 +5062,6 @@ cl_int CL_API_CALL clSetKernelArgSVMPointer(cl_kernel kernel, allocId = svmData->getAllocId(); } } - retVal = multiDeviceKernel->setArgSvmAlloc(argIndex, const_cast(argValue), svmAllocs, allocId); TRACING_EXIT(ClSetKernelArgSvmPointer, &retVal); return retVal; diff --git 
a/opencl/source/context/context.cpp b/opencl/source/context/context.cpp index ff43484471..06d45fdf83 100644 --- a/opencl/source/context/context.cpp +++ b/opencl/source/context/context.cpp @@ -61,6 +61,8 @@ Context::~Context() { smallBufferPoolAllocator.releaseSmallBufferPool(); } + cleanupUsmAllocationPools(); + delete[] properties; for (auto rootDeviceIndex = 0u; rootDeviceIndex < specialQueues.size(); rootDeviceIndex++) { @@ -487,6 +489,50 @@ bool Context::isSingleDeviceContext() { return getNumDevices() == 1 && devices[0]->getNumGenericSubDevices() == 0; } +void Context::initializeUsmAllocationPools() { + auto svmMemoryManager = getSVMAllocsManager(); + if (!(svmMemoryManager && this->isSingleDeviceContext())) { + return; + } + + bool enabled = false; + size_t poolSize = 2 * MemoryConstants::megaByte; + if (debugManager.flags.EnableDeviceUsmAllocationPool.get() != -1) { + enabled = debugManager.flags.EnableDeviceUsmAllocationPool.get() > 0; + poolSize = debugManager.flags.EnableDeviceUsmAllocationPool.get() * MemoryConstants::megaByte; + } + if (enabled) { + auto subDeviceBitfields = getDeviceBitfields(); + auto &neoDevice = devices[0]->getDevice(); + subDeviceBitfields[neoDevice.getRootDeviceIndex()] = neoDevice.getDeviceBitfield(); + SVMAllocsManager::UnifiedMemoryProperties memoryProperties(InternalMemoryType::deviceUnifiedMemory, MemoryConstants::pageSize2M, + getRootDeviceIndices(), subDeviceBitfields); + memoryProperties.device = &neoDevice; + usmDeviceMemAllocPool.initialize(svmMemoryManager, memoryProperties, poolSize); + } + + enabled = false; + poolSize = 2 * MemoryConstants::megaByte; + if (debugManager.flags.EnableHostUsmAllocationPool.get() != -1) { + enabled = debugManager.flags.EnableHostUsmAllocationPool.get() > 0; + poolSize = debugManager.flags.EnableHostUsmAllocationPool.get() * MemoryConstants::megaByte; + } + if (enabled) { + auto subDeviceBitfields = getDeviceBitfields(); + auto &neoDevice = devices[0]->getDevice(); +
subDeviceBitfields[neoDevice.getRootDeviceIndex()] = neoDevice.getDeviceBitfield(); + SVMAllocsManager::UnifiedMemoryProperties memoryProperties(InternalMemoryType::hostUnifiedMemory, MemoryConstants::pageSize2M, + getRootDeviceIndices(), subDeviceBitfields); + memoryProperties.device = &neoDevice; + usmHostMemAllocPool.initialize(svmMemoryManager, memoryProperties, poolSize); + } +} + +void Context::cleanupUsmAllocationPools() { + usmDeviceMemAllocPool.cleanup(); + usmHostMemAllocPool.cleanup(); +} + bool Context::BufferPoolAllocator::isAggregatedSmallBuffersEnabled(Context *context) const { bool isSupportedForSingleDeviceContexts = false; bool isSupportedForAllContexts = false; diff --git a/opencl/source/context/context.h b/opencl/source/context/context.h index bf11168252..dd3172bd7e 100644 --- a/opencl/source/context/context.h +++ b/opencl/source/context/context.h @@ -9,6 +9,7 @@ #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/helpers/constants.h" #include "shared/source/helpers/string.h" +#include "shared/source/memory_manager/unified_memory_pooling.h" #include "shared/source/utilities/buffer_pool_allocator.h" #include "shared/source/utilities/stackvec.h" @@ -46,6 +47,9 @@ struct OpenCLObjectMapper<_cl_context> { }; class Context : public BaseObject<_cl_context> { + using UsmHostMemAllocPool = UsmMemAllocPool; + using UsmDeviceMemAllocPool = UsmMemAllocPool; + public: using BufferAllocationsVec = StackVec; @@ -109,6 +113,7 @@ class Context : public BaseObject<_cl_context> { if (bufferPoolAllocator.isAggregatedSmallBuffersEnabled(pContext)) { bufferPoolAllocator.initAggregatedSmallBuffers(pContext); } + pContext->initializeUsmAllocationPools(); } gtpinNotifyContextCreate(pContext); return pContext; @@ -230,12 +235,22 @@ class Context : public BaseObject<_cl_context> { BufferPoolAllocator &getBufferPoolAllocator() { return smallBufferPoolAllocator; } + UsmMemAllocPool &getDeviceMemAllocPool() { + return
usmDeviceMemAllocPool; + } + UsmMemAllocPool &getHostMemAllocPool() { + return usmHostMemAllocPool; + } + TagAllocatorBase *getMultiRootDeviceTimestampPacketAllocator(); std::unique_lock obtainOwnershipForMultiRootDeviceAllocator(); void setMultiRootDeviceTimestampPacketAllocator(std::unique_ptr &allocator); void setContextAsNonZebin(); bool checkIfContextIsNonZebin() const; + void initializeUsmAllocationPools(); + void cleanupUsmAllocationPools(); + protected: struct BuiltInKernel { const char *pSource = nullptr; @@ -271,6 +286,8 @@ class Context : public BaseObject<_cl_context> { StackVec specialQueues; DriverDiagnostics *driverDiagnostics = nullptr; BufferPoolAllocator smallBufferPoolAllocator; + UsmDeviceMemAllocPool usmDeviceMemAllocPool; + UsmHostMemAllocPool usmHostMemAllocPool; uint32_t maxRootDeviceIndex = std::numeric_limits::max(); cl_bool preferD3dSharedResources = 0u; diff --git a/opencl/test/unit_test/api/cl_set_kernel_arg_svm_pointer_tests.inl b/opencl/test/unit_test/api/cl_set_kernel_arg_svm_pointer_tests.inl index 8ae06952cb..28db8a4bc0 100644 --- a/opencl/test/unit_test/api/cl_set_kernel_arg_svm_pointer_tests.inl +++ b/opencl/test/unit_test/api/cl_set_kernel_arg_svm_pointer_tests.inl @@ -213,7 +213,7 @@ TEST_F(clSetKernelArgSVMPointerTests, GivenSvmAndPointerWithInvalidOffsetWhenSet } } -TEST_F(clSetKernelArgSVMPointerTests, GivenSvmAndValidArgValueWhenSettingSameKernelArgThenSetArgSvmAllocCalledOnlyWhenNeeded) { +TEST_F(clSetKernelArgSVMPointerTests, givenSvmAndValidArgValueWhenSettingSameKernelArgThenSetArgSvmAllocCalledOnlyWhenNeeded) { const ClDeviceInfo &devInfo = pDevice->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { auto mockSvmManager = reinterpret_cast(pMockKernel->getContext().getSVMAllocsManager()); @@ -221,7 +221,8 @@ TEST_F(clSetKernelArgSVMPointerTests, GivenSvmAndValidArgValueWhenSettingSameKer void *const ptrSvm = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4); EXPECT_NE(nullptr, ptrSvm); auto callCounter = 0u; - // first 
set arg - called + // first set arg - called + mockSvmManager->allocationsCounter = 0u; auto retVal = clSetKernelArgSVMPointer( pMockMultiDeviceKernel, // cl_kernel kernel 0, // cl_uint arg_index @@ -271,7 +272,7 @@ TEST_F(clSetKernelArgSVMPointerTests, GivenSvmAndValidArgValueWhenSettingSameKer ++mockSvmManager->allocationsCounter; // different allocId - called - pMockKernel->kernelArguments[0].allocId = 1; + pMockKernel->kernelArguments[0].allocId = 2; retVal = clSetKernelArgSVMPointer( pMockMultiDeviceKernel, // cl_kernel kernel 0, // cl_uint arg_index @@ -281,8 +282,8 @@ TEST_F(clSetKernelArgSVMPointerTests, GivenSvmAndValidArgValueWhenSettingSameKer EXPECT_EQ(++callCounter, pMockKernel->setArgSvmAllocCalls); ++mockSvmManager->allocationsCounter; - // allocId = 0 - called - pMockKernel->kernelArguments[0].allocId = 0; + // allocId = 3 - called + pMockKernel->kernelArguments[0].allocId = 3; retVal = clSetKernelArgSVMPointer( pMockMultiDeviceKernel, // cl_kernel kernel 0, // cl_uint arg_index @@ -350,7 +351,7 @@ TEST_F(clSetKernelArgSVMPointerTests, GivenSvmAndValidArgValueWhenSettingSameKer clSVMFree(pContext, ptrSvm); } } -TEST_F(clSetKernelArgSVMPointerTests, GivenSvmAndValidArgValueWhenAllocIdCacheHitThenAllocIdMemoryManagerCounterIsUpdated) { +TEST_F(clSetKernelArgSVMPointerTests, givenSvmAndValidArgValueWhenAllocIdCacheHitThenAllocIdMemoryManagerCounterIsUpdated) { const ClDeviceInfo &devInfo = pDevice->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { auto mockSvmManager = reinterpret_cast(pMockKernel->getContext().getSVMAllocsManager()); @@ -358,7 +359,7 @@ TEST_F(clSetKernelArgSVMPointerTests, GivenSvmAndValidArgValueWhenAllocIdCacheHi void *ptrSvm = clSVMAlloc(pContext, CL_MEM_READ_WRITE, 256, 4); EXPECT_NE(nullptr, ptrSvm); auto callCounter = 0u; - // first set arg - called + // first set arg - called auto retVal = clSetKernelArgSVMPointer( pMockMultiDeviceKernel, // cl_kernel kernel 0, // cl_uint arg_index @@ -367,7 +368,11 @@ 
TEST_F(clSetKernelArgSVMPointerTests, GivenSvmAndValidArgValueWhenAllocIdCacheHi EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(++callCounter, pMockKernel->setArgSvmAllocCalls); - EXPECT_EQ(0u, mockSvmManager->allocationsCounter); + auto expectedAllocationsCounter = 1u; + expectedAllocationsCounter += pContext->getHostMemAllocPool().isInitialized() ? 1u : 0u; + expectedAllocationsCounter += pContext->getDeviceMemAllocPool().isInitialized() ? 1u : 0u; + + EXPECT_EQ(expectedAllocationsCounter, mockSvmManager->allocationsCounter); EXPECT_EQ(mockSvmManager->allocationsCounter, pMockKernel->getKernelArguments()[0].allocIdMemoryManagerCounter); ++mockSvmManager->allocationsCounter; @@ -380,8 +385,8 @@ TEST_F(clSetKernelArgSVMPointerTests, GivenSvmAndValidArgValueWhenAllocIdCacheHi ); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(callCounter, pMockKernel->setArgSvmAllocCalls); - - EXPECT_EQ(1u, mockSvmManager->allocationsCounter); + ++expectedAllocationsCounter; + EXPECT_EQ(expectedAllocationsCounter, mockSvmManager->allocationsCounter); EXPECT_EQ(mockSvmManager->allocationsCounter, pMockKernel->getKernelArguments()[0].allocIdMemoryManagerCounter); clSVMFree(pContext, ptrSvm); diff --git a/opencl/test/unit_test/aub_tests/xe_hpc_core/system_memfence_aub_tests_xe_hpc_core.cpp b/opencl/test/unit_test/aub_tests/xe_hpc_core/system_memfence_aub_tests_xe_hpc_core.cpp index 87b793b251..072c10ae5b 100644 --- a/opencl/test/unit_test/aub_tests/xe_hpc_core/system_memfence_aub_tests_xe_hpc_core.cpp +++ b/opencl/test/unit_test/aub_tests/xe_hpc_core/system_memfence_aub_tests_xe_hpc_core.cpp @@ -217,7 +217,7 @@ XE_HPC_CORETEST_F(SystemMemFenceViaKernel, givenSystemMemFenceWhenKernelInstruct retVal = clSetKernelArgSVMPointer(pMultiDeviceKernel.get(), 1, hostMemAlloc); ASSERT_EQ(CL_SUCCESS, retVal); - size_t globalWorkSize[3] = {bufferSize, 1, 1}; + size_t globalWorkSize[3] = {bufferSize / sizeof(cl_int), 1, 1}; retVal = 
commandQueues[0][0]->enqueueKernel(pMultiDeviceKernel->getKernel(rootDeviceIndex), 1, nullptr, globalWorkSize, nullptr, 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); diff --git a/opencl/test/unit_test/context/CMakeLists.txt b/opencl/test/unit_test/context/CMakeLists.txt index 3740e626b0..16e8b5e02d 100644 --- a/opencl/test/unit_test/context/CMakeLists.txt +++ b/opencl/test/unit_test/context/CMakeLists.txt @@ -1,5 +1,5 @@ # -# Copyright (C) 2018-2021 Intel Corporation +# Copyright (C) 2018-2023 Intel Corporation # # SPDX-License-Identifier: MIT # @@ -10,6 +10,7 @@ set(IGDRCL_SRCS_tests_context ${CMAKE_CURRENT_SOURCE_DIR}/context_multi_device_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/context_negative_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/context_tests.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/context_usm_memory_pool_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/driver_diagnostics_enqueue_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/driver_diagnostics_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/driver_diagnostics_tests.h diff --git a/opencl/test/unit_test/context/context_negative_tests.cpp b/opencl/test/unit_test/context/context_negative_tests.cpp index 1c06bab30e..d867940d4a 100644 --- a/opencl/test/unit_test/context/context_negative_tests.cpp +++ b/opencl/test/unit_test/context/context_negative_tests.cpp @@ -31,6 +31,7 @@ TEST_F(ContextFailureInjection, GivenFailedAllocationInjectionWhenCreatingContex DebugManagerStateRestore restorer; debugManager.flags.ExperimentalSmallBufferPoolAllocator.set(0); // failing to allocate pool buffer is non-critical debugManager.flags.SetAmountOfReusableAllocationsPerCmdQueue.set(0); // same for preallocations + debugManager.flags.EnableDeviceUsmAllocationPool.set(0); // usm device allocation pooling auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); cl_device_id deviceID = device.get(); diff --git a/opencl/test/unit_test/context/context_tests.cpp b/opencl/test/unit_test/context/context_tests.cpp index 
345fb24b10..3b95f68b42 100644 --- a/opencl/test/unit_test/context/context_tests.cpp +++ b/opencl/test/unit_test/context/context_tests.cpp @@ -52,7 +52,6 @@ class WhiteBoxContext : public Context { struct ContextTest : public PlatformFixture, public ::testing::Test { - using PlatformFixture::setUp; void SetUp() override { @@ -820,6 +819,8 @@ extern gtpin::ocl::gtpin_events_t gtpinCallbacks; TEST_F(GTPinContextDestroyTest, whenCallingConxtextDestructorThenGTPinIsNotifiedBeforeSVMAllocManagerGetsDestroyed) { auto mockContext = reinterpret_cast(context); if (mockContext->svmAllocsManager) { + mockContext->getDeviceMemAllocPool().cleanup(); + mockContext->getHostMemAllocPool().cleanup(); delete mockContext->svmAllocsManager; } mockContext->svmAllocsManager = new MockSVMAllocManager(); diff --git a/opencl/test/unit_test/context/context_usm_memory_pool_tests.cpp b/opencl/test/unit_test/context/context_usm_memory_pool_tests.cpp new file mode 100644 index 0000000000..bd54ddb113 --- /dev/null +++ b/opencl/test/unit_test/context/context_usm_memory_pool_tests.cpp @@ -0,0 +1,75 @@ +/* + * Copyright (C) 2023 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/test/common/helpers/debug_manager_state_restore.h" +#include "shared/test/common/mocks/mock_usm_memory_pool.h" + +#include "opencl/source/cl_device/cl_device.h" +#include "opencl/test/unit_test/mocks/mock_context.h" + +#include "gtest/gtest.h" +using namespace NEO; + +template +struct ContextUsmPoolFlagValuesTest : public ::testing::Test { + + ContextUsmPoolFlagValuesTest() {} + + void SetUp() override { + mockContext = std::make_unique(); + const ClDeviceInfo &devInfo = mockContext->getDevice(0u)->getDeviceInfo(); + if (devInfo.svmCapabilities == 0) { + GTEST_SKIP(); + } + mockDeviceUsmMemAllocPool = static_cast(&mockContext->getDeviceMemAllocPool()); + mockHostUsmMemAllocPool = static_cast(&mockContext->getHostMemAllocPool()); + 
debugManager.flags.EnableDeviceUsmAllocationPool.set(devicePoolFlag); + debugManager.flags.EnableHostUsmAllocationPool.set(hostPoolFlag); + mockContext->initializeUsmAllocationPools(); + } + + std::unique_ptr mockContext; + DebugManagerStateRestore restorer; + MockUsmMemAllocPool *mockDeviceUsmMemAllocPool; + MockUsmMemAllocPool *mockHostUsmMemAllocPool; +}; + +using ContextUsmPoolDefaultFlagsTest = ContextUsmPoolFlagValuesTest<-1, -1>; + +TEST_F(ContextUsmPoolDefaultFlagsTest, givenDefaultDebugFlagsWhenCreatingContextThenPoolsAreNotInitialized) { + EXPECT_FALSE(mockDeviceUsmMemAllocPool->isInitialized()); + EXPECT_EQ(0u, mockDeviceUsmMemAllocPool->poolSize); + EXPECT_EQ(nullptr, mockDeviceUsmMemAllocPool->pool); + + EXPECT_FALSE(mockHostUsmMemAllocPool->isInitialized()); + EXPECT_EQ(0u, mockHostUsmMemAllocPool->poolSize); + EXPECT_EQ(nullptr, mockHostUsmMemAllocPool->pool); +} + +using ContextUsmPoolEnabledFlagsTest = ContextUsmPoolFlagValuesTest<1, 1>; +TEST_F(ContextUsmPoolEnabledFlagsTest, givenEnabledDebugFlagsWhenCreatingContextThenPoolsAreInitialized) { + EXPECT_TRUE(mockDeviceUsmMemAllocPool->isInitialized()); + EXPECT_EQ(1 * MemoryConstants::megaByte, mockDeviceUsmMemAllocPool->poolSize); + EXPECT_NE(nullptr, mockDeviceUsmMemAllocPool->pool); + EXPECT_EQ(InternalMemoryType::deviceUnifiedMemory, mockDeviceUsmMemAllocPool->poolMemoryType); + + EXPECT_TRUE(mockHostUsmMemAllocPool->isInitialized()); + EXPECT_EQ(1 * MemoryConstants::megaByte, mockHostUsmMemAllocPool->poolSize); + EXPECT_NE(nullptr, mockHostUsmMemAllocPool->pool); + EXPECT_EQ(InternalMemoryType::hostUnifiedMemory, mockHostUsmMemAllocPool->poolMemoryType); + + cl_int retVal = CL_SUCCESS; + void *pooledDeviceAlloc = clDeviceMemAllocINTEL(mockContext.get(), static_cast(mockContext->getDevice(0)), nullptr, UsmMemAllocPool::allocationThreshold, 0, &retVal); + EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_NE(nullptr, pooledDeviceAlloc); + clMemFreeINTEL(mockContext.get(), pooledDeviceAlloc); + + void 
*pooledHostAlloc = clHostMemAllocINTEL(mockContext.get(), nullptr, UsmMemAllocPool::allocationThreshold, 0, &retVal); + EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_NE(nullptr, pooledHostAlloc); + clMemFreeINTEL(mockContext.get(), pooledHostAlloc); +} \ No newline at end of file diff --git a/opencl/test/unit_test/kernel/kernel_arg_buffer_tests.cpp b/opencl/test/unit_test/kernel/kernel_arg_buffer_tests.cpp index a346777099..919604342f 100644 --- a/opencl/test/unit_test/kernel/kernel_arg_buffer_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_arg_buffer_tests.cpp @@ -354,6 +354,7 @@ TEST_F(KernelArgBufferTest, givenKernelExecInfoWithIndirectStatelessAccessWhenHa if (svmAllocationsManager == nullptr) { return; } + pContext->getHostMemAllocPool().cleanup(); mockKernel.unifiedMemoryControls.indirectHostAllocationsAllowed = true; EXPECT_FALSE(mockKernel.hasIndirectStatelessAccessToHostMemory()); diff --git a/opencl/test/unit_test/mem_obj/buffer_pool_alloc_tests.cpp b/opencl/test/unit_test/mem_obj/buffer_pool_alloc_tests.cpp index c0305ea39d..4ececfb3b3 100644 --- a/opencl/test/unit_test/mem_obj/buffer_pool_alloc_tests.cpp +++ b/opencl/test/unit_test/mem_obj/buffer_pool_alloc_tests.cpp @@ -51,6 +51,7 @@ class AggregatedSmallBuffersTestTemplate : public ::testing::Test { void setUpImpl() { debugManager.flags.ExperimentalSmallBufferPoolAllocator.set(poolBufferFlag); + debugManager.flags.EnableDeviceUsmAllocationPool.set(0); this->deviceFactory = std::make_unique(2, 0); this->device = deviceFactory->rootDevices[rootDeviceIndex]; this->mockMemoryManager = static_cast(device->getMemoryManager()); diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl index 4690fae22a..79fd9a001c 100644 --- a/shared/source/debug_settings/debug_variables_base.inl +++ b/shared/source/debug_settings/debug_variables_base.inl @@ -362,6 +362,8 @@ DECLARE_DEBUG_VARIABLE(int32_t, UseHighAlignmentForHeapExtended, -1, "-1: defaul 
DECLARE_DEBUG_VARIABLE(int32_t, DispatchCmdlistCmdBufferPrimary, -1, "-1: default, 0: dispatch command buffers as seconadry, 1: dispatch command buffers as primary and chain") DECLARE_DEBUG_VARIABLE(int32_t, UseImmediateFlushTask, -1, "-1: default, 0: use regular flush task, 1: use immediate flush task") DECLARE_DEBUG_VARIABLE(int32_t, SkipDcFlushOnBarrierWithoutEvents, -1, "-1: default (enabled), 0: disabled, 1: enabled") +DECLARE_DEBUG_VARIABLE(int32_t, EnableDeviceUsmAllocationPool, -1, "-1: default (disabled), 0: disabled, >=1: enabled, pool size in MB") +DECLARE_DEBUG_VARIABLE(int32_t, EnableHostUsmAllocationPool, -1, "-1: default (disabled), 0: disabled, >=1: enabled, pool size in MB") /*DIRECT SUBMISSION FLAGS*/ DECLARE_DEBUG_VARIABLE(int32_t, EnableDirectSubmission, -1, "-1: default (disabled), 0: disable, 1:enable. Enables direct submission of command buffers bypassing KMD") diff --git a/shared/source/memory_manager/CMakeLists.txt b/shared/source/memory_manager/CMakeLists.txt index edbcf695e8..4f7d84d117 100644 --- a/shared/source/memory_manager/CMakeLists.txt +++ b/shared/source/memory_manager/CMakeLists.txt @@ -57,6 +57,8 @@ set(NEO_CORE_MEMORY_MANAGER ${CMAKE_CURRENT_SOURCE_DIR}/surface.h ${CMAKE_CURRENT_SOURCE_DIR}/unified_memory_manager.cpp ${CMAKE_CURRENT_SOURCE_DIR}/unified_memory_manager.h + ${CMAKE_CURRENT_SOURCE_DIR}/unified_memory_pooling.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/unified_memory_pooling.h ${CMAKE_CURRENT_SOURCE_DIR}/page_table.cpp ${CMAKE_CURRENT_SOURCE_DIR}/page_table.h ${CMAKE_CURRENT_SOURCE_DIR}/page_table.inl diff --git a/shared/source/memory_manager/unified_memory_manager.cpp b/shared/source/memory_manager/unified_memory_manager.cpp index f8ebb2e6af..021a5b7096 100644 --- a/shared/source/memory_manager/unified_memory_manager.cpp +++ b/shared/source/memory_manager/unified_memory_manager.cpp @@ -236,7 +236,7 @@ void *SVMAllocsManager::createHostUnifiedMemoryAllocation(size_t size, allocData.allocationFlagsProperty =
memoryProperties.allocationFlags; allocData.device = nullptr; allocData.pageSizeForAlignment = pageSizeForAlignment; - allocData.setAllocId(this->allocationsCounter++); + allocData.setAllocId(++this->allocationsCounter); std::unique_lock lock(mtx); this->svmAllocs.insert(usmPtr, allocData); @@ -318,7 +318,7 @@ void *SVMAllocsManager::createUnifiedMemoryAllocation(size_t size, allocData.memoryType = memoryProperties.memoryType; allocData.allocationFlagsProperty = memoryProperties.allocationFlags; allocData.device = memoryProperties.device; - allocData.setAllocId(this->allocationsCounter++); + allocData.setAllocId(++this->allocationsCounter); std::unique_lock lock(mtx); @@ -405,7 +405,7 @@ void *SVMAllocsManager::createUnifiedKmdMigratedAllocation(size_t size, const Sv allocData.device = unifiedMemoryProperties.device; allocData.size = size; allocData.pageSizeForAlignment = pageSizeForAlignment; - allocData.setAllocId(this->allocationsCounter++); + allocData.setAllocId(++this->allocationsCounter); std::unique_lock lock(mtx); auto retPtr = allocationGpu->getUnderlyingBuffer(); @@ -560,6 +560,7 @@ void *SVMAllocsManager::createZeroCopySvmAllocation(size_t size, const SvmAlloca allocation->setCoherent(svmProperties.coherent); } allocData.size = size; + allocData.setAllocId(++this->allocationsCounter); std::unique_lock lock(mtx); this->svmAllocs.insert(usmPtr, allocData); @@ -627,7 +628,7 @@ void *SVMAllocsManager::createUnifiedAllocationWithDeviceStorage(size_t size, co allocData.device = unifiedMemoryProperties.device; allocData.pageSizeForAlignment = cpuAlignment; allocData.size = size; - allocData.setAllocId(this->allocationsCounter++); + allocData.setAllocId(++this->allocationsCounter); std::unique_lock lock(mtx); this->svmAllocs.insert(svmPtr, allocData); diff --git a/shared/source/memory_manager/unified_memory_pooling.cpp b/shared/source/memory_manager/unified_memory_pooling.cpp new file mode 100644 index 0000000000..ee6539970b --- /dev/null +++ 
b/shared/source/memory_manager/unified_memory_pooling.cpp @@ -0,0 +1,96 @@ +/* + * Copyright (C) 2023 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/memory_manager/unified_memory_pooling.h" + +#include "shared/source/debug_settings/debug_settings_manager.h" +#include "shared/source/helpers/ptr_math.h" +#include "shared/source/memory_manager/unified_memory_manager.h" +#include "shared/source/utilities/heap_allocator.h" + +namespace NEO { + +bool UsmMemAllocPool::initialize(SVMAllocsManager *svmMemoryManager, const UnifiedMemoryProperties &memoryProperties, size_t poolSize) { + this->pool = svmMemoryManager->createUnifiedMemoryAllocation(poolSize, memoryProperties); + if (nullptr == this->pool) { + return false; + } + this->svmMemoryManager = svmMemoryManager; + this->poolEnd = ptrOffset(this->pool, poolSize); + this->chunkAllocator.reset(new HeapAllocator(startingOffset, + poolSize, + chunkAlignment)); + this->poolSize = poolSize; + this->poolMemoryType = memoryProperties.memoryType; + return true; +} + +bool UsmMemAllocPool::isInitialized() { + return this->svmMemoryManager && this->pool; +} + +void UsmMemAllocPool::cleanup() { + if (isInitialized()) { + this->svmMemoryManager->freeSVMAlloc(this->pool, true); + this->svmMemoryManager = nullptr; + this->pool = nullptr; + this->poolEnd = nullptr; + this->poolSize = 0u; + this->poolMemoryType = InternalMemoryType::notSpecified; + } +} + +bool UsmMemAllocPool::canBePooled(size_t size, const UnifiedMemoryProperties &memoryProperties) { + return size <= allocationThreshold && memoryProperties.memoryType == this->poolMemoryType; +} + +void *UsmMemAllocPool::createUnifiedMemoryAllocation(size_t size, const UnifiedMemoryProperties &memoryProperties) { + void *pooledPtr = nullptr; + if (isInitialized()) { + if (false == canBePooled(size, memoryProperties)) { + return nullptr; + } + std::unique_lock lock(mtx); + size_t offset = static_cast(this->chunkAllocator->allocate(size)); + if 
(offset == 0) { + return nullptr; + } + offset -= startingOffset; + DEBUG_BREAK_IF(offset >= poolSize); + pooledPtr = ptrOffset(this->pool, offset); + { + this->allocations.insert(pooledPtr, AllocationInfo{offset, size}); + } + ++this->svmMemoryManager->allocationsCounter; + } + return pooledPtr; +} + +bool UsmMemAllocPool::isInPool(const void *ptr) { + return ptr >= this->pool && ptr < this->poolEnd; +} + +bool UsmMemAllocPool::freeSVMAlloc(void *ptr, bool blocking) { + if (isInitialized() && isInPool(ptr)) { + size_t offset = 0u, size = 0u; + { + std::unique_lock lock(mtx); + auto allocationInfo = allocations.extract(ptr); + if (allocationInfo) { + offset = allocationInfo->offset; + size = allocationInfo->size; + } + } + if (size > 0u) { + this->chunkAllocator->free(offset + startingOffset, size); + return true; + } + } + return false; +} + +} // namespace NEO \ No newline at end of file diff --git a/shared/source/memory_manager/unified_memory_pooling.h b/shared/source/memory_manager/unified_memory_pooling.h new file mode 100644 index 0000000000..231ba8b403 --- /dev/null +++ b/shared/source/memory_manager/unified_memory_pooling.h @@ -0,0 +1,56 @@ +/* + * Copyright (C) 2023 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#pragma once +#include "shared/source/helpers/constants.h" +#include "shared/source/memory_manager/unified_memory_manager.h" +#include "shared/source/utilities/heap_allocator.h" +#include "shared/source/utilities/sorted_vector.h" + +namespace NEO { +class SVMAllocsManager; +class HeapAllocator; + +class UsmMemAllocPool { + using UnifiedMemoryProperties = SVMAllocsManager::UnifiedMemoryProperties; + struct AllocationInfo { + size_t offset; + size_t size; + }; + struct CompareAcceptEqualPointers { + bool operator()(const std::unique_ptr &svmData, const void *ptr, const void *otherPtr) { + return ptr == otherPtr; + } + }; + using AllocationsInfoStorage = BaseSortedPointerWithValueVector; + + public: + UsmMemAllocPool() = default; + 
bool initialize(SVMAllocsManager *svmMemoryManager, const UnifiedMemoryProperties &memoryProperties, size_t poolSize); + bool isInitialized(); + void cleanup(); + bool canBePooled(size_t size, const UnifiedMemoryProperties &memoryProperties); + void *createUnifiedMemoryAllocation(size_t size, const UnifiedMemoryProperties &memoryProperties); /* the unit tests in this patch expect nullptr when the request is not poolable, the pool is full, or the pool failed to initialize */ + bool isInPool(const void *ptr); /* range check against [pool, poolEnd) */ + bool freeSVMAlloc(void *ptr, bool blocking); /* true only when ptr was a tracked pooled allocation that has now been released */ + + static constexpr auto allocationThreshold = 1 * MemoryConstants::megaByte; /* the unit tests in this patch expect canBePooled to accept a host request of exactly this size on a host pool and to reject anything larger */ + static constexpr auto chunkAlignment = 512u; + static constexpr auto startingOffset = chunkAlignment; /* bias on chunkAllocator offsets: the implementation subtracts it before computing the pooled pointer and adds it back on free */ + + protected: + size_t poolSize{}; + std::unique_ptr chunkAllocator; + void *pool{}; /* inclusive lower bound used by isInPool */ + void *poolEnd{}; /* exclusive upper bound used by isInPool */ + SVMAllocsManager *svmMemoryManager{}; + AllocationsInfoStorage allocations; /* maps each pooled pointer to its AllocationInfo */ + std::mutex mtx; /* held in freeSVMAlloc while an entry is removed from allocations */ + InternalMemoryType poolMemoryType; /* NOTE(review): no default initializer, unlike the neighbouring members - confirm initialize() always sets it before it is read */ +}; + +} // namespace NEO \ No newline at end of file diff --git a/shared/source/utilities/sorted_vector.h b/shared/source/utilities/sorted_vector.h index 17e09b3954..aea5701284 100644 --- a/shared/source/utilities/sorted_vector.h +++ b/shared/source/utilities/sorted_vector.h @@ -6,8 +6,12 @@ */ #pragma once +#include "shared/source/helpers/debug_helpers.h" + #include +#include #include +#include #include namespace NEO { @@ -38,15 +42,17 @@ class BaseSortedPointerWithValueVector { allocations.erase(removeIt); } - ValueType *get(const void *ptr) { + typename Container::iterator getImpl(const void *ptr) { /* binary search shared by get() and extract(); yields allocations.end() when the container is empty, ptr is null, or nothing matches */ if (allocations.size() == 0) { - return nullptr; + return allocations.end(); } if (nullptr == ptr) { - return nullptr; + return allocations.end(); } + DEBUG_BREAK_IF(allocations.size() > static_cast(std::numeric_limits::max())); /* the search below uses int indices, so the element count has to fit in int */ + int begin = 0; int end = static_cast(allocations.size() - 1); while (end >= begin) { @@ -54,7 +60,7 @@ class BaseSortedPointerWithValueVector { const auto &allocation = allocations[currentPos]; if (compareFunctor(allocation.second, ptr, allocation.first)) { - return allocation.second.get(); + return 
allocations.begin() + currentPos; } else if (ptr < allocation.first) { end = currentPos - 1; continue; @@ -63,7 +69,24 @@ class BaseSortedPointerWithValueVector { continue; } } + return allocations.end(); /* search range exhausted without a match */ + } + std::unique_ptr extract(const void *ptr) { /* removes the entry found for ptr and transfers its value to the caller; returns an empty pointer when nothing is found */ + std::unique_ptr retVal{}; + auto it = getImpl(ptr); + if (it != allocations.end()) { + retVal.swap(it->second); /* take the value out before the element itself is erased */ + allocations.erase(it); + } + return retVal; + } + + ValueType *get(const void *ptr) { /* non-owning lookup; nullptr when nothing is found for ptr */ + auto it = getImpl(ptr); + if (it != allocations.end()) { + return it->second.get(); + } return nullptr; } diff --git a/shared/test/common/mocks/CMakeLists.txt b/shared/test/common/mocks/CMakeLists.txt index f1a05ab3d8..7b177fdc62 100644 --- a/shared/test/common/mocks/CMakeLists.txt +++ b/shared/test/common/mocks/CMakeLists.txt @@ -92,6 +92,7 @@ set(NEO_CORE_tests_mocks ${CMAKE_CURRENT_SOURCE_DIR}/mock_tbx_csr.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_timestamp_container.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_timestamp_packet.h + ${CMAKE_CURRENT_SOURCE_DIR}/mock_usm_memory_pool.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_wddm_residency_controller.h ${CMAKE_CURRENT_SOURCE_DIR}/ult_device_factory.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ult_device_factory.h diff --git a/shared/test/common/mocks/mock_usm_memory_pool.h b/shared/test/common/mocks/mock_usm_memory_pool.h new file mode 100644 index 0000000000..845cb6564a --- /dev/null +++ b/shared/test/common/mocks/mock_usm_memory_pool.h @@ -0,0 +1,18 @@ +/* + * Copyright (C) 2023 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#pragma once +#include "shared/source/memory_manager/unified_memory_pooling.h" + +class MockUsmMemAllocPool : public UsmMemAllocPool { /* test helper that re-exports selected protected members of the pool as public; NOTE(review): UsmMemAllocPool is declared inside namespace NEO but is named unqualified here at global scope - confirm a using-directive is in effect wherever this header is included */ + public: + using UsmMemAllocPool::allocations; + using UsmMemAllocPool::pool; + using UsmMemAllocPool::poolEnd; + using UsmMemAllocPool::poolMemoryType; + using UsmMemAllocPool::poolSize; +}; \ No newline at end of file diff --git a/shared/test/common/test_files/igdrcl.config b/shared/test/common/test_files/igdrcl.config index 
a281397941..b357a848ea 100644 --- a/shared/test/common/test_files/igdrcl.config +++ b/shared/test/common/test_files/igdrcl.config @@ -573,4 +573,6 @@ InOrderAtomicSignallingEnabled = -1 SetProcessPowerThrottlingState = -1 InOrderDuplicatedCounterStorageEnabled = -1 OverrideCpuCaching = -1 +EnableDeviceUsmAllocationPool = -1 +EnableHostUsmAllocationPool = -1 # Please don't edit below this line diff --git a/shared/test/unit_test/memory_manager/CMakeLists.txt b/shared/test/unit_test/memory_manager/CMakeLists.txt index d282de4cd8..ec2dcf078e 100644 --- a/shared/test/unit_test/memory_manager/CMakeLists.txt +++ b/shared/test/unit_test/memory_manager/CMakeLists.txt @@ -1,5 +1,5 @@ # -# Copyright (C) 2020-2022 Intel Corporation +# Copyright (C) 2020-2023 Intel Corporation # # SPDX-License-Identifier: MIT # @@ -30,6 +30,7 @@ target_sources(neo_shared_tests PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/surface_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/unified_memory_manager_cache_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/unified_memory_manager_tests.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/unified_memory_pooling_tests.cpp ) add_subdirectories() \ No newline at end of file diff --git a/shared/test/unit_test/memory_manager/unified_memory_pooling_tests.cpp b/shared/test/unit_test/memory_manager/unified_memory_pooling_tests.cpp new file mode 100644 index 0000000000..e0383a9921 --- /dev/null +++ b/shared/test/unit_test/memory_manager/unified_memory_pooling_tests.cpp @@ -0,0 +1,136 @@ +/* + * Copyright (C) 2023 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/memory_manager/unified_memory_pooling.h" +#include "shared/test/common/helpers/debug_manager_state_restore.h" +#include "shared/test/common/mocks/mock_device.h" +#include "shared/test/common/mocks/mock_memory_manager.h" +#include "shared/test/common/mocks/mock_svm_manager.h" +#include "shared/test/common/mocks/mock_usm_memory_pool.h" +#include "shared/test/common/mocks/ult_device_factory.h" +#include 
"shared/test/common/test_macros/test.h" + +#include "gtest/gtest.h" + +using namespace NEO; + +using UnifiedMemoryPoolingTest = Test>; +TEST_F(UnifiedMemoryPoolingTest, givenUsmAllocPoolWhenCallingIsInitializedThenReturnCorrectValue) { /* walks the pool through its lifecycle: default-constructed, initialized, cleaned up */ + UsmMemAllocPool usmMemAllocPool; + EXPECT_FALSE(usmMemAllocPool.isInitialized()); + + std::unique_ptr deviceFactory(new UltDeviceFactory(1, 1)); + auto device = deviceFactory->rootDevices[0]; + auto svmManager = std::make_unique(device->getMemoryManager(), false); + + SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::hostUnifiedMemory, MemoryConstants::pageSize2M, rootDeviceIndices, deviceBitfields); + unifiedMemoryProperties.device = device; + + EXPECT_TRUE(usmMemAllocPool.initialize(svmManager.get(), unifiedMemoryProperties, 1 * MemoryConstants::megaByte)); + EXPECT_TRUE(usmMemAllocPool.isInitialized()); + + usmMemAllocPool.cleanup(); + EXPECT_FALSE(usmMemAllocPool.isInitialized()); + EXPECT_FALSE(usmMemAllocPool.freeSVMAlloc(reinterpret_cast(0x1), true)); /* after cleanup the pool rejects frees */ +} + +template +class InitializedUnifiedMemoryPoolingTest : public UnifiedMemoryPoolingTest { /* fixture that attempts pool initialization in SetUp; poolMemoryType and failAllocation are used as parameters of this template, whose parameter list is not visible in this view */ + public: + void SetUp() { + UnifiedMemoryPoolingTest::setUp(); + EXPECT_FALSE(usmMemAllocPool.isInitialized()); + + deviceFactory = std::unique_ptr(new UltDeviceFactory(1, 1)); + device = deviceFactory->rootDevices[0]; + svmManager = std::make_unique(device->getMemoryManager(), false); + static_cast(device->getMemoryManager())->failInDevicePoolWithError = failAllocation; /* set on the memory manager before the pool asks it for backing memory */ + + poolMemoryProperties = std::make_unique(poolMemoryType, MemoryConstants::pageSize2M, rootDeviceIndices, deviceBitfields); + poolMemoryProperties->device = device; + ASSERT_EQ(!failAllocation, usmMemAllocPool.initialize(svmManager.get(), *poolMemoryProperties.get(), poolSize)); /* initialization must succeed exactly when failAllocation is false */ + } + void TearDown() { + usmMemAllocPool.cleanup(); + UnifiedMemoryPoolingTest::tearDown(); + } + + const size_t poolSize = 2 * MemoryConstants::megaByte; + MockUsmMemAllocPool usmMemAllocPool; + 
std::unique_ptr deviceFactory; + Device *device; + std::unique_ptr svmManager; + std::unique_ptr poolMemoryProperties; +}; + +using InitializedHostUnifiedMemoryPoolingTest = InitializedUnifiedMemoryPoolingTest; +TEST_F(InitializedHostUnifiedMemoryPoolingTest, givenDifferentAllocationSizesWhenCallingCanBePooledThenCorrectValueIsReturned) { /* expectations: a host request of exactly allocationThreshold is poolable; one byte more is not; device and shared requests are never poolable here */ + SVMAllocsManager::UnifiedMemoryProperties memoryProperties(InternalMemoryType::hostUnifiedMemory, MemoryConstants::pageSize64k, rootDeviceIndices, deviceBitfields); + EXPECT_TRUE(usmMemAllocPool.canBePooled(UsmMemAllocPool::allocationThreshold, memoryProperties)); + EXPECT_FALSE(usmMemAllocPool.canBePooled(UsmMemAllocPool::allocationThreshold + 1, memoryProperties)); + + memoryProperties.memoryType = InternalMemoryType::deviceUnifiedMemory; + EXPECT_FALSE(usmMemAllocPool.canBePooled(UsmMemAllocPool::allocationThreshold, memoryProperties)); + EXPECT_FALSE(usmMemAllocPool.canBePooled(UsmMemAllocPool::allocationThreshold + 1, memoryProperties)); + + memoryProperties.memoryType = InternalMemoryType::sharedUnifiedMemory; + EXPECT_FALSE(usmMemAllocPool.canBePooled(UsmMemAllocPool::allocationThreshold, memoryProperties)); + EXPECT_FALSE(usmMemAllocPool.canBePooled(UsmMemAllocPool::allocationThreshold + 1, memoryProperties)); +} + +TEST_F(InitializedHostUnifiedMemoryPoolingTest, givenVariousPointersWhenCallingIsInPoolThenCorrectValueIsReturned) { /* probes both edges of the half-open range [pool, poolEnd) */ + void *ptrBeforePool = reinterpret_cast(reinterpret_cast(usmMemAllocPool.pool) - 1); + void *lastPtrInPool = reinterpret_cast(reinterpret_cast(usmMemAllocPool.poolEnd) - 1); + + EXPECT_FALSE(usmMemAllocPool.isInPool(ptrBeforePool)); + EXPECT_TRUE(usmMemAllocPool.isInPool(usmMemAllocPool.pool)); + EXPECT_TRUE(usmMemAllocPool.isInPool(lastPtrInPool)); + EXPECT_FALSE(usmMemAllocPool.isInPool(usmMemAllocPool.poolEnd)); /* poolEnd itself is outside the pool */ +} + +TEST_F(InitializedHostUnifiedMemoryPoolingTest, givenPoolableAllocationWhenUsingPoolThenAllocationIsPooledUnlessPoolIsFull) { + SVMAllocsManager::UnifiedMemoryProperties 
memoryProperties(InternalMemoryType::hostUnifiedMemory, MemoryConstants::pageSize64k, rootDeviceIndices, deviceBitfields); + const auto allocationSize = UsmMemAllocPool::allocationThreshold; + const auto allocationSizeAboveThreshold = allocationSize + 1; + EXPECT_EQ(nullptr, usmMemAllocPool.createUnifiedMemoryAllocation(allocationSizeAboveThreshold, memoryProperties)); /* a request above the threshold is not served from the pool */ + EXPECT_EQ(nullptr, usmMemAllocPool.allocations.get(reinterpret_cast(0x1))); + + auto allocFromPool = usmMemAllocPool.createUnifiedMemoryAllocation(allocationSize, memoryProperties); + EXPECT_NE(nullptr, allocFromPool); + EXPECT_TRUE(usmMemAllocPool.isInPool(allocFromPool)); + auto allocationInfo = usmMemAllocPool.allocations.get(allocFromPool); + EXPECT_NE(nullptr, allocationInfo); + EXPECT_EQ(allocationSize, allocationInfo->size); + + auto svmData = svmManager->getSVMAlloc(allocFromPool); + auto poolSvmData = svmManager->getSVMAlloc(usmMemAllocPool.pool); + EXPECT_EQ(svmData, poolSvmData); /* the pooled pointer resolves to the same SVM data as the pool base */ + + const auto allocationsToFillPool = poolSize / allocationSize; + for (auto i = 1u; i < allocationsToFillPool; ++i) { /* starts at 1 because one allocation was already taken above */ + // exhaust pool + EXPECT_NE(nullptr, usmMemAllocPool.createUnifiedMemoryAllocation(allocationSize, memoryProperties)); + } + + EXPECT_EQ(nullptr, usmMemAllocPool.createUnifiedMemoryAllocation(1, memoryProperties)); /* a full pool cannot serve even a 1-byte request */ + + EXPECT_FALSE(usmMemAllocPool.freeSVMAlloc(reinterpret_cast(0x1), true)); + EXPECT_TRUE(usmMemAllocPool.freeSVMAlloc(allocFromPool, true)); + EXPECT_FALSE(usmMemAllocPool.freeSVMAlloc(allocFromPool, true)); /* freeing the same pointer a second time is rejected */ + EXPECT_EQ(nullptr, usmMemAllocPool.allocations.get(reinterpret_cast(0x1))); + EXPECT_EQ(nullptr, usmMemAllocPool.allocations.extract(reinterpret_cast(0x1))); + + EXPECT_NE(nullptr, usmMemAllocPool.createUnifiedMemoryAllocation(allocationSize, memoryProperties)); /* the freed chunk is available again */ +} + +using InitializationFailedUnifiedMemoryPoolingTest = InitializedUnifiedMemoryPoolingTest; +TEST_F(InitializationFailedUnifiedMemoryPoolingTest, givenNotInitializedPoolWhenUsingPoolThenMethodsSucceed) { + 
SVMAllocsManager::UnifiedMemoryProperties memoryProperties(InternalMemoryType::hostUnifiedMemory, MemoryConstants::pageSize64k, rootDeviceIndices, deviceBitfields); + const auto allocationSize = UsmMemAllocPool::allocationThreshold; + EXPECT_EQ(nullptr, usmMemAllocPool.createUnifiedMemoryAllocation(allocationSize, memoryProperties)); /* a pool that failed to initialize hands out nothing */ + EXPECT_FALSE(usmMemAllocPool.freeSVMAlloc(reinterpret_cast(0x1), true)); /* and reports that it freed nothing */ +} \ No newline at end of file diff --git a/shared/test/unit_test/utilities/sorted_vector_tests.cpp b/shared/test/unit_test/utilities/sorted_vector_tests.cpp index 32059e63a6..2c6f485fc3 100644 --- a/shared/test/unit_test/utilities/sorted_vector_tests.cpp +++ b/shared/test/unit_test/utilities/sorted_vector_tests.cpp @@ -11,7 +11,7 @@ struct Comparator { bool operator()(const std::unique_ptr &svmData, const void *ptr, const void *otherPtr) { - return false; + return ptr == otherPtr; /* match on exact pointer equality so that lookups can find inserted entries */ } }; using TestedSortedVector = NEO::BaseSortedPointerWithValueVector; @@ -20,3 +20,23 @@ TEST(SortedVectorTest, givenBaseSortedVectorWhenGettingNullptrThenNullptrIsRetur TestedSortedVector testedVector; EXPECT_EQ(nullptr, testedVector.get(nullptr)); } + +TEST(SortedVectorTest, givenBaseSortedVectorWhenCallingExtractThenCorrectValueIsReturned) { /* covers extract with a null key, a present key, an already removed key, and the lowest key of a five-element container */ + TestedSortedVector testedVector; + void *ptr = reinterpret_cast(0x1); + testedVector.insert(ptr, 1u); + + EXPECT_EQ(nullptr, testedVector.extract(nullptr)); + auto valuePtr = testedVector.extract(ptr); + EXPECT_EQ(1u, *valuePtr); + EXPECT_EQ(nullptr, testedVector.extract(ptr)); /* the entry is gone after the first extract */ + + testedVector.insert(reinterpret_cast(0x1), 1u); + testedVector.insert(reinterpret_cast(0x2), 2u); + testedVector.insert(reinterpret_cast(0x3), 3u); + testedVector.insert(reinterpret_cast(0x4), 4u); + testedVector.insert(reinterpret_cast(0x5), 5u); + + valuePtr = testedVector.extract(reinterpret_cast(0x1)); + EXPECT_EQ(1u, *valuePtr); +}