From a2994e9b29389a518385edfd17b64afcd2fc9c59 Mon Sep 17 00:00:00 2001 From: Compute-Runtime-Validation Date: Sat, 9 Dec 2023 06:32:36 +0100 Subject: [PATCH] Revert "performance(ocl): set pool allocator threshold 1MB" This reverts commit fc1d93af8e4d36bfccf8d241a2f5c081777ead83. Signed-off-by: Compute-Runtime-Validation --- opencl/source/context/context.cpp | 1 - .../aub_multicontext_tests_xehp_and_later.cpp | 5 ++--- .../copy_engine_aub_tests_xehp_and_later.h | 3 +-- .../unit_test/mem_obj/buffer_pool_alloc_tests.cpp | 14 ++++++++------ shared/source/utilities/buffer_pool_allocator.h | 2 +- 5 files changed, 12 insertions(+), 13 deletions(-) diff --git a/opencl/source/context/context.cpp b/opencl/source/context/context.cpp index 18e4036969..a8f6a263f2 100644 --- a/opencl/source/context/context.cpp +++ b/opencl/source/context/context.cpp @@ -541,7 +541,6 @@ Buffer *Context::BufferPool::allocate(const MemoryProperties &memoryProperties, return nullptr; } bufferRegion.origin -= BufferPool::startingOffset; - DEBUG_BREAK_IF(bufferRegion.origin >= BufferPool::aggregatedSmallBuffersPoolSize); bufferRegion.size = requestedSize; auto bufferFromPool = this->mainStorage->createSubBuffer(flags, flagsIntel, &bufferRegion, errcodeRet); bufferFromPool->createFunction = this->mainStorage->createFunction; diff --git a/opencl/test/unit_test/aub_tests/command_queue/aub_multicontext_tests_xehp_and_later.cpp b/opencl/test/unit_test/aub_tests/command_queue/aub_multicontext_tests_xehp_and_later.cpp index 2b08fbba88..93b09d15d4 100644 --- a/opencl/test/unit_test/aub_tests/command_queue/aub_multicontext_tests_xehp_and_later.cpp +++ b/opencl/test/unit_test/aub_tests/command_queue/aub_multicontext_tests_xehp_and_later.cpp @@ -79,10 +79,9 @@ struct MultitileMulticontextTests : public MulticontextAubFixture, public ::test for (uint32_t tile = 0; tile < tileDevices.size(); tile++) { for (uint32_t tileEngine = 0; tileEngine < commandQueues[tile].size(); tileEngine++) { getSimulatedCsr(tile, tileEngine)->pollForCompletion(); - auto regularBufferGpuAddress = static_cast(ptrOffset(regularBuffers[tile][tileEngine]->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress(), regularBuffers[tile][tileEngine]->getOffset())); - - auto tileOnlyBufferGpuAddress = static_cast(ptrOffset(tileOnlyBuffers[tile][tileEngine]->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress(), tileOnlyBuffers[tile][tileEngine]->getOffset())); + auto regularBufferGpuAddress = static_cast(regularBuffers[tile][tileEngine]->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress()); + auto tileOnlyBufferGpuAddress = static_cast(tileOnlyBuffers[tile][tileEngine]->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress()); expectMemory(reinterpret_cast(regularBufferGpuAddress), writePattern, bufferSize, tile, tileEngine); expectMemory(reinterpret_cast(tileOnlyBufferGpuAddress), writePattern, bufferSize, tile, tileEngine); } diff --git a/opencl/test/unit_test/aub_tests/command_stream/copy_engine_aub_tests_xehp_and_later.h b/opencl/test/unit_test/aub_tests/command_stream/copy_engine_aub_tests_xehp_and_later.h index 3d0c33a589..f01e3c3f82 100644 --- a/opencl/test/unit_test/aub_tests/command_stream/copy_engine_aub_tests_xehp_and_later.h +++ b/opencl/test/unit_test/aub_tests/command_stream/copy_engine_aub_tests_xehp_and_later.h @@ -52,7 +52,6 @@ struct CopyEngineXeHPAndLater : public MulticontextAubFixture, public ::testing: debugManager.flags.RenderCompressedBuffersEnabled.set(true); debugManager.flags.RenderCompressedImagesEnabled.set(true); debugManager.flags.EnableFreeMemory.set(false); - debugManager.flags.ExperimentalSmallBufferPoolAllocator.set(0); MulticontextAubFixture::setUp(numTiles, EnabledCommandStreamers::Single, true); @@ -101,7 +100,7 @@ struct CopyEngineXeHPAndLater : public MulticontextAubFixture, public ::testing: EXPECT_EQ(CL_SUCCESS, retVal); if (compressed) { - EXPECT_TRUE(graphicsAllocation->isCompressionEnabled()); + EXPECT_TRUE(graphicsAllocation->getDefaultGmm()->isCompressionEnabled); } EXPECT_EQ(!inLocalMemory, MemoryPoolHelper::isSystemMemoryPool(graphicsAllocation->getMemoryPool())); diff --git a/opencl/test/unit_test/mem_obj/buffer_pool_alloc_tests.cpp b/opencl/test/unit_test/mem_obj/buffer_pool_alloc_tests.cpp index 0123d84af5..074a0aa5f0 100644 --- a/opencl/test/unit_test/mem_obj/buffer_pool_alloc_tests.cpp +++ b/opencl/test/unit_test/mem_obj/buffer_pool_alloc_tests.cpp @@ -362,8 +362,8 @@ TEST_F(AggregatedSmallBuffersEnabledTest, givenCopyHostPointerWhenCreatingBuffer context->setSpecialQueue(commandQueue, rootDeviceIndex); flags = CL_MEM_COPY_HOST_PTR; - auto dataToCopy = std::unique_ptr(new unsigned char[PoolAllocator::smallBufferThreshold]()); - hostPtr = dataToCopy.get(); + unsigned char dataToCopy[PoolAllocator::smallBufferThreshold]; + hostPtr = dataToCopy; EXPECT_TRUE(poolAllocator->isAggregatedSmallBuffersEnabled(context.get())); EXPECT_EQ(1u, poolAllocator->bufferPools.size()); @@ -430,6 +430,7 @@ TEST_F(AggregatedSmallBuffersKernelTest, givenBufferFromPoolWhenOffsetSubbufferI std::unique_ptr buffer(Buffer::create(context.get(), flags, size, hostPtr, retVal)); EXPECT_EQ(retVal, CL_SUCCESS); EXPECT_NE(buffer, nullptr); + EXPECT_GT(buffer->getOffset(), 0u); cl_buffer_region region; region.origin = 0xc0; region.size = 32; @@ -546,8 +547,8 @@ TEST_F(AggregatedSmallBuffersEnabledApiTest, givenSmallBufferWhenCreatingBufferT TEST_F(AggregatedSmallBuffersEnabledApiTest, givenUseHostPointerWhenCreatingBufferThenDoNotUsePool) { flags |= CL_MEM_USE_HOST_PTR; - auto hostData = std::unique_ptr(new unsigned char[PoolAllocator::smallBufferThreshold]()); - hostPtr = hostData.get(); + unsigned char hostData[PoolAllocator::smallBufferThreshold]; + hostPtr = hostData; cl_mem smallBuffer = clCreateBuffer(clContext, flags, size, hostPtr, &retVal); EXPECT_EQ(retVal, CL_SUCCESS); EXPECT_NE(smallBuffer, nullptr); @@ -633,9 +634,9 @@ TEST_F(AggregatedSmallBuffersEnabledApiTest, givenSubBufferNotFromPoolAndAggrega TEST_F(AggregatedSmallBuffersEnabledApiTest, givenCopyHostPointerWhenCreatingBufferThenUsePoolAndCopyHostPointer) { flags |= CL_MEM_COPY_HOST_PTR; - auto dataToCopy = std::unique_ptr(new unsigned char[PoolAllocator::smallBufferThreshold]()); + unsigned char dataToCopy[PoolAllocator::smallBufferThreshold]; dataToCopy[0] = 123; - hostPtr = dataToCopy.get(); + hostPtr = dataToCopy; auto contextRefCountBefore = context->getRefInternalCount(); cl_mem smallBuffer = clCreateBuffer(clContext, flags, size, hostPtr, &retVal); EXPECT_EQ(context->getRefInternalCount(), contextRefCountBefore + 1); @@ -671,6 +672,7 @@ TEST_F(AggregatedSmallBuffersSubBufferApiTest, givenBufferFromPoolWhenCreateSubB EXPECT_EQ(retVal, CL_SUCCESS); EXPECT_NE(buffer, nullptr); MockBuffer *mockBuffer = static_cast(buffer); + EXPECT_GT(mockBuffer->offset, 0u); EXPECT_EQ(ptrOffset(poolAllocator->bufferPools[0].mainStorage->getCpuAddress(), mockBuffer->getOffset()), mockBuffer->getCpuAddress()); cl_buffer_region region{}; diff --git a/shared/source/utilities/buffer_pool_allocator.h b/shared/source/utilities/buffer_pool_allocator.h index fbece56e46..6ac36b3bae 100644 --- a/shared/source/utilities/buffer_pool_allocator.h +++ b/shared/source/utilities/buffer_pool_allocator.h @@ -26,7 +26,7 @@ template struct SmallBuffersParams { protected: static constexpr auto aggregatedSmallBuffersPoolSize = 2 * MemoryConstants::megaByte; - static constexpr auto smallBufferThreshold = 1 * MemoryConstants::megaByte; + static constexpr auto smallBufferThreshold = 4 * MemoryConstants::kiloByte; static constexpr auto chunkAlignment = 512u; static constexpr auto startingOffset = chunkAlignment; };