performance(ocl): set pool allocator threshold 1MB
Increase pool allocator threshold to 1MB.
Remove stack allocations based on threshold in tests.

Related-To: NEO-9690
Signed-off-by: Dominik Dabek <dominik.dabek@intel.com>
commit fc1d93af8e
parent 9ce3713ace
committed by Compute-Runtime-Automation
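
The test changes in this commit all follow one pattern: a host buffer previously declared as unsigned char data[PoolAllocator::smallBufferThreshold] would now place roughly 1 MB on the test's stack, so the arrays move to the heap. Below is a minimal sketch of that pattern; the constant restates the new threshold value in place of PoolAllocator::smallBufferThreshold and the helper name is purely illustrative.

#include <cstddef>
#include <memory>

// Stands in for PoolAllocator::smallBufferThreshold, which is 1 MB after this change.
constexpr std::size_t smallBufferThreshold = 1024 * 1024;

void prepareHostPointerSketch() {
    // Before: unsigned char dataToCopy[smallBufferThreshold]; // ~1 MB on the stack
    // After: zero-initialized heap storage, equivalent to new unsigned char[n]()
    auto dataToCopy = std::make_unique<unsigned char[]>(smallBufferThreshold);
    unsigned char *hostPtr = dataToCopy.get(); // what the tests pass to clCreateBuffer
    (void)hostPtr;
}

std::make_unique for an array type value-initializes the elements, matching the new unsigned char[n]() form used in the tests below.
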
@@ -541,6 +541,7 @@ Buffer *Context::BufferPool::allocate(const MemoryProperties &memoryProperties,
        return nullptr;
    }
    bufferRegion.origin -= BufferPool::startingOffset;
    DEBUG_BREAK_IF(bufferRegion.origin >= BufferPool::aggregatedSmallBuffersPoolSize);
    bufferRegion.size = requestedSize;
    auto bufferFromPool = this->mainStorage->createSubBuffer(flags, flagsIntel, &bufferRegion, errcodeRet);
    bufferFromPool->createFunction = this->mainStorage->createFunction;

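For context on the allocate path above: small requests are served as sub-buffers carved out of a preallocated main storage buffer, which is why the code adjusts bufferRegion.origin by startingOffset and then calls createSubBuffer on mainStorage. The following is a minimal, self-contained sketch of that chunk-carving idea, assuming a simple bump-pointer strategy; the struct and member names are illustrative, not NEO's actual implementation (the real allocator keeps a list of pools, visible as bufferPools in the tests further down).

#include <cstddef>
#include <optional>

// Illustrative bump-pointer pool: hands out aligned offsets into one large
// backing buffer. The constants mirror the values visible in this change
// (2 MB pool, 1 MB threshold, 512-byte chunk alignment); everything else is
// an assumption made for the sketch.
struct TinyBufferPoolSketch {
    static constexpr std::size_t poolSize = 2u * 1024 * 1024;
    static constexpr std::size_t threshold = 1u * 1024 * 1024;
    static constexpr std::size_t chunkAlignment = 512u;
    static constexpr std::size_t startingOffset = chunkAlignment;

    std::size_t nextFree = startingOffset;

    // Returns an offset into the backing storage, or nothing when the request
    // is too large to pool or the pool is exhausted. The caller would wrap the
    // offset in a sub-buffer of the main storage, as the hunk above does.
    std::optional<std::size_t> allocate(std::size_t requestedSize) {
        if (requestedSize == 0 || requestedSize > threshold) {
            return std::nullopt;
        }
        const std::size_t alignedSize = (requestedSize + chunkAlignment - 1) / chunkAlignment * chunkAlignment;
        if (nextFree + alignedSize > poolSize) {
            return std::nullopt;
        }
        const std::size_t offset = nextFree;
        nextFree += alignedSize;
        return offset;
    }
};

Raising the threshold from 4 KB to 1 MB (the SmallBuffersParams change at the end of this diff) is the behavioral core of the commit: more buffer sizes now take this pooled path instead of receiving a standalone allocation.
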
@@ -79,9 +79,10 @@ struct MultitileMulticontextTests : public MulticontextAubFixture, public ::test
    for (uint32_t tile = 0; tile < tileDevices.size(); tile++) {
        for (uint32_t tileEngine = 0; tileEngine < commandQueues[tile].size(); tileEngine++) {
            getSimulatedCsr<FamilyType>(tile, tileEngine)->pollForCompletion();
            auto regularBufferGpuAddress = static_cast<uintptr_t>(ptrOffset(regularBuffers[tile][tileEngine]->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress(), regularBuffers[tile][tileEngine]->getOffset()));
            auto tileOnlyBufferGpuAddress = static_cast<uintptr_t>(ptrOffset(tileOnlyBuffers[tile][tileEngine]->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress(), tileOnlyBuffers[tile][tileEngine]->getOffset()));
            auto regularBufferGpuAddress = static_cast<uintptr_t>(regularBuffers[tile][tileEngine]->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress());
            auto tileOnlyBufferGpuAddress = static_cast<uintptr_t>(tileOnlyBuffers[tile][tileEngine]->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress());
            expectMemory<FamilyType>(reinterpret_cast<void *>(regularBufferGpuAddress), writePattern, bufferSize, tile, tileEngine);
            expectMemory<FamilyType>(reinterpret_cast<void *>(tileOnlyBufferGpuAddress), writePattern, bufferSize, tile, tileEngine);
        }

@@ -52,6 +52,7 @@ struct CopyEngineXeHPAndLater : public MulticontextAubFixture, public ::testing:
    debugManager.flags.RenderCompressedBuffersEnabled.set(true);
    debugManager.flags.RenderCompressedImagesEnabled.set(true);
    debugManager.flags.EnableFreeMemory.set(false);
+   debugManager.flags.ExperimentalSmallBufferPoolAllocator.set(0);

    MulticontextAubFixture::setUp(numTiles, EnabledCommandStreamers::Single, true);

@@ -100,7 +101,7 @@ struct CopyEngineXeHPAndLater : public MulticontextAubFixture, public ::testing:

    EXPECT_EQ(CL_SUCCESS, retVal);
    if (compressed) {
-       EXPECT_TRUE(graphicsAllocation->getDefaultGmm()->isCompressionEnabled);
+       EXPECT_TRUE(graphicsAllocation->isCompressionEnabled());
    }
    EXPECT_EQ(!inLocalMemory, MemoryPoolHelper::isSystemMemoryPool(graphicsAllocation->getMemoryPool()));

@@ -362,8 +362,8 @@ TEST_F(AggregatedSmallBuffersEnabledTest, givenCopyHostPointerWhenCreatingBuffer
    context->setSpecialQueue(commandQueue, rootDeviceIndex);

    flags = CL_MEM_COPY_HOST_PTR;
-   unsigned char dataToCopy[PoolAllocator::smallBufferThreshold];
-   hostPtr = dataToCopy;
+   auto dataToCopy = std::unique_ptr<unsigned char[]>(new unsigned char[PoolAllocator::smallBufferThreshold]());
+   hostPtr = dataToCopy.get();

    EXPECT_TRUE(poolAllocator->isAggregatedSmallBuffersEnabled(context.get()));
    EXPECT_EQ(1u, poolAllocator->bufferPools.size());

@@ -430,7 +430,6 @@ TEST_F(AggregatedSmallBuffersKernelTest, givenBufferFromPoolWhenOffsetSubbufferI
    std::unique_ptr<Buffer> buffer(Buffer::create(context.get(), flags, size, hostPtr, retVal));
    EXPECT_EQ(retVal, CL_SUCCESS);
    EXPECT_NE(buffer, nullptr);
    EXPECT_GT(buffer->getOffset(), 0u);
    cl_buffer_region region;
    region.origin = 0xc0;
    region.size = 32;

@@ -547,8 +546,8 @@ TEST_F(AggregatedSmallBuffersEnabledApiTest, givenSmallBufferWhenCreatingBufferT

TEST_F(AggregatedSmallBuffersEnabledApiTest, givenUseHostPointerWhenCreatingBufferThenDoNotUsePool) {
    flags |= CL_MEM_USE_HOST_PTR;
-   unsigned char hostData[PoolAllocator::smallBufferThreshold];
-   hostPtr = hostData;
+   auto hostData = std::unique_ptr<unsigned char[]>(new unsigned char[PoolAllocator::smallBufferThreshold]());
+   hostPtr = hostData.get();
    cl_mem smallBuffer = clCreateBuffer(clContext, flags, size, hostPtr, &retVal);
    EXPECT_EQ(retVal, CL_SUCCESS);
    EXPECT_NE(smallBuffer, nullptr);

@@ -634,9 +633,9 @@ TEST_F(AggregatedSmallBuffersEnabledApiTest, givenSubBufferNotFromPoolAndAggrega

TEST_F(AggregatedSmallBuffersEnabledApiTest, givenCopyHostPointerWhenCreatingBufferThenUsePoolAndCopyHostPointer) {
    flags |= CL_MEM_COPY_HOST_PTR;
-   unsigned char dataToCopy[PoolAllocator::smallBufferThreshold];
+   auto dataToCopy = std::unique_ptr<unsigned char[]>(new unsigned char[PoolAllocator::smallBufferThreshold]());
    dataToCopy[0] = 123;
-   hostPtr = dataToCopy;
+   hostPtr = dataToCopy.get();
    auto contextRefCountBefore = context->getRefInternalCount();
    cl_mem smallBuffer = clCreateBuffer(clContext, flags, size, hostPtr, &retVal);
    EXPECT_EQ(context->getRefInternalCount(), contextRefCountBefore + 1);

@@ -672,7 +671,6 @@ TEST_F(AggregatedSmallBuffersSubBufferApiTest, givenBufferFromPoolWhenCreateSubB
    EXPECT_EQ(retVal, CL_SUCCESS);
    EXPECT_NE(buffer, nullptr);
    MockBuffer *mockBuffer = static_cast<MockBuffer *>(buffer);
    EXPECT_GT(mockBuffer->offset, 0u);
    EXPECT_EQ(ptrOffset(poolAllocator->bufferPools[0].mainStorage->getCpuAddress(), mockBuffer->getOffset()), mockBuffer->getCpuAddress());

    cl_buffer_region region{};

@@ -26,7 +26,7 @@ template <typename PoolT>
struct SmallBuffersParams {
  protected:
    static constexpr auto aggregatedSmallBuffersPoolSize = 2 * MemoryConstants::megaByte;
-   static constexpr auto smallBufferThreshold = 4 * MemoryConstants::kiloByte;
+   static constexpr auto smallBufferThreshold = 1 * MemoryConstants::megaByte;
    static constexpr auto chunkAlignment = 512u;
    static constexpr auto startingOffset = chunkAlignment;
};