diff --git a/opencl/source/api/api.cpp b/opencl/source/api/api.cpp index 9692631fdb..b0cdf961b6 100644 --- a/opencl/source/api/api.cpp +++ b/opencl/source/api/api.cpp @@ -786,7 +786,7 @@ cl_mem CL_API_CALL clCreateSubBuffer(cl_mem buffer, } if (parentBuffer->isSubBuffer() == true) { - if (!parentBuffer->getContext()->getBufferPoolAllocator().isPoolBuffer(parentBuffer->getAssociatedMemObject()) || parentBuffer->isSubBufferFromPool) { + if (!parentBuffer->getContext()->getBufferPoolAllocator().isPoolBuffer(parentBuffer->getAssociatedMemObject())) { retVal = CL_INVALID_MEM_OBJECT; break; } diff --git a/opencl/source/mem_obj/buffer.cpp b/opencl/source/mem_obj/buffer.cpp index d5798d747c..6cd2d70baf 100644 --- a/opencl/source/mem_obj/buffer.cpp +++ b/opencl/source/mem_obj/buffer.cpp @@ -618,20 +618,12 @@ Buffer *Buffer::createSubBuffer(cl_mem_flags flags, const cl_buffer_region *region, cl_int &errcodeRet) { DEBUG_BREAK_IF(nullptr == createFunction); - if (this->context->getBufferPoolAllocator().isPoolBuffer(associatedMemObject)) { - Buffer *poolBuffer = static_cast(associatedMemObject); - auto regionWithAdditionalOffset = *region; - regionWithAdditionalOffset.origin += this->offset; - auto buffer = poolBuffer->createSubBuffer(flags, flagsIntel, ®ionWithAdditionalOffset, errcodeRet); - buffer->isSubBufferFromPool = true; - return buffer; - } MemoryProperties memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, flagsIntel, 0, &this->context->getDevice(0)->getDevice()); auto copyMultiGraphicsAllocation = MultiGraphicsAllocation{this->multiGraphicsAllocation}; auto buffer = createFunction(this->context, memoryProperties, flags, 0, region->size, - ptrOffset(this->memoryStorage, region->origin), + this->memoryStorage ? ptrOffset(this->memoryStorage, region->origin) : nullptr, this->hostPtr ? ptrOffset(this->hostPtr, region->origin) : nullptr, std::move(copyMultiGraphicsAllocation), this->isZeroCopy, this->isHostPtrSVM, false); @@ -641,7 +633,7 @@ Buffer *Buffer::createSubBuffer(cl_mem_flags flags, } buffer->associatedMemObject = this; - buffer->offset = region->origin; + buffer->offset = region->origin + this->offset; buffer->setParentSharingHandler(this->getSharingHandler()); this->incRefInternal(); diff --git a/opencl/source/mem_obj/buffer.h b/opencl/source/mem_obj/buffer.h index 1f9c19429c..f5958bce1c 100644 --- a/opencl/source/mem_obj/buffer.h +++ b/opencl/source/mem_obj/buffer.h @@ -66,7 +66,6 @@ class Buffer : public MemObj { constexpr static cl_ulong maskMagic = 0xFFFFFFFFFFFFFFFFLL; constexpr static cl_ulong objectMagic = MemObj::objectMagic | 0x02; bool forceDisallowCPUCopy = false; - bool isSubBufferFromPool = false; ~Buffer() override; diff --git a/opencl/source/mem_obj/mem_obj.cpp b/opencl/source/mem_obj/mem_obj.cpp index b9f4a7cce0..c23649d2c8 100644 --- a/opencl/source/mem_obj/mem_obj.cpp +++ b/opencl/source/mem_obj/mem_obj.cpp @@ -172,11 +172,21 @@ cl_int MemObj::getMemObjectInfo(cl_mem_info paramName, break; case CL_MEM_OFFSET: + if (nullptr != this->associatedMemObject) { + if (this->getContext()->getBufferPoolAllocator().isPoolBuffer(this->associatedMemObject)) { + offset = 0; + } else { + offset -= this->associatedMemObject->getOffset(); + } + } srcParamSize = sizeof(offset); srcParam = &offset; break; case CL_MEM_ASSOCIATED_MEMOBJECT: + if (this->getContext()->getBufferPoolAllocator().isPoolBuffer(this->associatedMemObject)) { + clAssociatedMemObject = nullptr; + } srcParamSize = sizeof(clAssociatedMemObject); srcParam = &clAssociatedMemObject; break; diff --git a/opencl/test/unit_test/mem_obj/buffer_pool_alloc_tests.cpp b/opencl/test/unit_test/mem_obj/buffer_pool_alloc_tests.cpp index 182c244577..741080417d 100644 --- a/opencl/test/unit_test/mem_obj/buffer_pool_alloc_tests.cpp +++ b/opencl/test/unit_test/mem_obj/buffer_pool_alloc_tests.cpp @@ -13,7 +13,7 @@ #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" - +#include "opencl/test/unit_test/mocks/mock_kernel.h" using namespace NEO; namespace Ult { using PoolAllocator = Context::BufferPoolAllocator; @@ -21,6 +21,7 @@ using MockBufferPoolAllocator = MockContext::MockBufferPoolAllocator; template class AggregatedSmallBuffersTestTemplate : public ::testing::Test { + public: void SetUp() override { if constexpr (runSetup) { this->setUpImpl(); @@ -37,7 +38,6 @@ class AggregatedSmallBuffersTestTemplate : public ::testing::Test { this->mockMemoryManager->failInDevicePoolWithError = shouldFail; } - public: std::unique_ptr deviceFactory; MockClDevice *device; std::unique_ptr context; @@ -66,6 +66,36 @@ class AggregatedSmallBuffersTestTemplate : public ::testing::Test { } }; +class aggregatedSmallBuffersKernelTest : public AggregatedSmallBuffersTestTemplate<1, false, true> { + public: + void SetUp() override { + AggregatedSmallBuffersTestTemplate::SetUp(); + + pKernelInfo = std::make_unique(); + pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; + + constexpr uint32_t sizeOfPointer = sizeof(void *); + pKernelInfo->addArgBuffer(0, 0x10, sizeOfPointer); + pProgram.reset(new MockProgram(context.get(), false, toClDeviceVector(*device))); + + retVal = CL_INVALID_VALUE; + pMultiDeviceKernel.reset(MultiDeviceKernel::create(pProgram.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, device->getRootDeviceIndex()), &retVal)); + pKernel = static_cast(pMultiDeviceKernel->getKernel(device->getRootDeviceIndex())); + ASSERT_NE(pKernel, nullptr); + ASSERT_EQ(retVal, CL_SUCCESS); + + pKernel->setCrossThreadData(pCrossThreadData, sizeof(pCrossThreadData)); + pKernelArg = (void **)(pKernel->getCrossThreadData() + pKernelInfo->argAsPtr(0).stateless); + }; + + std::unique_ptr pKernelInfo; + std::unique_ptr pProgram; + std::unique_ptr pMultiDeviceKernel; + MockKernel *pKernel = nullptr; + char pCrossThreadData[64]; + void **pKernelArg = nullptr; +}; + using aggregatedSmallBuffersDefaultTest = AggregatedSmallBuffersTestTemplate<-1>; TEST_F(aggregatedSmallBuffersDefaultTest, givenAggregatedSmallBuffersDefaultWhenCheckIfEnabledThenReturnFalse) { @@ -244,6 +274,29 @@ TEST_F(aggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndS } } +TEST_F(aggregatedSmallBuffersKernelTest, givenBufferFromPoolWhenOffsetSubbufferIsPassedToSetKernelArgThenCorrectGpuVAIsPatched) { + std::unique_ptr unusedBuffer(Buffer::create(context.get(), flags, size, hostPtr, retVal)); + std::unique_ptr buffer(Buffer::create(context.get(), flags, size, hostPtr, retVal)); + ASSERT_EQ(retVal, CL_SUCCESS); + ASSERT_NE(buffer, nullptr); + ASSERT_GT(buffer->getOffset(), 0u); + cl_buffer_region region; + region.origin = 0xc0; + region.size = 32; + cl_int error = 0; + std::unique_ptr subBuffer(buffer->createSubBuffer(buffer->getFlags(), buffer->getFlagsIntel(), ®ion, error)); + ASSERT_NE(subBuffer, nullptr); + EXPECT_EQ(ptrOffset(buffer->getCpuAddress(), region.origin), subBuffer->getCpuAddress()); + + const auto graphicsAllocation = subBuffer->getGraphicsAllocation(device->getRootDeviceIndex()); + ASSERT_NE(graphicsAllocation, nullptr); + const auto gpuAddress = graphicsAllocation->getGpuAddress(); + EXPECT_EQ(ptrOffset(gpuAddress, buffer->getOffset() + region.origin), subBuffer->getBufferAddress(device->getRootDeviceIndex())); + + subBuffer->setArgStateless(pKernelArg, pKernelInfo->argAsPtr(0).pointerSize, device->getRootDeviceIndex(), false); + EXPECT_EQ(reinterpret_cast(gpuAddress + region.origin + buffer->getOffset()), *pKernelArg); +} + using aggregatedSmallBuffersEnabledTestFailPoolInit = AggregatedSmallBuffersTestTemplate<1, true>; TEST_F(aggregatedSmallBuffersEnabledTestFailPoolInit, givenAggregatedSmallBuffersEnabledAndSizeEqualToThresholdWhenBufferCreateCalledButPoolCreateFailedThenDoNotUsePool) { @@ -279,6 +332,7 @@ class AggregatedSmallBuffersApiTestTemplate : public ::testing::Test { clContext = clCreateContext(nullptr, 1, devices, nullptr, nullptr, &retVal); ASSERT_EQ(retVal, CL_SUCCESS); context = castToObject(clContext); + poolAllocator = static_cast(&context->getBufferPoolAllocator()); } public: @@ -290,6 +344,7 @@ class AggregatedSmallBuffersApiTestTemplate : public ::testing::Test { void *hostPtr{nullptr}; cl_context clContext{nullptr}; Context *context{nullptr}; + MockBufferPoolAllocator *poolAllocator{nullptr}; DebugManagerStateRestore restore; }; @@ -340,6 +395,73 @@ TEST_F(aggregatedSmallBuffersEnabledApiTest, givenSmallBufferWhenCreatingBufferT EXPECT_EQ(clReleaseContext(context), CL_SUCCESS); } +TEST_F(aggregatedSmallBuffersEnabledApiTest, givenSmallBufferWhenCreatingBufferWithNullPropertiesThenUsePool) { + auto contextRefCountBefore = context->getRefInternalCount(); + cl_mem smallBuffer = clCreateBufferWithProperties(clContext, nullptr, flags, size, hostPtr, &retVal); + EXPECT_EQ(retVal, CL_SUCCESS); + ASSERT_NE(smallBuffer, nullptr); + + MockBuffer *asBuffer = static_cast(smallBuffer); + EXPECT_TRUE(asBuffer->isSubBuffer()); + Buffer *parentBuffer = static_cast(asBuffer->associatedMemObject); + EXPECT_EQ(2, parentBuffer->getRefInternalCount()); + MockBufferPoolAllocator *mockBufferPoolAllocator = static_cast(&context->getBufferPoolAllocator()); + EXPECT_EQ(parentBuffer, mockBufferPoolAllocator->mainStorage); + + retVal = clReleaseMemObject(smallBuffer); + EXPECT_EQ(retVal, CL_SUCCESS); + + EXPECT_EQ(context->getRefInternalCount(), contextRefCountBefore); + + EXPECT_EQ(clReleaseContext(context), CL_SUCCESS); +} + +TEST_F(aggregatedSmallBuffersEnabledApiTest, givenSmallBufferWhenCreatingBufferWithEmptyPropertiesThenUsePool) { + auto contextRefCountBefore = context->getRefInternalCount(); + cl_mem_properties memProperties{}; + cl_mem smallBuffer = clCreateBufferWithProperties(clContext, &memProperties, flags, size, hostPtr, &retVal); + EXPECT_EQ(retVal, CL_SUCCESS); + ASSERT_NE(smallBuffer, nullptr); + + MockBuffer *asBuffer = static_cast(smallBuffer); + EXPECT_TRUE(asBuffer->isSubBuffer()); + Buffer *parentBuffer = static_cast(asBuffer->associatedMemObject); + EXPECT_EQ(2, parentBuffer->getRefInternalCount()); + MockBufferPoolAllocator *mockBufferPoolAllocator = static_cast(&context->getBufferPoolAllocator()); + EXPECT_EQ(parentBuffer, mockBufferPoolAllocator->mainStorage); + + retVal = clReleaseMemObject(smallBuffer); + EXPECT_EQ(retVal, CL_SUCCESS); + + EXPECT_EQ(context->getRefInternalCount(), contextRefCountBefore); + + EXPECT_EQ(clReleaseContext(context), CL_SUCCESS); +} + +TEST_F(aggregatedSmallBuffersEnabledApiTest, givenBufferFromPoolWhenGetMemObjInfoCalledThenReturnValuesLikeForNormalBuffer) { + cl_mem buffer = clCreateBuffer(clContext, flags, size, hostPtr, &retVal); + EXPECT_EQ(retVal, CL_SUCCESS); + ASSERT_NE(buffer, nullptr); + + MockBuffer *asBuffer = static_cast(buffer); + EXPECT_TRUE(asBuffer->isSubBuffer()); + + cl_mem associatedMemObj = nullptr; + retVal = clGetMemObjectInfo(buffer, CL_MEM_ASSOCIATED_MEMOBJECT, sizeof(cl_mem), &associatedMemObj, nullptr); + EXPECT_EQ(retVal, CL_SUCCESS); + EXPECT_EQ(associatedMemObj, nullptr); + + size_t offset = 1u; + retVal = clGetMemObjectInfo(buffer, CL_MEM_OFFSET, sizeof(size_t), &offset, nullptr); + EXPECT_EQ(retVal, CL_SUCCESS); + EXPECT_EQ(offset, 0u); + + retVal = clReleaseMemObject(buffer); + EXPECT_EQ(retVal, CL_SUCCESS); + + EXPECT_EQ(clReleaseContext(context), CL_SUCCESS); +} + TEST_F(aggregatedSmallBuffersEnabledApiTest, givenSubBufferNotFromPoolAndAggregatedSmallBuffersEnabledWhenReleaseMemObjectCalledThenItSucceeds) { DebugManagerStateRestore restore; DebugManager.flags.ExperimentalSmallBufferPoolAllocator.set(0); @@ -407,6 +529,7 @@ TEST_F(aggregatedSmallBuffersSubBufferApiTest, givenBufferFromPoolWhenCreateSubB ASSERT_NE(buffer, nullptr); MockBuffer *mockBuffer = static_cast(buffer); EXPECT_GT(mockBuffer->offset, 0u); + EXPECT_EQ(ptrOffset(poolAllocator->mainStorage->getCpuAddress(), mockBuffer->getOffset()), mockBuffer->getCpuAddress()); cl_buffer_region region{}; region.size = 1; @@ -415,9 +538,8 @@ TEST_F(aggregatedSmallBuffersSubBufferApiTest, givenBufferFromPoolWhenCreateSubB EXPECT_EQ(retVal, CL_SUCCESS); ASSERT_NE(subBuffer, nullptr); MockBuffer *mockSubBuffer = static_cast(subBuffer); - EXPECT_EQ(mockSubBuffer->offset, mockBuffer->offset + region.origin); - MockBufferPoolAllocator *mockBufferPoolAllocator = static_cast(&context->getBufferPoolAllocator()); - EXPECT_EQ(mockSubBuffer->associatedMemObject, mockBufferPoolAllocator->mainStorage); + EXPECT_EQ(mockSubBuffer->associatedMemObject, buffer); + EXPECT_EQ(ptrOffset(mockBuffer->getCpuAddress(), region.origin), mockSubBuffer->getCpuAddress()); retVal = clReleaseMemObject(subBuffer); EXPECT_EQ(retVal, CL_SUCCESS); @@ -431,6 +553,39 @@ TEST_F(aggregatedSmallBuffersSubBufferApiTest, givenBufferFromPoolWhenCreateSubB EXPECT_EQ(clReleaseContext(context), CL_SUCCESS); } +TEST_F(aggregatedSmallBuffersSubBufferApiTest, givenSubBufferFromBufferPoolWhenGetMemObjInfoCalledThenReturnValuesLikeForNormalSubBuffer) { + cl_mem buffer = clCreateBuffer(clContext, flags, size, hostPtr, &retVal); + EXPECT_EQ(retVal, CL_SUCCESS); + ASSERT_NE(buffer, nullptr); + MockBuffer *mockBuffer = static_cast(buffer); + ASSERT_TRUE(context->getBufferPoolAllocator().isPoolBuffer(mockBuffer->associatedMemObject)); + + cl_buffer_region region{}; + region.size = 1; + region.origin = size / 2; + cl_mem subBuffer = clCreateSubBuffer(buffer, flags, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &retVal); + EXPECT_EQ(retVal, CL_SUCCESS); + ASSERT_NE(subBuffer, nullptr); + + cl_mem associatedMemObj = nullptr; + retVal = clGetMemObjectInfo(subBuffer, CL_MEM_ASSOCIATED_MEMOBJECT, sizeof(cl_mem), &associatedMemObj, nullptr); + EXPECT_EQ(retVal, CL_SUCCESS); + EXPECT_EQ(associatedMemObj, buffer); + + size_t offset = 0u; + retVal = clGetMemObjectInfo(subBuffer, CL_MEM_OFFSET, sizeof(size_t), &offset, nullptr); + EXPECT_EQ(retVal, CL_SUCCESS); + EXPECT_EQ(offset, region.origin); + + retVal = clReleaseMemObject(subBuffer); + EXPECT_EQ(retVal, CL_SUCCESS); + + retVal = clReleaseMemObject(buffer); + EXPECT_EQ(retVal, CL_SUCCESS); + + EXPECT_EQ(clReleaseContext(context), CL_SUCCESS); +} + TEST_F(aggregatedSmallBuffersSubBufferApiTest, givenBufferFromPoolWhenCreateSubBufferCalledWithRegionOutsideBufferThenItFails) { cl_mem buffer = clCreateBuffer(clContext, flags, size, hostPtr, &retVal); EXPECT_EQ(retVal, CL_SUCCESS); diff --git a/opencl/test/unit_test/mem_obj/sub_buffer_tests.cpp b/opencl/test/unit_test/mem_obj/sub_buffer_tests.cpp index d67b543a31..11462156dd 100644 --- a/opencl/test/unit_test/mem_obj/sub_buffer_tests.cpp +++ b/opencl/test/unit_test/mem_obj/sub_buffer_tests.cpp @@ -176,6 +176,25 @@ TEST_F(SubBufferTest, GivenBufferWithMemoryStorageAndNullHostPtrWhenSubBufferIsC buffer->release(); } +TEST_F(SubBufferTest, givenBufferWithNullMemoryStorageWhenSubBufferIsCreatedThenMemoryStorageIsNotOffseted) { + cl_buffer_region region = {1, 1}; + cl_int retVal = 0; + + MockBuffer *mockBuffer = static_cast(buffer); + void *savedMemoryStorage = mockBuffer->memoryStorage; + mockBuffer->memoryStorage = nullptr; + + auto subBuffer = buffer->createSubBuffer(0, 0, ®ion, retVal); + EXPECT_EQ(CL_SUCCESS, retVal); + ASSERT_NE(nullptr, subBuffer); + + MockBuffer *mockSubBuffer = static_cast(subBuffer); + EXPECT_EQ(nullptr, mockSubBuffer->memoryStorage); + + mockBuffer->memoryStorage = savedMemoryStorage; + delete subBuffer; +} + TEST_F(SubBufferTest, givenBufferWithHostPtrWhenSubbufferGetsMapPtrThenExpectBufferHostPtr) { cl_buffer_region region = {0, 16}; diff --git a/opencl/test/unit_test/mocks/mock_buffer.h b/opencl/test/unit_test/mocks/mock_buffer.h index e1d7ebf718..21fba2e341 100644 --- a/opencl/test/unit_test/mocks/mock_buffer.h +++ b/opencl/test/unit_test/mocks/mock_buffer.h @@ -51,6 +51,7 @@ class MockBuffer : public MockBufferStorage, public Buffer { using MemObj::context; using MemObj::isZeroCopy; using MemObj::memObjectType; + using MemObj::memoryStorage; using MockBufferStorage::device; void setAllocationType(uint32_t rootDeviceIndex, bool compressed) {