diff --git a/opencl/source/context/CMakeLists.txt b/opencl/source/context/CMakeLists.txt index 961ee22400..1323d8857f 100644 --- a/opencl/source/context/CMakeLists.txt +++ b/opencl/source/context/CMakeLists.txt @@ -1,5 +1,5 @@ # -# Copyright (C) 2018-2020 Intel Corporation +# Copyright (C) 2018-2022 Intel Corporation # # SPDX-License-Identifier: MIT # @@ -12,6 +12,7 @@ set(RUNTIME_SRCS_CONTEXT ${CMAKE_CURRENT_SOURCE_DIR}/context_type.h ${CMAKE_CURRENT_SOURCE_DIR}/driver_diagnostics.cpp ${CMAKE_CURRENT_SOURCE_DIR}/driver_diagnostics.h + ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}pool_buffer_additional_checks.cpp ) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_CONTEXT}) diff --git a/opencl/source/context/context.cpp b/opencl/source/context/context.cpp index 3287c38d17..20297229d4 100644 --- a/opencl/source/context/context.cpp +++ b/opencl/source/context/context.cpp @@ -502,6 +502,7 @@ Buffer *Context::BufferPoolAllocator::allocateBufferFromPool(const MemoryPropert errcodeRet = CL_MEM_OBJECT_ALLOCATION_FAILURE; if (this->isAggregatedSmallBuffersEnabled() && this->isSizeWithinThreshold(size) && + this->flagsAllowBufferFromPool(flags, flagsIntel) && this->mainStorage) { auto lock = std::unique_lock(this->mutex); cl_buffer_region bufferRegion{}; diff --git a/opencl/source/context/context.h b/opencl/source/context/context.h index 778ae2dd28..e684dd3edb 100644 --- a/opencl/source/context/context.h +++ b/opencl/source/context/context.h @@ -71,6 +71,8 @@ class Context : public BaseObject<_cl_context> { bool isPoolBuffer(const MemObj *buffer) const; + bool flagsAllowBufferFromPool(const cl_mem_flags &flags, const cl_mem_flags_intel &flagsIntel) const; + protected: inline bool isSizeWithinThreshold(size_t size) const { return BufferPoolAllocator::smallBufferThreshold >= size; diff --git a/opencl/source/context/pool_buffer_additional_checks.cpp b/opencl/source/context/pool_buffer_additional_checks.cpp new file mode 100644 index 0000000000..6a6b9d7e92 --- 
/dev/null +++ b/opencl/source/context/pool_buffer_additional_checks.cpp @@ -0,0 +1,15 @@ +/* + * Copyright (C) 2022 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "opencl/source/context/context.h" + +namespace NEO { +bool Context::BufferPoolAllocator::flagsAllowBufferFromPool(const cl_mem_flags &flags, const cl_mem_flags_intel &flagsIntel) const { + return true; +} + +} // namespace NEO \ No newline at end of file diff --git a/opencl/test/unit_test/api/cl_get_mem_object_info_tests.inl b/opencl/test/unit_test/api/cl_get_mem_object_info_tests.inl index 7d9627d058..0fd5a9e898 100644 --- a/opencl/test/unit_test/api/cl_get_mem_object_info_tests.inl +++ b/opencl/test/unit_test/api/cl_get_mem_object_info_tests.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -20,29 +20,30 @@ typedef api_tests clGetMemObjectInfoTests; namespace ULT { -TEST_F(clGetMemObjectInfoTests, GivenValidBufferWhenGettingMemObjectInfoThenCorrectBufferSizeIsReturned) { - size_t bufferSize = 16; +TEST_F(clGetMemObjectInfoTests, givenValidBufferWhenGettingMemObjectInfoThenCorrectBufferSizeIsReturned) { + size_t requestedBufferSize = 16; cl_mem buffer = nullptr; buffer = clCreateBuffer( pContext, 0, - bufferSize, + requestedBufferSize, NULL, &retVal); - ASSERT_EQ(CL_SUCCESS, retVal); - EXPECT_NE(nullptr, buffer); + EXPECT_EQ(CL_SUCCESS, retVal); + ASSERT_NE(nullptr, buffer); - size_t paramValue = 0; - retVal = clGetMemObjectInfo(buffer, CL_MEM_SIZE, sizeof(paramValue), &paramValue, nullptr); - ASSERT_EQ(CL_SUCCESS, retVal); - ASSERT_EQ(bufferSize, paramValue); + size_t createdBufferSize = 0; + auto asBuffer = static_cast<Buffer *>(buffer); + retVal = clGetMemObjectInfo(buffer, CL_MEM_SIZE, sizeof(createdBufferSize), &createdBufferSize, nullptr); + EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_EQ(createdBufferSize, asBuffer->getSize()); retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, 
retVal); } -TEST_F(clGetMemObjectInfoTests, GivenBufferWithMappedRegionWhenGettingMemObjectInfoThenCorrectMapCountIsReturned) { +TEST_F(clGetMemObjectInfoTests, givenBufferWithMappedRegionWhenGettingMemObjectInfoThenCorrectMapCountIsReturned) { size_t bufferSize = 16; cl_mem buffer = nullptr; @@ -55,8 +56,8 @@ TEST_F(clGetMemObjectInfoTests, GivenBufferWithMappedRegionWhenGettingMemObjectI bufferSize, NULL, &retVal); - ASSERT_EQ(CL_SUCCESS, retVal); - EXPECT_NE(nullptr, buffer); + EXPECT_EQ(CL_SUCCESS, retVal); + ASSERT_NE(nullptr, buffer); clEnqueueMapBuffer( cmdQ, @@ -72,8 +73,8 @@ TEST_F(clGetMemObjectInfoTests, GivenBufferWithMappedRegionWhenGettingMemObjectI cl_uint paramValue = 0; retVal = clGetMemObjectInfo(buffer, CL_MEM_MAP_COUNT, sizeof(paramValue), &paramValue, nullptr); - ASSERT_EQ(CL_SUCCESS, retVal); - ASSERT_EQ(1u, paramValue); + EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_EQ(1u, paramValue); retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); @@ -81,7 +82,7 @@ TEST_F(clGetMemObjectInfoTests, GivenBufferWithMappedRegionWhenGettingMemObjectI clReleaseCommandQueue(cmdQ); } -TEST_F(clGetMemObjectInfoTests, GivenBufferCreatedFromSvmPointerWhenGettingMemObjectInfoThenClTrueIsReturned) { +TEST_F(clGetMemObjectInfoTests, givenBufferCreatedFromSvmPointerWhenGettingMemObjectInfoThenClTrueIsReturned) { const ClDeviceInfo &devInfo = pDevice->getDeviceInfo(); if (devInfo.svmCapabilities != 0) { size_t bufferSize = 64; @@ -96,14 +97,14 @@ TEST_F(clGetMemObjectInfoTests, GivenBufferCreatedFromSvmPointerWhenGettingMemOb bufferSize, ptr, &retVal); - ASSERT_EQ(CL_SUCCESS, retVal); - EXPECT_NE(nullptr, buffer); + EXPECT_EQ(CL_SUCCESS, retVal); + ASSERT_NE(nullptr, buffer); cl_bool paramValue = CL_FALSE; retVal = clGetMemObjectInfo(buffer, CL_MEM_USES_SVM_POINTER, sizeof(paramValue), &paramValue, nullptr); - ASSERT_EQ(CL_SUCCESS, retVal); - ASSERT_EQ(static_cast<cl_bool>(CL_TRUE), paramValue); + EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_EQ(static_cast<cl_bool>(CL_TRUE), paramValue); 
retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); diff --git a/opencl/test/unit_test/aub_tests/command_queue/aub_multicontext_tests_xehp_and_later.cpp b/opencl/test/unit_test/aub_tests/command_queue/aub_multicontext_tests_xehp_and_later.cpp index c933e7e2ba..68d35b35c5 100644 --- a/opencl/test/unit_test/aub_tests/command_queue/aub_multicontext_tests_xehp_and_later.cpp +++ b/opencl/test/unit_test/aub_tests/command_queue/aub_multicontext_tests_xehp_and_later.cpp @@ -572,7 +572,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, SingleTileDualContextTest, givenSingleAllocationWhe commandQueues[0][1]->finish(); // submit second enqueue first to make sure that residency flow is correct commandQueues[0][0]->finish(); - auto gpuPtr = reinterpret_cast<void *>(buffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress()); + auto gpuPtr = reinterpret_cast<void *>(buffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress() + buffer->getOffset()); expectMemory(gpuPtr, writePattern1, halfBufferSize, 0, 0); expectMemory(ptrOffset(gpuPtr, halfBufferSize), writePattern2, halfBufferSize, 0, 1); } diff --git a/opencl/test/unit_test/aub_tests/command_queue/aub_postsync_write_tests_xehp_and_later.cpp b/opencl/test/unit_test/aub_tests/command_queue/aub_postsync_write_tests_xehp_and_later.cpp index 8fc38a4d8d..3135bc6fe2 100644 --- a/opencl/test/unit_test/aub_tests/command_queue/aub_postsync_write_tests_xehp_and_later.cpp +++ b/opencl/test/unit_test/aub_tests/command_queue/aub_postsync_write_tests_xehp_and_later.cpp @@ -47,7 +47,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, PostSyncWriteXeHPTests, givenTimestampWriteEnabledW uint8_t writeData[bufferSize] = {1, 2, 3, 4}; cmdQ.enqueueWriteBuffer(buffer.get(), CL_TRUE, 0, bufferSize, writeData, nullptr, 0, nullptr, nullptr); - expectMemory(reinterpret_cast<void *>(graphicsAllocation->getGpuAddress()), writeData, bufferSize); + expectMemory(reinterpret_cast<void *>(graphicsAllocation->getGpuAddress() + buffer->getOffset()), writeData, bufferSize); typename 
FamilyType::TimestampPacketType expectedTimestampValues[4] = {1, 1, 1, 1}; auto tagGpuAddress = reinterpret_cast<void *>(cmdQ.timestampPacketContainer->peekNodes().at(0)->getGpuAddress()); @@ -67,7 +67,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, PostSyncWriteXeHPTests, givenDebugVariableEnabledWh uint8_t writeData[bufferSize] = {1, 2, 3, 4}; cmdQ.enqueueWriteBuffer(buffer.get(), CL_TRUE, 0, bufferSize, writeData, nullptr, 0, nullptr, nullptr); - expectMemory(reinterpret_cast<void *>(graphicsAllocation->getGpuAddress()), writeData, bufferSize); + expectMemory(reinterpret_cast<void *>(graphicsAllocation->getGpuAddress() + buffer->getOffset()), writeData, bufferSize); auto tagGpuAddress = reinterpret_cast<void *>(cmdQ.timestampPacketContainer->peekNodes().at(0)->getGpuAddress()); @@ -105,7 +105,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, PostSyncWriteXeHPTests, givenTwoBatchedEnqueuesWhen pCmdQ->enqueueWriteBuffer(buffer.get(), CL_TRUE, 0, bufferSize, writePattern2, nullptr, 0, nullptr, &outEvent2); auto node2 = castToObject(outEvent2)->getTimestampPacketNodes()->peekNodes().at(0); - expectMemory(reinterpret_cast<void *>(buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress()), writePattern2, bufferSize); + expectMemory(reinterpret_cast<void *>(buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress() + buffer->getOffset()), writePattern2, bufferSize); typename FamilyType::TimestampPacketType expectedEndTimestamp = 1; auto endTimestampAddress1 = TimestampPacketHelper::getContextEndGpuAddress(*node1); diff --git a/opencl/test/unit_test/aub_tests/fixtures/aub_fixture.h b/opencl/test/unit_test/aub_tests/fixtures/aub_fixture.h index d070decfbf..cd3700dcd7 100644 --- a/opencl/test/unit_test/aub_tests/fixtures/aub_fixture.h +++ b/opencl/test/unit_test/aub_tests/fixtures/aub_fixture.h @@ -165,6 +165,10 @@ class AUBFixture : public CommandQueueHwFixture { return reinterpret_cast<void *>(allocation->getGpuAddress()); } + static void *getGpuPointer(GraphicsAllocation *allocation, size_t offset) { + return 
reinterpret_cast<void *>(allocation->getGpuAddress() + offset); + } + const uint32_t rootDeviceIndex = 0; CommandStreamReceiver *csr = nullptr; volatile uint32_t *pTagMemory = nullptr; diff --git a/opencl/test/unit_test/aub_tests/xe_hp_core/aub_tests_stateless_compression_in_sba_xe_hp_core.cpp b/opencl/test/unit_test/aub_tests/xe_hp_core/aub_tests_stateless_compression_in_sba_xe_hp_core.cpp index 4e19e65818..cbdb9cd6aa 100644 --- a/opencl/test/unit_test/aub_tests/xe_hp_core/aub_tests_stateless_compression_in_sba_xe_hp_core.cpp +++ b/opencl/test/unit_test/aub_tests/xe_hp_core/aub_tests_stateless_compression_in_sba_xe_hp_core.cpp @@ -529,9 +529,9 @@ XE_HP_CORE_TEST_F(StatelessCompressionInSBAWithBCS, GENERATEONLY_givenCompressed commandQueues[0][0]->finish(); - expectMemoryNotEqual(AUBFixture::getGpuPointer(compressedAllocation), writePattern, bufferSize, 0, 0); + expectMemoryNotEqual(AUBFixture::getGpuPointer(compressedAllocation, compressedBuffer->getOffset()), writePattern, bufferSize, 0, 0); - expectMemory(AUBFixture::getGpuPointer(unCompressedAllocation), writePattern, bufferSize, 0, 0); + expectMemory(AUBFixture::getGpuPointer(unCompressedAllocation, unCompressedBuffer->getOffset()), writePattern, bufferSize, 0, 0); } XE_HP_CORE_TEST_F(StatelessCompressionInSBAWithBCS, givenUncompressibleBufferInHostMemoryWhenAccessedStatelesslyThenDisableCompressionInSBA) { @@ -565,7 +565,7 @@ XE_HP_CORE_TEST_F(StatelessCompressionInSBAWithBCS, givenUncompressibleBufferInH commandQueues[0][0]->finish(); - expectMemoryNotEqual(AUBFixture::getGpuPointer(compressedAllocation), writePattern, bufferSize, 0, 0); + expectMemoryNotEqual(AUBFixture::getGpuPointer(compressedAllocation, compressedBuffer->getOffset()), writePattern, bufferSize, 0, 0); - expectMemory(AUBFixture::getGpuPointer(uncompressibleAllocationInHostMemory), writePattern, bufferSize, 0, 0); + expectMemory(AUBFixture::getGpuPointer(uncompressibleAllocationInHostMemory, uncompressibleBufferInHostMemory->getOffset()), 
writePattern, bufferSize, 0, 0); } diff --git a/opencl/test/unit_test/aub_tests/xe_hpg_core/aub_tests_stateless_compression_in_sba_xe_hpg_core.cpp b/opencl/test/unit_test/aub_tests/xe_hpg_core/aub_tests_stateless_compression_in_sba_xe_hpg_core.cpp index 07684f7248..3ca67af786 100644 --- a/opencl/test/unit_test/aub_tests/xe_hpg_core/aub_tests_stateless_compression_in_sba_xe_hpg_core.cpp +++ b/opencl/test/unit_test/aub_tests/xe_hpg_core/aub_tests_stateless_compression_in_sba_xe_hpg_core.cpp @@ -537,9 +537,9 @@ XE_HPG_CORETEST_F(XeHpgCoreStatelessCompressionInSBAWithBCS, GENERATEONLY_givenC commandQueues[0][0]->finish(); - expectMemoryNotEqual(AUBFixture::getGpuPointer(compressedAllocation), writePattern, bufferSize, 0, 0); + expectMemoryNotEqual(AUBFixture::getGpuPointer(compressedAllocation, compressedBuffer->getOffset()), writePattern, bufferSize, 0, 0); - expectMemory(AUBFixture::getGpuPointer(unCompressedAllocation), writePattern, bufferSize, 0, 0); + expectMemory(AUBFixture::getGpuPointer(unCompressedAllocation, unCompressedBuffer->getOffset()), writePattern, bufferSize, 0, 0); } XE_HPG_CORETEST_F(XeHpgCoreStatelessCompressionInSBAWithBCS, givenUncompressibleBufferInHostMemoryWhenAccessedStatelesslyThenDisableCompressionInSBA) { @@ -573,7 +573,7 @@ XE_HPG_CORETEST_F(XeHpgCoreStatelessCompressionInSBAWithBCS, givenUncompressible commandQueues[0][0]->finish(); - expectMemoryNotEqual(AUBFixture::getGpuPointer(compressedAllocation), writePattern, bufferSize, 0, 0); + expectMemoryNotEqual(AUBFixture::getGpuPointer(compressedAllocation, compressedBuffer->getOffset()), writePattern, bufferSize, 0, 0); - expectMemory(AUBFixture::getGpuPointer(uncompressibleAllocationInHostMemory), writePattern, bufferSize, 0, 0); + expectMemory(AUBFixture::getGpuPointer(uncompressibleAllocationInHostMemory, uncompressibleBufferInHostMemory->getOffset()), writePattern, bufferSize, 0, 0); } diff --git a/opencl/test/unit_test/context/context_negative_tests.cpp 
b/opencl/test/unit_test/context/context_negative_tests.cpp index c4259f7065..638a1ed1e8 100644 --- a/opencl/test/unit_test/context/context_negative_tests.cpp +++ b/opencl/test/unit_test/context/context_negative_tests.cpp @@ -6,6 +6,7 @@ */ #include "shared/test/common/fixtures/memory_management_fixture.h" +#include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/test_macros/test.h" #include "opencl/source/context/context.h" @@ -24,6 +25,8 @@ using namespace NEO; typedef Test ContextFailureInjection; TEST_F(ContextFailureInjection, GivenFailedAllocationInjectionWhenCreatingContextThenOutOfHostMemoryErrorIsReturned) { + DebugManagerStateRestore restorer; + DebugManager.flags.ExperimentalSmallBufferPoolAllocator.set(0); //failing to allocate pool buffer is non-critical auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); cl_device_id deviceID = device.get(); diff --git a/opencl/test/unit_test/context/driver_diagnostics_tests.cpp b/opencl/test/unit_test/context/driver_diagnostics_tests.cpp index 5cada22c40..2a42b2330c 100644 --- a/opencl/test/unit_test/context/driver_diagnostics_tests.cpp +++ b/opencl/test/unit_test/context/driver_diagnostics_tests.cpp @@ -703,6 +703,8 @@ HWTEST2_F(PerformanceHintTest, given64bitCompressedBufferWhenItsCreatedThenPrope } TEST_F(PerformanceHintTest, givenUncompressedBufferWhenItsCreatedThenProperPerformanceHintIsProvided) { + DebugManagerStateRestore restorer; + DebugManager.flags.ExperimentalSmallBufferPoolAllocator.set(0); // pool buffer will not provide performance hints cl_int retVal; HardwareInfo hwInfo = context->getDevice(0)->getHardwareInfo(); hwInfo.capabilityTable.ftrRenderCompressedBuffers = true;