Prepare for pool buffer enabling 1/n

Check whether the buffer flags allow allocating the buffer from the pool.
Add the buffer offset to the GPU addresses verified in AUB tests.
Disable the pool buffer allocator in tests where required.

Related-To: NEO-7332

Signed-off-by: Dominik Dabek <dominik.dabek@intel.com>
This commit is contained in:
Dominik Dabek 2022-12-05 14:57:08 +00:00 committed by Compute-Runtime-Automation
parent 2ceada6bef
commit 70dbce12d1
12 changed files with 62 additions and 33 deletions

View File

@ -1,5 +1,5 @@
#
# Copyright (C) 2018-2020 Intel Corporation
# Copyright (C) 2018-2022 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
@ -12,6 +12,7 @@ set(RUNTIME_SRCS_CONTEXT
${CMAKE_CURRENT_SOURCE_DIR}/context_type.h
${CMAKE_CURRENT_SOURCE_DIR}/driver_diagnostics.cpp
${CMAKE_CURRENT_SOURCE_DIR}/driver_diagnostics.h
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}pool_buffer_additional_checks.cpp
)
target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_CONTEXT})

View File

@ -502,6 +502,7 @@ Buffer *Context::BufferPoolAllocator::allocateBufferFromPool(const MemoryPropert
errcodeRet = CL_MEM_OBJECT_ALLOCATION_FAILURE;
if (this->isAggregatedSmallBuffersEnabled() &&
this->isSizeWithinThreshold(size) &&
this->flagsAllowBufferFromPool(flags, flagsIntel) &&
this->mainStorage) {
auto lock = std::unique_lock<std::mutex>(this->mutex);
cl_buffer_region bufferRegion{};

View File

@ -71,6 +71,8 @@ class Context : public BaseObject<_cl_context> {
bool isPoolBuffer(const MemObj *buffer) const;
bool flagsAllowBufferFromPool(const cl_mem_flags &flags, const cl_mem_flags_intel &flagsIntel) const;
protected:
inline bool isSizeWithinThreshold(size_t size) const {
return BufferPoolAllocator::smallBufferThreshold >= size;

View File

@ -0,0 +1,15 @@
/*
* Copyright (C) 2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "opencl/source/context/context.h"
namespace NEO {
// Reports whether a buffer requested with the given flags may be served from the buffer pool.
// This implementation applies no additional restrictions: both arguments are ignored and
// every flag combination is accepted.
bool Context::BufferPoolAllocator::flagsAllowBufferFromPool(const cl_mem_flags &flags, const cl_mem_flags_intel &flagsIntel) const {
    constexpr bool poolAllowedForAnyFlags = true;
    return poolAllowedForAnyFlags;
}
} // namespace NEO

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
* Copyright (C) 2018-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -20,29 +20,30 @@ typedef api_tests clGetMemObjectInfoTests;
namespace ULT {
TEST_F(clGetMemObjectInfoTests, GivenValidBufferWhenGettingMemObjectInfoThenCorrectBufferSizeIsReturned) {
size_t bufferSize = 16;
TEST_F(clGetMemObjectInfoTests, givenValidBufferWhenGettingMemObjectInfoThenCorrectBufferSizeIsReturned) {
size_t requestedBufferSize = 16;
cl_mem buffer = nullptr;
buffer = clCreateBuffer(
pContext,
0,
bufferSize,
requestedBufferSize,
NULL,
&retVal);
ASSERT_EQ(CL_SUCCESS, retVal);
EXPECT_NE(nullptr, buffer);
EXPECT_EQ(CL_SUCCESS, retVal);
ASSERT_NE(nullptr, buffer);
size_t paramValue = 0;
retVal = clGetMemObjectInfo(buffer, CL_MEM_SIZE, sizeof(paramValue), &paramValue, nullptr);
ASSERT_EQ(CL_SUCCESS, retVal);
ASSERT_EQ(bufferSize, paramValue);
size_t createdBufferSize = 0;
auto asBuffer = static_cast<Buffer *>(buffer);
retVal = clGetMemObjectInfo(buffer, CL_MEM_SIZE, sizeof(createdBufferSize), &createdBufferSize, nullptr);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(createdBufferSize, asBuffer->getSize());
retVal = clReleaseMemObject(buffer);
EXPECT_EQ(CL_SUCCESS, retVal);
}
TEST_F(clGetMemObjectInfoTests, GivenBufferWithMappedRegionWhenGettingMemObjectInfoThenCorrectMapCountIsReturned) {
TEST_F(clGetMemObjectInfoTests, givenBufferWithMappedRegionWhenGettingMemObjectInfoThenCorrectMapCountIsReturned) {
size_t bufferSize = 16;
cl_mem buffer = nullptr;
@ -55,8 +56,8 @@ TEST_F(clGetMemObjectInfoTests, GivenBufferWithMappedRegionWhenGettingMemObjectI
bufferSize,
NULL,
&retVal);
ASSERT_EQ(CL_SUCCESS, retVal);
EXPECT_NE(nullptr, buffer);
EXPECT_EQ(CL_SUCCESS, retVal);
ASSERT_NE(nullptr, buffer);
clEnqueueMapBuffer(
cmdQ,
@ -72,8 +73,8 @@ TEST_F(clGetMemObjectInfoTests, GivenBufferWithMappedRegionWhenGettingMemObjectI
cl_uint paramValue = 0;
retVal = clGetMemObjectInfo(buffer, CL_MEM_MAP_COUNT, sizeof(paramValue), &paramValue, nullptr);
ASSERT_EQ(CL_SUCCESS, retVal);
ASSERT_EQ(1u, paramValue);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(1u, paramValue);
retVal = clReleaseMemObject(buffer);
EXPECT_EQ(CL_SUCCESS, retVal);
@ -81,7 +82,7 @@ TEST_F(clGetMemObjectInfoTests, GivenBufferWithMappedRegionWhenGettingMemObjectI
clReleaseCommandQueue(cmdQ);
}
TEST_F(clGetMemObjectInfoTests, GivenBufferCreatedFromSvmPointerWhenGettingMemObjectInfoThenClTrueIsReturned) {
TEST_F(clGetMemObjectInfoTests, givenBufferCreatedFromSvmPointerWhenGettingMemObjectInfoThenClTrueIsReturned) {
const ClDeviceInfo &devInfo = pDevice->getDeviceInfo();
if (devInfo.svmCapabilities != 0) {
size_t bufferSize = 64;
@ -96,14 +97,14 @@ TEST_F(clGetMemObjectInfoTests, GivenBufferCreatedFromSvmPointerWhenGettingMemOb
bufferSize,
ptr,
&retVal);
ASSERT_EQ(CL_SUCCESS, retVal);
EXPECT_NE(nullptr, buffer);
EXPECT_EQ(CL_SUCCESS, retVal);
ASSERT_NE(nullptr, buffer);
cl_bool paramValue = CL_FALSE;
retVal = clGetMemObjectInfo(buffer, CL_MEM_USES_SVM_POINTER, sizeof(paramValue), &paramValue, nullptr);
ASSERT_EQ(CL_SUCCESS, retVal);
ASSERT_EQ(static_cast<cl_bool>(CL_TRUE), paramValue);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(static_cast<cl_bool>(CL_TRUE), paramValue);
retVal = clReleaseMemObject(buffer);
EXPECT_EQ(CL_SUCCESS, retVal);

View File

@ -572,7 +572,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, SingleTileDualContextTest, givenSingleAllocationWhe
commandQueues[0][1]->finish(); // submit second enqueue first to make sure that residency flow is correct
commandQueues[0][0]->finish();
auto gpuPtr = reinterpret_cast<void *>(buffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress());
auto gpuPtr = reinterpret_cast<void *>(buffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress() + buffer->getOffset());
expectMemory<FamilyType>(gpuPtr, writePattern1, halfBufferSize, 0, 0);
expectMemory<FamilyType>(ptrOffset(gpuPtr, halfBufferSize), writePattern2, halfBufferSize, 0, 1);
}

View File

@ -47,7 +47,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, PostSyncWriteXeHPTests, givenTimestampWriteEnabledW
uint8_t writeData[bufferSize] = {1, 2, 3, 4};
cmdQ.enqueueWriteBuffer(buffer.get(), CL_TRUE, 0, bufferSize, writeData, nullptr, 0, nullptr, nullptr);
expectMemory<FamilyType>(reinterpret_cast<void *>(graphicsAllocation->getGpuAddress()), writeData, bufferSize);
expectMemory<FamilyType>(reinterpret_cast<void *>(graphicsAllocation->getGpuAddress() + buffer->getOffset()), writeData, bufferSize);
typename FamilyType::TimestampPacketType expectedTimestampValues[4] = {1, 1, 1, 1};
auto tagGpuAddress = reinterpret_cast<void *>(cmdQ.timestampPacketContainer->peekNodes().at(0)->getGpuAddress());
@ -67,7 +67,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, PostSyncWriteXeHPTests, givenDebugVariableEnabledWh
uint8_t writeData[bufferSize] = {1, 2, 3, 4};
cmdQ.enqueueWriteBuffer(buffer.get(), CL_TRUE, 0, bufferSize, writeData, nullptr, 0, nullptr, nullptr);
expectMemory<FamilyType>(reinterpret_cast<void *>(graphicsAllocation->getGpuAddress()), writeData, bufferSize);
expectMemory<FamilyType>(reinterpret_cast<void *>(graphicsAllocation->getGpuAddress() + buffer->getOffset()), writeData, bufferSize);
auto tagGpuAddress = reinterpret_cast<void *>(cmdQ.timestampPacketContainer->peekNodes().at(0)->getGpuAddress());
@ -105,7 +105,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, PostSyncWriteXeHPTests, givenTwoBatchedEnqueuesWhen
pCmdQ->enqueueWriteBuffer(buffer.get(), CL_TRUE, 0, bufferSize, writePattern2, nullptr, 0, nullptr, &outEvent2);
auto node2 = castToObject<Event>(outEvent2)->getTimestampPacketNodes()->peekNodes().at(0);
expectMemory<FamilyType>(reinterpret_cast<void *>(buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress()), writePattern2, bufferSize);
expectMemory<FamilyType>(reinterpret_cast<void *>(buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddress() + buffer->getOffset()), writePattern2, bufferSize);
typename FamilyType::TimestampPacketType expectedEndTimestamp = 1;
auto endTimestampAddress1 = TimestampPacketHelper::getContextEndGpuAddress(*node1);

View File

@ -165,6 +165,10 @@ class AUBFixture : public CommandQueueHwFixture {
return reinterpret_cast<void *>(allocation->getGpuAddress());
}
// Returns the allocation's GPU address advanced by `offset`, as an untyped pointer.
// The offset is added to the value returned by getGpuAddress() before the cast.
static void *getGpuPointer(GraphicsAllocation *allocation, size_t offset) {
    const auto shiftedGpuAddress = allocation->getGpuAddress() + offset;
    return reinterpret_cast<void *>(shiftedGpuAddress);
}
const uint32_t rootDeviceIndex = 0;
CommandStreamReceiver *csr = nullptr;
volatile uint32_t *pTagMemory = nullptr;

View File

@ -529,9 +529,9 @@ XE_HP_CORE_TEST_F(StatelessCompressionInSBAWithBCS, GENERATEONLY_givenCompressed
commandQueues[0][0]->finish();
expectMemoryNotEqual<FamilyType>(AUBFixture::getGpuPointer(compressedAllocation), writePattern, bufferSize, 0, 0);
expectMemoryNotEqual<FamilyType>(AUBFixture::getGpuPointer(compressedAllocation, compressedBuffer->getOffset()), writePattern, bufferSize, 0, 0);
expectMemory<FamilyType>(AUBFixture::getGpuPointer(unCompressedAllocation), writePattern, bufferSize, 0, 0);
expectMemory<FamilyType>(AUBFixture::getGpuPointer(unCompressedAllocation, unCompressedBuffer->getOffset()), writePattern, bufferSize, 0, 0);
}
XE_HP_CORE_TEST_F(StatelessCompressionInSBAWithBCS, givenUncompressibleBufferInHostMemoryWhenAccessedStatelesslyThenDisableCompressionInSBA) {
@ -565,7 +565,7 @@ XE_HP_CORE_TEST_F(StatelessCompressionInSBAWithBCS, givenUncompressibleBufferInH
commandQueues[0][0]->finish();
expectMemoryNotEqual<FamilyType>(AUBFixture::getGpuPointer(compressedAllocation), writePattern, bufferSize, 0, 0);
expectMemoryNotEqual<FamilyType>(AUBFixture::getGpuPointer(compressedAllocation, compressedBuffer->getOffset()), writePattern, bufferSize, 0, 0);
expectMemory<FamilyType>(AUBFixture::getGpuPointer(uncompressibleAllocationInHostMemory), writePattern, bufferSize, 0, 0);
expectMemory<FamilyType>(AUBFixture::getGpuPointer(uncompressibleAllocationInHostMemory, uncompressibleBufferInHostMemory->getOffset()), writePattern, bufferSize, 0, 0);
}

View File

@ -537,9 +537,9 @@ XE_HPG_CORETEST_F(XeHpgCoreStatelessCompressionInSBAWithBCS, GENERATEONLY_givenC
commandQueues[0][0]->finish();
expectMemoryNotEqual<FamilyType>(AUBFixture::getGpuPointer(compressedAllocation), writePattern, bufferSize, 0, 0);
expectMemoryNotEqual<FamilyType>(AUBFixture::getGpuPointer(compressedAllocation, compressedBuffer->getOffset()), writePattern, bufferSize, 0, 0);
expectMemory<FamilyType>(AUBFixture::getGpuPointer(unCompressedAllocation), writePattern, bufferSize, 0, 0);
expectMemory<FamilyType>(AUBFixture::getGpuPointer(unCompressedAllocation, unCompressedBuffer->getOffset()), writePattern, bufferSize, 0, 0);
}
XE_HPG_CORETEST_F(XeHpgCoreStatelessCompressionInSBAWithBCS, givenUncompressibleBufferInHostMemoryWhenAccessedStatelesslyThenDisableCompressionInSBA) {
@ -573,7 +573,7 @@ XE_HPG_CORETEST_F(XeHpgCoreStatelessCompressionInSBAWithBCS, givenUncompressible
commandQueues[0][0]->finish();
expectMemoryNotEqual<FamilyType>(AUBFixture::getGpuPointer(compressedAllocation), writePattern, bufferSize, 0, 0);
expectMemoryNotEqual<FamilyType>(AUBFixture::getGpuPointer(compressedAllocation, compressedBuffer->getOffset()), writePattern, bufferSize, 0, 0);
expectMemory<FamilyType>(AUBFixture::getGpuPointer(uncompressibleAllocationInHostMemory), writePattern, bufferSize, 0, 0);
expectMemory<FamilyType>(AUBFixture::getGpuPointer(uncompressibleAllocationInHostMemory, uncompressibleBufferInHostMemory->getOffset()), writePattern, bufferSize, 0, 0);
}

View File

@ -6,6 +6,7 @@
*/
#include "shared/test/common/fixtures/memory_management_fixture.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/test_macros/test.h"
#include "opencl/source/context/context.h"
@ -24,6 +25,8 @@ using namespace NEO;
typedef Test<MemoryManagementFixture> ContextFailureInjection;
TEST_F(ContextFailureInjection, GivenFailedAllocationInjectionWhenCreatingContextThenOutOfHostMemoryErrorIsReturned) {
DebugManagerStateRestore restorer;
DebugManager.flags.ExperimentalSmallBufferPoolAllocator.set(0); //failing to allocate pool buffer is non-critical
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
cl_device_id deviceID = device.get();

View File

@ -703,6 +703,8 @@ HWTEST2_F(PerformanceHintTest, given64bitCompressedBufferWhenItsCreatedThenPrope
}
TEST_F(PerformanceHintTest, givenUncompressedBufferWhenItsCreatedThenProperPerformanceHintIsProvided) {
DebugManagerStateRestore restorer;
DebugManager.flags.ExperimentalSmallBufferPoolAllocator.set(0); // pool buffer will not provide performance hints
cl_int retVal;
HardwareInfo hwInfo = context->getDevice(0)->getHardwareInfo();
hwInfo.capabilityTable.ftrRenderCompressedBuffers = true;