Add additional create buffer arguments

Allow callers to disable performance hints and to make the allocation lockable.

Both options are used by BufferPoolAllocator.

Related-To: NEO-7332

Signed-off-by: Dominik Dabek <dominik.dabek@intel.com>
This commit is contained in:
Dominik Dabek
2022-11-21 11:03:05 +00:00
committed by Compute-Runtime-Automation
parent 50df7f430e
commit 67bfebb25e
12 changed files with 125 additions and 26 deletions

View File

@ -475,10 +475,14 @@ bool Context::isSingleDeviceContext() {
void Context::BufferPoolAllocator::initAggregatedSmallBuffers(Context *context) {
static constexpr cl_mem_flags flags{};
[[maybe_unused]] cl_int errcodeRet{};
Buffer::AdditionalBufferCreateArgs bufferCreateArgs{};
bufferCreateArgs.doNotProvidePerformanceHints = true;
bufferCreateArgs.makeAllocationLockable = true;
this->mainStorage = Buffer::create(context,
flags,
BufferPoolAllocator::aggregatedSmallBuffersPoolSize,
nullptr,
bufferCreateArgs,
errcodeRet);
if (this->mainStorage) {
this->chunkAllocator.reset(new HeapAllocator(BufferPoolAllocator::startingOffset,

View File

@ -50,6 +50,7 @@ class Context : public BaseObject<_cl_context> {
static constexpr auto startingOffset = chunkAlignment;
static_assert(aggregatedSmallBuffersPoolSize > smallBufferThreshold, "Largest allowed buffer needs to fit in pool");
Buffer *allocateBufferFromPool(const MemoryProperties &memoryProperties,
cl_mem_flags flags,
cl_mem_flags_intel flagsIntel,

View File

@ -170,9 +170,20 @@ Buffer *Buffer::create(Context *context,
cl_mem_flags flags,
size_t size,
void *hostPtr,
AdditionalBufferCreateArgs &bufferCreateArgs,
cl_int &errcodeRet) {
return create(context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context->getDevice(0)->getDevice()),
flags, 0, size, hostPtr, errcodeRet);
flags, 0, size, hostPtr, bufferCreateArgs, errcodeRet);
}
// Convenience overload for callers that do not need any additional creation arguments.
//
// Forwards to the overload that accepts AdditionalBufferCreateArgs, passing a
// value-initialized instance (both options off). The MemoryProperties derivation from
// `flags` is therefore performed in exactly one place instead of being repeated here.
//
// context    - context the buffer is created in
// flags      - cl_mem_flags describing the buffer
// size       - requested buffer size in bytes
// hostPtr    - optional host pointer supplied by the caller (may be nullptr)
// errcodeRet - receives the resulting OpenCL status code
// Returns the buffer produced by the delegated overload.
Buffer *Buffer::create(Context *context,
                       cl_mem_flags flags,
                       size_t size,
                       void *hostPtr,
                       cl_int &errcodeRet) {
    AdditionalBufferCreateArgs defaultCreateArgs{};
    return create(context, flags, size, hostPtr, defaultCreateArgs, errcodeRet);
}
bool inline copyHostPointer(Buffer *buffer,
@ -233,6 +244,18 @@ Buffer *Buffer::create(Context *context,
size_t size,
void *hostPtr,
cl_int &errcodeRet) {
AdditionalBufferCreateArgs bufferCreateArgs{};
return create(context, memoryProperties, flags, flagsIntel, size, hostPtr, bufferCreateArgs, errcodeRet);
}
Buffer *Buffer::create(Context *context,
const MemoryProperties &memoryProperties,
cl_mem_flags flags,
cl_mem_flags_intel flagsIntel,
size_t size,
void *hostPtr,
AdditionalBufferCreateArgs &bufferCreateArgs,
cl_int &errcodeRet) {
errcodeRet = CL_SUCCESS;
Context::BufferPoolAllocator &bufferPoolAllocator = context->getBufferPoolAllocator();
@ -355,7 +378,7 @@ Buffer *Buffer::create(Context *context,
allocationInfo.allocateMemory = false;
}
if (hostPtr && context->isProvidingPerformanceHints()) {
if (!bufferCreateArgs.doNotProvidePerformanceHints && hostPtr && context->isProvidingPerformanceHints()) {
if (allocationInfo.zeroCopyAllowed) {
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_BUFFER_MEETS_ALIGNMENT_RESTRICTIONS, hostPtr, size);
} else {
@ -367,7 +390,7 @@ Buffer *Buffer::create(Context *context,
allocationInfo.zeroCopyAllowed = false;
}
if (allocationInfo.allocateMemory && context->isProvidingPerformanceHints()) {
if (!bufferCreateArgs.doNotProvidePerformanceHints && allocationInfo.allocateMemory && context->isProvidingPerformanceHints()) {
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_BUFFER_NEEDS_ALLOCATE_MEMORY);
}
@ -381,6 +404,7 @@ Buffer *Buffer::create(Context *context,
*hwInfo, context->getDeviceBitfieldForAllocation(rootDeviceIndex), context->isSingleDeviceContext());
allocProperties.flags.crossRootDeviceAccess = context->getRootDeviceIndices().size() > 1;
allocProperties.flags.preferCompressed = compressionEnabled;
allocProperties.makeDeviceBufferLockable = bufferCreateArgs.makeAllocationLockable;
if (allocationCpuPtr) {
allocationInfo.memory = memoryManager->createGraphicsAllocationFromExistingStorage(allocProperties, allocationCpuPtr, multiGraphicsAllocation);

View File

@ -57,6 +57,10 @@ extern ValidateInputAndCreateBufferFunc validateInputAndCreateBuffer;
class Buffer : public MemObj {
public:
// Optional switches that callers can hand to Buffer::create.
// Every member has a default initializer, so an instance that is merely default-initialized
// (`AdditionalBufferCreateArgs args;`) is as well-defined as a value-initialized one
// (`AdditionalBufferCreateArgs args{};`): all options start switched off.
struct AdditionalBufferCreateArgs {
    // When true, Buffer::create does not emit its performance hints for this buffer.
    bool doNotProvidePerformanceHints = false;
    // When true, Buffer::create copies the request into AllocationProperties::makeDeviceBufferLockable.
    bool makeAllocationLockable = false;
};
constexpr static size_t maxBufferSizeForReadWriteOnCpu = 10 * MB;
constexpr static size_t maxBufferSizeForCopyOnCpu = 64 * KB;
constexpr static cl_ulong maskMagic = 0xFFFFFFFFFFFFFFFFLL;
@ -80,6 +84,13 @@ class Buffer : public MemObj {
void *hostPtr,
cl_int &errcodeRet);
// Creates a buffer from plain cl_mem_flags while honouring caller-supplied extra options.
// The definition derives MemoryProperties from `flags` for the context's device 0 and
// forwards to the MemoryProperties-based overload with flagsIntel == 0.
//
// context          - context the buffer is created in
// flags            - cl_mem_flags describing the buffer
// size             - requested buffer size in bytes
// hostPtr          - optional host pointer (callers in this change pass nullptr or a user address)
// bufferCreateArgs - extra options: suppress performance hints / request a lockable allocation
// errcodeRet       - receives the OpenCL status code of the creation
// Returns the created buffer; callers in this change check the result against nullptr.
static Buffer *create(Context *context,
cl_mem_flags flags,
size_t size,
void *hostPtr,
AdditionalBufferCreateArgs &bufferCreateArgs,
cl_int &errcodeRet);
static Buffer *create(Context *context,
const MemoryProperties &properties,
cl_mem_flags flags,
@ -88,6 +99,15 @@ class Buffer : public MemObj {
void *hostPtr,
cl_int &errcodeRet);
// Creates a buffer from already-built MemoryProperties while honouring caller-supplied extra options.
// This is the overload the other create() variants forward to.
//
// context          - context the buffer is created in
// properties       - memory properties describing the buffer
// flags            - cl_mem_flags describing the buffer
// flagsIntel       - Intel-specific cl_mem_flags_intel
// size             - requested buffer size in bytes
// hostPtr          - optional host pointer
// bufferCreateArgs - doNotProvidePerformanceHints suppresses the performance hints emitted during
//                    creation; makeAllocationLockable is copied into
//                    AllocationProperties::makeDeviceBufferLockable
// errcodeRet       - receives the OpenCL status code; the definition sets it to CL_SUCCESS on entry
// Returns the created buffer; callers in this change check the result against nullptr.
static Buffer *create(Context *context,
const MemoryProperties &properties,
cl_mem_flags flags,
cl_mem_flags_intel flagsIntel,
size_t size,
void *hostPtr,
AdditionalBufferCreateArgs &bufferCreateArgs,
cl_int &errcodeRet);
static Buffer *createSharedBuffer(Context *context,
cl_mem_flags flags,
SharingHandler *sharingHandler,

View File

@ -91,19 +91,23 @@ TEST_P(PerformanceHintBufferTest, GivenHostPtrAndSizeAlignmentsWhenBufferIsCreat
flags |= CL_MEM_FORCE_HOST_MEMORY_INTEL;
}
Buffer::AdditionalBufferCreateArgs bufferCreateArgs{};
bufferCreateArgs.doNotProvidePerformanceHints = !providePerformanceHint;
buffer = Buffer::create(
context,
flags,
sizeForBuffer,
(void *)addressForBuffer,
bufferCreateArgs,
retVal);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_NE(nullptr, buffer);
snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_BUFFER_DOESNT_MEET_ALIGNMENT_RESTRICTIONS], addressForBuffer, sizeForBuffer, MemoryConstants::pageSize, MemoryConstants::pageSize);
EXPECT_EQ(!(alignedSize && alignedAddress), containsHint(expectedHint, userData));
EXPECT_EQ(providePerformanceHint && !(alignedSize && alignedAddress), containsHint(expectedHint, userData));
snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_BUFFER_NEEDS_ALLOCATE_MEMORY], 0);
EXPECT_EQ(!(alignedSize && alignedAddress), containsHint(expectedHint, userData));
EXPECT_EQ(providePerformanceHint && !(alignedSize && alignedAddress), containsHint(expectedHint, userData));
}
TEST_P(PerformanceHintCommandQueueTest, GivenProfilingFlagAndPreemptionFlagWhenCommandQueueIsCreatingThenContextProvidesProperHints) {
@ -916,6 +920,7 @@ INSTANTIATE_TEST_CASE_P(
DriverDiagnosticsTests,
PerformanceHintBufferTest,
testing::Combine(
::testing::Bool(),
::testing::Bool(),
::testing::Bool()));

View File

@ -84,11 +84,11 @@ struct PerformanceHintTest : public DriverDiagnosticsTest,
};
struct PerformanceHintBufferTest : public PerformanceHintTest,
public ::testing::WithParamInterface<std::tuple<bool /*address aligned*/, bool /*size aligned*/>> {
public ::testing::WithParamInterface<std::tuple<bool /*address aligned*/, bool /*size aligned*/, bool /*provide performance hint*/>> {
void SetUp() override {
PerformanceHintTest::SetUp();
std::tie(alignedAddress, alignedSize) = GetParam();
std::tie(alignedAddress, alignedSize, providePerformanceHint) = GetParam();
address = alignedMalloc(2 * MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize);
}
@ -99,6 +99,7 @@ struct PerformanceHintBufferTest : public PerformanceHintTest,
}
bool alignedSize = false;
bool alignedAddress = false;
bool providePerformanceHint = false;
void *address = nullptr;
Buffer *buffer = nullptr;
};

View File

@ -19,24 +19,12 @@ namespace Ult {
using PoolAllocator = Context::BufferPoolAllocator;
using MockBufferPoolAllocator = MockContext::MockBufferPoolAllocator;
template <int32_t poolBufferFlag = -1, bool failMainStorageAllocation = false>
template <int32_t poolBufferFlag = -1, bool failMainStorageAllocation = false, bool runSetup = true>
class AggregatedSmallBuffersTestTemplate : public ::testing::Test {
void SetUp() override {
this->setUpImpl();
}
void setUpImpl() {
DebugManager.flags.ExperimentalSmallBufferPoolAllocator.set(poolBufferFlag);
this->deviceFactory = std::make_unique<UltClDeviceFactory>(1, 0);
this->device = deviceFactory->rootDevices[0];
this->mockMemoryManager = static_cast<MockMemoryManager *>(device->getMemoryManager());
this->mockMemoryManager->localMemorySupported[mockRootDeviceIndex] = true;
this->setAllocationToFail(failMainStorageAllocation);
cl_device_id devices[] = {device};
this->context.reset(Context::create<MockContext>(nullptr, ClDeviceVector(devices, 1), nullptr, nullptr, retVal));
ASSERT_EQ(retVal, CL_SUCCESS);
this->setAllocationToFail(false);
this->poolAllocator = static_cast<MockBufferPoolAllocator *>(&context->smallBufferPoolAllocator);
if constexpr (runSetup) {
this->setUpImpl();
}
}
void TearDown() override {
@ -62,6 +50,20 @@ class AggregatedSmallBuffersTestTemplate : public ::testing::Test {
cl_int retVal = CL_SUCCESS;
DebugManagerStateRestore restore;
// Builds the whole fixture: debug flag, device, mock memory manager, context and pool allocator.
// Kept separate from SetUp() so that fixtures instantiated with runSetup == false can call it
// from the test body after adjusting additional debug flags.
// NOTE: ASSERT_EQ below only returns from this function on failure; a caller invoking this from
// a test body has to check for a fatal failure itself.
void setUpImpl() {
// Select the small-buffer pool mode under test (template parameter poolBufferFlag).
DebugManager.flags.ExperimentalSmallBufferPoolAllocator.set(poolBufferFlag);
// presumably: one root device, no sub-devices - confirm against UltClDeviceFactory
this->deviceFactory = std::make_unique<UltClDeviceFactory>(1, 0);
this->device = deviceFactory->rootDevices[0];
this->mockMemoryManager = static_cast<MockMemoryManager *>(device->getMemoryManager());
// Report local memory as supported for the mock root device.
this->mockMemoryManager->localMemorySupported[mockRootDeviceIndex] = true;
// Failure injection is armed only around context creation and disarmed again afterwards.
this->setAllocationToFail(failMainStorageAllocation);
cl_device_id devices[] = {device};
this->context.reset(Context::create<MockContext>(nullptr, ClDeviceVector(devices, 1), nullptr, nullptr, retVal));
ASSERT_EQ(retVal, CL_SUCCESS);
this->setAllocationToFail(false);
// Expose the context's pool allocator through the mock type so tests can inspect its members.
this->poolAllocator = static_cast<MockBufferPoolAllocator *>(&context->smallBufferPoolAllocator);
}
};
using aggregatedSmallBuffersDefaultTest = AggregatedSmallBuffersTestTemplate<-1>;
@ -84,6 +86,13 @@ TEST_F(aggregatedSmallBuffersDisabledTest, givenAggregatedSmallBuffersDisabledWh
using aggregatedSmallBuffersEnabledTest = AggregatedSmallBuffersTestTemplate<1>;
// Verifies that the allocation properties most recently recorded by the mock memory manager,
// after the fixture has created the pool's main storage, request a lockable device buffer.
TEST_F(aggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledWhenAllocatingMainStorageThenMakeDeviceBufferLockable) {
    // Preconditions: the pool is active and its backing buffer exists.
    ASSERT_TRUE(poolAllocator->isAggregatedSmallBuffersEnabled());
    ASSERT_NE(poolAllocator->mainStorage, nullptr);

    // The mock memory manager keeps a copy of the properties of the last allocation request it saw.
    const auto &recordedProperties = mockMemoryManager->lastAllocationProperties;
    ASSERT_NE(recordedProperties, nullptr);
    EXPECT_TRUE(recordedProperties->makeDeviceBufferLockable);
}
TEST_F(aggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndSizeLargerThanThresholdWhenBufferCreateCalledThenDoNotUsePool) {
ASSERT_TRUE(poolAllocator->isAggregatedSmallBuffersEnabled());
ASSERT_NE(poolAllocator->mainStorage, nullptr);
@ -235,9 +244,9 @@ TEST_F(aggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndS
}
}
using aggregatedSmallBuffersEnabledTestDoNotRunSetup = AggregatedSmallBuffersTestTemplate<1, true>;
using aggregatedSmallBuffersEnabledTestFailPoolInit = AggregatedSmallBuffersTestTemplate<1, true>;
TEST_F(aggregatedSmallBuffersEnabledTestDoNotRunSetup, givenAggregatedSmallBuffersEnabledAndSizeEqualToThresholdWhenBufferCreateCalledButPoolCreateFailedThenDoNotUsePool) {
TEST_F(aggregatedSmallBuffersEnabledTestFailPoolInit, givenAggregatedSmallBuffersEnabledAndSizeEqualToThresholdWhenBufferCreateCalledButPoolCreateFailedThenDoNotUsePool) {
ASSERT_TRUE(poolAllocator->isAggregatedSmallBuffersEnabled());
ASSERT_EQ(poolAllocator->mainStorage, nullptr);
std::unique_ptr<Buffer> buffer(Buffer::create(context.get(), flags, size, hostPtr, retVal));
@ -247,6 +256,19 @@ TEST_F(aggregatedSmallBuffersEnabledTestDoNotRunSetup, givenAggregatedSmallBuffe
EXPECT_EQ(poolAllocator->mainStorage, nullptr);
}
using aggregatedSmallBuffersEnabledTestDoNotRunSetup = AggregatedSmallBuffersTestTemplate<1, false, false>;
// Verifies that initializing the small-buffer pool prints no driver diagnostics even though
// PrintDriverDiagnostics is enabled, i.e. the pool's main storage is created without hints.
TEST_F(aggregatedSmallBuffersEnabledTestDoNotRunSetup, givenAggregatedSmallBuffersEnabledWhenPoolInitializedThenPerformanceHintsNotProvided) {
    // Capturing has to start before the context (and with it the pool) is created in setUpImpl().
    testing::internal::CaptureStdout();
    DebugManager.flags.PrintDriverDiagnostics.set(1);
    setUpImpl();
    // Release the capture before any fatal assertion can leave this function; otherwise an
    // early return would keep the stdout capturer alive for the tests that follow.
    const std::string output = testing::internal::GetCapturedStdout();
    // setUpImpl() runs inside the test body here, so a fatal failure in it does not stop the
    // test automatically; bail out instead of touching a half-initialized fixture.
    ASSERT_FALSE(HasFatalFailure());
    ASSERT_TRUE(poolAllocator->isAggregatedSmallBuffersEnabled());
    ASSERT_NE(poolAllocator->mainStorage, nullptr);
    ASSERT_NE(context->driverDiagnostics, nullptr);
    EXPECT_EQ(0u, output.size());
}
template <int32_t poolBufferFlag = -1>
class AggregatedSmallBuffersApiTestTemplate : public ::testing::Test {
void SetUp() override {

View File

@ -48,6 +48,7 @@ struct AllocationProperties {
OsContext *osContext = nullptr;
bool useMmapObject = true;
uint32_t cacheRegion = 0;
bool makeDeviceBufferLockable = false;
AllocationProperties(uint32_t rootDeviceIndex, size_t size,
AllocationType allocationType, DeviceBitfield subDevicesBitfieldParam)

View File

@ -35,7 +35,7 @@ StorageInfo MemoryManager::createStorageInfoFromProperties(const AllocationPrope
StorageInfo storageInfo{preferredTile, allTilesValue};
storageInfo.subDeviceBitfield = properties.subDevicesBitfield;
storageInfo.isLockable = GraphicsAllocation::isLockable(properties.allocationType);
storageInfo.isLockable = GraphicsAllocation::isLockable(properties.allocationType) || (properties.makeDeviceBufferLockable && properties.allocationType == AllocationType::BUFFER);
storageInfo.cpuVisibleSegment = GraphicsAllocation::isCpuAccessRequired(properties.allocationType);
AppResourceHelper::copyResourceTagStr(storageInfo.resourceTag, properties.allocationType,

View File

@ -56,6 +56,7 @@ GraphicsAllocation *MockMemoryManager::allocateGraphicsMemoryWithProperties(cons
}
GraphicsAllocation *MockMemoryManager::allocateGraphicsMemoryWithProperties(const AllocationProperties &properties, const void *ptr) {
lastAllocationProperties.reset(new AllocationProperties(properties));
if (returnFakeAllocation) {
return new GraphicsAllocation(properties.rootDeviceIndex, properties.allocationType, reinterpret_cast<void *>(dummyAddress), reinterpret_cast<uint64_t>(ptr), properties.size, 0, MemoryPool::System4KBPages, maxOsContextCount);
}

View File

@ -261,6 +261,7 @@ class MockMemoryManager : public MemoryManagerCreate<OsAgnosticMemoryManager> {
MemAdviseFlags memAdviseFlags{};
MemoryManager::AllocationStatus populateOsHandlesResult = MemoryManager::AllocationStatus::Success;
GraphicsAllocation *allocateGraphicsMemoryForNonSvmHostPtrResult = nullptr;
std::unique_ptr<AllocationProperties> lastAllocationProperties = nullptr;
};
class MockAllocSysMemAgnosticMemoryManager : public OsAgnosticMemoryManager {

View File

@ -472,6 +472,25 @@ TEST_F(MultiDeviceStorageInfoTest, givenGraphicsAllocationThatIsLockableWhenCrea
}
}
// Checks how AllocationProperties::makeDeviceBufferLockable influences StorageInfo::isLockable:
// the test expects it to take effect for BUFFER allocations only, and only when requested.
TEST_F(MultiDeviceStorageInfoTest, givenAllocationTypeBufferWhenCreatingStorageInfoThenIsLockableFlagIsSetCorrectly) {
    AllocationProperties properties{mockRootDeviceIndex, false, 1u, AllocationType::BUFFER, false, singleTileMask};

    // Builds a fresh StorageInfo from the current state of `properties` and reports its lockable flag.
    auto lockableForCurrentProperties = [&]() {
        return memoryManager->createStorageInfoFromProperties(properties).isLockable;
    };

    // BUFFER without the request: expected not lockable.
    properties.makeDeviceBufferLockable = false;
    EXPECT_FALSE(lockableForCurrentProperties());

    // BUFFER with the request: expected lockable.
    properties.makeDeviceBufferLockable = true;
    EXPECT_TRUE(lockableForCurrentProperties());

    // Request still set, but the allocation is an IMAGE: expected not lockable.
    properties.allocationType = AllocationType::IMAGE;
    EXPECT_FALSE(lockableForCurrentProperties());
}
TEST_F(MultiDeviceStorageInfoTest, givenGpuTimestampAllocationWhenUsingSingleTileDeviceThenExpectRegularAllocationStorageInfo) {
AllocationProperties properties{mockRootDeviceIndex,
false,