Mirror of https://github.com/intel/compute-runtime.git
Add additional create buffer arguments
Allow to:
- disable performance hints
- make allocation lockable

Used in BufferPoolAllocator

Related-To: NEO-7332
Signed-off-by: Dominik Dabek <dominik.dabek@intel.com>
commit 67bfebb25e
parent 50df7f430e
committed by Compute-Runtime-Automation
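For orientation before the hunks below, here is a minimal usage sketch of the new arguments, mirroring how the pool allocator uses them in this patch. It is not part of the commit: it assumes NEO's internal headers and an already-created Context *context, and the flags and size values are placeholders.

// Sketch only (not from the patch): create a buffer with the new optional arguments.
Buffer::AdditionalBufferCreateArgs bufferCreateArgs{};
bufferCreateArgs.doNotProvidePerformanceHints = true; // suppress driver-diagnostic hints for this call
bufferCreateArgs.makeAllocationLockable = true;       // request a lockable device buffer

cl_int errcodeRet = CL_SUCCESS;
Buffer *buffer = Buffer::create(context,
                                CL_MEM_READ_WRITE, // placeholder flags
                                4096,              // placeholder size
                                nullptr,
                                bufferCreateArgs,
                                errcodeRet);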
@@ -475,10 +475,14 @@ bool Context::isSingleDeviceContext() {
 void Context::BufferPoolAllocator::initAggregatedSmallBuffers(Context *context) {
     static constexpr cl_mem_flags flags{};
     [[maybe_unused]] cl_int errcodeRet{};
+    Buffer::AdditionalBufferCreateArgs bufferCreateArgs{};
+    bufferCreateArgs.doNotProvidePerformanceHints = true;
+    bufferCreateArgs.makeAllocationLockable = true;
     this->mainStorage = Buffer::create(context,
                                        flags,
                                        BufferPoolAllocator::aggregatedSmallBuffersPoolSize,
                                        nullptr,
+                                       bufferCreateArgs,
                                        errcodeRet);
     if (this->mainStorage) {
         this->chunkAllocator.reset(new HeapAllocator(BufferPoolAllocator::startingOffset,
@@ -50,6 +50,7 @@ class Context : public BaseObject<_cl_context> {
         static constexpr auto startingOffset = chunkAlignment;
 
         static_assert(aggregatedSmallBuffersPoolSize > smallBufferThreshold, "Largest allowed buffer needs to fit in pool");
+
         Buffer *allocateBufferFromPool(const MemoryProperties &memoryProperties,
                                        cl_mem_flags flags,
                                        cl_mem_flags_intel flagsIntel,
@@ -170,9 +170,20 @@ Buffer *Buffer::create(Context *context,
                        cl_mem_flags flags,
                        size_t size,
                        void *hostPtr,
+                       AdditionalBufferCreateArgs &bufferCreateArgs,
                        cl_int &errcodeRet) {
     return create(context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context->getDevice(0)->getDevice()),
-                  flags, 0, size, hostPtr, errcodeRet);
+                  flags, 0, size, hostPtr, bufferCreateArgs, errcodeRet);
 }
 
+Buffer *Buffer::create(Context *context,
+                       cl_mem_flags flags,
+                       size_t size,
+                       void *hostPtr,
+                       cl_int &errcodeRet) {
+    AdditionalBufferCreateArgs bufferCreateArgs{};
+    return create(context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context->getDevice(0)->getDevice()),
+                  flags, 0, size, hostPtr, bufferCreateArgs, errcodeRet);
+}
+
 bool inline copyHostPointer(Buffer *buffer,
@@ -233,6 +244,18 @@ Buffer *Buffer::create(Context *context,
                        size_t size,
                        void *hostPtr,
                        cl_int &errcodeRet) {
+    AdditionalBufferCreateArgs bufferCreateArgs{};
+    return create(context, memoryProperties, flags, flagsIntel, size, hostPtr, bufferCreateArgs, errcodeRet);
+}
+
+Buffer *Buffer::create(Context *context,
+                       const MemoryProperties &memoryProperties,
+                       cl_mem_flags flags,
+                       cl_mem_flags_intel flagsIntel,
+                       size_t size,
+                       void *hostPtr,
+                       AdditionalBufferCreateArgs &bufferCreateArgs,
+                       cl_int &errcodeRet) {
 
     errcodeRet = CL_SUCCESS;
     Context::BufferPoolAllocator &bufferPoolAllocator = context->getBufferPoolAllocator();
@@ -355,7 +378,7 @@ Buffer *Buffer::create(Context *context,
         allocationInfo.allocateMemory = false;
     }
 
-    if (hostPtr && context->isProvidingPerformanceHints()) {
+    if (!bufferCreateArgs.doNotProvidePerformanceHints && hostPtr && context->isProvidingPerformanceHints()) {
         if (allocationInfo.zeroCopyAllowed) {
             context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_BUFFER_MEETS_ALIGNMENT_RESTRICTIONS, hostPtr, size);
         } else {
@@ -367,7 +390,7 @@ Buffer *Buffer::create(Context *context,
         allocationInfo.zeroCopyAllowed = false;
     }
 
-    if (allocationInfo.allocateMemory && context->isProvidingPerformanceHints()) {
+    if (!bufferCreateArgs.doNotProvidePerformanceHints && allocationInfo.allocateMemory && context->isProvidingPerformanceHints()) {
         context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_BUFFER_NEEDS_ALLOCATE_MEMORY);
     }
 
@@ -381,6 +404,7 @@ Buffer *Buffer::create(Context *context,
                                              *hwInfo, context->getDeviceBitfieldForAllocation(rootDeviceIndex), context->isSingleDeviceContext());
         allocProperties.flags.crossRootDeviceAccess = context->getRootDeviceIndices().size() > 1;
         allocProperties.flags.preferCompressed = compressionEnabled;
+        allocProperties.makeDeviceBufferLockable = bufferCreateArgs.makeAllocationLockable;
 
         if (allocationCpuPtr) {
             allocationInfo.memory = memoryManager->createGraphicsAllocationFromExistingStorage(allocProperties, allocationCpuPtr, multiGraphicsAllocation);
@@ -57,6 +57,10 @@ extern ValidateInputAndCreateBufferFunc validateInputAndCreateBuffer;
 
 class Buffer : public MemObj {
   public:
+    struct AdditionalBufferCreateArgs {
+        bool doNotProvidePerformanceHints;
+        bool makeAllocationLockable;
+    };
     constexpr static size_t maxBufferSizeForReadWriteOnCpu = 10 * MB;
     constexpr static size_t maxBufferSizeForCopyOnCpu = 64 * KB;
     constexpr static cl_ulong maskMagic = 0xFFFFFFFFFFFFFFFFLL;
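Both fields are plain bools with no in-class initializers, so the forwarding overloads in this patch rely on value-initialization ({}) to keep legacy behavior. A standalone illustration, not code from the patch (the struct is a hypothetical copy just for this demo):

#include <cassert>

// Hypothetical copy of the struct, only to show the effect of value-initialization.
struct AdditionalBufferCreateArgs {
    bool doNotProvidePerformanceHints;
    bool makeAllocationLockable;
};

int main() {
    AdditionalBufferCreateArgs args{}; // value-initialization zeroes both flags
    assert(!args.doNotProvidePerformanceHints);
    assert(!args.makeAllocationLockable);
    return 0;
}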
@@ -80,6 +84,13 @@ class Buffer : public MemObj {
                           void *hostPtr,
                           cl_int &errcodeRet);
 
+    static Buffer *create(Context *context,
+                          cl_mem_flags flags,
+                          size_t size,
+                          void *hostPtr,
+                          AdditionalBufferCreateArgs &bufferCreateArgs,
+                          cl_int &errcodeRet);
+
     static Buffer *create(Context *context,
                           const MemoryProperties &properties,
                           cl_mem_flags flags,
@@ -88,6 +99,15 @@ class Buffer : public MemObj {
                           void *hostPtr,
                           cl_int &errcodeRet);
 
+    static Buffer *create(Context *context,
+                          const MemoryProperties &properties,
+                          cl_mem_flags flags,
+                          cl_mem_flags_intel flagsIntel,
+                          size_t size,
+                          void *hostPtr,
+                          AdditionalBufferCreateArgs &bufferCreateArgs,
+                          cl_int &errcodeRet);
+
     static Buffer *createSharedBuffer(Context *context,
                                       cl_mem_flags flags,
                                       SharingHandler *sharingHandler,
@@ -91,19 +91,23 @@ TEST_P(PerformanceHintBufferTest, GivenHostPtrAndSizeAlignmentsWhenBufferIsCreat
         flags |= CL_MEM_FORCE_HOST_MEMORY_INTEL;
     }
 
+    Buffer::AdditionalBufferCreateArgs bufferCreateArgs{};
+    bufferCreateArgs.doNotProvidePerformanceHints = !providePerformanceHint;
+
     buffer = Buffer::create(
         context,
         flags,
         sizeForBuffer,
         (void *)addressForBuffer,
+        bufferCreateArgs,
         retVal);
     EXPECT_EQ(CL_SUCCESS, retVal);
     EXPECT_NE(nullptr, buffer);
 
     snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_BUFFER_DOESNT_MEET_ALIGNMENT_RESTRICTIONS], addressForBuffer, sizeForBuffer, MemoryConstants::pageSize, MemoryConstants::pageSize);
-    EXPECT_EQ(!(alignedSize && alignedAddress), containsHint(expectedHint, userData));
+    EXPECT_EQ(providePerformanceHint && !(alignedSize && alignedAddress), containsHint(expectedHint, userData));
     snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_BUFFER_NEEDS_ALLOCATE_MEMORY], 0);
-    EXPECT_EQ(!(alignedSize && alignedAddress), containsHint(expectedHint, userData));
+    EXPECT_EQ(providePerformanceHint && !(alignedSize && alignedAddress), containsHint(expectedHint, userData));
 }
 
 TEST_P(PerformanceHintCommandQueueTest, GivenProfilingFlagAndPreemptionFlagWhenCommandQueueIsCreatingThenContextProvidesProperHints) {
@@ -916,6 +920,7 @@ INSTANTIATE_TEST_CASE_P(
     DriverDiagnosticsTests,
     PerformanceHintBufferTest,
     testing::Combine(
+        ::testing::Bool(),
         ::testing::Bool(),
         ::testing::Bool()));
 
@@ -84,11 +84,11 @@ struct PerformanceHintTest : public DriverDiagnosticsTest,
 };
 
 struct PerformanceHintBufferTest : public PerformanceHintTest,
-                                   public ::testing::WithParamInterface<std::tuple<bool /*address aligned*/, bool /*size aligned*/>> {
+                                   public ::testing::WithParamInterface<std::tuple<bool /*address aligned*/, bool /*size aligned*/, bool /*provide performance hint*/>> {
 
     void SetUp() override {
         PerformanceHintTest::SetUp();
-        std::tie(alignedAddress, alignedSize) = GetParam();
+        std::tie(alignedAddress, alignedSize, providePerformanceHint) = GetParam();
         address = alignedMalloc(2 * MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize);
     }
 
@@ -99,6 +99,7 @@ struct PerformanceHintBufferTest : public PerformanceHintTest,
     }
     bool alignedSize = false;
     bool alignedAddress = false;
+    bool providePerformanceHint = false;
     void *address = nullptr;
     Buffer *buffer = nullptr;
 };
@@ -19,24 +19,12 @@ namespace Ult {
 using PoolAllocator = Context::BufferPoolAllocator;
 using MockBufferPoolAllocator = MockContext::MockBufferPoolAllocator;
 
-template <int32_t poolBufferFlag = -1, bool failMainStorageAllocation = false>
+template <int32_t poolBufferFlag = -1, bool failMainStorageAllocation = false, bool runSetup = true>
 class AggregatedSmallBuffersTestTemplate : public ::testing::Test {
     void SetUp() override {
-        this->setUpImpl();
-    }
-
-    void setUpImpl() {
-        DebugManager.flags.ExperimentalSmallBufferPoolAllocator.set(poolBufferFlag);
-        this->deviceFactory = std::make_unique<UltClDeviceFactory>(1, 0);
-        this->device = deviceFactory->rootDevices[0];
-        this->mockMemoryManager = static_cast<MockMemoryManager *>(device->getMemoryManager());
-        this->mockMemoryManager->localMemorySupported[mockRootDeviceIndex] = true;
-        this->setAllocationToFail(failMainStorageAllocation);
-        cl_device_id devices[] = {device};
-        this->context.reset(Context::create<MockContext>(nullptr, ClDeviceVector(devices, 1), nullptr, nullptr, retVal));
-        ASSERT_EQ(retVal, CL_SUCCESS);
-        this->setAllocationToFail(false);
-        this->poolAllocator = static_cast<MockBufferPoolAllocator *>(&context->smallBufferPoolAllocator);
+        if constexpr (runSetup) {
+            this->setUpImpl();
+        }
     }
 
     void TearDown() override {
@@ -62,6 +50,20 @@ class AggregatedSmallBuffersTestTemplate : public ::testing::Test {
     cl_int retVal = CL_SUCCESS;
 
     DebugManagerStateRestore restore;
+
+    void setUpImpl() {
+        DebugManager.flags.ExperimentalSmallBufferPoolAllocator.set(poolBufferFlag);
+        this->deviceFactory = std::make_unique<UltClDeviceFactory>(1, 0);
+        this->device = deviceFactory->rootDevices[0];
+        this->mockMemoryManager = static_cast<MockMemoryManager *>(device->getMemoryManager());
+        this->mockMemoryManager->localMemorySupported[mockRootDeviceIndex] = true;
+        this->setAllocationToFail(failMainStorageAllocation);
+        cl_device_id devices[] = {device};
+        this->context.reset(Context::create<MockContext>(nullptr, ClDeviceVector(devices, 1), nullptr, nullptr, retVal));
+        ASSERT_EQ(retVal, CL_SUCCESS);
+        this->setAllocationToFail(false);
+        this->poolAllocator = static_cast<MockBufferPoolAllocator *>(&context->smallBufferPoolAllocator);
+    }
 };
 
 using aggregatedSmallBuffersDefaultTest = AggregatedSmallBuffersTestTemplate<-1>;
@@ -84,6 +86,13 @@ TEST_F(aggregatedSmallBuffersDisabledTest, givenAggregatedSmallBuffersDisabledWh
 
 using aggregatedSmallBuffersEnabledTest = AggregatedSmallBuffersTestTemplate<1>;
 
+TEST_F(aggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledWhenAllocatingMainStorageThenMakeDeviceBufferLockable) {
+    ASSERT_TRUE(poolAllocator->isAggregatedSmallBuffersEnabled());
+    ASSERT_NE(poolAllocator->mainStorage, nullptr);
+    ASSERT_NE(mockMemoryManager->lastAllocationProperties, nullptr);
+    EXPECT_TRUE(mockMemoryManager->lastAllocationProperties->makeDeviceBufferLockable);
+}
+
 TEST_F(aggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndSizeLargerThanThresholdWhenBufferCreateCalledThenDoNotUsePool) {
     ASSERT_TRUE(poolAllocator->isAggregatedSmallBuffersEnabled());
     ASSERT_NE(poolAllocator->mainStorage, nullptr);
@@ -235,9 +244,9 @@ TEST_F(aggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndS
     }
 }
 
-using aggregatedSmallBuffersEnabledTestDoNotRunSetup = AggregatedSmallBuffersTestTemplate<1, true>;
+using aggregatedSmallBuffersEnabledTestFailPoolInit = AggregatedSmallBuffersTestTemplate<1, true>;
 
-TEST_F(aggregatedSmallBuffersEnabledTestDoNotRunSetup, givenAggregatedSmallBuffersEnabledAndSizeEqualToThresholdWhenBufferCreateCalledButPoolCreateFailedThenDoNotUsePool) {
+TEST_F(aggregatedSmallBuffersEnabledTestFailPoolInit, givenAggregatedSmallBuffersEnabledAndSizeEqualToThresholdWhenBufferCreateCalledButPoolCreateFailedThenDoNotUsePool) {
     ASSERT_TRUE(poolAllocator->isAggregatedSmallBuffersEnabled());
     ASSERT_EQ(poolAllocator->mainStorage, nullptr);
     std::unique_ptr<Buffer> buffer(Buffer::create(context.get(), flags, size, hostPtr, retVal));
@@ -247,6 +256,19 @@ TEST_F(aggregatedSmallBuffersEnabledTestDoNotRunSetup, givenAggregatedSmallBuffe
     EXPECT_EQ(poolAllocator->mainStorage, nullptr);
 }
 
+using aggregatedSmallBuffersEnabledTestDoNotRunSetup = AggregatedSmallBuffersTestTemplate<1, false, false>;
+
+TEST_F(aggregatedSmallBuffersEnabledTestDoNotRunSetup, givenAggregatedSmallBuffersEnabledWhenPoolInitializedThenPerformanceHintsNotProvided) {
+    testing::internal::CaptureStdout();
+    DebugManager.flags.PrintDriverDiagnostics.set(1);
+    setUpImpl();
+    ASSERT_TRUE(poolAllocator->isAggregatedSmallBuffersEnabled());
+    ASSERT_NE(poolAllocator->mainStorage, nullptr);
+    ASSERT_NE(context->driverDiagnostics, nullptr);
+    std::string output = testing::internal::GetCapturedStdout();
+    EXPECT_EQ(0u, output.size());
+}
+
 template <int32_t poolBufferFlag = -1>
 class AggregatedSmallBuffersApiTestTemplate : public ::testing::Test {
     void SetUp() override {
@@ -48,6 +48,7 @@ struct AllocationProperties {
     OsContext *osContext = nullptr;
     bool useMmapObject = true;
     uint32_t cacheRegion = 0;
+    bool makeDeviceBufferLockable = false;
 
     AllocationProperties(uint32_t rootDeviceIndex, size_t size,
                          AllocationType allocationType, DeviceBitfield subDevicesBitfieldParam)
@@ -35,7 +35,7 @@ StorageInfo MemoryManager::createStorageInfoFromProperties(const AllocationPrope
 
     StorageInfo storageInfo{preferredTile, allTilesValue};
     storageInfo.subDeviceBitfield = properties.subDevicesBitfield;
-    storageInfo.isLockable = GraphicsAllocation::isLockable(properties.allocationType);
+    storageInfo.isLockable = GraphicsAllocation::isLockable(properties.allocationType) || (properties.makeDeviceBufferLockable && properties.allocationType == AllocationType::BUFFER);
     storageInfo.cpuVisibleSegment = GraphicsAllocation::isCpuAccessRequired(properties.allocationType);
 
     AppResourceHelper::copyResourceTagStr(storageInfo.resourceTag, properties.allocationType,
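In other words, the lockability rule now reduces to the predicate sketched below. This is a standalone restatement for clarity, not code from the patch; the function and parameter names are mine.

// Lockable if the allocation type is inherently lockable, or if the caller explicitly
// asked for a lockable device BUFFER via AllocationProperties::makeDeviceBufferLockable.
bool isStorageLockable(bool lockableByType, bool makeDeviceBufferLockable, bool isBufferType) {
    return lockableByType || (makeDeviceBufferLockable && isBufferType);
}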
@@ -56,6 +56,7 @@ GraphicsAllocation *MockMemoryManager::allocateGraphicsMemoryWithProperties(cons
 }
 
 GraphicsAllocation *MockMemoryManager::allocateGraphicsMemoryWithProperties(const AllocationProperties &properties, const void *ptr) {
+    lastAllocationProperties.reset(new AllocationProperties(properties));
     if (returnFakeAllocation) {
         return new GraphicsAllocation(properties.rootDeviceIndex, properties.allocationType, reinterpret_cast<void *>(dummyAddress), reinterpret_cast<uint64_t>(ptr), properties.size, 0, MemoryPool::System4KBPages, maxOsContextCount);
     }
@@ -261,6 +261,7 @@ class MockMemoryManager : public MemoryManagerCreate<OsAgnosticMemoryManager> {
     MemAdviseFlags memAdviseFlags{};
     MemoryManager::AllocationStatus populateOsHandlesResult = MemoryManager::AllocationStatus::Success;
     GraphicsAllocation *allocateGraphicsMemoryForNonSvmHostPtrResult = nullptr;
+    std::unique_ptr<AllocationProperties> lastAllocationProperties = nullptr;
 };
 
 class MockAllocSysMemAgnosticMemoryManager : public OsAgnosticMemoryManager {
@@ -472,6 +472,25 @@ TEST_F(MultiDeviceStorageInfoTest, givenGraphicsAllocationThatIsLockableWhenCrea
     }
 }
 
+TEST_F(MultiDeviceStorageInfoTest, givenAllocationTypeBufferWhenCreatingStorageInfoThenIsLockableFlagIsSetCorrectly) {
+    AllocationProperties properties{mockRootDeviceIndex, false, 1u, AllocationType::BUFFER, false, singleTileMask};
+    {
+        properties.makeDeviceBufferLockable = false;
+        auto storageInfo = memoryManager->createStorageInfoFromProperties(properties);
+        EXPECT_FALSE(storageInfo.isLockable);
+    }
+    {
+        properties.makeDeviceBufferLockable = true;
+        auto storageInfo = memoryManager->createStorageInfoFromProperties(properties);
+        EXPECT_TRUE(storageInfo.isLockable);
+    }
+    {
+        properties.allocationType = AllocationType::IMAGE;
+        auto storageInfo = memoryManager->createStorageInfoFromProperties(properties);
+        EXPECT_FALSE(storageInfo.isLockable);
+    }
+}
+
 TEST_F(MultiDeviceStorageInfoTest, givenGpuTimestampAllocationWhenUsingSingleTileDeviceThenExpectRegularAllocationStorageInfo) {
     AllocationProperties properties{mockRootDeviceIndex,
                                     false,