feature: add SetBOChunkingSize debug variable

Resolves: NEO-9562

Signed-off-by: John Falkowski <john.falkowski@intel.com>
This commit is contained in:
John Falkowski
2023-11-30 17:31:22 +00:00
committed by Compute-Runtime-Automation
parent 3349fcaf75
commit e1df8f9112
5 changed files with 108 additions and 7 deletions

View File

@@ -532,7 +532,8 @@ DECLARE_DEBUG_VARIABLE(bool, EnableBOChunkingPrefetch, false, "Enables prefetchi
DECLARE_DEBUG_VARIABLE(bool, EnableBOChunkingDevMemPrefetch, false, "Enables prefetching of Device Memory chunks")
DECLARE_DEBUG_VARIABLE(bool, EnableBOChunkingPreferredLocationHint, false, "Enables preferred location advise on chunks")
DECLARE_DEBUG_VARIABLE(int32_t, EnableBOChunking, -1, "Enables use of chunking of BOs in the KMD, mask: -1 = default, 0 = no chunking, 1 = shared allocations only, 2 = multi-tile device allocations only, 3 = shared and multi-tile device allocations .")
DECLARE_DEBUG_VARIABLE(int32_t, NumberOfBOChunks, 2, "Number of chunks to use. Must be a power of two)")
DECLARE_DEBUG_VARIABLE(int32_t, NumberOfBOChunks, 2, "Number of chunks to use. Must be a power of two")
DECLARE_DEBUG_VARIABLE(int32_t, SetBOChunkingSize, -1, "Size of chunk in bytes: -1 = default, otherwise power of two chunk size in bytes")
DECLARE_DEBUG_VARIABLE(int32_t, MinimalAllocationSizeForChunking, -1, "2097152: default, >0: size in B. Minimal size an allocation should have to use chunking.")
DECLARE_DEBUG_VARIABLE(int32_t, ForceAutoGrfCompilationMode, -1, "Adds build option -*-intel-enable-auto-large-GRF-mode to force kernel compilation")
DECLARE_DEBUG_VARIABLE(int32_t, ForceOCLVersion, 0, "Force specific OpenCL API version")

View File

@@ -1901,12 +1901,40 @@ BufferObject *DrmMemoryManager::createBufferObjectInMemoryRegion(uint32_t rootDe
return bo;
}
// Picks the size, in bytes, of a single chunk for an allocation of allocSize bytes.
//
// Debug flags consulted:
//   - NumberOfBOChunks: the requested number of chunks.
//   - SetBOChunkingSize: when different from -1, a requested chunk size. It is masked
//     with ~(chunkThreshold - 1) (falling back to chunkThreshold when that yields 0)
//     and replaces the requested chunk count with allocSize / size, but never less than 2.
//
// If the resulting chunk count is 1 or less, MemoryConstants::chunkThreshold is returned.
// Otherwise the chunk size starts as allocSize / count masked the same way, and the count
// is then decreased one at a time until it is a power of two and the chunk size has no
// bits set below chunkThreshold, or until the count reaches 2.
size_t DrmMemoryManager::getSizeOfChunk(size_t allocSize) {
    const auto thresholdLowBits = MemoryConstants::chunkThreshold - 1;
    const size_t alignDownMask = ~thresholdLowBits;

    // Clears the bits below chunkThreshold; a result of 0 is replaced by chunkThreshold.
    const auto alignDownOrThreshold = [alignDownMask](size_t value) -> size_t {
        const size_t aligned = value & alignDownMask;
        return (aligned != 0) ? aligned : static_cast<size_t>(MemoryConstants::chunkThreshold);
    };

    size_t chunkCount = debugManager.flags.NumberOfBOChunks.get();

    const auto forcedChunkSize = debugManager.flags.SetBOChunkingSize.get();
    if (forcedChunkSize != -1) {
        // An explicit chunk size overrides the requested chunk count.
        const size_t alignedForcedSize = alignDownOrThreshold(static_cast<size_t>(forcedChunkSize));
        const size_t derivedCount = allocSize / alignedForcedSize;
        chunkCount = (derivedCount < 2) ? 2 : derivedCount;
    }

    if (chunkCount <= 1) {
        return MemoryConstants::chunkThreshold;
    }

    size_t chunkSize = alignDownOrThreshold(allocSize / chunkCount);
    chunkCount = allocSize / chunkSize;

    // True while the count is not a power of two or the size has bits set below chunkThreshold.
    const auto needsFewerChunks = [&]() {
        return !Math::isPow2(chunkCount) || (chunkSize & thresholdLowBits) != 0;
    };
    while (needsFewerChunks() && chunkCount > 2) {
        --chunkCount;
        chunkSize = allocSize / chunkCount;
    }

    return chunkSize;
}
bool DrmMemoryManager::createDrmChunkedAllocation(Drm *drm, DrmAllocation *allocation, uint64_t boAddress, size_t boSize, size_t maxOsContextCount) {
auto &storageInfo = allocation->storageInfo;
auto memoryInfo = drm->getMemoryInfo();
uint32_t handle = 0;
auto memoryBanks = static_cast<uint32_t>(storageInfo.memoryBanks.to_ulong());
uint32_t numOfChunks = debugManager.flags.NumberOfBOChunks.get();
uint32_t numOfChunks = static_cast<uint32_t>(boSize / getSizeOfChunk(boSize));
auto gmm = allocation->getGmm(0u);
auto patIndex = drm->getPatIndex(gmm, allocation->getAllocationType(), CacheRegion::Default, CachePolicy::WriteBack, false, !allocation->isAllocatedInLocalMemoryPool());
@@ -1948,9 +1976,7 @@ bool DrmMemoryManager::createDrmAllocation(Drm *drm, DrmAllocation *allocation,
(drm->getChunkingMode() & 0x02)) {
boTotalChunkSize = allocation->getUnderlyingBufferSize();
uint32_t numOfChunks = debugManager.flags.NumberOfBOChunks.get();
size_t chunkingSize = boTotalChunkSize / numOfChunks;
size_t chunkingSize = getSizeOfChunk(boTotalChunkSize);
// Do not chunk for sizes less than chunkThreshold
// Do not chunk for single tile device memory
@@ -2300,8 +2326,8 @@ GraphicsAllocation *DrmMemoryManager::createSharedUnifiedMemoryAllocation(const
auto numHandles = GraphicsAllocation::getNumHandlesForKmdSharedAllocation(allocationData.storageInfo.getNumBanks());
bool useChunking = false;
uint32_t numOfChunks = debugManager.flags.NumberOfBOChunks.get();
size_t chunkingSize = size / numOfChunks;
size_t chunkingSize = getSizeOfChunk(size);
uint32_t numOfChunks = static_cast<uint32_t>(size / getSizeOfChunk(size));
// Don't chunk for sizes less than chunkThreshold or if debugging is enabled
if (!executionEnvironment.isDebuggingEnabled() &&

View File

@@ -95,6 +95,7 @@ class DrmMemoryManager : public MemoryManager {
void createDeviceSpecificMemResources(uint32_t rootDeviceIndex) override;
bool allowIndirectAllocationsAsPack(uint32_t rootDeviceIndex) override;
Drm &getDrm(uint32_t rootDeviceIndex) const;
// Returns the size in bytes of one chunk for an allocation of allocSize bytes.
// The result depends on the NumberOfBOChunks and SetBOChunkingSize debug flags.
size_t getSizeOfChunk(size_t allocSize);
protected:
void registerSharedBoHandleAllocation(DrmAllocation *drmAllocation);

View File

@@ -524,6 +524,7 @@ EnableBOChunkingPrefetch = 0
EnableBOChunkingDevMemPrefetch = 0
EnableBOChunkingPreferredLocationHint = 0
NumberOfBOChunks = 2
SetBOChunkingSize = -1
EnableBOChunking = -1
MinimalAllocationSizeForChunking = -1
DirectSubmissionControllerMaxTimeout = -1

View File

@@ -5300,6 +5300,78 @@ TEST_F(DrmMemoryManagerTest, givenPageFaultIsSupportedWhenCallingBindBoOnAllocat
}
}
TEST_F(DrmMemoryManagerWithLocalMemoryAndExplicitExpectationsTest, givenDrmMemoryManagerWhenGetSizeOfChunkFor2ChunksThenActualValueReturned) {
    // Two chunks requested for a 2097152-byte allocation: each chunk is half of it.
    DebugManagerStateRestore restorer;
    debugManager.flags.NumberOfBOChunks.set(2);

    const size_t allocationSize = 2097152;
    const size_t expectedChunkSize = 1048576;

    const size_t actualChunkSize = memoryManager->getSizeOfChunk(allocationSize);

    EXPECT_EQ(expectedChunkSize, actualChunkSize);
}
TEST_F(DrmMemoryManagerWithLocalMemoryAndExplicitExpectationsTest, givenDrmMemoryManagerWhenGetSizeOfChunkFor3ChunksThenCorrectedValueReturned) {
    // Three is not a power of two, so the chunk count is expected to drop to two,
    // giving half of the 2097152-byte allocation per chunk.
    DebugManagerStateRestore restorer;
    debugManager.flags.NumberOfBOChunks.set(3);

    const size_t allocationSize = 2097152;
    const size_t expectedChunkSize = 1048576;

    const size_t actualChunkSize = memoryManager->getSizeOfChunk(allocationSize);

    EXPECT_EQ(expectedChunkSize, actualChunkSize);
}
TEST_F(DrmMemoryManagerWithLocalMemoryAndExplicitExpectationsTest, givenDrmMemoryManagerWhenGetSizeOfChunkFor1ChunkThenDefaultMinimumChunkSizeReturned) {
    // A single requested chunk is expected to yield the 65536-byte default chunk size.
    DebugManagerStateRestore restorer;
    debugManager.flags.NumberOfBOChunks.set(1);

    const size_t allocationSize = 2097152;
    const size_t expectedChunkSize = 65536;

    const size_t actualChunkSize = memoryManager->getSizeOfChunk(allocationSize);

    EXPECT_EQ(expectedChunkSize, actualChunkSize);
}
TEST_F(DrmMemoryManagerWithLocalMemoryAndExplicitExpectationsTest, givenDrmMemoryManagerWhenGetSizeOfChunkForTooManyChunksThenDefaultMinimumChunkSizeReturned) {
    // 10000 chunks would make each chunk smaller than 65536 bytes for this allocation,
    // so the 65536-byte default chunk size is expected instead.
    DebugManagerStateRestore restorer;
    debugManager.flags.NumberOfBOChunks.set(10000);

    const size_t allocationSize = 2097152;
    const size_t expectedChunkSize = 65536;

    const size_t actualChunkSize = memoryManager->getSizeOfChunk(allocationSize);

    EXPECT_EQ(expectedChunkSize, actualChunkSize);
}
TEST_F(DrmMemoryManagerWithLocalMemoryAndExplicitExpectationsTest, givenDrmMemoryManagerWhenSetChunkSizeThenSameSizeReturned) {
    // An explicit chunk size of 65536 bytes is expected to be returned unchanged.
    DebugManagerStateRestore restorer;
    debugManager.flags.SetBOChunkingSize.set(65536);

    const size_t allocationSize = 2097152;
    const size_t expectedChunkSize = 65536;

    const size_t actualChunkSize = memoryManager->getSizeOfChunk(allocationSize);

    EXPECT_EQ(expectedChunkSize, actualChunkSize);
}
TEST_F(DrmMemoryManagerWithLocalMemoryAndExplicitExpectationsTest, givenDrmMemoryManagerWhenSetChunkSizeThenCorrectedSizeReturned) {
    // An explicit chunk size of 100000 bytes is expected to be corrected down to 65536 bytes.
    DebugManagerStateRestore restorer;
    debugManager.flags.SetBOChunkingSize.set(100000);

    const size_t allocationSize = 2097152;
    const size_t expectedChunkSize = 65536;

    const size_t actualChunkSize = memoryManager->getSizeOfChunk(allocationSize);

    EXPECT_EQ(expectedChunkSize, actualChunkSize);
}
TEST_F(DrmMemoryManagerWithLocalMemoryAndExplicitExpectationsTest, givenDrmMemoryManagerWhenSetChunkSizeTooLargeThenCorrectedSizeReturned) {
    // An explicit chunk size larger than the allocation itself is expected to be
    // corrected to half of the 2097152-byte allocation.
    DebugManagerStateRestore restorer;
    debugManager.flags.SetBOChunkingSize.set(4000000);

    const size_t allocationSize = 2097152;
    const size_t expectedChunkSize = 1048576;

    const size_t actualChunkSize = memoryManager->getSizeOfChunk(allocationSize);

    EXPECT_EQ(expectedChunkSize, actualChunkSize);
}
TEST_F(DrmMemoryManagerWithLocalMemoryAndExplicitExpectationsTest, givenDrmMemoryManagerWhenSetChunkSizeTooSmallThenCorrectedSizeReturned) {
    // An explicit chunk size of 4000 bytes is expected to be corrected up to 65536 bytes.
    DebugManagerStateRestore restorer;
    debugManager.flags.SetBOChunkingSize.set(4000);

    const size_t allocationSize = 2097152;
    const size_t expectedChunkSize = 65536;

    const size_t actualChunkSize = memoryManager->getSizeOfChunk(allocationSize);

    EXPECT_EQ(expectedChunkSize, actualChunkSize);
}
TEST_F(DrmMemoryManagerWithLocalMemoryAndExplicitExpectationsTest, givenDrmMemoryManagerWhenCreateBufferObjectInMemoryRegionIsCalledWithoutMemoryInfoThenNullBufferObjectIsReturned) {
mock->memoryInfo.reset(nullptr);