fix: correct calculation for chunking size

Resolves: NEO-9562

Signed-off-by: John Falkowski <john.falkowski@intel.com>
This commit is contained in:
John Falkowski
2023-12-23 01:23:42 +00:00
committed by Compute-Runtime-Automation
parent 368070069e
commit 138f22f684
3 changed files with 24 additions and 11 deletions

View File

@@ -541,7 +541,7 @@ DECLARE_DEBUG_VARIABLE(bool, EnableBOChunkingPrefetch, false, "Enables prefetchi
DECLARE_DEBUG_VARIABLE(bool, EnableBOChunkingDevMemPrefetch, false, "Enables prefetching of Device Memory chunks")
DECLARE_DEBUG_VARIABLE(bool, EnableBOChunkingPreferredLocationHint, false, "Enables preferred location advise on chunks")
DECLARE_DEBUG_VARIABLE(int32_t, EnableBOChunking, -1, "Enables use of chunking of BOs in the KMD, mask: -1 = default, 0 = no chunking, 1 = shared allocations only, 2 = multi-tile device allocations only, 3 = shared and multi-tile device allocations .")
DECLARE_DEBUG_VARIABLE(int32_t, NumberOfBOChunks, 2, "Number of chunks to use. Must be a power of two")
DECLARE_DEBUG_VARIABLE(int32_t, NumberOfBOChunks, 2, "Number of chunks to use")
DECLARE_DEBUG_VARIABLE(int32_t, SetBOChunkingSize, -1, "Size of chunk in bytes: -1 = default, otherwise power of two chunk size in bytes")
DECLARE_DEBUG_VARIABLE(int32_t, MinimalAllocationSizeForChunking, -1, "2097152: default, >0: size in B. Minimal size an allocation should have to use chunking.")
DECLARE_DEBUG_VARIABLE(int32_t, ForceAutoGrfCompilationMode, -1, "Adds build option -*-intel-enable-auto-large-GRF-mode to force kernel compilation")

View File

@@ -1905,25 +1905,27 @@ size_t DrmMemoryManager::getSizeOfChunk(size_t allocSize) {
size_t chunkSize = MemoryConstants::chunkThreshold;
size_t chunkMask = (~(MemoryConstants::chunkThreshold - 1));
size_t numChunk = debugManager.flags.NumberOfBOChunks.get();
size_t alignSize = alignUp(allocSize, MemoryConstants::pageSize64k);
if (debugManager.flags.SetBOChunkingSize.get() != -1) {
chunkSize = debugManager.flags.SetBOChunkingSize.get() & chunkMask;
if (chunkSize == 0) {
chunkSize = MemoryConstants::chunkThreshold;
}
numChunk = allocSize / chunkSize;
numChunk = alignSize / chunkSize;
if (numChunk < 2) {
numChunk = 2;
}
}
if (numChunk > 1) {
chunkSize = (allocSize / numChunk) & chunkMask;
chunkSize = (alignSize / numChunk) & chunkMask;
if (chunkSize == 0) {
chunkSize = MemoryConstants::chunkThreshold;
}
numChunk = allocSize / chunkSize;
while (((!Math::isPow2(numChunk)) || (chunkSize & (MemoryConstants::chunkThreshold - 1))) && (numChunk > 2)) {
numChunk -= 1;
chunkSize = allocSize / numChunk;
while ((alignSize % chunkSize) && ((alignSize / chunkSize) > 1)) {
chunkSize += MemoryConstants::chunkThreshold;
}
while ((alignSize % chunkSize) && (chunkSize >= (2 * MemoryConstants::chunkThreshold))) {
chunkSize -= MemoryConstants::chunkThreshold;
}
}
return chunkSize;
@@ -1939,7 +1941,8 @@ bool DrmMemoryManager::createDrmChunkedAllocation(Drm *drm, DrmAllocation *alloc
auto memoryInfo = drm->getMemoryInfo();
uint32_t handle = 0;
auto memoryBanks = static_cast<uint32_t>(storageInfo.memoryBanks.to_ulong());
uint32_t numOfChunks = static_cast<uint32_t>(boSize / getSizeOfChunk(boSize));
auto alignSize = alignUp(boSize, MemoryConstants::pageSize64k);
uint32_t numOfChunks = static_cast<uint32_t>(alignSize / getSizeOfChunk(alignSize));
auto gmm = allocation->getGmm(0u);
auto patIndex = drm->getPatIndex(gmm, allocation->getAllocationType(), CacheRegion::defaultRegion, CachePolicy::writeBack, false, !allocation->isAllocatedInLocalMemoryPool());
@@ -2316,18 +2319,19 @@ GraphicsAllocation *DrmMemoryManager::createSharedUnifiedMemoryAllocation(const
BufferObjects bos{};
auto currentAddress = cpuPointer;
auto remainingSize = size;
auto alignSize = alignUp(remainingSize, MemoryConstants::pageSize64k);
auto remainingMemoryBanks = allocationData.storageInfo.memoryBanks;
auto numHandles = GraphicsAllocation::getNumHandlesForKmdSharedAllocation(allocationData.storageInfo.getNumBanks());
bool useChunking = false;
uint32_t numOfChunks = 0;
if (checkAllocationForChunking(size, drm.getMinimalSizeForChunking(),
if (checkAllocationForChunking(alignSize, drm.getMinimalSizeForChunking(),
true, (!executionEnvironment.isDebuggingEnabled()),
(drm.getChunkingMode() & chunkingModeShared), true)) {
numHandles = 1;
useChunking = true;
numOfChunks = static_cast<uint32_t>(size / getSizeOfChunk(size));
numOfChunks = static_cast<uint32_t>(alignSize / getSizeOfChunk(alignSize));
}
const auto memoryPool = MemoryPool::localMemory;