Gmm construction cleanup

Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
Author: Bartosz Dunajski
Date: 2022-02-07 14:27:53 +00:00
Committed by: Compute-Runtime-Automation
parent 82ad3d61be
commit c88fce0def
43 changed files with 285 additions and 284 deletions
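
Summary: the buffer Gmm constructors lose their bool systemMemoryPool parameter and the shorter convenience overloads are removed, applyMemoryFlags now takes only the StorageInfo and derives the system-memory decision from it (storageInfo.getMemoryBanks() == 0), the useSystemMemoryPool member assignment disappears from applyMemoryFlags, and every call site passes preferCompressed, the StorageInfo and allowLargePages explicitly. The block below is only a condensed restatement of the header hunk further down, plus one sample call-site migration in comments; it is not additional code introduced by the commit, and the line breaks are added here for readability.

// Buffer constructors before the cleanup (removed by this commit):
Gmm(GmmClientContext *clientContext, const void *alignedPtr, size_t alignedSize, size_t alignment, bool uncacheable);
Gmm(GmmClientContext *clientContext, const void *alignedPtr, size_t alignedSize, size_t alignment,
    bool uncacheable, bool preferCompressed, bool systemMemoryPool, StorageInfo storageInfo);
Gmm(GmmClientContext *clientContext, const void *alignedPtr, size_t alignedSize, size_t alignment,
    bool uncacheable, bool preferCompressed, bool systemMemoryPool, StorageInfo storageInfo, bool allowLargePages);

// The single buffer constructor that remains:
Gmm(GmmClientContext *clientContext, const void *alignedPtr, size_t alignedSize, size_t alignment,
    bool uncacheable, bool preferCompressed, StorageInfo storageInfo, bool allowLargePages);

// Typical call-site migration (see the memory-manager hunks below):
//   old: ..., allocationData.flags.uncacheable, true, allocationData.flags.useSystemMemory, allocationData.storageInfo);
//   new: ..., allocationData.flags.uncacheable, true, allocationData.storageInfo, true);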

View File

@@ -18,13 +18,7 @@
#include "shared/source/helpers/surface_format_info.h"
namespace NEO {
Gmm::Gmm(GmmClientContext *clientContext, const void *alignedPtr, size_t alignedSize, size_t alignment, bool uncacheable) : Gmm(clientContext, alignedPtr, alignedSize, alignment, uncacheable, false, true, {}) {}
Gmm::Gmm(GmmClientContext *clientContext, const void *alignedPtr, size_t alignedSize, size_t alignment, bool uncacheable, bool preferCompressed, bool systemMemoryPool, StorageInfo storageInfo)
: Gmm(clientContext, alignedPtr, alignedSize, alignment, uncacheable, preferCompressed, systemMemoryPool, storageInfo, true) {
}
Gmm::Gmm(GmmClientContext *clientContext, const void *alignedPtr, size_t alignedSize, size_t alignment, bool uncacheable, bool preferCompressed, bool systemMemoryPool, StorageInfo storageInfo, bool allowLargePages) : clientContext(clientContext) {
Gmm::Gmm(GmmClientContext *clientContext, const void *alignedPtr, size_t alignedSize, size_t alignment, bool uncacheable, bool preferCompressed, StorageInfo storageInfo, bool allowLargePages) : clientContext(clientContext) {
resourceParams.Type = RESOURCE_BUFFER;
resourceParams.Format = GMM_FORMAT_GENERIC_8BIT;
resourceParams.BaseWidth64 = static_cast<uint64_t>(alignedSize);
@@ -60,7 +54,7 @@ Gmm::Gmm(GmmClientContext *clientContext, const void *alignedPtr, size_t aligned
}
applyAuxFlagsForBuffer(preferCompressed);
applyMemoryFlags(systemMemoryPool, storageInfo);
applyMemoryFlags(storageInfo);
applyAppResource(storageInfo);
applyDebugOverrides();
@@ -77,7 +71,7 @@ Gmm::~Gmm() = default;
Gmm::Gmm(GmmClientContext *clientContext, ImageInfo &inputOutputImgInfo, StorageInfo storageInfo, bool preferCompressed) : clientContext(clientContext) {
this->resourceParams = {};
setupImageResourceParams(inputOutputImgInfo, preferCompressed);
applyMemoryFlags(!inputOutputImgInfo.useLocalMemory, storageInfo);
applyMemoryFlags(storageInfo);
applyAppResource(storageInfo);
applyDebugOverrides();
@@ -338,9 +332,9 @@ uint32_t Gmm::getAuxQPitch() {
return this->gmmResourceInfo->getAuxQPitch();
}
void Gmm::applyMemoryFlags(bool systemMemoryPool, StorageInfo &storageInfo) {
this->useSystemMemoryPool = systemMemoryPool;
void Gmm::applyMemoryFlags(StorageInfo &storageInfo) {
auto hardwareInfo = clientContext->getHardwareInfo();
bool systemMemoryPool = (storageInfo.getMemoryBanks() == 0);
if (hardwareInfo->featureTable.flags.ftrLocalMemory) {
if (systemMemoryPool) {
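
The hunk above is cut off mid-function, so here is a small self-contained illustration of the rule it introduces, using a stand-in for NEO::StorageInfo since the real headers are not part of this page: an allocation whose StorageInfo selects no device memory banks is treated as a system-memory allocation, replacing the systemMemoryPool flag that callers used to pass in.

// Stand-alone illustration of the new rule in applyMemoryFlags (not repository code).
#include <bitset>
#include <cstdio>

struct StorageInfoLike {              // stand-in for NEO::StorageInfo
    std::bitset<4> memoryBanks;       // four banks are enough for the illustration
    unsigned long getMemoryBanks() const { return memoryBanks.to_ulong(); }
};

int main() {
    StorageInfoLike systemAlloc;      // no banks selected -> system memory
    StorageInfoLike localAlloc;
    localAlloc.memoryBanks = 0b01;    // bank 0 selected -> device local memory

    for (const auto &info : {systemAlloc, localAlloc}) {
        bool systemMemoryPool = (info.getMemoryBanks() == 0);
        std::printf("memoryBanks=0x%lx -> systemMemoryPool=%d\n",
                    info.getMemoryBanks(), static_cast<int>(systemMemoryPool));
    }
    return 0;
}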

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
* Copyright (C) 2018-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -25,15 +25,13 @@ class Gmm {
virtual ~Gmm();
Gmm() = delete;
Gmm(GmmClientContext *clientContext, ImageInfo &inputOutputImgInfo, StorageInfo storageInfo, bool preferCompressed);
Gmm(GmmClientContext *clientContext, const void *alignedPtr, size_t alignedSize, size_t alignment, bool uncacheable);
Gmm(GmmClientContext *clientContext, const void *alignedPtr, size_t alignedSize, size_t alignment, bool uncacheable, bool preferCompressed, bool systemMemoryPool, StorageInfo storageInfo);
Gmm(GmmClientContext *clientContext, const void *alignedPtr, size_t alignedSize, size_t alignment, bool uncacheable, bool preferCompressed, bool systemMemoryPool, StorageInfo storageInfo, bool allowLargePages);
Gmm(GmmClientContext *clientContext, const void *alignedPtr, size_t alignedSize, size_t alignment, bool uncacheable, bool preferCompressed, StorageInfo storageInfo, bool allowLargePages);
Gmm(GmmClientContext *clientContext, GMM_RESOURCE_INFO *inputGmm);
void queryImageParams(ImageInfo &inputOutputImgInfo);
void applyAuxFlagsForBuffer(bool preferCompression);
void applyMemoryFlags(bool systemMemoryPool, StorageInfo &storageInfo);
void applyMemoryFlags(StorageInfo &storageInfo);
void applyAppResource(StorageInfo &storageInfo);
bool unifiedAuxTranslationCapable() const;
@@ -51,7 +49,6 @@ class Gmm {
std::unique_ptr<GmmResourceInfo> gmmResourceInfo;
bool isCompressionEnabled = false;
bool useSystemMemoryPool = true;
protected:
void applyAuxFlagsForImage(ImageInfo &imgInfo, bool preferCompressed);

View File

@@ -86,7 +86,7 @@ GraphicsAllocation *OsAgnosticMemoryManager::allocateGraphicsMemoryWithAlignment
counter, MemoryPool::System4KBPages, allocationData.rootDeviceIndex, allocationData.flags.uncacheable, allocationData.flags.flushL3, false);
if (allocationData.type == AllocationType::SVM_CPU) {
//add padding in case mapPtr is not aligned
// add padding in case mapPtr is not aligned
size_t reserveSize = sizeAligned + alignment;
void *gpuPtr = reserveCpuAddressRange(reserveSize, allocationData.rootDeviceIndex);
if (!gpuPtr) {
@@ -112,8 +112,8 @@ GraphicsAllocation *OsAgnosticMemoryManager::allocateGraphicsMemoryWithAlignment
alignment,
allocationData.flags.uncacheable,
true,
allocationData.flags.useSystemMemory,
allocationData.storageInfo);
allocationData.storageInfo,
true);
memoryAllocation->setDefaultGmm(gmm.release());
}
}
@@ -163,8 +163,7 @@ GraphicsAllocation *OsAgnosticMemoryManager::allocateGraphicsMemory64kb(const Al
allocationDataAlign.alignment,
allocationData.flags.uncacheable,
allocationData.flags.preferCompressed,
allocationData.flags.useSystemMemory,
allocationData.storageInfo);
allocationData.storageInfo, true);
memoryAllocation->setDefaultGmm(gmm.release());
}
}
@@ -349,7 +348,7 @@ void OsAgnosticMemoryManager::cleanOsHandles(OsHandleStorage &handleStorage, uin
GraphicsAllocation *OsAgnosticMemoryManager::allocateMemoryByKMD(const AllocationData &allocationData) {
auto gmm = std::make_unique<Gmm>(executionEnvironment.rootDeviceEnvironments[allocationData.rootDeviceIndex]->getGmmClientContext(), allocationData.hostPtr,
allocationData.size, 0u, false, allocationData.flags.preferCompressed, allocationData.flags.useSystemMemory, allocationData.storageInfo);
allocationData.size, 0u, false, allocationData.flags.preferCompressed, allocationData.storageInfo, true);
GraphicsAllocation *alloc = nullptr;
@@ -483,8 +482,8 @@ GraphicsAllocation *OsAgnosticMemoryManager::allocateGraphicsMemoryInDevicePool(
MemoryConstants::pageSize64k,
allocationData.flags.uncacheable,
true,
allocationData.flags.useSystemMemory,
allocationData.storageInfo);
allocationData.storageInfo,
true);
}
}

View File

@@ -287,7 +287,7 @@ DrmAllocation *DrmMemoryManager::allocateGraphicsMemoryWithAlignmentImpl(const A
size_t alignedVirtualAdressRangeSize = cSize;
auto svmCpuAllocation = allocationData.type == AllocationType::SVM_CPU;
if (svmCpuAllocation) {
//add padding in case reserved addr is not aligned
// add padding in case reserved addr is not aligned
alignedStorageSize = alignUp(cSize, cAlignment);
alignedVirtualAdressRangeSize = alignedStorageSize + cAlignment;
}
@@ -481,7 +481,9 @@ DrmAllocation *DrmMemoryManager::allocateGraphicsMemory64kb(const AllocationData
}
GraphicsAllocation *DrmMemoryManager::allocateMemoryByKMD(const AllocationData &allocationData) {
auto gmm = std::make_unique<Gmm>(executionEnvironment.rootDeviceEnvironments[allocationData.rootDeviceIndex]->getGmmClientContext(), allocationData.hostPtr, allocationData.size, 0u, false);
StorageInfo systemMemoryStorageInfo = {};
auto gmm = std::make_unique<Gmm>(executionEnvironment.rootDeviceEnvironments[allocationData.rootDeviceIndex]->getGmmClientContext(), allocationData.hostPtr,
allocationData.size, 0u, false, false, systemMemoryStorageInfo, true);
size_t bufferSize = allocationData.size;
uint64_t gpuRange = acquireGpuRange(bufferSize, allocationData.rootDeviceIndex, HeapIndex::HEAP_STANDARD64KB);
@@ -916,7 +918,7 @@ void DrmMemoryManager::cleanOsHandles(OsHandleStorage &handleStorage, uint32_t r
}
bool DrmMemoryManager::setDomainCpu(GraphicsAllocation &graphicsAllocation, bool writeEnable) {
DEBUG_BREAK_IF(writeEnable); //unsupported path (for CPU writes call SW_FINISH ioctl in unlockResource)
DEBUG_BREAK_IF(writeEnable); // unsupported path (for CPU writes call SW_FINISH ioctl in unlockResource)
auto bo = static_cast<DrmAllocation *>(&graphicsAllocation)->getBO();
if (bo == nullptr)
@@ -1115,8 +1117,8 @@ void *DrmMemoryManager::lockResourceInLocalMemoryImpl(GraphicsAllocation &graphi
auto addr = lockResourceInLocalMemoryImpl(bo);
auto alignedAddr = alignUp(addr, MemoryConstants::pageSize64k);
auto notUsedSize = ptrDiff(alignedAddr, addr);
//call unmap to free the unaligned pages preceding the BO allocation and
//adjust the pointer in the CPU mapping to the beginning of the BO allocation
// call unmap to free the unaligned pages preceding the BO allocation and
// adjust the pointer in the CPU mapping to the beginning of the BO allocation
munmapFunction(addr, notUsedSize);
bo->setLockedAddress(alignedAddr);
return bo->peekLockedAddress();
@@ -1196,8 +1198,8 @@ void createColouredGmms(GmmClientContext *clientContext, DrmAllocation &allocati
0u,
false,
compression,
false,
limitedStorageInfo);
limitedStorageInfo,
true);
allocation.setGmm(gmm, handleId);
}
}
@@ -1208,7 +1210,7 @@ void fillGmmsInAllocation(GmmClientContext *clientContext, DrmAllocation *alloca
StorageInfo limitedStorageInfo = storageInfo;
limitedStorageInfo.memoryBanks &= 1u << handleId;
limitedStorageInfo.pageTablesVisibility &= 1u << handleId;
auto gmm = new Gmm(clientContext, nullptr, alignedSize, 0u, false, false, false, limitedStorageInfo);
auto gmm = new Gmm(clientContext, nullptr, alignedSize, 0u, false, false, limitedStorageInfo, true);
allocation->setGmm(gmm, handleId);
}
}
@@ -1279,8 +1281,8 @@ GraphicsAllocation *DrmMemoryManager::allocateGraphicsMemoryInDevicePool(const A
0u,
allocationData.flags.uncacheable,
allocationData.flags.preferCompressed,
false,
allocationData.storageInfo);
allocationData.storageInfo,
true);
}
}
@@ -1396,8 +1398,8 @@ bool DrmMemoryManager::createDrmAllocation(Drm *drm, DrmAllocation *allocation,
}
uint32_t memoryBanks = static_cast<uint32_t>(storageInfo.memoryBanks.to_ulong());
if (storageInfo.getNumBanks() > 1) {
//check if we have this bank, if not move to next one
//we may have holes in memoryBanks that we need to skip i.e. memoryBanks 1101 and 3 handle allocation
// check if we have this bank, if not move to next one
// we may have holes in memoryBanks that we need to skip i.e. memoryBanks 1101 and 3 handle allocation
while (!(memoryBanks & (1u << currentBank))) {
currentBank++;
}
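
The comment about holes in memoryBanks describes the only non-trivial part of this loop: handle ids are dense (0, 1, 2, ...) while the bank mask may not be. A stand-alone sketch of that skip logic follows; the increment of currentBank after a bank is consumed is implied by the surrounding code, which this hunk does not show.

// Stand-alone sketch (not repository code) of skipping holes in the bank mask:
// with memoryBanks = 0b1101 and a 3-handle allocation, handle 0 maps to bank 0,
// handle 1 to bank 2 and handle 2 to bank 3; bank 1 is the hole that gets skipped.
#include <cstdint>
#include <cstdio>

int main() {
    uint32_t memoryBanks = 0b1101u;
    uint32_t currentBank = 0;
    for (uint32_t handleId = 0; handleId < 3; ++handleId) {
        // check if we have this bank, if not move to the next one
        while (!(memoryBanks & (1u << currentBank))) {
            currentBank++;
        }
        std::printf("handle %u -> bank %u\n", handleId, currentBank);
        currentBank++; // assumed: advance past the bank that was just used
    }
    return 0;
}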

View File

@@ -69,7 +69,9 @@ GraphicsAllocation *WddmMemoryManager::allocateMemoryByKMD(const AllocationData
if (allocationData.size > getHugeGfxMemoryChunkSize(GfxMemoryAllocationMethod::AllocateByKmd)) {
return allocateHugeGraphicsMemory(allocationData, false);
}
auto gmm = std::make_unique<Gmm>(executionEnvironment.rootDeviceEnvironments[allocationData.rootDeviceIndex]->getGmmClientContext(), allocationData.hostPtr, allocationData.size, 0u, false);
StorageInfo systemMemoryStorageInfo = {};
auto gmm = std::make_unique<Gmm>(executionEnvironment.rootDeviceEnvironments[allocationData.rootDeviceIndex]->getGmmClientContext(), allocationData.hostPtr, allocationData.size, 0u,
false, false, systemMemoryStorageInfo, true);
auto allocation = std::make_unique<WddmAllocation>(allocationData.rootDeviceIndex,
1u, // numGmms
allocationData.type, nullptr, allocationData.size, nullptr,
@@ -121,7 +123,7 @@ GraphicsAllocation *WddmMemoryManager::allocateGraphicsMemoryUsingKmdAndMapItToC
auto gmm = new Gmm(executionEnvironment.rootDeviceEnvironments[allocationData.rootDeviceIndex]->getGmmClientContext(), nullptr,
sizeAligned, 0u,
allocationData.flags.uncacheable,
allocationData.flags.preferCompressed, true,
allocationData.flags.preferCompressed,
allocationData.storageInfo,
allowLargePages);
wddmAllocation->setDefaultGmm(gmm);
@@ -186,7 +188,7 @@ GraphicsAllocation *WddmMemoryManager::allocateHugeGraphicsMemory(const Allocati
for (auto gmmId = 0u; gmmId < numGmms; ++gmmId) {
auto size = sizeRemaining > chunkSize ? chunkSize : sizeRemaining;
auto gmm = new Gmm(executionEnvironment.rootDeviceEnvironments[allocationData.rootDeviceIndex]->getGmmClientContext(),
static_cast<char *>(alignedPtr) + gmmId * chunkSize, size, 0u, uncacheable);
static_cast<char *>(alignedPtr) + gmmId * chunkSize, size, 0u, uncacheable, false, {}, true);
wddmAllocation->setGmm(gmm, gmmId);
sizeRemaining -= size;
}
@@ -240,12 +242,12 @@ GraphicsAllocation *WddmMemoryManager::allocateSystemMemoryAndCreateGraphicsAllo
wddmAllocation->setDriverAllocatedCpuPtr(pSysMem);
gmm = new Gmm(executionEnvironment.rootDeviceEnvironments[allocationData.rootDeviceIndex]->getGmmClientContext(), pSysMem, sizeAligned, 0u,
allocationData.flags.uncacheable, allocationData.flags.preferCompressed, true, allocationData.storageInfo);
allocationData.flags.uncacheable, allocationData.flags.preferCompressed, allocationData.storageInfo, true);
wddmAllocation->setDefaultGmm(gmm);
void *mapPtr = wddmAllocation->getAlignedCpuPtr();
if (allocationData.type == AllocationType::SVM_CPU) {
//add padding in case mapPtr is not aligned
// add padding in case mapPtr is not aligned
size_t reserveSizeAligned = sizeAligned + allocationData.alignment;
bool ret = getWddm(wddmAllocation->getRootDeviceIndex()).reserveValidAddressRange(reserveSizeAligned, mapPtr);
if (!ret) {
@@ -284,7 +286,7 @@ GraphicsAllocation *WddmMemoryManager::allocateGraphicsMemoryForNonSvmHostPtr(co
auto offsetInPage = ptrDiff(allocationData.hostPtr, alignedPtr);
wddmAllocation->setAllocationOffset(offsetInPage);
auto gmm = new Gmm(executionEnvironment.rootDeviceEnvironments[allocationData.rootDeviceIndex]->getGmmClientContext(), alignedPtr, alignedSize, 0u, false);
auto gmm = new Gmm(executionEnvironment.rootDeviceEnvironments[allocationData.rootDeviceIndex]->getGmmClientContext(), alignedPtr, alignedSize, 0u, false, false, {}, true);
wddmAllocation->setDefaultGmm(gmm);
@@ -320,7 +322,7 @@ GraphicsAllocation *WddmMemoryManager::allocateGraphicsMemoryWithHostPtr(const A
maxOsContextCount);
allocation->setAllocationOffset(offset);
Gmm *gmm = new Gmm(executionEnvironment.rootDeviceEnvironments[allocationData.rootDeviceIndex]->getGmmClientContext(), ptrAligned, sizeAligned, 0u, false);
Gmm *gmm = new Gmm(executionEnvironment.rootDeviceEnvironments[allocationData.rootDeviceIndex]->getGmmClientContext(), ptrAligned, sizeAligned, 0u, false, false, {}, true);
allocation->setDefaultGmm(gmm);
if (createWddmAllocation(allocation, reserve)) {
return allocation;
@@ -364,7 +366,7 @@ GraphicsAllocation *WddmMemoryManager::allocate32BitGraphicsMemoryImpl(const All
wddmAllocation->setAllocationOffset(offset);
wddmAllocation->allocInFrontWindowPool = allocationData.flags.use32BitFrontWindow;
gmm = new Gmm(executionEnvironment.rootDeviceEnvironments[allocationData.rootDeviceIndex]->getGmmClientContext(), ptrAligned, sizeAligned, 0u, false);
gmm = new Gmm(executionEnvironment.rootDeviceEnvironments[allocationData.rootDeviceIndex]->getGmmClientContext(), ptrAligned, sizeAligned, 0u, false, false, {}, true);
wddmAllocation->setDefaultGmm(gmm);
if (!createWddmAllocation(wddmAllocation.get(), nullptr)) {
@@ -594,7 +596,7 @@ MemoryManager::AllocationStatus WddmMemoryManager::populateOsHandles(OsHandleSto
handleStorage.fragmentStorageData[i].residency = new ResidencyData(maxOsContextCount);
osHandle->gmm = new Gmm(executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getGmmClientContext(), handleStorage.fragmentStorageData[i].cpuPtr,
handleStorage.fragmentStorageData[i].fragmentSize, 0u, false);
handleStorage.fragmentStorageData[i].fragmentSize, 0u, false, false, {}, true);
allocatedFragmentIndexes[allocatedFragmentsCounter] = i;
allocatedFragmentsCounter++;
}
@@ -800,26 +802,26 @@ void WddmMemoryManager::releaseReservedCpuAddressRange(void *reserved, size_t si
}
bool WddmMemoryManager::isCpuCopyRequired(const void *ptr) {
//check if any device support local memory
// check if any device support local memory
if (std::all_of(this->localMemorySupported.begin(), this->localMemorySupported.end(), [](bool value) { return !value; })) {
return false;
}
//function checks what is the delta between reading from cachead memory
//compare to reading from provided pointer
//if value is above threshold, it means that pointer is uncached.
// function checks what is the delta between reading from cached memory
// compared to reading from the provided pointer
// if value is above threshold, it means that pointer is uncached.
constexpr auto slownessFactor = 50u;
static int64_t meassurmentOverhead = std::numeric_limits<int64_t>::max();
static int64_t fastestLocalRead = std::numeric_limits<int64_t>::max();
//local variable that we will read for comparison
// local variable that we will read for comparison
int cacheable = 1;
volatile int *localVariablePointer = &cacheable;
volatile const int *volatileInputPtr = static_cast<volatile const int *>(ptr);
int64_t timestamp0, timestamp1, localVariableReadDelta, inputPointerReadDelta;
//compute timing overhead
// compute timing overhead
_mm_lfence();
timestamp0 = __rdtsc();
_mm_lfence();
@@ -830,13 +832,13 @@ bool WddmMemoryManager::isCpuCopyRequired(const void *ptr) {
meassurmentOverhead = timestamp1 - timestamp0;
}
//dummy read
// dummy read
cacheable = *localVariablePointer;
_mm_lfence();
timestamp0 = __rdtsc();
_mm_lfence();
//do read
// do read
cacheable = *localVariablePointer;
_mm_lfence();
timestamp1 = __rdtsc();
@@ -848,7 +850,7 @@ bool WddmMemoryManager::isCpuCopyRequired(const void *ptr) {
if (localVariableReadDelta < fastestLocalRead) {
fastestLocalRead = localVariableReadDelta;
}
//dummy read
// dummy read
cacheable = *volatileInputPtr;
_mm_lfence();
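
The hunks above only reflow comments, but they sit in isCpuCopyRequired, whose logic is worth spelling out: it times a read through the caller's pointer against a read of a cacheable local variable, each bracketed by _mm_lfence()/__rdtsc(), and if the pointer read is more than slownessFactor (50) times slower than the fastest local read, the memory is treated as uncached and a CPU copy path is required. A simplified x86-only sketch of that timing pattern (GCC/Clang intrinsics) follows; the real function additionally subtracts the measured rdtsc overhead, performs warm-up dummy reads and caches the fastest local read across calls, all of which is elided here.

// Simplified sketch of the rdtsc-based "is this pointer uncached?" heuristic
// (x86, GCC/Clang intrinsics; not the repository implementation).
#include <cstdint>
#include <x86intrin.h>

static int64_t timedRead(volatile const int *ptr) {
    _mm_lfence();
    int64_t t0 = __rdtsc();
    _mm_lfence();
    int sink = *ptr; // the read being measured
    (void)sink;
    _mm_lfence();
    int64_t t1 = __rdtsc();
    return t1 - t0;
}

bool looksUncached(const void *ptr) {
    constexpr int64_t slownessFactor = 50; // threshold used in the hunk above
    int cacheable = 1;                     // local, cacheable reference read
    int64_t localDelta = timedRead(&cacheable);
    int64_t inputDelta = timedRead(static_cast<volatile const int *>(ptr));
    return inputDelta > slownessFactor * localDelta;
}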
@@ -919,8 +921,7 @@ void createColouredGmms(GmmClientContext *clientContext, WddmAllocation &allocat
0u,
false,
compression,
false,
limitedStorageInfo);
limitedStorageInfo, true);
allocation.setGmm(gmm, handleId);
}
}
@@ -930,7 +931,7 @@ void fillGmmsInAllocation(GmmClientContext *clientContext, WddmAllocation *alloc
StorageInfo limitedStorageInfo = storageInfo;
limitedStorageInfo.memoryBanks &= static_cast<uint32_t>(1u << handleId);
limitedStorageInfo.pageTablesVisibility &= static_cast<uint32_t>(1u << handleId);
auto gmm = new Gmm(clientContext, nullptr, allocation->getAlignedSize(), 0u, false, false, false, limitedStorageInfo);
auto gmm = new Gmm(clientContext, nullptr, allocation->getAlignedSize(), 0u, false, false, limitedStorageInfo, true);
allocation->setGmm(gmm, handleId);
}
}
@@ -939,7 +940,7 @@ void splitGmmsInAllocation(GmmClientContext *clientContext, WddmAllocation *wddm
auto sizeRemaining = wddmAllocation->getAlignedSize();
for (auto gmmId = 0u; gmmId < wddmAllocation->getNumGmms(); ++gmmId) {
auto size = sizeRemaining > chunkSize ? chunkSize : sizeRemaining;
auto gmm = new Gmm(clientContext, nullptr, size, alignment, false, false, false, storageInfo);
auto gmm = new Gmm(clientContext, nullptr, size, alignment, false, false, storageInfo, true);
wddmAllocation->setGmm(gmm, gmmId);
sizeRemaining -= size;
}
@@ -989,8 +990,8 @@ GraphicsAllocation *WddmMemoryManager::allocateGraphicsMemoryInDevicePool(const
alignment,
allocationData.flags.uncacheable,
allocationData.flags.preferCompressed,
false,
allocationData.storageInfo);
allocationData.storageInfo,
true);
}
}
@@ -1027,8 +1028,8 @@ GraphicsAllocation *WddmMemoryManager::allocateGraphicsMemoryInDevicePool(const
allocationData.type == AllocationType::SCRATCH_SURFACE ||
allocationData.type == AllocationType::LINEAR_STREAM ||
allocationData.type == AllocationType::PRIVATE_SURFACE) {
//TODO : FOR DG2 device make sure that scratch doesn't go here
//add 2MB padding to make sure there are no overlaps between system and local memory
// TODO : FOR DG2 device make sure that scratch doesn't go here
// add 2MB padding to make sure there are no overlaps between system and local memory
size_t reserveSizeAligned = sizeAligned + 2 * MemoryConstants::megaByte;
wddm.reserveValidAddressRange(reserveSizeAligned, requiredGpuVa);
wddmAllocation->setReservedAddressRange(requiredGpuVa, reserveSizeAligned);