Refactor USM properties

store reference to std of root device indices and device bitfields
store NEO::Device in USM properties

Related-To: NEO-3691
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
Mateusz Jablonski
2020-12-23 13:47:18 +00:00
committed by Compute-Runtime-Automation
parent 06dad67c5a
commit 1b7d7afc07
27 changed files with 312 additions and 246 deletions

View File

@@ -105,45 +105,44 @@ void SVMAllocsManager::makeInternalAllocationsResident(CommandStreamReceiver &co
SVMAllocsManager::SVMAllocsManager(MemoryManager *memoryManager) : memoryManager(memoryManager) {
}
void *SVMAllocsManager::createSVMAlloc(uint32_t rootDeviceIndex, size_t size, const SvmAllocationProperties svmProperties, const DeviceBitfield &deviceBitfield) {
void *SVMAllocsManager::createSVMAlloc(size_t size, const SvmAllocationProperties svmProperties,
const std::set<uint32_t> &rootDeviceIndices,
const std::map<uint32_t, DeviceBitfield> &subdeviceBitfields) {
if (size == 0)
return nullptr;
if (!memoryManager->isLocalMemorySupported(rootDeviceIndex)) {
return createZeroCopySvmAllocation(rootDeviceIndex, size, svmProperties, deviceBitfield);
if (!memoryManager->isLocalMemorySupported(*rootDeviceIndices.begin())) {
return createZeroCopySvmAllocation(size, svmProperties, rootDeviceIndices, subdeviceBitfields);
} else {
UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::NOT_SPECIFIED, deviceBitfield);
return createUnifiedAllocationWithDeviceStorage(rootDeviceIndex, size, svmProperties, unifiedMemoryProperties);
UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::NOT_SPECIFIED, rootDeviceIndices, subdeviceBitfields);
return createUnifiedAllocationWithDeviceStorage(size, svmProperties, unifiedMemoryProperties);
}
}
void *SVMAllocsManager::createHostUnifiedMemoryAllocation(uint32_t maxRootDeviceIndex,
size_t size,
void *SVMAllocsManager::createHostUnifiedMemoryAllocation(size_t size,
const UnifiedMemoryProperties &memoryProperties) {
size_t alignedSize = alignUp<size_t>(size, MemoryConstants::pageSize64k);
GraphicsAllocation::AllocationType allocationType = GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY;
std::vector<uint32_t> rootDeviceIndices;
rootDeviceIndices.reserve(maxRootDeviceIndex + 1);
for (auto rootDeviceIndex = 0u; rootDeviceIndex <= maxRootDeviceIndex; rootDeviceIndex++) {
rootDeviceIndices.push_back(rootDeviceIndex);
}
std::vector<uint32_t> rootDeviceIndicesVector(memoryProperties.rootDeviceIndices.begin(), memoryProperties.rootDeviceIndices.end());
uint32_t rootDeviceIndex = rootDeviceIndices.at(0);
uint32_t rootDeviceIndex = rootDeviceIndicesVector.at(0);
auto deviceBitfield = memoryProperties.subdeviceBitfields.at(rootDeviceIndex);
AllocationProperties unifiedMemoryProperties{rootDeviceIndex,
true,
alignedSize,
allocationType,
memoryProperties.subdeviceBitfield.count() > 1,
memoryProperties.subdeviceBitfield.count() > 1,
memoryProperties.subdeviceBitfield};
deviceBitfield.count() > 1,
deviceBitfield.count() > 1,
deviceBitfield};
unifiedMemoryProperties.flags.shareable = memoryProperties.allocationFlags.flags.shareable;
auto maxRootDeviceIndex = *std::max_element(rootDeviceIndicesVector.begin(), rootDeviceIndicesVector.end(), std::less<uint32_t const>());
SvmAllocationData allocData(maxRootDeviceIndex);
void *usmPtr = memoryManager->createMultiGraphicsAllocation(rootDeviceIndices, unifiedMemoryProperties, allocData.gpuAllocations);
void *usmPtr = memoryManager->createMultiGraphicsAllocation(rootDeviceIndicesVector, unifiedMemoryProperties, allocData.gpuAllocations);
if (!usmPtr) {
return nullptr;
}
@@ -160,9 +159,12 @@ void *SVMAllocsManager::createHostUnifiedMemoryAllocation(uint32_t maxRootDevice
return usmPtr;
}
void *SVMAllocsManager::createUnifiedMemoryAllocation(uint32_t rootDeviceIndex,
size_t size,
void *SVMAllocsManager::createUnifiedMemoryAllocation(size_t size,
const UnifiedMemoryProperties &memoryProperties) {
auto rootDeviceIndex = memoryProperties.device
? memoryProperties.device->getRootDeviceIndex()
: *memoryProperties.rootDeviceIndices.begin();
auto deviceBitfield = memoryProperties.subdeviceBitfields.at(rootDeviceIndex);
size_t alignedSize = alignUp<size_t>(size, MemoryConstants::pageSize64k);
@@ -172,9 +174,9 @@ void *SVMAllocsManager::createUnifiedMemoryAllocation(uint32_t rootDeviceIndex,
true,
alignedSize,
allocationType,
memoryProperties.subdeviceBitfield.count() > 1,
memoryProperties.subdeviceBitfield.count() > 1,
memoryProperties.subdeviceBitfield};
deviceBitfield.count() > 1,
deviceBitfield.count() > 1,
deviceBitfield};
unifiedMemoryProperties.flags.shareable = memoryProperties.allocationFlags.flags.shareable;
unifiedMemoryProperties.flags.isUSMDeviceAllocation = true;
@@ -202,11 +204,10 @@ void *SVMAllocsManager::createUnifiedMemoryAllocation(uint32_t rootDeviceIndex,
return reinterpret_cast<void *>(unifiedMemoryAllocation->getGpuAddress());
}
void *SVMAllocsManager::createSharedUnifiedMemoryAllocation(uint32_t rootDeviceIndex,
size_t size,
void *SVMAllocsManager::createSharedUnifiedMemoryAllocation(size_t size,
const UnifiedMemoryProperties &memoryProperties,
void *cmdQ) {
auto supportDualStorageSharedMemory = memoryManager->isLocalMemorySupported(rootDeviceIndex);
auto supportDualStorageSharedMemory = memoryManager->isLocalMemorySupported(*memoryProperties.rootDeviceIndices.begin());
if (DebugManager.flags.AllocateSharedAllocationsWithCpuAndGpuStorage.get() != -1) {
supportDualStorageSharedMemory = !!DebugManager.flags.AllocateSharedAllocationsWithCpuAndGpuStorage.get();
@@ -221,12 +222,12 @@ void *SVMAllocsManager::createSharedUnifiedMemoryAllocation(uint32_t rootDeviceI
void *unifiedMemoryPointer = nullptr;
if (useKmdMigration) {
unifiedMemoryPointer = createUnifiedKmdMigratedAllocation(rootDeviceIndex, size, {}, memoryProperties);
unifiedMemoryPointer = createUnifiedKmdMigratedAllocation(size, {}, memoryProperties);
if (!unifiedMemoryPointer) {
return nullptr;
}
} else {
unifiedMemoryPointer = createUnifiedAllocationWithDeviceStorage(rootDeviceIndex, size, {}, memoryProperties);
unifiedMemoryPointer = createUnifiedAllocationWithDeviceStorage(size, {}, memoryProperties);
if (!unifiedMemoryPointer) {
return nullptr;
}
@@ -242,18 +243,23 @@ void *SVMAllocsManager::createSharedUnifiedMemoryAllocation(uint32_t rootDeviceI
return unifiedMemoryPointer;
}
return createUnifiedMemoryAllocation(rootDeviceIndex, size, memoryProperties);
return createUnifiedMemoryAllocation(size, memoryProperties);
}
void *SVMAllocsManager::createUnifiedKmdMigratedAllocation(uint32_t rootDeviceIndex, size_t size, const SvmAllocationProperties &svmProperties, const UnifiedMemoryProperties &unifiedMemoryProperties) {
void *SVMAllocsManager::createUnifiedKmdMigratedAllocation(size_t size, const SvmAllocationProperties &svmProperties, const UnifiedMemoryProperties &unifiedMemoryProperties) {
auto rootDeviceIndex = unifiedMemoryProperties.device
? unifiedMemoryProperties.device->getRootDeviceIndex()
: *unifiedMemoryProperties.rootDeviceIndices.begin();
auto deviceBitfield = unifiedMemoryProperties.subdeviceBitfields.at(rootDeviceIndex);
size_t alignedSize = alignUp<size_t>(size, 2 * MemoryConstants::megaByte);
AllocationProperties gpuProperties{rootDeviceIndex,
true,
alignedSize,
GraphicsAllocation::AllocationType::UNIFIED_SHARED_MEMORY,
unifiedMemoryProperties.subdeviceBitfield.count() > 1,
deviceBitfield.count() > 1,
false,
unifiedMemoryProperties.subdeviceBitfield};
deviceBitfield};
gpuProperties.alignment = 2 * MemoryConstants::megaByte;
MemoryPropertiesHelper::fillCachePolicyInProperties(gpuProperties, false, svmProperties.readOnly, false);
@@ -319,7 +325,12 @@ bool SVMAllocsManager::freeSVMAlloc(void *ptr, bool blocking) {
return false;
}
void *SVMAllocsManager::createZeroCopySvmAllocation(uint32_t rootDeviceIndex, size_t size, const SvmAllocationProperties &svmProperties, const DeviceBitfield &deviceBitfield) {
void *SVMAllocsManager::createZeroCopySvmAllocation(size_t size, const SvmAllocationProperties &svmProperties,
const std::set<uint32_t> &rootDeviceIndices,
const std::map<uint32_t, DeviceBitfield> &subdeviceBitfields) {
auto rootDeviceIndex = *rootDeviceIndices.begin();
auto deviceBitfield = subdeviceBitfields.at(rootDeviceIndex);
AllocationProperties properties{rootDeviceIndex,
true, // allocateMemory
size,
@@ -343,13 +354,16 @@ void *SVMAllocsManager::createZeroCopySvmAllocation(uint32_t rootDeviceIndex, si
return allocation->getUnderlyingBuffer();
}
void *SVMAllocsManager::createUnifiedAllocationWithDeviceStorage(uint32_t rootDeviceIndex, size_t size, const SvmAllocationProperties &svmProperties, const UnifiedMemoryProperties &unifiedMemoryProperties) {
void *SVMAllocsManager::createUnifiedAllocationWithDeviceStorage(size_t size, const SvmAllocationProperties &svmProperties, const UnifiedMemoryProperties &unifiedMemoryProperties) {
auto rootDeviceIndex = unifiedMemoryProperties.device
? unifiedMemoryProperties.device->getRootDeviceIndex()
: *unifiedMemoryProperties.rootDeviceIndices.begin();
size_t alignedSize = alignUp<size_t>(size, 2 * MemoryConstants::megaByte);
AllocationProperties cpuProperties{rootDeviceIndex,
true, // allocateMemory
alignedSize, GraphicsAllocation::AllocationType::SVM_CPU,
false, // isMultiStorageAllocation
unifiedMemoryProperties.subdeviceBitfield};
unifiedMemoryProperties.subdeviceBitfields.at(rootDeviceIndex)};
cpuProperties.alignment = 2 * MemoryConstants::megaByte;
MemoryPropertiesHelper::fillCachePolicyInProperties(cpuProperties, false, svmProperties.readOnly, false);
GraphicsAllocation *allocationCpu = memoryManager->allocateGraphicsMemoryWithProperties(cpuProperties);
@@ -363,9 +377,9 @@ void *SVMAllocsManager::createUnifiedAllocationWithDeviceStorage(uint32_t rootDe
false,
alignedSize,
GraphicsAllocation::AllocationType::SVM_GPU,
unifiedMemoryProperties.subdeviceBitfield.count() > 1,
unifiedMemoryProperties.subdeviceBitfields.at(rootDeviceIndex).count() > 1,
false,
unifiedMemoryProperties.subdeviceBitfield};
unifiedMemoryProperties.subdeviceBitfields.at(rootDeviceIndex)};
gpuProperties.alignment = 2 * MemoryConstants::megaByte;
MemoryPropertiesHelper::fillCachePolicyInProperties(gpuProperties, false, svmProperties.readOnly, false);

View File

@@ -17,11 +17,13 @@
#include <cstdint>
#include <map>
#include <mutex>
#include <set>
namespace NEO {
class CommandStreamReceiver;
class GraphicsAllocation;
class MemoryManager;
class Device;
struct SvmAllocationData {
SvmAllocationData(uint32_t maxRootDeviceIndex) : gpuAllocations(maxRootDeviceIndex), maxRootDeviceIndex(maxRootDeviceIndex){};
@@ -43,7 +45,7 @@ struct SvmAllocationData {
size_t size = 0;
InternalMemoryType memoryType = InternalMemoryType::SVM;
MemoryProperties allocationFlagsProperty;
void *device = nullptr;
Device *device = nullptr;
protected:
const uint32_t maxRootDeviceIndex;
@@ -91,31 +93,32 @@ class SVMAllocsManager {
};
struct UnifiedMemoryProperties {
UnifiedMemoryProperties(InternalMemoryType memoryType, DeviceBitfield subdeviceBitfield) : memoryType(memoryType), subdeviceBitfield(subdeviceBitfield){};
UnifiedMemoryProperties(InternalMemoryType memoryType,
const std::set<uint32_t> &rootDeviceIndices,
const std::map<uint32_t, DeviceBitfield> &subdeviceBitfields) : memoryType(memoryType),
rootDeviceIndices(rootDeviceIndices),
subdeviceBitfields(subdeviceBitfields){};
InternalMemoryType memoryType = InternalMemoryType::NOT_SPECIFIED;
MemoryProperties allocationFlags;
void *device = nullptr;
DeviceBitfield subdeviceBitfield;
Device *device = nullptr;
const std::set<uint32_t> &rootDeviceIndices;
const std::map<uint32_t, DeviceBitfield> &subdeviceBitfields;
};
SVMAllocsManager(MemoryManager *memoryManager);
MOCKABLE_VIRTUAL ~SVMAllocsManager() = default;
void *createSVMAlloc(uint32_t rootDeviceIndex,
size_t size,
void *createSVMAlloc(size_t size,
const SvmAllocationProperties svmProperties,
const DeviceBitfield &deviceBitfield);
void *createHostUnifiedMemoryAllocation(uint32_t maxRootDeviceIndex,
size_t size,
const std::set<uint32_t> &rootDeviceIndices,
const std::map<uint32_t, DeviceBitfield> &subdeviceBitfields);
void *createHostUnifiedMemoryAllocation(size_t size,
const UnifiedMemoryProperties &svmProperties);
void *createUnifiedMemoryAllocation(uint32_t rootDeviceIndex,
size_t size,
void *createUnifiedMemoryAllocation(size_t size,
const UnifiedMemoryProperties &svmProperties);
void *createSharedUnifiedMemoryAllocation(uint32_t rootDeviceIndex,
size_t size,
void *createSharedUnifiedMemoryAllocation(size_t size,
const UnifiedMemoryProperties &svmProperties,
void *cmdQ);
void *createUnifiedKmdMigratedAllocation(uint32_t rootDeviceIndex,
size_t size,
void *createUnifiedKmdMigratedAllocation(size_t size,
const SvmAllocationProperties &svmProperties,
const UnifiedMemoryProperties &unifiedMemoryProperties);
void setUnifiedAllocationProperties(GraphicsAllocation *allocation, const SvmAllocationProperties &svmProperties);
@@ -134,11 +137,13 @@ class SVMAllocsManager {
ResidencyContainer &residencyContainer,
uint32_t requestedTypesMask);
void makeInternalAllocationsResident(CommandStreamReceiver &commandStreamReceiver, uint32_t requestedTypesMask);
void *createUnifiedAllocationWithDeviceStorage(uint32_t rootDeviceIndex, size_t size, const SvmAllocationProperties &svmProperties, const UnifiedMemoryProperties &unifiedMemoryProperties);
void *createUnifiedAllocationWithDeviceStorage(size_t size, const SvmAllocationProperties &svmProperties, const UnifiedMemoryProperties &unifiedMemoryProperties);
void freeSvmAllocationWithDeviceStorage(SvmAllocationData *svmData);
protected:
void *createZeroCopySvmAllocation(uint32_t rootDeviceIndex, size_t size, const SvmAllocationProperties &svmProperties, const DeviceBitfield &deviceBitfield);
void *createZeroCopySvmAllocation(size_t size, const SvmAllocationProperties &svmProperties,
const std::set<uint32_t> &rootDeviceIndices,
const std::map<uint32_t, DeviceBitfield> &subdeviceBitfields);
GraphicsAllocation::AllocationType getGraphicsAllocationType(const UnifiedMemoryProperties &unifiedMemoryProperties) const;
void freeZeroCopySvmAllocation(SvmAllocationData *svmData);

View File

@@ -22,6 +22,8 @@ GraphicsAllocation *allocateGlobalsSurface(NEO::SVMAllocsManager *const svmAlloc
LinkerInput *const linkerInput, const void *initData) {
bool globalsAreExported = false;
GraphicsAllocation *gpuAllocation = nullptr;
auto rootDeviceIndex = device.getRootDeviceIndex();
auto deviceBitfield = device.getDeviceBitfield();
if (linkerInput != nullptr) {
globalsAreExported = constant ? linkerInput->getTraits().exportsGlobalConstants : linkerInput->getTraits().exportsGlobalVariables;
@@ -32,21 +34,26 @@ GraphicsAllocation *allocateGlobalsSurface(NEO::SVMAllocsManager *const svmAlloc
svmProps.coherent = false;
svmProps.readOnly = constant;
svmProps.hostPtrReadOnly = constant;
auto ptr = svmAllocManager->createSVMAlloc(device.getRootDeviceIndex(), size, svmProps, device.getDeviceBitfield());
std::set<uint32_t> rootDeviceIndices;
rootDeviceIndices.insert(rootDeviceIndex);
std::map<uint32_t, DeviceBitfield> subDeviceBitfields;
subDeviceBitfields.insert({rootDeviceIndex, deviceBitfield});
auto ptr = svmAllocManager->createSVMAlloc(size, svmProps, rootDeviceIndices, subDeviceBitfields);
DEBUG_BREAK_IF(ptr == nullptr);
if (ptr == nullptr) {
return nullptr;
}
auto svmAlloc = svmAllocManager->getSVMAlloc(ptr);
UNRECOVERABLE_IF(svmAlloc == nullptr);
gpuAllocation = svmAlloc->gpuAllocations.getGraphicsAllocation(device.getRootDeviceIndex());
gpuAllocation = svmAlloc->gpuAllocations.getGraphicsAllocation(rootDeviceIndex);
} else {
auto allocationType = constant ? GraphicsAllocation::AllocationType::CONSTANT_SURFACE : GraphicsAllocation::AllocationType::GLOBAL_SURFACE;
gpuAllocation = device.getMemoryManager()->allocateGraphicsMemoryWithProperties({device.getRootDeviceIndex(),
gpuAllocation = device.getMemoryManager()->allocateGraphicsMemoryWithProperties({rootDeviceIndex,
true, // allocateMemory
size, allocationType,
false, // isMultiStorageAllocation
device.getDeviceBitfield()});
deviceBitfield});
}
if (!gpuAllocation) {

View File

@@ -353,12 +353,14 @@ TEST_F(PageFaultManagerTest, givenUnifiedMemoryAllocWhenSetAubWritableIsCalledTh
MockExecutionEnvironment executionEnvironment;
REQUIRE_SVM_OR_SKIP(executionEnvironment.rootDeviceEnvironments[0]->getHardwareInfo());
std::set<uint32_t> rootDeviceIndices{mockRootDeviceIndex};
std::map<uint32_t, DeviceBitfield> deviceBitfields{{mockRootDeviceIndex, mockDeviceBitfield}};
void *cmdQ = reinterpret_cast<void *>(0xFFFF);
auto memoryManager = std::make_unique<MockMemoryManager>(executionEnvironment);
auto unifiedMemoryManager = std::make_unique<SVMAllocsManager>(memoryManager.get());
auto properties = SVMAllocsManager::UnifiedMemoryProperties(InternalMemoryType::SHARED_UNIFIED_MEMORY, mockDeviceBitfield);
properties.subdeviceBitfield = mockDeviceBitfield;
void *alloc1 = unifiedMemoryManager->createSharedUnifiedMemoryAllocation(mockRootDeviceIndex, 10, properties, cmdQ);
auto properties = SVMAllocsManager::UnifiedMemoryProperties(InternalMemoryType::SHARED_UNIFIED_MEMORY, rootDeviceIndices, deviceBitfields);
void *alloc1 = unifiedMemoryManager->createSharedUnifiedMemoryAllocation(10, properties, cmdQ);
pageFaultManager->baseAubWritable(false, alloc1, unifiedMemoryManager.get());