feature usm: allocate host USM in HEAP_EXTENDED

Related-To: NEO-7665
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
Mateusz Jablonski
2023-03-20 17:26:31 +00:00
committed by Compute-Runtime-Automation
parent 792d365390
commit 7a03c78b0d
9 changed files with 118 additions and 13 deletions

View File

@@ -370,7 +370,7 @@ DrmAllocation *DrmMemoryManager::allocateGraphicsMemoryWithAlignmentImpl(const A
}
auto drmAllocation = createAllocWithAlignment(allocationData, cSize, mmapAlignment, alignedStorageSize, alignedGpuAddress);
if (drmAllocation != nullptr) {
if (drmAllocation != nullptr && gpuReservationAddress) {
drmAllocation->setReservedAddressRange(reinterpret_cast<void *>(gpuReservationAddress), alignedVirtualAddressRangeSize);
}
@@ -1977,7 +1977,18 @@ DrmAllocation *DrmMemoryManager::createAllocWithAlignment(const AllocationData &
if (useBooMmap) {
auto totalSizeToAlloc = alignedSize + alignment;
auto cpuPointer = this->mmapFunction(0, totalSizeToAlloc, PROT_NONE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
uint64_t preferredAddress = 0;
auto gfxPartition = getGfxPartition(allocationData.rootDeviceIndex);
if (allocationData.flags.isUSMHostAllocation && gfxPartition->getHeapLimit(HeapIndex::HEAP_EXTENDED) > 0u) {
preferredAddress = acquireGpuRange(totalSizeToAlloc, allocationData.rootDeviceIndex, HeapIndex::HEAP_EXTENDED);
}
auto cpuPointer = this->mmapFunction(reinterpret_cast<void *>(preferredAddress), totalSizeToAlloc, PROT_NONE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
if (castToUint64(cpuPointer) != preferredAddress) {
releaseGpuRange(reinterpret_cast<void *>(preferredAddress), totalSizeToAlloc, allocationData.rootDeviceIndex);
preferredAddress = 0;
}
auto cpuBasePointer = cpuPointer;
cpuPointer = alignUp(cpuPointer, alignment);
@@ -1987,6 +1998,7 @@ DrmAllocation *DrmMemoryManager::createAllocWithAlignment(const AllocationData &
reinterpret_cast<uintptr_t>(cpuPointer), alignedSize, 0u, maxOsContextCount, -1));
if (!bo) {
releaseGpuRange(reinterpret_cast<void *>(preferredAddress), totalSizeToAlloc, allocationData.rootDeviceIndex);
this->munmapFunction(cpuBasePointer, totalSizeToAlloc);
return nullptr;
}
@@ -1995,6 +2007,7 @@ DrmAllocation *DrmMemoryManager::createAllocWithAlignment(const AllocationData &
auto ioctlHelper = drm.getIoctlHelper();
uint64_t mmapOffsetWb = ioctlHelper->getDrmParamValue(DrmParam::MmapOffsetWb);
if (!retrieveMmapOffsetForBufferObject(allocationData.rootDeviceIndex, *bo, mmapOffsetWb, offset)) {
releaseGpuRange(reinterpret_cast<void *>(preferredAddress), totalSizeToAlloc, allocationData.rootDeviceIndex);
this->munmapFunction(cpuPointer, size);
return nullptr;
}
@@ -2015,7 +2028,11 @@ DrmAllocation *DrmMemoryManager::createAllocWithAlignment(const AllocationData &
}
[[maybe_unused]] int retCode = this->munmapFunction(ptrOffset(cpuPointer, alignedSize), alignment - pointerDiff);
DEBUG_BREAK_IF(retCode != 0);
allocation->setReservedAddressRange(reinterpret_cast<void *>(gpuAddress), alignedSize);
if (preferredAddress) {
allocation->setReservedAddressRange(reinterpret_cast<void *>(preferredAddress), totalSizeToAlloc);
} else {
allocation->setReservedAddressRange(reinterpret_cast<void *>(gpuAddress), alignedSize);
}
if (!allocation->setCacheRegion(&drm, static_cast<CacheRegion>(allocationData.cacheRegion))) {
if (pointerDiff == 0) {
allocation->registerMemoryToUnmap(cpuBasePointer, totalSizeToAlloc, this->munmapFunction);

View File

@@ -13,13 +13,12 @@
#include "shared/test/common/mocks/linux/mock_drm_memory_manager.h"
#include "shared/test/common/mocks/mock_builtins.h"
#include "shared/test/common/mocks/mock_device.h"
#include "shared/test/common/os_interface/linux/sys_calls_linux_ult.h"
#include "hw_cmds_default.h"
namespace NEO {
extern std::vector<void *> mmapVector;
void DrmMemoryManagerBasic::SetUp() {
for (auto i = 0u; i < numRootDevices; i++) {
executionEnvironment.rootDeviceEnvironments[i]->setHwInfoAndInitHelpers(defaultHwInfo.get());
@@ -103,7 +102,7 @@ void DrmMemoryManagerFixture::tearDown() {
mock->testIoctls();
executionEnvironment->decRefInternal();
MemoryManagementFixture::tearDown();
mmapVector.clear();
SysCalls::mmapVector.clear();
}
void DrmMemoryManagerWithLocalMemoryFixture::setUp() {

View File

@@ -22,8 +22,6 @@ class MockDevice;
class TestedDrmMemoryManager;
struct UltHwConfig;
extern std::vector<void *> mmapVector;
class DrmMemoryManagerBasic : public ::testing::Test {
public:
DrmMemoryManagerBasic() : executionEnvironment(defaultHwInfo.get(), false, numRootDevices){};

View File

@@ -26,7 +26,6 @@
#include <system_error>
namespace NEO {
std::vector<void *> mmapVector(64);
namespace SysCalls {
uint32_t closeFuncCalled = 0u;
int closeFuncArgPassed = 0;
@@ -41,14 +40,19 @@ int setErrno = 0;
int fstatFuncRetVal = 0;
uint32_t preadFuncCalled = 0u;
uint32_t pwriteFuncCalled = 0u;
uint32_t mmapFuncCalled = 0u;
uint32_t munmapFuncCalled = 0u;
bool isInvalidAILTest = false;
const char *drmVersion = "i915";
int passedFileDescriptorFlagsToSet = 0;
int getFileDescriptorFlagsCalled = 0;
int setFileDescriptorFlagsCalled = 0;
std::vector<void *> mmapVector(64);
std::vector<void *> mmapCapturedExtendedPointers(64);
bool mmapCaptureExtendedPointers = false;
bool mmapAllowExtendedPointers = false;
uint32_t mmapFuncCalled = 0u;
uint32_t munmapFuncCalled = 0u;
int (*sysCallsOpen)(const char *pathname, int flags) = nullptr;
ssize_t (*sysCallsPread)(int fd, void *buf, size_t count, off_t offset) = nullptr;
int (*sysCallsReadlink)(const char *path, char *buf, size_t bufsize) = nullptr;
@@ -191,6 +195,14 @@ ssize_t pwrite(int fd, const void *buf, size_t count, off_t offset) {
void *mmap(void *addr, size_t size, int prot, int flags, int fd, off_t off) noexcept {
mmapFuncCalled++;
if (reinterpret_cast<uint64_t>(addr) > maxNBitValue(48)) {
if (mmapCaptureExtendedPointers) {
mmapCapturedExtendedPointers.push_back(addr);
}
if (!mmapAllowExtendedPointers) {
addr = nullptr;
}
}
if (addr) {
return addr;
}

View File

@@ -10,6 +10,7 @@
#include <iostream>
#include <poll.h>
#include <sys/stat.h>
#include <vector>
namespace NEO {
namespace SysCalls {
@@ -29,5 +30,12 @@ extern int passedFileDescriptorFlagsToSet;
extern int getFileDescriptorFlagsCalled;
extern int setFileDescriptorFlagsCalled;
extern uint32_t closeFuncCalled;
extern std::vector<void *> mmapVector;
extern std::vector<void *> mmapCapturedExtendedPointers;
extern bool mmapCaptureExtendedPointers;
extern bool mmapAllowExtendedPointers;
extern uint32_t mmapFuncCalled;
extern uint32_t munmapFuncCalled;
} // namespace SysCalls
} // namespace NEO

View File

@@ -11,12 +11,16 @@
namespace NEO {
namespace SysCalls {
extern bool mmapAllowExtendedPointers;
}
// Unit-test replacement with a VirtualFree-shaped signature: every argument is
// ignored and a non-zero (success) value is always returned.
BOOL WINAPI ULTVirtualFree(LPVOID ptr, SIZE_T size, DWORD flags) {
    const BOOL alwaysSucceeds = 1;
    return alwaysSucceeds;
}
LPVOID WINAPI ULTVirtualAlloc(LPVOID inPtr, SIZE_T size, DWORD flags, DWORD type) {
if (castToUint64(inPtr) > maxNBitValue(48)) {
if (castToUint64(inPtr) > maxNBitValue(48) && SysCalls::mmapAllowExtendedPointers) {
return inPtr;
}
return reinterpret_cast<LPVOID>(virtualAllocAddress);

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2022 Intel Corporation
* Copyright (C) 2018-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -31,6 +31,7 @@ uint32_t regQueryValueSuccessCount = 0u;
uint64_t regQueryValueExpectedData = 0ull;
const HKEY validHkey = reinterpret_cast<HKEY>(0);
bool getNumThreadsCalled = false;
bool mmapAllowExtendedPointers = false;
HANDLE createEvent(LPSECURITY_ATTRIBUTES lpEventAttributes, BOOL bManualReset, BOOL bInitialState, LPCSTR lpName) {
if (mockCreateEventClb) {

View File

@@ -10,12 +10,19 @@
#include "shared/source/helpers/ptr_math.h"
#include "shared/source/os_interface/os_memory.h"
#include "shared/source/utilities/cpu_info.h"
#include "shared/test/common/helpers/variable_backup.h"
#include "shared/test/common/mocks/mock_gfx_partition.h"
#include "gtest/gtest.h"
#include <mutex>
namespace NEO {
namespace SysCalls {
extern bool mmapAllowExtendedPointers;
}
} // namespace NEO
// Flag string that the mock getter below hands out to its caller.
static std::string mockCpuFlags;

// Mock CPU-flags getter: overwrites the caller's string with the current
// contents of mockCpuFlags.
static void mockGetCpuFlagsFunc(std::string &cpuFlags) {
    cpuFlags.assign(mockCpuFlags);
}

// Function pointer with the getter's signature; starts out null.
// NOTE(review): presumably holds the original getter while the mock is installed - confirm against the users of this variable.
static void (*getCpuFlagsFuncSave)(std::string &) = nullptr;
@@ -855,6 +862,7 @@ TEST(GfxPartitionTest, givenGpuAddressSpaceIs57BitAndSeveralRootDevicesThenHeapE
{
// 57 bit CPU VA, la57 flag is present
CpuInfoOverrideVirtualAddressSizeAndFlags overrideCpuInfo(57, "la57");
VariableBackup<bool> backupAllowExtendedPointers(&SysCalls::mmapAllowExtendedPointers, true);
MockGfxPartition gfxPartition;
EXPECT_TRUE(gfxPartition.init(maxNBitValue(57), reservedCpuAddressRangeSize, rootDeviceIndex, numRootDevices));

View File

@@ -28,6 +28,7 @@
#include "shared/test/common/mocks/mock_host_ptr_manager.h"
#include "shared/test/common/os_interface/linux/drm_memory_manager_fixture.h"
#include "shared/test/common/os_interface/linux/drm_mock_cache_info.h"
#include "shared/test/common/os_interface/linux/sys_calls_linux_ult.h"
#include "shared/test/common/test_macros/hw_test.h"
#include "drm/i915_drm.h"
@@ -6257,3 +6258,60 @@ TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenDrmMemoryManagerWhenGp
EXPECT_GT(memoryManager->getGfxPartition(1)->getHeapLimit(HeapIndex::HEAP_STANDARD), gmmHelper->decanonize(addressRange.address));
memoryManager->freeGpuAddress(addressRange, 1);
}
// Host USM allocation with extended (above 48-bit) mmap hints both captured and
// honoured by the mocked mmap: the first captured hint must lie inside
// HEAP_EXTENDED and must become the allocation's GPU address and reserved address.
TEST_F(DrmMemoryManagerTest, given57bAddressSpaceCpuAndGpuWhenAllocatingHostUSMThenAddressFromExtendedHeapIsPassedAsHintAndSetAsGpuAddressAndReservedAddress) {
    // Only meaningful when the default hardware exposes at least a 57-bit GPU address space.
    if (defaultHwInfo->capabilityTable.gpuAddressSpace < maxNBitValue(57)) {
        GTEST_SKIP();
    }
    // Record extended hints passed to the mocked mmap and let them be returned as-is;
    // both switches are restored when the backups go out of scope.
    VariableBackup<bool> backupCaptureExtendedPointers(&SysCalls::mmapCaptureExtendedPointers, true);
    VariableBackup<bool> backupAllowExtendedPointers(&SysCalls::mmapAllowExtendedPointers, true);
    SysCalls::mmapCapturedExtendedPointers.clear();

    // Single system-memory region for the mocked DRM.
    std::vector<MemoryRegion> regionInfo(1);
    regionInfo[0].region = {drm_i915_gem_memory_class::I915_MEMORY_CLASS_SYSTEM, 0};

    auto &drm = static_cast<DrmMockCustom &>(memoryManager->getDrm(mockRootDeviceIndex));
    drm.memoryInfo.reset(new MemoryInfo(regionInfo, drm));

    AllocationProperties allocationProperties(mockRootDeviceIndex, MemoryConstants::cacheLineSize, AllocationType::SVM_CPU, {});
    allocationProperties.flags.isUSMHostAllocation = true;

    auto hostUSM = memoryManager->allocateGraphicsMemoryInPreferredPool(allocationProperties, nullptr);

    // Fatal check: hostUSM is dereferenced below, so a null result must stop the test here.
    ASSERT_NE(nullptr, hostUSM);

    const auto capturedCount = SysCalls::mmapCapturedExtendedPointers.size();
    EXPECT_EQ(2u, capturedCount);
    if (capturedCount == 0u) {
        // The failure is already recorded above; release the allocation and stop
        // before indexing into an empty vector.
        memoryManager->freeGraphicsMemory(hostUSM);
        return;
    }

    auto gpuAddress = reinterpret_cast<uint64_t>(SysCalls::mmapCapturedExtendedPointers[0]);
    SysCalls::mmapCapturedExtendedPointers.clear();
    auto gmmHelper = memoryManager->getGmmHelper(mockRootDeviceIndex);

    // The hint must fall inside [HEAP_EXTENDED base, HEAP_EXTENDED limit).
    EXPECT_LE(memoryManager->getGfxPartition(mockRootDeviceIndex)->getHeapBase(HeapIndex::HEAP_EXTENDED), gmmHelper->decanonize(gpuAddress));
    EXPECT_GT(memoryManager->getGfxPartition(mockRootDeviceIndex)->getHeapLimit(HeapIndex::HEAP_EXTENDED), gmmHelper->decanonize(gpuAddress));

    // The honoured hint is used both as GPU address and as the reserved range start.
    EXPECT_EQ(hostUSM->getGpuAddress(), gpuAddress);
    EXPECT_EQ(hostUSM->getReservedAddressPtr(), reinterpret_cast<void *>(gpuAddress));

    memoryManager->freeGraphicsMemory(hostUSM);
}
// Host USM allocation where the mocked mmap captures extended (above 48-bit)
// hints but does not honour them: the hint must still come from HEAP_EXTENDED,
// yet the resulting allocation must not use it as its GPU address.
TEST_F(DrmMemoryManagerTest, given48bAddressSpaceCpuAnd57bGpuWhenAllocatingHostUSMThenAddressFromExtendedHeapIsPassedAsHintAndThenIgnored) {
    // Only meaningful when the default hardware exposes at least a 57-bit GPU address space.
    if (defaultHwInfo->capabilityTable.gpuAddressSpace < maxNBitValue(57)) {
        GTEST_SKIP();
    }
    // Record extended hints passed to the mocked mmap but make it drop them;
    // both switches are restored when the backups go out of scope.
    VariableBackup<bool> backupCaptureExtendedPointers(&SysCalls::mmapCaptureExtendedPointers, true);
    VariableBackup<bool> backupAllowExtendedPointers(&SysCalls::mmapAllowExtendedPointers, false);
    SysCalls::mmapCapturedExtendedPointers.clear();

    // Single system-memory region for the mocked DRM.
    std::vector<MemoryRegion> regionInfo(1);
    regionInfo[0].region = {drm_i915_gem_memory_class::I915_MEMORY_CLASS_SYSTEM, 0};

    auto &drm = static_cast<DrmMockCustom &>(memoryManager->getDrm(mockRootDeviceIndex));
    drm.memoryInfo.reset(new MemoryInfo(regionInfo, drm));

    AllocationProperties allocationProperties(mockRootDeviceIndex, MemoryConstants::cacheLineSize, AllocationType::SVM_CPU, {});
    allocationProperties.flags.isUSMHostAllocation = true;

    auto hostUSM = memoryManager->allocateGraphicsMemoryInPreferredPool(allocationProperties, nullptr);

    // Fatal check: hostUSM is dereferenced below, so a null result must stop the test here.
    ASSERT_NE(nullptr, hostUSM);

    const auto capturedCount = SysCalls::mmapCapturedExtendedPointers.size();
    EXPECT_EQ(1u, capturedCount);
    if (capturedCount == 0u) {
        // The failure is already recorded above; release the allocation and stop
        // before indexing into an empty vector.
        memoryManager->freeGraphicsMemory(hostUSM);
        return;
    }

    auto gpuAddress = reinterpret_cast<uint64_t>(SysCalls::mmapCapturedExtendedPointers[0]);
    SysCalls::mmapCapturedExtendedPointers.clear();
    auto gmmHelper = memoryManager->getGmmHelper(mockRootDeviceIndex);

    // The hint must fall inside [HEAP_EXTENDED base, HEAP_EXTENDED limit).
    EXPECT_LE(memoryManager->getGfxPartition(mockRootDeviceIndex)->getHeapBase(HeapIndex::HEAP_EXTENDED), gmmHelper->decanonize(gpuAddress));
    EXPECT_GT(memoryManager->getGfxPartition(mockRootDeviceIndex)->getHeapLimit(HeapIndex::HEAP_EXTENDED), gmmHelper->decanonize(gpuAddress));

    // The hint was not honoured, so the allocation must have a different GPU address.
    EXPECT_NE(hostUSM->getGpuAddress(), gpuAddress);

    memoryManager->freeGraphicsMemory(hostUSM);
}