mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-19 06:24:51 +08:00
feature usm: allocate host USM in HEAP_EXTENDED
Related-To: NEO-7665
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
792d365390
commit
7a03c78b0d
@@ -370,7 +370,7 @@ DrmAllocation *DrmMemoryManager::allocateGraphicsMemoryWithAlignmentImpl(const A
|
||||
}
|
||||
|
||||
auto drmAllocation = createAllocWithAlignment(allocationData, cSize, mmapAlignment, alignedStorageSize, alignedGpuAddress);
|
||||
if (drmAllocation != nullptr) {
|
||||
if (drmAllocation != nullptr && gpuReservationAddress) {
|
||||
drmAllocation->setReservedAddressRange(reinterpret_cast<void *>(gpuReservationAddress), alignedVirtualAddressRangeSize);
|
||||
}
|
||||
|
||||
@@ -1977,7 +1977,18 @@ DrmAllocation *DrmMemoryManager::createAllocWithAlignment(const AllocationData &
|
||||
|
||||
if (useBooMmap) {
|
||||
auto totalSizeToAlloc = alignedSize + alignment;
|
||||
auto cpuPointer = this->mmapFunction(0, totalSizeToAlloc, PROT_NONE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
|
||||
uint64_t preferredAddress = 0;
|
||||
auto gfxPartition = getGfxPartition(allocationData.rootDeviceIndex);
|
||||
if (allocationData.flags.isUSMHostAllocation && gfxPartition->getHeapLimit(HeapIndex::HEAP_EXTENDED) > 0u) {
|
||||
|
||||
preferredAddress = acquireGpuRange(totalSizeToAlloc, allocationData.rootDeviceIndex, HeapIndex::HEAP_EXTENDED);
|
||||
}
|
||||
|
||||
auto cpuPointer = this->mmapFunction(reinterpret_cast<void *>(preferredAddress), totalSizeToAlloc, PROT_NONE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
|
||||
if (castToUint64(cpuPointer) != preferredAddress) {
|
||||
releaseGpuRange(reinterpret_cast<void *>(preferredAddress), totalSizeToAlloc, allocationData.rootDeviceIndex);
|
||||
preferredAddress = 0;
|
||||
}
|
||||
|
||||
auto cpuBasePointer = cpuPointer;
|
||||
cpuPointer = alignUp(cpuPointer, alignment);
|
||||
@@ -1987,6 +1998,7 @@ DrmAllocation *DrmMemoryManager::createAllocWithAlignment(const AllocationData &
|
||||
reinterpret_cast<uintptr_t>(cpuPointer), alignedSize, 0u, maxOsContextCount, -1));
|
||||
|
||||
if (!bo) {
|
||||
releaseGpuRange(reinterpret_cast<void *>(preferredAddress), totalSizeToAlloc, allocationData.rootDeviceIndex);
|
||||
this->munmapFunction(cpuBasePointer, totalSizeToAlloc);
|
||||
return nullptr;
|
||||
}
|
||||
@@ -1995,6 +2007,7 @@ DrmAllocation *DrmMemoryManager::createAllocWithAlignment(const AllocationData &
|
||||
auto ioctlHelper = drm.getIoctlHelper();
|
||||
uint64_t mmapOffsetWb = ioctlHelper->getDrmParamValue(DrmParam::MmapOffsetWb);
|
||||
if (!retrieveMmapOffsetForBufferObject(allocationData.rootDeviceIndex, *bo, mmapOffsetWb, offset)) {
|
||||
releaseGpuRange(reinterpret_cast<void *>(preferredAddress), totalSizeToAlloc, allocationData.rootDeviceIndex);
|
||||
this->munmapFunction(cpuPointer, size);
|
||||
return nullptr;
|
||||
}
|
||||
@@ -2015,7 +2028,11 @@ DrmAllocation *DrmMemoryManager::createAllocWithAlignment(const AllocationData &
|
||||
}
|
||||
[[maybe_unused]] int retCode = this->munmapFunction(ptrOffset(cpuPointer, alignedSize), alignment - pointerDiff);
|
||||
DEBUG_BREAK_IF(retCode != 0);
|
||||
allocation->setReservedAddressRange(reinterpret_cast<void *>(gpuAddress), alignedSize);
|
||||
if (preferredAddress) {
|
||||
allocation->setReservedAddressRange(reinterpret_cast<void *>(preferredAddress), totalSizeToAlloc);
|
||||
} else {
|
||||
allocation->setReservedAddressRange(reinterpret_cast<void *>(gpuAddress), alignedSize);
|
||||
}
|
||||
if (!allocation->setCacheRegion(&drm, static_cast<CacheRegion>(allocationData.cacheRegion))) {
|
||||
if (pointerDiff == 0) {
|
||||
allocation->registerMemoryToUnmap(cpuBasePointer, totalSizeToAlloc, this->munmapFunction);
|
||||
|
||||
@@ -13,13 +13,12 @@
|
||||
#include "shared/test/common/mocks/linux/mock_drm_memory_manager.h"
|
||||
#include "shared/test/common/mocks/mock_builtins.h"
|
||||
#include "shared/test/common/mocks/mock_device.h"
|
||||
#include "shared/test/common/os_interface/linux/sys_calls_linux_ult.h"
|
||||
|
||||
#include "hw_cmds_default.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
extern std::vector<void *> mmapVector;
|
||||
|
||||
void DrmMemoryManagerBasic::SetUp() {
|
||||
for (auto i = 0u; i < numRootDevices; i++) {
|
||||
executionEnvironment.rootDeviceEnvironments[i]->setHwInfoAndInitHelpers(defaultHwInfo.get());
|
||||
@@ -103,7 +102,7 @@ void DrmMemoryManagerFixture::tearDown() {
|
||||
mock->testIoctls();
|
||||
executionEnvironment->decRefInternal();
|
||||
MemoryManagementFixture::tearDown();
|
||||
mmapVector.clear();
|
||||
SysCalls::mmapVector.clear();
|
||||
}
|
||||
|
||||
void DrmMemoryManagerWithLocalMemoryFixture::setUp() {
|
||||
|
||||
@@ -22,8 +22,6 @@ class MockDevice;
|
||||
class TestedDrmMemoryManager;
|
||||
struct UltHwConfig;
|
||||
|
||||
extern std::vector<void *> mmapVector;
|
||||
|
||||
class DrmMemoryManagerBasic : public ::testing::Test {
|
||||
public:
|
||||
DrmMemoryManagerBasic() : executionEnvironment(defaultHwInfo.get(), false, numRootDevices){};
|
||||
|
||||
@@ -26,7 +26,6 @@
|
||||
#include <system_error>
|
||||
|
||||
namespace NEO {
|
||||
std::vector<void *> mmapVector(64);
|
||||
namespace SysCalls {
|
||||
uint32_t closeFuncCalled = 0u;
|
||||
int closeFuncArgPassed = 0;
|
||||
@@ -41,14 +40,19 @@ int setErrno = 0;
|
||||
int fstatFuncRetVal = 0;
|
||||
uint32_t preadFuncCalled = 0u;
|
||||
uint32_t pwriteFuncCalled = 0u;
|
||||
uint32_t mmapFuncCalled = 0u;
|
||||
uint32_t munmapFuncCalled = 0u;
|
||||
bool isInvalidAILTest = false;
|
||||
const char *drmVersion = "i915";
|
||||
int passedFileDescriptorFlagsToSet = 0;
|
||||
int getFileDescriptorFlagsCalled = 0;
|
||||
int setFileDescriptorFlagsCalled = 0;
|
||||
|
||||
std::vector<void *> mmapVector(64);
|
||||
std::vector<void *> mmapCapturedExtendedPointers(64);
|
||||
bool mmapCaptureExtendedPointers = false;
|
||||
bool mmapAllowExtendedPointers = false;
|
||||
uint32_t mmapFuncCalled = 0u;
|
||||
uint32_t munmapFuncCalled = 0u;
|
||||
|
||||
int (*sysCallsOpen)(const char *pathname, int flags) = nullptr;
|
||||
ssize_t (*sysCallsPread)(int fd, void *buf, size_t count, off_t offset) = nullptr;
|
||||
int (*sysCallsReadlink)(const char *path, char *buf, size_t bufsize) = nullptr;
|
||||
@@ -191,6 +195,14 @@ ssize_t pwrite(int fd, const void *buf, size_t count, off_t offset) {
|
||||
|
||||
void *mmap(void *addr, size_t size, int prot, int flags, int fd, off_t off) noexcept {
|
||||
mmapFuncCalled++;
|
||||
if (reinterpret_cast<uint64_t>(addr) > maxNBitValue(48)) {
|
||||
if (mmapCaptureExtendedPointers) {
|
||||
mmapCapturedExtendedPointers.push_back(addr);
|
||||
}
|
||||
if (!mmapAllowExtendedPointers) {
|
||||
addr = nullptr;
|
||||
}
|
||||
}
|
||||
if (addr) {
|
||||
return addr;
|
||||
}
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
#include <iostream>
|
||||
#include <poll.h>
|
||||
#include <sys/stat.h>
|
||||
#include <vector>
|
||||
|
||||
namespace NEO {
|
||||
namespace SysCalls {
|
||||
@@ -29,5 +30,12 @@ extern int passedFileDescriptorFlagsToSet;
|
||||
extern int getFileDescriptorFlagsCalled;
|
||||
extern int setFileDescriptorFlagsCalled;
|
||||
extern uint32_t closeFuncCalled;
|
||||
|
||||
extern std::vector<void *> mmapVector;
|
||||
extern std::vector<void *> mmapCapturedExtendedPointers;
|
||||
extern bool mmapCaptureExtendedPointers;
|
||||
extern bool mmapAllowExtendedPointers;
|
||||
extern uint32_t mmapFuncCalled;
|
||||
extern uint32_t munmapFuncCalled;
|
||||
} // namespace SysCalls
|
||||
} // namespace NEO
|
||||
|
||||
@@ -11,12 +11,16 @@
|
||||
|
||||
namespace NEO {
|
||||
|
||||
namespace SysCalls {
|
||||
extern bool mmapAllowExtendedPointers;
|
||||
}
|
||||
|
||||
// Test double with a VirtualFree-style signature: every argument is ignored
// and the call always reports success by returning 1.
// NOTE(review): presumably installed in place of the real OS call by the
// surrounding test code - confirm at the registration site.
BOOL WINAPI ULTVirtualFree(LPVOID ptr, SIZE_T size, DWORD flags) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
LPVOID WINAPI ULTVirtualAlloc(LPVOID inPtr, SIZE_T size, DWORD flags, DWORD type) {
|
||||
if (castToUint64(inPtr) > maxNBitValue(48)) {
|
||||
if (castToUint64(inPtr) > maxNBitValue(48) && SysCalls::mmapAllowExtendedPointers) {
|
||||
return inPtr;
|
||||
}
|
||||
return reinterpret_cast<LPVOID>(virtualAllocAddress);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2022 Intel Corporation
|
||||
* Copyright (C) 2018-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -31,6 +31,7 @@ uint32_t regQueryValueSuccessCount = 0u;
|
||||
uint64_t regQueryValueExpectedData = 0ull;
|
||||
const HKEY validHkey = reinterpret_cast<HKEY>(0);
|
||||
bool getNumThreadsCalled = false;
|
||||
bool mmapAllowExtendedPointers = false;
|
||||
|
||||
HANDLE createEvent(LPSECURITY_ATTRIBUTES lpEventAttributes, BOOL bManualReset, BOOL bInitialState, LPCSTR lpName) {
|
||||
if (mockCreateEventClb) {
|
||||
|
||||
@@ -10,12 +10,19 @@
|
||||
#include "shared/source/helpers/ptr_math.h"
|
||||
#include "shared/source/os_interface/os_memory.h"
|
||||
#include "shared/source/utilities/cpu_info.h"
|
||||
#include "shared/test/common/helpers/variable_backup.h"
|
||||
#include "shared/test/common/mocks/mock_gfx_partition.h"
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
#include <mutex>
|
||||
|
||||
namespace NEO {
|
||||
namespace SysCalls {
|
||||
extern bool mmapAllowExtendedPointers;
|
||||
}
|
||||
} // namespace NEO
|
||||
|
||||
// Flags string that mockGetCpuFlagsFunc hands back to its caller.
static std::string mockCpuFlags;
|
||||
// Overwrites cpuFlags with the current contents of mockCpuFlags.
static void mockGetCpuFlagsFunc(std::string &cpuFlags) { cpuFlags = mockCpuFlags; }
|
||||
// Function-pointer slot with the same signature as mockGetCpuFlagsFunc, initially null.
// NOTE(review): presumably used to stash the real callback while the mock is installed - confirm.
static void (*getCpuFlagsFuncSave)(std::string &) = nullptr;
|
||||
@@ -855,6 +862,7 @@ TEST(GfxPartitionTest, givenGpuAddressSpaceIs57BitAndSeveralRootDevicesThenHeapE
|
||||
{
|
||||
// 57 bit CPU VA, la57 flag is present
|
||||
CpuInfoOverrideVirtualAddressSizeAndFlags overrideCpuInfo(57, "la57");
|
||||
VariableBackup<bool> backupAllowExtendedPointers(&SysCalls::mmapAllowExtendedPointers, true);
|
||||
|
||||
MockGfxPartition gfxPartition;
|
||||
EXPECT_TRUE(gfxPartition.init(maxNBitValue(57), reservedCpuAddressRangeSize, rootDeviceIndex, numRootDevices));
|
||||
|
||||
@@ -28,6 +28,7 @@
|
||||
#include "shared/test/common/mocks/mock_host_ptr_manager.h"
|
||||
#include "shared/test/common/os_interface/linux/drm_memory_manager_fixture.h"
|
||||
#include "shared/test/common/os_interface/linux/drm_mock_cache_info.h"
|
||||
#include "shared/test/common/os_interface/linux/sys_calls_linux_ult.h"
|
||||
#include "shared/test/common/test_macros/hw_test.h"
|
||||
|
||||
#include "drm/i915_drm.h"
|
||||
@@ -6257,3 +6258,60 @@ TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenDrmMemoryManagerWhenGp
|
||||
EXPECT_GT(memoryManager->getGfxPartition(1)->getHeapLimit(HeapIndex::HEAP_STANDARD), gmmHelper->decanonize(addressRange.address));
|
||||
memoryManager->freeGpuAddress(addressRange, 1);
|
||||
}
|
||||
|
||||
// Host USM allocation with extended (above 48-bit) mmap addresses allowed:
// the first extended address seen by the mocked mmap must lie inside
// HEAP_EXTENDED and must end up as both the GPU address and the reserved
// address of the resulting allocation.
TEST_F(DrmMemoryManagerTest, given57bAddressSpaceCpuAndGpuWhenAllocatingHostUSMThenAddressFromExtendedHeapIsPassedAsHintAndSetAsGpuAddressAndReservedAddress) {
    if (defaultHwInfo->capabilityTable.gpuAddressSpace < maxNBitValue(57)) {
        GTEST_SKIP();
    }
    // Record every extended address handed to mmap and let mmap honor it.
    VariableBackup<bool> backupCaptureExtendedPointers(&SysCalls::mmapCaptureExtendedPointers, true);
    VariableBackup<bool> backupAllowExtendedPointers(&SysCalls::mmapAllowExtendedPointers, true);
    SysCalls::mmapCapturedExtendedPointers.clear();
    std::vector<MemoryRegion> regionInfo(1);
    regionInfo[0].region = {drm_i915_gem_memory_class::I915_MEMORY_CLASS_SYSTEM, 0};

    auto &drm = static_cast<DrmMockCustom &>(memoryManager->getDrm(mockRootDeviceIndex));
    drm.memoryInfo.reset(new MemoryInfo(regionInfo, drm));
    AllocationProperties allocationProperties(mockRootDeviceIndex, MemoryConstants::cacheLineSize, AllocationType::SVM_CPU, {});
    allocationProperties.flags.isUSMHostAllocation = true;
    auto hostUSM = memoryManager->allocateGraphicsMemoryInPreferredPool(allocationProperties, nullptr);
    // Fatal check: hostUSM is dereferenced and freed below.
    ASSERT_NE(nullptr, hostUSM);

    EXPECT_EQ(2u, SysCalls::mmapCapturedExtendedPointers.size());
    // Only read element 0 when something was captured; the size mismatch is
    // already reported above and the allocation is still released at the end.
    if (!SysCalls::mmapCapturedExtendedPointers.empty()) {
        auto gpuAddress = reinterpret_cast<uint64_t>(SysCalls::mmapCapturedExtendedPointers[0]);
        SysCalls::mmapCapturedExtendedPointers.clear();
        auto gmmHelper = memoryManager->getGmmHelper(mockRootDeviceIndex);
        EXPECT_LE(memoryManager->getGfxPartition(mockRootDeviceIndex)->getHeapBase(HeapIndex::HEAP_EXTENDED), gmmHelper->decanonize(gpuAddress));
        EXPECT_GT(memoryManager->getGfxPartition(mockRootDeviceIndex)->getHeapLimit(HeapIndex::HEAP_EXTENDED), gmmHelper->decanonize(gpuAddress));

        EXPECT_EQ(hostUSM->getGpuAddress(), gpuAddress);
        EXPECT_EQ(hostUSM->getReservedAddressPtr(), reinterpret_cast<void *>(gpuAddress));
    }
    memoryManager->freeGraphicsMemory(hostUSM);
}
|
||||
|
||||
// Host USM allocation with extended (above 48-bit) mmap addresses rejected:
// an address from HEAP_EXTENDED is still offered to the mocked mmap once,
// but the resulting allocation must not use it as its GPU address.
TEST_F(DrmMemoryManagerTest, given48bAddressSpaceCpuAnd57bGpuWhenAllocatingHostUSMThenAddressFromExtendedHeapIsPassedAsHintAndThenIgnored) {
    if (defaultHwInfo->capabilityTable.gpuAddressSpace < maxNBitValue(57)) {
        GTEST_SKIP();
    }
    // Record every extended address handed to mmap, but do not let mmap honor it.
    VariableBackup<bool> backupCaptureExtendedPointers(&SysCalls::mmapCaptureExtendedPointers, true);
    VariableBackup<bool> backupAllowExtendedPointers(&SysCalls::mmapAllowExtendedPointers, false);
    SysCalls::mmapCapturedExtendedPointers.clear();
    std::vector<MemoryRegion> regionInfo(1);
    regionInfo[0].region = {drm_i915_gem_memory_class::I915_MEMORY_CLASS_SYSTEM, 0};

    auto &drm = static_cast<DrmMockCustom &>(memoryManager->getDrm(mockRootDeviceIndex));
    drm.memoryInfo.reset(new MemoryInfo(regionInfo, drm));
    AllocationProperties allocationProperties(mockRootDeviceIndex, MemoryConstants::cacheLineSize, AllocationType::SVM_CPU, {});
    allocationProperties.flags.isUSMHostAllocation = true;
    auto hostUSM = memoryManager->allocateGraphicsMemoryInPreferredPool(allocationProperties, nullptr);
    // Fatal check: hostUSM is dereferenced and freed below.
    ASSERT_NE(nullptr, hostUSM);

    EXPECT_EQ(1u, SysCalls::mmapCapturedExtendedPointers.size());
    // Only read element 0 when something was captured; the size mismatch is
    // already reported above and the allocation is still released at the end.
    if (!SysCalls::mmapCapturedExtendedPointers.empty()) {
        auto gpuAddress = reinterpret_cast<uint64_t>(SysCalls::mmapCapturedExtendedPointers[0]);
        SysCalls::mmapCapturedExtendedPointers.clear();
        auto gmmHelper = memoryManager->getGmmHelper(mockRootDeviceIndex);
        EXPECT_LE(memoryManager->getGfxPartition(mockRootDeviceIndex)->getHeapBase(HeapIndex::HEAP_EXTENDED), gmmHelper->decanonize(gpuAddress));
        EXPECT_GT(memoryManager->getGfxPartition(mockRootDeviceIndex)->getHeapLimit(HeapIndex::HEAP_EXTENDED), gmmHelper->decanonize(gpuAddress));

        EXPECT_NE(hostUSM->getGpuAddress(), gpuAddress);
    }
    memoryManager->freeGraphicsMemory(hostUSM);
}
|
||||
|
||||
Reference in New Issue
Block a user