refactor: remove unneeded arguments from adjustGpuPtrToHostAddressSpace

- also add tests to confirm that proper alignment is applied

Signed-off-by: Katarzyna Cencelewska <katarzyna.cencelewska@intel.com>
This commit is contained in:
Katarzyna Cencelewska
2023-07-17 12:57:15 +00:00
committed by Compute-Runtime-Automation
parent 8e07dd30cb
commit 33a5dd486b
5 changed files with 144 additions and 15 deletions

View File

@@ -104,7 +104,6 @@ class WddmAllocation : public GraphicsAllocation {
std::string getAllocationInfoString() const override; std::string getAllocationInfoString() const override;
uint64_t &getGpuAddressToModify() { return gpuAddress; } uint64_t &getGpuAddressToModify() { return gpuAddress; }
bool isLocalMemoryPool() { return memoryPool == MemoryPool::LocalMemory; }
// OS assigned fields // OS assigned fields
D3DKMT_HANDLE resourceHandle = 0u; // used by shared resources D3DKMT_HANDLE resourceHandle = 0u; // used by shared resources

View File

@@ -210,7 +210,7 @@ GraphicsAllocation *WddmMemoryManager::allocateGraphicsMemoryUsingKmdAndMapItToC
if ((!(alignGpuAddressTo64KB) && executionEnvironment.rootDeviceEnvironments[allocationData.rootDeviceIndex]->getHardwareInfo()->capabilityTable.gpuAddressSpace >= MemoryConstants::max64BitAppAddress) || is32bit) { if ((!(alignGpuAddressTo64KB) && executionEnvironment.rootDeviceEnvironments[allocationData.rootDeviceIndex]->getHardwareInfo()->capabilityTable.gpuAddressSpace >= MemoryConstants::max64BitAppAddress) || is32bit) {
void *requiredGpuVa = cpuPtr; void *requiredGpuVa = cpuPtr;
if (!cpuPtr) { if (!cpuPtr) {
adjustGpuPtrToHostAddressSpace(allocationData, *wddmAllocation.get(), sizeAligned, requiredGpuVa); adjustGpuPtrToHostAddressSpace(*wddmAllocation.get(), requiredGpuVa);
} }
status = mapGpuVirtualAddress(wddmAllocation.get(), requiredGpuVa); status = mapGpuVirtualAddress(wddmAllocation.get(), requiredGpuVa);
} else { } else {
@@ -1330,7 +1330,7 @@ GraphicsAllocation *WddmMemoryManager::allocateGraphicsMemoryInDevicePool(const
auto &wddm = getWddm(allocationData.rootDeviceIndex); auto &wddm = getWddm(allocationData.rootDeviceIndex);
adjustGpuPtrToHostAddressSpace(allocationData, *wddmAllocation.get(), sizeAligned, requiredGpuVa); adjustGpuPtrToHostAddressSpace(*wddmAllocation.get(), requiredGpuVa);
if (!createWddmAllocation(wddmAllocation.get(), requiredGpuVa)) { if (!createWddmAllocation(wddmAllocation.get(), requiredGpuVa)) {
for (auto handleId = 0u; handleId < allocationData.storageInfo.getNumBanks(); handleId++) { for (auto handleId = 0u; handleId < allocationData.storageInfo.getNumBanks(); handleId++) {
@@ -1399,22 +1399,30 @@ void WddmMemoryManager::registerAllocationInOs(GraphicsAllocation *allocation) {
} }
} }
// Returns true when `type` is one of BUFFER, SHARED_BUFFER, SCRATCH_SURFACE,
// LINEAR_STREAM or PRIVATE_SURFACE; returns false for every other allocation type.
bool WddmMemoryManager::isStatelessAccessRequired(AllocationType type) {
    switch (type) {
    case AllocationType::BUFFER:
    case AllocationType::SHARED_BUFFER:
    case AllocationType::SCRATCH_SURFACE:
    case AllocationType::LINEAR_STREAM:
    case AllocationType::PRIVATE_SURFACE:
        return true;
    default:
        return false;
    }
}
template <bool Is32Bit> template <bool Is32Bit>
void WddmMemoryManager::adjustGpuPtrToHostAddressSpace(const AllocationData &allocationData, WddmAllocation &wddmAllocation, size_t sizeAligned, void *&requiredGpuVa) { void WddmMemoryManager::adjustGpuPtrToHostAddressSpace(WddmAllocation &wddmAllocation, void *&requiredGpuVa) {
if constexpr (Is32Bit) { if constexpr (Is32Bit) {
if (executionEnvironment.rootDeviceEnvironments[allocationData.rootDeviceIndex]->isFullRangeSvm()) { auto rootDeviceIndex = wddmAllocation.getRootDeviceIndex();
if (allocationData.type == AllocationType::BUFFER || if (executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->isFullRangeSvm()) {
allocationData.type == AllocationType::SHARED_BUFFER || if (isStatelessAccessRequired(wddmAllocation.getAllocationType())) {
allocationData.type == AllocationType::SCRATCH_SURFACE || size_t reserveSizeAligned = wddmAllocation.getUnderlyingBufferSize();
allocationData.type == AllocationType::LINEAR_STREAM || auto isLocalMemory = wddmAllocation.getMemoryPool() == MemoryPool::LocalMemory;
allocationData.type == AllocationType::PRIVATE_SURFACE) {
size_t reserveSizeAligned = sizeAligned;
auto isLocalMemory = wddmAllocation.isLocalMemoryPool();
if (isLocalMemory) { if (isLocalMemory) {
// add 2MB padding to make sure there are no overlaps between system and local memory // add 2MB padding to make sure there are no overlaps between system and local memory
reserveSizeAligned += 2 * MemoryConstants::megaByte; reserveSizeAligned += 2 * MemoryConstants::megaByte;
} }
auto &wddm = getWddm(allocationData.rootDeviceIndex); auto &wddm = getWddm(rootDeviceIndex);
wddm.reserveValidAddressRange(reserveSizeAligned, requiredGpuVa); wddm.reserveValidAddressRange(reserveSizeAligned, requiredGpuVa);
wddmAllocation.setReservedAddressRange(requiredGpuVa, reserveSizeAligned); wddmAllocation.setReservedAddressRange(requiredGpuVa, reserveSizeAligned);
requiredGpuVa = isLocalMemory ? alignUp(requiredGpuVa, 2 * MemoryConstants::megaByte) : requiredGpuVa; requiredGpuVa = isLocalMemory ? alignUp(requiredGpuVa, 2 * MemoryConstants::megaByte) : requiredGpuVa;

View File

@@ -115,7 +115,8 @@ class WddmMemoryManager : public MemoryManager {
bool mapMultiHandleAllocationWithRetry(WddmAllocation *allocation, const void *requiredGpuPtr); bool mapMultiHandleAllocationWithRetry(WddmAllocation *allocation, const void *requiredGpuPtr);
bool createGpuAllocationsWithRetry(WddmAllocation *graphicsAllocation); bool createGpuAllocationsWithRetry(WddmAllocation *graphicsAllocation);
template <bool Is32Bit = is32bit> template <bool Is32Bit = is32bit>
void adjustGpuPtrToHostAddressSpace(const AllocationData &allocationData, WddmAllocation &wddmAllocation, size_t sizeAligned, void *&requiredGpuVa); void adjustGpuPtrToHostAddressSpace(WddmAllocation &wddmAllocation, void *&requiredGpuVa);
bool isStatelessAccessRequired(AllocationType type);
AlignedMallocRestrictions mallocRestrictions; AlignedMallocRestrictions mallocRestrictions;
Wddm &getWddm(uint32_t rootDeviceIndex) const; Wddm &getWddm(uint32_t rootDeviceIndex) const;

View File

@@ -37,6 +37,7 @@ class MockWddmMemoryManager : public MemoryManagerCreate<WddmMemoryManager> {
using BaseClass::unMapPhysicalToVirtualMemory; using BaseClass::unMapPhysicalToVirtualMemory;
using MemoryManagerCreate<WddmMemoryManager>::MemoryManagerCreate; using MemoryManagerCreate<WddmMemoryManager>::MemoryManagerCreate;
using BaseClass::getHugeGfxMemoryChunkSize; using BaseClass::getHugeGfxMemoryChunkSize;
using BaseClass::isStatelessAccessRequired;
GraphicsAllocation *allocateGraphicsMemory64kb(const AllocationData &allocationData) override { GraphicsAllocation *allocateGraphicsMemory64kb(const AllocationData &allocationData) override {
allocationGraphicsMemory64kbCreated = true; allocationGraphicsMemory64kbCreated = true;

View File

@@ -165,6 +165,7 @@ TEST_F(WddmMemoryManagerTests, givenAllocateGraphicsMemory64kbWhen32bitThenAddre
class MockAllocateGraphicsMemoryUsingKmdAndMapItToCpuVAWddm : public MemoryManagerCreate<WddmMemoryManager> { class MockAllocateGraphicsMemoryUsingKmdAndMapItToCpuVAWddm : public MemoryManagerCreate<WddmMemoryManager> {
public: public:
using WddmMemoryManager::adjustGpuPtrToHostAddressSpace;
using WddmMemoryManager::allocateGraphicsMemoryUsingKmdAndMapItToCpuVA; using WddmMemoryManager::allocateGraphicsMemoryUsingKmdAndMapItToCpuVA;
using WddmMemoryManager::mapGpuVirtualAddress; using WddmMemoryManager::mapGpuVirtualAddress;
MockAllocateGraphicsMemoryUsingKmdAndMapItToCpuVAWddm(ExecutionEnvironment &executionEnvironment) : MemoryManagerCreate(false, false, executionEnvironment) {} MockAllocateGraphicsMemoryUsingKmdAndMapItToCpuVAWddm(ExecutionEnvironment &executionEnvironment) : MemoryManagerCreate(false, false, executionEnvironment) {}
@@ -354,4 +355,123 @@ TEST_F(WddmMemoryManagerAllocPathTests, givenAllocateGraphicsMemoryUsingKmdAndMa
EXPECT_LT(graphicsAllocation->getGpuAddress(), MemoryConstants::max32BitAddress); EXPECT_LT(graphicsAllocation->getGpuAddress(), MemoryConstants::max32BitAddress);
memoryManager->freeGraphicsMemory(graphicsAllocation); memoryManager->freeGraphicsMemory(graphicsAllocation);
} }
// WddmMock variant whose reserveValidAddressRange() never fails: it bumps the call counter
// and always hands back `dummyAddress`, ignoring the requested size.
// The default dummy address (0x43211111) is not a multiple of 2MB.
class MockWddmReserveValidAddressRange : public WddmMock {
  public:
    // Address returned through `reservedMem` on every reserveValidAddressRange() call.
    void *dummyAddress = reinterpret_cast<void *>(0x43211111);

    MockWddmReserveValidAddressRange(RootDeviceEnvironment &rootDeviceEnvironment) : WddmMock(rootDeviceEnvironment) {}

    // Records the call, writes `dummyAddress` into `reservedMem` and reports success.
    bool reserveValidAddressRange(size_t size, void *&reservedMem) override {
        ++reserveValidAddressRangeResult.called;
        reservedMem = dummyAddress;
        return true;
    }
};
// 32-bit only: for a BUFFER allocation in MemoryPool::LocalMemory, the address produced by
// adjustGpuPtrToHostAddressSpace must be 2MB-aligned and must therefore differ from the
// unaligned address returned by the mocked reserveValidAddressRange.
TEST_F(WddmMemoryManagerAllocPathTests, givenLocalMemoryWhen32bitAndCallAdjustGpuPtrToHostAddressSpaceThenProperAlignmentIsApplied) {
    if constexpr (is64bit) {
        GTEST_SKIP();
    }
    auto mockWddm = std::make_unique<MockWddmReserveValidAddressRange>(*executionEnvironment->rootDeviceEnvironments[0].get());
    // Read the mock's address before ownership of the mock is handed to the OS interface.
    auto addressWithoutAlignment = reinterpret_cast<uint64_t>(mockWddm->dummyAddress);
    uint32_t rootDeviceIndex = 0u;
    executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->osInterface->setDriverModel(std::move(mockWddm));
    size_t size = 10;

    auto wddmAllocation = std::make_unique<WddmAllocation>(rootDeviceIndex,
                                                           1u, // numGmms
                                                           NEO::AllocationType::BUFFER, nullptr, 0,
                                                           size, nullptr, MemoryPool::LocalMemory,
                                                           0u, // shareable
                                                           0u);
    // Start from nullptr: if the call leaves the out-parameter untouched, the check below
    // fails deterministically instead of reading an indeterminate pointer value.
    void *addressPtr = nullptr;
    memoryManager->adjustGpuPtrToHostAddressSpace(*wddmAllocation.get(), addressPtr);

    EXPECT_NE(nullptr, addressPtr);
    auto address = reinterpret_cast<uint64_t>(addressPtr);
    uint64_t alignmentMask = 2 * MemoryConstants::megaByte - 1;
    EXPECT_FALSE(address & alignmentMask);
    EXPECT_NE(addressWithoutAlignment, address);
}
// 32-bit only: for a BUFFER allocation in MemoryPool::System64KBPages, the address produced by
// adjustGpuPtrToHostAddressSpace must be exactly the one returned by the mocked
// reserveValidAddressRange, i.e. no extra alignment is applied.
TEST_F(WddmMemoryManagerAllocPathTests, givenSystemMemoryWhen32bitAndCallAdjustGpuPtrToHostAddressSpaceThenThereIsNoExtraAlignment) {
    if constexpr (is64bit) {
        GTEST_SKIP();
    }
    auto mockWddm = std::make_unique<MockWddmReserveValidAddressRange>(*executionEnvironment->rootDeviceEnvironments[0].get());
    // Read the mock's address before ownership of the mock is handed to the OS interface.
    auto expectedAddress = reinterpret_cast<uint64_t>(mockWddm->dummyAddress);
    uint32_t rootDeviceIndex = 0u;
    executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->osInterface->setDriverModel(std::move(mockWddm));
    size_t size = 10;

    auto wddmAllocation = std::make_unique<WddmAllocation>(rootDeviceIndex,
                                                           1u, // numGmms
                                                           NEO::AllocationType::BUFFER, nullptr, 0,
                                                           size, nullptr, MemoryPool::System64KBPages,
                                                           0u, // shareable
                                                           0u);
    // Start from nullptr: if the call leaves the out-parameter untouched, the checks below
    // fail deterministically instead of reading an indeterminate pointer value.
    void *addressPtr = nullptr;
    memoryManager->adjustGpuPtrToHostAddressSpace(*wddmAllocation.get(), addressPtr);

    EXPECT_NE(nullptr, addressPtr);
    auto address = reinterpret_cast<uint64_t>(addressPtr);
    EXPECT_EQ(expectedAddress, address);
}
// Checks isStatelessAccessRequired() against two explicit lists of allocation types:
// the five types expected to yield true, and a list of other types expected to yield false.
TEST_F(WddmMemoryManagerTests, givenTypeWhenCallIsStatelessAccessRequiredThenProperValueIsReturned) {
    auto wddmMemoryManager = std::make_unique<MockWddmMemoryManager>(*executionEnvironment);

    // Types for which the query must answer true.
    const auto typesRequiringStatelessAccess = {
        AllocationType::BUFFER,
        AllocationType::SHARED_BUFFER,
        AllocationType::SCRATCH_SURFACE,
        AllocationType::LINEAR_STREAM,
        AllocationType::PRIVATE_SURFACE};

    // Types for which the query must answer false.
    const auto typesNotRequiringStatelessAccess = {
        AllocationType::BUFFER_HOST_MEMORY,
        AllocationType::COMMAND_BUFFER,
        AllocationType::CONSTANT_SURFACE,
        AllocationType::EXTERNAL_HOST_PTR,
        AllocationType::FILL_PATTERN,
        AllocationType::GLOBAL_SURFACE,
        AllocationType::IMAGE,
        AllocationType::INDIRECT_OBJECT_HEAP,
        AllocationType::INSTRUCTION_HEAP,
        AllocationType::INTERNAL_HEAP,
        AllocationType::INTERNAL_HOST_MEMORY,
        AllocationType::KERNEL_ARGS_BUFFER,
        AllocationType::KERNEL_ISA,
        AllocationType::KERNEL_ISA_INTERNAL,
        AllocationType::MAP_ALLOCATION,
        AllocationType::MCS,
        AllocationType::PIPE,
        AllocationType::PREEMPTION,
        AllocationType::PRINTF_SURFACE,
        AllocationType::PROFILING_TAG_BUFFER,
        AllocationType::SHARED_CONTEXT_IMAGE,
        AllocationType::SHARED_IMAGE,
        AllocationType::SHARED_RESOURCE_COPY,
        AllocationType::SURFACE_STATE_HEAP,
        AllocationType::SVM_CPU,
        AllocationType::SVM_GPU,
        AllocationType::SVM_ZERO_COPY,
        AllocationType::TAG_BUFFER,
        AllocationType::GLOBAL_FENCE,
        AllocationType::TIMESTAMP_PACKET_TAG_BUFFER,
        AllocationType::WRITE_COMBINED,
        AllocationType::RING_BUFFER,
        AllocationType::SEMAPHORE_BUFFER,
        AllocationType::DEBUG_CONTEXT_SAVE_AREA,
        AllocationType::DEBUG_SBA_TRACKING_BUFFER,
        AllocationType::DEBUG_MODULE_AREA,
        AllocationType::UNIFIED_SHARED_MEMORY,
        AllocationType::WORK_PARTITION_SURFACE,
        AllocationType::GPU_TIMESTAMP_DEVICE_BUFFER,
        AllocationType::SW_TAG_BUFFER,
        AllocationType::DEFERRED_TASKS_LIST,
        AllocationType::ASSERT_BUFFER};

    for (auto type : typesRequiringStatelessAccess) {
        EXPECT_TRUE(wddmMemoryManager->isStatelessAccessRequired(type));
    }

    for (auto type : typesNotRequiringStatelessAccess) {
        EXPECT_FALSE(wddmMemoryManager->isStatelessAccessRequired(type));
    }
}