fix: bindlessHeapsHelper handle unavailable external heap

This PR handles the situation in which a component
has reserved a front window space for itself in the external heap,
so that the Compute Runtime cannot access this area.

In such a situation, we perform the following steps:
1. reserve a 4GB chunk in heapStandard
2. split that chunk into 2 parts: heapFrontWindow and heapRegular
3. from this point on, map all linearStream allocations in the reserved 4GB
chunk

Patch applies to Windows and WSL.
Patch only applies when the bindless global allocator is enabled.

Related-To: HSD-16025889919
Signed-off-by: Fabian Zwoliński <fabian.zwolinski@intel.com>
This commit is contained in:
Fabian Zwoliński
2024-12-04 16:21:36 +00:00
committed by Compute-Runtime-Automation
parent e575bc52c2
commit d2ce3badfc
12 changed files with 589 additions and 7 deletions

View File

@@ -10,6 +10,8 @@
#include "shared/source/device/device.h"
#include "shared/source/execution_environment/execution_environment.h"
#include "shared/source/execution_environment/root_device_environment.h"
#include "shared/source/gmm_helper/gmm_helper.h"
#include "shared/source/helpers/driver_model_type.h"
#include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/helpers/string.h"
#include "shared/source/indirect_heap/indirect_heap.h"
@@ -18,11 +20,32 @@
#include "shared/source/memory_manager/memory_manager.h"
#include "shared/source/memory_manager/memory_operations_handler.h"
#include "shared/source/os_interface/os_context.h"
#include "shared/source/os_interface/os_interface.h"
#include "shared/source/utilities/heap_allocator.h"
namespace NEO {
// Four 64 KB pages.
constexpr size_t globalSshAllocationSize = 4 * MemoryConstants::pageSize64k;
// Byte offset inside the border color buffer at which the alpha border color is copied:
// four floats, rounded up to a full cache line.
constexpr size_t borderColorAlphaOffset = alignUp(4 * sizeof(float), MemoryConstants::cacheLineSize);
// Heap on which the fallback range is reserved by initializeReservedMemory().
constexpr HeapIndex heapIndexForPoolReservedRange = HeapIndex::heapStandard;
// Total size of the fallback range (4 GB).
constexpr size_t reservedRangeSize = static_cast<size_t>(4 * MemoryConstants::gigaByte);
// Leading part of the reserved range, managed by the heapFrontWindow allocator.
constexpr size_t heapFrontWindowSize = GfxPartition::externalFrontWindowPoolSize;
// Remainder of the reserved range, managed by the heapRegular allocator.
constexpr size_t heapRegularSize = reservedRangeSize - heapFrontWindowSize;
/*
* __________________________________ STANDARD __________________________________
* / \
* / ____________________ Reserved 4GB _____________________ \
* / / \ \
* / / \ \
* |_____________|_________________|_______________________________________|____________|
* | | | | |
* heapFrontWindow heapRegular
* ^
* reservedRangeBase
*/
using BindlesHeapType = BindlessHeapsHelper::BindlesHeapType;
BindlessHeapsHelper::BindlessHeapsHelper(Device *rootDevice, bool isMultiOsContextCapable) : rootDevice(rootDevice),
@@ -34,6 +57,14 @@ BindlessHeapsHelper::BindlessHeapsHelper(Device *rootDevice, bool isMultiOsConte
for (auto heapType = 0; heapType < BindlesHeapType::numHeapTypes; heapType++) {
auto size = MemoryConstants::pageSize64k;
if (heapType == BindlesHeapType::specialSsh) {
if (isReservedMemoryModeAvailable() &&
!tryReservingMemoryForSpecialSsh(size, MemoryConstants::pageSize64k)) {
useReservedMemory = initializeReservedMemory();
}
}
auto heapAllocation = getHeapAllocation(size, MemoryConstants::pageSize64k, heapType == BindlesHeapType::specialSsh);
UNRECOVERABLE_IF(heapAllocation == nullptr);
ssHeapsAllocations.push_back(heapAllocation);
@@ -48,12 +79,70 @@ BindlessHeapsHelper::BindlessHeapsHelper(Device *rootDevice, bool isMultiOsConte
memcpy_s(ptrOffset(borderColorStates->getUnderlyingBuffer(), borderColorAlphaOffset), sizeof(borderColorAlpha), borderColorAlpha, sizeof(borderColorDefault));
}
std::optional<AddressRange> BindlessHeapsHelper::reserveMemoryRange(size_t size, size_t alignment, HeapIndex heapIndex) {
RootDeviceIndicesContainer rootDeviceIndices;
rootDeviceIndices.pushUnique(rootDeviceIndex);
uint32_t reservedOnRootDevice = 0;
auto reservedRange = memManager->reserveGpuAddressOnHeap(
0ull, size, rootDeviceIndices, &reservedOnRootDevice, heapIndex, alignment);
if (reservedRange.address == 0u) {
return std::nullopt;
}
reservedRanges.push_back({reservedRange.address, reservedRange.size});
return reservedRange;
}
bool BindlessHeapsHelper::tryReservingMemoryForSpecialSsh(const size_t size, size_t alignment) {
auto heapIndex = memManager->isLocalMemorySupported(rootDeviceIndex) ? HeapIndex::heapExternalDeviceFrontWindow : HeapIndex::heapExternalFrontWindow;
auto reservedRange = reserveMemoryRange(size, alignment, heapIndex);
return reservedRange.has_value();
}
bool BindlessHeapsHelper::initializeReservedMemory() {
if (reservedMemoryInitialized) {
return true;
}
auto reservedRangeOpt = reserveMemoryRange(reservedRangeSize, MemoryConstants::pageSize64k, heapIndexForPoolReservedRange);
if (!reservedRangeOpt.has_value()) {
return false;
}
DEBUG_BREAK_IF((reservedRangeOpt.value().address % MemoryConstants::pageSize64k) != 0);
auto reservedRange = reservedRangeOpt.value();
reservedRangeBase = rootDevice->getRootDeviceEnvironmentRef().getGmmHelper()->decanonize(reservedRange.address);
heapFrontWindow = std::make_unique<HeapAllocator>(reservedRangeBase, heapFrontWindowSize, MemoryConstants::pageSize64k, 0);
heapRegular = std::make_unique<HeapAllocator>(reservedRangeBase + heapFrontWindowSize, heapRegularSize, MemoryConstants::pageSize64k, 0);
memManager->addCustomHeapAllocatorConfig(AllocationType::linearStream, true, {heapFrontWindow.get(), reservedRangeBase});
memManager->addCustomHeapAllocatorConfig(AllocationType::linearStream, false, {heapRegular.get(), reservedRangeBase});
reservedMemoryInitialized = true;
return true;
}
// Releases everything the helper obtained from the memory manager: the surface state heap
// allocations, the border color allocation, every GPU address range recorded in reservedRanges,
// and - if the reserved-memory mode was set up - the custom linearStream allocator configs.
BindlessHeapsHelper::~BindlessHeapsHelper() {
for (auto *allocation : ssHeapsAllocations) {
memManager->freeGraphicsMemory(allocation);
}
memManager->freeGraphicsMemory(borderColorStates);
ssHeapsAllocations.clear();
// Return every range obtained through reserveMemoryRange().
for (const auto &range : reservedRanges) {
memManager->freeGpuAddress(range, rootDeviceIndex);
}
reservedRanges.clear();
// The configs hold raw pointers to heapFrontWindow / heapRegular, which are members of this
// object, so they are unregistered here.
if (reservedMemoryInitialized) {
memManager->removeCustomHeapAllocatorConfig(AllocationType::linearStream, true);
memManager->removeCustomHeapAllocatorConfig(AllocationType::linearStream, false);
}
}
GraphicsAllocation *BindlessHeapsHelper::getHeapAllocation(size_t heapSize, size_t alignment, bool allocInFrontWindow) {
@@ -72,6 +161,14 @@ GraphicsAllocation *BindlessHeapsHelper::getHeapAllocation(size_t heapSize, size
return allocation;
}
// Reports whether the reserved-memory fallback may be used on this device.
// The mode is available only when an OS interface exists and its driver model is WDDM.
// Returns false when there is no OS interface or no driver model attached to it.
bool BindlessHeapsHelper::isReservedMemoryModeAvailable() {
    auto *osInterface = rootDevice->getRootDeviceEnvironment().osInterface.get();
    if (osInterface == nullptr) {
        return false;
    }
    // An OSInterface can exist before a driver model is attached to it, so guard the
    // pointer instead of dereferencing it unconditionally.
    auto *driverModel = osInterface->getDriverModel();
    if (driverModel == nullptr) {
        return false;
    }
    return driverModel->getDriverModelType() == NEO::DriverModelType::wddm;
}
void BindlessHeapsHelper::clearStateDirtyForContext(uint32_t osContextId) {
std::lock_guard<std::mutex> autolock(this->mtx);

View File

@@ -13,12 +13,15 @@
#include <array>
#include <memory>
#include <mutex>
#include <optional>
#include <unordered_map>
#include <vector>
namespace NEO {
class IndirectHeap;
struct AddressRange;
class HeapAllocator;
namespace BindlessImageSlot {
constexpr uint32_t image = 0;
@@ -68,6 +71,12 @@ class BindlessHeapsHelper {
bool getStateDirtyForContext(uint32_t osContextId);
void clearStateDirtyForContext(uint32_t osContextId);
protected:
// Tries to reserve `size` bytes in the external front window heap (the device variant when
// local memory is supported); returns true on success.
bool tryReservingMemoryForSpecialSsh(const size_t size, size_t alignment);
// Reserves a GPU address range on `heapIndex` and records it in reservedRanges;
// returns std::nullopt when the reservation fails.
std::optional<AddressRange> reserveMemoryRange(size_t size, size_t alignment, HeapIndex heapIndex);
// Reserves the fallback range, creates the two allocators over it and registers them with the
// memory manager; returns false when the reservation fails, true otherwise.
bool initializeReservedMemory();
// True when an OS interface is present and its driver model type is wddm.
bool isReservedMemoryModeAvailable();
protected:
Device *rootDevice = nullptr;
const size_t surfaceStateSize;
@@ -89,5 +98,14 @@ class BindlessHeapsHelper {
std::mutex mtx;
DeviceBitfield deviceBitfield;
bool globalBindlessDsh = false;
// Result of initializeReservedMemory() when the constructor had to fall back to the reserved range.
bool useReservedMemory = false;
// Set once the reserved range, its allocators and the custom allocator configs are in place.
bool reservedMemoryInitialized = false;
// Decanonized start address of the reserved range.
uint64_t reservedRangeBase = 0;
// Allocator over the first heapFrontWindowSize bytes of the reserved range.
std::unique_ptr<HeapAllocator> heapFrontWindow;
// Allocator over the rest of the reserved range.
std::unique_ptr<HeapAllocator> heapRegular;
// Every range obtained via reserveMemoryRange(); freed in the destructor.
std::vector<AddressRange> reservedRanges;
};
} // namespace NEO

View File

@@ -1220,4 +1220,20 @@ bool MemoryManager::usmCompressionSupported(Device *device) {
return gfxCoreHelper.usmCompressionSupported(hwInfo);
}
// Registers `config` for the (allocationType, isFrontWindowPool) pair.
// An entry already stored under the same pair is overwritten.
void MemoryManager::addCustomHeapAllocatorConfig(AllocationType allocationType, bool isFrontWindowPool, const CustomHeapAllocatorConfig &config) {
    const std::pair<AllocationType, bool> key{allocationType, isFrontWindowPool};
    customHeapAllocators.insert_or_assign(key, config);
}
// Looks up the config registered for the (allocationType, isFrontWindowPool) pair.
// Returns a reference wrapper to the stored entry, or std::nullopt when nothing is registered.
std::optional<std::reference_wrapper<CustomHeapAllocatorConfig>> MemoryManager::getCustomHeapAllocatorConfig(AllocationType allocationType, bool isFrontWindowPool) {
    const auto entry = customHeapAllocators.find({allocationType, isFrontWindowPool});
    if (entry == customHeapAllocators.end()) {
        return std::nullopt;
    }
    return entry->second;
}
// Drops the config registered for the (allocationType, isFrontWindowPool) pair, if there is one.
void MemoryManager::removeCustomHeapAllocatorConfig(AllocationType allocationType, bool isFrontWindowPool) {
    const std::pair<AllocationType, bool> key{allocationType, isFrontWindowPool};
    customHeapAllocators.erase(key);
}
} // namespace NEO

View File

@@ -20,6 +20,7 @@
#include <cstdint>
#include <map>
#include <mutex>
#include <optional>
#include <unordered_map>
#include <vector>
@@ -43,6 +44,7 @@ class Gmm;
class HostPtrManager;
class OsContext;
class PrefetchManager;
class HeapAllocator;
enum AllocationUsage {
TEMPORARY_ALLOCATION,
@@ -81,6 +83,11 @@ struct VirtualMemoryReservation {
size_t reservationTotalSize;
};
// Describes an allocator that replaces the default heap for one allocation type / pool pair.
struct CustomHeapAllocatorConfig {
    // Allocator that hands out the GPU virtual addresses; this struct does not delete it.
    HeapAllocator *allocator{nullptr};
    // GPU VA base to report for allocations served by `allocator`.
    // The maximum uint64_t value is the "not set" default.
    uint64_t gpuVaBase{std::numeric_limits<uint64_t>::max()};
};
constexpr size_t paddingBufferSize = 2 * MemoryConstants::megaByte;
namespace MemoryTransferHelper {
@@ -342,6 +349,10 @@ class MemoryManager {
return hostAllocationsSavedForReuseSize;
}
// Stores `config` under the (allocationType, isFrontWindowPool) key, replacing any existing entry.
void addCustomHeapAllocatorConfig(AllocationType allocationType, bool isFrontWindowPool, const CustomHeapAllocatorConfig &config);
// Returns a reference to the config stored under the key, or std::nullopt when none is stored.
std::optional<std::reference_wrapper<CustomHeapAllocatorConfig>> getCustomHeapAllocatorConfig(AllocationType allocationType, bool isFrontWindowPool);
// Erases the config stored under the key, if any.
void removeCustomHeapAllocatorConfig(AllocationType allocationType, bool isFrontWindowPool);
protected:
bool getAllocationData(AllocationData &allocationData, const AllocationProperties &properties, const void *hostPtr, const StorageInfo &storageInfo);
static void overrideAllocationData(AllocationData &allocationData, const AllocationProperties &properties);
@@ -416,6 +427,7 @@ class MemoryManager {
std::atomic<size_t> sysMemAllocsSize;
size_t hostAllocationsSavedForReuseSize = 0u;
mutable std::mutex hostAllocationsReuseMtx;
std::map<std::pair<AllocationType, bool>, CustomHeapAllocatorConfig> customHeapAllocators;
};
std::unique_ptr<DeferredDeleter> createDeferredDeleter();

View File

@@ -40,6 +40,7 @@
#include "shared/source/os_interface/windows/wddm_allocation.h"
#include "shared/source/os_interface/windows/wddm_residency_allocations_container.h"
#include "shared/source/os_interface/windows/wddm_residency_controller.h"
#include "shared/source/utilities/heap_allocator.h"
#include "shared/source/utilities/logger_neo_only.h"
#include <algorithm>
@@ -587,10 +588,8 @@ GraphicsAllocation *WddmMemoryManager::allocate32BitGraphicsMemoryImpl(const All
freeSystemMemory(pSysMem);
return nullptr;
}
auto baseAddress = getGfxPartition(allocationData.rootDeviceIndex)->getHeapBase(heapAssigners[allocationData.rootDeviceIndex]->get32BitHeapIndex(allocationData.type, false, *hwInfo, allocationData.flags.use32BitFrontWindow));
UNRECOVERABLE_IF(gmmHelper->canonize(baseAddress) != wddmAllocation->getGpuBaseAddress());
wddmAllocation->setGpuBaseAddress(gmmHelper->canonize(baseAddress));
[[maybe_unused]] auto baseAddress = getGfxPartition(allocationData.rootDeviceIndex)->getHeapBase(heapAssigners[allocationData.rootDeviceIndex]->get32BitHeapIndex(allocationData.type, false, *hwInfo, allocationData.flags.use32BitFrontWindow));
DEBUG_BREAK_IF(gmmHelper->canonize(baseAddress) != wddmAllocation->getGpuBaseAddress());
if (storageInfo.isLockable) {
auto lockedPtr = lockResource(wddmAllocation.get());
@@ -1045,9 +1044,16 @@ bool WddmMemoryManager::mapGpuVaForOneHandleAllocation(WddmAllocation *allocatio
if (allocation->getReservedGpuVirtualAddress()) {
addressToMap = allocation->getReservedGpuVirtualAddress();
}
auto customHeapAllocatorCfg = getCustomHeapAllocatorConfig(allocation->getAllocationType(), allocation->isAllocInFrontWindowPool());
auto gfxPartition = getGfxPartition(allocation->getRootDeviceIndex());
if (allocation->isAllocInFrontWindowPool()) {
auto alignedSize = allocation->getAlignedSize();
auto alignedSize = allocation->getAlignedSize();
if (customHeapAllocatorCfg.has_value()) {
auto &customRange = customHeapAllocatorCfg.value().get();
addressToMap = customRange.allocator->allocateWithCustomAlignment(alignedSize, MemoryConstants::pageSize64k);
} else if (allocation->isAllocInFrontWindowPool()) {
addressToMap = gfxPartition->heapAllocate(heapIndex, alignedSize);
}
@@ -1067,7 +1073,10 @@ bool WddmMemoryManager::mapGpuVaForOneHandleAllocation(WddmAllocation *allocatio
return false;
}
if (GfxPartition::isAnyHeap32(heapIndex)) {
if (auto config = customHeapAllocatorCfg; config.has_value() && config->get().gpuVaBase != std::numeric_limits<uint64_t>::max()) {
auto gmmHelper = getGmmHelper(allocation->getRootDeviceIndex());
allocation->setGpuBaseAddress(gmmHelper->canonize(config->get().gpuVaBase));
} else if (GfxPartition::isAnyHeap32(heapIndex)) {
auto gmmHelper = getGmmHelper(allocation->getRootDeviceIndex());
allocation->setGpuBaseAddress(gmmHelper->canonize(gfxPartition->getHeapBase(heapIndex)));
}

View File

@@ -58,6 +58,10 @@ class HeapAllocator {
double getUsage() const;
uint64_t getBaseAddress() const {
return this->pLeftBound;
}
protected:
const uint64_t size;
uint64_t availableSize;

View File

@@ -8,6 +8,7 @@
#pragma once
#include "shared/source/helpers/bindless_heaps_helper.h"
#include "shared/source/memory_manager/memory_manager.h"
using namespace NEO;
@@ -34,9 +35,17 @@ class MockBindlesHeapsHelper : public BindlessHeapsHelper {
using BaseClass::borderColorStates;
using BaseClass::globalBindlessDsh;
using BaseClass::growHeap;
using BaseClass::heapFrontWindow;
using BaseClass::heapRegular;
using BaseClass::initializeReservedMemory;
using BaseClass::isMultiOsContextCapable;
using BaseClass::isReservedMemoryModeAvailable;
using BaseClass::memManager;
using BaseClass::releasePoolIndex;
using BaseClass::reservedMemoryInitialized;
using BaseClass::reservedRangeBase;
using BaseClass::reservedRanges;
using BaseClass::reserveMemoryRange;
using BaseClass::reuseSlotCountThreshold;
using BaseClass::rootDeviceIndex;
using BaseClass::ssHeapsAllocations;
@@ -44,6 +53,8 @@ class MockBindlesHeapsHelper : public BindlessHeapsHelper {
using BaseClass::surfaceStateHeaps;
using BaseClass::surfaceStateInHeapVectorReuse;
using BaseClass::surfaceStateSize;
using BaseClass::tryReservingMemoryForSpecialSsh;
using BaseClass::useReservedMemory;
IndirectHeap *specialSsh;
IndirectHeap *globalSsh;

View File

@@ -52,6 +52,7 @@ class MockMemoryManager : public MemoryManagerCreate<OsAgnosticMemoryManager> {
using MemoryManager::useNonSvmHostPtrAlloc;
using OsAgnosticMemoryManager::allocateGraphicsMemoryForImageFromHostPtr;
using MemoryManagerCreate<OsAgnosticMemoryManager>::MemoryManagerCreate;
using MemoryManager::customHeapAllocators;
using MemoryManager::enable64kbpages;
using MemoryManager::executionEnvironment;
using MemoryManager::getPreferredAllocationMethod;
@@ -102,6 +103,41 @@ class MockMemoryManager : public MemoryManagerCreate<OsAgnosticMemoryManager> {
OsAgnosticMemoryManager::freeGraphicsMemoryImpl(gfxAllocation);
};
// Mock override: counts the call, records its arguments, and either forwards to the base
// implementation or returns an empty range.
// The call fails when failReserveGpuAddressOnHeap is set, or when the zero-based index of this
// call is listed in reserveGpuAddressOnHeapFailOnCalls.
AddressRange reserveGpuAddressOnHeap(const uint64_t requiredStartAddress, size_t size, RootDeviceIndicesContainer rootDeviceIndices, uint32_t *reservedOnRootDeviceIndex, HeapIndex heap, size_t alignment) override {
    const auto callIndex = reserveGpuAddressOnHeapCalled++;
    reserveGpuAddressOnHeapParamsPassed.push_back({requiredStartAddress, size, rootDeviceIndices, reservedOnRootDeviceIndex, heap, alignment});

    const bool failThisCall = failReserveGpuAddressOnHeap ||
                              std::find(reserveGpuAddressOnHeapFailOnCalls.begin(), reserveGpuAddressOnHeapFailOnCalls.end(), callIndex) != reserveGpuAddressOnHeapFailOnCalls.end();

    reserveGpuAddressOnHeapResult = failThisCall
                                        ? AddressRange{0u, 0u}
                                        : OsAgnosticMemoryManager::reserveGpuAddressOnHeap(requiredStartAddress, size, rootDeviceIndices, reservedOnRootDeviceIndex, heap, alignment);
    return reserveGpuAddressOnHeapResult;
}
// Snapshot of the arguments passed to one reserveGpuAddressOnHeap() call.
struct ReserveGpuAddressOnHeapParams {
    uint64_t requiredStartAddress = 0u;
    size_t size = 0u;
    RootDeviceIndicesContainer rootDeviceIndices{};
    uint32_t *reservedOnRootDeviceIndex = nullptr;
    HeapIndex heap{};
    size_t alignment = 0u;
};
StackVec<ReserveGpuAddressOnHeapParams, 2> reserveGpuAddressOnHeapParamsPassed{};
StackVec<size_t, 5> reserveGpuAddressOnHeapFailOnCalls;
// Mock override: counts the call, then forwards to the base implementation.
void freeGpuAddress(AddressRange addressRange, uint32_t rootDeviceIndex) override {
    ++freeGpuAddressCalled;
    OsAgnosticMemoryManager::freeGpuAddress(addressRange, rootDeviceIndex);
}
void *lockResourceImpl(GraphicsAllocation &gfxAllocation) override {
lockResourceCalled++;
void *pLockedMemory = nullptr;
@@ -258,6 +294,8 @@ class MockMemoryManager : public MemoryManagerCreate<OsAgnosticMemoryManager> {
uint32_t populateOsHandlesCalled = 0u;
uint32_t allocateGraphicsMemoryForNonSvmHostPtrCalled = 0u;
uint32_t freeGraphicsMemoryCalled = 0u;
uint32_t reserveGpuAddressOnHeapCalled = 0u;
uint32_t freeGpuAddressCalled = 0u;
uint32_t unlockResourceCalled = 0u;
uint32_t lockResourceCalled = 0u;
uint32_t createGraphicsAllocationFromExistingStorageCalled = 0u;
@@ -284,6 +322,7 @@ class MockMemoryManager : public MemoryManagerCreate<OsAgnosticMemoryManager> {
bool allocate32BitGraphicsMemoryImplCalled = false;
bool allocateForShareableCalled = false;
bool failReserveAddress = false;
bool failReserveGpuAddressOnHeap = false;
bool failAllocateSystemMemory = false;
bool failAllocate32Bit = false;
bool failLockResource = false;
@@ -314,6 +353,7 @@ class MockMemoryManager : public MemoryManagerCreate<OsAgnosticMemoryManager> {
GraphicsAllocation *allocateGraphicsMemoryForNonSvmHostPtrResult = nullptr;
std::unique_ptr<AllocationProperties> lastAllocationProperties = nullptr;
std::function<void(const AllocationProperties &)> validateAllocateProperties = [](const AllocationProperties &) -> void {};
AddressRange reserveGpuAddressOnHeapResult = AddressRange{0u, 0u};
};
class MockAllocSysMemAgnosticMemoryManager : public OsAgnosticMemoryManager {

View File

@@ -5,12 +5,16 @@
*
*/
#include "shared/source/gmm_helper/gmm_helper.h"
#include "shared/source/helpers/bindless_heaps_helper.h"
#include "shared/source/indirect_heap/indirect_heap.h"
#include "shared/source/memory_manager/gfx_partition.h"
#include "shared/source/utilities/heap_allocator.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/mocks/mock_bindless_heaps_helper.h"
#include "shared/test/common/mocks/mock_device.h"
#include "shared/test/common/mocks/mock_driver_model.h"
#include "shared/test/common/mocks/mock_gfx_partition.h"
#include "shared/test/common/mocks/mock_graphics_allocation.h"
#include "shared/test/common/mocks/ult_device_factory.h"
#include "shared/test/common/test_macros/test.h"
@@ -557,3 +561,219 @@ TEST_F(BindlessHeapsHelperTests, givenBindlessHeapHelperWhenItsCreatedThenSshAll
EXPECT_EQ(memoryOperationsIface->isResident(getDevice(), *allocation), MemoryOperationsStatus::success);
}
}
// Reserved-memory mode must be reported as available for a WDDM driver model and
// as unavailable for a DRM driver model.
TEST_F(BindlessHeapsHelperTests, givenBindlessHeapHelperWhenDriverModelWDDMThenReservedMemoryModeIsAvailable) {
    auto helper = std::make_unique<MockBindlesHeapsHelper>(getDevice(), false);

    auto &osInterface = getDevice()->getRootDeviceEnvironmentRef().osInterface;
    osInterface.reset(new NEO::OSInterface());

    osInterface->setDriverModel(std::make_unique<NEO::MockDriverModelWDDM>());
    EXPECT_TRUE(helper->isReservedMemoryModeAvailable());

    osInterface->setDriverModel(std::make_unique<NEO::MockDriverModelDRM>());
    EXPECT_FALSE(helper->isReservedMemoryModeAvailable());
}
// A successful reservation is returned to the caller, recorded in reservedRanges,
// and released again when the helper is destroyed.
TEST_F(BindlessHeapsHelperTests, givenBindlessHeapHelperWhenSuccessfullyReservingMemoryRangeThenRangeIsReservedAndStoredAndFreed) {
    auto helper = std::make_unique<MockBindlesHeapsHelper>(getDevice(), false);

    const size_t requestedSize = 1 * MemoryConstants::gigaByte;
    const size_t requestedAlignment = MemoryConstants::pageSize64k;

    auto range = helper->reserveMemoryRange(requestedSize, requestedAlignment, HeapIndex::heapStandard);
    ASSERT_TRUE(range.has_value());
    EXPECT_EQ(requestedSize, range->size);

    EXPECT_EQ(1u, helper->reservedRanges.size());
    const auto &storedRange = helper->reservedRanges[0];
    EXPECT_EQ(storedRange.address, range->address);
    EXPECT_EQ(storedRange.size, range->size);
    EXPECT_EQ(1u, memManager->reserveGpuAddressOnHeapCalled);

    // Destroying the helper must free the recorded range exactly once.
    helper.reset();
    EXPECT_EQ(1u, memManager->freeGpuAddressCalled);
}
// A failed reservation yields no value, records nothing, and leaves nothing to free.
TEST_F(BindlessHeapsHelperTests, givenBindlessHeapHelperWhenUnsuccessfullyReservingMemoryRangeThenNoValueIsReturned) {
    auto helper = std::make_unique<MockBindlesHeapsHelper>(getDevice(), false);

    const size_t requestedSize = 1 * MemoryConstants::gigaByte;
    const size_t requestedAlignment = MemoryConstants::pageSize64k;

    memManager->failReserveGpuAddressOnHeap = true;
    auto range = helper->reserveMemoryRange(requestedSize, requestedAlignment, HeapIndex::heapStandard);

    EXPECT_FALSE(range.has_value());
    EXPECT_EQ(0u, helper->reservedRanges.size());
    EXPECT_EQ(1u, memManager->reserveGpuAddressOnHeapCalled);

    helper.reset();
    EXPECT_EQ(0u, memManager->freeGpuAddressCalled);
}
// The special SSH probe must target heapExternalFrontWindow without local memory and
// heapExternalDeviceFrontWindow with local memory.
TEST_F(BindlessHeapsHelperTests, givenLocalMemorySupportWhenReservingMemoryForSpecialSshThenCorrectHeapIsUsed) {
    auto mockPartition = std::make_unique<MockGfxPartition>();
    mockPartition->callHeapAllocate = false;
    memManager->gfxPartitions[0] = std::move(mockPartition);

    // Same order as the original map iteration: `false` first, then `true`.
    const std::pair<bool, HeapIndex> testCases[] = {
        {false, HeapIndex::heapExternalFrontWindow},
        {true, HeapIndex::heapExternalDeviceFrontWindow}};

    size_t iteration = 0u;
    for (const auto &[localMemSupported, expectedHeapIndex] : testCases) {
        auto helper = std::make_unique<MockBindlesHeapsHelper>(getDevice(), false);
        const size_t requestedSize = MemoryConstants::pageSize64k;
        const size_t requestedAlignment = MemoryConstants::pageSize64k;
        memManager->localMemorySupported = {localMemSupported};

        EXPECT_TRUE(helper->tryReservingMemoryForSpecialSsh(requestedSize, requestedAlignment));

        auto &recordedCalls = memManager->reserveGpuAddressOnHeapParamsPassed;
        ASSERT_GE(recordedCalls.size(), iteration + 1);
        EXPECT_EQ(expectedHeapIndex, recordedCalls[iteration].heap);

        EXPECT_EQ(1u, helper->reservedRanges.size());
        EXPECT_EQ(iteration + 1u, memManager->reserveGpuAddressOnHeapCalled);

        // The counters on the shared memory manager accumulate across iterations.
        helper.reset();
        EXPECT_EQ(iteration + 1u, memManager->freeGpuAddressCalled);
        ++iteration;
    }
}
// When the memory manager refuses the reservation, the probe reports failure and stores no range.
TEST_F(BindlessHeapsHelperTests, givenBindlessHeapHelperWhenSpecialSshReservationFailsThenNoRangeIsReserved) {
    memManager->failReserveGpuAddressOnHeap = true;
    auto helper = std::make_unique<MockBindlesHeapsHelper>(getDevice(), false);

    const size_t requestedSize = MemoryConstants::pageSize64k;
    const size_t requestedAlignment = MemoryConstants::pageSize64k;

    EXPECT_FALSE(helper->tryReservingMemoryForSpecialSsh(requestedSize, requestedAlignment));
    EXPECT_EQ(0u, helper->reservedRanges.size());
    EXPECT_EQ(1u, memManager->reserveGpuAddressOnHeapCalled);
}
// With the initialized flag already set, initialization returns true without reserving anything.
TEST_F(BindlessHeapsHelperTests, givenBindlessHeapHelperWhenReservedMemoryAlreadyInitializedThenEarlyReturnTrue) {
    auto helper = std::make_unique<MockBindlesHeapsHelper>(getDevice(), false);
    helper->reservedMemoryInitialized = true;

    // Ignore any reservations made while constructing the helper.
    memManager->reserveGpuAddressOnHeapCalled = 0u;

    const bool initialized = helper->initializeReservedMemory();
    EXPECT_TRUE(initialized);
    EXPECT_EQ(0u, memManager->reserveGpuAddressOnHeapCalled);
}
// A failed reservation makes initialization return false after exactly one attempt.
TEST_F(BindlessHeapsHelperTests, givenBindlessHeapHelperWhenMemoryReservationFailsDuringInitializationThenInitializationReturnsFalse) {
    auto helper = std::make_unique<MockBindlesHeapsHelper>(getDevice(), false);

    // Ignore any reservations made while constructing the helper.
    memManager->reserveGpuAddressOnHeapCalled = 0u;
    memManager->failReserveGpuAddressOnHeap = true;

    const bool initialized = helper->initializeReservedMemory();
    EXPECT_FALSE(initialized);
    EXPECT_EQ(1u, memManager->reserveGpuAddressOnHeapCalled);
}
// End-to-end check of initializeReservedMemory(): one 4 GB reservation on heapStandard, two
// allocators splitting that range, two custom configs registered with the memory manager, and
// full cleanup when the helper is destroyed.
TEST_F(BindlessHeapsHelperTests, givenBindlessHeapHelperWhenSuccessfullyInitializingReservedMemoryThenHeapsAndAllocatorsAreConfiguredCorrectly) {
// Skip where size_t cannot represent the 4 GB range size.
constexpr uint64_t fullHeapSize = 4 * MemoryConstants::gigaByte;
if (fullHeapSize > std::numeric_limits<size_t>::max()) {
GTEST_SKIP();
}
auto bindlessHeapHelper = std::make_unique<MockBindlesHeapsHelper>(getDevice(), false);
// Start from a clean slate: discard whatever the helper's constructor did.
memManager->reserveGpuAddressOnHeapCalled = 0u;
memManager->customHeapAllocators.clear();
// Override gfxPartition to ensure heapStandard has sufficient free/available space for this test.
auto mockGfxPartition = std::make_unique<MockGfxPartition>();
mockGfxPartition->initHeap(HeapIndex::heapStandard, maxNBitValue(56) + 1, MemoryConstants::teraByte, MemoryConstants::pageSize64k);
memManager->gfxPartitions[0] = std::move(mockGfxPartition);
EXPECT_TRUE(bindlessHeapHelper->initializeReservedMemory());
EXPECT_EQ(1u, memManager->reserveGpuAddressOnHeapCalled);
EXPECT_TRUE(bindlessHeapHelper->reservedMemoryInitialized);
// The single reservation must request 4 GB on heapStandard with 64 KB alignment.
auto &reserveGpuAddressOnHeapParamsPassed = memManager->reserveGpuAddressOnHeapParamsPassed;
ASSERT_EQ(1u, reserveGpuAddressOnHeapParamsPassed.size());
EXPECT_EQ(HeapIndex::heapStandard, reserveGpuAddressOnHeapParamsPassed[0].heap);
EXPECT_EQ(4 * MemoryConstants::gigaByte, reserveGpuAddressOnHeapParamsPassed[0].size);
EXPECT_EQ(MemoryConstants::pageSize64k, reserveGpuAddressOnHeapParamsPassed[0].alignment);
// The helper keeps the decanonized base, while reservedRanges keeps the range as returned.
EXPECT_EQ(rootDevice->getRootDeviceEnvironmentRef().getGmmHelper()->decanonize(memManager->reserveGpuAddressOnHeapResult.address), bindlessHeapHelper->reservedRangeBase);
ASSERT_EQ(1u, bindlessHeapHelper->reservedRanges.size());
EXPECT_EQ(memManager->reserveGpuAddressOnHeapResult.address, bindlessHeapHelper->reservedRanges[0].address);
EXPECT_EQ(memManager->reserveGpuAddressOnHeapResult.size, bindlessHeapHelper->reservedRanges[0].size);
constexpr auto expectedFrontWindowSize = GfxPartition::externalFrontWindowPoolSize;
{
// heapFrontWindow
EXPECT_EQ(bindlessHeapHelper->heapFrontWindow->getBaseAddress(), bindlessHeapHelper->reservedRangeBase);
auto frontWindowSize = bindlessHeapHelper->heapFrontWindow->getLeftSize() + bindlessHeapHelper->heapFrontWindow->getUsedSize();
EXPECT_EQ(expectedFrontWindowSize, frontWindowSize);
}
{
// heapRegular
EXPECT_EQ(bindlessHeapHelper->heapRegular->getBaseAddress(), bindlessHeapHelper->heapFrontWindow->getBaseAddress() + expectedFrontWindowSize);
auto expectedRegularSize = 4 * MemoryConstants::gigaByte - expectedFrontWindowSize;
auto heapRegularSize = bindlessHeapHelper->heapRegular->getLeftSize() + bindlessHeapHelper->heapRegular->getUsedSize();
EXPECT_EQ(expectedRegularSize, heapRegularSize);
}
// Both pools must be registered for linearStream, sharing the reserved range base.
EXPECT_EQ(2u, memManager->customHeapAllocators.size());
{
// heapFrontWindow
ASSERT_TRUE(memManager->getCustomHeapAllocatorConfig(AllocationType::linearStream, true).has_value());
EXPECT_EQ(bindlessHeapHelper->heapFrontWindow.get(), memManager->getCustomHeapAllocatorConfig(AllocationType::linearStream, true)->get().allocator);
EXPECT_EQ(bindlessHeapHelper->reservedRangeBase, memManager->getCustomHeapAllocatorConfig(AllocationType::linearStream, true)->get().gpuVaBase);
}
{
// heapRegular
ASSERT_TRUE(memManager->getCustomHeapAllocatorConfig(AllocationType::linearStream, false).has_value());
EXPECT_EQ(bindlessHeapHelper->heapRegular.get(), memManager->getCustomHeapAllocatorConfig(AllocationType::linearStream, false)->get().allocator);
EXPECT_EQ(bindlessHeapHelper->reservedRangeBase, memManager->getCustomHeapAllocatorConfig(AllocationType::linearStream, false)->get().gpuVaBase);
}
// Destroying the helper must free the range and unregister both configs.
bindlessHeapHelper.reset();
EXPECT_EQ(1u, memManager->freeGpuAddressCalled); // 1 * 4GB reserved range
EXPECT_EQ(0u, memManager->customHeapAllocators.size());
}
// With a WDDM driver model and a failing front window probe, constructing the helper must
// switch it to the reserved-memory mode.
TEST_F(BindlessHeapsHelperTests, givenReservedMemoryModeAvailableWhenSpecialSshReservationInFrontWindowFailsThenReservedMemoryModeIsUsed) {
    auto mockPartition = std::make_unique<MockGfxPartition>();
    mockPartition->callHeapAllocate = false;
    memManager->gfxPartitions[0] = std::move(mockPartition);

    auto &osInterface = getDevice()->getRootDeviceEnvironmentRef().osInterface;
    osInterface.reset(new NEO::OSInterface());
    osInterface->setDriverModel(std::make_unique<NEO::MockDriverModelWDDM>());

    // Fail reserving memory for special ssh (the first reservation call).
    memManager->reserveGpuAddressOnHeapFailOnCalls.push_back(0u);

    auto helper = std::make_unique<MockBindlesHeapsHelper>(getDevice(), false);

    EXPECT_TRUE(helper->reservedMemoryInitialized);
    EXPECT_TRUE(helper->useReservedMemory);
}

View File

@@ -13,6 +13,7 @@
#include "shared/source/memory_manager/unified_memory_manager.h"
#include "shared/source/os_interface/os_interface.h"
#include "shared/source/program/program_initialization.h"
#include "shared/source/utilities/heap_allocator.h"
#include "shared/test/common/compiler_interface/linker_mock.h"
#include "shared/test/common/fixtures/device_fixture.h"
#include "shared/test/common/fixtures/memory_allocator_fixture.h"
@@ -3363,3 +3364,41 @@ TEST(MemoryManagerTest, WhenGettingExtraDevicePropertiesThenPropertiesRemainUnch
EXPECT_EQ(moduleId, 0u);
EXPECT_EQ(serverType, 0u);
}
// Configs registered per (allocation type, front window flag) can be read back unchanged,
// are absent for unregistered keys, and disappear after removal.
TEST(MemoryManagerTest, WhenAddingCustomHeapAllocatorConfigsThenCanRetrieveAndMatchConfigs) {
    const uint64_t heapBase = 0xAAAAAAAA;
    const uint64_t heapFrontStart = 0xAAAABBBB;
    const uint64_t heapRegularStart = 0xEEEEFFFF;
    const size_t heapFrontSize = 1 * MemoryConstants::gigaByte;
    const size_t heapRegularSize = 2 * MemoryConstants::gigaByte;

    auto frontAllocator = std::make_unique<HeapAllocator>(heapFrontStart, heapFrontSize, MemoryConstants::pageSize64k, 0);
    auto regularAllocator = std::make_unique<HeapAllocator>(heapRegularStart, heapRegularSize, MemoryConstants::pageSize64k, 0);

    MockMemoryManager memoryManager;
    memoryManager.addCustomHeapAllocatorConfig(AllocationType::linearStream, true, {frontAllocator.get(), heapBase});
    memoryManager.addCustomHeapAllocatorConfig(AllocationType::linearStream, false, {regularAllocator.get(), heapBase});

    auto frontConfig = memoryManager.getCustomHeapAllocatorConfig(AllocationType::linearStream, true);
    auto regularConfig = memoryManager.getCustomHeapAllocatorConfig(AllocationType::linearStream, false);
    auto missingConfig = memoryManager.getCustomHeapAllocatorConfig(AllocationType::buffer, false);

    EXPECT_TRUE(frontConfig.has_value());
    EXPECT_TRUE(regularConfig.has_value());
    EXPECT_FALSE(missingConfig.has_value());

    EXPECT_EQ(frontAllocator.get(), frontConfig->get().allocator);
    EXPECT_EQ(heapBase, frontConfig->get().gpuVaBase);

    EXPECT_EQ(regularAllocator.get(), regularConfig->get().allocator);
    EXPECT_EQ(heapBase, regularConfig->get().gpuVaBase);

    // After removal neither key resolves any more.
    memoryManager.removeCustomHeapAllocatorConfig(AllocationType::linearStream, true);
    memoryManager.removeCustomHeapAllocatorConfig(AllocationType::linearStream, false);

    EXPECT_FALSE(memoryManager.getCustomHeapAllocatorConfig(AllocationType::linearStream, true).has_value());
    EXPECT_FALSE(memoryManager.getCustomHeapAllocatorConfig(AllocationType::linearStream, false).has_value());
}

View File

@@ -14,6 +14,7 @@
#include "shared/source/os_interface/windows/dxgi_wrapper.h"
#include "shared/source/os_interface/windows/wddm/um_km_data_translator.h"
#include "shared/source/os_interface/windows/windows_wrapper.h"
#include "shared/source/utilities/heap_allocator.h"
#include "shared/source/utilities/tag_allocator.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/helpers/execution_environment_helper.h"
@@ -4384,3 +4385,106 @@ TEST(WddmMemoryManagerTest3, givenWmmWhenAsyncDeleterIsEnabledAndWaitForDeletion
EXPECT_EQ(nullptr, memoryManager.getDeferredDeleter());
debugManager.flags.EnableDeferredDeleter.set(actualDeleterFlag);
}
// Fixture for the custom heap allocator config tests on the WDDM memory manager.
// It builds two HeapAllocator instances over one contiguous address range:
// a front-window part that begins at heapBase and a regular part placed right after it.
class WddmMemoryManagerBindlessHeapHelperCustomHeapAllocatorCfgTest : public WddmMemoryManagerSimpleTest {
  public:
    void SetUp() override {
        // The debug flag is set before the base fixture runs its own SetUp.
        debugManager.flags.UseExternalAllocatorForSshAndDsh.set(1);
        WddmMemoryManagerSimpleTest::SetUp();
        wddm->callBaseMapGpuVa = false;

        // Range layout: [heapFrontStart, heapFrontStart + heapFrontSize) followed by the regular part.
        heapBase = alignUp(0xAAAAAAAA, alignment);
        heapFrontStart = heapBase;
        heapRegularStart = heapFrontStart + heapFrontSize;

        heapFrontWindow = std::make_unique<HeapAllocator>(heapFrontStart, heapFrontSize, alignment, 0);
        heapRegular = std::make_unique<HeapAllocator>(heapRegularStart, heapRegularSize, alignment, 0);
    }

    void TearDown() override {
        WddmMemoryManagerSimpleTest::TearDown();
    }

    // NOTE(review): presumably restores the debug flags changed in SetUp when the fixture is destroyed - confirm.
    DebugManagerStateRestore restore{};

    // Size and alignment used for the allocations requested by the tests.
    size_t allocationSize = MemoryConstants::pageSize64k;
    size_t alignment = MemoryConstants::pageSize64k;

    // Sizes of the two parts of the range.
    size_t heapFrontSize = 1 * MemoryConstants::gigaByte;
    size_t heapRegularSize = 2 * MemoryConstants::gigaByte;

    // Addresses computed in SetUp.
    uint64_t heapBase = 0u;
    uint64_t heapFrontStart = 0u;
    uint64_t heapRegularStart = 0u;

    // Allocators owned by the fixture; tests hand out raw pointers to them via configs.
    std::unique_ptr<HeapAllocator> heapFrontWindow;
    std::unique_ptr<HeapAllocator> heapRegular;
};
// Registers custom configs for linearStream (front-window and regular) and requests
// a front-window allocation: both the GPU address and the GPU base address are expected
// to equal the start of the front-window range.
TEST_F(WddmMemoryManagerBindlessHeapHelperCustomHeapAllocatorCfgTest, givenCustomHeapAllocatorForFrontWindowWhenAllocatingThenGpuAddressAndBaseAreAssignedByCustomAllocator) {
    memoryManager->addCustomHeapAllocatorConfig(AllocationType::linearStream, true, {heapFrontWindow.get(), heapBase});
    memoryManager->addCustomHeapAllocatorConfig(AllocationType::linearStream, false, {heapRegular.get(), heapBase});

    NEO::AllocationProperties allocProperties{mockRootDeviceIndex, true, allocationSize, AllocationType::linearStream, false, mockDeviceBitfield};
    allocProperties.alignment = alignment;
    allocProperties.flags.use32BitFrontWindow = 1;

    auto *graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(allocProperties);
    auto *wddmAllocation = static_cast<WddmAllocation *>(graphicsAllocation);
    ASSERT_NE(nullptr, wddmAllocation);

    // Placement: first allocation lands at the very beginning of the front-window range.
    EXPECT_EQ(heapFrontStart, wddmAllocation->getGpuBaseAddress());
    EXPECT_EQ(heapFrontStart, wddmAllocation->getGpuAddress());

    // Backing storage exists and is at least as large as requested.
    EXPECT_LE(allocationSize, wddmAllocation->getUnderlyingBufferSize());
    EXPECT_NE(nullptr, wddmAllocation->getUnderlyingBuffer());

    EXPECT_TRUE(wddmAllocation->isAllocInFrontWindowPool());

    memoryManager->freeGraphicsMemory(wddmAllocation);
}
// Registers custom configs for linearStream (front-window and regular) and requests
// a non-front-window allocation: the GPU address is expected at the start of the regular
// range, while the GPU base address is expected to equal the base passed in the config.
TEST_F(WddmMemoryManagerBindlessHeapHelperCustomHeapAllocatorCfgTest, givenCustomHeapAllocatorForNonFrontWindowHeapWhenAllocatingThenGpuAddressAndBaseAreAssignedByCustomAllocator) {
    memoryManager->addCustomHeapAllocatorConfig(AllocationType::linearStream, true, {heapFrontWindow.get(), heapBase});
    memoryManager->addCustomHeapAllocatorConfig(AllocationType::linearStream, false, {heapRegular.get(), heapBase});

    NEO::AllocationProperties allocProperties{mockRootDeviceIndex, true, allocationSize, AllocationType::linearStream, false, mockDeviceBitfield};
    allocProperties.alignment = alignment;
    allocProperties.flags.use32BitFrontWindow = 0;

    auto *graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(allocProperties);
    auto *wddmAllocation = static_cast<WddmAllocation *>(graphicsAllocation);
    ASSERT_NE(nullptr, wddmAllocation);

    // Base comes from the config, address from the regular allocator.
    EXPECT_EQ(heapBase, wddmAllocation->getGpuBaseAddress());
    EXPECT_EQ(heapRegularStart, wddmAllocation->getGpuAddress());

    // Backing storage exists and is at least as large as requested.
    EXPECT_LE(allocationSize, wddmAllocation->getUnderlyingBufferSize());
    EXPECT_NE(nullptr, wddmAllocation->getUnderlyingBuffer());

    EXPECT_FALSE(wddmAllocation->isAllocInFrontWindowPool());

    memoryManager->freeGraphicsMemory(wddmAllocation);
}
// Registers configs that carry only an allocator (gpuVaBase is left at its default) and
// requests a front-window allocation: the GPU address still comes from the custom
// allocator, but the GPU base address is expected to differ from the front-window start.
TEST_F(WddmMemoryManagerBindlessHeapHelperCustomHeapAllocatorCfgTest, givenCustomHeapAllocatorCfgWithoutGpuVaBaseWhenAllocatingThenGpuBaseAddressIsNotObtainedFromCfg) {
    CustomHeapAllocatorConfig frontWindowCfg;
    frontWindowCfg.allocator = heapFrontWindow.get();
    memoryManager->addCustomHeapAllocatorConfig(AllocationType::linearStream, true, frontWindowCfg);

    CustomHeapAllocatorConfig regularCfg;
    regularCfg.allocator = heapRegular.get();
    memoryManager->addCustomHeapAllocatorConfig(AllocationType::linearStream, false, regularCfg);

    NEO::AllocationProperties allocProperties{mockRootDeviceIndex, true, allocationSize, AllocationType::linearStream, false, mockDeviceBitfield};
    allocProperties.alignment = alignment;
    allocProperties.flags.use32BitFrontWindow = 1;

    auto *graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(allocProperties);
    auto *wddmAllocation = static_cast<WddmAllocation *>(graphicsAllocation);
    ASSERT_NE(nullptr, wddmAllocation);

    // Address is taken from the custom allocator; base is not taken from the config.
    EXPECT_NE(heapFrontStart, wddmAllocation->getGpuBaseAddress());
    EXPECT_EQ(heapFrontStart, wddmAllocation->getGpuAddress());

    // Backing storage exists and is at least as large as requested.
    EXPECT_LE(allocationSize, wddmAllocation->getUnderlyingBufferSize());
    EXPECT_NE(nullptr, wddmAllocation->getUnderlyingBuffer());

    EXPECT_TRUE(wddmAllocation->isAllocInFrontWindowPool());

    memoryManager->freeGraphicsMemory(wddmAllocation);
}

View File

@@ -1437,3 +1437,15 @@ TEST(HeapAllocatorTest, givenZeroAlignmentPassedWhenAllocatingMemoryWithCustomAl
uint64_t ptr = heapAllocator.allocateWithCustomAlignment(ptrSize, 0u);
EXPECT_EQ(alignUp(heapBase, allocationAlignment), ptr);
}
// getBaseAddress() is expected to report the address the allocator was constructed with,
// both right after construction and after an allocation has been requested.
TEST(HeapAllocatorTest, whenGetBaseAddressIsCalledThenReturnInitialLeftBoundAddress) {
    const uint64_t initialBase = 0x100000llu;
    const size_t totalSize = 1024 * 4096;

    HeapAllocatorUnderTest allocatorUnderTest(initialBase, totalSize, allocationAlignment, sizeThreshold);
    EXPECT_EQ(initialBase, allocatorUnderTest.getBaseAddress());

    // Kept as a mutable local, matching how the value is handed to allocate().
    size_t requestedSize = 4096;
    allocatorUnderTest.allocate(requestedSize);

    EXPECT_EQ(initialBase, allocatorUnderTest.getBaseAddress());
}