fix: set correct bindless offsets for the L0 bindless images extension in heapless mode

Additionally reorder members in bindless heaps helper.

Related-To: NEO-14710
Signed-off-by: Kamil Kopryk <kamil.kopryk@intel.com>
This commit is contained in:
Kamil Kopryk
2025-05-07 11:07:21 +00:00
committed by Compute-Runtime-Automation
parent 063b719a44
commit b048d0e557
5 changed files with 71 additions and 33 deletions

View File

@@ -256,8 +256,10 @@ ze_result_t ImageImp::getDeviceOffset(uint64_t *deviceOffset) {
return ZE_RESULT_ERROR_NOT_AVAILABLE;
}
DEBUG_BREAK_IF(this->getBindlessSlot() == nullptr);
*deviceOffset = this->getBindlessSlot()->surfaceStateOffset;
auto bindlessSlot = this->getBindlessSlot();
DEBUG_BREAK_IF(bindlessSlot == nullptr);
*deviceOffset = bindlessSlot->surfaceStateOffset;
return ZE_RESULT_SUCCESS;
}

View File

@@ -11,6 +11,7 @@
#include "shared/source/execution_environment/execution_environment.h"
#include "shared/source/execution_environment/root_device_environment.h"
#include "shared/source/gmm_helper/gmm_helper.h"
#include "shared/source/helpers/compiler_product_helper.h"
#include "shared/source/helpers/driver_model_type.h"
#include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/helpers/string.h"
@@ -49,11 +50,11 @@ constexpr size_t heapRegularSize = reservedRangeSize - heapFrontWindowSize;
using BindlesHeapType = BindlessHeapsHelper::BindlesHeapType;
BindlessHeapsHelper::BindlessHeapsHelper(Device *rootDevice, bool isMultiOsContextCapable) : rootDevice(rootDevice),
surfaceStateSize(rootDevice->getRootDeviceEnvironment().getHelper<GfxCoreHelper>().getRenderSurfaceStateSize()),
deviceBitfield(rootDevice->getDeviceBitfield()),
memManager(rootDevice->getMemoryManager()),
isMultiOsContextCapable(isMultiOsContextCapable),
surfaceStateSize(rootDevice->getRootDeviceEnvironment().getHelper<GfxCoreHelper>().getRenderSurfaceStateSize()),
rootDeviceIndex(rootDevice->getRootDeviceIndex()),
deviceBitfield(rootDevice->getDeviceBitfield()) {
isMultiOsContextCapable(isMultiOsContextCapable) {
for (auto heapType = 0; heapType < BindlesHeapType::numHeapTypes; heapType++) {
auto size = MemoryConstants::pageSize64k;
@@ -77,6 +78,9 @@ BindlessHeapsHelper::BindlessHeapsHelper(Device *rootDevice, bool isMultiOsConte
memcpy_s(borderColorStates->getUnderlyingBuffer(), sizeof(borderColorDefault), borderColorDefault, sizeof(borderColorDefault));
float borderColorAlpha[4] = {0, 0, 0, 1.0};
memcpy_s(ptrOffset(borderColorStates->getUnderlyingBuffer(), borderColorAlphaOffset), sizeof(borderColorAlpha), borderColorAlpha, sizeof(borderColorDefault));
auto &hwInfo = *rootDevice->getRootDeviceEnvironment().getHardwareInfo();
this->heaplessEnabled = rootDevice->getRootDeviceEnvironment().getHelper<CompilerProductHelper>().isHeaplessModeEnabled(hwInfo);
}
std::optional<AddressRange> BindlessHeapsHelper::reserveMemoryRange(size_t size, size_t alignment, HeapIndex heapIndex) {
@@ -235,6 +239,9 @@ SurfaceStateInHeapInfo BindlessHeapsHelper::allocateSSInHeap(size_t ssSize, Grap
memset(ptrInHeap, 0, ssSize);
auto bindlessOffset = heap->getGraphicsAllocation()->getGpuAddress() - heap->getGraphicsAllocation()->getGpuBaseAddress() + heap->getUsed() - ssSize;
if (this->heaplessEnabled) {
bindlessOffset += heap->getGraphicsAllocation()->getGpuBaseAddress();
}
bindlesInfo = SurfaceStateInHeapInfo{heap->getGraphicsAllocation(), bindlessOffset, ptrInHeap, ssSize};
}

View File

@@ -74,37 +74,35 @@ class BindlessHeapsHelper : NEO::NonCopyableAndNonMovableClass {
std::optional<AddressRange> reserveMemoryRange(size_t size, size_t alignment, HeapIndex heapIndex);
bool initializeReservedMemory();
bool isReservedMemoryModeAvailable();
protected:
Device *rootDevice = nullptr;
const size_t surfaceStateSize;
bool growHeap(BindlesHeapType heapType);
MemoryManager *memManager = nullptr;
bool isMultiOsContextCapable = false;
const uint32_t rootDeviceIndex;
std::unique_ptr<IndirectHeap> surfaceStateHeaps[BindlesHeapType::numHeapTypes];
GraphicsAllocation *borderColorStates;
std::vector<GraphicsAllocation *> ssHeapsAllocations;
size_t reuseSlotCountThreshold = 512;
uint32_t allocatePoolIndex = 0;
uint32_t releasePoolIndex = 0;
bool allocateFromReusePool = false;
Device *rootDevice = nullptr;
DeviceBitfield deviceBitfield;
GraphicsAllocation *borderColorStates;
MemoryManager *memManager = nullptr;
std::array<std::vector<SurfaceStateInHeapInfo>, 2> surfaceStateInHeapVectorReuse[2];
std::bitset<64> stateCacheDirtyForContext;
std::mutex mtx;
DeviceBitfield deviceBitfield;
bool globalBindlessDsh = false;
bool useReservedMemory = false;
bool reservedMemoryInitialized = false;
uint64_t reservedRangeBase = 0;
std::unique_ptr<HeapAllocator> heapFrontWindow;
std::unique_ptr<HeapAllocator> heapRegular;
std::unique_ptr<IndirectHeap> surfaceStateHeaps[BindlesHeapType::numHeapTypes];
std::vector<AddressRange> reservedRanges;
std::vector<GraphicsAllocation *> ssHeapsAllocations;
const size_t surfaceStateSize;
size_t reuseSlotCountThreshold = 512;
const uint32_t rootDeviceIndex;
uint32_t allocatePoolIndex = 0;
uint32_t releasePoolIndex = 0;
uint64_t reservedRangeBase = 0;
std::mutex mtx;
bool allocateFromReusePool = false;
bool globalBindlessDsh = false;
bool heaplessEnabled = false;
bool isMultiOsContextCapable = false;
bool reservedMemoryInitialized = false;
bool useReservedMemory = false;
};
static_assert(NEO::NonCopyableAndNonMovable<BindlessHeapsHelper>);

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2021-2024 Intel Corporation
* Copyright (C) 2021-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -36,6 +36,7 @@ class MockBindlesHeapsHelper : public BindlessHeapsHelper {
using BaseClass::globalBindlessDsh;
using BaseClass::growHeap;
using BaseClass::heapFrontWindow;
using BaseClass::heaplessEnabled;
using BaseClass::heapRegular;
using BaseClass::initializeReservedMemory;
using BaseClass::isMultiOsContextCapable;

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2024 Intel Corporation
* Copyright (C) 2020-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -216,23 +216,49 @@ TEST_F(BindlessHeapsHelperTests, givenBindlessHeapHelperWhenGetAlphaBorderColorO
}
TEST_F(BindlessHeapsHelperTests, givenBindlessHeapHelperWhenAllocateSsInSpecialHeapThenFirstSlotIsAtOffsetZero) {
    // The zero-offset expectation only applies when heapless mode is off, so skip otherwise.
    auto helper = std::make_unique<MockBindlesHeapsHelper>(getDevice(), false);
    if (helper->heaplessEnabled) {
        GTEST_SKIP();
    }

    MockGraphicsAllocation surface;
    const size_t slotSize = 0x40;
    const auto slot = helper->allocateSSInHeap(slotSize, &surface, BindlessHeapsHelper::BindlesHeapType::specialSsh);
    auto heapAllocation = slot.heapAllocation;

    // First slot taken from the special heap is reported at offset 0.
    EXPECT_EQ(0u, slot.surfaceStateOffset);
    // The backing heap allocation starts at its GPU base address, which is also the global heaps base.
    EXPECT_EQ(heapAllocation->getGpuAddress(), heapAllocation->getGpuBaseAddress());
    EXPECT_EQ(helper->getGlobalHeapsBase(), heapAllocation->getGpuBaseAddress());
}
TEST_F(BindlessHeapsHelperTests, givenHeaplessAndBindlessHeapHelperWhenAllocateSsInSpecialHeapThenFirstSlotIsAtOffsetOfHeapBaseAddress) {
    // This expectation only applies when heapless mode is on, so skip otherwise.
    auto helper = std::make_unique<MockBindlesHeapsHelper>(getDevice(), false);
    if (!helper->heaplessEnabled) {
        GTEST_SKIP();
    }

    MockGraphicsAllocation surface;
    const size_t slotSize = 0x40;
    const auto slot = helper->allocateSSInHeap(slotSize, &surface, BindlessHeapsHelper::BindlesHeapType::specialSsh);
    auto heapAllocation = slot.heapAllocation;

    // First slot taken from the special heap is reported at the heap's GPU base address rather than 0.
    EXPECT_EQ(heapAllocation->getGpuBaseAddress(), slot.surfaceStateOffset);
    // The backing heap allocation starts at its GPU base address, which is also the global heaps base.
    EXPECT_EQ(heapAllocation->getGpuAddress(), heapAllocation->getGpuBaseAddress());
    EXPECT_EQ(helper->getGlobalHeapsBase(), heapAllocation->getGpuBaseAddress());
}
TEST_F(BindlessHeapsHelperTests, givenBindlessHeapHelperWhenAllocateSsInGlobalHeapThenOffsetLessThanHeapSize) {
// Allocates one 0x40-byte slot from the global surface-state heap and checks the range of the reported offset.
auto bindlessHeapHelper = std::make_unique<MockBindlesHeapsHelper>(getDevice(), false);
MockGraphicsAllocation alloc;
size_t size = 0x40;
auto ssInHeapInfo = bindlessHeapHelper->allocateSSInHeap(size, &alloc, BindlessHeapsHelper::BindlesHeapType::globalSsh);
EXPECT_LE(0u, ssInHeapInfo.surfaceStateOffset);
// NOTE(review): this unconditional bound sits directly before the heapless-aware check below and
// duplicates its else-branch; it looks like the pre-change assertion carried over from the diff view — confirm.
EXPECT_GT(MemoryConstants::max32BitAddress, ssInHeapInfo.surfaceStateOffset);
// With heapless enabled, the heap's GPU base address is subtracted from the reported offset
// before it is compared against the 32-bit limit; otherwise the offset is compared as-is.
if (bindlessHeapHelper->heaplessEnabled) {
EXPECT_GT(MemoryConstants::max32BitAddress, ssInHeapInfo.surfaceStateOffset - ssInHeapInfo.heapAllocation->getGpuBaseAddress());
} else {
EXPECT_GT(MemoryConstants::max32BitAddress, ssInHeapInfo.surfaceStateOffset);
}
}
TEST_F(BindlessHeapsHelperTests, givenBindlessHeapHelperWhenAllocateSsInGlobalDshThenOffsetLessThanHeapSize) {
@@ -241,7 +267,11 @@ TEST_F(BindlessHeapsHelperTests, givenBindlessHeapHelperWhenAllocateSsInGlobalDs
size_t size = 0x40;
auto ssInHeapInfo = bindlessHeapHelper->allocateSSInHeap(size, &alloc, BindlessHeapsHelper::BindlesHeapType::globalDsh);
EXPECT_LE(0u, ssInHeapInfo.surfaceStateOffset);
EXPECT_GT(MemoryConstants::max32BitAddress, ssInHeapInfo.surfaceStateOffset);
if (bindlessHeapHelper->heaplessEnabled) {
EXPECT_GT(MemoryConstants::max32BitAddress, ssInHeapInfo.surfaceStateOffset - ssInHeapInfo.heapAllocation->getGpuBaseAddress());
} else {
EXPECT_GT(MemoryConstants::max32BitAddress, ssInHeapInfo.surfaceStateOffset);
}
}
TEST_F(BindlessHeapsHelperTests, givenBindlessHeapHelperWhenFreeGraphicsMemoryIsCalledThenSSinHeapInfoShouldBePlacedInReuseVector) {