fix: correct bindless offsets for L0 bindless images extension with heapless

Related-To: NEO-14710
Signed-off-by: Kamil Kopryk <kamil.kopryk@intel.com>
This commit is contained in:
Kamil Kopryk
2025-06-16 01:31:54 +00:00
committed by Compute-Runtime-Automation
parent a8394cbdeb
commit cdf087f3da
6 changed files with 41 additions and 5 deletions

View File

@@ -627,7 +627,7 @@ ze_result_t KernelImp::setArgRedescribedImage(uint32_t argIndex, ze_image_handle
uint64_t bindlessSlotOffset = ssInHeap->surfaceStateOffset + surfaceStateSize * bindlessSlot;
uint32_t patchSize = this->heaplessEnabled ? 8u : 4u;
uint64_t patchValue = this->heaplessEnabled
? bindlessSlotOffset + bindlessHeapsHelper->getGlobalHeapsBase()
? bindlessSlotOffset
: gfxCoreHelper.getBindlessSurfaceExtendedMessageDescriptorValue(static_cast<uint32_t>(bindlessSlotOffset));
patchWithRequiredSize(const_cast<uint8_t *>(patchLocation), patchSize, patchValue);
@@ -829,7 +829,7 @@ ze_result_t KernelImp::setArgImage(uint32_t argIndex, size_t argSize, const void
auto bindlessSlotOffset = ssInHeap->surfaceStateOffset;
uint32_t patchSize = this->heaplessEnabled ? 8u : 4u;
uint64_t patchValue = this->heaplessEnabled
? bindlessSlotOffset + bindlessHeapsHelper->getGlobalHeapsBase()
? bindlessSlotOffset
: gfxCoreHelper.getBindlessSurfaceExtendedMessageDescriptorValue(static_cast<uint32_t>(bindlessSlotOffset));
patchWithRequiredSize(const_cast<uint8_t *>(patchLocation), patchSize, patchValue);

View File

@@ -3174,7 +3174,7 @@ HWTEST2_F(SetKernelArg, givenHeaplessWhenPatchingImageWithBindlessEnabledCorrect
auto patchLocation = ptrOffset(ctd, imageArg.bindless);
uint64_t bindlessSlotOffset = ssInHeap->surfaceStateOffset + surfaceStateSize * NEO::BindlessImageSlot::redescribedImage;
uint64_t expectedPatchValue = kernel->heaplessEnabled
? bindlessSlotOffset + bindlessHeapsHelper->getGlobalHeapsBase()
? bindlessSlotOffset
: gfxCoreHelper.getBindlessSurfaceExtendedMessageDescriptorValue(static_cast<uint32_t>(bindlessSlotOffset));
if (kernel->heaplessEnabled) {

View File

@@ -11,6 +11,7 @@
#include "shared/source/execution_environment/execution_environment.h"
#include "shared/source/execution_environment/root_device_environment.h"
#include "shared/source/gmm_helper/gmm_helper.h"
#include "shared/source/helpers/compiler_product_helper.h"
#include "shared/source/helpers/driver_model_type.h"
#include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/helpers/string.h"
@@ -77,6 +78,9 @@ BindlessHeapsHelper::BindlessHeapsHelper(Device *rootDevice, bool isMultiOsConte
memcpy_s(borderColorStates->getUnderlyingBuffer(), sizeof(borderColorDefault), borderColorDefault, sizeof(borderColorDefault));
float borderColorAlpha[4] = {0, 0, 0, 1.0};
memcpy_s(ptrOffset(borderColorStates->getUnderlyingBuffer(), borderColorAlphaOffset), sizeof(borderColorAlpha), borderColorAlpha, sizeof(borderColorDefault));
auto &hwInfo = *rootDevice->getRootDeviceEnvironment().getHardwareInfo();
this->heaplessEnabled = rootDevice->getRootDeviceEnvironment().getHelper<CompilerProductHelper>().isHeaplessModeEnabled(hwInfo);
}
std::optional<AddressRange> BindlessHeapsHelper::reserveMemoryRange(size_t size, size_t alignment, HeapIndex heapIndex) {
@@ -235,6 +239,10 @@ SurfaceStateInHeapInfo BindlessHeapsHelper::allocateSSInHeap(size_t ssSize, Grap
memset(ptrInHeap, 0, ssSize);
auto bindlessOffset = heap->getGraphicsAllocation()->getGpuAddress() - heap->getGraphicsAllocation()->getGpuBaseAddress() + heap->getUsed() - ssSize;
if (this->heaplessEnabled) {
bindlessOffset += heap->getGraphicsAllocation()->getGpuBaseAddress();
}
bindlesInfo = SurfaceStateInHeapInfo{heap->getGraphicsAllocation(), bindlessOffset, ptrInHeap, ssSize};
}

View File

@@ -105,6 +105,7 @@ class BindlessHeapsHelper : NEO::NonCopyableAndNonMovableClass {
bool isMultiOsContextCapable = false;
bool reservedMemoryInitialized = false;
bool useReservedMemory = false;
bool heaplessEnabled = false;
};
static_assert(NEO::NonCopyableAndNonMovable<BindlessHeapsHelper>);

View File

@@ -36,6 +36,7 @@ class MockBindlesHeapsHelper : public BindlessHeapsHelper {
using BaseClass::globalBindlessDsh;
using BaseClass::growHeap;
using BaseClass::heapFrontWindow;
using BaseClass::heaplessEnabled;
using BaseClass::heapRegular;
using BaseClass::initializeReservedMemory;
using BaseClass::isMultiOsContextCapable;

View File

@@ -217,6 +217,9 @@ TEST_F(BindlessHeapsHelperTests, givenBindlessHeapHelperWhenGetAlphaBorderColorO
TEST_F(BindlessHeapsHelperTests, givenBindlessHeapHelperWhenAllocateSsInSpecialHeapThenFirstSlotIsAtOffsetZero) {
auto bindlessHeapHelper = std::make_unique<MockBindlesHeapsHelper>(getDevice(), false);
if (bindlessHeapHelper->heaplessEnabled) {
GTEST_SKIP();
}
MockGraphicsAllocation alloc;
size_t size = 0x40;
auto ssInHeapInfo = bindlessHeapHelper->allocateSSInHeap(size, &alloc, BindlessHeapsHelper::BindlesHeapType::specialSsh);
@@ -226,13 +229,32 @@ TEST_F(BindlessHeapsHelperTests, givenBindlessHeapHelperWhenAllocateSsInSpecialH
EXPECT_EQ(bindlessHeapHelper->getGlobalHeapsBase(), ssInHeapInfo.heapAllocation->getGpuBaseAddress());
}
// Runs only when the helper reports heapless mode. Allocates one surface state
// from the special SSH heap and checks that the reported surfaceStateOffset
// equals the GPU base address of the backing heap allocation.
TEST_F(BindlessHeapsHelperTests, givenHeaplessAndBindlessHeapHelperWhenAllocateSsInSpecialHeapThenFirstSlotIsAtOffsetOfHeapBaseAddress) {
    auto helper = std::make_unique<MockBindlesHeapsHelper>(getDevice(), false);
    const bool heapless = helper->heaplessEnabled;
    if (!heapless) {
        // The non-heapless expectation is covered by a separate test.
        GTEST_SKIP();
    }

    const size_t surfaceStateSize = 0x40;
    MockGraphicsAllocation surfaceAllocation;
    auto slotInfo = helper->allocateSSInHeap(surfaceStateSize, &surfaceAllocation, BindlessHeapsHelper::BindlesHeapType::specialSsh);
    const auto &heapAlloc = slotInfo.heapAllocation;

    // The offset handed back is compared against the heap's GPU base address.
    EXPECT_EQ(heapAlloc->getGpuBaseAddress(), slotInfo.surfaceStateOffset);
    // The heap allocation's GPU address is expected to coincide with its base address.
    EXPECT_EQ(heapAlloc->getGpuAddress(), heapAlloc->getGpuBaseAddress());
    // The helper's global heaps base must match the same base address.
    EXPECT_EQ(helper->getGlobalHeapsBase(), heapAlloc->getGpuBaseAddress());
}
// Allocates one surface state from the global SSH heap and checks that the
// reported offset stays below MemoryConstants::max32BitAddress. When the helper
// reports heapless mode, the heap allocation's GPU base address is subtracted
// from the offset before the comparison.
TEST_F(BindlessHeapsHelperTests, givenBindlessHeapHelperWhenAllocateSsInGlobalHeapThenOffsetLessThanHeapSize) {
    auto helper = std::make_unique<MockBindlesHeapsHelper>(getDevice(), false);

    const size_t surfaceStateSize = 0x40;
    MockGraphicsAllocation surfaceAllocation;
    auto slotInfo = helper->allocateSSInHeap(surfaceStateSize, &surfaceAllocation, BindlessHeapsHelper::BindlesHeapType::globalSsh);

    EXPECT_LE(0u, slotInfo.surfaceStateOffset);
    if (!helper->heaplessEnabled) {
        // Non-heapless: the offset is compared as returned.
        EXPECT_GT(MemoryConstants::max32BitAddress, slotInfo.surfaceStateOffset);
    } else {
        // Heapless: compare the offset with the heap's GPU base address removed.
        EXPECT_GT(MemoryConstants::max32BitAddress, slotInfo.surfaceStateOffset - slotInfo.heapAllocation->getGpuBaseAddress());
    }
}
TEST_F(BindlessHeapsHelperTests, givenBindlessHeapHelperWhenAllocateSsInGlobalDshThenOffsetLessThanHeapSize) {
@@ -241,7 +263,11 @@ TEST_F(BindlessHeapsHelperTests, givenBindlessHeapHelperWhenAllocateSsInGlobalDs
size_t size = 0x40;
auto ssInHeapInfo = bindlessHeapHelper->allocateSSInHeap(size, &alloc, BindlessHeapsHelper::BindlesHeapType::globalDsh);
EXPECT_LE(0u, ssInHeapInfo.surfaceStateOffset);
if (bindlessHeapHelper->heaplessEnabled) {
EXPECT_GT(MemoryConstants::max32BitAddress, ssInHeapInfo.surfaceStateOffset - ssInHeapInfo.heapAllocation->getGpuBaseAddress());
} else {
EXPECT_GT(MemoryConstants::max32BitAddress, ssInHeapInfo.surfaceStateOffset);
}
}
TEST_F(BindlessHeapsHelperTests, givenBindlessHeapHelperWhenFreeGraphicsMemoryIsCalledThenSSinHeapInfoShouldBePlacedInReuseVector) {