fix: add support for bindless implicit args

Support for:
global_base and const_base in bindless addressing mode.

Related-To: NEO-9855
Signed-off-by: Fabian Zwolinski <fabian.zwolinski@intel.com>
This commit is contained in:
Fabian Zwolinski
2024-01-10 17:46:32 +00:00
committed by Compute-Runtime-Automation
parent 2bda9f0b58
commit 903e581b5f
10 changed files with 682 additions and 33 deletions

View File

@@ -136,6 +136,21 @@ ze_result_t KernelImmutableData::initialize(NEO::KernelInfo *kernelInfo, Device
this->residencyContainer.push_back(globalConstBuffer);
}
if (globalConstBuffer && NEO::isValidOffset(kernelDescriptor->payloadMappings.implicitArgs.globalConstantsSurfaceAddress.bindless)) {
if (!neoDevice->getMemoryManager()->allocateBindlessSlot(globalConstBuffer)) {
return ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY;
}
auto ssInHeap = globalConstBuffer->getBindlessInfo();
if (ssInHeap.heapAllocation) {
this->residencyContainer.push_back(ssInHeap.heapAllocation);
}
patchImplicitArgBindlessOffsetAndSetSurfaceState(crossThreadDataArrayRef, surfaceStateHeapArrayRef,
globalConstBuffer, kernelDescriptor->payloadMappings.implicitArgs.globalConstantsSurfaceAddress,
*neoDevice, kernelDescriptor->kernelAttributes.flags.useGlobalAtomics, deviceImp->isImplicitScalingCapable(), ssInHeap, kernelInfo->kernelDescriptor);
}
if (NEO::isValidOffset(kernelDescriptor->payloadMappings.implicitArgs.globalVariablesSurfaceAddress.stateless)) {
UNRECOVERABLE_IF(globalVarBuffer == nullptr);
@@ -148,6 +163,21 @@ ze_result_t KernelImmutableData::initialize(NEO::KernelInfo *kernelInfo, Device
this->residencyContainer.push_back(globalVarBuffer);
}
if (globalVarBuffer && NEO::isValidOffset(kernelDescriptor->payloadMappings.implicitArgs.globalVariablesSurfaceAddress.bindless)) {
if (!neoDevice->getMemoryManager()->allocateBindlessSlot(globalVarBuffer)) {
return ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY;
}
auto ssInHeap = globalVarBuffer->getBindlessInfo();
if (ssInHeap.heapAllocation) {
this->residencyContainer.push_back(ssInHeap.heapAllocation);
}
patchImplicitArgBindlessOffsetAndSetSurfaceState(crossThreadDataArrayRef, surfaceStateHeapArrayRef,
globalVarBuffer, kernelDescriptor->payloadMappings.implicitArgs.globalVariablesSurfaceAddress,
*neoDevice, kernelDescriptor->kernelAttributes.flags.useGlobalAtomics, deviceImp->isImplicitScalingCapable(), ssInHeap, kernelInfo->kernelDescriptor);
}
return ZE_RESULT_SUCCESS;
}
@@ -1133,6 +1163,32 @@ void KernelImp::patchSyncBuffer(NEO::GraphicsAllocation *gfxAllocation, size_t b
static_cast<uintptr_t>(ptrOffset(gfxAllocation->getGpuAddressToPatch(), bufferOffset)));
}
uint32_t KernelImp::getSurfaceStateHeapDataSize() const {
if (NEO::KernelDescriptor::isBindlessAddressingKernel(kernelImmData->getDescriptor())) {
const auto bindlessHeapsHelper = this->module && this->module->getDevice()->getNEODevice()->getBindlessHeapsHelper();
bool isBindlessImplicitArgPresent = false;
auto implicitArgsVec = kernelImmData->getDescriptor().getImplicitArgBindlessCandidatesVec();
for (const auto implicitArg : implicitArgsVec) {
if (NEO::isValidOffset(implicitArg->bindless)) {
isBindlessImplicitArgPresent = true;
break;
}
}
const bool noBindlessExplicitArgs = std::none_of(usingSurfaceStateHeap.cbegin(), usingSurfaceStateHeap.cend(), [](bool i) { return i; });
if (isBindlessImplicitArgPresent && !bindlessHeapsHelper) {
return surfaceStateHeapDataSize;
}
if (noBindlessExplicitArgs) {
return 0;
}
}
return surfaceStateHeapDataSize;
}
void *KernelImp::patchBindlessSurfaceState(NEO::GraphicsAllocation *alloc, uint32_t bindless) {
auto &gfxCoreHelper = this->module->getDevice()->getGfxCoreHelper();
auto ssInHeap = alloc->getBindlessInfo();
@@ -1279,6 +1335,8 @@ void KernelImp::patchBindlessOffsetsInCrossThreadData(uint64_t bindlessSurfaceSt
}
}
}
patchBindlessOffsetsForImplicitArgs(bindlessSurfaceStateBaseOffset);
}
uint32_t KernelImp::getSurfaceStateIndexForBindlessOffset(NEO::CrossThreadDataOffset bindlessOffset) const {
@@ -1290,4 +1348,25 @@ uint32_t KernelImp::getSurfaceStateIndexForBindlessOffset(NEO::CrossThreadDataOf
return std::numeric_limits<uint32_t>::max();
}
void KernelImp::patchBindlessOffsetsForImplicitArgs(uint64_t bindlessSurfaceStateBaseOffset) const {
auto implicitArgsVec = kernelImmData->getDescriptor().getImplicitArgBindlessCandidatesVec();
auto &gfxCoreHelper = this->module->getDevice()->getGfxCoreHelper();
auto surfaceStateSize = gfxCoreHelper.getRenderSurfaceStateSize();
for (size_t i = 0; i < implicitArgsVec.size(); i++) {
if (NEO::isValidOffset(implicitArgsVec[i]->bindless)) {
auto patchLocation = ptrOffset(getCrossThreadData(), implicitArgsVec[i]->bindless);
auto index = getSurfaceStateIndexForBindlessOffset(implicitArgsVec[i]->bindless);
if (index < std::numeric_limits<uint32_t>::max()) {
auto surfaceStateOffset = static_cast<uint32_t>(bindlessSurfaceStateBaseOffset + index * surfaceStateSize);
auto patchValue = gfxCoreHelper.getBindlessSurfaceExtendedMessageDescriptorValue(static_cast<uint32_t>(surfaceStateOffset));
patchWithRequiredSize(const_cast<uint8_t *>(patchLocation), sizeof(patchValue), patchValue);
}
}
}
}
} // namespace L0

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2023 Intel Corporation
* Copyright (C) 2020-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -114,14 +114,7 @@ struct KernelImp : Kernel {
void patchSyncBuffer(NEO::GraphicsAllocation *gfxAllocation, size_t bufferOffset) override;
const uint8_t *getSurfaceStateHeapData() const override { return surfaceStateHeapData.get(); }
uint32_t getSurfaceStateHeapDataSize() const override {
if (NEO::KernelDescriptor::isBindlessAddressingKernel(kernelImmData->getDescriptor())) {
if (std::none_of(usingSurfaceStateHeap.cbegin(), usingSurfaceStateHeap.cend(), [](bool i) { return i; })) {
return 0;
}
}
return surfaceStateHeapDataSize;
}
uint32_t getSurfaceStateHeapDataSize() const override;
const uint8_t *getDynamicStateHeapData() const override { return dynamicStateHeapData.get(); }
@@ -181,6 +174,7 @@ struct KernelImp : Kernel {
NEO::GraphicsAllocation *allocatePrivateMemoryGraphicsAllocation() override;
void patchCrossthreadDataWithPrivateAllocation(NEO::GraphicsAllocation *privateAllocation) override;
void patchBindlessOffsetsInCrossThreadData(uint64_t bindlessSurfaceStateBaseOffset) const override;
void patchBindlessOffsetsForImplicitArgs(uint64_t bindlessSurfaceStateBaseOffset) const;
NEO::GraphicsAllocation *getPrivateMemoryGraphicsAllocation() override {
return privateMemoryGraphicsAllocation;

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2022-2023 Intel Corporation
* Copyright (C) 2022-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -36,3 +36,52 @@ inline void patchWithImplicitSurface(ArrayRef<uint8_t> crossThreadData, ArrayRef
gfxCoreHelper.encodeBufferSurfaceState(args);
}
}
inline void patchImplicitArgBindlessOffsetAndSetSurfaceState(ArrayRef<uint8_t> crossThreadData, ArrayRef<uint8_t> surfaceStateHeap, NEO::GraphicsAllocation *allocation,
const NEO::ArgDescPointer &ptr, const NEO::Device &device, bool useGlobalAtomics, bool implicitScaling,
const NEO::SurfaceStateInHeapInfo &ssInHeap, const NEO::KernelDescriptor &kernelDescriptor) {
auto &gfxCoreHelper = device.getGfxCoreHelper();
void *surfaceStateAddress = nullptr;
auto surfaceStateSize = gfxCoreHelper.getRenderSurfaceStateSize();
if (NEO::isValidOffset(ptr.bindless)) {
if (device.getBindlessHeapsHelper()) {
surfaceStateAddress = ssInHeap.ssPtr;
auto patchLocation = ptrOffset(crossThreadData.begin(), ptr.bindless);
auto patchValue = gfxCoreHelper.getBindlessSurfaceExtendedMessageDescriptorValue(static_cast<uint32_t>(ssInHeap.surfaceStateOffset));
patchWithRequiredSize(const_cast<uint8_t *>(patchLocation), sizeof(patchValue), patchValue);
} else {
auto index = std::numeric_limits<uint32_t>::max();
const auto &iter = kernelDescriptor.getBindlessOffsetToSurfaceState().find(ptr.bindless);
if (iter != kernelDescriptor.getBindlessOffsetToSurfaceState().end()) {
index = iter->second;
}
if (index < std::numeric_limits<uint32_t>::max()) {
surfaceStateAddress = ptrOffset(surfaceStateHeap.begin(), index * surfaceStateSize);
}
}
}
if (surfaceStateAddress) {
auto addressToPatch = allocation->getGpuAddress();
size_t sizeToPatch = allocation->getUnderlyingBufferSize();
auto isDebuggerActive = device.getDebugger() != nullptr;
NEO::EncodeSurfaceStateArgs args;
args.outMemory = surfaceStateAddress;
args.graphicsAddress = addressToPatch;
args.size = sizeToPatch;
args.mocs = gfxCoreHelper.getMocsIndex(*device.getGmmHelper(), true, false) << 1;
args.numAvailableDevices = device.getNumGenericSubDevices();
args.allocation = allocation;
args.gmmHelper = device.getGmmHelper();
args.useGlobalAtomics = useGlobalAtomics;
args.areMultipleSubDevicesInContext = args.numAvailableDevices > 1;
args.implicitScaling = implicitScaling;
args.isDebuggerActive = isDebuggerActive;
gfxCoreHelper.encodeBufferSurfaceState(args);
}
}

View File

@@ -2190,6 +2190,40 @@ HWTEST2_F(KernelImpPatchBindlessTest, GivenMisalignedBufferAddressWhenSettingSur
EXPECT_EQ(mockKernel.surfaceStateHeapDataSize, mockKernel.getSurfaceStateHeapDataSize());
}
HWTEST2_F(KernelImpPatchBindlessTest, GivenBindlessImplicitArgAndNoBindlessHeapsHelperWhenGetSurfaceStateHeapDataSizeThenReturnSurfaceStateHeapDataSize, MatchAny) {
ze_kernel_desc_t desc = {};
desc.pKernelName = kernelName.c_str();
WhiteBoxKernelHw<gfxCoreFamily> mockKernel;
mockKernel.module = module.get();
mockKernel.initialize(&desc);
const_cast<NEO::KernelDescriptor &>(mockKernel.kernelImmData->getDescriptor()).kernelAttributes.bufferAddressingMode = NEO::KernelDescriptor::BindlessAndStateless;
EXPECT_EQ(0u, mockKernel.getSurfaceStateHeapDataSize());
const_cast<NEO::KernelDescriptor &>(mockKernel.kernelImmData->getDescriptor()).payloadMappings.implicitArgs.globalConstantsSurfaceAddress.bindless = 0x20;
ASSERT_EQ(nullptr, mockKernel.module->getDevice()->getNEODevice()->getBindlessHeapsHelper());
EXPECT_NE(0u, mockKernel.surfaceStateHeapDataSize);
EXPECT_EQ(mockKernel.surfaceStateHeapDataSize, mockKernel.getSurfaceStateHeapDataSize());
}
HWTEST2_F(KernelImpPatchBindlessTest, GivenBindlessImplicitArgAndBindlessHeapsHelperWhenGetSurfaceStateHeapDataSizeThenReturnZero, MatchAny) {
ze_kernel_desc_t desc = {};
desc.pKernelName = kernelName.c_str();
WhiteBoxKernelHw<gfxCoreFamily> mockKernel;
mockKernel.module = module.get();
mockKernel.initialize(&desc);
const_cast<NEO::KernelDescriptor &>(mockKernel.kernelImmData->getDescriptor()).kernelAttributes.bufferAddressingMode = NEO::KernelDescriptor::BindlessAndStateless;
EXPECT_EQ(0u, mockKernel.getSurfaceStateHeapDataSize());
const_cast<NEO::KernelDescriptor &>(mockKernel.kernelImmData->getDescriptor()).payloadMappings.implicitArgs.globalConstantsSurfaceAddress.bindless = 0x20;
neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->createBindlessHeapsHelper(neoDevice->getMemoryManager(),
neoDevice->getNumGenericSubDevices() > 1,
neoDevice->getRootDeviceIndex(),
neoDevice->getDeviceBitfield());
EXPECT_EQ(0u, mockKernel.getSurfaceStateHeapDataSize());
}
HWTEST2_F(KernelImpPatchBindlessTest, GivenMisalignedAndAlignedBufferAddressWhenSettingSurfaceStateThenKernelReportsNonZeroSurfaceStateHeapDataSize, MatchAny) {
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
@@ -3579,13 +3613,16 @@ TEST_F(BindlessKernelTest, givenBindlessKernelWhenPatchingCrossThreadDataThenCor
argDescriptor2.as<NEO::ArgDescPointer>().stateless = 2 * sizeof(uint64_t);
mockKernel.descriptor.payloadMappings.explicitArgs.push_back(argDescriptor2);
mockKernel.descriptor.payloadMappings.implicitArgs.globalVariablesSurfaceAddress.bindless = 3 * sizeof(uint64_t);
mockKernel.descriptor.payloadMappings.implicitArgs.globalConstantsSurfaceAddress.bindless = 4 * sizeof(uint64_t);
mockKernel.isBindlessOffsetSet.resize(4, 0);
mockKernel.usingSurfaceStateHeap.resize(4, 0);
mockKernel.descriptor.initBindlessOffsetToSurfaceState();
mockKernel.crossThreadData = std::make_unique<uint8_t[]>(4 * sizeof(uint64_t));
mockKernel.crossThreadDataSize = 4 * sizeof(uint64_t);
mockKernel.crossThreadData = std::make_unique<uint8_t[]>(5 * sizeof(uint64_t));
mockKernel.crossThreadDataSize = 5 * sizeof(uint64_t);
memset(mockKernel.crossThreadData.get(), 0, mockKernel.crossThreadDataSize);
const uint64_t baseAddress = 0x1000;
@@ -3593,7 +3630,9 @@ TEST_F(BindlessKernelTest, givenBindlessKernelWhenPatchingCrossThreadDataThenCor
auto surfaceStateSize = gfxCoreHelper.getRenderSurfaceStateSize();
auto patchValue1 = gfxCoreHelper.getBindlessSurfaceExtendedMessageDescriptorValue(static_cast<uint32_t>(baseAddress));
auto patchValue2 = gfxCoreHelper.getBindlessSurfaceExtendedMessageDescriptorValue(static_cast<uint32_t>(baseAddress + surfaceStateSize));
auto patchValue2 = gfxCoreHelper.getBindlessSurfaceExtendedMessageDescriptorValue(static_cast<uint32_t>(baseAddress + 1 * surfaceStateSize));
auto patchValue3 = gfxCoreHelper.getBindlessSurfaceExtendedMessageDescriptorValue(static_cast<uint32_t>(baseAddress + 2 * surfaceStateSize));
auto patchValue4 = gfxCoreHelper.getBindlessSurfaceExtendedMessageDescriptorValue(static_cast<uint32_t>(baseAddress + 3 * surfaceStateSize));
mockKernel.patchBindlessOffsetsInCrossThreadData(baseAddress);
@@ -3602,7 +3641,9 @@ TEST_F(BindlessKernelTest, givenBindlessKernelWhenPatchingCrossThreadDataThenCor
EXPECT_EQ(patchValue1, crossThreadData[0]);
EXPECT_EQ(patchValue2, crossThreadData[1]);
EXPECT_EQ(0u, crossThreadData[3]);
EXPECT_EQ(0u, crossThreadData[2]);
EXPECT_EQ(patchValue3, crossThreadData[3]);
EXPECT_EQ(patchValue4, crossThreadData[4]);
}
TEST_F(BindlessKernelTest, givenBindlessKernelWithPatchedBindlessOffsetsWhenPatchingCrossThreadDataThenMemoryIsNotPatched) {

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2022-2023 Intel Corporation
* Copyright (C) 2022-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -10,14 +10,17 @@
#include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/helpers/simd_helper.h"
#include "shared/test/common/helpers/raii_gfx_core_helper.h"
#include "shared/test/common/mocks/mock_bindless_heaps_helper.h"
#include "shared/test/common/mocks/mock_device.h"
#include "shared/test/common/mocks/mock_graphics_allocation.h"
#include "shared/test/common/mocks/mock_l0_debugger.h"
#include "shared/test/common/mocks/mock_modules_zebin.h"
#include "shared/test/common/test_macros/hw_test.h"
#include "shared/test/common/test_macros/test.h"
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
#include "level_zero/core/test/unit_tests/fixtures/kernel_max_cooperative_groups_count_fixture.h"
#include "level_zero/core/test/unit_tests/mocks/mock_device.h"
#include "level_zero/core/test/unit_tests/mocks/mock_kernel.h"
#include "level_zero/core/test/unit_tests/mocks/mock_module.h"
@@ -486,5 +489,303 @@ HWTEST2_F(KernelTest, GivenInlineSamplersWhenSettingInlineSamplerThenDshIsPatche
EXPECT_EQ(SamplerState::MAG_MODE_FILTER_NEAREST, samplerState->getMagModeFilter());
}
using KernelImmutableDataBindlessTest = Test<DeviceFixture>;
HWTEST2_F(KernelImmutableDataBindlessTest, givenGlobalConstBufferAndBindlessExplicitAndImplicitArgsAndNoBindlessHeapsHelperWhenInitializeKernelImmutableDataThenSurfaceStateIsSetAndImplicitArgBindlessOffsetIsPatched, IsAtLeastXeHpgCore) {
HardwareInfo hwInfo = *defaultHwInfo;
auto device = std::unique_ptr<NEO::MockDevice>(NEO::MockDevice::createWithNewExecutionEnvironment<NEO::MockDevice>(&hwInfo, 0));
static EncodeSurfaceStateArgs savedSurfaceStateArgs{};
static size_t encodeBufferSurfaceStateCalled{};
savedSurfaceStateArgs = {};
encodeBufferSurfaceStateCalled = {};
struct MockGfxCoreHelper : NEO::GfxCoreHelperHw<FamilyType> {
void encodeBufferSurfaceState(EncodeSurfaceStateArgs &args) const override {
savedSurfaceStateArgs = args;
++encodeBufferSurfaceStateCalled;
}
};
RAIIGfxCoreHelperFactory<MockGfxCoreHelper> raii(*device->getExecutionEnvironment()->rootDeviceEnvironments[0]);
{
device->incRefInternal();
MockDeviceImp deviceImp(device.get(), device->getExecutionEnvironment());
uint64_t gpuAddress = 0x1200;
void *buffer = reinterpret_cast<void *>(gpuAddress);
size_t allocSize = 0x1100;
NEO::MockGraphicsAllocation globalConstBuffer(buffer, gpuAddress, allocSize);
auto kernelInfo = std::make_unique<KernelInfo>();
kernelInfo->kernelDescriptor.kernelMetadata.kernelName = ZebinTestData::ValidEmptyProgram<>::kernelName;
kernelInfo->kernelDescriptor.kernelAttributes.bufferAddressingMode = NEO::KernelDescriptor::BindlessAndStateless;
auto argDescriptor = NEO::ArgDescriptor(NEO::ArgDescriptor::argTPointer);
argDescriptor.as<NEO::ArgDescPointer>() = NEO::ArgDescPointer();
argDescriptor.as<NEO::ArgDescPointer>().bindful = NEO::undefined<NEO::SurfaceStateHeapOffset>;
argDescriptor.as<NEO::ArgDescPointer>().bindless = 0x40;
kernelInfo->kernelDescriptor.payloadMappings.explicitArgs.push_back(argDescriptor);
kernelInfo->kernelDescriptor.payloadMappings.implicitArgs.globalConstantsSurfaceAddress.bindless = 0x80;
kernelInfo->kernelDescriptor.kernelAttributes.numArgsStateful = 2;
kernelInfo->kernelDescriptor.kernelAttributes.crossThreadDataSize = 4 * sizeof(uint64_t);
kernelInfo->kernelDescriptor.initBindlessOffsetToSurfaceState();
const auto globalConstantsSurfaceAddressSSIndex = 1;
auto kernelImmutableData = std::make_unique<KernelImmutableData>(&deviceImp);
kernelImmutableData->initialize(kernelInfo.get(), &deviceImp, 0, &globalConstBuffer, nullptr, false);
auto &gfxCoreHelper = device->getGfxCoreHelper();
auto surfaceStateSize = static_cast<uint32_t>(gfxCoreHelper.getRenderSurfaceStateSize());
EXPECT_EQ(surfaceStateSize * kernelInfo->kernelDescriptor.kernelAttributes.numArgsStateful, kernelImmutableData->getSurfaceStateHeapSize());
auto &residencyContainer = kernelImmutableData->getResidencyContainer();
EXPECT_EQ(1u, residencyContainer.size());
EXPECT_EQ(1, std::count(residencyContainer.begin(), residencyContainer.end(), &globalConstBuffer));
EXPECT_EQ(1u, encodeBufferSurfaceStateCalled);
EXPECT_EQ(allocSize, savedSurfaceStateArgs.size);
EXPECT_EQ(gpuAddress, savedSurfaceStateArgs.graphicsAddress);
EXPECT_EQ(ptrOffset(kernelImmutableData->getSurfaceStateHeapTemplate(), globalConstantsSurfaceAddressSSIndex * surfaceStateSize), savedSurfaceStateArgs.outMemory);
EXPECT_EQ(&globalConstBuffer, savedSurfaceStateArgs.allocation);
}
}
HWTEST2_F(KernelImmutableDataBindlessTest, givenGlobalVarBufferAndBindlessExplicitAndImplicitArgsAndNoBindlessHeapsHelperWhenInitializeKernelImmutableDataThenSurfaceStateIsSetAndImplicitArgBindlessOffsetIsPatched, IsAtLeastXeHpgCore) {
HardwareInfo hwInfo = *defaultHwInfo;
auto device = std::unique_ptr<NEO::MockDevice>(NEO::MockDevice::createWithNewExecutionEnvironment<NEO::MockDevice>(&hwInfo, 0));
static EncodeSurfaceStateArgs savedSurfaceStateArgs{};
static size_t encodeBufferSurfaceStateCalled{};
savedSurfaceStateArgs = {};
encodeBufferSurfaceStateCalled = {};
struct MockGfxCoreHelper : NEO::GfxCoreHelperHw<FamilyType> {
void encodeBufferSurfaceState(EncodeSurfaceStateArgs &args) const override {
savedSurfaceStateArgs = args;
++encodeBufferSurfaceStateCalled;
}
};
RAIIGfxCoreHelperFactory<MockGfxCoreHelper> raii(*device->getExecutionEnvironment()->rootDeviceEnvironments[0]);
{
device->incRefInternal();
MockDeviceImp deviceImp(device.get(), device->getExecutionEnvironment());
uint64_t gpuAddress = 0x1200;
void *buffer = reinterpret_cast<void *>(gpuAddress);
size_t allocSize = 0x1100;
NEO::MockGraphicsAllocation globalVarBuffer(buffer, gpuAddress, allocSize);
auto kernelInfo = std::make_unique<KernelInfo>();
kernelInfo->kernelDescriptor.kernelMetadata.kernelName = ZebinTestData::ValidEmptyProgram<>::kernelName;
kernelInfo->kernelDescriptor.kernelAttributes.bufferAddressingMode = NEO::KernelDescriptor::BindlessAndStateless;
auto argDescriptor = NEO::ArgDescriptor(NEO::ArgDescriptor::argTPointer);
argDescriptor.as<NEO::ArgDescPointer>() = NEO::ArgDescPointer();
argDescriptor.as<NEO::ArgDescPointer>().bindful = NEO::undefined<NEO::SurfaceStateHeapOffset>;
argDescriptor.as<NEO::ArgDescPointer>().bindless = 0x40;
kernelInfo->kernelDescriptor.payloadMappings.explicitArgs.push_back(argDescriptor);
kernelInfo->kernelDescriptor.payloadMappings.implicitArgs.globalVariablesSurfaceAddress.bindless = 0x80;
kernelInfo->kernelDescriptor.kernelAttributes.numArgsStateful = 2;
kernelInfo->kernelDescriptor.kernelAttributes.crossThreadDataSize = 4 * sizeof(uint64_t);
kernelInfo->kernelDescriptor.initBindlessOffsetToSurfaceState();
const auto globalVariablesSurfaceAddressSSIndex = 1;
auto kernelImmutableData = std::make_unique<KernelImmutableData>(&deviceImp);
kernelImmutableData->initialize(kernelInfo.get(), &deviceImp, 0, nullptr, &globalVarBuffer, false);
auto &gfxCoreHelper = device->getGfxCoreHelper();
auto surfaceStateSize = static_cast<uint32_t>(gfxCoreHelper.getRenderSurfaceStateSize());
EXPECT_EQ(surfaceStateSize * kernelInfo->kernelDescriptor.kernelAttributes.numArgsStateful, kernelImmutableData->getSurfaceStateHeapSize());
auto &residencyContainer = kernelImmutableData->getResidencyContainer();
EXPECT_EQ(1u, residencyContainer.size());
EXPECT_EQ(1, std::count(residencyContainer.begin(), residencyContainer.end(), &globalVarBuffer));
EXPECT_EQ(1u, encodeBufferSurfaceStateCalled);
EXPECT_EQ(allocSize, savedSurfaceStateArgs.size);
EXPECT_EQ(gpuAddress, savedSurfaceStateArgs.graphicsAddress);
EXPECT_EQ(ptrOffset(kernelImmutableData->getSurfaceStateHeapTemplate(), globalVariablesSurfaceAddressSSIndex * surfaceStateSize), savedSurfaceStateArgs.outMemory);
EXPECT_EQ(&globalVarBuffer, savedSurfaceStateArgs.allocation);
}
}
HWTEST2_F(KernelImmutableDataBindlessTest, givenGlobalConstBufferAndBindlessExplicitAndImplicitArgsAndBindlessHeapsHelperWhenInitializeKernelImmutableDataThenSurfaceStateIsSetAndImplicitArgBindlessOffsetIsPatched, IsAtLeastXeHpgCore) {
HardwareInfo hwInfo = *defaultHwInfo;
auto device = std::unique_ptr<NEO::MockDevice>(NEO::MockDevice::createWithNewExecutionEnvironment<NEO::MockDevice>(&hwInfo, 0));
auto mockHelper = std::make_unique<MockBindlesHeapsHelper>(device->getMemoryManager(),
device->getNumGenericSubDevices() > 1,
device->getRootDeviceIndex(),
device->getDeviceBitfield());
device->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]->bindlessHeapsHelper.reset(mockHelper.release());
static EncodeSurfaceStateArgs savedSurfaceStateArgs{};
static size_t encodeBufferSurfaceStateCalled{};
savedSurfaceStateArgs = {};
encodeBufferSurfaceStateCalled = {};
struct MockGfxCoreHelper : NEO::GfxCoreHelperHw<FamilyType> {
void encodeBufferSurfaceState(EncodeSurfaceStateArgs &args) const override {
savedSurfaceStateArgs = args;
++encodeBufferSurfaceStateCalled;
}
};
RAIIGfxCoreHelperFactory<MockGfxCoreHelper> raii(*device->getExecutionEnvironment()->rootDeviceEnvironments[0]);
{
device->incRefInternal();
MockDeviceImp deviceImp(device.get(), device->getExecutionEnvironment());
uint64_t gpuAddress = 0x1200;
void *buffer = reinterpret_cast<void *>(gpuAddress);
size_t allocSize = 0x1100;
NEO::MockGraphicsAllocation globalConstBuffer(buffer, gpuAddress, allocSize);
auto kernelInfo = std::make_unique<KernelInfo>();
kernelInfo->kernelDescriptor.kernelMetadata.kernelName = ZebinTestData::ValidEmptyProgram<>::kernelName;
kernelInfo->kernelDescriptor.kernelAttributes.bufferAddressingMode = NEO::KernelDescriptor::BindlessAndStateless;
auto argDescriptor = NEO::ArgDescriptor(NEO::ArgDescriptor::argTPointer);
argDescriptor.as<NEO::ArgDescPointer>() = NEO::ArgDescPointer();
argDescriptor.as<NEO::ArgDescPointer>().bindful = NEO::undefined<NEO::SurfaceStateHeapOffset>;
argDescriptor.as<NEO::ArgDescPointer>().bindless = 4;
kernelInfo->kernelDescriptor.payloadMappings.explicitArgs.push_back(argDescriptor);
NEO::CrossThreadDataOffset globalConstSurfaceAddressBindlessOffset = 8;
kernelInfo->kernelDescriptor.payloadMappings.implicitArgs.globalConstantsSurfaceAddress.bindless = globalConstSurfaceAddressBindlessOffset;
kernelInfo->kernelDescriptor.kernelAttributes.numArgsStateful = 2;
kernelInfo->kernelDescriptor.kernelAttributes.crossThreadDataSize = 4 * sizeof(uint64_t);
kernelInfo->kernelDescriptor.initBindlessOffsetToSurfaceState();
auto kernelImmutableData = std::make_unique<KernelImmutableData>(&deviceImp);
kernelImmutableData->initialize(kernelInfo.get(), &deviceImp, 0, &globalConstBuffer, nullptr, false);
auto &gfxCoreHelper = device->getGfxCoreHelper();
auto surfaceStateSize = static_cast<uint32_t>(gfxCoreHelper.getRenderSurfaceStateSize());
EXPECT_EQ(surfaceStateSize * kernelInfo->kernelDescriptor.kernelAttributes.numArgsStateful, kernelImmutableData->getSurfaceStateHeapSize());
auto &residencyContainer = kernelImmutableData->getResidencyContainer();
EXPECT_EQ(2u, residencyContainer.size());
EXPECT_EQ(1, std::count(residencyContainer.begin(), residencyContainer.end(), &globalConstBuffer));
EXPECT_EQ(1, std::count(residencyContainer.begin(), residencyContainer.end(), globalConstBuffer.getBindlessInfo().heapAllocation));
auto crossThreadData = kernelImmutableData->getCrossThreadDataTemplate();
auto patchLocation = reinterpret_cast<const uint32_t *>(ptrOffset(crossThreadData, globalConstSurfaceAddressBindlessOffset));
auto patchValue = gfxCoreHelper.getBindlessSurfaceExtendedMessageDescriptorValue(static_cast<uint32_t>(globalConstBuffer.getBindlessInfo().surfaceStateOffset));
EXPECT_EQ(patchValue, *patchLocation);
EXPECT_EQ(1u, encodeBufferSurfaceStateCalled);
EXPECT_EQ(allocSize, savedSurfaceStateArgs.size);
EXPECT_EQ(gpuAddress, savedSurfaceStateArgs.graphicsAddress);
EXPECT_EQ(globalConstBuffer.getBindlessInfo().ssPtr, savedSurfaceStateArgs.outMemory);
EXPECT_EQ(&globalConstBuffer, savedSurfaceStateArgs.allocation);
}
}
HWTEST2_F(KernelImmutableDataBindlessTest, givenGlobalVarBufferAndBindlessExplicitAndImplicitArgsAndBindlessHeapsHelperWhenInitializeKernelImmutableDataThenSurfaceStateIsSetAndImplicitArgBindlessOffsetIsPatched, IsAtLeastXeHpgCore) {
HardwareInfo hwInfo = *defaultHwInfo;
auto device = std::unique_ptr<NEO::MockDevice>(NEO::MockDevice::createWithNewExecutionEnvironment<NEO::MockDevice>(&hwInfo, 0));
auto mockHelper = std::make_unique<MockBindlesHeapsHelper>(device->getMemoryManager(),
device->getNumGenericSubDevices() > 1,
device->getRootDeviceIndex(),
device->getDeviceBitfield());
device->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]->bindlessHeapsHelper.reset(mockHelper.release());
static EncodeSurfaceStateArgs savedSurfaceStateArgs{};
static size_t encodeBufferSurfaceStateCalled{};
savedSurfaceStateArgs = {};
encodeBufferSurfaceStateCalled = {};
struct MockGfxCoreHelper : NEO::GfxCoreHelperHw<FamilyType> {
void encodeBufferSurfaceState(EncodeSurfaceStateArgs &args) const override {
savedSurfaceStateArgs = args;
++encodeBufferSurfaceStateCalled;
}
};
RAIIGfxCoreHelperFactory<MockGfxCoreHelper> raii(*device->getExecutionEnvironment()->rootDeviceEnvironments[0]);
{
device->incRefInternal();
MockDeviceImp deviceImp(device.get(), device->getExecutionEnvironment());
uint64_t gpuAddress = 0x1200;
void *buffer = reinterpret_cast<void *>(gpuAddress);
size_t allocSize = 0x1100;
NEO::MockGraphicsAllocation globalVarBuffer(buffer, gpuAddress, allocSize);
auto kernelInfo = std::make_unique<KernelInfo>();
kernelInfo->kernelDescriptor.kernelMetadata.kernelName = ZebinTestData::ValidEmptyProgram<>::kernelName;
kernelInfo->kernelDescriptor.kernelAttributes.bufferAddressingMode = NEO::KernelDescriptor::BindlessAndStateless;
auto argDescriptor = NEO::ArgDescriptor(NEO::ArgDescriptor::argTPointer);
argDescriptor.as<NEO::ArgDescPointer>() = NEO::ArgDescPointer();
argDescriptor.as<NEO::ArgDescPointer>().bindful = NEO::undefined<NEO::SurfaceStateHeapOffset>;
argDescriptor.as<NEO::ArgDescPointer>().bindless = 4;
kernelInfo->kernelDescriptor.payloadMappings.explicitArgs.push_back(argDescriptor);
NEO::CrossThreadDataOffset globalVariablesSurfaceAddressBindlessOffset = 8;
kernelInfo->kernelDescriptor.payloadMappings.implicitArgs.globalVariablesSurfaceAddress.bindless = globalVariablesSurfaceAddressBindlessOffset;
kernelInfo->kernelDescriptor.kernelAttributes.numArgsStateful = 2;
kernelInfo->kernelDescriptor.kernelAttributes.crossThreadDataSize = 4 * sizeof(uint64_t);
kernelInfo->kernelDescriptor.initBindlessOffsetToSurfaceState();
auto kernelImmutableData = std::make_unique<KernelImmutableData>(&deviceImp);
kernelImmutableData->initialize(kernelInfo.get(), &deviceImp, 0, nullptr, &globalVarBuffer, false);
auto &gfxCoreHelper = device->getGfxCoreHelper();
auto surfaceStateSize = static_cast<uint32_t>(gfxCoreHelper.getRenderSurfaceStateSize());
EXPECT_EQ(surfaceStateSize * kernelInfo->kernelDescriptor.kernelAttributes.numArgsStateful, kernelImmutableData->getSurfaceStateHeapSize());
auto &residencyContainer = kernelImmutableData->getResidencyContainer();
EXPECT_EQ(2u, residencyContainer.size());
EXPECT_EQ(1, std::count(residencyContainer.begin(), residencyContainer.end(), &globalVarBuffer));
EXPECT_EQ(1, std::count(residencyContainer.begin(), residencyContainer.end(), globalVarBuffer.getBindlessInfo().heapAllocation));
auto crossThreadData = kernelImmutableData->getCrossThreadDataTemplate();
auto patchLocation = reinterpret_cast<const uint32_t *>(ptrOffset(crossThreadData, globalVariablesSurfaceAddressBindlessOffset));
auto patchValue = gfxCoreHelper.getBindlessSurfaceExtendedMessageDescriptorValue(static_cast<uint32_t>(globalVarBuffer.getBindlessInfo().surfaceStateOffset));
EXPECT_EQ(patchValue, *patchLocation);
EXPECT_EQ(1u, encodeBufferSurfaceStateCalled);
EXPECT_EQ(allocSize, savedSurfaceStateArgs.size);
EXPECT_EQ(gpuAddress, savedSurfaceStateArgs.graphicsAddress);
EXPECT_EQ(globalVarBuffer.getBindlessInfo().ssPtr, savedSurfaceStateArgs.outMemory);
EXPECT_EQ(&globalVarBuffer, savedSurfaceStateArgs.allocation);
}
}
} // namespace ult
} // namespace L0