Mirror of https://github.com/intel/compute-runtime.git
fix: add support for bindless implicit args

Support for global_base and const_base in bindless addressing mode.

Related-To: NEO-9855
Signed-off-by: Fabian Zwolinski <fabian.zwolinski@intel.com>

Committed by: Compute-Runtime-Automation
Parent: 2bda9f0b58
Commit: 903e581b5f
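For readers skimming the diff below: the core of the change is that implicit arguments (the global constants and global variables surface addresses) now get their bindless surface-state offset patched into cross-thread data, the same way explicit bindless arguments already do. The following is a minimal standalone sketch of that offset computation only; the helper names (toExtendedMessageDescriptor, patchImplicitArgBindlessOffset) and the trivial encoder are hypothetical stand-ins, not the driver's actual API.

// Illustrative sketch only: patch a bindless implicit-arg offset into cross-thread
// data, mirroring the idea of KernelImp::patchBindlessOffsetsForImplicitArgs below.
#include <cstdint>
#include <cstring>
#include <iostream>
#include <limits>
#include <vector>

constexpr uint32_t undefinedOffset = std::numeric_limits<uint32_t>::max();

// Stand-in for gfxCoreHelper.getBindlessSurfaceExtendedMessageDescriptorValue();
// the real helper encodes the offset into a platform-specific message descriptor.
uint32_t toExtendedMessageDescriptor(uint32_t surfaceStateOffset) {
    return surfaceStateOffset;
}

void patchImplicitArgBindlessOffset(std::vector<uint8_t> &crossThreadData,
                                    uint32_t bindlessOffset,    // implicit arg's offset in cross-thread data
                                    uint32_t surfaceStateIndex, // index of its surface state on the heap
                                    uint64_t surfaceStateBaseOffset,
                                    uint32_t surfaceStateSize) {
    if (bindlessOffset == undefinedOffset || surfaceStateIndex == undefinedOffset) {
        return; // no bindless slot assigned for this implicit arg
    }
    const auto surfaceStateOffset =
        static_cast<uint32_t>(surfaceStateBaseOffset + surfaceStateIndex * surfaceStateSize);
    const uint32_t patchValue = toExtendedMessageDescriptor(surfaceStateOffset);
    std::memcpy(crossThreadData.data() + bindlessOffset, &patchValue, sizeof(patchValue));
}

int main() {
    std::vector<uint8_t> crossThreadData(4 * sizeof(uint64_t), 0);
    // Pretend globalConstantsSurfaceAddress.bindless == 0x8 and its surface state is
    // the second one (index 1) on a heap starting at offset 0x1000 with 64-byte states.
    patchImplicitArgBindlessOffset(crossThreadData, 0x8, 1, 0x1000, 64);
    uint32_t patched = 0;
    std::memcpy(&patched, crossThreadData.data() + 0x8, sizeof(patched));
    std::cout << std::hex << "0x" << patched << "\n"; // prints 0x1040 with the trivial encoder
    return 0;
}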
@@ -136,6 +136,21 @@ ze_result_t KernelImmutableData::initialize(NEO::KernelInfo *kernelInfo, Device
        this->residencyContainer.push_back(globalConstBuffer);
    }

    if (globalConstBuffer && NEO::isValidOffset(kernelDescriptor->payloadMappings.implicitArgs.globalConstantsSurfaceAddress.bindless)) {
        if (!neoDevice->getMemoryManager()->allocateBindlessSlot(globalConstBuffer)) {
            return ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY;
        }
        auto ssInHeap = globalConstBuffer->getBindlessInfo();

        if (ssInHeap.heapAllocation) {
            this->residencyContainer.push_back(ssInHeap.heapAllocation);
        }

        patchImplicitArgBindlessOffsetAndSetSurfaceState(crossThreadDataArrayRef, surfaceStateHeapArrayRef,
                                                         globalConstBuffer, kernelDescriptor->payloadMappings.implicitArgs.globalConstantsSurfaceAddress,
                                                         *neoDevice, kernelDescriptor->kernelAttributes.flags.useGlobalAtomics, deviceImp->isImplicitScalingCapable(), ssInHeap, kernelInfo->kernelDescriptor);
    }

    if (NEO::isValidOffset(kernelDescriptor->payloadMappings.implicitArgs.globalVariablesSurfaceAddress.stateless)) {
        UNRECOVERABLE_IF(globalVarBuffer == nullptr);

@@ -148,6 +163,21 @@ ze_result_t KernelImmutableData::initialize(NEO::KernelInfo *kernelInfo, Device
        this->residencyContainer.push_back(globalVarBuffer);
    }

    if (globalVarBuffer && NEO::isValidOffset(kernelDescriptor->payloadMappings.implicitArgs.globalVariablesSurfaceAddress.bindless)) {
        if (!neoDevice->getMemoryManager()->allocateBindlessSlot(globalVarBuffer)) {
            return ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY;
        }
        auto ssInHeap = globalVarBuffer->getBindlessInfo();

        if (ssInHeap.heapAllocation) {
            this->residencyContainer.push_back(ssInHeap.heapAllocation);
        }

        patchImplicitArgBindlessOffsetAndSetSurfaceState(crossThreadDataArrayRef, surfaceStateHeapArrayRef,
                                                         globalVarBuffer, kernelDescriptor->payloadMappings.implicitArgs.globalVariablesSurfaceAddress,
                                                         *neoDevice, kernelDescriptor->kernelAttributes.flags.useGlobalAtomics, deviceImp->isImplicitScalingCapable(), ssInHeap, kernelInfo->kernelDescriptor);
    }

    return ZE_RESULT_SUCCESS;
}

@@ -1133,6 +1163,32 @@ void KernelImp::patchSyncBuffer(NEO::GraphicsAllocation *gfxAllocation, size_t b
                          static_cast<uintptr_t>(ptrOffset(gfxAllocation->getGpuAddressToPatch(), bufferOffset)));
}

uint32_t KernelImp::getSurfaceStateHeapDataSize() const {
    if (NEO::KernelDescriptor::isBindlessAddressingKernel(kernelImmData->getDescriptor())) {
        const auto bindlessHeapsHelper = this->module && this->module->getDevice()->getNEODevice()->getBindlessHeapsHelper();

        bool isBindlessImplicitArgPresent = false;
        auto implicitArgsVec = kernelImmData->getDescriptor().getImplicitArgBindlessCandidatesVec();
        for (const auto implicitArg : implicitArgsVec) {
            if (NEO::isValidOffset(implicitArg->bindless)) {
                isBindlessImplicitArgPresent = true;
                break;
            }
        }

        const bool noBindlessExplicitArgs = std::none_of(usingSurfaceStateHeap.cbegin(), usingSurfaceStateHeap.cend(), [](bool i) { return i; });

        if (isBindlessImplicitArgPresent && !bindlessHeapsHelper) {
            return surfaceStateHeapDataSize;
        }

        if (noBindlessExplicitArgs) {
            return 0;
        }
    }
    return surfaceStateHeapDataSize;
}

void *KernelImp::patchBindlessSurfaceState(NEO::GraphicsAllocation *alloc, uint32_t bindless) {
    auto &gfxCoreHelper = this->module->getDevice()->getGfxCoreHelper();
    auto ssInHeap = alloc->getBindlessInfo();
@@ -1279,6 +1335,8 @@ void KernelImp::patchBindlessOffsetsInCrossThreadData(uint64_t bindlessSurfaceSt
            }
        }
    }

    patchBindlessOffsetsForImplicitArgs(bindlessSurfaceStateBaseOffset);
}

uint32_t KernelImp::getSurfaceStateIndexForBindlessOffset(NEO::CrossThreadDataOffset bindlessOffset) const {
@@ -1290,4 +1348,25 @@ uint32_t KernelImp::getSurfaceStateIndexForBindlessOffset(NEO::CrossThreadDataOf
    return std::numeric_limits<uint32_t>::max();
}

void KernelImp::patchBindlessOffsetsForImplicitArgs(uint64_t bindlessSurfaceStateBaseOffset) const {
    auto implicitArgsVec = kernelImmData->getDescriptor().getImplicitArgBindlessCandidatesVec();

    auto &gfxCoreHelper = this->module->getDevice()->getGfxCoreHelper();
    auto surfaceStateSize = gfxCoreHelper.getRenderSurfaceStateSize();

    for (size_t i = 0; i < implicitArgsVec.size(); i++) {
        if (NEO::isValidOffset(implicitArgsVec[i]->bindless)) {
            auto patchLocation = ptrOffset(getCrossThreadData(), implicitArgsVec[i]->bindless);
            auto index = getSurfaceStateIndexForBindlessOffset(implicitArgsVec[i]->bindless);

            if (index < std::numeric_limits<uint32_t>::max()) {
                auto surfaceStateOffset = static_cast<uint32_t>(bindlessSurfaceStateBaseOffset + index * surfaceStateSize);
                auto patchValue = gfxCoreHelper.getBindlessSurfaceExtendedMessageDescriptorValue(static_cast<uint32_t>(surfaceStateOffset));

                patchWithRequiredSize(const_cast<uint8_t *>(patchLocation), sizeof(patchValue), patchValue);
            }
        }
    }
}

} // namespace L0

@@ -1,5 +1,5 @@
/*
 * Copyright (C) 2020-2023 Intel Corporation
 * Copyright (C) 2020-2024 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
@@ -114,14 +114,7 @@ struct KernelImp : Kernel {
    void patchSyncBuffer(NEO::GraphicsAllocation *gfxAllocation, size_t bufferOffset) override;

    const uint8_t *getSurfaceStateHeapData() const override { return surfaceStateHeapData.get(); }
    uint32_t getSurfaceStateHeapDataSize() const override {
        if (NEO::KernelDescriptor::isBindlessAddressingKernel(kernelImmData->getDescriptor())) {
            if (std::none_of(usingSurfaceStateHeap.cbegin(), usingSurfaceStateHeap.cend(), [](bool i) { return i; })) {
                return 0;
            }
        }
        return surfaceStateHeapDataSize;
    }
    uint32_t getSurfaceStateHeapDataSize() const override;

    const uint8_t *getDynamicStateHeapData() const override { return dynamicStateHeapData.get(); }

@@ -181,6 +174,7 @@ struct KernelImp : Kernel {
    NEO::GraphicsAllocation *allocatePrivateMemoryGraphicsAllocation() override;
    void patchCrossthreadDataWithPrivateAllocation(NEO::GraphicsAllocation *privateAllocation) override;
    void patchBindlessOffsetsInCrossThreadData(uint64_t bindlessSurfaceStateBaseOffset) const override;
    void patchBindlessOffsetsForImplicitArgs(uint64_t bindlessSurfaceStateBaseOffset) const;

    NEO::GraphicsAllocation *getPrivateMemoryGraphicsAllocation() override {
        return privateMemoryGraphicsAllocation;

@@ -1,5 +1,5 @@
/*
 * Copyright (C) 2022-2023 Intel Corporation
 * Copyright (C) 2022-2024 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
@@ -36,3 +36,52 @@ inline void patchWithImplicitSurface(ArrayRef<uint8_t> crossThreadData, ArrayRef
        gfxCoreHelper.encodeBufferSurfaceState(args);
    }
}

inline void patchImplicitArgBindlessOffsetAndSetSurfaceState(ArrayRef<uint8_t> crossThreadData, ArrayRef<uint8_t> surfaceStateHeap, NEO::GraphicsAllocation *allocation,
                                                             const NEO::ArgDescPointer &ptr, const NEO::Device &device, bool useGlobalAtomics, bool implicitScaling,
                                                             const NEO::SurfaceStateInHeapInfo &ssInHeap, const NEO::KernelDescriptor &kernelDescriptor) {
    auto &gfxCoreHelper = device.getGfxCoreHelper();
    void *surfaceStateAddress = nullptr;
    auto surfaceStateSize = gfxCoreHelper.getRenderSurfaceStateSize();

    if (NEO::isValidOffset(ptr.bindless)) {
        if (device.getBindlessHeapsHelper()) {
            surfaceStateAddress = ssInHeap.ssPtr;

            auto patchLocation = ptrOffset(crossThreadData.begin(), ptr.bindless);
            auto patchValue = gfxCoreHelper.getBindlessSurfaceExtendedMessageDescriptorValue(static_cast<uint32_t>(ssInHeap.surfaceStateOffset));
            patchWithRequiredSize(const_cast<uint8_t *>(patchLocation), sizeof(patchValue), patchValue);
        } else {
            auto index = std::numeric_limits<uint32_t>::max();
            const auto &iter = kernelDescriptor.getBindlessOffsetToSurfaceState().find(ptr.bindless);
            if (iter != kernelDescriptor.getBindlessOffsetToSurfaceState().end()) {
                index = iter->second;
            }

            if (index < std::numeric_limits<uint32_t>::max()) {
                surfaceStateAddress = ptrOffset(surfaceStateHeap.begin(), index * surfaceStateSize);
            }
        }
    }

    if (surfaceStateAddress) {
        auto addressToPatch = allocation->getGpuAddress();
        size_t sizeToPatch = allocation->getUnderlyingBufferSize();
        auto isDebuggerActive = device.getDebugger() != nullptr;

        NEO::EncodeSurfaceStateArgs args;
        args.outMemory = surfaceStateAddress;
        args.graphicsAddress = addressToPatch;
        args.size = sizeToPatch;
        args.mocs = gfxCoreHelper.getMocsIndex(*device.getGmmHelper(), true, false) << 1;
        args.numAvailableDevices = device.getNumGenericSubDevices();
        args.allocation = allocation;
        args.gmmHelper = device.getGmmHelper();
        args.useGlobalAtomics = useGlobalAtomics;
        args.areMultipleSubDevicesInContext = args.numAvailableDevices > 1;
        args.implicitScaling = implicitScaling;
        args.isDebuggerActive = isDebuggerActive;

        gfxCoreHelper.encodeBufferSurfaceState(args);
    }
}

@@ -2190,6 +2190,40 @@ HWTEST2_F(KernelImpPatchBindlessTest, GivenMisalignedBufferAddressWhenSettingSur
    EXPECT_EQ(mockKernel.surfaceStateHeapDataSize, mockKernel.getSurfaceStateHeapDataSize());
}

HWTEST2_F(KernelImpPatchBindlessTest, GivenBindlessImplicitArgAndNoBindlessHeapsHelperWhenGetSurfaceStateHeapDataSizeThenReturnSurfaceStateHeapDataSize, MatchAny) {
    ze_kernel_desc_t desc = {};
    desc.pKernelName = kernelName.c_str();

    WhiteBoxKernelHw<gfxCoreFamily> mockKernel;
    mockKernel.module = module.get();
    mockKernel.initialize(&desc);
    const_cast<NEO::KernelDescriptor &>(mockKernel.kernelImmData->getDescriptor()).kernelAttributes.bufferAddressingMode = NEO::KernelDescriptor::BindlessAndStateless;
    EXPECT_EQ(0u, mockKernel.getSurfaceStateHeapDataSize());

    const_cast<NEO::KernelDescriptor &>(mockKernel.kernelImmData->getDescriptor()).payloadMappings.implicitArgs.globalConstantsSurfaceAddress.bindless = 0x20;
    ASSERT_EQ(nullptr, mockKernel.module->getDevice()->getNEODevice()->getBindlessHeapsHelper());
    EXPECT_NE(0u, mockKernel.surfaceStateHeapDataSize);
    EXPECT_EQ(mockKernel.surfaceStateHeapDataSize, mockKernel.getSurfaceStateHeapDataSize());
}

HWTEST2_F(KernelImpPatchBindlessTest, GivenBindlessImplicitArgAndBindlessHeapsHelperWhenGetSurfaceStateHeapDataSizeThenReturnZero, MatchAny) {
    ze_kernel_desc_t desc = {};
    desc.pKernelName = kernelName.c_str();

    WhiteBoxKernelHw<gfxCoreFamily> mockKernel;
    mockKernel.module = module.get();
    mockKernel.initialize(&desc);
    const_cast<NEO::KernelDescriptor &>(mockKernel.kernelImmData->getDescriptor()).kernelAttributes.bufferAddressingMode = NEO::KernelDescriptor::BindlessAndStateless;
    EXPECT_EQ(0u, mockKernel.getSurfaceStateHeapDataSize());

    const_cast<NEO::KernelDescriptor &>(mockKernel.kernelImmData->getDescriptor()).payloadMappings.implicitArgs.globalConstantsSurfaceAddress.bindless = 0x20;
    neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->createBindlessHeapsHelper(neoDevice->getMemoryManager(),
                                                                                                                             neoDevice->getNumGenericSubDevices() > 1,
                                                                                                                             neoDevice->getRootDeviceIndex(),
                                                                                                                             neoDevice->getDeviceBitfield());
    EXPECT_EQ(0u, mockKernel.getSurfaceStateHeapDataSize());
}

HWTEST2_F(KernelImpPatchBindlessTest, GivenMisalignedAndAlignedBufferAddressWhenSettingSurfaceStateThenKernelReportsNonZeroSurfaceStateHeapDataSize, MatchAny) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;

@@ -3579,13 +3613,16 @@ TEST_F(BindlessKernelTest, givenBindlessKernelWhenPatchingCrossThreadDataThenCor
    argDescriptor2.as<NEO::ArgDescPointer>().stateless = 2 * sizeof(uint64_t);
    mockKernel.descriptor.payloadMappings.explicitArgs.push_back(argDescriptor2);

    mockKernel.descriptor.payloadMappings.implicitArgs.globalVariablesSurfaceAddress.bindless = 3 * sizeof(uint64_t);
    mockKernel.descriptor.payloadMappings.implicitArgs.globalConstantsSurfaceAddress.bindless = 4 * sizeof(uint64_t);

    mockKernel.isBindlessOffsetSet.resize(4, 0);
    mockKernel.usingSurfaceStateHeap.resize(4, 0);

    mockKernel.descriptor.initBindlessOffsetToSurfaceState();

    mockKernel.crossThreadData = std::make_unique<uint8_t[]>(4 * sizeof(uint64_t));
    mockKernel.crossThreadDataSize = 4 * sizeof(uint64_t);
    mockKernel.crossThreadData = std::make_unique<uint8_t[]>(5 * sizeof(uint64_t));
    mockKernel.crossThreadDataSize = 5 * sizeof(uint64_t);
    memset(mockKernel.crossThreadData.get(), 0, mockKernel.crossThreadDataSize);

    const uint64_t baseAddress = 0x1000;
@@ -3593,7 +3630,9 @@ TEST_F(BindlessKernelTest, givenBindlessKernelWhenPatchingCrossThreadDataThenCor
    auto surfaceStateSize = gfxCoreHelper.getRenderSurfaceStateSize();

    auto patchValue1 = gfxCoreHelper.getBindlessSurfaceExtendedMessageDescriptorValue(static_cast<uint32_t>(baseAddress));
    auto patchValue2 = gfxCoreHelper.getBindlessSurfaceExtendedMessageDescriptorValue(static_cast<uint32_t>(baseAddress + surfaceStateSize));
    auto patchValue2 = gfxCoreHelper.getBindlessSurfaceExtendedMessageDescriptorValue(static_cast<uint32_t>(baseAddress + 1 * surfaceStateSize));
    auto patchValue3 = gfxCoreHelper.getBindlessSurfaceExtendedMessageDescriptorValue(static_cast<uint32_t>(baseAddress + 2 * surfaceStateSize));
    auto patchValue4 = gfxCoreHelper.getBindlessSurfaceExtendedMessageDescriptorValue(static_cast<uint32_t>(baseAddress + 3 * surfaceStateSize));

    mockKernel.patchBindlessOffsetsInCrossThreadData(baseAddress);

@@ -3602,7 +3641,9 @@ TEST_F(BindlessKernelTest, givenBindlessKernelWhenPatchingCrossThreadDataThenCor

    EXPECT_EQ(patchValue1, crossThreadData[0]);
    EXPECT_EQ(patchValue2, crossThreadData[1]);
    EXPECT_EQ(0u, crossThreadData[3]);
    EXPECT_EQ(0u, crossThreadData[2]);
    EXPECT_EQ(patchValue3, crossThreadData[3]);
    EXPECT_EQ(patchValue4, crossThreadData[4]);
}

TEST_F(BindlessKernelTest, givenBindlessKernelWithPatchedBindlessOffsetsWhenPatchingCrossThreadDataThenMemoryIsNotPatched) {

@@ -1,5 +1,5 @@
/*
 * Copyright (C) 2022-2023 Intel Corporation
 * Copyright (C) 2022-2024 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
@@ -10,14 +10,17 @@
#include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/helpers/simd_helper.h"
#include "shared/test/common/helpers/raii_gfx_core_helper.h"
#include "shared/test/common/mocks/mock_bindless_heaps_helper.h"
#include "shared/test/common/mocks/mock_device.h"
#include "shared/test/common/mocks/mock_graphics_allocation.h"
#include "shared/test/common/mocks/mock_l0_debugger.h"
#include "shared/test/common/mocks/mock_modules_zebin.h"
#include "shared/test/common/test_macros/hw_test.h"
#include "shared/test/common/test_macros/test.h"

#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
#include "level_zero/core/test/unit_tests/fixtures/kernel_max_cooperative_groups_count_fixture.h"
#include "level_zero/core/test/unit_tests/mocks/mock_device.h"
#include "level_zero/core/test/unit_tests/mocks/mock_kernel.h"
#include "level_zero/core/test/unit_tests/mocks/mock_module.h"

@@ -486,5 +489,303 @@ HWTEST2_F(KernelTest, GivenInlineSamplersWhenSettingInlineSamplerThenDshIsPatche
    EXPECT_EQ(SamplerState::MAG_MODE_FILTER_NEAREST, samplerState->getMagModeFilter());
}

using KernelImmutableDataBindlessTest = Test<DeviceFixture>;

HWTEST2_F(KernelImmutableDataBindlessTest, givenGlobalConstBufferAndBindlessExplicitAndImplicitArgsAndNoBindlessHeapsHelperWhenInitializeKernelImmutableDataThenSurfaceStateIsSetAndImplicitArgBindlessOffsetIsPatched, IsAtLeastXeHpgCore) {
    HardwareInfo hwInfo = *defaultHwInfo;

    auto device = std::unique_ptr<NEO::MockDevice>(NEO::MockDevice::createWithNewExecutionEnvironment<NEO::MockDevice>(&hwInfo, 0));

    static EncodeSurfaceStateArgs savedSurfaceStateArgs{};
    static size_t encodeBufferSurfaceStateCalled{};
    savedSurfaceStateArgs = {};
    encodeBufferSurfaceStateCalled = {};
    struct MockGfxCoreHelper : NEO::GfxCoreHelperHw<FamilyType> {
        void encodeBufferSurfaceState(EncodeSurfaceStateArgs &args) const override {
            savedSurfaceStateArgs = args;
            ++encodeBufferSurfaceStateCalled;
        }
    };

    RAIIGfxCoreHelperFactory<MockGfxCoreHelper> raii(*device->getExecutionEnvironment()->rootDeviceEnvironments[0]);

    {
        device->incRefInternal();
        MockDeviceImp deviceImp(device.get(), device->getExecutionEnvironment());

        uint64_t gpuAddress = 0x1200;
        void *buffer = reinterpret_cast<void *>(gpuAddress);
        size_t allocSize = 0x1100;
        NEO::MockGraphicsAllocation globalConstBuffer(buffer, gpuAddress, allocSize);

        auto kernelInfo = std::make_unique<KernelInfo>();

        kernelInfo->kernelDescriptor.kernelMetadata.kernelName = ZebinTestData::ValidEmptyProgram<>::kernelName;

        kernelInfo->kernelDescriptor.kernelAttributes.bufferAddressingMode = NEO::KernelDescriptor::BindlessAndStateless;

        auto argDescriptor = NEO::ArgDescriptor(NEO::ArgDescriptor::argTPointer);
        argDescriptor.as<NEO::ArgDescPointer>() = NEO::ArgDescPointer();
        argDescriptor.as<NEO::ArgDescPointer>().bindful = NEO::undefined<NEO::SurfaceStateHeapOffset>;
        argDescriptor.as<NEO::ArgDescPointer>().bindless = 0x40;
        kernelInfo->kernelDescriptor.payloadMappings.explicitArgs.push_back(argDescriptor);

        kernelInfo->kernelDescriptor.payloadMappings.implicitArgs.globalConstantsSurfaceAddress.bindless = 0x80;

        kernelInfo->kernelDescriptor.kernelAttributes.numArgsStateful = 2;
        kernelInfo->kernelDescriptor.kernelAttributes.crossThreadDataSize = 4 * sizeof(uint64_t);

        kernelInfo->kernelDescriptor.initBindlessOffsetToSurfaceState();
        const auto globalConstantsSurfaceAddressSSIndex = 1;

        auto kernelImmutableData = std::make_unique<KernelImmutableData>(&deviceImp);
        kernelImmutableData->initialize(kernelInfo.get(), &deviceImp, 0, &globalConstBuffer, nullptr, false);

        auto &gfxCoreHelper = device->getGfxCoreHelper();
        auto surfaceStateSize = static_cast<uint32_t>(gfxCoreHelper.getRenderSurfaceStateSize());

        EXPECT_EQ(surfaceStateSize * kernelInfo->kernelDescriptor.kernelAttributes.numArgsStateful, kernelImmutableData->getSurfaceStateHeapSize());

        auto &residencyContainer = kernelImmutableData->getResidencyContainer();
        EXPECT_EQ(1u, residencyContainer.size());
        EXPECT_EQ(1, std::count(residencyContainer.begin(), residencyContainer.end(), &globalConstBuffer));

        EXPECT_EQ(1u, encodeBufferSurfaceStateCalled);
        EXPECT_EQ(allocSize, savedSurfaceStateArgs.size);
        EXPECT_EQ(gpuAddress, savedSurfaceStateArgs.graphicsAddress);

        EXPECT_EQ(ptrOffset(kernelImmutableData->getSurfaceStateHeapTemplate(), globalConstantsSurfaceAddressSSIndex * surfaceStateSize), savedSurfaceStateArgs.outMemory);
        EXPECT_EQ(&globalConstBuffer, savedSurfaceStateArgs.allocation);
    }
}

HWTEST2_F(KernelImmutableDataBindlessTest, givenGlobalVarBufferAndBindlessExplicitAndImplicitArgsAndNoBindlessHeapsHelperWhenInitializeKernelImmutableDataThenSurfaceStateIsSetAndImplicitArgBindlessOffsetIsPatched, IsAtLeastXeHpgCore) {
    HardwareInfo hwInfo = *defaultHwInfo;

    auto device = std::unique_ptr<NEO::MockDevice>(NEO::MockDevice::createWithNewExecutionEnvironment<NEO::MockDevice>(&hwInfo, 0));

    static EncodeSurfaceStateArgs savedSurfaceStateArgs{};
    static size_t encodeBufferSurfaceStateCalled{};
    savedSurfaceStateArgs = {};
    encodeBufferSurfaceStateCalled = {};
    struct MockGfxCoreHelper : NEO::GfxCoreHelperHw<FamilyType> {
        void encodeBufferSurfaceState(EncodeSurfaceStateArgs &args) const override {
            savedSurfaceStateArgs = args;
            ++encodeBufferSurfaceStateCalled;
        }
    };

    RAIIGfxCoreHelperFactory<MockGfxCoreHelper> raii(*device->getExecutionEnvironment()->rootDeviceEnvironments[0]);

    {
        device->incRefInternal();
        MockDeviceImp deviceImp(device.get(), device->getExecutionEnvironment());

        uint64_t gpuAddress = 0x1200;
        void *buffer = reinterpret_cast<void *>(gpuAddress);
        size_t allocSize = 0x1100;
        NEO::MockGraphicsAllocation globalVarBuffer(buffer, gpuAddress, allocSize);

        auto kernelInfo = std::make_unique<KernelInfo>();

        kernelInfo->kernelDescriptor.kernelMetadata.kernelName = ZebinTestData::ValidEmptyProgram<>::kernelName;

        kernelInfo->kernelDescriptor.kernelAttributes.bufferAddressingMode = NEO::KernelDescriptor::BindlessAndStateless;

        auto argDescriptor = NEO::ArgDescriptor(NEO::ArgDescriptor::argTPointer);
        argDescriptor.as<NEO::ArgDescPointer>() = NEO::ArgDescPointer();
        argDescriptor.as<NEO::ArgDescPointer>().bindful = NEO::undefined<NEO::SurfaceStateHeapOffset>;
        argDescriptor.as<NEO::ArgDescPointer>().bindless = 0x40;
        kernelInfo->kernelDescriptor.payloadMappings.explicitArgs.push_back(argDescriptor);

        kernelInfo->kernelDescriptor.payloadMappings.implicitArgs.globalVariablesSurfaceAddress.bindless = 0x80;

        kernelInfo->kernelDescriptor.kernelAttributes.numArgsStateful = 2;
        kernelInfo->kernelDescriptor.kernelAttributes.crossThreadDataSize = 4 * sizeof(uint64_t);

        kernelInfo->kernelDescriptor.initBindlessOffsetToSurfaceState();
        const auto globalVariablesSurfaceAddressSSIndex = 1;

        auto kernelImmutableData = std::make_unique<KernelImmutableData>(&deviceImp);
        kernelImmutableData->initialize(kernelInfo.get(), &deviceImp, 0, nullptr, &globalVarBuffer, false);

        auto &gfxCoreHelper = device->getGfxCoreHelper();
        auto surfaceStateSize = static_cast<uint32_t>(gfxCoreHelper.getRenderSurfaceStateSize());

        EXPECT_EQ(surfaceStateSize * kernelInfo->kernelDescriptor.kernelAttributes.numArgsStateful, kernelImmutableData->getSurfaceStateHeapSize());

        auto &residencyContainer = kernelImmutableData->getResidencyContainer();
        EXPECT_EQ(1u, residencyContainer.size());
        EXPECT_EQ(1, std::count(residencyContainer.begin(), residencyContainer.end(), &globalVarBuffer));

        EXPECT_EQ(1u, encodeBufferSurfaceStateCalled);
        EXPECT_EQ(allocSize, savedSurfaceStateArgs.size);
        EXPECT_EQ(gpuAddress, savedSurfaceStateArgs.graphicsAddress);

        EXPECT_EQ(ptrOffset(kernelImmutableData->getSurfaceStateHeapTemplate(), globalVariablesSurfaceAddressSSIndex * surfaceStateSize), savedSurfaceStateArgs.outMemory);
        EXPECT_EQ(&globalVarBuffer, savedSurfaceStateArgs.allocation);
    }
}

HWTEST2_F(KernelImmutableDataBindlessTest, givenGlobalConstBufferAndBindlessExplicitAndImplicitArgsAndBindlessHeapsHelperWhenInitializeKernelImmutableDataThenSurfaceStateIsSetAndImplicitArgBindlessOffsetIsPatched, IsAtLeastXeHpgCore) {
    HardwareInfo hwInfo = *defaultHwInfo;

    auto device = std::unique_ptr<NEO::MockDevice>(NEO::MockDevice::createWithNewExecutionEnvironment<NEO::MockDevice>(&hwInfo, 0));
    auto mockHelper = std::make_unique<MockBindlesHeapsHelper>(device->getMemoryManager(),
                                                               device->getNumGenericSubDevices() > 1,
                                                               device->getRootDeviceIndex(),
                                                               device->getDeviceBitfield());
    device->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]->bindlessHeapsHelper.reset(mockHelper.release());

    static EncodeSurfaceStateArgs savedSurfaceStateArgs{};
    static size_t encodeBufferSurfaceStateCalled{};
    savedSurfaceStateArgs = {};
    encodeBufferSurfaceStateCalled = {};
    struct MockGfxCoreHelper : NEO::GfxCoreHelperHw<FamilyType> {
        void encodeBufferSurfaceState(EncodeSurfaceStateArgs &args) const override {
            savedSurfaceStateArgs = args;
            ++encodeBufferSurfaceStateCalled;
        }
    };

    RAIIGfxCoreHelperFactory<MockGfxCoreHelper> raii(*device->getExecutionEnvironment()->rootDeviceEnvironments[0]);

    {
        device->incRefInternal();
        MockDeviceImp deviceImp(device.get(), device->getExecutionEnvironment());

        uint64_t gpuAddress = 0x1200;
        void *buffer = reinterpret_cast<void *>(gpuAddress);
        size_t allocSize = 0x1100;
        NEO::MockGraphicsAllocation globalConstBuffer(buffer, gpuAddress, allocSize);

        auto kernelInfo = std::make_unique<KernelInfo>();

        kernelInfo->kernelDescriptor.kernelMetadata.kernelName = ZebinTestData::ValidEmptyProgram<>::kernelName;

        kernelInfo->kernelDescriptor.kernelAttributes.bufferAddressingMode = NEO::KernelDescriptor::BindlessAndStateless;

        auto argDescriptor = NEO::ArgDescriptor(NEO::ArgDescriptor::argTPointer);
        argDescriptor.as<NEO::ArgDescPointer>() = NEO::ArgDescPointer();
        argDescriptor.as<NEO::ArgDescPointer>().bindful = NEO::undefined<NEO::SurfaceStateHeapOffset>;
        argDescriptor.as<NEO::ArgDescPointer>().bindless = 4;
        kernelInfo->kernelDescriptor.payloadMappings.explicitArgs.push_back(argDescriptor);

        NEO::CrossThreadDataOffset globalConstSurfaceAddressBindlessOffset = 8;
        kernelInfo->kernelDescriptor.payloadMappings.implicitArgs.globalConstantsSurfaceAddress.bindless = globalConstSurfaceAddressBindlessOffset;

        kernelInfo->kernelDescriptor.kernelAttributes.numArgsStateful = 2;
        kernelInfo->kernelDescriptor.kernelAttributes.crossThreadDataSize = 4 * sizeof(uint64_t);

        kernelInfo->kernelDescriptor.initBindlessOffsetToSurfaceState();

        auto kernelImmutableData = std::make_unique<KernelImmutableData>(&deviceImp);
        kernelImmutableData->initialize(kernelInfo.get(), &deviceImp, 0, &globalConstBuffer, nullptr, false);

        auto &gfxCoreHelper = device->getGfxCoreHelper();
        auto surfaceStateSize = static_cast<uint32_t>(gfxCoreHelper.getRenderSurfaceStateSize());

        EXPECT_EQ(surfaceStateSize * kernelInfo->kernelDescriptor.kernelAttributes.numArgsStateful, kernelImmutableData->getSurfaceStateHeapSize());

        auto &residencyContainer = kernelImmutableData->getResidencyContainer();
        EXPECT_EQ(2u, residencyContainer.size());
        EXPECT_EQ(1, std::count(residencyContainer.begin(), residencyContainer.end(), &globalConstBuffer));
        EXPECT_EQ(1, std::count(residencyContainer.begin(), residencyContainer.end(), globalConstBuffer.getBindlessInfo().heapAllocation));

        auto crossThreadData = kernelImmutableData->getCrossThreadDataTemplate();
        auto patchLocation = reinterpret_cast<const uint32_t *>(ptrOffset(crossThreadData, globalConstSurfaceAddressBindlessOffset));
        auto patchValue = gfxCoreHelper.getBindlessSurfaceExtendedMessageDescriptorValue(static_cast<uint32_t>(globalConstBuffer.getBindlessInfo().surfaceStateOffset));

        EXPECT_EQ(patchValue, *patchLocation);

        EXPECT_EQ(1u, encodeBufferSurfaceStateCalled);
        EXPECT_EQ(allocSize, savedSurfaceStateArgs.size);
        EXPECT_EQ(gpuAddress, savedSurfaceStateArgs.graphicsAddress);

        EXPECT_EQ(globalConstBuffer.getBindlessInfo().ssPtr, savedSurfaceStateArgs.outMemory);
        EXPECT_EQ(&globalConstBuffer, savedSurfaceStateArgs.allocation);
    }
}

HWTEST2_F(KernelImmutableDataBindlessTest, givenGlobalVarBufferAndBindlessExplicitAndImplicitArgsAndBindlessHeapsHelperWhenInitializeKernelImmutableDataThenSurfaceStateIsSetAndImplicitArgBindlessOffsetIsPatched, IsAtLeastXeHpgCore) {
    HardwareInfo hwInfo = *defaultHwInfo;

    auto device = std::unique_ptr<NEO::MockDevice>(NEO::MockDevice::createWithNewExecutionEnvironment<NEO::MockDevice>(&hwInfo, 0));
    auto mockHelper = std::make_unique<MockBindlesHeapsHelper>(device->getMemoryManager(),
                                                               device->getNumGenericSubDevices() > 1,
                                                               device->getRootDeviceIndex(),
                                                               device->getDeviceBitfield());
    device->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]->bindlessHeapsHelper.reset(mockHelper.release());

    static EncodeSurfaceStateArgs savedSurfaceStateArgs{};
    static size_t encodeBufferSurfaceStateCalled{};
    savedSurfaceStateArgs = {};
    encodeBufferSurfaceStateCalled = {};
    struct MockGfxCoreHelper : NEO::GfxCoreHelperHw<FamilyType> {
        void encodeBufferSurfaceState(EncodeSurfaceStateArgs &args) const override {
            savedSurfaceStateArgs = args;
            ++encodeBufferSurfaceStateCalled;
        }
    };

    RAIIGfxCoreHelperFactory<MockGfxCoreHelper> raii(*device->getExecutionEnvironment()->rootDeviceEnvironments[0]);

    {
        device->incRefInternal();
        MockDeviceImp deviceImp(device.get(), device->getExecutionEnvironment());

        uint64_t gpuAddress = 0x1200;
        void *buffer = reinterpret_cast<void *>(gpuAddress);
        size_t allocSize = 0x1100;
        NEO::MockGraphicsAllocation globalVarBuffer(buffer, gpuAddress, allocSize);

        auto kernelInfo = std::make_unique<KernelInfo>();

        kernelInfo->kernelDescriptor.kernelMetadata.kernelName = ZebinTestData::ValidEmptyProgram<>::kernelName;

        kernelInfo->kernelDescriptor.kernelAttributes.bufferAddressingMode = NEO::KernelDescriptor::BindlessAndStateless;

        auto argDescriptor = NEO::ArgDescriptor(NEO::ArgDescriptor::argTPointer);
        argDescriptor.as<NEO::ArgDescPointer>() = NEO::ArgDescPointer();
        argDescriptor.as<NEO::ArgDescPointer>().bindful = NEO::undefined<NEO::SurfaceStateHeapOffset>;
        argDescriptor.as<NEO::ArgDescPointer>().bindless = 4;
        kernelInfo->kernelDescriptor.payloadMappings.explicitArgs.push_back(argDescriptor);

        NEO::CrossThreadDataOffset globalVariablesSurfaceAddressBindlessOffset = 8;
        kernelInfo->kernelDescriptor.payloadMappings.implicitArgs.globalVariablesSurfaceAddress.bindless = globalVariablesSurfaceAddressBindlessOffset;

        kernelInfo->kernelDescriptor.kernelAttributes.numArgsStateful = 2;
        kernelInfo->kernelDescriptor.kernelAttributes.crossThreadDataSize = 4 * sizeof(uint64_t);

        kernelInfo->kernelDescriptor.initBindlessOffsetToSurfaceState();

        auto kernelImmutableData = std::make_unique<KernelImmutableData>(&deviceImp);
        kernelImmutableData->initialize(kernelInfo.get(), &deviceImp, 0, nullptr, &globalVarBuffer, false);

        auto &gfxCoreHelper = device->getGfxCoreHelper();
        auto surfaceStateSize = static_cast<uint32_t>(gfxCoreHelper.getRenderSurfaceStateSize());

        EXPECT_EQ(surfaceStateSize * kernelInfo->kernelDescriptor.kernelAttributes.numArgsStateful, kernelImmutableData->getSurfaceStateHeapSize());

        auto &residencyContainer = kernelImmutableData->getResidencyContainer();
        EXPECT_EQ(2u, residencyContainer.size());
        EXPECT_EQ(1, std::count(residencyContainer.begin(), residencyContainer.end(), &globalVarBuffer));
        EXPECT_EQ(1, std::count(residencyContainer.begin(), residencyContainer.end(), globalVarBuffer.getBindlessInfo().heapAllocation));

        auto crossThreadData = kernelImmutableData->getCrossThreadDataTemplate();
        auto patchLocation = reinterpret_cast<const uint32_t *>(ptrOffset(crossThreadData, globalVariablesSurfaceAddressBindlessOffset));
        auto patchValue = gfxCoreHelper.getBindlessSurfaceExtendedMessageDescriptorValue(static_cast<uint32_t>(globalVarBuffer.getBindlessInfo().surfaceStateOffset));

        EXPECT_EQ(patchValue, *patchLocation);

        EXPECT_EQ(1u, encodeBufferSurfaceStateCalled);
        EXPECT_EQ(allocSize, savedSurfaceStateArgs.size);
        EXPECT_EQ(gpuAddress, savedSurfaceStateArgs.graphicsAddress);

        EXPECT_EQ(globalVarBuffer.getBindlessInfo().ssPtr, savedSurfaceStateArgs.outMemory);
        EXPECT_EQ(&globalVarBuffer, savedSurfaceStateArgs.allocation);
    }
}

} // namespace ult
} // namespace L0