feature: bindless addressing for buffers with offset

- allocate SurfaceStates on the kernel's heap for buffers with an offset

Related-To: NEO-7063

Signed-off-by: Mateusz Hoppe <mateusz.hoppe@intel.com>
This commit is contained in:
Mateusz Hoppe
2023-09-05 14:08:03 +00:00
committed by Compute-Runtime-Automation
parent 269adbe43b
commit 93469eaf5d
14 changed files with 255 additions and 14 deletions

View File

@@ -70,6 +70,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
NEO::EncodeDispatchKernel<GfxFamily>::getSizeRequiredSsh(*kernelInfo),
NEO::EncodeDispatchKernel<GfxFamily>::getDefaultSshAlignment()};
// update SSH size - when global bindless addressing is used, kernel args may not require ssh space
if (kernel->getSurfaceStateHeapDataSize() == 0) {
sshReserveArgs.size = 0;
}
auto &dshReserveConfig = commandContainer.getDynamicStateHeapReserve();
NEO::HeapReserveArguments dshReserveArgs = {
dshReserveConfig.indirectHeapReservation,

View File

@@ -135,7 +135,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
NEO::EncodeDispatchKernel<GfxFamily>::getSizeRequiredSsh(*kernelInfo),
NEO::EncodeDispatchKernel<GfxFamily>::getDefaultSshAlignment()};
if (device->getNEODevice()->getBindlessHeapsHelper() && NEO::KernelDescriptor::isBindlessAddressingKernel(kernelImmutableData->getDescriptor())) {
// update SSH size - when global bindless addressing is used, kernel args may not require ssh space
if (kernel->getSurfaceStateHeapDataSize() == 0) {
sshReserveArgs.size = 0;
}

View File

@@ -46,11 +46,14 @@ struct KernelHw : public KernelImp {
auto argInfo = kernelImmData->getDescriptor().payloadMappings.explicitArgs[argIndex].as<NEO::ArgDescPointer>();
bool offsetWasPatched = NEO::patchNonPointer<uint32_t, uint32_t>(ArrayRef<uint8_t>(this->crossThreadData.get(), this->crossThreadDataSize),
argInfo.bufferOffset, static_cast<uint32_t>(offset));
bool offsetedAddress = false;
if (false == offsetWasPatched) {
// fallback to handling offset in surface state
offsetedAddress = baseAddress != reinterpret_cast<uintptr_t>(address);
baseAddress = reinterpret_cast<uintptr_t>(address);
bufferSizeForSsh -= offset;
DEBUG_BREAK_IF(baseAddress != (baseAddress & sshAlignmentMask));
offset = 0;
}
void *surfaceStateAddress = nullptr;
@@ -61,9 +64,13 @@ struct KernelHw : public KernelImp {
surfaceState = *reinterpret_cast<typename GfxFamily::RENDER_SURFACE_STATE *>(surfaceStateAddress);
} else if (NEO::isValidOffset(argInfo.bindless)) {
if (this->module->getDevice()->getNEODevice()->getBindlessHeapsHelper()) {
isBindlessOffsetSet[argIndex] = false;
usingSurfaceStateHeap[argIndex] = false;
if (this->module->getDevice()->getNEODevice()->getBindlessHeapsHelper() && !offsetedAddress) {
surfaceStateAddress = patchBindlessSurfaceState(alloc, argInfo.bindless);
isBindlessOffsetSet[argIndex] = true;
} else {
usingSurfaceStateHeap[argIndex] = true;
surfaceStateAddress = ptrOffset(surfaceStateHeapData.get(), getSurfaceStateIndexForBindlessOffset(argInfo.bindless) * sizeof(typename GfxFamily::RENDER_SURFACE_STATE));
}
}

View File

@@ -569,6 +569,7 @@ ze_result_t KernelImp::setArgRedescribedImage(uint32_t argIndex, ze_image_handle
patchWithRequiredSize(const_cast<uint8_t *>(patchLocation), sizeof(patchValue), patchValue);
image->copyRedescribedSurfaceStateToSSH(ptrOffset(ssInHeap.ssPtr, surfaceStateSize), 0u);
isBindlessOffsetSet[argIndex] = true;
this->residencyContainer.push_back(ssInHeap.heapAllocation);
} else {
@@ -764,6 +765,7 @@ ze_result_t KernelImp::setArgImage(uint32_t argIndex, size_t argSize, const void
}
auto ssPtr = patchBindlessSurfaceState(image->getAllocation(), arg.bindless);
isBindlessOffsetSet[argIndex] = true;
image->copySurfaceStateToSSH(ssPtr, 0u, isMediaBlockImage);
} else {
auto &gfxCoreHelper = this->module->getDevice()->getNEODevice()->getRootDeviceEnvironmentRef().getHelper<NEO::GfxCoreHelper>();
@@ -976,6 +978,8 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) {
slmArgSizes.resize(this->kernelArgHandlers.size(), 0);
kernelArgInfos.resize(this->kernelArgHandlers.size(), {});
isArgUncached.resize(this->kernelArgHandlers.size(), 0);
isBindlessOffsetSet.resize(this->kernelArgHandlers.size(), 0);
usingSurfaceStateHeap.resize(this->kernelArgHandlers.size(), 0);
if (kernelImmData->getSurfaceStateHeapSize() > 0) {
this->surfaceStateHeapData.reset(new uint8_t[kernelImmData->getSurfaceStateHeapSize()]);
@@ -1253,7 +1257,7 @@ void KernelImp::patchBindlessOffsetsInCrossThreadData(uint64_t bindlessSurfaceSt
auto patchLocation = ptrOffset(getCrossThreadData(), crossThreadOffset);
auto index = getSurfaceStateIndexForBindlessOffset(crossThreadOffset);
if (index < std::numeric_limits<uint32_t>::max()) {
if (index < std::numeric_limits<uint32_t>::max() && !isBindlessOffsetSet[argIndex]) {
auto surfaceStateOffset = static_cast<uint32_t>(bindlessSurfaceStateBaseOffset + index * surfaceStateSize);
auto patchValue = gfxCoreHelper.getBindlessSurfaceExtendedMessageDescriptorValue(static_cast<uint32_t>(surfaceStateOffset));

View File

@@ -108,7 +108,14 @@ struct KernelImp : Kernel {
void patchSyncBuffer(NEO::GraphicsAllocation *gfxAllocation, size_t bufferOffset) override;
const uint8_t *getSurfaceStateHeapData() const override { return surfaceStateHeapData.get(); }
uint32_t getSurfaceStateHeapDataSize() const override { return surfaceStateHeapDataSize; }
// Returns the surface state heap size this kernel reports for dispatch.
// For a bindless-addressing kernel the result is 0 unless at least one entry
// of usingSurfaceStateHeap is set; in every other case the stored
// surfaceStateHeapDataSize is returned unchanged.
uint32_t getSurfaceStateHeapDataSize() const override {
if (NEO::KernelDescriptor::isBindlessAddressingKernel(kernelImmData->getDescriptor())) {
// std::none_of is also true for an empty vector, so a bindless kernel with no tracked args reports 0.
if (std::none_of(usingSurfaceStateHeap.cbegin(), usingSurfaceStateHeap.cend(), [](bool i) { return i; })) {
return 0;
}
}
return surfaceStateHeapDataSize;
}
const uint8_t *getDynamicStateHeapData() const override { return dynamicStateHeapData.get(); }
@@ -230,6 +237,8 @@ struct KernelImp : Kernel {
uint32_t kernelRequiresUncachedMocsCount = 0;
uint32_t kernelRequiresQueueUncachedMocsCount = 0;
std::vector<bool> isArgUncached;
std::vector<bool> isBindlessOffsetSet;
std::vector<bool> usingSurfaceStateHeap;
uint32_t globalOffsets[3] = {};

View File

@@ -124,8 +124,8 @@ void ModuleImmutableDataFixture::tearDown() {
DeviceFixture::tearDown();
}
L0::Module *ModuleFixture::ProxyModuleImp::create(L0::Device *device, const ze_module_desc_t *desc,
ModuleBuildLog *moduleBuildLog, ModuleType type, ze_result_t *result) {
ModuleFixture::ProxyModuleImp *ModuleFixture::ProxyModuleImp::create(L0::Device *device, const ze_module_desc_t *desc,
ModuleBuildLog *moduleBuildLog, ModuleType type, ze_result_t *result) {
auto module = new ProxyModuleImp(device, moduleBuildLog, type);
*result = module->initialize(desc, device->getNEODevice());

View File

@@ -13,6 +13,7 @@
#include "level_zero/core/source/module/module_imp.h"
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
#include "level_zero/core/test/unit_tests/mocks/mock_kernel.h"
#include "level_zero/core/test/unit_tests/mocks/mock_module.h"
namespace L0 {
namespace ult {
@@ -122,15 +123,16 @@ struct ModuleImmutableDataFixture : public DeviceFixture {
struct ModuleFixture : public DeviceFixture {
struct ProxyModuleImp : public ModuleImp {
using ModuleImp::ModuleImp;
struct ProxyModuleImp : public WhiteBox<::L0::Module> {
using BaseClass = WhiteBox<::L0::Module>;
using BaseClass::BaseClass;
std::vector<std::unique_ptr<KernelImmutableData>> &getKernelImmDatas() {
return kernelImmDatas;
}
static L0::Module *create(L0::Device *device, const ze_module_desc_t *desc,
ModuleBuildLog *moduleBuildLog, ModuleType type, ze_result_t *result);
static ModuleFixture::ProxyModuleImp *create(L0::Device *device, const ze_module_desc_t *desc,
ModuleBuildLog *moduleBuildLog, ModuleType type, ze_result_t *result);
};
void setUp();
@@ -145,7 +147,7 @@ struct ModuleFixture : public DeviceFixture {
const std::string kernelName = "test";
const uint32_t numKernelArguments = 6;
std::unique_ptr<L0::Module> module;
std::unique_ptr<ProxyModuleImp> module;
std::unique_ptr<WhiteBox<::L0::KernelImp>> kernel;
std::unique_ptr<ZebinTestData::ZebinWithL0TestCommonModule> zebinData;
DebugManagerStateRestore restore;

View File

@@ -54,7 +54,7 @@ struct MockBuiltinFunctionsLibImplTimestamps : BuiltinFunctionsLibImpl {
[[maybe_unused]] ze_result_t res;
Module *module;
L0::Module *module;
ze_module_handle_t moduleHandle;
ze_module_desc_t moduleDesc = {};
moduleDesc.format = builtInCode.type == BuiltInCodeType::Binary ? ZE_MODULE_FORMAT_NATIVE : ZE_MODULE_FORMAT_IL_SPIRV;

View File

@@ -48,6 +48,7 @@ struct WhiteBox<::L0::KernelImp> : public ::L0::KernelImp {
using ::L0::KernelImp::dynamicStateHeapData;
using ::L0::KernelImp::dynamicStateHeapDataSize;
using ::L0::KernelImp::groupSize;
using ::L0::KernelImp::isBindlessOffsetSet;
using ::L0::KernelImp::kernelHasIndirectAccess;
using ::L0::KernelImp::kernelImmData;
using ::L0::KernelImp::kernelRequiresGenerationOfLocalIdsByRuntime;
@@ -69,6 +70,7 @@ struct WhiteBox<::L0::KernelImp> : public ::L0::KernelImp {
using ::L0::KernelImp::surfaceStateHeapData;
using ::L0::KernelImp::surfaceStateHeapDataSize;
using ::L0::KernelImp::unifiedMemoryControls;
using ::L0::KernelImp::usingSurfaceStateHeap;
void setBufferSurfaceState(uint32_t argIndex, void *address,
NEO::GraphicsAllocation *alloc) override {}

View File

@@ -51,6 +51,7 @@ struct WhiteBox<::L0::Module> : public ::L0::ModuleImp {
using BaseClass::allocatePrivateMemoryPerDispatch;
using BaseClass::BaseClass;
using BaseClass::builtFromSPIRv;
using BaseClass::checkIfPrivateMemoryPerDispatchIsNeeded;
using BaseClass::copyPatchedSegments;
using BaseClass::device;
using BaseClass::exportedFunctionsSurface;

View File

@@ -1746,6 +1746,92 @@ HWTEST2_F(CommandListBindlessSshPrivateHeapTest,
EXPECT_EQ(globalBindlessBase, sbaCmd->getBindlessSurfaceStateBaseAddress());
}
// Scenario: a bindless heaps helper is installed on the device and a bindless
// kernel has one pointer argument flagged in usingSurfaceStateHeap. The test
// appends and executes the kernel, checks the STATE_BASE_ADDRESS programming,
// and checks that the value patched at the start of cross-thread data matches
// the descriptor value computed from the kernel's surface state position in the
// command list's SSH.
HWTEST2_F(CommandListBindlessSshPrivateHeapTest,
givenBindlessKernelStateBaseAddressTrackingAndGlobalBindlessEnabledWhenOneArgUsesKernelsSshThenReservedSshSizeIsNonZero,
IsAtLeastSkl) {
using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
auto mockHelper = std::make_unique<MockBindlesHeapsHelper>(device->getNEODevice()->getMemoryManager(),
device->getNEODevice()->getNumGenericSubDevices() > 1,
device->getNEODevice()->getRootDeviceIndex(),
device->getNEODevice()->getDeviceBitfield());
mockHelper->globalBindlessDsh = false;
// Capture the base before ownership of the helper is handed to the root device environment below.
auto globalBindlessBase = mockHelper->getGlobalHeapsBase();
device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getNEODevice()->getRootDeviceIndex()]->bindlessHeapsHelper.reset(mockHelper.release());
EXPECT_TRUE(commandList->stateBaseAddressTracking);
auto &container = commandList->getCmdContainer();
auto &cmdListStream = *container.getCommandStream();
Mock<Module> mockModule(this->device, nullptr);
Mock<KernelImp> mockKernel;
mockKernel.module = &mockModule;
mockKernel.descriptor.kernelAttributes.bufferAddressingMode = NEO::KernelDescriptor::BindlessAndStateless;
mockKernel.descriptor.kernelAttributes.imageAddressingMode = NEO::KernelDescriptor::Bindless;
// Single pointer argument: no bindful slot, bindless offset 0x0.
auto argDescriptor = NEO::ArgDescriptor(NEO::ArgDescriptor::ArgTPointer);
argDescriptor.as<NEO::ArgDescPointer>() = NEO::ArgDescPointer();
argDescriptor.as<NEO::ArgDescPointer>().bindful = NEO::undefined<NEO::SurfaceStateHeapOffset>;
argDescriptor.as<NEO::ArgDescPointer>().bindless = 0x0;
mockKernel.crossThreadData = std::make_unique<uint8_t[]>(4 * sizeof(uint64_t));
mockKernel.crossThreadDataSize = 4 * sizeof(uint64_t);
// The kernel's own surface state heap holds exactly one surface state.
const auto surfStateSize = static_cast<uint32_t>(device->getNEODevice()->getGfxCoreHelper().getRenderSurfaceStateSize());
mockKernel.surfaceStateHeapData = std::make_unique<uint8_t[]>(surfStateSize);
mockKernel.surfaceStateHeapDataSize = surfStateSize;
mockKernel.info.heapInfo.surfaceStateHeapSize = surfStateSize;
mockKernel.descriptor.payloadMappings.explicitArgs.push_back(argDescriptor);
mockKernel.descriptor.initBindlessOffsetToSurfaceState();
mockKernel.usingSurfaceStateHeap.resize(1, false);
mockKernel.isBindlessOffsetSet.resize(1, false);
// Flag argument 0 as backed by the kernel's surface state heap; its bindless offset stays unset.
mockKernel.usingSurfaceStateHeap[0] = true;
ze_group_count_t groupCount{1, 1, 1};
CmdListKernelLaunchParams launchParams = {};
auto result = commandList->appendLaunchKernel(mockKernel.toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
cmdList,
cmdListStream.getCpuBase(),
cmdListStream.getUsed()));
// Exactly one STATE_BASE_ADDRESS is expected in the command list stream after the append.
auto sbaCmds = findAll<STATE_BASE_ADDRESS *>(cmdList.begin(), cmdList.end());
EXPECT_EQ(1u, sbaCmds.size());
auto sshHeap = container.getIndirectHeap(NEO::HeapType::SURFACE_STATE);
EXPECT_NE(nullptr, sshHeap);
result = commandList->close();
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
// Parse only the bytes the queue stream gained during executeCommandLists.
auto &cmdQueueStream = commandQueue->commandStream;
size_t queueBefore = cmdQueueStream.getUsed();
ze_command_list_handle_t cmdListHandle = commandList->toHandle();
result = commandQueue->executeCommandLists(1, &cmdListHandle, nullptr, true);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
size_t queueAfter = cmdQueueStream.getUsed();
cmdList.clear();
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
cmdList,
ptrOffset(cmdQueueStream.getCpuBase(), queueBefore),
queueAfter - queueBefore));
sbaCmds = findAll<STATE_BASE_ADDRESS *>(cmdList.begin(), cmdList.end());
ASSERT_EQ(expectedSbaCmds, sbaCmds.size());
auto sbaCmd = reinterpret_cast<STATE_BASE_ADDRESS *>(*sbaCmds[0]);
EXPECT_TRUE(sbaCmd->getBindlessSurfaceStateBaseAddressModifyEnable());
EXPECT_EQ(globalBindlessBase, sbaCmd->getBindlessSurfaceStateBaseAddress());
// NOTE(review): assumes the kernel's surface state is the most recent SSH allocation,
// so it starts surfStateSize bytes before the heap's current position - confirm.
auto offsetInHeap = ptrDiff(sshHeap->getSpace(0), sshHeap->getCpuBase()) - surfStateSize;
uint64_t bindlessSshBaseOffset = ptrDiff(sshHeap->getGraphicsAllocation()->getGpuAddress(), sshHeap->getGraphicsAllocation()->getGpuBaseAddress()) + offsetInHeap;
auto patchValue = device->getNEODevice()->getGfxCoreHelper().getBindlessSurfaceExtendedMessageDescriptorValue(static_cast<uint32_t>(bindlessSshBaseOffset));
// The argument's bindless offset is 0x0, so the patched value is read from the start of cross-thread data.
auto patchLocation = reinterpret_cast<uint32_t *>(mockKernel.crossThreadData.get());
EXPECT_EQ(patchValue, *patchLocation);
}
HWTEST2_F(CommandListStateBaseAddressPrivateHeapTest,
givenStateBaseAddressTrackingWhenRegularCmdListAppendKernelChangesHeapsAndNextKernelIsAppendedThenFinalBaseAddressStateIsDispatchedInCommandListOnce,
IsAtLeastSkl) {

View File

@@ -63,6 +63,7 @@ struct WhiteBoxKernelHw : public KernelHw<gfxCoreFamily> {
using ::L0::KernelImp::dynamicStateHeapData;
using ::L0::KernelImp::dynamicStateHeapDataSize;
using ::L0::KernelImp::groupSize;
using ::L0::KernelImp::isBindlessOffsetSet;
using ::L0::KernelImp::kernelImmData;
using ::L0::KernelImp::kernelRequiresGenerationOfLocalIdsByRuntime;
using ::L0::KernelImp::module;
@@ -75,7 +76,9 @@ struct WhiteBoxKernelHw : public KernelHw<gfxCoreFamily> {
using ::L0::KernelImp::requiredWorkgroupOrder;
using ::L0::KernelImp::residencyContainer;
using ::L0::KernelImp::surfaceStateHeapData;
using ::L0::KernelImp::surfaceStateHeapDataSize;
using ::L0::KernelImp::unifiedMemoryControls;
using ::L0::KernelImp::usingSurfaceStateHeap;
void evaluateIfRequiresGenerationOfLocalIdsByRuntime(const NEO::KernelDescriptor &kernelDescriptor) override {}
@@ -2054,6 +2057,8 @@ HWTEST2_F(KernelImpPatchBindlessTest, GivenKernelImpWhenSetSurfaceStateBindlessT
auto &arg = const_cast<NEO::ArgDescPointer &>(mockKernel.kernelImmData->getDescriptor().payloadMappings.explicitArgs[0].template as<NEO::ArgDescPointer>());
arg.bindless = 0x40;
arg.bindful = undefined<SurfaceStateHeapOffset>;
const_cast<NEO::KernelDescriptor &>(mockKernel.kernelImmData->getDescriptor()).kernelAttributes.bufferAddressingMode = NEO::KernelDescriptor::BindlessAndStateless;
const_cast<NEO::KernelDescriptor &>(mockKernel.kernelImmData->getDescriptor()).kernelAttributes.imageAddressingMode = NEO::KernelDescriptor::Bindless;
neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->createBindlessHeapsHelper(neoDevice->getMemoryManager(),
neoDevice->getNumGenericSubDevices() > 1,
@@ -2076,6 +2081,59 @@ HWTEST2_F(KernelImpPatchBindlessTest, GivenKernelImpWhenSetSurfaceStateBindlessT
auto surfaceStateAfter = *reinterpret_cast<RENDER_SURFACE_STATE *>(expectedSsInHeap.ssPtr);
EXPECT_FALSE(memcmp(&surfaceStateAfter, &surfaceStateBefore, size) == 0);
EXPECT_TRUE(mockKernel.isBindlessOffsetSet[0]);
EXPECT_FALSE(mockKernel.usingSurfaceStateHeap[0]);
}
// Scenario: setBufferSurfaceState is called first with the allocation's base
// address and then with an address 8 bytes past it. The second call must leave
// the allocation's surface state in the global bindless heap untouched, write
// the surface state into the kernel's own heap instead, and flip the argument's
// usingSurfaceStateHeap flag.
HWTEST2_F(KernelImpPatchBindlessTest, GivenMisalignedBufferAddressWhenSettingSurfaceStateThenSurfaceStateInKernelHeapIsUsed, MatchAny) {
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
ze_kernel_desc_t desc = {};
desc.pKernelName = kernelName.c_str();
WhiteBoxKernelHw<gfxCoreFamily> mockKernel;
mockKernel.module = module.get();
mockKernel.initialize(&desc);
// Rewrite argument 0 and the kernel attributes in place so the kernel is treated as bindless.
auto &arg = const_cast<NEO::ArgDescPointer &>(mockKernel.kernelImmData->getDescriptor().payloadMappings.explicitArgs[0].template as<NEO::ArgDescPointer>());
arg.bindless = 0x40;
arg.bindful = undefined<SurfaceStateHeapOffset>;
const_cast<NEO::KernelDescriptor &>(mockKernel.kernelImmData->getDescriptor()).kernelAttributes.bufferAddressingMode = NEO::KernelDescriptor::BindlessAndStateless;
const_cast<NEO::KernelDescriptor &>(mockKernel.kernelImmData->getDescriptor()).kernelAttributes.imageAddressingMode = NEO::KernelDescriptor::Bindless;
const_cast<NEO::KernelDescriptor &>(mockKernel.kernelImmData->getDescriptor()).initBindlessOffsetToSurfaceState();
neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->createBindlessHeapsHelper(neoDevice->getMemoryManager(),
neoDevice->getNumGenericSubDevices() > 1,
neoDevice->getRootDeviceIndex(),
neoDevice->getDeviceBitfield());
auto &gfxCoreHelper = device->getGfxCoreHelper();
size_t size = gfxCoreHelper.getRenderSurfaceStateSize();
uint64_t gpuAddress = 0x2000;
void *buffer = reinterpret_cast<void *>(gpuAddress);
NEO::MockGraphicsAllocation mockAllocation(buffer, gpuAddress, size);
// Give the allocation a zeroed surface state slot in the global SSH so later writes to it can be detected.
auto expectedSsInHeap = device->getNEODevice()->getBindlessHeapsHelper()->allocateSSInHeap(size, &mockAllocation, NEO::BindlessHeapsHelper::GLOBAL_SSH);
mockAllocation.setBindlessInfo(expectedSsInHeap);
memset(expectedSsInHeap.ssPtr, 0, size);
// Before any argument is set, no argument is flagged, so the reported SSH size is 0.
EXPECT_EQ(0u, mockKernel.getSurfaceStateHeapDataSize());
EXPECT_FALSE(mockKernel.isBindlessOffsetSet[0]);
EXPECT_FALSE(mockKernel.usingSurfaceStateHeap[0]);
// First call: address equals the allocation base; snapshot the global-heap surface state afterwards.
mockKernel.setBufferSurfaceState(0, buffer, &mockAllocation);
auto surfaceStateBefore = *reinterpret_cast<RENDER_SURFACE_STATE *>(expectedSsInHeap.ssPtr);
// Second call: address is 8 bytes past the allocation base.
mockKernel.setBufferSurfaceState(0, ptrOffset(buffer, 8), &mockAllocation);
auto surfaceStateAfter = *reinterpret_cast<RENDER_SURFACE_STATE *>(expectedSsInHeap.ssPtr);
auto surfaceStateOnSsh = *reinterpret_cast<RENDER_SURFACE_STATE *>(mockKernel.surfaceStateHeapData.get());
// The global-heap surface state is unchanged; the offset address landed in the kernel's own heap.
EXPECT_TRUE(memcmp(&surfaceStateAfter, &surfaceStateBefore, size) == 0);
EXPECT_EQ(reinterpret_cast<uint64_t>(ptrOffset(buffer, 8)), surfaceStateOnSsh.getSurfaceBaseAddress());
EXPECT_FALSE(mockKernel.isBindlessOffsetSet[0]);
EXPECT_TRUE(mockKernel.usingSurfaceStateHeap[0]);
// With one argument flagged, the full stored heap size is reported again.
EXPECT_EQ(mockKernel.surfaceStateHeapDataSize, mockKernel.getSurfaceStateHeapDataSize());
}
HWTEST2_F(KernelImpPatchBindlessTest, GivenKernelImpWhenSetSurfaceStateBindfulThenSurfaceStateNotUpdated, MatchAny) {
@@ -2481,6 +2539,7 @@ HWTEST2_F(SetKernelArg, givenImageAndBindlessKernelWhenSetArgImageThenCopySurfac
auto expectedSsInHeap = imageHW->getAllocation()->getBindlessInfo();
EXPECT_EQ(imageHW->passedSurfaceStateHeap, expectedSsInHeap.ssPtr);
EXPECT_EQ(imageHW->passedSurfaceStateOffset, 0u);
EXPECT_TRUE(kernel->isBindlessOffsetSet[3]);
}
HWTEST2_F(SetKernelArg, givenBindlessKernelAndNoAvailableSpaceOnSshWhenSetArgImageCalledThenOutOfMemoryErrorReturned, ImageSupport) {
@@ -2547,6 +2606,7 @@ HWTEST2_F(SetKernelArg, givenImageBindlessKernelAndGlobalBindlessHelperWhenSetAr
auto expectedSsInHeap = imageHW->getAllocation()->getBindlessInfo();
EXPECT_EQ(imageHW->passedRedescribedSurfaceStateHeap, ptrOffset(expectedSsInHeap.ssPtr, surfaceStateSize));
EXPECT_EQ(imageHW->passedRedescribedSurfaceStateOffset, 0u);
EXPECT_TRUE(kernel->isBindlessOffsetSet[3]);
}
HWTEST2_F(SetKernelArg, givenImageAndBindlessKernelWhenSetArgRedescribedImageCalledThenCopySurfaceStateToSSHCalledWithCorrectArgs, ImageSupport) {
@@ -3265,6 +3325,9 @@ TEST_F(BindlessKernelTest, givenBindlessKernelWhenPatchingCrossThreadDataThenCor
argDescriptor2.as<NEO::ArgDescPointer>().stateless = 2 * sizeof(uint64_t);
mockKernel.descriptor.payloadMappings.explicitArgs.push_back(argDescriptor2);
mockKernel.isBindlessOffsetSet.resize(4, 0);
mockKernel.usingSurfaceStateHeap.resize(4, 0);
mockKernel.descriptor.initBindlessOffsetToSurfaceState();
mockKernel.crossThreadData = std::make_unique<uint8_t[]>(4 * sizeof(uint64_t));
@@ -3287,6 +3350,59 @@ TEST_F(BindlessKernelTest, givenBindlessKernelWhenPatchingCrossThreadDataThenCor
EXPECT_EQ(patchValue2, crossThreadData[1]);
EXPECT_EQ(0u, crossThreadData[3]);
}
// Scenario: three explicit args (bindless pointer, bindless image, stateless
// pointer) with isBindlessOffsetSet set for every slot except index 1. After
// patchBindlessOffsetsInCrossThreadData only the image's cross-thread slot is
// expected to be written; the other slots must stay zero.
TEST_F(BindlessKernelTest, givenBindlessKernelWithPatchedBindlessOffsetsWhenPatchingCrossThreadDataThenMemoryIsNotPatched) {
Mock<Module> mockModule(this->device, nullptr);
Mock<KernelImp> mockKernel;
mockKernel.module = &mockModule;
mockKernel.descriptor.kernelAttributes.bufferAddressingMode = NEO::KernelDescriptor::BindlessAndStateless;
mockKernel.descriptor.kernelAttributes.imageAddressingMode = NEO::KernelDescriptor::Bindless;
// Arg 0: pointer with bindless offset 0x0.
auto argDescriptor = NEO::ArgDescriptor(NEO::ArgDescriptor::ArgTPointer);
argDescriptor.as<NEO::ArgDescPointer>() = NEO::ArgDescPointer();
argDescriptor.as<NEO::ArgDescPointer>().bindful = NEO::undefined<NEO::SurfaceStateHeapOffset>;
argDescriptor.as<NEO::ArgDescPointer>().bindless = 0x0;
mockKernel.descriptor.payloadMappings.explicitArgs.push_back(argDescriptor);
// Arg 1: image with bindless offset sizeof(uint64_t), i.e. the second 64-bit cross-thread slot.
auto argDescriptorImg = NEO::ArgDescriptor(NEO::ArgDescriptor::ArgTImage);
argDescriptorImg.as<NEO::ArgDescImage>() = NEO::ArgDescImage();
argDescriptorImg.as<NEO::ArgDescImage>().bindful = NEO::undefined<NEO::SurfaceStateHeapOffset>;
argDescriptorImg.as<NEO::ArgDescImage>().bindless = sizeof(uint64_t);
mockKernel.descriptor.payloadMappings.explicitArgs.push_back(argDescriptorImg);
// Arg 2: pointer with only a stateless offset (no bindless offset assigned).
auto argDescriptor2 = NEO::ArgDescriptor(NEO::ArgDescriptor::ArgTPointer);
argDescriptor2.as<NEO::ArgDescPointer>() = NEO::ArgDescPointer();
argDescriptor2.as<NEO::ArgDescPointer>().bindful = NEO::undefined<NEO::SurfaceStateHeapOffset>;
argDescriptor2.as<NEO::ArgDescPointer>().stateless = 2 * sizeof(uint64_t);
mockKernel.descriptor.payloadMappings.explicitArgs.push_back(argDescriptor2);
// Flag every slot as already patched, then clear only index 1 (the image arg).
mockKernel.isBindlessOffsetSet.resize(4, 1);
mockKernel.isBindlessOffsetSet[1] = false;
mockKernel.descriptor.initBindlessOffsetToSurfaceState();
mockKernel.crossThreadData = std::make_unique<uint8_t[]>(4 * sizeof(uint64_t));
mockKernel.crossThreadDataSize = 4 * sizeof(uint64_t);
// Start from zeroed cross-thread data so any patch is visible as a non-zero slot.
memset(mockKernel.crossThreadData.get(), 0, mockKernel.crossThreadDataSize);
const uint64_t baseAddress = 0x1000;
auto &gfxCoreHelper = this->device->getGfxCoreHelper();
auto surfaceStateSize = gfxCoreHelper.getRenderSurfaceStateSize();
// Expected value for the image arg: the surface state one slot past baseAddress.
auto patchValue2 = gfxCoreHelper.getBindlessSurfaceExtendedMessageDescriptorValue(static_cast<uint32_t>(baseAddress + surfaceStateSize));
mockKernel.patchBindlessOffsetsInCrossThreadData(baseAddress);
// Copy into a uint64_t array so each 64-bit slot can be compared directly.
auto crossThreadData = std::make_unique<uint64_t[]>(mockKernel.crossThreadDataSize / sizeof(uint64_t));
memcpy(crossThreadData.get(), mockKernel.crossThreadData.get(), mockKernel.crossThreadDataSize);
EXPECT_EQ(0u, crossThreadData[0]);
EXPECT_EQ(patchValue2, crossThreadData[1]);
EXPECT_EQ(0u, crossThreadData[2]);
EXPECT_EQ(0u, crossThreadData[3]);
}
TEST_F(BindlessKernelTest, givenNoEntryInBindlessOffsetsMapWhenPatchingCrossThreadDataThenMemoryIsNotPatched) {
Mock<Module> mockModule(this->device, nullptr);
Mock<KernelImp> mockKernel;

View File

@@ -115,12 +115,16 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
}
} else {
bool globalBindlessSsh = args.device->getBindlessHeapsHelper() != nullptr;
if (!globalBindlessSsh && args.dispatchInterface->getSurfaceStateHeapDataSize() > 0u) {
if (args.dispatchInterface->getSurfaceStateHeapDataSize() > 0u) {
auto ssh = args.surfaceStateHeap;
if (ssh == nullptr) {
container.prepareBindfulSsh();
ssh = container.getHeapWithRequiredSizeAndAlignment(HeapType::SURFACE_STATE, args.dispatchInterface->getSurfaceStateHeapDataSize(), BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
}
uint64_t bindlessSshBaseOffset = ptrDiff(ssh->getSpace(0), ssh->getCpuBase());
if (globalBindlessSsh) {
bindlessSshBaseOffset += ptrDiff(ssh->getGraphicsAllocation()->getGpuAddress(), ssh->getGraphicsAllocation()->getGpuBaseAddress());
}
// Allocate space for new ssh data
auto dstSurfaceState = ssh->getSpace(args.dispatchInterface->getSurfaceStateHeapDataSize());
memcpy_s(dstSurfaceState, args.dispatchInterface->getSurfaceStateHeapDataSize(), args.dispatchInterface->getSurfaceStateHeapData(), args.dispatchInterface->getSurfaceStateHeapDataSize());

View File

@@ -140,12 +140,16 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
}
} else {
bool globalBindlessSsh = args.device->getBindlessHeapsHelper() != nullptr;
if (!globalBindlessSsh && args.dispatchInterface->getSurfaceStateHeapDataSize() > 0u) {
if (args.dispatchInterface->getSurfaceStateHeapDataSize() > 0u) {
auto ssh = args.surfaceStateHeap;
if (ssh == nullptr) {
container.prepareBindfulSsh();
ssh = container.getHeapWithRequiredSizeAndAlignment(HeapType::SURFACE_STATE, args.dispatchInterface->getSurfaceStateHeapDataSize(), BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
}
uint64_t bindlessSshBaseOffset = ptrDiff(ssh->getSpace(0), ssh->getCpuBase());
if (globalBindlessSsh) {
bindlessSshBaseOffset += ptrDiff(ssh->getGraphicsAllocation()->getGpuAddress(), ssh->getGraphicsAllocation()->getGpuBaseAddress());
}
// Allocate space for new ssh data
auto dstSurfaceState = ssh->getSpace(args.dispatchInterface->getSurfaceStateHeapDataSize());
memcpy_s(dstSurfaceState, args.dispatchInterface->getSurfaceStateHeapDataSize(), args.dispatchInterface->getSurfaceStateHeapData(), args.dispatchInterface->getSurfaceStateHeapDataSize());