Mirror of https://github.com/intel/compute-runtime.git
feature: bindless addressing for buffers with offset
- allocate SurfaceStates on kernel's heap for offsetted buffers

Related-To: NEO-7063
Signed-off-by: Mateusz Hoppe <mateusz.hoppe@intel.com>
Commit 93469eaf5d (parent 269adbe43b), committed by Compute-Runtime-Automation.
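The change in a nutshell, restated as a minimal standalone sketch (assumed names, not the actual NEO classes): when a buffer argument is set at a non-zero offset, the offset is first patched into the kernel's cross-thread data; if the kernel has no bufferOffset slot, the runtime falls back to a surface state on the kernel's own heap that already encodes the offsetted address.

```cpp
// Standalone sketch (not NEO code) of the fallback this commit adds.
// All names here are illustrative assumptions.
#include <cstdint>
#include <cstring>
#include <iostream>
#include <vector>

constexpr uint32_t undefinedSlot = ~0u;

struct SurfaceStateStub {
    uint64_t baseAddress = 0;
    uint64_t size = 0;
};

struct KernelStub {
    std::vector<uint8_t> crossThreadData = std::vector<uint8_t>(64, 0);
    SurfaceStateStub kernelHeapSurfaceState{}; // lives on the kernel's own SSH
    uint32_t bufferOffsetSlot = undefinedSlot; // cross-thread location of the bufferOffset field
    bool usingSurfaceStateHeap = false;

    void setBufferArg(uint64_t bufferBase, uint64_t bufferSize, uint64_t offset) {
        if (bufferOffsetSlot != undefinedSlot) {
            // Preferred path: the GPU consumes the offset from cross-thread data,
            // the surface state on the global bindless heap stays untouched.
            auto value = static_cast<uint32_t>(offset);
            std::memcpy(crossThreadData.data() + bufferOffsetSlot, &value, sizeof(value));
            return;
        }
        // Fallback path: bake the offset into a surface state on the kernel's heap.
        kernelHeapSurfaceState.baseAddress = bufferBase + offset;
        kernelHeapSurfaceState.size = bufferSize - offset;
        usingSurfaceStateHeap = true;
    }
};

int main() {
    KernelStub kernel;
    kernel.setBufferArg(0x1000, 0x100, 8); // no bufferOffset slot -> kernel SSH fallback
    std::cout << std::boolalpha << kernel.usingSurfaceStateHeap << '\n'; // prints: true
}
```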
@@ -70,6 +70,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
NEO::EncodeDispatchKernel<GfxFamily>::getSizeRequiredSsh(*kernelInfo),
NEO::EncodeDispatchKernel<GfxFamily>::getDefaultSshAlignment()};

// update SSH size - when global bindless addressing is used, kernel args may not require ssh space
if (kernel->getSurfaceStateHeapDataSize() == 0) {
sshReserveArgs.size = 0;
}

auto &dshReserveConfig = commandContainer.getDynamicStateHeapReserve();
NEO::HeapReserveArguments dshReserveArgs = {
dshReserveConfig.indirectHeapReservation,

@@ -135,7 +135,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
NEO::EncodeDispatchKernel<GfxFamily>::getSizeRequiredSsh(*kernelInfo),
NEO::EncodeDispatchKernel<GfxFamily>::getDefaultSshAlignment()};

if (device->getNEODevice()->getBindlessHeapsHelper() && NEO::KernelDescriptor::isBindlessAddressingKernel(kernelImmutableData->getDescriptor())) {
// update SSH size - when global bindless addressing is used, kernel args may not require ssh space
if (kernel->getSurfaceStateHeapDataSize() == 0) {
sshReserveArgs.size = 0;
}

@@ -46,11 +46,14 @@ struct KernelHw : public KernelImp {
auto argInfo = kernelImmData->getDescriptor().payloadMappings.explicitArgs[argIndex].as<NEO::ArgDescPointer>();
bool offsetWasPatched = NEO::patchNonPointer<uint32_t, uint32_t>(ArrayRef<uint8_t>(this->crossThreadData.get(), this->crossThreadDataSize),
argInfo.bufferOffset, static_cast<uint32_t>(offset));
bool offsetedAddress = false;
if (false == offsetWasPatched) {
// fallback to handling offset in surface state
offsetedAddress = baseAddress != reinterpret_cast<uintptr_t>(address);
baseAddress = reinterpret_cast<uintptr_t>(address);
bufferSizeForSsh -= offset;
DEBUG_BREAK_IF(baseAddress != (baseAddress & sshAlignmentMask));

offset = 0;
}
void *surfaceStateAddress = nullptr;

@@ -61,9 +64,13 @@ struct KernelHw : public KernelImp {
surfaceState = *reinterpret_cast<typename GfxFamily::RENDER_SURFACE_STATE *>(surfaceStateAddress);

} else if (NEO::isValidOffset(argInfo.bindless)) {
if (this->module->getDevice()->getNEODevice()->getBindlessHeapsHelper()) {
isBindlessOffsetSet[argIndex] = false;
usingSurfaceStateHeap[argIndex] = false;
if (this->module->getDevice()->getNEODevice()->getBindlessHeapsHelper() && !offsetedAddress) {
surfaceStateAddress = patchBindlessSurfaceState(alloc, argInfo.bindless);
isBindlessOffsetSet[argIndex] = true;
} else {
usingSurfaceStateHeap[argIndex] = true;
surfaceStateAddress = ptrOffset(surfaceStateHeapData.get(), getSurfaceStateIndexForBindlessOffset(argInfo.bindless) * sizeof(typename GfxFamily::RENDER_SURFACE_STATE));
}
}

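A minimal sketch (assumed, simplified types) of the selection made above in KernelHw::setBufferSurfaceState: with a global bindless heap and an address that was not offsetted, the surface state on the global heap is patched; otherwise a slot inside the kernel's own surfaceStateHeapData is used and the kernel remembers that its heap must later be copied into the command container's SSH.

```cpp
// Illustrative stand-in for the slot selection; not the actual NEO signature.
#include <cstddef>
#include <cstdint>
#include <vector>

constexpr size_t surfaceStateSize = 64; // stand-in for sizeof(RENDER_SURFACE_STATE)

void *selectSurfaceStateSlot(bool hasGlobalBindlessHeap, bool offsetedAddress,
                             void *globalHeapSlot, std::vector<uint8_t> &kernelHeapData,
                             size_t surfaceStateIndex, bool &isBindlessOffsetSet,
                             bool &usingSurfaceStateHeap) {
    if (hasGlobalBindlessHeap && !offsetedAddress) {
        isBindlessOffsetSet = true; // cross-thread data patching is skipped for this arg later
        return globalHeapSlot;
    }
    usingSurfaceStateHeap = true;   // forces a non-zero getSurfaceStateHeapDataSize()
    return kernelHeapData.data() + surfaceStateIndex * surfaceStateSize;
}
```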
@@ -569,6 +569,7 @@ ze_result_t KernelImp::setArgRedescribedImage(uint32_t argIndex, ze_image_handle
patchWithRequiredSize(const_cast<uint8_t *>(patchLocation), sizeof(patchValue), patchValue);

image->copyRedescribedSurfaceStateToSSH(ptrOffset(ssInHeap.ssPtr, surfaceStateSize), 0u);
isBindlessOffsetSet[argIndex] = true;
this->residencyContainer.push_back(ssInHeap.heapAllocation);
} else {

@@ -764,6 +765,7 @@ ze_result_t KernelImp::setArgImage(uint32_t argIndex, size_t argSize, const void
}

auto ssPtr = patchBindlessSurfaceState(image->getAllocation(), arg.bindless);
isBindlessOffsetSet[argIndex] = true;
image->copySurfaceStateToSSH(ssPtr, 0u, isMediaBlockImage);
} else {
auto &gfxCoreHelper = this->module->getDevice()->getNEODevice()->getRootDeviceEnvironmentRef().getHelper<NEO::GfxCoreHelper>();

@@ -976,6 +978,8 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) {
slmArgSizes.resize(this->kernelArgHandlers.size(), 0);
kernelArgInfos.resize(this->kernelArgHandlers.size(), {});
isArgUncached.resize(this->kernelArgHandlers.size(), 0);
isBindlessOffsetSet.resize(this->kernelArgHandlers.size(), 0);
usingSurfaceStateHeap.resize(this->kernelArgHandlers.size(), 0);

if (kernelImmData->getSurfaceStateHeapSize() > 0) {
this->surfaceStateHeapData.reset(new uint8_t[kernelImmData->getSurfaceStateHeapSize()]);

@@ -1253,7 +1257,7 @@ void KernelImp::patchBindlessOffsetsInCrossThreadData(uint64_t bindlessSurfaceSt
auto patchLocation = ptrOffset(getCrossThreadData(), crossThreadOffset);
auto index = getSurfaceStateIndexForBindlessOffset(crossThreadOffset);

if (index < std::numeric_limits<uint32_t>::max()) {
if (index < std::numeric_limits<uint32_t>::max() && !isBindlessOffsetSet[argIndex]) {
auto surfaceStateOffset = static_cast<uint32_t>(bindlessSurfaceStateBaseOffset + index * surfaceStateSize);
auto patchValue = gfxCoreHelper.getBindlessSurfaceExtendedMessageDescriptorValue(static_cast<uint32_t>(surfaceStateOffset));

@@ -108,7 +108,14 @@ struct KernelImp : Kernel {
void patchSyncBuffer(NEO::GraphicsAllocation *gfxAllocation, size_t bufferOffset) override;

const uint8_t *getSurfaceStateHeapData() const override { return surfaceStateHeapData.get(); }
uint32_t getSurfaceStateHeapDataSize() const override { return surfaceStateHeapDataSize; }
uint32_t getSurfaceStateHeapDataSize() const override {
if (NEO::KernelDescriptor::isBindlessAddressingKernel(kernelImmData->getDescriptor())) {
if (std::none_of(usingSurfaceStateHeap.cbegin(), usingSurfaceStateHeap.cend(), [](bool i) { return i; })) {
return 0;
}
}
return surfaceStateHeapDataSize;
}

const uint8_t *getDynamicStateHeapData() const override { return dynamicStateHeapData.get(); }

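The size logic added above, as a self-contained sketch (assumed, simplified signature; the real method reads the kernel descriptor rather than a flag): for a bindless-addressing kernel, the kernel-side surface state heap only needs to be reserved and copied if at least one argument fell back to it.

```cpp
#include <algorithm>
#include <cstdint>
#include <vector>

uint32_t requiredSurfaceStateHeapSize(bool bindlessAddressingKernel,
                                      const std::vector<bool> &usingSurfaceStateHeap,
                                      uint32_t surfaceStateHeapDataSize) {
    if (bindlessAddressingKernel &&
        std::none_of(usingSurfaceStateHeap.cbegin(), usingSurfaceStateHeap.cend(),
                     [](bool used) { return used; })) {
        return 0; // no argument uses the kernel heap, so no SSH space is reserved
    }
    return surfaceStateHeapDataSize;
}
```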
@@ -230,6 +237,8 @@ struct KernelImp : Kernel {
uint32_t kernelRequiresUncachedMocsCount = 0;
uint32_t kernelRequiresQueueUncachedMocsCount = 0;
std::vector<bool> isArgUncached;
std::vector<bool> isBindlessOffsetSet;
std::vector<bool> usingSurfaceStateHeap;

uint32_t globalOffsets[3] = {};

@@ -124,8 +124,8 @@ void ModuleImmutableDataFixture::tearDown() {
DeviceFixture::tearDown();
}

L0::Module *ModuleFixture::ProxyModuleImp::create(L0::Device *device, const ze_module_desc_t *desc,
ModuleBuildLog *moduleBuildLog, ModuleType type, ze_result_t *result) {
ModuleFixture::ProxyModuleImp *ModuleFixture::ProxyModuleImp::create(L0::Device *device, const ze_module_desc_t *desc,
ModuleBuildLog *moduleBuildLog, ModuleType type, ze_result_t *result) {
auto module = new ProxyModuleImp(device, moduleBuildLog, type);

*result = module->initialize(desc, device->getNEODevice());

@@ -13,6 +13,7 @@
#include "level_zero/core/source/module/module_imp.h"
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
#include "level_zero/core/test/unit_tests/mocks/mock_kernel.h"
#include "level_zero/core/test/unit_tests/mocks/mock_module.h"

namespace L0 {
namespace ult {

@@ -122,15 +123,16 @@ struct ModuleImmutableDataFixture : public DeviceFixture {

struct ModuleFixture : public DeviceFixture {

struct ProxyModuleImp : public ModuleImp {
using ModuleImp::ModuleImp;
struct ProxyModuleImp : public WhiteBox<::L0::Module> {
using BaseClass = WhiteBox<::L0::Module>;
using BaseClass::BaseClass;

std::vector<std::unique_ptr<KernelImmutableData>> &getKernelImmDatas() {
return kernelImmDatas;
}

static L0::Module *create(L0::Device *device, const ze_module_desc_t *desc,
ModuleBuildLog *moduleBuildLog, ModuleType type, ze_result_t *result);
static ModuleFixture::ProxyModuleImp *create(L0::Device *device, const ze_module_desc_t *desc,
ModuleBuildLog *moduleBuildLog, ModuleType type, ze_result_t *result);
};

void setUp();

@@ -145,7 +147,7 @@ struct ModuleFixture : public DeviceFixture {

const std::string kernelName = "test";
const uint32_t numKernelArguments = 6;
std::unique_ptr<L0::Module> module;
std::unique_ptr<ProxyModuleImp> module;
std::unique_ptr<WhiteBox<::L0::KernelImp>> kernel;
std::unique_ptr<ZebinTestData::ZebinWithL0TestCommonModule> zebinData;
DebugManagerStateRestore restore;

@@ -54,7 +54,7 @@ struct MockBuiltinFunctionsLibImplTimestamps : BuiltinFunctionsLibImpl {

[[maybe_unused]] ze_result_t res;

Module *module;
L0::Module *module;
ze_module_handle_t moduleHandle;
ze_module_desc_t moduleDesc = {};
moduleDesc.format = builtInCode.type == BuiltInCodeType::Binary ? ZE_MODULE_FORMAT_NATIVE : ZE_MODULE_FORMAT_IL_SPIRV;

@@ -48,6 +48,7 @@ struct WhiteBox<::L0::KernelImp> : public ::L0::KernelImp {
using ::L0::KernelImp::dynamicStateHeapData;
using ::L0::KernelImp::dynamicStateHeapDataSize;
using ::L0::KernelImp::groupSize;
using ::L0::KernelImp::isBindlessOffsetSet;
using ::L0::KernelImp::kernelHasIndirectAccess;
using ::L0::KernelImp::kernelImmData;
using ::L0::KernelImp::kernelRequiresGenerationOfLocalIdsByRuntime;

@@ -69,6 +70,7 @@ struct WhiteBox<::L0::KernelImp> : public ::L0::KernelImp {
using ::L0::KernelImp::surfaceStateHeapData;
using ::L0::KernelImp::surfaceStateHeapDataSize;
using ::L0::KernelImp::unifiedMemoryControls;
using ::L0::KernelImp::usingSurfaceStateHeap;

void setBufferSurfaceState(uint32_t argIndex, void *address,
NEO::GraphicsAllocation *alloc) override {}

@@ -51,6 +51,7 @@ struct WhiteBox<::L0::Module> : public ::L0::ModuleImp {
using BaseClass::allocatePrivateMemoryPerDispatch;
using BaseClass::BaseClass;
using BaseClass::builtFromSPIRv;
using BaseClass::checkIfPrivateMemoryPerDispatchIsNeeded;
using BaseClass::copyPatchedSegments;
using BaseClass::device;
using BaseClass::exportedFunctionsSurface;

@@ -1746,6 +1746,92 @@ HWTEST2_F(CommandListBindlessSshPrivateHeapTest,
EXPECT_EQ(globalBindlessBase, sbaCmd->getBindlessSurfaceStateBaseAddress());
}

HWTEST2_F(CommandListBindlessSshPrivateHeapTest,
givenBindlessKernelStateBaseAddressTrackingAndGlobalBindlessEnabledWhenOneArgUsesKernelsSshThenReservedSshSizeIsNonZero,
IsAtLeastSkl) {
using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;

auto mockHelper = std::make_unique<MockBindlesHeapsHelper>(device->getNEODevice()->getMemoryManager(),
device->getNEODevice()->getNumGenericSubDevices() > 1,
device->getNEODevice()->getRootDeviceIndex(),
device->getNEODevice()->getDeviceBitfield());
mockHelper->globalBindlessDsh = false;
auto globalBindlessBase = mockHelper->getGlobalHeapsBase();
device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getNEODevice()->getRootDeviceIndex()]->bindlessHeapsHelper.reset(mockHelper.release());

EXPECT_TRUE(commandList->stateBaseAddressTracking);

auto &container = commandList->getCmdContainer();
auto &cmdListStream = *container.getCommandStream();

Mock<Module> mockModule(this->device, nullptr);
Mock<KernelImp> mockKernel;
mockKernel.module = &mockModule;

mockKernel.descriptor.kernelAttributes.bufferAddressingMode = NEO::KernelDescriptor::BindlessAndStateless;
mockKernel.descriptor.kernelAttributes.imageAddressingMode = NEO::KernelDescriptor::Bindless;

auto argDescriptor = NEO::ArgDescriptor(NEO::ArgDescriptor::ArgTPointer);
argDescriptor.as<NEO::ArgDescPointer>() = NEO::ArgDescPointer();
argDescriptor.as<NEO::ArgDescPointer>().bindful = NEO::undefined<NEO::SurfaceStateHeapOffset>;
argDescriptor.as<NEO::ArgDescPointer>().bindless = 0x0;
mockKernel.crossThreadData = std::make_unique<uint8_t[]>(4 * sizeof(uint64_t));
mockKernel.crossThreadDataSize = 4 * sizeof(uint64_t);
const auto surfStateSize = static_cast<uint32_t>(device->getNEODevice()->getGfxCoreHelper().getRenderSurfaceStateSize());
mockKernel.surfaceStateHeapData = std::make_unique<uint8_t[]>(surfStateSize);
mockKernel.surfaceStateHeapDataSize = surfStateSize;
mockKernel.info.heapInfo.surfaceStateHeapSize = surfStateSize;
mockKernel.descriptor.payloadMappings.explicitArgs.push_back(argDescriptor);
mockKernel.descriptor.initBindlessOffsetToSurfaceState();
mockKernel.usingSurfaceStateHeap.resize(1, false);
mockKernel.isBindlessOffsetSet.resize(1, false);
mockKernel.usingSurfaceStateHeap[0] = true;

ze_group_count_t groupCount{1, 1, 1};
CmdListKernelLaunchParams launchParams = {};
auto result = commandList->appendLaunchKernel(mockKernel.toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);

GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
cmdList,
cmdListStream.getCpuBase(),
cmdListStream.getUsed()));
auto sbaCmds = findAll<STATE_BASE_ADDRESS *>(cmdList.begin(), cmdList.end());
EXPECT_EQ(1u, sbaCmds.size());

auto sshHeap = container.getIndirectHeap(NEO::HeapType::SURFACE_STATE);
EXPECT_NE(nullptr, sshHeap);

result = commandList->close();
EXPECT_EQ(ZE_RESULT_SUCCESS, result);

auto &cmdQueueStream = commandQueue->commandStream;
size_t queueBefore = cmdQueueStream.getUsed();
ze_command_list_handle_t cmdListHandle = commandList->toHandle();
result = commandQueue->executeCommandLists(1, &cmdListHandle, nullptr, true);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
size_t queueAfter = cmdQueueStream.getUsed();

cmdList.clear();
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
cmdList,
ptrOffset(cmdQueueStream.getCpuBase(), queueBefore),
queueAfter - queueBefore));
sbaCmds = findAll<STATE_BASE_ADDRESS *>(cmdList.begin(), cmdList.end());
ASSERT_EQ(expectedSbaCmds, sbaCmds.size());

auto sbaCmd = reinterpret_cast<STATE_BASE_ADDRESS *>(*sbaCmds[0]);
EXPECT_TRUE(sbaCmd->getBindlessSurfaceStateBaseAddressModifyEnable());
EXPECT_EQ(globalBindlessBase, sbaCmd->getBindlessSurfaceStateBaseAddress());

auto offsetInHeap = ptrDiff(sshHeap->getSpace(0), sshHeap->getCpuBase()) - surfStateSize;
uint64_t bindlessSshBaseOffset = ptrDiff(sshHeap->getGraphicsAllocation()->getGpuAddress(), sshHeap->getGraphicsAllocation()->getGpuBaseAddress()) + offsetInHeap;
auto patchValue = device->getNEODevice()->getGfxCoreHelper().getBindlessSurfaceExtendedMessageDescriptorValue(static_cast<uint32_t>(bindlessSshBaseOffset));
auto patchLocation = reinterpret_cast<uint32_t *>(mockKernel.crossThreadData.get());
EXPECT_EQ(patchValue, *patchLocation);
}

HWTEST2_F(CommandListStateBaseAddressPrivateHeapTest,
givenStateBaseAddressTrackingWhenRegularCmdListAppendKernelChangesHeapsAndNextKernelIsAppendedThenFinalBaseAddressStateIsDispatchedInCommandListOnce,
IsAtLeastSkl) {

@@ -63,6 +63,7 @@ struct WhiteBoxKernelHw : public KernelHw<gfxCoreFamily> {
using ::L0::KernelImp::dynamicStateHeapData;
using ::L0::KernelImp::dynamicStateHeapDataSize;
using ::L0::KernelImp::groupSize;
using ::L0::KernelImp::isBindlessOffsetSet;
using ::L0::KernelImp::kernelImmData;
using ::L0::KernelImp::kernelRequiresGenerationOfLocalIdsByRuntime;
using ::L0::KernelImp::module;

@@ -75,7 +76,9 @@ struct WhiteBoxKernelHw : public KernelHw<gfxCoreFamily> {
using ::L0::KernelImp::requiredWorkgroupOrder;
using ::L0::KernelImp::residencyContainer;
using ::L0::KernelImp::surfaceStateHeapData;
using ::L0::KernelImp::surfaceStateHeapDataSize;
using ::L0::KernelImp::unifiedMemoryControls;
using ::L0::KernelImp::usingSurfaceStateHeap;

void evaluateIfRequiresGenerationOfLocalIdsByRuntime(const NEO::KernelDescriptor &kernelDescriptor) override {}

@@ -2054,6 +2057,8 @@ HWTEST2_F(KernelImpPatchBindlessTest, GivenKernelImpWhenSetSurfaceStateBindlessT
auto &arg = const_cast<NEO::ArgDescPointer &>(mockKernel.kernelImmData->getDescriptor().payloadMappings.explicitArgs[0].template as<NEO::ArgDescPointer>());
arg.bindless = 0x40;
arg.bindful = undefined<SurfaceStateHeapOffset>;
const_cast<NEO::KernelDescriptor &>(mockKernel.kernelImmData->getDescriptor()).kernelAttributes.bufferAddressingMode = NEO::KernelDescriptor::BindlessAndStateless;
const_cast<NEO::KernelDescriptor &>(mockKernel.kernelImmData->getDescriptor()).kernelAttributes.imageAddressingMode = NEO::KernelDescriptor::Bindless;

neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->createBindlessHeapsHelper(neoDevice->getMemoryManager(),
neoDevice->getNumGenericSubDevices() > 1,

@@ -2076,6 +2081,59 @@ HWTEST2_F(KernelImpPatchBindlessTest, GivenKernelImpWhenSetSurfaceStateBindlessT
auto surfaceStateAfter = *reinterpret_cast<RENDER_SURFACE_STATE *>(expectedSsInHeap.ssPtr);

EXPECT_FALSE(memcmp(&surfaceStateAfter, &surfaceStateBefore, size) == 0);
EXPECT_TRUE(mockKernel.isBindlessOffsetSet[0]);
EXPECT_FALSE(mockKernel.usingSurfaceStateHeap[0]);
}

HWTEST2_F(KernelImpPatchBindlessTest, GivenMisalignedBufferAddressWhenSettingSurfaceStateThenSurfaceStateInKernelHeapIsUsed, MatchAny) {
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;

ze_kernel_desc_t desc = {};
desc.pKernelName = kernelName.c_str();

WhiteBoxKernelHw<gfxCoreFamily> mockKernel;
mockKernel.module = module.get();
mockKernel.initialize(&desc);
auto &arg = const_cast<NEO::ArgDescPointer &>(mockKernel.kernelImmData->getDescriptor().payloadMappings.explicitArgs[0].template as<NEO::ArgDescPointer>());
arg.bindless = 0x40;
arg.bindful = undefined<SurfaceStateHeapOffset>;
const_cast<NEO::KernelDescriptor &>(mockKernel.kernelImmData->getDescriptor()).kernelAttributes.bufferAddressingMode = NEO::KernelDescriptor::BindlessAndStateless;
const_cast<NEO::KernelDescriptor &>(mockKernel.kernelImmData->getDescriptor()).kernelAttributes.imageAddressingMode = NEO::KernelDescriptor::Bindless;
const_cast<NEO::KernelDescriptor &>(mockKernel.kernelImmData->getDescriptor()).initBindlessOffsetToSurfaceState();

neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->createBindlessHeapsHelper(neoDevice->getMemoryManager(),
neoDevice->getNumGenericSubDevices() > 1,
neoDevice->getRootDeviceIndex(),
neoDevice->getDeviceBitfield());

auto &gfxCoreHelper = device->getGfxCoreHelper();
size_t size = gfxCoreHelper.getRenderSurfaceStateSize();
uint64_t gpuAddress = 0x2000;
void *buffer = reinterpret_cast<void *>(gpuAddress);

NEO::MockGraphicsAllocation mockAllocation(buffer, gpuAddress, size);
auto expectedSsInHeap = device->getNEODevice()->getBindlessHeapsHelper()->allocateSSInHeap(size, &mockAllocation, NEO::BindlessHeapsHelper::GLOBAL_SSH);
mockAllocation.setBindlessInfo(expectedSsInHeap);

memset(expectedSsInHeap.ssPtr, 0, size);

EXPECT_EQ(0u, mockKernel.getSurfaceStateHeapDataSize());
EXPECT_FALSE(mockKernel.isBindlessOffsetSet[0]);
EXPECT_FALSE(mockKernel.usingSurfaceStateHeap[0]);

mockKernel.setBufferSurfaceState(0, buffer, &mockAllocation);
auto surfaceStateBefore = *reinterpret_cast<RENDER_SURFACE_STATE *>(expectedSsInHeap.ssPtr);

mockKernel.setBufferSurfaceState(0, ptrOffset(buffer, 8), &mockAllocation);
auto surfaceStateAfter = *reinterpret_cast<RENDER_SURFACE_STATE *>(expectedSsInHeap.ssPtr);
auto surfaceStateOnSsh = *reinterpret_cast<RENDER_SURFACE_STATE *>(mockKernel.surfaceStateHeapData.get());

EXPECT_TRUE(memcmp(&surfaceStateAfter, &surfaceStateBefore, size) == 0);

EXPECT_EQ(reinterpret_cast<uint64_t>(ptrOffset(buffer, 8)), surfaceStateOnSsh.getSurfaceBaseAddress());
EXPECT_FALSE(mockKernel.isBindlessOffsetSet[0]);
EXPECT_TRUE(mockKernel.usingSurfaceStateHeap[0]);
EXPECT_EQ(mockKernel.surfaceStateHeapDataSize, mockKernel.getSurfaceStateHeapDataSize());
}

HWTEST2_F(KernelImpPatchBindlessTest, GivenKernelImpWhenSetSurfaceStateBindfulThenSurfaceStateNotUpdated, MatchAny) {

@@ -2481,6 +2539,7 @@ HWTEST2_F(SetKernelArg, givenImageAndBindlessKernelWhenSetArgImageThenCopySurfac
auto expectedSsInHeap = imageHW->getAllocation()->getBindlessInfo();
EXPECT_EQ(imageHW->passedSurfaceStateHeap, expectedSsInHeap.ssPtr);
EXPECT_EQ(imageHW->passedSurfaceStateOffset, 0u);
EXPECT_TRUE(kernel->isBindlessOffsetSet[3]);
}

HWTEST2_F(SetKernelArg, givenBindlessKernelAndNoAvailableSpaceOnSshWhenSetArgImageCalledThenOutOfMemoryErrorReturned, ImageSupport) {

@@ -2547,6 +2606,7 @@ HWTEST2_F(SetKernelArg, givenImageBindlessKernelAndGlobalBindlessHelperWhenSetAr
auto expectedSsInHeap = imageHW->getAllocation()->getBindlessInfo();
EXPECT_EQ(imageHW->passedRedescribedSurfaceStateHeap, ptrOffset(expectedSsInHeap.ssPtr, surfaceStateSize));
EXPECT_EQ(imageHW->passedRedescribedSurfaceStateOffset, 0u);
EXPECT_TRUE(kernel->isBindlessOffsetSet[3]);
}

HWTEST2_F(SetKernelArg, givenImageAndBindlessKernelWhenSetArgRedescribedImageCalledThenCopySurfaceStateToSSHCalledWithCorrectArgs, ImageSupport) {

@@ -3265,6 +3325,9 @@ TEST_F(BindlessKernelTest, givenBindlessKernelWhenPatchingCrossThreadDataThenCor
argDescriptor2.as<NEO::ArgDescPointer>().stateless = 2 * sizeof(uint64_t);
mockKernel.descriptor.payloadMappings.explicitArgs.push_back(argDescriptor2);

mockKernel.isBindlessOffsetSet.resize(4, 0);
mockKernel.usingSurfaceStateHeap.resize(4, 0);

mockKernel.descriptor.initBindlessOffsetToSurfaceState();

mockKernel.crossThreadData = std::make_unique<uint8_t[]>(4 * sizeof(uint64_t));

@@ -3287,6 +3350,59 @@ TEST_F(BindlessKernelTest, givenBindlessKernelWhenPatchingCrossThreadDataThenCor
EXPECT_EQ(patchValue2, crossThreadData[1]);
EXPECT_EQ(0u, crossThreadData[3]);
}

TEST_F(BindlessKernelTest, givenBindlessKernelWithPatchedBindlessOffsetsWhenPatchingCrossThreadDataThenMemoryIsNotPatched) {
Mock<Module> mockModule(this->device, nullptr);
Mock<KernelImp> mockKernel;
mockKernel.module = &mockModule;

mockKernel.descriptor.kernelAttributes.bufferAddressingMode = NEO::KernelDescriptor::BindlessAndStateless;
mockKernel.descriptor.kernelAttributes.imageAddressingMode = NEO::KernelDescriptor::Bindless;

auto argDescriptor = NEO::ArgDescriptor(NEO::ArgDescriptor::ArgTPointer);
argDescriptor.as<NEO::ArgDescPointer>() = NEO::ArgDescPointer();
argDescriptor.as<NEO::ArgDescPointer>().bindful = NEO::undefined<NEO::SurfaceStateHeapOffset>;
argDescriptor.as<NEO::ArgDescPointer>().bindless = 0x0;
mockKernel.descriptor.payloadMappings.explicitArgs.push_back(argDescriptor);

auto argDescriptorImg = NEO::ArgDescriptor(NEO::ArgDescriptor::ArgTImage);
argDescriptorImg.as<NEO::ArgDescImage>() = NEO::ArgDescImage();
argDescriptorImg.as<NEO::ArgDescImage>().bindful = NEO::undefined<NEO::SurfaceStateHeapOffset>;
argDescriptorImg.as<NEO::ArgDescImage>().bindless = sizeof(uint64_t);
mockKernel.descriptor.payloadMappings.explicitArgs.push_back(argDescriptorImg);

auto argDescriptor2 = NEO::ArgDescriptor(NEO::ArgDescriptor::ArgTPointer);
argDescriptor2.as<NEO::ArgDescPointer>() = NEO::ArgDescPointer();
argDescriptor2.as<NEO::ArgDescPointer>().bindful = NEO::undefined<NEO::SurfaceStateHeapOffset>;
argDescriptor2.as<NEO::ArgDescPointer>().stateless = 2 * sizeof(uint64_t);
mockKernel.descriptor.payloadMappings.explicitArgs.push_back(argDescriptor2);

mockKernel.isBindlessOffsetSet.resize(4, 1);
mockKernel.isBindlessOffsetSet[1] = false;

mockKernel.descriptor.initBindlessOffsetToSurfaceState();

mockKernel.crossThreadData = std::make_unique<uint8_t[]>(4 * sizeof(uint64_t));
mockKernel.crossThreadDataSize = 4 * sizeof(uint64_t);
memset(mockKernel.crossThreadData.get(), 0, mockKernel.crossThreadDataSize);

const uint64_t baseAddress = 0x1000;
auto &gfxCoreHelper = this->device->getGfxCoreHelper();
auto surfaceStateSize = gfxCoreHelper.getRenderSurfaceStateSize();

auto patchValue2 = gfxCoreHelper.getBindlessSurfaceExtendedMessageDescriptorValue(static_cast<uint32_t>(baseAddress + surfaceStateSize));

mockKernel.patchBindlessOffsetsInCrossThreadData(baseAddress);

auto crossThreadData = std::make_unique<uint64_t[]>(mockKernel.crossThreadDataSize / sizeof(uint64_t));
memcpy(crossThreadData.get(), mockKernel.crossThreadData.get(), mockKernel.crossThreadDataSize);

EXPECT_EQ(0u, crossThreadData[0]);
EXPECT_EQ(patchValue2, crossThreadData[1]);
EXPECT_EQ(0u, crossThreadData[2]);
EXPECT_EQ(0u, crossThreadData[3]);
}

TEST_F(BindlessKernelTest, givenNoEntryInBindlessOffsetsMapWhenPatchingCrossThreadDataThenMemoryIsNotPatched) {
Mock<Module> mockModule(this->device, nullptr);
Mock<KernelImp> mockKernel;

@@ -115,12 +115,16 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
}
} else {
bool globalBindlessSsh = args.device->getBindlessHeapsHelper() != nullptr;
if (!globalBindlessSsh && args.dispatchInterface->getSurfaceStateHeapDataSize() > 0u) {
if (args.dispatchInterface->getSurfaceStateHeapDataSize() > 0u) {
auto ssh = args.surfaceStateHeap;
if (ssh == nullptr) {
container.prepareBindfulSsh();
ssh = container.getHeapWithRequiredSizeAndAlignment(HeapType::SURFACE_STATE, args.dispatchInterface->getSurfaceStateHeapDataSize(), BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
}
uint64_t bindlessSshBaseOffset = ptrDiff(ssh->getSpace(0), ssh->getCpuBase());
if (globalBindlessSsh) {
bindlessSshBaseOffset += ptrDiff(ssh->getGraphicsAllocation()->getGpuAddress(), ssh->getGraphicsAllocation()->getGpuBaseAddress());
}
// Allocate space for new ssh data
auto dstSurfaceState = ssh->getSpace(args.dispatchInterface->getSurfaceStateHeapDataSize());
memcpy_s(dstSurfaceState, args.dispatchInterface->getSurfaceStateHeapDataSize(), args.dispatchInterface->getSurfaceStateHeapData(), args.dispatchInterface->getSurfaceStateHeapDataSize());

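A worked example (assumed addresses, plain integers standing in for the heap objects) of how the encoder above composes bindlessSshBaseOffset: the current write offset inside the surface state heap, plus, when the global bindless heap is in use, the heap allocation's distance from the bindless surface state base address programmed in STATE_BASE_ADDRESS.

```cpp
#include <cstdint>
#include <iostream>

int main() {
    const uint64_t heapCpuBase = 0x0;            // ssh->getCpuBase()
    const uint64_t heapCpuCurrent = 0x400;       // ssh->getSpace(0), next free byte
    const uint64_t heapGpuAddress = 0x80002000;  // ssh allocation GPU address
    const uint64_t bindlessGpuBase = 0x80000000; // base the bindless offsets are relative to

    uint64_t bindlessSshBaseOffset = heapCpuCurrent - heapCpuBase;
    const bool globalBindlessSsh = true;
    if (globalBindlessSsh) {
        bindlessSshBaseOffset += heapGpuAddress - bindlessGpuBase;
    }
    std::cout << std::hex << "0x" << bindlessSshBaseOffset << '\n'; // prints: 0x2400
}
```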
@@ -140,12 +140,16 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
}
} else {
bool globalBindlessSsh = args.device->getBindlessHeapsHelper() != nullptr;
if (!globalBindlessSsh && args.dispatchInterface->getSurfaceStateHeapDataSize() > 0u) {
if (args.dispatchInterface->getSurfaceStateHeapDataSize() > 0u) {
auto ssh = args.surfaceStateHeap;
if (ssh == nullptr) {
container.prepareBindfulSsh();
ssh = container.getHeapWithRequiredSizeAndAlignment(HeapType::SURFACE_STATE, args.dispatchInterface->getSurfaceStateHeapDataSize(), BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
}
uint64_t bindlessSshBaseOffset = ptrDiff(ssh->getSpace(0), ssh->getCpuBase());
if (globalBindlessSsh) {
bindlessSshBaseOffset += ptrDiff(ssh->getGraphicsAllocation()->getGpuAddress(), ssh->getGraphicsAllocation()->getGpuBaseAddress());
}
// Allocate space for new ssh data
auto dstSurfaceState = ssh->getSpace(args.dispatchInterface->getSurfaceStateHeapDataSize());
memcpy_s(dstSurfaceState, args.dispatchInterface->getSurfaceStateHeapDataSize(), args.dispatchInterface->getSurfaceStateHeapData(), args.dispatchInterface->getSurfaceStateHeapDataSize());