mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-15 13:01:45 +08:00
feature: use bindless samplers in heapless ocl
Related-To: NEO-12741 Signed-off-by: Kamil Kopryk <kamil.kopryk@intel.com>
This commit is contained in:

committed by
Compute-Runtime-Automation

parent
47fc1ce34b
commit
6926582915
@ -264,6 +264,11 @@ size_t HardwareCommandsHelper<GfxFamily>::sendIndirectState(
|
||||
samplerCount, samplerTable.borderColor,
|
||||
kernel.getDynamicStateHeap(), device.getBindlessHeapsHelper(),
|
||||
device.getRootDeviceEnvironment());
|
||||
if constexpr (heaplessModeEnabled) {
|
||||
uint64_t bindlessSamplerStateAddress = samplerStateOffset;
|
||||
bindlessSamplerStateAddress += dsh.getGraphicsAllocation()->getGpuAddress();
|
||||
kernel.patchBindlessSamplerStatesInCrossThreadData(bindlessSamplerStateAddress);
|
||||
}
|
||||
}
|
||||
|
||||
if constexpr (bindfulAllowed) {
|
||||
|
@ -68,6 +68,7 @@
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstdint>
|
||||
#include <ranges>
|
||||
#include <vector>
|
||||
|
||||
using namespace iOpenCL;
|
||||
@ -1335,7 +1336,18 @@ void Kernel::setInlineSamplers() {
|
||||
errCode));
|
||||
UNRECOVERABLE_IF(errCode != CL_SUCCESS);
|
||||
|
||||
auto samplerState = ptrOffset(getDynamicStateHeap(), static_cast<size_t>(inlineSampler.getSamplerBindfulOffset()));
|
||||
void *samplerState = nullptr;
|
||||
auto dsh = const_cast<void *>(getDynamicStateHeap());
|
||||
|
||||
if (isValidOffset(inlineSampler.bindless)) {
|
||||
auto samplerStateIndex = inlineSampler.samplerIndex;
|
||||
auto &gfxCoreHelper = this->getGfxCoreHelper();
|
||||
auto samplerStateSize = gfxCoreHelper.getSamplerStateSize();
|
||||
auto offset = inlineSampler.borderColorStateSize;
|
||||
samplerState = ptrOffset(dsh, (samplerStateIndex * samplerStateSize) + offset);
|
||||
} else {
|
||||
samplerState = ptrOffset(dsh, static_cast<size_t>(inlineSampler.getSamplerBindfulOffset()));
|
||||
}
|
||||
sampler->setArg(const_cast<void *>(samplerState), clDevice.getRootDeviceEnvironment());
|
||||
}
|
||||
}
|
||||
@ -1792,8 +1804,19 @@ cl_int Kernel::setArgSampler(uint32_t argIndex,
|
||||
|
||||
storeKernelArg(argIndex, SAMPLER_OBJ, clSamplerObj, argVal, argSize);
|
||||
|
||||
void *samplerState = nullptr;
|
||||
auto dsh = getDynamicStateHeap();
|
||||
auto samplerState = ptrOffset(dsh, argAsSmp.bindful);
|
||||
|
||||
if (isValidOffset(argAsSmp.bindless)) {
|
||||
auto samplerStateIndex = argAsSmp.index;
|
||||
auto &gfxCoreHelper = this->getGfxCoreHelper();
|
||||
auto samplerStateSize = gfxCoreHelper.getSamplerStateSize();
|
||||
const auto offset = kernelInfo.kernelDescriptor.payloadMappings.samplerTable.tableOffset;
|
||||
samplerState = ptrOffset(const_cast<void *>(dsh), (samplerStateIndex * samplerStateSize) + offset);
|
||||
} else {
|
||||
DEBUG_BREAK_IF(isUndefinedOffset(argAsSmp.bindful));
|
||||
samplerState = ptrOffset(const_cast<void *>(dsh), argAsSmp.bindful);
|
||||
}
|
||||
|
||||
pSampler->setArg(const_cast<void *>(samplerState), clDevice.getRootDeviceEnvironment());
|
||||
|
||||
@ -2187,6 +2210,35 @@ void Kernel::patchBindlessSurfaceStatesInCrossThreadData(uint64_t bindlessSurfac
|
||||
}
|
||||
}
|
||||
|
||||
void Kernel::patchBindlessSamplerStatesInCrossThreadData(uint64_t bindlessSamplerStatesBaseAddress) const {
|
||||
auto &gfxCoreHelper = this->getGfxCoreHelper();
|
||||
const auto samplerStateSize = gfxCoreHelper.getSamplerStateSize();
|
||||
auto *crossThreadDataPtr = reinterpret_cast<uint8_t *>(getCrossThreadData());
|
||||
|
||||
auto samplerArgs = std::ranges::subrange(kernelInfo.kernelDescriptor.payloadMappings.explicitArgs) | std::views::filter([](const auto &arg) {
|
||||
return (arg.type == NEO::ArgDescriptor::argTSampler) && NEO::isValidOffset(arg.template as<NEO::ArgDescSampler>().bindless);
|
||||
});
|
||||
|
||||
for (auto &arg : samplerArgs) {
|
||||
auto &sampler = arg.template as<NEO::ArgDescSampler>();
|
||||
auto patchLocation = ptrOffset(crossThreadDataPtr, sampler.bindless);
|
||||
auto samplerStateAddress = static_cast<uint64_t>(bindlessSamplerStatesBaseAddress + sampler.index * samplerStateSize);
|
||||
auto patchValue = samplerStateAddress;
|
||||
patchWithRequiredSize(patchLocation, sampler.size, patchValue);
|
||||
}
|
||||
|
||||
auto inlineSamplers = kernelInfo.kernelDescriptor.inlineSamplers | std::views::filter([](const auto &sampler) {
|
||||
return (NEO::isValidOffset(sampler.bindless));
|
||||
});
|
||||
|
||||
for (auto &sampler : inlineSamplers) {
|
||||
auto patchLocation = ptrOffset(crossThreadDataPtr, sampler.bindless);
|
||||
auto samplerStateAddress = static_cast<uint64_t>(bindlessSamplerStatesBaseAddress + sampler.samplerIndex * samplerStateSize);
|
||||
auto patchValue = samplerStateAddress;
|
||||
patchWithRequiredSize(patchLocation, sampler.size, patchValue);
|
||||
}
|
||||
}
|
||||
|
||||
// Stores the supplied value in this kernel's `additionalKernelExecInfo` member.
// The parameter shadows the member of the same name, hence the explicit `this->`.
void Kernel::setAdditionalKernelExecInfo(uint32_t additionalKernelExecInfo) {
    this->additionalKernelExecInfo = additionalKernelExecInfo;
}
|
||||
|
@ -220,6 +220,8 @@ class Kernel : public ReferenceTrackedObject<Kernel> {
|
||||
template <bool heaplessEnabled>
|
||||
void patchBindlessSurfaceStatesInCrossThreadData(uint64_t bindlessSurfaceStatesBaseAddress) const;
|
||||
|
||||
void patchBindlessSamplerStatesInCrossThreadData(uint64_t bindlessSamplerStatesBaseAddress) const;
|
||||
|
||||
// Helpers
|
||||
cl_int setArg(uint32_t argIndex, uint32_t argValue);
|
||||
cl_int setArg(uint32_t argIndex, uint64_t argValue);
|
||||
|
@ -571,6 +571,66 @@ TEST_F(BindlessKernelTests, givenBindlessKernelWhenPatchBindlessSurfaceStatesInC
|
||||
EXPECT_EQ(globalConstantsSurfaceAddress, crossThreadData[4]);
|
||||
}
|
||||
|
||||
// Checks that patchBindlessSamplerStatesInCrossThreadData writes, for two explicit bindless
// sampler arguments (indices 0 and 1) and one bindless inline sampler (index 2), the value
// baseAddress + samplerIndex * samplerStateSize into the cross-thread-data slot named by
// each sampler's `bindless` offset.
HWTEST_F(BindlessKernelTests, givenBindlessKernelAndSamplersWhenPatchBindlessSamplerStatesInCrossThreadDataThenCorrectAddressesAreWritten) {
    // First explicit sampler: patched into cross-thread-data slot 0.
    auto argDescriptorSampler1 = NEO::ArgDescriptor(NEO::ArgDescriptor::argTSampler);
    auto &samplerArg1 = argDescriptorSampler1.as<NEO::ArgDescSampler>();
    samplerArg1 = NEO::ArgDescSampler();
    samplerArg1.bindful = NEO::undefined<NEO::SurfaceStateHeapOffset>;
    samplerArg1.bindless = 0x0;
    samplerArg1.size = 8;
    samplerArg1.index = 0;

    // Second explicit sampler: patched into cross-thread-data slot 1.
    auto argDescriptorSampler2 = NEO::ArgDescriptor(NEO::ArgDescriptor::argTSampler);
    auto &samplerArg2 = argDescriptorSampler2.as<NEO::ArgDescSampler>();
    samplerArg2 = NEO::ArgDescSampler();
    samplerArg2.bindful = NEO::undefined<NEO::SurfaceStateHeapOffset>;
    samplerArg2.bindless = sizeof(uint64_t);
    samplerArg2.size = 8;
    samplerArg2.index = 1;

    auto &kernelDescriptor = pProgram->mockKernelInfo.kernelDescriptor;
    kernelDescriptor.kernelAttributes.samplerAddressingMode = NEO::KernelDescriptor::Bindless;
    kernelDescriptor.payloadMappings.explicitArgs.push_back(argDescriptorSampler1);
    kernelDescriptor.payloadMappings.explicitArgs.push_back(argDescriptorSampler2);

    // Inline sampler: patched into cross-thread-data slot 2.
    auto &inlineSampler = kernelDescriptor.inlineSamplers.emplace_back();
    inlineSampler.addrMode = NEO::KernelDescriptor::InlineSampler::AddrMode::repeat;
    inlineSampler.filterMode = NEO::KernelDescriptor::InlineSampler::FilterMode::nearest;
    inlineSampler.isNormalized = false;
    inlineSampler.bindless = 2 * sizeof(uint64_t);
    inlineSampler.samplerIndex = 2;
    inlineSampler.size = 8;

    // The sampler table is placed right after the border color state in the DSH.
    const uint32_t borderColorSize = inlineSampler.borderColorStateSize;
    kernelDescriptor.payloadMappings.samplerTable.tableOffset = borderColorSize;

    MockKernel mockKernel(pProgram, pProgram->mockKernelInfo, *pClDevice);

    // Backing storage for the dynamic state heap: 64 bytes followed by three sampler states.
    using SamplerState = typename FamilyType::SAMPLER_STATE;
    std::array<uint8_t, 64 + 3 * sizeof(SamplerState)> dsh = {0};
    pProgram->mockKernelInfo.heapInfo.pDsh = dsh.data();
    pProgram->mockKernelInfo.heapInfo.dynamicStateHeapSize = static_cast<uint32_t>(dsh.size());

    // One 8-byte slot per sampler.
    // NOTE(review): ownership of this buffer is presumably taken by the kernel object - confirm.
    mockKernel.crossThreadData = new char[3 * sizeof(uint64_t)];
    mockKernel.crossThreadDataSize = 3 * sizeof(uint64_t);
    memset(mockKernel.crossThreadData, 0x00, mockKernel.crossThreadDataSize);

    const uint64_t baseAddress = reinterpret_cast<uint64_t>(dsh.data()) + borderColorSize;

    auto &gfxCoreHelper = pClDevice->getGfxCoreHelper();
    auto samplerStateSize = gfxCoreHelper.getSamplerStateSize();

    auto bindlessSamplerState1Address = baseAddress;
    auto bindlessSamplerState2Address = baseAddress + 1 * samplerStateSize;
    auto bindlessInlineSamplerStateAddress = baseAddress + inlineSampler.samplerIndex * samplerStateSize;

    mockKernel.setInlineSamplers();
    mockKernel.patchBindlessSamplerStatesInCrossThreadData(baseAddress);

    // Copy the patched bytes into properly typed storage before comparing.
    auto crossThreadData = std::make_unique<uint64_t[]>(mockKernel.crossThreadDataSize / sizeof(uint64_t));
    memcpy(crossThreadData.get(), mockKernel.crossThreadData, mockKernel.crossThreadDataSize);

    EXPECT_EQ(bindlessSamplerState1Address, crossThreadData[0]);
    EXPECT_EQ(bindlessSamplerState2Address, crossThreadData[1]);
    EXPECT_EQ(bindlessInlineSamplerStateAddress, crossThreadData[2]);
}
|
||||
|
||||
TEST_F(BindlessKernelTests, givenNoEntryInBindlessOffsetsMapWhenPatchingCrossThreadDataThenMemoryIsNotPatched) {
|
||||
pProgram->mockKernelInfo.kernelDescriptor.kernelAttributes.bufferAddressingMode = NEO::KernelDescriptor::BindlessAndStateless;
|
||||
pProgram->mockKernelInfo.kernelDescriptor.kernelAttributes.imageAddressingMode = NEO::KernelDescriptor::Bindless;
|
||||
|
Reference in New Issue
Block a user