feature: add inline samplers bindless addressing support

- inline samplers in bindless addressing mode require a bindless offset
to be passed in cross-thread data

Related-To: NEO-11748

Signed-off-by: Mateusz Hoppe <mateusz.hoppe@intel.com>
This commit is contained in:
Mateusz Hoppe
2024-06-23 16:17:27 +00:00
committed by Compute-Runtime-Automation
parent 7136dfbd38
commit 4c49a08017
7 changed files with 169 additions and 3 deletions

View File

@@ -1414,6 +1414,26 @@ void KernelImp::patchSamplerBindlessOffsetsInCrossThreadData(uint64_t samplerSta
}
}
}
// Patch a bindless offset into cross-thread data for each inline sampler of the kernel.
// Inline samplers that have no bindless cross-thread offset are left untouched.
for (const auto &inlineSampler : kernelImmData->getDescriptor().inlineSamplers) {
    if (inlineSampler.bindless == NEO::undefined<NEO::CrossThreadDataOffset>) {
        continue;
    }
    const NEO::CrossThreadDataOffset offsetInCrossThread = inlineSampler.bindless;

    const auto stateIndex = inlineSampler.samplerIndex;
    if (!(stateIndex < std::numeric_limits<uint8_t>::max())) {
        continue;
    }

    // Written value: samplerStateOffset advanced by stateIndex entries of samplerStateSize each.
    const auto samplerOffset = static_cast<uint64_t>(samplerStateOffset + stateIndex * samplerStateSize);
    auto destination = ptrOffset(crossThreadData, offsetInCrossThread);
    patchWithRequiredSize(const_cast<uint8_t *>(destination), inlineSampler.size, samplerOffset);
}
}
uint32_t KernelImp::getSurfaceStateIndexForBindlessOffset(NEO::CrossThreadDataOffset bindlessOffset) const {

View File

@@ -4018,5 +4018,68 @@ TEST_F(BindlessKernelTest, givenBindlessKernelWhenPatchingSamplerOffsetsInCrossT
EXPECT_EQ(0u, crossThreadData[2]);
}
// Checks that patchSamplerBindlessOffsetsInCrossThreadData() writes, for every inline sampler
// that has a bindless cross-thread offset, the value baseAddress + samplerIndex * samplerStateSize
// at that offset.
TEST_F(BindlessKernelTest, givenBindlessKernelWithInlineSamplersWhenPatchingSamplerOffsetsInCrossThreadDataThenCorrectBindlessOffsetsAreWritten) {
    Mock<Module> mockModule(this->device, nullptr);
    Mock<KernelImp> mockKernel;
    mockKernel.module = &mockModule;

    auto &descriptor = mockKernel.descriptor;
    auto &payloadMappings = descriptor.payloadMappings;

    descriptor.kernelAttributes.bufferAddressingMode = NEO::KernelDescriptor::BindlessAndStateless;
    descriptor.kernelAttributes.imageAddressingMode = NEO::KernelDescriptor::Bindless;

    // Explicit argument 0: pointer with a bindless offset of 0x0.
    auto bindlessArg = NEO::ArgDescriptor(NEO::ArgDescriptor::argTPointer);
    auto &bindlessPointer = bindlessArg.as<NEO::ArgDescPointer>();
    bindlessPointer = NEO::ArgDescPointer();
    bindlessPointer.bindful = NEO::undefined<NEO::SurfaceStateHeapOffset>;
    bindlessPointer.bindless = 0x0;
    payloadMappings.explicitArgs.push_back(bindlessArg);

    // Explicit argument 1: pointer with a stateless offset only.
    auto statelessArg = NEO::ArgDescriptor(NEO::ArgDescriptor::argTPointer);
    auto &statelessPointer = statelessArg.as<NEO::ArgDescPointer>();
    statelessPointer = NEO::ArgDescPointer();
    statelessPointer.bindful = NEO::undefined<NEO::SurfaceStateHeapOffset>;
    statelessPointer.stateless = 2 * sizeof(uint64_t);
    payloadMappings.explicitArgs.push_back(statelessArg);

    payloadMappings.implicitArgs.globalVariablesSurfaceAddress.bindless = 3 * sizeof(uint64_t);
    payloadMappings.implicitArgs.globalConstantsSurfaceAddress.bindless = 4 * sizeof(uint64_t);

    // Two inline samplers, identical except for their index and cross-thread slot (5 and 6).
    NEO::KernelDescriptor::InlineSampler firstSampler = {};
    firstSampler.samplerIndex = 0;
    firstSampler.addrMode = NEO::KernelDescriptor::InlineSampler::AddrMode::clampBorder;
    firstSampler.filterMode = NEO::KernelDescriptor::InlineSampler::FilterMode::linear;
    firstSampler.isNormalized = true;
    firstSampler.bindless = 5 * sizeof(uint64_t);
    firstSampler.size = sizeof(uint64_t);

    auto secondSampler = firstSampler;
    secondSampler.samplerIndex = 1;
    secondSampler.bindless = 6 * sizeof(uint64_t);
    secondSampler.size = sizeof(uint64_t);

    descriptor.inlineSamplers.push_back(firstSampler);
    descriptor.inlineSamplers.push_back(secondSampler);
    payloadMappings.samplerTable.numSamplers = 2;

    mockKernel.isBindlessOffsetSet.resize(2, 0);
    mockKernel.usingSurfaceStateHeap.resize(2, 0);
    descriptor.initBindlessOffsetToSurfaceState();

    // Seven zeroed 64-bit slots of cross-thread data; slots 5 and 6 receive the sampler offsets.
    mockKernel.crossThreadDataSize = 7 * sizeof(uint64_t);
    mockKernel.crossThreadData = std::make_unique<uint8_t[]>(7 * sizeof(uint64_t));
    memset(mockKernel.crossThreadData.get(), 0, mockKernel.crossThreadDataSize);

    const uint64_t baseAddress = 0x1000;
    auto samplerStateSize = this->device->getGfxCoreHelper().getSamplerStateSize();
    auto expectedOffsetSampler0 = static_cast<uint32_t>(baseAddress + 0 * samplerStateSize);
    auto expectedOffsetSampler1 = static_cast<uint32_t>(baseAddress + 1 * samplerStateSize);

    mockKernel.patchSamplerBindlessOffsetsInCrossThreadData(baseAddress);

    // Copy the patched bytes into 64-bit slots so they can be compared by slot index.
    auto patchedSlots = std::make_unique<uint64_t[]>(mockKernel.crossThreadDataSize / sizeof(uint64_t));
    memcpy(patchedSlots.get(), mockKernel.crossThreadData.get(), mockKernel.crossThreadDataSize);

    EXPECT_EQ(expectedOffsetSampler0, patchedSlots[5]);
    EXPECT_EQ(expectedOffsetSampler1, patchedSlots[6]);
}
} // namespace ult
} // namespace L0