mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-21 01:04:57 +08:00
feature: add inline samplers bindless addressing support
- inline samplers in bindless addressing mode require the bindless offset to be passed in cross-thread data. Related-To: NEO-11748 Signed-off-by: Mateusz Hoppe <mateusz.hoppe@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
7136dfbd38
commit
4c49a08017
@@ -1414,6 +1414,26 @@ void KernelImp::patchSamplerBindlessOffsetsInCrossThreadData(uint64_t samplerSta
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t index = 0; index < kernelImmData->getDescriptor().inlineSamplers.size(); index++) {
|
||||
const auto &sampler = kernelImmData->getDescriptor().inlineSamplers[index];
|
||||
|
||||
auto crossThreadOffset = NEO::undefined<NEO::CrossThreadDataOffset>;
|
||||
if (sampler.bindless != NEO::undefined<NEO::CrossThreadDataOffset>) {
|
||||
crossThreadOffset = sampler.bindless;
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
|
||||
auto samplerIndex = sampler.samplerIndex;
|
||||
|
||||
if (samplerIndex < std::numeric_limits<uint8_t>::max()) {
|
||||
auto patchLocation = ptrOffset(crossThreadData, crossThreadOffset);
|
||||
auto surfaceStateOffset = static_cast<uint64_t>(samplerStateOffset + samplerIndex * samplerStateSize);
|
||||
auto patchValue = surfaceStateOffset;
|
||||
patchWithRequiredSize(const_cast<uint8_t *>(patchLocation), sampler.size, patchValue);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t KernelImp::getSurfaceStateIndexForBindlessOffset(NEO::CrossThreadDataOffset bindlessOffset) const {
|
||||
|
||||
@@ -4018,5 +4018,68 @@ TEST_F(BindlessKernelTest, givenBindlessKernelWhenPatchingSamplerOffsetsInCrossT
|
||||
EXPECT_EQ(0u, crossThreadData[2]);
|
||||
}
|
||||
|
||||
TEST_F(BindlessKernelTest, givenBindlessKernelWithInlineSamplersWhenPatchingSamplerOffsetsInCrossThreadDataThenCorrectBindlessOffsetsAreWritten) {
|
||||
Mock<Module> mockModule(this->device, nullptr);
|
||||
Mock<KernelImp> mockKernel;
|
||||
mockKernel.module = &mockModule;
|
||||
|
||||
mockKernel.descriptor.kernelAttributes.bufferAddressingMode = NEO::KernelDescriptor::BindlessAndStateless;
|
||||
mockKernel.descriptor.kernelAttributes.imageAddressingMode = NEO::KernelDescriptor::Bindless;
|
||||
|
||||
auto argDescriptor = NEO::ArgDescriptor(NEO::ArgDescriptor::argTPointer);
|
||||
argDescriptor.as<NEO::ArgDescPointer>() = NEO::ArgDescPointer();
|
||||
argDescriptor.as<NEO::ArgDescPointer>().bindful = NEO::undefined<NEO::SurfaceStateHeapOffset>;
|
||||
argDescriptor.as<NEO::ArgDescPointer>().bindless = 0x0;
|
||||
mockKernel.descriptor.payloadMappings.explicitArgs.push_back(argDescriptor);
|
||||
|
||||
auto argDescriptor2 = NEO::ArgDescriptor(NEO::ArgDescriptor::argTPointer);
|
||||
argDescriptor2.as<NEO::ArgDescPointer>() = NEO::ArgDescPointer();
|
||||
argDescriptor2.as<NEO::ArgDescPointer>().bindful = NEO::undefined<NEO::SurfaceStateHeapOffset>;
|
||||
argDescriptor2.as<NEO::ArgDescPointer>().stateless = 2 * sizeof(uint64_t);
|
||||
mockKernel.descriptor.payloadMappings.explicitArgs.push_back(argDescriptor2);
|
||||
|
||||
mockKernel.descriptor.payloadMappings.implicitArgs.globalVariablesSurfaceAddress.bindless = 3 * sizeof(uint64_t);
|
||||
mockKernel.descriptor.payloadMappings.implicitArgs.globalConstantsSurfaceAddress.bindless = 4 * sizeof(uint64_t);
|
||||
|
||||
NEO::KernelDescriptor::InlineSampler inlineSampler = {};
|
||||
inlineSampler.samplerIndex = 0;
|
||||
inlineSampler.addrMode = NEO::KernelDescriptor::InlineSampler::AddrMode::clampBorder;
|
||||
inlineSampler.filterMode = NEO::KernelDescriptor::InlineSampler::FilterMode::linear;
|
||||
inlineSampler.isNormalized = true;
|
||||
inlineSampler.bindless = 5 * sizeof(uint64_t);
|
||||
inlineSampler.size = sizeof(uint64_t);
|
||||
mockKernel.descriptor.inlineSamplers.push_back(inlineSampler);
|
||||
|
||||
inlineSampler.samplerIndex = 1;
|
||||
inlineSampler.bindless = 6 * sizeof(uint64_t);
|
||||
inlineSampler.size = sizeof(uint64_t);
|
||||
mockKernel.descriptor.inlineSamplers.push_back(inlineSampler);
|
||||
|
||||
mockKernel.descriptor.payloadMappings.samplerTable.numSamplers = 2;
|
||||
mockKernel.isBindlessOffsetSet.resize(2, 0);
|
||||
mockKernel.usingSurfaceStateHeap.resize(2, 0);
|
||||
|
||||
mockKernel.descriptor.initBindlessOffsetToSurfaceState();
|
||||
|
||||
mockKernel.crossThreadData = std::make_unique<uint8_t[]>(7 * sizeof(uint64_t));
|
||||
mockKernel.crossThreadDataSize = 7 * sizeof(uint64_t);
|
||||
memset(mockKernel.crossThreadData.get(), 0, mockKernel.crossThreadDataSize);
|
||||
|
||||
const uint64_t baseAddress = 0x1000;
|
||||
auto &gfxCoreHelper = this->device->getGfxCoreHelper();
|
||||
auto samplerStateSize = gfxCoreHelper.getSamplerStateSize();
|
||||
|
||||
auto patchValue1 = (static_cast<uint32_t>(baseAddress + 0 * samplerStateSize));
|
||||
auto patchValue2 = (static_cast<uint32_t>(baseAddress + 1 * samplerStateSize));
|
||||
|
||||
mockKernel.patchSamplerBindlessOffsetsInCrossThreadData(baseAddress);
|
||||
|
||||
auto crossThreadData = std::make_unique<uint64_t[]>(mockKernel.crossThreadDataSize / sizeof(uint64_t));
|
||||
memcpy(crossThreadData.get(), mockKernel.crossThreadData.get(), mockKernel.crossThreadDataSize);
|
||||
|
||||
EXPECT_EQ(patchValue1, crossThreadData[5]);
|
||||
EXPECT_EQ(patchValue2, crossThreadData[6]);
|
||||
}
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
||||
Reference in New Issue
Block a user