feature: add inline samplers bindless addressing support
- inline samplers in bindless addressing mode require the bindless offset to be passed in cross-thread data

Related-To: NEO-11748
Signed-off-by: Mateusz Hoppe <mateusz.hoppe@intel.com>
commit 4c49a08017 (parent 7136dfbd38)
@@ -1414,6 +1414,26 @@ void KernelImp::patchSamplerBindlessOffsetsInCrossThreadData(uint64_t samplerSta
            }
        }
    }

    for (size_t index = 0; index < kernelImmData->getDescriptor().inlineSamplers.size(); index++) {
        const auto &sampler = kernelImmData->getDescriptor().inlineSamplers[index];

        auto crossThreadOffset = NEO::undefined<NEO::CrossThreadDataOffset>;
        if (sampler.bindless != NEO::undefined<NEO::CrossThreadDataOffset>) {
            crossThreadOffset = sampler.bindless;
        } else {
            continue;
        }

        auto samplerIndex = sampler.samplerIndex;

        if (samplerIndex < std::numeric_limits<uint8_t>::max()) {
            auto patchLocation = ptrOffset(crossThreadData, crossThreadOffset);
            auto surfaceStateOffset = static_cast<uint64_t>(samplerStateOffset + samplerIndex * samplerStateSize);
            auto patchValue = surfaceStateOffset;
            patchWithRequiredSize(const_cast<uint8_t *>(patchLocation), sampler.size, patchValue);
        }
    }
}

uint32_t KernelImp::getSurfaceStateIndexForBindlessOffset(NEO::CrossThreadDataOffset bindlessOffset) const {
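
For context: the loop above writes, for every inline sampler that carries a bindless cross-thread offset, the value samplerStateOffset + samplerIndex * samplerStateSize at that offset. Below is a minimal standalone sketch of the same idea; it is not NEO code, and the names InlineSamplerDesc, patchSamplerOffset and the 64-byte sampler state size are assumptions made only for this illustration.

// Standalone sketch, not NEO code: how a bindless inline sampler's state offset
// ends up in cross-thread data. Assumes a little-endian host when the patch
// size is smaller than 8 bytes.
#include <cstdint>
#include <cstring>
#include <iostream>
#include <vector>

struct InlineSamplerDesc {
    uint32_t samplerIndex;   // index into the kernel's sampler table
    uint32_t bindlessOffset; // patch location inside cross-thread data
    uint8_t size;            // patch size in bytes (4 or 8)
};

// Writes (samplerStateBase + samplerIndex * samplerStateSize) at the sampler's
// cross-thread-data offset, limited to the recorded patch size.
void patchSamplerOffset(std::vector<uint8_t> &crossThreadData,
                        const InlineSamplerDesc &sampler,
                        uint64_t samplerStateBase,
                        uint64_t samplerStateSize) {
    const uint64_t value = samplerStateBase + sampler.samplerIndex * samplerStateSize;
    std::memcpy(crossThreadData.data() + sampler.bindlessOffset, &value, sampler.size);
}

int main() {
    std::vector<uint8_t> crossThreadData(7 * sizeof(uint64_t), 0);
    const uint64_t samplerStateBase = 0x1000; // where sampler states start in the heap
    const uint64_t samplerStateSize = 64;     // assumed per-sampler state size

    InlineSamplerDesc sampler0{};
    sampler0.samplerIndex = 0;
    sampler0.bindlessOffset = 5 * sizeof(uint64_t);
    sampler0.size = sizeof(uint64_t);

    InlineSamplerDesc sampler1{};
    sampler1.samplerIndex = 1;
    sampler1.bindlessOffset = 6 * sizeof(uint64_t);
    sampler1.size = sizeof(uint64_t);

    patchSamplerOffset(crossThreadData, sampler0, samplerStateBase, samplerStateSize);
    patchSamplerOffset(crossThreadData, sampler1, samplerStateBase, samplerStateSize);

    uint64_t patched = 0;
    std::memcpy(&patched, crossThreadData.data() + sampler1.bindlessOffset, sizeof(patched));
    std::cout << std::hex << patched << "\n"; // prints 1040 (0x1000 + 1 * 64)
    return 0;
}

The unit test added in the next hunk exercises the real implementation in the same way: two inline samplers whose cross-thread offsets sit in uint64_t slots 5 and 6 are expected to receive baseAddress + samplerIndex * samplerStateSize.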
@@ -4018,5 +4018,68 @@ TEST_F(BindlessKernelTest, givenBindlessKernelWhenPatchingSamplerOffsetsInCrossT
    EXPECT_EQ(0u, crossThreadData[2]);
}

TEST_F(BindlessKernelTest, givenBindlessKernelWithInlineSamplersWhenPatchingSamplerOffsetsInCrossThreadDataThenCorrectBindlessOffsetsAreWritten) {
    Mock<Module> mockModule(this->device, nullptr);
    Mock<KernelImp> mockKernel;
    mockKernel.module = &mockModule;

    mockKernel.descriptor.kernelAttributes.bufferAddressingMode = NEO::KernelDescriptor::BindlessAndStateless;
    mockKernel.descriptor.kernelAttributes.imageAddressingMode = NEO::KernelDescriptor::Bindless;

    auto argDescriptor = NEO::ArgDescriptor(NEO::ArgDescriptor::argTPointer);
    argDescriptor.as<NEO::ArgDescPointer>() = NEO::ArgDescPointer();
    argDescriptor.as<NEO::ArgDescPointer>().bindful = NEO::undefined<NEO::SurfaceStateHeapOffset>;
    argDescriptor.as<NEO::ArgDescPointer>().bindless = 0x0;
    mockKernel.descriptor.payloadMappings.explicitArgs.push_back(argDescriptor);

    auto argDescriptor2 = NEO::ArgDescriptor(NEO::ArgDescriptor::argTPointer);
    argDescriptor2.as<NEO::ArgDescPointer>() = NEO::ArgDescPointer();
    argDescriptor2.as<NEO::ArgDescPointer>().bindful = NEO::undefined<NEO::SurfaceStateHeapOffset>;
    argDescriptor2.as<NEO::ArgDescPointer>().stateless = 2 * sizeof(uint64_t);
    mockKernel.descriptor.payloadMappings.explicitArgs.push_back(argDescriptor2);

    mockKernel.descriptor.payloadMappings.implicitArgs.globalVariablesSurfaceAddress.bindless = 3 * sizeof(uint64_t);
    mockKernel.descriptor.payloadMappings.implicitArgs.globalConstantsSurfaceAddress.bindless = 4 * sizeof(uint64_t);

    NEO::KernelDescriptor::InlineSampler inlineSampler = {};
    inlineSampler.samplerIndex = 0;
    inlineSampler.addrMode = NEO::KernelDescriptor::InlineSampler::AddrMode::clampBorder;
    inlineSampler.filterMode = NEO::KernelDescriptor::InlineSampler::FilterMode::linear;
    inlineSampler.isNormalized = true;
    inlineSampler.bindless = 5 * sizeof(uint64_t);
    inlineSampler.size = sizeof(uint64_t);
    mockKernel.descriptor.inlineSamplers.push_back(inlineSampler);

    inlineSampler.samplerIndex = 1;
    inlineSampler.bindless = 6 * sizeof(uint64_t);
    inlineSampler.size = sizeof(uint64_t);
    mockKernel.descriptor.inlineSamplers.push_back(inlineSampler);

    mockKernel.descriptor.payloadMappings.samplerTable.numSamplers = 2;
    mockKernel.isBindlessOffsetSet.resize(2, 0);
    mockKernel.usingSurfaceStateHeap.resize(2, 0);

    mockKernel.descriptor.initBindlessOffsetToSurfaceState();

    mockKernel.crossThreadData = std::make_unique<uint8_t[]>(7 * sizeof(uint64_t));
    mockKernel.crossThreadDataSize = 7 * sizeof(uint64_t);
    memset(mockKernel.crossThreadData.get(), 0, mockKernel.crossThreadDataSize);

    const uint64_t baseAddress = 0x1000;
    auto &gfxCoreHelper = this->device->getGfxCoreHelper();
    auto samplerStateSize = gfxCoreHelper.getSamplerStateSize();

    auto patchValue1 = (static_cast<uint32_t>(baseAddress + 0 * samplerStateSize));
    auto patchValue2 = (static_cast<uint32_t>(baseAddress + 1 * samplerStateSize));

    mockKernel.patchSamplerBindlessOffsetsInCrossThreadData(baseAddress);

    auto crossThreadData = std::make_unique<uint64_t[]>(mockKernel.crossThreadDataSize / sizeof(uint64_t));
    memcpy(crossThreadData.get(), mockKernel.crossThreadData.get(), mockKernel.crossThreadDataSize);

    EXPECT_EQ(patchValue1, crossThreadData[5]);
    EXPECT_EQ(patchValue2, crossThreadData[6]);
}

} // namespace ult
} // namespace L0
@@ -129,6 +129,7 @@ inline constexpr ConstStringRef regionGroupSize("region_group_size");
inline constexpr ConstStringRef regionGroupDimension("region_group_dimension");
inline constexpr ConstStringRef regionGroupWgCount("region_group_wg_count");
inline constexpr ConstStringRef regionGroupBarrierBuffer("region_group_barrier_buffer");
inline constexpr ConstStringRef inlineSampler("inline_sampler");

namespace Image {
inline constexpr ConstStringRef width("image_width");
@@ -508,6 +509,7 @@ enum ArgType : uint8_t {
    argTypeRegionGroupDimension,
    argTypeRegionGroupWgCount,
    argTypeRegionGroupBarrierBuffer,
    argTypeInlineSampler,
    argTypeMax
};
@@ -536,12 +536,12 @@ DecodeError decodeZeInfoKernelEntry(NEO::KernelDescriptor &dst, NEO::Yaml::YamlP
        return decodeError;
    }

-   decodeError = decodeZeInfoKernelPayloadArguments(dst, yamlParser, zeInfokernelSections, outErrReason, outWarning);
+   decodeError = decodeZeInfoKernelInlineSamplers(dst, yamlParser, zeInfokernelSections, outErrReason, outWarning);
    if (DecodeError::success != decodeError) {
        return decodeError;
    }

-   decodeError = decodeZeInfoKernelInlineSamplers(dst, yamlParser, zeInfokernelSections, outErrReason, outWarning);
+   decodeError = decodeZeInfoKernelPayloadArguments(dst, yamlParser, zeInfokernelSections, outErrReason, outWarning);
    if (DecodeError::success != decodeError) {
        return decodeError;
    }
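
The two decode calls swap places so that the inline_samplers section is processed before the payload arguments: the new inline_sampler payload argument (handled in the next hunk) looks up an already-populated KernelDescriptor::InlineSampler entry by sampler index and attaches the bindless offset and size to it. A toy sketch of that second pass follows, under assumed types; InlineSampler and attachBindlessOffset here are illustrative only, not the NEO definitions.

#include <cstdint>
#include <vector>

// Illustrative stand-in for the descriptor entry created by the first
// (inline_samplers) decode pass.
struct InlineSampler {
    uint32_t samplerIndex = 0;
    uint32_t bindlessOffset = ~0u; // "undefined" sentinel
    uint8_t size = 0;
};

// Second pass: attach a bindless payload argument's cross-thread offset to the
// matching inline sampler. If the samplers have not been decoded yet, the
// vector is empty and nothing can be patched, which is why the order matters.
bool attachBindlessOffset(std::vector<InlineSampler> &samplers,
                          uint32_t samplerIndex, uint32_t offset, uint8_t size) {
    for (auto &sampler : samplers) {
        if (sampler.samplerIndex == samplerIndex) {
            sampler.bindlessOffset = offset;
            sampler.size = size;
            return true;
        }
    }
    return false;
}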
@@ -1111,6 +1111,24 @@ DecodeError populateKernelPayloadArgument(NEO::KernelDescriptor &dst, const Kern
        return DecodeError::success;
    };

    auto populateInlineSampler = [&src, &outErrReason, &kernelName](auto &dst, ConstStringRef typeName) {
        if (dst.payloadMappings.samplerTable.numSamplers < src.samplerIndex) {
            outErrReason.append("DeviceBinaryFormat::zebin : Invalid sampler index for argument of type " + typeName.str() + " in context of : " + kernelName + ".\n");
            return DecodeError::invalidBinary;
        }

        if (src.addrmode == Types::Kernel::PayloadArgument::memoryAddressingModeBindless) {
            for (auto &sampler : dst.inlineSamplers) {
                if (static_cast<Types::Kernel::PayloadArgument::SamplerIndexT>(sampler.samplerIndex) == src.samplerIndex) {
                    sampler.bindless = src.offset;
                    sampler.size = src.size;
                    break;
                }
            }
        }
        return DecodeError::success;
    };

    switch (src.argType) {
    default:
        outErrReason.append("DeviceBinaryFormat::zebin : Invalid arg type in cross thread data section in context of : " + kernelName + ".\n");
@@ -1398,6 +1416,9 @@ DecodeError populateKernelPayloadArgument(NEO::KernelDescriptor &dst, const Kern
    case Types::Kernel::argTypeRegionGroupBarrierBuffer:
        dst.kernelAttributes.flags.usesRegionGroupBarrier = true;
        return populateArgPointerStateless(dst.payloadMappings.implicitArgs.regionGroupBarrierBuffer);

    case Types::Kernel::argTypeInlineSampler:
        return populateInlineSampler(dst, Tags::Kernel::PayloadArgument::ArgType::inlineSampler);
    }

    UNREACHABLE();
@@ -22,7 +22,7 @@ using namespace Tags::Kernel::PayloadArgument::ArgType::Sampler::Vme;
using ArgType = Types::Kernel::ArgType;

inline constexpr ConstStringRef name = "argument type";
-inline constexpr LookupArray<ConstStringRef, ArgType, 45> lookup({{
+inline constexpr LookupArray<ConstStringRef, ArgType, 46> lookup({{
    {packedLocalIds, ArgType::argTypePackedLocalIds},
    {localId, ArgType::argTypeLocalId},
    {localSize, ArgType::argTypeLocalSize},

@@ -68,6 +68,7 @@ inline constexpr LookupArray<ConstStringRef, ArgType, 45> lookup({{
    {regionGroupDimension, ArgType::argTypeRegionGroupDimension},
    {regionGroupWgCount, ArgType::argTypeRegionGroupWgCount},
    {regionGroupBarrierBuffer, ArgType::argTypeRegionGroupBarrierBuffer},
    {inlineSampler, ArgType::argTypeInlineSampler},
}});
static_assert(lookup.size() == ArgType::argTypeMax - 1, "Every enum field must be present");
} // namespace ArgType
@@ -234,6 +234,9 @@ struct KernelDescriptor {
        bool isNormalized;
        AddrMode addrMode;
        FilterMode filterMode;
        CrossThreadDataOffset bindless = undefined<CrossThreadDataOffset>;
        uint8_t size = undefined<uint8_t>;

        constexpr uint32_t getSamplerBindfulOffset() const {
            return borderColorStateSize + samplerStateSize * samplerIndex;
        }
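
The new bindless and size fields default to undefined, so kernels whose inline samplers never received a bindless payload argument are simply skipped by the patching loop. For contrast with the new bindless path, the pre-existing bindful helper shown above computes the sampler's location inside the dynamic state heap; a small worked example follows, using assumed sizes (64-byte border color state, 16-byte sampler state) chosen only for illustration.

#include <cstdint>

// Assumed sizes for illustration only; the real values are hardware specific
// and come from the kernel descriptor / GfxCoreHelper.
constexpr uint32_t borderColorStateSize = 64;
constexpr uint32_t samplerStateSize = 16;
constexpr uint32_t samplerIndex = 2;

// Mirrors InlineSampler::getSamplerBindfulOffset(): border color block first,
// then one sampler state per index.
constexpr uint32_t bindfulOffset = borderColorStateSize + samplerStateSize * samplerIndex;
static_assert(bindfulOffset == 96, "64 + 16 * 2");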
@@ -6814,6 +6814,9 @@ kernels:
    EXPECT_EQ(NEO::KernelDescriptor::InlineSampler::AddrMode::clampEdge, inlineSampler.addrMode);
    EXPECT_EQ(NEO::KernelDescriptor::InlineSampler::FilterMode::nearest, inlineSampler.filterMode);
    EXPECT_TRUE(inlineSampler.isNormalized);

    EXPECT_EQ(undefined<CrossThreadDataOffset>, inlineSampler.bindless);
    EXPECT_EQ(undefined<uint8_t>, inlineSampler.size);
}

TEST_F(decodeZeInfoKernelEntryTest, GivenInvalidInlineSamplersEntryThenPopulateKernelDescriptorFails) {
@@ -6848,6 +6851,59 @@ kernels:
    EXPECT_EQ(NEO::DecodeError::invalidBinary, err);
}

TEST_F(decodeZeInfoKernelEntryTest, GivenBindlessInlineSamplersThenPopulateKernelDescriptorSetsBindlessOffsetAndSize) {
    ConstStringRef zeinfo = R"===(
kernels:
    - name : some_kernel
      execution_env:
        simd_size: 8
        has_sample: true
      payload_arguments:
        - arg_type: inline_sampler
          offset: 40
          size: 4
          addrmode: bindless
          addrspace: sampler
          sampler_index: 0
        - arg_type: inline_sampler
          offset: 44
          size: 4
          addrmode: bindless
          addrspace: sampler
          sampler_index: 1
      inline_samplers:
        - sampler_index: 1
          addrmode: none
          filtermode: nearest
        - sampler_index: 0
          addrmode: repeat
          filtermode: linear
          normalized: true
...
)===";
    auto err = decodeZeInfoKernelEntry(zeinfo);
    EXPECT_EQ(NEO::DecodeError::success, err);
    EXPECT_TRUE(errors.empty());

    ASSERT_EQ(2U, kernelDescriptor->inlineSamplers.size());

    const auto &inlineSampler1 = kernelDescriptor->inlineSamplers[0];
    EXPECT_EQ(1U, inlineSampler1.samplerIndex);
    EXPECT_EQ(NEO::KernelDescriptor::InlineSampler::AddrMode::none, inlineSampler1.addrMode);
    EXPECT_EQ(NEO::KernelDescriptor::InlineSampler::FilterMode::nearest, inlineSampler1.filterMode);
    EXPECT_FALSE(inlineSampler1.isNormalized);
    EXPECT_EQ(44u, inlineSampler1.bindless);
    EXPECT_EQ(4u, inlineSampler1.size);

    const auto &inlineSampler0 = kernelDescriptor->inlineSamplers[1];
    EXPECT_EQ(0U, inlineSampler0.samplerIndex);
    EXPECT_EQ(NEO::KernelDescriptor::InlineSampler::AddrMode::repeat, inlineSampler0.addrMode);
    EXPECT_EQ(NEO::KernelDescriptor::InlineSampler::FilterMode::linear, inlineSampler0.filterMode);
    EXPECT_TRUE(inlineSampler0.isNormalized);
    EXPECT_EQ(40u, inlineSampler0.bindless);
    EXPECT_EQ(4u, inlineSampler0.size);
}

TEST_F(decodeZeInfoKernelEntryTest, givenGlobalBufferAndConstBufferWhenPopulatingKernelDescriptorThenPopulateThemProperly) {
    NEO::ConstStringRef zeinfo = R"===(
kernels: