feature: add inline samplers bindless addressing support

- inline samplers in bindless addressing mode require a bindless offset
passed in cross-thread data

Related-To: NEO-11748

Signed-off-by: Mateusz Hoppe <mateusz.hoppe@intel.com>
This commit is contained in:
Mateusz Hoppe
2024-06-23 16:17:27 +00:00
committed by Compute-Runtime-Automation
parent 7136dfbd38
commit 4c49a08017
7 changed files with 169 additions and 3 deletions

View File

@@ -129,6 +129,7 @@ inline constexpr ConstStringRef regionGroupSize("region_group_size");
inline constexpr ConstStringRef regionGroupDimension("region_group_dimension");
inline constexpr ConstStringRef regionGroupWgCount("region_group_wg_count");
inline constexpr ConstStringRef regionGroupBarrierBuffer("region_group_barrier_buffer");
// Argument-type string "inline_sampler"; the argument-type lookup table maps it to ArgType::argTypeInlineSampler.
inline constexpr ConstStringRef inlineSampler("inline_sampler");
namespace Image {
inline constexpr ConstStringRef width("image_width");
@@ -508,6 +509,7 @@ enum ArgType : uint8_t {
argTypeRegionGroupDimension,
argTypeRegionGroupWgCount,
argTypeRegionGroupBarrierBuffer,
argTypeInlineSampler,
argTypeMax
};

View File

@@ -536,12 +536,12 @@ DecodeError decodeZeInfoKernelEntry(NEO::KernelDescriptor &dst, NEO::Yaml::YamlP
return decodeError;
}
decodeError = decodeZeInfoKernelPayloadArguments(dst, yamlParser, zeInfokernelSections, outErrReason, outWarning);
decodeError = decodeZeInfoKernelInlineSamplers(dst, yamlParser, zeInfokernelSections, outErrReason, outWarning);
if (DecodeError::success != decodeError) {
return decodeError;
}
decodeError = decodeZeInfoKernelInlineSamplers(dst, yamlParser, zeInfokernelSections, outErrReason, outWarning);
decodeError = decodeZeInfoKernelPayloadArguments(dst, yamlParser, zeInfokernelSections, outErrReason, outWarning);
if (DecodeError::success != decodeError) {
return decodeError;
}
@@ -1111,6 +1111,24 @@ DecodeError populateKernelPayloadArgument(NEO::KernelDescriptor &dst, const Kern
return DecodeError::success;
};
// Handles a payload argument of type `inline_sampler`.
//
// Rejects the argument with DecodeError::invalidBinary (appending a message to
// outErrReason) when its sampler index is out of range for the sampler table.
// For bindless addressing, copies the argument's offset and size into the first
// entry of dst.inlineSamplers whose samplerIndex matches; other addressing modes
// leave dst untouched. Returns DecodeError::success otherwise.
auto populateInlineSampler = [&src, &outErrReason, &kernelName](auto &dst, ConstStringRef typeName) {
    // An index equal to numSamplers is already one past the last valid slot, hence `<=`.
    // NOTE(review): assumes sampler indices are zero-based (getSamplerBindfulOffset() multiplies
    // by samplerIndex) and that numSamplers is already populated when this runs - confirm.
    if (dst.payloadMappings.samplerTable.numSamplers <= src.samplerIndex) {
        outErrReason.append("DeviceBinaryFormat::zebin : Invalid sampler index for argument of type " + typeName.str() + " in context of : " + kernelName + ".\n");
        return DecodeError::invalidBinary;
    }
    if (src.addrmode == Types::Kernel::PayloadArgument::memoryAddressingModeBindless) {
        // NOTE(review): if no inline sampler matches src.samplerIndex the argument is silently ignored.
        for (auto &sampler : dst.inlineSamplers) {
            if (static_cast<Types::Kernel::PayloadArgument::SamplerIndexT>(sampler.samplerIndex) == src.samplerIndex) {
                sampler.bindless = src.offset;
                sampler.size = src.size;
                break;
            }
        }
    }
    return DecodeError::success;
};
switch (src.argType) {
default:
outErrReason.append("DeviceBinaryFormat::zebin : Invalid arg type in cross thread data section in context of : " + kernelName + ".\n");
@@ -1398,6 +1416,9 @@ DecodeError populateKernelPayloadArgument(NEO::KernelDescriptor &dst, const Kern
case Types::Kernel::argTypeRegionGroupBarrierBuffer:
dst.kernelAttributes.flags.usesRegionGroupBarrier = true;
return populateArgPointerStateless(dst.payloadMappings.implicitArgs.regionGroupBarrierBuffer);
case Types::Kernel::argTypeInlineSampler:
return populateInlineSampler(dst, Tags::Kernel::PayloadArgument::ArgType::inlineSampler);
}
UNREACHABLE();

View File

@@ -22,7 +22,7 @@ using namespace Tags::Kernel::PayloadArgument::ArgType::Sampler::Vme;
using ArgType = Types::Kernel::ArgType;
inline constexpr ConstStringRef name = "argument type";
inline constexpr LookupArray<ConstStringRef, ArgType, 45> lookup({{
inline constexpr LookupArray<ConstStringRef, ArgType, 46> lookup({{
{packedLocalIds, ArgType::argTypePackedLocalIds},
{localId, ArgType::argTypeLocalId},
{localSize, ArgType::argTypeLocalSize},
@@ -68,6 +68,7 @@ inline constexpr LookupArray<ConstStringRef, ArgType, 45> lookup({{
{regionGroupDimension, ArgType::argTypeRegionGroupDimension},
{regionGroupWgCount, ArgType::argTypeRegionGroupWgCount},
{regionGroupBarrierBuffer, ArgType::argTypeRegionGroupBarrierBuffer},
{inlineSampler, ArgType::argTypeInlineSampler},
}});
static_assert(lookup.size() == ArgType::argTypeMax - 1, "Every enum field must be present");
} // namespace ArgType

View File

@@ -234,6 +234,9 @@ struct KernelDescriptor {
bool isNormalized;
AddrMode addrMode;
FilterMode filterMode;
// Offset taken from the matching `inline_sampler` payload argument when it uses bindless addressing;
// stays undefined<CrossThreadDataOffset> otherwise.
CrossThreadDataOffset bindless = undefined<CrossThreadDataOffset>;
// Size taken from the same payload argument; stays undefined<uint8_t> until that argument is decoded.
uint8_t size = undefined<uint8_t>;
// Returns borderColorStateSize plus samplerIndex multiples of samplerStateSize.
constexpr uint32_t getSamplerBindfulOffset() const {
    const auto precedingSamplerStates = samplerStateSize * samplerIndex;
    return borderColorStateSize + precedingSamplerStates;
}