feat(zebin): inline sampler

Add support for inline samplers in zebin.
Generate required SAMPLER_STATEs in DSH.

Resolves: NEO-7388

Signed-off-by: Krystian Chmielewski <krystian.chmielewski@intel.com>
This commit is contained in:
Krystian Chmielewski
2022-10-06 09:50:38 +00:00
committed by Compute-Runtime-Automation
parent b21fef96cb
commit 73a58aaf9e
15 changed files with 511 additions and 4 deletions

View File

@@ -767,6 +767,21 @@ void KernelImp::patchCrossthreadDataWithPrivateAllocation(NEO::GraphicsAllocatio
*device->getNEODevice(), kernelAttributes.flags.useGlobalAtomics, device->isImplicitScalingCapable());
}
void KernelImp::setInlineSamplers() {
auto device = module->getDevice();
const auto productFamily = device->getNEODevice()->getHardwareInfo().platform.eProductFamily;
for (auto &inlineSampler : getKernelDescriptor().inlineSamplers) {
ze_sampler_desc_t samplerDesc = {};
samplerDesc.addressMode = static_cast<ze_sampler_address_mode_t>(inlineSampler.addrMode);
samplerDesc.filterMode = static_cast<ze_sampler_filter_mode_t>(inlineSampler.filterMode);
samplerDesc.isNormalized = inlineSampler.isNormalized;
auto sampler = std::unique_ptr<L0::Sampler>(L0::Sampler::create(productFamily, device, &samplerDesc));
UNRECOVERABLE_IF(sampler.get() == nullptr);
sampler->copySamplerStateToDSH(dynamicStateHeapData.get(), dynamicStateHeapDataSize, inlineSampler.getSamplerBindfulOffset());
}
}
ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) {
this->kernelImmData = module->getKernelImmutableData(desc->pKernelName);
if (this->kernelImmData == nullptr) {
@@ -877,6 +892,8 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) {
this->setDebugSurface();
this->setInlineSamplers();
residencyContainer.insert(residencyContainer.end(), kernelImmData->getResidencyContainer().begin(),
kernelImmData->getResidencyContainer().end());

View File

@@ -84,6 +84,8 @@ struct KernelImp : Kernel {
virtual void setBufferSurfaceState(uint32_t argIndex, void *address, NEO::GraphicsAllocation *alloc) = 0;
void setInlineSamplers();
ze_result_t initialize(const ze_kernel_desc_t *desc);
const uint8_t *getPerThreadData() const override { return perThreadDataForWholeThreadGroup; }

View File

@@ -125,7 +125,7 @@ void SamplerCoreFamily<gfxCoreFamily>::copySamplerStateToDSH(void *dynamicStateH
using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE;
auto destSamplerState = ptrOffset(dynamicStateHeap, samplerOffset);
auto freeSpace = dynamicStateHeapSize - (samplerOffset + sizeof(SAMPLER_STATE));
auto freeSpace = dynamicStateHeapSize - samplerOffset;
memcpy_s(destSamplerState, freeSpace, &samplerState, sizeof(SAMPLER_STATE));
}

View File

@@ -415,5 +415,35 @@ TEST_F(KernelImpSuggestMaxCooperativeGroupCountTests, GivenUsedSlmSizeWhenCalcul
EXPECT_EQ(expected, getMaxWorkGroupCount());
}
using KernelTest = Test<DeviceFixture>;
// Verifies that KernelImp::setInlineSamplers() writes a SAMPLER_STATE matching the
// inline sampler description into the kernel's dynamic state heap.
HWTEST2_F(KernelTest, GivenInlineSamplersWhenSettingInlineSamplerThenDshIsPatched, SupportsSampler) {
    using InlineSampler = NEO::KernelDescriptor::InlineSampler;
    using SamplerState = typename FamilyType::SAMPLER_STATE;
    // The heap holds 64 bytes followed by room for one 16-byte sampler state.
    constexpr uint32_t samplerStateOffset = 64U;
    constexpr uint32_t dshSize = samplerStateOffset + 16U;

    WhiteBox<::L0::KernelImmutableData> kernelImmData = {};
    NEO::KernelDescriptor descriptor;
    kernelImmData.kernelDescriptor = &descriptor;

    auto &inlineSampler = descriptor.inlineSamplers.emplace_back();
    inlineSampler.isNormalized = false;
    inlineSampler.filterMode = InlineSampler::FilterMode::Nearest;
    inlineSampler.addrMode = InlineSampler::AddrMode::Repeat;

    Mock<Module> module(device, nullptr);
    Mock<Kernel> kernel;
    kernel.module = &module;
    kernel.kernelImmData = &kernelImmData;
    kernel.dynamicStateHeapData.reset(new uint8_t[dshSize]);
    kernel.dynamicStateHeapDataSize = dshSize;

    kernel.setInlineSamplers();

    auto samplerState = reinterpret_cast<const SamplerState *>(kernel.dynamicStateHeapData.get() + samplerStateOffset);
    EXPECT_TRUE(samplerState->getNonNormalizedCoordinateEnable());
    EXPECT_EQ(SamplerState::TEXTURE_COORDINATE_MODE_WRAP, samplerState->getTcxAddressControlMode());
    EXPECT_EQ(SamplerState::TEXTURE_COORDINATE_MODE_WRAP, samplerState->getTcyAddressControlMode());
    EXPECT_EQ(SamplerState::TEXTURE_COORDINATE_MODE_WRAP, samplerState->getTczAddressControlMode());
    EXPECT_EQ(SamplerState::MIN_MODE_FILTER_NEAREST, samplerState->getMinModeFilter());
    EXPECT_EQ(SamplerState::MAG_MODE_FILTER_NEAREST, samplerState->getMagModeFilter());
}
} // namespace ult
} // namespace L0

View File

@@ -26,6 +26,7 @@
#include "shared/source/memory_manager/unified_memory_manager.h"
#include "shared/source/os_interface/hw_info_config.h"
#include "shared/source/program/kernel_info.h"
#include "shared/source/utilities/lookup_array.h"
#include "opencl/source/accelerators/intel_accelerator.h"
#include "opencl/source/accelerators/intel_motion_estimation.h"
@@ -257,6 +258,8 @@ cl_int Kernel::initialize() {
auto numArgs = explicitArgs.size();
slmSizes.resize(numArgs);
this->setInlineSamplers();
this->kernelHasIndirectAccess |= kernelInfo.kernelDescriptor.kernelAttributes.hasNonKernelArgLoad ||
kernelInfo.kernelDescriptor.kernelAttributes.hasNonKernelArgStore ||
kernelInfo.kernelDescriptor.kernelAttributes.hasNonKernelArgAtomic;
@@ -1208,6 +1211,32 @@ bool Kernel::isSingleSubdevicePreferred() const {
return this->singleSubdevicePreferredInCurrentEnqueue || this->usesSyncBuffer();
}
void Kernel::setInlineSamplers() {
for (auto &inlineSampler : getDescriptor().inlineSamplers) {
using AddrMode = NEO::KernelDescriptor::InlineSampler::AddrMode;
constexpr LookupArray<AddrMode, cl_addressing_mode, 5> addressingModes({{{AddrMode::None, CL_ADDRESS_NONE},
{AddrMode::Repeat, CL_ADDRESS_REPEAT},
{AddrMode::ClampEdge, CL_ADDRESS_CLAMP_TO_EDGE},
{AddrMode::ClampBorder, CL_ADDRESS_CLAMP},
{AddrMode::Mirror, CL_ADDRESS_MIRRORED_REPEAT}}});
using FilterMode = NEO::KernelDescriptor::InlineSampler::FilterMode;
constexpr LookupArray<FilterMode, cl_filter_mode, 2> filterModes({{{FilterMode::Linear, CL_FILTER_LINEAR},
{FilterMode::Nearest, CL_FILTER_NEAREST}}});
cl_int errCode = CL_SUCCESS;
auto sampler = std::unique_ptr<Sampler>(Sampler::create(&getContext(),
static_cast<cl_bool>(inlineSampler.isNormalized),
addressingModes.lookUp(inlineSampler.addrMode),
filterModes.lookUp(inlineSampler.filterMode),
errCode));
UNRECOVERABLE_IF(errCode != CL_SUCCESS);
auto samplerState = ptrOffset(getDynamicStateHeap(), static_cast<size_t>(inlineSampler.getSamplerBindfulOffset()));
sampler->setArg(const_cast<void *>(samplerState), clDevice.getHardwareInfo());
}
}
void Kernel::makeResident(CommandStreamReceiver &commandStreamReceiver) {
auto rootDeviceIndex = commandStreamReceiver.getRootDeviceIndex();
if (privateSurface) {

View File

@@ -295,6 +295,7 @@ class Kernel : public ReferenceTrackedObject<Kernel> {
void performKernelTuning(CommandStreamReceiver &commandStreamReceiver, const Vec3<size_t> &lws, const Vec3<size_t> &gws, const Vec3<size_t> &offsets, TimestampPacketContainer *timestampContainer);
MOCKABLE_VIRTUAL bool isSingleSubdevicePreferred() const;
void setInlineSamplers();
// residency for kernel surfaces
MOCKABLE_VIRTUAL void makeResident(CommandStreamReceiver &commandStreamReceiver);

View File

@@ -3343,4 +3343,28 @@ TEST(KernelTest, givenKernelWithNumThreadsRequiredPatchTokenWhenQueryingEuThread
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(sizeof(cl_uint), paramRetSize);
EXPECT_EQ(123U, euThreadCount);
}
}
// Verifies that Kernel::setInlineSamplers() patches the kernel's dynamic state heap
// with a SAMPLER_STATE that reflects the inline sampler description.
HWTEST2_F(KernelTest, GivenInlineSamplersWhenSettingInlineSamplerThenDshIsPatched, SupportsSampler) {
    using InlineSampler = NEO::KernelDescriptor::InlineSampler;
    using SamplerState = typename FamilyType::SAMPLER_STATE;
    // The heap holds 64 bytes followed by room for one 16-byte sampler state.
    constexpr size_t samplerStateOffset = 64U;
    constexpr size_t dshSize = samplerStateOffset + 16U;

    auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get())));
    MockKernelWithInternals kernel(*device);

    auto &inlineSampler = kernel.kernelInfo.kernelDescriptor.inlineSamplers.emplace_back();
    inlineSampler.isNormalized = false;
    inlineSampler.filterMode = InlineSampler::FilterMode::Nearest;
    inlineSampler.addrMode = InlineSampler::AddrMode::Repeat;

    std::array<uint8_t, dshSize> dsh = {0};
    kernel.kernelInfo.heapInfo.pDsh = dsh.data();
    kernel.kernelInfo.heapInfo.DynamicStateHeapSize = static_cast<uint32_t>(dsh.size());

    kernel.mockKernel->setInlineSamplers();

    auto samplerState = reinterpret_cast<const SamplerState *>(dsh.data() + samplerStateOffset);
    EXPECT_TRUE(samplerState->getNonNormalizedCoordinateEnable());
    EXPECT_EQ(SamplerState::TEXTURE_COORDINATE_MODE_WRAP, samplerState->getTcxAddressControlMode());
    EXPECT_EQ(SamplerState::TEXTURE_COORDINATE_MODE_WRAP, samplerState->getTcyAddressControlMode());
    EXPECT_EQ(SamplerState::TEXTURE_COORDINATE_MODE_WRAP, samplerState->getTczAddressControlMode());
    EXPECT_EQ(SamplerState::MIN_MODE_FILTER_NEAREST, samplerState->getMinModeFilter());
    EXPECT_EQ(SamplerState::MAG_MODE_FILTER_NEAREST, samplerState->getMagModeFilter());
}

View File

@@ -126,6 +126,7 @@ class MockKernel : public Kernel {
using Kernel::pImplicitArgs;
using Kernel::preferredWkgMultipleOffset;
using Kernel::privateSurface;
using Kernel::setInlineSamplers;
using Kernel::singleSubdevicePreferredInCurrentEnqueue;
using Kernel::svmAllocationsRequireCacheFlush;
using Kernel::unifiedMemoryControls;
@@ -261,7 +262,7 @@ class MockKernel : public Kernel {
KernelInfo *kernelInfoAllocated = nullptr;
};
//class below have enough internals to service Enqueue operation.
// class below have enough internals to service Enqueue operation.
class MockKernelWithInternals {
public:
MockKernelWithInternals(const ClDeviceVector &deviceVector, Context *context = nullptr, bool addDefaultArg = false, SPatchExecutionEnvironment execEnv = {}) {

View File

@@ -131,6 +131,7 @@ constexpr ConstStringRef bindingTableIndices("binding_table_indices");
constexpr ConstStringRef perThreadPayloadArguments("per_thread_payload_arguments");
constexpr ConstStringRef perThreadMemoryBuffers("per_thread_memory_buffers");
constexpr ConstStringRef experimentalProperties("experimental_properties");
constexpr ConstStringRef inlineSamplers("inline_samplers");
namespace ExecutionEnv {
constexpr ConstStringRef barrierCount("barrier_count");
@@ -190,6 +191,9 @@ constexpr ConstStringRef slmArgAlignment("slm_alignment");
constexpr ConstStringRef imageType("image_type");
constexpr ConstStringRef imageTransformable("image_transformable");
constexpr ConstStringRef samplerType("sampler_type");
constexpr ConstStringRef addrMode("sampler_desc_addrmode");
constexpr ConstStringRef filterMode("sampler_desc_filtermode");
constexpr ConstStringRef normalized("sampler_desc_normalized");
namespace ArgType {
constexpr ConstStringRef localSize("local_size");
@@ -205,6 +209,7 @@ constexpr ConstStringRef bufferOffset("buffer_offset");
constexpr ConstStringRef printfBuffer("printf_buffer");
constexpr ConstStringRef workDimensions("work_dimensions");
constexpr ConstStringRef implicitArgBuffer("implicit_arg_buffer");
constexpr ConstStringRef inlineSampler("arg_inline_sampler");
namespace Image {
constexpr ConstStringRef width("image_width");
constexpr ConstStringRef height("image_height");
@@ -249,6 +254,7 @@ constexpr ConstStringRef imageType2DArrayMSAADepth("image_2d_array_msaa_depth");
constexpr ConstStringRef imageType2DMedia("image_2d_media");
constexpr ConstStringRef imageType2DMediaBlock("image_2d_media_block");
} // namespace ImageType
namespace SamplerType {
constexpr ConstStringRef samplerTypeTexture("texture");
constexpr ConstStringRef samplerType8x8("sample_8x8");
@@ -263,12 +269,14 @@ constexpr ConstStringRef samplerTypeVD("vd");
constexpr ConstStringRef samplerTypeVE("ve");
constexpr ConstStringRef samplerTypeVME("vme");
} // namespace SamplerType
namespace MemoryAddressingMode {
constexpr ConstStringRef stateless("stateless");
constexpr ConstStringRef stateful("stateful");
constexpr ConstStringRef bindless("bindless");
constexpr ConstStringRef sharedLocalMemory("slm");
} // namespace MemoryAddressingMode
namespace AddrSpace {
constexpr ConstStringRef global("global");
constexpr ConstStringRef local("local");
@@ -276,6 +284,7 @@ constexpr ConstStringRef constant("constant");
constexpr ConstStringRef image("image");
constexpr ConstStringRef sampler("sampler");
} // namespace AddrSpace
namespace AccessType {
constexpr ConstStringRef readonly("readonly");
constexpr ConstStringRef writeonly("writeonly");
@@ -320,6 +329,27 @@ constexpr ConstStringRef hasNonKernelArgLoad("has_non_kernel_arg_load");
constexpr ConstStringRef hasNonKernelArgStore("has_non_kernel_arg_store");
constexpr ConstStringRef hasNonKernelArgAtomic("has_non_kernel_arg_atomic");
} // namespace ExperimentalProperties
// Key names and enum spellings recognized inside a kernel's "inline_samplers" zeinfo section.
namespace InlineSamplers {
// Keys of a single inline sampler entry.
constexpr ConstStringRef samplerIndex("sampler_index");
constexpr ConstStringRef addrMode("addrmode");
constexpr ConstStringRef filterMode("filtermode");
constexpr ConstStringRef normalized("normalized");
// Textual values accepted for the "addrmode" key.
namespace AddrMode {
constexpr ConstStringRef none("none");
constexpr ConstStringRef repeat("repeat");
constexpr ConstStringRef clamp_edge("clamp_edge");
constexpr ConstStringRef clamp_border("clamp_border");
constexpr ConstStringRef mirror("mirror");
} // namespace AddrMode
// Textual values accepted for the "filtermode" key.
namespace FilterMode {
constexpr ConstStringRef nearest("nearest");
constexpr ConstStringRef linear("linear");
} // namespace FilterMode
} // namespace InlineSamplers
} // namespace Kernel
namespace GlobalHostAccessTable {
@@ -685,6 +715,44 @@ struct PerThreadMemoryBufferBaseT {
};
} // namespace PerThreadMemoryBuffer
// Decoded representation of one entry of a kernel's "inline_samplers" zeinfo section.
namespace InlineSamplers {
// Addressing mode of an inline sampler. Unknown is the default used before a value is
// read; Max is a sentinel used to check that every real value has a lookup entry.
enum class AddrMode : uint8_t {
Unknown,
None,
Repeat,
ClampEdge,
ClampBorder,
Mirror,
Max
};
// Filter mode of an inline sampler; Unknown and Max play the same roles as in AddrMode.
// NOTE(review): unlike AddrMode this is an unscoped enum without a fixed underlying type,
// so its enumerators are also visible directly in this namespace - confirm this is intended.
enum FilterMode {
Unknown,
Nearest,
Linear,
Max
};
using SamplerIndexT = int32_t;
using AddrModeT = AddrMode;
using FilterModeT = FilterMode;
using NormalizedT = bool;
// Values a field keeps when its key is missing from the zeinfo entry.
namespace Defaults {
// -1 marks "no index provided".
constexpr SamplerIndexT samplerIndex = -1;
constexpr AddrModeT addrMode = AddrMode::Unknown;
constexpr FilterModeT filterMode = FilterMode::Unknown;
constexpr NormalizedT normalized = false;
}; // namespace Defaults
// One inline sampler entry; every member starts at its default from Defaults.
struct InlineSamplerBaseT {
SamplerIndexT samplerIndex = Defaults::samplerIndex;
AddrModeT addrMode = Defaults::addrMode;
FilterModeT filterMode = Defaults::filterMode;
NormalizedT normalized = Defaults::normalized;
};
} // namespace InlineSamplers
} // namespace Kernel
namespace GlobalHostAccessTable {

View File

@@ -166,6 +166,27 @@ constexpr LookupArray<ConstStringRef, ThreadSchedulingMode, 3> lookup({{{ageBase
static_assert(lookup.size() == ThreadSchedulingMode::ThreadSchedulingModeMax - 1, "Every enum field must be present");
} // namespace ThreadSchedulingMode
// Maps the zeinfo spelling of an inline sampler addressing mode to its AddrMode enumerator.
namespace InlineSamplerAddrMode {
using namespace Tags::Kernel::InlineSamplers::AddrMode;
using AddrMode = Types::Kernel::InlineSamplers::AddrMode;
// Human-readable name of this enum.
constexpr ConstStringRef name = "inline sampler addressing mode";
constexpr LookupArray<ConstStringRef, AddrMode, 5> lookup({{{none, AddrMode::None},
{repeat, AddrMode::Repeat},
{clamp_edge, AddrMode::ClampEdge},
{clamp_border, AddrMode::ClampBorder},
{mirror, AddrMode::Mirror}}});
// Max - 1 is the number of enumerators between Unknown and Max, i.e. the values that need an entry.
static_assert(lookup.size() == static_cast<size_t>(AddrMode::Max) - 1, "Every enum field must be present");
} // namespace InlineSamplerAddrMode
// Maps the zeinfo spelling of an inline sampler filter mode to its FilterMode enumerator.
namespace InlineSamplerFilterMode {
using namespace Tags::Kernel::InlineSamplers::FilterMode;
using FilterMode = Types::Kernel::InlineSamplers::FilterMode;
// Human-readable name of this enum.
constexpr ConstStringRef name = "inline sampler filter mode";
constexpr LookupArray<ConstStringRef, FilterMode, 2> lookup({{{nearest, FilterMode::Nearest},
{linear, FilterMode::Linear}}});
// Max - 1 is the number of enumerators between Unknown and Max, i.e. the values that need an entry.
static_assert(lookup.size() == FilterMode::Max - 1, "Every enum field must be present");
} // namespace InlineSamplerFilterMode
template <typename T>
struct EnumLooker {};
@@ -222,4 +243,16 @@ struct EnumLooker<Types::Kernel::ExecutionEnv::ThreadSchedulingMode> {
static constexpr ConstStringRef name = ThreadSchedulingMode::name;
static constexpr auto members = ThreadSchedulingMode::lookup;
};
// Binds the inline sampler AddrMode enum to its name and string lookup table.
template <>
struct EnumLooker<Types::Kernel::InlineSamplers::AddrMode> {
static constexpr ConstStringRef name = InlineSamplerAddrMode::name;
static constexpr auto members = InlineSamplerAddrMode::lookup;
};
// Binds the inline sampler FilterMode enum to its name and string lookup table.
template <>
struct EnumLooker<Types::Kernel::InlineSamplers::FilterMode> {
static constexpr ConstStringRef name = InlineSamplerFilterMode::name;
static constexpr auto members = InlineSamplerFilterMode::lookup;
};
} // namespace NEO::Zebin::ZeInfo::EnumLookup

View File

@@ -294,6 +294,8 @@ void extractZeInfoKernelSections(const NEO::Yaml::YamlParser &parser, const NEO:
outZeInfoKernelSections.perThreadMemoryBuffersNd.push_back(&kernelMetadataNd);
} else if (NEO::Elf::ZebinKernelMetadata::Tags::Kernel::experimentalProperties == key) {
outZeInfoKernelSections.experimentalPropertiesNd.push_back(&kernelMetadataNd);
} else if (NEO::Elf::ZebinKernelMetadata::Tags::Kernel::inlineSamplers == key) {
outZeInfoKernelSections.inlineSamplersNd.push_back(&kernelMetadataNd);
} else {
outWarning.append("DeviceBinaryFormat::Zebin::" + NEO::Elf::SectionsNamesZebin::zeInfo.str() + " : Unknown entry \"" + parser.readKey(kernelMetadataNd).str() + "\" in context of : " + context.str() + "\n");
}
@@ -310,6 +312,7 @@ DecodeError validateZeInfoKernelSectionsCount(const ZeInfoKernelSections &outZeI
valid &= validateZebinSectionsCountAtMost(outZeInfoKernelSections.bindingTableIndicesNd, NEO::Elf::ZebinKernelMetadata::Tags::Kernel::bindingTableIndices, 1U, outErrReason, outWarning);
valid &= validateZebinSectionsCountAtMost(outZeInfoKernelSections.perThreadMemoryBuffersNd, NEO::Elf::ZebinKernelMetadata::Tags::Kernel::perThreadMemoryBuffers, 1U, outErrReason, outWarning);
valid &= validateZebinSectionsCountAtMost(outZeInfoKernelSections.experimentalPropertiesNd, NEO::Elf::ZebinKernelMetadata::Tags::Kernel::experimentalProperties, 1U, outErrReason, outWarning);
valid &= validateZebinSectionsCountAtMost(outZeInfoKernelSections.inlineSamplersNd, NEO::Elf::ZebinKernelMetadata::Tags::Kernel::inlineSamplers, 1U, outErrReason, outWarning);
return valid ? DecodeError::Success : DecodeError::InvalidBinary;
}
@@ -581,6 +584,32 @@ DecodeError readZeInfoPayloadArguments(const NEO::Yaml::YamlParser &parser, cons
return validPayload ? DecodeError::Success : DecodeError::InvalidBinary;
}
// Reads every entry of an "inline_samplers" zeinfo node into outInlineSamplers.
// - outMaxSamplerIndex is raised to the largest sampler index seen.
// - Unknown keys only add a warning to outWarning.
// - Returns InvalidBinary when any recognized value fails to decode, Success otherwise.
DecodeError readZeInfoInlineSamplers(const NEO::Yaml::YamlParser &parser, const NEO::Yaml::Node &node, ZeInfoInlineSamplers &outInlineSamplers, int32_t &outMaxSamplerIndex, ConstStringRef context, std::string &outErrReason, std::string &outWarning) {
    namespace Tags = NEO::Elf::ZebinKernelMetadata::Tags::Kernel::InlineSamplers;

    bool allEntriesValid = true;
    for (const auto &entryNd : parser.createChildrenRange(node)) {
        // Append a default-initialized entry and fill it in place.
        outInlineSamplers.resize(outInlineSamplers.size() + 1);
        auto &entry = *outInlineSamplers.rbegin();

        for (const auto &memberNd : parser.createChildrenRange(entryNd)) {
            const auto key = parser.readKey(memberNd);
            if (Tags::samplerIndex == key) {
                allEntriesValid &= readZeInfoValueChecked(parser, memberNd, entry.samplerIndex, context, outErrReason);
                outMaxSamplerIndex = std::max<int32_t>(outMaxSamplerIndex, entry.samplerIndex);
                continue;
            }
            if (Tags::addrMode == key) {
                allEntriesValid &= readZeInfoEnumChecked(parser, memberNd, entry.addrMode, context, outErrReason);
                continue;
            }
            if (Tags::filterMode == key) {
                allEntriesValid &= readZeInfoEnumChecked(parser, memberNd, entry.filterMode, context, outErrReason);
                continue;
            }
            if (Tags::normalized == key) {
                allEntriesValid &= readZeInfoValueChecked(parser, memberNd, entry.normalized, context, outErrReason);
                continue;
            }
            outWarning.append("DeviceBinaryFormat::Zebin::" + NEO::Elf::SectionsNamesZebin::zeInfo.str() + " : Unknown entry \"" + key.str() + "\" for inline sampler in context of " + context.str() + "\n");
        }
    }

    if (allEntriesValid) {
        return DecodeError::Success;
    }
    return DecodeError::InvalidBinary;
}
DecodeError readZeInfoBindingTableIndices(const NEO::Yaml::YamlParser &parser, const NEO::Yaml::Node &node,
ZeInfoBindingTableIndices &outBindingTableIndices, ZeInfoBindingTableIndices::value_type &outMaxBindingTableIndex,
ConstStringRef context,
@@ -1045,6 +1074,47 @@ NEO::DecodeError populateArgDescriptor(const NEO::Elf::ZebinKernelMetadata::Type
return DecodeError::Success;
}
// Converts one inline sampler entry decoded from zeinfo (src) into a
// KernelDescriptor::InlineSampler and appends it to dst.inlineSamplers.
//
// Returns DecodeError::InvalidBinary, with a message appended to outErrReason, when:
//  - the sampler index is negative (missing entries keep the -1 default),
//  - the addressing mode has no descriptor counterpart (e.g. Unknown),
//  - the filter mode has no descriptor counterpart (e.g. Unknown).
// Returns DecodeError::Success otherwise. outWarning is currently not written to.
NEO::DecodeError populateInlineSamplers(const NEO::Elf::ZebinKernelMetadata::Types::Kernel::InlineSamplers::InlineSamplerBaseT &src, NEO::KernelDescriptor &dst, std::string &outErrReason, std::string &outWarning) {
    NEO::KernelDescriptor::InlineSampler inlineSampler = {};

    // Reject every negative index, not only the -1 default: the descriptor stores the
    // index as unsigned, so any negative value would wrap to a huge index and offset.
    if (src.samplerIndex < 0) {
        outErrReason.append("DeviceBinaryFormat::Zebin : Invalid inline sampler index (must be >= 0) in context of : " + dst.kernelMetadata.kernelName + ".\n");
        return DecodeError::InvalidBinary;
    }
    inlineSampler.samplerIndex = static_cast<uint32_t>(src.samplerIndex);

    using AddrModeZeInfo = NEO::Elf::ZebinKernelMetadata::Types::Kernel::InlineSamplers::AddrModeT;
    using AddrModeDescriptor = NEO::KernelDescriptor::InlineSampler::AddrMode;
    constexpr LookupArray<AddrModeZeInfo, AddrModeDescriptor, 5> addrModes({{{AddrModeZeInfo::None, AddrModeDescriptor::None},
                                                                             {AddrModeZeInfo::Repeat, AddrModeDescriptor::Repeat},
                                                                             {AddrModeZeInfo::ClampEdge, AddrModeDescriptor::ClampEdge},
                                                                             {AddrModeZeInfo::ClampBorder, AddrModeDescriptor::ClampBorder},
                                                                             {AddrModeZeInfo::Mirror, AddrModeDescriptor::Mirror}}});
    auto addrMode = addrModes.find(src.addrMode);
    if (addrMode.has_value() == false) {
        outErrReason.append("DeviceBinaryFormat::Zebin : Invalid inline sampler addressing mode in context of : " + dst.kernelMetadata.kernelName + "\n");
        return DecodeError::InvalidBinary;
    }
    inlineSampler.addrMode = *addrMode;

    using FilterModeZeInfo = NEO::Elf::ZebinKernelMetadata::Types::Kernel::InlineSamplers::FilterModeT;
    using FilterModeDescriptor = NEO::KernelDescriptor::InlineSampler::FilterMode;
    constexpr LookupArray<FilterModeZeInfo, FilterModeDescriptor, 2> filterModes({{{FilterModeZeInfo::Nearest, FilterModeDescriptor::Nearest},
                                                                                   {FilterModeZeInfo::Linear, FilterModeDescriptor::Linear}}});
    auto filterMode = filterModes.find(src.filterMode);
    if (filterMode.has_value() == false) {
        outErrReason.append("DeviceBinaryFormat::Zebin : Invalid inline sampler filterMode mode in context of : " + dst.kernelMetadata.kernelName + "\n");
        return DecodeError::InvalidBinary;
    }
    inlineSampler.filterMode = *filterMode;

    inlineSampler.isNormalized = src.normalized;

    dst.inlineSamplers.push_back(inlineSampler);
    return DecodeError::Success;
}
NEO::DecodeError populateKernelDescriptor(const NEO::Elf::ZebinKernelMetadata::Types::Kernel::PerThreadMemoryBuffer::PerThreadMemoryBufferBaseT &src, NEO::KernelDescriptor &dst, uint32_t minScratchSpaceSize,
std::string &outErrReason, std::string &outWarning) {
using namespace NEO::Elf::ZebinKernelMetadata::Types::Kernel::PerThreadMemoryBuffer;
@@ -1150,6 +1220,15 @@ NEO::DecodeError populateKernelDescriptor(NEO::ProgramInfo &dst, NEO::Elf::Elf<n
}
}
ZeInfoInlineSamplers inlineSamplers;
if (false == zeInfokernelSections.inlineSamplersNd.empty()) {
auto decodeErr = readZeInfoInlineSamplers(yamlParser, *zeInfokernelSections.inlineSamplersNd[0], inlineSamplers, maxSamplerIndex,
kernelDescriptor.kernelMetadata.kernelName, outErrReason, outWarning);
if (DecodeError::Success != decodeErr) {
return decodeErr;
}
}
ZeInfoPerThreadMemoryBuffers perThreadMemoryBuffers;
if (false == zeInfokernelSections.perThreadMemoryBuffersNd.empty()) {
auto perThreadMemoryBuffersErr = readZeInfoPerThreadMemoryBuffers(yamlParser, *zeInfokernelSections.perThreadMemoryBuffersNd[0], perThreadMemoryBuffers,
@@ -1240,6 +1319,13 @@ NEO::DecodeError populateKernelDescriptor(NEO::ProgramInfo &dst, NEO::Elf::Elf<n
}
}
for (const auto &inlineSampler : inlineSamplers) {
auto decodeErr = populateInlineSamplers(inlineSampler, kernelDescriptor, outErrReason, outWarning);
if (DecodeError::Success != decodeErr) {
return decodeErr;
}
}
for (const auto &memBuff : perThreadMemoryBuffers) {
auto decodeErr = populateKernelDescriptor(memBuff, kernelDescriptor, dst.minScratchSpaceSize, outErrReason, outWarning);
if (DecodeError::Success != decodeErr) {

View File

@@ -45,6 +45,7 @@ struct ZeInfoKernelSections {
UniqueNode perThreadPayloadArgumentsNd;
UniqueNode perThreadMemoryBuffersNd;
UniqueNode experimentalPropertiesNd;
UniqueNode inlineSamplersNd;
};
template <Elf::ELF_IDENTIFIER_CLASS numBits>
@@ -93,6 +94,13 @@ DecodeError readZeInfoPayloadArguments(const NEO::Yaml::YamlParser &parser, cons
ConstStringRef context,
std::string &outErrReason, std::string &outWarning);
using ZeInfoInlineSamplers = StackVec<NEO::Elf::ZebinKernelMetadata::Types::Kernel::InlineSamplers::InlineSamplerBaseT, 4>;
DecodeError readZeInfoInlineSamplers(const NEO::Yaml::YamlParser &parser, const NEO::Yaml::Node &node,
ZeInfoInlineSamplers &outInlineSamplers,
int32_t &outMaxSamplerIndex,
ConstStringRef context,
std::string &outErrReason, std::string &outWarning);
using ZeInfoBindingTableIndices = StackVec<NEO::Elf::ZebinKernelMetadata::Types::Kernel::BindingTableEntry::BindingTableEntryBaseT, 32>;
DecodeError readZeInfoBindingTableIndices(const NEO::Yaml::YamlParser &parser, const NEO::Yaml::Node &node,
ZeInfoBindingTableIndices &outBindingTableIndices, ZeInfoBindingTableIndices::value_type &outMaxBindingTableIndex,
@@ -116,6 +124,8 @@ NEO::DecodeError populateArgDescriptor(const NEO::Elf::ZebinKernelMetadata::Type
NEO::DecodeError populateArgDescriptor(const NEO::Elf::ZebinKernelMetadata::Types::Kernel::PayloadArgument::PayloadArgumentBaseT &src, NEO::KernelDescriptor &dst, uint32_t &crossThreadDataSize,
std::string &outErrReason, std::string &outWarning);
NEO::DecodeError populateInlineSamplers(const NEO::Elf::ZebinKernelMetadata::Types::Kernel::InlineSamplers::InlineSamplerBaseT &src, NEO::KernelDescriptor &dst, std::string &outErrReason, std::string &outWarning);
template <Elf::ELF_IDENTIFIER_CLASS numBits>
NEO::DecodeError populateKernelDescriptor(NEO::ProgramInfo &dst, NEO::Elf::Elf<numBits> &elf, NEO::ZebinSections<numBits> &zebinSections,
NEO::Yaml::YamlParser &yamlParser, const NEO::Yaml::Node &kernelNd, std::string &outErrReason, std::string &outWarning);

View File

@@ -278,6 +278,31 @@ struct KernelDescriptor {
std::vector<ArgTypeMetadataExtended> explicitArgsExtendedMetadata;
// Description of one inline sampler of a kernel, plus the location of its
// state inside the dynamic state heap.
struct InlineSampler {
    // Addressing mode applied to the sampler.
    enum class AddrMode : uint8_t {
        None,
        Repeat,
        ClampEdge,
        ClampBorder,
        Mirror
    };
    // Filtering mode applied to the sampler.
    enum class FilterMode : uint8_t {
        Nearest,
        Linear
    };
    // Sizes, in bytes, used to compute where a sampler state lives in the heap.
    static constexpr size_t borderColorStateSize = 64U;
    static constexpr size_t samplerStateSize = 16U;

    // Members carry defaults so that a default-constructed object never holds
    // indeterminate values; the defaults equal what value-initialization yields.
    uint32_t samplerIndex = 0U;
    bool isNormalized = false;
    AddrMode addrMode = AddrMode::None;
    FilterMode filterMode = FilterMode::Nearest;

    // Byte offset of this sampler's state: the sampler states follow a block of
    // borderColorStateSize bytes and are laid out back to back by samplerIndex.
    constexpr uint32_t getSamplerBindfulOffset() const {
        // The size_t arithmetic is narrowed explicitly to the 32-bit return type.
        return static_cast<uint32_t>(borderColorStateSize + samplerStateSize * samplerIndex);
    }
};
std::vector<InlineSampler> inlineSamplers;
struct {
std::string kernelName;
std::string kernelLanguageAttributes;

View File

@@ -709,4 +709,11 @@ struct MatchAny {
static constexpr bool isMatched() { return true; }
};
struct SupportsSampler {
template <PRODUCT_FAMILY productFamily>
static constexpr bool isMatched() {
return NEO::HwMapper<productFamily>::GfxProduct::supportsSampler;
}
};
#include "common_matchers.h"

View File

@@ -6091,4 +6091,178 @@ kernels:
parseResult = parser.parse(zeInfoMissingEntrySamePrevInd, errors, warnings);
ASSERT_FALSE(parseResult);
EXPECT_STREQ("NEO::Yaml : Could not parse line : [5] : [per_thread_payload_arguments:] <-- parser position on error. Reason : Vector data type expects to have at least one value starting with -\n", errors.c_str());
}
}
// A kernel with one fully specified inline sampler must decode successfully and
// produce exactly one matching entry in the kernel descriptor.
TEST(PopulateKernelDescriptor, GivenValidInlineSamplersThenPopulateKernelDescriptorSucceeds) {
NEO::ConstStringRef zeinfo = R"===(
kernels:
- name : some_kernel
execution_env:
simd_size: 8
inline_samplers:
- sampler_index: 0
addrmode: clamp_edge
filtermode: nearest
normalized: true
...
)===";
NEO::ProgramInfo programInfo;
ZebinTestData::ValidEmptyProgram zebin;
// The zebin gets a text section named after the kernel declared in the zeinfo above.
zebin.appendSection(NEO::Elf::SHT_PROGBITS, NEO::Elf::SectionsNamesZebin::textPrefix.str() + "some_kernel", {});
std::string errors, warnings;
auto elf = NEO::Elf::decodeElf(zebin.storage, errors, warnings);
ASSERT_NE(nullptr, elf.elfFileHeader) << errors << " " << warnings;
NEO::Yaml::YamlParser parser;
bool parseSuccess = parser.parse(zeinfo, errors, warnings);
ASSERT_TRUE(parseSuccess) << errors << " " << warnings;
NEO::ZebinSections zebinSections;
auto extractErr = NEO::extractZebinSections(elf, zebinSections, errors, warnings);
ASSERT_EQ(NEO::DecodeError::Success, extractErr) << errors << " " << warnings;
// First child of the "kernels" node is the only kernel in the zeinfo.
auto &kernelNode = *parser.createChildrenRange(*parser.findNodeWithKeyDfs("kernels")).begin();
auto err = NEO::populateKernelDescriptor(programInfo, elf, zebinSections, parser, kernelNode, errors, warnings);
EXPECT_EQ(NEO::DecodeError::Success, err);
EXPECT_TRUE(errors.empty());
ASSERT_EQ(1U, programInfo.kernelInfos.size());
ASSERT_EQ(1U, programInfo.kernelInfos[0]->kernelDescriptor.inlineSamplers.size());
// The decoded sampler must mirror the values given in the zeinfo entry.
const auto &inlineSampler = programInfo.kernelInfos[0]->kernelDescriptor.inlineSamplers[0];
EXPECT_EQ(0U, inlineSampler.samplerIndex);
EXPECT_EQ(NEO::KernelDescriptor::InlineSampler::AddrMode::ClampEdge, inlineSampler.addrMode);
EXPECT_EQ(NEO::KernelDescriptor::InlineSampler::FilterMode::Nearest, inlineSampler.filterMode);
EXPECT_TRUE(inlineSampler.isNormalized);
}
// An inline sampler entry whose members all carry invalid values must make
// populateKernelDescriptor fail with InvalidBinary and report an error.
TEST(PopulateKernelDescriptor, GivenInvalidInlineSamplersEntryThenPopulateKernelDescriptorFails) {
NEO::ConstStringRef zeinfo = R"===(
kernels:
- name : some_kernel
execution_env:
simd_size: 8
inline_samplers:
- sampler_index: -1
addrmode: gibberish
filtermode: trash
normalized: dead_beef
...
)===";
NEO::ProgramInfo programInfo;
ZebinTestData::ValidEmptyProgram zebin;
// The zebin gets a text section named after the kernel declared in the zeinfo above.
zebin.appendSection(NEO::Elf::SHT_PROGBITS, NEO::Elf::SectionsNamesZebin::textPrefix.str() + "some_kernel", {});
std::string errors, warnings;
auto elf = NEO::Elf::decodeElf(zebin.storage, errors, warnings);
ASSERT_NE(nullptr, elf.elfFileHeader) << errors << " " << warnings;
NEO::Yaml::YamlParser parser;
// The text is still well-formed YAML, so parsing itself is expected to succeed.
bool parseSuccess = parser.parse(zeinfo, errors, warnings);
ASSERT_TRUE(parseSuccess) << errors << " " << warnings;
NEO::ZebinSections zebinSections;
auto extractErr = NEO::extractZebinSections(elf, zebinSections, errors, warnings);
ASSERT_EQ(NEO::DecodeError::Success, extractErr) << errors << " " << warnings;
auto &kernelNode = *parser.createChildrenRange(*parser.findNodeWithKeyDfs("kernels")).begin();
auto err = NEO::populateKernelDescriptor(programInfo, elf, zebinSections, parser, kernelNode, errors, warnings);
EXPECT_EQ(NEO::DecodeError::InvalidBinary, err);
EXPECT_FALSE(errors.empty());
}
// An inline sampler entry that provides only "addrmode" (no sampler index, no
// filter mode) must make populateKernelDescriptor fail with InvalidBinary.
TEST(PopulateKernelDescriptor, GivenMissingMemberInInlineSamplersThenPopulateKernelDescriptorFails) {
NEO::ConstStringRef zeinfo = R"===(
kernels:
- name : some_kernel
execution_env:
simd_size: 8
inline_samplers:
- addrmode: clamp_edge
...
)===";
NEO::ProgramInfo programInfo;
ZebinTestData::ValidEmptyProgram zebin;
// The zebin gets a text section named after the kernel declared in the zeinfo above.
zebin.appendSection(NEO::Elf::SHT_PROGBITS, NEO::Elf::SectionsNamesZebin::textPrefix.str() + "some_kernel", {});
std::string errors, warnings;
auto elf = NEO::Elf::decodeElf(zebin.storage, errors, warnings);
ASSERT_NE(nullptr, elf.elfFileHeader) << errors << " " << warnings;
NEO::Yaml::YamlParser parser;
bool parseSuccess = parser.parse(zeinfo, errors, warnings);
ASSERT_TRUE(parseSuccess) << errors << " " << warnings;
NEO::ZebinSections zebinSections;
auto extractErr = NEO::extractZebinSections(elf, zebinSections, errors, warnings);
ASSERT_EQ(NEO::DecodeError::Success, extractErr) << errors << " " << warnings;
auto &kernelNode = *parser.createChildrenRange(*parser.findNodeWithKeyDfs("kernels")).begin();
auto err = NEO::populateKernelDescriptor(programInfo, elf, zebinSections, parser, kernelNode, errors, warnings);
EXPECT_EQ(NEO::DecodeError::InvalidBinary, err);
}
// A sampler index of -1 must be rejected with InvalidBinary and an error message.
TEST(PopulateInlineSamplers, GivenInvalidSamplerIndexThenPopulateInlineSamplersFails) {
    namespace ZeInfoSamplerTypes = NEO::Elf::ZebinKernelMetadata::Types::Kernel::InlineSamplers;

    ZeInfoSamplerTypes::InlineSamplerBaseT src;
    src.samplerIndex = -1;
    src.addrMode = ZeInfoSamplerTypes::AddrMode::None;
    src.filterMode = ZeInfoSamplerTypes::FilterMode::Nearest;

    NEO::KernelDescriptor kernelDescriptor;
    std::string errors;
    std::string warnings;
    const auto err = populateInlineSamplers(src, kernelDescriptor, errors, warnings);

    EXPECT_EQ(NEO::DecodeError::InvalidBinary, err);
    EXPECT_FALSE(errors.empty());
}
// An Unknown addressing mode must be rejected with InvalidBinary and an error message.
TEST(PopulateInlineSamplers, GivenInvalidAddrModeThenPopulateInlineSamplersFails) {
    namespace ZeInfoSamplerTypes = NEO::Elf::ZebinKernelMetadata::Types::Kernel::InlineSamplers;

    ZeInfoSamplerTypes::InlineSamplerBaseT src;
    src.samplerIndex = 0;
    src.addrMode = ZeInfoSamplerTypes::AddrMode::Unknown;
    src.filterMode = ZeInfoSamplerTypes::FilterMode::Nearest;

    NEO::KernelDescriptor kernelDescriptor;
    std::string errors;
    std::string warnings;
    const auto err = populateInlineSamplers(src, kernelDescriptor, errors, warnings);

    EXPECT_EQ(NEO::DecodeError::InvalidBinary, err);
    EXPECT_FALSE(errors.empty());
}
// An Unknown filter mode must be rejected with InvalidBinary and an error message.
TEST(PopulateInlineSamplers, GivenInvalidFilterModeThenPopulateInlineSamplersFails) {
    namespace ZeInfoSamplerTypes = NEO::Elf::ZebinKernelMetadata::Types::Kernel::InlineSamplers;

    ZeInfoSamplerTypes::InlineSamplerBaseT src;
    src.samplerIndex = 0;
    src.addrMode = ZeInfoSamplerTypes::AddrMode::None;
    src.filterMode = ZeInfoSamplerTypes::FilterMode::Unknown;

    NEO::KernelDescriptor kernelDescriptor;
    std::string errors;
    std::string warnings;
    const auto err = populateInlineSamplers(src, kernelDescriptor, errors, warnings);

    EXPECT_EQ(NEO::DecodeError::InvalidBinary, err);
    EXPECT_FALSE(errors.empty());
}
// An unrecognized key inside an inline sampler entry must only produce a warning:
// reading still succeeds and no error is reported.
TEST(ReadZeInfoInlineSamplers, GivenUnknownEntryThenPrintWarning) {
    NEO::ConstStringRef yaml = R"===(---
kernels:
- name: some_kernel
inline_samplers:
- sampler_index: 0
new_entry: 3
...
)===";

    NEO::Yaml::YamlParser parser;
    std::string parserErrors;
    std::string parserWarnings;
    const bool parsed = parser.parse(yaml, parserErrors, parserWarnings);
    ASSERT_TRUE(parsed);

    auto &inlineSamplersNode = *parser.findNodeWithKeyDfs("inline_samplers");
    ZeInfoInlineSamplers inlineSamplers;
    int32_t maxSamplerIndex = -1;
    std::string errors;
    std::string warnings;
    const auto err = NEO::readZeInfoInlineSamplers(parser, inlineSamplersNode, inlineSamplers, maxSamplerIndex, "some_kernel", errors, warnings);

    EXPECT_EQ(NEO::DecodeError::Success, err);
    EXPECT_FALSE(warnings.empty());
    EXPECT_TRUE(errors.empty()) << errors;
}