431 lines
16 KiB
C++
431 lines
16 KiB
C++
/*
|
|
* Copyright (C) 2020-2021 Intel Corporation
|
|
*
|
|
* SPDX-License-Identifier: MIT
|
|
*
|
|
*/
|
|
|
|
#pragma once
|
|
|
|
#include "shared/source/device_binary_format/elf/elf.h"
|
|
#include "shared/source/utilities/const_stringref.h"
|
|
|
|
#include <inttypes.h>
|
|
#include <stddef.h>
|
|
|
|
namespace NEO {
|
|
|
|
namespace Elf {
|
|
|
|
enum ELF_TYPE_ZEBIN : uint16_t {
|
|
ET_ZEBIN_REL = 0xff11, // A relocatable ZE binary file
|
|
ET_ZEBIN_EXE = 0xff12, // An executable ZE binary file
|
|
ET_ZEBIN_DYN = 0xff13, // A shared object ZE binary file
|
|
};
|
|
|
|
enum SHT_ZEBIN : uint32_t {
|
|
SHT_ZEBIN_SPIRV = 0xff000009, // .spv.kernel section, value the same as SHT_OPENCL_SPIRV
|
|
SHT_ZEBIN_ZEINFO = 0xff000011, // .ze_info section
|
|
SHT_ZEBIN_GTPIN_INFO = 0xff000012 // .gtpin_info section
|
|
};
|
|
|
|
namespace SectionsNamesZebin {
|
|
static constexpr ConstStringRef textPrefix = ".text.";
|
|
static constexpr ConstStringRef dataConst = ".data.const";
|
|
static constexpr ConstStringRef dataGlobalConst = ".data.global_const";
|
|
static constexpr ConstStringRef dataGlobal = ".data.global";
|
|
static constexpr ConstStringRef symtab = ".symtab";
|
|
static constexpr ConstStringRef relTablePrefix = ".rel.";
|
|
static constexpr ConstStringRef spv = ".spv";
|
|
static constexpr ConstStringRef debugInfo = ".debug_info";
|
|
static constexpr ConstStringRef zeInfo = ".ze_info";
|
|
static constexpr ConstStringRef gtpinInfo = ".gtpin_info";
|
|
} // namespace SectionsNamesZebin
|
|
|
|
struct ZebinTargetFlags {
|
|
union {
|
|
struct {
|
|
// bit[7:0]: dedicated for specific generator (meaning based on generatorId)
|
|
uint8_t generatorSpecificFlags : 8;
|
|
|
|
// bit[12:8]: values [0-31], min compatbile device revision Id (stepping)
|
|
uint8_t minHwRevisionId : 5;
|
|
|
|
// bit[13:13]:
|
|
// 0 - full validation during decoding (safer decoding)
|
|
// 1 - no validation (faster decoding - recommended for known generators)
|
|
bool validateRevisionId : 1;
|
|
|
|
// bit[14:14]:
|
|
// 0 - ignore minHwRevisionId and maxHwRevisionId
|
|
// 1 - underlying device must match specified revisionId info
|
|
bool disableExtendedValidation : 1;
|
|
|
|
// bit[15:15]:
|
|
// 0 - elfFileHeader::machine is PRODUCT_FAMILY
|
|
// 1 - elfFileHeader::machine is GFXCORE_FAMILY
|
|
bool machineEntryUsesGfxCoreInsteadOfProductFamily : 1;
|
|
|
|
// bit[20:16]: max compatbile device revision Id (stepping)
|
|
uint8_t maxHwRevisionId : 5;
|
|
|
|
// bit[23:21]: generator of this device binary
|
|
// 0 - Unregistered
|
|
// 1 - IGC
|
|
uint8_t generatorId : 3;
|
|
|
|
// bit[31:24]: MBZ, reserved for future use
|
|
uint8_t reserved : 8;
|
|
};
|
|
uint32_t packed = 0U;
|
|
};
|
|
};
|
|
static_assert(sizeof(ZebinTargetFlags) == sizeof(uint32_t), "");
|
|
|
|
namespace ZebinKernelMetadata {
|
|
namespace Tags {
|
|
static constexpr ConstStringRef kernels("kernels");
|
|
static constexpr ConstStringRef version("version");
|
|
namespace Kernel {
|
|
static constexpr ConstStringRef name("name");
|
|
static constexpr ConstStringRef executionEnv("execution_env");
|
|
static constexpr ConstStringRef payloadArguments("payload_arguments");
|
|
static constexpr ConstStringRef bindingTableIndices("binding_table_indices");
|
|
static constexpr ConstStringRef perThreadPayloadArguments("per_thread_payload_arguments");
|
|
static constexpr ConstStringRef perThreadMemoryBuffers("per_thread_memory_buffers");
|
|
static constexpr ConstStringRef experimentalProperties("experimental_properties");
|
|
|
|
namespace ExecutionEnv {
|
|
static constexpr ConstStringRef actualKernelStartOffset("actual_kernel_start_offset");
|
|
static constexpr ConstStringRef barrierCount("barrier_count");
|
|
static constexpr ConstStringRef disableMidThreadPreemption("disable_mid_thread_preemption");
|
|
static constexpr ConstStringRef grfCount("grf_count");
|
|
static constexpr ConstStringRef has4gbBuffers("has_4gb_buffers");
|
|
static constexpr ConstStringRef hasDeviceEnqueue("has_device_enqueue");
|
|
static constexpr ConstStringRef hasFenceForImageAccess("has_fence_for_image_access");
|
|
static constexpr ConstStringRef hasGlobalAtomics("has_global_atomics");
|
|
static constexpr ConstStringRef hasMultiScratchSpaces("has_multi_scratch_spaces");
|
|
static constexpr ConstStringRef hasNoStatelessWrite("has_no_stateless_write");
|
|
static constexpr ConstStringRef hwPreemptionMode("hw_preemption_mode");
|
|
static constexpr ConstStringRef offsetToSkipPerThreadDataLoad("offset_to_skip_per_thread_data_load");
|
|
static constexpr ConstStringRef offsetToSkipSetFfidGp("offset_to_skip_set_ffid_gp");
|
|
static constexpr ConstStringRef requiredSubGroupSize("required_sub_group_size");
|
|
static constexpr ConstStringRef requiredWorkGroupSize("required_work_group_size");
|
|
static constexpr ConstStringRef simdSize("simd_size");
|
|
static constexpr ConstStringRef slmSize("slm_size");
|
|
static constexpr ConstStringRef subgroupIndependentForwardProgress("subgroup_independent_forward_progress");
|
|
static constexpr ConstStringRef workGroupWalkOrderDimensions("work_group_walk_order_dimensions");
|
|
} // namespace ExecutionEnv
|
|
|
|
namespace PayloadArgument {
|
|
static constexpr ConstStringRef argType("arg_type");
|
|
static constexpr ConstStringRef argIndex("arg_index");
|
|
static constexpr ConstStringRef offset("offset");
|
|
static constexpr ConstStringRef size("size");
|
|
static constexpr ConstStringRef addrmode("addrmode");
|
|
static constexpr ConstStringRef addrspace("addrspace");
|
|
static constexpr ConstStringRef accessType("access_type");
|
|
static constexpr ConstStringRef samplerIndex("sampler_index");
|
|
namespace ArgType {
|
|
static constexpr ConstStringRef localSize("local_size");
|
|
static constexpr ConstStringRef groupCount("group_count");
|
|
static constexpr ConstStringRef globalIdOffset("global_id_offset");
|
|
static constexpr ConstStringRef globalSize("global_size");
|
|
static constexpr ConstStringRef enqueuedLocalSize("enqueued_local_size");
|
|
static constexpr ConstStringRef privateBaseStateless("private_base_stateless");
|
|
static constexpr ConstStringRef argByvalue("arg_byvalue");
|
|
static constexpr ConstStringRef argBypointer("arg_bypointer");
|
|
static constexpr ConstStringRef bufferOffset("buffer_offset");
|
|
static constexpr ConstStringRef printfBuffer("printf_buffer");
|
|
} // namespace ArgType
|
|
namespace MemoryAddressingMode {
|
|
static constexpr ConstStringRef stateless("stateless");
|
|
static constexpr ConstStringRef stateful("stateful");
|
|
static constexpr ConstStringRef bindless("bindless");
|
|
static constexpr ConstStringRef sharedLocalMemory("slm");
|
|
} // namespace MemoryAddressingMode
|
|
namespace AddrSpace {
|
|
static constexpr ConstStringRef global("global");
|
|
static constexpr ConstStringRef local("local");
|
|
static constexpr ConstStringRef constant("constant");
|
|
static constexpr ConstStringRef image("image");
|
|
static constexpr ConstStringRef sampler("sampler");
|
|
} // namespace AddrSpace
|
|
namespace AccessType {
|
|
static constexpr ConstStringRef readonly("readonly");
|
|
static constexpr ConstStringRef writeonly("writeonly");
|
|
static constexpr ConstStringRef readwrite("readwrite");
|
|
} // namespace AccessType
|
|
} // namespace PayloadArgument
|
|
|
|
namespace BindingTableIndex {
|
|
static constexpr ConstStringRef btiValue("bti_value");
|
|
static constexpr ConstStringRef argIndex("arg_index");
|
|
} // namespace BindingTableIndex
|
|
|
|
namespace PerThreadPayloadArgument {
|
|
static constexpr ConstStringRef argType("arg_type");
|
|
static constexpr ConstStringRef offset("offset");
|
|
static constexpr ConstStringRef size("size");
|
|
namespace ArgType {
|
|
static constexpr ConstStringRef packedLocalIds("packed_local_ids");
|
|
static constexpr ConstStringRef localId("local_id");
|
|
} // namespace ArgType
|
|
} // namespace PerThreadPayloadArgument
|
|
|
|
namespace PerThreadMemoryBuffer {
|
|
static constexpr ConstStringRef allocationType("type");
|
|
static constexpr ConstStringRef memoryUsage("usage");
|
|
static constexpr ConstStringRef size("size");
|
|
static constexpr ConstStringRef isSimtThread("is_simt_thread");
|
|
static constexpr ConstStringRef slot("slot");
|
|
namespace AllocationType {
|
|
static constexpr ConstStringRef global("global");
|
|
static constexpr ConstStringRef scratch("scratch");
|
|
static constexpr ConstStringRef slm("slm");
|
|
} // namespace AllocationType
|
|
namespace MemoryUsage {
|
|
static constexpr ConstStringRef privateSpace("private_space");
|
|
static constexpr ConstStringRef spillFillSpace("spill_fill_space");
|
|
static constexpr ConstStringRef singleSpace("single_space");
|
|
} // namespace MemoryUsage
|
|
} // namespace PerThreadMemoryBuffer
|
|
namespace ExperimentalProperties {
|
|
static constexpr ConstStringRef hasNonKernelArgLoad("has_non_kernel_arg_load");
|
|
static constexpr ConstStringRef hasNonKernelArgStore("has_non_kernel_arg_store");
|
|
static constexpr ConstStringRef hasNonKernelArgAtomic("has_non_kernel_arg_atomic");
|
|
} // namespace ExperimentalProperties
|
|
} // namespace Kernel
|
|
} // namespace Tags
|
|
|
|
namespace Types {
|
|
|
|
struct Version {
|
|
uint32_t major = 0U;
|
|
uint32_t minor = 0U;
|
|
};
|
|
|
|
namespace Kernel {
|
|
namespace ExecutionEnv {
|
|
using ActualKernelStartOffsetT = int32_t;
|
|
using BarrierCountT = int32_t;
|
|
using DisableMidThreadPreemptionT = bool;
|
|
using GrfCountT = int32_t;
|
|
using Has4GBBuffersT = bool;
|
|
using HasDeviceEnqueueT = bool;
|
|
using HasFenceForImageAccessT = bool;
|
|
using HasGlobalAtomicsT = bool;
|
|
using HasMultiScratchSpacesT = bool;
|
|
using HasNoStatelessWriteT = bool;
|
|
using HwPreemptionModeT = int32_t;
|
|
using OffsetToSkipPerThreadDataLoadT = int32_t;
|
|
using OffsetToSkipSetFfidGpT = int32_t;
|
|
using RequiredSubGroupSizeT = int32_t;
|
|
using RequiredWorkGroupSizeT = int32_t[3];
|
|
using SimdSizeT = int32_t;
|
|
using SlmSizeT = int32_t;
|
|
using SubgroupIndependentForwardProgressT = bool;
|
|
using WorkgroupWalkOrderDimensionsT = int32_t[3];
|
|
using HasNonKernelArgLoad = int32_t;
|
|
using HasNonKernelArgStore = int32_t;
|
|
using HasNonKernelArgAtomic = int32_t;
|
|
|
|
namespace Defaults {
|
|
static constexpr BarrierCountT barrierCount = 0;
|
|
static constexpr DisableMidThreadPreemptionT disableMidThreadPreemption = false;
|
|
static constexpr Has4GBBuffersT has4GBBuffers = false;
|
|
static constexpr HasDeviceEnqueueT hasDeviceEnqueue = false;
|
|
static constexpr HasFenceForImageAccessT hasFenceForImageAccess = false;
|
|
static constexpr HasGlobalAtomicsT hasGlobalAtomics = false;
|
|
static constexpr HasMultiScratchSpacesT hasMultiScratchSpaces = false;
|
|
static constexpr HasNoStatelessWriteT hasNoStatelessWrite = false;
|
|
static constexpr HwPreemptionModeT hwPreemptionMode = -1;
|
|
static constexpr OffsetToSkipPerThreadDataLoadT offsetToSkipPerThreadDataLoad = 0;
|
|
static constexpr OffsetToSkipSetFfidGpT offsetToSkipSetFfidGp = 0;
|
|
static constexpr RequiredSubGroupSizeT requiredSubGroupSize = 0;
|
|
static constexpr RequiredWorkGroupSizeT requiredWorkGroupSize = {0, 0, 0};
|
|
static constexpr SlmSizeT slmSize = 0;
|
|
static constexpr SubgroupIndependentForwardProgressT subgroupIndependentForwardProgress = false;
|
|
static constexpr WorkgroupWalkOrderDimensionsT workgroupWalkOrderDimensions = {0, 1, 2};
|
|
static constexpr HasNonKernelArgLoad hasNonKernelArgLoad = false;
|
|
static constexpr HasNonKernelArgStore hasNonKernelArgStore = false;
|
|
static constexpr HasNonKernelArgAtomic hasNonKernelArgAtomic = false;
|
|
} // namespace Defaults
|
|
|
|
static constexpr ConstStringRef required[] = {
|
|
Tags::Kernel::ExecutionEnv::actualKernelStartOffset,
|
|
Tags::Kernel::ExecutionEnv::grfCount,
|
|
Tags::Kernel::ExecutionEnv::simdSize};
|
|
|
|
struct ExecutionEnvBaseT {
|
|
ActualKernelStartOffsetT actualKernelStartOffset = -1;
|
|
BarrierCountT barrierCount = Defaults::barrierCount;
|
|
DisableMidThreadPreemptionT disableMidThreadPreemption = Defaults::disableMidThreadPreemption;
|
|
GrfCountT grfCount = -1;
|
|
Has4GBBuffersT has4GBBuffers = Defaults::has4GBBuffers;
|
|
HasDeviceEnqueueT hasDeviceEnqueue = Defaults::hasDeviceEnqueue;
|
|
HasFenceForImageAccessT hasFenceForImageAccess = Defaults::hasFenceForImageAccess;
|
|
HasGlobalAtomicsT hasGlobalAtomics = Defaults::hasGlobalAtomics;
|
|
HasMultiScratchSpacesT hasMultiScratchSpaces = Defaults::hasMultiScratchSpaces;
|
|
HasNoStatelessWriteT hasNoStatelessWrite = Defaults::hasNoStatelessWrite;
|
|
HwPreemptionModeT hwPreemptionMode = Defaults::hwPreemptionMode;
|
|
OffsetToSkipPerThreadDataLoadT offsetToSkipPerThreadDataLoad = Defaults::offsetToSkipPerThreadDataLoad;
|
|
OffsetToSkipSetFfidGpT offsetToSkipSetFfidGp = Defaults::offsetToSkipSetFfidGp;
|
|
RequiredSubGroupSizeT requiredSubGroupSize = Defaults::requiredSubGroupSize;
|
|
RequiredWorkGroupSizeT requiredWorkGroupSize = {Defaults::requiredWorkGroupSize[0], Defaults::requiredWorkGroupSize[1], Defaults::requiredWorkGroupSize[2]};
|
|
SimdSizeT simdSize = -1;
|
|
SlmSizeT slmSize = Defaults::slmSize;
|
|
SubgroupIndependentForwardProgressT subgroupIndependentForwardProgress = Defaults::subgroupIndependentForwardProgress;
|
|
WorkgroupWalkOrderDimensionsT workgroupWalkOrderDimensions{Defaults::workgroupWalkOrderDimensions[0], Defaults::workgroupWalkOrderDimensions[1], Defaults::workgroupWalkOrderDimensions[2]};
|
|
};
|
|
|
|
struct ExperimentalPropertiesBaseT {
|
|
HasNonKernelArgLoad hasNonKernelArgLoad = Defaults::hasNonKernelArgLoad;
|
|
HasNonKernelArgStore hasNonKernelArgStore = Defaults::hasNonKernelArgStore;
|
|
HasNonKernelArgAtomic hasNonKernelArgAtomic = Defaults::hasNonKernelArgAtomic;
|
|
};
|
|
|
|
} // namespace ExecutionEnv
|
|
|
|
enum ArgType : uint8_t {
|
|
ArgTypeUnknown = 0,
|
|
ArgTypePackedLocalIds = 1,
|
|
ArgTypeLocalId,
|
|
ArgTypeLocalSize,
|
|
ArgTypeGroupCount,
|
|
ArgTypeGlobalSize,
|
|
ArgTypeEnqueuedLocalSize,
|
|
ArgTypeGlobalIdOffset,
|
|
ArgTypePrivateBaseStateless,
|
|
ArgTypeArgByvalue,
|
|
ArgTypeArgBypointer,
|
|
ArgTypeBufferOffset,
|
|
ArgTypePrintfBuffer
|
|
};
|
|
|
|
namespace PerThreadPayloadArgument {
|
|
|
|
using OffsetT = int32_t;
|
|
using SizeT = int32_t;
|
|
using ArgTypeT = ArgType;
|
|
|
|
namespace Defaults {
|
|
}
|
|
|
|
struct PerThreadPayloadArgumentBaseT {
|
|
ArgTypeT argType = ArgTypeUnknown;
|
|
OffsetT offset = -1;
|
|
SizeT size = -1;
|
|
};
|
|
|
|
} // namespace PerThreadPayloadArgument
|
|
|
|
namespace PayloadArgument {
|
|
|
|
enum MemoryAddressingMode : uint8_t {
|
|
MemoryAddressingModeUnknown = 0,
|
|
MemoryAddressingModeStateful = 1,
|
|
MemoryAddressingModeStateless,
|
|
MemoryAddressingModeBindless,
|
|
MemoryAddressingModeSharedLocalMemory,
|
|
};
|
|
|
|
enum AddressSpace : uint8_t {
|
|
AddressSpaceUnknown = 0,
|
|
AddressSpaceGlobal = 1,
|
|
AddressSpaceLocal,
|
|
AddressSpaceConstant,
|
|
AddressSpaceImage,
|
|
AddressSpaceSampler,
|
|
};
|
|
|
|
enum AccessType : uint8_t {
|
|
AccessTypeUnknown = 0,
|
|
AccessTypeReadonly = 1,
|
|
AccessTypeWriteonly,
|
|
AccessTypeReadwrite,
|
|
};
|
|
|
|
using ArgTypeT = ArgType;
|
|
using OffseT = int32_t;
|
|
using SizeT = int32_t;
|
|
using ArgIndexT = int32_t;
|
|
using AddrmodeT = MemoryAddressingMode;
|
|
using AddrspaceT = AddressSpace;
|
|
using AccessTypeT = AccessType;
|
|
using SlmAlignment = uint8_t;
|
|
using SamplerIndexT = int32_t;
|
|
|
|
namespace Defaults {
|
|
static constexpr ArgIndexT argIndex = -1;
|
|
static constexpr SlmAlignment slmArgAlignment = 16U;
|
|
static constexpr SamplerIndexT samplerIndex = -1;
|
|
} // namespace Defaults
|
|
|
|
struct PayloadArgumentBaseT {
|
|
ArgTypeT argType = ArgTypeUnknown;
|
|
OffseT offset = 0;
|
|
SizeT size = 0;
|
|
ArgIndexT argIndex = Defaults::argIndex;
|
|
AddrmodeT addrmode = MemoryAddressingModeUnknown;
|
|
AddrspaceT addrspace = AddressSpaceUnknown;
|
|
AccessTypeT accessType = AccessTypeUnknown;
|
|
SamplerIndexT samplerIndex = Defaults::samplerIndex;
|
|
};
|
|
|
|
} // namespace PayloadArgument
|
|
|
|
namespace BindingTableEntry {
|
|
using BtiValueT = int32_t;
|
|
using ArgIndexT = int32_t;
|
|
struct BindingTableEntryBaseT {
|
|
BtiValueT btiValue = 0U;
|
|
ArgIndexT argIndex = 0U;
|
|
};
|
|
} // namespace BindingTableEntry
|
|
|
|
namespace PerThreadMemoryBuffer {
|
|
enum AllocationType : uint8_t {
|
|
AllocationTypeUnknown = 0,
|
|
AllocationTypeGlobal,
|
|
AllocationTypeScratch,
|
|
AllocationTypeSlm
|
|
};
|
|
|
|
enum MemoryUsage : uint8_t {
|
|
MemoryUsageUnknown = 0,
|
|
MemoryUsagePrivateSpace,
|
|
MemoryUsageSpillFillSpace,
|
|
MemoryUsageSingleSpace
|
|
};
|
|
|
|
using SizeT = int32_t;
|
|
using AllocationTypeT = AllocationType;
|
|
using MemoryUsageT = MemoryUsage;
|
|
using IsSimtThreadT = bool;
|
|
using Slot = int32_t;
|
|
|
|
namespace Defaults {
|
|
static constexpr IsSimtThreadT isSimtThread = false;
|
|
static constexpr Slot slot = 0U;
|
|
} // namespace Defaults
|
|
|
|
struct PerThreadMemoryBufferBaseT {
|
|
AllocationType allocationType = AllocationTypeUnknown;
|
|
MemoryUsageT memoryUsage = MemoryUsageUnknown;
|
|
SizeT size = 0U;
|
|
IsSimtThreadT isSimtThread = Defaults::isSimtThread;
|
|
Slot slot = Defaults::slot;
|
|
};
|
|
} // namespace PerThreadMemoryBuffer
|
|
|
|
} // namespace Kernel
|
|
|
|
} // namespace Types
|
|
|
|
} // namespace ZebinKernelMetadata
|
|
|
|
} // namespace Elf
|
|
|
|
} // namespace NEO
|