feat(zebin): add eu_thread_count execution environment attribute
Signed-off-by: Krystian Chmielewski <krystian.chmielewski@intel.com>
This commit is contained in:
parent
82952a7d99
commit
0e5b0b1173
|
@ -138,6 +138,7 @@ constexpr ConstStringRef inlineSamplers("inline_samplers");
|
||||||
namespace ExecutionEnv {
|
namespace ExecutionEnv {
|
||||||
constexpr ConstStringRef barrierCount("barrier_count");
|
constexpr ConstStringRef barrierCount("barrier_count");
|
||||||
constexpr ConstStringRef disableMidThreadPreemption("disable_mid_thread_preemption");
|
constexpr ConstStringRef disableMidThreadPreemption("disable_mid_thread_preemption");
|
||||||
|
// Key string the execution-environment reader compares against to fill in euThreadCount.
constexpr ConstStringRef euThreadCount("eu_thread_count");
|
||||||
constexpr ConstStringRef grfCount("grf_count");
|
constexpr ConstStringRef grfCount("grf_count");
|
||||||
constexpr ConstStringRef has4gbBuffers("has_4gb_buffers");
|
constexpr ConstStringRef has4gbBuffers("has_4gb_buffers");
|
||||||
constexpr ConstStringRef hasDpas("has_dpas");
|
constexpr ConstStringRef hasDpas("has_dpas");
|
||||||
|
@ -402,6 +403,7 @@ enum ThreadSchedulingMode : uint8_t {
|
||||||
|
|
||||||
using ActualKernelStartOffsetT = int32_t;
|
using ActualKernelStartOffsetT = int32_t;
|
||||||
using BarrierCountT = int32_t;
|
using BarrierCountT = int32_t;
|
||||||
|
// Type of the euThreadCount field stored in the execution environment (signed 32-bit).
using EuThreadCountT = int32_t;
|
||||||
using DisableMidThreadPreemptionT = bool;
|
using DisableMidThreadPreemptionT = bool;
|
||||||
using GrfCountT = int32_t;
|
using GrfCountT = int32_t;
|
||||||
using Has4GBBuffersT = bool;
|
using Has4GBBuffersT = bool;
|
||||||
|
@ -431,6 +433,7 @@ using IndirectStatelessCountT = int32_t;
|
||||||
namespace Defaults {
|
namespace Defaults {
|
||||||
constexpr BarrierCountT barrierCount = 0;
|
constexpr BarrierCountT barrierCount = 0;
|
||||||
constexpr DisableMidThreadPreemptionT disableMidThreadPreemption = false;
|
constexpr DisableMidThreadPreemptionT disableMidThreadPreemption = false;
|
||||||
|
// Initial value of the execution environment's euThreadCount member.
// NOTE(review): presumably this is what remains when the key is absent from zeinfo — confirm against the reader.
constexpr EuThreadCountT euThreadCount = 0;
|
||||||
constexpr Has4GBBuffersT has4GBBuffers = false;
|
constexpr Has4GBBuffersT has4GBBuffers = false;
|
||||||
constexpr HasDpasT hasDpas = false;
|
constexpr HasDpasT hasDpas = false;
|
||||||
constexpr HasFenceForImageAccessT hasFenceForImageAccess = false;
|
constexpr HasFenceForImageAccessT hasFenceForImageAccess = false;
|
||||||
|
@ -462,6 +465,7 @@ constexpr ConstStringRef required[] = {
|
||||||
struct ExecutionEnvBaseT {
|
struct ExecutionEnvBaseT {
|
||||||
BarrierCountT barrierCount = Defaults::barrierCount;
|
BarrierCountT barrierCount = Defaults::barrierCount;
|
||||||
DisableMidThreadPreemptionT disableMidThreadPreemption = Defaults::disableMidThreadPreemption;
|
DisableMidThreadPreemptionT disableMidThreadPreemption = Defaults::disableMidThreadPreemption;
|
||||||
|
EuThreadCountT euThreadCount = Defaults::euThreadCount;
|
||||||
GrfCountT grfCount = -1;
|
GrfCountT grfCount = -1;
|
||||||
Has4GBBuffersT has4GBBuffers = Defaults::has4GBBuffers;
|
Has4GBBuffersT has4GBBuffers = Defaults::has4GBBuffers;
|
||||||
HasDpasT hasDpas = Defaults::hasDpas;
|
HasDpasT hasDpas = Defaults::hasDpas;
|
||||||
|
|
|
@ -367,6 +367,8 @@ DecodeError readZeInfoExecutionEnvironment(const NEO::Yaml::YamlParser &parser,
|
||||||
validExecEnv = validExecEnv & readZeInfoValueChecked(parser, execEnvMetadataNd, outExecEnv.barrierCount, context, outErrReason);
|
validExecEnv = validExecEnv & readZeInfoValueChecked(parser, execEnvMetadataNd, outExecEnv.barrierCount, context, outErrReason);
|
||||||
} else if (NEO::Elf::ZebinKernelMetadata::Tags::Kernel::ExecutionEnv::disableMidThreadPreemption == key) {
|
} else if (NEO::Elf::ZebinKernelMetadata::Tags::Kernel::ExecutionEnv::disableMidThreadPreemption == key) {
|
||||||
validExecEnv = validExecEnv & readZeInfoValueChecked(parser, execEnvMetadataNd, outExecEnv.disableMidThreadPreemption, context, outErrReason);
|
validExecEnv = validExecEnv & readZeInfoValueChecked(parser, execEnvMetadataNd, outExecEnv.disableMidThreadPreemption, context, outErrReason);
|
||||||
|
} else if (NEO::Elf::ZebinKernelMetadata::Tags::Kernel::ExecutionEnv::euThreadCount == key) {
|
||||||
|
validExecEnv &= readZeInfoValueChecked(parser, execEnvMetadataNd, outExecEnv.euThreadCount, context, outErrReason);
|
||||||
} else if (NEO::Elf::ZebinKernelMetadata::Tags::Kernel::ExecutionEnv::grfCount == key) {
|
} else if (NEO::Elf::ZebinKernelMetadata::Tags::Kernel::ExecutionEnv::grfCount == key) {
|
||||||
validExecEnv = validExecEnv & readZeInfoValueChecked(parser, execEnvMetadataNd, outExecEnv.grfCount, context, outErrReason);
|
validExecEnv = validExecEnv & readZeInfoValueChecked(parser, execEnvMetadataNd, outExecEnv.grfCount, context, outErrReason);
|
||||||
} else if (NEO::Elf::ZebinKernelMetadata::Tags::Kernel::ExecutionEnv::has4gbBuffers == key) {
|
} else if (NEO::Elf::ZebinKernelMetadata::Tags::Kernel::ExecutionEnv::has4gbBuffers == key) {
|
||||||
|
@ -1298,6 +1300,7 @@ NEO::DecodeError populateKernelDescriptor(NEO::ProgramInfo &dst, NEO::Elf::Elf<n
|
||||||
kernelDescriptor.kernelAttributes.workgroupWalkOrder[1] = static_cast<uint8_t>(execEnv.workgroupWalkOrderDimensions[1]);
|
kernelDescriptor.kernelAttributes.workgroupWalkOrder[1] = static_cast<uint8_t>(execEnv.workgroupWalkOrderDimensions[1]);
|
||||||
kernelDescriptor.kernelAttributes.workgroupWalkOrder[2] = static_cast<uint8_t>(execEnv.workgroupWalkOrderDimensions[2]);
|
kernelDescriptor.kernelAttributes.workgroupWalkOrder[2] = static_cast<uint8_t>(execEnv.workgroupWalkOrderDimensions[2]);
|
||||||
kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess = (execEnv.indirectStatelessCount > 0);
|
kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess = (execEnv.indirectStatelessCount > 0);
|
||||||
|
kernelDescriptor.kernelAttributes.numThreadsRequired = static_cast<uint32_t>(execEnv.euThreadCount);
|
||||||
|
|
||||||
using ThreadSchedulingMode = NEO::Elf::ZebinKernelMetadata::Types::Kernel::ExecutionEnv::ThreadSchedulingMode;
|
using ThreadSchedulingMode = NEO::Elf::ZebinKernelMetadata::Types::Kernel::ExecutionEnv::ThreadSchedulingMode;
|
||||||
switch (execEnv.threadSchedulingMode) {
|
switch (execEnv.threadSchedulingMode) {
|
||||||
|
|
|
@ -4247,6 +4247,7 @@ kernels:
|
||||||
simd_size : 32
|
simd_size : 32
|
||||||
slm_size : 1024
|
slm_size : 1024
|
||||||
subgroup_independent_forward_progress : true
|
subgroup_independent_forward_progress : true
|
||||||
|
eu_thread_count : 8
|
||||||
required_work_group_size:
|
required_work_group_size:
|
||||||
- 8
|
- 8
|
||||||
- 2
|
- 2
|
||||||
|
@ -4295,6 +4296,7 @@ kernels:
|
||||||
EXPECT_EQ(32U, kernelDescriptor.kernelAttributes.simdSize);
|
EXPECT_EQ(32U, kernelDescriptor.kernelAttributes.simdSize);
|
||||||
EXPECT_EQ(1024U, kernelDescriptor.kernelAttributes.slmInlineSize);
|
EXPECT_EQ(1024U, kernelDescriptor.kernelAttributes.slmInlineSize);
|
||||||
EXPECT_TRUE(kernelDescriptor.kernelAttributes.flags.requiresSubgroupIndependentForwardProgress);
|
EXPECT_TRUE(kernelDescriptor.kernelAttributes.flags.requiresSubgroupIndependentForwardProgress);
|
||||||
|
EXPECT_EQ(8U, kernelDescriptor.kernelAttributes.numThreadsRequired);
|
||||||
EXPECT_EQ(8U, kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0]);
|
EXPECT_EQ(8U, kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0]);
|
||||||
EXPECT_EQ(2U, kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1]);
|
EXPECT_EQ(2U, kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1]);
|
||||||
EXPECT_EQ(1U, kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2]);
|
EXPECT_EQ(1U, kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2]);
|
||||||
|
|
Loading…
Reference in New Issue