feat(zebin): add eu_thread_count exec env attrib

Signed-off-by: Krystian Chmielewski <krystian.chmielewski@intel.com>
This commit is contained in:
Krystian Chmielewski 2022-11-22 15:49:55 +00:00 committed by Compute-Runtime-Automation
parent 82952a7d99
commit 0e5b0b1173
3 changed files with 9 additions and 0 deletions

View File

@ -138,6 +138,7 @@ constexpr ConstStringRef inlineSamplers("inline_samplers");
namespace ExecutionEnv {
constexpr ConstStringRef barrierCount("barrier_count");
constexpr ConstStringRef disableMidThreadPreemption("disable_mid_thread_preemption");
constexpr ConstStringRef euThreadCount("eu_thread_count");
constexpr ConstStringRef grfCount("grf_count");
constexpr ConstStringRef has4gbBuffers("has_4gb_buffers");
constexpr ConstStringRef hasDpas("has_dpas");
@ -402,6 +403,7 @@ enum ThreadSchedulingMode : uint8_t {
// Scalar value-type aliases; keep this list in alphabetical order.
using ActualKernelStartOffsetT = int32_t;
using BarrierCountT = int32_t;
using DisableMidThreadPreemptionT = bool;
// Value type read for the "eu_thread_count" execution-environment attribute.
using EuThreadCountT = int32_t;
using GrfCountT = int32_t;
using Has4GBBuffersT = bool;
@ -431,6 +433,7 @@ using IndirectStatelessCountT = int32_t;
namespace Defaults {
constexpr BarrierCountT barrierCount = 0;
constexpr DisableMidThreadPreemptionT disableMidThreadPreemption = false;
constexpr EuThreadCountT euThreadCount = 0;
constexpr Has4GBBuffersT has4GBBuffers = false;
constexpr HasDpasT hasDpas = false;
constexpr HasFenceForImageAccessT hasFenceForImageAccess = false;
@ -462,6 +465,7 @@ constexpr ConstStringRef required[] = {
struct ExecutionEnvBaseT {
BarrierCountT barrierCount = Defaults::barrierCount;
DisableMidThreadPreemptionT disableMidThreadPreemption = Defaults::disableMidThreadPreemption;
EuThreadCountT euThreadCount = Defaults::euThreadCount;
GrfCountT grfCount = -1;
Has4GBBuffersT has4GBBuffers = Defaults::has4GBBuffers;
HasDpasT hasDpas = Defaults::hasDpas;

View File

@ -367,6 +367,8 @@ DecodeError readZeInfoExecutionEnvironment(const NEO::Yaml::YamlParser &parser,
validExecEnv = validExecEnv & readZeInfoValueChecked(parser, execEnvMetadataNd, outExecEnv.barrierCount, context, outErrReason);
} else if (NEO::Elf::ZebinKernelMetadata::Tags::Kernel::ExecutionEnv::disableMidThreadPreemption == key) {
validExecEnv = validExecEnv & readZeInfoValueChecked(parser, execEnvMetadataNd, outExecEnv.disableMidThreadPreemption, context, outErrReason);
} else if (NEO::Elf::ZebinKernelMetadata::Tags::Kernel::ExecutionEnv::euThreadCount == key) {
validExecEnv &= readZeInfoValueChecked(parser, execEnvMetadataNd, outExecEnv.euThreadCount, context, outErrReason);
} else if (NEO::Elf::ZebinKernelMetadata::Tags::Kernel::ExecutionEnv::grfCount == key) {
validExecEnv = validExecEnv & readZeInfoValueChecked(parser, execEnvMetadataNd, outExecEnv.grfCount, context, outErrReason);
} else if (NEO::Elf::ZebinKernelMetadata::Tags::Kernel::ExecutionEnv::has4gbBuffers == key) {
@ -1298,6 +1300,7 @@ NEO::DecodeError populateKernelDescriptor(NEO::ProgramInfo &dst, NEO::Elf::Elf<n
kernelDescriptor.kernelAttributes.workgroupWalkOrder[1] = static_cast<uint8_t>(execEnv.workgroupWalkOrderDimensions[1]);
kernelDescriptor.kernelAttributes.workgroupWalkOrder[2] = static_cast<uint8_t>(execEnv.workgroupWalkOrderDimensions[2]);
kernelDescriptor.kernelAttributes.hasIndirectStatelessAccess = (execEnv.indirectStatelessCount > 0);
kernelDescriptor.kernelAttributes.numThreadsRequired = static_cast<uint32_t>(execEnv.euThreadCount);
using ThreadSchedulingMode = NEO::Elf::ZebinKernelMetadata::Types::Kernel::ExecutionEnv::ThreadSchedulingMode;
switch (execEnv.threadSchedulingMode) {

View File

@ -4247,6 +4247,7 @@ kernels:
simd_size : 32
slm_size : 1024
subgroup_independent_forward_progress : true
eu_thread_count : 8
required_work_group_size:
- 8
- 2
@ -4295,6 +4296,7 @@ kernels:
EXPECT_EQ(32U, kernelDescriptor.kernelAttributes.simdSize);
EXPECT_EQ(1024U, kernelDescriptor.kernelAttributes.slmInlineSize);
EXPECT_TRUE(kernelDescriptor.kernelAttributes.flags.requiresSubgroupIndependentForwardProgress);
EXPECT_EQ(8U, kernelDescriptor.kernelAttributes.numThreadsRequired);
EXPECT_EQ(8U, kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0]);
EXPECT_EQ(2U, kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1]);
EXPECT_EQ(1U, kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2]);