Disable EU fusion based on kernel properties from compiler

Related-To: NEO-6633

Signed-off-by: Konstanty Misiak <konstanty.misiak@intel.com>
This commit is contained in:
Konstanty Misiak
2022-02-10 23:33:40 +00:00
committed by Compute-Runtime-Automation
parent 13bc2300e1
commit cf1bc3a2ba
37 changed files with 228 additions and 95 deletions

View File

@@ -185,6 +185,7 @@ static constexpr ConstStringRef offsetToSkipPerThreadDataLoad("offset_to_skip_pe
// Key names of kernel execution-environment entries. readZeInfoExecutionEnvironment
// compares each parsed key against these constants to decide which field to fill.
static constexpr ConstStringRef offsetToSkipSetFfidGp("offset_to_skip_set_ffid_gp");
static constexpr ConstStringRef requiredSubGroupSize("required_sub_group_size");
static constexpr ConstStringRef requiredWorkGroupSize("required_work_group_size");
// Key for the flag that is later copied into kernelAttributes.flags.requiresDisabledEUFusion.
static constexpr ConstStringRef requireDisableEUFusion("require_disable_eufusion");
static constexpr ConstStringRef simdSize("simd_size");
static constexpr ConstStringRef slmSize("slm_size");
static constexpr ConstStringRef subgroupIndependentForwardProgress("subgroup_independent_forward_progress");
@@ -308,6 +309,7 @@ using OffsetToSkipPerThreadDataLoadT = int32_t;
// Value types of the execution-environment entries; one alias per entry.
using OffsetToSkipSetFfidGpT = int32_t;
using RequiredSubGroupSizeT = int32_t;
using RequiredWorkGroupSizeT = int32_t[3]; // three components — presumably one per work-group dimension; confirm
using RequireDisableEUFusionT = bool;      // on/off flag
using SimdSizeT = int32_t;
using SlmSizeT = int32_t;
using SubgroupIndependentForwardProgressT = bool;
@@ -332,6 +334,7 @@ static constexpr OffsetToSkipPerThreadDataLoadT offsetToSkipPerThreadDataLoad =
// Default values for execution-environment entries; ExecutionEnvBaseT initializes
// its members from these constants.
static constexpr OffsetToSkipSetFfidGpT offsetToSkipSetFfidGp = 0;
static constexpr RequiredSubGroupSizeT requiredSubGroupSize = 0;
static constexpr RequiredWorkGroupSizeT requiredWorkGroupSize = {0, 0, 0};
// The flag defaults to false, i.e. no request to disable EU fusion.
static constexpr RequireDisableEUFusionT requireDisableEUFusion = false;
static constexpr SlmSizeT slmSize = 0;
static constexpr SubgroupIndependentForwardProgressT subgroupIndependentForwardProgress = false;
static constexpr WorkgroupWalkOrderDimensionsT workgroupWalkOrderDimensions = {0, 1, 2};
@@ -360,6 +363,7 @@ struct ExecutionEnvBaseT {
OffsetToSkipSetFfidGpT offsetToSkipSetFfidGp = Defaults::offsetToSkipSetFfidGp;
RequiredSubGroupSizeT requiredSubGroupSize = Defaults::requiredSubGroupSize;
RequiredWorkGroupSizeT requiredWorkGroupSize = {Defaults::requiredWorkGroupSize[0], Defaults::requiredWorkGroupSize[1], Defaults::requiredWorkGroupSize[2]};
RequireDisableEUFusionT requireDisableEUFusion = Defaults::requireDisableEUFusion;
SimdSizeT simdSize = -1;
SlmSizeT slmSize = Defaults::slmSize;
SubgroupIndependentForwardProgressT subgroupIndependentForwardProgress = Defaults::subgroupIndependentForwardProgress;

View File

@@ -406,6 +406,7 @@ void dump(const SPatchExecutionEnvironment &value, std::stringstream &out, const
out << indent << " uint32_t WorkgroupWalkOrderDims;// = " << value.WorkgroupWalkOrderDims << "\n";
out << indent << " uint32_t HasGlobalAtomics;// = " << value.HasGlobalAtomics << "\n";
out << indent << " uint32_t HasStackCalls;// = " << value.HasStackCalls << "\n";
out << indent << " uint32_t RequireDisableEUFusion;// = " << value.RequireDisableEUFusion << "\n";
out << indent << "}\n";
}

View File

@@ -281,6 +281,8 @@ DecodeError readZeInfoExecutionEnvironment(const NEO::Yaml::YamlParser &parser,
validExecEnv = validExecEnv & readZeInfoValueChecked(parser, execEnvMetadataNd, outExecEnv.requiredSubGroupSize, context, outErrReason);
} else if (NEO::Elf::ZebinKernelMetadata::Tags::Kernel::ExecutionEnv::requiredWorkGroupSize == key) {
validExecEnv = validExecEnv & readZeInfoValueCollectionChecked(outExecEnv.requiredWorkGroupSize, parser, execEnvMetadataNd, context, outErrReason);
} else if (NEO::Elf::ZebinKernelMetadata::Tags::Kernel::ExecutionEnv::requireDisableEUFusion == key) {
validExecEnv = validExecEnv & readZeInfoValueChecked(parser, execEnvMetadataNd, outExecEnv.requireDisableEUFusion, context, outErrReason);
} else if (NEO::Elf::ZebinKernelMetadata::Tags::Kernel::ExecutionEnv::simdSize == key) {
validExecEnv = validExecEnv & readZeInfoValueChecked(parser, execEnvMetadataNd, outExecEnv.simdSize, context, outErrReason);
} else if (NEO::Elf::ZebinKernelMetadata::Tags::Kernel::ExecutionEnv::slmSize == key) {
@@ -1032,6 +1034,7 @@ NEO::DecodeError populateKernelDescriptor(NEO::ProgramInfo &dst, NEO::Elf::Elf<N
kernelDescriptor.kernelAttributes.flags.passInlineData = (execEnv.inlineDataPayloadSize != 0);
kernelDescriptor.kernelAttributes.flags.requiresDisabledMidThreadPreemption = execEnv.disableMidThreadPreemption;
kernelDescriptor.kernelAttributes.flags.requiresSubgroupIndependentForwardProgress = execEnv.subgroupIndependentForwardProgress;
kernelDescriptor.kernelAttributes.flags.requiresDisabledEUFusion = execEnv.requireDisableEUFusion;
kernelDescriptor.kernelAttributes.flags.useGlobalAtomics = execEnv.hasGlobalAtomics;
kernelDescriptor.kernelAttributes.flags.useStackCalls = execEnv.hasStackCalls;
kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages = execEnv.hasFenceForImageAccess;