mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-05 09:09:04 +08:00
Disable EUFusion for odd work groups with DPAS on DG2
Related-To: NEO-7495, HSD-14017007475
Signed-off-by: Maciej Plewka <maciej.plewka@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
4c58eda90d
commit
429be6b4cb
@@ -259,9 +259,9 @@ struct CommandListCoreFamily : CommandListImp {
|
||||
const CmdListKernelLaunchParams &launchParams);
|
||||
|
||||
ze_result_t prepareIndirectParams(const ze_group_count_t *threadGroupDimensions);
|
||||
void updateStreamProperties(Kernel &kernel, bool isCooperative);
|
||||
void updateStreamPropertiesForRegularCommandLists(Kernel &kernel, bool isCooperative);
|
||||
void updateStreamPropertiesForFlushTaskDispatchFlags(Kernel &kernel, bool isCooperative);
|
||||
void updateStreamPropertiesForRegularCommandLists(Kernel &kernel, bool isCooperative, const ze_group_count_t *threadGroupDimensions, bool isIndirect);
|
||||
void updateStreamPropertiesForFlushTaskDispatchFlags(Kernel &kernel, bool isCooperative, const ze_group_count_t *threadGroupDimensions, bool isIndirect);
|
||||
void updateStreamProperties(Kernel &kernel, bool isCooperative, const ze_group_count_t *threadGroupDimensions, bool isIndirect);
|
||||
void clearCommandsToPatch();
|
||||
|
||||
size_t getTotalSizeForCopyRegion(const ze_copy_region_t *region, uint32_t pitch, uint32_t slicePitch);
|
||||
|
||||
@@ -2344,32 +2344,48 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::prepareIndirectParams(const ze
|
||||
}
|
||||
|
||||
// Selects which stream-property update path runs for a kernel launch: the
// flush-task variant when isFlushTaskSubmissionEnabled is set, otherwise the
// regular-command-list variant. The arguments are forwarded unchanged.
// NOTE(review): this span is a rendered diff hunk without +/- markers, so each
// changed line appears twice. Presumably the two-argument form is the removed
// line and the four-argument form right after it is its replacement; confirm
// against the commit before treating this text as compilable code.
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::updateStreamProperties(Kernel &kernel, bool isCooperative) {
|
||||
void CommandListCoreFamily<gfxCoreFamily>::updateStreamProperties(Kernel &kernel, bool isCooperative, const ze_group_count_t *threadGroupDimensions, bool isIndirect) {
|
||||
if (this->isFlushTaskSubmissionEnabled) {
|
||||
updateStreamPropertiesForFlushTaskDispatchFlags(kernel, isCooperative);
|
||||
updateStreamPropertiesForFlushTaskDispatchFlags(kernel, isCooperative, threadGroupDimensions, isIndirect);
|
||||
} else {
|
||||
updateStreamPropertiesForRegularCommandLists(kernel, isCooperative);
|
||||
updateStreamPropertiesForRegularCommandLists(kernel, isCooperative, threadGroupDimensions, isIndirect);
|
||||
}
|
||||
}
|
||||
|
||||
// Updates requiredStreamState (compute mode, front-end state, pipeline select)
// from the kernel's attributes for the flush-task submission path.
// The EU-fusion-disable value passed to the front-end state is the kernel's
// requiresDisabledEUFusion flag, optionally OR-ed with the product helper's
// DPAS-specific decision.
// NOTE(review): this span is a rendered diff hunk without +/- markers; the
// duplicated signature and front-end-state lines are presumably the removed
// line followed by its replacement. Confirm against the commit.
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::updateStreamPropertiesForFlushTaskDispatchFlags(Kernel &kernel, bool isCooperative) {
|
||||
void CommandListCoreFamily<gfxCoreFamily>::updateStreamPropertiesForFlushTaskDispatchFlags(Kernel &kernel, bool isCooperative, const ze_group_count_t *threadGroupDimensions, bool isIndirect) {
|
||||
auto &rootDeviceEnvironment = device->getNEODevice()->getRootDeviceEnvironment();
|
||||
auto &kernelAttributes = kernel.getKernelDescriptor().kernelAttributes;
|
||||
// Start from the kernel's own requirement; the check below can only turn the flag on (|=), never off.
bool fusedEuDisabled = kernelAttributes.flags.requiresDisabledEUFusion;
|
||||
auto &productHelper = device->getProductHelper();
|
||||
// The extra DPAS check runs only when the product helper asks for it and the caller supplied group dimensions.
if (productHelper.isCalculationForDisablingEuFusionWithDpasNeeded()) {
|
||||
if (threadGroupDimensions) {
|
||||
// groupCountPtr stays nullptr for indirect launches, so the helper receives no group counts in that case.
uint32_t *groupCountPtr = nullptr;
|
||||
uint32_t groupCount[3] = {};
|
||||
if (!isIndirect) {
|
||||
groupCount[0] = threadGroupDimensions->groupCountX;
|
||||
groupCount[1] = threadGroupDimensions->groupCountY;
|
||||
groupCount[2] = threadGroupDimensions->groupCountZ;
|
||||
groupCountPtr = groupCount;
|
||||
}
|
||||
fusedEuDisabled |= productHelper.isFusedEuDisabledForDpas(kernelAttributes.flags.usesSystolicPipelineSelectMode, kernel.getGroupSize(), groupCountPtr);
|
||||
}
|
||||
}
|
||||
|
||||
requiredStreamState.stateComputeMode.setPropertiesGrfNumberThreadArbitration(kernelAttributes.numGrfRequired, kernelAttributes.threadArbitrationPolicy, rootDeviceEnvironment);
|
||||
|
||||
// NOTE(review): the next two calls look like the old and new form of one line; the second passes the computed fusedEuDisabled instead of the raw kernel flag.
requiredStreamState.frontEndState.setPropertiesComputeDispatchAllWalkerEnableDisableEuFusion(isCooperative, kernelAttributes.flags.requiresDisabledEUFusion, rootDeviceEnvironment);
|
||||
requiredStreamState.frontEndState.setPropertiesComputeDispatchAllWalkerEnableDisableEuFusion(isCooperative, fusedEuDisabled, rootDeviceEnvironment);
|
||||
|
||||
requiredStreamState.pipelineSelect.setPropertySystolicMode(kernelAttributes.flags.usesSystolicPipelineSelectMode, rootDeviceEnvironment);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::updateStreamPropertiesForRegularCommandLists(Kernel &kernel, bool isCooperative) {
|
||||
void CommandListCoreFamily<gfxCoreFamily>::updateStreamPropertiesForRegularCommandLists(Kernel &kernel, bool isCooperative, const ze_group_count_t *threadGroupDimensions, bool isIndirect) {
|
||||
using VFE_STATE_TYPE = typename GfxFamily::VFE_STATE_TYPE;
|
||||
|
||||
auto &rootDeviceEnvironment = device->getNEODevice()->getRootDeviceEnvironment();
|
||||
auto &kernelAttributes = kernel.getKernelDescriptor().kernelAttributes;
|
||||
|
||||
KernelImp &kernelImp = static_cast<KernelImp &>(kernel);
|
||||
|
||||
currentMocsState = static_cast<int32_t>(device->getMOCS(!kernelImp.getKernelRequiresUncachedMocs(), false) >> 1);
|
||||
@@ -2391,8 +2407,23 @@ void CommandListCoreFamily<gfxCoreFamily>::updateStreamPropertiesForRegularComma
|
||||
currentIndirectObjectBaseAddress = ioh->getHeapGpuBase();
|
||||
currentIndirectObjectSize = ioh->getHeapSizeInPages();
|
||||
|
||||
bool fusedEuDisabled = kernelAttributes.flags.requiresDisabledEUFusion;
|
||||
auto &productHelper = device->getProductHelper();
|
||||
if (productHelper.isCalculationForDisablingEuFusionWithDpasNeeded()) {
|
||||
if (threadGroupDimensions) {
|
||||
uint32_t *groupCountPtr = nullptr;
|
||||
uint32_t groupCount[3] = {};
|
||||
if (!isIndirect) {
|
||||
groupCount[0] = threadGroupDimensions->groupCountX;
|
||||
groupCount[1] = threadGroupDimensions->groupCountY;
|
||||
groupCount[2] = threadGroupDimensions->groupCountZ;
|
||||
groupCountPtr = groupCount;
|
||||
}
|
||||
fusedEuDisabled |= productHelper.isFusedEuDisabledForDpas(kernelAttributes.flags.usesSystolicPipelineSelectMode, kernel.getGroupSize(), groupCountPtr);
|
||||
}
|
||||
}
|
||||
if (!containsAnyKernel) {
|
||||
requiredStreamState.frontEndState.setProperties(isCooperative, kernelAttributes.flags.requiresDisabledEUFusion, cmdListDefaultDisableOverdispatch, -1, rootDeviceEnvironment);
|
||||
requiredStreamState.frontEndState.setProperties(isCooperative, fusedEuDisabled, cmdListDefaultDisableOverdispatch, -1, rootDeviceEnvironment);
|
||||
requiredStreamState.pipelineSelect.setProperties(true, false, kernelAttributes.flags.usesSystolicPipelineSelectMode, rootDeviceEnvironment);
|
||||
|
||||
requiredStreamState.stateBaseAddress.setProperties(kernelImp.getKernelDescriptor().kernelAttributes.flags.useGlobalAtomics, currentMocsState,
|
||||
@@ -2425,7 +2456,7 @@ void CommandListCoreFamily<gfxCoreFamily>::updateStreamPropertiesForRegularComma
|
||||
rootDeviceEnvironment);
|
||||
}
|
||||
|
||||
finalStreamState.frontEndState.setProperties(isCooperative, kernelAttributes.flags.requiresDisabledEUFusion, cmdListDefaultDisableOverdispatch, -1, rootDeviceEnvironment);
|
||||
finalStreamState.frontEndState.setProperties(isCooperative, fusedEuDisabled, cmdListDefaultDisableOverdispatch, -1, rootDeviceEnvironment);
|
||||
bool isPatchingVfeStateAllowed = NEO::DebugManager.flags.AllowPatchingVfeStateInCommandLists.get();
|
||||
if (finalStreamState.frontEndState.isDirty() && logicalStateHelperBlock) {
|
||||
if (isPatchingVfeStateAllowed) {
|
||||
|
||||
@@ -152,7 +152,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
|
||||
|
||||
std::list<void *> additionalCommands;
|
||||
|
||||
updateStreamProperties(*kernel, launchParams.isCooperative);
|
||||
updateStreamProperties(*kernel, launchParams.isCooperative, threadGroupDimensions, launchParams.isIndirect);
|
||||
NEO::EncodeDispatchKernelArgs dispatchKernelArgs{
|
||||
0, // eventAddress
|
||||
neoDevice, // device
|
||||
|
||||
@@ -265,7 +265,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
|
||||
this->containsStatelessUncachedResource |= kernelImp->getKernelRequiresUncachedMocs();
|
||||
this->requiresQueueUncachedMocs |= kernelImp->getKernelRequiresQueueUncachedMocs();
|
||||
|
||||
updateStreamProperties(*kernel, launchParams.isCooperative);
|
||||
updateStreamProperties(*kernel, launchParams.isCooperative, threadGroupDimensions, launchParams.isIndirect);
|
||||
|
||||
auto localMemSize = static_cast<uint32_t>(neoDevice->getDeviceInfo().localMemSize);
|
||||
auto slmTotalSize = kernelImp->getSlmTotalSize();
|
||||
|
||||
@@ -384,7 +384,6 @@ ze_result_t KernelImp::suggestGroupSize(uint32_t globalSizeX, uint32_t globalSiz
|
||||
NEO::computeWorkgroupSize2D(maxWorkGroupSize, retGroupSize, workItems, simd);
|
||||
}
|
||||
}
|
||||
|
||||
*groupSizeX = static_cast<uint32_t>(retGroupSize[0]);
|
||||
*groupSizeY = static_cast<uint32_t>(retGroupSize[1]);
|
||||
*groupSizeZ = static_cast<uint32_t>(retGroupSize[2]);
|
||||
|
||||
Reference in New Issue
Block a user