Disable EU fusion based on kernel properties from compiler

Related-To: NEO-6633

Signed-off-by: Konstanty Misiak <konstanty.misiak@intel.com>
This commit is contained in:
Konstanty Misiak
2022-02-10 23:33:40 +00:00
committed by Compute-Runtime-Automation
parent 13bc2300e1
commit cf1bc3a2ba
37 changed files with 228 additions and 95 deletions

View File

@@ -2239,13 +2239,14 @@ void CommandListCoreFamily<gfxCoreFamily>::updateStreamProperties(Kernel &kernel
const auto &hwInfoConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily);
auto disableOverdispatch = hwInfoConfig.isDisableOverdispatchAvailable(hwInfo);
auto &kernelAttributes = kernel.getKernelDescriptor().kernelAttributes;
if (!containsAnyKernel) {
requiredStreamState.frontEndState.setProperties(isCooperative, disableOverdispatch, -1, hwInfo);
requiredStreamState.frontEndState.setProperties(isCooperative, kernelAttributes.flags.requiresDisabledEUFusion, disableOverdispatch, -1, hwInfo);
finalStreamState = requiredStreamState;
containsAnyKernel = true;
}
finalStreamState.frontEndState.setProperties(isCooperative, disableOverdispatch, -1, hwInfo);
finalStreamState.frontEndState.setProperties(isCooperative, kernelAttributes.flags.requiresDisabledEUFusion, disableOverdispatch, -1, hwInfo);
bool isPatchingVfeStateAllowed = NEO::DebugManager.flags.AllowPatchingVfeStateInCommandLists.get();
if (finalStreamState.frontEndState.isDirty() && isPatchingVfeStateAllowed) {
auto pVfeStateAddress = NEO::PreambleHelper<GfxFamily>::getSpaceForVfeState(commandContainer.getCommandStream(), hwInfo, engineGroupType);
@@ -2254,7 +2255,6 @@ void CommandListCoreFamily<gfxCoreFamily>::updateStreamProperties(Kernel &kernel
commandsToPatch.push_back({pVfeStateAddress, pVfeState, CommandToPatch::FrontEndState});
}
auto &kernelAttributes = kernel.getKernelDescriptor().kernelAttributes;
auto &neoDevice = *device->getNEODevice();
finalStreamState.stateComputeMode.setProperties(false, kernelAttributes.numGrfRequired, this->threadArbitrationPolicy);

View File

@@ -84,7 +84,7 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
auto anyCommandListWithCooperativeKernels = false;
auto anyCommandListWithoutCooperativeKernels = false;
bool anyCommandListRequiresDisabledEUFusion = false;
bool cachedMOCSAllowed = true;
for (auto i = 0u; i < numCommandLists; i++) {
@@ -102,6 +102,11 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
} else {
anyCommandListWithoutCooperativeKernels = true;
}
if (commandList->getRequiredStreamState().frontEndState.disableEUFusion.value == 1) {
anyCommandListRequiresDisabledEUFusion = true;
}
// If the Command List has commands that require uncached MOCS, then any changes to the commands in the queue requires the uncached MOCS
if (commandList->requiresQueueUncachedMocs && cachedMOCSAllowed == true) {
cachedMOCSAllowed = false;
@@ -235,8 +240,8 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
auto isEngineInstanced = csr->getOsContext().isEngineInstanced();
bool isPatchingVfeStateAllowed = NEO::DebugManager.flags.AllowPatchingVfeStateInCommandLists.get();
if (!isPatchingVfeStateAllowed) {
streamProperties.frontEndState.setProperties(anyCommandListWithCooperativeKernels, disableOverdispatch,
isEngineInstanced, hwInfo);
streamProperties.frontEndState.setProperties(anyCommandListWithCooperativeKernels, anyCommandListRequiresDisabledEUFusion,
disableOverdispatch, isEngineInstanced, hwInfo);
} else {
streamProperties.frontEndState.singleSliceDispatchCcsMode.set(isEngineInstanced);
}

View File

@@ -374,7 +374,7 @@ ze_result_t KernelImp::suggestGroupSize(uint32_t globalSizeX, uint32_t globalSiz
NEO::WorkSizeInfo wsInfo(maxWorkGroupSize, kernelImmData->getDescriptor().kernelAttributes.usesBarriers(), simd, this->getSlmTotalSize(),
hwInfo, numThreadsPerSubSlice, localMemSize,
usesImages, false);
usesImages, false, kernelImmData->getDescriptor().kernelAttributes.flags.requiresDisabledEUFusion);
NEO::computeWorkgroupSizeND(wsInfo, retGroupSize, workItems, dim);
} else {
if (1U == dim) {

View File

@@ -120,6 +120,33 @@ HWTEST_F(CommandQueueExecuteCommandLists, whenACommandListExecutedRequiresUncach
commandQueue->destroy();
}
// Verifies that a "disable EU fusion" requirement recorded in a command list's
// required stream state is visible in the command stream receiver's front-end
// stream properties after executeCommandLists() has run.
HWTEST_F(CommandQueueExecuteCommandLists, givenCommandListThatRequiresDisabledEUFusionWhenExecutingCommandListsThenCommandQueueHasProperStreamProperties) {
    // Re-exports requiredStreamState so the test can write to it directly.
    struct WhiteBoxCommandList : public L0::CommandList {
        using CommandList::CommandList;
        using CommandList::requiredStreamState;
    };

    const ze_command_queue_desc_t desc{};
    ze_result_t returnValue;
    auto commandQueue = whitebox_cast(CommandQueue::create(productFamily,
                                                           device,
                                                           neoDevice->getDefaultEngine().commandStreamReceiver,
                                                           &desc,
                                                           false,
                                                           false,
                                                           returnValue));
    // Guard the dereferences below: a failed creation should be reported as a
    // test failure rather than crash on a null pointer.
    ASSERT_NE(nullptr, commandQueue);
    ASSERT_NE(nullptr, commandQueue->commandStream);

    // Mark only the first command list as requiring disabled EU fusion.
    auto commandList1 = static_cast<WhiteBoxCommandList *>(CommandList::fromHandle(commandLists[0]));
    commandList1->requiredStreamState.frontEndState.disableEUFusion.set(true);

    auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    // The requirement must be reflected in the CSR's front-end stream properties.
    EXPECT_EQ(1, commandQueue->getCsr()->getStreamProperties().frontEndState.disableEUFusion.value);

    commandQueue->destroy();
}
HWTEST_F(CommandQueueExecuteCommandLists, whenASecondLevelBatchBufferPerCommandListAddedThenProperSizeExpected) {
using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END;

View File

@@ -222,7 +222,7 @@ HWTEST2_F(CommandQueueCommandsPvc, whenExecuteCommandListsIsCalledThenAdditional
auto hCommandListBA = commandListBA->toHandle();
// Set state B
csr->getStreamProperties().frontEndState.setProperties(true, false, false, *NEO::defaultHwInfo);
csr->getStreamProperties().frontEndState.setProperties(true, false, false, false, *NEO::defaultHwInfo);
// Execute command list AB
commandQueue->executeCommandLists(1, &hCommandListAB, nullptr, false);
@@ -243,7 +243,7 @@ HWTEST2_F(CommandQueueCommandsPvc, whenExecuteCommandListsIsCalledThenAdditional
EXPECT_EQ(true, genCmdCast<CFE_STATE *>(*cfeStates[0])->getComputeDispatchAllWalkerEnable());
// Set state A
csr->getStreamProperties().frontEndState.setProperties(false, false, false, *NEO::defaultHwInfo);
csr->getStreamProperties().frontEndState.setProperties(false, false, false, false, *NEO::defaultHwInfo);
// Execute command list BA
commandQueue->executeCommandLists(1, &hCommandListBA, nullptr, false);