mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-03 14:55:24 +08:00
Disable EU fusion based on kernel properties from compiler
Related-To: NEO-6633
Signed-off-by: Konstanty Misiak <konstanty.misiak@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
13bc2300e1
commit
cf1bc3a2ba
@@ -2239,13 +2239,14 @@ void CommandListCoreFamily<gfxCoreFamily>::updateStreamProperties(Kernel &kernel
|
||||
const auto &hwInfoConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily);
|
||||
auto disableOverdispatch = hwInfoConfig.isDisableOverdispatchAvailable(hwInfo);
|
||||
|
||||
auto &kernelAttributes = kernel.getKernelDescriptor().kernelAttributes;
|
||||
if (!containsAnyKernel) {
|
||||
requiredStreamState.frontEndState.setProperties(isCooperative, disableOverdispatch, -1, hwInfo);
|
||||
requiredStreamState.frontEndState.setProperties(isCooperative, kernelAttributes.flags.requiresDisabledEUFusion, disableOverdispatch, -1, hwInfo);
|
||||
finalStreamState = requiredStreamState;
|
||||
containsAnyKernel = true;
|
||||
}
|
||||
|
||||
finalStreamState.frontEndState.setProperties(isCooperative, disableOverdispatch, -1, hwInfo);
|
||||
finalStreamState.frontEndState.setProperties(isCooperative, kernelAttributes.flags.requiresDisabledEUFusion, disableOverdispatch, -1, hwInfo);
|
||||
bool isPatchingVfeStateAllowed = NEO::DebugManager.flags.AllowPatchingVfeStateInCommandLists.get();
|
||||
if (finalStreamState.frontEndState.isDirty() && isPatchingVfeStateAllowed) {
|
||||
auto pVfeStateAddress = NEO::PreambleHelper<GfxFamily>::getSpaceForVfeState(commandContainer.getCommandStream(), hwInfo, engineGroupType);
|
||||
@@ -2254,7 +2255,6 @@ void CommandListCoreFamily<gfxCoreFamily>::updateStreamProperties(Kernel &kernel
|
||||
commandsToPatch.push_back({pVfeStateAddress, pVfeState, CommandToPatch::FrontEndState});
|
||||
}
|
||||
|
||||
auto &kernelAttributes = kernel.getKernelDescriptor().kernelAttributes;
|
||||
auto &neoDevice = *device->getNEODevice();
|
||||
finalStreamState.stateComputeMode.setProperties(false, kernelAttributes.numGrfRequired, this->threadArbitrationPolicy);
|
||||
|
||||
|
||||
@@ -84,7 +84,7 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
|
||||
|
||||
auto anyCommandListWithCooperativeKernels = false;
|
||||
auto anyCommandListWithoutCooperativeKernels = false;
|
||||
|
||||
bool anyCommandListRequiresDisabledEUFusion = false;
|
||||
bool cachedMOCSAllowed = true;
|
||||
|
||||
for (auto i = 0u; i < numCommandLists; i++) {
|
||||
@@ -102,6 +102,11 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
|
||||
} else {
|
||||
anyCommandListWithoutCooperativeKernels = true;
|
||||
}
|
||||
|
||||
if (commandList->getRequiredStreamState().frontEndState.disableEUFusion.value == 1) {
|
||||
anyCommandListRequiresDisabledEUFusion = true;
|
||||
}
|
||||
|
||||
// If the Command List has commands that require uncached MOCS, then any changes to the commands in the queue requires the uncached MOCS
|
||||
if (commandList->requiresQueueUncachedMocs && cachedMOCSAllowed == true) {
|
||||
cachedMOCSAllowed = false;
|
||||
@@ -235,8 +240,8 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
|
||||
auto isEngineInstanced = csr->getOsContext().isEngineInstanced();
|
||||
bool isPatchingVfeStateAllowed = NEO::DebugManager.flags.AllowPatchingVfeStateInCommandLists.get();
|
||||
if (!isPatchingVfeStateAllowed) {
|
||||
streamProperties.frontEndState.setProperties(anyCommandListWithCooperativeKernels, disableOverdispatch,
|
||||
isEngineInstanced, hwInfo);
|
||||
streamProperties.frontEndState.setProperties(anyCommandListWithCooperativeKernels, anyCommandListRequiresDisabledEUFusion,
|
||||
disableOverdispatch, isEngineInstanced, hwInfo);
|
||||
} else {
|
||||
streamProperties.frontEndState.singleSliceDispatchCcsMode.set(isEngineInstanced);
|
||||
}
|
||||
|
||||
@@ -374,7 +374,7 @@ ze_result_t KernelImp::suggestGroupSize(uint32_t globalSizeX, uint32_t globalSiz
|
||||
|
||||
NEO::WorkSizeInfo wsInfo(maxWorkGroupSize, kernelImmData->getDescriptor().kernelAttributes.usesBarriers(), simd, this->getSlmTotalSize(),
|
||||
hwInfo, numThreadsPerSubSlice, localMemSize,
|
||||
usesImages, false);
|
||||
usesImages, false, kernelImmData->getDescriptor().kernelAttributes.flags.requiresDisabledEUFusion);
|
||||
NEO::computeWorkgroupSizeND(wsInfo, retGroupSize, workItems, dim);
|
||||
} else {
|
||||
if (1U == dim) {
|
||||
|
||||
@@ -120,6 +120,33 @@ HWTEST_F(CommandQueueExecuteCommandLists, whenACommandListExecutedRequiresUncach
|
||||
commandQueue->destroy();
|
||||
}
|
||||
|
||||
HWTEST_F(CommandQueueExecuteCommandLists, givenCommandListThatRequiresDisabledEUFusionWhenExecutingCommandListsThenCommandQueueHasProperStreamProperties) {
|
||||
struct WhiteBoxCommandList : public L0::CommandList {
|
||||
using CommandList::CommandList;
|
||||
using CommandList::requiredStreamState;
|
||||
};
|
||||
|
||||
const ze_command_queue_desc_t desc{};
|
||||
ze_result_t returnValue;
|
||||
auto commandQueue = whitebox_cast(CommandQueue::create(productFamily,
|
||||
device,
|
||||
neoDevice->getDefaultEngine().commandStreamReceiver,
|
||||
&desc,
|
||||
false,
|
||||
false,
|
||||
returnValue));
|
||||
ASSERT_NE(nullptr, commandQueue->commandStream);
|
||||
|
||||
auto commandList1 = static_cast<WhiteBoxCommandList *>(CommandList::fromHandle(commandLists[0]));
|
||||
commandList1->requiredStreamState.frontEndState.disableEUFusion.set(true);
|
||||
|
||||
auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
EXPECT_EQ(1, commandQueue->getCsr()->getStreamProperties().frontEndState.disableEUFusion.value);
|
||||
|
||||
commandQueue->destroy();
|
||||
}
|
||||
|
||||
HWTEST_F(CommandQueueExecuteCommandLists, whenASecondLevelBatchBufferPerCommandListAddedThenProperSizeExpected) {
|
||||
using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
|
||||
using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END;
|
||||
|
||||
@@ -222,7 +222,7 @@ HWTEST2_F(CommandQueueCommandsPvc, whenExecuteCommandListsIsCalledThenAdditional
|
||||
auto hCommandListBA = commandListBA->toHandle();
|
||||
|
||||
// Set state B
|
||||
csr->getStreamProperties().frontEndState.setProperties(true, false, false, *NEO::defaultHwInfo);
|
||||
csr->getStreamProperties().frontEndState.setProperties(true, false, false, false, *NEO::defaultHwInfo);
|
||||
// Execute command list AB
|
||||
commandQueue->executeCommandLists(1, &hCommandListAB, nullptr, false);
|
||||
|
||||
@@ -243,7 +243,7 @@ HWTEST2_F(CommandQueueCommandsPvc, whenExecuteCommandListsIsCalledThenAdditional
|
||||
EXPECT_EQ(true, genCmdCast<CFE_STATE *>(*cfeStates[0])->getComputeDispatchAllWalkerEnable());
|
||||
|
||||
// Set state A
|
||||
csr->getStreamProperties().frontEndState.setProperties(false, false, false, *NEO::defaultHwInfo);
|
||||
csr->getStreamProperties().frontEndState.setProperties(false, false, false, false, *NEO::defaultHwInfo);
|
||||
// Execute command list BA
|
||||
commandQueue->executeCommandLists(1, &hCommandListBA, nullptr, false);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user