Disable patching VfeState by default

By default, VfeState will be programmed once per execution of command lists.
Patching may be enabled with the AllowPatchingVfeStateInCommandLists debug flag.

Related-To: NEO-4940

Signed-off-by: Filip Hazubski <filip.hazubski@intel.com>
This commit is contained in:
Filip Hazubski
2021-09-01 09:50:34 +00:00
committed by Compute-Runtime-Automation
parent b323e78f55
commit 0606ef4095
8 changed files with 66 additions and 34 deletions

View File

@@ -2164,7 +2164,8 @@ void CommandListCoreFamily<gfxCoreFamily>::updateStreamProperties(Kernel &kernel
}
finalStreamState.frontEndState.setProperties(isCooperative, disableOverdispatch, false, hwInfo);
if (finalStreamState.frontEndState.isDirty()) {
bool isPatchingVfeStateAllowed = NEO::DebugManager.flags.AllowPatchingVfeStateInCommandLists.get();
if (finalStreamState.frontEndState.isDirty() && isPatchingVfeStateAllowed) {
auto pVfeStateAddress = NEO::PreambleHelper<GfxFamily>::getSpaceForVfeState(commandContainer.getCommandStream(), hwInfo, engineGroupType);
auto pVfeState = new VFE_STATE_TYPE;
NEO::PreambleHelper<GfxFamily>::programVfeState(pVfeState, hwInfo, 0, 0, device->getMaxNumHwThreads(), finalStreamState);

View File

@@ -120,7 +120,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
this->indirectAllocationsAllowed = true;
}
if ((!containsAnyKernel) || NEO::DebugManager.flags.AllowMixingRegularAndCooperativeKernels.get()) {
bool isMixingRegularAndCooperativeKernelsAllowed = NEO::DebugManager.flags.AllowMixingRegularAndCooperativeKernels.get();
if ((!containsAnyKernel) || isMixingRegularAndCooperativeKernelsAllowed) {
containsCooperativeKernelsFlag = (containsCooperativeKernelsFlag || isCooperative);
} else if (containsCooperativeKernelsFlag != isCooperative) {
return ZE_RESULT_ERROR_INVALID_ARGUMENT;

View File

@@ -188,7 +188,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
kernelDescriptor.kernelMetadata.kernelName.c_str(), 0u);
}
if ((!containsAnyKernel) || NEO::DebugManager.flags.AllowMixingRegularAndCooperativeKernels.get()) {
bool isMixingRegularAndCooperativeKernelsAllowed = NEO::DebugManager.flags.AllowMixingRegularAndCooperativeKernels.get();
if ((!containsAnyKernel) || isMixingRegularAndCooperativeKernelsAllowed) {
containsCooperativeKernelsFlag = (containsCooperativeKernelsFlag || isCooperative);
} else if (containsCooperativeKernelsFlag != isCooperative) {
return ZE_RESULT_ERROR_INVALID_ARGUMENT;

View File

@@ -39,8 +39,8 @@ struct CommandQueueHw : public CommandQueueImp {
size_t estimateStateBaseAddressCmdSize();
MOCKABLE_VIRTUAL void programFrontEnd(uint64_t scratchAddress, uint32_t perThreadScratchSpaceSize, NEO::LinearStream &commandStream);
size_t estimateFrontEndCmdSizeForMultipleCommandLists(bool isFrontEndStateDirty, uint32_t numCommandLists,
ze_command_list_handle_t *phCommandLists);
MOCKABLE_VIRTUAL size_t estimateFrontEndCmdSizeForMultipleCommandLists(bool isFrontEndStateDirty, uint32_t numCommandLists,
ze_command_list_handle_t *phCommandLists);
size_t estimateFrontEndCmdSize();
size_t estimatePipelineSelect();
void programPipelineSelect(NEO::LinearStream &commandStream);

View File

@@ -74,7 +74,8 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
auto lockCSR = csr->obtainUniqueOwnership();
auto commandListsContainCooperativeKernels = CommandList::fromHandle(phCommandLists[0])->containsCooperativeKernels();
auto anyCommandListWithCooperativeKernels = false;
auto anyCommandListWithoutCooperativeKernels = false;
for (auto i = 0u; i < numCommandLists; i++) {
auto commandList = CommandList::fromHandle(phCommandLists[i]);
@@ -82,12 +83,19 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
return ZE_RESULT_ERROR_INVALID_COMMAND_LIST_TYPE;
}
if ((commandListsContainCooperativeKernels != commandList->containsCooperativeKernels()) &&
(!NEO::DebugManager.flags.AllowMixingRegularAndCooperativeKernels.get())) {
return ZE_RESULT_ERROR_INVALID_COMMAND_LIST_TYPE;
if (commandList->containsCooperativeKernels()) {
anyCommandListWithCooperativeKernels = true;
} else {
anyCommandListWithoutCooperativeKernels = true;
}
}
bool isMixingRegularAndCooperativeKernelsAllowed = NEO::DebugManager.flags.AllowMixingRegularAndCooperativeKernels.get();
if (anyCommandListWithCooperativeKernels && anyCommandListWithoutCooperativeKernels &&
(!isMixingRegularAndCooperativeKernelsAllowed)) {
return ZE_RESULT_ERROR_INVALID_COMMAND_LIST_TYPE;
}
size_t spaceForResidency = 0;
size_t preemptionSize = 0u;
size_t debuggerCmdsSize = 0;
@@ -199,6 +207,17 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
gsbaStateDirty, frontEndStateDirty,
perThreadScratchSpaceSize);
auto &streamProperties = csr->getStreamProperties();
auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily);
auto disableOverdispatch = hwHelper.isDisableOverdispatchAvailable(hwInfo);
auto isEngineInstanced = csr->getOsContext().isEngineInstanced();
bool isPatchingVfeStateAllowed = NEO::DebugManager.flags.AllowPatchingVfeStateInCommandLists.get();
if (!isPatchingVfeStateAllowed) {
streamProperties.frontEndState.setProperties(anyCommandListWithCooperativeKernels, disableOverdispatch,
isEngineInstanced, hwInfo);
frontEndStateDirty |= streamProperties.frontEndState.isDirty();
}
gsbaStateDirty |= csr->getGSBAStateDirty();
frontEndStateDirty |= csr->getMediaVFEStateDirty();
if (!isCopyOnlyCommandQueue) {
@@ -308,7 +327,6 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
}
}
auto &streamProperties = csr->getStreamProperties();
for (auto i = 0u; i < numCommandLists; ++i) {
auto commandList = CommandList::fromHandle(phCommandLists[i]);
auto cmdBufferAllocations = commandList->commandContainer.getCmdBufferAllocations();
@@ -335,20 +353,24 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
}
if (!isCopyOnlyCommandQueue) {
auto &requiredStreamState = commandList->getRequiredStreamState();
streamProperties.frontEndState.setProperties(requiredStreamState.frontEndState);
streamProperties.frontEndState.singleSliceDispatchCcsMode.value = csr->getOsContext().isEngineInstanced();
auto programVfe = streamProperties.frontEndState.isDirty();
if (frontEndStateDirty) {
programVfe = true;
frontEndStateDirty = false;
bool programVfe = frontEndStateDirty;
if (isPatchingVfeStateAllowed) {
auto requiredStreamStateCopy = commandList->getRequiredStreamState();
requiredStreamStateCopy.frontEndState.singleSliceDispatchCcsMode.set(isEngineInstanced);
streamProperties.frontEndState.setProperties(requiredStreamStateCopy.frontEndState);
programVfe |= streamProperties.frontEndState.isDirty();
}
if (programVfe) {
programFrontEnd(scratchSpaceController->getScratchPatchAddress(), scratchSpaceController->getPerThreadScratchSpaceSize(), child);
frontEndStateDirty = false;
}
if (isPatchingVfeStateAllowed) {
auto finalStreamStateCopy = commandList->getFinalStreamState();
finalStreamStateCopy.frontEndState.singleSliceDispatchCcsMode.set(isEngineInstanced);
streamProperties.frontEndState.setProperties(finalStreamStateCopy.frontEndState);
}
auto &finalStreamState = commandList->getFinalStreamState();
streamProperties.frontEndState.setProperties(finalStreamState.frontEndState);
streamProperties.frontEndState.singleSliceDispatchCcsMode.value = csr->getOsContext().isEngineInstanced();
}
patchCommands(*commandList, scratchSpaceController->getScratchPatchAddress());
@@ -413,7 +435,7 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
}
submitBatchBuffer(ptrDiff(child.getCpuBase(), commandStream->getCpuBase()), csr->getResidencyAllocations(), endingCmd,
commandListsContainCooperativeKernels);
anyCommandListWithCooperativeKernels);
this->taskCount = csr->peekTaskCount();
@@ -456,24 +478,29 @@ template <GFXCORE_FAMILY gfxCoreFamily>
size_t CommandQueueHw<gfxCoreFamily>::estimateFrontEndCmdSizeForMultipleCommandLists(
bool isFrontEndStateDirty, uint32_t numCommandLists, ze_command_list_handle_t *phCommandLists) {
auto streamPropertiesCopy = csr->getStreamProperties();
auto singleFrontEndCmdSize = estimateFrontEndCmdSize();
bool isPatchingVfeStateAllowed = NEO::DebugManager.flags.AllowPatchingVfeStateInCommandLists.get();
if (!isPatchingVfeStateAllowed) {
return isFrontEndStateDirty * singleFrontEndCmdSize;
}
auto streamPropertiesCopy = csr->getStreamProperties();
auto isEngineInstanced = csr->getOsContext().isEngineInstanced();
size_t estimatedSize = 0;
for (size_t i = 0; i < numCommandLists; i++) {
auto commandList = CommandList::fromHandle(phCommandLists[i]);
auto &requiredStreamState = commandList->getRequiredStreamState();
streamPropertiesCopy.frontEndState.setProperties(requiredStreamState.frontEndState);
auto isVfeRequired = streamPropertiesCopy.frontEndState.isDirty();
if (isFrontEndStateDirty) {
isVfeRequired = true;
auto requiredStreamStateCopy = commandList->getRequiredStreamState();
requiredStreamStateCopy.frontEndState.singleSliceDispatchCcsMode.set(isEngineInstanced);
streamPropertiesCopy.frontEndState.setProperties(requiredStreamStateCopy.frontEndState);
if (isFrontEndStateDirty || streamPropertiesCopy.frontEndState.isDirty()) {
estimatedSize += singleFrontEndCmdSize;
isFrontEndStateDirty = false;
}
if (isVfeRequired) {
estimatedSize += singleFrontEndCmdSize;
}
auto &finalStreamState = commandList->getFinalStreamState();
streamPropertiesCopy.frontEndState.setProperties(finalStreamState.frontEndState);
auto finalStreamStateCopy = commandList->getFinalStreamState();
finalStreamStateCopy.frontEndState.singleSliceDispatchCcsMode.set(isEngineInstanced);
streamPropertiesCopy.frontEndState.setProperties(finalStreamStateCopy.frontEndState);
}
return estimatedSize;

View File

@@ -144,7 +144,7 @@ void CommandQueueHw<gfxCoreFamily>::patchCommands(CommandList &commandList, uint
cfeStateCmd = reinterpret_cast<CFE_STATE *>(commandToPatch.pCommand);
cfeStateCmd->setScratchSpaceBuffer(lowScratchAddress);
cfeStateCmd->setSingleSliceDispatchCcsMode(csr->getStreamProperties().frontEndState.singleSliceDispatchCcsMode.value);
cfeStateCmd->setSingleSliceDispatchCcsMode(csr->getOsContext().isEngineInstanced());
*reinterpret_cast<CFE_STATE *>(commandToPatch.pDestination) = *cfeStateCmd;
break;