refactor: remove LogicalStateHelper

Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
Dunajski, Bartosz
2023-09-12 17:51:43 +00:00
committed by Compute-Runtime-Automation
parent 652a6dee67
commit 7562842a58
129 changed files with 263 additions and 805 deletions

View File

@@ -20,7 +20,6 @@
namespace NEO {
enum class MemoryPool;
enum class ImageType;
class LogicalStateHelper;
} // namespace NEO
namespace L0 {
@@ -296,7 +295,6 @@ struct CommandListCoreFamily : CommandListImp {
void addFlushRequiredCommand(bool flushOperationRequired, Event *signalEvent);
void handlePostSubmissionState();
virtual void createLogicalStateHelper();
void setupFillKernelArguments(size_t baseOffset,
size_t patternSize,
size_t dstSize,

View File

@@ -21,7 +21,6 @@
#include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/helpers/hw_info.h"
#include "shared/source/helpers/kernel_helpers.h"
#include "shared/source/helpers/logical_state_helper.h"
#include "shared/source/helpers/pipe_control_args.h"
#include "shared/source/helpers/preamble.h"
#include "shared/source/helpers/register_offsets.h"
@@ -245,15 +244,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::initialize(Device *device, NEO
enableInOrderExecution();
}
createLogicalStateHelper();
return returnType;
}
// Creates a LogicalStateHelper for GfxFamily and installs it as this command
// list's non-immediate helper; any helper previously held is released by reset().
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::createLogicalStateHelper() {
    auto *createdHelper = NEO::LogicalStateHelper::create<GfxFamily>();
    this->nonImmediateLogicalStateHelper.reset(createdHelper);
}
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::executeCommandListImmediate(bool performMigration) {
return executeCommandListImmediateImpl(performMigration, this->cmdQImmediate);
@@ -2799,10 +2792,8 @@ void CommandListCoreFamily<gfxCoreFamily>::updateStreamPropertiesForRegularComma
containsAnyKernel = true;
}
auto logicalStateHelperBlock = !getLogicalStateHelper();
finalStreamState.pipelineSelect.setPropertySystolicMode(kernelAttributes.flags.usesSystolicPipelineSelectMode);
if (this->pipelineSelectStateTracking && finalStreamState.pipelineSelect.isDirty() && logicalStateHelperBlock) {
if (this->pipelineSelectStateTracking && finalStreamState.pipelineSelect.isDirty()) {
NEO::PipelineSelectArgs pipelineSelectArgs;
pipelineSelectArgs.systolicPipelineSelectMode = kernelAttributes.flags.usesSystolicPipelineSelectMode;
pipelineSelectArgs.systolicPipelineSelectSupport = this->systolicModeSupport;
@@ -2814,11 +2805,11 @@ void CommandListCoreFamily<gfxCoreFamily>::updateStreamPropertiesForRegularComma
finalStreamState.frontEndState.setPropertiesComputeDispatchAllWalkerEnableDisableEuFusion(isCooperative, fusedEuDisabled);
bool isPatchingVfeStateAllowed = (NEO::DebugManager.flags.AllowPatchingVfeStateInCommandLists.get() || (this->frontEndStateTracking && this->dispatchCmdListBatchBufferAsPrimary));
if (logicalStateHelperBlock && finalStreamState.frontEndState.isDirty()) {
if (finalStreamState.frontEndState.isDirty()) {
if (isPatchingVfeStateAllowed) {
auto frontEndStateAddress = NEO::PreambleHelper<GfxFamily>::getSpaceForVfeState(commandContainer.getCommandStream(), device->getHwInfo(), engineGroupType);
auto frontEndStateCmd = new VFE_STATE_TYPE;
NEO::PreambleHelper<GfxFamily>::programVfeState(frontEndStateCmd, rootDeviceEnvironment, 0, 0, device->getMaxNumHwThreads(), finalStreamState, nullptr);
NEO::PreambleHelper<GfxFamily>::programVfeState(frontEndStateCmd, rootDeviceEnvironment, 0, 0, device->getMaxNumHwThreads(), finalStreamState);
commandsToPatch.push_back({frontEndStateAddress, frontEndStateCmd, CommandToPatch::FrontEndState});
}
if (this->frontEndStateTracking && !this->dispatchCmdListBatchBufferAsPrimary) {
@@ -2839,14 +2830,14 @@ void CommandListCoreFamily<gfxCoreFamily>::updateStreamPropertiesForRegularComma
} else {
finalStreamState.stateComputeMode.setPropertiesAll(cmdListDefaultCoherency, kernelAttributes.numGrfRequired, kernelAttributes.threadArbitrationPolicy, device->getDevicePreemptionMode());
}
if (finalStreamState.stateComputeMode.isDirty() && logicalStateHelperBlock) {
if (finalStreamState.stateComputeMode.isDirty()) {
bool isRcs = (this->engineGroupType == NEO::EngineGroupType::RenderCompute);
NEO::PipelineSelectArgs pipelineSelectArgs;
pipelineSelectArgs.systolicPipelineSelectMode = kernelAttributes.flags.usesSystolicPipelineSelectMode;
pipelineSelectArgs.systolicPipelineSelectSupport = this->systolicModeSupport;
NEO::EncodeComputeMode<GfxFamily>::programComputeModeCommandWithSynchronization(
*commandContainer.getCommandStream(), finalStreamState.stateComputeMode, pipelineSelectArgs, false, rootDeviceEnvironment, isRcs, this->dcFlushSupport, nullptr);
*commandContainer.getCommandStream(), finalStreamState.stateComputeMode, pipelineSelectArgs, false, rootDeviceEnvironment, isRcs, this->dcFlushSupport);
}
finalStreamState.stateBaseAddress.setPropertyStatelessMocs(currentMocsState);
@@ -2861,7 +2852,7 @@ void CommandListCoreFamily<gfxCoreFamily>::updateStreamPropertiesForRegularComma
finalStreamState.stateBaseAddress.setPropertiesIndirectState(currentIndirectObjectBaseAddress, currentIndirectObjectSize);
}
if (logicalStateHelperBlock && this->stateBaseAddressTracking && finalStreamState.stateBaseAddress.isDirty()) {
if (this->stateBaseAddressTracking && finalStreamState.stateBaseAddress.isDirty()) {
commandContainer.setDirtyStateForAllHeaps(false);
programStateBaseAddress(commandContainer, true);
finalStreamState.stateBaseAddress.clearIsDirty();

View File

@@ -167,9 +167,6 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
ze_result_t flushImmediate(ze_result_t inputRet, bool performMigration, bool hasStallingCmds, bool hasRelaxedOrderingDependencies, bool kernelOperation, ze_event_handle_t hSignalEvent);
void createLogicalStateHelper() override {}
NEO::LogicalStateHelper *getLogicalStateHelper() const override;
bool preferCopyThroughLockedPtr(CpuMemCopyInfo &cpuMemCopyInfo, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents);
bool isSuitableUSMHostAlloc(NEO::SvmAllocationData *alloc);
bool isSuitableUSMDeviceAlloc(NEO::SvmAllocationData *alloc);

View File

@@ -38,11 +38,6 @@ CommandListCoreFamilyImmediate<gfxCoreFamily>::CommandListCoreFamilyImmediate(ui
computeFlushMethod = &CommandListCoreFamilyImmediate<gfxCoreFamily>::flushRegularTask;
}
// Immediate command lists do not use their own helper: the lookup is forwarded
// to this->csr and whatever it returns is handed back unchanged.
template <GFXCORE_FAMILY gfxCoreFamily>
NEO::LogicalStateHelper *CommandListCoreFamilyImmediate<gfxCoreFamily>::getLogicalStateHelper() const {
    NEO::LogicalStateHelper *csrHelper = this->csr->getLogicalStateHelper();
    return csrHelper;
}
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamilyImmediate<gfxCoreFamily>::checkAvailableSpace(uint32_t numEvents, bool hasRelaxedOrderingDependencies, size_t commandSize) {
this->commandContainer.fillReusableAllocationLists();

View File

@@ -195,7 +195,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
this->dcFlushSupport // dcFlushEnable
};
NEO::EncodeDispatchKernel<GfxFamily>::encode(commandContainer, dispatchKernelArgs, getLogicalStateHelper());
NEO::EncodeDispatchKernel<GfxFamily>::encode(commandContainer, dispatchKernelArgs);
if (!this->isFlushTaskSubmissionEnabled) {
this->containsStatelessUncachedResource = dispatchKernelArgs.requiresUncachedMocs;
}

View File

@@ -308,7 +308,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
}
}
NEO::EncodeDispatchKernel<GfxFamily>::encode(commandContainer, dispatchKernelArgs, getLogicalStateHelper());
NEO::EncodeDispatchKernel<GfxFamily>::encode(commandContainer, dispatchKernelArgs);
if (!this->isFlushTaskSubmissionEnabled) {
this->containsStatelessUncachedResource = dispatchKernelArgs.requiresUncachedMocs;

View File

@@ -14,7 +14,6 @@
#include "shared/source/helpers/engine_control.h"
#include "shared/source/helpers/engine_node_helper.h"
#include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/helpers/logical_state_helper.h"
#include "shared/source/indirect_heap/indirect_heap.h"
#include "shared/source/memory_manager/allocation_properties.h"
#include "shared/source/memory_manager/memory_manager.h"

View File

@@ -12,10 +12,6 @@
#include <memory>
namespace NEO {
class LogicalStateHelper;
}
namespace L0 {
struct CommandListImp : CommandList {
@@ -32,7 +28,6 @@ struct CommandListImp : CommandList {
virtual void appendMultiPartitionPrologue(uint32_t partitionDataSize) = 0;
virtual void appendMultiPartitionEpilogue() = 0;
virtual NEO::LogicalStateHelper *getLogicalStateHelper() const { return nonImmediateLogicalStateHelper.get(); }
void setStreamPropertiesDefaultSettings(NEO::StreamProperties &streamProperties);
void enableInOrderExecution();
bool isInOrderExecutionEnabled() const { return inOrderExecutionEnabled; }
@@ -41,7 +36,6 @@ struct CommandListImp : CommandList {
const std::vector<Event *> &peekMappedEventList() { return mappedTsEventList; }
protected:
std::unique_ptr<NEO::LogicalStateHelper> nonImmediateLogicalStateHelper;
NEO::GraphicsAllocation *inOrderDependencyCounterAllocation = nullptr;
uint32_t inOrderDependencyCounter = 0;
uint32_t inOrderAllocationOffset = 0;

View File

@@ -158,8 +158,6 @@ struct CommandQueueHw : public CommandQueueImp {
inline void makeRayTracingBufferResident(NEO::GraphicsAllocation *rtBuffer);
inline void programActivePartitionConfig(bool isProgramActivePartitionConfigRequired, NEO::LinearStream &commandStream);
inline void encodeKernelArgsBufferAndMakeItResident();
inline void writeCsrStreamInlineIfLogicalStateHelperAvailable(NEO::LinearStream &commandStream);
inline void programOneCmdListFrontEndIfDirty(CommandListExecutionContext &ctx,
NEO::LinearStream &commandStream,
CommandListRequiredStateChange &cmdListRequiredState);
@@ -169,7 +167,6 @@ struct CommandQueueHw : public CommandQueueImp {
inline void programLastCommandListReturnBbStart(
NEO::LinearStream &commandStream,
CommandListExecutionContext &ctx);
inline void mergeOneCmdListPipelinedState(CommandList *commandList);
inline void programFrontEndAndClearDirtyFlag(bool shouldFrontEndBeProgrammed,
CommandListExecutionContext &ctx,
NEO::LinearStream &commandStream,

View File

@@ -22,7 +22,6 @@
#include "shared/source/helpers/definitions/command_encoder_args.h"
#include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/helpers/heap_base_address_model.h"
#include "shared/source/helpers/logical_state_helper.h"
#include "shared/source/helpers/pause_on_gpu_properties.h"
#include "shared/source/helpers/pipe_control_args.h"
#include "shared/source/helpers/preamble.h"
@@ -149,7 +148,6 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandListsRegular(
this->makeRayTracingBufferResident(neoDevice->getRTMemoryBackedBuffer());
this->makeSbaTrackingBufferResidentIfL0DebuggerEnabled(ctx.isDebugEnabled);
this->makeCsrTagAllocationResident();
this->encodeKernelArgsBufferAndMakeItResident();
if (ctx.globalInit) {
this->getTagsManagerHeapsAndMakeThemResidentIfSWTagsEnabled(child);
@@ -175,7 +173,7 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandListsRegular(
this->programCsrBaseAddressIfPreemptionModeInitial(ctx.isPreemptionModeInitial, child);
this->programStateSip(ctx.stateSipRequired, child);
this->programActivePartitionConfig(ctx.isProgramActivePartitionConfigRequired, child);
bool shouldProgramVfe = (this->csr->getLogicalStateHelper() || !frontEndTrackingEnabled()) && ctx.frontEndStateDirty;
bool shouldProgramVfe = !frontEndTrackingEnabled() && ctx.frontEndStateDirty;
this->programFrontEndAndClearDirtyFlag(shouldProgramVfe, ctx, child, csr->getStreamProperties());
if (ctx.rtDispatchRequired) {
@@ -184,8 +182,6 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandListsRegular(
}
}
this->writeCsrStreamInlineIfLogicalStateHelperAvailable(child);
for (auto i = 0u; i < numCommandLists; ++i) {
auto commandList = CommandList::fromHandle(commandListHandles[i]);
@@ -207,7 +203,6 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandListsRegular(
this->patchCommands(*commandList, this->csr->getScratchSpaceController()->getScratchPatchAddress());
this->programOneCmdListBatchBufferStart(commandList, child, ctx);
this->mergeOneCmdListPipelinedState(commandList);
this->prefetchMemoryToDeviceAssociatedWithCmdList(commandList);
if (commandList->hasKernelWithAssert()) {
@@ -279,16 +274,11 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandListsCopyOnly(
this->getTagsManagerHeapsAndMakeThemResidentIfSWTagsEnabled(child);
this->csr->programHardwareContext(child);
this->encodeKernelArgsBufferAndMakeItResident();
this->writeCsrStreamInlineIfLogicalStateHelperAvailable(child);
for (auto i = 0u; i < numCommandLists; ++i) {
auto commandList = CommandList::fromHandle(phCommandLists[i]);
ctx.childGpuAddressPositionBeforeDynamicPreamble = child.getCurrentGpuAddressPosition();
this->programOneCmdListBatchBufferStart(commandList, child, ctx);
this->mergeOneCmdListPipelinedState(commandList);
this->prefetchMemoryToDeviceAssociatedWithCmdList(commandList);
}
this->migrateSharedAllocationsIfRequested(ctx.isMigrationRequested, ctx.firstCommandList);
@@ -401,8 +391,7 @@ void CommandQueueHw<gfxCoreFamily>::programFrontEnd(uint64_t scratchAddress, uin
perThreadScratchSpaceSize,
scratchAddress,
device->getMaxNumHwThreads(),
streamProperties,
csr->getLogicalStateHelper());
streamProperties);
csr->setMediaVFEStateDirty(false);
}
@@ -669,9 +658,6 @@ size_t CommandQueueHw<gfxCoreFamily>::estimateLinearStreamSizeInitial(
ctx.globalInit = true;
}
linearStreamSizeEstimate += NEO::EncodeKernelArgsBuffer<GfxFamily>::getKernelArgsBufferCmdsSize(this->csr->getKernelArgsBufferAllocation(),
this->csr->getLogicalStateHelper());
return linearStreamSizeEstimate;
}
@@ -704,7 +690,7 @@ void CommandQueueHw<gfxCoreFamily>::setFrontEndStateProperties(CommandListExecut
if (!frontEndTrackingEnabled()) {
streamProperties.frontEndState.setPropertiesAll(ctx.anyCommandListWithCooperativeKernels, ctx.anyCommandListRequiresDisabledEUFusion,
true, isEngineInstanced);
ctx.frontEndStateDirty |= (streamProperties.frontEndState.isDirty() && !this->csr->getLogicalStateHelper());
ctx.frontEndStateDirty |= streamProperties.frontEndState.isDirty();
} else {
ctx.engineInstanced = isEngineInstanced;
}
@@ -902,8 +888,7 @@ void CommandQueueHw<gfxCoreFamily>::programCsrBaseAddressIfPreemptionModeInitial
NEO::Device *neoDevice = this->device->getNEODevice();
NEO::PreemptionHelper::programCsrBaseAddress<GfxFamily>(cmdStream,
*neoDevice,
this->csr->getPreemptionAllocation(),
this->csr->getLogicalStateHelper());
this->csr->getPreemptionAllocation());
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -912,7 +897,7 @@ void CommandQueueHw<gfxCoreFamily>::programStateSip(bool isStateSipRequired, NEO
return;
}
NEO::Device *neoDevice = this->device->getNEODevice();
NEO::PreemptionHelper::programStateSip<GfxFamily>(cmdStream, *neoDevice, this->csr->getLogicalStateHelper(), &this->csr->getOsContext());
NEO::PreemptionHelper::programStateSip<GfxFamily>(cmdStream, *neoDevice, &this->csr->getOsContext());
this->csr->setSipSentFlag(true);
}
@@ -987,22 +972,6 @@ void CommandQueueHw<gfxCoreFamily>::programActivePartitionConfig(
csrHw->programActivePartitionConfig(cmdStream);
}
// Hands the CSR's kernel-args buffer allocation and logical state helper to
// EncodeKernelArgsBuffer::encodeKernelArgsBufferCmds, then makes that
// allocation resident on the CSR when one exists.
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandQueueHw<gfxCoreFamily>::encodeKernelArgsBufferAndMakeItResident() {
    NEO::EncodeKernelArgsBuffer<GfxFamily>::encodeKernelArgsBufferCmds(
        this->csr->getKernelArgsBufferAllocation(),
        this->csr->getLogicalStateHelper());

    // Re-read the allocation after encoding, as the original code does.
    auto *kernelArgsBuffer = this->csr->getKernelArgsBufferAllocation();
    if (!kernelArgsBuffer) {
        return; // nothing to make resident
    }
    this->csr->makeResident(*kernelArgsBuffer);
}
// When the CSR exposes a logical state helper, asks it to write its stream
// inline into cmdStream (second argument fixed to false); otherwise a no-op.
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandQueueHw<gfxCoreFamily>::writeCsrStreamInlineIfLogicalStateHelperAvailable(NEO::LinearStream &cmdStream) {
    auto *csrHelper = this->csr->getLogicalStateHelper();
    if (!csrHelper) {
        return;
    }
    csrHelper->writeStreamInline(cmdStream, false);
}
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandQueueHw<gfxCoreFamily>::programOneCmdListBatchBufferStart(CommandList *commandList, NEO::LinearStream &commandStream, CommandListExecutionContext &ctx) {
if (this->dispatchCmdListBatchBufferAsPrimary) {
@@ -1112,16 +1081,6 @@ void CommandQueueHw<gfxCoreFamily>::programLastCommandListReturnBbStart(
}
}
// Merges the pipelined state of a non-immediate command list's logical state
// helper into the CSR's helper. Immediate command lists, and lists that report
// no helper, are skipped.
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandQueueHw<gfxCoreFamily>::mergeOneCmdListPipelinedState(CommandList *commandList) {
    if (commandList->getCmdListType() == CommandList::CommandListType::TYPE_IMMEDIATE) {
        return;
    }

    auto *listImp = static_cast<CommandListImp *>(commandList);
    auto *listHelper = listImp->getLogicalStateHelper();
    if (!listHelper) {
        return;
    }

    // The CSR helper is dereferenced without a null check, as in the original.
    this->csr->getLogicalStateHelper()->mergePipelinedState(*listHelper);
}
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandQueueHw<gfxCoreFamily>::collectPrintfContentsFromCommandsList(
CommandList *commandList) {
@@ -1240,7 +1199,7 @@ NEO::SubmissionStatus CommandQueueHw<gfxCoreFamily>::prepareAndSubmitBatchBuffer
*(MI_BATCH_BUFFER_END *)buffer = GfxFamily::cmdInitBatchBufferEnd;
}
if (ctx.isNEODebuggerActive(this->device) || NEO::DebugManager.flags.EnableSWTags.get() || csr->getLogicalStateHelper()) {
if (ctx.isNEODebuggerActive(this->device) || NEO::DebugManager.flags.EnableSWTags.get()) {
cleanLeftoverMemory(outerCommandStream, innerCommandStream);
} else if (this->alignedChildStreamPadding) {
void *paddingPtr = innerCommandStream.getSpace(this->alignedChildStreamPadding);
@@ -1436,7 +1395,7 @@ void CommandQueueHw<gfxCoreFamily>::programRequiredStateComputeModeForCommandLis
NEO::EncodeComputeMode<GfxFamily>::programComputeModeCommandWithSynchronization(commandStream, cmdListRequired.requiredState.stateComputeMode, pipelineSelectArgs,
false, device->getNEODevice()->getRootDeviceEnvironment(), this->csr->isRcs(),
this->csr->getDcFlushSupport(), nullptr);
this->csr->getDcFlushSupport());
this->csr->setStateComputeModeDirty(false);
}
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 Intel Corporation
* Copyright (C) 2022-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -7,7 +7,6 @@
#pragma once
#include "shared/source/gen11/hw_cmds_base.h"
#include "shared/source/helpers/logical_state_helper.h"
#include "level_zero/core/source/cmdlist/cmdlist_hw.h"
#include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.h"

View File

@@ -8,7 +8,6 @@
#pragma once
#include "shared/source/gen12lp/hw_cmds_base.h"
#include "shared/source/gen12lp/hw_info.h"
#include "shared/source/helpers/logical_state_helper.h"
#include "level_zero/core/source/cmdlist/cmdlist_hw.h"
#include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.h"

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2022 Intel Corporation
* Copyright (C) 2020-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -9,7 +9,6 @@
#include "shared/source/gen9/hw_cmds.h"
#include "shared/source/gen9/hw_info.h"
#include "shared/source/helpers/logical_state_helper.h"
#include "level_zero/core/source/cmdlist/cmdlist_hw.h"
#include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.h"

View File

@@ -1,12 +1,11 @@
/*
* Copyright (C) 2022 Intel Corporation
* Copyright (C) 2022-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "shared/source/helpers/logical_state_helper.h"
#include "shared/source/xe_hp_core/hw_cmds_base.h"
#include "level_zero/core/source/cmdlist/cmdlist_hw.h"

View File

@@ -1,12 +1,11 @@
/*
* Copyright (C) 2022 Intel Corporation
* Copyright (C) 2022-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "shared/source/helpers/logical_state_helper.h"
#include "shared/source/xe_hpc_core/hw_cmds_xe_hpc_core_base.h"
#include "level_zero/core/source/cmdlist/cmdlist_hw.h"

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2021-2022 Intel Corporation
* Copyright (C) 2021-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -7,7 +7,6 @@
#pragma once
#include "shared/source/helpers/logical_state_helper.h"
#include "shared/source/xe_hpg_core/hw_cmds_xe_hpg_core_base.h"
#include "level_zero/core/source/cmdlist/cmdlist_hw.h"