2017-12-21 07:45:38 +08:00
/*
2023-01-02 19:14:39 +08:00
* Copyright (C) 2019-2023 Intel Corporation
2017-12-21 07:45:38 +08:00
*
2018-09-19 03:29:07 +08:00
* SPDX-License-Identifier: MIT
2017-12-21 07:45:38 +08:00
*
*/
2022-05-25 22:39:35 +08:00
#include "shared/source/built_ins/sip.h"
2023-01-21 01:45:04 +08:00
#include "shared/source/command_container/encode_surface_state.h"
2020-02-24 05:44:01 +08:00
#include "shared/source/command_stream/command_stream_receiver_hw.h"
#include "shared/source/command_stream/experimental_command_buffer.h"
#include "shared/source/command_stream/linear_stream.h"
#include "shared/source/command_stream/preemption.h"
#include "shared/source/command_stream/scratch_space_controller_base.h"
2021-05-18 10:46:21 +08:00
#include "shared/source/command_stream/stream_properties.h"
2023-01-02 19:14:39 +08:00
#include "shared/source/command_stream/submission_status.h"
2023-01-18 01:04:14 +08:00
#include "shared/source/command_stream/submissions_aggregator.h"
2022-02-22 20:51:29 +08:00
#include "shared/source/command_stream/wait_status.h"
2020-02-24 05:44:01 +08:00
#include "shared/source/debug_settings/debug_settings_manager.h"
2022-11-23 04:07:45 +08:00
#include "shared/source/debugger/debugger_l0.h"
2020-02-24 05:44:01 +08:00
#include "shared/source/device/device.h"
2021-07-30 17:56:58 +08:00
#include "shared/source/direct_submission/direct_submission_controller.h"
2020-02-24 05:44:01 +08:00
#include "shared/source/direct_submission/direct_submission_hw.h"
2023-03-14 01:56:20 +08:00
#include "shared/source/direct_submission/relaxed_ordering_helper.h"
2022-12-29 20:27:52 +08:00
#include "shared/source/execution_environment/execution_environment.h"
2020-02-24 05:44:01 +08:00
#include "shared/source/execution_environment/root_device_environment.h"
#include "shared/source/gmm_helper/page_table_mngr.h"
#include "shared/source/helpers/blit_commands_helper.h"
2023-01-18 23:52:24 +08:00
#include "shared/source/helpers/blit_properties.h"
2023-03-06 20:42:09 +08:00
#include "shared/source/helpers/definitions/command_encoder_args.h"
2021-07-22 16:56:08 +08:00
#include "shared/source/helpers/engine_node_helper.h"
2020-02-24 05:44:01 +08:00
#include "shared/source/helpers/flat_batch_buffer_helper_hw.h"
#include "shared/source/helpers/flush_stamp.h"
2023-02-02 00:23:01 +08:00
#include "shared/source/helpers/gfx_core_helper.h"
2023-02-06 17:05:43 +08:00
#include "shared/source/helpers/hw_info.h"
2022-06-14 22:18:28 +08:00
#include "shared/source/helpers/logical_state_helper.h"
2020-09-24 16:52:53 +08:00
#include "shared/source/helpers/pause_on_gpu_properties.h"
2020-02-24 05:44:01 +08:00
#include "shared/source/helpers/preamble.h"
#include "shared/source/helpers/ptr_math.h"
#include "shared/source/helpers/state_base_address.h"
#include "shared/source/helpers/timestamp_packet.h"
#include "shared/source/indirect_heap/indirect_heap.h"
#include "shared/source/memory_manager/internal_allocation_storage.h"
#include "shared/source/memory_manager/memory_manager.h"
#include "shared/source/os_interface/os_context.h"
2023-03-10 20:28:11 +08:00
#include "shared/source/os_interface/product_helper.h"
2020-02-24 05:44:01 +08:00
#include "shared/source/utilities/tag_allocator.h"
2017-12-21 07:45:38 +08:00
2019-10-11 12:54:10 +08:00
#include "command_stream_receiver_hw_ext.inl"
2019-03-26 18:59:46 +08:00
namespace NEO {
2017-12-21 07:45:38 +08:00
2020-01-16 00:02:47 +08:00
template <typename GfxFamily>
2021-07-30 17:56:58 +08:00
CommandStreamReceiverHw<GfxFamily>::~CommandStreamReceiverHw() {
2022-03-25 21:00:53 +08:00
this->unregisterDirectSubmissionFromController();
2022-04-26 21:29:31 +08:00
if (completionFenceValuePointer) {
completionFenceValue = *completionFenceValuePointer;
completionFenceValuePointer = &completionFenceValue;
}
2021-07-30 17:56:58 +08:00
}
2020-01-16 00:02:47 +08:00
2018-02-20 15:11:24 +08:00
template <typename GfxFamily>
2020-10-28 23:08:37 +08:00
CommandStreamReceiverHw<GfxFamily>::CommandStreamReceiverHw(ExecutionEnvironment &executionEnvironment,
uint32_t rootDeviceIndex,
2020-10-29 22:33:35 +08:00
const DeviceBitfield deviceBitfield)
2020-10-28 23:08:37 +08:00
: CommandStreamReceiver(executionEnvironment, rootDeviceIndex, deviceBitfield) {
2018-10-18 19:40:53 +08:00
2021-12-20 22:37:33 +08:00
const auto &hwInfo = peekHwInfo();
2022-12-09 23:11:27 +08:00
auto &gfxCoreHelper = getGfxCoreHelper();
2022-12-08 20:22:35 +08:00
localMemoryEnabled = gfxCoreHelper.getEnableLocalMemory(hwInfo);
2018-10-18 19:40:53 +08:00
2021-12-20 22:37:33 +08:00
resetKmdNotifyHelper(new KmdNotifyHelper(&hwInfo.capabilityTable.kmdNotifyProperties));
2021-10-21 20:32:45 +08:00
if (DebugManager.flags.FlattenBatchBufferForAUBDump.get() || DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) {
flatBatchBufferHelper.reset(new FlatBatchBufferHelperHw<GfxFamily>(executionEnvironment));
}
2023-03-16 08:12:49 +08:00
defaultSshSize = HeapSize::getDefaultHeapSize(EncodeStates<GfxFamily>::getSshHeapSize());
2021-04-01 04:21:59 +08:00
canUse4GbHeaps = are4GbHeapsAvailable();
2018-10-18 19:40:53 +08:00
2022-12-08 20:22:35 +08:00
timestampPacketWriteEnabled = gfxCoreHelper.timestampPacketWriteSupported();
2018-10-18 19:40:53 +08:00
if (DebugManager.flags.EnableTimestampPacket.get() != -1) {
timestampPacketWriteEnabled = !!DebugManager.flags.EnableTimestampPacket.get();
}
2022-06-14 22:18:28 +08:00
2022-07-04 22:16:44 +08:00
logicalStateHelper.reset(LogicalStateHelper::create<GfxFamily>());
2022-06-14 22:18:28 +08:00
2019-03-29 07:49:23 +08:00
createScratchSpaceController();
2021-11-30 22:41:26 +08:00
configurePostSyncWriteOffset();
2022-10-11 18:37:19 +08:00
2023-01-20 11:04:15 +08:00
this->dcFlushSupport = NEO::MemorySynchronizationCommands<GfxFamily>::getDcFlushEnable(true, *executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]);
2023-06-05 21:29:53 +08:00
this->dshSupported = hwInfo.capabilityTable.supportsImages;
2018-02-20 15:11:24 +08:00
}
2018-02-02 17:33:31 +08:00
template <typename GfxFamily>
2022-01-07 22:53:31 +08:00
SubmissionStatus CommandStreamReceiverHw<GfxFamily>::flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) {
return SubmissionStatus::SUCCESS;
2018-02-02 17:33:31 +08:00
}
2017-12-21 07:45:38 +08:00
// Appends an MI_BATCH_BUFFER_END command, initialized from the family's default
// command template, to commandStream.
// patchLocation is optional: when non-null it receives the address of the emitted command.
template <typename GfxFamily>
inline void CommandStreamReceiverHw<GfxFamily>::addBatchBufferEnd(LinearStream &commandStream, void **patchLocation) {
    using BatchBufferEndCmd = typename GfxFamily::MI_BATCH_BUFFER_END;

    auto *bbEnd = commandStream.getSpaceForCmd<BatchBufferEndCmd>();
    *bbEnd = GfxFamily::cmdInitBatchBufferEnd;

    if (patchLocation == nullptr) {
        return;
    }
    *patchLocation = bbEnd;
}
2020-01-16 00:02:47 +08:00
template <typename GfxFamily>
2022-11-29 00:57:36 +08:00
inline void CommandStreamReceiverHw<GfxFamily>::programEndingCmd(LinearStream &commandStream, void **patchLocation, bool directSubmissionEnabled,
bool hasRelaxedOrderingDependencies, bool sipWaAllowed) {
2020-01-16 00:02:47 +08:00
if (directSubmissionEnabled) {
2022-03-11 02:44:28 +08:00
uint64_t startAddress = commandStream.getGraphicsAllocation()->getGpuAddress() + commandStream.getUsed();
if (DebugManager.flags.BatchBufferStartPrepatchingWaEnabled.get() == 0) {
startAddress = 0;
2022-03-10 21:34:16 +08:00
}
2022-11-29 22:57:51 +08:00
bool relaxedOrderingEnabled = false;
if (isBlitterDirectSubmissionEnabled() && EngineHelpers::isBcs(this->osContext->getEngineType())) {
relaxedOrderingEnabled = this->blitterDirectSubmission->isRelaxedOrderingEnabled();
} else if (isDirectSubmissionEnabled()) {
relaxedOrderingEnabled = this->directSubmission->isRelaxedOrderingEnabled();
}
2022-11-21 22:55:39 +08:00
bool indirect = false;
2022-11-29 22:57:51 +08:00
if (relaxedOrderingEnabled && hasRelaxedOrderingDependencies) {
2022-11-21 22:55:39 +08:00
NEO::EncodeSetMMIO<GfxFamily>::encodeREG(commandStream, CS_GPR_R0, CS_GPR_R3);
NEO::EncodeSetMMIO<GfxFamily>::encodeREG(commandStream, CS_GPR_R0 + 4, CS_GPR_R3 + 4);
indirect = true;
}
*patchLocation = commandStream.getSpace(0);
NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programBatchBufferStart(&commandStream, startAddress, false, indirect, false);
2022-03-10 21:34:16 +08:00
2020-01-16 00:02:47 +08:00
} else {
2022-11-04 22:56:15 +08:00
if (sipWaAllowed) {
2022-12-15 22:57:31 +08:00
auto &rootDeviceEnvironment = peekRootDeviceEnvironment();
PreemptionHelper::programStateSipEndWa<GfxFamily>(commandStream, rootDeviceEnvironment);
2022-03-10 21:34:16 +08:00
}
2020-01-16 00:02:47 +08:00
this->addBatchBufferEnd(commandStream, patchLocation);
}
}
2017-12-21 07:45:38 +08:00
template <typename GfxFamily>
2018-07-05 17:23:28 +08:00
inline void CommandStreamReceiverHw<GfxFamily>::addBatchBufferStart(MI_BATCH_BUFFER_START *commandBufferMemory, uint64_t startAddress, bool secondary) {
2020-04-28 00:55:26 +08:00
MI_BATCH_BUFFER_START cmd = GfxFamily::cmdInitBatchBufferStart;
2021-12-17 02:02:45 +08:00
cmd.setBatchBufferStartAddress(startAddress);
2020-04-28 00:55:26 +08:00
cmd.setAddressSpaceIndicator(MI_BATCH_BUFFER_START::ADDRESS_SPACE_INDICATOR_PPGTT);
2018-07-05 17:23:28 +08:00
if (secondary) {
2020-04-28 00:55:26 +08:00
cmd.setSecondLevelBatchBuffer(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH);
2018-07-05 17:23:28 +08:00
}
2018-04-04 17:34:46 +08:00
if (DebugManager.flags.FlattenBatchBufferForAUBDump.get()) {
flatBatchBufferHelper->registerBatchBufferStartAddress(reinterpret_cast<uint64_t>(commandBufferMemory), startAddress);
}
2020-04-28 00:55:26 +08:00
*commandBufferMemory = cmd;
2017-12-21 07:45:38 +08:00
}
2019-05-16 23:17:53 +08:00
// Returns the command stream size needed for preamble programming.
//
// VFE commands are counted while the media VFE state is dirty. The additional preamble
// commands, and the semaphore delay command when ForceSemaphoreDelayBetweenWaits is
// greater than -1, are counted only while the preamble has not been sent yet.
template <typename GfxFamily>
inline size_t CommandStreamReceiverHw<GfxFamily>::getRequiredCmdSizeForPreamble(Device &device) const {
    size_t requiredSize = 0;

    if (mediaVfeStateDirty) {
        requiredSize += PreambleHelper<GfxFamily>::getVFECommandsSize();
    }

    if (this->isPreambleSent) {
        return requiredSize;
    }

    requiredSize += PreambleHelper<GfxFamily>::getAdditionalCommandsSize(device);
    if (DebugManager.flags.ForceSemaphoreDelayBetweenWaits.get() > -1) {
        requiredSize += PreambleHelper<GfxFamily>::getSemaphoreDelayCommandSize();
    }
    return requiredSize;
}
2020-05-27 21:30:31 +08:00
template <typename GfxFamily>
2020-05-23 00:11:28 +08:00
void CommandStreamReceiverHw<GfxFamily>::programHardwareContext(LinearStream &cmdStream) {
programEnginePrologue(cmdStream);
2020-05-27 21:30:31 +08:00
}
// Returns the size of the commands emitted by programHardwareContext, which equals
// the prologue command size.
template <typename GfxFamily>
size_t CommandStreamReceiverHw<GfxFamily>::getCmdsSizeForHardwareContext() const {
    const size_t prologueSize = this->getCmdSizeForPrologue();
    return prologueSize;
}
2022-11-30 22:57:18 +08:00
// Submits a blitter (BCS) task stream. Only valid in immediate dispatch mode.
//
// Steps, in order:
//  1. When dispatchBcsFlags.flushTaskCount is set, append an MI_FLUSH_DW with post-sync
//     that writes (current task count + 1) to the tag allocation address.
//  2. Program the hardware context into the CSR stream and make the required
//     allocations resident.
//  3. Terminate the task stream and pad it to a cache line.
//  4. If the CSR stream received any commands, chain it to the task stream with an
//     MI_BATCH_BUFFER_START and submit the CSR stream; otherwise submit the task stream directly.
//
// Returns a stamp with the incremented task count on success. On a failed submission the
// stream task count is restored and a stamp derived from the submission status is returned.
template <typename GfxFamily>
CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushBcsTask(LinearStream &commandStreamTask, size_t commandStreamTaskStart,
                                                                 const DispatchBcsFlags &dispatchBcsFlags, const HardwareInfo &hwInfo) {
    UNRECOVERABLE_IF(this->dispatchMode != DispatchMode::ImmediateDispatch);

    uint64_t taskStartAddress = commandStreamTask.getGpuBase() + commandStreamTaskStart;

    if (dispatchBcsFlags.flushTaskCount) {
        uint64_t tagGpuAddress = getTagAllocation()->getGpuAddress();
        TaskCountType nextTaskCount = peekTaskCount() + 1;

        NEO::EncodeDummyBlitWaArgs dummyBlitWaArgs{false, const_cast<RootDeviceEnvironment *>(&(this->peekRootDeviceEnvironment()))};
        NEO::MiFlushArgs flushArgs{dummyBlitWaArgs};
        flushArgs.commandWithPostSync = true;
        flushArgs.notifyEnable = isUsedNotifyEnableForPostSync();

        NEO::EncodeMiFlushDW<GfxFamily>::programWithWa(commandStreamTask, tagGpuAddress, nextTaskCount, flushArgs);
    }

    auto &csrStream = getCS(getRequiredCmdStreamSizeAligned(dispatchBcsFlags));
    size_t csrStartOffset = csrStream.getUsed();

    programHardwareContext(csrStream);

    if (globalFenceAllocation) {
        makeResident(*globalFenceAllocation);
    }
    if (dispatchBcsFlags.flushTaskCount) {
        makeResident(*getTagAllocation());
    }
    makeResident(*commandStreamTask.getGraphicsAllocation());

    // The CSR stream has to be submitted only if the hardware context programming added commands to it.
    const bool submitCSR = (csrStream.getUsed() != csrStartOffset);

    void *bbEndLocation = nullptr;
    programEndingCmd(commandStreamTask, &bbEndLocation, isBlitterDirectSubmissionEnabled(), dispatchBcsFlags.hasRelaxedOrderingDependencies, false);
    EncodeNoop<GfxFamily>::alignToCacheLine(commandStreamTask);

    if (submitCSR) {
        // Chain CSR stream -> task stream.
        auto *chainingCmd = reinterpret_cast<MI_BATCH_BUFFER_START *>(csrStream.getSpace(sizeof(MI_BATCH_BUFFER_START)));
        addBatchBufferStart(chainingCmd, taskStartAddress, false);
        EncodeNoop<GfxFamily>::alignToCacheLine(csrStream);

        this->makeResident(*csrStream.getGraphicsAllocation());
    }

    size_t startOffset = commandStreamTaskStart;
    if (submitCSR) {
        startOffset = csrStartOffset;
    }
    auto &streamToSubmit = submitCSR ? csrStream : commandStreamTask;

    BatchBuffer batchBuffer{streamToSubmit.getGraphicsAllocation(), startOffset, 0, taskStartAddress, nullptr,
                            false, false, QueueThrottle::MEDIUM, NEO::QueueSliceCount::defaultSliceCount,
                            streamToSubmit.getUsed(), &streamToSubmit, bbEndLocation, this->getNumClients(), (submitCSR || dispatchBcsFlags.hasStallingCmds),
                            dispatchBcsFlags.hasRelaxedOrderingDependencies};

    // Tag the stream with the upcoming task count before submitting; roll back on failure.
    updateStreamTaskCount(streamToSubmit, taskCount + 1);

    auto submissionStatus = flushHandler(batchBuffer, this->getResidencyAllocations());
    if (submissionStatus != SubmissionStatus::SUCCESS) {
        updateStreamTaskCount(streamToSubmit, taskCount);

        CompletionStamp failureStamp = {CompletionStamp::getTaskCountFromSubmissionStatusError(submissionStatus)};
        return failureStamp;
    }

    if (dispatchBcsFlags.flushTaskCount) {
        this->latestFlushedTaskCount = this->taskCount + 1;
    }
    ++taskCount;

    CompletionStamp successStamp = {taskCount, taskLevel, flushStamp->peekStamp()};
    return successStamp;
}
2023-06-01 21:21:48 +08:00
// Flushes an immediate command stream.
//
// The flow has four phases:
//  1. Seed ImmediateFlushData with the current dirty state (pipeline select, front end,
//     state compute mode, state base address). If scratch space is required, update the
//     scratch space controller and merge its dirty results into the flush data.
//  2. Run the "handle" helpers for each state, then the jump-to-immediate handling.
//  3. Obtain the CSR stream sized by flushData.estimatedSize and run the matching
//     "dispatch" helpers that write commands into it.
//  4. Handle allocation residency and send the batch buffer; the result of that send
//     is returned to the caller.
template <typename GfxFamily>
CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushImmediateTask(
    LinearStream &immediateCommandStream,
    size_t immediateCommandStreamStart,
    ImmediateDispatchFlags &dispatchFlags,
    Device &device) {

    ImmediateFlushData flushData;
    flushData.pipelineSelectFullConfigurationNeeded = !getPreambleSetFlag();
    flushData.frontEndFullConfigurationNeeded = getMediaVFEStateDirty();
    flushData.stateComputeModeFullConfigurationNeeded = getStateComputeModeDirty();
    flushData.stateBaseAddressFullConfigurationNeeded = getGSBAStateDirty();

    const bool scratchSpaceRequired = (this->requiredScratchSize > 0) || (this->requiredPrivateScratchSize > 0);
    if (scratchSpaceRequired) {
        bool frontEndDirtyFromScratch = false;
        bool stateBaseAddressDirtyFromScratch = false;

        scratchSpaceController->setRequiredScratchSpace(dispatchFlags.sshCpuBase,
                                                        0u,
                                                        this->requiredScratchSize,
                                                        this->requiredPrivateScratchSize,
                                                        this->taskCount,
                                                        *this->osContext,
                                                        stateBaseAddressDirtyFromScratch,
                                                        frontEndDirtyFromScratch);

        flushData.frontEndFullConfigurationNeeded |= frontEndDirtyFromScratch;
        flushData.stateBaseAddressFullConfigurationNeeded |= stateBaseAddressDirtyFromScratch;

        if (auto *scratchAllocation = scratchSpaceController->getScratchSpaceAllocation()) {
            makeResident(*scratchAllocation);
        }
        if (auto *privateScratchAllocation = scratchSpaceController->getPrivateScratchSpaceAllocation()) {
            makeResident(*privateScratchAllocation);
        }
    }

    // Phase 2: per-state handling (must precede getCS, which uses flushData.estimatedSize).
    handleImmediateFlushPipelineSelectState(dispatchFlags, flushData);
    handleImmediateFlushFrontEndState(dispatchFlags, flushData);
    handleImmediateFlushStateComputeModeState(dispatchFlags, flushData);
    handleImmediateFlushStateBaseAddressState(dispatchFlags, flushData, device);
    handleImmediateFlushOneTimeContextInitState(dispatchFlags, flushData, device);
    handleImmediateFlushJumpToImmediate(flushData);

    // Phase 3: command dispatch into the CSR stream.
    auto &csrStream = getCS(flushData.estimatedSize);
    flushData.csrStartOffset = csrStream.getUsed();

    dispatchImmediateFlushPipelineSelectCommand(flushData, csrStream);
    dispatchImmediateFlushFrontEndCommand(flushData, device, csrStream);
    dispatchImmediateFlushStateComputeModeCommand(flushData, csrStream);
    dispatchImmediateFlushStateBaseAddressCommand(flushData, csrStream, device);
    dispatchImmediateFlushOneTimeContextInitCommand(flushData, csrStream, device);
    dispatchImmediateFlushJumpToImmediateCommand(immediateCommandStream, immediateCommandStreamStart, flushData, csrStream);
    dispatchImmediateFlushClientBufferCommands(dispatchFlags, immediateCommandStream, flushData);

    // Phase 4: residency and submission.
    handleImmediateFlushAllocationsResidency(device, immediateCommandStream, flushData, csrStream);

    return handleImmediateFlushSendBatchBuffer(immediateCommandStream,
                                               immediateCommandStreamStart,
                                               dispatchFlags,
                                               flushData,
                                               csrStream);
}
2017-12-21 07:45:38 +08:00
template <typename GfxFamily>
CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
LinearStream &commandStreamTask,
size_t commandStreamStartTask,
2022-03-28 20:55:12 +08:00
const IndirectHeap *dsh,
const IndirectHeap *ioh,
const IndirectHeap *ssh,
2022-11-22 21:53:59 +08:00
TaskCountType taskLevel,
2018-08-01 16:01:41 +08:00
DispatchFlags &dispatchFlags,
Device &device) {
2023-05-15 22:41:44 +08:00
using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START;
using MI_BATCH_BUFFER_END = typename GfxFamily::MI_BATCH_BUFFER_END;
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
2017-12-21 07:45:38 +08:00
2023-02-11 01:54:13 +08:00
auto &rootDeviceEnvironment = this->peekRootDeviceEnvironment();
2017-12-21 07:45:38 +08:00
DEBUG_BREAK_IF(&commandStreamTask == &commandStream);
2018-08-01 16:01:41 +08:00
DEBUG_BREAK_IF(!(dispatchFlags.preemptionMode == PreemptionMode::Disabled ? device.getPreemptionMode() == PreemptionMode::Disabled : true));
2020-06-16 19:19:11 +08:00
DEBUG_BREAK_IF(taskLevel >= CompletionStamp::notReady);
2017-12-21 07:45:38 +08:00
DBG_LOG(LogTaskCounts, __FUNCTION__, "Line: ", __LINE__, "taskLevel", taskLevel);
auto levelClosed = false;
2020-09-17 14:58:06 +08:00
bool implicitFlush = dispatchFlags.implicitFlush || dispatchFlags.blocking || DebugManager.flags.ForceImplicitFlush.get();
2017-12-21 07:45:38 +08:00
void *currentPipeControlForNooping = nullptr;
2018-02-15 15:29:57 +08:00
void *epiloguePipeControlLocation = nullptr;
2022-07-01 16:15:04 +08:00
PipeControlArgs args;
2017-12-21 07:45:38 +08:00
2022-12-16 18:52:55 +08:00
if (DebugManager.flags.ForceCsrFlushing.get()) {
2018-06-13 02:33:03 +08:00
flushBatchedSubmissions();
}
2020-06-19 15:50:04 +08:00
2019-01-28 20:44:59 +08:00
if (detectInitProgrammingFlagsRequired(dispatchFlags)) {
2018-06-13 02:33:03 +08:00
initProgrammingFlags();
}
2021-12-20 22:37:33 +08:00
const auto &hwInfo = peekHwInfo();
2022-12-09 23:11:27 +08:00
auto &gfxCoreHelper = getGfxCoreHelper();
2022-03-02 20:43:02 +08:00
2023-03-10 02:29:45 +08:00
bool hasStallingCmdsOnTaskStream = false;
2023-02-08 01:23:45 +08:00
if (dispatchFlags.blocking || dispatchFlags.dcFlush || dispatchFlags.guardCommandBufferWithPipeControl || this->heapStorageRequiresRecyclingTag) {
2018-04-04 17:34:46 +08:00
if (this->dispatchMode == DispatchMode::ImmediateDispatch) {
2022-06-29 02:52:33 +08:00
// for ImmediateDispatch we will send this right away, therefore this pipe control will close the level
// for BatchedSubmissions it will be nooped and only last ppc in batch will be emitted.
2017-12-21 07:45:38 +08:00
levelClosed = true;
2022-06-29 02:52:33 +08:00
// if we guard with ppc, flush dc as well to speed up completion latency
2021-12-20 22:34:39 +08:00
if (dispatchFlags.guardCommandBufferWithPipeControl) {
2023-06-16 02:05:12 +08:00
dispatchFlags.dcFlush = this->dcFlushSupport;
2018-02-13 17:01:20 +08:00
}
2017-12-21 07:45:38 +08:00
}
2023-02-08 01:23:45 +08:00
this->heapStorageRequiresRecyclingTag = false;
2018-03-05 18:03:38 +08:00
epiloguePipeControlLocation = ptrOffset(commandStreamTask.getCpuBase(), commandStreamTask.getUsed());
2018-02-15 15:29:57 +08:00
2018-09-06 15:03:07 +08:00
if ((dispatchFlags.outOfOrderExecutionAllowed || timestampPacketWriteEnabled) &&
2018-08-30 17:05:18 +08:00
!dispatchFlags.dcFlush) {
2018-02-15 15:29:57 +08:00
currentPipeControlForNooping = epiloguePipeControlLocation;
2017-12-21 07:45:38 +08:00
}
2023-03-10 02:29:45 +08:00
hasStallingCmdsOnTaskStream = true;
2018-11-23 17:32:15 +08:00
auto address = getTagAllocation()->getGpuAddress();
2020-04-27 03:48:59 +08:00
2022-10-11 18:37:19 +08:00
args.dcFlushEnable = getDcFlushRequired(dispatchFlags.dcFlush);
2022-03-25 21:00:53 +08:00
args.notifyEnable = isUsedNotifyEnableForPostSync();
args.tlbInvalidation |= dispatchFlags.memoryMigrationRequired;
args.textureCacheInvalidationEnable |= dispatchFlags.textureCacheFlush;
args.workloadPartitionOffset = isMultiTileOperationEnabled();
2023-01-13 00:58:18 +08:00
args.stateCacheInvalidationEnable = dispatchFlags.stateCacheInvalidation;
2022-07-21 22:28:10 +08:00
MemorySynchronizationCommands<GfxFamily>::addBarrierWithPostSyncOperation(
2022-03-25 21:00:53 +08:00
commandStreamTask,
2022-07-21 22:28:10 +08:00
PostSyncMode::ImmediateData,
2022-03-25 21:00:53 +08:00
address,
taskCount + 1,
2023-02-11 01:54:13 +08:00
rootDeviceEnvironment,
2022-03-25 21:00:53 +08:00
args);
2017-12-21 07:45:38 +08:00
2020-07-18 00:33:16 +08:00
DBG_LOG(LogTaskCounts, __FUNCTION__, "Line: ", __LINE__, "taskCount", peekTaskCount());
2018-04-04 17:34:46 +08:00
if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) {
flatBatchBufferHelper->setPatchInfoData(PatchInfoData(address, 0u,
PatchInfoAllocationType::TagAddress,
commandStreamTask.getGraphicsAllocation()->getGpuAddress(),
commandStreamTask.getUsed() - 2 * sizeof(uint64_t),
PatchInfoAllocationType::Default));
flatBatchBufferHelper->setPatchInfoData(PatchInfoData(address, 0u,
PatchInfoAllocationType::TagValue,
commandStreamTask.getGraphicsAllocation()->getGpuAddress(),
commandStreamTask.getUsed() - sizeof(uint64_t),
PatchInfoAllocationType::Default));
}
2017-12-21 07:45:38 +08:00
}
2022-03-25 21:00:53 +08:00
this->latestSentTaskCount = taskCount + 1;
2017-12-21 07:45:38 +08:00
if (DebugManager.flags.ForceSLML3Config.get()) {
dispatchFlags.useSLM = true;
}
2021-12-20 22:37:33 +08:00
auto newL3Config = PreambleHelper<GfxFamily>::getL3Config(hwInfo, dispatchFlags.useSLM);
2022-09-21 00:46:15 +08:00
2022-09-22 09:44:06 +08:00
dispatchFlags.pipelineSelectArgs.systolicPipelineSelectSupport = this->pipelineSupportFlags.systolicMode;
handlePipelineSelectStateTransition(dispatchFlags);
2017-12-21 07:45:38 +08:00
2022-12-08 20:22:35 +08:00
auto requiresCoherency = gfxCoreHelper.forceNonGpuCoherencyWA(dispatchFlags.requiresCoherency);
2023-02-10 17:51:27 +08:00
this->streamProperties.stateComputeMode.setPropertiesAll(requiresCoherency, dispatchFlags.numGrfRequired,
2023-03-10 07:12:09 +08:00
dispatchFlags.threadArbitrationPolicy, device.getPreemptionMode());
2021-12-09 00:22:18 +08:00
2017-12-21 07:45:38 +08:00
csrSizeRequestFlags.l3ConfigChanged = this->lastSentL3Config != newL3Config;
csrSizeRequestFlags.preemptionRequestChanged = this->lastPreemptionMode != dispatchFlags.preemptionMode;
2020-08-21 19:05:01 +08:00
2021-11-18 03:51:43 +08:00
csrSizeRequestFlags.activePartitionsChanged = isProgramActivePartitionConfigRequired();
2017-12-21 07:45:38 +08:00
bool stateBaseAddressDirty = false;
2018-11-22 22:16:20 +08:00
bool checkVfeStateDirty = false;
2022-10-27 22:12:05 +08:00
if (ssh && (requiredScratchSize || requiredPrivateScratchSize)) {
2022-03-28 20:55:12 +08:00
scratchSpaceController->setRequiredScratchSpace(ssh->getCpuBase(),
2020-10-05 17:39:15 +08:00
0u,
2018-11-22 22:16:20 +08:00
requiredScratchSize,
2019-06-28 15:37:04 +08:00
requiredPrivateScratchSize,
2018-11-22 22:16:20 +08:00
this->taskCount,
2019-09-16 20:59:54 +08:00
*this->osContext,
2018-11-22 22:16:20 +08:00
stateBaseAddressDirty,
checkVfeStateDirty);
if (checkVfeStateDirty) {
2018-12-13 18:06:28 +08:00
setMediaVFEStateDirty(true);
2017-12-21 07:45:38 +08:00
}
2019-07-15 15:12:53 +08:00
if (scratchSpaceController->getScratchSpaceAllocation()) {
makeResident(*scratchSpaceController->getScratchSpaceAllocation());
}
if (scratchSpaceController->getPrivateScratchSpaceAllocation()) {
makeResident(*scratchSpaceController->getPrivateScratchSpaceAllocation());
}
2017-12-21 07:45:38 +08:00
}
2020-01-29 21:15:10 +08:00
if (dispatchFlags.usePerDssBackedBuffer) {
2019-09-04 22:44:27 +08:00
if (!perDssBackedBuffer) {
createPerDssBackedBuffer(device);
}
makeResident(*perDssBackedBuffer);
}
2022-06-25 00:50:31 +08:00
if (!logicalStateHelper) {
2022-09-21 00:46:15 +08:00
handleFrontEndStateTransition(dispatchFlags);
2020-11-17 18:42:29 +08:00
}
2018-08-07 15:07:50 +08:00
auto &commandStreamCSR = this->getCS(getRequiredCmdStreamSizeAligned(dispatchFlags, device));
2018-05-15 15:46:22 +08:00
auto commandStreamStartCSR = commandStreamCSR.getUsed();
2023-03-10 02:29:45 +08:00
TimestampPacketHelper::programCsrDependenciesForTimestampPacketContainer<GfxFamily>(commandStreamCSR, dispatchFlags.csrDependencies, false);
2023-01-20 00:11:39 +08:00
TimestampPacketHelper::programCsrDependenciesForForMultiRootDeviceSyncContainer<GfxFamily>(commandStreamCSR, dispatchFlags.csrDependencies);
2019-01-25 17:20:32 +08:00
2021-11-18 03:51:43 +08:00
programActivePartitionConfigFlushTask(commandStreamCSR);
2019-10-11 12:54:10 +08:00
programEngineModeCommands(commandStreamCSR, dispatchFlags);
2022-02-03 00:30:03 +08:00
2021-09-29 23:59:41 +08:00
if (pageTableManager.get() && !pageTableManagerInitialized) {
pageTableManagerInitialized = pageTableManager->initPageTableManagerRegisters(this);
2019-11-07 01:14:30 +08:00
}
2020-05-27 21:30:31 +08:00
2020-05-23 00:11:28 +08:00
programHardwareContext(commandStreamCSR);
2019-09-10 22:13:11 +08:00
programPipelineSelect(commandStreamCSR, dispatchFlags.pipelineSelectArgs);
2023-03-11 05:51:52 +08:00
programComputeMode(commandStreamCSR, dispatchFlags, hwInfo);
2021-11-04 20:54:18 +08:00
programL3(commandStreamCSR, newL3Config);
programPreamble(commandStreamCSR, device, newL3Config);
2018-05-15 15:46:22 +08:00
programMediaSampler(commandStreamCSR, dispatchFlags);
2021-09-30 19:10:58 +08:00
addPipeControlBefore3dState(commandStreamCSR, dispatchFlags);
2020-11-27 03:02:18 +08:00
programPerDssBackedBuffer(commandStreamCSR, device, dispatchFlags);
2023-03-31 04:03:12 +08:00
if (isRayTracingStateProgramingNeeded(device)) {
dispatchRayTracingStateCommand(commandStreamCSR, device);
}
2018-05-15 15:46:22 +08:00
2019-05-29 21:37:54 +08:00
programVFEState(commandStreamCSR, dispatchFlags, device.getDeviceInfo().maxFrontEndThreads);
2017-12-21 07:45:38 +08:00
2019-10-08 01:54:39 +08:00
programPreemption(commandStreamCSR, dispatchFlags);
2022-08-03 19:54:08 +08:00
EncodeKernelArgsBuffer<GfxFamily>::encodeKernelArgsBufferCmds(kernelArgsBufferAllocation, logicalStateHelper.get());
2023-02-28 20:20:30 +08:00
if (dispatchFlags.isStallingCommandsOnNextFlushRequired) {
2022-02-03 00:30:03 +08:00
programStallingCommandsForBarrier(commandStreamCSR, dispatchFlags);
}
2017-12-21 07:45:38 +08:00
2023-05-15 22:41:44 +08:00
programStateBaseAddress(dsh, ioh, ssh, dispatchFlags, device, commandStreamCSR, stateBaseAddressDirty);
2022-04-22 01:57:54 +08:00
addPipeControlBeforeStateSip(commandStreamCSR, device);
programStateSip(commandStreamCSR, device);
2017-12-21 07:45:38 +08:00
DBG_LOG(LogTaskCounts, __FUNCTION__, "Line: ", __LINE__, "this->taskLevel", (uint32_t)this->taskLevel);
2023-05-15 22:41:44 +08:00
bool samplerCacheFlushBetweenRedescribedSurfaceReadsRequired = hwInfo.workaroundTable.flags.waSamplerCacheFlushBetweenRedescribedSurfaceReads;
if (samplerCacheFlushBetweenRedescribedSurfaceReadsRequired) {
programSamplerCacheFlushBetweenRedescribedSurfaceReads(commandStreamCSR);
2018-01-10 21:05:34 +08:00
}
2018-07-05 17:23:28 +08:00
if (experimentalCmdBuffer.get() != nullptr) {
size_t startingOffset = experimentalCmdBuffer->programExperimentalCommandBuffer<GfxFamily>();
experimentalCmdBuffer->injectBufferStart<GfxFamily>(commandStreamCSR, startingOffset);
}
2019-08-28 01:14:24 +08:00
if (requiresInstructionCacheFlush) {
2020-04-27 03:48:59 +08:00
PipeControlArgs args;
args.instructionCacheInvalidateEnable = true;
2022-07-21 22:28:10 +08:00
MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(commandStreamCSR, args);
2019-08-28 01:14:24 +08:00
requiresInstructionCacheFlush = false;
}
2020-11-04 21:02:31 +08:00
// Add a Pipe Control if we have a dependency on a previous walker to avoid concurrency issues.
2017-12-21 07:45:38 +08:00
if (taskLevel > this->taskLevel) {
2022-11-29 21:28:05 +08:00
const auto programPipeControl = !timestampPacketWriteEnabled;
2021-12-10 00:18:18 +08:00
if (programPipeControl) {
2020-04-27 03:48:59 +08:00
PipeControlArgs args;
2022-07-21 22:28:10 +08:00
MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(commandStreamCSR, args);
2018-08-30 17:05:18 +08:00
}
2017-12-21 07:45:38 +08:00
this->taskLevel = taskLevel;
2020-07-18 00:33:16 +08:00
DBG_LOG(LogTaskCounts, __FUNCTION__, "Line: ", __LINE__, "this->taskCount", peekTaskCount());
2017-12-21 07:45:38 +08:00
}
2020-07-06 15:34:15 +08:00
if (DebugManager.flags.ForcePipeControlPriorToWalker.get()) {
forcePipeControl(commandStreamCSR);
}
2017-12-21 07:45:38 +08:00
this->makeResident(*tagAllocation);
2020-02-06 03:00:08 +08:00
if (globalFenceAllocation) {
makeResident(*globalFenceAllocation);
}
if (preemptionAllocation) {
2019-06-28 03:33:05 +08:00
makeResident(*preemptionAllocation);
2020-02-06 03:00:08 +08:00
}
2017-12-21 07:45:38 +08:00
2023-05-15 22:41:44 +08:00
bool debuggingEnabled = device.getDebugger() != nullptr;
2022-04-22 01:57:54 +08:00
if (dispatchFlags.preemptionMode == PreemptionMode::MidThread || debuggingEnabled) {
2021-04-16 20:52:30 +08:00
makeResident(*SipKernel::getSipKernel(device).getSipAllocation());
2022-04-22 01:57:54 +08:00
}
2022-09-07 01:30:55 +08:00
if (debuggingEnabled && debugSurface) {
2022-04-22 01:57:54 +08:00
makeResident(*debugSurface);
2018-03-13 00:18:00 +08:00
}
2018-07-05 17:23:28 +08:00
if (experimentalCmdBuffer.get() != nullptr) {
experimentalCmdBuffer->makeResidentAllocations();
}
2021-03-04 20:37:56 +08:00
if (workPartitionAllocation) {
makeResident(*workPartitionAllocation);
}
2022-08-03 19:54:08 +08:00
if (kernelArgsBufferAllocation) {
makeResident(*kernelArgsBufferAllocation);
}
2023-03-31 04:03:12 +08:00
auto rtBuffer = device.getRTMemoryBackedBuffer();
if (rtBuffer) {
makeResident(*rtBuffer);
}
2022-06-15 18:29:35 +08:00
if (logicalStateHelper) {
2022-07-04 22:16:44 +08:00
logicalStateHelper->writeStreamInline(commandStreamCSR, false);
2022-06-15 18:29:35 +08:00
}
2017-12-21 07:45:38 +08:00
// If the CSR has work in its CS, flush it before the task
bool submitTask = commandStreamStartTask != commandStreamTask.getUsed();
2022-11-10 03:02:46 +08:00
bool submitCSR = (commandStreamStartCSR != commandStreamCSR.getUsed());
2017-12-21 07:45:38 +08:00
bool submitCommandStreamFromCsr = false;
void *bbEndLocation = nullptr;
auto bbEndPaddingSize = this->dispatchMode == DispatchMode::ImmediateDispatch ? 0 : sizeof(MI_BATCH_BUFFER_START) - sizeof(MI_BATCH_BUFFER_END);
2018-03-01 05:50:41 +08:00
size_t chainedBatchBufferStartOffset = 0;
GraphicsAllocation *chainedBatchBuffer = nullptr;
2020-01-16 00:02:47 +08:00
bool directSubmissionEnabled = isDirectSubmissionEnabled();
2017-12-21 07:45:38 +08:00
if (submitTask) {
2022-11-29 00:57:36 +08:00
programEndingCmd(commandStreamTask, &bbEndLocation, directSubmissionEnabled, dispatchFlags.hasRelaxedOrderingDependencies, true);
2021-11-18 06:36:00 +08:00
EncodeNoop<GfxFamily>::emitNoop(commandStreamTask, bbEndPaddingSize);
EncodeNoop<GfxFamily>::alignToCacheLine(commandStreamTask);
2017-12-21 07:45:38 +08:00
if (submitCSR) {
2018-03-01 05:50:41 +08:00
chainedBatchBufferStartOffset = commandStreamCSR.getUsed();
chainedBatchBuffer = commandStreamTask.getGraphicsAllocation();
2017-12-21 07:45:38 +08:00
// Add MI_BATCH_BUFFER_START to chain from CSR -> Task
auto pBBS = reinterpret_cast<MI_BATCH_BUFFER_START *>(commandStreamCSR.getSpace(sizeof(MI_BATCH_BUFFER_START)));
2018-07-05 17:23:28 +08:00
addBatchBufferStart(pBBS, ptrOffset(commandStreamTask.getGraphicsAllocation()->getGpuAddress(), commandStreamStartTask), false);
2018-04-04 17:34:46 +08:00
if (DebugManager.flags.FlattenBatchBufferForAUBDump.get()) {
flatBatchBufferHelper->registerCommandChunk(commandStreamTask.getGraphicsAllocation()->getGpuAddress(),
reinterpret_cast<uint64_t>(commandStreamTask.getCpuBase()),
commandStreamStartTask,
static_cast<uint64_t>(ptrDiff(bbEndLocation,
commandStreamTask.getGraphicsAllocation()->getGpuAddress())) +
sizeof(MI_BATCH_BUFFER_START));
}
2017-12-21 07:45:38 +08:00
auto commandStreamAllocation = commandStreamTask.getGraphicsAllocation();
DEBUG_BREAK_IF(commandStreamAllocation == nullptr);
this->makeResident(*commandStreamAllocation);
2021-11-18 06:36:00 +08:00
EncodeNoop<GfxFamily>::alignToCacheLine(commandStreamCSR);
2017-12-21 07:45:38 +08:00
submitCommandStreamFromCsr = true;
2019-08-08 01:33:40 +08:00
} else if (dispatchFlags.epilogueRequired) {
this->makeResident(*commandStreamCSR.getGraphicsAllocation());
2017-12-21 07:45:38 +08:00
}
2021-01-29 01:30:56 +08:00
this->programEpilogue(commandStreamCSR, device, &bbEndLocation, dispatchFlags);
2019-08-08 01:33:40 +08:00
2017-12-21 07:45:38 +08:00
} else if (submitCSR) {
2022-11-29 00:57:36 +08:00
programEndingCmd(commandStreamCSR, &bbEndLocation, directSubmissionEnabled, dispatchFlags.hasRelaxedOrderingDependencies, true);
2021-11-18 06:36:00 +08:00
EncodeNoop<GfxFamily>::emitNoop(commandStreamCSR, bbEndPaddingSize);
EncodeNoop<GfxFamily>::alignToCacheLine(commandStreamCSR);
2017-12-21 07:45:38 +08:00
DEBUG_BREAK_IF(commandStreamCSR.getUsed() > commandStreamCSR.getMaxAvailableSpace());
submitCommandStreamFromCsr = true;
}
2022-11-19 05:02:29 +08:00
uint64_t taskStartAddress = commandStreamTask.getGpuBase() + commandStreamStartTask;
2017-12-21 07:45:38 +08:00
size_t startOffset = submitCommandStreamFromCsr ? commandStreamStartCSR : commandStreamStartTask;
auto &streamToSubmit = submitCommandStreamFromCsr ? commandStreamCSR : commandStreamTask;
2022-11-19 05:02:29 +08:00
BatchBuffer batchBuffer{streamToSubmit.getGraphicsAllocation(), startOffset, chainedBatchBufferStartOffset, taskStartAddress, chainedBatchBuffer,
2020-01-16 00:02:47 +08:00
dispatchFlags.requiresCoherency, dispatchFlags.lowPriority, dispatchFlags.throttle, dispatchFlags.sliceCount,
2023-03-10 02:29:45 +08:00
streamToSubmit.getUsed(), &streamToSubmit, bbEndLocation, this->getNumClients(), (submitCSR || dispatchFlags.hasStallingCmds || hasStallingCmdsOnTaskStream),
2022-11-27 04:10:32 +08:00
dispatchFlags.hasRelaxedOrderingDependencies};
2023-03-15 23:10:06 +08:00
updateStreamTaskCount(streamToSubmit, taskCount + 1);
2017-12-21 07:45:38 +08:00
2022-08-18 06:33:49 +08:00
if (submitCSR || submitTask) {
2017-12-21 07:45:38 +08:00
if (this->dispatchMode == DispatchMode::ImmediateDispatch) {
2022-11-07 19:50:09 +08:00
auto submissionStatus = flushHandler(batchBuffer, this->getResidencyAllocations());
if (submissionStatus != SubmissionStatus::SUCCESS) {
2023-03-15 23:10:06 +08:00
updateStreamTaskCount(streamToSubmit, taskCount);
2022-11-07 19:50:09 +08:00
CompletionStamp completionStamp = {CompletionStamp::getTaskCountFromSubmissionStatusError(submissionStatus)};
return completionStamp;
}
2022-03-25 21:00:53 +08:00
if (dispatchFlags.blocking || dispatchFlags.dcFlush || dispatchFlags.guardCommandBufferWithPipeControl) {
2021-02-23 16:48:08 +08:00
this->latestFlushedTaskCount = this->taskCount + 1;
}
2017-12-21 07:45:38 +08:00
} else {
2018-08-16 17:18:05 +08:00
auto commandBuffer = new CommandBuffer(device);
2017-12-21 07:45:38 +08:00
commandBuffer->batchBuffer = batchBuffer;
2018-09-12 15:47:01 +08:00
commandBuffer->surfaces.swap(this->getResidencyAllocations());
2017-12-21 07:45:38 +08:00
commandBuffer->batchBufferEndLocation = bbEndLocation;
commandBuffer->taskCount = this->taskCount + 1;
commandBuffer->flushStamp->replaceStampObject(dispatchFlags.flushStampReference);
2018-02-15 15:29:57 +08:00
commandBuffer->pipeControlThatMayBeErasedLocation = currentPipeControlForNooping;
commandBuffer->epiloguePipeControlLocation = epiloguePipeControlLocation;
2022-07-01 16:15:04 +08:00
commandBuffer->epiloguePipeControlArgs = args;
2017-12-21 07:45:38 +08:00
this->submissionAggregator->recordCommandBuffer(commandBuffer);
}
} else {
2022-05-27 11:58:07 +08:00
this->makeSurfacePackNonResident(this->getResidencyAllocations(), true);
2017-12-21 07:45:38 +08:00
}
2022-10-11 01:39:04 +08:00
if (this->dispatchMode == DispatchMode::BatchedDispatch) {
// check if we are not over the budget, if we are do implicit flush
if (getMemoryManager()->isMemoryBudgetExhausted()) {
if (this->totalMemoryUsed >= device.getDeviceInfo().globalMemSize / 4) {
implicitFlush = true;
}
2020-09-17 14:58:06 +08:00
}
2022-10-11 01:39:04 +08:00
if (DebugManager.flags.PerformImplicitFlushEveryEnqueueCount.get() != -1) {
if ((taskCount + 1) % DebugManager.flags.PerformImplicitFlushEveryEnqueueCount.get() == 0) {
implicitFlush = true;
}
2017-12-21 07:45:38 +08:00
}
2022-10-11 01:39:04 +08:00
if (this->newResources) {
implicitFlush = true;
this->newResources = false;
}
implicitFlush |= checkImplicitFlushForGpuIdle();
2020-09-22 22:29:34 +08:00
2022-10-11 01:39:04 +08:00
if (implicitFlush) {
this->flushBatchedSubmissions();
}
2017-12-21 07:45:38 +08:00
}
++taskCount;
2020-07-18 00:33:16 +08:00
DBG_LOG(LogTaskCounts, __FUNCTION__, "Line: ", __LINE__, "taskCount", peekTaskCount());
2017-12-21 07:45:38 +08:00
DBG_LOG(LogTaskCounts, __FUNCTION__, "Line: ", __LINE__, "Current taskCount:", tagAddress ? *tagAddress : 0);
CompletionStamp completionStamp = {
taskCount,
this->taskLevel,
2018-11-26 21:04:52 +08:00
flushStamp->peekStamp()};
2017-12-21 07:45:38 +08:00
2022-10-11 01:39:04 +08:00
if (levelClosed) {
this->taskLevel++;
}
2018-02-08 23:00:20 +08:00
2017-12-21 07:45:38 +08:00
return completionStamp;
}
2020-07-06 15:34:15 +08:00
// Appends two single barriers to the given stream: the first one is emitted with
// csStallOnly set, the second one with csStallOnly cleared. The same argument
// object is reused for both calls.
template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::forcePipeControl(NEO::LinearStream &commandStreamCSR) {
    using BarrierCommands = MemorySynchronizationCommands<GfxFamily>;

    PipeControlArgs barrierArgs;

    barrierArgs.csStallOnly = true;
    BarrierCommands::addSingleBarrier(commandStreamCSR, barrierArgs);

    barrierArgs.csStallOnly = false;
    BarrierCommands::addSingleBarrier(commandStreamCSR, barrierArgs);
}
2022-03-08 22:18:31 +08:00
// Programs the compute mode command (with synchronization) into `stream`, but only
// when the tracked stateComputeMode properties are dirty. After programming, both
// the CSR-level dirty flag and the stream-property dirty state are cleared.
// `hwInfo` is not used by this implementation.
template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::programComputeMode(LinearStream &stream, DispatchFlags &dispatchFlags, const HardwareInfo &hwInfo) {
    auto &computeModeProperties = this->streamProperties.stateComputeMode;
    if (!computeModeProperties.isDirty()) {
        // nothing changed since the last submission
        return;
    }

    EncodeComputeMode<GfxFamily>::programComputeModeCommandWithSynchronization(
        stream, computeModeProperties, dispatchFlags.pipelineSelectArgs,
        hasSharedHandles(), this->peekRootDeviceEnvironment(), isRcs(), this->dcFlushSupport, logicalStateHelper.get());

    this->setStateComputeModeDirty(false);
    computeModeProperties.clearIsDirty();
}
2019-10-03 20:38:49 +08:00
template <typename GfxFamily>
2021-11-06 09:42:54 +08:00
inline void CommandStreamReceiverHw<GfxFamily>::programStallingCommandsForBarrier(LinearStream &cmdStream, DispatchFlags &dispatchFlags) {
2019-10-03 20:38:49 +08:00
auto barrierTimestampPacketNodes = dispatchFlags.barrierTimestampPacketNodes;
if (barrierTimestampPacketNodes && barrierTimestampPacketNodes->peekNodes().size() != 0) {
2021-12-10 03:31:27 +08:00
programStallingPostSyncCommandsForBarrier(cmdStream, *barrierTimestampPacketNodes->peekNodes()[0]);
barrierTimestampPacketNodes->makeResident(*this);
2019-10-03 20:38:49 +08:00
} else {
2021-11-06 09:42:54 +08:00
programStallingNoPostSyncCommandsForBarrier(cmdStream);
2019-10-03 20:38:49 +08:00
}
}
2017-12-21 07:45:38 +08:00
// Submits every command buffer currently recorded in the submission aggregator.
//
// In ImmediateDispatch mode there is nothing batched, so the function returns true
// right away. Otherwise it holds ownershipMutex and, while the recorded list is not
// empty, aggregates buffers, chains buffers that share the primary's inspectionId into
// a single submission (by patching the previous buffer's ending section in place),
// optionally rewrites the last epilogue barrier, and calls flush().
//
// Returns true when every flush() reported SubmissionStatus::SUCCESS (or when there was
// nothing to submit); returns false as soon as one flush() fails.
template <typename GfxFamily>
2019-11-24 21:50:41 +08:00
inline bool CommandStreamReceiverHw<GfxFamily>::flushBatchedSubmissions() {
2018-04-04 17:34:46 +08:00
if (this->dispatchMode == DispatchMode::ImmediateDispatch) {
2019-11-24 21:50:41 +08:00
return true;
2017-12-21 07:45:38 +08:00
}
typedef typename GfxFamily::MI_BATCH_BUFFER_START MI_BATCH_BUFFER_START;
2018-02-13 17:01:20 +08:00
typedef typename GfxFamily::PIPE_CONTROL PIPE_CONTROL;
2018-08-06 20:55:04 +08:00
// held until the function returns
std::unique_lock<MutexType> lockGuard(ownershipMutex);
2019-11-24 21:50:41 +08:00
bool submitResult = true;
2017-12-21 07:45:38 +08:00
auto &commandBufferList = this->submissionAggregator->peekCmdBufferList();
if (!commandBufferList.peekIsEmpty()) {
2018-12-04 22:11:29 +08:00
// budget passed to the aggregator: half of the global memory size reported by the
// device of the first recorded command buffer
const auto totalMemoryBudget = static_cast<size_t>(commandBufferList.peekHead()->device.getDeviceInfo().globalMemSize / 2);
2018-08-16 17:18:05 +08:00
2017-12-21 07:45:38 +08:00
ResidencyContainer surfacesForSubmit;
ResourcePackage resourcePackage;
2018-02-15 15:29:57 +08:00
void *currentPipeControlForNooping = nullptr;
void *epiloguePipeControlLocation = nullptr;
2017-12-21 07:45:38 +08:00
// each iteration produces one submission
while (!commandBufferList.peekIsEmpty()) {
size_t totalUsedSize = 0u;
2018-12-04 22:11:29 +08:00
this->submissionAggregator->aggregateCommandBuffers(resourcePackage, totalUsedSize, totalMemoryBudget, osContext->getContextId());
2017-12-21 07:45:38 +08:00
// the primary buffer is the one handed to flush(); the "last*" values start from
// it and are overwritten by every buffer chained behind it
auto primaryCmdBuffer = commandBufferList.removeFrontOne();
auto nextCommandBuffer = commandBufferList.peekHead();
auto currentBBendLocation = primaryCmdBuffer->batchBufferEndLocation;
auto lastTaskCount = primaryCmdBuffer->taskCount;
2022-07-01 16:15:04 +08:00
auto lastPipeControlArgs = primaryCmdBuffer->epiloguePipeControlArgs;
2017-12-21 07:45:38 +08:00
2023-01-26 11:58:18 +08:00
// computed once from the primary buffer's args; used below as the byte count when
// zeroing a pipe control
auto pipeControlLocationSize = MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(peekRootDeviceEnvironment(), lastPipeControlArgs.tlbInvalidation);
2022-08-19 23:56:22 +08:00
2017-12-21 07:45:38 +08:00
// collects the flush stamp references of all buffers in this submission so they
// can be updated together after flush()
FlushStampUpdateHelper flushStampUpdateHelper;
flushStampUpdateHelper.insert(primaryCmdBuffer->flushStamp->getStampReference());
2018-02-15 15:29:57 +08:00
currentPipeControlForNooping = primaryCmdBuffer->pipeControlThatMayBeErasedLocation;
epiloguePipeControlLocation = primaryCmdBuffer->epiloguePipeControlLocation;
2017-12-21 07:45:38 +08:00
2018-04-04 17:34:46 +08:00
if (DebugManager.flags.FlattenBatchBufferForAUBDump.get()) {
2022-05-10 00:21:21 +08:00
flatBatchBufferHelper->registerCommandChunk(primaryCmdBuffer->batchBuffer, sizeof(MI_BATCH_BUFFER_START));
2018-04-04 17:34:46 +08:00
}
2021-07-13 19:56:41 +08:00
2017-12-21 07:45:38 +08:00
// chain every following buffer that carries the same inspectionId as the primary
while (nextCommandBuffer && nextCommandBuffer->inspectionId == primaryCmdBuffer->inspectionId) {
2021-07-13 19:56:41 +08:00
2022-06-29 02:52:33 +08:00
// noop pipe control
2018-02-15 15:29:57 +08:00
if (currentPipeControlForNooping) {
2018-04-04 17:34:46 +08:00
if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) {
2023-01-28 02:07:04 +08:00
flatBatchBufferHelper->removePipeControlData(pipeControlLocationSize, currentPipeControlForNooping, peekRootDeviceEnvironment());
2018-04-04 17:34:46 +08:00
}
2018-02-15 15:29:57 +08:00
memset(currentPipeControlForNooping, 0, pipeControlLocationSize);
2017-12-21 07:45:38 +08:00
}
2022-06-29 02:52:33 +08:00
// obtain next candidate for nooping
2018-02-15 15:29:57 +08:00
currentPipeControlForNooping = nextCommandBuffer->pipeControlThatMayBeErasedLocation;
2022-06-29 02:52:33 +08:00
// track epilogue pipe control
2018-02-15 15:29:57 +08:00
epiloguePipeControlLocation = nextCommandBuffer->epiloguePipeControlLocation;
2017-12-21 07:45:38 +08:00
flushStampUpdateHelper.insert(nextCommandBuffer->flushStamp->getStampReference());
2018-08-24 21:23:45 +08:00
// GPU address where the next buffer starts, and the CPU address of that same spot
auto nextCommandBufferAddress = nextCommandBuffer->batchBuffer.commandBufferAllocation->getGpuAddress();
2017-12-21 07:45:38 +08:00
auto offsetedCommandBuffer = (uint64_t)ptrOffset(nextCommandBufferAddress, nextCommandBuffer->batchBuffer.startOffset);
2021-07-13 19:56:41 +08:00
auto cpuAddressForCommandBufferDestination = ptrOffset(nextCommandBuffer->batchBuffer.commandBufferAllocation->getUnderlyingBuffer(), nextCommandBuffer->batchBuffer.startOffset);
// CPU address just past the current buffer's ending section: end location plus
// one MI_BATCH_BUFFER_START, rounded up to a cache line
auto cpuAddressForCurrentCommandBufferEndingSection = alignUp(ptrOffset(currentBBendLocation, sizeof(MI_BATCH_BUFFER_START)), MemoryConstants::cacheLineSize);
2022-06-29 02:52:33 +08:00
// if we point to exact same command buffer, then batch buffer start is not needed at all
2021-07-13 19:56:41 +08:00
if (cpuAddressForCurrentCommandBufferEndingSection == cpuAddressForCommandBufferDestination) {
// the next buffer begins right where this one's ending section stops: zero the
// ending section instead of writing a jump
memset(currentBBendLocation, 0u, ptrDiff(cpuAddressForCurrentCommandBufferEndingSection, currentBBendLocation));
} else {
// overwrite the ending command with a batch buffer start that targets the next buffer
addBatchBufferStart((MI_BATCH_BUFFER_START *)currentBBendLocation, offsetedCommandBuffer, false);
}
2018-04-04 17:34:46 +08:00
if (DebugManager.flags.FlattenBatchBufferForAUBDump.get()) {
flatBatchBufferHelper->registerCommandChunk(nextCommandBuffer->batchBuffer, sizeof(MI_BATCH_BUFFER_START));
}
2017-12-21 07:45:38 +08:00
currentBBendLocation = nextCommandBuffer->batchBufferEndLocation;
lastTaskCount = nextCommandBuffer->taskCount;
2022-07-01 16:15:04 +08:00
lastPipeControlArgs = nextCommandBuffer->epiloguePipeControlArgs;
2017-12-21 07:45:38 +08:00
// advance before removing: the buffer just chained is still the list's front here
nextCommandBuffer = nextCommandBuffer->next;
2021-07-13 19:56:41 +08:00
2017-12-21 07:45:38 +08:00
// NOTE(review): return value is discarded; presumably this releases the chained
// command buffer object - confirm against removeFrontOne()
commandBufferList.removeFrontOne();
}
// copy the aggregated resources into the residency list passed to flush()
surfacesForSubmit.reserve(resourcePackage.size() + 1);
for (auto &surface : resourcePackage) {
surfacesForSubmit.push_back(surface);
}
2022-06-29 02:52:33 +08:00
// make sure we flush DC if needed
2022-10-11 18:37:19 +08:00
if (getDcFlushRequired(epiloguePipeControlLocation)) {
2022-07-01 16:15:04 +08:00
lastPipeControlArgs.dcFlushEnable = true;
2022-03-04 22:07:37 +08:00
// debug flag overrides the DC flush request
if (DebugManager.flags.DisableDcFlushInEpilogue.get()) {
2022-07-01 16:15:04 +08:00
lastPipeControlArgs.dcFlushEnable = false;
2022-03-04 22:07:37 +08:00
}
2022-07-21 22:28:10 +08:00
// rewrite the last epilogue barrier in place: immediate-data post-sync write of
// lastTaskCount to the tag allocation's GPU address
MemorySynchronizationCommands<GfxFamily>::setBarrierWithPostSyncOperation(
2022-07-01 16:15:04 +08:00
epiloguePipeControlLocation,
2022-07-21 22:28:10 +08:00
PostSyncMode::ImmediateData,
2022-07-01 16:15:04 +08:00
getTagAllocation()->getGpuAddress(),
lastTaskCount,
2023-01-26 11:58:18 +08:00
peekRootDeviceEnvironment(),
2022-07-01 16:15:04 +08:00
lastPipeControlArgs);
2018-03-09 21:48:42 +08:00
}
2019-11-24 21:50:41 +08:00
2020-09-22 20:19:07 +08:00
// the submission ends at the ending location of the last chained buffer
primaryCmdBuffer->batchBuffer.endCmdPtr = currentBBendLocation;
2022-01-07 22:53:31 +08:00
// NOTE(review): on failure the loop exits without calling makeSurfacePackNonResident
// and without clearing resourcePackage, buffers still in the list stay recorded, and
// totalMemoryUsed is still reset below - confirm this is the intended error path
if (this->flush(primaryCmdBuffer->batchBuffer, surfacesForSubmit) != SubmissionStatus::SUCCESS) {
2019-11-24 21:50:41 +08:00
submitResult = false;
break;
}
2017-12-21 07:45:38 +08:00
2022-06-29 02:52:33 +08:00
// after flush task level is closed
2017-12-21 07:45:38 +08:00
this->taskLevel++;
2019-11-24 21:50:41 +08:00
flushStampUpdateHelper.updateAll(flushStamp->peekStamp());
2017-12-21 07:45:38 +08:00
2021-10-28 17:21:44 +08:00
if (!isUpdateTagFromWaitEnabled()) {
this->latestFlushedTaskCount = lastTaskCount;
}
2022-05-27 11:58:07 +08:00
// NOTE(review): surfacesForSubmit is declared outside this loop and is only appended
// to above; presumably the `true` argument empties it for the next iteration - confirm
this->makeSurfacePackNonResident(surfacesForSubmit, true);
2017-12-21 07:45:38 +08:00
resourcePackage.clear();
}
this->totalMemoryUsed = 0;
}
2019-11-24 21:50:41 +08:00
return submitResult;
2017-12-21 07:45:38 +08:00
}
2022-11-30 22:57:18 +08:00
// Returns the command stream size required for a BCS dispatch: the hardware context
// commands plus one MI_BATCH_BUFFER_START. `dispatchBcsFlags` is not consulted here.
template <typename GfxFamily>
size_t CommandStreamReceiverHw<GfxFamily>::getRequiredCmdStreamSize(const DispatchBcsFlags &dispatchBcsFlags) {
    using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START;

    size_t requiredSize = getCmdsSizeForHardwareContext();
    requiredSize += sizeof(MI_BATCH_BUFFER_START);
    return requiredSize;
}
// Returns the BCS command stream size requirement rounded up to the cache line size.
template <typename GfxFamily>
size_t CommandStreamReceiverHw<GfxFamily>::getRequiredCmdStreamSizeAligned(const DispatchBcsFlags &dispatchBcsFlags) {
    const size_t unalignedSize = getRequiredCmdStreamSize(dispatchBcsFlags);
    return alignUp(unalignedSize, MemoryConstants::cacheLineSize);
}
2018-01-24 17:11:37 +08:00
template <typename GfxFamily>
2018-08-07 15:07:50 +08:00
size_t CommandStreamReceiverHw<GfxFamily>::getRequiredCmdStreamSizeAligned(const DispatchFlags &dispatchFlags, Device &device) {
size_t size = getRequiredCmdStreamSize(dispatchFlags, device);
2018-01-24 17:11:37 +08:00
return alignUp(size, MemoryConstants::cacheLineSize);
}
2017-12-21 07:45:38 +08:00
template <typename GfxFamily>
2018-08-07 15:07:50 +08:00
size_t CommandStreamReceiverHw<GfxFamily>::getRequiredCmdStreamSize(const DispatchFlags &dispatchFlags, Device &device) {
2018-08-16 17:18:05 +08:00
size_t size = getRequiredCmdSizeForPreamble(device);
2021-07-08 23:08:37 +08:00
size += getRequiredStateBaseAddressSize(device);
2022-04-22 01:57:54 +08:00
if (device.getDebugger()) {
size += device.getDebugger()->getSbaTrackingCommandsSize(NEO::Debugger::SbaAddresses::trackedAddressCount);
}
2022-04-20 22:12:20 +08:00
if (!this->isStateSipSent || device.getDebugger()) {
2021-10-17 20:21:29 +08:00
size += PreemptionHelper::getRequiredStateSipCmdSize<GfxFamily>(device, isRcs());
2018-11-05 18:52:19 +08:00
}
2022-08-19 23:56:22 +08:00
size += MemorySynchronizationCommands<GfxFamily>::getSizeForSingleBarrier(false);
2018-04-20 19:55:54 +08:00
size += sizeof(typename GfxFamily::MI_BATCH_BUFFER_START);
2018-04-13 17:05:09 +08:00
size += getCmdSizeForL3Config();
2022-03-15 00:40:32 +08:00
if (this->streamProperties.stateComputeMode.isDirty()) {
2022-03-10 01:15:48 +08:00
size += getCmdSizeForComputeMode();
}
2019-09-10 22:13:11 +08:00
size += getCmdSizeForMediaSampler(dispatchFlags.pipelineSelectArgs.mediaSamplerRequired);
2018-04-13 17:05:09 +08:00
size += getCmdSizeForPipelineSelect();
size += getCmdSizeForPreemption(dispatchFlags);
2023-03-31 04:03:12 +08:00
if ((dispatchFlags.usePerDssBackedBuffer && !isPerDssBackedBufferSent) || isRayTracingStateProgramingNeeded(device)) {
2021-09-23 05:39:42 +08:00
size += getCmdSizeForPerDssBackedBuffer(device.getHardwareInfo());
}
2019-08-08 01:33:40 +08:00
size += getCmdSizeForEpilogue(dispatchFlags);
2020-05-27 21:30:31 +08:00
size += getCmdsSizeForHardwareContext();
2021-11-18 03:51:43 +08:00
if (csrSizeRequestFlags.activePartitionsChanged) {
size += getCmdSizeForActivePartitionConfig();
}
2017-12-21 07:45:38 +08:00
2021-11-25 17:31:14 +08:00
if (executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo()->workaroundTable.flags.waSamplerCacheFlushBetweenRedescribedSurfaceReads) {
2018-01-10 21:05:34 +08:00
if (this->samplerCacheFlushRequired != SamplerCacheFlushState::samplerCacheFlushNotRequired) {
size += sizeof(typename GfxFamily::PIPE_CONTROL);
}
}
2018-07-05 17:23:28 +08:00
if (experimentalCmdBuffer.get() != nullptr) {
size += experimentalCmdBuffer->getRequiredInjectionSize<GfxFamily>();
}
2019-01-25 17:20:32 +08:00
2023-03-10 02:29:45 +08:00
size += TimestampPacketHelper::getRequiredCmdStreamSize<GfxFamily>(dispatchFlags.csrDependencies, false);
2023-01-20 00:11:39 +08:00
size += TimestampPacketHelper::getRequiredCmdStreamSizeForMultiRootDeviceSyncNodesContainer<GfxFamily>(dispatchFlags.csrDependencies);
2019-01-25 17:20:32 +08:00
2022-08-03 19:54:08 +08:00
size += EncodeKernelArgsBuffer<GfxFamily>::getKernelArgsBufferCmdsSize(kernelArgsBufferAllocation, logicalStateHelper.get());
2023-02-28 20:20:30 +08:00
if (dispatchFlags.isStallingCommandsOnNextFlushRequired) {
2021-11-06 09:42:54 +08:00
size += getCmdSizeForStallingCommands(dispatchFlags);
2018-10-15 16:35:45 +08:00
}
2019-10-03 20:38:49 +08:00
2019-08-28 01:14:24 +08:00
if (requiresInstructionCacheFlush) {
2022-08-19 23:56:22 +08:00
size += MemorySynchronizationCommands<GfxFamily>::getSizeForSingleBarrier(false);
2019-08-28 01:14:24 +08:00
}
2020-07-06 15:34:15 +08:00
if (DebugManager.flags.ForcePipeControlPriorToWalker.get()) {
2022-08-19 23:56:22 +08:00
size += 2 * MemorySynchronizationCommands<GfxFamily>::getSizeForSingleBarrier(false);
2020-07-06 15:34:15 +08:00
}
2018-01-24 17:11:37 +08:00
return size;
2017-12-21 07:45:38 +08:00
}
2019-08-21 20:08:51 +08:00
// Returns the size of the pipeline select command when it has to be programmed, or 0.
// It is needed when the media sampler config changed, the systolic pipeline select
// mode flag is set, or the preamble has not been sent yet - and pipeline select is
// not reported as already programmed.
template <typename GfxFamily>
inline size_t CommandStreamReceiverHw<GfxFamily>::getCmdSizeForPipelineSelect() const {
    const bool selectionStateChanged = csrSizeRequestFlags.mediaSamplerConfigChanged ||
                                       csrSizeRequestFlags.systolicPipelineSelectMode ||
                                       !isPreambleSent;

    if (!selectionStateChanged || isPipelineSelectAlreadyProgrammed()) {
        return 0;
    }
    return PreambleHelper<GfxFamily>::getCmdSizeForPipelineSelect(peekRootDeviceEnvironment());
}
2017-12-21 07:45:38 +08:00
template <typename GfxFamily>
2022-11-22 21:53:59 +08:00
inline WaitStatus CommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(TaskCountType taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) {
2022-03-23 22:36:07 +08:00
const auto params = kmdNotifyHelper->obtainTimeoutParams(useQuickKmdSleep, *getTagAddress(), taskCountToWait, flushStampToWait, throttle, this->isKmdWaitModeActive(),
this->isAnyDirectSubmissionEnabled());
2018-03-21 17:00:49 +08:00
2022-03-23 22:36:07 +08:00
auto status = waitForCompletionWithTimeout(params, taskCountToWait);
2022-01-21 00:56:19 +08:00
if (status == WaitStatus::NotReady) {
2021-09-17 21:05:26 +08:00
waitForFlushStamp(flushStampToWait);
2022-06-29 02:52:33 +08:00
// now call blocking wait, this is to ensure that task count is reached
2022-03-23 22:36:07 +08:00
status = waitForCompletionWithTimeout(WaitParams{false, false, 0}, taskCountToWait);
2017-12-21 07:45:38 +08:00
}
2022-01-21 00:56:19 +08:00
// If GPU hang occured, then propagate it to the caller.
if (status == WaitStatus::GpuHang) {
return status;
}
2022-04-22 01:41:33 +08:00
for (uint32_t i = 0; i < this->activePartitions; i++) {
2023-06-06 23:11:09 +08:00
UNRECOVERABLE_IF(*(ptrOffset(getTagAddress(), (i * this->immWritePostSyncWriteOffset))) < taskCountToWait);
2022-04-22 01:41:33 +08:00
}
2018-03-22 16:41:17 +08:00
2018-04-10 16:26:59 +08:00
if (kmdNotifyHelper->quickKmdSleepForSporadicWaitsEnabled()) {
kmdNotifyHelper->updateLastWaitForCompletionTimestamp();
2018-03-22 16:41:17 +08:00
}
2022-01-21 00:56:19 +08:00
return WaitStatus::Ready;
2017-12-21 07:45:38 +08:00
}
template <typename GfxFamily>
2019-07-31 14:57:00 +08:00
inline void CommandStreamReceiverHw<GfxFamily>::programPreemption(LinearStream &csr, DispatchFlags &dispatchFlags) {
PreemptionHelper::programCmdStream<GfxFamily>(csr, dispatchFlags.preemptionMode, this->lastPreemptionMode, preemptionAllocation);
2018-01-08 22:58:02 +08:00
this->lastPreemptionMode = dispatchFlags.preemptionMode;
2017-12-21 07:45:38 +08:00
}
2018-04-13 17:05:09 +08:00
// Returns the command stream size needed to switch from the last programmed
// preemption mode to the mode requested in `dispatchFlags`.
template <typename GfxFamily>
inline size_t CommandStreamReceiverHw<GfxFamily>::getCmdSizeForPreemption(const DispatchFlags &dispatchFlags) const {
    const auto &requestedMode = dispatchFlags.preemptionMode;
    return PreemptionHelper::getRequiredCmdStreamSize<GfxFamily>(requestedMode, this->lastPreemptionMode);
}
2018-11-05 18:52:19 +08:00
// Programs STATE_SIP through PreemptionHelper unless it has already been sent by this
// CSR, and marks it as sent afterwards.
template <typename GfxFamily>
inline void CommandStreamReceiverHw<GfxFamily>::programStateSip(LinearStream &cmdStream, Device &device) {
    if (this->isStateSipSent) {
        return;
    }

    PreemptionHelper::programStateSip<GfxFamily>(cmdStream, device, logicalStateHelper.get(), this->osContext);
    this->isStateSipSent = true;
}
2017-12-21 07:45:38 +08:00
template <typename GfxFamily>
2021-11-04 20:54:18 +08:00
inline void CommandStreamReceiverHw<GfxFamily>::programPreamble(LinearStream &csr, Device &device, uint32_t &newL3Config) {
2017-12-21 07:45:38 +08:00
if (!this->isPreambleSent) {
2022-06-22 20:10:21 +08:00
PreambleHelper<GfxFamily>::programPreamble(&csr, device, newL3Config, this->preemptionAllocation, logicalStateHelper.get());
2017-12-21 07:45:38 +08:00
this->isPreambleSent = true;
this->lastSentL3Config = newL3Config;
}
}
template <typename GfxFamily>
2019-05-29 21:37:54 +08:00
inline void CommandStreamReceiverHw<GfxFamily>::programVFEState(LinearStream &csr, DispatchFlags &dispatchFlags, uint32_t maxFrontEndThreads) {
2017-12-21 07:45:38 +08:00
if (mediaVfeStateDirty) {
2020-09-02 17:38:54 +08:00
if (dispatchFlags.additionalKernelExecInfo != AdditionalKernelExecInfo::NotApplicable) {
lastAdditionalKernelExecInfo = dispatchFlags.additionalKernelExecInfo;
}
2020-11-17 18:42:29 +08:00
if (dispatchFlags.kernelExecutionType != KernelExecutionType::NotApplicable) {
lastKernelExecutionType = dispatchFlags.kernelExecutionType;
}
2021-03-31 22:11:31 +08:00
auto &hwInfo = peekHwInfo();
2022-09-21 00:46:15 +08:00
auto isCooperative = dispatchFlags.kernelExecutionType == KernelExecutionType::Concurrent;
auto disableOverdispatch = (dispatchFlags.additionalKernelExecInfo != AdditionalKernelExecInfo::NotSet);
2023-03-11 05:51:52 +08:00
this->streamProperties.frontEndState.setPropertiesAll(isCooperative, dispatchFlags.disableEUFusion, disableOverdispatch, osContext->isEngineInstanced());
2022-09-21 00:46:15 +08:00
2022-12-09 23:11:27 +08:00
auto &gfxCoreHelper = getGfxCoreHelper();
2022-12-08 20:22:35 +08:00
auto engineGroupType = gfxCoreHelper.getEngineGroupType(getOsContext().getEngineType(), getOsContext().getEngineUsage(), hwInfo);
2021-04-02 02:26:29 +08:00
auto pVfeState = PreambleHelper<GfxFamily>::getSpaceForVfeState(&csr, hwInfo, engineGroupType);
PreambleHelper<GfxFamily>::programVfeState(
2022-12-20 23:56:50 +08:00
pVfeState, peekRootDeviceEnvironment(), requiredScratchSize, getScratchPatchAddress(),
2022-06-25 00:50:31 +08:00
maxFrontEndThreads, streamProperties, logicalStateHelper.get());
2021-03-31 22:11:31 +08:00
auto commandOffset = PreambleHelper<GfxFamily>::getScratchSpaceAddressOffsetForVfeState(&csr, pVfeState);
2019-08-13 17:34:56 +08:00
if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) {
flatBatchBufferHelper->collectScratchSpacePatchInfo(getScratchPatchAddress(), commandOffset, csr);
}
2018-12-13 18:06:28 +08:00
setMediaVFEStateDirty(false);
2023-03-11 05:51:52 +08:00
this->streamProperties.frontEndState.clearIsDirty();
2017-12-21 07:45:38 +08:00
}
}
2018-01-19 20:29:25 +08:00
// Generic implementation: emits no commands for the media sampler.
// NOTE(review): presumably specialized for hardware families that need it - confirm.
template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::programMediaSampler(LinearStream &commandStream, DispatchFlags &dispatchFlags) {
    // intentionally empty
}
// Generic implementation: media sampler programming takes no command stream space,
// regardless of `mediaSamplerRequired`.
template <typename GfxFamily>
size_t CommandStreamReceiverHw<GfxFamily>::getCmdSizeForMediaSampler(bool mediaSamplerRequired) const {
    constexpr size_t noCommandsSize = 0u;
    return noCommandsSize;
}
2018-03-23 02:02:58 +08:00
2018-03-14 18:07:51 +08:00
// Registers patch info entries for a STATE_BASE_ADDRESS command located at
// `baseAddress` + `commandOffset`.
//
// The dynamic state heap entry is only registered when `imagesSupported` is true (so
// `dsh` is only dereferenced in that case). General state and surface state entries
// are always registered; `ssh` and `ioh` are dereferenced unconditionally. The
// indirect object heap entry is delegated to collectStateBaseAddresIohPatchInfo().
template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::collectStateBaseAddresPatchInfo(
    uint64_t baseAddress,
    uint64_t commandOffset,
    const LinearStream *dsh,
    const LinearStream *ioh,
    const LinearStream *ssh,
    uint64_t generalStateBase,
    bool imagesSupported) {
    using STATE_BASE_ADDRESS = typename GfxFamily::STATE_BASE_ADDRESS;

    if (imagesSupported) {
        const auto dynamicStateFieldOffset = commandOffset + STATE_BASE_ADDRESS::PATCH_CONSTANTS::DYNAMICSTATEBASEADDRESS_BYTEOFFSET;
        PatchInfoData dshPatchInfo = {dsh->getGraphicsAllocation()->getGpuAddress(), 0u, PatchInfoAllocationType::DynamicStateHeap,
                                      baseAddress, dynamicStateFieldOffset, PatchInfoAllocationType::Default};
        flatBatchBufferHelper->setPatchInfoData(dshPatchInfo);
    }

    const auto generalStateFieldOffset = commandOffset + STATE_BASE_ADDRESS::PATCH_CONSTANTS::GENERALSTATEBASEADDRESS_BYTEOFFSET;
    PatchInfoData gshPatchInfo = {generalStateBase, 0u, PatchInfoAllocationType::GeneralStateHeap,
                                  baseAddress, generalStateFieldOffset, PatchInfoAllocationType::Default};

    const auto surfaceStateFieldOffset = commandOffset + STATE_BASE_ADDRESS::PATCH_CONSTANTS::SURFACESTATEBASEADDRESS_BYTEOFFSET;
    PatchInfoData sshPatchInfo = {ssh->getGraphicsAllocation()->getGpuAddress(), 0u, PatchInfoAllocationType::SurfaceStateHeap,
                                  baseAddress, surfaceStateFieldOffset, PatchInfoAllocationType::Default};

    flatBatchBufferHelper->setPatchInfoData(gshPatchInfo);
    flatBatchBufferHelper->setPatchInfoData(sshPatchInfo);

    collectStateBaseAddresIohPatchInfo(baseAddress, commandOffset, *ioh);
}
2018-04-10 16:26:59 +08:00
// Replaces the KMD notify helper with `newHelper` and initializes it: the AC line
// status is refreshed and, when quick KMD sleep for sporadic waits is enabled, the
// last wait-for-completion timestamp is updated.
// `newHelper` is dereferenced right after the reset, so it must not be null.
template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::resetKmdNotifyHelper(KmdNotifyHelper *newHelper) {
    kmdNotifyHelper.reset(newHelper);

    auto &installedHelper = *kmdNotifyHelper;
    installedHelper.updateAcLineStatus();

    if (!installedHelper.quickKmdSleepForSporadicWaitsEnabled()) {
        return;
    }
    installedHelper.updateLastWaitForCompletionTimestamp();
}
2018-05-21 16:57:28 +08:00
template <typename GfxFamily>
2022-05-13 08:29:53 +08:00
void CommandStreamReceiverHw<GfxFamily>::setClearSlmWorkAroundParameter(PipeControlArgs &args) {
2018-05-21 16:57:28 +08:00
}
2018-09-07 20:31:37 +08:00
2018-11-22 22:16:20 +08:00
// Returns the scratch patch address reported by the scratch space controller.
template <typename GfxFamily>
uint64_t CommandStreamReceiverHw<GfxFamily>::getScratchPatchAddress() {
    const uint64_t scratchPatchAddress = scratchSpaceController->getScratchPatchAddress();
    return scratchPatchAddress;
}
2019-01-28 20:44:59 +08:00
// Returns the value of the ForceCsrReprogramming debug flag. `dispatchFlags` is not
// consulted by this implementation.
template <typename GfxFamily>
bool CommandStreamReceiverHw<GfxFamily>::detectInitProgrammingFlagsRequired(const DispatchFlags &dispatchFlags) const {
    const bool forceReprogramming = DebugManager.flags.ForceCsrReprogramming.get();
    return forceReprogramming;
}
2019-03-21 00:08:05 +08:00
2022-03-25 21:00:53 +08:00
// Unregisters this CSR from the execution environment's direct submission controller.
// Does nothing when no controller exists.
template <typename GfxFamily>
inline void CommandStreamReceiverHw<GfxFamily>::unregisterDirectSubmissionFromController() {
    if (auto controller = executionEnvironment.directSubmissionController.get()) {
        controller->unregisterDirectSubmission(this);
    }
}
2023-03-14 01:56:20 +08:00
// Tells whether a BCS submission may use relaxed ordering. All of the following must
// hold: relaxed ordering is enabled for direct submission, the
// DirectSubmissionRelaxedOrderingForBcs debug flag equals 1, the container holds
// exactly one blit, and the submission has no stalling commands.
template <typename GfxFamily>
bool CommandStreamReceiverHw<GfxFamily>::bcsRelaxedOrderingAllowed(const BlitPropertiesContainer &blitPropertiesContainer, bool hasStallingCmds) const {
    if (!directSubmissionRelaxedOrderingEnabled()) {
        return false;
    }
    if (DebugManager.flags.DirectSubmissionRelaxedOrderingForBcs.get() != 1) {
        return false;
    }
    if (blitPropertiesContainer.size() != 1) {
        return false;
    }
    return !hasStallingCmds;
}
2019-04-03 21:59:31 +08:00
// Encodes the blit commands for every entry of blitPropertiesContainer into this CSR's command
// stream and submits them as a single batch buffer.
//
// blitPropertiesContainer - blits to dispatch; each entry also supplies its dependencies and the
//                           allocations that are made resident.
// blocking                - when true the tag update is always programmed and the call waits for
//                           the new task count after submission.
// profilingEnabled        - when true, entries with an output timestamp packet get profiling
//                           start/end MMIO programming.
// device                  - not referenced in this body.
//
// Returns the new task count on success, the task count derived from the submission status when
// flush() fails, or CompletionStamp::gpuHang when the blocking wait reports a GPU hang.
template <typename GfxFamily>
2022-11-22 21:53:59 +08:00
TaskCountType CommandStreamReceiverHw<GfxFamily>::flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) {
2019-04-03 21:59:31 +08:00
using MI_BATCH_BUFFER_END = typename GfxFamily::MI_BATCH_BUFFER_END;
2019-04-15 14:54:38 +08:00
// Ownership is held until the explicit lock.unlock() after the task count is published.
auto lock = obtainUniqueOwnership();
2020-10-19 21:36:57 +08:00
bool blitterDirectSubmission = this->isBlitterDirectSubmissionEnabled();
2022-02-08 00:52:08 +08:00
auto debugPauseEnabled = PauseOnGpuProperties::featureEnabled(DebugManager.flags.PauseOnBlitCopy.get());
2023-03-01 05:08:09 +08:00
auto &rootDeviceEnvironment = this->executionEnvironment.rootDeviceEnvironments[this->rootDeviceIndex];
2023-03-14 01:56:20 +08:00
// The tag write is skipped only for non-blocking calls when update-tag-from-wait is enabled.
const bool updateTag = !isUpdateTagFromWaitEnabled() || blocking;
// A tag update or a not-yet-sent engine prologue counts as a stalling command.
const bool hasStallingCmds = updateTag || !this->isEnginePrologueSent;
const bool relaxedOrderingAllowed = bcsRelaxedOrderingAllowed(blitPropertiesContainer, hasStallingCmds);
auto estimatedCsSize = BlitCommandsHelper<GfxFamily>::estimateBlitCommandsSize(blitPropertiesContainer, profilingEnabled, debugPauseEnabled, blitterDirectSubmission,
2023-03-11 03:21:59 +08:00
relaxedOrderingAllowed, *rootDeviceEnvironment.get());
2023-03-14 01:56:20 +08:00
auto &commandStream = getCS(estimatedCsSize);
2023-03-11 03:21:59 +08:00
2019-04-15 14:54:38 +08:00
// Remember where this task starts inside the stream; it becomes the batch buffer start offset.
auto commandStreamStart = commandStream.getUsed();
auto newTaskCount = taskCount + 1;
2019-04-16 19:02:34 +08:00
// latestSentTaskCount is advanced before the flush; taskCount itself only after a successful flush.
latestSentTaskCount = newTaskCount;
2019-04-03 21:59:31 +08:00
2022-11-03 23:25:30 +08:00
this->initializeResources();
2022-04-19 22:44:06 +08:00
this->initDirectSubmission();
2021-04-16 00:14:04 +08:00
2020-09-24 16:52:53 +08:00
if (PauseOnGpuProperties::pauseModeAllowed(DebugManager.flags.PauseOnBlitCopy.get(), taskCount, PauseOnGpuProperties::PauseMode::BeforeWorkload)) {
2021-12-22 02:13:53 +08:00
BlitCommandsHelper<GfxFamily>::dispatchDebugPauseCommands(commandStream, getDebugPauseStateGPUAddress(),
DebugPauseState::waitingForUserStartConfirmation,
2023-03-06 20:42:09 +08:00
DebugPauseState::hasUserStartConfirmation, *rootDeviceEnvironment.get());
2020-06-17 20:58:28 +08:00
}
2023-03-14 01:56:20 +08:00
bool isRelaxedOrderingDispatch = false;
if (relaxedOrderingAllowed) {
// Only the first entry is inspected here; bcsRelaxedOrderingAllowed() requires the container to hold exactly one blit.
uint32_t dependenciesCount = 0;
for (auto timestampPacketContainer : blitPropertiesContainer[0].csrDependencies.timestampPacketContainer) {
dependenciesCount += static_cast<uint32_t>(timestampPacketContainer->peekNodes().size());
}
isRelaxedOrderingDispatch = RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*this, dependenciesCount);
}
2020-02-07 22:36:15 +08:00
programEnginePrologue(commandStream);
2021-09-29 23:59:41 +08:00
// Page table manager registers are initialized once; the flag stores the result of the init call.
if (pageTableManager.get() && !pageTableManagerInitialized) {
pageTableManagerInitialized = pageTableManager->initPageTableManagerRegisters(this);
}
2022-06-23 17:44:45 +08:00
if (logicalStateHelper) {
2022-07-04 22:16:44 +08:00
logicalStateHelper->writeStreamInline(commandStream, false);
2022-06-23 17:44:45 +08:00
}
2023-03-14 01:56:20 +08:00
if (isRelaxedOrderingDispatch) {
RelaxedOrderingHelper::encodeRegistersBeforeDependencyCheckers<GfxFamily>(commandStream);
}
2023-03-11 03:21:59 +08:00
// args is shared by every MI_FLUSH_DW programmed below; fields set in one place stay set for later uses.
NEO::EncodeDummyBlitWaArgs waArgs{false, rootDeviceEnvironment.get()};
MiFlushArgs args{waArgs};
2019-11-07 16:15:53 +08:00
// Per blit: dependencies, workaround, optional profiling start, the blit itself, completion signalling, residency.
for (auto &blitProperties : blitPropertiesContainer) {
2023-03-14 01:56:20 +08:00
TimestampPacketHelper::programCsrDependenciesForTimestampPacketContainer<GfxFamily>(commandStream, blitProperties.csrDependencies, isRelaxedOrderingDispatch);
2023-01-20 00:11:39 +08:00
TimestampPacketHelper::programCsrDependenciesForForMultiRootDeviceSyncContainer<GfxFamily>(commandStream, blitProperties.csrDependencies);
2019-05-20 18:00:02 +08:00
2022-07-01 19:10:43 +08:00
BlitCommandsHelper<GfxFamily>::encodeWa(commandStream, blitProperties, latestSentBcsWaValue);
2020-04-29 20:06:01 +08:00
if (blitProperties.outputTimestampPacket && profilingEnabled) {
2021-04-01 19:54:16 +08:00
BlitCommandsHelper<GfxFamily>::encodeProfilingStartMmios(commandStream, *blitProperties.outputTimestampPacket);
2020-04-29 20:06:01 +08:00
}
2023-03-11 03:21:59 +08:00
BlitCommandsHelper<GfxFamily>::dispatchBlitCommands(blitProperties, commandStream, waArgs);
2023-03-01 05:08:09 +08:00
auto dummyAllocation = rootDeviceEnvironment->getDummyAllocation();
if (dummyAllocation) {
makeResident(*dummyAllocation);
}
2023-03-06 20:42:09 +08:00
2019-11-07 16:15:53 +08:00
if (blitProperties.outputTimestampPacket) {
2020-04-29 20:06:01 +08:00
if (profilingEnabled) {
2023-03-06 20:42:09 +08:00
// Profiling path: flush first, then capture the end timestamps via MMIO programming.
EncodeMiFlushDW<GfxFamily>::programWithWa(commandStream, 0llu, newTaskCount, args);
2021-04-01 19:54:16 +08:00
BlitCommandsHelper<GfxFamily>::encodeProfilingEndMmios(commandStream, *blitProperties.outputTimestampPacket);
2020-04-29 20:06:01 +08:00
} else {
// Non-profiling path: post-sync write of 0 to the packet's context-end address.
auto timestampPacketGpuAddress = TimestampPacketHelper::getContextEndGpuAddress(*blitProperties.outputTimestampPacket);
2021-06-17 19:55:28 +08:00
args.commandWithPostSync = true;
2023-01-26 03:28:09 +08:00
2023-03-06 20:42:09 +08:00
EncodeMiFlushDW<GfxFamily>::programWithWa(commandStream, timestampPacketGpuAddress, 0, args);
2020-04-29 20:06:01 +08:00
}
2019-11-13 00:56:10 +08:00
makeResident(*blitProperties.outputTimestampPacket->getBaseGraphicsAllocation());
2019-11-07 16:15:53 +08:00
}
2019-04-03 21:59:31 +08:00
2019-11-07 16:15:53 +08:00
blitProperties.csrDependencies.makeResident(*this);
2022-05-05 20:01:59 +08:00
// NOTE(review): srcAllocation and dstAllocation are dereferenced without a null check — confirm callers always provide both.
blitProperties.srcAllocation->prepareHostPtrForResidency(this);
blitProperties.dstAllocation->prepareHostPtrForResidency(this);
2019-11-07 16:15:53 +08:00
makeResident(*blitProperties.srcAllocation);
makeResident(*blitProperties.dstAllocation);
2020-11-19 07:58:42 +08:00
if (blitProperties.clearColorAllocation) {
makeResident(*blitProperties.clearColorAllocation);
}
2023-01-20 00:11:39 +08:00
if (blitProperties.multiRootDeviceEventSync != nullptr) {
args.commandWithPostSync = true;
args.notifyEnable = isUsedNotifyEnableForPostSync();
2023-03-06 20:42:09 +08:00
// Writes the maximum uint64_t value at the event's context-end location (GPU address + context-end offset).
EncodeMiFlushDW<GfxFamily>::programWithWa(commandStream, blitProperties.multiRootDeviceEventSync->getGpuAddress() + blitProperties.multiRootDeviceEventSync->getContextEndOffset(), std::numeric_limits<uint64_t>::max(), args);
2023-01-20 00:11:39 +08:00
}
2019-06-13 17:45:27 +08:00
}
2020-10-16 21:58:47 +08:00
BlitCommandsHelper<GfxFamily>::programGlobalSequencerFlush(commandStream);
2021-10-28 17:21:44 +08:00
if (updateTag) {
2023-01-28 02:07:04 +08:00
// Tag update: post-sync write of newTaskCount to the tag allocation, with additional synchronization before and after it.
MemorySynchronizationCommands<GfxFamily>::addAdditionalSynchronization(commandStream, tagAllocation->getGpuAddress(), false, peekRootDeviceEnvironment());
2021-10-28 17:21:44 +08:00
args.commandWithPostSync = true;
args.notifyEnable = isUsedNotifyEnableForPostSync();
2023-03-06 20:42:09 +08:00
EncodeMiFlushDW<GfxFamily>::programWithWa(commandStream, tagAllocation->getGpuAddress(), newTaskCount, args);
2019-11-07 16:15:53 +08:00
2023-01-28 02:07:04 +08:00
MemorySynchronizationCommands<GfxFamily>::addAdditionalSynchronization(commandStream, tagAllocation->getGpuAddress(), false, peekRootDeviceEnvironment());
2021-10-28 17:21:44 +08:00
}
2020-09-24 16:52:53 +08:00
if (PauseOnGpuProperties::pauseModeAllowed(DebugManager.flags.PauseOnBlitCopy.get(), taskCount, PauseOnGpuProperties::PauseMode::AfterWorkload)) {
2021-12-22 02:13:53 +08:00
BlitCommandsHelper<GfxFamily>::dispatchDebugPauseCommands(commandStream, getDebugPauseStateGPUAddress(),
DebugPauseState::waitingForUserEndConfirmation,
2023-03-06 20:42:09 +08:00
DebugPauseState::hasUserEndConfirmation, *rootDeviceEnvironment.get());
2020-06-17 20:58:28 +08:00
}
2020-10-19 21:36:57 +08:00
// endingCmdPtr is filled in by programEndingCmd and handed to the batch buffer below.
void *endingCmdPtr = nullptr;
2023-03-14 01:56:20 +08:00
programEndingCmd(commandStream, &endingCmdPtr, blitterDirectSubmission, isRelaxedOrderingDispatch, false);
2019-04-03 21:59:31 +08:00
2021-11-18 06:36:00 +08:00
EncodeNoop<GfxFamily>::alignToCacheLine(commandStream);
2019-04-03 21:59:31 +08:00
2019-04-11 17:34:35 +08:00
makeResident(*tagAllocation);
2020-02-07 22:36:15 +08:00
if (globalFenceAllocation) {
makeResident(*globalFenceAllocation);
}
2019-04-11 17:34:35 +08:00
2022-11-19 05:02:29 +08:00
uint64_t taskStartAddress = commandStream.getGpuBase() + commandStreamStart;
BatchBuffer batchBuffer{commandStream.getGraphicsAllocation(), commandStreamStart, 0, taskStartAddress, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount,
2023-03-14 01:56:20 +08:00
commandStream.getUsed(), &commandStream, endingCmdPtr, this->getNumClients(), hasStallingCmds, isRelaxedOrderingDispatch};
2019-04-15 14:54:38 +08:00
2023-03-15 23:10:06 +08:00
updateStreamTaskCount(commandStream, newTaskCount);
2021-07-01 00:06:28 +08:00
2022-11-05 01:02:46 +08:00
auto flushSubmissionStatus = flush(batchBuffer, getResidencyAllocations());
if (flushSubmissionStatus != SubmissionStatus::SUCCESS) {
2023-03-15 23:10:06 +08:00
// Submission failed: restore the stream allocation's task counts to the current taskCount and report the error.
updateStreamTaskCount(commandStream, taskCount);
2022-11-05 01:02:46 +08:00
return CompletionStamp::getTaskCountFromSubmissionStatusError(flushSubmissionStatus);
}
2022-05-27 11:58:07 +08:00
makeSurfacePackNonResident(getResidencyAllocations(), true);
2019-04-15 14:54:38 +08:00
2021-11-03 20:05:33 +08:00
// latestFlushedTaskCount advances only when the tag write was part of this submission.
if (updateTag) {
2021-10-28 17:21:44 +08:00
latestFlushedTaskCount = newTaskCount;
}
2019-04-15 14:54:38 +08:00
taskCount = newTaskCount;
2019-04-25 15:59:08 +08:00
// Capture the flush stamp while still holding ownership; the wait below runs after unlock().
auto flushStampToWait = flushStamp->peekStamp();
2019-04-15 14:54:38 +08:00
lock.unlock();
2019-11-07 16:15:53 +08:00
if (blocking) {
2022-05-06 00:52:25 +08:00
const auto waitStatus = waitForTaskCountWithKmdNotifyFallback(newTaskCount, flushStampToWait, false, QueueThrottle::MEDIUM);
2019-06-04 19:37:22 +08:00
// Temporary allocations are cleaned regardless of the wait result; a GPU hang is reported afterwards.
internalAllocationStorage->cleanAllocationList(newTaskCount, TEMPORARY_ALLOCATION);
2022-05-06 00:52:25 +08:00
if (waitStatus == WaitStatus::GpuHang) {
2022-11-04 21:57:42 +08:00
return CompletionStamp::gpuHang;
2022-05-06 00:52:25 +08:00
}
2019-06-04 19:37:22 +08:00
}
2019-10-22 17:25:14 +08:00
return newTaskCount;
2019-04-03 21:59:31 +08:00
}
2021-02-23 16:48:08 +08:00
template <typename GfxFamily>
2022-11-09 19:18:06 +08:00
inline SubmissionStatus CommandStreamReceiverHw<GfxFamily>::flushTagUpdate() {
2021-10-28 10:11:32 +08:00
if (this->osContext != nullptr) {
2021-10-28 17:21:44 +08:00
if (EngineHelpers::isBcs(this->osContext->getEngineType())) {
2022-11-09 19:18:06 +08:00
return this->flushMiFlushDW();
2021-10-28 10:11:32 +08:00
} else {
2023-03-28 05:37:18 +08:00
return this->flushPipeControl(false);
2021-10-28 10:11:32 +08:00
}
2021-02-23 16:48:08 +08:00
}
2022-11-09 19:18:06 +08:00
return SubmissionStatus::DEVICE_UNINITIALIZED;
2021-02-23 16:48:08 +08:00
}
template <typename GfxFamily>
2022-11-09 19:18:06 +08:00
inline SubmissionStatus CommandStreamReceiverHw<GfxFamily>::flushMiFlushDW() {
2021-02-23 16:48:08 +08:00
auto lock = obtainUniqueOwnership();
2023-03-11 03:21:59 +08:00
NEO::EncodeDummyBlitWaArgs waArgs{false, const_cast<RootDeviceEnvironment *>(&peekRootDeviceEnvironment())};
MiFlushArgs args{waArgs};
2021-06-17 19:55:28 +08:00
args.commandWithPostSync = true;
args.notifyEnable = isUsedNotifyEnableForPostSync();
2023-03-06 20:42:09 +08:00
2023-03-11 03:21:59 +08:00
auto &commandStream = getCS(EncodeMiFlushDW<GfxFamily>::getCommandSizeWithWa(waArgs));
2023-03-06 20:42:09 +08:00
auto commandStreamStart = commandStream.getUsed();
EncodeMiFlushDW<GfxFamily>::programWithWa(commandStream, tagAllocation->getGpuAddress(), taskCount + 1, args);
2021-02-23 16:48:08 +08:00
makeResident(*tagAllocation);
2021-04-13 04:19:19 +08:00
2022-11-09 19:18:06 +08:00
auto submissionStatus = this->flushSmallTask(commandStream, commandStreamStart);
2021-10-28 17:21:44 +08:00
this->latestFlushedTaskCount = taskCount.load();
2022-11-09 19:18:06 +08:00
return submissionStatus;
2021-04-13 04:19:19 +08:00
}
2021-02-23 16:48:08 +08:00
template <typename GfxFamily>
2023-03-28 05:37:18 +08:00
SubmissionStatus CommandStreamReceiverHw<GfxFamily>::flushPipeControl(bool stateCacheFlush) {
2021-02-23 16:48:08 +08:00
auto lock = obtainUniqueOwnership();
2021-12-21 05:37:45 +08:00
PipeControlArgs args;
2022-10-11 18:37:19 +08:00
args.dcFlushEnable = this->dcFlushSupport;
2021-06-17 19:55:28 +08:00
args.notifyEnable = isUsedNotifyEnableForPostSync();
2021-12-11 05:31:34 +08:00
args.workloadPartitionOffset = isMultiTileOperationEnabled();
2022-08-19 23:56:22 +08:00
2023-03-28 05:37:18 +08:00
if (stateCacheFlush) {
args.textureCacheInvalidationEnable = true;
args.renderTargetCacheFlushEnable = true;
args.stateCacheInvalidationEnable = true;
}
2023-01-27 20:37:09 +08:00
auto dispatchSize = MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(peekRootDeviceEnvironment(), args.tlbInvalidation) + this->getCmdSizeForPrologue();
auto &commandStream = getCS(dispatchSize);
2022-08-19 23:56:22 +08:00
auto commandStreamStart = commandStream.getUsed();
2023-01-27 20:37:09 +08:00
this->programEnginePrologue(commandStream);
2022-07-21 22:28:10 +08:00
MemorySynchronizationCommands<GfxFamily>::addBarrierWithPostSyncOperation(commandStream,
PostSyncMode::ImmediateData,
getTagAllocation()->getGpuAddress(),
taskCount + 1,
2023-01-26 11:58:18 +08:00
peekRootDeviceEnvironment(),
2022-07-21 22:28:10 +08:00
args);
2021-02-23 16:48:08 +08:00
makeResident(*tagAllocation);
2023-01-27 20:37:09 +08:00
makeResident(*commandStream.getGraphicsAllocation());
2021-02-23 16:48:08 +08:00
2022-11-09 19:18:06 +08:00
auto submissionStatus = this->flushSmallTask(commandStream, commandStreamStart);
2021-10-28 17:21:44 +08:00
this->latestFlushedTaskCount = taskCount.load();
2022-11-09 19:18:06 +08:00
return submissionStatus;
2021-02-23 16:48:08 +08:00
}
template <typename GfxFamily>
2022-11-09 19:18:06 +08:00
SubmissionStatus CommandStreamReceiverHw<GfxFamily>::flushSmallTask(LinearStream &commandStreamTask, size_t commandStreamStartTask) {
2021-12-27 19:41:16 +08:00
using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START;
2021-02-23 16:48:08 +08:00
using MI_BATCH_BUFFER_END = typename GfxFamily::MI_BATCH_BUFFER_END;
void *endingCmdPtr = nullptr;
2022-11-29 00:57:36 +08:00
programEndingCmd(commandStreamTask, &endingCmdPtr, isAnyDirectSubmissionEnabled(), false, false);
2021-02-23 16:48:08 +08:00
2022-09-02 21:10:48 +08:00
auto bytesToPad = EncodeBatchBufferStartOrEnd<GfxFamily>::getBatchBufferStartSize() -
EncodeBatchBufferStartOrEnd<GfxFamily>::getBatchBufferEndSize();
2021-12-27 19:41:16 +08:00
EncodeNoop<GfxFamily>::emitNoop(commandStreamTask, bytesToPad);
2021-11-18 06:36:00 +08:00
EncodeNoop<GfxFamily>::alignToCacheLine(commandStreamTask);
2021-02-23 16:48:08 +08:00
2021-05-13 04:20:22 +08:00
if (globalFenceAllocation) {
makeResident(*globalFenceAllocation);
}
2022-11-19 05:02:29 +08:00
uint64_t taskStartAddress = commandStreamTask.getGpuBase() + commandStreamStartTask;
BatchBuffer batchBuffer{commandStreamTask.getGraphicsAllocation(), commandStreamStartTask, 0, taskStartAddress,
nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount,
2023-02-02 01:06:21 +08:00
commandStreamTask.getUsed(), &commandStreamTask, endingCmdPtr, this->getNumClients(), true, false};
2021-02-23 16:48:08 +08:00
2021-10-28 17:21:44 +08:00
this->latestSentTaskCount = taskCount + 1;
2022-11-09 19:18:06 +08:00
auto submissionStatus = flushHandler(batchBuffer, getResidencyAllocations());
2023-03-15 23:10:06 +08:00
if (submissionStatus == SubmissionStatus::SUCCESS) {
taskCount++;
}
2022-11-09 19:18:06 +08:00
return submissionStatus;
2021-02-23 16:48:08 +08:00
}
2023-03-28 05:37:18 +08:00
// Submits a pipe control with the state-cache-flush options enabled.
// Returns the status from flushPipeControl(true).
template <typename GfxFamily>
SubmissionStatus CommandStreamReceiverHw<GfxFamily>::sendRenderStateCacheFlush() {
    constexpr bool stateCacheFlush = true;
    return this->flushPipeControl(stateCacheFlush);
}
2021-02-23 16:48:08 +08:00
template <typename GfxFamily>
2022-11-07 19:50:09 +08:00
inline SubmissionStatus CommandStreamReceiverHw<GfxFamily>::flushHandler(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) {
auto status = flush(batchBuffer, allocationsForResidency);
2022-05-27 11:58:07 +08:00
makeSurfacePackNonResident(allocationsForResidency, true);
2022-11-07 19:50:09 +08:00
return status;
2021-02-23 16:48:08 +08:00
}
template <typename GfxFamily>
2023-03-27 17:06:13 +08:00
inline bool CommandStreamReceiverHw<GfxFamily>::isUpdateTagFromWaitEnabled() {
2022-12-09 23:11:27 +08:00
auto &gfxCoreHelper = getGfxCoreHelper();
2022-12-08 20:22:35 +08:00
auto enabled = gfxCoreHelper.isUpdateTaskCountFromWaitSupported();
2022-03-25 21:00:53 +08:00
enabled &= this->isAnyDirectSubmissionEnabled();
2021-02-23 16:48:08 +08:00
2021-11-17 23:05:48 +08:00
switch (DebugManager.flags.UpdateTaskCountFromWait.get()) {
case 0:
enabled = false;
break;
case 1:
enabled = this->isDirectSubmissionEnabled();
break;
case 2:
enabled = this->isAnyDirectSubmissionEnabled();
break;
case 3:
enabled = true;
break;
2021-02-23 16:48:08 +08:00
}
return enabled;
}
// Flushes any batched submissions and, when update-tag-from-wait is enabled, also submits a
// tag update. The status returned by flushTagUpdate() is not inspected here.
template <typename GfxFamily>
inline void CommandStreamReceiverHw<GfxFamily>::updateTagFromWait() {
    flushBatchedSubmissions();

    if (!isUpdateTagFromWaitEnabled()) {
        return;
    }
    flushTagUpdate();
}
2020-12-17 08:36:45 +08:00
template <typename GfxFamily>
2023-01-04 17:45:07 +08:00
inline MemoryCompressionState CommandStreamReceiverHw<GfxFamily>::getMemoryCompressionState(bool auxTranslationRequired) const {
2020-12-17 08:36:45 +08:00
return MemoryCompressionState::NotApplicable;
}
2020-03-19 22:15:51 +08:00
// True when the state compute mode properties are dirty, the product requires the 3D pipeline
// select workaround, and this CSR is an RCS.
template <typename GfxFamily>
inline bool CommandStreamReceiverHw<GfxFamily>::isPipelineSelectAlreadyProgrammed() const {
    const auto &productHelper = getProductHelper();

    if (!this->streamProperties.stateComputeMode.isDirty()) {
        return false;
    }
    if (!productHelper.is3DPipelineSelectWARequired()) {
        return false;
    }
    return isRcs();
}
2019-08-08 01:33:40 +08:00
template <typename GfxFamily>
2021-01-29 01:30:56 +08:00
inline void CommandStreamReceiverHw<GfxFamily>::programEpilogue(LinearStream &csr, Device &device, void **batchBufferEndLocation, DispatchFlags &dispatchFlags) {
2019-08-08 01:33:40 +08:00
if (dispatchFlags.epilogueRequired) {
auto currentOffset = ptrDiff(csr.getSpace(0u), csr.getCpuBase());
auto gpuAddress = ptrOffset(csr.getGraphicsAllocation()->getGpuAddress(), currentOffset);
addBatchBufferStart(reinterpret_cast<typename GfxFamily::MI_BATCH_BUFFER_START *>(*batchBufferEndLocation), gpuAddress, false);
2019-08-08 17:44:23 +08:00
this->programEpliogueCommands(csr, dispatchFlags);
2022-11-29 00:57:36 +08:00
programEndingCmd(csr, batchBufferEndLocation, isDirectSubmissionEnabled(), false, !EngineHelpers::isBcs(osContext->getEngineType()));
2021-11-18 06:36:00 +08:00
EncodeNoop<GfxFamily>::alignToCacheLine(csr);
2019-08-08 01:33:40 +08:00
}
}
// Size in bytes reserved for the epilogue: the epilogue commands plus one terminating command
// (MI_BATCH_BUFFER_START with direct submission, MI_BATCH_BUFFER_END otherwise), aligned up to
// the cache line size. Returns 0 when no epilogue is required.
template <typename GfxFamily>
inline size_t CommandStreamReceiverHw<GfxFamily>::getCmdSizeForEpilogue(const DispatchFlags &dispatchFlags) const {
    if (!dispatchFlags.epilogueRequired) {
        return 0u;
    }

    const size_t terminateCmdSize = isDirectSubmissionEnabled()
                                        ? sizeof(typename GfxFamily::MI_BATCH_BUFFER_START)
                                        : sizeof(typename GfxFamily::MI_BATCH_BUFFER_END);

    auto totalSize = getCmdSizeForEpilogueCommands(dispatchFlags) + terminateCmdSize;
    return alignUp(totalSize, MemoryConstants::cacheLineSize);
}
2020-01-31 15:50:12 +08:00
template <typename GfxFamily>
2020-02-07 22:36:15 +08:00
inline void CommandStreamReceiverHw<GfxFamily>::programEnginePrologue(LinearStream &csr) {
2020-01-31 15:50:12 +08:00
}
template <typename GfxFamily>
2020-05-27 21:30:31 +08:00
inline size_t CommandStreamReceiverHw<GfxFamily>::getCmdSizeForPrologue() const {
2020-01-31 15:50:12 +08:00
return 0u;
}
2021-07-30 17:56:58 +08:00
template <typename GfxFamily>
2023-07-05 16:55:12 +08:00
inline void CommandStreamReceiverHw<GfxFamily>::stopDirectSubmission(bool blocking) {
2021-07-30 17:56:58 +08:00
if (EngineHelpers::isBcs(this->osContext->getEngineType())) {
2023-07-05 16:55:12 +08:00
this->blitterDirectSubmission->stopRingBuffer(blocking);
2021-07-30 17:56:58 +08:00
} else {
2023-07-05 16:55:12 +08:00
this->directSubmission->stopRingBuffer(blocking);
2021-07-30 17:56:58 +08:00
}
}
2020-07-17 17:28:59 +08:00
template <typename GfxFamily>
2022-04-19 22:44:06 +08:00
inline bool CommandStreamReceiverHw<GfxFamily>::initDirectSubmission() {
2020-07-17 17:28:59 +08:00
bool ret = true;
2021-02-10 23:13:50 +08:00
bool submitOnInit = false;
2022-04-19 22:44:06 +08:00
auto startDirect = this->osContext->isDirectSubmissionAvailable(peekHwInfo(), submitOnInit);
2020-07-17 17:28:59 +08:00
2021-02-10 23:13:50 +08:00
if (startDirect) {
2021-11-16 21:26:36 +08:00
if (!this->isAnyDirectSubmissionEnabled()) {
2023-02-23 19:13:14 +08:00
auto lock = this->obtainUniqueOwnership();
if (!this->isAnyDirectSubmissionEnabled()) {
if (EngineHelpers::isBcs(this->osContext->getEngineType())) {
blitterDirectSubmission = DirectSubmissionHw<GfxFamily, BlitterDispatcher<GfxFamily>>::create(*this);
ret = blitterDirectSubmission->initialize(submitOnInit, this->isUsedNotifyEnableForPostSync());
completionFenceValuePointer = blitterDirectSubmission->getCompletionValuePointer();
2021-07-30 17:56:58 +08:00
2023-02-23 19:13:14 +08:00
} else {
directSubmission = DirectSubmissionHw<GfxFamily, RenderDispatcher<GfxFamily>>::create(*this);
ret = directSubmission->initialize(submitOnInit, this->isUsedNotifyEnableForPostSync());
completionFenceValuePointer = directSubmission->getCompletionValuePointer();
}
auto directSubmissionController = executionEnvironment.initializeDirectSubmissionController();
if (directSubmissionController) {
directSubmissionController->registerDirectSubmission(this);
}
if (this->isUpdateTagFromWaitEnabled()) {
this->overrideDispatchPolicy(DispatchMode::ImmediateDispatch);
}
2022-03-25 21:00:53 +08:00
}
2020-07-17 17:28:59 +08:00
}
2022-04-19 22:44:06 +08:00
this->osContext->setDirectSubmissionActive();
2020-07-17 17:28:59 +08:00
}
return ret;
}
2021-04-28 18:49:38 +08:00
// Returns this CSR's timestamp packet allocator, creating it on first use through the gfx core
// helper for this CSR's root device index only.
template <typename GfxFamily>
TagAllocatorBase *CommandStreamReceiverHw<GfxFamily>::getTimestampPacketAllocator() {
    if (timestampPacketAllocator.get() != nullptr) {
        return timestampPacketAllocator.get();
    }

    auto &gfxCoreHelper = getGfxCoreHelper();
    const RootDeviceIndicesContainer rootDeviceIndices = {rootDeviceIndex};

    timestampPacketAllocator = gfxCoreHelper.createTimestampPacketAllocator(rootDeviceIndices, getMemoryManager(), getPreferredTagPoolSize(), getType(), osContext->getDeviceBitfield());

    return timestampPacketAllocator.get();
}
2023-01-20 00:11:39 +08:00
// Creates a new timestamp packet allocator spanning the given root device indices and hands
// ownership to the caller. Uses this CSR's memory manager, preferred tag pool size, type and
// device bitfield.
template <typename GfxFamily>
std::unique_ptr<TagAllocatorBase> CommandStreamReceiverHw<GfxFamily>::createMultiRootDeviceTimestampPacketAllocator(const RootDeviceIndicesContainer rootDeviceIndices) {
    return getGfxCoreHelper().createTimestampPacketAllocator(rootDeviceIndices, getMemoryManager(), getPreferredTagPoolSize(), getType(), osContext->getDeviceBitfield());
}
2021-05-20 05:41:59 +08:00
// Initializes the implicit-flush flags from platform support and lets the debug flags
// PerformImplicitFlushForNewResource / PerformImplicitFlushForIdleGpu override them
// (-1 keeps the platform default, 0 disables, any other value enables).
template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::postInitFlagsSetup() {
    useNewResourceImplicitFlush = checkPlatformSupportsNewResourceImplicitFlush();
    const int32_t newResourceOverride = DebugManager.flags.PerformImplicitFlushForNewResource.get();
    if (newResourceOverride != -1) {
        useNewResourceImplicitFlush = (newResourceOverride != 0);
    }

    useGpuIdleImplicitFlush = checkPlatformSupportsGpuIdleImplicitFlush();
    const int32_t gpuIdleOverride = DebugManager.flags.PerformImplicitFlushForIdleGpu.get();
    if (gpuIdleOverride != -1) {
        useGpuIdleImplicitFlush = (gpuIdleOverride != 0);
    }
}
2021-11-06 09:42:54 +08:00
// Command size for the stalling commands of a dispatch: the post-sync variant when the dispatch
// carries at least one barrier timestamp packet node, the no-post-sync variant otherwise.
template <typename GfxFamily>
size_t CommandStreamReceiverHw<GfxFamily>::getCmdSizeForStallingCommands(const DispatchFlags &dispatchFlags) const {
    auto barrierNodes = dispatchFlags.barrierTimestampPacketNodes;
    const bool hasBarrierNodes = barrierNodes && barrierNodes->peekNodes().size() > 0;

    if (hasBarrierNodes) {
        return getCmdSizeForStallingPostSyncCommands();
    }
    return getCmdSizeForStallingNoPostSyncCommands();
}
2021-11-18 03:51:43 +08:00
// Programs the active partition configuration into `csr`, but only when the size request flags
// report that the number of active partitions changed.
template <typename GfxFamily>
inline void CommandStreamReceiverHw<GfxFamily>::programActivePartitionConfigFlushTask(LinearStream &csr) {
    if (!csrSizeRequestFlags.activePartitionsChanged) {
        return;
    }
    programActivePartitionConfig(csr);
}
2022-03-10 01:15:48 +08:00
// Reports whether any allocation in the residency list has a shared handle.
// A positive answer is cached in csrSizeRequestFlags.hasSharedHandles, so the residency list is
// only scanned while the flag is still unset.
template <typename GfxFamily>
bool CommandStreamReceiverHw<GfxFamily>::hasSharedHandles() {
    if (csrSizeRequestFlags.hasSharedHandles) {
        return true;
    }

    for (const auto &residentAllocation : this->getResidencyAllocations()) {
        if (residentAllocation->peekSharedHandle()) {
            csrSizeRequestFlags.hasSharedHandles = true;
            return true;
        }
    }
    return false;
}
// Command size needed to program compute mode, as reported by EncodeComputeMode for this root
// device environment, the shared-handles state and whether this CSR is an RCS.
template <typename GfxFamily>
size_t CommandStreamReceiverHw<GfxFamily>::getCmdSizeForComputeMode() {
    auto &rootDeviceEnv = this->peekRootDeviceEnvironment();
    return EncodeComputeMode<GfxFamily>::getCmdSizeForComputeMode(rootDeviceEnv, hasSharedHandles(), isRcs());
}
2022-08-03 19:54:08 +08:00
// Generic implementation: creates nothing.
// NOTE(review): presumably specialized for families that use a kernel-args buffer — confirm.
template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::createKernelArgsBufferAllocation() {
}
2022-09-19 21:34:14 +08:00
template <typename GfxFamily>
2022-11-10 19:28:41 +08:00
SubmissionStatus CommandStreamReceiverHw<GfxFamily>::initializeDeviceWithFirstSubmission() {
2023-01-27 20:37:09 +08:00
return flushTagUpdate();
2022-09-19 21:34:14 +08:00
}
2022-09-21 00:46:15 +08:00
template <typename GfxFamily>
2023-02-03 02:57:24 +08:00
void CommandStreamReceiverHw<GfxFamily>::handleFrontEndStateTransition(const DispatchFlags &dispatchFlags) {
2022-09-21 00:46:15 +08:00
if (streamProperties.frontEndState.disableOverdispatch.value != -1) {
lastAdditionalKernelExecInfo = streamProperties.frontEndState.disableOverdispatch.value == 1 ? AdditionalKernelExecInfo::DisableOverdispatch : AdditionalKernelExecInfo::NotSet;
}
if (streamProperties.frontEndState.computeDispatchAllWalkerEnable.value != -1) {
lastKernelExecutionType = streamProperties.frontEndState.computeDispatchAllWalkerEnable.value == 1 ? KernelExecutionType::Concurrent : KernelExecutionType::Default;
}
2023-02-03 02:57:24 +08:00
if (feSupportFlags.disableOverdispatch &&
(dispatchFlags.additionalKernelExecInfo != AdditionalKernelExecInfo::NotApplicable && lastAdditionalKernelExecInfo != dispatchFlags.additionalKernelExecInfo)) {
2022-09-21 00:46:15 +08:00
setMediaVFEStateDirty(true);
}
2023-02-03 02:57:24 +08:00
if (feSupportFlags.computeDispatchAllWalker &&
(dispatchFlags.kernelExecutionType != KernelExecutionType::NotApplicable && lastKernelExecutionType != dispatchFlags.kernelExecutionType)) {
2022-09-21 00:46:15 +08:00
setMediaVFEStateDirty(true);
}
2023-02-03 02:57:24 +08:00
if (feSupportFlags.disableEuFusion &&
(streamProperties.frontEndState.disableEUFusion.value == -1 || dispatchFlags.disableEUFusion != !!streamProperties.frontEndState.disableEUFusion.value)) {
2022-09-21 00:46:15 +08:00
setMediaVFEStateDirty(true);
}
}
2022-09-22 09:44:06 +08:00
template <typename GfxFamily>
2023-02-03 02:57:24 +08:00
void CommandStreamReceiverHw<GfxFamily>::handlePipelineSelectStateTransition(const DispatchFlags &dispatchFlags) {
2022-09-22 09:44:06 +08:00
if (streamProperties.pipelineSelect.mediaSamplerDopClockGate.value != -1) {
this->lastMediaSamplerConfig = static_cast<int8_t>(streamProperties.pipelineSelect.mediaSamplerDopClockGate.value);
}
if (streamProperties.pipelineSelect.systolicMode.value != -1) {
this->lastSystolicPipelineSelectMode = !!streamProperties.pipelineSelect.systolicMode.value;
}
2023-02-03 02:57:24 +08:00
csrSizeRequestFlags.mediaSamplerConfigChanged = this->pipelineSupportFlags.mediaSamplerDopClockGate &&
(this->lastMediaSamplerConfig != static_cast<int8_t>(dispatchFlags.pipelineSelectArgs.mediaSamplerRequired));
csrSizeRequestFlags.systolicPipelineSelectMode = this->pipelineSupportFlags.systolicMode &&
(this->lastSystolicPipelineSelectMode != dispatchFlags.pipelineSelectArgs.systolicPipelineSelectMode);
2022-09-22 09:44:06 +08:00
}
2022-11-21 22:55:39 +08:00
// True when either the render or the blitter direct submission exists and reports relaxed
// ordering as enabled.
template <typename GfxFamily>
bool CommandStreamReceiverHw<GfxFamily>::directSubmissionRelaxedOrderingEnabled() const {
    if (directSubmission.get() && directSubmission->isRelaxedOrderingEnabled()) {
        return true;
    }
    if (blitterDirectSubmission.get() && blitterDirectSubmission->isRelaxedOrderingEnabled()) {
        return true;
    }
    return false;
}
2023-02-03 02:57:24 +08:00
template <typename GfxFamily>
2023-02-18 02:35:05 +08:00
inline void CommandStreamReceiverHw<GfxFamily>::handleStateBaseAddressStateTransition(const DispatchFlags &dispatchFlags, bool &isStateBaseAddressDirty) {
2023-02-03 02:57:24 +08:00
auto &rootDeviceEnvironment = this->peekRootDeviceEnvironment();
if (this->streamProperties.stateBaseAddress.statelessMocs.value != -1) {
this->latestSentStatelessMocsConfig = static_cast<uint32_t>(this->streamProperties.stateBaseAddress.statelessMocs.value);
}
auto mocsIndex = this->latestSentStatelessMocsConfig;
if (dispatchFlags.l3CacheSettings != L3CachingSettings::NotApplicable) {
auto l3On = dispatchFlags.l3CacheSettings != L3CachingSettings::l3CacheOff;
auto l1On = dispatchFlags.l3CacheSettings == L3CachingSettings::l3AndL1On;
auto &gfxCoreHelper = getGfxCoreHelper();
mocsIndex = gfxCoreHelper.getMocsIndex(*rootDeviceEnvironment.getGmmHelper(), l3On, l1On);
}
if (mocsIndex != this->latestSentStatelessMocsConfig) {
isStateBaseAddressDirty = true;
this->latestSentStatelessMocsConfig = mocsIndex;
}
this->streamProperties.stateBaseAddress.setPropertyStatelessMocs(mocsIndex);
auto memoryCompressionState = this->lastMemoryCompressionState;
if (dispatchFlags.memoryCompressionState != MemoryCompressionState::NotApplicable) {
memoryCompressionState = dispatchFlags.memoryCompressionState;
}
if (memoryCompressionState != this->lastMemoryCompressionState) {
isStateBaseAddressDirty = true;
this->lastMemoryCompressionState = memoryCompressionState;
}
if (this->sbaSupportFlags.globalAtomics) {
if (this->streamProperties.stateBaseAddress.globalAtomics.value != -1) {
this->lastSentUseGlobalAtomics = !!this->streamProperties.stateBaseAddress.globalAtomics.value;
}
bool globalAtomics = (this->isMultiOsContextCapable() || dispatchFlags.areMultipleSubDevicesInContext) && dispatchFlags.useGlobalAtomics;
if (this->lastSentUseGlobalAtomics != globalAtomics) {
isStateBaseAddressDirty = true;
this->lastSentUseGlobalAtomics = globalAtomics;
}
2023-03-10 07:12:09 +08:00
this->streamProperties.stateBaseAddress.setPropertyGlobalAtomics(globalAtomics, false);
2023-02-03 02:57:24 +08:00
}
}
2023-03-15 23:10:06 +08:00
// Stamps the stream's graphics allocation with newTaskCount for this CSR's OS context, updating
// both its task count and its residency task count.
template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::updateStreamTaskCount(LinearStream &stream, TaskCountType newTaskCount) {
    auto streamAllocation = stream.getGraphicsAllocation();
    streamAllocation->updateTaskCount(newTaskCount, this->osContext->getContextId());

    streamAllocation = stream.getGraphicsAllocation();
    streamAllocation->updateResidencyTaskCount(newTaskCount, this->osContext->getContextId());
}
2023-05-15 22:41:44 +08:00
// Tracks heap changes, updates the state-base-address stream properties accordingly, reprograms
// the state base address when anything relevant changed (or a source level debugger is attached),
// and makes the heaps resident.
//
// dsh / ioh / ssh       - dynamic state, indirect object and surface state heaps. dsh and ssh may
//                         be null; ioh is dereferenced unconditionally. dsh is only used when the
//                         hardware capability table reports image support.
// dispatchFlags         - requested settings for the upcoming dispatch.
// device                - queried for the source level debugger and forwarded to the reprogram call.
// commandStreamCSR      - stream that receives the state base address commands.
// stateBaseAddressDirty - caller's request to force reprogramming.
template <typename GfxFamily>
inline void CommandStreamReceiverHw<GfxFamily>::programStateBaseAddress(const IndirectHeap *dsh,
                                                                        const IndirectHeap *ioh,
                                                                        const IndirectHeap *ssh,
                                                                        DispatchFlags &dispatchFlags,
                                                                        Device &device,
                                                                        LinearStream &commandStreamCSR,
                                                                        bool stateBaseAddressDirty) {
    auto &hwInfo = this->peekHwInfo();
    auto &sbaProperties = this->streamProperties.stateBaseAddress;

    const bool dshInUse = hwInfo.capabilityTable.supportsImages && dsh != nullptr;

    // Heap trackers are queried in dsh, ioh, ssh order; absent heaps are never reported as changed.
    bool dshChanged = false;
    if (dshInUse) {
        dshChanged = dshState.updateAndCheck(dsh);
    }
    bool iohChanged = iohState.updateAndCheck(ioh);
    bool sshChanged = false;
    if (ssh != nullptr) {
        sshChanged = sshState.updateAndCheck(ssh);
    }

    // A binding table pool command is needed only for a changed ssh that is not the global stateless heap.
    bool bindingTablePoolNeeded = sshChanged && (ssh->getGraphicsAllocation() != globalStatelessHeapAllocation);

    if (dshChanged) {
        int64_t dshBase = dsh->getHeapGpuBase();
        size_t dshSizeInPages = dsh->getHeapSizeInPages();
        sbaProperties.setPropertiesDynamicState(dshBase, dshSizeInPages);
    }

    if (iohChanged) {
        int64_t iohBase = ioh->getHeapGpuBase();
        size_t iohSizeInPages = ioh->getHeapSizeInPages();
        sbaProperties.setPropertiesIndirectState(iohBase, iohSizeInPages);
    }

    if (sshChanged) {
        int64_t sshBase = ssh->getHeapGpuBase();
        size_t sshSizeInPages = ssh->getHeapSizeInPages();

        // -1 / max size_t are passed for the pool when no binding table pool command is needed.
        int64_t poolBase = -1;
        size_t poolSize = std::numeric_limits<size_t>::max();
        if (bindingTablePoolNeeded) {
            poolBase = sshBase;
            poolSize = sshSizeInPages;
        }
        sbaProperties.setPropertiesBindingTableSurfaceState(poolBase, poolSize,
                                                            sshBase, sshSizeInPages);
    }

    auto force32BitAllocations = getMemoryManager()->peekForce32BitAllocations();
    stateBaseAddressDirty |= ((gsbaFor32BitProgrammed ^ dispatchFlags.gsba32BitRequired) && force32BitAllocations);

    bool sbaDirty = dshChanged || iohChanged || sshChanged || stateBaseAddressDirty;
    handleStateBaseAddressStateTransition(dispatchFlags, sbaDirty);

    bool debuggerActive = device.getSourceLevelDebugger() != nullptr;

    // Reprogram the state base address command if required.
    if (sbaDirty || debuggerActive) {
        reprogramStateBaseAddress(dsh, ioh, ssh, dispatchFlags, device, commandStreamCSR, force32BitAllocations, sshChanged, bindingTablePoolNeeded);
    }

    // Residency: dsh and ioh are additionally marked non-evictable.
    if (dshInUse) {
        auto dshAllocation = dsh->getGraphicsAllocation();
        this->makeResident(*dshAllocation);
        dshAllocation->setEvictable(false);
    }

    if (ssh != nullptr) {
        auto sshAllocation = ssh->getGraphicsAllocation();
        this->makeResident(*sshAllocation);
    }

    auto iohAllocation = ioh->getGraphicsAllocation();
    this->makeResident(*iohAllocation);
    iohAllocation->setEvictable(false);

    if (globalStatelessHeapAllocation) {
        makeResident(*globalStatelessHeapAllocation);
    }
}
// Programs a fresh STATE_BASE_ADDRESS into commandStreamCSR for the heap-based flush path.
//
// Picks the general state base address (scratch-derived, external-heap-derived for the
// forced 32-bit case, or 0), derives the indirect object base from the internal heap that
// backs `ioh`, forwards everything to programStateBaseAddressCommon() and finally clears
// the cached "dirty" markers.
//
// dsh / ioh / ssh                 heaps forwarded to the common programming routine; `ioh` is dereferenced here.
// dispatchFlags                   supplies gsba32BitRequired, pipelineSelectArgs and areMultipleSubDevicesInContext.
// force32BitAllocations           selects between the scratch-based and the external-heap-based general state base.
// sshDirty                        when true, bindingTableBaseAddressRequired is overwritten with bindingTablePoolCommandNeeded.
// bindingTablePoolCommandNeeded   value latched into bindingTableBaseAddressRequired when sshDirty is set.
template <typename GfxFamily>
inline void CommandStreamReceiverHw<GfxFamily>::reprogramStateBaseAddress(const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh, DispatchFlags &dispatchFlags, Device &device, LinearStream &commandStreamCSR, bool force32BitAllocations, bool sshDirty, bool bindingTablePoolCommandNeeded) {
    // Reset first; only the forced 32-bit branch below sets it back to true.
    gsbaFor32BitProgrammed = false;

    uint64_t generalStateBase = 0;
    if (is64bit && scratchSpaceController->getScratchSpaceAllocation() && !force32BitAllocations) {
        generalStateBase = scratchSpaceController->calculateNewGSH();
    } else if (is64bit && force32BitAllocations && dispatchFlags.gsba32BitRequired) {
        // Fetch the scratch allocation once; without one, local memory is not used for the lookup.
        auto scratchAllocation = scratchSpaceController->getScratchSpaceAllocation();
        const bool useLocalMemory = scratchAllocation ? scratchAllocation->isAllocatedInLocalMemoryPool() : false;
        generalStateBase = getMemoryManager()->getExternalHeapBaseAddress(rootDeviceIndex, useLocalMemory);
        gsbaFor32BitProgrammed = true;
    }

    const bool iohInLocalMemory = ioh->getGraphicsAllocation()->isAllocatedInLocalMemoryPool();
    uint64_t indirectObjectStateBaseAddress = getMemoryManager()->getInternalHeapBaseAddress(rootDeviceIndex, iohInLocalMemory);

    // A clean SSH keeps whatever binding-table request was already pending.
    if (sshDirty) {
        bindingTableBaseAddressRequired = bindingTablePoolCommandNeeded;
    }

    programStateBaseAddressCommon(dsh, ioh, ssh, nullptr,
                                  generalStateBase,
                                  indirectObjectStateBaseAddress,
                                  dispatchFlags.pipelineSelectArgs,
                                  device,
                                  commandStreamCSR,
                                  bindingTableBaseAddressRequired,
                                  dispatchFlags.areMultipleSubDevicesInContext,
                                  true);

    // Everything has just been programmed: drop the pending request and the dirty state.
    bindingTableBaseAddressRequired = false;
    setGSBAStateDirty(false);
    this->streamProperties.stateBaseAddress.clearIsDirty();
}
// Emits the STATE_BASE_ADDRESS sequence into csrCommandStream.
//
// Sequence, in order:
//   1. pipe control and additional pipeline-select workarounds (EncodeWA),
//   2. the STATE_BASE_ADDRESS command itself,
//   3. SBA tracking commands when a non-legacy debugger is attached,
//   4. an optional binding table base address command,
//   5. the closing additional pipeline-select workaround,
//   6. optional AUB patch-info collection (AddPatchInfoCommentsForAUBDump).
//
// dsh / ioh / ssh                  heaps handed to the SBA helper; callers in this file pass nullptr for all
//                                  three when sbaProperties is provided.
// sbaProperties                    optional property set; when non-null it is the source of the binding table
//                                  pool address/size, otherwise `ssh` is (and must be non-null).
// generalStateBaseAddress          value placed in the helper args and in the patch info.
// indirectObjectStateBaseAddress   value placed in the helper args.
// pipelineSelectArgs               forwarded to the pipeline-select workarounds.
// dispatchBindingTableCommand      when true, a binding table base address command is programmed.
// areMultipleSubDevicesInContext   forwarded to the helper args.
// setGeneralStateBaseAddress       forwarded to the helper args.
template <typename GfxFamily>
inline void CommandStreamReceiverHw<GfxFamily>::programStateBaseAddressCommon(
    const IndirectHeap *dsh,
    const IndirectHeap *ioh,
    const IndirectHeap *ssh,
    StateBaseAddressProperties *sbaProperties,
    uint64_t generalStateBaseAddress,
    uint64_t indirectObjectStateBaseAddress,
    PipelineSelectArgs &pipelineSelectArgs,
    Device &device,
    LinearStream &csrCommandStream,
    bool dispatchBindingTableCommand,
    bool areMultipleSubDevicesInContext,
    bool setGeneralStateBaseAddress) {

    using STATE_BASE_ADDRESS = typename GfxFamily::STATE_BASE_ADDRESS;

    auto &rootDeviceEnvironment = this->peekRootDeviceEnvironment();
    bool debuggingEnabled = device.getDebugger() != nullptr;

    EncodeWA<GfxFamily>::addPipeControlBeforeStateBaseAddress(csrCommandStream, rootDeviceEnvironment, isRcs(), this->dcFlushSupport);
    EncodeWA<GfxFamily>::encodeAdditionalPipelineSelect(csrCommandStream, pipelineSelectArgs, true, rootDeviceEnvironment, isRcs());

    // Stream offset recorded after the workarounds and before the SBA is programmed; used for patch info below.
    auto stateBaseAddressCmdOffset = csrCommandStream.getUsed();
    auto instructionHeapBaseAddress = getMemoryManager()->getInternalHeapBaseAddress(rootDeviceIndex, getMemoryManager()->isLocalMemoryUsedForIsa(rootDeviceIndex));

    STATE_BASE_ADDRESS stateBaseAddressCmd;

    StateBaseAddressHelperArgs<GfxFamily> args = {
        generalStateBaseAddress,                       // generalStateBaseAddress
        indirectObjectStateBaseAddress,                // indirectObjectHeapBaseAddress
        instructionHeapBaseAddress,                    // instructionHeapBaseAddress
        0,                                             // globalHeapsBaseAddress
        0,                                             // surfaceStateBaseAddress
        &stateBaseAddressCmd,                          // stateBaseAddressCmd
        sbaProperties,                                 // sbaProperties
        dsh,                                           // dsh
        ioh,                                           // ioh
        ssh,                                           // ssh
        device.getGmmHelper(),                         // gmmHelper
        this->latestSentStatelessMocsConfig,           // statelessMocsIndex
        l1CachePolicyData.getL1CacheValue(false),      // l1CachePolicy
        l1CachePolicyData.getL1CacheValue(true),       // l1CachePolicyDebuggerActive
        this->lastMemoryCompressionState,              // memoryCompressionState
        true,                                          // setInstructionStateBaseAddress
        setGeneralStateBaseAddress,                    // setGeneralStateBaseAddress
        false,                                         // useGlobalHeapsBaseAddress
        isMultiOsContextCapable(),                     // isMultiOsContextCapable
        this->lastSentUseGlobalAtomics,                // useGlobalAtomics
        areMultipleSubDevicesInContext,                // areMultipleSubDevicesInContext
        false,                                         // overrideSurfaceStateBaseAddress
        debuggingEnabled || device.isDebuggerActive(), // isDebuggerActive
        this->doubleSbaWa                              // doubleSbaWa
    };

    StateBaseAddressHelper<GfxFamily>::programStateBaseAddressIntoCommandStream(args, csrCommandStream);

    // SBA tracking is programmed only for a debugger that reports itself as non-legacy.
    bool sbaTrackingEnabled = (debuggingEnabled && !device.getDebugger()->isLegacy());
    if (sbaTrackingEnabled) {
        device.getL0Debugger()->programSbaAddressLoad(csrCommandStream,
                                                      device.getL0Debugger()->getSbaTrackingBuffer(this->getOsContext().getContextId())->getGpuAddress());
    }

    NEO::EncodeStateBaseAddress<GfxFamily>::setSbaTrackingForL0DebuggerIfEnabled(sbaTrackingEnabled,
                                                                                 device,
                                                                                 csrCommandStream,
                                                                                 stateBaseAddressCmd, true);

    if (dispatchBindingTableCommand) {
        uint64_t bindingTableBaseAddress = 0;
        uint32_t bindingTableSize = 0;
        if (sbaProperties) {
            bindingTableBaseAddress = static_cast<uint64_t>(sbaProperties->bindingTablePoolBaseAddress.value);
            bindingTableSize = static_cast<uint32_t>(sbaProperties->bindingTablePoolSize.value);
        } else {
            // Without a property set the SSH is the only source for the binding table pool.
            UNRECOVERABLE_IF(!ssh);
            bindingTableBaseAddress = ssh->getHeapGpuBase();
            // Explicit cast, matching the property-based branch above.
            bindingTableSize = static_cast<uint32_t>(ssh->getHeapSizeInPages());
        }
        StateBaseAddressHelper<GfxFamily>::programBindingTableBaseAddress(csrCommandStream, bindingTableBaseAddress, bindingTableSize, device.getGmmHelper());
    }

    EncodeWA<GfxFamily>::encodeAdditionalPipelineSelect(csrCommandStream, pipelineSelectArgs, false, rootDeviceEnvironment, isRcs());

    if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) {
        // NOTE(review): the base address comes from the member `commandStream`, not from csrCommandStream - confirm this is intended.
        collectStateBaseAddresPatchInfo(commandStream.getGraphicsAllocation()->getGpuAddress(), stateBaseAddressCmdOffset, dsh, ioh, ssh, generalStateBaseAddress,
                                        device.getDeviceInfo().imageSupport);
    }
}
// Adds a single barrier with texture cache invalidation to commandStreamCSR whenever a
// sampler cache flush is pending, then advances the pending state:
// samplerCacheFlushBefore -> samplerCacheFlushAfter, anything else -> samplerCacheFlushNotRequired.
template <typename GfxFamily>
inline void CommandStreamReceiverHw<GfxFamily>::programSamplerCacheFlushBetweenRedescribedSurfaceReads(LinearStream &commandStreamCSR) {
    if (this->samplerCacheFlushRequired == SamplerCacheFlushState::samplerCacheFlushNotRequired) {
        return;
    }

    PipeControlArgs barrierArgs;
    barrierArgs.textureCacheInvalidationEnable = true;
    MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(commandStreamCSR, barrierArgs);

    const bool flushBeforeWasPending = (this->samplerCacheFlushRequired == SamplerCacheFlushState::samplerCacheFlushBefore);
    this->samplerCacheFlushRequired = flushBeforeWasPending
                                          ? SamplerCacheFlushState::samplerCacheFlushAfter
                                          : SamplerCacheFlushState::samplerCacheFlushNotRequired;
}
2023-06-01 21:21:48 +08:00
template <typename GfxFamily>
2023-06-02 20:04:06 +08:00
void CommandStreamReceiverHw<GfxFamily>::handleImmediateFlushPipelineSelectState(ImmediateDispatchFlags &dispatchFlags, ImmediateFlushData &flushData) {
if (flushData.pipelineSelectFullConfigurationNeeded) {
2023-06-01 21:21:48 +08:00
this->streamProperties.pipelineSelect.copyPropertiesAll(dispatchFlags.requiredState->pipelineSelect);
flushData.pipelineSelectDirty = true;
setPreambleSetFlag(true);
} else {
this->streamProperties.pipelineSelect.copyPropertiesSystolicMode(dispatchFlags.requiredState->pipelineSelect);
flushData.pipelineSelectDirty = this->streamProperties.pipelineSelect.isDirty();
}
if (flushData.pipelineSelectDirty) {
2023-06-02 20:04:06 +08:00
flushData.estimatedSize += PreambleHelper<GfxFamily>::getCmdSizeForPipelineSelect(peekRootDeviceEnvironment());
2023-06-01 21:21:48 +08:00
}
2023-06-05 21:29:53 +08:00
flushData.pipelineSelectArgs = {
this->streamProperties.pipelineSelect.systolicMode.value == 1,
false,
false,
this->pipelineSupportFlags.systolicMode};
2023-06-01 21:21:48 +08:00
}
template <typename GfxFamily>
2023-06-03 06:10:15 +08:00
void CommandStreamReceiverHw<GfxFamily>::dispatchImmediateFlushPipelineSelectCommand(ImmediateFlushData &flushData, LinearStream &csrStream) {
2023-06-01 21:21:48 +08:00
if (flushData.pipelineSelectDirty) {
2023-06-05 21:29:53 +08:00
PreambleHelper<GfxFamily>::programPipelineSelect(&csrStream, flushData.pipelineSelectArgs, peekRootDeviceEnvironment());
2023-06-03 06:10:15 +08:00
this->streamProperties.pipelineSelect.clearIsDirty();
2023-06-02 20:04:06 +08:00
}
}
// Updates the tracked front-end properties for an immediate flush and records in flushData
// whether a front-end (VFE) command must be dispatched.
//
// A full configuration copies every property, forces the dirty flag and calls
// setMediaVFEStateDirty(false); otherwise only the compute-dispatch-all-walker and
// disable-EU-fusion properties are copied and the dirty flag follows the property tracker.
// When dirty, the VFE command size is added to flushData.estimatedSize.
template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::handleImmediateFlushFrontEndState(ImmediateDispatchFlags &dispatchFlags, ImmediateFlushData &flushData) {
    auto &trackedFrontEnd = this->streamProperties.frontEndState;
    auto &requiredFrontEnd = dispatchFlags.requiredState->frontEndState;

    if (!flushData.frontEndFullConfigurationNeeded) {
        trackedFrontEnd.copyPropertiesComputeDispatchAllWalkerEnableDisableEuFusion(requiredFrontEnd);
        flushData.frontEndDirty = trackedFrontEnd.isDirty();
    } else {
        trackedFrontEnd.copyPropertiesAll(requiredFrontEnd);
        flushData.frontEndDirty = true;
        setMediaVFEStateDirty(false);
    }

    if (!flushData.frontEndDirty) {
        return;
    }
    flushData.estimatedSize += NEO::PreambleHelper<GfxFamily>::getVFECommandsSize();
}
template <typename GfxFamily>
2023-07-01 03:57:19 +08:00
void CommandStreamReceiverHw<GfxFamily>::dispatchImmediateFlushFrontEndCommand(ImmediateFlushData &flushData, Device &device, LinearStream &csrStream) {
2023-06-02 20:04:06 +08:00
if (flushData.frontEndDirty) {
auto &gfxCoreHelper = getGfxCoreHelper();
auto engineGroupType = gfxCoreHelper.getEngineGroupType(getOsContext().getEngineType(), getOsContext().getEngineUsage(), peekHwInfo());
auto feStateCmdSpace = PreambleHelper<GfxFamily>::getSpaceForVfeState(&csrStream, peekHwInfo(), engineGroupType);
PreambleHelper<GfxFamily>::programVfeState(feStateCmdSpace,
peekRootDeviceEnvironment(),
requiredScratchSize,
2023-07-01 03:57:19 +08:00
getScratchPatchAddress(),
2023-06-02 20:04:06 +08:00
device.getDeviceInfo().maxFrontEndThreads,
this->streamProperties,
getLogicalStateHelper());
2023-06-03 06:10:15 +08:00
this->streamProperties.frontEndState.clearIsDirty();
}
}
// Updates the tracked state-compute-mode properties for an immediate flush and records in
// flushData whether a compute-mode command must be dispatched.
//
// A full configuration copies every property, forces the dirty flag and calls
// setStateComputeModeDirty(false); otherwise only the GRF number and thread arbitration
// properties are copied and the dirty flag follows the property tracker. When dirty, the
// compute-mode command size is added to flushData.estimatedSize.
template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::handleImmediateFlushStateComputeModeState(ImmediateDispatchFlags &dispatchFlags, ImmediateFlushData &flushData) {
    auto &trackedComputeMode = this->streamProperties.stateComputeMode;
    auto &requiredComputeMode = dispatchFlags.requiredState->stateComputeMode;

    if (!flushData.stateComputeModeFullConfigurationNeeded) {
        trackedComputeMode.copyPropertiesGrfNumberThreadArbitration(requiredComputeMode);
        flushData.stateComputeModeDirty = trackedComputeMode.isDirty();
    } else {
        trackedComputeMode.copyPropertiesAll(requiredComputeMode);
        flushData.stateComputeModeDirty = true;
        setStateComputeModeDirty(false);
    }

    if (!flushData.stateComputeModeDirty) {
        return;
    }
    flushData.estimatedSize += EncodeComputeMode<GfxFamily>::getCmdSizeForComputeMode(peekRootDeviceEnvironment(), false, isRcs());
}
// Programs the compute-mode command (with synchronization) into csrStream when flushData
// marks it dirty, using the tracked state-compute-mode properties and the pipeline-select
// arguments stored in flushData, then clears the tracked dirty state.
template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::dispatchImmediateFlushStateComputeModeCommand(ImmediateFlushData &flushData, LinearStream &csrStream) {
    if (!flushData.stateComputeModeDirty) {
        return;
    }

    EncodeComputeMode<GfxFamily>::programComputeModeCommandWithSynchronization(csrStream, this->streamProperties.stateComputeMode,
                                                                               flushData.pipelineSelectArgs,
                                                                               false, peekRootDeviceEnvironment(), isRcs(),
                                                                               getDcFlushSupport(), nullptr);
    this->streamProperties.stateComputeMode.clearIsDirty();
}
2023-06-05 21:29:53 +08:00
// Updates the tracked state-base-address properties for an immediate flush and records in
// flushData whether STATE_BASE_ADDRESS must be dispatched.
//
// A full configuration copies every property, forces the dirty flag and calls
// setGSBAStateDirty(false). Otherwise the stateless MOCS is always copied, and then:
//   - with a global stateless heap allocation: only the surface state properties;
//   - without one: binding table + surface state properties, plus dynamic state
//     properties when dshSupported is set.
// When dirty, getRequiredStateBaseAddressSize(device) is added to flushData.estimatedSize.
template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::handleImmediateFlushStateBaseAddressState(ImmediateDispatchFlags &dispatchFlags, ImmediateFlushData &flushData, Device &device) {
    auto &trackedSba = this->streamProperties.stateBaseAddress;
    auto &requiredSba = dispatchFlags.requiredState->stateBaseAddress;

    if (flushData.stateBaseAddressFullConfigurationNeeded) {
        trackedSba.copyPropertiesAll(requiredSba);
        flushData.stateBaseAddressDirty = true;
        setGSBAStateDirty(false);
    } else {
        trackedSba.copyPropertiesStatelessMocs(requiredSba);

        if (globalStatelessHeapAllocation != nullptr) {
            trackedSba.copyPropertiesSurfaceState(requiredSba);
        } else {
            trackedSba.copyPropertiesBindingTableSurfaceState(requiredSba);
            if (this->dshSupported) {
                trackedSba.copyPropertiesDynamicState(requiredSba);
            }
        }

        flushData.stateBaseAddressDirty = trackedSba.isDirty();
    }

    if (flushData.stateBaseAddressDirty) {
        flushData.estimatedSize += getRequiredStateBaseAddressSize(device);
    }
}
// Programs STATE_BASE_ADDRESS into csrStream from the tracked properties when flushData
// marks it dirty, then clears the tracked dirty state.
//
// No heaps are passed (all three are nullptr) and both explicit base addresses are 0; the
// common routine reads from the property set instead. A binding table command is
// requested only when the tracked binding table pool base differs from
// StreamProperty64::initValue.
template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::dispatchImmediateFlushStateBaseAddressCommand(ImmediateFlushData &flushData, LinearStream &csrStream, Device &device) {
    if (!flushData.stateBaseAddressDirty) {
        return;
    }

    auto &trackedSba = this->streamProperties.stateBaseAddress;
    bool btCommandNeeded = trackedSba.bindingTablePoolBaseAddress.value != StreamProperty64::initValue;

    programStateBaseAddressCommon(nullptr, nullptr, nullptr, &trackedSba,
                                  0, 0, flushData.pipelineSelectArgs, device, csrStream, btCommandNeeded, device.getNumGenericSubDevices() > 1, false);

    trackedSba.clearIsDirty();
}
2023-06-19 23:40:22 +08:00
template <typename GfxFamily>
2023-06-20 22:57:40 +08:00
void CommandStreamReceiverHw<GfxFamily>::handleImmediateFlushOneTimeContextInitState(ImmediateDispatchFlags &dispatchFlags, ImmediateFlushData &flushData, Device &device) {
2023-06-19 23:40:22 +08:00
size_t size = 0;
size = getCmdSizeForPrologue();
flushData.contextOneTimeInit = size > 0;
flushData.estimatedSize += size;
2023-06-20 19:53:28 +08:00
if (this->isProgramActivePartitionConfigRequired()) {
flushData.contextOneTimeInit = true;
flushData.estimatedSize += this->getCmdSizeForActivePartitionConfig();
}
2023-06-20 22:57:40 +08:00
if (this->isRayTracingStateProgramingNeeded(device)) {
flushData.contextOneTimeInit = true;
flushData.estimatedSize += this->getCmdSizeForPerDssBackedBuffer(peekHwInfo());
}
2023-07-05 00:42:23 +08:00
if (this->getPreemptionMode() == PreemptionMode::Initial) {
flushData.contextOneTimeInit = true;
flushData.estimatedSize += PreemptionHelper::getRequiredCmdStreamSize<GfxFamily>(device.getPreemptionMode(), this->getPreemptionMode());
2023-07-05 22:08:58 +08:00
flushData.estimatedSize += PreemptionHelper::getRequiredPreambleSize<GfxFamily>(device);
2023-07-05 00:42:23 +08:00
}
2023-07-06 02:21:23 +08:00
if (!this->isStateSipSent) {
size_t size = PreemptionHelper::getRequiredStateSipCmdSize<GfxFamily>(device, isRcs());
flushData.contextOneTimeInit |= size > 0;
flushData.estimatedSize += size;
}
2023-06-19 23:40:22 +08:00
}
template <typename GfxFamily>
2023-06-20 22:57:40 +08:00
void CommandStreamReceiverHw<GfxFamily>::dispatchImmediateFlushOneTimeContextInitCommand(ImmediateFlushData &flushData, LinearStream &csrStream, Device &device) {
2023-06-19 23:40:22 +08:00
if (flushData.contextOneTimeInit) {
programEnginePrologue(csrStream);
2023-06-20 19:53:28 +08:00
if (this->isProgramActivePartitionConfigRequired()) {
this->programActivePartitionConfig(csrStream);
}
2023-06-20 22:57:40 +08:00
if (this->isRayTracingStateProgramingNeeded(device)) {
this->dispatchRayTracingStateCommand(csrStream, device);
}
2023-07-05 00:42:23 +08:00
if (this->getPreemptionMode() == PreemptionMode::Initial) {
PreemptionHelper::programCmdStream<GfxFamily>(csrStream, device.getPreemptionMode(), this->getPreemptionMode(), this->getPreemptionAllocation());
2023-07-05 22:08:58 +08:00
PreemptionHelper::programCsrBaseAddress<GfxFamily>(csrStream,
device,
getPreemptionAllocation(),
getLogicalStateHelper());
2023-07-05 00:42:23 +08:00
this->setPreemptionMode(device.getPreemptionMode());
}
2023-07-06 02:21:23 +08:00
programStateSip(csrStream, device);
2023-06-19 23:40:22 +08:00
}
}
template <typename GfxFamily>
2023-06-28 03:54:20 +08:00
void CommandStreamReceiverHw<GfxFamily>::handleImmediateFlushAllocationsResidency(Device &device,
LinearStream &immediateCommandStream,
ImmediateFlushData &flushData,
LinearStream &csrStream) {
2023-06-27 21:42:31 +08:00
this->makeResident(*tagAllocation);
2023-06-19 23:40:22 +08:00
if (globalFenceAllocation) {
makeResident(*globalFenceAllocation);
}
2023-06-20 19:53:28 +08:00
if (workPartitionAllocation) {
makeResident(*workPartitionAllocation);
}
2023-06-20 22:57:40 +08:00
if (device.getRTMemoryBackedBuffer()) {
makeResident(*device.getRTMemoryBackedBuffer());
}
2023-06-28 03:54:20 +08:00
if (flushData.estimatedSize > 0) {
makeResident(*csrStream.getGraphicsAllocation());
}
2023-07-05 22:08:58 +08:00
if (preemptionAllocation) {
makeResident(*preemptionAllocation);
}
2023-07-06 02:21:23 +08:00
if (device.isStateSipRequired()) {
makeResident(*SipKernel::getSipKernel(device).getSipAllocation());
}
2023-06-19 23:40:22 +08:00
}
2023-06-22 21:17:49 +08:00
// When any CSR-side commands were estimated for this flush, reserves room for one
// batch-buffer-start command and rounds the total estimate up to a cache line multiple.
// A zero estimate is left untouched.
template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::handleImmediateFlushJumpToImmediate(ImmediateFlushData &flushData) {
    const bool csrCommandsEstimated = flushData.estimatedSize > 0;
    if (!csrCommandsEstimated) {
        return;
    }

    const auto sizeWithJump = flushData.estimatedSize + EncodeBatchBufferStartOrEnd<GfxFamily>::getBatchBufferStartSize();
    flushData.estimatedSize = sizeWithJump;
    flushData.estimatedSize = alignUp(flushData.estimatedSize, MemoryConstants::cacheLineSize);
}
// When CSR-side commands were estimated for this flush, appends to csrStream a
// batch-buffer-start targeting the immediate stream (its GPU base plus
// immediateCommandStreamStart) and pads csrStream to a cache line boundary.
template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::dispatchImmediateFlushJumpToImmediateCommand(LinearStream &immediateCommandStream,
                                                                                      size_t immediateCommandStreamStart,
                                                                                      ImmediateFlushData &flushData,
                                                                                      LinearStream &csrStream) {
    const bool csrCommandsEstimated = flushData.estimatedSize > 0;
    if (!csrCommandsEstimated) {
        return;
    }

    uint64_t jumpTargetAddress = immediateCommandStream.getGpuBase() + immediateCommandStreamStart;
    EncodeBatchBufferStartOrEnd<GfxFamily>::programBatchBufferStart(&csrStream, jumpTargetAddress, false, false, false);
    EncodeNoop<GfxFamily>::alignToCacheLine(csrStream);
}
2023-06-27 21:42:31 +08:00
// Finishes the client (immediate) command stream for submission.
//
// For a blocking append, first adds a barrier with an immediate-data post-sync write of
// taskCount + 1 to the tag allocation address. Then makes the immediate stream allocation
// resident, programs the ending command (its location is stored in flushData.endPtr) and
// pads the stream to a cache line boundary.
template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::dispatchImmediateFlushClientBufferCommands(ImmediateDispatchFlags &dispatchFlags,
                                                                                    LinearStream &immediateCommandStream,
                                                                                    ImmediateFlushData &flushData) {
    if (dispatchFlags.blockingAppend) {
        auto tagGpuAddress = getTagAllocation()->getGpuAddress();

        PipeControlArgs postSyncArgs = {};
        postSyncArgs.dcFlushEnable = this->dcFlushSupport;
        postSyncArgs.notifyEnable = isUsedNotifyEnableForPostSync();
        postSyncArgs.workloadPartitionOffset = isMultiTileOperationEnabled();

        MemorySynchronizationCommands<GfxFamily>::addBarrierWithPostSyncOperation(immediateCommandStream,
                                                                                  PostSyncMode::ImmediateData,
                                                                                  tagGpuAddress,
                                                                                  this->taskCount + 1,
                                                                                  peekRootDeviceEnvironment(),
                                                                                  postSyncArgs);
    }

    auto *immediateAllocation = immediateCommandStream.getGraphicsAllocation();
    makeResident(*immediateAllocation);

    programEndingCmd(immediateCommandStream, &flushData.endPtr, isDirectSubmissionEnabled(), dispatchFlags.hasRelaxedOrderingDependencies, true);
    EncodeNoop<GfxFamily>::alignToCacheLine(immediateCommandStream);
}
2023-06-28 03:54:20 +08:00
// Builds the batch buffer for an immediate flush, submits it through flushHandler() and
// returns the resulting completion stamp.
//
// When CSR-side commands were estimated (flushData.estimatedSize > 0) the submission starts
// in csrStream at flushData.csrStartOffset and chains to the immediate stream; otherwise it
// starts directly in the immediate stream at immediateCommandStreamStart.
//
// On failure the provisional latestSentTaskCount / stream task count updates are rolled
// back and the stamp carries only the task count derived from the submission error. On
// success taskCount is incremented (and latestFlushedTaskCount updated for a blocking
// append) and the stamp holds task count, task level and the flush stamp.
template <typename GfxFamily>
CompletionStamp CommandStreamReceiverHw<GfxFamily>::handleImmediateFlushSendBatchBuffer(LinearStream &immediateCommandStream,
                                                                                        size_t immediateCommandStreamStart,
                                                                                        ImmediateDispatchFlags &dispatchFlags,
                                                                                        ImmediateFlushData &flushData,
                                                                                        LinearStream &csrStream) {
    // Provisional value; reverted below if the submission fails.
    this->latestSentTaskCount = taskCount + 1;

    const bool submitViaCsr = flushData.estimatedSize > 0;
    auto &submittedStream = submitViaCsr ? csrStream : immediateCommandStream;

    size_t submissionStartOffset = immediateCommandStreamStart;
    GraphicsAllocation *chainedAllocation = nullptr;
    size_t chainedStartOffset = 0;
    if (submitViaCsr) {
        submissionStartOffset = flushData.csrStartOffset;
        chainedAllocation = immediateCommandStream.getGraphicsAllocation();
        chainedStartOffset = csrStream.getUsed();
    }

    uint64_t immediateTaskStartAddress = immediateCommandStream.getGpuBase() + immediateCommandStreamStart;
    bool stallingCmdsPresent = (submitViaCsr || dispatchFlags.blockingAppend || dispatchFlags.hasStallingCmds);

    // Fixed submission attributes used for every immediate flush.
    constexpr bool immediateRequiresCoherency = false;
    constexpr bool immediateLowPriority = false;
    constexpr QueueThrottle immediateThrottle = QueueThrottle::MEDIUM;
    constexpr uint64_t immediateSliceCount = QueueSliceCount::defaultSliceCount;

    BatchBuffer batchBuffer{submittedStream.getGraphicsAllocation(), submissionStartOffset, chainedStartOffset, immediateTaskStartAddress, chainedAllocation,
                            immediateRequiresCoherency, immediateLowPriority, immediateThrottle, immediateSliceCount,
                            submittedStream.getUsed(), &submittedStream, flushData.endPtr, this->getNumClients(), stallingCmdsPresent,
                            dispatchFlags.hasRelaxedOrderingDependencies};

    updateStreamTaskCount(submittedStream, taskCount + 1);

    auto submissionStatus = flushHandler(batchBuffer, this->getResidencyAllocations());
    if (submissionStatus != SubmissionStatus::SUCCESS) {
        --this->latestSentTaskCount;
        updateStreamTaskCount(submittedStream, taskCount);

        CompletionStamp failureStamp = {CompletionStamp::getTaskCountFromSubmissionStatusError(submissionStatus)};
        return failureStamp;
    }

    if (dispatchFlags.blockingAppend) {
        this->latestFlushedTaskCount = this->taskCount + 1;
    }
    ++taskCount;

    CompletionStamp successStamp = {
        this->taskCount,
        this->taskLevel,
        flushStamp->peekStamp()};
    return successStamp;
}
2019-03-26 18:59:46 +08:00
} // namespace NEO