/*
 * Copyright (C) 2020-2024 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "shared/source/command_container/command_encoder.h"
#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/command_stream/submissions_aggregator.h"
#include "shared/source/debug_settings/debug_settings_manager.h"
#include "shared/source/device/device.h"
#include "shared/source/direct_submission/direct_submission_hw.h"
#include "shared/source/direct_submission/direct_submission_hw_diagnostic_mode.h"
#include "shared/source/direct_submission/relaxed_ordering_helper.h"
#include "shared/source/execution_environment/execution_environment.h"
#include "shared/source/execution_environment/root_device_environment.h"
#include "shared/source/gmm_helper/gmm_helper.h"
#include "shared/source/gmm_helper/gmm_lib.h"
#include "shared/source/helpers/aligned_memory.h"
#include "shared/source/helpers/compiler_product_helper.h"
#include "shared/source/helpers/definitions/command_encoder_args.h"
#include "shared/source/helpers/flush_stamp.h"
#include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/helpers/hw_info.h"
#include "shared/source/helpers/ptr_math.h"
#include "shared/source/memory_manager/allocation_properties.h"
#include "shared/source/memory_manager/graphics_allocation.h"
#include "shared/source/memory_manager/memory_manager.h"
#include "shared/source/memory_manager/memory_operations_handler.h"
#include "shared/source/os_interface/os_context.h"
#include "shared/source/os_interface/product_helper.h"
#include "shared/source/utilities/cpu_info.h"
#include "shared/source/utilities/cpuintrinsics.h"

#include "create_direct_submission_hw.inl"

#include <algorithm>
#include <cstring>

namespace NEO {
template <typename GfxFamily, typename Dispatcher>
DirectSubmissionHw<GfxFamily, Dispatcher>::DirectSubmissionHw(const DirectSubmissionInputParams &inputParams)
: ringBuffers(RingBufferUse::initialRingBufferCount), osContext(inputParams.osContext), rootDeviceIndex(inputParams.rootDeviceIndex), rootDeviceEnvironment(inputParams.rootDeviceEnvironment) {
memoryManager = inputParams.memoryManager;
globalFenceAllocation = inputParams.globalFenceAllocation;
hwInfo = inputParams.rootDeviceEnvironment.getHardwareInfo();
memoryOperationHandler = inputParams.rootDeviceEnvironment.memoryOperationsInterface.get();
auto &productHelper = inputParams.rootDeviceEnvironment.getHelper<ProductHelper>();
auto &compilerProductHelper = inputParams.rootDeviceEnvironment.getHelper<CompilerProductHelper>();
disableCacheFlush = UllsDefaults::defaultDisableCacheFlush;
disableMonitorFence = UllsDefaults::defaultDisableMonitorFence;
if (debugManager.flags.DirectSubmissionDisableMonitorFence.get() != -1) {
this->disableMonitorFence = debugManager.flags.DirectSubmissionDisableMonitorFence.get();
}
if (debugManager.flags.DirectSubmissionMaxRingBuffers.get() != -1) {
this->maxRingBufferCount = debugManager.flags.DirectSubmissionMaxRingBuffers.get();
}
if (debugManager.flags.DirectSubmissionDisableCacheFlush.get() != -1) {
disableCacheFlush = !!debugManager.flags.DirectSubmissionDisableCacheFlush.get();
}
if (debugManager.flags.DirectSubmissionDetectGpuHang.get() != -1) {
detectGpuHang = !!debugManager.flags.DirectSubmissionDetectGpuHang.get();
}
if (hwInfo->capabilityTable.isIntegratedDevice) {
miMemFenceRequired = false;
} else {
miMemFenceRequired = productHelper.isGlobalFenceInDirectSubmissionRequired(*hwInfo);
}
if (debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.get() != -1) {
miMemFenceRequired = debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.get();
}
if (miMemFenceRequired && compilerProductHelper.isHeaplessStateInitEnabled(compilerProductHelper.isHeaplessModeEnabled())) {
this->systemMemoryFenceAddressSet = true;
}
if (debugManager.flags.DirectSubmissionInsertSfenceInstructionPriorToSubmission.get() != -1) {
sfenceMode = static_cast<DirectSubmissionSfenceMode>(debugManager.flags.DirectSubmissionInsertSfenceInstructionPriorToSubmission.get());
}
if (debugManager.flags.DirectSubmissionMonitorFenceInputPolicy.get() != -1) {
this->inputMonitorFenceDispatchRequirement = !!(debugManager.flags.DirectSubmissionMonitorFenceInputPolicy.get());
}
int32_t disableCacheFlushKey = debugManager.flags.DirectSubmissionDisableCpuCacheFlush.get();
if (disableCacheFlushKey != -1) {
disableCpuCacheFlush = (disableCacheFlushKey == 1);
}
isDisablePrefetcherRequired = productHelper.isPrefetcherDisablingInDirectSubmissionRequired();
if (debugManager.flags.DirectSubmissionDisablePrefetcher.get() != -1) {
isDisablePrefetcherRequired = !!debugManager.flags.DirectSubmissionDisablePrefetcher.get();
}
UNRECOVERABLE_IF(!CpuInfo::getInstance().isFeatureSupported(CpuInfo::featureClflush) && !disableCpuCacheFlush);
createDiagnostic();
setImmWritePostSyncOffset();
dcFlushRequired = MemorySynchronizationCommands<GfxFamily>::getDcFlushEnable(true, inputParams.rootDeviceEnvironment);
auto &gfxCoreHelper = inputParams.rootDeviceEnvironment.getHelper<GfxCoreHelper>();
relaxedOrderingEnabled = gfxCoreHelper.isRelaxedOrderingSupported();
this->currentRelaxedOrderingQueueSize = RelaxedOrderingHelper::queueSizeMultiplier;
if (debugManager.flags.DirectSubmissionRelaxedOrdering.get() != -1) {
relaxedOrderingEnabled = (debugManager.flags.DirectSubmissionRelaxedOrdering.get() == 1);
}
if (Dispatcher::isCopy() && relaxedOrderingEnabled) {
relaxedOrderingEnabled = (debugManager.flags.DirectSubmissionRelaxedOrderingForBcs.get() != 0);
}
}
template <typename GfxFamily, typename Dispatcher>
void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchStaticRelaxedOrderingScheduler() {
LinearStream schedulerCmdStream(this->relaxedOrderingSchedulerAllocation);
uint64_t schedulerStartAddress = schedulerCmdStream.getGpuBase();
uint64_t deferredTasksListGpuVa = deferredTasksListAllocation->getGpuAddress();
uint64_t loopSectionStartAddress = schedulerStartAddress + RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection<GfxFamily>::loopStartSectionStart;
const uint32_t miMathMocs = this->rootDeviceEnvironment.getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
constexpr bool isBcs = Dispatcher::isCopy();
// 1. Init section
{
EncodeMiPredicate<GfxFamily>::encode(schedulerCmdStream, MiPredicateType::disable);
EncodeSetMMIO<GfxFamily>::encodeREG(schedulerCmdStream, RegisterOffsets::csGprR0, RegisterOffsets::csGprR9, isBcs);
EncodeSetMMIO<GfxFamily>::encodeREG(schedulerCmdStream, RegisterOffsets::csGprR0 + 4, RegisterOffsets::csGprR9 + 4, isBcs);
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataRegBatchBufferStart(schedulerCmdStream, 0, RegisterOffsets::csGprR1, 0, CompareOperation::equal, true, false, isBcs);
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR2, 0, true, isBcs);
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR2 + 4, 0, true, isBcs);
uint64_t removeTaskVa = schedulerStartAddress + RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection<GfxFamily>::removeTaskSectionStart;
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR3, static_cast<uint32_t>(removeTaskVa & 0xFFFF'FFFFULL), true, isBcs);
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR3 + 4, static_cast<uint32_t>(removeTaskVa >> 32), true, isBcs);
uint64_t walkersLoopConditionCheckVa = schedulerStartAddress + RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection<GfxFamily>::tasksListLoopCheckSectionStart;
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR4, static_cast<uint32_t>(walkersLoopConditionCheckVa & 0xFFFF'FFFFULL), true, isBcs);
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR4 + 4, static_cast<uint32_t>(walkersLoopConditionCheckVa >> 32), true, isBcs);
}
// 2. Dispatch task section (loop start)
{
UNRECOVERABLE_IF(schedulerCmdStream.getUsed() != RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection<GfxFamily>::loopStartSectionStart);
EncodeMiPredicate<GfxFamily>::encode(schedulerCmdStream, MiPredicateType::disable);
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR6, 8, true, isBcs);
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR6 + 4, 0, true, isBcs);
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR8, static_cast<uint32_t>(deferredTasksListGpuVa & 0xFFFF'FFFFULL), true, isBcs);
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR8 + 4, static_cast<uint32_t>(deferredTasksListGpuVa >> 32), true, isBcs);
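        // gpr2 = current task index, gpr6 = shift amount (8), gpr8 = deferred list base:
        // compute the slot VA (base + (index << 8)), keep it in gpr6 and load the stored task
        // start address into GPR0 for the batch buffer start dispatched below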
EncodeAluHelper<GfxFamily, 10> aluHelper;
aluHelper.setMocs(miMathMocs);
aluHelper.setNextAlu(AluRegisters::opcodeLoad, AluRegisters::srca, AluRegisters::gpr2);
aluHelper.setNextAlu(AluRegisters::opcodeLoad, AluRegisters::srcb, AluRegisters::gpr6);
aluHelper.setNextAlu(AluRegisters::opcodeShl);
aluHelper.setNextAlu(AluRegisters::opcodeStore, AluRegisters::gpr7, AluRegisters::accu);
aluHelper.setNextAlu(AluRegisters::opcodeLoad, AluRegisters::srca, AluRegisters::gpr7);
aluHelper.setNextAlu(AluRegisters::opcodeLoad, AluRegisters::srcb, AluRegisters::gpr8);
aluHelper.setNextAlu(AluRegisters::opcodeAdd);
aluHelper.setNextAlu(AluRegisters::opcodeStore, AluRegisters::gpr6, AluRegisters::accu);
aluHelper.setNextAlu(AluRegisters::opcodeLoadind, AluRegisters::gpr0, AluRegisters::accu);
aluHelper.setNextAlu(AluRegisters::opcodeFenceRd);
aluHelper.copyToCmdStream(schedulerCmdStream);
EncodeBatchBufferStartOrEnd<GfxFamily>::programBatchBufferStart(&schedulerCmdStream, 0, false, true, false);
}
// 3. Remove task section
{
UNRECOVERABLE_IF(schedulerCmdStream.getUsed() != RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection<GfxFamily>::removeTaskSectionStart);
EncodeMiPredicate<GfxFamily>::encode(schedulerCmdStream, MiPredicateType::disable);
EncodeMathMMIO<GfxFamily>::encodeDecrement(schedulerCmdStream, AluRegisters::gpr1, isBcs);
EncodeMathMMIO<GfxFamily>::encodeDecrement(schedulerCmdStream, AluRegisters::gpr2, isBcs);
EncodeSetMMIO<GfxFamily>::encodeREG(schedulerCmdStream, RegisterOffsets::csGprR0, RegisterOffsets::csGprR9, isBcs);
EncodeSetMMIO<GfxFamily>::encodeREG(schedulerCmdStream, RegisterOffsets::csGprR0 + 4, RegisterOffsets::csGprR9 + 4, isBcs);
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataRegBatchBufferStart(schedulerCmdStream, 0, RegisterOffsets::csGprR1, 0, CompareOperation::equal, true, false, isBcs);
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR7, 8, true, isBcs);
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR7 + 4, 0, true, isBcs);
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR8, static_cast<uint32_t>(deferredTasksListGpuVa & 0xFFFF'FFFFULL), true, isBcs);
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR8 + 4, static_cast<uint32_t>(deferredTasksListGpuVa >> 32), true, isBcs);
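        // Compact the deferred tasks list: gpr1 (decremented above) is the index of the last
        // queued task; load its start address and store it into the slot that was just executed,
        // whose VA is still held in gpr6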
EncodeAluHelper<GfxFamily, 14> aluHelper;
aluHelper.setMocs(miMathMocs);
aluHelper.setNextAlu(AluRegisters::opcodeLoad, AluRegisters::srca, AluRegisters::gpr1);
aluHelper.setNextAlu(AluRegisters::opcodeLoad, AluRegisters::srcb, AluRegisters::gpr7);
aluHelper.setNextAlu(AluRegisters::opcodeShl);
aluHelper.setNextAlu(AluRegisters::opcodeStore, AluRegisters::gpr7, AluRegisters::accu);
aluHelper.setNextAlu(AluRegisters::opcodeLoad, AluRegisters::srca, AluRegisters::gpr7);
aluHelper.setNextAlu(AluRegisters::opcodeLoad, AluRegisters::srcb, AluRegisters::gpr8);
aluHelper.setNextAlu(AluRegisters::opcodeAdd);
aluHelper.setNextAlu(AluRegisters::opcodeLoadind, AluRegisters::gpr7, AluRegisters::accu);
aluHelper.setNextAlu(AluRegisters::opcodeFenceRd);
aluHelper.setNextAlu(AluRegisters::opcodeLoad, AluRegisters::srca, AluRegisters::gpr6);
aluHelper.setNextAlu(AluRegisters::opcodeLoad0, AluRegisters::srcb, AluRegisters::opcodeNone);
aluHelper.setNextAlu(AluRegisters::opcodeAdd);
aluHelper.setNextAlu(AluRegisters::opcodeStoreind, AluRegisters::accu, AluRegisters::gpr7);
aluHelper.setNextAlu(AluRegisters::opcodeFenceWr);
aluHelper.copyToCmdStream(schedulerCmdStream);
}
// 4. List loop check section
{
UNRECOVERABLE_IF(schedulerCmdStream.getUsed() != RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection<GfxFamily>::tasksListLoopCheckSectionStart);
EncodeMiPredicate<GfxFamily>::encode(schedulerCmdStream, MiPredicateType::disable);
EncodeMathMMIO<GfxFamily>::encodeIncrement(schedulerCmdStream, AluRegisters::gpr2, isBcs);
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalRegRegBatchBufferStart(
schedulerCmdStream,
loopSectionStartAddress,
AluRegisters::gpr1, AluRegisters::gpr2, CompareOperation::notEqual, false, isBcs);
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR2, 0, true, isBcs);
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR2 + 4, 0, true, isBcs);
}
// 5. Drain request section
{
UNRECOVERABLE_IF(schedulerCmdStream.getUsed() != RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection<GfxFamily>::drainRequestSectionStart);
EncodeMiArbCheck<GfxFamily>::program(schedulerCmdStream, std::nullopt);
if (debugManager.flags.DirectSubmissionRelaxedOrderingQueueSizeLimit.get() != -1) {
currentRelaxedOrderingQueueSize = static_cast<uint32_t>(debugManager.flags.DirectSubmissionRelaxedOrderingQueueSizeLimit.get());
}
this->relaxedOrderingQueueSizeLimitValueVa = schedulerCmdStream.getCurrentGpuAddressPosition() + RelaxedOrderingHelper::getQueueSizeLimitValueOffset<GfxFamily>();
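        // Jump back to the loop start while the queue is saturated (gpr1 >= queue size limit)
        // or while a stall/drain was requested (gpr5 == 1)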
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataRegBatchBufferStart(
schedulerCmdStream,
loopSectionStartAddress,
RegisterOffsets::csGprR1, currentRelaxedOrderingQueueSize, CompareOperation::greaterOrEqual, false, false, isBcs);
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataRegBatchBufferStart(
schedulerCmdStream,
loopSectionStartAddress,
RegisterOffsets::csGprR5, 1, CompareOperation::equal, false, false, isBcs);
}
// 6. Scheduler loop check section
{
UNRECOVERABLE_IF(schedulerCmdStream.getUsed() != RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection<GfxFamily>::schedulerLoopCheckSectionStart);
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR10, static_cast<uint32_t>(RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<GfxFamily>::semaphoreSectionSize), true, isBcs);
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR10 + 4, 0, true, isBcs);
EncodeAluHelper<GfxFamily, 4> aluHelper;
aluHelper.setMocs(miMathMocs);
aluHelper.setNextAlu(AluRegisters::opcodeLoad, AluRegisters::srca, AluRegisters::gpr9);
aluHelper.setNextAlu(AluRegisters::opcodeLoad, AluRegisters::srcb, AluRegisters::gpr10);
aluHelper.setNextAlu(AluRegisters::opcodeAdd);
aluHelper.setNextAlu(AluRegisters::opcodeStore, AluRegisters::gpr0, AluRegisters::accu);
aluHelper.copyToCmdStream(schedulerCmdStream);
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalRegMemBatchBufferStart(schedulerCmdStream, 0, semaphoreGpuVa, RegisterOffsets::csGprR11, CompareOperation::greaterOrEqual, true, isBcs);
EncodeBatchBufferStartOrEnd<GfxFamily>::programBatchBufferStart(&schedulerCmdStream, schedulerStartAddress + RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection<GfxFamily>::loopStartSectionStart,
false, false, false);
}
UNRECOVERABLE_IF(schedulerCmdStream.getUsed() != RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection<GfxFamily>::totalSize);
}
template <typename GfxFamily, typename Dispatcher>
void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchRelaxedOrderingSchedulerSection(uint32_t value) {
LinearStream schedulerCmdStream(this->preinitializedRelaxedOrderingScheduler.get(), RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<GfxFamily>::totalSize);
// 1. Init section
uint64_t schedulerStartVa = ringCommandStream.getCurrentGpuAddressPosition();
uint64_t semaphoreSectionVa = schedulerStartVa + RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<GfxFamily>::semaphoreSectionStart;
constexpr bool isBcs = Dispatcher::isCopy();
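    // GPR11 = work count this scheduler waits for, GPR9 = VA of its semaphore section,
    // which the static scheduler uses as the return address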
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR11, value, true, isBcs);
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR9, static_cast<uint32_t>(semaphoreSectionVa & 0xFFFF'FFFFULL), true, isBcs);
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR9 + 4, static_cast<uint32_t>(semaphoreSectionVa >> 32), true, isBcs);
schedulerCmdStream.getSpace(sizeof(typename GfxFamily::MI_BATCH_BUFFER_START)); // skip patching
// 2. Semaphore section
{
using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION;
schedulerCmdStream.getSpace(EncodeMiPredicate<GfxFamily>::getCmdSize()); // skip patching
EncodeSemaphore<GfxFamily>::addMiSemaphoreWaitCommand(schedulerCmdStream, semaphoreGpuVa, value,
COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, false, false, false, false, nullptr);
}
// skip patching End section
auto dst = ringCommandStream.getSpace(RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<GfxFamily>::totalSize);
memcpy_s(dst, RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<GfxFamily>::totalSize,
this->preinitializedRelaxedOrderingScheduler.get(), RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<GfxFamily>::totalSize);
}
template <typename GfxFamily, typename Dispatcher>
DirectSubmissionHw<GfxFamily, Dispatcher>::~DirectSubmissionHw() = default;
template <typename GfxFamily, typename Dispatcher>
bool DirectSubmissionHw<GfxFamily, Dispatcher>::allocateResources() {
DirectSubmissionAllocations allocations;
bool isMultiOsContextCapable = osContext.getNumSupportedDevices() > 1u;
constexpr size_t minimumRequiredSize = 256 * MemoryConstants::kiloByte;
constexpr size_t additionalAllocationSize = MemoryConstants::pageSize;
const auto allocationSize = alignUp(minimumRequiredSize + additionalAllocationSize, MemoryConstants::pageSize64k);
const AllocationProperties commandStreamAllocationProperties{rootDeviceIndex,
true, allocationSize,
AllocationType::ringBuffer,
isMultiOsContextCapable, false, osContext.getDeviceBitfield()};
for (uint32_t ringBufferIndex = 0; ringBufferIndex < RingBufferUse::initialRingBufferCount; ringBufferIndex++) {
auto ringBuffer = memoryManager->allocateGraphicsMemoryWithProperties(commandStreamAllocationProperties);
this->ringBuffers[ringBufferIndex].ringBuffer = ringBuffer;
UNRECOVERABLE_IF(ringBuffer == nullptr);
allocations.push_back(ringBuffer);
memset(ringBuffer->getUnderlyingBuffer(), 0, allocationSize);
}
const AllocationProperties semaphoreAllocationProperties{rootDeviceIndex,
true, MemoryConstants::pageSize,
AllocationType::semaphoreBuffer,
isMultiOsContextCapable, false, osContext.getDeviceBitfield()};
semaphores = memoryManager->allocateGraphicsMemoryWithProperties(semaphoreAllocationProperties);
UNRECOVERABLE_IF(semaphores == nullptr);
allocations.push_back(semaphores);
if (this->workPartitionAllocation != nullptr) {
allocations.push_back(workPartitionAllocation);
}
if (completionFenceAllocation != nullptr) {
allocations.push_back(completionFenceAllocation);
}
if (this->relaxedOrderingEnabled) {
const AllocationProperties allocationProperties(rootDeviceIndex,
true, MemoryConstants::pageSize64k,
AllocationType::deferredTasksList,
isMultiOsContextCapable, false, osContext.getDeviceBitfield());
deferredTasksListAllocation = memoryManager->allocateGraphicsMemoryWithProperties(allocationProperties);
UNRECOVERABLE_IF(deferredTasksListAllocation == nullptr);
allocations.push_back(deferredTasksListAllocation);
AllocationProperties relaxedOrderingSchedulerAllocationProperties(rootDeviceIndex,
true, MemoryConstants::pageSize64k,
AllocationType::commandBuffer,
isMultiOsContextCapable, false, osContext.getDeviceBitfield());
relaxedOrderingSchedulerAllocationProperties.flags.cantBeReadOnly = true;
relaxedOrderingSchedulerAllocation = memoryManager->allocateGraphicsMemoryWithProperties(relaxedOrderingSchedulerAllocationProperties);
UNRECOVERABLE_IF(relaxedOrderingSchedulerAllocation == nullptr);
allocations.push_back(relaxedOrderingSchedulerAllocation);
}
if (debugManager.flags.DirectSubmissionPrintBuffers.get()) {
for (uint32_t ringBufferIndex = 0; ringBufferIndex < RingBufferUse::initialRingBufferCount; ringBufferIndex++) {
const auto ringBuffer = this->ringBuffers[ringBufferIndex].ringBuffer;
printf("Ring buffer %u - gpu address: %" PRIx64 " - %" PRIx64 ", cpu address: %p - %p, size: %zu \n",
ringBufferIndex,
ringBuffer->getGpuAddress(),
ptrOffset(ringBuffer->getGpuAddress(), ringBuffer->getUnderlyingBufferSize()),
ringBuffer->getUnderlyingBuffer(),
ptrOffset(ringBuffer->getUnderlyingBuffer(), ringBuffer->getUnderlyingBufferSize()),
ringBuffer->getUnderlyingBufferSize());
}
}
handleResidency();
ringCommandStream.replaceBuffer(this->ringBuffers[0u].ringBuffer->getUnderlyingBuffer(), minimumRequiredSize);
ringCommandStream.replaceGraphicsAllocation(this->ringBuffers[0].ringBuffer);
semaphorePtr = semaphores->getUnderlyingBuffer();
semaphoreGpuVa = semaphores->getGpuAddress();
semaphoreData = static_cast<volatile RingSemaphoreData *>(semaphorePtr);
memset(semaphorePtr, 0, sizeof(RingSemaphoreData));
semaphoreData->queueWorkCount = 0;
cpuCachelineFlush(semaphorePtr, MemoryConstants::cacheLineSize);
workloadModeOneStoreAddress = static_cast<volatile void *>(&semaphoreData->diagnosticModeCounter);
*static_cast<volatile uint32_t *>(workloadModeOneStoreAddress) = 0u;
this->gpuVaForMiFlush = this->semaphoreGpuVa + offsetof(RingSemaphoreData, miFlushSpace);
this->gpuVaForPagingFenceSemaphore = this->semaphoreGpuVa + offsetof(RingSemaphoreData, pagingFenceCounter);
auto ret = makeResourcesResident(allocations);
return ret && allocateOsResources();
}
template <typename GfxFamily, typename Dispatcher>
bool DirectSubmissionHw<GfxFamily, Dispatcher>::makeResourcesResident(DirectSubmissionAllocations &allocations) {
auto ret = memoryOperationHandler->makeResidentWithinOsContext(&this->osContext, ArrayRef<GraphicsAllocation *>(allocations), false) == MemoryOperationsStatus::success;
return ret;
}
template <typename GfxFamily, typename Dispatcher>
inline void DirectSubmissionHw<GfxFamily, Dispatcher>::unblockGpu() {
if (sfenceMode >= DirectSubmissionSfenceMode::beforeSemaphoreOnly) {
CpuIntrinsics::sfence();
}
if (this->pciBarrierPtr) {
*this->pciBarrierPtr = 0u;
}
if (debugManager.flags.DirectSubmissionPrintSemaphoreUsage.get() == 1) {
printf("DirectSubmission semaphore %" PRIx64 " unlocked with value: %u\n", semaphoreGpuVa, currentQueueWorkCount);
}
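    // Publish the new work count; the ring resumes once its semaphore wait observes this value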
semaphoreData->queueWorkCount = currentQueueWorkCount;
if (sfenceMode == DirectSubmissionSfenceMode::beforeAndAfterSemaphore) {
CpuIntrinsics::sfence();
}
}
template <typename GfxFamily, typename Dispatcher>
inline void DirectSubmissionHw<GfxFamily, Dispatcher>::cpuCachelineFlush(void *ptr, size_t size) {
if (disableCpuCacheFlush) {
return;
}
    constexpr size_t cachelineBit = 6;
    static_assert(MemoryConstants::cacheLineSize == 1 << cachelineBit, "cachelineBit has invalid value");
    char *flushPtr = reinterpret_cast<char *>(ptr);
    char *flushEndPtr = reinterpret_cast<char *>(ptr) + size;
    flushPtr = alignDown(flushPtr, MemoryConstants::cacheLineSize);
    flushEndPtr = alignUp(flushEndPtr, MemoryConstants::cacheLineSize);
    size_t cachelines = (flushEndPtr - flushPtr) >> cachelineBit;
for (size_t i = 0; i < cachelines; i++) {
CpuIntrinsics::clFlush(flushPtr);
flushPtr += MemoryConstants::cacheLineSize;
}
}
template <typename GfxFamily, typename Dispatcher>
bool DirectSubmissionHw<GfxFamily, Dispatcher>::initialize(bool submitOnInit, bool useNotify) {
useNotifyForPostSync = useNotify;
bool ret = allocateResources();
initDiagnostic(submitOnInit);
if (ret && submitOnInit) {
size_t startBufferSize = Dispatcher::getSizePreemption() +
getSizeSemaphoreSection(false);
Dispatcher::dispatchPreemption(ringCommandStream);
if (this->partitionedMode) {
startBufferSize += getSizePartitionRegisterConfigurationSection();
dispatchPartitionRegisterConfiguration();
this->partitionConfigSet = true;
}
if (this->miMemFenceRequired && !this->systemMemoryFenceAddressSet) {
startBufferSize += getSizeSystemMemoryFenceAddress();
dispatchSystemMemoryFenceAddress();
this->systemMemoryFenceAddressSet = true;
}
if (this->relaxedOrderingEnabled) {
preinitializeRelaxedOrderingSections();
initRelaxedOrderingRegisters();
dispatchStaticRelaxedOrderingScheduler();
startBufferSize += RelaxedOrderingHelper::getSizeRegistersInit<GfxFamily>();
this->relaxedOrderingInitialized = true;
}
if (workloadMode == 1) {
dispatchDiagnosticModeSection();
startBufferSize += getDiagnosticModeSection();
}
dispatchSemaphoreSection(currentQueueWorkCount);
ringStart = submit(ringCommandStream.getGraphicsAllocation()->getGpuAddress(), startBufferSize);
performDiagnosticMode();
return ringStart;
}
return ret;
}
template <typename GfxFamily, typename Dispatcher>
bool DirectSubmissionHw<GfxFamily, Dispatcher>::stopRingBuffer(bool blocking) {
if (!ringStart) {
if (blocking) {
this->ensureRingCompletion();
}
return true;
}
bool relaxedOrderingSchedulerWasRequired = this->relaxedOrderingSchedulerRequired;
if (this->relaxedOrderingEnabled && this->relaxedOrderingSchedulerRequired) {
dispatchRelaxedOrderingQueueStall();
}
void *flushPtr = ringCommandStream.getSpace(0);
Dispatcher::dispatchCacheFlush(ringCommandStream, this->rootDeviceEnvironment, gpuVaForMiFlush);
if (disableMonitorFence) {
TagData currentTagData = {};
getTagAddressValue(currentTagData);
Dispatcher::dispatchMonitorFence(ringCommandStream, currentTagData.tagAddress, currentTagData.tagValue, this->rootDeviceEnvironment,
this->useNotifyForPostSync, this->partitionedMode, this->dcFlushRequired);
}
Dispatcher::dispatchStopCommandBuffer(ringCommandStream);
auto bytesToPad = Dispatcher::getSizeStartCommandBuffer() - Dispatcher::getSizeStopCommandBuffer();
EncodeNoop<GfxFamily>::emitNoop(ringCommandStream, bytesToPad);
EncodeNoop<GfxFamily>::alignToCacheLine(ringCommandStream);
cpuCachelineFlush(flushPtr, getSizeEnd(relaxedOrderingSchedulerWasRequired));
this->unblockGpu();
cpuCachelineFlush(semaphorePtr, MemoryConstants::cacheLineSize);
this->handleStopRingBuffer();
this->ringStart = false;
if (blocking) {
this->ensureRingCompletion();
}
return true;
}
template <typename GfxFamily, typename Dispatcher>
inline void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchSemaphoreSection(uint32_t value) {
using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION;
if (debugManager.flags.DirectSubmissionPrintSemaphoreUsage.get() == 1) {
printf("DirectSubmission semaphore %" PRIx64 " programmed with value: %u\n", semaphoreGpuVa, value);
}
dispatchDisablePrefetcher(true);
if (this->relaxedOrderingEnabled && this->relaxedOrderingSchedulerRequired) {
dispatchRelaxedOrderingSchedulerSection(value);
} else {
bool switchOnUnsuccessful = false;
if (debugManager.flags.DirectSubmissionSwitchSemaphoreMode.get() != -1) {
switchOnUnsuccessful = !!debugManager.flags.DirectSubmissionSwitchSemaphoreMode.get();
}
EncodeSemaphore<GfxFamily>::addMiSemaphoreWaitCommand(ringCommandStream,
semaphoreGpuVa,
value,
COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, false, false, false, switchOnUnsuccessful, nullptr);
}
if (miMemFenceRequired) {
MemorySynchronizationCommands<GfxFamily>::addAdditionalSynchronizationForDirectSubmission(ringCommandStream, this->gpuVaForAdditionalSynchronizationWA, true, rootDeviceEnvironment);
}
dispatchPrefetchMitigation();
dispatchDisablePrefetcher(false);
}
template <typename GfxFamily, typename Dispatcher>
inline size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getSizeSemaphoreSection(bool relaxedOrderingSchedulerRequired) {
size_t semaphoreSize = (this->relaxedOrderingEnabled && relaxedOrderingSchedulerRequired) ? RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<GfxFamily>::totalSize
: EncodeSemaphore<GfxFamily>::getSizeMiSemaphoreWait();
semaphoreSize += getSizePrefetchMitigation();
if (isDisablePrefetcherRequired) {
semaphoreSize += 2 * getSizeDisablePrefetcher();
}
if (miMemFenceRequired) {
semaphoreSize += MemorySynchronizationCommands<GfxFamily>::getSizeForSingleAdditionalSynchronizationForDirectSubmission(rootDeviceEnvironment);
}
return semaphoreSize;
}
template <typename GfxFamily, typename Dispatcher>
inline void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchStartSection(uint64_t gpuStartAddress) {
Dispatcher::dispatchStartCommandBuffer(ringCommandStream, gpuStartAddress);
}
template <typename GfxFamily, typename Dispatcher>
inline size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getSizeStartSection() {
return Dispatcher::getSizeStartCommandBuffer();
}
template <typename GfxFamily, typename Dispatcher>
inline void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchSwitchRingBufferSection(uint64_t nextBufferGpuAddress) {
if (disableMonitorFence) {
TagData currentTagData = {};
getTagAddressValue(currentTagData);
Dispatcher::dispatchMonitorFence(ringCommandStream, currentTagData.tagAddress, currentTagData.tagValue, this->rootDeviceEnvironment,
this->useNotifyForPostSync, this->partitionedMode, this->dcFlushRequired);
}
Dispatcher::dispatchStartCommandBuffer(ringCommandStream, nextBufferGpuAddress);
}
template <typename GfxFamily, typename Dispatcher>
inline size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getSizeSwitchRingBufferSection() {
size_t size = Dispatcher::getSizeStartCommandBuffer();
if (disableMonitorFence) {
size += Dispatcher::getSizeMonitorFence(rootDeviceEnvironment);
}
return size;
}
template <typename GfxFamily, typename Dispatcher>
inline size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getSizeEnd(bool relaxedOrderingSchedulerRequired) {
size_t size = Dispatcher::getSizeStopCommandBuffer() +
Dispatcher::getSizeCacheFlush(rootDeviceEnvironment) +
(Dispatcher::getSizeStartCommandBuffer() - Dispatcher::getSizeStopCommandBuffer()) +
MemoryConstants::cacheLineSize;
if (disableMonitorFence) {
size += Dispatcher::getSizeMonitorFence(rootDeviceEnvironment);
}
if (this->relaxedOrderingEnabled && relaxedOrderingSchedulerRequired) {
size += getSizeDispatchRelaxedOrderingQueueStall();
}
return size;
}
template <typename GfxFamily, typename Dispatcher>
inline size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getSizeDispatch(bool relaxedOrderingSchedulerRequired, bool returnPtrsRequired, bool dispatchMonitorFence) {
size_t size = getSizeSemaphoreSection(relaxedOrderingSchedulerRequired);
if (workloadMode == 0) {
size += getSizeStartSection();
if (this->relaxedOrderingEnabled && returnPtrsRequired) {
size += RelaxedOrderingHelper::getSizeReturnPtrRegs<GfxFamily>();
}
} else if (workloadMode == 1) {
size += getDiagnosticModeSection();
}
// mode 2 does not dispatch any commands
if (!disableCacheFlush) {
size += Dispatcher::getSizeCacheFlush(rootDeviceEnvironment);
}
if (dispatchMonitorFence) {
size += Dispatcher::getSizeMonitorFence(rootDeviceEnvironment);
}
size += getSizeNewResourceHandler();
return size;
}
template <typename GfxFamily, typename Dispatcher>
void DirectSubmissionHw<GfxFamily, Dispatcher>::updateRelaxedOrderingQueueSize(uint32_t newSize) {
this->currentRelaxedOrderingQueueSize = newSize;
EncodeStoreMemory<GfxFamily>::programStoreDataImm(this->ringCommandStream, this->relaxedOrderingQueueSizeLimitValueVa,
this->currentRelaxedOrderingQueueSize, 0, false, false,
nullptr);
}
template <typename GfxFamily, typename Dispatcher>
void *DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchWorkloadSection(BatchBuffer &batchBuffer, bool dispatchMonitorFence) {
void *currentPosition = ringCommandStream.getSpace(0);
auto copyCmdBuffer = this->copyCommandBufferIntoRing(batchBuffer);
if (debugManager.flags.DirectSubmissionPrintBuffers.get()) {
printf("Client buffer:\n");
printf("Command buffer allocation - gpu address: %" PRIx64 " - %" PRIx64 ", cpu address: %p - %p, size: %zu \n",
batchBuffer.commandBufferAllocation->getGpuAddress(),
ptrOffset(batchBuffer.commandBufferAllocation->getGpuAddress(), batchBuffer.commandBufferAllocation->getUnderlyingBufferSize()),
batchBuffer.commandBufferAllocation->getUnderlyingBuffer(),
ptrOffset(batchBuffer.commandBufferAllocation->getUnderlyingBuffer(), batchBuffer.commandBufferAllocation->getUnderlyingBufferSize()),
batchBuffer.commandBufferAllocation->getUnderlyingBufferSize());
printf("Command buffer - start gpu address: %" PRIx64 " - %" PRIx64 ", start cpu address: %p - %p, start offset: %zu, used size: %zu \n",
ptrOffset(batchBuffer.commandBufferAllocation->getGpuAddress(), batchBuffer.startOffset),
ptrOffset(batchBuffer.commandBufferAllocation->getGpuAddress(), batchBuffer.usedSize),
ptrOffset(batchBuffer.commandBufferAllocation->getUnderlyingBuffer(), batchBuffer.startOffset),
ptrOffset(batchBuffer.commandBufferAllocation->getUnderlyingBuffer(), batchBuffer.usedSize),
batchBuffer.startOffset,
batchBuffer.usedSize);
printf("Ring buffer for submission - start gpu address: %" PRIx64 " - %" PRIx64 ", start cpu address: %p - %p, size: %zu, submission address: %" PRIx64 ", used size: %zu, copyCmdBuffer: %d \n",
ringCommandStream.getGraphicsAllocation()->getGpuAddress(),
ptrOffset(ringCommandStream.getGraphicsAllocation()->getGpuAddress(), ringCommandStream.getGraphicsAllocation()->getUnderlyingBufferSize()),
ringCommandStream.getGraphicsAllocation()->getUnderlyingBuffer(),
ptrOffset(ringCommandStream.getGraphicsAllocation()->getUnderlyingBuffer(), ringCommandStream.getGraphicsAllocation()->getUnderlyingBufferSize()),
ringCommandStream.getGraphicsAllocation()->getUnderlyingBufferSize(),
ptrOffset(ringCommandStream.getGraphicsAllocation()->getGpuAddress(), ringCommandStream.getUsed()),
ringCommandStream.getUsed(),
copyCmdBuffer);
}
if (batchBuffer.pagingFenceSemInfo.requiresProgrammingSemaphore()) {
dispatchSemaphoreForPagingFence(batchBuffer.pagingFenceSemInfo.pagingFenceValue);
}
if (workloadMode == 0) {
auto commandStreamAddress = ptrOffset(batchBuffer.commandBufferAllocation->getGpuAddress(), batchBuffer.startOffset);
void *returnCmd = batchBuffer.endCmdPtr;
LinearStream relaxedOrderingReturnPtrCmdStream;
if (this->relaxedOrderingEnabled && batchBuffer.hasRelaxedOrderingDependencies) {
// preallocate and patch after start section
auto relaxedOrderingReturnPtrCmds = ringCommandStream.getSpace(RelaxedOrderingHelper::getSizeReturnPtrRegs<GfxFamily>());
relaxedOrderingReturnPtrCmdStream.replaceBuffer(relaxedOrderingReturnPtrCmds, RelaxedOrderingHelper::getSizeReturnPtrRegs<GfxFamily>());
}
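        // Flat ring mode: copy the client command buffer contents directly into the ring
        // instead of chaining to it with a batch buffer start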
if (copyCmdBuffer) {
auto cmdStreamTaskPtr = ptrOffset(batchBuffer.stream->getCpuBase(), batchBuffer.startOffset);
auto sizeToCopy = ptrDiff(returnCmd, cmdStreamTaskPtr);
auto ringPtr = ringCommandStream.getSpace(sizeToCopy);
memcpy(ringPtr, cmdStreamTaskPtr, sizeToCopy);
} else {
dispatchStartSection(commandStreamAddress);
}
uint64_t returnGpuPointer = ringCommandStream.getCurrentGpuAddressPosition();
if (this->relaxedOrderingEnabled && batchBuffer.hasRelaxedOrderingDependencies) {
dispatchRelaxedOrderingReturnPtrRegs(relaxedOrderingReturnPtrCmdStream, returnGpuPointer);
} else if (!copyCmdBuffer) {
setReturnAddress(returnCmd, returnGpuPointer);
}
} else if (workloadMode == 1) {
DirectSubmissionDiagnostics::diagnosticModeOneDispatch(diagnostic.get());
dispatchDiagnosticModeSection();
}
// mode 2 does not dispatch any commands
if (this->relaxedOrderingEnabled && batchBuffer.hasRelaxedOrderingDependencies) {
dispatchTaskStoreSection(batchBuffer.taskStartAddress);
uint32_t expectedQueueSize = batchBuffer.numCsrClients * RelaxedOrderingHelper::queueSizeMultiplier;
expectedQueueSize = std::min(expectedQueueSize, RelaxedOrderingHelper::maxQueueSize);
if (expectedQueueSize > this->currentRelaxedOrderingQueueSize && debugManager.flags.DirectSubmissionRelaxedOrderingQueueSizeLimit.get() == -1) {
updateRelaxedOrderingQueueSize(expectedQueueSize);
}
}
if (!disableCacheFlush) {
Dispatcher::dispatchCacheFlush(ringCommandStream, this->rootDeviceEnvironment, gpuVaForMiFlush);
}
if (dispatchMonitorFence) {
TagData currentTagData = {};
getTagAddressValue(currentTagData);
Dispatcher::dispatchMonitorFence(ringCommandStream, currentTagData.tagAddress, currentTagData.tagValue, this->rootDeviceEnvironment,
this->useNotifyForPostSync, this->partitionedMode, this->dcFlushRequired);
}
dispatchSemaphoreSection(currentQueueWorkCount + 1);
return currentPosition;
}
template <typename GfxFamily, typename Dispatcher>
void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchRelaxedOrderingQueueStall() {
LinearStream bbStartStream(ringCommandStream.getSpace(EncodeBatchBufferStartOrEnd<GfxFamily>::getCmdSizeConditionalDataRegBatchBufferStart(false)),
EncodeBatchBufferStartOrEnd<GfxFamily>::getCmdSizeConditionalDataRegBatchBufferStart(false));
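    // The conditional bb_start patched below skips the stall when no tasks are deferred (GPR1 == 0);
    // otherwise GPR5 = 1 asks the scheduler to drain the queue before continuing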
constexpr bool isBcs = Dispatcher::isCopy();
LriHelper<GfxFamily>::program(&ringCommandStream, RegisterOffsets::csGprR5, 1, true, isBcs);
dispatchSemaphoreSection(currentQueueWorkCount);
// patch conditional bb_start with current GPU address
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataRegBatchBufferStart(bbStartStream, ringCommandStream.getCurrentGpuAddressPosition(),
RegisterOffsets::csGprR1, 0, CompareOperation::equal, false, false, isBcs);
relaxedOrderingSchedulerRequired = false;
}
template <typename GfxFamily, typename Dispatcher>
size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getSizeDispatchRelaxedOrderingQueueStall() {
return getSizeSemaphoreSection(true) + sizeof(typename GfxFamily::MI_LOAD_REGISTER_IMM) +
EncodeBatchBufferStartOrEnd<GfxFamily>::getCmdSizeConditionalDataRegBatchBufferStart(false);
}
template <typename GfxFamily, typename Dispatcher>
void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchRelaxedOrderingReturnPtrRegs(LinearStream &cmdStream, uint64_t returnPtr) {
constexpr bool isBcs = Dispatcher::isCopy();
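    // GPR4 = provided return VA, GPR3 = the same VA advanced past the task store section that follows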
LriHelper<GfxFamily>::program(&cmdStream, RegisterOffsets::csGprR4, static_cast<uint32_t>(returnPtr & 0xFFFF'FFFFULL), true, isBcs);
LriHelper<GfxFamily>::program(&cmdStream, RegisterOffsets::csGprR4 + 4, static_cast<uint32_t>(returnPtr >> 32), true, isBcs);
uint64_t returnPtrAfterTaskStoreSection = returnPtr;
returnPtrAfterTaskStoreSection += RelaxedOrderingHelper::getSizeTaskStoreSection<GfxFamily>();
LriHelper<GfxFamily>::program(&cmdStream, RegisterOffsets::csGprR3, static_cast<uint32_t>(returnPtrAfterTaskStoreSection & 0xFFFF'FFFFULL), true, isBcs);
LriHelper<GfxFamily>::program(&cmdStream, RegisterOffsets::csGprR3 + 4, static_cast<uint32_t>(returnPtrAfterTaskStoreSection >> 32), true, isBcs);
}
template <typename GfxFamily, typename Dispatcher>
void DirectSubmissionHw<GfxFamily, Dispatcher>::initRelaxedOrderingRegisters() {
constexpr bool isBcs = Dispatcher::isCopy();
LriHelper<GfxFamily>::program(&ringCommandStream, RegisterOffsets::csGprR1, 0, true, isBcs);
LriHelper<GfxFamily>::program(&ringCommandStream, RegisterOffsets::csGprR1 + 4, 0, true, isBcs);
LriHelper<GfxFamily>::program(&ringCommandStream, RegisterOffsets::csGprR5, 0, true, isBcs);
LriHelper<GfxFamily>::program(&ringCommandStream, RegisterOffsets::csGprR5 + 4, 0, true, isBcs);
}
template <typename GfxFamily, typename Dispatcher>
void DirectSubmissionHw<GfxFamily, Dispatcher>::preinitializeRelaxedOrderingSections() {
// Task store section
preinitializedTaskStoreSection = std::make_unique<uint8_t[]>(RelaxedOrderingHelper::getSizeTaskStoreSection<GfxFamily>());
LinearStream stream(preinitializedTaskStoreSection.get(), RelaxedOrderingHelper::getSizeTaskStoreSection<GfxFamily>());
EncodeMiPredicate<GfxFamily>::encode(stream, MiPredicateType::disable);
2022-11-18 21:02:29 +00:00
uint64_t deferredTasksListGpuVa = deferredTasksListAllocation->getGpuAddress();
2024-04-19 14:20:27 +00:00
2024-09-06 13:29:29 +00:00
constexpr bool isBcs = Dispatcher::isCopy();
LriHelper<GfxFamily>::program(&stream, RegisterOffsets::csGprR6, static_cast<uint32_t>(deferredTasksListGpuVa & 0xFFFF'FFFFULL), true, isBcs);
LriHelper<GfxFamily>::program(&stream, RegisterOffsets::csGprR6 + 4, static_cast<uint32_t>(deferredTasksListGpuVa >> 32), true, isBcs);
// Task start VA
LriHelper<GfxFamily>::program(&stream, RegisterOffsets::csGprR7, 0, true, isBcs);
LriHelper<GfxFamily>::program(&stream, RegisterOffsets::csGprR7 + 4, 0, true, isBcs);
    // Shift by 8 = multiply by 256. The address must be 64-byte aligned (shift by 6), but SHL accepts only 1, 2, 4, 8, 16 and 32
LriHelper<GfxFamily>::program(&stream, RegisterOffsets::csGprR8, 8, true, isBcs);
LriHelper<GfxFamily>::program(&stream, RegisterOffsets::csGprR8 + 4, 0, true, isBcs);
const uint32_t miMathMocs = this->rootDeviceEnvironment.getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
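    // Store the task start VA (GPR7, patched at dispatch time) into the deferred tasks list at
    // deferredTasksListGpuVa + (gpr1 << 8), then increment the task counter in GPR1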
EncodeAluHelper<GfxFamily, 9> aluHelper;
aluHelper.setMocs(miMathMocs);
aluHelper.setNextAlu(AluRegisters::opcodeLoad, AluRegisters::srca, AluRegisters::gpr1);
aluHelper.setNextAlu(AluRegisters::opcodeLoad, AluRegisters::srcb, AluRegisters::gpr8);
aluHelper.setNextAlu(AluRegisters::opcodeShl);
aluHelper.setNextAlu(AluRegisters::opcodeStore, AluRegisters::gpr8, AluRegisters::accu);
aluHelper.setNextAlu(AluRegisters::opcodeLoad, AluRegisters::srca, AluRegisters::gpr8);
aluHelper.setNextAlu(AluRegisters::opcodeLoad, AluRegisters::srcb, AluRegisters::gpr6);
aluHelper.setNextAlu(AluRegisters::opcodeAdd);
aluHelper.setNextAlu(AluRegisters::opcodeStoreind, AluRegisters::accu, AluRegisters::gpr7);
aluHelper.setNextAlu(AluRegisters::opcodeFenceWr);
aluHelper.copyToCmdStream(stream);
EncodeMathMMIO<GfxFamily>::encodeIncrement(stream, AluRegisters::gpr1, isBcs);
UNRECOVERABLE_IF(stream.getUsed() != RelaxedOrderingHelper::getSizeTaskStoreSection<GfxFamily>());
// Scheduler section
preinitializedRelaxedOrderingScheduler = std::make_unique<uint8_t[]>(RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<GfxFamily>::totalSize);
LinearStream schedulerStream(preinitializedRelaxedOrderingScheduler.get(), RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<GfxFamily>::totalSize);
uint64_t schedulerStartAddress = relaxedOrderingSchedulerAllocation->getGpuAddress();
// 1. Init section
LriHelper<GfxFamily>::program(&schedulerStream, RegisterOffsets::csGprR11, 0, true, isBcs);
LriHelper<GfxFamily>::program(&schedulerStream, RegisterOffsets::csGprR9, 0, true, isBcs);
LriHelper<GfxFamily>::program(&schedulerStream, RegisterOffsets::csGprR9 + 4, 0, true, isBcs);
EncodeBatchBufferStartOrEnd<GfxFamily>::programBatchBufferStart(&schedulerStream, schedulerStartAddress, false, false, false);
// 2. Semaphore section
{
using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION;
EncodeMiPredicate<GfxFamily>::encode(schedulerStream, MiPredicateType::disable);
EncodeSemaphore<GfxFamily>::addMiSemaphoreWaitCommand(schedulerStream, 0, 0, COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, false, false, false, false, nullptr);
}
// 3. End section
{
EncodeMiPredicate<GfxFamily>::encode(schedulerStream, MiPredicateType::disable);
LriHelper<GfxFamily>::program(&schedulerStream, RegisterOffsets::csGprR5, 0, true, isBcs);
}
UNRECOVERABLE_IF(schedulerStream.getUsed() != RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<GfxFamily>::totalSize);
}
template <typename GfxFamily, typename Dispatcher>
void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchTaskStoreSection(uint64_t taskStartSectionVa) {
using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM;
constexpr size_t patchOffset = EncodeMiPredicate<GfxFamily>::getCmdSize() + (2 * sizeof(MI_LOAD_REGISTER_IMM));
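    // Patch the two LRI commands that program GPR7 with this task's start VA, then copy the
    // preinitialized section into the ring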
auto lri = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(ptrOffset(preinitializedTaskStoreSection.get(), patchOffset));
lri->setDataDword(static_cast<uint32_t>(taskStartSectionVa & 0xFFFF'FFFFULL));
lri++;
lri->setDataDword(static_cast<uint32_t>(taskStartSectionVa >> 32));
auto dst = ringCommandStream.getSpace(RelaxedOrderingHelper::getSizeTaskStoreSection<GfxFamily>());
memcpy_s(dst, RelaxedOrderingHelper::getSizeTaskStoreSection<GfxFamily>(), preinitializedTaskStoreSection.get(), RelaxedOrderingHelper::getSizeTaskStoreSection<GfxFamily>());
}
2023-02-22 07:29:42 +00:00
template <typename GfxFamily, typename Dispatcher>
bool DirectSubmissionHw<GfxFamily, Dispatcher>::copyCommandBufferIntoRing(BatchBuffer &batchBuffer) {
2023-02-28 13:17:30 +00:00
/* The command buffer can't be copied into the ring if implicit scaling or metrics are enabled,
   because those features use GPU VAs of the command buffer, which would become invalid after the copy. */
2023-06-19 14:02:37 +00:00
auto ret = !batchBuffer.disableFlatRingBuffer &&
this->osContext.getNumSupportedDevices() == 1u &&
2023-02-27 08:47:23 +00:00
!this->rootDeviceEnvironment.executionEnvironment.areMetricsEnabled() &&
2023-02-22 07:29:42 +00:00
!batchBuffer.chainedBatchBuffer &&
batchBuffer.commandBufferAllocation &&
MemoryPoolHelper::isSystemMemoryPool(batchBuffer.commandBufferAllocation->getMemoryPool()) &&
!batchBuffer.hasRelaxedOrderingDependencies;
2023-11-30 08:32:25 +00:00
if (debugManager.flags.DirectSubmissionFlatRingBuffer.get() != -1) {
ret &= !!debugManager.flags.DirectSubmissionFlatRingBuffer.get();
2023-02-22 07:29:42 +00:00
}
return ret;
}
2023-12-29 11:58:09 +00:00
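// Ring space required for one-time ULLS state that has not been programmed yet:
// partition register configuration, system memory fence address and relaxed-ordering register init.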
template <typename GfxFamily, typename Dispatcher>
size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getUllsStateSize() {
size_t startSize = 0u;
if (!this->partitionConfigSet) {
startSize += getSizePartitionRegisterConfigurationSection();
}
if (this->miMemFenceRequired && !this->systemMemoryFenceAddressSet) {
startSize += getSizeSystemMemoryFenceAddress();
}
if (this->relaxedOrderingEnabled && !this->relaxedOrderingInitialized) {
startSize += RelaxedOrderingHelper::getSizeRegistersInit<GfxFamily>();
}
return startSize;
}
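// Programs any still-pending one-time ULLS state and marks it as initialized; kept in sync with getUllsStateSize().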
template <typename GfxFamily, typename Dispatcher>
void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchUllsState() {
if (!this->partitionConfigSet) {
dispatchPartitionRegisterConfiguration();
this->partitionConfigSet = true;
}
if (this->miMemFenceRequired && !this->systemMemoryFenceAddressSet) {
dispatchSystemMemoryFenceAddress();
this->systemMemoryFenceAddressSet = true;
}
if (this->relaxedOrderingEnabled && !this->relaxedOrderingInitialized) {
preinitializeRelaxedOrderingSections();
dispatchStaticRelaxedOrderingScheduler();
initRelaxedOrderingRegisters();
this->relaxedOrderingInitialized = true;
}
}
2020-03-26 20:13:10 +01:00
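// Dispatches a client command buffer through the ring: estimates the required ring space
// (including the optional flat-ring copy, paging-fence semaphore and relaxed-ordering sections),
// switches ring buffers if space runs out, programs pending ULLS state and the workload section,
// flushes the touched CPU cachelines, then starts the ring or unblocks the GPU and updates the flush stamp.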
template <typename GfxFamily, typename Dispatcher>
bool DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchCommandBuffer(BatchBuffer &batchBuffer, FlushStampTracker &flushStamp) {
2023-11-07 14:09:43 +00:00
lastSubmittedThrottle = batchBuffer.throttle;
2022-11-26 20:10:32 +00:00
bool relaxedOrderingSchedulerWillBeNeeded = (this->relaxedOrderingSchedulerRequired || batchBuffer.hasRelaxedOrderingDependencies);
2023-11-02 16:18:37 +00:00
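// Depending on the configured requirement mode, the monitor fence request is taken either
// from the explicit dispatchMonitorFence flag or from the presence of stalling commands.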
bool inputRequiredMonitorFence = false;
if (this->inputMonitorFenceDispatchRequirement) {
inputRequiredMonitorFence = batchBuffer.dispatchMonitorFence;
} else {
inputRequiredMonitorFence = batchBuffer.hasStallingCmds;
}
bool dispatchMonitorFence = this->dispatchMonitorFenceRequired(inputRequiredMonitorFence);
2022-11-26 20:10:32 +00:00
2023-12-29 11:58:09 +00:00
size_t dispatchSize = this->getUllsStateSize() + getSizeDispatch(relaxedOrderingSchedulerWillBeNeeded, batchBuffer.hasRelaxedOrderingDependencies, dispatchMonitorFence);
2023-02-22 07:29:42 +00:00
if (this->copyCommandBufferIntoRing(batchBuffer)) {
dispatchSize += (batchBuffer.stream->getUsed() - batchBuffer.startOffset) - 2 * getSizeStartSection();
}
2024-10-04 15:52:28 +00:00
if (batchBuffer.pagingFenceSemInfo.requiresProgrammingSemaphore()) {
2024-08-06 16:13:06 +00:00
dispatchSize += getSizeSemaphoreForPagingFence();
}
2023-02-22 07:29:42 +00:00
2020-01-15 17:02:47 +01:00
size_t cycleSize = getSizeSwitchRingBufferSection();
2022-11-26 20:10:32 +00:00
size_t requiredMinimalSize = dispatchSize + cycleSize + getSizeEnd(relaxedOrderingSchedulerWillBeNeeded);
2022-11-16 17:24:04 +00:00
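// With relaxed ordering, reserve extra space for the return-pointer registers, an optional queue stall,
// and (for batches with relaxed-ordering dependencies) a task-store section plus an MI_STORE_DATA_IMM.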
if (this->relaxedOrderingEnabled) {
2022-11-26 20:10:32 +00:00
requiredMinimalSize += RelaxedOrderingHelper::getSizeReturnPtrRegs<GfxFamily>();
if (batchBuffer.hasStallingCmds && this->relaxedOrderingSchedulerRequired) {
2022-11-19 18:25:04 +00:00
requiredMinimalSize += getSizeDispatchRelaxedOrderingQueueStall();
}
2022-11-26 20:10:32 +00:00
if (batchBuffer.hasRelaxedOrderingDependencies) {
2023-02-01 17:06:21 +00:00
requiredMinimalSize += RelaxedOrderingHelper::getSizeTaskStoreSection<GfxFamily>() + sizeof(typename GfxFamily::MI_STORE_DATA_IMM);
2022-11-26 20:10:32 +00:00
}
2022-11-16 17:24:04 +00:00
}
2020-01-15 17:02:47 +01:00
2023-12-29 11:58:09 +00:00
auto needStart = !this->ringStart;
2023-11-17 13:14:41 +00:00
this->switchRingBuffersNeeded(requiredMinimalSize, batchBuffer.allocationsForResidency);
2020-01-15 17:02:47 +01:00
2023-12-29 11:58:09 +00:00
auto startVA = ringCommandStream.getCurrentGpuAddressPosition();
this->dispatchUllsState();
2022-11-26 20:10:32 +00:00
if (this->relaxedOrderingEnabled && batchBuffer.hasStallingCmds && this->relaxedOrderingSchedulerRequired) {
2022-11-19 18:25:04 +00:00
dispatchRelaxedOrderingQueueStall();
}
2022-11-26 20:10:32 +00:00
this->relaxedOrderingSchedulerRequired |= batchBuffer.hasRelaxedOrderingDependencies;
2021-04-14 14:40:23 +00:00
handleNewResourcesSubmission();
2023-09-20 11:32:46 +00:00
void *currentPosition = dispatchWorkloadSection(batchBuffer, dispatchMonitorFence);
2020-01-15 17:02:47 +01:00
2021-07-30 09:56:58 +00:00
cpuCachelineFlush(currentPosition, dispatchSize);
2020-01-15 17:02:47 +01:00
2024-10-04 15:52:28 +00:00
auto requiresBlockingResidencyHandling = batchBuffer.pagingFenceSemInfo.requiresBlockingResidencyHandling;
2024-08-06 16:13:06 +00:00
if (!this->submitCommandBufferToGpu(needStart, startVA, requiredMinimalSize, requiresBlockingResidencyHandling)) {
2023-12-29 11:58:09 +00:00
return false;
}
2022-04-01 11:47:24 +00:00
2020-01-15 17:02:47 +01:00
cpuCachelineFlush(semaphorePtr, MemoryConstants::cacheLineSize);
currentQueueWorkCount++;
2020-03-27 16:32:07 +01:00
DirectSubmissionDiagnostics::diagnosticModeOneSubmit(diagnostic.get());
2021-07-30 09:56:58 +00:00
2023-11-02 16:18:37 +00:00
uint64_t flushValue = updateTagValue(dispatchMonitorFence);
2023-11-02 16:30:57 +00:00
if (flushValue == DirectSubmissionHw<GfxFamily, Dispatcher>::updateTagValueFail) {
return false;
}
2020-01-15 17:02:47 +01:00
flushStamp.setStamp(flushValue);
2024-01-10 11:56:58 +00:00
return this->ringStart;
2020-01-15 17:02:47 +01:00
}
2023-12-29 11:58:09 +00:00
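// First submission starts the ring on the GPU; later submissions optionally wait for residency
// handling and then unblock the GPU so it consumes the newly written commands.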
template <typename GfxFamily, typename Dispatcher>
2024-08-06 16:13:06 +00:00
bool DirectSubmissionHw<GfxFamily, Dispatcher>::submitCommandBufferToGpu(bool needStart, uint64_t gpuAddress, size_t size, bool needWait) {
2023-12-29 11:58:09 +00:00
if (needStart) {
2024-01-10 11:56:58 +00:00
this->ringStart = this->submit(gpuAddress, size);
return this->ringStart;
2023-12-29 11:58:09 +00:00
} else {
2024-08-06 16:13:06 +00:00
if (needWait) {
handleResidency();
}
2023-12-29 11:58:09 +00:00
this->unblockGpu();
return true;
}
}
2020-03-26 20:13:10 +01:00
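// Writes an MI_BATCH_BUFFER_START at returnCmd that jumps back to returnAddress in the PPGTT address space.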
template <typename GfxFamily, typename Dispatcher>
inline void DirectSubmissionHw<GfxFamily, Dispatcher>::setReturnAddress(void *returnCmd, uint64_t returnAddress) {
2020-01-15 17:02:47 +01:00
using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START;
2020-04-08 18:33:03 +02:00
MI_BATCH_BUFFER_START cmd = GfxFamily::cmdInitBatchBufferStart;
2021-12-16 18:02:45 +00:00
cmd.setBatchBufferStartAddress(returnAddress);
2020-04-08 18:33:03 +02:00
cmd.setAddressSpaceIndicator(MI_BATCH_BUFFER_START::ADDRESS_SPACE_INDICATOR_PPGTT);
2020-01-15 17:02:47 +01:00
MI_BATCH_BUFFER_START *returnBBStart = static_cast<MI_BATCH_BUFFER_START *>(returnCmd);
2020-04-08 18:33:03 +02:00
*returnBBStart = cmd;
2020-01-15 17:02:47 +01:00
}
2020-08-18 11:27:44 +02:00
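// When new resources were bound since the last submission (or a flush is forced via
// DirectSubmissionNewResourceTlbFlush), dispatches a TLB flush and records the flush counter on the OS context.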
template <typename GfxFamily, typename Dispatcher>
2021-04-14 14:40:23 +00:00
inline void DirectSubmissionHw<GfxFamily, Dispatcher>::handleNewResourcesSubmission() {
2023-06-29 12:53:36 +00:00
if (isNewResourceHandleNeeded()) {
auto tlbFlushCounter = this->osContext.peekTlbFlushCounter();
Dispatcher::dispatchTlbFlush(this->ringCommandStream, this->gpuVaForMiFlush, this->rootDeviceEnvironment);
this->osContext.setTlbFlushed(tlbFlushCounter);
}
}
template <typename GfxFamily, typename Dispatcher>
size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getSizeNewResourceHandler() {
// Overestimate: always reserve space for a TLB flush, since the need for one may arise between size estimation and dispatch
return Dispatcher::getSizeTlbFlush(this->rootDeviceEnvironment);
2021-04-14 14:40:23 +00:00
}
template <typename GfxFamily, typename Dispatcher>
2023-06-29 12:53:36 +00:00
bool DirectSubmissionHw<GfxFamily, Dispatcher>::isNewResourceHandleNeeded() {
auto newResourcesBound = this->osContext.isTlbFlushRequired();
2023-11-30 08:32:25 +00:00
if (debugManager.flags.DirectSubmissionNewResourceTlbFlush.get() != -1) {
newResourcesBound = debugManager.flags.DirectSubmissionNewResourceTlbFlush.get();
2023-06-29 12:53:36 +00:00
}
return newResourcesBound;
2021-04-14 14:40:23 +00:00
}
2023-08-07 13:33:24 +00:00
template <typename GfxFamily, typename Dispatcher>
2023-11-17 13:14:41 +00:00
void DirectSubmissionHw<GfxFamily, Dispatcher>::switchRingBuffersNeeded(size_t size, ResidencyContainer *allocationsForResidency) {
2023-08-07 13:33:24 +00:00
if (this->ringCommandStream.getAvailableSpace() < size) {
2023-11-17 13:14:41 +00:00
this->switchRingBuffers(allocationsForResidency);
2023-08-07 13:33:24 +00:00
}
}
2020-07-17 11:28:59 +02:00
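// Switches to the next ring buffer allocation: if the ring is already running, chains the old buffer
// into the new one with a switch section and flushes the CPU cachelines, then rebinds the command
// stream to the new allocation. Returns the GPU VA of the position reached in the old buffer.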
template <typename GfxFamily, typename Dispatcher>
2023-11-17 13:14:41 +00:00
inline uint64_t DirectSubmissionHw<GfxFamily, Dispatcher>::switchRingBuffers(ResidencyContainer *allocationsForResidency) {
2020-07-17 11:28:59 +02:00
GraphicsAllocation *nextRingBuffer = switchRingBuffersAllocations();
void *flushPtr = ringCommandStream.getSpace(0);
2022-11-24 15:00:51 +00:00
uint64_t currentBufferGpuVa = ringCommandStream.getCurrentGpuAddressPosition();
2020-07-17 11:28:59 +02:00
if (ringStart) {
dispatchSwitchRingBufferSection(nextRingBuffer->getGpuAddress());
cpuCachelineFlush(flushPtr, getSizeSwitchRingBufferSection());
}
ringCommandStream.replaceBuffer(nextRingBuffer->getUnderlyingBuffer(), ringCommandStream.getMaxAvailableSpace());
ringCommandStream.replaceGraphicsAllocation(nextRingBuffer);
2023-11-17 13:14:41 +00:00
handleSwitchRingBuffers(allocationsForResidency);
2020-07-17 11:28:59 +02:00
return currentBufferGpuVa;
}
2020-03-26 20:13:10 +01:00
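// Picks the next ring buffer allocation: reuses one whose previous work has completed, grows the pool
// until maxRingBufferCount is reached, and afterwards cycles through the existing buffers round-robin.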
template <typename GfxFamily, typename Dispatcher>
inline GraphicsAllocation *DirectSubmissionHw<GfxFamily, Dispatcher>::switchRingBuffersAllocations() {
2022-06-01 10:05:07 +00:00
this->previousRingBuffer = this->currentRingBuffer;
2020-01-15 17:02:47 +01:00
GraphicsAllocation *nextAllocation = nullptr;
2022-06-01 10:05:07 +00:00
for (uint32_t ringBufferIndex = 0; ringBufferIndex < this->ringBuffers.size(); ringBufferIndex++) {
if (ringBufferIndex != this->currentRingBuffer && this->isCompleted(ringBufferIndex)) {
this->currentRingBuffer = ringBufferIndex;
nextAllocation = this->ringBuffers[ringBufferIndex].ringBuffer;
break;
}
2020-01-15 17:02:47 +01:00
}
2022-06-01 10:05:07 +00:00
if (nextAllocation == nullptr) {
if (this->ringBuffers.size() == this->maxRingBufferCount) {
this->currentRingBuffer = (this->currentRingBuffer + 1) % this->ringBuffers.size();
nextAllocation = this->ringBuffers[this->currentRingBuffer].ringBuffer;
} else {
bool isMultiOsContextCapable = osContext.getNumSupportedDevices() > 1u;
constexpr size_t minimumRequiredSize = 256 * MemoryConstants::kiloByte;
constexpr size_t additionalAllocationSize = MemoryConstants::pageSize;
const auto allocationSize = alignUp(minimumRequiredSize + additionalAllocationSize, MemoryConstants::pageSize64k);
const AllocationProperties commandStreamAllocationProperties{rootDeviceIndex,
true, allocationSize,
2023-12-11 14:24:36 +00:00
AllocationType::ringBuffer,
2022-06-01 10:05:07 +00:00
isMultiOsContextCapable, false, osContext.getDeviceBitfield()};
nextAllocation = memoryManager->allocateGraphicsMemoryWithProperties(commandStreamAllocationProperties);
this->currentRingBuffer = static_cast<uint32_t>(this->ringBuffers.size());
this->ringBuffers.emplace_back(0ull, nextAllocation);
2023-12-13 09:17:24 +00:00
auto ret = memoryOperationHandler->makeResidentWithinOsContext(&this->osContext, ArrayRef<GraphicsAllocation *>(&nextAllocation, 1u), false) == MemoryOperationsStatus::success;
2022-06-01 10:05:07 +00:00
UNRECOVERABLE_IF(!ret);
}
}
UNRECOVERABLE_IF(this->currentRingBuffer == this->previousRingBuffer);
2020-01-15 17:02:47 +01:00
return nextAllocation;
}
2023-08-30 10:05:58 +00:00
template <typename GfxFamily, typename Dispatcher>
2023-09-20 12:54:50 +00:00
bool DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchMonitorFenceRequired(bool requireMonitorFence) {
2023-09-20 11:32:46 +00:00
return !this->disableMonitorFence;
2023-08-30 10:05:58 +00:00
}
2020-03-26 20:13:10 +01:00
template <typename GfxFamily, typename Dispatcher>
void DirectSubmissionHw<GfxFamily, Dispatcher>::deallocateResources() {
2022-06-01 10:05:07 +00:00
for (uint32_t ringBufferIndex = 0; ringBufferIndex < this->ringBuffers.size(); ringBufferIndex++) {
memoryManager->freeGraphicsMemory(this->ringBuffers[ringBufferIndex].ringBuffer);
2020-01-15 17:02:47 +01:00
}
2022-06-01 10:05:07 +00:00
this->ringBuffers.clear();
2020-01-15 17:02:47 +01:00
if (semaphores) {
memoryManager->freeGraphicsMemory(semaphores);
semaphores = nullptr;
}
2022-11-16 17:24:04 +00:00
memoryManager->freeGraphicsMemory(deferredTasksListAllocation);
2022-11-24 16:57:18 +00:00
memoryManager->freeGraphicsMemory(relaxedOrderingSchedulerAllocation);
2020-01-15 17:02:47 +01:00
}
2020-03-27 16:32:07 +01:00
template <typename GfxFamily, typename Dispatcher>
void DirectSubmissionHw<GfxFamily, Dispatcher>::createDiagnostic() {
if (directSubmissionDiagnosticAvailable) {
2023-11-30 08:32:25 +00:00
workloadMode = debugManager.flags.DirectSubmissionEnableDebugBuffer.get();
2020-03-27 16:32:07 +01:00
if (workloadMode > 0) {
2023-11-30 08:32:25 +00:00
disableCacheFlush = debugManager.flags.DirectSubmissionDisableCacheFlush.get();
disableMonitorFence = debugManager.flags.DirectSubmissionDisableMonitorFence.get();
uint32_t executions = static_cast<uint32_t>(debugManager.flags.DirectSubmissionDiagnosticExecutionCount.get());
2020-04-03 14:43:13 +02:00
diagnostic = std::make_unique<DirectSubmissionDiagnosticsCollector>(
executions,
workloadMode == 1,
2023-11-30 08:32:25 +00:00
debugManager.flags.DirectSubmissionBufferPlacement.get(),
debugManager.flags.DirectSubmissionSemaphorePlacement.get(),
2020-04-03 14:43:13 +02:00
workloadMode,
disableCacheFlush,
disableMonitorFence);
2020-03-27 16:32:07 +01:00
}
}
}
template <typename GfxFamily, typename Dispatcher>
void DirectSubmissionHw<GfxFamily, Dispatcher>::initDiagnostic(bool &submitOnInit) {
if (directSubmissionDiagnosticAvailable) {
if (diagnostic.get()) {
submitOnInit = true;
diagnostic->diagnosticModeAllocation();
}
}
}
template <typename GfxFamily, typename Dispatcher>
void DirectSubmissionHw<GfxFamily, Dispatcher>::performDiagnosticMode() {
if (directSubmissionDiagnosticAvailable) {
if (diagnostic.get()) {
diagnostic->diagnosticModeDiagnostic();
2020-05-18 18:48:43 +02:00
if (workloadMode == 1) {
diagnostic->diagnosticModeOneWait(workloadModeOneStoreAddress, workloadModeOneExpectedValue);
}
2020-03-27 16:32:07 +01:00
BatchBuffer dummyBuffer = {};
FlushStampTracker dummyTracker(true);
for (uint32_t execution = 0; execution < diagnostic->getExecutionsCount(); execution++) {
dispatchCommandBuffer(dummyBuffer, dummyTracker);
if (workloadMode == 1) {
2020-05-18 18:48:43 +02:00
diagnostic->diagnosticModeOneWaitCollect(execution, workloadModeOneStoreAddress, workloadModeOneExpectedValue);
2020-03-27 16:32:07 +01:00
}
}
workloadMode = 0;
2020-09-22 17:49:06 +02:00
disableCacheFlush = UllsDefaults::defaultDisableCacheFlush;
disableMonitorFence = UllsDefaults::defaultDisableMonitorFence;
2020-03-27 16:32:07 +01:00
diagnostic.reset(nullptr);
}
}
}
2020-05-18 18:48:43 +02:00
template <typename GfxFamily, typename Dispatcher>
void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchDiagnosticModeSection() {
workloadModeOneExpectedValue++;
uint64_t storeAddress = semaphoreGpuVa;
storeAddress += ptrDiff(workloadModeOneStoreAddress, semaphorePtr);
Dispatcher::dispatchStoreDwordCommand(ringCommandStream, storeAddress, workloadModeOneExpectedValue);
}
template <typename GfxFamily, typename Dispatcher>
size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getDiagnosticModeSection() {
return Dispatcher::getSizeStoreDwordCommand();
}
2022-04-06 14:41:45 +00:00
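// Keeps the global fence allocation always resident and programs the system memory fence address into the ring.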
template <typename GfxFamily, typename Dispatcher>
void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchSystemMemoryFenceAddress() {
2024-12-03 11:40:08 +00:00
this->makeGlobalFenceAlwaysResident();
2023-09-12 17:51:43 +00:00
EncodeMemoryFence<GfxFamily>::encodeSystemMemoryFence(ringCommandStream, this->globalFenceAllocation);
2022-04-06 14:41:45 +00:00
}
template <typename GfxFamily, typename Dispatcher>
size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getSizeSystemMemoryFenceAddress() {
return EncodeMemoryFence<GfxFamily>::getSystemMemoryFenceSize();
}
2023-03-24 09:01:23 +00:00
template <typename GfxFamily, typename Dispatcher>
uint32_t DirectSubmissionHw<GfxFamily, Dispatcher>::getDispatchErrorCode() {
return dispatchErrorCode;
}
2024-08-06 16:13:06 +00:00
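// Emits a semaphore wait on the paging-fence value, presumably so the ring does not run ahead
// of pending residency/paging work (see the pagingFenceSemInfo handling in dispatchCommandBuffer).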
template <typename GfxFamily, typename Dispatcher>
inline void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchSemaphoreForPagingFence(uint64_t value) {
using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION;
EncodeSemaphore<GfxFamily>::addMiSemaphoreWaitCommand(ringCommandStream,
this->gpuVaForPagingFenceSemaphore,
value,
COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, false, false, false, false, nullptr);
}
template <typename GfxFamily, typename Dispatcher>
inline size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getSizeSemaphoreForPagingFence() {
return EncodeSemaphore<GfxFamily>::getSizeMiSemaphoreWait();
}
2020-01-15 17:02:47 +01:00
} // namespace NEO