2018-10-02 21:09:06 +08:00
|
|
|
/*
|
2020-01-13 20:15:03 +08:00
|
|
|
* Copyright (C) 2017-2020 Intel Corporation
|
2018-10-02 21:09:06 +08:00
|
|
|
*
|
|
|
|
* SPDX-License-Identifier: MIT
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
|
|
|
#pragma once
|
2020-02-24 05:44:01 +08:00
|
|
|
#include "shared/source/command_stream/command_stream_receiver.h"
|
|
|
|
#include "shared/source/helpers/aligned_memory.h"
|
|
|
|
#include "shared/source/helpers/debug_helpers.h"
|
|
|
|
#include "shared/source/helpers/engine_node_helper.h"
|
|
|
|
#include "shared/source/helpers/hw_helper.h"
|
|
|
|
#include "shared/source/indirect_heap/indirect_heap.h"
|
|
|
|
#include "shared/source/memory_manager/graphics_allocation.h"
|
|
|
|
#include "shared/source/os_interface/os_context.h"
|
|
|
|
#include "shared/source/utilities/tag_allocator.h"
|
2020-02-24 17:22:30 +08:00
|
|
|
|
2020-02-23 05:50:57 +08:00
|
|
|
#include "opencl/source/command_queue/command_queue.h"
|
2020-02-28 16:07:07 +08:00
|
|
|
#include "opencl/source/command_queue/command_queue_hw.h"
|
2020-02-23 05:50:57 +08:00
|
|
|
#include "opencl/source/command_queue/gpgpu_walker.h"
|
|
|
|
#include "opencl/source/command_queue/local_id_gen.h"
|
|
|
|
#include "opencl/source/event/perf_counter.h"
|
|
|
|
#include "opencl/source/event/user_event.h"
|
|
|
|
#include "opencl/source/helpers/hardware_commands_helper.h"
|
|
|
|
#include "opencl/source/helpers/queue_helpers.h"
|
|
|
|
#include "opencl/source/helpers/validators.h"
|
|
|
|
#include "opencl/source/mem_obj/mem_obj.h"
|
2019-05-13 20:15:03 +08:00
|
|
|
|
2020-04-27 03:48:59 +08:00
|
|
|
#include "pipe_control_args.h"
|
|
|
|
|
2019-05-13 20:15:03 +08:00
|
|
|
#include <algorithm>
|
|
|
|
#include <cmath>
|
2018-10-02 21:09:06 +08:00
|
|
|
|
2019-03-26 18:59:46 +08:00
|
|
|
namespace NEO {
|
2018-10-02 21:09:06 +08:00
|
|
|
|
2019-05-13 20:15:03 +08:00
|
|
|
// Performs ReadModifyWrite operation on value of a register: Register = Register Operation Mask
|
2018-10-02 21:09:06 +08:00
|
|
|
template <typename GfxFamily>
|
2019-05-13 20:15:03 +08:00
|
|
|
void GpgpuWalkerHelper<GfxFamily>::addAluReadModifyWriteRegister(
|
2019-07-17 21:39:53 +08:00
|
|
|
LinearStream *pCommandStream,
|
2019-05-13 20:15:03 +08:00
|
|
|
uint32_t aluRegister,
|
2020-02-26 02:23:04 +08:00
|
|
|
AluRegisters operation,
|
2019-05-13 20:15:03 +08:00
|
|
|
uint32_t mask) {
|
|
|
|
// Load "Register" value into CS_GPR_R0
|
2020-04-28 00:55:26 +08:00
|
|
|
using MI_LOAD_REGISTER_REG = typename GfxFamily::MI_LOAD_REGISTER_REG;
|
|
|
|
using MI_MATH = typename GfxFamily::MI_MATH;
|
|
|
|
using MI_MATH_ALU_INST_INLINE = typename GfxFamily::MI_MATH_ALU_INST_INLINE;
|
|
|
|
|
2019-05-13 20:15:03 +08:00
|
|
|
auto pCmd = pCommandStream->getSpaceForCmd<MI_LOAD_REGISTER_REG>();
|
2020-04-28 00:55:26 +08:00
|
|
|
MI_LOAD_REGISTER_REG cmdReg = GfxFamily::cmdInitLoadRegisterReg;
|
|
|
|
cmdReg.setSourceRegisterAddress(aluRegister);
|
|
|
|
cmdReg.setDestinationRegisterAddress(CS_GPR_R0);
|
|
|
|
*pCmd = cmdReg;
|
2019-05-13 20:15:03 +08:00
|
|
|
|
|
|
|
// Load "Mask" into CS_GPR_R1
|
|
|
|
typedef typename GfxFamily::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM;
|
|
|
|
auto pCmd2 = pCommandStream->getSpaceForCmd<MI_LOAD_REGISTER_IMM>();
|
2020-04-28 00:55:26 +08:00
|
|
|
MI_LOAD_REGISTER_IMM cmdImm = GfxFamily::cmdInitLoadRegisterImm;
|
|
|
|
cmdImm.setRegisterOffset(CS_GPR_R1);
|
|
|
|
cmdImm.setDataDword(mask);
|
|
|
|
*pCmd2 = cmdImm;
|
2019-05-13 20:15:03 +08:00
|
|
|
|
|
|
|
// Add instruction MI_MATH with 4 MI_MATH_ALU_INST_INLINE operands
|
|
|
|
auto pCmd3 = reinterpret_cast<uint32_t *>(pCommandStream->getSpace(sizeof(MI_MATH) + NUM_ALU_INST_FOR_READ_MODIFY_WRITE * sizeof(MI_MATH_ALU_INST_INLINE)));
|
2020-04-28 00:55:26 +08:00
|
|
|
MI_MATH mathCmd;
|
|
|
|
mathCmd.DW0.Value = 0x0;
|
|
|
|
mathCmd.DW0.BitField.InstructionType = MI_MATH::COMMAND_TYPE_MI_COMMAND;
|
|
|
|
mathCmd.DW0.BitField.InstructionOpcode = MI_MATH::MI_COMMAND_OPCODE_MI_MATH;
|
2019-05-13 20:15:03 +08:00
|
|
|
// 0x3 - 5 Dwords length cmd (-2): 1 for MI_MATH, 4 for MI_MATH_ALU_INST_INLINE
|
2020-04-28 00:55:26 +08:00
|
|
|
mathCmd.DW0.BitField.DwordLength = NUM_ALU_INST_FOR_READ_MODIFY_WRITE - 1;
|
|
|
|
*reinterpret_cast<MI_MATH *>(pCmd3) = mathCmd;
|
|
|
|
|
2019-05-13 20:15:03 +08:00
|
|
|
pCmd3++;
|
|
|
|
MI_MATH_ALU_INST_INLINE *pAluParam = reinterpret_cast<MI_MATH_ALU_INST_INLINE *>(pCmd3);
|
2020-04-28 00:55:26 +08:00
|
|
|
MI_MATH_ALU_INST_INLINE cmdAluParam;
|
|
|
|
cmdAluParam.DW0.Value = 0x0;
|
2019-05-13 20:15:03 +08:00
|
|
|
|
|
|
|
// Setup first operand of MI_MATH - load CS_GPR_R0 into register A
|
2020-04-28 00:55:26 +08:00
|
|
|
cmdAluParam.DW0.BitField.ALUOpcode =
|
2020-02-26 02:23:04 +08:00
|
|
|
static_cast<uint32_t>(AluRegisters::OPCODE_LOAD);
|
2020-04-28 00:55:26 +08:00
|
|
|
cmdAluParam.DW0.BitField.Operand1 =
|
2020-02-26 02:23:04 +08:00
|
|
|
static_cast<uint32_t>(AluRegisters::R_SRCA);
|
2020-04-28 00:55:26 +08:00
|
|
|
cmdAluParam.DW0.BitField.Operand2 =
|
2020-02-26 02:23:04 +08:00
|
|
|
static_cast<uint32_t>(AluRegisters::R_0);
|
2020-04-28 00:55:26 +08:00
|
|
|
*pAluParam = cmdAluParam;
|
2019-05-13 20:15:03 +08:00
|
|
|
pAluParam++;
|
|
|
|
|
2020-04-28 00:55:26 +08:00
|
|
|
cmdAluParam.DW0.Value = 0x0;
|
2019-05-13 20:15:03 +08:00
|
|
|
// Setup second operand of MI_MATH - load CS_GPR_R1 into register B
|
2020-04-28 00:55:26 +08:00
|
|
|
cmdAluParam.DW0.BitField.ALUOpcode =
|
2020-02-26 02:23:04 +08:00
|
|
|
static_cast<uint32_t>(AluRegisters::OPCODE_LOAD);
|
2020-04-28 00:55:26 +08:00
|
|
|
cmdAluParam.DW0.BitField.Operand1 =
|
2020-02-26 02:23:04 +08:00
|
|
|
static_cast<uint32_t>(AluRegisters::R_SRCB);
|
2020-04-28 00:55:26 +08:00
|
|
|
cmdAluParam.DW0.BitField.Operand2 =
|
2020-02-26 02:23:04 +08:00
|
|
|
static_cast<uint32_t>(AluRegisters::R_1);
|
2020-04-28 00:55:26 +08:00
|
|
|
*pAluParam = cmdAluParam;
|
2019-05-13 20:15:03 +08:00
|
|
|
pAluParam++;
|
|
|
|
|
2020-04-28 00:55:26 +08:00
|
|
|
cmdAluParam.DW0.Value = 0x0;
|
2019-05-13 20:15:03 +08:00
|
|
|
// Setup third operand of MI_MATH - "Operation" on registers A and B
|
2020-04-28 00:55:26 +08:00
|
|
|
cmdAluParam.DW0.BitField.ALUOpcode = static_cast<uint32_t>(operation);
|
|
|
|
cmdAluParam.DW0.BitField.Operand1 = 0;
|
|
|
|
cmdAluParam.DW0.BitField.Operand2 = 0;
|
|
|
|
*pAluParam = cmdAluParam;
|
2019-05-13 20:15:03 +08:00
|
|
|
pAluParam++;
|
|
|
|
|
2020-04-28 00:55:26 +08:00
|
|
|
cmdAluParam.DW0.Value = 0x0;
|
2019-05-13 20:15:03 +08:00
|
|
|
// Setup fourth operand of MI_MATH - store result into CS_GPR_R0
|
2020-04-28 00:55:26 +08:00
|
|
|
cmdAluParam.DW0.BitField.ALUOpcode =
|
2020-02-26 02:23:04 +08:00
|
|
|
static_cast<uint32_t>(AluRegisters::OPCODE_STORE);
|
2020-04-28 00:55:26 +08:00
|
|
|
cmdAluParam.DW0.BitField.Operand1 =
|
2020-02-26 02:23:04 +08:00
|
|
|
static_cast<uint32_t>(AluRegisters::R_0);
|
2020-04-28 00:55:26 +08:00
|
|
|
cmdAluParam.DW0.BitField.Operand2 =
|
2020-02-26 02:23:04 +08:00
|
|
|
static_cast<uint32_t>(AluRegisters::R_ACCU);
|
2020-04-28 00:55:26 +08:00
|
|
|
*pAluParam = cmdAluParam;
|
2019-05-13 20:15:03 +08:00
|
|
|
|
|
|
|
// LOAD value of CS_GPR_R0 into "Register"
|
|
|
|
auto pCmd4 = pCommandStream->getSpaceForCmd<MI_LOAD_REGISTER_REG>();
|
2020-04-28 00:55:26 +08:00
|
|
|
cmdReg = GfxFamily::cmdInitLoadRegisterReg;
|
|
|
|
cmdReg.setSourceRegisterAddress(CS_GPR_R0);
|
|
|
|
cmdReg.setDestinationRegisterAddress(aluRegister);
|
|
|
|
*pCmd4 = cmdReg;
|
2019-05-13 20:15:03 +08:00
|
|
|
|
|
|
|
// Add PIPE_CONTROL to flush caches
|
|
|
|
auto pCmd5 = pCommandStream->getSpaceForCmd<PIPE_CONTROL>();
|
2020-04-28 00:55:26 +08:00
|
|
|
PIPE_CONTROL cmdPipeControl = GfxFamily::cmdInitPipeControl;
|
|
|
|
cmdPipeControl.setCommandStreamerStallEnable(true);
|
|
|
|
cmdPipeControl.setDcFlushEnable(true);
|
|
|
|
cmdPipeControl.setTextureCacheInvalidationEnable(true);
|
|
|
|
cmdPipeControl.setPipeControlFlushEnable(true);
|
|
|
|
cmdPipeControl.setStateCacheInvalidationEnable(true);
|
|
|
|
*pCmd5 = cmdPipeControl;
|
2018-10-02 21:09:06 +08:00
|
|
|
}
|
|
|
|
|
2019-05-13 20:15:03 +08:00
|
|
|
template <typename GfxFamily>
|
|
|
|
void GpgpuWalkerHelper<GfxFamily>::dispatchPerfCountersCommandsStart(
|
|
|
|
CommandQueue &commandQueue,
|
|
|
|
TagNode<HwPerfCounter> &hwPerfCounter,
|
|
|
|
LinearStream *commandStream) {
|
|
|
|
|
2019-11-13 21:48:44 +08:00
|
|
|
const auto pPerformanceCounters = commandQueue.getPerfCounters();
|
2020-02-21 22:25:04 +08:00
|
|
|
const auto commandBufferType = EngineHelpers::isCcs(commandQueue.getGpgpuEngine().osContext->getEngineType())
|
2019-11-13 21:48:44 +08:00
|
|
|
? MetricsLibraryApi::GpuCommandBufferType::Compute
|
|
|
|
: MetricsLibraryApi::GpuCommandBufferType::Render;
|
|
|
|
const uint32_t size = pPerformanceCounters->getGpuCommandsSize(commandBufferType, true);
|
2019-05-20 17:19:27 +08:00
|
|
|
void *pBuffer = commandStream->getSpace(size);
|
2019-05-13 20:15:03 +08:00
|
|
|
|
2019-11-13 21:48:44 +08:00
|
|
|
pPerformanceCounters->getGpuCommands(commandBufferType, hwPerfCounter, true, size, pBuffer);
|
2019-05-13 20:15:03 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
template <typename GfxFamily>
|
|
|
|
void GpgpuWalkerHelper<GfxFamily>::dispatchPerfCountersCommandsEnd(
|
|
|
|
CommandQueue &commandQueue,
|
|
|
|
TagNode<HwPerfCounter> &hwPerfCounter,
|
|
|
|
LinearStream *commandStream) {
|
|
|
|
|
2019-11-13 21:48:44 +08:00
|
|
|
const auto pPerformanceCounters = commandQueue.getPerfCounters();
|
2020-02-21 22:25:04 +08:00
|
|
|
const auto commandBufferType = EngineHelpers::isCcs(commandQueue.getGpgpuEngine().osContext->getEngineType())
|
2019-11-13 21:48:44 +08:00
|
|
|
? MetricsLibraryApi::GpuCommandBufferType::Compute
|
|
|
|
: MetricsLibraryApi::GpuCommandBufferType::Render;
|
|
|
|
const uint32_t size = pPerformanceCounters->getGpuCommandsSize(commandBufferType, false);
|
2019-05-20 17:19:27 +08:00
|
|
|
void *pBuffer = commandStream->getSpace(size);
|
2019-05-13 20:15:03 +08:00
|
|
|
|
2019-11-13 21:48:44 +08:00
|
|
|
pPerformanceCounters->getGpuCommands(commandBufferType, hwPerfCounter, false, size, pBuffer);
|
2019-05-13 20:15:03 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
template <typename GfxFamily>
|
|
|
|
void GpgpuWalkerHelper<GfxFamily>::applyWADisableLSQCROPERFforOCL(NEO::LinearStream *pCommandStream, const Kernel &kernel, bool disablePerfMode) {
|
|
|
|
}
|
|
|
|
|
|
|
|
template <typename GfxFamily>
|
|
|
|
size_t GpgpuWalkerHelper<GfxFamily>::getSizeForWADisableLSQCROPERFforOCL(const Kernel *pKernel) {
|
|
|
|
return (size_t)0;
|
|
|
|
}
|
|
|
|
|
|
|
|
template <typename GfxFamily>
|
2019-07-03 15:30:30 +08:00
|
|
|
size_t EnqueueOperation<GfxFamily>::getTotalSizeRequiredCS(uint32_t eventType, const CsrDependencies &csrDeps, bool reserveProfilingCmdsSpace, bool reservePerfCounters, bool blitEnqueue, CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo) {
|
2020-02-28 16:07:07 +08:00
|
|
|
size_t expectedSizeCS = 0;
|
2020-03-09 20:48:30 +08:00
|
|
|
auto &hwInfo = commandQueue.getDevice().getHardwareInfo();
|
|
|
|
auto &commandQueueHw = static_cast<CommandQueueHw<GfxFamily> &>(commandQueue);
|
2020-02-28 16:07:07 +08:00
|
|
|
|
2019-07-03 15:30:30 +08:00
|
|
|
if (blitEnqueue) {
|
2020-02-28 16:07:07 +08:00
|
|
|
size_t expectedSizeCS = TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependencyWithBlitEnqueue<GfxFamily>();
|
|
|
|
if (commandQueueHw.isCacheFlushForBcsRequired()) {
|
|
|
|
expectedSizeCS += MemorySynchronizationCommands<GfxFamily>::getSizeForPipeControlWithPostSyncOperation(hwInfo);
|
|
|
|
}
|
|
|
|
|
|
|
|
return expectedSizeCS;
|
2019-07-03 15:30:30 +08:00
|
|
|
}
|
2020-02-28 16:07:07 +08:00
|
|
|
|
2019-05-13 20:15:03 +08:00
|
|
|
Kernel *parentKernel = multiDispatchInfo.peekParentKernel();
|
|
|
|
for (auto &dispatchInfo : multiDispatchInfo) {
|
|
|
|
expectedSizeCS += EnqueueOperation<GfxFamily>::getSizeRequiredCS(eventType, reserveProfilingCmdsSpace, reservePerfCounters, commandQueue, dispatchInfo.getKernel());
|
2020-02-20 23:09:41 +08:00
|
|
|
size_t memObjAuxCount = multiDispatchInfo.getMemObjsForAuxTranslation() != nullptr ? multiDispatchInfo.getMemObjsForAuxTranslation()->size() : 0;
|
2020-03-09 20:48:30 +08:00
|
|
|
expectedSizeCS += dispatchInfo.dispatchInitCommands.estimateCommandsSize(memObjAuxCount, hwInfo, commandQueueHw.isCacheFlushForBcsRequired());
|
|
|
|
expectedSizeCS += dispatchInfo.dispatchEpilogueCommands.estimateCommandsSize(memObjAuxCount, hwInfo, commandQueueHw.isCacheFlushForBcsRequired());
|
2019-05-13 20:15:03 +08:00
|
|
|
}
|
|
|
|
if (parentKernel) {
|
2020-02-19 23:32:00 +08:00
|
|
|
SchedulerKernel &scheduler = commandQueue.getContext().getSchedulerKernel();
|
2019-05-13 20:15:03 +08:00
|
|
|
expectedSizeCS += EnqueueOperation<GfxFamily>::getSizeRequiredCS(eventType, reserveProfilingCmdsSpace, reservePerfCounters, commandQueue, &scheduler);
|
|
|
|
}
|
2019-07-15 20:28:09 +08:00
|
|
|
if (commandQueue.getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
|
2019-05-13 20:15:03 +08:00
|
|
|
expectedSizeCS += TimestampPacketHelper::getRequiredCmdStreamSize<GfxFamily>(csrDeps);
|
2019-07-03 15:30:30 +08:00
|
|
|
expectedSizeCS += EnqueueOperation<GfxFamily>::getSizeRequiredForTimestampPacketWrite();
|
2019-05-13 20:15:03 +08:00
|
|
|
}
|
2020-04-22 22:34:39 +08:00
|
|
|
|
2020-08-19 17:06:01 +08:00
|
|
|
if (multiDispatchInfo.peekMainKernel()) {
|
|
|
|
expectedSizeCS += EnqueueOperation<GfxFamily>::getSizeForCacheFlushAfterWalkerCommands(*multiDispatchInfo.peekMainKernel(), commandQueue);
|
|
|
|
}
|
|
|
|
|
2020-04-30 23:12:01 +08:00
|
|
|
if (DebugManager.flags.PauseOnEnqueue.get() != -1) {
|
|
|
|
expectedSizeCS += MemorySynchronizationCommands<GfxFamily>::getSizeForSinglePipeControl() * 2;
|
|
|
|
expectedSizeCS += sizeof(typename GfxFamily::MI_SEMAPHORE_WAIT) * 2;
|
2020-04-22 22:34:39 +08:00
|
|
|
}
|
2020-04-30 23:12:01 +08:00
|
|
|
|
2019-05-13 20:15:03 +08:00
|
|
|
return expectedSizeCS;
|
|
|
|
}
|
|
|
|
|
|
|
|
template <typename GfxFamily>
|
|
|
|
size_t EnqueueOperation<GfxFamily>::getSizeRequiredCS(uint32_t cmdType, bool reserveProfilingCmdsSpace, bool reservePerfCounters, CommandQueue &commandQueue, const Kernel *pKernel) {
|
|
|
|
if (isCommandWithoutKernel(cmdType)) {
|
|
|
|
return EnqueueOperation<GfxFamily>::getSizeRequiredCSNonKernel(reserveProfilingCmdsSpace, reservePerfCounters, commandQueue);
|
|
|
|
} else {
|
|
|
|
return EnqueueOperation<GfxFamily>::getSizeRequiredCSKernel(reserveProfilingCmdsSpace, reservePerfCounters, commandQueue, pKernel);
|
2018-10-02 21:09:06 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
template <typename GfxFamily>
|
2019-05-13 20:15:03 +08:00
|
|
|
size_t EnqueueOperation<GfxFamily>::getSizeRequiredCSNonKernel(bool reserveProfilingCmdsSpace, bool reservePerfCounters, CommandQueue &commandQueue) {
|
|
|
|
size_t size = 0;
|
|
|
|
if (reserveProfilingCmdsSpace) {
|
|
|
|
size += 2 * sizeof(PIPE_CONTROL) + 4 * sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM);
|
2018-10-02 21:09:06 +08:00
|
|
|
}
|
2019-05-13 20:15:03 +08:00
|
|
|
return size;
|
2018-10-02 21:09:06 +08:00
|
|
|
}
|
|
|
|
|
2019-03-26 18:59:46 +08:00
|
|
|
} // namespace NEO
|