2020-01-17 08:56:05 +01:00
/*
2025-02-18 17:50:26 +00:00
* Copyright (C) 2020-2025 Intel Corporation
2020-01-17 08:56:05 +01:00
*
* SPDX-License-Identifier: MIT
*
*/
2020-02-23 22:44:01 +01:00
#include "shared/source/command_container/command_encoder.h"
2024-03-01 12:33:10 +00:00
#include "shared/source/command_container/implicit_scaling.h"
2020-02-23 22:44:01 +01:00
#include "shared/source/command_stream/linear_stream.h"
2022-09-07 14:27:09 +00:00
#include "shared/source/debugger/debugger_l0.h"
2020-02-23 22:44:01 +01:00
#include "shared/source/device/device.h"
#include "shared/source/execution_environment/execution_environment.h"
2022-12-15 17:32:03 +00:00
#include "shared/source/execution_environment/root_device_environment.h"
2020-06-23 12:03:43 +02:00
#include "shared/source/gmm_helper/gmm_helper.h"
2020-11-26 09:04:26 +00:00
#include "shared/source/helpers/api_specific_config.h"
#include "shared/source/helpers/bindless_heaps_helper.h"
2023-02-28 21:08:09 +00:00
#include "shared/source/helpers/blit_commands_helper.h"
2025-02-27 02:09:12 +00:00
#include "shared/source/helpers/compiler_product_helper.h"
2023-03-06 12:42:09 +00:00
#include "shared/source/helpers/definitions/command_encoder_args.h"
2023-02-01 16:23:01 +00:00
#include "shared/source/helpers/gfx_core_helper.h"
2024-03-01 12:33:10 +00:00
#include "shared/source/helpers/hw_info.h"
2020-10-07 14:22:26 +02:00
#include "shared/source/helpers/local_id_gen.h"
2024-11-12 15:23:07 +00:00
#include "shared/source/helpers/pipeline_select_args.h"
2020-02-23 22:44:01 +01:00
#include "shared/source/helpers/preamble.h"
#include "shared/source/helpers/register_offsets.h"
#include "shared/source/helpers/simd_helper.h"
#include "shared/source/helpers/string.h"
2021-01-29 10:17:10 +00:00
#include "shared/source/image/image_surface_state.h"
2022-01-26 10:59:30 +00:00
#include "shared/source/indirect_heap/indirect_heap.h"
2020-02-23 22:44:01 +01:00
#include "shared/source/kernel/dispatch_kernel_encoder_interface.h"
2023-12-15 14:36:11 +00:00
#include "shared/source/kernel/implicit_args_helper.h"
2020-10-07 14:22:26 +02:00
#include "shared/source/kernel/kernel_descriptor.h"
2023-03-10 12:28:11 +00:00
#include "shared/source/os_interface/product_helper.h"
2022-09-26 22:28:10 +00:00
#include "shared/source/program/kernel_info.h"
2020-01-17 08:56:05 +01:00
2021-10-21 01:30:53 +00:00
#include "encode_surface_state.inl"
2022-12-29 12:27:52 +00:00
#include "encode_surface_state_args.h"
2021-10-21 01:30:53 +00:00
2020-01-17 08:56:05 +01:00
#include <algorithm>
namespace NEO {
2025-03-18 00:28:35 +00:00
template <typename Family>
void EncodeStates<Family>::dshAlign(IndirectHeap *dsh) {
    // Aligns the dynamic state heap to the sampler-state-pointer alignment
    // required by INTERFACE_DESCRIPTOR_DATA.
    dsh->align(InterfaceDescriptorTraits<INTERFACE_DESCRIPTOR_DATA>::samplerStatePointerAlignSize);
}
2020-01-17 08:56:05 +01:00
template <typename Family>
uint32_t EncodeStates<Family>::copySamplerState(IndirectHeap *dsh,
                                                uint32_t samplerStateOffset,
                                                uint32_t samplerCount,
                                                uint32_t borderColorOffset,
                                                const void *fnDynamicStateHeap,
                                                BindlessHeapsHelper *bindlessHeapHelper,
                                                const RootDeviceEnvironment &rootDeviceEnvironment) {
    // Copies 'samplerCount' SAMPLER_STATE entries (and the border color data that
    // precedes them in the source blob) from the kernel's dynamic state heap data
    // into the destination DSH, patching each sampler's indirect-state (border
    // color) pointer along the way.
    // Returns the offset of the first copied SAMPLER_STATE within the DSH.
    auto sizeSamplerState = sizeof(SAMPLER_STATE) * samplerCount;
    // Border color data lives immediately before the sampler states in the source blob.
    auto borderColorSize = samplerStateOffset - borderColorOffset;

    SAMPLER_STATE *dstSamplerState = nullptr;
    uint32_t samplerStateOffsetInDsh = 0;

    dsh->align(NEO::EncodeDispatchKernel<Family>::getDefaultDshAlignment());
    uint32_t borderColorOffsetInDsh = 0;

    auto borderColor = reinterpret_cast<const SAMPLER_BORDER_COLOR_STATE *>(ptrOffset(fnDynamicStateHeap, borderColorOffset));
    auto &compilerProductHelper = rootDeviceEnvironment.getHelper<CompilerProductHelper>();
    auto &hwInfo = *rootDeviceEnvironment.getHardwareInfo();
    bool heaplessEnabled = compilerProductHelper.isHeaplessModeEnabled(hwInfo);

    if (!bindlessHeapHelper || (!bindlessHeapHelper->isGlobalDshSupported())) {
        // Classic path: append border color data and sampler states to the given DSH.
        borderColorOffsetInDsh = static_cast<uint32_t>(dsh->getUsed());
        // add offset of graphics allocation base address relative to heap base address
        if (bindlessHeapHelper) {
            borderColorOffsetInDsh += static_cast<uint32_t>(ptrDiff(dsh->getGpuBase(), bindlessHeapHelper->getGlobalHeapsBase()));
        }
        auto borderColorDst = dsh->getSpace(borderColorSize);
        memcpy_s(borderColorDst, borderColorSize, borderColor, borderColorSize);

        dsh->align(InterfaceDescriptorTraits<INTERFACE_DESCRIPTOR_DATA>::samplerStatePointerAlignSize);
        samplerStateOffsetInDsh = static_cast<uint32_t>(dsh->getUsed());

        dstSamplerState = reinterpret_cast<SAMPLER_STATE *>(dsh->getSpace(sizeSamplerState));
    } else {
        // Global bindless DSH path: only the two canonical border colors are
        // supported (all zeros, or zeros with alpha == 1); anything else aborts.
        if (borderColor->getBorderColorRed() != 0.0f ||
            borderColor->getBorderColorGreen() != 0.0f ||
            borderColor->getBorderColorBlue() != 0.0f ||
            (borderColor->getBorderColorAlpha() != 0.0f && borderColor->getBorderColorAlpha() != 1.0f)) {
            UNRECOVERABLE_IF(true);
        } else if (borderColor->getBorderColorAlpha() == 0.0f) {
            borderColorOffsetInDsh = bindlessHeapHelper->getDefaultBorderColorOffset();
        } else {
            borderColorOffsetInDsh = bindlessHeapHelper->getAlphaBorderColorOffset();
        }
        dshAlign(dsh);
        auto samplerStateInDsh = bindlessHeapHelper->allocateSSInHeap(sizeSamplerState, nullptr, BindlessHeapsHelper::BindlesHeapType::globalDsh);
        dstSamplerState = reinterpret_cast<SAMPLER_STATE *>(samplerStateInDsh.ssPtr);
        samplerStateOffsetInDsh = static_cast<uint32_t>(samplerStateInDsh.surfaceStateOffset);
    }

    auto &helper = rootDeviceEnvironment.getHelper<ProductHelper>();
    auto srcSamplerState = reinterpret_cast<const SAMPLER_STATE *>(ptrOffset(fnDynamicStateHeap, samplerStateOffset));
    SAMPLER_STATE state = {};
    for (uint32_t i = 0; i < samplerCount; i++) {
        state = srcSamplerState[i];
        if (heaplessEnabled) {
            // Heapless mode: border color is carried in the sampler state itself
            // instead of via an indirect state pointer.
            EncodeStates<Family>::adjustSamplerStateBorderColor(state, *borderColor);
        } else {
            state.setIndirectStatePointer(static_cast<uint32_t>(borderColorOffsetInDsh));
        }
        // Allow product-specific fixups before committing the state.
        helper.adjustSamplerState(&state, hwInfo);
        dstSamplerState[i] = state;
    }
    return samplerStateOffsetInDsh;
}
2020-10-07 14:22:26 +02:00
2020-01-17 08:56:05 +01:00
template <typename Family>
2025-04-03 16:02:01 +00:00
void EncodeMathMMIO<Family>::encodeMulRegVal(CommandContainer &container, uint32_t offset, uint32_t val, uint64_t dstAddress, bool isBcs, EncodeStoreMMIOParams *outStoreMMIOParams) {
2020-01-17 08:56:05 +01:00
int logLws = 0;
int i = val;
while (val >> logLws) {
logLws++;
}
2024-09-04 21:24:17 +00:00
EncodeSetMMIO<Family>::encodeREG(container, RegisterOffsets::csGprR0, offset, isBcs);
2024-04-19 14:20:27 +00:00
EncodeSetMMIO<Family>::encodeIMM(container, RegisterOffsets::csGprR1, 0, true, isBcs);
2020-01-17 08:56:05 +01:00
i = 0;
while (i < logLws) {
if (val & (1 << i)) {
2023-12-19 07:40:17 +00:00
EncodeMath<Family>::addition(container, AluRegisters::gpr1,
AluRegisters::gpr0, AluRegisters::gpr2);
2024-09-04 21:24:17 +00:00
EncodeSetMMIO<Family>::encodeREG(container, RegisterOffsets::csGprR1, RegisterOffsets::csGprR2, isBcs);
2020-01-17 08:56:05 +01:00
}
2023-12-19 07:40:17 +00:00
EncodeMath<Family>::addition(container, AluRegisters::gpr0,
AluRegisters::gpr0, AluRegisters::gpr2);
2024-09-04 21:24:17 +00:00
EncodeSetMMIO<Family>::encodeREG(container, RegisterOffsets::csGprR0, RegisterOffsets::csGprR2, isBcs);
2020-01-17 08:56:05 +01:00
i++;
}
2025-04-03 16:02:01 +00:00
void **outStoreMMIOCmd = nullptr;
if (outStoreMMIOParams) {
outStoreMMIOParams->address = dstAddress;
outStoreMMIOParams->offset = RegisterOffsets::csGprR1;
outStoreMMIOParams->workloadPartition = false;
outStoreMMIOParams->isBcs = isBcs;
outStoreMMIOCmd = &outStoreMMIOParams->command;
}
EncodeStoreMMIO<Family>::encode(*container.getCommandStream(), RegisterOffsets::csGprR1, dstAddress, false, outStoreMMIOCmd, isBcs);
2020-01-17 08:56:05 +01:00
}
2020-02-05 13:38:11 -08:00
/*
* Compute *firstOperand > secondOperand and store the result in
* MI_PREDICATE_RESULT where firstOperand is an device memory address.
*
* To calculate the "greater than" operation in the device,
* (secondOperand - *firstOperand) is used, and if the carry flag register is
* set, then (*firstOperand) is greater than secondOperand.
*/
2020-01-17 08:56:05 +01:00
template <typename Family>
void EncodeMathMMIO<Family>::encodeGreaterThanPredicate(CommandContainer &container, uint64_t firstOperand, uint32_t secondOperand, bool isBcs) {
    // GPR0 = *firstOperand (loaded from memory), GPR1 = secondOperand (immediate).
    EncodeSetMMIO<Family>::encodeMEM(container, RegisterOffsets::csGprR0, firstOperand, isBcs);
    EncodeSetMMIO<Family>::encodeIMM(container, RegisterOffsets::csGprR1, secondOperand, true, isBcs);

    /* RegisterOffsets::csGprR* registers map to AluRegisters::gpr* registers */
    EncodeMath<Family>::greaterThan(container, AluRegisters::gpr0,
                                    AluRegisters::gpr1, AluRegisters::gpr2);

    // Copy the comparison result into MI_PREDICATE_RESULT so that subsequent
    // predicated commands are gated by it.
    EncodeSetMMIO<Family>::encodeREG(container, RegisterOffsets::csPredicateResult, RegisterOffsets::csGprR2, isBcs);
}
2020-12-04 08:15:15 -08:00
/*
* Compute bitwise AND between a register value from regOffset and immVal
* and store it into dstAddress.
*/
template <typename Family>
void EncodeMathMMIO<Family>::encodeBitwiseAndVal(CommandContainer &container, uint32_t regOffset, uint32_t immVal, uint64_t dstAddress,
                                                 bool workloadPartition, void **outCmdBuffer, bool isBcs) {
    // GPR13 = reg[regOffset], GPR14 = immVal, then GPR12 = GPR13 & GPR14.
    EncodeSetMMIO<Family>::encodeREG(container, RegisterOffsets::csGprR13, regOffset, isBcs);
    EncodeSetMMIO<Family>::encodeIMM(container, RegisterOffsets::csGprR14, immVal, true, isBcs);
    EncodeMath<Family>::bitwiseAnd(container, AluRegisters::gpr13,
                                   AluRegisters::gpr14,
                                   AluRegisters::gpr12);
    // Store the masked value to dstAddress; outCmdBuffer (optional) receives a
    // pointer to the emitted MI_STORE_REGISTER_MEM for later patching.
    EncodeStoreMMIO<Family>::encode(*container.getCommandStream(),
                                    RegisterOffsets::csGprR12, dstAddress, workloadPartition, outCmdBuffer, isBcs);
}
2020-02-03 17:15:04 -08:00
/*
* encodeAlu() performs operations that leave a state including the result of
* an operation such as the carry flag, and the accu flag with subtraction and
* addition result.
*
* Parameter "postOperationStateRegister" is the ALU register with the result
* from the operation that the function caller is interested in obtaining.
*
* Parameter "finalResultRegister" is the final destination register where
* data from "postOperationStateRegister" will be copied.
*/
2020-01-17 08:56:05 +01:00
template <typename Family>
2020-02-25 10:23:04 -08:00
void EncodeMathMMIO<Family>::encodeAlu(MI_MATH_ALU_INST_INLINE *pAluParam, AluRegisters srcA, AluRegisters srcB, AluRegisters op, AluRegisters finalResultRegister, AluRegisters postOperationStateRegister) {
2020-04-27 18:55:26 +02:00
MI_MATH_ALU_INST_INLINE aluParam;
aluParam.DW0.Value = 0x0;
2023-12-19 07:40:17 +00:00
aluParam.DW0.BitField.ALUOpcode = static_cast<uint32_t>(AluRegisters::opcodeLoad);
aluParam.DW0.BitField.Operand1 = static_cast<uint32_t>(AluRegisters::srca);
2020-04-27 18:55:26 +02:00
aluParam.DW0.BitField.Operand2 = static_cast<uint32_t>(srcA);
*pAluParam = aluParam;
2020-01-17 08:56:05 +01:00
pAluParam++;
2020-04-27 18:55:26 +02:00
aluParam.DW0.Value = 0x0;
2023-12-19 07:40:17 +00:00
aluParam.DW0.BitField.ALUOpcode = static_cast<uint32_t>(AluRegisters::opcodeLoad);
aluParam.DW0.BitField.Operand1 = static_cast<uint32_t>(AluRegisters::srcb);
2020-04-27 18:55:26 +02:00
aluParam.DW0.BitField.Operand2 = static_cast<uint32_t>(srcB);
*pAluParam = aluParam;
2020-01-17 08:56:05 +01:00
pAluParam++;
2020-02-05 13:38:11 -08:00
/* Order of operation: Operand1 <ALUOpcode> Operand2 */
2020-04-27 18:55:26 +02:00
aluParam.DW0.Value = 0x0;
aluParam.DW0.BitField.ALUOpcode = static_cast<uint32_t>(op);
aluParam.DW0.BitField.Operand1 = 0;
aluParam.DW0.BitField.Operand2 = 0;
*pAluParam = aluParam;
2020-01-17 08:56:05 +01:00
pAluParam++;
2020-04-27 18:55:26 +02:00
aluParam.DW0.Value = 0x0;
2023-12-19 07:40:17 +00:00
aluParam.DW0.BitField.ALUOpcode = static_cast<uint32_t>(AluRegisters::opcodeStore);
2020-04-27 18:55:26 +02:00
aluParam.DW0.BitField.Operand1 = static_cast<uint32_t>(finalResultRegister);
aluParam.DW0.BitField.Operand2 = static_cast<uint32_t>(postOperationStateRegister);
*pAluParam = aluParam;
2020-01-17 08:56:05 +01:00
pAluParam++;
}
template <typename Family>
2020-02-25 10:23:04 -08:00
uint32_t *EncodeMath<Family>::commandReserve(CommandContainer &container) {
2022-03-23 08:57:31 +00:00
return commandReserve(*container.getCommandStream());
}
template <typename Family>
uint32_t *EncodeMath<Family>::commandReserve(LinearStream &cmdStream) {
    // Reserves space for an MI_MATH header followed by the fixed number of ALU
    // instructions used by the read-modify-write helpers, writes the header, and
    // returns a pointer to where the ALU instructions should be written.
    size_t size = sizeof(MI_MATH) + sizeof(MI_MATH_ALU_INST_INLINE) * RegisterConstants::numAluInstForReadModifyWrite;

    auto cmd = reinterpret_cast<uint32_t *>(cmdStream.getSpace(size));
    MI_MATH mathBuffer;
    mathBuffer.DW0.Value = 0x0;
    mathBuffer.DW0.BitField.InstructionType = MI_MATH::COMMAND_TYPE_MI_COMMAND;
    mathBuffer.DW0.BitField.InstructionOpcode = MI_MATH::MI_COMMAND_OPCODE_MI_MATH;
    // DwordLength is encoded as (number of payload dwords - 1).
    mathBuffer.DW0.BitField.DwordLength = RegisterConstants::numAluInstForReadModifyWrite - 1;
    *reinterpret_cast<MI_MATH *>(cmd) = mathBuffer;
    cmd++;
    return cmd;
}
2020-04-27 18:55:26 +02:00
template <typename Family>
void EncodeMathMMIO<Family>::encodeAluAdd(MI_MATH_ALU_INST_INLINE *pAluParam,
                                          AluRegisters firstOperandRegister,
                                          AluRegisters secondOperandRegister,
                                          AluRegisters finalResultRegister) {
    // finalResultRegister = firstOperandRegister + secondOperandRegister
    // (the sum is taken from the ALU accumulator).
    encodeAlu(pAluParam, firstOperandRegister, secondOperandRegister, AluRegisters::opcodeAdd, finalResultRegister, AluRegisters::accu);
}
template <typename Family>
void EncodeMathMMIO<Family>::encodeAluSubStoreCarry(MI_MATH_ALU_INST_INLINE *pAluParam, AluRegisters regA, AluRegisters regB, AluRegisters finalResultRegister) {
    /* regB is subtracted from regA */
    // finalResultRegister receives the carry flag (CF) left by the subtraction,
    // which is how "greater than" comparisons are implemented.
    encodeAlu(pAluParam, regA, regB, AluRegisters::opcodeSub, finalResultRegister, AluRegisters::cf);
}
2020-11-24 19:15:37 -08:00
template <typename Family>
void EncodeMathMMIO<Family>::encodeAluAnd(MI_MATH_ALU_INST_INLINE *pAluParam,
                                          AluRegisters firstOperandRegister,
                                          AluRegisters secondOperandRegister,
                                          AluRegisters finalResultRegister) {
    // finalResultRegister = firstOperandRegister & secondOperandRegister
    // (the result is taken from the ALU accumulator).
    encodeAlu(pAluParam, firstOperandRegister, secondOperandRegister, AluRegisters::opcodeAnd, finalResultRegister, AluRegisters::accu);
}
2022-11-10 09:58:55 +00:00
template <typename Family>
void EncodeMathMMIO<Family>::encodeIncrementOrDecrement(LinearStream &cmdStream, AluRegisters operandRegister, IncrementOrDecrementOperation operationType, bool isBcs) {
    // Load the constant 1 into 64-bit GPR7 (low dword = 1, high dword = 0).
    LriHelper<Family>::program(&cmdStream, RegisterOffsets::csGprR7, 1, true, isBcs);
    LriHelper<Family>::program(&cmdStream, RegisterOffsets::csGprR7 + 4, 0, true, isBcs);

    // operandRegister = operandRegister ± 1, result taken from the accumulator.
    EncodeAluHelper<Family, 4> aluHelper;
    aluHelper.setNextAlu(AluRegisters::opcodeLoad, AluRegisters::srca, operandRegister);
    aluHelper.setNextAlu(AluRegisters::opcodeLoad, AluRegisters::srcb, AluRegisters::gpr7);
    aluHelper.setNextAlu((operationType == IncrementOrDecrementOperation::increment) ? AluRegisters::opcodeAdd
                                                                                     : AluRegisters::opcodeSub);
    aluHelper.setNextAlu(AluRegisters::opcodeStore, operandRegister, AluRegisters::accu);
    aluHelper.copyToCmdStream(cmdStream);
}
template <typename Family>
2024-04-19 14:20:27 +00:00
void EncodeMathMMIO<Family>::encodeIncrement(LinearStream &cmdStream, AluRegisters operandRegister, bool isBcs) {
encodeIncrementOrDecrement(cmdStream, operandRegister, IncrementOrDecrementOperation::increment, isBcs);
2022-11-10 09:58:55 +00:00
}
template <typename Family>
2024-04-19 14:20:27 +00:00
void EncodeMathMMIO<Family>::encodeDecrement(LinearStream &cmdStream, AluRegisters operandRegister, bool isBcs) {
encodeIncrementOrDecrement(cmdStream, operandRegister, IncrementOrDecrementOperation::decrement, isBcs);
2022-11-10 09:58:55 +00:00
}
2020-02-25 10:23:04 -08:00
/*
* greaterThan() tests if firstOperandRegister is greater than
* secondOperandRegister.
*/
template <typename Family>
void EncodeMath<Family>::greaterThan(CommandContainer &container,
                                     AluRegisters firstOperandRegister,
                                     AluRegisters secondOperandRegister,
                                     AluRegisters finalResultRegister) {
    uint32_t *cmd = EncodeMath<Family>::commandReserve(container);
    /* firstOperandRegister will be subtracted from secondOperandRegister */
    // The carry flag of (second - first) is set iff *first > second.
    EncodeMathMMIO<Family>::encodeAluSubStoreCarry(reinterpret_cast<MI_MATH_ALU_INST_INLINE *>(cmd),
                                                   secondOperandRegister,
                                                   firstOperandRegister,
                                                   finalResultRegister);
}
2020-02-25 10:23:04 -08:00
template <typename Family>
void EncodeMath<Family>::addition(CommandContainer &container,
                                  AluRegisters firstOperandRegister,
                                  AluRegisters secondOperandRegister,
                                  AluRegisters finalResultRegister) {
    // Emits an MI_MATH computing finalResult = firstOperand + secondOperand.
    auto aluInstructions = reinterpret_cast<MI_MATH_ALU_INST_INLINE *>(EncodeMath<Family>::commandReserve(container));
    EncodeMathMMIO<Family>::encodeAluAdd(aluInstructions,
                                         firstOperandRegister,
                                         secondOperandRegister,
                                         finalResultRegister);
}
template <typename Family>
void EncodeMath<Family>::addition(LinearStream &cmdStream,
                                  AluRegisters firstOperandRegister,
                                  AluRegisters secondOperandRegister,
                                  AluRegisters finalResultRegister) {
    // Stream variant: emits an MI_MATH computing
    // finalResult = firstOperand + secondOperand directly into cmdStream.
    auto aluInstructions = reinterpret_cast<MI_MATH_ALU_INST_INLINE *>(EncodeMath<Family>::commandReserve(cmdStream));
    EncodeMathMMIO<Family>::encodeAluAdd(aluInstructions,
                                         firstOperandRegister,
                                         secondOperandRegister,
                                         finalResultRegister);
}
template <typename Family>
void EncodeMath<Family>::bitwiseAnd(CommandContainer &container,
                                    AluRegisters firstOperandRegister,
                                    AluRegisters secondOperandRegister,
                                    AluRegisters finalResultRegister) {
    // Emits an MI_MATH computing finalResult = firstOperand & secondOperand.
    auto aluInstructions = reinterpret_cast<MI_MATH_ALU_INST_INLINE *>(EncodeMath<Family>::commandReserve(container));
    EncodeMathMMIO<Family>::encodeAluAnd(aluInstructions,
                                         firstOperandRegister,
                                         secondOperandRegister,
                                         finalResultRegister);
}
template <typename Family>
2024-04-19 14:20:27 +00:00
inline void EncodeSetMMIO<Family>::encodeIMM(CommandContainer &container, uint32_t offset, uint32_t data, bool remap, bool isBcs) {
EncodeSetMMIO<Family>::encodeIMM(*container.getCommandStream(), offset, data, remap, isBcs);
2021-09-03 11:42:31 +00:00
}
template <typename Family>
2024-09-04 21:24:17 +00:00
inline void EncodeSetMMIO<Family>::encodeMEM(CommandContainer &container, uint32_t offset, uint64_t address, bool isBcs) {
EncodeSetMMIO<Family>::encodeMEM(*container.getCommandStream(), offset, address, isBcs);
2021-09-03 11:42:31 +00:00
}
template <typename Family>
2024-09-04 21:24:17 +00:00
inline void EncodeSetMMIO<Family>::encodeREG(CommandContainer &container, uint32_t dstOffset, uint32_t srcOffset, bool isBcs) {
EncodeSetMMIO<Family>::encodeREG(*container.getCommandStream(), dstOffset, srcOffset, isBcs);
2021-09-03 11:42:31 +00:00
}
template <typename Family>
2024-04-19 14:20:27 +00:00
inline void EncodeSetMMIO<Family>::encodeIMM(LinearStream &cmdStream, uint32_t offset, uint32_t data, bool remap, bool isBcs) {
2021-09-03 11:42:31 +00:00
LriHelper<Family>::program(&cmdStream,
2020-10-06 10:58:18 +02:00
offset,
data,
2024-04-19 14:20:27 +00:00
remap,
isBcs);
2020-01-17 08:56:05 +01:00
}
2022-09-07 14:27:09 +00:00
template <typename Family>
inline void EncodeStateBaseAddress<Family>::setSbaTrackingForL0DebuggerIfEnabled(bool trackingEnabled,
                                                                                 Device &device,
                                                                                 LinearStream &commandStream,
                                                                                 STATE_BASE_ADDRESS &sbaCmd, bool useFirstLevelBB) {
    // When L0 debugger SBA tracking is enabled, extracts the base addresses from
    // the given STATE_BASE_ADDRESS command and emits commands that capture them
    // for the debugger. No-op when tracking is disabled.
    if (!trackingEnabled) {
        return;
    }
    NEO::Debugger::SbaAddresses sbaAddresses = {};
    NEO::EncodeStateBaseAddress<Family>::setSbaAddressesForDebugger(sbaAddresses, sbaCmd);
    device.getL0Debugger()->captureStateBaseAddress(commandStream, sbaAddresses, useFirstLevelBB);
}
2020-01-17 08:56:05 +01:00
template <typename Family>
void EncodeSetMMIO<Family>::encodeMEM(LinearStream &cmdStream, uint32_t offset, uint64_t address, bool isBcs) {
    // Emits MI_LOAD_REGISTER_MEM: reg[offset] = *address.
    MI_LOAD_REGISTER_MEM cmd = Family::cmdInitLoadRegisterMem;
    cmd.setRegisterAddress(offset);
    cmd.setMemoryAddress(address);
    remapOffset(&cmd);
    // Blitter (BCS) engine registers live at a fixed base offset; rebase after
    // remapping so the final register address targets the BCS register file.
    if (isBcs) {
        cmd.setRegisterAddress(offset + RegisterOffsets::bcs0Base);
    }

    auto buffer = cmdStream.getSpaceForCmd<MI_LOAD_REGISTER_MEM>();
    *buffer = cmd;
}
template <typename Family>
void EncodeSetMMIO<Family>::encodeREG(LinearStream &cmdStream, uint32_t dstOffset, uint32_t srcOffset, bool isBcs) {
    // Emits MI_LOAD_REGISTER_REG: reg[dstOffset] = reg[srcOffset].
    MI_LOAD_REGISTER_REG cmd = Family::cmdInitLoadRegisterReg;
    cmd.setSourceRegisterAddress(srcOffset);
    cmd.setDestinationRegisterAddress(dstOffset);
    remapOffset(&cmd);
    // Blitter (BCS) engine registers live at a fixed base offset; rebase both
    // source and destination after remapping.
    if (isBcs) {
        cmd.setSourceRegisterAddress(srcOffset + RegisterOffsets::bcs0Base);
        cmd.setDestinationRegisterAddress(dstOffset + RegisterOffsets::bcs0Base);
    }
    auto buffer = cmdStream.getSpaceForCmd<MI_LOAD_REGISTER_REG>();
    *buffer = cmd;
}
template <typename Family>
void EncodeStoreMMIO<Family>::encode(LinearStream &csr, uint32_t offset, uint64_t address, bool workloadPartition, void **outCmdBuffer, bool isBcs) {
    // Reserves space in the stream for an MI_STORE_REGISTER_MEM and encodes it.
    // outCmdBuffer (optional) receives the location of the emitted command so
    // callers can patch it later.
    auto buffer = csr.getSpaceForCmd<MI_STORE_REGISTER_MEM>();
    if (outCmdBuffer != nullptr) {
        *outCmdBuffer = buffer;
    }
    EncodeStoreMMIO<Family>::encode(buffer, offset, address, workloadPartition, isBcs);
}
template <typename Family>
inline void EncodeStoreMMIO<Family>::encode(MI_STORE_REGISTER_MEM *cmdBuffer, uint32_t offset, uint64_t address, bool workloadPartition, bool isBcs) {
    // Encodes MI_STORE_REGISTER_MEM into cmdBuffer: *address = reg[offset].
    MI_STORE_REGISTER_MEM cmd = Family::cmdInitStoreRegisterMem;
    cmd.setRegisterAddress(offset);
    cmd.setMemoryAddress(address);
    // Platform-specific flags (e.g. workload partitioning) are appended here.
    appendFlags(&cmd, workloadPartition);
    // Blitter (BCS) engine registers live at a fixed base offset.
    if (isBcs) {
        cmd.setRegisterAddress(offset + RegisterOffsets::bcs0Base);
    }
    *cmdBuffer = cmd;
}
template <typename Family>
void EncodeSurfaceState<Family>::encodeBuffer(EncodeSurfaceStateArgs &args) {
    // Programs a RENDER_SURFACE_STATE (into args.outMemory) describing a raw
    // buffer at args.graphicsAddress of args.size bytes.
    auto surfaceState = reinterpret_cast<R_SURFACE_STATE *>(args.outMemory);
    // Surface size is aligned up to the base-address alignment and clamped to
    // the maximum stateful region.
    uint64_t bufferSize = alignUp(args.size, getSurfaceBaseAddressAlignment());
    bufferSize = std::min(bufferSize, static_cast<uint64_t>(MemoryConstants::fullStatefulRegion - 1));
    // Buffer length is split across the width/height/depth fields, each field
    // holding (dimension - 1).
    SurfaceStateBufferLength length = {0};
    length.length = static_cast<uint32_t>(bufferSize - 1);

    surfaceState->setWidth(length.surfaceState.width + 1);
    surfaceState->setHeight(length.surfaceState.height + 1);
    surfaceState->setDepth(length.surfaceState.depth + 1);

    // A zero address encodes a NULL surface so stateful accesses are dropped
    // instead of faulting.
    surfaceState->setSurfaceType((args.graphicsAddress != 0) ? R_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER
                                                             : R_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_NULL);
    surfaceState->setSurfaceFormat(SURFACE_FORMAT::SURFACE_FORMAT_RAW);
    surfaceState->setSurfaceVerticalAlignment(R_SURFACE_STATE::SURFACE_VERTICAL_ALIGNMENT_VALIGN_4);
    surfaceState->setSurfaceHorizontalAlignment(R_SURFACE_STATE::SURFACE_HORIZONTAL_ALIGNMENT_HALIGN_DEFAULT);

    surfaceState->setTileMode(R_SURFACE_STATE::TILE_MODE_LINEAR);
    surfaceState->setVerticalLineStride(0);
    surfaceState->setVerticalLineStrideOffset(0);
    surfaceState->setMemoryObjectControlState(args.mocs);
    surfaceState->setSurfaceBaseAddress(args.graphicsAddress);
    surfaceState->setAuxiliarySurfaceMode(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE);

    setCoherencyType(surfaceState, args.cpuCoherent ? R_SURFACE_STATE::COHERENCY_TYPE_IA_COHERENT : R_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT);

    auto compressionEnabled = args.allocation ? args.allocation->isCompressionEnabled() : false;
    if (compressionEnabled && !args.forceNonAuxMode) {
        // Its expected to not program pitch/qpitch/baseAddress for Aux surface in CCS scenarios
        setCoherencyType(surfaceState, R_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT);
        setBufferAuxParamsForCCS(surfaceState);
    }

    // Debug override: force uncached MOCS for stateful buffer accesses.
    if (debugManager.flags.DisableCachingForStatefulBufferAccess.get()) {
        surfaceState->setMemoryObjectControlState(args.gmmHelper->getUncachedMOCS());
    }

    EncodeSurfaceState<Family>::encodeExtraBufferParams(args);
    EncodeSurfaceState<Family>::appendBufferSurfaceState(args);
}
2020-09-15 20:27:18 +02:00
2020-10-07 14:22:26 +02:00
template <typename Family>
void EncodeSurfaceState<Family>::getSshAlignedPointer(uintptr_t &ptr, size_t &offset) {
    // Rounds ptr down to the surface-state base-address alignment and reports
    // in 'offset' how many bytes it was moved back (0 if already aligned).
    const uintptr_t alignedDown = ptr & getSurfaceBaseAddressAlignmentMask();
    if (alignedDown == ptr) {
        offset = 0;
    } else {
        offset = ptrDiff(ptr, alignedDown);
        ptr = alignedDown;
    }
}
2025-09-19 11:54:48 +00:00
// Returned binding table pointer is relative to given heap (which is assumed to be the Surface state base address)
// as required by the INTERFACE_DESCRIPTOR_DATA.
template <typename Family>
size_t EncodeSurfaceState<Family>::pushBindingTableAndSurfaceStates(IndirectHeap &dstHeap,
                                                                    const void *srcKernelSsh, size_t srcKernelSshSize,
                                                                    size_t numberOfBindingTableStates, size_t offsetOfBindingTable) {
    if constexpr (Family::isHeaplessRequired()) {
        // Binding tables are not used on heapless-only families; reaching this
        // path indicates a programming error.
        UNRECOVERABLE_IF(true);
        return 0;
    } else {
        using BINDING_TABLE_STATE = typename Family::BINDING_TABLE_STATE;
        size_t sshSize = srcKernelSshSize;
        DEBUG_BREAK_IF(srcKernelSsh == nullptr);
        auto srcSurfaceState = srcKernelSsh;
        // Allocate space for new ssh data
        auto dstSurfaceState = dstHeap.getSpace(sshSize);
        // Compiler sends BTI table that is already populated with surface state pointers relative to local SSH.
        // We may need to patch these pointers so that they are relative to surface state base address
        if (dstSurfaceState == dstHeap.getCpuBase()) {
            // nothing to patch, we're at the start of heap (which is assumed to be the surface state base address)
            // we need to simply copy the ssh (including BTIs from compiler)
            memcpy_s(dstSurfaceState, sshSize, srcSurfaceState, sshSize);
            return offsetOfBindingTable;
        }

        // We can copy-over the surface states, but BTIs will need to be patched
        memcpy_s(dstSurfaceState, sshSize, srcSurfaceState, offsetOfBindingTable);
        uint32_t surfaceStatesOffset = static_cast<uint32_t>(ptrDiff(dstSurfaceState, dstHeap.getCpuBase()));
        // march over BTIs and offset the pointers based on surface state base address
        auto *dstBtiTableBase = reinterpret_cast<BINDING_TABLE_STATE *>(ptrOffset(dstSurfaceState, offsetOfBindingTable));
        DEBUG_BREAK_IF(reinterpret_cast<uintptr_t>(dstBtiTableBase) % Family::INTERFACE_DESCRIPTOR_DATA::BINDINGTABLEPOINTER_ALIGN_SIZE != 0);
        auto *srcBtiTableBase = reinterpret_cast<const BINDING_TABLE_STATE *>(ptrOffset(srcSurfaceState, offsetOfBindingTable));
        BINDING_TABLE_STATE bti = Family::cmdInitBindingTableState;
        for (uint32_t i = 0, e = static_cast<uint32_t>(numberOfBindingTableStates); i != e; ++i) {
            uint32_t localSurfaceStateOffset = srcBtiTableBase[i].getSurfaceStatePointer();
            uint32_t offsetSurfaceStateOffset = localSurfaceStateOffset + surfaceStatesOffset;
            bti.setSurfaceStatePointer(offsetSurfaceStateOffset); // patch just the SurfaceStatePointer bits
            dstBtiTableBase[i] = bti;
            // NOTE(review): sizeof() of the align-size constant yields the size of
            // its type, not the alignment value itself — this check likely intended
            // the raw value; confirm against other uses of SURFACESTATEPOINTER_ALIGN_SIZE.
            DEBUG_BREAK_IF(bti.getRawData(0) % sizeof(BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE) != 0);
        }

        // Return the binding table offset relative to the heap base (the assumed
        // surface state base address).
        return ptrDiff(dstBtiTableBase, dstHeap.getCpuBase());
    }
}
2021-06-23 15:26:56 +00:00
template <typename Family>
void EncodeSurfaceState<Family>::appendParamsForImageFromBuffer(R_SURFACE_STATE *surfaceState) {
    // Intentionally empty default: this family needs no extra programming for
    // image-from-buffer surfaces (specializations may differ).
}
2023-12-01 14:30:28 +00:00
template <typename Family>
inline void EncodeDispatchKernel<Family>::encodeCommon(CommandContainer &container, EncodeDispatchKernelArgs &args) {
    // Dispatches the kernel using the family's default walker command type.
    using DefaultWalkerType = typename Family::DefaultWalkerType;
    EncodeDispatchKernel<Family>::template encode<DefaultWalkerType>(container, args);
}
2020-01-17 08:56:05 +01:00
template <typename Family>
void *EncodeDispatchKernel<Family>::getInterfaceDescriptor(CommandContainer &container, IndirectHeap *childDsh, uint32_t &iddOffset) {
    // Returns a pointer to the next free INTERFACE_DESCRIPTOR_DATA slot,
    // allocating a new block of descriptors when the current one is exhausted
    // (from childDsh if provided, otherwise from the container's dynamic state
    // heap). iddOffset receives the slot index within the current block.
    if (container.nextIddInBlockRef() == container.getNumIddPerBlock()) {
        void *heapPointer = nullptr;
        size_t heapSize = sizeof(INTERFACE_DESCRIPTOR_DATA) * container.getNumIddPerBlock();
        if (childDsh != nullptr) {
            childDsh->align(NEO::EncodeDispatchKernel<Family>::getDefaultDshAlignment());
            heapPointer = childDsh->getSpace(heapSize);
        } else {
            container.getIndirectHeap(HeapType::dynamicState)->align(NEO::EncodeDispatchKernel<Family>::getDefaultDshAlignment());
            heapPointer = container.getHeapSpaceAllowGrow(HeapType::dynamicState, heapSize);
        }
        container.setIddBlock(heapPointer);
        container.nextIddInBlockRef() = 0;
    }

    iddOffset = container.nextIddInBlockRef();
    auto interfaceDescriptorData = static_cast<INTERFACE_DESCRIPTOR_DATA *>(container.getIddBlock());
    container.nextIddInBlockRef()++;
    return &interfaceDescriptorData[iddOffset];
}
2020-05-11 18:20:21 +02:00
2020-07-06 22:55:37 +02:00
template <typename Family>
bool EncodeDispatchKernel<Family>::inlineDataProgrammingRequired(const KernelDescriptor &kernelDesc) {
    // Inline data is used when the kernel requests it, unless the
    // EnablePassInlineData debug flag (when set) forces the decision.
    const auto debugOverride = debugManager.flags.EnablePassInlineData.get();
    const bool inlineDataAllowed = (debugOverride == -1) ? true : (debugOverride != 0);
    return inlineDataAllowed && kernelDesc.kernelAttributes.flags.passInlineData;
}
2021-09-24 12:20:21 +00:00
template <typename Family>
void EncodeIndirectParams<Family>::encode(CommandContainer &container, uint64_t crossThreadDataGpuVa, DispatchKernelEncoderI *dispatchInterface, uint64_t implicitArgsGpuPtr, IndirectParamsInInlineDataArgs *outArgs) {
    // Emits the commands that copy the indirect dispatch parameters (group count,
    // global work size, work dimension) from GPU registers into the kernel's
    // cross-thread data at crossThreadDataGpuVa and, when implicitArgsGpuPtr is set,
    // into the implicit-args structure as well.
    const auto &kernelDescriptor = dispatchInterface->getKernelDescriptor();
    if (outArgs) {
        // Mark which parameters land inside the inline-data portion of cross-thread
        // data (offset smaller than inlineDataPayloadSize). Stores to those locations
        // are recorded in outArgs->commandsToPatch and must be rebased later via
        // applyInlineDataGpuVA once the final inline-data GPU VA is known.
        for (int i = 0; i < 3; i++) {
            if (!NEO::isUndefinedOffset(kernelDescriptor.payloadMappings.dispatchTraits.numWorkGroups[i]) && kernelDescriptor.kernelAttributes.inlineDataPayloadSize > kernelDescriptor.payloadMappings.dispatchTraits.numWorkGroups[i]) {
                outArgs->storeGroupCountInInlineData[i] = true;
            }
            if (!NEO::isUndefinedOffset(kernelDescriptor.payloadMappings.dispatchTraits.globalWorkSize[i]) && kernelDescriptor.kernelAttributes.inlineDataPayloadSize > kernelDescriptor.payloadMappings.dispatchTraits.globalWorkSize[i]) {
                outArgs->storeGlobalWorkSizeInInlineData[i] = true;
            }
        }
        if (!NEO::isUndefinedOffset(kernelDescriptor.payloadMappings.dispatchTraits.workDim) && kernelDescriptor.kernelAttributes.inlineDataPayloadSize > kernelDescriptor.payloadMappings.dispatchTraits.workDim) {
            outArgs->storeWorkDimInInlineData = true;
        }
    }
    setGroupCountIndirect(container, kernelDescriptor.payloadMappings.dispatchTraits.numWorkGroups, crossThreadDataGpuVa, outArgs);
    setGlobalWorkSizeIndirect(container, kernelDescriptor.payloadMappings.dispatchTraits.globalWorkSize, crossThreadDataGpuVa, dispatchInterface->getGroupSize(), outArgs);

    // The workDim destination must be dword-aligned for the store below; misaligned
    // offsets are handled inside setWorkDimIndirect only relative to a dword base.
    UNRECOVERABLE_IF(NEO::isValidOffset(kernelDescriptor.payloadMappings.dispatchTraits.workDim) && (kernelDescriptor.payloadMappings.dispatchTraits.workDim & 0b11) != 0u);

    setWorkDimIndirect(container, kernelDescriptor.payloadMappings.dispatchTraits.workDim, crossThreadDataGpuVa, dispatchInterface->getGroupSize(), outArgs);

    if (implicitArgsGpuPtr) {
        // Implicit args layout depends on the struct version; select the matching
        // offsets and emit the same parameter stores into the implicit-args buffer.
        // Patching (outArgs) is not needed here: implicitArgsGpuPtr is already final.
        const auto version = dispatchInterface->getImplicitArgs()->v0.header.structVersion;
        if (version == 0) {
            constexpr CrossThreadDataOffset groupCountOffset[] = {offsetof(ImplicitArgsV0, groupCountX), offsetof(ImplicitArgsV0, groupCountY), offsetof(ImplicitArgsV0, groupCountZ)};
            constexpr CrossThreadDataOffset globalSizeOffset[] = {offsetof(ImplicitArgsV0, globalSizeX), offsetof(ImplicitArgsV0, globalSizeY), offsetof(ImplicitArgsV0, globalSizeZ)};
            constexpr auto numWorkDimOffset = offsetof(ImplicitArgsV0, numWorkDim);
            setGroupCountIndirect(container, groupCountOffset, implicitArgsGpuPtr, nullptr);
            setGlobalWorkSizeIndirect(container, globalSizeOffset, implicitArgsGpuPtr, dispatchInterface->getGroupSize(), nullptr);
            setWorkDimIndirect(container, numWorkDimOffset, implicitArgsGpuPtr, dispatchInterface->getGroupSize(), nullptr);
        } else if (version == 1) {
            constexpr CrossThreadDataOffset groupCountOffsetV1[] = {offsetof(ImplicitArgsV1, groupCountX), offsetof(ImplicitArgsV1, groupCountY), offsetof(ImplicitArgsV1, groupCountZ)};
            constexpr CrossThreadDataOffset globalSizeOffsetV1[] = {offsetof(ImplicitArgsV1, globalSizeX), offsetof(ImplicitArgsV1, globalSizeY), offsetof(ImplicitArgsV1, globalSizeZ)};
            constexpr auto numWorkDimOffsetV1 = offsetof(ImplicitArgsV1, numWorkDim);
            setGroupCountIndirect(container, groupCountOffsetV1, implicitArgsGpuPtr, nullptr);
            setGlobalWorkSizeIndirect(container, globalSizeOffsetV1, implicitArgsGpuPtr, dispatchInterface->getGroupSize(), nullptr);
            setWorkDimIndirect(container, numWorkDimOffsetV1, implicitArgsGpuPtr, dispatchInterface->getGroupSize(), nullptr);
        } else if (version == 2) {
            constexpr CrossThreadDataOffset groupCountOffsetV2[] = {offsetof(ImplicitArgsV2, groupCountX), offsetof(ImplicitArgsV2, groupCountY), offsetof(ImplicitArgsV2, groupCountZ)};
            constexpr CrossThreadDataOffset globalSizeOffsetV2[] = {offsetof(ImplicitArgsV2, globalSizeX), offsetof(ImplicitArgsV2, globalSizeY), offsetof(ImplicitArgsV2, globalSizeZ)};
            constexpr auto numWorkDimOffsetV2 = offsetof(ImplicitArgsV2, numWorkDim);
            setGroupCountIndirect(container, groupCountOffsetV2, implicitArgsGpuPtr, nullptr);
            setGlobalWorkSizeIndirect(container, globalSizeOffsetV2, implicitArgsGpuPtr, dispatchInterface->getGroupSize(), nullptr);
            setWorkDimIndirect(container, numWorkDimOffsetV2, implicitArgsGpuPtr, dispatchInterface->getGroupSize(), nullptr);
        } else {
            // Unknown implicit-args version: cannot encode safely.
            UNRECOVERABLE_IF(true);
        }
    }
    if (outArgs && !outArgs->commandsToPatch.empty()) {
        // Commands recorded for patching may be re-encoded by the CPU while the GPU is
        // consuming the stream; bracket a self-jump with MI_ARB_CHECK so execution can
        // be paused/resumed around the patched region.
        // NOTE(review): exact synchronization contract is established by the caller of
        // applyInlineDataGpuVA — confirm against call sites.
        auto &commandStream = *container.getCommandStream();
        EncodeMiArbCheck<Family>::program(commandStream, true);
        auto gpuVa = commandStream.getCurrentGpuAddressPosition() + EncodeBatchBufferStartOrEnd<Family>::getBatchBufferStartSize();
        EncodeBatchBufferStartOrEnd<Family>::programBatchBufferStart(&commandStream, gpuVa, !(container.getFlushTaskUsedForImmediate() || container.isUsingPrimaryBuffer()), false, false);
        EncodeMiArbCheck<Family>::program(commandStream, false);
    }
}
2020-01-17 08:56:05 +01:00
template <typename Family>
2025-04-03 16:02:01 +00:00
void EncodeIndirectParams<Family>::setGroupCountIndirect(CommandContainer &container, const NEO::CrossThreadDataOffset offsets[3], uint64_t crossThreadAddress, IndirectParamsInInlineDataArgs *outArgs) {
2020-10-07 14:22:26 +02:00
for (int i = 0; i < 3; ++i) {
if (NEO::isUndefinedOffset(offsets[i])) {
continue;
}
2025-04-03 16:02:01 +00:00
void **storeCmd = nullptr;
if (outArgs && outArgs->storeGroupCountInInlineData[i]) {
outArgs->commandsToPatch.push_back({});
auto &commandArgs = outArgs->commandsToPatch.back();
storeCmd = &commandArgs.command;
commandArgs.address = offsets[i];
commandArgs.offset = RegisterOffsets::gpgpuDispatchDim[i];
commandArgs.isBcs = false;
commandArgs.workloadPartition = false;
}
EncodeStoreMMIO<Family>::encode(*container.getCommandStream(), RegisterOffsets::gpgpuDispatchDim[i], ptrOffset(crossThreadAddress, offsets[i]), false, storeCmd, false);
2020-10-07 14:22:26 +02:00
}
}
2021-06-25 13:35:30 +00:00
template <typename Family>
2025-04-03 16:02:01 +00:00
void EncodeIndirectParams<Family>::applyInlineDataGpuVA(IndirectParamsInInlineDataArgs &args, uint64_t inlineDataGpuVa) {
for (auto &commandArgs : args.commandsToPatch) {
auto commandToPatch = reinterpret_cast<MI_STORE_REGISTER_MEM *>(commandArgs.command);
EncodeStoreMMIO<Family>::encode(commandToPatch, commandArgs.offset, commandArgs.address + inlineDataGpuVa, commandArgs.workloadPartition, commandArgs.isBcs);
}
}
template <typename Family>
void EncodeIndirectParams<Family>::setWorkDimIndirect(CommandContainer &container, const NEO::CrossThreadDataOffset workDimOffset, uint64_t crossThreadAddress, const uint32_t *groupSize, IndirectParamsInInlineDataArgs *outArgs) {
    // Computes the effective work dimension (1, 2 or 3) on the command streamer's ALU
    // at dispatch time (group counts are only known then for indirect dispatch) and
    // stores it into cross-thread data at workDimOffset. No-op if the kernel does not
    // consume workDim.
    if (NEO::isValidOffset(workDimOffset)) {
        auto dstPtr = ptrOffset(crossThreadAddress, workDimOffset);
        constexpr uint32_t resultRegister = RegisterOffsets::csGprR0;
        constexpr AluRegisters resultAluRegister = AluRegisters::gpr0;
        // The store writes a full dword, but workDim may sit at a sub-dword offset.
        // 'offset' is a mask of the low bytes that precede workDim within the dword;
        // 'memoryMask' selects every byte of the dword except workDim's own byte,
        // so the surrounding bytes can be preserved via read-modify-write.
        const uint32_t offset = static_cast<uint32_t>((1ull << 8 * (dstPtr & 0b11)) - 1);
        const uint32_t memoryMask = std::numeric_limits<uint32_t>::max() - static_cast<uint32_t>((1ull << 8 * ((dstPtr & 0b11) + 1)) - 1) + offset;

        /*
         * if ( groupSize[2] > 1 || groupCount[2] > 1 ) { workdim = 3 }
         * else if ( groupSize[1] + groupCount[1] > 2 ) { workdim = 2 }
         * else { workdim = 1 }
         */
        if (groupSize[2] > 1) {
            // groupSize[2] is known on the CPU: workDim is unconditionally 3, so a
            // plain immediate (shifted to the byte position) suffices.
            EncodeSetMMIO<Family>::encodeIMM(container, resultRegister, 3 << (8 * (dstPtr & 0b11)), true, false);
        } else {
            // Otherwise the result depends on GPU-side group counts; evaluate the
            // decision tree above with MI_MATH on GPR registers.
            constexpr uint32_t groupCount2Register = RegisterOffsets::csGprR1;
            constexpr AluRegisters groupCount2AluRegister = AluRegisters::gpr1;

            constexpr uint32_t groupSize1Register = RegisterOffsets::csGprR0;
            constexpr AluRegisters groupSize1AluRegister = AluRegisters::gpr0;

            constexpr uint32_t groupCount1Register = RegisterOffsets::csGprR1;
            constexpr AluRegisters groupCount1AluRegister = AluRegisters::gpr1;

            constexpr AluRegisters sumAluRegister = AluRegisters::gpr0;

            // Flags (0 or 1) for "workDim == 3" and "workDim >= 2".
            constexpr AluRegisters workDimEq3AluRegister = AluRegisters::gpr3;

            constexpr AluRegisters workDimGe2AluRegister = AluRegisters::gpr4;

            constexpr uint32_t constantOneRegister = RegisterOffsets::csGprR5;
            constexpr AluRegisters constantOneAluRegister = AluRegisters::gpr5;
            constexpr uint32_t constantTwoRegister = RegisterOffsets::csGprR6;
            constexpr AluRegisters constantTwoAluRegister = AluRegisters::gpr6;

            // Backup of the original dword at dstPtr (for sub-dword preservation).
            constexpr uint32_t backupRegister = RegisterOffsets::csGprR7;
            constexpr AluRegisters backupAluRegister = AluRegisters::gpr7;

            // gpr8 first holds memoryMask, then is reloaded with 'offset'.
            constexpr uint32_t memoryMaskRegister = RegisterOffsets::csGprR8;
            constexpr AluRegisters memoryMaskAluRegister = AluRegisters::gpr8;

            constexpr uint32_t offsetRegister = RegisterOffsets::csGprR8;
            constexpr AluRegisters offsetAluRegister = AluRegisters::gpr8;
            if (offset) {
                // Misaligned destination: read the existing dword and clear workDim's
                // byte so it can be OR-merged back in at the end.
                EncodeSetMMIO<Family>::encodeMEM(container, backupRegister, dstPtr, false);
                EncodeSetMMIO<Family>::encodeIMM(container, memoryMaskRegister, memoryMask, true, false);
                EncodeMath<Family>::bitwiseAnd(container, memoryMaskAluRegister, backupAluRegister, backupAluRegister);
                EncodeSetMMIO<Family>::encodeIMM(container, offsetRegister, offset, true, false);
            }
            EncodeSetMMIO<Family>::encodeIMM(container, constantOneRegister, 1, true, false);
            EncodeSetMMIO<Family>::encodeIMM(container, constantTwoRegister, 2, true, false);

            EncodeSetMMIO<Family>::encodeREG(container, groupCount2Register, RegisterOffsets::gpgpuDispatchDim[2], false);

            // workDimEq3 = (groupCount[2] > 1) ? 1 : 0
            EncodeMath<Family>::greaterThan(container, groupCount2AluRegister, constantOneAluRegister, workDimEq3AluRegister);
            EncodeMath<Family>::bitwiseAnd(container, workDimEq3AluRegister, constantOneAluRegister, workDimEq3AluRegister);

            EncodeSetMMIO<Family>::encodeIMM(container, groupSize1Register, groupSize[1], true, false);
            EncodeSetMMIO<Family>::encodeREG(container, groupCount1Register, RegisterOffsets::gpgpuDispatchDim[1], false);

            // workDimGe2 = (groupSize[1] + groupCount[1] + workDimEq3 > 2) ? 1 : 0
            EncodeMath<Family>::addition(container, groupSize1AluRegister, groupCount1AluRegister, sumAluRegister);
            EncodeMath<Family>::addition(container, sumAluRegister, workDimEq3AluRegister, sumAluRegister);
            EncodeMath<Family>::greaterThan(container, sumAluRegister, constantTwoAluRegister, workDimGe2AluRegister);
            EncodeMath<Family>::bitwiseAnd(container, workDimGe2AluRegister, constantOneAluRegister, workDimGe2AluRegister);

            if (offset) {
                // Shift the 0/1 flags (and the constant 1) up to workDim's byte position
                // inside the dword: adding 'offset' (all-ones below the byte) then
                // masking keeps exactly the flag bit at the shifted position.
                EncodeMath<Family>::addition(container, constantOneAluRegister, offsetAluRegister, constantOneAluRegister);
                EncodeMath<Family>::addition(container, workDimEq3AluRegister, offsetAluRegister, workDimEq3AluRegister);
                EncodeMath<Family>::bitwiseAnd(container, workDimEq3AluRegister, constantOneAluRegister, workDimEq3AluRegister);
                EncodeMath<Family>::addition(container, workDimGe2AluRegister, offsetAluRegister, workDimGe2AluRegister);
                EncodeMath<Family>::bitwiseAnd(container, workDimGe2AluRegister, constantOneAluRegister, workDimGe2AluRegister);
            }
            // result = 1 + workDimGe2 + workDimEq3  (i.e. 1, 2 or 3)
            EncodeSetMMIO<Family>::encodeREG(container, resultRegister, constantOneRegister, false);
            EncodeMath<Family>::addition(container, resultAluRegister, workDimGe2AluRegister, resultAluRegister);
            EncodeMath<Family>::addition(container, resultAluRegister, workDimEq3AluRegister, resultAluRegister);
            if (offset) {
                // Merge back the preserved neighboring bytes of the destination dword.
                EncodeMath<Family>::addition(container, resultAluRegister, backupAluRegister, resultAluRegister);
            }
        }
        void **storeCmd = nullptr;
        if (outArgs && outArgs->storeWorkDimInInlineData) {
            // Destination lives in inline data: record the store for later rebasing
            // (see applyInlineDataGpuVA).
            outArgs->commandsToPatch.push_back({});
            auto &commandArgs = outArgs->commandsToPatch.back();
            storeCmd = &commandArgs.command;
            commandArgs.address = workDimOffset;
            commandArgs.offset = resultRegister;
            commandArgs.isBcs = false;
            commandArgs.workloadPartition = false;
        }
        EncodeStoreMMIO<Family>::encode(*container.getCommandStream(), resultRegister, dstPtr, false, storeCmd, false);
    }
}
2020-11-27 09:22:59 +00:00
template <typename Family>
2022-09-13 12:47:58 +00:00
bool EncodeSurfaceState<Family>::doBindingTablePrefetch() {
auto enableBindingTablePrefetech = isBindingTablePrefetchPreferred();
2023-11-30 08:32:25 +00:00
if (debugManager.flags.ForceBtpPrefetchMode.get() != -1) {
enableBindingTablePrefetech = static_cast<bool>(debugManager.flags.ForceBtpPrefetchMode.get());
2020-11-27 09:22:59 +00:00
}
2022-09-13 12:47:58 +00:00
return enableBindingTablePrefetech;
}
template <typename Family>
2025-05-29 12:46:10 +00:00
template <typename InterfaceDescriptorType>
void EncodeDispatchKernelWithHeap<Family>::adjustBindingTablePrefetch(InterfaceDescriptorType &interfaceDescriptor, uint32_t samplerCount, uint32_t bindingTableEntryCount) {
2022-09-13 12:47:58 +00:00
auto enablePrefetch = EncodeSurfaceState<Family>::doBindingTablePrefetch();
2020-11-27 09:22:59 +00:00
if (enablePrefetch) {
2025-05-29 12:46:10 +00:00
interfaceDescriptor.setSamplerCount(static_cast<typename InterfaceDescriptorType::SAMPLER_COUNT>((samplerCount + 3) / 4));
2020-11-27 09:22:59 +00:00
interfaceDescriptor.setBindingTableEntryCount(std::min(bindingTableEntryCount, 31u));
} else {
2025-05-29 12:46:10 +00:00
interfaceDescriptor.setSamplerCount(InterfaceDescriptorType::SAMPLER_COUNT::SAMPLER_COUNT_NO_SAMPLERS_USED);
2020-11-27 09:22:59 +00:00
interfaceDescriptor.setBindingTableEntryCount(0u);
}
}
2022-09-26 22:28:10 +00:00
template <typename Family>
2023-02-02 18:57:24 +00:00
size_t EncodeDispatchKernel<Family>::getSizeRequiredDsh(const KernelDescriptor &kernelDescriptor, uint32_t iddCount) {
2022-09-26 22:28:10 +00:00
constexpr auto samplerStateSize = sizeof(typename Family::SAMPLER_STATE);
2022-12-08 17:23:33 +00:00
const auto numSamplers = kernelDescriptor.payloadMappings.samplerTable.numSamplers;
2023-02-02 18:57:24 +00:00
const auto additionalDshSize = additionalSizeRequiredDsh(iddCount);
2022-09-26 22:28:10 +00:00
if (numSamplers == 0U) {
2023-02-02 18:57:24 +00:00
return alignUp(additionalDshSize, EncodeDispatchKernel<Family>::getDefaultDshAlignment());
2022-09-26 22:28:10 +00:00
}
2022-12-08 17:23:33 +00:00
size_t size = kernelDescriptor.payloadMappings.samplerTable.tableOffset -
kernelDescriptor.payloadMappings.samplerTable.borderColor;
2023-02-02 18:57:24 +00:00
size = alignUp(size, EncodeDispatchKernel<Family>::getDefaultDshAlignment());
2022-09-26 22:28:10 +00:00
size += numSamplers * samplerStateSize;
2025-03-18 00:28:35 +00:00
size = alignUp(size, InterfaceDescriptorTraits<INTERFACE_DESCRIPTOR_DATA>::samplerStatePointerAlignSize);
2022-09-26 22:28:10 +00:00
if (additionalDshSize > 0) {
2023-07-17 10:00:40 +00:00
size = alignUp(size, EncodeStates<Family>::alignInterfaceDescriptorData);
2022-09-26 22:28:10 +00:00
size += additionalDshSize;
2023-02-02 18:57:24 +00:00
size = alignUp(size, EncodeDispatchKernel<Family>::getDefaultDshAlignment());
2022-09-26 22:28:10 +00:00
}
return size;
}
template <typename Family>
size_t EncodeDispatchKernel<Family>::getSizeRequiredSsh(const KernelInfo &kernelInfo) {
    // Surface state heap requirement: the compiled heap size, or — for bindless
    // kernels — at least one surface state per stateful argument, whichever is larger.
    size_t sshSize = kernelInfo.heapInfo.surfaceStateHeapSize;
    if (NEO::KernelDescriptor::isBindlessAddressingKernel(kernelInfo.kernelDescriptor)) {
        const size_t bindlessSize = kernelInfo.kernelDescriptor.kernelAttributes.numArgsStateful * sizeof(typename Family::RENDER_SURFACE_STATE);
        sshSize = std::max(sshSize, bindlessSize);
    }
    return alignUp(sshSize, EncodeDispatchKernel<Family>::getDefaultSshAlignment());
}
2023-02-02 18:57:24 +00:00
template <typename Family>
size_t EncodeDispatchKernel<Family>::getDefaultDshAlignment() {
2024-07-24 15:50:00 +00:00
return Family::cacheLineSize;
2023-02-02 18:57:24 +00:00
}
2020-10-07 14:22:26 +02:00
template <typename Family>
2025-04-03 16:02:01 +00:00
void EncodeIndirectParams<Family>::setGlobalWorkSizeIndirect(CommandContainer &container, const NEO::CrossThreadDataOffset offsets[3], uint64_t crossThreadAddress, const uint32_t *lws, IndirectParamsInInlineDataArgs *outArgs) {
2020-10-07 14:22:26 +02:00
for (int i = 0; i < 3; ++i) {
if (NEO::isUndefinedOffset(offsets[i])) {
continue;
}
2025-04-03 16:02:01 +00:00
EncodeStoreMMIOParams *storeParams = nullptr;
auto patchLocation = ptrOffset(crossThreadAddress, offsets[i]);
if (outArgs && outArgs->storeGlobalWorkSizeInInlineData[i]) {
outArgs->commandsToPatch.push_back({});
storeParams = &outArgs->commandsToPatch.back();
patchLocation = offsets[i];
}
EncodeMathMMIO<Family>::encodeMulRegVal(container, RegisterOffsets::gpgpuDispatchDim[i], lws[i], patchLocation, false, storeParams);
2020-10-07 14:22:26 +02:00
}
2020-01-17 08:56:05 +01:00
}
2021-09-24 12:20:21 +00:00
template <typename Family>
inline size_t EncodeIndirectParams<Family>::getCmdsSizeForSetWorkDimIndirect(const uint32_t *groupSize, bool misaligedPtr) {
    // Worst-case command stream size for setWorkDimIndirect; the terms below mirror
    // the commands that function emits and must stay in sync with it.
    constexpr uint32_t aluCmdSize = sizeof(MI_MATH) + sizeof(MI_MATH_ALU_INST_INLINE) * RegisterConstants::numAluInstForReadModifyWrite;
    // Base: final MI_STORE_REGISTER_MEM plus one immediate load (the fast path when
    // groupSize[2] > 1 uses only these).
    auto requiredSize = sizeof(MI_STORE_REGISTER_MEM) + sizeof(MI_LOAD_REGISTER_IMM);
    UNRECOVERABLE_IF(!groupSize);
    if (groupSize[2] < 2) {
        // GPU-side evaluation of workDim: constants, dispatch-dim register copies and
        // the ALU decision tree.
        requiredSize += 2 * sizeof(MI_LOAD_REGISTER_IMM) + 3 * sizeof(MI_LOAD_REGISTER_REG) + 8 * aluCmdSize;
        if (misaligedPtr) {
            // Sub-dword destination adds the read-modify-write sequence (backup load,
            // masks and byte-position adjustments).
            requiredSize += 2 * sizeof(MI_LOAD_REGISTER_IMM) + sizeof(MI_LOAD_REGISTER_MEM) + 7 * aluCmdSize;
        }
    }
    return requiredSize;
}
2023-09-11 15:39:43 +00:00
2020-02-11 15:21:18 +01:00
template <typename Family>
2023-03-10 13:49:06 +00:00
void EncodeSemaphore<Family>::addMiSemaphoreWaitCommand(LinearStream &commandStream,
2020-02-11 15:21:18 +01:00
uint64_t compareAddress,
2023-09-11 15:39:43 +00:00
uint64_t compareData,
2020-10-16 15:58:47 +02:00
COMPARE_OPERATION compareMode,
2023-09-11 15:39:43 +00:00
bool registerPollMode,
2023-09-12 11:42:40 +00:00
bool useQwordData,
2024-02-28 13:51:36 +00:00
bool indirect,
2024-03-26 11:56:45 +00:00
bool switchOnUnsuccessful,
2024-02-28 13:51:36 +00:00
void **outSemWaitCmd) {
2020-02-11 15:21:18 +01:00
auto semaphoreCommand = commandStream.getSpaceForCmd<MI_SEMAPHORE_WAIT>();
2024-02-28 13:51:36 +00:00
if (outSemWaitCmd != nullptr) {
*outSemWaitCmd = semaphoreCommand;
}
2024-03-26 11:56:45 +00:00
programMiSemaphoreWait(semaphoreCommand, compareAddress, compareData, compareMode, registerPollMode, true, useQwordData, indirect, switchOnUnsuccessful);
2023-03-03 19:01:14 +00:00
}
2025-09-22 11:19:51 +00:00
template <typename Family>
void EncodeSemaphore<Family>::programMiSemaphoreWaitCommand(LinearStream *commandStream,
                                                            MI_SEMAPHORE_WAIT *semaphoreCommand,
                                                            uint64_t compareAddress,
                                                            uint64_t compareData,
                                                            COMPARE_OPERATION compareMode,
                                                            bool registerPollMode,
                                                            bool waitMode,
                                                            bool useQwordData,
                                                            bool indirect,
                                                            bool switchOnUnsuccessful) {
    // When no destination command is supplied, carve one out of the stream.
    auto *target = semaphoreCommand;
    if (target == nullptr) {
        DEBUG_BREAK_IF(commandStream == nullptr);
        target = commandStream->getSpaceForCmd<MI_SEMAPHORE_WAIT>();
    }
    programMiSemaphoreWait(target, compareAddress, compareData, compareMode, registerPollMode, waitMode, useQwordData, indirect, switchOnUnsuccessful);
}
2023-03-03 19:01:14 +00:00
template <typename Family>
2023-03-10 13:49:06 +00:00
void EncodeSemaphore<Family>::applyMiSemaphoreWaitCommand(LinearStream &commandStream, std::list<void *> &commandsList) {
2023-03-03 19:01:14 +00:00
MI_SEMAPHORE_WAIT *semaphoreCommand = commandStream.getSpaceForCmd<MI_SEMAPHORE_WAIT>();
commandsList.push_back(semaphoreCommand);
2020-02-11 15:21:18 +01:00
}
2020-01-24 14:58:15 +01:00
template <typename Family>
2020-10-01 21:08:46 +02:00
void EncodeAtomic<Family>::programMiAtomic(MI_ATOMIC *atomic,
uint64_t writeAddress,
2020-01-24 14:58:15 +01:00
ATOMIC_OPCODES opcode,
2020-10-01 21:08:46 +02:00
DATA_SIZE dataSize,
uint32_t returnDataControl,
2021-03-04 17:54:14 +00:00
uint32_t csStall,
2024-03-27 16:03:11 +00:00
uint64_t operand1Data,
uint64_t operand2Data) {
2020-04-08 18:33:03 +02:00
MI_ATOMIC cmd = Family::cmdInitAtomic;
cmd.setAtomicOpcode(opcode);
cmd.setDataSize(dataSize);
2021-03-04 17:54:14 +00:00
EncodeAtomic<Family>::setMiAtomicAddress(cmd, writeAddress);
2020-10-01 21:08:46 +02:00
cmd.setReturnDataControl(returnDataControl);
cmd.setCsStall(csStall);
2024-03-27 16:03:11 +00:00
2024-05-21 14:14:07 +00:00
if (opcode == ATOMIC_OPCODES::ATOMIC_4B_MOVE || opcode == ATOMIC_OPCODES::ATOMIC_8B_MOVE || opcode == ATOMIC_OPCODES::ATOMIC_8B_CMP_WR || opcode == ATOMIC_OPCODES::ATOMIC_8B_ADD) {
2021-03-04 17:54:14 +00:00
cmd.setDwordLength(MI_ATOMIC::DWORD_LENGTH::DWORD_LENGTH_INLINE_DATA_1);
cmd.setInlineData(0x1);
2024-03-27 16:03:11 +00:00
cmd.setOperand1DataDword0(getLowPart(operand1Data));
cmd.setOperand1DataDword1(getHighPart(operand1Data));
cmd.setOperand2DataDword0(getLowPart(operand2Data));
cmd.setOperand2DataDword1(getHighPart(operand2Data));
2021-03-04 17:54:14 +00:00
}
2020-04-08 18:33:03 +02:00
*atomic = cmd;
2020-01-24 14:58:15 +01:00
}
2020-10-07 14:22:26 +02:00
template <typename Family>
void EncodeAtomic<Family>::programMiAtomic(LinearStream &commandStream,
                                           uint64_t writeAddress,
                                           ATOMIC_OPCODES opcode,
                                           DATA_SIZE dataSize,
                                           uint32_t returnDataControl,
                                           uint32_t csStall,
                                           uint64_t operand1Data,
                                           uint64_t operand2Data) {
    // Reserves space in the stream and delegates to the in-place variant.
    EncodeAtomic<Family>::programMiAtomic(commandStream.getSpaceForCmd<MI_ATOMIC>(),
                                          writeAddress, opcode, dataSize,
                                          returnDataControl, csStall,
                                          operand1Data, operand2Data);
}
2020-01-28 13:50:19 +01:00
template <typename Family>
2022-11-10 09:58:55 +00:00
void EncodeBatchBufferStartOrEnd<Family>::programConditionalDataMemBatchBufferStart(LinearStream &commandStream, uint64_t startAddress, uint64_t compareAddress,
2024-04-19 14:20:27 +00:00
uint64_t compareData, CompareOperation compareOperation, bool indirect, bool useQwordData, bool isBcs) {
2024-09-04 21:24:17 +00:00
EncodeSetMMIO<Family>::encodeMEM(commandStream, RegisterOffsets::csGprR7, compareAddress, isBcs);
2022-11-10 09:58:55 +00:00
2023-09-11 17:20:00 +00:00
if (useQwordData) {
2024-09-04 21:24:17 +00:00
EncodeSetMMIO<Family>::encodeMEM(commandStream, RegisterOffsets::csGprR7 + 4, compareAddress + 4, isBcs);
2023-09-11 17:20:00 +00:00
} else {
2024-04-19 14:20:27 +00:00
LriHelper<Family>::program(&commandStream, RegisterOffsets::csGprR7 + 4, 0, true, isBcs);
2023-09-11 17:20:00 +00:00
}
uint32_t compareDataLow = static_cast<uint32_t>(compareData & std::numeric_limits<uint32_t>::max());
uint32_t compareDataHigh = useQwordData ? static_cast<uint32_t>(compareData >> 32) : 0;
2024-04-19 14:20:27 +00:00
LriHelper<Family>::program(&commandStream, RegisterOffsets::csGprR8, compareDataLow, true, isBcs);
LriHelper<Family>::program(&commandStream, RegisterOffsets::csGprR8 + 4, compareDataHigh, true, isBcs);
2022-11-10 09:58:55 +00:00
2024-09-04 21:24:17 +00:00
programConditionalBatchBufferStartBase(commandStream, startAddress, AluRegisters::gpr7, AluRegisters::gpr8, compareOperation, indirect, isBcs);
2022-11-10 09:58:55 +00:00
}
template <typename Family>
void EncodeBatchBufferStartOrEnd<Family>::programConditionalDataRegBatchBufferStart(LinearStream &commandStream, uint64_t startAddress, uint32_t compareReg,
                                                                                    uint64_t compareData, CompareOperation compareOperation, bool indirect, bool useQwordData, bool isBcs) {
    // Conditional jump based on (register value <op> immediate):
    // GPR7 <- contents of compareReg (and compareReg + 4 for qword compares; upper
    // dword zeroed otherwise).
    EncodeSetMMIO<Family>::encodeREG(commandStream, RegisterOffsets::csGprR7, compareReg, isBcs);
    if (useQwordData) {
        EncodeSetMMIO<Family>::encodeREG(commandStream, RegisterOffsets::csGprR7 + 4, compareReg + 4, isBcs);
    } else {
        LriHelper<Family>::program(&commandStream, RegisterOffsets::csGprR7 + 4, 0, true, isBcs);
    }
    // GPR8 <- immediate compare value.
    const uint32_t lowDword = static_cast<uint32_t>(compareData & std::numeric_limits<uint32_t>::max());
    const uint32_t highDword = useQwordData ? static_cast<uint32_t>(compareData >> 32) : 0u;
    LriHelper<Family>::program(&commandStream, RegisterOffsets::csGprR8, lowDword, true, isBcs);
    LriHelper<Family>::program(&commandStream, RegisterOffsets::csGprR8 + 4, highDword, true, isBcs);

    programConditionalBatchBufferStartBase(commandStream, startAddress, AluRegisters::gpr7, AluRegisters::gpr8, compareOperation, indirect, isBcs);
}
template <typename Family>
void EncodeBatchBufferStartOrEnd<Family>::programConditionalRegRegBatchBufferStart(LinearStream &commandStream, uint64_t startAddress, AluRegisters compareReg0,
                                                                                   AluRegisters compareReg1, CompareOperation compareOperation, bool indirect, bool isBcs) {
    // Both compare operands already live in ALU registers; go straight to the
    // compare-and-branch base sequence.
    programConditionalBatchBufferStartBase(commandStream, startAddress, compareReg0, compareReg1, compareOperation, indirect, isBcs);
}
template <typename Family>
void EncodeBatchBufferStartOrEnd<Family>::programConditionalRegMemBatchBufferStart(LinearStream &commandStream, uint64_t startAddress, uint64_t compareAddress, uint32_t compareReg,
                                                                                   CompareOperation compareOperation, bool indirect, bool isBcs) {
    // Conditional jump based on (memory value <op> register value), 32-bit operands:
    // GPR7 <- dword loaded from compareAddress, upper dword zeroed.
    EncodeSetMMIO<Family>::encodeMEM(commandStream, RegisterOffsets::csGprR7, compareAddress, isBcs);
    LriHelper<Family>::program(&commandStream, RegisterOffsets::csGprR7 + 4, 0, true, isBcs);

    // GPR8 <- dword copied from compareReg, upper dword zeroed.
    EncodeSetMMIO<Family>::encodeREG(commandStream, RegisterOffsets::csGprR8, compareReg, isBcs);
    LriHelper<Family>::program(&commandStream, RegisterOffsets::csGprR8 + 4, 0, true, isBcs);

    programConditionalBatchBufferStartBase(commandStream, startAddress, AluRegisters::gpr7, AluRegisters::gpr8, compareOperation, indirect, isBcs);
}
template <typename Family>
void EncodeBatchBufferStartOrEnd<Family>::programConditionalBatchBufferStartBase(LinearStream &commandStream, uint64_t startAddress, AluRegisters regA, AluRegisters regB,
                                                                                 CompareOperation compareOperation, bool indirect, bool isBcs) {
    // Computes (regA - regB) on the command streamer ALU, stores the relevant flag
    // (ZF for equality compares, CF for magnitude compares) into GPR7, routes it to
    // the predicate-result register and emits a predicated MI_BATCH_BUFFER_START that
    // only executes when the condition holds.
    EncodeAluHelper<Family, 4> aluHelper;
    aluHelper.setNextAlu(AluRegisters::opcodeLoad, AluRegisters::srca, regA);
    aluHelper.setNextAlu(AluRegisters::opcodeLoad, AluRegisters::srcb, regB);
    aluHelper.setNextAlu(AluRegisters::opcodeSub);
    if ((compareOperation == CompareOperation::equal) || (compareOperation == CompareOperation::notEqual)) {
        // ZF set <=> regA == regB
        aluHelper.setNextAlu(AluRegisters::opcodeStore, AluRegisters::gpr7, AluRegisters::zf);
    } else if ((compareOperation == CompareOperation::greaterOrEqual) || (compareOperation == CompareOperation::less)) {
        // CF set <=> regA < regB (borrow from the subtraction)
        aluHelper.setNextAlu(AluRegisters::opcodeStore, AluRegisters::gpr7, AluRegisters::cf);
    } else {
        // Unsupported compare operation.
        UNRECOVERABLE_IF(true);
    }
    aluHelper.copyToCmdStream(commandStream);

    EncodeSetMMIO<Family>::encodeREG(commandStream, RegisterOffsets::csPredicateResult2, RegisterOffsets::csGprR7, isBcs);

    // Flag set means "condition is equal/less"; choose the predicate polarity so the
    // batch buffer start is NOOPed exactly when the requested condition is false.
    MiPredicateType predicateType = MiPredicateType::noopOnResult2Clear; // Equal or Less
    if ((compareOperation == CompareOperation::notEqual) || (compareOperation == CompareOperation::greaterOrEqual)) {
        predicateType = MiPredicateType::noopOnResult2Set;
    }
    EncodeMiPredicate<Family>::encode(commandStream, predicateType);

    programBatchBufferStart(&commandStream, startAddress, false, indirect, true);
    // Always restore normal (unpredicated) execution afterwards.
    EncodeMiPredicate<Family>::encode(commandStream, MiPredicateType::disable);
}
template <typename Family>
2023-04-05 13:21:45 +00:00
void EncodeBatchBufferStartOrEnd<Family>::programBatchBufferStart(MI_BATCH_BUFFER_START *cmdBuffer, uint64_t address, bool secondLevel, bool indirect, bool predicate) {
2020-01-28 13:50:19 +01:00
MI_BATCH_BUFFER_START cmd = Family::cmdInitBatchBufferStart;
if (secondLevel) {
cmd.setSecondLevelBatchBuffer(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH);
}
cmd.setAddressSpaceIndicator(MI_BATCH_BUFFER_START::ADDRESS_SPACE_INDICATOR_PPGTT);
2021-12-16 18:02:45 +00:00
cmd.setBatchBufferStartAddress(address);
2022-11-10 09:58:55 +00:00
appendBatchBufferStart(cmd, indirect, predicate);
2023-04-05 13:21:45 +00:00
*cmdBuffer = cmd;
}
template <typename Family>
void EncodeBatchBufferStartOrEnd<Family>::programBatchBufferStart(LinearStream *commandStream, uint64_t address, bool secondLevel, bool indirect, bool predicate) {
    // Reserves space in the stream and delegates to the in-place variant.
    programBatchBufferStart(commandStream->getSpaceForCmd<MI_BATCH_BUFFER_START>(), address, secondLevel, indirect, predicate);
}
template <typename Family>
2022-09-02 13:10:48 +00:00
void EncodeBatchBufferStartOrEnd<Family>::programBatchBufferEnd(LinearStream &commandStream) {
2020-01-28 13:50:19 +01:00
MI_BATCH_BUFFER_END cmd = Family::cmdInitBatchBufferEnd;
2022-09-02 13:10:48 +00:00
auto buffer = commandStream.getSpaceForCmd<MI_BATCH_BUFFER_END>();
2020-04-08 18:33:03 +02:00
*buffer = cmd;
2020-01-28 13:50:19 +01:00
}
2022-09-02 13:10:48 +00:00
template <typename Family>
void EncodeBatchBufferStartOrEnd<Family>::programBatchBufferEnd(CommandContainer &container) {
    // Convenience overload: terminate the container's current command stream.
    programBatchBufferEnd(*container.getCommandStream());
}
2023-03-06 12:42:09 +00:00
template <typename GfxFamily>
void EncodeMiFlushDW<GfxFamily>::appendWa(LinearStream &commandStream, MiFlushArgs &args) {
    // Emits the dummy-blit workaround commands ahead of the MI_FLUSH_DW.
    BlitCommandsHelper<GfxFamily>::dispatchDummyBlit(commandStream, args.waArgs);
}
2021-09-03 11:42:31 +00:00
template <typename Family>
2023-03-06 12:42:09 +00:00
void EncodeMiFlushDW<Family>::programWithWa(LinearStream &commandStream, uint64_t immediateDataGpuAddress, uint64_t immediateData,
MiFlushArgs &args) {
2024-03-12 11:12:51 +00:00
UNRECOVERABLE_IF(args.waArgs.isWaRequired && !args.commandWithPostSync);
2023-03-06 12:42:09 +00:00
appendWa(commandStream, args);
2024-02-29 13:30:12 +00:00
args.waArgs.isWaRequired = false;
2020-03-04 17:31:21 +01:00
2020-02-21 15:35:08 +01:00
auto miFlushDwCmd = commandStream.getSpaceForCmd<MI_FLUSH_DW>();
2021-09-03 11:42:31 +00:00
MI_FLUSH_DW miFlush = Family::cmdInitMiFlushDw;
2021-06-17 11:55:28 +00:00
if (args.commandWithPostSync) {
auto postSyncType = args.timeStampOperation ? MI_FLUSH_DW::POST_SYNC_OPERATION_WRITE_TIMESTAMP_REGISTER : MI_FLUSH_DW::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA_QWORD;
2020-04-08 18:33:03 +02:00
miFlush.setPostSyncOperation(postSyncType);
miFlush.setDestinationAddress(immediateDataGpuAddress);
miFlush.setImmediateData(immediateData);
2020-04-07 16:50:09 +02:00
}
2021-06-17 11:55:28 +00:00
miFlush.setNotifyEnable(args.notifyEnable);
2021-07-29 06:40:42 +00:00
miFlush.setTlbInvalidate(args.tlbFlush);
2023-03-06 12:42:09 +00:00
adjust(&miFlush, args.waArgs.rootDeviceEnvironment->getProductHelper());
2020-04-08 18:33:03 +02:00
*miFlushDwCmd = miFlush;
2020-02-21 15:35:08 +01:00
}
2021-09-03 11:42:31 +00:00
template <typename Family>
2023-03-06 12:42:09 +00:00
size_t EncodeMiFlushDW<Family>::getWaSize(const EncodeDummyBlitWaArgs &waArgs) {
return BlitCommandsHelper<Family>::getDummyBlitSize(waArgs);
}
template <typename Family>
size_t EncodeMiFlushDW<Family>::getCommandSizeWithWa(const EncodeDummyBlitWaArgs &waArgs) {
    // Total footprint: the MI_FLUSH_DW itself plus any preceding workaround blit.
    constexpr size_t flushCmdSize = sizeof(typename Family::MI_FLUSH_DW);
    return flushCmdSize + EncodeMiFlushDW<Family>::getWaSize(waArgs);
}
2020-11-16 17:12:08 +00:00
template <typename Family>
2023-02-28 21:08:09 +00:00
void EncodeMiArbCheck<Family>::program(LinearStream &commandStream, std::optional<bool> preParserDisable) {
2025-07-31 12:54:10 +00:00
auto miArbCheckStream = commandStream.getSpaceForCmd<MI_ARB_CHECK>();
program(miArbCheckStream, preParserDisable);
}
2022-03-09 15:17:47 +00:00
2025-07-31 12:54:10 +00:00
template <typename Family>
void EncodeMiArbCheck<Family>::program(MI_ARB_CHECK *arbCheckCmd, std::optional<bool> preParserDisable) {
MI_ARB_CHECK cmd = Family::cmdInitArbCheck;
2023-02-28 21:08:09 +00:00
EncodeMiArbCheck<Family>::adjust(cmd, preParserDisable);
2025-07-31 12:54:10 +00:00
*arbCheckCmd = cmd;
2020-11-16 17:12:08 +00:00
}
2021-11-17 22:36:00 +00:00
template <typename Family>
inline void EncodeNoop<Family>::alignToCacheLine(LinearStream &commandStream) {
auto used = commandStream.getUsed();
auto alignment = MemoryConstants::cacheLineSize;
auto partialCacheline = used & (alignment - 1);
if (partialCacheline) {
auto amountToPad = alignment - partialCacheline;
auto pCmd = commandStream.getSpace(amountToPad);
memset(pCmd, 0, amountToPad);
}
}
template <typename Family>
inline void EncodeNoop<Family>::emitNoop(LinearStream &commandStream, size_t bytesToUpdate) {
    // Zero-filled space is interpreted as MI_NOOP by the command parser.
    if (bytesToUpdate == 0) {
        return;
    }
    memset(commandStream.getSpace(bytesToUpdate), 0, bytesToUpdate);
}
2021-12-02 14:17:45 +00:00
template <typename Family>
inline void EncodeStoreMemory<Family>::programStoreDataImm(LinearStream &commandStream,
uint64_t gpuAddress,
uint32_t dataDword0,
uint32_t dataDword1,
bool storeQword,
2024-03-07 10:51:48 +00:00
bool workloadPartitionOffset,
void **outCmdPtr) {
2021-12-02 14:17:45 +00:00
auto miStoreDataImmBuffer = commandStream.getSpaceForCmd<MI_STORE_DATA_IMM>();
2024-03-07 10:51:48 +00:00
if (outCmdPtr != nullptr) {
*outCmdPtr = miStoreDataImmBuffer;
}
2021-12-02 14:17:45 +00:00
EncodeStoreMemory<Family>::programStoreDataImm(miStoreDataImmBuffer,
gpuAddress,
dataDword0,
dataDword1,
storeQword,
workloadPartitionOffset);
}
2025-09-22 11:19:51 +00:00
template <typename Family>
inline void EncodeStoreMemory<Family>::programStoreDataImmCommand(LinearStream *commandStream,
MI_STORE_DATA_IMM *cmdBuffer,
uint64_t gpuAddress,
uint32_t dataDword0,
uint32_t dataDword1,
bool storeQword,
bool workloadPartitionOffset) {
if (cmdBuffer == nullptr) {
DEBUG_BREAK_IF(commandStream == nullptr);
cmdBuffer = commandStream->getSpaceForCmd<MI_STORE_DATA_IMM>();
}
EncodeStoreMemory<Family>::programStoreDataImm(cmdBuffer,
gpuAddress,
dataDword0,
dataDword1,
storeQword,
workloadPartitionOffset);
}
2025-07-28 20:08:58 +00:00
template <typename Family>
inline void EncodeDataMemory<Family>::programDataMemory(LinearStream &commandStream,
uint64_t dstGpuAddress,
void *srcData,
size_t size) {
size_t bufferSize = getCommandSizeForEncode(size);
2025-08-01 03:45:15 +00:00
void *basePtr = commandStream.getSpace(bufferSize);
void *commandBuffer = basePtr;
2025-07-28 20:08:58 +00:00
EncodeDataMemory<Family>::programDataMemory(commandBuffer, dstGpuAddress, srcData, size);
2025-08-01 03:45:15 +00:00
size_t sizeDiff = ptrDiff(commandBuffer, basePtr);
if (bufferSize > sizeDiff) {
auto paddingSize = bufferSize - sizeDiff;
memset(commandBuffer, 0, paddingSize);
}
2025-07-28 20:08:58 +00:00
}
template <typename Family>
2025-07-30 10:02:01 +00:00
inline void EncodeDataMemory<Family>::programDataMemory(void *&commandBuffer,
2025-07-28 20:08:58 +00:00
uint64_t dstGpuAddress,
void *srcData,
size_t size) {
using MI_STORE_DATA_IMM = typename Family::MI_STORE_DATA_IMM;
2025-08-01 03:45:15 +00:00
2025-07-28 20:08:58 +00:00
auto alignedUpSize = alignUp(size, sizeof(uint32_t));
UNRECOVERABLE_IF(alignedUpSize != size);
2025-08-01 03:45:15 +00:00
const auto alignedUpDstGpuAddress = alignUp(dstGpuAddress, sizeof(uint64_t));
bool useQword = alignedUpDstGpuAddress == dstGpuAddress;
2025-07-28 20:08:58 +00:00
MI_STORE_DATA_IMM *cmdSdi = reinterpret_cast<MI_STORE_DATA_IMM *>(commandBuffer);
uint32_t dataDword0 = 0;
uint32_t dataDword1 = 0;
bool storeQword = false;
size_t step = sizeof(uint32_t);
2025-08-01 03:45:15 +00:00
if (useQword == false) {
if (srcData != nullptr) {
dataDword0 = *reinterpret_cast<uint32_t *>(srcData);
}
EncodeStoreMemory<Family>::programStoreDataImm(cmdSdi, dstGpuAddress, dataDword0, dataDword1, storeQword, false);
size -= step;
dstGpuAddress += step;
if (srcData != nullptr) {
srcData = ptrOffset(srcData, step);
}
cmdSdi++;
}
2025-07-28 20:08:58 +00:00
while (size > 0) {
2025-08-01 03:45:15 +00:00
if (srcData != nullptr) {
dataDword0 = *reinterpret_cast<uint32_t *>(srcData);
}
2025-07-28 20:08:58 +00:00
storeQword = false;
dataDword1 = 0;
step = sizeof(uint32_t);
if (size >= sizeof(uint64_t)) {
2025-08-01 03:45:15 +00:00
if (srcData != nullptr) {
dataDword1 = *(reinterpret_cast<uint32_t *>(srcData) + 1);
}
2025-07-28 20:08:58 +00:00
storeQword = true;
step = sizeof(uint64_t);
}
EncodeStoreMemory<Family>::programStoreDataImm(cmdSdi, dstGpuAddress, dataDword0, dataDword1, storeQword, false);
2025-08-01 03:45:15 +00:00
if (srcData != nullptr) {
srcData = ptrOffset(srcData, step);
}
2025-07-28 20:08:58 +00:00
size -= step;
dstGpuAddress += step;
cmdSdi++;
}
2025-07-30 10:02:01 +00:00
commandBuffer = reinterpret_cast<void *>(cmdSdi);
2025-07-28 20:08:58 +00:00
}
template <typename Family>
inline size_t EncodeDataMemory<Family>::getCommandSizeForEncode(size_t size) {
    // Payload must be a whole number of dwords.
    UNRECOVERABLE_IF(alignUp(size, sizeof(uint32_t)) != size);

    constexpr size_t sdiSize = EncodeStoreMemory<Family>::getStoreDataImmSize();
    constexpr size_t dwordSize = sizeof(uint32_t);
    constexpr size_t qwordSize = sizeof(uint64_t);

    // Up to one qword of data: worst case is one SDI command per dword.
    if (size <= qwordSize) {
        return sdiSize * (size / dwordSize);
    }

    // Worst case for a misaligned destination: one dword SDI at each end of the
    // range, with qword SDIs covering the remainder.
    const size_t edgeCmds = 2;
    const size_t qwordCmds = (size - 2 * dwordSize) / qwordSize;
    return sdiSize * (edgeCmds + qwordCmds);
}
template <typename Family>
inline void EncodeDataMemory<Family>::programNoop(LinearStream &commandStream,
                                                  uint64_t dstGpuAddress, size_t size) {
    // Reserve worst-case space, emit the zero-writes, then NOOP-pad the unused tail.
    const size_t reservedSize = getCommandSizeForEncode(size);
    void *reservedBase = commandStream.getSpace(reservedSize);
    void *cmdPtr = reservedBase;

    programNoop(cmdPtr, dstGpuAddress, size);

    const size_t consumed = ptrDiff(cmdPtr, reservedBase);
    if (reservedSize > consumed) {
        memset(cmdPtr, 0, reservedSize - consumed);
    }
}
template <typename Family>
2025-07-30 10:02:01 +00:00
inline void EncodeDataMemory<Family>::programNoop(void *&commandBuffer,
2025-07-29 19:21:18 +00:00
uint64_t dstGpuAddress, size_t size) {
2025-08-01 03:45:15 +00:00
programDataMemory(commandBuffer, dstGpuAddress, nullptr, size);
2025-07-29 19:21:18 +00:00
}
2025-07-28 20:08:58 +00:00
2025-07-29 19:21:18 +00:00
template <typename Family>
inline void EncodeDataMemory<Family>::programBbStart(LinearStream &commandStream,
uint64_t dstGpuAddress, uint64_t address, bool secondLevel, bool indirect, bool predicate) {
using MI_BATCH_BUFFER_START = typename Family::MI_BATCH_BUFFER_START;
2025-08-01 03:45:15 +00:00
// size of dword+qword has the same consumption for best and worst-case scenario, so no need to clean-up possible reminder when gpu address is qword misaligned
static_assert(sizeof(MI_BATCH_BUFFER_START) == (sizeof(uint32_t) + sizeof(uint64_t)), "MI_BATCH_BUFFER_START requires to add cleanup after overestimation");
2025-07-29 19:21:18 +00:00
size_t bufferSize = getCommandSizeForEncode(sizeof(MI_BATCH_BUFFER_START));
void *commandBuffer = commandStream.getSpace(bufferSize);
EncodeDataMemory<Family>::programBbStart(commandBuffer, dstGpuAddress, address, secondLevel, indirect, predicate);
}
template <typename Family>
2025-07-30 10:02:01 +00:00
inline void EncodeDataMemory<Family>::programBbStart(void *&commandBuffer,
2025-07-29 19:21:18 +00:00
uint64_t dstGpuAddress, uint64_t address, bool secondLevel, bool indirect, bool predicate) {
using MI_BATCH_BUFFER_START = typename Family::MI_BATCH_BUFFER_START;
alignas(8) uint8_t bbStartCmdBuffer[sizeof(MI_BATCH_BUFFER_START)];
EncodeBatchBufferStartOrEnd<Family>::programBatchBufferStart(reinterpret_cast<MI_BATCH_BUFFER_START *>(bbStartCmdBuffer), address, secondLevel, indirect, predicate);
programDataMemory(commandBuffer, dstGpuAddress, bbStartCmdBuffer, sizeof(MI_BATCH_BUFFER_START));
2025-07-28 20:08:58 +00:00
}
2022-11-08 11:05:43 +00:00
template <typename Family>
void EncodeMiPredicate<Family>::encode(LinearStream &cmdStream, [[maybe_unused]] MiPredicateType predicateType) {
if constexpr (Family::isUsingMiSetPredicate) {
using MI_SET_PREDICATE = typename Family::MI_SET_PREDICATE;
using PREDICATE_ENABLE = typename MI_SET_PREDICATE::PREDICATE_ENABLE;
auto miSetPredicate = Family::cmdInitSetPredicate;
miSetPredicate.setPredicateEnable(static_cast<PREDICATE_ENABLE>(predicateType));
*cmdStream.getSpaceForCmd<MI_SET_PREDICATE>() = miSetPredicate;
}
}
2023-07-27 12:36:00 +00:00
template <typename Family>
2025-07-18 14:41:31 +00:00
void EncodeUserInterrupt<Family>::encode(LinearStream &commandStream) {
2023-07-27 12:36:00 +00:00
*commandStream.getSpaceForCmd<typename Family::MI_USER_INTERRUPT>() = Family::cmdInitUserInterrupt;
}
2024-11-12 15:23:07 +00:00
template <typename Family>
bool EncodeSurfaceState<Family>::isBindingTablePrefetchPreferred() {
return false;
}
template <typename Family>
void EncodeComputeMode<Family>::adjustPipelineSelect(CommandContainer &container, const NEO::KernelDescriptor &kernelDescriptor) {
    // Program PIPELINE_SELECT based on the kernel's systolic-mode flag and the
    // container's current systolic support state.
    PipelineSelectArgs selectArgs;
    selectArgs.systolicPipelineSelectMode = kernelDescriptor.kernelAttributes.flags.usesSystolicPipelineSelectMode;
    selectArgs.systolicPipelineSelectSupport = container.systolicModeSupportRef();

    auto &rootDeviceEnvironment = container.getDevice()->getRootDeviceEnvironment();
    PreambleHelper<Family>::programPipelineSelect(container.getCommandStream(), selectArgs, rootDeviceEnvironment);
}
2020-01-24 14:58:15 +01:00
} // namespace NEO