/*
 * Copyright (C) 2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

// NOTE(review): in this copy of the file everything that would normally sit
// inside angle brackets appears to be missing: `template` has no parameter
// list, class names have no template arguments, several static_cast /
// reinterpret_cast expressions have no target type, and one #include has no
// header name. The code below is kept exactly as found; restore the missing
// pieces from version control before building.

#pragma once
#include "core/command_container/command_encoder.h"
#include "core/command_stream/linear_stream.h"
#include "core/helpers/hw_helper.h"
#include "core/helpers/preamble.h"
#include "core/helpers/register_offsets.h"
#include "core/helpers/simd_helper.h"
#include "core/helpers/string.h"
#include "core/kernel/dispatch_kernel_encoder_interface.h"
#include "runtime/device/device.h"
#include "runtime/execution_environment/execution_environment.h"
#include "runtime/helpers/hardware_commands_helper.h"
#include // NOTE(review): header name missing here (see note at top of file)
namespace NEO {

// Copies border-color data and `samplerCount` SAMPLER_STATE entries from
// `fnDynamicStateHeap` into `dsh`, then points every copied sampler state at
// the border-color copy.
//
// dsh                 heap that receives both copies
// samplerStateOffset  offset of the sampler states inside fnDynamicStateHeap
// samplerCount        number of SAMPLER_STATE entries to copy
// borderColorOffset   offset of the border-color data inside fnDynamicStateHeap
// fnDynamicStateHeap  source heap
//
// Returns dsh->getUsed() as sampled right before the sampler states were
// allocated, i.e. the offset of the copied sampler states within `dsh`.
template uint32_t EncodeStates::copySamplerState(IndirectHeap *dsh, uint32_t samplerStateOffset, uint32_t samplerCount, uint32_t borderColorOffset, const void *fnDynamicStateHeap) {
    auto sizeSamplerState = sizeof(SAMPLER_STATE) * samplerCount;
    // Everything between the two source offsets is treated as border-color data.
    // NOTE(review): unsigned subtraction wraps if samplerStateOffset <
    // borderColorOffset — presumably callers guarantee the border color comes
    // first; confirm.
    auto borderColorSize = samplerStateOffset - borderColorOffset;

    dsh->align(alignIndirectStatePointer);
    auto borderColorOffsetInDsh = static_cast(dsh->getUsed());
    auto borderColor = dsh->getSpace(borderColorSize);
    memcpy_s(borderColor, borderColorSize, ptrOffset(fnDynamicStateHeap, borderColorOffset), borderColorSize);

    dsh->align(INTERFACE_DESCRIPTOR_DATA::SAMPLERSTATEPOINTER_ALIGN_SIZE);
    auto samplerStateOffsetInDsh = static_cast(dsh->getUsed());
    auto samplerState = dsh->getSpace(sizeSamplerState);
    memcpy_s(samplerState, sizeSamplerState, ptrOffset(fnDynamicStateHeap, samplerStateOffset), sizeSamplerState);

    // Patch each copied sampler so it references the border color's new
    // location in dsh instead of its offset in the source heap.
    auto pSmplr = reinterpret_cast(samplerState);
    for (uint32_t i = 0; i < samplerCount; i++) {
        pSmplr[i].setIndirectStatePointer((uint32_t)borderColorOffsetInDsh);
    }
    return samplerStateOffsetInDsh;
}

// Emits a command sequence that reads the register at `offset`, combines it
// with the constant `val` through a chain of MI_MATH additions, and stores
// CS_GPR_R1 to `dstAddress`.
//
// Sequence written to the container's command stream:
//   1. CS_GPR_R0 <- register `offset`
//   2. CS_GPR_R1 <- 0
//   3. one MI_MATH holding, for every bit i of `val` up to its highest set
//      bit: an add of (R_1, R_0) when bit i is set, followed by an add of
//      (R_0, R_0)
//   4. store CS_GPR_R1 to `dstAddress`
// Assuming each encodeAluAdd(regA, regB) leaves regA = regA + regB, this is a
// shift-and-add multiply: R1 ends up as register * val.
template void EncodeMathMMIO::encodeMulRegVal(CommandContainer &container, uint32_t offset, uint32_t val, uint64_t dstAddress) {
    int logLws = 0;
    int addsCount = 0;
    // NOTE(review): this initial value is never read; `i` is reset to 0 before
    // its first use below.
    int i = val;
    // First pass: count how many add sequences the MI_MATH will need (one per
    // bit position, plus one more for every set bit).
    // NOTE(review): `1 << logLws` is an int shift, and if bit 31 of `val` is
    // set the loop reaches a shift count of 32 in `val >> logLws`; both are
    // undefined behaviour. Presumably `val` is always a small work-group size —
    // confirm, or bound the loop and use an unsigned literal.
    while (val >> logLws) {
        if (val & (1 << logLws)) {
            addsCount++;
        }
        logLws++;
        addsCount++;
    }
    EncodeSetMMIO::encodeREG(container, CS_GPR_R0, offset);
    EncodeSetMMIO::encodeIMM(container, CS_GPR_R1, 0);
    // NOTE(review): for val == 0 this is 0 and `length - 1` below wraps —
    // confirm callers never pass 0.
    uint32_t length = NUM_ALU_INST_FOR_READ_MODIFY_WRITE * addsCount;
    auto cmd2 = reinterpret_cast(container.getCommandStream()->getSpace(sizeof(MI_MATH) + sizeof(MI_MATH_ALU_INST_INLINE) * length));
    // Fill in the MI_MATH header by hand.
    reinterpret_cast(cmd2)->DW0.Value = 0x0;
    reinterpret_cast(cmd2)->DW0.BitField.InstructionType = MI_MATH::COMMAND_TYPE_MI_COMMAND;
    reinterpret_cast(cmd2)->DW0.BitField.InstructionOpcode = MI_MATH::MI_COMMAND_OPCODE_MI_MATH;
    reinterpret_cast(cmd2)->DW0.BitField.DwordLength = length - 1;
    // Step past the header; the ALU instructions follow it directly.
    cmd2++;
    MI_MATH_ALU_INST_INLINE *pAluParam = reinterpret_cast(cmd2);
    // Second pass: emit the add sequences counted above, in the same order.
    i = 0;
    while (i < logLws) {
        if (val & (1 << i)) {
            encodeAluAdd(pAluParam, ALU_REGISTER_R_1, ALU_REGISTER_R_0);
            pAluParam += NUM_ALU_INST_FOR_READ_MODIFY_WRITE;
        }
        encodeAluAdd(pAluParam, ALU_REGISTER_R_0, ALU_REGISTER_R_0);
        pAluParam += NUM_ALU_INST_FOR_READ_MODIFY_WRITE;
        i++;
    }
    EncodeStoreMMIO::encode(container, CS_GPR_R1, dstAddress);
}

// Emits commands that load the value stored at memory address `lhsVal` into
// CS_GPR_R0 and the immediate `rhsVal` into CS_GPR_R1, run one
// subtract/store-carry ALU sequence on (R_0, R_1), and finally copy CS_GPR_R0
// into CS_PREDICATE_RESULT.
template void EncodeMathMMIO::encodeGreaterThanPredicate(CommandContainer &container, uint64_t lhsVal, uint32_t rhsVal) {
    EncodeSetMMIO::encodeMEM(container, CS_GPR_R0, lhsVal);
    EncodeSetMMIO::encodeIMM(container, CS_GPR_R1, rhsVal);
    // Room for the MI_MATH header plus exactly one ALU sequence.
    size_t size = sizeof(MI_MATH) + sizeof(MI_MATH_ALU_INST_INLINE) * NUM_ALU_INST_FOR_READ_MODIFY_WRITE;
    auto cmd = reinterpret_cast(container.getCommandStream()->getSpace(size));
    reinterpret_cast(cmd)->DW0.Value = 0x0;
    reinterpret_cast(cmd)->DW0.BitField.InstructionType = MI_MATH::COMMAND_TYPE_MI_COMMAND;
    reinterpret_cast(cmd)->DW0.BitField.InstructionOpcode = MI_MATH::MI_COMMAND_OPCODE_MI_MATH;
    reinterpret_cast(cmd)->DW0.BitField.DwordLength = NUM_ALU_INST_FOR_READ_MODIFY_WRITE - 1;
    // Step past the header to where the ALU instructions go.
    cmd++;
    encodeAluSubStoreCarry(reinterpret_cast(cmd), ALU_REGISTER_R_0, ALU_REGISTER_R_1);
    EncodeSetMMIO::encodeREG(container, CS_PREDICATE_RESULT, CS_GPR_R0);
}

// Writes four consecutive ALU instructions starting at `pAluParam`:
//   LOAD  Operand1 = ALU_REGISTER_R_SRCA, Operand2 = srcA
//   LOAD  Operand1 = ALU_REGISTER_R_SRCB, Operand2 = srcB
//   `op`  with both operands 0
//   STORE Operand1 = dest, Operand2 = result
// `pAluParam` is taken by value, so the caller's pointer is not advanced;
// callers step it by NUM_ALU_INST_FOR_READ_MODIFY_WRITE themselves
// (presumably 4, matching the four instructions written here — confirm).
template void EncodeMathMMIO::encodeAlu(MI_MATH_ALU_INST_INLINE *pAluParam, uint32_t srcA, uint32_t srcB, uint32_t op, uint32_t dest, uint32_t result) {
    pAluParam->DW0.BitField.ALUOpcode = ALU_OPCODE_LOAD;
    pAluParam->DW0.BitField.Operand1 = ALU_REGISTER_R_SRCA;
    pAluParam->DW0.BitField.Operand2 = srcA;
    pAluParam++;
    pAluParam->DW0.BitField.ALUOpcode = ALU_OPCODE_LOAD;
    pAluParam->DW0.BitField.Operand1 = ALU_REGISTER_R_SRCB;
    pAluParam->DW0.BitField.Operand2 = srcB;
    pAluParam++;
    pAluParam->DW0.BitField.ALUOpcode = op;
    pAluParam->DW0.BitField.Operand1 = 0;
    pAluParam->DW0.BitField.Operand2 = 0;
    pAluParam++;
    // NOTE(review): in the LOADs above Operand1 is the register being written.
    // If STORE follows the same convention, the callers in this file — which
    // pass ALU_REGISTER_R_ACCU / ALU_REGISTER_R_CF as `dest` and the GPR as
    // `result` — look reversed. Confirm against the MI_MATH programming
    // reference.
    pAluParam->DW0.BitField.ALUOpcode = ALU_OPCODE_STORE;
    pAluParam->DW0.BitField.Operand1 = dest;
    pAluParam->DW0.BitField.Operand2 = result;
    pAluParam++;
}

// Encodes a SUB of regA and regB through encodeAlu, passing ALU_REGISTER_R_CF
// as `dest` and regA as `result`.
template void EncodeMathMMIO::encodeAluSubStoreCarry(MI_MATH_ALU_INST_INLINE *pAluParam, uint32_t regA, uint32_t regB) {
    encodeAlu(pAluParam, regA, regB, ALU_OPCODE_SUB, ALU_REGISTER_R_CF, regA);
}

// Encodes an ADD of regA and regB through encodeAlu, passing
// ALU_REGISTER_R_ACCU as `dest` and regA as `result`.
template void EncodeMathMMIO::encodeAluAdd(MI_MATH_ALU_INST_INLINE *pAluParam, uint32_t regA, uint32_t regB) {
    encodeAlu(pAluParam, regA, regB, ALU_OPCODE_ADD, ALU_REGISTER_R_ACCU, regA);
}

// Emits three register-to-memory stores: GPUGPU_DISPATCHDIMX/Y/Z are written
// to crossThreadAddress + offsets[0], [1] and [2] respectively.
template void EncodeIndirectParams::setGroupCountIndirect(CommandContainer &container, uint32_t offsets[3], void *crossThreadAddress) {
    EncodeStoreMMIO::encode(container, GPUGPU_DISPATCHDIMX, ptrOffset(reinterpret_cast(crossThreadAddress), offsets[0]));
    EncodeStoreMMIO::encode(container, GPUGPU_DISPATCHDIMY, ptrOffset(reinterpret_cast(crossThreadAddress), offsets[1]));
    EncodeStoreMMIO::encode(container, GPUGPU_DISPATCHDIMZ, ptrOffset(reinterpret_cast(crossThreadAddress), offsets[2]));
}

// For each of the three dimensions, runs encodeMulRegVal on the matching
// GPUGPU_DISPATCHDIM* register with lws[i] as the constant, storing the result
// to crossThreadAddress + offsets[i].
template void EncodeIndirectParams::setGroupSizeIndirect(CommandContainer &container, uint32_t offsets[3], void *crossThreadAddress, uint32_t lws[3]) {
    EncodeMathMMIO::encodeMulRegVal(container, GPUGPU_DISPATCHDIMX, lws[0], ptrOffset(reinterpret_cast(crossThreadAddress), offsets[0]));
    EncodeMathMMIO::encodeMulRegVal(container, GPUGPU_DISPATCHDIMY, lws[1], ptrOffset(reinterpret_cast(crossThreadAddress), offsets[1]));
    EncodeMathMMIO::encodeMulRegVal(container, GPUGPU_DISPATCHDIMZ, lws[2], ptrOffset(reinterpret_cast(crossThreadAddress), offsets[2]));
}

// Appends a PIPE_CONTROL, built from Family::cmdInitPipeControl with
// command-streamer stall and DC flush enabled, to the container's command
// stream.
template void EncodeFlush::encode(CommandContainer &container) {
    PIPE_CONTROL cmd = Family::cmdInitPipeControl;
    cmd.setCommandStreamerStallEnable(true);
    cmd.setDcFlushEnable(true);
    auto buffer = container.getCommandStream()->getSpace(sizeof(cmd));
    *(PIPE_CONTROL *)buffer = cmd;
}

// Appends a PIPE_CONTROL whose post-sync operation writes the immediate
// `value` to `gpuAddress`. Command-streamer stall is always enabled; DC flush
// is enabled only when `dcFlushEnable` is true.
template void EncodeFlush::encodeWithQwordWrite(CommandContainer &container, uint64_t gpuAddress, uint64_t value, bool dcFlushEnable) {
    PIPE_CONTROL cmd = Family::cmdInitPipeControl;
    cmd.setPostSyncOperation(POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA);
    cmd.setImmediateData(value);
    cmd.setCommandStreamerStallEnable(true);
    if (dcFlushEnable) {
        cmd.setDcFlushEnable(true);
    }
    // The 64-bit address is programmed as two 32-bit halves.
    cmd.setAddressHigh(gpuAddress >> 32u);
    cmd.setAddress(uint32_t(gpuAddress));
    auto buffer = container.getCommandStream()->getSpace(sizeof(cmd));
    *(PIPE_CONTROL *)buffer = cmd;
}

// Appends an MI_LOAD_REGISTER_IMM that loads the immediate `data` into the
// register at `offset`.
template void EncodeSetMMIO::encodeIMM(CommandContainer &container, uint32_t offset, uint32_t data) {
    MI_LOAD_REGISTER_IMM cmd = Family::cmdInitLoadRegisterImm;
    cmd.setRegisterOffset(offset);
    cmd.setDataDword(data);
    auto buffer = container.getCommandStream()->getSpace(sizeof(cmd));
    *(MI_LOAD_REGISTER_IMM *)buffer = cmd;
}

// Appends an MI_LOAD_REGISTER_MEM with register address `offset` and memory
// address `address`.
template void EncodeSetMMIO::encodeMEM(CommandContainer &container, uint32_t offset, uint64_t address) {
    MI_LOAD_REGISTER_MEM cmd = Family::cmdInitLoadRegisterMem;
    cmd.setRegisterAddress(offset);
    cmd.setMemoryAddress(address);
    auto buffer = container.getCommandStream()->getSpace(sizeof(cmd));
    *(MI_LOAD_REGISTER_MEM *)buffer = cmd;
}

// Appends an MI_LOAD_REGISTER_REG with `srcOffset` as the source register and
// `dstOffset` as the destination register. Note the parameter order:
// destination first, source second.
template void EncodeSetMMIO::encodeREG(CommandContainer &container, uint32_t dstOffset, uint32_t srcOffset) {
    MI_LOAD_REGISTER_REG cmd = Family::cmdInitLoadRegisterReg;
    cmd.setSourceRegisterAddress(srcOffset);
    cmd.setDestinationRegisterAddress(dstOffset);
    auto buffer = container.getCommandStream()->getSpace(sizeof(cmd));
    *(MI_LOAD_REGISTER_REG *)buffer = cmd;
}

// Appends an MI_STORE_REGISTER_MEM with register address `offset` and memory
// address `address`.
template void EncodeStoreMMIO::encode(CommandContainer &container, uint32_t offset, uint64_t address) {
    MI_STORE_REGISTER_MEM cmd = Family::cmdInitStoreRegisterMem;
    cmd.setRegisterAddress(offset);
    cmd.setMemoryAddress(address);
    auto buffer = container.getCommandStream()->getSpace(sizeof(cmd));
    *(MI_STORE_REGISTER_MEM *)buffer = cmd;
}

// Programs the surface state at `dst` as a RAW-format, linear buffer surface.
//
// dst          surface state to fill in
// address      buffer base address; nullptr selects SURFTYPE_NULL instead of
//              SURFTYPE_BUFFER
// size         buffer size; aborts via UNRECOVERABLE_IF when the isAligned
//              check fails
// mocs         memory object control state value
// cpuCoherent  selects IA_COHERENT (true) or GPU_COHERENT (false)
template void EncodeSurfaceState::encodeBuffer(void *dst, void *address, size_t size, uint32_t mocs, bool cpuCoherent) {
    auto ss = reinterpret_cast(dst);
    UNRECOVERABLE_IF(!isAligned(size));
    // (size - 1) is written into SURFACE_STATE_BUFFER_LENGTH and read back as
    // separate Width/Height/Depth fields; each is programmed with +1.
    // NOTE(review): size == 0 would wrap in `size - 1` — presumably callers
    // never pass an empty buffer; confirm.
    SURFACE_STATE_BUFFER_LENGTH Length = {0};
    Length.Length = static_cast(size - 1);
    ss->setWidth(Length.SurfaceState.Width + 1);
    ss->setHeight(Length.SurfaceState.Height + 1);
    ss->setDepth(Length.SurfaceState.Depth + 1);
    ss->setSurfaceType((address != nullptr) ? R_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER : R_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_NULL);
    ss->setSurfaceFormat(SURFACE_FORMAT::SURFACE_FORMAT_RAW);
    ss->setSurfaceVerticalAlignment(R_SURFACE_STATE::SURFACE_VERTICAL_ALIGNMENT_VALIGN_4);
    ss->setSurfaceHorizontalAlignment(R_SURFACE_STATE::SURFACE_HORIZONTAL_ALIGNMENT_HALIGN_4);
    ss->setTileMode(R_SURFACE_STATE::TILE_MODE_LINEAR);
    ss->setVerticalLineStride(0);
    ss->setVerticalLineStrideOffset(0);
    ss->setMemoryObjectControlState(mocs);
    ss->setSurfaceBaseAddress(reinterpret_cast(address));
    ss->setCoherencyType(cpuCoherent ? R_SURFACE_STATE::COHERENCY_TYPE_IA_COHERENT : R_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT);
    ss->setAuxiliarySurfaceMode(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE);
}

// Intentionally empty in this implementation: nothing is emitted.
template void EncodeStates::adjustStateComputeMode(CommandContainer &container) {
}

// Hands out the next interface descriptor slot from the container's current
// block.
//
// When the current block is exhausted (nextIddInBlock == numIddsPerBlock), the
// dynamic state heap is aligned, a new block of numIddsPerBlock descriptors is
// allocated from it, the index is reset to 0, and a media interface descriptor
// load is encoded.
//
// iddOffset  [out] index of the returned slot within the block
// Returns a pointer to the slot; the container's index is advanced afterwards.
template void *EncodeDispatchKernel::getInterfaceDescriptor(CommandContainer &container, uint32_t &iddOffset) {
    if (container.nextIddInBlock == container.numIddsPerBlock) {
        container.getIndirectHeap(HeapType::DYNAMIC_STATE)->align(HardwareCommandsHelper::alignInterfaceDescriptorData);
        container.setIddBlock(container.getHeapSpaceAllowGrow(HeapType::DYNAMIC_STATE, sizeof(INTERFACE_DESCRIPTOR_DATA) * container.numIddsPerBlock));
        container.nextIddInBlock = 0;
        EncodeMediaInterfaceDescriptorLoad::encode(container);
    }
    iddOffset = container.nextIddInBlock;
    auto interfaceDescriptorData = static_cast(container.getIddBlock());
    return &interfaceDescriptorData[container.nextIddInBlock++];
}

// Returns 0, matching the empty adjustStateComputeMode above.
template size_t EncodeStates::getAdjustStateComputeModeSize() {
    return 0;
}

// Returns the size of three MI_LOAD_REGISTER_MEM commands.
template size_t EncodeIndirectParams::getCmdsSizeForIndirectParams() {
    return 3 * sizeof(typename Family::MI_LOAD_REGISTER_MEM);
}

// Returns the size of the three MI_STORE_REGISTER_MEM commands emitted by
// setGroupCountIndirect.
template size_t EncodeIndirectParams::getCmdsSizeForSetGroupCountIndirect() {
    return 3 * (sizeof(MI_STORE_REGISTER_MEM));
}

// Returns, for three dimensions, the size of one load-register-reg, one
// load-register-imm, one MI_MATH header, one ALU instruction and one
// store-register-mem.
// NOTE(review): encodeMulRegVal reserves sizeof(MI_MATH) +
// sizeof(MI_MATH_ALU_INST_INLINE) * NUM_ALU_INST_FOR_READ_MODIFY_WRITE *
// addsCount per dimension, so the single ALU instruction counted here looks
// like an underestimate — confirm how callers use this value.
template size_t EncodeIndirectParams::getCmdsSizeForSetGroupSizeIndirect() {
    return 3 * (sizeof(MI_LOAD_REGISTER_REG) + sizeof(MI_LOAD_REGISTER_IMM) + sizeof(MI_MATH) + sizeof(MI_MATH_ALU_INST_INLINE) + sizeof(MI_STORE_REGISTER_MEM));
}

// Overwrites `*cmd` with Family::cmdInitMiSemaphoreWait and then sets the
// compare operation, the data dword to compare against, the semaphore address
// and polling wait mode.
template void EncodeSempahore::programMiSemaphoreWait(MI_SEMAPHORE_WAIT *cmd, uint64_t compareAddress, uint32_t compareData, COMPARE_OPERATION compareMode) {
    *cmd = Family::cmdInitMiSemaphoreWait;
    cmd->setCompareOperation(compareMode);
    cmd->setSemaphoreDataDword(compareData);
    cmd->setSemaphoreGraphicsAddress(compareAddress);
    cmd->setWaitMode(MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_POLLING_MODE);
}

// Overwrites `*atomic` with Family::cmdInitAtomic and then sets the opcode,
// data size and target address.
template void EncodeAtomic::programMiAtomic(MI_ATOMIC *atomic, uint64_t writeAddress, ATOMIC_OPCODES opcode, DATA_SIZE dataSize) {
    *atomic = Family::cmdInitAtomic;
    atomic->setAtomicOpcode(opcode);
    atomic->setDataSize(dataSize);
    // The address is split into halves; the mask below evaluates to
    // 0xFFFFFFFF, i.e. it keeps the low 32 bits.
    atomic->setMemoryAddress(static_cast(writeAddress & 0x0000FFFFFFFFULL));
    atomic->setMemoryAddressHigh(static_cast(writeAddress >> 32));
}

// Appends an MI_BATCH_BUFFER_START targeting `address` in the PPGTT address
// space to `commandStream`; it is marked as a second-level batch when
// `secondLevel` is true.
template void EncodeBatchBufferStartOrEnd::programBatchBufferStart(LinearStream *commandStream, uint64_t address, bool secondLevel) {
    MI_BATCH_BUFFER_START cmd = Family::cmdInitBatchBufferStart;
    if (secondLevel) {
        cmd.setSecondLevelBatchBuffer(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH);
    }
    cmd.setAddressSpaceIndicator(MI_BATCH_BUFFER_START::ADDRESS_SPACE_INDICATOR_PPGTT);
    cmd.setBatchBufferStartAddressGraphicsaddress472(address);
    auto buffer = commandStream->getSpaceForCmd();
    *reinterpret_cast(buffer) = cmd;
}

// Appends an MI_BATCH_BUFFER_END, copied from Family::cmdInitBatchBufferEnd,
// to the container's command stream.
template void EncodeBatchBufferStartOrEnd::programBatchBufferEnd(CommandContainer &container) {
    MI_BATCH_BUFFER_END cmd = Family::cmdInitBatchBufferEnd;
    auto buffer = container.getCommandStream()->getSpace(sizeof(cmd));
    *reinterpret_cast(buffer) = cmd;
}
} // namespace NEO