Files
compute-runtime/shared/source/gen12lp/command_encoder_gen12lp.cpp
Zbigniew Zdanowicz 8eaa9d690e add tracking of the state of pipeline select for command lists and queues
This change prepares infrastructure for pipeline select handling in
command lists and queues by optimization of number of commands dispatched.
State is synchronized between flush-task immediate and regular command lists.
Next step is to add optimization itself which disables legacy hw command
dispatch algorithm.
This change corrects ADL-P support for systolic mode changes.

Related-To: NEO-5019

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
2022-09-19 11:57:34 +02:00

137 lines
5.8 KiB
C++

/*
* Copyright (C) 2020-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/command_container/command_encoder.h"
#include "shared/source/command_stream/stream_properties.h"
#include "shared/source/gen12lp/hw_cmds_base.h"
#include "shared/source/gen12lp/reg_configs.h"
#include "shared/source/gmm_helper/gmm_helper.h"
#include "shared/source/helpers/preamble.h"
using Family = NEO::Gen12LpFamily;
#include "shared/source/command_container/command_encoder.inl"
#include "shared/source/command_container/command_encoder_bdw_and_later.inl"
#include "shared/source/command_container/command_encoder_tgllp_and_later.inl"
#include "shared/source/command_container/encode_compute_mode_tgllp_and_later.inl"
#include "shared/source/command_container/image_surface_state/compression_params_bdw_and_later.inl"
#include "shared/source/command_container/image_surface_state/compression_params_tgllp_and_later.inl"
#include "shared/source/command_stream/command_stream_receiver.h"
namespace NEO {
// Reports how many extra command-stream bytes are needed for the additional
// pipeline-select workaround commands.
// Returns twice the size of a single pipeline-select command when `isRcs` is
// set and the product configuration reports the 3D pipeline select workaround
// as required; returns 0 in every other case.
template <>
size_t EncodeWA<Family>::getAdditionalPipelineSelectSize(Device &device, bool isRcs) {
    const auto &productConfig = *HwInfoConfig::get(device.getHardwareInfo().platform.eProductFamily);
    // The product query is only made when isRcs is true (short-circuit).
    const bool workaroundApplies = isRcs && productConfig.is3DPipelineSelectWARequired();
    if (!workaroundApplies) {
        return 0;
    }
    return 2 * PreambleHelper<Family>::getCmdSizeForPipelineSelect(device.getHardwareInfo());
}
// Emits one STATE_COMPUTE_MODE command into `csr`.
// The command starts from Family::cmdInitStateComputeMode; its force-non-coherent
// field is set to FORCE_DISABLED when properties.isCoherencyRequired.value == 1
// and to FORCE_GPU_NON_COHERENT otherwise, and the matching mask bit is added
// to the command's existing mask bits.
// `hwInfo` and `logicalStateHelper` are not used by this specialization.
template <>
void EncodeComputeMode<Family>::programComputeModeCommand(LinearStream &csr, StateComputeModeProperties &properties, const HardwareInfo &hwInfo, LogicalStateHelper *logicalStateHelper) {
    using STATE_COMPUTE_MODE = typename Family::STATE_COMPUTE_MODE;
    using FORCE_NON_COHERENT = typename STATE_COMPUTE_MODE::FORCE_NON_COHERENT;

    STATE_COMPUTE_MODE cmd = Family::cmdInitStateComputeMode;
    auto enabledMask = cmd.getMaskBits();

    FORCE_NON_COHERENT forceNonCoherent = FORCE_NON_COHERENT::FORCE_NON_COHERENT_FORCE_GPU_NON_COHERENT;
    if (properties.isCoherencyRequired.value == 1) {
        forceNonCoherent = FORCE_NON_COHERENT::FORCE_NON_COHERENT_FORCE_DISABLED;
    }
    cmd.setForceNonCoherent(forceNonCoherent);

    // Flag the force-non-coherent field as valid on top of the initial mask.
    enabledMask |= Family::stateComputeModeForceNonCoherentMask;
    cmd.setMaskBits(enabledMask);

    // Reserve room in the stream and copy the assembled command into it.
    auto *cmdSpace = reinterpret_cast<STATE_COMPUTE_MODE *>(csr.getSpace(sizeof(STATE_COMPUTE_MODE)));
    *cmdSpace = cmd;
}
// Programs an extra pipeline-select command into `stream` when the product
// configuration reports the 3D pipeline select workaround as required and
// `isRcs` is set; otherwise does nothing.
// The programmed command uses a copy of `args` whose is3DPipelineRequired
// field is overwritten with `is3DPipeline`.
template <>
void EncodeWA<Family>::encodeAdditionalPipelineSelect(LinearStream &stream, const PipelineSelectArgs &args,
                                                      bool is3DPipeline, const HardwareInfo &hwInfo, bool isRcs) {
    const auto &productConfig = *HwInfoConfig::get(hwInfo.platform.eProductFamily);
    if (!productConfig.is3DPipelineSelectWARequired() || !isRcs) {
        return;
    }

    // Work on a copy so the caller's arguments stay untouched.
    PipelineSelectArgs adjustedArgs = args;
    adjustedArgs.is3DPipelineRequired = is3DPipeline;
    PreambleHelper<Family>::programPipelineSelect(&stream, adjustedArgs, hwInfo);
}
// Optionally switches the surface state's MOCS to the OCL_BUFFER_CONST value.
// The switch happens only when all of the following hold:
//   - the surface state currently carries the MOCS for GMM_RESOURCE_USAGE_OCL_BUFFER,
//   - the buffer is read-only or its allocation is a CONSTANT_SURFACE,
//   - the ForceL1Caching debug flag equals 1.
// In every other case the surface state is left unchanged.
template <>
void EncodeSurfaceState<Family>::encodeExtraBufferParams(EncodeSurfaceStateArgs &args) {
    auto *surfaceState = reinterpret_cast<R_SURFACE_STATE *>(args.outMemory);

    const bool hasDefaultBufferMocs = surfaceState->getMemoryObjectControlState() == args.gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
    if (!hasDefaultBufferMocs) {
        return;
    }

    // A missing allocation is treated as "not a constant surface".
    const bool constantSurface = args.allocation && args.allocation->getAllocationType() == AllocationType::CONSTANT_SURFACE;
    const bool l1Candidate = args.isReadOnly || constantSurface;
    // Any flag value other than 1 vetoes the L1 MOCS, even for eligible buffers.
    const bool l1Forced = DebugManager.flags.ForceL1Caching.get() == 1;

    if (l1Candidate && l1Forced) {
        surfaceState->setMemoryObjectControlState(args.gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST));
    }
}
// Gen12LP specialization: binding table prefetch is never reported as
// preferred, so this always returns false.
template <>
bool EncodeSurfaceState<Family>::isBindingTablePrefetchPreferred() {
    constexpr bool prefetchPreferred = false;
    return prefetchPreferred;
}
// Gen12LP specialization of the L3 state encoder: the body is empty, so no
// commands are added to `container` and `enableSLM` is ignored.
template <>
void EncodeL3State<Family>::encode(CommandContainer &container, bool enableSLM) {
}
// Gen12LP specialization: unconditionally turns on MMIO remap in the given
// MI_STORE_REGISTER_MEM command. `workloadPartition` is not used here.
template <>
void EncodeStoreMMIO<Family>::appendFlags(MI_STORE_REGISTER_MEM *storeRegMem, bool workloadPartition) {
    constexpr bool mmioRemapEnabled = true;
    storeRegMem->setMmioRemapEnable(mmioRemapEnabled);
}
// Programs a pipeline-select command into the container's command stream.
// The systolic mode comes from the kernel descriptor's
// usesSystolicPipelineSelectMode flag and the systolic support setting from
// container.systolicModeSupport; all other PipelineSelectArgs fields keep
// their default-constructed values.
template <>
void EncodeComputeMode<Family>::adjustPipelineSelect(CommandContainer &container, const NEO::KernelDescriptor &kernelDescriptor) {
    PipelineSelectArgs selectArgs;
    selectArgs.systolicPipelineSelectMode = kernelDescriptor.kernelAttributes.flags.usesSystolicPipelineSelectMode;
    selectArgs.systolicPipelineSelectSupport = container.systolicModeSupport;

    auto &hardwareInfo = container.getDevice()->getHardwareInfo();
    auto commandStream = container.getCommandStream();
    PreambleHelper<Family>::programPipelineSelect(commandStream, selectArgs, hardwareInfo);
}
// Explicit instantiation definitions of the encoder templates for
// Family (NEO::Gen12LpFamily). They follow the specializations defined
// earlier in this translation unit.
template struct EncodeDispatchKernel<Family>;
template struct EncodeStates<Family>;
template struct EncodeMath<Family>;
template struct EncodeMathMMIO<Family>;
template struct EncodeIndirectParams<Family>;
template struct EncodeSetMMIO<Family>;
template struct EncodeL3State<Family>;
template struct EncodeMediaInterfaceDescriptorLoad<Family>;
template struct EncodeStateBaseAddress<Family>;
template struct EncodeStoreMMIO<Family>;
template struct EncodeSurfaceState<Family>;
template struct EncodeAtomic<Family>;
template struct EncodeSempahore<Family>;
template struct EncodeBatchBufferStartOrEnd<Family>;
template struct EncodeMiFlushDW<Family>;
template struct EncodeWA<Family>;
template struct EncodeMemoryPrefetch<Family>;
template struct EncodeMiArbCheck<Family>;
template struct EncodeComputeMode<Family>;
template struct EncodeEnableRayTracing<Family>;
template struct EncodeNoop<Family>;
template struct EncodeStoreMemory<Family>;
template struct EncodeMemoryFence<Family>;
template struct EncodeKernelArgsBuffer<Family>;
} // namespace NEO