/*
* Copyright (C) 2020-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "shared/source/command_container/command_encoder.h"
#include "shared/source/command_container/implicit_scaling.h"
#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/command_stream/linear_stream.h"
#include "shared/source/command_stream/preemption.h"
#include "shared/source/command_stream/scratch_space_controller.h"
#include "shared/source/command_stream/stream_properties.h"
#include "shared/source/debug_settings/debug_settings_manager.h"
#include "shared/source/execution_environment/root_device_environment.h"
#include "shared/source/gmm_helper/client_context/gmm_client_context.h"
#include "shared/source/helpers/basic_math.h"
#include "shared/source/helpers/cache_policy.h"
#include "shared/source/helpers/compiler_product_helper.h"
#include "shared/source/helpers/constants.h"
#include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/helpers/hw_walk_order.h"
#include "shared/source/helpers/in_order_cmd_helpers.h"
#include "shared/source/helpers/pause_on_gpu_properties.h"
#include "shared/source/helpers/pipe_control_args.h"
#include "shared/source/helpers/ray_tracing_helper.h"
#include "shared/source/helpers/simd_helper.h"
#include "shared/source/helpers/state_base_address.h"
#include "shared/source/kernel/dispatch_kernel_encoder_interface.h"
#include "shared/source/kernel/implicit_args_helper.h"
#include "shared/source/kernel/kernel_descriptor.h"
#include "shared/source/os_interface/product_helper.h"
#include <algorithm>
#include <type_traits>
namespace NEO {
constexpr size_t timestampDestinationAddressAlignment = 16;
constexpr size_t immWriteDestinationAddressAlignment = 8;
template <typename Family>
template <typename InterfaceDescriptorType>
void EncodeDispatchKernel<Family>::setGrfInfo(InterfaceDescriptorType *pInterfaceDescriptor, uint32_t grfCount,
const size_t &sizeCrossThreadData, const size_t &sizePerThreadData,
const RootDeviceEnvironment &rootDeviceEnvironment) {
}
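// Encodes a single kernel dispatch into the command container: fills the walker's
// interface descriptor (kernel start pointer, barriers, SLM, binding table and
// sampler state), copies the cross-thread and per-thread payload into the indirect
// object heap, programs STATE_BASE_ADDRESS when heaps are dirty, sets up post-sync,
// and emits the walker (via the implicit scaling path when partitionCount > 1).
// With args.makeCommandView set, the walker and payload are written to the
// caller-provided CPU buffers instead of the command stream.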
template <typename Family>
template <typename WalkerType>
void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDispatchKernelArgs &args) {
using STATE_BASE_ADDRESS = typename Family::STATE_BASE_ADDRESS;
UNRECOVERABLE_IF(args.makeCommandView && (args.cpuWalkerBuffer == nullptr || args.cpuPayloadBuffer == nullptr));
constexpr bool heaplessModeEnabled = Family::template isHeaplessMode<WalkerType>();
const HardwareInfo &hwInfo = args.device->getHardwareInfo();
auto &rootDeviceEnvironment = args.device->getRootDeviceEnvironment();
const auto &kernelDescriptor = args.dispatchInterface->getKernelDescriptor();
auto sizeCrossThreadData = args.dispatchInterface->getCrossThreadDataSize();
auto sizePerThreadData = args.dispatchInterface->getPerThreadDataSize();
auto sizePerThreadDataForWholeGroup = args.dispatchInterface->getPerThreadDataSizeForWholeThreadGroup();
auto pImplicitArgs = args.dispatchInterface->getImplicitArgs();
LinearStream *listCmdBufferStream = container.getCommandStream();
auto threadDims = static_cast<const uint32_t *>(args.threadGroupDimensions);
const Vec3<size_t> threadStartVec{0, 0, 0};
Vec3<size_t> threadDimsVec{0, 0, 0};
if (!args.isIndirect) {
threadDimsVec = {threadDims[0], threadDims[1], threadDims[2]};
}
if (!args.makeCommandView) {
bool systolicModeRequired = kernelDescriptor.kernelAttributes.flags.usesSystolicPipelineSelectMode;
if (container.systolicModeSupportRef() && (container.lastPipelineSelectModeRequiredRef() != systolicModeRequired)) {
container.lastPipelineSelectModeRequiredRef() = systolicModeRequired;
EncodeComputeMode<Family>::adjustPipelineSelect(container, kernelDescriptor);
}
}
WalkerType walkerCmd = Family::template getInitGpuWalker<WalkerType>();
auto &idd = walkerCmd.getInterfaceDescriptor();
EncodeDispatchKernel<Family>::setGrfInfo(&idd, kernelDescriptor.kernelAttributes.numGrfRequired, sizeCrossThreadData,
sizePerThreadData, rootDeviceEnvironment);
bool localIdsGenerationByRuntime = args.dispatchInterface->requiresGenerationOfLocalIdsByRuntime();
auto requiredWorkgroupOrder = args.dispatchInterface->getRequiredWorkgroupOrder();
{
auto isaAllocation = args.dispatchInterface->getIsaAllocation();
UNRECOVERABLE_IF(nullptr == isaAllocation);
uint64_t kernelStartPointer = args.dispatchInterface->getIsaOffsetInParentAllocation();
if constexpr (heaplessModeEnabled) {
kernelStartPointer += isaAllocation->getGpuAddress();
} else {
kernelStartPointer += isaAllocation->getGpuAddressToPatch();
}
if (!localIdsGenerationByRuntime) {
kernelStartPointer += kernelDescriptor.entryPoints.skipPerThreadDataLoad;
}
idd.setKernelStartPointer(kernelStartPointer);
}
if (args.dispatchInterface->getKernelDescriptor().kernelAttributes.flags.usesAssert && args.device->getL0Debugger() != nullptr) {
idd.setSoftwareExceptionEnable(1);
}
auto threadsPerThreadGroup = args.dispatchInterface->getNumThreadsPerThreadGroup();
idd.setNumberOfThreadsInGpgpuThreadGroup(threadsPerThreadGroup);
EncodeDispatchKernel<Family>::programBarrierEnable(idd,
kernelDescriptor.kernelAttributes.barrierCount,
hwInfo);
EncodeDispatchKernel<Family>::encodeEuSchedulingPolicy(&idd, kernelDescriptor, args.defaultPipelinedThreadArbitrationPolicy);
auto slmSize = EncodeDispatchKernel<Family>::computeSlmValues(hwInfo, args.dispatchInterface->getSlmTotalSize());
if (debugManager.flags.OverrideSlmAllocationSize.get() != -1) {
slmSize = static_cast<uint32_t>(debugManager.flags.OverrideSlmAllocationSize.get());
}
idd.setSharedLocalMemorySize(slmSize);
auto bindingTableStateCount = kernelDescriptor.payloadMappings.bindingTable.numEntries;
bool sshProgrammingRequired = true;
auto &productHelper = args.device->getProductHelper();
if (productHelper.isSkippingStatefulInformationRequired(kernelDescriptor)) {
bindingTableStateCount = 0u;
sshProgrammingRequired = false;
}
if (sshProgrammingRequired && !args.makeCommandView) {
bool isBindlessKernel = NEO::KernelDescriptor::isBindlessAddressingKernel(kernelDescriptor);
if (isBindlessKernel) {
bool globalBindlessSsh = args.device->getBindlessHeapsHelper() != nullptr;
auto sshHeapSize = args.dispatchInterface->getSurfaceStateHeapDataSize();
if (sshHeapSize > 0u) {
auto ssh = args.surfaceStateHeap;
if (ssh == nullptr) {
container.prepareBindfulSsh();
ssh = container.getHeapWithRequiredSizeAndAlignment(HeapType::surfaceState, sshHeapSize, NEO::EncodeDispatchKernel<Family>::getDefaultSshAlignment());
}
uint64_t bindlessSshBaseOffset = ptrDiff(ssh->getSpace(0), ssh->getCpuBase());
if (globalBindlessSsh) {
bindlessSshBaseOffset += ptrDiff(ssh->getGraphicsAllocation()->getGpuAddress(), ssh->getGraphicsAllocation()->getGpuBaseAddress());
}
DEBUG_BREAK_IF(bindingTableStateCount > 0u);
if (bindingTableStateCount == 0) {
// Allocate space for new ssh data
auto dstSurfaceState = ssh->getSpace(sshHeapSize);
memcpy_s(dstSurfaceState, sshHeapSize, args.dispatchInterface->getSurfaceStateHeapData(), sshHeapSize);
}
args.dispatchInterface->patchBindlessOffsetsInCrossThreadData(bindlessSshBaseOffset);
}
} else {
if constexpr (heaplessModeEnabled == false) {
if (bindingTableStateCount > 0u) {
auto ssh = args.surfaceStateHeap;
if (ssh == nullptr) {
container.prepareBindfulSsh();
ssh = container.getHeapWithRequiredSizeAndAlignment(HeapType::surfaceState, args.dispatchInterface->getSurfaceStateHeapDataSize(), NEO::EncodeDispatchKernel<Family>::getDefaultSshAlignment());
}
auto bindingTablePointer = static_cast<uint32_t>(EncodeSurfaceState<Family>::pushBindingTableAndSurfaceStates(
*ssh,
args.dispatchInterface->getSurfaceStateHeapData(),
args.dispatchInterface->getSurfaceStateHeapDataSize(), bindingTableStateCount,
kernelDescriptor.payloadMappings.bindingTable.tableOffset));
idd.setBindingTablePointer(bindingTablePointer);
}
}
}
}
PreemptionHelper::programInterfaceDescriptorDataPreemption<Family>(&idd, args.preemptionMode);
uint32_t samplerCount = 0;
if constexpr (Family::supportsSampler && heaplessModeEnabled == false) {
if (args.device->getDeviceInfo().imageSupport && !args.makeCommandView) {
uint32_t samplerStateOffset = 0;
if (kernelDescriptor.payloadMappings.samplerTable.numSamplers > 0) {
auto dsHeap = args.dynamicStateHeap;
if (dsHeap == nullptr) {
dsHeap = container.getIndirectHeap(HeapType::dynamicState);
auto dshSizeRequired = NEO::EncodeDispatchKernel<Family>::getSizeRequiredDsh(kernelDescriptor, container.getNumIddPerBlock());
if (dsHeap->getAvailableSpace() <= dshSizeRequired) {
dsHeap = container.getHeapWithRequiredSizeAndAlignment(HeapType::dynamicState, dsHeap->getMaxAvailableSpace(), NEO::EncodeDispatchKernel<Family>::getDefaultDshAlignment());
}
}
UNRECOVERABLE_IF(!dsHeap);
samplerCount = kernelDescriptor.payloadMappings.samplerTable.numSamplers;
samplerStateOffset = EncodeStates<Family>::copySamplerState(
dsHeap, kernelDescriptor.payloadMappings.samplerTable.tableOffset,
kernelDescriptor.payloadMappings.samplerTable.numSamplers, kernelDescriptor.payloadMappings.samplerTable.borderColor,
args.dispatchInterface->getDynamicStateHeapData(),
args.device->getBindlessHeapsHelper(), rootDeviceEnvironment);
if (args.device->getBindlessHeapsHelper() && !args.device->getBindlessHeapsHelper()->isGlobalDshSupported()) {
// add offset of graphics allocation base address relative to heap base address
samplerStateOffset += static_cast<uint32_t>(ptrDiff(dsHeap->getGpuBase(), args.device->getBindlessHeapsHelper()->getGlobalHeapsBase()));
}
args.dispatchInterface->patchSamplerBindlessOffsetsInCrossThreadData(samplerStateOffset);
}
idd.setSamplerStatePointer(samplerStateOffset);
}
}
if constexpr (heaplessModeEnabled == false) {
EncodeDispatchKernel<Family>::adjustBindingTablePrefetch(idd, samplerCount, bindingTableStateCount);
}
uint64_t offsetThreadData = 0u;
constexpr uint32_t inlineDataSize = WalkerType::getInlineDataSize();
auto crossThreadData = args.dispatchInterface->getCrossThreadData();
uint32_t inlineDataProgrammingOffset = 0u;
bool inlineDataProgramming = EncodeDispatchKernel<Family>::inlineDataProgrammingRequired(kernelDescriptor);
if (inlineDataProgramming) {
inlineDataProgrammingOffset = std::min(inlineDataSize, sizeCrossThreadData);
auto dest = reinterpret_cast<char *>(walkerCmd.getInlineDataPointer());
memcpy_s(dest, inlineDataSize, crossThreadData, inlineDataProgrammingOffset);
sizeCrossThreadData -= inlineDataProgrammingOffset;
crossThreadData = ptrOffset(crossThreadData, inlineDataProgrammingOffset);
inlineDataProgramming = inlineDataProgrammingOffset != 0;
}
auto scratchAddressForImmediatePatching = EncodeDispatchKernel<Family>::getScratchAddressForImmediatePatching<heaplessModeEnabled>(container, args);
uint32_t sizeThreadData = sizePerThreadDataForWholeGroup + sizeCrossThreadData;
uint32_t sizeForImplicitArgsPatching = NEO::ImplicitArgsHelper::getSizeForImplicitArgsPatching(pImplicitArgs, kernelDescriptor, !localIdsGenerationByRuntime, rootDeviceEnvironment);
uint32_t sizeForImplicitArgsStruct = NEO::ImplicitArgsHelper::getSizeForImplicitArgsStruct(pImplicitArgs, kernelDescriptor, true, rootDeviceEnvironment);
uint32_t iohRequiredSize = sizeThreadData + sizeForImplicitArgsPatching + args.reserveExtraPayloadSpace;
{
void *ptr = nullptr;
if (!args.makeCommandView) {
auto heap = container.getIndirectHeap(HeapType::indirectObject);
UNRECOVERABLE_IF(!heap);
heap->align(Family::cacheLineSize);
if (args.isKernelDispatchedFromImmediateCmdList) {
ptr = container.getHeapWithRequiredSizeAndAlignment(HeapType::indirectObject, iohRequiredSize, Family::indirectDataAlignment)->getSpace(iohRequiredSize);
} else {
ptr = container.getHeapSpaceAllowGrow(HeapType::indirectObject, iohRequiredSize);
}
offsetThreadData = (is64bit ? heap->getHeapGpuStartOffset() : heap->getHeapGpuBase()) + static_cast<uint64_t>(heap->getUsed() - sizeThreadData - args.reserveExtraPayloadSpace);
if (pImplicitArgs) {
offsetThreadData -= sizeForImplicitArgsStruct;
pImplicitArgs->localIdTablePtr = heap->getGraphicsAllocation()->getGpuAddress() + heap->getUsed() - iohRequiredSize;
EncodeDispatchKernel<Family>::patchScratchAddressInImplicitArgs<heaplessModeEnabled>(*pImplicitArgs, scratchAddressForImmediatePatching, args.immediateScratchAddressPatching);
ptr = NEO::ImplicitArgsHelper::patchImplicitArgs(ptr, *pImplicitArgs, kernelDescriptor, std::make_pair(localIdsGenerationByRuntime, requiredWorkgroupOrder), rootDeviceEnvironment, &args.outImplicitArgsPtr);
}
if (args.isIndirect) {
auto gpuPtr = heap->getGraphicsAllocation()->getGpuAddress() + static_cast<uint64_t>(heap->getUsed() - sizeThreadData - inlineDataProgrammingOffset);
uint64_t implicitArgsGpuPtr = 0u;
if (pImplicitArgs) {
implicitArgsGpuPtr = gpuPtr + inlineDataProgrammingOffset - sizeForImplicitArgsStruct;
}
EncodeIndirectParams<Family>::encode(container, gpuPtr, args.dispatchInterface, implicitArgsGpuPtr);
}
} else {
ptr = args.cpuPayloadBuffer;
}
if (sizeCrossThreadData > 0) {
memcpy_s(ptr, sizeCrossThreadData,
crossThreadData, sizeCrossThreadData);
}
auto perThreadDataPtr = args.dispatchInterface->getPerThreadData();
if (perThreadDataPtr != nullptr) {
ptr = ptrOffset(ptr, sizeCrossThreadData);
memcpy_s(ptr, sizePerThreadDataForWholeGroup,
perThreadDataPtr, sizePerThreadDataForWholeGroup);
}
}
if (args.isHeaplessStateInitEnabled == false && !args.makeCommandView) {
if (container.isAnyHeapDirty() ||
args.requiresUncachedMocs) {
PipeControlArgs syncArgs;
syncArgs.dcFlushEnable = args.dcFlushEnable;
MemorySynchronizationCommands<Family>::addSingleBarrier(*container.getCommandStream(), syncArgs);
STATE_BASE_ADDRESS sbaCmd;
auto gmmHelper = container.getDevice()->getGmmHelper();
uint32_t statelessMocsIndex =
args.requiresUncachedMocs ? (gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED) >> 1) : (gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER) >> 1);
auto l1CachePolicy = container.l1CachePolicyDataRef()->getL1CacheValue(false);
auto l1CachePolicyDebuggerActive = container.l1CachePolicyDataRef()->getL1CacheValue(true);
EncodeStateBaseAddressArgs<Family> encodeStateBaseAddressArgs = {
&container, // container
sbaCmd, // sbaCmd
nullptr, // sbaProperties
statelessMocsIndex, // statelessMocsIndex
l1CachePolicy, // l1CachePolicy
l1CachePolicyDebuggerActive, // l1CachePolicyDebuggerActive
args.partitionCount > 1, // multiOsContextCapable
args.isRcs, // isRcs
container.doubleSbaWaRef(), // doubleSbaWa
heaplessModeEnabled // heaplessModeEnabled
};
EncodeStateBaseAddress<Family>::encode(encodeStateBaseAddressArgs);
container.setDirtyStateForAllHeaps(false);
bool sbaTrackingEnabled = NEO::Debugger::isDebugEnabled(args.isInternal) && args.device->getL0Debugger();
NEO::EncodeStateBaseAddress<Family>::setSbaTrackingForL0DebuggerIfEnabled(sbaTrackingEnabled,
*args.device,
*container.getCommandStream(),
sbaCmd, container.isUsingPrimaryBuffer());
}
}
if (!args.makeCommandView) {
if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::debugManager.flags.PauseOnEnqueue.get(), args.device->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::BeforeWorkload)) {
void *commandBuffer = listCmdBufferStream->getSpace(MemorySynchronizationCommands<Family>::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, false));
args.additionalCommands->push_back(commandBuffer);
EncodeSemaphore<Family>::applyMiSemaphoreWaitCommand(*listCmdBufferStream, *args.additionalCommands);
}
}
uint8_t *inlineData = reinterpret_cast<uint8_t *>(walkerCmd.getInlineDataPointer());
EncodeDispatchKernel<Family>::programInlineDataHeapless<heaplessModeEnabled>(inlineData, args, container, offsetThreadData, scratchAddressForImmediatePatching);
if constexpr (heaplessModeEnabled == false) {
if (!args.makeCommandView) {
walkerCmd.setIndirectDataStartAddress(static_cast<uint32_t>(offsetThreadData));
walkerCmd.setIndirectDataLength(sizeThreadData);
}
}
container.getIndirectHeap(HeapType::indirectObject)->align(NEO::EncodeDispatchKernel<Family>::getDefaultIOHAlignment());
EncodeDispatchKernel<Family>::encodeThreadData(walkerCmd,
nullptr,
threadDims,
args.dispatchInterface->getGroupSize(),
kernelDescriptor.kernelAttributes.simdSize,
kernelDescriptor.kernelAttributes.numLocalIdChannels,
args.dispatchInterface->getNumThreadsPerThreadGroup(),
args.dispatchInterface->getThreadExecutionMask(),
localIdsGenerationByRuntime,
inlineDataProgramming,
args.isIndirect,
requiredWorkgroupOrder,
rootDeviceEnvironment);
if (args.inOrderExecInfo) {
EncodeDispatchKernel<Family>::setupPostSyncForInOrderExec<WalkerType>(walkerCmd, args);
} else if (args.eventAddress) {
EncodeDispatchKernel<Family>::setupPostSyncForRegularEvent<WalkerType>(walkerCmd, args);
} else {
EncodeDispatchKernel<Family>::forceComputeWalkerPostSyncFlushWithWrite<WalkerType>(walkerCmd);
}
if (debugManager.flags.ForceComputeWalkerPostSyncFlush.get() == 1) {
auto &postSync = walkerCmd.getPostSync();
postSync.setDataportPipelineFlush(true);
postSync.setDataportSubsliceCacheFlush(true);
}
walkerCmd.setPredicateEnable(args.isPredicate);
auto threadGroupCount = walkerCmd.getThreadGroupIdXDimension() * walkerCmd.getThreadGroupIdYDimension() * walkerCmd.getThreadGroupIdZDimension();
EncodeDispatchKernel<Family>::adjustInterfaceDescriptorData(idd, *args.device, hwInfo, threadGroupCount, kernelDescriptor.kernelAttributes.numGrfRequired, walkerCmd);
if (debugManager.flags.PrintKernelDispatchParameters.get()) {
fprintf(stdout, "kernel, %s, grfCount, %d, simdSize, %d, tilesCount, %d, implicitScaling, %s, threadGroupCount, %d, numberOfThreadsInGpgpuThreadGroup, %d, threadGroupDimensions, %d, %d, %d, threadGroupDispatchSize enum, %d\n",
kernelDescriptor.kernelMetadata.kernelName.c_str(),
kernelDescriptor.kernelAttributes.numGrfRequired,
kernelDescriptor.kernelAttributes.simdSize,
args.device->getNumSubDevices(),
ImplicitScalingHelper::isImplicitScalingEnabled(args.device->getDeviceBitfield(), true) ? "Yes" : "No",
threadGroupCount,
idd.getNumberOfThreadsInGpgpuThreadGroup(),
walkerCmd.getThreadGroupIdXDimension(),
walkerCmd.getThreadGroupIdYDimension(),
walkerCmd.getThreadGroupIdZDimension(),
idd.getThreadGroupDispatchSize());
}
EncodeDispatchKernel<Family>::appendAdditionalIDDFields(&idd, rootDeviceEnvironment, threadsPerThreadGroup,
args.dispatchInterface->getSlmTotalSize(),
args.dispatchInterface->getSlmPolicy());
EncodeWalkerArgs walkerArgs{
args.isCooperative ? KernelExecutionType::concurrent : KernelExecutionType::defaultType,
args.requiresSystemMemoryFence(),
kernelDescriptor,
args.requiredDispatchWalkOrder,
args.additionalSizeParam,
args.device->getDeviceInfo().maxFrontEndThreads};
EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(rootDeviceEnvironment, walkerCmd, walkerArgs);
uint32_t workgroupSize = args.dispatchInterface->getGroupSize()[0] * args.dispatchInterface->getGroupSize()[1] * args.dispatchInterface->getGroupSize()[2];
bool isRequiredWorkGroupOrder = args.requiredDispatchWalkOrder != NEO::RequiredDispatchWalkOrder::none;
if (args.partitionCount > 1 && !args.isInternal) {
const uint64_t workPartitionAllocationGpuVa = args.device->getDefaultEngine().commandStreamReceiver->getWorkPartitionAllocationGpuAddress();
ImplicitScalingDispatchCommandArgs implicitScalingArgs{
workPartitionAllocationGpuVa, // workPartitionAllocationGpuVa
&hwInfo, // hwInfo
&args.outWalkerPtr, // outWalkerPtr
args.requiredPartitionDim, // requiredPartitionDim
args.partitionCount, // partitionCount
workgroupSize, // workgroupSize
args.maxWgCountPerTile, // maxWgCountPerTile
!(container.getFlushTaskUsedForImmediate() || container.isUsingPrimaryBuffer()), // useSecondaryBatchBuffer
!args.isKernelDispatchedFromImmediateCmdList, // apiSelfCleanup
args.dcFlushEnable, // dcFlush
EncodeDispatchKernel<Family>::singleTileExecImplicitScalingRequired(args.isCooperative), // forceExecutionOnSingleTile
args.makeCommandView, // blockDispatchToCommandBuffer
isRequiredWorkGroupOrder}; // isRequiredWorkGroupOrder
ImplicitScalingDispatch<Family>::dispatchCommands(*listCmdBufferStream,
walkerCmd,
args.device->getDeviceBitfield(),
implicitScalingArgs);
args.partitionCount = implicitScalingArgs.partitionCount;
} else {
args.partitionCount = 1;
EncodeDispatchKernel<Family>::setWalkerRegionSettings(walkerCmd, hwInfo, args.partitionCount, workgroupSize, args.maxWgCountPerTile, isRequiredWorkGroupOrder);
if (!args.makeCommandView) {
auto buffer = listCmdBufferStream->getSpaceForCmd<WalkerType>();
args.outWalkerPtr = buffer;
*buffer = walkerCmd;
}
}
if (args.cpuWalkerBuffer) {
*reinterpret_cast<WalkerType *>(args.cpuWalkerBuffer) = walkerCmd;
}
if (!args.makeCommandView) {
if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::debugManager.flags.PauseOnEnqueue.get(), args.device->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::AfterWorkload)) {
void *commandBuffer = listCmdBufferStream->getSpace(MemorySynchronizationCommands<Family>::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, false));
args.additionalCommands->push_back(commandBuffer);
EncodeSemaphore<Family>::applyMiSemaphoreWaitCommand(*listCmdBufferStream, *args.additionalCommands);
}
}
}
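// Programs walker post-sync for a regular event: a timestamp write for profiling
// events, otherwise an immediate-data write, each with dataport flushes and an
// alignment check on the destination address.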
template <typename Family>
template <typename WalkerType>
void EncodeDispatchKernel<Family>::setupPostSyncForRegularEvent(WalkerType &walkerCmd, const EncodeDispatchKernelArgs &args) {
using POSTSYNC_DATA = typename WalkerType::PostSyncType;
auto &postSync = walkerCmd.getPostSync();
postSync.setDataportPipelineFlush(true);
postSync.setDataportSubsliceCacheFlush(true);
if (NEO::debugManager.flags.ForcePostSyncL1Flush.get() != -1) {
postSync.setDataportPipelineFlush(!!NEO::debugManager.flags.ForcePostSyncL1Flush.get());
postSync.setDataportSubsliceCacheFlush(!!NEO::debugManager.flags.ForcePostSyncL1Flush.get());
}
auto operationType = POSTSYNC_DATA::OPERATION_WRITE_IMMEDIATE_DATA;
uint64_t gpuVa = args.eventAddress;
uint64_t immData = args.postSyncImmValue;
if (args.isTimestampEvent) {
operationType = POSTSYNC_DATA::OPERATION_WRITE_TIMESTAMP;
immData = 0;
UNRECOVERABLE_IF(!(isAligned<timestampDestinationAddressAlignment>(gpuVa)));
} else {
UNRECOVERABLE_IF(!(isAligned<immWriteDestinationAddressAlignment>(gpuVa)));
}
postSync.setOperation(operationType);
postSync.setImmediateData(immData);
postSync.setDestinationAddress(gpuVa);
EncodeDispatchKernel<Family>::setupPostSyncMocs(walkerCmd, args.device->getRootDeviceEnvironment(), args.dcFlushEnable);
EncodeDispatchKernel<Family>::adjustTimestampPacket(walkerCmd, args);
}
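// Programs walker post-sync for in-order execution: writes the in-order counter
// value to the counter allocation once the walker completes.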
template <typename Family>
template <typename WalkerType>
void EncodeDispatchKernel<Family>::setupPostSyncForInOrderExec(WalkerType &walkerCmd, const EncodeDispatchKernelArgs &args) {
using POSTSYNC_DATA = typename WalkerType::PostSyncType;
auto &postSync = walkerCmd.getPostSync();
postSync.setDataportPipelineFlush(true);
postSync.setDataportSubsliceCacheFlush(true);
if (NEO::debugManager.flags.ForcePostSyncL1Flush.get() != -1) {
postSync.setDataportPipelineFlush(!!NEO::debugManager.flags.ForcePostSyncL1Flush.get());
postSync.setDataportSubsliceCacheFlush(!!NEO::debugManager.flags.ForcePostSyncL1Flush.get());
}
uint64_t gpuVa = args.inOrderExecInfo->getBaseDeviceAddress() + args.inOrderExecInfo->getAllocationOffset();
UNRECOVERABLE_IF(!(isAligned<immWriteDestinationAddressAlignment>(gpuVa)));
postSync.setOperation(POSTSYNC_DATA::OPERATION_WRITE_IMMEDIATE_DATA);
postSync.setImmediateData(args.inOrderCounterValue);
postSync.setDestinationAddress(gpuVa);
EncodeDispatchKernel<Family>::setupPostSyncMocs(walkerCmd, args.device->getRootDeviceEnvironment(), args.dcFlushEnable);
EncodeDispatchKernel<Family>::adjustTimestampPacket(walkerCmd, args);
}
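// Selects the MOCS for the post-sync write: the cacheline-misaligned (uncached)
// usage when a DC flush is requested, the standard buffer usage otherwise;
// OverridePostSyncMocs takes precedence over both.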
template <typename Family>
template <typename WalkerType>
inline void EncodeDispatchKernel<Family>::setupPostSyncMocs(WalkerType &walkerCmd, const RootDeviceEnvironment &rootDeviceEnvironment, bool dcFlush) {
auto &postSyncData = walkerCmd.getPostSync();
auto gmmHelper = rootDeviceEnvironment.getGmmHelper();
if (dcFlush) {
postSyncData.setMocs(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED));
} else {
postSyncData.setMocs(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER));
}
if (debugManager.flags.OverridePostSyncMocs.get() != -1) {
postSyncData.setMocs(debugManager.flags.OverridePostSyncMocs.get());
}
}
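// Returns true when local ids must be generated by the runtime. Hw generation is
// only viable for SIMD > 1 and a total LWS of at most 1024, with power-of-2 sizes
// in the leading walk-order dimensions; a kernel-required walk order must also
// match one of the hw-compatible dimension orders.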
template <typename Family>
bool EncodeDispatchKernel<Family>::isRuntimeLocalIdsGenerationRequired(uint32_t activeChannels,
const size_t *lws,
std::array<uint8_t, 3> walkOrder,
bool requireInputWalkOrder,
uint32_t &requiredWalkOrder,
uint32_t simd) {
if (simd == 1) {
return true;
}
bool hwGenerationOfLocalIdsEnabled = true;
if (debugManager.flags.EnableHwGenerationLocalIds.get() != -1) {
hwGenerationOfLocalIdsEnabled = !!debugManager.flags.EnableHwGenerationLocalIds.get();
}
if (hwGenerationOfLocalIdsEnabled) {
if (activeChannels == 0) {
return false;
}
size_t totalLwsSize = 1u;
for (auto dimension = 0u; dimension < activeChannels; dimension++) {
totalLwsSize *= lws[dimension];
}
if (totalLwsSize > 1024u) {
return true;
}
// check if we need to follow kernel requirements
if (requireInputWalkOrder) {
for (uint32_t dimension = 0; dimension < activeChannels - 1; dimension++) {
if (!Math::isPow2<size_t>(lws[walkOrder[dimension]])) {
return true;
}
}
auto index = 0u;
while (index < HwWalkOrderHelper::walkOrderPossibilties) {
if (walkOrder[0] == HwWalkOrderHelper::compatibleDimensionOrders[index][0] &&
walkOrder[1] == HwWalkOrderHelper::compatibleDimensionOrders[index][1]) {
break;
}
index++;
}
DEBUG_BREAK_IF(index >= HwWalkOrderHelper::walkOrderPossibilties);
requiredWalkOrder = index;
return false;
}
// the kernel doesn't specify walk order requirements; check whether any hw-compatible dimension order fits
for (uint32_t walkOrder = 0; walkOrder < HwWalkOrderHelper::walkOrderPossibilties; walkOrder++) {
bool allDimensionsCompatible = true;
for (uint32_t dimension = 0; dimension < activeChannels - 1; dimension++) {
if (!Math::isPow2<size_t>(lws[HwWalkOrderHelper::compatibleDimensionOrders[walkOrder][dimension]])) {
allDimensionsCompatible = false;
break;
}
}
if (allDimensionsCompatible) {
requiredWalkOrder = walkOrder;
return false;
}
}
}
return true;
}
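// Programs the walker's dispatch geometry: thread group counts (or the indirect
// parameter fetch), starting group offsets, the execution mask for trailing partial
// SIMD lanes (e.g. for SIMD16 and a 50-lane workgroup the remainder is 2 lanes,
// giving a mask of 0b11), SIMD/message SIMD size and, when local ids are
// hw-generated, the per-dimension local id emission fields and walk order.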
template <typename Family>
template <typename WalkerType>
void EncodeDispatchKernel<Family>::encodeThreadData(WalkerType &walkerCmd,
const uint32_t *startWorkGroup,
const uint32_t *numWorkGroups,
const uint32_t *workGroupSizes,
uint32_t simd,
uint32_t localIdDimensions,
uint32_t threadsPerThreadGroup,
uint32_t threadExecutionMask,
bool localIdsGenerationByRuntime,
bool inlineDataProgrammingRequired,
bool isIndirect,
uint32_t requiredWorkGroupOrder,
const RootDeviceEnvironment &rootDeviceEnvironment) {
if (isIndirect) {
walkerCmd.setIndirectParameterEnable(true);
} else {
walkerCmd.setThreadGroupIdXDimension(static_cast<uint32_t>(numWorkGroups[0]));
walkerCmd.setThreadGroupIdYDimension(static_cast<uint32_t>(numWorkGroups[1]));
walkerCmd.setThreadGroupIdZDimension(static_cast<uint32_t>(numWorkGroups[2]));
}
if (startWorkGroup) {
walkerCmd.setThreadGroupIdStartingX(static_cast<uint32_t>(startWorkGroup[0]));
walkerCmd.setThreadGroupIdStartingY(static_cast<uint32_t>(startWorkGroup[1]));
walkerCmd.setThreadGroupIdStartingZ(static_cast<uint32_t>(startWorkGroup[2]));
}
uint64_t executionMask = threadExecutionMask;
if (executionMask == 0) {
auto workGroupSize = workGroupSizes[0] * workGroupSizes[1] * workGroupSizes[2];
auto remainderSimdLanes = workGroupSize & (simd - 1);
executionMask = maxNBitValue(remainderSimdLanes);
if (!executionMask) {
executionMask = maxNBitValue(isSimd1(simd) ? 32 : simd);
}
}
walkerCmd.setExecutionMask(static_cast<uint32_t>(executionMask));
walkerCmd.setSimdSize(getSimdConfig<WalkerType>(simd));
walkerCmd.setMessageSimd(walkerCmd.getSimdSize());
if (debugManager.flags.ForceSimdMessageSizeInWalker.get() != -1) {
walkerCmd.setMessageSimd(debugManager.flags.ForceSimdMessageSizeInWalker.get());
}
// 1) Cross-thread inline data is placed in R1, but if the kernel uses local ids the cross-thread data must be placed
// further back; so whenever local ids are driver- or hw-generated, reserve space by setting the right emitLocalId values.
// 2) Hw auto-generation of local ids should only be enabled when local ids are in fact used.
if (!localIdsGenerationByRuntime && localIdDimensions > 0) {
UNRECOVERABLE_IF(localIdDimensions != 3);
uint32_t emitLocalIdsForDim = (1 << 0) | (1 << 1) | (1 << 2);
walkerCmd.setEmitLocalId(emitLocalIdsForDim);
walkerCmd.setLocalXMaximum(static_cast<uint32_t>(workGroupSizes[0] - 1));
walkerCmd.setLocalYMaximum(static_cast<uint32_t>(workGroupSizes[1] - 1));
walkerCmd.setLocalZMaximum(static_cast<uint32_t>(workGroupSizes[2] - 1));
walkerCmd.setGenerateLocalId(1);
walkerCmd.setWalkOrder(requiredWorkGroupOrder);
}
adjustWalkOrder(walkerCmd, requiredWorkGroupOrder, rootDeviceEnvironment);
if (inlineDataProgrammingRequired == true) {
walkerCmd.setEmitInlineParameter(1);
}
}
template <typename Family>
inline bool EncodeDispatchKernel<Family>::isDshNeeded(const DeviceInfo &deviceInfo) {
if constexpr (Family::supportsSampler) {
return deviceInfo.imageSupport;
}
return false;
}
template <typename Family>
void EncodeStateBaseAddress<Family>::setSbaAddressesForDebugger(NEO::Debugger::SbaAddresses &sbaAddress, const STATE_BASE_ADDRESS &sbaCmd) {
sbaAddress.bindlessSurfaceStateBaseAddress = sbaCmd.getBindlessSurfaceStateBaseAddress();
sbaAddress.dynamicStateBaseAddress = sbaCmd.getDynamicStateBaseAddress();
sbaAddress.generalStateBaseAddress = sbaCmd.getGeneralStateBaseAddress();
sbaAddress.instructionBaseAddress = sbaCmd.getInstructionBaseAddress();
sbaAddress.surfaceStateBaseAddress = sbaCmd.getSurfaceStateBaseAddress();
sbaAddress.indirectObjectBaseAddress = 0;
}
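// Programs STATE_BASE_ADDRESS for the container's dirty heaps (or from explicit
// sbaProperties), using the global bindless heap base when a bindless heaps helper
// is present, then programs the binding table pool for the surface state heap
// where applicable.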
template <typename Family>
void EncodeStateBaseAddress<Family>::encode(EncodeStateBaseAddressArgs<Family> &args) {
auto &device = *args.container->getDevice();
auto gmmHelper = device.getRootDeviceEnvironment().getGmmHelper();
auto dsh = args.container->isHeapDirty(HeapType::dynamicState) ? args.container->getIndirectHeap(HeapType::dynamicState) : nullptr;
auto ioh = args.container->isHeapDirty(HeapType::indirectObject) ? args.container->getIndirectHeap(HeapType::indirectObject) : nullptr;
auto ssh = args.container->isHeapDirty(HeapType::surfaceState) ? args.container->getIndirectHeap(HeapType::surfaceState) : nullptr;
auto isDebuggerActive = device.getDebugger() != nullptr;
bool setGeneralStateBaseAddress = args.sbaProperties ? false : true;
uint64_t globalHeapsBase = 0;
uint64_t bindlessSurfStateBase = 0;
bool useGlobalSshAndDsh = false;
if (device.getBindlessHeapsHelper()) {
bindlessSurfStateBase = device.getBindlessHeapsHelper()->getGlobalHeapsBase();
globalHeapsBase = device.getBindlessHeapsHelper()->getGlobalHeapsBase();
useGlobalSshAndDsh = true;
}
StateBaseAddressHelperArgs<Family> stateBaseAddressHelperArgs = {
0, // generalStateBaseAddress
args.container->getIndirectObjectHeapBaseAddress(), // indirectObjectHeapBaseAddress
args.container->getInstructionHeapBaseAddress(), // instructionHeapBaseAddress
globalHeapsBase, // globalHeapsBaseAddress
0, // surfaceStateBaseAddress
bindlessSurfStateBase, // bindlessSurfaceStateBaseAddress
&args.sbaCmd, // stateBaseAddressCmd
args.sbaProperties, // sbaProperties
dsh, // dsh
ioh, // ioh
ssh, // ssh
gmmHelper, // gmmHelper
args.statelessMocsIndex, // statelessMocsIndex
args.l1CachePolicy, // l1CachePolicy
args.l1CachePolicyDebuggerActive, // l1CachePolicyDebuggerActive
NEO::MemoryCompressionState::notApplicable, // memoryCompressionState
true, // setInstructionStateBaseAddress
setGeneralStateBaseAddress, // setGeneralStateBaseAddress
useGlobalSshAndDsh, // useGlobalHeapsBaseAddress
args.multiOsContextCapable, // isMultiOsContextCapable
false, // areMultipleSubDevicesInContext
false, // overrideSurfaceStateBaseAddress
isDebuggerActive, // isDebuggerActive
args.doubleSbaWa, // doubleSbaWa
args.heaplessModeEnabled // heaplessModeEnabled
};
StateBaseAddressHelper<Family>::programStateBaseAddressIntoCommandStream(stateBaseAddressHelperArgs,
*args.container->getCommandStream());
if (args.sbaProperties) {
if (args.sbaProperties->bindingTablePoolBaseAddress.value != StreamProperty64::initValue) {
StateBaseAddressHelper<Family>::programBindingTableBaseAddress(*args.container->getCommandStream(),
static_cast<uint64_t>(args.sbaProperties->bindingTablePoolBaseAddress.value),
static_cast<uint32_t>(args.sbaProperties->bindingTablePoolSize.value),
gmmHelper);
}
} else if (args.container->isHeapDirty(HeapType::surfaceState) && ssh != nullptr) {
auto heap = args.container->getIndirectHeap(HeapType::surfaceState);
StateBaseAddressHelper<Family>::programBindingTableBaseAddress(*args.container->getCommandStream(),
*heap,
gmmHelper);
}
}
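// Worst-case command size for SBA programming: one STATE_BASE_ADDRESS, doubled
// when the additional-SBA workaround applies, plus a binding table pool alloc
// when the surface state heap is dirty.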
template <typename Family>
size_t EncodeStateBaseAddress<Family>::getRequiredSizeForStateBaseAddress(Device &device, CommandContainer &container, bool isRcs) {
auto &hwInfo = device.getHardwareInfo();
auto &productHelper = device.getProductHelper();
size_t size = sizeof(typename Family::STATE_BASE_ADDRESS);
if (productHelper.isAdditionalStateBaseAddressWARequired(hwInfo)) {
size += sizeof(typename Family::STATE_BASE_ADDRESS);
}
if (container.isHeapDirty(HeapType::surfaceState)) {
size += sizeof(typename Family::_3DSTATE_BINDING_TABLE_POOL_ALLOC);
}
return size;
}
template <typename Family>
void EncodeComputeMode<Family>::adjustPipelineSelect(CommandContainer &container, const NEO::KernelDescriptor &kernelDescriptor) {
PipelineSelectArgs pipelineSelectArgs;
pipelineSelectArgs.systolicPipelineSelectMode = kernelDescriptor.kernelAttributes.flags.usesSystolicPipelineSelectMode;
pipelineSelectArgs.systolicPipelineSelectSupport = container.systolicModeSupportRef();
PreambleHelper<Family>::programPipelineSelect(container.getCommandStream(),
pipelineSelectArgs,
container.getDevice()->getRootDeviceEnvironment());
}
template <typename Family>
inline void EncodeMediaInterfaceDescriptorLoad<Family>::encode(CommandContainer &container, IndirectHeap *childDsh) {
}
template <typename Family>
void EncodeMiFlushDW<Family>::adjust(MI_FLUSH_DW *miFlushDwCmd, const ProductHelper &productHelper) {
miFlushDwCmd->setFlushCcs(1);
miFlushDwCmd->setFlushLlc(1);
}
template <typename Family>
bool EncodeSurfaceState<Family>::isBindingTablePrefetchPreferred() {
return false;
}
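// Applies buffer-specific surface state settings: constant-cache MOCS for constant
// surfaces (or when ForceL1Caching requests it), the compression format for
// aux-enabled allocations, and the stateless-compression debug path for
// allocations outside system memory.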
template <typename Family>
void EncodeSurfaceState<Family>::encodeExtraBufferParams(EncodeSurfaceStateArgs &args) {
auto surfaceState = reinterpret_cast<R_SURFACE_STATE *>(args.outMemory);
Gmm *gmm = args.allocation ? args.allocation->getDefaultGmm() : nullptr;
uint32_t compressionFormat = 0;
bool setConstCachePolicy = false;
if (args.allocation && args.allocation->getAllocationType() == AllocationType::constantSurface) {
setConstCachePolicy = true;
}
if (surfaceState->getMemoryObjectControlState() == args.gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER) &&
debugManager.flags.ForceL1Caching.get() != 0) {
setConstCachePolicy = true;
}
if (setConstCachePolicy == true) {
surfaceState->setMemoryObjectControlState(args.gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST));
}
encodeExtraCacheSettings(surfaceState, args);
if (EncodeSurfaceState<Family>::isAuxModeEnabled(surfaceState, gmm)) {
auto resourceFormat = gmm->gmmResourceInfo->getResourceFormat();
compressionFormat = args.gmmHelper->getClientContext()->getSurfaceStateCompressionFormat(resourceFormat);
if (debugManager.flags.ForceBufferCompressionFormat.get() != -1) {
compressionFormat = debugManager.flags.ForceBufferCompressionFormat.get();
}
}
if (debugManager.flags.EnableStatelessCompressionWithUnifiedMemory.get()) {
if (args.allocation && !MemoryPoolHelper::isSystemMemoryPool(args.allocation->getMemoryPool())) {
setCoherencyType(surfaceState, R_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT);
setBufferAuxParamsForCCS(surfaceState);
compressionFormat = debugManager.flags.FormatForStatelessCompressionWithUnifiedMemory.get();
}
}
surfaceState->setCompressionFormat(compressionFormat);
}
template <typename Family>
void EncodeSurfaceState<Family>::setCoherencyType(R_SURFACE_STATE *surfaceState, COHERENCY_TYPE coherencyType) {
surfaceState->setCoherencyType(R_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT);
}
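// Builds the MI_SEMAPHORE_WAIT in a local command and publishes it to the
// destination with a single assignment; platform-specific fields (qword compare
// data, indirect data, switch-on-unsuccessful) are appended last.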
template <typename Family>
void EncodeSemaphore<Family>::programMiSemaphoreWait(MI_SEMAPHORE_WAIT *cmd,
uint64_t compareAddress,
uint64_t compareData,
COMPARE_OPERATION compareMode,
bool registerPollMode,
bool waitMode,
bool useQwordData,
bool indirect,
bool switchOnUnsuccessful) {
MI_SEMAPHORE_WAIT localCmd = Family::cmdInitMiSemaphoreWait;
localCmd.setCompareOperation(compareMode);
localCmd.setSemaphoreDataDword(static_cast<uint32_t>(compareData));
localCmd.setSemaphoreGraphicsAddress(compareAddress);
localCmd.setWaitMode(waitMode ? MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_POLLING_MODE : MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_SIGNAL_MODE);
localCmd.setRegisterPollMode(registerPollMode ? MI_SEMAPHORE_WAIT::REGISTER_POLL_MODE::REGISTER_POLL_MODE_REGISTER_POLL : MI_SEMAPHORE_WAIT::REGISTER_POLL_MODE::REGISTER_POLL_MODE_MEMORY_POLL);
localCmd.setIndirectSemaphoreDataDword(indirect);
EncodeSemaphore<Family>::appendSemaphoreCommand(localCmd, compareData, indirect, useQwordData, switchOnUnsuccessful);
*cmd = localCmd;
}
template <typename Family>
inline void EncodeWA<Family>::encodeAdditionalPipelineSelect(LinearStream &stream, const PipelineSelectArgs &args, bool is3DPipeline,
const RootDeviceEnvironment &rootDeviceEnvironment, bool isRcs) {}
template <typename Family>
inline size_t EncodeWA<Family>::getAdditionalPipelineSelectSize(Device &device, bool isRcs) {
return 0u;
}
template <typename Family>
inline void EncodeWA<Family>::addPipeControlPriorToNonPipelinedStateCommand(LinearStream &commandStream, PipeControlArgs args,
const RootDeviceEnvironment &rootDeviceEnvironment, bool isRcs) {
auto &productHelper = rootDeviceEnvironment.getHelper<ProductHelper>();
auto *releaseHelper = rootDeviceEnvironment.getReleaseHelper();
auto &hwInfo = *rootDeviceEnvironment.getHardwareInfo();
const auto &[isBasicWARequired, isExtendedWARequired] = productHelper.isPipeControlPriorToNonPipelinedStateCommandsWARequired(hwInfo, isRcs, releaseHelper);
if (isExtendedWARequired) {
args.textureCacheInvalidationEnable = true;
args.hdcPipelineFlush = true;
args.amfsFlushEnable = true;
args.instructionCacheInvalidateEnable = true;
args.constantCacheInvalidationEnable = true;
args.stateCacheInvalidationEnable = true;
args.dcFlushEnable = false;
NEO::EncodeWA<Family>::setAdditionalPipeControlFlagsForNonPipelineStateCommand(args);
} else if (isBasicWARequired) {
args.hdcPipelineFlush = true;
NEO::EncodeWA<Family>::setAdditionalPipeControlFlagsForNonPipelineStateCommand(args);
}
MemorySynchronizationCommands<Family>::addSingleBarrier(commandStream, args);
}
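// For planar (YUV) surfaces the compression format also encodes the plane: the
// Y plane keeps only the low 4 bits of the format, while the U and V planes
// additionally set bit 4.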
template <typename Family>
void EncodeWA<Family>::adjustCompressionFormatForPlanarImage(uint32_t &compressionFormat, int plane) {
static_assert(sizeof(plane) == sizeof(GMM_YUV_PLANE_ENUM));
if (plane == GMM_PLANE_Y) {
compressionFormat &= 0xf;
} else if ((plane == GMM_PLANE_U) || (plane == GMM_PLANE_V)) {
compressionFormat |= 0x10;
}
}
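// Programs an MI_STORE_DATA_IMM for a dword or qword immediate write; the dword
// length field must match the store width, and workloadPartitionOffset lets each
// partition write to its own slot of the destination.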
template <typename Family>
inline void EncodeStoreMemory<Family>::programStoreDataImm(MI_STORE_DATA_IMM *cmdBuffer,
uint64_t gpuAddress,
uint32_t dataDword0,
uint32_t dataDword1,
bool storeQword,
bool workloadPartitionOffset) {
MI_STORE_DATA_IMM storeDataImmediate = Family::cmdInitStoreDataImm;
storeDataImmediate.setAddress(gpuAddress);
storeDataImmediate.setStoreQword(storeQword);
storeDataImmediate.setDataDword0(dataDword0);
if (storeQword) {
storeDataImmediate.setDataDword1(dataDword1);
storeDataImmediate.setDwordLength(MI_STORE_DATA_IMM::DWORD_LENGTH::DWORD_LENGTH_STORE_QWORD);
} else {
storeDataImmediate.setDwordLength(MI_STORE_DATA_IMM::DWORD_LENGTH::DWORD_LENGTH_STORE_DWORD);
}
storeDataImmediate.setWorkloadPartitionIdOffsetEnable(workloadPartitionOffset);
EncodeStoreMemory<Family>::encodeForceCompletionCheck(storeDataImmediate);
*cmdBuffer = storeDataImmediate;
}
template <typename Family>
inline void EncodeStoreMMIO<Family>::appendFlags(MI_STORE_REGISTER_MEM *storeRegMem, bool workloadPartition) {
storeRegMem->setMmioRemapEnable(true);
storeRegMem->setWorkloadPartitionIdOffsetEnable(workloadPartition);
}
template <typename Family>
template <typename WalkerType>
void EncodeDispatchKernel<Family>::adjustWalkOrder(WalkerType &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment) {}
template <typename Family>
size_t EncodeDispatchKernel<Family>::additionalSizeRequiredDsh(uint32_t iddCount) {
return 0u;
}
template <typename Family>
inline size_t EncodeDispatchKernel<Family>::getInlineDataOffset(EncodeDispatchKernelArgs &args) {
using DefaultWalkerType = typename Family::DefaultWalkerType;
return offsetof(DefaultWalkerType, TheStructure.Common.InlineData);
}
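// Debug aid: when ForceComputeWalkerPostSyncFlushWithWrite is set, forces a
// flushing post-sync immediate write of zero to the address given by the flag.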
template <typename Family>
template <typename WalkerType>
void EncodeDispatchKernel<Family>::forceComputeWalkerPostSyncFlushWithWrite(WalkerType &walkerCmd) {
using PostSyncType = typename WalkerType::PostSyncType;
using OperationType = typename PostSyncType::OPERATION;
if (debugManager.flags.ForceComputeWalkerPostSyncFlushWithWrite.get() != -1) {
auto &postSync = walkerCmd.getPostSync();
postSync.setDataportPipelineFlush(true);
postSync.setDataportSubsliceCacheFlush(true);
postSync.setDestinationAddress(static_cast<uint64_t>(debugManager.flags.ForceComputeWalkerPostSyncFlushWithWrite.get()));
postSync.setOperation(OperationType::OPERATION_WRITE_IMMEDIATE_DATA);
postSync.setImmediateData(0u);
}
}
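// Rounds the requested SLM size up to the nearest size the hardware can encode
// (0K, 1K, 2K, 4K, 8K, 16K, 24K, 32K, 48K, 64K, 96K or 128K); e.g. a 5K request
// rounds up to 8K.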
template <typename Family>
uint32_t EncodeDispatchKernel<Family>::alignSlmSize(uint32_t slmSize) {
const uint32_t alignedSlmSizes[] = {
0u,
1u * MemoryConstants::kiloByte,
2u * MemoryConstants::kiloByte,
4u * MemoryConstants::kiloByte,
8u * MemoryConstants::kiloByte,
16u * MemoryConstants::kiloByte,
24u * MemoryConstants::kiloByte,
32u * MemoryConstants::kiloByte,
48u * MemoryConstants::kiloByte,
64u * MemoryConstants::kiloByte,
96u * MemoryConstants::kiloByte,
128u * MemoryConstants::kiloByte,
};
for (auto &alignedSlmSize : alignedSlmSizes) {
if (slmSize <= alignedSlmSize) {
return alignedSlmSize;
}
}
UNRECOVERABLE_IF(true);
return 0;
}
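// Maps an SLM size to the INTERFACE_DESCRIPTOR_DATA shared-local-memory encoding;
// e.g. a 5K request aligns to 8K and yields SHARED_LOCAL_MEMORY_SIZE_ENCODES_8K.
// Sizes above 128K are unrecoverable.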
template <typename Family>
uint32_t EncodeDispatchKernel<Family>::computeSlmValues(const HardwareInfo &hwInfo, uint32_t slmSize) {
using SHARED_LOCAL_MEMORY_SIZE = typename Family::INTERFACE_DESCRIPTOR_DATA::SHARED_LOCAL_MEMORY_SIZE;
auto alignedSlmSize = EncodeDispatchKernel<Family>::alignSlmSize(slmSize);
if (alignedSlmSize == 0u) {
return SHARED_LOCAL_MEMORY_SIZE::SHARED_LOCAL_MEMORY_SIZE_ENCODES_0K;
}
UNRECOVERABLE_IF(slmSize > 128u * MemoryConstants::kiloByte);
if (alignedSlmSize > 96u * MemoryConstants::kiloByte) {
return SHARED_LOCAL_MEMORY_SIZE::SHARED_LOCAL_MEMORY_SIZE_ENCODES_128K;
}
if (alignedSlmSize > 64u * MemoryConstants::kiloByte) {
return SHARED_LOCAL_MEMORY_SIZE::SHARED_LOCAL_MEMORY_SIZE_ENCODES_96K;
}
if (alignedSlmSize > 48u * MemoryConstants::kiloByte) {
return SHARED_LOCAL_MEMORY_SIZE::SHARED_LOCAL_MEMORY_SIZE_ENCODES_64K;
}
if (alignedSlmSize > 32u * MemoryConstants::kiloByte) {
return SHARED_LOCAL_MEMORY_SIZE::SHARED_LOCAL_MEMORY_SIZE_ENCODES_48K;
}
if (alignedSlmSize > 24u * MemoryConstants::kiloByte) {
return SHARED_LOCAL_MEMORY_SIZE::SHARED_LOCAL_MEMORY_SIZE_ENCODES_32K;
}
if (alignedSlmSize > 16u * MemoryConstants::kiloByte) {
return SHARED_LOCAL_MEMORY_SIZE::SHARED_LOCAL_MEMORY_SIZE_ENCODES_24K;
}
if (alignedSlmSize > 8u * MemoryConstants::kiloByte) {
return SHARED_LOCAL_MEMORY_SIZE::SHARED_LOCAL_MEMORY_SIZE_ENCODES_16K;
}
if (alignedSlmSize > 4u * MemoryConstants::kiloByte) {
return SHARED_LOCAL_MEMORY_SIZE::SHARED_LOCAL_MEMORY_SIZE_ENCODES_8K;
}
if (alignedSlmSize > 2u * MemoryConstants::kiloByte) {
return SHARED_LOCAL_MEMORY_SIZE::SHARED_LOCAL_MEMORY_SIZE_ENCODES_4K;
}
if (alignedSlmSize > 1u * MemoryConstants::kiloByte) {
return SHARED_LOCAL_MEMORY_SIZE::SHARED_LOCAL_MEMORY_SIZE_ENCODES_2K;
}
return SHARED_LOCAL_MEMORY_SIZE::SHARED_LOCAL_MEMORY_SIZE_ENCODES_1K;
}
template <typename Family>
bool EncodeDispatchKernel<Family>::singleTileExecImplicitScalingRequired(bool cooperativeKernel) {
return cooperativeKernel;
}
template <typename Family>
size_t EncodeStates<Family>::getSshHeapSize() {
return 2 * MemoryConstants::megaByte;
}
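// In-order patching: rewrites the post-sync immediate data of a previously
// recorded walker so a replayed command signals baseCounterValue + appendCounterValue.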
template <typename Family>
void InOrderPatchCommandHelpers::PatchCmd<Family>::patchComputeWalker(uint64_t appendCounterValue) {
auto walkerCmd = reinterpret_cast<typename Family::DefaultWalkerType *>(cmd1);
auto &postSync = walkerCmd->getPostSync();
postSync.setImmediateData(baseCounterValue + appendCounterValue);
}
} // namespace NEO