2020-01-17 15:56:05 +08:00
|
|
|
/*
|
2022-01-13 01:53:00 +08:00
|
|
|
* Copyright (C) 2020-2022 Intel Corporation
|
2020-01-17 15:56:05 +08:00
|
|
|
*
|
|
|
|
* SPDX-License-Identifier: MIT
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
|
|
|
#pragma once
|
2020-02-24 05:44:01 +08:00
|
|
|
#include "shared/source/command_container/command_encoder.h"
|
|
|
|
#include "shared/source/command_stream/linear_stream.h"
|
|
|
|
#include "shared/source/command_stream/preemption.h"
|
|
|
|
#include "shared/source/execution_environment/execution_environment.h"
|
|
|
|
#include "shared/source/gmm_helper/gmm_helper.h"
|
2020-11-26 17:04:26 +08:00
|
|
|
#include "shared/source/helpers/api_specific_config.h"
|
2020-06-09 03:49:11 +08:00
|
|
|
#include "shared/source/helpers/hw_helper.h"
|
2021-12-22 22:11:05 +08:00
|
|
|
#include "shared/source/helpers/pipe_control_args.h"
|
2020-02-24 05:44:01 +08:00
|
|
|
#include "shared/source/helpers/simd_helper.h"
|
|
|
|
#include "shared/source/helpers/state_base_address.h"
|
|
|
|
#include "shared/source/kernel/dispatch_kernel_encoder_interface.h"
|
2021-09-14 00:49:13 +08:00
|
|
|
#include "shared/source/kernel/implicit_args.h"
|
2020-02-24 17:22:30 +08:00
|
|
|
|
2020-01-17 15:56:05 +08:00
|
|
|
#include <algorithm>
|
|
|
|
|
|
|
|
namespace NEO {
|
2022-01-20 19:23:30 +08:00
|
|
|
|
|
|
|
template <typename Family>
|
|
|
|
void EncodeDispatchKernel<Family>::setGrfInfo(INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, uint32_t numGrf,
|
|
|
|
const size_t &sizeCrossThreadData, const size_t &sizePerThreadData) {
|
|
|
|
auto grfSize = sizeof(typename Family::GRF);
|
|
|
|
DEBUG_BREAK_IF((sizeCrossThreadData % grfSize) != 0);
|
|
|
|
auto numGrfCrossThreadData = static_cast<uint32_t>(sizeCrossThreadData / grfSize);
|
|
|
|
DEBUG_BREAK_IF(numGrfCrossThreadData == 0);
|
|
|
|
pInterfaceDescriptor->setCrossThreadConstantDataReadLength(numGrfCrossThreadData);
|
|
|
|
|
|
|
|
DEBUG_BREAK_IF((sizePerThreadData % grfSize) != 0);
|
|
|
|
auto numGrfPerThreadData = static_cast<uint32_t>(sizePerThreadData / grfSize);
|
|
|
|
|
|
|
|
// at least 1 GRF of perThreadData for each thread in a thread group when sizeCrossThreadData != 0
|
|
|
|
numGrfPerThreadData = std::max(numGrfPerThreadData, 1u);
|
|
|
|
pInterfaceDescriptor->setConstantIndirectUrbEntryReadLength(numGrfPerThreadData);
|
|
|
|
}
|
|
|
|
|
2020-01-17 15:56:05 +08:00
|
|
|
template <typename Family>
|
|
|
|
void EncodeDispatchKernel<Family>::encode(CommandContainer &container,
|
2022-01-13 01:53:00 +08:00
|
|
|
EncodeDispatchKernelArgs &args) {
|
2020-01-17 15:56:05 +08:00
|
|
|
|
|
|
|
using MEDIA_STATE_FLUSH = typename Family::MEDIA_STATE_FLUSH;
|
|
|
|
using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename Family::MEDIA_INTERFACE_DESCRIPTOR_LOAD;
|
|
|
|
using MI_BATCH_BUFFER_END = typename Family::MI_BATCH_BUFFER_END;
|
2020-08-03 19:13:58 +08:00
|
|
|
using STATE_BASE_ADDRESS = typename Family::STATE_BASE_ADDRESS;
|
2020-01-17 15:56:05 +08:00
|
|
|
|
2022-01-13 01:53:00 +08:00
|
|
|
auto &kernelDescriptor = args.dispatchInterface->getKernelDescriptor();
|
|
|
|
auto sizeCrossThreadData = args.dispatchInterface->getCrossThreadDataSize();
|
|
|
|
auto sizePerThreadData = args.dispatchInterface->getPerThreadDataSize();
|
|
|
|
auto sizePerThreadDataForWholeGroup = args.dispatchInterface->getPerThreadDataSizeForWholeThreadGroup();
|
|
|
|
auto pImplicitArgs = args.dispatchInterface->getImplicitArgs();
|
2020-01-17 15:56:05 +08:00
|
|
|
|
2022-01-13 01:53:00 +08:00
|
|
|
const HardwareInfo &hwInfo = args.device->getHardwareInfo();
|
2020-07-23 03:17:50 +08:00
|
|
|
|
2020-01-17 15:56:05 +08:00
|
|
|
LinearStream *listCmdBufferStream = container.getCommandStream();
|
2020-05-12 00:20:21 +08:00
|
|
|
size_t sshOffset = 0;
|
2020-01-17 15:56:05 +08:00
|
|
|
|
2022-01-13 01:53:00 +08:00
|
|
|
auto threadDims = static_cast<const uint32_t *>(args.pThreadGroupDimensions);
|
2021-03-04 01:29:32 +08:00
|
|
|
const Vec3<size_t> threadStartVec{0, 0, 0};
|
|
|
|
Vec3<size_t> threadDimsVec{0, 0, 0};
|
2022-01-13 01:53:00 +08:00
|
|
|
if (!args.isIndirect) {
|
2021-03-04 01:29:32 +08:00
|
|
|
threadDimsVec = {threadDims[0], threadDims[1], threadDims[2]};
|
|
|
|
}
|
2020-01-17 15:56:05 +08:00
|
|
|
|
|
|
|
WALKER_TYPE cmd = Family::cmdInitGpgpuWalker;
|
|
|
|
auto idd = Family::cmdInitInterfaceDescriptorData;
|
|
|
|
{
|
2022-01-13 01:53:00 +08:00
|
|
|
auto alloc = args.dispatchInterface->getIsaAllocation();
|
2020-01-17 15:56:05 +08:00
|
|
|
UNRECOVERABLE_IF(nullptr == alloc);
|
|
|
|
auto offset = alloc->getGpuAddressToPatch();
|
|
|
|
idd.setKernelStartPointer(offset);
|
|
|
|
idd.setKernelStartPointerHigh(0u);
|
|
|
|
}
|
2020-03-25 17:04:42 +08:00
|
|
|
|
2022-01-13 01:53:00 +08:00
|
|
|
auto numThreadsPerThreadGroup = args.dispatchInterface->getNumThreadsPerThreadGroup();
|
2020-04-07 20:07:31 +08:00
|
|
|
idd.setNumberOfThreadsInGpgpuThreadGroup(numThreadsPerThreadGroup);
|
2020-01-17 15:56:05 +08:00
|
|
|
|
2020-07-23 03:17:50 +08:00
|
|
|
EncodeDispatchKernel<Family>::programBarrierEnable(idd,
|
2020-11-19 19:30:44 +08:00
|
|
|
kernelDescriptor.kernelAttributes.barrierCount,
|
2020-07-23 03:17:50 +08:00
|
|
|
hwInfo);
|
2020-06-09 03:49:11 +08:00
|
|
|
auto slmSize = static_cast<typename INTERFACE_DESCRIPTOR_DATA::SHARED_LOCAL_MEMORY_SIZE>(
|
2022-01-13 01:53:00 +08:00
|
|
|
HwHelperHw<Family>::get().computeSlmValues(hwInfo, args.dispatchInterface->getSlmTotalSize()));
|
2020-12-04 19:57:11 +08:00
|
|
|
idd.setSharedLocalMemorySize(slmSize);
|
2020-01-17 15:56:05 +08:00
|
|
|
|
2020-11-27 17:22:59 +08:00
|
|
|
uint32_t bindingTableStateCount = kernelDescriptor.payloadMappings.bindingTable.numEntries;
|
|
|
|
uint32_t bindingTablePointer = 0u;
|
2020-11-26 17:04:26 +08:00
|
|
|
bool isBindlessKernel = kernelDescriptor.kernelAttributes.bufferAddressingMode == KernelDescriptor::BindlessAndStateless;
|
|
|
|
if (!isBindlessKernel) {
|
2020-12-04 19:28:18 +08:00
|
|
|
container.prepareBindfulSsh();
|
2020-11-26 17:04:26 +08:00
|
|
|
if (bindingTableStateCount > 0u) {
|
2022-01-13 01:53:00 +08:00
|
|
|
auto ssh = container.getHeapWithRequiredSizeAndAlignment(HeapType::SURFACE_STATE, args.dispatchInterface->getSurfaceStateHeapDataSize(), BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
|
2020-11-26 17:04:26 +08:00
|
|
|
sshOffset = ssh->getUsed();
|
|
|
|
bindingTablePointer = static_cast<uint32_t>(EncodeSurfaceState<Family>::pushBindingTableAndSurfaceStates(
|
|
|
|
*ssh, bindingTableStateCount,
|
2022-01-13 01:53:00 +08:00
|
|
|
args.dispatchInterface->getSurfaceStateHeapData(),
|
|
|
|
args.dispatchInterface->getSurfaceStateHeapDataSize(), bindingTableStateCount,
|
2020-11-26 17:04:26 +08:00
|
|
|
kernelDescriptor.payloadMappings.bindingTable.tableOffset));
|
|
|
|
}
|
2020-11-27 17:22:59 +08:00
|
|
|
}
|
2020-12-04 19:28:18 +08:00
|
|
|
idd.setBindingTablePointer(bindingTablePointer);
|
2020-01-17 15:56:05 +08:00
|
|
|
|
2022-01-13 01:53:00 +08:00
|
|
|
PreemptionHelper::programInterfaceDescriptorDataPreemption<Family>(&idd, args.preemptionMode);
|
2020-01-17 15:56:05 +08:00
|
|
|
|
2022-01-13 01:53:00 +08:00
|
|
|
auto heap = ApiSpecificConfig::getBindlessConfiguration() ? args.device->getBindlessHeapsHelper()->getHeap(BindlessHeapsHelper::GLOBAL_DSH) : container.getIndirectHeap(HeapType::DYNAMIC_STATE);
|
2020-01-17 15:56:05 +08:00
|
|
|
UNRECOVERABLE_IF(!heap);
|
|
|
|
|
|
|
|
uint32_t samplerStateOffset = 0;
|
|
|
|
uint32_t samplerCount = 0;
|
|
|
|
|
2020-04-07 20:07:31 +08:00
|
|
|
if (kernelDescriptor.payloadMappings.samplerTable.numSamplers > 0) {
|
|
|
|
samplerCount = kernelDescriptor.payloadMappings.samplerTable.numSamplers;
|
|
|
|
samplerStateOffset = EncodeStates<Family>::copySamplerState(heap, kernelDescriptor.payloadMappings.samplerTable.tableOffset,
|
|
|
|
kernelDescriptor.payloadMappings.samplerTable.numSamplers,
|
|
|
|
kernelDescriptor.payloadMappings.samplerTable.borderColor,
|
2022-01-13 01:53:00 +08:00
|
|
|
args.dispatchInterface->getDynamicStateHeapData(),
|
|
|
|
args.device->getBindlessHeapsHelper(), hwInfo);
|
2020-01-17 15:56:05 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
idd.setSamplerStatePointer(samplerStateOffset);
|
2020-12-04 19:28:18 +08:00
|
|
|
if (!isBindlessKernel) {
|
|
|
|
EncodeDispatchKernel<Family>::adjustBindingTablePrefetch(idd, samplerCount, bindingTableStateCount);
|
|
|
|
}
|
2020-01-17 15:56:05 +08:00
|
|
|
|
2022-01-20 19:23:30 +08:00
|
|
|
EncodeDispatchKernel<Family>::setGrfInfo(&idd, kernelDescriptor.kernelAttributes.numGrfRequired, sizeCrossThreadData, sizePerThreadData);
|
2020-01-17 15:56:05 +08:00
|
|
|
|
|
|
|
uint32_t sizeThreadData = sizePerThreadDataForWholeGroup + sizeCrossThreadData;
|
2022-01-13 01:53:00 +08:00
|
|
|
uint32_t sizeForImplicitArgsPatching = args.dispatchInterface->getSizeForImplicitArgsPatching();
|
2021-09-14 00:49:13 +08:00
|
|
|
uint32_t iohRequiredSize = sizeThreadData + sizeForImplicitArgsPatching;
|
2020-01-17 15:56:05 +08:00
|
|
|
uint64_t offsetThreadData = 0u;
|
|
|
|
{
|
|
|
|
auto heapIndirect = container.getIndirectHeap(HeapType::INDIRECT_OBJECT);
|
|
|
|
UNRECOVERABLE_IF(!(heapIndirect));
|
|
|
|
heapIndirect->align(WALKER_TYPE::INDIRECTDATASTARTADDRESS_ALIGN_SIZE);
|
|
|
|
|
2021-09-14 00:49:13 +08:00
|
|
|
auto ptr = container.getHeapSpaceAllowGrow(HeapType::INDIRECT_OBJECT, iohRequiredSize);
|
2020-01-17 15:56:05 +08:00
|
|
|
UNRECOVERABLE_IF(!(ptr));
|
|
|
|
offsetThreadData = heapIndirect->getHeapGpuStartOffset() + static_cast<uint64_t>(heapIndirect->getUsed() - sizeThreadData);
|
|
|
|
|
2021-09-14 00:49:13 +08:00
|
|
|
if (pImplicitArgs) {
|
|
|
|
offsetThreadData -= sizeof(ImplicitArgs);
|
|
|
|
pImplicitArgs->localIdTablePtr = heapIndirect->getGraphicsAllocation()->getGpuAddress() + heapIndirect->getUsed() - iohRequiredSize;
|
2022-01-13 01:53:00 +08:00
|
|
|
args.dispatchInterface->patchImplicitArgs(ptr);
|
2021-09-14 00:49:13 +08:00
|
|
|
}
|
|
|
|
|
2020-01-17 15:56:05 +08:00
|
|
|
memcpy_s(ptr, sizeCrossThreadData,
|
2022-01-13 01:53:00 +08:00
|
|
|
args.dispatchInterface->getCrossThreadData(), sizeCrossThreadData);
|
2020-01-17 15:56:05 +08:00
|
|
|
|
2022-01-13 01:53:00 +08:00
|
|
|
if (args.isIndirect) {
|
2021-10-07 01:32:04 +08:00
|
|
|
auto gpuPtr = heapIndirect->getGraphicsAllocation()->getGpuAddress() + heapIndirect->getUsed() - sizeThreadData;
|
|
|
|
uint64_t implicitArgsGpuPtr = 0u;
|
2021-09-24 20:20:21 +08:00
|
|
|
if (pImplicitArgs) {
|
2021-10-07 01:32:04 +08:00
|
|
|
implicitArgsGpuPtr = gpuPtr - sizeof(ImplicitArgs);
|
2021-09-24 20:20:21 +08:00
|
|
|
}
|
2022-01-13 01:53:00 +08:00
|
|
|
EncodeIndirectParams<Family>::encode(container, gpuPtr, args.dispatchInterface, implicitArgsGpuPtr);
|
2020-01-17 15:56:05 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
ptr = ptrOffset(ptr, sizeCrossThreadData);
|
|
|
|
memcpy_s(ptr, sizePerThreadDataForWholeGroup,
|
2022-01-13 01:53:00 +08:00
|
|
|
args.dispatchInterface->getPerThreadData(), sizePerThreadDataForWholeGroup);
|
2020-01-17 15:56:05 +08:00
|
|
|
}
|
|
|
|
|
2022-01-13 01:53:00 +08:00
|
|
|
auto slmSizeNew = args.dispatchInterface->getSlmTotalSize();
|
2020-07-31 14:40:52 +08:00
|
|
|
bool dirtyHeaps = container.isAnyHeapDirty();
|
2022-01-13 01:53:00 +08:00
|
|
|
bool flush = container.slmSize != slmSizeNew || dirtyHeaps || args.requiresUncachedMocs;
|
2020-01-17 15:56:05 +08:00
|
|
|
|
|
|
|
if (flush) {
|
2022-01-13 01:53:00 +08:00
|
|
|
PipeControlArgs syncArgs;
|
|
|
|
syncArgs.dcFlushEnable = MemorySynchronizationCommands<Family>::getDcFlushEnable(true, hwInfo);
|
2020-07-31 14:40:52 +08:00
|
|
|
if (dirtyHeaps) {
|
2022-01-13 01:53:00 +08:00
|
|
|
syncArgs.hdcPipelineFlush = true;
|
2020-07-31 14:40:52 +08:00
|
|
|
}
|
2022-01-13 01:53:00 +08:00
|
|
|
MemorySynchronizationCommands<Family>::addPipeControl(*container.getCommandStream(), syncArgs);
|
2020-01-17 15:56:05 +08:00
|
|
|
|
2022-01-13 01:53:00 +08:00
|
|
|
if (dirtyHeaps || args.requiresUncachedMocs) {
|
2020-08-03 19:13:58 +08:00
|
|
|
STATE_BASE_ADDRESS sba;
|
2020-11-17 16:37:44 +08:00
|
|
|
auto gmmHelper = container.getDevice()->getGmmHelper();
|
|
|
|
uint32_t statelessMocsIndex =
|
2022-01-13 01:53:00 +08:00
|
|
|
args.requiresUncachedMocs ? (gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED) >> 1) : (gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER) >> 1);
|
2022-01-14 07:48:47 +08:00
|
|
|
EncodeStateBaseAddress<Family>::encode(container, sba, statelessMocsIndex, false, false);
|
2020-07-31 14:40:52 +08:00
|
|
|
container.setDirtyStateForAllHeaps(false);
|
2022-01-13 01:53:00 +08:00
|
|
|
args.requiresUncachedMocs = false;
|
2020-07-31 14:40:52 +08:00
|
|
|
}
|
|
|
|
|
2020-01-17 15:56:05 +08:00
|
|
|
if (container.slmSize != slmSizeNew) {
|
|
|
|
EncodeL3State<Family>::encode(container, slmSizeNew != 0u);
|
|
|
|
container.slmSize = slmSizeNew;
|
|
|
|
|
2020-02-05 06:03:48 +08:00
|
|
|
if (container.nextIddInBlock != container.getNumIddPerBlock()) {
|
2020-01-17 15:56:05 +08:00
|
|
|
EncodeMediaInterfaceDescriptorLoad<Family>::encode(container);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
uint32_t numIDD = 0u;
|
|
|
|
void *ptr = getInterfaceDescriptor(container, numIDD);
|
|
|
|
memcpy_s(ptr, sizeof(idd), &idd, sizeof(idd));
|
|
|
|
|
|
|
|
cmd.setIndirectDataStartAddress(static_cast<uint32_t>(offsetThreadData));
|
|
|
|
cmd.setIndirectDataLength(sizeThreadData);
|
|
|
|
cmd.setInterfaceDescriptorOffset(numIDD);
|
|
|
|
|
2020-07-07 04:55:37 +08:00
|
|
|
EncodeDispatchKernel<Family>::encodeThreadData(cmd,
|
|
|
|
nullptr,
|
2021-03-04 01:29:32 +08:00
|
|
|
threadDims,
|
2022-01-13 01:53:00 +08:00
|
|
|
args.dispatchInterface->getGroupSize(),
|
2020-07-07 04:55:37 +08:00
|
|
|
kernelDescriptor.kernelAttributes.simdSize,
|
|
|
|
kernelDescriptor.kernelAttributes.numLocalIdChannels,
|
2022-01-13 01:53:00 +08:00
|
|
|
args.dispatchInterface->getNumThreadsPerThreadGroup(),
|
|
|
|
args.dispatchInterface->getThreadExecutionMask(),
|
2020-07-07 04:55:37 +08:00
|
|
|
true,
|
|
|
|
false,
|
2022-01-13 01:53:00 +08:00
|
|
|
args.isIndirect,
|
|
|
|
args.dispatchInterface->getRequiredWorkgroupOrder());
|
2020-01-17 15:56:05 +08:00
|
|
|
|
2022-01-13 01:53:00 +08:00
|
|
|
cmd.setPredicateEnable(args.isPredicate);
|
2020-01-17 15:56:05 +08:00
|
|
|
|
2021-12-01 06:59:19 +08:00
|
|
|
if (ApiSpecificConfig::getBindlessConfiguration()) {
|
2022-01-13 01:53:00 +08:00
|
|
|
container.getResidencyContainer().push_back(args.device->getBindlessHeapsHelper()->getHeap(NEO::BindlessHeapsHelper::BindlesHeapType::GLOBAL_DSH)->getGraphicsAllocation());
|
2021-12-01 06:59:19 +08:00
|
|
|
}
|
|
|
|
|
2020-07-23 03:17:50 +08:00
|
|
|
EncodeDispatchKernel<Family>::adjustInterfaceDescriptorData(idd, hwInfo);
|
|
|
|
|
2022-01-13 01:53:00 +08:00
|
|
|
PreemptionHelper::applyPreemptionWaCmdsBegin<Family>(listCmdBufferStream, *args.device);
|
2020-01-17 15:56:05 +08:00
|
|
|
|
|
|
|
auto buffer = listCmdBufferStream->getSpace(sizeof(cmd));
|
|
|
|
*(decltype(cmd) *)buffer = cmd;
|
|
|
|
|
2022-01-13 01:53:00 +08:00
|
|
|
PreemptionHelper::applyPreemptionWaCmdsEnd<Family>(listCmdBufferStream, *args.device);
|
2020-01-17 15:56:05 +08:00
|
|
|
{
|
|
|
|
auto mediaStateFlush = listCmdBufferStream->getSpace(sizeof(MEDIA_STATE_FLUSH));
|
|
|
|
*reinterpret_cast<MEDIA_STATE_FLUSH *>(mediaStateFlush) = Family::cmdInitMediaStateFlush;
|
|
|
|
}
|
2021-02-11 07:01:10 +08:00
|
|
|
|
2022-01-13 01:53:00 +08:00
|
|
|
args.partitionCount = 1;
|
2020-01-17 15:56:05 +08:00
|
|
|
}
|
2020-05-12 00:20:21 +08:00
|
|
|
|
2020-01-17 15:56:05 +08:00
|
|
|
template <typename Family>
|
|
|
|
void EncodeMediaInterfaceDescriptorLoad<Family>::encode(CommandContainer &container) {
|
|
|
|
using MEDIA_STATE_FLUSH = typename Family::MEDIA_STATE_FLUSH;
|
|
|
|
using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename Family::MEDIA_INTERFACE_DESCRIPTOR_LOAD;
|
2020-12-04 19:28:18 +08:00
|
|
|
auto heapBase = ApiSpecificConfig::getBindlessConfiguration() ? container.getDevice()->getBindlessHeapsHelper()->getHeap(BindlessHeapsHelper::GLOBAL_DSH)->getGraphicsAllocation()->getUnderlyingBuffer() : container.getIndirectHeap(HeapType::DYNAMIC_STATE)->getCpuBase();
|
2020-01-17 15:56:05 +08:00
|
|
|
|
2020-04-28 00:55:26 +08:00
|
|
|
auto mediaStateFlush = container.getCommandStream()->getSpaceForCmd<MEDIA_STATE_FLUSH>();
|
|
|
|
*mediaStateFlush = Family::cmdInitMediaStateFlush;
|
2020-01-17 15:56:05 +08:00
|
|
|
|
2020-12-04 19:28:18 +08:00
|
|
|
auto iddOffset = static_cast<uint32_t>(ptrDiff(container.getIddBlock(), heapBase));
|
|
|
|
iddOffset += ApiSpecificConfig::getBindlessConfiguration() ? static_cast<uint32_t>(container.getDevice()->getBindlessHeapsHelper()->getHeap(BindlessHeapsHelper::GLOBAL_DSH)->getGraphicsAllocation()->getGpuAddress() -
|
|
|
|
container.getDevice()->getBindlessHeapsHelper()->getHeap(BindlessHeapsHelper::GLOBAL_DSH)->getGraphicsAllocation()->getGpuBaseAddress())
|
|
|
|
: 0;
|
|
|
|
|
2020-01-17 15:56:05 +08:00
|
|
|
MEDIA_INTERFACE_DESCRIPTOR_LOAD cmd = Family::cmdInitMediaInterfaceDescriptorLoad;
|
2020-12-04 19:28:18 +08:00
|
|
|
cmd.setInterfaceDescriptorDataStartAddress(iddOffset);
|
2020-02-05 06:03:48 +08:00
|
|
|
cmd.setInterfaceDescriptorTotalLength(sizeof(INTERFACE_DESCRIPTOR_DATA) * container.getNumIddPerBlock());
|
2020-01-17 15:56:05 +08:00
|
|
|
|
|
|
|
auto buffer = container.getCommandStream()->getSpace(sizeof(cmd));
|
|
|
|
*(decltype(cmd) *)buffer = cmd;
|
|
|
|
}
|
|
|
|
|
|
|
|
template <typename Family>
|
2021-06-09 18:00:13 +08:00
|
|
|
inline bool EncodeDispatchKernel<Family>::isRuntimeLocalIdsGenerationRequired(uint32_t activeChannels,
|
|
|
|
size_t *lws,
|
|
|
|
std::array<uint8_t, 3> walkOrder,
|
|
|
|
bool requireInputWalkOrder,
|
|
|
|
uint32_t &requiredWalkOrder,
|
|
|
|
uint32_t simd) {
|
2020-07-07 04:55:37 +08:00
|
|
|
requiredWalkOrder = 0u;
|
2020-06-05 22:12:49 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2020-07-23 03:17:50 +08:00
|
|
|
template <typename Family>
|
|
|
|
void EncodeDispatchKernel<Family>::encodeThreadData(WALKER_TYPE &walkerCmd,
|
|
|
|
const uint32_t *startWorkGroup,
|
|
|
|
const uint32_t *numWorkGroups,
|
|
|
|
const uint32_t *workGroupSizes,
|
|
|
|
uint32_t simd,
|
|
|
|
uint32_t localIdDimensions,
|
|
|
|
uint32_t threadsPerThreadGroup,
|
|
|
|
uint32_t threadExecutionMask,
|
|
|
|
bool localIdsGenerationByRuntime,
|
|
|
|
bool inlineDataProgrammingRequired,
|
|
|
|
bool isIndirect,
|
|
|
|
uint32_t requiredWorkGroupOrder) {
|
2020-06-26 22:03:30 +08:00
|
|
|
|
|
|
|
if (isIndirect) {
|
|
|
|
walkerCmd.setIndirectParameterEnable(true);
|
|
|
|
} else {
|
|
|
|
walkerCmd.setThreadGroupIdXDimension(static_cast<uint32_t>(numWorkGroups[0]));
|
|
|
|
walkerCmd.setThreadGroupIdYDimension(static_cast<uint32_t>(numWorkGroups[1]));
|
|
|
|
walkerCmd.setThreadGroupIdZDimension(static_cast<uint32_t>(numWorkGroups[2]));
|
|
|
|
}
|
|
|
|
|
|
|
|
if (startWorkGroup) {
|
|
|
|
walkerCmd.setThreadGroupIdStartingX(static_cast<uint32_t>(startWorkGroup[0]));
|
|
|
|
walkerCmd.setThreadGroupIdStartingY(static_cast<uint32_t>(startWorkGroup[1]));
|
|
|
|
walkerCmd.setThreadGroupIdStartingResumeZ(static_cast<uint32_t>(startWorkGroup[2]));
|
|
|
|
}
|
|
|
|
|
|
|
|
walkerCmd.setSimdSize(getSimdConfig<WALKER_TYPE>(simd));
|
|
|
|
|
|
|
|
auto localWorkSize = workGroupSizes[0] * workGroupSizes[1] * workGroupSizes[2];
|
2020-07-07 04:55:37 +08:00
|
|
|
if (threadsPerThreadGroup == 0) {
|
|
|
|
threadsPerThreadGroup = static_cast<uint32_t>(getThreadsPerWG(simd, localWorkSize));
|
|
|
|
}
|
|
|
|
walkerCmd.setThreadWidthCounterMaximum(threadsPerThreadGroup);
|
|
|
|
|
|
|
|
uint64_t executionMask = threadExecutionMask;
|
|
|
|
if (executionMask == 0) {
|
|
|
|
auto remainderSimdLanes = localWorkSize & (simd - 1);
|
|
|
|
executionMask = maxNBitValue(remainderSimdLanes);
|
|
|
|
if (!executionMask)
|
|
|
|
executionMask = ~executionMask;
|
|
|
|
}
|
2020-06-26 22:03:30 +08:00
|
|
|
|
2020-07-07 04:55:37 +08:00
|
|
|
constexpr uint32_t maxDword = std::numeric_limits<uint32_t>::max();
|
2020-06-26 22:03:30 +08:00
|
|
|
walkerCmd.setRightExecutionMask(static_cast<uint32_t>(executionMask));
|
2020-07-07 04:55:37 +08:00
|
|
|
walkerCmd.setBottomExecutionMask(maxDword);
|
2020-06-26 22:03:30 +08:00
|
|
|
}
|
|
|
|
|
2020-07-23 03:17:50 +08:00
|
|
|
template <typename Family>
|
|
|
|
void EncodeDispatchKernel<Family>::programBarrierEnable(INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor,
|
|
|
|
uint32_t value,
|
|
|
|
const HardwareInfo &hwInfo) {
|
|
|
|
interfaceDescriptor.setBarrierEnable(value);
|
|
|
|
}
|
|
|
|
|
|
|
|
template <typename Family>
|
2021-12-03 19:46:44 +08:00
|
|
|
inline void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const HardwareInfo &hwInfo, WALKER_TYPE &walkerCmd, KernelExecutionType kernelExecutionType) {}
|
2020-07-23 03:17:50 +08:00
|
|
|
|
2020-08-03 22:33:02 +08:00
|
|
|
template <typename Family>
|
2020-11-07 15:32:45 +08:00
|
|
|
void EncodeDispatchKernel<Family>::appendAdditionalIDDFields(INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const HardwareInfo &hwInfo, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy) {}
|
2020-08-03 22:33:02 +08:00
|
|
|
|
2021-05-18 10:46:21 +08:00
|
|
|
template <typename Family>
|
2021-12-09 03:03:42 +08:00
|
|
|
inline void EncodeComputeMode<Family>::programComputeModeCommand(LinearStream &csr, StateComputeModeProperties &properties, const HardwareInfo &hwInfo) {
|
2021-05-18 10:46:21 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
template <typename Family>
|
2021-06-09 18:00:13 +08:00
|
|
|
inline void EncodeComputeMode<Family>::adjustPipelineSelect(CommandContainer &container, const NEO::KernelDescriptor &kernelDescriptor) {
|
2021-05-18 10:46:21 +08:00
|
|
|
}
|
|
|
|
|
2021-09-28 18:56:22 +08:00
|
|
|
template <typename Family>
|
|
|
|
void EncodeStateBaseAddress<Family>::setIohAddressForDebugger(NEO::Debugger::SbaAddresses &sbaAddress, const STATE_BASE_ADDRESS &sbaCmd) {
|
|
|
|
sbaAddress.IndirectObjectBaseAddress = sbaCmd.getIndirectObjectBaseAddress();
|
|
|
|
}
|
|
|
|
|
2020-07-23 03:17:50 +08:00
|
|
|
template <typename Family>
|
2022-01-14 07:48:47 +08:00
|
|
|
void EncodeStateBaseAddress<Family>::encode(CommandContainer &container, STATE_BASE_ADDRESS &sbaCmd, bool multiOsContextCapable) {
|
2020-11-17 16:37:44 +08:00
|
|
|
auto gmmHelper = container.getDevice()->getRootDeviceEnvironment().getGmmHelper();
|
|
|
|
uint32_t statelessMocsIndex = (gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER) >> 1);
|
2022-01-14 07:48:47 +08:00
|
|
|
EncodeStateBaseAddress<Family>::encode(container, sbaCmd, statelessMocsIndex, false, multiOsContextCapable);
|
2020-11-17 16:37:44 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
template <typename Family>
|
2022-01-14 07:48:47 +08:00
|
|
|
void EncodeStateBaseAddress<Family>::encode(CommandContainer &container, STATE_BASE_ADDRESS &sbaCmd, uint32_t statelessMocsIndex, bool useGlobalAtomics, bool multiOsContextCapable) {
|
2021-09-10 19:47:13 +08:00
|
|
|
if (container.isAnyHeapDirty()) {
|
|
|
|
EncodeWA<Family>::encodeAdditionalPipelineSelect(*container.getDevice(), *container.getCommandStream(), true);
|
|
|
|
}
|
2020-07-23 03:17:50 +08:00
|
|
|
|
|
|
|
auto gmmHelper = container.getDevice()->getGmmHelper();
|
|
|
|
|
|
|
|
StateBaseAddressHelper<Family>::programStateBaseAddress(
|
2020-08-03 19:13:58 +08:00
|
|
|
&sbaCmd,
|
2020-07-23 03:17:50 +08:00
|
|
|
container.isHeapDirty(HeapType::DYNAMIC_STATE) ? container.getIndirectHeap(HeapType::DYNAMIC_STATE) : nullptr,
|
|
|
|
container.isHeapDirty(HeapType::INDIRECT_OBJECT) ? container.getIndirectHeap(HeapType::INDIRECT_OBJECT) : nullptr,
|
|
|
|
container.isHeapDirty(HeapType::SURFACE_STATE) ? container.getIndirectHeap(HeapType::SURFACE_STATE) : nullptr,
|
|
|
|
0,
|
|
|
|
false,
|
2020-11-17 16:37:44 +08:00
|
|
|
statelessMocsIndex,
|
2020-09-22 21:48:24 +08:00
|
|
|
container.getIndirectObjectHeapBaseAddress(),
|
2020-07-23 03:17:50 +08:00
|
|
|
container.getInstructionHeapBaseAddress(),
|
2020-12-04 19:28:18 +08:00
|
|
|
0,
|
|
|
|
false,
|
2020-07-23 03:17:50 +08:00
|
|
|
false,
|
|
|
|
gmmHelper,
|
2020-12-17 08:36:45 +08:00
|
|
|
false,
|
2021-01-26 22:05:22 +08:00
|
|
|
MemoryCompressionState::NotApplicable,
|
2021-06-15 10:02:59 +08:00
|
|
|
useGlobalAtomics,
|
2021-01-26 22:05:22 +08:00
|
|
|
1u);
|
2020-07-23 03:17:50 +08:00
|
|
|
|
2020-08-03 19:13:58 +08:00
|
|
|
auto pCmd = reinterpret_cast<STATE_BASE_ADDRESS *>(container.getCommandStream()->getSpace(sizeof(STATE_BASE_ADDRESS)));
|
|
|
|
*pCmd = sbaCmd;
|
|
|
|
|
2020-07-23 03:17:50 +08:00
|
|
|
EncodeWA<Family>::encodeAdditionalPipelineSelect(*container.getDevice(), *container.getCommandStream(), false);
|
|
|
|
}
|
|
|
|
|
2021-06-23 21:34:56 +08:00
|
|
|
template <typename Family>
|
2021-07-08 23:08:37 +08:00
|
|
|
size_t EncodeStateBaseAddress<Family>::getRequiredSizeForStateBaseAddress(Device &device, CommandContainer &container) {
|
|
|
|
return sizeof(typename Family::STATE_BASE_ADDRESS) + 2 * EncodeWA<Family>::getAdditionalPipelineSelectSize(device);
|
|
|
|
}
|
2021-06-23 21:34:56 +08:00
|
|
|
|
2020-07-23 03:17:50 +08:00
|
|
|
template <typename Family>
|
|
|
|
void EncodeL3State<Family>::encode(CommandContainer &container, bool enableSLM) {
|
|
|
|
auto offset = L3CNTLRegisterOffset<Family>::registerOffset;
|
|
|
|
auto data = PreambleHelper<Family>::getL3Config(container.getDevice()->getHardwareInfo(), enableSLM);
|
2020-10-06 16:58:18 +08:00
|
|
|
EncodeSetMMIO<Family>::encodeIMM(container, offset, data, false);
|
2020-07-17 02:00:52 +08:00
|
|
|
}
|
|
|
|
|
2020-02-21 22:35:08 +08:00
|
|
|
template <typename GfxFamily>
|
2021-12-22 02:13:53 +08:00
|
|
|
void EncodeMiFlushDW<GfxFamily>::appendMiFlushDw(MI_FLUSH_DW *miFlushDwCmd, const HardwareInfo &hwInfo) {}
|
2020-02-21 22:35:08 +08:00
|
|
|
|
2020-03-12 17:49:20 +08:00
|
|
|
template <typename GfxFamily>
|
|
|
|
void EncodeMiFlushDW<GfxFamily>::programMiFlushDwWA(LinearStream &commandStream) {}
|
|
|
|
|
2020-03-13 19:29:45 +08:00
|
|
|
template <typename GfxFamily>
|
|
|
|
size_t EncodeMiFlushDW<GfxFamily>::getMiFlushDwWaSize() {
|
|
|
|
return 0;
|
|
|
|
}
|
2020-03-25 17:04:42 +08:00
|
|
|
|
|
|
|
template <typename GfxFamily>
|
|
|
|
inline void EncodeWA<GfxFamily>::encodeAdditionalPipelineSelect(Device &device, LinearStream &stream, bool is3DPipeline) {}
|
|
|
|
|
|
|
|
template <typename GfxFamily>
|
|
|
|
inline size_t EncodeWA<GfxFamily>::getAdditionalPipelineSelectSize(Device &device) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2020-09-16 02:27:18 +08:00
|
|
|
template <typename GfxFamily>
|
2021-10-21 09:30:53 +08:00
|
|
|
inline void EncodeSurfaceState<GfxFamily>::encodeExtraBufferParams(EncodeSurfaceStateArgs &args) {
|
|
|
|
auto surfaceState = reinterpret_cast<R_SURFACE_STATE *>(args.outMemory);
|
|
|
|
encodeExtraCacheSettings(surfaceState, *args.gmmHelper->getHardwareInfo());
|
2020-09-16 02:27:18 +08:00
|
|
|
}
|
|
|
|
|
2020-11-17 18:03:24 +08:00
|
|
|
template <typename GfxFamily>
|
|
|
|
bool EncodeSurfaceState<GfxFamily>::doBindingTablePrefetch() {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2021-04-13 08:36:24 +08:00
|
|
|
template <typename Family>
|
2021-06-09 18:00:13 +08:00
|
|
|
inline void EncodeSurfaceState<Family>::setCoherencyType(R_SURFACE_STATE *surfaceState, COHERENCY_TYPE coherencyType) {
|
2021-04-13 08:36:24 +08:00
|
|
|
surfaceState->setCoherencyType(coherencyType);
|
|
|
|
}
|
|
|
|
|
2020-10-16 21:58:47 +08:00
|
|
|
template <typename Family>
|
|
|
|
void EncodeSempahore<Family>::programMiSemaphoreWait(MI_SEMAPHORE_WAIT *cmd,
|
|
|
|
uint64_t compareAddress,
|
|
|
|
uint32_t compareData,
|
|
|
|
COMPARE_OPERATION compareMode,
|
|
|
|
bool registerPollMode) {
|
|
|
|
MI_SEMAPHORE_WAIT localCmd = Family::cmdInitMiSemaphoreWait;
|
|
|
|
localCmd.setCompareOperation(compareMode);
|
|
|
|
localCmd.setSemaphoreDataDword(compareData);
|
|
|
|
localCmd.setSemaphoreGraphicsAddress(compareAddress);
|
|
|
|
localCmd.setWaitMode(MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_POLLING_MODE);
|
|
|
|
|
|
|
|
*cmd = localCmd;
|
|
|
|
}
|
|
|
|
|
2021-02-27 06:02:57 +08:00
|
|
|
template <typename GfxFamily>
|
|
|
|
void EncodeEnableRayTracing<GfxFamily>::programEnableRayTracing(LinearStream &commandStream, GraphicsAllocation &backBuffer) {
|
|
|
|
}
|
|
|
|
|
2021-12-02 22:17:45 +08:00
|
|
|
template <typename Family>
|
|
|
|
inline void EncodeStoreMemory<Family>::programStoreDataImm(MI_STORE_DATA_IMM *cmdBuffer,
|
|
|
|
uint64_t gpuAddress,
|
|
|
|
uint32_t dataDword0,
|
|
|
|
uint32_t dataDword1,
|
|
|
|
bool storeQword,
|
|
|
|
bool workloadPartitionOffset) {
|
|
|
|
MI_STORE_DATA_IMM storeDataImmediate = Family::cmdInitStoreDataImm;
|
|
|
|
storeDataImmediate.setAddress(gpuAddress);
|
|
|
|
storeDataImmediate.setStoreQword(storeQword);
|
|
|
|
storeDataImmediate.setDataDword0(dataDword0);
|
|
|
|
if (storeQword) {
|
|
|
|
storeDataImmediate.setDataDword1(dataDword1);
|
|
|
|
storeDataImmediate.setDwordLength(MI_STORE_DATA_IMM::DWORD_LENGTH::DWORD_LENGTH_STORE_QWORD);
|
|
|
|
} else {
|
|
|
|
storeDataImmediate.setDwordLength(MI_STORE_DATA_IMM::DWORD_LENGTH::DWORD_LENGTH_STORE_DWORD);
|
|
|
|
}
|
|
|
|
*cmdBuffer = storeDataImmediate;
|
|
|
|
}
|
|
|
|
|
2020-03-13 19:29:45 +08:00
|
|
|
} // namespace NEO
|