mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-24 21:18:24 +08:00
Move kernel info to shared directory
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
f0003c1da6
commit
25c71a6c13
@@ -14,12 +14,11 @@
|
||||
#include "shared/source/device_binary_format/elf/elf_encoder.h"
|
||||
#include "shared/source/device_binary_format/elf/zebin_elf.h"
|
||||
#include "shared/source/device_binary_format/yaml/yaml_parser.h"
|
||||
#include "shared/source/program/kernel_info.h"
|
||||
#include "shared/source/program/program_info.h"
|
||||
#include "shared/source/utilities/compiler_support.h"
|
||||
#include "shared/source/utilities/stackvec.h"
|
||||
|
||||
#include "opencl/source/program/kernel_info.h"
|
||||
|
||||
#include <tuple>
|
||||
|
||||
namespace NEO {
|
||||
|
||||
@@ -144,6 +144,7 @@ class HwHelper {
|
||||
virtual bool isSipKernelAsHexadecimalArrayPreferred() const = 0;
|
||||
virtual void setSipKernelData(uint32_t *&sipKernelBinary, size_t &kernelBinarySize) const = 0;
|
||||
virtual void adjustPreemptionSurfaceSize(size_t &csrSize) const = 0;
|
||||
virtual size_t getSamplerStateSize() const = 0;
|
||||
|
||||
protected:
|
||||
HwHelper() = default;
|
||||
@@ -184,6 +185,11 @@ class HwHelperHw : public HwHelper {
|
||||
return sizeof(RENDER_SURFACE_STATE);
|
||||
}
|
||||
|
||||
size_t getSamplerStateSize() const override {
|
||||
using SAMPLER_STATE = typename GfxFamily::SAMPLER_STATE;
|
||||
return sizeof(SAMPLER_STATE);
|
||||
}
|
||||
|
||||
uint32_t getBindlessSurfaceExtendedMessageDescriptorValue(uint32_t surfStateOffset) const override {
|
||||
using DataPortBindlessSurfaceExtendedMessageDescriptor = typename GfxFamily::DataPortBindlessSurfaceExtendedMessageDescriptor;
|
||||
DataPortBindlessSurfaceExtendedMessageDescriptor messageExtDescriptor = {};
|
||||
|
||||
@@ -7,6 +7,8 @@
|
||||
set(NEO_CORE_PROGRAM
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/heap_info.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernel_info.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernel_info.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/print_formatter.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/print_formatter.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/program_info.cpp
|
||||
|
||||
176
shared/source/program/kernel_info.cpp
Normal file
176
shared/source/program/kernel_info.cpp
Normal file
@@ -0,0 +1,176 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/program/kernel_info.h"
|
||||
|
||||
#include "shared/source/device/device.h"
|
||||
#include "shared/source/device_binary_format/patchtokens_decoder.h"
|
||||
#include "shared/source/helpers/aligned_memory.h"
|
||||
#include "shared/source/helpers/blit_commands_helper.h"
|
||||
#include "shared/source/helpers/hw_helper.h"
|
||||
#include "shared/source/helpers/kernel_helpers.h"
|
||||
#include "shared/source/helpers/ptr_math.h"
|
||||
#include "shared/source/helpers/string.h"
|
||||
#include "shared/source/memory_manager/memory_manager.h"
|
||||
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
#include <map>
|
||||
#include <sstream>
|
||||
#include <unordered_map>
|
||||
|
||||
namespace NEO {
|
||||
|
||||
// Pairs the text of an argument type qualifier with a 64-bit value.
// Plain aggregate: both fields are left for the creator to fill in.
struct KernelArgumentType {
    // Qualifier text (non-owning pointer).
    const char *argTypeQualifier;
    // Numeric value associated with the qualifier text.
    uint64_t argTypeQualifierValue;
};
|
||||
|
||||
// Stores the caller-provided dispatch limits, takes the render core family
// from hwInfo and then derives minWorkGroupSize from those values.
// hwInfo is dereferenced unconditionally, so it must not be null.
WorkSizeInfo::WorkSizeInfo(uint32_t maxWorkGroupSize, bool hasBarriers, uint32_t simdSize, uint32_t slmTotalSize, const HardwareInfo *hwInfo, uint32_t numThreadsPerSubSlice, uint32_t localMemSize, bool imgUsed, bool yTiledSurface)
    : maxWorkGroupSize(maxWorkGroupSize),
      hasBarriers(hasBarriers),
      simdSize(simdSize),
      slmTotalSize(slmTotalSize),
      coreFamily(hwInfo->platform.eRenderCoreFamily),
      numThreadsPerSubSlice(numThreadsPerSubSlice),
      localMemSize(localMemSize),
      imgUsed(imgUsed),
      yTiledSurfaces(yTiledSurface) {
    // Must run after the members above are set: it reads them to compute
    // minWorkGroupSize.
    setMinWorkGroupSize(hwInfo);
}
|
||||
|
||||
void WorkSizeInfo::setIfUseImg(const KernelInfo &kernelInfo) {
|
||||
for (const auto &arg : kernelInfo.kernelDescriptor.payloadMappings.explicitArgs) {
|
||||
if (arg.is<ArgDescriptor::ArgTImage>()) {
|
||||
imgUsed = true;
|
||||
yTiledSurfaces = true;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
void WorkSizeInfo::setMinWorkGroupSize(const HardwareInfo *hwInfo) {
|
||||
minWorkGroupSize = 0;
|
||||
if (hasBarriers) {
|
||||
uint32_t maxBarriersPerHSlice = (coreFamily >= IGFX_GEN9_CORE) ? 32 : 16;
|
||||
minWorkGroupSize = numThreadsPerSubSlice * simdSize / maxBarriersPerHSlice;
|
||||
}
|
||||
if (slmTotalSize > 0) {
|
||||
UNRECOVERABLE_IF(localMemSize < slmTotalSize);
|
||||
minWorkGroupSize = std::max(maxWorkGroupSize / ((localMemSize / slmTotalSize)), minWorkGroupSize);
|
||||
}
|
||||
|
||||
const auto &hwHelper = HwHelper::get(hwInfo->platform.eRenderCoreFamily);
|
||||
if (hwHelper.isFusedEuDispatchEnabled(*hwInfo)) {
|
||||
minWorkGroupSize *= 2;
|
||||
}
|
||||
}
|
||||
void WorkSizeInfo::checkRatio(const size_t workItems[3]) {
|
||||
if (slmTotalSize > 0) {
|
||||
useRatio = true;
|
||||
targetRatio = log((float)workItems[0]) - log((float)workItems[1]);
|
||||
useStrictRatio = false;
|
||||
} else if (yTiledSurfaces == true) {
|
||||
useRatio = true;
|
||||
targetRatio = YTilingRatioValue;
|
||||
useStrictRatio = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Releases the cross-thread data buffer. It is freed with delete[], so the
// buffer assigned to crossThreadData has to come from new[] (or be null).
KernelInfo::~KernelInfo() {
    delete[] this->crossThreadData;
}
|
||||
|
||||
size_t KernelInfo::getSamplerStateArrayCount() const {
|
||||
return kernelDescriptor.payloadMappings.samplerTable.numSamplers;
|
||||
}
|
||||
size_t KernelInfo::getSamplerStateArraySize(const HardwareInfo &hwInfo) const {
|
||||
size_t samplerStateArraySize = getSamplerStateArrayCount() * HwHelper::get(hwInfo.platform.eRenderCoreFamily).getSamplerStateSize();
|
||||
return samplerStateArraySize;
|
||||
}
|
||||
|
||||
size_t KernelInfo::getBorderColorStateSize() const {
|
||||
size_t borderColorSize = 0;
|
||||
if (kernelDescriptor.payloadMappings.samplerTable.numSamplers > 0U) {
|
||||
borderColorSize = kernelDescriptor.payloadMappings.samplerTable.tableOffset - kernelDescriptor.payloadMappings.samplerTable.borderColor;
|
||||
}
|
||||
return borderColorSize;
|
||||
}
|
||||
|
||||
size_t KernelInfo::getBorderColorOffset() const {
|
||||
size_t borderColorOffset = 0;
|
||||
if (kernelDescriptor.payloadMappings.samplerTable.numSamplers > 0U) {
|
||||
borderColorOffset = kernelDescriptor.payloadMappings.samplerTable.borderColor;
|
||||
}
|
||||
return borderColorOffset;
|
||||
}
|
||||
|
||||
uint32_t KernelInfo::getConstantBufferSize() const {
|
||||
return kernelDescriptor.kernelAttributes.crossThreadDataSize;
|
||||
}
|
||||
int32_t KernelInfo::getArgNumByName(const char *name) const {
|
||||
int32_t argNum = 0;
|
||||
for (const auto &argMeta : kernelDescriptor.explicitArgsExtendedMetadata) {
|
||||
if (argMeta.argName.compare(name) == 0) {
|
||||
return argNum;
|
||||
}
|
||||
++argNum;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
bool KernelInfo::createKernelAllocation(const Device &device, bool internalIsa) {
|
||||
UNRECOVERABLE_IF(kernelAllocation);
|
||||
auto kernelIsaSize = heapInfo.KernelHeapSize;
|
||||
const auto allocType = internalIsa ? GraphicsAllocation::AllocationType::KERNEL_ISA_INTERNAL : GraphicsAllocation::AllocationType::KERNEL_ISA;
|
||||
kernelAllocation = device.getMemoryManager()->allocateGraphicsMemoryWithProperties({device.getRootDeviceIndex(), kernelIsaSize, allocType, device.getDeviceBitfield()});
|
||||
if (!kernelAllocation) {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto &hwInfo = device.getHardwareInfo();
|
||||
auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);
|
||||
|
||||
return MemoryTransferHelper::transferMemoryToAllocation(hwHelper.isBlitCopyRequiredForLocalMemory(hwInfo, *kernelAllocation),
|
||||
device, kernelAllocation, 0, heapInfo.pKernelHeap,
|
||||
static_cast<size_t>(kernelIsaSize));
|
||||
}
|
||||
|
||||
void KernelInfo::apply(const DeviceInfoKernelPayloadConstants &constants) {
|
||||
if (nullptr == this->crossThreadData) {
|
||||
return;
|
||||
}
|
||||
|
||||
const auto &implicitArgs = kernelDescriptor.payloadMappings.implicitArgs;
|
||||
const auto privateMemorySize = static_cast<uint32_t>(KernelHelper::getPrivateSurfaceSize(kernelDescriptor.kernelAttributes.perHwThreadPrivateMemorySize,
|
||||
constants.computeUnitsUsedForScratch));
|
||||
|
||||
auto setIfValidOffset = [&](auto value, NEO::CrossThreadDataOffset offset) {
|
||||
if (isValidOffset(offset)) {
|
||||
*ptrOffset(reinterpret_cast<decltype(value) *>(crossThreadData), offset) = value;
|
||||
}
|
||||
};
|
||||
setIfValidOffset(reinterpret_cast<uintptr_t>(constants.slmWindow), implicitArgs.localMemoryStatelessWindowStartAddres);
|
||||
setIfValidOffset(constants.slmWindowSize, implicitArgs.localMemoryStatelessWindowSize);
|
||||
setIfValidOffset(privateMemorySize, implicitArgs.privateMemorySize);
|
||||
setIfValidOffset(constants.maxWorkGroupSize, implicitArgs.maxWorkGroupSize);
|
||||
}
|
||||
|
||||
// Joins the kernel names of all given kernel infos into one string, using ';'
// as separator. A separator is inserted only when the text accumulated so far
// is non-empty. Every pointer in kernelInfos is dereferenced.
std::string concatenateKernelNames(ArrayRef<KernelInfo *> kernelInfos) {
    std::string joinedNames;

    for (const KernelInfo *info : kernelInfos) {
        if (false == joinedNames.empty()) {
            joinedNames += ';';
        }
        joinedNames += info->kernelDescriptor.kernelMetadata.kernelName;
    }

    return joinedNames;
}
|
||||
|
||||
} // namespace NEO
|
||||
138
shared/source/program/kernel_info.h
Normal file
138
shared/source/program/kernel_info.h
Normal file
@@ -0,0 +1,138 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "shared/source/helpers/hw_info.h"
|
||||
#include "shared/source/kernel/kernel_descriptor.h"
|
||||
#include "shared/source/program/heap_info.h"
|
||||
#include "shared/source/utilities/arrayref.h"
|
||||
#include "shared/source/utilities/const_stringref.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <cmath>
|
||||
#include <cstdint>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
namespace gtpin {
|
||||
typedef struct igc_info_s igc_info_t;
|
||||
}
|
||||
|
||||
namespace NEO {
|
||||
class BuiltinDispatchInfoBuilder;
|
||||
class Device;
|
||||
class Kernel;
|
||||
struct KernelInfo;
|
||||
class DispatchInfo;
|
||||
struct KernelArgumentType;
|
||||
class GraphicsAllocation;
|
||||
class MemoryManager;
|
||||
|
||||
// Natural logarithm of 4. Used as the target ratio for Y-tiled surfaces;
// ratios are kept as differences of logarithms, so this presumably stands for
// a 4:1 proportion between the first two dimensions.
static constexpr float YTilingRatioValue = 1.3862943611198906188344642429164f;
|
||||
|
||||
struct WorkSizeInfo {
|
||||
uint32_t maxWorkGroupSize;
|
||||
uint32_t minWorkGroupSize;
|
||||
bool hasBarriers;
|
||||
uint32_t simdSize;
|
||||
uint32_t slmTotalSize;
|
||||
GFXCORE_FAMILY coreFamily;
|
||||
uint32_t numThreadsPerSubSlice;
|
||||
uint32_t localMemSize;
|
||||
bool imgUsed = false;
|
||||
bool yTiledSurfaces = false;
|
||||
bool useRatio = false;
|
||||
bool useStrictRatio = false;
|
||||
float targetRatio = 0;
|
||||
|
||||
WorkSizeInfo(uint32_t maxWorkGroupSize, bool hasBarriers, uint32_t simdSize, uint32_t slmTotalSize, const HardwareInfo *hwInfo, uint32_t numThreadsPerSubSlice, uint32_t localMemSize, bool imgUsed, bool yTiledSurface);
|
||||
|
||||
void setIfUseImg(const KernelInfo &kernelInfo);
|
||||
void setMinWorkGroupSize(const HardwareInfo *hwInfo);
|
||||
void checkRatio(const size_t workItems[3]);
|
||||
};
|
||||
|
||||
// Device-level constants that KernelInfo::apply() patches into a kernel's
// cross-thread data. Every field defaults to null / zero.
struct DeviceInfoKernelPayloadConstants {
    // Start address of the SLM window.
    void *slmWindow{nullptr};
    // Size of the SLM window.
    uint32_t slmWindowSize{0U};
    // Compute unit count used when sizing the private surface.
    uint32_t computeUnitsUsedForScratch{0U};
    // Maximum work group size reported to the kernel.
    uint32_t maxWorkGroupSize{0U};
};
|
||||
|
||||
struct KernelInfo {
|
||||
public:
|
||||
KernelInfo() = default;
|
||||
KernelInfo(const KernelInfo &) = delete;
|
||||
KernelInfo &operator=(const KernelInfo &) = delete;
|
||||
~KernelInfo();
|
||||
|
||||
GraphicsAllocation *getGraphicsAllocation() const { return this->kernelAllocation; }
|
||||
|
||||
const ArgDescriptor &getArgDescriptorAt(uint32_t index) const {
|
||||
DEBUG_BREAK_IF(index >= kernelDescriptor.payloadMappings.explicitArgs.size());
|
||||
return kernelDescriptor.payloadMappings.explicitArgs[index];
|
||||
}
|
||||
const StackVec<ArgDescriptor, 16> &getExplicitArgs() const {
|
||||
return kernelDescriptor.payloadMappings.explicitArgs;
|
||||
}
|
||||
const ArgTypeMetadataExtended &getExtendedMetadata(uint32_t index) const {
|
||||
DEBUG_BREAK_IF(index >= kernelDescriptor.explicitArgsExtendedMetadata.size());
|
||||
return kernelDescriptor.explicitArgsExtendedMetadata[index];
|
||||
}
|
||||
size_t getSamplerStateArrayCount() const;
|
||||
size_t getSamplerStateArraySize(const HardwareInfo &hwInfo) const;
|
||||
size_t getBorderColorStateSize() const;
|
||||
size_t getBorderColorOffset() const;
|
||||
unsigned int getMaxSimdSize() const {
|
||||
return kernelDescriptor.kernelAttributes.simdSize;
|
||||
}
|
||||
bool hasDeviceEnqueue() const {
|
||||
return kernelDescriptor.kernelAttributes.flags.usesDeviceSideEnqueue;
|
||||
}
|
||||
bool requiresSubgroupIndependentForwardProgress() const {
|
||||
return kernelDescriptor.kernelAttributes.flags.requiresSubgroupIndependentForwardProgress;
|
||||
}
|
||||
size_t getMaxRequiredWorkGroupSize(size_t maxWorkGroupSize) const {
|
||||
auto requiredWorkGroupSizeX = kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0];
|
||||
auto requiredWorkGroupSizeY = kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1];
|
||||
auto requiredWorkGroupSizeZ = kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2];
|
||||
size_t maxRequiredWorkGroupSize = requiredWorkGroupSizeX * requiredWorkGroupSizeY * requiredWorkGroupSizeZ;
|
||||
if ((maxRequiredWorkGroupSize == 0) || (maxRequiredWorkGroupSize > maxWorkGroupSize)) {
|
||||
maxRequiredWorkGroupSize = maxWorkGroupSize;
|
||||
}
|
||||
return maxRequiredWorkGroupSize;
|
||||
}
|
||||
|
||||
uint32_t getConstantBufferSize() const;
|
||||
int32_t getArgNumByName(const char *name) const;
|
||||
|
||||
bool createKernelAllocation(const Device &device, bool internalIsa);
|
||||
void apply(const DeviceInfoKernelPayloadConstants &constants);
|
||||
|
||||
HeapInfo heapInfo = {};
|
||||
std::vector<std::pair<uint32_t, uint32_t>> childrenKernelsIdOffset;
|
||||
char *crossThreadData = nullptr;
|
||||
const BuiltinDispatchInfoBuilder *builtinDispatchBuilder = nullptr;
|
||||
uint32_t systemKernelOffset = 0;
|
||||
uint64_t kernelId = 0;
|
||||
bool hasIndirectStatelessAccess = false;
|
||||
bool isKernelHeapSubstituted = false;
|
||||
GraphicsAllocation *kernelAllocation = nullptr;
|
||||
DebugData debugData;
|
||||
bool computeMode = false;
|
||||
const gtpin::igc_info_t *igcInfoForGtpin = nullptr;
|
||||
|
||||
uint64_t shaderHashCode;
|
||||
KernelDescriptor kernelDescriptor;
|
||||
};
|
||||
|
||||
std::string concatenateKernelNames(ArrayRef<KernelInfo *> kernelInfos);
|
||||
|
||||
} // namespace NEO
|
||||
@@ -7,7 +7,7 @@
|
||||
|
||||
#include "shared/source/program/program_info.h"
|
||||
|
||||
#include "opencl/source/program/kernel_info.h"
|
||||
#include "shared/source/program/kernel_info.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
|
||||
@@ -10,9 +10,9 @@
|
||||
#include "shared/source/compiler_interface/linker.h"
|
||||
#include "shared/source/debug_settings/debug_settings_manager.h"
|
||||
#include "shared/source/device_binary_format/patchtokens_decoder.h"
|
||||
#include "shared/source/program/kernel_info.h"
|
||||
#include "shared/source/program/program_info.h"
|
||||
|
||||
#include "opencl/source/program/kernel_info.h"
|
||||
#include "opencl/source/program/kernel_info_from_patchtokens.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
Reference in New Issue
Block a user