mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-03 06:49:52 +08:00
Initial commit
Change-Id: I4bf1707bd3dfeadf2c17b0a7daff372b1925ebbd
This commit is contained in:
239
runtime/program/kernel_info.h
Normal file
239
runtime/program/kernel_info.h
Normal file
@@ -0,0 +1,239 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "config.h"
|
||||
#include "CL/cl.h"
|
||||
#include "heap_info.h"
|
||||
#include "kernel_arg_info.h"
|
||||
#include "patch_info.h"
|
||||
#include "runtime/helpers/hw_info.h"
|
||||
#include "runtime/helpers/dispatch_info.h"
|
||||
#include <algorithm>
|
||||
#include <cstdint>
|
||||
#include <cmath>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <map>
|
||||
|
||||
namespace OCLRT {
|
||||
class BuiltinDispatchInfoBuilder;
|
||||
class Device;
|
||||
class Kernel;
|
||||
struct KernelInfo;
|
||||
struct KernelArgumentType;
|
||||
|
||||
extern std::unordered_map<std::string, uint32_t> accessQualifierMap;
|
||||
extern std::unordered_map<std::string, uint32_t> addressQualifierMap;
|
||||
extern std::map<std::string, size_t> typeSizeMap;
|
||||
|
||||
struct WorkloadInfo {
|
||||
uint32_t globalWorkOffsetOffsets[3];
|
||||
uint32_t globalWorkSizeOffsets[3];
|
||||
uint32_t localWorkSizeOffsets[3];
|
||||
uint32_t localWorkSizeOffsets2[3];
|
||||
uint32_t enqueuedLocalWorkSizeOffsets[3];
|
||||
uint32_t numWorkGroupsOffset[3];
|
||||
uint32_t maxWorkGroupSizeOffset;
|
||||
uint32_t workDimOffset;
|
||||
uint32_t slmStaticSize = 0;
|
||||
uint32_t simdSizeOffset;
|
||||
uint32_t parentEventOffset;
|
||||
uint32_t prefferedWkgMultipleOffset;
|
||||
|
||||
static const uint32_t undefinedOffset;
|
||||
static const uint32_t invalidParentEvent;
|
||||
|
||||
WorkloadInfo() {
|
||||
globalWorkOffsetOffsets[0] = undefinedOffset;
|
||||
globalWorkOffsetOffsets[1] = undefinedOffset;
|
||||
globalWorkOffsetOffsets[2] = undefinedOffset;
|
||||
globalWorkSizeOffsets[0] = undefinedOffset;
|
||||
globalWorkSizeOffsets[1] = undefinedOffset;
|
||||
globalWorkSizeOffsets[2] = undefinedOffset;
|
||||
localWorkSizeOffsets[0] = undefinedOffset;
|
||||
localWorkSizeOffsets[1] = undefinedOffset;
|
||||
localWorkSizeOffsets[2] = undefinedOffset;
|
||||
localWorkSizeOffsets2[0] = undefinedOffset;
|
||||
localWorkSizeOffsets2[1] = undefinedOffset;
|
||||
localWorkSizeOffsets2[2] = undefinedOffset;
|
||||
enqueuedLocalWorkSizeOffsets[0] = undefinedOffset;
|
||||
enqueuedLocalWorkSizeOffsets[1] = undefinedOffset;
|
||||
enqueuedLocalWorkSizeOffsets[2] = undefinedOffset;
|
||||
numWorkGroupsOffset[0] = undefinedOffset;
|
||||
numWorkGroupsOffset[1] = undefinedOffset;
|
||||
numWorkGroupsOffset[2] = undefinedOffset;
|
||||
maxWorkGroupSizeOffset = undefinedOffset;
|
||||
workDimOffset = undefinedOffset;
|
||||
simdSizeOffset = undefinedOffset;
|
||||
parentEventOffset = undefinedOffset;
|
||||
prefferedWkgMultipleOffset = undefinedOffset;
|
||||
}
|
||||
};
|
||||
|
||||
static const float YTilingRatioValue = 1.3862943611198906188344642429164f;
|
||||
|
||||
struct WorkSizeInfo {
|
||||
|
||||
uint32_t maxWorkGroupSize;
|
||||
uint32_t minWorkGroupSize;
|
||||
uint32_t hasBarriers;
|
||||
uint32_t simdSize;
|
||||
uint32_t slmTotalSize;
|
||||
GFXCORE_FAMILY coreFamily;
|
||||
uint32_t numThreadsPerSlice;
|
||||
uint32_t localMemSize;
|
||||
bool imgUsed = false;
|
||||
bool yTiledSurfaces = false;
|
||||
bool useRatio = false;
|
||||
bool useStrictRatio = false;
|
||||
float targetRatio = 0;
|
||||
|
||||
WorkSizeInfo(uint32_t maxWorkGroupSize, uint32_t hasBarriers, uint32_t simdSize, uint32_t slmTotalSize, GFXCORE_FAMILY coreFamily, uint32_t numThreadsPerSlice, uint32_t localMemSize, bool imgUsed, bool yTiledSurface);
|
||||
WorkSizeInfo(const DispatchInfo &dispatchInfo);
|
||||
void setIfUseImg(Kernel *pKernel);
|
||||
void setMinWorkGroupSize();
|
||||
void checkRatio(const size_t workItems[3]);
|
||||
};
|
||||
|
||||
struct KernelInfo {
|
||||
public:
|
||||
static KernelInfo *create();
|
||||
KernelInfo() {
|
||||
heapInfo = {};
|
||||
patchInfo = {};
|
||||
workloadInfo = {};
|
||||
kernelArgInfo = {};
|
||||
kernelNonArgInfo = {};
|
||||
childrenKernelsIdOffset = {};
|
||||
reqdWorkGroupSize[0] = WorkloadInfo::undefinedOffset;
|
||||
reqdWorkGroupSize[1] = WorkloadInfo::undefinedOffset;
|
||||
reqdWorkGroupSize[2] = WorkloadInfo::undefinedOffset;
|
||||
}
|
||||
|
||||
KernelInfo(const KernelInfo &) = delete;
|
||||
KernelInfo &operator=(const KernelInfo &) = delete;
|
||||
|
||||
~KernelInfo();
|
||||
|
||||
cl_int storeArgInfo(const SPatchKernelArgumentInfo *pkernelArgInfo);
|
||||
void storeKernelArgument(const SPatchDataParameterBuffer *pDataParameterKernelArg);
|
||||
void storeKernelArgument(const SPatchStatelessGlobalMemoryObjectKernelArgument *pStatelessGlobalKernelArg);
|
||||
void storeKernelArgument(const SPatchImageMemoryObjectKernelArgument *pImageMemObjKernelArg);
|
||||
void storeKernelArgument(const SPatchGlobalMemoryObjectKernelArgument *pGlobalMemObjKernelArg);
|
||||
void storeKernelArgument(const SPatchStatelessConstantMemoryObjectKernelArgument *pStatelessConstMemObjKernelArg);
|
||||
void storeKernelArgument(const SPatchStatelessDeviceQueueKernelArgument *pStatelessDeviceQueueKernelArg);
|
||||
void storeKernelArgument(const SPatchSamplerKernelArgument *pSamplerKernelArg);
|
||||
void storePatchToken(const SPatchAllocateStatelessPrivateSurface *pStatelessPrivateSurfaceArg);
|
||||
void storePatchToken(const SPatchAllocateStatelessConstantMemorySurfaceWithInitialization *pStatelessConstantMemorySurfaceWithInitializationArg);
|
||||
void storePatchToken(const SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization *pStatelessGlobalMemorySurfaceWithInitializationArg);
|
||||
void storePatchToken(const SPatchAllocateStatelessPrintfSurface *pStatelessPrintfSurfaceArg);
|
||||
void storePatchToken(const SPatchAllocateStatelessEventPoolSurface *pStatelessEventPoolSurfaceArg);
|
||||
void storePatchToken(const SPatchAllocateStatelessDefaultDeviceQueueSurface *pStatelessDefaultDeviceQueueSurfaceArg);
|
||||
void storePatchToken(const SPatchString *pStringArg);
|
||||
void storePatchToken(const SPatchKernelAttributesInfo *pKernelAttributesInfo);
|
||||
cl_int resolveKernelInfo();
|
||||
void resizeKernelArgInfoAndRegisterParameter(uint32_t argCount) {
|
||||
if (kernelArgInfo.size() <= argCount) {
|
||||
kernelArgInfo.resize(argCount + 1);
|
||||
}
|
||||
if (!kernelArgInfo[argCount].needPatch) {
|
||||
kernelArgInfo[argCount].needPatch = true;
|
||||
argumentsToPatchNum++;
|
||||
}
|
||||
}
|
||||
|
||||
void storeKernelArgPatchInfo(uint32_t argNum, uint32_t dataSize, uint32_t crossthreadOffset, uint32_t sourceOffset, uint32_t offsetSSH);
|
||||
|
||||
const char *queryPrintfString(uint32_t index) const;
|
||||
|
||||
size_t getSamplerStateArrayCount() const;
|
||||
size_t getSamplerStateArraySize(const HardwareInfo &hwInfo) const;
|
||||
size_t getBorderColorStateSize() const;
|
||||
size_t getBorderColorOffset() const;
|
||||
unsigned int getMaxSimdSize() const {
|
||||
const auto executionEnvironment = patchInfo.executionEnvironment;
|
||||
if (executionEnvironment == nullptr) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (executionEnvironment->CompiledSIMD32) {
|
||||
return 32;
|
||||
}
|
||||
|
||||
if (executionEnvironment->CompiledSIMD16) {
|
||||
return 16;
|
||||
}
|
||||
|
||||
return 8;
|
||||
}
|
||||
bool hasDeviceEnqueue() const {
|
||||
return patchInfo.executionEnvironment ? !!patchInfo.executionEnvironment->HasDeviceEnqueue : false;
|
||||
}
|
||||
bool requiresSubgroupIndependentForwardProgress() const {
|
||||
return patchInfo.executionEnvironment ? !!patchInfo.executionEnvironment->SubgroupIndependentForwardProgressRequired : false;
|
||||
}
|
||||
size_t getMaxRequiredWorkGroupSize(size_t maxWorkGroupSize) const {
|
||||
auto requiredWorkGroupSizeX = patchInfo.executionEnvironment->RequiredWorkGroupSizeX;
|
||||
auto requiredWorkGroupSizeY = patchInfo.executionEnvironment->RequiredWorkGroupSizeY;
|
||||
auto requiredWorkGroupSizeZ = patchInfo.executionEnvironment->RequiredWorkGroupSizeZ;
|
||||
size_t maxRequiredWorkGroupSize = requiredWorkGroupSizeX * requiredWorkGroupSizeY * requiredWorkGroupSizeZ;
|
||||
if ((maxRequiredWorkGroupSize == 0) || (maxRequiredWorkGroupSize > maxWorkGroupSize)) {
|
||||
maxRequiredWorkGroupSize = maxWorkGroupSize;
|
||||
}
|
||||
return maxRequiredWorkGroupSize;
|
||||
}
|
||||
|
||||
uint32_t getConstantBufferSize() const;
|
||||
int32_t getArgNumByName(const char *name) const {
|
||||
int32_t argNum = 0;
|
||||
for (auto &arg : kernelArgInfo) {
|
||||
if (arg.name == name) {
|
||||
return argNum;
|
||||
}
|
||||
++argNum;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
std::string name;
|
||||
std::string attributes;
|
||||
HeapInfo heapInfo;
|
||||
PatchInfo patchInfo;
|
||||
std::vector<KernelArgInfo> kernelArgInfo;
|
||||
std::vector<KernelArgInfo> kernelNonArgInfo;
|
||||
WorkloadInfo workloadInfo;
|
||||
std::vector<std::pair<uint32_t, uint32_t>> childrenKernelsIdOffset;
|
||||
bool usesSsh = false;
|
||||
bool requiresSshForBuffers = false;
|
||||
bool isValid = false;
|
||||
bool isVmeWorkload = false;
|
||||
char *crossThreadData = nullptr;
|
||||
size_t reqdWorkGroupSize[3];
|
||||
size_t requiredSubGroupSize = 0;
|
||||
uint32_t gpuPointerSize = 0;
|
||||
const BuiltinDispatchInfoBuilder *builtinDispatchBuilder = nullptr;
|
||||
uint32_t argumentsToPatchNum = 0;
|
||||
uint32_t systemKernelOffset = 0;
|
||||
};
|
||||
} // namespace OCLRT
|
||||
Reference in New Issue
Block a user