Initial commit

Change-Id: I4bf1707bd3dfeadf2c17b0a7daff372b1925ebbd
2026-01-03 06:49:52 +08:00 · 2017-12-21 00:45:38 +01:00
commit 7e9ad41290
1350 changed files with 233156 additions and 0 deletions
--- a/runtime/program/kernel_info.h
+++ b/runtime/program/kernel_info.h
@@ -0,0 +1,239 @@
+/*
+ * Copyright (c) 2017, Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#pragma once
+#include "config.h"
+#include "CL/cl.h"
+#include "heap_info.h"
+#include "kernel_arg_info.h"
+#include "patch_info.h"
+#include "runtime/helpers/hw_info.h"
+#include "runtime/helpers/dispatch_info.h"
+#include <algorithm>
+#include <cstdint>
+#include <cmath>
+#include <vector>
+#include <string>
+#include <unordered_map>
+#include <map>
+
+namespace OCLRT {
+class BuiltinDispatchInfoBuilder;
+class Device;
+class Kernel;
+struct KernelInfo;
+struct KernelArgumentType;
+
+extern std::unordered_map<std::string, uint32_t> accessQualifierMap;
+extern std::unordered_map<std::string, uint32_t> addressQualifierMap;
+extern std::map<std::string, size_t> typeSizeMap;
+
+struct WorkloadInfo {
+    uint32_t globalWorkOffsetOffsets[3];
+    uint32_t globalWorkSizeOffsets[3];
+    uint32_t localWorkSizeOffsets[3];
+    uint32_t localWorkSizeOffsets2[3];
+    uint32_t enqueuedLocalWorkSizeOffsets[3];
+    uint32_t numWorkGroupsOffset[3];
+    uint32_t maxWorkGroupSizeOffset;
+    uint32_t workDimOffset;
+    uint32_t slmStaticSize = 0;
+    uint32_t simdSizeOffset;
+    uint32_t parentEventOffset;
+    uint32_t prefferedWkgMultipleOffset;
+
+    static const uint32_t undefinedOffset;
+    static const uint32_t invalidParentEvent;
+
+    WorkloadInfo() {
+        globalWorkOffsetOffsets[0] = undefinedOffset;
+        globalWorkOffsetOffsets[1] = undefinedOffset;
+        globalWorkOffsetOffsets[2] = undefinedOffset;
+        globalWorkSizeOffsets[0] = undefinedOffset;
+        globalWorkSizeOffsets[1] = undefinedOffset;
+        globalWorkSizeOffsets[2] = undefinedOffset;
+        localWorkSizeOffsets[0] = undefinedOffset;
+        localWorkSizeOffsets[1] = undefinedOffset;
+        localWorkSizeOffsets[2] = undefinedOffset;
+        localWorkSizeOffsets2[0] = undefinedOffset;
+        localWorkSizeOffsets2[1] = undefinedOffset;
+        localWorkSizeOffsets2[2] = undefinedOffset;
+        enqueuedLocalWorkSizeOffsets[0] = undefinedOffset;
+        enqueuedLocalWorkSizeOffsets[1] = undefinedOffset;
+        enqueuedLocalWorkSizeOffsets[2] = undefinedOffset;
+        numWorkGroupsOffset[0] = undefinedOffset;
+        numWorkGroupsOffset[1] = undefinedOffset;
+        numWorkGroupsOffset[2] = undefinedOffset;
+        maxWorkGroupSizeOffset = undefinedOffset;
+        workDimOffset = undefinedOffset;
+        simdSizeOffset = undefinedOffset;
+        parentEventOffset = undefinedOffset;
+        prefferedWkgMultipleOffset = undefinedOffset;
+    }
+};
+
+static const float YTilingRatioValue = 1.3862943611198906188344642429164f;
+
+struct WorkSizeInfo {
+
+    uint32_t maxWorkGroupSize;
+    uint32_t minWorkGroupSize;
+    uint32_t hasBarriers;
+    uint32_t simdSize;
+    uint32_t slmTotalSize;
+    GFXCORE_FAMILY coreFamily;
+    uint32_t numThreadsPerSlice;
+    uint32_t localMemSize;
+    bool imgUsed = false;
+    bool yTiledSurfaces = false;
+    bool useRatio = false;
+    bool useStrictRatio = false;
+    float targetRatio = 0;
+
+    WorkSizeInfo(uint32_t maxWorkGroupSize, uint32_t hasBarriers, uint32_t simdSize, uint32_t slmTotalSize, GFXCORE_FAMILY coreFamily, uint32_t numThreadsPerSlice, uint32_t localMemSize, bool imgUsed, bool yTiledSurface);
+    WorkSizeInfo(const DispatchInfo &dispatchInfo);
+    void setIfUseImg(Kernel *pKernel);
+    void setMinWorkGroupSize();
+    void checkRatio(const size_t workItems[3]);
+};
+
+struct KernelInfo {
+  public:
+    static KernelInfo *create();
+    KernelInfo() {
+        heapInfo = {};
+        patchInfo = {};
+        workloadInfo = {};
+        kernelArgInfo = {};
+        kernelNonArgInfo = {};
+        childrenKernelsIdOffset = {};
+        reqdWorkGroupSize[0] = WorkloadInfo::undefinedOffset;
+        reqdWorkGroupSize[1] = WorkloadInfo::undefinedOffset;
+        reqdWorkGroupSize[2] = WorkloadInfo::undefinedOffset;
+    }
+
+    KernelInfo(const KernelInfo &) = delete;
+    KernelInfo &operator=(const KernelInfo &) = delete;
+
+    ~KernelInfo();
+
+    cl_int storeArgInfo(const SPatchKernelArgumentInfo *pkernelArgInfo);
+    void storeKernelArgument(const SPatchDataParameterBuffer *pDataParameterKernelArg);
+    void storeKernelArgument(const SPatchStatelessGlobalMemoryObjectKernelArgument *pStatelessGlobalKernelArg);
+    void storeKernelArgument(const SPatchImageMemoryObjectKernelArgument *pImageMemObjKernelArg);
+    void storeKernelArgument(const SPatchGlobalMemoryObjectKernelArgument *pGlobalMemObjKernelArg);
+    void storeKernelArgument(const SPatchStatelessConstantMemoryObjectKernelArgument *pStatelessConstMemObjKernelArg);
+    void storeKernelArgument(const SPatchStatelessDeviceQueueKernelArgument *pStatelessDeviceQueueKernelArg);
+    void storeKernelArgument(const SPatchSamplerKernelArgument *pSamplerKernelArg);
+    void storePatchToken(const SPatchAllocateStatelessPrivateSurface *pStatelessPrivateSurfaceArg);
+    void storePatchToken(const SPatchAllocateStatelessConstantMemorySurfaceWithInitialization *pStatelessConstantMemorySurfaceWithInitializationArg);
+    void storePatchToken(const SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization *pStatelessGlobalMemorySurfaceWithInitializationArg);
+    void storePatchToken(const SPatchAllocateStatelessPrintfSurface *pStatelessPrintfSurfaceArg);
+    void storePatchToken(const SPatchAllocateStatelessEventPoolSurface *pStatelessEventPoolSurfaceArg);
+    void storePatchToken(const SPatchAllocateStatelessDefaultDeviceQueueSurface *pStatelessDefaultDeviceQueueSurfaceArg);
+    void storePatchToken(const SPatchString *pStringArg);
+    void storePatchToken(const SPatchKernelAttributesInfo *pKernelAttributesInfo);
+    cl_int resolveKernelInfo();
+    void resizeKernelArgInfoAndRegisterParameter(uint32_t argCount) {
+        if (kernelArgInfo.size() <= argCount) {
+            kernelArgInfo.resize(argCount + 1);
+        }
+        if (!kernelArgInfo[argCount].needPatch) {
+            kernelArgInfo[argCount].needPatch = true;
+            argumentsToPatchNum++;
+        }
+    }
+
+    void storeKernelArgPatchInfo(uint32_t argNum, uint32_t dataSize, uint32_t crossthreadOffset, uint32_t sourceOffset, uint32_t offsetSSH);
+
+    const char *queryPrintfString(uint32_t index) const;
+
+    size_t getSamplerStateArrayCount() const;
+    size_t getSamplerStateArraySize(const HardwareInfo &hwInfo) const;
+    size_t getBorderColorStateSize() const;
+    size_t getBorderColorOffset() const;
+    unsigned int getMaxSimdSize() const {
+        const auto executionEnvironment = patchInfo.executionEnvironment;
+        if (executionEnvironment == nullptr) {
+            return 1;
+        }
+
+        if (executionEnvironment->CompiledSIMD32) {
+            return 32;
+        }
+
+        if (executionEnvironment->CompiledSIMD16) {
+            return 16;
+        }
+
+        return 8;
+    }
+    bool hasDeviceEnqueue() const {
+        return patchInfo.executionEnvironment ? !!patchInfo.executionEnvironment->HasDeviceEnqueue : false;
+    }
+    bool requiresSubgroupIndependentForwardProgress() const {
+        return patchInfo.executionEnvironment ? !!patchInfo.executionEnvironment->SubgroupIndependentForwardProgressRequired : false;
+    }
+    size_t getMaxRequiredWorkGroupSize(size_t maxWorkGroupSize) const {
+        auto requiredWorkGroupSizeX = patchInfo.executionEnvironment->RequiredWorkGroupSizeX;
+        auto requiredWorkGroupSizeY = patchInfo.executionEnvironment->RequiredWorkGroupSizeY;
+        auto requiredWorkGroupSizeZ = patchInfo.executionEnvironment->RequiredWorkGroupSizeZ;
+        size_t maxRequiredWorkGroupSize = requiredWorkGroupSizeX * requiredWorkGroupSizeY * requiredWorkGroupSizeZ;
+        if ((maxRequiredWorkGroupSize == 0) || (maxRequiredWorkGroupSize > maxWorkGroupSize)) {
+            maxRequiredWorkGroupSize = maxWorkGroupSize;
+        }
+        return maxRequiredWorkGroupSize;
+    }
+
+    uint32_t getConstantBufferSize() const;
+    int32_t getArgNumByName(const char *name) const {
+        int32_t argNum = 0;
+        for (auto &arg : kernelArgInfo) {
+            if (arg.name == name) {
+                return argNum;
+            }
+            ++argNum;
+        }
+        return -1;
+    }
+
+    std::string name;
+    std::string attributes;
+    HeapInfo heapInfo;
+    PatchInfo patchInfo;
+    std::vector<KernelArgInfo> kernelArgInfo;
+    std::vector<KernelArgInfo> kernelNonArgInfo;
+    WorkloadInfo workloadInfo;
+    std::vector<std::pair<uint32_t, uint32_t>> childrenKernelsIdOffset;
+    bool usesSsh = false;
+    bool requiresSshForBuffers = false;
+    bool isValid = false;
+    bool isVmeWorkload = false;
+    char *crossThreadData = nullptr;
+    size_t reqdWorkGroupSize[3];
+    size_t requiredSubGroupSize = 0;
+    uint32_t gpuPointerSize = 0;
+    const BuiltinDispatchInfoBuilder *builtinDispatchBuilder = nullptr;
+    uint32_t argumentsToPatchNum = 0;
+    uint32_t systemKernelOffset = 0;
+};
+} // namespace OCLRT