/* * Copyright (C) 2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/helpers/debug_helpers.h" #include "shared/source/kernel/debug_data.h" #include "shared/source/kernel/kernel_arg_descriptor.h" #include "shared/source/kernel/kernel_arg_metadata.h" #include "shared/source/utilities/arrayref.h" #include "shared/source/utilities/stackvec.h" #include #include #include #include #include #include namespace NEO { using StringMap = std::unordered_map; using InstructionsSegmentOffset = uint16_t; struct KernelDescriptor final { enum AddressingMode : uint8_t { AddrNone, Stateless, Bindful, Bindless, BindfulAndStateless, BindlessAndStateless }; KernelDescriptor() = default; ~KernelDescriptor() = default; struct KernelAttributes { KernelAttributes() { flags.packed = 0U; } uint32_t slmInlineSize = 0U; uint32_t perThreadScratchSize[2] = {0U, 0U}; uint32_t perThreadPrivateMemorySize = 0U; uint32_t perThreadSystemThreadSurfaceSize = 0U; uint16_t requiredWorkgroupSize[3] = {0U, 0U, 0U}; uint16_t crossThreadDataSize = 0U; uint16_t perThreadDataSize = 0U; uint16_t numArgsToPatch = 0U; uint16_t numGrfRequired = 0U; AddressingMode bufferAddressingMode = BindfulAndStateless; AddressingMode imageAddressingMode = Bindful; AddressingMode samplerAddressingMode = Bindful; uint8_t workgroupWalkOrder[3] = {0, 1, 2}; uint8_t workgroupDimensionsOrder[3] = {0, 1, 2}; uint8_t gpuPointerSize = 0; uint8_t simdSize = 8; uint8_t grfSize = 32; uint8_t numLocalIdChannels = 3; bool supportsBuffersBiggerThan4Gb() const { return Stateless == bufferAddressingMode; } union { struct { bool usesPrintf : 1; bool usesBarriers : 1; bool usesFencesForReadWriteImages : 1; bool usesFlattenedLocalIds; bool usesPrivateMemory : 1; bool usesVme : 1; bool usesImages : 1; bool usesSamplers : 1; bool usesDeviceSideEnqueue : 1; bool usesSyncBuffer : 1; bool useGlobalAtomics : 1; bool usesStatelessWrites : 1; bool passInlineData : 1; bool perThreadDataHeaderIsPresent : 1; bool perThreadDataUnusedGrfIsPresent : 1; bool requiresDisabledMidThreadPreemption : 1; bool requiresSubgroupIndependentForwardProgress : 1; bool requiresWorkgroupWalkOrder : 1; }; uint32_t packed; } flags; static_assert(sizeof(KernelAttributes::flags) == sizeof(KernelAttributes::flags.packed), ""); } kernelAttributes; struct { InstructionsSegmentOffset skipPerThreadDataLoad = 0U; InstructionsSegmentOffset skipSetFFIDGP = 0U; InstructionsSegmentOffset systemKernel = 0U; } entryPoints; struct PayloadMappings { struct { CrossThreadDataOffset globalWorkOffset[3] = {undefined, undefined, undefined}; CrossThreadDataOffset globalWorkSize[3] = {undefined, undefined, undefined}; CrossThreadDataOffset localWorkSize[3] = {undefined, undefined, undefined}; CrossThreadDataOffset localWorkSize2[3] = {undefined, undefined, undefined}; CrossThreadDataOffset enqueuedLocalWorkSize[3] = {undefined, undefined, undefined}; CrossThreadDataOffset numWorkGroups[3] = {undefined, undefined, undefined}; CrossThreadDataOffset workDim = undefined; } dispatchTraits; struct { SurfaceStateHeapOffset tableOffset = undefined; uint8_t numEntries = 0; } bindingTable; struct { DynamicStateHeapOffset tableOffset = undefined; DynamicStateHeapOffset borderColor = undefined; uint8_t numSamplers = 0; } samplerTable; StackVec explicitArgs; struct { ArgDescPointer printfSurfaceAddress; ArgDescPointer globalVariablesSurfaceAddress; ArgDescPointer globalConstantsSurfaceAddress; ArgDescPointer privateMemoryAddress; ArgDescPointer deviceSideEnqueueEventPoolSurfaceAddress; ArgDescPointer deviceSideEnqueueDefaultQueueSurfaceAddress; ArgDescPointer systemThreadSurfaceAddress; ArgDescPointer syncBufferAddress; CrossThreadDataOffset privateMemorySize = undefined; CrossThreadDataOffset maxWorkGroupSize = undefined; CrossThreadDataOffset simdSize = undefined; CrossThreadDataOffset deviceSideEnqueueParentEvent = undefined; CrossThreadDataOffset preferredWkgMultiple = undefined; CrossThreadDataOffset localMemoryStatelessWindowSize = undefined; CrossThreadDataOffset localMemoryStatelessWindowStartAddres = undefined; } implicitArgs; std::vector> explicitArgsExtendedDescriptors; } payloadMappings; std::vector explicitArgsExtendedMetadata; struct { std::string kernelName; std::string kernelLanguageAttributes; StringMap printfStringsMap; std::vector> deviceSideEnqueueChildrenKernelsIdOffset; uint32_t deviceSideEnqueueBlockInterfaceDescriptorOffset = 0U; struct ByValueArgument { ArgDescValue::Element byValueElement; uint16_t argNum; }; StackVec allByValueKernelArguments; uint16_t compiledSubGroupsNumber = 0U; uint8_t requiredSubGroupSize = 0U; } kernelMetadata; struct { std::unique_ptr debugData; const void *igcInfoForGtpin = nullptr; } external; }; } // namespace NEO