/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/kernel/dispatch_kernel_encoder_interface.h" #include "shared/source/unified_memory/unified_memory.h" #include "level_zero/core/source/kernel.h" #include namespace L0 { struct GraphicsAllocation; struct KernelImp : Kernel { KernelImp(Module *module); ~KernelImp() override; ze_result_t destroy() override { delete this; return ZE_RESULT_SUCCESS; } ze_result_t setAttribute(ze_kernel_attribute_t attr, uint32_t size, const void *pValue) override; ze_result_t getAttribute(ze_kernel_attribute_t attr, uint32_t *pSize, void *pValue) override; ze_result_t getProperties(ze_kernel_properties_t *pKernelProperties) override; ze_result_t setIntermediateCacheConfig(ze_cache_config_t cacheConfig) override { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } ze_result_t setArgumentValue(uint32_t argIndex, size_t argSize, const void *pArgValue) override; void setGroupCount(uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ) override; bool getGroupCountOffsets(uint32_t *locations) override; bool getGroupSizeOffsets(uint32_t *locations) override; ze_result_t setGroupSize(uint32_t groupSizeX, uint32_t groupSizeY, uint32_t groupSizeZ) override; ze_result_t suggestGroupSize(uint32_t globalSizeX, uint32_t globalSizeY, uint32_t globalSizeZ, uint32_t *groupSizeX, uint32_t *groupSizeY, uint32_t *groupSizeZ) override; uint32_t suggestMaxCooperativeGroupCount() override; const uint8_t *getCrossThreadData() const override { return crossThreadData.get(); } uint32_t getCrossThreadDataSize() const override { return crossThreadDataSize; } const std::vector &getResidencyContainer() const override { return residencyContainer; } void getGroupSize(uint32_t &outGroupSizeX, uint32_t &outGroupSizeY, uint32_t &outGroupSizeZ) const override { outGroupSizeX = this->groupSize[0]; outGroupSizeY = this->groupSize[1]; outGroupSizeZ = this->groupSize[2]; } ze_result_t setArgImmediate(uint32_t argIndex, size_t argSize, const void *argVal); ze_result_t setArgBuffer(uint32_t argIndex, size_t argSize, const void *argVal); ze_result_t setArgRedescribedImage(uint32_t argIndex, ze_image_handle_t argVal) override; ze_result_t setArgBufferWithAlloc(uint32_t argIndex, const void *argVal, NEO::GraphicsAllocation *allocation) override; ze_result_t setArgImage(uint32_t argIndex, size_t argSize, const void *argVal); ze_result_t setArgSampler(uint32_t argIndex, size_t argSize, const void *argVal); virtual void setBufferSurfaceState(uint32_t argIndex, void *address, NEO::GraphicsAllocation *alloc) = 0; ze_result_t initialize(const ze_kernel_desc_t *desc); const uint8_t *getPerThreadData() const override { return perThreadDataForWholeThreadGroup; } uint32_t getPerThreadDataSizeForWholeThreadGroup() const override { return perThreadDataSizeForWholeThreadGroup; } uint32_t getPerThreadDataSize() const override { return perThreadDataSize; } uint32_t getThreadsPerThreadGroup() const override { return threadsPerThreadGroup; } uint32_t getThreadExecutionMask() const override { return threadExecutionMask; } NEO::GraphicsAllocation *getPrintfBufferAllocation() override { return this->printfBuffer; } void printPrintfOutput() override; const uint8_t *getSurfaceStateHeapData() const override { return surfaceStateHeapData.get(); } uint32_t getSurfaceStateHeapDataSize() const override { return surfaceStateHeapDataSize; } const uint8_t *getDynamicStateHeapData() const override { return dynamicStateHeapData.get(); } size_t getDynamicStateHeapDataSize() const override { return dynamicStateHeapDataSize; } const KernelImmutableData *getImmutableData() const override { return kernelImmData; } UnifiedMemoryControls getUnifiedMemoryControls() const override { return unifiedMemoryControls; } bool hasBarriers() override; uint32_t getSlmTotalSize() override; uint32_t getBindingTableOffset() override; uint32_t getBorderColor() override; uint32_t getSamplerTableOffset() override; uint32_t getNumSurfaceStates() override; uint32_t getNumSamplers() override; uint32_t getSimdSize() override; uint32_t getSizeCrossThreadData() override; uint32_t getPerThreadScratchSize() override; uint32_t getThreadsPerThreadGroupCount() override; uint32_t getSizePerThreadData() override; uint32_t getSizePerThreadDataForWholeGroup() override; uint32_t getSizeSurfaceStateHeapData() override; uint32_t getPerThreadExecutionMask() override; uint32_t *getCountOffsets() override; uint32_t *getSizeOffsets() override; uint32_t *getLocalWorkSize() override; uint32_t getNumGrfRequired() override; NEO::GraphicsAllocation *getIsaAllocation() override; bool hasGroupCounts() override; bool hasGroupSize() override; const void *getSurfaceStateHeap() override; const void *getDynamicStateHeap() override; const void *getCrossThread() override; const void *getPerThread() override; protected: KernelImp() = default; void patchWorkgroupSizeInCrossThreadData(uint32_t x, uint32_t y, uint32_t z); void createPrintfBuffer(); const KernelImmutableData *kernelImmData = nullptr; Module *module = nullptr; typedef ze_result_t (KernelImp::*KernelArgHandler)(uint32_t argIndex, size_t argSize, const void *argVal); std::vector kernelArgHandlers; std::vector residencyContainer; NEO::GraphicsAllocation *printfBuffer = nullptr; uint32_t groupSize[3] = {0u, 0u, 0u}; uint32_t threadsPerThreadGroup = 0u; uint32_t threadExecutionMask = 0u; std::unique_ptr crossThreadData = 0; uint32_t crossThreadDataSize = 0; std::unique_ptr surfaceStateHeapData = nullptr; uint32_t surfaceStateHeapDataSize = 0; std::unique_ptr dynamicStateHeapData = nullptr; uint32_t dynamicStateHeapDataSize = 0; uint8_t *perThreadDataForWholeThreadGroup = nullptr; uint32_t perThreadDataSizeForWholeThreadGroupAllocated = 0; uint32_t perThreadDataSizeForWholeThreadGroup = 0u; uint32_t perThreadDataSize = 0u; UnifiedMemoryControls unifiedMemoryControls; std::vector slmArgSizes; uint32_t slmArgsTotalSize = 0U; }; } // namespace L0