176 lines
6.6 KiB
C++
176 lines
6.6 KiB
C++
/*
 * Copyright (C) 2019-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */
|
|
|
|
#pragma once
|
|
|
|
#include "shared/source/kernel/dispatch_kernel_encoder_interface.h"
|
|
#include "shared/source/unified_memory/unified_memory.h"
|
|
|
|
#include "level_zero/core/source/kernel.h"
|
|
|
|
#include <memory>
|
|
|
|
namespace L0 {
|
|
|
|
// Forward declaration of GraphicsAllocation in the enclosing L0 namespace.
// NOTE(review): every use visible in this header is spelled
// NEO::GraphicsAllocation, so this L0-scoped declaration appears unused here -
// confirm whether other code relies on it before removing.
struct GraphicsAllocation;
|
|
|
|
struct KernelImp : Kernel {
|
|
KernelImp(Module *module);
|
|
|
|
~KernelImp() override;
|
|
|
|
ze_result_t destroy() override {
|
|
delete this;
|
|
return ZE_RESULT_SUCCESS;
|
|
}
|
|
|
|
ze_result_t setAttribute(ze_kernel_attribute_t attr, uint32_t size, const void *pValue) override;
|
|
|
|
ze_result_t getAttribute(ze_kernel_attribute_t attr, uint32_t *pSize, void *pValue) override;
|
|
|
|
ze_result_t getProperties(ze_kernel_properties_t *pKernelProperties) override;
|
|
|
|
ze_result_t setIntermediateCacheConfig(ze_cache_config_t cacheConfig) override {
|
|
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
|
|
}
|
|
|
|
ze_result_t setArgumentValue(uint32_t argIndex, size_t argSize, const void *pArgValue) override;
|
|
|
|
void setGroupCount(uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ) override;
|
|
|
|
bool getGroupCountOffsets(uint32_t *locations) override;
|
|
|
|
bool getGroupSizeOffsets(uint32_t *locations) override;
|
|
|
|
ze_result_t setGroupSize(uint32_t groupSizeX, uint32_t groupSizeY,
|
|
uint32_t groupSizeZ) override;
|
|
|
|
ze_result_t suggestGroupSize(uint32_t globalSizeX, uint32_t globalSizeY, uint32_t globalSizeZ,
|
|
uint32_t *groupSizeX, uint32_t *groupSizeY,
|
|
uint32_t *groupSizeZ) override;
|
|
|
|
ze_result_t suggestMaxCooperativeGroupCount(uint32_t *totalGroupCount) override;
|
|
|
|
const uint8_t *getCrossThreadData() const override { return crossThreadData.get(); }
|
|
uint32_t getCrossThreadDataSize() const override { return crossThreadDataSize; }
|
|
|
|
const std::vector<NEO::GraphicsAllocation *> &getResidencyContainer() const override {
|
|
return residencyContainer;
|
|
}
|
|
|
|
void getGroupSize(uint32_t &outGroupSizeX, uint32_t &outGroupSizeY,
|
|
uint32_t &outGroupSizeZ) const override {
|
|
outGroupSizeX = this->groupSize[0];
|
|
outGroupSizeY = this->groupSize[1];
|
|
outGroupSizeZ = this->groupSize[2];
|
|
}
|
|
|
|
ze_result_t setArgImmediate(uint32_t argIndex, size_t argSize, const void *argVal);
|
|
|
|
ze_result_t setArgBuffer(uint32_t argIndex, size_t argSize, const void *argVal);
|
|
|
|
ze_result_t setArgRedescribedImage(uint32_t argIndex, ze_image_handle_t argVal) override;
|
|
|
|
ze_result_t setArgBufferWithAlloc(uint32_t argIndex, const void *argVal, NEO::GraphicsAllocation *allocation) override;
|
|
|
|
ze_result_t setArgImage(uint32_t argIndex, size_t argSize, const void *argVal);
|
|
|
|
ze_result_t setArgSampler(uint32_t argIndex, size_t argSize, const void *argVal);
|
|
|
|
virtual void setBufferSurfaceState(uint32_t argIndex, void *address, NEO::GraphicsAllocation *alloc) = 0;
|
|
|
|
ze_result_t initialize(const ze_kernel_desc_t *desc);
|
|
|
|
const uint8_t *getPerThreadData() const override { return perThreadDataForWholeThreadGroup; }
|
|
uint32_t getPerThreadDataSizeForWholeThreadGroup() const override { return perThreadDataSizeForWholeThreadGroup; }
|
|
|
|
uint32_t getPerThreadDataSize() const override { return perThreadDataSize; }
|
|
uint32_t getThreadsPerThreadGroup() const override { return threadsPerThreadGroup; }
|
|
uint32_t getThreadExecutionMask() const override { return threadExecutionMask; }
|
|
|
|
NEO::GraphicsAllocation *getPrintfBufferAllocation() override { return this->printfBuffer; }
|
|
void printPrintfOutput() override;
|
|
|
|
const uint8_t *getSurfaceStateHeapData() const override { return surfaceStateHeapData.get(); }
|
|
uint32_t getSurfaceStateHeapDataSize() const override { return surfaceStateHeapDataSize; }
|
|
|
|
const uint8_t *getDynamicStateHeapData() const override { return dynamicStateHeapData.get(); }
|
|
size_t getDynamicStateHeapDataSize() const override { return dynamicStateHeapDataSize; }
|
|
|
|
const KernelImmutableData *getImmutableData() const override { return kernelImmData; }
|
|
|
|
UnifiedMemoryControls getUnifiedMemoryControls() const override { return unifiedMemoryControls; }
|
|
bool hasIndirectAllocationsAllowed() const override;
|
|
|
|
bool hasBarriers() override;
|
|
uint32_t getSlmTotalSize() override;
|
|
uint32_t getBindingTableOffset() override;
|
|
uint32_t getBorderColor() override;
|
|
uint32_t getSamplerTableOffset() override;
|
|
uint32_t getNumSurfaceStates() override;
|
|
uint32_t getNumSamplers() override;
|
|
uint32_t getSimdSize() override;
|
|
uint32_t getSizeCrossThreadData() override;
|
|
uint32_t getPerThreadScratchSize() override;
|
|
uint32_t getThreadsPerThreadGroupCount() override;
|
|
uint32_t getSizePerThreadData() override;
|
|
uint32_t getSizePerThreadDataForWholeGroup() override;
|
|
uint32_t getSizeSurfaceStateHeapData() override;
|
|
uint32_t getPerThreadExecutionMask() override;
|
|
uint32_t *getCountOffsets() override;
|
|
uint32_t *getSizeOffsets() override;
|
|
uint32_t *getLocalWorkSize() override;
|
|
uint32_t getNumGrfRequired() override;
|
|
NEO::GraphicsAllocation *getIsaAllocation() override;
|
|
bool hasGroupCounts() override;
|
|
bool hasGroupSize() override;
|
|
const void *getSurfaceStateHeap() override;
|
|
const void *getDynamicStateHeap() override;
|
|
const void *getCrossThread() override;
|
|
const void *getPerThread() override;
|
|
|
|
protected:
|
|
KernelImp() = default;
|
|
|
|
void patchWorkgroupSizeInCrossThreadData(uint32_t x, uint32_t y, uint32_t z);
|
|
|
|
void createPrintfBuffer();
|
|
|
|
const KernelImmutableData *kernelImmData = nullptr;
|
|
Module *module = nullptr;
|
|
|
|
typedef ze_result_t (KernelImp::*KernelArgHandler)(uint32_t argIndex, size_t argSize, const void *argVal);
|
|
std::vector<KernelImp::KernelArgHandler> kernelArgHandlers;
|
|
std::vector<NEO::GraphicsAllocation *> residencyContainer;
|
|
|
|
NEO::GraphicsAllocation *printfBuffer = nullptr;
|
|
|
|
uint32_t groupSize[3] = {0u, 0u, 0u};
|
|
uint32_t threadsPerThreadGroup = 0u;
|
|
uint32_t threadExecutionMask = 0u;
|
|
|
|
std::unique_ptr<uint8_t[]> crossThreadData = 0;
|
|
uint32_t crossThreadDataSize = 0;
|
|
|
|
std::unique_ptr<uint8_t[]> surfaceStateHeapData = nullptr;
|
|
uint32_t surfaceStateHeapDataSize = 0;
|
|
|
|
std::unique_ptr<uint8_t[]> dynamicStateHeapData = nullptr;
|
|
uint32_t dynamicStateHeapDataSize = 0;
|
|
|
|
uint8_t *perThreadDataForWholeThreadGroup = nullptr;
|
|
uint32_t perThreadDataSizeForWholeThreadGroupAllocated = 0;
|
|
uint32_t perThreadDataSizeForWholeThreadGroup = 0u;
|
|
uint32_t perThreadDataSize = 0u;
|
|
|
|
UnifiedMemoryControls unifiedMemoryControls;
|
|
std::vector<uint32_t> slmArgSizes;
|
|
uint32_t slmArgsTotalSize = 0U;
|
|
};
|
|
|
|
} // namespace L0
|