2017-12-21 07:45:38 +08:00
|
|
|
/*
|
2019-01-28 17:24:11 +08:00
|
|
|
* Copyright (C) 2017-2019 Intel Corporation
|
2017-12-21 07:45:38 +08:00
|
|
|
*
|
2018-09-18 15:11:08 +08:00
|
|
|
* SPDX-License-Identifier: MIT
|
2017-12-21 07:45:38 +08:00
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
|
|
|
#pragma once
|
2019-10-03 16:17:22 +08:00
|
|
|
#include "core/helpers/preamble.h"
|
2019-06-17 21:27:33 +08:00
|
|
|
#include "core/unified_memory/unified_memory.h"
|
2017-12-21 07:45:38 +08:00
|
|
|
#include "runtime/api/cl_types.h"
|
|
|
|
#include "runtime/command_stream/thread_arbitration_policy.h"
|
|
|
|
#include "runtime/device_queue/device_queue.h"
|
2019-02-27 18:39:32 +08:00
|
|
|
#include "runtime/helpers/address_patch.h"
|
2017-12-21 07:45:38 +08:00
|
|
|
#include "runtime/helpers/base_object.h"
|
2018-08-07 21:09:16 +08:00
|
|
|
#include "runtime/helpers/properties_helper.h"
|
2017-12-21 07:45:38 +08:00
|
|
|
#include "runtime/os_interface/debug_settings_manager.h"
|
2019-02-27 18:39:32 +08:00
|
|
|
#include "runtime/program/kernel_info.h"
|
|
|
|
#include "runtime/program/program.h"
|
|
|
|
|
2017-12-21 07:45:38 +08:00
|
|
|
#include <vector>
|
|
|
|
|
2019-03-26 18:59:46 +08:00
|
|
|
namespace NEO {
|
2017-12-21 07:45:38 +08:00
|
|
|
struct CompletionStamp;
|
2018-08-07 21:09:16 +08:00
|
|
|
class Buffer;
|
2017-12-21 07:45:38 +08:00
|
|
|
class GraphicsAllocation;
|
2018-03-27 20:30:05 +08:00
|
|
|
class ImageTransformer;
|
2017-12-21 07:45:38 +08:00
|
|
|
class Surface;
|
|
|
|
class PrintfHandler;
|
|
|
|
|
|
|
|
template <>
|
|
|
|
struct OpenCLObjectMapper<_cl_kernel> {
|
|
|
|
typedef class Kernel DerivedType;
|
|
|
|
};
|
|
|
|
|
|
|
|
class Kernel : public BaseObject<_cl_kernel> {
|
|
|
|
public:
|
|
|
|
static const cl_ulong objectMagic = 0x3284ADC8EA0AFE25LL;
|
|
|
|
static const uint32_t kernelBinaryAlignement = 64;
|
|
|
|
|
|
|
|
enum kernelArgType {
|
|
|
|
NONE_OBJ,
|
|
|
|
IMAGE_OBJ,
|
|
|
|
BUFFER_OBJ,
|
|
|
|
PIPE_OBJ,
|
|
|
|
SVM_OBJ,
|
|
|
|
SVM_ALLOC_OBJ,
|
|
|
|
SAMPLER_OBJ,
|
|
|
|
ACCELERATOR_OBJ,
|
|
|
|
DEVICE_QUEUE_OBJ,
|
|
|
|
SLM_OBJ
|
|
|
|
};
|
|
|
|
|
|
|
|
struct SimpleKernelArgInfo {
|
|
|
|
kernelArgType type;
|
2018-08-10 19:42:52 +08:00
|
|
|
void *object;
|
2017-12-21 07:45:38 +08:00
|
|
|
const void *value;
|
|
|
|
size_t size;
|
|
|
|
GraphicsAllocation *pSvmAlloc;
|
|
|
|
cl_mem_flags svmFlags;
|
|
|
|
bool isPatched = false;
|
2019-08-30 15:55:44 +08:00
|
|
|
bool isStatelessUncacheable = false;
|
2017-12-21 07:45:38 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
typedef int32_t (Kernel::*KernelArgHandler)(uint32_t argIndex,
|
|
|
|
size_t argSize,
|
|
|
|
const void *argVal);
|
|
|
|
|
|
|
|
template <typename kernel_t = Kernel, typename program_t = Program>
|
|
|
|
static kernel_t *create(Program *program, const KernelInfo &kernelInfo, cl_int *errcodeRet) {
|
|
|
|
cl_int retVal;
|
|
|
|
kernel_t *pKernel = nullptr;
|
|
|
|
|
|
|
|
do {
|
|
|
|
// copy the kernel data into our new allocation
|
|
|
|
pKernel = new kernel_t(program, kernelInfo, program->getDevice(0));
|
|
|
|
retVal = pKernel->initialize();
|
|
|
|
} while (false);
|
|
|
|
|
|
|
|
if (retVal != CL_SUCCESS) {
|
|
|
|
delete pKernel;
|
|
|
|
pKernel = nullptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (errcodeRet) {
|
|
|
|
*errcodeRet = retVal;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (DebugManager.debugKernelDumpingAvailable()) {
|
|
|
|
char *pSrc = nullptr;
|
|
|
|
unsigned int size = 0;
|
|
|
|
program->getSource(pSrc, size);
|
|
|
|
DebugManager.dumpKernel(kernelInfo.name, (pSrc != nullptr) ? std::string(pSrc) : std::string());
|
|
|
|
}
|
|
|
|
|
|
|
|
return pKernel;
|
|
|
|
}
|
|
|
|
|
|
|
|
Kernel &operator=(const Kernel &) = delete;
|
|
|
|
Kernel(const Kernel &) = delete;
|
|
|
|
|
|
|
|
~Kernel() override;
|
|
|
|
|
|
|
|
static bool isMemObj(kernelArgType kernelArg) {
|
|
|
|
return kernelArg == BUFFER_OBJ || kernelArg == IMAGE_OBJ || kernelArg == PIPE_OBJ;
|
|
|
|
}
|
|
|
|
|
2018-08-03 14:14:43 +08:00
|
|
|
bool isAuxTranslationRequired() const { return auxTranslationRequired; }
|
|
|
|
|
2017-12-21 07:45:38 +08:00
|
|
|
char *getCrossThreadData() const {
|
|
|
|
return crossThreadData;
|
|
|
|
}
|
|
|
|
|
|
|
|
uint32_t getCrossThreadDataSize() const {
|
|
|
|
return crossThreadDataSize;
|
|
|
|
}
|
|
|
|
|
|
|
|
cl_int initialize();
|
|
|
|
|
2018-08-07 15:22:55 +08:00
|
|
|
MOCKABLE_VIRTUAL cl_int cloneKernel(Kernel *pSourceKernel);
|
2017-12-21 07:45:38 +08:00
|
|
|
|
2018-03-27 20:30:05 +08:00
|
|
|
MOCKABLE_VIRTUAL bool canTransformImages() const;
|
2017-12-21 07:45:38 +08:00
|
|
|
MOCKABLE_VIRTUAL bool isPatched() const;
|
|
|
|
|
|
|
|
// API entry points
|
|
|
|
cl_int setArg(uint32_t argIndex, size_t argSize, const void *argVal);
|
2019-01-28 17:24:11 +08:00
|
|
|
cl_int setArgSvm(uint32_t argIndex, size_t svmAllocSize, void *svmPtr, GraphicsAllocation *svmAlloc, cl_mem_flags svmFlags);
|
2017-12-21 07:45:38 +08:00
|
|
|
cl_int setArgSvmAlloc(uint32_t argIndex, void *svmPtr, GraphicsAllocation *svmAlloc);
|
|
|
|
|
2019-06-14 13:10:45 +08:00
|
|
|
void setSvmKernelExecInfo(GraphicsAllocation *argValue);
|
|
|
|
void clearSvmKernelExecInfo();
|
2017-12-21 07:45:38 +08:00
|
|
|
|
|
|
|
cl_int getInfo(cl_kernel_info paramName, size_t paramValueSize,
|
|
|
|
void *paramValue, size_t *paramValueSizeRet) const;
|
2019-03-21 19:51:20 +08:00
|
|
|
void getAdditionalInfo(cl_kernel_info paramName, const void *¶mValue, size_t ¶mValueSizeRet) const;
|
2017-12-21 07:45:38 +08:00
|
|
|
|
|
|
|
cl_int getArgInfo(cl_uint argIndx, cl_kernel_arg_info paramName,
|
|
|
|
size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) const;
|
|
|
|
|
|
|
|
cl_int getWorkGroupInfo(cl_device_id device, cl_kernel_work_group_info paramName,
|
|
|
|
size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) const;
|
|
|
|
|
|
|
|
cl_int getSubGroupInfo(cl_kernel_sub_group_info paramName,
|
|
|
|
size_t inputValueSize, const void *inputValue,
|
|
|
|
size_t paramValueSize, void *paramValue,
|
|
|
|
size_t *paramValueSizeRet) const;
|
|
|
|
|
|
|
|
const void *getKernelHeap() const;
|
|
|
|
const void *getSurfaceStateHeap() const;
|
|
|
|
void *getSurfaceStateHeap();
|
|
|
|
const void *getDynamicStateHeap() const;
|
|
|
|
|
|
|
|
size_t getKernelHeapSize() const;
|
|
|
|
size_t getSurfaceStateHeapSize() const;
|
|
|
|
size_t getDynamicStateHeapSize() const;
|
2018-02-08 23:00:20 +08:00
|
|
|
size_t getNumberOfBindingTableStates() const;
|
|
|
|
size_t getBindingTableOffset() const {
|
|
|
|
return localBindingTableOffset;
|
|
|
|
}
|
|
|
|
|
|
|
|
void resizeSurfaceStateHeap(void *pNewSsh, size_t newSshSize, size_t newBindingTableCount, size_t newBindingTableOffset);
|
2017-12-21 07:45:38 +08:00
|
|
|
|
2018-01-24 20:26:46 +08:00
|
|
|
void substituteKernelHeap(void *newKernelHeap, size_t newKernelHeapSize);
|
2018-02-08 23:00:20 +08:00
|
|
|
bool isKernelHeapSubstituted() const;
|
2018-01-24 20:26:46 +08:00
|
|
|
uint64_t getKernelId() const;
|
|
|
|
void setKernelId(uint64_t newKernelId);
|
2018-06-04 15:09:04 +08:00
|
|
|
uint32_t getStartOffset() const;
|
|
|
|
void setStartOffset(uint32_t offset);
|
2018-01-24 20:26:46 +08:00
|
|
|
|
2017-12-21 07:45:38 +08:00
|
|
|
const std::vector<SimpleKernelArgInfo> &getKernelArguments() const {
|
|
|
|
return kernelArguments;
|
|
|
|
}
|
|
|
|
|
|
|
|
size_t getKernelArgsNumber() const {
|
|
|
|
return kernelInfo.kernelArgInfo.size();
|
|
|
|
}
|
|
|
|
|
|
|
|
uint32_t getKernelArgAddressQualifier(uint32_t argIndex) const {
|
|
|
|
return kernelInfo.kernelArgInfo[argIndex].addressQualifier;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool requiresSshForBuffers() const {
|
|
|
|
return kernelInfo.requiresSshForBuffers;
|
|
|
|
}
|
|
|
|
|
|
|
|
const KernelInfo &getKernelInfo() const {
|
|
|
|
return kernelInfo;
|
|
|
|
}
|
|
|
|
|
2018-08-21 21:47:21 +08:00
|
|
|
const Device &getDevice() const {
|
2017-12-21 07:45:38 +08:00
|
|
|
return device;
|
|
|
|
}
|
|
|
|
|
|
|
|
Context &getContext() const {
|
|
|
|
return context ? *context : program->getContext();
|
|
|
|
}
|
|
|
|
|
|
|
|
void setContext(Context *context) {
|
|
|
|
this->context = context;
|
|
|
|
}
|
|
|
|
|
|
|
|
Program *getProgram() const { return program; }
|
|
|
|
|
|
|
|
static uint32_t getScratchSizeValueToProgramMediaVfeState(int scratchSize);
|
|
|
|
uint32_t getScratchSize() {
|
|
|
|
return kernelInfo.patchInfo.mediavfestate ? kernelInfo.patchInfo.mediavfestate->PerThreadScratchSpace : 0;
|
|
|
|
}
|
|
|
|
|
2019-06-27 20:06:19 +08:00
|
|
|
uint32_t getPrivateScratchSize() {
|
|
|
|
return kernelInfo.patchInfo.mediaVfeStateSlot1 ? kernelInfo.patchInfo.mediaVfeStateSlot1->PerThreadScratchSpace : 0;
|
|
|
|
}
|
|
|
|
|
2017-12-21 07:45:38 +08:00
|
|
|
void createReflectionSurface();
|
|
|
|
template <bool mockable = false>
|
|
|
|
void patchReflectionSurface(DeviceQueue *devQueue, PrintfHandler *printfHandler);
|
|
|
|
|
|
|
|
void patchDefaultDeviceQueue(DeviceQueue *devQueue);
|
|
|
|
void patchEventPool(DeviceQueue *devQueue);
|
|
|
|
void patchBlocksSimdSize();
|
|
|
|
|
|
|
|
GraphicsAllocation *getKernelReflectionSurface() const {
|
|
|
|
return kernelReflectionSurface;
|
|
|
|
}
|
|
|
|
|
|
|
|
size_t getInstructionHeapSizeForExecutionModel() const;
|
|
|
|
|
|
|
|
// Helpers
|
|
|
|
cl_int setArg(uint32_t argIndex, uint32_t argValue);
|
|
|
|
cl_int setArg(uint32_t argIndex, cl_mem argValue);
|
2018-03-12 23:32:08 +08:00
|
|
|
cl_int setArg(uint32_t argIndex, cl_mem argValue, uint32_t mipLevel);
|
2017-12-21 07:45:38 +08:00
|
|
|
|
|
|
|
// Handlers
|
|
|
|
void setKernelArgHandler(uint32_t argIndex, KernelArgHandler handler);
|
|
|
|
|
|
|
|
void unsetArg(uint32_t argIndex);
|
|
|
|
|
|
|
|
cl_int setArgImmediate(uint32_t argIndex,
|
|
|
|
size_t argSize,
|
|
|
|
const void *argVal);
|
|
|
|
|
|
|
|
cl_int setArgBuffer(uint32_t argIndex,
|
|
|
|
size_t argSize,
|
|
|
|
const void *argVal);
|
|
|
|
|
|
|
|
cl_int setArgPipe(uint32_t argIndex,
|
|
|
|
size_t argSize,
|
|
|
|
const void *argVal);
|
|
|
|
|
|
|
|
cl_int setArgImage(uint32_t argIndex,
|
|
|
|
size_t argSize,
|
|
|
|
const void *argVal);
|
|
|
|
|
2018-03-12 23:32:08 +08:00
|
|
|
cl_int setArgImageWithMipLevel(uint32_t argIndex,
|
|
|
|
size_t argSize,
|
|
|
|
const void *argVal, uint32_t mipLevel);
|
|
|
|
|
2017-12-21 07:45:38 +08:00
|
|
|
cl_int setArgLocal(uint32_t argIndex,
|
|
|
|
size_t argSize,
|
|
|
|
const void *argVal);
|
|
|
|
|
|
|
|
cl_int setArgSampler(uint32_t argIndex,
|
|
|
|
size_t argSize,
|
|
|
|
const void *argVal);
|
|
|
|
|
|
|
|
cl_int setArgAccelerator(uint32_t argIndex,
|
|
|
|
size_t argSize,
|
|
|
|
const void *argVal);
|
|
|
|
|
|
|
|
cl_int setArgDevQueue(uint32_t argIndex,
|
|
|
|
size_t argSize,
|
|
|
|
const void *argVal);
|
|
|
|
|
|
|
|
void storeKernelArg(uint32_t argIndex,
|
|
|
|
kernelArgType argType,
|
2018-08-10 19:42:52 +08:00
|
|
|
void *argObject,
|
2017-12-21 07:45:38 +08:00
|
|
|
const void *argValue,
|
|
|
|
size_t argSize,
|
|
|
|
GraphicsAllocation *argSvmAlloc = nullptr,
|
|
|
|
cl_mem_flags argSvmFlags = 0);
|
|
|
|
const void *getKernelArg(uint32_t argIndex) const;
|
|
|
|
const SimpleKernelArgInfo &getKernelArgInfo(uint32_t argIndex) const;
|
|
|
|
|
2018-11-14 15:40:37 +08:00
|
|
|
bool getAllowNonUniform() const { return program->getAllowNonUniform(); }
|
|
|
|
bool isVmeKernel() const { return kernelInfo.isVmeWorkload; }
|
|
|
|
bool requiresSpecialPipelineSelectMode() const { return specialPipelineSelectMode; }
|
2017-12-21 07:45:38 +08:00
|
|
|
|
|
|
|
//residency for kernel surfaces
|
2018-02-16 16:15:36 +08:00
|
|
|
MOCKABLE_VIRTUAL void makeResident(CommandStreamReceiver &commandStreamReceiver);
|
|
|
|
MOCKABLE_VIRTUAL void getResidency(std::vector<Surface *> &dst);
|
2017-12-21 07:45:38 +08:00
|
|
|
bool requiresCoherency();
|
|
|
|
void resetSharedObjectsPatchAddresses();
|
2018-11-14 15:40:37 +08:00
|
|
|
bool isUsingSharedObjArgs() const { return usingSharedObjArgs; }
|
2019-08-30 15:55:44 +08:00
|
|
|
bool hasUncacheableStatelessArgs() const { return statelessUncacheableArgsCount > 0; }
|
2017-12-21 07:45:38 +08:00
|
|
|
|
|
|
|
bool hasPrintfOutput() const;
|
|
|
|
|
|
|
|
void setReflectionSurfaceBlockBtOffset(uint32_t blockID, uint32_t offset) {
|
|
|
|
DEBUG_BREAK_IF(blockID >= program->getBlockKernelManager()->getCount());
|
|
|
|
ReflectionSurfaceHelper::setKernelAddressDataBtOffset(getKernelReflectionSurface()->getUnderlyingBuffer(), blockID, offset);
|
|
|
|
}
|
|
|
|
|
|
|
|
cl_int checkCorrectImageAccessQualifier(cl_uint argIndex,
|
|
|
|
size_t argSize,
|
|
|
|
const void *argValue) const;
|
|
|
|
|
|
|
|
uint32_t *globalWorkOffsetX;
|
|
|
|
uint32_t *globalWorkOffsetY;
|
|
|
|
uint32_t *globalWorkOffsetZ;
|
|
|
|
|
|
|
|
uint32_t *localWorkSizeX;
|
|
|
|
uint32_t *localWorkSizeY;
|
|
|
|
uint32_t *localWorkSizeZ;
|
|
|
|
|
|
|
|
uint32_t *localWorkSizeX2;
|
|
|
|
uint32_t *localWorkSizeY2;
|
|
|
|
uint32_t *localWorkSizeZ2;
|
|
|
|
|
|
|
|
uint32_t *globalWorkSizeX;
|
|
|
|
uint32_t *globalWorkSizeY;
|
|
|
|
uint32_t *globalWorkSizeZ;
|
|
|
|
|
|
|
|
uint32_t *enqueuedLocalWorkSizeX;
|
|
|
|
uint32_t *enqueuedLocalWorkSizeY;
|
|
|
|
uint32_t *enqueuedLocalWorkSizeZ;
|
|
|
|
|
|
|
|
uint32_t *numWorkGroupsX;
|
|
|
|
uint32_t *numWorkGroupsY;
|
|
|
|
uint32_t *numWorkGroupsZ;
|
|
|
|
|
2019-09-13 20:09:49 +08:00
|
|
|
uint32_t *maxWorkGroupSizeForCrossThreadData;
|
|
|
|
uint32_t maxKernelWorkGroupSize = 0;
|
2017-12-21 07:45:38 +08:00
|
|
|
uint32_t *workDim;
|
|
|
|
uint32_t *dataParameterSimdSize;
|
|
|
|
uint32_t *parentEventOffset;
|
2019-07-17 23:45:52 +08:00
|
|
|
uint32_t *preferredWkgMultipleOffset;
|
2017-12-21 07:45:38 +08:00
|
|
|
|
|
|
|
static uint32_t dummyPatchLocation;
|
|
|
|
|
|
|
|
std::vector<size_t> slmSizes;
|
|
|
|
|
2019-03-21 19:51:20 +08:00
|
|
|
uint32_t allBufferArgsStateful = CL_TRUE;
|
|
|
|
|
2017-12-21 07:45:38 +08:00
|
|
|
uint32_t slmTotalSize;
|
|
|
|
bool isBuiltIn;
|
|
|
|
const bool isParentKernel;
|
|
|
|
const bool isSchedulerKernel;
|
|
|
|
|
2018-02-20 15:11:24 +08:00
|
|
|
template <typename GfxFamily>
|
2017-12-21 07:45:38 +08:00
|
|
|
uint32_t getThreadArbitrationPolicy() {
|
|
|
|
if (kernelInfo.patchInfo.executionEnvironment->SubgroupIndependentForwardProgressRequired) {
|
2018-02-20 15:11:24 +08:00
|
|
|
return PreambleHelper<GfxFamily>::getDefaultThreadArbitrationPolicy();
|
2017-12-21 07:45:38 +08:00
|
|
|
} else {
|
2018-02-20 15:11:24 +08:00
|
|
|
return ThreadArbitrationPolicy::AgeBased;
|
2017-12-21 07:45:38 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
bool checkIfIsParentKernelAndBlocksUsesPrintf() {
|
|
|
|
return isParentKernel && getProgram()->getBlockKernelManager()->getIfBlockUsesPrintf();
|
|
|
|
}
|
|
|
|
|
2018-04-03 22:06:37 +08:00
|
|
|
bool is32Bit() const {
|
|
|
|
return kernelInfo.gpuPointerSize == 4;
|
|
|
|
}
|
|
|
|
|
2018-03-19 17:11:30 +08:00
|
|
|
int32_t getDebugSurfaceBti() const {
|
|
|
|
if (kernelInfo.patchInfo.pAllocateSystemThreadSurface) {
|
|
|
|
return kernelInfo.patchInfo.pAllocateSystemThreadSurface->BTI;
|
|
|
|
}
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
size_t getPerThreadSystemThreadSurfaceSize() const {
|
|
|
|
if (kernelInfo.patchInfo.pAllocateSystemThreadSurface) {
|
|
|
|
return kernelInfo.patchInfo.pAllocateSystemThreadSurface->PerThreadSystemThreadSurfaceSize;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2018-03-14 18:07:51 +08:00
|
|
|
std::vector<PatchInfoData> &getPatchInfoDataList() { return patchInfoDataList; };
|
2018-08-07 15:49:47 +08:00
|
|
|
bool usesOnlyImages() const {
|
|
|
|
return usingImagesOnly;
|
|
|
|
}
|
2018-03-14 18:07:51 +08:00
|
|
|
|
2018-12-17 22:23:35 +08:00
|
|
|
void fillWithBuffersForAuxTranslation(MemObjsForAuxTranslation &memObjsForAuxTranslation);
|
2018-08-07 21:09:16 +08:00
|
|
|
|
2019-02-21 23:59:10 +08:00
|
|
|
MOCKABLE_VIRTUAL bool requiresCacheFlushCommand(const CommandQueue &commandQueue) const;
|
2018-12-06 22:33:02 +08:00
|
|
|
|
2019-01-31 21:47:55 +08:00
|
|
|
using CacheFlushAllocationsVec = StackVec<GraphicsAllocation *, 32>;
|
|
|
|
void getAllocationsForCacheFlush(CacheFlushAllocationsVec &out) const;
|
|
|
|
|
2019-08-13 17:07:47 +08:00
|
|
|
void setAuxTranslationDirection(AuxTranslationDirection auxTranslationDirection) {
|
|
|
|
this->auxTranslationDirection = auxTranslationDirection;
|
2019-02-06 04:41:51 +08:00
|
|
|
}
|
2019-07-04 18:17:42 +08:00
|
|
|
void setUnifiedMemorySyncRequirement(bool isUnifiedMemorySyncRequired) {
|
|
|
|
this->isUnifiedMemorySyncRequired = isUnifiedMemorySyncRequired;
|
|
|
|
}
|
2019-06-13 21:49:35 +08:00
|
|
|
void setUnifiedMemoryProperty(cl_kernel_exec_info infoType, bool infoValue);
|
2019-06-14 18:48:40 +08:00
|
|
|
void setUnifiedMemoryExecInfo(GraphicsAllocation *argValue);
|
|
|
|
void clearUnifiedMemoryExecInfo();
|
2019-02-06 04:41:51 +08:00
|
|
|
|
2019-08-23 16:35:57 +08:00
|
|
|
bool areStatelessWritesUsed() { return containsStatelessWrites; }
|
|
|
|
|
2017-12-21 07:45:38 +08:00
|
|
|
protected:
|
|
|
|
struct ObjectCounts {
|
|
|
|
uint32_t imageCount;
|
|
|
|
uint32_t samplerCount;
|
|
|
|
};
|
|
|
|
|
|
|
|
class ReflectionSurfaceHelper {
|
|
|
|
public:
|
|
|
|
static const uint64_t undefinedOffset = (uint64_t)-1;
|
|
|
|
|
|
|
|
static void setKernelDataHeader(void *reflectionSurface, uint32_t numberOfBlocks,
|
|
|
|
uint32_t parentImages, uint32_t parentSamplers,
|
|
|
|
uint32_t imageOffset, uint32_t samplerOffset) {
|
|
|
|
IGIL_KernelDataHeader *kernelDataHeader = reinterpret_cast<IGIL_KernelDataHeader *>(reflectionSurface);
|
|
|
|
kernelDataHeader->m_numberOfKernels = numberOfBlocks;
|
|
|
|
kernelDataHeader->m_ParentKernelImageCount = parentImages;
|
|
|
|
kernelDataHeader->m_ParentSamplerCount = parentSamplers;
|
|
|
|
kernelDataHeader->m_ParentImageDataOffset = imageOffset;
|
|
|
|
kernelDataHeader->m_ParentSamplerParamsOffset = samplerOffset;
|
|
|
|
}
|
|
|
|
|
|
|
|
static uint32_t setKernelData(void *reflectionSurface, uint32_t offset,
|
|
|
|
std::vector<IGIL_KernelCurbeParams> &curbeParamsIn,
|
|
|
|
uint64_t tokenMaskIn, size_t maxConstantBufferSize,
|
|
|
|
size_t samplerCount, const KernelInfo &kernelInfo,
|
|
|
|
const HardwareInfo &hwInfo);
|
|
|
|
|
|
|
|
static void setKernelAddressData(void *reflectionSurface, uint32_t offset,
|
|
|
|
uint32_t kernelDataOffset, uint32_t samplerHeapOffset,
|
|
|
|
uint32_t constantBufferOffset, uint32_t samplerParamsOffset,
|
|
|
|
uint32_t sshTokensOffset, uint32_t btOffset,
|
|
|
|
const KernelInfo &kernelInfo, const HardwareInfo &hwInfo);
|
|
|
|
|
|
|
|
static void getCurbeParams(std::vector<IGIL_KernelCurbeParams> &curbeParamsOut,
|
|
|
|
uint64_t &tokenMaskOut, uint32_t &firstSSHTokenIndex,
|
|
|
|
const KernelInfo &kernelInfo, const HardwareInfo &hwInfo);
|
|
|
|
|
|
|
|
static bool compareFunction(IGIL_KernelCurbeParams argFirst, IGIL_KernelCurbeParams argSecond) {
|
|
|
|
if (argFirst.m_parameterType == argSecond.m_parameterType) {
|
|
|
|
if (argFirst.m_parameterType == iOpenCL::DATA_PARAMETER_LOCAL_WORK_SIZE) {
|
|
|
|
return argFirst.m_patchOffset < argSecond.m_patchOffset;
|
|
|
|
} else {
|
|
|
|
return argFirst.m_sourceOffset < argSecond.m_sourceOffset;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
return argFirst.m_parameterType < argSecond.m_parameterType;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void setKernelAddressDataBtOffset(void *reflectionSurface, uint32_t blockID, uint32_t btOffset);
|
|
|
|
|
|
|
|
static void setParentImageParams(void *reflectionSurface, std::vector<Kernel::SimpleKernelArgInfo> &parentArguments, const KernelInfo &parentKernelInfo);
|
|
|
|
static void setParentSamplerParams(void *reflectionSurface, std::vector<Kernel::SimpleKernelArgInfo> &parentArguments, const KernelInfo &parentKernelInfo);
|
|
|
|
|
|
|
|
template <bool mockable = false>
|
|
|
|
static void patchBlocksCurbe(void *reflectionSurface, uint32_t blockID,
|
|
|
|
uint64_t defaultDeviceQueueCurbeOffset, uint32_t patchSizeDefaultQueue, uint64_t defaultDeviceQueueGpuAddress,
|
|
|
|
uint64_t eventPoolCurbeOffset, uint32_t patchSizeEventPool, uint64_t eventPoolGpuAddress,
|
|
|
|
uint64_t deviceQueueCurbeOffset, uint32_t patchSizeDeviceQueue, uint64_t deviceQueueGpuAddress,
|
|
|
|
uint64_t printfBufferOffset, uint32_t printfBufferSize, uint64_t printfBufferGpuAddress,
|
|
|
|
uint64_t privateSurfaceOffset, uint32_t privateSurfaceSize, uint64_t privateSurfaceGpuAddress);
|
|
|
|
|
|
|
|
static void patchBlocksCurbeWithConstantValues(void *reflectionSurface, uint32_t blockID,
|
|
|
|
uint64_t globalMemoryCurbeOffset, uint32_t globalMemoryPatchSize, uint64_t globalMemoryGpuAddress,
|
|
|
|
uint64_t constantMemoryCurbeOffset, uint32_t constantMemoryPatchSize, uint64_t constantMemoryGpuAddress,
|
|
|
|
uint64_t privateMemoryCurbeOffset, uint32_t privateMemoryPatchSize, uint64_t privateMemoryGpuAddress);
|
|
|
|
};
|
|
|
|
|
2019-06-13 21:49:35 +08:00
|
|
|
void
|
|
|
|
makeArgsResident(CommandStreamReceiver &commandStreamReceiver);
|
2017-12-21 07:45:38 +08:00
|
|
|
|
|
|
|
void *patchBufferOffset(const KernelArgInfo &argInfo, void *svmPtr, GraphicsAllocation *svmAlloc);
|
|
|
|
|
|
|
|
// Sets-up both crossThreadData and ssh for given implicit (private/constant, etc.) allocation
|
|
|
|
template <typename PatchTokenT>
|
|
|
|
void patchWithImplicitSurface(void *ptrToPatchInCrossThreadData, GraphicsAllocation &allocation, const PatchTokenT &patch);
|
|
|
|
|
|
|
|
void getParentObjectCounts(ObjectCounts &objectCount);
|
|
|
|
Kernel(Program *programArg, const KernelInfo &kernelInfoArg, const Device &deviceArg, bool schedulerKernel = false);
|
|
|
|
void provideInitializationHints();
|
|
|
|
|
|
|
|
void patchBlocksCurbeWithConstantValues();
|
|
|
|
|
2018-03-27 20:30:05 +08:00
|
|
|
void resolveArgs();
|
|
|
|
|
2018-09-21 20:06:35 +08:00
|
|
|
void reconfigureKernel();
|
|
|
|
|
2018-12-06 22:33:02 +08:00
|
|
|
void addAllocationToCacheFlushVector(uint32_t argIndex, GraphicsAllocation *argAllocation);
|
2019-01-31 21:47:55 +08:00
|
|
|
bool allocationForCacheFlush(GraphicsAllocation *argAllocation) const;
|
2017-12-21 07:45:38 +08:00
|
|
|
Program *program;
|
|
|
|
Context *context;
|
|
|
|
const Device &device;
|
|
|
|
const KernelInfo &kernelInfo;
|
|
|
|
|
|
|
|
std::vector<SimpleKernelArgInfo> kernelArguments;
|
|
|
|
std::vector<KernelArgHandler> kernelArgHandlers;
|
|
|
|
std::vector<GraphicsAllocation *> kernelSvmGfxAllocations;
|
2019-06-14 18:48:40 +08:00
|
|
|
std::vector<GraphicsAllocation *> kernelUnifiedMemoryGfxAllocations;
|
2017-12-21 07:45:38 +08:00
|
|
|
|
2019-08-13 17:07:47 +08:00
|
|
|
AuxTranslationDirection auxTranslationDirection = AuxTranslationDirection::None;
|
2019-02-06 04:41:51 +08:00
|
|
|
|
2018-02-08 23:00:20 +08:00
|
|
|
size_t numberOfBindingTableStates;
|
|
|
|
size_t localBindingTableOffset;
|
2018-08-03 14:14:43 +08:00
|
|
|
std::unique_ptr<char[]> pSshLocal;
|
2017-12-21 07:45:38 +08:00
|
|
|
uint32_t sshLocalSize;
|
|
|
|
|
|
|
|
char *crossThreadData;
|
|
|
|
uint32_t crossThreadDataSize;
|
|
|
|
|
|
|
|
GraphicsAllocation *privateSurface;
|
2018-04-16 16:15:41 +08:00
|
|
|
uint64_t privateSurfaceSize;
|
2017-12-21 07:45:38 +08:00
|
|
|
|
|
|
|
GraphicsAllocation *kernelReflectionSurface;
|
|
|
|
|
|
|
|
bool usingSharedObjArgs;
|
2018-08-07 15:49:47 +08:00
|
|
|
bool usingImagesOnly = false;
|
2018-08-03 14:14:43 +08:00
|
|
|
bool auxTranslationRequired = false;
|
2019-08-23 16:35:57 +08:00
|
|
|
bool containsStatelessWrites = true;
|
2017-12-21 07:45:38 +08:00
|
|
|
uint32_t patchedArgumentsNum = 0;
|
2018-06-04 15:09:04 +08:00
|
|
|
uint32_t startOffset = 0;
|
2019-08-30 15:55:44 +08:00
|
|
|
uint32_t statelessUncacheableArgsCount = 0;
|
2018-03-14 18:07:51 +08:00
|
|
|
|
|
|
|
std::vector<PatchInfoData> patchInfoDataList;
|
2018-03-27 20:30:05 +08:00
|
|
|
std::unique_ptr<ImageTransformer> imageTransformer;
|
2018-11-14 15:40:37 +08:00
|
|
|
|
|
|
|
bool specialPipelineSelectMode = false;
|
2018-12-06 22:33:02 +08:00
|
|
|
bool svmAllocationsRequireCacheFlush = false;
|
|
|
|
std::vector<GraphicsAllocation *> kernelArgRequiresCacheFlush;
|
2019-06-13 21:49:35 +08:00
|
|
|
UnifiedMemoryControls unifiedMemoryControls;
|
2019-07-04 18:17:42 +08:00
|
|
|
bool isUnifiedMemorySyncRequired = true;
|
2017-12-21 07:45:38 +08:00
|
|
|
};
|
2019-03-26 18:59:46 +08:00
|
|
|
} // namespace NEO
|