mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-15 13:01:45 +08:00
Add kernel algorithm to check any argument is using system memory
Related-To: NEO-6959 Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:

committed by
Compute-Runtime-Automation

parent
5a3a39a281
commit
e07f9f0698
@ -4898,9 +4898,9 @@ cl_int CL_API_CALL clSetKernelArgSVMPointer(cl_kernel kernel,
|
||||
const void *argValue) {
|
||||
TRACING_ENTER(ClSetKernelArgSvmPointer, &kernel, &argIndex, &argValue);
|
||||
|
||||
MultiDeviceKernel *pMultiDeviceKernel = nullptr;
|
||||
MultiDeviceKernel *multiDeviceKernel = nullptr;
|
||||
|
||||
auto retVal = validateObjects(withCastToInternal(kernel, &pMultiDeviceKernel));
|
||||
auto retVal = validateObjects(withCastToInternal(kernel, &multiDeviceKernel));
|
||||
API_ENTER(&retVal);
|
||||
|
||||
if (CL_SUCCESS != retVal) {
|
||||
@ -4908,27 +4908,27 @@ cl_int CL_API_CALL clSetKernelArgSVMPointer(cl_kernel kernel,
|
||||
return retVal;
|
||||
}
|
||||
|
||||
if (argIndex >= pMultiDeviceKernel->getKernelArgsNumber()) {
|
||||
if (argIndex >= multiDeviceKernel->getKernelArgsNumber()) {
|
||||
retVal = CL_INVALID_ARG_INDEX;
|
||||
TRACING_EXIT(ClSetKernelArgSvmPointer, &retVal);
|
||||
return retVal;
|
||||
}
|
||||
|
||||
const auto svmManager = pMultiDeviceKernel->getContext().getSVMAllocsManager();
|
||||
const auto svmManager = multiDeviceKernel->getContext().getSVMAllocsManager();
|
||||
|
||||
if (argValue != nullptr) {
|
||||
if (pMultiDeviceKernel->getKernelArguments()[argIndex].allocId > 0 &&
|
||||
pMultiDeviceKernel->getKernelArguments()[argIndex].value == argValue) {
|
||||
if (multiDeviceKernel->getKernelArguments()[argIndex].allocId > 0 &&
|
||||
multiDeviceKernel->getKernelArguments()[argIndex].value == argValue) {
|
||||
bool reuseFromCache = false;
|
||||
const auto allocationsCounter = svmManager->allocationsCounter.load();
|
||||
if (allocationsCounter > 0) {
|
||||
if (allocationsCounter == pMultiDeviceKernel->getKernelArguments()[argIndex].allocIdMemoryManagerCounter) {
|
||||
if (allocationsCounter == multiDeviceKernel->getKernelArguments()[argIndex].allocIdMemoryManagerCounter) {
|
||||
reuseFromCache = true;
|
||||
} else {
|
||||
const auto svmData = svmManager->getSVMAlloc(argValue);
|
||||
if (svmData && pMultiDeviceKernel->getKernelArguments()[argIndex].allocId == svmData->getAllocId()) {
|
||||
if (svmData && multiDeviceKernel->getKernelArguments()[argIndex].allocId == svmData->getAllocId()) {
|
||||
reuseFromCache = true;
|
||||
pMultiDeviceKernel->storeKernelArgAllocIdMemoryManagerCounter(argIndex, allocationsCounter);
|
||||
multiDeviceKernel->storeKernelArgAllocIdMemoryManagerCounter(argIndex, allocationsCounter);
|
||||
}
|
||||
}
|
||||
if (reuseFromCache) {
|
||||
@ -4938,7 +4938,7 @@ cl_int CL_API_CALL clSetKernelArgSVMPointer(cl_kernel kernel,
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (pMultiDeviceKernel->getKernelArguments()[argIndex].isSetToNullptr) {
|
||||
if (multiDeviceKernel->getKernelArguments()[argIndex].isSetToNullptr) {
|
||||
TRACING_EXIT(ClSetKernelArgSvmPointer, &retVal);
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
@ -4946,7 +4946,7 @@ cl_int CL_API_CALL clSetKernelArgSVMPointer(cl_kernel kernel,
|
||||
|
||||
DBG_LOG_INPUTS("kernel", kernel, "argIndex", argIndex, "argValue", argValue);
|
||||
|
||||
for (const auto &pDevice : pMultiDeviceKernel->getDevices()) {
|
||||
for (const auto &pDevice : multiDeviceKernel->getDevices()) {
|
||||
const HardwareInfo &hwInfo = pDevice->getHardwareInfo();
|
||||
if (!hwInfo.capabilityTable.ftrSvm) {
|
||||
retVal = CL_INVALID_OPERATION;
|
||||
@ -4955,8 +4955,8 @@ cl_int CL_API_CALL clSetKernelArgSVMPointer(cl_kernel kernel,
|
||||
}
|
||||
}
|
||||
|
||||
for (const auto &pDevice : pMultiDeviceKernel->getDevices()) {
|
||||
auto pKernel = pMultiDeviceKernel->getKernel(pDevice->getRootDeviceIndex());
|
||||
for (const auto &pDevice : multiDeviceKernel->getDevices()) {
|
||||
auto pKernel = multiDeviceKernel->getKernel(pDevice->getRootDeviceIndex());
|
||||
cl_int kernelArgAddressQualifier = asClKernelArgAddressQualifier(pKernel->getKernelInfo()
|
||||
.kernelDescriptor.payloadMappings.explicitArgs[argIndex]
|
||||
.getTraits()
|
||||
@ -4969,12 +4969,12 @@ cl_int CL_API_CALL clSetKernelArgSVMPointer(cl_kernel kernel,
|
||||
}
|
||||
}
|
||||
|
||||
MultiGraphicsAllocation *pSvmAllocs = nullptr;
|
||||
MultiGraphicsAllocation *svmAllocs = nullptr;
|
||||
uint32_t allocId = 0u;
|
||||
if (argValue != nullptr) {
|
||||
auto svmData = svmManager->getSVMAlloc(argValue);
|
||||
if (svmData == nullptr) {
|
||||
for (const auto &pDevice : pMultiDeviceKernel->getDevices()) {
|
||||
for (const auto &pDevice : multiDeviceKernel->getDevices()) {
|
||||
if (!pDevice->areSharedSystemAllocationsAllowed()) {
|
||||
retVal = CL_INVALID_ARG_VALUE;
|
||||
TRACING_EXIT(ClSetKernelArgSvmPointer, &retVal);
|
||||
@ -4982,12 +4982,12 @@ cl_int CL_API_CALL clSetKernelArgSVMPointer(cl_kernel kernel,
|
||||
}
|
||||
}
|
||||
} else {
|
||||
pSvmAllocs = &svmData->gpuAllocations;
|
||||
svmAllocs = &svmData->gpuAllocations;
|
||||
allocId = svmData->getAllocId();
|
||||
}
|
||||
}
|
||||
|
||||
retVal = pMultiDeviceKernel->setArgSvmAlloc(argIndex, const_cast<void *>(argValue), pSvmAllocs, allocId);
|
||||
retVal = multiDeviceKernel->setArgSvmAlloc(argIndex, const_cast<void *>(argValue), svmAllocs, allocId);
|
||||
TRACING_EXIT(ClSetKernelArgSvmPointer, &retVal);
|
||||
return retVal;
|
||||
}
|
||||
|
@ -447,6 +447,7 @@ void CommandQueueHw<GfxFamily>::processDispatchForKernels(const MultiDispatchInf
|
||||
dispatchWalkerArgs.timestampPacketDependencies = &timestampPacketDependencies;
|
||||
dispatchWalkerArgs.currentTimestampPacketNodes = timestampPacketContainer.get();
|
||||
dispatchWalkerArgs.commandType = commandType;
|
||||
dispatchWalkerArgs.event = event;
|
||||
|
||||
HardwareInterface<GfxFamily>::dispatchWalker(
|
||||
*this,
|
||||
|
@ -16,6 +16,7 @@ namespace NEO {
|
||||
|
||||
class CommandQueue;
|
||||
class DispatchInfo;
|
||||
class Event;
|
||||
class IndirectHeap;
|
||||
class Kernel;
|
||||
class LinearStream;
|
||||
@ -37,6 +38,7 @@ struct HardwareInterfaceWalkerArgs {
|
||||
const Vec3<size_t> *numberOfWorkgroups = nullptr;
|
||||
const Vec3<size_t> *startOfWorkgroups = nullptr;
|
||||
KernelOperation *blockedCommandsData = nullptr;
|
||||
Event *event = nullptr;
|
||||
size_t currentDispatchIndex = 0;
|
||||
size_t offsetInterfaceDescriptorTable = 0;
|
||||
PreemptionMode preemptionMode = PreemptionMode::Initial;
|
||||
|
@ -362,7 +362,7 @@ cl_int Kernel::cloneKernel(Kernel *pSourceKernel) {
|
||||
break;
|
||||
case SVM_OBJ:
|
||||
setArgSvm(i, pSourceKernel->getKernelArgInfo(i).size, const_cast<void *>(pSourceKernel->getKernelArgInfo(i).value),
|
||||
pSourceKernel->getKernelArgInfo(i).pSvmAlloc, pSourceKernel->getKernelArgInfo(i).svmFlags);
|
||||
pSourceKernel->getKernelArgInfo(i).svmAllocation, pSourceKernel->getKernelArgInfo(i).svmFlags);
|
||||
break;
|
||||
case SVM_ALLOC_OBJ:
|
||||
setArgSvmAlloc(i, const_cast<void *>(pSourceKernel->getKernelArgInfo(i).value),
|
||||
@ -881,8 +881,10 @@ cl_int Kernel::setArgSvm(uint32_t argIndex, size_t svmAllocSize, void *svmPtr, G
|
||||
patchedArgumentsNum++;
|
||||
kernelArguments[argIndex].isPatched = true;
|
||||
}
|
||||
if (svmPtr != nullptr) {
|
||||
this->anyKernelArgumentUsingSystemMemory |= true;
|
||||
}
|
||||
addAllocationToCacheFlushVector(argIndex, svmAlloc);
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
@ -894,6 +896,8 @@ cl_int Kernel::setArgSvmAlloc(uint32_t argIndex, void *svmPtr, GraphicsAllocatio
|
||||
auto patchLocation = ptrOffset(getCrossThreadData(), argAsPtr.stateless);
|
||||
patchWithRequiredSize(patchLocation, argAsPtr.pointerSize, reinterpret_cast<uintptr_t>(svmPtr));
|
||||
|
||||
auto &kernelArgInfo = kernelArguments[argIndex];
|
||||
|
||||
bool disableL3 = false;
|
||||
bool forceNonAuxMode = false;
|
||||
bool isAuxTranslationKernel = (AuxTranslationDirection::None != auxTranslationDirection);
|
||||
@ -910,7 +914,7 @@ cl_int Kernel::setArgSvmAlloc(uint32_t argIndex, void *svmPtr, GraphicsAllocatio
|
||||
forceNonAuxMode = true;
|
||||
}
|
||||
|
||||
bool argWasUncacheable = kernelArguments[argIndex].isStatelessUncacheable;
|
||||
bool argWasUncacheable = kernelArgInfo.isStatelessUncacheable;
|
||||
bool argIsUncacheable = svmAlloc ? svmAlloc->isUncacheable() : false;
|
||||
statelessUncacheableArgsCount += (argIsUncacheable ? 1 : 0) - (argWasUncacheable ? 1 : 0);
|
||||
|
||||
@ -929,15 +933,21 @@ cl_int Kernel::setArgSvmAlloc(uint32_t argIndex, void *svmPtr, GraphicsAllocatio
|
||||
}
|
||||
|
||||
storeKernelArg(argIndex, SVM_ALLOC_OBJ, svmAlloc, svmPtr, sizeof(uintptr_t));
|
||||
kernelArguments[argIndex].allocId = allocId;
|
||||
kernelArguments[argIndex].allocIdMemoryManagerCounter = allocId ? this->getContext().getSVMAllocsManager()->allocationsCounter.load() : 0u;
|
||||
kernelArguments[argIndex].isSetToNullptr = nullptr == svmPtr;
|
||||
if (!kernelArguments[argIndex].isPatched) {
|
||||
kernelArgInfo.allocId = allocId;
|
||||
kernelArgInfo.allocIdMemoryManagerCounter = allocId ? this->getContext().getSVMAllocsManager()->allocationsCounter.load() : 0u;
|
||||
kernelArgInfo.isSetToNullptr = nullptr == svmPtr;
|
||||
if (!kernelArgInfo.isPatched) {
|
||||
patchedArgumentsNum++;
|
||||
kernelArguments[argIndex].isPatched = true;
|
||||
kernelArgInfo.isPatched = true;
|
||||
}
|
||||
if (!kernelArgInfo.isSetToNullptr) {
|
||||
if (svmAlloc != nullptr) {
|
||||
this->anyKernelArgumentUsingSystemMemory |= graphicsAllocationTypeUseSystemMemory(svmAlloc->getAllocationType());
|
||||
} else {
|
||||
this->anyKernelArgumentUsingSystemMemory |= true;
|
||||
}
|
||||
}
|
||||
addAllocationToCacheFlushVector(argIndex, svmAlloc);
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
@ -948,7 +958,7 @@ void Kernel::storeKernelArg(uint32_t argIndex, kernelArgType argType, void *argO
|
||||
kernelArguments[argIndex].object = argObject;
|
||||
kernelArguments[argIndex].value = argValue;
|
||||
kernelArguments[argIndex].size = argSize;
|
||||
kernelArguments[argIndex].pSvmAlloc = argSvmAlloc;
|
||||
kernelArguments[argIndex].svmAllocation = argSvmAlloc;
|
||||
kernelArguments[argIndex].svmFlags = argSvmFlags;
|
||||
}
|
||||
|
||||
@ -1391,8 +1401,12 @@ cl_int Kernel::setArgBuffer(uint32_t argIndex,
|
||||
storeKernelArg(argIndex, BUFFER_OBJ, clMemObj, argVal, argSize);
|
||||
|
||||
auto buffer = castToObject<Buffer>(clMemObj);
|
||||
if (!buffer)
|
||||
if (!buffer) {
|
||||
return CL_INVALID_MEM_OBJECT;
|
||||
}
|
||||
|
||||
auto gfxAllocationType = buffer->getGraphicsAllocation(rootDeviceIndex)->getAllocationType();
|
||||
this->anyKernelArgumentUsingSystemMemory |= graphicsAllocationTypeUseSystemMemory(gfxAllocationType);
|
||||
|
||||
if (buffer->peekSharingHandler()) {
|
||||
usingSharedObjArgs = true;
|
||||
@ -1449,7 +1463,6 @@ cl_int Kernel::setArgBuffer(uint32_t argIndex,
|
||||
}
|
||||
|
||||
addAllocationToCacheFlushVector(argIndex, allocationForCacheFlush);
|
||||
|
||||
return CL_SUCCESS;
|
||||
} else {
|
||||
storeKernelArg(argIndex, BUFFER_OBJ, nullptr, argVal, argSize);
|
||||
@ -2237,4 +2250,11 @@ int Kernel::setKernelThreadArbitrationPolicy(uint32_t policy) {
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
// Classifies an allocation type as system-memory backed for the purposes of
// the anyKernelArgumentUsingSystemMemory tracking.
//
// Returns true when `type` is BUFFER_HOST_MEMORY, EXTERNAL_HOST_PTR, SVM_CPU
// or SVM_ZERO_COPY, and false for every other allocation type.
bool Kernel::graphicsAllocationTypeUseSystemMemory(AllocationType type) {
    if (type == AllocationType::BUFFER_HOST_MEMORY) {
        return true;
    }
    if (type == AllocationType::EXTERNAL_HOST_PTR) {
        return true;
    }
    if (type == AllocationType::SVM_CPU) {
        return true;
    }
    return type == AllocationType::SVM_ZERO_COPY;
}
|
||||
|
||||
} // namespace NEO
|
||||
|
@ -59,16 +59,16 @@ class Kernel : public ReferenceTrackedObject<Kernel> {
|
||||
};
|
||||
|
||||
struct SimpleKernelArgInfo {
|
||||
kernelArgType type;
|
||||
cl_mem_flags svmFlags;
|
||||
void *object;
|
||||
const void *value;
|
||||
size_t size;
|
||||
GraphicsAllocation *pSvmAlloc;
|
||||
cl_mem_flags svmFlags;
|
||||
bool isPatched = false;
|
||||
bool isStatelessUncacheable = false;
|
||||
GraphicsAllocation *svmAllocation;
|
||||
kernelArgType type;
|
||||
uint32_t allocId;
|
||||
uint32_t allocIdMemoryManagerCounter;
|
||||
bool isPatched = false;
|
||||
bool isStatelessUncacheable = false;
|
||||
bool isSetToNullptr = false;
|
||||
};
|
||||
|
||||
@ -405,89 +405,11 @@ class Kernel : public ReferenceTrackedObject<Kernel> {
|
||||
const std::map<uint32_t, MemObj *> &getMemObjectsToMigrate() const { return migratableArgsMap; }
|
||||
ImplicitArgs *getImplicitArgs() const { return pImplicitArgs.get(); }
|
||||
const HardwareInfo &getHardwareInfo() const;
|
||||
bool isAnyKernelArgumentUsingSystemMemory() const {
|
||||
return anyKernelArgumentUsingSystemMemory;
|
||||
}
|
||||
|
||||
protected:
|
||||
void
|
||||
makeArgsResident(CommandStreamReceiver &commandStreamReceiver);
|
||||
|
||||
void *patchBufferOffset(const ArgDescPointer &argAsPtr, void *svmPtr, GraphicsAllocation *svmAlloc);
|
||||
|
||||
void patchWithImplicitSurface(void *ptrToPatchInCrossThreadData, GraphicsAllocation &allocation, const ArgDescPointer &arg);
|
||||
|
||||
Kernel(Program *programArg, const KernelInfo &kernelInfo, ClDevice &clDevice);
|
||||
void provideInitializationHints();
|
||||
|
||||
void markArgPatchedAndResolveArgs(uint32_t argIndex);
|
||||
void resolveArgs();
|
||||
|
||||
void reconfigureKernel();
|
||||
bool hasDirectStatelessAccessToSharedBuffer() const;
|
||||
bool hasDirectStatelessAccessToHostMemory() const;
|
||||
bool hasIndirectStatelessAccessToHostMemory() const;
|
||||
|
||||
void addAllocationToCacheFlushVector(uint32_t argIndex, GraphicsAllocation *argAllocation);
|
||||
bool allocationForCacheFlush(GraphicsAllocation *argAllocation) const;
|
||||
|
||||
const ClDevice &getDevice() const {
|
||||
return clDevice;
|
||||
}
|
||||
cl_int patchPrivateSurface();
|
||||
|
||||
bool containsStatelessWrites = true;
|
||||
const ExecutionEnvironment &executionEnvironment;
|
||||
Program *program;
|
||||
ClDevice &clDevice;
|
||||
const KernelInfo &kernelInfo;
|
||||
|
||||
std::vector<SimpleKernelArgInfo> kernelArguments;
|
||||
std::vector<KernelArgHandler> kernelArgHandlers;
|
||||
std::vector<GraphicsAllocation *> kernelSvmGfxAllocations;
|
||||
std::vector<GraphicsAllocation *> kernelUnifiedMemoryGfxAllocations;
|
||||
|
||||
AuxTranslationDirection auxTranslationDirection = AuxTranslationDirection::None;
|
||||
|
||||
bool usingSharedObjArgs = false;
|
||||
bool usingImages = false;
|
||||
bool usingImagesOnly = false;
|
||||
bool auxTranslationRequired = false;
|
||||
uint32_t patchedArgumentsNum = 0;
|
||||
uint32_t startOffset = 0;
|
||||
uint32_t statelessUncacheableArgsCount = 0;
|
||||
int32_t threadArbitrationPolicy = ThreadArbitrationPolicy::NotPresent;
|
||||
KernelExecutionType executionType = KernelExecutionType::Default;
|
||||
|
||||
std::vector<PatchInfoData> patchInfoDataList;
|
||||
std::unique_ptr<ImageTransformer> imageTransformer;
|
||||
std::map<uint32_t, MemObj *> migratableArgsMap{};
|
||||
|
||||
bool specialPipelineSelectMode = false;
|
||||
bool svmAllocationsRequireCacheFlush = false;
|
||||
std::vector<GraphicsAllocation *> kernelArgRequiresCacheFlush;
|
||||
UnifiedMemoryControls unifiedMemoryControls{};
|
||||
bool isUnifiedMemorySyncRequired = true;
|
||||
bool debugEnabled = false;
|
||||
uint32_t additionalKernelExecInfo = AdditionalKernelExecInfo::DisableOverdispatch;
|
||||
|
||||
uint32_t *maxWorkGroupSizeForCrossThreadData = &Kernel::dummyPatchLocation;
|
||||
uint32_t maxKernelWorkGroupSize = 0;
|
||||
uint32_t *dataParameterSimdSize = &Kernel::dummyPatchLocation;
|
||||
uint32_t *parentEventOffset = &Kernel::dummyPatchLocation;
|
||||
uint32_t *preferredWkgMultipleOffset = &Kernel::dummyPatchLocation;
|
||||
|
||||
size_t numberOfBindingTableStates = 0u;
|
||||
size_t localBindingTableOffset = 0u;
|
||||
|
||||
std::vector<size_t> slmSizes;
|
||||
uint32_t slmTotalSize = 0u;
|
||||
|
||||
std::unique_ptr<char[]> pSshLocal;
|
||||
uint32_t sshLocalSize = 0u;
|
||||
char *crossThreadData = nullptr;
|
||||
uint32_t crossThreadDataSize = 0u;
|
||||
|
||||
GraphicsAllocation *privateSurface = nullptr;
|
||||
uint64_t privateSurfaceSize = 0u;
|
||||
|
||||
struct KernelConfig {
|
||||
Vec3<size_t> gws;
|
||||
Vec3<size_t> lws;
|
||||
@ -523,15 +445,98 @@ class Kernel : public ReferenceTrackedObject<Kernel> {
|
||||
bool singleSubdevicePreferred = false;
|
||||
};
|
||||
|
||||
Kernel(Program *programArg, const KernelInfo &kernelInfo, ClDevice &clDevice);
|
||||
|
||||
void makeArgsResident(CommandStreamReceiver &commandStreamReceiver);
|
||||
|
||||
void *patchBufferOffset(const ArgDescPointer &argAsPtr, void *svmPtr, GraphicsAllocation *svmAlloc);
|
||||
|
||||
void patchWithImplicitSurface(void *ptrToPatchInCrossThreadData, GraphicsAllocation &allocation, const ArgDescPointer &arg);
|
||||
|
||||
void provideInitializationHints();
|
||||
|
||||
void markArgPatchedAndResolveArgs(uint32_t argIndex);
|
||||
void resolveArgs();
|
||||
|
||||
void reconfigureKernel();
|
||||
bool hasDirectStatelessAccessToSharedBuffer() const;
|
||||
bool hasDirectStatelessAccessToHostMemory() const;
|
||||
bool hasIndirectStatelessAccessToHostMemory() const;
|
||||
|
||||
void addAllocationToCacheFlushVector(uint32_t argIndex, GraphicsAllocation *argAllocation);
|
||||
bool allocationForCacheFlush(GraphicsAllocation *argAllocation) const;
|
||||
|
||||
const ClDevice &getDevice() const {
|
||||
return clDevice;
|
||||
}
|
||||
cl_int patchPrivateSurface();
|
||||
|
||||
bool hasTunningFinished(KernelSubmissionData &submissionData);
|
||||
bool hasRunFinished(TimestampPacketContainer *timestampContainer);
|
||||
bool graphicsAllocationTypeUseSystemMemory(AllocationType type);
|
||||
|
||||
UnifiedMemoryControls unifiedMemoryControls{};
|
||||
|
||||
std::map<uint32_t, MemObj *> migratableArgsMap{};
|
||||
|
||||
std::unordered_map<KernelConfig, KernelSubmissionData, KernelConfigHash> kernelSubmissionMap;
|
||||
bool singleSubdevicePreferredInCurrentEnqueue = false;
|
||||
|
||||
bool kernelHasIndirectAccess = true;
|
||||
MultiDeviceKernel *pMultiDeviceKernel = nullptr;
|
||||
std::vector<SimpleKernelArgInfo> kernelArguments;
|
||||
std::vector<KernelArgHandler> kernelArgHandlers;
|
||||
std::vector<GraphicsAllocation *> kernelSvmGfxAllocations;
|
||||
std::vector<GraphicsAllocation *> kernelUnifiedMemoryGfxAllocations;
|
||||
std::vector<PatchInfoData> patchInfoDataList;
|
||||
std::vector<GraphicsAllocation *> kernelArgRequiresCacheFlush;
|
||||
std::vector<size_t> slmSizes;
|
||||
|
||||
std::unique_ptr<ImageTransformer> imageTransformer;
|
||||
std::unique_ptr<char[]> pSshLocal;
|
||||
std::unique_ptr<ImplicitArgs> pImplicitArgs = nullptr;
|
||||
|
||||
uint64_t privateSurfaceSize = 0u;
|
||||
|
||||
size_t numberOfBindingTableStates = 0u;
|
||||
size_t localBindingTableOffset = 0u;
|
||||
|
||||
const ExecutionEnvironment &executionEnvironment;
|
||||
Program *program;
|
||||
ClDevice &clDevice;
|
||||
const KernelInfo &kernelInfo;
|
||||
GraphicsAllocation *privateSurface = nullptr;
|
||||
MultiDeviceKernel *pMultiDeviceKernel = nullptr;
|
||||
|
||||
uint32_t *maxWorkGroupSizeForCrossThreadData = &Kernel::dummyPatchLocation;
|
||||
uint32_t *dataParameterSimdSize = &Kernel::dummyPatchLocation;
|
||||
uint32_t *parentEventOffset = &Kernel::dummyPatchLocation;
|
||||
uint32_t *preferredWkgMultipleOffset = &Kernel::dummyPatchLocation;
|
||||
char *crossThreadData = nullptr;
|
||||
|
||||
AuxTranslationDirection auxTranslationDirection = AuxTranslationDirection::None;
|
||||
KernelExecutionType executionType = KernelExecutionType::Default;
|
||||
|
||||
int32_t threadArbitrationPolicy = ThreadArbitrationPolicy::NotPresent;
|
||||
|
||||
uint32_t patchedArgumentsNum = 0;
|
||||
uint32_t startOffset = 0;
|
||||
uint32_t statelessUncacheableArgsCount = 0;
|
||||
uint32_t additionalKernelExecInfo = AdditionalKernelExecInfo::DisableOverdispatch;
|
||||
uint32_t maxKernelWorkGroupSize = 0;
|
||||
uint32_t slmTotalSize = 0u;
|
||||
uint32_t sshLocalSize = 0u;
|
||||
uint32_t crossThreadDataSize = 0u;
|
||||
|
||||
bool containsStatelessWrites = true;
|
||||
bool usingSharedObjArgs = false;
|
||||
bool usingImages = false;
|
||||
bool usingImagesOnly = false;
|
||||
bool auxTranslationRequired = false;
|
||||
bool specialPipelineSelectMode = false;
|
||||
bool svmAllocationsRequireCacheFlush = false;
|
||||
bool isUnifiedMemorySyncRequired = true;
|
||||
bool debugEnabled = false;
|
||||
bool singleSubdevicePreferredInCurrentEnqueue = false;
|
||||
bool kernelHasIndirectAccess = true;
|
||||
bool anyKernelArgumentUsingSystemMemory = false;
|
||||
};
|
||||
|
||||
} // namespace NEO
|
||||
|
Reference in New Issue
Block a user