mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-03 23:03:02 +08:00
Store multiple Kernels in MultiDeviceKernel
Related-To: NEO-5001
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
d7f67ad22b
commit
b07f0e81b7
@@ -9,11 +9,22 @@
|
||||
namespace NEO {
|
||||
|
||||
// Destructor: drops the internal reference held on the stored kernel(s).
// NOTE(review): this span is a rendered diff hunk without +/- markers, so pre-change and
// post-change lines appear together; the `|` / `||||` rows are table residue, not code.
MultiDeviceKernel::~MultiDeviceKernel() {
|
||||
// Presumably the pre-change body (single `kernel` member) - confirm against the commit.
kernel->decRefInternal();
|
||||
// Presumably the post-change body: walk every slot of `kernels`.
for (auto &pKernel : kernels) {
|
||||
// Slots may be null, so each one is checked before it is released.
if (pKernel) {
|
||||
pKernel->decRefInternal();
|
||||
}
|
||||
}
|
||||
}
|
||||
// Constructor(s) of MultiDeviceKernel as shown by the rendered diff.
// NOTE(review): the single-Kernel signature and the two statements under it have no closing
// brace of their own in this view; presumably they are the removed constructor and the
// KernelVectorType overload below is its replacement - confirm against the commit.
MultiDeviceKernel::MultiDeviceKernel(Kernel *pKernel) : kernel(pKernel) {
|
||||
pKernel->incRefInternal();
|
||||
pKernel->setMultiDeviceKernel(this);
|
||||
// Takes the vector by value and moves it into the `kernels` member.
MultiDeviceKernel::MultiDeviceKernel(KernelVectorType kernelVector) : kernels(std::move(kernelVector)) {
|
||||
for (auto &pKernel : kernels) {
|
||||
// Null slots are skipped entirely.
if (pKernel) {
|
||||
// Only the first non-null kernel encountered decides the default.
if (!defaultKernel) {
|
||||
// The default is looked up in `kernels` using the root device index of the first device
// reported by this kernel's getDevices().
defaultKernel = kernels[(*pKernel->getDevices().begin())->getRootDeviceIndex()];
|
||||
}
|
||||
// Take an internal reference and point the kernel back at this object.
pKernel->incRefInternal();
|
||||
pKernel->setMultiDeviceKernel(this);
|
||||
}
|
||||
}
|
||||
// NOTE(review): if every slot is null, this loop never assigns defaultKernel.
};
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -14,48 +14,59 @@ struct OpenCLObjectMapper<_cl_kernel> {
|
||||
typedef class MultiDeviceKernel DerivedType;
|
||||
};
|
||||
|
||||
// Container of Kernel pointers used by MultiDeviceKernel; getKernel() indexes it by root device index.
// NOTE(review): StackVec is declared elsewhere; the meaning of its second template argument (4)
// is not visible here - presumably an inline capacity, confirm.
using KernelVectorType = StackVec<Kernel *, 4>;
|
||||
|
||||
// cl_kernel-facing object that holds Kernel pointers and forwards the kernel API to one of them.
// NOTE(review): this span is a rendered diff hunk without +/- markers. Declarations that use the
// `kernel` member appear next to their `kernels` / `defaultKernel` counterparts; presumably the
// former are the removed lines and the latter the added ones - confirm against the commit.
// The `|` / `||||` rows are table residue, not code.
class MultiDeviceKernel : public BaseObject<_cl_kernel> {
|
||||
public:
|
||||
static const cl_ulong objectMagic = 0x3284ADC8EA0AFE25LL;
|
||||
|
||||
~MultiDeviceKernel() override;
|
||||
MultiDeviceKernel(Kernel *pKernel);
|
||||
MultiDeviceKernel(KernelVectorType kernelVector);
|
||||
|
||||
// Accessors, `kernel`-based variant: rootDeviceIndex is ignored.
Kernel *getKernel(uint32_t rootDeviceIndex) const { return kernel; }
|
||||
Kernel *getDefaultKernel() const { return kernel; }
|
||||
// Accessors, `kernels`-based variant: direct index into `kernels`; no bounds check is visible
// here, and the returned slot may be null.
Kernel *getKernel(uint32_t rootDeviceIndex) const { return kernels[rootDeviceIndex]; }
|
||||
Kernel *getDefaultKernel() const { return defaultKernel; }
|
||||
|
||||
// Factory. Builds the Kernel object(s) for `program` and wraps them in a multi_device_kernel_t
// allocated with `new`; the raw pointer is returned to the caller.
// errcodeRet is handed unchanged to every Kernel::create call.
template <typename kernel_t = Kernel, typename program_t = Program, typename multi_device_kernel_t = MultiDeviceKernel>
|
||||
static multi_device_kernel_t *create(program_t *program, const KernelInfoContainer &kernelInfos, cl_int *errcodeRet) {
|
||||
KernelVectorType kernels{};
|
||||
// One slot per root device index, 0 .. getMaxRootDeviceIndex() inclusive.
kernels.resize(program->getMaxRootDeviceIndex() + 1);
|
||||
|
||||
// Presumably the pre-change body: a single Kernel wrapped directly.
auto pKernel = Kernel::create<kernel_t, program_t>(program, kernelInfos, errcodeRet);
|
||||
auto pMultiDeviceKernel = new multi_device_kernel_t(pKernel);
|
||||
// Presumably the post-change body: one Kernel per distinct root device index of the program.
for (auto &pDevice : program->getDevices()) {
|
||||
auto rootDeviceIndex = pDevice->getRootDeviceIndex();
|
||||
// A slot that is already filled is left alone, so devices sharing a root device index
// share one Kernel.
if (kernels[rootDeviceIndex]) {
|
||||
continue;
|
||||
}
|
||||
// NOTE(review): the result is stored without a null check; if Kernel::create can return
// nullptr on failure, the slot stays empty and the loop simply continues - confirm.
kernels[rootDeviceIndex] = Kernel::create<kernel_t, program_t>(program, kernelInfos, errcodeRet);
|
||||
}
|
||||
// NOTE(review): the wrapper is constructed even if no Kernel was created; in that case the
// constructor leaves defaultKernel null and the forwarding methods below dereference it.
auto pMultiDeviceKernel = new multi_device_kernel_t(std::move(kernels));
|
||||
|
||||
return pMultiDeviceKernel;
|
||||
}
|
||||
|
||||
// Forwarding wrappers, `kernel`-based variant: each call is passed straight to `kernel`.
cl_int cloneKernel(Kernel *pSourceKernel) { return kernel->cloneKernel(pSourceKernel); }
|
||||
const std::vector<Kernel::SimpleKernelArgInfo> &getKernelArguments() const { return kernel->getKernelArguments(); }
|
||||
cl_int checkCorrectImageAccessQualifier(cl_uint argIndex, size_t argSize, const void *argValue) const { return kernel->checkCorrectImageAccessQualifier(argIndex, argSize, argValue); }
|
||||
void unsetArg(uint32_t argIndex) { return kernel->unsetArg(argIndex); }
|
||||
cl_int setArg(uint32_t argIndex, size_t argSize, const void *argVal) { return kernel->setArg(argIndex, argSize, argVal); }
|
||||
cl_int getInfo(cl_kernel_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) const { return kernel->getInfo(paramName, paramValueSize, paramValue, paramValueSizeRet); }
|
||||
cl_int getArgInfo(cl_uint argIndx, cl_kernel_arg_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) const { return kernel->getArgInfo(argIndx, paramName, paramValueSize, paramValue, paramValueSizeRet); }
|
||||
const ClDeviceVector &getDevices() const { return kernel->getDevices(); }
|
||||
size_t getKernelArgsNumber() const { return kernel->getKernelArgsNumber(); }
|
||||
Context &getContext() const { return kernel->getContext(); }
|
||||
cl_int setArgSvmAlloc(uint32_t argIndex, void *svmPtr, GraphicsAllocation *svmAlloc) { return kernel->setArgSvmAlloc(argIndex, svmPtr, svmAlloc); }
|
||||
bool getHasIndirectAccess() const { return kernel->getHasIndirectAccess(); }
|
||||
void setUnifiedMemoryProperty(cl_kernel_exec_info infoType, bool infoValue) { return kernel->setUnifiedMemoryProperty(infoType, infoValue); }
|
||||
void setSvmKernelExecInfo(GraphicsAllocation *argValue) { return kernel->setSvmKernelExecInfo(argValue); }
|
||||
void clearSvmKernelExecInfo() { return kernel->clearSvmKernelExecInfo(); }
|
||||
void setUnifiedMemoryExecInfo(GraphicsAllocation *argValue) { return kernel->setUnifiedMemoryExecInfo(argValue); }
|
||||
void clearUnifiedMemoryExecInfo() { return kernel->clearUnifiedMemoryExecInfo(); }
|
||||
int setKernelThreadArbitrationPolicy(uint32_t propertyValue) { return kernel->setKernelThreadArbitrationPolicy(propertyValue); }
|
||||
cl_int setKernelExecutionType(cl_execution_info_kernel_type_intel executionType) { return kernel->setKernelExecutionType(executionType); }
|
||||
int32_t setAdditionalKernelExecInfoWithParam(uint32_t paramName, size_t paramValueSize, const void *paramValue) { return kernel->setAdditionalKernelExecInfoWithParam(paramName, paramValueSize, paramValue); }
|
||||
// Forwarding wrappers, `defaultKernel`-based variant: same signatures as above.
// NOTE(review): every call goes to defaultKernel only; the other entries of `kernels` are not
// touched by these wrappers, and defaultKernel is dereferenced without a null check.
cl_int cloneKernel(Kernel *pSourceKernel) { return defaultKernel->cloneKernel(pSourceKernel); }
|
||||
const std::vector<Kernel::SimpleKernelArgInfo> &getKernelArguments() const { return defaultKernel->getKernelArguments(); }
|
||||
cl_int checkCorrectImageAccessQualifier(cl_uint argIndex, size_t argSize, const void *argValue) const { return defaultKernel->checkCorrectImageAccessQualifier(argIndex, argSize, argValue); }
|
||||
void unsetArg(uint32_t argIndex) { return defaultKernel->unsetArg(argIndex); }
|
||||
cl_int setArg(uint32_t argIndex, size_t argSize, const void *argVal) { return defaultKernel->setArg(argIndex, argSize, argVal); }
|
||||
cl_int getInfo(cl_kernel_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) const { return defaultKernel->getInfo(paramName, paramValueSize, paramValue, paramValueSizeRet); }
|
||||
cl_int getArgInfo(cl_uint argIndx, cl_kernel_arg_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) const { return defaultKernel->getArgInfo(argIndx, paramName, paramValueSize, paramValue, paramValueSizeRet); }
|
||||
const ClDeviceVector &getDevices() const { return defaultKernel->getDevices(); }
|
||||
size_t getKernelArgsNumber() const { return defaultKernel->getKernelArgsNumber(); }
|
||||
Context &getContext() const { return defaultKernel->getContext(); }
|
||||
cl_int setArgSvmAlloc(uint32_t argIndex, void *svmPtr, GraphicsAllocation *svmAlloc) { return defaultKernel->setArgSvmAlloc(argIndex, svmPtr, svmAlloc); }
|
||||
bool getHasIndirectAccess() const { return defaultKernel->getHasIndirectAccess(); }
|
||||
void setUnifiedMemoryProperty(cl_kernel_exec_info infoType, bool infoValue) { return defaultKernel->setUnifiedMemoryProperty(infoType, infoValue); }
|
||||
void setSvmKernelExecInfo(GraphicsAllocation *argValue) { return defaultKernel->setSvmKernelExecInfo(argValue); }
|
||||
void clearSvmKernelExecInfo() { return defaultKernel->clearSvmKernelExecInfo(); }
|
||||
void setUnifiedMemoryExecInfo(GraphicsAllocation *argValue) { return defaultKernel->setUnifiedMemoryExecInfo(argValue); }
|
||||
void clearUnifiedMemoryExecInfo() { return defaultKernel->clearUnifiedMemoryExecInfo(); }
|
||||
int setKernelThreadArbitrationPolicy(uint32_t propertyValue) { return defaultKernel->setKernelThreadArbitrationPolicy(propertyValue); }
|
||||
cl_int setKernelExecutionType(cl_execution_info_kernel_type_intel executionType) { return defaultKernel->setKernelExecutionType(executionType); }
|
||||
int32_t setAdditionalKernelExecInfoWithParam(uint32_t paramName, size_t paramValueSize, const void *paramValue) { return defaultKernel->setAdditionalKernelExecInfoWithParam(paramName, paramValueSize, paramValue); }
|
||||
|
||||
protected:
|
||||
// Presumably the pre-change member (single kernel).
Kernel *kernel = nullptr;
|
||||
// Kernel pointers indexed by root device index (see getKernel); slots may be null.
KernelVectorType kernels;
|
||||
// Kernel that the forwarding wrappers call; stays null until a constructor assigns it.
Kernel *defaultKernel = nullptr;
|
||||
};
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
Reference in New Issue
Block a user