mirror of
https://github.com/intel/compute-runtime.git
synced 2025-11-10 05:49:51 +08:00
Introduce MultiDeviceKernel class
Related-To: NEO-5001 Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
71940061b8
commit
04eca48ee0
@@ -101,6 +101,7 @@ class BuiltInOp<EBuiltInOps::AuxTranslation> : public BuiltinDispatchInfoBuilder
|
||||
}
|
||||
|
||||
void resizeKernelInstances(size_t size) const;
|
||||
MultiDeviceKernel *multiDeviceBaseKernel = nullptr;
|
||||
Kernel *baseKernel = nullptr;
|
||||
mutable std::vector<std::unique_ptr<Kernel>> convertToNonAuxKernel;
|
||||
mutable std::vector<std::unique_ptr<Kernel>> convertToAuxKernel;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2020 Intel Corporation
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -12,8 +12,8 @@
|
||||
namespace NEO {
|
||||
|
||||
BuiltInOp<EBuiltInOps::AuxTranslation>::BuiltInOp(BuiltIns &kernelsLib, ClDevice &device) : BuiltinDispatchInfoBuilder(kernelsLib, device) {
|
||||
BuiltinDispatchInfoBuilder::populate(EBuiltInOps::AuxTranslation, "", "fullCopy", baseKernel);
|
||||
|
||||
BuiltinDispatchInfoBuilder::populate(EBuiltInOps::AuxTranslation, "", "fullCopy", multiDeviceBaseKernel);
|
||||
baseKernel = multiDeviceBaseKernel->getKernel(clDevice.getRootDeviceIndex());
|
||||
resizeKernelInstances(5);
|
||||
}
|
||||
|
||||
|
||||
@@ -62,13 +62,13 @@ class BuiltInOp<EBuiltInOps::CopyBufferToBuffer> : public BuiltinDispatchInfoBui
|
||||
auto middleSizeEls = middleSizeBytes / middleElSize; // num work items in middle walker
|
||||
|
||||
// Set-up ISA
|
||||
kernelSplit1DBuilder.setKernel(SplitDispatch::RegionCoordX::Left, kernLeftLeftover);
|
||||
kernelSplit1DBuilder.setKernel(SplitDispatch::RegionCoordX::Left, kernLeftLeftover->getKernel(clDevice.getRootDeviceIndex()));
|
||||
if (isSrcMisaligned) {
|
||||
kernelSplit1DBuilder.setKernel(SplitDispatch::RegionCoordX::Middle, kernMiddleMisaligned);
|
||||
kernelSplit1DBuilder.setKernel(SplitDispatch::RegionCoordX::Middle, kernMiddleMisaligned->getKernel(clDevice.getRootDeviceIndex()));
|
||||
} else {
|
||||
kernelSplit1DBuilder.setKernel(SplitDispatch::RegionCoordX::Middle, kernMiddle);
|
||||
kernelSplit1DBuilder.setKernel(SplitDispatch::RegionCoordX::Middle, kernMiddle->getKernel(clDevice.getRootDeviceIndex()));
|
||||
}
|
||||
kernelSplit1DBuilder.setKernel(SplitDispatch::RegionCoordX::Right, kernRightLeftover);
|
||||
kernelSplit1DBuilder.setKernel(SplitDispatch::RegionCoordX::Right, kernRightLeftover->getKernel(clDevice.getRootDeviceIndex()));
|
||||
|
||||
// Set-up common kernel args
|
||||
if (operationParams.srcSvmAlloc) {
|
||||
@@ -117,10 +117,10 @@ class BuiltInOp<EBuiltInOps::CopyBufferToBuffer> : public BuiltinDispatchInfoBui
|
||||
}
|
||||
|
||||
protected:
|
||||
Kernel *kernLeftLeftover = nullptr;
|
||||
Kernel *kernMiddle = nullptr;
|
||||
Kernel *kernMiddleMisaligned = nullptr;
|
||||
Kernel *kernRightLeftover = nullptr;
|
||||
MultiDeviceKernel *kernLeftLeftover = nullptr;
|
||||
MultiDeviceKernel *kernMiddle = nullptr;
|
||||
MultiDeviceKernel *kernMiddleMisaligned = nullptr;
|
||||
MultiDeviceKernel *kernRightLeftover = nullptr;
|
||||
BuiltInOp(BuiltIns &kernelsLib, ClDevice &device, bool populateKernels)
|
||||
: BuiltinDispatchInfoBuilder(kernelsLib, device) {
|
||||
if (populateKernels) {
|
||||
@@ -187,7 +187,7 @@ class BuiltInOp<EBuiltInOps::CopyBufferRect> : public BuiltinDispatchInfoBuilder
|
||||
|
||||
// Set-up ISA
|
||||
int dimensions = is3D ? 3 : 2;
|
||||
kernelNoSplit3DBuilder.setKernel(kernelBytes[dimensions - 1]);
|
||||
kernelNoSplit3DBuilder.setKernel(kernelBytes[dimensions - 1]->getKernel(clDevice.getRootDeviceIndex()));
|
||||
|
||||
size_t srcOffsetFromAlignedPtr = 0;
|
||||
size_t dstOffsetFromAlignedPtr = 0;
|
||||
@@ -246,7 +246,7 @@ class BuiltInOp<EBuiltInOps::CopyBufferRect> : public BuiltinDispatchInfoBuilder
|
||||
}
|
||||
|
||||
protected:
|
||||
Kernel *kernelBytes[3]{};
|
||||
MultiDeviceKernel *kernelBytes[3]{};
|
||||
BuiltInOp(BuiltIns &kernelsLib, ClDevice &device, bool populateKernels)
|
||||
: BuiltinDispatchInfoBuilder(kernelsLib, device) {
|
||||
if (populateKernels) {
|
||||
@@ -303,9 +303,9 @@ class BuiltInOp<EBuiltInOps::FillBuffer> : public BuiltinDispatchInfoBuilder {
|
||||
auto middleSizeEls = middleSizeBytes / middleElSize; // num work items in middle walker
|
||||
|
||||
// Set-up ISA
|
||||
kernelSplit1DBuilder.setKernel(SplitDispatch::RegionCoordX::Left, kernLeftLeftover);
|
||||
kernelSplit1DBuilder.setKernel(SplitDispatch::RegionCoordX::Middle, kernMiddle);
|
||||
kernelSplit1DBuilder.setKernel(SplitDispatch::RegionCoordX::Right, kernRightLeftover);
|
||||
kernelSplit1DBuilder.setKernel(SplitDispatch::RegionCoordX::Left, kernLeftLeftover->getKernel(clDevice.getRootDeviceIndex()));
|
||||
kernelSplit1DBuilder.setKernel(SplitDispatch::RegionCoordX::Middle, kernMiddle->getKernel(clDevice.getRootDeviceIndex()));
|
||||
kernelSplit1DBuilder.setKernel(SplitDispatch::RegionCoordX::Right, kernRightLeftover->getKernel(clDevice.getRootDeviceIndex()));
|
||||
|
||||
DEBUG_BREAK_IF((operationParams.srcMemObj == nullptr) || (operationParams.srcOffset != 0));
|
||||
DEBUG_BREAK_IF((operationParams.dstMemObj == nullptr) && (operationParams.dstSvmAlloc == nullptr));
|
||||
@@ -346,9 +346,9 @@ class BuiltInOp<EBuiltInOps::FillBuffer> : public BuiltinDispatchInfoBuilder {
|
||||
}
|
||||
|
||||
protected:
|
||||
Kernel *kernLeftLeftover = nullptr;
|
||||
Kernel *kernMiddle = nullptr;
|
||||
Kernel *kernRightLeftover = nullptr;
|
||||
MultiDeviceKernel *kernLeftLeftover = nullptr;
|
||||
MultiDeviceKernel *kernMiddle = nullptr;
|
||||
MultiDeviceKernel *kernRightLeftover = nullptr;
|
||||
|
||||
BuiltInOp(BuiltIns &kernelsLib, ClDevice &device, bool populateKernels)
|
||||
: BuiltinDispatchInfoBuilder(kernelsLib, device) {
|
||||
@@ -388,7 +388,7 @@ class BuiltInOp<EBuiltInOps::CopyBufferToImage3d> : public BuiltinDispatchInfoBu
|
||||
}
|
||||
|
||||
protected:
|
||||
Kernel *kernelBytes[5] = {nullptr};
|
||||
MultiDeviceKernel *kernelBytes[5] = {nullptr};
|
||||
BuiltInOp(BuiltIns &kernelsLib, ClDevice &device, bool populateKernels)
|
||||
: BuiltinDispatchInfoBuilder(kernelsLib, device) {
|
||||
if (populateKernels) {
|
||||
@@ -432,7 +432,7 @@ class BuiltInOp<EBuiltInOps::CopyBufferToImage3d> : public BuiltinDispatchInfoBu
|
||||
// Set-up kernel
|
||||
auto bytesExponent = Math::log2(bytesPerPixel);
|
||||
DEBUG_BREAK_IF(bytesExponent >= 5);
|
||||
kernelNoSplit3DBuilder.setKernel(kernelBytes[bytesExponent]);
|
||||
kernelNoSplit3DBuilder.setKernel(kernelBytes[bytesExponent]->getKernel(clDevice.getRootDeviceIndex()));
|
||||
|
||||
// Set-up source host ptr / buffer
|
||||
if (operationParams.srcPtr) {
|
||||
@@ -503,7 +503,7 @@ class BuiltInOp<EBuiltInOps::CopyImage3dToBuffer> : public BuiltinDispatchInfoBu
|
||||
}
|
||||
|
||||
protected:
|
||||
Kernel *kernelBytes[5] = {nullptr};
|
||||
MultiDeviceKernel *kernelBytes[5] = {nullptr};
|
||||
|
||||
BuiltInOp(BuiltIns &kernelsLib, ClDevice &device, bool populateKernels)
|
||||
: BuiltinDispatchInfoBuilder(kernelsLib, device) {
|
||||
@@ -548,7 +548,7 @@ class BuiltInOp<EBuiltInOps::CopyImage3dToBuffer> : public BuiltinDispatchInfoBu
|
||||
// Set-up ISA
|
||||
auto bytesExponent = Math::log2(bytesPerPixel);
|
||||
DEBUG_BREAK_IF(bytesExponent >= 5);
|
||||
kernelNoSplit3DBuilder.setKernel(kernelBytes[bytesExponent]);
|
||||
kernelNoSplit3DBuilder.setKernel(kernelBytes[bytesExponent]->getKernel(clDevice.getRootDeviceIndex()));
|
||||
|
||||
// Set-up source image
|
||||
kernelNoSplit3DBuilder.setArg(0, srcImageRedescribed, operationParams.srcMipLevel);
|
||||
@@ -634,7 +634,7 @@ class BuiltInOp<EBuiltInOps::CopyImageToImage3d> : public BuiltinDispatchInfoBui
|
||||
multiDispatchInfo.pushRedescribedMemObj(std::unique_ptr<MemObj>(dstImageRedescribed)); // life range same as mdi's
|
||||
|
||||
// Set-up kernel
|
||||
kernelNoSplit3DBuilder.setKernel(kernel);
|
||||
kernelNoSplit3DBuilder.setKernel(kernel->getKernel(clDevice.getRootDeviceIndex()));
|
||||
|
||||
// Set-up source image
|
||||
kernelNoSplit3DBuilder.setArg(0, srcImageRedescribed, operationParams.srcMipLevel);
|
||||
@@ -670,7 +670,7 @@ class BuiltInOp<EBuiltInOps::CopyImageToImage3d> : public BuiltinDispatchInfoBui
|
||||
}
|
||||
|
||||
protected:
|
||||
Kernel *kernel = nullptr;
|
||||
MultiDeviceKernel *kernel = nullptr;
|
||||
};
|
||||
|
||||
template <>
|
||||
@@ -695,7 +695,7 @@ class BuiltInOp<EBuiltInOps::FillImage3d> : public BuiltinDispatchInfoBuilder {
|
||||
multiDispatchInfo.pushRedescribedMemObj(std::unique_ptr<MemObj>(imageRedescribed));
|
||||
|
||||
// Set-up kernel
|
||||
kernelNoSplit3DBuilder.setKernel(kernel);
|
||||
kernelNoSplit3DBuilder.setKernel(kernel->getKernel(clDevice.getRootDeviceIndex()));
|
||||
|
||||
// Set-up destination image
|
||||
kernelNoSplit3DBuilder.setArg(0, imageRedescribed);
|
||||
@@ -727,7 +727,7 @@ class BuiltInOp<EBuiltInOps::FillImage3d> : public BuiltinDispatchInfoBuilder {
|
||||
}
|
||||
|
||||
protected:
|
||||
Kernel *kernel = nullptr;
|
||||
MultiDeviceKernel *kernel = nullptr;
|
||||
};
|
||||
|
||||
BuiltinDispatchInfoBuilder &BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::Type operation, ClDevice &device) {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2020 Intel Corporation
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -9,7 +9,7 @@
|
||||
#include "shared/source/built_ins/built_ins.h"
|
||||
#include "shared/source/helpers/vec.h"
|
||||
|
||||
#include "opencl/source/kernel/kernel.h"
|
||||
#include "opencl/source/kernel/multi_device_kernel.h"
|
||||
|
||||
#include "CL/cl.h"
|
||||
#include "built_in_ops.h"
|
||||
@@ -60,7 +60,7 @@ class BuiltinDispatchInfoBuilder {
|
||||
virtual ~BuiltinDispatchInfoBuilder() = default;
|
||||
|
||||
template <typename... KernelsDescArgsT>
|
||||
void populate(EBuiltInOps::Type operation, ConstStringRef options, KernelsDescArgsT &&... desc);
|
||||
void populate(EBuiltInOps::Type operation, ConstStringRef options, KernelsDescArgsT &&...desc);
|
||||
|
||||
virtual bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo) const {
|
||||
return false;
|
||||
@@ -81,13 +81,13 @@ class BuiltinDispatchInfoBuilder {
|
||||
return true;
|
||||
}
|
||||
|
||||
std::vector<std::unique_ptr<Kernel>> &peekUsedKernels() { return usedKernels; }
|
||||
std::vector<std::unique_ptr<MultiDeviceKernel>> &peekUsedKernels() { return usedKernels; }
|
||||
|
||||
static std::unique_ptr<Program> createProgramFromCode(const BuiltinCode &bc, const ClDeviceVector &device);
|
||||
|
||||
protected:
|
||||
template <typename KernelNameT, typename... KernelsDescArgsT>
|
||||
void grabKernels(KernelNameT &&kernelName, Kernel *&kernelDst, KernelsDescArgsT &&... kernelsDesc) {
|
||||
void grabKernels(KernelNameT &&kernelName, MultiDeviceKernel *&kernelDst, KernelsDescArgsT &&...kernelsDesc) {
|
||||
auto rootDeviceIndex = clDevice.getRootDeviceIndex();
|
||||
const KernelInfo *kernelInfo = prog->getKernelInfo(kernelName, rootDeviceIndex);
|
||||
UNRECOVERABLE_IF(nullptr == kernelInfo);
|
||||
@@ -95,16 +95,16 @@ class BuiltinDispatchInfoBuilder {
|
||||
KernelInfoContainer kernelInfos;
|
||||
kernelInfos.resize(rootDeviceIndex + 1);
|
||||
kernelInfos[rootDeviceIndex] = kernelInfo;
|
||||
kernelDst = Kernel::create(prog.get(), kernelInfos, &err);
|
||||
kernelDst->isBuiltIn = true;
|
||||
usedKernels.push_back(std::unique_ptr<Kernel>(kernelDst));
|
||||
kernelDst = MultiDeviceKernel::create(prog.get(), kernelInfos, &err);
|
||||
kernelDst->getKernel(rootDeviceIndex)->isBuiltIn = true;
|
||||
usedKernels.push_back(std::unique_ptr<MultiDeviceKernel>(kernelDst));
|
||||
grabKernels(std::forward<KernelsDescArgsT>(kernelsDesc)...);
|
||||
}
|
||||
|
||||
cl_int grabKernels() { return CL_SUCCESS; }
|
||||
|
||||
std::unique_ptr<Program> prog;
|
||||
std::vector<std::unique_ptr<Kernel>> usedKernels;
|
||||
std::vector<std::unique_ptr<MultiDeviceKernel>> usedKernels;
|
||||
BuiltIns &kernelsLib;
|
||||
ClDevice &clDevice;
|
||||
};
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
* Copyright (C) 2017-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -26,8 +26,9 @@ class VmeBuiltinDispatchInfoBuilder : public BuiltinDispatchInfoBuilder {
|
||||
: BuiltinDispatchInfoBuilder(kernelsLib, device) {
|
||||
populate(builtinOp,
|
||||
mediaKernelsBuildOptions,
|
||||
kernelName, vmeKernel);
|
||||
kernelName, multiDeviceVmeKernel);
|
||||
auto rootDeviceIndex = clDevice.getRootDeviceIndex();
|
||||
vmeKernel = multiDeviceVmeKernel->getKernel(rootDeviceIndex);
|
||||
widthArgNum = vmeKernel->getKernelInfo(rootDeviceIndex).getArgNumByName("width");
|
||||
heightArgNum = vmeKernel->getKernelInfo(rootDeviceIndex).getArgNumByName("height");
|
||||
strideArgNum = vmeKernel->getKernelInfo(rootDeviceIndex).getArgNumByName("stride");
|
||||
@@ -241,6 +242,7 @@ class VmeBuiltinDispatchInfoBuilder : public BuiltinDispatchInfoBuilder {
|
||||
int32_t motionVectorBufferArgNum;
|
||||
int32_t predictionMotionVectorBufferArgNum;
|
||||
int32_t residualsArgNum;
|
||||
MultiDeviceKernel *multiDeviceVmeKernel;
|
||||
Kernel *vmeKernel;
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user