Introduce MultiDeviceKernel class

Related-To: NEO-5001
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
Mateusz Jablonski
2021-03-09 10:30:21 +00:00
committed by Compute-Runtime-Automation
parent 71940061b8
commit 04eca48ee0
62 changed files with 778 additions and 596 deletions

View File

@@ -101,6 +101,7 @@ class BuiltInOp<EBuiltInOps::AuxTranslation> : public BuiltinDispatchInfoBuilder
}
void resizeKernelInstances(size_t size) const;
MultiDeviceKernel *multiDeviceBaseKernel = nullptr;
Kernel *baseKernel = nullptr;
mutable std::vector<std::unique_ptr<Kernel>> convertToNonAuxKernel;
mutable std::vector<std::unique_ptr<Kernel>> convertToAuxKernel;

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2020 Intel Corporation
* Copyright (C) 2018-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -12,8 +12,8 @@
namespace NEO {
BuiltInOp<EBuiltInOps::AuxTranslation>::BuiltInOp(BuiltIns &kernelsLib, ClDevice &device) : BuiltinDispatchInfoBuilder(kernelsLib, device) {
BuiltinDispatchInfoBuilder::populate(EBuiltInOps::AuxTranslation, "", "fullCopy", baseKernel);
BuiltinDispatchInfoBuilder::populate(EBuiltInOps::AuxTranslation, "", "fullCopy", multiDeviceBaseKernel);
baseKernel = multiDeviceBaseKernel->getKernel(clDevice.getRootDeviceIndex());
resizeKernelInstances(5);
}

View File

@@ -62,13 +62,13 @@ class BuiltInOp<EBuiltInOps::CopyBufferToBuffer> : public BuiltinDispatchInfoBui
auto middleSizeEls = middleSizeBytes / middleElSize; // num work items in middle walker
// Set-up ISA
kernelSplit1DBuilder.setKernel(SplitDispatch::RegionCoordX::Left, kernLeftLeftover);
kernelSplit1DBuilder.setKernel(SplitDispatch::RegionCoordX::Left, kernLeftLeftover->getKernel(clDevice.getRootDeviceIndex()));
if (isSrcMisaligned) {
kernelSplit1DBuilder.setKernel(SplitDispatch::RegionCoordX::Middle, kernMiddleMisaligned);
kernelSplit1DBuilder.setKernel(SplitDispatch::RegionCoordX::Middle, kernMiddleMisaligned->getKernel(clDevice.getRootDeviceIndex()));
} else {
kernelSplit1DBuilder.setKernel(SplitDispatch::RegionCoordX::Middle, kernMiddle);
kernelSplit1DBuilder.setKernel(SplitDispatch::RegionCoordX::Middle, kernMiddle->getKernel(clDevice.getRootDeviceIndex()));
}
kernelSplit1DBuilder.setKernel(SplitDispatch::RegionCoordX::Right, kernRightLeftover);
kernelSplit1DBuilder.setKernel(SplitDispatch::RegionCoordX::Right, kernRightLeftover->getKernel(clDevice.getRootDeviceIndex()));
// Set-up common kernel args
if (operationParams.srcSvmAlloc) {
@@ -117,10 +117,10 @@ class BuiltInOp<EBuiltInOps::CopyBufferToBuffer> : public BuiltinDispatchInfoBui
}
protected:
Kernel *kernLeftLeftover = nullptr;
Kernel *kernMiddle = nullptr;
Kernel *kernMiddleMisaligned = nullptr;
Kernel *kernRightLeftover = nullptr;
MultiDeviceKernel *kernLeftLeftover = nullptr;
MultiDeviceKernel *kernMiddle = nullptr;
MultiDeviceKernel *kernMiddleMisaligned = nullptr;
MultiDeviceKernel *kernRightLeftover = nullptr;
BuiltInOp(BuiltIns &kernelsLib, ClDevice &device, bool populateKernels)
: BuiltinDispatchInfoBuilder(kernelsLib, device) {
if (populateKernels) {
@@ -187,7 +187,7 @@ class BuiltInOp<EBuiltInOps::CopyBufferRect> : public BuiltinDispatchInfoBuilder
// Set-up ISA
int dimensions = is3D ? 3 : 2;
kernelNoSplit3DBuilder.setKernel(kernelBytes[dimensions - 1]);
kernelNoSplit3DBuilder.setKernel(kernelBytes[dimensions - 1]->getKernel(clDevice.getRootDeviceIndex()));
size_t srcOffsetFromAlignedPtr = 0;
size_t dstOffsetFromAlignedPtr = 0;
@@ -246,7 +246,7 @@ class BuiltInOp<EBuiltInOps::CopyBufferRect> : public BuiltinDispatchInfoBuilder
}
protected:
Kernel *kernelBytes[3]{};
MultiDeviceKernel *kernelBytes[3]{};
BuiltInOp(BuiltIns &kernelsLib, ClDevice &device, bool populateKernels)
: BuiltinDispatchInfoBuilder(kernelsLib, device) {
if (populateKernels) {
@@ -303,9 +303,9 @@ class BuiltInOp<EBuiltInOps::FillBuffer> : public BuiltinDispatchInfoBuilder {
auto middleSizeEls = middleSizeBytes / middleElSize; // num work items in middle walker
// Set-up ISA
kernelSplit1DBuilder.setKernel(SplitDispatch::RegionCoordX::Left, kernLeftLeftover);
kernelSplit1DBuilder.setKernel(SplitDispatch::RegionCoordX::Middle, kernMiddle);
kernelSplit1DBuilder.setKernel(SplitDispatch::RegionCoordX::Right, kernRightLeftover);
kernelSplit1DBuilder.setKernel(SplitDispatch::RegionCoordX::Left, kernLeftLeftover->getKernel(clDevice.getRootDeviceIndex()));
kernelSplit1DBuilder.setKernel(SplitDispatch::RegionCoordX::Middle, kernMiddle->getKernel(clDevice.getRootDeviceIndex()));
kernelSplit1DBuilder.setKernel(SplitDispatch::RegionCoordX::Right, kernRightLeftover->getKernel(clDevice.getRootDeviceIndex()));
DEBUG_BREAK_IF((operationParams.srcMemObj == nullptr) || (operationParams.srcOffset != 0));
DEBUG_BREAK_IF((operationParams.dstMemObj == nullptr) && (operationParams.dstSvmAlloc == nullptr));
@@ -346,9 +346,9 @@ class BuiltInOp<EBuiltInOps::FillBuffer> : public BuiltinDispatchInfoBuilder {
}
protected:
Kernel *kernLeftLeftover = nullptr;
Kernel *kernMiddle = nullptr;
Kernel *kernRightLeftover = nullptr;
MultiDeviceKernel *kernLeftLeftover = nullptr;
MultiDeviceKernel *kernMiddle = nullptr;
MultiDeviceKernel *kernRightLeftover = nullptr;
BuiltInOp(BuiltIns &kernelsLib, ClDevice &device, bool populateKernels)
: BuiltinDispatchInfoBuilder(kernelsLib, device) {
@@ -388,7 +388,7 @@ class BuiltInOp<EBuiltInOps::CopyBufferToImage3d> : public BuiltinDispatchInfoBu
}
protected:
Kernel *kernelBytes[5] = {nullptr};
MultiDeviceKernel *kernelBytes[5] = {nullptr};
BuiltInOp(BuiltIns &kernelsLib, ClDevice &device, bool populateKernels)
: BuiltinDispatchInfoBuilder(kernelsLib, device) {
if (populateKernels) {
@@ -432,7 +432,7 @@ class BuiltInOp<EBuiltInOps::CopyBufferToImage3d> : public BuiltinDispatchInfoBu
// Set-up kernel
auto bytesExponent = Math::log2(bytesPerPixel);
DEBUG_BREAK_IF(bytesExponent >= 5);
kernelNoSplit3DBuilder.setKernel(kernelBytes[bytesExponent]);
kernelNoSplit3DBuilder.setKernel(kernelBytes[bytesExponent]->getKernel(clDevice.getRootDeviceIndex()));
// Set-up source host ptr / buffer
if (operationParams.srcPtr) {
@@ -503,7 +503,7 @@ class BuiltInOp<EBuiltInOps::CopyImage3dToBuffer> : public BuiltinDispatchInfoBu
}
protected:
Kernel *kernelBytes[5] = {nullptr};
MultiDeviceKernel *kernelBytes[5] = {nullptr};
BuiltInOp(BuiltIns &kernelsLib, ClDevice &device, bool populateKernels)
: BuiltinDispatchInfoBuilder(kernelsLib, device) {
@@ -548,7 +548,7 @@ class BuiltInOp<EBuiltInOps::CopyImage3dToBuffer> : public BuiltinDispatchInfoBu
// Set-up ISA
auto bytesExponent = Math::log2(bytesPerPixel);
DEBUG_BREAK_IF(bytesExponent >= 5);
kernelNoSplit3DBuilder.setKernel(kernelBytes[bytesExponent]);
kernelNoSplit3DBuilder.setKernel(kernelBytes[bytesExponent]->getKernel(clDevice.getRootDeviceIndex()));
// Set-up source image
kernelNoSplit3DBuilder.setArg(0, srcImageRedescribed, operationParams.srcMipLevel);
@@ -634,7 +634,7 @@ class BuiltInOp<EBuiltInOps::CopyImageToImage3d> : public BuiltinDispatchInfoBui
multiDispatchInfo.pushRedescribedMemObj(std::unique_ptr<MemObj>(dstImageRedescribed)); // life range same as mdi's
// Set-up kernel
kernelNoSplit3DBuilder.setKernel(kernel);
kernelNoSplit3DBuilder.setKernel(kernel->getKernel(clDevice.getRootDeviceIndex()));
// Set-up source image
kernelNoSplit3DBuilder.setArg(0, srcImageRedescribed, operationParams.srcMipLevel);
@@ -670,7 +670,7 @@ class BuiltInOp<EBuiltInOps::CopyImageToImage3d> : public BuiltinDispatchInfoBui
}
protected:
Kernel *kernel = nullptr;
MultiDeviceKernel *kernel = nullptr;
};
template <>
@@ -695,7 +695,7 @@ class BuiltInOp<EBuiltInOps::FillImage3d> : public BuiltinDispatchInfoBuilder {
multiDispatchInfo.pushRedescribedMemObj(std::unique_ptr<MemObj>(imageRedescribed));
// Set-up kernel
kernelNoSplit3DBuilder.setKernel(kernel);
kernelNoSplit3DBuilder.setKernel(kernel->getKernel(clDevice.getRootDeviceIndex()));
// Set-up destination image
kernelNoSplit3DBuilder.setArg(0, imageRedescribed);
@@ -727,7 +727,7 @@ class BuiltInOp<EBuiltInOps::FillImage3d> : public BuiltinDispatchInfoBuilder {
}
protected:
Kernel *kernel = nullptr;
MultiDeviceKernel *kernel = nullptr;
};
BuiltinDispatchInfoBuilder &BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::Type operation, ClDevice &device) {

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2020 Intel Corporation
* Copyright (C) 2018-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -9,7 +9,7 @@
#include "shared/source/built_ins/built_ins.h"
#include "shared/source/helpers/vec.h"
#include "opencl/source/kernel/kernel.h"
#include "opencl/source/kernel/multi_device_kernel.h"
#include "CL/cl.h"
#include "built_in_ops.h"
@@ -60,7 +60,7 @@ class BuiltinDispatchInfoBuilder {
virtual ~BuiltinDispatchInfoBuilder() = default;
template <typename... KernelsDescArgsT>
void populate(EBuiltInOps::Type operation, ConstStringRef options, KernelsDescArgsT &&... desc);
void populate(EBuiltInOps::Type operation, ConstStringRef options, KernelsDescArgsT &&...desc);
virtual bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo) const {
return false;
@@ -81,13 +81,13 @@ class BuiltinDispatchInfoBuilder {
return true;
}
std::vector<std::unique_ptr<Kernel>> &peekUsedKernels() { return usedKernels; }
std::vector<std::unique_ptr<MultiDeviceKernel>> &peekUsedKernels() { return usedKernels; }
static std::unique_ptr<Program> createProgramFromCode(const BuiltinCode &bc, const ClDeviceVector &device);
protected:
template <typename KernelNameT, typename... KernelsDescArgsT>
void grabKernels(KernelNameT &&kernelName, Kernel *&kernelDst, KernelsDescArgsT &&... kernelsDesc) {
void grabKernels(KernelNameT &&kernelName, MultiDeviceKernel *&kernelDst, KernelsDescArgsT &&...kernelsDesc) {
auto rootDeviceIndex = clDevice.getRootDeviceIndex();
const KernelInfo *kernelInfo = prog->getKernelInfo(kernelName, rootDeviceIndex);
UNRECOVERABLE_IF(nullptr == kernelInfo);
@@ -95,16 +95,16 @@ class BuiltinDispatchInfoBuilder {
KernelInfoContainer kernelInfos;
kernelInfos.resize(rootDeviceIndex + 1);
kernelInfos[rootDeviceIndex] = kernelInfo;
kernelDst = Kernel::create(prog.get(), kernelInfos, &err);
kernelDst->isBuiltIn = true;
usedKernels.push_back(std::unique_ptr<Kernel>(kernelDst));
kernelDst = MultiDeviceKernel::create(prog.get(), kernelInfos, &err);
kernelDst->getKernel(rootDeviceIndex)->isBuiltIn = true;
usedKernels.push_back(std::unique_ptr<MultiDeviceKernel>(kernelDst));
grabKernels(std::forward<KernelsDescArgsT>(kernelsDesc)...);
}
// Zero-argument overload: selected once the variadic grabKernels has consumed
// every kernel name / destination pair and forwards an empty pack. It does no
// work and reports CL_SUCCESS; the variadic caller returns void, so the value
// is discarded there.
cl_int grabKernels() { return CL_SUCCESS; }
std::unique_ptr<Program> prog;
std::vector<std::unique_ptr<Kernel>> usedKernels;
std::vector<std::unique_ptr<MultiDeviceKernel>> usedKernels;
BuiltIns &kernelsLib;
ClDevice &clDevice;
};

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
* Copyright (C) 2017-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -26,8 +26,9 @@ class VmeBuiltinDispatchInfoBuilder : public BuiltinDispatchInfoBuilder {
: BuiltinDispatchInfoBuilder(kernelsLib, device) {
populate(builtinOp,
mediaKernelsBuildOptions,
kernelName, vmeKernel);
kernelName, multiDeviceVmeKernel);
auto rootDeviceIndex = clDevice.getRootDeviceIndex();
vmeKernel = multiDeviceVmeKernel->getKernel(rootDeviceIndex);
widthArgNum = vmeKernel->getKernelInfo(rootDeviceIndex).getArgNumByName("width");
heightArgNum = vmeKernel->getKernelInfo(rootDeviceIndex).getArgNumByName("height");
strideArgNum = vmeKernel->getKernelInfo(rootDeviceIndex).getArgNumByName("stride");
@@ -241,6 +242,7 @@ class VmeBuiltinDispatchInfoBuilder : public BuiltinDispatchInfoBuilder {
int32_t motionVectorBufferArgNum;
int32_t predictionMotionVectorBufferArgNum;
int32_t residualsArgNum;
MultiDeviceKernel *multiDeviceVmeKernel;
Kernel *vmeKernel;
};