Add support for stateless fill buffer

Change-Id: I6dac17090e499f013916b1ba2f2b6d0de47f51a3
Signed-off-by: Kamil Kopryk <kamil.kopryk@intel.com>
Related-To: NEO-3314
This commit is contained in:
Kamil Kopryk
2019-10-29 14:23:32 +01:00
committed by sys_ocldev
parent 0db28144ec
commit dc0272d86a
9 changed files with 198 additions and 20 deletions

View File

@@ -34,6 +34,7 @@ set(RUNTIME_SRCS_BUILT_IN_KERNELS
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_image_to_image2d.igdrcl_built_in
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_image_to_image3d.igdrcl_built_in
${CMAKE_CURRENT_SOURCE_DIR}/kernels/fill_buffer.igdrcl_built_in
${CMAKE_CURRENT_SOURCE_DIR}/kernels/fill_buffer_stateless.igdrcl_built_in
${CMAKE_CURRENT_SOURCE_DIR}/kernels/fill_image1d.igdrcl_built_in
${CMAKE_CURRENT_SOURCE_DIR}/kernels/fill_image2d.igdrcl_built_in
${CMAKE_CURRENT_SOURCE_DIR}/kernels/fill_image3d.igdrcl_built_in

View File

@@ -18,19 +18,20 @@ constexpr Type CopyBufferToBufferStateless{2};
constexpr Type CopyBufferRect{3};
constexpr Type CopyBufferRectStateless{4};
constexpr Type FillBuffer{5};
constexpr Type CopyBufferToImage3d{6};
constexpr Type CopyImage3dToBuffer{7};
constexpr Type CopyImageToImage1d{8};
constexpr Type CopyImageToImage2d{9};
constexpr Type CopyImageToImage3d{10};
constexpr Type FillImage1d{11};
constexpr Type FillImage2d{12};
constexpr Type FillImage3d{13};
constexpr Type VmeBlockMotionEstimateIntel{14};
constexpr Type VmeBlockAdvancedMotionEstimateCheckIntel{15};
constexpr Type VmeBlockAdvancedMotionEstimateBidirectionalCheckIntel{16};
constexpr Type Scheduler{17};
constexpr Type FillBufferStateless{6};
constexpr Type CopyBufferToImage3d{7};
constexpr Type CopyImage3dToBuffer{8};
constexpr Type CopyImageToImage1d{9};
constexpr Type CopyImageToImage2d{10};
constexpr Type CopyImageToImage3d{11};
constexpr Type FillImage1d{12};
constexpr Type FillImage2d{13};
constexpr Type FillImage3d{14};
constexpr Type VmeBlockMotionEstimateIntel{15};
constexpr Type VmeBlockAdvancedMotionEstimateCheckIntel{16};
constexpr Type VmeBlockAdvancedMotionEstimateBidirectionalCheckIntel{17};
constexpr Type Scheduler{18};
constexpr uint32_t MaxBaseValue{17};
constexpr uint32_t MaxBaseValue{18};
} // namespace EBuiltInOps
} // namespace NEO

View File

@@ -417,7 +417,7 @@ template <>
class BuiltInOp<EBuiltInOps::FillBuffer> : public BuiltinDispatchInfoBuilder {
public:
BuiltInOp(BuiltIns &kernelsLib, Context &context, Device &device)
: BuiltinDispatchInfoBuilder(kernelsLib), kernLeftLeftover(nullptr), kernMiddle(nullptr), kernRightLeftover(nullptr) {
: BuiltinDispatchInfoBuilder(kernelsLib) {
populate(context, device,
EBuiltInOps::FillBuffer,
"",
@@ -484,9 +484,24 @@ class BuiltInOp<EBuiltInOps::FillBuffer> : public BuiltinDispatchInfoBuilder {
}
protected:
Kernel *kernLeftLeftover;
Kernel *kernMiddle;
Kernel *kernRightLeftover;
Kernel *kernLeftLeftover = nullptr;
Kernel *kernMiddle = nullptr;
Kernel *kernRightLeftover = nullptr;
BuiltInOp(BuiltIns &kernelsLib) : BuiltinDispatchInfoBuilder(kernelsLib) {}
};
template <>
class BuiltInOp<EBuiltInOps::FillBufferStateless> : public BuiltInOp<EBuiltInOps::FillBuffer> {
public:
BuiltInOp(BuiltIns &kernelsLib, Context &context, Device &device) : BuiltInOp<EBuiltInOps::FillBuffer>(kernelsLib) {
populate(context, device,
EBuiltInOps::FillBufferStateless,
"-cl-intel-greater-than-4GB-buffer-required",
"FillBufferLeftLeftover", kernLeftLeftover,
"FillBufferMiddle", kernMiddle,
"FillBufferRightLeftover", kernRightLeftover);
}
};
template <>
@@ -807,6 +822,9 @@ BuiltinDispatchInfoBuilder &BuiltIns::getBuiltinDispatchInfoBuilder(EBuiltInOps:
case EBuiltInOps::FillBuffer:
std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique<BuiltInOp<EBuiltInOps::FillBuffer>>(*this, context, device); });
break;
case EBuiltInOps::FillBufferStateless:
std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique<BuiltInOp<EBuiltInOps::FillBufferStateless>>(*this, context, device); });
break;
case EBuiltInOps::CopyBufferToImage3d:
std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique<BuiltInOp<EBuiltInOps::CopyBufferToImage3d>>(*this, context, device); });
break;

View File

@@ -32,6 +32,8 @@ const char *getBuiltinAsString(EBuiltInOps::Type builtin) {
return "copy_buffer_rect_stateless.igdrcl_built_in";
case EBuiltInOps::FillBuffer:
return "fill_buffer.igdrcl_built_in";
case EBuiltInOps::FillBufferStateless:
return "fill_buffer_stateless.igdrcl_built_in";
case EBuiltInOps::CopyBufferToImage3d:
return "copy_buffer_to_image3d.igdrcl_built_in";
case EBuiltInOps::CopyImage3dToBuffer:

View File

@@ -27,6 +27,7 @@ set(GENERATED_BUILTINS
set(GENERATED_BUILTINS_STATELESS
"copy_buffer_to_buffer_stateless"
"copy_buffer_rect_stateless"
"fill_buffer_stateless"
)
# Generate builtins cpps

View File

@@ -0,0 +1,49 @@
/*
* Copyright (C) 2019 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
R"===(
// assumption is local work size = pattern size
__kernel void FillBufferBytes(
__global uchar* pDst,
ulong dstOffsetInBytes,
const __global uchar* pPattern )
{
size_t dstIndex = get_global_id(0) + dstOffsetInBytes;
size_t srcIndex = get_local_id(0);
pDst[dstIndex] = pPattern[srcIndex];
}
__kernel void FillBufferLeftLeftover(
__global uchar* pDst,
ulong dstOffsetInBytes,
const __global uchar* pPattern,
const size_t patternSizeInEls )
{
size_t gid = get_global_id(0);
pDst[ gid + dstOffsetInBytes ] = pPattern[ gid & (patternSizeInEls - 1) ];
}
__kernel void FillBufferMiddle(
__global uchar* pDst,
ulong dstOffsetInBytes,
const __global uint* pPattern,
const ulong patternSizeInEls )
{
size_t gid = get_global_id(0);
((__global uint*)(pDst + dstOffsetInBytes))[gid] = pPattern[ gid & (patternSizeInEls - 1) ];
}
__kernel void FillBufferRightLeftover(
__global uchar* pDst,
ulong dstOffsetInBytes,
const __global uchar* pPattern,
const ulong patternSizeInEls )
{
size_t gid = get_global_id(0);
pDst[ gid + dstOffsetInBytes ] = pPattern[ gid & (patternSizeInEls - 1) ];
}
)==="

View File

@@ -56,6 +56,15 @@ static RegisterEmbeddedResource registerFillBufferSrc(
#include "runtime/built_ins/kernels/fill_buffer.igdrcl_built_in"
));
static RegisterEmbeddedResource registerFillBufferStatelessSrc(
createBuiltinResourceName(
EBuiltInOps::FillBufferStateless,
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
.c_str(),
std::string(
#include "runtime/built_ins/kernels/fill_buffer_stateless.igdrcl_built_in"
));
static RegisterEmbeddedResource registerCopyBufferToImage3dSrc(
createBuiltinResourceName(
EBuiltInOps::CopyBufferToImage3d,