mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-11 16:45:25 +08:00
Add support for stateless fill buffer
Change-Id: I6dac17090e499f013916b1ba2f2b6d0de47f51a3 Signed-off-by: Kamil Kopryk <kamil.kopryk@intel.com> Related-To: NEO-3314
This commit is contained in:
@@ -34,6 +34,7 @@ set(RUNTIME_SRCS_BUILT_IN_KERNELS
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_image_to_image2d.igdrcl_built_in
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_image_to_image3d.igdrcl_built_in
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/fill_buffer.igdrcl_built_in
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/fill_buffer_stateless.igdrcl_built_in
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/fill_image1d.igdrcl_built_in
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/fill_image2d.igdrcl_built_in
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/fill_image3d.igdrcl_built_in
|
||||
|
||||
@@ -18,19 +18,20 @@ constexpr Type CopyBufferToBufferStateless{2};
|
||||
constexpr Type CopyBufferRect{3};
|
||||
constexpr Type CopyBufferRectStateless{4};
|
||||
constexpr Type FillBuffer{5};
|
||||
constexpr Type CopyBufferToImage3d{6};
|
||||
constexpr Type CopyImage3dToBuffer{7};
|
||||
constexpr Type CopyImageToImage1d{8};
|
||||
constexpr Type CopyImageToImage2d{9};
|
||||
constexpr Type CopyImageToImage3d{10};
|
||||
constexpr Type FillImage1d{11};
|
||||
constexpr Type FillImage2d{12};
|
||||
constexpr Type FillImage3d{13};
|
||||
constexpr Type VmeBlockMotionEstimateIntel{14};
|
||||
constexpr Type VmeBlockAdvancedMotionEstimateCheckIntel{15};
|
||||
constexpr Type VmeBlockAdvancedMotionEstimateBidirectionalCheckIntel{16};
|
||||
constexpr Type Scheduler{17};
|
||||
constexpr Type FillBufferStateless{6};
|
||||
constexpr Type CopyBufferToImage3d{7};
|
||||
constexpr Type CopyImage3dToBuffer{8};
|
||||
constexpr Type CopyImageToImage1d{9};
|
||||
constexpr Type CopyImageToImage2d{10};
|
||||
constexpr Type CopyImageToImage3d{11};
|
||||
constexpr Type FillImage1d{12};
|
||||
constexpr Type FillImage2d{13};
|
||||
constexpr Type FillImage3d{14};
|
||||
constexpr Type VmeBlockMotionEstimateIntel{15};
|
||||
constexpr Type VmeBlockAdvancedMotionEstimateCheckIntel{16};
|
||||
constexpr Type VmeBlockAdvancedMotionEstimateBidirectionalCheckIntel{17};
|
||||
constexpr Type Scheduler{18};
|
||||
|
||||
constexpr uint32_t MaxBaseValue{17};
|
||||
constexpr uint32_t MaxBaseValue{18};
|
||||
} // namespace EBuiltInOps
|
||||
} // namespace NEO
|
||||
|
||||
@@ -417,7 +417,7 @@ template <>
|
||||
class BuiltInOp<EBuiltInOps::FillBuffer> : public BuiltinDispatchInfoBuilder {
|
||||
public:
|
||||
BuiltInOp(BuiltIns &kernelsLib, Context &context, Device &device)
|
||||
: BuiltinDispatchInfoBuilder(kernelsLib), kernLeftLeftover(nullptr), kernMiddle(nullptr), kernRightLeftover(nullptr) {
|
||||
: BuiltinDispatchInfoBuilder(kernelsLib) {
|
||||
populate(context, device,
|
||||
EBuiltInOps::FillBuffer,
|
||||
"",
|
||||
@@ -484,9 +484,24 @@ class BuiltInOp<EBuiltInOps::FillBuffer> : public BuiltinDispatchInfoBuilder {
|
||||
}
|
||||
|
||||
protected:
|
||||
Kernel *kernLeftLeftover;
|
||||
Kernel *kernMiddle;
|
||||
Kernel *kernRightLeftover;
|
||||
Kernel *kernLeftLeftover = nullptr;
|
||||
Kernel *kernMiddle = nullptr;
|
||||
Kernel *kernRightLeftover = nullptr;
|
||||
|
||||
BuiltInOp(BuiltIns &kernelsLib) : BuiltinDispatchInfoBuilder(kernelsLib) {}
|
||||
};
|
||||
|
||||
template <>
|
||||
class BuiltInOp<EBuiltInOps::FillBufferStateless> : public BuiltInOp<EBuiltInOps::FillBuffer> {
|
||||
public:
|
||||
BuiltInOp(BuiltIns &kernelsLib, Context &context, Device &device) : BuiltInOp<EBuiltInOps::FillBuffer>(kernelsLib) {
|
||||
populate(context, device,
|
||||
EBuiltInOps::FillBufferStateless,
|
||||
"-cl-intel-greater-than-4GB-buffer-required",
|
||||
"FillBufferLeftLeftover", kernLeftLeftover,
|
||||
"FillBufferMiddle", kernMiddle,
|
||||
"FillBufferRightLeftover", kernRightLeftover);
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
@@ -807,6 +822,9 @@ BuiltinDispatchInfoBuilder &BuiltIns::getBuiltinDispatchInfoBuilder(EBuiltInOps:
|
||||
case EBuiltInOps::FillBuffer:
|
||||
std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique<BuiltInOp<EBuiltInOps::FillBuffer>>(*this, context, device); });
|
||||
break;
|
||||
case EBuiltInOps::FillBufferStateless:
|
||||
std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique<BuiltInOp<EBuiltInOps::FillBufferStateless>>(*this, context, device); });
|
||||
break;
|
||||
case EBuiltInOps::CopyBufferToImage3d:
|
||||
std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique<BuiltInOp<EBuiltInOps::CopyBufferToImage3d>>(*this, context, device); });
|
||||
break;
|
||||
|
||||
@@ -32,6 +32,8 @@ const char *getBuiltinAsString(EBuiltInOps::Type builtin) {
|
||||
return "copy_buffer_rect_stateless.igdrcl_built_in";
|
||||
case EBuiltInOps::FillBuffer:
|
||||
return "fill_buffer.igdrcl_built_in";
|
||||
case EBuiltInOps::FillBufferStateless:
|
||||
return "fill_buffer_stateless.igdrcl_built_in";
|
||||
case EBuiltInOps::CopyBufferToImage3d:
|
||||
return "copy_buffer_to_image3d.igdrcl_built_in";
|
||||
case EBuiltInOps::CopyImage3dToBuffer:
|
||||
|
||||
@@ -27,6 +27,7 @@ set(GENERATED_BUILTINS
|
||||
set(GENERATED_BUILTINS_STATELESS
|
||||
"copy_buffer_to_buffer_stateless"
|
||||
"copy_buffer_rect_stateless"
|
||||
"fill_buffer_stateless"
|
||||
)
|
||||
|
||||
# Generate builtins cpps
|
||||
|
||||
@@ -0,0 +1,49 @@
|
||||
/*
|
||||
* Copyright (C) 2019 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
R"===(
|
||||
// assumption is local work size = pattern size
|
||||
__kernel void FillBufferBytes(
|
||||
__global uchar* pDst,
|
||||
ulong dstOffsetInBytes,
|
||||
const __global uchar* pPattern )
|
||||
{
|
||||
size_t dstIndex = get_global_id(0) + dstOffsetInBytes;
|
||||
size_t srcIndex = get_local_id(0);
|
||||
pDst[dstIndex] = pPattern[srcIndex];
|
||||
}
|
||||
|
||||
__kernel void FillBufferLeftLeftover(
|
||||
__global uchar* pDst,
|
||||
ulong dstOffsetInBytes,
|
||||
const __global uchar* pPattern,
|
||||
const size_t patternSizeInEls )
|
||||
{
|
||||
size_t gid = get_global_id(0);
|
||||
pDst[ gid + dstOffsetInBytes ] = pPattern[ gid & (patternSizeInEls - 1) ];
|
||||
}
|
||||
|
||||
__kernel void FillBufferMiddle(
|
||||
__global uchar* pDst,
|
||||
ulong dstOffsetInBytes,
|
||||
const __global uint* pPattern,
|
||||
const ulong patternSizeInEls )
|
||||
{
|
||||
size_t gid = get_global_id(0);
|
||||
((__global uint*)(pDst + dstOffsetInBytes))[gid] = pPattern[ gid & (patternSizeInEls - 1) ];
|
||||
}
|
||||
|
||||
__kernel void FillBufferRightLeftover(
|
||||
__global uchar* pDst,
|
||||
ulong dstOffsetInBytes,
|
||||
const __global uchar* pPattern,
|
||||
const ulong patternSizeInEls )
|
||||
{
|
||||
size_t gid = get_global_id(0);
|
||||
pDst[ gid + dstOffsetInBytes ] = pPattern[ gid & (patternSizeInEls - 1) ];
|
||||
}
|
||||
)==="
|
||||
@@ -56,6 +56,15 @@ static RegisterEmbeddedResource registerFillBufferSrc(
|
||||
#include "runtime/built_ins/kernels/fill_buffer.igdrcl_built_in"
|
||||
));
|
||||
|
||||
static RegisterEmbeddedResource registerFillBufferStatelessSrc(
|
||||
createBuiltinResourceName(
|
||||
EBuiltInOps::FillBufferStateless,
|
||||
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
|
||||
.c_str(),
|
||||
std::string(
|
||||
#include "runtime/built_ins/kernels/fill_buffer_stateless.igdrcl_built_in"
|
||||
));
|
||||
|
||||
static RegisterEmbeddedResource registerCopyBufferToImage3dSrc(
|
||||
createBuiltinResourceName(
|
||||
EBuiltInOps::CopyBufferToImage3d,
|
||||
|
||||
Reference in New Issue
Block a user