mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-26 23:33:20 +08:00
Add support for stateless copy buffer to buffer
Change-Id: I7ad19890ea1725bcecf17aee16abe1f993f5b08c Signed-off-by: Kamil Kopryk <kamil.kopryk@intel.com> Related-To: NEO-3314
This commit is contained in:
@@ -26,6 +26,7 @@ set(RUNTIME_SRCS_BUILT_IN_KERNELS
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/aux_translation.igdrcl_built_in
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_buffer_rect.igdrcl_built_in
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_buffer_to_buffer.igdrcl_built_in
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_buffer_to_buffer_stateless.igdrcl_built_in
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_buffer_to_image3d.igdrcl_built_in
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_image3d_to_buffer.igdrcl_built_in
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/copy_image_to_image1d.igdrcl_built_in
|
||||
@@ -35,9 +36,9 @@ set(RUNTIME_SRCS_BUILT_IN_KERNELS
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/fill_image1d.igdrcl_built_in
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/fill_image2d.igdrcl_built_in
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/fill_image3d.igdrcl_built_in
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/vme_block_motion_estimate_intel.igdrcl_built_in
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/vme_block_advanced_motion_estimate_check_intel.igdrcl_built_in
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/vme_block_advanced_motion_estimate_bidirectional_check_intel.igdrcl_built_in
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/vme_block_advanced_motion_estimate_check_intel.igdrcl_built_in
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernels/vme_block_motion_estimate_intel.igdrcl_built_in
|
||||
)
|
||||
|
||||
target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_BUILT_IN_KERNELS})
|
||||
|
||||
@@ -14,20 +14,22 @@ using Type = uint32_t;
|
||||
|
||||
constexpr Type AuxTranslation{0};
|
||||
constexpr Type CopyBufferToBuffer{1};
|
||||
constexpr Type CopyBufferRect{2};
|
||||
constexpr Type FillBuffer{3};
|
||||
constexpr Type CopyBufferToImage3d{4};
|
||||
constexpr Type CopyImage3dToBuffer{5};
|
||||
constexpr Type CopyImageToImage1d{6};
|
||||
constexpr Type CopyImageToImage2d{7};
|
||||
constexpr Type CopyImageToImage3d{8};
|
||||
constexpr Type FillImage1d{9};
|
||||
constexpr Type FillImage2d{10};
|
||||
constexpr Type FillImage3d{11};
|
||||
constexpr Type VmeBlockMotionEstimateIntel{12};
|
||||
constexpr Type VmeBlockAdvancedMotionEstimateCheckIntel{13};
|
||||
constexpr Type VmeBlockAdvancedMotionEstimateBidirectionalCheckIntel{14};
|
||||
constexpr Type Scheduler{15};
|
||||
constexpr uint32_t MaxBaseValue{15};
|
||||
constexpr Type CopyBufferToBufferStateless{2};
|
||||
constexpr Type CopyBufferRect{3};
|
||||
constexpr Type FillBuffer{4};
|
||||
constexpr Type CopyBufferToImage3d{5};
|
||||
constexpr Type CopyImage3dToBuffer{6};
|
||||
constexpr Type CopyImageToImage1d{7};
|
||||
constexpr Type CopyImageToImage2d{8};
|
||||
constexpr Type CopyImageToImage3d{9};
|
||||
constexpr Type FillImage1d{10};
|
||||
constexpr Type FillImage2d{11};
|
||||
constexpr Type FillImage3d{12};
|
||||
constexpr Type VmeBlockMotionEstimateIntel{13};
|
||||
constexpr Type VmeBlockAdvancedMotionEstimateCheckIntel{14};
|
||||
constexpr Type VmeBlockAdvancedMotionEstimateBidirectionalCheckIntel{15};
|
||||
constexpr Type Scheduler{16};
|
||||
|
||||
constexpr uint32_t MaxBaseValue{16};
|
||||
} // namespace EBuiltInOps
|
||||
} // namespace NEO
|
||||
|
||||
@@ -200,7 +200,7 @@ template <>
|
||||
class BuiltInOp<EBuiltInOps::CopyBufferToBuffer> : public BuiltinDispatchInfoBuilder {
|
||||
public:
|
||||
BuiltInOp(BuiltIns &kernelsLib, Context &context, Device &device)
|
||||
: BuiltinDispatchInfoBuilder(kernelsLib), kernLeftLeftover(nullptr), kernMiddle(nullptr), kernRightLeftover(nullptr) {
|
||||
: BuiltinDispatchInfoBuilder(kernelsLib) {
|
||||
populate(context, device,
|
||||
EBuiltInOps::CopyBufferToBuffer,
|
||||
"",
|
||||
@@ -278,9 +278,26 @@ class BuiltInOp<EBuiltInOps::CopyBufferToBuffer> : public BuiltinDispatchInfoBui
|
||||
}
|
||||
|
||||
protected:
|
||||
Kernel *kernLeftLeftover;
|
||||
Kernel *kernMiddle;
|
||||
Kernel *kernRightLeftover;
|
||||
Kernel *kernLeftLeftover = nullptr;
|
||||
Kernel *kernMiddle = nullptr;
|
||||
Kernel *kernRightLeftover = nullptr;
|
||||
BuiltInOp(BuiltIns &kernelsLib)
|
||||
: BuiltinDispatchInfoBuilder(kernelsLib) {
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
class BuiltInOp<EBuiltInOps::CopyBufferToBufferStateless> : public BuiltInOp<EBuiltInOps::CopyBufferToBuffer> {
|
||||
public:
|
||||
BuiltInOp(BuiltIns &kernelsLib, Context &context, Device &device)
|
||||
: BuiltInOp<EBuiltInOps::CopyBufferToBuffer>(kernelsLib) {
|
||||
populate(context, device,
|
||||
EBuiltInOps::CopyBufferToBufferStateless,
|
||||
"-cl-intel-greater-than-4GB-buffer-required",
|
||||
"CopyBufferToBufferLeftLeftover", kernLeftLeftover,
|
||||
"CopyBufferToBufferMiddle", kernMiddle,
|
||||
"CopyBufferToBufferRightLeftover", kernRightLeftover);
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
@@ -763,6 +780,9 @@ BuiltinDispatchInfoBuilder &BuiltIns::getBuiltinDispatchInfoBuilder(EBuiltInOps:
|
||||
case EBuiltInOps::CopyBufferToBuffer:
|
||||
std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique<BuiltInOp<EBuiltInOps::CopyBufferToBuffer>>(*this, context, device); });
|
||||
break;
|
||||
case EBuiltInOps::CopyBufferToBufferStateless:
|
||||
std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique<BuiltInOp<EBuiltInOps::CopyBufferToBufferStateless>>(*this, context, device); });
|
||||
break;
|
||||
case EBuiltInOps::CopyBufferRect:
|
||||
std::call_once(operationBuilder.second, [&] { operationBuilder.first = std::make_unique<BuiltInOp<EBuiltInOps::CopyBufferRect>>(*this, context, device); });
|
||||
break;
|
||||
|
||||
@@ -24,6 +24,8 @@ const char *getBuiltinAsString(EBuiltInOps::Type builtin) {
|
||||
return "aux_translation.igdrcl_built_in";
|
||||
case EBuiltInOps::CopyBufferToBuffer:
|
||||
return "copy_buffer_to_buffer.igdrcl_built_in";
|
||||
case EBuiltInOps::CopyBufferToBufferStateless:
|
||||
return "copy_buffer_to_buffer_stateless.igdrcl_built_in";
|
||||
case EBuiltInOps::CopyBufferRect:
|
||||
return "copy_buffer_rect.igdrcl_built_in";
|
||||
case EBuiltInOps::FillBuffer:
|
||||
|
||||
@@ -24,6 +24,10 @@ set(GENERATED_BUILTINS
|
||||
"fill_image3d"
|
||||
)
|
||||
|
||||
set(GENERATED_BUILTINS_STATELESS
|
||||
"copy_buffer_to_buffer_stateless"
|
||||
)
|
||||
|
||||
# Generate builtins cpps
|
||||
if(COMPILE_BUILT_INS)
|
||||
add_subdirectory(kernels)
|
||||
@@ -35,6 +39,9 @@ macro(macro_for_each_gen)
|
||||
foreach(GENERATED_BUILTIN ${GENERATED_BUILTINS})
|
||||
list(APPEND GENERATED_BUILTINS_CPPS ${BUILTINS_INCLUDE_DIR}/${RUNTIME_GENERATED_${GENERATED_BUILTIN}_${family_name_with_type}})
|
||||
endforeach()
|
||||
foreach(GENERATED_BUILTIN_STATELESS ${GENERATED_BUILTINS_STATELESS})
|
||||
list(APPEND GENERATED_BUILTINS_CPPS ${BUILTINS_INCLUDE_DIR}/${RUNTIME_GENERATED_${GENERATED_BUILTIN_STATELESS}_${family_name_with_type}})
|
||||
endforeach()
|
||||
endforeach()
|
||||
source_group("generated files\\${GEN_TYPE_LOWER}" FILES ${GENERATED_BUILTINS_CPPS})
|
||||
endmacro()
|
||||
|
||||
@@ -10,6 +10,7 @@ set(BUILTINS_OUTDIR_WITH_ARCH "${TargetDir}/built_ins/${NEO_ARCH}")
|
||||
add_dependencies(${BUILTINS_BINARIES_LIB_NAME} builtins)
|
||||
add_subdirectories()
|
||||
set(GENERATED_BUILTINS ${GENERATED_BUILTINS} PARENT_SCOPE)
|
||||
set(GENERATED_BUILTINS_STATELESS ${GENERATED_BUILTINS_STATELESS} PARENT_SCOPE)
|
||||
|
||||
if("${NEO_ARCH}" STREQUAL "x32")
|
||||
set(BUILTIN_OPTIONS "-cl-intel-greater-than-4GB-buffer-required")
|
||||
@@ -17,6 +18,10 @@ else()
|
||||
set(BUILTIN_OPTIONS "")
|
||||
endif()
|
||||
|
||||
set(BUILTIN_OPTIONS_STATELESS
|
||||
"-cl-intel-greater-than-4GB-buffer-required"
|
||||
)
|
||||
|
||||
if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug" )
|
||||
list(APPEND __cloc__options__ "-D DEBUG")
|
||||
endif()
|
||||
@@ -24,8 +29,17 @@ endif()
|
||||
set(BUILTINS_INCLUDE_DIR ${TargetDir} PARENT_SCOPE)
|
||||
set(BUILTIN_CPP "")
|
||||
|
||||
function(get_bits_for_stateless gen_type platform_type)
|
||||
# Force 32bits compiling on gen9lp for stateless builtins
|
||||
if( (${GEN_TYPE} STREQUAL "GEN9" ) AND ( ${PLATFORM_TYPE} STREQUAL "LP"))
|
||||
set(BITS "32" PARENT_SCOPE)
|
||||
else()
|
||||
set(BITS ${NEO_BITS} PARENT_SCOPE)
|
||||
endif()
|
||||
endfunction()
|
||||
|
||||
# Define function for compiling built-ins (with ocloc)
|
||||
function(compile_builtin gen_type platform_type builtin)
|
||||
function(compile_builtin gen_type platform_type builtin bits builtin_options)
|
||||
string(TOLOWER ${gen_type} gen_type_lower)
|
||||
get_family_name_with_type(${gen_type} ${platform_type})
|
||||
set(OUTPUTDIR "${BUILTINS_OUTDIR_WITH_ARCH}/${gen_type_lower}")
|
||||
@@ -58,7 +72,7 @@ function(compile_builtin gen_type platform_type builtin)
|
||||
list(APPEND __cloc__options__ "-cl-kernel-arg-info")
|
||||
add_custom_command(
|
||||
OUTPUT ${OUTPUT_FILES}
|
||||
COMMAND ${cloc_cmd_prefix} -q -file ${FILENAME} -device ${DEFAULT_SUPPORTED_${gen_type}_${platform_type}_PLATFORM} ${BUILTIN_OPTIONS} -${NEO_BITS} -out_dir ${OUTPUTDIR} -cpp_file -options "$<JOIN:${__cloc__options__}, >"
|
||||
COMMAND ${cloc_cmd_prefix} -q -file ${FILENAME} -device ${DEFAULT_SUPPORTED_${gen_type}_${platform_type}_PLATFORM} ${builtin_options} -${bits} -out_dir ${OUTPUTDIR} -cpp_file -options "$<JOIN:${__cloc__options__}, >"
|
||||
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
|
||||
DEPENDS ${builtin} ocloc copy_compiler_files
|
||||
)
|
||||
@@ -71,11 +85,16 @@ macro(macro_for_each_gen)
|
||||
string(TOLOWER ${PLATFORM_TYPE} PLATFORM_TYPE_LOWER)
|
||||
unset(BUILTINS_COMMANDS)
|
||||
foreach(GENERATED_BUILTIN ${GENERATED_BUILTINS})
|
||||
compile_builtin(${GEN_TYPE} ${PLATFORM_TYPE} ${GENERATED_BUILTIN}.igdrcl_built_in)
|
||||
compile_builtin(${GEN_TYPE} ${PLATFORM_TYPE} ${GENERATED_BUILTIN}.igdrcl_built_in ${NEO_BITS} "${BUILTIN_OPTIONS}")
|
||||
list(APPEND BUILTINS_COMMANDS ${TargetDir}/${BUILTIN_CPP})
|
||||
set(RUNTIME_GENERATED_${GENERATED_BUILTIN}_${family_name_with_type} ${BUILTIN_CPP} PARENT_SCOPE)
|
||||
endforeach()
|
||||
|
||||
get_bits_for_stateless(${GEN_TYPE} ${PLATFORM_TYPE})
|
||||
foreach(GENERATED_BUILTIN_STATELESS ${GENERATED_BUILTINS_STATELESS})
|
||||
compile_builtin(${GEN_TYPE} ${PLATFORM_TYPE} ${GENERATED_BUILTIN_STATELESS}.igdrcl_built_in ${BITS} "${BUILTIN_OPTIONS_STATELESS}")
|
||||
list(APPEND BUILTINS_COMMANDS ${TargetDir}/${BUILTIN_CPP})
|
||||
set(RUNTIME_GENERATED_${GENERATED_BUILTIN_STATELESS}_${family_name_with_type} ${BUILTIN_CPP} PARENT_SCOPE)
|
||||
endforeach()
|
||||
set(target_name builtins_${family_name_with_type})
|
||||
add_custom_target(${target_name} DEPENDS ${BUILTINS_COMMANDS})
|
||||
add_dependencies(builtins ${target_name})
|
||||
|
||||
@@ -0,0 +1,54 @@
|
||||
/*
|
||||
* Copyright (C) 2019 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
R"===(
|
||||
__kernel void CopyBufferToBufferBytes(
|
||||
const __global uchar* pSrc,
|
||||
__global uchar* pDst,
|
||||
ulong srcOffsetInBytes,
|
||||
ulong dstOffsetInBytes,
|
||||
ulong bytesToRead )
|
||||
{
|
||||
pSrc += ( srcOffsetInBytes + get_global_id(0) );
|
||||
pDst += ( dstOffsetInBytes + get_global_id(0) );
|
||||
pDst[ 0 ] = pSrc[ 0 ];
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToBufferLeftLeftover(
|
||||
const __global uchar* pSrc,
|
||||
__global uchar* pDst,
|
||||
ulong srcOffsetInBytes,
|
||||
ulong dstOffsetInBytes)
|
||||
{
|
||||
size_t gid = get_global_id(0);
|
||||
pDst[ gid + dstOffsetInBytes ] = pSrc[ gid + srcOffsetInBytes ];
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToBufferMiddle(
|
||||
const __global uint* pSrc,
|
||||
__global uint* pDst,
|
||||
ulong srcOffsetInBytes,
|
||||
ulong dstOffsetInBytes)
|
||||
{
|
||||
size_t gid = get_global_id(0);
|
||||
pDst += dstOffsetInBytes >> 2;
|
||||
pSrc += srcOffsetInBytes >> 2;
|
||||
uint4 loaded = vload4(gid, pSrc);
|
||||
vstore4(loaded, gid, pDst);
|
||||
}
|
||||
|
||||
__kernel void CopyBufferToBufferRightLeftover(
|
||||
const __global uchar* pSrc,
|
||||
__global uchar* pDst,
|
||||
ulong srcOffsetInBytes,
|
||||
ulong dstOffsetInBytes)
|
||||
{
|
||||
size_t gid = get_global_id(0);
|
||||
pDst[ gid + dstOffsetInBytes ] = pSrc[ gid + srcOffsetInBytes ];
|
||||
}
|
||||
|
||||
)==="
|
||||
@@ -20,6 +20,15 @@ static RegisterEmbeddedResource registerCopyBufferToBufferSrc(
|
||||
#include "runtime/built_ins/kernels/copy_buffer_to_buffer.igdrcl_built_in"
|
||||
));
|
||||
|
||||
static RegisterEmbeddedResource registerCopyBufferToBufferStatelessSrc(
|
||||
createBuiltinResourceName(
|
||||
EBuiltInOps::CopyBufferToBufferStateless,
|
||||
BuiltinCode::getExtension(BuiltinCode::ECodeType::Source))
|
||||
.c_str(),
|
||||
std::string(
|
||||
#include "runtime/built_ins/kernels/copy_buffer_to_buffer_stateless.igdrcl_built_in"
|
||||
));
|
||||
|
||||
static RegisterEmbeddedResource registerCopyBufferRectSrc(
|
||||
createBuiltinResourceName(
|
||||
EBuiltInOps::CopyBufferRect,
|
||||
|
||||
@@ -395,6 +395,8 @@ class CommandQueueHw : public CommandQueue {
|
||||
MOCKABLE_VIRTUAL void dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo, MemObjsForAuxTranslation &memObjsForAuxTranslation,
|
||||
AuxTranslationDirection auxTranslationDirection);
|
||||
|
||||
MOCKABLE_VIRTUAL bool forceStateless(size_t size);
|
||||
|
||||
template <uint32_t commandType>
|
||||
LinearStream *obtainCommandStream(const CsrDependencies &csrDependencies, bool blitEnqueue, bool blockedQueue,
|
||||
const MultiDispatchInfo &multiDispatchInfo, const EventsRequest &eventsRequest,
|
||||
|
||||
@@ -113,4 +113,10 @@ void CommandQueueHw<Family>::dispatchAuxTranslation(MultiDispatchInfo &multiDisp
|
||||
|
||||
auxTranslationBuilder.buildDispatchInfosForAuxTranslation<Family>(multiDispatchInfo, dispatchParams);
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
bool CommandQueueHw<Family>::forceStateless(size_t size) {
|
||||
return size >= 4ull * MemoryConstants::gigaByte;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -32,9 +32,16 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyBuffer(
|
||||
cl_event *event) {
|
||||
|
||||
MultiDispatchInfo dispatchInfo;
|
||||
auto eBuiltInOpsType = EBuiltInOps::CopyBufferToBuffer;
|
||||
|
||||
if (forceStateless(size)) {
|
||||
eBuiltInOpsType = EBuiltInOps::CopyBufferToBufferStateless;
|
||||
}
|
||||
|
||||
auto &builder = getDevice().getExecutionEnvironment()->getBuiltIns()->getBuiltinDispatchInfoBuilder(eBuiltInOpsType,
|
||||
this->getContext(),
|
||||
this->getDevice());
|
||||
|
||||
auto &builder = getDevice().getExecutionEnvironment()->getBuiltIns()->getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer,
|
||||
this->getContext(), this->getDevice());
|
||||
BuiltInOwnershipWrapper builtInLock(builder, this->context);
|
||||
|
||||
BuiltinOpParams dc;
|
||||
|
||||
@@ -506,6 +506,7 @@ HWTEST_F(AUBSimpleKernelStatelessTest, givenSimpleKernelWhenStatelessPathIsUsedT
|
||||
}
|
||||
|
||||
EXPECT_FALSE(this->kernel->getKernelInfo().kernelArgInfo[0].pureStatefulBufferAccess);
|
||||
EXPECT_TRUE(this->kernel->getKernelInfo().patchInfo.executionEnvironment->CompiledForGreaterThan4GBBuffers);
|
||||
|
||||
this->pCmdQ->flush();
|
||||
expectMemory<FamilyType>(reinterpret_cast<void *>(pBuffer->getGraphicsAllocation()->getGpuAddress()),
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
#include "core/helpers/ptr_math.h"
|
||||
#include "runtime/built_ins/built_ins.h"
|
||||
#include "runtime/built_ins/builtins_dispatch_builder.h"
|
||||
#include "runtime/command_queue/command_queue_hw.h"
|
||||
#include "runtime/helpers/dispatch_info.h"
|
||||
#include "runtime/kernel/kernel.h"
|
||||
#include "test.h"
|
||||
@@ -138,7 +139,7 @@ HWTEST_F(EnqueueCopyBufferTest, WhenCopyingBufferThenIndirectDataGetsAdded) {
|
||||
|
||||
BuiltinOpParams dc;
|
||||
dc.srcMemObj = srcBuffer;
|
||||
dc.srcMemObj = dstBuffer;
|
||||
dc.dstMemObj = dstBuffer;
|
||||
dc.srcOffset = {EnqueueCopyBufferTraits::srcOffset, 0, 0};
|
||||
dc.dstOffset = {EnqueueCopyBufferTraits::dstOffset, 0, 0};
|
||||
dc.size = {EnqueueCopyBufferTraits::size, 0, 0};
|
||||
@@ -154,6 +155,30 @@ HWTEST_F(EnqueueCopyBufferTest, WhenCopyingBufferThenIndirectDataGetsAdded) {
|
||||
}
|
||||
}
|
||||
|
||||
HWTEST_F(EnqueueCopyBufferTest, WhenCopyingBufferStatelessThenStatelessKernelIsUsed) {
|
||||
|
||||
auto srcBuffer = std::unique_ptr<Buffer>(BufferHelper<>::create());
|
||||
auto dstBuffer = std::unique_ptr<Buffer>(BufferHelper<>::create());
|
||||
|
||||
MultiDispatchInfo multiDispatchInfo;
|
||||
auto &builder = pDevice->getExecutionEnvironment()->getBuiltIns()->getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBufferStateless,
|
||||
pCmdQ->getContext(), pCmdQ->getDevice());
|
||||
|
||||
ASSERT_NE(nullptr, &builder);
|
||||
BuiltinOpParams dc;
|
||||
dc.srcMemObj = srcBuffer.get();
|
||||
dc.dstMemObj = dstBuffer.get();
|
||||
dc.srcOffset = {EnqueueCopyBufferTraits::srcOffset, 0, 0};
|
||||
dc.dstOffset = {EnqueueCopyBufferTraits::dstOffset, 0, 0};
|
||||
dc.size = {EnqueueCopyBufferTraits::size, 0, 0};
|
||||
builder.buildDispatchInfos(multiDispatchInfo, dc);
|
||||
EXPECT_NE(0u, multiDispatchInfo.size());
|
||||
|
||||
auto kernel = multiDispatchInfo.begin()->getKernel();
|
||||
EXPECT_TRUE(kernel->getKernelInfo().patchInfo.executionEnvironment->CompiledForGreaterThan4GBBuffers);
|
||||
EXPECT_FALSE(kernel->getKernelInfo().kernelArgInfo[0].pureStatefulBufferAccess);
|
||||
}
|
||||
|
||||
HWTEST_F(EnqueueCopyBufferTest, WhenCopyingBufferThenL3ProgrammingIsCorrect) {
|
||||
enqueueCopyBufferAndParse<FamilyType>();
|
||||
validateL3Programming<FamilyType>(cmdList, itorWalker);
|
||||
@@ -273,3 +298,83 @@ HWTEST_F(EnqueueCopyBufferTest, WhenCopyingBufferThenArgumentOneMatchesDestinati
|
||||
|
||||
EXPECT_EQ((void *)((uintptr_t)dstBuffer->getGraphicsAllocation()->getGpuAddress()), *pArgument);
|
||||
}
|
||||
|
||||
struct EnqueueCopyBufferHw : public ::testing::Test {
|
||||
template <typename FamilyType>
|
||||
struct MyCmdQStateless : public CommandQueueHw<FamilyType> {
|
||||
using CommandQueueHw<FamilyType>::commandStream;
|
||||
MyCmdQStateless(Context *context, Device *device) : CommandQueueHw<FamilyType>(context, device, nullptr){};
|
||||
|
||||
bool forceStateless(size_t size) override {
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename FamilyType>
|
||||
struct MyCmdQStatefull : public CommandQueueHw<FamilyType> {
|
||||
using CommandQueueHw<FamilyType>::commandStream;
|
||||
MyCmdQStatefull(Context *context, Device *device) : CommandQueueHw<FamilyType>(context, device, nullptr){};
|
||||
|
||||
bool forceStateless(size_t size) override {
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
void SetUp() override {
|
||||
device.reset(MockDevice::createWithNewExecutionEnvironment<MockDevice>(*platformDevices));
|
||||
context.reset(new MockContext(device.get()));
|
||||
|
||||
srcBuffer = std::unique_ptr<Buffer>(BufferHelper<>::create(context.get()));
|
||||
dstBuffer = std::unique_ptr<Buffer>(BufferHelper<>::create(context.get()));
|
||||
}
|
||||
|
||||
std::unique_ptr<MockDevice> device;
|
||||
std::unique_ptr<MockContext> context;
|
||||
std::unique_ptr<Buffer> srcBuffer;
|
||||
std::unique_ptr<Buffer> dstBuffer;
|
||||
};
|
||||
|
||||
using EnqueueCopyBufferStatelessTest = EnqueueCopyBufferHw;
|
||||
|
||||
HWTEST_F(EnqueueCopyBufferStatelessTest, givenBuffersWhenCopyingBufferStatelessThenSuccessIsReturned) {
|
||||
|
||||
if (is32bit) {
|
||||
GTEST_SKIP();
|
||||
}
|
||||
auto cmdQ = std::make_unique<MyCmdQStateless<FamilyType>>(context.get(), device.get());
|
||||
|
||||
auto retVal = cmdQ->enqueueCopyBuffer(
|
||||
srcBuffer.get(),
|
||||
dstBuffer.get(),
|
||||
0,
|
||||
0,
|
||||
sizeof(float),
|
||||
0,
|
||||
nullptr,
|
||||
nullptr);
|
||||
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
}
|
||||
|
||||
using EnqueueCopyBufferStatefullTest = EnqueueCopyBufferHw;
|
||||
|
||||
HWTEST_F(EnqueueCopyBufferStatefullTest, givenBuffersWhenCopyingBufferStatefullThenSuccessIsReturned) {
|
||||
|
||||
if (is32bit) {
|
||||
GTEST_SKIP();
|
||||
}
|
||||
|
||||
auto cmdQ = std::make_unique<MyCmdQStatefull<FamilyType>>(context.get(), device.get());
|
||||
|
||||
auto retVal = cmdQ->enqueueCopyBuffer(
|
||||
srcBuffer.get(),
|
||||
dstBuffer.get(),
|
||||
0,
|
||||
0,
|
||||
sizeof(float),
|
||||
0,
|
||||
nullptr,
|
||||
nullptr);
|
||||
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user