From 403b49e987c375a689c0abbfc21c5fec6e70053e Mon Sep 17 00:00:00 2001 From: Szymon Morek Date: Fri, 2 Sep 2022 16:16:41 +0000 Subject: [PATCH] Append additional fill kernel for left leftover Related-To: NEO-7287 Signed-off-by: Szymon Morek --- .../source/builtin/builtin_functions_lib.h | 4 +- .../builtin/builtin_functions_lib_impl.cpp | 8 +-- level_zero/core/source/cmdlist/cmdlist_hw.h | 9 ++- level_zero/core/source/cmdlist/cmdlist_hw.inl | 62 ++++++++++++------- .../sources/cmdlist/test_cmdlist_fill.cpp | 62 ++++++++++++++++++- manifests/manifest.yml | 2 +- .../kernels/fill_buffer.builtin_kernel | 7 ++- .../fill_buffer_stateless.builtin_kernel | 7 ++- .../kernel_binary_helper_hash_value.cpp | 4 +- ...29276190336.cl => 11060315779005710265.cl} | 7 ++- ...s.txt => 11060315779005710265_options.txt} | 0 ...mages.cl => 7912693229131574095_images.cl} | 7 ++- ...=> 7912693229131574095_images_options.txt} | 0 .../common/test_files/builtin_copyfill.cl | 9 +-- .../test_files/builtin_copyfill_stateless.cl | 7 ++- .../test/common/test_files/builtin_images.cl | 9 +-- 16 files changed, 146 insertions(+), 58 deletions(-) rename shared/test/common/test_files/{11256751929276190336.cl => 11060315779005710265.cl} (98%) rename shared/test/common/test_files/{11256751929276190336_options.txt => 11060315779005710265_options.txt} (100%) rename shared/test/common/test_files/{13967618058110882853_images.cl => 7912693229131574095_images.cl} (99%) rename shared/test/common/test_files/{13967618058110882853_images_options.txt => 7912693229131574095_images_options.txt} (100%) diff --git a/level_zero/core/source/builtin/builtin_functions_lib.h b/level_zero/core/source/builtin/builtin_functions_lib.h index 432beb3f45..f50135c0d9 100644 --- a/level_zero/core/source/builtin/builtin_functions_lib.h +++ b/level_zero/core/source/builtin/builtin_functions_lib.h @@ -29,8 +29,8 @@ enum class Builtin : uint32_t { CopyBufferToBufferSideStateless, FillBufferImmediate, FillBufferImmediateStateless, - FillBufferImmediateRightLeftOver, - FillBufferImmediateRightLeftOverStateless, + FillBufferImmediateLeftOver, + FillBufferImmediateLeftOverStateless, FillBufferSSHOffset, FillBufferSSHOffsetStateless, FillBufferMiddle, diff --git a/level_zero/core/source/builtin/builtin_functions_lib_impl.cpp b/level_zero/core/source/builtin/builtin_functions_lib_impl.cpp index c36d958175..f99d051931 100644 --- a/level_zero/core/source/builtin/builtin_functions_lib_impl.cpp +++ b/level_zero/core/source/builtin/builtin_functions_lib_impl.cpp @@ -79,12 +79,12 @@ void BuiltinFunctionsLibImpl::initBuiltinKernel(Builtin func) { builtinName = "FillBufferImmediate"; builtin = NEO::EBuiltInOps::FillBufferStateless; break; - case Builtin::FillBufferImmediateRightLeftOver: - builtinName = "FillBufferImmediateRightLeftOver"; + case Builtin::FillBufferImmediateLeftOver: + builtinName = "FillBufferImmediateLeftOver"; builtin = NEO::EBuiltInOps::FillBuffer; break; - case Builtin::FillBufferImmediateRightLeftOverStateless: - builtinName = "FillBufferImmediateRightLeftOver"; + case Builtin::FillBufferImmediateLeftOverStateless: + builtinName = "FillBufferImmediateLeftOver"; builtin = NEO::EBuiltInOps::FillBufferStateless; break; case Builtin::FillBufferSSHOffset: diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.h b/level_zero/core/source/cmdlist/cmdlist_hw.h index cf5a4d0bef..72ce67d039 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw.h @@ -228,6 +228,14 @@ struct CommandListCoreFamily : CommandListImp { const ze_group_count_t *threadGroupDimensions, Event *event, const CmdListKernelLaunchParams &launchParams); + + ze_result_t appendUnalignedFillKernel(bool isStateless, + uint32_t unalignedSize, + AlignedAllocationData dstAllocation, + const void *pattern, + Event *signalEvent, + CmdListKernelLaunchParams launchParams); + ze_result_t prepareIndirectParams(const ze_group_count_t *threadGroupDimensions); void updateStreamProperties(Kernel &kernel, bool isMultiOsContextCapable, bool isCooperative); void clearCommandsToPatch(); @@ -248,7 +256,6 @@ struct CommandListCoreFamily : CommandListImp { NEO::PipeControlArgs createBarrierFlags(); void appendMultiTileBarrier(NEO::Device &neoDevice); size_t estimateBufferSizeMultiTileBarrier(const NEO::HardwareInfo &hwInfo); - uint64_t getInputBufferSize(NEO::ImageType imageType, uint64_t bytesPerPixel, const ze_image_region_t *region); MOCKABLE_VIRTUAL AlignedAllocationData getAlignedAllocation(Device *device, const void *buffer, uint64_t bufferSize, bool hostCopyAllowed); ze_result_t addEventsToCmdList(uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents); diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index f5204ff56b..436ecdb403 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -1462,6 +1462,31 @@ ze_result_t CommandListCoreFamily::appendMemoryPrefetch(const voi return ZE_RESULT_ERROR_INVALID_ARGUMENT; } +template +ze_result_t CommandListCoreFamily::appendUnalignedFillKernel(bool isStateless, uint32_t unalignedSize, AlignedAllocationData dstAllocation, const void *pattern, Event *signalEvent, CmdListKernelLaunchParams launchParams) { + Kernel *builtinFunction = nullptr; + if (isStateless) { + builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferImmediateLeftOverStateless); + } else { + builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferImmediateLeftOver); + } + uint32_t groupSizeY = 1, groupSizeZ = 1; + uint32_t groupSizeX = static_cast(unalignedSize); + builtinFunction->suggestGroupSize(groupSizeX, groupSizeY, groupSizeZ, &groupSizeX, &groupSizeY, &groupSizeZ); + builtinFunction->setGroupSize(groupSizeX, groupSizeY, groupSizeZ); + ze_group_count_t dispatchFuncRemainderArgs{static_cast(unalignedSize / groupSizeX), 1u, 1u}; + uint32_t value = *(reinterpret_cast(pattern)); + builtinFunction->setArgBufferWithAlloc(0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc); + builtinFunction->setArgumentValue(1, sizeof(dstAllocation.offset), &dstAllocation.offset); + builtinFunction->setArgumentValue(2, sizeof(value), &value); + + auto res = appendLaunchKernelSplit(builtinFunction, &dispatchFuncRemainderArgs, signalEvent, launchParams); + if (res) { + return res; + } + return ZE_RESULT_SUCCESS; +} + template ze_result_t CommandListCoreFamily::appendMemoryFill(void *ptr, const void *pattern, @@ -1524,8 +1549,17 @@ ze_result_t CommandListCoreFamily::appendMemoryFill(void *ptr, launchParams.isKernelSplitOperation = true; launchParams.isBuiltInKernel = true; launchParams.isDestinationAllocationInSystemMemory = hostPointerNeedsFlush; - if (patternSize == 1) { + size_t middleSize = size; + uint32_t leftRemainder = sizeof(uint32_t) - (dstAllocation.offset % sizeof(uint32_t)); + if (dstAllocation.offset % sizeof(uint32_t) != 0 && leftRemainder <= size) { + res = appendUnalignedFillKernel(isStateless, leftRemainder, dstAllocation, pattern, signalEvent, launchParams); + if (res) { + return res; + } + middleSize -= leftRemainder; + dstAllocation.offset += leftRemainder; + } Kernel *builtinFunction = nullptr; if (isStateless) { @@ -1534,7 +1568,7 @@ ze_result_t CommandListCoreFamily::appendMemoryFill(void *ptr, builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferImmediate); } const auto dataTypeSize = sizeof(uint32_t) * 4; - size_t adjustedSize = size / dataTypeSize; + size_t adjustedSize = middleSize / dataTypeSize; size_t groupSizeX = device->getDeviceInfo().maxWorkGroupSize; if (groupSizeX > adjustedSize && adjustedSize > 0) { groupSizeX = adjustedSize; @@ -1545,8 +1579,8 @@ ze_result_t CommandListCoreFamily::appendMemoryFill(void *ptr, } size_t groups = adjustedSize / groupSizeX; - size_t remainingBytes = static_cast((adjustedSize % groupSizeX) * dataTypeSize + - size % dataTypeSize); + uint32_t remainingBytes = static_cast((adjustedSize % groupSizeX) * dataTypeSize + + middleSize % dataTypeSize); ze_group_count_t dispatchFuncArgs{static_cast(groups), 1u, 1u}; uint32_t value = 0; @@ -1563,23 +1597,8 @@ ze_result_t CommandListCoreFamily::appendMemoryFill(void *ptr, } if (remainingBytes) { - if (isStateless) { - builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferImmediateRightLeftOverStateless); - } else { - builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferImmediateRightLeftOver); - } - uint32_t groupSizeY = 1, groupSizeZ = 1; - uint32_t groupSizeX = static_cast(remainingBytes); - builtinFunction->suggestGroupSize(groupSizeX, groupSizeY, groupSizeZ, &groupSizeX, &groupSizeY, &groupSizeZ); - builtinFunction->setGroupSize(groupSizeX, groupSizeY, groupSizeZ); - ze_group_count_t dispatchFuncRemainderArgs{static_cast(remainingBytes / groupSizeX), 1u, 1u}; - size_t dstOffset = dstAllocation.offset + (size - remainingBytes); - value = *(reinterpret_cast(pattern)); - builtinFunction->setArgBufferWithAlloc(0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc); - builtinFunction->setArgumentValue(1, sizeof(dstOffset), &dstOffset); - builtinFunction->setArgumentValue(2, sizeof(value), &value); - - res = appendLaunchKernelSplit(builtinFunction, &dispatchFuncRemainderArgs, signalEvent, launchParams); + dstAllocation.offset += (middleSize - remainingBytes); + res = appendUnalignedFillKernel(isStateless, remainingBytes, dstAllocation, pattern, signalEvent, launchParams); if (res) { return res; } @@ -1628,7 +1647,6 @@ ze_result_t CommandListCoreFamily::appendMemoryFill(void *ptr, patternAllocOffset += patternSizeToCopy; } while (patternAllocOffset < patternAllocationSize); - builtinFunction->setArgBufferWithAlloc(0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc); builtinFunction->setArgumentValue(1, sizeof(dstAllocation.offset), &dstAllocation.offset); builtinFunction->setArgBufferWithAlloc(2, reinterpret_cast(patternGfxAllocPtr), patternGfxAlloc); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_fill.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_fill.cpp index 9a00478fa4..af4c25945a 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_fill.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_fill.cpp @@ -53,7 +53,7 @@ class AppendFillFixture : public DeviceFixture { if (numberOfCallsToAppendLaunchKernelWithParams == thresholdOfCallsToAppendLaunchKernelWithParamsToFail) { return ZE_RESULT_ERROR_UNKNOWN; } - if (numberOfCallsToAppendLaunchKernelWithParams < 2) { + if (numberOfCallsToAppendLaunchKernelWithParams < 3) { threadGroupDimensions[numberOfCallsToAppendLaunchKernelWithParams] = *pThreadGroupDimensions; xGroupSizes[numberOfCallsToAppendLaunchKernelWithParams] = kernel->getGroupSize()[0]; } @@ -63,8 +63,8 @@ class AppendFillFixture : public DeviceFixture { event, launchParams); } - ze_group_count_t threadGroupDimensions[2]; - uint32_t xGroupSizes[2]; + ze_group_count_t threadGroupDimensions[3]; + uint32_t xGroupSizes[3]; uint32_t thresholdOfCallsToAppendLaunchKernelWithParamsToFail = std::numeric_limits::max(); uint32_t numberOfCallsToAppendLaunchKernelWithParams = 0; }; @@ -281,6 +281,62 @@ HWTEST2_F(AppendFillTest, delete[] ptr; } +HWTEST2_F(AppendFillTest, + givenAppendMemoryFillWhenPtrWithOffsetAndPatternSizeIsOneThenThreeKernelsDispatched, IsAtLeastSkl) { + using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; + + auto commandList = std::make_unique>>(); + commandList->initialize(device, NEO::EngineGroupType::Compute, 0u); + int pattern = 0; + uint32_t offset = 1; + const size_t size = 1024; + uint8_t *ptr = new uint8_t[size]; + ze_result_t result = commandList->appendMemoryFill(ptr + offset, &pattern, 1, size - offset, nullptr, 0, nullptr); + size_t filledSize = commandList->xGroupSizes[0] * commandList->threadGroupDimensions[0].groupCountX; + filledSize += commandList->xGroupSizes[1] * commandList->threadGroupDimensions[1].groupCountX * 16; + filledSize += commandList->xGroupSizes[2] * commandList->threadGroupDimensions[2].groupCountX; + EXPECT_EQ(sizeof(uint32_t) - offset, commandList->xGroupSizes[0]); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_EQ(3u, commandList->numberOfCallsToAppendLaunchKernelWithParams); + EXPECT_EQ(size - offset, filledSize); + delete[] ptr; +} + +HWTEST2_F(AppendFillTest, + givenAppendMemoryFillWhenPtrWithOffsetAndSmallSizeAndPatternSizeIsOneThenTwoKernelsDispatched, IsAtLeastSkl) { + using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; + + auto commandList = std::make_unique>>(); + commandList->initialize(device, NEO::EngineGroupType::Compute, 0u); + int pattern = 0; + uint32_t offset = 1; + const size_t size = 2; + uint8_t *ptr = new uint8_t[size]; + ze_result_t result = commandList->appendMemoryFill(ptr + offset, &pattern, 1, size - offset, nullptr, 0, nullptr); + size_t filledSize = commandList->xGroupSizes[0] * commandList->threadGroupDimensions[0].groupCountX * 16; + filledSize += commandList->xGroupSizes[1] * commandList->threadGroupDimensions[1].groupCountX; + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_EQ(2u, commandList->numberOfCallsToAppendLaunchKernelWithParams); + EXPECT_EQ(size - offset, filledSize); + delete[] ptr; +} + +HWTEST2_F(AppendFillTest, + givenAppendMemoryFillWhenPtrWithOffsetAndFailAppendUnalignedFillKernelThenReturnError, IsAtLeastSkl) { + using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; + + auto commandList = std::make_unique>>(); + commandList->thresholdOfCallsToAppendLaunchKernelWithParamsToFail = 0; + commandList->initialize(device, NEO::EngineGroupType::Compute, 0u); + int pattern = 0; + uint32_t offset = 1; + const size_t size = 1024; + uint8_t *ptr = new uint8_t[size]; + ze_result_t result = commandList->appendMemoryFill(ptr + offset, &pattern, 1, size - offset, nullptr, 0, nullptr); + EXPECT_NE(ZE_RESULT_SUCCESS, result); + delete[] ptr; +} + HWTEST2_F(AppendFillTest, givenCallToAppendMemoryFillWithSizeNotMultipleOfPatternSizeThenSuccessIsReturned, IsAtLeastSkl) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; diff --git a/manifests/manifest.yml b/manifests/manifest.yml index f23d8392f5..2b0f3b4207 100644 --- a/manifests/manifest.yml +++ b/manifests/manifest.yml @@ -38,7 +38,7 @@ components: dest_dir: kernels_bin type: git branch: kernels_bin - revision: 1941-284 + revision: 1941-290 kmdaf: branch: kmdaf dest_dir: kmdaf diff --git a/shared/source/built_ins/kernels/fill_buffer.builtin_kernel b/shared/source/built_ins/kernels/fill_buffer.builtin_kernel index 7690e01b41..6b5efc602d 100644 --- a/shared/source/built_ins/kernels/fill_buffer.builtin_kernel +++ b/shared/source/built_ins/kernels/fill_buffer.builtin_kernel @@ -48,15 +48,16 @@ __kernel void FillBufferRightLeftover( } __kernel void FillBufferImmediate( - __global uint4* ptr, + __global uchar* ptr, ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment const uint value) { uint gid = get_global_id(0); - (ptr + dstSshOffset)[gid] = value; + __global uint4* dstPtr = (__global uint4*)(ptr + dstSshOffset); + dstPtr[gid] = value; } -__kernel void FillBufferImmediateRightLeftOver( +__kernel void FillBufferImmediateLeftOver( __global uchar* ptr, ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment const uint value) diff --git a/shared/source/built_ins/kernels/fill_buffer_stateless.builtin_kernel b/shared/source/built_ins/kernels/fill_buffer_stateless.builtin_kernel index 8b967bf8e5..358de60781 100644 --- a/shared/source/built_ins/kernels/fill_buffer_stateless.builtin_kernel +++ b/shared/source/built_ins/kernels/fill_buffer_stateless.builtin_kernel @@ -48,15 +48,16 @@ __kernel void FillBufferRightLeftover( } __kernel void FillBufferImmediate( - __global uint4* ptr, + __global uchar* ptr, ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment const uint value) { size_t gid = get_global_id(0); - (ptr + dstSshOffset)[gid] = value; + __global uint4* dstPtr = (__global uint4*)(ptr + dstSshOffset); + dstPtr[gid] = value; } -__kernel void FillBufferImmediateRightLeftOver( +__kernel void FillBufferImmediateLeftOver( __global uchar* ptr, ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment const uint value) diff --git a/shared/test/common/helpers/kernel_binary_helper_hash_value.cpp b/shared/test/common/helpers/kernel_binary_helper_hash_value.cpp index 624e690ab0..291d4b45d5 100644 --- a/shared/test/common/helpers/kernel_binary_helper_hash_value.cpp +++ b/shared/test/common/helpers/kernel_binary_helper_hash_value.cpp @@ -7,5 +7,5 @@ #include "shared/test/common/helpers/kernel_binary_helper.h" -const std::string KernelBinaryHelper::BUILT_INS("11256751929276190336"); -const std::string KernelBinaryHelper::BUILT_INS_WITH_IMAGES("13967618058110882853_images"); +const std::string KernelBinaryHelper::BUILT_INS("11060315779005710265"); +const std::string KernelBinaryHelper::BUILT_INS_WITH_IMAGES("7912693229131574095_images"); diff --git a/shared/test/common/test_files/11256751929276190336.cl b/shared/test/common/test_files/11060315779005710265.cl similarity index 98% rename from shared/test/common/test_files/11256751929276190336.cl rename to shared/test/common/test_files/11060315779005710265.cl index 208ba372e6..613762afec 100644 --- a/shared/test/common/test_files/11256751929276190336.cl +++ b/shared/test/common/test_files/11060315779005710265.cl @@ -156,15 +156,16 @@ __kernel void FillBufferRightLeftover( } __kernel void FillBufferImmediate( - __global uint4* ptr, + __global uchar* ptr, ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment const uint value) { uint gid = get_global_id(0); - (ptr + dstSshOffset)[gid] = value; + __global uint4* dstPtr = (__global uint4*)(ptr + dstSshOffset); + dstPtr[gid] = value; } -__kernel void FillBufferImmediateRightLeftOver( +__kernel void FillBufferImmediateLeftOver( __global uchar* ptr, ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment const uint value) diff --git a/shared/test/common/test_files/11256751929276190336_options.txt b/shared/test/common/test_files/11060315779005710265_options.txt similarity index 100% rename from shared/test/common/test_files/11256751929276190336_options.txt rename to shared/test/common/test_files/11060315779005710265_options.txt diff --git a/shared/test/common/test_files/13967618058110882853_images.cl b/shared/test/common/test_files/7912693229131574095_images.cl similarity index 99% rename from shared/test/common/test_files/13967618058110882853_images.cl rename to shared/test/common/test_files/7912693229131574095_images.cl index 57025cbfc1..0b819298fd 100644 --- a/shared/test/common/test_files/13967618058110882853_images.cl +++ b/shared/test/common/test_files/7912693229131574095_images.cl @@ -156,15 +156,16 @@ __kernel void FillBufferRightLeftover( } __kernel void FillBufferImmediate( - __global uint4* ptr, + __global uchar* ptr, ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment const uint value) { uint gid = get_global_id(0); - (ptr + dstSshOffset)[gid] = value; + __global uint4* dstPtr = (__global uint4*)(ptr + dstSshOffset); + dstPtr[gid] = value; } -__kernel void FillBufferImmediateRightLeftOver( +__kernel void FillBufferImmediateLeftOver( __global uchar* ptr, ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment const uint value) diff --git a/shared/test/common/test_files/13967618058110882853_images_options.txt b/shared/test/common/test_files/7912693229131574095_images_options.txt similarity index 100% rename from shared/test/common/test_files/13967618058110882853_images_options.txt rename to shared/test/common/test_files/7912693229131574095_images_options.txt diff --git a/shared/test/common/test_files/builtin_copyfill.cl b/shared/test/common/test_files/builtin_copyfill.cl index ca4a73808f..f2521b782f 100644 --- a/shared/test/common/test_files/builtin_copyfill.cl +++ b/shared/test/common/test_files/builtin_copyfill.cl @@ -156,15 +156,16 @@ __kernel void FillBufferRightLeftover( } __kernel void FillBufferImmediate( - __global uint4* ptr, - uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment + __global uchar* ptr, + ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment const uint value) { uint gid = get_global_id(0); - (ptr + dstSshOffset)[gid] = value; + __global uint4* dstPtr = (__global uint4*)(ptr + dstSshOffset); + dstPtr[gid] = value; } -__kernel void FillBufferImmediateRightLeftOver( +__kernel void FillBufferImmediateLeftOver( __global uchar* ptr, uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment const uint value) diff --git a/shared/test/common/test_files/builtin_copyfill_stateless.cl b/shared/test/common/test_files/builtin_copyfill_stateless.cl index 9296207d4f..cd344392be 100644 --- a/shared/test/common/test_files/builtin_copyfill_stateless.cl +++ b/shared/test/common/test_files/builtin_copyfill_stateless.cl @@ -156,15 +156,16 @@ __kernel void FillBufferRightLeftover( } __kernel void FillBufferImmediate( - __global uint4* ptr, + __global uchar* ptr, ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment const uint value) { uint gid = get_global_id(0); - (ptr + dstSshOffset)[gid] = value; + __global uint4* dstPtr = (__global uint4*)(ptr + dstSshOffset); + dstPtr[gid] = value; } -__kernel void FillBufferImmediateRightLeftOver( +__kernel void FillBufferImmediateLeftOver( __global uchar* ptr, ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment const uint value) diff --git a/shared/test/common/test_files/builtin_images.cl b/shared/test/common/test_files/builtin_images.cl index 213848fb98..fb4240fac2 100644 --- a/shared/test/common/test_files/builtin_images.cl +++ b/shared/test/common/test_files/builtin_images.cl @@ -156,15 +156,16 @@ __kernel void FillBufferRightLeftover( } __kernel void FillBufferImmediate( - __global uint4* ptr, - uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment + __global uchar* ptr, + ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment const uint value) { uint gid = get_global_id(0); - (ptr + dstSshOffset)[gid] = value; + __global uint4* dstPtr = (__global uint4*)(ptr + dstSshOffset); + dstPtr[gid] = value; } -__kernel void FillBufferImmediateRightLeftOver( +__kernel void FillBufferImmediateLeftOver( __global uchar* ptr, uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment const uint value)