mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-10 12:53:42 +08:00
Append additional fill kernel for left leftover
Related-To: NEO-7287 Signed-off-by: Szymon Morek <szymon.morek@intel.com>
This commit is contained in:

committed by
Compute-Runtime-Automation

parent
992e8e7c96
commit
403b49e987
@ -29,8 +29,8 @@ enum class Builtin : uint32_t {
|
||||
CopyBufferToBufferSideStateless,
|
||||
FillBufferImmediate,
|
||||
FillBufferImmediateStateless,
|
||||
FillBufferImmediateRightLeftOver,
|
||||
FillBufferImmediateRightLeftOverStateless,
|
||||
FillBufferImmediateLeftOver,
|
||||
FillBufferImmediateLeftOverStateless,
|
||||
FillBufferSSHOffset,
|
||||
FillBufferSSHOffsetStateless,
|
||||
FillBufferMiddle,
|
||||
|
@ -79,12 +79,12 @@ void BuiltinFunctionsLibImpl::initBuiltinKernel(Builtin func) {
|
||||
builtinName = "FillBufferImmediate";
|
||||
builtin = NEO::EBuiltInOps::FillBufferStateless;
|
||||
break;
|
||||
case Builtin::FillBufferImmediateRightLeftOver:
|
||||
builtinName = "FillBufferImmediateRightLeftOver";
|
||||
case Builtin::FillBufferImmediateLeftOver:
|
||||
builtinName = "FillBufferImmediateLeftOver";
|
||||
builtin = NEO::EBuiltInOps::FillBuffer;
|
||||
break;
|
||||
case Builtin::FillBufferImmediateRightLeftOverStateless:
|
||||
builtinName = "FillBufferImmediateRightLeftOver";
|
||||
case Builtin::FillBufferImmediateLeftOverStateless:
|
||||
builtinName = "FillBufferImmediateLeftOver";
|
||||
builtin = NEO::EBuiltInOps::FillBufferStateless;
|
||||
break;
|
||||
case Builtin::FillBufferSSHOffset:
|
||||
|
@ -228,6 +228,14 @@ struct CommandListCoreFamily : CommandListImp {
|
||||
const ze_group_count_t *threadGroupDimensions,
|
||||
Event *event,
|
||||
const CmdListKernelLaunchParams &launchParams);
|
||||
|
||||
ze_result_t appendUnalignedFillKernel(bool isStateless,
|
||||
uint32_t unalignedSize,
|
||||
AlignedAllocationData dstAllocation,
|
||||
const void *pattern,
|
||||
Event *signalEvent,
|
||||
CmdListKernelLaunchParams launchParams);
|
||||
|
||||
ze_result_t prepareIndirectParams(const ze_group_count_t *threadGroupDimensions);
|
||||
void updateStreamProperties(Kernel &kernel, bool isMultiOsContextCapable, bool isCooperative);
|
||||
void clearCommandsToPatch();
|
||||
@ -248,7 +256,6 @@ struct CommandListCoreFamily : CommandListImp {
|
||||
NEO::PipeControlArgs createBarrierFlags();
|
||||
void appendMultiTileBarrier(NEO::Device &neoDevice);
|
||||
size_t estimateBufferSizeMultiTileBarrier(const NEO::HardwareInfo &hwInfo);
|
||||
|
||||
uint64_t getInputBufferSize(NEO::ImageType imageType, uint64_t bytesPerPixel, const ze_image_region_t *region);
|
||||
MOCKABLE_VIRTUAL AlignedAllocationData getAlignedAllocation(Device *device, const void *buffer, uint64_t bufferSize, bool hostCopyAllowed);
|
||||
ze_result_t addEventsToCmdList(uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents);
|
||||
|
@ -1462,6 +1462,31 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryPrefetch(const voi
|
||||
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendUnalignedFillKernel(bool isStateless, uint32_t unalignedSize, AlignedAllocationData dstAllocation, const void *pattern, Event *signalEvent, CmdListKernelLaunchParams launchParams) {
|
||||
Kernel *builtinFunction = nullptr;
|
||||
if (isStateless) {
|
||||
builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferImmediateLeftOverStateless);
|
||||
} else {
|
||||
builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferImmediateLeftOver);
|
||||
}
|
||||
uint32_t groupSizeY = 1, groupSizeZ = 1;
|
||||
uint32_t groupSizeX = static_cast<uint32_t>(unalignedSize);
|
||||
builtinFunction->suggestGroupSize(groupSizeX, groupSizeY, groupSizeZ, &groupSizeX, &groupSizeY, &groupSizeZ);
|
||||
builtinFunction->setGroupSize(groupSizeX, groupSizeY, groupSizeZ);
|
||||
ze_group_count_t dispatchFuncRemainderArgs{static_cast<uint32_t>(unalignedSize / groupSizeX), 1u, 1u};
|
||||
uint32_t value = *(reinterpret_cast<const unsigned char *>(pattern));
|
||||
builtinFunction->setArgBufferWithAlloc(0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc);
|
||||
builtinFunction->setArgumentValue(1, sizeof(dstAllocation.offset), &dstAllocation.offset);
|
||||
builtinFunction->setArgumentValue(2, sizeof(value), &value);
|
||||
|
||||
auto res = appendLaunchKernelSplit(builtinFunction, &dispatchFuncRemainderArgs, signalEvent, launchParams);
|
||||
if (res) {
|
||||
return res;
|
||||
}
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
|
||||
const void *pattern,
|
||||
@ -1524,8 +1549,17 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
|
||||
launchParams.isKernelSplitOperation = true;
|
||||
launchParams.isBuiltInKernel = true;
|
||||
launchParams.isDestinationAllocationInSystemMemory = hostPointerNeedsFlush;
|
||||
|
||||
if (patternSize == 1) {
|
||||
size_t middleSize = size;
|
||||
uint32_t leftRemainder = sizeof(uint32_t) - (dstAllocation.offset % sizeof(uint32_t));
|
||||
if (dstAllocation.offset % sizeof(uint32_t) != 0 && leftRemainder <= size) {
|
||||
res = appendUnalignedFillKernel(isStateless, leftRemainder, dstAllocation, pattern, signalEvent, launchParams);
|
||||
if (res) {
|
||||
return res;
|
||||
}
|
||||
middleSize -= leftRemainder;
|
||||
dstAllocation.offset += leftRemainder;
|
||||
}
|
||||
Kernel *builtinFunction = nullptr;
|
||||
|
||||
if (isStateless) {
|
||||
@ -1534,7 +1568,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
|
||||
builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferImmediate);
|
||||
}
|
||||
const auto dataTypeSize = sizeof(uint32_t) * 4;
|
||||
size_t adjustedSize = size / dataTypeSize;
|
||||
size_t adjustedSize = middleSize / dataTypeSize;
|
||||
size_t groupSizeX = device->getDeviceInfo().maxWorkGroupSize;
|
||||
if (groupSizeX > adjustedSize && adjustedSize > 0) {
|
||||
groupSizeX = adjustedSize;
|
||||
@ -1545,8 +1579,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
|
||||
}
|
||||
|
||||
size_t groups = adjustedSize / groupSizeX;
|
||||
size_t remainingBytes = static_cast<size_t>((adjustedSize % groupSizeX) * dataTypeSize +
|
||||
size % dataTypeSize);
|
||||
uint32_t remainingBytes = static_cast<uint32_t>((adjustedSize % groupSizeX) * dataTypeSize +
|
||||
middleSize % dataTypeSize);
|
||||
ze_group_count_t dispatchFuncArgs{static_cast<uint32_t>(groups), 1u, 1u};
|
||||
|
||||
uint32_t value = 0;
|
||||
@ -1563,23 +1597,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
|
||||
}
|
||||
|
||||
if (remainingBytes) {
|
||||
if (isStateless) {
|
||||
builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferImmediateRightLeftOverStateless);
|
||||
} else {
|
||||
builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferImmediateRightLeftOver);
|
||||
}
|
||||
uint32_t groupSizeY = 1, groupSizeZ = 1;
|
||||
uint32_t groupSizeX = static_cast<uint32_t>(remainingBytes);
|
||||
builtinFunction->suggestGroupSize(groupSizeX, groupSizeY, groupSizeZ, &groupSizeX, &groupSizeY, &groupSizeZ);
|
||||
builtinFunction->setGroupSize(groupSizeX, groupSizeY, groupSizeZ);
|
||||
ze_group_count_t dispatchFuncRemainderArgs{static_cast<uint32_t>(remainingBytes / groupSizeX), 1u, 1u};
|
||||
size_t dstOffset = dstAllocation.offset + (size - remainingBytes);
|
||||
value = *(reinterpret_cast<const unsigned char *>(pattern));
|
||||
builtinFunction->setArgBufferWithAlloc(0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc);
|
||||
builtinFunction->setArgumentValue(1, sizeof(dstOffset), &dstOffset);
|
||||
builtinFunction->setArgumentValue(2, sizeof(value), &value);
|
||||
|
||||
res = appendLaunchKernelSplit(builtinFunction, &dispatchFuncRemainderArgs, signalEvent, launchParams);
|
||||
dstAllocation.offset += (middleSize - remainingBytes);
|
||||
res = appendUnalignedFillKernel(isStateless, remainingBytes, dstAllocation, pattern, signalEvent, launchParams);
|
||||
if (res) {
|
||||
return res;
|
||||
}
|
||||
@ -1628,7 +1647,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
|
||||
|
||||
patternAllocOffset += patternSizeToCopy;
|
||||
} while (patternAllocOffset < patternAllocationSize);
|
||||
|
||||
builtinFunction->setArgBufferWithAlloc(0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc);
|
||||
builtinFunction->setArgumentValue(1, sizeof(dstAllocation.offset), &dstAllocation.offset);
|
||||
builtinFunction->setArgBufferWithAlloc(2, reinterpret_cast<uintptr_t>(patternGfxAllocPtr), patternGfxAlloc);
|
||||
|
@ -53,7 +53,7 @@ class AppendFillFixture : public DeviceFixture {
|
||||
if (numberOfCallsToAppendLaunchKernelWithParams == thresholdOfCallsToAppendLaunchKernelWithParamsToFail) {
|
||||
return ZE_RESULT_ERROR_UNKNOWN;
|
||||
}
|
||||
if (numberOfCallsToAppendLaunchKernelWithParams < 2) {
|
||||
if (numberOfCallsToAppendLaunchKernelWithParams < 3) {
|
||||
threadGroupDimensions[numberOfCallsToAppendLaunchKernelWithParams] = *pThreadGroupDimensions;
|
||||
xGroupSizes[numberOfCallsToAppendLaunchKernelWithParams] = kernel->getGroupSize()[0];
|
||||
}
|
||||
@ -63,8 +63,8 @@ class AppendFillFixture : public DeviceFixture {
|
||||
event,
|
||||
launchParams);
|
||||
}
|
||||
ze_group_count_t threadGroupDimensions[2];
|
||||
uint32_t xGroupSizes[2];
|
||||
ze_group_count_t threadGroupDimensions[3];
|
||||
uint32_t xGroupSizes[3];
|
||||
uint32_t thresholdOfCallsToAppendLaunchKernelWithParamsToFail = std::numeric_limits<uint32_t>::max();
|
||||
uint32_t numberOfCallsToAppendLaunchKernelWithParams = 0;
|
||||
};
|
||||
@ -281,6 +281,62 @@ HWTEST2_F(AppendFillTest,
|
||||
delete[] ptr;
|
||||
}
|
||||
|
||||
HWTEST2_F(AppendFillTest,
|
||||
givenAppendMemoryFillWhenPtrWithOffsetAndPatternSizeIsOneThenThreeKernelsDispatched, IsAtLeastSkl) {
|
||||
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||
|
||||
auto commandList = std::make_unique<WhiteBox<MockCommandList<gfxCoreFamily>>>();
|
||||
commandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
|
||||
int pattern = 0;
|
||||
uint32_t offset = 1;
|
||||
const size_t size = 1024;
|
||||
uint8_t *ptr = new uint8_t[size];
|
||||
ze_result_t result = commandList->appendMemoryFill(ptr + offset, &pattern, 1, size - offset, nullptr, 0, nullptr);
|
||||
size_t filledSize = commandList->xGroupSizes[0] * commandList->threadGroupDimensions[0].groupCountX;
|
||||
filledSize += commandList->xGroupSizes[1] * commandList->threadGroupDimensions[1].groupCountX * 16;
|
||||
filledSize += commandList->xGroupSizes[2] * commandList->threadGroupDimensions[2].groupCountX;
|
||||
EXPECT_EQ(sizeof(uint32_t) - offset, commandList->xGroupSizes[0]);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
EXPECT_EQ(3u, commandList->numberOfCallsToAppendLaunchKernelWithParams);
|
||||
EXPECT_EQ(size - offset, filledSize);
|
||||
delete[] ptr;
|
||||
}
|
||||
|
||||
HWTEST2_F(AppendFillTest,
|
||||
givenAppendMemoryFillWhenPtrWithOffsetAndSmallSizeAndPatternSizeIsOneThenTwoKernelsDispatched, IsAtLeastSkl) {
|
||||
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||
|
||||
auto commandList = std::make_unique<WhiteBox<MockCommandList<gfxCoreFamily>>>();
|
||||
commandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
|
||||
int pattern = 0;
|
||||
uint32_t offset = 1;
|
||||
const size_t size = 2;
|
||||
uint8_t *ptr = new uint8_t[size];
|
||||
ze_result_t result = commandList->appendMemoryFill(ptr + offset, &pattern, 1, size - offset, nullptr, 0, nullptr);
|
||||
size_t filledSize = commandList->xGroupSizes[0] * commandList->threadGroupDimensions[0].groupCountX * 16;
|
||||
filledSize += commandList->xGroupSizes[1] * commandList->threadGroupDimensions[1].groupCountX;
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
EXPECT_EQ(2u, commandList->numberOfCallsToAppendLaunchKernelWithParams);
|
||||
EXPECT_EQ(size - offset, filledSize);
|
||||
delete[] ptr;
|
||||
}
|
||||
|
||||
HWTEST2_F(AppendFillTest,
|
||||
givenAppendMemoryFillWhenPtrWithOffsetAndFailAppendUnalignedFillKernelThenReturnError, IsAtLeastSkl) {
|
||||
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||
|
||||
auto commandList = std::make_unique<WhiteBox<MockCommandList<gfxCoreFamily>>>();
|
||||
commandList->thresholdOfCallsToAppendLaunchKernelWithParamsToFail = 0;
|
||||
commandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
|
||||
int pattern = 0;
|
||||
uint32_t offset = 1;
|
||||
const size_t size = 1024;
|
||||
uint8_t *ptr = new uint8_t[size];
|
||||
ze_result_t result = commandList->appendMemoryFill(ptr + offset, &pattern, 1, size - offset, nullptr, 0, nullptr);
|
||||
EXPECT_NE(ZE_RESULT_SUCCESS, result);
|
||||
delete[] ptr;
|
||||
}
|
||||
|
||||
HWTEST2_F(AppendFillTest,
|
||||
givenCallToAppendMemoryFillWithSizeNotMultipleOfPatternSizeThenSuccessIsReturned, IsAtLeastSkl) {
|
||||
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||
|
@ -38,7 +38,7 @@ components:
|
||||
dest_dir: kernels_bin
|
||||
type: git
|
||||
branch: kernels_bin
|
||||
revision: 1941-284
|
||||
revision: 1941-290
|
||||
kmdaf:
|
||||
branch: kmdaf
|
||||
dest_dir: kmdaf
|
||||
|
@ -48,15 +48,16 @@ __kernel void FillBufferRightLeftover(
|
||||
}
|
||||
|
||||
__kernel void FillBufferImmediate(
|
||||
__global uint4* ptr,
|
||||
__global uchar* ptr,
|
||||
ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
const uint value)
|
||||
{
|
||||
uint gid = get_global_id(0);
|
||||
(ptr + dstSshOffset)[gid] = value;
|
||||
__global uint4* dstPtr = (__global uint4*)(ptr + dstSshOffset);
|
||||
dstPtr[gid] = value;
|
||||
}
|
||||
|
||||
__kernel void FillBufferImmediateRightLeftOver(
|
||||
__kernel void FillBufferImmediateLeftOver(
|
||||
__global uchar* ptr,
|
||||
ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
const uint value)
|
||||
|
@ -48,15 +48,16 @@ __kernel void FillBufferRightLeftover(
|
||||
}
|
||||
|
||||
__kernel void FillBufferImmediate(
|
||||
__global uint4* ptr,
|
||||
__global uchar* ptr,
|
||||
ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
const uint value)
|
||||
{
|
||||
size_t gid = get_global_id(0);
|
||||
(ptr + dstSshOffset)[gid] = value;
|
||||
__global uint4* dstPtr = (__global uint4*)(ptr + dstSshOffset);
|
||||
dstPtr[gid] = value;
|
||||
}
|
||||
|
||||
__kernel void FillBufferImmediateRightLeftOver(
|
||||
__kernel void FillBufferImmediateLeftOver(
|
||||
__global uchar* ptr,
|
||||
ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
const uint value)
|
||||
|
@ -7,5 +7,5 @@
|
||||
|
||||
#include "shared/test/common/helpers/kernel_binary_helper.h"
|
||||
|
||||
const std::string KernelBinaryHelper::BUILT_INS("11256751929276190336");
|
||||
const std::string KernelBinaryHelper::BUILT_INS_WITH_IMAGES("13967618058110882853_images");
|
||||
const std::string KernelBinaryHelper::BUILT_INS("11060315779005710265");
|
||||
const std::string KernelBinaryHelper::BUILT_INS_WITH_IMAGES("7912693229131574095_images");
|
||||
|
@ -156,15 +156,16 @@ __kernel void FillBufferRightLeftover(
|
||||
}
|
||||
|
||||
__kernel void FillBufferImmediate(
|
||||
__global uint4* ptr,
|
||||
__global uchar* ptr,
|
||||
ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
const uint value)
|
||||
{
|
||||
uint gid = get_global_id(0);
|
||||
(ptr + dstSshOffset)[gid] = value;
|
||||
__global uint4* dstPtr = (__global uint4*)(ptr + dstSshOffset);
|
||||
dstPtr[gid] = value;
|
||||
}
|
||||
|
||||
__kernel void FillBufferImmediateRightLeftOver(
|
||||
__kernel void FillBufferImmediateLeftOver(
|
||||
__global uchar* ptr,
|
||||
ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
const uint value)
|
@ -156,15 +156,16 @@ __kernel void FillBufferRightLeftover(
|
||||
}
|
||||
|
||||
__kernel void FillBufferImmediate(
|
||||
__global uint4* ptr,
|
||||
__global uchar* ptr,
|
||||
ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
const uint value)
|
||||
{
|
||||
uint gid = get_global_id(0);
|
||||
(ptr + dstSshOffset)[gid] = value;
|
||||
__global uint4* dstPtr = (__global uint4*)(ptr + dstSshOffset);
|
||||
dstPtr[gid] = value;
|
||||
}
|
||||
|
||||
__kernel void FillBufferImmediateRightLeftOver(
|
||||
__kernel void FillBufferImmediateLeftOver(
|
||||
__global uchar* ptr,
|
||||
ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
const uint value)
|
@ -156,15 +156,16 @@ __kernel void FillBufferRightLeftover(
|
||||
}
|
||||
|
||||
__kernel void FillBufferImmediate(
|
||||
__global uint4* ptr,
|
||||
uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
__global uchar* ptr,
|
||||
ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
const uint value)
|
||||
{
|
||||
uint gid = get_global_id(0);
|
||||
(ptr + dstSshOffset)[gid] = value;
|
||||
__global uint4* dstPtr = (__global uint4*)(ptr + dstSshOffset);
|
||||
dstPtr[gid] = value;
|
||||
}
|
||||
|
||||
__kernel void FillBufferImmediateRightLeftOver(
|
||||
__kernel void FillBufferImmediateLeftOver(
|
||||
__global uchar* ptr,
|
||||
uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
const uint value)
|
||||
|
@ -156,15 +156,16 @@ __kernel void FillBufferRightLeftover(
|
||||
}
|
||||
|
||||
__kernel void FillBufferImmediate(
|
||||
__global uint4* ptr,
|
||||
__global uchar* ptr,
|
||||
ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
const uint value)
|
||||
{
|
||||
uint gid = get_global_id(0);
|
||||
(ptr + dstSshOffset)[gid] = value;
|
||||
__global uint4* dstPtr = (__global uint4*)(ptr + dstSshOffset);
|
||||
dstPtr[gid] = value;
|
||||
}
|
||||
|
||||
__kernel void FillBufferImmediateRightLeftOver(
|
||||
__kernel void FillBufferImmediateLeftOver(
|
||||
__global uchar* ptr,
|
||||
ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
const uint value)
|
||||
|
@ -156,15 +156,16 @@ __kernel void FillBufferRightLeftover(
|
||||
}
|
||||
|
||||
__kernel void FillBufferImmediate(
|
||||
__global uint4* ptr,
|
||||
uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
__global uchar* ptr,
|
||||
ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
const uint value)
|
||||
{
|
||||
uint gid = get_global_id(0);
|
||||
(ptr + dstSshOffset)[gid] = value;
|
||||
__global uint4* dstPtr = (__global uint4*)(ptr + dstSshOffset);
|
||||
dstPtr[gid] = value;
|
||||
}
|
||||
|
||||
__kernel void FillBufferImmediateRightLeftOver(
|
||||
__kernel void FillBufferImmediateLeftOver(
|
||||
__global uchar* ptr,
|
||||
uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
const uint value)
|
||||
|
Reference in New Issue
Block a user