mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-10 12:53:42 +08:00
Revert "Use uint4 type instead of char in builtin kernel"
This reverts commit a39bc7e7b3
.
Signed-off-by: Compute-Runtime-Validation <compute-runtime-validation@intel.com>
This commit is contained in:

committed by
Compute-Runtime-Automation

parent
6e5c0141b5
commit
84f19a1b93
@ -29,8 +29,6 @@ enum class Builtin : uint32_t {
|
|||||||
CopyBufferToBufferSideStateless,
|
CopyBufferToBufferSideStateless,
|
||||||
FillBufferImmediate,
|
FillBufferImmediate,
|
||||||
FillBufferImmediateStateless,
|
FillBufferImmediateStateless,
|
||||||
FillBufferImmediateRightLeftOver,
|
|
||||||
FillBufferImmediateRightLeftOverStateless,
|
|
||||||
FillBufferSSHOffset,
|
FillBufferSSHOffset,
|
||||||
FillBufferSSHOffsetStateless,
|
FillBufferSSHOffsetStateless,
|
||||||
FillBufferMiddle,
|
FillBufferMiddle,
|
||||||
|
@ -79,14 +79,6 @@ void BuiltinFunctionsLibImpl::initBuiltinKernel(Builtin func) {
|
|||||||
builtinName = "FillBufferImmediate";
|
builtinName = "FillBufferImmediate";
|
||||||
builtin = NEO::EBuiltInOps::FillBufferStateless;
|
builtin = NEO::EBuiltInOps::FillBufferStateless;
|
||||||
break;
|
break;
|
||||||
case Builtin::FillBufferImmediateRightLeftOver:
|
|
||||||
builtinName = "FillBufferImmediateRightLeftOver";
|
|
||||||
builtin = NEO::EBuiltInOps::FillBuffer;
|
|
||||||
break;
|
|
||||||
case Builtin::FillBufferImmediateRightLeftOverStateless:
|
|
||||||
builtinName = "FillBufferImmediateRightLeftOver";
|
|
||||||
builtin = NEO::EBuiltInOps::FillBufferStateless;
|
|
||||||
break;
|
|
||||||
case Builtin::FillBufferSSHOffset:
|
case Builtin::FillBufferSSHOffset:
|
||||||
builtinName = "FillBufferSSHOffset";
|
builtinName = "FillBufferSSHOffset";
|
||||||
builtin = NEO::EBuiltInOps::FillBuffer;
|
builtin = NEO::EBuiltInOps::FillBuffer;
|
||||||
|
@ -1533,51 +1533,37 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
|
|||||||
} else {
|
} else {
|
||||||
builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferImmediate);
|
builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferImmediate);
|
||||||
}
|
}
|
||||||
const auto dataTypeSize = sizeof(uint32_t) * 4;
|
uint32_t groupSizeX = builtinFunction->getImmutableData()->getDescriptor().kernelAttributes.simdSize;
|
||||||
size_t adjustedSize = std::max(static_cast<size_t>(1u), size / (dataTypeSize));
|
if (groupSizeX > static_cast<uint32_t>(size)) {
|
||||||
size_t groupSizeX = device->getDeviceInfo().maxWorkGroupSize;
|
groupSizeX = static_cast<uint32_t>(size);
|
||||||
if (groupSizeX > adjustedSize) {
|
|
||||||
groupSizeX = adjustedSize;
|
|
||||||
}
|
}
|
||||||
if (builtinFunction->setGroupSize(static_cast<uint32_t>(groupSizeX), 1u, 1u)) {
|
if (builtinFunction->setGroupSize(groupSizeX, 1u, 1u)) {
|
||||||
DEBUG_BREAK_IF(true);
|
DEBUG_BREAK_IF(true);
|
||||||
return ZE_RESULT_ERROR_UNKNOWN;
|
return ZE_RESULT_ERROR_UNKNOWN;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t groups = adjustedSize / groupSizeX;
|
uint32_t value = *(reinterpret_cast<const unsigned char *>(pattern));
|
||||||
size_t remainingBytes = static_cast<size_t>((adjustedSize % groupSizeX) * dataTypeSize +
|
|
||||||
size % dataTypeSize);
|
|
||||||
ze_group_count_t dispatchFuncArgs{static_cast<uint32_t>(groups), 1u, 1u};
|
|
||||||
|
|
||||||
uint32_t value = 0;
|
|
||||||
memset(&value, *reinterpret_cast<const unsigned char *>(pattern), 4);
|
|
||||||
builtinFunction->setArgBufferWithAlloc(0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc);
|
builtinFunction->setArgBufferWithAlloc(0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc);
|
||||||
builtinFunction->setArgumentValue(1, sizeof(dstAllocation.offset), &dstAllocation.offset);
|
builtinFunction->setArgumentValue(1, sizeof(dstAllocation.offset), &dstAllocation.offset);
|
||||||
builtinFunction->setArgumentValue(2, sizeof(value), &value);
|
builtinFunction->setArgumentValue(2, sizeof(value), &value);
|
||||||
|
|
||||||
appendEventForProfilingAllWalkers(signalEvent, true);
|
appendEventForProfilingAllWalkers(signalEvent, true);
|
||||||
|
|
||||||
|
uint32_t groups = static_cast<uint32_t>(size) / groupSizeX;
|
||||||
|
ze_group_count_t dispatchFuncArgs{groups, 1u, 1u};
|
||||||
res = appendLaunchKernelSplit(builtinFunction, &dispatchFuncArgs, signalEvent, launchParams);
|
res = appendLaunchKernelSplit(builtinFunction, &dispatchFuncArgs, signalEvent, launchParams);
|
||||||
if (res) {
|
if (res) {
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (remainingBytes) {
|
uint32_t groupRemainderSizeX = static_cast<uint32_t>(size) % groupSizeX;
|
||||||
if (isStateless) {
|
if (groupRemainderSizeX) {
|
||||||
builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferImmediateRightLeftOverStateless);
|
builtinFunction->setGroupSize(groupRemainderSizeX, 1u, 1u);
|
||||||
} else {
|
ze_group_count_t dispatchFuncRemainderArgs{1u, 1u, 1u};
|
||||||
builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferImmediateRightLeftOver);
|
|
||||||
}
|
size_t dstOffset = dstAllocation.offset + (size - groupRemainderSizeX);
|
||||||
uint32_t groupSizeY = 1, groupSizeZ = 1;
|
|
||||||
uint32_t groupSizeX = static_cast<uint32_t>(remainingBytes);
|
|
||||||
builtinFunction->suggestGroupSize(groupSizeX, groupSizeY, groupSizeZ, &groupSizeX, &groupSizeY, &groupSizeZ);
|
|
||||||
builtinFunction->setGroupSize(groupSizeX, groupSizeY, groupSizeZ);
|
|
||||||
ze_group_count_t dispatchFuncRemainderArgs{static_cast<uint32_t>(remainingBytes / groupSizeX), 1u, 1u};
|
|
||||||
size_t dstOffset = dstAllocation.offset + (size - remainingBytes);
|
|
||||||
value = *(reinterpret_cast<const unsigned char *>(pattern));
|
|
||||||
builtinFunction->setArgBufferWithAlloc(0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc);
|
builtinFunction->setArgBufferWithAlloc(0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc);
|
||||||
builtinFunction->setArgumentValue(1, sizeof(dstOffset), &dstOffset);
|
builtinFunction->setArgumentValue(1, sizeof(dstOffset), &dstOffset);
|
||||||
builtinFunction->setArgumentValue(2, sizeof(value), &value);
|
|
||||||
|
|
||||||
res = appendLaunchKernelSplit(builtinFunction, &dispatchFuncRemainderArgs, signalEvent, launchParams);
|
res = appendLaunchKernelSplit(builtinFunction, &dispatchFuncRemainderArgs, signalEvent, launchParams);
|
||||||
if (res) {
|
if (res) {
|
||||||
@ -1592,6 +1578,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
|
|||||||
} else {
|
} else {
|
||||||
builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferMiddle);
|
builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferMiddle);
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t middleElSize = sizeof(uint32_t);
|
size_t middleElSize = sizeof(uint32_t);
|
||||||
size_t adjustedSize = size / middleElSize;
|
size_t adjustedSize = size / middleElSize;
|
||||||
uint32_t groupSizeX = static_cast<uint32_t>(adjustedSize);
|
uint32_t groupSizeX = static_cast<uint32_t>(adjustedSize);
|
||||||
|
@ -53,18 +53,14 @@ class AppendFillFixture : public DeviceFixture {
|
|||||||
if (numberOfCallsToAppendLaunchKernelWithParams == thresholdOfCallsToAppendLaunchKernelWithParamsToFail) {
|
if (numberOfCallsToAppendLaunchKernelWithParams == thresholdOfCallsToAppendLaunchKernelWithParamsToFail) {
|
||||||
return ZE_RESULT_ERROR_UNKNOWN;
|
return ZE_RESULT_ERROR_UNKNOWN;
|
||||||
}
|
}
|
||||||
if (numberOfCallsToAppendLaunchKernelWithParams < 2) {
|
|
||||||
threadGroupDimensions[numberOfCallsToAppendLaunchKernelWithParams] = *pThreadGroupDimensions;
|
|
||||||
xGroupSizes[numberOfCallsToAppendLaunchKernelWithParams] = kernel->getGroupSize()[0];
|
|
||||||
}
|
|
||||||
numberOfCallsToAppendLaunchKernelWithParams++;
|
numberOfCallsToAppendLaunchKernelWithParams++;
|
||||||
return CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(kernel,
|
return CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(kernel,
|
||||||
pThreadGroupDimensions,
|
pThreadGroupDimensions,
|
||||||
event,
|
event,
|
||||||
launchParams);
|
launchParams);
|
||||||
}
|
}
|
||||||
ze_group_count_t threadGroupDimensions[2];
|
|
||||||
uint32_t xGroupSizes[2];
|
|
||||||
uint32_t thresholdOfCallsToAppendLaunchKernelWithParamsToFail = std::numeric_limits<uint32_t>::max();
|
uint32_t thresholdOfCallsToAppendLaunchKernelWithParamsToFail = std::numeric_limits<uint32_t>::max();
|
||||||
uint32_t numberOfCallsToAppendLaunchKernelWithParams = 0;
|
uint32_t numberOfCallsToAppendLaunchKernelWithParams = 0;
|
||||||
};
|
};
|
||||||
@ -81,7 +77,6 @@ class AppendFillFixture : public DeviceFixture {
|
|||||||
driverHandle = std::make_unique<Mock<MockDriverFillHandle>>();
|
driverHandle = std::make_unique<Mock<MockDriverFillHandle>>();
|
||||||
driverHandle->initialize(std::move(devices));
|
driverHandle->initialize(std::move(devices));
|
||||||
device = driverHandle->devices[0];
|
device = driverHandle->devices[0];
|
||||||
neoDevice->deviceInfo.maxWorkGroupSize = 256;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void tearDown() {
|
void tearDown() {
|
||||||
@ -194,75 +189,6 @@ HWTEST2_F(AppendFillTest,
|
|||||||
EXPECT_EQ(patternAllocationsVectorSize + 1u, newPatternAllocationsVectorSize);
|
EXPECT_EQ(patternAllocationsVectorSize + 1u, newPatternAllocationsVectorSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
HWTEST2_F(AppendFillTest,
|
|
||||||
givenAppendMemoryFillWhenPatternSizeIsOneThenDispatchOneKernel, IsAtLeastSkl) {
|
|
||||||
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
|
||||||
|
|
||||||
auto commandList = std::make_unique<WhiteBox<MockCommandList<gfxCoreFamily>>>();
|
|
||||||
commandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
|
|
||||||
int pattern = 0;
|
|
||||||
const size_t size = 1024 * 1024;
|
|
||||||
uint8_t *ptr = new uint8_t[size];
|
|
||||||
ze_result_t result = commandList->appendMemoryFill(ptr, &pattern, 1, size, nullptr, 0, nullptr);
|
|
||||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
|
||||||
EXPECT_EQ(1u, commandList->numberOfCallsToAppendLaunchKernelWithParams);
|
|
||||||
EXPECT_EQ(size, commandList->xGroupSizes[0] * commandList->threadGroupDimensions[0].groupCountX * 16);
|
|
||||||
delete[] ptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
HWTEST2_F(AppendFillTest,
|
|
||||||
givenAppendMemoryFillWithUnalignedSizeWhenPatternSizeIsOneThenDispatchTwoKernels, IsAtLeastSkl) {
|
|
||||||
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
|
||||||
|
|
||||||
auto commandList = std::make_unique<WhiteBox<MockCommandList<gfxCoreFamily>>>();
|
|
||||||
commandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
|
|
||||||
int pattern = 0;
|
|
||||||
const size_t size = 1025;
|
|
||||||
uint8_t *ptr = new uint8_t[size];
|
|
||||||
ze_result_t result = commandList->appendMemoryFill(ptr, &pattern, 1, size, nullptr, 0, nullptr);
|
|
||||||
size_t filledSize = commandList->xGroupSizes[0] * commandList->threadGroupDimensions[0].groupCountX * 16;
|
|
||||||
filledSize += commandList->xGroupSizes[1] * commandList->threadGroupDimensions[1].groupCountX;
|
|
||||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
|
||||||
EXPECT_EQ(2u, commandList->numberOfCallsToAppendLaunchKernelWithParams);
|
|
||||||
EXPECT_EQ(size, filledSize);
|
|
||||||
delete[] ptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
HWTEST2_F(AppendFillTest,
|
|
||||||
givenAppendMemoryFillWithSizeBelowMaxWorkgroupSizeWhenPatternSizeIsOneThenDispatchOneKernel, IsAtLeastSkl) {
|
|
||||||
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
|
||||||
|
|
||||||
auto commandList = std::make_unique<WhiteBox<MockCommandList<gfxCoreFamily>>>();
|
|
||||||
commandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
|
|
||||||
int pattern = 0;
|
|
||||||
const size_t size = neoDevice->getDeviceInfo().maxWorkGroupSize / 2;
|
|
||||||
uint8_t *ptr = new uint8_t[size];
|
|
||||||
ze_result_t result = commandList->appendMemoryFill(ptr, &pattern, 1, size, nullptr, 0, nullptr);
|
|
||||||
size_t filledSize = commandList->xGroupSizes[0] * commandList->threadGroupDimensions[0].groupCountX * 16;
|
|
||||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
|
||||||
EXPECT_EQ(1u, commandList->numberOfCallsToAppendLaunchKernelWithParams);
|
|
||||||
EXPECT_EQ(size, filledSize);
|
|
||||||
delete[] ptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
HWTEST2_F(AppendFillTest,
|
|
||||||
givenAppendMemoryFillWhenPatternSizeIsOneThenGroupCountIsCorrect, IsAtLeastSkl) {
|
|
||||||
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
|
||||||
|
|
||||||
auto commandList = std::make_unique<WhiteBox<MockCommandList<gfxCoreFamily>>>();
|
|
||||||
commandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
|
|
||||||
int pattern = 0;
|
|
||||||
const size_t size = 1024 * 1024;
|
|
||||||
uint8_t *ptr = new uint8_t[size];
|
|
||||||
ze_result_t result = commandList->appendMemoryFill(ptr, &pattern, 1, size, nullptr, 0, nullptr);
|
|
||||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
|
||||||
auto groupSize = device->getDeviceInfo().maxWorkGroupSize;
|
|
||||||
auto dataTypeSize = sizeof(uint32_t) * 4;
|
|
||||||
auto expectedGroupCount = size / (dataTypeSize * groupSize);
|
|
||||||
EXPECT_EQ(expectedGroupCount, commandList->threadGroupDimensions[0].groupCountX);
|
|
||||||
delete[] ptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
HWTEST2_F(AppendFillTest,
|
HWTEST2_F(AppendFillTest,
|
||||||
givenCallToAppendMemoryFillWithSizeNotMultipleOfPatternSizeThenSuccessIsReturned, IsAtLeastSkl) {
|
givenCallToAppendMemoryFillWithSizeNotMultipleOfPatternSizeThenSuccessIsReturned, IsAtLeastSkl) {
|
||||||
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||||
|
@ -38,7 +38,7 @@ components:
|
|||||||
dest_dir: kernels_bin
|
dest_dir: kernels_bin
|
||||||
type: git
|
type: git
|
||||||
branch: kernels_bin
|
branch: kernels_bin
|
||||||
revision: 1941-284
|
revision: 1941-282
|
||||||
kmdaf:
|
kmdaf:
|
||||||
branch: kmdaf
|
branch: kmdaf
|
||||||
dest_dir: kmdaf
|
dest_dir: kmdaf
|
||||||
|
@ -48,21 +48,13 @@ __kernel void FillBufferRightLeftover(
|
|||||||
}
|
}
|
||||||
|
|
||||||
__kernel void FillBufferImmediate(
|
__kernel void FillBufferImmediate(
|
||||||
__global uint4* ptr,
|
|
||||||
ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
|
||||||
const uint value)
|
|
||||||
{
|
|
||||||
uint gid = get_global_id(0);
|
|
||||||
(ptr + dstSshOffset)[gid] = value;
|
|
||||||
}
|
|
||||||
|
|
||||||
__kernel void FillBufferImmediateRightLeftOver(
|
|
||||||
__global uchar* ptr,
|
__global uchar* ptr,
|
||||||
ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||||
const uint value)
|
const uint value)
|
||||||
{
|
{
|
||||||
uint gid = get_global_id(0);
|
uint dstIndex = get_global_id(0);
|
||||||
(ptr + dstSshOffset)[gid] = value;
|
__global uchar* pDst = (__global uchar*)ptr + dstSshOffset;
|
||||||
|
pDst[dstIndex] = value;
|
||||||
}
|
}
|
||||||
|
|
||||||
__kernel void FillBufferSSHOffset(
|
__kernel void FillBufferSSHOffset(
|
||||||
|
@ -48,21 +48,13 @@ __kernel void FillBufferRightLeftover(
|
|||||||
}
|
}
|
||||||
|
|
||||||
__kernel void FillBufferImmediate(
|
__kernel void FillBufferImmediate(
|
||||||
__global uint4* ptr,
|
|
||||||
ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
|
||||||
const uint value)
|
|
||||||
{
|
|
||||||
size_t gid = get_global_id(0);
|
|
||||||
(ptr + dstSshOffset)[gid] = value;
|
|
||||||
}
|
|
||||||
|
|
||||||
__kernel void FillBufferImmediateRightLeftOver(
|
|
||||||
__global uchar* ptr,
|
__global uchar* ptr,
|
||||||
ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||||
const uint value)
|
const uint value)
|
||||||
{
|
{
|
||||||
size_t gid = get_global_id(0);
|
size_t dstIndex = get_global_id(0);
|
||||||
(ptr + dstSshOffset)[gid] = value;
|
__global uchar* pDst = (__global uchar*)ptr + dstSshOffset;
|
||||||
|
pDst[dstIndex] = value;
|
||||||
}
|
}
|
||||||
|
|
||||||
__kernel void FillBufferSSHOffset(
|
__kernel void FillBufferSSHOffset(
|
||||||
|
@ -7,5 +7,5 @@
|
|||||||
|
|
||||||
#include "shared/test/common/helpers/kernel_binary_helper.h"
|
#include "shared/test/common/helpers/kernel_binary_helper.h"
|
||||||
|
|
||||||
const std::string KernelBinaryHelper::BUILT_INS("11256751929276190336");
|
const std::string KernelBinaryHelper::BUILT_INS("7998916142903730155");
|
||||||
const std::string KernelBinaryHelper::BUILT_INS_WITH_IMAGES("13967618058110882853_images");
|
const std::string KernelBinaryHelper::BUILT_INS_WITH_IMAGES("16526264370178379440_images");
|
||||||
|
@ -156,21 +156,13 @@ __kernel void FillBufferRightLeftover(
|
|||||||
}
|
}
|
||||||
|
|
||||||
__kernel void FillBufferImmediate(
|
__kernel void FillBufferImmediate(
|
||||||
__global uint4* ptr,
|
|
||||||
ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
|
||||||
const uint value)
|
|
||||||
{
|
|
||||||
uint gid = get_global_id(0);
|
|
||||||
(ptr + dstSshOffset)[gid] = value;
|
|
||||||
}
|
|
||||||
|
|
||||||
__kernel void FillBufferImmediateRightLeftOver(
|
|
||||||
__global uchar* ptr,
|
__global uchar* ptr,
|
||||||
ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||||
const uint value)
|
const uint value)
|
||||||
{
|
{
|
||||||
uint gid = get_global_id(0);
|
uint dstIndex = get_global_id(0);
|
||||||
(ptr + dstSshOffset)[gid] = value;
|
__global uchar* pDst = (__global uchar*)ptr + dstSshOffset;
|
||||||
|
pDst[dstIndex] = value;
|
||||||
}
|
}
|
||||||
|
|
||||||
__kernel void FillBufferSSHOffset(
|
__kernel void FillBufferSSHOffset(
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (C) 2022 Intel Corporation
|
* Copyright (C) 2020-2022 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
@ -156,21 +156,13 @@ __kernel void FillBufferRightLeftover(
|
|||||||
}
|
}
|
||||||
|
|
||||||
__kernel void FillBufferImmediate(
|
__kernel void FillBufferImmediate(
|
||||||
__global uint4* ptr,
|
|
||||||
ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
|
||||||
const uint value)
|
|
||||||
{
|
|
||||||
uint gid = get_global_id(0);
|
|
||||||
(ptr + dstSshOffset)[gid] = value;
|
|
||||||
}
|
|
||||||
|
|
||||||
__kernel void FillBufferImmediateRightLeftOver(
|
|
||||||
__global uchar* ptr,
|
__global uchar* ptr,
|
||||||
ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||||
const uint value)
|
const uint value)
|
||||||
{
|
{
|
||||||
uint gid = get_global_id(0);
|
uint dstIndex = get_global_id(0);
|
||||||
(ptr + dstSshOffset)[gid] = value;
|
__global uchar* pDst = (__global uchar*)ptr + dstSshOffset;
|
||||||
|
pDst[dstIndex] = value;
|
||||||
}
|
}
|
||||||
|
|
||||||
__kernel void FillBufferSSHOffset(
|
__kernel void FillBufferSSHOffset(
|
@ -156,21 +156,13 @@ __kernel void FillBufferRightLeftover(
|
|||||||
}
|
}
|
||||||
|
|
||||||
__kernel void FillBufferImmediate(
|
__kernel void FillBufferImmediate(
|
||||||
__global uint4* ptr,
|
|
||||||
uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
|
||||||
const uint value)
|
|
||||||
{
|
|
||||||
uint gid = get_global_id(0);
|
|
||||||
(ptr + dstSshOffset)[gid] = value;
|
|
||||||
}
|
|
||||||
|
|
||||||
__kernel void FillBufferImmediateRightLeftOver(
|
|
||||||
__global uchar* ptr,
|
__global uchar* ptr,
|
||||||
uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||||
const uint value)
|
const uint value)
|
||||||
{
|
{
|
||||||
uint gid = get_global_id(0);
|
uint dstIndex = get_global_id(0);
|
||||||
(ptr + dstSshOffset)[gid] = value;
|
__global uchar* pDst = (__global uchar*)ptr + dstSshOffset;
|
||||||
|
pDst[dstIndex] = value;
|
||||||
}
|
}
|
||||||
|
|
||||||
__kernel void FillBufferSSHOffset(
|
__kernel void FillBufferSSHOffset(
|
||||||
|
@ -156,21 +156,13 @@ __kernel void FillBufferRightLeftover(
|
|||||||
}
|
}
|
||||||
|
|
||||||
__kernel void FillBufferImmediate(
|
__kernel void FillBufferImmediate(
|
||||||
__global uint4* ptr,
|
|
||||||
ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
|
||||||
const uint value)
|
|
||||||
{
|
|
||||||
uint gid = get_global_id(0);
|
|
||||||
(ptr + dstSshOffset)[gid] = value;
|
|
||||||
}
|
|
||||||
|
|
||||||
__kernel void FillBufferImmediateRightLeftOver(
|
|
||||||
__global uchar* ptr,
|
__global uchar* ptr,
|
||||||
ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||||
const uint value)
|
const uint value)
|
||||||
{
|
{
|
||||||
uint gid = get_global_id(0);
|
size_t dstIndex = get_global_id(0);
|
||||||
(ptr + dstSshOffset)[gid] = value;
|
__global uchar* pDst = (__global uchar*)ptr + dstSshOffset;
|
||||||
|
pDst[dstIndex] = value;
|
||||||
}
|
}
|
||||||
|
|
||||||
__kernel void FillBufferSSHOffset(
|
__kernel void FillBufferSSHOffset(
|
||||||
|
@ -156,21 +156,13 @@ __kernel void FillBufferRightLeftover(
|
|||||||
}
|
}
|
||||||
|
|
||||||
__kernel void FillBufferImmediate(
|
__kernel void FillBufferImmediate(
|
||||||
__global uint4* ptr,
|
|
||||||
uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
|
||||||
const uint value)
|
|
||||||
{
|
|
||||||
uint gid = get_global_id(0);
|
|
||||||
(ptr + dstSshOffset)[gid] = value;
|
|
||||||
}
|
|
||||||
|
|
||||||
__kernel void FillBufferImmediateRightLeftOver(
|
|
||||||
__global uchar* ptr,
|
__global uchar* ptr,
|
||||||
uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||||
const uint value)
|
const uint value)
|
||||||
{
|
{
|
||||||
uint gid = get_global_id(0);
|
uint dstIndex = get_global_id(0);
|
||||||
(ptr + dstSshOffset)[gid] = value;
|
__global uchar* pDst = (__global uchar*)ptr + dstSshOffset;
|
||||||
|
pDst[dstIndex] = value;
|
||||||
}
|
}
|
||||||
|
|
||||||
__kernel void FillBufferSSHOffset(
|
__kernel void FillBufferSSHOffset(
|
||||||
|
Reference in New Issue
Block a user