mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-10 12:53:42 +08:00
Revert "Use uint4 type instead of char in builtin kernel"
This reverts commit a39bc7e7b3
.
Signed-off-by: Compute-Runtime-Validation <compute-runtime-validation@intel.com>
This commit is contained in:

committed by
Compute-Runtime-Automation

parent
6e5c0141b5
commit
84f19a1b93
@ -29,8 +29,6 @@ enum class Builtin : uint32_t {
|
||||
CopyBufferToBufferSideStateless,
|
||||
FillBufferImmediate,
|
||||
FillBufferImmediateStateless,
|
||||
FillBufferImmediateRightLeftOver,
|
||||
FillBufferImmediateRightLeftOverStateless,
|
||||
FillBufferSSHOffset,
|
||||
FillBufferSSHOffsetStateless,
|
||||
FillBufferMiddle,
|
||||
|
@ -79,14 +79,6 @@ void BuiltinFunctionsLibImpl::initBuiltinKernel(Builtin func) {
|
||||
builtinName = "FillBufferImmediate";
|
||||
builtin = NEO::EBuiltInOps::FillBufferStateless;
|
||||
break;
|
||||
case Builtin::FillBufferImmediateRightLeftOver:
|
||||
builtinName = "FillBufferImmediateRightLeftOver";
|
||||
builtin = NEO::EBuiltInOps::FillBuffer;
|
||||
break;
|
||||
case Builtin::FillBufferImmediateRightLeftOverStateless:
|
||||
builtinName = "FillBufferImmediateRightLeftOver";
|
||||
builtin = NEO::EBuiltInOps::FillBufferStateless;
|
||||
break;
|
||||
case Builtin::FillBufferSSHOffset:
|
||||
builtinName = "FillBufferSSHOffset";
|
||||
builtin = NEO::EBuiltInOps::FillBuffer;
|
||||
|
@ -1533,51 +1533,37 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
|
||||
} else {
|
||||
builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferImmediate);
|
||||
}
|
||||
const auto dataTypeSize = sizeof(uint32_t) * 4;
|
||||
size_t adjustedSize = std::max(static_cast<size_t>(1u), size / (dataTypeSize));
|
||||
size_t groupSizeX = device->getDeviceInfo().maxWorkGroupSize;
|
||||
if (groupSizeX > adjustedSize) {
|
||||
groupSizeX = adjustedSize;
|
||||
uint32_t groupSizeX = builtinFunction->getImmutableData()->getDescriptor().kernelAttributes.simdSize;
|
||||
if (groupSizeX > static_cast<uint32_t>(size)) {
|
||||
groupSizeX = static_cast<uint32_t>(size);
|
||||
}
|
||||
if (builtinFunction->setGroupSize(static_cast<uint32_t>(groupSizeX), 1u, 1u)) {
|
||||
if (builtinFunction->setGroupSize(groupSizeX, 1u, 1u)) {
|
||||
DEBUG_BREAK_IF(true);
|
||||
return ZE_RESULT_ERROR_UNKNOWN;
|
||||
}
|
||||
|
||||
size_t groups = adjustedSize / groupSizeX;
|
||||
size_t remainingBytes = static_cast<size_t>((adjustedSize % groupSizeX) * dataTypeSize +
|
||||
size % dataTypeSize);
|
||||
ze_group_count_t dispatchFuncArgs{static_cast<uint32_t>(groups), 1u, 1u};
|
||||
|
||||
uint32_t value = 0;
|
||||
memset(&value, *reinterpret_cast<const unsigned char *>(pattern), 4);
|
||||
uint32_t value = *(reinterpret_cast<const unsigned char *>(pattern));
|
||||
builtinFunction->setArgBufferWithAlloc(0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc);
|
||||
builtinFunction->setArgumentValue(1, sizeof(dstAllocation.offset), &dstAllocation.offset);
|
||||
builtinFunction->setArgumentValue(2, sizeof(value), &value);
|
||||
|
||||
appendEventForProfilingAllWalkers(signalEvent, true);
|
||||
|
||||
uint32_t groups = static_cast<uint32_t>(size) / groupSizeX;
|
||||
ze_group_count_t dispatchFuncArgs{groups, 1u, 1u};
|
||||
res = appendLaunchKernelSplit(builtinFunction, &dispatchFuncArgs, signalEvent, launchParams);
|
||||
if (res) {
|
||||
return res;
|
||||
}
|
||||
|
||||
if (remainingBytes) {
|
||||
if (isStateless) {
|
||||
builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferImmediateRightLeftOverStateless);
|
||||
} else {
|
||||
builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferImmediateRightLeftOver);
|
||||
}
|
||||
uint32_t groupSizeY = 1, groupSizeZ = 1;
|
||||
uint32_t groupSizeX = static_cast<uint32_t>(remainingBytes);
|
||||
builtinFunction->suggestGroupSize(groupSizeX, groupSizeY, groupSizeZ, &groupSizeX, &groupSizeY, &groupSizeZ);
|
||||
builtinFunction->setGroupSize(groupSizeX, groupSizeY, groupSizeZ);
|
||||
ze_group_count_t dispatchFuncRemainderArgs{static_cast<uint32_t>(remainingBytes / groupSizeX), 1u, 1u};
|
||||
size_t dstOffset = dstAllocation.offset + (size - remainingBytes);
|
||||
value = *(reinterpret_cast<const unsigned char *>(pattern));
|
||||
uint32_t groupRemainderSizeX = static_cast<uint32_t>(size) % groupSizeX;
|
||||
if (groupRemainderSizeX) {
|
||||
builtinFunction->setGroupSize(groupRemainderSizeX, 1u, 1u);
|
||||
ze_group_count_t dispatchFuncRemainderArgs{1u, 1u, 1u};
|
||||
|
||||
size_t dstOffset = dstAllocation.offset + (size - groupRemainderSizeX);
|
||||
builtinFunction->setArgBufferWithAlloc(0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc);
|
||||
builtinFunction->setArgumentValue(1, sizeof(dstOffset), &dstOffset);
|
||||
builtinFunction->setArgumentValue(2, sizeof(value), &value);
|
||||
|
||||
res = appendLaunchKernelSplit(builtinFunction, &dispatchFuncRemainderArgs, signalEvent, launchParams);
|
||||
if (res) {
|
||||
@ -1592,6 +1578,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
|
||||
} else {
|
||||
builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferMiddle);
|
||||
}
|
||||
|
||||
size_t middleElSize = sizeof(uint32_t);
|
||||
size_t adjustedSize = size / middleElSize;
|
||||
uint32_t groupSizeX = static_cast<uint32_t>(adjustedSize);
|
||||
|
@ -53,18 +53,14 @@ class AppendFillFixture : public DeviceFixture {
|
||||
if (numberOfCallsToAppendLaunchKernelWithParams == thresholdOfCallsToAppendLaunchKernelWithParamsToFail) {
|
||||
return ZE_RESULT_ERROR_UNKNOWN;
|
||||
}
|
||||
if (numberOfCallsToAppendLaunchKernelWithParams < 2) {
|
||||
threadGroupDimensions[numberOfCallsToAppendLaunchKernelWithParams] = *pThreadGroupDimensions;
|
||||
xGroupSizes[numberOfCallsToAppendLaunchKernelWithParams] = kernel->getGroupSize()[0];
|
||||
}
|
||||
|
||||
numberOfCallsToAppendLaunchKernelWithParams++;
|
||||
return CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(kernel,
|
||||
pThreadGroupDimensions,
|
||||
event,
|
||||
launchParams);
|
||||
}
|
||||
ze_group_count_t threadGroupDimensions[2];
|
||||
uint32_t xGroupSizes[2];
|
||||
|
||||
uint32_t thresholdOfCallsToAppendLaunchKernelWithParamsToFail = std::numeric_limits<uint32_t>::max();
|
||||
uint32_t numberOfCallsToAppendLaunchKernelWithParams = 0;
|
||||
};
|
||||
@ -81,7 +77,6 @@ class AppendFillFixture : public DeviceFixture {
|
||||
driverHandle = std::make_unique<Mock<MockDriverFillHandle>>();
|
||||
driverHandle->initialize(std::move(devices));
|
||||
device = driverHandle->devices[0];
|
||||
neoDevice->deviceInfo.maxWorkGroupSize = 256;
|
||||
}
|
||||
|
||||
void tearDown() {
|
||||
@ -194,75 +189,6 @@ HWTEST2_F(AppendFillTest,
|
||||
EXPECT_EQ(patternAllocationsVectorSize + 1u, newPatternAllocationsVectorSize);
|
||||
}
|
||||
|
||||
HWTEST2_F(AppendFillTest,
|
||||
givenAppendMemoryFillWhenPatternSizeIsOneThenDispatchOneKernel, IsAtLeastSkl) {
|
||||
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||
|
||||
auto commandList = std::make_unique<WhiteBox<MockCommandList<gfxCoreFamily>>>();
|
||||
commandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
|
||||
int pattern = 0;
|
||||
const size_t size = 1024 * 1024;
|
||||
uint8_t *ptr = new uint8_t[size];
|
||||
ze_result_t result = commandList->appendMemoryFill(ptr, &pattern, 1, size, nullptr, 0, nullptr);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
EXPECT_EQ(1u, commandList->numberOfCallsToAppendLaunchKernelWithParams);
|
||||
EXPECT_EQ(size, commandList->xGroupSizes[0] * commandList->threadGroupDimensions[0].groupCountX * 16);
|
||||
delete[] ptr;
|
||||
}
|
||||
|
||||
HWTEST2_F(AppendFillTest,
|
||||
givenAppendMemoryFillWithUnalignedSizeWhenPatternSizeIsOneThenDispatchTwoKernels, IsAtLeastSkl) {
|
||||
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||
|
||||
auto commandList = std::make_unique<WhiteBox<MockCommandList<gfxCoreFamily>>>();
|
||||
commandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
|
||||
int pattern = 0;
|
||||
const size_t size = 1025;
|
||||
uint8_t *ptr = new uint8_t[size];
|
||||
ze_result_t result = commandList->appendMemoryFill(ptr, &pattern, 1, size, nullptr, 0, nullptr);
|
||||
size_t filledSize = commandList->xGroupSizes[0] * commandList->threadGroupDimensions[0].groupCountX * 16;
|
||||
filledSize += commandList->xGroupSizes[1] * commandList->threadGroupDimensions[1].groupCountX;
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
EXPECT_EQ(2u, commandList->numberOfCallsToAppendLaunchKernelWithParams);
|
||||
EXPECT_EQ(size, filledSize);
|
||||
delete[] ptr;
|
||||
}
|
||||
|
||||
HWTEST2_F(AppendFillTest,
|
||||
givenAppendMemoryFillWithSizeBelowMaxWorkgroupSizeWhenPatternSizeIsOneThenDispatchOneKernel, IsAtLeastSkl) {
|
||||
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||
|
||||
auto commandList = std::make_unique<WhiteBox<MockCommandList<gfxCoreFamily>>>();
|
||||
commandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
|
||||
int pattern = 0;
|
||||
const size_t size = neoDevice->getDeviceInfo().maxWorkGroupSize / 2;
|
||||
uint8_t *ptr = new uint8_t[size];
|
||||
ze_result_t result = commandList->appendMemoryFill(ptr, &pattern, 1, size, nullptr, 0, nullptr);
|
||||
size_t filledSize = commandList->xGroupSizes[0] * commandList->threadGroupDimensions[0].groupCountX * 16;
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
EXPECT_EQ(1u, commandList->numberOfCallsToAppendLaunchKernelWithParams);
|
||||
EXPECT_EQ(size, filledSize);
|
||||
delete[] ptr;
|
||||
}
|
||||
|
||||
HWTEST2_F(AppendFillTest,
|
||||
givenAppendMemoryFillWhenPatternSizeIsOneThenGroupCountIsCorrect, IsAtLeastSkl) {
|
||||
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||
|
||||
auto commandList = std::make_unique<WhiteBox<MockCommandList<gfxCoreFamily>>>();
|
||||
commandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
|
||||
int pattern = 0;
|
||||
const size_t size = 1024 * 1024;
|
||||
uint8_t *ptr = new uint8_t[size];
|
||||
ze_result_t result = commandList->appendMemoryFill(ptr, &pattern, 1, size, nullptr, 0, nullptr);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
auto groupSize = device->getDeviceInfo().maxWorkGroupSize;
|
||||
auto dataTypeSize = sizeof(uint32_t) * 4;
|
||||
auto expectedGroupCount = size / (dataTypeSize * groupSize);
|
||||
EXPECT_EQ(expectedGroupCount, commandList->threadGroupDimensions[0].groupCountX);
|
||||
delete[] ptr;
|
||||
}
|
||||
|
||||
HWTEST2_F(AppendFillTest,
|
||||
givenCallToAppendMemoryFillWithSizeNotMultipleOfPatternSizeThenSuccessIsReturned, IsAtLeastSkl) {
|
||||
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||
|
@ -38,7 +38,7 @@ components:
|
||||
dest_dir: kernels_bin
|
||||
type: git
|
||||
branch: kernels_bin
|
||||
revision: 1941-284
|
||||
revision: 1941-282
|
||||
kmdaf:
|
||||
branch: kmdaf
|
||||
dest_dir: kmdaf
|
||||
|
@ -48,21 +48,13 @@ __kernel void FillBufferRightLeftover(
|
||||
}
|
||||
|
||||
__kernel void FillBufferImmediate(
|
||||
__global uint4* ptr,
|
||||
ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
const uint value)
|
||||
{
|
||||
uint gid = get_global_id(0);
|
||||
(ptr + dstSshOffset)[gid] = value;
|
||||
}
|
||||
|
||||
__kernel void FillBufferImmediateRightLeftOver(
|
||||
__global uchar* ptr,
|
||||
ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
const uint value)
|
||||
{
|
||||
uint gid = get_global_id(0);
|
||||
(ptr + dstSshOffset)[gid] = value;
|
||||
uint dstIndex = get_global_id(0);
|
||||
__global uchar* pDst = (__global uchar*)ptr + dstSshOffset;
|
||||
pDst[dstIndex] = value;
|
||||
}
|
||||
|
||||
__kernel void FillBufferSSHOffset(
|
||||
|
@ -48,21 +48,13 @@ __kernel void FillBufferRightLeftover(
|
||||
}
|
||||
|
||||
__kernel void FillBufferImmediate(
|
||||
__global uint4* ptr,
|
||||
ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
const uint value)
|
||||
{
|
||||
size_t gid = get_global_id(0);
|
||||
(ptr + dstSshOffset)[gid] = value;
|
||||
}
|
||||
|
||||
__kernel void FillBufferImmediateRightLeftOver(
|
||||
__global uchar* ptr,
|
||||
ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
const uint value)
|
||||
{
|
||||
size_t gid = get_global_id(0);
|
||||
(ptr + dstSshOffset)[gid] = value;
|
||||
size_t dstIndex = get_global_id(0);
|
||||
__global uchar* pDst = (__global uchar*)ptr + dstSshOffset;
|
||||
pDst[dstIndex] = value;
|
||||
}
|
||||
|
||||
__kernel void FillBufferSSHOffset(
|
||||
|
@ -7,5 +7,5 @@
|
||||
|
||||
#include "shared/test/common/helpers/kernel_binary_helper.h"
|
||||
|
||||
const std::string KernelBinaryHelper::BUILT_INS("11256751929276190336");
|
||||
const std::string KernelBinaryHelper::BUILT_INS_WITH_IMAGES("13967618058110882853_images");
|
||||
const std::string KernelBinaryHelper::BUILT_INS("7998916142903730155");
|
||||
const std::string KernelBinaryHelper::BUILT_INS_WITH_IMAGES("16526264370178379440_images");
|
||||
|
@ -156,21 +156,13 @@ __kernel void FillBufferRightLeftover(
|
||||
}
|
||||
|
||||
__kernel void FillBufferImmediate(
|
||||
__global uint4* ptr,
|
||||
ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
const uint value)
|
||||
{
|
||||
uint gid = get_global_id(0);
|
||||
(ptr + dstSshOffset)[gid] = value;
|
||||
}
|
||||
|
||||
__kernel void FillBufferImmediateRightLeftOver(
|
||||
__global uchar* ptr,
|
||||
ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
const uint value)
|
||||
{
|
||||
uint gid = get_global_id(0);
|
||||
(ptr + dstSshOffset)[gid] = value;
|
||||
uint dstIndex = get_global_id(0);
|
||||
__global uchar* pDst = (__global uchar*)ptr + dstSshOffset;
|
||||
pDst[dstIndex] = value;
|
||||
}
|
||||
|
||||
__kernel void FillBufferSSHOffset(
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 Intel Corporation
|
||||
* Copyright (C) 2020-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@ -156,21 +156,13 @@ __kernel void FillBufferRightLeftover(
|
||||
}
|
||||
|
||||
__kernel void FillBufferImmediate(
|
||||
__global uint4* ptr,
|
||||
ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
const uint value)
|
||||
{
|
||||
uint gid = get_global_id(0);
|
||||
(ptr + dstSshOffset)[gid] = value;
|
||||
}
|
||||
|
||||
__kernel void FillBufferImmediateRightLeftOver(
|
||||
__global uchar* ptr,
|
||||
ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
const uint value)
|
||||
{
|
||||
uint gid = get_global_id(0);
|
||||
(ptr + dstSshOffset)[gid] = value;
|
||||
uint dstIndex = get_global_id(0);
|
||||
__global uchar* pDst = (__global uchar*)ptr + dstSshOffset;
|
||||
pDst[dstIndex] = value;
|
||||
}
|
||||
|
||||
__kernel void FillBufferSSHOffset(
|
@ -156,21 +156,13 @@ __kernel void FillBufferRightLeftover(
|
||||
}
|
||||
|
||||
__kernel void FillBufferImmediate(
|
||||
__global uint4* ptr,
|
||||
uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
const uint value)
|
||||
{
|
||||
uint gid = get_global_id(0);
|
||||
(ptr + dstSshOffset)[gid] = value;
|
||||
}
|
||||
|
||||
__kernel void FillBufferImmediateRightLeftOver(
|
||||
__global uchar* ptr,
|
||||
uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
const uint value)
|
||||
{
|
||||
uint gid = get_global_id(0);
|
||||
(ptr + dstSshOffset)[gid] = value;
|
||||
uint dstIndex = get_global_id(0);
|
||||
__global uchar* pDst = (__global uchar*)ptr + dstSshOffset;
|
||||
pDst[dstIndex] = value;
|
||||
}
|
||||
|
||||
__kernel void FillBufferSSHOffset(
|
||||
|
@ -156,21 +156,13 @@ __kernel void FillBufferRightLeftover(
|
||||
}
|
||||
|
||||
__kernel void FillBufferImmediate(
|
||||
__global uint4* ptr,
|
||||
ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
const uint value)
|
||||
{
|
||||
uint gid = get_global_id(0);
|
||||
(ptr + dstSshOffset)[gid] = value;
|
||||
}
|
||||
|
||||
__kernel void FillBufferImmediateRightLeftOver(
|
||||
__global uchar* ptr,
|
||||
ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
const uint value)
|
||||
{
|
||||
uint gid = get_global_id(0);
|
||||
(ptr + dstSshOffset)[gid] = value;
|
||||
size_t dstIndex = get_global_id(0);
|
||||
__global uchar* pDst = (__global uchar*)ptr + dstSshOffset;
|
||||
pDst[dstIndex] = value;
|
||||
}
|
||||
|
||||
__kernel void FillBufferSSHOffset(
|
||||
|
@ -156,21 +156,13 @@ __kernel void FillBufferRightLeftover(
|
||||
}
|
||||
|
||||
__kernel void FillBufferImmediate(
|
||||
__global uint4* ptr,
|
||||
uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
const uint value)
|
||||
{
|
||||
uint gid = get_global_id(0);
|
||||
(ptr + dstSshOffset)[gid] = value;
|
||||
}
|
||||
|
||||
__kernel void FillBufferImmediateRightLeftOver(
|
||||
__global uchar* ptr,
|
||||
uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
|
||||
const uint value)
|
||||
{
|
||||
uint gid = get_global_id(0);
|
||||
(ptr + dstSshOffset)[gid] = value;
|
||||
uint dstIndex = get_global_id(0);
|
||||
__global uchar* pDst = (__global uchar*)ptr + dstSshOffset;
|
||||
pDst[dstIndex] = value;
|
||||
}
|
||||
|
||||
__kernel void FillBufferSSHOffset(
|
||||
|
Reference in New Issue
Block a user