Revert "Use uint4 type instead of char in builtin kernel"

This reverts commit a39bc7e7b3.

Signed-off-by: Compute-Runtime-Validation <compute-runtime-validation@intel.com>
This commit is contained in:
Compute-Runtime-Validation
2022-09-01 05:04:43 +02:00
committed by Compute-Runtime-Automation
parent 6e5c0141b5
commit 84f19a1b93
15 changed files with 44 additions and 197 deletions

View File

@ -29,8 +29,6 @@ enum class Builtin : uint32_t {
CopyBufferToBufferSideStateless,
FillBufferImmediate,
FillBufferImmediateStateless,
FillBufferImmediateRightLeftOver,
FillBufferImmediateRightLeftOverStateless,
FillBufferSSHOffset,
FillBufferSSHOffsetStateless,
FillBufferMiddle,

View File

@ -79,14 +79,6 @@ void BuiltinFunctionsLibImpl::initBuiltinKernel(Builtin func) {
builtinName = "FillBufferImmediate";
builtin = NEO::EBuiltInOps::FillBufferStateless;
break;
case Builtin::FillBufferImmediateRightLeftOver:
builtinName = "FillBufferImmediateRightLeftOver";
builtin = NEO::EBuiltInOps::FillBuffer;
break;
case Builtin::FillBufferImmediateRightLeftOverStateless:
builtinName = "FillBufferImmediateRightLeftOver";
builtin = NEO::EBuiltInOps::FillBufferStateless;
break;
case Builtin::FillBufferSSHOffset:
builtinName = "FillBufferSSHOffset";
builtin = NEO::EBuiltInOps::FillBuffer;

View File

@ -1533,51 +1533,37 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
} else {
builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferImmediate);
}
const auto dataTypeSize = sizeof(uint32_t) * 4;
size_t adjustedSize = std::max(static_cast<size_t>(1u), size / (dataTypeSize));
size_t groupSizeX = device->getDeviceInfo().maxWorkGroupSize;
if (groupSizeX > adjustedSize) {
groupSizeX = adjustedSize;
uint32_t groupSizeX = builtinFunction->getImmutableData()->getDescriptor().kernelAttributes.simdSize;
if (groupSizeX > static_cast<uint32_t>(size)) {
groupSizeX = static_cast<uint32_t>(size);
}
if (builtinFunction->setGroupSize(static_cast<uint32_t>(groupSizeX), 1u, 1u)) {
if (builtinFunction->setGroupSize(groupSizeX, 1u, 1u)) {
DEBUG_BREAK_IF(true);
return ZE_RESULT_ERROR_UNKNOWN;
}
size_t groups = adjustedSize / groupSizeX;
size_t remainingBytes = static_cast<size_t>((adjustedSize % groupSizeX) * dataTypeSize +
size % dataTypeSize);
ze_group_count_t dispatchFuncArgs{static_cast<uint32_t>(groups), 1u, 1u};
uint32_t value = 0;
memset(&value, *reinterpret_cast<const unsigned char *>(pattern), 4);
uint32_t value = *(reinterpret_cast<const unsigned char *>(pattern));
builtinFunction->setArgBufferWithAlloc(0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc);
builtinFunction->setArgumentValue(1, sizeof(dstAllocation.offset), &dstAllocation.offset);
builtinFunction->setArgumentValue(2, sizeof(value), &value);
appendEventForProfilingAllWalkers(signalEvent, true);
uint32_t groups = static_cast<uint32_t>(size) / groupSizeX;
ze_group_count_t dispatchFuncArgs{groups, 1u, 1u};
res = appendLaunchKernelSplit(builtinFunction, &dispatchFuncArgs, signalEvent, launchParams);
if (res) {
return res;
}
if (remainingBytes) {
if (isStateless) {
builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferImmediateRightLeftOverStateless);
} else {
builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferImmediateRightLeftOver);
}
uint32_t groupSizeY = 1, groupSizeZ = 1;
uint32_t groupSizeX = static_cast<uint32_t>(remainingBytes);
builtinFunction->suggestGroupSize(groupSizeX, groupSizeY, groupSizeZ, &groupSizeX, &groupSizeY, &groupSizeZ);
builtinFunction->setGroupSize(groupSizeX, groupSizeY, groupSizeZ);
ze_group_count_t dispatchFuncRemainderArgs{static_cast<uint32_t>(remainingBytes / groupSizeX), 1u, 1u};
size_t dstOffset = dstAllocation.offset + (size - remainingBytes);
value = *(reinterpret_cast<const unsigned char *>(pattern));
uint32_t groupRemainderSizeX = static_cast<uint32_t>(size) % groupSizeX;
if (groupRemainderSizeX) {
builtinFunction->setGroupSize(groupRemainderSizeX, 1u, 1u);
ze_group_count_t dispatchFuncRemainderArgs{1u, 1u, 1u};
size_t dstOffset = dstAllocation.offset + (size - groupRemainderSizeX);
builtinFunction->setArgBufferWithAlloc(0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc);
builtinFunction->setArgumentValue(1, sizeof(dstOffset), &dstOffset);
builtinFunction->setArgumentValue(2, sizeof(value), &value);
res = appendLaunchKernelSplit(builtinFunction, &dispatchFuncRemainderArgs, signalEvent, launchParams);
if (res) {
@ -1592,6 +1578,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
} else {
builtinFunction = device->getBuiltinFunctionsLib()->getFunction(Builtin::FillBufferMiddle);
}
size_t middleElSize = sizeof(uint32_t);
size_t adjustedSize = size / middleElSize;
uint32_t groupSizeX = static_cast<uint32_t>(adjustedSize);

View File

@ -53,18 +53,14 @@ class AppendFillFixture : public DeviceFixture {
if (numberOfCallsToAppendLaunchKernelWithParams == thresholdOfCallsToAppendLaunchKernelWithParamsToFail) {
return ZE_RESULT_ERROR_UNKNOWN;
}
if (numberOfCallsToAppendLaunchKernelWithParams < 2) {
threadGroupDimensions[numberOfCallsToAppendLaunchKernelWithParams] = *pThreadGroupDimensions;
xGroupSizes[numberOfCallsToAppendLaunchKernelWithParams] = kernel->getGroupSize()[0];
}
numberOfCallsToAppendLaunchKernelWithParams++;
return CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(kernel,
pThreadGroupDimensions,
event,
launchParams);
}
ze_group_count_t threadGroupDimensions[2];
uint32_t xGroupSizes[2];
uint32_t thresholdOfCallsToAppendLaunchKernelWithParamsToFail = std::numeric_limits<uint32_t>::max();
uint32_t numberOfCallsToAppendLaunchKernelWithParams = 0;
};
@ -81,7 +77,6 @@ class AppendFillFixture : public DeviceFixture {
driverHandle = std::make_unique<Mock<MockDriverFillHandle>>();
driverHandle->initialize(std::move(devices));
device = driverHandle->devices[0];
neoDevice->deviceInfo.maxWorkGroupSize = 256;
}
void tearDown() {
@ -194,75 +189,6 @@ HWTEST2_F(AppendFillTest,
EXPECT_EQ(patternAllocationsVectorSize + 1u, newPatternAllocationsVectorSize);
}
HWTEST2_F(AppendFillTest,
givenAppendMemoryFillWhenPatternSizeIsOneThenDispatchOneKernel, IsAtLeastSkl) {
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
auto commandList = std::make_unique<WhiteBox<MockCommandList<gfxCoreFamily>>>();
commandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
int pattern = 0;
const size_t size = 1024 * 1024;
uint8_t *ptr = new uint8_t[size];
ze_result_t result = commandList->appendMemoryFill(ptr, &pattern, 1, size, nullptr, 0, nullptr);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(1u, commandList->numberOfCallsToAppendLaunchKernelWithParams);
EXPECT_EQ(size, commandList->xGroupSizes[0] * commandList->threadGroupDimensions[0].groupCountX * 16);
delete[] ptr;
}
HWTEST2_F(AppendFillTest,
givenAppendMemoryFillWithUnalignedSizeWhenPatternSizeIsOneThenDispatchTwoKernels, IsAtLeastSkl) {
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
auto commandList = std::make_unique<WhiteBox<MockCommandList<gfxCoreFamily>>>();
commandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
int pattern = 0;
const size_t size = 1025;
uint8_t *ptr = new uint8_t[size];
ze_result_t result = commandList->appendMemoryFill(ptr, &pattern, 1, size, nullptr, 0, nullptr);
size_t filledSize = commandList->xGroupSizes[0] * commandList->threadGroupDimensions[0].groupCountX * 16;
filledSize += commandList->xGroupSizes[1] * commandList->threadGroupDimensions[1].groupCountX;
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(2u, commandList->numberOfCallsToAppendLaunchKernelWithParams);
EXPECT_EQ(size, filledSize);
delete[] ptr;
}
HWTEST2_F(AppendFillTest,
givenAppendMemoryFillWithSizeBelowMaxWorkgroupSizeWhenPatternSizeIsOneThenDispatchOneKernel, IsAtLeastSkl) {
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
auto commandList = std::make_unique<WhiteBox<MockCommandList<gfxCoreFamily>>>();
commandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
int pattern = 0;
const size_t size = neoDevice->getDeviceInfo().maxWorkGroupSize / 2;
uint8_t *ptr = new uint8_t[size];
ze_result_t result = commandList->appendMemoryFill(ptr, &pattern, 1, size, nullptr, 0, nullptr);
size_t filledSize = commandList->xGroupSizes[0] * commandList->threadGroupDimensions[0].groupCountX * 16;
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(1u, commandList->numberOfCallsToAppendLaunchKernelWithParams);
EXPECT_EQ(size, filledSize);
delete[] ptr;
}
HWTEST2_F(AppendFillTest,
givenAppendMemoryFillWhenPatternSizeIsOneThenGroupCountIsCorrect, IsAtLeastSkl) {
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
auto commandList = std::make_unique<WhiteBox<MockCommandList<gfxCoreFamily>>>();
commandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
int pattern = 0;
const size_t size = 1024 * 1024;
uint8_t *ptr = new uint8_t[size];
ze_result_t result = commandList->appendMemoryFill(ptr, &pattern, 1, size, nullptr, 0, nullptr);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto groupSize = device->getDeviceInfo().maxWorkGroupSize;
auto dataTypeSize = sizeof(uint32_t) * 4;
auto expectedGroupCount = size / (dataTypeSize * groupSize);
EXPECT_EQ(expectedGroupCount, commandList->threadGroupDimensions[0].groupCountX);
delete[] ptr;
}
HWTEST2_F(AppendFillTest,
givenCallToAppendMemoryFillWithSizeNotMultipleOfPatternSizeThenSuccessIsReturned, IsAtLeastSkl) {
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;

View File

@ -38,7 +38,7 @@ components:
dest_dir: kernels_bin
type: git
branch: kernels_bin
revision: 1941-284
revision: 1941-282
kmdaf:
branch: kmdaf
dest_dir: kmdaf

View File

@ -48,21 +48,13 @@ __kernel void FillBufferRightLeftover(
}
__kernel void FillBufferImmediate(
__global uint4* ptr,
ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
const uint value)
{
uint gid = get_global_id(0);
(ptr + dstSshOffset)[gid] = value;
}
__kernel void FillBufferImmediateRightLeftOver(
__global uchar* ptr,
ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
const uint value)
{
uint gid = get_global_id(0);
(ptr + dstSshOffset)[gid] = value;
uint dstIndex = get_global_id(0);
__global uchar* pDst = (__global uchar*)ptr + dstSshOffset;
pDst[dstIndex] = value;
}
__kernel void FillBufferSSHOffset(

View File

@ -48,21 +48,13 @@ __kernel void FillBufferRightLeftover(
}
__kernel void FillBufferImmediate(
__global uint4* ptr,
ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
const uint value)
{
size_t gid = get_global_id(0);
(ptr + dstSshOffset)[gid] = value;
}
__kernel void FillBufferImmediateRightLeftOver(
__global uchar* ptr,
ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
const uint value)
{
size_t gid = get_global_id(0);
(ptr + dstSshOffset)[gid] = value;
size_t dstIndex = get_global_id(0);
__global uchar* pDst = (__global uchar*)ptr + dstSshOffset;
pDst[dstIndex] = value;
}
__kernel void FillBufferSSHOffset(

View File

@ -7,5 +7,5 @@
#include "shared/test/common/helpers/kernel_binary_helper.h"
const std::string KernelBinaryHelper::BUILT_INS("11256751929276190336");
const std::string KernelBinaryHelper::BUILT_INS_WITH_IMAGES("13967618058110882853_images");
const std::string KernelBinaryHelper::BUILT_INS("7998916142903730155");
const std::string KernelBinaryHelper::BUILT_INS_WITH_IMAGES("16526264370178379440_images");

View File

@ -156,21 +156,13 @@ __kernel void FillBufferRightLeftover(
}
__kernel void FillBufferImmediate(
__global uint4* ptr,
ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
const uint value)
{
uint gid = get_global_id(0);
(ptr + dstSshOffset)[gid] = value;
}
__kernel void FillBufferImmediateRightLeftOver(
__global uchar* ptr,
ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
const uint value)
{
uint gid = get_global_id(0);
(ptr + dstSshOffset)[gid] = value;
uint dstIndex = get_global_id(0);
__global uchar* pDst = (__global uchar*)ptr + dstSshOffset;
pDst[dstIndex] = value;
}
__kernel void FillBufferSSHOffset(

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 Intel Corporation
* Copyright (C) 2020-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -156,21 +156,13 @@ __kernel void FillBufferRightLeftover(
}
__kernel void FillBufferImmediate(
__global uint4* ptr,
ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
const uint value)
{
uint gid = get_global_id(0);
(ptr + dstSshOffset)[gid] = value;
}
__kernel void FillBufferImmediateRightLeftOver(
__global uchar* ptr,
ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
const uint value)
{
uint gid = get_global_id(0);
(ptr + dstSshOffset)[gid] = value;
uint dstIndex = get_global_id(0);
__global uchar* pDst = (__global uchar*)ptr + dstSshOffset;
pDst[dstIndex] = value;
}
__kernel void FillBufferSSHOffset(

View File

@ -156,21 +156,13 @@ __kernel void FillBufferRightLeftover(
}
__kernel void FillBufferImmediate(
__global uint4* ptr,
uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
const uint value)
{
uint gid = get_global_id(0);
(ptr + dstSshOffset)[gid] = value;
}
__kernel void FillBufferImmediateRightLeftOver(
__global uchar* ptr,
uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
const uint value)
{
uint gid = get_global_id(0);
(ptr + dstSshOffset)[gid] = value;
uint dstIndex = get_global_id(0);
__global uchar* pDst = (__global uchar*)ptr + dstSshOffset;
pDst[dstIndex] = value;
}
__kernel void FillBufferSSHOffset(

View File

@ -156,21 +156,13 @@ __kernel void FillBufferRightLeftover(
}
__kernel void FillBufferImmediate(
__global uint4* ptr,
ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
const uint value)
{
uint gid = get_global_id(0);
(ptr + dstSshOffset)[gid] = value;
}
__kernel void FillBufferImmediateRightLeftOver(
__global uchar* ptr,
ulong dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
const uint value)
{
uint gid = get_global_id(0);
(ptr + dstSshOffset)[gid] = value;
size_t dstIndex = get_global_id(0);
__global uchar* pDst = (__global uchar*)ptr + dstSshOffset;
pDst[dstIndex] = value;
}
__kernel void FillBufferSSHOffset(

View File

@ -156,21 +156,13 @@ __kernel void FillBufferRightLeftover(
}
__kernel void FillBufferImmediate(
__global uint4* ptr,
uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
const uint value)
{
uint gid = get_global_id(0);
(ptr + dstSshOffset)[gid] = value;
}
__kernel void FillBufferImmediateRightLeftOver(
__global uchar* ptr,
uint dstSshOffset, // Offset needed in case ptr has been adjusted for SSH alignment
const uint value)
{
uint gid = get_global_id(0);
(ptr + dstSshOffset)[gid] = value;
uint dstIndex = get_global_id(0);
__global uchar* pDst = (__global uchar*)ptr + dstSshOffset;
pDst[dstIndex] = value;
}
__kernel void FillBufferSSHOffset(