feature: Add support for BUFFER_SIZE explicit argument

Related-To: NEO-13972

Signed-off-by: Vysochyn, Illia <illia.vysochyn@intel.com>
This commit is contained in:
Vysochyn, Illia
2025-04-07 12:28:48 +00:00
committed by Compute-Runtime-Automation
parent 8978ea5e5a
commit 146a7f7b9b
8 changed files with 43 additions and 5 deletions

View File

@@ -644,6 +644,8 @@ ze_result_t KernelImp::setArgRedescribedImage(uint32_t argIndex, ze_image_handle
ze_result_t KernelImp::setArgBufferWithAlloc(uint32_t argIndex, uintptr_t argVal, NEO::GraphicsAllocation *allocation, NEO::SvmAllocationData *peerAllocData) {
const auto &arg = kernelImmData->getDescriptor().payloadMappings.explicitArgs[argIndex].as<NEO::ArgDescPointer>();
const auto val = argVal;
const int64_t bufferSize = static_cast<int64_t>(allocation->getUnderlyingBufferSize() - (ptrDiff(argVal, allocation->getGpuAddress())));
NEO::patchNonPointer<int64_t, int64_t>(ArrayRef<uint8_t>(crossThreadData.get(), crossThreadDataSize), arg.bufferSize, bufferSize);
NEO::patchPointer(ArrayRef<uint8_t>(crossThreadData.get(), crossThreadDataSize), arg, val);
if (NEO::isValidOffset(arg.bindful) || NEO::isValidOffset(arg.bindless)) {
@@ -719,6 +721,7 @@ ze_result_t KernelImp::setArgBuffer(uint32_t argIndex, size_t argSize, const voi
const auto &allArgs = kernelImmData->getDescriptor().payloadMappings.explicitArgs;
const auto &currArg = allArgs[argIndex];
if (currArg.getTraits().getAddressQualifier() == NEO::KernelArgMetadata::AddrLocal) {
NEO::patchNonPointer<int64_t, int64_t>(ArrayRef<uint8_t>(crossThreadData.get(), crossThreadDataSize), currArg.as<NEO::ArgDescPointer>().bufferSize, static_cast<int64_t>(argSize));
slmArgSizes[argIndex] = static_cast<uint32_t>(argSize);
kernelArgInfos[argIndex] = KernelArgInfo{nullptr, 0, 0, false};
UNRECOVERABLE_IF(NEO::isUndefinedOffset(currArg.as<NEO::ArgDescPointer>().slmOffset));
@@ -777,6 +780,7 @@ ze_result_t KernelImp::setArgBuffer(uint32_t argIndex, size_t argSize, const voi
argumentsResidencyContainer[argIndex] = nullptr;
const auto &argAsPtr = kernelImmData->getDescriptor().payloadMappings.explicitArgs[argIndex].as<NEO::ArgDescPointer>();
auto patchLocation = ptrOffset(getCrossThreadData(), argAsPtr.stateless);
NEO::patchNonPointer<int64_t, int64_t>(ArrayRef<uint8_t>(crossThreadData.get(), crossThreadDataSize), argAsPtr.bufferSize, 0);
patchWithRequiredSize(const_cast<uint8_t *>(patchLocation), argAsPtr.pointerSize, reinterpret_cast<uintptr_t>(requestedAddress));
kernelArgInfos[argIndex] = KernelArgInfo{requestedAddress, 0, 0, false};
return ZE_RESULT_SUCCESS;

View File

@@ -1527,6 +1527,7 @@ cl_int Kernel::setArgBuffer(uint32_t argIndex,
const auto &arg = kernelInfo.kernelDescriptor.payloadMappings.explicitArgs[argIndex];
const auto &argAsPtr = arg.as<ArgDescPointer>();
patch<int64_t, int64_t>(0, crossThreadData, argAsPtr.bufferSize);
if (clMem && *clMem) {
auto clMemObj = *clMem;
@@ -1539,6 +1540,8 @@ cl_int Kernel::setArgBuffer(uint32_t argIndex,
return CL_INVALID_MEM_OBJECT;
}
patch<int64_t, int64_t>(static_cast<int64_t>(buffer->getSize()), getCrossThreadData(), argAsPtr.bufferSize);
auto gfxAllocationType = buffer->getGraphicsAllocation(rootDeviceIndex)->getAllocationType();
if (!isBuiltIn) {
this->anyKernelArgumentUsingSystemMemory |= Kernel::graphicsAllocationTypeUseSystemMemory(gfxAllocationType);

View File

@@ -134,6 +134,7 @@ inline constexpr ConstStringRef regionGroupDimension("region_group_dimension");
inline constexpr ConstStringRef regionGroupWgCount("region_group_wg_count");
inline constexpr ConstStringRef regionGroupBarrierBuffer("region_group_barrier_buffer");
inline constexpr ConstStringRef inlineSampler("inline_sampler");
inline constexpr ConstStringRef bufferSize("buffer_size");
namespace Image {
inline constexpr ConstStringRef width("image_width");
@@ -540,6 +541,7 @@ enum ArgType : uint8_t {
argTypeRegionGroupWgCount,
argTypeRegionGroupBarrierBuffer,
argTypeInlineSampler,
argTypeBufferSize,
argTypeMax
};

View File

@@ -1487,6 +1487,9 @@ DecodeError populateKernelPayloadArgument(NEO::KernelDescriptor &dst, const Kern
case Types::Kernel::argTypeInlineSampler:
return populateInlineSampler(dst, Tags::Kernel::PayloadArgument::ArgType::inlineSampler);
case Types::Kernel::argTypeBufferSize:
return populateWithOffsetChecked(dst.payloadMappings.explicitArgs[src.argIndex].as<ArgDescPointer>(true).bufferSize, sizeof(int64_t), Tags::Kernel::PayloadArgument::ArgType::bufferSize);
}
UNREACHABLE();

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2022-2024 Intel Corporation
* Copyright (C) 2022-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -22,7 +22,7 @@ using namespace Tags::Kernel::PayloadArgument::ArgType::Sampler::Vme;
using ArgType = Types::Kernel::ArgType;
inline constexpr ConstStringRef name = "argument type";
inline constexpr LookupArray<ConstStringRef, ArgType, 46> lookup({{
inline constexpr LookupArray<ConstStringRef, ArgType, 47> lookup({{
{packedLocalIds, ArgType::argTypePackedLocalIds},
{localId, ArgType::argTypeLocalId},
{localSize, ArgType::argTypeLocalSize},
@@ -69,6 +69,7 @@ inline constexpr LookupArray<ConstStringRef, ArgType, 46> lookup({{
{regionGroupWgCount, ArgType::argTypeRegionGroupWgCount},
{regionGroupBarrierBuffer, ArgType::argTypeRegionGroupBarrierBuffer},
{inlineSampler, ArgType::argTypeInlineSampler},
{bufferSize, ArgType::argTypeBufferSize},
}});
static_assert(lookup.size() == ArgType::argTypeMax - 1, "Every enum field must be present");
} // namespace ArgType

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2024 Intel Corporation
* Copyright (C) 2020-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -40,6 +40,7 @@ struct ArgDescPointer final {
CrossThreadDataOffset stateless = undefined<CrossThreadDataOffset>;
CrossThreadDataOffset bindless = undefined<CrossThreadDataOffset>;
CrossThreadDataOffset bufferOffset = undefined<CrossThreadDataOffset>;
CrossThreadDataOffset bufferSize = undefined<CrossThreadDataOffset>;
CrossThreadDataOffset slmOffset = undefined<CrossThreadDataOffset>;
uint8_t requiredSlmAlignment = 0;
uint8_t pointerSize = 0;

View File

@@ -5356,6 +5356,27 @@ TEST_F(decodeZeInfoKernelEntryTest, GivenArgTypeWorkDimensionsWhenSizeIsValidThe
EXPECT_EQ(32U, kernelDescriptor->payloadMappings.dispatchTraits.workDim);
}
// Checks that a zeinfo payload argument with arg_type "buffer_size" decodes successfully and
// that its offset is stored in the bufferSize field of the pointer descriptor for explicit argument 0.
TEST_F(decodeZeInfoKernelEntryTest, GivenArgTypeBufferSizeWhenSizeIsValidThenPopulatesKernelDescriptor) {
// Minimal kernel entry: a single payload argument of type buffer_size at offset 32, size 8, bound to arg_index 0.
ConstStringRef zeinfo = R"===(
kernels:
- name : some_kernel
execution_env:
simd_size: 32
payload_arguments:
- arg_type: buffer_size
offset: 32
size: 8
arg_index: 0
)===";
auto err = decodeZeInfoKernelEntry(zeinfo);
// Decoding must succeed and must not report any errors or warnings.
EXPECT_EQ(NEO::DecodeError::success, err);
EXPECT_TRUE(errors.empty()) << errors;
EXPECT_TRUE(warnings.empty()) << warnings;
// ASSERT (not EXPECT): stop here on a size mismatch so explicitArgs[0] below is never read from a container of the wrong size.
ASSERT_EQ(1U, kernelDescriptor->payloadMappings.explicitArgs.size());
const auto &arg = kernelDescriptor->payloadMappings.explicitArgs[0].as<ArgDescPointer>();
// 32 is the `offset` value given for the buffer_size argument in the zeinfo text above.
EXPECT_EQ(32, arg.bufferSize);
}
TEST_F(decodeZeInfoKernelEntryTest, GivenArgTypeImplicitArgBufferWhenPopulatingKernelDescriptorThenProperOffsetIsSetAndImplicitArgsAreRequired) {
ConstStringRef zeinfo = R"===(
kernels:

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2023 Intel Corporation
* Copyright (C) 2020-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -57,6 +57,7 @@ TEST(ArgDescPointer, WhenDefaultInitializedThenOffsetsAreUndefined) {
EXPECT_TRUE(NEO::isUndefinedOffset(argPtr.stateless));
EXPECT_TRUE(NEO::isUndefinedOffset(argPtr.bindless));
EXPECT_TRUE(NEO::isUndefinedOffset(argPtr.bufferOffset));
EXPECT_TRUE(NEO::isUndefinedOffset(argPtr.bufferSize));
EXPECT_TRUE(NEO::isUndefinedOffset(argPtr.slmOffset));
EXPECT_EQ(0U, argPtr.requiredSlmAlignment);
@@ -331,6 +332,7 @@ TEST(ArgDescriptorCopyAssign, GivenPointerArgWhenCopyAssignedThenCopiesDataBased
argPointer.stateless = 3;
argPointer.bindless = 5;
argPointer.bufferOffset = 7;
argPointer.bufferSize = 9;
argPointer.slmOffset = 11;
argPointer.requiredSlmAlignment = 13;
argPointer.pointerSize = 17;
@@ -342,6 +344,7 @@ TEST(ArgDescriptorCopyAssign, GivenPointerArgWhenCopyAssignedThenCopiesDataBased
EXPECT_EQ(argPointer.stateless, arg2.as<NEO::ArgDescPointer>().stateless);
EXPECT_EQ(argPointer.bindless, arg2.as<NEO::ArgDescPointer>().bindless);
EXPECT_EQ(argPointer.bufferOffset, arg2.as<NEO::ArgDescPointer>().bufferOffset);
EXPECT_EQ(argPointer.bufferSize, arg2.as<NEO::ArgDescPointer>().bufferSize);
EXPECT_EQ(argPointer.slmOffset, arg2.as<NEO::ArgDescPointer>().slmOffset);
EXPECT_EQ(argPointer.requiredSlmAlignment, arg2.as<NEO::ArgDescPointer>().requiredSlmAlignment);
EXPECT_EQ(argPointer.pointerSize, arg2.as<NEO::ArgDescPointer>().pointerSize);
@@ -521,4 +524,4 @@ TEST(PatchPointer, Given64bitPointerSizeThenPatchesAll64bits) {
alignas(8) uint8_t expected[64];
memset(expected, 3, sizeof(expected));
*reinterpret_cast<uint64_t *>(expected) = static_cast<uint64_t>(ptrValue);
}
}