feature: Add support for BUFFER_SIZE explicit argument

Related-To: NEO-13972
Signed-off-by: Vysochyn, Illia <illia.vysochyn@intel.com>

commit 146a7f7b9b
parent 8978ea5e5a
committed by Compute-Runtime-Automation
@@ -644,6 +644,8 @@ ze_result_t KernelImp::setArgRedescribedImage(uint32_t argIndex, ze_image_handle
 ze_result_t KernelImp::setArgBufferWithAlloc(uint32_t argIndex, uintptr_t argVal, NEO::GraphicsAllocation *allocation, NEO::SvmAllocationData *peerAllocData) {
     const auto &arg = kernelImmData->getDescriptor().payloadMappings.explicitArgs[argIndex].as<NEO::ArgDescPointer>();
     const auto val = argVal;
+    const int64_t bufferSize = static_cast<int64_t>(allocation->getUnderlyingBufferSize() - (ptrDiff(argVal, allocation->getGpuAddress())));
+    NEO::patchNonPointer<int64_t, int64_t>(ArrayRef<uint8_t>(crossThreadData.get(), crossThreadDataSize), arg.bufferSize, bufferSize);
 
     NEO::patchPointer(ArrayRef<uint8_t>(crossThreadData.get(), crossThreadDataSize), arg, val);
     if (NEO::isValidOffset(arg.bindful) || NEO::isValidOffset(arg.bindless)) {
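Note: the value patched into arg.bufferSize above is the number of bytes of the allocation that remain visible from the bound address onward. A minimal standalone sketch of that arithmetic, using hypothetical addresses and sizes rather than NEO types:

#include <cassert>
#include <cstdint>

int main() {
    // Hypothetical allocation: base GPU address and underlying size.
    const uint64_t gpuAddress = 0x10000;
    const uint64_t underlyingBufferSize = 4096;

    // The argument is bound 256 bytes into the allocation.
    const uint64_t argVal = gpuAddress + 256;

    // Same arithmetic as the patched value: bytes remaining from argVal to the
    // end of the allocation (ptrDiff(argVal, gpuAddress) == argVal - gpuAddress).
    const int64_t bufferSize = static_cast<int64_t>(underlyingBufferSize - (argVal - gpuAddress));
    assert(bufferSize == 3840);
    return 0;
}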
@@ -719,6 +721,7 @@ ze_result_t KernelImp::setArgBuffer(uint32_t argIndex, size_t argSize, const voi
     const auto &allArgs = kernelImmData->getDescriptor().payloadMappings.explicitArgs;
     const auto &currArg = allArgs[argIndex];
     if (currArg.getTraits().getAddressQualifier() == NEO::KernelArgMetadata::AddrLocal) {
+        NEO::patchNonPointer<int64_t, int64_t>(ArrayRef<uint8_t>(crossThreadData.get(), crossThreadDataSize), currArg.as<NEO::ArgDescPointer>().bufferSize, static_cast<int64_t>(argSize));
         slmArgSizes[argIndex] = static_cast<uint32_t>(argSize);
         kernelArgInfos[argIndex] = KernelArgInfo{nullptr, 0, 0, false};
         UNRECOVERABLE_IF(NEO::isUndefinedOffset(currArg.as<NEO::ArgDescPointer>().slmOffset));
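Note: in the local-memory branch above, the value recorded in the buffer_size slot is simply the argSize the application passed when setting the argument. A hedged usage sketch (kernel is assumed to be a valid ze_kernel_handle_t; bindLocalArg is a hypothetical helper, not part of the driver):

#include <level_zero/ze_api.h>

// For a pointer argument declared in local/SLM memory, Level Zero takes the
// requested SLM size as argSize and a null pArgValue; that same size is what
// now also lands in the argument's buffer_size payload slot.
ze_result_t bindLocalArg(ze_kernel_handle_t kernel, uint32_t argIndex, size_t slmBytes) {
    return zeKernelSetArgumentValue(kernel, argIndex, slmBytes, nullptr);
}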
@@ -777,6 +780,7 @@ ze_result_t KernelImp::setArgBuffer(uint32_t argIndex, size_t argSize, const voi
         argumentsResidencyContainer[argIndex] = nullptr;
         const auto &argAsPtr = kernelImmData->getDescriptor().payloadMappings.explicitArgs[argIndex].as<NEO::ArgDescPointer>();
         auto patchLocation = ptrOffset(getCrossThreadData(), argAsPtr.stateless);
+        NEO::patchNonPointer<int64_t, int64_t>(ArrayRef<uint8_t>(crossThreadData.get(), crossThreadDataSize), argAsPtr.bufferSize, 0);
         patchWithRequiredSize(const_cast<uint8_t *>(patchLocation), argAsPtr.pointerSize, reinterpret_cast<uintptr_t>(requestedAddress));
         kernelArgInfos[argIndex] = KernelArgInfo{requestedAddress, 0, 0, false};
         return ZE_RESULT_SUCCESS;
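Note: the branch above covers binding a null device pointer, where buffer_size is patched to 0 so the kernel observes an empty buffer. A hedged sketch (bindNullBuffer is a hypothetical helper; the exact argSize convention for null pointers follows the Level Zero specification, not this snippet):

#include <level_zero/ze_api.h>

// Passing a null pArgValue for a global pointer argument binds a NULL pointer;
// with this change the matching buffer_size slot is set to 0.
ze_result_t bindNullBuffer(ze_kernel_handle_t kernel, uint32_t argIndex) {
    return zeKernelSetArgumentValue(kernel, argIndex, sizeof(void *), nullptr);
}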
@@ -1527,6 +1527,7 @@ cl_int Kernel::setArgBuffer(uint32_t argIndex,
 
     const auto &arg = kernelInfo.kernelDescriptor.payloadMappings.explicitArgs[argIndex];
     const auto &argAsPtr = arg.as<ArgDescPointer>();
+    patch<int64_t, int64_t>(0, crossThreadData, argAsPtr.bufferSize);
 
     if (clMem && *clMem) {
         auto clMemObj = *clMem;
@@ -1539,6 +1540,8 @@ cl_int Kernel::setArgBuffer(uint32_t argIndex,
             return CL_INVALID_MEM_OBJECT;
         }
 
+        patch<int64_t, int64_t>(static_cast<int64_t>(buffer->getSize()), getCrossThreadData(), argAsPtr.bufferSize);
+
         auto gfxAllocationType = buffer->getGraphicsAllocation(rootDeviceIndex)->getAllocationType();
         if (!isBuiltIn) {
             this->anyKernelArgumentUsingSystemMemory |= Kernel::graphicsAllocationTypeUseSystemMemory(gfxAllocationType);
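Note: on the OpenCL path the slot is first reset to 0 and then, once the cl_mem is validated, overwritten with the buffer's full size. A hedged usage sketch (kernel and buffer are assumed to be valid objects created elsewhere; argument indices are illustrative):

#include <CL/cl.h>

// Binding a real buffer records its size in the argument's buffer_size slot;
// binding a NULL cl_mem leaves the slot at its default of 0.
void bindBufferArgs(cl_kernel kernel, cl_mem buffer) {
    clSetKernelArg(kernel, 0, sizeof(cl_mem), &buffer); // buffer_size := size of 'buffer' in bytes

    cl_mem nullBuffer = NULL;
    clSetKernelArg(kernel, 1, sizeof(cl_mem), &nullBuffer); // buffer_size := 0
}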
@@ -134,6 +134,7 @@ inline constexpr ConstStringRef regionGroupDimension("region_group_dimension");
 inline constexpr ConstStringRef regionGroupWgCount("region_group_wg_count");
 inline constexpr ConstStringRef regionGroupBarrierBuffer("region_group_barrier_buffer");
 inline constexpr ConstStringRef inlineSampler("inline_sampler");
+inline constexpr ConstStringRef bufferSize("buffer_size");
 
 namespace Image {
 inline constexpr ConstStringRef width("image_width");
@@ -540,6 +541,7 @@ enum ArgType : uint8_t {
     argTypeRegionGroupWgCount,
     argTypeRegionGroupBarrierBuffer,
     argTypeInlineSampler,
+    argTypeBufferSize,
     argTypeMax
 };
 
@@ -1487,6 +1487,9 @@ DecodeError populateKernelPayloadArgument(NEO::KernelDescriptor &dst, const Kern
 
     case Types::Kernel::argTypeInlineSampler:
         return populateInlineSampler(dst, Tags::Kernel::PayloadArgument::ArgType::inlineSampler);
+
+    case Types::Kernel::argTypeBufferSize:
+        return populateWithOffsetChecked(dst.payloadMappings.explicitArgs[src.argIndex].as<ArgDescPointer>(true).bufferSize, sizeof(int64_t), Tags::Kernel::PayloadArgument::ArgType::bufferSize);
     }
 
     UNREACHABLE();
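Note: the decoder case above records where the 8-byte buffer_size slot lives in cross-thread data. A rough standalone sketch of that step (simplified stand-in types, not the NEO decoder):

#include <cstdint>
#include <vector>

// Simplified stand-in for the pointer-argument descriptor.
struct ArgDescPointerSketch {
    uint16_t bufferSize = UINT16_MAX; // UINT16_MAX plays the role of "undefined offset"
};

// Store the offset of a "buffer_size" payload argument, rejecting slots that
// are not 8 bytes wide (mirroring the sizeof(int64_t) check in the real call).
bool populateBufferSize(std::vector<ArgDescPointerSketch> &explicitArgs,
                        uint32_t argIndex, uint16_t offset, uint32_t size) {
    if (size != sizeof(int64_t)) {
        return false;
    }
    explicitArgs[argIndex].bufferSize = offset;
    return true;
}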
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2022-2024 Intel Corporation
+ * Copyright (C) 2022-2025 Intel Corporation
  *
  * SPDX-License-Identifier: MIT
  *
@@ -22,7 +22,7 @@ using namespace Tags::Kernel::PayloadArgument::ArgType::Sampler::Vme;
 using ArgType = Types::Kernel::ArgType;
 
 inline constexpr ConstStringRef name = "argument type";
-inline constexpr LookupArray<ConstStringRef, ArgType, 46> lookup({{
+inline constexpr LookupArray<ConstStringRef, ArgType, 47> lookup({{
     {packedLocalIds, ArgType::argTypePackedLocalIds},
     {localId, ArgType::argTypeLocalId},
     {localSize, ArgType::argTypeLocalSize},
@@ -69,6 +69,7 @@ inline constexpr LookupArray<ConstStringRef, ArgType, 46> lookup({{
     {regionGroupWgCount, ArgType::argTypeRegionGroupWgCount},
     {regionGroupBarrierBuffer, ArgType::argTypeRegionGroupBarrierBuffer},
     {inlineSampler, ArgType::argTypeInlineSampler},
+    {bufferSize, ArgType::argTypeBufferSize},
 }});
 static_assert(lookup.size() == ArgType::argTypeMax - 1, "Every enum field must be present");
 } // namespace ArgType
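Note: the lookup array grows from 46 to 47 entries because the static_assert requires one entry per named enumerator (argTypeMax - 1 of them), so a new arg type that is not registered fails to compile. A compile-time sketch of the same pattern with a shrunken enum (local stand-ins, not the NEO types):

#include <array>
#include <cstdint>
#include <string_view>
#include <utility>

// Shrunken stand-in for the real 47-entry table: one entry per named
// enumerator below argTypeMax; the first enumerator carries no name tag.
enum ArgType : uint8_t { argTypeUnknown, argTypeInlineSampler, argTypeBufferSize, argTypeMax };

inline constexpr std::array<std::pair<std::string_view, ArgType>, 2> lookup{{
    {"inline_sampler", argTypeInlineSampler},
    {"buffer_size", argTypeBufferSize},
}};
static_assert(lookup.size() == ArgType::argTypeMax - 1, "Every enum field must be present");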
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2020-2024 Intel Corporation
+ * Copyright (C) 2020-2025 Intel Corporation
  *
  * SPDX-License-Identifier: MIT
  *
@@ -40,6 +40,7 @@ struct ArgDescPointer final {
     CrossThreadDataOffset stateless = undefined<CrossThreadDataOffset>;
     CrossThreadDataOffset bindless = undefined<CrossThreadDataOffset>;
     CrossThreadDataOffset bufferOffset = undefined<CrossThreadDataOffset>;
+    CrossThreadDataOffset bufferSize = undefined<CrossThreadDataOffset>;
     CrossThreadDataOffset slmOffset = undefined<CrossThreadDataOffset>;
     uint8_t requiredSlmAlignment = 0;
     uint8_t pointerSize = 0;
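Note: like the other offsets in ArgDescPointer, the new bufferSize field defaults to the undefined sentinel and is only patched when the binary actually declared a buffer_size payload slot. A minimal sketch of that sentinel convention (local stand-ins, not NEO's undefined<> helper):

#include <cassert>
#include <cstdint>
#include <limits>

using CrossThreadDataOffset = uint16_t;
constexpr CrossThreadDataOffset undefinedOffset = std::numeric_limits<CrossThreadDataOffset>::max();

constexpr bool isValidOffset(CrossThreadDataOffset offset) { return offset != undefinedOffset; }

int main() {
    CrossThreadDataOffset bufferSize = undefinedOffset; // default: kernel declares no buffer_size slot
    assert(!isValidOffset(bufferSize));

    bufferSize = 32; // decoder found "buffer_size" at cross-thread-data offset 32
    assert(isValidOffset(bufferSize));
    return 0;
}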
@@ -5356,6 +5356,27 @@ TEST_F(decodeZeInfoKernelEntryTest, GivenArgTypeWorkDimensionsWhenSizeIsValidThe
     EXPECT_EQ(32U, kernelDescriptor->payloadMappings.dispatchTraits.workDim);
 }
 
+TEST_F(decodeZeInfoKernelEntryTest, GivenArgTypeBufferSizeWhenSizeIsValidThenPopulatesKernelDescriptor) {
+    ConstStringRef zeinfo = R"===(
+kernels:
+    - name : some_kernel
+      execution_env:
+        simd_size: 32
+      payload_arguments:
+        - arg_type: buffer_size
+          offset: 32
+          size: 8
+          arg_index: 0
+)===";
+    auto err = decodeZeInfoKernelEntry(zeinfo);
+    EXPECT_EQ(NEO::DecodeError::success, err);
+    EXPECT_TRUE(errors.empty()) << errors;
+    EXPECT_TRUE(warnings.empty()) << warnings;
+    ASSERT_EQ(1U, kernelDescriptor->payloadMappings.explicitArgs.size());
+    const auto &arg = kernelDescriptor->payloadMappings.explicitArgs[0].as<ArgDescPointer>();
+    EXPECT_EQ(32, arg.bufferSize);
+}
+
 TEST_F(decodeZeInfoKernelEntryTest, GivenArgTypeImplicitArgBufferWhenPopulatingKernelDescriptorThenProperOffsetIsSetAndImplicitArgsAreRequired) {
     ConstStringRef zeinfo = R"===(
 kernels:
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2020-2023 Intel Corporation
+ * Copyright (C) 2020-2025 Intel Corporation
  *
  * SPDX-License-Identifier: MIT
  *
@@ -57,6 +57,7 @@ TEST(ArgDescPointer, WhenDefaultInitializedThenOffsetsAreUndefined) {
     EXPECT_TRUE(NEO::isUndefinedOffset(argPtr.stateless));
     EXPECT_TRUE(NEO::isUndefinedOffset(argPtr.bindless));
     EXPECT_TRUE(NEO::isUndefinedOffset(argPtr.bufferOffset));
+    EXPECT_TRUE(NEO::isUndefinedOffset(argPtr.bufferSize));
     EXPECT_TRUE(NEO::isUndefinedOffset(argPtr.slmOffset));
 
     EXPECT_EQ(0U, argPtr.requiredSlmAlignment);
@@ -331,6 +332,7 @@ TEST(ArgDescriptorCopyAssign, GivenPointerArgWhenCopyAssignedThenCopiesDataBased
     argPointer.stateless = 3;
     argPointer.bindless = 5;
     argPointer.bufferOffset = 7;
+    argPointer.bufferSize = 9;
     argPointer.slmOffset = 11;
     argPointer.requiredSlmAlignment = 13;
     argPointer.pointerSize = 17;
@@ -342,6 +344,7 @@ TEST(ArgDescriptorCopyAssign, GivenPointerArgWhenCopyAssignedThenCopiesDataBased
     EXPECT_EQ(argPointer.stateless, arg2.as<NEO::ArgDescPointer>().stateless);
     EXPECT_EQ(argPointer.bindless, arg2.as<NEO::ArgDescPointer>().bindless);
     EXPECT_EQ(argPointer.bufferOffset, arg2.as<NEO::ArgDescPointer>().bufferOffset);
+    EXPECT_EQ(argPointer.bufferSize, arg2.as<NEO::ArgDescPointer>().bufferSize);
     EXPECT_EQ(argPointer.slmOffset, arg2.as<NEO::ArgDescPointer>().slmOffset);
     EXPECT_EQ(argPointer.requiredSlmAlignment, arg2.as<NEO::ArgDescPointer>().requiredSlmAlignment);
     EXPECT_EQ(argPointer.pointerSize, arg2.as<NEO::ArgDescPointer>().pointerSize);
@@ -521,4 +524,4 @@ TEST(PatchPointer, Given64bitPointerSizeThenPatchesAll64bits) {
     alignas(8) uint8_t expected[64];
     memset(expected, 3, sizeof(expected));
     *reinterpret_cast<uint64_t *>(expected) = static_cast<uint64_t>(ptrValue);
-}
+}