feature(zebin): add implicit_arg_buffer_used_by_code exec env support

Resolves: NEO-14667

Signed-off-by: Mateusz Hoppe <mateusz.hoppe@intel.com>
This commit is contained in:
Mateusz Hoppe 2025-05-23 15:50:51 +00:00 committed by Compute-Runtime-Automation
parent 542f47dfe6
commit c4bfe4e487
6 changed files with 94 additions and 3 deletions

View File

@ -4808,6 +4808,37 @@ TEST_F(ModuleTests, givenImplicitArgsRelocationAndNoDebuggerOrStackCallsWhenLink
EXPECT_FALSE(kernelInfo->kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs); EXPECT_FALSE(kernelInfo->kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs);
} }
// Links a module whose single kernel already has requiresImplicitArgs set before linking,
// with no debugger attached and stack calls disabled. Expects the implicit-args relocation
// to be patched with a non-zero value and the requiresImplicitArgs flag to remain set.
TEST_F(ModuleTests, givenRequiredImplicitArgsInKernelAndNoDebuggerOrStackCallsWhenLinkingModuleThenImplicitArgsRequiredRemainSet) {
    constexpr size_t isaSize = 64;
    constexpr size_t relocationOffset = 0x8;

    auto pModule = std::make_unique<Module>(device, nullptr, ModuleType::user);

    // Precondition: no debugger is attached to the device.
    EXPECT_EQ(nullptr, neoDevice->getDebugger());

    // Zero-filled kernel heap, so any non-zero dword seen after linking was written by the linker.
    char isaData[isaSize]{};

    auto kernelInfo = new KernelInfo();
    kernelInfo->heapInfo.kernelHeapSize = isaSize;
    kernelInfo->heapInfo.pKernelHeap = isaData;

    auto immutableData = std::make_unique<WhiteBox<::L0::KernelImmutableData>>(this->device);
    immutableData->setIsaPerKernelAllocation(pModule->allocateKernelsIsaMemory(kernelInfo->heapInfo.kernelHeapSize));
    immutableData->initialize(kernelInfo, device, 0, nullptr, nullptr, false);
    immutableData->kernelDescriptor->kernelAttributes.flags.useStackCalls = false;

    // Grab the CPU pointer to the ISA before ownership of the immutable data moves to the module.
    auto isaCpuPtr = reinterpret_cast<char *>(immutableData->isaGraphicsAllocation->getUnderlyingBuffer());

    pModule->kernelImmDatas.push_back(std::move(immutableData));
    pModule->translationUnit->programInfo.kernelInfos.push_back(kernelInfo);

    // Single implicit-args relocation targeting the instruction segment.
    auto relocationInput = std::make_unique<::WhiteBox<NEO::LinkerInput>>();
    relocationInput->traits.requiresPatchingOfInstructionSegments = true;
    relocationInput->textRelocations.push_back({{implicitArgsRelocationSymbolName, relocationOffset, LinkerInput::RelocationInfo::Type::addressLow, SegmentType::instructions}});
    pModule->translationUnit->programInfo.linkerInput = std::move(relocationInput);

    // The flag is set up front, before linking.
    kernelInfo->kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs = true;

    EXPECT_TRUE(pModule->linkBinary());

    auto patchedDword = reinterpret_cast<uint32_t *>(ptrOffset(isaCpuPtr, relocationOffset));
    EXPECT_NE(0u, *patchedDword);
    EXPECT_TRUE(kernelInfo->kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs);
}
TEST_F(ModuleTests, givenModuleWithGlobalAndConstAllocationsWhenGettingModuleAllocationsThenAllAreReturned) { TEST_F(ModuleTests, givenModuleWithGlobalAndConstAllocationsWhenGettingModuleAllocationsThenAllAreReturned) {
std::unique_ptr<MockModule> module = std::make_unique<MockModule>(device, std::unique_ptr<MockModule> module = std::make_unique<MockModule>(device,
nullptr, nullptr,

View File

@ -657,7 +657,7 @@ void Linker::resolveImplicitArgs(const KernelDescriptorsT &kernelDescriptors, De
if (pImplicitArgsRelocs != pImplicitArgsRelocationAddresses.end()) { if (pImplicitArgsRelocs != pImplicitArgsRelocationAddresses.end()) {
for (const auto &pImplicitArgsReloc : pImplicitArgsRelocs->second) { for (const auto &pImplicitArgsReloc : pImplicitArgsRelocs->second) {
UNRECOVERABLE_IF(!pDevice); UNRECOVERABLE_IF(!pDevice);
kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs = kernelDescriptor.kernelAttributes.flags.useStackCalls || pDevice->getDebugger() != nullptr; kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs |= kernelDescriptor.kernelAttributes.flags.useStackCalls || pDevice->getDebugger() != nullptr;
if (kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs) { if (kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs) {
uint64_t implicitArgsSize = 0; uint64_t implicitArgsSize = 0;
if (pDevice->getGfxCoreHelper().getImplicitArgsVersion() == 0) { if (pDevice->getGfxCoreHelper().getImplicitArgsVersion() == 0) {

View File

@ -62,6 +62,8 @@ inline constexpr ConstStringRef workGroupWalkOrderDimensions("work_group_walk_or
inline constexpr ConstStringRef threadSchedulingMode("thread_scheduling_mode"); inline constexpr ConstStringRef threadSchedulingMode("thread_scheduling_mode");
inline constexpr ConstStringRef hasSample("has_sample"); inline constexpr ConstStringRef hasSample("has_sample");
inline constexpr ConstStringRef actualKernelStartOffset("actual_kernel_start_offset"); inline constexpr ConstStringRef actualKernelStartOffset("actual_kernel_start_offset");
inline constexpr ConstStringRef implicitArgBufferUsedByCode("implicit_arg_buffer_used_by_code");
namespace ThreadSchedulingMode { namespace ThreadSchedulingMode {
inline constexpr ConstStringRef ageBased("age_based"); inline constexpr ConstStringRef ageBased("age_based");
inline constexpr ConstStringRef roundRobin("round_robin"); inline constexpr ConstStringRef roundRobin("round_robin");
@ -357,6 +359,7 @@ using SpillSizeT = int32_t;
using LocalRegionSizeT = int32_t; using LocalRegionSizeT = int32_t;
using WalkOrderT = int32_t; using WalkOrderT = int32_t;
using PartitionDimT = int32_t; using PartitionDimT = int32_t;
using HasImplicitArgBufferUsedByCodeT = bool;
namespace Defaults { namespace Defaults {
inline constexpr BarrierCountT barrierCount = 0; inline constexpr BarrierCountT barrierCount = 0;
@ -391,6 +394,7 @@ inline constexpr SpillSizeT spillSize = 0;
inline constexpr LocalRegionSizeT localRegionSize = -1; inline constexpr LocalRegionSizeT localRegionSize = -1;
inline constexpr WalkOrderT dispatchWalkOrder = -1; inline constexpr WalkOrderT dispatchWalkOrder = -1;
inline constexpr PartitionDimT partitionDim = -1; inline constexpr PartitionDimT partitionDim = -1;
inline constexpr HasImplicitArgBufferUsedByCodeT hasImplicitArgBufferUsedByCode = false;
} // namespace Defaults } // namespace Defaults
inline constexpr ConstStringRef required[] = { inline constexpr ConstStringRef required[] = {
@ -441,6 +445,7 @@ struct ExecutionEnvBaseT final : NEO::NonCopyableAndNonMovableClass {
LocalRegionSizeT localRegionSize = Defaults::localRegionSize; LocalRegionSizeT localRegionSize = Defaults::localRegionSize;
WalkOrderT dispatchWalkOrder = Defaults::dispatchWalkOrder; WalkOrderT dispatchWalkOrder = Defaults::dispatchWalkOrder;
PartitionDimT partitionDim = Defaults::partitionDim; PartitionDimT partitionDim = Defaults::partitionDim;
HasImplicitArgBufferUsedByCodeT hasImplicitArgBufferUsedByCode = Defaults::hasImplicitArgBufferUsedByCode;
}; };
static_assert(NEO::NonCopyableAndNonMovable<ExecutionEnvBaseT>); static_assert(NEO::NonCopyableAndNonMovable<ExecutionEnvBaseT>);

View File

@ -686,6 +686,8 @@ DecodeError readZeInfoExecutionEnvironment(const Yaml::YamlParser &parser, const
validExecEnv &= readZeInfoValueChecked(parser, execEnvMetadataNd, outExecEnv.privateSize, context, outErrReason); validExecEnv &= readZeInfoValueChecked(parser, execEnvMetadataNd, outExecEnv.privateSize, context, outErrReason);
} else if (Tags::Kernel::ExecutionEnv::spillSize == key) { } else if (Tags::Kernel::ExecutionEnv::spillSize == key) {
validExecEnv &= readZeInfoValueChecked(parser, execEnvMetadataNd, outExecEnv.spillSize, context, outErrReason); validExecEnv &= readZeInfoValueChecked(parser, execEnvMetadataNd, outExecEnv.spillSize, context, outErrReason);
} else if (Tags::Kernel::ExecutionEnv::implicitArgBufferUsedByCode == key) {
validExecEnv &= readZeInfoValueChecked(parser, execEnvMetadataNd, outExecEnv.hasImplicitArgBufferUsedByCode, context, outErrReason);
} else if (Tags::Kernel::ExecutionEnv::actualKernelStartOffset == key) { } else if (Tags::Kernel::ExecutionEnv::actualKernelStartOffset == key) {
// ignore intentionally - deprecated and redundant key // ignore intentionally - deprecated and redundant key
} else { } else {
@ -718,6 +720,7 @@ void populateKernelExecutionEnvironment(KernelDescriptor &dst, const KernelExecu
dst.kernelAttributes.flags.usesSystolicPipelineSelectMode = execEnv.hasDpas; dst.kernelAttributes.flags.usesSystolicPipelineSelectMode = execEnv.hasDpas;
dst.kernelAttributes.flags.usesStatelessWrites = (false == execEnv.hasNoStatelessWrite); dst.kernelAttributes.flags.usesStatelessWrites = (false == execEnv.hasNoStatelessWrite);
dst.kernelAttributes.flags.hasSample = execEnv.hasSample; dst.kernelAttributes.flags.hasSample = execEnv.hasSample;
dst.kernelAttributes.flags.requiresImplicitArgs = execEnv.hasImplicitArgBufferUsedByCode;
dst.kernelAttributes.barrierCount = execEnv.barrierCount; dst.kernelAttributes.barrierCount = execEnv.barrierCount;
dst.kernelAttributes.bufferAddressingMode = (execEnv.has4GBBuffers) ? KernelDescriptor::Stateless : KernelDescriptor::BindfulAndStateless; dst.kernelAttributes.bufferAddressingMode = (execEnv.has4GBBuffers) ? KernelDescriptor::Stateless : KernelDescriptor::BindfulAndStateless;
dst.kernelAttributes.inlineDataPayloadSize = static_cast<uint16_t>(execEnv.inlineDataPayloadSize); dst.kernelAttributes.inlineDataPayloadSize = static_cast<uint16_t>(execEnv.inlineDataPayloadSize);

View File

@ -2122,7 +2122,7 @@ TEST_F(LinkerTests, GivenDebugDataWhenApplyingDebugDataRelocationsThenRelocation
EXPECT_EQ(expectedValue5, *reloc5Location); EXPECT_EQ(expectedValue5, *reloc5Location);
} }
TEST_F(LinkerTests, givenImplicitArgRelocationAndStackCallsThenPatchRelocationWithSizeOfImplicitArgStructAndUpdateKernelDescriptor) { TEST_F(LinkerTests, givenImplicitArgRelocationAndStackCallsOrRequiredImplicitArgsThenPatchRelocationWithSizeOfImplicitArgStructAndUpdateKernelDescriptor) {
NEO::LinkerInput linkerInput; NEO::LinkerInput linkerInput;
vISA::GenRelocEntry reloc = {}; vISA::GenRelocEntry reloc = {};
@ -2170,11 +2170,23 @@ TEST_F(LinkerTests, givenImplicitArgRelocationAndStackCallsThenPatchRelocationWi
EXPECT_EQ(0U, unresolvedExternals.size()); EXPECT_EQ(0U, unresolvedExternals.size());
EXPECT_EQ(0U, relocatedSymbols.size()); EXPECT_EQ(0U, relocatedSymbols.size());
auto addressToPatch = reinterpret_cast<const uint32_t *>(instructionSegment.data() + reloc.r_offset); auto addressToPatch = reinterpret_cast<uint32_t *>(instructionSegment.data() + reloc.r_offset);
EXPECT_EQ(ImplicitArgsTestHelper::getImplicitArgsSize(deviceFactory.rootDevices[0]->getGfxCoreHelper().getImplicitArgsVersion()), *addressToPatch); EXPECT_EQ(ImplicitArgsTestHelper::getImplicitArgsSize(deviceFactory.rootDevices[0]->getGfxCoreHelper().getImplicitArgsVersion()), *addressToPatch);
EXPECT_EQ(initData, *(addressToPatch - 1)); EXPECT_EQ(initData, *(addressToPatch - 1));
EXPECT_EQ(initData, *(addressToPatch + 1)); EXPECT_EQ(initData, *(addressToPatch + 1));
EXPECT_TRUE(kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs); EXPECT_TRUE(kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs);
kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs = true;
kernelDescriptor.kernelAttributes.flags.useStackCalls = false;
*addressToPatch = 0;
linkResult = linker.link(globalVarSegment, globalConstSegment, exportedFuncSegment, {},
nullptr, nullptr, patchableInstructionSegments, unresolvedExternals,
deviceFactory.rootDevices[0], nullptr, 0, nullptr, 0, kernelDescriptors, externalFunctions);
EXPECT_EQ(NEO::LinkingStatus::linkedFully, linkResult);
EXPECT_EQ(initData, *(addressToPatch - 1));
EXPECT_EQ(initData, *(addressToPatch + 1));
EXPECT_TRUE(kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs);
} }
HWTEST_F(LinkerTests, givenImplicitArgRelocationAndImplicitArgsV1WhenLinkingThenPatchRelocationWithSizeOfImplicitArgsV1) { HWTEST_F(LinkerTests, givenImplicitArgRelocationAndImplicitArgsV1WhenLinkingThenPatchRelocationWithSizeOfImplicitArgsV1) {

View File

@ -5400,6 +5400,46 @@ kernels:
EXPECT_TRUE(kernelDescriptor->kernelAttributes.flags.requiresImplicitArgs); EXPECT_TRUE(kernelDescriptor->kernelAttributes.flags.requiresImplicitArgs);
} }
// Decodes a zeinfo kernel entry whose execution_env sets
// implicit_arg_buffer_used_by_code to true and checks that the resulting kernel
// descriptor reports requiresImplicitArgs, even though the payload arguments
// contain no implicit-arg-buffer entry.
TEST_F(decodeZeInfoKernelEntryTest, GivenExecEnvImplicitArgBufferUsedByCodeTrueWhenPopulatingKernelDescriptorThenImplicitArgsAreRequired) {
ConstStringRef zeinfo = R"===(
kernels:
- name : some_kernel
execution_env:
simd_size: 32
implicit_arg_buffer_used_by_code: true
payload_arguments:
- arg_type: work_dimensions
offset: 32
size: 4
)===";
auto err = decodeZeInfoKernelEntry(zeinfo);
// Decoding must succeed without emitting any error or warning text.
EXPECT_EQ(NEO::DecodeError::success, err);
EXPECT_TRUE(errors.empty()) << errors;
EXPECT_TRUE(warnings.empty()) << warnings;
// The only payload argument is work_dimensions, so the implicit args buffer offset stays undefined.
EXPECT_TRUE(NEO::isUndefinedOffset(kernelDescriptor->payloadMappings.implicitArgs.implicitArgsBuffer));
// The execution_env key alone is expected to turn the flag on.
EXPECT_TRUE(kernelDescriptor->kernelAttributes.flags.requiresImplicitArgs);
}
// Decodes a zeinfo kernel entry whose execution_env explicitly sets
// implicit_arg_buffer_used_by_code to false and checks that the resulting kernel
// descriptor does not report requiresImplicitArgs.
TEST_F(decodeZeInfoKernelEntryTest, GivenExecEnvImplicitArgBufferUsedByCodeFalseWhenPopulatingKernelDescriptorThenImplicitArgsAreNotRequired) {
ConstStringRef zeinfo = R"===(
kernels:
- name : some_kernel
execution_env:
simd_size: 32
implicit_arg_buffer_used_by_code: false
payload_arguments:
- arg_type: work_dimensions
offset: 32
size: 4
)===";
auto err = decodeZeInfoKernelEntry(zeinfo);
// Decoding must succeed without emitting any error or warning text.
EXPECT_EQ(NEO::DecodeError::success, err);
EXPECT_TRUE(errors.empty()) << errors;
EXPECT_TRUE(warnings.empty()) << warnings;
// The only payload argument is work_dimensions, so the implicit args buffer offset stays undefined.
EXPECT_TRUE(NEO::isUndefinedOffset(kernelDescriptor->payloadMappings.implicitArgs.implicitArgsBuffer));
// With the key set to false, the flag is expected to stay off.
EXPECT_FALSE(kernelDescriptor->kernelAttributes.flags.requiresImplicitArgs);
}
TEST(PopulateArgDescriptorCrossthreadPayload, GivenArgTypeWorkDimensionsWhenSizeIsInvalidThenPopulateKernelDescriptorFails) { TEST(PopulateArgDescriptorCrossthreadPayload, GivenArgTypeWorkDimensionsWhenSizeIsInvalidThenPopulateKernelDescriptorFails) {
NEO::KernelDescriptor kernelDescriptor; NEO::KernelDescriptor kernelDescriptor;
kernelDescriptor.payloadMappings.explicitArgs.resize(1); kernelDescriptor.payloadMappings.explicitArgs.resize(1);