feature(zebin): add implicit_arg_buffer_used_by_code exec env support
Resolves: NEO-14667

Signed-off-by: Mateusz Hoppe <mateusz.hoppe@intel.com>
commit c4bfe4e487
parent 542f47dfe6
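For orientation: the new implicit_arg_buffer_used_by_code boolean is an execution_env entry in a zebin's .ze_info metadata. The sketch below shows roughly how a compiler would advertise it, embedded in a ConstStringRef raw string the way the decoder tests in this patch do; the kernel name, simd_size, and payload arguments are illustrative only, not mandated by this change.

// Hypothetical zeInfo fragment (modeled on the test inputs added further down).
ConstStringRef exampleZeInfo = R"===(
kernels:
  - name : some_kernel
    execution_env:
      simd_size: 32
      implicit_arg_buffer_used_by_code: true
)===";

When the decoder sees the key set to true, it marks the kernel descriptor as requiring implicit args even with no stack calls and no debugger; false leaves the flag clear unless the linker later raises it for those reasons.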
@@ -4808,6 +4808,37 @@ TEST_F(ModuleTests, givenImplicitArgsRelocationAndNoDebuggerOrStackCallsWhenLink
     EXPECT_FALSE(kernelInfo->kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs);
 }
 
+TEST_F(ModuleTests, givenRequiredImplicitArgsInKernelAndNoDebuggerOrStackCallsWhenLinkingModuleThenImplicitArgsRequiredRemainSet) {
+    auto pModule = std::make_unique<Module>(device, nullptr, ModuleType::user);
+    EXPECT_EQ(nullptr, neoDevice->getDebugger());
+
+    char data[64]{};
+    auto kernelInfo = new KernelInfo();
+    kernelInfo->heapInfo.kernelHeapSize = 64;
+    kernelInfo->heapInfo.pKernelHeap = data;
+
+    std::unique_ptr<WhiteBox<::L0::KernelImmutableData>> kernelImmData{new WhiteBox<::L0::KernelImmutableData>(this->device)};
+    kernelImmData->setIsaPerKernelAllocation(pModule->allocateKernelsIsaMemory(kernelInfo->heapInfo.kernelHeapSize));
+    kernelImmData->initialize(kernelInfo, device, 0, nullptr, nullptr, false);
+
+    kernelImmData->kernelDescriptor->kernelAttributes.flags.useStackCalls = false;
+    auto isaCpuPtr = reinterpret_cast<char *>(kernelImmData->isaGraphicsAllocation->getUnderlyingBuffer());
+    pModule->kernelImmDatas.push_back(std::move(kernelImmData));
+    pModule->translationUnit->programInfo.kernelInfos.push_back(kernelInfo);
+    auto linkerInput = std::make_unique<::WhiteBox<NEO::LinkerInput>>();
+    linkerInput->traits.requiresPatchingOfInstructionSegments = true;
+    linkerInput->textRelocations.push_back({{implicitArgsRelocationSymbolName, 0x8, LinkerInput::RelocationInfo::Type::addressLow, SegmentType::instructions}});
+    pModule->translationUnit->programInfo.linkerInput = std::move(linkerInput);
+
+    kernelInfo->kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs = true;
+    auto status = pModule->linkBinary();
+    EXPECT_TRUE(status);
+
+    EXPECT_NE(0u, *reinterpret_cast<uint32_t *>(ptrOffset(isaCpuPtr, 0x8)));
+
+    EXPECT_TRUE(kernelInfo->kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs);
+}
+
 TEST_F(ModuleTests, givenModuleWithGlobalAndConstAllocationsWhenGettingModuleAllocationsThenAllAreReturned) {
     std::unique_ptr<MockModule> module = std::make_unique<MockModule>(device,
                                                                       nullptr,
@@ -657,7 +657,7 @@ void Linker::resolveImplicitArgs(const KernelDescriptorsT &kernelDescriptors, De
     if (pImplicitArgsRelocs != pImplicitArgsRelocationAddresses.end()) {
         for (const auto &pImplicitArgsReloc : pImplicitArgsRelocs->second) {
             UNRECOVERABLE_IF(!pDevice);
-            kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs = kernelDescriptor.kernelAttributes.flags.useStackCalls || pDevice->getDebugger() != nullptr;
+            kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs |= kernelDescriptor.kernelAttributes.flags.useStackCalls || pDevice->getDebugger() != nullptr;
             if (kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs) {
                 uint64_t implicitArgsSize = 0;
                 if (pDevice->getGfxCoreHelper().getImplicitArgsVersion() == 0) {
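The functional change in this hunk is the switch from plain assignment to OR-assignment: previously the linker recomputed requiresImplicitArgs from stack calls and the debugger alone, which would clear a requirement already decoded from zeInfo metadata. A minimal standalone sketch of the difference, using plain bools rather than the NEO bitfields:

#include <cassert>

int main() {
    bool useStackCalls = false;
    bool hasDebugger = false;

    // Old behavior: assignment overwrites the flag and drops the requirement
    // that was already set from implicit_arg_buffer_used_by_code.
    bool requiresImplicitArgs = true;
    requiresImplicitArgs = useStackCalls || hasDebugger;
    assert(!requiresImplicitArgs); // requirement lost

    // New behavior: OR-assignment can only add the requirement, never clear it.
    requiresImplicitArgs = true;
    requiresImplicitArgs |= useStackCalls || hasDebugger;
    assert(requiresImplicitArgs); // requirement preserved
    return 0;
}

The new module test above exercises exactly this case: the flag is pre-set on the kernel, there is no debugger and no stack calls, and linking must leave it set while still patching the relocation with the implicit-args struct size.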
@@ -62,6 +62,8 @@ inline constexpr ConstStringRef workGroupWalkOrderDimensions("work_group_walk_or
 inline constexpr ConstStringRef threadSchedulingMode("thread_scheduling_mode");
 inline constexpr ConstStringRef hasSample("has_sample");
 inline constexpr ConstStringRef actualKernelStartOffset("actual_kernel_start_offset");
+inline constexpr ConstStringRef implicitArgBufferUsedByCode("implicit_arg_buffer_used_by_code");
+
 namespace ThreadSchedulingMode {
 inline constexpr ConstStringRef ageBased("age_based");
 inline constexpr ConstStringRef roundRobin("round_robin");
@@ -357,6 +359,7 @@ using SpillSizeT = int32_t;
 using LocalRegionSizeT = int32_t;
 using WalkOrderT = int32_t;
 using PartitionDimT = int32_t;
+using HasImplicitArgBufferUsedByCodeT = bool;
 
 namespace Defaults {
 inline constexpr BarrierCountT barrierCount = 0;
@@ -391,6 +394,7 @@ inline constexpr SpillSizeT spillSize = 0;
 inline constexpr LocalRegionSizeT localRegionSize = -1;
 inline constexpr WalkOrderT dispatchWalkOrder = -1;
 inline constexpr PartitionDimT partitionDim = -1;
+inline constexpr HasImplicitArgBufferUsedByCodeT hasImplicitArgBufferUsedByCode = false;
 } // namespace Defaults
 
 inline constexpr ConstStringRef required[] = {
@@ -441,6 +445,7 @@ struct ExecutionEnvBaseT final : NEO::NonCopyableAndNonMovableClass {
     LocalRegionSizeT localRegionSize = Defaults::localRegionSize;
     WalkOrderT dispatchWalkOrder = Defaults::dispatchWalkOrder;
     PartitionDimT partitionDim = Defaults::partitionDim;
+    HasImplicitArgBufferUsedByCodeT hasImplicitArgBufferUsedByCode = Defaults::hasImplicitArgBufferUsedByCode;
 };
 
 static_assert(NEO::NonCopyableAndNonMovable<ExecutionEnvBaseT>);
@@ -686,6 +686,8 @@ DecodeError readZeInfoExecutionEnvironment(const Yaml::YamlParser &parser, const
             validExecEnv &= readZeInfoValueChecked(parser, execEnvMetadataNd, outExecEnv.privateSize, context, outErrReason);
         } else if (Tags::Kernel::ExecutionEnv::spillSize == key) {
             validExecEnv &= readZeInfoValueChecked(parser, execEnvMetadataNd, outExecEnv.spillSize, context, outErrReason);
+        } else if (Tags::Kernel::ExecutionEnv::implicitArgBufferUsedByCode == key) {
+            validExecEnv &= readZeInfoValueChecked(parser, execEnvMetadataNd, outExecEnv.hasImplicitArgBufferUsedByCode, context, outErrReason);
         } else if (Tags::Kernel::ExecutionEnv::actualKernelStartOffset == key) {
             // ignore intentionally - deprecated and redundant key
         } else {
@@ -718,6 +720,7 @@ void populateKernelExecutionEnvironment(KernelDescriptor &dst, const KernelExecu
     dst.kernelAttributes.flags.usesSystolicPipelineSelectMode = execEnv.hasDpas;
     dst.kernelAttributes.flags.usesStatelessWrites = (false == execEnv.hasNoStatelessWrite);
     dst.kernelAttributes.flags.hasSample = execEnv.hasSample;
+    dst.kernelAttributes.flags.requiresImplicitArgs = execEnv.hasImplicitArgBufferUsedByCode;
     dst.kernelAttributes.barrierCount = execEnv.barrierCount;
     dst.kernelAttributes.bufferAddressingMode = (execEnv.has4GBBuffers) ? KernelDescriptor::Stateless : KernelDescriptor::BindfulAndStateless;
     dst.kernelAttributes.inlineDataPayloadSize = static_cast<uint16_t>(execEnv.inlineDataPayloadSize);
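Taken together with the linker hunk earlier, the descriptor flag now has two sources: the zebin metadata seeds it here, and the linker may still raise it for stack calls or an attached debugger. A rough, hypothetical condensation of the two paths; the names are illustrative, not the actual NEO signatures.

#include <iostream>

struct KernelFlags {
    bool requiresImplicitArgs = false;
    bool useStackCalls = false;
};

// Mirrors populateKernelExecutionEnvironment(): zeInfo metadata seeds the flag.
void populateFromZeInfo(KernelFlags &flags, bool implicitArgBufferUsedByCode) {
    flags.requiresImplicitArgs = implicitArgBufferUsedByCode;
}

// Mirrors Linker::resolveImplicitArgs(): stack calls or a debugger can only
// add the requirement on top of what the metadata already declared.
void resolveAtLink(KernelFlags &flags, bool debuggerAttached) {
    flags.requiresImplicitArgs |= flags.useStackCalls || debuggerAttached;
}

int main() {
    KernelFlags flags;
    populateFromZeInfo(flags, /*implicitArgBufferUsedByCode=*/true);
    resolveAtLink(flags, /*debuggerAttached=*/false);
    std::cout << std::boolalpha << flags.requiresImplicitArgs << "\n"; // prints true
    return 0;
}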
@@ -2122,7 +2122,7 @@ TEST_F(LinkerTests, GivenDebugDataWhenApplyingDebugDataRelocationsThenRelocation
     EXPECT_EQ(expectedValue5, *reloc5Location);
 }
 
-TEST_F(LinkerTests, givenImplicitArgRelocationAndStackCallsThenPatchRelocationWithSizeOfImplicitArgStructAndUpdateKernelDescriptor) {
+TEST_F(LinkerTests, givenImplicitArgRelocationAndStackCallsOrRequiredImplicitArgsThenPatchRelocationWithSizeOfImplicitArgStructAndUpdateKernelDescriptor) {
     NEO::LinkerInput linkerInput;
 
     vISA::GenRelocEntry reloc = {};
@@ -2170,11 +2170,23 @@ TEST_F(LinkerTests, givenImplicitArgRelocationAndStackCallsThenPatchRelocationWi
     EXPECT_EQ(0U, unresolvedExternals.size());
     EXPECT_EQ(0U, relocatedSymbols.size());
 
-    auto addressToPatch = reinterpret_cast<const uint32_t *>(instructionSegment.data() + reloc.r_offset);
+    auto addressToPatch = reinterpret_cast<uint32_t *>(instructionSegment.data() + reloc.r_offset);
     EXPECT_EQ(ImplicitArgsTestHelper::getImplicitArgsSize(deviceFactory.rootDevices[0]->getGfxCoreHelper().getImplicitArgsVersion()), *addressToPatch);
     EXPECT_EQ(initData, *(addressToPatch - 1));
     EXPECT_EQ(initData, *(addressToPatch + 1));
     EXPECT_TRUE(kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs);
+
+    kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs = true;
+    kernelDescriptor.kernelAttributes.flags.useStackCalls = false;
+    *addressToPatch = 0;
+
+    linkResult = linker.link(globalVarSegment, globalConstSegment, exportedFuncSegment, {},
+                             nullptr, nullptr, patchableInstructionSegments, unresolvedExternals,
+                             deviceFactory.rootDevices[0], nullptr, 0, nullptr, 0, kernelDescriptors, externalFunctions);
+    EXPECT_EQ(NEO::LinkingStatus::linkedFully, linkResult);
+    EXPECT_EQ(initData, *(addressToPatch - 1));
+    EXPECT_EQ(initData, *(addressToPatch + 1));
+    EXPECT_TRUE(kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs);
 }
 
 HWTEST_F(LinkerTests, givenImplicitArgRelocationAndImplicitArgsV1WhenLinkingThenPatchRelocationWithSizeOfImplicitArgsV1) {
@@ -5400,6 +5400,46 @@ kernels:
     EXPECT_TRUE(kernelDescriptor->kernelAttributes.flags.requiresImplicitArgs);
 }
 
+TEST_F(decodeZeInfoKernelEntryTest, GivenExecEnvImplicitArgBufferUsedByCodeTrueWhenPopulatingKernelDescriptorThenImplicitArgsAreRequired) {
+    ConstStringRef zeinfo = R"===(
+kernels:
+  - name : some_kernel
+    execution_env:
+      simd_size: 32
+      implicit_arg_buffer_used_by_code: true
+    payload_arguments:
+      - arg_type: work_dimensions
+        offset: 32
+        size: 4
+)===";
+    auto err = decodeZeInfoKernelEntry(zeinfo);
+    EXPECT_EQ(NEO::DecodeError::success, err);
+    EXPECT_TRUE(errors.empty()) << errors;
+    EXPECT_TRUE(warnings.empty()) << warnings;
+    EXPECT_TRUE(NEO::isUndefinedOffset(kernelDescriptor->payloadMappings.implicitArgs.implicitArgsBuffer));
+    EXPECT_TRUE(kernelDescriptor->kernelAttributes.flags.requiresImplicitArgs);
+}
+
+TEST_F(decodeZeInfoKernelEntryTest, GivenExecEnvImplicitArgBufferUsedByCodeFalseWhenPopulatingKernelDescriptorThenImplicitArgsAreNotRequired) {
+    ConstStringRef zeinfo = R"===(
+kernels:
+  - name : some_kernel
+    execution_env:
+      simd_size: 32
+      implicit_arg_buffer_used_by_code: false
+    payload_arguments:
+      - arg_type: work_dimensions
+        offset: 32
+        size: 4
+)===";
+    auto err = decodeZeInfoKernelEntry(zeinfo);
+    EXPECT_EQ(NEO::DecodeError::success, err);
+    EXPECT_TRUE(errors.empty()) << errors;
+    EXPECT_TRUE(warnings.empty()) << warnings;
+    EXPECT_TRUE(NEO::isUndefinedOffset(kernelDescriptor->payloadMappings.implicitArgs.implicitArgsBuffer));
+    EXPECT_FALSE(kernelDescriptor->kernelAttributes.flags.requiresImplicitArgs);
+}
+
 TEST(PopulateArgDescriptorCrossthreadPayload, GivenArgTypeWorkDimensionsWhenSizeIsInvalidThenPopulateKernelDescriptorFails) {
     NEO::KernelDescriptor kernelDescriptor;
     kernelDescriptor.payloadMappings.explicitArgs.resize(1);