Add implementation for reading logical subDevice from builtin

Related-To: NEO-6258
Signed-off-by: Kamil Diedrich <kamil.diedrich@intel.com>
This commit is contained in:
Kamil Diedrich 2022-01-10 18:32:25 +00:00 committed by Compute-Runtime-Automation
parent aa6e3bbef5
commit 34856747b4
5 changed files with 115 additions and 9 deletions

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
* Copyright (C) 2018-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -591,15 +591,16 @@ bool CommandStreamReceiver::createWorkPartitionAllocation(const Device &device)
return false;
}
uint32_t logicalId = 0;
for (uint32_t deviceIndex = 0; deviceIndex < deviceBitfield.size(); deviceIndex++) {
if (!deviceBitfield.test(deviceIndex)) {
continue;
}
const uint32_t copySrc = deviceIndex;
const uint32_t copySrc[2] = {logicalId++, deviceIndex};
DeviceBitfield copyBitfield{};
copyBitfield.set(deviceIndex);
auto copySuccess = MemoryTransferHelper::transferMemoryToAllocationBanks(device, workPartitionAllocation, 0, &copySrc, sizeof(copySrc), copyBitfield);
auto copySuccess = MemoryTransferHelper::transferMemoryToAllocationBanks(device, workPartitionAllocation, 0, copySrc, sizeof(copySrc), copyBitfield);
if (!copySuccess) {
return false;

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2019-2021 Intel Corporation
* Copyright (C) 2019-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -7,6 +7,7 @@
#include "shared/source/compiler_interface/linker.h"
#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/compiler_interface/linker.inl"
#include "shared/source/device/device.h"
#include "shared/source/device_binary_format/elf/zebin_elf.h"
@ -483,4 +484,21 @@ void Linker::resolveImplicitArgs(const KernelDescriptorsT &kernelDescriptors, De
}
}
}
void Linker::resolveBuiltins(Device *pDevice, UnresolvedExternals &outUnresolvedExternals, const std::vector<PatchableSegment> &instructionsSegments) {
int vecIndex = static_cast<int>(outUnresolvedExternals.size() - 1u);
for (; vecIndex >= 0; --vecIndex) {
if (outUnresolvedExternals[vecIndex].unresolvedRelocation.symbolName == subDeviceID) {
RelocatedSymbol symbol;
symbol.gpuAddress = static_cast<uintptr_t>(pDevice->getDefaultEngine().commandStreamReceiver->getWorkPartitionAllocationGpuAddress());
auto relocAddress = ptrOffset(instructionsSegments[outUnresolvedExternals[vecIndex].instructionsSegmentId].hostPointer,
static_cast<uintptr_t>(outUnresolvedExternals[vecIndex].unresolvedRelocation.offset));
NEO::Linker::patchAddress(relocAddress, symbol, outUnresolvedExternals[vecIndex].unresolvedRelocation);
outUnresolvedExternals[vecIndex] = outUnresolvedExternals[outUnresolvedExternals.size() - 1u];
outUnresolvedExternals.resize(outUnresolvedExternals.size() - 1u);
}
}
}
} // namespace NEO

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2019-2021 Intel Corporation
* Copyright (C) 2019-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -152,6 +152,8 @@ struct LinkerInput {
};
struct Linker {
inline static const std::string subDeviceID = "__SubDeviceID";
using RelocationInfo = LinkerInput::RelocationInfo;
struct SegmentInfo {
@ -197,7 +199,7 @@ struct Linker {
patchDataSegments(globalVariablesSegInfo, globalConstantsSegInfo, globalVariablesSeg, globalConstantsSeg,
outUnresolvedExternals, pDevice, constantsInitData, variablesInitData);
resolveImplicitArgs(kernelDescriptors, pDevice);
resolveBuiltins(pDevice, outUnresolvedExternals, instructionsSegments);
if (initialUnresolvedExternalsCount < outUnresolvedExternals.size()) {
return LinkingStatus::LinkedPartially;
}
@ -227,6 +229,7 @@ struct Linker {
const void *constantsInitData, const void *variablesInitData);
void resolveImplicitArgs(const KernelDescriptorsT &kernelDescriptors, Device *pDevice);
void resolveBuiltins(Device *pDevice, UnresolvedExternals &outUnresolvedExternals, const std::vector<PatchableSegment> &instructionsSegments);
template <typename PatchSizeT>
void patchIncrement(Device *pDevice, GraphicsAllocation *dstAllocation, size_t relocationOffset, const void *initData, uint64_t incrementValue);

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2021 Intel Corporation
* Copyright (C) 2021-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -1655,7 +1655,7 @@ TEST(CreateWorkPartitionAllocationTest, givenDisabledBlitterWhenInitializingWork
EXPECT_EQ(deviceFactory.subDevices[1]->getDeviceBitfield(), memoryManager->copyMemoryToAllocationBanksParamsPassed[1].handleMask);
for (auto i = 0; i < 2; i++) {
EXPECT_EQ(commandStreamReceiver->getWorkPartitionAllocation(), memoryManager->copyMemoryToAllocationBanksParamsPassed[i].graphicsAllocation);
EXPECT_EQ(sizeof(uint32_t), memoryManager->copyMemoryToAllocationBanksParamsPassed[i].sizeToCopy);
EXPECT_EQ(2 * sizeof(uint32_t), memoryManager->copyMemoryToAllocationBanksParamsPassed[i].sizeToCopy);
EXPECT_NE(nullptr, memoryManager->copyMemoryToAllocationBanksParamsPassed[i].memoryToCopy);
}
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2019-2021 Intel Corporation
* Copyright (C) 2019-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -1003,6 +1003,90 @@ TEST(LinkerTests, givenInvalidLinkerInputThenLinkerFails) {
EXPECT_EQ(NEO::LinkingStatus::Error, linkResult);
}
TEST(LinkerTests, givenUnresolvedExternalSymbolsWhenResolveBuiltinsIsCalledThenSubDeviceIDSymbolsAreRmoved) {
struct LinkerMock : public NEO::Linker {
public:
using NEO::Linker::resolveBuiltins;
LinkerMock(const LinkerInput &data) : NEO::Linker(data) {
}
};
NEO::LinkerInput linkerInput;
LinkerMock linker(linkerInput);
NEO::Linker::UnresolvedExternals unresolvedExternals;
unresolvedExternals.push_back({{"__SubDeviceID", 0, NEO::Linker::RelocationInfo::Type::AddressLow, NEO::SegmentType::Instructions}, 0u, false});
unresolvedExternals.push_back({{"__MaxHWThreadIDPerSubDevice", 156, NEO::Linker::RelocationInfo::Type::AddressLow, NEO::SegmentType::Instructions}, 0u, false});
unresolvedExternals.push_back({{"__MaxHWThreadIDPerSubDevice", 140, NEO::Linker::RelocationInfo::Type::AddressHigh, NEO::SegmentType::Instructions}, 0u, false});
unresolvedExternals.push_back({{"__SubDeviceID", 64, NEO::Linker::RelocationInfo::Type::AddressHigh, NEO::SegmentType::Instructions}, 0u, false});
std::vector<char> instructionSegment;
instructionSegment.resize(128u);
NEO::Linker::PatchableSegments instructionsSegments;
instructionsSegments.push_back({instructionSegment.data(), 64u});
instructionsSegments.push_back({&instructionSegment[64], 64u});
DebugManagerStateRestore restorer;
DebugManager.flags.CreateMultipleSubDevices.set(2);
DebugManager.flags.EnableImplicitScaling.set(1);
auto device = std::unique_ptr<NEO::MockDevice>(NEO::MockDevice::createWithNewExecutionEnvironment<NEO::MockDevice>(NEO::defaultHwInfo.get()));
linker.resolveBuiltins(device.get(), unresolvedExternals, instructionsSegments);
EXPECT_EQ(2U, unresolvedExternals.size());
for (auto &symbol : unresolvedExternals) {
EXPECT_NE(NEO::Linker::subDeviceID, symbol.unresolvedRelocation.symbolName);
}
auto gpuAddressAs64bit = device->getDefaultEngine().commandStreamReceiver->getWorkPartitionAllocationGpuAddress();
EXPECT_EQ(*reinterpret_cast<uint32_t *>(&instructionSegment[64]), static_cast<uint32_t>((gpuAddressAs64bit >> 32) & 0xffffffff));
EXPECT_EQ(*reinterpret_cast<uint32_t *>(instructionSegment.data()), static_cast<uint32_t>(gpuAddressAs64bit & 0xffffffff));
}
TEST(LinkerTests, givenUnresolvedExternalsWhenLinkThenSubDeviceIDSymbolsAreRemoved) {
NEO::LinkerInput linkerInput;
NEO::Linker linker(linkerInput);
NEO::Linker::SegmentInfo globalVar, globalConst, exportedFunc;
NEO::Linker::UnresolvedExternals unresolvedExternals;
unresolvedExternals.push_back({{"__SubDeviceID", 0, NEO::Linker::RelocationInfo::Type::AddressLow, NEO::SegmentType::Instructions}, 0u, false});
unresolvedExternals.push_back({{"__MaxHWThreadIDPerSubDevice", 156, NEO::Linker::RelocationInfo::Type::AddressLow, NEO::SegmentType::Instructions}, 0u, false});
unresolvedExternals.push_back({{"__MaxHWThreadIDPerSubDevice", 140, NEO::Linker::RelocationInfo::Type::AddressHigh, NEO::SegmentType::Instructions}, 0u, false});
unresolvedExternals.push_back({{"__SubDeviceID", 64, NEO::Linker::RelocationInfo::Type::AddressHigh, NEO::SegmentType::Instructions}, 0u, false});
NEO::Linker::KernelDescriptorsT kernelDescriptors;
NEO::GraphicsAllocation *patchableGlobalVarSeg = nullptr;
NEO::GraphicsAllocation *patchableConstVarSeg = nullptr;
std::vector<char> instructionSegment;
instructionSegment.resize(128u);
NEO::Linker::PatchableSegments instructionsSegments;
instructionsSegments.push_back({instructionSegment.data(), 0});
instructionsSegments.push_back({&instructionSegment[64], 64u});
DebugManagerStateRestore restorer;
DebugManager.flags.CreateMultipleSubDevices.set(2);
DebugManager.flags.EnableImplicitScaling.set(1);
auto device = std::unique_ptr<NEO::MockDevice>(NEO::MockDevice::createWithNewExecutionEnvironment<NEO::MockDevice>(NEO::defaultHwInfo.get()));
linker.link(
globalVar, globalConst, exportedFunc, {},
patchableGlobalVarSeg, patchableConstVarSeg, instructionsSegments,
unresolvedExternals, device.get(), nullptr, nullptr, kernelDescriptors);
auto relocatedSymbols = linker.extractRelocatedSymbols();
EXPECT_EQ(0U, relocatedSymbols.size());
EXPECT_EQ(2U, unresolvedExternals.size());
for (auto &symbol : unresolvedExternals) {
EXPECT_NE(NEO::Linker::subDeviceID, symbol.unresolvedRelocation.symbolName);
}
auto gpuAddressAs64bit = device->getDefaultEngine().commandStreamReceiver->getWorkPartitionAllocationGpuAddress();
EXPECT_EQ(*reinterpret_cast<uint32_t *>(&instructionSegment[64]), static_cast<uint32_t>((gpuAddressAs64bit >> 32) & 0xffffffff));
EXPECT_EQ(*reinterpret_cast<uint32_t *>(instructionSegment.data()), static_cast<uint32_t>(gpuAddressAs64bit & 0xffffffff));
}
TEST(LinkerTests, givenUnresolvedExternalWhenPatchingInstructionsThenLinkPartially) {
NEO::LinkerInput linkerInput;
vISA::GenRelocEntry entry = {};