diff --git a/shared/source/command_stream/command_stream_receiver.cpp b/shared/source/command_stream/command_stream_receiver.cpp index f068567342..911a63d87c 100644 --- a/shared/source/command_stream/command_stream_receiver.cpp +++ b/shared/source/command_stream/command_stream_receiver.cpp @@ -592,15 +592,16 @@ bool CommandStreamReceiver::createWorkPartitionAllocation(const Device &device) return false; } + uint32_t logicalId = 0; for (uint32_t deviceIndex = 0; deviceIndex < deviceBitfield.size(); deviceIndex++) { if (!deviceBitfield.test(deviceIndex)) { continue; } - const uint32_t copySrc = deviceIndex; + const uint32_t copySrc[2] = {logicalId++, deviceIndex}; DeviceBitfield copyBitfield{}; copyBitfield.set(deviceIndex); - auto copySuccess = MemoryTransferHelper::transferMemoryToAllocationBanks(device, workPartitionAllocation, 0, ©Src, sizeof(copySrc), copyBitfield); + auto copySuccess = MemoryTransferHelper::transferMemoryToAllocationBanks(device, workPartitionAllocation, 0, copySrc, sizeof(copySrc), copyBitfield); if (!copySuccess) { return false; diff --git a/shared/source/compiler_interface/linker.cpp b/shared/source/compiler_interface/linker.cpp index 671708737e..99259e9a36 100644 --- a/shared/source/compiler_interface/linker.cpp +++ b/shared/source/compiler_interface/linker.cpp @@ -7,6 +7,7 @@ #include "shared/source/compiler_interface/linker.h" +#include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/compiler_interface/linker.inl" #include "shared/source/device/device.h" #include "shared/source/device_binary_format/elf/zebin_elf.h" @@ -481,4 +482,21 @@ void Linker::resolveImplicitArgs(const KernelDescriptorsT &kernelDescriptors, De } } } + +void Linker::resolveBuiltins(Device *pDevice, UnresolvedExternals &outUnresolvedExternals, const std::vector &instructionsSegments) { + int vecIndex = static_cast(outUnresolvedExternals.size() - 1u); + for (; vecIndex >= 0; --vecIndex) { + if (outUnresolvedExternals[vecIndex].unresolvedRelocation.symbolName == subDeviceID) { + RelocatedSymbol symbol; + symbol.gpuAddress = static_cast(pDevice->getDefaultEngine().commandStreamReceiver->getWorkPartitionAllocationGpuAddress()); + auto relocAddress = ptrOffset(instructionsSegments[outUnresolvedExternals[vecIndex].instructionsSegmentId].hostPointer, + static_cast(outUnresolvedExternals[vecIndex].unresolvedRelocation.offset)); + + NEO::Linker::patchAddress(relocAddress, symbol, outUnresolvedExternals[vecIndex].unresolvedRelocation); + + outUnresolvedExternals[vecIndex] = outUnresolvedExternals[outUnresolvedExternals.size() - 1u]; + outUnresolvedExternals.resize(outUnresolvedExternals.size() - 1u); + } + } +} } // namespace NEO \ No newline at end of file diff --git a/shared/source/compiler_interface/linker.h b/shared/source/compiler_interface/linker.h index 7093426a47..8b310308c6 100644 --- a/shared/source/compiler_interface/linker.h +++ b/shared/source/compiler_interface/linker.h @@ -152,6 +152,8 @@ struct LinkerInput { }; struct Linker { + static constexpr std::string_view subDeviceID{"__SubDeviceID"}; + using RelocationInfo = LinkerInput::RelocationInfo; struct SegmentInfo { @@ -197,7 +199,7 @@ struct Linker { patchDataSegments(globalVariablesSegInfo, globalConstantsSegInfo, globalVariablesSeg, globalConstantsSeg, outUnresolvedExternals, pDevice, constantsInitData, variablesInitData); resolveImplicitArgs(kernelDescriptors, pDevice); - + resolveBuiltins(pDevice, outUnresolvedExternals, instructionsSegments); if (initialUnresolvedExternalsCount < outUnresolvedExternals.size()) { return LinkingStatus::LinkedPartially; } @@ -227,6 +229,7 @@ struct Linker { const void *constantsInitData, const void *variablesInitData); void resolveImplicitArgs(const KernelDescriptorsT &kernelDescriptors, Device *pDevice); + void resolveBuiltins(Device *pDevice, UnresolvedExternals &outUnresolvedExternals, const std::vector &instructionsSegments); template void patchIncrement(Device *pDevice, GraphicsAllocation *dstAllocation, size_t relocationOffset, const void *initData, uint64_t incrementValue); diff --git a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp index 75110de5e2..87286f5b34 100644 --- a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp +++ b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp @@ -1628,7 +1628,7 @@ TEST(CreateWorkPartitionAllocationTest, givenDisabledBlitterWhenInitializingWork EXPECT_EQ(deviceFactory.subDevices[1]->getDeviceBitfield(), memoryManager->copyMemoryToAllocationBanksParamsPassed[1].handleMask); for (auto i = 0; i < 2; i++) { EXPECT_EQ(commandStreamReceiver->getWorkPartitionAllocation(), memoryManager->copyMemoryToAllocationBanksParamsPassed[i].graphicsAllocation); - EXPECT_EQ(sizeof(uint32_t), memoryManager->copyMemoryToAllocationBanksParamsPassed[i].sizeToCopy); + EXPECT_EQ(2 * sizeof(uint32_t), memoryManager->copyMemoryToAllocationBanksParamsPassed[i].sizeToCopy); EXPECT_NE(nullptr, memoryManager->copyMemoryToAllocationBanksParamsPassed[i].memoryToCopy); } } diff --git a/shared/test/unit_test/compiler_interface/linker_tests.cpp b/shared/test/unit_test/compiler_interface/linker_tests.cpp index cfd841acde..7f134770ab 100644 --- a/shared/test/unit_test/compiler_interface/linker_tests.cpp +++ b/shared/test/unit_test/compiler_interface/linker_tests.cpp @@ -1004,6 +1004,90 @@ TEST(LinkerTests, givenInvalidLinkerInputThenLinkerFails) { EXPECT_EQ(NEO::LinkingStatus::Error, linkResult); } +TEST(LinkerTests, givenUnresolvedExternalSymbolsWhenResolveBuiltinsIsCalledThenSubDeviceIDSymbolsAreRmoved) { + struct LinkerMock : public NEO::Linker { + public: + using NEO::Linker::resolveBuiltins; + + LinkerMock(const LinkerInput &data) : NEO::Linker(data) { + } + }; + + NEO::LinkerInput linkerInput; + LinkerMock linker(linkerInput); + NEO::Linker::UnresolvedExternals unresolvedExternals; + unresolvedExternals.push_back({{"__SubDeviceID", 0, NEO::Linker::RelocationInfo::Type::AddressLow, NEO::SegmentType::Instructions}, 0u, false}); + unresolvedExternals.push_back({{"__MaxHWThreadIDPerSubDevice", 156, NEO::Linker::RelocationInfo::Type::AddressLow, NEO::SegmentType::Instructions}, 0u, false}); + unresolvedExternals.push_back({{"__MaxHWThreadIDPerSubDevice", 140, NEO::Linker::RelocationInfo::Type::AddressHigh, NEO::SegmentType::Instructions}, 0u, false}); + unresolvedExternals.push_back({{"__SubDeviceID", 64, NEO::Linker::RelocationInfo::Type::AddressHigh, NEO::SegmentType::Instructions}, 0u, false}); + + std::vector instructionSegment; + instructionSegment.resize(128u); + NEO::Linker::PatchableSegments instructionsSegments; + instructionsSegments.push_back({instructionSegment.data(), 64u}); + instructionsSegments.push_back({&instructionSegment[64], 64u}); + + DebugManagerStateRestore restorer; + DebugManager.flags.CreateMultipleSubDevices.set(2); + DebugManager.flags.EnableImplicitScaling.set(1); + + auto device = std::unique_ptr(NEO::MockDevice::createWithNewExecutionEnvironment(NEO::defaultHwInfo.get())); + linker.resolveBuiltins(device.get(), unresolvedExternals, instructionsSegments); + + EXPECT_EQ(2U, unresolvedExternals.size()); + for (auto &symbol : unresolvedExternals) { + EXPECT_NE(NEO::Linker::subDeviceID, symbol.unresolvedRelocation.symbolName); + } + + auto gpuAddressAs64bit = device->getDefaultEngine().commandStreamReceiver->getWorkPartitionAllocationGpuAddress(); + EXPECT_EQ(*reinterpret_cast(&instructionSegment[64]), static_cast((gpuAddressAs64bit >> 32) & 0xffffffff)); + EXPECT_EQ(*reinterpret_cast(instructionSegment.data()), static_cast(gpuAddressAs64bit & 0xffffffff)); +} + +TEST(LinkerTests, givenUnresolvedExternalsWhenLinkThenSubDeviceIDSymbolsAreRemoved) { + NEO::LinkerInput linkerInput; + + NEO::Linker linker(linkerInput); + NEO::Linker::SegmentInfo globalVar, globalConst, exportedFunc; + NEO::Linker::UnresolvedExternals unresolvedExternals; + unresolvedExternals.push_back({{"__SubDeviceID", 0, NEO::Linker::RelocationInfo::Type::AddressLow, NEO::SegmentType::Instructions}, 0u, false}); + unresolvedExternals.push_back({{"__MaxHWThreadIDPerSubDevice", 156, NEO::Linker::RelocationInfo::Type::AddressLow, NEO::SegmentType::Instructions}, 0u, false}); + unresolvedExternals.push_back({{"__MaxHWThreadIDPerSubDevice", 140, NEO::Linker::RelocationInfo::Type::AddressHigh, NEO::SegmentType::Instructions}, 0u, false}); + unresolvedExternals.push_back({{"__SubDeviceID", 64, NEO::Linker::RelocationInfo::Type::AddressHigh, NEO::SegmentType::Instructions}, 0u, false}); + + NEO::Linker::KernelDescriptorsT kernelDescriptors; + + NEO::GraphicsAllocation *patchableGlobalVarSeg = nullptr; + NEO::GraphicsAllocation *patchableConstVarSeg = nullptr; + std::vector instructionSegment; + instructionSegment.resize(128u); + NEO::Linker::PatchableSegments instructionsSegments; + instructionsSegments.push_back({instructionSegment.data(), 0}); + instructionsSegments.push_back({&instructionSegment[64], 64u}); + + DebugManagerStateRestore restorer; + DebugManager.flags.CreateMultipleSubDevices.set(2); + DebugManager.flags.EnableImplicitScaling.set(1); + + auto device = std::unique_ptr(NEO::MockDevice::createWithNewExecutionEnvironment(NEO::defaultHwInfo.get())); + + linker.link( + globalVar, globalConst, exportedFunc, {}, + patchableGlobalVarSeg, patchableConstVarSeg, instructionsSegments, + unresolvedExternals, device.get(), nullptr, nullptr, kernelDescriptors); + + auto relocatedSymbols = linker.extractRelocatedSymbols(); + EXPECT_EQ(0U, relocatedSymbols.size()); + EXPECT_EQ(2U, unresolvedExternals.size()); + for (auto &symbol : unresolvedExternals) { + EXPECT_NE(NEO::Linker::subDeviceID, symbol.unresolvedRelocation.symbolName); + } + + auto gpuAddressAs64bit = device->getDefaultEngine().commandStreamReceiver->getWorkPartitionAllocationGpuAddress(); + EXPECT_EQ(*reinterpret_cast(&instructionSegment[64]), static_cast((gpuAddressAs64bit >> 32) & 0xffffffff)); + EXPECT_EQ(*reinterpret_cast(instructionSegment.data()), static_cast(gpuAddressAs64bit & 0xffffffff)); +} + TEST(LinkerTests, givenUnresolvedExternalWhenPatchingInstructionsThenLinkPartially) { NEO::LinkerInput linkerInput; vISA::GenRelocEntry entry = {};