Fix L0: reuse partially linked segments when performing dynamic linking

When a module is created but not fully linked, we don't copy the ISA to the GPU;
however, we still need to store the patched segments so dynamic linking can reuse them

Related-To HSD-18022114161

Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
Mateusz Jablonski
2022-05-30 16:15:41 +00:00
committed by Compute-Runtime-Automation
parent ae56d50b4f
commit 16fd791a7c
3 changed files with 69 additions and 14 deletions

View File

@@ -784,8 +784,7 @@ bool ModuleImp::linkBinary() {
exportedFunctions.gpuAddress = static_cast<uintptr_t>(exportedFunctionsSurface->getGpuAddressToPatch());
exportedFunctions.segmentSize = exportedFunctionsSurface->getUnderlyingBufferSize();
}
Linker::PatchableSegments isaSegmentsForPatching;
std::vector<std::vector<char>> patchedIsaTempStorage;
Linker::KernelDescriptorsT kernelDescriptors;
if (linkerInput->getTraits().requiresPatchingOfInstructionSegments) {
patchedIsaTempStorage.reserve(this->kernelImmDatas.size());
@@ -990,17 +989,19 @@ ze_result_t ModuleImp::performDynamicLink(uint32_t numModules,
}
// Resolve Unresolved Symbols in the Relocation Table between the Modules if Required.
NEO::Linker::PatchableSegments isaSegmentsForPatching;
std::vector<std::vector<char>> patchedIsaTempStorage;
auto &isaSegmentsForPatching = moduleId->isaSegmentsForPatching;
auto &patchedIsaTempStorage = moduleId->patchedIsaTempStorage;
uint32_t numPatchedSymbols = 0u;
std::vector<std::string> unresolvedSymbolLogMessages;
if (moduleId->translationUnit->programInfo.linkerInput && moduleId->translationUnit->programInfo.linkerInput->getTraits().requiresPatchingOfInstructionSegments) {
patchedIsaTempStorage.reserve(moduleId->kernelImmDatas.size());
for (const auto &kernelInfo : moduleId->translationUnit->programInfo.kernelInfos) {
auto &kernHeapInfo = kernelInfo->heapInfo;
const char *originalIsa = reinterpret_cast<const char *>(kernHeapInfo.pKernelHeap);
patchedIsaTempStorage.push_back(std::vector<char>(originalIsa, originalIsa + kernHeapInfo.KernelHeapSize));
isaSegmentsForPatching.push_back(NEO::Linker::PatchableSegment{patchedIsaTempStorage.rbegin()->data(), kernHeapInfo.KernelHeapSize});
if (patchedIsaTempStorage.empty()) {
patchedIsaTempStorage.reserve(moduleId->kernelImmDatas.size());
for (const auto &kernelInfo : moduleId->translationUnit->programInfo.kernelInfos) {
auto &kernHeapInfo = kernelInfo->heapInfo;
const char *originalIsa = reinterpret_cast<const char *>(kernHeapInfo.pKernelHeap);
patchedIsaTempStorage.push_back(std::vector<char>(originalIsa, originalIsa + kernHeapInfo.KernelHeapSize));
isaSegmentsForPatching.push_back(NEO::Linker::PatchableSegment{patchedIsaTempStorage.rbegin()->data(), kernHeapInfo.KernelHeapSize});
}
}
for (const auto &unresolvedExternal : moduleId->unresolvedExternalsInfo) {
if (moduleLinkLog) {

View File

@@ -174,6 +174,9 @@ struct ModuleImp : public Module {
NEO::Linker::UnresolvedExternals unresolvedExternalsInfo{};
std::set<NEO::GraphicsAllocation *> importedSymbolAllocations{};
uint32_t debugModuleHandle = 0;
NEO::Linker::PatchableSegments isaSegmentsForPatching;
std::vector<std::vector<char>> patchedIsaTempStorage;
};
bool moveBuildOption(std::string &dstOptionsSet, std::string &srcOptionSet, NEO::ConstStringRef dstOptionName, NEO::ConstStringRef srcOptionName);

View File

@@ -1143,9 +1143,9 @@ TEST_F(ModulePropertyTest, givenCallToGetPropertiesWithUnresolvedSymbolsThenFlag
EXPECT_EQ(expectedFlags, moduleProperties.flags);
}
struct ModuleDynamicLinkTests : public Test<ModuleFixture> {
struct ModuleDynamicLinkTests : public Test<DeviceFixture> {
void SetUp() override {
Test<ModuleFixture>::SetUp();
Test<DeviceFixture>::SetUp();
module0 = std::make_unique<WhiteBox<::L0::Module>>(device, nullptr, ModuleType::User);
module1 = std::make_unique<WhiteBox<::L0::Module>>(device, nullptr, ModuleType::User);
module2 = std::make_unique<WhiteBox<::L0::Module>>(device, nullptr, ModuleType::User);
@@ -1154,7 +1154,7 @@ struct ModuleDynamicLinkTests : public Test<ModuleFixture> {
module0.reset(nullptr);
module1.reset(nullptr);
module2.reset(nullptr);
Test<ModuleFixture>::TearDown();
Test<DeviceFixture>::TearDown();
}
std::unique_ptr<WhiteBox<::L0::Module>> module0;
std::unique_ptr<WhiteBox<::L0::Module>> module1;
@@ -1378,6 +1378,56 @@ TEST_F(ModuleDynamicLinkTests, givenMultipleModulesWithUnresolvedSymbolWhenTheEa
EXPECT_TRUE(std::find(module0->kernelImmDatas[0]->getResidencyContainer().begin(), module0->kernelImmDatas[0]->getResidencyContainer().end(), &alloc2) != module0->kernelImmDatas[0]->getResidencyContainer().end());
}
TEST_F(ModuleDynamicLinkTests, givenModuleWithInternalRelocationAndUnresolvedExternalSymbolWhenTheOtherModuleDefinesTheSymbolThenAllSymbolsArePatched) {
auto linkerInput = std::make_unique<::WhiteBox<NEO::LinkerInput>>();
linkerInput->traits.requiresPatchingOfInstructionSegments = true;
linkerInput->exportedFunctionsSegmentId = 0;
uint32_t internalRelocationOffset = 0x10;
linkerInput->textRelocations.push_back({{implicitArgsRelocationSymbolName, internalRelocationOffset, LinkerInput::RelocationInfo::Type::Address, SegmentType::Instructions}});
uint32_t expectedInternalRelocationValue = sizeof(ImplicitArgs);
uint32_t externalRelocationOffset = 0x20;
constexpr auto externalSymbolName = "unresolved";
uint64_t externalSymbolAddress = castToUint64(&externalRelocationOffset);
linkerInput->textRelocations[0].push_back({externalSymbolName, externalRelocationOffset, LinkerInput::RelocationInfo::Type::Address, SegmentType::Instructions});
char kernelHeap[MemoryConstants::cacheLineSize] = {};
auto kernelInfo = std::make_unique<NEO::KernelInfo>();
kernelInfo->heapInfo.pKernelHeap = kernelHeap;
kernelInfo->heapInfo.KernelHeapSize = MemoryConstants::cacheLineSize;
kernelInfo->kernelDescriptor.kernelAttributes.flags.useStackCalls = true;
module0->getTranslationUnit()->programInfo.kernelInfos.push_back(kernelInfo.release());
module0->getTranslationUnit()->programInfo.linkerInput = std::move(linkerInput);
auto kernelImmData = std::make_unique<WhiteBox<::L0::KernelImmutableData>>(device);
kernelImmData->isaGraphicsAllocation.reset(neoDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties(
{device->getRootDeviceIndex(), MemoryConstants::cacheLineSize, NEO::AllocationType::KERNEL_ISA, neoDevice->getDeviceBitfield()}));
auto isaPtr = kernelImmData->getIsaGraphicsAllocation()->getUnderlyingBuffer();
memset(isaPtr, 0, MemoryConstants::cacheLineSize);
module0->kernelImmDatas.push_back(std::move(kernelImmData));
module1->symbols[externalSymbolName] = {{}, externalSymbolAddress};
EXPECT_TRUE(module0->linkBinary());
EXPECT_NE(expectedInternalRelocationValue, *reinterpret_cast<uint32_t *>(ptrOffset(isaPtr, internalRelocationOffset)));
EXPECT_FALSE(module0->isFullyLinked);
std::vector<ze_module_handle_t> hModules = {module0->toHandle(), module1->toHandle()};
ze_result_t res = module0->performDynamicLink(2, hModules.data(), nullptr);
EXPECT_EQ(ZE_RESULT_SUCCESS, res);
EXPECT_TRUE(module0->isFullyLinked);
EXPECT_EQ(expectedInternalRelocationValue, *reinterpret_cast<uint32_t *>(ptrOffset(isaPtr, internalRelocationOffset)));
EXPECT_EQ(externalSymbolAddress, *reinterpret_cast<uint64_t *>(ptrOffset(isaPtr, externalRelocationOffset)));
}
TEST_F(ModuleDynamicLinkTests, givenMultipleModulesWithUnresolvedSymbolWhenTheEachModuleDefinesTheSymbolThenTheExportedFunctionSurfaceInBothModulesIsAddedToTheImportedSymbolAllocations) {
uint64_t gpuAddress0 = 0x12345;
@@ -1594,7 +1644,8 @@ TEST_F(ModuleDynamicLinkTests, givenModuleWithUnresolvedSymbolsNotPresentInAnoth
zeModuleBuildLogDestroy(dynLinkLog);
}
TEST_F(ModuleDynamicLinkTests, givenUnresolvedSymbolsWhenModuleIsCreatedThenIsaAllocationsAreNotCopied) {
using ModuleDynamicLinkTest = Test<ModuleFixture>;
TEST_F(ModuleDynamicLinkTest, givenUnresolvedSymbolsWhenModuleIsCreatedThenIsaAllocationsAreNotCopied) {
NEO::MockCompilerEnableGuard mock(true);
auto cip = new NEO::MockCompilerInterfaceCaptureBuildOptions();
neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->compilerInterface.reset(cip);