diff --git a/level_zero/core/source/module/module_imp.cpp b/level_zero/core/source/module/module_imp.cpp index 0ef9255cf6..7fc7f856e9 100644 --- a/level_zero/core/source/module/module_imp.cpp +++ b/level_zero/core/source/module/module_imp.cpp @@ -35,6 +35,7 @@ #include "compiler_options.h" #include "program_debug_data.h" +#include #include #include @@ -928,9 +929,23 @@ ze_result_t ModuleImp::getProperties(ze_module_properties_t *pModuleProperties) return ZE_RESULT_SUCCESS; } +void ModuleImp::moduleDependencyWalker(std::map> inDeps, void *moduleHandle, std::list *outDeps) { + std::map>::iterator it; + it = inDeps.find(moduleHandle); + if (it != inDeps.end()) { + std::map dependencies = it->second; + inDeps.erase(it); + for (auto const &dependency : dependencies) { + moduleDependencyWalker(inDeps, dependency.first, outDeps); + outDeps->push_back(static_cast(dependency.first)); + } + } +} + ze_result_t ModuleImp::performDynamicLink(uint32_t numModules, ze_module_handle_t *phModules, ze_module_build_log_handle_t *phLinkLog) { + std::map> dependencies; ModuleBuildLog *moduleLinkLog = nullptr; if (phLinkLog) { moduleLinkLog = ModuleBuildLog::create(); @@ -941,6 +956,7 @@ ze_result_t ModuleImp::performDynamicLink(uint32_t numModules, if (moduleId->isFullyLinked) { continue; } + std::map moduleDeps; NEO::Linker::PatchableSegments isaSegmentsForPatching; std::vector> patchedIsaTempStorage; uint32_t numPatchedSymbols = 0u; @@ -970,6 +986,11 @@ ze_result_t ModuleImp::performDynamicLink(uint32_t numModules, NEO::Linker::patchAddress(relocAddress, symbolIt->second, unresolvedExternal.unresolvedRelocation); numPatchedSymbols++; moduleId->importedSymbolAllocations.insert(moduleHandle->exportedFunctionsSurface); + std::map::iterator it; + it = moduleDeps.find(moduleHandle); + if ((it == moduleDeps.end()) && (nullptr != moduleHandle->exportedFunctionsSurface)) { + moduleDeps.insert(std::pair(moduleHandle, moduleHandle)); + } if (moduleLinkLog) { std::stringstream logMessage; @@ 
-977,18 +998,6 @@ ze_result_t ModuleImp::performDynamicLink(uint32_t numModules, unresolvedSymbolLogMessages.back().append(logMessage.str()); } - // Apply the exported functions surface state from the export module to the import module if it exists. - // Enables import modules to access the exported functions during kernel execution. - for (auto &kernImmData : moduleId->kernelImmDatas) { - kernImmData->getResidencyContainer().reserve(kernImmData->getResidencyContainer().size() + - ((moduleHandle->exportedFunctionsSurface != nullptr) ? 1 : 0) + moduleId->importedSymbolAllocations.size()); - - if (nullptr != moduleHandle->exportedFunctionsSurface) { - kernImmData->getResidencyContainer().push_back(moduleHandle->exportedFunctionsSurface); - } - kernImmData->getResidencyContainer().insert(kernImmData->getResidencyContainer().end(), moduleId->importedSymbolAllocations.begin(), - moduleId->importedSymbolAllocations.end()); - } break; } } @@ -1002,10 +1011,39 @@ ze_result_t ModuleImp::performDynamicLink(uint32_t numModules, if (numPatchedSymbols != moduleId->unresolvedExternalsInfo.size()) { return ZE_RESULT_ERROR_MODULE_LINK_FAILURE; } + dependencies.insert(std::pair>(moduleId, moduleDeps)); moduleId->copyPatchedSegments(isaSegmentsForPatching); moduleId->isFullyLinked = true; } + for (auto i = 0u; i < numModules; i++) { + static std::mutex depWalkMutex; + std::lock_guard autolock(depWalkMutex); + + auto moduleId = static_cast(Module::fromHandle(phModules[i])); + std::map>::iterator it; + std::list dependentModules; + + // Walk the dependencies for each Module and dependent Module to determine + // the dependency exportedFunctionsSurfaces that must be resident for a given Module's kernels + // to execute on the device using Dynamic Module Linking. 
+ it = dependencies.find(moduleId); + if (it != dependencies.end()) { + moduleDependencyWalker(dependencies, moduleId, &dependentModules); + // Apply the exported functions surface state from the export module(s) to the import module if it exists. + // Enables import modules to access the exported function(s) during kernel execution. + for (auto &kernImmData : moduleId->kernelImmDatas) { + for (auto const &dependency : dependentModules) { + kernImmData->getResidencyContainer().reserve(kernImmData->getResidencyContainer().size() + + 1 + moduleId->importedSymbolAllocations.size()); + kernImmData->getResidencyContainer().push_back(dependency->exportedFunctionsSurface); + } + kernImmData->getResidencyContainer().insert(kernImmData->getResidencyContainer().end(), moduleId->importedSymbolAllocations.begin(), + moduleId->importedSymbolAllocations.end()); + } + } + } + { NEO::ExternalFunctionInfosT externalFunctionInfos; NEO::FunctionDependenciesT extFuncDependencies; diff --git a/level_zero/core/source/module/module_imp.h b/level_zero/core/source/module/module_imp.h index cebc8115b1..ffbf6dd7d2 100644 --- a/level_zero/core/source/module/module_imp.h +++ b/level_zero/core/source/module/module_imp.h @@ -17,6 +17,7 @@ #include "igfxfmid.h" +#include #include #include @@ -139,6 +140,8 @@ struct ModuleImp : public Module { return this->translationUnit.get(); } + void moduleDependencyWalker(std::map> inDeps, void *moduleHandle, std::list *outDeps); + protected: void copyPatchedSegments(const NEO::Linker::PatchableSegments &isaSegmentsForPatching); void verifyDebugCapabilities(); diff --git a/level_zero/core/test/black_box_tests/common/zello_common.h b/level_zero/core/test/black_box_tests/common/zello_common.h index b619d6a0df..fedb409fba 100644 --- a/level_zero/core/test/black_box_tests/common/zello_common.h +++ b/level_zero/core/test/black_box_tests/common/zello_common.h @@ -68,6 +68,17 @@ inline int getParamValue(int argc, char *argv[], const char *shortName, const ch return 
defaultValue; } +inline bool isCircularDepTest(int argc, char *argv[]) { + bool enabled = isParamEnabled(argc, argv, "-c", "--circular"); + if (enabled == false) { + return false; + } + + std::cerr << "Circular Dependency Test mode detected" << std::endl; + + return true; +} + inline bool isVerbose(int argc, char *argv[]) { bool enabled = isParamEnabled(argc, argv, "-v", "--verbose"); if (enabled == false) { diff --git a/level_zero/core/test/black_box_tests/zello_dynamic_link.cpp b/level_zero/core/test/black_box_tests/zello_dynamic_link.cpp index 974baf060c..be5eea2ecb 100644 --- a/level_zero/core/test/black_box_tests/zello_dynamic_link.cpp +++ b/level_zero/core/test/black_box_tests/zello_dynamic_link.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021 Intel Corporation + * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -40,13 +40,62 @@ int lib_func_sub(int x, int y) { } )==="; +const char *importModuleSrcCircDep = R"===( +int lib_func_add(int x, int y); +int lib_func_mult(int x, int y); +int lib_func_sub(int x, int y); + +kernel void call_library_funcs(__global int* result) { + int add_result = lib_func_add(1,2); + int mult_result = lib_func_mult(add_result,2); + result[0] = lib_func_sub(mult_result, 1); +} + +int lib_func_add2(int x) { + return x+2; +} +)==="; + +const char *exportModuleSrcCircDep = R"===( +int lib_func_add2(int x); +int lib_func_add5(int x); + +int lib_func_add(int x, int y) { + return lib_func_add5(lib_func_add2(x + y)); +} + +int lib_func_mult(int x, int y) { + return x*y; +} + +int lib_func_sub(int x, int y) { + return x-y; +} +)==="; + +const char *exportModuleSrc2CircDep = R"===( +int lib_func_add5(int x) { + return x+5; +} +)==="; + extern bool verbose; bool verbose = false; int main(int argc, char *argv[]) { bool outputValidationSuccessful = true; verbose = isVerbose(argc, argv); + bool circularDep = isCircularDepTest(argc, argv); + int numModules = 2; + char *exportModuleSrcValue = const_cast(exportModuleSrc); 
+ char *importModuleSrcValue = const_cast(importModuleSrc); + ze_module_handle_t exportModule2 = {}; + if (circularDep) { + exportModuleSrcValue = const_cast(exportModuleSrcCircDep); + importModuleSrcValue = const_cast(importModuleSrcCircDep); + numModules = 3; + } // Setup SUCCESS_OR_TERMINATE(zeInit(ZE_INIT_FLAG_GPU_ONLY)); @@ -93,7 +142,7 @@ int main(int argc, char *argv[]) { std::cout << "reading export module for spirv\n"; } std::string buildLog; - auto exportBinaryModule = compileToSpirV(exportModuleSrc, "", buildLog); + auto exportBinaryModule = compileToSpirV(const_cast(exportModuleSrcValue), "", buildLog); if (buildLog.size() > 0) { std::cout << "Build log " << buildLog; } @@ -114,10 +163,35 @@ int main(int argc, char *argv[]) { SUCCESS_OR_TERMINATE(zeModuleCreate(context, device, &exportModuleDesc, &exportModule, nullptr)); + if (circularDep) { + if (verbose) { + std::cout << "reading export module2 for spirv\n"; + } + auto exportBinaryModule2 = compileToSpirV(exportModuleSrc2CircDep, "", buildLog); + if (buildLog.size() > 0) { + std::cout << "Build log " << buildLog; + } + SUCCESS_OR_TERMINATE((0 == exportBinaryModule2.size())); + + ze_module_desc_t exportModuleDesc2 = {ZE_STRUCTURE_TYPE_MODULE_DESC}; + exportModuleDesc2.format = ZE_MODULE_FORMAT_IL_SPIRV; + exportModuleDesc2.pInputModule = reinterpret_cast(exportBinaryModule2.data()); + exportModuleDesc2.inputSize = exportBinaryModule2.size(); + + // -library-compilation is required for the non-kernel functions to be listed as exported by the Intel Graphics Compiler + exportModuleDesc2.pBuildFlags = "-library-compilation"; + + if (verbose) { + std::cout << "building export module\n"; + } + + SUCCESS_OR_TERMINATE(zeModuleCreate(context, device, &exportModuleDesc2, &exportModule2, nullptr)); + } + if (verbose) { std::cout << "reading import module for spirv\n"; } - auto importBinaryModule = 
compileToSpirV(const_cast(importModuleSrcValue), "", buildLog); if (buildLog.size() > 0) { std::cout << "Build log " << buildLog; } @@ -128,14 +202,14 @@ int main(int argc, char *argv[]) { importModuleDesc.format = ZE_MODULE_FORMAT_IL_SPIRV; importModuleDesc.pInputModule = reinterpret_cast(importBinaryModule.data()); importModuleDesc.inputSize = importBinaryModule.size(); - + if (circularDep) { + importModuleDesc.pBuildFlags = "-library-compilation"; + } if (verbose) { std::cout << "building import module\n"; } SUCCESS_OR_TERMINATE(zeModuleCreate(context, device, &importModuleDesc, &importModule, nullptr)); - ze_module_handle_t modulesToLink[] = {importModule, exportModule}; - // Dynamically linking the two Modules to resolve the symbols if (verbose) { @@ -144,7 +218,13 @@ int main(int argc, char *argv[]) { ze_module_build_log_handle_t dynLinkLog; - SUCCESS_OR_TERMINATE(zeModuleDynamicLink(2, modulesToLink, &dynLinkLog)); + if (circularDep) { + ze_module_handle_t modulesToLink[] = {importModule, exportModule, exportModule2}; + SUCCESS_OR_TERMINATE(zeModuleDynamicLink(numModules, modulesToLink, &dynLinkLog)); + } else { + ze_module_handle_t modulesToLink[] = {importModule, exportModule}; + SUCCESS_OR_TERMINATE(zeModuleDynamicLink(numModules, modulesToLink, &dynLinkLog)); + } size_t buildLogSize; SUCCESS_OR_TERMINATE(zeModuleBuildLogGetString(dynLinkLog, &buildLogSize, nullptr)); @@ -202,8 +282,10 @@ int main(int argc, char *argv[]) { SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits::max())); // Validate results - int expectedResult = (((1 + 2) * 2) - 1); + if (circularDep) { + expectedResult = (((((1 + 2) + 2) + 5) * 2) - 1); + } if (expectedResult != *(int *)resultBuffer) { std::cout << "Result:" << *(int *)resultBuffer << " invalid\n"; @@ -223,6 +305,9 @@ int main(int argc, char *argv[]) { SUCCESS_OR_TERMINATE(zeKernelDestroy(importKernel)); SUCCESS_OR_TERMINATE(zeModuleDestroy(importModule)); 
SUCCESS_OR_TERMINATE(zeModuleDestroy(exportModule)); + if (circularDep) { + SUCCESS_OR_TERMINATE(zeModuleDestroy(exportModule2)); + } SUCCESS_OR_TERMINATE(zeContextDestroy(context)); std::cout << "\nZello Dynamic Link Results validation " << (outputValidationSuccessful ? "PASSED" : "FAILED") << "\n"; return 0; diff --git a/level_zero/core/test/unit_tests/sources/module/test_module.cpp b/level_zero/core/test/unit_tests/sources/module/test_module.cpp index 53af4e4458..5ae3e32222 100644 --- a/level_zero/core/test/unit_tests/sources/module/test_module.cpp +++ b/level_zero/core/test/unit_tests/sources/module/test_module.cpp @@ -1087,9 +1087,11 @@ struct ModuleDynamicLinkTests : public Test { Test::SetUp(); module0 = std::make_unique(device, nullptr, ModuleType::User); module1 = std::make_unique(device, nullptr, ModuleType::User); + module2 = std::make_unique(device, nullptr, ModuleType::User); } std::unique_ptr module0; std::unique_ptr module1; + std::unique_ptr module2; }; TEST_F(ModuleDynamicLinkTests, givenCallToDynamicLinkOnModulesWithoutUnresolvedSymbolsThenSuccessIsReturned) { @@ -1215,6 +1217,100 @@ TEST_F(ModuleDynamicLinkTests, givenModuleWithUnresolvedSymbolWhenTheOtherModule EXPECT_EQ(module0->kernelImmDatas[0]->getResidencyContainer().back(), &alloc); } +TEST_F(ModuleDynamicLinkTests, givenMultipleModulesWithUnresolvedSymbolWhenTheEachModuleDefinesTheSymbolThenTheExportedFunctionSurfaceInBothModulesIsAddedToTheResidencyContainer) { + + uint64_t gpuAddress0 = 0x12345; + uint64_t gpuAddress1 = 0x6789; + uint64_t gpuAddress2 = 0x1479; + uint32_t offset = 0x20; + + NEO::Linker::RelocationInfo unresolvedRelocation; + unresolvedRelocation.symbolName = "unresolved"; + unresolvedRelocation.offset = offset; + unresolvedRelocation.type = NEO::Linker::RelocationInfo::Type::Address; + NEO::Linker::UnresolvedExternal unresolvedExternal; + unresolvedExternal.unresolvedRelocation = unresolvedRelocation; + + NEO::Linker::RelocationInfo unresolvedRelocationCircular; + 
unresolvedRelocationCircular.symbolName = "unresolvedCircular"; + unresolvedRelocationCircular.offset = offset; + unresolvedRelocationCircular.type = NEO::Linker::RelocationInfo::Type::Address; + NEO::Linker::UnresolvedExternal unresolvedExternalCircular; + unresolvedExternalCircular.unresolvedRelocation = unresolvedRelocationCircular; + + NEO::Linker::RelocationInfo unresolvedRelocationChained; + unresolvedRelocationChained.symbolName = "unresolvedChained"; + unresolvedRelocationChained.offset = offset; + unresolvedRelocationChained.type = NEO::Linker::RelocationInfo::Type::Address; + NEO::Linker::UnresolvedExternal unresolvedExternalChained; + unresolvedExternalChained.unresolvedRelocation = unresolvedRelocationChained; + + NEO::SymbolInfo module0SymbolInfo{}; + NEO::Linker::RelocatedSymbol module0RelocatedSymbol{module0SymbolInfo, gpuAddress0}; + + NEO::SymbolInfo module1SymbolInfo{}; + NEO::Linker::RelocatedSymbol module1RelocatedSymbol{module1SymbolInfo, gpuAddress1}; + + NEO::SymbolInfo module2SymbolInfo{}; + NEO::Linker::RelocatedSymbol module2RelocatedSymbol{module2SymbolInfo, gpuAddress2}; + + char kernelHeap[MemoryConstants::pageSize] = {}; + + auto kernelInfo = std::make_unique(); + kernelInfo->heapInfo.pKernelHeap = kernelHeap; + kernelInfo->heapInfo.KernelHeapSize = MemoryConstants::pageSize; + module0->getTranslationUnit()->programInfo.kernelInfos.push_back(kernelInfo.release()); + + auto linkerInput = std::make_unique<::WhiteBox>(); + linkerInput->traits.requiresPatchingOfInstructionSegments = true; + + module0->getTranslationUnit()->programInfo.linkerInput = std::move(linkerInput); + module0->unresolvedExternalsInfo.push_back({unresolvedRelocation}); + module0->unresolvedExternalsInfo[0].instructionsSegmentId = 0u; + + auto kernelImmData = std::make_unique>(device); + kernelImmData->isaGraphicsAllocation.reset(neoDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties( + {device->getRootDeviceIndex(), MemoryConstants::pageSize, 
NEO::AllocationType::KERNEL_ISA, neoDevice->getDeviceBitfield()})); + + module0->kernelImmDatas.push_back(std::move(kernelImmData)); + + module0->symbols[unresolvedRelocationCircular.symbolName] = module0RelocatedSymbol; + MockGraphicsAllocation alloc0; + module0->exportedFunctionsSurface = &alloc0; + + char kernelHeap2[MemoryConstants::pageSize] = {}; + + auto kernelInfo2 = std::make_unique(); + kernelInfo2->heapInfo.pKernelHeap = kernelHeap2; + kernelInfo2->heapInfo.KernelHeapSize = MemoryConstants::pageSize; + module1->getTranslationUnit()->programInfo.kernelInfos.push_back(kernelInfo2.release()); + + auto linkerInput1 = std::make_unique<::WhiteBox>(); + linkerInput1->traits.requiresPatchingOfInstructionSegments = true; + + module1->getTranslationUnit()->programInfo.linkerInput = std::move(linkerInput1); + module1->unresolvedExternalsInfo.push_back({unresolvedRelocationCircular}); + module1->unresolvedExternalsInfo[0].instructionsSegmentId = 0u; + module1->unresolvedExternalsInfo.push_back({unresolvedRelocationChained}); + module1->unresolvedExternalsInfo[1].instructionsSegmentId = 0u; + + module1->symbols[unresolvedRelocation.symbolName] = module1RelocatedSymbol; + MockGraphicsAllocation alloc1; + module1->exportedFunctionsSurface = &alloc1; + + module2->symbols[unresolvedRelocationChained.symbolName] = module2RelocatedSymbol; + MockGraphicsAllocation alloc2; + module2->exportedFunctionsSurface = &alloc2; + + std::vector hModules = {module0->toHandle(), module1->toHandle(), module2->toHandle()}; + ze_result_t res = module0->performDynamicLink(3, hModules.data(), nullptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, res); + EXPECT_EQ((int)module0->kernelImmDatas[0]->getResidencyContainer().size(), 4); + EXPECT_TRUE(std::find(module0->kernelImmDatas[0]->getResidencyContainer().begin(), module0->kernelImmDatas[0]->getResidencyContainer().end(), &alloc0) != module0->kernelImmDatas[0]->getResidencyContainer().end()); + 
EXPECT_TRUE(std::find(module0->kernelImmDatas[0]->getResidencyContainer().begin(), module0->kernelImmDatas[0]->getResidencyContainer().end(), &alloc1) != module0->kernelImmDatas[0]->getResidencyContainer().end()); + EXPECT_TRUE(std::find(module0->kernelImmDatas[0]->getResidencyContainer().begin(), module0->kernelImmDatas[0]->getResidencyContainer().end(), &alloc2) != module0->kernelImmDatas[0]->getResidencyContainer().end()); +} + TEST_F(ModuleDynamicLinkTests, givenModuleWithUnresolvedSymbolWhenTheOtherModuleDefinesTheSymbolThenTheBuildLogContainsTheSuccessfulLinkage) { uint64_t gpuAddress = 0x12345;