Reuse builtin module and init selected builtin on device init

- Start an async thread at device initialization that initializes the selected
  builtins and then exits.
- Share a single module across builtins that use the same binary.

Resolves: NEO-7644

Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
Lukasz Jobczyk 2023-01-31 08:07:58 +00:00 committed by Compute-Runtime-Automation
parent c726d3b3fe
commit d4fe981fa9
14 changed files with 125 additions and 43 deletions

View File

@ -67,6 +67,7 @@ struct BuiltinFunctionsLib {
virtual Kernel *getImageFunction(ImageBuiltin func) = 0;
virtual void initBuiltinKernel(Builtin builtId) = 0;
virtual void initBuiltinImageKernel(ImageBuiltin func) = 0;
virtual void ensureInitCompletion() = 0;
[[nodiscard]] MOCKABLE_VIRTUAL std::unique_lock<MutexType> obtainUniqueOwnership();
protected:

View File

@ -12,7 +12,6 @@
#include "level_zero/core/source/device/device.h"
#include "level_zero/core/source/kernel/kernel.h"
#include "level_zero/core/source/module/module.h"
namespace NEO {
const char *getAdditionalBuiltinAsString(EBuiltInOps::Type builtin) {
@ -24,13 +23,9 @@ namespace L0 {
BuiltinFunctionsLibImpl::BuiltinData::~BuiltinData() {
func.reset();
module.reset();
}
BuiltinFunctionsLibImpl::BuiltinData::BuiltinData() = default;
BuiltinFunctionsLibImpl::BuiltinData::BuiltinData(std::unique_ptr<L0::Module> &&mod, std::unique_ptr<L0::Kernel> &&ker) {
module = std::move(mod);
func = std::move(ker);
}
BuiltinFunctionsLibImpl::BuiltinData::BuiltinData(Module *module, std::unique_ptr<L0::Kernel> &&ker) : module(module), func(std::move(ker)) {}
// Locks the library's ownership mutex and hands the held lock to the caller.
// The mutex stays locked until the returned std::unique_lock is unlocked or destroyed.
std::unique_lock<BuiltinFunctionsLib::MutexType> BuiltinFunctionsLib::obtainUniqueOwnership() {
    std::unique_lock<BuiltinFunctionsLib::MutexType> ownership(this->ownershipMutex);
    return ownership;
}
@ -185,9 +180,17 @@ void BuiltinFunctionsLibImpl::initBuiltinImageKernel(ImageBuiltin func) {
imageBuiltins[builtId] = loadBuiltIn(builtin, builtinName);
}
// Stores the device and shared builtins library, then - when async init is enabled -
// schedules initialization of the FillBufferImmediate builtin on a separate task.
// Callers synchronize with that task through ensureInitCompletion().
BuiltinFunctionsLibImpl::BuiltinFunctionsLibImpl(Device *device, NEO::BuiltIns *builtInsLib) : device(device), builtInsLib(builtInsLib) {
    if (!initBuiltinsAsyncEnabled()) {
        // Nothing scheduled; initAsyncComplete keeps its in-class default.
        return;
    }

    // Mark the init as pending before the task is launched.
    this->initAsyncComplete = false;
    // NOTE(review): initBuiltinKernel is virtual and the task may start before a
    // derived-class constructor has finished - confirm subclasses tolerate this.
    this->initAsync = std::async(std::launch::async, &BuiltinFunctionsLibImpl::initBuiltinKernel, this, Builtin::FillBufferImmediate);
}
Kernel *BuiltinFunctionsLibImpl::getFunction(Builtin func) {
auto builtId = static_cast<uint32_t>(func);
this->ensureInitCompletion();
if (builtins[builtId].get() == nullptr) {
initBuiltinKernel(func);
}
@ -198,6 +201,7 @@ Kernel *BuiltinFunctionsLibImpl::getFunction(Builtin func) {
Kernel *BuiltinFunctionsLibImpl::getImageFunction(ImageBuiltin func) {
auto builtId = static_cast<uint32_t>(func);
this->ensureInitCompletion();
if (imageBuiltins[builtId].get() == nullptr) {
initBuiltinImageKernel(func);
}
@ -223,27 +227,46 @@ std::unique_ptr<BuiltinFunctionsLibImpl::BuiltinData> BuiltinFunctionsLibImpl::l
}
}
[[maybe_unused]] ze_result_t res;
std::unique_ptr<Module> module;
ze_module_handle_t moduleHandle;
ze_module_desc_t moduleDesc = {};
moduleDesc.format = builtinCode.type == BuiltInCodeType::Binary ? ZE_MODULE_FORMAT_NATIVE : ZE_MODULE_FORMAT_IL_SPIRV;
moduleDesc.pInputModule = reinterpret_cast<uint8_t *>(&builtinCode.resource[0]);
moduleDesc.inputSize = builtinCode.resource.size();
res = device->createModule(&moduleDesc, &moduleHandle, nullptr, ModuleType::Builtin);
UNRECOVERABLE_IF(res != ZE_RESULT_SUCCESS);
if (builtinCode.resource.empty()) {
return nullptr;
}
module.reset(Module::fromHandle(moduleHandle));
[[maybe_unused]] ze_result_t res;
if (this->modules.size() <= builtin) {
this->modules.resize(builtin + 1u);
}
if (this->modules[builtin].get() == nullptr) {
std::unique_ptr<Module> module;
ze_module_handle_t moduleHandle;
ze_module_desc_t moduleDesc = {};
moduleDesc.format = builtinCode.type == BuiltInCodeType::Binary ? ZE_MODULE_FORMAT_NATIVE : ZE_MODULE_FORMAT_IL_SPIRV;
moduleDesc.pInputModule = reinterpret_cast<uint8_t *>(&builtinCode.resource[0]);
moduleDesc.inputSize = builtinCode.resource.size();
res = device->createModule(&moduleDesc, &moduleHandle, nullptr, ModuleType::Builtin);
UNRECOVERABLE_IF(res != ZE_RESULT_SUCCESS);
module.reset(Module::fromHandle(moduleHandle));
this->modules[builtin] = std::move(module);
}
std::unique_ptr<Kernel> kernel;
ze_kernel_handle_t kernelHandle;
ze_kernel_desc_t kernelDesc = {};
kernelDesc.pKernelName = builtInName;
res = module->createKernel(&kernelDesc, &kernelHandle);
res = this->modules[builtin]->createKernel(&kernelDesc, &kernelHandle);
DEBUG_BREAK_IF(res != ZE_RESULT_SUCCESS);
kernel.reset(Kernel::fromHandle(kernelHandle));
return std::unique_ptr<BuiltinData>(new BuiltinData{std::move(module), std::move(kernel)});
return std::unique_ptr<BuiltinData>(new BuiltinData{modules[builtin].get(), std::move(kernel)});
}
// Blocks until the asynchronous builtin initialization scheduled by the constructor
// has finished, then records completion so that subsequent calls return immediately.
void BuiltinFunctionsLibImpl::ensureInitCompletion() {
    if (this->initAsyncComplete) {
        return;
    }

    // std::future::wait() has undefined behavior on a future without shared state,
    // so only wait when a task was actually launched (the flag and the future are
    // separate members and the flag alone does not prove a task exists).
    if (this->initAsync.valid()) {
        this->initAsync.wait();
    }
    this->initAsyncComplete = true;
}
} // namespace L0

View File

@ -7,7 +7,13 @@
#pragma once
#include "shared/source/os_interface/os_thread.h"
#include "level_zero/core/source/builtin/builtin_functions_lib.h"
#include "level_zero/core/source/module/module.h"
#include <future>
#include <vector>
namespace NEO {
namespace EBuiltInOps {
@ -17,15 +23,12 @@ class BuiltIns;
} // namespace NEO
namespace L0 {
struct Module;
struct Kernel;
struct Device;
struct BuiltinFunctionsLibImpl : BuiltinFunctionsLib {
struct BuiltinData;
BuiltinFunctionsLibImpl(Device *device, NEO::BuiltIns *builtInsLib)
: device(device), builtInsLib(builtInsLib) {
}
BuiltinFunctionsLibImpl(Device *device, NEO::BuiltIns *builtInsLib);
~BuiltinFunctionsLibImpl() override {
builtins->reset();
imageBuiltins->reset();
@ -35,20 +38,27 @@ struct BuiltinFunctionsLibImpl : BuiltinFunctionsLib {
Kernel *getImageFunction(ImageBuiltin func) override;
void initBuiltinKernel(Builtin builtId) override;
void initBuiltinImageKernel(ImageBuiltin func) override;
void ensureInitCompletion() override;
MOCKABLE_VIRTUAL std::unique_ptr<BuiltinFunctionsLibImpl::BuiltinData> loadBuiltIn(NEO::EBuiltInOps::Type builtin, const char *builtInName);
static bool initBuiltinsAsyncEnabled();
protected:
std::vector<std::unique_ptr<Module>> modules = {};
std::unique_ptr<BuiltinData> builtins[static_cast<uint32_t>(Builtin::COUNT)];
std::unique_ptr<BuiltinData> imageBuiltins[static_cast<uint32_t>(ImageBuiltin::COUNT)];
Device *device;
NEO::BuiltIns *builtInsLib;
std::future<void> initAsync = {};
bool initAsyncComplete = true;
};
struct BuiltinFunctionsLibImpl::BuiltinData {
MOCKABLE_VIRTUAL ~BuiltinData();
BuiltinData();
BuiltinData(std::unique_ptr<L0::Module> &&mod, std::unique_ptr<L0::Kernel> &&ker);
BuiltinData(Module *module, std::unique_ptr<L0::Kernel> &&ker);
std::unique_ptr<Module> module;
Module *module = nullptr;
std::unique_ptr<Kernel> func;
};
} // namespace L0

View File

@ -16,4 +16,8 @@ std::unique_ptr<BuiltinFunctionsLib> BuiltinFunctionsLib::create(Device *device,
return std::unique_ptr<BuiltinFunctionsLib>(new BuiltinFunctionsLibImpl(device, builtins));
}
// This variant unconditionally reports asynchronous builtin initialization as enabled.
bool BuiltinFunctionsLibImpl::initBuiltinsAsyncEnabled() {
    constexpr bool asyncInitEnabled = true;
    return asyncInitEnabled;
}
} // namespace L0

View File

@ -18,6 +18,7 @@
#include "shared/source/memory_manager/unified_memory_manager.h"
#include "shared/source/os_interface/os_library.h"
#include "level_zero/core/source/builtin/builtin_functions_lib.h"
#include "level_zero/core/source/context/context_imp.h"
#include "level_zero/core/source/device/device_imp.h"
#include "level_zero/core/source/driver/driver_imp.h"
@ -161,6 +162,9 @@ DriverHandleImp::~DriverHandleImp() {
}
for (auto &device : this->devices) {
if (device->getBuiltinFunctionsLib()) {
device->getBuiltinFunctionsLib()->ensureInitCompletion();
}
delete device;
}

View File

@ -23,6 +23,7 @@ struct MockBuiltinFunctionsLibImpl : BuiltinFunctionsLibImpl {
dummyKernel = std::unique_ptr<WhiteBox<::L0::Kernel>>(new Mock<::L0::Kernel>());
dummyModule = std::unique_ptr<Module>(new Mock<Module>(device, nullptr));
dummyKernel->module = dummyModule.get();
mockModule = std::unique_ptr<Module>(new Mock<Module>(device, nullptr));
}
void initBuiltinKernel(L0::Builtin func) override {
auto builtId = static_cast<uint32_t>(func);
@ -40,6 +41,7 @@ struct MockBuiltinFunctionsLibImpl : BuiltinFunctionsLibImpl {
std::unique_ptr<WhiteBox<::L0::Kernel>> dummyKernel;
std::unique_ptr<Module> dummyModule;
std::unique_ptr<Module> mockModule;
Kernel *getFunction(Builtin func) override {
return dummyKernel.get();
@ -51,9 +53,8 @@ struct MockBuiltinFunctionsLibImpl : BuiltinFunctionsLibImpl {
std::unique_ptr<BuiltinData> loadBuiltIn(NEO::EBuiltInOps::Type builtin, const char *builtInName) override {
std::unique_ptr<Kernel> mockKernel(new Mock<::L0::Kernel>());
std::unique_ptr<Module> mockModule(new Mock<Module>(device, nullptr));
return std::unique_ptr<BuiltinData>(new BuiltinData{std::move(mockModule), std::move(mockKernel)});
return std::unique_ptr<BuiltinData>(new BuiltinData{mockModule.get(), std::move(mockKernel)});
}
};
} // namespace ult

View File

@ -17,10 +17,6 @@ namespace ult {
struct MockBuiltinDataTimestamp : BuiltinFunctionsLibImpl::BuiltinData {
using BuiltinFunctionsLibImpl::BuiltinData::BuiltinData;
~MockBuiltinDataTimestamp() override {
module.release();
}
};
struct MockBuiltinFunctionsLibImplTimestamps : BuiltinFunctionsLibImpl {
@ -57,7 +53,8 @@ struct MockBuiltinFunctionsLibImplTimestamps : BuiltinFunctionsLibImpl {
auto builtInCode = builtInsLib->getBuiltinsLib().getBuiltinCode(builtin, builtInCodeType, *device->getNEODevice());
[[maybe_unused]] ze_result_t res;
std::unique_ptr<Module> module;
Module *module;
ze_module_handle_t moduleHandle;
ze_module_desc_t moduleDesc = {};
moduleDesc.format = builtInCode.type == BuiltInCodeType::Binary ? ZE_MODULE_FORMAT_NATIVE : ZE_MODULE_FORMAT_IL_SPIRV;
@ -65,8 +62,7 @@ struct MockBuiltinFunctionsLibImplTimestamps : BuiltinFunctionsLibImpl {
moduleDesc.inputSize = builtInCode.resource.size();
res = device->createModule(&moduleDesc, &moduleHandle, nullptr, ModuleType::Builtin);
UNRECOVERABLE_IF(res != ZE_RESULT_SUCCESS);
module.reset(Module::fromHandle(moduleHandle));
module = Module::fromHandle(moduleHandle);
std::unique_ptr<Kernel> kernel;
ze_kernel_handle_t kernelHandle;
@ -76,7 +72,7 @@ struct MockBuiltinFunctionsLibImplTimestamps : BuiltinFunctionsLibImpl {
DEBUG_BREAK_IF(res != ZE_RESULT_SUCCESS);
kernel.reset(Kernel::fromHandle(kernelHandle));
return std::unique_ptr<BuiltinData>(new MockBuiltinDataTimestamp{std::move(module), std::move(kernel)});
return std::unique_ptr<BuiltinData>(new MockBuiltinDataTimestamp{module, std::move(kernel)});
}
};

View File

@ -43,7 +43,6 @@ HWTEST_F(BuiltInTestL0, givenDeviceWithUnregisteredBinaryBuiltinWhenGettingBuilt
pDevice->getRootDeviceEnvironment().getMutableHardwareInfo()->platform.usRevId += 0xdead;
L0::BuiltinFunctionsLibImpl builtinFunctionsLib{&deviceL0, pDevice->getBuiltIns()};
for (uint32_t builtId = 0; builtId < static_cast<uint32_t>(L0::Builtin::COUNT); builtId++) {
deviceL0.formatForModule = {};
ASSERT_NE(nullptr, builtinFunctionsLib.getFunction(static_cast<L0::Builtin>(builtId)));
EXPECT_EQ(ZE_MODULE_FORMAT_NATIVE, deviceL0.formatForModule);
}

View File

@ -8,6 +8,8 @@
#include "shared/source/built_ins/built_ins.h"
#include "shared/source/execution_environment/root_device_environment.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/helpers/memory_management.h"
#include "shared/test/common/helpers/ult_hw_config.h"
#include "shared/test/common/mocks/mock_compiler_interface_spirv.h"
#include "shared/test/common/test_macros/hw_test.h"
@ -26,21 +28,27 @@ class BuiltinFunctionsLibFixture : public DeviceFixture {
public:
struct MockBuiltinFunctionsLibImpl : BuiltinFunctionsLibImpl {
using BuiltinFunctionsLibImpl::builtins;
using BuiltinFunctionsLibImpl::ensureInitCompletion;
using BuiltinFunctionsLibImpl::getFunction;
using BuiltinFunctionsLibImpl::imageBuiltins;
MockBuiltinFunctionsLibImpl(L0::Device *device, NEO::BuiltIns *builtInsLib) : BuiltinFunctionsLibImpl(device, builtInsLib) {}
using BuiltinFunctionsLibImpl::initAsyncComplete;
MockBuiltinFunctionsLibImpl(L0::Device *device, NEO::BuiltIns *builtInsLib) : BuiltinFunctionsLibImpl(device, builtInsLib) {
mockModule = std::unique_ptr<Module>(new Mock<Module>(device, nullptr));
}
std::unique_ptr<BuiltinData> loadBuiltIn(NEO::EBuiltInOps::Type builtin, const char *builtInName) override {
std::unique_ptr<Kernel> mockKernel(new Mock<::L0::Kernel>());
std::unique_ptr<Module> mockModule(new Mock<Module>(device, nullptr));
return std::unique_ptr<BuiltinData>(new BuiltinData{std::move(mockModule), std::move(mockKernel)});
return std::unique_ptr<BuiltinData>(new BuiltinData{mockModule.get(), std::move(mockKernel)});
}
std::unique_ptr<Module> mockModule;
};
void setUp() {
DeviceFixture::setUp();
mockDevicePtr = std::unique_ptr<MockDeviceForSpv<false, false>>(new MockDeviceForSpv<false, false>(device->getNEODevice(), device->getNEODevice()->getExecutionEnvironment(), driverHandle.get()));
mockBuiltinFunctionsLibImpl.reset(new MockBuiltinFunctionsLibImpl(mockDevicePtr.get(), neoDevice->getBuiltIns()));
mockBuiltinFunctionsLibImpl->ensureInitCompletion();
EXPECT_TRUE(mockBuiltinFunctionsLibImpl->initAsyncComplete);
}
void tearDown() {
mockBuiltinFunctionsLibImpl.reset();
@ -108,6 +116,36 @@ HWTEST_F(TestBuiltinFunctionsLibImpl, givenCallToBuiltinFunctionWithWrongIdThenE
EXPECT_THROW(mockBuiltinFunctionsLibImpl->initBuiltinKernel(static_cast<L0::Builtin>(builtId)), std::exception);
}
// With async init enabled, constructing the library must schedule loading of the
// FillBufferImmediate builtin only; every other builtin stays unloaded.
HWTEST_F(TestBuiltinFunctionsLibImpl, whenCreateBuiltinFunctionsLibThenImmediateFillIsLoaded) {
    // Exposes the protected state this test needs to inspect.
    struct AsyncInitBuiltinLib : public BuiltinFunctionsLibImpl {
        using BuiltinFunctionsLibImpl::BuiltinFunctionsLibImpl;
        using BuiltinFunctionsLibImpl::builtins;
        using BuiltinFunctionsLibImpl::ensureInitCompletion;
        using BuiltinFunctionsLibImpl::initAsyncComplete;
    };

    EXPECT_TRUE(mockBuiltinFunctionsLibImpl->initAsyncComplete);

    // Enable async init for the duration of this test only.
    VariableBackup<UltHwConfig> backup(&ultHwConfig);
    ultHwConfig.useinitBuiltinsAsyncEnabled = true;

    AsyncInitBuiltinLib lib(device, device->getNEODevice()->getBuiltIns());
    EXPECT_FALSE(lib.initAsyncComplete);
    lib.ensureInitCompletion();
    EXPECT_TRUE(lib.initAsyncComplete);

    const auto builtinCount = static_cast<uint32_t>(Builtin::COUNT);
    const auto preloadedId = static_cast<uint32_t>(Builtin::FillBufferImmediate);
    for (uint32_t id = 0; id < builtinCount; id++) {
        if (id != preloadedId) {
            EXPECT_EQ(nullptr, lib.builtins[id]);
            continue;
        }
        EXPECT_NE(nullptr, lib.builtins[id]);
    }

    // An id past the end of the enum must be rejected.
    const uint32_t outOfRangeId = builtinCount + 1;
    EXPECT_THROW(lib.initBuiltinKernel(static_cast<L0::Builtin>(outOfRangeId)), std::exception);

    /* std::async may create a detached thread - completion of the scheduled task can be ensured,
    but there is no way to ensure that actual OS thread exited and its resources are freed */
    MemoryManagement::fastLeaksDetectionMode = MemoryManagement::LeakDetectionMode::TURN_OFF_LEAK_DETECTION;
}
HWTEST_F(TestBuiltinFunctionsLibImpl, givenCompilerInterfaceWhenCreateDeviceAndImageSupportedThenBuiltinsImageFunctionsAreLoaded) {
ze_result_t returnValue = ZE_RESULT_SUCCESS;
neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->compilerInterface.reset(new NEO::MockCompilerInterfaceSpirv());

View File

@ -7,6 +7,7 @@
#include "shared/source/built_ins/built_ins.h"
#include "shared/source/compiler_interface/compiler_cache.h"
#include "shared/test/common/helpers/ult_hw_config.h"
#include "level_zero/core/source/builtin/builtin_functions_lib_impl.h"
#include "level_zero/core/test/unit_tests/mocks/mock_builtin_functions_lib_impl.h"
@ -18,4 +19,8 @@ std::unique_ptr<BuiltinFunctionsLib> BuiltinFunctionsLib::create(Device *device,
return std::unique_ptr<BuiltinFunctionsLib>(new ult::MockBuiltinFunctionsLibImpl(device, builtins));
}
// Unit-test variant: asynchronous builtin initialization follows the ultHwConfig switch.
bool BuiltinFunctionsLibImpl::initBuiltinsAsyncEnabled() {
    const bool enabledByUltConfig = NEO::ultHwConfig.useinitBuiltinsAsyncEnabled;
    return enabledByUltConfig;
}
} // namespace L0

View File

@ -329,7 +329,7 @@ HWTEST2_F(AppendQueryKernelTimestamps, givenCommandListWhenAppendQueryKernelTime
neoDevice->incRefInternal();
execEnvironment = device->getExecEnvironment();
driverHandle = device->getDriverHandle();
tmpMockBultinLib = new MockBuiltinFunctionsForQueryKernelTimestamps{nullptr, nullptr};
tmpMockBultinLib = new MockBuiltinFunctionsForQueryKernelTimestamps{this, device->getNEODevice()->getBuiltIns()};
}
MockBuiltinFunctionsForQueryKernelTimestamps *getBuiltinFunctionsLib() override {
return tmpMockBultinLib;
@ -414,7 +414,7 @@ HWTEST2_F(AppendQueryKernelTimestamps, givenCommandListWhenAppendQueryKernelTime
neoDevice->incRefInternal();
execEnvironment = device->getExecEnvironment();
driverHandle = device->getDriverHandle();
tmpMockBultinLib = new MockBuiltinFunctionsForQueryKernelTimestamps{nullptr, nullptr};
tmpMockBultinLib = new MockBuiltinFunctionsForQueryKernelTimestamps{this, device->getNEODevice()->getBuiltIns()};
}
MockBuiltinFunctionsForQueryKernelTimestamps *getBuiltinFunctionsLib() override {
return tmpMockBultinLib;
@ -498,7 +498,7 @@ HWTEST2_F(AppendQueryKernelTimestamps, givenEventWhenAppendQueryIsCalledThenSetA
neoDevice->incRefInternal();
execEnvironment = device->getExecEnvironment();
driverHandle = device->getDriverHandle();
tmpMockBultinLib = std::make_unique<MockBuiltinFunctionsForQueryKernelTimestamps>(this, nullptr);
tmpMockBultinLib = std::make_unique<MockBuiltinFunctionsForQueryKernelTimestamps>(this, device->getNEODevice()->getBuiltIns());
}
MockBuiltinFunctionsForQueryKernelTimestamps *getBuiltinFunctionsLib() override {
return tmpMockBultinLib.get();

View File

@ -27,7 +27,7 @@ target_sources(${TARGET_NAME} PRIVATE
${NEO_SOURCE_DIR}/level_zero/core/test/unit_tests/main.cpp
${NEO_SOURCE_DIR}/level_zero/core/test/unit_tests/mock.h
${NEO_SOURCE_DIR}/level_zero/core/test/unit_tests/white_box.h
${NEO_SOURCE_DIR}/level_zero/core/source/dll/create_builtin_functions_lib.cpp
${NEO_SOURCE_DIR}/level_zero/core/test/unit_tests/sources/builtin/create_ult_builtin_functions_lib.cpp
${NEO_SOURCE_DIR}/level_zero/tools/test/unit_tests/sources/debug/debug_session_helper.cpp
${NEO_SHARED_TEST_DIRECTORY}/common/tests_configuration.h
)

View File

@ -36,7 +36,7 @@ void NEO::BaseUltConfigListener::OnTestEnd(const ::testing::TestInfo &) {
// Ensure that global state is restored
UltHwConfig expectedState{};
static_assert(sizeof(UltHwConfig) == 14 * sizeof(bool), ""); // Ensure that there is no internal padding
static_assert(sizeof(UltHwConfig) == 15 * sizeof(bool), ""); // Ensure that there is no internal padding
EXPECT_EQ(0, memcmp(&expectedState, &ultHwConfig, sizeof(UltHwConfig)));
EXPECT_EQ(0, memcmp(&referencedHwInfo.platform, &defaultHwInfo->platform, sizeof(PLATFORM)));

View File

@ -12,6 +12,7 @@ struct UltHwConfig {
bool useHwCsr = false;
bool useMockedPrepareDeviceEnvironmentsFunc = true;
bool forceOsAgnosticMemoryManager = true;
bool useinitBuiltinsAsyncEnabled = false;
bool useWaitForTimestamps = false;
bool useBlitSplit = false;
bool useFirstSubmissionInitDevice = false;