From 0f42ef1ed769a75fd19e5b640e3c35fd8e00e50a Mon Sep 17 00:00:00 2001 From: Mateusz Hoppe Date: Thu, 12 Nov 2020 19:53:30 +0100 Subject: [PATCH] Differentiate between users ISA and internal ISA allocation Related-To: NEO-5240 Signed-off-by: Mateusz Hoppe --- level_zero/api/core/ze_module.cpp | 1 + .../builtin/builtin_functions_lib_impl.cpp | 2 +- .../core/source/context/context_imp.cpp | 2 +- level_zero/core/source/device/device.h | 5 ++- level_zero/core/source/device/device_imp.cpp | 4 +- level_zero/core/source/device/device_imp.h | 3 +- level_zero/core/source/kernel/kernel.h | 2 +- level_zero/core/source/kernel/kernel_imp.cpp | 5 ++- level_zero/core/source/module/module.h | 10 +++-- level_zero/core/source/module/module_imp.cpp | 11 ++--- level_zero/core/source/module/module_imp.h | 3 +- .../test/unit_tests/fixtures/module_fixture.h | 6 +-- .../core/test/unit_tests/mocks/mock_device.h | 3 +- .../test/unit_tests/mocks/mock_module.cpp | 2 +- .../core/test/unit_tests/mocks/mock_module.h | 4 +- .../builtin/builtin_functions_tests.cpp | 34 +++++++++++++-- .../debugger/test_module_with_debug.cpp | 6 +-- .../unit_tests/sources/kernel/test_kernel.cpp | 32 +++++++++++++-- .../unit_tests/sources/module/test_module.cpp | 41 ++++++++++++++----- opencl/source/helpers/built_ins_helper.cpp | 2 +- opencl/source/kernel/kernel.cpp | 2 +- opencl/source/program/kernel_info.cpp | 5 ++- opencl/source/program/kernel_info.h | 2 +- .../source/program/process_device_binary.cpp | 2 +- opencl/source/utilities/logger.cpp | 2 + .../command_queue/blit_enqueue_tests.cpp | 6 +-- .../aub_command_stream_receiver_1_tests.cpp | 1 + .../gen12lp/device_queue_tests_gen12lp.cpp | 2 +- opencl/test/unit_test/kernel/kernel_tests.cpp | 6 +-- .../kernel/substitute_kernel_heap_tests.cpp | 8 ++-- .../graphics_allocation_tests.cpp | 4 ++ ...nager_allocate_in_preferred_pool_tests.inl | 4 ++ .../memory_manager/memory_manager_tests.cpp | 8 ++++ .../linux/drm_memory_manager_tests.cpp | 23 +++++++++++ 
.../unit_test/program/kernel_info_tests.cpp | 38 +++++++++++++++-- .../unit_test/utilities/file_logger_tests.cpp | 1 + shared/source/aub/aub_helper.h | 1 + .../command_stream_receiver.cpp | 2 +- shared/source/helpers/heap_assigner.cpp | 2 +- .../memory_manager/graphics_allocation.h | 7 ++++ .../source/memory_manager/memory_manager.cpp | 3 +- ...ry_manager_allocate_in_device_pool_dg1.cpp | 1 + .../heap_assigner/heap_assigner_tests.cpp | 7 +++- 43 files changed, 246 insertions(+), 69 deletions(-) diff --git a/level_zero/api/core/ze_module.cpp b/level_zero/api/core/ze_module.cpp index 69a88c7e95..39c3356b61 100644 --- a/level_zero/api/core/ze_module.cpp +++ b/level_zero/api/core/ze_module.cpp @@ -5,6 +5,7 @@ * */ +#include "level_zero/core/source/cmdlist/cmdlist.h" #include "level_zero/core/source/module/module.h" #include diff --git a/level_zero/core/source/builtin/builtin_functions_lib_impl.cpp b/level_zero/core/source/builtin/builtin_functions_lib_impl.cpp index 43f98e00b4..531b89f31e 100644 --- a/level_zero/core/source/builtin/builtin_functions_lib_impl.cpp +++ b/level_zero/core/source/builtin/builtin_functions_lib_impl.cpp @@ -158,7 +158,7 @@ std::unique_ptr BuiltinFunctionsLibImpl::l moduleDesc.format = builtInCode.type == BuiltInCodeType::Binary ? 
ZE_MODULE_FORMAT_NATIVE : ZE_MODULE_FORMAT_IL_SPIRV; moduleDesc.pInputModule = reinterpret_cast(&builtInCode.resource[0]); moduleDesc.inputSize = builtInCode.resource.size(); - res = device->createModule(&moduleDesc, &moduleHandle, nullptr); + res = device->createModule(&moduleDesc, &moduleHandle, nullptr, ModuleType::Builtin); UNRECOVERABLE_IF(res != ZE_RESULT_SUCCESS); module.reset(Module::fromHandle(moduleHandle)); diff --git a/level_zero/core/source/context/context_imp.cpp b/level_zero/core/source/context/context_imp.cpp index bb8dc356c9..cd21ac5234 100644 --- a/level_zero/core/source/context/context_imp.cpp +++ b/level_zero/core/source/context/context_imp.cpp @@ -170,7 +170,7 @@ ze_result_t ContextImp::createModule(ze_device_handle_t hDevice, const ze_module_desc_t *desc, ze_module_handle_t *phModule, ze_module_build_log_handle_t *phBuildLog) { - return L0::Device::fromHandle(hDevice)->createModule(desc, phModule, phBuildLog); + return L0::Device::fromHandle(hDevice)->createModule(desc, phModule, phBuildLog, ModuleType::User); } ze_result_t ContextImp::createSampler(ze_device_handle_t hDevice, diff --git a/level_zero/core/source/device/device.h b/level_zero/core/source/device/device.h index 8c3eeea2d1..3a16031ffa 100644 --- a/level_zero/core/source/device/device.h +++ b/level_zero/core/source/device/device.h @@ -16,6 +16,7 @@ #include "level_zero/core/source/debugger/debugger_l0.h" #include "level_zero/core/source/driver/driver.h" #include "level_zero/core/source/driver/driver_handle.h" +#include "level_zero/core/source/module/module.h" #include #include @@ -36,6 +37,8 @@ struct ExecutionEnvironment; struct MetricContext; struct SysmanDevice; +enum class ModuleType; + struct Device : _ze_device_handle_t { virtual uint32_t getRootDeviceIndex() = 0; virtual ze_result_t canAccessPeer(ze_device_handle_t hPeerDevice, ze_bool_t *value) = 0; @@ -51,7 +54,7 @@ struct Device : _ze_device_handle_t { virtual ze_result_t createImage(const ze_image_desc_t *desc, 
ze_image_handle_t *phImage) = 0; virtual ze_result_t createModule(const ze_module_desc_t *desc, ze_module_handle_t *module, - ze_module_build_log_handle_t *buildLog) = 0; + ze_module_build_log_handle_t *buildLog, ModuleType type) = 0; virtual ze_result_t createSampler(const ze_sampler_desc_t *pDesc, ze_sampler_handle_t *phSampler) = 0; virtual ze_result_t getComputeProperties(ze_device_compute_properties_t *pComputeProperties) = 0; diff --git a/level_zero/core/source/device/device_imp.cpp b/level_zero/core/source/device/device_imp.cpp index 06dc4304f6..bd7dd8c75b 100644 --- a/level_zero/core/source/device/device_imp.cpp +++ b/level_zero/core/source/device/device_imp.cpp @@ -198,14 +198,14 @@ ze_result_t DeviceImp::createSampler(const ze_sampler_desc_t *desc, } ze_result_t DeviceImp::createModule(const ze_module_desc_t *desc, ze_module_handle_t *module, - ze_module_build_log_handle_t *buildLog) { + ze_module_build_log_handle_t *buildLog, ModuleType type) { ModuleBuildLog *moduleBuildLog = nullptr; if (buildLog) { moduleBuildLog = ModuleBuildLog::create(); *buildLog = moduleBuildLog->toHandle(); } - auto modulePtr = Module::create(this, desc, moduleBuildLog); + auto modulePtr = Module::create(this, desc, moduleBuildLog, type); if (modulePtr == nullptr) { return ZE_RESULT_ERROR_MODULE_BUILD_FAILURE; } diff --git a/level_zero/core/source/device/device_imp.h b/level_zero/core/source/device/device_imp.h index db8f2be70b..bc213442c7 100644 --- a/level_zero/core/source/device/device_imp.h +++ b/level_zero/core/source/device/device_imp.h @@ -11,6 +11,7 @@ #include "level_zero/core/source/cmdlist/cmdlist.h" #include "level_zero/core/source/device/device.h" #include "level_zero/core/source/driver/driver_handle.h" +#include "level_zero/core/source/module/module.h" #include "level_zero/tools/source/metrics/metric.h" namespace L0 { @@ -26,7 +27,7 @@ struct DeviceImp : public Device { ze_command_queue_handle_t *commandQueue) override; ze_result_t createImage(const ze_image_desc_t 
*desc, ze_image_handle_t *phImage) override; ze_result_t createModule(const ze_module_desc_t *desc, ze_module_handle_t *module, - ze_module_build_log_handle_t *buildLog) override; + ze_module_build_log_handle_t *buildLog, ModuleType type) override; ze_result_t createSampler(const ze_sampler_desc_t *pDesc, ze_sampler_handle_t *phSampler) override; ze_result_t getComputeProperties(ze_device_compute_properties_t *pComputeProperties) override; diff --git a/level_zero/core/source/kernel/kernel.h b/level_zero/core/source/kernel/kernel.h index f452f99aa8..113b21e163 100644 --- a/level_zero/core/source/kernel/kernel.h +++ b/level_zero/core/source/kernel/kernel.h @@ -35,7 +35,7 @@ struct KernelImmutableData { void initialize(NEO::KernelInfo *kernelInfo, NEO::MemoryManager &memoryManager, const NEO::Device *device, uint32_t computeUnitsUsedForSratch, - NEO::GraphicsAllocation *globalConstBuffer, NEO::GraphicsAllocation *globalVarBuffer); + NEO::GraphicsAllocation *globalConstBuffer, NEO::GraphicsAllocation *globalVarBuffer, bool internalKernel); const std::vector &getResidencyContainer() const { return residencyContainer; diff --git a/level_zero/core/source/kernel/kernel_imp.cpp b/level_zero/core/source/kernel/kernel_imp.cpp index e5cbc09df4..44466c9acf 100644 --- a/level_zero/core/source/kernel/kernel_imp.cpp +++ b/level_zero/core/source/kernel/kernel_imp.cpp @@ -97,14 +97,15 @@ inline void patchWithImplicitSurface(ArrayRef crossThreadData, ArrayRef void KernelImmutableData::initialize(NEO::KernelInfo *kernelInfo, NEO::MemoryManager &memoryManager, const NEO::Device *device, uint32_t computeUnitsUsedForSratch, NEO::GraphicsAllocation *globalConstBuffer, - NEO::GraphicsAllocation *globalVarBuffer) { + NEO::GraphicsAllocation *globalVarBuffer, bool internalKernel) { UNRECOVERABLE_IF(kernelInfo == nullptr); this->kernelDescriptor = &kernelInfo->kernelDescriptor; auto kernelIsaSize = kernelInfo->heapInfo.KernelHeapSize; + const auto allocType = internalKernel ? 
NEO::GraphicsAllocation::AllocationType::KERNEL_ISA_INTERNAL : NEO::GraphicsAllocation::AllocationType::KERNEL_ISA; auto allocation = memoryManager.allocateGraphicsMemoryWithProperties( - {device->getRootDeviceIndex(), kernelIsaSize, NEO::GraphicsAllocation::AllocationType::KERNEL_ISA, device->getDeviceBitfield()}); + {device->getRootDeviceIndex(), kernelIsaSize, allocType, device->getDeviceBitfield()}); UNRECOVERABLE_IF(allocation == nullptr); auto &hwInfo = device->getHardwareInfo(); diff --git a/level_zero/core/source/module/module.h b/level_zero/core/source/module/module.h index 85ca61d025..af85be3d4e 100644 --- a/level_zero/core/source/module/module.h +++ b/level_zero/core/source/module/module.h @@ -7,7 +7,6 @@ #pragma once -#include "level_zero/core/source/cmdlist/cmdlist.h" #include "level_zero/core/source/context/context.h" #include "level_zero/core/source/kernel/kernel.h" #include "level_zero/core/source/module/module_build_log.h" @@ -21,9 +20,14 @@ struct _ze_module_handle_t {}; namespace L0 { struct Device; +enum class ModuleType { + Builtin, + User +}; + struct Module : _ze_module_handle_t { - static Module *create(Device *device, const ze_module_desc_t *desc, - ModuleBuildLog *moduleBuildLog); + + static Module *create(Device *device, const ze_module_desc_t *desc, ModuleBuildLog *moduleBuildLog, ModuleType type); virtual ~Module() = default; diff --git a/level_zero/core/source/module/module_imp.cpp b/level_zero/core/source/module/module_imp.cpp index e1312bc5d7..2773b2188d 100644 --- a/level_zero/core/source/module/module_imp.cpp +++ b/level_zero/core/source/module/module_imp.cpp @@ -285,9 +285,9 @@ void ModuleTranslationUnit::processDebugData() { } } -ModuleImp::ModuleImp(Device *device, ModuleBuildLog *moduleBuildLog) +ModuleImp::ModuleImp(Device *device, ModuleBuildLog *moduleBuildLog, ModuleType type) : device(device), translationUnit(std::make_unique(device)), - moduleBuildLog(moduleBuildLog) { + moduleBuildLog(moduleBuildLog), type(type) { 
productFamily = device->getHwInfo().platform.eProductFamily; } @@ -340,7 +340,8 @@ bool ModuleImp::initialize(const ze_module_desc_t *desc, NEO::Device *neoDevice) kernelImmData->initialize(ki, *(device->getNEODevice()->getMemoryManager()), device->getNEODevice(), device->getNEODevice()->getDeviceInfo().computeUnitsUsedForScratch, - this->translationUnit->globalConstBuffer, this->translationUnit->globalVarBuffer); + this->translationUnit->globalConstBuffer, this->translationUnit->globalVarBuffer, + this->type == ModuleType::Builtin); kernelImmDatas.push_back(std::move(kernelImmData)); } this->maxGroupSize = static_cast(this->translationUnit->device->getNEODevice()->getDeviceInfo().maxWorkGroupSize); @@ -522,8 +523,8 @@ ze_result_t ModuleImp::getGlobalPointer(const char *pGlobalName, void **pPtr) { } Module *Module::create(Device *device, const ze_module_desc_t *desc, - ModuleBuildLog *moduleBuildLog) { - auto module = new ModuleImp(device, moduleBuildLog); + ModuleBuildLog *moduleBuildLog, ModuleType type) { + auto module = new ModuleImp(device, moduleBuildLog, type); bool success = module->initialize(desc, device->getNEODevice()); if (success == false) { diff --git a/level_zero/core/source/module/module_imp.h b/level_zero/core/source/module/module_imp.h index ec4f36685f..825d790f1a 100644 --- a/level_zero/core/source/module/module_imp.h +++ b/level_zero/core/source/module/module_imp.h @@ -66,7 +66,7 @@ struct ModuleTranslationUnit { struct ModuleImp : public Module { ModuleImp() = delete; - ModuleImp(Device *device, ModuleBuildLog *moduleBuildLog); + ModuleImp(Device *device, ModuleBuildLog *moduleBuildLog, ModuleType type); ~ModuleImp() override; @@ -127,6 +127,7 @@ struct ModuleImp : public Module { NEO::Linker::RelocatedSymbolsMap symbols; bool debugEnabled = false; bool isFullyLinked = false; + ModuleType type; NEO::Linker::UnresolvedExternals unresolvedExternalsInfo{}; std::set importedSymbolAllocations{}; }; diff --git 
a/level_zero/core/test/unit_tests/fixtures/module_fixture.h b/level_zero/core/test/unit_tests/fixtures/module_fixture.h index 24f1fb74a2..bf2ee232eb 100644 --- a/level_zero/core/test/unit_tests/fixtures/module_fixture.h +++ b/level_zero/core/test/unit_tests/fixtures/module_fixture.h @@ -23,7 +23,7 @@ struct ModuleFixture : public DeviceFixture { createModuleFromBinary(); } - void createModuleFromBinary() { + void createModuleFromBinary(ModuleType type = ModuleType::User) { std::string testFile; retrieveBinaryKernelFilename(testFile, binaryFilename + "_", ".bin"); @@ -42,7 +42,7 @@ struct ModuleFixture : public DeviceFixture { ModuleBuildLog *moduleBuildLog = nullptr; - module.reset(Module::create(device, &moduleDesc, moduleBuildLog)); + module.reset(Module::create(device, &moduleDesc, moduleBuildLog, type)); } void createKernel() { @@ -91,7 +91,7 @@ struct MultiDeviceModuleFixture : public MultiDeviceFixture { auto device = driverHandle->devices[rootDeviceIndex]; modules[rootDeviceIndex].reset(Module::create(device, &moduleDesc, - moduleBuildLog)); + moduleBuildLog, ModuleType::User)); } void TearDown() override { diff --git a/level_zero/core/test/unit_tests/mocks/mock_device.h b/level_zero/core/test/unit_tests/mocks/mock_device.h index dac0655494..1dddb296dd 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_device.h +++ b/level_zero/core/test/unit_tests/mocks/mock_device.h @@ -69,7 +69,8 @@ struct Mock : public Device { createModule, (const ze_module_desc_t *desc, ze_module_handle_t *module, - ze_module_build_log_handle_t *buildLog), + ze_module_build_log_handle_t *buildLog, + ModuleType type), (override)); MOCK_METHOD(ze_result_t, createSampler, diff --git a/level_zero/core/test/unit_tests/mocks/mock_module.cpp b/level_zero/core/test/unit_tests/mocks/mock_module.cpp index cfec10d99a..978d9e4ecc 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_module.cpp +++ b/level_zero/core/test/unit_tests/mocks/mock_module.cpp @@ -12,7 +12,7 @@ using 
::testing::Return; namespace L0 { namespace ult { -Mock::Mock(::L0::Device *device, ModuleBuildLog *moduleBuildLog) : WhiteBox(device, moduleBuildLog) { EXPECT_CALL(*this, getMaxGroupSize).WillRepeatedly(Return(256u)); } +Mock::Mock(::L0::Device *device, ModuleBuildLog *moduleBuildLog, ModuleType type) : WhiteBox(device, moduleBuildLog, type) { EXPECT_CALL(*this, getMaxGroupSize).WillRepeatedly(Return(256u)); } } // namespace ult } // namespace L0 diff --git a/level_zero/core/test/unit_tests/mocks/mock_module.h b/level_zero/core/test/unit_tests/mocks/mock_module.h index 0e24919130..0f2e78da4e 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_module.h +++ b/level_zero/core/test/unit_tests/mocks/mock_module.h @@ -28,6 +28,7 @@ struct WhiteBox<::L0::Module> : public ::L0::ModuleImp { using BaseClass::kernelImmDatas; using BaseClass::symbols; using BaseClass::translationUnit; + using BaseClass::type; using BaseClass::unresolvedExternalsInfo; }; @@ -35,7 +36,8 @@ using Module = WhiteBox<::L0::Module>; template <> struct Mock : public Module { - Mock(::L0::Device *device, ModuleBuildLog *moduleBuildLog); + Mock(::L0::Device *device, ModuleBuildLog *moduleBuildLog, ModuleType type); + Mock(::L0::Device *device, ModuleBuildLog *moduleBuildLog) : Mock(device, moduleBuildLog, ModuleType::User){}; MOCK_METHOD(ze_result_t, createKernel, (const ze_kernel_desc_t *desc, ze_kernel_handle_t *phFunction), (override)); MOCK_METHOD(ze_result_t, destroy, (), (override)); diff --git a/level_zero/core/test/unit_tests/sources/builtin/builtin_functions_tests.cpp b/level_zero/core/test/unit_tests/sources/builtin/builtin_functions_tests.cpp index 5788204ce1..6e89ddf613 100644 --- a/level_zero/core/test/unit_tests/sources/builtin/builtin_functions_tests.cpp +++ b/level_zero/core/test/unit_tests/sources/builtin/builtin_functions_tests.cpp @@ -90,7 +90,7 @@ HWTEST_F(TestBuiltinFunctionsLibImpl, givenCompilerInterfaceWhenCreateDeviceThen HWTEST_F(TestBuiltinFunctionsLibImpl, 
givenRebuildPrecompiledKernelsDebugFlagWhenInitFuctionsThenIntermediateCodeForBuiltinsIsRequested) { struct MockDeviceForRebuildBuilins : public Mock { struct MockModuleForRebuildBuiltins : public ModuleImp { - MockModuleForRebuildBuiltins(Device *device) : ModuleImp(device, nullptr) {} + MockModuleForRebuildBuiltins(Device *device) : ModuleImp(device, nullptr, ModuleType::Builtin) {} ze_result_t createKernel(const ze_kernel_desc_t *desc, ze_kernel_handle_t *phFunction) override { @@ -106,7 +106,7 @@ HWTEST_F(TestBuiltinFunctionsLibImpl, givenRebuildPrecompiledKernelsDebugFlagWhe ze_result_t createModule(const ze_module_desc_t *desc, ze_module_handle_t *module, - ze_module_build_log_handle_t *buildLog) override { + ze_module_build_log_handle_t *buildLog, ModuleType type) override { EXPECT_EQ(desc->format, ZE_MODULE_FORMAT_IL_SPIRV); EXPECT_GT(desc->inputSize, 0u); EXPECT_NE(desc->pInputModule, nullptr); @@ -138,13 +138,13 @@ HWTEST_F(TestBuiltinFunctionsLibImpl, givenNotToRebuildPrecompiledKernelsDebugFl ze_result_t createModule(const ze_module_desc_t *desc, ze_module_handle_t *module, - ze_module_build_log_handle_t *buildLog) override { + ze_module_build_log_handle_t *buildLog, ModuleType type) override { EXPECT_EQ(desc->format, ZE_MODULE_FORMAT_NATIVE); EXPECT_GT(desc->inputSize, 0u); EXPECT_NE(desc->pInputModule, nullptr); wasCreatedModuleCalled = true; - return DeviceImp::createModule(desc, module, buildLog); + return DeviceImp::createModule(desc, module, buildLog, type); } bool wasCreatedModuleCalled = false; @@ -159,5 +159,31 @@ HWTEST_F(TestBuiltinFunctionsLibImpl, givenNotToRebuildPrecompiledKernelsDebugFl EXPECT_TRUE(testDevice.wasCreatedModuleCalled); } +HWTEST_F(TestBuiltinFunctionsLibImpl, GivenBuiltinsWhenInitializingFunctionsThenModulesWithProperTypeAreCreated) { + struct MockDeviceWithBuilins : public Mock { + MockDeviceWithBuilins(L0::Device *device) : Mock(device->getNEODevice(), static_cast(device->getExecEnvironment())) { + driverHandle = 
device->getDriverHandle(); + builtins = BuiltinFunctionsLib::create(this, neoDevice->getBuiltIns()); + } + + ze_result_t createModule(const ze_module_desc_t *desc, + ze_module_handle_t *module, + ze_module_build_log_handle_t *buildLog, ModuleType type) override { + + typeCreated = type; + EXPECT_EQ(ModuleType::Builtin, type); + + return DeviceImp::createModule(desc, module, buildLog, type); + } + + ModuleType typeCreated = ModuleType::User; + }; + + MockDeviceWithBuilins testDevice(device); + testDevice.getBuiltinFunctionsLib()->initFunctions(); + + EXPECT_EQ(ModuleType::Builtin, testDevice.typeCreated); +} + } // namespace ult } // namespace L0 diff --git a/level_zero/core/test/unit_tests/sources/debugger/test_module_with_debug.cpp b/level_zero/core/test/unit_tests/sources/debugger/test_module_with_debug.cpp index 250f1b89a1..1c4cb4667b 100644 --- a/level_zero/core/test/unit_tests/sources/debugger/test_module_with_debug.cpp +++ b/level_zero/core/test/unit_tests/sources/debugger/test_module_with_debug.cpp @@ -33,7 +33,7 @@ TEST_F(DeviceWithDebuggerEnabledTest, givenDebuggingEnabledWhenModuleIsCreatedTh ModuleBuildLog *moduleBuildLog = nullptr; - auto module = std::unique_ptr(new L0::ModuleImp(deviceL0, moduleBuildLog)); + auto module = std::unique_ptr(new L0::ModuleImp(deviceL0, moduleBuildLog, ModuleType::User)); ASSERT_NE(nullptr, module.get()); module->initialize(&moduleDesc, device); @@ -54,7 +54,7 @@ TEST_F(DeviceWithDebuggerEnabledTest, GivenDebuggeableKernelWhenModuleIsInitiali ModuleBuildLog *moduleBuildLog = nullptr; - auto module = std::make_unique>(deviceL0, moduleBuildLog); + auto module = std::make_unique>(deviceL0, moduleBuildLog, ModuleType::User); ASSERT_NE(nullptr, module.get()); NEO::PatchTokenBinary::KernelFromPatchtokens kernelTokens; @@ -86,7 +86,7 @@ TEST_F(DeviceWithDebuggerEnabledTest, GivenNonDebuggeableKernelWhenModuleIsIniti ModuleBuildLog *moduleBuildLog = nullptr; - auto module = std::make_unique>(deviceL0, moduleBuildLog); + auto 
module = std::make_unique>(deviceL0, moduleBuildLog, ModuleType::User); ASSERT_NE(nullptr, module.get()); NEO::PatchTokenBinary::KernelFromPatchtokens kernelTokens; diff --git a/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp b/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp index 11f9873ac5..108505fd1b 100644 --- a/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp +++ b/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp @@ -254,7 +254,7 @@ HWTEST_F(KernelPropertiesTests, whenInitializingThenCalculatesProperPrivateSurfa kernelAttributes.simdSize = 8; KernelImmutableData kernelImmutableData(device); - kernelImmutableData.initialize(&kernelInfo, *device->getNEODevice()->getMemoryManager(), device->getNEODevice(), computeUnitsUsedForSratch, nullptr, nullptr); + kernelImmutableData.initialize(&kernelInfo, *device->getNEODevice()->getMemoryManager(), device->getNEODevice(), computeUnitsUsedForSratch, nullptr, nullptr, false); size_t expectedSize = static_cast(kernelAttributes.perHwThreadPrivateMemorySize) * computeUnitsUsedForSratch; EXPECT_GE(expectedSize, kernelImmutableData.getPrivateMemoryGraphicsAllocation()->getUnderlyingBufferSize()); @@ -470,7 +470,7 @@ TEST_F(KernelIsaTests, givenKernelAllocationInLocalMemoryWhenCreatingWithoutAllo auto bcsCsr = device->getNEODevice()->getEngine(aub_stream::EngineType::ENGINE_BCS, false, false).commandStreamReceiver; auto initialTaskCount = bcsCsr->peekTaskCount(); - kernelImmutableData.initialize(&kernelInfo, *device->getNEODevice()->getMemoryManager(), device->getNEODevice(), 0, nullptr, nullptr); + kernelImmutableData.initialize(&kernelInfo, *device->getNEODevice()->getMemoryManager(), device->getNEODevice(), 0, nullptr, nullptr, false); if (kernelImmutableData.getIsaGraphicsAllocation()->isAllocatedInLocalMemoryPool()) { EXPECT_EQ(initialTaskCount + 1, bcsCsr->peekTaskCount()); @@ -496,7 +496,7 @@ TEST_F(KernelIsaTests, 
givenKernelAllocationInLocalMemoryWhenCreatingWithAllowed auto bcsCsr = device->getNEODevice()->getEngine(aub_stream::EngineType::ENGINE_BCS, false, false).commandStreamReceiver; auto initialTaskCount = bcsCsr->peekTaskCount(); - kernelImmutableData.initialize(&kernelInfo, *device->getNEODevice()->getMemoryManager(), device->getNEODevice(), 0, nullptr, nullptr); + kernelImmutableData.initialize(&kernelInfo, *device->getNEODevice()->getMemoryManager(), device->getNEODevice(), 0, nullptr, nullptr, false); EXPECT_EQ(initialTaskCount, bcsCsr->peekTaskCount()); @@ -520,12 +520,36 @@ TEST_F(KernelIsaTests, givenKernelAllocationInLocalMemoryWhenCreatingWithDisallo auto bcsCsr = device->getNEODevice()->getEngine(aub_stream::EngineType::ENGINE_BCS, false, false).commandStreamReceiver; auto initialTaskCount = bcsCsr->peekTaskCount(); - kernelImmutableData.initialize(&kernelInfo, *device->getNEODevice()->getMemoryManager(), device->getNEODevice(), 0, nullptr, nullptr); + kernelImmutableData.initialize(&kernelInfo, *device->getNEODevice()->getMemoryManager(), device->getNEODevice(), 0, nullptr, nullptr, false); EXPECT_EQ(initialTaskCount, bcsCsr->peekTaskCount()); device->getNEODevice()->getMemoryManager()->freeGraphicsMemory(kernelInfo.kernelAllocation); } +TEST_F(KernelIsaTests, givenKernelInfoWhenInitializingImmutableDataWithInternalIsaThenCorrectAllocationTypeIsUsed) { + uint32_t kernelHeap = 0; + KernelInfo kernelInfo; + kernelInfo.heapInfo.KernelHeapSize = 1; + kernelInfo.heapInfo.pKernelHeap = &kernelHeap; + + KernelImmutableData kernelImmutableData(device); + + kernelImmutableData.initialize(&kernelInfo, *device->getNEODevice()->getMemoryManager(), device->getNEODevice(), 0, nullptr, nullptr, true); + EXPECT_EQ(NEO::GraphicsAllocation::AllocationType::KERNEL_ISA_INTERNAL, kernelImmutableData.getIsaGraphicsAllocation()->getAllocationType()); +} + +TEST_F(KernelIsaTests, givenKernelInfoWhenInitializingImmutableDataWithNonInternalIsaThenCorrectAllocationTypeIsUsed) { + 
uint32_t kernelHeap = 0; + KernelInfo kernelInfo; + kernelInfo.heapInfo.KernelHeapSize = 1; + kernelInfo.heapInfo.pKernelHeap = &kernelHeap; + + KernelImmutableData kernelImmutableData(device); + + kernelImmutableData.initialize(&kernelInfo, *device->getNEODevice()->getMemoryManager(), device->getNEODevice(), 0, nullptr, nullptr, false); + EXPECT_EQ(NEO::GraphicsAllocation::AllocationType::KERNEL_ISA, kernelImmutableData.getIsaGraphicsAllocation()->getAllocationType()); +} + } // namespace ult } // namespace L0 diff --git a/level_zero/core/test/unit_tests/sources/module/test_module.cpp b/level_zero/core/test/unit_tests/sources/module/test_module.cpp index 6a444981c1..9422979762 100644 --- a/level_zero/core/test/unit_tests/sources/module/test_module.cpp +++ b/level_zero/core/test/unit_tests/sources/module/test_module.cpp @@ -70,6 +70,27 @@ HWTEST_F(ModuleTest, givenNonZeroCountWhenGettingKernelNamesThenNamesAreReturned EXPECT_EQ(ZE_RESULT_SUCCESS, result); } +HWTEST_F(ModuleTest, givenUserModuleTypeWhenCreatingModuleThenCorrectTypeIsSet) { + WhiteBox module(device, nullptr, ModuleType::User); + EXPECT_EQ(ModuleType::User, module.type); +} + +HWTEST_F(ModuleTest, givenBuiltinModuleTypeWhenCreatingModuleThenCorrectTypeIsSet) { + WhiteBox module(device, nullptr, ModuleType::Builtin); + EXPECT_EQ(ModuleType::Builtin, module.type); +} + +HWTEST_F(ModuleTest, givenUserModuleWhenCreatedThenCorrectAllocationTypeIsUsedForIsa) { + createKernel(); + EXPECT_EQ(NEO::GraphicsAllocation::AllocationType::KERNEL_ISA, kernel->getIsaAllocation()->getAllocationType()); +} + +HWTEST_F(ModuleTest, givenBuiltinModuleWhenCreatedThenCorrectAllocationTypeIsUsedForIsa) { + createModuleFromBinary(ModuleType::Builtin); + createKernel(); + EXPECT_EQ(NEO::GraphicsAllocation::AllocationType::KERNEL_ISA_INTERNAL, kernel->getIsaAllocation()->getAllocationType()); +} + using ModuleTestSupport = IsWithinProducts; HWTEST2_F(ModuleTest, givenNonPatchedTokenThenSurfaceBaseAddressIsCorrectlySet, 
ModuleTestSupport) { @@ -172,7 +193,7 @@ HWTEST_F(ModuleSpecConstantsTests, givenSpecializationConstantsSetInDescriptorTh specConstants.pConstantValues = specConstantsPointerValues.data(); moduleDesc.pConstants = &specConstants; - auto module = new Module(device, nullptr); + auto module = new Module(device, nullptr, ModuleType::User); module->translationUnit.reset(mockTranslationUnit); bool success = module->initialize(&moduleDesc, neoDevice); @@ -211,7 +232,7 @@ HWTEST_F(ModuleLinkingTest, givenFailureDuringLinkingWhenCreatingModuleThenModul moduleDesc.pInputModule = &spirvData; moduleDesc.inputSize = sizeof(spirvData); - Module module(device, nullptr); + Module module(device, nullptr, ModuleType::User); module.translationUnit.reset(mockTranslationUnit); bool success = module.initialize(&moduleDesc, neoDevice); @@ -240,7 +261,7 @@ HWTEST_F(ModuleLinkingTest, givenRemainingUnresolvedSymbolsDuringLinkingWhenCrea moduleDesc.pInputModule = &spirvData; moduleDesc.inputSize = sizeof(spirvData); - Module module(device, nullptr); + Module module(device, nullptr, ModuleType::User); module.translationUnit.reset(mockTranslationUnit); bool success = module.initialize(&moduleDesc, neoDevice); @@ -248,7 +269,7 @@ HWTEST_F(ModuleLinkingTest, givenRemainingUnresolvedSymbolsDuringLinkingWhenCrea EXPECT_FALSE(module.isFullyLinked); } HWTEST_F(ModuleLinkingTest, givenNotFullyLinkedModuleWhenCreatingKernelThenErrorIsReturned) { - Module module(device, nullptr); + Module module(device, nullptr, ModuleType::User); module.isFullyLinked = false; auto retVal = module.createKernel(nullptr, nullptr); @@ -258,8 +279,8 @@ HWTEST_F(ModuleLinkingTest, givenNotFullyLinkedModuleWhenCreatingKernelThenError struct ModuleDynamicLinkTests : public Test { void SetUp() override { Test::SetUp(); - module0 = std::make_unique(device, nullptr); - module1 = std::make_unique(device, nullptr); + module0 = std::make_unique(device, nullptr, ModuleType::User); + module1 = std::make_unique(device, nullptr, 
ModuleType::User); } std::unique_ptr module0; std::unique_ptr module1; @@ -513,7 +534,7 @@ HWTEST_F(ModuleTranslationUnitTest, GivenRebuildPrecompiledKernelsFlagAndFileWit moduleDesc.pInputModule = reinterpret_cast(src.get()); moduleDesc.inputSize = size; - Module module(device, nullptr); + Module module(device, nullptr, ModuleType::User); MockModuleTU *tu = new MockModuleTU(device); module.translationUnit.reset(tu); @@ -540,7 +561,7 @@ HWTEST_F(ModuleTranslationUnitTest, GivenRebuildPrecompiledKernelsFlagAndFileWit moduleDesc.pInputModule = reinterpret_cast(src.get()); moduleDesc.inputSize = size; - Module module(device, nullptr); + Module module(device, nullptr, ModuleType::User); MockModuleTU *tu = new MockModuleTU(device); module.translationUnit.reset(tu); @@ -608,7 +629,7 @@ TEST(BuildOptions, givenSrcOptionNameInSrcNamesWhenMovingBuildOptionsThenOptionI TEST_F(ModuleTest, givenInternalOptionsWhenBindlessEnabledThenBindlesOptionsPassed) { DebugManagerStateRestore restorer; DebugManager.flags.UseBindlessMode.set(1); - auto module = std::make_unique(device, nullptr); + auto module = std::make_unique(device, nullptr, ModuleType::User); ASSERT_NE(nullptr, module); std::string buildOptions; @@ -623,7 +644,7 @@ TEST_F(ModuleTest, givenInternalOptionsWhenBindlessEnabledThenBindlesOptionsPass TEST_F(ModuleTest, givenInternalOptionsWhenBindlessDisabledThenBindlesOptionsNotPassed) { DebugManagerStateRestore restorer; DebugManager.flags.UseBindlessMode.set(0); - auto module = std::make_unique(device, nullptr); + auto module = std::make_unique(device, nullptr, ModuleType::User); ASSERT_NE(nullptr, module); std::string buildOptions; diff --git a/opencl/source/helpers/built_ins_helper.cpp b/opencl/source/helpers/built_ins_helper.cpp index 3867417a63..0c0b78d37f 100644 --- a/opencl/source/helpers/built_ins_helper.cpp +++ b/opencl/source/helpers/built_ins_helper.cpp @@ -30,7 +30,7 @@ ProgramInfo createProgramInfoForSip(std::vector &binary, size_t size, cons 
std::tie(decodeError, singleDeviceBinaryFormat) = NEO::decodeSingleDeviceBinary(programInfo, deviceBinary, decodeErrors, decodeWarnings); UNRECOVERABLE_IF(DecodeError::Success != decodeError); - auto success = programInfo.kernelInfos[0]->createKernelAllocation(device); + auto success = programInfo.kernelInfos[0]->createKernelAllocation(device, true); UNRECOVERABLE_IF(!success); return programInfo; diff --git a/opencl/source/kernel/kernel.cpp b/opencl/source/kernel/kernel.cpp index 5217e96302..dbd22761b1 100644 --- a/opencl/source/kernel/kernel.cpp +++ b/opencl/source/kernel/kernel.cpp @@ -744,7 +744,7 @@ void Kernel::substituteKernelHeap(void *newKernelHeap, size_t newKernelHeapSize) } else { memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(pKernelInfo->kernelAllocation); pKernelInfo->kernelAllocation = nullptr; - status = pKernelInfo->createKernelAllocation(getDevice().getDevice()); + status = pKernelInfo->createKernelAllocation(getDevice().getDevice(), isBuiltIn); } UNRECOVERABLE_IF(!status); } diff --git a/opencl/source/program/kernel_info.cpp b/opencl/source/program/kernel_info.cpp index ba66d7f490..108163f60d 100644 --- a/opencl/source/program/kernel_info.cpp +++ b/opencl/source/program/kernel_info.cpp @@ -416,10 +416,11 @@ uint32_t KernelInfo::getConstantBufferSize() const { return patchInfo.dataParameterStream ? patchInfo.dataParameterStream->DataParameterStreamSize : 0; } -bool KernelInfo::createKernelAllocation(const Device &device) { +bool KernelInfo::createKernelAllocation(const Device &device, bool internalIsa) { UNRECOVERABLE_IF(kernelAllocation); auto kernelIsaSize = heapInfo.KernelHeapSize; - kernelAllocation = device.getMemoryManager()->allocateGraphicsMemoryWithProperties({device.getRootDeviceIndex(), kernelIsaSize, GraphicsAllocation::AllocationType::KERNEL_ISA, device.getDeviceBitfield()}); + const auto allocType = internalIsa ? 
GraphicsAllocation::AllocationType::KERNEL_ISA_INTERNAL : GraphicsAllocation::AllocationType::KERNEL_ISA; + kernelAllocation = device.getMemoryManager()->allocateGraphicsMemoryWithProperties({device.getRootDeviceIndex(), kernelIsaSize, allocType, device.getDeviceBitfield()}); if (!kernelAllocation) { return false; } diff --git a/opencl/source/program/kernel_info.h b/opencl/source/program/kernel_info.h index e55cd9de63..c1f0eebff8 100644 --- a/opencl/source/program/kernel_info.h +++ b/opencl/source/program/kernel_info.h @@ -184,7 +184,7 @@ struct KernelInfo { return -1; } - bool createKernelAllocation(const Device &device); + bool createKernelAllocation(const Device &device, bool internalIsa); void apply(const DeviceInfoKernelPayloadConstants &constants); std::string attributes; diff --git a/opencl/source/program/process_device_binary.cpp b/opencl/source/program/process_device_binary.cpp index b8cab8d2ad..4cdc80a796 100644 --- a/opencl/source/program/process_device_binary.cpp +++ b/opencl/source/program/process_device_binary.cpp @@ -196,7 +196,7 @@ cl_int Program::processProgramInfo(ProgramInfo &src, const ClDevice &clDevice) { for (auto &kernelInfo : this->kernelInfoArray) { cl_int retVal = CL_SUCCESS; if (kernelInfo->heapInfo.KernelHeapSize) { - retVal = kernelInfo->createKernelAllocation(clDevice.getDevice()) ? CL_SUCCESS : CL_OUT_OF_HOST_MEMORY; + retVal = kernelInfo->createKernelAllocation(clDevice.getDevice(), isBuiltIn) ? 
CL_SUCCESS : CL_OUT_OF_HOST_MEMORY; } if (retVal != CL_SUCCESS) { diff --git a/opencl/source/utilities/logger.cpp b/opencl/source/utilities/logger.cpp index 975abb78c6..c92a2420b8 100644 --- a/opencl/source/utilities/logger.cpp +++ b/opencl/source/utilities/logger.cpp @@ -280,6 +280,8 @@ const char *FileLogger::getAllocationTypeString(GraphicsAllocation c return "INTERNAL_HOST_MEMORY"; case GraphicsAllocation::AllocationType::KERNEL_ISA: return "KERNEL_ISA"; + case GraphicsAllocation::AllocationType::KERNEL_ISA_INTERNAL: + return "KERNEL_ISA_INTERNAL"; case GraphicsAllocation::AllocationType::LINEAR_STREAM: return "LINEAR_STREAM"; case GraphicsAllocation::AllocationType::MAP_ALLOCATION: diff --git a/opencl/test/unit_test/command_queue/blit_enqueue_tests.cpp b/opencl/test/unit_test/command_queue/blit_enqueue_tests.cpp index abc6af996d..7182db7eb5 100644 --- a/opencl/test/unit_test/command_queue/blit_enqueue_tests.cpp +++ b/opencl/test/unit_test/command_queue/blit_enqueue_tests.cpp @@ -1628,7 +1628,7 @@ HWTEST_TEMPLATED_F(BlitCopyTests, givenKernelAllocationInLocalMemoryWhenCreating auto initialTaskCount = bcsMockContext->bcsCsr->peekTaskCount(); - kernelInfo.createKernelAllocation(device->getDevice()); + kernelInfo.createKernelAllocation(device->getDevice(), false); if (kernelInfo.kernelAllocation->isAllocatedInLocalMemoryPool()) { EXPECT_EQ(initialTaskCount + 1, bcsMockContext->bcsCsr->peekTaskCount()); @@ -1650,7 +1650,7 @@ HWTEST_TEMPLATED_F(BlitCopyTests, givenKernelAllocationInLocalMemoryWhenCreating auto initialTaskCount = bcsMockContext->bcsCsr->peekTaskCount(); - kernelInfo.createKernelAllocation(device->getDevice()); + kernelInfo.createKernelAllocation(device->getDevice(), false); EXPECT_EQ(initialTaskCount, bcsMockContext->bcsCsr->peekTaskCount()); @@ -1670,7 +1670,7 @@ HWTEST_TEMPLATED_F(BlitCopyTests, givenKernelAllocationInLocalMemoryWhenCreating auto initialTaskCount = bcsMockContext->bcsCsr->peekTaskCount(); - 
kernelInfo.createKernelAllocation(device->getDevice()); + kernelInfo.createKernelAllocation(device->getDevice(), false); EXPECT_EQ(initialTaskCount, bcsMockContext->bcsCsr->peekTaskCount()); diff --git a/opencl/test/unit_test/command_stream/aub_command_stream_receiver_1_tests.cpp b/opencl/test/unit_test/command_stream/aub_command_stream_receiver_1_tests.cpp index a08d2b60fe..ae17ae8ccd 100644 --- a/opencl/test/unit_test/command_stream/aub_command_stream_receiver_1_tests.cpp +++ b/opencl/test/unit_test/command_stream/aub_command_stream_receiver_1_tests.cpp @@ -720,6 +720,7 @@ HWTEST_F(AubCommandStreamReceiverTests, givenAubCommandStreamReceiverWhenWriteMe GraphicsAllocation::AllocationType::CONSTANT_SURFACE, GraphicsAllocation::AllocationType::GLOBAL_SURFACE, GraphicsAllocation::AllocationType::KERNEL_ISA, + GraphicsAllocation::AllocationType::KERNEL_ISA_INTERNAL, GraphicsAllocation::AllocationType::PRIVATE_SURFACE, GraphicsAllocation::AllocationType::SCRATCH_SURFACE, GraphicsAllocation::AllocationType::BUFFER, diff --git a/opencl/test/unit_test/gen12lp/device_queue_tests_gen12lp.cpp b/opencl/test/unit_test/gen12lp/device_queue_tests_gen12lp.cpp index d2c0492ca5..9960031cf5 100644 --- a/opencl/test/unit_test/gen12lp/device_queue_tests_gen12lp.cpp +++ b/opencl/test/unit_test/gen12lp/device_queue_tests_gen12lp.cpp @@ -19,7 +19,7 @@ GEN12LPTEST_F(DeviceQueueHwTest, givenDeviceQueueWhenRunningOnCCsThenFfidSkipOff auto device = pContext->getDevice(0); std::unique_ptr mockParentKernel(MockParentKernel::create(*pContext)); KernelInfo *blockInfo = const_cast(mockParentKernel->mockProgram->blockKernelManager->getBlockKernelInfo(0)); - blockInfo->createKernelAllocation(device->getDevice()); + blockInfo->createKernelAllocation(device->getDevice(), false); ASSERT_NE(nullptr, blockInfo->getGraphicsAllocation()); const_cast(blockInfo->patchInfo.threadPayload)->OffsetToSkipSetFFIDGP = 0x1234; diff --git a/opencl/test/unit_test/kernel/kernel_tests.cpp 
b/opencl/test/unit_test/kernel/kernel_tests.cpp index 98311ec7d1..c7d7052dbb 100644 --- a/opencl/test/unit_test/kernel/kernel_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_tests.cpp @@ -3068,7 +3068,7 @@ TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeFalseWhenGettingStartOffse threadPayload.OffsetToSkipPerThreadDataLoad = 128u; mockKernel.kernelInfo.patchInfo.threadPayload = &threadPayload; - mockKernel.kernelInfo.createKernelAllocation(device->getDevice()); + mockKernel.kernelInfo.createKernelAllocation(device->getDevice(), false); auto allocationOffset = mockKernel.kernelInfo.getGraphicsAllocation()->getGpuAddressToPatch(); mockKernel.mockKernel->setStartOffset(128); @@ -3086,7 +3086,7 @@ TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeTrueAndLocalIdsUsedWhenGet threadPayload.OffsetToSkipPerThreadDataLoad = 128u; mockKernel.kernelInfo.patchInfo.threadPayload = &threadPayload; - mockKernel.kernelInfo.createKernelAllocation(device->getDevice()); + mockKernel.kernelInfo.createKernelAllocation(device->getDevice(), false); auto allocationOffset = mockKernel.kernelInfo.getGraphicsAllocation()->getGpuAddressToPatch(); mockKernel.mockKernel->setStartOffset(128); @@ -3104,7 +3104,7 @@ TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeFalseAndLocalIdsNotUsedWhe threadPayload.OffsetToSkipPerThreadDataLoad = 128u; mockKernel.kernelInfo.patchInfo.threadPayload = &threadPayload; - mockKernel.kernelInfo.createKernelAllocation(device->getDevice()); + mockKernel.kernelInfo.createKernelAllocation(device->getDevice(), false); auto allocationOffset = mockKernel.kernelInfo.getGraphicsAllocation()->getGpuAddressToPatch(); mockKernel.mockKernel->setStartOffset(128); diff --git a/opencl/test/unit_test/kernel/substitute_kernel_heap_tests.cpp b/opencl/test/unit_test/kernel/substitute_kernel_heap_tests.cpp index f91c30bb46..a1381a9a15 100644 --- a/opencl/test/unit_test/kernel/substitute_kernel_heap_tests.cpp +++ 
b/opencl/test/unit_test/kernel/substitute_kernel_heap_tests.cpp @@ -23,7 +23,7 @@ TEST_F(KernelSubstituteTest, givenKernelWhenSubstituteKernelHeapWithGreaterSizeT kernel.kernelInfo.heapInfo.KernelHeapSize = initialHeapSize; EXPECT_EQ(nullptr, kernel.kernelInfo.kernelAllocation); - kernel.kernelInfo.createKernelAllocation(*pDevice); + kernel.kernelInfo.createKernelAllocation(*pDevice, false); auto firstAllocation = kernel.kernelInfo.kernelAllocation; EXPECT_NE(nullptr, firstAllocation); auto firstAllocationSize = firstAllocation->getUnderlyingBufferSize(); @@ -53,7 +53,7 @@ TEST_F(KernelSubstituteTest, givenKernelWhenSubstituteKernelHeapWithSameSizeThen kernel.kernelInfo.heapInfo.KernelHeapSize = initialHeapSize; EXPECT_EQ(nullptr, kernel.kernelInfo.kernelAllocation); - kernel.kernelInfo.createKernelAllocation(*pDevice); + kernel.kernelInfo.createKernelAllocation(*pDevice, false); auto firstAllocation = kernel.kernelInfo.kernelAllocation; EXPECT_NE(nullptr, firstAllocation); auto firstAllocationSize = firstAllocation->getUnderlyingBufferSize(); @@ -82,7 +82,7 @@ TEST_F(KernelSubstituteTest, givenKernelWhenSubstituteKernelHeapWithSmallerSizeT kernel.kernelInfo.heapInfo.KernelHeapSize = initialHeapSize; EXPECT_EQ(nullptr, kernel.kernelInfo.kernelAllocation); - kernel.kernelInfo.createKernelAllocation(*pDevice); + kernel.kernelInfo.createKernelAllocation(*pDevice, false); auto firstAllocation = kernel.kernelInfo.kernelAllocation; EXPECT_NE(nullptr, firstAllocation); auto firstAllocationSize = firstAllocation->getUnderlyingBufferSize(); @@ -113,7 +113,7 @@ TEST_F(KernelSubstituteTest, givenKernelWithUsedKernelAllocationWhenSubstituteKe const size_t initialHeapSize = 0x40; kernel.kernelInfo.heapInfo.KernelHeapSize = initialHeapSize; - kernel.kernelInfo.createKernelAllocation(*pDevice); + kernel.kernelInfo.createKernelAllocation(*pDevice, false); auto firstAllocation = kernel.kernelInfo.kernelAllocation; uint32_t notReadyTaskCount = *commandStreamReceiver.getTagAddress() 
+ 1u; diff --git a/opencl/test/unit_test/memory_manager/graphics_allocation_tests.cpp b/opencl/test/unit_test/memory_manager/graphics_allocation_tests.cpp index 04d4842c8a..a8fba71530 100644 --- a/opencl/test/unit_test/memory_manager/graphics_allocation_tests.cpp +++ b/opencl/test/unit_test/memory_manager/graphics_allocation_tests.cpp @@ -134,6 +134,10 @@ TEST(GraphicsAllocationTest, whenAllocationTypeIsKernelIsaThenCpuAccessIsNotRequ EXPECT_FALSE(GraphicsAllocation::isCpuAccessRequired(GraphicsAllocation::AllocationType::KERNEL_ISA)); } +TEST(GraphicsAllocationTest, whenAllocationTypeIsKernelIsaInternalThenCpuAccessIsNotRequired) { + EXPECT_FALSE(GraphicsAllocation::isCpuAccessRequired(GraphicsAllocation::AllocationType::KERNEL_ISA_INTERNAL)); +} + TEST(GraphicsAllocationTest, whenAllocationTypeIsLinearStreamThenCpuAccessIsRequired) { EXPECT_TRUE(GraphicsAllocation::isCpuAccessRequired(GraphicsAllocation::AllocationType::LINEAR_STREAM)); } diff --git a/opencl/test/unit_test/memory_manager/memory_manager_allocate_in_preferred_pool_tests.inl b/opencl/test/unit_test/memory_manager/memory_manager_allocate_in_preferred_pool_tests.inl index e27445dac2..a7204513d6 100644 --- a/opencl/test/unit_test/memory_manager/memory_manager_allocate_in_preferred_pool_tests.inl +++ b/opencl/test/unit_test/memory_manager/memory_manager_allocate_in_preferred_pool_tests.inl @@ -634,6 +634,10 @@ HWTEST_F(GetAllocationDataTestHw, givenKernelIsaTypeWhenGetAllocationDataIsCalle AllocationProperties properties{mockRootDeviceIndex, 1, GraphicsAllocation::AllocationType::KERNEL_ISA, mockDeviceBitfield}; mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_NE(defaultHwInfo->featureTable.ftrLocalMemory, allocData.flags.useSystemMemory); + + AllocationProperties properties2{mockRootDeviceIndex, 1, GraphicsAllocation::AllocationType::KERNEL_ISA_INTERNAL, mockDeviceBitfield}; + 
mockMemoryManager.getAllocationData(allocData, properties2, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties2)); + EXPECT_NE(defaultHwInfo->featureTable.ftrLocalMemory, allocData.flags.useSystemMemory); } HWTEST_F(GetAllocationDataTestHw, givenLinearStreamWhenGetAllocationDataIsCalledThenSystemMemoryIsNotRequested) { diff --git a/opencl/test/unit_test/memory_manager/memory_manager_tests.cpp b/opencl/test/unit_test/memory_manager/memory_manager_tests.cpp index 8c26fb8da8..d86eb50cbf 100644 --- a/opencl/test/unit_test/memory_manager/memory_manager_tests.cpp +++ b/opencl/test/unit_test/memory_manager/memory_manager_tests.cpp @@ -2197,12 +2197,20 @@ TEST_F(HeapSelectorTest, given32bitInternalAllocationWhenSelectingHeapThenIntern GraphicsAllocation allocation{0, GraphicsAllocation::AllocationType::KERNEL_ISA, nullptr, 0, 0, 0, MemoryPool::MemoryNull}; allocation.set32BitAllocation(true); EXPECT_EQ(MemoryManager::selectInternalHeap(allocation.isAllocatedInLocalMemoryPool()), memoryManager->selectHeap(&allocation, false, false, false)); + + GraphicsAllocation allocation2{0, GraphicsAllocation::AllocationType::KERNEL_ISA_INTERNAL, nullptr, 0, 0, 0, MemoryPool::MemoryNull}; + allocation2.set32BitAllocation(true); + EXPECT_EQ(MemoryManager::selectInternalHeap(allocation2.isAllocatedInLocalMemoryPool()), memoryManager->selectHeap(&allocation2, false, false, false)); } TEST_F(HeapSelectorTest, givenNon32bitInternalAllocationWhenSelectingHeapThenInternalHeapIsUsed) { GraphicsAllocation allocation{0, GraphicsAllocation::AllocationType::KERNEL_ISA, nullptr, 0, 0, 0, MemoryPool::MemoryNull}; allocation.set32BitAllocation(false); EXPECT_EQ(MemoryManager::selectInternalHeap(allocation.isAllocatedInLocalMemoryPool()), memoryManager->selectHeap(&allocation, false, false, false)); + + GraphicsAllocation allocation2{0, GraphicsAllocation::AllocationType::KERNEL_ISA_INTERNAL, nullptr, 0, 0, 0, MemoryPool::MemoryNull}; + allocation2.set32BitAllocation(false); +
EXPECT_EQ(MemoryManager::selectInternalHeap(allocation2.isAllocatedInLocalMemoryPool()), memoryManager->selectHeap(&allocation2, false, false, false)); } TEST_F(HeapSelectorTest, given32bitExternalAllocationWhenSelectingHeapThenExternalHeapIsUsed) { diff --git a/opencl/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp b/opencl/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp index b316434f02..b175b39945 100644 --- a/opencl/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp +++ b/opencl/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp @@ -3971,6 +3971,29 @@ TEST(DrmAllocationTest, givenResourceRegistrationEnabledWhenAllocationTypeShould EXPECT_EQ(Drm::ResourceClass::MaxSize, drm.registeredClass); } } + +TEST(DrmAllocationTest, givenResourceRegistrationEnabledWhenAllocationTypeShouldNotBeRegisteredThenNoBindHandleCreated) { + auto executionEnvironment = std::make_unique(); + executionEnvironment->prepareRootDeviceEnvironments(1); + + DrmMockResources drm(*executionEnvironment->rootDeviceEnvironments[0]); + + drm.registeredClass = Drm::ResourceClass::MaxSize; + + for (uint32_t i = 3; i < 3 + static_cast(Drm::ResourceClass::MaxSize); i++) { + drm.classHandles.push_back(i); + } + + { + MockBufferObject bo(&drm, 0, 0, 1); + MockDrmAllocation allocation(GraphicsAllocation::AllocationType::KERNEL_ISA_INTERNAL, MemoryPool::System4KBPages); + allocation.bufferObjects[0] = &bo; + allocation.registerBOBindExtHandle(&drm); + EXPECT_EQ(0u, bo.bindExtHandles.size()); + } + EXPECT_EQ(Drm::ResourceClass::MaxSize, drm.registeredClass); +} + TEST(DrmAllocationTest, givenResourceRegistrationNotEnabledWhenRegisteringBindExtHandleThenHandleIsNotAddedToBo) { auto executionEnvironment = std::make_unique(); executionEnvironment->prepareRootDeviceEnvironments(1); diff --git a/opencl/test/unit_test/program/kernel_info_tests.cpp b/opencl/test/unit_test/program/kernel_info_tests.cpp index bf51e40b9a..d6ba840f13 100644 --- 
a/opencl/test/unit_test/program/kernel_info_tests.cpp +++ b/opencl/test/unit_test/program/kernel_info_tests.cpp @@ -119,7 +119,7 @@ TEST(KernelInfoTest, givenKernelInfoWhenCreateKernelAllocationThenCopyWholeKerne heap[i] = static_cast(i); } - auto retVal = kernelInfo.createKernelAllocation(*device); + auto retVal = kernelInfo.createKernelAllocation(*device, false); EXPECT_TRUE(retVal); auto allocation = kernelInfo.kernelAllocation; EXPECT_EQ(0, memcmp(allocation->getUnderlyingBuffer(), heap, heapSize)); @@ -127,6 +127,38 @@ TEST(KernelInfoTest, givenKernelInfoWhenCreateKernelAllocationThenCopyWholeKerne device->getMemoryManager()->checkGpuUsageAndDestroyGraphicsAllocations(allocation); } +TEST(KernelInfoTest, givenKernelInfoWhenCreatingKernelAllocationWithInternalIsaFalseTypeThenCorrectAllocationTypeIsUsed) { + KernelInfo kernelInfo; + auto factory = UltDeviceFactory{1, 0}; + auto device = factory.rootDevices[0]; + const size_t heapSize = 0x40; + char heap[heapSize]; + kernelInfo.heapInfo.KernelHeapSize = heapSize; + kernelInfo.heapInfo.pKernelHeap = &heap; + + auto retVal = kernelInfo.createKernelAllocation(*device, false); + EXPECT_TRUE(retVal); + auto allocation = kernelInfo.kernelAllocation; + EXPECT_EQ(GraphicsAllocation::AllocationType::KERNEL_ISA, allocation->getAllocationType()); + device->getMemoryManager()->checkGpuUsageAndDestroyGraphicsAllocations(allocation); +} + +TEST(KernelInfoTest, givenKernelInfoWhenCreatingKernelAllocationWithInternalIsaTrueTypeThenCorrectAllocationTypeIsUsed) { + KernelInfo kernelInfo; + auto factory = UltDeviceFactory{1, 0}; + auto device = factory.rootDevices[0]; + const size_t heapSize = 0x40; + char heap[heapSize]; + kernelInfo.heapInfo.KernelHeapSize = heapSize; + kernelInfo.heapInfo.pKernelHeap = &heap; + + auto retVal = kernelInfo.createKernelAllocation(*device, true); + EXPECT_TRUE(retVal); + auto allocation = kernelInfo.kernelAllocation; + EXPECT_EQ(GraphicsAllocation::AllocationType::KERNEL_ISA_INTERNAL, 
allocation->getAllocationType()); + device->getMemoryManager()->checkGpuUsageAndDestroyGraphicsAllocations(allocation); +} + class MyMemoryManager : public OsAgnosticMemoryManager { public: using OsAgnosticMemoryManager::OsAgnosticMemoryManager; @@ -138,7 +170,7 @@ TEST(KernelInfoTest, givenKernelInfoWhenCreateKernelAllocationAndCannotAllocateM auto executionEnvironment = new MockExecutionEnvironment(defaultHwInfo.get()); executionEnvironment->memoryManager.reset(new MyMemoryManager(*executionEnvironment)); auto device = std::unique_ptr(Device::create(executionEnvironment, mockRootDeviceIndex)); - auto retVal = kernelInfo.createKernelAllocation(*device); + auto retVal = kernelInfo.createKernelAllocation(*device, false); EXPECT_FALSE(retVal); } @@ -237,7 +269,7 @@ TEST_F(KernelInfoMultiRootDeviceTests, kernelAllocationHasCorrectRootDeviceIndex kernelInfo.heapInfo.KernelHeapSize = heapSize; kernelInfo.heapInfo.pKernelHeap = &heap; - auto retVal = kernelInfo.createKernelAllocation(device->getDevice()); + auto retVal = kernelInfo.createKernelAllocation(device->getDevice(), false); EXPECT_TRUE(retVal); auto allocation = kernelInfo.kernelAllocation; ASSERT_NE(nullptr, allocation); diff --git a/opencl/test/unit_test/utilities/file_logger_tests.cpp b/opencl/test/unit_test/utilities/file_logger_tests.cpp index c7a5b265b7..f84e4100ca 100644 --- a/opencl/test/unit_test/utilities/file_logger_tests.cpp +++ b/opencl/test/unit_test/utilities/file_logger_tests.cpp @@ -909,6 +909,7 @@ AllocationTypeTestCase allocationTypeValues[] = { {GraphicsAllocation::AllocationType::INTERNAL_HEAP, "INTERNAL_HEAP"}, {GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY, "INTERNAL_HOST_MEMORY"}, {GraphicsAllocation::AllocationType::KERNEL_ISA, "KERNEL_ISA"}, + {GraphicsAllocation::AllocationType::KERNEL_ISA_INTERNAL, "KERNEL_ISA_INTERNAL"}, {GraphicsAllocation::AllocationType::LINEAR_STREAM, "LINEAR_STREAM"}, {GraphicsAllocation::AllocationType::MAP_ALLOCATION, "MAP_ALLOCATION"}, 
{GraphicsAllocation::AllocationType::MCS, "MCS"}, diff --git a/shared/source/aub/aub_helper.h b/shared/source/aub/aub_helper.h index d7ac9064b9..716db5eb6d 100644 --- a/shared/source/aub/aub_helper.h +++ b/shared/source/aub/aub_helper.h @@ -21,6 +21,7 @@ class AubHelper : public NonCopyableOrMovableClass { case GraphicsAllocation::AllocationType::CONSTANT_SURFACE: case GraphicsAllocation::AllocationType::GLOBAL_SURFACE: case GraphicsAllocation::AllocationType::KERNEL_ISA: + case GraphicsAllocation::AllocationType::KERNEL_ISA_INTERNAL: case GraphicsAllocation::AllocationType::PRIVATE_SURFACE: case GraphicsAllocation::AllocationType::SCRATCH_SURFACE: case GraphicsAllocation::AllocationType::BUFFER: diff --git a/shared/source/command_stream/command_stream_receiver.cpp b/shared/source/command_stream/command_stream_receiver.cpp index 12fceb8788..5c32e37161 100644 --- a/shared/source/command_stream/command_stream_receiver.cpp +++ b/shared/source/command_stream/command_stream_receiver.cpp @@ -591,7 +591,7 @@ void CommandStreamReceiver::printDeviceIndex() { void CommandStreamReceiver::checkForNewResources(uint32_t submittedTaskCount, uint32_t allocationTaskCount, GraphicsAllocation &gfxAllocation) { if (useNewResourceImplicitFlush) { - if (allocationTaskCount == GraphicsAllocation::objectNotUsed && gfxAllocation.getAllocationType() != GraphicsAllocation::AllocationType::KERNEL_ISA) { + if (allocationTaskCount == GraphicsAllocation::objectNotUsed && !GraphicsAllocation::isIsaAllocationType(gfxAllocation.getAllocationType())) { newResources = true; if (DebugManager.flags.ProvideVerboseImplicitFlush.get()) { printf("New resource detected of type %llu\n", static_cast(gfxAllocation.getAllocationType())); diff --git a/shared/source/helpers/heap_assigner.cpp b/shared/source/helpers/heap_assigner.cpp index 7992db971d..8552eda34a 100644 --- a/shared/source/helpers/heap_assigner.cpp +++ b/shared/source/helpers/heap_assigner.cpp @@ -17,7 +17,7 @@ HeapAssigner::HeapAssigner() { 
apiAllowExternalHeapForSshAndDsh = ApiSpecificConfig::getHeapConfiguration(); } bool HeapAssigner::useInternal32BitHeap(GraphicsAllocation::AllocationType allocType) { - return allocType == GraphicsAllocation::AllocationType::KERNEL_ISA || + return GraphicsAllocation::isIsaAllocationType(allocType) || allocType == GraphicsAllocation::AllocationType::INTERNAL_HEAP || allocType == GraphicsAllocation::AllocationType::DEBUG_MODULE_AREA; } diff --git a/shared/source/memory_manager/graphics_allocation.h b/shared/source/memory_manager/graphics_allocation.h index 900e3888ee..e4c9cdb776 100644 --- a/shared/source/memory_manager/graphics_allocation.h +++ b/shared/source/memory_manager/graphics_allocation.h @@ -69,6 +69,7 @@ class GraphicsAllocation : public IDNode { INTERNAL_HEAP, INTERNAL_HOST_MEMORY, KERNEL_ISA, + KERNEL_ISA_INTERNAL, LINEAR_STREAM, MAP_ALLOCATION, MCS, @@ -221,6 +222,12 @@ class GraphicsAllocation : public IDNode { allocationType == AllocationType::RING_BUFFER || allocationType == AllocationType::SEMAPHORE_BUFFER; } + + static bool isIsaAllocationType(GraphicsAllocation::AllocationType type) { + return type == GraphicsAllocation::AllocationType::KERNEL_ISA || + type == GraphicsAllocation::AllocationType::KERNEL_ISA_INTERNAL; + } + void *getReservedAddressPtr() const { return this->reservedAddressRangeInfo.addressPtr; } diff --git a/shared/source/memory_manager/memory_manager.cpp b/shared/source/memory_manager/memory_manager.cpp index 647cc38cee..bbbcd80fae 100644 --- a/shared/source/memory_manager/memory_manager.cpp +++ b/shared/source/memory_manager/memory_manager.cpp @@ -349,7 +349,7 @@ bool MemoryManager::getAllocationData(AllocationData &allocationData, const Allo break; } - if (properties.allocationType == GraphicsAllocation::AllocationType::KERNEL_ISA) { + if (GraphicsAllocation::isIsaAllocationType(properties.allocationType)) { allocationData.flags.useSystemMemory = hwHelper.useSystemMemoryPlacementForISA(*hwInfo); } @@ -361,6 +361,7 @@ bool 
MemoryManager::getAllocationData(AllocationData &allocationData, const Allo case GraphicsAllocation::AllocationType::INSTRUCTION_HEAP: case GraphicsAllocation::AllocationType::INTERNAL_HEAP: case GraphicsAllocation::AllocationType::KERNEL_ISA: + case GraphicsAllocation::AllocationType::KERNEL_ISA_INTERNAL: case GraphicsAllocation::AllocationType::LINEAR_STREAM: case GraphicsAllocation::AllocationType::MCS: case GraphicsAllocation::AllocationType::SCRATCH_SURFACE: diff --git a/shared/source/os_interface/linux/drm_memory_manager_allocate_in_device_pool_dg1.cpp b/shared/source/os_interface/linux/drm_memory_manager_allocate_in_device_pool_dg1.cpp index 34cd372526..336880af02 100644 --- a/shared/source/os_interface/linux/drm_memory_manager_allocate_in_device_pool_dg1.cpp +++ b/shared/source/os_interface/linux/drm_memory_manager_allocate_in_device_pool_dg1.cpp @@ -125,6 +125,7 @@ uint64_t getGpuAddress(GraphicsAllocation::AllocationType allocType, GfxPartitio sizeAllocated = 0; break; case GraphicsAllocation::AllocationType::KERNEL_ISA: + case GraphicsAllocation::AllocationType::KERNEL_ISA_INTERNAL: case GraphicsAllocation::AllocationType::INTERNAL_HEAP: gpuAddress = GmmHelper::canonize(gfxPartition->heapAllocate(HeapIndex::HEAP_INTERNAL_DEVICE_MEMORY, sizeAllocated)); break; diff --git a/shared/test/unit_test/heap_assigner/heap_assigner_tests.cpp b/shared/test/unit_test/heap_assigner/heap_assigner_tests.cpp index 636f6e230a..6ea6a2d934 100644 --- a/shared/test/unit_test/heap_assigner/heap_assigner_tests.cpp +++ b/shared/test/unit_test/heap_assigner/heap_assigner_tests.cpp @@ -20,11 +20,15 @@ class AlocationHelperTests : public Test { HWTEST_F(AlocationHelperTests, givenKernelIsaTypeWhenUse32BitHeapCalledThenTrueReturned) { EXPECT_TRUE(heapAssigner.use32BitHeap(GraphicsAllocation::AllocationType::KERNEL_ISA)); + EXPECT_TRUE(heapAssigner.use32BitHeap(GraphicsAllocation::AllocationType::KERNEL_ISA_INTERNAL)); } HWTEST_F(AlocationHelperTests, 
givenKernelIsaTypeWhenUseIternalAllocatorThenUseHeapInternal) { auto heapIndex = heapAssigner.get32BitHeapIndex(GraphicsAllocation::AllocationType::KERNEL_ISA, true, *defaultHwInfo, false); EXPECT_EQ(heapIndex, NEO::HeapIndex::HEAP_INTERNAL_DEVICE_MEMORY); + + heapIndex = heapAssigner.get32BitHeapIndex(GraphicsAllocation::AllocationType::KERNEL_ISA_INTERNAL, true, *defaultHwInfo, false); + EXPECT_EQ(heapIndex, NEO::HeapIndex::HEAP_INTERNAL_DEVICE_MEMORY); } HWTEST_F(AlocationHelperTests, givenNotInternalTypeWhenUseIternalAllocatorThenUseHeapExternal) { @@ -32,8 +36,9 @@ HWTEST_F(AlocationHelperTests, givenNotInternalTypeWhenUseIternalAllocatorThenUs EXPECT_EQ(heapIndex, NEO::HeapIndex::HEAP_EXTERNAL_DEVICE_MEMORY); } -HWTEST_F(AlocationHelperTests, givenKernelIsaTypeWhenUseInternalAllocatorCalledThenTrueReturned) { +HWTEST_F(AlocationHelperTests, givenKernelIsaTypesWhenUseInternalAllocatorCalledThenTrueReturned) { EXPECT_TRUE(heapAssigner.useInternal32BitHeap(GraphicsAllocation::AllocationType::KERNEL_ISA)); + EXPECT_TRUE(heapAssigner.useInternal32BitHeap(GraphicsAllocation::AllocationType::KERNEL_ISA_INTERNAL)); } HWTEST_F(AlocationHelperTests, givenInternalHeapTypeWhenUseInternalAllocatorCalledThenTrueReturned) {