From 55952d33dfef5a853891bce64330ed43b3f1e0f8 Mon Sep 17 00:00:00 2001 From: Jaime Arteaga Date: Mon, 6 Mar 2023 06:23:17 +0000 Subject: [PATCH] feature: Enable use of KMD calls in prefetch APIs If applications call Prefetch APIs, like zeCommandListAppendMemoryPrefetch and clEnqueueMigrateMemINTEL, then enable the use of KMD calls by default. Signed-off-by: Jaime Arteaga --- .../xe_hpc_core/cmdlist_xe_hpc_core.cpp | 2 +- .../xe_hpc_core/test_cmdlist_xe_hpc_core.cpp | 36 ++++++++++++++----- opencl/source/api/api.cpp | 2 +- .../api/cl_unified_shared_memory_tests.inl | 7 ++-- .../debug_settings/debug_variables_base.inl | 2 +- shared/test/common/test_files/igdrcl.config | 2 +- 6 files changed, 34 insertions(+), 17 deletions(-) diff --git a/level_zero/core/source/xe_hpc_core/cmdlist_xe_hpc_core.cpp b/level_zero/core/source/xe_hpc_core/cmdlist_xe_hpc_core.cpp index 03e84dd0c5..3ad29da2be 100644 --- a/level_zero/core/source/xe_hpc_core/cmdlist_xe_hpc_core.cpp +++ b/level_zero/core/source/xe_hpc_core/cmdlist_xe_hpc_core.cpp @@ -35,7 +35,7 @@ ze_result_t CommandListCoreFamily::appendMemoryPrefetch(const return ZE_RESULT_ERROR_INVALID_ARGUMENT; } - if (NEO::DebugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.get() > 0) { + if (NEO::DebugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.get() == true) { this->performMemoryPrefetch = true; auto prefetchManager = device->getDriverHandle()->getMemoryManager()->getPrefetchManager(); if (prefetchManager) { diff --git a/level_zero/core/test/unit_tests/xe_hpc_core/test_cmdlist_xe_hpc_core.cpp b/level_zero/core/test/unit_tests/xe_hpc_core/test_cmdlist_xe_hpc_core.cpp index 575d6d881a..3563582fc4 100644 --- a/level_zero/core/test/unit_tests/xe_hpc_core/test_cmdlist_xe_hpc_core.cpp +++ b/level_zero/core/test/unit_tests/xe_hpc_core/test_cmdlist_xe_hpc_core.cpp @@ -67,7 +67,33 @@ HWTEST2_F(CommandListAppendLaunchKernelXeHpcCore, givenKernelUsingSyncBufferWhen using CommandListStatePrefetchXeHpcCore = Test; 
-HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenUnifiedSharedMemoryWhenPrefetchApiIsCalledThenDontRequestMemoryPrefetchByDefault, IsXeHpcCore) { +HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenUnifiedSharedMemoryWhenPrefetchApiIsCalledThenRequestMemoryPrefetchByDefault, IsXeHpcCore) { + auto pCommandList = std::make_unique>>(); + auto result = pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + + size_t size = 10; + size_t alignment = 1u; + void *ptr = nullptr; + + ze_device_mem_alloc_desc_t deviceDesc = {}; + ze_host_mem_alloc_desc_t hostDesc = {}; + result = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, size, alignment, &ptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_NE(nullptr, ptr); + + result = pCommandList->appendMemoryPrefetch(ptr, size); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + EXPECT_TRUE(pCommandList->isMemoryPrefetchRequested()); + + context->freeMem(ptr); +} + +HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenUnifiedSharedMemoryWhenPrefetchApiAndDebuKeyDisabledIsCalledThenRequestMemoryPrefetchIsNotPerformed, IsXeHpcCore) { + DebugManagerStateRestore restore; + DebugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.set(0); + auto pCommandList = std::make_unique>>(); auto result = pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u); ASSERT_EQ(ZE_RESULT_SUCCESS, result); @@ -92,7 +118,6 @@ HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenUnifiedSharedMemoryWhenPrefetc HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenAppendMemoryPrefetchForKmdMigratedSharedAllocationsWhenPrefetchApiIsCalledThenRequestMemoryPrefetch, IsXeHpcCore) { DebugManagerStateRestore restore; - DebugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.set(1); auto pCommandList = std::make_unique>>(); auto result = pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u); @@ -118,7 +143,6 @@ 
HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenAppendMemoryPrefetchForKmdMigr HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenAppendMemoryPrefetchForKmdMigratedSharedAllocationsSetWhenPrefetchApiIsCalledOnUnifiedSharedMemoryThenAppendAllocationForPrefetch, IsXeHpcCore) { DebugManagerStateRestore restore; - DebugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.set(1); DebugManager.flags.UseKmdMigration.set(1); auto memoryManager = static_cast(device->getDriverHandle()->getMemoryManager()); @@ -150,7 +174,6 @@ HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenAppendMemoryPrefetchForKmdMigr HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenAppendMemoryPrefetchForKmdMigratedSharedAllocationsSetWhenPrefetchApiIsCalledOnUnifiedDeviceMemoryThenDontAppendAllocationForPrefetch, IsXeHpcCore) { DebugManagerStateRestore restore; - DebugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.set(1); DebugManager.flags.UseKmdMigration.set(1); auto memoryManager = static_cast(device->getDriverHandle()->getMemoryManager()); @@ -181,7 +204,6 @@ HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenAppendMemoryPrefetchForKmdMigr HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenAppendMemoryPrefetchForKmdMigratedSharedAllocationsSetWhenPrefetchApiIsCalledOnUnifiedHostMemoryThenDontAppendAllocationForPrefetch, IsXeHpcCore) { DebugManagerStateRestore restore; - DebugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.set(1); DebugManager.flags.UseKmdMigration.set(1); auto memoryManager = static_cast(device->getDriverHandle()->getMemoryManager()); @@ -216,7 +238,6 @@ HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenAppendMemoryPrefetchForKmdMigr using WALKER_TYPE = typename FamilyType::WALKER_TYPE; DebugManagerStateRestore restore; - DebugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.set(1); DebugManager.flags.UseKmdMigration.set(1); EXPECT_EQ(0b0001u, neoDevice->deviceBitfield.to_ulong()); @@ -268,7 +289,6 @@ 
HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenAppendMemoryPrefetchForKmdMigr using WALKER_TYPE = typename FamilyType::WALKER_TYPE; DebugManagerStateRestore restore; - DebugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.set(1); DebugManager.flags.UseKmdMigration.set(1); neoDevice->deviceBitfield = 0b0010; @@ -322,7 +342,6 @@ HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenAppendMemoryPrefetchForKmdMigr using WALKER_TYPE = typename FamilyType::WALKER_TYPE; DebugManagerStateRestore restore; - DebugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.set(1); DebugManager.flags.UseKmdMigration.set(1); neoDevice->deviceBitfield = 0b1000; @@ -394,7 +413,6 @@ HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenAppendMemoryPrefetchForKmdMigr using WALKER_TYPE = typename FamilyType::WALKER_TYPE; DebugManagerStateRestore restore; - DebugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.set(1); DebugManager.flags.UseKmdMigration.set(1); neoDevice->deviceBitfield = 0b001; diff --git a/opencl/source/api/api.cpp b/opencl/source/api/api.cpp index 4b08af5ee6..5524257261 100644 --- a/opencl/source/api/api.cpp +++ b/opencl/source/api/api.cpp @@ -4198,7 +4198,7 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueMigrateMemINTEL( pEvent->setCmdType(CL_COMMAND_MIGRATEMEM_INTEL); } - if (NEO::DebugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.get() > 0) { + if (NEO::DebugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.get() == true) { auto pSvmAllocMgr = pCommandQueue->getContext().getSVMAllocsManager(); UNRECOVERABLE_IF(pSvmAllocMgr == nullptr); diff --git a/opencl/test/unit_test/api/cl_unified_shared_memory_tests.inl b/opencl/test/unit_test/api/cl_unified_shared_memory_tests.inl index 150a2c5530..920d503005 100644 --- a/opencl/test/unit_test/api/cl_unified_shared_memory_tests.inl +++ b/opencl/test/unit_test/api/cl_unified_shared_memory_tests.inl @@ -889,9 +889,11 @@ TEST(clUnifiedSharedMemoryTests, 
whenClEnqueueMigrateMemINTELisCalledWithWrongQu EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal); } -TEST(clUnifiedSharedMemoryTests, whenClEnqueueMigrateMemINTELisCalledWithProperParametersThenSuccessIsReturned) { +TEST(clUnifiedSharedMemoryTests, whenClEnqueueMigrateMemINTELisCalledWithProperParametersAndDebugKeyDisabledThenSuccessIsReturned) { + DebugManagerStateRestore restorer; MockCommandQueue cmdQ; void *unifiedMemoryAlloc = reinterpret_cast(0x1234); + DebugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.set(0); auto retVal = clEnqueueMigrateMemINTEL(&cmdQ, unifiedMemoryAlloc, 10, 0, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); @@ -900,7 +902,6 @@ TEST(clUnifiedSharedMemoryTests, whenClEnqueueMigrateMemINTELisCalledWithProperP TEST(clUnifiedSharedMemoryTests, givenUseKmdMigrationAndAppendMemoryPrefetchForKmdMigratedSharedAllocationsWhenClEnqueueMigrateMemINTELisCalledThenExplicitlyMigrateMemoryToTheDeviceAssociatedWithCommandQueue) { DebugManagerStateRestore restorer; DebugManager.flags.UseKmdMigration.set(1); - DebugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.set(1); MockContext mockContext; auto device = mockContext.getDevice(0u); @@ -926,7 +927,6 @@ TEST(clUnifiedSharedMemoryTests, givenUseKmdMigrationAndAppendMemoryPrefetchForK TEST(clUnifiedSharedMemoryTests, givenContextWithMultipleSubdevicesWhenClEnqueueMigrateMemINTELisCalledThenExplicitlyMigrateMemoryToTheSubDeviceAssociatedWithCommandQueue) { DebugManagerStateRestore restorer; DebugManager.flags.UseKmdMigration.set(1); - DebugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.set(1); UltClDeviceFactory deviceFactory{1, 4}; cl_device_id allDevices[] = {deviceFactory.rootDevices[0], deviceFactory.subDevices[0], deviceFactory.subDevices[1], @@ -955,7 +955,6 @@ TEST(clUnifiedSharedMemoryTests, givenContextWithMultipleSubdevicesWhenClEnqueue TEST(clUnifiedSharedMemoryTests, 
givenContextWithMultipleSubdevicesWhenClEnqueueMigrateMemINTELisCalledThenExplicitlyMigrateMemoryToTheRootDeviceAssociatedWithCommandQueue) { DebugManagerStateRestore restorer; DebugManager.flags.UseKmdMigration.set(1); - DebugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.set(1); UltClDeviceFactory deviceFactory{1, 4}; cl_device_id allDevices[] = {deviceFactory.rootDevices[0], deviceFactory.subDevices[0], deviceFactory.subDevices[1], diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl index 0ae86734e8..1043f8c2fc 100644 --- a/shared/source/debug_settings/debug_variables_base.inl +++ b/shared/source/debug_settings/debug_variables_base.inl @@ -79,6 +79,7 @@ DECLARE_DEBUG_VARIABLE(bool, DisableForceToStateless, false, "Do not force state DECLARE_DEBUG_VARIABLE(bool, ForceTheoreticalMaxWorkGroupCount, false, "Do not apply any limitation to max cooperative/concurrent work-group count queries") DECLARE_DEBUG_VARIABLE(bool, DisableScratchPages, false, "Disable scratch pages during VM creations") DECLARE_DEBUG_VARIABLE(bool, DontDisableZebinIfVmeUsed, false, "When enabled, driver will not add -cl-intel-disable-zebin internal option when vme is used") +DECLARE_DEBUG_VARIABLE(bool, AppendMemoryPrefetchForKmdMigratedSharedAllocations, true, "Allow prefetching shared memory to the device associated with the specified command list") DECLARE_DEBUG_VARIABLE(std::string, ForceDeviceId, std::string("unk"), "Override device id in AUB/TBX mode") DECLARE_DEBUG_VARIABLE(std::string, FilterDeviceId, std::string("unk"), "Device id filter, adapter matching device id will be opened; ignored when unk") DECLARE_DEBUG_VARIABLE(std::string, FilterBdfPath, std::string("unk"), "Linux-only, BDF path filter, only matching paths will be opened; ignored when unk") @@ -199,7 +200,6 @@ DECLARE_DEBUG_VARIABLE(int32_t, UseDrmVirtualEnginesForBcs, -1, "-1: default, 0: DECLARE_DEBUG_VARIABLE(int32_t, 
LimitEngineCountForVirtualBcs, -1, "-1: default, >0 Only use VirtualEngine with limited amount of engines, not max ") DECLARE_DEBUG_VARIABLE(int32_t, LimitEngineCountForVirtualCcs, -1, "-1: default, >0 Only use VirtualEngine with limited amount of engines, not max ") DECLARE_DEBUG_VARIABLE(int32_t, CreateContextWithAccessCounters, -1, "-1: default, 0: ignore, 1: create context with Access Counter programming") -DECLARE_DEBUG_VARIABLE(int32_t, AppendMemoryPrefetchForKmdMigratedSharedAllocations, -1, "-1: default, 0: ignore, 1: allow prefetching shared memory to the device associated with the specified command list") DECLARE_DEBUG_VARIABLE(int32_t, AccessCountersTrigger, -1, "-1: default - disabled, 0: disabled, >= 0: triggering thresholds") DECLARE_DEBUG_VARIABLE(int32_t, AccessCountersGranularity, -1, "-1: default - ACG_2MB, >= 0: granularites - 0: ACG_128K, 1: ACG_2M, 2: ACG_16M, 3: ACG_16M") DECLARE_DEBUG_VARIABLE(int32_t, OverridePatIndex, -1, "-1: default, >=0: PatIndex to override") diff --git a/shared/test/common/test_files/igdrcl.config b/shared/test/common/test_files/igdrcl.config index 8cd3a83a7d..ad4fbc518c 100644 --- a/shared/test/common/test_files/igdrcl.config +++ b/shared/test/common/test_files/igdrcl.config @@ -436,7 +436,7 @@ UseDrmVirtualEnginesForBcs = -1 LimitEngineCountForVirtualBcs = -1 LimitEngineCountForVirtualCcs = -1 ForceRunAloneContext = -1 -AppendMemoryPrefetchForKmdMigratedSharedAllocations = -1 +AppendMemoryPrefetchForKmdMigratedSharedAllocations = 1 CreateContextWithAccessCounters = -1 AccessCountersTrigger = -1 AccessCountersGranularity = -1