From 7334920ed37dac94e11625554fc00d1409e40c6c Mon Sep 17 00:00:00 2001 From: Maciej Dziuban Date: Fri, 9 Jul 2021 15:46:57 +0000 Subject: [PATCH] Add UsmInitialPlacement debug flag Signed-off-by: Maciej Dziuban --- .../test/unit_test/test_files/igdrcl.config | 1 + .../debug_settings/debug_variables_base.inl | 1 + .../cpu_page_fault_manager.cpp | 11 ++- .../cpu_page_fault_manager_tests.cpp | 89 +++++++++++++++++++ 4 files changed, 101 insertions(+), 1 deletion(-) diff --git a/opencl/test/unit_test/test_files/igdrcl.config b/opencl/test/unit_test/test_files/igdrcl.config index 82fc5637d5..97ef60a995 100644 --- a/opencl/test/unit_test/test_files/igdrcl.config +++ b/opencl/test/unit_test/test_files/igdrcl.config @@ -164,6 +164,7 @@ DisableIndirectAccess = -1 ForceOCLVersion = 0 ForceOCL21FeaturesSupport = -1 ForcePreemptionMode = -1 +UsmInitialPlacement = -1 ForceKernelPreemptionMode = -1 NodeOrdinal = -1 OverrideThreadArbitrationPolicy = -1 diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl index d40eeb3b8e..7ef8a853d9 100644 --- a/shared/source/debug_settings/debug_variables_base.inl +++ b/shared/source/debug_settings/debug_variables_base.inl @@ -317,6 +317,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, PassBoundBOToExec, -1, "Pass bound BOs to exec c DECLARE_DEBUG_VARIABLE(int32_t, EnableStaticPartitioning, -1, "Divide workload into partitions during dispatch, -1: default, 0: disabled, 1: enabled") DECLARE_DEBUG_VARIABLE(int32_t, UpdateTaskCountFromWait, -1, " Do not update task count after each enqueue, but send update request while wait, -1: default(disabled), 0: disabled, 1: enabled") DECLARE_DEBUG_VARIABLE(int32_t, DeferOsContextInitialization, -1, "-1: default, 0: create all contexts immediately, 1: defer, if possible") +DECLARE_DEBUG_VARIABLE(int32_t, UsmInitialPlacement, -1, "-1: default, 0: optimize for first CPU access, 1: optimize for first GPU access") DECLARE_DEBUG_VARIABLE(int32_t, 
ForceHostPointerImport, -1, "-1: default, 0: disable, 1: enable, Forces the driver to import every host pointer coming into driver, WARNING this is not spec complaint.") DECLARE_DEBUG_VARIABLE(bool, UseMaxSimdSizeToDeduceMaxWorkgroupSize, false, "With this flag on, max workgroup size is deduced using SIMD32 instead of SIMD8, this causes the max wkg size to be 4 times bigger") DECLARE_DEBUG_VARIABLE(bool, ReturnRawGpuTimestamps, false, "Driver returns raw GPU tiemstamps instead of calculated ones.") diff --git a/shared/source/page_fault_manager/cpu_page_fault_manager.cpp b/shared/source/page_fault_manager/cpu_page_fault_manager.cpp index e56ca071f4..4ab737d0ef 100644 --- a/shared/source/page_fault_manager/cpu_page_fault_manager.cpp +++ b/shared/source/page_fault_manager/cpu_page_fault_manager.cpp @@ -17,7 +17,16 @@ namespace NEO { void PageFaultManager::insertAllocation(void *ptr, size_t size, SVMAllocsManager *unifiedMemoryManager, void *cmdQ, const MemoryProperties &memoryProperties) { - const bool initialPlacementCpu = !memoryProperties.allocFlags.usmInitialPlacementGpu; + bool initialPlacementCpu = true; + if (memoryProperties.allocFlags.usmInitialPlacementGpu) { + initialPlacementCpu = false; + } + if (memoryProperties.allocFlags.usmInitialPlacementCpu) { + initialPlacementCpu = true; + } + if (const int32_t debugFlag = DebugManager.flags.UsmInitialPlacement.get(); debugFlag != -1) { /* evaluated last, so a set debug flag wins over both hints; only the value 1 selects non-CPU placement */ + initialPlacementCpu = debugFlag != 1; + } const auto domain = initialPlacementCpu ? 
AllocationDomain::Cpu : AllocationDomain::None; std::unique_lock lock{mtx}; diff --git a/shared/test/unit_test/page_fault_manager/cpu_page_fault_manager_tests.cpp b/shared/test/unit_test/page_fault_manager/cpu_page_fault_manager_tests.cpp index 5bbeee9468..ea4f4ac906 100644 --- a/shared/test/unit_test/page_fault_manager/cpu_page_fault_manager_tests.cpp +++ b/shared/test/unit_test/page_fault_manager/cpu_page_fault_manager_tests.cpp @@ -451,3 +451,92 @@ TEST(PageFaultManager, givenHwCsrWhenSelectingHandlerThenHwGpuDomainHandlerIsSet EXPECT_EQ(defaultHandler, reinterpret_cast(pageFaultManager->gpuDomainHandler)); } } + +struct PageFaultManagerTestWithDebugFlag : public ::testing::TestWithParam { + void SetUp() { + memoryManager = std::make_unique(executionEnvironment); + unifiedMemoryManager = std::make_unique(memoryManager.get(), false); + pageFaultManager = std::make_unique(); + cmdQ = reinterpret_cast(0xFFFF); + } + + MemoryProperties memoryProperties{}; + MockExecutionEnvironment executionEnvironment{}; + std::unique_ptr memoryManager; + std::unique_ptr unifiedMemoryManager; + std::unique_ptr pageFaultManager; + void *cmdQ; +}; + +TEST_P(PageFaultManagerTestWithDebugFlag, givenDebugFlagWhenInsertingAllocationThenItOverridesHints) { + DebugManagerStateRestore restore; + DebugManager.flags.UsmInitialPlacement.set(GetParam()); // Debug flag takes precedence over the usmInitialPlacementCpu/Gpu hints, so every case expects the flag-derived domain + const auto expectedDomain = GetParam() == 1 ? 
PageFaultManager::AllocationDomain::None : PageFaultManager::AllocationDomain::Cpu; + + void *allocs[] = { + reinterpret_cast(0x1), + reinterpret_cast(0x2), + reinterpret_cast(0x3), + reinterpret_cast(0x4), + }; + + memoryProperties.allocFlags.usmInitialPlacementCpu = 0; + memoryProperties.allocFlags.usmInitialPlacementGpu = 0; + pageFaultManager->insertAllocation(allocs[0], 10, unifiedMemoryManager.get(), cmdQ, memoryProperties); + EXPECT_EQ(expectedDomain, pageFaultManager->memoryData.at(allocs[0]).domain); + + memoryProperties.allocFlags.usmInitialPlacementCpu = 0; + memoryProperties.allocFlags.usmInitialPlacementGpu = 1; + pageFaultManager->insertAllocation(allocs[1], 10, unifiedMemoryManager.get(), cmdQ, memoryProperties); + EXPECT_EQ(expectedDomain, pageFaultManager->memoryData.at(allocs[1]).domain); + + memoryProperties.allocFlags.usmInitialPlacementCpu = 1; + memoryProperties.allocFlags.usmInitialPlacementGpu = 0; + pageFaultManager->insertAllocation(allocs[2], 10, unifiedMemoryManager.get(), cmdQ, memoryProperties); + EXPECT_EQ(expectedDomain, pageFaultManager->memoryData.at(allocs[2]).domain); + + memoryProperties.allocFlags.usmInitialPlacementCpu = 1; + memoryProperties.allocFlags.usmInitialPlacementGpu = 1; + pageFaultManager->insertAllocation(allocs[3], 10, unifiedMemoryManager.get(), cmdQ, memoryProperties); + EXPECT_EQ(expectedDomain, pageFaultManager->memoryData.at(allocs[3]).domain); +} + +INSTANTIATE_TEST_SUITE_P( + PageFaultManagerTestWithDebugFlag, + PageFaultManagerTestWithDebugFlag, + ::testing::Values(0, 1)); + +TEST_F(PageFaultManagerTest, givenNoUsmInitialPlacementFlagsWHenInsertingUsmAllocationThenUseTheDefaultDomain) { + MemoryProperties memoryProperties{}; + MockExecutionEnvironment executionEnvironment{}; + auto memoryManager = std::make_unique(executionEnvironment); + auto unifiedMemoryManager = std::make_unique(memoryManager.get(), false); + auto pageFaultManager = std::make_unique(); + auto cmdQ = reinterpret_cast(0xFFFF); + void 
*allocs[] = { + reinterpret_cast(0x1), + reinterpret_cast(0x2), + reinterpret_cast(0x3), + reinterpret_cast(0x4), + }; + + memoryProperties.allocFlags.usmInitialPlacementCpu = 0; + memoryProperties.allocFlags.usmInitialPlacementGpu = 0; + pageFaultManager->insertAllocation(allocs[0], 10, unifiedMemoryManager.get(), cmdQ, memoryProperties); + EXPECT_EQ(PageFaultManager::AllocationDomain::Cpu, pageFaultManager->memoryData.at(allocs[0]).domain); + + memoryProperties.allocFlags.usmInitialPlacementCpu = 0; + memoryProperties.allocFlags.usmInitialPlacementGpu = 1; + pageFaultManager->insertAllocation(allocs[1], 10, unifiedMemoryManager.get(), cmdQ, memoryProperties); + EXPECT_EQ(PageFaultManager::AllocationDomain::None, pageFaultManager->memoryData.at(allocs[1]).domain); + + memoryProperties.allocFlags.usmInitialPlacementCpu = 1; + memoryProperties.allocFlags.usmInitialPlacementGpu = 0; + pageFaultManager->insertAllocation(allocs[2], 10, unifiedMemoryManager.get(), cmdQ, memoryProperties); + EXPECT_EQ(PageFaultManager::AllocationDomain::Cpu, pageFaultManager->memoryData.at(allocs[2]).domain); + + memoryProperties.allocFlags.usmInitialPlacementCpu = 1; + memoryProperties.allocFlags.usmInitialPlacementGpu = 1; + pageFaultManager->insertAllocation(allocs[3], 10, unifiedMemoryManager.get(), cmdQ, memoryProperties); + EXPECT_EQ(PageFaultManager::AllocationDomain::Cpu, pageFaultManager->memoryData.at(allocs[3]).domain); +}