diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl index 9a17807ca8..899c176cd1 100644 --- a/shared/source/debug_settings/debug_variables_base.inl +++ b/shared/source/debug_settings/debug_variables_base.inl @@ -483,6 +483,7 @@ DECLARE_DEBUG_VARIABLE(bool, EnableDebuggerMmapMemoryAccess, false, "Mmap used t DECLARE_DEBUG_VARIABLE(bool, ForceDefaultGrfCompilationMode, false, "Adds build option -cl-intel-128-GRF-per-thread to force kernel compilation in Default-GRF mode") DECLARE_DEBUG_VARIABLE(bool, ForceLargeGrfCompilationMode, false, "Adds build option -cl-intel-256-GRF-per-thread to force kernel compilation in Large-GRF mode") DECLARE_DEBUG_VARIABLE(bool, EnableConcurrentSharedCrossP2PDeviceAccess, false, "Enables the concurrent use between host and peer devices of shared-allocations ") +DECLARE_DEBUG_VARIABLE(bool, AllocateSharedAllocationsInHeapExtended, false, "When enabled driver can allocate shared unified memory allocation in heap extended. (0 - disable, 1 - enable)") DECLARE_DEBUG_VARIABLE(int32_t, ForceAutoGrfCompilationMode, -1, "Adds build option -*-intel-enable-auto-large-GRF-mode to force kernel compilation") DECLARE_DEBUG_VARIABLE(int32_t, ForceOCLVersion, 0, "Force specific OpenCL API version") DECLARE_DEBUG_VARIABLE(int32_t, ForceOCL21FeaturesSupport, -1, "-1: default, 0: disable, 1:enable. 
Force support of OpenCL 2.0 and OpenCL 2.1 API features") diff --git a/shared/source/os_interface/linux/drm_memory_manager.cpp b/shared/source/os_interface/linux/drm_memory_manager.cpp index f6e750b592..cbc66a4fcb 100644 --- a/shared/source/os_interface/linux/drm_memory_manager.cpp +++ b/shared/source/os_interface/linux/drm_memory_manager.cpp @@ -2128,7 +2128,19 @@ GraphicsAllocation *DrmMemoryManager::createSharedUnifiedMemoryAllocation(const auto alignment = allocationData.alignment; auto totalSizeToAlloc = size + alignment; - auto cpuPointer = this->mmapFunction(0, totalSizeToAlloc, PROT_NONE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); + + uint64_t preferredAddress = 0; + auto gfxPartition = getGfxPartition(allocationData.rootDeviceIndex); + auto canAllocateInHeapExtended = DebugManager.flags.AllocateSharedAllocationsInHeapExtended.get(); + if (canAllocateInHeapExtended && gfxPartition->getHeapLimit(HeapIndex::HEAP_EXTENDED) > 0u && !allocationData.flags.resource48Bit) { + preferredAddress = acquireGpuRange(totalSizeToAlloc, allocationData.rootDeviceIndex, HeapIndex::HEAP_EXTENDED); + } + + auto cpuPointer = this->mmapFunction(reinterpret_cast(preferredAddress), totalSizeToAlloc, PROT_NONE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); + if (castToUint64(cpuPointer) != preferredAddress) { + releaseGpuRange(reinterpret_cast(preferredAddress), totalSizeToAlloc, allocationData.rootDeviceIndex); + preferredAddress = 0; + } if (cpuPointer == MAP_FAILED) { PRINT_DEBUG_STRING(DebugManager.flags.PrintDebugMessages.get(), stderr, "%s", "mmap return of MAP_FAILED\n"); @@ -2160,6 +2172,7 @@ GraphicsAllocation *DrmMemoryManager::createSharedUnifiedMemoryAllocation(const if (ret) { this->munmapFunction(cpuPointer, totalSizeToAlloc); + releaseGpuRange(reinterpret_cast(preferredAddress), totalSizeToAlloc, allocationData.rootDeviceIndex); return nullptr; } @@ -2169,6 +2182,7 @@ GraphicsAllocation *DrmMemoryManager::createSharedUnifiedMemoryAllocation(const if 
(!ioctlHelper->setVmBoAdvise(bo->peekHandle(), vmAdviseAttribute, nullptr)) { this->munmapFunction(cpuBasePointer, totalSizeToAlloc); + releaseGpuRange(reinterpret_cast(preferredAddress), totalSizeToAlloc, allocationData.rootDeviceIndex); return nullptr; } @@ -2182,6 +2196,7 @@ GraphicsAllocation *DrmMemoryManager::createSharedUnifiedMemoryAllocation(const uint64_t offset = 0; if (!retrieveMmapOffsetForBufferObject(allocationData.rootDeviceIndex, *bo, mmapOffsetWb, offset)) { this->munmapFunction(cpuBasePointer, totalSizeToAlloc); + releaseGpuRange(reinterpret_cast(preferredAddress), totalSizeToAlloc, allocationData.rootDeviceIndex); return nullptr; } @@ -2200,6 +2215,7 @@ GraphicsAllocation *DrmMemoryManager::createSharedUnifiedMemoryAllocation(const auto allocation = std::make_unique(allocationData.rootDeviceIndex, allocationData.type, bos, cpuPointer, canonizedGpuAddress, size, MemoryPool::LocalMemory); allocation->setMmapPtr(cpuBasePointer); allocation->setMmapSize(totalSizeToAlloc); + allocation->setReservedAddressRange(reinterpret_cast(preferredAddress), totalSizeToAlloc); if (!allocation->setCacheRegion(&drm, static_cast(allocationData.cacheRegion))) { this->munmapFunction(cpuBasePointer, totalSizeToAlloc); for (auto bo : bos) { diff --git a/shared/test/common/mocks/CMakeLists.txt b/shared/test/common/mocks/CMakeLists.txt index e03f521775..af3908d9f4 100644 --- a/shared/test/common/mocks/CMakeLists.txt +++ b/shared/test/common/mocks/CMakeLists.txt @@ -105,6 +105,7 @@ else() ${CMAKE_CURRENT_SOURCE_DIR}/linux/mock_drm_memory_manager.h ${CMAKE_CURRENT_SOURCE_DIR}/linux/mock_drm_wrappers.cpp ${CMAKE_CURRENT_SOURCE_DIR}/linux/mock_drm_wrappers.h + ${CMAKE_CURRENT_SOURCE_DIR}/linux/mock_ioctl_helper.h ${CMAKE_CURRENT_SOURCE_DIR}/linux/mock_os_time_linux.h ) endif() diff --git a/shared/test/common/mocks/linux/mock_drm_memory_manager.h b/shared/test/common/mocks/linux/mock_drm_memory_manager.h index ab42ab9bb7..06effca5b8 100644 --- 
a/shared/test/common/mocks/linux/mock_drm_memory_manager.h +++ b/shared/test/common/mocks/linux/mock_drm_memory_manager.h @@ -56,6 +56,7 @@ class TestedDrmMemoryManager : public MemoryManagerCreate { using DrmMemoryManager::createAllocWithAlignmentFromUserptr; using DrmMemoryManager::createGraphicsAllocation; using DrmMemoryManager::createMultiHostAllocation; + using DrmMemoryManager::createSharedUnifiedMemoryAllocation; using DrmMemoryManager::eraseSharedBoHandleWrapper; using DrmMemoryManager::eraseSharedBufferObject; using DrmMemoryManager::getDefaultDrmContextId; diff --git a/shared/test/common/mocks/linux/mock_ioctl_helper.h b/shared/test/common/mocks/linux/mock_ioctl_helper.h new file mode 100644 index 0000000000..34c548e0d8 --- /dev/null +++ b/shared/test/common/mocks/linux/mock_ioctl_helper.h @@ -0,0 +1,27 @@ +/* + * Copyright (C) 2023 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#pragma once +#include "shared/source/os_interface/linux/ioctl_helper.h" + +namespace NEO { + +class MockIoctlHelper : public IoctlHelperPrelim20 { /* Test double: both overridden getters ignore their argument and return the public member values declared below, so a test can set the result directly. */ + public: + using IoctlHelperPrelim20::IoctlHelperPrelim20; /* reuse the base-class constructors */ + unsigned int getIoctlRequestValue(DrmIoctl ioctlRequest) const override { + return ioctlRequestValue; + }; + + int getDrmParamValue(DrmParam drmParam) const override { + return drmParamValue; + } + + unsigned int ioctlRequestValue = 1234u; /* value getIoctlRequestValue() returns for any DrmIoctl */ + int drmParamValue = 1234; /* value getDrmParamValue() returns for any DrmParam */ +}; +} // namespace NEO diff --git a/shared/test/common/os_interface/linux/device_command_stream_fixture.h b/shared/test/common/os_interface/linux/device_command_stream_fixture.h index dfe669be22..46308e0c1b 100644 --- a/shared/test/common/os_interface/linux/device_command_stream_fixture.h +++ b/shared/test/common/os_interface/linux/device_command_stream_fixture.h @@ -93,6 +93,7 @@ class DrmMockCustom : public Drm { using Drm::bindAvailable; using Drm::cacheInfo; using Drm::completionFenceSupported; + using Drm::ioctlHelper; using Drm::memoryInfo; using Drm::setupIoctlHelper; diff --git 
a/shared/test/common/test_files/igdrcl.config b/shared/test/common/test_files/igdrcl.config index 4d26646247..3ab7b4ec73 100644 --- a/shared/test/common/test_files/igdrcl.config +++ b/shared/test/common/test_files/igdrcl.config @@ -509,3 +509,4 @@ ExperimentalCopyThroughLockWaitlistSizeThreshold= -1 ForceDummyBlitWa = -1 DetectIndirectAccessInKernel = -1 OptimizeIoqBarriersHandling = -1 +AllocateSharedAllocationsInHeapExtended = 0 \ No newline at end of file diff --git a/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp b/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp index 0bcfbb213d..6d3f7b74aa 100644 --- a/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp +++ b/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp @@ -19,6 +19,7 @@ #include "shared/test/common/mocks/linux/mock_drm_allocation.h" #include "shared/test/common/mocks/linux/mock_drm_command_stream_receiver.h" #include "shared/test/common/mocks/linux/mock_drm_memory_manager.h" +#include "shared/test/common/mocks/linux/mock_ioctl_helper.h" #include "shared/test/common/mocks/linux/mock_os_context_linux.h" #include "shared/test/common/mocks/mock_allocation_properties.h" #include "shared/test/common/mocks/mock_execution_environment.h" @@ -6464,3 +6465,110 @@ TEST_F(DrmMemoryManagerTest, given48bAddressSpaceCpuAnd57bGpuWhenAllocatingHostU EXPECT_NE(hostUSM->getGpuAddress(), gpuAddress); memoryManager->freeGraphicsMemory(hostUSM); } + +TEST_F(DrmMemoryManagerTest, given57bAddressSpaceCpuAndGpuWhenAllocatingSharedUSMThenAddressFromExtendedHeapIsPassedAsHintAndSetAsGpuAddressAndReservedAddress) { /* Scenario: the AllocateSharedAllocationsInHeapExtended flag is on and the mmap mock is set to both capture and allow extended pointers. The first captured pointer must decanonize to an address at or above the HEAP_EXTENDED base and below its limit, and must equal the allocation's GPU address and reserved address pointer. */ + if (defaultHwInfo->capabilityTable.gpuAddressSpace < maxNBitValue(57)) { + GTEST_SKIP(); /* skipped when the default hw info reports a GPU address space below maxNBitValue(57) */ + } + + DebugManagerStateRestore restorer; + DebugManager.flags.AllocateSharedAllocationsInHeapExtended.set(true); + VariableBackup backupCaptureExtendedPointers(&SysCalls::mmapCaptureExtendedPointers, true); + VariableBackup 
backupAllowExtendedPointers(&SysCalls::mmapAllowExtendedPointers, true); + SysCalls::mmapCapturedExtendedPointers.clear(); + std::vector regionInfo(1); + regionInfo[0].region = {drm_i915_gem_memory_class::I915_MEMORY_CLASS_SYSTEM, 0}; + + auto &drm = static_cast(memoryManager->getDrm(mockRootDeviceIndex)); + drm.memoryInfo.reset(new MemoryInfo(regionInfo, drm)); + drm.ioctlHelper = std::make_unique(drm); + + AllocationData allocationData{}; + allocationData.size = MemoryConstants::cacheLineSize; + allocationData.rootDeviceIndex = mockRootDeviceIndex; + allocationData.alignment = MemoryConstants::pageSize; + allocationData.useMmapObject = true; + + auto sharedUSM = memoryManager->createSharedUnifiedMemoryAllocation(allocationData); + EXPECT_NE(nullptr, sharedUSM); + + EXPECT_EQ(2u, SysCalls::mmapCapturedExtendedPointers.size()); /* NOTE(review): what produces the second captured pointer is not visible in this diff - presumably a second mmap call on the same range; confirm */ + auto gpuAddress = reinterpret_cast(SysCalls::mmapCapturedExtendedPointers[0]); + SysCalls::mmapCapturedExtendedPointers.clear(); + auto gmmHelper = memoryManager->getGmmHelper(mockRootDeviceIndex); + EXPECT_LE(memoryManager->getGfxPartition(mockRootDeviceIndex)->getHeapBase(HeapIndex::HEAP_EXTENDED), gmmHelper->decanonize(gpuAddress)); + EXPECT_GT(memoryManager->getGfxPartition(mockRootDeviceIndex)->getHeapLimit(HeapIndex::HEAP_EXTENDED), gmmHelper->decanonize(gpuAddress)); + + EXPECT_EQ(sharedUSM->getGpuAddress(), gpuAddress); + EXPECT_EQ(sharedUSM->getReservedAddressPtr(), reinterpret_cast(gpuAddress)); + memoryManager->freeGraphicsMemory(sharedUSM); +} + +TEST_F(DrmMemoryManagerTest, given48bAddressSpaceCpuAnd57bGpuWhenAllocatingSharedUSMThenAddressFromExtendedHeapIsPassedAsHintAndThenIgnored) { /* Scenario: the flag is on, but the mmap mock is set not to allow extended pointers. Exactly one pointer is captured; it still decanonizes into the HEAP_EXTENDED range, and the allocation's GPU address must differ from it. */ + if (defaultHwInfo->capabilityTable.gpuAddressSpace < maxNBitValue(57)) { + GTEST_SKIP(); + } + DebugManagerStateRestore restorer; + DebugManager.flags.AllocateSharedAllocationsInHeapExtended.set(true); + VariableBackup backupCaptureExtendedPointers(&SysCalls::mmapCaptureExtendedPointers, true); + VariableBackup 
backupAllowExtendedPointers(&SysCalls::mmapAllowExtendedPointers, false); + SysCalls::mmapCapturedExtendedPointers.clear(); + std::vector regionInfo(1); + regionInfo[0].region = {drm_i915_gem_memory_class::I915_MEMORY_CLASS_SYSTEM, 0}; + + auto &drm = static_cast(memoryManager->getDrm(mockRootDeviceIndex)); + drm.memoryInfo.reset(new MemoryInfo(regionInfo, drm)); + drm.ioctlHelper = std::make_unique(drm); + + AllocationData allocationData{}; + allocationData.size = MemoryConstants::cacheLineSize; + allocationData.rootDeviceIndex = mockRootDeviceIndex; + allocationData.alignment = MemoryConstants::pageSize; + allocationData.useMmapObject = true; + + auto sharedUSM = memoryManager->createSharedUnifiedMemoryAllocation(allocationData); + EXPECT_NE(nullptr, sharedUSM); + + EXPECT_EQ(1u, SysCalls::mmapCapturedExtendedPointers.size()); + auto gpuAddress = reinterpret_cast(SysCalls::mmapCapturedExtendedPointers[0]); + SysCalls::mmapCapturedExtendedPointers.clear(); + auto gmmHelper = memoryManager->getGmmHelper(mockRootDeviceIndex); + EXPECT_LE(memoryManager->getGfxPartition(mockRootDeviceIndex)->getHeapBase(HeapIndex::HEAP_EXTENDED), gmmHelper->decanonize(gpuAddress)); + EXPECT_GT(memoryManager->getGfxPartition(mockRootDeviceIndex)->getHeapLimit(HeapIndex::HEAP_EXTENDED), gmmHelper->decanonize(gpuAddress)); + + EXPECT_NE(sharedUSM->getGpuAddress(), gpuAddress); + memoryManager->freeGraphicsMemory(sharedUSM); +} + +TEST_F(DrmMemoryManagerTest, given57bAddressSpaceCpuAndGpuWhenAllocating48bResourceSharedUSMThenAddressFromExtendedHeapIsNotUsed) { /* Scenario: the flag is on and extended pointers are allowed, but allocationData.flags.resource48Bit is set. No extended pointer may be captured, the GPU address must be below maxNBitValue(48), and the reserved address pointer must be nullptr. */ + if (defaultHwInfo->capabilityTable.gpuAddressSpace < maxNBitValue(57)) { + GTEST_SKIP(); + } + DebugManagerStateRestore restorer; + DebugManager.flags.AllocateSharedAllocationsInHeapExtended.set(true); + VariableBackup backupCaptureExtendedPointers(&SysCalls::mmapCaptureExtendedPointers, true); + VariableBackup backupAllowExtendedPointers(&SysCalls::mmapAllowExtendedPointers, true); + 
SysCalls::mmapCapturedExtendedPointers.clear(); + std::vector regionInfo(1); + regionInfo[0].region = {drm_i915_gem_memory_class::I915_MEMORY_CLASS_SYSTEM, 0}; + + auto &drm = static_cast(memoryManager->getDrm(mockRootDeviceIndex)); + drm.memoryInfo.reset(new MemoryInfo(regionInfo, drm)); + drm.ioctlHelper = std::make_unique(drm); + + AllocationData allocationData{}; + allocationData.size = MemoryConstants::cacheLineSize; + allocationData.rootDeviceIndex = mockRootDeviceIndex; + allocationData.alignment = MemoryConstants::pageSize; + allocationData.useMmapObject = true; + allocationData.flags.resource48Bit = true; /* the one field this test sets beyond what the two tests above set on allocationData */ + + auto sharedUSM = memoryManager->createSharedUnifiedMemoryAllocation(allocationData); + EXPECT_NE(nullptr, sharedUSM); + + EXPECT_EQ(0u, SysCalls::mmapCapturedExtendedPointers.size()); + + EXPECT_LT(sharedUSM->getGpuAddress(), maxNBitValue(48)); + EXPECT_EQ(sharedUSM->getReservedAddressPtr(), nullptr); + memoryManager->freeGraphicsMemory(sharedUSM); +} diff --git a/shared/test/unit_test/os_interface/linux/drm_tests.cpp b/shared/test/unit_test/os_interface/linux/drm_tests.cpp index 3fb7f3bf33..7ea666e333 100644 --- a/shared/test/unit_test/os_interface/linux/drm_tests.cpp +++ b/shared/test/unit_test/os_interface/linux/drm_tests.cpp @@ -22,6 +22,7 @@ #include "shared/test/common/helpers/test_files.h" #include "shared/test/common/helpers/variable_backup.h" #include "shared/test/common/libult/linux/drm_mock.h" +#include "shared/test/common/mocks/linux/mock_ioctl_helper.h" #include "shared/test/common/mocks/mock_execution_environment.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/common/os_interface/linux/sys_calls_linux_ult.h" @@ -1415,21 +1416,6 @@ TEST(DrmWrapperTest, WhenGettingRevisionParamValueThenIoctlHelperIsNotNeeded) { EXPECT_EQ(getDrmParamValue(DrmParam::ParamRevision, nullptr), static_cast(I915_PARAM_REVISION)); } -class MockIoctlHelper : public IoctlHelperPrelim20 { - public: - using 
IoctlHelperPrelim20::IoctlHelperPrelim20; - unsigned int getIoctlRequestValue(DrmIoctl ioctlRequest) const override { - return ioctlRequestValue; - }; - - int getDrmParamValue(DrmParam drmParam) const override { - return drmParamValue; - } - - unsigned int ioctlRequestValue = 1234u; - int drmParamValue = 1234; -}; - TEST(DrmWrapperTest, whenGettingDrmParamOrIoctlRequestValueThenUseIoctlHelperWhenAvailable) { auto executionEnvironment = std::make_unique(); DrmMock drm{*executionEnvironment->rootDeviceEnvironments[0]};