From 30dd15144c4a46017c8617533ee4553bc8052ffb Mon Sep 17 00:00:00 2001 From: Piotr Fusik Date: Thu, 10 Jan 2019 11:10:58 +0100 Subject: [PATCH] Add debug variable to disable host ptr tracking. Change-Id: Ifc866e06a4519e7590d40d8ad136147ecc80225d --- runtime/device/device.h | 4 ++-- runtime/memory_manager/memory_manager.cpp | 6 ++--- runtime/memory_manager/memory_manager.h | 2 +- runtime/os_interface/debug_variables_base.inl | 7 +++--- .../os_interface/linux/drm_memory_manager.cpp | 2 +- .../windows/wddm_memory_manager.cpp | 4 ++-- .../windows/wddm_memory_manager.h | 4 ++-- .../memory_manager/memory_manager_tests.cpp | 17 ++++++++++++++ .../linux/drm_memory_manager_tests.cpp | 23 ++++++++++++++++++- unit_tests/test_files/igdrcl.config | 1 + 10 files changed, 54 insertions(+), 16 deletions(-) diff --git a/runtime/device/device.h b/runtime/device/device.h index c228e83a4b..7889d30333 100644 --- a/runtime/device/device.h +++ b/runtime/device/device.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2017-2018 Intel Corporation + * Copyright (C) 2017-2019 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -116,7 +116,7 @@ class Device : public BaseObject<_cl_device_id> { ExecutionEnvironment *getExecutionEnvironment() const { return executionEnvironment; } const HardwareCapabilities &getHardwareCapabilities() const { return hardwareCapabilities; } uint32_t getDeviceIndex() const { return deviceIndex; } - bool isFullRangeSvm() { + bool isFullRangeSvm() const { return getHardwareInfo().capabilityTable.gpuAddressSpace == MemoryConstants::max48BitAddress; } diff --git a/runtime/memory_manager/memory_manager.cpp b/runtime/memory_manager/memory_manager.cpp index 8fa7a82ff4..a016346017 100644 --- a/runtime/memory_manager/memory_manager.cpp +++ b/runtime/memory_manager/memory_manager.cpp @@ -289,13 +289,11 @@ bool MemoryManager::getAllocationData(AllocationData &allocationData, const Allo GraphicsAllocation *MemoryManager::allocateGraphicsMemoryInPreferredPool(AllocationProperties 
properties, DevicesBitfield devicesBitfield, const void *hostPtr) { AllocationData allocationData; - AllocationStatus status = AllocationStatus::Error; - getAllocationData(allocationData, properties, devicesBitfield, hostPtr); UNRECOVERABLE_IF(allocationData.type == GraphicsAllocation::AllocationType::SHARED_RESOURCE); - GraphicsAllocation *allocation = nullptr; - allocation = allocateGraphicsMemoryInDevicePool(allocationData, status); + AllocationStatus status = AllocationStatus::Error; + GraphicsAllocation *allocation = allocateGraphicsMemoryInDevicePool(allocationData, status); if (!allocation && status == AllocationStatus::RetryInNonDevicePool) { allocation = allocateGraphicsMemory(allocationData); } diff --git a/runtime/memory_manager/memory_manager.h b/runtime/memory_manager/memory_manager.h index 34a6a45ab7..bc81e01674 100644 --- a/runtime/memory_manager/memory_manager.h +++ b/runtime/memory_manager/memory_manager.h @@ -107,7 +107,7 @@ class MemoryManager { } GraphicsAllocation *allocateGraphicsMemoryForHostPtr(size_t size, void *ptr, bool fullRangeSvm, bool requiresL3Flush) { - if (fullRangeSvm) { + if (fullRangeSvm && DebugManager.flags.EnableHostPtrTracking.get()) { return allocateGraphicsMemory({false, size, GraphicsAllocation::AllocationType::UNDECIDED}, ptr); } else { auto allocation = allocateGraphicsMemoryForNonSvmHostPtr(size, ptr); diff --git a/runtime/os_interface/debug_variables_base.inl b/runtime/os_interface/debug_variables_base.inl index ec033491a1..12dece8c5a 100644 --- a/runtime/os_interface/debug_variables_base.inl +++ b/runtime/os_interface/debug_variables_base.inl @@ -70,13 +70,14 @@ DECLARE_DEBUG_VARIABLE(bool, Force32bitAddressing, false, "Forces 32 bit address DECLARE_DEBUG_VARIABLE(bool, ForceCsrFlushing, false, "Forces flushing of command stream receiver") DECLARE_DEBUG_VARIABLE(bool, ForceCsrReprogramming, false, "Forces reprogramming of command stream receiver") DECLARE_DEBUG_VARIABLE(bool, DisableStatelessToStatefulOptimization, 
false, "Disables stateless to stateful optimization for buffers") -DECLARE_DEBUG_VARIABLE(bool, DisableConcurrentBlockExecution, 0, "disables concurrent block kernel execution") +DECLARE_DEBUG_VARIABLE(bool, DisableConcurrentBlockExecution, false, "disables concurrent block kernel execution") DECLARE_DEBUG_VARIABLE(bool, UseNewHeapAllocator, true, "Custom 4GB heap allocator is used") DECLARE_DEBUG_VARIABLE(bool, UseNoRingFlushesKmdMode, true, "Windows only, passes flag to KMD that informs KMD to not emit any ring buffer flushes.") DECLARE_DEBUG_VARIABLE(bool, DisableZeroCopyForUseHostPtr, false, "When active all buffer allocations created with CL_MEM_USE_HOST_PTR flag will not share memory with CPU.") DECLARE_DEBUG_VARIABLE(bool, DisableZeroCopyForBuffers, false, "When active all buffer allocations will not share memory with CPU.") -DECLARE_DEBUG_VARIABLE(bool, ForceResourceLockOnTransferCalls, 0, "Forces resource locking on memory transfer calls") -DECLARE_DEBUG_VARIABLE(bool, EnableMakeResidentOnMapGpuVa, 0, "Make allocations resident on call mapGpuVirtualAddress") +DECLARE_DEBUG_VARIABLE(bool, ForceResourceLockOnTransferCalls, false, "Forces resource locking on memory transfer calls") +DECLARE_DEBUG_VARIABLE(bool, EnableMakeResidentOnMapGpuVa, false, "Make allocations resident on call mapGpuVirtualAddress") +DECLARE_DEBUG_VARIABLE(bool, EnableHostPtrTracking, true, "Enable host ptr tracking") /*FEATURE FLAGS*/ DECLARE_DEBUG_VARIABLE(bool, EnableNV12, true, "Enables NV12 extension") diff --git a/runtime/os_interface/linux/drm_memory_manager.cpp b/runtime/os_interface/linux/drm_memory_manager.cpp index 5b1687ee7b..bb1ed239f9 100644 --- a/runtime/os_interface/linux/drm_memory_manager.cpp +++ b/runtime/os_interface/linux/drm_memory_manager.cpp @@ -73,7 +73,7 @@ DrmMemoryManager::~DrmMemoryManager() { } void DrmMemoryManager::initInternalRangeAllocator(size_t gpuRange) { - if (gpuRange < MemoryConstants::max48BitAddress) { + if (gpuRange < 
MemoryConstants::max48BitAddress || !DebugManager.flags.EnableHostPtrTracking.get()) { // set the allocator with the whole reduced address space range this->limitedGpuAddressRangeAllocator.reset(new AllocatorLimitedRange(0, gpuRange)); diff --git a/runtime/os_interface/windows/wddm_memory_manager.cpp b/runtime/os_interface/windows/wddm_memory_manager.cpp index cdbf0d4bdc..88d9b908c9 100644 --- a/runtime/os_interface/windows/wddm_memory_manager.cpp +++ b/runtime/os_interface/windows/wddm_memory_manager.cpp @@ -416,7 +416,7 @@ void WddmMemoryManager::cleanOsHandles(OsHandleStorage &handleStorage) { } } -void WddmMemoryManager::obtainGpuAddresFromFragments(WddmAllocation *allocation, OsHandleStorage &handleStorage) { +void WddmMemoryManager::obtainGpuAddressFromFragments(WddmAllocation *allocation, OsHandleStorage &handleStorage) { if (this->force32bitAllocations && (handleStorage.fragmentCount > 0)) { auto hostPtr = allocation->getUnderlyingBuffer(); auto fragment = hostPtrManager->getFragment(hostPtr); @@ -436,7 +436,7 @@ void WddmMemoryManager::obtainGpuAddresFromFragments(WddmAllocation *allocation, GraphicsAllocation *WddmMemoryManager::createGraphicsAllocation(OsHandleStorage &handleStorage, size_t hostPtrSize, const void *hostPtr) { auto allocation = new WddmAllocation(const_cast<void *>(hostPtr), hostPtrSize, nullptr, MemoryPool::System4KBPages, getOsContextCount(), false); allocation->fragmentsStorage = handleStorage; - obtainGpuAddresFromFragments(allocation, handleStorage); + obtainGpuAddressFromFragments(allocation, handleStorage); return allocation; } diff --git a/runtime/os_interface/windows/wddm_memory_manager.h b/runtime/os_interface/windows/wddm_memory_manager.h index be8d3e51f6..df7f5103af 100644 --- a/runtime/os_interface/windows/wddm_memory_manager.h +++ b/runtime/os_interface/windows/wddm_memory_manager.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2017-2018 Intel Corporation + * Copyright (C) 2017-2019 Intel Corporation * * SPDX-License-Identifier: MIT * @@ 
-52,7 +52,7 @@ class WddmMemoryManager : public MemoryManager { OsContext *getRegisteredOsContext(uint32_t osContextId) { return registeredOsContexts[osContextId]; } - void obtainGpuAddresFromFragments(WddmAllocation *allocation, OsHandleStorage &handleStorage); + void obtainGpuAddressFromFragments(WddmAllocation *allocation, OsHandleStorage &handleStorage); GraphicsAllocation *createGraphicsAllocation(OsHandleStorage &handleStorage, size_t hostPtrSize, const void *hostPtr) override; diff --git a/unit_tests/memory_manager/memory_manager_tests.cpp b/unit_tests/memory_manager/memory_manager_tests.cpp index 339bbcfb9c..65cbaae3b0 100644 --- a/unit_tests/memory_manager/memory_manager_tests.cpp +++ b/unit_tests/memory_manager/memory_manager_tests.cpp @@ -1076,6 +1076,23 @@ TEST_P(OsAgnosticMemoryManagerWithParams, givenFullGpuAddressSpaceWhenAllocateGr memoryManager.freeGraphicsMemory(allocation); } +TEST_P(OsAgnosticMemoryManagerWithParams, givenDisabledHostPtrTrackingWhenAllocateGraphicsMemoryForHostPtrIsCalledThenAllocationWithoutFragmentsIsCreated) { + DebugManagerStateRestore restore; + DebugManager.flags.EnableHostPtrTracking.set(false); + + bool requiresL3Flush = GetParam(); + ExecutionEnvironment executionEnvironment; + OsAgnosticMemoryManager memoryManager(false, false, executionEnvironment); + auto hostPtr = reinterpret_cast<void *>(0x5001); + + auto allocation = memoryManager.allocateGraphicsMemoryForHostPtr(13, hostPtr, true, requiresL3Flush); + EXPECT_NE(nullptr, allocation); + EXPECT_EQ(0u, allocation->fragmentsStorage.fragmentCount); + EXPECT_EQ(requiresL3Flush, allocation->flushL3Required); + + memoryManager.freeGraphicsMemory(allocation); +} + INSTANTIATE_TEST_CASE_P(OsAgnosticMemoryManagerWithParams, OsAgnosticMemoryManagerWithParams, ::testing::Values(false, true)); diff --git a/unit_tests/os_interface/linux/drm_memory_manager_tests.cpp b/unit_tests/os_interface/linux/drm_memory_manager_tests.cpp index 4db602a37a..cd80daa464 100644 --- 
a/unit_tests/os_interface/linux/drm_memory_manager_tests.cpp +++ b/unit_tests/os_interface/linux/drm_memory_manager_tests.cpp @@ -16,6 +16,7 @@ #include "runtime/mem_obj/buffer.h" #include "runtime/mem_obj/image.h" #include "runtime/memory_manager/host_ptr_manager.h" +#include "runtime/memory_manager/memory_constants.h" #include "runtime/os_interface/linux/allocator_helper.h" #include "runtime/os_interface/linux/drm_allocation.h" #include "runtime/os_interface/linux/drm_buffer_object.h" @@ -2153,7 +2154,7 @@ TEST_F(DrmMemoryManagerTest, givenLimitedRangeAllocatorWhenAskedForExternalAlloc memoryManager->freeGraphicsMemory(drmAllocation); } -TEST_F(DrmMemoryManagerTest, givenLimitedRangeAllocatorWhenAskedForInternalAllocationWithNoPointerandHugeBufferSizeThenAllocationFromInternalHeapFailed) { +TEST_F(DrmMemoryManagerTest, givenLimitedRangeAllocatorWhenAskedForInternalAllocationWithNoPointerAndHugeBufferSizeThenAllocationFromInternalHeapFailed) { memoryManager->forceLimitedRangeAllocator(0xFFFFFFFFF); auto bufferSize = 128 * MemoryConstants::megaByte + 4 * MemoryConstants::pageSize; @@ -3043,3 +3044,23 @@ TEST_F(DrmMemoryManagerTest, givenLimitedRangeAllocatorWithZeroBaseAndSizeWhenAs EXPECT_EQ(base, allocator.getBase()); } + +TEST_F(DrmMemoryManagerTest, givenDisabledHostPtrTrackingWhenAllocateGraphicsMemoryForNonSvmHostPtrIsCalledWithNotAlignedPtrIsPassedThenAllocationIsCreated) { + DebugManagerStateRestore restore; + DebugManager.flags.EnableHostPtrTracking.set(false); + + ExecutionEnvironment executionEnvironment; + std::unique_ptr<TestedDrmMemoryManager> memoryManager(new (std::nothrow) TestedDrmMemoryManager(Drm::get(0), false, false, executionEnvironment)); + + memoryManager->forceLimitedRangeAllocator(MemoryConstants::max48BitAddress); + + void *hostPtr = reinterpret_cast<void *>(0x5001); + auto allocation = memoryManager->allocateGraphicsMemoryForNonSvmHostPtr(13, hostPtr); + + EXPECT_NE(nullptr, allocation); + EXPECT_EQ(reinterpret_cast<void *>(0x5001), allocation->getUnderlyingBuffer()); 
EXPECT_EQ(13u, allocation->getUnderlyingBufferSize()); + EXPECT_EQ(1u, allocation->allocationOffset); + + memoryManager->freeGraphicsMemory(allocation); +} diff --git a/unit_tests/test_files/igdrcl.config b/unit_tests/test_files/igdrcl.config index a93769f65b..1f2c0c7dcb 100644 --- a/unit_tests/test_files/igdrcl.config +++ b/unit_tests/test_files/igdrcl.config @@ -103,3 +103,4 @@ RenderCompressedImagesEnabled = -1 RenderCompressedBuffersEnabled = -1 AUBDumpForceAllToLocalMemory = 0 EnableCacheFlushAfterWalker = 0 +EnableHostPtrTracking = 1