From 213dc2fe2417f9a45b9ecbc1e08976d21697f896 Mon Sep 17 00:00:00 2001 From: Maciej Plewka Date: Mon, 13 Jun 2022 14:13:34 +0000 Subject: [PATCH] Make CPU copy for read buffer when host ptr is write combined on DG2 With this commit, the 32-bit driver on DG2 checks whether the host ptr passed to clEnqueueReadBuffer is write-combined memory. If it is, the copy is made on the CPU. Signed-off-by: Maciej Plewka --- opencl/source/command_queue/command_queue.cpp | 6 +++++ .../extra_allocation_data_xehp_and_later.inl | 6 +++++ shared/source/memory_manager/memory_manager.h | 1 + shared/source/os_interface/hw_info_config.h | 3 +++ shared/source/os_interface/hw_info_config.inl | 5 ++++ .../os_interface/windows/CMakeLists.txt | 1 + .../windows/wddm_memory_manager.cpp | 4 ++-- .../windows/wddm_memory_manager.h | 1 + .../wddm_memory_manager_drm_or_wddm.cpp | 14 +++++++++++ .../windows/wddm_memory_manager_wddm.cpp | 20 ++++++++++++++++ .../dg2/os_agnostic_hw_info_config_dg2.inl | 23 +++++++++++++++++++ .../dg2/excludes_xe_hpg_core_dg2.cpp | 1 + .../dg2/hw_info_config_tests_dg2.cpp | 9 ++++++++ 13 files changed, 92 insertions(+), 2 deletions(-) create mode 100644 shared/source/os_interface/windows/wddm_memory_manager_drm_or_wddm.cpp create mode 100644 shared/source/os_interface/windows/wddm_memory_manager_wddm.cpp diff --git a/opencl/source/command_queue/command_queue.cpp b/opencl/source/command_queue/command_queue.cpp index dfd5885b65..4fa2736cd3 100644 --- a/opencl/source/command_queue/command_queue.cpp +++ b/opencl/source/command_queue/command_queue.cpp @@ -836,6 +836,12 @@ size_t CommandQueue::estimateTimestampPacketNodesCount(const MultiDispatchInfo & bool CommandQueue::bufferCpuCopyAllowed(Buffer *buffer, cl_command_type commandType, cl_bool blocking, size_t size, void *ptr, cl_uint numEventsInWaitList, const cl_event *eventWaitList) { + const auto &hwInfo = device->getHardwareInfo(); + const auto &hwInfoConfig = HwInfoConfig::get(hwInfo.platform.eProductFamily); + if 
(CL_COMMAND_READ_BUFFER == commandType && hwInfoConfig->isCpuCopyNecessary(ptr, buffer->getMemoryManager())) { + return true; + } + auto debugVariableSet = false; // Requested by debug variable or allowed by Buffer if (CL_COMMAND_READ_BUFFER == commandType && DebugManager.flags.DoCpuCopyOnReadBuffer.get() != -1) { diff --git a/shared/source/helpers/extra_allocation_data_xehp_and_later.inl b/shared/source/helpers/extra_allocation_data_xehp_and_later.inl index f748f3d569..d08c09162f 100644 --- a/shared/source/helpers/extra_allocation_data_xehp_and_later.inl +++ b/shared/source/helpers/extra_allocation_data_xehp_and_later.inl @@ -38,5 +38,11 @@ void HwHelperHw::setExtraAllocationData(AllocationData &allocationData, allocationData.storageInfo.isLockable = false; } } + + if (HwInfoConfig::get(hwInfo.platform.eProductFamily)->isStorageInfoAdjustmentRequired()) { + if (properties.allocationType == AllocationType::BUFFER && !properties.flags.preferCompressed && !properties.flags.shareable) { + allocationData.storageInfo.isLockable = true; + } + } } } // namespace NEO diff --git a/shared/source/memory_manager/memory_manager.h b/shared/source/memory_manager/memory_manager.h index 7e1c4a59ac..00f028c389 100644 --- a/shared/source/memory_manager/memory_manager.h +++ b/shared/source/memory_manager/memory_manager.h @@ -213,6 +213,7 @@ class MemoryManager { static uint32_t maxOsContextCount; virtual void commonCleanup(){}; virtual bool isCpuCopyRequired(const void *ptr) { return false; } + virtual bool isWCMemory(const void *ptr) { return false; } virtual void registerSysMemAlloc(GraphicsAllocation *allocation){}; virtual void registerLocalMemAlloc(GraphicsAllocation *allocation, uint32_t rootDeviceIndex){}; diff --git a/shared/source/os_interface/hw_info_config.h b/shared/source/os_interface/hw_info_config.h index e260c944a9..f7ed60f16f 100644 --- a/shared/source/os_interface/hw_info_config.h +++ b/shared/source/os_interface/hw_info_config.h @@ -23,6 +23,7 @@ struct 
PipelineSelectArgs; class OSInterface; class HwInfoConfig; class GraphicsAllocation; +class MemoryManager; enum class DriverModelType; extern HwInfoConfig *hwInfoConfigFactory[IGFX_MAX_PRODUCT]; @@ -113,6 +114,7 @@ class HwInfoConfig { virtual bool isBcsReportWaRequired(const HardwareInfo &hwInfo) const = 0; virtual bool isBlitCopyRequiredForLocalMemory(const HardwareInfo &hwInfo, const GraphicsAllocation &allocation) const = 0; virtual bool isImplicitScalingSupported(const HardwareInfo &hwInfo) const = 0; + virtual bool isCpuCopyNecessary(const void *ptr, MemoryManager *memoryManager) const = 0; MOCKABLE_VIRTUAL ~HwInfoConfig() = default; @@ -208,6 +210,7 @@ class HwInfoConfigHw : public HwInfoConfig { bool isBcsReportWaRequired(const HardwareInfo &hwInfo) const override; bool isBlitCopyRequiredForLocalMemory(const HardwareInfo &hwInfo, const GraphicsAllocation &allocation) const override; bool isImplicitScalingSupported(const HardwareInfo &hwInfo) const override; + bool isCpuCopyNecessary(const void *ptr, MemoryManager *memoryManager) const override; protected: HwInfoConfigHw() = default; diff --git a/shared/source/os_interface/hw_info_config.inl b/shared/source/os_interface/hw_info_config.inl index 9b1eec53cc..e5d59266e3 100644 --- a/shared/source/os_interface/hw_info_config.inl +++ b/shared/source/os_interface/hw_info_config.inl @@ -12,6 +12,7 @@ #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/preamble.h" #include "shared/source/memory_manager/graphics_allocation.h" +#include "shared/source/memory_manager/memory_manager.h" #include "shared/source/os_interface/hw_info_config.h" namespace NEO { @@ -413,4 +414,8 @@ bool HwInfoConfigHw::isImplicitScalingSupported(const HardwareInfo & return false; } +template +bool HwInfoConfigHw::isCpuCopyNecessary(const void *ptr, MemoryManager *memoryManager) const { + return false; +} } // namespace NEO diff --git a/shared/source/os_interface/windows/CMakeLists.txt 
b/shared/source/os_interface/windows/CMakeLists.txt index 108a8f1d37..b231b9fba8 100644 --- a/shared/source/os_interface/windows/CMakeLists.txt +++ b/shared/source/os_interface/windows/CMakeLists.txt @@ -93,6 +93,7 @@ set(NEO_CORE_OS_INTERFACE_WDDM ${CMAKE_CURRENT_SOURCE_DIR}/wddm/wddm.h ${CMAKE_CURRENT_SOURCE_DIR}/wddm/wddm_defs.h ${CMAKE_CURRENT_SOURCE_DIR}/wddm/wddm_residency_logger.h + ${CMAKE_CURRENT_SOURCE_DIR}/wddm_memory_manager_${DRIVER_MODEL}.cpp ${CMAKE_CURRENT_SOURCE_DIR}/wddm_memory_manager.cpp ${CMAKE_CURRENT_SOURCE_DIR}/wddm_memory_manager.h ${CMAKE_CURRENT_SOURCE_DIR}/wddm/um_km_data_translator.cpp diff --git a/shared/source/os_interface/windows/wddm_memory_manager.cpp b/shared/source/os_interface/windows/wddm_memory_manager.cpp index 0596298af8..827d785950 100644 --- a/shared/source/os_interface/windows/wddm_memory_manager.cpp +++ b/shared/source/os_interface/windows/wddm_memory_manager.cpp @@ -886,7 +886,7 @@ bool WddmMemoryManager::isCpuCopyRequired(const void *ptr) { } // dummy read - //cacheable = *localVariablePointer; + // cacheable = *localVariablePointer; _mm_lfence(); timestamp0 = __rdtsc(); @@ -904,7 +904,7 @@ bool WddmMemoryManager::isCpuCopyRequired(const void *ptr) { fastestLocalRead = localVariableReadDelta; } // dummy read - //cacheable = *volatileInputPtr; + // cacheable = *volatileInputPtr; _mm_lfence(); timestamp0 = __rdtsc(); diff --git a/shared/source/os_interface/windows/wddm_memory_manager.h b/shared/source/os_interface/windows/wddm_memory_manager.h index 765634bd17..d7e88ac3b2 100644 --- a/shared/source/os_interface/windows/wddm_memory_manager.h +++ b/shared/source/os_interface/windows/wddm_memory_manager.h @@ -71,6 +71,7 @@ class WddmMemoryManager : public MemoryManager { void *reserveCpuAddressRange(size_t size, uint32_t rootDeviceIndex) override; void releaseReservedCpuAddressRange(void *reserved, size_t size, uint32_t rootDeviceIndex) override; bool isCpuCopyRequired(const void *ptr) override; + bool isWCMemory(const void 
*ptr) override; AddressRange reserveGpuAddress(size_t size, uint32_t rootDeviceIndex) override { return AddressRange{0, 0}; }; void freeGpuAddress(AddressRange addressRange, uint32_t rootDeviceIndex) override{}; diff --git a/shared/source/os_interface/windows/wddm_memory_manager_drm_or_wddm.cpp b/shared/source/os_interface/windows/wddm_memory_manager_drm_or_wddm.cpp new file mode 100644 index 0000000000..94b13f3167 --- /dev/null +++ b/shared/source/os_interface/windows/wddm_memory_manager_drm_or_wddm.cpp @@ -0,0 +1,14 @@ +/* + * Copyright (C) 2022 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/os_interface/windows/wddm_memory_manager.h" + +namespace NEO { +bool WddmMemoryManager::isWCMemory(const void *ptr) { + return false; +} +} // namespace NEO \ No newline at end of file diff --git a/shared/source/os_interface/windows/wddm_memory_manager_wddm.cpp b/shared/source/os_interface/windows/wddm_memory_manager_wddm.cpp new file mode 100644 index 0000000000..ccf2879be0 --- /dev/null +++ b/shared/source/os_interface/windows/wddm_memory_manager_wddm.cpp @@ -0,0 +1,20 @@ +/* + * Copyright (C) 2022 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/os_interface/windows/wddm_memory_manager.h" + +#include + +namespace NEO { +constexpr uint32_t pageWriteCombine = 0x400; + +bool WddmMemoryManager::isWCMemory(const void *ptr) { + MEMORY_BASIC_INFORMATION info; + VirtualQuery(ptr, &info, sizeof(info)); + return info.AllocationProtect & pageWriteCombine; +} +} // namespace NEO \ No newline at end of file diff --git a/shared/source/xe_hpg_core/dg2/os_agnostic_hw_info_config_dg2.inl b/shared/source/xe_hpg_core/dg2/os_agnostic_hw_info_config_dg2.inl index 45f79c1712..9fa0b79a36 100644 --- a/shared/source/xe_hpg_core/dg2/os_agnostic_hw_info_config_dg2.inl +++ b/shared/source/xe_hpg_core/dg2/os_agnostic_hw_info_config_dg2.inl @@ -5,6 +5,8 @@ * */ +#include "shared/source/memory_manager/memory_manager.h" 
+ template <> void HwInfoConfigHw::adjustSamplerState(void *sampler, const HardwareInfo &hwInfo) { using SAMPLER_STATE = typename XE_HPG_COREFamily::SAMPLER_STATE; @@ -180,3 +182,24 @@ template <> bool HwInfoConfigHw::isTimestampWaitSupportedForEvents() const { return true; } + +template <> +bool HwInfoConfigHw::isCpuCopyNecessary(const void *ptr, MemoryManager *memoryManager) const { + if (memoryManager) { + if constexpr (is32bit) { + return memoryManager->isWCMemory(ptr); + } else { + return false; + } + } else { + return false; + } +} +template <> +bool HwInfoConfigHw::isStorageInfoAdjustmentRequired() const { + if constexpr (is32bit) { + return true; + } else { + return false; + } +} diff --git a/shared/test/unit_test/xe_hpg_core/dg2/excludes_xe_hpg_core_dg2.cpp b/shared/test/unit_test/xe_hpg_core/dg2/excludes_xe_hpg_core_dg2.cpp index 39f7ed09a4..4a06a2162d 100644 --- a/shared/test/unit_test/xe_hpg_core/dg2/excludes_xe_hpg_core_dg2.cpp +++ b/shared/test/unit_test/xe_hpg_core/dg2/excludes_xe_hpg_core_dg2.cpp @@ -17,3 +17,4 @@ HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, givenHwInfoConfigWhenAskedIfPipeControl HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, givenHwInfoConfigWhenAskedIfTile64With3DSurfaceOnBCSIsSupportedThenTrueIsReturned, IGFX_DG2); HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, WhenAllowRenderCompressionIsCalledThenTrueIsReturned, IGFX_DG2); HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, whenConvertingTimestampsToCsDomainThenNothingIsChanged, IGFX_DG2); +HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, givenHwInfoConfigWhenAskedIfStorageInfoAdjustmentIsRequiredThenFalseIsReturned, IGFX_DG2); diff --git a/shared/test/unit_test/xe_hpg_core/dg2/hw_info_config_tests_dg2.cpp b/shared/test/unit_test/xe_hpg_core/dg2/hw_info_config_tests_dg2.cpp index cc7f98fcf6..cecaee0427 100644 --- a/shared/test/unit_test/xe_hpg_core/dg2/hw_info_config_tests_dg2.cpp +++ b/shared/test/unit_test/xe_hpg_core/dg2/hw_info_config_tests_dg2.cpp @@ -450,3 +450,12 @@ DG2TEST_F(ProductConfigTests, 
givenDg2G11DeviceIdWhenDifferentRevisionIsPassedTh EXPECT_EQ(productConfig, DG2_G11); } } + +DG2TEST_F(HwInfoConfigTestDg2, givenHwInfoConfigWhenAskedIfStorageInfoAdjustmentIsRequiredThenTrueIsReturned) { + auto hwInfoConfig = HwInfoConfig::get(defaultHwInfo->platform.eProductFamily); + if constexpr (is32bit) { + EXPECT_TRUE(hwInfoConfig->isStorageInfoAdjustmentRequired()); + } else { + EXPECT_FALSE(hwInfoConfig->isStorageInfoAdjustmentRequired()); + } +} \ No newline at end of file