Make CPU copy for read buffer when host ptr is write combined on DG2

With this commit, on DG2 the 32-bit driver checks whether the host pointer
passed to clEnqueueReadBuffer is write-combined memory. If it is, the copy
is made on the CPU.

Signed-off-by: Maciej Plewka <maciej.plewka@intel.com>
This commit is contained in:
Maciej Plewka
2022-06-13 14:13:34 +00:00
committed by Compute-Runtime-Automation
parent cdfe2ce8ad
commit 213dc2fe24
13 changed files with 92 additions and 2 deletions

View File

@@ -836,6 +836,12 @@ size_t CommandQueue::estimateTimestampPacketNodesCount(const MultiDispatchInfo &
bool CommandQueue::bufferCpuCopyAllowed(Buffer *buffer, cl_command_type commandType, cl_bool blocking, size_t size, void *ptr, bool CommandQueue::bufferCpuCopyAllowed(Buffer *buffer, cl_command_type commandType, cl_bool blocking, size_t size, void *ptr,
cl_uint numEventsInWaitList, const cl_event *eventWaitList) { cl_uint numEventsInWaitList, const cl_event *eventWaitList) {
const auto &hwInfo = device->getHardwareInfo();
const auto &hwInfoConfig = HwInfoConfig::get(hwInfo.platform.eProductFamily);
if (CL_COMMAND_READ_BUFFER == commandType && hwInfoConfig->isCpuCopyNecessary(ptr, buffer->getMemoryManager())) {
return true;
}
auto debugVariableSet = false; auto debugVariableSet = false;
// Requested by debug variable or allowed by Buffer // Requested by debug variable or allowed by Buffer
if (CL_COMMAND_READ_BUFFER == commandType && DebugManager.flags.DoCpuCopyOnReadBuffer.get() != -1) { if (CL_COMMAND_READ_BUFFER == commandType && DebugManager.flags.DoCpuCopyOnReadBuffer.get() != -1) {

View File

@@ -38,5 +38,11 @@ void HwHelperHw<Family>::setExtraAllocationData(AllocationData &allocationData,
allocationData.storageInfo.isLockable = false; allocationData.storageInfo.isLockable = false;
} }
} }
if (HwInfoConfig::get(hwInfo.platform.eProductFamily)->isStorageInfoAdjustmentRequired()) {
if (properties.allocationType == AllocationType::BUFFER && !properties.flags.preferCompressed && !properties.flags.shareable) {
allocationData.storageInfo.isLockable = true;
}
}
} }
} // namespace NEO } // namespace NEO

View File

@@ -213,6 +213,7 @@ class MemoryManager {
static uint32_t maxOsContextCount; static uint32_t maxOsContextCount;
virtual void commonCleanup(){}; virtual void commonCleanup(){};
virtual bool isCpuCopyRequired(const void *ptr) { return false; } virtual bool isCpuCopyRequired(const void *ptr) { return false; }
// Reports whether ptr refers to write-combined (WC) memory. This base implementation performs no check and always returns false.
virtual bool isWCMemory(const void *ptr) { return false; }
virtual void registerSysMemAlloc(GraphicsAllocation *allocation){}; virtual void registerSysMemAlloc(GraphicsAllocation *allocation){};
virtual void registerLocalMemAlloc(GraphicsAllocation *allocation, uint32_t rootDeviceIndex){}; virtual void registerLocalMemAlloc(GraphicsAllocation *allocation, uint32_t rootDeviceIndex){};

View File

@@ -23,6 +23,7 @@ struct PipelineSelectArgs;
class OSInterface; class OSInterface;
class HwInfoConfig; class HwInfoConfig;
class GraphicsAllocation; class GraphicsAllocation;
class MemoryManager;
enum class DriverModelType; enum class DriverModelType;
extern HwInfoConfig *hwInfoConfigFactory[IGFX_MAX_PRODUCT]; extern HwInfoConfig *hwInfoConfigFactory[IGFX_MAX_PRODUCT];
@@ -113,6 +114,7 @@ class HwInfoConfig {
virtual bool isBcsReportWaRequired(const HardwareInfo &hwInfo) const = 0; virtual bool isBcsReportWaRequired(const HardwareInfo &hwInfo) const = 0;
virtual bool isBlitCopyRequiredForLocalMemory(const HardwareInfo &hwInfo, const GraphicsAllocation &allocation) const = 0; virtual bool isBlitCopyRequiredForLocalMemory(const HardwareInfo &hwInfo, const GraphicsAllocation &allocation) const = 0;
virtual bool isImplicitScalingSupported(const HardwareInfo &hwInfo) const = 0; virtual bool isImplicitScalingSupported(const HardwareInfo &hwInfo) const = 0;
virtual bool isCpuCopyNecessary(const void *ptr, MemoryManager *memoryManager) const = 0;
MOCKABLE_VIRTUAL ~HwInfoConfig() = default; MOCKABLE_VIRTUAL ~HwInfoConfig() = default;
@@ -208,6 +210,7 @@ class HwInfoConfigHw : public HwInfoConfig {
bool isBcsReportWaRequired(const HardwareInfo &hwInfo) const override; bool isBcsReportWaRequired(const HardwareInfo &hwInfo) const override;
bool isBlitCopyRequiredForLocalMemory(const HardwareInfo &hwInfo, const GraphicsAllocation &allocation) const override; bool isBlitCopyRequiredForLocalMemory(const HardwareInfo &hwInfo, const GraphicsAllocation &allocation) const override;
bool isImplicitScalingSupported(const HardwareInfo &hwInfo) const override; bool isImplicitScalingSupported(const HardwareInfo &hwInfo) const override;
bool isCpuCopyNecessary(const void *ptr, MemoryManager *memoryManager) const override;
protected: protected:
HwInfoConfigHw() = default; HwInfoConfigHw() = default;

View File

@@ -12,6 +12,7 @@
#include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/hw_helper.h"
#include "shared/source/helpers/preamble.h" #include "shared/source/helpers/preamble.h"
#include "shared/source/memory_manager/graphics_allocation.h" #include "shared/source/memory_manager/graphics_allocation.h"
#include "shared/source/memory_manager/memory_manager.h"
#include "shared/source/os_interface/hw_info_config.h" #include "shared/source/os_interface/hw_info_config.h"
namespace NEO { namespace NEO {
@@ -413,4 +414,8 @@ bool HwInfoConfigHw<gfxProduct>::isImplicitScalingSupported(const HardwareInfo &
return false; return false;
} }
// Generic implementation: never requests a CPU copy. Both arguments are ignored here.
template <PRODUCT_FAMILY gfxProduct>
bool HwInfoConfigHw<gfxProduct>::isCpuCopyNecessary(const void *ptr, MemoryManager *memoryManager) const {
    constexpr bool cpuCopyNecessary = false;
    return cpuCopyNecessary;
}
} // namespace NEO } // namespace NEO

View File

@@ -93,6 +93,7 @@ set(NEO_CORE_OS_INTERFACE_WDDM
${CMAKE_CURRENT_SOURCE_DIR}/wddm/wddm.h ${CMAKE_CURRENT_SOURCE_DIR}/wddm/wddm.h
${CMAKE_CURRENT_SOURCE_DIR}/wddm/wddm_defs.h ${CMAKE_CURRENT_SOURCE_DIR}/wddm/wddm_defs.h
${CMAKE_CURRENT_SOURCE_DIR}/wddm/wddm_residency_logger.h ${CMAKE_CURRENT_SOURCE_DIR}/wddm/wddm_residency_logger.h
${CMAKE_CURRENT_SOURCE_DIR}/wddm_memory_manager_${DRIVER_MODEL}.cpp
${CMAKE_CURRENT_SOURCE_DIR}/wddm_memory_manager.cpp ${CMAKE_CURRENT_SOURCE_DIR}/wddm_memory_manager.cpp
${CMAKE_CURRENT_SOURCE_DIR}/wddm_memory_manager.h ${CMAKE_CURRENT_SOURCE_DIR}/wddm_memory_manager.h
${CMAKE_CURRENT_SOURCE_DIR}/wddm/um_km_data_translator.cpp ${CMAKE_CURRENT_SOURCE_DIR}/wddm/um_km_data_translator.cpp

View File

@@ -886,7 +886,7 @@ bool WddmMemoryManager::isCpuCopyRequired(const void *ptr) {
} }
// dummy read // dummy read
//cacheable = *localVariablePointer; // cacheable = *localVariablePointer;
_mm_lfence(); _mm_lfence();
timestamp0 = __rdtsc(); timestamp0 = __rdtsc();
@@ -904,7 +904,7 @@ bool WddmMemoryManager::isCpuCopyRequired(const void *ptr) {
fastestLocalRead = localVariableReadDelta; fastestLocalRead = localVariableReadDelta;
} }
// dummy read // dummy read
//cacheable = *volatileInputPtr; // cacheable = *volatileInputPtr;
_mm_lfence(); _mm_lfence();
timestamp0 = __rdtsc(); timestamp0 = __rdtsc();

View File

@@ -71,6 +71,7 @@ class WddmMemoryManager : public MemoryManager {
void *reserveCpuAddressRange(size_t size, uint32_t rootDeviceIndex) override; void *reserveCpuAddressRange(size_t size, uint32_t rootDeviceIndex) override;
void releaseReservedCpuAddressRange(void *reserved, size_t size, uint32_t rootDeviceIndex) override; void releaseReservedCpuAddressRange(void *reserved, size_t size, uint32_t rootDeviceIndex) override;
bool isCpuCopyRequired(const void *ptr) override; bool isCpuCopyRequired(const void *ptr) override;
bool isWCMemory(const void *ptr) override;
AddressRange reserveGpuAddress(size_t size, uint32_t rootDeviceIndex) override { return AddressRange{0, 0}; }; AddressRange reserveGpuAddress(size_t size, uint32_t rootDeviceIndex) override { return AddressRange{0, 0}; };
void freeGpuAddress(AddressRange addressRange, uint32_t rootDeviceIndex) override{}; void freeGpuAddress(AddressRange addressRange, uint32_t rootDeviceIndex) override{};

View File

@@ -0,0 +1,14 @@
/*
* Copyright (C) 2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/os_interface/windows/wddm_memory_manager.h"
namespace NEO {
// This variant performs no memory query: every pointer is reported as not write-combined.
bool WddmMemoryManager::isWCMemory(const void *ptr) {
    constexpr bool writeCombined = false;
    return writeCombined;
}
} // namespace NEO

View File

@@ -0,0 +1,20 @@
/*
* Copyright (C) 2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/os_interface/windows/wddm_memory_manager.h"
#include <winnt.h>
namespace NEO {
// Bit mask matching the Win32 PAGE_WRITECOMBINE memory-protection modifier.
constexpr uint32_t pageWriteCombine = 0x400;

// Reports whether the memory region containing ptr was allocated with the
// write-combine protection modifier.
//
// Returns false when the region cannot be queried (for example, ptr is not a
// valid user-mode address), so callers never act on indeterminate data.
bool WddmMemoryManager::isWCMemory(const void *ptr) {
    MEMORY_BASIC_INFORMATION info = {};
    // VirtualQuery returns 0 on failure and does not fill in the output
    // structure, so its fields must not be trusted in that case.
    if (VirtualQuery(ptr, &info, sizeof(info)) == 0) {
        return false;
    }
    return (info.AllocationProtect & pageWriteCombine) != 0;
}
} // namespace NEO

View File

@@ -5,6 +5,8 @@
* *
*/ */
#include "shared/source/memory_manager/memory_manager.h"
template <> template <>
void HwInfoConfigHw<gfxProduct>::adjustSamplerState(void *sampler, const HardwareInfo &hwInfo) { void HwInfoConfigHw<gfxProduct>::adjustSamplerState(void *sampler, const HardwareInfo &hwInfo) {
using SAMPLER_STATE = typename XE_HPG_COREFamily::SAMPLER_STATE; using SAMPLER_STATE = typename XE_HPG_COREFamily::SAMPLER_STATE;
@@ -180,3 +182,24 @@ template <>
bool HwInfoConfigHw<gfxProduct>::isTimestampWaitSupportedForEvents() const { bool HwInfoConfigHw<gfxProduct>::isTimestampWaitSupportedForEvents() const {
return true; return true;
} }
// DG2 specialization: a CPU copy is requested only in 32-bit builds, and only
// when a memory manager is supplied and it reports ptr as write-combined memory.
template <>
bool HwInfoConfigHw<gfxProduct>::isCpuCopyNecessary(const void *ptr, MemoryManager *memoryManager) const {
    if constexpr (is32bit) {
        // Short-circuit keeps the null check ahead of the query.
        return memoryManager != nullptr && memoryManager->isWCMemory(ptr);
    } else {
        return false;
    }
}
// DG2 specialization: storage info adjustment is required exactly when the
// driver is built as 32-bit.
template <>
bool HwInfoConfigHw<gfxProduct>::isStorageInfoAdjustmentRequired() const {
    return is32bit;
}

View File

@@ -17,3 +17,4 @@ HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, givenHwInfoConfigWhenAskedIfPipeControl
HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, givenHwInfoConfigWhenAskedIfTile64With3DSurfaceOnBCSIsSupportedThenTrueIsReturned, IGFX_DG2); HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, givenHwInfoConfigWhenAskedIfTile64With3DSurfaceOnBCSIsSupportedThenTrueIsReturned, IGFX_DG2);
HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, WhenAllowRenderCompressionIsCalledThenTrueIsReturned, IGFX_DG2); HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, WhenAllowRenderCompressionIsCalledThenTrueIsReturned, IGFX_DG2);
HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, whenConvertingTimestampsToCsDomainThenNothingIsChanged, IGFX_DG2); HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, whenConvertingTimestampsToCsDomainThenNothingIsChanged, IGFX_DG2);
HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, givenHwInfoConfigWhenAskedIfStorageInfoAdjustmentIsRequiredThenFalseIsReturned, IGFX_DG2);

View File

@@ -450,3 +450,12 @@ DG2TEST_F(ProductConfigTests, givenDg2G11DeviceIdWhenDifferentRevisionIsPassedTh
EXPECT_EQ(productConfig, DG2_G11); EXPECT_EQ(productConfig, DG2_G11);
} }
} }
// On DG2 the storage info adjustment must be reported as required in 32-bit
// builds and as not required otherwise.
DG2TEST_F(HwInfoConfigTestDg2, givenHwInfoConfigWhenAskedIfStorageInfoAdjustmentIsRequiredThenTrueIsReturned) {
    const auto *productConfig = HwInfoConfig::get(defaultHwInfo->platform.eProductFamily);
    const bool adjustmentRequired = productConfig->isStorageInfoAdjustmentRequired();
    if constexpr (is32bit) {
        EXPECT_TRUE(adjustmentRequired);
    } else {
        EXPECT_FALSE(adjustmentRequired);
    }
}