mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-21 09:14:47 +08:00
Make CPU copy for read buffer when host ptr is write combined on DG2
With this commit, the 32-bit driver on DG2 checks whether the host pointer passed to clEnqueueReadBuffer is write-combined memory. If it is, the copy is made on the CPU. Signed-off-by: Maciej Plewka <maciej.plewka@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
cdfe2ce8ad
commit
213dc2fe24
@@ -836,6 +836,12 @@ size_t CommandQueue::estimateTimestampPacketNodesCount(const MultiDispatchInfo &
|
|||||||
bool CommandQueue::bufferCpuCopyAllowed(Buffer *buffer, cl_command_type commandType, cl_bool blocking, size_t size, void *ptr,
|
bool CommandQueue::bufferCpuCopyAllowed(Buffer *buffer, cl_command_type commandType, cl_bool blocking, size_t size, void *ptr,
|
||||||
cl_uint numEventsInWaitList, const cl_event *eventWaitList) {
|
cl_uint numEventsInWaitList, const cl_event *eventWaitList) {
|
||||||
|
|
||||||
|
const auto &hwInfo = device->getHardwareInfo();
|
||||||
|
const auto &hwInfoConfig = HwInfoConfig::get(hwInfo.platform.eProductFamily);
|
||||||
|
if (CL_COMMAND_READ_BUFFER == commandType && hwInfoConfig->isCpuCopyNecessary(ptr, buffer->getMemoryManager())) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
auto debugVariableSet = false;
|
auto debugVariableSet = false;
|
||||||
// Requested by debug variable or allowed by Buffer
|
// Requested by debug variable or allowed by Buffer
|
||||||
if (CL_COMMAND_READ_BUFFER == commandType && DebugManager.flags.DoCpuCopyOnReadBuffer.get() != -1) {
|
if (CL_COMMAND_READ_BUFFER == commandType && DebugManager.flags.DoCpuCopyOnReadBuffer.get() != -1) {
|
||||||
|
|||||||
@@ -38,5 +38,11 @@ void HwHelperHw<Family>::setExtraAllocationData(AllocationData &allocationData,
|
|||||||
allocationData.storageInfo.isLockable = false;
|
allocationData.storageInfo.isLockable = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (HwInfoConfig::get(hwInfo.platform.eProductFamily)->isStorageInfoAdjustmentRequired()) {
|
||||||
|
if (properties.allocationType == AllocationType::BUFFER && !properties.flags.preferCompressed && !properties.flags.shareable) {
|
||||||
|
allocationData.storageInfo.isLockable = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} // namespace NEO
|
} // namespace NEO
|
||||||
|
|||||||
@@ -213,6 +213,7 @@ class MemoryManager {
|
|||||||
static uint32_t maxOsContextCount;
|
static uint32_t maxOsContextCount;
|
||||||
virtual void commonCleanup(){};
|
virtual void commonCleanup(){};
|
||||||
virtual bool isCpuCopyRequired(const void *ptr) { return false; }
|
virtual bool isCpuCopyRequired(const void *ptr) { return false; }
|
||||||
|
virtual bool isWCMemory(const void *ptr) { return false; }
|
||||||
|
|
||||||
virtual void registerSysMemAlloc(GraphicsAllocation *allocation){};
|
virtual void registerSysMemAlloc(GraphicsAllocation *allocation){};
|
||||||
virtual void registerLocalMemAlloc(GraphicsAllocation *allocation, uint32_t rootDeviceIndex){};
|
virtual void registerLocalMemAlloc(GraphicsAllocation *allocation, uint32_t rootDeviceIndex){};
|
||||||
|
|||||||
@@ -23,6 +23,7 @@ struct PipelineSelectArgs;
|
|||||||
class OSInterface;
|
class OSInterface;
|
||||||
class HwInfoConfig;
|
class HwInfoConfig;
|
||||||
class GraphicsAllocation;
|
class GraphicsAllocation;
|
||||||
|
class MemoryManager;
|
||||||
enum class DriverModelType;
|
enum class DriverModelType;
|
||||||
|
|
||||||
extern HwInfoConfig *hwInfoConfigFactory[IGFX_MAX_PRODUCT];
|
extern HwInfoConfig *hwInfoConfigFactory[IGFX_MAX_PRODUCT];
|
||||||
@@ -113,6 +114,7 @@ class HwInfoConfig {
|
|||||||
virtual bool isBcsReportWaRequired(const HardwareInfo &hwInfo) const = 0;
|
virtual bool isBcsReportWaRequired(const HardwareInfo &hwInfo) const = 0;
|
||||||
virtual bool isBlitCopyRequiredForLocalMemory(const HardwareInfo &hwInfo, const GraphicsAllocation &allocation) const = 0;
|
virtual bool isBlitCopyRequiredForLocalMemory(const HardwareInfo &hwInfo, const GraphicsAllocation &allocation) const = 0;
|
||||||
virtual bool isImplicitScalingSupported(const HardwareInfo &hwInfo) const = 0;
|
virtual bool isImplicitScalingSupported(const HardwareInfo &hwInfo) const = 0;
|
||||||
|
virtual bool isCpuCopyNecessary(const void *ptr, MemoryManager *memoryManager) const = 0;
|
||||||
|
|
||||||
MOCKABLE_VIRTUAL ~HwInfoConfig() = default;
|
MOCKABLE_VIRTUAL ~HwInfoConfig() = default;
|
||||||
|
|
||||||
@@ -208,6 +210,7 @@ class HwInfoConfigHw : public HwInfoConfig {
|
|||||||
bool isBcsReportWaRequired(const HardwareInfo &hwInfo) const override;
|
bool isBcsReportWaRequired(const HardwareInfo &hwInfo) const override;
|
||||||
bool isBlitCopyRequiredForLocalMemory(const HardwareInfo &hwInfo, const GraphicsAllocation &allocation) const override;
|
bool isBlitCopyRequiredForLocalMemory(const HardwareInfo &hwInfo, const GraphicsAllocation &allocation) const override;
|
||||||
bool isImplicitScalingSupported(const HardwareInfo &hwInfo) const override;
|
bool isImplicitScalingSupported(const HardwareInfo &hwInfo) const override;
|
||||||
|
bool isCpuCopyNecessary(const void *ptr, MemoryManager *memoryManager) const override;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
HwInfoConfigHw() = default;
|
HwInfoConfigHw() = default;
|
||||||
|
|||||||
@@ -12,6 +12,7 @@
|
|||||||
#include "shared/source/helpers/hw_helper.h"
|
#include "shared/source/helpers/hw_helper.h"
|
||||||
#include "shared/source/helpers/preamble.h"
|
#include "shared/source/helpers/preamble.h"
|
||||||
#include "shared/source/memory_manager/graphics_allocation.h"
|
#include "shared/source/memory_manager/graphics_allocation.h"
|
||||||
|
#include "shared/source/memory_manager/memory_manager.h"
|
||||||
#include "shared/source/os_interface/hw_info_config.h"
|
#include "shared/source/os_interface/hw_info_config.h"
|
||||||
|
|
||||||
namespace NEO {
|
namespace NEO {
|
||||||
@@ -413,4 +414,8 @@ bool HwInfoConfigHw<gfxProduct>::isImplicitScalingSupported(const HardwareInfo &
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <PRODUCT_FAMILY gfxProduct>
|
||||||
|
bool HwInfoConfigHw<gfxProduct>::isCpuCopyNecessary(const void *ptr, MemoryManager *memoryManager) const {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
} // namespace NEO
|
} // namespace NEO
|
||||||
|
|||||||
@@ -93,6 +93,7 @@ set(NEO_CORE_OS_INTERFACE_WDDM
|
|||||||
${CMAKE_CURRENT_SOURCE_DIR}/wddm/wddm.h
|
${CMAKE_CURRENT_SOURCE_DIR}/wddm/wddm.h
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/wddm/wddm_defs.h
|
${CMAKE_CURRENT_SOURCE_DIR}/wddm/wddm_defs.h
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/wddm/wddm_residency_logger.h
|
${CMAKE_CURRENT_SOURCE_DIR}/wddm/wddm_residency_logger.h
|
||||||
|
${CMAKE_CURRENT_SOURCE_DIR}/wddm_memory_manager_${DRIVER_MODEL}.cpp
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/wddm_memory_manager.cpp
|
${CMAKE_CURRENT_SOURCE_DIR}/wddm_memory_manager.cpp
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/wddm_memory_manager.h
|
${CMAKE_CURRENT_SOURCE_DIR}/wddm_memory_manager.h
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/wddm/um_km_data_translator.cpp
|
${CMAKE_CURRENT_SOURCE_DIR}/wddm/um_km_data_translator.cpp
|
||||||
|
|||||||
@@ -886,7 +886,7 @@ bool WddmMemoryManager::isCpuCopyRequired(const void *ptr) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// dummy read
|
// dummy read
|
||||||
//cacheable = *localVariablePointer;
|
// cacheable = *localVariablePointer;
|
||||||
|
|
||||||
_mm_lfence();
|
_mm_lfence();
|
||||||
timestamp0 = __rdtsc();
|
timestamp0 = __rdtsc();
|
||||||
@@ -904,7 +904,7 @@ bool WddmMemoryManager::isCpuCopyRequired(const void *ptr) {
|
|||||||
fastestLocalRead = localVariableReadDelta;
|
fastestLocalRead = localVariableReadDelta;
|
||||||
}
|
}
|
||||||
// dummy read
|
// dummy read
|
||||||
//cacheable = *volatileInputPtr;
|
// cacheable = *volatileInputPtr;
|
||||||
|
|
||||||
_mm_lfence();
|
_mm_lfence();
|
||||||
timestamp0 = __rdtsc();
|
timestamp0 = __rdtsc();
|
||||||
|
|||||||
@@ -71,6 +71,7 @@ class WddmMemoryManager : public MemoryManager {
|
|||||||
void *reserveCpuAddressRange(size_t size, uint32_t rootDeviceIndex) override;
|
void *reserveCpuAddressRange(size_t size, uint32_t rootDeviceIndex) override;
|
||||||
void releaseReservedCpuAddressRange(void *reserved, size_t size, uint32_t rootDeviceIndex) override;
|
void releaseReservedCpuAddressRange(void *reserved, size_t size, uint32_t rootDeviceIndex) override;
|
||||||
bool isCpuCopyRequired(const void *ptr) override;
|
bool isCpuCopyRequired(const void *ptr) override;
|
||||||
|
bool isWCMemory(const void *ptr) override;
|
||||||
|
|
||||||
AddressRange reserveGpuAddress(size_t size, uint32_t rootDeviceIndex) override { return AddressRange{0, 0}; };
|
AddressRange reserveGpuAddress(size_t size, uint32_t rootDeviceIndex) override { return AddressRange{0, 0}; };
|
||||||
void freeGpuAddress(AddressRange addressRange, uint32_t rootDeviceIndex) override{};
|
void freeGpuAddress(AddressRange addressRange, uint32_t rootDeviceIndex) override{};
|
||||||
|
|||||||
@@ -0,0 +1,14 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (C) 2022 Intel Corporation
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: MIT
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "shared/source/os_interface/windows/wddm_memory_manager.h"
|
||||||
|
|
||||||
|
namespace NEO {
|
||||||
|
// Stub variant of the write-combined memory check: the pointer is not
// inspected and the answer is always "not write-combined".
bool WddmMemoryManager::isWCMemory(const void *ptr) {
    constexpr bool writeCombined = false;
    return writeCombined;
}
|
||||||
|
} // namespace NEO
|
||||||
@@ -0,0 +1,20 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (C) 2022 Intel Corporation
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: MIT
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "shared/source/os_interface/windows/wddm_memory_manager.h"
|
||||||
|
|
||||||
|
#include <winnt.h>
|
||||||
|
|
||||||
|
namespace NEO {
|
||||||
|
constexpr uint32_t pageWriteCombine = 0x400;
|
||||||
|
|
||||||
|
bool WddmMemoryManager::isWCMemory(const void *ptr) {
|
||||||
|
MEMORY_BASIC_INFORMATION info;
|
||||||
|
VirtualQuery(ptr, &info, sizeof(info));
|
||||||
|
return info.AllocationProtect & pageWriteCombine;
|
||||||
|
}
|
||||||
|
} // namespace NEO
|
||||||
@@ -5,6 +5,8 @@
|
|||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#include "shared/source/memory_manager/memory_manager.h"
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
void HwInfoConfigHw<gfxProduct>::adjustSamplerState(void *sampler, const HardwareInfo &hwInfo) {
|
void HwInfoConfigHw<gfxProduct>::adjustSamplerState(void *sampler, const HardwareInfo &hwInfo) {
|
||||||
using SAMPLER_STATE = typename XE_HPG_COREFamily::SAMPLER_STATE;
|
using SAMPLER_STATE = typename XE_HPG_COREFamily::SAMPLER_STATE;
|
||||||
@@ -180,3 +182,24 @@ template <>
|
|||||||
bool HwInfoConfigHw<gfxProduct>::isTimestampWaitSupportedForEvents() const {
|
bool HwInfoConfigHw<gfxProduct>::isTimestampWaitSupportedForEvents() const {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
bool HwInfoConfigHw<gfxProduct>::isCpuCopyNecessary(const void *ptr, MemoryManager *memoryManager) const {
|
||||||
|
if (memoryManager) {
|
||||||
|
if constexpr (is32bit) {
|
||||||
|
return memoryManager->isWCMemory(ptr);
|
||||||
|
} else {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
template <>
|
||||||
|
bool HwInfoConfigHw<gfxProduct>::isStorageInfoAdjustmentRequired() const {
|
||||||
|
if constexpr (is32bit) {
|
||||||
|
return true;
|
||||||
|
} else {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -17,3 +17,4 @@ HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, givenHwInfoConfigWhenAskedIfPipeControl
|
|||||||
HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, givenHwInfoConfigWhenAskedIfTile64With3DSurfaceOnBCSIsSupportedThenTrueIsReturned, IGFX_DG2);
|
HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, givenHwInfoConfigWhenAskedIfTile64With3DSurfaceOnBCSIsSupportedThenTrueIsReturned, IGFX_DG2);
|
||||||
HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, WhenAllowRenderCompressionIsCalledThenTrueIsReturned, IGFX_DG2);
|
HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, WhenAllowRenderCompressionIsCalledThenTrueIsReturned, IGFX_DG2);
|
||||||
HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, whenConvertingTimestampsToCsDomainThenNothingIsChanged, IGFX_DG2);
|
HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, whenConvertingTimestampsToCsDomainThenNothingIsChanged, IGFX_DG2);
|
||||||
|
HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, givenHwInfoConfigWhenAskedIfStorageInfoAdjustmentIsRequiredThenFalseIsReturned, IGFX_DG2);
|
||||||
|
|||||||
@@ -450,3 +450,12 @@ DG2TEST_F(ProductConfigTests, givenDg2G11DeviceIdWhenDifferentRevisionIsPassedTh
|
|||||||
EXPECT_EQ(productConfig, DG2_G11);
|
EXPECT_EQ(productConfig, DG2_G11);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// The reported storage-info adjustment requirement must match the build's
// bitness: true when is32bit is true, false otherwise.
DG2TEST_F(HwInfoConfigTestDg2, givenHwInfoConfigWhenAskedIfStorageInfoAdjustmentIsRequiredThenTrueIsReturned) {
    const auto productFamily = defaultHwInfo->platform.eProductFamily;
    auto hwInfoConfig = HwInfoConfig::get(productFamily);
    constexpr bool expectedAdjustment = is32bit;
    EXPECT_EQ(expectedAdjustment, hwInfoConfig->isStorageInfoAdjustmentRequired());
}
|
||||||
Reference in New Issue
Block a user