refactor: Add key to force zero copy without coherency

Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
Lukasz Jobczyk
2023-12-01 14:35:38 +00:00
committed by Compute-Runtime-Automation
parent 253035878a
commit c8c3f862f4
11 changed files with 112 additions and 6 deletions

View File

@@ -9,6 +9,7 @@
#include "shared/source/device/device.h"
#include "shared/source/helpers/flush_stamp.h"
#include "shared/source/helpers/get_info.h"
#include "shared/source/utilities/cpuintrinsics.h"
#include "shared/source/utilities/logger.h"
#include "opencl/source/command_queue/command_queue.h"
@@ -20,6 +21,15 @@
#include "opencl/source/mem_obj/image.h"
namespace NEO {
// Flushes every CPU cache line that overlaps the byte range [ptr, ptr + size) and then
// issues a store fence.
//
// ptr  - first byte of the range; it does not have to be cache-line aligned.
// size - length of the range in bytes; with 0 nothing is flushed and only the fence is issued.
void cachelineFlushMemory(char *ptr, size_t size) {
    const auto lastPtr = ptr + size;
    if (size != 0) {
        // Rewind to the start of the cache line that holds the first byte. Stepping in
        // cache-line strides from an unaligned address can jump over the final line when
        // the range straddles a line boundary (e.g. offset 60 in a line, size 8).
        ptr -= reinterpret_cast<size_t>(ptr) % MemoryConstants::cacheLineSize;
    }
    while (ptr < lastPtr) {
        CpuIntrinsics::clFlushOpt(ptr);
        ptr += MemoryConstants::cacheLineSize;
    }
    // Fence after the last flush, as the original implementation did.
    CpuIntrinsics::sfence();
}
void *CommandQueue::cpuDataTransferHandler(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &retVal) {
MapInfo unmapInfo;
Event *outEventObj = nullptr;
@@ -116,6 +126,8 @@ void *CommandQueue::cpuDataTransferHandler(TransferProperties &transferPropertie
transferProperties.memObj->transferDataToHostPtr(transferProperties.size, transferProperties.offset);
}
eventCompleted = true;
} else if (debugManager.flags.AllowZeroCopyWithoutCoherency.get() == 1) {
cachelineFlushMemory(static_cast<char *>(transferProperties.getCpuPtrForReadWrite()), transferProperties.size[0]);
}
break;
case CL_COMMAND_MAP_IMAGE:
@@ -132,6 +144,8 @@ void *CommandQueue::cpuDataTransferHandler(TransferProperties &transferPropertie
transferProperties.memObj->transferDataFromHostPtr(unmapInfo.size, unmapInfo.offset);
}
eventCompleted = true;
} else if (debugManager.flags.AllowZeroCopyWithoutCoherency.get() == 1) {
cachelineFlushMemory(static_cast<char *>(transferProperties.getCpuPtrForReadWrite()), transferProperties.memObj->getSize());
}
if (!unmapInfo.readOnly) {
modifySimulationFlags = true;

View File

@@ -436,7 +436,7 @@ bool MemObj::mappingOnCpuAllowed() const {
auto graphicsAllocation = multiGraphicsAllocation.getDefaultGraphicsAllocation();
return !isTiledAllocation() && !peekSharingHandler() && !isMipMapped(this) && !debugManager.flags.DisableZeroCopyForBuffers.get() &&
!graphicsAllocation->isCompressionEnabled() && MemoryPoolHelper::isSystemMemoryPool(graphicsAllocation->getMemoryPool()) &&
allowCpuAccess();
allowCpuForMapUnmap();
}
bool MemObj::allowCpuAccess() const {
@@ -448,6 +448,14 @@ bool MemObj::allowCpuAccess() const {
return !graphicsAllocation->getDefaultGmm()->getPreferNoCpuAccess();
}
// Decides whether the CPU path may be used for map/unmap of this object.
// The answer is allowCpuAccess() unless the AllowZeroCopyWithoutCoherency debug flag holds
// a value other than -1; in that case the flag value, converted to bool, is returned.
bool MemObj::allowCpuForMapUnmap() const {
    const bool defaultDecision = allowCpuAccess();
    const auto forcedValue = debugManager.flags.AllowZeroCopyWithoutCoherency.get();
    return (forcedValue == -1) ? defaultDecision : static_cast<bool>(forcedValue);
}
void MemObj::storeProperties(const cl_mem_properties *properties) {
if (properties) {
for (size_t i = 0; properties[i] != 0; i += 2) {

View File

@@ -132,6 +132,7 @@ class MemObj : public BaseObject<_cl_mem> {
bool checkIfMemoryTransferIsRequired(size_t offsetInMemObject, size_t offsetInHostPtr, const void *ptr, cl_command_type cmdType);
// True only when the object is not tiled, has no sharing handler, is not mip-mapped, the
// DisableZeroCopyForBuffers debug flag is not set, the default allocation is uncompressed and
// lives in a system memory pool, and CPU access is permitted.
bool mappingOnCpuAllowed() const;
// NOTE(review): only the tail of the definition is visible here; it returns the negation of
// the default GMM's getPreferNoCpuAccess(). Declared MOCKABLE_VIRTUAL.
MOCKABLE_VIRTUAL bool allowCpuAccess() const;
// Returns allowCpuAccess() unless the AllowZeroCopyWithoutCoherency debug flag is set to a
// value other than -1, in which case the flag value (converted to bool) is returned.
bool allowCpuForMapUnmap() const;
// Base implementation returns the first element of the offset array; virtual, so derived
// classes may supply a different calculation.
virtual size_t calculateOffsetForMapping(const MemObjOffsetArray &offset) const { return offset[0]; }
// Forwards the size array to calculateOffsetForMapping() and returns its result.
// NOTE(review): presumably MemObjSizeArray and MemObjOffsetArray are the same or convertible types - confirm.
size_t calculateMappedPtrLength(const MemObjSizeArray &size) const { return calculateOffsetForMapping(size); }
// Returns the stored memObjectType.
cl_mem_object_type peekClMemObjType() const { return memObjectType; }