[L0][XE_HPC]Perform memcpy on CPU by default

Related-To: NEO-7237

Signed-off-by: Szymon Morek <szymon.morek@intel.com>
This commit is contained in:
Szymon Morek
2022-10-05 10:46:26 +00:00
committed by Compute-Runtime-Automation
parent edca8aa6de
commit 17655e3ed3
12 changed files with 81 additions and 29 deletions

View File

@@ -329,9 +329,6 @@ struct CommandList : _ze_command_list_handle_t {
bool systolicModeSupport = false;
bool pipelineSelectStateTracking = false;
bool stateComputeModeTracking = false;
std::atomic<uint32_t> barrierCounter{0u};
uint32_t latestFlushedBarrierCounter = 0u;
};
using CommandListAllocatorFn = CommandList *(*)(uint32_t);

View File

@@ -2517,7 +2517,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBarrier(ze_event_handle_
}
appendSignalEventPostWalker(signalEvent, workloadPartition);
this->barrierCounter++;
return ZE_RESULT_SUCCESS;
}

View File

@@ -129,9 +129,12 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
NEO::LogicalStateHelper *getLogicalStateHelper() const override;
bool preferCopyThroughLockedPtr(NEO::SvmAllocationData *dstAlloc, bool dstFound, NEO::SvmAllocationData *srcAlloc, bool srcFound, size_t size);
bool isAllocUSMDeviceMemory(NEO::SvmAllocationData *alloc, bool allocFound);
bool isSuitableUSMDeviceAlloc(NEO::SvmAllocationData *alloc, bool allocFound);
ze_result_t performCpuMemcpy(void *dstptr, const void *srcptr, size_t size, bool isDstDeviceMemory, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents);
void *obtainLockedPtrFromDevice(void *ptr, size_t size);
protected:
std::atomic<bool> barrierCalled{false};
};
template <PRODUCT_FAMILY gfxProductFamily>

View File

@@ -246,6 +246,8 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendBarrier(
checkAvailableSpace();
}
ret = CommandListCoreFamily<gfxCoreFamily>::appendBarrier(hSignalEvent, numWaitEvents, phWaitEvents);
this->barrierCalled = true;
return flushImmediate(ret, true, hSignalEvent);
}
@@ -525,15 +527,16 @@ bool CommandListCoreFamilyImmediate<gfxCoreFamily>::preferCopyThroughLockedPtr(N
d2HThreshold = NEO::DebugManager.flags.ExperimentalD2HCpuCopyThreshold.get();
}
if (NEO::HwHelper::get(this->device->getHwInfo().platform.eRenderCoreFamily).copyThroughLockedPtrEnabled()) {
return (!srcFound && isAllocUSMDeviceMemory(dstAlloc, dstFound) && size <= h2DThreshold) ||
(!dstFound && isAllocUSMDeviceMemory(srcAlloc, srcFound) && size <= d2HThreshold);
return (!srcFound && isSuitableUSMDeviceAlloc(dstAlloc, dstFound) && size <= h2DThreshold) ||
(!dstFound && isSuitableUSMDeviceAlloc(srcAlloc, srcFound) && size <= d2HThreshold);
}
return false;
}
// NOTE(review): this hunk is rendered without +/- markers, so two signatures and
// two return statements appear back to back. The isAllocUSMDeviceMemory signature
// and its single-line return look like the pre-change version, and the
// isSuitableUSMDeviceAlloc lines like the post-change version - confirm against
// the raw diff before relying on this reading.
//
// isSuitableUSMDeviceAlloc reports whether an allocation qualifies as a device
// USM allocation for the caller's purposes. It returns true only when all of the
// following hold:
//   - allocFound is true,
//   - alloc->memoryType equals InternalMemoryType::DEVICE_UNIFIED_MEMORY,
//   - the graphics allocation looked up for this device's root device index
//     reports storageInfo.getNumBanks() == 1.
// allocFound is evaluated first, so alloc is not dereferenced when it is false.
template <GFXCORE_FAMILY gfxCoreFamily>
bool CommandListCoreFamilyImmediate<gfxCoreFamily>::isAllocUSMDeviceMemory(NEO::SvmAllocationData *alloc, bool allocFound) {
return allocFound && (alloc->memoryType == InternalMemoryType::DEVICE_UNIFIED_MEMORY);
bool CommandListCoreFamilyImmediate<gfxCoreFamily>::isSuitableUSMDeviceAlloc(NEO::SvmAllocationData *alloc, bool allocFound) {
return allocFound && (alloc->memoryType == InternalMemoryType::DEVICE_UNIFIED_MEMORY) &&
// NOTE(review): the pointer returned by getGraphicsAllocation(...) is dereferenced
// without a null check - confirm it cannot be null for a found device allocation.
alloc->gpuAllocations.getGraphicsAllocation(this->device->getRootDeviceIndex())->storageInfo.getNumBanks() == 1;
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -544,8 +547,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::performCpuMemcpy(void
this->appendBarrier(nullptr, numWaitEvents, phWaitEvents);
}
bool needsFlushTagUpdate = this->latestFlushedBarrierCounter < this->barrierCounter;
if (needsFlushTagUpdate) {
if (this->barrierCalled) {
this->csr->flushTagUpdate();
}
@@ -564,13 +566,13 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::performCpuMemcpy(void
cpuMemcpyDstPtr = dstptr;
}
if (needsFlushTagUpdate) {
if (this->barrierCalled) {
auto timeoutMicroseconds = NEO::TimeoutControls::maxTimeout;
const auto waitStatus = this->csr->waitForCompletionWithTimeout(NEO::WaitParams{false, false, timeoutMicroseconds}, this->csr->peekTaskCount());
if (waitStatus == NEO::WaitStatus::GpuHang) {
return ZE_RESULT_ERROR_DEVICE_LOST;
}
this->latestFlushedBarrierCounter = this->barrierCounter;
this->barrierCalled = false;
}
if (signalEvent) {