[L0][XE_HPC]Perform memcpy on CPU by default

Related-To: NEO-7237

Enable copy on CPU by default.
This commit also replaces the barrierCounter counter with a bool
flag, barrierCalled.

Signed-off-by: Szymon Morek <szymon.morek@intel.com>
This commit is contained in:
Szymon Morek
2022-09-26 15:41:35 +00:00
committed by Compute-Runtime-Automation
parent 196fa63e2a
commit 7ded401615
12 changed files with 46 additions and 21 deletions

View File

@ -325,9 +325,6 @@ struct CommandList : _ze_command_list_handle_t {
bool systolicModeSupport = false;
bool pipelineSelectStateTracking = false;
bool stateComputeModeTracking = false;
std::atomic<uint32_t> barrierCounter{0u};
uint32_t latestFlushedBarrierCounter = 0u;
};
using CommandListAllocatorFn = CommandList *(*)(uint32_t);

View File

@ -2495,7 +2495,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBarrier(ze_event_handle_
}
appendSignalEventPostWalker(signalEvent, workloadPartition);
this->barrierCounter++;
return ZE_RESULT_SUCCESS;
}

View File

@ -132,6 +132,9 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
bool isAllocUSMDeviceMemory(NEO::SvmAllocationData *alloc, bool allocFound);
ze_result_t performCpuMemcpy(void *dstptr, const void *srcptr, size_t size, bool isDstDeviceMemory, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents);
void *obtainLockedPtrFromDevice(void *ptr, size_t size);
protected:
std::atomic<bool> barrierCalled{false};
};
template <PRODUCT_FAMILY gfxProductFamily>

View File

@ -210,6 +210,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendBarrier(
checkAvailableSpace();
}
ret = CommandListCoreFamily<gfxCoreFamily>::appendBarrier(hSignalEvent, numWaitEvents, phWaitEvents);
this->barrierCalled = true;
return flushImmediate(ret, true);
}
@ -500,8 +501,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::performCpuMemcpy(void
this->appendBarrier(nullptr, numWaitEvents, phWaitEvents);
}
bool needsFlushTagUpdate = this->latestFlushedBarrierCounter < this->barrierCounter;
if (needsFlushTagUpdate) {
if (this->barrierCalled) {
this->csr->flushTagUpdate();
}
@ -520,13 +520,13 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::performCpuMemcpy(void
cpuMemcpyDstPtr = dstptr;
}
if (needsFlushTagUpdate) {
if (this->barrierCalled) {
auto timeoutMicroseconds = NEO::TimeoutControls::maxTimeout;
const auto waitStatus = this->csr->waitForCompletionWithTimeout(NEO::WaitParams{false, false, timeoutMicroseconds}, this->csr->peekTaskCount());
if (waitStatus == NEO::WaitStatus::GpuHang) {
return ZE_RESULT_ERROR_DEVICE_LOST;
}
this->latestFlushedBarrierCounter = this->barrierCounter;
this->barrierCalled = false;
}
if (signalEvent) {

View File

@ -478,6 +478,7 @@ class MockCommandListImmediateHw : public WhiteBox<::L0::CommandListCoreFamilyIm
using BaseClass = WhiteBox<::L0::CommandListCoreFamilyImmediate<gfxCoreFamily>>;
MockCommandListImmediateHw() : BaseClass() {}
using BaseClass::applyMemoryRangesBarrier;
using BaseClass::barrierCalled;
using BaseClass::isFlushTaskSubmissionEnabled;
using BaseClass::isSyncModeQueue;

View File

@ -886,7 +886,6 @@ HWTEST2_F(CommandListAppendLaunchKernelXeHpcCore,
struct AppendMemoryLockedCopyFixture : public DeviceFixture {
void setUp() {
DebugManager.flags.ExperimentalCopyThroughLock.set(1);
DeviceFixture::setUp();
nonUsmHostPtr = new char[sz];
@ -1076,6 +1075,23 @@ HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListWhenCpuMemcpyWith
EXPECT_EQ(waitForFlushTagUpdateCalled, 1u);
}
// Checks the barrierCalled flag on an immediate command list: it starts out
// false, is expected to be true after appendBarrier, and is expected to be
// false again once a subsequent appendMemoryCopy has returned successfully.
HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListWhenAppendBarrierThenSetBarrierCalled, IsXeHpcCore) {
    // Arrange: an immediate command list bound to the device's internal engine CSR.
    MockCommandListImmediateHw<gfxCoreFamily> immediateCmdList;
    immediateCmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
    immediateCmdList.csr = device->getNEODevice()->getInternalEngine().commandStreamReceiver;

    // No barrier has been appended yet, so the flag must still be clear.
    EXPECT_FALSE(immediateCmdList.barrierCalled);

    // Appending a barrier is expected to raise the flag.
    immediateCmdList.appendBarrier(nullptr, 0, nullptr);
    EXPECT_TRUE(immediateCmdList.barrierCalled);

    // A following memory copy must succeed and leave the flag cleared.
    const auto copyStatus = immediateCmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 1024, nullptr, 0, nullptr);
    EXPECT_EQ(copyStatus, ZE_RESULT_SUCCESS);
    EXPECT_FALSE(immediateCmdList.barrierCalled);
}
template <GFXCORE_FAMILY gfxCoreFamily>
class MockAppendMemoryLockedCopyTestImmediateCmdList : public MockCommandListImmediateHw<gfxCoreFamily> {
public: