performance: do not copy on CPU if events not ready

- When events are not ready, we want to pipeline the operation instead of
performing the copy on the CPU in place of the call.

Signed-off-by: Michal Mrozek <michal.mrozek@intel.com>
This commit is contained in:
Michal Mrozek 2023-05-11 18:17:08 +00:00 committed by Compute-Runtime-Automation
parent e5e39ce8f4
commit ff5fe7f294
2 changed files with 42 additions and 2 deletions

View File

@ -729,7 +729,8 @@ bool CommandListCoreFamilyImmediate<gfxCoreFamily>::preferCopyThroughLockedPtr(C
bool cpuMemCopyEnabled = false;
switch (transferType) {
case HOST_USM_TO_DEVICE_USM: {
case HOST_USM_TO_DEVICE_USM:
case DEVICE_USM_TO_HOST_USM: {
if (this->dependenciesPresent) {
cpuMemCopyEnabled = false;
break;
@ -746,7 +747,6 @@ bool CommandListCoreFamilyImmediate<gfxCoreFamily>::preferCopyThroughLockedPtr(C
}
case HOST_NON_USM_TO_DEVICE_USM:
case DEVICE_USM_TO_HOST_NON_USM:
case DEVICE_USM_TO_HOST_USM:
cpuMemCopyEnabled = true;
break;
default:

View File

@ -2057,6 +2057,46 @@ HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndUsmHostPtrWhen
EXPECT_TRUE(cmdList.preferCopyThroughLockedPtr(cpuMemCopyInfo, 1, &event));
}
HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndUsmHostPtrWhenPreferCopyThroughLockedPtrCalledForD2HWhenCopyCantBePerformedImmediatelyThenReturnFalse, IsAtLeastSkl) {
MockCommandListImmediateHw<gfxCoreFamily> cmdList;
cmdList.copyThroughLockedPtrEnabled = true;
cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
CpuMemCopyInfo cpuMemCopyInfo(hostPtr, devicePtr, 1024);
auto srcFound = device->getDriverHandle()->findAllocationDataForRange(hostPtr, 1024, &cpuMemCopyInfo.srcAllocData);
ASSERT_TRUE(srcFound);
auto dstFound = device->getDriverHandle()->findAllocationDataForRange(devicePtr, 1024, &cpuMemCopyInfo.dstAllocData);
ASSERT_TRUE(dstFound);
ze_event_pool_desc_t eventPoolDesc = {};
eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE;
eventPoolDesc.count = 1;
ze_result_t returnValue;
std::unique_ptr<L0::EventPool> eventPool = std::unique_ptr<L0::EventPool>(EventPool::create(device->getDriverHandle(), context, 0, nullptr, &eventPoolDesc, returnValue));
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
ze_event_handle_t event = nullptr;
ze_event_desc_t eventDesc = {};
eventDesc.index = 0;
eventDesc.wait = 0;
eventDesc.signal = 0;
ASSERT_EQ(ZE_RESULT_SUCCESS, eventPool->createEvent(&eventDesc, &event));
std::unique_ptr<L0::Event> eventObject(L0::Event::fromHandle(event));
cmdList.dependenciesPresent = false;
EXPECT_FALSE(cmdList.preferCopyThroughLockedPtr(cpuMemCopyInfo, 1, &event));
cmdList.dependenciesPresent = true;
EXPECT_FALSE(cmdList.preferCopyThroughLockedPtr(cpuMemCopyInfo, 0, nullptr));
cmdList.dependenciesPresent = true;
EXPECT_FALSE(cmdList.preferCopyThroughLockedPtr(cpuMemCopyInfo, 1, &event));
eventObject->setIsCompleted();
cmdList.dependenciesPresent = false;
EXPECT_TRUE(cmdList.preferCopyThroughLockedPtr(cpuMemCopyInfo, 1, &event));
}
HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListWhenIsSuitableUSMDeviceAllocThenReturnCorrectValue, IsAtLeastSkl) {
MockCommandListImmediateHw<gfxCoreFamily> cmdList;
cmdList.copyThroughLockedPtrEnabled = true;