refactor(l0): cpu memory copy on immediate cmdlists

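Remove the redundant boolean that indicated whether SVM allocation data was found;
a null allocation-data pointer now carries the same information.
Pass CPU copy parameters in a structure (CpuMemCopyInfo).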

Related-To: NEO-7553

Signed-off-by: Dominik Dabek <dominik.dabek@intel.com>
This commit is contained in:
Dominik Dabek
2023-01-09 12:18:49 +00:00
committed by Compute-Runtime-Automation
parent 439aa6c87f
commit d97ac16c59
3 changed files with 105 additions and 82 deletions

View File

@@ -21,6 +21,16 @@ struct EventPool;
struct Event;
inline constexpr size_t maxImmediateCommandSize = 4 * MemoryConstants::kiloByte;
// Bundles the arguments of one CPU-side memory copy: the destination and source
// pointers, the byte count, and the SVM allocation data associated with each side.
struct CpuMemCopyInfo {
    // The pointers and the size are fixed at construction; the allocation-data
    // pointers start out null and are filled in later by whoever looks them up.
    CpuMemCopyInfo(void *dst, const void *src, size_t bytes)
        : dstPtr{dst},
          srcPtr{src},
          size{bytes} {
    }

    void *const dstPtr;       // destination address of the copy
    const void *const srcPtr; // source address of the copy
    const size_t size;        // number of bytes to copy

    // Remain nullptr until assigned; a null pointer is what the helpers taking
    // NEO::SvmAllocationData * treat as "no allocation data".
    NEO::SvmAllocationData *dstAllocData = nullptr;
    NEO::SvmAllocationData *srcAllocData = nullptr;
};
template <GFXCORE_FAMILY gfxCoreFamily>
struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFamily> {
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
@@ -141,15 +151,15 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
void createLogicalStateHelper() override {}
NEO::LogicalStateHelper *getLogicalStateHelper() const override;
bool preferCopyThroughLockedPtr(NEO::SvmAllocationData *dstAlloc, bool dstFound, NEO::SvmAllocationData *srcAlloc, bool srcFound, size_t size);
bool isSuitableUSMHostAlloc(NEO::SvmAllocationData *alloc, bool allocFound);
bool isSuitableUSMDeviceAlloc(NEO::SvmAllocationData *alloc, bool allocFound);
bool isSuitableUSMSharedAlloc(NEO::SvmAllocationData *alloc, bool allocFound);
ze_result_t performCpuMemcpy(void *dstptr, const void *srcptr, size_t size, NEO::SvmAllocationData *dstAlloc, NEO::SvmAllocationData *srcAlloc, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents);
bool preferCopyThroughLockedPtr(CpuMemCopyInfo &cpuMemCopyInfo);
bool isSuitableUSMHostAlloc(NEO::SvmAllocationData *alloc);
bool isSuitableUSMDeviceAlloc(NEO::SvmAllocationData *alloc);
bool isSuitableUSMSharedAlloc(NEO::SvmAllocationData *alloc);
ze_result_t performCpuMemcpy(const CpuMemCopyInfo &cpuMemCopyInfo, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents);
void *obtainLockedPtrFromDevice(NEO::SvmAllocationData *alloc, void *ptr);
bool waitForEventsFromHost();
void checkWaitEventsState(uint32_t numWaitEvents, ze_event_handle_t *waitEventList);
TransferType getTransferType(NEO::SvmAllocationData *dstAlloc, bool dstFound, NEO::SvmAllocationData *srcAlloc, bool srcFound);
TransferType getTransferType(NEO::SvmAllocationData *dstAlloc, NEO::SvmAllocationData *srcAlloc);
protected:
void printKernelsPrintfOutput(bool hangDetected);
@@ -158,5 +168,4 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
template <PRODUCT_FAMILY gfxProductFamily>
struct CommandListImmediateProductFamily;
} // namespace L0

View File

@@ -353,13 +353,11 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryCopy(
}
ze_result_t ret;
NEO::SvmAllocationData *srcAllocData = nullptr;
NEO::SvmAllocationData *dstAllocData = nullptr;
bool srcAllocFound = this->device->getDriverHandle()->findAllocationDataForRange(const_cast<void *>(srcptr), size, &srcAllocData);
bool dstAllocFound = this->device->getDriverHandle()->findAllocationDataForRange(dstptr, size, &dstAllocData);
if (preferCopyThroughLockedPtr(dstAllocData, dstAllocFound, srcAllocData, srcAllocFound, size)) {
return performCpuMemcpy(dstptr, srcptr, size, dstAllocData, srcAllocData, hSignalEvent, numWaitEvents, phWaitEvents);
CpuMemCopyInfo cpuMemCopyInfo(dstptr, srcptr, size);
this->device->getDriverHandle()->findAllocationDataForRange(const_cast<void *>(srcptr), size, &cpuMemCopyInfo.srcAllocData);
this->device->getDriverHandle()->findAllocationDataForRange(dstptr, size, &cpuMemCopyInfo.dstAllocData);
if (preferCopyThroughLockedPtr(cpuMemCopyInfo)) {
return performCpuMemcpy(cpuMemCopyInfo, hSignalEvent, numWaitEvents, phWaitEvents);
}
auto isSplitNeeded = this->isAppendSplitNeeded(dstptr, srcptr, size);
@@ -639,7 +637,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmediate(ze_res
}
template <GFXCORE_FAMILY gfxCoreFamily>
bool CommandListCoreFamilyImmediate<gfxCoreFamily>::preferCopyThroughLockedPtr(NEO::SvmAllocationData *dstAlloc, bool dstFound, NEO::SvmAllocationData *srcAlloc, bool srcFound, size_t size) {
bool CommandListCoreFamilyImmediate<gfxCoreFamily>::preferCopyThroughLockedPtr(CpuMemCopyInfo &cpuMemCopyInfo) {
if (NEO::DebugManager.flags.ExperimentalForceCopyThroughLock.get() == 1) {
return true;
}
@@ -655,31 +653,30 @@ bool CommandListCoreFamilyImmediate<gfxCoreFamily>::preferCopyThroughLockedPtr(N
auto &gfxCoreHelper = this->device->getGfxCoreHelper();
if (gfxCoreHelper.copyThroughLockedPtrEnabled(this->device->getHwInfo())) {
return (!srcFound && isSuitableUSMDeviceAlloc(dstAlloc, dstFound) && size <= h2DThreshold) ||
(!dstFound && isSuitableUSMDeviceAlloc(srcAlloc, srcFound) && size <= d2HThreshold);
return (!cpuMemCopyInfo.srcAllocData && isSuitableUSMDeviceAlloc(cpuMemCopyInfo.dstAllocData) && cpuMemCopyInfo.size <= h2DThreshold) ||
(!cpuMemCopyInfo.dstAllocData && isSuitableUSMDeviceAlloc(cpuMemCopyInfo.srcAllocData) && cpuMemCopyInfo.size <= d2HThreshold);
}
return false;
}
// Reports whether `alloc` describes host USM: true only when allocation data is
// present and its memoryType is InternalMemoryType::HOST_UNIFIED_MEMORY.
// NOTE(review): this hunk shows both versions of the function back to back.
template <GFXCORE_FAMILY gfxCoreFamily>
// Pre-change form: presence of the allocation data is passed separately as `allocFound`.
bool CommandListCoreFamilyImmediate<gfxCoreFamily>::isSuitableUSMHostAlloc(NEO::SvmAllocationData *alloc, bool allocFound) {
return allocFound && (alloc->memoryType == InternalMemoryType::HOST_UNIFIED_MEMORY);
// Post-change form: `alloc` itself is null-checked, so no extra flag is needed.
bool CommandListCoreFamilyImmediate<gfxCoreFamily>::isSuitableUSMHostAlloc(NEO::SvmAllocationData *alloc) {
return alloc && (alloc->memoryType == InternalMemoryType::HOST_UNIFIED_MEMORY);
}
// Reports whether `alloc` describes a device USM allocation usable here: allocation
// data must be present, its memoryType must be DEVICE_UNIFIED_MEMORY, and the graphics
// allocation for this device's root device index must report exactly one bank
// (storageInfo.getNumBanks() == 1).
// NOTE(review): this hunk shows both versions of the function back to back.
template <GFXCORE_FAMILY gfxCoreFamily>
// Pre-change form: presence of the allocation data is passed separately as `allocFound`.
bool CommandListCoreFamilyImmediate<gfxCoreFamily>::isSuitableUSMDeviceAlloc(NEO::SvmAllocationData *alloc, bool allocFound) {
return allocFound && (alloc->memoryType == InternalMemoryType::DEVICE_UNIFIED_MEMORY) &&
// Post-change form: `alloc` is null-checked first, which guards the dereferences below.
bool CommandListCoreFamilyImmediate<gfxCoreFamily>::isSuitableUSMDeviceAlloc(NEO::SvmAllocationData *alloc) {
return alloc && (alloc->memoryType == InternalMemoryType::DEVICE_UNIFIED_MEMORY) &&
alloc->gpuAllocations.getGraphicsAllocation(this->device->getRootDeviceIndex())->storageInfo.getNumBanks() == 1;
}
// Reports whether `alloc` describes shared USM: true only when allocation data is
// present and its memoryType is InternalMemoryType::SHARED_UNIFIED_MEMORY.
// NOTE(review): this hunk shows both versions of the function back to back.
template <GFXCORE_FAMILY gfxCoreFamily>
// Pre-change form: presence of the allocation data is passed separately as `allocFound`.
bool CommandListCoreFamilyImmediate<gfxCoreFamily>::isSuitableUSMSharedAlloc(NEO::SvmAllocationData *alloc, bool allocFound) {
return allocFound && (alloc->memoryType == InternalMemoryType::SHARED_UNIFIED_MEMORY);
// Post-change form: `alloc` itself is null-checked, so no extra flag is needed.
bool CommandListCoreFamilyImmediate<gfxCoreFamily>::isSuitableUSMSharedAlloc(NEO::SvmAllocationData *alloc) {
return alloc && (alloc->memoryType == InternalMemoryType::SHARED_UNIFIED_MEMORY);
}
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::performCpuMemcpy(void *dstptr, const void *srcptr, size_t size, NEO::SvmAllocationData *dstAlloc, NEO::SvmAllocationData *srcAlloc, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) {
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::performCpuMemcpy(const CpuMemCopyInfo &cpuMemCopyInfo, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) {
bool needsBarrier = (numWaitEvents > 0);
if (needsBarrier) {
this->appendBarrier(nullptr, numWaitEvents, phWaitEvents);
@@ -697,11 +694,11 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::performCpuMemcpy(void
signalEvent = Event::fromHandle(hSignalEvent);
}
auto srcLockPointer = obtainLockedPtrFromDevice(srcAlloc, const_cast<void *>(srcptr));
auto dstLockPointer = obtainLockedPtrFromDevice(dstAlloc, dstptr);
auto srcLockPointer = obtainLockedPtrFromDevice(cpuMemCopyInfo.srcAllocData, const_cast<void *>(cpuMemCopyInfo.srcPtr));
auto dstLockPointer = obtainLockedPtrFromDevice(cpuMemCopyInfo.dstAllocData, cpuMemCopyInfo.dstPtr);
const void *cpuMemcpySrcPtr = srcLockPointer ? srcLockPointer : srcptr;
void *cpuMemcpyDstPtr = dstLockPointer ? dstLockPointer : dstptr;
const void *cpuMemcpySrcPtr = srcLockPointer ? srcLockPointer : cpuMemCopyInfo.srcPtr;
void *cpuMemcpyDstPtr = dstLockPointer ? dstLockPointer : cpuMemCopyInfo.dstPtr;
if (this->dependenciesPresent) {
auto timeoutMicroseconds = NEO::TimeoutControls::maxTimeout;
@@ -716,12 +713,13 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::performCpuMemcpy(void
signalEvent->setGpuStartTimestamp();
}
memcpy_s(cpuMemcpyDstPtr, size, cpuMemcpySrcPtr, size);
memcpy_s(cpuMemcpyDstPtr, cpuMemCopyInfo.size, cpuMemcpySrcPtr, cpuMemCopyInfo.size);
if (signalEvent) {
signalEvent->setGpuEndTimestamp();
signalEvent->hostSignal();
}
return ZE_RESULT_SUCCESS;
}
@@ -739,6 +737,7 @@ void *CommandListCoreFamilyImmediate<gfxCoreFamily>::obtainLockedPtrFromDevice(N
if (!alloc->isLocked()) {
this->device->getDriverHandle()->getMemoryManager()->lockResource(alloc);
}
auto gpuAddress = allocData->gpuAllocations.getGraphicsAllocation(this->device->getRootDeviceIndex())->getGpuAddress();
auto offset = ptrDiff(ptr, gpuAddress);
return ptrOffset(alloc->getLockedPtr(), offset);
@@ -752,13 +751,13 @@ void CommandListCoreFamilyImmediate<gfxCoreFamily>::checkWaitEventsState(uint32_
}
template <GFXCORE_FAMILY gfxCoreFamily>
TransferType CommandListCoreFamilyImmediate<gfxCoreFamily>::getTransferType(NEO::SvmAllocationData *dstAlloc, bool dstFound, NEO::SvmAllocationData *srcAlloc, bool srcFound) {
const bool srcHostUSM = isSuitableUSMHostAlloc(srcAlloc, srcFound);
const bool srcDeviceUSM = isSuitableUSMDeviceAlloc(srcAlloc, srcFound) || isSuitableUSMSharedAlloc(srcAlloc, srcFound);
TransferType CommandListCoreFamilyImmediate<gfxCoreFamily>::getTransferType(NEO::SvmAllocationData *dstAlloc, NEO::SvmAllocationData *srcAlloc) {
const bool srcHostUSM = isSuitableUSMHostAlloc(srcAlloc);
const bool srcDeviceUSM = isSuitableUSMDeviceAlloc(srcAlloc) || isSuitableUSMSharedAlloc(srcAlloc);
const bool srcHostNonUSM = srcAlloc == nullptr;
const bool dstHostUSM = isSuitableUSMHostAlloc(dstAlloc, dstFound);
const bool dstDeviceUSM = isSuitableUSMDeviceAlloc(dstAlloc, dstFound) || isSuitableUSMSharedAlloc(dstAlloc, dstFound);
const bool dstHostUSM = isSuitableUSMHostAlloc(dstAlloc);
const bool dstDeviceUSM = isSuitableUSMDeviceAlloc(dstAlloc) || isSuitableUSMSharedAlloc(dstAlloc);
const bool dstHostNonUSM = dstAlloc == nullptr;
TransferType retVal;

View File

@@ -438,20 +438,20 @@ HWTEST2_F(CommandListCreate, givenFlushErrorWhenPerformingCpuMemoryCopyThenError
internalEngine,
NEO::EngineGroupType::RenderCompute,
returnValue)));
ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue);
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
ASSERT_NE(nullptr, commandList0);
auto &commandStreamReceiver = neoDevice->getUltCommandStreamReceiver<FamilyType>();
commandStreamReceiver.flushReturnValue = SubmissionStatus::OUT_OF_MEMORY;
returnValue = commandList0->performCpuMemcpy(nullptr, nullptr, 8, nullptr, nullptr, nullptr, 1, nullptr);
ASSERT_EQ(ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY, returnValue);
CpuMemCopyInfo cpuMemCopyInfo(nullptr, nullptr, 8);
returnValue = commandList0->performCpuMemcpy(cpuMemCopyInfo, nullptr, 1, nullptr);
EXPECT_EQ(ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY, returnValue);
commandStreamReceiver.flushReturnValue = SubmissionStatus::OUT_OF_HOST_MEMORY;
returnValue = commandList0->performCpuMemcpy(nullptr, nullptr, 8, nullptr, nullptr, nullptr, 1, nullptr);
ASSERT_EQ(ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY, returnValue);
returnValue = commandList0->performCpuMemcpy(cpuMemCopyInfo, nullptr, 1, nullptr);
EXPECT_EQ(ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY, returnValue);
}
HWTEST2_F(CommandListCreate, givenImmediateCommandListWhenAppendingMemoryCopyThenSuccessIsReturned, IsAtLeastSkl) {
@@ -1924,22 +1924,24 @@ using AppendMemoryLockedCopyTest = Test<AppendMemoryLockedCopyFixture>;
// Checks that preferCopyThroughLockedPtr() returns true for a 1024-byte copy whose
// source (nonUsmHostPtr) has no allocation data and whose destination (devicePtr) does.
// NOTE(review): this hunk shows the pre-change and post-change test bodies back to back.
HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndNonUsmHostPtrWhenPreferCopyThroughLockedPtrCalledThenReturnTrue, IsAtLeastSkl) {
MockCommandListImmediateHw<gfxCoreFamily> cmdList;
cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
// Pre-change form: separate allocation-data locals plus the "found" booleans are passed on.
NEO::SvmAllocationData *srcAllocData;
NEO::SvmAllocationData *dstAllocData;
auto srcFound = device->getDriverHandle()->findAllocationDataForRange(nonUsmHostPtr, 1024, &srcAllocData);
auto dstFound = device->getDriverHandle()->findAllocationDataForRange(devicePtr, 1024, &dstAllocData);
EXPECT_TRUE(cmdList.preferCopyThroughLockedPtr(dstAllocData, dstFound, srcAllocData, srcFound, 1024));
// Post-change form: the lookups write straight into CpuMemCopyInfo, and the lookup
// results are asserted as preconditions before the call under test.
CpuMemCopyInfo cpuMemCopyInfo(devicePtr, nonUsmHostPtr, 1024);
auto srcFound = device->getDriverHandle()->findAllocationDataForRange(nonUsmHostPtr, 1024, &cpuMemCopyInfo.srcAllocData);
ASSERT_FALSE(srcFound);
auto dstFound = device->getDriverHandle()->findAllocationDataForRange(devicePtr, 1024, &cpuMemCopyInfo.dstAllocData);
ASSERT_TRUE(dstFound);
EXPECT_TRUE(cmdList.preferCopyThroughLockedPtr(cpuMemCopyInfo));
}
// Checks isSuitableUSMDeviceAlloc(): expected false for the allocation data looked up
// for nonUsmHostPtr (lookup expected to fail) and true for the data found for devicePtr.
// NOTE(review): this hunk shows the pre-change and post-change test bodies back to back.
HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListWhenIsSuitableUSMDeviceAllocThenReturnCorrectValue, IsAtLeastSkl) {
MockCommandListImmediateHw<gfxCoreFamily> cmdList;
cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
// Pre-change form: the "found" booleans are forwarded to the helper.
NEO::SvmAllocationData *srcAllocData;
NEO::SvmAllocationData *dstAllocData;
auto srcFound = device->getDriverHandle()->findAllocationDataForRange(nonUsmHostPtr, 1024, &srcAllocData);
auto dstFound = device->getDriverHandle()->findAllocationDataForRange(devicePtr, 1024, &dstAllocData);
EXPECT_FALSE(cmdList.isSuitableUSMDeviceAlloc(srcAllocData, srcFound));
EXPECT_TRUE(cmdList.isSuitableUSMDeviceAlloc(dstAllocData, dstFound));
// Post-change form: the lookup results are checked on their own and only the
// allocation-data pointers (null-initialized inside CpuMemCopyInfo) reach the helper.
CpuMemCopyInfo cpuMemCopyInfo(devicePtr, nonUsmHostPtr, 1024);
auto srcFound = device->getDriverHandle()->findAllocationDataForRange(nonUsmHostPtr, 1024, &cpuMemCopyInfo.srcAllocData);
EXPECT_FALSE(srcFound);
auto dstFound = device->getDriverHandle()->findAllocationDataForRange(devicePtr, 1024, &cpuMemCopyInfo.dstAllocData);
EXPECT_TRUE(dstFound);
EXPECT_FALSE(cmdList.isSuitableUSMDeviceAlloc(cpuMemCopyInfo.srcAllocData));
EXPECT_TRUE(cmdList.isSuitableUSMDeviceAlloc(cpuMemCopyInfo.dstAllocData));
}
HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListWhenIsSuitableUSMHostAllocThenReturnCorrectValue, IsAtLeastSkl) {
@@ -1948,9 +1950,11 @@ HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListWhenIsSuitableUSM
NEO::SvmAllocationData *srcAllocData;
NEO::SvmAllocationData *dstAllocData;
auto srcFound = device->getDriverHandle()->findAllocationDataForRange(hostPtr, 1024, &srcAllocData);
EXPECT_TRUE(srcFound);
auto dstFound = device->getDriverHandle()->findAllocationDataForRange(devicePtr, 1024, &dstAllocData);
EXPECT_TRUE(cmdList.isSuitableUSMHostAlloc(srcAllocData, srcFound));
EXPECT_FALSE(cmdList.isSuitableUSMHostAlloc(dstAllocData, dstFound));
EXPECT_TRUE(dstFound);
EXPECT_TRUE(cmdList.isSuitableUSMHostAlloc(srcAllocData));
EXPECT_FALSE(cmdList.isSuitableUSMHostAlloc(dstAllocData));
}
HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListWhenIsSuitableUSMSharedAllocThenReturnCorrectValue, IsAtLeastSkl) {
@@ -1960,11 +1964,14 @@ HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListWhenIsSuitableUSM
NEO::SvmAllocationData *deviceAllocData;
NEO::SvmAllocationData *sharedAllocData;
auto hostAllocFound = device->getDriverHandle()->findAllocationDataForRange(hostPtr, 1024, &hostAllocData);
EXPECT_TRUE(hostAllocFound);
auto deviceAllocFound = device->getDriverHandle()->findAllocationDataForRange(devicePtr, 1024, &deviceAllocData);
EXPECT_TRUE(deviceAllocFound);
auto sharedAllocFound = device->getDriverHandle()->findAllocationDataForRange(sharedPtr, 1024, &sharedAllocData);
EXPECT_FALSE(cmdList.isSuitableUSMSharedAlloc(hostAllocData, hostAllocFound));
EXPECT_FALSE(cmdList.isSuitableUSMSharedAlloc(deviceAllocData, deviceAllocFound));
EXPECT_TRUE(cmdList.isSuitableUSMSharedAlloc(sharedAllocData, sharedAllocFound));
EXPECT_TRUE(sharedAllocFound);
EXPECT_FALSE(cmdList.isSuitableUSMSharedAlloc(hostAllocData));
EXPECT_FALSE(cmdList.isSuitableUSMSharedAlloc(deviceAllocData));
EXPECT_TRUE(cmdList.isSuitableUSMSharedAlloc(sharedAllocData));
}
struct LocalMemoryMultiSubDeviceFixture : public SingleRootMultiSubDeviceFixture {
@@ -1988,7 +1995,8 @@ HWTEST2_F(LocalMemoryMultiSubDeviceTest, givenImmediateCommandListWhenIsSuitable
NEO::SvmAllocationData *allocData;
auto allocFound = device->getDriverHandle()->findAllocationDataForRange(devicePtr, 2 * MemoryConstants::megaByte, &allocData);
EXPECT_FALSE(cmdList.isSuitableUSMDeviceAlloc(allocData, allocFound));
EXPECT_TRUE(allocFound);
EXPECT_FALSE(cmdList.isSuitableUSMDeviceAlloc(allocData));
context->freeMem(devicePtr);
}
@@ -1996,22 +2004,24 @@ HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndNonUsmHostPtrA
DebugManager.flags.ExperimentalCopyThroughLock.set(0);
MockCommandListImmediateHw<gfxCoreFamily> cmdList;
cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
NEO::SvmAllocationData *srcAllocData;
NEO::SvmAllocationData *dstAllocData;
auto srcFound = device->getDriverHandle()->findAllocationDataForRange(nonUsmHostPtr, 1024, &srcAllocData);
auto dstFound = device->getDriverHandle()->findAllocationDataForRange(devicePtr, 1024, &dstAllocData);
EXPECT_FALSE(cmdList.preferCopyThroughLockedPtr(dstAllocData, dstFound, srcAllocData, srcFound, 1024));
CpuMemCopyInfo cpuMemCopyInfo(devicePtr, nonUsmHostPtr, 1024);
auto srcFound = device->getDriverHandle()->findAllocationDataForRange(nonUsmHostPtr, 1024, &cpuMemCopyInfo.srcAllocData);
ASSERT_FALSE(srcFound);
auto dstFound = device->getDriverHandle()->findAllocationDataForRange(devicePtr, 1024, &cpuMemCopyInfo.dstAllocData);
ASSERT_TRUE(dstFound);
EXPECT_FALSE(cmdList.preferCopyThroughLockedPtr(cpuMemCopyInfo));
}
// Checks that preferCopyThroughLockedPtr() returns true when the
// ExperimentalForceCopyThroughLock debug flag is set to 1.
// NOTE(review): this hunk shows the pre-change and post-change test bodies back to back.
HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndForcingLockPtrViaEnvVariableWhenPreferCopyThroughLockPointerCalledThenTrueIsReturned, IsAtLeastSkl) {
DebugManager.flags.ExperimentalForceCopyThroughLock.set(1);
MockCommandListImmediateHw<gfxCoreFamily> cmdList;
cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
// Pre-change form: separate allocation-data locals plus the "found" booleans are passed on.
NEO::SvmAllocationData *srcAllocData;
NEO::SvmAllocationData *dstAllocData;
auto srcFound = device->getDriverHandle()->findAllocationDataForRange(nonUsmHostPtr, 1024, &srcAllocData);
auto dstFound = device->getDriverHandle()->findAllocationDataForRange(devicePtr, 1024, &dstAllocData);
EXPECT_TRUE(cmdList.preferCopyThroughLockedPtr(dstAllocData, dstFound, srcAllocData, srcFound, 1024));
// Post-change form: the lookups write into CpuMemCopyInfo and their results are
// asserted as preconditions before the call under test.
CpuMemCopyInfo cpuMemCopyInfo(devicePtr, nonUsmHostPtr, 1024);
auto srcFound = device->getDriverHandle()->findAllocationDataForRange(nonUsmHostPtr, 1024, &cpuMemCopyInfo.srcAllocData);
ASSERT_FALSE(srcFound);
auto dstFound = device->getDriverHandle()->findAllocationDataForRange(devicePtr, 1024, &cpuMemCopyInfo.dstAllocData);
ASSERT_TRUE(dstFound);
EXPECT_TRUE(cmdList.preferCopyThroughLockedPtr(cpuMemCopyInfo));
}
HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListWhenGetTransferTypeThenReturnCorrectValue, IsAtLeastSkl) {
@@ -2024,26 +2034,30 @@ HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListWhenGetTransferTy
NEO::SvmAllocationData *sharedUSMAllocData;
const auto hostUSMFound = device->getDriverHandle()->findAllocationDataForRange(hostPtr, 1024, &hostUSMAllocData);
EXPECT_TRUE(hostUSMFound);
const auto hostNonUSMFound = device->getDriverHandle()->findAllocationDataForRange(nonUsmHostPtr, 1024, &hostNonUSMAllocData);
EXPECT_FALSE(hostNonUSMFound);
const auto deviceUSMFound = device->getDriverHandle()->findAllocationDataForRange(devicePtr, 1024, &deviceUSMAllocData);
EXPECT_TRUE(deviceUSMFound);
const auto sharedUSMFound = device->getDriverHandle()->findAllocationDataForRange(sharedPtr, 1024, &sharedUSMAllocData);
EXPECT_TRUE(sharedUSMFound);
EXPECT_EQ(HOST_NON_USM_TO_HOST_USM, cmdList.getTransferType(hostUSMAllocData, hostUSMFound, hostNonUSMAllocData, hostNonUSMFound));
EXPECT_EQ(HOST_NON_USM_TO_DEVICE_USM, cmdList.getTransferType(deviceUSMAllocData, deviceUSMFound, hostNonUSMAllocData, hostNonUSMFound));
EXPECT_EQ(HOST_NON_USM_TO_DEVICE_USM, cmdList.getTransferType(sharedUSMAllocData, sharedUSMFound, hostNonUSMAllocData, hostNonUSMFound));
EXPECT_EQ(HOST_NON_USM_TO_HOST_NON_USM, cmdList.getTransferType(hostNonUSMAllocData, hostNonUSMFound, hostNonUSMAllocData, hostNonUSMFound));
EXPECT_EQ(HOST_NON_USM_TO_HOST_USM, cmdList.getTransferType(hostUSMAllocData, hostNonUSMAllocData));
EXPECT_EQ(HOST_NON_USM_TO_DEVICE_USM, cmdList.getTransferType(deviceUSMAllocData, hostNonUSMAllocData));
EXPECT_EQ(HOST_NON_USM_TO_DEVICE_USM, cmdList.getTransferType(sharedUSMAllocData, hostNonUSMAllocData));
EXPECT_EQ(HOST_NON_USM_TO_HOST_NON_USM, cmdList.getTransferType(hostNonUSMAllocData, hostNonUSMAllocData));
EXPECT_EQ(HOST_USM_TO_HOST_USM, cmdList.getTransferType(hostUSMAllocData, hostUSMFound, hostUSMAllocData, hostUSMFound));
EXPECT_EQ(HOST_USM_TO_DEVICE_USM, cmdList.getTransferType(deviceUSMAllocData, deviceUSMFound, hostUSMAllocData, hostUSMFound));
EXPECT_EQ(HOST_USM_TO_DEVICE_USM, cmdList.getTransferType(sharedUSMAllocData, sharedUSMFound, hostUSMAllocData, hostUSMFound));
EXPECT_EQ(HOST_USM_TO_HOST_NON_USM, cmdList.getTransferType(hostNonUSMAllocData, hostNonUSMFound, hostUSMAllocData, hostUSMFound));
EXPECT_EQ(HOST_USM_TO_HOST_USM, cmdList.getTransferType(hostUSMAllocData, hostUSMAllocData));
EXPECT_EQ(HOST_USM_TO_DEVICE_USM, cmdList.getTransferType(deviceUSMAllocData, hostUSMAllocData));
EXPECT_EQ(HOST_USM_TO_DEVICE_USM, cmdList.getTransferType(sharedUSMAllocData, hostUSMAllocData));
EXPECT_EQ(HOST_USM_TO_HOST_NON_USM, cmdList.getTransferType(hostNonUSMAllocData, hostUSMAllocData));
EXPECT_EQ(DEVICE_USM_TO_HOST_USM, cmdList.getTransferType(hostUSMAllocData, hostUSMFound, deviceUSMAllocData, deviceUSMFound));
EXPECT_EQ(DEVICE_USM_TO_HOST_USM, cmdList.getTransferType(hostUSMAllocData, hostUSMFound, sharedUSMAllocData, sharedUSMFound));
EXPECT_EQ(DEVICE_USM_TO_DEVICE_USM, cmdList.getTransferType(deviceUSMAllocData, deviceUSMFound, deviceUSMAllocData, deviceUSMFound));
EXPECT_EQ(DEVICE_USM_TO_DEVICE_USM, cmdList.getTransferType(sharedUSMAllocData, sharedUSMFound, sharedUSMAllocData, sharedUSMFound));
EXPECT_EQ(DEVICE_USM_TO_HOST_NON_USM, cmdList.getTransferType(hostNonUSMAllocData, hostNonUSMFound, deviceUSMAllocData, deviceUSMFound));
EXPECT_EQ(DEVICE_USM_TO_HOST_NON_USM, cmdList.getTransferType(hostNonUSMAllocData, hostNonUSMFound, sharedUSMAllocData, sharedUSMFound));
EXPECT_EQ(DEVICE_USM_TO_HOST_USM, cmdList.getTransferType(hostUSMAllocData, deviceUSMAllocData));
EXPECT_EQ(DEVICE_USM_TO_HOST_USM, cmdList.getTransferType(hostUSMAllocData, sharedUSMAllocData));
EXPECT_EQ(DEVICE_USM_TO_DEVICE_USM, cmdList.getTransferType(deviceUSMAllocData, deviceUSMAllocData));
EXPECT_EQ(DEVICE_USM_TO_DEVICE_USM, cmdList.getTransferType(sharedUSMAllocData, sharedUSMAllocData));
EXPECT_EQ(DEVICE_USM_TO_HOST_NON_USM, cmdList.getTransferType(hostNonUSMAllocData, deviceUSMAllocData));
EXPECT_EQ(DEVICE_USM_TO_HOST_NON_USM, cmdList.getTransferType(hostNonUSMAllocData, sharedUSMAllocData));
}
HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndNonUsmHostPtrWhenCopyH2DThenLockPtr, IsAtLeastSkl) {
@@ -2161,6 +2175,7 @@ HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndNonUsmHostPtrW
NEO::SvmAllocationData *allocData;
device->getDriverHandle()->findAllocationDataForRange(devicePtr, 1024, &allocData);
auto dstAlloc = allocData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
auto lockedPtr = reinterpret_cast<char *>(dstAlloc->getLockedPtr());
EXPECT_EQ(0, memcmp(lockedPtr, nonUsmHostPtr, 1024));
}