mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-30 01:35:20 +08:00
refactor(l0): cpu memory copy on immediate cmdlists
Remove the redundant boolean indicating whether SVM data was found. Pass CPU copy parameters in a structure. Related-To: NEO-7553. Signed-off-by: Dominik Dabek <dominik.dabek@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
439aa6c87f
commit
d97ac16c59
@@ -21,6 +21,16 @@ struct EventPool;
|
||||
struct Event;
|
||||
inline constexpr size_t maxImmediateCommandSize = 4 * MemoryConstants::kiloByte;
|
||||
|
||||
// Bundles the parameters of a CPU-side memory copy so they can be passed as a
// single argument to preferCopyThroughLockedPtr() and performCpuMemcpy().
// The pointers and size are fixed at construction; the allocation-data
// pointers start as nullptr and are meant to be filled in afterwards
// (the caller passes their addresses to findAllocationDataForRange()).
struct CpuMemCopyInfo {
|
||||
// Destination address of the copy, as supplied by the caller.
void *const dstPtr;
|
||||
// Source address of the copy, as supplied by the caller.
const void *const srcPtr;
|
||||
// Size passed to memcpy_s for both the destination and the source length.
const size_t size;
|
||||
// Allocation data for dstPtr; nullptr is treated by the users of this
// struct as "no allocation data found" for the destination.
NEO::SvmAllocationData *dstAllocData{nullptr};
|
||||
// Allocation data for srcPtr; nullptr is treated by the users of this
// struct as "no allocation data found" for the source.
NEO::SvmAllocationData *srcAllocData{nullptr};
|
||||
|
||||
// Stores the copy endpoints and size; both allocation-data pointers keep
// their nullptr defaults.
CpuMemCopyInfo(void *dstPtr, const void *srcPtr, size_t size) : dstPtr(dstPtr), srcPtr(srcPtr), size(size) {}
|
||||
};
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFamily> {
|
||||
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||
@@ -141,15 +151,15 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
|
||||
void createLogicalStateHelper() override {}
|
||||
NEO::LogicalStateHelper *getLogicalStateHelper() const override;
|
||||
|
||||
bool preferCopyThroughLockedPtr(NEO::SvmAllocationData *dstAlloc, bool dstFound, NEO::SvmAllocationData *srcAlloc, bool srcFound, size_t size);
|
||||
bool isSuitableUSMHostAlloc(NEO::SvmAllocationData *alloc, bool allocFound);
|
||||
bool isSuitableUSMDeviceAlloc(NEO::SvmAllocationData *alloc, bool allocFound);
|
||||
bool isSuitableUSMSharedAlloc(NEO::SvmAllocationData *alloc, bool allocFound);
|
||||
ze_result_t performCpuMemcpy(void *dstptr, const void *srcptr, size_t size, NEO::SvmAllocationData *dstAlloc, NEO::SvmAllocationData *srcAlloc, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents);
|
||||
bool preferCopyThroughLockedPtr(CpuMemCopyInfo &cpuMemCopyInfo);
|
||||
bool isSuitableUSMHostAlloc(NEO::SvmAllocationData *alloc);
|
||||
bool isSuitableUSMDeviceAlloc(NEO::SvmAllocationData *alloc);
|
||||
bool isSuitableUSMSharedAlloc(NEO::SvmAllocationData *alloc);
|
||||
ze_result_t performCpuMemcpy(const CpuMemCopyInfo &cpuMemCopyInfo, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents);
|
||||
void *obtainLockedPtrFromDevice(NEO::SvmAllocationData *alloc, void *ptr);
|
||||
bool waitForEventsFromHost();
|
||||
void checkWaitEventsState(uint32_t numWaitEvents, ze_event_handle_t *waitEventList);
|
||||
TransferType getTransferType(NEO::SvmAllocationData *dstAlloc, bool dstFound, NEO::SvmAllocationData *srcAlloc, bool srcFound);
|
||||
TransferType getTransferType(NEO::SvmAllocationData *dstAlloc, NEO::SvmAllocationData *srcAlloc);
|
||||
|
||||
protected:
|
||||
void printKernelsPrintfOutput(bool hangDetected);
|
||||
@@ -158,5 +168,4 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
|
||||
|
||||
template <PRODUCT_FAMILY gfxProductFamily>
|
||||
struct CommandListImmediateProductFamily;
|
||||
|
||||
} // namespace L0
|
||||
|
||||
@@ -353,13 +353,11 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryCopy(
|
||||
}
|
||||
|
||||
ze_result_t ret;
|
||||
|
||||
NEO::SvmAllocationData *srcAllocData = nullptr;
|
||||
NEO::SvmAllocationData *dstAllocData = nullptr;
|
||||
bool srcAllocFound = this->device->getDriverHandle()->findAllocationDataForRange(const_cast<void *>(srcptr), size, &srcAllocData);
|
||||
bool dstAllocFound = this->device->getDriverHandle()->findAllocationDataForRange(dstptr, size, &dstAllocData);
|
||||
if (preferCopyThroughLockedPtr(dstAllocData, dstAllocFound, srcAllocData, srcAllocFound, size)) {
|
||||
return performCpuMemcpy(dstptr, srcptr, size, dstAllocData, srcAllocData, hSignalEvent, numWaitEvents, phWaitEvents);
|
||||
CpuMemCopyInfo cpuMemCopyInfo(dstptr, srcptr, size);
|
||||
this->device->getDriverHandle()->findAllocationDataForRange(const_cast<void *>(srcptr), size, &cpuMemCopyInfo.srcAllocData);
|
||||
this->device->getDriverHandle()->findAllocationDataForRange(dstptr, size, &cpuMemCopyInfo.dstAllocData);
|
||||
if (preferCopyThroughLockedPtr(cpuMemCopyInfo)) {
|
||||
return performCpuMemcpy(cpuMemCopyInfo, hSignalEvent, numWaitEvents, phWaitEvents);
|
||||
}
|
||||
|
||||
auto isSplitNeeded = this->isAppendSplitNeeded(dstptr, srcptr, size);
|
||||
@@ -639,7 +637,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmediate(ze_res
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
bool CommandListCoreFamilyImmediate<gfxCoreFamily>::preferCopyThroughLockedPtr(NEO::SvmAllocationData *dstAlloc, bool dstFound, NEO::SvmAllocationData *srcAlloc, bool srcFound, size_t size) {
|
||||
bool CommandListCoreFamilyImmediate<gfxCoreFamily>::preferCopyThroughLockedPtr(CpuMemCopyInfo &cpuMemCopyInfo) {
|
||||
if (NEO::DebugManager.flags.ExperimentalForceCopyThroughLock.get() == 1) {
|
||||
return true;
|
||||
}
|
||||
@@ -655,31 +653,30 @@ bool CommandListCoreFamilyImmediate<gfxCoreFamily>::preferCopyThroughLockedPtr(N
|
||||
|
||||
auto &gfxCoreHelper = this->device->getGfxCoreHelper();
|
||||
if (gfxCoreHelper.copyThroughLockedPtrEnabled(this->device->getHwInfo())) {
|
||||
return (!srcFound && isSuitableUSMDeviceAlloc(dstAlloc, dstFound) && size <= h2DThreshold) ||
|
||||
(!dstFound && isSuitableUSMDeviceAlloc(srcAlloc, srcFound) && size <= d2HThreshold);
|
||||
return (!cpuMemCopyInfo.srcAllocData && isSuitableUSMDeviceAlloc(cpuMemCopyInfo.dstAllocData) && cpuMemCopyInfo.size <= h2DThreshold) ||
|
||||
(!cpuMemCopyInfo.dstAllocData && isSuitableUSMDeviceAlloc(cpuMemCopyInfo.srcAllocData) && cpuMemCopyInfo.size <= d2HThreshold);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
bool CommandListCoreFamilyImmediate<gfxCoreFamily>::isSuitableUSMHostAlloc(NEO::SvmAllocationData *alloc, bool allocFound) {
|
||||
return allocFound && (alloc->memoryType == InternalMemoryType::HOST_UNIFIED_MEMORY);
|
||||
bool CommandListCoreFamilyImmediate<gfxCoreFamily>::isSuitableUSMHostAlloc(NEO::SvmAllocationData *alloc) {
|
||||
return alloc && (alloc->memoryType == InternalMemoryType::HOST_UNIFIED_MEMORY);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
bool CommandListCoreFamilyImmediate<gfxCoreFamily>::isSuitableUSMDeviceAlloc(NEO::SvmAllocationData *alloc, bool allocFound) {
|
||||
return allocFound && (alloc->memoryType == InternalMemoryType::DEVICE_UNIFIED_MEMORY) &&
|
||||
bool CommandListCoreFamilyImmediate<gfxCoreFamily>::isSuitableUSMDeviceAlloc(NEO::SvmAllocationData *alloc) {
|
||||
return alloc && (alloc->memoryType == InternalMemoryType::DEVICE_UNIFIED_MEMORY) &&
|
||||
alloc->gpuAllocations.getGraphicsAllocation(this->device->getRootDeviceIndex())->storageInfo.getNumBanks() == 1;
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
bool CommandListCoreFamilyImmediate<gfxCoreFamily>::isSuitableUSMSharedAlloc(NEO::SvmAllocationData *alloc, bool allocFound) {
|
||||
return allocFound && (alloc->memoryType == InternalMemoryType::SHARED_UNIFIED_MEMORY);
|
||||
bool CommandListCoreFamilyImmediate<gfxCoreFamily>::isSuitableUSMSharedAlloc(NEO::SvmAllocationData *alloc) {
|
||||
return alloc && (alloc->memoryType == InternalMemoryType::SHARED_UNIFIED_MEMORY);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::performCpuMemcpy(void *dstptr, const void *srcptr, size_t size, NEO::SvmAllocationData *dstAlloc, NEO::SvmAllocationData *srcAlloc, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) {
|
||||
|
||||
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::performCpuMemcpy(const CpuMemCopyInfo &cpuMemCopyInfo, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) {
|
||||
bool needsBarrier = (numWaitEvents > 0);
|
||||
if (needsBarrier) {
|
||||
this->appendBarrier(nullptr, numWaitEvents, phWaitEvents);
|
||||
@@ -697,11 +694,11 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::performCpuMemcpy(void
|
||||
signalEvent = Event::fromHandle(hSignalEvent);
|
||||
}
|
||||
|
||||
auto srcLockPointer = obtainLockedPtrFromDevice(srcAlloc, const_cast<void *>(srcptr));
|
||||
auto dstLockPointer = obtainLockedPtrFromDevice(dstAlloc, dstptr);
|
||||
auto srcLockPointer = obtainLockedPtrFromDevice(cpuMemCopyInfo.srcAllocData, const_cast<void *>(cpuMemCopyInfo.srcPtr));
|
||||
auto dstLockPointer = obtainLockedPtrFromDevice(cpuMemCopyInfo.dstAllocData, cpuMemCopyInfo.dstPtr);
|
||||
|
||||
const void *cpuMemcpySrcPtr = srcLockPointer ? srcLockPointer : srcptr;
|
||||
void *cpuMemcpyDstPtr = dstLockPointer ? dstLockPointer : dstptr;
|
||||
const void *cpuMemcpySrcPtr = srcLockPointer ? srcLockPointer : cpuMemCopyInfo.srcPtr;
|
||||
void *cpuMemcpyDstPtr = dstLockPointer ? dstLockPointer : cpuMemCopyInfo.dstPtr;
|
||||
|
||||
if (this->dependenciesPresent) {
|
||||
auto timeoutMicroseconds = NEO::TimeoutControls::maxTimeout;
|
||||
@@ -716,12 +713,13 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::performCpuMemcpy(void
|
||||
signalEvent->setGpuStartTimestamp();
|
||||
}
|
||||
|
||||
memcpy_s(cpuMemcpyDstPtr, size, cpuMemcpySrcPtr, size);
|
||||
memcpy_s(cpuMemcpyDstPtr, cpuMemCopyInfo.size, cpuMemcpySrcPtr, cpuMemCopyInfo.size);
|
||||
|
||||
if (signalEvent) {
|
||||
signalEvent->setGpuEndTimestamp();
|
||||
signalEvent->hostSignal();
|
||||
}
|
||||
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -739,6 +737,7 @@ void *CommandListCoreFamilyImmediate<gfxCoreFamily>::obtainLockedPtrFromDevice(N
|
||||
if (!alloc->isLocked()) {
|
||||
this->device->getDriverHandle()->getMemoryManager()->lockResource(alloc);
|
||||
}
|
||||
|
||||
auto gpuAddress = allocData->gpuAllocations.getGraphicsAllocation(this->device->getRootDeviceIndex())->getGpuAddress();
|
||||
auto offset = ptrDiff(ptr, gpuAddress);
|
||||
return ptrOffset(alloc->getLockedPtr(), offset);
|
||||
@@ -752,13 +751,13 @@ void CommandListCoreFamilyImmediate<gfxCoreFamily>::checkWaitEventsState(uint32_
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
TransferType CommandListCoreFamilyImmediate<gfxCoreFamily>::getTransferType(NEO::SvmAllocationData *dstAlloc, bool dstFound, NEO::SvmAllocationData *srcAlloc, bool srcFound) {
|
||||
const bool srcHostUSM = isSuitableUSMHostAlloc(srcAlloc, srcFound);
|
||||
const bool srcDeviceUSM = isSuitableUSMDeviceAlloc(srcAlloc, srcFound) || isSuitableUSMSharedAlloc(srcAlloc, srcFound);
|
||||
TransferType CommandListCoreFamilyImmediate<gfxCoreFamily>::getTransferType(NEO::SvmAllocationData *dstAlloc, NEO::SvmAllocationData *srcAlloc) {
|
||||
const bool srcHostUSM = isSuitableUSMHostAlloc(srcAlloc);
|
||||
const bool srcDeviceUSM = isSuitableUSMDeviceAlloc(srcAlloc) || isSuitableUSMSharedAlloc(srcAlloc);
|
||||
const bool srcHostNonUSM = srcAlloc == nullptr;
|
||||
|
||||
const bool dstHostUSM = isSuitableUSMHostAlloc(dstAlloc, dstFound);
|
||||
const bool dstDeviceUSM = isSuitableUSMDeviceAlloc(dstAlloc, dstFound) || isSuitableUSMSharedAlloc(dstAlloc, dstFound);
|
||||
const bool dstHostUSM = isSuitableUSMHostAlloc(dstAlloc);
|
||||
const bool dstDeviceUSM = isSuitableUSMDeviceAlloc(dstAlloc) || isSuitableUSMSharedAlloc(dstAlloc);
|
||||
const bool dstHostNonUSM = dstAlloc == nullptr;
|
||||
|
||||
TransferType retVal;
|
||||
|
||||
@@ -438,20 +438,20 @@ HWTEST2_F(CommandListCreate, givenFlushErrorWhenPerformingCpuMemoryCopyThenError
|
||||
internalEngine,
|
||||
NEO::EngineGroupType::RenderCompute,
|
||||
returnValue)));
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
|
||||
ASSERT_NE(nullptr, commandList0);
|
||||
|
||||
auto &commandStreamReceiver = neoDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
|
||||
commandStreamReceiver.flushReturnValue = SubmissionStatus::OUT_OF_MEMORY;
|
||||
|
||||
returnValue = commandList0->performCpuMemcpy(nullptr, nullptr, 8, nullptr, nullptr, nullptr, 1, nullptr);
|
||||
ASSERT_EQ(ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY, returnValue);
|
||||
CpuMemCopyInfo cpuMemCopyInfo(nullptr, nullptr, 8);
|
||||
returnValue = commandList0->performCpuMemcpy(cpuMemCopyInfo, nullptr, 1, nullptr);
|
||||
EXPECT_EQ(ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY, returnValue);
|
||||
|
||||
commandStreamReceiver.flushReturnValue = SubmissionStatus::OUT_OF_HOST_MEMORY;
|
||||
|
||||
returnValue = commandList0->performCpuMemcpy(nullptr, nullptr, 8, nullptr, nullptr, nullptr, 1, nullptr);
|
||||
ASSERT_EQ(ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY, returnValue);
|
||||
returnValue = commandList0->performCpuMemcpy(cpuMemCopyInfo, nullptr, 1, nullptr);
|
||||
EXPECT_EQ(ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY, returnValue);
|
||||
}
|
||||
|
||||
HWTEST2_F(CommandListCreate, givenImmediateCommandListWhenAppendingMemoryCopyThenSuccessIsReturned, IsAtLeastSkl) {
|
||||
@@ -1924,22 +1924,24 @@ using AppendMemoryLockedCopyTest = Test<AppendMemoryLockedCopyFixture>;
|
||||
HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndNonUsmHostPtrWhenPreferCopyThroughLockedPtrCalledThenReturnTrue, IsAtLeastSkl) {
|
||||
MockCommandListImmediateHw<gfxCoreFamily> cmdList;
|
||||
cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
|
||||
NEO::SvmAllocationData *srcAllocData;
|
||||
NEO::SvmAllocationData *dstAllocData;
|
||||
auto srcFound = device->getDriverHandle()->findAllocationDataForRange(nonUsmHostPtr, 1024, &srcAllocData);
|
||||
auto dstFound = device->getDriverHandle()->findAllocationDataForRange(devicePtr, 1024, &dstAllocData);
|
||||
EXPECT_TRUE(cmdList.preferCopyThroughLockedPtr(dstAllocData, dstFound, srcAllocData, srcFound, 1024));
|
||||
CpuMemCopyInfo cpuMemCopyInfo(devicePtr, nonUsmHostPtr, 1024);
|
||||
auto srcFound = device->getDriverHandle()->findAllocationDataForRange(nonUsmHostPtr, 1024, &cpuMemCopyInfo.srcAllocData);
|
||||
ASSERT_FALSE(srcFound);
|
||||
auto dstFound = device->getDriverHandle()->findAllocationDataForRange(devicePtr, 1024, &cpuMemCopyInfo.dstAllocData);
|
||||
ASSERT_TRUE(dstFound);
|
||||
EXPECT_TRUE(cmdList.preferCopyThroughLockedPtr(cpuMemCopyInfo));
|
||||
}
|
||||
|
||||
HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListWhenIsSuitableUSMDeviceAllocThenReturnCorrectValue, IsAtLeastSkl) {
|
||||
MockCommandListImmediateHw<gfxCoreFamily> cmdList;
|
||||
cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
|
||||
NEO::SvmAllocationData *srcAllocData;
|
||||
NEO::SvmAllocationData *dstAllocData;
|
||||
auto srcFound = device->getDriverHandle()->findAllocationDataForRange(nonUsmHostPtr, 1024, &srcAllocData);
|
||||
auto dstFound = device->getDriverHandle()->findAllocationDataForRange(devicePtr, 1024, &dstAllocData);
|
||||
EXPECT_FALSE(cmdList.isSuitableUSMDeviceAlloc(srcAllocData, srcFound));
|
||||
EXPECT_TRUE(cmdList.isSuitableUSMDeviceAlloc(dstAllocData, dstFound));
|
||||
CpuMemCopyInfo cpuMemCopyInfo(devicePtr, nonUsmHostPtr, 1024);
|
||||
auto srcFound = device->getDriverHandle()->findAllocationDataForRange(nonUsmHostPtr, 1024, &cpuMemCopyInfo.srcAllocData);
|
||||
EXPECT_FALSE(srcFound);
|
||||
auto dstFound = device->getDriverHandle()->findAllocationDataForRange(devicePtr, 1024, &cpuMemCopyInfo.dstAllocData);
|
||||
EXPECT_TRUE(dstFound);
|
||||
EXPECT_FALSE(cmdList.isSuitableUSMDeviceAlloc(cpuMemCopyInfo.srcAllocData));
|
||||
EXPECT_TRUE(cmdList.isSuitableUSMDeviceAlloc(cpuMemCopyInfo.dstAllocData));
|
||||
}
|
||||
|
||||
HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListWhenIsSuitableUSMHostAllocThenReturnCorrectValue, IsAtLeastSkl) {
|
||||
@@ -1948,9 +1950,11 @@ HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListWhenIsSuitableUSM
|
||||
NEO::SvmAllocationData *srcAllocData;
|
||||
NEO::SvmAllocationData *dstAllocData;
|
||||
auto srcFound = device->getDriverHandle()->findAllocationDataForRange(hostPtr, 1024, &srcAllocData);
|
||||
EXPECT_TRUE(srcFound);
|
||||
auto dstFound = device->getDriverHandle()->findAllocationDataForRange(devicePtr, 1024, &dstAllocData);
|
||||
EXPECT_TRUE(cmdList.isSuitableUSMHostAlloc(srcAllocData, srcFound));
|
||||
EXPECT_FALSE(cmdList.isSuitableUSMHostAlloc(dstAllocData, dstFound));
|
||||
EXPECT_TRUE(dstFound);
|
||||
EXPECT_TRUE(cmdList.isSuitableUSMHostAlloc(srcAllocData));
|
||||
EXPECT_FALSE(cmdList.isSuitableUSMHostAlloc(dstAllocData));
|
||||
}
|
||||
|
||||
HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListWhenIsSuitableUSMSharedAllocThenReturnCorrectValue, IsAtLeastSkl) {
|
||||
@@ -1960,11 +1964,14 @@ HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListWhenIsSuitableUSM
|
||||
NEO::SvmAllocationData *deviceAllocData;
|
||||
NEO::SvmAllocationData *sharedAllocData;
|
||||
auto hostAllocFound = device->getDriverHandle()->findAllocationDataForRange(hostPtr, 1024, &hostAllocData);
|
||||
EXPECT_TRUE(hostAllocFound);
|
||||
auto deviceAllocFound = device->getDriverHandle()->findAllocationDataForRange(devicePtr, 1024, &deviceAllocData);
|
||||
EXPECT_TRUE(deviceAllocFound);
|
||||
auto sharedAllocFound = device->getDriverHandle()->findAllocationDataForRange(sharedPtr, 1024, &sharedAllocData);
|
||||
EXPECT_FALSE(cmdList.isSuitableUSMSharedAlloc(hostAllocData, hostAllocFound));
|
||||
EXPECT_FALSE(cmdList.isSuitableUSMSharedAlloc(deviceAllocData, deviceAllocFound));
|
||||
EXPECT_TRUE(cmdList.isSuitableUSMSharedAlloc(sharedAllocData, sharedAllocFound));
|
||||
EXPECT_TRUE(sharedAllocFound);
|
||||
EXPECT_FALSE(cmdList.isSuitableUSMSharedAlloc(hostAllocData));
|
||||
EXPECT_FALSE(cmdList.isSuitableUSMSharedAlloc(deviceAllocData));
|
||||
EXPECT_TRUE(cmdList.isSuitableUSMSharedAlloc(sharedAllocData));
|
||||
}
|
||||
|
||||
struct LocalMemoryMultiSubDeviceFixture : public SingleRootMultiSubDeviceFixture {
|
||||
@@ -1988,7 +1995,8 @@ HWTEST2_F(LocalMemoryMultiSubDeviceTest, givenImmediateCommandListWhenIsSuitable
|
||||
|
||||
NEO::SvmAllocationData *allocData;
|
||||
auto allocFound = device->getDriverHandle()->findAllocationDataForRange(devicePtr, 2 * MemoryConstants::megaByte, &allocData);
|
||||
EXPECT_FALSE(cmdList.isSuitableUSMDeviceAlloc(allocData, allocFound));
|
||||
EXPECT_TRUE(allocFound);
|
||||
EXPECT_FALSE(cmdList.isSuitableUSMDeviceAlloc(allocData));
|
||||
context->freeMem(devicePtr);
|
||||
}
|
||||
|
||||
@@ -1996,22 +2004,24 @@ HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndNonUsmHostPtrA
|
||||
DebugManager.flags.ExperimentalCopyThroughLock.set(0);
|
||||
MockCommandListImmediateHw<gfxCoreFamily> cmdList;
|
||||
cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
|
||||
NEO::SvmAllocationData *srcAllocData;
|
||||
NEO::SvmAllocationData *dstAllocData;
|
||||
auto srcFound = device->getDriverHandle()->findAllocationDataForRange(nonUsmHostPtr, 1024, &srcAllocData);
|
||||
auto dstFound = device->getDriverHandle()->findAllocationDataForRange(devicePtr, 1024, &dstAllocData);
|
||||
EXPECT_FALSE(cmdList.preferCopyThroughLockedPtr(dstAllocData, dstFound, srcAllocData, srcFound, 1024));
|
||||
CpuMemCopyInfo cpuMemCopyInfo(devicePtr, nonUsmHostPtr, 1024);
|
||||
auto srcFound = device->getDriverHandle()->findAllocationDataForRange(nonUsmHostPtr, 1024, &cpuMemCopyInfo.srcAllocData);
|
||||
ASSERT_FALSE(srcFound);
|
||||
auto dstFound = device->getDriverHandle()->findAllocationDataForRange(devicePtr, 1024, &cpuMemCopyInfo.dstAllocData);
|
||||
ASSERT_TRUE(dstFound);
|
||||
EXPECT_FALSE(cmdList.preferCopyThroughLockedPtr(cpuMemCopyInfo));
|
||||
}
|
||||
|
||||
HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndForcingLockPtrViaEnvVariableWhenPreferCopyThroughLockPointerCalledThenTrueIsReturned, IsAtLeastSkl) {
|
||||
DebugManager.flags.ExperimentalForceCopyThroughLock.set(1);
|
||||
MockCommandListImmediateHw<gfxCoreFamily> cmdList;
|
||||
cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
|
||||
NEO::SvmAllocationData *srcAllocData;
|
||||
NEO::SvmAllocationData *dstAllocData;
|
||||
auto srcFound = device->getDriverHandle()->findAllocationDataForRange(nonUsmHostPtr, 1024, &srcAllocData);
|
||||
auto dstFound = device->getDriverHandle()->findAllocationDataForRange(devicePtr, 1024, &dstAllocData);
|
||||
EXPECT_TRUE(cmdList.preferCopyThroughLockedPtr(dstAllocData, dstFound, srcAllocData, srcFound, 1024));
|
||||
CpuMemCopyInfo cpuMemCopyInfo(devicePtr, nonUsmHostPtr, 1024);
|
||||
auto srcFound = device->getDriverHandle()->findAllocationDataForRange(nonUsmHostPtr, 1024, &cpuMemCopyInfo.srcAllocData);
|
||||
ASSERT_FALSE(srcFound);
|
||||
auto dstFound = device->getDriverHandle()->findAllocationDataForRange(devicePtr, 1024, &cpuMemCopyInfo.dstAllocData);
|
||||
ASSERT_TRUE(dstFound);
|
||||
EXPECT_TRUE(cmdList.preferCopyThroughLockedPtr(cpuMemCopyInfo));
|
||||
}
|
||||
|
||||
HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListWhenGetTransferTypeThenReturnCorrectValue, IsAtLeastSkl) {
|
||||
@@ -2024,26 +2034,30 @@ HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListWhenGetTransferTy
|
||||
NEO::SvmAllocationData *sharedUSMAllocData;
|
||||
|
||||
const auto hostUSMFound = device->getDriverHandle()->findAllocationDataForRange(hostPtr, 1024, &hostUSMAllocData);
|
||||
EXPECT_TRUE(hostUSMFound);
|
||||
const auto hostNonUSMFound = device->getDriverHandle()->findAllocationDataForRange(nonUsmHostPtr, 1024, &hostNonUSMAllocData);
|
||||
EXPECT_FALSE(hostNonUSMFound);
|
||||
const auto deviceUSMFound = device->getDriverHandle()->findAllocationDataForRange(devicePtr, 1024, &deviceUSMAllocData);
|
||||
EXPECT_TRUE(deviceUSMFound);
|
||||
const auto sharedUSMFound = device->getDriverHandle()->findAllocationDataForRange(sharedPtr, 1024, &sharedUSMAllocData);
|
||||
EXPECT_TRUE(sharedUSMFound);
|
||||
|
||||
EXPECT_EQ(HOST_NON_USM_TO_HOST_USM, cmdList.getTransferType(hostUSMAllocData, hostUSMFound, hostNonUSMAllocData, hostNonUSMFound));
|
||||
EXPECT_EQ(HOST_NON_USM_TO_DEVICE_USM, cmdList.getTransferType(deviceUSMAllocData, deviceUSMFound, hostNonUSMAllocData, hostNonUSMFound));
|
||||
EXPECT_EQ(HOST_NON_USM_TO_DEVICE_USM, cmdList.getTransferType(sharedUSMAllocData, sharedUSMFound, hostNonUSMAllocData, hostNonUSMFound));
|
||||
EXPECT_EQ(HOST_NON_USM_TO_HOST_NON_USM, cmdList.getTransferType(hostNonUSMAllocData, hostNonUSMFound, hostNonUSMAllocData, hostNonUSMFound));
|
||||
EXPECT_EQ(HOST_NON_USM_TO_HOST_USM, cmdList.getTransferType(hostUSMAllocData, hostNonUSMAllocData));
|
||||
EXPECT_EQ(HOST_NON_USM_TO_DEVICE_USM, cmdList.getTransferType(deviceUSMAllocData, hostNonUSMAllocData));
|
||||
EXPECT_EQ(HOST_NON_USM_TO_DEVICE_USM, cmdList.getTransferType(sharedUSMAllocData, hostNonUSMAllocData));
|
||||
EXPECT_EQ(HOST_NON_USM_TO_HOST_NON_USM, cmdList.getTransferType(hostNonUSMAllocData, hostNonUSMAllocData));
|
||||
|
||||
EXPECT_EQ(HOST_USM_TO_HOST_USM, cmdList.getTransferType(hostUSMAllocData, hostUSMFound, hostUSMAllocData, hostUSMFound));
|
||||
EXPECT_EQ(HOST_USM_TO_DEVICE_USM, cmdList.getTransferType(deviceUSMAllocData, deviceUSMFound, hostUSMAllocData, hostUSMFound));
|
||||
EXPECT_EQ(HOST_USM_TO_DEVICE_USM, cmdList.getTransferType(sharedUSMAllocData, sharedUSMFound, hostUSMAllocData, hostUSMFound));
|
||||
EXPECT_EQ(HOST_USM_TO_HOST_NON_USM, cmdList.getTransferType(hostNonUSMAllocData, hostNonUSMFound, hostUSMAllocData, hostUSMFound));
|
||||
EXPECT_EQ(HOST_USM_TO_HOST_USM, cmdList.getTransferType(hostUSMAllocData, hostUSMAllocData));
|
||||
EXPECT_EQ(HOST_USM_TO_DEVICE_USM, cmdList.getTransferType(deviceUSMAllocData, hostUSMAllocData));
|
||||
EXPECT_EQ(HOST_USM_TO_DEVICE_USM, cmdList.getTransferType(sharedUSMAllocData, hostUSMAllocData));
|
||||
EXPECT_EQ(HOST_USM_TO_HOST_NON_USM, cmdList.getTransferType(hostNonUSMAllocData, hostUSMAllocData));
|
||||
|
||||
EXPECT_EQ(DEVICE_USM_TO_HOST_USM, cmdList.getTransferType(hostUSMAllocData, hostUSMFound, deviceUSMAllocData, deviceUSMFound));
|
||||
EXPECT_EQ(DEVICE_USM_TO_HOST_USM, cmdList.getTransferType(hostUSMAllocData, hostUSMFound, sharedUSMAllocData, sharedUSMFound));
|
||||
EXPECT_EQ(DEVICE_USM_TO_DEVICE_USM, cmdList.getTransferType(deviceUSMAllocData, deviceUSMFound, deviceUSMAllocData, deviceUSMFound));
|
||||
EXPECT_EQ(DEVICE_USM_TO_DEVICE_USM, cmdList.getTransferType(sharedUSMAllocData, sharedUSMFound, sharedUSMAllocData, sharedUSMFound));
|
||||
EXPECT_EQ(DEVICE_USM_TO_HOST_NON_USM, cmdList.getTransferType(hostNonUSMAllocData, hostNonUSMFound, deviceUSMAllocData, deviceUSMFound));
|
||||
EXPECT_EQ(DEVICE_USM_TO_HOST_NON_USM, cmdList.getTransferType(hostNonUSMAllocData, hostNonUSMFound, sharedUSMAllocData, sharedUSMFound));
|
||||
EXPECT_EQ(DEVICE_USM_TO_HOST_USM, cmdList.getTransferType(hostUSMAllocData, deviceUSMAllocData));
|
||||
EXPECT_EQ(DEVICE_USM_TO_HOST_USM, cmdList.getTransferType(hostUSMAllocData, sharedUSMAllocData));
|
||||
EXPECT_EQ(DEVICE_USM_TO_DEVICE_USM, cmdList.getTransferType(deviceUSMAllocData, deviceUSMAllocData));
|
||||
EXPECT_EQ(DEVICE_USM_TO_DEVICE_USM, cmdList.getTransferType(sharedUSMAllocData, sharedUSMAllocData));
|
||||
EXPECT_EQ(DEVICE_USM_TO_HOST_NON_USM, cmdList.getTransferType(hostNonUSMAllocData, deviceUSMAllocData));
|
||||
EXPECT_EQ(DEVICE_USM_TO_HOST_NON_USM, cmdList.getTransferType(hostNonUSMAllocData, sharedUSMAllocData));
|
||||
}
|
||||
|
||||
HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndNonUsmHostPtrWhenCopyH2DThenLockPtr, IsAtLeastSkl) {
|
||||
@@ -2161,6 +2175,7 @@ HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndNonUsmHostPtrW
|
||||
NEO::SvmAllocationData *allocData;
|
||||
device->getDriverHandle()->findAllocationDataForRange(devicePtr, 1024, &allocData);
|
||||
auto dstAlloc = allocData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
|
||||
|
||||
auto lockedPtr = reinterpret_cast<char *>(dstAlloc->getLockedPtr());
|
||||
EXPECT_EQ(0, memcmp(lockedPtr, nonUsmHostPtr, 1024));
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user