mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-29 17:13:29 +08:00
Create method to deduce transfer type
Related-To: NEO-7564 Signed-off-by: Fabian Zwolinski <fabian.zwolinski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
eb002acaa1
commit
d6bfcdb245
@@ -142,11 +142,14 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
|
||||
NEO::LogicalStateHelper *getLogicalStateHelper() const override;
|
||||
|
||||
bool preferCopyThroughLockedPtr(NEO::SvmAllocationData *dstAlloc, bool dstFound, NEO::SvmAllocationData *srcAlloc, bool srcFound, size_t size);
|
||||
// Returns true when the lookup succeeded (allocFound) and alloc's memory type is HOST_UNIFIED_MEMORY.
bool isSuitableUSMHostAlloc(NEO::SvmAllocationData *alloc, bool allocFound);
|
||||
// Returns true when the lookup succeeded (allocFound), alloc's memory type is DEVICE_UNIFIED_MEMORY
// and its graphics allocation for this command list's root device reports exactly one bank.
bool isSuitableUSMDeviceAlloc(NEO::SvmAllocationData *alloc, bool allocFound);
|
||||
// Returns true when the lookup succeeded (allocFound) and alloc's memory type is SHARED_UNIFIED_MEMORY.
bool isSuitableUSMSharedAlloc(NEO::SvmAllocationData *alloc, bool allocFound);
|
||||
ze_result_t performCpuMemcpy(void *dstptr, const void *srcptr, size_t size, NEO::SvmAllocationData *dstAlloc, NEO::SvmAllocationData *srcAlloc, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents);
|
||||
void *obtainLockedPtrFromDevice(NEO::SvmAllocationData *alloc, void *ptr);
|
||||
bool waitForEventsFromHost();
|
||||
void checkWaitEventsState(uint32_t numWaitEvents, ze_event_handle_t *waitEventList);
|
||||
// Classifies a copy by the kind of its source and destination memory: non-USM host (null allocation
// data), USM host, or USM device (suitable device allocations and shared allocations are both
// treated as device), and returns the matching <SRC>_TO_<DST> TransferType value.
TransferType getTransferType(NEO::SvmAllocationData *dstAlloc, bool dstFound, NEO::SvmAllocationData *srcAlloc, bool srcFound);
|
||||
|
||||
protected:
|
||||
void printKernelsPrintfOutput(bool hangDetected);
|
||||
|
||||
@@ -661,12 +661,22 @@ bool CommandListCoreFamilyImmediate<gfxCoreFamily>::preferCopyThroughLockedPtr(N
|
||||
return false;
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
bool CommandListCoreFamilyImmediate<gfxCoreFamily>::isSuitableUSMHostAlloc(NEO::SvmAllocationData *alloc, bool allocFound) {
|
||||
return allocFound && (alloc->memoryType == InternalMemoryType::HOST_UNIFIED_MEMORY);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
bool CommandListCoreFamilyImmediate<gfxCoreFamily>::isSuitableUSMDeviceAlloc(NEO::SvmAllocationData *alloc, bool allocFound) {
|
||||
return allocFound && (alloc->memoryType == InternalMemoryType::DEVICE_UNIFIED_MEMORY) &&
|
||||
alloc->gpuAllocations.getGraphicsAllocation(this->device->getRootDeviceIndex())->storageInfo.getNumBanks() == 1;
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
bool CommandListCoreFamilyImmediate<gfxCoreFamily>::isSuitableUSMSharedAlloc(NEO::SvmAllocationData *alloc, bool allocFound) {
|
||||
return allocFound && (alloc->memoryType == InternalMemoryType::SHARED_UNIFIED_MEMORY);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::performCpuMemcpy(void *dstptr, const void *srcptr, size_t size, NEO::SvmAllocationData *dstAlloc, NEO::SvmAllocationData *srcAlloc, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) {
|
||||
|
||||
@@ -741,6 +751,51 @@ void CommandListCoreFamilyImmediate<gfxCoreFamily>::checkWaitEventsState(uint32_
|
||||
}
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
TransferType CommandListCoreFamilyImmediate<gfxCoreFamily>::getTransferType(NEO::SvmAllocationData *dstAlloc, bool dstFound, NEO::SvmAllocationData *srcAlloc, bool srcFound) {
|
||||
const bool srcHostUSM = isSuitableUSMHostAlloc(srcAlloc, srcFound);
|
||||
const bool srcDeviceUSM = isSuitableUSMDeviceAlloc(srcAlloc, srcFound) || isSuitableUSMSharedAlloc(srcAlloc, srcFound);
|
||||
const bool srcHostNonUSM = srcAlloc == nullptr;
|
||||
|
||||
const bool dstHostUSM = isSuitableUSMHostAlloc(dstAlloc, dstFound);
|
||||
const bool dstDeviceUSM = isSuitableUSMDeviceAlloc(dstAlloc, dstFound) || isSuitableUSMSharedAlloc(dstAlloc, dstFound);
|
||||
const bool dstHostNonUSM = dstAlloc == nullptr;
|
||||
|
||||
TransferType retVal;
|
||||
|
||||
if (srcHostNonUSM && dstHostUSM) {
|
||||
retVal = HOST_NON_USM_TO_HOST_USM;
|
||||
}
|
||||
if (srcHostNonUSM && dstDeviceUSM) {
|
||||
retVal = HOST_NON_USM_TO_DEVICE_USM;
|
||||
}
|
||||
if (srcHostNonUSM && dstHostNonUSM) {
|
||||
retVal = HOST_NON_USM_TO_HOST_NON_USM;
|
||||
}
|
||||
|
||||
if (srcHostUSM && dstHostUSM) {
|
||||
retVal = HOST_USM_TO_HOST_USM;
|
||||
}
|
||||
if (srcHostUSM && dstDeviceUSM) {
|
||||
retVal = HOST_USM_TO_DEVICE_USM;
|
||||
}
|
||||
if (srcHostUSM && dstHostNonUSM) {
|
||||
retVal = HOST_USM_TO_HOST_NON_USM;
|
||||
}
|
||||
|
||||
if (srcDeviceUSM && dstHostUSM) {
|
||||
retVal = DEVICE_USM_TO_HOST_USM;
|
||||
}
|
||||
if (srcDeviceUSM && dstDeviceUSM) {
|
||||
retVal = DEVICE_USM_TO_DEVICE_USM;
|
||||
}
|
||||
if (srcDeviceUSM && dstHostNonUSM) {
|
||||
retVal = DEVICE_USM_TO_HOST_NON_USM;
|
||||
}
|
||||
|
||||
return retVal;
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamilyImmediate<gfxCoreFamily>::printKernelsPrintfOutput(bool hangDetected) {
|
||||
size_t size = this->printfKernelContainer.size();
|
||||
|
||||
@@ -1895,18 +1895,27 @@ struct AppendMemoryLockedCopyFixture : public DeviceFixture {
|
||||
DeviceFixture::setUp();
|
||||
|
||||
nonUsmHostPtr = new char[sz];
|
||||
ze_host_mem_alloc_desc_t hostDesc = {};
|
||||
context->allocHostMem(&hostDesc, sz, 1u, &hostPtr);
|
||||
|
||||
ze_device_mem_alloc_desc_t deviceDesc = {};
|
||||
context->allocDeviceMem(device->toHandle(), &deviceDesc, sz, 1u, &devicePtr);
|
||||
|
||||
context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, sz, 1u, &sharedPtr);
|
||||
}
|
||||
// Releases every buffer owned by the fixture, then tears down the base device fixture.
void tearDown() {
    // USM allocations obtained through the context (host, device and shared).
    context->freeMem(hostPtr);
    context->freeMem(devicePtr);
    context->freeMem(sharedPtr);

    // Plain heap array that serves as the non-USM host pointer.
    delete[] nonUsmHostPtr;

    DeviceFixture::tearDown();
}
|
||||
|
||||
DebugManagerStateRestore restore;
|
||||
char *nonUsmHostPtr;
|
||||
void *hostPtr;
|
||||
void *devicePtr;
|
||||
void *sharedPtr;
|
||||
size_t sz = 4 * MemoryConstants::megaByte;
|
||||
};
|
||||
|
||||
@@ -1933,6 +1942,31 @@ HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListWhenIsSuitableUSM
|
||||
EXPECT_TRUE(cmdList.isSuitableUSMDeviceAlloc(dstAllocData, dstFound));
|
||||
}
|
||||
|
||||
// Verifies isSuitableUSMHostAlloc accepts a host USM allocation and rejects a device USM one.
HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListWhenIsSuitableUSMHostAllocThenReturnCorrectValue, IsAtLeastSkl) {
    MockCommandListImmediateHw<gfxCoreFamily> cmdList;
    cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);

    NEO::SvmAllocationData *srcAllocData = nullptr;
    NEO::SvmAllocationData *dstAllocData = nullptr;
    const auto srcFound = device->getDriverHandle()->findAllocationDataForRange(hostPtr, 1024, &srcAllocData);
    const auto dstFound = device->getDriverHandle()->findAllocationDataForRange(devicePtr, 1024, &dstAllocData);

    // Both lookups must succeed; otherwise the negative expectation below would pass vacuously
    // through the allocFound short-circuit without ever inspecting the memory type.
    ASSERT_TRUE(srcFound);
    ASSERT_TRUE(dstFound);

    EXPECT_TRUE(cmdList.isSuitableUSMHostAlloc(srcAllocData, srcFound));
    EXPECT_FALSE(cmdList.isSuitableUSMHostAlloc(dstAllocData, dstFound));
}
|
||||
|
||||
// Verifies isSuitableUSMSharedAlloc accepts only the shared USM allocation.
HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListWhenIsSuitableUSMSharedAllocThenReturnCorrectValue, IsAtLeastSkl) {
    MockCommandListImmediateHw<gfxCoreFamily> cmdList;
    cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);

    NEO::SvmAllocationData *hostAllocData = nullptr;
    NEO::SvmAllocationData *deviceAllocData = nullptr;
    NEO::SvmAllocationData *sharedAllocData = nullptr;
    const auto hostAllocFound = device->getDriverHandle()->findAllocationDataForRange(hostPtr, 1024, &hostAllocData);
    const auto deviceAllocFound = device->getDriverHandle()->findAllocationDataForRange(devicePtr, 1024, &deviceAllocData);
    const auto sharedAllocFound = device->getDriverHandle()->findAllocationDataForRange(sharedPtr, 1024, &sharedAllocData);

    // All lookups must succeed; otherwise the negative expectations below would pass vacuously
    // through the allocFound short-circuit without ever inspecting the memory type.
    ASSERT_TRUE(hostAllocFound);
    ASSERT_TRUE(deviceAllocFound);
    ASSERT_TRUE(sharedAllocFound);

    EXPECT_FALSE(cmdList.isSuitableUSMSharedAlloc(hostAllocData, hostAllocFound));
    EXPECT_FALSE(cmdList.isSuitableUSMSharedAlloc(deviceAllocData, deviceAllocFound));
    EXPECT_TRUE(cmdList.isSuitableUSMSharedAlloc(sharedAllocData, sharedAllocFound));
}
|
||||
|
||||
struct LocalMemoryMultiSubDeviceFixture : public SingleRootMultiSubDeviceFixture {
|
||||
void setUp() {
|
||||
DebugManager.flags.EnableLocalMemory.set(1);
|
||||
@@ -1980,6 +2014,38 @@ HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndForcingLockPtr
|
||||
EXPECT_TRUE(cmdList.preferCopyThroughLockedPtr(dstAllocData, dstFound, srcAllocData, srcFound, 1024));
|
||||
}
|
||||
|
||||
// Verifies getTransferType for every source/destination pairing of non-USM host, USM host,
// USM device and USM shared memory (shared is expected to be reported as device).
HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListWhenGetTransferTypeThenReturnCorrectValue, IsAtLeastSkl) {
    MockCommandListImmediateHw<gfxCoreFamily> cmdList;
    cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);

    NEO::SvmAllocationData *hostUSMAllocData = nullptr;
    NEO::SvmAllocationData *hostNonUSMAllocData = nullptr;
    NEO::SvmAllocationData *deviceUSMAllocData = nullptr;
    NEO::SvmAllocationData *sharedUSMAllocData = nullptr;

    const auto hostUSMFound = device->getDriverHandle()->findAllocationDataForRange(hostPtr, 1024, &hostUSMAllocData);
    const auto hostNonUSMFound = device->getDriverHandle()->findAllocationDataForRange(nonUsmHostPtr, 1024, &hostNonUSMAllocData);
    const auto deviceUSMFound = device->getDriverHandle()->findAllocationDataForRange(devicePtr, 1024, &deviceUSMAllocData);
    const auto sharedUSMFound = device->getDriverHandle()->findAllocationDataForRange(sharedPtr, 1024, &sharedUSMAllocData);

    // Pin the preconditions the expectations below depend on: the USM pointers resolve, and the
    // plain heap pointer resolves to no allocation data (getTransferType keys "non-USM" on nullptr).
    ASSERT_TRUE(hostUSMFound);
    ASSERT_TRUE(deviceUSMFound);
    ASSERT_TRUE(sharedUSMFound);
    ASSERT_FALSE(hostNonUSMFound);
    ASSERT_EQ(nullptr, hostNonUSMAllocData);

    // Source: non-USM host memory.
    EXPECT_EQ(HOST_NON_USM_TO_HOST_USM, cmdList.getTransferType(hostUSMAllocData, hostUSMFound, hostNonUSMAllocData, hostNonUSMFound));
    EXPECT_EQ(HOST_NON_USM_TO_DEVICE_USM, cmdList.getTransferType(deviceUSMAllocData, deviceUSMFound, hostNonUSMAllocData, hostNonUSMFound));
    EXPECT_EQ(HOST_NON_USM_TO_DEVICE_USM, cmdList.getTransferType(sharedUSMAllocData, sharedUSMFound, hostNonUSMAllocData, hostNonUSMFound));
    EXPECT_EQ(HOST_NON_USM_TO_HOST_NON_USM, cmdList.getTransferType(hostNonUSMAllocData, hostNonUSMFound, hostNonUSMAllocData, hostNonUSMFound));

    // Source: USM host memory.
    EXPECT_EQ(HOST_USM_TO_HOST_USM, cmdList.getTransferType(hostUSMAllocData, hostUSMFound, hostUSMAllocData, hostUSMFound));
    EXPECT_EQ(HOST_USM_TO_DEVICE_USM, cmdList.getTransferType(deviceUSMAllocData, deviceUSMFound, hostUSMAllocData, hostUSMFound));
    EXPECT_EQ(HOST_USM_TO_DEVICE_USM, cmdList.getTransferType(sharedUSMAllocData, sharedUSMFound, hostUSMAllocData, hostUSMFound));
    EXPECT_EQ(HOST_USM_TO_HOST_NON_USM, cmdList.getTransferType(hostNonUSMAllocData, hostNonUSMFound, hostUSMAllocData, hostUSMFound));

    // Source: USM device or shared memory.
    EXPECT_EQ(DEVICE_USM_TO_HOST_USM, cmdList.getTransferType(hostUSMAllocData, hostUSMFound, deviceUSMAllocData, deviceUSMFound));
    EXPECT_EQ(DEVICE_USM_TO_HOST_USM, cmdList.getTransferType(hostUSMAllocData, hostUSMFound, sharedUSMAllocData, sharedUSMFound));
    EXPECT_EQ(DEVICE_USM_TO_DEVICE_USM, cmdList.getTransferType(deviceUSMAllocData, deviceUSMFound, deviceUSMAllocData, deviceUSMFound));
    EXPECT_EQ(DEVICE_USM_TO_DEVICE_USM, cmdList.getTransferType(sharedUSMAllocData, sharedUSMFound, sharedUSMAllocData, sharedUSMFound));
    EXPECT_EQ(DEVICE_USM_TO_HOST_NON_USM, cmdList.getTransferType(hostNonUSMAllocData, hostNonUSMFound, deviceUSMAllocData, deviceUSMFound));
    EXPECT_EQ(DEVICE_USM_TO_HOST_NON_USM, cmdList.getTransferType(hostNonUSMAllocData, hostNonUSMFound, sharedUSMAllocData, sharedUSMFound));
}
|
||||
|
||||
HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndNonUsmHostPtrWhenCopyH2DThenLockPtr, IsAtLeastSkl) {
|
||||
MockCommandListImmediateHw<gfxCoreFamily> cmdList;
|
||||
cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
|
||||
|
||||
Reference in New Issue
Block a user