From d6bfcdb24534a5bcda334522bccbcf71ef862d1b Mon Sep 17 00:00:00 2001 From: Fabian Zwolinski Date: Fri, 20 Jan 2023 17:21:19 +0000 Subject: [PATCH] Create method to deduce transfer type Related-To: NEO-7564 Signed-off-by: Fabian Zwolinski --- .../source/cmdlist/cmdlist_hw_immediate.h | 3 + .../source/cmdlist/cmdlist_hw_immediate.inl | 55 ++++++++++++++++ .../sources/cmdlist/test_cmdlist_7.cpp | 66 +++++++++++++++++++ shared/source/unified_memory/unified_memory.h | 16 ++++- 4 files changed, 139 insertions(+), 1 deletion(-) diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h index 6276592f5e..cbf7d549c8 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h @@ -142,11 +142,14 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily::preferCopyThroughLockedPtr(N return false; } +template +bool CommandListCoreFamilyImmediate::isSuitableUSMHostAlloc(NEO::SvmAllocationData *alloc, bool allocFound) { + return allocFound && (alloc->memoryType == InternalMemoryType::HOST_UNIFIED_MEMORY); +} + template bool CommandListCoreFamilyImmediate::isSuitableUSMDeviceAlloc(NEO::SvmAllocationData *alloc, bool allocFound) { return allocFound && (alloc->memoryType == InternalMemoryType::DEVICE_UNIFIED_MEMORY) && alloc->gpuAllocations.getGraphicsAllocation(this->device->getRootDeviceIndex())->storageInfo.getNumBanks() == 1; } +template +bool CommandListCoreFamilyImmediate::isSuitableUSMSharedAlloc(NEO::SvmAllocationData *alloc, bool allocFound) { + return allocFound && (alloc->memoryType == InternalMemoryType::SHARED_UNIFIED_MEMORY); +} + template ze_result_t CommandListCoreFamilyImmediate::performCpuMemcpy(void *dstptr, const void *srcptr, size_t size, NEO::SvmAllocationData *dstAlloc, NEO::SvmAllocationData *srcAlloc, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { @@ -741,6 +751,51 @@ 
void CommandListCoreFamilyImmediate::checkWaitEventsState(uint32_ } } +template +TransferType CommandListCoreFamilyImmediate::getTransferType(NEO::SvmAllocationData *dstAlloc, bool dstFound, NEO::SvmAllocationData *srcAlloc, bool srcFound) { + const bool srcHostUSM = isSuitableUSMHostAlloc(srcAlloc, srcFound); + const bool srcDeviceUSM = isSuitableUSMDeviceAlloc(srcAlloc, srcFound) || isSuitableUSMSharedAlloc(srcAlloc, srcFound); + const bool srcHostNonUSM = srcAlloc == nullptr; + + const bool dstHostUSM = isSuitableUSMHostAlloc(dstAlloc, dstFound); + const bool dstDeviceUSM = isSuitableUSMDeviceAlloc(dstAlloc, dstFound) || isSuitableUSMSharedAlloc(dstAlloc, dstFound); + const bool dstHostNonUSM = dstAlloc == nullptr; + + TransferType retVal = HOST_NON_USM_TO_HOST_NON_USM; // initialized: without this, an unmatched src/dst combination returns an indeterminate value (UB) + + if (srcHostNonUSM && dstHostUSM) { + retVal = HOST_NON_USM_TO_HOST_USM; + } + if (srcHostNonUSM && dstDeviceUSM) { + retVal = HOST_NON_USM_TO_DEVICE_USM; + } + if (srcHostNonUSM && dstHostNonUSM) { + retVal = HOST_NON_USM_TO_HOST_NON_USM; + } + + if (srcHostUSM && dstHostUSM) { + retVal = HOST_USM_TO_HOST_USM; + } + if (srcHostUSM && dstDeviceUSM) { + retVal = HOST_USM_TO_DEVICE_USM; + } + if (srcHostUSM && dstHostNonUSM) { + retVal = HOST_USM_TO_HOST_NON_USM; + } + + if (srcDeviceUSM && dstHostUSM) { + retVal = DEVICE_USM_TO_HOST_USM; + } + if (srcDeviceUSM && dstDeviceUSM) { + retVal = DEVICE_USM_TO_DEVICE_USM; + } + if (srcDeviceUSM && dstHostNonUSM) { + retVal = DEVICE_USM_TO_HOST_NON_USM; + } + + return retVal; +} + template void CommandListCoreFamilyImmediate::printKernelsPrintfOutput(bool hangDetected) { size_t size = this->printfKernelContainer.size(); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp index 1717cf7f2d..7d1b45e2d5 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp @@ -1895,18 +1895,27 @@ struct
AppendMemoryLockedCopyFixture : public DeviceFixture { DeviceFixture::setUp(); nonUsmHostPtr = new char[sz]; + ze_host_mem_alloc_desc_t hostDesc = {}; + context->allocHostMem(&hostDesc, sz, 1u, &hostPtr); + ze_device_mem_alloc_desc_t deviceDesc = {}; context->allocDeviceMem(device->toHandle(), &deviceDesc, sz, 1u, &devicePtr); + + context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, sz, 1u, &sharedPtr); } void tearDown() { delete[] nonUsmHostPtr; + context->freeMem(hostPtr); context->freeMem(devicePtr); + context->freeMem(sharedPtr); DeviceFixture::tearDown(); } DebugManagerStateRestore restore; char *nonUsmHostPtr; + void *hostPtr = nullptr; void *devicePtr; + void *sharedPtr = nullptr; size_t sz = 4 * MemoryConstants::megaByte; }; @@ -1933,6 +1942,31 @@ HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListWhenIsSuitableUSM EXPECT_TRUE(cmdList.isSuitableUSMDeviceAlloc(dstAllocData, dstFound)); } +HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListWhenIsSuitableUSMHostAllocThenReturnCorrectValue, IsAtLeastSkl) { + MockCommandListImmediateHw cmdList; + cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); + NEO::SvmAllocationData *srcAllocData; + NEO::SvmAllocationData *dstAllocData; + auto srcFound = device->getDriverHandle()->findAllocationDataForRange(hostPtr, 1024, &srcAllocData); + auto dstFound = device->getDriverHandle()->findAllocationDataForRange(devicePtr, 1024, &dstAllocData); + EXPECT_TRUE(cmdList.isSuitableUSMHostAlloc(srcAllocData, srcFound)); + EXPECT_FALSE(cmdList.isSuitableUSMHostAlloc(dstAllocData, dstFound)); +} + +HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListWhenIsSuitableUSMSharedAllocThenReturnCorrectValue, IsAtLeastSkl) { + MockCommandListImmediateHw cmdList; + cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); + NEO::SvmAllocationData *hostAllocData; + NEO::SvmAllocationData *deviceAllocData; + NEO::SvmAllocationData *sharedAllocData; + auto hostAllocFound = 
device->getDriverHandle()->findAllocationDataForRange(hostPtr, 1024, &hostAllocData); + auto deviceAllocFound = device->getDriverHandle()->findAllocationDataForRange(devicePtr, 1024, &deviceAllocData); + auto sharedAllocFound = device->getDriverHandle()->findAllocationDataForRange(sharedPtr, 1024, &sharedAllocData); + EXPECT_FALSE(cmdList.isSuitableUSMSharedAlloc(hostAllocData, hostAllocFound)); + EXPECT_FALSE(cmdList.isSuitableUSMSharedAlloc(deviceAllocData, deviceAllocFound)); + EXPECT_TRUE(cmdList.isSuitableUSMSharedAlloc(sharedAllocData, sharedAllocFound)); +} + struct LocalMemoryMultiSubDeviceFixture : public SingleRootMultiSubDeviceFixture { void setUp() { DebugManager.flags.EnableLocalMemory.set(1); @@ -1980,6 +2014,38 @@ HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndForcingLockPtr EXPECT_TRUE(cmdList.preferCopyThroughLockedPtr(dstAllocData, dstFound, srcAllocData, srcFound, 1024)); } +HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListWhenGetTransferTypeThenReturnCorrectValue, IsAtLeastSkl) { + MockCommandListImmediateHw cmdList; + cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); + + NEO::SvmAllocationData *hostUSMAllocData; + NEO::SvmAllocationData *hostNonUSMAllocData; + NEO::SvmAllocationData *deviceUSMAllocData; + NEO::SvmAllocationData *sharedUSMAllocData; + + const auto hostUSMFound = device->getDriverHandle()->findAllocationDataForRange(hostPtr, 1024, &hostUSMAllocData); + const auto hostNonUSMFound = device->getDriverHandle()->findAllocationDataForRange(nonUsmHostPtr, 1024, &hostNonUSMAllocData); + const auto deviceUSMFound = device->getDriverHandle()->findAllocationDataForRange(devicePtr, 1024, &deviceUSMAllocData); + const auto sharedUSMFound = device->getDriverHandle()->findAllocationDataForRange(sharedPtr, 1024, &sharedUSMAllocData); + + EXPECT_EQ(HOST_NON_USM_TO_HOST_USM, cmdList.getTransferType(hostUSMAllocData, hostUSMFound, hostNonUSMAllocData, hostNonUSMFound)); + 
EXPECT_EQ(HOST_NON_USM_TO_DEVICE_USM, cmdList.getTransferType(deviceUSMAllocData, deviceUSMFound, hostNonUSMAllocData, hostNonUSMFound)); + EXPECT_EQ(HOST_NON_USM_TO_DEVICE_USM, cmdList.getTransferType(sharedUSMAllocData, sharedUSMFound, hostNonUSMAllocData, hostNonUSMFound)); + EXPECT_EQ(HOST_NON_USM_TO_HOST_NON_USM, cmdList.getTransferType(hostNonUSMAllocData, hostNonUSMFound, hostNonUSMAllocData, hostNonUSMFound)); + + EXPECT_EQ(HOST_USM_TO_HOST_USM, cmdList.getTransferType(hostUSMAllocData, hostUSMFound, hostUSMAllocData, hostUSMFound)); + EXPECT_EQ(HOST_USM_TO_DEVICE_USM, cmdList.getTransferType(deviceUSMAllocData, deviceUSMFound, hostUSMAllocData, hostUSMFound)); + EXPECT_EQ(HOST_USM_TO_DEVICE_USM, cmdList.getTransferType(sharedUSMAllocData, sharedUSMFound, hostUSMAllocData, hostUSMFound)); + EXPECT_EQ(HOST_USM_TO_HOST_NON_USM, cmdList.getTransferType(hostNonUSMAllocData, hostNonUSMFound, hostUSMAllocData, hostUSMFound)); + + EXPECT_EQ(DEVICE_USM_TO_HOST_USM, cmdList.getTransferType(hostUSMAllocData, hostUSMFound, deviceUSMAllocData, deviceUSMFound)); + EXPECT_EQ(DEVICE_USM_TO_HOST_USM, cmdList.getTransferType(hostUSMAllocData, hostUSMFound, sharedUSMAllocData, sharedUSMFound)); + EXPECT_EQ(DEVICE_USM_TO_DEVICE_USM, cmdList.getTransferType(deviceUSMAllocData, deviceUSMFound, deviceUSMAllocData, deviceUSMFound)); + EXPECT_EQ(DEVICE_USM_TO_DEVICE_USM, cmdList.getTransferType(sharedUSMAllocData, sharedUSMFound, sharedUSMAllocData, sharedUSMFound)); + EXPECT_EQ(DEVICE_USM_TO_HOST_NON_USM, cmdList.getTransferType(hostNonUSMAllocData, hostNonUSMFound, deviceUSMAllocData, deviceUSMFound)); + EXPECT_EQ(DEVICE_USM_TO_HOST_NON_USM, cmdList.getTransferType(hostNonUSMAllocData, hostNonUSMFound, sharedUSMAllocData, sharedUSMFound)); +} + HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndNonUsmHostPtrWhenCopyH2DThenLockPtr, IsAtLeastSkl) { MockCommandListImmediateHw cmdList; cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); diff --git 
a/shared/source/unified_memory/unified_memory.h b/shared/source/unified_memory/unified_memory.h index 20b989513b..a20dc6bde6 100644 --- a/shared/source/unified_memory/unified_memory.h +++ b/shared/source/unified_memory/unified_memory.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019-2022 Intel Corporation + * Copyright (C) 2019-2023 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -17,6 +17,20 @@ enum InternalMemoryType : uint32_t { SHARED_UNIFIED_MEMORY = 0b1000 }; +enum TransferType : uint32_t { + HOST_NON_USM_TO_HOST_USM = 0, + HOST_NON_USM_TO_DEVICE_USM = 1, + HOST_NON_USM_TO_HOST_NON_USM = 2, + + HOST_USM_TO_HOST_USM = 3, + HOST_USM_TO_DEVICE_USM = 4, + HOST_USM_TO_HOST_NON_USM = 5, + + DEVICE_USM_TO_HOST_USM = 6, + DEVICE_USM_TO_DEVICE_USM = 7, + DEVICE_USM_TO_HOST_NON_USM = 8 +}; + struct UnifiedMemoryControls { uint32_t generateMask(); bool indirectDeviceAllocationsAllowed = false;