From 934939c8b675194f67ac054b97e7d8fcce3d63c5 Mon Sep 17 00:00:00 2001 From: Lukasz Jobczyk Date: Thu, 15 Sep 2022 14:36:55 +0000 Subject: [PATCH] Add split implementation for append page fault copy Signed-off-by: Lukasz Jobczyk --- level_zero/core/source/cmdlist/cmdlist_hw.h | 2 + level_zero/core/source/cmdlist/cmdlist_hw.inl | 13 +++-- .../source/cmdlist/cmdlist_hw_immediate.inl | 16 +++++- .../xe_hpc_core/test_cmdqueue_xe_hpc_core.cpp | 54 +++++++++++++++++++ 4 files changed, 79 insertions(+), 6 deletions(-) diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.h b/level_zero/core/source/cmdlist/cmdlist_hw.h index 4ecadde896..7f0d2f5691 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw.h @@ -10,6 +10,7 @@ #include "shared/source/helpers/pipe_control_args.h" #include "shared/source/helpers/vec.h" #include "shared/source/kernel/kernel_arg_descriptor.h" +#include "shared/source/memory_manager/memory_pool.h" #include "level_zero/core/source/builtin/builtin_functions_lib.h" #include "level_zero/core/source/cmdlist/cmdlist_imp.h" @@ -245,6 +246,7 @@ struct CommandListCoreFamily : CommandListImp { size_t getTotalSizeForCopyRegion(const ze_copy_region_t *region, uint32_t pitch, uint32_t slicePitch); bool isAppendSplitNeeded(void *dstPtr, const void *srcPtr, size_t size); + bool isAppendSplitNeeded(NEO::MemoryPool dstPool, NEO::MemoryPool srcPool, size_t size); void applyMemoryRangesBarrier(uint32_t numRanges, const size_t *pRangeSizes, const void **pRanges); diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index b60f8c9a42..cbfbda3d81 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -2391,17 +2391,22 @@ inline size_t CommandListCoreFamily::getTotalSizeForCopyRegion(co template bool CommandListCoreFamily::isAppendSplitNeeded(void *dstPtr, const void *srcPtr, size_t size) { - constexpr size_t minimalSizeForBcsSplit = 4 * MemoryConstants::megaByte; - auto dstAllocationStruct = getAlignedAllocation(this->device, dstPtr, size, false); auto srcAllocationStruct = getAlignedAllocation(this->device, srcPtr, size, true); auto dstMemoryPool = dstAllocationStruct.alloc->getMemoryPool(); auto srcMemoryPool = srcAllocationStruct.alloc->getMemoryPool(); + return this->isAppendSplitNeeded(dstMemoryPool, srcMemoryPool, size); +} + +template +inline bool CommandListCoreFamily::isAppendSplitNeeded(NEO::MemoryPool dstPool, NEO::MemoryPool srcPool, size_t size) { + constexpr size_t minimalSizeForBcsSplit = 4 * MemoryConstants::megaByte; + return this->isBcsSplitNeeded && size >= minimalSizeForBcsSplit && - ((!NEO::MemoryPoolHelper::isSystemMemoryPool(dstMemoryPool) && NEO::MemoryPoolHelper::isSystemMemoryPool(srcMemoryPool)) || - (!NEO::MemoryPoolHelper::isSystemMemoryPool(srcMemoryPool) && NEO::MemoryPoolHelper::isSystemMemoryPool(dstMemoryPool))); + ((!NEO::MemoryPoolHelper::isSystemMemoryPool(dstPool) && NEO::MemoryPoolHelper::isSystemMemoryPool(srcPool)) || + (!NEO::MemoryPoolHelper::isSystemMemoryPool(srcPool) && NEO::MemoryPoolHelper::isSystemMemoryPool(dstPool))); } template diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl index 6c26a2cc43..098972f283 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl @@ -227,7 +227,6 @@ ze_result_t CommandListCoreFamilyImmediate::appendMemoryCopy( ret = CommandListCoreFamily::appendMemoryCopy(dstptr, srcptr, size, hSignalEvent, numWaitEvents, phWaitEvents); } - return flushImmediate(ret, true); } @@ -320,7 +319,20 @@ ze_result_t CommandListCoreFamilyImmediate::appendPageFaultCopy(N checkAvailableSpace(); } - auto ret = CommandListCoreFamily::appendPageFaultCopy(dstAllocation, srcAllocation, size, flushHost); + ze_result_t ret; + + if (this->isAppendSplitNeeded(dstAllocation->getMemoryPool(), srcAllocation->getMemoryPool(), size)) { + uintptr_t dstAddress = static_cast(dstAllocation->getGpuAddress()); + uintptr_t srcAddress = static_cast(srcAllocation->getGpuAddress()); + ret = static_cast(this->device)->bcsSplit.appendSplitCall(this, dstAddress, srcAddress, size, nullptr, [&](uintptr_t dstAddressParam, uintptr_t srcAddressParam, size_t sizeParam, ze_event_handle_t hSignalEventParam) { + this->appendMemoryCopyBlit(dstAddressParam, dstAllocation, 0u, + srcAddressParam, srcAllocation, 0u, + sizeParam); + return this->appendSignalEvent(hSignalEventParam); + }); + } else { + ret = CommandListCoreFamily::appendPageFaultCopy(dstAllocation, srcAllocation, size, flushHost); + } return flushImmediate(ret, false); } diff --git a/level_zero/core/test/unit_tests/xe_hpc_core/test_cmdqueue_xe_hpc_core.cpp b/level_zero/core/test/unit_tests/xe_hpc_core/test_cmdqueue_xe_hpc_core.cpp index 5b002ff0de..d4bf998dc2 100644 --- a/level_zero/core/test/unit_tests/xe_hpc_core/test_cmdqueue_xe_hpc_core.cpp +++ b/level_zero/core/test/unit_tests/xe_hpc_core/test_cmdqueue_xe_hpc_core.cpp @@ -702,5 +702,59 @@ HWTEST2_F(CommandQueueCommandsXeHpc, givenSplitBcsCopyAndImmediateCommandListWhe EXPECT_EQ(static_cast(testL0Device.get())->bcsSplit.events.createdFromLatestPool, 10u); } +HWTEST2_F(CommandQueueCommandsXeHpc, givenSplitBcsCopyAndImmediateCommandListWhenAppendingPageFaultCopyThenSuccessIsReturned, IsXeHpcCore) { + using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW; + + DebugManagerStateRestore restorer; + DebugManager.flags.SplitBcsCopy.set(1); + + ze_result_t returnValue; + auto hwInfo = *NEO::defaultHwInfo; + hwInfo.featureTable.ftrBcsInfo = 0b111111111; + hwInfo.capabilityTable.blitterOperationsSupported = true; + auto testNeoDevice = NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo); + auto testL0Device = std::unique_ptr(L0::Device::create(driverHandle.get(), testNeoDevice, false, &returnValue)); + + ze_command_queue_desc_t desc = {}; + desc.ordinal = static_cast(testNeoDevice->getEngineGroupIndexFromEngineGroupType(NEO::EngineGroupType::Copy)); + + std::unique_ptr commandList0(CommandList::createImmediate(productFamily, + testL0Device.get(), + &desc, + false, + NEO::EngineGroupType::Copy, + returnValue)); + ASSERT_NE(nullptr, commandList0); + EXPECT_EQ(static_cast(testL0Device.get())->bcsSplit.cmdQs.size(), 4u); + EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[0])->getTaskCount(), 0u); + EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[1])->getTaskCount(), 0u); + EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[2])->getTaskCount(), 0u); + EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[3])->getTaskCount(), 0u); + + constexpr size_t alignment = 4096u; + constexpr size_t size = 8 * MemoryConstants::megaByte; + void *srcPtr; + void *dstPtr; + ze_device_mem_alloc_desc_t deviceDesc = {}; + context->allocDeviceMem(device->toHandle(), + &deviceDesc, + size, alignment, &srcPtr); + ze_host_mem_alloc_desc_t hostDesc = {}; + context->allocHostMem(&hostDesc, size, alignment, &dstPtr); + + auto result = commandList0->appendPageFaultCopy(testL0Device->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(dstPtr)->gpuAllocations.getDefaultGraphicsAllocation(), + testL0Device->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(srcPtr)->gpuAllocations.getDefaultGraphicsAllocation(), + size, + false); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[0])->getTaskCount(), 1u); + EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[1])->getTaskCount(), 1u); + EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[2])->getTaskCount(), 1u); + EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[3])->getTaskCount(), 1u); + + context->freeMem(srcPtr); + context->freeMem(dstPtr); +} + } // namespace ult } // namespace L0