From 9e92324171ef57d28bef975abe48148fcd4046d8 Mon Sep 17 00:00:00 2001 From: Bartosz Dunajski Date: Mon, 12 May 2025 08:55:30 +0000 Subject: [PATCH] feature: copy offload support for fill operations Related-To: NEO-7067 Signed-off-by: Bartosz Dunajski --- level_zero/api/core/ze_copy_api_entrypoints.h | 5 +- level_zero/core/source/cmdlist/cmdlist.h | 2 +- level_zero/core/source/cmdlist/cmdlist_hw.h | 4 +- level_zero/core/source/cmdlist/cmdlist_hw.inl | 31 ++++---- .../source/cmdlist/cmdlist_hw_immediate.h | 2 +- .../source/cmdlist/cmdlist_hw_immediate.inl | 11 +-- .../core/test/unit_tests/mocks/mock_cmdlist.h | 2 +- .../sources/cmdlist/test_cmdlist_1.cpp | 6 +- .../sources/cmdlist/test_cmdlist_2.cpp | 8 +- .../sources/cmdlist/test_cmdlist_3.cpp | 10 +-- .../sources/cmdlist/test_cmdlist_4.cpp | 18 +++-- .../sources/cmdlist/test_cmdlist_6.cpp | 19 ++--- .../sources/cmdlist/test_cmdlist_8.cpp | 13 ++- .../test_cmdlist_append_wait_on_events.cpp | 3 +- .../sources/cmdlist/test_cmdlist_blit.cpp | 13 +-- .../sources/cmdlist/test_cmdlist_fill.cpp | 56 ++++++++----- ...test_cmdlist_fill_event_xehp_and_later.cpp | 28 +++---- .../cmdlist/test_cmdlist_memory_extension.cpp | 2 +- .../cmdlist/test_in_order_cmdlist_1.cpp | 30 +++---- .../cmdlist/test_in_order_cmdlist_2.cpp | 79 +++++++++++++++++-- .../sources/context/test_context.cpp | 12 +-- .../sources/debugger/test_l0_debugger_1.cpp | 20 ++--- .../unit_tests/sources/memory/test_memory.cpp | 24 +++--- 23 files changed, 247 insertions(+), 151 deletions(-) diff --git a/level_zero/api/core/ze_copy_api_entrypoints.h b/level_zero/api/core/ze_copy_api_entrypoints.h index 7c1f172dc7..3dde1ad0f3 100644 --- a/level_zero/api/core/ze_copy_api_entrypoints.h +++ b/level_zero/api/core/ze_copy_api_entrypoints.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020-2024 Intel Corporation + * Copyright (C) 2020-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -32,7 +32,8 @@ ze_result_t zeCommandListAppendMemoryFill( ze_event_handle_t hEvent, 
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { - return L0::CommandList::fromHandle(hCommandList)->appendMemoryFill(ptr, pattern, patternSize, size, hEvent, numWaitEvents, phWaitEvents, false); + CmdListMemoryCopyParams memoryCopyParams = {}; + return L0::CommandList::fromHandle(hCommandList)->appendMemoryFill(ptr, pattern, patternSize, size, hEvent, numWaitEvents, phWaitEvents, memoryCopyParams); } ze_result_t zeCommandListAppendMemoryCopyRegion( diff --git a/level_zero/core/source/cmdlist/cmdlist.h b/level_zero/core/source/cmdlist/cmdlist.h index 7be5d0c8b7..800fb47d90 100644 --- a/level_zero/core/source/cmdlist/cmdlist.h +++ b/level_zero/core/source/cmdlist/cmdlist.h @@ -143,7 +143,7 @@ struct CommandList : _ze_command_list_handle_t { ze_event_handle_t *phWaitEvents, CmdListMemoryCopyParams &memoryCopyParams) = 0; virtual ze_result_t appendMemoryFill(void *ptr, const void *pattern, size_t patternSize, size_t size, ze_event_handle_t hSignalEvent, - uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) = 0; + uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, CmdListMemoryCopyParams &memoryCopyParams) = 0; virtual ze_result_t appendMemoryPrefetch(const void *ptr, size_t count) = 0; virtual ze_result_t appendSignalEvent(ze_event_handle_t hEvent, bool relaxedOrderingDispatch) = 0; virtual ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, CommandToPatchContainer *outWaitCmds, diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.h b/level_zero/core/source/cmdlist/cmdlist_hw.h index a3b91ea7e8..0543ddbb39 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw.h @@ -162,7 +162,7 @@ struct CommandListCoreFamily : public CommandListImp { size_t patternSize, size_t size, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, - ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) override; + ze_event_handle_t *phWaitEvents, 
CmdListMemoryCopyParams &memoryCopyParams) override; ze_result_t appendMILoadRegImm(uint32_t reg, uint32_t value, bool isBcs) override; ze_result_t appendMILoadRegReg(uint32_t reg1, uint32_t reg2) override; @@ -272,7 +272,7 @@ struct CommandListCoreFamily : public CommandListImp { size_t patternSize, size_t size, Event *signalEvent, uint32_t numWaitEvents, - ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch); + ze_event_handle_t *phWaitEvents, CmdListMemoryCopyParams &memoryCopyParams); MOCKABLE_VIRTUAL ze_result_t appendCopyImageBlit(NEO::GraphicsAllocation *src, NEO::GraphicsAllocation *dst, diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 4c194614c4..bc6507a242 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -2162,13 +2162,14 @@ ze_result_t CommandListCoreFamily::appendMemoryFill(void *ptr, size_t size, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, - ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) { + ze_event_handle_t *phWaitEvents, CmdListMemoryCopyParams &memoryCopyParams) { bool isStateless = (this->cmdListHeapAddressModel == NEO::HeapAddressModel::globalStateless) || this->isStatelessBuiltinsEnabled(); if (size >= 4ull * MemoryConstants::gigaByte) { isStateless = true; } const bool isHeapless = this->isHeaplessModeEnabled(); + memoryCopyParams.copyOffloadAllowed = isCopyOffloadEnabled(); NEO::Device *neoDevice = device->getNEODevice(); uint32_t callId = 0; @@ -2191,13 +2192,13 @@ ze_result_t CommandListCoreFamily::appendMemoryFill(void *ptr, dcFlush = getDcFlushRequired(signalEvent->isSignalScope()); } - if (isCopyOnly(false)) { - auto status = appendBlitFill(ptr, pattern, patternSize, size, signalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch); + if (isCopyOnly(memoryCopyParams.copyOffloadAllowed)) { + auto status = appendBlitFill(ptr, pattern, patternSize, size, signalEvent, 
numWaitEvents, phWaitEvents, memoryCopyParams); addToMappedEventList(signalEvent); return status; } - ze_result_t res = addEventsToCmdList(numWaitEvents, phWaitEvents, nullptr, relaxedOrderingDispatch, false, true, false, false); + ze_result_t res = addEventsToCmdList(numWaitEvents, phWaitEvents, nullptr, memoryCopyParams.relaxedOrderingDispatch, false, true, false, false); if (res) { return res; } @@ -2426,17 +2427,15 @@ ze_result_t CommandListCoreFamily::appendMemoryFill(void *ptr, } template -ze_result_t CommandListCoreFamily::appendBlitFill(void *ptr, - const void *pattern, - size_t patternSize, - size_t size, - Event *signalEvent, - uint32_t numWaitEvents, - ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) { +ze_result_t CommandListCoreFamily::appendBlitFill(void *ptr, const void *pattern, size_t patternSize, size_t size, Event *signalEvent, uint32_t numWaitEvents, + ze_event_handle_t *phWaitEvents, CmdListMemoryCopyParams &memoryCopyParams) { if (this->maxFillPaternSizeForCopyEngine < patternSize) { return ZE_RESULT_ERROR_INVALID_SIZE; } else { - ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, nullptr, relaxedOrderingDispatch, false, true, false, false); + const bool dualStreamCopyOffloadOperation = isDualStreamCopyOffloadOperation(memoryCopyParams.copyOffloadAllowed); + const bool isCopyOnlySignaling = isCopyOnly(dualStreamCopyOffloadOperation) && !useAdditionalBlitProperties; + + ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, nullptr, memoryCopyParams.relaxedOrderingDispatch, false, true, false, dualStreamCopyOffloadOperation); if (ret) { return ret; } @@ -2446,7 +2445,7 @@ ze_result_t CommandListCoreFamily::appendBlitFill(void *ptr, } auto neoDevice = device->getNEODevice(); - if (!useAdditionalBlitProperties) { + if (isCopyOnlySignaling) { appendEventForProfiling(signalEvent, nullptr, true, false, false, true); } NEO::GraphicsAllocation *gpuAllocation = 
device->getDriverHandle()->getDriverSystemMemoryAllocation(ptr, @@ -2479,14 +2478,14 @@ ze_result_t CommandListCoreFamily::appendBlitFill(void *ptr, NEO::BlitCommandsHelper::dispatchBlitMemoryColorFill(blitProperties, *commandContainer.getCommandStream(), neoDevice->getRootDeviceEnvironmentRef()); dummyBlitWa.isWaRequired = true; - if (!useAdditionalBlitProperties) { + if (isCopyOnlySignaling) { appendSignalEventPostWalker(signalEvent, nullptr, nullptr, false, false, true); } - if (isInOrderExecutionEnabled() && !useAdditionalBlitProperties) { + if (isInOrderExecutionEnabled() && isCopyOnlySignaling) { appendSignalInOrderDependencyCounter(signalEvent, false, false, false); } - handleInOrderDependencyCounter(signalEvent, false, false); + handleInOrderDependencyCounter(signalEvent, false, memoryCopyParams.copyOffloadAllowed); } return ZE_RESULT_SUCCESS; } diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h index 9ce07a1984..4558e586e8 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h @@ -100,7 +100,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily::appendMemoryFill(void size_t patternSize, size_t size, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, - ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) { - relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents, false); + ze_event_handle_t *phWaitEvents, CmdListMemoryCopyParams &memoryCopyParams) { + memoryCopyParams.relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents, false); - checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch, commonImmediateCommandSize, false); + checkAvailableSpace(numWaitEvents, memoryCopyParams.relaxedOrderingDispatch, commonImmediateCommandSize, false); - auto ret = CommandListCoreFamily::appendMemoryFill(ptr, pattern, patternSize, size, hSignalEvent, 
numWaitEvents, phWaitEvents, relaxedOrderingDispatch); + auto ret = CommandListCoreFamily::appendMemoryFill(ptr, pattern, patternSize, size, hSignalEvent, numWaitEvents, phWaitEvents, memoryCopyParams); - return flushImmediate(ret, true, hasStallingCmdsForRelaxedOrdering(numWaitEvents, relaxedOrderingDispatch), relaxedOrderingDispatch, NEO::AppendOperations::kernel, false, hSignalEvent, false, nullptr, nullptr); + return flushImmediate(ret, true, hasStallingCmdsForRelaxedOrdering(numWaitEvents, memoryCopyParams.relaxedOrderingDispatch), memoryCopyParams.relaxedOrderingDispatch, + NEO::AppendOperations::kernel, memoryCopyParams.copyOffloadAllowed, hSignalEvent, false, nullptr, nullptr); } template diff --git a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h index 1fa1e36e1b..59649199b0 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h +++ b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h @@ -492,7 +492,7 @@ struct MockCommandList : public CommandList { size_t size, ze_event_handle_t hEvent, uint32_t numWaitEvents, - ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch)); + ze_event_handle_t *phWaitEvents, CmdListMemoryCopyParams &memoryCopyParams)); ADDMETHOD_NOBASE(appendSignalEvent, ze_result_t, ZE_RESULT_SUCCESS, (ze_event_handle_t hEvent, bool relaxedOrderingDispatch)); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp index 5ccef16ee5..cb5b65acdb 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp @@ -1401,7 +1401,7 @@ HWTEST2_F(CommandListCreateTests, givenDirectSubmissionAndImmCmdListWhenDispatch verifyFlags(commandList->appendMemoryCopyRegion(dstPtr, &region, 0, 0, srcPtr, &region, 0, 0, nullptr, 0, nullptr, copyParams), false, false); - 
verifyFlags(commandList->appendMemoryFill(dstPtr, srcPtr, 8, 1, nullptr, 0, nullptr, false), false, false); + verifyFlags(commandList->appendMemoryFill(dstPtr, srcPtr, 8, 1, nullptr, 0, nullptr, copyParams), false, false); verifyFlags(commandList->appendEventReset(event), true, true); @@ -1554,7 +1554,7 @@ HWTEST2_F(CommandListCreateTests, givenDirectSubmissionAndImmCmdListWhenDispatch verifyFlags(commandList->appendMemoryCopyRegion(dstPtr, &region, 0, 0, srcPtr, &region, 0, 0, nullptr, numWaitEvents, waitlist, copyParams)); resetFlags(); - verifyFlags(commandList->appendMemoryFill(dstPtr, srcPtr, 8, 1, nullptr, numWaitEvents, waitlist, false)); + verifyFlags(commandList->appendMemoryFill(dstPtr, srcPtr, 8, 1, nullptr, numWaitEvents, waitlist, copyParams)); if constexpr (FamilyType::supportsSampler) { auto kernel = device->getBuiltinFunctionsLib()->getImageFunction(ImageBuiltin::copyImageRegion); @@ -1808,7 +1808,7 @@ HWTEST2_F(CommandListCreateTests, givenDirectSubmissionAndImmCmdListWhenDispatch verifyFlags(commandList->appendMemoryCopyRegion(dstPtr, &region, 0, 0, srcPtr, &region, 0, 0, nullptr, numWaitlistEvents, waitlist, copyParams), hasEventDependencies, hasEventDependencies); - verifyFlags(commandList->appendMemoryFill(dstPtr, srcPtr, 8, 1, nullptr, numWaitlistEvents, waitlist, false), + verifyFlags(commandList->appendMemoryFill(dstPtr, srcPtr, 8, 1, nullptr, numWaitlistEvents, waitlist, copyParams), hasEventDependencies, hasEventDependencies); verifyFlags(commandList->appendEventReset(event), false, false); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_2.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_2.cpp index b4a53c021c..388c27d49e 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_2.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_2.cpp @@ -132,7 +132,7 @@ class MockCommandListHw : public WhiteBox<::L0::CommandListCoreFamilyimportExternalPointer(dstPtr, 
MemoryConstants::pageSize); EXPECT_EQ(ZE_RESULT_SUCCESS, result); - ze_result_t ret = cmdList.appendMemoryFill(dstPtr, reinterpret_cast(&pattern), sizeof(pattern), 0, nullptr, 0, nullptr, false); + ze_result_t ret = cmdList.appendMemoryFill(dstPtr, reinterpret_cast(&pattern), sizeof(pattern), 0, nullptr, 0, nullptr, copyParams); EXPECT_GT(cmdList.getAlignedAllocationCalledTimes, 0u); EXPECT_EQ(ret, ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY); result = driverHandle->releaseImportedPointer(dstPtr); @@ -640,7 +640,7 @@ HWTEST2_F(CommandListAppend, givenCopyOnlyCommandListWhenAppendMemoryFillCalledT cmdList.initialize(device, NEO::EngineGroupType::copy, 0u); void *dstPtr = reinterpret_cast(0x1234); int pattern = 1; - cmdList.appendMemoryFill(dstPtr, reinterpret_cast(&pattern), sizeof(pattern), 0, nullptr, 0, nullptr, false); + cmdList.appendMemoryFill(dstPtr, reinterpret_cast(&pattern), sizeof(pattern), 0, nullptr, 0, nullptr, copyParams); EXPECT_GT(cmdList.appendBlitFillCalledTimes, 0u); } @@ -649,7 +649,7 @@ HWTEST2_F(CommandListAppend, givenCommandListWhenAppendMemoryFillCalledThenAppen cmdList.initialize(device, NEO::EngineGroupType::renderCompute, 0u); void *dstPtr = reinterpret_cast(0x1234); int pattern = 1; - cmdList.appendMemoryFill(dstPtr, reinterpret_cast(&pattern), sizeof(pattern), 0, nullptr, 0, nullptr, false); + cmdList.appendMemoryFill(dstPtr, reinterpret_cast(&pattern), sizeof(pattern), 0, nullptr, 0, nullptr, copyParams); EXPECT_EQ(cmdList.appendBlitFillCalledTimes, 0u); } diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_3.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_3.cpp index 5c28c56c74..40f65c5803 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_3.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_3.cpp @@ -514,7 +514,7 @@ HWTEST2_F(CommandListCreateTests, int one = 1; size_t usedBefore = commandContainer.getCommandStream()->getUsed(); result = 
commandList->appendMemoryFill(dstBuffer, reinterpret_cast(&one), sizeof(one), 4096u, - events[0], 1, &events[1], false); + events[0], 1, &events[1], copyParams); EXPECT_EQ(ZE_RESULT_SUCCESS, result); size_t usedAfter = commandContainer.getCommandStream()->getUsed(); @@ -573,7 +573,7 @@ HWTEST2_F(CommandListCreateTests, size_t usedBefore = commandContainer.getCommandStream()->getUsed(); result = commandList->appendMemoryFill(dstBuffer, reinterpret_cast(&one), sizeof(one), 4096u, - events[0], 1, &events[1], false); + events[0], 1, &events[1], copyParams); EXPECT_EQ(ZE_RESULT_SUCCESS, result); size_t usedAfter = commandContainer.getCommandStream()->getUsed(); @@ -628,7 +628,7 @@ HWTEST2_F(CommandListCreateTests, givenCommandListWhenMemoryFillHavingHostMemory int one = 1; size_t usedBefore = commandContainer.getCommandStream()->getUsed(); result = commandList->appendMemoryFill(dstBuffer, reinterpret_cast(&one), sizeof(one), 4090u, - events[0], 1, &events[1], false); + events[0], 1, &events[1], copyParams); EXPECT_EQ(ZE_RESULT_SUCCESS, result); size_t usedAfter = commandContainer.getCommandStream()->getUsed(); GenCmdList cmdList; @@ -684,7 +684,7 @@ HWTEST2_F(CommandListCreateTests, givenCommandListWhenMemoryFillHavingEventsWith size_t usedBefore = commandContainer.getCommandStream()->getUsed(); result = commandList->appendMemoryFill(dstBuffer, reinterpret_cast(&one), sizeof(one), 4090u, - events[0], 1, &events[1], false); + events[0], 1, &events[1], copyParams); size_t usedAfter = commandContainer.getCommandStream()->getUsed(); EXPECT_EQ(ZE_RESULT_SUCCESS, result); @@ -741,7 +741,7 @@ HWTEST2_F(CommandListCreateTests, givenCommandListWhenMemoryFillHavingEventsWith int one = 1; size_t usedBefore = commandContainer.getCommandStream()->getUsed(); result = commandList->appendMemoryFill(dstBuffer, reinterpret_cast(&one), sizeof(one), 4090u, - events[0], 1, &events[1], false); + events[0], 1, &events[1], copyParams); EXPECT_EQ(ZE_RESULT_SUCCESS, result); size_t usedAfter = 
commandContainer.getCommandStream()->getUsed(); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_4.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_4.cpp index 085dde7589..5badc50169 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_4.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_4.cpp @@ -905,8 +905,9 @@ HWTEST2_F(HostPointerManagerCommandListTest, auto ret = hostDriverHandle->importExternalPointer(heapPointer, MemoryConstants::pageSize); EXPECT_EQ(ZE_RESULT_SUCCESS, ret); + CmdListMemoryCopyParams copyParams = {}; int pattern = 1; - ret = commandList->appendMemoryFill(heapPointer, reinterpret_cast(&pattern), sizeof(pattern), 64u, nullptr, 0, nullptr, false); + ret = commandList->appendMemoryFill(heapPointer, reinterpret_cast(&pattern), sizeof(pattern), 64u, nullptr, 0, nullptr, copyParams); EXPECT_EQ(ZE_RESULT_SUCCESS, ret); ret = hostDriverHandle->releaseImportedPointer(heapPointer); @@ -922,8 +923,9 @@ HWTEST2_F(HostPointerManagerCommandListTest, auto ret = hostDriverHandle->importExternalPointer(heapPointer, MemoryConstants::pageSize); EXPECT_EQ(ZE_RESULT_SUCCESS, ret); + CmdListMemoryCopyParams copyParams = {}; char pattern = 'a'; - ret = commandList->appendMemoryFill(heapPointer, reinterpret_cast(&pattern), sizeof(pattern), 64u, nullptr, 0, nullptr, false); + ret = commandList->appendMemoryFill(heapPointer, reinterpret_cast(&pattern), sizeof(pattern), 64u, nullptr, 0, nullptr, copyParams); EXPECT_EQ(ZE_RESULT_SUCCESS, ret); ret = hostDriverHandle->releaseImportedPointer(heapPointer); @@ -1025,8 +1027,9 @@ HWTEST2_F(HostPointerManagerCommandListTest, givenCommandListWhenMemoryFillWithS auto event1 = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); events.push_back(event1.get()); + CmdListMemoryCopyParams copyParams = {}; result = commandList->appendMemoryFill(heapPointer, reinterpret_cast(&one), sizeof(one), size, - events[0], 1, &events[1], 
false); + events[0], 1, &events[1], copyParams); EXPECT_EQ(ZE_RESULT_SUCCESS, result); ret = hostDriverHandle->releaseImportedPointer(heapPointer); @@ -1071,8 +1074,9 @@ HWTEST2_F(HostPointerManagerCommandListTest, givenCommandListWhenMemoryFillWithS auto event1 = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); events.push_back(event1.get()); + CmdListMemoryCopyParams copyParams = {}; result = commandList->appendMemoryFill(heapPointer, reinterpret_cast(&pattern), sizeof(pattern), size, - events[0], 1, &events[1], false); + events[0], 1, &events[1], copyParams); EXPECT_EQ(ZE_RESULT_SUCCESS, result); ret = hostDriverHandle->releaseImportedPointer(heapPointer); @@ -1116,8 +1120,9 @@ HWTEST2_F(HostPointerManagerCommandListTest, givenImmediateCommandListWhenMemory auto event1 = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); events.push_back(event1.get()); + CmdListMemoryCopyParams copyParams = {}; ret = commandList0->appendMemoryFill(heapPointer, reinterpret_cast(&one), sizeof(one), size, - events[0], 1, &events[1], false); + events[0], 1, &events[1], copyParams); EXPECT_EQ(ZE_RESULT_SUCCESS, ret); ret = hostDriverHandle->releaseImportedPointer(heapPointer); @@ -1165,8 +1170,9 @@ HWTEST2_F(HostPointerManagerCommandListTest, givenImmediateCommandListWhenMemory auto event1 = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); events.push_back(event1.get()); + CmdListMemoryCopyParams copyParams = {}; ret = commandList0->appendMemoryFill(heapPointer, reinterpret_cast(&one), sizeof(one), size, - events[0], 1, &events[1], false); + events[0], 1, &events[1], copyParams); EXPECT_EQ(ZE_RESULT_SUCCESS, ret); ret = hostDriverHandle->releaseImportedPointer(heapPointer); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp index 8f97d19b92..ddd6d96209 100644 --- 
a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp @@ -393,7 +393,7 @@ HWTEST2_F(CommandListTest, givenCopyCommandListWhenAppendFillWithDependenciesThe uint32_t patter = 1; auto zeEvent = event->toHandle(); - cmdList.appendMemoryFill(srcPtr, &patter, 1, sizeof(uint32_t), nullptr, 1, &zeEvent, false); + cmdList.appendMemoryFill(srcPtr, &patter, 1, sizeof(uint32_t), nullptr, 1, &zeEvent, copyParams); EXPECT_EQ(device->getNEODevice()->getDefaultEngine().commandStreamReceiver->peekBarrierCount(), 0u); } @@ -1174,7 +1174,7 @@ HWTEST2_F(CommandListTest, givenStatelessWhenAppendMemoryFillIsCalledThenCorrect auto result = context->allocHostMem(&hostDesc, allocSize, allocSize, &dstBuffer); ASSERT_EQ(ZE_RESULT_SUCCESS, result); - commandList->appendMemoryFill(dstBuffer, pattern, patternSize, allocSize, nullptr, 0, nullptr, false); + commandList->appendMemoryFill(dstBuffer, pattern, patternSize, allocSize, nullptr, 0, nullptr, copyParams); bool isStateless = true; bool isHeapless = commandList->isHeaplessModeEnabled(); @@ -1200,12 +1200,12 @@ HWTEST2_F(CommandListTest, givenComputeCommandListWhenMemoryFillInUsmHostThenBui auto result = context->allocHostMem(&hostDesc, allocSize, allocSize, &dstBuffer); ASSERT_EQ(ZE_RESULT_SUCCESS, result); - commandList->appendMemoryFill(dstBuffer, pattern, patternSize, allocSize, nullptr, 0, nullptr, false); + commandList->appendMemoryFill(dstBuffer, pattern, patternSize, allocSize, nullptr, 0, nullptr, copyParams); EXPECT_TRUE(commandList->usedKernelLaunchParams.isBuiltInKernel); EXPECT_FALSE(commandList->usedKernelLaunchParams.isKernelSplitOperation); EXPECT_TRUE(commandList->usedKernelLaunchParams.isDestinationAllocationInSystemMemory); - commandList->appendMemoryFill(dstBuffer, pattern, 1, allocSize, nullptr, 0, nullptr, false); + commandList->appendMemoryFill(dstBuffer, pattern, 1, allocSize, nullptr, 0, nullptr, copyParams); 
EXPECT_TRUE(commandList->usedKernelLaunchParams.isBuiltInKernel); EXPECT_FALSE(commandList->usedKernelLaunchParams.isKernelSplitOperation); EXPECT_TRUE(commandList->usedKernelLaunchParams.isDestinationAllocationInSystemMemory); @@ -1230,12 +1230,12 @@ HWTEST2_F(CommandListTest, givenComputeCommandListWhenMemoryFillInUsmDeviceThenB size, alignment, &dstBuffer); EXPECT_EQ(ZE_RESULT_SUCCESS, result); - commandList->appendMemoryFill(dstBuffer, pattern, patternSize, size, nullptr, 0, nullptr, false); + commandList->appendMemoryFill(dstBuffer, pattern, patternSize, size, nullptr, 0, nullptr, copyParams); EXPECT_TRUE(commandList->usedKernelLaunchParams.isBuiltInKernel); EXPECT_FALSE(commandList->usedKernelLaunchParams.isKernelSplitOperation); EXPECT_FALSE(commandList->usedKernelLaunchParams.isDestinationAllocationInSystemMemory); - commandList->appendMemoryFill(dstBuffer, pattern, 1, size, nullptr, 0, nullptr, false); + commandList->appendMemoryFill(dstBuffer, pattern, 1, size, nullptr, 0, nullptr, copyParams); EXPECT_TRUE(commandList->usedKernelLaunchParams.isBuiltInKernel); EXPECT_FALSE(commandList->usedKernelLaunchParams.isKernelSplitOperation); EXPECT_FALSE(commandList->usedKernelLaunchParams.isDestinationAllocationInSystemMemory); @@ -1262,12 +1262,12 @@ HWTEST2_F(CommandListTest, givenComputeCommandListWhenMemoryFillRequiresMultiKer constexpr size_t fillSize = size - 1; - commandList->appendMemoryFill(dstBuffer, pattern, patternSize, fillSize, nullptr, 0, nullptr, false); + commandList->appendMemoryFill(dstBuffer, pattern, patternSize, fillSize, nullptr, 0, nullptr, copyParams); EXPECT_TRUE(commandList->usedKernelLaunchParams.isBuiltInKernel); EXPECT_TRUE(commandList->usedKernelLaunchParams.isKernelSplitOperation); EXPECT_FALSE(commandList->usedKernelLaunchParams.isDestinationAllocationInSystemMemory); - commandList->appendMemoryFill(dstBuffer, pattern, 1, fillSize, nullptr, 0, nullptr, false); + commandList->appendMemoryFill(dstBuffer, pattern, 1, fillSize, 
nullptr, 0, nullptr, copyParams); EXPECT_TRUE(commandList->usedKernelLaunchParams.isBuiltInKernel); EXPECT_TRUE(commandList->usedKernelLaunchParams.isKernelSplitOperation); EXPECT_FALSE(commandList->usedKernelLaunchParams.isDestinationAllocationInSystemMemory); @@ -2978,7 +2978,8 @@ HWTEST2_F(CommandListStateBaseAddressGlobalStatelessTest, auto result = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, 1u, &devicePtr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); - result = commandList->appendMemoryFill(devicePtr, patternPtr, sizeof(pattern), size, nullptr, 0, nullptr, false); + CmdListMemoryCopyParams copyParams = {}; + result = commandList->appendMemoryFill(devicePtr, patternPtr, sizeof(pattern), size, nullptr, 0, nullptr, copyParams); EXPECT_EQ(ZE_RESULT_SUCCESS, result); ssh = container.getIndirectHeap(NEO::HeapType::surfaceState); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_8.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_8.cpp index 386b9fbfad..298f91f4a0 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_8.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_8.cpp @@ -1415,7 +1415,8 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenUnalignePtrToFillWhenAppendMemoryF ze_host_mem_alloc_desc_t hostDesc = {}; context->allocHostMem(&hostDesc, 0x1000, 0x1000, &dstBuffer); auto builtinKernelByte = device->getBuiltinFunctionsLib()->getFunction(Builtin::fillBufferRightLeftover); - commandList->appendMemoryFill(ptrOffset(dstBuffer, unalignedOffset), &pattern, patternSize, sizeToFill, nullptr, 0, nullptr, false); + CmdListMemoryCopyParams copyParams = {}; + commandList->appendMemoryFill(ptrOffset(dstBuffer, unalignedOffset), &pattern, patternSize, sizeToFill, nullptr, 0, nullptr, copyParams); EXPECT_EQ(commandList->passedKernel, builtinKernelByte); context->freeMem(dstBuffer); } @@ -1430,7 +1431,8 @@ HWTEST2_F(CommandListAppendLaunchKernel, 
givenUnalignePtrToFillWhenKernelLaunchS ze_host_mem_alloc_desc_t hostDesc = {}; context->allocHostMem(&hostDesc, 0x1000, 0x1000, &dstBuffer); auto builtinKernelByte = device->getBuiltinFunctionsLib()->getFunction(Builtin::fillBufferRightLeftover); - commandList->appendMemoryFill(ptrOffset(dstBuffer, unalignedOffset), &pattern, patternSize, sizeToFill, nullptr, 0, nullptr, false); + CmdListMemoryCopyParams copyParams = {}; + commandList->appendMemoryFill(ptrOffset(dstBuffer, unalignedOffset), &pattern, patternSize, sizeToFill, nullptr, 0, nullptr, copyParams); EXPECT_EQ(commandList->passedKernel, builtinKernelByte); context->freeMem(dstBuffer); } @@ -1445,7 +1447,9 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenUnalignePtrToFillWhenAppendMemoryF ze_host_mem_alloc_desc_t hostDesc = {}; context->allocHostMem(&hostDesc, 0x1000, 0x1000, &dstBuffer); commandList->status = ZE_RESULT_ERROR_INVALID_ARGUMENT; - auto ret = commandList->appendMemoryFill(ptrOffset(dstBuffer, unalignedOffset), &pattern, patternSize, sizeToFill, nullptr, 0, nullptr, true); + CmdListMemoryCopyParams copyParams = {}; + copyParams.relaxedOrderingDispatch = true; + auto ret = commandList->appendMemoryFill(ptrOffset(dstBuffer, unalignedOffset), &pattern, patternSize, sizeToFill, nullptr, 0, nullptr, copyParams); EXPECT_EQ(ret, ZE_RESULT_ERROR_INVALID_ARGUMENT); context->freeMem(dstBuffer); } @@ -1461,7 +1465,8 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenAlignePtrToFillWhenAppendMemoryFil ze_host_mem_alloc_desc_t hostDesc = {}; context->allocHostMem(&hostDesc, 0x1000, 0x1000, &dstBuffer); auto builtinKernelByte = device->getBuiltinFunctionsLib()->getFunction(Builtin::fillBufferMiddle); - commandList->appendMemoryFill(ptrOffset(dstBuffer, unalignedOffset), &pattern, patternSize, sizeToFill, nullptr, 0, nullptr, false); + CmdListMemoryCopyParams copyParams = {}; + commandList->appendMemoryFill(ptrOffset(dstBuffer, unalignedOffset), &pattern, patternSize, sizeToFill, nullptr, 0, nullptr, 
copyParams); EXPECT_EQ(commandList->passedKernel, builtinKernelByte); context->freeMem(dstBuffer); } diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_wait_on_events.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_wait_on_events.cpp index d2b28f3a30..18ec33772d 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_wait_on_events.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_wait_on_events.cpp @@ -975,9 +975,10 @@ HWTEST_TEMPLATED_F(TbxImmediateCommandListTest, givenTbxModeOnFlushTaskImmediate ze_host_mem_alloc_desc_t hostDesc = {}; context->allocHostMem(&hostDesc, 4096, 4096u, &dstBuffer); + CmdListMemoryCopyParams copyParams = {}; int one = 1; commandListImmediate->appendMemoryFill(dstBuffer, reinterpret_cast(&one), sizeof(one), 4096, - nullptr, 1, &eventHandle, false); + nullptr, 1, &eventHandle, copyParams); EXPECT_EQ(0u, ultCsr.downloadAllocationsCalledCount); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_blit.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_blit.cpp index bf227db209..ac33b29300 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_blit.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_blit.cpp @@ -72,7 +72,7 @@ HWTEST2_F(AppendMemoryCopyTests, givenCopyOnlyCommandListWhenAppenBlitFillCalled cmdList.initialize(device, NEO::EngineGroupType::copy, 0u); uint64_t pattern[4] = {1, 2, 3, 4}; void *ptr = reinterpret_cast(0x1234); - auto ret = cmdList.appendMemoryFill(ptr, reinterpret_cast(&pattern), sizeof(pattern), 0x1000, nullptr, 0, nullptr, false); + auto ret = cmdList.appendMemoryFill(ptr, reinterpret_cast(&pattern), sizeof(pattern), 0x1000, nullptr, 0, nullptr, copyParams); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_SIZE, ret); } @@ -81,7 +81,7 @@ HWTEST2_F(AppendMemoryCopyTests, givenCopyOnlyCommandListWhenAppenBlitFillToNotD cmdList.initialize(device, 
NEO::EngineGroupType::copy, 0u); uint8_t pattern = 1; void *ptr = reinterpret_cast(0x1234); - auto ret = cmdList.appendMemoryFill(ptr, reinterpret_cast(&pattern), sizeof(pattern), 0x1000, nullptr, 0, nullptr, false); + auto ret = cmdList.appendMemoryFill(ptr, reinterpret_cast(&pattern), sizeof(pattern), 0x1000, nullptr, 0, nullptr, copyParams); EXPECT_EQ(ret, ZE_RESULT_ERROR_INVALID_ARGUMENT); } @@ -99,7 +99,7 @@ HWTEST2_F(AppendMemoryCopyTests, givenCopyOnlyCommandListWhenAppenBlitFillThenCo commandList.initialize(device, NEO::EngineGroupType::copy, 0u); uint16_t pattern = 1; void *ptr = reinterpret_cast(0x1234); - commandList.appendMemoryFill(ptr, reinterpret_cast(&pattern), sizeof(pattern), 0x1000, nullptr, 0, nullptr, false); + commandList.appendMemoryFill(ptr, reinterpret_cast(&pattern), sizeof(pattern), 0x1000, nullptr, 0, nullptr, copyParams); GenCmdList cmdList; ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer( cmdList, ptrOffset(commandList.getCmdContainer().getCommandStream()->getCpuBase(), 0), commandList.getCmdContainer().getCommandStream()->getUsed())); @@ -129,7 +129,7 @@ HWTEST2_F(AppendMemoryCopyTests, commandList.initialize(device, NEO::EngineGroupType::copy, 0u); uint32_t pattern = 1; - ze_result_t result = commandList.appendMemoryFill(hostPointer.get(), reinterpret_cast(&pattern), sizeof(pattern), size, nullptr, 0, nullptr, false); + ze_result_t result = commandList.appendMemoryFill(hostPointer.get(), reinterpret_cast(&pattern), sizeof(pattern), size, nullptr, 0, nullptr, copyParams); EXPECT_EQ(result, ZE_RESULT_SUCCESS); GenCmdList cmdList; @@ -567,15 +567,16 @@ HWTEST_F(AppendMemoryCopyTests, givenCopyOnlyCommandListWithUseAdditionalBlitPro uint32_t one = 1u; ASSERT_EQ(ZE_RESULT_SUCCESS, result); ASSERT_NE(nullptr, dstBuffer); + CmdListMemoryCopyParams copyParams = {}; commandList->useAdditionalBlitProperties = false; EXPECT_EQ(0u, commandList->additionalBlitPropertiesCalled); - commandList->appendBlitFill(dstBuffer, &one, sizeof(uint8_t), 
4096u, nullptr, 0, nullptr, false); + commandList->appendBlitFill(dstBuffer, &one, sizeof(uint8_t), 4096u, nullptr, 0, nullptr, copyParams); EXPECT_EQ(0u, commandList->additionalBlitPropertiesCalled); EXPECT_EQ(1u, commandList->appendSignalInOrderDependencyCounterCalled); commandList->useAdditionalBlitProperties = true; - commandList->appendBlitFill(dstBuffer, &one, sizeof(uint8_t), 4096u, nullptr, 0, nullptr, false); + commandList->appendBlitFill(dstBuffer, &one, sizeof(uint8_t), 4096u, nullptr, 0, nullptr, copyParams); EXPECT_EQ(1u, commandList->additionalBlitPropertiesCalled); EXPECT_EQ(1u, commandList->appendSignalInOrderDependencyCounterCalled); context->freeMem(dstBuffer); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_fill.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_fill.cpp index c8da8cb456..66ab2aa029 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_fill.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_fill.cpp @@ -31,9 +31,10 @@ HWTEST2_F(AppendFillTest, auto commandList = std::make_unique>>(); commandList->initialize(device, NEO::EngineGroupType::renderCompute, 0u); + CmdListMemoryCopyParams copyParams = {}; auto result = commandList->appendMemoryFill(immediateDstPtr, &immediatePattern, sizeof(immediatePattern), - immediateAllocSize, nullptr, 0, nullptr, false); + immediateAllocSize, nullptr, 0, nullptr, copyParams); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } @@ -42,7 +43,8 @@ HWTEST2_F(AppendFillTest, auto commandList = std::make_unique>>(); commandList->initialize(device, NEO::EngineGroupType::renderCompute, 0u); - auto result = commandList->appendMemoryFill(dstPtr, pattern, patternSize, allocSize, nullptr, 0, nullptr, false); + CmdListMemoryCopyParams copyParams = {}; + auto result = commandList->appendMemoryFill(dstPtr, pattern, patternSize, allocSize, nullptr, 0, nullptr, copyParams); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } @@ -52,7 +54,8 @@ 
HWTEST2_F(AppendFillTest, commandList->initialize(device, NEO::EngineGroupType::renderCompute, 0u); commandList->thresholdOfCallsToAppendLaunchKernelWithParamsToFail = 0; - auto result = commandList->appendMemoryFill(dstPtr, pattern, patternSize, allocSize, nullptr, 0, nullptr, false); + CmdListMemoryCopyParams copyParams = {}; + auto result = commandList->appendMemoryFill(dstPtr, pattern, patternSize, allocSize, nullptr, 0, nullptr, copyParams); EXPECT_NE(ZE_RESULT_SUCCESS, result); } @@ -61,13 +64,14 @@ HWTEST2_F(AppendFillTest, auto commandList = std::make_unique>>(); commandList->initialize(device, NEO::EngineGroupType::renderCompute, 0u); - ze_result_t result = commandList->appendMemoryFill(dstPtr, pattern, 4, allocSize, nullptr, 0, nullptr, false); + CmdListMemoryCopyParams copyParams = {}; + ze_result_t result = commandList->appendMemoryFill(dstPtr, pattern, 4, allocSize, nullptr, 0, nullptr, copyParams); EXPECT_EQ(ZE_RESULT_SUCCESS, result); size_t patternAllocationsVectorSize = commandList->patternAllocations.size(); EXPECT_EQ(patternAllocationsVectorSize, 1u); uint8_t *newDstPtr = new uint8_t[allocSize]; - result = commandList->appendMemoryFill(newDstPtr, pattern, patternSize, allocSize, nullptr, 0, nullptr, false); + result = commandList->appendMemoryFill(newDstPtr, pattern, patternSize, allocSize, nullptr, 0, nullptr, copyParams); EXPECT_EQ(ZE_RESULT_SUCCESS, result); size_t newPatternAllocationsVectorSize = commandList->patternAllocations.size(); @@ -81,13 +85,14 @@ HWTEST2_F(AppendFillTest, auto commandList = std::make_unique>>(); commandList->initialize(device, NEO::EngineGroupType::renderCompute, 0u); - ze_result_t result = commandList->appendMemoryFill(dstPtr, pattern, 4, allocSize, nullptr, 0, nullptr, false); + CmdListMemoryCopyParams copyParams = {}; + ze_result_t result = commandList->appendMemoryFill(dstPtr, pattern, 4, allocSize, nullptr, 0, nullptr, copyParams); EXPECT_EQ(ZE_RESULT_SUCCESS, result); size_t patternAllocationsVectorSize = 
commandList->patternAllocations.size(); EXPECT_EQ(patternAllocationsVectorSize, 1u); uint8_t newPattern[patternSize] = {1, 2, 3, 4}; - result = commandList->appendMemoryFill(dstPtr, newPattern, patternSize, allocSize, nullptr, 0, nullptr, false); + result = commandList->appendMemoryFill(dstPtr, newPattern, patternSize, allocSize, nullptr, 0, nullptr, copyParams); EXPECT_EQ(ZE_RESULT_SUCCESS, result); size_t newPatternAllocationsVectorSize = commandList->patternAllocations.size(); @@ -101,7 +106,8 @@ HWTEST2_F(AppendFillTest, int pattern = 0; const size_t size = 1024 * 1024; uint8_t *ptr = new uint8_t[size]; - ze_result_t result = commandList->appendMemoryFill(ptr, &pattern, 1, size, nullptr, 0, nullptr, false); + CmdListMemoryCopyParams copyParams = {}; + ze_result_t result = commandList->appendMemoryFill(ptr, &pattern, 1, size, nullptr, 0, nullptr, copyParams); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(1u, commandList->numberOfCallsToAppendLaunchKernelWithParams); EXPECT_EQ(size, commandList->xGroupSizes[0] * commandList->threadGroupDimensions[0].groupCountX * 16); @@ -115,7 +121,8 @@ HWTEST2_F(AppendFillTest, int pattern = 0; const size_t size = 1025; uint8_t *ptr = new uint8_t[size]; - ze_result_t result = commandList->appendMemoryFill(ptr, &pattern, 1, size, nullptr, 0, nullptr, false); + CmdListMemoryCopyParams copyParams = {}; + ze_result_t result = commandList->appendMemoryFill(ptr, &pattern, 1, size, nullptr, 0, nullptr, copyParams); size_t filledSize = commandList->xGroupSizes[0] * commandList->threadGroupDimensions[0].groupCountX * 16; filledSize += commandList->xGroupSizes[1] * commandList->threadGroupDimensions[1].groupCountX; EXPECT_EQ(ZE_RESULT_SUCCESS, result); @@ -131,7 +138,8 @@ HWTEST2_F(AppendFillTest, int pattern = 0; const size_t size = 4; uint8_t *ptr = new uint8_t[size]; - ze_result_t result = commandList->appendMemoryFill(ptr, &pattern, 1, size, nullptr, 0, nullptr, false); + CmdListMemoryCopyParams copyParams = {}; + ze_result_t 
result = commandList->appendMemoryFill(ptr, &pattern, 1, size, nullptr, 0, nullptr, copyParams); size_t filledSize = commandList->xGroupSizes[0] * commandList->threadGroupDimensions[0].groupCountX * 16; filledSize += commandList->xGroupSizes[1] * commandList->threadGroupDimensions[1].groupCountX; EXPECT_EQ(ZE_RESULT_SUCCESS, result); @@ -147,7 +155,8 @@ HWTEST2_F(AppendFillTest, int pattern = 0; const size_t size = neoDevice->getDeviceInfo().maxWorkGroupSize / 2; uint8_t *ptr = new uint8_t[size]; - ze_result_t result = commandList->appendMemoryFill(ptr, &pattern, 1, size, nullptr, 0, nullptr, false); + CmdListMemoryCopyParams copyParams = {}; + ze_result_t result = commandList->appendMemoryFill(ptr, &pattern, 1, size, nullptr, 0, nullptr, copyParams); size_t filledSize = commandList->xGroupSizes[0] * commandList->threadGroupDimensions[0].groupCountX * 16; EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(1u, commandList->numberOfCallsToAppendLaunchKernelWithParams); @@ -162,7 +171,8 @@ HWTEST2_F(AppendFillTest, int pattern = 0; const size_t size = 1024 * 1024; uint8_t *ptr = new uint8_t[size]; - ze_result_t result = commandList->appendMemoryFill(ptr, &pattern, 1, size, nullptr, 0, nullptr, false); + CmdListMemoryCopyParams copyParams = {}; + ze_result_t result = commandList->appendMemoryFill(ptr, &pattern, 1, size, nullptr, 0, nullptr, copyParams); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto groupSize = device->getDeviceInfo().maxWorkGroupSize; auto dataTypeSize = sizeof(uint32_t) * 4; @@ -179,7 +189,8 @@ HWTEST2_F(AppendFillTest, uint32_t offset = 1; const size_t size = 1024; uint8_t *ptr = new uint8_t[size]; - ze_result_t result = commandList->appendMemoryFill(ptr + offset, &pattern, 1, size - offset, nullptr, 0, nullptr, false); + CmdListMemoryCopyParams copyParams = {}; + ze_result_t result = commandList->appendMemoryFill(ptr + offset, &pattern, 1, size - offset, nullptr, 0, nullptr, copyParams); size_t filledSize = commandList->xGroupSizes[0] * 
commandList->threadGroupDimensions[0].groupCountX; filledSize += commandList->xGroupSizes[1] * commandList->threadGroupDimensions[1].groupCountX * 16; filledSize += commandList->xGroupSizes[2] * commandList->threadGroupDimensions[2].groupCountX; @@ -198,7 +209,8 @@ HWTEST2_F(AppendFillTest, uint32_t offset = 1; const size_t size = 2; uint8_t *ptr = new uint8_t[size]; - ze_result_t result = commandList->appendMemoryFill(ptr + offset, &pattern, 1, size - offset, nullptr, 0, nullptr, false); + CmdListMemoryCopyParams copyParams = {}; + ze_result_t result = commandList->appendMemoryFill(ptr + offset, &pattern, 1, size - offset, nullptr, 0, nullptr, copyParams); size_t filledSize = commandList->xGroupSizes[0] * commandList->threadGroupDimensions[0].groupCountX * 16; filledSize += commandList->xGroupSizes[1] * commandList->threadGroupDimensions[1].groupCountX; EXPECT_EQ(ZE_RESULT_SUCCESS, result); @@ -216,7 +228,8 @@ HWTEST2_F(AppendFillTest, uint32_t offset = 1; const size_t size = 1024; uint8_t *ptr = new uint8_t[size]; - ze_result_t result = commandList->appendMemoryFill(ptr + offset, &pattern, 1, size - offset, nullptr, 0, nullptr, false); + CmdListMemoryCopyParams copyParams = {}; + ze_result_t result = commandList->appendMemoryFill(ptr + offset, &pattern, 1, size - offset, nullptr, 0, nullptr, copyParams); EXPECT_NE(ZE_RESULT_SUCCESS, result); delete[] ptr; } @@ -228,7 +241,8 @@ HWTEST2_F(AppendFillTest, size_t nonMultipleSize = allocSize + 1; uint8_t *nonMultipleDstPtr = new uint8_t[nonMultipleSize]; - auto result = commandList->appendMemoryFill(nonMultipleDstPtr, pattern, 4, nonMultipleSize, nullptr, 0, nullptr, false); + CmdListMemoryCopyParams copyParams = {}; + auto result = commandList->appendMemoryFill(nonMultipleDstPtr, pattern, 4, nonMultipleSize, nullptr, 0, nullptr, copyParams); EXPECT_EQ(ZE_RESULT_SUCCESS, result); delete[] nonMultipleDstPtr; @@ -242,7 +256,8 @@ HWTEST2_F(AppendFillTest, size_t nonMultipleSize = allocSize + 1; uint8_t *nonMultipleDstPtr 
= new uint8_t[nonMultipleSize]; - auto result = commandList->appendMemoryFill(nonMultipleDstPtr, pattern, 4, nonMultipleSize, nullptr, 0, nullptr, false); + CmdListMemoryCopyParams copyParams = {}; + auto result = commandList->appendMemoryFill(nonMultipleDstPtr, pattern, 4, nonMultipleSize, nullptr, 0, nullptr, copyParams); EXPECT_NE(ZE_RESULT_SUCCESS, result); delete[] nonMultipleDstPtr; @@ -273,9 +288,10 @@ HWTEST2_F(AppendFillTest, auto commandList = std::make_unique>>(); commandList->initialize(device, NEO::EngineGroupType::renderCompute, 0u); + CmdListMemoryCopyParams copyParams = {}; result = commandList->appendMemoryFill(immediateDstPtr, &immediatePattern, sizeof(immediatePattern), - immediateAllocSize, event->toHandle(), 0, nullptr, false); + immediateAllocSize, event->toHandle(), 0, nullptr, copyParams); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(1u, event->getPacketsInUse()); @@ -330,8 +346,8 @@ HWTEST2_F(AppendFillTest, auto commandList = std::make_unique>>(); commandList->initialize(device, NEO::EngineGroupType::renderCompute, 0u); - - result = commandList->appendMemoryFill(dstPtr, pattern, patternSize, allocSize, event->toHandle(), 0, nullptr, false); + CmdListMemoryCopyParams copyParams = {}; + result = commandList->appendMemoryFill(dstPtr, pattern, patternSize, allocSize, event->toHandle(), 0, nullptr, copyParams); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(1u, event->getPacketsInUse()); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_fill_event_xehp_and_later.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_fill_event_xehp_and_later.cpp index 6ed3d36df1..f0e6af6b92 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_fill_event_xehp_and_later.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_fill_event_xehp_and_later.cpp @@ -111,10 +111,10 @@ void testSingleTileAppendMemoryFillManyImmediateKernels(FillTestInput &input, Te auto commandList = 
std::make_unique>(); commandList->initialize(input.device, NEO::EngineGroupType::renderCompute, 0u); auto &commandContainer = commandList->commandContainer; - + CmdListMemoryCopyParams copyParams = {}; size_t usedBefore = commandContainer.getCommandStream()->getUsed(); result = commandList->appendMemoryFill(input.dstPtr, input.patternPtr, - input.patternSize, input.allocSize, event->toHandle(), 0, nullptr, false); + input.patternSize, input.allocSize, event->toHandle(), 0, nullptr, copyParams); EXPECT_EQ(ZE_RESULT_SUCCESS, result); size_t usedAfter = commandContainer.getCommandStream()->getUsed(); @@ -196,10 +196,10 @@ void testSingleTileAppendMemoryFillManyKernels(FillTestInput &input, TestExpecte auto commandList = std::make_unique>(); commandList->initialize(input.device, NEO::EngineGroupType::renderCompute, 0u); auto &commandContainer = commandList->commandContainer; - + CmdListMemoryCopyParams copyParams = {}; size_t usedBefore = commandContainer.getCommandStream()->getUsed(); result = commandList->appendMemoryFill(input.dstPtr, input.patternPtr, - input.patternSize, input.allocSize, event->toHandle(), 0, nullptr, false); + input.patternSize, input.allocSize, event->toHandle(), 0, nullptr, copyParams); EXPECT_EQ(ZE_RESULT_SUCCESS, result); size_t usedAfter = commandContainer.getCommandStream()->getUsed(); @@ -283,10 +283,10 @@ void testSingleTileAppendMemoryFillManyKernelsAndL3Flush(FillTestInput &input, T auto commandList = std::make_unique>(); commandList->initialize(input.device, NEO::EngineGroupType::renderCompute, 0u); auto &commandContainer = commandList->commandContainer; - + CmdListMemoryCopyParams copyParams = {}; size_t usedBefore = commandContainer.getCommandStream()->getUsed(); result = commandList->appendMemoryFill(input.dstPtr, input.patternPtr, - input.patternSize, input.allocSize, event->toHandle(), 0, nullptr, false); + input.patternSize, input.allocSize, event->toHandle(), 0, nullptr, copyParams); EXPECT_EQ(ZE_RESULT_SUCCESS, result); size_t 
usedAfter = commandContainer.getCommandStream()->getUsed(); @@ -367,10 +367,10 @@ void testSingleTileAppendMemoryFillSingleKernel(FillTestInput &input, TestExpect int pattern = 0; const size_t size = 1024; uint8_t array[size] = {}; - + CmdListMemoryCopyParams copyParams = {}; auto &commandContainer = commandList->commandContainer; size_t usedBefore = commandContainer.getCommandStream()->getUsed(); - result = commandList->appendMemoryFill(array, &pattern, 1, size, event->toHandle(), 0, nullptr, false); + result = commandList->appendMemoryFill(array, &pattern, 1, size, event->toHandle(), 0, nullptr, copyParams); EXPECT_EQ(ZE_RESULT_SUCCESS, result); size_t usedAfter = commandContainer.getCommandStream()->getUsed(); @@ -452,10 +452,10 @@ void testSingleTileAppendMemoryFillSingleKernelAndL3Flush(FillTestInput &input, int pattern = 0; const size_t size = 1024; uint8_t array[size] = {}; - + CmdListMemoryCopyParams copyParams = {}; auto &commandContainer = commandList->commandContainer; size_t usedBefore = commandContainer.getCommandStream()->getUsed(); - result = commandList->appendMemoryFill(array, &pattern, 1, size, event->toHandle(), 0, nullptr, false); + result = commandList->appendMemoryFill(array, &pattern, 1, size, event->toHandle(), 0, nullptr, copyParams); EXPECT_EQ(ZE_RESULT_SUCCESS, result); size_t usedAfter = commandContainer.getCommandStream()->getUsed(); @@ -566,10 +566,10 @@ void testMultiTileAppendMemoryFillManyKernels(FillTestInput &input, TestExpected commandList->initialize(input.device, NEO::EngineGroupType::renderCompute, 0u); EXPECT_EQ(2u, commandList->partitionCount); auto &commandContainer = commandList->commandContainer; - + CmdListMemoryCopyParams copyParams = {}; size_t usedBefore = commandContainer.getCommandStream()->getUsed(); result = commandList->appendMemoryFill(input.dstPtr, input.patternPtr, - input.patternSize, input.allocSize, event->toHandle(), 0, nullptr, false); + input.patternSize, input.allocSize, event->toHandle(), 0, nullptr, 
copyParams); EXPECT_EQ(ZE_RESULT_SUCCESS, result); size_t usedAfter = commandContainer.getCommandStream()->getUsed(); @@ -685,10 +685,10 @@ void testMultiTileAppendMemoryFillSingleKernelAndL3Flush(FillTestInput &input, T int pattern = 0; const size_t size = 1024; uint8_t array[size] = {}; - + CmdListMemoryCopyParams copyParams = {}; auto &commandContainer = commandList->commandContainer; size_t usedBefore = commandContainer.getCommandStream()->getUsed(); - result = commandList->appendMemoryFill(array, &pattern, 1, size, event->toHandle(), 0, nullptr, false); + result = commandList->appendMemoryFill(array, &pattern, 1, size, event->toHandle(), 0, nullptr, copyParams); EXPECT_EQ(ZE_RESULT_SUCCESS, result); size_t usedAfter = commandContainer.getCommandStream()->getUsed(); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_memory_extension.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_memory_extension.cpp index 85618fd4e1..28a2540d47 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_memory_extension.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_memory_extension.cpp @@ -171,7 +171,7 @@ class MockCommandListExtensionHw : public WhiteBox<::L0::CommandListCoreFamilyinOrderExecInfo->getCounterValue()); offset = cmdStream->getUsed(); - immCmdList->appendMemoryFill(alloc, &copyData, 1, 16, nullptr, 0, nullptr, false); + immCmdList->appendMemoryFill(alloc, &copyData, 1, 16, nullptr, 0, nullptr, copyParams); findSemaphores(0); // no implicit dependency EXPECT_EQ(8u, immCmdList->inOrderExecInfo->getCounterValue()); @@ -1775,7 +1775,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, InOrderCmdListTests, givenImmediateCmdListWhenDispa } events[0]->makeCounterBasedInitiallyDisabled(eventPool->getAllocation()); - immCmdList->appendMemoryFill(alloc, &copyData, 1, 16, eventHandle, 0, nullptr, false); + immCmdList->appendMemoryFill(alloc, &copyData, 1, 16, eventHandle, 0, nullptr, copyParams); if (dcFlushRequired) {
EXPECT_EQ(Event::CounterBasedMode::initiallyDisabled, events[0]->counterBasedMode); } else { @@ -1783,7 +1783,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, InOrderCmdListTests, givenImmediateCmdListWhenDispa } events[0]->makeCounterBasedInitiallyDisabled(eventPool->getAllocation()); - copyOnlyCmdList->appendBlitFill(alloc, &copyData, 1, 16, events[0].get(), 0, nullptr, false); + copyOnlyCmdList->appendBlitFill(alloc, &copyData, 1, 16, events[0].get(), 0, nullptr, copyParams); if (dcFlushRequired) { EXPECT_EQ(Event::CounterBasedMode::initiallyDisabled, events[0]->counterBasedMode); } else { @@ -1914,9 +1914,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, InOrderCmdListTests, givenNonInOrderCmdListWhenPass EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, immCmdList->appendMemoryCopy(&copyData, &copyData, 1, eventHandle, 0, nullptr, copyParams)); - EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, immCmdList->appendMemoryFill(alloc, &copyData, 1, 16, eventHandle, 0, nullptr, false)); + EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, immCmdList->appendMemoryFill(alloc, &copyData, 1, 16, eventHandle, 0, nullptr, copyParams)); - EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, copyOnlyCmdList->appendBlitFill(alloc, &copyData, 1, 16, events[0].get(), 0, nullptr, false)); + EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, copyOnlyCmdList->appendBlitFill(alloc, &copyData, 1, 16, events[0].get(), 0, nullptr, copyParams)); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, immCmdList->appendSignalEvent(eventHandle, false)); @@ -4084,10 +4084,10 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, InOrderCmdListTests, givenCopyOnlyInOrderModeWhenPr constexpr size_t size = 128 * sizeof(uint32_t); auto data = allocHostMem(size); - immCmdList->appendMemoryFill(data, data, 1, size, nullptr, 0, nullptr, false); + immCmdList->appendMemoryFill(data, data, 1, size, nullptr, 0, nullptr, copyParams); auto offset = cmdStream->getUsed(); - immCmdList->appendMemoryFill(data, data, 1, size, nullptr, 0, nullptr, false); + immCmdList->appendMemoryFill(data, data, 1, size, nullptr, 0, nullptr,
copyParams); GenCmdList cmdList; ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(cmdList, @@ -4126,7 +4126,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, InOrderCmdListTests, givenInOrderModeWhenProgrammin constexpr size_t size = 128 * sizeof(uint32_t); auto data = allocHostMem(size); - immCmdList->appendMemoryFill(data, data, 1, (size / 2) + 1, events[0]->toHandle(), 0, nullptr, false); + immCmdList->appendMemoryFill(data, data, 1, (size / 2) + 1, events[0]->toHandle(), 0, nullptr, copyParams); GenCmdList cmdList; ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(cmdList, cmdStream->getCpuBase(), cmdStream->getUsed())); @@ -4174,7 +4174,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, InOrderCmdListTests, givenInOrderModeWhenProgrammin constexpr size_t size = 128 * sizeof(uint32_t); auto data = allocHostMem(size); - immCmdList->appendMemoryFill(data, data, 1, (size / 2) + 1, events[0]->toHandle(), 0, nullptr, false); + immCmdList->appendMemoryFill(data, data, 1, (size / 2) + 1, events[0]->toHandle(), 0, nullptr, copyParams); GenCmdList cmdList; ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(cmdList, cmdStream->getCpuBase(), cmdStream->getUsed())); @@ -4217,7 +4217,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, InOrderCmdListTests, givenInOrderModeWhenProgrammin constexpr size_t size = 128 * sizeof(uint32_t); auto data = allocHostMem(size); - immCmdList->appendMemoryFill(data, data, 1, (size / 2) + 1, nullptr, 0, nullptr, false); + immCmdList->appendMemoryFill(data, data, 1, (size / 2) + 1, nullptr, 0, nullptr, copyParams); GenCmdList cmdList; ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(cmdList, cmdStream->getCpuBase(), cmdStream->getUsed())); @@ -4260,7 +4260,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, InOrderCmdListTests, givenInOrderModeWhenProgrammin constexpr size_t size = 128 * sizeof(uint32_t); auto data = allocHostMem(size); - immCmdList->appendMemoryFill(data, data, 1, size, nullptr, 0, nullptr, false); + immCmdList->appendMemoryFill(data, data, 1, size, nullptr, 0, nullptr, copyParams); 
GenCmdList cmdList; ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(cmdList, cmdStream->getCpuBase(), cmdStream->getUsed())); @@ -5934,8 +5934,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, InOrderCmdListTests, givenStandaloneEventWhenCallin auto immCmdList = createImmCmdList(); - immCmdList->appendMemoryFill(data, data, 1, size, eHandle1, 0, nullptr, false); - immCmdList->appendMemoryFill(data, data, 1, size, nullptr, 1, &eHandle2, false); + immCmdList->appendMemoryFill(data, data, 1, size, eHandle1, 0, nullptr, copyParams); + immCmdList->appendMemoryFill(data, data, 1, size, nullptr, 1, &eHandle2, copyParams); immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, eHandle3, 0, nullptr, launchParams, false); context->freeMem(data); @@ -5994,8 +5994,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, InOrderCmdListTests, givenStandaloneEventAndCopyOnl auto immCmdList = createCopyOnlyImmCmdList(); - immCmdList->appendMemoryFill(data, data, 1, size, eHandle1, 0, nullptr, false); - immCmdList->appendMemoryFill(data, data, 1, size, nullptr, 1, &eHandle2, false); + immCmdList->appendMemoryFill(data, data, 1, size, eHandle1, 0, nullptr, copyParams); + immCmdList->appendMemoryFill(data, data, 1, size, nullptr, 1, &eHandle2, copyParams); context->freeMem(data); zeEventDestroy(eHandle1); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_2.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_2.cpp index 624231d381..6e1ddfaf23 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_2.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_2.cpp @@ -427,6 +427,71 @@ HWTEST2_F(CopyOffloadInOrderTests, givenCopyOffloadEnabledWhenProgrammingHwCmdsT EXPECT_EQ(initialMainTaskCount, mainQueueCsr->taskCount); EXPECT_EQ(initialCopyTaskCount + 2, copyQueueCsr->taskCount); } + + auto data = allocHostMem(1); + { + auto offset = cmdStream->getUsed(); + + immCmdList->appendMemoryFill(data, data, 1, 1, 
nullptr, 0, nullptr, copyParams); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(cmdList, + ptrOffset(cmdStream->getCpuBase(), offset), + (cmdStream->getUsed() - offset))); + + auto fillItor = findBltFillCmd(cmdList.begin(), cmdList.end()); + EXPECT_NE(cmdList.end(), fillItor); + + EXPECT_EQ(initialMainTaskCount, mainQueueCsr->taskCount); + EXPECT_EQ(initialCopyTaskCount + 3, copyQueueCsr->taskCount); + } + context->freeMem(data); +} + +HWTEST2_F(CopyOffloadInOrderTests, givenNonDualStreamOffloadWhenFillCalledThenSkipSycCommands, IsAtLeastXeHpCore) { + using MI_LOAD_REGISTER_REG = typename FamilyType::MI_LOAD_REGISTER_REG; + using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; + using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM; + + debugManager.flags.OverrideCopyOffloadMode.set(nonDualStreamMode); + + auto immCmdList = createImmCmdListWithOffload(); + + auto eventPool = createEvents(1, true); + + auto cmdStream = immCmdList->getCmdContainer().getCommandStream(); + + auto data = allocHostMem(1); + + immCmdList->appendMemoryFill(data, data, 1, 1, events[0]->toHandle(), 0, nullptr, copyParams); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(cmdList, cmdStream->getCpuBase(), cmdStream->getUsed())); + + auto miFlushCmds = findAll(cmdList.begin(), cmdList.end()); + EXPECT_EQ(0u, miFlushCmds.size()); + + auto lrrCmds = findAll(cmdList.begin(), cmdList.end()); + auto lriCmds = findAll(cmdList.begin(), cmdList.end()); + auto lrmCmds = findAll(cmdList.begin(), cmdList.end()); + + for (auto &lrr : lrrCmds) { + auto lrrCmd = genCmdCast(*lrr); + EXPECT_TRUE(lrrCmd->getSourceRegisterAddress() < RegisterOffsets::bcs0Base); + EXPECT_TRUE(lrrCmd->getDestinationRegisterAddress() < RegisterOffsets::bcs0Base); + } + + for (auto &lri : lriCmds) { + auto lriCmd = genCmdCast(*lri); + EXPECT_TRUE(lriCmd->getRegisterOffset() < RegisterOffsets::bcs0Base); + } + + for (auto &lrm : lrmCmds) { 
+ auto lrmCmd = genCmdCast(*lrm); + EXPECT_TRUE(lrmCmd->getRegisterAddress() < RegisterOffsets::bcs0Base); + } + + context->freeMem(data); } HWTEST2_F(CopyOffloadInOrderTests, givenCopyOffloadEnabledAndD2DAllocWhenProgrammingHwCmdsThenDontUseCopyCommands, IsAtLeastXeHpCore) { @@ -1650,7 +1715,7 @@ HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdL regularCmdList->appendMemoryCopyRegion(data, &region, 1, 1, data, &region, 1, 1, nullptr, 0, nullptr, copyParams); - regularCmdList->appendMemoryFill(data, data, 1, size, nullptr, 0, nullptr, false); + regularCmdList->appendMemoryFill(data, data, 1, size, nullptr, 0, nullptr, copyParams); regularCmdList->appendSignalEvent(eventHandle, false); @@ -1667,7 +1732,7 @@ HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdL } offset = copyOnlyCmdStream->getUsed(); - regularCopyOnlyCmdList->appendMemoryFill(data, data, 1, size, nullptr, 0, nullptr, false); + regularCopyOnlyCmdList->appendMemoryFill(data, data, 1, size, nullptr, 0, nullptr, copyParams); { GenCmdList cmdList; @@ -2583,7 +2648,7 @@ HWTEST2_F(MultiTileSynchronizedDispatchTests, givenLimitedSyncDispatchWhenAppend EXPECT_TRUE(verifyTokenCheck(1)); offset = cmdStream->getUsed(); - immCmdList->appendMemoryFill(alloc, alloc, 2, 2, nullptr, 0, nullptr, false); + immCmdList->appendMemoryFill(alloc, alloc, 2, 2, nullptr, 0, nullptr, copyParams); EXPECT_TRUE(verifyTokenCheck(1)); offset = cmdStream->getUsed(); @@ -2937,8 +3002,8 @@ HWTEST2_F(MultiTileInOrderCmdListTests, givenStandaloneEventWhenCallingAppendThe auto immCmdList = createMultiTileImmCmdList(); - immCmdList->appendMemoryFill(data, data, 1, size, eHandle1, 0, nullptr, false); - immCmdList->appendMemoryFill(data, data, 1, size, nullptr, 1, &eHandle2, false); + immCmdList->appendMemoryFill(data, data, 1, size, eHandle1, 0, nullptr, copyParams); + immCmdList->appendMemoryFill(data, data, 1, size, nullptr, 1, &eHandle2, copyParams);
immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, eHandle3, 0, nullptr, launchParams, false); context->freeMem(data); @@ -3018,8 +3083,8 @@ HWTEST2_F(MultiTileInOrderCmdListTests, givenStandaloneEventAndCopyOnlyCmdListWh auto immCmdList = createCopyOnlyImmCmdList(); - immCmdList->appendMemoryFill(data, data, 1, size, eHandle1, 0, nullptr, false); - immCmdList->appendMemoryFill(data, data, 1, size, nullptr, 1, &eHandle2, false); + immCmdList->appendMemoryFill(data, data, 1, size, eHandle1, 0, nullptr, copyParams); + immCmdList->appendMemoryFill(data, data, 1, size, nullptr, 1, &eHandle2, copyParams); context->freeMem(data); zeEventDestroy(eHandle1); diff --git a/level_zero/core/test/unit_tests/sources/context/test_context.cpp b/level_zero/core/test/unit_tests/sources/context/test_context.cpp index 601c102f83..4de258d43d 100644 --- a/level_zero/core/test/unit_tests/sources/context/test_context.cpp +++ b/level_zero/core/test/unit_tests/sources/context/test_context.cpp @@ -939,10 +939,10 @@ HWTEST_F(ContextMakeMemoryResidentAndMigrationTests, ze_host_mem_alloc_desc_t hostDesc = {}; result = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, 16384u, 4090u, &dstBuffer); ASSERT_EQ(ZE_RESULT_SUCCESS, result); - + CmdListMemoryCopyParams copyParams = {}; int one = 1; result = commandList0->appendMemoryFill(dstBuffer, reinterpret_cast(&one), sizeof(one), 4090u, - nullptr, 0, nullptr, false); + nullptr, 0, nullptr, copyParams); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(mockPageFaultManager->moveAllocationToGpuDomainCalledTimes, 1u); @@ -998,10 +998,10 @@ HWTEST_F(ContextMakeMemoryResidentAndMigrationTests, NEO::EngineGroupType::compute, 0u, result, false)); - + CmdListMemoryCopyParams copyParams = {}; int one = 1; result = commandListRegular->appendMemoryFill(dstBuffer, reinterpret_cast(&one), sizeof(one), 4090u, - nullptr, 0, nullptr, false); + nullptr, 0, nullptr, copyParams); EXPECT_EQ(ZE_RESULT_SUCCESS, result); 
commandListRegular->close(); @@ -1072,10 +1072,10 @@ HWTEST_F(ContextMakeMemoryResidentAndMigrationTests, ze_host_mem_alloc_desc_t hostDesc = {}; result = context->allocHostMem(&hostDesc, 4096u, 0u, &dstBuffer); ASSERT_EQ(ZE_RESULT_SUCCESS, result); - + CmdListMemoryCopyParams copyParams = {}; int one = 1; result = commandList0->appendMemoryFill(dstBuffer, reinterpret_cast(&one), sizeof(one), 4090u, - nullptr, 0, nullptr, false); + nullptr, 0, nullptr, copyParams); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(mockPageFaultManager->moveAllocationToGpuDomainCalledTimes, 3u); diff --git a/level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger_1.cpp b/level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger_1.cpp index 1e4ecd4a2c..41f15c273e 100644 --- a/level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger_1.cpp +++ b/level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger_1.cpp @@ -477,8 +477,8 @@ HWTEST_F(L0DebuggerSimpleTest, givenUseCsrImmediateSubmissionEnabledForImmediate queueDesc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS; ze_result_t returnValue = ZE_RESULT_SUCCESS; auto commandList = CommandList::createImmediate(productFamily, device, &queueDesc, true, NEO::EngineGroupType::renderCompute, returnValue); - - result = commandList->appendMemoryFill(dstPtr, reinterpret_cast(&pattern), sizeof(pattern), 4096u, nullptr, 0, nullptr, false); + CmdListMemoryCopyParams copyParams = {}; + result = commandList->appendMemoryFill(dstPtr, reinterpret_cast(&pattern), sizeof(pattern), 4096u, nullptr, 0, nullptr, copyParams); ASSERT_EQ(ZE_RESULT_SUCCESS, result); context->freeMem(dstPtr); @@ -500,8 +500,8 @@ HWTEST_F(L0DebuggerSimpleTest, givenUseCsrImmediateSubmissionEnabledForImmediate queueDesc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS; ze_result_t returnValue = ZE_RESULT_SUCCESS; auto commandList = CommandList::createImmediate(productFamily, device, &queueDesc, true, NEO::EngineGroupType::renderCompute, returnValue); - - result = 
commandList->appendMemoryFill(dstPtr, reinterpret_cast(&pattern), sizeof(pattern), 4096u, nullptr, 0, nullptr, false); + CmdListMemoryCopyParams copyParams = {}; + result = commandList->appendMemoryFill(dstPtr, reinterpret_cast(&pattern), sizeof(pattern), 4096u, nullptr, 0, nullptr, copyParams); ASSERT_EQ(ZE_RESULT_SUCCESS, result); context->freeMem(dstPtr); @@ -523,8 +523,8 @@ HWTEST_F(L0DebuggerSimpleTest, givenUseCsrImmediateSubmissionDisabledForImmediat queueDesc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS; ze_result_t returnValue = ZE_RESULT_SUCCESS; auto commandList = CommandList::createImmediate(productFamily, device, &queueDesc, true, NEO::EngineGroupType::renderCompute, returnValue); - - result = commandList->appendMemoryFill(dstPtr, reinterpret_cast(&pattern), sizeof(pattern), 4096u, nullptr, 0, nullptr, false); + CmdListMemoryCopyParams copyParams = {}; + result = commandList->appendMemoryFill(dstPtr, reinterpret_cast(&pattern), sizeof(pattern), 4096u, nullptr, 0, nullptr, copyParams); ASSERT_EQ(ZE_RESULT_SUCCESS, result); context->freeMem(dstPtr); @@ -549,9 +549,9 @@ HWTEST_F(L0DebuggerSimpleTest, givenUseCsrImmediateSubmissionEnabledForRegularCo auto result = context->allocDeviceMem(device->toHandle(), &deviceDesc, 16384u, 4096u, &dstPtr); ASSERT_EQ(ZE_RESULT_SUCCESS, result); int pattern = 1; - + CmdListMemoryCopyParams copyParams = {}; auto commandList = CommandList::fromHandle(commandLists[0]); - result = commandList->appendMemoryFill(dstPtr, reinterpret_cast(&pattern), sizeof(pattern), 4096u, nullptr, 0, nullptr, false); + result = commandList->appendMemoryFill(dstPtr, reinterpret_cast(&pattern), sizeof(pattern), 4096u, nullptr, 0, nullptr, copyParams); ASSERT_EQ(ZE_RESULT_SUCCESS, result); commandList->close(); @@ -585,9 +585,9 @@ HWTEST_F(L0DebuggerSimpleTest, givenUseCsrImmediateSubmissionDisabledForRegularC auto result = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, 16384u, 4096u, &dstPtr); ASSERT_EQ(ZE_RESULT_SUCCESS, 
result); int pattern = 1; - + CmdListMemoryCopyParams copyParams = {}; auto commandList = CommandList::fromHandle(commandLists[0]); - result = commandList->appendMemoryFill(dstPtr, reinterpret_cast(&pattern), sizeof(pattern), 4096u, nullptr, 0, nullptr, false); + result = commandList->appendMemoryFill(dstPtr, reinterpret_cast(&pattern), sizeof(pattern), 4096u, nullptr, 0, nullptr, copyParams); ASSERT_EQ(ZE_RESULT_SUCCESS, result); commandList->close(); diff --git a/level_zero/core/test/unit_tests/sources/memory/test_memory.cpp b/level_zero/core/test/unit_tests/sources/memory/test_memory.cpp index 945d699d5c..daff18083f 100644 --- a/level_zero/core/test/unit_tests/sources/memory/test_memory.cpp +++ b/level_zero/core/test/unit_tests/sources/memory/test_memory.cpp @@ -4056,9 +4056,9 @@ HWTEST2_F(MultipleDevicePeerAllocationTest, auto commandList = std::make_unique<::L0::ult::CommandListCoreFamily>(); commandList->initialize(device1, NEO::EngineGroupType::renderCompute, 0u); - + CmdListMemoryCopyParams copyParams; char pattern = 'a'; - result = commandList->appendBlitFill(ptr, &pattern, sizeof(pattern), size, nullptr, 0, nullptr, false); + result = commandList->appendBlitFill(ptr, &pattern, sizeof(pattern), size, nullptr, 0, nullptr, copyParams); EXPECT_EQ(result, ZE_RESULT_ERROR_INVALID_ARGUMENT); result = context->freeMem(ptr); @@ -4082,9 +4082,9 @@ HWTEST2_F(MultipleDevicePeerAllocationTest, auto commandList = std::make_unique<::L0::ult::CommandListCoreFamily>(); commandList->initialize(device0, NEO::EngineGroupType::renderCompute, 0u); - + CmdListMemoryCopyParams copyParams; char pattern = 'a'; - result = commandList->appendBlitFill(ptr, &pattern, sizeof(pattern), size, nullptr, 0, nullptr, false); + result = commandList->appendBlitFill(ptr, &pattern, sizeof(pattern), size, nullptr, 0, nullptr, copyParams); EXPECT_EQ(result, ZE_RESULT_SUCCESS); result = context->freeMem(ptr); @@ -4109,9 +4109,9 @@ HWTEST2_F(MultipleDevicePeerAllocationTest, auto commandList = 
std::make_unique<::L0::ult::CommandListCoreFamily>(); commandList->initialize(device1, NEO::EngineGroupType::renderCompute, 0u); - + CmdListMemoryCopyParams copyParams; char pattern = 'a'; - result = commandList->appendBlitFill(ptr, &pattern, sizeof(pattern), size, nullptr, 0, nullptr, false); + result = commandList->appendBlitFill(ptr, &pattern, sizeof(pattern), size, nullptr, 0, nullptr, copyParams); EXPECT_EQ(result, ZE_RESULT_SUCCESS); result = context->freeMem(ptr); @@ -4140,9 +4140,9 @@ HWTEST2_F(MultipleDevicePeerAllocationTest, auto commandList = std::make_unique<::L0::ult::CommandListCoreFamily>(); commandList->initialize(device1, NEO::EngineGroupType::renderCompute, 0u); - + CmdListMemoryCopyParams copyParams; char pattern = 'a'; - result = commandList->appendBlitFill(ptr, &pattern, sizeof(pattern), size, nullptr, 0, nullptr, false); + result = commandList->appendBlitFill(ptr, &pattern, sizeof(pattern), size, nullptr, 0, nullptr, copyParams); EXPECT_EQ(result, ZE_RESULT_SUCCESS); result = context->freeMem(ptr); @@ -4167,9 +4167,9 @@ HWTEST2_F(MultipleDevicePeerAllocationTest, auto commandList = std::make_unique<::L0::ult::CommandListCoreFamily>(); commandList->initialize(device0, NEO::EngineGroupType::renderCompute, 0u); - + CmdListMemoryCopyParams copyParams; char pattern = 'a'; - result = commandList->appendBlitFill(ptr, &pattern, sizeof(pattern), size, nullptr, 0, nullptr, false); + result = commandList->appendBlitFill(ptr, &pattern, sizeof(pattern), size, nullptr, 0, nullptr, copyParams); EXPECT_EQ(result, ZE_RESULT_SUCCESS); result = context->freeMem(ptr); @@ -4186,9 +4186,9 @@ HWTEST2_F(MultipleDevicePeerAllocationTest, auto commandList = std::make_unique<::L0::ult::CommandListCoreFamily>(); commandList->initialize(device0, NEO::EngineGroupType::renderCompute, 0u); - + CmdListMemoryCopyParams copyParams; char pattern = 'a'; - ze_result_t result = commandList->appendBlitFill(ptr, &pattern, sizeof(pattern), size, nullptr, 0, nullptr, false); + 
ze_result_t result = commandList->appendBlitFill(ptr, &pattern, sizeof(pattern), size, nullptr, 0, nullptr, copyParams); EXPECT_EQ(result, ZE_RESULT_ERROR_INVALID_ARGUMENT); delete[] ptr;