diff --git a/level_zero/api/core/ze_copy.cpp b/level_zero/api/core/ze_copy.cpp index 6bc981daf9..7109299986 100644 --- a/level_zero/api/core/ze_copy.cpp +++ b/level_zero/api/core/ze_copy.cpp @@ -14,10 +14,10 @@ zeCommandListAppendMemoryCopy( void *dstptr, const void *srcptr, size_t size, - ze_event_handle_t hEvent, + ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { - return L0::CommandList::fromHandle(hCommandList)->appendMemoryCopy(dstptr, srcptr, size, hEvent, 0, nullptr); + return L0::CommandList::fromHandle(hCommandList)->appendMemoryCopy(dstptr, srcptr, size, hSignalEvent, numWaitEvents, phWaitEvents); } ZE_APIEXPORT ze_result_t ZE_APICALL @@ -55,10 +55,10 @@ zeCommandListAppendImageCopy( ze_command_list_handle_t hCommandList, ze_image_handle_t hDstImage, ze_image_handle_t hSrcImage, - ze_event_handle_t hEvent, + ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { - return L0::CommandList::fromHandle(hCommandList)->appendImageCopy(hDstImage, hSrcImage, hEvent, 0, nullptr); + return L0::CommandList::fromHandle(hCommandList)->appendImageCopy(hDstImage, hSrcImage, hSignalEvent, numWaitEvents, phWaitEvents); } ZE_APIEXPORT ze_result_t ZE_APICALL @@ -68,10 +68,10 @@ zeCommandListAppendImageCopyRegion( ze_image_handle_t hSrcImage, const ze_image_region_t *pDstRegion, const ze_image_region_t *pSrcRegion, - ze_event_handle_t hEvent, + ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { - return L0::CommandList::fromHandle(hCommandList)->appendImageCopyRegion(hDstImage, hSrcImage, pDstRegion, pSrcRegion, hEvent, 0, nullptr); + return L0::CommandList::fromHandle(hCommandList)->appendImageCopyRegion(hDstImage, hSrcImage, pDstRegion, pSrcRegion, hSignalEvent, numWaitEvents, phWaitEvents); } ZE_APIEXPORT ze_result_t ZE_APICALL @@ -80,10 +80,10 @@ zeCommandListAppendImageCopyToMemory( void *dstptr, ze_image_handle_t hSrcImage, const ze_image_region_t *pSrcRegion, - ze_event_handle_t hEvent, + ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { - return L0::CommandList::fromHandle(hCommandList)->appendImageCopyToMemory(dstptr, hSrcImage, pSrcRegion, hEvent, numWaitEvents, phWaitEvents); + return L0::CommandList::fromHandle(hCommandList)->appendImageCopyToMemory(dstptr, hSrcImage, pSrcRegion, hSignalEvent, numWaitEvents, phWaitEvents); } ZE_APIEXPORT ze_result_t ZE_APICALL @@ -92,10 +92,10 @@ zeCommandListAppendImageCopyFromMemory( ze_image_handle_t hDstImage, const void *srcptr, const ze_image_region_t *pDstRegion, - ze_event_handle_t hEvent, + ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { - return L0::CommandList::fromHandle(hCommandList)->appendImageCopyFromMemory(hDstImage, srcptr, pDstRegion, hEvent, numWaitEvents, phWaitEvents); + return L0::CommandList::fromHandle(hCommandList)->appendImageCopyFromMemory(hDstImage, srcptr, pDstRegion, hSignalEvent, numWaitEvents, phWaitEvents); } ZE_APIEXPORT ze_result_t ZE_APICALL diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.h b/level_zero/core/source/cmdlist/cmdlist_hw.h index ef21872c1e..c076c176fe 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw.h @@ -147,8 +147,7 @@ struct CommandListCoreFamily : CommandListImp { uint64_t dstOffset, uintptr_t srcPtr, NEO::GraphicsAllocation *srcPtrAlloc, uint64_t srcOffset, - uint32_t size, - ze_event_handle_t hSignalEvent); + uint32_t size); MOCKABLE_VIRTUAL ze_result_t appendMemoryCopyBlitRegion(NEO::GraphicsAllocation *srcAlloc, NEO::GraphicsAllocation *dstAlloc, diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 3db0869418..b67230e9e6 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -688,8 +688,7 @@ ze_result_t CommandListCoreFamily::appendMemoryCopyBlit(uintptr_t uint64_t dstOffset, uintptr_t srcPtr, NEO::GraphicsAllocation *srcPtrAlloc, uint64_t srcOffset, - uint32_t size, - ze_event_handle_t hSignalEvent) { + uint32_t size) { dstOffset += ptrDiff(dstPtr, dstPtrAlloc->getGpuAddress()); srcOffset += ptrDiff(srcPtr, srcPtrAlloc->getGpuAddress()); using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; @@ -840,14 +839,17 @@ ze_result_t CommandListCoreFamily::appendMemoryCopy(void *dstptr, auto dstAllocationStruct = getAlignedAllocation(this->device, dstptr, size); auto srcAllocationStruct = getAlignedAllocation(this->device, srcptr, size); - ze_result_t ret = ZE_RESULT_SUCCESS; + ze_result_t ret = addEventsToCmdList(hSignalEvent, numWaitEvents, phWaitEvents); + + if (ret) { + return ret; + } - appendEventForProfiling(hSignalEvent, true); if (ret == ZE_RESULT_SUCCESS && leftSize) { ret = isCopyOnly() ? appendMemoryCopyBlit(dstAllocationStruct.alignedAllocationPtr, dstAllocationStruct.alloc, dstAllocationStruct.offset, srcAllocationStruct.alignedAllocationPtr, - srcAllocationStruct.alloc, srcAllocationStruct.offset, static_cast(leftSize), hSignalEvent) + srcAllocationStruct.alloc, srcAllocationStruct.offset, static_cast(leftSize)) : appendMemoryCopyKernelWithGA(reinterpret_cast(&dstAllocationStruct.alignedAllocationPtr), dstAllocationStruct.alloc, dstAllocationStruct.offset, reinterpret_cast(&srcAllocationStruct.alignedAllocationPtr), @@ -860,7 +862,7 @@ ze_result_t CommandListCoreFamily::appendMemoryCopy(void *dstptr, ret = isCopyOnly() ? appendMemoryCopyBlit(dstAllocationStruct.alignedAllocationPtr, dstAllocationStruct.alloc, leftSize + dstAllocationStruct.offset, srcAllocationStruct.alignedAllocationPtr, - srcAllocationStruct.alloc, leftSize + srcAllocationStruct.offset, static_cast(middleSizeBytes), hSignalEvent) + srcAllocationStruct.alloc, leftSize + srcAllocationStruct.offset, static_cast(middleSizeBytes)) : appendMemoryCopyKernelWithGA(reinterpret_cast(&dstAllocationStruct.alignedAllocationPtr), dstAllocationStruct.alloc, leftSize + dstAllocationStruct.offset, reinterpret_cast(&srcAllocationStruct.alignedAllocationPtr), @@ -874,7 +876,7 @@ ze_result_t CommandListCoreFamily::appendMemoryCopy(void *dstptr, ret = isCopyOnly() ? appendMemoryCopyBlit(dstAllocationStruct.alignedAllocationPtr, dstAllocationStruct.alloc, leftSize + middleSizeBytes + dstAllocationStruct.offset, srcAllocationStruct.alignedAllocationPtr, - srcAllocationStruct.alloc, leftSize + middleSizeBytes + srcAllocationStruct.offset, static_cast(rightSize), hSignalEvent) + srcAllocationStruct.alloc, leftSize + middleSizeBytes + srcAllocationStruct.offset, static_cast(rightSize)) : appendMemoryCopyKernelWithGA(reinterpret_cast(&dstAllocationStruct.alignedAllocationPtr), dstAllocationStruct.alloc, leftSize + middleSizeBytes + dstAllocationStruct.offset, reinterpret_cast(&srcAllocationStruct.alignedAllocationPtr), diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_2.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_2.cpp index 40c51d427a..d5668171e5 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_2.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_2.cpp @@ -51,8 +51,7 @@ class MockCommandListHw : public WhiteBox<::L0::CommandListCoreFamilygetRegisterAddress(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW); } +HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryCopyWithSignalEventsThenSemaphoreWaitAndPipeControlAreFound, Platforms) { + using SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + + ze_result_t result = ZE_RESULT_SUCCESS; + std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, result)); + auto &commandContainer = commandList->commandContainer; + + void *srcPtr = reinterpret_cast(0x1234); + void *dstPtr = reinterpret_cast(0x2345); + + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.count = 2; + auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), 0, nullptr, &eventPoolDesc)); + + std::vector events; + + ze_event_desc_t eventDesc = {}; + eventDesc.index = 0; + eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; + auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); + events.push_back(event.get()); + eventDesc.index = 1; + auto event1 = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); + events.push_back(event1.get()); + + result = commandList->appendMemoryCopy(dstPtr, srcPtr, 0x1001, nullptr, 2u, events.data()); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); + + auto itor = find(cmdList.begin(), cmdList.end()); + EXPECT_NE(cmdList.end(), itor); + itor++; + itor = find(itor, cmdList.end()); + EXPECT_NE(cmdList.end(), itor); + itor++; + itor = find(itor, cmdList.end()); + EXPECT_NE(cmdList.end(), itor); +} + using ImageSupport = IsWithinProducts; HWTEST2_F(CommandListCreate, givenCopyCommandListWhenCopyFromMemoryToImageThenBlitImageCopyCalled, ImageSupport) { @@ -970,7 +1012,7 @@ class MockCommandListForMemFill : public WhiteBox<::L0::CommandListCoreFamily(dstPtr), 0x1000, 0, sizeof(uint32_t), MemoryPool::System4KBPages); - commandList->appendMemoryCopyBlit(ptrOffset(dstPtr, dstOffset), &mockAllocationDst, 0, ptrOffset(srcPtr, srcOffset), &mockAllocationSrc, 0, copySize, nullptr); + commandList->appendMemoryCopyBlit(ptrOffset(dstPtr, dstOffset), &mockAllocationDst, 0, ptrOffset(srcPtr, srcOffset), &mockAllocationSrc, 0, copySize); auto &commandContainer = commandList->commandContainer; GenCmdList genCmdList;