Add wait events to appendMemoryCopy

appendMemoryCopy has accepted wait events since v1.0, so add the corresponding
support. The entry points of other copy operations were not passing the wait
events through, although the internal implementation already supported them.

Also, rename some variables for consistency.

Also, remove signal event from appendMemoryCopyBlit since it is not
used.

Signed-off-by: Jaime Arteaga <jaime.a.arteaga.molina@intel.com>
This commit is contained in:
Jaime Arteaga
2020-11-06 17:24:31 -08:00
committed by Compute-Runtime-Automation
parent 40d5cc71c9
commit 4c5ff75371
5 changed files with 67 additions and 25 deletions

View File

@ -14,10 +14,10 @@ zeCommandListAppendMemoryCopy(
void *dstptr,
const void *srcptr,
size_t size,
ze_event_handle_t hEvent,
ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) {
return L0::CommandList::fromHandle(hCommandList)->appendMemoryCopy(dstptr, srcptr, size, hEvent, 0, nullptr);
return L0::CommandList::fromHandle(hCommandList)->appendMemoryCopy(dstptr, srcptr, size, hSignalEvent, numWaitEvents, phWaitEvents);
}
ZE_APIEXPORT ze_result_t ZE_APICALL
@ -55,10 +55,10 @@ zeCommandListAppendImageCopy(
ze_command_list_handle_t hCommandList,
ze_image_handle_t hDstImage,
ze_image_handle_t hSrcImage,
ze_event_handle_t hEvent,
ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) {
return L0::CommandList::fromHandle(hCommandList)->appendImageCopy(hDstImage, hSrcImage, hEvent, 0, nullptr);
return L0::CommandList::fromHandle(hCommandList)->appendImageCopy(hDstImage, hSrcImage, hSignalEvent, numWaitEvents, phWaitEvents);
}
ZE_APIEXPORT ze_result_t ZE_APICALL
@ -68,10 +68,10 @@ zeCommandListAppendImageCopyRegion(
ze_image_handle_t hSrcImage,
const ze_image_region_t *pDstRegion,
const ze_image_region_t *pSrcRegion,
ze_event_handle_t hEvent,
ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) {
return L0::CommandList::fromHandle(hCommandList)->appendImageCopyRegion(hDstImage, hSrcImage, pDstRegion, pSrcRegion, hEvent, 0, nullptr);
return L0::CommandList::fromHandle(hCommandList)->appendImageCopyRegion(hDstImage, hSrcImage, pDstRegion, pSrcRegion, hSignalEvent, numWaitEvents, phWaitEvents);
}
ZE_APIEXPORT ze_result_t ZE_APICALL
@ -80,10 +80,10 @@ zeCommandListAppendImageCopyToMemory(
void *dstptr,
ze_image_handle_t hSrcImage,
const ze_image_region_t *pSrcRegion,
ze_event_handle_t hEvent,
ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) {
return L0::CommandList::fromHandle(hCommandList)->appendImageCopyToMemory(dstptr, hSrcImage, pSrcRegion, hEvent, numWaitEvents, phWaitEvents);
return L0::CommandList::fromHandle(hCommandList)->appendImageCopyToMemory(dstptr, hSrcImage, pSrcRegion, hSignalEvent, numWaitEvents, phWaitEvents);
}
ZE_APIEXPORT ze_result_t ZE_APICALL
@ -92,10 +92,10 @@ zeCommandListAppendImageCopyFromMemory(
ze_image_handle_t hDstImage,
const void *srcptr,
const ze_image_region_t *pDstRegion,
ze_event_handle_t hEvent,
ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) {
return L0::CommandList::fromHandle(hCommandList)->appendImageCopyFromMemory(hDstImage, srcptr, pDstRegion, hEvent, numWaitEvents, phWaitEvents);
return L0::CommandList::fromHandle(hCommandList)->appendImageCopyFromMemory(hDstImage, srcptr, pDstRegion, hSignalEvent, numWaitEvents, phWaitEvents);
}
ZE_APIEXPORT ze_result_t ZE_APICALL

View File

@ -147,8 +147,7 @@ struct CommandListCoreFamily : CommandListImp {
uint64_t dstOffset, uintptr_t srcPtr,
NEO::GraphicsAllocation *srcPtrAlloc,
uint64_t srcOffset,
uint32_t size,
ze_event_handle_t hSignalEvent);
uint32_t size);
MOCKABLE_VIRTUAL ze_result_t appendMemoryCopyBlitRegion(NEO::GraphicsAllocation *srcAlloc,
NEO::GraphicsAllocation *dstAlloc,

View File

@ -688,8 +688,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyBlit(uintptr_t
uint64_t dstOffset, uintptr_t srcPtr,
NEO::GraphicsAllocation *srcPtrAlloc,
uint64_t srcOffset,
uint32_t size,
ze_event_handle_t hSignalEvent) {
uint32_t size) {
dstOffset += ptrDiff<uintptr_t>(dstPtr, dstPtrAlloc->getGpuAddress());
srcOffset += ptrDiff<uintptr_t>(srcPtr, srcPtrAlloc->getGpuAddress());
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
@ -840,14 +839,17 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
auto dstAllocationStruct = getAlignedAllocation(this->device, dstptr, size);
auto srcAllocationStruct = getAlignedAllocation(this->device, srcptr, size);
ze_result_t ret = ZE_RESULT_SUCCESS;
ze_result_t ret = addEventsToCmdList(hSignalEvent, numWaitEvents, phWaitEvents);
if (ret) {
return ret;
}
appendEventForProfiling(hSignalEvent, true);
if (ret == ZE_RESULT_SUCCESS && leftSize) {
ret = isCopyOnly() ? appendMemoryCopyBlit(dstAllocationStruct.alignedAllocationPtr,
dstAllocationStruct.alloc, dstAllocationStruct.offset,
srcAllocationStruct.alignedAllocationPtr,
srcAllocationStruct.alloc, srcAllocationStruct.offset, static_cast<uint32_t>(leftSize), hSignalEvent)
srcAllocationStruct.alloc, srcAllocationStruct.offset, static_cast<uint32_t>(leftSize))
: appendMemoryCopyKernelWithGA(reinterpret_cast<void *>(&dstAllocationStruct.alignedAllocationPtr),
dstAllocationStruct.alloc, dstAllocationStruct.offset,
reinterpret_cast<void *>(&srcAllocationStruct.alignedAllocationPtr),
@ -860,7 +862,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
ret = isCopyOnly() ? appendMemoryCopyBlit(dstAllocationStruct.alignedAllocationPtr,
dstAllocationStruct.alloc, leftSize + dstAllocationStruct.offset,
srcAllocationStruct.alignedAllocationPtr,
srcAllocationStruct.alloc, leftSize + srcAllocationStruct.offset, static_cast<uint32_t>(middleSizeBytes), hSignalEvent)
srcAllocationStruct.alloc, leftSize + srcAllocationStruct.offset, static_cast<uint32_t>(middleSizeBytes))
: appendMemoryCopyKernelWithGA(reinterpret_cast<void *>(&dstAllocationStruct.alignedAllocationPtr),
dstAllocationStruct.alloc, leftSize + dstAllocationStruct.offset,
reinterpret_cast<void *>(&srcAllocationStruct.alignedAllocationPtr),
@ -874,7 +876,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
ret = isCopyOnly() ? appendMemoryCopyBlit(dstAllocationStruct.alignedAllocationPtr,
dstAllocationStruct.alloc, leftSize + middleSizeBytes + dstAllocationStruct.offset,
srcAllocationStruct.alignedAllocationPtr,
srcAllocationStruct.alloc, leftSize + middleSizeBytes + srcAllocationStruct.offset, static_cast<uint32_t>(rightSize), hSignalEvent)
srcAllocationStruct.alloc, leftSize + middleSizeBytes + srcAllocationStruct.offset, static_cast<uint32_t>(rightSize))
: appendMemoryCopyKernelWithGA(reinterpret_cast<void *>(&dstAllocationStruct.alignedAllocationPtr),
dstAllocationStruct.alloc, leftSize + middleSizeBytes + dstAllocationStruct.offset,
reinterpret_cast<void *>(&srcAllocationStruct.alignedAllocationPtr),

View File

@ -51,8 +51,7 @@ class MockCommandListHw : public WhiteBox<::L0::CommandListCoreFamily<gfxCoreFam
uint64_t dstOffset, uintptr_t srcPtr,
NEO::GraphicsAllocation *srcPtrAlloc,
uint64_t srcOffset,
uint32_t size,
ze_event_handle_t hSignalEvent) override {
uint32_t size) override {
appendMemoryCopyBlitCalledTimes++;
return ZE_RESULT_SUCCESS;
}
@ -506,6 +505,49 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenTimestampPassedToMemoryCopyThen
EXPECT_EQ(cmd->getRegisterAddress(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW);
}
// Verifies that appendMemoryCopy() called with two wait events and no signal
// event produces a command stream containing two MI_SEMAPHORE_WAIT commands
// followed (somewhere later in the stream) by a PIPE_CONTROL.
//
// NOTE(review): the test name says "SignalEvents", but the signal event passed
// below is nullptr and only *wait* events are exercised. The name is kept
// unchanged so existing test filters keep working.
HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryCopyWithSignalEventsThenSemaphoreWaitAndPipeControlAreFound, Platforms) {
    using SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    ze_result_t result = ZE_RESULT_SUCCESS;
    std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, result));
    // Fatal check: everything below dereferences commandList.
    ASSERT_NE(nullptr, commandList.get());
    auto &commandContainer = commandList->commandContainer;

    // Arbitrary, deliberately unaligned addresses; the copy is only encoded, never executed here.
    void *srcPtr = reinterpret_cast<void *>(0x1234);
    void *dstPtr = reinterpret_cast<void *>(0x2345);

    // One pool slot per wait event created below.
    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 2;
    auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), 0, nullptr, &eventPoolDesc));

    std::vector<ze_event_handle_t> events;
    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;
    eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST;
    auto event = std::unique_ptr<L0::Event>(L0::Event::create(eventPool.get(), &eventDesc, device));
    events.push_back(event.get());

    eventDesc.index = 1;
    auto event1 = std::unique_ptr<L0::Event>(L0::Event::create(eventPool.get(), &eventDesc, device));
    events.push_back(event1.get());

    // No signal event (nullptr); the wait-event count is derived from the vector
    // so the count and the array passed to the call cannot drift apart.
    const auto numWaitEvents = static_cast<uint32_t>(events.size());
    result = commandList->appendMemoryCopy(dstPtr, srcPtr, 0x1001, nullptr, numWaitEvents, events.data());
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));

    // ASSERT (not EXPECT) before each advance: if the search fails, itor equals
    // cmdList.end() and incrementing it would be undefined behaviour.
    auto itor = find<SEMAPHORE_WAIT *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(cmdList.end(), itor);
    ++itor;

    // Second semaphore wait, one per wait event.
    itor = find<SEMAPHORE_WAIT *>(itor, cmdList.end());
    ASSERT_NE(cmdList.end(), itor);
    ++itor;

    // A PIPE_CONTROL must appear after both semaphore waits.
    itor = find<PIPE_CONTROL *>(itor, cmdList.end());
    EXPECT_NE(cmdList.end(), itor);
}
using ImageSupport = IsWithinProducts<IGFX_SKYLAKE, IGFX_TIGERLAKE_LP>;
HWTEST2_F(CommandListCreate, givenCopyCommandListWhenCopyFromMemoryToImageThenBlitImageCopyCalled, ImageSupport) {
@ -970,7 +1012,7 @@ class MockCommandListForMemFill : public WhiteBox<::L0::CommandListCoreFamily<gf
ze_result_t appendMemoryCopyBlit(NEO::GraphicsAllocation *dstPtrAlloc,
uint64_t dstOffset,
NEO::GraphicsAllocation *srcPtrAlloc,
uint64_t srcOffset, uint32_t size, ze_event_handle_t hSignalEvent) override {
uint64_t srcOffset, uint32_t size) override {
appendMemoryCopyBlitCalledTimes++;
return ZE_RESULT_SUCCESS;
}

View File

@ -33,8 +33,7 @@ class MockCommandListForMemFill : public WhiteBox<::L0::CommandListCoreFamily<gf
uint64_t dstOffset, uintptr_t srcPtr,
NEO::GraphicsAllocation *srcPtrAlloc,
uint64_t srcOffset,
uint32_t size,
ze_event_handle_t hSignalEvent) override {
uint32_t size) override {
appendMemoryCopyBlitCalledTimes++;
return ZE_RESULT_SUCCESS;
}
@ -288,7 +287,7 @@ HWTEST2_F(AppendMemoryCopy, givenCopyOnlyCommandListWhenWithDcFlushAddedIsNotAdd
NEO::MockGraphicsAllocation mockAllocationDst(0, NEO::GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY,
reinterpret_cast<void *>(dstPtr), 0x1000, 0, sizeof(uint32_t),
MemoryPool::System4KBPages);
commandList->appendMemoryCopyBlit(ptrOffset(dstPtr, dstOffset), &mockAllocationDst, 0, ptrOffset(srcPtr, srcOffset), &mockAllocationSrc, 0, copySize, nullptr);
commandList->appendMemoryCopyBlit(ptrOffset(dstPtr, dstOffset), &mockAllocationDst, 0, ptrOffset(srcPtr, srcOffset), &mockAllocationSrc, 0, copySize);
auto &commandContainer = commandList->commandContainer;
GenCmdList genCmdList;