diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.h b/level_zero/core/source/cmdlist/cmdlist_hw.h index 5977a93d3e..f97f29c4e4 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw.h @@ -133,7 +133,8 @@ struct CommandListCoreFamily : CommandListImp { MOCKABLE_VIRTUAL ze_result_t appendMemoryCopyBlit(NEO::GraphicsAllocation *dstPtrAlloc, uint64_t dstOffset, NEO::GraphicsAllocation *srcPtrAlloc, - uint64_t srcOffset, uint32_t size); + uint64_t srcOffset, uint32_t size, + ze_event_handle_t hSignalEvent); MOCKABLE_VIRTUAL ze_result_t appendMemoryCopyBlitRegion(NEO::GraphicsAllocation *srcAlloc, NEO::GraphicsAllocation *dstAlloc, @@ -141,7 +142,7 @@ struct CommandListCoreFamily : CommandListImp { ze_copy_region_t dstRegion, Vec3 copySize, size_t srcRowPitch, size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, - size_t srcSize, size_t dstSize); + size_t srcSize, size_t dstSize, ze_event_handle_t hSignalEvent); MOCKABLE_VIRTUAL ze_result_t appendMemoryCopyKernel2d(NEO::GraphicsAllocation *dstAlloc, NEO::GraphicsAllocation *srcAlloc, Builtin builtin, const ze_copy_region_t *dstRegion, @@ -168,7 +169,7 @@ struct CommandListCoreFamily : CommandListImp { size_t srcRowPitch, size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, size_t bytesPerPixel, Vec3 copySize, - Vec3 srcSize, Vec3 dstSize); + Vec3 srcSize, Vec3 dstSize, ze_event_handle_t hSignalEvent); ze_result_t appendLaunchKernelWithParams(ze_kernel_handle_t hKernel, const ze_group_count_t *pThreadGroupDimensions, @@ -183,6 +184,7 @@ struct CommandListCoreFamily : CommandListImp { ze_result_t setGlobalWorkSizeIndirect(NEO::CrossThreadDataOffset offsets[3], void *crossThreadAddress, uint32_t lws[3]); void appendEventForProfiling(ze_event_handle_t hEvent, bool beforeWalker); + void appendEventForProfilingCopyCommand(ze_event_handle_t hEvent, bool beforeWalker); void appendSignalEventPostWalker(ze_event_handle_t hEvent); bool useMemCopyToBlitFill(size_t patternSize); diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 9fe7a2f2ad..35a0ee677c 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -125,9 +125,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelIndirect(ze_ ze_result_t ret = appendLaunchKernelWithParams(hKernel, pDispatchArgumentsBuffer, nullptr, true, false); - if (hEvent) { - appendSignalEventPostWalker(hEvent); - } + appendSignalEventPostWalker(hEvent); return ret; } @@ -160,9 +158,7 @@ ze_result_t CommandListCoreFamily::appendLaunchMultipleKernelsInd } } - if (hEvent) { - appendSignalEventPostWalker(hEvent); - } + appendSignalEventPostWalker(hEvent); return ZE_RESULT_SUCCESS; } @@ -204,9 +200,7 @@ ze_result_t CommandListCoreFamily::appendBarrier(ze_event_handle_ NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), args); } - if (hSignalEvent) { - this->appendSignalEventPostWalker(hSignalEvent); - } + appendSignalEventPostWalker(hSignalEvent); return ZE_RESULT_SUCCESS; } @@ -225,9 +219,7 @@ ze_result_t CommandListCoreFamily::appendMemoryRangesBarrier(uint applyMemoryRangesBarrier(numRanges, pRangeSizes, pRanges); - if (hSignalEvent) { - this->appendSignalEventPostWalker(hSignalEvent); - } + this->appendSignalEventPostWalker(hSignalEvent); if (this->cmdListType == CommandListType::TYPE_IMMEDIATE) { executeCommandListImmediate(true); @@ -273,7 +265,7 @@ ze_result_t CommandListCoreFamily::appendImageCopyFromMemory(ze_i if (isCopyOnlyCmdList) { return appendCopyImageBlit(allocationStruct.alloc, image->getAllocation(), {0, 0, 0}, {pDstRegion->originX, pDstRegion->originY, pDstRegion->originZ}, rowPitch, slicePitch, - rowPitch, slicePitch, bytesPerPixel, {pDstRegion->width, pDstRegion->height, pDstRegion->depth}, {pDstRegion->width, pDstRegion->height, pDstRegion->depth}, imgSize); + rowPitch, slicePitch, bytesPerPixel, {pDstRegion->width, pDstRegion->height, pDstRegion->depth}, {pDstRegion->width, pDstRegion->height, pDstRegion->depth}, imgSize, hEvent); } Kernel *builtinKernel = nullptr; @@ -379,7 +371,7 @@ ze_result_t CommandListCoreFamily::appendImageCopyToMemory(void * if (isCopyOnlyCmdList) { return appendCopyImageBlit(image->getAllocation(), allocationStruct.alloc, {pSrcRegion->originX, pSrcRegion->originY, pSrcRegion->originZ}, {0, 0, 0}, rowPitch, slicePitch, - rowPitch, slicePitch, bytesPerPixel, {pSrcRegion->width, pSrcRegion->height, pSrcRegion->depth}, imgSize, {pSrcRegion->width, pSrcRegion->height, pSrcRegion->depth}); + rowPitch, slicePitch, bytesPerPixel, {pSrcRegion->width, pSrcRegion->height, pSrcRegion->depth}, imgSize, {pSrcRegion->width, pSrcRegion->height, pSrcRegion->depth}, hEvent); } Kernel *builtinKernel = nullptr; @@ -525,7 +517,7 @@ ze_result_t CommandListCoreFamily::appendImageCopyRegion(ze_image return appendCopyImageBlit(srcImage->getAllocation(), dstImage->getAllocation(), {srcRegion.originX, srcRegion.originY, srcRegion.originZ}, {dstRegion.originX, dstRegion.originY, dstRegion.originZ}, srcRowPitch, srcSlicePitch, - dstRowPitch, dstSlicePitch, bytesPerPixel, {srcRegion.width, srcRegion.height, srcRegion.depth}, srcImgSize, dstImgSize); + dstRowPitch, dstSlicePitch, bytesPerPixel, {srcRegion.width, srcRegion.height, srcRegion.depth}, srcImgSize, dstImgSize, hEvent); } auto kernel = device->getBuiltinFunctionsLib()->getImageFunction(ImageBuiltin::CopyImageRegion); @@ -627,12 +619,16 @@ ze_result_t CommandListCoreFamily::appendMemoryCopyBlit(NEO::Grap uint64_t dstOffset, NEO::GraphicsAllocation *srcPtrAlloc, uint64_t srcOffset, - uint32_t size) { + uint32_t size, + ze_event_handle_t hSignalEvent) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; auto blitProperties = NEO::BlitProperties::constructPropertiesForCopyBuffer(dstPtrAlloc, srcPtrAlloc, {dstOffset, 0, 0}, {srcOffset, 0, 0}, {size, 0, 0}, 0, 0, 0, 0); commandContainer.addToResidencyContainer(dstPtrAlloc); commandContainer.addToResidencyContainer(srcPtrAlloc); + appendEventForProfiling(hSignalEvent, true); NEO::BlitCommandsHelper::dispatchBlitCommandsForBuffer(blitProperties, *commandContainer.getCommandStream(), *device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]); + this->appendSignalEventPostWalker(hSignalEvent); + return ZE_RESULT_SUCCESS; } @@ -643,7 +639,7 @@ ze_result_t CommandListCoreFamily::appendMemoryCopyBlitRegion(NEO ze_copy_region_t dstRegion, Vec3 copySize, size_t srcRowPitch, size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, - size_t srcSize, size_t dstSize) { + size_t srcSize, size_t dstSize, ze_event_handle_t hSignalEvent) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; Vec3 srcPtrOffset = {srcRegion.originX, srcRegion.originY, srcRegion.originZ}; @@ -654,7 +650,10 @@ ze_result_t CommandListCoreFamily::appendMemoryCopyBlitRegion(NEO dstRowPitch, dstSlicePitch); commandContainer.addToResidencyContainer(dstAlloc); commandContainer.addToResidencyContainer(srcAlloc); + appendEventForProfiling(hSignalEvent, true); NEO::BlitCommandsHelper::dispatchBlitCommandsForBuffer(blitProperties, *commandContainer.getCommandStream(), *device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]); + this->appendSignalEventPostWalker(hSignalEvent); + return ZE_RESULT_SUCCESS; } @@ -665,7 +664,7 @@ ze_result_t CommandListCoreFamily::appendCopyImageBlit(NEO::Graph size_t srcRowPitch, size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, size_t bytesPerPixel, Vec3 copySize, - Vec3 srcSize, Vec3 dstSize) { + Vec3 srcSize, Vec3 dstSize, ze_event_handle_t hSignalEvent) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; auto blitProperties = NEO::BlitProperties::constructPropertiesForCopyBuffer(dst, src, @@ -676,7 +675,10 @@ ze_result_t CommandListCoreFamily::appendCopyImageBlit(NEO::Graph blitProperties.dstSize = dstSize; commandContainer.addToResidencyContainer(dst); commandContainer.addToResidencyContainer(src); + appendEventForProfiling(hSignalEvent, true); NEO::BlitCommandsHelper::dispatchBlitCommandsForImages(blitProperties, *commandContainer.getCommandStream(), *device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]); + this->appendSignalEventPostWalker(hSignalEvent); + return ZE_RESULT_SUCCESS; } @@ -761,7 +763,7 @@ ze_result_t CommandListCoreFamily::appendMemoryCopy(void *dstptr, if (ret == ZE_RESULT_SUCCESS && leftSize) { ret = isCopyOnlyCmdList ? appendMemoryCopyBlit(dstAllocationStruct.alloc, dstAllocationStruct.offset, - srcAllocationStruct.alloc, srcAllocationStruct.offset, static_cast(leftSize)) + srcAllocationStruct.alloc, srcAllocationStruct.offset, static_cast(leftSize), hSignalEvent) : appendMemoryCopyKernelWithGA(reinterpret_cast(&dstAllocationStruct.alignedAllocationPtr), dstAllocationStruct.alloc, dstAllocationStruct.offset, reinterpret_cast(&srcAllocationStruct.alignedAllocationPtr), @@ -772,7 +774,7 @@ ze_result_t CommandListCoreFamily::appendMemoryCopy(void *dstptr, if (ret == ZE_RESULT_SUCCESS && middleSizeBytes) { ret = isCopyOnlyCmdList ? appendMemoryCopyBlit(dstAllocationStruct.alloc, leftSize + dstAllocationStruct.offset, - srcAllocationStruct.alloc, leftSize + srcAllocationStruct.offset, static_cast(middleSizeBytes)) + srcAllocationStruct.alloc, leftSize + srcAllocationStruct.offset, static_cast(middleSizeBytes), hSignalEvent) : appendMemoryCopyKernelWithGA(reinterpret_cast(&dstAllocationStruct.alignedAllocationPtr), dstAllocationStruct.alloc, leftSize + dstAllocationStruct.offset, reinterpret_cast(&srcAllocationStruct.alignedAllocationPtr), @@ -784,7 +786,7 @@ ze_result_t CommandListCoreFamily::appendMemoryCopy(void *dstptr, if (ret == ZE_RESULT_SUCCESS && rightSize) { ret = isCopyOnlyCmdList ? appendMemoryCopyBlit(dstAllocationStruct.alloc, leftSize + middleSizeBytes + dstAllocationStruct.offset, - srcAllocationStruct.alloc, leftSize + middleSizeBytes + srcAllocationStruct.offset, static_cast(rightSize)) + srcAllocationStruct.alloc, leftSize + middleSizeBytes + srcAllocationStruct.offset, static_cast(rightSize), hSignalEvent) : appendMemoryCopyKernelWithGA(reinterpret_cast(&dstAllocationStruct.alignedAllocationPtr), dstAllocationStruct.alloc, leftSize + middleSizeBytes + dstAllocationStruct.offset, reinterpret_cast(&srcAllocationStruct.alignedAllocationPtr), @@ -793,9 +795,7 @@ ze_result_t CommandListCoreFamily::appendMemoryCopy(void *dstptr, Builtin::CopyBufferToBufferSide); } - if (hSignalEvent) { - this->appendSignalEventPostWalker(hSignalEvent); - } + this->appendSignalEventPostWalker(hSignalEvent); if (dstAllocationStruct.needsFlush && !isCopyOnlyCmdList) { NEO::PipeControlArgs args(true); @@ -840,12 +840,14 @@ ze_result_t CommandListCoreFamily::appendMemoryCopyRegion(void *d ze_result_t result = ZE_RESULT_SUCCESS; if (srcRegion->depth > 1) { - result = isCopyOnlyCmdList ? appendMemoryCopyBlitRegion(srcAllocationStruct.alloc, dstAllocationStruct.alloc, *srcRegion, *dstRegion, {srcRegion->width, srcRegion->height, srcRegion->depth}, srcPitch, srcSlicePitch, dstPitch, dstSlicePitch, srcSize, dstSize) + result = isCopyOnlyCmdList ? appendMemoryCopyBlitRegion(srcAllocationStruct.alloc, dstAllocationStruct.alloc, *srcRegion, *dstRegion, {srcRegion->width, srcRegion->height, srcRegion->depth}, + srcPitch, srcSlicePitch, dstPitch, dstSlicePitch, srcSize, dstSize, hSignalEvent) : this->appendMemoryCopyKernel3d(dstAllocationStruct.alloc, srcAllocationStruct.alloc, Builtin::CopyBufferRectBytes3d, dstRegion, dstPitch, dstSlicePitch, dstAllocationStruct.offset, srcRegion, srcPitch, srcSlicePitch, srcAllocationStruct.offset, hSignalEvent, 0, nullptr); } else { - result = isCopyOnlyCmdList ? appendMemoryCopyBlitRegion(srcAllocationStruct.alloc, dstAllocationStruct.alloc, *srcRegion, *dstRegion, {srcRegion->width, srcRegion->height, srcRegion->depth}, srcPitch, srcSlicePitch, dstPitch, dstSlicePitch, srcSize, dstSize) + result = isCopyOnlyCmdList ? appendMemoryCopyBlitRegion(srcAllocationStruct.alloc, dstAllocationStruct.alloc, *srcRegion, *dstRegion, {srcRegion->width, srcRegion->height, srcRegion->depth}, + srcPitch, srcSlicePitch, dstPitch, dstSlicePitch, srcSize, dstSize, hSignalEvent) : this->appendMemoryCopyKernel2d(dstAllocationStruct.alloc, srcAllocationStruct.alloc, Builtin::CopyBufferRectBytes2d, dstRegion, dstPitch, dstAllocationStruct.offset, srcRegion, srcPitch, srcAllocationStruct.offset, hSignalEvent, 0, nullptr); @@ -1095,9 +1097,7 @@ ze_result_t CommandListCoreFamily::appendMemoryFill(void *ptr, 0, nullptr); } - if (hEvent) { - this->appendSignalEventPostWalker(hEvent); - } + this->appendSignalEventPostWalker(hEvent); if (hostPointerNeedsFlush) { NEO::PipeControlArgs args(true); @@ -1141,15 +1141,16 @@ ze_result_t CommandListCoreFamily::appendBlitFill(void *ptr, uint32_t patternToCommand[4] = {}; memcpy_s(&patternToCommand, sizeof(patternToCommand), pattern, patternSize); NEO::BlitCommandsHelper::dispatchBlitMemoryColorFill(allocData->gpuAllocation, patternToCommand, patternSize, *commandContainer.getCommandStream(), size, *device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]); - if (hEvent) { - this->appendSignalEventPostWalker(hEvent); - } + appendSignalEventPostWalker(hEvent); } return ZE_RESULT_SUCCESS; } template void CommandListCoreFamily::appendSignalEventPostWalker(ze_event_handle_t hEvent) { + if (hEvent == nullptr) { + return; + } auto event = Event::fromHandle(hEvent); if (event->isTimestampEvent) { appendEventForProfiling(hEvent, false); @@ -1157,6 +1158,22 @@ void CommandListCoreFamily::appendSignalEventPostWalker(ze_event_ CommandListCoreFamily::appendSignalEvent(hEvent); } } +template +void CommandListCoreFamily::appendEventForProfilingCopyCommand(ze_event_handle_t hEvent, bool beforeWalker) { + using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; + auto event = Event::fromHandle(hEvent); + + if (!event->isTimestampEvent) { + return; + } + commandContainer.addToResidencyContainer(&event->getAllocation()); + auto baseAddr = event->getGpuAddress(); + auto contextOffset = beforeWalker ? offsetof(KernelTimestampEvent, contextStart) : offsetof(KernelTimestampEvent, contextEnd); + auto globalOffset = beforeWalker ? offsetof(KernelTimestampEvent, globalStart) : offsetof(KernelTimestampEvent, globalEnd); + + NEO::EncodeStoreMMIO::encode(*commandContainer.getCommandStream(), REG_GLOBAL_TIMESTAMP_LDW, ptrOffset(baseAddr, globalOffset)); + NEO::EncodeStoreMMIO::encode(*commandContainer.getCommandStream(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, ptrOffset(baseAddr, contextOffset)); +} template inline uint64_t CommandListCoreFamily::getInputBufferSize(NEO::ImageType imageType, diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_base.inl b/level_zero/core/source/cmdlist/cmdlist_hw_base.inl index b568ef528b..db1cc36400 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_base.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_base.inl @@ -60,9 +60,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(z reinterpret_cast(pThreadGroupDimensions), isIndirect, isPredicate, kernel, 0, device->getNEODevice(), commandListPreemptionMode); - if (hEvent) { - appendSignalEventPostWalker(hEvent); - } + appendSignalEventPostWalker(hEvent); commandContainer.addToResidencyContainer(functionImmutableData->getIsaGraphicsAllocation()); auto &residencyContainer = kernel->getResidencyContainer(); @@ -82,32 +80,28 @@ void CommandListCoreFamily::appendEventForProfiling(ze_event_hand if (!hEvent) { return; } - - auto event = Event::fromHandle(hEvent); - - if (!event->isTimestampEvent) { - return; - } - - using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; - using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; - - commandContainer.addToResidencyContainer(&event->getAllocation()); - auto baseAddr = event->getGpuAddress(); - - if (beforeWalker) { - auto contextStartAddr = baseAddr; - auto globalStartAddr = baseAddr + offsetof(KernelTimestampEvent, globalStart); - - NEO::EncodeStoreMMIO::encode(*commandContainer.getCommandStream(), REG_GLOBAL_TIMESTAMP_LDW, globalStartAddr); - NEO::EncodeStoreMMIO::encode(*commandContainer.getCommandStream(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextStartAddr); + if (isCopyOnly()) { + appendEventForProfilingCopyCommand(hEvent, beforeWalker); } else { - auto contextEndAddr = baseAddr + offsetof(KernelTimestampEvent, contextEnd); - auto globalEndAddr = baseAddr + offsetof(KernelTimestampEvent, globalEnd); + auto event = Event::fromHandle(hEvent); + + if (!event->isTimestampEvent) { + return; + } + + commandContainer.addToResidencyContainer(&event->getAllocation()); + auto baseAddr = event->getGpuAddress(); + + if (beforeWalker) { + auto contextStartAddr = baseAddr; + auto globalStartAddr = baseAddr + offsetof(KernelTimestampEvent, globalStart); + + NEO::EncodeStoreMMIO::encode(*commandContainer.getCommandStream(), REG_GLOBAL_TIMESTAMP_LDW, globalStartAddr); + NEO::EncodeStoreMMIO::encode(*commandContainer.getCommandStream(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextStartAddr); - if (isCopyOnlyCmdList) { - NEO::EncodeMiFlushDW::programMiFlushDw(*commandContainer.getCommandStream(), globalEndAddr, 0llu, true, true); } else { + auto contextEndAddr = baseAddr + offsetof(KernelTimestampEvent, contextEnd); + auto globalEndAddr = baseAddr + offsetof(KernelTimestampEvent, globalEnd); NEO::PipeControlArgs args; args.dcFlushEnable = false; diff --git a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h index b2e5b167a9..f2f6f1c07d 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h +++ b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h @@ -31,9 +31,17 @@ struct WhiteBox<::L0::CommandListCoreFamily> : public ::L0::CommandListCoreFamily { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using BaseClass = ::L0::CommandListCoreFamily; + using BaseClass::appendBlitFill; + using BaseClass::appendCopyImageBlit; + using BaseClass::appendEventForProfiling; + using BaseClass::appendEventForProfilingCopyCommand; using BaseClass::appendLaunchKernelWithParams; + using BaseClass::appendMemoryCopyBlit; + using BaseClass::appendMemoryCopyBlitRegion; + using BaseClass::appendSignalEventPostWalker; using BaseClass::commandListPreemptionMode; using BaseClass::getAlignedAllocation; + using BaseClass::hostPtrMap; WhiteBox() : ::L0::CommandListCoreFamily(BaseClass::defaultNumIddsPerBlock) {} }; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist.cpp index 32f346eab3..f2bc3cc86b 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist.cpp @@ -7,15 +7,16 @@ #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/hw_info.h" +#include "shared/source/helpers/register_offsets.h" #include "shared/test/unit_test/cmd_parse/gen_cmd_parse.h" #include "opencl/test/unit_test/mocks/mock_graphics_allocation.h" #include "test.h" -#include "level_zero/core/source/cmdlist/cmdlist_hw.h" #include "level_zero/core/source/driver/driver_handle_imp.h" #include "level_zero/core/source/image/image_hw.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" +#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" #include "level_zero/core/test/unit_tests/mocks/mock_event.h" #include "level_zero/core/test/unit_tests/mocks/mock_kernel.h" @@ -247,7 +248,7 @@ HWTEST_F(CommandListCreate, givenCommandListWhenSetBarrierThenPipeControlIsProgr template class MockCommandList : public WhiteBox<::L0::CommandListCoreFamily> { public: - MockCommandList() : WhiteBox<::L0::CommandListCoreFamily>(1) {} + MockCommandList() : WhiteBox<::L0::CommandListCoreFamily>() {} AlignedAllocationData getAlignedAllocation(L0::Device *device, const void *buffer, uint64_t bufferSize) override { return {0, 0, nullptr, true}; @@ -267,7 +268,7 @@ class MockCommandList : public WhiteBox<::L0::CommandListCoreFamily copySize, size_t srcRowPitch, size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, - size_t srcSize, size_t dstSize) override { + size_t srcSize, size_t dstSize, ze_event_handle_t hSignalEvent) override { appendMemoryCopyBlitRegionCalledTimes++; return ZE_RESULT_SUCCESS; } @@ -315,7 +316,7 @@ class MockCommandList : public WhiteBox<::L0::CommandListCoreFamily copySize, - Vec3 srcSize, Vec3 dstSize) override { + Vec3 srcSize, Vec3 dstSize, ze_event_handle_t hSignalEvent) override { appendCopyImageBlitCalledTimes++; return ZE_RESULT_SUCCESS; } @@ -400,7 +401,6 @@ using AppendMemoryCopy = CommandListCreate; template class MockAppendMemoryCopy : public MockCommandList { public: - using CommandList::hostPtrMap; AlignedAllocationData getAlignedAllocation(L0::Device *device, const void *buffer, uint64_t bufferSize) override { return L0::CommandListCoreFamily::getAlignedAllocation(device, buffer, bufferSize); } @@ -484,13 +484,13 @@ HWTEST2_F(AppendMemoryCopy, givenCopyOnlyCommandListAndHostPointersWhenMemoryCop using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using XY_COPY_BLT = typename GfxFamily::XY_COPY_BLT; - WhiteBox> cmdList(1); - cmdList.initialize(device, true); + auto commandList = std::make_unique>>(); + commandList->initialize(device, true); void *srcPtr = reinterpret_cast(0x1234); void *dstPtr = reinterpret_cast(0x2345); - cmdList.appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 0, nullptr); + commandList->appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 0, nullptr); - auto &commandContainer = cmdList.commandContainer; + auto &commandContainer = commandList->commandContainer; GenCmdList genCmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( genCmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); @@ -507,15 +507,15 @@ HWTEST2_F(AppendMemoryCopy, givenCopyOnlyCommandListAndHostPointersWhenMemoryCop using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using XY_COPY_BLT = typename GfxFamily::XY_COPY_BLT; - WhiteBox> cmdList(1); - cmdList.initialize(device, true); + auto commandList = std::make_unique>>(); + commandList->initialize(device, true); void *srcPtr = reinterpret_cast(0x1234); void *dstPtr = reinterpret_cast(0x2345); ze_copy_region_t dstRegion = {4, 4, 0, 2, 2, 1}; ze_copy_region_t srcRegion = {4, 4, 0, 2, 2, 1}; - cmdList.appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr); + commandList->appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr); - auto &commandContainer = cmdList.commandContainer; + auto &commandContainer = commandList->commandContainer; GenCmdList genCmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( genCmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); @@ -627,7 +627,7 @@ HWTEST_F(CommandListCreate, givenCommandListyWhenAppendWaitEventsWithDcFlushTheP template class MockCommandListForMemFill : public WhiteBox<::L0::CommandListCoreFamily> { public: - MockCommandListForMemFill() : WhiteBox<::L0::CommandListCoreFamily>(1) {} + MockCommandListForMemFill() : WhiteBox<::L0::CommandListCoreFamily>() {} AlignedAllocationData getAlignedAllocation(L0::Device *device, const void *buffer, uint64_t bufferSize) override { return {0, 0, nullptr, true}; @@ -635,7 +635,7 @@ class MockCommandListForMemFill : public WhiteBox<::L0::CommandListCoreFamily::GfxFamily; + using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM; + auto commandList = std::make_unique>>(); + commandList->initialize(device, true); + ze_event_pool_desc_t eventPoolDesc = { + ZE_EVENT_POOL_DESC_VERSION_CURRENT, + ZE_EVENT_POOL_FLAG_TIMESTAMP, + 1}; + ze_event_desc_t eventDesc = { + ZE_EVENT_DESC_VERSION_CURRENT, + 0, + ZE_EVENT_SCOPE_FLAG_NONE, + ZE_EVENT_SCOPE_FLAG_NONE}; + auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), 0, nullptr, &eventPoolDesc)); + auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); + + NEO::MockGraphicsAllocation mockAllocationSrc(0, NEO::GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY, + reinterpret_cast(0x1234), 0x1000, 0, sizeof(uint32_t), + MemoryPool::System4KBPages); + NEO::MockGraphicsAllocation mockAllocationDst(0, NEO::GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY, + reinterpret_cast(0x1234), 0x1000, 0, sizeof(uint32_t), + MemoryPool::System4KBPages); + uint32_t size = 0x1000; + + commandList->appendMemoryCopyBlit(&mockAllocationDst, 0, &mockAllocationSrc, 0, size, event->toHandle()); + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed())); + auto itor = find(cmdList.begin(), cmdList.end()); + EXPECT_NE(cmdList.end(), itor); + auto cmd = genCmdCast(*itor); + EXPECT_EQ(cmd->getRegisterAddress(), REG_GLOBAL_TIMESTAMP_LDW); +} + +HWTEST2_F(CommandListCreate, givenCopyCommandListWhenTimestampPassedToMemoryCopyRegionBlitThenTimeStampRegistersAreAdded, Platforms) { + using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; + using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM; + auto commandList = std::make_unique>>(); + commandList->initialize(device, true); + ze_event_pool_desc_t eventPoolDesc = { + ZE_EVENT_POOL_DESC_VERSION_CURRENT, + ZE_EVENT_POOL_FLAG_TIMESTAMP, + 1}; + ze_event_desc_t eventDesc = { + ZE_EVENT_DESC_VERSION_CURRENT, + 0, + ZE_EVENT_SCOPE_FLAG_NONE, + ZE_EVENT_SCOPE_FLAG_NONE}; + auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), 0, nullptr, &eventPoolDesc)); + auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); + + ze_copy_region_t srcRegion = {4, 4, 4, 2, 2, 2}; + ze_copy_region_t dstRegion = {4, 4, 4, 2, 2, 2}; + NEO::MockGraphicsAllocation mockAllocationSrc(0, NEO::GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY, + reinterpret_cast(0x1234), 0x1000, 0, sizeof(uint32_t), + MemoryPool::System4KBPages); + NEO::MockGraphicsAllocation mockAllocationDst(0, NEO::GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY, + reinterpret_cast(0x1234), 0x1000, 0, sizeof(uint32_t), + MemoryPool::System4KBPages); + + commandList->appendMemoryCopyBlitRegion(&mockAllocationDst, &mockAllocationSrc, srcRegion, dstRegion, {0, 0, 0}, 0, 0, 0, 0, 0, 0, event->toHandle()); + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed())); + auto itor = find(cmdList.begin(), cmdList.end()); + EXPECT_NE(cmdList.end(), itor); + auto cmd = genCmdCast(*itor); + EXPECT_EQ(cmd->getRegisterAddress(), REG_GLOBAL_TIMESTAMP_LDW); +} + +HWTEST2_F(CommandListCreate, givenCopyCommandListWhenTimestampPassedToImageCopyBlitThenTimeStampRegistersAreAdded, Platforms) { + using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; + using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM; + auto commandList = std::make_unique>>(); + commandList->initialize(device, true); + ze_event_pool_desc_t eventPoolDesc = { + ZE_EVENT_POOL_DESC_VERSION_CURRENT, + ZE_EVENT_POOL_FLAG_TIMESTAMP, + 1}; + ze_event_desc_t eventDesc = { + ZE_EVENT_DESC_VERSION_CURRENT, + 0, + ZE_EVENT_SCOPE_FLAG_NONE, + ZE_EVENT_SCOPE_FLAG_NONE}; + auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), 0, nullptr, &eventPoolDesc)); + auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); + + NEO::MockGraphicsAllocation mockAllocationSrc(0, NEO::GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY, + reinterpret_cast(0x1234), 0x1000, 0, sizeof(uint32_t), + MemoryPool::System4KBPages); + NEO::MockGraphicsAllocation mockAllocationDst(0, NEO::GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY, + reinterpret_cast(0x1234), 0x1000, 0, sizeof(uint32_t), + MemoryPool::System4KBPages); + + commandList->appendCopyImageBlit(&mockAllocationDst, &mockAllocationSrc, {0, 0, 0}, {0, 0, 0}, 0, 0, 0, 0, 1, {0, 0, 0}, {0, 0, 0}, {0, 0, 0}, event->toHandle()); + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed())); + auto itor = find(cmdList.begin(), cmdList.end()); + EXPECT_NE(cmdList.end(), itor); + auto cmd = genCmdCast(*itor); + EXPECT_EQ(cmd->getRegisterAddress(), REG_GLOBAL_TIMESTAMP_LDW); +} + +HWTEST2_F(CommandListCreate, givenCopyCommandListWhenProfilingBeforeCommandForCopyOnlyThenCommandsHaveCorrectEventOffsets, Platforms) { + using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; + using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM; + auto commandList = std::make_unique>>(); + commandList->initialize(device, true); + ze_event_pool_desc_t eventPoolDesc = { + ZE_EVENT_POOL_DESC_VERSION_CURRENT, + ZE_EVENT_POOL_FLAG_TIMESTAMP, + 1}; + ze_event_desc_t eventDesc = { + ZE_EVENT_DESC_VERSION_CURRENT, + 0, + ZE_EVENT_SCOPE_FLAG_NONE, + ZE_EVENT_SCOPE_FLAG_NONE}; + auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), 0, nullptr, &eventPoolDesc)); + auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); + + commandList->appendEventForProfilingCopyCommand(event->toHandle(), true); + + auto contextOffset = offsetof(KernelTimestampEvent, contextStart); + auto globalOffset = offsetof(KernelTimestampEvent, globalStart); + auto baseAddr = event->getGpuAddress(); + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed())); + auto itor = find(cmdList.begin(), cmdList.end()); + EXPECT_NE(cmdList.end(), itor); + auto cmd = genCmdCast(*itor); + EXPECT_EQ(cmd->getRegisterAddress(), REG_GLOBAL_TIMESTAMP_LDW); + EXPECT_EQ(cmd->getMemoryAddress(), ptrOffset(baseAddr, globalOffset)); + EXPECT_NE(cmdList.end(), ++itor); + cmd = genCmdCast(*itor); + EXPECT_EQ(cmd->getRegisterAddress(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW); + EXPECT_EQ(cmd->getMemoryAddress(), ptrOffset(baseAddr, contextOffset)); +} + +HWTEST2_F(CommandListCreate, givenCopyCommandListWhenProfilingAfterCommandForCopyOnlyThenCommandsHaveCorrectEventOffsets, Platforms) { + using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; + using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM; + auto commandList = std::make_unique>>(); + commandList->initialize(device, true); + ze_event_pool_desc_t eventPoolDesc = { + ZE_EVENT_POOL_DESC_VERSION_CURRENT, + ZE_EVENT_POOL_FLAG_TIMESTAMP, + 1}; + ze_event_desc_t eventDesc = { + ZE_EVENT_DESC_VERSION_CURRENT, + 0, + ZE_EVENT_SCOPE_FLAG_NONE, + ZE_EVENT_SCOPE_FLAG_NONE}; + auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), 0, nullptr, &eventPoolDesc)); + auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); + + commandList->appendEventForProfilingCopyCommand(event->toHandle(), false); + + auto contextOffset = offsetof(KernelTimestampEvent, contextEnd); + auto globalOffset = offsetof(KernelTimestampEvent, globalEnd); + auto baseAddr = event->getGpuAddress(); + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed())); + auto itor = find(cmdList.begin(), cmdList.end()); + EXPECT_NE(cmdList.end(), itor); + auto cmd = genCmdCast(*itor); + EXPECT_EQ(cmd->getRegisterAddress(), REG_GLOBAL_TIMESTAMP_LDW); + EXPECT_EQ(cmd->getMemoryAddress(), ptrOffset(baseAddr, globalOffset)); + EXPECT_NE(cmdList.end(), ++itor); + cmd = genCmdCast(*itor); + EXPECT_EQ(cmd->getRegisterAddress(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW); + EXPECT_EQ(cmd->getMemoryAddress(), ptrOffset(baseAddr, contextOffset)); +} + +HWTEST2_F(CommandListCreate, givenNullEventWhenAppendEventAfterWalkerThenNothingAddedToStream, Platforms) { + using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; + using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM; + auto commandList = std::make_unique>>(); + commandList->initialize(device, true); + + auto usedBefore = commandList->commandContainer.getCommandStream()->getUsed(); + + commandList->appendSignalEventPostWalker(nullptr); + + EXPECT_EQ(commandList->commandContainer.getCommandStream()->getUsed(), usedBefore); +} + } // namespace ult } // namespace L0 diff --git a/shared/source/command_container/command_encoder.h b/shared/source/command_container/command_encoder.h index b2e5b454ce..f2f5845c93 100644 --- a/shared/source/command_container/command_encoder.h +++ b/shared/source/command_container/command_encoder.h @@ -150,6 +150,11 @@ struct EncodeStoreMMIO { static void encode(LinearStream &csr, uint32_t offset, uint64_t address); static void remapOffset(MI_STORE_REGISTER_MEM *pStoreRegMem); }; +template +struct AppendStoreMMIO { + using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM; + static void appendRemap(MI_STORE_REGISTER_MEM *cmd); +}; template struct EncodeSurfaceState {