From 819d6489970029a94a85686934d7bf3b98ecff0c Mon Sep 17 00:00:00 2001 From: Zbigniew Zdanowicz Date: Fri, 8 Apr 2022 18:48:45 +0000 Subject: [PATCH] Use single event for multiple kernels Related-To: NEO-6871 Signed-off-by: Zbigniew Zdanowicz --- level_zero/core/source/cmdlist/cmdlist.cpp | 12 + level_zero/core/source/cmdlist/cmdlist.h | 1 + level_zero/core/source/cmdlist/cmdlist_hw.h | 2 +- level_zero/core/source/cmdlist/cmdlist_hw.inl | 132 ++++---- .../core/source/cmdlist/cmdlist_hw_base.inl | 16 + .../cmdlist/cmdlist_hw_xehp_and_later.inl | 26 ++ level_zero/core/source/event/event.h | 19 +- level_zero/core/source/event/event_impl.inl | 38 +-- .../unit_tests/fixtures/cmdlist_fixture.h | 93 ++++++ .../core/test/unit_tests/mocks/mock_cmdlist.h | 71 +++++ .../sources/cmdlist/test_cmdlist_2.cpp | 27 +- .../sources/cmdlist/test_cmdlist_4.cpp | 19 +- .../cmdlist/test_cmdlist_append_barrier.cpp | 91 +----- .../cmdlist/test_cmdlist_append_memory.cpp | 203 ++++++------ .../test_cmdlist_append_signal_event.cpp | 74 +++++ .../test_cmdlist_append_wait_on_events.cpp | 4 +- .../sources/cmdlist/test_cmdlist_fill.cpp | 214 +++++++++++++ .../cmdlist/test_cmdlist_xehp_and_later.cpp | 290 ++++++++++++++++++ .../unit_tests/sources/event/test_event.cpp | 24 +- .../unit_test/helpers/hw_helper_tests.cpp | 36 +++ shared/test/common/helpers/unit_test_helper.h | 4 + .../test/common/helpers/unit_test_helper.inl | 5 + .../unit_test_helper_bdw_and_later.inl | 5 + .../unit_test_helper_xehp_and_later.inl | 5 + .../test_macros/header/common_matchers.h | 1 + 25 files changed, 1074 insertions(+), 338 deletions(-) diff --git a/level_zero/core/source/cmdlist/cmdlist.cpp b/level_zero/core/source/cmdlist/cmdlist.cpp index 8e0668ed44..8dbef67d6d 100644 --- a/level_zero/core/source/cmdlist/cmdlist.cpp +++ b/level_zero/core/source/cmdlist/cmdlist.cpp @@ -181,4 +181,16 @@ void CommandList::handleIndirectAllocationResidency() { } } +bool CommandList::setupTimestampEventForMultiTile(ze_event_handle_t 
signalEvent) { + if (this->partitionCount > 1 && + signalEvent) { + auto event = Event::fromHandle(signalEvent); + if (event->isEventTimestampFlagSet()) { + event->setPacketsInUse(this->partitionCount); + return true; + } + } + return false; +} + } // namespace L0 diff --git a/level_zero/core/source/cmdlist/cmdlist.h b/level_zero/core/source/cmdlist/cmdlist.h index d29c008ec3..ec21b18759 100644 --- a/level_zero/core/source/cmdlist/cmdlist.h +++ b/level_zero/core/source/cmdlist/cmdlist.h @@ -264,6 +264,7 @@ struct CommandList : _ze_command_list_handle_t { protected: NEO::GraphicsAllocation *getAllocationFromHostPtrMap(const void *buffer, uint64_t bufferSize); NEO::GraphicsAllocation *getHostPtrAlloc(const void *buffer, uint64_t bufferSize, bool hostCopyAllowed); + bool setupTimestampEventForMultiTile(ze_event_handle_t signalEvent); std::map hostPtrMap; std::vector ownedPrivateAllocations; diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.h b/level_zero/core/source/cmdlist/cmdlist_hw.h index 4e57d3af0b..7eb078f443 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw.h @@ -230,7 +230,7 @@ struct CommandListCoreFamily : CommandListImp { ze_result_t setGlobalWorkSizeIndirect(NEO::CrossThreadDataOffset offsets[3], uint64_t crossThreadAddress, uint32_t lws[3]); ze_result_t programSyncBuffer(Kernel &kernel, NEO::Device &device, const ze_group_count_t *pThreadGroupDimensions); void appendWriteKernelTimestamp(ze_event_handle_t hEvent, bool beforeWalker, bool maskLsb, bool workloadPartition); - void adjustWriteKernelTimestamp(uint64_t globalAddress, uint64_t contextAddress, bool maskLsb, uint32_t mask); + void adjustWriteKernelTimestamp(uint64_t globalAddress, uint64_t contextAddress, bool maskLsb, uint32_t mask, bool workloadPartition); void appendEventForProfiling(ze_event_handle_t hEvent, bool beforeWalker, bool workloadPartition); void appendEventForProfilingAllWalkers(ze_event_handle_t hEvent, bool beforeWalker); 
void appendEventForProfilingCopyCommand(ze_event_handle_t hEvent, bool beforeWalker); diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index c83da1f295..67a3111880 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -364,9 +364,11 @@ ze_result_t CommandListCoreFamily::appendMemoryRangesBarrier(uint return ret; } - appendEventForProfiling(hSignalEvent, true, false); + bool workloadPartition = setupTimestampEventForMultiTile(hSignalEvent); + + appendEventForProfiling(hSignalEvent, true, workloadPartition); applyMemoryRangesBarrier(numRanges, pRangeSizes, pRanges); - appendSignalEventPostWalker(hSignalEvent, false); + appendSignalEventPostWalker(hSignalEvent, workloadPartition); if (this->cmdListType == CommandListType::TYPE_IMMEDIATE) { executeCommandListImmediate(true); @@ -800,22 +802,6 @@ ze_result_t CommandListCoreFamily::appendMemAdvise(ze_device_hand return ZE_RESULT_ERROR_INVALID_ARGUMENT; } -template -ze_result_t CommandListCoreFamily::appendLaunchKernelSplit(ze_kernel_handle_t hKernel, - const ze_group_count_t *pThreadGroupDimensions, - ze_event_handle_t hEvent) { - return appendLaunchKernelWithParams(hKernel, pThreadGroupDimensions, nullptr, false, false, false); -} - -template -void CommandListCoreFamily::appendEventForProfilingAllWalkers(ze_event_handle_t hEvent, bool beforeWalker) { - if (beforeWalker) { - appendEventForProfiling(hEvent, true, false); - } else { - appendSignalEventPostWalker(hEvent, false); - } -} - template ze_result_t CommandListCoreFamily::appendMemoryCopyKernelWithGA(void *dstPtr, NEO::GraphicsAllocation *dstPtrAlloc, @@ -1075,18 +1061,21 @@ ze_result_t CommandListCoreFamily::appendMemoryCopy(void *dstptr, if (isStateless) { func = Builtin::CopyBufferToBufferSideStateless; } - ret = isCopyOnly() ? 
appendMemoryCopyBlit(dstAllocationStruct.alignedAllocationPtr, - dstAllocationStruct.alloc, dstAllocationStruct.offset, - srcAllocationStruct.alignedAllocationPtr, - srcAllocationStruct.alloc, srcAllocationStruct.offset, leftSize) - : appendMemoryCopyKernelWithGA(reinterpret_cast(&dstAllocationStruct.alignedAllocationPtr), - dstAllocationStruct.alloc, dstAllocationStruct.offset, - reinterpret_cast(&srcAllocationStruct.alignedAllocationPtr), - srcAllocationStruct.alloc, srcAllocationStruct.offset, - leftSize, 1UL, - func, - hSignalEvent, - isStateless); + if (isCopyOnly()) { + ret = appendMemoryCopyBlit(dstAllocationStruct.alignedAllocationPtr, + dstAllocationStruct.alloc, dstAllocationStruct.offset, + srcAllocationStruct.alignedAllocationPtr, + srcAllocationStruct.alloc, srcAllocationStruct.offset, leftSize); + } else { + ret = appendMemoryCopyKernelWithGA(reinterpret_cast(&dstAllocationStruct.alignedAllocationPtr), + dstAllocationStruct.alloc, dstAllocationStruct.offset, + reinterpret_cast(&srcAllocationStruct.alignedAllocationPtr), + srcAllocationStruct.alloc, srcAllocationStruct.offset, + leftSize, 1UL, + func, + hSignalEvent, + isStateless); + } } if (ret == ZE_RESULT_SUCCESS && middleSizeBytes) { @@ -1094,19 +1083,22 @@ ze_result_t CommandListCoreFamily::appendMemoryCopy(void *dstptr, if (isStateless) { func = Builtin::CopyBufferToBufferMiddleStateless; } - ret = isCopyOnly() ? 
appendMemoryCopyBlit(dstAllocationStruct.alignedAllocationPtr, - dstAllocationStruct.alloc, leftSize + dstAllocationStruct.offset, - srcAllocationStruct.alignedAllocationPtr, - srcAllocationStruct.alloc, leftSize + srcAllocationStruct.offset, middleSizeBytes) - : appendMemoryCopyKernelWithGA(reinterpret_cast(&dstAllocationStruct.alignedAllocationPtr), - dstAllocationStruct.alloc, leftSize + dstAllocationStruct.offset, - reinterpret_cast(&srcAllocationStruct.alignedAllocationPtr), - srcAllocationStruct.alloc, leftSize + srcAllocationStruct.offset, - middleSizeBytes, - middleElSize, - func, - hSignalEvent, - isStateless); + if (isCopyOnly()) { + ret = appendMemoryCopyBlit(dstAllocationStruct.alignedAllocationPtr, + dstAllocationStruct.alloc, leftSize + dstAllocationStruct.offset, + srcAllocationStruct.alignedAllocationPtr, + srcAllocationStruct.alloc, leftSize + srcAllocationStruct.offset, middleSizeBytes); + } else { + ret = appendMemoryCopyKernelWithGA(reinterpret_cast(&dstAllocationStruct.alignedAllocationPtr), + dstAllocationStruct.alloc, leftSize + dstAllocationStruct.offset, + reinterpret_cast(&srcAllocationStruct.alignedAllocationPtr), + srcAllocationStruct.alloc, leftSize + srcAllocationStruct.offset, + middleSizeBytes, + middleElSize, + func, + hSignalEvent, + isStateless); + } } if (ret == ZE_RESULT_SUCCESS && rightSize) { @@ -1114,18 +1106,21 @@ ze_result_t CommandListCoreFamily::appendMemoryCopy(void *dstptr, if (isStateless) { func = Builtin::CopyBufferToBufferSideStateless; } - ret = isCopyOnly() ? 
appendMemoryCopyBlit(dstAllocationStruct.alignedAllocationPtr, - dstAllocationStruct.alloc, leftSize + middleSizeBytes + dstAllocationStruct.offset, - srcAllocationStruct.alignedAllocationPtr, - srcAllocationStruct.alloc, leftSize + middleSizeBytes + srcAllocationStruct.offset, rightSize) - : appendMemoryCopyKernelWithGA(reinterpret_cast(&dstAllocationStruct.alignedAllocationPtr), - dstAllocationStruct.alloc, leftSize + middleSizeBytes + dstAllocationStruct.offset, - reinterpret_cast(&srcAllocationStruct.alignedAllocationPtr), - srcAllocationStruct.alloc, leftSize + middleSizeBytes + srcAllocationStruct.offset, - rightSize, 1UL, - func, - hSignalEvent, - isStateless); + if (isCopyOnly()) { + ret = appendMemoryCopyBlit(dstAllocationStruct.alignedAllocationPtr, + dstAllocationStruct.alloc, leftSize + middleSizeBytes + dstAllocationStruct.offset, + srcAllocationStruct.alignedAllocationPtr, + srcAllocationStruct.alloc, leftSize + middleSizeBytes + srcAllocationStruct.offset, rightSize); + } else { + ret = appendMemoryCopyKernelWithGA(reinterpret_cast(&dstAllocationStruct.alignedAllocationPtr), + dstAllocationStruct.alloc, leftSize + middleSizeBytes + dstAllocationStruct.offset, + reinterpret_cast(&srcAllocationStruct.alignedAllocationPtr), + srcAllocationStruct.alloc, leftSize + middleSizeBytes + srcAllocationStruct.offset, + rightSize, 1UL, + func, + hSignalEvent, + isStateless); + } } appendEventForProfilingAllWalkers(hSignalEvent, false); @@ -1557,6 +1552,7 @@ ze_result_t CommandListCoreFamily::appendMemoryFill(void *ptr, reinterpret_cast(patternGfxAllocPtr) + patternOffsetRemainder, patternGfxAlloc); builtinFunctionRemainder->setArgumentValue(3, sizeof(patternAllocationSize), &patternAllocationSize); + res = appendLaunchKernelSplit(builtinFunctionRemainder->toHandle(), &dispatchFuncArgs, hSignalEvent); if (res) { return res; @@ -1951,7 +1947,7 @@ void CommandListCoreFamily::appendWriteKernelTimestamp(ze_event_h constexpr uint32_t mask = 0xfffffffe; auto event = 
Event::fromHandle(hEvent); - auto baseAddr = event->getGpuAddress(this->device); + auto baseAddr = event->getPacketAddress(this->device); auto contextOffset = beforeWalker ? event->getContextStartOffset() : event->getContextEndOffset(); auto globalOffset = beforeWalker ? event->getGlobalStartOffset() : event->getGlobalEndOffset(); @@ -1966,7 +1962,7 @@ void CommandListCoreFamily::appendWriteKernelTimestamp(ze_event_h NEO::EncodeStoreMMIO::encode(*commandContainer.getCommandStream(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextAddress, workloadPartition); } - adjustWriteKernelTimestamp(globalAddress, contextAddress, maskLsb, mask); + adjustWriteKernelTimestamp(globalAddress, contextAddress, maskLsb, mask, workloadPartition); } template @@ -2018,6 +2014,9 @@ ze_result_t CommandListCoreFamily::appendWriteGlobalTimestamp( } } + bool workloadPartition = setupTimestampEventForMultiTile(hSignalEvent); + appendEventForProfiling(hSignalEvent, true, workloadPartition); + const auto &hwInfo = this->device->getHwInfo(); if (isCopyOnly()) { NEO::MiFlushArgs args; @@ -2031,17 +2030,16 @@ ze_result_t CommandListCoreFamily::appendWriteGlobalTimestamp( } else { NEO::PipeControlArgs args; - NEO::MemorySynchronizationCommands::addPipeControlWithPostSync( + NEO::MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( *commandContainer.getCommandStream(), POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_TIMESTAMP, reinterpret_cast(dstptr), 0, + hwInfo, args); } - if (hSignalEvent) { - CommandListCoreFamily::appendSignalEventPostWalker(hSignalEvent, false); - } + appendSignalEventPostWalker(hSignalEvent, workloadPartition); auto allocationStruct = getAlignedAllocation(this->device, dstptr, sizeof(uint64_t), false); commandContainer.addToResidencyContainer(allocationStruct.alloc); @@ -2263,7 +2261,7 @@ void CommandListCoreFamily::programStateBaseAddress(NEO::CommandC } template -void CommandListCoreFamily::adjustWriteKernelTimestamp(uint64_t globalAddress, uint64_t 
contextAddress, bool maskLsb, uint32_t mask) {} +void CommandListCoreFamily::adjustWriteKernelTimestamp(uint64_t globalAddress, uint64_t contextAddress, bool maskLsb, uint32_t mask, bool workloadPartition) {} template ze_result_t CommandListCoreFamily::appendBarrier(ze_event_handle_t hSignalEvent, @@ -2274,15 +2272,7 @@ ze_result_t CommandListCoreFamily::appendBarrier(ze_event_handle_ if (ret) { return ret; } - bool workloadPartition = false; - if (this->partitionCount > 1 && - hSignalEvent) { - auto event = Event::fromHandle(hSignalEvent); - if (event->isEventTimestampFlagSet()) { - event->setPacketsInUse(this->partitionCount); - workloadPartition = true; - } - } + bool workloadPartition = setupTimestampEventForMultiTile(hSignalEvent); appendEventForProfiling(hSignalEvent, true, workloadPartition); if (isCopyOnly()) { diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_base.inl b/level_zero/core/source/cmdlist/cmdlist_hw_base.inl index 027e79a19e..c37e0f34d1 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_base.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_base.inl @@ -201,4 +201,20 @@ inline size_t CommandListCoreFamily::estimateBufferSizeMultiTileB return 0; } +template +ze_result_t CommandListCoreFamily::appendLaunchKernelSplit(ze_kernel_handle_t hKernel, + const ze_group_count_t *pThreadGroupDimensions, + ze_event_handle_t hEvent) { + return appendLaunchKernelWithParams(hKernel, pThreadGroupDimensions, nullptr, false, false, false); +} + +template +void CommandListCoreFamily::appendEventForProfilingAllWalkers(ze_event_handle_t hEvent, bool beforeWalker) { + if (beforeWalker) { + appendEventForProfiling(hEvent, true, false); + } else { + appendSignalEventPostWalker(hEvent, false); + } +} + } // namespace L0 diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl index d7bd0ed666..aefb5c5f3f 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl +++ 
b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl @@ -344,4 +344,30 @@ inline size_t CommandListCoreFamily::estimateBufferSizeMultiTileB false); } +template +ze_result_t CommandListCoreFamily::appendLaunchKernelSplit(ze_kernel_handle_t hKernel, + const ze_group_count_t *pThreadGroupDimensions, + ze_event_handle_t hEvent) { + if (hEvent) { + Event::fromHandle(hEvent)->increaseKernelCount(); + } + return appendLaunchKernelWithParams(hKernel, pThreadGroupDimensions, hEvent, false, false, false); +} + +template +void CommandListCoreFamily::appendEventForProfilingAllWalkers(ze_event_handle_t hEvent, bool beforeWalker) { + if (isCopyOnly()) { + if (beforeWalker) { + appendEventForProfiling(hEvent, true, false); + } else { + appendSignalEventPostWalker(hEvent, false); + } + } else { + if (hEvent && beforeWalker) { + auto event = Event::fromHandle(hEvent); + event->zeroKernelCount(); + } + } +} + } // namespace L0 diff --git a/level_zero/core/source/event/event.h b/level_zero/core/source/event/event.h index cec34b5ead..506f24277e 100644 --- a/level_zero/core/source/event/event.h +++ b/level_zero/core/source/event/event.h @@ -96,6 +96,17 @@ struct Event : _ze_event_handle_t { return isTimestampEvent || usingContextEndOffset; } + void increaseKernelCount() { + kernelCount++; + UNRECOVERABLE_IF(kernelCount > EventPacketsCount::maxKernelSplit); + } + uint32_t getKernelCount() const { + return kernelCount; + } + void zeroKernelCount() { + kernelCount = 0; + } + uint64_t globalStartTS; uint64_t globalEndTS; uint64_t contextStartTS; @@ -110,8 +121,6 @@ struct Event : _ze_event_handle_t { ze_event_scope_flags_t signalScope = 0u; ze_event_scope_flags_t waitScope = 0u; - uint32_t kernelCount = 1u; - bool l3FlushWaApplied = false; protected: @@ -122,6 +131,9 @@ struct Event : _ze_event_handle_t { size_t timestampSizeInDw = 0u; size_t singlePacketSize = 0u; size_t eventPoolOffset = 0u; + + uint32_t kernelCount = 1u; + bool isTimestampEvent = false; bool 
usingContextEndOffset = false; }; @@ -180,8 +192,7 @@ struct EventImp : public Event { protected: ze_result_t calculateProfilingData(); - ze_result_t queryStatusKernelTimestamp(); - ze_result_t queryStatusNonTimestamp(); + ze_result_t queryStatusEventPackets(); ze_result_t hostEventSetValue(TagSizeT eventValue); ze_result_t hostEventSetValueTimestamps(TagSizeT eventVal); void assignKernelEventCompletionData(void *address); diff --git a/level_zero/core/source/event/event_impl.inl b/level_zero/core/source/event/event_impl.inl index 636add0a05..0db3224488 100644 --- a/level_zero/core/source/event/event_impl.inl +++ b/level_zero/core/source/event/event_impl.inl @@ -104,33 +104,13 @@ void EventImp::assignKernelEventCompletionData(void *address) { } template -ze_result_t EventImp::queryStatusKernelTimestamp() { +ze_result_t EventImp::queryStatusEventPackets() { assignKernelEventCompletionData(hostAddress); uint32_t queryVal = Event::STATE_CLEARED; for (uint32_t i = 0; i < kernelCount; i++) { uint32_t packetsToCheck = kernelEventCompletionData[i].getPacketsUsed(); for (uint32_t packetId = 0; packetId < packetsToCheck; packetId++) { - bool ready = NEO::WaitUtils::waitFunctionWithPredicate( - static_cast(kernelEventCompletionData[i].getContextEndAddress(packetId)), - queryVal, - std::not_equal_to()); - if (!ready) { - return ZE_RESULT_NOT_READY; - } - } - } - this->csr->getInternalAllocationStorage()->cleanAllocationList(this->csr->peekTaskCount(), NEO::AllocationUsage::TEMPORARY_ALLOCATION); - return ZE_RESULT_SUCCESS; -} - -template -ze_result_t EventImp::queryStatusNonTimestamp() { - assignKernelEventCompletionData(hostAddress); - uint32_t queryVal = Event::STATE_CLEARED; - for (uint32_t i = 0; i < kernelCount; i++) { - uint32_t packetsToCheck = kernelEventCompletionData[i].getPacketsUsed(); - for (uint32_t packetId = 0; packetId < packetsToCheck; packetId++) { - void const *queryAddress = usingContextEndOffset + void const *queryAddress = isUsingContextEndOffset() ? 
kernelEventCompletionData[i].getContextEndAddress(packetId) : kernelEventCompletionData[i].getContextStartAddress(packetId); bool ready = NEO::WaitUtils::waitFunctionWithPredicate( @@ -156,11 +136,7 @@ ze_result_t EventImp::queryStatus() { *hostAddr = metricStreamer->getNotificationState(); } this->csr->downloadAllocations(); - if (isEventTimestampFlagSet()) { - return queryStatusKernelTimestamp(); - } else { - return queryStatusNonTimestamp(); - } + return queryStatusEventPackets(); } template @@ -274,11 +250,9 @@ ze_result_t EventImp::hostSynchronize(uint64_t timeout) { template ze_result_t EventImp::reset() { - if (isEventTimestampFlagSet()) { - kernelCount = EventPacketsCount::maxKernelSplit; - for (uint32_t i = 0; i < kernelCount; i++) { - kernelEventCompletionData[i].setPacketsUsed(NEO::TimestampPacketSizeControl::preferredPacketCount); - } + kernelCount = EventPacketsCount::maxKernelSplit; + for (uint32_t i = 0; i < kernelCount; i++) { + kernelEventCompletionData[i].setPacketsUsed(NEO::TimestampPacketSizeControl::preferredPacketCount); } hostEventSetValue(Event::STATE_INITIAL); resetPackets(); diff --git a/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h b/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h index 334437ad22..b4ee257995 100644 --- a/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h +++ b/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h @@ -8,6 +8,8 @@ #pragma once #include "shared/source/command_container/implicit_scaling.h" +#include "shared/test/common/cmd_parse/gen_cmd_parse.h" +#include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/helpers/variable_backup.h" #include "shared/test/common/test_macros/test.h" @@ -90,5 +92,96 @@ struct MultiTileCommandListFixture : public SingleRootMultiSubDeviceFixture { std::unique_ptr> osLocalMemoryBackup; }; +template +void validateTimestampRegisters(GenCmdList &cmdList, + GenCmdList::iterator &startIt, + uint32_t 
firstLoadRegisterRegSrcAddress, + uint64_t firstStoreRegMemAddress, + uint32_t secondLoadRegisterRegSrcAddress, + uint64_t secondStoreRegMemAddress, + bool workloadPartition) { + using MI_LOAD_REGISTER_REG = typename FamilyType::MI_LOAD_REGISTER_REG; + using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; + using MI_MATH = typename FamilyType::MI_MATH; + using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM; + + constexpr uint32_t mask = 0xfffffffe; + + auto itor = find(startIt, cmdList.end()); + + { + ASSERT_NE(cmdList.end(), itor); + auto cmdLoadReg = genCmdCast(*itor); + EXPECT_EQ(firstLoadRegisterRegSrcAddress, cmdLoadReg->getSourceRegisterAddress()); + EXPECT_EQ(CS_GPR_R0, cmdLoadReg->getDestinationRegisterAddress()); + } + + itor++; + { + ASSERT_NE(cmdList.end(), itor); + auto cmdLoadImm = genCmdCast(*itor); + EXPECT_EQ(CS_GPR_R1, cmdLoadImm->getRegisterOffset()); + EXPECT_EQ(mask, cmdLoadImm->getDataDword()); + } + + itor++; + { + ASSERT_NE(cmdList.end(), itor); + auto cmdMath = genCmdCast(*itor); + EXPECT_EQ(3u, cmdMath->DW0.BitField.DwordLength); + } + + itor++; + { + ASSERT_NE(cmdList.end(), itor); + auto cmdMem = genCmdCast(*itor); + EXPECT_EQ(CS_GPR_R2, cmdMem->getRegisterAddress()); + EXPECT_EQ(firstStoreRegMemAddress, cmdMem->getMemoryAddress()); + if (workloadPartition) { + EXPECT_TRUE(UnitTestHelper::getWorkloadPartitionForStoreRegisterMemCmd(*cmdMem)); + } else { + EXPECT_FALSE(UnitTestHelper::getWorkloadPartitionForStoreRegisterMemCmd(*cmdMem)); + } + } + + itor++; + { + ASSERT_NE(cmdList.end(), itor); + auto cmdLoadReg = genCmdCast(*itor); + EXPECT_EQ(secondLoadRegisterRegSrcAddress, cmdLoadReg->getSourceRegisterAddress()); + EXPECT_EQ(CS_GPR_R0, cmdLoadReg->getDestinationRegisterAddress()); + } + + itor++; + { + ASSERT_NE(cmdList.end(), itor); + auto cmdLoadImm = genCmdCast(*itor); + EXPECT_EQ(CS_GPR_R1, cmdLoadImm->getRegisterOffset()); + EXPECT_EQ(mask, cmdLoadImm->getDataDword()); + } + + itor++; + { + 
ASSERT_NE(cmdList.end(), itor); + auto cmdMath = genCmdCast(*itor); + EXPECT_EQ(3u, cmdMath->DW0.BitField.DwordLength); + } + + itor++; + { + ASSERT_NE(cmdList.end(), itor); + auto cmdMem = genCmdCast(*itor); + EXPECT_EQ(CS_GPR_R2, cmdMem->getRegisterAddress()); + EXPECT_EQ(secondStoreRegMemAddress, cmdMem->getMemoryAddress()); + if (workloadPartition) { + EXPECT_TRUE(UnitTestHelper::getWorkloadPartitionForStoreRegisterMemCmd(*cmdMem)); + } else { + EXPECT_FALSE(UnitTestHelper::getWorkloadPartitionForStoreRegisterMemCmd(*cmdMem)); + } + } + itor++; + startIt = itor; +} + } // namespace ult } // namespace L0 diff --git a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h index 30d233d01b..a2d467cf43 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h +++ b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h @@ -347,5 +347,76 @@ struct MockCommandList : public CommandList { uint8_t *batchBuffer = nullptr; NEO::GraphicsAllocation *mockAllocation = nullptr; }; + +template +class MockAppendMemoryCopy : public CommandListCoreFamily { + public: + using BaseClass = CommandListCoreFamily; + + ADDMETHOD(appendMemoryCopyKernelWithGA, ze_result_t, false, ZE_RESULT_SUCCESS, + (void *dstPtr, NEO::GraphicsAllocation *dstPtrAlloc, + uint64_t dstOffset, void *srcPtr, + NEO::GraphicsAllocation *srcPtrAlloc, + uint64_t srcOffset, uint64_t size, + uint64_t elementSize, Builtin builtin, + ze_event_handle_t hSignalEvent, + bool isStateless), + (dstPtr, dstPtrAlloc, dstOffset, srcPtr, srcPtrAlloc, srcOffset, size, elementSize, builtin, hSignalEvent, isStateless)); + + ADDMETHOD_NOBASE(appendMemoryCopyBlit, ze_result_t, ZE_RESULT_SUCCESS, + (uintptr_t dstPtr, + NEO::GraphicsAllocation *dstPtrAlloc, + uint64_t dstOffset, uintptr_t srcPtr, + NEO::GraphicsAllocation *srcPtrAlloc, + uint64_t srcOffset, + uint64_t size)); + + AlignedAllocationData getAlignedAllocation(L0::Device *device, const void *buffer, uint64_t 
bufferSize, bool allowHostCopy) override { + return L0::CommandListCoreFamily::getAlignedAllocation(device, buffer, bufferSize, allowHostCopy); + } + + ze_result_t appendMemoryCopyKernel2d(AlignedAllocationData *dstAlignedAllocation, AlignedAllocationData *srcAlignedAllocation, + Builtin builtin, const ze_copy_region_t *dstRegion, + uint32_t dstPitch, size_t dstOffset, + const ze_copy_region_t *srcRegion, uint32_t srcPitch, + size_t srcOffset, ze_event_handle_t hSignalEvent, + uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override { + srcAlignedPtr = srcAlignedAllocation->alignedAllocationPtr; + dstAlignedPtr = dstAlignedAllocation->alignedAllocationPtr; + return L0::CommandListCoreFamily::appendMemoryCopyKernel2d(dstAlignedAllocation, srcAlignedAllocation, builtin, dstRegion, dstPitch, dstOffset, srcRegion, srcPitch, srcOffset, hSignalEvent, numWaitEvents, phWaitEvents); + } + + ze_result_t appendMemoryCopyKernel3d(AlignedAllocationData *dstAlignedAllocation, AlignedAllocationData *srcAlignedAllocation, + Builtin builtin, const ze_copy_region_t *dstRegion, + uint32_t dstPitch, uint32_t dstSlicePitch, size_t dstOffset, + const ze_copy_region_t *srcRegion, uint32_t srcPitch, + uint32_t srcSlicePitch, size_t srcOffset, + ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, + ze_event_handle_t *phWaitEvents) override { + srcAlignedPtr = srcAlignedAllocation->alignedAllocationPtr; + dstAlignedPtr = dstAlignedAllocation->alignedAllocationPtr; + return L0::CommandListCoreFamily::appendMemoryCopyKernel3d(dstAlignedAllocation, srcAlignedAllocation, builtin, dstRegion, dstPitch, dstSlicePitch, dstOffset, srcRegion, srcPitch, srcSlicePitch, srcOffset, hSignalEvent, numWaitEvents, phWaitEvents); + } + + ze_result_t appendMemoryCopyBlitRegion(NEO::GraphicsAllocation *srcAllocation, + NEO::GraphicsAllocation *dstAllocation, + size_t srcOffset, + size_t dstOffset, + ze_copy_region_t srcRegion, + ze_copy_region_t dstRegion, const Vec3 &copySize, + size_t srcRowPitch, 
size_t srcSlicePitch, + size_t dstRowPitch, size_t dstSlicePitch, + const Vec3 &srcSize, const Vec3 &dstSize, ze_event_handle_t hSignalEvent, + uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override { + srcBlitCopyRegionOffset = srcOffset; + dstBlitCopyRegionOffset = dstOffset; + return L0::CommandListCoreFamily::appendMemoryCopyBlitRegion(srcAllocation, dstAllocation, srcOffset, dstOffset, srcRegion, dstRegion, copySize, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, srcSize, dstSize, hSignalEvent, numWaitEvents, phWaitEvents); + } + uintptr_t srcAlignedPtr; + uintptr_t dstAlignedPtr; + size_t srcBlitCopyRegionOffset = 0; + size_t dstBlitCopyRegionOffset = 0; +}; + } // namespace ult } // namespace L0 diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_2.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_2.cpp index d6cc59e53b..afc4e6aaaf 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_2.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_2.cpp @@ -501,13 +501,6 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryCopyWithSignalEventsThenS itor++; itor = find(itor, cmdList.end()); EXPECT_NE(cmdList.end(), itor); - itor++; - itor = find(itor, cmdList.end()); - if (MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo)) { - EXPECT_NE(cmdList.end(), itor); - } else { - EXPECT_EQ(cmdList.end(), itor); - } } using platformSupport = IsWithinProducts; @@ -540,22 +533,18 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryCopyWithSignalEventScopeS cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); auto iterator = findAll(cmdList.begin(), cmdList.end()); - bool postSyncFound = false; + uint32_t postSyncFound = 0; ASSERT_NE(0u, iterator.size()); - uint32_t numPCs = 0; for (auto it : iterator) { auto cmd = genCmdCast(*it); - numPCs++; if ((cmd->getPostSyncOperation() == 
POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) && (cmd->getImmediateData() == Event::STATE_SIGNALED) && (cmd->getDcFlushEnable())) { - postSyncFound = true; - break; + postSyncFound++; } } - ASSERT_TRUE(postSyncFound); - EXPECT_EQ(numPCs, iterator.size()); + EXPECT_EQ(1u, postSyncFound); } HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryCopyWithSignalEventScopeSetToSubDeviceThenB2BPipeControlIsAddedWithDcFlushForLastPC, platformSupport) { @@ -585,22 +574,18 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryCopyWithSignalEventScopeS cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); auto iterator = findAll(cmdList.begin(), cmdList.end()); - bool postSyncFound = false; + uint32_t postSyncFound = 0; ASSERT_NE(0u, iterator.size()); - uint32_t numPCs = 0; for (auto it : iterator) { auto cmd = genCmdCast(*it); - numPCs++; if ((cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) && (cmd->getImmediateData() == Event::STATE_SIGNALED) && (!cmd->getDcFlushEnable())) { - postSyncFound = true; - break; + postSyncFound++; } } - ASSERT_TRUE(postSyncFound); - EXPECT_EQ(numPCs, iterator.size() - 1); + EXPECT_EQ(1u, postSyncFound); auto it = *(iterator.end() - 1); auto cmd1 = genCmdCast(*it); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_4.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_4.cpp index f33e6a8cc7..25a3689500 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_4.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_4.cpp @@ -77,12 +77,19 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenAppendWriteGlobalTimestampCalle ptrOffset(commandContainer.getCommandStream()->getCpuBase(), commandStreamOffset), commandContainer.getCommandStream()->getUsed() - commandStreamOffset)); - auto iterator = find(cmdList.begin(), cmdList.end()); - auto cmd = 
genCmdCast(*iterator); - EXPECT_TRUE(cmd->getCommandStreamerStallEnable()); - EXPECT_FALSE(cmd->getDcFlushEnable()); - EXPECT_EQ(timestampAddress, NEO::UnitTestHelper::getPipeControlPostSyncAddress(*cmd)); - EXPECT_EQ(POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_TIMESTAMP, cmd->getPostSyncOperation()); + auto pcList = findAll(cmdList.begin(), cmdList.end()); + ASSERT_NE(0u, pcList.size()); + bool foundTimestampPipeControl = false; + for (auto it : pcList) { + auto cmd = genCmdCast(*it); + if (cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_TIMESTAMP) { + EXPECT_TRUE(cmd->getCommandStreamerStallEnable()); + EXPECT_FALSE(cmd->getDcFlushEnable()); + EXPECT_EQ(timestampAddress, NEO::UnitTestHelper::getPipeControlPostSyncAddress(*cmd)); + foundTimestampPipeControl = true; + } + } + EXPECT_TRUE(foundTimestampPipeControl); } HWTEST2_F(CommandListCreate, givenCommandListWhenAppendWriteGlobalTimestampCalledThenTimestampAllocationIsInsideResidencyContainer, IsAtLeastSkl) { diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_barrier.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_barrier.cpp index 91d2f75fce..a6ab1c471a 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_barrier.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_barrier.cpp @@ -7,7 +7,6 @@ #include "shared/source/command_container/command_encoder.h" #include "shared/source/helpers/hw_helper.h" -#include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/test_macros/test.h" @@ -382,82 +381,6 @@ HWTEST2_F(MultiTileCommandListAppendBarrier, EXPECT_EQ(1u, postSyncFound); } -template -void validateTimestampRegisters(GenCmdList &cmdList, - uint64_t firstRegisterAddress, uint64_t secondRegisterAddress) { - using MI_LOAD_REGISTER_REG = typename FamilyType::MI_LOAD_REGISTER_REG; - using 
MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; - using MI_MATH = typename FamilyType::MI_MATH; - using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM; - - constexpr uint32_t mask = 0xfffffffe; - - auto itor = find(cmdList.begin(), cmdList.end()); - - { - ASSERT_NE(cmdList.end(), itor); - auto cmdLoadReg = genCmdCast(*itor); - EXPECT_EQ(REG_GLOBAL_TIMESTAMP_LDW, cmdLoadReg->getSourceRegisterAddress()); - EXPECT_EQ(CS_GPR_R0, cmdLoadReg->getDestinationRegisterAddress()); - } - - itor++; - { - ASSERT_NE(cmdList.end(), itor); - auto cmdLoadImm = genCmdCast(*itor); - EXPECT_EQ(CS_GPR_R1, cmdLoadImm->getRegisterOffset()); - EXPECT_EQ(mask, cmdLoadImm->getDataDword()); - } - - itor++; - { - ASSERT_NE(cmdList.end(), itor); - auto cmdMath = genCmdCast(*itor); - EXPECT_EQ(3u, cmdMath->DW0.BitField.DwordLength); - } - - itor++; - { - ASSERT_NE(cmdList.end(), itor); - auto cmdMem = genCmdCast(*itor); - EXPECT_EQ(CS_GPR_R2, cmdMem->getRegisterAddress()); - EXPECT_EQ(firstRegisterAddress, cmdMem->getMemoryAddress()); - EXPECT_TRUE(cmdMem->getWorkloadPartitionIdOffsetEnable()); - } - - itor++; - { - ASSERT_NE(cmdList.end(), itor); - auto cmdLoadReg = genCmdCast(*itor); - EXPECT_EQ(GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, cmdLoadReg->getSourceRegisterAddress()); - EXPECT_EQ(CS_GPR_R0, cmdLoadReg->getDestinationRegisterAddress()); - } - - itor++; - { - ASSERT_NE(cmdList.end(), itor); - auto cmdLoadImm = genCmdCast(*itor); - EXPECT_EQ(CS_GPR_R1, cmdLoadImm->getRegisterOffset()); - EXPECT_EQ(mask, cmdLoadImm->getDataDword()); - } - - itor++; - { - ASSERT_NE(cmdList.end(), itor); - auto cmdMath = genCmdCast(*itor); - EXPECT_EQ(3u, cmdMath->DW0.BitField.DwordLength); - } - - itor++; - { - ASSERT_NE(cmdList.end(), itor); - auto cmdMem = genCmdCast(*itor); - EXPECT_EQ(CS_GPR_R2, cmdMem->getRegisterAddress()); - EXPECT_EQ(secondRegisterAddress, cmdMem->getMemoryAddress()); - EXPECT_TRUE(cmdMem->getWorkloadPartitionIdOffsetEnable()); - } -} - 
HWTEST2_F(MultiTileCommandListAppendBarrier, GivenTimestampEventSignalWhenAppendingMultTileBarrierThenExpectMultiTileBarrierAndTimestampOperations, IsWithinXeGfxFamily) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; @@ -533,7 +456,12 @@ HWTEST2_F(MultiTileCommandListAppendBarrier, ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, cmdBuffer, timestampRegisters)); - validateTimestampRegisters(cmdList, globalStartAddress, contextStartAddress); + auto begin = cmdList.begin(); + validateTimestampRegisters(cmdList, + begin, + REG_GLOBAL_TIMESTAMP_LDW, globalStartAddress, + GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextStartAddress, + true); auto gpuBaseAddress = cmdListStream->getGraphicsAllocation()->getGpuAddress() + useSizeBefore + timestampRegisters; @@ -557,7 +485,12 @@ HWTEST2_F(MultiTileCommandListAppendBarrier, ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, cmdBuffer, timestampRegisters)); - validateTimestampRegisters(cmdList, globalEndAddress, contextEndAddress); + begin = cmdList.begin(); + validateTimestampRegisters(cmdList, + begin, + REG_GLOBAL_TIMESTAMP_LDW, globalEndAddress, + GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextEndAddress, + true); } } // namespace ult diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_memory.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_memory.cpp index 21055cf601..3664778f42 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_memory.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_memory.cpp @@ -8,6 +8,7 @@ #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/test_macros/test.h" +#include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h" @@ 
-17,70 +18,6 @@ namespace ult { using AppendMemoryCopy = Test; -template -class MockAppendMemoryCopy : public WhiteBox<::L0::CommandListCoreFamily> { - public: - ADDMETHOD_NOBASE(appendMemoryCopyKernelWithGA, ze_result_t, ZE_RESULT_SUCCESS, - (void *dstPtr, NEO::GraphicsAllocation *dstPtrAlloc, - uint64_t dstOffset, void *srcPtr, - NEO::GraphicsAllocation *srcPtrAlloc, - uint64_t srcOffset, uint64_t size, - uint64_t elementSize, Builtin builtin, - ze_event_handle_t hSignalEvent, - bool isStateless)); - ADDMETHOD_NOBASE(appendMemoryCopyBlit, ze_result_t, ZE_RESULT_SUCCESS, - (uintptr_t dstPtr, - NEO::GraphicsAllocation *dstPtrAlloc, - uint64_t dstOffset, uintptr_t srcPtr, - NEO::GraphicsAllocation *srcPtrAlloc, - uint64_t srcOffset, - uint64_t size)); - AlignedAllocationData getAlignedAllocation(L0::Device *device, const void *buffer, uint64_t bufferSize, bool allowHostCopy) override { - return L0::CommandListCoreFamily::getAlignedAllocation(device, buffer, bufferSize, allowHostCopy); - } - ze_result_t appendMemoryCopyKernel2d(AlignedAllocationData *dstAlignedAllocation, AlignedAllocationData *srcAlignedAllocation, - Builtin builtin, const ze_copy_region_t *dstRegion, - uint32_t dstPitch, size_t dstOffset, - const ze_copy_region_t *srcRegion, uint32_t srcPitch, - size_t srcOffset, ze_event_handle_t hSignalEvent, - uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override { - srcAlignedPtr = srcAlignedAllocation->alignedAllocationPtr; - dstAlignedPtr = dstAlignedAllocation->alignedAllocationPtr; - return L0::CommandListCoreFamily::appendMemoryCopyKernel2d(dstAlignedAllocation, srcAlignedAllocation, builtin, dstRegion, dstPitch, dstOffset, srcRegion, srcPitch, srcOffset, hSignalEvent, numWaitEvents, phWaitEvents); - } - - ze_result_t appendMemoryCopyKernel3d(AlignedAllocationData *dstAlignedAllocation, AlignedAllocationData *srcAlignedAllocation, - Builtin builtin, const ze_copy_region_t *dstRegion, - uint32_t dstPitch, uint32_t dstSlicePitch, size_t 
dstOffset, - const ze_copy_region_t *srcRegion, uint32_t srcPitch, - uint32_t srcSlicePitch, size_t srcOffset, - ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, - ze_event_handle_t *phWaitEvents) override { - srcAlignedPtr = srcAlignedAllocation->alignedAllocationPtr; - dstAlignedPtr = dstAlignedAllocation->alignedAllocationPtr; - return L0::CommandListCoreFamily::appendMemoryCopyKernel3d(dstAlignedAllocation, srcAlignedAllocation, builtin, dstRegion, dstPitch, dstSlicePitch, dstOffset, srcRegion, srcPitch, srcSlicePitch, srcOffset, hSignalEvent, numWaitEvents, phWaitEvents); - } - - ze_result_t appendMemoryCopyBlitRegion(NEO::GraphicsAllocation *srcAllocation, - NEO::GraphicsAllocation *dstAllocation, - size_t srcOffset, - size_t dstOffset, - ze_copy_region_t srcRegion, - ze_copy_region_t dstRegion, const Vec3 ©Size, - size_t srcRowPitch, size_t srcSlicePitch, - size_t dstRowPitch, size_t dstSlicePitch, - const Vec3 &srcSize, const Vec3 &dstSize, ze_event_handle_t hSignalEvent, - uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override { - srcBlitCopyRegionOffset = srcOffset; - dstBlitCopyRegionOffset = dstOffset; - return L0::CommandListCoreFamily::appendMemoryCopyBlitRegion(srcAllocation, dstAllocation, srcOffset, dstOffset, srcRegion, dstRegion, copySize, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, srcSize, dstSize, hSignalEvent, numWaitEvents, phWaitEvents); - } - uintptr_t srcAlignedPtr; - uintptr_t dstAlignedPtr; - size_t srcBlitCopyRegionOffset = 0; - size_t dstBlitCopyRegionOffset = 0; -}; - HWTEST2_F(AppendMemoryCopy, givenCommandListAndHostPointersWhenMemoryCopyRegionCalledThenTwoNewAllocationAreAddedToHostMapPtr, IsAtLeastSkl) { MockAppendMemoryCopy cmdList; cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); @@ -343,12 +280,14 @@ HWTEST2_F(AppendMemoryCopy, givenCopyCommandListWhenTimestampPassedToMemoryCopyT } using SupportedPlatforms = IsWithinProducts; -HWTEST2_F(AppendMemoryCopy, 
givenCommandListWhenTimestampPassedToMemoryCopyThenAppendProfilingCalledOnceBeforeAndAfterCommand, SupportedPlatforms) { +HWTEST2_F(AppendMemoryCopy, + givenCommandListUsesTimestampPassedToMemoryCopyWhenTwoKernelsAreUsedThenAppendProfilingCalledForSinglePacket, SupportedPlatforms) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; - using MI_LOAD_REGISTER_REG = typename GfxFamily::MI_LOAD_REGISTER_REG; - using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + using GPGPU_WALKER = typename GfxFamily::GPGPU_WALKER; MockAppendMemoryCopy commandList; + commandList.appendMemoryCopyKernelWithGACallBase = true; + commandList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); void *srcPtr = reinterpret_cast(0x1234); void *dstPtr = reinterpret_cast(0x2345); @@ -365,65 +304,97 @@ HWTEST2_F(AppendMemoryCopy, givenCommandListWhenTimestampPassedToMemoryCopyThenA EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); + uint64_t globalStartAddress = event->getGpuAddress(device) + event->getGlobalStartOffset(); + uint64_t contextStartAddress = event->getGpuAddress(device) + event->getContextStartOffset(); + uint64_t globalEndAddress = event->getGpuAddress(device) + event->getGlobalEndOffset(); + uint64_t contextEndAddress = event->getGpuAddress(device) + event->getContextEndOffset(); + commandList.appendMemoryCopy(dstPtr, srcPtr, 0x100, event->toHandle(), 0, nullptr); - EXPECT_GT(commandList.appendMemoryCopyKernelWithGACalled, 0u); - EXPECT_EQ(commandList.appendMemoryCopyBlitCalled, 0u); + EXPECT_EQ(2u, commandList.appendMemoryCopyKernelWithGACalled); + EXPECT_EQ(0u, commandList.appendMemoryCopyBlitCalled); EXPECT_EQ(1u, event->getPacketsInUse()); + EXPECT_EQ(1u, event->getKernelCount()); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandList.commandContainer.getCommandStream()->getCpuBase(), 0), 
commandList.commandContainer.getCommandStream()->getUsed())); - auto itor = find(cmdList.begin(), cmdList.end()); - EXPECT_NE(cmdList.end(), itor); - { - auto cmd = genCmdCast(*itor); - EXPECT_EQ(cmd->getSourceRegisterAddress(), REG_GLOBAL_TIMESTAMP_LDW); - } - itor++; - itor = find(itor, cmdList.end()); - EXPECT_NE(cmdList.end(), itor); - { - auto cmd = genCmdCast(*itor); - EXPECT_EQ(cmd->getSourceRegisterAddress(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW); - } + auto itorWalkers = findAll(cmdList.begin(), cmdList.end()); + auto begin = cmdList.begin(); + ASSERT_EQ(2u, itorWalkers.size()); + auto secondWalker = itorWalkers[1]; - itor++; - itor = find(itor, cmdList.end()); - EXPECT_NE(cmdList.end(), itor); - { - auto cmd = genCmdCast(*itor); - EXPECT_FALSE(cmd->getDcFlushEnable()); - } + validateTimestampRegisters(cmdList, + begin, + REG_GLOBAL_TIMESTAMP_LDW, globalStartAddress, + GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextStartAddress, + false); - itor++; - itor = find(itor, cmdList.end()); - EXPECT_NE(cmdList.end(), itor); - { - auto cmd = genCmdCast(*itor); - EXPECT_EQ(cmd->getSourceRegisterAddress(), REG_GLOBAL_TIMESTAMP_LDW); - } - - itor++; - itor = find(itor, cmdList.end()); - EXPECT_NE(cmdList.end(), itor); - { - auto cmd = genCmdCast(*itor); - EXPECT_EQ(cmd->getSourceRegisterAddress(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW); - } - - auto temp = itor; - auto numPCs = findAll(temp, cmdList.end()); - //we should have only one PC with dcFlush added - ASSERT_EQ(1u, numPCs.size()); - - itor = find(itor, cmdList.end()); - EXPECT_NE(cmdList.end(), itor); - { - auto cmd = genCmdCast(*itor); - EXPECT_EQ(MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo), cmd->getDcFlushEnable()); - } + validateTimestampRegisters(cmdList, + secondWalker, + REG_GLOBAL_TIMESTAMP_LDW, globalEndAddress, + GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextEndAddress, + false); } + +HWTEST2_F(AppendMemoryCopy, + 
givenCommandListUsesTimestampPassedToMemoryCopyWhenThreeKernelsAreUsedThenAppendProfilingCalledForSinglePacket, SupportedPlatforms) { + using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; + using GPGPU_WALKER = typename GfxFamily::GPGPU_WALKER; + + MockAppendMemoryCopy commandList; + commandList.appendMemoryCopyKernelWithGACallBase = true; + + commandList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); + void *srcPtr = reinterpret_cast(0x1231); + void *dstPtr = reinterpret_cast(0x200002345); + + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.count = 1; + eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; + + ze_event_desc_t eventDesc = {}; + eventDesc.index = 0; + + ze_result_t result = ZE_RESULT_SUCCESS; + auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); + + uint64_t globalStartAddress = event->getGpuAddress(device) + event->getGlobalStartOffset(); + uint64_t contextStartAddress = event->getGpuAddress(device) + event->getContextStartOffset(); + uint64_t globalEndAddress = event->getGpuAddress(device) + event->getGlobalEndOffset(); + uint64_t contextEndAddress = event->getGpuAddress(device) + event->getContextEndOffset(); + + commandList.appendMemoryCopy(dstPtr, srcPtr, 0x100002345, event->toHandle(), 0, nullptr); + EXPECT_EQ(3u, commandList.appendMemoryCopyKernelWithGACalled); + EXPECT_EQ(0u, commandList.appendMemoryCopyBlitCalled); + EXPECT_EQ(1u, event->getPacketsInUse()); + EXPECT_EQ(1u, event->getKernelCount()); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, ptrOffset(commandList.commandContainer.getCommandStream()->getCpuBase(), 0), + commandList.commandContainer.getCommandStream()->getUsed())); + + auto itorWalkers = findAll(cmdList.begin(), cmdList.end()); + auto begin = 
cmdList.begin(); + ASSERT_EQ(3u, itorWalkers.size()); + auto thirdWalker = itorWalkers[2]; + + validateTimestampRegisters(cmdList, + begin, + REG_GLOBAL_TIMESTAMP_LDW, globalStartAddress, + GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextStartAddress, + false); + + validateTimestampRegisters(cmdList, + thirdWalker, + REG_GLOBAL_TIMESTAMP_LDW, globalEndAddress, + GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextEndAddress, + false); +} + } // namespace ult } // namespace L0 diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp index 31fd9a7595..eb835233ec 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp @@ -355,5 +355,79 @@ HWTEST2_F(CommandListAppendSignalEvent, EXPECT_EQ(1u, postSyncFound); } +HWTEST2_F(CommandListAppendSignalEvent, + givenMultiTileCommandListWhenAppendWriteGlobalTimestampCalledWithSignalEventThenWorkPartitionedRegistersAreUsed, IsAtLeastXeHpCore) { + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; + auto &commandContainer = commandList->commandContainer; + + uint64_t timestampAddress = 0x12345678555500; + uint64_t *dstptr = reinterpret_cast(timestampAddress); + + constexpr uint32_t packets = 2u; + + event->setEventTimestampFlag(true); + commandList->partitionCount = packets; + + commandList->appendWriteGlobalTimestamp(dstptr, event->toHandle(), 0, nullptr); + EXPECT_EQ(packets, event->getPacketsInUse()); + + auto eventGpuAddress = event->getGpuAddress(device); + uint64_t contextStartAddress = eventGpuAddress + event->getContextStartOffset(); + uint64_t globalStartAddress = eventGpuAddress + event->getGlobalStartOffset(); + uint64_t contextEndAddress = eventGpuAddress + event->getContextEndOffset(); + 
uint64_t globalEndAddress = eventGpuAddress + event->getGlobalEndOffset(); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); + + auto itorPC = find(cmdList.begin(), cmdList.end()); + EXPECT_NE(cmdList.end(), itorPC); + auto cmd = genCmdCast(*itorPC); + while (cmd->getPostSyncOperation() != POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_TIMESTAMP) { + itorPC++; + itorPC = find(itorPC, cmdList.end()); + EXPECT_NE(cmdList.end(), itorPC); + cmd = genCmdCast(*itorPC); + } + EXPECT_TRUE(cmd->getCommandStreamerStallEnable()); + EXPECT_FALSE(cmd->getDcFlushEnable()); + EXPECT_EQ(timestampAddress, NEO::UnitTestHelper::getPipeControlPostSyncAddress(*cmd)); + + auto startCmdList = cmdList.begin(); + validateTimestampRegisters(cmdList, + startCmdList, + REG_GLOBAL_TIMESTAMP_LDW, globalStartAddress, + GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextStartAddress, + true); + + if (UnitTestHelper::timestampRegisterHighAddress()) { + uint64_t globalStartAddressHigh = globalStartAddress + sizeof(uint32_t); + uint64_t contextStartAddressHigh = contextStartAddress + sizeof(uint32_t); + validateTimestampRegisters(cmdList, + startCmdList, + REG_GLOBAL_TIMESTAMP_UN, globalStartAddressHigh, + 0x23AC, contextStartAddressHigh, + true); + } + + validateTimestampRegisters(cmdList, + startCmdList, + REG_GLOBAL_TIMESTAMP_LDW, globalEndAddress, + GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextEndAddress, + true); + + if (UnitTestHelper::timestampRegisterHighAddress()) { + uint64_t globalEndAddressHigh = globalEndAddress + sizeof(uint32_t); + uint64_t contextEndAddressHigh = contextEndAddress + sizeof(uint32_t); + validateTimestampRegisters(cmdList, + startCmdList, + REG_GLOBAL_TIMESTAMP_UN, globalEndAddressHigh, + 0x23AC, contextEndAddressHigh, + true); + } +} + } // namespace ult } // namespace L0 diff --git 
a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_wait_on_events.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_wait_on_events.cpp index 570642b23a..bdb20d0775 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_wait_on_events.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_wait_on_events.cpp @@ -210,9 +210,9 @@ HWTEST_F(CommandListAppendWaitOnEvent, WhenAppendingWaitOnTimestampEventWithThre auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); event->setPacketsInUse(3u); - event->kernelCount = 2; + event->increaseKernelCount(); event->setPacketsInUse(3u); - event->kernelCount = 3; + event->increaseKernelCount(); event->setPacketsInUse(3u); ASSERT_EQ(9u, event->getPacketsInUse()); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_fill.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_fill.cpp index 07f07d713a..de8ee73e2d 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_fill.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_fill.cpp @@ -6,11 +6,13 @@ */ #include "shared/source/memory_manager/memory_manager.h" +#include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/test_macros/test.h" #include "level_zero/core/source/builtin/builtin_functions_lib_impl.h" #include "level_zero/core/source/kernel/kernel_imp.h" +#include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_built_ins.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" @@ -210,5 +212,217 @@ HWTEST2_F(AppendFillTest, delete[] nonMultipleDstPtr; } +using IsBetweenGen9AndGen12lp = IsWithinGfxCore; + +HWTEST2_F(AppendFillTest, + 
givenCallToAppendMemoryFillWithImmediateValueWhenTimestampEventUsesRegistersThenSinglePacketUsesRegisterProfiling, IsBetweenGen9AndGen12lp) { + using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; + using GPGPU_WALKER = typename GfxFamily::GPGPU_WALKER; + + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.count = 1; + eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; + + ze_event_desc_t eventDesc = {}; + eventDesc.index = 0; + + ze_result_t result = ZE_RESULT_SUCCESS; + auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); + + uint64_t globalStartAddress = event->getGpuAddress(device) + event->getGlobalStartOffset(); + uint64_t contextStartAddress = event->getGpuAddress(device) + event->getContextStartOffset(); + uint64_t globalEndAddress = event->getGpuAddress(device) + event->getGlobalEndOffset(); + uint64_t contextEndAddress = event->getGpuAddress(device) + event->getContextEndOffset(); + + auto commandList = std::make_unique>>(); + commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u); + + result = commandList->appendMemoryFill(immediateDstPtr, &immediatePattern, + sizeof(immediatePattern), + immediateAllocSize, event->toHandle(), 0, nullptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + EXPECT_EQ(1u, event->getPacketsInUse()); + EXPECT_EQ(1u, event->getKernelCount()); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), + commandList->commandContainer.getCommandStream()->getUsed())); + + auto itorWalkers = findAll(cmdList.begin(), cmdList.end()); + auto begin = cmdList.begin(); + ASSERT_EQ(2u, itorWalkers.size()); + auto secondWalker = itorWalkers[1]; + + validateTimestampRegisters(cmdList, + begin, + 
REG_GLOBAL_TIMESTAMP_LDW, globalStartAddress, + GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextStartAddress, + false); + + validateTimestampRegisters(cmdList, + secondWalker, + REG_GLOBAL_TIMESTAMP_LDW, globalEndAddress, + GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextEndAddress, + false); +} + +HWTEST2_F(AppendFillTest, + givenCallToAppendMemoryFillWhenTimestampEventUsesRegistersThenSinglePacketUsesRegisterProfiling, IsBetweenGen9AndGen12lp) { + using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; + using GPGPU_WALKER = typename GfxFamily::GPGPU_WALKER; + + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.count = 1; + eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; + + ze_event_desc_t eventDesc = {}; + eventDesc.index = 0; + + ze_result_t result = ZE_RESULT_SUCCESS; + auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); + + uint64_t globalStartAddress = event->getGpuAddress(device) + event->getGlobalStartOffset(); + uint64_t contextStartAddress = event->getGpuAddress(device) + event->getContextStartOffset(); + uint64_t globalEndAddress = event->getGpuAddress(device) + event->getGlobalEndOffset(); + uint64_t contextEndAddress = event->getGpuAddress(device) + event->getContextEndOffset(); + + auto commandList = std::make_unique>>(); + commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u); + + result = commandList->appendMemoryFill(dstPtr, pattern, patternSize, allocSize, event->toHandle(), 0, nullptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + EXPECT_EQ(1u, event->getPacketsInUse()); + EXPECT_EQ(1u, event->getKernelCount()); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), + 
commandList->commandContainer.getCommandStream()->getUsed())); + + auto itorWalkers = findAll(cmdList.begin(), cmdList.end()); + auto begin = cmdList.begin(); + ASSERT_EQ(2u, itorWalkers.size()); + auto secondWalker = itorWalkers[1]; + + validateTimestampRegisters(cmdList, + begin, + REG_GLOBAL_TIMESTAMP_LDW, globalStartAddress, + GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextStartAddress, + false); + + validateTimestampRegisters(cmdList, + secondWalker, + REG_GLOBAL_TIMESTAMP_LDW, globalEndAddress, + GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextEndAddress, + false); +} + +HWTEST2_F(AppendFillTest, + givenCallToAppendMemoryFillWithImmediateValueWhenTimestampEventUsesComputeWalkerPostSyncThenSeparateKernelsUsesPostSyncProfiling, IsAtLeastXeHpCore) { + using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; + using COMPUTE_WALKER = typename GfxFamily::COMPUTE_WALKER; + using POSTSYNC_DATA = typename FamilyType::POSTSYNC_DATA; + + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.count = 1; + eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; + + ze_event_desc_t eventDesc = {}; + eventDesc.index = 0; + + ze_result_t result = ZE_RESULT_SUCCESS; + auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); + + uint64_t firstKernelEventAddress = event->getGpuAddress(device); + uint64_t secondKernelEventAddress = event->getGpuAddress(device) + event->getSinglePacketSize(); + + auto commandList = std::make_unique>>(); + commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u); + + result = commandList->appendMemoryFill(immediateDstPtr, &immediatePattern, + sizeof(immediatePattern), + immediateAllocSize, event->toHandle(), 0, nullptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + EXPECT_EQ(2u, event->getPacketsInUse()); + EXPECT_EQ(2u, 
event->getKernelCount()); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), + commandList->commandContainer.getCommandStream()->getUsed())); + + auto itorWalkers = findAll(cmdList.begin(), cmdList.end()); + ASSERT_EQ(2u, itorWalkers.size()); + auto firstWalker = itorWalkers[0]; + auto secondWalker = itorWalkers[1]; + + auto walkerCmd = genCmdCast(*firstWalker); + EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation()); + EXPECT_EQ(firstKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress()); + + walkerCmd = genCmdCast(*secondWalker); + EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation()); + EXPECT_EQ(secondKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress()); +} + +HWTEST2_F(AppendFillTest, + givenCallToAppendMemoryFillWhenTimestampEventUsesComputeWalkerPostSyncThenSeparateKernelsUsesPostSyncProfiling, IsAtLeastXeHpCore) { + using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; + using COMPUTE_WALKER = typename GfxFamily::COMPUTE_WALKER; + using POSTSYNC_DATA = typename FamilyType::POSTSYNC_DATA; + + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.count = 1; + eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; + + ze_event_desc_t eventDesc = {}; + eventDesc.index = 0; + + ze_result_t result = ZE_RESULT_SUCCESS; + auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); + + uint64_t firstKernelEventAddress = event->getGpuAddress(device); + uint64_t secondKernelEventAddress = event->getGpuAddress(device) + event->getSinglePacketSize(); + + auto commandList = std::make_unique>>(); + commandList->initialize(device, 
NEO::EngineGroupType::RenderCompute, 0u); + + result = commandList->appendMemoryFill(dstPtr, pattern, patternSize, allocSize, event->toHandle(), 0, nullptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + EXPECT_EQ(2u, event->getPacketsInUse()); + EXPECT_EQ(2u, event->getKernelCount()); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), + commandList->commandContainer.getCommandStream()->getUsed())); + + auto itorWalkers = findAll(cmdList.begin(), cmdList.end()); + ASSERT_EQ(2u, itorWalkers.size()); + auto firstWalker = itorWalkers[0]; + auto secondWalker = itorWalkers[1]; + + auto walkerCmd = genCmdCast(*firstWalker); + EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation()); + EXPECT_EQ(firstKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress()); + + walkerCmd = genCmdCast(*secondWalker); + EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation()); + EXPECT_EQ(secondKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress()); +} + } // namespace ult } // namespace L0 diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp index 20426be0d2..88b7cf0958 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp @@ -278,5 +278,295 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenVariousKernelsAndPatchingDisallowe pCommandList->reset(); } +using AppendMemoryCopyXeHpAndLater = Test; + +HWTEST2_F(AppendMemoryCopyXeHpAndLater, + givenCommandListWhenTimestampProvidedByComputeWalkerPostSyncPassedToMemoryCopyThenAppendProfilingCalledForThreeSeparateKernels, + IsAtLeastXeHpCore) { + using GfxFamily = typename 
NEO::GfxFamilyMapper::GfxFamily; + using COMPUTE_WALKER = typename GfxFamily::COMPUTE_WALKER; + using POSTSYNC_DATA = typename FamilyType::POSTSYNC_DATA; + + MockAppendMemoryCopy commandList; + commandList.appendMemoryCopyKernelWithGACallBase = true; + + commandList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); + void *srcPtr = reinterpret_cast(0x1231); + void *dstPtr = reinterpret_cast(0x200002345); + + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.count = 1; + eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; + + ze_event_desc_t eventDesc = {}; + eventDesc.index = 0; + + ze_result_t result = ZE_RESULT_SUCCESS; + auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); + + uint64_t firstKernelEventAddress = event->getGpuAddress(device); + uint64_t secondKernelEventAddress = event->getGpuAddress(device) + event->getSinglePacketSize(); + uint64_t thirdKernelEventAddress = event->getGpuAddress(device) + 2 * event->getSinglePacketSize(); + + commandList.appendMemoryCopy(dstPtr, srcPtr, 0x100002345, event->toHandle(), 0, nullptr); + EXPECT_EQ(3u, commandList.appendMemoryCopyKernelWithGACalled); + EXPECT_EQ(0u, commandList.appendMemoryCopyBlitCalled); + EXPECT_EQ(3u, event->getPacketsInUse()); + EXPECT_EQ(3u, event->getKernelCount()); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, ptrOffset(commandList.commandContainer.getCommandStream()->getCpuBase(), 0), + commandList.commandContainer.getCommandStream()->getUsed())); + + auto itorWalkers = findAll(cmdList.begin(), cmdList.end()); + ASSERT_EQ(3u, itorWalkers.size()); + auto firstWalker = itorWalkers[0]; + auto secondWalker = itorWalkers[1]; + auto thirdWalker = itorWalkers[2]; + + auto walkerCmd = genCmdCast(*firstWalker); + 
EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation()); + EXPECT_EQ(firstKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress()); + + walkerCmd = genCmdCast(*secondWalker); + EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation()); + EXPECT_EQ(secondKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress()); + + walkerCmd = genCmdCast(*thirdWalker); + EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation()); + EXPECT_EQ(thirdKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress()); +} + +HWTEST2_F(AppendMemoryCopyXeHpAndLater, + givenMultiTileCommandListWhenTimestampProvidedByComputeWalkerPostSyncPassedToMemoryCopyThenAppendProfilingCalledForThreeSeparateMultiTileKernels, + IsAtLeastXeHpCore) { + using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; + using COMPUTE_WALKER = typename GfxFamily::COMPUTE_WALKER; + using POSTSYNC_DATA = typename FamilyType::POSTSYNC_DATA; + + MockAppendMemoryCopy commandList; + commandList.appendMemoryCopyKernelWithGACallBase = true; + commandList.partitionCount = 2; + + commandList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); + void *srcPtr = reinterpret_cast(0x1231); + void *dstPtr = reinterpret_cast(0x200002345); + + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.count = 1; + eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; + + ze_event_desc_t eventDesc = {}; + eventDesc.index = 0; + + ze_result_t result = ZE_RESULT_SUCCESS; + auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); + + uint64_t firstKernelEventAddress = event->getGpuAddress(device); + uint64_t secondKernelEventAddress = event->getGpuAddress(device) + 2 * 
event->getSinglePacketSize(); + uint64_t thirdKernelEventAddress = event->getGpuAddress(device) + 4 * event->getSinglePacketSize(); + + commandList.appendMemoryCopy(dstPtr, srcPtr, 0x100002345, event->toHandle(), 0, nullptr); + EXPECT_EQ(3u, commandList.appendMemoryCopyKernelWithGACalled); + EXPECT_EQ(0u, commandList.appendMemoryCopyBlitCalled); + EXPECT_EQ(6u, event->getPacketsInUse()); + EXPECT_EQ(3u, event->getKernelCount()); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, ptrOffset(commandList.commandContainer.getCommandStream()->getCpuBase(), 0), + commandList.commandContainer.getCommandStream()->getUsed())); + + auto itorWalkers = findAll(cmdList.begin(), cmdList.end()); + ASSERT_EQ(3u, itorWalkers.size()); + auto firstWalker = itorWalkers[0]; + auto secondWalker = itorWalkers[1]; + auto thirdWalker = itorWalkers[2]; + + auto walkerCmd = genCmdCast(*firstWalker); + EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation()); + EXPECT_EQ(firstKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress()); + + walkerCmd = genCmdCast(*secondWalker); + EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation()); + EXPECT_EQ(secondKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress()); + + walkerCmd = genCmdCast(*thirdWalker); + EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation()); + EXPECT_EQ(thirdKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress()); +} + +HWTEST2_F(AppendMemoryCopyXeHpAndLater, + givenCommandListAndEventWithSignalScopeWhenTimestampProvidedByComputeWalkerPostSyncPassedToMemoryCopyThenAppendProfilingCalledForThreeSeparateKernelsAndL3FlushWaHandled, + isXeHpOrXeHpgCore) { + using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; + using COMPUTE_WALKER = typename GfxFamily::COMPUTE_WALKER; + using POSTSYNC_DATA = typename 
FamilyType::POSTSYNC_DATA; + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; + + MockAppendMemoryCopy commandList; + commandList.appendMemoryCopyKernelWithGACallBase = true; + + commandList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); + void *srcPtr = reinterpret_cast(0x1231); + void *dstPtr = reinterpret_cast(0x200002345); + + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.count = 1; + eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; + + ze_event_desc_t eventDesc = {}; + eventDesc.index = 0; + eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST; + + ze_result_t result = ZE_RESULT_SUCCESS; + auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); + + uint64_t firstKernelEventAddress = event->getGpuAddress(device); + uint64_t secondKernelEventAddress = event->getGpuAddress(device) + 2 * event->getSinglePacketSize(); + uint64_t thirdKernelEventAddress = event->getGpuAddress(device) + 4 * event->getSinglePacketSize(); + + commandList.appendMemoryCopy(dstPtr, srcPtr, 0x100002345, event->toHandle(), 0, nullptr); + EXPECT_EQ(3u, commandList.appendMemoryCopyKernelWithGACalled); + EXPECT_EQ(0u, commandList.appendMemoryCopyBlitCalled); + EXPECT_EQ(6u, event->getPacketsInUse()); + EXPECT_EQ(3u, event->getKernelCount()); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, ptrOffset(commandList.commandContainer.getCommandStream()->getCpuBase(), 0), + commandList.commandContainer.getCommandStream()->getUsed())); + + auto itorWalkers = findAll(cmdList.begin(), cmdList.end()); + ASSERT_EQ(3u, itorWalkers.size()); + auto firstWalker = itorWalkers[0]; + auto secondWalker = itorWalkers[1]; + auto thirdWalker = itorWalkers[2]; + + auto walkerCmd = 
genCmdCast(*firstWalker); + EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation()); + EXPECT_EQ(firstKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress()); + + walkerCmd = genCmdCast(*secondWalker); + EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation()); + EXPECT_EQ(secondKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress()); + + walkerCmd = genCmdCast(*thirdWalker); + EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation()); + EXPECT_EQ(thirdKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress()); + + auto itorPipeControls = findAll(cmdList.begin(), cmdList.end()); + uint64_t eventGpuAddress = firstKernelEventAddress + event->getSinglePacketSize(); + if (event->isUsingContextEndOffset()) { + eventGpuAddress += event->getContextEndOffset(); + } + uint32_t postSyncPipeControls = 0; + for (auto it : itorPipeControls) { + auto cmd = genCmdCast(*it); + if (cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) { + EXPECT_EQ(cmd->getImmediateData(), Event::STATE_SIGNALED); + EXPECT_TRUE(cmd->getCommandStreamerStallEnable()); + EXPECT_FALSE(cmd->getWorkloadPartitionIdOffsetEnable()); + EXPECT_TRUE(cmd->getDcFlushEnable()); + EXPECT_EQ(eventGpuAddress, NEO::UnitTestHelper::getPipeControlPostSyncAddress(*cmd)); + postSyncPipeControls++; + eventGpuAddress += (2 * event->getSinglePacketSize()); + } + } + EXPECT_EQ(3u, postSyncPipeControls); +} + +HWTEST2_F(AppendMemoryCopyXeHpAndLater, + givenMultiTileCommandListAndEventWithSignalScopeWhenTimestampProvidedByComputeWalkerPostSyncPassedToMemoryCopyThenAppendProfilingCalledForThreeSeparateMultiTileKernelsAndL3FlushWaHandled, + isXeHpOrXeHpgCore) { + using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; + using COMPUTE_WALKER = typename GfxFamily::COMPUTE_WALKER; + using POSTSYNC_DATA = typename 
FamilyType::POSTSYNC_DATA; + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; + + MockAppendMemoryCopy commandList; + commandList.appendMemoryCopyKernelWithGACallBase = true; + commandList.partitionCount = 2; + + commandList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); + void *srcPtr = reinterpret_cast(0x1231); + void *dstPtr = reinterpret_cast(0x200002345); + + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.count = 1; + eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; + + ze_event_desc_t eventDesc = {}; + eventDesc.index = 0; + eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST; + + ze_result_t result = ZE_RESULT_SUCCESS; + auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); + + uint64_t firstKernelEventAddress = event->getGpuAddress(device); + uint64_t secondKernelEventAddress = event->getGpuAddress(device) + 4 * event->getSinglePacketSize(); + uint64_t thirdKernelEventAddress = event->getGpuAddress(device) + 8 * event->getSinglePacketSize(); + + commandList.appendMemoryCopy(dstPtr, srcPtr, 0x100002345, event->toHandle(), 0, nullptr); + EXPECT_EQ(3u, commandList.appendMemoryCopyKernelWithGACalled); + EXPECT_EQ(0u, commandList.appendMemoryCopyBlitCalled); + EXPECT_EQ(12u, event->getPacketsInUse()); + EXPECT_EQ(3u, event->getKernelCount()); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, ptrOffset(commandList.commandContainer.getCommandStream()->getCpuBase(), 0), + commandList.commandContainer.getCommandStream()->getUsed())); + + auto itorWalkers = findAll(cmdList.begin(), cmdList.end()); + ASSERT_EQ(3u, itorWalkers.size()); + auto firstWalker = itorWalkers[0]; + auto secondWalker = itorWalkers[1]; + auto thirdWalker = 
itorWalkers[2]; + + auto walkerCmd = genCmdCast(*firstWalker); + EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation()); + EXPECT_EQ(firstKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress()); + + walkerCmd = genCmdCast(*secondWalker); + EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation()); + EXPECT_EQ(secondKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress()); + + walkerCmd = genCmdCast(*thirdWalker); + EXPECT_EQ(POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walkerCmd->getPostSync().getOperation()); + EXPECT_EQ(thirdKernelEventAddress, walkerCmd->getPostSync().getDestinationAddress()); + + auto itorPipeControls = findAll(cmdList.begin(), cmdList.end()); + uint64_t eventGpuAddress = firstKernelEventAddress + 2 * event->getSinglePacketSize(); + if (event->isUsingContextEndOffset()) { + eventGpuAddress += event->getContextEndOffset(); + } + uint32_t postSyncPipeControls = 0; + for (auto it : itorPipeControls) { + auto cmd = genCmdCast(*it); + if (cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) { + EXPECT_EQ(cmd->getImmediateData(), Event::STATE_SIGNALED); + EXPECT_TRUE(cmd->getCommandStreamerStallEnable()); + EXPECT_TRUE(cmd->getWorkloadPartitionIdOffsetEnable()); + EXPECT_TRUE(cmd->getDcFlushEnable()); + EXPECT_EQ(eventGpuAddress, NEO::UnitTestHelper::getPipeControlPostSyncAddress(*cmd)); + postSyncPipeControls++; + eventGpuAddress += (4 * event->getSinglePacketSize()); + } + } + EXPECT_EQ(3u, postSyncPipeControls); +} + } // namespace ult } // namespace L0 diff --git a/level_zero/core/test/unit_tests/sources/event/test_event.cpp b/level_zero/core/test/unit_tests/sources/event/test_event.cpp index cdf05f9dca..524457496a 100644 --- a/level_zero/core/test/unit_tests/sources/event/test_event.cpp +++ b/level_zero/core/test/unit_tests/sources/event/test_event.cpp @@ -551,15 +551,27 @@ 
TEST_F(EventCreate, givenEventWhenSignaledAndResetFromTheHostThenCorrectDataAndO auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); ASSERT_NE(nullptr, eventPool); - auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); + + auto &l0HwHelper = L0HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily); + auto event = std::unique_ptr(l0HwHelper.createEvent(eventPool.get(), &eventDesc, device)); ASSERT_NE(nullptr, event); - if (L0HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily).multiTileCapablePlatform()) { + if (l0HwHelper.multiTileCapablePlatform()) { EXPECT_TRUE(event->isUsingContextEndOffset()); } else { EXPECT_FALSE(event->isUsingContextEndOffset()); } + uint32_t *eventCompletionMemory = reinterpret_cast(event->getHostAddress()); + if (event->isUsingContextEndOffset()) { + eventCompletionMemory = ptrOffset(eventCompletionMemory, event->getContextEndOffset()); + } + uint32_t maxPacketsCount = EventPacketsCount::maxKernelSplit * NEO::TimestampPacketSizeControl::preferredPacketCount; + for (uint32_t i = 0; i < maxPacketsCount; i++) { + EXPECT_EQ(Event::STATE_INITIAL, *eventCompletionMemory); + eventCompletionMemory = ptrOffset(eventCompletionMemory, event->getSinglePacketSize()); + } + result = event->queryStatus(); EXPECT_EQ(ZE_RESULT_NOT_READY, result); @@ -1064,7 +1076,7 @@ TEST_F(TimestampEventCreate, givenEventTimestampsCreatedWhenResetIsInvokeThenCor EXPECT_EQ(1u, event->kernelEventCompletionData[j].getPacketsUsed()); } - EXPECT_EQ(1u, event->kernelCount); + EXPECT_EQ(1u, event->getKernelCount()); } TEST_F(TimestampEventCreate, givenSingleTimestampEventThenAllocationSizeCreatedForAllTimestamps) { @@ -1093,13 +1105,13 @@ TEST_F(TimestampEventCreate, givenEventTimestampWhenPacketCountIsSetThenCorrectO gpuAddr += (4u * event->getSinglePacketSize()); - event->kernelCount = 2; + event->increaseKernelCount(); 
event->setPacketsInUse(2u); EXPECT_EQ(6u, event->getPacketsInUse()); EXPECT_EQ(gpuAddr, event->getPacketAddress(device)); gpuAddr += (2u * event->getSinglePacketSize()); - event->kernelCount = 3; + event->increaseKernelCount(); EXPECT_EQ(gpuAddr, event->getPacketAddress(device)); EXPECT_EQ(7u, event->getPacketsInUse()); } @@ -1122,7 +1134,7 @@ TEST_F(TimestampEventCreate, givenEventWhenSignaledAndResetFromTheHostThenCorrec } EXPECT_EQ(1u, event->kernelEventCompletionData[j].getPacketsUsed()); } - EXPECT_EQ(1u, event->kernelCount); + EXPECT_EQ(1u, event->getKernelCount()); } TEST_F(TimestampEventCreate, givenpCountZeroCallingQueryTimestampExpThenpCountSetProperly) { diff --git a/opencl/test/unit_test/helpers/hw_helper_tests.cpp b/opencl/test/unit_test/helpers/hw_helper_tests.cpp index 2e6fdb6822..c9ad39233b 100644 --- a/opencl/test/unit_test/helpers/hw_helper_tests.cpp +++ b/opencl/test/unit_test/helpers/hw_helper_tests.cpp @@ -385,6 +385,42 @@ HWTEST_F(PipeControlHelperTests, WhenIsDcFlushAllowedIsCalledThenCorrectResultIs EXPECT_EQ(hwInfoConfig.isDcFlushAllowed(), MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo)); } +HWTEST_F(PipeControlHelperTests, WhenPipeControlPostSyncTimestampUsedThenCorrectPostSyncUsed) { + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + std::unique_ptr buffer(new uint8_t[128]); + + LinearStream stream(buffer.get(), 128); + uint64_t address = 0x1234567887654320; + uint64_t immediateData = 0x0; + + PipeControlArgs args; + MemorySynchronizationCommands::addPipeControlWithPostSync( + stream, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, address, immediateData, args); + auto pipeControl = genCmdCast(stream.getCpuBase()); + ASSERT_NE(nullptr, pipeControl); + EXPECT_EQ(address, NEO::UnitTestHelper::getPipeControlPostSyncAddress(*pipeControl)); + EXPECT_EQ(immediateData, pipeControl->getImmediateData()); + EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, pipeControl->getPostSyncOperation()); +} + 
+HWTEST_F(PipeControlHelperTests, WhenPipeControlPostSyncWriteImmediateDataUsedThenCorrectPostSyncUsed) { + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + std::unique_ptr buffer(new uint8_t[128]); + + LinearStream stream(buffer.get(), 128); + uint64_t address = 0x1234567887654320; + uint64_t immediateData = 0x1234; + + PipeControlArgs args; + MemorySynchronizationCommands::addPipeControlWithPostSync( + stream, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, address, immediateData, args); + auto pipeControl = genCmdCast(stream.getCpuBase()); + ASSERT_NE(nullptr, pipeControl); + EXPECT_EQ(address, NEO::UnitTestHelper::getPipeControlPostSyncAddress(*pipeControl)); + EXPECT_EQ(immediateData, pipeControl->getImmediateData()); + EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, pipeControl->getPostSyncOperation()); +} + TEST(HwInfoTest, givenHwInfoWhenChosenEngineTypeQueriedThenDefaultIsReturned) { HardwareInfo hwInfo = *defaultHwInfo; hwInfo.capabilityTable.defaultEngineType = aub_stream::ENGINE_RCS; diff --git a/shared/test/common/helpers/unit_test_helper.h b/shared/test/common/helpers/unit_test_helper.h index 37cc68e2b6..79a4e349d6 100644 --- a/shared/test/common/helpers/unit_test_helper.h +++ b/shared/test/common/helpers/unit_test_helper.h @@ -75,6 +75,10 @@ struct UnitTestHelper { static void adjustKernelDescriptorForImplicitArgs(KernelDescriptor &kernelDescriptor); static std::vector getProgrammedLargeGrfValues(CommandStreamReceiver &csr, LinearStream &linearStream); + + static bool getWorkloadPartitionForStoreRegisterMemCmd(typename GfxFamily::MI_STORE_REGISTER_MEM &storeRegisterMem); + + static bool timestampRegisterHighAddress(); }; } // namespace NEO diff --git a/shared/test/common/helpers/unit_test_helper.inl b/shared/test/common/helpers/unit_test_helper.inl index c9239ccd91..18f66e94a4 100644 --- a/shared/test/common/helpers/unit_test_helper.inl +++ b/shared/test/common/helpers/unit_test_helper.inl @@ -70,4 +70,9 @@ inline 
uint64_t UnitTestHelper::getPipeControlPostSyncAddress(const t return (gpuAddressHigh << 32) | gpuAddress; } +template +bool UnitTestHelper::timestampRegisterHighAddress() { + return false; +} + } // namespace NEO diff --git a/shared/test/common/helpers/unit_test_helper_bdw_and_later.inl b/shared/test/common/helpers/unit_test_helper_bdw_and_later.inl index 6b42458c3a..9c9da21b68 100644 --- a/shared/test/common/helpers/unit_test_helper_bdw_and_later.inl +++ b/shared/test/common/helpers/unit_test_helper_bdw_and_later.inl @@ -72,4 +72,9 @@ std::vector UnitTestHelper::getProgrammedLargeGrfValues(Command return {}; } +template +inline bool UnitTestHelper::getWorkloadPartitionForStoreRegisterMemCmd(typename GfxFamily::MI_STORE_REGISTER_MEM &storeRegisterMem) { + return false; +} + } // namespace NEO diff --git a/shared/test/common/helpers/unit_test_helper_xehp_and_later.inl b/shared/test/common/helpers/unit_test_helper_xehp_and_later.inl index dff9d76294..cfb5378363 100644 --- a/shared/test/common/helpers/unit_test_helper_xehp_and_later.inl +++ b/shared/test/common/helpers/unit_test_helper_xehp_and_later.inl @@ -100,4 +100,9 @@ std::vector UnitTestHelper::getProgrammedLargeGrfValues(Command return largeGrfValues; } +template +inline bool UnitTestHelper::getWorkloadPartitionForStoreRegisterMemCmd(typename GfxFamily::MI_STORE_REGISTER_MEM &storeRegisterMem) { + return storeRegisterMem.getWorkloadPartitionIdOffsetEnable(); +} + } // namespace NEO diff --git a/shared/test/common/test_macros/header/common_matchers.h b/shared/test/common/test_macros/header/common_matchers.h index d54259b78d..6a32e9b40d 100644 --- a/shared/test/common/test_macros/header/common_matchers.h +++ b/shared/test/common/test_macros/header/common_matchers.h @@ -34,6 +34,7 @@ using IsAtMostXeHpgCore = IsAtMostGfxCore; using IsAtLeastXeHpcCore = IsAtLeastGfxCore; using IsAtMostXeHpcCore = IsAtMostGfxCore; +using isXeHpOrXeHpgCore = IsAnyGfxCores; using isXeHpOrXeHpcCore = IsAnyGfxCores; using 
isXeHpcOrXeHpgCore = IsAnyGfxCores;