diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.h b/level_zero/core/source/cmdlist/cmdlist_hw.h index bed29af6c6..e3035dbf6f 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw.h @@ -182,7 +182,7 @@ struct CommandListCoreFamily : public CommandListImp { uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) override; void appendMultiPartitionPrologue(uint32_t partitionDataSize) override; void appendMultiPartitionEpilogue() override; - void appendEventForProfilingAllWalkers(Event *event, void **syncCmdBuffer, bool beforeWalker, bool singlePacketEvent, bool skipAddingEventToResidency); + void appendEventForProfilingAllWalkers(Event *event, void **syncCmdBuffer, CommandToPatchContainer *outTimeStampSyncCmds, bool beforeWalker, bool singlePacketEvent, bool skipAddingEventToResidency); ze_result_t addEventsToCmdList(uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingAllowed, bool trackDependencies, bool waitForImplicitInOrderDependency); ze_result_t reserveSpace(size_t size, void **ptr) override; @@ -285,11 +285,11 @@ struct CommandListCoreFamily : public CommandListImp { ze_result_t setGlobalWorkSizeIndirect(NEO::CrossThreadDataOffset offsets[3], uint64_t crossThreadAddress, uint32_t lws[3]); ze_result_t programSyncBuffer(Kernel &kernel, NEO::Device &device, const ze_group_count_t &threadGroupDimensions); - void appendWriteKernelTimestamp(Event *event, bool beforeWalker, bool maskLsb, bool workloadPartition); - void adjustWriteKernelTimestamp(uint64_t globalAddress, uint64_t contextAddress, bool maskLsb, uint32_t mask, bool workloadPartition); - void appendEventForProfiling(Event *event, bool beforeWalker, bool skipBarrierForEndProfiling); + void appendWriteKernelTimestamp(Event *event, CommandToPatchContainer *outTimeStampSyncCmds, bool beforeWalker, bool maskLsb, bool workloadPartition); + void adjustWriteKernelTimestamp(uint64_t globalAddress, uint64_t contextAddress, uint64_t baseAddress, CommandToPatchContainer *outTimeStampSyncCmds, bool maskLsb, uint32_t mask, bool workloadPartition); + void appendEventForProfiling(Event *event, CommandToPatchContainer *outTimeStampSyncCmds, bool beforeWalker, bool skipBarrierForEndProfiling, bool skipAddingEventToResidency); void appendEventForProfilingCopyCommand(Event *event, bool beforeWalker); - void appendSignalEventPostWalker(Event *event, void **syncCmdBuffer, bool skipBarrierForEndProfiling, bool skipAddingEventToResidency); + void appendSignalEventPostWalker(Event *event, void **syncCmdBuffer, CommandToPatchContainer *outTimeStampSyncCmds, bool skipBarrierForEndProfiling, bool skipAddingEventToResidency); virtual void programStateBaseAddress(NEO::CommandContainer &container, bool useSbaProperties); void appendComputeBarrierCommand(); NEO::PipeControlArgs createBarrierFlags(); diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index f716346d33..3e85e49aa3 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -463,12 +463,12 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelIndirect(ze_ return ZE_RESULT_ERROR_INVALID_ARGUMENT; } - appendEventForProfiling(event, true, false); + appendEventForProfiling(event, nullptr, true, false, false); launchParams.isIndirect = true; ret = appendLaunchKernelWithParams(Kernel::fromHandle(kernelHandle), pDispatchArgumentsBuffer, nullptr, launchParams); addToMappedEventList(event); - appendSignalEventPostWalker(event, nullptr, false, false); + appendSignalEventPostWalker(event, nullptr, nullptr, false, false); handleInOrderDependencyCounter(event, isInOrderNonWalkerSignalingRequired(event)); @@ -503,7 +503,7 @@ ze_result_t CommandListCoreFamily::appendLaunchMultipleKernelsInd return ZE_RESULT_ERROR_INVALID_ARGUMENT; } - appendEventForProfiling(event, true, false); + appendEventForProfiling(event, nullptr, true, false, false); auto allocData = device->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(static_cast(pNumLaunchArguments)); auto alloc = allocData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex()); commandContainer.addToResidencyContainer(alloc); @@ -519,7 +519,7 @@ ze_result_t CommandListCoreFamily::appendLaunchMultipleKernelsInd } } addToMappedEventList(event); - appendSignalEventPostWalker(event, nullptr, false, false); + appendSignalEventPostWalker(event, nullptr, nullptr, false, false); return ret; } @@ -603,9 +603,9 @@ ze_result_t CommandListCoreFamily::appendMemoryRangesBarrier(uint return ZE_RESULT_ERROR_INVALID_ARGUMENT; } - appendEventForProfiling(signalEvent, true, false); + appendEventForProfiling(signalEvent, nullptr, true, false, false); applyMemoryRangesBarrier(numRanges, pRangeSizes, pRanges); - appendSignalEventPostWalker(signalEvent, nullptr, false, false); + appendSignalEventPostWalker(signalEvent, nullptr, nullptr, false, false); addToMappedEventList(signalEvent); if (this->isInOrderExecutionEnabled()) { @@ -1265,7 +1265,7 @@ ze_result_t CommandListCoreFamily::appendMemoryCopyBlitRegion(Ali return ZE_RESULT_ERROR_INVALID_ARGUMENT; } - appendEventForProfiling(signalEvent, true, false); + appendEventForProfiling(signalEvent, nullptr, true, false, false); auto &rootDeviceEnvironment = device->getNEODevice()->getRootDeviceEnvironmentRef(); bool copyRegionPreferred = NEO::BlitCommandsHelper::isCopyRegionPreferred(copySizeModified, rootDeviceEnvironment, blitProperties.isSystemMemoryPoolUsed); if (copyRegionPreferred) { @@ -1275,7 +1275,7 @@ ze_result_t CommandListCoreFamily::appendMemoryCopyBlitRegion(Ali } makeResidentDummyAllocation(); - appendSignalEventPostWalker(signalEvent, nullptr, false, false); + appendSignalEventPostWalker(signalEvent, nullptr, nullptr, false, false); return ZE_RESULT_SUCCESS; } @@ -1300,11 +1300,11 @@ ze_result_t CommandListCoreFamily::appendCopyImageBlit(NEO::Graph commandContainer.addToResidencyContainer(src); commandContainer.addToResidencyContainer(clearColorAllocation); - appendEventForProfiling(signalEvent, true, false); + appendEventForProfiling(signalEvent, nullptr, true, false, false); NEO::BlitCommandsHelper::dispatchBlitCommandsForImageRegion(blitProperties, *commandContainer.getCommandStream(), *dummyBlitWa.rootDeviceEnvironment); makeResidentDummyAllocation(); - appendSignalEventPostWalker(signalEvent, nullptr, false, false); + appendSignalEventPostWalker(signalEvent, nullptr, nullptr, false, false); return ZE_RESULT_SUCCESS; } @@ -1462,7 +1462,7 @@ ze_result_t CommandListCoreFamily::appendMemoryCopy(void *dstptr, launchParams.pipeControlSignalling = (signalEvent && singlePipeControlPacket) || dstAllocationStruct.needsFlush; - appendEventForProfilingAllWalkers(signalEvent, nullptr, true, singlePipeControlPacket, false); + appendEventForProfilingAllWalkers(signalEvent, nullptr, nullptr, true, singlePipeControlPacket, false); if (isCopyOnly()) { ret = appendMemoryCopyBlit(dstAllocationStruct.alignedAllocationPtr, @@ -1527,7 +1527,7 @@ ze_result_t CommandListCoreFamily::appendMemoryCopy(void *dstptr, } } - appendEventForProfilingAllWalkers(signalEvent, nullptr, false, singlePipeControlPacket, false); + appendEventForProfilingAllWalkers(signalEvent, nullptr, nullptr, false, singlePipeControlPacket, false); addFlushRequiredCommand(dstAllocationStruct.needsFlush, signalEvent); addToMappedEventList(signalEvent); @@ -1921,7 +1921,7 @@ ze_result_t CommandListCoreFamily::appendMemoryFill(void *ptr, launchParams.isKernelSplitOperation = (fillArguments.leftRemainingBytes > 0 || fillArguments.rightRemainingBytes > 0); bool singlePipeControlPacket = eventSignalPipeControl(launchParams.isKernelSplitOperation, dcFlush); - appendEventForProfilingAllWalkers(signalEvent, nullptr, true, singlePipeControlPacket, false); + appendEventForProfilingAllWalkers(signalEvent, nullptr, nullptr, true, singlePipeControlPacket, false); if (fillArguments.leftRemainingBytes > 0) { launchParams.numKernelsInSplitLaunch++; @@ -2066,7 +2066,7 @@ ze_result_t CommandListCoreFamily::appendMemoryFill(void *ptr, } addToMappedEventList(signalEvent); - appendEventForProfilingAllWalkers(signalEvent, nullptr, false, singlePipeControlPacket, false); + appendEventForProfilingAllWalkers(signalEvent, nullptr, nullptr, false, singlePipeControlPacket, false); addFlushRequiredCommand(hostPointerNeedsFlush, signalEvent); bool nonWalkerInOrderCmdChaining = false; @@ -2115,7 +2115,7 @@ ze_result_t CommandListCoreFamily::appendBlitFill(void *ptr, } auto neoDevice = device->getNEODevice(); - appendEventForProfiling(signalEvent, true, false); + appendEventForProfiling(signalEvent, nullptr, true, false, false); NEO::GraphicsAllocation *gpuAllocation = device->getDriverHandle()->getDriverSystemMemoryAllocation(ptr, size, neoDevice->getRootDeviceIndex(), @@ -2143,7 +2143,7 @@ ze_result_t CommandListCoreFamily::appendBlitFill(void *ptr, neoDevice->getRootDeviceEnvironmentRef()); makeResidentDummyAllocation(); - appendSignalEventPostWalker(signalEvent, nullptr, false, false); + appendSignalEventPostWalker(signalEvent, nullptr, nullptr, false, false); if (isInOrderExecutionEnabled()) { appendSignalInOrderDependencyCounter(signalEvent); @@ -2154,12 +2154,12 @@ ze_result_t CommandListCoreFamily::appendBlitFill(void *ptr, } template -void CommandListCoreFamily::appendSignalEventPostWalker(Event *event, void **syncCmdBuffer, bool skipBarrierForEndProfiling, bool skipAddingEventToResidency) { +void CommandListCoreFamily::appendSignalEventPostWalker(Event *event, void **syncCmdBuffer, CommandToPatchContainer *outTimeStampSyncCmds, bool skipBarrierForEndProfiling, bool skipAddingEventToResidency) { if (event == nullptr || !event->getPoolAllocation(this->device)) { return; } if (event->isEventTimestampFlagSet()) { - appendEventForProfiling(event, false, skipBarrierForEndProfiling); + appendEventForProfiling(event, outTimeStampSyncCmds, false, skipBarrierForEndProfiling, skipAddingEventToResidency); } else { event->resetKernelCountAndPacketUsedCount(); if (!skipAddingEventToResidency) { @@ -2186,7 +2186,7 @@ void CommandListCoreFamily::appendEventForProfilingCopyCommand(Ev makeResidentDummyAllocation(); dispatchEventPostSyncOperation(event, nullptr, Event::STATE_SIGNALED, true, false, false, false); } - appendWriteKernelTimestamp(event, beforeWalker, false, false); + appendWriteKernelTimestamp(event, nullptr, beforeWalker, false, false); } template @@ -2600,29 +2600,54 @@ ze_result_t CommandListCoreFamily::programSyncBuffer(Kernel &kern } template -void CommandListCoreFamily::appendWriteKernelTimestamp(Event *event, bool beforeWalker, bool maskLsb, bool workloadPartition) { +void CommandListCoreFamily::appendWriteKernelTimestamp(Event *event, CommandToPatchContainer *outTimeStampSyncCmds, bool beforeWalker, bool maskLsb, bool workloadPartition) { constexpr uint32_t mask = 0xfffffffe; auto baseAddr = event->getPacketAddress(this->device); - auto contextOffset = beforeWalker ? event->getContextStartOffset() : event->getContextEndOffset(); + auto globalOffset = beforeWalker ? event->getGlobalStartOffset() : event->getGlobalEndOffset(); + auto contextOffset = beforeWalker ? event->getContextStartOffset() : event->getContextEndOffset(); + + void **globalPostSyncCmdBuffer = nullptr; + void **contextPostSyncCmdBuffer = nullptr; + + void *globalPostSyncCmd = nullptr; + void *contextPostSyncCmd = nullptr; + + if (outTimeStampSyncCmds != nullptr) { + globalPostSyncCmdBuffer = &globalPostSyncCmd; + contextPostSyncCmdBuffer = &contextPostSyncCmd; + } uint64_t globalAddress = ptrOffset(baseAddr, globalOffset); uint64_t contextAddress = ptrOffset(baseAddr, contextOffset); if (maskLsb) { - NEO::EncodeMathMMIO::encodeBitwiseAndVal(commandContainer, RegisterOffsets::globalTimestampLdw, mask, globalAddress, workloadPartition, nullptr); - NEO::EncodeMathMMIO::encodeBitwiseAndVal(commandContainer, RegisterOffsets::gpThreadTimeRegAddressOffsetLow, mask, contextAddress, workloadPartition, nullptr); + NEO::EncodeMathMMIO::encodeBitwiseAndVal(commandContainer, RegisterOffsets::globalTimestampLdw, mask, globalAddress, workloadPartition, globalPostSyncCmdBuffer); + NEO::EncodeMathMMIO::encodeBitwiseAndVal(commandContainer, RegisterOffsets::gpThreadTimeRegAddressOffsetLow, mask, contextAddress, workloadPartition, contextPostSyncCmdBuffer); } else { - NEO::EncodeStoreMMIO::encode(*commandContainer.getCommandStream(), RegisterOffsets::globalTimestampLdw, globalAddress, workloadPartition, nullptr); - NEO::EncodeStoreMMIO::encode(*commandContainer.getCommandStream(), RegisterOffsets::gpThreadTimeRegAddressOffsetLow, contextAddress, workloadPartition, nullptr); + NEO::EncodeStoreMMIO::encode(*commandContainer.getCommandStream(), RegisterOffsets::globalTimestampLdw, globalAddress, workloadPartition, globalPostSyncCmdBuffer); + NEO::EncodeStoreMMIO::encode(*commandContainer.getCommandStream(), RegisterOffsets::gpThreadTimeRegAddressOffsetLow, contextAddress, workloadPartition, contextPostSyncCmdBuffer); } - adjustWriteKernelTimestamp(globalAddress, contextAddress, maskLsb, mask, workloadPartition); + if (outTimeStampSyncCmds != nullptr) { + CommandToPatch ctxCmd; + ctxCmd.type = CommandToPatch::TimestampEventPostSyncStoreRegMem; + + ctxCmd.offset = globalOffset; + ctxCmd.pDestination = globalPostSyncCmd; + outTimeStampSyncCmds->push_back(ctxCmd); + + ctxCmd.offset = contextOffset; + ctxCmd.pDestination = contextPostSyncCmd; + outTimeStampSyncCmds->push_back(ctxCmd); + } + + adjustWriteKernelTimestamp(globalAddress, contextAddress, baseAddr, outTimeStampSyncCmds, maskLsb, mask, workloadPartition); } template -void CommandListCoreFamily::appendEventForProfiling(Event *event, bool beforeWalker, bool skipBarrierForEndProfiling) { +void CommandListCoreFamily::appendEventForProfiling(Event *event, CommandToPatchContainer *outTimeStampSyncCmds, bool beforeWalker, bool skipBarrierForEndProfiling, bool skipAddingEventToResidency) { if (!event) { return; } @@ -2634,7 +2659,9 @@ void CommandListCoreFamily::appendEventForProfiling(Event *event, return; } - commandContainer.addToResidencyContainer(event->getPoolAllocation(this->device)); + if (!skipAddingEventToResidency) { + commandContainer.addToResidencyContainer(event->getPoolAllocation(this->device)); + } bool workloadPartition = isTimestampEventForMultiTile(event); appendDispatchOffsetRegister(workloadPartition, true); @@ -2642,7 +2669,7 @@ void CommandListCoreFamily::appendEventForProfiling(Event *event, if (beforeWalker) { event->resetKernelCountAndPacketUsedCount(); bool workloadPartition = setupTimestampEventForMultiTile(event); - appendWriteKernelTimestamp(event, beforeWalker, true, workloadPartition); + appendWriteKernelTimestamp(event, outTimeStampSyncCmds, beforeWalker, true, workloadPartition); } else { dispatchEventPostSyncOperation(event, nullptr, Event::STATE_SIGNALED, true, false, false, true); @@ -2658,7 +2685,7 @@ void CommandListCoreFamily::appendEventForProfiling(Event *event, uint64_t baseAddr = event->getGpuAddress(this->device); NEO::MemorySynchronizationCommands::addAdditionalSynchronization(*commandContainer.getCommandStream(), baseAddr, false, rootDeviceEnvironment); - appendWriteKernelTimestamp(event, beforeWalker, true, workloadPartition); + appendWriteKernelTimestamp(event, outTimeStampSyncCmds, beforeWalker, true, workloadPartition); } appendDispatchOffsetRegister(workloadPartition, false); @@ -2684,7 +2711,7 @@ ze_result_t CommandListCoreFamily::appendWriteGlobalTimestamp( return ZE_RESULT_ERROR_INVALID_ARGUMENT; } - appendEventForProfiling(signalEvent, true, false); + appendEventForProfiling(signalEvent, nullptr, true, false, false); auto allocationStruct = getAlignedAllocationData(this->device, dstptr, sizeof(uint64_t), false); if (allocationStruct.alloc == nullptr) { @@ -2715,7 +2742,7 @@ ze_result_t CommandListCoreFamily::appendWriteGlobalTimestamp( args); } - appendSignalEventPostWalker(signalEvent, nullptr, false, false); + appendSignalEventPostWalker(signalEvent, nullptr, nullptr, false, false); if (this->isInOrderExecutionEnabled()) { appendSignalInOrderDependencyCounter(signalEvent); @@ -3027,7 +3054,7 @@ void CommandListCoreFamily::updateStreamPropertiesForRegularComma auto frontEndStateAddress = NEO::PreambleHelper::getSpaceForVfeState(commandContainer.getCommandStream(), device->getHwInfo(), engineGroupType); auto frontEndStateCmd = new FrontEndStateCommand; NEO::PreambleHelper::programVfeState(frontEndStateCmd, rootDeviceEnvironment, 0, 0, device->getMaxNumHwThreads(), finalStreamState); - commandsToPatch.push_back({frontEndStateAddress, frontEndStateCmd, CommandToPatch::FrontEndState}); + commandsToPatch.push_back({frontEndStateAddress, frontEndStateCmd, 0, CommandToPatch::FrontEndState}); } if (this->frontEndStateTracking && !this->dispatchCmdListBatchBufferAsPrimary) { auto &stream = *commandContainer.getCommandStream(); @@ -3236,7 +3263,7 @@ ze_result_t CommandListCoreFamily::appendBarrier(ze_event_handle_ return ZE_RESULT_ERROR_INVALID_ARGUMENT; } - appendEventForProfiling(signalEvent, true, false); + appendEventForProfiling(signalEvent, nullptr, true, false, false); if (!this->isInOrderExecutionEnabled()) { if (isCopyOnly()) { @@ -3259,7 +3286,7 @@ ze_result_t CommandListCoreFamily::appendBarrier(ze_event_handle_ } addToMappedEventList(signalEvent); - appendSignalEventPostWalker(signalEvent, nullptr, this->isInOrderExecutionEnabled(), false); + appendSignalEventPostWalker(signalEvent, nullptr, nullptr, this->isInOrderExecutionEnabled(), false); if (isInOrderExecutionEnabled()) { appendSignalInOrderDependencyCounter(signalEvent); @@ -3392,7 +3419,7 @@ ze_result_t CommandListCoreFamily::appendWaitOnMemory(void *desc, return ZE_RESULT_ERROR_INVALID_ARGUMENT; } - appendEventForProfiling(signalEvent, true, false); + appendEventForProfiling(signalEvent, nullptr, true, false, false); commandContainer.addToResidencyContainer(srcAllocationStruct.alloc); uint64_t gpuAddress = static_cast(srcAllocationStruct.alignedAllocationPtr); @@ -3424,7 +3451,7 @@ ze_result_t CommandListCoreFamily::appendWaitOnMemory(void *desc, NEO::MemorySynchronizationCommands::addAdditionalSynchronization(*commandContainer.getCommandStream(), gpuAddress, true, rootDeviceEnvironment); } - appendSignalEventPostWalker(signalEvent, nullptr, false, false); + appendSignalEventPostWalker(signalEvent, nullptr, nullptr, false, false); if (this->isInOrderExecutionEnabled()) { appendSignalInOrderDependencyCounter(signalEvent); diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_dg2_and_pvc.inl b/level_zero/core/source/cmdlist/cmdlist_hw_dg2_and_pvc.inl index ae32db1a49..aa42db3df5 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_dg2_and_pvc.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_dg2_and_pvc.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2023 Intel Corporation + * Copyright (C) 2023-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -16,6 +16,6 @@ inline NEO::PreemptionMode CommandListCoreFamily::obtainKernelPre } template -void CommandListCoreFamily::adjustWriteKernelTimestamp(uint64_t globalAddress, uint64_t contextAddress, bool maskLsb, uint32_t mask, bool workloadPartition) {} +void CommandListCoreFamily::adjustWriteKernelTimestamp(uint64_t globalAddress, uint64_t contextAddress, uint64_t baseAddress, CommandToPatchContainer *outTimeStampSyncCmds, bool maskLsb, uint32_t mask, bool workloadPartition) {} } // namespace L0 diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_skl_to_tgllp.inl b/level_zero/core/source/cmdlist/cmdlist_hw_skl_to_tgllp.inl index 6c589d4075..7fc2d0d8cd 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_skl_to_tgllp.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_skl_to_tgllp.inl @@ -38,7 +38,7 @@ size_t CommandListCoreFamily::getReserveSshSize() { } template -void CommandListCoreFamily::adjustWriteKernelTimestamp(uint64_t globalAddress, uint64_t contextAddress, bool maskLsb, uint32_t mask, bool workloadPartition) {} +void CommandListCoreFamily::adjustWriteKernelTimestamp(uint64_t globalAddress, uint64_t contextAddress, uint64_t baseAddress, CommandToPatchContainer *outTimeStampSyncCmds, bool maskLsb, uint32_t mask, bool workloadPartition) {} template bool CommandListCoreFamily::isInOrderNonWalkerSignalingRequired(const Event *event) const { @@ -108,7 +108,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K dsh = dshReserveArgs.indirectHeapReservation; } - appendEventForProfiling(event, true, false); + appendEventForProfiling(event, nullptr, true, false, false); auto perThreadScratchSize = std::max(this->getCommandListPerThreadScratchSize(0u), kernel->getImmutableData()->getDescriptor().kernelAttributes.perThreadScratchSize[0]); this->setCommandListPerThreadScratchSize(0u, perThreadScratchSize); @@ -247,7 +247,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K *reinterpret_cast(surfaceStateSpace) = surfaceState; } - appendSignalEventPostWalker(event, nullptr, false, false); + appendSignalEventPostWalker(event, nullptr, nullptr, false, false); commandContainer.addToResidencyContainer(kernelImmutableData->getIsaGraphicsAllocation()); auto &residencyContainer = kernel->getResidencyContainer(); @@ -264,16 +264,16 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K } if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::debugManager.flags.PauseOnEnqueue.get(), neoDevice->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::BeforeWorkload)) { - commandsToPatch.push_back({0x0, additionalCommands.front(), CommandToPatch::PauseOnEnqueuePipeControlStart}); + commandsToPatch.push_back({0x0, additionalCommands.front(), 0, CommandToPatch::PauseOnEnqueuePipeControlStart}); additionalCommands.pop_front(); - commandsToPatch.push_back({0x0, additionalCommands.front(), CommandToPatch::PauseOnEnqueueSemaphoreStart}); + commandsToPatch.push_back({0x0, additionalCommands.front(), 0, CommandToPatch::PauseOnEnqueueSemaphoreStart}); additionalCommands.pop_front(); } if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::debugManager.flags.PauseOnEnqueue.get(), neoDevice->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::AfterWorkload)) { - commandsToPatch.push_back({0x0, additionalCommands.front(), CommandToPatch::PauseOnEnqueuePipeControlEnd}); + commandsToPatch.push_back({0x0, additionalCommands.front(), 0, CommandToPatch::PauseOnEnqueuePipeControlEnd}); additionalCommands.pop_front(); - commandsToPatch.push_back({0x0, additionalCommands.front(), CommandToPatch::PauseOnEnqueueSemaphoreEnd}); + commandsToPatch.push_back({0x0, additionalCommands.front(), 0, CommandToPatch::PauseOnEnqueueSemaphoreEnd}); additionalCommands.pop_front(); } @@ -329,11 +329,11 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelSplit(Kernel } template -void CommandListCoreFamily::appendEventForProfilingAllWalkers(Event *event, void **syncCmdBuffer, bool beforeWalker, bool singlePacketEvent, bool skipAddingEventToResidency) { +void CommandListCoreFamily::appendEventForProfilingAllWalkers(Event *event, void **syncCmdBuffer, CommandToPatchContainer *outTimeStampSyncCmds, bool beforeWalker, bool singlePacketEvent, bool skipAddingEventToResidency) { if (beforeWalker) { - appendEventForProfiling(event, true, false); + appendEventForProfiling(event, outTimeStampSyncCmds, true, false, skipAddingEventToResidency); } else { - appendSignalEventPostWalker(event, syncCmdBuffer, false, skipAddingEventToResidency); + appendSignalEventPostWalker(event, syncCmdBuffer, outTimeStampSyncCmds, false, skipAddingEventToResidency); } } diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl index e94403fc16..86aef2a5c0 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl @@ -286,7 +286,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K std::list additionalCommands; if (compactEvent) { - appendEventForProfilingAllWalkers(compactEvent, nullptr, true, true, false); + appendEventForProfilingAllWalkers(compactEvent, nullptr, launchParams.outListCommands, true, true, launchParams.omitAddingEventResidency); } bool inOrderExecSignalRequired = (this->isInOrderExecutionEnabled() && !launchParams.isKernelSplitOperation && !launchParams.pipeControlSignalling); @@ -353,7 +353,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K launchParams.outSyncCommand->type = CommandToPatch::SignalEventPostSyncPipeControl; syncCmdBuffer = &launchParams.outSyncCommand->pDestination; } - appendEventForProfilingAllWalkers(compactEvent, syncCmdBuffer, false, true, launchParams.omitAddingEventResidency); + appendEventForProfilingAllWalkers(compactEvent, syncCmdBuffer, launchParams.outListCommands, false, true, launchParams.omitAddingEventResidency); } else if (event) { event->setPacketsInUse(partitionCount); if (l3FlushEnable) { @@ -426,16 +426,16 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K } if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::debugManager.flags.PauseOnEnqueue.get(), neoDevice->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::BeforeWorkload)) { - commandsToPatch.push_back({0x0, additionalCommands.front(), CommandToPatch::PauseOnEnqueuePipeControlStart}); + commandsToPatch.push_back({0x0, additionalCommands.front(), 0, CommandToPatch::PauseOnEnqueuePipeControlStart}); additionalCommands.pop_front(); - commandsToPatch.push_back({0x0, additionalCommands.front(), CommandToPatch::PauseOnEnqueueSemaphoreStart}); + commandsToPatch.push_back({0x0, additionalCommands.front(), 0, CommandToPatch::PauseOnEnqueueSemaphoreStart}); additionalCommands.pop_front(); } if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::debugManager.flags.PauseOnEnqueue.get(), neoDevice->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::AfterWorkload)) { - commandsToPatch.push_back({0x0, additionalCommands.front(), CommandToPatch::PauseOnEnqueuePipeControlEnd}); + commandsToPatch.push_back({0x0, additionalCommands.front(), 0, CommandToPatch::PauseOnEnqueuePipeControlEnd}); additionalCommands.pop_front(); - commandsToPatch.push_back({0x0, additionalCommands.front(), CommandToPatch::PauseOnEnqueueSemaphoreEnd}); + commandsToPatch.push_back({0x0, additionalCommands.front(), 0, CommandToPatch::PauseOnEnqueueSemaphoreEnd}); additionalCommands.pop_front(); } @@ -511,12 +511,12 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelSplit(Kernel } template -void CommandListCoreFamily::appendEventForProfilingAllWalkers(Event *event, void **syncCmdBuffer, bool beforeWalker, bool singlePacketEvent, bool skipAddingEventToResidency) { +void CommandListCoreFamily::appendEventForProfilingAllWalkers(Event *event, void **syncCmdBuffer, CommandToPatchContainer *outTimeStampSyncCmds, bool beforeWalker, bool singlePacketEvent, bool skipAddingEventToResidency) { if (isCopyOnly() || singlePacketEvent) { if (beforeWalker) { - appendEventForProfiling(event, true, false); + appendEventForProfiling(event, outTimeStampSyncCmds, true, false, skipAddingEventToResidency); } else { - appendSignalEventPostWalker(event, syncCmdBuffer, false, skipAddingEventToResidency); + appendSignalEventPostWalker(event, syncCmdBuffer, outTimeStampSyncCmds, false, skipAddingEventToResidency); } } else { if (event) { diff --git a/level_zero/core/source/cmdlist/cmdlist_launch_params.h b/level_zero/core/source/cmdlist/cmdlist_launch_params.h index 0a02a78cfd..b35c2cef6a 100644 --- a/level_zero/core/source/cmdlist/cmdlist_launch_params.h +++ b/level_zero/core/source/cmdlist/cmdlist_launch_params.h @@ -9,6 +9,7 @@ #include "shared/source/helpers/definitions/command_encoder_args.h" +#include #include #include @@ -24,17 +25,21 @@ struct CommandToPatch { ComputeWalker, SignalEventPostSyncPipeControl, WaitEventSemaphoreWait, + TimestampEventPostSyncStoreRegMem, Invalid }; void *pDestination = nullptr; void *pCommand = nullptr; + size_t offset = 0; CommandType type = Invalid; }; +using CommandToPatchContainer = std::vector; + struct CmdListKernelLaunchParams { void *outWalker = nullptr; CommandToPatch *outSyncCommand = nullptr; - std::vector *outListCommands = nullptr; + CommandToPatchContainer *outListCommands = nullptr; NEO::RequiredPartitionDim requiredPartitionDim = NEO::RequiredPartitionDim::none; NEO::RequiredDispatchWalkOrder requiredDispatchWalkOrder = NEO::RequiredDispatchWalkOrder::none; uint32_t additionalSizeParam = NEO::additionalKernelLaunchSizeParamNotSet; diff --git a/level_zero/core/source/device/bcs_split.h b/level_zero/core/source/device/bcs_split.h index 18e985c276..1f1b977822 100644 --- a/level_zero/core/source/device/bcs_split.h +++ b/level_zero/core/source/device/bcs_split.h @@ -109,7 +109,7 @@ struct BcsSplit { cmdList->addEventsToCmdList(numWaitEvents, phWaitEvents, hasRelaxedOrderingDependencies, false, true); if (signalEvent && i == 0u) { - cmdList->appendEventForProfilingAllWalkers(signalEvent, nullptr, true, true, false); + cmdList->appendEventForProfilingAllWalkers(signalEvent, nullptr, nullptr, true, true, false); } auto localSize = totalSize / engineCount; @@ -137,9 +137,9 @@ struct BcsSplit { cmdList->addEventsToCmdList(static_cast(cmdQsForSplit.size()), eventHandles.data(), hasRelaxedOrderingDependencies, false, true); if (signalEvent) { - cmdList->appendEventForProfilingAllWalkers(signalEvent, nullptr, false, true, false); + cmdList->appendEventForProfilingAllWalkers(signalEvent, nullptr, nullptr, false, true, false); } - cmdList->appendEventForProfilingAllWalkers(this->events.marker[markerEventIndex], nullptr, false, true, false); + cmdList->appendEventForProfilingAllWalkers(this->events.marker[markerEventIndex], nullptr, nullptr, false, true, false); if (cmdList->isInOrderExecutionEnabled()) { cmdList->appendSignalInOrderDependencyCounter(signalEvent); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp index 36989c5931..ed34bff83f 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp @@ -2945,7 +2945,7 @@ HWTEST2_F(CommandListCreate, givenNullEventWhenAppendEventAfterWalkerThenNothing auto usedBefore = commandList->getCmdContainer().getCommandStream()->getUsed(); - commandList->appendSignalEventPostWalker(nullptr, nullptr, false, false); + commandList->appendSignalEventPostWalker(nullptr, nullptr, nullptr, false, false); EXPECT_EQ(commandList->getCmdContainer().getCommandStream()->getUsed(), usedBefore); } diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp index ea6adc1feb..cb498094b1 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp @@ -412,7 +412,7 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent, event->signalScope = ZE_EVENT_SCOPE_FLAG_HOST; commandList->partitionCount = packets; - commandList->appendSignalEventPostWalker(event.get(), nullptr, false, false); + commandList->appendSignalEventPostWalker(event.get(), nullptr, nullptr, false, false); EXPECT_EQ(packets, event->getPacketsInUse()); auto gpuAddress = event->getCompletionFieldGpuAddress(device); @@ -472,7 +472,7 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent, event->signalScope = ZE_EVENT_SCOPE_FLAG_HOST; commandList->partitionCount = packets; - commandList->appendSignalEventPostWalker(event.get(), nullptr, false, false); + commandList->appendSignalEventPostWalker(event.get(), nullptr, nullptr, false, false); EXPECT_EQ(packets, event->getPacketsInUse()); auto gpuAddress = event->getCompletionFieldGpuAddress(device); @@ -645,7 +645,7 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent, event->setEventTimestampFlag(false); - commandList->appendSignalEventPostWalker(event.get(), nullptr, false, false); + commandList->appendSignalEventPostWalker(event.get(), nullptr, nullptr, false, false); size_t usedAfterSize = cmdStream->getUsed(); GenCmdList cmdList; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp index 114938c759..da46074bf0 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp @@ -865,7 +865,7 @@ struct CommandListSignalAllEventPacketFixture : public ModuleFixture { commandList->setupTimestampEventForMultiTile(event.get()); size_t sizeBefore = cmdStream->getUsed(); - commandList->appendEventForProfiling(event.get(), false, false); + commandList->appendEventForProfiling(event.get(), nullptr, false, false, false); size_t sizeAfter = cmdStream->getUsed(); EXPECT_EQ(ZE_RESULT_SUCCESS, result); @@ -950,7 +950,7 @@ struct CommandListSignalAllEventPacketFixture : public ModuleFixture { commandList->setupTimestampEventForMultiTile(event.get()); size_t sizeBefore = cmdStream->getUsed(); - commandList->appendSignalEventPostWalker(event.get(), nullptr, false, false); + commandList->appendSignalEventPostWalker(event.get(), nullptr, nullptr, false, false); size_t sizeAfter = cmdStream->getUsed(); EXPECT_EQ(ZE_RESULT_SUCCESS, result); @@ -2426,7 +2426,7 @@ HWTEST2_F(CommandListCreate, givenAppendSignalEventWhenSkipAddToResidencyTrueThe auto commandStreamOffset = commandContainer.getCommandStream()->getUsed(); bool skipAdd = true; - commandList->appendEventForProfilingAllWalkers(event.get(), &pipeControlBuffer, false, true, skipAdd); + commandList->appendEventForProfilingAllWalkers(event.get(), &pipeControlBuffer, nullptr, false, true, skipAdd); auto eventAllocIt = std::find(residencyContainer.begin(), residencyContainer.end(), eventAllocation); EXPECT_EQ(residencyContainer.end(), eventAllocIt); @@ -2453,7 +2453,7 @@ HWTEST2_F(CommandListCreate, givenAppendSignalEventWhenSkipAddToResidencyTrueThe commandStreamOffset = commandContainer.getCommandStream()->getUsed(); skipAdd = false; - commandList->appendEventForProfilingAllWalkers(event.get(), &pipeControlBuffer, false, true, skipAdd); + commandList->appendEventForProfilingAllWalkers(event.get(), &pipeControlBuffer, nullptr, false, true, skipAdd); eventAllocIt = std::find(residencyContainer.begin(), residencyContainer.end(), eventAllocation); EXPECT_NE(residencyContainer.end(), eventAllocIt); @@ -2477,8 +2477,75 @@ HWTEST2_F(CommandListCreate, givenAppendSignalEventWhenSkipAddToResidencyTrueThe ASSERT_EQ(postSyncPipeControl, pipeControlBuffer); } +HWTEST2_F(CommandListCreate, + givenAppendTimestampSignalEventWhenSkipAddToResidencyTrueAndOutRegMemListProvidedThenAllocationNotAddedToResidencyAndStoreRegMemCmdsStored, + IsAtLeastXeHpCore) { + using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM; + + auto commandList = std::make_unique>>(); + auto result = commandList->initialize(device, NEO::EngineGroupType::compute, 0u); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + auto &commandContainer = commandList->getCmdContainer(); + + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.count = 1; + eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; + + ze_event_desc_t eventDesc = {}; + eventDesc.index = 0; + eventDesc.signal = 0; + + auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); + ASSERT_NE(nullptr, event.get()); + + auto &residencyContainer = commandContainer.getResidencyContainer(); + auto eventAllocation = event->getPoolAllocation(device); + auto eventBaseAddress = event->getGpuAddress(device); + + CommandToPatchContainer outStoreRegMemCmdList; + + auto commandStreamOffset = commandContainer.getCommandStream()->getUsed(); + bool skipAdd = true; + + bool before = true; + commandList->appendEventForProfilingAllWalkers(event.get(), nullptr, &outStoreRegMemCmdList, before, true, skipAdd); + before = false; + commandList->appendEventForProfilingAllWalkers(event.get(), nullptr, &outStoreRegMemCmdList, before, true, skipAdd); + + auto eventAllocIt = std::find(residencyContainer.begin(), residencyContainer.end(), eventAllocation); + EXPECT_EQ(residencyContainer.end(), eventAllocIt); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer( + cmdList, + ptrOffset(commandContainer.getCommandStream()->getCpuBase(), commandStreamOffset), + commandContainer.getCommandStream()->getUsed() - commandStreamOffset)); + + auto storeRegMemList = findAll(cmdList.begin(), cmdList.end()); + ASSERT_NE(0u, storeRegMemList.size()); + ASSERT_NE(0u, outStoreRegMemCmdList.size()); + + ASSERT_EQ(storeRegMemList.size(), outStoreRegMemCmdList.size()); + + for (size_t i = 0; i < storeRegMemList.size(); i++) { + MI_STORE_REGISTER_MEM *storeRegMem = genCmdCast(*storeRegMemList[i]); + + auto &cmdToPatch = outStoreRegMemCmdList[i]; + EXPECT_EQ(CommandToPatch::TimestampEventPostSyncStoreRegMem, cmdToPatch.type); + MI_STORE_REGISTER_MEM *outStoreRegMem = genCmdCast(cmdToPatch.pDestination); + ASSERT_NE(nullptr, outStoreRegMem); + + EXPECT_EQ(storeRegMem, outStoreRegMem); + + auto cmdAddress = eventBaseAddress + cmdToPatch.offset; + EXPECT_EQ(cmdAddress, outStoreRegMem->getMemoryAddress()); + } +} + HWTEST2_F(CommandListAppendLaunchKernel, - givenL3EventCompationPlatformWhenAppendKernelWithSignalScopeEventAndCmdPatchListProvidedThenDispatchSignalPostSyncCmdAndStoreInPatchList, + givenL3EventCompactionPlatformWhenAppendKernelWithSignalScopeEventAndCmdPatchListProvidedThenDispatchSignalPostSyncCmdAndStoreInPatchList, IsAtLeastXeHpCore) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; @@ -2538,5 +2605,72 @@ HWTEST2_F(CommandListAppendLaunchKernel, EXPECT_EQ(postSyncPipeControl, signalCmd.pDestination); } +HWTEST2_F(CommandListAppendLaunchKernel, + givenL3EventCompactionPlatformWhenAppendKernelWithTimestampSignalScopeEventAndCmdPatchListProvidedThenDispatchSignalPostSyncCmdAndStoreInPatchList, + IsAtLeastXeHpCore) { + using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM; + + Mock<::L0::KernelImp> kernel; + auto mockModule = std::unique_ptr(new Mock(device, nullptr)); + kernel.module = mockModule.get(); + + auto commandList = std::make_unique>>(); + auto result = commandList->initialize(device, NEO::EngineGroupType::compute, 0u); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + commandList->dcFlushSupport = true; + commandList->compactL3FlushEventPacket = true; + + auto &commandContainer = commandList->getCmdContainer(); + + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.count = 1; + eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; + + ze_event_desc_t eventDesc = {}; + eventDesc.index = 0; + eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST; + + auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); + ASSERT_NE(nullptr, event.get()); + + auto eventBaseAddress = event->getGpuAddress(device); + + ze_group_count_t groupCount{1, 1, 1}; + CmdListKernelLaunchParams launchParams = {}; + CommandToPatchContainer outStoreRegMemCmdList; + launchParams.outListCommands = &outStoreRegMemCmdList; + auto commandStreamOffset = commandContainer.getCommandStream()->getUsed(); + result = commandList->appendLaunchKernel(kernel.toHandle(), groupCount, event->toHandle(), 0, nullptr, launchParams, false); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer( + cmdList, + ptrOffset(commandContainer.getCommandStream()->getCpuBase(), commandStreamOffset), + commandContainer.getCommandStream()->getUsed() - commandStreamOffset)); + + auto storeRegMemList = findAll(cmdList.begin(), cmdList.end()); + ASSERT_NE(0u, storeRegMemList.size()); + ASSERT_NE(0u, outStoreRegMemCmdList.size()); + + ASSERT_EQ(storeRegMemList.size(), outStoreRegMemCmdList.size()); + + for (size_t i = 0; i < storeRegMemList.size(); i++) { + MI_STORE_REGISTER_MEM *storeRegMem = genCmdCast(*storeRegMemList[i]); + + auto &cmdToPatch = outStoreRegMemCmdList[i]; + EXPECT_EQ(CommandToPatch::TimestampEventPostSyncStoreRegMem, cmdToPatch.type); + MI_STORE_REGISTER_MEM *outStoreRegMem = genCmdCast(cmdToPatch.pDestination); + ASSERT_NE(nullptr, outStoreRegMem); + + EXPECT_EQ(storeRegMem, outStoreRegMem); + + auto cmdAddress = eventBaseAddress + cmdToPatch.offset; + EXPECT_EQ(cmdAddress, outStoreRegMem->getMemoryAddress()); + } +} + } // namespace ult } // namespace L0 diff --git a/level_zero/core/test/unit_tests/xe_hpc_core/pvc/test_cmdlist_pvc.cpp b/level_zero/core/test/unit_tests/xe_hpc_core/pvc/test_cmdlist_pvc.cpp index 53fac7ce21..11a0772630 100644 --- a/level_zero/core/test/unit_tests/xe_hpc_core/pvc/test_cmdlist_pvc.cpp +++ b/level_zero/core/test/unit_tests/xe_hpc_core/pvc/test_cmdlist_pvc.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2022-2023 Intel Corporation + * Copyright (C) 2022-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -60,7 +60,7 @@ PVCTEST_F(CommandListEventFenceTestsPvc, givenCommandListWithProfilingEventAfter auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); - commandList->appendEventForProfiling(event.get(), false, false); + commandList->appendEventForProfiling(event.get(), nullptr, false, false, false); GenCmdList cmdList; ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer( diff --git a/level_zero/core/test/unit_tests/xe_hpc_core/test_cmdlist_xe_hpc_core.cpp b/level_zero/core/test/unit_tests/xe_hpc_core/test_cmdlist_xe_hpc_core.cpp index c55ba91e92..84f9aa12ad 100644 --- a/level_zero/core/test/unit_tests/xe_hpc_core/test_cmdlist_xe_hpc_core.cpp +++ b/level_zero/core/test/unit_tests/xe_hpc_core/test_cmdlist_xe_hpc_core.cpp @@ -656,7 +656,7 @@ HWTEST2_F(CommandListEventFenceTestsXeHpcCore, givenCommandListWithProfilingEven auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); - commandList->appendEventForProfiling(event.get(), false, false); + commandList->appendEventForProfiling(event.get(), nullptr, false, false, false); GenCmdList cmdList; ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer( @@ -686,7 +686,7 @@ HWTEST2_F(CommandListEventFenceTestsXeHpcCore, givenCommandListWithRegularEventA auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); - commandList->appendSignalEventPostWalker(event.get(), nullptr, false, false); + commandList->appendSignalEventPostWalker(event.get(), nullptr, nullptr, false, false); GenCmdList cmdList; ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer( diff --git a/level_zero/core/test/unit_tests/xe_hpg_core/test_cmdlist_xe_hpg_core.cpp b/level_zero/core/test/unit_tests/xe_hpg_core/test_cmdlist_xe_hpg_core.cpp index e624786240..f970b4b95d 100644 --- a/level_zero/core/test/unit_tests/xe_hpg_core/test_cmdlist_xe_hpg_core.cpp +++ b/level_zero/core/test/unit_tests/xe_hpg_core/test_cmdlist_xe_hpg_core.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021-2023 Intel Corporation + * Copyright (C) 2021-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -92,7 +92,7 @@ HWTEST2_F(CommandListCreate, givenNotCopyCommandListWhenProfilingEventBeforeComm auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); - commandList->appendEventForProfiling(event.get(), true, false); + commandList->appendEventForProfiling(event.get(), nullptr, true, false, false); GenCmdList cmdList; ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer( @@ -128,7 +128,7 @@ HWTEST2_F(CommandListCreate, givenNotCopyCommandListWhenProfilingEventAfterComma auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); - commandList->appendEventForProfiling(event.get(), false, false); + commandList->appendEventForProfiling(event.get(), nullptr, false, false, false); GenCmdList cmdList; ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer( @@ -163,7 +163,7 @@ HWTEST2_F(CommandListCreate, givenCopyCommandListWhenProfilingEventThenStoreRegC auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); - commandList->appendEventForProfiling(event.get(), false, false); + commandList->appendEventForProfiling(event.get(), nullptr, false, false, false); GenCmdList cmdList; ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer( cmdList, ptrOffset(commandList->getCmdContainer().getCommandStream()->getCpuBase(), 0), commandList->getCmdContainer().getCommandStream()->getUsed()));