feature: add timestamp postsync command list argument

Related-To: NEO-10064

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2024-02-26 17:44:07 +00:00
committed by Compute-Runtime-Automation
parent a8fbed6120
commit 8840b6d02f
13 changed files with 250 additions and 84 deletions

View File

@@ -182,7 +182,7 @@ struct CommandListCoreFamily : public CommandListImp {
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) override;
void appendMultiPartitionPrologue(uint32_t partitionDataSize) override;
void appendMultiPartitionEpilogue() override;
void appendEventForProfilingAllWalkers(Event *event, void **syncCmdBuffer, bool beforeWalker, bool singlePacketEvent, bool skipAddingEventToResidency);
void appendEventForProfilingAllWalkers(Event *event, void **syncCmdBuffer, CommandToPatchContainer *outTimeStampSyncCmds, bool beforeWalker, bool singlePacketEvent, bool skipAddingEventToResidency);
ze_result_t addEventsToCmdList(uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingAllowed, bool trackDependencies, bool waitForImplicitInOrderDependency);
ze_result_t reserveSpace(size_t size, void **ptr) override;
@@ -285,11 +285,11 @@ struct CommandListCoreFamily : public CommandListImp {
ze_result_t setGlobalWorkSizeIndirect(NEO::CrossThreadDataOffset offsets[3], uint64_t crossThreadAddress, uint32_t lws[3]);
ze_result_t programSyncBuffer(Kernel &kernel, NEO::Device &device, const ze_group_count_t &threadGroupDimensions);
void appendWriteKernelTimestamp(Event *event, bool beforeWalker, bool maskLsb, bool workloadPartition);
void adjustWriteKernelTimestamp(uint64_t globalAddress, uint64_t contextAddress, bool maskLsb, uint32_t mask, bool workloadPartition);
void appendEventForProfiling(Event *event, bool beforeWalker, bool skipBarrierForEndProfiling);
void appendWriteKernelTimestamp(Event *event, CommandToPatchContainer *outTimeStampSyncCmds, bool beforeWalker, bool maskLsb, bool workloadPartition);
void adjustWriteKernelTimestamp(uint64_t globalAddress, uint64_t contextAddress, uint64_t baseAddress, CommandToPatchContainer *outTimeStampSyncCmds, bool maskLsb, uint32_t mask, bool workloadPartition);
void appendEventForProfiling(Event *event, CommandToPatchContainer *outTimeStampSyncCmds, bool beforeWalker, bool skipBarrierForEndProfiling, bool skipAddingEventToResidency);
void appendEventForProfilingCopyCommand(Event *event, bool beforeWalker);
void appendSignalEventPostWalker(Event *event, void **syncCmdBuffer, bool skipBarrierForEndProfiling, bool skipAddingEventToResidency);
void appendSignalEventPostWalker(Event *event, void **syncCmdBuffer, CommandToPatchContainer *outTimeStampSyncCmds, bool skipBarrierForEndProfiling, bool skipAddingEventToResidency);
virtual void programStateBaseAddress(NEO::CommandContainer &container, bool useSbaProperties);
void appendComputeBarrierCommand();
NEO::PipeControlArgs createBarrierFlags();

View File

@@ -463,12 +463,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelIndirect(ze_
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
appendEventForProfiling(event, true, false);
appendEventForProfiling(event, nullptr, true, false, false);
launchParams.isIndirect = true;
ret = appendLaunchKernelWithParams(Kernel::fromHandle(kernelHandle), pDispatchArgumentsBuffer,
nullptr, launchParams);
addToMappedEventList(event);
appendSignalEventPostWalker(event, nullptr, false, false);
appendSignalEventPostWalker(event, nullptr, nullptr, false, false);
handleInOrderDependencyCounter(event, isInOrderNonWalkerSignalingRequired(event));
@@ -503,7 +503,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchMultipleKernelsInd
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
appendEventForProfiling(event, true, false);
appendEventForProfiling(event, nullptr, true, false, false);
auto allocData = device->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(static_cast<const void *>(pNumLaunchArguments));
auto alloc = allocData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
commandContainer.addToResidencyContainer(alloc);
@@ -519,7 +519,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchMultipleKernelsInd
}
}
addToMappedEventList(event);
appendSignalEventPostWalker(event, nullptr, false, false);
appendSignalEventPostWalker(event, nullptr, nullptr, false, false);
return ret;
}
@@ -603,9 +603,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryRangesBarrier(uint
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
appendEventForProfiling(signalEvent, true, false);
appendEventForProfiling(signalEvent, nullptr, true, false, false);
applyMemoryRangesBarrier(numRanges, pRangeSizes, pRanges);
appendSignalEventPostWalker(signalEvent, nullptr, false, false);
appendSignalEventPostWalker(signalEvent, nullptr, nullptr, false, false);
addToMappedEventList(signalEvent);
if (this->isInOrderExecutionEnabled()) {
@@ -1265,7 +1265,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyBlitRegion(Ali
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
appendEventForProfiling(signalEvent, true, false);
appendEventForProfiling(signalEvent, nullptr, true, false, false);
auto &rootDeviceEnvironment = device->getNEODevice()->getRootDeviceEnvironmentRef();
bool copyRegionPreferred = NEO::BlitCommandsHelper<GfxFamily>::isCopyRegionPreferred(copySizeModified, rootDeviceEnvironment, blitProperties.isSystemMemoryPoolUsed);
if (copyRegionPreferred) {
@@ -1275,7 +1275,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyBlitRegion(Ali
}
makeResidentDummyAllocation();
appendSignalEventPostWalker(signalEvent, nullptr, false, false);
appendSignalEventPostWalker(signalEvent, nullptr, nullptr, false, false);
return ZE_RESULT_SUCCESS;
}
@@ -1300,11 +1300,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendCopyImageBlit(NEO::Graph
commandContainer.addToResidencyContainer(src);
commandContainer.addToResidencyContainer(clearColorAllocation);
appendEventForProfiling(signalEvent, true, false);
appendEventForProfiling(signalEvent, nullptr, true, false, false);
NEO::BlitCommandsHelper<GfxFamily>::dispatchBlitCommandsForImageRegion(blitProperties, *commandContainer.getCommandStream(), *dummyBlitWa.rootDeviceEnvironment);
makeResidentDummyAllocation();
appendSignalEventPostWalker(signalEvent, nullptr, false, false);
appendSignalEventPostWalker(signalEvent, nullptr, nullptr, false, false);
return ZE_RESULT_SUCCESS;
}
@@ -1462,7 +1462,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
launchParams.pipeControlSignalling = (signalEvent && singlePipeControlPacket) || dstAllocationStruct.needsFlush;
appendEventForProfilingAllWalkers(signalEvent, nullptr, true, singlePipeControlPacket, false);
appendEventForProfilingAllWalkers(signalEvent, nullptr, nullptr, true, singlePipeControlPacket, false);
if (isCopyOnly()) {
ret = appendMemoryCopyBlit(dstAllocationStruct.alignedAllocationPtr,
@@ -1527,7 +1527,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
}
}
appendEventForProfilingAllWalkers(signalEvent, nullptr, false, singlePipeControlPacket, false);
appendEventForProfilingAllWalkers(signalEvent, nullptr, nullptr, false, singlePipeControlPacket, false);
addFlushRequiredCommand(dstAllocationStruct.needsFlush, signalEvent);
addToMappedEventList(signalEvent);
@@ -1921,7 +1921,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
launchParams.isKernelSplitOperation = (fillArguments.leftRemainingBytes > 0 || fillArguments.rightRemainingBytes > 0);
bool singlePipeControlPacket = eventSignalPipeControl(launchParams.isKernelSplitOperation, dcFlush);
appendEventForProfilingAllWalkers(signalEvent, nullptr, true, singlePipeControlPacket, false);
appendEventForProfilingAllWalkers(signalEvent, nullptr, nullptr, true, singlePipeControlPacket, false);
if (fillArguments.leftRemainingBytes > 0) {
launchParams.numKernelsInSplitLaunch++;
@@ -2066,7 +2066,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
}
addToMappedEventList(signalEvent);
appendEventForProfilingAllWalkers(signalEvent, nullptr, false, singlePipeControlPacket, false);
appendEventForProfilingAllWalkers(signalEvent, nullptr, nullptr, false, singlePipeControlPacket, false);
addFlushRequiredCommand(hostPointerNeedsFlush, signalEvent);
bool nonWalkerInOrderCmdChaining = false;
@@ -2115,7 +2115,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBlitFill(void *ptr,
}
auto neoDevice = device->getNEODevice();
appendEventForProfiling(signalEvent, true, false);
appendEventForProfiling(signalEvent, nullptr, true, false, false);
NEO::GraphicsAllocation *gpuAllocation = device->getDriverHandle()->getDriverSystemMemoryAllocation(ptr,
size,
neoDevice->getRootDeviceIndex(),
@@ -2143,7 +2143,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBlitFill(void *ptr,
neoDevice->getRootDeviceEnvironmentRef());
makeResidentDummyAllocation();
appendSignalEventPostWalker(signalEvent, nullptr, false, false);
appendSignalEventPostWalker(signalEvent, nullptr, nullptr, false, false);
if (isInOrderExecutionEnabled()) {
appendSignalInOrderDependencyCounter(signalEvent);
@@ -2154,12 +2154,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBlitFill(void *ptr,
}
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::appendSignalEventPostWalker(Event *event, void **syncCmdBuffer, bool skipBarrierForEndProfiling, bool skipAddingEventToResidency) {
void CommandListCoreFamily<gfxCoreFamily>::appendSignalEventPostWalker(Event *event, void **syncCmdBuffer, CommandToPatchContainer *outTimeStampSyncCmds, bool skipBarrierForEndProfiling, bool skipAddingEventToResidency) {
if (event == nullptr || !event->getPoolAllocation(this->device)) {
return;
}
if (event->isEventTimestampFlagSet()) {
appendEventForProfiling(event, false, skipBarrierForEndProfiling);
appendEventForProfiling(event, outTimeStampSyncCmds, false, skipBarrierForEndProfiling, skipAddingEventToResidency);
} else {
event->resetKernelCountAndPacketUsedCount();
if (!skipAddingEventToResidency) {
@@ -2186,7 +2186,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfilingCopyCommand(Ev
makeResidentDummyAllocation();
dispatchEventPostSyncOperation(event, nullptr, Event::STATE_SIGNALED, true, false, false, false);
}
appendWriteKernelTimestamp(event, beforeWalker, false, false);
appendWriteKernelTimestamp(event, nullptr, beforeWalker, false, false);
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -2600,29 +2600,54 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::programSyncBuffer(Kernel &kern
}
// Emits the commands that store the global and the context timestamp registers
// into the event packet, at the event's start offsets when beforeWalker is true
// and at its end offsets otherwise.
//
// NOTE(review): this text is a diff rendering without +/- markers, so a replaced
// line and its replacement appear back to back; the lines that mention
// outTimeStampSyncCmds / *PostSyncCmdBuffer are the updated ones.
//
// event                - supplies the packet address and the start/end offsets.
// outTimeStampSyncCmds - optional output list; when non-null, two entries of type
//                        TimestampEventPostSyncStoreRegMem are appended, the
//                        global one first and the context one second.
// beforeWalker         - selects the start offsets (true) or end offsets (false).
// maskLsb              - true routes the writes through encodeBitwiseAndVal with
//                        `mask`; false uses EncodeStoreMMIO::encode.
// workloadPartition    - forwarded unchanged to the encoders and to
//                        adjustWriteKernelTimestamp.
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::appendWriteKernelTimestamp(Event *event, bool beforeWalker, bool maskLsb, bool workloadPartition) {
void CommandListCoreFamily<gfxCoreFamily>::appendWriteKernelTimestamp(Event *event, CommandToPatchContainer *outTimeStampSyncCmds, bool beforeWalker, bool maskLsb, bool workloadPartition) {
// All bits set except bit 0; only used on the maskLsb path.
constexpr uint32_t mask = 0xfffffffe;
auto baseAddr = event->getPacketAddress(this->device);
auto contextOffset = beforeWalker ? event->getContextStartOffset() : event->getContextEndOffset();
auto globalOffset = beforeWalker ? event->getGlobalStartOffset() : event->getGlobalEndOffset();
auto contextOffset = beforeWalker ? event->getContextStartOffset() : event->getContextEndOffset();
// The output slots stay null unless the caller passed a list, so the encoders
// receive nullptr exactly as before in that case.
void **globalPostSyncCmdBuffer = nullptr;
void **contextPostSyncCmdBuffer = nullptr;
void *globalPostSyncCmd = nullptr;
void *contextPostSyncCmd = nullptr;
if (outTimeStampSyncCmds != nullptr) {
globalPostSyncCmdBuffer = &globalPostSyncCmd;
contextPostSyncCmdBuffer = &contextPostSyncCmd;
}
// Destination addresses are the packet address plus the selected offset.
uint64_t globalAddress = ptrOffset(baseAddr, globalOffset);
uint64_t contextAddress = ptrOffset(baseAddr, contextOffset);
if (maskLsb) {
NEO::EncodeMathMMIO<GfxFamily>::encodeBitwiseAndVal(commandContainer, RegisterOffsets::globalTimestampLdw, mask, globalAddress, workloadPartition, nullptr);
NEO::EncodeMathMMIO<GfxFamily>::encodeBitwiseAndVal(commandContainer, RegisterOffsets::gpThreadTimeRegAddressOffsetLow, mask, contextAddress, workloadPartition, nullptr);
NEO::EncodeMathMMIO<GfxFamily>::encodeBitwiseAndVal(commandContainer, RegisterOffsets::globalTimestampLdw, mask, globalAddress, workloadPartition, globalPostSyncCmdBuffer);
NEO::EncodeMathMMIO<GfxFamily>::encodeBitwiseAndVal(commandContainer, RegisterOffsets::gpThreadTimeRegAddressOffsetLow, mask, contextAddress, workloadPartition, contextPostSyncCmdBuffer);
} else {
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), RegisterOffsets::globalTimestampLdw, globalAddress, workloadPartition, nullptr);
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), RegisterOffsets::gpThreadTimeRegAddressOffsetLow, contextAddress, workloadPartition, nullptr);
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), RegisterOffsets::globalTimestampLdw, globalAddress, workloadPartition, globalPostSyncCmdBuffer);
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), RegisterOffsets::gpThreadTimeRegAddressOffsetLow, contextAddress, workloadPartition, contextPostSyncCmdBuffer);
}
adjustWriteKernelTimestamp(globalAddress, contextAddress, maskLsb, mask, workloadPartition);
// One CommandToPatch object is reused for both entries: type is set once, then
// offset and pDestination are overwritten between the two push_back calls.
// pDestination is presumably filled in by the encoder through the slot passed
// above - TODO confirm against the encoder implementation.
if (outTimeStampSyncCmds != nullptr) {
CommandToPatch ctxCmd;
ctxCmd.type = CommandToPatch::TimestampEventPostSyncStoreRegMem;
ctxCmd.offset = globalOffset;
ctxCmd.pDestination = globalPostSyncCmd;
outTimeStampSyncCmds->push_back(ctxCmd);
ctxCmd.offset = contextOffset;
ctxCmd.pDestination = contextPostSyncCmd;
outTimeStampSyncCmds->push_back(ctxCmd);
}
// The updated call additionally hands over the packet base address and the
// output list.
adjustWriteKernelTimestamp(globalAddress, contextAddress, baseAddr, outTimeStampSyncCmds, maskLsb, mask, workloadPartition);
}
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfiling(Event *event, bool beforeWalker, bool skipBarrierForEndProfiling) {
void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfiling(Event *event, CommandToPatchContainer *outTimeStampSyncCmds, bool beforeWalker, bool skipBarrierForEndProfiling, bool skipAddingEventToResidency) {
if (!event) {
return;
}
@@ -2634,7 +2659,9 @@ void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfiling(Event *event,
return;
}
commandContainer.addToResidencyContainer(event->getPoolAllocation(this->device));
if (!skipAddingEventToResidency) {
commandContainer.addToResidencyContainer(event->getPoolAllocation(this->device));
}
bool workloadPartition = isTimestampEventForMultiTile(event);
appendDispatchOffsetRegister(workloadPartition, true);
@@ -2642,7 +2669,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfiling(Event *event,
if (beforeWalker) {
event->resetKernelCountAndPacketUsedCount();
bool workloadPartition = setupTimestampEventForMultiTile(event);
appendWriteKernelTimestamp(event, beforeWalker, true, workloadPartition);
appendWriteKernelTimestamp(event, outTimeStampSyncCmds, beforeWalker, true, workloadPartition);
} else {
dispatchEventPostSyncOperation(event, nullptr, Event::STATE_SIGNALED, true, false, false, true);
@@ -2658,7 +2685,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfiling(Event *event,
uint64_t baseAddr = event->getGpuAddress(this->device);
NEO::MemorySynchronizationCommands<GfxFamily>::addAdditionalSynchronization(*commandContainer.getCommandStream(), baseAddr, false, rootDeviceEnvironment);
appendWriteKernelTimestamp(event, beforeWalker, true, workloadPartition);
appendWriteKernelTimestamp(event, outTimeStampSyncCmds, beforeWalker, true, workloadPartition);
}
appendDispatchOffsetRegister(workloadPartition, false);
@@ -2684,7 +2711,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteGlobalTimestamp(
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
appendEventForProfiling(signalEvent, true, false);
appendEventForProfiling(signalEvent, nullptr, true, false, false);
auto allocationStruct = getAlignedAllocationData(this->device, dstptr, sizeof(uint64_t), false);
if (allocationStruct.alloc == nullptr) {
@@ -2715,7 +2742,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteGlobalTimestamp(
args);
}
appendSignalEventPostWalker(signalEvent, nullptr, false, false);
appendSignalEventPostWalker(signalEvent, nullptr, nullptr, false, false);
if (this->isInOrderExecutionEnabled()) {
appendSignalInOrderDependencyCounter(signalEvent);
@@ -3027,7 +3054,7 @@ void CommandListCoreFamily<gfxCoreFamily>::updateStreamPropertiesForRegularComma
auto frontEndStateAddress = NEO::PreambleHelper<GfxFamily>::getSpaceForVfeState(commandContainer.getCommandStream(), device->getHwInfo(), engineGroupType);
auto frontEndStateCmd = new FrontEndStateCommand;
NEO::PreambleHelper<GfxFamily>::programVfeState(frontEndStateCmd, rootDeviceEnvironment, 0, 0, device->getMaxNumHwThreads(), finalStreamState);
commandsToPatch.push_back({frontEndStateAddress, frontEndStateCmd, CommandToPatch::FrontEndState});
commandsToPatch.push_back({frontEndStateAddress, frontEndStateCmd, 0, CommandToPatch::FrontEndState});
}
if (this->frontEndStateTracking && !this->dispatchCmdListBatchBufferAsPrimary) {
auto &stream = *commandContainer.getCommandStream();
@@ -3236,7 +3263,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBarrier(ze_event_handle_
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
appendEventForProfiling(signalEvent, true, false);
appendEventForProfiling(signalEvent, nullptr, true, false, false);
if (!this->isInOrderExecutionEnabled()) {
if (isCopyOnly()) {
@@ -3259,7 +3286,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBarrier(ze_event_handle_
}
addToMappedEventList(signalEvent);
appendSignalEventPostWalker(signalEvent, nullptr, this->isInOrderExecutionEnabled(), false);
appendSignalEventPostWalker(signalEvent, nullptr, nullptr, this->isInOrderExecutionEnabled(), false);
if (isInOrderExecutionEnabled()) {
appendSignalInOrderDependencyCounter(signalEvent);
@@ -3392,7 +3419,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnMemory(void *desc,
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
appendEventForProfiling(signalEvent, true, false);
appendEventForProfiling(signalEvent, nullptr, true, false, false);
commandContainer.addToResidencyContainer(srcAllocationStruct.alloc);
uint64_t gpuAddress = static_cast<uint64_t>(srcAllocationStruct.alignedAllocationPtr);
@@ -3424,7 +3451,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnMemory(void *desc,
NEO::MemorySynchronizationCommands<GfxFamily>::addAdditionalSynchronization(*commandContainer.getCommandStream(), gpuAddress, true, rootDeviceEnvironment);
}
appendSignalEventPostWalker(signalEvent, nullptr, false, false);
appendSignalEventPostWalker(signalEvent, nullptr, nullptr, false, false);
if (this->isInOrderExecutionEnabled()) {
appendSignalInOrderDependencyCounter(signalEvent);

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2023 Intel Corporation
* Copyright (C) 2023-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -16,6 +16,6 @@ inline NEO::PreemptionMode CommandListCoreFamily<gfxCoreFamily>::obtainKernelPre
}
// Empty definition of adjustWriteKernelTimestamp: the body is `{}`, so none of the
// arguments are read and no commands are emitted here.
// NOTE(review): the two signature lines are the replaced and the replacing line
// of a diff rendering; the one with baseAddress / outTimeStampSyncCmds is the
// updated signature.
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::adjustWriteKernelTimestamp(uint64_t globalAddress, uint64_t contextAddress, bool maskLsb, uint32_t mask, bool workloadPartition) {}
void CommandListCoreFamily<gfxCoreFamily>::adjustWriteKernelTimestamp(uint64_t globalAddress, uint64_t contextAddress, uint64_t baseAddress, CommandToPatchContainer *outTimeStampSyncCmds, bool maskLsb, uint32_t mask, bool workloadPartition) {}
} // namespace L0

View File

@@ -38,7 +38,7 @@ size_t CommandListCoreFamily<gfxCoreFamily>::getReserveSshSize() {
}
// Empty definition of adjustWriteKernelTimestamp: the body is `{}`, so the
// addresses, the output list and the flags are all ignored here.
// NOTE(review): the two signature lines are the replaced and the replacing line
// of a diff rendering; the one with baseAddress / outTimeStampSyncCmds is the
// updated signature.
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::adjustWriteKernelTimestamp(uint64_t globalAddress, uint64_t contextAddress, bool maskLsb, uint32_t mask, bool workloadPartition) {}
void CommandListCoreFamily<gfxCoreFamily>::adjustWriteKernelTimestamp(uint64_t globalAddress, uint64_t contextAddress, uint64_t baseAddress, CommandToPatchContainer *outTimeStampSyncCmds, bool maskLsb, uint32_t mask, bool workloadPartition) {}
template <GFXCORE_FAMILY gfxCoreFamily>
bool CommandListCoreFamily<gfxCoreFamily>::isInOrderNonWalkerSignalingRequired(const Event *event) const {
@@ -108,7 +108,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
dsh = dshReserveArgs.indirectHeapReservation;
}
appendEventForProfiling(event, true, false);
appendEventForProfiling(event, nullptr, true, false, false);
auto perThreadScratchSize = std::max<std::uint32_t>(this->getCommandListPerThreadScratchSize(0u),
kernel->getImmutableData()->getDescriptor().kernelAttributes.perThreadScratchSize[0]);
this->setCommandListPerThreadScratchSize(0u, perThreadScratchSize);
@@ -247,7 +247,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
*reinterpret_cast<typename GfxFamily::RENDER_SURFACE_STATE *>(surfaceStateSpace) = surfaceState;
}
appendSignalEventPostWalker(event, nullptr, false, false);
appendSignalEventPostWalker(event, nullptr, nullptr, false, false);
commandContainer.addToResidencyContainer(kernelImmutableData->getIsaGraphicsAllocation());
auto &residencyContainer = kernel->getResidencyContainer();
@@ -264,16 +264,16 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
}
if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::debugManager.flags.PauseOnEnqueue.get(), neoDevice->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::BeforeWorkload)) {
commandsToPatch.push_back({0x0, additionalCommands.front(), CommandToPatch::PauseOnEnqueuePipeControlStart});
commandsToPatch.push_back({0x0, additionalCommands.front(), 0, CommandToPatch::PauseOnEnqueuePipeControlStart});
additionalCommands.pop_front();
commandsToPatch.push_back({0x0, additionalCommands.front(), CommandToPatch::PauseOnEnqueueSemaphoreStart});
commandsToPatch.push_back({0x0, additionalCommands.front(), 0, CommandToPatch::PauseOnEnqueueSemaphoreStart});
additionalCommands.pop_front();
}
if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::debugManager.flags.PauseOnEnqueue.get(), neoDevice->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::AfterWorkload)) {
commandsToPatch.push_back({0x0, additionalCommands.front(), CommandToPatch::PauseOnEnqueuePipeControlEnd});
commandsToPatch.push_back({0x0, additionalCommands.front(), 0, CommandToPatch::PauseOnEnqueuePipeControlEnd});
additionalCommands.pop_front();
commandsToPatch.push_back({0x0, additionalCommands.front(), CommandToPatch::PauseOnEnqueueSemaphoreEnd});
commandsToPatch.push_back({0x0, additionalCommands.front(), 0, CommandToPatch::PauseOnEnqueueSemaphoreEnd});
additionalCommands.pop_front();
}
@@ -329,11 +329,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelSplit(Kernel
}
// Routes event profiling around a walker: beforeWalker == true calls
// appendEventForProfiling, otherwise appendSignalEventPostWalker.
//
// NOTE(review): this text is a diff rendering without +/- markers, so each
// replaced line is followed directly by its replacement; the lines that pass
// outTimeStampSyncCmds are the updated ones.
//
// event                      - forwarded to whichever callee is chosen.
// syncCmdBuffer              - forwarded only on the post-walker path.
// outTimeStampSyncCmds       - forwarded on both paths by the updated lines.
// beforeWalker               - selects the callee.
// singlePacketEvent          - not read in this definition.
// skipAddingEventToResidency - forwarded on the post-walker path; the updated
//                              before-walker call forwards it as well.
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfilingAllWalkers(Event *event, void **syncCmdBuffer, bool beforeWalker, bool singlePacketEvent, bool skipAddingEventToResidency) {
void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfilingAllWalkers(Event *event, void **syncCmdBuffer, CommandToPatchContainer *outTimeStampSyncCmds, bool beforeWalker, bool singlePacketEvent, bool skipAddingEventToResidency) {
if (beforeWalker) {
appendEventForProfiling(event, true, false);
appendEventForProfiling(event, outTimeStampSyncCmds, true, false, skipAddingEventToResidency);
} else {
appendSignalEventPostWalker(event, syncCmdBuffer, false, skipAddingEventToResidency);
appendSignalEventPostWalker(event, syncCmdBuffer, outTimeStampSyncCmds, false, skipAddingEventToResidency);
}
}

View File

@@ -286,7 +286,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
std::list<void *> additionalCommands;
if (compactEvent) {
appendEventForProfilingAllWalkers(compactEvent, nullptr, true, true, false);
appendEventForProfilingAllWalkers(compactEvent, nullptr, launchParams.outListCommands, true, true, launchParams.omitAddingEventResidency);
}
bool inOrderExecSignalRequired = (this->isInOrderExecutionEnabled() && !launchParams.isKernelSplitOperation && !launchParams.pipeControlSignalling);
@@ -353,7 +353,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
launchParams.outSyncCommand->type = CommandToPatch::SignalEventPostSyncPipeControl;
syncCmdBuffer = &launchParams.outSyncCommand->pDestination;
}
appendEventForProfilingAllWalkers(compactEvent, syncCmdBuffer, false, true, launchParams.omitAddingEventResidency);
appendEventForProfilingAllWalkers(compactEvent, syncCmdBuffer, launchParams.outListCommands, false, true, launchParams.omitAddingEventResidency);
} else if (event) {
event->setPacketsInUse(partitionCount);
if (l3FlushEnable) {
@@ -426,16 +426,16 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
}
if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::debugManager.flags.PauseOnEnqueue.get(), neoDevice->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::BeforeWorkload)) {
commandsToPatch.push_back({0x0, additionalCommands.front(), CommandToPatch::PauseOnEnqueuePipeControlStart});
commandsToPatch.push_back({0x0, additionalCommands.front(), 0, CommandToPatch::PauseOnEnqueuePipeControlStart});
additionalCommands.pop_front();
commandsToPatch.push_back({0x0, additionalCommands.front(), CommandToPatch::PauseOnEnqueueSemaphoreStart});
commandsToPatch.push_back({0x0, additionalCommands.front(), 0, CommandToPatch::PauseOnEnqueueSemaphoreStart});
additionalCommands.pop_front();
}
if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::debugManager.flags.PauseOnEnqueue.get(), neoDevice->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::AfterWorkload)) {
commandsToPatch.push_back({0x0, additionalCommands.front(), CommandToPatch::PauseOnEnqueuePipeControlEnd});
commandsToPatch.push_back({0x0, additionalCommands.front(), 0, CommandToPatch::PauseOnEnqueuePipeControlEnd});
additionalCommands.pop_front();
commandsToPatch.push_back({0x0, additionalCommands.front(), CommandToPatch::PauseOnEnqueueSemaphoreEnd});
commandsToPatch.push_back({0x0, additionalCommands.front(), 0, CommandToPatch::PauseOnEnqueueSemaphoreEnd});
additionalCommands.pop_front();
}
@@ -511,12 +511,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelSplit(Kernel
}
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfilingAllWalkers(Event *event, void **syncCmdBuffer, bool beforeWalker, bool singlePacketEvent, bool skipAddingEventToResidency) {
void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfilingAllWalkers(Event *event, void **syncCmdBuffer, CommandToPatchContainer *outTimeStampSyncCmds, bool beforeWalker, bool singlePacketEvent, bool skipAddingEventToResidency) {
if (isCopyOnly() || singlePacketEvent) {
if (beforeWalker) {
appendEventForProfiling(event, true, false);
appendEventForProfiling(event, outTimeStampSyncCmds, true, false, skipAddingEventToResidency);
} else {
appendSignalEventPostWalker(event, syncCmdBuffer, false, skipAddingEventToResidency);
appendSignalEventPostWalker(event, syncCmdBuffer, outTimeStampSyncCmds, false, skipAddingEventToResidency);
}
} else {
if (event) {

View File

@@ -9,6 +9,7 @@
#include "shared/source/helpers/definitions/command_encoder_args.h"
#include <cstddef>
#include <cstdint>
#include <vector>
@@ -24,17 +25,21 @@ struct CommandToPatch {
ComputeWalker,
SignalEventPostSyncPipeControl,
WaitEventSemaphoreWait,
TimestampEventPostSyncStoreRegMem,
Invalid
};
void *pDestination = nullptr;
void *pCommand = nullptr;
size_t offset = 0;
CommandType type = Invalid;
};
using CommandToPatchContainer = std::vector<CommandToPatch>;
struct CmdListKernelLaunchParams {
void *outWalker = nullptr;
CommandToPatch *outSyncCommand = nullptr;
std::vector<CommandToPatch> *outListCommands = nullptr;
CommandToPatchContainer *outListCommands = nullptr;
NEO::RequiredPartitionDim requiredPartitionDim = NEO::RequiredPartitionDim::none;
NEO::RequiredDispatchWalkOrder requiredDispatchWalkOrder = NEO::RequiredDispatchWalkOrder::none;
uint32_t additionalSizeParam = NEO::additionalKernelLaunchSizeParamNotSet;

View File

@@ -109,7 +109,7 @@ struct BcsSplit {
cmdList->addEventsToCmdList(numWaitEvents, phWaitEvents, hasRelaxedOrderingDependencies, false, true);
if (signalEvent && i == 0u) {
cmdList->appendEventForProfilingAllWalkers(signalEvent, nullptr, true, true, false);
cmdList->appendEventForProfilingAllWalkers(signalEvent, nullptr, nullptr, true, true, false);
}
auto localSize = totalSize / engineCount;
@@ -137,9 +137,9 @@ struct BcsSplit {
cmdList->addEventsToCmdList(static_cast<uint32_t>(cmdQsForSplit.size()), eventHandles.data(), hasRelaxedOrderingDependencies, false, true);
if (signalEvent) {
cmdList->appendEventForProfilingAllWalkers(signalEvent, nullptr, false, true, false);
cmdList->appendEventForProfilingAllWalkers(signalEvent, nullptr, nullptr, false, true, false);
}
cmdList->appendEventForProfilingAllWalkers(this->events.marker[markerEventIndex], nullptr, false, true, false);
cmdList->appendEventForProfilingAllWalkers(this->events.marker[markerEventIndex], nullptr, nullptr, false, true, false);
if (cmdList->isInOrderExecutionEnabled()) {
cmdList->appendSignalInOrderDependencyCounter(signalEvent);

View File

@@ -2945,7 +2945,7 @@ HWTEST2_F(CommandListCreate, givenNullEventWhenAppendEventAfterWalkerThenNothing
auto usedBefore = commandList->getCmdContainer().getCommandStream()->getUsed();
commandList->appendSignalEventPostWalker(nullptr, nullptr, false, false);
commandList->appendSignalEventPostWalker(nullptr, nullptr, nullptr, false, false);
EXPECT_EQ(commandList->getCmdContainer().getCommandStream()->getUsed(), usedBefore);
}

View File

@@ -412,7 +412,7 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent,
event->signalScope = ZE_EVENT_SCOPE_FLAG_HOST;
commandList->partitionCount = packets;
commandList->appendSignalEventPostWalker(event.get(), nullptr, false, false);
commandList->appendSignalEventPostWalker(event.get(), nullptr, nullptr, false, false);
EXPECT_EQ(packets, event->getPacketsInUse());
auto gpuAddress = event->getCompletionFieldGpuAddress(device);
@@ -472,7 +472,7 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent,
event->signalScope = ZE_EVENT_SCOPE_FLAG_HOST;
commandList->partitionCount = packets;
commandList->appendSignalEventPostWalker(event.get(), nullptr, false, false);
commandList->appendSignalEventPostWalker(event.get(), nullptr, nullptr, false, false);
EXPECT_EQ(packets, event->getPacketsInUse());
auto gpuAddress = event->getCompletionFieldGpuAddress(device);
@@ -645,7 +645,7 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent,
event->setEventTimestampFlag(false);
commandList->appendSignalEventPostWalker(event.get(), nullptr, false, false);
commandList->appendSignalEventPostWalker(event.get(), nullptr, nullptr, false, false);
size_t usedAfterSize = cmdStream->getUsed();
GenCmdList cmdList;

View File

@@ -865,7 +865,7 @@ struct CommandListSignalAllEventPacketFixture : public ModuleFixture {
commandList->setupTimestampEventForMultiTile(event.get());
size_t sizeBefore = cmdStream->getUsed();
commandList->appendEventForProfiling(event.get(), false, false);
commandList->appendEventForProfiling(event.get(), nullptr, false, false, false);
size_t sizeAfter = cmdStream->getUsed();
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
@@ -950,7 +950,7 @@ struct CommandListSignalAllEventPacketFixture : public ModuleFixture {
commandList->setupTimestampEventForMultiTile(event.get());
size_t sizeBefore = cmdStream->getUsed();
commandList->appendSignalEventPostWalker(event.get(), nullptr, false, false);
commandList->appendSignalEventPostWalker(event.get(), nullptr, nullptr, false, false);
size_t sizeAfter = cmdStream->getUsed();
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
@@ -2426,7 +2426,7 @@ HWTEST2_F(CommandListCreate, givenAppendSignalEventWhenSkipAddToResidencyTrueThe
auto commandStreamOffset = commandContainer.getCommandStream()->getUsed();
bool skipAdd = true;
commandList->appendEventForProfilingAllWalkers(event.get(), &pipeControlBuffer, false, true, skipAdd);
commandList->appendEventForProfilingAllWalkers(event.get(), &pipeControlBuffer, nullptr, false, true, skipAdd);
auto eventAllocIt = std::find(residencyContainer.begin(), residencyContainer.end(), eventAllocation);
EXPECT_EQ(residencyContainer.end(), eventAllocIt);
@@ -2453,7 +2453,7 @@ HWTEST2_F(CommandListCreate, givenAppendSignalEventWhenSkipAddToResidencyTrueThe
commandStreamOffset = commandContainer.getCommandStream()->getUsed();
skipAdd = false;
commandList->appendEventForProfilingAllWalkers(event.get(), &pipeControlBuffer, false, true, skipAdd);
commandList->appendEventForProfilingAllWalkers(event.get(), &pipeControlBuffer, nullptr, false, true, skipAdd);
eventAllocIt = std::find(residencyContainer.begin(), residencyContainer.end(), eventAllocation);
EXPECT_NE(residencyContainer.end(), eventAllocIt);
@@ -2477,8 +2477,75 @@ HWTEST2_F(CommandListCreate, givenAppendSignalEventWhenSkipAddToResidencyTrueThe
ASSERT_EQ(postSyncPipeControl, pipeControlBuffer);
}
// Appends profiling commands for a kernel-timestamp event twice (before-walker and after-walker),
// each time with residency skipping requested and an output patch container supplied.
// Expects that the event allocation is absent from the residency container and that every
// MI_STORE_REGISTER_MEM found in the command stream has a matching entry, in the same order,
// in the output container.
HWTEST2_F(CommandListCreate,
          givenAppendTimestampSignalEventWhenSkipAddToResidencyTrueAndOutRegMemListProvidedThenAllocationNotAddedToResidencyAndStoreRegMemCmdsStored,
          IsAtLeastXeHpCore) {
    using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM;

    auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    auto result = commandList->initialize(device, NEO::EngineGroupType::compute, 0u);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    auto &cmdContainer = commandList->getCmdContainer();

    // Pool holding a single event with kernel timestamps enabled.
    ze_event_pool_desc_t poolDesc = {};
    poolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
    poolDesc.count = 1;

    ze_event_desc_t evDesc = {};
    evDesc.index = 0;
    evDesc.signal = 0;

    std::unique_ptr<L0::EventPool> eventPool{L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &poolDesc, result)};
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    std::unique_ptr<L0::Event> event{L0::Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &evDesc, device)};
    ASSERT_NE(nullptr, event.get());

    auto &residency = cmdContainer.getResidencyContainer();
    auto eventAlloc = event->getPoolAllocation(device);
    auto eventGpuBase = event->getGpuAddress(device);

    CommandToPatchContainer capturedCmds;

    // Remember where the stream stood so that only the commands appended below get parsed.
    auto *stream = cmdContainer.getCommandStream();
    const auto startOffset = stream->getUsed();

    constexpr bool singlePacket = true;
    constexpr bool skipResidency = true;
    commandList->appendEventForProfilingAllWalkers(event.get(), nullptr, &capturedCmds, /*beforeWalker=*/true, singlePacket, skipResidency);
    commandList->appendEventForProfilingAllWalkers(event.get(), nullptr, &capturedCmds, /*beforeWalker=*/false, singlePacket, skipResidency);

    EXPECT_EQ(residency.end(), std::find(residency.begin(), residency.end(), eventAlloc));

    GenCmdList parsedCmds;
    const bool parsed = FamilyType::Parse::parseCommandBuffer(
        parsedCmds,
        ptrOffset(stream->getCpuBase(), startOffset),
        stream->getUsed() - startOffset);
    ASSERT_TRUE(parsed);

    auto storeRegMemIts = findAll<MI_STORE_REGISTER_MEM *>(parsedCmds.begin(), parsedCmds.end());
    ASSERT_NE(0u, storeRegMemIts.size());
    ASSERT_NE(0u, capturedCmds.size());
    ASSERT_EQ(storeRegMemIts.size(), capturedCmds.size());

    // Entry idx of the output container must describe the idx-th store-register-mem in the stream.
    for (size_t idx = 0; idx < storeRegMemIts.size(); ++idx) {
        auto *streamCmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*storeRegMemIts[idx]);
        auto &patchEntry = capturedCmds[idx];
        EXPECT_EQ(CommandToPatch::TimestampEventPostSyncStoreRegMem, patchEntry.type);

        auto *capturedCmd = genCmdCast<MI_STORE_REGISTER_MEM *>(patchEntry.pDestination);
        ASSERT_NE(nullptr, capturedCmd);
        EXPECT_EQ(streamCmd, capturedCmd);

        // The recorded offset is relative to the event base GPU address.
        EXPECT_EQ(eventGpuBase + patchEntry.offset, capturedCmd->getMemoryAddress());
    }
}
HWTEST2_F(CommandListAppendLaunchKernel,
givenL3EventCompationPlatformWhenAppendKernelWithSignalScopeEventAndCmdPatchListProvidedThenDispatchSignalPostSyncCmdAndStoreInPatchList,
givenL3EventCompactionPlatformWhenAppendKernelWithSignalScopeEventAndCmdPatchListProvidedThenDispatchSignalPostSyncCmdAndStoreInPatchList,
IsAtLeastXeHpCore) {
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION;
@@ -2538,5 +2605,72 @@ HWTEST2_F(CommandListAppendLaunchKernel,
EXPECT_EQ(postSyncPipeControl, signalCmd.pDestination);
}
// Launches a kernel signalling a host-scope kernel-timestamp event on a command list that has
// dcFlushSupport and compactL3FlushEventPacket forced on, passing an output patch container
// through the launch params. Expects every MI_STORE_REGISTER_MEM found in the command stream
// to have a matching entry, in the same order, in the output container.
HWTEST2_F(CommandListAppendLaunchKernel,
          givenL3EventCompactionPlatformWhenAppendKernelWithTimestampSignalScopeEventAndCmdPatchListProvidedThenDispatchSignalPostSyncCmdAndStoreInPatchList,
          IsAtLeastXeHpCore) {
    using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM;

    Mock<::L0::KernelImp> kernel;
    std::unique_ptr<Module> mockModule{new Mock<Module>(device, nullptr)};
    kernel.module = mockModule.get();

    auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    auto result = commandList->initialize(device, NEO::EngineGroupType::compute, 0u);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    commandList->compactL3FlushEventPacket = true;
    commandList->dcFlushSupport = true;

    auto &cmdContainer = commandList->getCmdContainer();

    // Pool holding a single event with kernel timestamps enabled.
    ze_event_pool_desc_t poolDesc = {};
    poolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
    poolDesc.count = 1;

    ze_event_desc_t evDesc = {};
    evDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
    evDesc.index = 0;

    std::unique_ptr<L0::EventPool> eventPool{L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &poolDesc, result)};
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    std::unique_ptr<L0::Event> event{L0::Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &evDesc, device)};
    ASSERT_NE(nullptr, event.get());

    auto eventGpuBase = event->getGpuAddress(device);

    ze_group_count_t groupCount{1, 1, 1};

    // The output container is handed to the command list through the launch params.
    CmdListKernelLaunchParams launchParams = {};
    CommandToPatchContainer capturedCmds;
    launchParams.outListCommands = &capturedCmds;

    // Remember where the stream stood so that only the commands appended below get parsed.
    auto *stream = cmdContainer.getCommandStream();
    const auto startOffset = stream->getUsed();

    result = commandList->appendLaunchKernel(kernel.toHandle(), groupCount, event->toHandle(), 0, nullptr, launchParams, false);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    GenCmdList parsedCmds;
    const bool parsed = FamilyType::Parse::parseCommandBuffer(
        parsedCmds,
        ptrOffset(stream->getCpuBase(), startOffset),
        stream->getUsed() - startOffset);
    ASSERT_TRUE(parsed);

    auto storeRegMemIts = findAll<MI_STORE_REGISTER_MEM *>(parsedCmds.begin(), parsedCmds.end());
    ASSERT_NE(0u, storeRegMemIts.size());
    ASSERT_NE(0u, capturedCmds.size());
    ASSERT_EQ(storeRegMemIts.size(), capturedCmds.size());

    // Entry idx of the output container must describe the idx-th store-register-mem in the stream.
    for (size_t idx = 0; idx < storeRegMemIts.size(); ++idx) {
        auto *streamCmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*storeRegMemIts[idx]);
        auto &patchEntry = capturedCmds[idx];
        EXPECT_EQ(CommandToPatch::TimestampEventPostSyncStoreRegMem, patchEntry.type);

        auto *capturedCmd = genCmdCast<MI_STORE_REGISTER_MEM *>(patchEntry.pDestination);
        ASSERT_NE(nullptr, capturedCmd);
        EXPECT_EQ(streamCmd, capturedCmd);

        // The recorded offset is relative to the event base GPU address.
        EXPECT_EQ(eventGpuBase + patchEntry.offset, capturedCmd->getMemoryAddress());
    }
}
} // namespace ult
} // namespace L0

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2022-2023 Intel Corporation
* Copyright (C) 2022-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -60,7 +60,7 @@ PVCTEST_F(CommandListEventFenceTestsPvc, givenCommandListWithProfilingEventAfter
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto event = std::unique_ptr<L0::Event>(L0::Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
commandList->appendEventForProfiling(event.get(), false, false);
commandList->appendEventForProfiling(event.get(), nullptr, false, false, false);
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(

View File

@@ -656,7 +656,7 @@ HWTEST2_F(CommandListEventFenceTestsXeHpcCore, givenCommandListWithProfilingEven
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto event = std::unique_ptr<L0::Event>(L0::Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
commandList->appendEventForProfiling(event.get(), false, false);
commandList->appendEventForProfiling(event.get(), nullptr, false, false, false);
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(
@@ -686,7 +686,7 @@ HWTEST2_F(CommandListEventFenceTestsXeHpcCore, givenCommandListWithRegularEventA
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto event = std::unique_ptr<L0::Event>(L0::Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
commandList->appendSignalEventPostWalker(event.get(), nullptr, false, false);
commandList->appendSignalEventPostWalker(event.get(), nullptr, nullptr, false, false);
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2021-2023 Intel Corporation
* Copyright (C) 2021-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -92,7 +92,7 @@ HWTEST2_F(CommandListCreate, givenNotCopyCommandListWhenProfilingEventBeforeComm
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto event = std::unique_ptr<L0::Event>(L0::Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
commandList->appendEventForProfiling(event.get(), true, false);
commandList->appendEventForProfiling(event.get(), nullptr, true, false, false);
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(
@@ -128,7 +128,7 @@ HWTEST2_F(CommandListCreate, givenNotCopyCommandListWhenProfilingEventAfterComma
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto event = std::unique_ptr<L0::Event>(L0::Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
commandList->appendEventForProfiling(event.get(), false, false);
commandList->appendEventForProfiling(event.get(), nullptr, false, false, false);
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(
@@ -163,7 +163,7 @@ HWTEST2_F(CommandListCreate, givenCopyCommandListWhenProfilingEventThenStoreRegC
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto event = std::unique_ptr<L0::Event>(L0::Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
commandList->appendEventForProfiling(event.get(), false, false);
commandList->appendEventForProfiling(event.get(), nullptr, false, false, false);
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(
cmdList, ptrOffset(commandList->getCmdContainer().getCommandStream()->getCpuBase(), 0), commandList->getCmdContainer().getCommandStream()->getUsed()));