mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-03 14:55:24 +08:00
feature: add timestamp postsync command list argument
Related-To: NEO-10064
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
committed by Compute-Runtime-Automation
parent a8fbed6120
commit 8840b6d02f
@@ -182,7 +182,7 @@ struct CommandListCoreFamily : public CommandListImp {
|
||||
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) override;
|
||||
void appendMultiPartitionPrologue(uint32_t partitionDataSize) override;
|
||||
void appendMultiPartitionEpilogue() override;
|
||||
void appendEventForProfilingAllWalkers(Event *event, void **syncCmdBuffer, bool beforeWalker, bool singlePacketEvent, bool skipAddingEventToResidency);
|
||||
void appendEventForProfilingAllWalkers(Event *event, void **syncCmdBuffer, CommandToPatchContainer *outTimeStampSyncCmds, bool beforeWalker, bool singlePacketEvent, bool skipAddingEventToResidency);
|
||||
ze_result_t addEventsToCmdList(uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingAllowed, bool trackDependencies, bool waitForImplicitInOrderDependency);
|
||||
|
||||
ze_result_t reserveSpace(size_t size, void **ptr) override;
|
||||
@@ -285,11 +285,11 @@ struct CommandListCoreFamily : public CommandListImp {
|
||||
|
||||
ze_result_t setGlobalWorkSizeIndirect(NEO::CrossThreadDataOffset offsets[3], uint64_t crossThreadAddress, uint32_t lws[3]);
|
||||
ze_result_t programSyncBuffer(Kernel &kernel, NEO::Device &device, const ze_group_count_t &threadGroupDimensions);
|
||||
void appendWriteKernelTimestamp(Event *event, bool beforeWalker, bool maskLsb, bool workloadPartition);
|
||||
void adjustWriteKernelTimestamp(uint64_t globalAddress, uint64_t contextAddress, bool maskLsb, uint32_t mask, bool workloadPartition);
|
||||
void appendEventForProfiling(Event *event, bool beforeWalker, bool skipBarrierForEndProfiling);
|
||||
void appendWriteKernelTimestamp(Event *event, CommandToPatchContainer *outTimeStampSyncCmds, bool beforeWalker, bool maskLsb, bool workloadPartition);
|
||||
void adjustWriteKernelTimestamp(uint64_t globalAddress, uint64_t contextAddress, uint64_t baseAddress, CommandToPatchContainer *outTimeStampSyncCmds, bool maskLsb, uint32_t mask, bool workloadPartition);
|
||||
void appendEventForProfiling(Event *event, CommandToPatchContainer *outTimeStampSyncCmds, bool beforeWalker, bool skipBarrierForEndProfiling, bool skipAddingEventToResidency);
|
||||
void appendEventForProfilingCopyCommand(Event *event, bool beforeWalker);
|
||||
void appendSignalEventPostWalker(Event *event, void **syncCmdBuffer, bool skipBarrierForEndProfiling, bool skipAddingEventToResidency);
|
||||
void appendSignalEventPostWalker(Event *event, void **syncCmdBuffer, CommandToPatchContainer *outTimeStampSyncCmds, bool skipBarrierForEndProfiling, bool skipAddingEventToResidency);
|
||||
virtual void programStateBaseAddress(NEO::CommandContainer &container, bool useSbaProperties);
|
||||
void appendComputeBarrierCommand();
|
||||
NEO::PipeControlArgs createBarrierFlags();
|
||||
|
||||
@@ -463,12 +463,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelIndirect(ze_
|
||||
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
appendEventForProfiling(event, true, false);
|
||||
appendEventForProfiling(event, nullptr, true, false, false);
|
||||
launchParams.isIndirect = true;
|
||||
ret = appendLaunchKernelWithParams(Kernel::fromHandle(kernelHandle), pDispatchArgumentsBuffer,
|
||||
nullptr, launchParams);
|
||||
addToMappedEventList(event);
|
||||
appendSignalEventPostWalker(event, nullptr, false, false);
|
||||
appendSignalEventPostWalker(event, nullptr, nullptr, false, false);
|
||||
|
||||
handleInOrderDependencyCounter(event, isInOrderNonWalkerSignalingRequired(event));
|
||||
|
||||
@@ -503,7 +503,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchMultipleKernelsInd
|
||||
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
appendEventForProfiling(event, true, false);
|
||||
appendEventForProfiling(event, nullptr, true, false, false);
|
||||
auto allocData = device->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(static_cast<const void *>(pNumLaunchArguments));
|
||||
auto alloc = allocData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
|
||||
commandContainer.addToResidencyContainer(alloc);
|
||||
@@ -519,7 +519,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchMultipleKernelsInd
|
||||
}
|
||||
}
|
||||
addToMappedEventList(event);
|
||||
appendSignalEventPostWalker(event, nullptr, false, false);
|
||||
appendSignalEventPostWalker(event, nullptr, nullptr, false, false);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -603,9 +603,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryRangesBarrier(uint
|
||||
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
appendEventForProfiling(signalEvent, true, false);
|
||||
appendEventForProfiling(signalEvent, nullptr, true, false, false);
|
||||
applyMemoryRangesBarrier(numRanges, pRangeSizes, pRanges);
|
||||
appendSignalEventPostWalker(signalEvent, nullptr, false, false);
|
||||
appendSignalEventPostWalker(signalEvent, nullptr, nullptr, false, false);
|
||||
addToMappedEventList(signalEvent);
|
||||
|
||||
if (this->isInOrderExecutionEnabled()) {
|
||||
@@ -1265,7 +1265,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyBlitRegion(Ali
|
||||
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
appendEventForProfiling(signalEvent, true, false);
|
||||
appendEventForProfiling(signalEvent, nullptr, true, false, false);
|
||||
auto &rootDeviceEnvironment = device->getNEODevice()->getRootDeviceEnvironmentRef();
|
||||
bool copyRegionPreferred = NEO::BlitCommandsHelper<GfxFamily>::isCopyRegionPreferred(copySizeModified, rootDeviceEnvironment, blitProperties.isSystemMemoryPoolUsed);
|
||||
if (copyRegionPreferred) {
|
||||
@@ -1275,7 +1275,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyBlitRegion(Ali
|
||||
}
|
||||
makeResidentDummyAllocation();
|
||||
|
||||
appendSignalEventPostWalker(signalEvent, nullptr, false, false);
|
||||
appendSignalEventPostWalker(signalEvent, nullptr, nullptr, false, false);
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -1300,11 +1300,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendCopyImageBlit(NEO::Graph
|
||||
commandContainer.addToResidencyContainer(src);
|
||||
commandContainer.addToResidencyContainer(clearColorAllocation);
|
||||
|
||||
appendEventForProfiling(signalEvent, true, false);
|
||||
appendEventForProfiling(signalEvent, nullptr, true, false, false);
|
||||
NEO::BlitCommandsHelper<GfxFamily>::dispatchBlitCommandsForImageRegion(blitProperties, *commandContainer.getCommandStream(), *dummyBlitWa.rootDeviceEnvironment);
|
||||
makeResidentDummyAllocation();
|
||||
|
||||
appendSignalEventPostWalker(signalEvent, nullptr, false, false);
|
||||
appendSignalEventPostWalker(signalEvent, nullptr, nullptr, false, false);
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -1462,7 +1462,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
|
||||
|
||||
launchParams.pipeControlSignalling = (signalEvent && singlePipeControlPacket) || dstAllocationStruct.needsFlush;
|
||||
|
||||
appendEventForProfilingAllWalkers(signalEvent, nullptr, true, singlePipeControlPacket, false);
|
||||
appendEventForProfilingAllWalkers(signalEvent, nullptr, nullptr, true, singlePipeControlPacket, false);
|
||||
|
||||
if (isCopyOnly()) {
|
||||
ret = appendMemoryCopyBlit(dstAllocationStruct.alignedAllocationPtr,
|
||||
@@ -1527,7 +1527,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
|
||||
}
|
||||
}
|
||||
|
||||
appendEventForProfilingAllWalkers(signalEvent, nullptr, false, singlePipeControlPacket, false);
|
||||
appendEventForProfilingAllWalkers(signalEvent, nullptr, nullptr, false, singlePipeControlPacket, false);
|
||||
addFlushRequiredCommand(dstAllocationStruct.needsFlush, signalEvent);
|
||||
addToMappedEventList(signalEvent);
|
||||
|
||||
@@ -1921,7 +1921,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
|
||||
launchParams.isKernelSplitOperation = (fillArguments.leftRemainingBytes > 0 || fillArguments.rightRemainingBytes > 0);
|
||||
bool singlePipeControlPacket = eventSignalPipeControl(launchParams.isKernelSplitOperation, dcFlush);
|
||||
|
||||
appendEventForProfilingAllWalkers(signalEvent, nullptr, true, singlePipeControlPacket, false);
|
||||
appendEventForProfilingAllWalkers(signalEvent, nullptr, nullptr, true, singlePipeControlPacket, false);
|
||||
|
||||
if (fillArguments.leftRemainingBytes > 0) {
|
||||
launchParams.numKernelsInSplitLaunch++;
|
||||
@@ -2066,7 +2066,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
|
||||
}
|
||||
|
||||
addToMappedEventList(signalEvent);
|
||||
appendEventForProfilingAllWalkers(signalEvent, nullptr, false, singlePipeControlPacket, false);
|
||||
appendEventForProfilingAllWalkers(signalEvent, nullptr, nullptr, false, singlePipeControlPacket, false);
|
||||
addFlushRequiredCommand(hostPointerNeedsFlush, signalEvent);
|
||||
|
||||
bool nonWalkerInOrderCmdChaining = false;
|
||||
@@ -2115,7 +2115,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBlitFill(void *ptr,
|
||||
}
|
||||
|
||||
auto neoDevice = device->getNEODevice();
|
||||
appendEventForProfiling(signalEvent, true, false);
|
||||
appendEventForProfiling(signalEvent, nullptr, true, false, false);
|
||||
NEO::GraphicsAllocation *gpuAllocation = device->getDriverHandle()->getDriverSystemMemoryAllocation(ptr,
|
||||
size,
|
||||
neoDevice->getRootDeviceIndex(),
|
||||
@@ -2143,7 +2143,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBlitFill(void *ptr,
|
||||
neoDevice->getRootDeviceEnvironmentRef());
|
||||
makeResidentDummyAllocation();
|
||||
|
||||
appendSignalEventPostWalker(signalEvent, nullptr, false, false);
|
||||
appendSignalEventPostWalker(signalEvent, nullptr, nullptr, false, false);
|
||||
|
||||
if (isInOrderExecutionEnabled()) {
|
||||
appendSignalInOrderDependencyCounter(signalEvent);
|
||||
@@ -2154,12 +2154,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBlitFill(void *ptr,
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendSignalEventPostWalker(Event *event, void **syncCmdBuffer, bool skipBarrierForEndProfiling, bool skipAddingEventToResidency) {
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendSignalEventPostWalker(Event *event, void **syncCmdBuffer, CommandToPatchContainer *outTimeStampSyncCmds, bool skipBarrierForEndProfiling, bool skipAddingEventToResidency) {
|
||||
if (event == nullptr || !event->getPoolAllocation(this->device)) {
|
||||
return;
|
||||
}
|
||||
if (event->isEventTimestampFlagSet()) {
|
||||
appendEventForProfiling(event, false, skipBarrierForEndProfiling);
|
||||
appendEventForProfiling(event, outTimeStampSyncCmds, false, skipBarrierForEndProfiling, skipAddingEventToResidency);
|
||||
} else {
|
||||
event->resetKernelCountAndPacketUsedCount();
|
||||
if (!skipAddingEventToResidency) {
|
||||
@@ -2186,7 +2186,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfilingCopyCommand(Ev
|
||||
makeResidentDummyAllocation();
|
||||
dispatchEventPostSyncOperation(event, nullptr, Event::STATE_SIGNALED, true, false, false, false);
|
||||
}
|
||||
appendWriteKernelTimestamp(event, beforeWalker, false, false);
|
||||
appendWriteKernelTimestamp(event, nullptr, beforeWalker, false, false);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
@@ -2600,29 +2600,54 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::programSyncBuffer(Kernel &kern
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendWriteKernelTimestamp(Event *event, bool beforeWalker, bool maskLsb, bool workloadPartition) {
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendWriteKernelTimestamp(Event *event, CommandToPatchContainer *outTimeStampSyncCmds, bool beforeWalker, bool maskLsb, bool workloadPartition) {
|
||||
constexpr uint32_t mask = 0xfffffffe;
|
||||
|
||||
auto baseAddr = event->getPacketAddress(this->device);
|
||||
auto contextOffset = beforeWalker ? event->getContextStartOffset() : event->getContextEndOffset();
|
||||
|
||||
auto globalOffset = beforeWalker ? event->getGlobalStartOffset() : event->getGlobalEndOffset();
|
||||
auto contextOffset = beforeWalker ? event->getContextStartOffset() : event->getContextEndOffset();
|
||||
|
||||
void **globalPostSyncCmdBuffer = nullptr;
|
||||
void **contextPostSyncCmdBuffer = nullptr;
|
||||
|
||||
void *globalPostSyncCmd = nullptr;
|
||||
void *contextPostSyncCmd = nullptr;
|
||||
|
||||
if (outTimeStampSyncCmds != nullptr) {
|
||||
globalPostSyncCmdBuffer = &globalPostSyncCmd;
|
||||
contextPostSyncCmdBuffer = &contextPostSyncCmd;
|
||||
}
|
||||
|
||||
uint64_t globalAddress = ptrOffset(baseAddr, globalOffset);
|
||||
uint64_t contextAddress = ptrOffset(baseAddr, contextOffset);
|
||||
|
||||
if (maskLsb) {
|
||||
NEO::EncodeMathMMIO<GfxFamily>::encodeBitwiseAndVal(commandContainer, RegisterOffsets::globalTimestampLdw, mask, globalAddress, workloadPartition, nullptr);
|
||||
NEO::EncodeMathMMIO<GfxFamily>::encodeBitwiseAndVal(commandContainer, RegisterOffsets::gpThreadTimeRegAddressOffsetLow, mask, contextAddress, workloadPartition, nullptr);
|
||||
NEO::EncodeMathMMIO<GfxFamily>::encodeBitwiseAndVal(commandContainer, RegisterOffsets::globalTimestampLdw, mask, globalAddress, workloadPartition, globalPostSyncCmdBuffer);
|
||||
NEO::EncodeMathMMIO<GfxFamily>::encodeBitwiseAndVal(commandContainer, RegisterOffsets::gpThreadTimeRegAddressOffsetLow, mask, contextAddress, workloadPartition, contextPostSyncCmdBuffer);
|
||||
} else {
|
||||
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), RegisterOffsets::globalTimestampLdw, globalAddress, workloadPartition, nullptr);
|
||||
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), RegisterOffsets::gpThreadTimeRegAddressOffsetLow, contextAddress, workloadPartition, nullptr);
|
||||
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), RegisterOffsets::globalTimestampLdw, globalAddress, workloadPartition, globalPostSyncCmdBuffer);
|
||||
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), RegisterOffsets::gpThreadTimeRegAddressOffsetLow, contextAddress, workloadPartition, contextPostSyncCmdBuffer);
|
||||
}
|
||||
|
||||
adjustWriteKernelTimestamp(globalAddress, contextAddress, maskLsb, mask, workloadPartition);
|
||||
if (outTimeStampSyncCmds != nullptr) {
|
||||
CommandToPatch ctxCmd;
|
||||
ctxCmd.type = CommandToPatch::TimestampEventPostSyncStoreRegMem;
|
||||
|
||||
ctxCmd.offset = globalOffset;
|
||||
ctxCmd.pDestination = globalPostSyncCmd;
|
||||
outTimeStampSyncCmds->push_back(ctxCmd);
|
||||
|
||||
ctxCmd.offset = contextOffset;
|
||||
ctxCmd.pDestination = contextPostSyncCmd;
|
||||
outTimeStampSyncCmds->push_back(ctxCmd);
|
||||
}
|
||||
|
||||
adjustWriteKernelTimestamp(globalAddress, contextAddress, baseAddr, outTimeStampSyncCmds, maskLsb, mask, workloadPartition);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfiling(Event *event, bool beforeWalker, bool skipBarrierForEndProfiling) {
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfiling(Event *event, CommandToPatchContainer *outTimeStampSyncCmds, bool beforeWalker, bool skipBarrierForEndProfiling, bool skipAddingEventToResidency) {
|
||||
if (!event) {
|
||||
return;
|
||||
}
|
||||
@@ -2634,7 +2659,9 @@ void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfiling(Event *event,
|
||||
return;
|
||||
}
|
||||
|
||||
commandContainer.addToResidencyContainer(event->getPoolAllocation(this->device));
|
||||
if (!skipAddingEventToResidency) {
|
||||
commandContainer.addToResidencyContainer(event->getPoolAllocation(this->device));
|
||||
}
|
||||
bool workloadPartition = isTimestampEventForMultiTile(event);
|
||||
|
||||
appendDispatchOffsetRegister(workloadPartition, true);
|
||||
@@ -2642,7 +2669,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfiling(Event *event,
|
||||
if (beforeWalker) {
|
||||
event->resetKernelCountAndPacketUsedCount();
|
||||
bool workloadPartition = setupTimestampEventForMultiTile(event);
|
||||
appendWriteKernelTimestamp(event, beforeWalker, true, workloadPartition);
|
||||
appendWriteKernelTimestamp(event, outTimeStampSyncCmds, beforeWalker, true, workloadPartition);
|
||||
} else {
|
||||
dispatchEventPostSyncOperation(event, nullptr, Event::STATE_SIGNALED, true, false, false, true);
|
||||
|
||||
@@ -2658,7 +2685,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfiling(Event *event,
|
||||
|
||||
uint64_t baseAddr = event->getGpuAddress(this->device);
|
||||
NEO::MemorySynchronizationCommands<GfxFamily>::addAdditionalSynchronization(*commandContainer.getCommandStream(), baseAddr, false, rootDeviceEnvironment);
|
||||
appendWriteKernelTimestamp(event, beforeWalker, true, workloadPartition);
|
||||
appendWriteKernelTimestamp(event, outTimeStampSyncCmds, beforeWalker, true, workloadPartition);
|
||||
}
|
||||
|
||||
appendDispatchOffsetRegister(workloadPartition, false);
|
||||
@@ -2684,7 +2711,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteGlobalTimestamp(
|
||||
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
appendEventForProfiling(signalEvent, true, false);
|
||||
appendEventForProfiling(signalEvent, nullptr, true, false, false);
|
||||
|
||||
auto allocationStruct = getAlignedAllocationData(this->device, dstptr, sizeof(uint64_t), false);
|
||||
if (allocationStruct.alloc == nullptr) {
|
||||
@@ -2715,7 +2742,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteGlobalTimestamp(
|
||||
args);
|
||||
}
|
||||
|
||||
appendSignalEventPostWalker(signalEvent, nullptr, false, false);
|
||||
appendSignalEventPostWalker(signalEvent, nullptr, nullptr, false, false);
|
||||
|
||||
if (this->isInOrderExecutionEnabled()) {
|
||||
appendSignalInOrderDependencyCounter(signalEvent);
|
||||
@@ -3027,7 +3054,7 @@ void CommandListCoreFamily<gfxCoreFamily>::updateStreamPropertiesForRegularComma
|
||||
auto frontEndStateAddress = NEO::PreambleHelper<GfxFamily>::getSpaceForVfeState(commandContainer.getCommandStream(), device->getHwInfo(), engineGroupType);
|
||||
auto frontEndStateCmd = new FrontEndStateCommand;
|
||||
NEO::PreambleHelper<GfxFamily>::programVfeState(frontEndStateCmd, rootDeviceEnvironment, 0, 0, device->getMaxNumHwThreads(), finalStreamState);
|
||||
commandsToPatch.push_back({frontEndStateAddress, frontEndStateCmd, CommandToPatch::FrontEndState});
|
||||
commandsToPatch.push_back({frontEndStateAddress, frontEndStateCmd, 0, CommandToPatch::FrontEndState});
|
||||
}
|
||||
if (this->frontEndStateTracking && !this->dispatchCmdListBatchBufferAsPrimary) {
|
||||
auto &stream = *commandContainer.getCommandStream();
|
||||
@@ -3236,7 +3263,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBarrier(ze_event_handle_
|
||||
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
appendEventForProfiling(signalEvent, true, false);
|
||||
appendEventForProfiling(signalEvent, nullptr, true, false, false);
|
||||
|
||||
if (!this->isInOrderExecutionEnabled()) {
|
||||
if (isCopyOnly()) {
|
||||
@@ -3259,7 +3286,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBarrier(ze_event_handle_
|
||||
}
|
||||
|
||||
addToMappedEventList(signalEvent);
|
||||
appendSignalEventPostWalker(signalEvent, nullptr, this->isInOrderExecutionEnabled(), false);
|
||||
appendSignalEventPostWalker(signalEvent, nullptr, nullptr, this->isInOrderExecutionEnabled(), false);
|
||||
|
||||
if (isInOrderExecutionEnabled()) {
|
||||
appendSignalInOrderDependencyCounter(signalEvent);
|
||||
@@ -3392,7 +3419,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnMemory(void *desc,
|
||||
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
appendEventForProfiling(signalEvent, true, false);
|
||||
appendEventForProfiling(signalEvent, nullptr, true, false, false);
|
||||
|
||||
commandContainer.addToResidencyContainer(srcAllocationStruct.alloc);
|
||||
uint64_t gpuAddress = static_cast<uint64_t>(srcAllocationStruct.alignedAllocationPtr);
|
||||
@@ -3424,7 +3451,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnMemory(void *desc,
|
||||
NEO::MemorySynchronizationCommands<GfxFamily>::addAdditionalSynchronization(*commandContainer.getCommandStream(), gpuAddress, true, rootDeviceEnvironment);
|
||||
}
|
||||
|
||||
appendSignalEventPostWalker(signalEvent, nullptr, false, false);
|
||||
appendSignalEventPostWalker(signalEvent, nullptr, nullptr, false, false);
|
||||
|
||||
if (this->isInOrderExecutionEnabled()) {
|
||||
appendSignalInOrderDependencyCounter(signalEvent);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2023 Intel Corporation
|
||||
* Copyright (C) 2023-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -16,6 +16,6 @@ inline NEO::PreemptionMode CommandListCoreFamily<gfxCoreFamily>::obtainKernelPre
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::adjustWriteKernelTimestamp(uint64_t globalAddress, uint64_t contextAddress, bool maskLsb, uint32_t mask, bool workloadPartition) {}
|
||||
void CommandListCoreFamily<gfxCoreFamily>::adjustWriteKernelTimestamp(uint64_t globalAddress, uint64_t contextAddress, uint64_t baseAddress, CommandToPatchContainer *outTimeStampSyncCmds, bool maskLsb, uint32_t mask, bool workloadPartition) {}
|
||||
|
||||
} // namespace L0
|
||||
|
||||
@@ -38,7 +38,7 @@ size_t CommandListCoreFamily<gfxCoreFamily>::getReserveSshSize() {
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::adjustWriteKernelTimestamp(uint64_t globalAddress, uint64_t contextAddress, bool maskLsb, uint32_t mask, bool workloadPartition) {}
|
||||
void CommandListCoreFamily<gfxCoreFamily>::adjustWriteKernelTimestamp(uint64_t globalAddress, uint64_t contextAddress, uint64_t baseAddress, CommandToPatchContainer *outTimeStampSyncCmds, bool maskLsb, uint32_t mask, bool workloadPartition) {}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
bool CommandListCoreFamily<gfxCoreFamily>::isInOrderNonWalkerSignalingRequired(const Event *event) const {
|
||||
@@ -108,7 +108,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
|
||||
dsh = dshReserveArgs.indirectHeapReservation;
|
||||
}
|
||||
|
||||
appendEventForProfiling(event, true, false);
|
||||
appendEventForProfiling(event, nullptr, true, false, false);
|
||||
auto perThreadScratchSize = std::max<std::uint32_t>(this->getCommandListPerThreadScratchSize(0u),
|
||||
kernel->getImmutableData()->getDescriptor().kernelAttributes.perThreadScratchSize[0]);
|
||||
this->setCommandListPerThreadScratchSize(0u, perThreadScratchSize);
|
||||
@@ -247,7 +247,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
|
||||
*reinterpret_cast<typename GfxFamily::RENDER_SURFACE_STATE *>(surfaceStateSpace) = surfaceState;
|
||||
}
|
||||
|
||||
appendSignalEventPostWalker(event, nullptr, false, false);
|
||||
appendSignalEventPostWalker(event, nullptr, nullptr, false, false);
|
||||
|
||||
commandContainer.addToResidencyContainer(kernelImmutableData->getIsaGraphicsAllocation());
|
||||
auto &residencyContainer = kernel->getResidencyContainer();
|
||||
@@ -264,16 +264,16 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
|
||||
}
|
||||
|
||||
if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::debugManager.flags.PauseOnEnqueue.get(), neoDevice->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::BeforeWorkload)) {
|
||||
commandsToPatch.push_back({0x0, additionalCommands.front(), CommandToPatch::PauseOnEnqueuePipeControlStart});
|
||||
commandsToPatch.push_back({0x0, additionalCommands.front(), 0, CommandToPatch::PauseOnEnqueuePipeControlStart});
|
||||
additionalCommands.pop_front();
|
||||
commandsToPatch.push_back({0x0, additionalCommands.front(), CommandToPatch::PauseOnEnqueueSemaphoreStart});
|
||||
commandsToPatch.push_back({0x0, additionalCommands.front(), 0, CommandToPatch::PauseOnEnqueueSemaphoreStart});
|
||||
additionalCommands.pop_front();
|
||||
}
|
||||
|
||||
if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::debugManager.flags.PauseOnEnqueue.get(), neoDevice->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::AfterWorkload)) {
|
||||
commandsToPatch.push_back({0x0, additionalCommands.front(), CommandToPatch::PauseOnEnqueuePipeControlEnd});
|
||||
commandsToPatch.push_back({0x0, additionalCommands.front(), 0, CommandToPatch::PauseOnEnqueuePipeControlEnd});
|
||||
additionalCommands.pop_front();
|
||||
commandsToPatch.push_back({0x0, additionalCommands.front(), CommandToPatch::PauseOnEnqueueSemaphoreEnd});
|
||||
commandsToPatch.push_back({0x0, additionalCommands.front(), 0, CommandToPatch::PauseOnEnqueueSemaphoreEnd});
|
||||
additionalCommands.pop_front();
|
||||
}
|
||||
|
||||
@@ -329,11 +329,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelSplit(Kernel
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfilingAllWalkers(Event *event, void **syncCmdBuffer, bool beforeWalker, bool singlePacketEvent, bool skipAddingEventToResidency) {
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfilingAllWalkers(Event *event, void **syncCmdBuffer, CommandToPatchContainer *outTimeStampSyncCmds, bool beforeWalker, bool singlePacketEvent, bool skipAddingEventToResidency) {
|
||||
if (beforeWalker) {
|
||||
appendEventForProfiling(event, true, false);
|
||||
appendEventForProfiling(event, outTimeStampSyncCmds, true, false, skipAddingEventToResidency);
|
||||
} else {
|
||||
appendSignalEventPostWalker(event, syncCmdBuffer, false, skipAddingEventToResidency);
|
||||
appendSignalEventPostWalker(event, syncCmdBuffer, outTimeStampSyncCmds, false, skipAddingEventToResidency);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -286,7 +286,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
|
||||
std::list<void *> additionalCommands;
|
||||
|
||||
if (compactEvent) {
|
||||
appendEventForProfilingAllWalkers(compactEvent, nullptr, true, true, false);
|
||||
appendEventForProfilingAllWalkers(compactEvent, nullptr, launchParams.outListCommands, true, true, launchParams.omitAddingEventResidency);
|
||||
}
|
||||
|
||||
bool inOrderExecSignalRequired = (this->isInOrderExecutionEnabled() && !launchParams.isKernelSplitOperation && !launchParams.pipeControlSignalling);
|
||||
@@ -353,7 +353,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
|
||||
launchParams.outSyncCommand->type = CommandToPatch::SignalEventPostSyncPipeControl;
|
||||
syncCmdBuffer = &launchParams.outSyncCommand->pDestination;
|
||||
}
|
||||
appendEventForProfilingAllWalkers(compactEvent, syncCmdBuffer, false, true, launchParams.omitAddingEventResidency);
|
||||
appendEventForProfilingAllWalkers(compactEvent, syncCmdBuffer, launchParams.outListCommands, false, true, launchParams.omitAddingEventResidency);
|
||||
} else if (event) {
|
||||
event->setPacketsInUse(partitionCount);
|
||||
if (l3FlushEnable) {
|
||||
@@ -426,16 +426,16 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
|
||||
}
|
||||
|
||||
if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::debugManager.flags.PauseOnEnqueue.get(), neoDevice->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::BeforeWorkload)) {
|
||||
commandsToPatch.push_back({0x0, additionalCommands.front(), CommandToPatch::PauseOnEnqueuePipeControlStart});
|
||||
commandsToPatch.push_back({0x0, additionalCommands.front(), 0, CommandToPatch::PauseOnEnqueuePipeControlStart});
|
||||
additionalCommands.pop_front();
|
||||
commandsToPatch.push_back({0x0, additionalCommands.front(), CommandToPatch::PauseOnEnqueueSemaphoreStart});
|
||||
commandsToPatch.push_back({0x0, additionalCommands.front(), 0, CommandToPatch::PauseOnEnqueueSemaphoreStart});
|
||||
additionalCommands.pop_front();
|
||||
}
|
||||
|
||||
if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::debugManager.flags.PauseOnEnqueue.get(), neoDevice->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::AfterWorkload)) {
|
||||
commandsToPatch.push_back({0x0, additionalCommands.front(), CommandToPatch::PauseOnEnqueuePipeControlEnd});
|
||||
commandsToPatch.push_back({0x0, additionalCommands.front(), 0, CommandToPatch::PauseOnEnqueuePipeControlEnd});
|
||||
additionalCommands.pop_front();
|
||||
commandsToPatch.push_back({0x0, additionalCommands.front(), CommandToPatch::PauseOnEnqueueSemaphoreEnd});
|
||||
commandsToPatch.push_back({0x0, additionalCommands.front(), 0, CommandToPatch::PauseOnEnqueueSemaphoreEnd});
|
||||
additionalCommands.pop_front();
|
||||
}
|
||||
|
||||
@@ -511,12 +511,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelSplit(Kernel
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfilingAllWalkers(Event *event, void **syncCmdBuffer, bool beforeWalker, bool singlePacketEvent, bool skipAddingEventToResidency) {
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfilingAllWalkers(Event *event, void **syncCmdBuffer, CommandToPatchContainer *outTimeStampSyncCmds, bool beforeWalker, bool singlePacketEvent, bool skipAddingEventToResidency) {
|
||||
if (isCopyOnly() || singlePacketEvent) {
|
||||
if (beforeWalker) {
|
||||
appendEventForProfiling(event, true, false);
|
||||
appendEventForProfiling(event, outTimeStampSyncCmds, true, false, skipAddingEventToResidency);
|
||||
} else {
|
||||
appendSignalEventPostWalker(event, syncCmdBuffer, false, skipAddingEventToResidency);
|
||||
appendSignalEventPostWalker(event, syncCmdBuffer, outTimeStampSyncCmds, false, skipAddingEventToResidency);
|
||||
}
|
||||
} else {
|
||||
if (event) {
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
|
||||
#include "shared/source/helpers/definitions/command_encoder_args.h"
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <vector>
|
||||
|
||||
@@ -24,17 +25,21 @@ struct CommandToPatch {
|
||||
ComputeWalker,
|
||||
SignalEventPostSyncPipeControl,
|
||||
WaitEventSemaphoreWait,
|
||||
TimestampEventPostSyncStoreRegMem,
|
||||
Invalid
|
||||
};
|
||||
void *pDestination = nullptr;
|
||||
void *pCommand = nullptr;
|
||||
size_t offset = 0;
|
||||
CommandType type = Invalid;
|
||||
};
|
||||
|
||||
using CommandToPatchContainer = std::vector<CommandToPatch>;
|
||||
|
||||
struct CmdListKernelLaunchParams {
|
||||
void *outWalker = nullptr;
|
||||
CommandToPatch *outSyncCommand = nullptr;
|
||||
std::vector<CommandToPatch> *outListCommands = nullptr;
|
||||
CommandToPatchContainer *outListCommands = nullptr;
|
||||
NEO::RequiredPartitionDim requiredPartitionDim = NEO::RequiredPartitionDim::none;
|
||||
NEO::RequiredDispatchWalkOrder requiredDispatchWalkOrder = NEO::RequiredDispatchWalkOrder::none;
|
||||
uint32_t additionalSizeParam = NEO::additionalKernelLaunchSizeParamNotSet;
|
||||
|
||||
@@ -109,7 +109,7 @@ struct BcsSplit {
|
||||
cmdList->addEventsToCmdList(numWaitEvents, phWaitEvents, hasRelaxedOrderingDependencies, false, true);
|
||||
|
||||
if (signalEvent && i == 0u) {
|
||||
cmdList->appendEventForProfilingAllWalkers(signalEvent, nullptr, true, true, false);
|
||||
cmdList->appendEventForProfilingAllWalkers(signalEvent, nullptr, nullptr, true, true, false);
|
||||
}
|
||||
|
||||
auto localSize = totalSize / engineCount;
|
||||
@@ -137,9 +137,9 @@ struct BcsSplit {
|
||||
|
||||
cmdList->addEventsToCmdList(static_cast<uint32_t>(cmdQsForSplit.size()), eventHandles.data(), hasRelaxedOrderingDependencies, false, true);
|
||||
if (signalEvent) {
|
||||
cmdList->appendEventForProfilingAllWalkers(signalEvent, nullptr, false, true, false);
|
||||
cmdList->appendEventForProfilingAllWalkers(signalEvent, nullptr, nullptr, false, true, false);
|
||||
}
|
||||
cmdList->appendEventForProfilingAllWalkers(this->events.marker[markerEventIndex], nullptr, false, true, false);
|
||||
cmdList->appendEventForProfilingAllWalkers(this->events.marker[markerEventIndex], nullptr, nullptr, false, true, false);
|
||||
|
||||
if (cmdList->isInOrderExecutionEnabled()) {
|
||||
cmdList->appendSignalInOrderDependencyCounter(signalEvent);
|
||||
|
||||
@@ -2945,7 +2945,7 @@ HWTEST2_F(CommandListCreate, givenNullEventWhenAppendEventAfterWalkerThenNothing
|
||||
|
||||
auto usedBefore = commandList->getCmdContainer().getCommandStream()->getUsed();
|
||||
|
||||
commandList->appendSignalEventPostWalker(nullptr, nullptr, false, false);
|
||||
commandList->appendSignalEventPostWalker(nullptr, nullptr, nullptr, false, false);
|
||||
|
||||
EXPECT_EQ(commandList->getCmdContainer().getCommandStream()->getUsed(), usedBefore);
|
||||
}
|
||||
|
||||
@@ -412,7 +412,7 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent,
|
||||
event->signalScope = ZE_EVENT_SCOPE_FLAG_HOST;
|
||||
|
||||
commandList->partitionCount = packets;
|
||||
commandList->appendSignalEventPostWalker(event.get(), nullptr, false, false);
|
||||
commandList->appendSignalEventPostWalker(event.get(), nullptr, nullptr, false, false);
|
||||
EXPECT_EQ(packets, event->getPacketsInUse());
|
||||
|
||||
auto gpuAddress = event->getCompletionFieldGpuAddress(device);
|
||||
@@ -472,7 +472,7 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent,
|
||||
event->signalScope = ZE_EVENT_SCOPE_FLAG_HOST;
|
||||
|
||||
commandList->partitionCount = packets;
|
||||
commandList->appendSignalEventPostWalker(event.get(), nullptr, false, false);
|
||||
commandList->appendSignalEventPostWalker(event.get(), nullptr, nullptr, false, false);
|
||||
EXPECT_EQ(packets, event->getPacketsInUse());
|
||||
|
||||
auto gpuAddress = event->getCompletionFieldGpuAddress(device);
|
||||
@@ -645,7 +645,7 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent,
|
||||
|
||||
event->setEventTimestampFlag(false);
|
||||
|
||||
commandList->appendSignalEventPostWalker(event.get(), nullptr, false, false);
|
||||
commandList->appendSignalEventPostWalker(event.get(), nullptr, nullptr, false, false);
|
||||
size_t usedAfterSize = cmdStream->getUsed();
|
||||
|
||||
GenCmdList cmdList;
|
||||
|
||||
@@ -865,7 +865,7 @@ struct CommandListSignalAllEventPacketFixture : public ModuleFixture {
|
||||
|
||||
commandList->setupTimestampEventForMultiTile(event.get());
|
||||
size_t sizeBefore = cmdStream->getUsed();
|
||||
commandList->appendEventForProfiling(event.get(), false, false);
|
||||
commandList->appendEventForProfiling(event.get(), nullptr, false, false, false);
|
||||
size_t sizeAfter = cmdStream->getUsed();
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
@@ -950,7 +950,7 @@ struct CommandListSignalAllEventPacketFixture : public ModuleFixture {
|
||||
|
||||
commandList->setupTimestampEventForMultiTile(event.get());
|
||||
size_t sizeBefore = cmdStream->getUsed();
|
||||
commandList->appendSignalEventPostWalker(event.get(), nullptr, false, false);
|
||||
commandList->appendSignalEventPostWalker(event.get(), nullptr, nullptr, false, false);
|
||||
size_t sizeAfter = cmdStream->getUsed();
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
@@ -2426,7 +2426,7 @@ HWTEST2_F(CommandListCreate, givenAppendSignalEventWhenSkipAddToResidencyTrueThe
|
||||
|
||||
auto commandStreamOffset = commandContainer.getCommandStream()->getUsed();
|
||||
bool skipAdd = true;
|
||||
commandList->appendEventForProfilingAllWalkers(event.get(), &pipeControlBuffer, false, true, skipAdd);
|
||||
commandList->appendEventForProfilingAllWalkers(event.get(), &pipeControlBuffer, nullptr, false, true, skipAdd);
|
||||
|
||||
auto eventAllocIt = std::find(residencyContainer.begin(), residencyContainer.end(), eventAllocation);
|
||||
EXPECT_EQ(residencyContainer.end(), eventAllocIt);
|
||||
@@ -2453,7 +2453,7 @@ HWTEST2_F(CommandListCreate, givenAppendSignalEventWhenSkipAddToResidencyTrueThe
|
||||
|
||||
commandStreamOffset = commandContainer.getCommandStream()->getUsed();
|
||||
skipAdd = false;
|
||||
commandList->appendEventForProfilingAllWalkers(event.get(), &pipeControlBuffer, false, true, skipAdd);
|
||||
commandList->appendEventForProfilingAllWalkers(event.get(), &pipeControlBuffer, nullptr, false, true, skipAdd);
|
||||
eventAllocIt = std::find(residencyContainer.begin(), residencyContainer.end(), eventAllocation);
|
||||
EXPECT_NE(residencyContainer.end(), eventAllocIt);
|
||||
|
||||
@@ -2477,8 +2477,75 @@ HWTEST2_F(CommandListCreate, givenAppendSignalEventWhenSkipAddToResidencyTrueThe
|
||||
ASSERT_EQ(postSyncPipeControl, pipeControlBuffer);
|
||||
}
|
||||
|
||||
HWTEST2_F(CommandListCreate,
|
||||
givenAppendTimestampSignalEventWhenSkipAddToResidencyTrueAndOutRegMemListProvidedThenAllocationNotAddedToResidencyAndStoreRegMemCmdsStored,
|
||||
IsAtLeastXeHpCore) {
|
||||
using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM;
|
||||
|
||||
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
|
||||
auto result = commandList->initialize(device, NEO::EngineGroupType::compute, 0u);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
auto &commandContainer = commandList->getCmdContainer();
|
||||
|
||||
ze_event_pool_desc_t eventPoolDesc = {};
|
||||
eventPoolDesc.count = 1;
|
||||
eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
|
||||
|
||||
ze_event_desc_t eventDesc = {};
|
||||
eventDesc.index = 0;
|
||||
eventDesc.signal = 0;
|
||||
|
||||
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
auto event = std::unique_ptr<L0::Event>(L0::Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
|
||||
ASSERT_NE(nullptr, event.get());
|
||||
|
||||
auto &residencyContainer = commandContainer.getResidencyContainer();
|
||||
auto eventAllocation = event->getPoolAllocation(device);
|
||||
auto eventBaseAddress = event->getGpuAddress(device);
|
||||
|
||||
CommandToPatchContainer outStoreRegMemCmdList;
|
||||
|
||||
auto commandStreamOffset = commandContainer.getCommandStream()->getUsed();
|
||||
bool skipAdd = true;
|
||||
|
||||
bool before = true;
|
||||
commandList->appendEventForProfilingAllWalkers(event.get(), nullptr, &outStoreRegMemCmdList, before, true, skipAdd);
|
||||
before = false;
|
||||
commandList->appendEventForProfilingAllWalkers(event.get(), nullptr, &outStoreRegMemCmdList, before, true, skipAdd);
|
||||
|
||||
auto eventAllocIt = std::find(residencyContainer.begin(), residencyContainer.end(), eventAllocation);
|
||||
EXPECT_EQ(residencyContainer.end(), eventAllocIt);
|
||||
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(
|
||||
cmdList,
|
||||
ptrOffset(commandContainer.getCommandStream()->getCpuBase(), commandStreamOffset),
|
||||
commandContainer.getCommandStream()->getUsed() - commandStreamOffset));
|
||||
|
||||
auto storeRegMemList = findAll<MI_STORE_REGISTER_MEM *>(cmdList.begin(), cmdList.end());
|
||||
ASSERT_NE(0u, storeRegMemList.size());
|
||||
ASSERT_NE(0u, outStoreRegMemCmdList.size());
|
||||
|
||||
ASSERT_EQ(storeRegMemList.size(), outStoreRegMemCmdList.size());
|
||||
|
||||
for (size_t i = 0; i < storeRegMemList.size(); i++) {
|
||||
MI_STORE_REGISTER_MEM *storeRegMem = genCmdCast<MI_STORE_REGISTER_MEM *>(*storeRegMemList[i]);
|
||||
|
||||
auto &cmdToPatch = outStoreRegMemCmdList[i];
|
||||
EXPECT_EQ(CommandToPatch::TimestampEventPostSyncStoreRegMem, cmdToPatch.type);
|
||||
MI_STORE_REGISTER_MEM *outStoreRegMem = genCmdCast<MI_STORE_REGISTER_MEM *>(cmdToPatch.pDestination);
|
||||
ASSERT_NE(nullptr, outStoreRegMem);
|
||||
|
||||
EXPECT_EQ(storeRegMem, outStoreRegMem);
|
||||
|
||||
auto cmdAddress = eventBaseAddress + cmdToPatch.offset;
|
||||
EXPECT_EQ(cmdAddress, outStoreRegMem->getMemoryAddress());
|
||||
}
|
||||
}
|
||||
|
||||
HWTEST2_F(CommandListAppendLaunchKernel,
|
||||
givenL3EventCompationPlatformWhenAppendKernelWithSignalScopeEventAndCmdPatchListProvidedThenDispatchSignalPostSyncCmdAndStoreInPatchList,
|
||||
givenL3EventCompactionPlatformWhenAppendKernelWithSignalScopeEventAndCmdPatchListProvidedThenDispatchSignalPostSyncCmdAndStoreInPatchList,
|
||||
IsAtLeastXeHpCore) {
|
||||
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
||||
using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION;
|
||||
@@ -2538,5 +2605,72 @@ HWTEST2_F(CommandListAppendLaunchKernel,
|
||||
EXPECT_EQ(postSyncPipeControl, signalCmd.pDestination);
|
||||
}
|
||||
|
||||
HWTEST2_F(CommandListAppendLaunchKernel,
|
||||
givenL3EventCompactionPlatformWhenAppendKernelWithTimestampSignalScopeEventAndCmdPatchListProvidedThenDispatchSignalPostSyncCmdAndStoreInPatchList,
|
||||
IsAtLeastXeHpCore) {
|
||||
using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM;
|
||||
|
||||
Mock<::L0::KernelImp> kernel;
|
||||
auto mockModule = std::unique_ptr<Module>(new Mock<Module>(device, nullptr));
|
||||
kernel.module = mockModule.get();
|
||||
|
||||
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
|
||||
auto result = commandList->initialize(device, NEO::EngineGroupType::compute, 0u);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
commandList->dcFlushSupport = true;
|
||||
commandList->compactL3FlushEventPacket = true;
|
||||
|
||||
auto &commandContainer = commandList->getCmdContainer();
|
||||
|
||||
ze_event_pool_desc_t eventPoolDesc = {};
|
||||
eventPoolDesc.count = 1;
|
||||
eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
|
||||
|
||||
ze_event_desc_t eventDesc = {};
|
||||
eventDesc.index = 0;
|
||||
eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
|
||||
|
||||
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
auto event = std::unique_ptr<L0::Event>(L0::Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
|
||||
ASSERT_NE(nullptr, event.get());
|
||||
|
||||
auto eventBaseAddress = event->getGpuAddress(device);
|
||||
|
||||
ze_group_count_t groupCount{1, 1, 1};
|
||||
CmdListKernelLaunchParams launchParams = {};
|
||||
CommandToPatchContainer outStoreRegMemCmdList;
|
||||
launchParams.outListCommands = &outStoreRegMemCmdList;
|
||||
auto commandStreamOffset = commandContainer.getCommandStream()->getUsed();
|
||||
result = commandList->appendLaunchKernel(kernel.toHandle(), groupCount, event->toHandle(), 0, nullptr, launchParams, false);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(
|
||||
cmdList,
|
||||
ptrOffset(commandContainer.getCommandStream()->getCpuBase(), commandStreamOffset),
|
||||
commandContainer.getCommandStream()->getUsed() - commandStreamOffset));
|
||||
|
||||
auto storeRegMemList = findAll<MI_STORE_REGISTER_MEM *>(cmdList.begin(), cmdList.end());
|
||||
ASSERT_NE(0u, storeRegMemList.size());
|
||||
ASSERT_NE(0u, outStoreRegMemCmdList.size());
|
||||
|
||||
ASSERT_EQ(storeRegMemList.size(), outStoreRegMemCmdList.size());
|
||||
|
||||
for (size_t i = 0; i < storeRegMemList.size(); i++) {
|
||||
MI_STORE_REGISTER_MEM *storeRegMem = genCmdCast<MI_STORE_REGISTER_MEM *>(*storeRegMemList[i]);
|
||||
|
||||
auto &cmdToPatch = outStoreRegMemCmdList[i];
|
||||
EXPECT_EQ(CommandToPatch::TimestampEventPostSyncStoreRegMem, cmdToPatch.type);
|
||||
MI_STORE_REGISTER_MEM *outStoreRegMem = genCmdCast<MI_STORE_REGISTER_MEM *>(cmdToPatch.pDestination);
|
||||
ASSERT_NE(nullptr, outStoreRegMem);
|
||||
|
||||
EXPECT_EQ(storeRegMem, outStoreRegMem);
|
||||
|
||||
auto cmdAddress = eventBaseAddress + cmdToPatch.offset;
|
||||
EXPECT_EQ(cmdAddress, outStoreRegMem->getMemoryAddress());
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022-2023 Intel Corporation
|
||||
* Copyright (C) 2022-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -60,7 +60,7 @@ PVCTEST_F(CommandListEventFenceTestsPvc, givenCommandListWithProfilingEventAfter
|
||||
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
auto event = std::unique_ptr<L0::Event>(L0::Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
|
||||
commandList->appendEventForProfiling(event.get(), false, false);
|
||||
commandList->appendEventForProfiling(event.get(), nullptr, false, false, false);
|
||||
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(
|
||||
|
||||
@@ -656,7 +656,7 @@ HWTEST2_F(CommandListEventFenceTestsXeHpcCore, givenCommandListWithProfilingEven
|
||||
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
auto event = std::unique_ptr<L0::Event>(L0::Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
|
||||
commandList->appendEventForProfiling(event.get(), false, false);
|
||||
commandList->appendEventForProfiling(event.get(), nullptr, false, false, false);
|
||||
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(
|
||||
@@ -686,7 +686,7 @@ HWTEST2_F(CommandListEventFenceTestsXeHpcCore, givenCommandListWithRegularEventA
|
||||
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
auto event = std::unique_ptr<L0::Event>(L0::Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
|
||||
commandList->appendSignalEventPostWalker(event.get(), nullptr, false, false);
|
||||
commandList->appendSignalEventPostWalker(event.get(), nullptr, nullptr, false, false);
|
||||
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2021-2023 Intel Corporation
|
||||
* Copyright (C) 2021-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -92,7 +92,7 @@ HWTEST2_F(CommandListCreate, givenNotCopyCommandListWhenProfilingEventBeforeComm
|
||||
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
auto event = std::unique_ptr<L0::Event>(L0::Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
|
||||
commandList->appendEventForProfiling(event.get(), true, false);
|
||||
commandList->appendEventForProfiling(event.get(), nullptr, true, false, false);
|
||||
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(
|
||||
@@ -128,7 +128,7 @@ HWTEST2_F(CommandListCreate, givenNotCopyCommandListWhenProfilingEventAfterComma
|
||||
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
auto event = std::unique_ptr<L0::Event>(L0::Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
|
||||
commandList->appendEventForProfiling(event.get(), false, false);
|
||||
commandList->appendEventForProfiling(event.get(), nullptr, false, false, false);
|
||||
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(
|
||||
@@ -163,7 +163,7 @@ HWTEST2_F(CommandListCreate, givenCopyCommandListWhenProfilingEventThenStoreRegC
|
||||
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
auto event = std::unique_ptr<L0::Event>(L0::Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
|
||||
commandList->appendEventForProfiling(event.get(), false, false);
|
||||
commandList->appendEventForProfiling(event.get(), nullptr, false, false, false);
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(
|
||||
cmdList, ptrOffset(commandList->getCmdContainer().getCommandStream()->getCpuBase(), 0), commandList->getCmdContainer().getCommandStream()->getUsed()));
|
||||
|
||||
Reference in New Issue
Block a user