Fix relaxed ordering check for remaining event packets

Related-To: NEO-7490

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz 2023-01-20 11:30:38 +00:00 committed by Compute-Runtime-Automation
parent 5a5596957a
commit fe3f32eb68
2 changed files with 3 additions and 28 deletions

View File

@ -300,7 +300,6 @@ struct CommandListCoreFamily : CommandListImp {
compactL3FlushEvent(dcFlush);
}
void allocateKernelPrivateMemoryIfNeeded(Kernel *kernel, uint32_t sizePerHwThread);
void waitOnRemainingEventPackets(Event *event);
CmdListEventOperation estimateEventPostSync(Event *event, uint32_t operations);
void dispatchPostSyncCopy(uint64_t gpuAddress, uint32_t value, bool workloadPartition);
void dispatchPostSyncCompute(uint64_t gpuAddress, uint32_t value, bool workloadPartition);

View File

@ -2026,7 +2026,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
commandContainer.addToResidencyContainer(&event->getAllocation(this->device));
gpuAddr = event->getCompletionFieldGpuAddress(this->device);
uint32_t packetsToWait = event->getPacketsInUse();
if (this->signalAllEventPackets) {
packetsToWait = event->getMaxPacketsCount();
}
for (uint32_t i = 0u; i < packetsToWait; i++) {
if (relaxedOrdering) {
NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataMemBatchBufferStart(*commandContainer.getCommandStream(), 0, gpuAddr, eventStateClear,
@ -2040,9 +2042,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
gpuAddr += event->getSinglePacketSize();
}
if (this->signalAllEventPackets) {
waitOnRemainingEventPackets(event);
}
}
if (NEO::DebugManager.flags.EnableSWTags.get()) {
@ -2714,29 +2713,6 @@ void CommandListCoreFamily<gfxCoreFamily>::allocateKernelPrivateMemoryIfNeeded(K
}
}
/// Appends one MI_SEMAPHORE_WAIT command for every event packet that lies
/// beyond the packets currently in use, up to the event's maximum packet count.
///
/// Each semaphore compares the packet's completion field against
/// Event::STATE_CLEARED with COMPARE_OPERATION_SAD_NOT_EQUAL_SDD
/// (presumably stalling until the packet leaves the cleared state - confirm
/// against the MI_SEMAPHORE_WAIT definition).
///
/// @param event Event whose unused packets are waited on; it is dereferenced
///              unconditionally, so it must not be null.
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::waitOnRemainingEventPackets(Event *event) {
    using SemaphoreCompare = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION;

    const uint32_t usedPackets = event->getPacketsInUse();
    const uint32_t unusedPackets = event->getMaxPacketsCount() - usedPackets;

    if (unusedPackets != 0) {
        const uint64_t completionBase = event->getCompletionFieldGpuAddress(this->device);
        const size_t stride = event->getSinglePacketSize();

        // Start at the first packet after the ones already in use.
        uint64_t waitAddress = completionBase + stride * usedPackets;

        for (uint32_t pending = unusedPackets; pending > 0; --pending) {
            NEO::EncodeSempahore<GfxFamily>::addMiSemaphoreWaitCommand(*commandContainer.getCommandStream(),
                                                                       waitAddress,
                                                                       Event::STATE_CLEARED,
                                                                       SemaphoreCompare::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD);
            // Advance to the completion field of the next packet.
            waitAddress += stride;
        }
    }
}
template <GFXCORE_FAMILY gfxCoreFamily>
CmdListEventOperation CommandListCoreFamily<gfxCoreFamily>::estimateEventPostSync(Event *event, uint32_t operations) {
CmdListEventOperation ret;