diff --git a/level_zero/api/driver_experimental/public/zex_event.cpp b/level_zero/api/driver_experimental/public/zex_event.cpp index 8b03527a49..a2ba9e2ff6 100644 --- a/level_zero/api/driver_experimental/public/zex_event.cpp +++ b/level_zero/api/driver_experimental/public/zex_event.cpp @@ -97,10 +97,6 @@ zexCounterBasedEventCreate2(ze_context_handle_t hContext, ze_device_handle_t hDe auto l0Event = device->getL0GfxCoreHelper().createStandaloneEvent(eventDescriptor, device, result); - if (signalScope ^ counterBasedEventDesc->signalScope) { - l0Event->setMitigateHostVisibleSignal(); - } - *phEvent = l0Event; return result; diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index d8fb115aaa..3b5004ba7c 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -488,7 +488,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernel(ze_kernel_h auto res = appendLaunchKernelWithParams(kernel, threadGroupDimensions, event, launchParams); if (!launchParams.skipInOrderNonWalkerSignaling) { - handleInOrderDependencyCounter(event, isInOrderNonWalkerSignalingRequired(event) && !(event && event->isCounterBased() && event->isEventTimestampFlagSet()), false); + handleInOrderDependencyCounter(event, isInOrderNonWalkerSignalingRequired(event) && !event->isCounterBased(), false); } if (this->synchronizedDispatchMode != NEO::SynchronizedDispatchMode::disabled) { @@ -2026,7 +2026,7 @@ ze_result_t CommandListCoreFamily::appendMemoryCopy(void *dstptr, if (hSignalEvent) { signalEvent = Event::fromHandle(hSignalEvent); launchParams.isHostSignalScopeEvent = signalEvent->isSignalScope(ZE_EVENT_SCOPE_FLAG_HOST); - dcFlush = getDcFlushRequired(signalEvent->isSignalScope()); + dcFlush = getDcFlushRequired(signalEvent->isFlushRequiredForSignal()); } if (!handleCounterBasedEventOperations(signalEvent, false)) { @@ -2545,7 +2545,7 @@ ze_result_t CommandListCoreFamily::appendMemoryFill(void *ptr, if (hSignalEvent) { signalEvent = Event::fromHandle(hSignalEvent); launchParams.isHostSignalScopeEvent = signalEvent->isSignalScope(ZE_EVENT_SCOPE_FLAG_HOST); - dcFlush = getDcFlushRequired(signalEvent->isSignalScope()); + dcFlush = getDcFlushRequired(signalEvent->isFlushRequiredForSignal()); registerWalkerWithProfilingEnqueued(signalEvent); } @@ -4276,7 +4276,6 @@ ze_result_t CommandListCoreFamily::appendBarrier(ze_event_handle_ if (isInOrderExecutionEnabled() && isSkippingInOrderBarrierAllowed(hSignalEvent, numWaitEvents, phWaitEvents)) { if (hSignalEvent) { auto event = Event::fromHandle(hSignalEvent); - event->setEventOnBarrierOptimized(true); assignInOrderExecInfoToEvent(event); } @@ -4722,7 +4721,7 @@ void CommandListCoreFamily::dispatchEventPostSyncOperation(Event template void CommandListCoreFamily::dispatchEventRemainingPacketsPostSyncOperation(Event *event, bool copyOperation) { - if (this->signalAllEventPackets && !event->isCounterBasedExplicitlyEnabled() && event->getPacketsInUse() < event->getMaxPacketsCount()) { + if (!event->isCounterBasedExplicitlyEnabled() && event->getPacketsInUse() < event->getMaxPacketsCount()) { uint32_t packets = event->getMaxPacketsCount() - event->getPacketsInUse(); CmdListEventOperation remainingPacketsOperation = estimateEventPostSync(event, packets); @@ -4854,7 +4853,6 @@ bool CommandListCoreFamily::handleCounterBasedEventOperations(Eve signalEvent->resetInOrderTimestampNode(tag, this->partitionCount); signalEvent->resetAdditionalTimestampNode(nullptr, this->partitionCount, false); } - signalEvent->setEventOnBarrierOptimized(false); } return true; diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl index 3324026c5d..648bd1beb3 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl @@ -631,7 +631,6 @@ ze_result_t CommandListCoreFamilyImmediate::appendBarrier(ze_even if (isSkippingInOrderBarrierAllowed(hSignalEvent, numWaitEvents, phWaitEvents)) { if (hSignalEvent) { auto event = Event::fromHandle(hSignalEvent); - event->setEventOnBarrierOptimized(true); assignInOrderExecInfoToEvent(event); } @@ -1338,7 +1337,9 @@ ze_result_t CommandListCoreFamilyImmediate::flushImmediate(ze_res if (signalEvent) { signalEvent->setCsr(static_cast(queue)->getCsr(), isInOrderExecutionEnabled()); - this->latestFlushIsHostVisible |= signalEvent->isSignalScope(ZE_EVENT_SCOPE_FLAG_HOST) && !this->latestFlushIsDualCopyOffload; + if (!this->latestFlushIsDualCopyOffload) { + this->latestFlushIsHostVisible |= signalEvent->isFlushRequiredForSignal() || (this->isHeaplessModeEnabled() && signalEvent->isSignalScope(ZE_EVENT_SCOPE_FLAG_HOST)); + } } if (inputRet == ZE_RESULT_SUCCESS) { @@ -1812,7 +1813,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendCommandLists(ui bool dcFlush = false; if (hSignalEvent) { signalEvent = Event::fromHandle(hSignalEvent); - dcFlush = this->getDcFlushRequired(signalEvent->isSignalScope()); + dcFlush = this->getDcFlushRequired(signalEvent->isFlushRequiredForSignal()); } if (!this->handleCounterBasedEventOperations(signalEvent, false)) { diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl index 79369c4db1..fc5e0803a9 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl @@ -44,7 +44,7 @@ bool CommandListCoreFamily::isInOrderNonWalkerSignalingRequired(c return false; } - const bool flushRequired = compactL3FlushEvent(getDcFlushRequired(event->isSignalScope())); + const bool flushRequired = compactL3FlushEvent(getDcFlushRequired(event->isFlushRequiredForSignal())); const bool inOrderRequired = !this->duplicatedInOrderCounterStorageEnabled && (event->isEventTimestampFlagSet() || !event->isCounterBased()); @@ -172,7 +172,6 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K uint64_t eventAddress = 0; bool isTimestampEvent = false; - bool l3FlushInPipeControlEnable = false; bool isFlushL3AfterPostSync = false; bool isHostSignalScopeEvent = launchParams.isHostSignalScopeEvent; bool interruptEvent = false; @@ -184,7 +183,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K event->setKernelWithPrintfDeviceMutex(kernel->getDevicePrintfKernelMutex()); } isHostSignalScopeEvent = event->isSignalScope(ZE_EVENT_SCOPE_FLAG_HOST); - if (compactL3FlushEvent(getDcFlushRequired(event->isSignalScope()))) { + if (compactL3FlushEvent(getDcFlushRequired(event->isFlushRequiredForSignal()))) { compactEvent = event; } else { NEO::GraphicsAllocation *eventPoolAlloc = event->getAllocation(this->device); @@ -196,15 +195,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K eventAddress = event->getPacketAddress(this->device); isTimestampEvent = event->isEventTimestampFlagSet(); } - - bool flushRequired = event->isSignalScope() && - !launchParams.isKernelSplitOperation; - - l3FlushInPipeControlEnable = getDcFlushRequired(flushRequired) && - !this->l3FlushAfterPostSyncEnabled; - isFlushL3AfterPostSync = isHostSignalScopeEvent && this->l3FlushAfterPostSyncEnabled && !launchParams.isKernelSplitOperation; - interruptEvent = event->isInterruptModeEnabled(); } } @@ -304,7 +295,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K std::list additionalCommands; - if (compactEvent && !compactEvent->isCounterBased()) { + if (compactEvent) { appendEventForProfilingAllWalkers(compactEvent, nullptr, launchParams.outListCommands, true, true, launchParams.omitAddingEventResidency, false); } @@ -322,29 +313,18 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K inOrderNonWalkerSignalling = isInOrderNonWalkerSignalingRequired(event); if (inOrderExecSignalRequired) { - if (!compactEvent || !compactEvent->isCounterBased() || compactEvent->isEventTimestampFlagSet()) { - if (inOrderNonWalkerSignalling) { - if (!event->getAllocation(this->device)) { - event->resetInOrderTimestampNode(device->getInOrderTimestampAllocator()->getTag(), this->partitionCount); - } - if (!event->isCounterBased()) { - dispatchEventPostSyncOperation(event, nullptr, launchParams.outListCommands, Event::STATE_CLEARED, false, false, false, false, false); - } else if (compactEvent) { - eventAddress = event->getPacketAddress(this->device); - isTimestampEvent = true; - if (!launchParams.omitAddingEventResidency) { - commandContainer.addToResidencyContainer(event->getAllocation(this->device)); - } - } - } else { - inOrderCounterValue = this->inOrderExecInfo->getCounterValue() + getInOrderIncrementValue(); - inOrderExecInfo = this->inOrderExecInfo.get(); - if (event && event->isCounterBased()) { - isCounterBasedEvent = true; - if (event->getInOrderIncrementValue(this->partitionCount) > 0) { - inOrderIncrementGpuAddress = event->getInOrderExecInfo()->getBaseDeviceAddress(); - inOrderIncrementValue = event->getInOrderIncrementValue(this->partitionCount); - } + if (inOrderNonWalkerSignalling) { + if (!event->isCounterBased()) { + dispatchEventPostSyncOperation(event, nullptr, launchParams.outListCommands, Event::STATE_CLEARED, false, false, false, false, false); + } + } else { + inOrderCounterValue = this->inOrderExecInfo->getCounterValue() + getInOrderIncrementValue(); + inOrderExecInfo = this->inOrderExecInfo.get(); + if (event && event->isCounterBased()) { + isCounterBasedEvent = true; + if (event->getInOrderIncrementValue(this->partitionCount) > 0) { + inOrderIncrementGpuAddress = event->getInOrderExecInfo()->getBaseDeviceAddress(); + inOrderIncrementValue = event->getInOrderIncrementValue(this->partitionCount); } } } @@ -480,7 +460,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K } if (!launchParams.makeKernelCommandView) { - if (compactEvent && !compactEvent->isCounterBased()) { + if (compactEvent) { void **syncCmdBuffer = nullptr; if (launchParams.outSyncCommand != nullptr) { launchParams.outSyncCommand->type = CommandToPatch::SignalEventPostSyncPipeControl; @@ -490,12 +470,9 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K if (compactEvent->isInterruptModeEnabled()) { NEO::EncodeUserInterrupt::encode(*commandContainer.getCommandStream()); } - } else if (event && !compactL3FlushEvent(getDcFlushRequired(event->isSignalScope()))) { + } else if (event) { event->setPacketsInUse(partitionCount); - if (l3FlushInPipeControlEnable) { - programEventL3Flush(event); - } if (!launchParams.isKernelSplitOperation) { dispatchEventRemainingPacketsPostSyncOperation(event, false); } @@ -505,20 +482,12 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K if (inOrderExecSignalRequired) { if (inOrderNonWalkerSignalling) { if (!launchParams.skipInOrderNonWalkerSignaling) { - if (!(event->isCounterBased() && event->isEventTimestampFlagSet())) { - if (compactEvent && compactEvent->isCounterBased()) { - auto pcCmdPtr = this->commandContainer.getCommandStream()->getSpace(0u); - inOrderCounterValue = this->inOrderExecInfo->getCounterValue() + getInOrderIncrementValue(); - appendSignalInOrderDependencyCounter(event, false, true, textureFlushRequired, false); - addCmdForPatching(nullptr, pcCmdPtr, nullptr, inOrderCounterValue, NEO::InOrderPatchCommandHelpers::PatchCmdType::pipeControl); - textureFlushRequired = false; - } else { - appendWaitOnSingleEvent(event, launchParams.outListCommands, false, false, CommandToPatch::CbEventTimestampPostSyncSemaphoreWait); - appendSignalInOrderDependencyCounter(event, false, false, false, false); - } - } else { + if (event->isCounterBased()) { this->latestOperationHasOptimizedCbEvent = true; event->setOptimizedCbEvent(true); + } else { + appendWaitOnSingleEvent(event, launchParams.outListCommands, false, false, CommandToPatch::CbEventTimestampPostSyncSemaphoreWait); + appendSignalInOrderDependencyCounter(event, false, false, false, false); } } } else { @@ -668,7 +637,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelSplit(Kernel Event *event, CmdListKernelLaunchParams &launchParams) { if (event) { - if (eventSignalPipeControl(launchParams.isKernelSplitOperation, getDcFlushRequired(event->isSignalScope()))) { + if (eventSignalPipeControl(launchParams.isKernelSplitOperation, getDcFlushRequired(event->isFlushRequiredForSignal()))) { event = nullptr; } else { event->increaseKernelCount(); diff --git a/level_zero/core/source/event/event.h b/level_zero/core/source/event/event.h index cd816d800a..b2a731e355 100644 --- a/level_zero/core/source/event/event.h +++ b/level_zero/core/source/event/event.h @@ -323,6 +323,7 @@ struct Event : _ze_event_handle_t { void updateInOrderExecState(const std::shared_ptr &newInOrderExecInfo, uint64_t signalValue, uint32_t allocationOffset); bool isCounterBased() const { return ((counterBasedMode == CounterBasedMode::explicitlyEnabled) || (counterBasedMode == CounterBasedMode::implicitlyEnabled)); } bool isCounterBasedExplicitlyEnabled() const { return (counterBasedMode == CounterBasedMode::explicitlyEnabled); } + bool isFlushRequiredForSignal() const { return !isCounterBased() && isSignalScope(); } void enableCounterBasedMode(bool apiRequest, uint32_t flags); void disableImplicitCounterBasedMode(); uint64_t getInOrderExecSignalValueWithSubmissionCounter() const; @@ -357,19 +358,11 @@ struct Event : _ze_event_handle_t { bool isIpcImported() const { return isFromIpcPool; } - void setMitigateHostVisibleSignal() { - this->mitigateHostVisibleSignal = true; - } - virtual ze_result_t hostEventSetValue(State eventState) = 0; size_t getOffsetInSharedAlloc() const { return offsetInSharedAlloc; } void setReportEmptyCbEventAsReady(bool reportEmptyCbEventAsReady) { this->reportEmptyCbEventAsReady = reportEmptyCbEventAsReady; } - void setEventOnBarrierOptimized(bool value) { - this->isEventOnBarrierOptimized = value; - } - static bool isAggregatedEvent(const Event *event) { return (event && event->getInOrderIncrementValue(1) > 0); } MOCKABLE_VIRTUAL CommandList *getRecordedSignalFrom() const { @@ -466,9 +459,7 @@ struct Event : _ze_event_handle_t { bool kmdWaitMode = false; bool interruptMode = false; bool isSharableCounterBased = false; - bool mitigateHostVisibleSignal = false; bool reportEmptyCbEventAsReady = true; - bool isEventOnBarrierOptimized = false; bool optimizedCbEvent = false; bool graphExternalEvent = false; }; diff --git a/level_zero/core/source/event/event_impl.inl b/level_zero/core/source/event/event_impl.inl index cdb8752504..7a2eb110b9 100644 --- a/level_zero/core/source/event/event_impl.inl +++ b/level_zero/core/source/event/event_impl.inl @@ -746,13 +746,10 @@ ze_result_t EventImp::hostSynchronize(uint64_t timeout) { auto &hwInfo = this->device->getHwInfo(); auto isHeaplessModeDisabled = !this->device->getCompilerProductHelper().isHeaplessModeEnabled(hwInfo); auto isDcFlushAllowed = this->device->getProductHelper().isDcFlushAllowed(); - auto isFlushForOptimizedBarrierRequired = isDcFlushAllowed && this->isEventOnBarrierOptimized; - if ((((this->isCounterBased() && !this->inOrderTimestampNode.empty()) || this->mitigateHostVisibleSignal) && isDcFlushAllowed && isHeaplessModeDisabled) || - isFlushForOptimizedBarrierRequired) { + if (((isCounterBased() || this->inOrderExecInfo.get()) && isDcFlushAllowed && isHeaplessModeDisabled)) { auto lock = this->csrs[0]->obtainUniqueOwnership(); this->csrs[0]->flushTagUpdate(); taskCountToWaitForL3Flush = this->csrs[0]->peekLatestFlushedTaskCount(); - this->setEventOnBarrierOptimized(false); } waitStartTime = std::chrono::high_resolution_clock::now(); diff --git a/level_zero/core/source/mutable_cmdlist/mutable_cmdlist_hw.inl b/level_zero/core/source/mutable_cmdlist/mutable_cmdlist_hw.inl index f2a7eaf836..16526ab208 100644 --- a/level_zero/core/source/mutable_cmdlist/mutable_cmdlist_hw.inl +++ b/level_zero/core/source/mutable_cmdlist/mutable_cmdlist_hw.inl @@ -231,7 +231,7 @@ inline ze_result_t MutableCommandListCoreFamily::appendLaunchKern MutablePipeControl *signalPipeControl = nullptr; if (mutableEventParams.counterBasedEvent) { // both TS and L3 flush events need additional clean Store Data Imm -> signal cmd (CW or PC or StoreRegMem) -> sync SemWait - if (mutableEventParams.counterBasedTimestampEvent || mutableEventParams.l3FlushEvent) { + if (mutableEventParams.counterBasedTimestampEvent) { auto &eventVariableSemaphoreWaitList = signalEventVariableDesc.eventVariable->getSemWaitList(); auto &eventVariableStoreDataImmList = signalEventVariableDesc.eventVariable->getStoreDataImmList(); @@ -239,17 +239,6 @@ inline ze_result_t MutableCommandListCoreFamily::appendLaunchKern eventVariableSemaphoreWaitList, eventVariableStoreDataImmList); walker = this->appendKernelMutableComputeWalker; } - if (mutableEventParams.l3FlushEventTimestampSyncCmds) { - // L3 TS is signaled by StoreRegMem - auto &eventVariableStoreRegMemList = signalEventVariableDesc.eventVariable->getStoreRegMemList(); - captureStandaloneTimestampSignalEventCommands(eventVariableStoreRegMemList); - } else if (mutableEventParams.l3FlushEventSyncCmd) { - // L3 Immediate is signaled by PC - auto signalPipeControlPtr = std::make_unique>(mutableEventParams.signalCmd.pDestination); - mutablePipeControlCmds.emplace_back(std::move(signalPipeControlPtr)); - signalPipeControl = (*mutablePipeControlCmds.rbegin()).get(); - } - if (mutableEventParams.inOrderIncrementEvent) { walker = this->appendKernelMutableComputeWalker; } @@ -816,7 +805,7 @@ void MutableCommandListCoreFamily::storeSignalEventVariable(Mutab launchParams.omitAddingEventResidency = event->getAllocation(this->device) != nullptr; - mutableEventParams.l3FlushEvent = CommandListCoreFamily::compactL3FlushEvent(CommandListCoreFamily::getDcFlushRequired(event->isSignalScope())); + mutableEventParams.l3FlushEvent = CommandListCoreFamily::compactL3FlushEvent(CommandListCoreFamily::getDcFlushRequired(event->isFlushRequiredForSignal())); if (CommandListImp::isInOrderExecutionEnabled()) { mutableEventParams.eventInsideInOrder = true; mutableEventParams.counterBasedEvent = event->isCounterBased(); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_copy_event_xehp_and_later.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_copy_event_xehp_and_later.cpp index 1c41d33cdc..9d0a46619e 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_copy_event_xehp_and_later.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_copy_event_xehp_and_later.cpp @@ -1008,54 +1008,6 @@ HWTEST2_F(AppendMemoryCopyXeHpAndLaterMultiPacket, testSingleTileAppendMemoryCopyThreeKernelsAndL3Flush(input, arg); } -HWTEST2_F(AppendMemoryCopyXeHpAndLaterMultiPacket, - givenCommandListAndTimestampEventWithSignalScopeWhenTimestampProvidedByComputeWalkerPostSyncPassedToMemoryCopyThenAppendProfilingCalledForSingleSeparateKernelAndL3FlushWithPostSyncAddedOnce, - IsXeHpgCore) { - arg.expectedPacketsInUse = 2; - arg.expectedKernelCount = 1; - arg.expectedWalkerPostSyncOp = 3; - arg.expectedPostSyncPipeControls = 1; - arg.postSyncAddressZero = false; - - input.srcPtr = reinterpret_cast(0x1000); - input.dstPtr = reinterpret_cast(0x20000000); - input.size = 0x100000000; - - input.eventPoolFlags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; - - if (input.signalAllPackets) { - constexpr uint32_t reminderPostSyncOps = 2; - arg.expectStoreDataImm = reminderPostSyncOps; - input.storeDataImmOffset = arg.expectedPacketsInUse * NEO::TimestampPackets::getSinglePacketSize(); - } - - testSingleTileAppendMemoryCopySingleKernelAndL3Flush(input, arg); -} - -HWTEST2_F(AppendMemoryCopyXeHpAndLaterMultiPacket, - givenCommandListAndEventWithSignalScopeWhenImmediateProvidedByComputeWalkerPostSyncPassedToMemoryCopyThenAppendProfilingCalledForSingleSeparateKernelAndL3FlushWithPostSyncAddedOnce, - IsXeHpgCore) { - arg.expectedPacketsInUse = 2; - arg.expectedKernelCount = 1; - arg.expectedWalkerPostSyncOp = input.device->isImplicitScalingCapable() ? 3 : 1; - arg.expectedPostSyncPipeControls = 1; - arg.postSyncAddressZero = false; - - input.srcPtr = reinterpret_cast(0x1000); - input.dstPtr = reinterpret_cast(0x20000000); - input.size = 0x100000000; - - input.eventPoolFlags = 0; - - if (input.signalAllPackets) { - constexpr uint32_t reminderPostSyncOps = 2; - arg.expectStoreDataImm = reminderPostSyncOps; - input.storeDataImmOffset = arg.expectedPacketsInUse * testEvent->getSinglePacketSize(); - } - - testSingleTileAppendMemoryCopySingleKernelAndL3Flush(input, arg); -} - HWTEST2_F(AppendMemoryCopyXeHpAndLaterMultiPacket, givenCommandListWhenMemoryCopyWithSignalEventScopeSetToSubDeviceThenB2BPipeControlIsAddedWithDcFlushWithPostSyncForLastPC, IsXeHpgCore) { input.srcPtr = reinterpret_cast(0x1231); @@ -1162,42 +1114,6 @@ HWTEST2_F(AppendMemoryCopyXeHpAndLaterSinglePacket, testSingleTileAppendMemoryCopyThreeKernelsAndL3Flush(input, arg); } -HWTEST2_F(AppendMemoryCopyXeHpAndLaterSinglePacket, - givenCommandListAndTimestampEventWithSignalScopeWhenTimestampProvidedByComputeWalkerPostSyncPassedToMemoryCopyThenAppendProfilingCalledForSingleSeparateKernelAndL3FlushWithPostSyncAddedOnce, - IsXeHpgCore) { - arg.expectedPacketsInUse = 2; - arg.expectedKernelCount = 1; - arg.expectedWalkerPostSyncOp = 3; - arg.expectedPostSyncPipeControls = 1; - arg.postSyncAddressZero = false; - - input.srcPtr = reinterpret_cast(0x1000); - input.dstPtr = reinterpret_cast(0x20000000); - input.size = 0x100000000; - - input.eventPoolFlags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; - - testSingleTileAppendMemoryCopySingleKernelAndL3Flush(input, arg); -} - -HWTEST2_F(AppendMemoryCopyXeHpAndLaterSinglePacket, - givenCommandListAndEventWithSignalScopeWhenImmediateProvidedByComputeWalkerPostSyncPassedToMemoryCopyThenAppendProfilingCalledForSingleSeparateKernelAndL3FlushWithPostSyncAddedOnce, - IsXeHpgCore) { - arg.expectedPacketsInUse = 2; - arg.expectedKernelCount = 1; - arg.expectedWalkerPostSyncOp = input.device->isImplicitScalingCapable() ? 3 : 1; - arg.expectedPostSyncPipeControls = 1; - arg.postSyncAddressZero = false; - - input.srcPtr = reinterpret_cast(0x1000); - input.dstPtr = reinterpret_cast(0x20000000); - input.size = 0x100000000; - - input.eventPoolFlags = 0; - - testSingleTileAppendMemoryCopySingleKernelAndL3Flush(input, arg); -} - HWTEST2_F(AppendMemoryCopyXeHpAndLaterSinglePacket, givenCommandListWhenMemoryCopyWithSignalEventScopeSetToSubDeviceThenB2BPipeControlIsAddedWithDcFlushWithPostSyncForLastPC, IsXeHpgCore) { input.srcPtr = reinterpret_cast(0x1231); @@ -1292,54 +1208,6 @@ HWTEST2_F(MultiTileAppendMemoryCopyXeHpAndLaterMultiPacket, testMultiTileAppendMemoryCopyThreeKernelsAndL3Flush(input, arg); } -HWTEST2_F(MultiTileAppendMemoryCopyXeHpAndLaterMultiPacket, - givenMultiTileCommandListAndTimestampEventWithSignalScopeWhenTimestampProvidedByComputeWalkerPostSyncPassedToMemoryCopyThenAppendProfilingCalledForSingleSeparateMultiTileKernelAndL3FlushWithPostSyncAddedForScopedEvent, - IsXeHpgCore) { - arg.expectedPacketsInUse = 4; - arg.expectedKernelCount = 1; - arg.expectedWalkerPostSyncOp = 3; - arg.expectedPostSyncPipeControls = 1; - arg.postSyncAddressZero = false; - - input.srcPtr = reinterpret_cast(0x1000); - input.dstPtr = reinterpret_cast(0x20000000); - input.size = 0x100000000; - - input.eventPoolFlags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; - - if (input.signalAllPackets) { - constexpr uint32_t reminderPostSyncOps = 2; - arg.expectStoreDataImm = reminderPostSyncOps; - input.storeDataImmOffset = arg.expectedPacketsInUse * NEO::TimestampPackets::getSinglePacketSize(); - } - - testMultiTileAppendMemoryCopySingleKernelAndL3Flush(input, arg); -} - -HWTEST2_F(MultiTileAppendMemoryCopyXeHpAndLaterMultiPacket, - givenMultiTileCommandListAndEventWithSignalScopeWhenImmdiateProvidedByComputeWalkerAndPipeControlPostSyncPassedToMemoryCopyThenAppendProfilingCalledForSingleSeparateMultiTileKernelAndL3FlushWithPostSyncAddedForScopedEvent, - IsXeHpgCore) { - arg.expectedPacketsInUse = 4; - arg.expectedKernelCount = 1; - arg.expectedWalkerPostSyncOp = 1; - arg.expectedPostSyncPipeControls = 1; - arg.postSyncAddressZero = false; - - input.srcPtr = reinterpret_cast(0x1000); - input.dstPtr = reinterpret_cast(0x20000000); - input.size = 0x100000000; - - input.eventPoolFlags = 0; - - if (input.signalAllPackets) { - constexpr uint32_t reminderPostSyncOps = 2; - arg.expectStoreDataImm = reminderPostSyncOps; - input.storeDataImmOffset = arg.expectedPacketsInUse * testEvent->getSinglePacketSize(); - } - - testMultiTileAppendMemoryCopySingleKernelAndL3Flush(input, arg); -} - using MultiTileAppendMemoryCopyXeHpAndLaterSinglePacket = Test>; HWTEST2_F(MultiTileAppendMemoryCopyXeHpAndLaterSinglePacket, @@ -1433,42 +1301,6 @@ HWTEST2_F(MultiTileAppendMemoryCopyXeHpAndLaterSinglePacket, testMultiTileAppendMemoryCopyThreeKernelsAndL3Flush(input, arg); } -HWTEST2_F(MultiTileAppendMemoryCopyXeHpAndLaterSinglePacket, - givenMultiTileCommandListAndTimestampEventWithSignalScopeWhenTimestampProvidedByComputeWalkerPostSyncPassedToMemoryCopyThenAppendProfilingCalledForSingleKernelPostSync, - IsXeHpgCore) { - arg.expectedPacketsInUse = 4; - arg.expectedKernelCount = 1; - arg.expectedWalkerPostSyncOp = 3; - arg.expectedPostSyncPipeControls = 1; - arg.postSyncAddressZero = false; - - input.srcPtr = reinterpret_cast(0x1000); - input.dstPtr = reinterpret_cast(0x20000000); - input.size = 0x100000000; - - input.eventPoolFlags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; - - testMultiTileAppendMemoryCopySingleKernelAndL3Flush(input, arg); -} - -HWTEST2_F(MultiTileAppendMemoryCopyXeHpAndLaterSinglePacket, - givenMultiTileCommandListAndEventWithSignalScopeWhenImmediateProvidedByComputeWalkerAndPipeControlPostSyncPassedToMemoryCopyThenAppendProfilingCalledForSingleKernelAndL3FlushPipeControlPostSyncAddedForScopedEvent, - IsXeHpgCore) { - arg.expectedPacketsInUse = 4; - arg.expectedKernelCount = 1; - arg.expectedWalkerPostSyncOp = 1; - arg.expectedPostSyncPipeControls = 1; - arg.postSyncAddressZero = false; - - input.srcPtr = reinterpret_cast(0x1000); - input.dstPtr = reinterpret_cast(0x20000000); - input.size = 0x100000000; - - input.eventPoolFlags = 0; - - testMultiTileAppendMemoryCopySingleKernelAndL3Flush(input, arg); -} - using AppendMemoryCopyL3CompactEventTest = Test>; HWTEST2_F(AppendMemoryCopyL3CompactEventTest, diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_fill_event_xehp_and_later.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_fill_event_xehp_and_later.cpp index 798358e1b5..bca7d4e24c 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_fill_event_xehp_and_later.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_fill_event_xehp_and_later.cpp @@ -822,26 +822,6 @@ HWTEST2_F(AppendFillMultiPacketEventTest, testSingleTileAppendMemoryFillSingleKernel(input, arg); } -HWTEST2_F(AppendFillMultiPacketEventTest, - givenAppendMemoryFillUsingSinglePacketEventWhenPatternDispatchOneKernelThenUseComputeWalkerPostSyncAndL3PostSync, - IsXeHpgCore) { - arg.expectedPacketsInUse = 2; - arg.expectedKernelCount = 1; - arg.expectedWalkerPostSyncOp = 3; - arg.expectedPostSyncPipeControls = 1; - arg.postSyncAddressZero = false; - - input.eventPoolFlags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; - - if (input.signalAllPackets) { - constexpr uint32_t reminderPostSyncOps = 2; - arg.expectStoreDataImm = reminderPostSyncOps; - input.storeDataImmOffset = arg.expectedPacketsInUse * NEO::TimestampPackets::getSinglePacketSize(); - } - - testSingleTileAppendMemoryFillSingleKernelAndL3Flush(input, arg); -} - using AppendFillSinglePacketEventTest = Test>; HWTEST2_F(AppendFillSinglePacketEventTest, @@ -902,20 +882,6 @@ HWTEST2_F(AppendFillSinglePacketEventTest, testSingleTileAppendMemoryFillSingleKernel(input, arg); } -HWTEST2_F(AppendFillSinglePacketEventTest, - givenAppendMemoryFillUsingSinglePacketEventWhenPatternDispatchOneKernelThenUseComputeWalkerPostSyncAndL3PostSync, - IsXeHpgCore) { - arg.expectedPacketsInUse = 2; - arg.expectedKernelCount = 1; - arg.expectedWalkerPostSyncOp = 3; - arg.expectedPostSyncPipeControls = 1; - arg.postSyncAddressZero = false; - - input.eventPoolFlags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; - - testSingleTileAppendMemoryFillSingleKernelAndL3Flush(input, arg); -} - using MultiTileAppendFillEventMultiPacketTest = Test>; HWTEST2_F(MultiTileAppendFillEventMultiPacketTest, @@ -981,48 +947,6 @@ HWTEST2_F(MultiTileAppendFillEventMultiPacketTest, testMultiTileAppendMemoryFillManyKernels(input, arg); } -HWTEST2_F(MultiTileAppendFillEventMultiPacketTest, - givenMultiTileCmdListCallToAppendMemoryFillWhenSignalScopeTimestampEventUsesComputeWalkerPostSyncThenSingleKernelsUsesWalkerPostSyncProfilingAndSingleDcFlushWithImmediatePostSync, IsXeHpgCore) { - // kernel uses 4 packets, in addition to kernel two packets, use 2 packets to two tile cache flush - arg.expectedPacketsInUse = 4; - arg.expectedKernelCount = 1; - arg.expectedWalkerPostSyncOp = 3; - // cache flush with event signal - arg.expectedPostSyncPipeControls = 1; - arg.postSyncAddressZero = false; - - input.eventPoolFlags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; - - if (input.signalAllPackets) { - constexpr uint32_t reminderPostSyncOps = 2; - arg.expectStoreDataImm = reminderPostSyncOps; - input.storeDataImmOffset = arg.expectedPacketsInUse * NEO::TimestampPackets::getSinglePacketSize(); - } - - testMultiTileAppendMemoryFillSingleKernelAndL3Flush(input, arg); -} - -HWTEST2_F(MultiTileAppendFillEventMultiPacketTest, - givenMultiTileCmdListCallToAppendMemoryFillWhenSignalScopeImmediateEventUsesComputeWalkerPostSyncThenSingleKernelUsesWalkerPostSyncAndSingleDcFlushWithPostSync, IsXeHpgCore) { - // kernel uses 4 packets, in addition to kernel two packets, use 2 packets to two tile cache flush - arg.expectedPacketsInUse = 4; - arg.expectedKernelCount = 1; - arg.expectedWalkerPostSyncOp = 1; - // cache flush with event signal - arg.expectedPostSyncPipeControls = 1; - arg.postSyncAddressZero = false; - - input.eventPoolFlags = 0; - - if (input.signalAllPackets) { - constexpr uint32_t reminderPostSyncOps = 2; - arg.expectStoreDataImm = reminderPostSyncOps; - input.storeDataImmOffset = arg.expectedPacketsInUse * testEvent->getSinglePacketSize(); - } - - testMultiTileAppendMemoryFillSingleKernelAndL3Flush(input, arg); -} - using MultiTileAppendFillEventSinglePacketTest = Test>; HWTEST2_F(MultiTileAppendFillEventSinglePacketTest, diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp index aa4faf7465..dce112ba7a 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp @@ -331,10 +331,9 @@ struct AppendKernelTestInput { bool useFirstEventPacketAddress = false; }; -template +template struct CommandListAppendLaunchKernelCompactL3FlushEventFixture : public ModuleFixture { void setUp() { - debugManager.flags.CompactL3FlushEventPacket.set(compactL3FlushEventPacket); debugManager.flags.SignalAllEventPackets.set(0); if constexpr (multiTile == 1) { debugManager.flags.CreateMultipleSubDevices.set(2); @@ -451,37 +450,7 @@ struct CommandListAppendLaunchKernelCompactL3FlushEventFixture : public ModuleFi TestExpectedValues arg = {}; }; -using CommandListAppendLaunchKernelCompactL3FlushDisabledTest = Test>; - -HWTEST2_F(CommandListAppendLaunchKernelCompactL3FlushDisabledTest, - givenAppendKernelWithSignalScopeTimestampEventWhenComputeWalkerTimestampPostsyncAndL3ImmediatePostsyncUsedThenExpectComputeWalkerAndPipeControlPostsync, - IsXeHpgCore) { - arg.expectedKernelCount = 1; - arg.expectedPacketsInUse = 2; - arg.expectedPostSyncPipeControls = 1; - arg.expectedWalkerPostSyncOp = 3; - arg.postSyncAddressZero = false; - - input.eventPoolFlags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; - - testAppendLaunchKernelAndL3Flush(input, arg); -} - -HWTEST2_F(CommandListAppendLaunchKernelCompactL3FlushDisabledTest, - givenAppendKernelWithSignalScopeImmediateEventWhenComputeWalkerImmediatePostsyncAndL3ImmediatePostsyncUsedThenExpectComputeWalkerAndPipeControlPostsync, - IsXeHpgCore) { - arg.expectedKernelCount = 1; - arg.expectedPacketsInUse = 2; - arg.expectedPostSyncPipeControls = 1; - arg.expectedWalkerPostSyncOp = input.device->isImplicitScalingCapable() ? 3 : 1; - arg.postSyncAddressZero = false; - - input.eventPoolFlags = 0; - - testAppendLaunchKernelAndL3Flush(input, arg); -} - -using CommandListAppendLaunchKernelCompactL3FlushEnabledTest = Test>; +using CommandListAppendLaunchKernelCompactL3FlushEnabledTest = Test>; HWTEST2_F(CommandListAppendLaunchKernelCompactL3FlushEnabledTest, givenAppendKernelWithSignalScopeTimestampEventWhenRegisterTimestampPostsyncUsedThenExpectNoComputeWalkerAndPipeControlPostsync, @@ -513,37 +482,7 @@ HWTEST2_F(CommandListAppendLaunchKernelCompactL3FlushEnabledTest, testAppendLaunchKernelAndL3Flush(input, arg); } -using CommandListAppendLaunchKernelMultiTileCompactL3FlushDisabledTest = Test>; - -HWTEST2_F(CommandListAppendLaunchKernelMultiTileCompactL3FlushDisabledTest, - givenAppendMultiTileKernelWithSignalScopeTimestampEventWhenComputeWalkerTimestampPostsyncAndL3ImmediatePostsyncUsedThenExpectComputeWalkerAndPipeControlPostsync, - IsXeHpgCore) { - arg.expectedKernelCount = 1; - arg.expectedPacketsInUse = 4; - arg.expectedPostSyncPipeControls = 1; - arg.expectedWalkerPostSyncOp = 3; - arg.postSyncAddressZero = false; - - input.eventPoolFlags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; - - testAppendLaunchKernelAndL3Flush(input, arg); -} - -HWTEST2_F(CommandListAppendLaunchKernelMultiTileCompactL3FlushDisabledTest, - givenAppendMultiTileKernelWithSignalScopeImmediateEventWhenComputeWalkerImmediatePostsyncAndL3ImmediatePostsyncUsedThenExpectComputeWalkerAndPipeControlPostsync, - IsXeHpgCore) { - arg.expectedKernelCount = 1; - arg.expectedPacketsInUse = 4; - arg.expectedPostSyncPipeControls = 1; - arg.expectedWalkerPostSyncOp = 1; - arg.postSyncAddressZero = false; - - input.eventPoolFlags = 0; - - testAppendLaunchKernelAndL3Flush(input, arg); -} - -using CommandListAppendLaunchKernelMultiTileCompactL3FlushEnabledTest = Test>; +using CommandListAppendLaunchKernelMultiTileCompactL3FlushEnabledTest = Test>; HWTEST2_F(CommandListAppendLaunchKernelMultiTileCompactL3FlushEnabledTest, givenAppendMultiTileKernelWithSignalScopeTimestampEventWhenRegisterTimestampPostsyncUsedThenExpectNoComputeWalkerAndPipeControlPostsync, diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_1.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_1.cpp index 8cf14c8791..32203c47fd 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_1.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_1.cpp @@ -2091,7 +2091,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, InOrderCmdListTests, givenEventWithRequiredPipeCont auto sdiItor = find(cmdList.begin(), cmdList.end()); - if (immCmdList->eventSignalPipeControl(false, immCmdList->getDcFlushRequired(events[0]->isSignalScope()))) { + if (immCmdList->eventSignalPipeControl(false, immCmdList->getDcFlushRequired(events[0]->isFlushRequiredForSignal()))) { EXPECT_NE(cmdList.end(), sdiItor); } else { EXPECT_EQ(cmdList.end(), sdiItor); @@ -2655,7 +2655,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, InOrderCmdListTests, givenInOrderModeWhenProgrammin auto eventPool = createEvents(1, false); - bool isCompactEvent = immCmdList->compactL3FlushEvent(immCmdList->getDcFlushRequired(events[0]->isSignalScope())); + bool isCompactEvent = immCmdList->compactL3FlushEvent(immCmdList->getDcFlushRequired(events[0]->isFlushRequiredForSignal())); immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams); @@ -3266,11 +3266,11 @@ HWTEST_F(InOrderCmdListTests, givenHostVisibleEventOnLatestFlushWhenCallingSynch events[0]->signalScope = ZE_EVENT_SCOPE_FLAG_HOST; immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, events[0]->toHandle(), 0, nullptr, launchParams); - EXPECT_TRUE(immCmdList->latestFlushIsHostVisible); + EXPECT_EQ(!immCmdList->dcFlushSupport || immCmdList->isHeaplessModeEnabled(), immCmdList->latestFlushIsHostVisible); immCmdList->hostSynchronize(0, false); - if (!immCmdList->isHeaplessModeEnabled() && immCmdList->latestOperationHasOptimizedCbEvent) { + if (!immCmdList->latestFlushIsHostVisible || (!immCmdList->isHeaplessModeEnabled() && immCmdList->latestOperationHasOptimizedCbEvent)) { EXPECT_EQ(0u, immCmdList->synchronizeInOrderExecutionCalled); EXPECT_EQ(3u, ultCsr->waitForCompletionWithTimeoutTaskCountCalled); } else if (immCmdList->dcFlushSupport) { @@ -3284,7 +3284,7 @@ HWTEST_F(InOrderCmdListTests, givenHostVisibleEventOnLatestFlushWhenCallingSynch // handle post sync operations immCmdList->hostSynchronize(0, true); - if (!immCmdList->isHeaplessModeEnabled() && immCmdList->latestOperationHasOptimizedCbEvent) { + if (!immCmdList->latestFlushIsHostVisible || (!immCmdList->isHeaplessModeEnabled() && immCmdList->latestOperationHasOptimizedCbEvent)) { EXPECT_EQ(0u, immCmdList->synchronizeInOrderExecutionCalled); EXPECT_EQ(4u, ultCsr->waitForCompletionWithTimeoutTaskCountCalled); } else if (immCmdList->dcFlushSupport) { @@ -3309,14 +3309,13 @@ HWTEST_F(InOrderCmdListTests, givenEmptyTempAllocationsStorageWhenCallingSynchro events[0]->signalScope = ZE_EVENT_SCOPE_FLAG_HOST; immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, events[0]->toHandle(), 0, nullptr, launchParams); - EXPECT_TRUE(immCmdList->latestFlushIsHostVisible); - + EXPECT_EQ(!immCmdList->dcFlushSupport || immCmdList->isHeaplessModeEnabled(), immCmdList->latestFlushIsHostVisible); EXPECT_EQ(0u, immCmdList->synchronizeInOrderExecutionCalled); EXPECT_EQ(0u, ultCsr->waitForCompletionWithTimeoutTaskCountCalled); immCmdList->hostSynchronize(0, true); - if (!immCmdList->isHeaplessModeEnabled() && immCmdList->latestOperationHasOptimizedCbEvent) { + if (!immCmdList->latestFlushIsHostVisible || (!immCmdList->isHeaplessModeEnabled() && immCmdList->latestOperationHasOptimizedCbEvent)) { EXPECT_EQ(0u, immCmdList->synchronizeInOrderExecutionCalled); EXPECT_EQ(1u, ultCsr->waitForCompletionWithTimeoutTaskCountCalled); } else { @@ -3328,7 +3327,7 @@ HWTEST_F(InOrderCmdListTests, givenEmptyTempAllocationsStorageWhenCallingSynchro immCmdList->hostSynchronize(0, true); - if (!immCmdList->isHeaplessModeEnabled() && immCmdList->latestOperationHasOptimizedCbEvent) { + if (!immCmdList->latestFlushIsHostVisible || (!immCmdList->isHeaplessModeEnabled() && immCmdList->latestOperationHasOptimizedCbEvent)) { EXPECT_EQ(0u, immCmdList->synchronizeInOrderExecutionCalled); EXPECT_EQ(2u, ultCsr->waitForCompletionWithTimeoutTaskCountCalled); } else { @@ -3371,9 +3370,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, InOrderCmdListTests, givenMultipleAllocationsForWri auto eventPool2 = createEvents(1, false); events[2]->makeCounterBasedInitiallyDisabled(eventPool2->getAllocation()); - bool isCompactEvent0 = immCmdList->compactL3FlushEvent(immCmdList->getDcFlushRequired(events[0]->isSignalScope())); - bool isCompactEvent1 = immCmdList->compactL3FlushEvent(immCmdList->getDcFlushRequired(events[1]->isSignalScope())); - bool isCompactEvent2 = immCmdList->compactL3FlushEvent(immCmdList->getDcFlushRequired(events[2]->isSignalScope())); + bool isCompactEvent0 = immCmdList->compactL3FlushEvent(immCmdList->getDcFlushRequired(events[0]->isFlushRequiredForSignal())); + bool isCompactEvent1 = immCmdList->compactL3FlushEvent(immCmdList->getDcFlushRequired(events[1]->isFlushRequiredForSignal())); + bool isCompactEvent2 = immCmdList->compactL3FlushEvent(immCmdList->getDcFlushRequired(events[2]->isFlushRequiredForSignal())); EXPECT_TRUE(immCmdList->isInOrderNonWalkerSignalingRequired(events[0].get())); EXPECT_EQ(isCompactEvent1, immCmdList->isInOrderNonWalkerSignalingRequired(events[1].get())); @@ -4243,63 +4242,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, InOrderCmdListTests, givenCopyOnlyInOrderModeWhenPr context->freeMem(data); } -HWCMDTEST_F(IGFX_XE_HP_CORE, InOrderCmdListTests, givenInOrderModeWhenProgrammingFillWithSplitAndOutEventThenSignalInOrderAllocation) { - using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM; - using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; - - auto immCmdList = createImmCmdList(); - - auto cmdStream = immCmdList->getCmdContainer().getCommandStream(); - - auto eventPool = createEvents(1, false); - - constexpr size_t size = 128 * sizeof(uint32_t); - auto data = allocHostMem(size); - - immCmdList->appendMemoryFill(data, data, 1, (size / 2) + 1, events[0]->toHandle(), 0, nullptr, copyParams); - - GenCmdList cmdList; - ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(cmdList, cmdStream->getCpuBase(), cmdStream->getUsed())); - - auto walkerItor = NEO::UnitTestHelper::findWalkerTypeCmd(cmdList.begin(), cmdList.end()); - - ASSERT_NE(cmdList.end(), walkerItor); - - auto pcItors = findAll(walkerItor, cmdList.end()); - EXPECT_FALSE(pcItors.empty()); - - bool foundMatchingPipeControl = false; - for (auto pcItor : pcItors) { - auto pcCmd = genCmdCast(*pcItor); - ASSERT_NE(nullptr, pcCmd); - - if (pcCmd->getDcFlushEnable() == immCmdList->getDcFlushRequired(true) && - UnitTestHelper::getPipeControlHdcPipelineFlush(*pcCmd) && - pcCmd->getUnTypedDataPortCacheFlush()) { - foundMatchingPipeControl = true; - break; - } - } - - EXPECT_TRUE(foundMatchingPipeControl); - - auto sdiItor = find(walkerItor, cmdList.end()); - ASSERT_NE(cmdList.end(), sdiItor); - - auto sdiCmd = genCmdCast(*sdiItor); - ASSERT_NE(nullptr, sdiCmd); - - auto inOrderExecInfo = immCmdList->inOrderExecInfo; - uint64_t syncVa = inOrderExecInfo->isHostStorageDuplicated() ? reinterpret_cast(inOrderExecInfo->getBaseHostAddress()) : inOrderExecInfo->getBaseDeviceAddress(); - - EXPECT_EQ(syncVa, sdiCmd->getAddress()); - EXPECT_EQ(immCmdList->isQwordInOrderCounter(), sdiCmd->getStoreQword()); - EXPECT_EQ(1u, sdiCmd->getDataDword0()); - EXPECT_EQ(0u, sdiCmd->getDataDword1()); - - context->freeMem(data); -} - HWCMDTEST_F(IGFX_XE_HP_CORE, InOrderCmdListTests, givenInOrderModeWhenProgrammingFillWithSplitAndOutProfilingEventThenSignalInOrderAllocation) { using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; @@ -4447,17 +4389,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, InOrderCmdListTests, givenInOrderModeWhenProgrammin using PostSyncType = std::decay_t; if (!immCmdList->inOrderAtomicSignalingEnabled) { - EXPECT_EQ(PostSyncType::OPERATION::OPERATION_NO_WRITE, postSync.getOperation()); - EXPECT_EQ(0u, postSync.getImmediateData()); + EXPECT_EQ(PostSyncType::OPERATION::OPERATION_WRITE_TIMESTAMP, postSync.getOperation()); } - - auto l3FlushAfterPostSyncEnabled = this->neoDevice->getProductHelper().isL3FlushAfterPostSyncSupported(true); - if (l3FlushAfterPostSyncEnabled) { - EXPECT_NE(0u, postSync.getDestinationAddress()); - } else { - EXPECT_EQ(0u, postSync.getDestinationAddress()); - } - + EXPECT_NE(0u, postSync.getDestinationAddress()); context->freeMem(data); } @@ -4691,7 +4625,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, InOrderCmdListTests, givenInOrderModeWhenProgrammin auto eventPool = createEvents(1, false); - bool isCompactEvent = immCmdList->compactL3FlushEvent(immCmdList->getDcFlushRequired(events[0]->isSignalScope())); + bool isCompactEvent = immCmdList->compactL3FlushEvent(immCmdList->getDcFlushRequired(events[0]->isFlushRequiredForSignal())); auto eventHandle = events[0]->toHandle(); @@ -4934,9 +4868,8 @@ HWTEST_F(InOrderCmdListTests, givenRegularCmdListWhenProgrammingAppendBarrierWit } HWTEST_F(InOrderCmdListTests, givenEventCounterReusedFromPreviousAppendWhenHostSynchronizeThenFlushCaches) { - if (!device->getProductHelper().isDcFlushAllowed()) { - GTEST_SKIP(); - } + auto isHeaplessModeDisabled = !device->getCompilerProductHelper().isHeaplessModeEnabled(device->getHwInfo()); + auto cacheFlushRequired = device->getProductHelper().isDcFlushAllowed() && isHeaplessModeDisabled; auto ultCsr = static_cast *>(device->getNEODevice()->getDefaultEngine().commandStreamReceiver); auto cmdList = createImmCmdList(); @@ -4950,32 +4883,11 @@ HWTEST_F(InOrderCmdListTests, givenEventCounterReusedFromPreviousAppendWhenHostS EXPECT_FALSE(ultCsr->flushTagUpdateCalled); events[0]->hostSynchronize(std::numeric_limits::max()); - EXPECT_TRUE(ultCsr->flushTagUpdateCalled); + auto flushTagCount = cacheFlushRequired ? 1 : 0; + EXPECT_EQ(flushTagCount, ultCsr->flushTagUpdateCalled); EXPECT_EQ(1u, events[0]->inOrderExecSignalValue); } -HWTEST_F(InOrderCmdListTests, givenEventCounterNotReusedFromPreviousAppendWhenHostSynchronizeThenDontFlushCaches) { - if (!device->getProductHelper().isDcFlushAllowed()) { - GTEST_SKIP(); - } - auto ultCsr = static_cast *>(device->getNEODevice()->getDefaultEngine().commandStreamReceiver); - - auto cmdList = createImmCmdList(); - - cmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams); - EXPECT_EQ(1u, cmdList->inOrderExecInfo->getCounterValue()); - - auto eventPool = createEvents(1, false); - auto eventHandle = events[0]->toHandle(); - cmdList->appendBarrier(eventHandle, 0, nullptr, false); - cmdList->appendLaunchKernel(kernel->toHandle(), groupCount, eventHandle, 0, nullptr, launchParams); - - EXPECT_FALSE(ultCsr->flushTagUpdateCalled); - events[0]->hostSynchronize(std::numeric_limits::max()); - EXPECT_FALSE(ultCsr->flushTagUpdateCalled); - EXPECT_EQ(2u, events[0]->inOrderExecSignalValue); -} - HWTEST_F(InOrderCmdListTests, givenTsCbEventWhenAppendNonKernelOperationOnNonHeaplessNonDcFlushPlatformThenWaitOnCounter) { if (device->getProductHelper().isDcFlushAllowed()) { GTEST_SKIP(); @@ -4996,8 +4908,6 @@ HWTEST_F(InOrderCmdListTests, givenTsCbEventWhenAppendNonKernelOperationOnNonHea EXPECT_FALSE(ultCsr->flushTagUpdateCalled); events[0]->hostSynchronize(std::numeric_limits::max()); - EXPECT_FALSE(ultCsr->flushTagUpdateCalled); - EXPECT_EQ(0u, ultCsr->waitForCompletionWithTimeoutTaskCountCalled); EXPECT_EQ(1u, CpuIntrinsicsTests::pauseCounter); EXPECT_EQ(1u, events[0]->inOrderExecSignalValue); } @@ -5462,7 +5372,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, InOrderCmdListTests, givenAubModeWhenSyncCalledAlwa immCmdList->hostSynchronize(0, false); - auto expectPollForCompletion = (immCmdList->isHeaplessModeEnabled() || !immCmdList->latestOperationHasOptimizedCbEvent) ? 1u : 0u; + auto expectPollForCompletion = immCmdList->latestFlushIsHostVisible && (immCmdList->isHeaplessModeEnabled() || !immCmdList->latestOperationHasOptimizedCbEvent) ? 1u : 0u; EXPECT_EQ(expectPollForCompletion++, ultCsr->pollForAubCompletionCalled); events[0]->hostSynchronize(std::numeric_limits::max()); @@ -6600,7 +6510,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, InOrderCmdListTests, givenDebugFlagSetWhenKernelSpl HWCMDTEST_F(IGFX_XE_HP_CORE, InOrderCmdListTests, givenInOrderModeWhenProgrammingKernelSplitWithEventThenSignalCounter) { using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM; - using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; debugManager.flags.EnableCopyWithStagingBuffers.set(0); auto immCmdList = createImmCmdList(); @@ -6619,33 +6528,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, InOrderCmdListTests, givenInOrderModeWhenProgrammin GenCmdList cmdList; ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(cmdList, cmdStream->getCpuBase(), cmdStream->getUsed())); - auto pcItors = findAll(cmdList.begin(), cmdList.end()); - ASSERT_FALSE(pcItors.empty()); - auto cmdItor = pcItors[0]; - - bool foundMatchingPipeControl = false; - for (auto pcItor : pcItors) { - auto pcCmd = genCmdCast(*pcItor); - ASSERT_NE(nullptr, pcCmd); - - if (pcCmd->getDcFlushEnable() == immCmdList->getDcFlushRequired(true) && - UnitTestHelper::getPipeControlHdcPipelineFlush(*pcCmd) && - pcCmd->getUnTypedDataPortCacheFlush()) { - foundMatchingPipeControl = true; - cmdItor = pcItor; - break; - } - } - - EXPECT_TRUE(foundMatchingPipeControl); - - auto sdiCmd = genCmdCast(*(++cmdItor)); - - while (sdiCmd == nullptr && cmdItor != cmdList.end()) { - sdiCmd = genCmdCast(*(++cmdItor)); - } - - ASSERT_NE(nullptr, sdiCmd); + auto sdiCmds = findAll(cmdList.begin(), cmdList.end()); + ASSERT_FALSE(sdiCmds.empty()); + auto sdiCmd = genCmdCast(*sdiCmds[0]); auto inOrderExecInfo = immCmdList->inOrderExecInfo; uint64_t syncVa = inOrderExecInfo->isHostStorageDuplicated() ? reinterpret_cast(inOrderExecInfo->getBaseHostAddress()) : inOrderExecInfo->getBaseDeviceAddress(); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_2.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_2.cpp index e5ed2881d1..c7941eab71 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_2.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_2.cpp @@ -166,7 +166,6 @@ HWTEST2_F(CopyOffloadInOrderTests, givenNonDualStreamModeWhenSubmittedThenUseDef EXPECT_EQ(taskCount + 1, csr->taskCount.load()); EXPECT_FALSE(immCmdList->latestFlushIsDualCopyOffload); - EXPECT_TRUE(immCmdList->latestFlushIsHostVisible); } HWTEST2_F(CopyOffloadInOrderTests, givenStagingCopyEnabledWhenCopyCalledThenOffloadOnlyIfPreferred, IsAtLeastXeCore) { @@ -2741,9 +2740,9 @@ HWTEST2_F(StandaloneInOrderTimestampAllocationTests, givenNonWalkerCounterSignal cmdList->appendLaunchKernel(kernel->toHandle(), groupCount, eventHandle, 0, nullptr, launchParams); - bool isCompactEvent = cmdList->compactL3FlushEvent(cmdList->getDcFlushRequired(events[0]->isSignalScope())); + bool isCompactEvent = cmdList->compactL3FlushEvent(cmdList->getDcFlushRequired(events[0]->isFlushRequiredForSignal())); - if (cmdList->getDcFlushRequired(events[0]->isSignalScope())) { + if (cmdList->getDcFlushRequired(events[0]->isFlushRequiredForSignal())) { EXPECT_EQ(isCompactEvent, events[0]->getAllocation(device) == nullptr); } else { EXPECT_EQ(isCompactEvent, events[0]->getAllocation(device) != nullptr); @@ -4064,7 +4063,7 @@ HWTEST2_F(MultiTileInOrderCmdListTests, givenMultiTileInOrderModeWhenProgramming immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, eventHandle, 0, nullptr, launchParams); - auto isCompactEvent = immCmdList->compactL3FlushEvent(immCmdList->getDcFlushRequired(events[0]->isSignalScope())); + auto isCompactEvent = immCmdList->compactL3FlushEvent(immCmdList->getDcFlushRequired(events[0]->isFlushRequiredForSignal())); { GenCmdList cmdList; diff --git a/level_zero/core/test/unit_tests/xe_hpg_core/test_cmdlist_xe_hpg_core.cpp b/level_zero/core/test/unit_tests/xe_hpg_core/test_cmdlist_xe_hpg_core.cpp index 84ec6abcfb..e38747c1fe 100644 --- a/level_zero/core/test/unit_tests/xe_hpg_core/test_cmdlist_xe_hpg_core.cpp +++ b/level_zero/core/test/unit_tests/xe_hpg_core/test_cmdlist_xe_hpg_core.cpp @@ -415,75 +415,5 @@ HWTEST2_F(CommandListCreate, GivenComputeModePropertiesWhenUpdateStreamPropertie EXPECT_FALSE(commandList->finalStreamState.stateComputeMode.isCoherencyRequired.isDirty); } -using CommandListAppendLaunchKernelXeHpgCore = Test; -HWTEST2_F(CommandListAppendLaunchKernelXeHpgCore, givenEventWhenAppendKernelIsCalledThenImmediateDataPostSyncIsAdded, IsXeHpgCore) { - using POSTSYNC_DATA = typename FamilyType::POSTSYNC_DATA; - using DefaultWalkerType = typename FamilyType::DefaultWalkerType; - using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; - using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; - - DebugManagerStateRestore restorer; - debugManager.flags.CompactL3FlushEventPacket.set(0); - - Mock<::L0::KernelImp> kernel; - auto pMockModule = std::unique_ptr(new Mock(device, nullptr)); - kernel.module = pMockModule.get(); - - kernel.setGroupSize(1, 1, 1); - ze_group_count_t groupCount{8, 1, 1}; - auto commandList = std::make_unique>>(); - auto result = commandList->initialize(device, NEO::EngineGroupType::cooperativeCompute, 0u); - ASSERT_EQ(ZE_RESULT_SUCCESS, result); - - ze_event_pool_desc_t eventPoolDesc = {}; - eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE; - eventPoolDesc.count = 1; - - ze_event_desc_t eventDesc = {}; - eventDesc.index = 0; - eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST; - eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; - - auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); - EXPECT_EQ(ZE_RESULT_SUCCESS, result); - auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device, result)); - - auto usedSpaceBefore = commandList->getCmdContainer().getCommandStream()->getUsed(); - CmdListKernelLaunchParams launchParams = {}; - result = commandList->appendLaunchKernel(kernel.toHandle(), groupCount, event->toHandle(), 0, nullptr, launchParams); - EXPECT_EQ(ZE_RESULT_SUCCESS, result); - - auto usedSpaceAfter = commandList->getCmdContainer().getCommandStream()->getUsed(); - EXPECT_GT(usedSpaceAfter, usedSpaceBefore); - - GenCmdList cmdList; - EXPECT_TRUE(FamilyType::Parse::parseCommandBuffer( - cmdList, - ptrOffset(commandList->getCmdContainer().getCommandStream()->getCpuBase(), usedSpaceBefore), - usedSpaceAfter - usedSpaceBefore)); - - auto gpuAddress = event->getGpuAddress(device); - - auto itorWalker = find(cmdList.begin(), cmdList.end()); - ASSERT_NE(cmdList.end(), itorWalker); - auto cmdWalker = genCmdCast(*itorWalker); - auto &postSync = cmdWalker->getPostSync(); - EXPECT_EQ(POSTSYNC_DATA::OPERATION_WRITE_IMMEDIATE_DATA, postSync.getOperation()); - EXPECT_EQ(gpuAddress, postSync.getDestinationAddress()); - - gpuAddress += event->getSinglePacketSize(); - auto itorPC = findAll(itorWalker, cmdList.end()); - ASSERT_NE(0u, itorPC.size()); - uint32_t postSyncCount = 0u; - for (auto it : itorPC) { - auto cmd = genCmdCast(*it); - if (cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) { - EXPECT_EQ(gpuAddress, NEO::UnitTestHelper::getPipeControlPostSyncAddress(*cmd)); - postSyncCount++; - } - } - EXPECT_EQ(1u, postSyncCount); -} - } // namespace ult } // namespace L0