diff --git a/level_zero/api/core/ze_event_api_entrypoints.h b/level_zero/api/core/ze_event_api_entrypoints.h index 61ee037f90..ad682b7d6f 100644 --- a/level_zero/api/core/ze_event_api_entrypoints.h +++ b/level_zero/api/core/ze_event_api_entrypoints.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020-2022 Intel Corporation + * Copyright (C) 2020-2023 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -65,7 +65,7 @@ ze_result_t zeCommandListAppendWaitOnEvents( ze_command_list_handle_t hCommandList, uint32_t numEvents, ze_event_handle_t *phEvents) { - return L0::CommandList::fromHandle(hCommandList)->appendWaitOnEvents(numEvents, phEvents, false); + return L0::CommandList::fromHandle(hCommandList)->appendWaitOnEvents(numEvents, phEvents, false, true); } ze_result_t zeEventHostSignal( diff --git a/level_zero/core/source/cmdlist/cmdlist.h b/level_zero/core/source/cmdlist/cmdlist.h index 3970e38829..756f17e9d1 100644 --- a/level_zero/core/source/cmdlist/cmdlist.h +++ b/level_zero/core/source/cmdlist/cmdlist.h @@ -136,7 +136,7 @@ struct CommandList : _ze_command_list_handle_t { uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) = 0; virtual ze_result_t appendMemoryPrefetch(const void *ptr, size_t count) = 0; virtual ze_result_t appendSignalEvent(ze_event_handle_t hEvent) = 0; - virtual ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, bool relaxedOrderingAllowed) = 0; + virtual ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, bool relaxedOrderingAllowed, bool trackDependencies) = 0; virtual ze_result_t appendWriteGlobalTimestamp(uint64_t *dstptr, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0; virtual ze_result_t appendMemoryCopyFromContext(void *dstptr, ze_context_handle_t hContextSrc, diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.h b/level_zero/core/source/cmdlist/cmdlist_hw.h index 824b34e79a..b8235820eb 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw.h @@ -165,7 +165,7 @@ struct CommandListCoreFamily : CommandListImp { uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override; ze_result_t appendSignalEvent(ze_event_handle_t hEvent) override; - ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, bool relaxedOrderingAllowed) override; + ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, bool relaxedOrderingAllowed, bool trackDependencies) override; ze_result_t appendWriteGlobalTimestamp(uint64_t *dstptr, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override; ze_result_t appendMemoryCopyFromContext(void *dstptr, ze_context_handle_t hContextSrc, const void *srcptr, @@ -174,7 +174,7 @@ struct CommandListCoreFamily : CommandListImp { void appendMultiPartitionPrologue(uint32_t partitionDataSize) override; void appendMultiPartitionEpilogue() override; void appendEventForProfilingAllWalkers(Event *event, bool beforeWalker, bool singlePacketEvent); - ze_result_t addEventsToCmdList(uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingAllowed); + ze_result_t addEventsToCmdList(uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingAllowed, bool trackDependencies); ze_result_t reserveSpace(size_t size, void **ptr) override; ze_result_t reset() override; diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index f9405de2b6..1fe7c6221d 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -265,7 +265,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernel(ze_kernel_h callId = neoDevice->getRootDeviceEnvironment().tagsManager->currentCallCount; } - ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch); + ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch, true); if (ret) { return ret; } @@ -299,7 +299,7 @@ ze_result_t CommandListCoreFamily::appendLaunchCooperativeKernel( uint32_t numWaitEvents, ze_event_handle_t *waitEventHandles, bool relaxedOrderingDispatch) { - ze_result_t ret = addEventsToCmdList(numWaitEvents, waitEventHandles, relaxedOrderingDispatch); + ze_result_t ret = addEventsToCmdList(numWaitEvents, waitEventHandles, relaxedOrderingDispatch, true); if (ret) { return ret; } @@ -323,7 +323,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelIndirect(ze_ uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) { - ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch); + ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch, true); if (ret) { return ret; } @@ -353,7 +353,7 @@ ze_result_t CommandListCoreFamily::appendLaunchMultipleKernelsInd uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) { - ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch); + ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch, true); if (ret) { return ret; } @@ -440,7 +440,7 @@ ze_result_t CommandListCoreFamily::appendMemoryRangesBarrier(uint uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { - ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, false); + ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, false, true); if (ret) { return ret; } @@ -1027,7 +1027,7 @@ ze_result_t CommandListCoreFamily::appendMemoryCopyBlitRegion(NEO blitProperties.srcSize = srcSize; blitProperties.dstSize = dstSize; - ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch); + ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch, true); if (ret) { return ret; } @@ -1180,7 +1180,7 @@ ze_result_t CommandListCoreFamily::appendMemoryCopy(void *dstptr, isStateless = true; } - ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch); + ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch, true); if (ret) { return ret; @@ -1575,7 +1575,7 @@ ze_result_t CommandListCoreFamily::appendMemoryFill(void *ptr, return appendBlitFill(ptr, pattern, patternSize, size, signalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch); } - ze_result_t res = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch); + ze_result_t res = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch, true); if (res) { return res; } @@ -1763,7 +1763,7 @@ ze_result_t CommandListCoreFamily::appendBlitFill(void *ptr, if (gfxCoreHelper.getMaxFillPaternSizeForCopyEngine() < patternSize) { return ZE_RESULT_ERROR_INVALID_SIZE; } else { - ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch); + ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch, true); if (ret) { return ret; } @@ -1929,11 +1929,11 @@ inline size_t CommandListCoreFamily::getAllocationOffsetForAppend } template -inline ze_result_t CommandListCoreFamily::addEventsToCmdList(uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingAllowed) { +inline ze_result_t CommandListCoreFamily::addEventsToCmdList(uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingAllowed, bool trackDependencies) { if (numWaitEvents > 0) { if (phWaitEvents) { - CommandListCoreFamily::appendWaitOnEvents(numWaitEvents, phWaitEvents, relaxedOrderingAllowed); + CommandListCoreFamily::appendWaitOnEvents(numWaitEvents, phWaitEvents, relaxedOrderingAllowed, trackDependencies); } else { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } @@ -1975,7 +1975,7 @@ ze_result_t CommandListCoreFamily::appendSignalEvent(ze_event_han } template -ze_result_t CommandListCoreFamily::appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, bool relaxedOrderingAllowed) { +ze_result_t CommandListCoreFamily::appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, bool relaxedOrderingAllowed, bool trackDependencies) { using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION; NEO::Device *neoDevice = device->getNEODevice(); @@ -2040,7 +2040,7 @@ ze_result_t CommandListCoreFamily::appendWaitOnEvents(uint32_t nu } } - if (this->cmdListType == TYPE_IMMEDIATE && isCopyOnly()) { + if (this->cmdListType == TYPE_IMMEDIATE && isCopyOnly() && trackDependencies) { NEO::MiFlushArgs args; args.commandWithPostSync = true; const auto &productHelper = this->device->getProductHelper(); @@ -2146,7 +2146,7 @@ ze_result_t CommandListCoreFamily::appendWriteGlobalTimestamp( if (numWaitEvents > 0) { if (phWaitEvents) { - CommandListCoreFamily::appendWaitOnEvents(numWaitEvents, phWaitEvents, false); + CommandListCoreFamily::appendWaitOnEvents(numWaitEvents, phWaitEvents, false, true); } else { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } @@ -2567,7 +2567,7 @@ ze_result_t CommandListCoreFamily::appendBarrier(ze_event_handle_ uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { - ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, false); + ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, false, true); if (ret) { return ret; } diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h index 0ce08edd1d..b4ef967b80 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h @@ -91,7 +91,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily::appendPageFaultCopy(N } template -ze_result_t CommandListCoreFamilyImmediate::appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingAllowed) { +ze_result_t CommandListCoreFamilyImmediate::appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingAllowed, bool trackDependencies) { bool allSignaled = true; for (auto i = 0u; i < numEvents; i++) { allSignaled &= (!this->dcFlushSupport && Event::fromHandle(phWaitEvents[i])->isAlreadyCompleted()); @@ -541,7 +541,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendWaitOnEvents(ui checkAvailableSpace(numEvents); checkWaitEventsState(numEvents, phWaitEvents); } - auto ret = CommandListCoreFamily::appendWaitOnEvents(numEvents, phWaitEvents, relaxedOrderingAllowed); + auto ret = CommandListCoreFamily::appendWaitOnEvents(numEvents, phWaitEvents, relaxedOrderingAllowed, trackDependencies); this->dependenciesPresent = true; return flushImmediate(ret, true, true, false, nullptr); } diff --git a/level_zero/core/source/device/bcs_split.h b/level_zero/core/source/device/bcs_split.h index d481b52b8d..613232bd22 100644 --- a/level_zero/core/source/device/bcs_split.h +++ b/level_zero/core/source/device/bcs_split.h @@ -83,7 +83,7 @@ struct BcsSplit { for (size_t i = 0; i < this->cmdQs.size(); i++) { if (barrierRequired) { auto barrierEventHandle = this->events.barrier[markerEventIndex]->toHandle(); - cmdList->addEventsToCmdList(1u, &barrierEventHandle, hasRelaxedOrderingDependencies); + cmdList->addEventsToCmdList(1u, &barrierEventHandle, hasRelaxedOrderingDependencies, false); } auto localSize = totalSize / engineCount; @@ -105,7 +105,7 @@ struct BcsSplit { engineCount--; } - cmdList->addEventsToCmdList(static_cast(this->cmdQs.size()), eventHandles.data(), hasRelaxedOrderingDependencies); + cmdList->addEventsToCmdList(static_cast(this->cmdQs.size()), eventHandles.data(), hasRelaxedOrderingDependencies, false); cmdList->appendEventForProfilingAllWalkers(this->events.marker[markerEventIndex], false, true); if (hSignalEvent) { diff --git a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h index 35f2fedbbc..85f49560a5 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h +++ b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h @@ -331,7 +331,7 @@ struct MockCommandList : public CommandList { ADDMETHOD_NOBASE(appendWaitOnEvents, ze_result_t, ZE_RESULT_SUCCESS, (uint32_t numEvents, - ze_event_handle_t *phEvent, bool relaxedOrderingAllowed)); + ze_event_handle_t *phEvent, bool relaxedOrderingAllowed, bool trackDependencies)); ADDMETHOD_NOBASE(appendWriteGlobalTimestamp, ze_result_t, ZE_RESULT_SUCCESS, (uint64_t * dstptr, diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp index 07cc3a9e8f..0de9f16c70 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp @@ -1041,7 +1041,7 @@ HWTEST2_F(CommandListCreate, givenDirectSubmissionAndImmCmdListWhenDispatchingTh verifyFlags(commandList->appendPageFaultCopy(kernel.getIsaAllocation(), kernel.getIsaAllocation(), 1, false), false, false); - verifyFlags(commandList->appendWaitOnEvents(1, &event, false), true, true); + verifyFlags(commandList->appendWaitOnEvents(1, &event, false, true), true, true); verifyFlags(commandList->appendWriteGlobalTimestamp(reinterpret_cast(dstPtr), nullptr, 0, nullptr), true, true); @@ -1172,7 +1172,7 @@ HWTEST2_F(CommandListCreate, givenDirectSubmissionAndImmCmdListWhenDispatchingTh verifyFlags(commandList->appendPageFaultCopy(kernel.getIsaAllocation(), kernel.getIsaAllocation(), 1, false), false, false); - verifyFlags(commandList->appendWaitOnEvents(1, &event, false), false, false); + verifyFlags(commandList->appendWaitOnEvents(1, &event, false, true), false, false); verifyFlags(commandList->appendWriteGlobalTimestamp(reinterpret_cast(dstPtr), nullptr, numWaitlistEvents, waitlist), false, false); @@ -1283,7 +1283,7 @@ HWTEST_F(CommandListCreate, GivenGpuHangWhenCreatingImmediateCommandListAndAppen ASSERT_NE(nullptr, eventObject->csr); ASSERT_EQ(static_cast(device)->getNEODevice()->getDefaultEngine().commandStreamReceiver, eventObject->csr); - returnValue = commandList->appendWaitOnEvents(1, &event, false); + returnValue = commandList->appendWaitOnEvents(1, &event, false, true); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); returnValue = commandList->appendBarrier(nullptr, 1, &event); @@ -1345,7 +1345,7 @@ HWTEST2_F(CommandListCreate, GivenGpuHangOnExecutingCommandListsWhenCreatingImme commandList->csr = &mockCommandStreamReceiver; static_cast *>(commandList->cmdQImmediate)->csr = &mockCommandStreamReceiver; - returnValue = commandList->appendWaitOnEvents(1, &event, false); + returnValue = commandList->appendWaitOnEvents(1, &event, false, true); EXPECT_EQ(ZE_RESULT_ERROR_DEVICE_LOST, returnValue); commandList->csr = oldCsr; @@ -1421,7 +1421,7 @@ HWTEST2_F(CommandListCreate, GivenGpuHangOnSynchronizingWhenCreatingImmediateCom commandList->csr = &mockCommandStreamReceiver; static_cast *>(commandList->cmdQImmediate)->csr = &mockCommandStreamReceiver; - returnValue = commandList->appendWaitOnEvents(1, &event, false); + returnValue = commandList->appendWaitOnEvents(1, &event, false, true); EXPECT_EQ(ZE_RESULT_ERROR_DEVICE_LOST, returnValue); commandList->csr = oldCsr; static_cast *>(commandList->cmdQImmediate)->csr = oldCsr; @@ -1471,7 +1471,7 @@ HWTEST2_F(CommandListCreate, GivenGpuHangOnSynchronizingWhenCreatingImmediateCom auto oldCommandQueue = commandList->cmdQImmediate; commandList->cmdQImmediate = &mockCommandQueue; - returnValue = commandList->appendWaitOnEvents(1, &event, false); + returnValue = commandList->appendWaitOnEvents(1, &event, false, true); EXPECT_EQ(ZE_RESULT_ERROR_DEVICE_LOST, returnValue); commandList->cmdQImmediate = oldCommandQueue; } @@ -1520,7 +1520,7 @@ HWTEST2_F(CommandListCreate, GivenGpuHangOnSynchronizingWhenCreatingImmediateCom auto oldCommandQueue = commandList->cmdQImmediate; commandList->cmdQImmediate = &mockCommandQueue; - returnValue = commandList->appendWaitOnEvents(1, &event, false); + returnValue = commandList->appendWaitOnEvents(1, &event, false, true); EXPECT_EQ(ZE_RESULT_ERROR_DEVICE_LOST, returnValue); commandList->cmdQImmediate = oldCommandQueue; } @@ -1563,7 +1563,7 @@ HWTEST_F(CommandListCreate, GivenGpuHangWhenCreatingImmediateCommandListAndAppen ASSERT_NE(nullptr, eventObject->csr); ASSERT_EQ(static_cast(device)->getNEODevice()->getDefaultEngine().commandStreamReceiver, eventObject->csr); - returnValue = commandList->appendWaitOnEvents(1, &event, false); + returnValue = commandList->appendWaitOnEvents(1, &event, false, true); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); returnValue = commandList->appendBarrier(nullptr, 1, &event); @@ -1656,7 +1656,7 @@ HWTEST_F(CommandListCreate, GivenGpuHangAndEnabledFlushTaskSubmissionFlagWhenCre commandList->csr = &mockCommandStreamReceiver; static_cast *>(commandList->cmdQImmediate)->csr = &mockCommandStreamReceiver; - returnValue = commandList->appendWaitOnEvents(1, &event, false); + returnValue = commandList->appendWaitOnEvents(1, &event, false, true); EXPECT_EQ(ZE_RESULT_ERROR_DEVICE_LOST, returnValue); commandList->csr = oldCsr; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_4.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_4.cpp index b1a61faad1..ec284b67ef 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_4.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_4.cpp @@ -6,6 +6,7 @@ */ #include "shared/source/built_ins/sip.h" +#include "shared/source/command_container/command_encoder.h" #include "shared/source/command_container/encode_surface_state.h" #include "shared/source/helpers/gfx_core_helper.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" @@ -590,7 +591,7 @@ HWTEST_F(CommandListImmediateFlushTaskComputeTests, givenUseCsrImmediateSubmissi std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::Compute, returnValue)); ze_event_handle_t hEventHandle = event->toHandle(); - result = commandList->appendWaitOnEvents(1, &hEventHandle, false); + result = commandList->appendWaitOnEvents(1, &hEventHandle, false, true); ASSERT_EQ(ZE_RESULT_SUCCESS, result); context->destroy(); @@ -703,6 +704,97 @@ HWTEST_F(CommandListImmediateFlushTaskComputeTests, givenUseCsrImmediateSubmissi context->destroy(); } +HWTEST2_F(CommandListCreate, givenImmediateCopyOnlyCmdListWhenAppendBarrierThenIncrementBarrierCountAndDispatchBarrierTagUpdate, IsAtLeastSkl) { + using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW; + + ze_command_queue_desc_t queueDesc = {}; + ze_result_t returnValue = ZE_RESULT_SUCCESS; + std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::Copy, returnValue)); + EXPECT_EQ(commandList->csr->getNextBarrierCount(), 0u); + + auto result = commandList->appendBarrier(nullptr, 0, nullptr); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_EQ(commandList->csr->getNextBarrierCount(), 2u); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed())); + auto itor = find(cmdList.begin(), cmdList.end()); + if (EncodeMiFlushDW::getMiFlushDwWaSize()) { + itor++; + } + EXPECT_NE(cmdList.end(), itor); + auto cmd = genCmdCast(*itor); + EXPECT_EQ(cmd->getPostSyncOperation(), MI_FLUSH_DW::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA_QWORD); + EXPECT_EQ(cmd->getDestinationAddress(), commandList->csr->getBarrierCountGpuAddress()); + EXPECT_EQ(cmd->getImmediateData(), 2u); +} + +HWTEST2_F(CommandListCreate, givenImmediateCopyOnlyCmdListWhenAppendWaitOnEventsThenIncrementBarrierCountAndDispatchBarrierTagUpdate, IsAtLeastSkl) { + using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW; + + ze_command_queue_desc_t queueDesc = {}; + ze_result_t returnValue = ZE_RESULT_SUCCESS; + std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::Copy, returnValue)); + EXPECT_EQ(commandList->csr->getNextBarrierCount(), 0u); + + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.count = 1; + + ze_event_desc_t eventDesc = {}; + eventDesc.index = 0; + eventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE; + + ze_result_t result = ZE_RESULT_SUCCESS; + auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); + auto eventHandle = event->toHandle(); + + result = commandList->appendWaitOnEvents(1u, &eventHandle, false, true); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_EQ(commandList->csr->getNextBarrierCount(), 2u); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), commandList->commandContainer.getCommandStream()->getUsed())); + auto itor = find(cmdList.begin(), cmdList.end()); + if (EncodeMiFlushDW::getMiFlushDwWaSize()) { + itor++; + } + EXPECT_NE(cmdList.end(), itor); + auto cmd = genCmdCast(*itor); + EXPECT_EQ(cmd->getPostSyncOperation(), MI_FLUSH_DW::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA_QWORD); + EXPECT_EQ(cmd->getDestinationAddress(), commandList->csr->getBarrierCountGpuAddress()); + EXPECT_EQ(cmd->getImmediateData(), 2u); +} + +HWTEST2_F(CommandListCreate, givenImmediateCopyOnlyCmdListWhenAppendWaitOnEventsWithTrackDependenciesSetToFalseThenDoNotIncrementBarrierCountAndDispatchBarrierTagUpdate, IsAtLeastSkl) { + using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW; + + ze_command_queue_desc_t queueDesc = {}; + ze_result_t returnValue = ZE_RESULT_SUCCESS; + std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::Copy, returnValue)); + EXPECT_EQ(commandList->csr->getNextBarrierCount(), 0u); + + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.count = 1; + + ze_event_desc_t eventDesc = {}; + eventDesc.index = 0; + eventDesc.signal = ZE_EVENT_SCOPE_FLAG_DEVICE; + + ze_result_t result = ZE_RESULT_SUCCESS; + auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); + auto eventHandle = event->toHandle(); + + result = commandList->appendWaitOnEvents(1u, &eventHandle, false, false); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_EQ(commandList->csr->getNextBarrierCount(), 1u); +} + HWTEST_F(CommandListCreate, GivenCommandListWhenUnalignedPtrThenLeftMiddleAndRightCopyAdded) { using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT; ze_result_t returnValue; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_5.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_5.cpp index f601f8fbf3..ed3aa76267 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_5.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_5.cpp @@ -610,7 +610,7 @@ HWTEST_F(CommandListCreate, givenCommandListWithCopyOnlyWhenAppendWaitEventsWith event.signalScope = 0; event.waitScope = ZE_EVENT_SCOPE_FLAG_HOST; auto eventHandle = event.toHandle(); - commandList->appendWaitOnEvents(1, &eventHandle, false); + commandList->appendWaitOnEvents(1, &eventHandle, false, true); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); @@ -634,7 +634,7 @@ HWTEST_F(CommandListCreate, givenCommandListyWhenAppendWaitEventsWithDcFlushThen event.signalScope = 0; event.waitScope = ZE_EVENT_SCOPE_FLAG_HOST; auto eventHandle = event.toHandle(); - commandList->appendWaitOnEvents(1, &eventHandle, false); + commandList->appendWaitOnEvents(1, &eventHandle, false, true); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); @@ -665,7 +665,7 @@ HWTEST_F(CommandListCreate, givenCommandListWhenAppendWaitEventsWithDcFlushThenP event2.waitScope = ZE_EVENT_SCOPE_FLAG_HOST; ze_event_handle_t events[] = {&event, &event2}; - commandList->appendWaitOnEvents(2, events, false); + commandList->appendWaitOnEvents(2, events, false, true); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); @@ -717,7 +717,7 @@ HWTEST_F(CommandListCreate, givenAsyncCmdQueueAndImmediateCommandListWhenAppendW ze_event_handle_t events[] = {&event, &event2}; size_t startOffset = commandContainer.getCommandStream()->getUsed(); - commandList->appendWaitOnEvents(2, events, false); + commandList->appendWaitOnEvents(2, events, false, true); size_t endOffset = commandContainer.getCommandStream()->getUsed(); size_t usedBufferSize = (endOffset - startOffset); @@ -763,7 +763,7 @@ HWTEST_F(CommandListCreate, givenAsyncCmdQueueAndImmediateCommandListWhenAppendW ze_event_handle_t events[] = {&event, &event2}; size_t startOffset = commandContainer.getCommandStream()->getUsed(); - commandList->appendWaitOnEvents(2, events, false); + commandList->appendWaitOnEvents(2, events, false, true); size_t endOffset = commandContainer.getCommandStream()->getUsed(); size_t usedBufferSize = (endOffset - startOffset); @@ -803,7 +803,7 @@ HWTEST_F(CommandListCreate, givenFlushTaskFlagEnabledAndAsyncCmdQueueAndCopyOnly ze_event_handle_t events[] = {&event, &event2}; auto used = commandContainer.getCommandStream()->getUsed(); - commandList->appendWaitOnEvents(2, events, false); + commandList->appendWaitOnEvents(2, events, false, true); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); @@ -834,7 +834,7 @@ HWTEST_F(CommandListCreate, givenAsyncCmdQueueAndCopyOnlyImmediateCommandListWhe ze_event_handle_t events[] = {&event, &event2}; auto used = commandContainer.getCommandStream()->getUsed(); - commandList->appendWaitOnEvents(2, events, false); + commandList->appendWaitOnEvents(2, events, false, true); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( @@ -866,7 +866,7 @@ HWTEST_F(CommandListCreate, givenAsyncCmdQueueAndTbxCsrWithCopyOnlyImmediateComm event2.waitScope = 0; ze_event_handle_t events[] = {&event, &event2}; - auto ret = commandList->appendWaitOnEvents(2, events, false); + auto ret = commandList->appendWaitOnEvents(2, events, false, true); EXPECT_EQ(ZE_RESULT_SUCCESS, ret); } diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp index e5c050d9c4..3048972902 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp @@ -2360,7 +2360,7 @@ HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListWhenAppendWaitOnE EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); auto eventHandle = event->toHandle(); - cmdList.appendWaitOnEvents(1, &eventHandle, false); + cmdList.appendWaitOnEvents(1, &eventHandle, false, true); EXPECT_TRUE(cmdList.dependenciesPresent); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_2.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_2.cpp index 3171670cee..bd0c8918e4 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_2.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_2.cpp @@ -636,7 +636,7 @@ HWTEST_F(CommandListAppendLaunchKernelSWTags, givenEnableSWTagsWhenAppendWaitOnE eventPool->createEvent(&eventDesc, &hEvent); - auto result = commandList->appendWaitOnEvents(1, &hEvent, false); + auto result = commandList->appendWaitOnEvents(1, &hEvent, false, true); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = cmdStream->getUsed(); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_wait_on_events.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_wait_on_events.cpp index 83035454ab..d5dfb00bd5 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_wait_on_events.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_wait_on_events.cpp @@ -31,7 +31,7 @@ HWTEST_F(CommandListAppendWaitOnEvent, WhenAppendingWaitOnEventThenSemaphoreWait using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; auto usedSpaceBefore = commandList->commandContainer.getCommandStream()->getUsed(); ze_event_handle_t hEventHandle = event->toHandle(); - auto result = commandList->appendWaitOnEvents(1, &hEventHandle, false); + auto result = commandList->appendWaitOnEvents(1, &hEventHandle, false, true); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed(); @@ -85,7 +85,7 @@ HWTEST2_F(CommandListAppendWaitOnEvent, givenImmediateCmdListWithDirectSubmissio ultCsr->directSubmission.reset(directSubmission); ze_event_handle_t hEventHandle = event->toHandle(); - auto result = static_cast *>(immCommandList.get())->addEventsToCmdList(1, &hEventHandle, true); + auto result = static_cast *>(immCommandList.get())->addEventsToCmdList(1, &hEventHandle, true, true); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = immCommandList->commandContainer.getCommandStream()->getUsed(); @@ -270,7 +270,7 @@ HWTEST_F(CommandListAppendWaitOnEvent, givenTwoEventsWhenWaitOnEventsAppendedThe ze_event_handle_t handles[2] = {event->toHandle(), event->toHandle()}; - auto result = commandList->appendWaitOnEvents(2, handles, false); + auto result = commandList->appendWaitOnEvents(2, handles, false, true); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed(); @@ -301,7 +301,7 @@ HWTEST_F(CommandListAppendWaitOnEvent, givenTwoEventsWhenWaitOnEventsAppendedThe HWTEST_F(CommandListAppendWaitOnEvent, WhenAppendingWaitOnEventsThenEventGraphicsAllocationIsAddedToResidencyContainer) { ze_event_handle_t hEventHandle = event->toHandle(); - auto result = commandList->appendWaitOnEvents(1, &hEventHandle, false); + auto result = commandList->appendWaitOnEvents(1, &hEventHandle, false, true); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto &residencyContainer = commandList->commandContainer.getResidencyContainer(); @@ -328,7 +328,7 @@ HWTEST_F(CommandListAppendWaitOnEvent, givenEventWithWaitScopeFlagDeviceWhenAppe auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); ze_event_handle_t hEventHandle = event->toHandle(); - auto result = commandList->appendWaitOnEvents(1, &hEventHandle, false); + auto result = commandList->appendWaitOnEvents(1, &hEventHandle, false, true); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed(); @@ -373,7 +373,7 @@ HWTEST_F(CommandListAppendWaitOnUsedPacketSignalEvent, WhenAppendingWaitOnTimest event->setPacketsInUse(3u); ze_event_handle_t hEventHandle = event->toHandle(); - result = commandList->appendWaitOnEvents(1, &hEventHandle, false); + result = commandList->appendWaitOnEvents(1, &hEventHandle, false, true); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed(); ASSERT_GT(usedSpaceAfter, usedSpaceBefore); @@ -436,7 +436,7 @@ HWTEST_F(CommandListAppendWaitOnUsedPacketSignalEvent, WhenAppendingWaitOnTimest ASSERT_EQ(9u, event->getPacketsInUse()); ze_event_handle_t hEventHandle = event->toHandle(); - result = commandList->appendWaitOnEvents(1, &hEventHandle, false); + result = commandList->appendWaitOnEvents(1, &hEventHandle, false, true); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed(); ASSERT_GT(usedSpaceAfter, usedSpaceBefore); @@ -542,7 +542,7 @@ HWTEST_F(CommandListAppendWaitOnEvent, givenCommandBufferIsEmptyWhenAppendingWai ze_event_handle_t hEventHandle = event->toHandle(); auto oldCommandBuffer = commandList->commandContainer.getCommandStream()->getGraphicsAllocation(); - auto result = commandList->appendWaitOnEvents(1, &hEventHandle, false); + auto result = commandList->appendWaitOnEvents(1, &hEventHandle, false, true); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed(); @@ -604,7 +604,7 @@ HWTEST2_F(MultTileCommandListAppendWaitOnEvent, ze_event_handle_t eventHandle = event->toHandle(); auto usedSpaceBefore = commandList->commandContainer.getCommandStream()->getUsed(); - auto result = commandList->appendWaitOnEvents(1, &eventHandle, false); + auto result = commandList->appendWaitOnEvents(1, &eventHandle, false, true); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed(); @@ -643,7 +643,7 @@ HWTEST2_F(CommandListAppendWaitOnEvent, givenImmediateCommandListWhenAppendWaitO ze_event_handle_t eventHandle = event->toHandle(); EXPECT_FALSE(cmdList.dependenciesPresent); - EXPECT_EQ(ZE_RESULT_SUCCESS, cmdList.appendWaitOnEvents(1, &eventHandle, false)); + EXPECT_EQ(ZE_RESULT_SUCCESS, cmdList.appendWaitOnEvents(1, &eventHandle, false, true)); EXPECT_TRUE(cmdList.dependenciesPresent); } @@ -657,7 +657,7 @@ HWTEST2_F(CommandListAppendWaitOnEvent, givenImmediateCommandListWhenAppendWaitO ze_event_handle_t eventHandle = event->toHandle(); EXPECT_FALSE(cmdList.dependenciesPresent); - EXPECT_EQ(ZE_RESULT_SUCCESS, cmdList.appendWaitOnEvents(1, &eventHandle, false)); + EXPECT_EQ(ZE_RESULT_SUCCESS, cmdList.appendWaitOnEvents(1, &eventHandle, false, true)); EXPECT_FALSE(cmdList.dependenciesPresent); } @@ -738,7 +738,7 @@ HWTEST_TEMPLATED_F(TbxImmediateCommandListTest, givenTbxModeOnFlushTaskImmediate auto &ultCsr = neoDevice->getUltCommandStreamReceiver(); auto eventHandle = event->toHandle(); - commandListImmediate->appendWaitOnEvents(1, &eventHandle, false); + commandListImmediate->appendWaitOnEvents(1, &eventHandle, false, true); EXPECT_TRUE(ultCsr.downloadAllocationsCalled); } diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp index 628d91cdc7..1dcd9f141a 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp @@ -1055,7 +1055,7 @@ struct CommandListSignalAllEventPacketFixture : public ModuleFixture { size_t sizeBefore = cmdStream->getUsed(); auto eventHandle = event->toHandle(); - result = commandList->appendWaitOnEvents(1, &eventHandle, false); + result = commandList->appendWaitOnEvents(1, &eventHandle, false, true); size_t sizeAfter = cmdStream->getUsed(); EXPECT_EQ(ZE_RESULT_SUCCESS, result); diff --git a/level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger_1.cpp b/level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger_1.cpp index bad99c9069..28d0a69f30 100644 --- a/level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger_1.cpp +++ b/level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger_1.cpp @@ -271,7 +271,7 @@ HWTEST_F(L0DebuggerSimpleTest, givenUseCsrImmediateSubmissionEnabledWithImmediat ASSERT_NE(nullptr, eventObject->csr); ASSERT_EQ(static_cast(device)->getNEODevice()->getDefaultEngine().commandStreamReceiver, eventObject->csr); - returnValue = commandList->appendWaitOnEvents(1, &event, false); + returnValue = commandList->appendWaitOnEvents(1, &event, false, true); EXPECT_EQ(returnValue, ZE_RESULT_SUCCESS); returnValue = commandList->appendBarrier(nullptr, 1, &event); @@ -334,7 +334,7 @@ HWTEST_F(L0DebuggerSimpleTest, givenUseCsrImmediateSubmissionDisabledWithImmedia ASSERT_NE(nullptr, eventObject->csr); ASSERT_EQ(static_cast(device)->getNEODevice()->getDefaultEngine().commandStreamReceiver, eventObject->csr); - returnValue = commandList->appendWaitOnEvents(1, &event, false); + returnValue = commandList->appendWaitOnEvents(1, &event, false, true); EXPECT_EQ(returnValue, ZE_RESULT_SUCCESS); returnValue = commandList->appendBarrier(nullptr, 1, &event); diff --git a/level_zero/tools/source/metrics/metric_oa_query_imp.cpp b/level_zero/tools/source/metrics/metric_oa_query_imp.cpp index d39aceb178..d2251bce87 100644 --- a/level_zero/tools/source/metrics/metric_oa_query_imp.cpp +++ b/level_zero/tools/source/metrics/metric_oa_query_imp.cpp @@ -796,7 +796,7 @@ ze_result_t OaMetricQueryImp::writeMetricQuery(CommandList &commandList, ze_even commandList.commandContainer.addToResidencyContainer(pool.pAllocation); // Wait for events before executing query. - commandList.appendWaitOnEvents(numWaitEvents, phWaitEvents, false); + commandList.appendWaitOnEvents(numWaitEvents, phWaitEvents, false, true); if (metricQueriesSize) {