diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 23fe09036e..701cb18622 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -2623,75 +2623,88 @@ void CommandListCoreFamily::appendWaitOnInOrderDependency(std::sh NEO::EncodeBatchBufferStartOrEnd::programConditionalDataMemBatchBufferStart(*commandContainer.getCommandStream(), 0, gpuAddress, waitValue, NEO::CompareOperation::less, true, isQwordInOrderCounter(), isCopyOnly(copyOffloadOperation)); } else { - using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT; - using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM; + auto resolveDependenciesViaPipeControls = !this->isCopyOnly(copyOffloadOperation) && !this->asMutable() && implicitDependency && this->dcFlushSupport; - bool indirectMode = false; + if (NEO::debugManager.flags.ResolveDependenciesViaPipeControls.get() != -1) { + resolveDependenciesViaPipeControls = NEO::debugManager.flags.ResolveDependenciesViaPipeControls.get(); + } - size_t inOrderPatchListIndex = std::numeric_limits::max(); - if (isQwordInOrderCounter()) { - indirectMode = true; + if (resolveDependenciesViaPipeControls) { + NEO::PipeControlArgs args; + args.csStallOnly = true; + NEO::MemorySynchronizationCommands::addSingleBarrier(*commandContainer.getCommandStream(), args); + break; + } else { + using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT; + using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM; - constexpr uint32_t firstRegister = RegisterOffsets::csGprR0; - constexpr uint32_t secondRegister = RegisterOffsets::csGprR0 + 4; + bool indirectMode = false; - auto lri1 = commandContainer.getCommandStream()->template getSpaceForCmd(); - auto lri2 = commandContainer.getCommandStream()->template getSpaceForCmd(); + size_t inOrderPatchListIndex = std::numeric_limits::max(); + if (isQwordInOrderCounter()) { + indirectMode = 
true; - if (!noopDispatch) { - NEO::LriHelper::program(lri1, firstRegister, getLowPart(waitValue), true, isCopyOnly(copyOffloadOperation)); - NEO::LriHelper::program(lri2, secondRegister, getHighPart(waitValue), true, isCopyOnly(copyOffloadOperation)); - } else { - memset(lri1, 0, sizeof(MI_LOAD_REGISTER_IMM)); - memset(lri2, 0, sizeof(MI_LOAD_REGISTER_IMM)); + constexpr uint32_t firstRegister = RegisterOffsets::csGprR0; + constexpr uint32_t secondRegister = RegisterOffsets::csGprR0 + 4; + + auto lri1 = commandContainer.getCommandStream()->template getSpaceForCmd(); + auto lri2 = commandContainer.getCommandStream()->template getSpaceForCmd(); + + if (!noopDispatch) { + NEO::LriHelper::program(lri1, firstRegister, getLowPart(waitValue), true, isCopyOnly(copyOffloadOperation)); + NEO::LriHelper::program(lri2, secondRegister, getHighPart(waitValue), true, isCopyOnly(copyOffloadOperation)); + } else { + memset(lri1, 0, sizeof(MI_LOAD_REGISTER_IMM)); + memset(lri2, 0, sizeof(MI_LOAD_REGISTER_IMM)); + } + + if (inOrderExecInfo->isRegularCmdList()) { + inOrderPatchListIndex = addCmdForPatching((implicitDependency ? nullptr : &inOrderExecInfo), lri1, lri2, waitValue, NEO::InOrderPatchCommandHelpers::PatchCmdType::lri64b); + if (noopDispatch) { + disablePatching(inOrderPatchListIndex); + } + } + if (outListCommands != nullptr) { + auto &lri1ToPatch = outListCommands->emplace_back(); + lri1ToPatch.type = CommandToPatch::CbWaitEventLoadRegisterImm; + lri1ToPatch.pDestination = lri1; + lri1ToPatch.inOrderPatchListIndex = inOrderPatchListIndex; + lri1ToPatch.offset = firstRegister; + + auto &lri2ToPatch = outListCommands->emplace_back(); + lri2ToPatch.type = CommandToPatch::CbWaitEventLoadRegisterImm; + lri2ToPatch.pDestination = lri2; + lri2ToPatch.inOrderPatchListIndex = inOrderPatchListIndex; + lri2ToPatch.offset = secondRegister; + } } - if (inOrderExecInfo->isRegularCmdList()) { - inOrderPatchListIndex = addCmdForPatching((implicitDependency ? 
nullptr : &inOrderExecInfo), lri1, lri2, waitValue, NEO::InOrderPatchCommandHelpers::PatchCmdType::lri64b); + auto semaphoreCommand = reinterpret_cast(commandContainer.getCommandStream()->getSpace(sizeof(MI_SEMAPHORE_WAIT))); + + if (!noopDispatch) { + NEO::EncodeSemaphore::programMiSemaphoreWait(semaphoreCommand, gpuAddress, waitValue, COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, + false, true, isQwordInOrderCounter(), indirectMode, false); + } else { + memset(semaphoreCommand, 0, sizeof(MI_SEMAPHORE_WAIT)); + } + + if (inOrderExecInfo->isRegularCmdList() && !isQwordInOrderCounter()) { + inOrderPatchListIndex = addCmdForPatching((implicitDependency ? nullptr : &inOrderExecInfo), semaphoreCommand, nullptr, waitValue, NEO::InOrderPatchCommandHelpers::PatchCmdType::semaphore); if (noopDispatch) { disablePatching(inOrderPatchListIndex); } + } else { + inOrderPatchListIndex = std::numeric_limits::max(); } + if (outListCommands != nullptr) { - auto &lri1ToPatch = outListCommands->emplace_back(); - lri1ToPatch.type = CommandToPatch::CbWaitEventLoadRegisterImm; - lri1ToPatch.pDestination = lri1; - lri1ToPatch.inOrderPatchListIndex = inOrderPatchListIndex; - lri1ToPatch.offset = firstRegister; - - auto &lri2ToPatch = outListCommands->emplace_back(); - lri2ToPatch.type = CommandToPatch::CbWaitEventLoadRegisterImm; - lri2ToPatch.pDestination = lri2; - lri2ToPatch.inOrderPatchListIndex = inOrderPatchListIndex; - lri2ToPatch.offset = secondRegister; + auto &semaphoreWaitPatch = outListCommands->emplace_back(); + semaphoreWaitPatch.type = CommandToPatch::CbWaitEventSemaphoreWait; + semaphoreWaitPatch.pDestination = semaphoreCommand; + semaphoreWaitPatch.offset = i * immWriteOffset; + semaphoreWaitPatch.inOrderPatchListIndex = inOrderPatchListIndex; } } - - auto semaphoreCommand = reinterpret_cast(commandContainer.getCommandStream()->getSpace(sizeof(MI_SEMAPHORE_WAIT))); - - if (!noopDispatch) { - 
NEO::EncodeSemaphore::programMiSemaphoreWait(semaphoreCommand, gpuAddress, waitValue, COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, - false, true, isQwordInOrderCounter(), indirectMode, false); - } else { - memset(semaphoreCommand, 0, sizeof(MI_SEMAPHORE_WAIT)); - } - - if (inOrderExecInfo->isRegularCmdList() && !isQwordInOrderCounter()) { - inOrderPatchListIndex = addCmdForPatching((implicitDependency ? nullptr : &inOrderExecInfo), semaphoreCommand, nullptr, waitValue, NEO::InOrderPatchCommandHelpers::PatchCmdType::semaphore); - if (noopDispatch) { - disablePatching(inOrderPatchListIndex); - } - } else { - inOrderPatchListIndex = std::numeric_limits::max(); - } - - if (outListCommands != nullptr) { - auto &semaphoreWaitPatch = outListCommands->emplace_back(); - semaphoreWaitPatch.type = CommandToPatch::CbWaitEventSemaphoreWait; - semaphoreWaitPatch.pDestination = semaphoreCommand; - semaphoreWaitPatch.offset = i * immWriteOffset; - semaphoreWaitPatch.inOrderPatchListIndex = inOrderPatchListIndex; - } } gpuAddress += immWriteOffset; diff --git a/level_zero/core/test/unit_tests/fixtures/in_order_cmd_list_fixture.h b/level_zero/core/test/unit_tests/fixtures/in_order_cmd_list_fixture.h index 247aae8d5f..280bce13d9 100644 --- a/level_zero/core/test/unit_tests/fixtures/in_order_cmd_list_fixture.h +++ b/level_zero/core/test/unit_tests/fixtures/in_order_cmd_list_fixture.h @@ -70,6 +70,7 @@ struct InOrderCmdListFixture : public ::Test { void SetUp() override { NEO::debugManager.flags.ForcePreemptionMode.set(static_cast(NEO::PreemptionMode::Disabled)); + NEO::debugManager.flags.ResolveDependenciesViaPipeControls.set(0u); ::Test::SetUp(); createKernel(); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_1.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_1.cpp index 57961a610a..58ca0f6606 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_1.cpp +++ 
b/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_1.cpp
@@ -977,6 +977,33 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenSubmittingThenProgramSemaphor
     ASSERT_TRUE(verifyInOrderDependency(itor, 1, immCmdList->inOrderExecInfo->getBaseDeviceAddress() + counterOffset, immCmdList->isQwordInOrderCounter(), false));
 }
 
+HWTEST2_F(InOrderCmdListTests, givenResolveDependenciesViaPipeControlsForInOrderModeWhenSubmittingThenProgramPipeControlInBetweenDispatches, IsAtLeastXeHpCore) { // Appends two kernel launches with the debug flag forced on and asserts that a searched-for command is present in the stream of the second launch.
+    DebugManagerStateRestore restorer; // presumably restores debug flags when the test scope ends - confirm against the helper definition
+    NEO::debugManager.flags.ResolveDependenciesViaPipeControls.set(1); // any value other than -1 overrides the default decision in appendWaitOnInOrderDependency (see the cmdlist_hw.inl hunk of this patch)
+
+    uint32_t counterOffset = 64; // only consumed by setAllocationOffset below; no assertion in this test reads it
+
+    auto immCmdList = createImmCmdList(); // NOTE(review): a template argument list looks stripped here in this view - confirm against the repository
+    immCmdList->inOrderExecInfo->setAllocationOffset(counterOffset);
+
+    auto cmdStream = immCmdList->getCmdContainer().getCommandStream();
+
+    immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false); // first dispatch
+
+    auto offset = cmdStream->getUsed(); // stream position after the first dispatch; parsing below starts here
+
+    immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false); // second dispatch, the one under inspection
+
+    GenCmdList cmdList;
+    ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer( // parse only the bytes written after offset, i.e. by the second append
+        cmdList,
+        ptrOffset(cmdStream->getCpuBase(), offset),
+        cmdStream->getUsed() - offset));
+
+    auto itor = find(cmdList.begin(), cmdList.end()); // NOTE(review): the command type argument of find is missing in this view; the test name implies a pipe control type - confirm
+    ASSERT_NE(cmdList.end(), itor); // fails when no matching command was found in the parsed range
+}
+
 HWTEST2_F(InOrderCmdListTests, givenDependencyFromDifferentRootDeviceWhenAppendCalledThenCreatePeerAllocation, MatchAny) {
     NEO::UltDeviceFactory deviceFactory{2, 0};