From 9db77efd038719c1fdbbd996b35b70b008150443 Mon Sep 17 00:00:00 2001 From: Aravind Gopalakrishnan Date: Sat, 8 Feb 2025 02:44:45 +0000 Subject: [PATCH] feature: Append recorded commandlist into immediate (8/N) - Add primary dispatch capability for immediate command list - turn off usage of parent linear stream for copy engine Related-To: NEO-10356 Signed-off-by: Aravind Gopalakrishnan --- level_zero/core/source/cmdlist/cmdlist_hw.inl | 2 +- .../source/cmdlist/cmdlist_hw_immediate.h | 2 +- .../source/cmdlist/cmdlist_hw_immediate.inl | 73 ++++++--- level_zero/core/source/cmdqueue/cmdqueue.cpp | 2 +- .../core/source/cmdqueue/cmdqueue_hw.inl | 18 +- .../sources/cmdlist/test_cmdlist_2.cpp | 60 ++++++- .../sources/cmdlist/test_cmdlist_5.cpp | 155 ++++++++++++++++-- .../sources/cmdlist/test_cmdlist_7.cpp | 8 +- .../test_cmdlist_append_launch_kernel_3.cpp | 2 +- .../test_cmdlist_append_signal_event.cpp | 96 +++++++++-- .../test_cmdlist_append_wait_on_events.cpp | 8 +- .../sources/cmdqueue/test_cmdqueue_1.cpp | 42 ----- .../source/command_container/cmdcontainer.h | 4 +- .../debug_settings/debug_variables_base.inl | 2 + shared/test/common/test_files/igdrcl.config | 2 + .../command_container_tests.cpp | 35 ++++ 16 files changed, 401 insertions(+), 110 deletions(-) diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 632bcdf5a8..323b7aec18 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -247,7 +247,7 @@ ze_result_t CommandListCoreFamily::initialize(Device *device, NEO this->l1CachePolicyData.init(productHelper); this->cmdListHeapAddressModel = L0GfxCoreHelper::getHeapAddressModel(rootDeviceEnvironment); this->dummyBlitWa.rootDeviceEnvironment = &(neoDevice->getRootDeviceEnvironmentRef()); - this->dispatchCmdListBatchBufferAsPrimary = L0GfxCoreHelper::dispatchCmdListBatchBufferAsPrimary(rootDeviceEnvironment, !isImmediateType()); + 
this->dispatchCmdListBatchBufferAsPrimary = L0GfxCoreHelper::dispatchCmdListBatchBufferAsPrimary(rootDeviceEnvironment, !(isImmediateType() && this->internalUsage)); this->useOnlyGlobalTimestamps = gfxCoreHelper.useOnlyGlobalTimestamps(); this->maxFillPaternSizeForCopyEngine = productHelper.getMaxFillPaternSizeForCopyEngine(); this->heaplessModeEnabled = compilerProductHelper.isHeaplessModeEnabled(); diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h index cc417a69d6..e4b6c67639 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h @@ -200,7 +200,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily::CommandListCoreFamilyImmediate(ui } template -void CommandListCoreFamilyImmediate::checkAvailableSpace(uint32_t numEvents, bool hasRelaxedOrderingDependencies, size_t commandSize) { +void CommandListCoreFamilyImmediate::checkAvailableSpace(uint32_t numEvents, bool hasRelaxedOrderingDependencies, size_t commandSize, bool requestCommandBufferInLocalMem) { this->commandContainer.fillReusableAllocationLists(); - /* Command container might has two command buffers. If it has, one is in local memory, because relaxed ordering requires that and one in system for copying it into ring buffer. - If relaxed ordering is needed in given dispatch and current command stream is in system memory, swap of command streams is required to ensure local memory. Same in the opposite scenario. */ - if (hasRelaxedOrderingDependencies == NEO::MemoryPoolHelper::isSystemMemoryPool(this->commandContainer.getCommandStream()->getGraphicsAllocation()->getMemoryPool())) { + // Command container might have two command buffers - one in local mem (mainly for relaxed ordering and any other specific purposes) and one in system mem for copying into ring buffer. 
+ // If relaxed ordering is needed in the given dispatch, or if local memory usage is explicitly requested, and the current command stream is in system memory, the command streams must be swapped to ensure local memory. + // If neither relaxed ordering nor local memory is requested and the current command stream is in local memory, the command streams must also be swapped. + bool swapStreams = false; + if (hasRelaxedOrderingDependencies) { + if (NEO::MemoryPoolHelper::isSystemMemoryPool(this->commandContainer.getCommandStream()->getGraphicsAllocation()->getMemoryPool())) { + swapStreams = true; + } + } else { + if (requestCommandBufferInLocalMem && NEO::MemoryPoolHelper::isSystemMemoryPool(this->commandContainer.getCommandStream()->getGraphicsAllocation()->getMemoryPool())) { + swapStreams = true; + } else if (!requestCommandBufferInLocalMem && !NEO::MemoryPoolHelper::isSystemMemoryPool(this->commandContainer.getCommandStream()->getGraphicsAllocation()->getMemoryPool())) { + swapStreams = true; + } + } + + if (swapStreams) { if (this->commandContainer.swapStreams()) { this->cmdListCurrentStartOffset = this->commandContainer.getCommandStream()->getUsed(); } @@ -515,7 +529,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendLaunchKernel( relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents, false); bool stallingCmdsForRelaxedOrdering = hasStallingCmdsForRelaxedOrdering(numWaitEvents, relaxedOrderingDispatch); - checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch, commonImmediateCommandSize); + checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch, commonImmediateCommandSize, false); bool hostWait = waitForEventsFromHost(); if (hostWait) { this->synchronizeEventList(numWaitEvents, phWaitEvents); @@ -572,7 +586,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendLaunchKernelInd ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) { relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents, false); - 
checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch, commonImmediateCommandSize); + checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch, commonImmediateCommandSize, false); auto ret = CommandListCoreFamily::appendLaunchKernelIndirect(kernelHandle, pDispatchArgumentsBuffer, hSignalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch); @@ -599,7 +613,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendBarrier(ze_even isStallingOperation = hasStallingCmdsForRelaxedOrdering(numWaitEvents, relaxedOrderingDispatch); } - checkAvailableSpace(numWaitEvents, false, commonImmediateCommandSize); + checkAvailableSpace(numWaitEvents, false, commonImmediateCommandSize, false); ret = CommandListCoreFamily::appendBarrier(hSignalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch); @@ -624,7 +638,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendMemoryCopy( auto sizePerBlit = sizeof(typename GfxFamily::XY_COPY_BLT) + NEO::BlitCommandsHelper::estimatePostBlitCommandSize(); estimatedSize += nBlits * sizePerBlit; } - checkAvailableSpace(numWaitEvents, memoryCopyParams.relaxedOrderingDispatch, estimatedSize); + checkAvailableSpace(numWaitEvents, memoryCopyParams.relaxedOrderingDispatch, estimatedSize, false); bool hasStallindCmds = hasStallingCmdsForRelaxedOrdering(numWaitEvents, memoryCopyParams.relaxedOrderingDispatch); @@ -680,7 +694,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendMemoryCopyRegio auto sizePerBlit = sizeof(typename GfxFamily::XY_COPY_BLT) + NEO::BlitCommandsHelper::estimatePostBlitCommandSize(); estimatedSize += xBlits * yBlits * zBlits * sizePerBlit; } - checkAvailableSpace(numWaitEvents, memoryCopyParams.relaxedOrderingDispatch, estimatedSize); + checkAvailableSpace(numWaitEvents, memoryCopyParams.relaxedOrderingDispatch, estimatedSize, false); bool hasStallindCmds = hasStallingCmdsForRelaxedOrdering(numWaitEvents, memoryCopyParams.relaxedOrderingDispatch); @@ -723,7 +737,7 @@ ze_result_t 
CommandListCoreFamilyImmediate::appendMemoryFill(void ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) { relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents, false); - checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch, commonImmediateCommandSize); + checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch, commonImmediateCommandSize, false); auto ret = CommandListCoreFamily::appendMemoryFill(ptr, pattern, patternSize, size, hSignalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch); @@ -737,7 +751,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendSignalEvent(ze_ relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(0, false); bool hasStallingCmds = !Event::fromHandle(hSignalEvent)->isCounterBased() || hasStallingCmdsForRelaxedOrdering(0, relaxedOrderingDispatch); - checkAvailableSpace(0, false, commonImmediateCommandSize); + checkAvailableSpace(0, false, commonImmediateCommandSize, false); ret = CommandListCoreFamily::appendSignalEvent(hSignalEvent, relaxedOrderingDispatch); return flushImmediate(ret, true, hasStallingCmds, relaxedOrderingDispatch, false, false, hSignalEvent, false); } @@ -746,7 +760,7 @@ template ze_result_t CommandListCoreFamilyImmediate::appendEventReset(ze_event_handle_t hSignalEvent) { ze_result_t ret = ZE_RESULT_SUCCESS; - checkAvailableSpace(0, false, commonImmediateCommandSize); + checkAvailableSpace(0, false, commonImmediateCommandSize, false); ret = CommandListCoreFamily::appendEventReset(hSignalEvent); return flushImmediate(ret, true, true, false, false, false, hSignalEvent, false); } @@ -756,7 +770,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendPageFaultCopy(N NEO::GraphicsAllocation *srcAllocation, size_t size, bool flushHost) { - checkAvailableSpace(0, false, commonImmediateCommandSize); + checkAvailableSpace(0, false, commonImmediateCommandSize, false); ze_result_t ret; @@ -793,7 +807,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendWaitOnEvents(ui } if 
(!skipFlush) { - checkAvailableSpace(numEvents, false, commonImmediateCommandSize); + checkAvailableSpace(numEvents, false, commonImmediateCommandSize, false); } auto ret = CommandListCoreFamily::appendWaitOnEvents(numEvents, phWaitEvents, outWaitCmds, relaxedOrderingAllowed, trackDependencies, apiRequest, skipAddingWaitEventsToResidency, false, copyOffloadOperation); @@ -811,7 +825,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendWriteGlobalTime uint64_t *dstptr, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { - checkAvailableSpace(numWaitEvents, false, commonImmediateCommandSize); + checkAvailableSpace(numWaitEvents, false, commonImmediateCommandSize, false); auto ret = CommandListCoreFamily::appendWriteGlobalTimestamp(dstptr, hSignalEvent, numWaitEvents, phWaitEvents); @@ -855,7 +869,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendImageCopyRegion auto sizePerBlit = sizeof(typename GfxFamily::XY_BLOCK_COPY_BLT) + NEO::BlitCommandsHelper::estimatePostBlitCommandSize(); estimatedSize += nBlits * sizePerBlit; } - checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch, estimatedSize); + checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch, estimatedSize, false); auto ret = CommandListCoreFamily::appendImageCopyRegion(hDstImage, hSrcImage, pDstRegion, pSrcRegion, hSignalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch); @@ -873,7 +887,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendImageCopyFromMe ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) { relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents, false); - checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch, commonImmediateCommandSize); + checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch, commonImmediateCommandSize, false); auto ret = CommandListCoreFamily::appendImageCopyFromMemory(hDstImage, srcPtr, pDstRegion, hSignalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch); 
@@ -891,7 +905,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendImageCopyToMemo ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) { relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents, false); - checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch, commonImmediateCommandSize); + checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch, commonImmediateCommandSize, false); auto ret = CommandListCoreFamily::appendImageCopyToMemory(dstPtr, hSrcImage, pSrcRegion, hSignalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch); @@ -911,7 +925,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendImageCopyFromMe ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) { relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents, false); - checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch, commonImmediateCommandSize); + checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch, commonImmediateCommandSize, false); auto ret = CommandListCoreFamily::appendImageCopyFromMemoryExt(hDstImage, srcPtr, pDstRegion, srcRowPitch, srcSlicePitch, hSignalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch); @@ -931,7 +945,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendImageCopyToMemo ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) { relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents, false); - checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch, commonImmediateCommandSize); + checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch, commonImmediateCommandSize, false); auto ret = CommandListCoreFamily::appendImageCopyToMemoryExt(dstPtr, hSrcImage, pSrcRegion, destRowPitch, destSlicePitch, hSignalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch); @@ -946,7 +960,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendMemoryRangesBar ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { - 
checkAvailableSpace(numWaitEvents, false, commonImmediateCommandSize); + checkAvailableSpace(numWaitEvents, false, commonImmediateCommandSize, false); auto ret = CommandListCoreFamily::appendMemoryRangesBarrier(numRanges, pRangeSizes, pRanges, hSignalEvent, numWaitEvents, phWaitEvents); return flushImmediate(ret, true, true, false, false, false, hSignalEvent, false); @@ -954,14 +968,14 @@ ze_result_t CommandListCoreFamilyImmediate::appendMemoryRangesBar template ze_result_t CommandListCoreFamilyImmediate::appendWaitOnMemory(void *desc, void *ptr, uint64_t data, ze_event_handle_t signalEventHandle, bool useQwordData) { - checkAvailableSpace(0, false, commonImmediateCommandSize); + checkAvailableSpace(0, false, commonImmediateCommandSize, false); auto ret = CommandListCoreFamily::appendWaitOnMemory(desc, ptr, data, signalEventHandle, useQwordData); return flushImmediate(ret, true, false, false, false, false, signalEventHandle, false); } template ze_result_t CommandListCoreFamilyImmediate::appendWriteToMemory(void *desc, void *ptr, uint64_t data) { - checkAvailableSpace(0, false, commonImmediateCommandSize); + checkAvailableSpace(0, false, commonImmediateCommandSize, false); auto ret = CommandListCoreFamily::appendWriteToMemory(desc, ptr, data); return flushImmediate(ret, true, false, false, false, false, nullptr, false); } @@ -971,7 +985,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendWaitExternalSem const ze_intel_external_semaphore_wait_params_exp_t *params, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { - checkAvailableSpace(0, false, commonImmediateCommandSize); + checkAvailableSpace(0, false, commonImmediateCommandSize, false); auto ret = ZE_RESULT_SUCCESS; if (numWaitEvents) { @@ -1016,7 +1030,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendSignalExternalS const ze_intel_external_semaphore_signal_params_exp_t *params, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t 
*phWaitEvents) { - checkAvailableSpace(0, false, commonImmediateCommandSize); + checkAvailableSpace(0, false, commonImmediateCommandSize, false); auto ret = ZE_RESULT_SUCCESS; if (numWaitEvents) { @@ -1663,7 +1677,10 @@ ze_result_t CommandListCoreFamilyImmediate::appendCommandLists(ui ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { auto ret = ZE_RESULT_SUCCESS; - checkAvailableSpace(numWaitEvents, false, commonImmediateCommandSize); + + // For this API's functionality, we require the command buffer allocation to be in local memory. + // So ensure we force it when checking available space and when allocating any new command buffer allocations + checkAvailableSpace(numWaitEvents, false, commonImmediateCommandSize, true); if (numWaitEvents) { ret = this->appendWaitOnEvents(numWaitEvents, phWaitEvents, nullptr, false, true, true, true, true, false); } @@ -1688,7 +1705,9 @@ ze_result_t CommandListCoreFamilyImmediate::appendCommandLists(ui } bool hasStallingCmds = true; - return flushImmediate(ret, true, hasStallingCmds, relaxedOrderingDispatch, true, false, hSignalEvent, true); + ret = flushImmediate(ret, true, hasStallingCmds, relaxedOrderingDispatch, true, false, hSignalEvent, true); + + return ret; } } // namespace L0 diff --git a/level_zero/core/source/cmdqueue/cmdqueue.cpp b/level_zero/core/source/cmdqueue/cmdqueue.cpp index d1594e83ee..ffc7ad1ad6 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue.cpp +++ b/level_zero/core/source/cmdqueue/cmdqueue.cpp @@ -98,7 +98,7 @@ ze_result_t CommandQueueImp::initialize(bool copyOnly, bool isInternal, bool imm auto &productHelper = rootDeviceEnvironment.getHelper(); this->doubleSbaWa = productHelper.isAdditionalStateBaseAddressWARequired(hwInfo); this->cmdListHeapAddressModel = L0GfxCoreHelper::getHeapAddressModel(rootDeviceEnvironment); - this->dispatchCmdListBatchBufferAsPrimary = L0GfxCoreHelper::dispatchCmdListBatchBufferAsPrimary(rootDeviceEnvironment, !immediateCmdListQueue); + 
this->dispatchCmdListBatchBufferAsPrimary = L0GfxCoreHelper::dispatchCmdListBatchBufferAsPrimary(rootDeviceEnvironment, !(immediateCmdListQueue && internalUsage)); auto &compilerProductHelper = rootDeviceEnvironment.getHelper(); this->heaplessModeEnabled = compilerProductHelper.isHeaplessModeEnabled(); this->heaplessStateInitEnabled = compilerProductHelper.isHeaplessStateInitEnabled(this->heaplessModeEnabled); diff --git a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl index 79f17127cf..3011565b0a 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl +++ b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl @@ -113,11 +113,23 @@ ze_result_t CommandQueueHw::executeCommandLists( this->startingCmdBuffer = &this->commandStream; + NEO::LinearStream *parentStream = nullptr; if (this->isCopyOnlyCommandQueue) { - ret = this->executeCommandListsCopyOnly(ctx, numCommandLists, phCommandLists, hFence, parentImmediateCommandlistLinearStream); + if (NEO::debugManager.flags.ForceParentCommandStreamUsageForImmediateAppendForCopyEngine.get() == 1) { + parentStream = parentImmediateCommandlistLinearStream; + } + ret = this->executeCommandListsCopyOnly(ctx, numCommandLists, phCommandLists, hFence, parentStream); } else if (this->heaplessStateInitEnabled) { + parentStream = parentImmediateCommandlistLinearStream; + if (NEO::debugManager.flags.ForceParentCommandStreamUsageForImmediateAppendForComputeEngine.get() == 0) { + parentStream = nullptr; + } ret = this->executeCommandListsRegularHeapless(ctx, numCommandLists, phCommandLists, hFence, parentImmediateCommandlistLinearStream); } else { + parentStream = parentImmediateCommandlistLinearStream; + if (NEO::debugManager.flags.ForceParentCommandStreamUsageForImmediateAppendForComputeEngine.get() == 0) { + parentStream = nullptr; + } ret = this->executeCommandListsRegular(ctx, numCommandLists, phCommandLists, hFence, parentImmediateCommandlistLinearStream); } @@ -1287,10 +1299,6 @@ 
void CommandQueueHw::programOneCmdListBatchBufferStartSecondaryBa } } } - - if (ctx.containsParentImmediateStream) { - NEO::EncodeBatchBufferStartOrEnd::programBatchBufferEnd(commandContainer); - } } template diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_2.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_2.cpp index ecb1dc0403..37591a0dbe 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_2.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_2.cpp @@ -1733,10 +1733,10 @@ HWTEST_F(PrimaryBatchBufferCmdListTest, givenForcedPrimaryBatchBufferWhenRegular EXPECT_TRUE(commandList->dispatchCmdListBatchBufferAsPrimary); EXPECT_TRUE(commandQueue->dispatchCmdListBatchBufferAsPrimary); - EXPECT_FALSE(commandListImmediate->dispatchCmdListBatchBufferAsPrimary); + EXPECT_TRUE(commandListImmediate->dispatchCmdListBatchBufferAsPrimary); ASSERT_NE(nullptr, commandListImmediate->cmdQImmediate); auto immediateCmdQueue = static_cast(commandListImmediate->cmdQImmediate); - EXPECT_FALSE(immediateCmdQueue->dispatchCmdListBatchBufferAsPrimary); + EXPECT_TRUE(immediateCmdQueue->dispatchCmdListBatchBufferAsPrimary); } HWTEST_F(PrimaryBatchBufferCmdListTest, givenPrimaryBatchBufferWhenAppendingKernelAndClosingCommandListThenExpectAlignedSpaceForBatchBufferStart) { @@ -1885,6 +1885,62 @@ HWTEST2_F(PrimaryBatchBufferCmdListTest, givenRelaxedOrderingAndRegularCmdListAn } } +HWTEST2_F(PrimaryBatchBufferCmdListTest, givenRegularCmdListAndSubmittedToImmediateWhenFlushingOnCcsWithoutParentStreamThenExecutePasses, IsAtLeastXeHpcCore) { + DebugManagerStateRestore restore; + debugManager.flags.ForceParentCommandStreamUsageForImmediateAppendForComputeEngine.set(0); + + ze_command_queue_desc_t desc = {}; + desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; + ze_result_t returnValue = ZE_RESULT_ERROR_UNINITIALIZED; + auto immCommandList = zeUniquePtr(CommandList::createImmediate(productFamily, device, &desc, false, 
NEO::EngineGroupType::compute, returnValue)); + ASSERT_NE(nullptr, immCommandList); + + ze_group_count_t groupCount{1, 1, 1}; + CmdListKernelLaunchParams launchParams = {}; + EXPECT_EQ(ZE_RESULT_SUCCESS, commandList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false)); + + EXPECT_EQ(ZE_RESULT_SUCCESS, commandList->close()); + + auto cmdListHandle = commandList->toHandle(); + EXPECT_EQ(ZE_RESULT_SUCCESS, immCommandList->appendCommandLists(1, &cmdListHandle, nullptr, 0, nullptr)); +} + +HWTEST2_F(PrimaryBatchBufferCmdListTest, givenRegularCmdListAndSubmittedToImmediateWhenFlushingOnBcsWithoutParentStreamThenExecutePasses, IsAtLeastXeHpcCore) { + DebugManagerStateRestore restore; + debugManager.flags.ForceParentCommandStreamUsageForImmediateAppendForCopyEngine.set(0); + + ze_command_queue_desc_t desc = {}; + desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; + ze_result_t returnValue = ZE_RESULT_ERROR_UNINITIALIZED; + auto immCommandList = zeUniquePtr(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::copy, returnValue)); + ASSERT_NE(nullptr, immCommandList); + + EXPECT_EQ(ZE_RESULT_SUCCESS, commandList->appendBarrier(nullptr, 0, nullptr, false)); + + EXPECT_EQ(ZE_RESULT_SUCCESS, commandList->close()); + + auto cmdListHandle = commandList->toHandle(); + EXPECT_EQ(ZE_RESULT_SUCCESS, immCommandList->appendCommandLists(1, &cmdListHandle, nullptr, 0, nullptr)); +} + +HWTEST2_F(PrimaryBatchBufferCmdListTest, givenRegularCmdListAndSubmittedToImmediateWhenFlushingOnBcsWithParentStreamThenExecutePasses, IsAtLeastXeHpcCore) { + DebugManagerStateRestore restore; + debugManager.flags.ForceParentCommandStreamUsageForImmediateAppendForCopyEngine.set(1); + + ze_command_queue_desc_t desc = {}; + desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; + ze_result_t returnValue = ZE_RESULT_ERROR_UNINITIALIZED; + auto immCommandList = zeUniquePtr(CommandList::createImmediate(productFamily, device, &desc, false, 
NEO::EngineGroupType::copy, returnValue)); + ASSERT_NE(nullptr, immCommandList); + + EXPECT_EQ(ZE_RESULT_SUCCESS, commandList->appendBarrier(nullptr, 0, nullptr, false)); + + EXPECT_EQ(ZE_RESULT_SUCCESS, commandList->close()); + + auto cmdListHandle = commandList->toHandle(); + EXPECT_EQ(ZE_RESULT_SUCCESS, immCommandList->appendCommandLists(1, &cmdListHandle, nullptr, 0, nullptr)); +} + HWTEST_F(PrimaryBatchBufferCmdListTest, givenCmdListWhenCallingSynchronizeThenUnregisterCsrClient) { ze_group_count_t groupCount{1, 1, 1}; CmdListKernelLaunchParams launchParams = {}; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_5.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_5.cpp index d27b36fd22..d39edf0475 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_5.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_5.cpp @@ -919,7 +919,8 @@ HWTEST2_F(CommandListCreate, givenSecondaryCommandStreamForImmediateCmdListWhenC CommandStreamReceiver *csr = nullptr; device->getCsrForOrdinalAndIndex(&csr, desc.ordinal, desc.index, ZE_COMMAND_QUEUE_PRIORITY_NORMAL, false); reinterpret_cast *>(csr)->directSubmissionAvailable = true; - std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::copy, returnValue)); + std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::compute, returnValue)); + EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); ASSERT_NE(nullptr, commandList); EXPECT_NE(reinterpret_cast(&commandList->getCmdContainer())->secondaryCommandStreamForImmediateCmdList.get(), nullptr); EXPECT_TRUE(MemoryPoolHelper::isSystemMemoryPool(reinterpret_cast(&commandList->getCmdContainer())->secondaryCommandStreamForImmediateCmdList->getGraphicsAllocation()->getMemoryPool())); @@ -927,7 +928,7 @@ HWTEST2_F(CommandListCreate, givenSecondaryCommandStreamForImmediateCmdListWhenC auto immediateCmdList 
= static_cast *>(commandList.get()); auto secondaryCmdStream = reinterpret_cast(&commandList->getCmdContainer())->secondaryCommandStreamForImmediateCmdList.get(); - immediateCmdList->checkAvailableSpace(0u, false, commonImmediateCommandSize); + immediateCmdList->checkAvailableSpace(0u, false, commonImmediateCommandSize, false); EXPECT_EQ(commandList->getCmdContainer().getCommandStream(), secondaryCmdStream); EXPECT_TRUE(MemoryPoolHelper::isSystemMemoryPool(commandList->getCmdContainer().getCommandStream()->getGraphicsAllocation()->getMemoryPool())); @@ -938,6 +939,147 @@ HWTEST2_F(CommandListCreate, givenSecondaryCommandStreamForImmediateCmdListWhenC EXPECT_TRUE(MemoryPoolHelper::isSystemMemoryPool(reinterpret_cast(&commandList->getCmdContainer())->secondaryCommandStreamForImmediateCmdList->getGraphicsAllocation()->getMemoryPool())); } +HWTEST2_F(CommandListCreate, givenSecondaryCommandStreamInHostMemForImmediateCmdListWhenCheckAvailableSpaceForRelaxedOrderingThenSwapCommandStreams, IsAtMostXe2HpgCore) { + auto &gfxCoreHelper = device->getGfxCoreHelper(); + auto &productHelper = device->getProductHelper(); + if (!gfxCoreHelper.isPlatformFlushTaskEnabled(productHelper)) { + GTEST_SKIP(); + } + DebugManagerStateRestore restorer; + debugManager.flags.DirectSubmissionFlatRingBuffer.set(-1); + debugManager.flags.DirectSubmissionRelaxedOrdering.set(1); + + static_cast(device->getNEODevice()->getMemoryManager())->localMemorySupported[0] = true; + ze_command_queue_desc_t desc = {}; + desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; + ze_result_t returnValue; + CommandStreamReceiver *csr = nullptr; + device->getCsrForOrdinalAndIndex(&csr, desc.ordinal, desc.index, ZE_COMMAND_QUEUE_PRIORITY_NORMAL, false); + reinterpret_cast *>(csr)->directSubmissionAvailable = true; + std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::compute, returnValue)); + EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); + ASSERT_NE(nullptr, 
commandList); + EXPECT_NE(reinterpret_cast(&commandList->getCmdContainer())->secondaryCommandStreamForImmediateCmdList.get(), nullptr); + EXPECT_TRUE(MemoryPoolHelper::isSystemMemoryPool(reinterpret_cast(&commandList->getCmdContainer())->secondaryCommandStreamForImmediateCmdList->getGraphicsAllocation()->getMemoryPool())); + + auto immediateCmdList = static_cast *>(commandList.get()); + + auto secondaryCmdStream = reinterpret_cast(&commandList->getCmdContainer())->secondaryCommandStreamForImmediateCmdList.get(); + + immediateCmdList->getCmdContainer().swapStreams(); + EXPECT_EQ(commandList->getCmdContainer().getCommandStream(), secondaryCmdStream); + EXPECT_TRUE(MemoryPoolHelper::isSystemMemoryPool(commandList->getCmdContainer().getCommandStream()->getGraphicsAllocation()->getMemoryPool())); + + immediateCmdList->checkAvailableSpace(0u, true, commonImmediateCommandSize, false); + + EXPECT_NE(commandList->getCmdContainer().getCommandStream(), secondaryCmdStream); +} + +struct CmdContainerMockLocalAllocTests : public CommandContainer { + using CommandContainer::secondaryCommandStreamForImmediateCmdList; +}; + +HWTEST2_F(CommandListCreate, givenSecondaryCommandStreamForImmediateCmdListButNotYetUsingHostAllocWhenCallingAppendCmdlistsThenDoNotSwapCommandStreams, IsPVC) { + DebugManagerStateRestore restorer; + debugManager.flags.DirectSubmissionFlatRingBuffer.set(-1); + debugManager.flags.DirectSubmissionRelaxedOrdering.set(1); + + static_cast(device->getNEODevice()->getMemoryManager())->localMemorySupported[0] = true; + ze_command_queue_desc_t desc = {}; + desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; + ze_result_t returnValue; + CommandStreamReceiver *csr = nullptr; + device->getCsrForOrdinalAndIndex(&csr, desc.ordinal, desc.index, ZE_COMMAND_QUEUE_PRIORITY_NORMAL, false); + reinterpret_cast *>(csr)->directSubmissionAvailable = true; + std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::copy, 
returnValue)); + ASSERT_NE(nullptr, commandList); + EXPECT_NE(reinterpret_cast(&commandList->getCmdContainer())->secondaryCommandStreamForImmediateCmdList.get(), nullptr); + EXPECT_TRUE(MemoryPoolHelper::isSystemMemoryPool(reinterpret_cast(&commandList->getCmdContainer())->secondaryCommandStreamForImmediateCmdList->getGraphicsAllocation()->getMemoryPool())); + + auto secondaryCmdStream = reinterpret_cast(&commandList->getCmdContainer())->secondaryCommandStreamForImmediateCmdList.get(); + + std::unique_ptr commandListRegular(CommandList::create(productFamily, device, NEO::EngineGroupType::copy, 0u, returnValue, false)); + commandListRegular->close(); + auto commandListHandle = commandListRegular->toHandle(); + + ze_result_t result = ZE_RESULT_SUCCESS; + result = commandList->appendCommandLists(1u, &commandListHandle, nullptr, 0u, nullptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + EXPECT_NE(commandList->getCmdContainer().getCommandStream(), secondaryCmdStream); + EXPECT_FALSE(MemoryPoolHelper::isSystemMemoryPool(commandList->getCmdContainer().getCommandStream()->getGraphicsAllocation()->getMemoryPool())); +} + +HWTEST2_F(CommandListCreate, givenSecondaryCommandStreamForImmediateCmdListButAndUsingHostAllocWhenCallingAppendCmdlistsThenSwapCommandStreamsAndAppendSucceeds, IsAtMostXe2HpgCore) { + DebugManagerStateRestore restorer; + debugManager.flags.DirectSubmissionFlatRingBuffer.set(-1); + + static_cast(device->getNEODevice()->getMemoryManager())->localMemorySupported[0] = true; + ze_command_queue_desc_t desc = {}; + desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; + ze_result_t returnValue; + CommandStreamReceiver *csr = nullptr; + device->getCsrForOrdinalAndIndex(&csr, desc.ordinal, desc.index, ZE_COMMAND_QUEUE_PRIORITY_NORMAL, false); + reinterpret_cast *>(csr)->directSubmissionAvailable = true; + std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::copy, returnValue)); + ASSERT_NE(nullptr, 
commandList); + EXPECT_NE(reinterpret_cast(&commandList->getCmdContainer())->secondaryCommandStreamForImmediateCmdList.get(), nullptr); + EXPECT_TRUE(MemoryPoolHelper::isSystemMemoryPool(reinterpret_cast(&commandList->getCmdContainer())->secondaryCommandStreamForImmediateCmdList->getGraphicsAllocation()->getMemoryPool())); + + auto secondaryCmdStream = reinterpret_cast(&commandList->getCmdContainer())->secondaryCommandStreamForImmediateCmdList.get(); + + std::unique_ptr commandListRegular(CommandList::create(productFamily, device, NEO::EngineGroupType::copy, 0u, returnValue, false)); + commandListRegular->close(); + auto commandListHandle = commandListRegular->toHandle(); + + commandList->getCmdContainer().swapStreams(); + EXPECT_EQ(commandList->getCmdContainer().getCommandStream(), secondaryCmdStream); + + ze_result_t result = ZE_RESULT_SUCCESS; + result = commandList->appendCommandLists(1u, &commandListHandle, nullptr, 0u, nullptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + EXPECT_NE(commandList->getCmdContainer().getCommandStream(), secondaryCmdStream); +} + +HWTEST2_F(CommandListCreate, givenSecondaryCommandStreamForImmediateCmdListAndAlreadyUsingHostAllocThenAppendingRegularCommandlistsIntoImmediateUsesLocalAndRestoresHostAlloc, MatchAny) { + if (!device->getHwInfo().featureTable.flags.ftrLocalMemory) { + GTEST_SKIP(); + } + DebugManagerStateRestore restorer; + debugManager.flags.DirectSubmissionFlatRingBuffer.set(-1); + + static_cast(device->getNEODevice()->getMemoryManager())->localMemorySupported[0] = true; + ze_command_queue_desc_t desc = {}; + desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; + ze_result_t returnValue; + CommandStreamReceiver *csr = nullptr; + device->getCsrForOrdinalAndIndex(&csr, desc.ordinal, desc.index, ZE_COMMAND_QUEUE_PRIORITY_NORMAL, false); + reinterpret_cast *>(csr)->directSubmissionAvailable = true; + std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::copy, 
returnValue)); + ASSERT_NE(nullptr, commandList); + EXPECT_NE(reinterpret_cast(&commandList->getCmdContainer())->secondaryCommandStreamForImmediateCmdList.get(), nullptr); + EXPECT_TRUE(MemoryPoolHelper::isSystemMemoryPool(reinterpret_cast(&commandList->getCmdContainer())->secondaryCommandStreamForImmediateCmdList->getGraphicsAllocation()->getMemoryPool())); + + auto immediateCmdList = static_cast *>(commandList.get()); + auto secondaryCmdStream = reinterpret_cast(&commandList->getCmdContainer())->secondaryCommandStreamForImmediateCmdList.get(); + + immediateCmdList->checkAvailableSpace(0u, false, commonImmediateCommandSize, false); + + EXPECT_EQ(commandList->getCmdContainer().getCommandStream(), secondaryCmdStream); + EXPECT_TRUE(MemoryPoolHelper::isSystemMemoryPool(commandList->getCmdContainer().getCommandStream()->getGraphicsAllocation()->getMemoryPool())); + + std::unique_ptr commandListRegular(CommandList::create(productFamily, device, NEO::EngineGroupType::copy, 0u, returnValue, false)); + commandListRegular->close(); + auto commandListHandle = commandListRegular->toHandle(); + + ze_result_t result = ZE_RESULT_SUCCESS; + result = commandList->appendCommandLists(1u, &commandListHandle, nullptr, 0u, nullptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_EQ(commandList->getCmdContainer().getCommandStream(), secondaryCmdStream); + EXPECT_TRUE(commandList->getCmdContainer().usingSecondaryCmdbufInHostMem()); +} + HWTEST2_F(CommandListCreate, givenNoSecondaryCommandStreamForImmediateCmdListWhenCheckAvailableSpaceThenNotSwapCommandStreams, MatchAny) { if (!device->getHwInfo().featureTable.flags.ftrLocalMemory) { GTEST_SKIP(); @@ -956,7 +1098,7 @@ HWTEST2_F(CommandListCreate, givenNoSecondaryCommandStreamForImmediateCmdListWhe auto immediateCmdList = static_cast *>(commandList.get()); auto cmdStream = commandList->getCmdContainer().getCommandStream(); - immediateCmdList->checkAvailableSpace(0u, false, commonImmediateCommandSize); + 
immediateCmdList->checkAvailableSpace(0u, false, commonImmediateCommandSize, false); EXPECT_EQ(commandList->getCmdContainer().getCommandStream(), cmdStream); EXPECT_FALSE(MemoryPoolHelper::isSystemMemoryPool(commandList->getCmdContainer().getCommandStream()->getGraphicsAllocation()->getMemoryPool())); @@ -1989,7 +2131,6 @@ HWTEST2_F(CommandListStateBaseAddressPrivateHeapTest, HWTEST2_F(CommandListStateBaseAddressPrivateHeapTest, givenStateBaseAddressTrackingWhenImmediateCmdListAppendKernelChangesHeapsAndExecuteThenFinalBaseAddressStateIsStoredInCsr, MatchAny) { - checkAndPrepareBindlessKernel(); StackVec sbaCmdsSizes = {}; @@ -2157,7 +2298,6 @@ HWTEST2_F(CommandListStateBaseAddressPrivateHeapTest, HWTEST2_F(CommandListStateBaseAddressPrivateHeapTest, givenStateBaseAddressTrackingWhenRegularCmdListAppendKernelAndExecuteAndImmediateCmdListAppendKernelSharingCsrThenBaseAddressStateIsUpdatedInCsr, MatchAny) { - checkAndPrepareBindlessKernel(); StackVec sbaCmdsSizes = {}; @@ -2388,7 +2528,6 @@ HWTEST2_F(CommandListStateBaseAddressPrivateHeapTest, HWTEST2_F(CommandListStateBaseAddressPrivateHeapTest, givenStateBaseAddressTrackingWhenImmediateCmdListAppendKernelAndRegularCmdListAppendKernelAndExecuteSharingCsrThenBaseAddressStateIsUpdatedInCsr, MatchAny) { - checkAndPrepareBindlessKernel(); StackVec sbaCmdsSizes = {}; @@ -2616,7 +2755,6 @@ HWTEST2_F(CommandListStateBaseAddressPrivateHeapTest, HWTEST2_F(CommandListStateBaseAddressPrivateHeapTest, givenStateBaseAddressTrackingWhenRegularCmdListAppendUncachedKernelFirstAndExecuteAndImmediateCmdListAppendUncachedKernelThenMocsStateIsUpdatedInCsr, MatchAny) { - checkAndPrepareBindlessKernel(); StackVec sbaCmdsSizes = {}; @@ -2981,7 +3119,6 @@ HWTEST2_F(CommandListStateBaseAddressPrivateHeapTest, HWTEST2_F(CommandListStateBaseAddressPrivateHeapTest, givenCommandListAppendsKernelWhenCommandListIsResetThenBaseAddressPropertiesAreResetToo, MatchAny) { - ze_group_count_t groupCount{1, 1, 1}; CmdListKernelLaunchParams launchParams = 
{}; auto result = commandList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false); @@ -3016,7 +3153,6 @@ HWTEST2_F(CommandListStateBaseAddressPrivateHeapTest, HWTEST2_F(CommandListStateBaseAddressPrivateHeapTest, givenCommandListAppendsKernelWhenCommandListIsResetThenStateHeapsRetainPosition, MatchAny) { - ze_group_count_t groupCount{1, 1, 1}; CmdListKernelLaunchParams launchParams = {}; auto result = commandList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false); @@ -3252,7 +3388,6 @@ HWTEST2_F(CommandListStateBaseAddressPrivateHeapTest, HWTEST2_F(CommandListStateBaseAddressPrivateHeapTest, givenCommandListUsingPrivateSurfaceHeapWhenTaskCountZeroAndCommandListDestroyedThenCsrDoNotDispatchesStateCacheFlush, HeapfulSupportedMatch) { - DebugManagerStateRestore restorer; debugManager.flags.ContextGroupSize.set(0); NEO::MockDevice *mockNeoDevice(NEO::MockDevice::createWithNewExecutionEnvironment(NEO::defaultHwInfo.get(), 0)); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp index e026ea52a6..9ef3617c14 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp @@ -1889,12 +1889,12 @@ HWTEST2_F(CommandListCreate, givenImmediateCommandListWhenThereIsNoEnoughSpaceFo commandList->getCmdContainer().getCommandStream()->getGraphicsAllocation()->updateTaskCount(0u, 0u); commandList->getCmdContainer().getCommandStream()->getSpace(useSize); - reinterpret_cast *>(commandList.get())->checkAvailableSpace(0, false, commonImmediateCommandSize); + reinterpret_cast *>(commandList.get())->checkAvailableSpace(0, false, commonImmediateCommandSize, false); EXPECT_EQ(1U, commandList->getCmdContainer().getCmdBufferAllocations().size()); commandList->getCmdContainer().getCommandStream()->getSpace(useSize); auto 
latestFlushedTaskCount = whiteBoxCmdList->getCsr(false)->peekLatestFlushedTaskCount(); - reinterpret_cast *>(commandList.get())->checkAvailableSpace(0, false, commonImmediateCommandSize); + reinterpret_cast *>(commandList.get())->checkAvailableSpace(0, false, commonImmediateCommandSize, false); EXPECT_EQ(1U, commandList->getCmdContainer().getCmdBufferAllocations().size()); EXPECT_EQ(latestFlushedTaskCount + 1, whiteBoxCmdList->getCsr(false)->peekLatestFlushedTaskCount()); } @@ -1916,12 +1916,12 @@ HWTEST2_F(CommandListCreate, givenImmediateCommandListWhenThereIsNoEnoughSpaceFo commandList->getCmdContainer().getCommandStream()->getGraphicsAllocation()->updateTaskCount(0u, 0u); commandList->getCmdContainer().getCommandStream()->getSpace(useSize); - reinterpret_cast *>(commandList.get())->checkAvailableSpace(numEvents, false, commonImmediateCommandSize); + reinterpret_cast *>(commandList.get())->checkAvailableSpace(numEvents, false, commonImmediateCommandSize, false); EXPECT_EQ(1U, commandList->getCmdContainer().getCmdBufferAllocations().size()); commandList->getCmdContainer().getCommandStream()->getSpace(useSize); auto latestFlushedTaskCount = whiteBoxCmdList->getCsr(false)->peekLatestFlushedTaskCount(); - reinterpret_cast *>(commandList.get())->checkAvailableSpace(numEvents, false, commonImmediateCommandSize); + reinterpret_cast *>(commandList.get())->checkAvailableSpace(numEvents, false, commonImmediateCommandSize, false); EXPECT_EQ(1U, commandList->getCmdContainer().getCmdBufferAllocations().size()); EXPECT_EQ(latestFlushedTaskCount + 1, whiteBoxCmdList->getCsr(false)->peekLatestFlushedTaskCount()); } diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp index 06671c51af..6b12b2932d 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp +++ 
b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp @@ -1244,7 +1244,7 @@ HWTEST2_F(MultiTileImmediateCommandListAppendLaunchKernelXeHpCoreTest, givenImpl auto itorBbStart = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), itorBbStart); auto cmdBbStart = genCmdCast(*itorBbStart); - EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, cmdBbStart->getSecondLevelBatchBuffer()); + EXPECT_NE(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, cmdBbStart->getSecondLevelBatchBuffer()); } } // namespace ult } // namespace L0 diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp index a772eb2eef..80d466a7d9 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp @@ -154,7 +154,7 @@ HWTEST2_F(CommandListAppendSignalEvent, givenCommandListWhenAppendWriteGlobalTim EXPECT_FALSE(cmd->getDcFlushEnable()); } -HWTEST2_F(CommandListAppendSignalEvent, givenImmediateCmdListAndAppendingRegularCommandlistWithWaitOnEventsAndSignalEventThenUseSemaphoreAndPipeControl, IsAtLeastXeHpcCore) { +HWTEST2_F(CommandListAppendSignalEvent, givenImmediateCmdListAndAppendingRegularCommandlistWithWaitOnEventsAndSignalEventThenUseSemaphoreAndPipeControl, IsXeHpcCore) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; @@ -222,6 +222,81 @@ HWTEST2_F(CommandListAppendSignalEvent, givenImmediateCmdListAndAppendingRegular ASSERT_TRUE(postSyncFound); } +HWTEST2_F(CommandListAppendSignalEvent, 
givenImmediateCmdListAndSecondaryDispatchModeForcedAndAppendingRegularCommandlistWithWaitOnEventsAndSignalEventThenUseSemaphoreAndPipeControl, MatchAny) { + DebugManagerStateRestore restorer; + debugManager.flags.DispatchCmdlistCmdBufferPrimary.set(0); + + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; + using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; + using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; + using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END; + + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.count = 1; + eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE; + + ze_event_desc_t eventDesc = {}; + eventDesc.index = 0; + eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST; + + ze_result_t result = ZE_RESULT_SUCCESS; + auto eventPoolHostVisible = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + auto eventHostVisible = std::unique_ptr(Event::create(eventPoolHostVisible.get(), &eventDesc, device)); + + auto waitEventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + auto waitEvent = std::unique_ptr(Event::create(waitEventPool.get(), &eventDesc, device)); + + ze_command_queue_desc_t desc = {}; + desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS; + ze_result_t returnValue; + std::unique_ptr immCommandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::renderCompute, returnValue)); + ASSERT_NE(nullptr, immCommandList); + + ze_event_handle_t hSignalEventHandle = eventHostVisible->toHandle(); + ze_event_handle_t hWaitEventHandle = waitEvent->toHandle(); + std::unique_ptr commandListRegular(CommandList::create(productFamily, device, NEO::EngineGroupType::compute, 0u, returnValue, false)); + 
commandListRegular->close(); + auto commandListHandle = commandListRegular->toHandle(); + auto usedSpaceBefore = immCommandList->getCmdContainer().getCommandStream()->getUsed(); + result = immCommandList->appendCommandLists(1u, &commandListHandle, hSignalEventHandle, 1u, &hWaitEventHandle); + + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + auto usedSpaceAfter = immCommandList->getCmdContainer().getCommandStream()->getUsed(); + ASSERT_GT(usedSpaceAfter, usedSpaceBefore); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(cmdList, + immCommandList->getCmdContainer().getCommandStream()->getCpuBase(), + usedSpaceAfter)); + + auto itorSemaphore = find(cmdList.begin(), cmdList.end()); + ASSERT_NE(cmdList.end(), itorSemaphore); + + auto itorBBStart = find(itorSemaphore, cmdList.end()); + ASSERT_NE(cmdList.end(), itorBBStart); + + auto itorPC = findAll(itorBBStart, cmdList.end()); + ASSERT_NE(0u, itorPC.size()); + bool postSyncFound = false; + for (auto it : itorPC) { + auto cmd = genCmdCast(*it); + if (cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) { + EXPECT_NE(cmd->getImmediateData(), Event::STATE_CLEARED); + EXPECT_TRUE(cmd->getCommandStreamerStallEnable()); + EXPECT_EQ(MemorySynchronizationCommands::getDcFlushEnable(true, device->getNEODevice()->getRootDeviceEnvironment()), cmd->getDcFlushEnable()); + postSyncFound = true; + } + } + ASSERT_TRUE(postSyncFound); + + auto itorBBEnd = find(itorBBStart, cmdList.end()); + ASSERT_NE(cmdList.end(), itorBBEnd); +} + HWTEST2_F(CommandListAppendSignalEvent, givenImmediateCmdListWithComputeQueueAndAppendingRegularCommandlistThenCsrMakeNonTesidentSkippedFromCmdQueue, IsAtLeastXeHpcCore) { ze_command_queue_desc_t desc = {}; desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; @@ -254,7 +329,6 @@ HWTEST2_F(CommandListAppendSignalEvent, givenImmediateCmdListWithComputeQueueAnd HWTEST2_F(CommandListAppendSignalEvent, 
givenCopyOnlyImmediateCmdListAndAppendingRegularCommandlistWithWaitOnEventsAndSignalEventThenUseSemaphoreAndFlushDw, IsAtLeastXeHpcCore) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; - using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW; ze_event_pool_desc_t eventPoolDesc = {}; @@ -283,6 +357,10 @@ HWTEST2_F(CommandListAppendSignalEvent, givenCopyOnlyImmediateCmdListAndAppendin ze_event_handle_t hSignalEventHandle = eventHostVisible->toHandle(); ze_event_handle_t hWaitEventHandle = waitEvent->toHandle(); std::unique_ptr commandListRegular(CommandList::create(productFamily, device, NEO::EngineGroupType::copy, 0u, returnValue, false)); + void *srcPtr = reinterpret_cast(0x1234); + void *dstPtr = reinterpret_cast(0x2345); + CmdListMemoryCopyParams copyParams = {}; + commandListRegular->appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 0, nullptr, copyParams); commandListRegular->close(); auto commandListHandle = commandListRegular->toHandle(); auto usedSpaceBefore = immCommandList->getCmdContainer().getCommandStream()->getUsed(); @@ -301,9 +379,6 @@ HWTEST2_F(CommandListAppendSignalEvent, givenCopyOnlyImmediateCmdListAndAppendin auto itorSemaphore = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), itorSemaphore); - auto itorBBStart = find(itorSemaphore, cmdList.end()); - ASSERT_NE(cmdList.end(), itorBBStart); - uint32_t expectedMiFlushCount = 1; NEO::EncodeDummyBlitWaArgs waArgs{false, &(device->getNEODevice()->getRootDeviceEnvironmentRef())}; if (MockEncodeMiFlushDW::getWaSize(waArgs) > 0) { @@ -316,7 +391,7 @@ HWTEST2_F(CommandListAppendSignalEvent, givenCopyOnlyImmediateCmdListAndAppendin EXPECT_EQ(expectedMiFlushCount, static_cast(itorMiFlush.size())); } -HWTEST2_F(CommandListAppendSignalEvent, givenImmediateCmdListWithCopyQueueAndAppendingRegularCommandlistThenCsrMakeNonTesidentSkippedFromCmdQueue, IsAtLeastXeHpcCore) { +HWTEST2_F(CommandListAppendSignalEvent, 
givenImmediateCmdListWithCopyQueueAndAppendingRegularCommandlistThenCsrMakeNonResidentFromCmdQueue, IsAtMostXeHpgCore) { ze_command_queue_desc_t desc = {}; desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; @@ -328,22 +403,21 @@ HWTEST2_F(CommandListAppendSignalEvent, givenImmediateCmdListWithCopyQueueAndApp device, &desc, false, - NEO::EngineGroupType::copy, + NEO::EngineGroupType::compute, returnValue))); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); ASSERT_NE(nullptr, commandList0); auto &commandStreamReceiver = neoDevice->getUltCommandStreamReceiver(); - auto heaplessStateInit = commandStreamReceiver.heaplessStateInitialized; - std::unique_ptr commandListRegular(CommandList::create(productFamily, device, NEO::EngineGroupType::copy, 0u, returnValue, false)); + std::unique_ptr commandListRegular(CommandList::create(productFamily, device, NEO::EngineGroupType::compute, 0u, returnValue, false)); commandListRegular->close(); auto commandListHandle = commandListRegular->toHandle(); ze_result_t result = ZE_RESULT_SUCCESS; result = commandList0->appendCommandLists(1u, &commandListHandle, nullptr, 0u, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); - EXPECT_EQ(heaplessStateInit ? 
2u : 1u, commandStreamReceiver.makeSurfacePackNonResidentCalled); + EXPECT_EQ(1u, commandStreamReceiver.makeSurfacePackNonResidentCalled); } HWTEST2_F(CommandListAppendSignalEvent, givenTimestampEventUsedInSignalThenPipeControlAppendedCorrectly, MatchAny) { @@ -664,7 +738,7 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent, event->signalScope = ZE_EVENT_SCOPE_FLAG_HOST; commandList->partitionCount = packets; - commandList->checkAvailableSpace(0, false, commonImmediateCommandSize); + commandList->checkAvailableSpace(0, false, commonImmediateCommandSize, false); commandList->appendSignalEventPostWalker(event.get(), nullptr, nullptr, false, false, false); EXPECT_EQ(packets, event->getPacketsInUse()); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_wait_on_events.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_wait_on_events.cpp index b1beccc213..a7412ed8ff 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_wait_on_events.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_wait_on_events.cpp @@ -202,7 +202,6 @@ HWTEST2_F(CommandListAppendWaitOnEvent, givenImmediateCmdListWithDirectSubmissio HWTEST2_F(CommandListAppendWaitOnEvent, givenImmediateCmdListAndAppendingRegularCommandlistWithWaitOnEventsThenUseSemaphore, IsAtLeastXeHpcCore) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; - using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; ze_command_queue_desc_t desc = {}; desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS; @@ -212,6 +211,10 @@ HWTEST2_F(CommandListAppendWaitOnEvent, givenImmediateCmdListAndAppendingRegular ze_event_handle_t hEventHandle = event->toHandle(); std::unique_ptr commandListRegular(CommandList::create(productFamily, device, NEO::EngineGroupType::compute, 0u, returnValue, false)); + void *srcPtr = reinterpret_cast(0x1234); + void *dstPtr = reinterpret_cast(0x2345); + CmdListMemoryCopyParams 
copyParams = {}; + commandListRegular->appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 0, nullptr, copyParams); commandListRegular->close(); auto commandListHandle = commandListRegular->toHandle(); auto result = immCommandList->appendCommandLists(1u, &commandListHandle, nullptr, 1u, &hEventHandle); @@ -227,9 +230,6 @@ HWTEST2_F(CommandListAppendWaitOnEvent, givenImmediateCmdListAndAppendingRegular auto itor = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), itor); - - auto itorBBStart = find(itor, cmdList.end()); - ASSERT_NE(cmdList.end(), itorBBStart); } template diff --git a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_1.cpp b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_1.cpp index 4e092cda42..00a805faca 100644 --- a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_1.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_1.cpp @@ -540,48 +540,6 @@ HWTEST_F(CommandQueueCreate, givenUpdateTaskCountFromWaitAndRegularCmdListWhenDi commandQueue->destroy(); } -HWTEST_F(CommandQueueCreate, givenUpdateTaskCountFromWaitAndImmediateCmdListWhenDispatchTaskCountWriteThenNoPipeControlFlushed) { - using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; - using POST_SYNC_OPERATION = typename FamilyType::PIPE_CONTROL::POST_SYNC_OPERATION; - - DebugManagerStateRestore restorer; - debugManager.flags.UpdateTaskCountFromWait.set(3); - - const ze_command_queue_desc_t desc = {}; - ze_result_t returnValue; - auto commandQueue = whiteboxCast(CommandQueue::create(productFamily, - device, - neoDevice->getDefaultEngine().commandStreamReceiver, - &desc, - false, - false, - true, - returnValue)); - - auto commandList = CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::renderCompute, returnValue); - ASSERT_NE(nullptr, commandList); - - ze_command_list_handle_t cmdListHandle = commandList->toHandle(); - commandQueue->executeCommandLists(1, &cmdListHandle, nullptr, false, 
nullptr); - - GenCmdList cmdList; - ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer( - cmdList, ptrOffset(commandQueue->commandStream.getCpuBase(), 0), commandQueue->commandStream.getUsed())); - - auto pipeControls = findAll(cmdList.begin(), cmdList.end()); - bool pipeControlsPostSync = false; - for (size_t i = 0; i < pipeControls.size(); i++) { - auto pipeControl = reinterpret_cast(*pipeControls[i]); - if (pipeControl->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) { - pipeControlsPostSync = true; - } - } - EXPECT_FALSE(pipeControlsPostSync); - - commandList->destroy(); - commandQueue->destroy(); -} - HWTEST_F(CommandQueueCreate, givenContainerWithAllocationsWhenResidencyContainerIsEmptyThenMakeResidentWasNotCalled) { auto csr = std::make_unique(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield()); csr->setupContext(*neoDevice->getDefaultEngine().osContext); diff --git a/shared/source/command_container/cmdcontainer.h b/shared/source/command_container/cmdcontainer.h index a6557a0313..165ec8f072 100644 --- a/shared/source/command_container/cmdcontainer.h +++ b/shared/source/command_container/cmdcontainer.h @@ -89,6 +89,8 @@ class CommandContainer : public NonCopyableOrMovableClass { LinearStream *getCommandStream() { return commandStream.get(); } + bool usingSecondaryCmdbufInHostMem() { return useSecondaryCommandStream; } + IndirectHeap *getIndirectHeap(HeapType heapType); HeapHelper *getHeapHelper() { return heapHelper.get(); } @@ -119,7 +121,7 @@ class CommandContainer : public NonCopyableOrMovableClass { GraphicsAllocation *obtainNextCommandBufferAllocation(); GraphicsAllocation *obtainNextCommandBufferAllocation(bool forceHostMemory); - bool swapStreams(); + MOCKABLE_VIRTUAL bool swapStreams(); void reset(); diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl index a381a77078..ed6ea60fd2 100644 --- 
a/shared/source/debug_settings/debug_variables_base.inl +++ b/shared/source/debug_settings/debug_variables_base.inl @@ -308,6 +308,8 @@ DECLARE_DEBUG_VARIABLE(int32_t, DebugUmdMaxReadWriteRetry, -1, "-1: default , > DECLARE_DEBUG_VARIABLE(int32_t, ForceIndirectDetectionForCMKernels, -1, "-1: default , 0 : disable indirect detection for CM kernels, 1 : enable indirect detection for CM kernels") DECLARE_DEBUG_VARIABLE(int32_t, PipelinedEuThreadArbitration, -1, "-1: default. 1: Use Walker field, 0: Use StateComputeMode command to program pipelinedEuThreadArbitration") DECLARE_DEBUG_VARIABLE(bool, ForceUseOnlyGlobalTimestamps, 0, "0- default disabled, 1: enable use only global timestamp") +DECLARE_DEBUG_VARIABLE(int32_t, ForceParentCommandStreamUsageForImmediateAppendForCopyEngine, -1, "-1 - default (disabled), 0: disable, 1: enable") +DECLARE_DEBUG_VARIABLE(int32_t, ForceParentCommandStreamUsageForImmediateAppendForComputeEngine, -1, "-1 - default (enabled), 0: disable, 1: enable") /*LOGGING FLAGS*/ DECLARE_DEBUG_VARIABLE(int32_t, PrintDriverDiagnostics, -1, "prints driver diagnostics messages to standard output, value corresponds to hint level") diff --git a/shared/test/common/test_files/igdrcl.config b/shared/test/common/test_files/igdrcl.config index f1c8f3f128..89fc1e8f00 100644 --- a/shared/test/common/test_files/igdrcl.config +++ b/shared/test/common/test_files/igdrcl.config @@ -665,4 +665,6 @@ PipelinedEuThreadArbitration = -1 ExperimentalUSMAllocationReuseCleaner = -1 DummyPageBackingEnabled = 0 EnableDeferBacking = 0 +ForceParentCommandStreamUsageForImmediateAppendForCopyEngine = -1 +ForceParentCommandStreamUsageForImmediateAppendForComputeEngine = -1 # Please don't edit below this line diff --git a/shared/test/unit_test/command_container/command_container_tests.cpp b/shared/test/unit_test/command_container/command_container_tests.cpp index b24a7fc900..15446cef55 100644 --- a/shared/test/unit_test/command_container/command_container_tests.cpp +++ 
b/shared/test/unit_test/command_container/command_container_tests.cpp @@ -98,6 +98,25 @@ TEST_F(CommandContainerHeapStateTests, givenDirtyHeapsWhenSettingStateForSingleH } } +using CommandContainerSwapStreamTest = Test; +TEST_F(CommandContainerSwapStreamTest, givenCommandContainerInitializedWithSecondaryCmdBufferAndForceSwapStreamsReturnsFalseThenCallIsUnsuccessful) { + class MyMockCommandContainer : public CommandContainer { + public: + bool swapStreams() override { + swapStreamsCalled++; + return forceSwapAction; + } + + uint32_t swapStreamsCalled = 0u; + bool forceSwapAction = false; + }; + + MyMockCommandContainer cmdContainer; + cmdContainer.initialize(pDevice, nullptr, HeapSize::defaultHeapSize, true, true); + + EXPECT_FALSE(cmdContainer.swapStreams()); +} + TEST_F(CommandContainerTest, givenCmdContainerWhenCreatingCommandBufferThenCorrectAllocationTypeIsSet) { CommandContainer cmdContainer; cmdContainer.initialize(pDevice, nullptr, HeapSize::defaultHeapSize, true, false); @@ -126,6 +145,22 @@ TEST_F(CommandContainerTest, givenCreateSecondaryCmdBufferInHostMemWhenInitializ EXPECT_EQ(cmdContainer.secondaryCommandStreamForImmediateCmdList.get(), cmdStream); } +TEST_F(CommandContainerTest, givenCreateSecondaryCmdBufferInHostMemWhenInitializeThenCreateAdditionalLinearStreamAndReturnAccordingly) { + MyMockCommandContainer cmdContainer; + cmdContainer.initialize(pDevice, nullptr, HeapSize::defaultHeapSize, true, true); + + EXPECT_NE(cmdContainer.secondaryCommandStreamForImmediateCmdList.get(), nullptr); + + auto secondaryCmdStream = cmdContainer.secondaryCommandStreamForImmediateCmdList.get(); + auto cmdStream = cmdContainer.getCommandStream(); + + EXPECT_TRUE(cmdContainer.swapStreams()); + EXPECT_TRUE(cmdContainer.usingSecondaryCmdbufInHostMem()); + + EXPECT_EQ(cmdContainer.getCommandStream(), secondaryCmdStream); + EXPECT_EQ(cmdContainer.secondaryCommandStreamForImmediateCmdList.get(), cmdStream); +} + TEST_F(CommandContainerTest, 
whenInitializeThenNotCreateAdditionalLinearStream) { MyMockCommandContainer cmdContainer; cmdContainer.initialize(pDevice, nullptr, HeapSize::defaultHeapSize, true, false);