From 2f5be7a48dddfe9bdf3a3c955872cd97e82da810 Mon Sep 17 00:00:00 2001 From: Lukasz Jobczyk Date: Wed, 22 Feb 2023 07:29:42 +0000 Subject: [PATCH] Copy command buffer into ring buffer Resolves: NEO-7422 Signed-off-by: Lukasz Jobczyk --- level_zero/core/source/cmdlist/cmdlist_hw.inl | 14 +- .../source/cmdlist/cmdlist_hw_immediate.h | 2 +- .../source/cmdlist/cmdlist_hw_immediate.inl | 77 ++++----- .../sources/cmdlist/test_cmdlist_5.cpp | 134 +++++++++++++++ .../sources/cmdlist/test_cmdlist_7.cpp | 8 +- .../source_level_debugger_tests.cpp | 2 +- .../source/command_container/cmdcontainer.cpp | 45 ++++- .../source/command_container/cmdcontainer.h | 9 +- .../debug_settings/debug_variables_base.inl | 1 + .../direct_submission/direct_submission_hw.h | 1 + .../direct_submission_hw.inl | 33 +++- .../memory_manager/allocations_list.cpp | 9 +- .../source/memory_manager/allocations_list.h | 3 +- .../memory_manager/definitions/storage_info.h | 1 + .../source/memory_manager/memory_manager.cpp | 1 + .../os_interface/linux/drm_memory_manager.cpp | 1 + .../common/mocks/mock_direct_submission_hw.h | 1 + shared/test/common/test_files/igdrcl.config | 1 + .../command_container_tests.cpp | 154 +++++++++++------- .../command_stream/linear_stream_tests.cpp | 12 +- .../unit_test/debugger/test_l0_debugger.cpp | 8 +- .../direct_submission_tests_2.cpp | 114 +++++++++++++ .../command_encoder_tests_xehp_and_later.cpp | 2 +- .../encoders/test_encode_command_buffer.cpp | 14 +- .../encoders/test_encode_dispatch_kernel.cpp | 18 +- ..._encode_dispatch_kernel_xehp_and_later.cpp | 6 +- .../unit_test/encoders/test_encode_math.cpp | 8 +- .../test_encode_math_xehp_and_later.cpp | 2 +- .../encoders/test_encode_set_mmio.cpp | 2 +- .../fixtures/command_container_fixture.cpp | 2 +- .../fixtures/direct_submission_fixture.h | 6 + .../gen11/test_encode_math_gen11.cpp | 2 +- .../gen12lp/test_command_encoder_gen12lp.cpp | 10 +- .../gen12lp/test_encode_math_gen12lp.cpp | 2 +- .../unit_test/gen8/test_encode_math_gen8.cpp | 2 +- .../gen9/test_command_encoder_gen9.cpp | 6 +- .../unit_test/gen9/test_encode_math_gen9.cpp | 2 +- .../linux/drm_command_stream_tests_1.cpp | 1 + .../linux/drm_memory_manager_tests.cpp | 2 +- .../windows/device_command_stream_tests.cpp | 2 + 40 files changed, 557 insertions(+), 163 deletions(-) diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 3312e3ff63..d1cfa91b2c 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -167,7 +167,19 @@ ze_result_t CommandListCoreFamily::initialize(Device *device, NEO commandContainer.setReservedSshSize(getReserveSshSize()); DeviceImp *deviceImp = static_cast(device); - auto returnValue = commandContainer.initialize(deviceImp->getActiveDevice(), deviceImp->allocationsForReuse.get(), !isCopyOnly()); + + auto createSecondaryCmdBufferInHostMem = this->cmdListType == TYPE_IMMEDIATE && + this->isFlushTaskSubmissionEnabled && + !device->isImplicitScalingCapable() && + this->csr && + this->csr->isAnyDirectSubmissionEnabled() && + deviceImp->getNEODevice()->getMemoryManager()->isLocalMemorySupported(deviceImp->getRootDeviceIndex()); + + if (NEO::DebugManager.flags.DirectSubmissionFlatRingBuffer.get() != -1) { + createSecondaryCmdBufferInHostMem &= !!NEO::DebugManager.flags.DirectSubmissionFlatRingBuffer.get(); + } + + auto returnValue = commandContainer.initialize(deviceImp->getActiveDevice(), deviceImp->allocationsForReuse.get(), !isCopyOnly(), createSecondaryCmdBufferInHostMem); if (!this->pipelineSelectStateTracking) { // allow systolic support set in container when tracking disabled // setting systolic support allows dispatching untracked command in legacy mode diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h index 07ff200c88..ee937eb16f 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h @@ -147,7 +147,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily::getLogic } template -void CommandListCoreFamilyImmediate::checkAvailableSpace(uint32_t numEvents) { +void CommandListCoreFamilyImmediate::checkAvailableSpace(uint32_t numEvents, bool hasRelaxedOrderingDependencies) { this->commandContainer.fillReusableAllocationLists(); + + /* Command container might has two command buffers. If it has, one is in local memory, because relaxed ordering requires that and one in system for copying it into ring buffer. + If relaxed ordering is needed in given dispatch and current command stream is in system memory, swap of command streams is required to ensure local memory. Same in the opposite scenario. */ + if (hasRelaxedOrderingDependencies == NEO::MemoryPoolHelper::isSystemMemoryPool(this->commandContainer.getCommandStream()->getGraphicsAllocation()->getMemoryPool())) { + if (this->commandContainer.swapStreams()) { + this->cmdListCurrentStartOffset = this->commandContainer.getCommandStream()->getUsed(); + } + } + size_t semaphoreSize = NEO::EncodeSempahore::getSizeMiSemaphoreWait() * numEvents; if (this->commandContainer.getCommandStream()->getAvailableSpace() < maxImmediateCommandSize + semaphoreSize) { + bool requireSystemMemoryCommandBuffer = !hasRelaxedOrderingDependencies; - auto alloc = this->commandContainer.reuseExistingCmdBuffer(); + auto alloc = this->commandContainer.reuseExistingCmdBuffer(requireSystemMemoryCommandBuffer); this->commandContainer.addCurrentCommandBufferToReusableAllocationList(); if (!alloc) { - alloc = this->commandContainer.allocateCommandBuffer(); + alloc = this->commandContainer.allocateCommandBuffer(requireSystemMemoryCommandBuffer); this->commandContainer.getCmdBufferAllocations().push_back(alloc); } this->commandContainer.setCmdBuffer(alloc); @@ -328,8 +338,10 @@ ze_result_t CommandListCoreFamilyImmediate::appendLaunchKernel( ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, const CmdListKernelLaunchParams &launchParams, bool relaxedOrderingDispatch) { + relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents); + if (this->isFlushTaskSubmissionEnabled) { - checkAvailableSpace(numWaitEvents); + checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch); } bool hostWait = waitForEventsFromHost(); if (hostWait || this->eventWaitlistSyncRequired()) { @@ -340,8 +352,6 @@ ze_result_t CommandListCoreFamilyImmediate::appendLaunchKernel( } } - relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents); - auto ret = CommandListCoreFamily::appendLaunchKernel(kernelHandle, threadGroupDimensions, hSignalEvent, numWaitEvents, phWaitEvents, launchParams, relaxedOrderingDispatch); @@ -352,14 +362,13 @@ template ze_result_t CommandListCoreFamilyImmediate::appendLaunchKernelIndirect( ze_kernel_handle_t kernelHandle, const ze_group_count_t *pDispatchArgumentsBuffer, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) { + relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents); if (this->isFlushTaskSubmissionEnabled) { - checkAvailableSpace(numWaitEvents); + checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch); checkWaitEventsState(numWaitEvents, phWaitEvents); } - relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents); - auto ret = CommandListCoreFamily::appendLaunchKernelIndirect(kernelHandle, pDispatchArgumentsBuffer, hSignalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch); return flushImmediate(ret, true, false, relaxedOrderingDispatch, hSignalEvent); @@ -373,7 +382,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendBarrier( ze_result_t ret = ZE_RESULT_SUCCESS; if (this->isFlushTaskSubmissionEnabled) { - checkAvailableSpace(numWaitEvents); + checkAvailableSpace(numWaitEvents, false); checkWaitEventsState(numWaitEvents, phWaitEvents); } ret = CommandListCoreFamily::appendBarrier(hSignalEvent, numWaitEvents, phWaitEvents); @@ -390,9 +399,10 @@ ze_result_t CommandListCoreFamilyImmediate::appendMemoryCopy( ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) { + relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents); if (this->isFlushTaskSubmissionEnabled) { - checkAvailableSpace(numWaitEvents); + checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch); checkWaitEventsState(numWaitEvents, phWaitEvents); } @@ -407,8 +417,6 @@ ze_result_t CommandListCoreFamilyImmediate::appendMemoryCopy( } } - relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents); - NEO::TransferDirection direction; auto isSplitNeeded = this->isAppendSplitNeeded(dstptr, srcptr, size, direction); if (isSplitNeeded) { @@ -436,16 +444,15 @@ ze_result_t CommandListCoreFamilyImmediate::appendMemoryCopyRegio ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) { + relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents); if (this->isFlushTaskSubmissionEnabled) { - checkAvailableSpace(numWaitEvents); + checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch); checkWaitEventsState(numWaitEvents, phWaitEvents); } ze_result_t ret; - relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents); - NEO::TransferDirection direction; auto isSplitNeeded = this->isAppendSplitNeeded(dstPtr, srcPtr, this->getTotalSizeForCopyRegion(dstRegion, dstPitch, dstSlicePitch), direction); if (isSplitNeeded) { @@ -478,14 +485,13 @@ ze_result_t CommandListCoreFamilyImmediate::appendMemoryFill(void ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) { + relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents); if (this->isFlushTaskSubmissionEnabled) { - checkAvailableSpace(numWaitEvents); + checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch); checkWaitEventsState(numWaitEvents, phWaitEvents); } - relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents); - auto ret = CommandListCoreFamily::appendMemoryFill(ptr, pattern, patternSize, size, hSignalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch); return flushImmediate(ret, true, false, relaxedOrderingDispatch, hSignalEvent); @@ -497,7 +503,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendSignalEvent(ze_ ze_result_t ret = ZE_RESULT_SUCCESS; if (this->isFlushTaskSubmissionEnabled) { - checkAvailableSpace(0); + checkAvailableSpace(0, false); } ret = CommandListCoreFamily::appendSignalEvent(hSignalEvent); return flushImmediate(ret, true, true, false, hSignalEvent); @@ -509,7 +515,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendEventReset(ze_e ze_result_t ret = ZE_RESULT_SUCCESS; if (this->isFlushTaskSubmissionEnabled) { - checkAvailableSpace(0); + checkAvailableSpace(0, false); } ret = CommandListCoreFamily::appendEventReset(hSignalEvent); return flushImmediate(ret, true, true, false, hSignalEvent); @@ -521,7 +527,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendPageFaultCopy(N size_t size, bool flushHost) { if (this->isFlushTaskSubmissionEnabled) { - checkAvailableSpace(0); + checkAvailableSpace(0, false); } ze_result_t ret; @@ -557,7 +563,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendWaitOnEvents(ui return ZE_RESULT_SUCCESS; } if (this->isFlushTaskSubmissionEnabled) { - checkAvailableSpace(numEvents); + checkAvailableSpace(numEvents, false); checkWaitEventsState(numEvents, phWaitEvents); } auto ret = CommandListCoreFamily::appendWaitOnEvents(numEvents, phWaitEvents, relaxedOrderingAllowed, trackDependencies); @@ -571,7 +577,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendWriteGlobalTime uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { if (this->isFlushTaskSubmissionEnabled) { - checkAvailableSpace(numWaitEvents); + checkAvailableSpace(numWaitEvents, false); checkWaitEventsState(numWaitEvents, phWaitEvents); } auto ret = CommandListCoreFamily::appendWriteGlobalTimestamp(dstptr, hSignalEvent, numWaitEvents, phWaitEvents); @@ -606,14 +612,13 @@ ze_result_t CommandListCoreFamilyImmediate::appendImageCopyRegion ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) { + relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents); if (this->isFlushTaskSubmissionEnabled) { - checkAvailableSpace(numWaitEvents); + checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch); checkWaitEventsState(numWaitEvents, phWaitEvents); } - relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents); - auto ret = CommandListCoreFamily::appendImageCopyRegion(hDstImage, hSrcImage, pDstRegion, pSrcRegion, hSignalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch); return flushImmediate(ret, true, false, relaxedOrderingDispatch, hSignalEvent); @@ -627,14 +632,13 @@ ze_result_t CommandListCoreFamilyImmediate::appendImageCopyFromMe ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) { + relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents); if (this->isFlushTaskSubmissionEnabled) { - checkAvailableSpace(numWaitEvents); + checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch); checkWaitEventsState(numWaitEvents, phWaitEvents); } - relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents); - auto ret = CommandListCoreFamily::appendImageCopyFromMemory(hDstImage, srcPtr, pDstRegion, hSignalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch); @@ -649,14 +653,13 @@ ze_result_t CommandListCoreFamilyImmediate::appendImageCopyToMemo ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) { + relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents); if (this->isFlushTaskSubmissionEnabled) { - checkAvailableSpace(numWaitEvents); + checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch); checkWaitEventsState(numWaitEvents, phWaitEvents); } - relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents); - auto ret = CommandListCoreFamily::appendImageCopyToMemory(dstPtr, hSrcImage, pSrcRegion, hSignalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch); @@ -671,7 +674,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendMemoryRangesBar uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { if (this->isFlushTaskSubmissionEnabled) { - checkAvailableSpace(numWaitEvents); + checkAvailableSpace(numWaitEvents, false); checkWaitEventsState(numWaitEvents, phWaitEvents); } auto ret = CommandListCoreFamily::appendMemoryRangesBarrier(numRanges, pRangeSizes, pRanges, hSignalEvent, numWaitEvents, phWaitEvents); @@ -684,13 +687,13 @@ ze_result_t CommandListCoreFamilyImmediate::appendLaunchCooperati ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *waitEventHandles, bool relaxedOrderingDispatch) { + relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents); + if (this->isFlushTaskSubmissionEnabled) { - checkAvailableSpace(numWaitEvents); + checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch); checkWaitEventsState(numWaitEvents, waitEventHandles); } - relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents); - auto ret = CommandListCoreFamily::appendLaunchCooperativeKernel(kernelHandle, launchKernelArgs, hSignalEvent, numWaitEvents, waitEventHandles, relaxedOrderingDispatch); return flushImmediate(ret, true, false, relaxedOrderingDispatch, hSignalEvent); } diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_5.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_5.cpp index d2ce16d1a4..660b72e818 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_5.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_5.cpp @@ -11,6 +11,7 @@ #include "shared/source/kernel/implicit_args.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/helpers/unit_test_helper.h" +#include "shared/test/common/libult/ult_command_stream_receiver.h" #include "shared/test/common/test_macros/hw_test.h" #include "level_zero/core/source/kernel/kernel_imp.h" @@ -813,6 +814,139 @@ HWTEST_F(CommandListCreate, givenFlushTaskFlagEnabledAndAsyncCmdQueueAndCopyOnly EXPECT_GT(commandContainer.getCommandStream()->getUsed(), used); } +struct CmdContainerMock : public CommandContainer { + using CommandContainer::secondaryCommandStreamForImmediateCmdList; +}; + +HWTEST_F(CommandListCreate, givenImmediateCopyOnlySingleTileDirectSubmissionCommandListWhenInitializeThenCreateSecondaryCmdBufferInSystemMemory) { + DebugManagerStateRestore restorer; + DebugManager.flags.DirectSubmissionFlatRingBuffer.set(-1); + + ze_command_queue_desc_t desc = {}; + desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; + ze_result_t returnValue; + CommandStreamReceiver *csr = nullptr; + device->getCsrForOrdinalAndIndex(&csr, desc.ordinal, desc.index); + reinterpret_cast *>(csr)->directSubmissionAvailable = true; + std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::Copy, returnValue)); + ASSERT_NE(nullptr, commandList); + + auto localMemSupported = device->getHwInfo().featureTable.flags.ftrLocalMemory; + EXPECT_EQ(reinterpret_cast(&commandList->commandContainer)->secondaryCommandStreamForImmediateCmdList.get() != nullptr, localMemSupported); + if (localMemSupported) { + EXPECT_TRUE(MemoryPoolHelper::isSystemMemoryPool(reinterpret_cast(&commandList->commandContainer)->secondaryCommandStreamForImmediateCmdList->getGraphicsAllocation()->getMemoryPool())); + } +} + +HWTEST2_F(CommandListCreate, givenSecondaryCommandStreamForImmediateCmdListWhenCheckAvailableSpaceThenSwapCommandStreams, IsAtLeastSkl) { + if (!device->getHwInfo().featureTable.flags.ftrLocalMemory) { + GTEST_SKIP(); + } + DebugManagerStateRestore restorer; + DebugManager.flags.DirectSubmissionFlatRingBuffer.set(-1); + + static_cast(device->getNEODevice()->getMemoryManager())->localMemorySupported[0] = true; + ze_command_queue_desc_t desc = {}; + desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; + ze_result_t returnValue; + CommandStreamReceiver *csr = nullptr; + device->getCsrForOrdinalAndIndex(&csr, desc.ordinal, desc.index); + reinterpret_cast *>(csr)->directSubmissionAvailable = true; + std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::Copy, returnValue)); + ASSERT_NE(nullptr, commandList); + EXPECT_NE(reinterpret_cast(&commandList->commandContainer)->secondaryCommandStreamForImmediateCmdList.get(), nullptr); + EXPECT_TRUE(MemoryPoolHelper::isSystemMemoryPool(reinterpret_cast(&commandList->commandContainer)->secondaryCommandStreamForImmediateCmdList->getGraphicsAllocation()->getMemoryPool())); + + auto immediateCmdList = static_cast *>(commandList.get()); + auto secondaryCmdStream = reinterpret_cast(&commandList->commandContainer)->secondaryCommandStreamForImmediateCmdList.get(); + + immediateCmdList->checkAvailableSpace(0u, false); + + EXPECT_EQ(commandList->commandContainer.getCommandStream(), secondaryCmdStream); + EXPECT_TRUE(MemoryPoolHelper::isSystemMemoryPool(commandList->commandContainer.getCommandStream()->getGraphicsAllocation()->getMemoryPool())); +} + +HWTEST2_F(CommandListCreate, givenNoSecondaryCommandStreamForImmediateCmdListWhenCheckAvailableSpaceThenNotSwapCommandStreams, IsAtLeastSkl) { + if (!device->getHwInfo().featureTable.flags.ftrLocalMemory) { + GTEST_SKIP(); + } + DebugManagerStateRestore restorer; + DebugManager.flags.DirectSubmissionFlatRingBuffer.set(-1); + + static_cast(device->getNEODevice()->getMemoryManager())->localMemorySupported[0] = true; + ze_command_queue_desc_t desc = {}; + desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; + ze_result_t returnValue; + std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::Copy, returnValue)); + ASSERT_NE(nullptr, commandList); + EXPECT_EQ(reinterpret_cast(&commandList->commandContainer)->secondaryCommandStreamForImmediateCmdList.get(), nullptr); + + auto immediateCmdList = static_cast *>(commandList.get()); + auto cmdStream = commandList->commandContainer.getCommandStream(); + + immediateCmdList->checkAvailableSpace(0u, false); + + EXPECT_EQ(commandList->commandContainer.getCommandStream(), cmdStream); + EXPECT_FALSE(MemoryPoolHelper::isSystemMemoryPool(commandList->commandContainer.getCommandStream()->getGraphicsAllocation()->getMemoryPool())); +} + +HWTEST_F(CommandListCreate, givenDirectSubmissionFlatRingBufferFlagDisabledImmediateCopyOnlySingleTileDirectSubmissionCommandListWhenInitializeThenNotCreateSecondaryCmdBufferInSystemMemory) { + DebugManagerStateRestore restorer; + DebugManager.flags.DirectSubmissionFlatRingBuffer.set(0u); + + ze_command_queue_desc_t desc = {}; + desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; + ze_result_t returnValue; + CommandStreamReceiver *csr = nullptr; + device->getCsrForOrdinalAndIndex(&csr, desc.ordinal, desc.index); + reinterpret_cast *>(csr)->directSubmissionAvailable = true; + std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::Copy, returnValue)); + ASSERT_NE(nullptr, commandList); + + EXPECT_EQ(reinterpret_cast(&commandList->commandContainer)->secondaryCommandStreamForImmediateCmdList.get(), nullptr); +} + +HWTEST_F(CommandListCreate, givenImmediateCopyOnlySingleTileCommandListWhenInitializeThenNotCreateSecondaryCmdBufferInSystemMemory) { + DebugManagerStateRestore restorer; + DebugManager.flags.DirectSubmissionFlatRingBuffer.set(-1); + ze_command_queue_desc_t desc = {}; + desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; + ze_result_t returnValue; + std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::Copy, returnValue)); + ASSERT_NE(nullptr, commandList); + + EXPECT_EQ(reinterpret_cast(&commandList->commandContainer)->secondaryCommandStreamForImmediateCmdList.get(), nullptr); +} + +using CommandListCreateImplicitScaling = Test>; +HWTEST_F(CommandListCreateImplicitScaling, givenImmediateCopyOnlyDirectSubmissionCommandListWhenInitializeThenNotCreateSecondaryCmdBufferInSystemMemory) { + DebugManagerStateRestore restorer; + DebugManager.flags.DirectSubmissionFlatRingBuffer.set(-1); + ze_command_queue_desc_t desc = {}; + desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; + ze_result_t returnValue; + CommandStreamReceiver *csr = nullptr; + device->getCsrForOrdinalAndIndex(&csr, desc.ordinal, desc.index); + reinterpret_cast *>(csr)->directSubmissionAvailable = true; + std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::Copy, returnValue)); + ASSERT_NE(nullptr, commandList); + + EXPECT_EQ(reinterpret_cast(&commandList->commandContainer)->secondaryCommandStreamForImmediateCmdList.get(), nullptr); +} + +HWTEST_F(CommandListCreate, givenCopyOnlySingleTileDirectSubmissionCommandListWhenInitializeThenNotCreateSecondaryCmdBufferInSystemMemory) { + DebugManagerStateRestore restorer; + DebugManager.flags.DirectSubmissionFlatRingBuffer.set(-1); + ze_result_t returnValue; + CommandStreamReceiver *csr = nullptr; + device->getCsrForOrdinalAndIndex(&csr, 0u, 0u); + reinterpret_cast *>(csr)->directSubmissionAvailable = true; + std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Copy, 0u, returnValue)); + ASSERT_NE(nullptr, commandList); + + EXPECT_EQ(reinterpret_cast(&commandList->commandContainer)->secondaryCommandStreamForImmediateCmdList.get(), nullptr); +} + HWTEST_F(CommandListCreate, givenAsyncCmdQueueAndCopyOnlyImmediateCommandListWhenAppendWaitEventsWithSubdeviceScopeThenMiFlushAndSemWaitAreAdded) { using SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp index 82fa70bdb8..51b2c5862c 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp @@ -1894,12 +1894,12 @@ HWTEST2_F(CommandListCreate, givenImmediateCommandListWhenThereIsNoEnoughSpaceFo commandList->commandContainer.getCommandStream()->getGraphicsAllocation()->updateTaskCount(0u, 0u); commandList->commandContainer.getCommandStream()->getSpace(useSize); - reinterpret_cast *>(commandList.get())->checkAvailableSpace(0); + reinterpret_cast *>(commandList.get())->checkAvailableSpace(0, false); EXPECT_EQ(1U, commandList->commandContainer.getCmdBufferAllocations().size()); commandList->commandContainer.getCommandStream()->getSpace(useSize); auto latestFlushedTaskCount = commandList->csr->peekLatestFlushedTaskCount(); - reinterpret_cast *>(commandList.get())->checkAvailableSpace(0); + reinterpret_cast *>(commandList.get())->checkAvailableSpace(0, false); EXPECT_EQ(1U, commandList->commandContainer.getCmdBufferAllocations().size()); EXPECT_EQ(latestFlushedTaskCount + 1, commandList->csr->peekLatestFlushedTaskCount()); } @@ -1920,12 +1920,12 @@ HWTEST2_F(CommandListCreate, givenImmediateCommandListWhenThereIsNoEnoughSpaceFo commandList->commandContainer.getCommandStream()->getGraphicsAllocation()->updateTaskCount(0u, 0u); commandList->commandContainer.getCommandStream()->getSpace(useSize); - reinterpret_cast *>(commandList.get())->checkAvailableSpace(numEvents); + reinterpret_cast *>(commandList.get())->checkAvailableSpace(numEvents, false); EXPECT_EQ(1U, commandList->commandContainer.getCmdBufferAllocations().size()); commandList->commandContainer.getCommandStream()->getSpace(useSize); auto latestFlushedTaskCount = commandList->csr->peekLatestFlushedTaskCount(); - reinterpret_cast *>(commandList.get())->checkAvailableSpace(numEvents); + reinterpret_cast *>(commandList.get())->checkAvailableSpace(numEvents, false); EXPECT_EQ(1U, commandList->commandContainer.getCmdBufferAllocations().size()); EXPECT_EQ(latestFlushedTaskCount + 1, commandList->csr->peekLatestFlushedTaskCount()); } diff --git a/opencl/test/unit_test/source_level_debugger/source_level_debugger_tests.cpp b/opencl/test/unit_test/source_level_debugger/source_level_debugger_tests.cpp index df8084e3b2..e793e39f7d 100644 --- a/opencl/test/unit_test/source_level_debugger/source_level_debugger_tests.cpp +++ b/opencl/test/unit_test/source_level_debugger/source_level_debugger_tests.cpp @@ -704,7 +704,7 @@ TEST(SourceLevelDebugger, whenCaptureSBACalledThenNoCommandsAreAddedToStream) { MockSourceLevelDebugger debugger; CommandContainer container; - container.initialize(device.get(), nullptr, true); + container.initialize(device.get(), nullptr, true, false); NEO::Debugger::SbaAddresses sbaAddresses = {}; debugger.captureStateBaseAddress(*container.getCommandStream(), sbaAddresses, false); diff --git a/shared/source/command_container/cmdcontainer.cpp b/shared/source/command_container/cmdcontainer.cpp index c93d1dee48..048a747d06 100644 --- a/shared/source/command_container/cmdcontainer.cpp +++ b/shared/source/command_container/cmdcontainer.cpp @@ -65,7 +65,7 @@ CommandContainer::CommandContainer(uint32_t maxNumAggregatedIdds) : CommandConta numIddsPerBlock = maxNumAggregatedIdds; } -CommandContainer::ErrorCode CommandContainer::initialize(Device *device, AllocationsList *reusableAllocationList, bool requireHeaps) { +CommandContainer::ErrorCode CommandContainer::initialize(Device *device, AllocationsList *reusableAllocationList, bool requireHeaps, bool createSecondaryCmdBufferInHostMem) { this->device = device; this->reusableAllocationList = reusableAllocationList; size_t alignedSize = alignUp(this->getTotalCmdBufferSize(), MemoryConstants::pageSize64k); @@ -85,6 +85,19 @@ CommandContainer::ErrorCode CommandContainer::initialize(Device *device, Allocat commandStream->replaceGraphicsAllocation(cmdBufferAllocation); + if (createSecondaryCmdBufferInHostMem) { + this->useSecondaryCommandStream = true; + + auto cmdBufferAllocationHost = this->obtainNextCommandBufferAllocation(true); + if (!cmdBufferAllocationHost) { + return ErrorCode::OUT_OF_DEVICE_MEMORY; + } + secondaryCommandStreamForImmediateCmdList = std::make_unique(cmdBufferAllocationHost->getUnderlyingBuffer(), + alignedSize - cmdBufferReservedSize, this, gfxCoreHelper.getBatchBufferEndSize()); + secondaryCommandStreamForImmediateCmdList->replaceGraphicsAllocation(cmdBufferAllocationHost); + cmdBufferAllocations.push_back(cmdBufferAllocationHost); + } + if (!getFlushTaskUsedForImmediate()) { addToResidencyContainer(cmdBufferAllocation); } @@ -139,6 +152,14 @@ void CommandContainer::addToResidencyContainer(GraphicsAllocation *alloc) { this->residencyContainer.push_back(alloc); } +bool CommandContainer::swapStreams() { + if (this->useSecondaryCommandStream) { + this->commandStream.swap(this->secondaryCommandStreamForImmediateCmdList); + return true; + } + return false; +} + void CommandContainer::removeDuplicatesFromResidencyContainer() { std::sort(this->residencyContainer.begin(), this->residencyContainer.end()); this->residencyContainer.erase(std::unique(this->residencyContainer.begin(), this->residencyContainer.end()), this->residencyContainer.end()); @@ -275,14 +296,18 @@ void CommandContainer::handleCmdBufferAllocations(size_t startIndex) { } GraphicsAllocation *CommandContainer::obtainNextCommandBufferAllocation() { + return this->obtainNextCommandBufferAllocation(false); +} +GraphicsAllocation *CommandContainer::obtainNextCommandBufferAllocation(bool forceHostMemory) { + forceHostMemory &= this->useSecondaryCommandStream; GraphicsAllocation *cmdBufferAllocation = nullptr; if (this->reusableAllocationList) { size_t alignedSize = alignUp(this->getTotalCmdBufferSize(), MemoryConstants::pageSize64k); - cmdBufferAllocation = this->reusableAllocationList->detachAllocation(alignedSize, nullptr, nullptr, AllocationType::COMMAND_BUFFER).release(); + cmdBufferAllocation = this->reusableAllocationList->detachAllocation(alignedSize, nullptr, forceHostMemory, nullptr, AllocationType::COMMAND_BUFFER).release(); } if (!cmdBufferAllocation) { - cmdBufferAllocation = this->allocateCommandBuffer(); + cmdBufferAllocation = this->allocateCommandBuffer(forceHostMemory); } return cmdBufferAllocation; @@ -381,10 +406,15 @@ void CommandContainer::reserveSpaceForDispatch(HeapReserveArguments &sshReserveA } GraphicsAllocation *CommandContainer::reuseExistingCmdBuffer() { + return this->reuseExistingCmdBuffer(false); +} + +GraphicsAllocation *CommandContainer::reuseExistingCmdBuffer(bool forceHostMemory) { + forceHostMemory &= this->useSecondaryCommandStream; size_t alignedSize = alignUp(this->getTotalCmdBufferSize(), MemoryConstants::pageSize64k); - auto cmdBufferAllocation = this->immediateReusableAllocationList->detachAllocation(alignedSize, nullptr, this->immediateCmdListCsr, AllocationType::COMMAND_BUFFER).release(); + auto cmdBufferAllocation = this->immediateReusableAllocationList->detachAllocation(alignedSize, nullptr, forceHostMemory, this->immediateCmdListCsr, AllocationType::COMMAND_BUFFER).release(); if (!cmdBufferAllocation) { - this->reusableAllocationList->detachAllocation(alignedSize, nullptr, this->immediateCmdListCsr, AllocationType::COMMAND_BUFFER).release(); + this->reusableAllocationList->detachAllocation(alignedSize, nullptr, forceHostMemory, this->immediateCmdListCsr, AllocationType::COMMAND_BUFFER).release(); } if (cmdBufferAllocation) { @@ -409,6 +439,10 @@ void CommandContainer::setCmdBuffer(GraphicsAllocation *cmdBuffer) { } GraphicsAllocation *CommandContainer::allocateCommandBuffer() { + return this->allocateCommandBuffer(false); +} + +GraphicsAllocation *CommandContainer::allocateCommandBuffer(bool forceHostMemory) { size_t alignedSize = alignUp(this->getTotalCmdBufferSize(), MemoryConstants::pageSize64k); AllocationProperties properties{device->getRootDeviceIndex(), true /* allocateMemory*/, @@ -417,6 +451,7 @@ GraphicsAllocation *CommandContainer::allocateCommandBuffer() { (device->getNumGenericSubDevices() > 1u) /* multiOsContextCapable */, false, device->getDeviceBitfield()}; + properties.flags.forceSystemMemory = forceHostMemory && this->useSecondaryCommandStream; return device->getMemoryManager()->allocateGraphicsMemoryWithProperties(properties); } diff --git a/shared/source/command_container/cmdcontainer.h b/shared/source/command_container/cmdcontainer.h index 7ab11a3b59..cbf0f19597 100644 --- a/shared/source/command_container/cmdcontainer.h +++ b/shared/source/command_container/cmdcontainer.h @@ -97,7 +97,7 @@ class CommandContainer : public NonCopyableOrMovableClass { void *getHeapSpaceAllowGrow(HeapType heapType, size_t size); - ErrorCode initialize(Device *device, AllocationsList *reusableAllocationList, bool requireHeaps); + ErrorCode initialize(Device *device, AllocationsList *reusableAllocationList, bool requireHeaps, bool createSecondaryCmdBufferInHostMem); void prepareBindfulSsh(); @@ -111,6 +111,9 @@ class CommandContainer : public NonCopyableOrMovableClass { void handleCmdBufferAllocations(size_t startIndex); GraphicsAllocation *obtainNextCommandBufferAllocation(); + GraphicsAllocation *obtainNextCommandBufferAllocation(bool forceHostMemory); + + bool swapStreams(); void reset(); @@ -139,7 +142,9 @@ class CommandContainer : public NonCopyableOrMovableClass { void reserveSpaceForDispatch(HeapReserveArguments &sshReserveArg, HeapReserveArguments &dshReserveArg, bool getDsh); GraphicsAllocation *reuseExistingCmdBuffer(); + GraphicsAllocation *reuseExistingCmdBuffer(bool forceHostMemory); GraphicsAllocation *allocateCommandBuffer(); + GraphicsAllocation *allocateCommandBuffer(bool forceHostMemory); void setCmdBuffer(GraphicsAllocation *cmdBuffer); void addCurrentCommandBufferToReusableAllocationList(); @@ -177,6 +182,8 @@ class CommandContainer : public NonCopyableOrMovableClass { std::unique_ptr heapHelper; std::unique_ptr commandStream; + std::unique_ptr secondaryCommandStreamForImmediateCmdList; + bool useSecondaryCommandStream = false; uint64_t instructionHeapBaseAddress = 0u; uint64_t indirectObjectHeapBaseAddress = 0u; diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl index 2616016421..c073175a1e 100644 --- a/shared/source/debug_settings/debug_variables_base.inl +++ b/shared/source/debug_settings/debug_variables_base.inl @@ -328,6 +328,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionOverrideComputeSupport, -1, "Ove DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionDisableCacheFlush, -1, "-1: driver default, 0: additional cache flush is present 1: disable dispatching cache flush commands") DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionNewResourceTlbFlush, -1, "-1: driver default - flush when new resource is bound, 0: disabled, 1: enabled") DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionDisableMonitorFence, -1, "Disable dispatching monitor fence commands") +DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionFlatRingBuffer, 0, "-1: default, 0: disable, 1: enable, Copies task command buffer directly into ring, implemented for immediate command lists only") DECLARE_DEBUG_VARIABLE(int32_t, EnableDirectSubmissionController, -1, "Enable direct submission terminating after given timeout, -1: default, 0: disabled, 1: enabled") DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionControllerTimeout, -1, "Set direct submission controller timeout, -1: default 5000 us, >=0: timeout in us") DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionControllerDivisor, -1, "Set direct submission controller timeout divider, -1: default 1, >0: divider value") diff --git a/shared/source/direct_submission/direct_submission_hw.h b/shared/source/direct_submission/direct_submission_hw.h index 5078ef7e0c..5545fa6eee 100644 --- a/shared/source/direct_submission/direct_submission_hw.h +++ b/shared/source/direct_submission/direct_submission_hw.h @@ -113,6 +113,7 @@ class DirectSubmissionHw { virtual uint64_t updateTagValue() = 0; virtual void getTagAddressValue(TagData &tagData) = 0; void unblockGpu(); + bool copyCommandBufferIntoRing(BatchBuffer &batchBuffer); void cpuCachelineFlush(void *ptr, size_t size); diff --git a/shared/source/direct_submission/direct_submission_hw.inl b/shared/source/direct_submission/direct_submission_hw.inl index c7ac88f9a9..93c4bdde76 100644 --- a/shared/source/direct_submission/direct_submission_hw.inl +++ b/shared/source/direct_submission/direct_submission_hw.inl @@ -704,13 +704,22 @@ void *DirectSubmissionHw::dispatchWorkloadSection(BatchBu relaxedOrderingReturnPtrCmdStream.replaceBuffer(relaxedOrderingReturnPtrCmds, RelaxedOrderingHelper::getSizeReturnPtrRegs()); } - dispatchStartSection(commandStreamAddress); + auto copyCmdBuffer = this->copyCommandBufferIntoRing(batchBuffer); + + if (copyCmdBuffer) { + auto cmdStreamTaskPtr = ptrOffset(batchBuffer.stream->getCpuBase(), batchBuffer.startOffset); + auto sizeToCopy = ptrDiff(returnCmd, cmdStreamTaskPtr); + auto ringPtr = ringCommandStream.getSpace(sizeToCopy); + memcpy(ringPtr, cmdStreamTaskPtr, sizeToCopy); + } else { + dispatchStartSection(commandStreamAddress); + } uint64_t returnGpuPointer = ringCommandStream.getCurrentGpuAddressPosition(); if (this->relaxedOrderingEnabled && batchBuffer.hasRelaxedOrderingDependencies) { dispatchRelaxedOrderingReturnPtrRegs(relaxedOrderingReturnPtrCmdStream, returnGpuPointer); - } else { + } else if (!copyCmdBuffer) { setReturnAddress(returnCmd, returnGpuPointer); } } else if (workloadMode == 1) { @@ -880,6 +889,21 @@ void DirectSubmissionHw::dispatchTaskStoreSection(uint64_ memcpy_s(dst, RelaxedOrderingHelper::getSizeTaskStoreSection(), preinitializedTaskStoreSection.get(), RelaxedOrderingHelper::getSizeTaskStoreSection()); } +template +bool DirectSubmissionHw::copyCommandBufferIntoRing(BatchBuffer &batchBuffer) { + auto ret = this->osContext.getNumSupportedDevices() == 1u && + !batchBuffer.chainedBatchBuffer && + batchBuffer.commandBufferAllocation && + MemoryPoolHelper::isSystemMemoryPool(batchBuffer.commandBufferAllocation->getMemoryPool()) && + !batchBuffer.hasRelaxedOrderingDependencies; + + if (DebugManager.flags.DirectSubmissionFlatRingBuffer.get() != -1) { + ret &= !!DebugManager.flags.DirectSubmissionFlatRingBuffer.get(); + } + + return ret; +} + template bool DirectSubmissionHw::dispatchCommandBuffer(BatchBuffer &batchBuffer, FlushStampTracker &flushStamp) { // for now workloads requiring cache coherency are not supported @@ -894,6 +918,11 @@ bool DirectSubmissionHw::dispatchCommandBuffer(BatchBuffe bool relaxedOrderingSchedulerWillBeNeeded = (this->relaxedOrderingSchedulerRequired || batchBuffer.hasRelaxedOrderingDependencies); size_t dispatchSize = getSizeDispatch(relaxedOrderingSchedulerWillBeNeeded, batchBuffer.hasRelaxedOrderingDependencies); + + if (this->copyCommandBufferIntoRing(batchBuffer)) { + dispatchSize += (batchBuffer.stream->getUsed() - batchBuffer.startOffset) - 2 * getSizeStartSection(); + } + size_t cycleSize = getSizeSwitchRingBufferSection(); size_t requiredMinimalSize = dispatchSize + cycleSize + getSizeEnd(relaxedOrderingSchedulerWillBeNeeded); if (this->relaxedOrderingEnabled) { diff --git a/shared/source/memory_manager/allocations_list.cpp b/shared/source/memory_manager/allocations_list.cpp index 19c4d6ee8e..55b9d578ba 100644 --- a/shared/source/memory_manager/allocations_list.cpp +++ b/shared/source/memory_manager/allocations_list.cpp @@ -21,6 +21,7 @@ struct ReusableAllocationRequirements { uint32_t contextId; uint32_t activeTileCount; uint32_t tagOffset; + bool forceSystemMemoryFlag; }; bool checkTagAddressReady(ReusableAllocationRequirements *requirements, NEO::GraphicsAllocation *gfxAllocation) { @@ -42,6 +43,10 @@ AllocationsList::AllocationsList(AllocationUsage allocationUsage) : allocationUsage(allocationUsage) {} std::unique_ptr AllocationsList::detachAllocation(size_t requiredMinimalSize, const void *requiredPtr, CommandStreamReceiver *commandStreamReceiver, AllocationType allocationType) { + return this->detachAllocation(requiredMinimalSize, requiredPtr, false, commandStreamReceiver, allocationType); +} + +std::unique_ptr AllocationsList::detachAllocation(size_t requiredMinimalSize, const void *requiredPtr, bool forceSystemMemoryFlag, CommandStreamReceiver *commandStreamReceiver, AllocationType allocationType) { ReusableAllocationRequirements req; req.requiredMinimalSize = requiredMinimalSize; req.csrTagAddress = (commandStreamReceiver == nullptr) ? nullptr : commandStreamReceiver->getTagAddress(); @@ -50,6 +55,7 @@ std::unique_ptr AllocationsList::detachAllocation(size_t req req.requiredPtr = requiredPtr; req.activeTileCount = (commandStreamReceiver == nullptr) ? 1u : commandStreamReceiver->getActivePartitions(); req.tagOffset = (commandStreamReceiver == nullptr) ? 0u : commandStreamReceiver->getPostSyncWriteOffset(); + req.forceSystemMemoryFlag = forceSystemMemoryFlag; GraphicsAllocation *a = nullptr; GraphicsAllocation *retAlloc = processLocked(a, static_cast(&req)); return std::unique_ptr(retAlloc); @@ -60,7 +66,8 @@ GraphicsAllocation *AllocationsList::detachAllocationImpl(GraphicsAllocation *, auto *curr = head; while (curr != nullptr) { if ((req->allocationType == curr->getAllocationType()) && - (curr->getUnderlyingBufferSize() >= req->requiredMinimalSize)) { + (curr->getUnderlyingBufferSize() >= req->requiredMinimalSize) && + (curr->storageInfo.systemMemoryForced == req->forceSystemMemoryFlag)) { if (req->csrTagAddress == nullptr) { return removeOneImpl(curr, nullptr); } diff --git a/shared/source/memory_manager/allocations_list.h b/shared/source/memory_manager/allocations_list.h index 942b4745e5..911cdf12ca 100644 --- a/shared/source/memory_manager/allocations_list.h +++ b/shared/source/memory_manager/allocations_list.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2022 Intel Corporation + * Copyright (C) 2018-2023 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -21,6 +21,7 @@ class AllocationsList : public IDList { AllocationsList(AllocationUsage allocationUsage); std::unique_ptr detachAllocation(size_t requiredMinimalSize, const void *requiredPtr, CommandStreamReceiver *commandStreamReceiver, AllocationType allocationType); + std::unique_ptr detachAllocation(size_t requiredMinimalSize, const void *requiredPtr, bool forceSystemMemoryFlag, CommandStreamReceiver *commandStreamReceiver, AllocationType allocationType); void freeAllGraphicsAllocations(Device *neoDevice); private: diff --git a/shared/source/memory_manager/definitions/storage_info.h b/shared/source/memory_manager/definitions/storage_info.h index 42e4eae3c3..e551f9682e 100644 --- a/shared/source/memory_manager/definitions/storage_info.h +++ b/shared/source/memory_manager/definitions/storage_info.h @@ -31,6 +31,7 @@ struct StorageInfo { bool isLockable = false; bool localOnlyRequired = false; bool systemMemoryPlacement = true; + bool systemMemoryForced = false; char resourceTag[AppResourceDefines::maxStrLen + 1] = ""; uint32_t getMemoryBanks() const { return static_cast(memoryBanks.to_ulong()); } uint32_t getTotalBanksCnt() const; diff --git a/shared/source/memory_manager/memory_manager.cpp b/shared/source/memory_manager/memory_manager.cpp index d7f2c0acbf..a149ebdb69 100644 --- a/shared/source/memory_manager/memory_manager.cpp +++ b/shared/source/memory_manager/memory_manager.cpp @@ -530,6 +530,7 @@ bool MemoryManager::getAllocationData(AllocationData &allocationData, const Allo allocationData.flags.isUSMHostAllocation = properties.flags.isUSMHostAllocation; allocationData.storageInfo.systemMemoryPlacement = allocationData.flags.useSystemMemory; + allocationData.storageInfo.systemMemoryForced = properties.flags.forceSystemMemory; return true; } diff --git a/shared/source/os_interface/linux/drm_memory_manager.cpp b/shared/source/os_interface/linux/drm_memory_manager.cpp index 658fa76685..bbd255c1bd 100644 --- a/shared/source/os_interface/linux/drm_memory_manager.cpp +++ b/shared/source/os_interface/linux/drm_memory_manager.cpp @@ -2003,6 +2003,7 @@ DrmAllocation *DrmMemoryManager::createAllocWithAlignment(const AllocationData & bo.release(); allocation->isShareableHostMemory = true; + allocation->storageInfo = allocationData.storageInfo; return allocation.release(); } else { return createAllocWithAlignmentFromUserptr(allocationData, size, alignment, alignedSize, gpuAddress); diff --git a/shared/test/common/mocks/mock_direct_submission_hw.h b/shared/test/common/mocks/mock_direct_submission_hw.h index 3a88144d0f..7b4223bff6 100644 --- a/shared/test/common/mocks/mock_direct_submission_hw.h +++ b/shared/test/common/mocks/mock_direct_submission_hw.h @@ -18,6 +18,7 @@ struct MockDirectSubmissionHw : public DirectSubmissionHw using BaseClass::activeTiles; using BaseClass::allocateResources; using BaseClass::completionFenceAllocation; + using BaseClass::copyCommandBufferIntoRing; using BaseClass::cpuCachelineFlush; using BaseClass::currentQueueWorkCount; using BaseClass::currentRelaxedOrderingQueueSize; diff --git a/shared/test/common/test_files/igdrcl.config b/shared/test/common/test_files/igdrcl.config index 9886160aa0..77bc6391de 100644 --- a/shared/test/common/test_files/igdrcl.config +++ b/shared/test/common/test_files/igdrcl.config @@ -451,6 +451,7 @@ EnableRingSwitchTagUpdateWa = -1 PlaformSupportEvictIfNecessaryFlag = -1 DirectSubmissionReadBackCommandBuffer = -1 DirectSubmissionReadBackRingBuffer = -1 +DirectSubmissionFlatRingBuffer = 0 ReadBackCommandBufferAllocation = -1 PrintImageBlitBlockCopyCmdDetails = 0 LogGdiCalls = 0 diff --git a/shared/test/unit_test/command_container/command_container_tests.cpp b/shared/test/unit_test/command_container/command_container_tests.cpp index 69314a22aa..9abe43dde0 100644 --- a/shared/test/unit_test/command_container/command_container_tests.cpp +++ b/shared/test/unit_test/command_container/command_container_tests.cpp @@ -34,6 +34,7 @@ class MyMockCommandContainer : public CommandContainer { using CommandContainer::dirtyHeaps; using CommandContainer::getTotalCmdBufferSize; using CommandContainer::immediateReusableAllocationList; + using CommandContainer::secondaryCommandStreamForImmediateCmdList; }; struct CommandContainerHeapStateTests : public ::testing::Test { @@ -87,7 +88,7 @@ TEST_F(CommandContainerHeapStateTests, givenDirtyHeapsWhenSettingStateForSingleH TEST_F(CommandContainerTest, givenCmdContainerWhenCreatingCommandBufferThenCorrectAllocationTypeIsSet) { CommandContainer cmdContainer; - cmdContainer.initialize(pDevice, nullptr, true); + cmdContainer.initialize(pDevice, nullptr, true, false); ASSERT_NE(0u, cmdContainer.getCmdBufferAllocations().size()); EXPECT_EQ(AllocationType::COMMAND_BUFFER, cmdContainer.getCmdBufferAllocations()[0]->getAllocationType()); @@ -98,9 +99,37 @@ TEST_F(CommandContainerTest, givenCmdContainerWhenCreatingCommandBufferThenCorre EXPECT_EQ(AllocationType::COMMAND_BUFFER, cmdContainer.getCmdBufferAllocations()[1]->getAllocationType()); } +TEST_F(CommandContainerTest, givenCreateSecondaryCmdBufferInHostMemWhenInitializeThenCreateAdditionalLinearStream) { + MyMockCommandContainer cmdContainer; + cmdContainer.initialize(pDevice, nullptr, true, true); + + EXPECT_NE(cmdContainer.secondaryCommandStreamForImmediateCmdList.get(), nullptr); + + auto secondaryCmdStream = cmdContainer.secondaryCommandStreamForImmediateCmdList.get(); + auto cmdStream = cmdContainer.getCommandStream(); + + EXPECT_TRUE(cmdContainer.swapStreams()); + + EXPECT_EQ(cmdContainer.getCommandStream(), secondaryCmdStream); + EXPECT_EQ(cmdContainer.secondaryCommandStreamForImmediateCmdList.get(), cmdStream); +} + +TEST_F(CommandContainerTest, whenInitializeThenNotCreateAdditionalLinearStream) { + MyMockCommandContainer cmdContainer; + cmdContainer.initialize(pDevice, nullptr, true, false); + + EXPECT_EQ(cmdContainer.secondaryCommandStreamForImmediateCmdList.get(), nullptr); + + auto cmdStream = cmdContainer.getCommandStream(); + + EXPECT_FALSE(cmdContainer.swapStreams()); + + EXPECT_EQ(cmdContainer.getCommandStream(), cmdStream); +} + TEST_F(CommandContainerTest, givenCmdContainerWhenAllocatingHeapsThenSetCorrectAllocationTypes) { CommandContainer cmdContainer; - cmdContainer.initialize(pDevice, nullptr, true); + cmdContainer.initialize(pDevice, nullptr, true, false); for (uint32_t i = 0; i < HeapType::NUM_TYPES; i++) { HeapType heapType = static_cast(i); @@ -121,7 +150,7 @@ TEST_F(CommandContainerTest, givenCmdContainerWhenAllocatingHeapsThenSetCorrectA TEST_F(CommandContainerTest, givenCommandContainerWhenInitializeThenEverythingIsInitialized) { CommandContainer cmdContainer; - auto status = cmdContainer.initialize(pDevice, nullptr, true); + auto status = cmdContainer.initialize(pDevice, nullptr, true, false); EXPECT_EQ(CommandContainer::ErrorCode::SUCCESS, status); EXPECT_EQ(pDevice, cmdContainer.getDevice()); @@ -151,7 +180,7 @@ TEST_F(CommandContainerTest, givenCommandContainerWhenInitializeThenEverythingIs TEST_F(CommandContainerTest, givenCommandContainerWhenHeapNotRequiredThenHeapIsNotInitialized) { CommandContainer cmdContainer; - auto status = cmdContainer.initialize(pDevice, nullptr, false); + auto status = cmdContainer.initialize(pDevice, nullptr, false, false); EXPECT_EQ(CommandContainer::ErrorCode::SUCCESS, status); EXPECT_EQ(pDevice, cmdContainer.getDevice()); @@ -188,7 +217,7 @@ TEST_F(CommandContainerTest, givenEnabledLocalMemoryAndIsaInSystemMemoryWhenCmdC auto instructionHeapBaseAddress = device->getMemoryManager()->getInternalHeapBaseAddress(0, false); CommandContainer cmdContainer; - auto status = cmdContainer.initialize(device.get(), nullptr, true); + auto status = cmdContainer.initialize(device.get(), nullptr, true, false); EXPECT_EQ(CommandContainer::ErrorCode::SUCCESS, status); EXPECT_EQ(instructionHeapBaseAddress, cmdContainer.getInstructionHeapBaseAddress()); @@ -207,7 +236,7 @@ TEST_F(CommandContainerTest, givenForceDefaultHeapSizeWhenCmdContainerIsInitiali auto device = std::unique_ptr(Device::create(executionEnvironment, 0u)); CommandContainer cmdContainer; - auto status = cmdContainer.initialize(device.get(), nullptr, true); + auto status = cmdContainer.initialize(device.get(), nullptr, true, false); EXPECT_EQ(CommandContainer::ErrorCode::SUCCESS, status); auto indirectHeap = cmdContainer.getIndirectHeap(IndirectHeap::Type::INDIRECT_OBJECT); @@ -217,14 +246,21 @@ TEST_F(CommandContainerTest, givenForceDefaultHeapSizeWhenCmdContainerIsInitiali TEST_F(CommandContainerTest, givenCommandContainerDuringInitWhenAllocateGfxMemoryFailsThenErrorIsReturned) { CommandContainer cmdContainer; pDevice->executionEnvironment->memoryManager.reset(new FailMemoryManager(0, *pDevice->executionEnvironment)); - auto status = cmdContainer.initialize(pDevice, nullptr, true); + auto status = cmdContainer.initialize(pDevice, nullptr, true, false); + EXPECT_EQ(CommandContainer::ErrorCode::OUT_OF_DEVICE_MEMORY, status); +} + +TEST_F(CommandContainerTest, givenCreateSecondaryCmdBufferInHostMemWhenAllocateSecondaryCmdStreamFailsDuringInitializeThenErrorIsReturned) { + CommandContainer cmdContainer; + static_cast(pDevice->getMemoryManager())->maxSuccessAllocatedGraphicsMemoryIndex = 7; + auto status = cmdContainer.initialize(pDevice, nullptr, true, true); EXPECT_EQ(CommandContainer::ErrorCode::OUT_OF_DEVICE_MEMORY, status); } TEST_F(CommandContainerTest, givenCmdContainerWithAllocsListWhenAllocateAndResetThenCmdBufferAllocIsReused) { AllocationsList allocList; auto cmdContainer = std::make_unique(); - cmdContainer->initialize(pDevice, &allocList, true); + cmdContainer->initialize(pDevice, &allocList, true, false); auto &cmdBufferAllocs = cmdContainer->getCmdBufferAllocations(); auto memoryManager = static_cast(pDevice->getMemoryManager()); EXPECT_EQ(memoryManager->handleFenceCompletionCalled, 0u); @@ -261,7 +297,7 @@ TEST_F(CommandContainerTest, givenReusableAllocationsAndRemoveUserFenceInCmdlist AllocationsList allocList; auto cmdContainer = std::make_unique(); - cmdContainer->initialize(pDevice, &allocList, true); + cmdContainer->initialize(pDevice, &allocList, true, false); auto &cmdBufferAllocs = cmdContainer->getCmdBufferAllocations(); auto memoryManager = static_cast(pDevice->getMemoryManager()); EXPECT_EQ(0u, memoryManager->handleFenceCompletionCalled); @@ -283,7 +319,7 @@ TEST_F(CommandContainerTest, givenCommandContainerDuringInitWhenAllocateHeapMemo CommandContainer cmdContainer; auto tempMemoryManager = pDevice->executionEnvironment->memoryManager.release(); pDevice->executionEnvironment->memoryManager.reset(new FailMemoryManager(1, *pDevice->executionEnvironment)); - auto status = cmdContainer.initialize(pDevice, nullptr, true); + auto status = cmdContainer.initialize(pDevice, nullptr, true, false); EXPECT_EQ(CommandContainer::ErrorCode::OUT_OF_DEVICE_MEMORY, status); delete tempMemoryManager; } @@ -298,10 +334,10 @@ TEST_F(CommandContainerTest, givenCommandContainerWhenSettingIndirectHeapAllocat TEST_F(CommandContainerTest, givenHeapAllocationsWhenDestroyCommandContainerThenHeapAllocationsAreReused) { std::unique_ptr cmdContainer(new CommandContainer); - cmdContainer->initialize(pDevice, nullptr, true); + cmdContainer->initialize(pDevice, nullptr, true, false); auto heapAllocationsAddress = cmdContainer->getIndirectHeapAllocation(HeapType::SURFACE_STATE)->getUnderlyingBuffer(); cmdContainer.reset(new CommandContainer); - cmdContainer->initialize(pDevice, nullptr, true); + cmdContainer->initialize(pDevice, nullptr, true, false); bool status = true; for (uint32_t i = 0; i < HeapType::NUM_TYPES && !status; i++) { auto heapType = static_cast(i); @@ -316,7 +352,7 @@ TEST_F(CommandContainerTest, givenHeapAllocationsWhenDestroyCommandContainerThen TEST_F(CommandContainerTest, givenCommandContainerWhenResetThenStateIsReset) { CommandContainer cmdContainer; - cmdContainer.initialize(pDevice, nullptr, true); + cmdContainer.initialize(pDevice, nullptr, true, false); LinearStream stream; uint32_t usedSize = 1; cmdContainer.getCommandStream()->getSpace(usedSize); @@ -330,7 +366,7 @@ TEST_F(CommandContainerTest, givenCommandContainerWhenResetThenStateIsReset) { TEST_F(CommandContainerTest, givenCommandContainerWhenWantToAddNullPtrToResidencyContainerThenNothingIsAdded) { CommandContainer cmdContainer; - cmdContainer.initialize(pDevice, nullptr, true); + cmdContainer.initialize(pDevice, nullptr, true, false); auto size = cmdContainer.getResidencyContainer().size(); cmdContainer.addToResidencyContainer(nullptr); EXPECT_EQ(cmdContainer.getResidencyContainer().size(), size); @@ -338,7 +374,7 @@ TEST_F(CommandContainerTest, givenCommandContainerWhenWantToAddNullPtrToResidenc TEST_F(CommandContainerTest, givenCommandContainerWhenWantToAddAlreadyAddedAllocationAndDuplicatesRemovedThenExpectedSizeIsReturned) { CommandContainer cmdContainer; - cmdContainer.initialize(pDevice, nullptr, true); + cmdContainer.initialize(pDevice, nullptr, true, false); MockGraphicsAllocation mockAllocation; auto sizeBefore = cmdContainer.getResidencyContainer().size(); @@ -363,7 +399,7 @@ HWTEST_F(CommandContainerTest, givenCmdContainerWhenInitializeCalledThenSSHHeapH using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; std::unique_ptr cmdContainer(new CommandContainer); cmdContainer->setReservedSshSize(4 * MemoryConstants::pageSize); - cmdContainer->initialize(pDevice, nullptr, true); + cmdContainer->initialize(pDevice, nullptr, true, false); cmdContainer->setDirtyStateForAllHeaps(false); auto heap = cmdContainer->getIndirectHeap(HeapType::SURFACE_STATE); @@ -376,7 +412,7 @@ HWTEST_F(CommandContainerTest, givenNotEnoughSpaceInSSHWhenGettingHeapWithRequir using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; std::unique_ptr cmdContainer(new CommandContainer); cmdContainer->setReservedSshSize(4 * MemoryConstants::pageSize); - cmdContainer->initialize(pDevice, nullptr, true); + cmdContainer->initialize(pDevice, nullptr, true, false); cmdContainer->setDirtyStateForAllHeaps(false); auto heap = cmdContainer->getIndirectHeap(HeapType::SURFACE_STATE); @@ -391,7 +427,7 @@ HWTEST_F(CommandContainerTest, givenNotEnoughSpaceInSSHWhenGettingHeapWithRequir TEST_F(CommandContainerTest, givenAvailableSpaceWhenGetHeapWithRequiredSizeAndAlignmentCalledThenExistingAllocationIsReturned) { std::unique_ptr cmdContainer(new CommandContainer); - cmdContainer->initialize(pDevice, nullptr, true); + cmdContainer->initialize(pDevice, nullptr, true, false); cmdContainer->setDirtyStateForAllHeaps(false); HeapType heapTypes[] = {HeapType::SURFACE_STATE, HeapType::DYNAMIC_STATE}; @@ -426,7 +462,7 @@ TEST_F(CommandContainerTest, givenAvailableSpaceWhenGetHeapWithRequiredSizeAndAl TEST_F(CommandContainerTest, givenUnalignedAvailableSpaceWhenGetHeapWithRequiredSizeAndAlignmentCalledThenHeapReturnedIsCorrectlyAligned) { std::unique_ptr cmdContainer(new CommandContainer); - cmdContainer->initialize(pDevice, nullptr, true); + cmdContainer->initialize(pDevice, nullptr, true, false); cmdContainer->setDirtyStateForAllHeaps(false); auto heapAllocation = cmdContainer->getIndirectHeapAllocation(HeapType::SURFACE_STATE); auto heap = cmdContainer->getIndirectHeap(HeapType::SURFACE_STATE); @@ -450,7 +486,7 @@ TEST_F(CommandContainerTest, givenUnalignedAvailableSpaceWhenGetHeapWithRequired TEST_F(CommandContainerTest, givenNoAlignmentAndAvailableSpaceWhenGetHeapWithRequiredSizeAndAlignmentCalledThenHeapReturnedIsNotAligned) { std::unique_ptr cmdContainer(new CommandContainer); - cmdContainer->initialize(pDevice, nullptr, true); + cmdContainer->initialize(pDevice, nullptr, true, false); cmdContainer->setDirtyStateForAllHeaps(false); auto heapAllocation = cmdContainer->getIndirectHeapAllocation(HeapType::SURFACE_STATE); auto heap = cmdContainer->getIndirectHeap(HeapType::SURFACE_STATE); @@ -474,7 +510,7 @@ TEST_F(CommandContainerTest, givenNoAlignmentAndAvailableSpaceWhenGetHeapWithReq TEST_F(CommandContainerTest, givenNotEnoughSpaceWhenGetHeapWithRequiredSizeAndAlignmentCalledThenNewAllocationIsReturned) { std::unique_ptr cmdContainer(new CommandContainer); - cmdContainer->initialize(pDevice, nullptr, true); + cmdContainer->initialize(pDevice, nullptr, true, false); cmdContainer->setDirtyStateForAllHeaps(false); HeapType heapTypes[] = {HeapType::SURFACE_STATE, HeapType::DYNAMIC_STATE}; @@ -512,7 +548,7 @@ TEST_F(CommandContainerTest, givenNotEnoughSpaceWhenGetHeapWithRequiredSizeAndAl TEST_F(CommandContainerTest, givenNotEnoughSpaceWhenCreatedAlocationHaveDifferentBaseThenHeapIsDirty) { std::unique_ptr cmdContainer(new CommandContainer); - cmdContainer->initialize(pDevice, nullptr, true); + cmdContainer->initialize(pDevice, nullptr, true, false); cmdContainer->setDirtyStateForAllHeaps(false); HeapType type = HeapType::INDIRECT_OBJECT; @@ -544,7 +580,7 @@ TEST_F(CommandContainerTest, givenNotEnoughSpaceWhenCreatedAlocationHaveDifferen TEST_F(CommandContainerTest, whenAllocateNextCmdBufferIsCalledThenNewAllocationIsCreatedAndCommandStreamReplaced) { std::unique_ptr cmdContainer(new CommandContainer); - cmdContainer->initialize(pDevice, nullptr, true); + cmdContainer->initialize(pDevice, nullptr, true, false); auto stream = cmdContainer->getCommandStream(); ASSERT_NE(nullptr, stream); @@ -572,7 +608,7 @@ TEST_F(CommandContainerTest, whenAllocateNextCmdBufferIsCalledThenNewAllocationI TEST_F(CommandContainerTest, whenResettingCommandContainerThenStoredCmdBuffersAreFreedAndStreamIsReplacedWithInitialBuffer) { std::unique_ptr cmdContainer(new CommandContainer); - cmdContainer->initialize(pDevice, nullptr, true); + cmdContainer->initialize(pDevice, nullptr, true, false); cmdContainer->allocateNextCommandBuffer(); cmdContainer->allocateNextCommandBuffer(); @@ -618,7 +654,7 @@ TEST_P(CommandContainerHeaps, givenCommandContainerWhenGetAllowHeapGrowCalledThe CommandContainer cmdContainer; - cmdContainer.initialize(pDevice, nullptr, true); + cmdContainer.initialize(pDevice, nullptr, true, false); if (!pDevice->getHardwareInfo().capabilityTable.supportsImages && HeapType::DYNAMIC_STATE == heapType) { EXPECT_EQ(cmdContainer.getIndirectHeap(heapType), nullptr); } else { @@ -636,7 +672,7 @@ TEST_P(CommandContainerHeaps, givenCommandContainerWhenGetingMoreThanAvailableSi HeapType heapType = GetParam(); CommandContainer cmdContainer; - cmdContainer.initialize(pDevice, nullptr, true); + cmdContainer.initialize(pDevice, nullptr, true, false); cmdContainer.setDirtyStateForAllHeaps(false); auto heap = cmdContainer.getIndirectHeap(heapType); if (!pDevice->getHardwareInfo().capabilityTable.supportsImages && HeapType::DYNAMIC_STATE == heapType) { @@ -671,10 +707,10 @@ TEST_P(CommandContainerHeaps, givenCommandContainerForDifferentRootDevicesThenHe auto device1 = std::unique_ptr(Device::create(executionEnvironment, 1u)); CommandContainer cmdContainer0; - cmdContainer0.initialize(device0.get(), nullptr, true); + cmdContainer0.initialize(device0.get(), nullptr, true, false); CommandContainer cmdContainer1; - cmdContainer1.initialize(device1.get(), nullptr, true); + cmdContainer1.initialize(device1.get(), nullptr, true, false); if (!pDevice->getHardwareInfo().capabilityTable.supportsImages && HeapType::DYNAMIC_STATE == heapType) { EXPECT_EQ(cmdContainer0.getIndirectHeap(heapType), nullptr); EXPECT_EQ(cmdContainer1.getIndirectHeap(heapType), nullptr); @@ -701,13 +737,13 @@ TEST_F(CommandContainerHeaps, givenCommandContainerForDifferentRootDevicesThenCm auto device1 = std::unique_ptr(Device::create(executionEnvironment, 1u)); CommandContainer cmdContainer0; - cmdContainer0.initialize(device0.get(), nullptr, true); + cmdContainer0.initialize(device0.get(), nullptr, true, false); EXPECT_EQ(1u, cmdContainer0.getCmdBufferAllocations().size()); uint32_t cmdBufferAllocationIndex0 = cmdContainer0.getCmdBufferAllocations().front()->getRootDeviceIndex(); EXPECT_EQ(device0->getRootDeviceIndex(), cmdBufferAllocationIndex0); CommandContainer cmdContainer1; - cmdContainer1.initialize(device1.get(), nullptr, true); + cmdContainer1.initialize(device1.get(), nullptr, true, false); EXPECT_EQ(1u, cmdContainer1.getCmdBufferAllocations().size()); uint32_t cmdBufferAllocationIndex1 = cmdContainer1.getCmdBufferAllocations().front()->getRootDeviceIndex(); EXPECT_EQ(device1->getRootDeviceIndex(), cmdBufferAllocationIndex1); @@ -729,13 +765,13 @@ TEST_F(CommandContainerHeaps, givenCommandContainerForDifferentRootDevicesThenIn auto &gfxCoreHelper1 = device1->getGfxCoreHelper(); CommandContainer cmdContainer0; - cmdContainer0.initialize(device0.get(), nullptr, true); + cmdContainer0.initialize(device0.get(), nullptr, true, false); bool useLocalMemory0 = !gfxCoreHelper0.useSystemMemoryPlacementForISA(device0->getHardwareInfo()); uint64_t baseAddressHeapDevice0 = device0->getMemoryManager()->getInternalHeapBaseAddress(device0->getRootDeviceIndex(), useLocalMemory0); EXPECT_EQ(cmdContainer0.getInstructionHeapBaseAddress(), baseAddressHeapDevice0); CommandContainer cmdContainer1; - cmdContainer1.initialize(device1.get(), nullptr, true); + cmdContainer1.initialize(device1.get(), nullptr, true, false); bool useLocalMemory1 = !gfxCoreHelper1.useSystemMemoryPlacementForISA(device0->getHardwareInfo()); uint64_t baseAddressHeapDevice1 = device1->getMemoryManager()->getInternalHeapBaseAddress(device1->getRootDeviceIndex(), useLocalMemory1); EXPECT_EQ(cmdContainer1.getInstructionHeapBaseAddress(), baseAddressHeapDevice1); @@ -746,7 +782,7 @@ TEST_F(CommandContainerTest, givenCommandContainerWhenDestructionThenNonHeapAllo MockGraphicsAllocation alloc; size_t size = 0x1000; alloc.setSize(size); - cmdContainer->initialize(pDevice, nullptr, true); + cmdContainer->initialize(pDevice, nullptr, true, false); cmdContainer->getDeallocationContainer().push_back(&alloc); cmdContainer.reset(); EXPECT_EQ(alloc.getUnderlyingBufferSize(), size); @@ -754,7 +790,7 @@ TEST_F(CommandContainerTest, givenCommandContainerWhenDestructionThenNonHeapAllo TEST_F(CommandContainerTest, givenContainerAllocatesNextCommandBufferWhenResetingContainerThenExpectFirstCommandBufferAllocationIsReused) { auto cmdContainer = std::make_unique(); - cmdContainer->initialize(pDevice, nullptr, true); + cmdContainer->initialize(pDevice, nullptr, true, false); auto stream = cmdContainer->getCommandStream(); ASSERT_NE(nullptr, stream); @@ -799,14 +835,14 @@ class MyLinearStreamMock : public LinearStream { TEST_F(CommandContainerTest, givenCmdContainerWhenContainerIsInitializedThenStreamContainsContainerPtr) { CommandContainer cmdContainer; - cmdContainer.initialize(pDevice, nullptr, true); + cmdContainer.initialize(pDevice, nullptr, true, false); EXPECT_EQ(reinterpret_cast(cmdContainer.getCommandStream())->cmdContainer, &cmdContainer); } TEST_F(CommandContainerTest, givenCmdContainerWhenContainerIsInitializedThenStreamSizeEqualAlignedTotalCmdBuffSizeDecreasedOfReservedSize) { CommandContainer cmdContainer; - cmdContainer.initialize(pDevice, nullptr, true); + cmdContainer.initialize(pDevice, nullptr, true, false); size_t alignedSize = alignUp(CommandContainer::totalCmdBufferSize, MemoryConstants::pageSize64k); EXPECT_EQ(cmdContainer.getCommandStream()->getMaxAvailableSpace(), alignedSize - CommandContainer::cmdBufferReservedSize); } @@ -816,21 +852,21 @@ TEST_F(CommandContainerTest, GivenCmdContainerAndDebugFlagWhenContainerIsInitial DebugManager.flags.OverrideCmdListCmdBufferSizeInKb.set(0); MyMockCommandContainer cmdContainer; - cmdContainer.initialize(pDevice, nullptr, true); + cmdContainer.initialize(pDevice, nullptr, true, false); size_t alignedSize = alignUp(cmdContainer.getTotalCmdBufferSize(), MemoryConstants::pageSize64k); EXPECT_EQ(cmdContainer.getCommandStream()->getMaxAvailableSpace(), alignedSize - MyMockCommandContainer::cmdBufferReservedSize); auto newSizeInKB = 512; DebugManager.flags.OverrideCmdListCmdBufferSizeInKb.set(newSizeInKB); MyMockCommandContainer cmdContainer2; - cmdContainer2.initialize(pDevice, nullptr, true); + cmdContainer2.initialize(pDevice, nullptr, true, false); alignedSize = alignUp(cmdContainer.getTotalCmdBufferSize(), MemoryConstants::pageSize64k); EXPECT_EQ(cmdContainer2.getCommandStream()->getMaxAvailableSpace(), alignedSize - MyMockCommandContainer::cmdBufferReservedSize); } TEST_F(CommandContainerTest, givenCmdContainerWhenAlocatingNextCmdBufferThenStreamSizeEqualAlignedTotalCmdBuffSizeDecreasedOfReservedSize) { CommandContainer cmdContainer; - cmdContainer.initialize(pDevice, nullptr, true); + cmdContainer.initialize(pDevice, nullptr, true, false); cmdContainer.allocateNextCommandBuffer(); size_t alignedSize = alignUp(CommandContainer::totalCmdBufferSize, MemoryConstants::pageSize64k); EXPECT_EQ(cmdContainer.getCommandStream()->getMaxAvailableSpace(), alignedSize - CommandContainer::cmdBufferReservedSize); @@ -838,7 +874,7 @@ TEST_F(CommandContainerTest, givenCmdContainerWhenAlocatingNextCmdBufferThenStre TEST_F(CommandContainerTest, givenCmdContainerWhenCloseAndAllocateNextCommandBufferCalledThenBBEndPlacedAtEndOfLinearStream) { CommandContainer cmdContainer; - cmdContainer.initialize(pDevice, nullptr, true); + cmdContainer.initialize(pDevice, nullptr, true, false); auto &gfxCoreHelper = pDevice->getGfxCoreHelper(); auto ptr = cmdContainer.getCommandStream()->getSpace(0u); cmdContainer.closeAndAllocateNextCommandBuffer(); @@ -847,7 +883,7 @@ TEST_F(CommandContainerTest, givenCmdContainerWhenCloseAndAllocateNextCommandBuf TEST_F(CommandContainerTest, givenCmdContainerWhenCloseAndAllocateNextCommandBufferCalledThenNewCmdBufferAllocationCreated) { CommandContainer cmdContainer; - cmdContainer.initialize(pDevice, nullptr, true); + cmdContainer.initialize(pDevice, nullptr, true, false); EXPECT_EQ(cmdContainer.getCmdBufferAllocations().size(), 1u); cmdContainer.closeAndAllocateNextCommandBuffer(); EXPECT_EQ(cmdContainer.getCmdBufferAllocations().size(), 2u); @@ -855,7 +891,7 @@ TEST_F(CommandContainerTest, givenCmdContainerWhenCloseAndAllocateNextCommandBuf TEST_F(CommandContainerTest, givenCmdContainerWhenSetCmdBufferThenCmdBufferSetCorrectly) { CommandContainer cmdContainer; - cmdContainer.initialize(pDevice, nullptr, true); + cmdContainer.initialize(pDevice, nullptr, true, false); AllocationProperties properties{pDevice->getRootDeviceIndex(), true /* allocateMemory*/, @@ -874,7 +910,7 @@ TEST_F(CommandContainerTest, givenCmdContainerWhenSetCmdBufferThenCmdBufferSetCo TEST_F(CommandContainerTest, givenCmdContainerWhenReuseExistingCmdBufferWithoutAnyAllocationInListThenReturnNullptr) { auto cmdContainer = std::make_unique(); AllocationsList allocList; - cmdContainer->initialize(pDevice, &allocList, false); + cmdContainer->initialize(pDevice, &allocList, false, false); auto csr = pDevice->getDefaultEngine().commandStreamReceiver; cmdContainer->setImmediateCmdListCsr(csr); cmdContainer->immediateReusableAllocationList = std::make_unique(); @@ -891,7 +927,7 @@ HWTEST_F(CommandContainerTest, givenCmdContainerWhenReuseExistingCmdBufferWithAl *csr.tagAddress = 0u; AllocationsList allocList; - cmdContainer->initialize(pDevice, &allocList, false); + cmdContainer->initialize(pDevice, &allocList, false, false); cmdContainer->setImmediateCmdListCsr(&csr); cmdContainer->immediateReusableAllocationList = std::make_unique(); @@ -912,7 +948,7 @@ HWTEST_F(CommandContainerTest, givenCmdContainerWhenReuseExistingCmdBufferWithAl *csr.tagAddress = 10u; AllocationsList allocList; - cmdContainer->initialize(pDevice, &allocList, false); + cmdContainer->initialize(pDevice, &allocList, false, false); cmdContainer->setImmediateCmdListCsr(&csr); cmdContainer->immediateReusableAllocationList = std::make_unique(); @@ -929,7 +965,7 @@ HWTEST_F(CommandContainerTest, givenCmdContainerWhenReuseExistingCmdBufferWithAl TEST_F(CommandContainerTest, GivenCmdContainerWhenContainerIsInitializedThenSurfaceStateIndirectHeapSizeIsCorrect) { MyMockCommandContainer cmdContainer; - cmdContainer.initialize(pDevice, nullptr, true); + cmdContainer.initialize(pDevice, nullptr, true, false); auto size = cmdContainer.allocationIndirectHeaps[IndirectHeap::Type::SURFACE_STATE]->getUnderlyingBufferSize(); constexpr size_t expectedHeapSize = MemoryConstants::pageSize64k; EXPECT_EQ(expectedHeapSize, size); @@ -962,7 +998,7 @@ HWTEST_F(CommandContainerTest, givenCmdContainerHasImmediateCsrWhenGettingHeapWi cmdContainer.immediateReusableAllocationList = std::make_unique(); cmdContainer.setNumIddPerBlock(1); - auto code = cmdContainer.initialize(pDevice, nullptr, true); + auto code = cmdContainer.initialize(pDevice, nullptr, true, false); EXPECT_EQ(CommandContainer::ErrorCode::SUCCESS, code); EXPECT_EQ(nullptr, cmdContainer.getIndirectHeap(HeapType::DYNAMIC_STATE)); @@ -1138,7 +1174,7 @@ HWTEST_F(CommandContainerTest, givenCmdContainerUsedInRegularCmdListWhenGettingH HeapReserveArguments sshReserveArgs = {sshHeapPtr, 0, sshAlign}; HeapReserveArguments dshReserveArgs = {dshHeapPtr, 0, dshAlign}; - auto code = cmdContainer.initialize(pDevice, nullptr, true); + auto code = cmdContainer.initialize(pDevice, nullptr, true, false); EXPECT_EQ(CommandContainer::ErrorCode::SUCCESS, code); cmdContainer.reserveSpaceForDispatch(sshReserveArgs, dshReserveArgs, true); @@ -1186,7 +1222,7 @@ HWTEST_F(CommandContainerTest, givenCmdContainerUsingPrivateHeapsWhenGettingRese HeapReserveArguments sshReserveArgs = {sshHeapPtr, 0, sshAlign}; HeapReserveArguments dshReserveArgs = {dshHeapPtr, 0, dshAlign}; - auto code = cmdContainer.initialize(pDevice, nullptr, true); + auto code = cmdContainer.initialize(pDevice, nullptr, true, false); EXPECT_EQ(CommandContainer::ErrorCode::SUCCESS, code); constexpr size_t nonZeroSshSize = 4 * MemoryConstants::kiloByte; @@ -1229,7 +1265,7 @@ HWTEST_F(CommandContainerTest, cmdContainer.setNumIddPerBlock(1); - auto code = cmdContainer.initialize(pDevice, nullptr, true); + auto code = cmdContainer.initialize(pDevice, nullptr, true, false); EXPECT_EQ(CommandContainer::ErrorCode::SUCCESS, code); constexpr size_t misalignedSize = 11; @@ -1339,7 +1375,7 @@ TEST_F(CommandContainerTest, givenCmdContainerWhenFillReusableAllocationListsThe auto csr = pDevice->getDefaultEngine().commandStreamReceiver; AllocationsList allocList; - cmdContainer->initialize(pDevice, &allocList, true); + cmdContainer->initialize(pDevice, &allocList, true, false); cmdContainer->setImmediateCmdListCsr(csr); auto heapHelper = reinterpret_cast(cmdContainer->getHeapHelper()); @@ -1366,7 +1402,7 @@ TEST_F(CommandContainerTest, givenCmdContainerWhenFillReusableAllocationListsWit AllocationsList allocList; cmdContainer->enableHeapSharing(); - cmdContainer->initialize(pDevice, &allocList, true); + cmdContainer->initialize(pDevice, &allocList, true, false); cmdContainer->setImmediateCmdListCsr(csr); auto &reusableHeapsList = reinterpret_cast(cmdContainer->getHeapHelper())->storageForReuse->getAllocationsForReuse(); @@ -1386,7 +1422,7 @@ TEST_F(CommandContainerTest, givenCmdContainerWhenFillReusableAllocationListsWit auto csr = pDevice->getDefaultEngine().commandStreamReceiver; auto cmdContainer = std::make_unique(); AllocationsList allocList; - cmdContainer->initialize(pDevice, &allocList, true); + cmdContainer->initialize(pDevice, &allocList, true, false); cmdContainer->setImmediateCmdListCsr(csr); auto &reusableHeapsList = reinterpret_cast(cmdContainer->getHeapHelper())->storageForReuse->getAllocationsForReuse(); @@ -1407,7 +1443,7 @@ TEST_F(CommandContainerTest, givenCmdContainerWhenFillReusableAllocationListsWit DebugManager.flags.SetAmountOfReusableAllocations.set(1); auto cmdContainer = std::make_unique(); AllocationsList allocList; - cmdContainer->initialize(pDevice, &allocList, false); + cmdContainer->initialize(pDevice, &allocList, false, false); EXPECT_EQ(cmdContainer->immediateReusableAllocationList, nullptr); cmdContainer->fillReusableAllocationLists(); @@ -1422,7 +1458,7 @@ TEST_F(CommandContainerTest, givenCmdContainerWhenFillReusableAllocationListsAnd DebugManager.flags.SetAmountOfReusableAllocations.set(1); auto cmdContainer = std::make_unique(); AllocationsList allocList; - cmdContainer->initialize(pDevice, &allocList, false); + cmdContainer->initialize(pDevice, &allocList, false, false); EXPECT_EQ(cmdContainer->immediateReusableAllocationList, nullptr); EXPECT_TRUE(allocList.peekIsEmpty()); @@ -1443,7 +1479,7 @@ TEST_F(CommandContainerTest, givenCmdContainerWithoutGlobalListWhenFillReusableA DebugManagerStateRestore dbgRestore; DebugManager.flags.SetAmountOfReusableAllocations.set(1); auto cmdContainer = std::make_unique(); - cmdContainer->initialize(pDevice, nullptr, false); + cmdContainer->initialize(pDevice, nullptr, false, false); EXPECT_EQ(cmdContainer->immediateReusableAllocationList, nullptr); cmdContainer->fillReusableAllocationLists(); @@ -1461,7 +1497,7 @@ TEST_F(CommandContainerTest, givenCmdContainerWhenFillReusableAllocationListsWit auto cmdContainer = std::make_unique(); auto csr = pDevice->getDefaultEngine().commandStreamReceiver; AllocationsList allocList; - cmdContainer->initialize(pDevice, &allocList, false); + cmdContainer->initialize(pDevice, &allocList, false, false); cmdContainer->setImmediateCmdListCsr(csr); EXPECT_EQ(cmdContainer->immediateReusableAllocationList, nullptr); @@ -1478,7 +1514,7 @@ TEST_F(CommandContainerTest, givenCmdContainerAndCsrWhenGetHeapWithRequiredSizeA auto cmdContainer = std::make_unique(); auto csr = pDevice->getDefaultEngine().commandStreamReceiver; AllocationsList allocList; - cmdContainer->initialize(pDevice, &allocList, true); + cmdContainer->initialize(pDevice, &allocList, true, false); cmdContainer->setImmediateCmdListCsr(csr); cmdContainer->fillReusableAllocationLists(); @@ -1502,7 +1538,7 @@ TEST_F(CommandContainerTest, givenCmdContainerWhenFillReusableAllocationListsAnd auto cmdContainer = std::make_unique(); auto csr = pDevice->getDefaultEngine().commandStreamReceiver; AllocationsList allocList; - cmdContainer->initialize(pDevice, &allocList, false); + cmdContainer->initialize(pDevice, &allocList, false, false); cmdContainer->setImmediateCmdListCsr(csr); cmdContainer->fillReusableAllocationLists(); diff --git a/shared/test/unit_test/command_stream/linear_stream_tests.cpp b/shared/test/unit_test/command_stream/linear_stream_tests.cpp index 1b12643317..a5e40f5b32 100644 --- a/shared/test/unit_test/command_stream/linear_stream_tests.cpp +++ b/shared/test/unit_test/command_stream/linear_stream_tests.cpp @@ -161,7 +161,7 @@ TEST_F(LinearStreamTest, givenLinearStreamWithoutCmdContainerWhenOneByteLeftInSt using CommandContainerLinearStreamTest = Test; TEST_F(CommandContainerLinearStreamTest, givenLinearStreamWithCmdContainerWhenOneByteLeftInStreamThenGetSpaceThrowAbort) { CommandContainer cmdContainer; - cmdContainer.initialize(pDevice, nullptr, true); + cmdContainer.initialize(pDevice, nullptr, true, false); auto stream = reinterpret_cast(cmdContainer.getCommandStream()); stream->sizeUsed = stream->getMaxAvailableSpace() - 1; EXPECT_THROW(stream->getSpace(1), std::exception); @@ -169,7 +169,7 @@ TEST_F(CommandContainerLinearStreamTest, givenLinearStreamWithCmdContainerWhenOn TEST_F(CommandContainerLinearStreamTest, givenLinearStreamWithCmdContainerWhenThereIsNoSpaceForCommandAndBBEndThenNewCmdBufferAllocated) { CommandContainer cmdContainer; - cmdContainer.initialize(pDevice, nullptr, true); + cmdContainer.initialize(pDevice, nullptr, true, false); auto &gfxCoreHelper = pDevice->getGfxCoreHelper(); auto stream = reinterpret_cast(cmdContainer.getCommandStream()); size_t dummyCommandSize = 2; @@ -181,7 +181,7 @@ TEST_F(CommandContainerLinearStreamTest, givenLinearStreamWithCmdContainerWhenTh TEST_F(CommandContainerLinearStreamTest, givenLinearStreamWithCmdContainerWhenThereIsNoSpaceForCommandAndBBEndThenLinearStreamHasNewAllocation) { CommandContainer cmdContainer; - cmdContainer.initialize(pDevice, nullptr, true); + cmdContainer.initialize(pDevice, nullptr, true, false); auto &gfxCoreHelper = pDevice->getGfxCoreHelper(); auto stream = reinterpret_cast(cmdContainer.getCommandStream()); size_t dummyCommandSize = 2; @@ -194,7 +194,7 @@ TEST_F(CommandContainerLinearStreamTest, givenLinearStreamWithCmdContainerWhenTh TEST_F(CommandContainerLinearStreamTest, givenLinearStreamWithCmdContainerWhenThereIsNoSpaceForCommandAndBBEndThenGetSpaceReturnPtrFromNewAllocation) { CommandContainer cmdContainer; - cmdContainer.initialize(pDevice, nullptr, true); + cmdContainer.initialize(pDevice, nullptr, true, false); auto &gfxCoreHelper = pDevice->getGfxCoreHelper(); auto stream = reinterpret_cast(cmdContainer.getCommandStream()); size_t dummyCommandSize = 2; @@ -206,7 +206,7 @@ TEST_F(CommandContainerLinearStreamTest, givenLinearStreamWithCmdContainerWhenTh TEST_F(CommandContainerLinearStreamTest, givenLinearStreamWithCmdContainerWhenThereIsSpaceForCommandAndBBEndThenNewCmdBufferIsNotAllocated) { CommandContainer cmdContainer; - cmdContainer.initialize(pDevice, nullptr, true); + cmdContainer.initialize(pDevice, nullptr, true, false); auto &gfxCoreHelper = pDevice->getGfxCoreHelper(); auto stream = reinterpret_cast(cmdContainer.getCommandStream()); size_t dummyCommandSize = 2; @@ -218,7 +218,7 @@ TEST_F(CommandContainerLinearStreamTest, givenLinearStreamWithCmdContainerWhenTh TEST_F(CommandContainerLinearStreamTest, givenLinearStreamWithCmdContainerWhenThereIsNoSpaceForCommandAndBBEndThenBBEndAddedAtEndOfStream) { CommandContainer cmdContainer; - cmdContainer.initialize(pDevice, nullptr, true); + cmdContainer.initialize(pDevice, nullptr, true, false); auto &gfxCoreHelper = pDevice->getGfxCoreHelper(); auto stream = reinterpret_cast(cmdContainer.getCommandStream()); size_t dummyCommandSize = 2; diff --git a/shared/test/unit_test/debugger/test_l0_debugger.cpp b/shared/test/unit_test/debugger/test_l0_debugger.cpp index 92f193edd5..832f7c24bc 100644 --- a/shared/test/unit_test/debugger/test_l0_debugger.cpp +++ b/shared/test/unit_test/debugger/test_l0_debugger.cpp @@ -638,7 +638,7 @@ HWTEST2_P(L0DebuggerSimpleParameterizedTest, givenNotChangedSurfaceStateWhenCapt debugger->sbaTrackingGpuVa.address = 0x45670000; NEO::CommandContainer container; - container.initialize(pDevice, nullptr, true); + container.initialize(pDevice, nullptr, true, false); NEO::Debugger::SbaAddresses sba = {}; sba.SurfaceStateBaseAddress = 0x123456000; @@ -661,7 +661,7 @@ HWTEST2_P(L0DebuggerSimpleParameterizedTest, givenChangedBaseAddressesWhenCaptur debugger->sbaTrackingGpuVa.address = 0x45670000; { NEO::CommandContainer container; - container.initialize(pDevice, nullptr, true); + container.initialize(pDevice, nullptr, true, false); NEO::Debugger::SbaAddresses sba = {}; sba.SurfaceStateBaseAddress = 0x123456000; @@ -674,7 +674,7 @@ HWTEST2_P(L0DebuggerSimpleParameterizedTest, givenChangedBaseAddressesWhenCaptur { NEO::CommandContainer container; - container.initialize(pDevice, nullptr, true); + container.initialize(pDevice, nullptr, true, false); NEO::Debugger::SbaAddresses sba = {}; sba.GeneralStateBaseAddress = 0x123456000; @@ -687,7 +687,7 @@ HWTEST2_P(L0DebuggerSimpleParameterizedTest, givenChangedBaseAddressesWhenCaptur { NEO::CommandContainer container; - container.initialize(pDevice, nullptr, true); + container.initialize(pDevice, nullptr, true, false); NEO::Debugger::SbaAddresses sba = {}; sba.BindlessSurfaceStateBaseAddress = 0x123456000; diff --git a/shared/test/unit_test/direct_submission/direct_submission_tests_2.cpp b/shared/test/unit_test/direct_submission/direct_submission_tests_2.cpp index 948444c41f..d93f9871bb 100644 --- a/shared/test/unit_test/direct_submission/direct_submission_tests_2.cpp +++ b/shared/test/unit_test/direct_submission/direct_submission_tests_2.cpp @@ -15,6 +15,7 @@ #include "shared/source/helpers/flush_stamp.h" #include "shared/source/helpers/gfx_core_helper.h" #include "shared/source/helpers/register_offsets.h" +#include "shared/source/memory_manager/memory_allocation.h" #include "shared/source/utilities/cpuintrinsics.h" #include "shared/test/common/cmd_parse/hw_parse.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" @@ -251,6 +252,119 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, DirectSubmissionDispatchBufferTest, EXPECT_TRUE(foundFenceUpdate); } +HWTEST_F(DirectSubmissionDispatchBufferTest, givenCopyCommandBufferIntoRingWhenDispatchCommandBufferThenCopyTaskStream) { + using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; + using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; + using Dispatcher = RenderDispatcher; + + DebugManagerStateRestore restorer; + DebugManager.flags.DirectSubmissionFlatRingBuffer.set(-1); + + FlushStampTracker flushStamp(true); + MockDirectSubmissionHw directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver); + EXPECT_TRUE(directSubmission.copyCommandBufferIntoRing(batchBuffer)); + + bool ret = directSubmission.initialize(true, false); + EXPECT_TRUE(ret); + + size_t sizeUsed = directSubmission.ringCommandStream.getUsed(); + batchBuffer.endCmdPtr = batchBuffer.stream->getCpuBase(); + ret = directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp); + + HardwareParse hwParse; + hwParse.parseCommands(directSubmission.ringCommandStream, sizeUsed); + auto semaphoreIt = find(hwParse.cmdList.begin(), hwParse.cmdList.end()); + + MI_BATCH_BUFFER_START *bbStart = hwParse.getCommand(hwParse.cmdList.begin(), semaphoreIt); + EXPECT_EQ(nullptr, bbStart); +} + +HWTEST_F(DirectSubmissionDispatchBufferTest, givenDefaultDirectSubmissionFlatRingBufferAndSingleTileDirectSubmissionWhenSubmitSystemMemNotChainedBatchBufferWithoutRelaxingDependenciesThenCopyIntoRing) { + using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; + using Dispatcher = RenderDispatcher; + + DebugManagerStateRestore restorer; + DebugManager.flags.DirectSubmissionFlatRingBuffer.set(-1); + + MockDirectSubmissionHw directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver); + EXPECT_TRUE(directSubmission.copyCommandBufferIntoRing(batchBuffer)); +} + +HWTEST_F(DirectSubmissionDispatchBufferTest, givenDefaultDirectSubmissionFlatRingBufferAndSingleTileDirectSubmissionWhenSubmitSystemMemNotChainedBatchBufferWithoutCommandBufferRelaxingDependenciesThenNotCopyIntoRing) { + using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; + using Dispatcher = RenderDispatcher; + + DebugManagerStateRestore restorer; + DebugManager.flags.DirectSubmissionFlatRingBuffer.set(-1); + + batchBuffer.commandBufferAllocation = nullptr; + + MockDirectSubmissionHw directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver); + EXPECT_FALSE(directSubmission.copyCommandBufferIntoRing(batchBuffer)); +} + +HWTEST_F(DirectSubmissionDispatchBufferTest, givenDisabledDirectSubmissionFlatRingBufferAndSingleTileDirectSubmissionWhenSubmitSystemMemNotChainedBatchBufferWithoutRelaxingDependenciesThenNotCopyIntoRing) { + using Dispatcher = RenderDispatcher; + + DebugManagerStateRestore restorer; + DebugManager.flags.DirectSubmissionFlatRingBuffer.set(0); + + MockDirectSubmissionHw directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver); + EXPECT_FALSE(directSubmission.copyCommandBufferIntoRing(batchBuffer)); +} + +HWTEST_F(DirectSubmissionDispatchBufferTest, givenDefaultDirectSubmissionFlatRingBufferAndSingleTileDirectSubmissionWhenSubmitSystemMemNotChainedBatchBufferWithRelaxingDependenciesThenNotCopyIntoRing) { + using Dispatcher = RenderDispatcher; + + DebugManagerStateRestore restorer; + DebugManager.flags.DirectSubmissionFlatRingBuffer.set(-1); + + batchBuffer.hasRelaxedOrderingDependencies = true; + + MockDirectSubmissionHw directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver); + EXPECT_FALSE(directSubmission.copyCommandBufferIntoRing(batchBuffer)); +} + +HWTEST_F(DirectSubmissionDispatchBufferTest, givenDefaultDirectSubmissionFlatRingBufferAndSingleTileDirectSubmissionWhenSubmitSystemMemChainedBatchBufferWithoutRelaxingDependenciesThenNotCopyIntoRing) { + using Dispatcher = RenderDispatcher; + + DebugManagerStateRestore restorer; + DebugManager.flags.DirectSubmissionFlatRingBuffer.set(-1); + + batchBuffer.chainedBatchBuffer = reinterpret_cast(0x1234); + + MockDirectSubmissionHw directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver); + EXPECT_FALSE(directSubmission.copyCommandBufferIntoRing(batchBuffer)); +} + +HWTEST_F(DirectSubmissionDispatchBufferTest, givenDefaultDirectSubmissionFlatRingBufferAndSingleTileDirectSubmissionWhenSubmitLocalMemNotChainedBatchBufferWithoutRelaxingDependenciesThenNotCopyIntoRing) { + using Dispatcher = RenderDispatcher; + + DebugManagerStateRestore restorer; + DebugManager.flags.DirectSubmissionFlatRingBuffer.set(-1); + + static_cast(batchBuffer.commandBufferAllocation)->overrideMemoryPool(MemoryPool::LocalMemory); + + MockDirectSubmissionHw directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver); + EXPECT_FALSE(directSubmission.copyCommandBufferIntoRing(batchBuffer)); +} + +HWTEST_F(DirectSubmissionDispatchBufferTest, givenDefaultDirectSubmissionFlatRingBufferAndMultiTileDirectSubmissionWhenSubmitSystemMemNotChainedBatchBufferWithoutRelaxingDependenciesThenNotCopyIntoRing) { + using Dispatcher = RenderDispatcher; + + DebugManagerStateRestore restorer; + DebugManager.flags.DirectSubmissionFlatRingBuffer.set(-1); + + std::unique_ptr osContext(OsContext::create(pDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->osInterface.get(), pDevice->getRootDeviceIndex(), 0, + EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_CCS, EngineUsage::Regular}, + PreemptionMode::ThreadGroup, 0b11))); + pDevice->getDefaultEngine().commandStreamReceiver->setupContext(*osContext.get()); + + MockDirectSubmissionHw directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver); + + EXPECT_FALSE(directSubmission.copyCommandBufferIntoRing(batchBuffer)); +} + HWTEST_F(DirectSubmissionDispatchBufferTest, givenDirectSubmissionDisableMonitorFenceWhenDispatchWorkloadCalledThenExpectStartWithoutMonitorFence) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; diff --git a/shared/test/unit_test/encoders/command_encoder_tests_xehp_and_later.cpp b/shared/test/unit_test/encoders/command_encoder_tests_xehp_and_later.cpp index a122fa9215..26addfc620 100644 --- a/shared/test/unit_test/encoders/command_encoder_tests_xehp_and_later.cpp +++ b/shared/test/unit_test/encoders/command_encoder_tests_xehp_and_later.cpp @@ -162,7 +162,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterCommandEncoderTest, givenOffsetAndValue GenCmdList commands; CommandContainer cmdContainer; - cmdContainer.initialize(pDevice, nullptr, true); + cmdContainer.initialize(pDevice, nullptr, true, false); constexpr uint32_t regOffset = 0x2000u; constexpr uint32_t immVal = 0xbaau; constexpr uint64_t dstAddress = 0xDEADCAF0u; diff --git a/shared/test/unit_test/encoders/test_encode_command_buffer.cpp b/shared/test/unit_test/encoders/test_encode_command_buffer.cpp index 4dabd4a49b..532b3d92b2 100644 --- a/shared/test/unit_test/encoders/test_encode_command_buffer.cpp +++ b/shared/test/unit_test/encoders/test_encode_command_buffer.cpp @@ -18,7 +18,7 @@ using EncodeBatchBufferStartOrEndTest = Test; HWTEST_F(EncodeBatchBufferStartOrEndTest, givenCommandContainerWhenEncodeBBEndThenCommandIsAdded) { CommandContainer cmdContainer; - cmdContainer.initialize(pDevice, nullptr, true); + cmdContainer.initialize(pDevice, nullptr, true, false); EncodeBatchBufferStartOrEnd::programBatchBufferEnd(cmdContainer); GenCmdList commands; @@ -31,7 +31,7 @@ HWTEST_F(EncodeBatchBufferStartOrEndTest, givenCommandContainerWhenEncodeBBEndTh HWTEST_F(EncodeBatchBufferStartOrEndTest, givenCommandContainerWhenEncodeBBStartThenCommandIsAdded) { CommandContainer cmdContainer; - cmdContainer.initialize(pDevice, nullptr, true); + cmdContainer.initialize(pDevice, nullptr, true, false); EncodeBatchBufferStartOrEnd::programBatchBufferStart(cmdContainer.getCommandStream(), 0, true, false, false); GenCmdList commands; @@ -44,7 +44,7 @@ HWTEST_F(EncodeBatchBufferStartOrEndTest, givenCommandContainerWhenEncodeBBStart HWTEST_F(EncodeBatchBufferStartOrEndTest, givenCommandContainerWhenEncodeBBStartWithSecondLevelParameterThenCommandIsProgrammedCorrectly) { CommandContainer cmdContainer; - cmdContainer.initialize(pDevice, nullptr, true); + cmdContainer.initialize(pDevice, nullptr, true, false); EncodeBatchBufferStartOrEnd::programBatchBufferStart(cmdContainer.getCommandStream(), 0, true, false, false); GenCmdList commands; @@ -61,7 +61,7 @@ HWTEST_F(EncodeBatchBufferStartOrEndTest, givenCommandContainerWhenEncodeBBStart HWTEST_F(EncodeBatchBufferStartOrEndTest, givenCommandContainerWhenEncodeBBStartWithFirstLevelParameterThenCommandIsProgrammedCorrectly) { CommandContainer cmdContainer; - cmdContainer.initialize(pDevice, nullptr, true); + cmdContainer.initialize(pDevice, nullptr, true, false); EncodeBatchBufferStartOrEnd::programBatchBufferStart(cmdContainer.getCommandStream(), 0, false, false, false); GenCmdList commands; @@ -78,7 +78,7 @@ HWTEST_F(EncodeBatchBufferStartOrEndTest, givenCommandContainerWhenEncodeBBStart HWTEST_F(EncodeBatchBufferStartOrEndTest, givenGpuAddressWhenEncodeBBStartThenAddressIsProgrammedCorrectly) { CommandContainer cmdContainer; - cmdContainer.initialize(pDevice, nullptr, true); + cmdContainer.initialize(pDevice, nullptr, true, false); uint64_t gpuAddress = 12 * MemoryConstants::pageSize; EncodeBatchBufferStartOrEnd::programBatchBufferStart(cmdContainer.getCommandStream(), gpuAddress, false, false, false); @@ -99,7 +99,7 @@ using EncodeNoopTest = Test; HWTEST_F(EncodeNoopTest, WhenAligningLinearStreamToCacheLineSizeThenItIsAlignedCorrectly) { CommandContainer cmdContainer; - cmdContainer.initialize(pDevice, nullptr, true); + cmdContainer.initialize(pDevice, nullptr, true, false); auto commandStream = cmdContainer.getCommandStream(); EncodeNoop::alignToCacheLine(*commandStream); @@ -112,7 +112,7 @@ HWTEST_F(EncodeNoopTest, WhenAligningLinearStreamToCacheLineSizeThenItIsAlignedC HWTEST_F(EncodeNoopTest, WhenEmittingNoopsThenExpectCorrectNumberOfBytesNooped) { CommandContainer cmdContainer; - cmdContainer.initialize(pDevice, nullptr, true); + cmdContainer.initialize(pDevice, nullptr, true, false); auto commandStream = cmdContainer.getCommandStream(); size_t usedBefore = commandStream->getUsed(); diff --git a/shared/test/unit_test/encoders/test_encode_dispatch_kernel.cpp b/shared/test/unit_test/encoders/test_encode_dispatch_kernel.cpp index 0468049781..55832ba44c 100644 --- a/shared/test/unit_test/encoders/test_encode_dispatch_kernel.cpp +++ b/shared/test/unit_test/encoders/test_encode_dispatch_kernel.cpp @@ -506,7 +506,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenForceBtpPrefetchModeDe { DebugManager.flags.ForceBtpPrefetchMode.set(-1); cmdContainer.reset(new MyMockCommandContainer()); - cmdContainer->initialize(pDevice, nullptr, true); + cmdContainer->initialize(pDevice, nullptr, true, false); bool requiresUncachedMocs = false; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); @@ -537,7 +537,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenForceBtpPrefetchModeDe { DebugManager.flags.ForceBtpPrefetchMode.set(0); cmdContainer.reset(new MyMockCommandContainer()); - cmdContainer->initialize(pDevice, nullptr, true); + cmdContainer->initialize(pDevice, nullptr, true, false); bool requiresUncachedMocs = false; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); @@ -564,7 +564,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenForceBtpPrefetchModeDe { DebugManager.flags.ForceBtpPrefetchMode.set(1); cmdContainer.reset(new MyMockCommandContainer()); - cmdContainer->initialize(pDevice, nullptr, true); + cmdContainer->initialize(pDevice, nullptr, true, false); bool requiresUncachedMocs = false; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); @@ -1269,7 +1269,7 @@ HWTEST_F(BindlessCommandEncodeStatesContainerTest, givenBindlessKernelAndBindles DebugManagerStateRestore dbgRestorer; DebugManager.flags.UseBindlessMode.set(1); auto commandContainer = std::make_unique(); - commandContainer->initialize(pDevice, nullptr, true); + commandContainer->initialize(pDevice, nullptr, true, false); commandContainer->setDirtyStateForAllHeaps(false); pDevice->getExecutionEnvironment()->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->createBindlessHeapsHelper(pDevice->getMemoryManager(), pDevice->getNumGenericSubDevices() > 1, @@ -1305,7 +1305,7 @@ HWTEST2_F(BindlessCommandEncodeStatesContainerTest, givenBindlessKernelAndBindle DebugManagerStateRestore dbgRestorer; DebugManager.flags.UseBindlessMode.set(1); auto commandContainer = std::make_unique(); - commandContainer->initialize(pDevice, nullptr, true); + commandContainer->initialize(pDevice, nullptr, true, false); commandContainer->setDirtyStateForAllHeaps(false); pDevice->getExecutionEnvironment()->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->createBindlessHeapsHelper(pDevice->getMemoryManager(), pDevice->getNumGenericSubDevices() > 1, @@ -1344,7 +1344,7 @@ HWTEST_F(BindlessCommandEncodeStatesContainerTest, givenBindfulKernelWhenBindles DebugManagerStateRestore dbgRestorer; DebugManager.flags.UseBindlessMode.set(1); auto commandContainer = std::make_unique(); - commandContainer->initialize(pDevice, nullptr, true); + commandContainer->initialize(pDevice, nullptr, true, false); commandContainer->setDirtyStateForAllHeaps(false); pDevice->getExecutionEnvironment()->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->createBindlessHeapsHelper(pDevice->getMemoryManager(), pDevice->getNumGenericSubDevices() > 1, @@ -1380,7 +1380,7 @@ HWTEST_F(BindlessCommandEncodeStatesContainerTest, givenBindlessModeEnabledWhenD DebugManagerStateRestore dbgRestorer; DebugManager.flags.UseBindlessMode.set(1); auto commandContainer = std::make_unique(); - commandContainer->initialize(pDevice, nullptr, true); + commandContainer->initialize(pDevice, nullptr, true, false); commandContainer->setDirtyStateForAllHeaps(false); pDevice->getExecutionEnvironment()->rootDeviceEnvironments[pDevice->getRootDeviceIndex()]->createBindlessHeapsHelper(pDevice->getMemoryManager(), pDevice->getNumGenericSubDevices() > 1, @@ -1422,7 +1422,7 @@ HWTEST_F(BindlessCommandEncodeStatesTest, givenGlobalBindlessHeapsWhenDispatchin DebugManagerStateRestore restorer; DebugManager.flags.UseBindlessMode.set(1); auto cmdContainer = std::make_unique(); - cmdContainer->initialize(pDevice, nullptr, true); + cmdContainer->initialize(pDevice, nullptr, true, false); cmdContainer->setDirtyStateForAllHeaps(false); using SAMPLER_BORDER_COLOR_STATE = typename FamilyType::SAMPLER_BORDER_COLOR_STATE; using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; @@ -1474,7 +1474,7 @@ HWTEST_F(BindlessCommandEncodeStatesTest, givenBindlessModeDisabledelWithSampler DebugManagerStateRestore restorer; DebugManager.flags.UseBindlessMode.set(0); auto cmdContainer = std::make_unique(); - cmdContainer->initialize(pDevice, nullptr, true); + cmdContainer->initialize(pDevice, nullptr, true, false); using SAMPLER_STATE = typename FamilyType::SAMPLER_STATE; using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; uint32_t numSamplers = 1; diff --git a/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp b/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp index 7a6eab3539..7108cb035e 100644 --- a/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp +++ b/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp @@ -438,7 +438,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenForceBtpPrefetchModeD { DebugManager.flags.ForceBtpPrefetchMode.set(-1); cmdContainer.reset(new MyMockCommandContainer()); - cmdContainer->initialize(pDevice, nullptr, true); + cmdContainer->initialize(pDevice, nullptr, true, false); bool requiresUncachedMocs = false; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); @@ -471,7 +471,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenForceBtpPrefetchModeD { DebugManager.flags.ForceBtpPrefetchMode.set(0); cmdContainer.reset(new MyMockCommandContainer()); - cmdContainer->initialize(pDevice, nullptr, true); + cmdContainer->initialize(pDevice, nullptr, true, false); bool requiresUncachedMocs = false; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); @@ -495,7 +495,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenForceBtpPrefetchModeD { DebugManager.flags.ForceBtpPrefetchMode.set(1); cmdContainer.reset(new MyMockCommandContainer()); - cmdContainer->initialize(pDevice, nullptr, true); + cmdContainer->initialize(pDevice, nullptr, true, false); bool requiresUncachedMocs = false; EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); diff --git a/shared/test/unit_test/encoders/test_encode_math.cpp b/shared/test/unit_test/encoders/test_encode_math.cpp index 72fade9902..6a37de1de0 100644 --- a/shared/test/unit_test/encoders/test_encode_math.cpp +++ b/shared/test/unit_test/encoders/test_encode_math.cpp @@ -147,7 +147,7 @@ HWTEST_F(CommandEncoderMathTest, WhenReservingCommandThenBitfieldSetCorrectly) { GenCmdList commands; CommandContainer cmdContainer; - cmdContainer.initialize(pDevice, nullptr, true); + cmdContainer.initialize(pDevice, nullptr, true, false); EncodeMath::commandReserve(cmdContainer); @@ -177,7 +177,7 @@ HWTEST_F(CommandEncoderMathTest, givenOffsetAndValueWhenEncodeBitwiseAndValIsCal GenCmdList commands; CommandContainer cmdContainer; - cmdContainer.initialize(pDevice, nullptr, true); + cmdContainer.initialize(pDevice, nullptr, true, false); constexpr uint32_t regOffset = 0x2000u; constexpr uint32_t immVal = 0xbaau; constexpr uint64_t dstAddress = 0xDEADCAF0u; @@ -223,7 +223,7 @@ HWTEST_F(CommandEncoderMathTest, WhenSettingGroupSizeIndirectThenCommandsAreCorr using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM; CommandContainer cmdContainer; - cmdContainer.initialize(pDevice, nullptr, true); + cmdContainer.initialize(pDevice, nullptr, true, false); CrossThreadDataOffset offsets[3] = {0, sizeof(uint32_t), 2 * sizeof(uint32_t)}; uint32_t crossThreadAddress[3] = {}; @@ -249,7 +249,7 @@ HWTEST_F(CommandEncoderMathTest, WhenSettingGroupCountIndirectThenCommandsAreCor using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM; CommandContainer cmdContainer; - cmdContainer.initialize(pDevice, nullptr, true); + cmdContainer.initialize(pDevice, nullptr, true, false); CrossThreadDataOffset offsets[3] = {0, sizeof(uint32_t), 2 * sizeof(uint32_t)}; uint32_t crossThreadAddress[3] = {}; diff --git a/shared/test/unit_test/encoders/test_encode_math_xehp_and_later.cpp b/shared/test/unit_test/encoders/test_encode_math_xehp_and_later.cpp index f3917061ae..7ccb0f149b 100644 --- a/shared/test/unit_test/encoders/test_encode_math_xehp_and_later.cpp +++ b/shared/test/unit_test/encoders/test_encode_math_xehp_and_later.cpp @@ -24,7 +24,7 @@ HWTEST2_F(XeHPAndLaterCommandEncoderMathTest, WhenAppendsAGreaterThanThenPredica using MI_MATH_ALU_INST_INLINE = typename FamilyType::MI_MATH_ALU_INST_INLINE; CommandContainer cmdContainer; - cmdContainer.initialize(pDevice, nullptr, true); + cmdContainer.initialize(pDevice, nullptr, true, false); EncodeMathMMIO::encodeGreaterThanPredicate(cmdContainer, 0xDEADBEEFCAF0u, 17u); diff --git a/shared/test/unit_test/encoders/test_encode_set_mmio.cpp b/shared/test/unit_test/encoders/test_encode_set_mmio.cpp index b04928d7db..f37799eb2b 100644 --- a/shared/test/unit_test/encoders/test_encode_set_mmio.cpp +++ b/shared/test/unit_test/encoders/test_encode_set_mmio.cpp @@ -18,7 +18,7 @@ class CommandSetMMIOFixture : public DeviceFixture { void setUp() { DeviceFixture::setUp(); cmdContainer = std::make_unique(); - cmdContainer->initialize(pDevice, nullptr, true); + cmdContainer->initialize(pDevice, nullptr, true, false); } void tearDown() { cmdContainer.reset(); diff --git a/shared/test/unit_test/fixtures/command_container_fixture.cpp b/shared/test/unit_test/fixtures/command_container_fixture.cpp index 0a9d16377a..c0c443e260 100644 --- a/shared/test/unit_test/fixtures/command_container_fixture.cpp +++ b/shared/test/unit_test/fixtures/command_container_fixture.cpp @@ -15,7 +15,7 @@ namespace NEO { void CommandEncodeStatesFixture::setUp() { DeviceFixture::setUp(); cmdContainer = std::make_unique(); - cmdContainer->initialize(pDevice, nullptr, true); + cmdContainer->initialize(pDevice, nullptr, true, false); cmdContainer->setDirtyStateForAllHeaps(false); const auto &hwInfo = pDevice->getHardwareInfo(); auto &productHelper = pDevice->getProductHelper(); diff --git a/shared/test/unit_test/fixtures/direct_submission_fixture.h b/shared/test/unit_test/fixtures/direct_submission_fixture.h index 37ccb6cb42..e83c7323df 100644 --- a/shared/test/unit_test/fixtures/direct_submission_fixture.h +++ b/shared/test/unit_test/fixtures/direct_submission_fixture.h @@ -38,16 +38,20 @@ struct DirectSubmissionFixture : public DeviceFixture { struct DirectSubmissionDispatchBufferFixture : public DirectSubmissionFixture { void setUp() { + DebugManager.flags.DirectSubmissionFlatRingBuffer.set(0); DirectSubmissionFixture::setUp(); MemoryManager *memoryManager = pDevice->getExecutionEnvironment()->memoryManager.get(); const AllocationProperties commandBufferProperties{pDevice->getRootDeviceIndex(), 0x1000, AllocationType::COMMAND_BUFFER, pDevice->getDeviceBitfield()}; commandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(commandBufferProperties); + stream = std::make_unique(commandBuffer); + stream->getSpace(0x40); batchBuffer.endCmdPtr = &bbStart[0]; batchBuffer.commandBufferAllocation = commandBuffer; batchBuffer.usedSize = 0x40; batchBuffer.taskStartAddress = 0x881112340000; + batchBuffer.stream = stream.get(); } void tearDown() { @@ -60,4 +64,6 @@ struct DirectSubmissionDispatchBufferFixture : public DirectSubmissionFixture { BatchBuffer batchBuffer; uint8_t bbStart[64]; GraphicsAllocation *commandBuffer; + DebugManagerStateRestore restorer; + std::unique_ptr stream; }; diff --git a/shared/test/unit_test/gen11/test_encode_math_gen11.cpp b/shared/test/unit_test/gen11/test_encode_math_gen11.cpp index 04dde53da5..b15bb82481 100644 --- a/shared/test/unit_test/gen11/test_encode_math_gen11.cpp +++ b/shared/test/unit_test/gen11/test_encode_math_gen11.cpp @@ -26,7 +26,7 @@ GEN11TEST_F(CommandEncoderMathTestGen11, WhenAppendsAGreaterThanThenPredicateCor using MI_MATH_ALU_INST_INLINE = typename FamilyType::MI_MATH_ALU_INST_INLINE; CommandContainer cmdContainer; - cmdContainer.initialize(pDevice, nullptr, true); + cmdContainer.initialize(pDevice, nullptr, true, false); EncodeMathMMIO::encodeGreaterThanPredicate(cmdContainer, 0xDEADBEEFCAF0u, 17u); diff --git a/shared/test/unit_test/gen12lp/test_command_encoder_gen12lp.cpp b/shared/test/unit_test/gen12lp/test_command_encoder_gen12lp.cpp index cb6b873f92..7748dafe9e 100644 --- a/shared/test/unit_test/gen12lp/test_command_encoder_gen12lp.cpp +++ b/shared/test/unit_test/gen12lp/test_command_encoder_gen12lp.cpp @@ -33,7 +33,7 @@ GEN12LPTEST_F(CommandEncoderTest, WhenAdjustComputeModeIsCalledThenStateComputeM CommandContainer cmdContainer; - auto ret = cmdContainer.initialize(pDevice, nullptr, true); + auto ret = cmdContainer.initialize(pDevice, nullptr, true, false); ASSERT_EQ(CommandContainer::ErrorCode::SUCCESS, ret); auto usedSpaceBefore = cmdContainer.getCommandStream()->getUsed(); @@ -62,7 +62,7 @@ GEN12LPTEST_F(CommandEncoderTest, WhenAdjustComputeModeIsCalledThenStateComputeM GEN12LPTEST_F(CommandEncoderTest, givenCommandContainerWhenEncodeL3StateThenDoNotDispatchMMIOCommand) { CommandContainer cmdContainer; - cmdContainer.initialize(pDevice, nullptr, true); + cmdContainer.initialize(pDevice, nullptr, true, false); EncodeL3State::encode(cmdContainer, false); GenCmdList commands; @@ -84,7 +84,7 @@ GEN12LPTEST_F(CommandEncodeStatesTest, givenVariousEngineTypesWhenEncodeSbaThenA CommandContainer cmdContainer; - auto ret = cmdContainer.initialize(pDevice, nullptr, true); + auto ret = cmdContainer.initialize(pDevice, nullptr, true, false); ASSERT_EQ(CommandContainer::ErrorCode::SUCCESS, ret); auto gmmHelper = cmdContainer.getDevice()->getRootDeviceEnvironment().getGmmHelper(); @@ -127,7 +127,7 @@ GEN12LPTEST_F(CommandEncoderTest, GivenGen12LpWhenProgrammingL3StateOnThenExpect using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; CommandContainer cmdContainer; - cmdContainer.initialize(pDevice, nullptr, true); + cmdContainer.initialize(pDevice, nullptr, true, false); EncodeL3State::encode(cmdContainer, true); @@ -141,7 +141,7 @@ GEN12LPTEST_F(CommandEncoderTest, GivenGen12LpWhenProgrammingL3StateOnThenExpect GEN12LPTEST_F(CommandEncoderTest, GivenGen12LpWhenProgrammingL3StateOffThenExpectNoCommandsDispatched) { using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; CommandContainer cmdContainer; - cmdContainer.initialize(pDevice, nullptr, true); + cmdContainer.initialize(pDevice, nullptr, true, false); EncodeL3State::encode(cmdContainer, false); diff --git a/shared/test/unit_test/gen12lp/test_encode_math_gen12lp.cpp b/shared/test/unit_test/gen12lp/test_encode_math_gen12lp.cpp index 8ea544e1b8..6c48960da1 100644 --- a/shared/test/unit_test/gen12lp/test_encode_math_gen12lp.cpp +++ b/shared/test/unit_test/gen12lp/test_encode_math_gen12lp.cpp @@ -24,7 +24,7 @@ GEN12LPTEST_F(CommandEncoderMathTestGen12Lp, WhenAppendsAGreaterThanThenPredicat using MI_MATH_ALU_INST_INLINE = typename FamilyType::MI_MATH_ALU_INST_INLINE; CommandContainer cmdContainer; - cmdContainer.initialize(pDevice, nullptr, true); + cmdContainer.initialize(pDevice, nullptr, true, false); EncodeMathMMIO::encodeGreaterThanPredicate(cmdContainer, 0xDEADBEEFCAF0u, 17u); diff --git a/shared/test/unit_test/gen8/test_encode_math_gen8.cpp b/shared/test/unit_test/gen8/test_encode_math_gen8.cpp index ad84c2ffc8..184f1ae911 100644 --- a/shared/test/unit_test/gen8/test_encode_math_gen8.cpp +++ b/shared/test/unit_test/gen8/test_encode_math_gen8.cpp @@ -26,7 +26,7 @@ GEN8TEST_F(CommandEncoderMathTestGen8, WhenAppendsAGreaterThanThenPredicateCorre using MI_MATH_ALU_INST_INLINE = typename FamilyType::MI_MATH_ALU_INST_INLINE; CommandContainer cmdContainer; - cmdContainer.initialize(pDevice, nullptr, true); + cmdContainer.initialize(pDevice, nullptr, true, false); EncodeMathMMIO::encodeGreaterThanPredicate(cmdContainer, 0xDEADBEEFCAF0u, 17u); diff --git a/shared/test/unit_test/gen9/test_command_encoder_gen9.cpp b/shared/test/unit_test/gen9/test_command_encoder_gen9.cpp index 30a8cbb736..7d651e669e 100644 --- a/shared/test/unit_test/gen9/test_command_encoder_gen9.cpp +++ b/shared/test/unit_test/gen9/test_command_encoder_gen9.cpp @@ -23,7 +23,7 @@ using CommandEncoderTest = Test; GEN9TEST_F(CommandEncoderTest, WhenProgrammingThenLoadRegisterImmIsUsed) { CommandContainer cmdContainer; - cmdContainer.initialize(pDevice, nullptr, true); + cmdContainer.initialize(pDevice, nullptr, true, false); EncodeL3State::encode(cmdContainer, false); GenCmdList commands; @@ -36,7 +36,7 @@ GEN9TEST_F(CommandEncoderTest, WhenProgrammingThenLoadRegisterImmIsUsed) { GEN9TEST_F(CommandEncoderTest, givenNoSlmThenCorrectMmioIsSet) { CommandContainer cmdContainer; - cmdContainer.initialize(pDevice, nullptr, true); + cmdContainer.initialize(pDevice, nullptr, true, false); EncodeL3State::encode(cmdContainer, false); GenCmdList commands; @@ -54,7 +54,7 @@ GEN9TEST_F(CommandEncoderTest, givenNoSlmThenCorrectMmioIsSet) { GEN9TEST_F(CommandEncoderTest, givenSlmThenCorrectMmioIsSet) { CommandContainer cmdContainer; - cmdContainer.initialize(pDevice, nullptr, true); + cmdContainer.initialize(pDevice, nullptr, true, false); EncodeL3State::encode(cmdContainer, true); GenCmdList commands; diff --git a/shared/test/unit_test/gen9/test_encode_math_gen9.cpp b/shared/test/unit_test/gen9/test_encode_math_gen9.cpp index 941750fff0..da20bb17e2 100644 --- a/shared/test/unit_test/gen9/test_encode_math_gen9.cpp +++ b/shared/test/unit_test/gen9/test_encode_math_gen9.cpp @@ -26,7 +26,7 @@ GEN9TEST_F(CommandEncoderMathTestGen9, WhenAppendsAGreaterThanThenPredicateCorre using MI_MATH_ALU_INST_INLINE = typename FamilyType::MI_MATH_ALU_INST_INLINE; CommandContainer cmdContainer; - cmdContainer.initialize(pDevice, nullptr, true); + cmdContainer.initialize(pDevice, nullptr, true, false); EncodeMathMMIO::encodeGreaterThanPredicate(cmdContainer, 0xDEADBEEFCAF0u, 17u); diff --git a/shared/test/unit_test/os_interface/linux/drm_command_stream_tests_1.cpp b/shared/test/unit_test/os_interface/linux/drm_command_stream_tests_1.cpp index 2f40512cd7..9e0d124f97 100644 --- a/shared/test/unit_test/os_interface/linux/drm_command_stream_tests_1.cpp +++ b/shared/test/unit_test/os_interface/linux/drm_command_stream_tests_1.cpp @@ -674,6 +674,7 @@ struct DrmCommandStreamDirectSubmissionTest : public DrmCommandStreamEnhancedTes void setUpT() { DebugManager.flags.EnableDirectSubmission.set(1u); DebugManager.flags.DirectSubmissionDisableMonitorFence.set(0); + DebugManager.flags.DirectSubmissionFlatRingBuffer.set(0); DrmCommandStreamEnhancedTest::setUpT(); auto hwInfo = device->getRootDeviceEnvironment().getMutableHardwareInfo(); auto engineType = device->getDefaultEngine().osContext->getEngineType(); diff --git a/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp b/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp index a3465c34fa..d795732c8d 100644 --- a/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp +++ b/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp @@ -3798,7 +3798,7 @@ TEST(DrmMemoryManager, givenEnabledResourceRegistrationWhenSshIsAllocatedThenItI auto device = std::unique_ptr(MockDevice::create(executionEnvironment, 0)); CommandContainer cmdContainer; - cmdContainer.initialize(device.get(), nullptr, true); + cmdContainer.initialize(device.get(), nullptr, true, false); auto *ssh = cmdContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE); auto bo = static_cast(ssh->getGraphicsAllocation())->getBO(); diff --git a/shared/test/unit_test/os_interface/windows/device_command_stream_tests.cpp b/shared/test/unit_test/os_interface/windows/device_command_stream_tests.cpp index 8531acfe99..7f7230690d 100644 --- a/shared/test/unit_test/os_interface/windows/device_command_stream_tests.cpp +++ b/shared/test/unit_test/os_interface/windows/device_command_stream_tests.cpp @@ -1135,6 +1135,7 @@ HWTEST_TEMPLATED_F(WddmCommandStreamMockGdiTest, givenDirectSubmissionEnabledOnR auto mockCsr = static_cast *>(csr); DebugManager.flags.EnableDirectSubmission.set(1); + DebugManager.flags.DirectSubmissionFlatRingBuffer.set(0); auto hwInfo = device->getRootDeviceEnvironment().getMutableHardwareInfo(); hwInfo->capabilityTable.directSubmissionEngines.data[aub_stream::ENGINE_RCS].engineSupported = true; @@ -1178,6 +1179,7 @@ HWTEST_TEMPLATED_F(WddmCommandStreamMockGdiTest, givenDirectSubmissionEnabledOnB auto mockCsr = static_cast *>(csr); DebugManager.flags.EnableDirectSubmission.set(1); + DebugManager.flags.DirectSubmissionFlatRingBuffer.set(0); auto hwInfo = device->getRootDeviceEnvironment().getMutableHardwareInfo(); hwInfo->capabilityTable.directSubmissionEngines.data[aub_stream::ENGINE_BCS].engineSupported = true;