diff --git a/level_zero/core/source/cmdlist/cmdlist.h b/level_zero/core/source/cmdlist/cmdlist.h index 806c3fda4f..a45cb5b7a0 100644 --- a/level_zero/core/source/cmdlist/cmdlist.h +++ b/level_zero/core/source/cmdlist/cmdlist.h @@ -366,6 +366,7 @@ struct CommandList : _ze_command_list_handle_t { UnifiedMemoryControls unifiedMemoryControls; NEO::PrefetchContext prefetchContext; NEO::L1CachePolicy l1CachePolicyData{}; + NEO::EncodeDummyBlitWaArgs dummyBlitWa{}; int64_t currentSurfaceStateBaseAddress = NEO::StreamProperty64::initValue; int64_t currentDynamicStateBaseAddress = NEO::StreamProperty64::initValue; @@ -417,7 +418,7 @@ struct CommandList : _ze_command_list_handle_t { bool compactL3FlushEventPacket = false; bool dynamicHeapRequired = false; bool kernelWithAssertAppended = false; - NEO::EncodeDummyBlitWaArgs dummyBlitWa{}; + bool dispatchCmdListBatchBufferAsPrimary = false; }; using CommandListAllocatorFn = CommandList *(*)(uint32_t); diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index c45b521fbb..345838f9ae 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -174,6 +174,7 @@ ze_result_t CommandListCoreFamily::initialize(Device *device, NEO this->finalStreamState.initSupport(rootDeviceEnvironment); this->commandContainer.setStateBaseAddressTracking(this->stateBaseAddressTracking); this->dummyBlitWa.rootDeviceEnvironment = &(device->getNEODevice()->getRootDeviceEnvironmentRef()); + this->dispatchCmdListBatchBufferAsPrimary = L0GfxCoreHelper::dispatchCmdListBatchBufferAsPrimary(); if (device->isImplicitScalingCapable() && !this->internalUsage && !isCopyOnly()) { this->partitionCount = static_cast(this->device->getNEODevice()->getDeviceBitfield().count()); diff --git a/level_zero/core/source/cmdqueue/cmdqueue.cpp b/level_zero/core/source/cmdqueue/cmdqueue.cpp index 7ab3ad13c2..22b9fca345 100644 --- 
a/level_zero/core/source/cmdqueue/cmdqueue.cpp +++ b/level_zero/core/source/cmdqueue/cmdqueue.cpp @@ -98,6 +98,7 @@ ze_result_t CommandQueueImp::initialize(bool copyOnly, bool isInternal) { auto &productHelper = rootDeviceEnvironment.getHelper(); this->doubleSbaWa = productHelper.isAdditionalStateBaseAddressWARequired(hwInfo); this->cmdListHeapAddressModel = L0GfxCoreHelper::getHeapAddressModel(rootDeviceEnvironment); + this->dispatchCmdListBatchBufferAsPrimary = L0GfxCoreHelper::dispatchCmdListBatchBufferAsPrimary(); } return returnValue; } diff --git a/level_zero/core/source/cmdqueue/cmdqueue.h b/level_zero/core/source/cmdqueue/cmdqueue.h index ba64e687bc..8b044d7c06 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue.h +++ b/level_zero/core/source/cmdqueue/cmdqueue.h @@ -82,6 +82,7 @@ struct CommandQueue : _ze_command_queue_handle_t { bool stateComputeModeTracking = false; bool stateBaseAddressTracking = false; bool doubleSbaWa = false; + bool dispatchCmdListBatchBufferAsPrimary = false; }; using CommandQueueAllocatorFn = CommandQueue *(*)(Device *device, NEO::CommandStreamReceiver *csr, diff --git a/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.cpp b/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.cpp index 526ed154d0..0b6833a2e4 100644 --- a/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.cpp +++ b/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.cpp @@ -103,6 +103,14 @@ NEO::HeapAddressModel L0GfxCoreHelper::getHeapAddressModel(const NEO::RootDevice return l0GfxCoreHelper.getPlatformHeapAddressModel(); } +bool L0GfxCoreHelper::dispatchCmdListBatchBufferAsPrimary() { + bool defaultValue = false; + if (NEO::DebugManager.flags.DispatchCmdlistCmdBufferPrimary.get() != -1) { + return !!(NEO::DebugManager.flags.DispatchCmdlistCmdBufferPrimary.get()); + } + return defaultValue; +} + } // namespace L0 template <> diff --git a/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.h 
b/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.h index 054f98f46e..ea9932e92b 100644 --- a/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.h +++ b/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.h @@ -50,6 +50,7 @@ class L0GfxCoreHelper : public NEO::ApiGfxCoreHelper { static bool useDynamicEventPacketsCount(const NEO::HardwareInfo &hwInfo); static bool useSignalAllEventPackets(const NEO::HardwareInfo &hwInfo); static NEO::HeapAddressModel getHeapAddressModel(const NEO::RootDeviceEnvironment &rootDeviceEnvironment); + static bool dispatchCmdListBatchBufferAsPrimary(); virtual void setAdditionalGroupProperty(ze_command_queue_group_properties_t &groupProperty, NEO::EngineGroupT &group) const = 0; virtual L0::Event *createEvent(L0::EventPool *eventPool, const ze_event_desc_t *desc, L0::Device *device) const = 0; diff --git a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h index 401ed307b0..0fe61ddd58 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h +++ b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h @@ -59,6 +59,7 @@ struct WhiteBox<::L0::CommandListCoreFamily> using BaseClass::currentIndirectObjectBaseAddress; using BaseClass::currentSurfaceStateBaseAddress; using BaseClass::device; + using BaseClass::dispatchCmdListBatchBufferAsPrimary; using BaseClass::doubleSbaWa; using BaseClass::engineGroupType; using BaseClass::estimateBufferSizeMultiTileBarrier; @@ -210,6 +211,7 @@ struct WhiteBox<::L0::CommandList> : public ::L0::CommandListImp { using BaseClass::currentIndirectObjectBaseAddress; using BaseClass::currentSurfaceStateBaseAddress; using BaseClass::device; + using BaseClass::dispatchCmdListBatchBufferAsPrimary; using BaseClass::doubleSbaWa; using BaseClass::finalStreamState; using BaseClass::frontEndStateTracking; diff --git a/level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h b/level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h index 
56acfb13bd..6bcb09cafa 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h +++ b/level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h @@ -35,6 +35,7 @@ struct WhiteBox<::L0::CommandQueue> : public ::L0::CommandQueueImp { using BaseClass::taskCount; using CommandQueue::activeSubDevices; using CommandQueue::cmdListHeapAddressModel; + using CommandQueue::dispatchCmdListBatchBufferAsPrimary; using CommandQueue::doubleSbaWa; using CommandQueue::frontEndStateTracking; using CommandQueue::internalUsage; @@ -71,6 +72,7 @@ struct MockCommandQueueHw : public L0::CommandQueueHw { using BaseClass::printfKernelContainer; using L0::CommandQueue::activeSubDevices; using L0::CommandQueue::cmdListHeapAddressModel; + using L0::CommandQueue::dispatchCmdListBatchBufferAsPrimary; using L0::CommandQueue::doubleSbaWa; using L0::CommandQueue::frontEndStateTracking; using L0::CommandQueue::internalUsage; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp index 94c4662390..59bb371b67 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp @@ -2435,6 +2435,9 @@ TEST_F(CommandListCreate, givenCreatedCommandListWhenGettingTrackingFlagsThenDef auto expectedHeapAddressModel = l0GfxCoreHelper.getPlatformHeapAddressModel(); EXPECT_EQ(expectedHeapAddressModel, commandList->getCmdListHeapAddressModel()); EXPECT_EQ(expectedHeapAddressModel, commandList->getCmdContainer().getHeapAddressModel()); + + auto expectedDispatchCmdListBatchBufferAsPrimary = L0GfxCoreHelper::dispatchCmdListBatchBufferAsPrimary(); + EXPECT_EQ(expectedDispatchCmdListBatchBufferAsPrimary, commandList->dispatchCmdListBatchBufferAsPrimary); } } // namespace ult diff --git a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_1.cpp b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_1.cpp index 
92231f797c..a687152368 100644 --- a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_1.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_1.cpp @@ -2010,6 +2010,9 @@ TEST_F(CommandQueueCreate, givenCreatedCommandQueueWhenGettingTrackingFlagsThenD auto expectedHeapAddressModel = l0GfxCoreHelper.getPlatformHeapAddressModel(); EXPECT_EQ(expectedHeapAddressModel, commandQueue->cmdListHeapAddressModel); + auto expectedDispatchCmdListBatchBufferAsPrimary = L0GfxCoreHelper::dispatchCmdListBatchBufferAsPrimary(); + EXPECT_EQ(expectedDispatchCmdListBatchBufferAsPrimary, commandQueue->dispatchCmdListBatchBufferAsPrimary); + commandQueue->destroy(); } diff --git a/level_zero/core/test/unit_tests/sources/helper/l0_gfx_core_helper_tests.cpp b/level_zero/core/test/unit_tests/sources/helper/l0_gfx_core_helper_tests.cpp index 66ec45a665..cf031447ab 100644 --- a/level_zero/core/test/unit_tests/sources/helper/l0_gfx_core_helper_tests.cpp +++ b/level_zero/core/test/unit_tests/sources/helper/l0_gfx_core_helper_tests.cpp @@ -857,5 +857,17 @@ HWTEST2_F(L0GfxCoreHelperTest, givenL0GfxCoreHelperWhenGettingSupportedNumGrfsTh EXPECT_EQ(expectedValues, l0GfxCoreHelper.getSupportedNumGrfs()); } +TEST_F(L0GfxCoreHelperTest, givenL0GfxCoreHelperUsingOverrideDebugKeyWhenGettingDispatchCmdListCmdBufferPrimaryThenUseDbgKeyValue) { + DebugManagerStateRestore restorer; + + DebugManager.flags.DispatchCmdlistCmdBufferPrimary.set(0); + + EXPECT_FALSE(L0GfxCoreHelper::dispatchCmdListBatchBufferAsPrimary()); + + DebugManager.flags.DispatchCmdlistCmdBufferPrimary.set(1); + + EXPECT_TRUE(L0GfxCoreHelper::dispatchCmdListBatchBufferAsPrimary()); +} + } // namespace ult } // namespace L0 diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl index d6bd5e491c..5919c13c75 100644 --- a/shared/source/debug_settings/debug_variables_base.inl +++ b/shared/source/debug_settings/debug_variables_base.inl @@ -315,6 
+315,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, SplitBcsMaskD2H, 0, "0: default, >0: bitmask: in DECLARE_DEBUG_VARIABLE(int32_t, ReuseKernelBinaries, -1, "-1: default, 0:disabled, 1: enabled. If enabled, driver reuses kernel binaries.") DECLARE_DEBUG_VARIABLE(int32_t, SetAmountOfReusableAllocations, -1, "-1: default, 0:disabled, > 1: enabled. If enabled, driver will fill reusable allocation lists with given amount of command buffers and heaps at initialization of immediate command list.") DECLARE_DEBUG_VARIABLE(int32_t, UseHighAlignmentForHeapExtended, -1, "-1: default, 0:disabled, > 1: enabled. If enabled, driver aligns HEAP_EXTENDED allocations to GPU VA that is next power of 2 for a given size, if disables GPU VA is using 2MB/64KB alignment.") +DECLARE_DEBUG_VARIABLE(int32_t, DispatchCmdlistCmdBufferPrimary, -1, "-1: default, 0: dispatch command buffers as secondary, 1: dispatch command buffers as primary and chain") /*DIRECT SUBMISSION FLAGS*/ DECLARE_DEBUG_VARIABLE(int32_t, EnableDirectSubmission, -1, "-1: default (disabled), 0: disable, 1:enable. Enables direct submission of command buffers bypassing KMD") diff --git a/shared/test/common/test_files/igdrcl.config b/shared/test/common/test_files/igdrcl.config index 941fdfabb2..95b8382112 100644 --- a/shared/test/common/test_files/igdrcl.config +++ b/shared/test/common/test_files/igdrcl.config @@ -212,6 +212,7 @@ UseAsyncDrmExec = -1 EnableMultiStorageResources = -1 SelectCmdListHeapAddressModel = -1 MultiStorageGranularity = -1 +DispatchCmdlistCmdBufferPrimary = -1 MultiStoragePolicy = -1; PrintExecutionBuffer = 0 PrintBOsForSubmit = 0