diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl index 2e1891db2f..7f44652705 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl @@ -520,7 +520,7 @@ void CommandListCoreFamily::appendEventForProfilingAllWalkers(Eve template void CommandListCoreFamily::appendDispatchOffsetRegister(bool workloadPartitionEvent, bool beforeProfilingCmds) { - if (workloadPartitionEvent && NEO::ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled()) { + if (workloadPartitionEvent && !device->getL0GfxCoreHelper().hasUnifiedPostSyncAllocationLayout()) { auto offset = beforeProfilingCmds ? NEO::ImplicitScalingDispatch::getTimeStampPostSyncOffset() : NEO::ImplicitScalingDispatch::getImmediateWritePostSyncOffset(); NEO::ImplicitScalingDispatch::dispatchOffsetRegister(*commandContainer.getCommandStream(), offset); diff --git a/level_zero/core/source/event/event.cpp b/level_zero/core/source/event/event.cpp index 715a1f9ed0..c4f6d45dc9 100644 --- a/level_zero/core/source/event/event.cpp +++ b/level_zero/core/source/event/event.cpp @@ -39,6 +39,9 @@ namespace L0 { template Event *Event::create(EventPool *, const ze_event_desc_t *, Device *); template Event *Event::create(EventPool *, const ze_event_desc_t *, Device *); +template struct EventImp; +template struct EventImp; + ze_result_t EventPool::initialize(DriverHandle *driver, Context *context, uint32_t numDevices, ze_device_handle_t *deviceHandles) { this->context = static_cast(context); diff --git a/level_zero/core/source/event/event_imp.h b/level_zero/core/source/event/event_imp.h index aeec9b6287..e988290ace 100644 --- a/level_zero/core/source/event/event_imp.h +++ b/level_zero/core/source/event/event_imp.h @@ -26,19 +26,7 @@ class KernelEventCompletionData : public NEO::TimestampPackets struct EventImp : public Event { - EventImp(EventPool *eventPool, int index, Device *device, bool tbxMode) - : Event(eventPool, index, device), tbxMode(tbxMode) { - contextStartOffset = NEO::TimestampPackets::getContextStartOffset(); - contextEndOffset = NEO::TimestampPackets::getContextEndOffset(); - globalStartOffset = NEO::TimestampPackets::getGlobalStartOffset(); - globalEndOffset = NEO::TimestampPackets::getGlobalEndOffset(); - timestampSizeInDw = (sizeof(TagSizeT) / sizeof(uint32_t)); - singlePacketSize = NEO::TimestampPackets::getSinglePacketSize(); - - if (NEO::ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled()) { - singlePacketSize = sizeof(uint64_t); - } - } + EventImp(EventPool *eventPool, int index, Device *device, bool tbxMode); ~EventImp() override {} diff --git a/level_zero/core/source/event/event_impl.inl b/level_zero/core/source/event/event_impl.inl index 4cf5b7752c..28c063ff5e 100644 --- a/level_zero/core/source/event/event_impl.inl +++ b/level_zero/core/source/event/event_impl.inl @@ -58,7 +58,7 @@ Event *Event::create(EventPool *eventPool, const ze_event_desc_t *desc, Device * event->kernelEventCompletionData = std::make_unique[]>(event->maxKernelCount); - bool useContextEndOffset = eventPool->isImplicitScalingCapableFlagSet() && !NEO::ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled(); + bool useContextEndOffset = false; int32_t overrideUseContextEndOffset = NEO::debugManager.flags.UseContextEndOffsetForEventCompletion.get(); if (overrideUseContextEndOffset != -1) { useContextEndOffset = !!overrideUseContextEndOffset; @@ -114,6 +114,17 @@ Event *Event::create(EventPool *eventPool, const ze_event_desc_t *desc, Device * return event; } +template +EventImp::EventImp(EventPool *eventPool, int index, Device *device, bool tbxMode) + : Event(eventPool, index, device), tbxMode(tbxMode) { + contextStartOffset = NEO::TimestampPackets::getContextStartOffset(); + contextEndOffset = NEO::TimestampPackets::getContextEndOffset(); + globalStartOffset = NEO::TimestampPackets::getGlobalStartOffset(); + globalEndOffset = NEO::TimestampPackets::getGlobalEndOffset(); + timestampSizeInDw = (sizeof(TagSizeT) / sizeof(uint32_t)); + singlePacketSize = device->getL0GfxCoreHelper().getImmediateWritePostSyncOffset(); +} + template ze_result_t EventImp::calculateProfilingData() { constexpr uint32_t skipL3EventPacketIndex = 2u; diff --git a/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.h b/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.h index ebb48ffabb..2e69e30dca 100644 --- a/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.h +++ b/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.h @@ -86,6 +86,8 @@ class L0GfxCoreHelper : public NEO::ApiGfxCoreHelper { virtual bool platformSupportsImmediateComputeFlushTask() const = 0; virtual zet_debug_regset_type_intel_gpu_t getRegsetTypeForLargeGrfDetection() const = 0; virtual uint32_t getCmdListWaitOnMemoryDataSize() const = 0; + virtual bool hasUnifiedPostSyncAllocationLayout() const = 0; + virtual uint32_t getImmediateWritePostSyncOffset() const = 0; protected: L0GfxCoreHelper() = default; @@ -125,6 +127,8 @@ class L0GfxCoreHelperHw : public L0GfxCoreHelper { bool platformSupportsImmediateComputeFlushTask() const override; zet_debug_regset_type_intel_gpu_t getRegsetTypeForLargeGrfDetection() const override; uint32_t getCmdListWaitOnMemoryDataSize() const override; + bool hasUnifiedPostSyncAllocationLayout() const override; + uint32_t getImmediateWritePostSyncOffset() const override; protected: L0GfxCoreHelperHw() = default; diff --git a/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_base.inl b/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_base.inl index 8015a1a764..ed5d03c06e 100644 --- a/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_base.inl +++ b/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_base.inl @@ -5,6 +5,7 @@ * */ +#include "shared/source/command_container/implicit_scaling.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/helpers/gfx_core_helper.h" @@ -53,4 +54,14 @@ uint32_t L0GfxCoreHelperHw::getCmdListWaitOnMemoryDataSize() const { } } +template +bool L0GfxCoreHelperHw::hasUnifiedPostSyncAllocationLayout() const { + return false; +} + +template +uint32_t L0GfxCoreHelperHw::getImmediateWritePostSyncOffset() const { + return NEO::ImplicitScalingDispatch::getImmediateWritePostSyncOffset(); +} + } // namespace L0 diff --git a/level_zero/core/source/helpers/api_specific_config_l0.cpp b/level_zero/core/source/helpers/api_specific_config_l0.cpp index 86482e36ce..19e67ea85b 100644 --- a/level_zero/core/source/helpers/api_specific_config_l0.cpp +++ b/level_zero/core/source/helpers/api_specific_config_l0.cpp @@ -37,10 +37,6 @@ bool ApiSpecificConfig::isDeviceAllocationCacheEnabled() { return false; } -bool ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled() { - return (NEO::debugManager.flags.EnableDynamicPostSyncAllocLayout.get() != 0); -} - ApiSpecificConfig::ApiType ApiSpecificConfig::getApiType() { return ApiSpecificConfig::L0; } diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_barrier.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_barrier.cpp index 49e823d7c7..a983586d2f 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_barrier.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_barrier.cpp @@ -13,6 +13,7 @@ #include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.h" #include "level_zero/core/source/event/event.h" +#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.h" #include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.inl" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" @@ -307,6 +308,8 @@ struct MultiTileCommandListAppendBarrierFixture : public MultiTileCommandListFix using MI_MATH = typename FamilyType::MI_MATH; using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM; + auto &rootDeviceEnv = device->getNEODevice()->getRootDeviceEnvironment(); + ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; eventPoolDesc.count = 2; @@ -354,7 +357,7 @@ struct MultiTileCommandListAppendBarrierFixture : public MultiTileCommandListFix } size_t postBarrierSynchronization = NEO::MemorySynchronizationCommands::getSizeForSingleBarrier(false) + - NEO::MemorySynchronizationCommands::getSizeForSingleAdditionalSynchronization(device->getNEODevice()->getRootDeviceEnvironment()); + NEO::MemorySynchronizationCommands::getSizeForSingleAdditionalSynchronization(rootDeviceEnv); size_t stopRegisters = timestampRegisters + postBarrierSynchronization; auto useSizeBefore = cmdListStream->getUsed(); @@ -363,8 +366,10 @@ struct MultiTileCommandListAppendBarrierFixture : public MultiTileCommandListFix ASSERT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(2u, eventTimeStamp->getPacketsInUse()); + auto unifiedPostSyncLayout = device->getL0GfxCoreHelper().hasUnifiedPostSyncAllocationLayout(); + size_t totaSizedBarrierWithTimestampEvent = multiTileBarrierSize + timestampRegisters + stopRegisters; - if (NEO::ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled()) { + if (!unifiedPostSyncLayout) { totaSizedBarrierWithTimestampEvent += 4 * sizeof(MI_LOAD_REGISTER_IMM); } @@ -374,7 +379,7 @@ struct MultiTileCommandListAppendBarrierFixture : public MultiTileCommandListFix GenCmdList cmdList; auto registersSizeToParse = timestampRegisters; - if (NEO::ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled()) { + if (!unifiedPostSyncLayout) { registersSizeToParse += sizeof(MI_LOAD_REGISTER_IMM); } @@ -389,7 +394,7 @@ struct MultiTileCommandListAppendBarrierFixture : public MultiTileCommandListFix true); auto barrierOffset = timestampRegisters; - if (NEO::ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled()) { + if (!unifiedPostSyncLayout) { barrierOffset += 2 * sizeof(MI_LOAD_REGISTER_IMM); } diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_2.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_2.cpp index 24eb69471b..608173a676 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_2.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_2.cpp @@ -1517,8 +1517,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, MultiTileCommandListAppendLaunchKernelXeHpCoreTest, using DefaultWalkerType = typename FamilyType::DefaultWalkerType; using POSTSYNC_DATA = typename FamilyType::POSTSYNC_DATA; - debugManager.flags.EnableDynamicPostSyncAllocLayout.set(1); - ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.stype = ZE_STRUCTURE_TYPE_EVENT_POOL_DESC; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp index bb3a2f2695..7242617ee1 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp @@ -14,6 +14,7 @@ #include "shared/test/common/test_macros/hw_test.h" #include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.h" +#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.h" #include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.inl" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" @@ -264,8 +265,14 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent, givenMultiTileAndDynamicPostSy commandList->partitionCount = 2; EXPECT_EQ(ZE_RESULT_SUCCESS, commandList->appendSignalEvent(event->toHandle())); - size_t expectedSize = NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(device->getNEODevice()->getRootDeviceEnvironment(), false) + - (2 * sizeof(MI_LOAD_REGISTER_IMM)); + size_t expectedSize = NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(device->getNEODevice()->getRootDeviceEnvironment(), false); + + auto unifiedPostSyncLayout = device->getL0GfxCoreHelper().hasUnifiedPostSyncAllocationLayout(); + + if (!unifiedPostSyncLayout) { + expectedSize += (2 * sizeof(MI_LOAD_REGISTER_IMM)); + } + size_t usedSize = cmdStream->getUsed() - offset; EXPECT_EQ(expectedSize, usedSize); @@ -286,10 +293,15 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent, givenMultiTileAndDynamicPostSy auto endLriItor = cmdList.rbegin(); lriCmd = genCmdCast(*endLriItor); - ASSERT_NE(nullptr, lriCmd); - EXPECT_EQ(NEO::PartitionRegisters::addressOffsetCCSOffset, lriCmd->getRegisterOffset()); - EXPECT_EQ(NEO::ImplicitScalingDispatch::getImmediateWritePostSyncOffset(), lriCmd->getDataDword()); + if (unifiedPostSyncLayout) { + EXPECT_EQ(nullptr, lriCmd); + } else { + ASSERT_NE(nullptr, lriCmd); + + EXPECT_EQ(NEO::PartitionRegisters::addressOffsetCCSOffset, lriCmd->getRegisterOffset()); + EXPECT_EQ(NEO::ImplicitScalingDispatch::getImmediateWritePostSyncOffset(), lriCmd->getDataDword()); + } } event->setEventTimestampFlag(false); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_copy_event_xehp_and_later.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_copy_event_xehp_and_later.cpp index 538e00184a..7c481e2e42 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_copy_event_xehp_and_later.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_copy_event_xehp_and_later.cpp @@ -1261,14 +1261,10 @@ HWTEST2_F(MultiTileAppendMemoryCopyXeHpAndLaterMultiPacket, IsXeHpOrXeHpgCore) { arg.expectedPacketsInUse = 8; arg.expectedKernelCount = 3; - arg.expectedWalkerPostSyncOp = 3; + arg.expectedWalkerPostSyncOp = 1; arg.expectedPostSyncPipeControls = 1; arg.postSyncAddressZero = false; - if (NEO::ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled()) { - arg.expectedWalkerPostSyncOp = 1; - } - input.srcPtr = reinterpret_cast(0x1231); input.dstPtr = reinterpret_cast(0x200002345); input.size = 0x100002345; @@ -1307,14 +1303,10 @@ HWTEST2_F(MultiTileAppendMemoryCopyXeHpAndLaterMultiPacket, IsXeHpOrXeHpgCore) { arg.expectedPacketsInUse = 4; arg.expectedKernelCount = 1; - arg.expectedWalkerPostSyncOp = 3; + arg.expectedWalkerPostSyncOp = 1; arg.expectedPostSyncPipeControls = 1; arg.postSyncAddressZero = false; - if (NEO::ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled()) { - arg.expectedWalkerPostSyncOp = 1; - } - input.srcPtr = reinterpret_cast(0x1000); input.dstPtr = reinterpret_cast(0x20000000); input.size = 0x100000000; @@ -1446,14 +1438,10 @@ HWTEST2_F(MultiTileAppendMemoryCopyXeHpAndLaterSinglePacket, IsXeHpOrXeHpgCore) { arg.expectedPacketsInUse = 4; arg.expectedKernelCount = 1; - arg.expectedWalkerPostSyncOp = 3; + arg.expectedWalkerPostSyncOp = 1; arg.expectedPostSyncPipeControls = 1; arg.postSyncAddressZero = false; - if (NEO::ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled()) { - arg.expectedWalkerPostSyncOp = 1; - } - input.srcPtr = reinterpret_cast(0x1000); input.dstPtr = reinterpret_cast(0x20000000); input.size = 0x100000000; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_fill_event_xehp_and_later.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_fill_event_xehp_and_later.cpp index 54e37e2827..8980708066 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_fill_event_xehp_and_later.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_fill_event_xehp_and_later.cpp @@ -935,10 +935,7 @@ HWTEST2_F(MultiTileAppendFillEventMultiPacketTest, // two kernels and each kernel uses two packets (for two tiles), in total 4 arg.expectedPacketsInUse = 4; arg.expectedKernelCount = 2; - arg.expectedWalkerPostSyncOp = 3; - if (NEO::ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled()) { - arg.expectedWalkerPostSyncOp = 1; - } + arg.expectedWalkerPostSyncOp = 1; arg.expectedPostSyncPipeControls = 0; if (NEO::MemorySynchronizationCommands::getDcFlushEnable(true, input.device->getNEODevice()->getRootDeviceEnvironment())) { @@ -991,15 +988,11 @@ HWTEST2_F(MultiTileAppendFillEventMultiPacketTest, // kernel uses 4 packets, in addition to kernel two packets, use 2 packets to two tile cache flush arg.expectedPacketsInUse = 4; arg.expectedKernelCount = 1; - arg.expectedWalkerPostSyncOp = 3; + arg.expectedWalkerPostSyncOp = 1; // cache flush with event signal arg.expectedPostSyncPipeControls = 1; arg.postSyncAddressZero = false; - if (NEO::ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled()) { - arg.expectedWalkerPostSyncOp = 1; - } - input.eventPoolFlags = 0; if (input.signalAllPackets) { @@ -1216,10 +1209,7 @@ HWTEST2_F(MultiTileAppendFillCompactL3EventTest, } else { arg.expectedPacketsInUse = 4; arg.expectedKernelCount = 2; - arg.expectedWalkerPostSyncOp = 3; - if (NEO::ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled()) { - arg.expectedWalkerPostSyncOp = 1; - } + arg.expectedWalkerPostSyncOp = 1; arg.expectedPostSyncPipeControls = 0; arg.postSyncAddressZero = false; input.storeDataImmOffset = arg.expectedPacketsInUse * testEvent->getSinglePacketSize(); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp index 7f4a426ea8..4ff0a2f63d 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp @@ -94,21 +94,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandListTests, whenCommandListIsCreatedThenPCAnd EXPECT_TRUE(cmdSba->getDisableSupportForMultiGpuAtomicsForStatelessAccesses()); } -HWTEST2_F(CommandListTests, givenDebugFlagSetWhenCallingRegisterOffsetThenDontProgramMmio, IsAtLeastXeHpCore) { - DebugManagerStateRestore restorer; - debugManager.flags.EnableDynamicPostSyncAllocLayout.set(0); - - auto pCommandList = std::make_unique>>(); - pCommandList->initialize(device, NEO::EngineGroupType::compute, 0u); - auto &commandContainer = pCommandList->getCmdContainer(); - - auto offset = commandContainer.getCommandStream()->getUsed(); - - pCommandList->appendDispatchOffsetRegister(true, true); - - EXPECT_EQ(offset, commandContainer.getCommandStream()->getUsed()); -} - HWTEST2_F(CommandListTests, whenCommandListIsCreatedAndProgramExtendedPipeControlPriorToNonPipelinedStateCommandIsEnabledThenPCAndStateBaseAddressCmdsAreAddedAndCorrectlyProgrammed, IsAtLeastXeHpCore) { using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; @@ -548,13 +533,9 @@ HWTEST2_F(CommandListAppendLaunchKernelMultiTileCompactL3FlushDisabledTest, arg.expectedKernelCount = 1; arg.expectedPacketsInUse = 4; arg.expectedPostSyncPipeControls = 1; - arg.expectedWalkerPostSyncOp = 3; + arg.expectedWalkerPostSyncOp = 1; arg.postSyncAddressZero = false; - if (NEO::ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled()) { - arg.expectedWalkerPostSyncOp = 1; - } - input.eventPoolFlags = 0; testAppendLaunchKernelAndL3Flush(input, arg); @@ -664,7 +645,7 @@ struct CommandListSignalAllEventPacketFixture : public ModuleFixture { expectedWalkerPostSyncOp = 1; } - if (NEO::ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled() && expectedWalkerPostSyncOp == 3 && eventPoolFlags == 0 && multiTile != 0) { + if (expectedWalkerPostSyncOp == 3 && eventPoolFlags == 0 && multiTile != 0) { expectedWalkerPostSyncOp = 1; } @@ -1356,7 +1337,6 @@ HWTEST2_F(MultiTileCommandListSignalAllEventPacketTest, givenSignalPacketsEventW struct MultiTileCommandListSignalAllocLayoutTest : public MultiTileCommandListSignalAllEventPacketTest { void SetUp() override { - debugManager.flags.EnableDynamicPostSyncAllocLayout.set(1); MultiTileCommandListSignalAllEventPacketTest::SetUp(); } }; diff --git a/level_zero/core/test/unit_tests/sources/event/test_event.cpp b/level_zero/core/test/unit_tests/sources/event/test_event.cpp index 025979b42e..6bde6d38db 100644 --- a/level_zero/core/test/unit_tests/sources/event/test_event.cpp +++ b/level_zero/core/test/unit_tests/sources/event/test_event.cpp @@ -548,37 +548,6 @@ TEST_F(EventPoolIPCHandleTests, whenGettingIpcHandleForEventPoolWithDeviceAllocT using EventPoolCreateMultiDevice = Test; -HWTEST_F(EventPoolCreateMultiDevice, givenDebugFlagSetWhenCreatingEventThenUseTsPacketSize) { - debugManager.flags.EnableDynamicPostSyncAllocLayout.set(0); - - ASSERT_NE(0u, driverHandle->devices.size()); - auto device = driverHandle->devices[0]; - - auto deviceHandle = device->toHandle(); - - ze_event_pool_desc_t eventPoolDesc = {ZE_STRUCTURE_TYPE_EVENT_POOL_DESC}; - eventPoolDesc.count = 1; - - ze_result_t result = ZE_RESULT_SUCCESS; - auto eventPool = L0::EventPool::create(device->getDriverHandle(), context, 1, &deviceHandle, &eventPoolDesc, result); - EXPECT_EQ(ZE_RESULT_SUCCESS, result); - ze_event_desc_t eventDesc = {}; - ze_event_handle_t hEvent = nullptr; - - result = eventPool->createEvent(&eventDesc, &hEvent); - ASSERT_EQ(ZE_RESULT_SUCCESS, result); - - auto eventObj = Event::fromHandle(hEvent); - - constexpr size_t timestampPacketTypeSize = sizeof(typename FamilyType::TimestampPacketType); - - EXPECT_EQ(timestampPacketTypeSize * 4, eventObj->getSinglePacketSize()); - - eventObj->destroy(); - - eventPool->destroy(); -} - TEST_F(EventPoolCreateMultiDevice, whenGettingIpcHandleForEventPoolWhenHostShareableMemoryIsFalseThenUnsuportedIsReturned) { uint32_t numEvents = 4; ze_event_pool_desc_t eventPoolDesc = { @@ -3008,30 +2977,12 @@ HWTEST_F(EventSizeTests, whenCreatingEventPoolThenUseCorrectSizeAndAlignment) { EXPECT_EQ(timestampPacketTypeSize * 2, eventObj0->getContextEndOffset()); EXPECT_EQ(timestampPacketTypeSize * 3, eventObj0->getGlobalEndOffset()); - if (NEO::ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled()) { - EXPECT_EQ(sizeof(uint64_t), eventObj0->getSinglePacketSize()); - } else { - EXPECT_EQ(timestampPacketTypeSize * 4, eventObj0->getSinglePacketSize()); - } + EXPECT_EQ(l0GfxCoreHelper.getImmediateWritePostSyncOffset(), eventObj0->getSinglePacketSize()); auto hostPtrDiff = ptrDiff(eventObj1->getHostAddress(), eventObj0->getHostAddress()); EXPECT_EQ(expectedSize, hostPtrDiff); } -HWTEST_F(EventSizeTests, givenDebugFlagSetWhenCreatingEventThenUseTsPacketSize) { - debugManager.flags.EnableDynamicPostSyncAllocLayout.set(0); - - ze_result_t result = ZE_RESULT_SUCCESS; - eventPool.reset(EventPool::create(device->getDriverHandle(), context, 1, &hDevice, &eventPoolDesc, result)); - EXPECT_EQ(ZE_RESULT_SUCCESS, result); - - createEvents(); - - constexpr size_t timestampPacketTypeSize = sizeof(typename FamilyType::TimestampPacketType); - - EXPECT_EQ(timestampPacketTypeSize * 4, eventObj0->getSinglePacketSize()); -} - HWTEST_F(EventSizeTests, givenDebugFlagwhenCreatingEventPoolThenUseCorrectSizeAndAlignment) { auto &gfxCoreHelper = device->getGfxCoreHelper(); auto &hwInfo = device->getHwInfo(); @@ -3094,9 +3045,8 @@ HWTEST_F(EventSizeTests, givenDebugFlagwhenCreatingEventPoolThenUseCorrectSizeAn } } -HWTEST_F(EventTests, givenDebugFlagSetWhenCreatingNonTimestampEventsThenPacketsSizeIsQword) { +HWTEST_F(EventTests, whenCreatingNonTimestampEventsThenPacketsSizeIsQword) { DebugManagerStateRestore restore; - debugManager.flags.EnableDynamicPostSyncAllocLayout.set(1); ze_result_t result = ZE_RESULT_SUCCESS; @@ -3122,7 +3072,7 @@ HWTEST_F(EventTests, givenDebugFlagSetWhenCreatingNonTimestampEventsThenPacketsS auto timestampSinglePacketSize = NEO::TimestampPackets::getSinglePacketSize(); EXPECT_EQ(timestampSinglePacketSize, timestampEvent->getSinglePacketSize()); - EXPECT_EQ(sizeof(uint64_t), regularEvent->getSinglePacketSize()); + EXPECT_EQ(device->getL0GfxCoreHelper().getImmediateWritePostSyncOffset(), regularEvent->getSinglePacketSize()); timestampEvent->destroy(); regularEvent->destroy(); diff --git a/level_zero/core/test/unit_tests/sources/helper/api_specific_config_l0_tests.cpp b/level_zero/core/test/unit_tests/sources/helper/api_specific_config_l0_tests.cpp index c268656a70..ba7ef6c5d6 100644 --- a/level_zero/core/test/unit_tests/sources/helper/api_specific_config_l0_tests.cpp +++ b/level_zero/core/test/unit_tests/sources/helper/api_specific_config_l0_tests.cpp @@ -43,16 +43,6 @@ TEST(ApiSpecificConfigL0Tests, WhenCheckingIfDeviceAllocationCacheIsEnabledThenR EXPECT_FALSE(ApiSpecificConfig::isDeviceAllocationCacheEnabled()); } -TEST(ApiSpecificConfigL0Tests, GivenDebugFlagSetWhenCheckingIfDynamicPostSyncAllocLayoutEnabledThenReturnFalse) { - DebugManagerStateRestore restore; - - EXPECT_TRUE(ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled()); - - debugManager.flags.EnableDynamicPostSyncAllocLayout.set(0); - - EXPECT_FALSE(ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled()); -} - TEST(ApiSpecificConfigL0Tests, GivenDebugFlagCombinationsGetCorrectSharedAllocPrefetchEnabled) { DebugManagerStateRestore restore; diff --git a/level_zero/core/test/unit_tests/sources/helper/l0_gfx_core_helper_tests.cpp b/level_zero/core/test/unit_tests/sources/helper/l0_gfx_core_helper_tests.cpp index 5576d13aad..1f0b21b38d 100644 --- a/level_zero/core/test/unit_tests/sources/helper/l0_gfx_core_helper_tests.cpp +++ b/level_zero/core/test/unit_tests/sources/helper/l0_gfx_core_helper_tests.cpp @@ -5,6 +5,7 @@ * */ +#include "shared/source/command_container/implicit_scaling.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/basic_math.h" #include "shared/source/helpers/gfx_core_helper.h" @@ -887,6 +888,20 @@ HWTEST2_F(L0GfxCoreHelperTest, givenL0GfxCoreHelperOnGenPlatformsWhenGettingPlat EXPECT_FALSE(l0GfxCoreHelper.platformSupportsImmediateComputeFlushTask()); } +HWTEST_F(L0GfxCoreHelperTest, whenAskingForUnifiedPostSyncAllocLayoutThenReturnFalse) { + MockExecutionEnvironment executionEnvironment; + auto &l0GfxCoreHelper = executionEnvironment.rootDeviceEnvironments[0]->getHelper(); + + EXPECT_FALSE(l0GfxCoreHelper.hasUnifiedPostSyncAllocationLayout()); +} + +HWTEST_F(L0GfxCoreHelperTest, whenAskingForImmediateWritePostSyncOffsetThenReturnValueFromImplicitScalingHelper) { + MockExecutionEnvironment executionEnvironment; + auto &l0GfxCoreHelper = executionEnvironment.rootDeviceEnvironments[0]->getHelper(); + + EXPECT_EQ(NEO::ImplicitScalingDispatch::getImmediateWritePostSyncOffset(), l0GfxCoreHelper.getImmediateWritePostSyncOffset()); +} + TEST_F(L0GfxCoreHelperTest, givenL0GfxCoreHelperWhenGettingDefaultUseImmediateFlushTaskThenUsePlatformDefaultSetting) { MockExecutionEnvironment executionEnvironment; auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[0].get(); diff --git a/opencl/source/helpers/api_specific_config_ocl.cpp b/opencl/source/helpers/api_specific_config_ocl.cpp index cd9e11c1ea..53e15dac66 100644 --- a/opencl/source/helpers/api_specific_config_ocl.cpp +++ b/opencl/source/helpers/api_specific_config_ocl.cpp @@ -36,10 +36,6 @@ bool ApiSpecificConfig::isDeviceAllocationCacheEnabled() { return false; } -bool ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled() { - return false; -} - ApiSpecificConfig::ApiType ApiSpecificConfig::getApiType() { return ApiSpecificConfig::OCL; } diff --git a/opencl/test/unit_test/helpers/api_specific_config_ocl_tests.cpp b/opencl/test/unit_test/helpers/api_specific_config_ocl_tests.cpp index 7856b3a639..7d9b7b90fa 100644 --- a/opencl/test/unit_test/helpers/api_specific_config_ocl_tests.cpp +++ b/opencl/test/unit_test/helpers/api_specific_config_ocl_tests.cpp @@ -45,16 +45,6 @@ TEST(ApiSpecificConfigOclTests, WhenCheckingIfDeviceAllocationCacheIsEnabledThen EXPECT_FALSE(ApiSpecificConfig::isDeviceAllocationCacheEnabled()); } -TEST(ApiSpecificConfigOclTests, WhenCheckingIfDynamicPostSyncAllocLayoutEnabledThenReturnFalse) { - DebugManagerStateRestore restore; - - EXPECT_FALSE(ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled()); - - debugManager.flags.EnableDynamicPostSyncAllocLayout.set(1); - - EXPECT_FALSE(ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled()); -} - TEST(ApiSpecificConfigOclTests, givenEnableStatelessCompressionWhenProvidingSvmGpuAllocationThenPreferCompressedBuffer) { DebugManagerStateRestore dbgRestorer; debugManager.flags.RenderCompressedBuffersEnabled.set(1); diff --git a/shared/source/command_container/command_encoder_xehp_and_later.inl b/shared/source/command_container/command_encoder_xehp_and_later.inl index a1219ba956..2b1cb7b6cf 100644 --- a/shared/source/command_container/command_encoder_xehp_and_later.inl +++ b/shared/source/command_container/command_encoder_xehp_and_later.inl @@ -393,9 +393,7 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis if (args.partitionCount > 1 && !args.isInternal) { const uint64_t workPartitionAllocationGpuVa = args.device->getDefaultEngine().commandStreamReceiver->getWorkPartitionAllocationGpuAddress(); - if (args.eventAddress != 0 && !NEO::ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled()) { - walkerCmd.getPostSync().setOperation(POSTSYNC_DATA::OPERATION_WRITE_TIMESTAMP); - } + ImplicitScalingDispatch::dispatchCommands(*listCmdBufferStream, walkerCmd, &args.outWalkerPtr, diff --git a/shared/source/command_container/implicit_scaling_xehp_and_later.inl b/shared/source/command_container/implicit_scaling_xehp_and_later.inl index 4d60bfe9ad..413b8a7e1b 100644 --- a/shared/source/command_container/implicit_scaling_xehp_and_later.inl +++ b/shared/source/command_container/implicit_scaling_xehp_and_later.inl @@ -241,10 +241,7 @@ inline void ImplicitScalingDispatch::dispatchOffsetRegister(LinearStr template inline uint32_t ImplicitScalingDispatch::getImmediateWritePostSyncOffset() { - if (ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled()) { - return static_cast(sizeof(uint64_t)); - } - return static_cast(GfxCoreHelperHw::getSingleTimestampPacketSizeHw()); + return static_cast(sizeof(uint64_t)); } template diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl index 79fd9a001c..08c7c5d04b 100644 --- a/shared/source/debug_settings/debug_variables_base.inl +++ b/shared/source/debug_settings/debug_variables_base.inl @@ -254,7 +254,6 @@ DECLARE_DEBUG_VARIABLE(int32_t, ForceTlbFlush, -1, "-1: default, 0: Tlb flush d DECLARE_DEBUG_VARIABLE(int32_t, DebugSetMemoryDiagnosticsDelay, -1, "-1: default, >=0: delay time in minutes necessary for completion of Memory diagnostics") DECLARE_DEBUG_VARIABLE(int32_t, EnableDeviceStateVerification, -1, "-1: default, 0: disable, 1: enable check of device state before submit on Windows") DECLARE_DEBUG_VARIABLE(int32_t, EnableDeviceStateVerificationAfterFailedSubmission, -1, "-1: default, 0: disable, 1: enable check of device state after failed submit on Windows") -DECLARE_DEBUG_VARIABLE(int32_t, EnableDynamicPostSyncAllocLayout, -1, "-1: default, 0: Keep Timestamp size layout, 1: Use write immediate layout (qword) and switch dynamically to TS for profiling") DECLARE_DEBUG_VARIABLE(int32_t, PrintTimestampPacketUsage, -1, "-1: default, 0: Disabled, 1: Print when TSP is allocated, initialized, returned to pool, etc.") DECLARE_DEBUG_VARIABLE(int32_t, SynchronizeEventBeforeReset, -1, "-1: default, 0: Disabled, 1: Synchronize Event completion on host before calling reset. 2: Synchronize + print extra logs.") DECLARE_DEBUG_VARIABLE(int32_t, TrackNumCsrClientsOnSyncPoints, -1, "-1: default, 0: Disabled, 1: If set, synchronization points like zeEventHostSynchronize will unregister CmdQ from CSR clients") diff --git a/shared/source/helpers/api_specific_config.h b/shared/source/helpers/api_specific_config.h index 1a1581b267..e6b18605ad 100644 --- a/shared/source/helpers/api_specific_config.h +++ b/shared/source/helpers/api_specific_config.h @@ -23,7 +23,6 @@ struct ApiSpecificConfig { static bool getGlobalBindlessHeapConfiguration(); static bool getBindlessMode(const ReleaseHelper *); static bool isDeviceAllocationCacheEnabled(); - static bool isDynamicPostSyncAllocLayoutEnabled(); static ApiType getApiType(); static std::string getName(); static uint64_t getReducedMaxAllocSize(uint64_t maxAllocSize); diff --git a/shared/test/common/test_files/igdrcl.config b/shared/test/common/test_files/igdrcl.config index b357a848ea..5a4c5ecc11 100644 --- a/shared/test/common/test_files/igdrcl.config +++ b/shared/test/common/test_files/igdrcl.config @@ -542,7 +542,6 @@ OverrideHwIpVersion = -1 PrintGlobalTimestampInNs = 0 EnableDeviceStateVerification = -1 VfBarResourceAllocationWa = 1 -EnableDynamicPostSyncAllocLayout = -1 PrintTimestampPacketUsage = -1 TrackNumCsrClientsOnSyncPoints = -1 EventTimestampRefreshIntervalInMilliSec = -1 diff --git a/shared/test/unit_test/api_specific_config_ult.cpp b/shared/test/unit_test/api_specific_config_ult.cpp index 7bd886fb75..fea9d2203d 100644 --- a/shared/test/unit_test/api_specific_config_ult.cpp +++ b/shared/test/unit_test/api_specific_config_ult.cpp @@ -40,10 +40,6 @@ bool ApiSpecificConfig::isDeviceAllocationCacheEnabled() { return false; } -bool ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled() { - return (NEO::debugManager.flags.EnableDynamicPostSyncAllocLayout.get() == 1); -} - ApiSpecificConfig::ApiType ApiSpecificConfig::getApiType() { return apiTypeForUlts; } diff --git a/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp b/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp index 2cc3001bdd..f3775f4a90 100644 --- a/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp +++ b/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp @@ -1058,38 +1058,6 @@ struct CommandEncodeStatesImplicitScalingFixtureT : public CommandEncodeStatesFi using CommandEncodeStatesImplicitScalingFixture = CommandEncodeStatesImplicitScalingFixtureT; using CommandEncodeStatesImplicitScaling = Test; -HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesImplicitScaling, - givenStaticPartitioningWhenNonTimestampEventProvidedThenExpectTimestampComputeWalkerPostSync) { - using DefaultWalkerType = typename FamilyType::DefaultWalkerType; - using POSTSYNC_DATA = typename FamilyType::POSTSYNC_DATA; - - uint32_t dims[] = {16, 1, 1}; - std::unique_ptr dispatchInterface(new MockDispatchKernelEncoder()); - - bool requiresUncachedMocs = false; - uint64_t eventAddress = 0xFF112233000; - EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); - dispatchArgs.eventAddress = eventAddress; - dispatchArgs.partitionCount = 2; - - EncodeDispatchKernel::template encode(*cmdContainer.get(), dispatchArgs); - size_t usedBuffer = cmdContainer->getCommandStream()->getUsed(); - EXPECT_EQ(2u, dispatchArgs.partitionCount); - - GenCmdList partitionedWalkerList; - CmdParse::parseCommandBuffer( - partitionedWalkerList, - cmdContainer->getCommandStream()->getCpuBase(), - usedBuffer); - - auto itor = find(partitionedWalkerList.begin(), partitionedWalkerList.end()); - ASSERT_NE(itor, partitionedWalkerList.end()); - auto partitionWalkerCmd = genCmdCast(*itor); - auto &postSync = partitionWalkerCmd->getPostSync(); - EXPECT_EQ(POSTSYNC_DATA::OPERATION_WRITE_TIMESTAMP, postSync.getOperation()); - EXPECT_EQ(eventAddress, postSync.getDestinationAddress()); -} - HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesImplicitScaling, givenCooperativeKernelWhenEncodingDispatchKernelThenExpectPartitionSizeEqualWorkgroupSize) { using DefaultWalkerType = typename FamilyType::DefaultWalkerType; using BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; diff --git a/shared/test/unit_test/encoders/test_implicit_scaling_xehp_and_later.cpp b/shared/test/unit_test/encoders/test_implicit_scaling_xehp_and_later.cpp index c24374b858..c8b082cff1 100644 --- a/shared/test/unit_test/encoders/test_implicit_scaling_xehp_and_later.cpp +++ b/shared/test/unit_test/encoders/test_implicit_scaling_xehp_and_later.cpp @@ -1564,15 +1564,3 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, auto bbStart = reinterpret_cast(*bbStartList.begin()); EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, bbStart->getSecondLevelBatchBuffer()); } - -HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, givenDebugFlagSetWhenCheckingImmWriteOffsetThenReturnQwordSize) { - EXPECT_EQ(static_cast(GfxCoreHelperHw::getSingleTimestampPacketSizeHw()), ImplicitScalingDispatch::getImmediateWritePostSyncOffset()); - - debugManager.flags.EnableDynamicPostSyncAllocLayout.set(1); - - if (ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled()) { - EXPECT_EQ(static_cast(sizeof(uint64_t)), ImplicitScalingDispatch::getImmediateWritePostSyncOffset()); - } else { - EXPECT_EQ(static_cast(GfxCoreHelperHw::getSingleTimestampPacketSizeHw()), ImplicitScalingDispatch::getImmediateWritePostSyncOffset()); - } -} \ No newline at end of file