From c66546df7327994c08c37c5737954d7b407b3684 Mon Sep 17 00:00:00 2001 From: "Lu, Wenbin" Date: Fri, 24 Mar 2023 21:26:48 +0000 Subject: [PATCH] Disable kernel timestamp when not using implicit scaling Related-To: LOCI-2826 Signed-off-by: Lu, Wenbin --- level_zero/core/source/event/event.cpp | 4 ++ level_zero/core/source/event/event.h | 6 +++ level_zero/core/source/event/event_impl.inl | 3 +- ...test_cmdlist_copy_event_xehp_and_later.cpp | 9 ++-- .../cmdlist/test_cmdlist_xehp_and_later.cpp | 7 +-- .../unit_tests/sources/event/test_event.cpp | 45 ++++++++++++++++--- .../xe_hpc_core/test_event_xe_hpc_core.cpp | 5 ++- 7 files changed, 59 insertions(+), 20 deletions(-) diff --git a/level_zero/core/source/event/event.cpp b/level_zero/core/source/event/event.cpp index 7c3cbf3b72..4d7bacc3b9 100644 --- a/level_zero/core/source/event/event.cpp +++ b/level_zero/core/source/event/event.cpp @@ -72,6 +72,8 @@ ze_result_t EventPool::initialize(DriverHandle *driver, Context *context, uint32 if (maxRootDeviceIndex < eventDevice->getNEODevice()->getRootDeviceIndex()) { maxRootDeviceIndex = eventDevice->getNEODevice()->getRootDeviceIndex(); } + + isImplicitScalingCapable |= eventDevice->isImplicitScalingCapable(); } rootDeviceIndices.remove_duplicates(); @@ -223,6 +225,7 @@ ze_result_t EventPool::getIpcHandle(ze_ipc_event_pool_handle_t *ipcHandle) { poolData.rootDeviceIndex = this->getDevice()->getRootDeviceIndex(); poolData.isDeviceEventPoolAllocation = this->isDeviceEventPoolAllocation; poolData.isHostVisibleEventPoolAllocation = this->isHostVisibleEventPoolAllocation; + poolData.isImplicitScalingCapable = this->isImplicitScalingCapable; poolData.maxEventPackets = this->getEventMaxPackets(); poolData.numDevices = static_cast(this->devices.size()); @@ -239,6 +242,7 @@ ze_result_t EventPool::openEventPoolIpcHandle(const ze_ipc_event_pool_handle_t & auto eventPool = std::make_unique(&desc); eventPool->isDeviceEventPoolAllocation = poolData.isDeviceEventPoolAllocation; eventPool->isHostVisibleEventPoolAllocation = poolData.isHostVisibleEventPoolAllocation; + eventPool->isImplicitScalingCapable = poolData.isImplicitScalingCapable; ze_device_handle_t *deviceHandlesUsed = deviceHandles; UNRECOVERABLE_IF(numDevices == 0); diff --git a/level_zero/core/source/event/event.h b/level_zero/core/source/event/event.h index 8f216e4494..a6ce882b74 100644 --- a/level_zero/core/source/event/event.h +++ b/level_zero/core/source/event/event.h @@ -49,6 +49,7 @@ struct IpcEventPoolData { uint32_t numDevices = 0; bool isDeviceEventPoolAllocation = false; bool isHostVisibleEventPoolAllocation = false; + bool isImplicitScalingCapable = false; }; #pragma pack() static_assert(sizeof(IpcEventPoolData) <= ZE_MAX_IPC_HANDLE_SIZE, "IpcEventPoolData is bigger than ZE_MAX_IPC_HANDLE_SIZE"); @@ -299,6 +300,10 @@ struct EventPool : _ze_event_pool_handle_t { return isImportedIpcPool; } + bool isImplicitScalingCapableFlagSet() const { + return isImplicitScalingCapable; + } + protected: EventPool() = default; EventPool(size_t numEvents) : numEvents(numEvents) {} @@ -323,6 +328,7 @@ struct EventPool : _ze_event_pool_handle_t { bool isHostVisibleEventPoolAllocation = false; bool isImportedIpcPool = false; bool isShareableEventMemory = false; + bool isImplicitScalingCapable = false; }; } // namespace L0 diff --git a/level_zero/core/source/event/event_impl.inl b/level_zero/core/source/event/event_impl.inl index 6ba8b72996..77ae963979 100644 --- a/level_zero/core/source/event/event_impl.inl +++ b/level_zero/core/source/event/event_impl.inl @@ -31,7 +31,6 @@ Event *Event::create(EventPool *eventPool, const ze_event_desc_t *desc, Device * event->setEventTimestampFlag(true); } auto &hwInfo = neoDevice->getHardwareInfo(); - auto &l0GfxCoreHelper = neoDevice->getRootDeviceEnvironment().getHelper(); event->signalAllEventPackets = L0GfxCoreHelper::useSignalAllEventPackets(hwInfo); @@ -51,7 +50,7 @@ Event *Event::create(EventPool *eventPool, const ze_event_desc_t *desc, Device * event->kernelEventCompletionData = std::make_unique[]>(event->maxKernelCount); - bool useContextEndOffset = l0GfxCoreHelper.multiTileCapablePlatform(); + bool useContextEndOffset = eventPool->isImplicitScalingCapableFlagSet(); int32_t overrideUseContextEndOffset = NEO::DebugManager.flags.UseContextEndOffsetForEventCompletion.get(); if (overrideUseContextEndOffset != -1) { useContextEndOffset = !!overrideUseContextEndOffset; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_copy_event_xehp_and_later.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_copy_event_xehp_and_later.cpp index f7db699d6c..8a58e664c8 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_copy_event_xehp_and_later.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_copy_event_xehp_and_later.cpp @@ -971,10 +971,9 @@ HWTEST2_F(AppendMemoryCopyXeHpAndLaterMultiPacket, HWTEST2_F(AppendMemoryCopyXeHpAndLaterMultiPacket, givenCommandListAndEventWithSignalScopeWhenImmediateProvidedByComputeWalkerPostSyncPassedToMemoryCopyThenAppendProfilingCalledForThreeSeparateKernelsAndL3FlushWithPostSyncAddedOnce, IsXeHpOrXeHpgCore) { - auto &l0GfxCoreHelper = input.device->getNEODevice()->getRootDeviceEnvironment().getHelper(); arg.expectedPacketsInUse = 4; arg.expectedKernelCount = 3; - arg.expectedWalkerPostSyncOp = l0GfxCoreHelper.multiTileCapablePlatform() ? 3 : 1; + arg.expectedWalkerPostSyncOp = input.device->isImplicitScalingCapable() ? 3 : 1; arg.expectedPostSyncPipeControls = 1; arg.postSyncAddressZero = false; @@ -1014,10 +1013,9 @@ HWTEST2_F(AppendMemoryCopyXeHpAndLaterMultiPacket, HWTEST2_F(AppendMemoryCopyXeHpAndLaterMultiPacket, givenCommandListAndEventWithSignalScopeWhenImmediateProvidedByComputeWalkerPostSyncPassedToMemoryCopyThenAppendProfilingCalledForSingleSeparateKernelAndL3FlushWithPostSyncAddedOnce, IsXeHpOrXeHpgCore) { - auto &l0GfxCoreHelper = input.device->getNEODevice()->getRootDeviceEnvironment().getHelper(); arg.expectedPacketsInUse = 2; arg.expectedKernelCount = 1; - arg.expectedWalkerPostSyncOp = l0GfxCoreHelper.multiTileCapablePlatform() ? 3 : 1; + arg.expectedWalkerPostSyncOp = input.device->isImplicitScalingCapable() ? 3 : 1; arg.expectedPostSyncPipeControls = 1; arg.postSyncAddressZero = false; @@ -1163,10 +1161,9 @@ HWTEST2_F(AppendMemoryCopyXeHpAndLaterSinglePacket, HWTEST2_F(AppendMemoryCopyXeHpAndLaterSinglePacket, givenCommandListAndEventWithSignalScopeWhenImmediateProvidedByComputeWalkerPostSyncPassedToMemoryCopyThenAppendProfilingCalledForSingleSeparateKernelAndL3FlushWithPostSyncAddedOnce, IsXeHpOrXeHpgCore) { - auto &l0GfxCoreHelper = input.device->getNEODevice()->getRootDeviceEnvironment().getHelper(); arg.expectedPacketsInUse = 2; arg.expectedKernelCount = 1; - arg.expectedWalkerPostSyncOp = l0GfxCoreHelper.multiTileCapablePlatform() ? 3 : 1; + arg.expectedWalkerPostSyncOp = input.device->isImplicitScalingCapable() ? 3 : 1; arg.expectedPostSyncPipeControls = 1; arg.postSyncAddressZero = false; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp index 57a35831ea..b06cadbec8 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp @@ -458,11 +458,10 @@ HWTEST2_F(CommandListAppendLaunchKernelCompactL3FlushDisabledTest, HWTEST2_F(CommandListAppendLaunchKernelCompactL3FlushDisabledTest, givenAppendKernelWithSignalScopeImmediateEventWhenComputeWalkerImmediatePostsyncAndL3ImmediatePostsyncUsedThenExpectComputeWalkerAndPipeControlPostsync, IsXeHpOrXeHpgCore) { - auto &l0GfxCoreHelper = input.device->getNEODevice()->getRootDeviceEnvironment().getHelper(); arg.expectedKernelCount = 1; arg.expectedPacketsInUse = 2; arg.expectedPostSyncPipeControls = 1; - arg.expectedWalkerPostSyncOp = l0GfxCoreHelper.multiTileCapablePlatform() ? 3 : 1; + arg.expectedWalkerPostSyncOp = input.device->isImplicitScalingCapable() ? 3 : 1; arg.postSyncAddressZero = false; input.eventPoolFlags = 0; @@ -595,8 +594,6 @@ struct CommandListSignalAllEventPacketFixture : public ModuleFixture { using OPERATION = typename POSTSYNC_DATA::OPERATION; using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM; - auto &l0GfxCoreHelper = device->getNEODevice()->getRootDeviceEnvironment().getHelper(); - auto commandList = std::make_unique>>(); auto result = commandList->initialize(device, NEO::EngineGroupType::Compute, 0u); ASSERT_EQ(ZE_RESULT_SUCCESS, result); @@ -634,7 +631,7 @@ struct CommandListSignalAllEventPacketFixture : public ModuleFixture { auto firstWalker = itorWalkers[0]; uint32_t expectedWalkerPostSyncOp = 3; - if (multiTile == 0 && eventPoolFlags == 0 && !l0GfxCoreHelper.multiTileCapablePlatform()) { + if (multiTile == 0 && eventPoolFlags == 0 && !eventPool->isImplicitScalingCapableFlagSet()) { expectedWalkerPostSyncOp = 1; } auto walkerCmd = genCmdCast(*firstWalker); diff --git a/level_zero/core/test/unit_tests/sources/event/test_event.cpp b/level_zero/core/test/unit_tests/sources/event/test_event.cpp index e25bc88830..8aa418c36a 100644 --- a/level_zero/core/test/unit_tests/sources/event/test_event.cpp +++ b/level_zero/core/test/unit_tests/sources/event/test_event.cpp @@ -408,6 +408,42 @@ TEST_F(EventPoolIPCHandleTests, whenGettingIpcHandleForEventPoolThenHandleAndIsH driverHandle->setMemoryManager(curMemoryManager); } +TEST_F(EventPoolIPCHandleTests, whenGettingIpcHandleForEventPoolThenIsImplicitScalingCapableReturnedInHandle) { + uint32_t numEvents = 2; + ze_event_pool_desc_t eventPoolDesc = { + ZE_STRUCTURE_TYPE_EVENT_POOL_DESC, + nullptr, + ZE_EVENT_POOL_FLAG_IPC, + numEvents}; + + auto deviceHandle = device->toHandle(); + ze_result_t result = ZE_RESULT_SUCCESS; + auto curMemoryManager = driverHandle->getMemoryManager(); + MemoryManagerEventPoolIpcMock *mockMemoryManager = new MemoryManagerEventPoolIpcMock(*neoDevice->executionEnvironment); + driverHandle->setMemoryManager(mockMemoryManager); + auto eventPool = EventPool::create(driverHandle.get(), context, 1, &deviceHandle, &eventPoolDesc, result); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_NE(nullptr, eventPool); + + ze_ipc_event_pool_handle_t ipcHandle = {}; + ze_result_t res = eventPool->getIpcHandle(&ipcHandle); + EXPECT_EQ(res, ZE_RESULT_SUCCESS); + + auto &ipcHandleData = *reinterpret_cast(ipcHandle.data); + constexpr uint64_t expectedHandle = static_cast(-1); + EXPECT_NE(expectedHandle, ipcHandleData.handle); + + EXPECT_EQ(ipcHandleData.numEvents, 2u); + EXPECT_EQ(ipcHandleData.numDevices, 1u); + EXPECT_EQ(ipcHandleData.isImplicitScalingCapable, device->isImplicitScalingCapable()); + EXPECT_EQ(ipcHandleData.isImplicitScalingCapable, eventPool->isImplicitScalingCapableFlagSet()); + + res = eventPool->destroy(); + EXPECT_EQ(res, ZE_RESULT_SUCCESS); + delete mockMemoryManager; + driverHandle->setMemoryManager(curMemoryManager); +} + TEST_F(EventPoolIPCHandleTests, whenGettingIpcHandleForEventPoolThenHandleAndNumDevicesReturnedInHandle) { uint32_t numEvents = 4; ze_event_pool_desc_t eventPoolDesc = { @@ -1285,7 +1321,7 @@ TEST_F(EventCreate, givenEventWhenSignaledAndResetFromTheHostThenCorrectDataAndO auto event = std::unique_ptr(l0GfxCoreHelper.createEvent(eventPool.get(), &eventDesc, device)); ASSERT_NE(nullptr, event); - if (l0GfxCoreHelper.multiTileCapablePlatform()) { + if (eventPool->isImplicitScalingCapableFlagSet()) { EXPECT_TRUE(event->isUsingContextEndOffset()); } else { EXPECT_FALSE(event->isUsingContextEndOffset()); @@ -1349,9 +1385,7 @@ HWTEST2_F(EventCreate, givenPlatformSupportMultTileWhenDebugKeyIsSetToNotUseCont DebugManagerStateRestore restorer; NEO::DebugManager.flags.UseContextEndOffsetForEventCompletion.set(0); auto &l0GfxCoreHelper = getHelper(); - - bool useContextEndOffset = l0GfxCoreHelper.multiTileCapablePlatform(); - EXPECT_TRUE(useContextEndOffset); + EXPECT_TRUE(l0GfxCoreHelper.multiTileCapablePlatform()); ze_event_pool_desc_t eventPoolDesc = { ZE_STRUCTURE_TYPE_EVENT_POOL_DESC, @@ -1387,8 +1421,7 @@ HWTEST2_F(EventCreate, givenPlatformNotSupportsMultTileWhenDebugKeyIsSetToUseCon DebugManagerStateRestore restorer; NEO::DebugManager.flags.UseContextEndOffsetForEventCompletion.set(1); auto &l0GfxCoreHelper = getHelper(); - bool useContextEndOffset = l0GfxCoreHelper.multiTileCapablePlatform(); - EXPECT_FALSE(useContextEndOffset); + EXPECT_FALSE(l0GfxCoreHelper.multiTileCapablePlatform()); ze_event_pool_desc_t eventPoolDesc = { ZE_STRUCTURE_TYPE_EVENT_POOL_DESC, diff --git a/level_zero/core/test/unit_tests/xe_hpc_core/test_event_xe_hpc_core.cpp b/level_zero/core/test/unit_tests/xe_hpc_core/test_event_xe_hpc_core.cpp index 2a01b6d64c..6cee55aca3 100644 --- a/level_zero/core/test/unit_tests/xe_hpc_core/test_event_xe_hpc_core.cpp +++ b/level_zero/core/test/unit_tests/xe_hpc_core/test_event_xe_hpc_core.cpp @@ -62,8 +62,11 @@ HWTEST2_F(EventPoolIPCHandleHpcCoreTests, whenGettingIpcHandleForEventPoolWithDe EXPECT_NE(expectedHandle, ipcHandleData.handle); EXPECT_EQ(numEvents, ipcHandleData.numEvents); EXPECT_EQ(0u, ipcHandleData.rootDeviceIndex); + EXPECT_EQ(1u, ipcHandleData.numDevices); EXPECT_TRUE(ipcHandleData.isDeviceEventPoolAllocation); EXPECT_TRUE(ipcHandleData.isHostVisibleEventPoolAllocation); + EXPECT_EQ(ipcHandleData.isImplicitScalingCapable, device->isImplicitScalingCapable()); + EXPECT_EQ(ipcHandleData.isImplicitScalingCapable, eventPool->isImplicitScalingCapableFlagSet()); res = eventPool->destroy(); EXPECT_EQ(res, ZE_RESULT_SUCCESS); @@ -114,4 +117,4 @@ HWTEST2_F(EventPoolIPCHandleHpcCoreTests, whenOpeningIpcHandleForEventPoolWithHo } } // namespace ult -} // namespace L0 \ No newline at end of file +} // namespace L0