From 595aa2f62641da41a82d09372bfae1a5b1c8c18b Mon Sep 17 00:00:00 2001 From: Zbigniew Zdanowicz Date: Tue, 13 Dec 2022 18:50:36 +0000 Subject: [PATCH] Fix issues in signal all event packets 6/n This fix unifies setting event properties in event pool, so they are shared between all event pool devices. Related-To: NEO-7490 Signed-off-by: Zbigniew Zdanowicz --- level_zero/core/source/device/device.h | 1 + level_zero/core/source/device/device_imp.cpp | 8 ++++++ level_zero/core/source/device/device_imp.h | 1 + level_zero/core/source/driver/driver_handle.h | 1 + .../core/source/driver/driver_handle_imp.cpp | 18 +++++++++++++ .../core/source/driver/driver_handle_imp.h | 1 + level_zero/core/source/event/event.cpp | 2 ++ level_zero/core/source/event/event.h | 5 ++++ level_zero/core/source/event/event_impl.inl | 11 +++----- .../core/test/unit_tests/mocks/mock_device.h | 1 + .../unit_tests/mocks/mock_driver_handle.h | 1 + .../unit_tests/sources/event/test_event.cpp | 25 ++++++++++--------- 12 files changed, 56 insertions(+), 19 deletions(-) diff --git a/level_zero/core/source/device/device.h b/level_zero/core/source/device/device.h index dcadb2f1aa..864082481c 100644 --- a/level_zero/core/source/device/device.h +++ b/level_zero/core/source/device/device.h @@ -139,6 +139,7 @@ struct Device : _ze_device_handle_t { virtual void storeReusableAllocation(NEO::GraphicsAllocation &alloc) = 0; virtual ze_result_t getFabricVertex(ze_fabric_vertex_handle_t *phVertex) = 0; virtual uint32_t getEventMaxPacketCount() const = 0; + virtual uint32_t getEventMaxKernelCount() const = 0; protected: NEO::Device *neoDevice = nullptr; diff --git a/level_zero/core/source/device/device_imp.cpp b/level_zero/core/source/device/device_imp.cpp index 6904baef63..a442435a3a 100644 --- a/level_zero/core/source/device/device_imp.cpp +++ b/level_zero/core/source/device/device_imp.cpp @@ -1615,4 +1615,12 @@ uint32_t DeviceImp::getEventMaxPacketCount() const { } return basePackets; } + +uint32_t 
DeviceImp::getEventMaxKernelCount() const { + const auto &hardwareInfo = this->getHwInfo(); + auto &l0GfxCoreHelper = L0GfxCoreHelper::get(hardwareInfo.platform.eRenderCoreFamily); + + return l0GfxCoreHelper.getEventMaxKernelCount(hardwareInfo); +} + } // namespace L0 diff --git a/level_zero/core/source/device/device_imp.h b/level_zero/core/source/device/device_imp.h index a41f7dfe1b..fcc1f0ebdb 100644 --- a/level_zero/core/source/device/device_imp.h +++ b/level_zero/core/source/device/device_imp.h @@ -146,6 +146,7 @@ struct DeviceImp : public Device { ze_result_t queryDeviceLuid(ze_device_luid_ext_properties_t *deviceLuidProperties); ze_result_t setDeviceLuid(ze_device_luid_ext_properties_t *deviceLuidProperties); uint32_t getEventMaxPacketCount() const override; + uint32_t getEventMaxKernelCount() const override; uint32_t queryDeviceNodeMask(); protected: diff --git a/level_zero/core/source/driver/driver_handle.h b/level_zero/core/source/driver/driver_handle.h index 606b0dc90f..9db1542923 100644 --- a/level_zero/core/source/driver/driver_handle.h +++ b/level_zero/core/source/driver/driver_handle.h @@ -67,6 +67,7 @@ struct DriverHandle : _ze_driver_handle_t { uintptr_t *gpuAddress) = 0; virtual ze_result_t fabricVertexGetExp(uint32_t *pCount, ze_fabric_vertex_handle_t *phDevices) = 0; virtual uint32_t getEventMaxPacketCount(uint32_t numDevices, ze_device_handle_t *deviceHandles) const = 0; + virtual uint32_t getEventMaxKernelCount(uint32_t numDevices, ze_device_handle_t *deviceHandles) const = 0; static DriverHandle *fromHandle(ze_driver_handle_t handle) { return static_cast(handle); } inline ze_driver_handle_t toHandle() { return this; } diff --git a/level_zero/core/source/driver/driver_handle_imp.cpp b/level_zero/core/source/driver/driver_handle_imp.cpp index 63d3cccb0a..a3d3d92acd 100644 --- a/level_zero/core/source/driver/driver_handle_imp.cpp +++ b/level_zero/core/source/driver/driver_handle_imp.cpp @@ -764,4 +764,22 @@ uint32_t 
DriverHandleImp::getEventMaxPacketCount(uint32_t numDevices, ze_device_ return maxCount; } +uint32_t DriverHandleImp::getEventMaxKernelCount(uint32_t numDevices, ze_device_handle_t *deviceHandles) const { + uint32_t maxCount = 0; + + if (numDevices == 0) { + for (auto device : this->devices) { + auto deviceMaxCount = device->getEventMaxKernelCount(); + maxCount = std::max(maxCount, deviceMaxCount); + } + } else { + for (uint32_t i = 0; i < numDevices; i++) { + auto deviceMaxCount = Device::fromHandle(deviceHandles[i])->getEventMaxKernelCount(); + maxCount = std::max(maxCount, deviceMaxCount); + } + } + + return maxCount; +} + } // namespace L0 diff --git a/level_zero/core/source/driver/driver_handle_imp.h b/level_zero/core/source/driver/driver_handle_imp.h index a94293977f..ba13651a9c 100644 --- a/level_zero/core/source/driver/driver_handle_imp.h +++ b/level_zero/core/source/driver/driver_handle_imp.h @@ -83,6 +83,7 @@ struct DriverHandleImp : public DriverHandle { ze_result_t fabricEdgeGetExp(ze_fabric_vertex_handle_t hVertexA, ze_fabric_vertex_handle_t hVertexB, uint32_t *pCount, ze_fabric_edge_handle_t *phEdges); uint32_t getEventMaxPacketCount(uint32_t numDevices, ze_device_handle_t *deviceHandles) const override; + uint32_t getEventMaxKernelCount(uint32_t numDevices, ze_device_handle_t *deviceHandles) const override; std::unique_ptr hostPointerManager; // Experimental functions diff --git a/level_zero/core/source/event/event.cpp b/level_zero/core/source/event/event.cpp index cf515e390a..daea987b33 100644 --- a/level_zero/core/source/event/event.cpp +++ b/level_zero/core/source/event/event.cpp @@ -168,8 +168,10 @@ void EventPoolImp::initializeSizeParameters(uint32_t numDevices, ze_device_handl bool useDynamicEventPackets = l0GfxCoreHelper.useDynamicEventPacketsCount(hwInfo); eventPackets = EventPacketsCount::eventPackets; + maxKernelCount = EventPacketsCount::maxKernelSplit; if (useDynamicEventPackets) { eventPackets = driver.getEventMaxPacketCount(numDevices, 
deviceHandles); + maxKernelCount = driver.getEventMaxKernelCount(numDevices, deviceHandles); } setEventSize(static_cast(alignUp(eventPackets * gfxCoreHelper.getSingleTimestampPacketSize(), eventAlignment))); diff --git a/level_zero/core/source/event/event.h b/level_zero/core/source/event/event.h index 535af69f12..5510be2977 100644 --- a/level_zero/core/source/event/event.h +++ b/level_zero/core/source/event/event.h @@ -302,6 +302,10 @@ struct EventPool : _ze_event_pool_handle_t { return false; } + uint32_t getMaxKernelCount() const { + return maxKernelCount; + } + std::unique_ptr eventPoolAllocations; ze_event_pool_flags_t eventPoolFlags; @@ -314,6 +318,7 @@ struct EventPool : _ze_event_pool_handle_t { uint32_t eventAlignment = 0; uint32_t eventSize = 0; uint32_t eventPackets = 0; + uint32_t maxKernelCount = 0; }; struct EventPoolImp : public EventPool { diff --git a/level_zero/core/source/event/event_impl.inl b/level_zero/core/source/event/event_impl.inl index b48eca05e9..aed0163fc8 100644 --- a/level_zero/core/source/event/event_impl.inl +++ b/level_zero/core/source/event/event_impl.inl @@ -29,13 +29,7 @@ Event *Event::create(EventPool *eventPool, const ze_event_desc_t *desc, Device * auto &hwInfo = neoDevice->getHardwareInfo(); auto &l0GfxCoreHelper = neoDevice->getRootDeviceEnvironment().getHelper(); - uint32_t maxKernels = EventPacketsCount::maxKernelSplit; - if (l0GfxCoreHelper.useDynamicEventPacketsCount(hwInfo)) { - maxKernels = l0GfxCoreHelper.getEventMaxKernelCount(hwInfo); - } - event->signalAllEventPackets = L0GfxCoreHelper::useSignalAllEventPackets(hwInfo); - event->kernelEventCompletionData = std::make_unique[]>(maxKernels); auto alloc = eventPool->getAllocation().getGraphicsAllocation(neoDevice->getRootDeviceIndex()); @@ -46,9 +40,12 @@ Event *Event::create(EventPool *eventPool, const ze_event_desc_t *desc, Device * event->signalScope = desc->signal; event->waitScope = desc->wait; event->csr = csr; - event->maxKernelCount = maxKernels; + 
event->maxKernelCount = eventPool->getMaxKernelCount(); event->maxPacketCount = eventPool->getEventMaxPackets(); + event->kernelEventCompletionData = + std::make_unique[]>(event->maxKernelCount); + bool useContextEndOffset = l0GfxCoreHelper.multiTileCapablePlatform(); int32_t overrideUseContextEndOffset = NEO::DebugManager.flags.UseContextEndOffsetForEventCompletion.get(); if (overrideUseContextEndOffset != -1) { diff --git a/level_zero/core/test/unit_tests/mocks/mock_device.h b/level_zero/core/test/unit_tests/mocks/mock_device.h index f6b4efd4e9..6fec80fe2d 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_device.h +++ b/level_zero/core/test/unit_tests/mocks/mock_device.h @@ -84,6 +84,7 @@ struct Mock : public Device { ADDMETHOD_NOBASE_VOIDRETURN(storeReusableAllocation, (NEO::GraphicsAllocation & alloc)); ADDMETHOD_NOBASE(getFabricVertex, ze_result_t, ZE_RESULT_SUCCESS, (ze_fabric_vertex_handle_t * phVertex)); ADDMETHOD_CONST_NOBASE(getEventMaxPacketCount, uint32_t, 8, ()) + ADDMETHOD_CONST_NOBASE(getEventMaxKernelCount, uint32_t, 3, ()) DebugSession *createDebugSession(const zet_debug_config_t &config, ze_result_t &result, bool isRootAttach) override { result = ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; diff --git a/level_zero/core/test/unit_tests/mocks/mock_driver_handle.h b/level_zero/core/test/unit_tests/mocks/mock_driver_handle.h index b97b0ce60c..422bd82798 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_driver_handle.h +++ b/level_zero/core/test/unit_tests/mocks/mock_driver_handle.h @@ -36,6 +36,7 @@ struct Mock : public DriverHandleImp { ADDMETHOD_NOBASE(getHostPointerBaseAddress, ze_result_t, ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, (void *ptr, void **baseAddress)) ADDMETHOD_NOBASE(findHostPointerAllocation, NEO::GraphicsAllocation *, nullptr, (void *ptr, size_t size, uint32_t rootDeviceIndex)) ADDMETHOD_CONST_NOBASE(getEventMaxPacketCount, uint32_t, 8, (uint32_t, ze_device_handle_t *)) + ADDMETHOD_CONST_NOBASE(getEventMaxKernelCount, uint32_t, 3, 
(uint32_t, ze_device_handle_t *)) void setupDevices(std::vector> devices); diff --git a/level_zero/core/test/unit_tests/sources/event/test_event.cpp b/level_zero/core/test/unit_tests/sources/event/test_event.cpp index a256a614b6..75ae88293a 100644 --- a/level_zero/core/test/unit_tests/sources/event/test_event.cpp +++ b/level_zero/core/test/unit_tests/sources/event/test_event.cpp @@ -1928,10 +1928,8 @@ TEST_F(EventUsedPacketSignalTests, givenEventUseMultiplePacketsWhenHostSignalThe } } -HWTEST2_F(EventUsedPacketSignalTests, WhenSettingL3FlushOnEventThenSetOnParticularKernel, IsAtLeastXeHpCore) { - DebugManagerStateRestore restorer; - DebugManager.flags.UsePipeControlMultiKernelEventSync.set(0); - +using EventUsedPacketSignalNoCompactionTests = Test>; +HWTEST2_F(EventUsedPacketSignalNoCompactionTests, WhenSettingL3FlushOnEventThenSetOnParticularKernel, IsAtLeastXeHpCore) { auto event = whiteboxCast(Event::create(eventPool.get(), &eventDesc, device)); ASSERT_NE(event, nullptr); EXPECT_FALSE(event->getL3FlushForCurrenKernel()); @@ -2279,16 +2277,9 @@ struct MockEventCompletion : public EventImp { MockEventCompletion(L0::EventPool *eventPool, int index, L0::Device *device) : EventImp(eventPool, index, device, false) { auto neoDevice = device->getNEODevice(); auto &hwInfo = neoDevice->getHardwareInfo(); - auto &l0GfxCoreHelper = L0GfxCoreHelper::get(hwInfo.platform.eRenderCoreFamily); signalAllEventPackets = L0GfxCoreHelper::useSignalAllEventPackets(hwInfo); - uint32_t maxKernels = EventPacketsCount::maxKernelSplit; - if (l0GfxCoreHelper.useDynamicEventPacketsCount(hwInfo)) { - maxKernels = l0GfxCoreHelper.getEventMaxKernelCount(hwInfo); - } - kernelEventCompletionData = std::make_unique[]>(maxKernels); - auto alloc = eventPool->getAllocation().getGraphicsAllocation(neoDevice->getRootDeviceIndex()); uint64_t baseHostAddr = reinterpret_cast(alloc->getUnderlyingBuffer()); @@ -2297,8 +2288,10 @@ struct MockEventCompletion : public EventImp { hostAddress = 
reinterpret_cast(baseHostAddr + eventPoolOffset); csr = neoDevice->getDefaultEngine().commandStreamReceiver; - maxKernelCount = maxKernels; + maxKernelCount = eventPool->getMaxKernelCount(); maxPacketCount = eventPool->getEventMaxPackets(); + + kernelEventCompletionData = std::make_unique[]>(maxKernelCount); } void assignKernelEventCompletionData(void *address) override { @@ -2442,6 +2435,9 @@ struct EventDynamicPacketUseFixture : public DeviceFixture { EXPECT_EQ(ZE_RESULT_SUCCESS, result); ASSERT_NE(nullptr, eventPool); + auto expectedMaxKernelCount = driverHandle->getEventMaxKernelCount(0, nullptr); + EXPECT_EQ(expectedMaxKernelCount, eventPool->getMaxKernelCount()); + auto eventPoolMaxPackets = eventPool->getEventMaxPackets(); auto expectedPoolMaxPackets = l0GfxCoreHelper.getEventBaseMaxPacketCount(hwInfo); if constexpr (multiTile == 1) { @@ -2465,6 +2461,7 @@ struct EventDynamicPacketUseFixture : public DeviceFixture { EXPECT_EQ(expectedPoolMaxPackets, event->getMaxPacketsCount()); uint32_t maxKernels = l0GfxCoreHelper.getEventMaxKernelCount(hwInfo); + EXPECT_EQ(expectedMaxKernelCount, maxKernels); EXPECT_EQ(maxKernels, event->getMaxKernelCount()); } @@ -2499,6 +2496,9 @@ struct EventDynamicPacketUseFixture : public DeviceFixture { EXPECT_EQ(ZE_RESULT_SUCCESS, result); ASSERT_NE(nullptr, eventPool); + auto expectedMaxKernelCount = driverHandle->getEventMaxKernelCount(1, deviceHandles.data()); + EXPECT_EQ(expectedMaxKernelCount, eventPool->getMaxKernelCount()); + auto eventPoolMaxPackets = eventPool->getEventMaxPackets(); auto expectedPoolMaxPackets = l0GfxCoreHelper.getEventBaseMaxPacketCount(hwInfo); @@ -2520,6 +2520,7 @@ struct EventDynamicPacketUseFixture : public DeviceFixture { EXPECT_EQ(expectedPoolMaxPackets, event->getMaxPacketsCount()); uint32_t maxKernels = l0GfxCoreHelper.getEventMaxKernelCount(hwInfo); + EXPECT_EQ(expectedMaxKernelCount, maxKernels); EXPECT_EQ(maxKernels, event->getMaxKernelCount()); }