From 0cc0706062d5e15455bb59f0b2e59dd3d3eb7a2c Mon Sep 17 00:00:00 2001 From: "Chodor, Jaroslaw" Date: Thu, 22 May 2025 11:59:28 +0000 Subject: [PATCH] performance: Optimizing memory usage for recorded commands Related-To: NEO-15638 Signed-off-by: Chodor, Jaroslaw --- .../unit_tests/experimental/test_graph.cpp | 53 +++++ .../graph/captured_apis/graph_captured_apis.h | 184 ++++++++++-------- .../experimental/source/graph/graph.cpp | 105 +++++----- level_zero/experimental/source/graph/graph.h | 11 +- 4 files changed, 220 insertions(+), 133 deletions(-) diff --git a/level_zero/core/test/unit_tests/experimental/test_graph.cpp b/level_zero/core/test/unit_tests/experimental/test_graph.cpp index 0b3997ddc0..9838947c99 100644 --- a/level_zero/core/test/unit_tests/experimental/test_graph.cpp +++ b/level_zero/core/test/unit_tests/experimental/test_graph.cpp @@ -932,5 +932,58 @@ TEST(GraphExecution, GivenExecutableGraphWithSubGraphsWhenSubmittingItToCommandL EXPECT_EQ(0U, subCmdlist.appendSignalEventCalled); } +TEST(ClosureExternalStorage, GivenEventWaitListThenRecordsItProperly) { + MockEvent events[10]; + ze_event_handle_t eventHandles[10]; + std::transform(events, events + 10, eventHandles, [](auto &ev) { return &ev; }); + + L0::ClosureExternalStorage storage; + EXPECT_EQ(L0::ClosureExternalStorage::invalidEventsWaitListId, storage.registerEventsWaitList(eventHandles, eventHandles)); + + auto waitList0Id = storage.registerEventsWaitList(eventHandles, eventHandles + 1); + auto waitList1Id = storage.registerEventsWaitList(eventHandles + 3, eventHandles + 5); + auto waitList2Id = storage.registerEventsWaitList(eventHandles + 8, eventHandles + 10); + + EXPECT_NE(L0::ClosureExternalStorage::invalidEventsWaitListId, waitList0Id); + EXPECT_NE(L0::ClosureExternalStorage::invalidEventsWaitListId, waitList1Id); + EXPECT_NE(L0::ClosureExternalStorage::invalidEventsWaitListId, waitList2Id); + + EXPECT_EQ(nullptr, 
storage.getEventsWaitList(L0::ClosureExternalStorage::invalidEventsWaitListId)); + + ASSERT_NE(nullptr, storage.getEventsWaitList(waitList0Id)); + EXPECT_EQ(eventHandles[0], storage.getEventsWaitList(waitList0Id)[0]); + + ASSERT_NE(nullptr, storage.getEventsWaitList(waitList1Id)); + EXPECT_EQ(eventHandles[3], storage.getEventsWaitList(waitList1Id)[0]); + EXPECT_EQ(eventHandles[4], storage.getEventsWaitList(waitList1Id)[1]); + + ASSERT_NE(nullptr, storage.getEventsWaitList(waitList2Id)); + EXPECT_EQ(eventHandles[8], storage.getEventsWaitList(waitList2Id)[0]); + EXPECT_EQ(eventHandles[9], storage.getEventsWaitList(waitList2Id)[1]); +} + +TEST(ClosureExternalStorage, GivenKernelMutableStateThenRecordsItProperly) { + KernelMutableState s1; + s1.globalOffsets[0] = 5U; + KernelMutableState s2; + s2.globalOffsets[0] = 7U; + + L0::ClosureExternalStorage storage; + + auto kernelState1Id = storage.registerKernelState(std::move(s1)); + auto kernelState2Id = storage.registerKernelState(std::move(s2)); + + EXPECT_NE(L0::ClosureExternalStorage::invalidKernelStateId, kernelState1Id); + EXPECT_NE(L0::ClosureExternalStorage::invalidKernelStateId, kernelState2Id); + + EXPECT_EQ(nullptr, storage.getKernelMutableState(L0::ClosureExternalStorage::invalidKernelStateId)); + + ASSERT_NE(nullptr, storage.getKernelMutableState(kernelState1Id)); + EXPECT_EQ(5U, storage.getKernelMutableState(kernelState1Id)->globalOffsets[0]); + + ASSERT_NE(nullptr, storage.getKernelMutableState(kernelState2Id)); + EXPECT_EQ(7U, storage.getKernelMutableState(kernelState2Id)->globalOffsets[0]); +} + } // namespace ult } // namespace L0 diff --git a/level_zero/experimental/source/graph/captured_apis/graph_captured_apis.h b/level_zero/experimental/source/graph/captured_apis/graph_captured_apis.h index 6cea0502b6..b7a91f7f5f 100644 --- a/level_zero/experimental/source/graph/captured_apis/graph_captured_apis.h +++ b/level_zero/experimental/source/graph/captured_apis/graph_captured_apis.h @@ -56,6 +56,47 @@ 
enum class CaptureApi { struct CommandList; +struct ClosureExternalStorage { + using EventsWaitListId = int64_t; + using KernelStateId = int64_t; + + static constexpr EventsWaitListId invalidEventsWaitListId = -1; + static constexpr KernelStateId invalidKernelStateId = -1; + + EventsWaitListId registerEventsWaitList(ze_event_handle_t *begin, ze_event_handle_t *end) { + if (begin == end) { + return invalidEventsWaitListId; + } + auto ret = waitEvents.size(); + waitEvents.insert(std::end(waitEvents), begin, end); + return static_cast(ret); + } + + KernelStateId registerKernelState(KernelMutableState &&state) { + auto ret = kernelStates.size(); + kernelStates.push_back(std::move(state)); + return static_cast(ret); + } + + ze_event_handle_t *getEventsWaitList(EventsWaitListId id) { + if (id < 0) { + return nullptr; + } + return waitEvents.data() + id; + } + + KernelMutableState *getKernelMutableState(KernelStateId id) { + if (id < 0) { + return nullptr; + } + return kernelStates.data() + id; + } + + protected: + std::vector waitEvents; + std::vector kernelStates; +}; + template struct Closure { static constexpr bool isSupported = false; @@ -69,9 +110,9 @@ struct Closure { ze_event_handle_t *phWaitEvents = nullptr; }; - Closure(const ApiArgs &apiArgs) {} + Closure(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) {} - ze_result_t instantiateTo(CommandList &executionTarget) const { + ze_result_t instantiateTo(CommandList &executionTarget, ClosureExternalStorage &externalStorage) const { DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } @@ -136,28 +177,22 @@ struct IndirectArgsWithWaitEvents { IndirectArgsWithWaitEvents() = default; template requires HasPhWaitEvents - IndirectArgsWithWaitEvents(const ApiArgsT &apiArgs) { - waitEvents.reserve(apiArgs.numWaitEvents); - for (uint32_t i = 0; i < apiArgs.numWaitEvents; ++i) { - waitEvents.push_back(apiArgs.phWaitEvents[i]); - } + IndirectArgsWithWaitEvents(const ApiArgsT &apiArgs, 
ClosureExternalStorage &externalStorage) { + waitEvents = externalStorage.registerEventsWaitList(apiArgs.phWaitEvents, apiArgs.phWaitEvents + apiArgs.numWaitEvents); } template requires(HasPhEvents && (false == HasPhWaitEvents)) - IndirectArgsWithWaitEvents(const ApiArgsT &apiArgs) { - waitEvents.reserve(apiArgs.numEvents); - for (uint32_t i = 0; i < apiArgs.numEvents; ++i) { - waitEvents.push_back(apiArgs.phEvents[i]); - } + IndirectArgsWithWaitEvents(const ApiArgsT &apiArgs, ClosureExternalStorage &externalStorage) { + waitEvents = externalStorage.registerEventsWaitList(apiArgs.phEvents, apiArgs.phEvents + apiArgs.numEvents); } - StackVec waitEvents; + ClosureExternalStorage::EventsWaitListId waitEvents = ClosureExternalStorage::invalidEventsWaitListId; }; struct EmptyIndirectArgs { template - EmptyIndirectArgs(const ApiArgsT &apiArgs) {} + EmptyIndirectArgs(const ApiArgsT &apiArgs, ClosureExternalStorage &externalStorage) {} }; template <> @@ -177,9 +212,9 @@ struct Closure { using IndirectArgs = IndirectArgsWithWaitEvents; IndirectArgs indirectArgs; - Closure(const ApiArgs &apiArgs) : apiArgs(apiArgs), indirectArgs(apiArgs) {} + Closure(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : apiArgs(apiArgs), indirectArgs(apiArgs, externalStorage) {} - ze_result_t instantiateTo(CommandList &executionTarget) const; + ze_result_t instantiateTo(CommandList &executionTarget, ClosureExternalStorage &externalStorage) const; }; template <> @@ -196,9 +231,9 @@ struct Closure { using IndirectArgs = IndirectArgsWithWaitEvents; IndirectArgs indirectArgs; - Closure(const ApiArgs &apiArgs) : apiArgs(apiArgs), indirectArgs(apiArgs) {} + Closure(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : apiArgs(apiArgs), indirectArgs(apiArgs, externalStorage) {} - ze_result_t instantiateTo(CommandList &executionTarget) const; + ze_result_t instantiateTo(CommandList &executionTarget, ClosureExternalStorage &externalStorage) const; }; template <> @@ -214,9 
+249,9 @@ struct Closure { using IndirectArgs = IndirectArgsWithWaitEvents; IndirectArgs indirectArgs; - Closure(const ApiArgs &apiArgs) : apiArgs(apiArgs), indirectArgs(apiArgs) {} + Closure(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : apiArgs(apiArgs), indirectArgs(apiArgs, externalStorage) {} - ze_result_t instantiateTo(CommandList &executionTarget) const; + ze_result_t instantiateTo(CommandList &executionTarget, ClosureExternalStorage &externalStorage) const; }; template <> @@ -234,9 +269,9 @@ struct Closure { using IndirectArgs = IndirectArgsWithWaitEvents; IndirectArgs indirectArgs; - Closure(const ApiArgs &apiArgs) : apiArgs(apiArgs), indirectArgs(apiArgs) {} + Closure(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : apiArgs(apiArgs), indirectArgs(apiArgs, externalStorage) {} - ze_result_t instantiateTo(CommandList &executionTarget) const; + ze_result_t instantiateTo(CommandList &executionTarget, ClosureExternalStorage &externalStorage) const; }; template <> @@ -254,7 +289,7 @@ struct Closure { } apiArgs; struct IndirectArgs : IndirectArgsWithWaitEvents { - IndirectArgs(const Closure::ApiArgs &apiArgs) : IndirectArgsWithWaitEvents(apiArgs) { + IndirectArgs(const Closure::ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : IndirectArgsWithWaitEvents(apiArgs, externalStorage) { rangeSizes.resize(apiArgs.numRanges); ranges.resize(apiArgs.numRanges); std::copy_n(apiArgs.pRangeSizes, apiArgs.numRanges, rangeSizes.begin()); @@ -264,9 +299,9 @@ struct Closure { StackVec ranges; } indirectArgs; - Closure(const ApiArgs &apiArgs) : apiArgs(apiArgs), indirectArgs(apiArgs) {} + Closure(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : apiArgs(apiArgs), indirectArgs(apiArgs, externalStorage) {} - ze_result_t instantiateTo(CommandList &executionTarget) const; + ze_result_t instantiateTo(CommandList &executionTarget, ClosureExternalStorage &externalStorage) const; }; template <> @@ -285,16 +320,16 @@ struct 
Closure { } apiArgs; struct IndirectArgs : IndirectArgsWithWaitEvents { - IndirectArgs(const Closure::ApiArgs &apiArgs) : IndirectArgsWithWaitEvents(apiArgs) { + IndirectArgs(const Closure::ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : IndirectArgsWithWaitEvents(apiArgs, externalStorage) { pattern.resize(apiArgs.patternSize); memcpy_s(pattern.data(), pattern.size(), apiArgs.pattern, apiArgs.patternSize); } StackVec pattern; } indirectArgs; - Closure(const ApiArgs &apiArgs) : apiArgs(apiArgs), indirectArgs(apiArgs) {} + Closure(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : apiArgs(apiArgs), indirectArgs(apiArgs, externalStorage) {} - ze_result_t instantiateTo(CommandList &executionTarget) const; + ze_result_t instantiateTo(CommandList &executionTarget, ClosureExternalStorage &externalStorage) const; }; template <> @@ -317,7 +352,7 @@ struct Closure { } apiArgs; struct IndirectArgs : IndirectArgsWithWaitEvents { - IndirectArgs(const Closure::ApiArgs &apiArgs) : IndirectArgsWithWaitEvents(apiArgs) { + IndirectArgs(const Closure::ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : IndirectArgsWithWaitEvents(apiArgs, externalStorage) { dstRegion = *apiArgs.dstRegion; srcRegion = *apiArgs.srcRegion; } @@ -325,9 +360,9 @@ struct Closure { ze_copy_region_t srcRegion; } indirectArgs; - Closure(const ApiArgs &apiArgs) : apiArgs(apiArgs), indirectArgs(apiArgs) {} + Closure(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : apiArgs(apiArgs), indirectArgs(apiArgs, externalStorage) {} - ze_result_t instantiateTo(CommandList &executionTarget) const; + ze_result_t instantiateTo(CommandList &executionTarget, ClosureExternalStorage &externalStorage) const; }; template <> @@ -348,9 +383,9 @@ struct Closure { using IndirectArgs = IndirectArgsWithWaitEvents; IndirectArgs indirectArgs; - Closure(const ApiArgs &apiArgs) : apiArgs(apiArgs), indirectArgs(apiArgs) {} + Closure(const ApiArgs &apiArgs, ClosureExternalStorage 
&externalStorage) : apiArgs(apiArgs), indirectArgs(apiArgs, externalStorage) {} - ze_result_t instantiateTo(CommandList &executionTarget) const; + ze_result_t instantiateTo(CommandList &executionTarget, ClosureExternalStorage &externalStorage) const; }; template <> @@ -369,9 +404,9 @@ struct Closure { using IndirectArgs = IndirectArgsWithWaitEvents; IndirectArgs indirectArgs; - Closure(const ApiArgs &apiArgs) : apiArgs(apiArgs), indirectArgs(apiArgs) {} + Closure(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : apiArgs(apiArgs), indirectArgs(apiArgs, externalStorage) {} - ze_result_t instantiateTo(CommandList &executionTarget) const; + ze_result_t instantiateTo(CommandList &executionTarget, ClosureExternalStorage &externalStorage) const; }; template <> @@ -390,7 +425,7 @@ struct Closure { } apiArgs; struct IndirectArgs : IndirectArgsWithWaitEvents { - IndirectArgs(const Closure::ApiArgs &apiArgs) : IndirectArgsWithWaitEvents(apiArgs) { + IndirectArgs(const Closure::ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : IndirectArgsWithWaitEvents(apiArgs, externalStorage) { dstRegion = *apiArgs.pDstRegion; srcRegion = *apiArgs.pSrcRegion; } @@ -398,9 +433,9 @@ struct Closure { ze_image_region_t srcRegion; } indirectArgs; - Closure(const ApiArgs &apiArgs) : apiArgs(apiArgs), indirectArgs(apiArgs) {} + Closure(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : apiArgs(apiArgs), indirectArgs(apiArgs, externalStorage) {} - ze_result_t instantiateTo(CommandList &executionTarget) const; + ze_result_t instantiateTo(CommandList &executionTarget, ClosureExternalStorage &externalStorage) const; }; template <> @@ -418,15 +453,15 @@ struct Closure { } apiArgs; struct IndirectArgs : IndirectArgsWithWaitEvents { - IndirectArgs(const Closure::ApiArgs &apiArgs) : IndirectArgsWithWaitEvents(apiArgs) { + IndirectArgs(const Closure::ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : IndirectArgsWithWaitEvents(apiArgs, externalStorage) 
{ srcRegion = *apiArgs.pSrcRegion; } ze_image_region_t srcRegion; } indirectArgs; - Closure(const ApiArgs &apiArgs) : apiArgs(apiArgs), indirectArgs(apiArgs) {} + Closure(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : apiArgs(apiArgs), indirectArgs(apiArgs, externalStorage) {} - ze_result_t instantiateTo(CommandList &executionTarget) const; + ze_result_t instantiateTo(CommandList &executionTarget, ClosureExternalStorage &externalStorage) const; }; template <> @@ -444,15 +479,15 @@ struct Closure { } apiArgs; struct IndirectArgs : IndirectArgsWithWaitEvents { - IndirectArgs(const Closure::ApiArgs &apiArgs) : IndirectArgsWithWaitEvents(apiArgs) { + IndirectArgs(const Closure::ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : IndirectArgsWithWaitEvents(apiArgs, externalStorage) { dstRegion = *apiArgs.pDstRegion; } ze_image_region_t dstRegion; } indirectArgs; - Closure(const ApiArgs &apiArgs) : apiArgs(apiArgs), indirectArgs(apiArgs) {} + Closure(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : apiArgs(apiArgs), indirectArgs(apiArgs, externalStorage) {} - ze_result_t instantiateTo(CommandList &executionTarget) const; + ze_result_t instantiateTo(CommandList &executionTarget, ClosureExternalStorage &externalStorage) const; }; template <> @@ -468,9 +503,9 @@ struct Closure { using IndirectArgs = EmptyIndirectArgs; IndirectArgs indirectArgs; - Closure(const ApiArgs &apiArgs) : apiArgs(apiArgs), indirectArgs(apiArgs) {} + Closure(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : apiArgs(apiArgs), indirectArgs(apiArgs, externalStorage) {} - ze_result_t instantiateTo(CommandList &executionTarget) const; + ze_result_t instantiateTo(CommandList &executionTarget, ClosureExternalStorage &externalStorage) const; }; template <> @@ -488,9 +523,9 @@ struct Closure { using IndirectArgs = EmptyIndirectArgs; IndirectArgs indirectArgs; - Closure(const ApiArgs &apiArgs) : apiArgs(apiArgs), indirectArgs(apiArgs) {} + 
Closure(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : apiArgs(apiArgs), indirectArgs(apiArgs, externalStorage) {} - ze_result_t instantiateTo(CommandList &executionTarget) const; + ze_result_t instantiateTo(CommandList &executionTarget, ClosureExternalStorage &externalStorage) const; }; template <> @@ -505,9 +540,9 @@ struct Closure { using IndirectArgs = EmptyIndirectArgs; IndirectArgs indirectArgs; - Closure(const ApiArgs &apiArgs) : apiArgs(apiArgs), indirectArgs(apiArgs) {} + Closure(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : apiArgs(apiArgs), indirectArgs(apiArgs, externalStorage) {} - ze_result_t instantiateTo(CommandList &executionTarget) const; + ze_result_t instantiateTo(CommandList &executionTarget, ClosureExternalStorage &externalStorage) const; }; template <> @@ -522,9 +557,9 @@ struct Closure { using IndirectArgs = EmptyIndirectArgs; IndirectArgs indirectArgs; - Closure(const ApiArgs &apiArgs) : apiArgs(apiArgs), indirectArgs(apiArgs) {} + Closure(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : apiArgs(apiArgs), indirectArgs(apiArgs, externalStorage) {} - ze_result_t instantiateTo(CommandList &executionTarget) const; + ze_result_t instantiateTo(CommandList &executionTarget, ClosureExternalStorage &externalStorage) const; }; template <> @@ -543,7 +578,7 @@ struct Closure { } apiArgs; struct IndirectArgs : IndirectArgsWithWaitEvents { - IndirectArgs(const Closure::ApiArgs &apiArgs) : IndirectArgsWithWaitEvents(apiArgs) { + IndirectArgs(const Closure::ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : IndirectArgsWithWaitEvents(apiArgs, externalStorage) { events.resize(apiArgs.numEvents); offsets.resize(apiArgs.numEvents); std::copy_n(apiArgs.phEvents, apiArgs.numEvents, events.begin()); @@ -556,9 +591,9 @@ struct Closure { StackVec offsets; } indirectArgs; - Closure(const ApiArgs &apiArgs) : apiArgs(apiArgs), indirectArgs(apiArgs) {} + Closure(const ApiArgs &apiArgs, 
ClosureExternalStorage &externalStorage) : apiArgs(apiArgs), indirectArgs(apiArgs, externalStorage) {} - ze_result_t instantiateTo(CommandList &executionTarget) const; + ze_result_t instantiateTo(CommandList &executionTarget, ClosureExternalStorage &externalStorage) const; }; template <> @@ -576,7 +611,7 @@ struct Closure { } apiArgs; struct IndirectArgs : IndirectArgsWithWaitEvents { - IndirectArgs(const Closure::ApiArgs &apiArgs) : IndirectArgsWithWaitEvents(apiArgs) { + IndirectArgs(const Closure::ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : IndirectArgsWithWaitEvents(apiArgs, externalStorage) { semaphores.resize(apiArgs.numSemaphores); std::copy_n(apiArgs.phSemaphores, apiArgs.numSemaphores, semaphores.begin()); signalParams = *apiArgs.signalParams; @@ -585,9 +620,9 @@ struct Closure { ze_external_semaphore_signal_params_ext_t signalParams; } indirectArgs; - Closure(const ApiArgs &apiArgs) : apiArgs(apiArgs), indirectArgs(apiArgs) {} + Closure(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : apiArgs(apiArgs), indirectArgs(apiArgs, externalStorage) {} - ze_result_t instantiateTo(CommandList &executionTarget) const; + ze_result_t instantiateTo(CommandList &executionTarget, ClosureExternalStorage &externalStorage) const; }; template <> @@ -605,7 +640,7 @@ struct Closure { } apiArgs; struct IndirectArgs : IndirectArgsWithWaitEvents { - IndirectArgs(const Closure::ApiArgs &apiArgs) : IndirectArgsWithWaitEvents(apiArgs) { + IndirectArgs(const Closure::ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : IndirectArgsWithWaitEvents(apiArgs, externalStorage) { semaphores.resize(apiArgs.numSemaphores); std::copy_n(apiArgs.phSemaphores, apiArgs.numSemaphores, semaphores.begin()); waitParams = *apiArgs.waitParams; @@ -614,9 +649,9 @@ struct Closure { ze_external_semaphore_wait_params_ext_t waitParams; } indirectArgs; - Closure(const ApiArgs &apiArgs) : apiArgs(apiArgs), indirectArgs(apiArgs) {} + Closure(const ApiArgs &apiArgs, 
ClosureExternalStorage &externalStorage) : apiArgs(apiArgs), indirectArgs(apiArgs, externalStorage) {} - ze_result_t instantiateTo(CommandList &executionTarget) const; + ze_result_t instantiateTo(CommandList &executionTarget, ClosureExternalStorage &externalStorage) const; }; template <> @@ -636,15 +671,15 @@ struct Closure { } apiArgs; struct IndirectArgs : IndirectArgsWithWaitEvents { - IndirectArgs(const Closure::ApiArgs &apiArgs) : IndirectArgsWithWaitEvents(apiArgs) { + IndirectArgs(const Closure::ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : IndirectArgsWithWaitEvents(apiArgs, externalStorage) { srcRegion = *apiArgs.pSrcRegion; } ze_image_region_t srcRegion; } indirectArgs; - Closure(const ApiArgs &apiArgs) : apiArgs(apiArgs), indirectArgs(apiArgs) {} + Closure(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : apiArgs(apiArgs), indirectArgs(apiArgs, externalStorage) {} - ze_result_t instantiateTo(CommandList &executionTarget) const; + ze_result_t instantiateTo(CommandList &executionTarget, ClosureExternalStorage &externalStorage) const; }; template <> @@ -664,15 +699,15 @@ struct Closure { } apiArgs; struct IndirectArgs : IndirectArgsWithWaitEvents { - IndirectArgs(const Closure::ApiArgs &apiArgs) : IndirectArgsWithWaitEvents(apiArgs) { + IndirectArgs(const Closure::ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : IndirectArgsWithWaitEvents(apiArgs, externalStorage) { dstRegion = *apiArgs.pDstRegion; } ze_image_region_t dstRegion; } indirectArgs; - Closure(const ApiArgs &apiArgs) : apiArgs(apiArgs), indirectArgs(apiArgs) {} + Closure(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : apiArgs(apiArgs), indirectArgs(apiArgs, externalStorage) {} - ze_result_t instantiateTo(CommandList &executionTarget) const; + ze_result_t instantiateTo(CommandList &executionTarget, ClosureExternalStorage &externalStorage) const; }; template <> @@ -688,20 +723,15 @@ struct Closure { ze_event_handle_t *phWaitEvents; } 
apiArgs; - struct IndirectArgs { + struct IndirectArgs : IndirectArgsWithWaitEvents { + IndirectArgs(const Closure::ApiArgs &apiArgs, ClosureExternalStorage &externalStorage); ze_group_count_t launchKernelArgs; - KernelMutableState kernelState; - StackVec waitEvents; + ClosureExternalStorage::KernelStateId kernelState = ClosureExternalStorage::invalidKernelStateId; } indirectArgs; - Closure(const ApiArgs &apiArgs); - Closure(const Closure &) = delete; - Closure(Closure &&rhs) = default; - Closure &operator=(const Closure &) = delete; - Closure &operator=(Closure &&) = delete; - ~Closure() = default; + Closure(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : apiArgs(apiArgs), indirectArgs(apiArgs, externalStorage) {} - ze_result_t instantiateTo(CommandList &executionTarget) const; + ze_result_t instantiateTo(CommandList &executionTarget, ClosureExternalStorage &externalStorage) const; }; } // namespace L0 diff --git a/level_zero/experimental/source/graph/graph.cpp b/level_zero/experimental/source/graph/graph.cpp index 1ee5b45838..8c3cd7dbf9 100644 --- a/level_zero/experimental/source/graph/graph.cpp +++ b/level_zero/experimental/source/graph/graph.cpp @@ -89,76 +89,76 @@ auto getOptionalData(ContainerT &container) { return container.empty() ? 
nullptr : container.data(); } -ze_result_t Closure::instantiateTo(L0::CommandList &executionTarget) const { - return zeCommandListAppendMemoryCopy(&executionTarget, apiArgs.dstptr, apiArgs.srcptr, apiArgs.size, apiArgs.hSignalEvent, apiArgs.numWaitEvents, const_cast(getOptionalData(indirectArgs.waitEvents))); +ze_result_t Closure::instantiateTo(L0::CommandList &executionTarget, ClosureExternalStorage &externalStorage) const { + return zeCommandListAppendMemoryCopy(&executionTarget, apiArgs.dstptr, apiArgs.srcptr, apiArgs.size, apiArgs.hSignalEvent, apiArgs.numWaitEvents, externalStorage.getEventsWaitList(indirectArgs.waitEvents)); } -ze_result_t Closure::instantiateTo(L0::CommandList &executionTarget) const { - return zeCommandListAppendBarrier(&executionTarget, apiArgs.hSignalEvent, apiArgs.numWaitEvents, const_cast(getOptionalData(indirectArgs.waitEvents))); +ze_result_t Closure::instantiateTo(L0::CommandList &executionTarget, ClosureExternalStorage &externalStorage) const { + return zeCommandListAppendBarrier(&executionTarget, apiArgs.hSignalEvent, apiArgs.numWaitEvents, externalStorage.getEventsWaitList(indirectArgs.waitEvents)); } -ze_result_t Closure::instantiateTo(L0::CommandList &executionTarget) const { - return zeCommandListAppendWaitOnEvents(&executionTarget, apiArgs.numEvents, const_cast(getOptionalData(indirectArgs.waitEvents))); +ze_result_t Closure::instantiateTo(L0::CommandList &executionTarget, ClosureExternalStorage &externalStorage) const { + return zeCommandListAppendWaitOnEvents(&executionTarget, apiArgs.numEvents, externalStorage.getEventsWaitList(indirectArgs.waitEvents)); } -ze_result_t Closure::instantiateTo(L0::CommandList &executionTarget) const { - return zeCommandListAppendWriteGlobalTimestamp(&executionTarget, apiArgs.dstptr, apiArgs.hSignalEvent, apiArgs.numWaitEvents, const_cast(getOptionalData(indirectArgs.waitEvents))); +ze_result_t Closure::instantiateTo(L0::CommandList &executionTarget, ClosureExternalStorage &externalStorage) 
const { + return zeCommandListAppendWriteGlobalTimestamp(&executionTarget, apiArgs.dstptr, apiArgs.hSignalEvent, apiArgs.numWaitEvents, externalStorage.getEventsWaitList(indirectArgs.waitEvents)); } -ze_result_t Closure::instantiateTo(L0::CommandList &executionTarget) const { +ze_result_t Closure::instantiateTo(L0::CommandList &executionTarget, ClosureExternalStorage &externalStorage) const { return zeCommandListAppendMemoryRangesBarrier(&executionTarget, apiArgs.numRanges, getOptionalData(indirectArgs.rangeSizes), const_cast(getOptionalData(indirectArgs.ranges)), - apiArgs.hSignalEvent, apiArgs.numWaitEvents, const_cast(getOptionalData(indirectArgs.waitEvents))); + apiArgs.hSignalEvent, apiArgs.numWaitEvents, externalStorage.getEventsWaitList(indirectArgs.waitEvents)); } -ze_result_t Closure::instantiateTo(L0::CommandList &executionTarget) const { +ze_result_t Closure::instantiateTo(L0::CommandList &executionTarget, ClosureExternalStorage &externalStorage) const { return zeCommandListAppendMemoryFill(&executionTarget, apiArgs.ptr, getOptionalData(indirectArgs.pattern), apiArgs.patternSize, apiArgs.size, - apiArgs.hSignalEvent, apiArgs.numWaitEvents, const_cast(getOptionalData(indirectArgs.waitEvents))); + apiArgs.hSignalEvent, apiArgs.numWaitEvents, externalStorage.getEventsWaitList(indirectArgs.waitEvents)); } -ze_result_t Closure::instantiateTo(L0::CommandList &executionTarget) const { +ze_result_t Closure::instantiateTo(L0::CommandList &executionTarget, ClosureExternalStorage &externalStorage) const { return zeCommandListAppendMemoryCopyRegion(&executionTarget, apiArgs.dstptr, &indirectArgs.dstRegion, apiArgs.dstPitch, apiArgs.dstSlicePitch, apiArgs.srcptr, &indirectArgs.srcRegion, apiArgs.srcPitch, apiArgs.srcSlicePitch, - apiArgs.hSignalEvent, apiArgs.numWaitEvents, const_cast(getOptionalData(indirectArgs.waitEvents))); + apiArgs.hSignalEvent, apiArgs.numWaitEvents, externalStorage.getEventsWaitList(indirectArgs.waitEvents)); } -ze_result_t 
Closure::instantiateTo(L0::CommandList &executionTarget) const { +ze_result_t Closure::instantiateTo(L0::CommandList &executionTarget, ClosureExternalStorage &externalStorage) const { return zeCommandListAppendMemoryCopyFromContext(&executionTarget, apiArgs.dstptr, apiArgs.hContextSrc, apiArgs.srcptr, apiArgs.size, - apiArgs.hSignalEvent, apiArgs.numWaitEvents, const_cast(getOptionalData(indirectArgs.waitEvents))); + apiArgs.hSignalEvent, apiArgs.numWaitEvents, externalStorage.getEventsWaitList(indirectArgs.waitEvents)); } -ze_result_t Closure::instantiateTo(L0::CommandList &executionTarget) const { +ze_result_t Closure::instantiateTo(L0::CommandList &executionTarget, ClosureExternalStorage &externalStorage) const { return zeCommandListAppendImageCopy(&executionTarget, apiArgs.hDstImage, apiArgs.hSrcImage, - apiArgs.hSignalEvent, apiArgs.numWaitEvents, const_cast(getOptionalData(indirectArgs.waitEvents))); + apiArgs.hSignalEvent, apiArgs.numWaitEvents, externalStorage.getEventsWaitList(indirectArgs.waitEvents)); } -ze_result_t Closure::instantiateTo(L0::CommandList &executionTarget) const { +ze_result_t Closure::instantiateTo(L0::CommandList &executionTarget, ClosureExternalStorage &externalStorage) const { return zeCommandListAppendImageCopyRegion(&executionTarget, apiArgs.hDstImage, apiArgs.hSrcImage, &indirectArgs.dstRegion, &indirectArgs.srcRegion, - apiArgs.hSignalEvent, apiArgs.numWaitEvents, const_cast(getOptionalData(indirectArgs.waitEvents))); + apiArgs.hSignalEvent, apiArgs.numWaitEvents, externalStorage.getEventsWaitList(indirectArgs.waitEvents)); } -ze_result_t Closure::instantiateTo(L0::CommandList &executionTarget) const { +ze_result_t Closure::instantiateTo(L0::CommandList &executionTarget, ClosureExternalStorage &externalStorage) const { return zeCommandListAppendImageCopyToMemory(&executionTarget, apiArgs.dstptr, apiArgs.hSrcImage, &indirectArgs.srcRegion, - apiArgs.hSignalEvent, apiArgs.numWaitEvents, 
const_cast(getOptionalData(indirectArgs.waitEvents))); + apiArgs.hSignalEvent, apiArgs.numWaitEvents, externalStorage.getEventsWaitList(indirectArgs.waitEvents)); } -ze_result_t Closure::instantiateTo(L0::CommandList &executionTarget) const { +ze_result_t Closure::instantiateTo(L0::CommandList &executionTarget, ClosureExternalStorage &externalStorage) const { return zeCommandListAppendImageCopyFromMemory(&executionTarget, apiArgs.hDstImage, apiArgs.srcptr, &indirectArgs.dstRegion, - apiArgs.hSignalEvent, apiArgs.numWaitEvents, const_cast(getOptionalData(indirectArgs.waitEvents))); + apiArgs.hSignalEvent, apiArgs.numWaitEvents, externalStorage.getEventsWaitList(indirectArgs.waitEvents)); } -ze_result_t Closure::instantiateTo(L0::CommandList &executionTarget) const { +ze_result_t Closure::instantiateTo(L0::CommandList &executionTarget, ClosureExternalStorage &externalStorage) const { return zeCommandListAppendMemoryPrefetch(&executionTarget, apiArgs.ptr, apiArgs.size); } -ze_result_t Closure::instantiateTo(L0::CommandList &executionTarget) const { +ze_result_t Closure::instantiateTo(L0::CommandList &executionTarget, ClosureExternalStorage &externalStorage) const { return zeCommandListAppendMemAdvise(&executionTarget, apiArgs.hDevice, apiArgs.ptr, @@ -166,78 +166,73 @@ ze_result_t Closure::instantiateTo(L0: apiArgs.advice); } -ze_result_t Closure::instantiateTo(L0::CommandList &executionTarget) const { +ze_result_t Closure::instantiateTo(L0::CommandList &executionTarget, ClosureExternalStorage &externalStorage) const { return zeCommandListAppendSignalEvent(&executionTarget, apiArgs.hEvent); } -ze_result_t Closure::instantiateTo(L0::CommandList &executionTarget) const { +ze_result_t Closure::instantiateTo(L0::CommandList &executionTarget, ClosureExternalStorage &externalStorage) const { return zeCommandListAppendEventReset(&executionTarget, apiArgs.hEvent); } -ze_result_t Closure::instantiateTo(L0::CommandList &executionTarget) const { +ze_result_t 
Closure::instantiateTo(L0::CommandList &executionTarget, ClosureExternalStorage &externalStorage) const { return zeCommandListAppendQueryKernelTimestamps(&executionTarget, apiArgs.numEvents, const_cast(getOptionalData(indirectArgs.events)), apiArgs.dstptr, getOptionalData(indirectArgs.offsets), - apiArgs.hSignalEvent, apiArgs.numWaitEvents, const_cast(getOptionalData(indirectArgs.waitEvents))); + apiArgs.hSignalEvent, apiArgs.numWaitEvents, externalStorage.getEventsWaitList(indirectArgs.waitEvents)); } -ze_result_t Closure::instantiateTo(L0::CommandList &executionTarget) const { +ze_result_t Closure::instantiateTo(L0::CommandList &executionTarget, ClosureExternalStorage &externalStorage) const { return zeCommandListAppendSignalExternalSemaphoreExt(&executionTarget, apiArgs.numSemaphores, const_cast(getOptionalData(indirectArgs.semaphores)), const_cast(&indirectArgs.signalParams), - apiArgs.hSignalEvent, apiArgs.numWaitEvents, const_cast(getOptionalData(indirectArgs.waitEvents))); + apiArgs.hSignalEvent, apiArgs.numWaitEvents, externalStorage.getEventsWaitList(indirectArgs.waitEvents)); } -ze_result_t Closure::instantiateTo(L0::CommandList &executionTarget) const { +ze_result_t Closure::instantiateTo(L0::CommandList &executionTarget, ClosureExternalStorage &externalStorage) const { return zeCommandListAppendWaitExternalSemaphoreExt(&executionTarget, apiArgs.numSemaphores, const_cast(getOptionalData(indirectArgs.semaphores)), const_cast(&indirectArgs.waitParams), - apiArgs.hSignalEvent, apiArgs.numWaitEvents, const_cast(getOptionalData(indirectArgs.waitEvents))); + apiArgs.hSignalEvent, apiArgs.numWaitEvents, externalStorage.getEventsWaitList(indirectArgs.waitEvents)); } -ze_result_t Closure::instantiateTo(L0::CommandList &executionTarget) const { +ze_result_t Closure::instantiateTo(L0::CommandList &executionTarget, ClosureExternalStorage &externalStorage) const { return zeCommandListAppendImageCopyToMemoryExt(&executionTarget, apiArgs.dstptr, apiArgs.hSrcImage, 
&indirectArgs.srcRegion, apiArgs.destRowPitch, apiArgs.destSlicePitch, - apiArgs.hSignalEvent, apiArgs.numWaitEvents, const_cast(getOptionalData(indirectArgs.waitEvents))); + apiArgs.hSignalEvent, apiArgs.numWaitEvents, externalStorage.getEventsWaitList(indirectArgs.waitEvents)); } -ze_result_t Closure::instantiateTo(L0::CommandList &executionTarget) const { +ze_result_t Closure::instantiateTo(L0::CommandList &executionTarget, ClosureExternalStorage &externalStorage) const { return zeCommandListAppendImageCopyFromMemoryExt(&executionTarget, apiArgs.hDstImage, apiArgs.srcptr, &indirectArgs.dstRegion, apiArgs.srcRowPitch, apiArgs.srcSlicePitch, - apiArgs.hSignalEvent, apiArgs.numWaitEvents, const_cast(getOptionalData(indirectArgs.waitEvents))); + apiArgs.hSignalEvent, apiArgs.numWaitEvents, externalStorage.getEventsWaitList(indirectArgs.waitEvents)); } -Closure::Closure(const ApiArgs &apiArgs) : apiArgs{apiArgs} { - this->apiArgs.launchKernelArgs = nullptr; - this->apiArgs.phWaitEvents = nullptr; - this->indirectArgs.launchKernelArgs = *apiArgs.launchKernelArgs; +Closure::IndirectArgs::IndirectArgs(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : IndirectArgsWithWaitEvents(apiArgs, externalStorage) { + this->launchKernelArgs = *apiArgs.launchKernelArgs; auto kernel = static_cast(Kernel::fromHandle(apiArgs.kernelHandle)); - this->indirectArgs.kernelState = kernel->getMutableState(); - - this->indirectArgs.waitEvents.reserve(apiArgs.numWaitEvents); - for (uint32_t i = 0; i < apiArgs.numWaitEvents; ++i) { - this->indirectArgs.waitEvents.push_back(apiArgs.phWaitEvents[i]); - } + L0::KernelMutableState stateSnapshot; + stateSnapshot = kernel->getMutableState(); + this->kernelState = externalStorage.registerKernelState(std::move(stateSnapshot)); } -ze_result_t Closure::instantiateTo(L0::CommandList &executionTarget) const { +ze_result_t Closure::instantiateTo(L0::CommandList &executionTarget, ClosureExternalStorage &externalStorage) const { auto kernel = 
static_cast(Kernel::fromHandle(apiArgs.kernelHandle)); - kernel->getMutableState() = this->indirectArgs.kernelState; - return zeCommandListAppendLaunchKernel(&executionTarget, apiArgs.kernelHandle, &indirectArgs.launchKernelArgs, apiArgs.hSignalEvent, apiArgs.numWaitEvents, const_cast(getOptionalData(indirectArgs.waitEvents))); + kernel->getMutableState() = *externalStorage.getKernelMutableState(this->indirectArgs.kernelState); + return zeCommandListAppendLaunchKernel(&executionTarget, apiArgs.kernelHandle, &indirectArgs.launchKernelArgs, apiArgs.hSignalEvent, apiArgs.numWaitEvents, externalStorage.getEventsWaitList(indirectArgs.waitEvents)); } ExecutableGraph::~ExecutableGraph() = default; @@ -283,10 +278,10 @@ void ExecutableGraph::instantiateFrom(Graph &graph, const GraphInstatiateSetting switch (static_cast(cmd.index())) { default: break; -#define RR_CAPTURED_API(X) \ - case CaptureApi::X: \ - std::get(CaptureApi::X)>(cmd).instantiateTo(*currCmdList); \ - DEBUG_BREAK_IF(err != ZE_RESULT_SUCCESS); \ +#define RR_CAPTURED_API(X) \ + case CaptureApi::X: \ + std::get(CaptureApi::X)>(cmd).instantiateTo(*currCmdList, graph.getExternalStorage()); \ + DEBUG_BREAK_IF(err != ZE_RESULT_SUCCESS); \ break; RR_CAPTURED_APIS() #undef RR_CAPTURED_API diff --git a/level_zero/experimental/source/graph/graph.h b/level_zero/experimental/source/graph/graph.h index db037b6f18..7848b19431 100644 --- a/level_zero/experimental/source/graph/graph.h +++ b/level_zero/experimental/source/graph/graph.h @@ -81,7 +81,7 @@ struct Graph : _ze_graph_handle_t { using ApiArgsT = typename Closure::ApiArgs; auto capturedArgs = ApiArgsT{apiArgs...}; - commands.push_back(CapturedCommand{Closure(capturedArgs)}); + commands.push_back(CapturedCommand{Closure(capturedArgs, externalStorage)}); return ZE_RESULT_SUCCESS; } @@ -145,10 +145,14 @@ struct Graph : _ze_graph_handle_t { void tryJoinOnNextCommand(L0::CommandList &childCmdList, L0::Event &joinEvent); void forkTo(L0::CommandList &childCmdList, Graph 
*&child, L0::Event &forkEvent); void registerSignallingEventFromPreviousCommand(L0::Event &ev); + ClosureExternalStorage &getExternalStorage() { + return externalStorage; + } protected: void unregisterSignallingEvents(); + ClosureExternalStorage externalStorage; std::vector commands; StackVec subGraphs; @@ -268,4 +272,9 @@ struct ExecutableGraph : _ze_executable_graph_handle_t { GraphSubmissionChain submissionChain; }; +constexpr size_t maxVariantSize = 3 * 64; +#define RR_CAPTURED_API(X) \ + static_assert(sizeof(Closure) < maxVariantSize, #X " is too big for common variant. Please export some of its state to ClosureExternalStorage"); +RR_CAPTURED_APIS() +#undef RR_CAPTURED_API } // namespace L0 \ No newline at end of file