mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-29 00:58:39 +08:00
performance: Optimizing memory usage for recorded commands
Related-To: NEO-15638
Signed-off-by: Chodor, Jaroslaw <jaroslaw.chodor@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
b5646b45e9
commit
0cc0706062
@@ -932,5 +932,58 @@ TEST(GraphExecution, GivenExecutableGraphWithSubGraphsWhenSubmittingItToCommandL
|
||||
EXPECT_EQ(0U, subCmdlist.appendSignalEventCalled);
|
||||
}
|
||||
|
||||
// Verifies that wait lists registered in ClosureExternalStorage can be read back through the
// returned ids, and that an empty range maps to the invalid id / a null lookup result.
TEST(ClosureExternalStorage, GivenEventWaitListThenRecordsItProperly) {
    // Ten distinct mock events so that every registered sub-range holds unique handles.
    constexpr size_t eventCount = 10;
    MockEvent events[eventCount];
    ze_event_handle_t eventHandles[eventCount];
    for (size_t idx = 0; idx < eventCount; ++idx) {
        eventHandles[idx] = &events[idx];
    }

    using Storage = L0::ClosureExternalStorage;
    Storage storage;

    // begin == end denotes an empty list, which is reported with the invalid id.
    const auto emptyListId = storage.registerEventsWaitList(eventHandles, eventHandles);
    EXPECT_EQ(Storage::invalidEventsWaitListId, emptyListId);

    // Three non-empty ranges: one handle, then two, then two.
    const auto waitList0Id = storage.registerEventsWaitList(eventHandles, eventHandles + 1);
    const auto waitList1Id = storage.registerEventsWaitList(eventHandles + 3, eventHandles + 5);
    const auto waitList2Id = storage.registerEventsWaitList(eventHandles + 8, eventHandles + 10);

    EXPECT_NE(Storage::invalidEventsWaitListId, waitList0Id);
    EXPECT_NE(Storage::invalidEventsWaitListId, waitList1Id);
    EXPECT_NE(Storage::invalidEventsWaitListId, waitList2Id);

    // Looking up the invalid id must not yield any storage.
    EXPECT_EQ(nullptr, storage.getEventsWaitList(Storage::invalidEventsWaitListId));

    // All registrations are done at this point, so the looked-up pointers can be kept in locals.
    ze_event_handle_t *recorded0 = storage.getEventsWaitList(waitList0Id);
    ASSERT_NE(nullptr, recorded0);
    EXPECT_EQ(eventHandles[0], recorded0[0]);

    ze_event_handle_t *recorded1 = storage.getEventsWaitList(waitList1Id);
    ASSERT_NE(nullptr, recorded1);
    EXPECT_EQ(eventHandles[3], recorded1[0]);
    EXPECT_EQ(eventHandles[4], recorded1[1]);

    ze_event_handle_t *recorded2 = storage.getEventsWaitList(waitList2Id);
    ASSERT_NE(nullptr, recorded2);
    EXPECT_EQ(eventHandles[8], recorded2[0]);
    EXPECT_EQ(eventHandles[9], recorded2[1]);
}
|
||||
|
||||
// Verifies that kernel states moved into ClosureExternalStorage can be read back through the
// returned ids, and that the invalid kernel-state id resolves to nullptr.
TEST(ClosureExternalStorage, GivenKernelMutableStateThenRecordsItProperly) {
    // The two states differ in one field so that a mix-up between stored entries is detectable.
    KernelMutableState s1;
    s1.globalOffsets[0] = 5U;
    KernelMutableState s2;
    s2.globalOffsets[0] = 7U;

    L0::ClosureExternalStorage storage;

    auto kernelState1Id = storage.registerKernelState(std::move(s1));
    auto kernelState2Id = storage.registerKernelState(std::move(s2));

    // Kernel-state ids are compared against the kernel-state sentinel (not the events-wait-list
    // one), so the check stays meaningful even if the two sentinels ever diverge.
    EXPECT_NE(L0::ClosureExternalStorage::invalidKernelStateId, kernelState1Id);
    EXPECT_NE(L0::ClosureExternalStorage::invalidKernelStateId, kernelState2Id);
    // Each registration must get its own slot.
    EXPECT_NE(kernelState1Id, kernelState2Id);

    EXPECT_EQ(nullptr, storage.getKernelMutableState(L0::ClosureExternalStorage::invalidKernelStateId));

    ASSERT_NE(nullptr, storage.getKernelMutableState(kernelState1Id));
    EXPECT_EQ(5U, storage.getKernelMutableState(kernelState1Id)->globalOffsets[0]);

    ASSERT_NE(nullptr, storage.getKernelMutableState(kernelState2Id));
    EXPECT_EQ(7U, storage.getKernelMutableState(kernelState2Id)->globalOffsets[0]);
}
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
||||
@@ -56,6 +56,47 @@ enum class CaptureApi {
|
||||
|
||||
struct CommandList;
|
||||
|
||||
struct ClosureExternalStorage {
|
||||
using EventsWaitListId = int64_t;
|
||||
using KernelStateId = int64_t;
|
||||
|
||||
static constexpr EventsWaitListId invalidEventsWaitListId = -1;
|
||||
static constexpr KernelStateId invalidKernelStateId = -1;
|
||||
|
||||
EventsWaitListId registerEventsWaitList(ze_event_handle_t *begin, ze_event_handle_t *end) {
|
||||
if (begin == end) {
|
||||
return invalidEventsWaitListId;
|
||||
}
|
||||
auto ret = waitEvents.size();
|
||||
waitEvents.insert(std::end(waitEvents), begin, end);
|
||||
return static_cast<EventsWaitListId>(ret);
|
||||
}
|
||||
|
||||
KernelStateId registerKernelState(KernelMutableState &&state) {
|
||||
auto ret = kernelStates.size();
|
||||
kernelStates.push_back(std::move(state));
|
||||
return static_cast<KernelStateId>(ret);
|
||||
}
|
||||
|
||||
ze_event_handle_t *getEventsWaitList(EventsWaitListId id) {
|
||||
if (id < 0) {
|
||||
return nullptr;
|
||||
}
|
||||
return waitEvents.data() + id;
|
||||
}
|
||||
|
||||
KernelMutableState *getKernelMutableState(KernelStateId id) {
|
||||
if (id < 0) {
|
||||
return nullptr;
|
||||
}
|
||||
return kernelStates.data() + id;
|
||||
}
|
||||
|
||||
protected:
|
||||
std::vector<ze_event_handle_t> waitEvents;
|
||||
std::vector<KernelMutableState> kernelStates;
|
||||
};
|
||||
|
||||
template <CaptureApi api>
|
||||
struct Closure {
|
||||
static constexpr bool isSupported = false;
|
||||
@@ -69,9 +110,9 @@ struct Closure {
|
||||
ze_event_handle_t *phWaitEvents = nullptr;
|
||||
};
|
||||
|
||||
Closure(const ApiArgs &apiArgs) {}
|
||||
Closure(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) {}
|
||||
|
||||
ze_result_t instantiateTo(CommandList &executionTarget) const {
|
||||
ze_result_t instantiateTo(CommandList &executionTarget, ClosureExternalStorage &externalStorage) const {
|
||||
DEBUG_BREAK_IF(true);
|
||||
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
|
||||
}
|
||||
@@ -136,28 +177,22 @@ struct IndirectArgsWithWaitEvents {
|
||||
IndirectArgsWithWaitEvents() = default;
|
||||
template <typename ApiArgsT>
|
||||
requires HasPhWaitEvents<ApiArgsT>
|
||||
IndirectArgsWithWaitEvents(const ApiArgsT &apiArgs) {
|
||||
waitEvents.reserve(apiArgs.numWaitEvents);
|
||||
for (uint32_t i = 0; i < apiArgs.numWaitEvents; ++i) {
|
||||
waitEvents.push_back(apiArgs.phWaitEvents[i]);
|
||||
}
|
||||
IndirectArgsWithWaitEvents(const ApiArgsT &apiArgs, ClosureExternalStorage &externalStorage) {
|
||||
waitEvents = externalStorage.registerEventsWaitList(apiArgs.phWaitEvents, apiArgs.phWaitEvents + apiArgs.numWaitEvents);
|
||||
}
|
||||
|
||||
template <typename ApiArgsT>
|
||||
requires(HasPhEvents<ApiArgsT> && (false == HasPhWaitEvents<ApiArgsT>))
|
||||
IndirectArgsWithWaitEvents(const ApiArgsT &apiArgs) {
|
||||
waitEvents.reserve(apiArgs.numEvents);
|
||||
for (uint32_t i = 0; i < apiArgs.numEvents; ++i) {
|
||||
waitEvents.push_back(apiArgs.phEvents[i]);
|
||||
}
|
||||
IndirectArgsWithWaitEvents(const ApiArgsT &apiArgs, ClosureExternalStorage &externalStorage) {
|
||||
waitEvents = externalStorage.registerEventsWaitList(apiArgs.phEvents, apiArgs.phEvents + apiArgs.numEvents);
|
||||
}
|
||||
|
||||
StackVec<ze_event_handle_t, 8> waitEvents;
|
||||
ClosureExternalStorage::EventsWaitListId waitEvents = ClosureExternalStorage::invalidEventsWaitListId;
|
||||
};
|
||||
|
||||
struct EmptyIndirectArgs {
|
||||
template <typename ApiArgsT>
|
||||
EmptyIndirectArgs(const ApiArgsT &apiArgs) {}
|
||||
EmptyIndirectArgs(const ApiArgsT &apiArgs, ClosureExternalStorage &externalStorage) {}
|
||||
};
|
||||
|
||||
template <>
|
||||
@@ -177,9 +212,9 @@ struct Closure<CaptureApi::zeCommandListAppendMemoryCopy> {
|
||||
using IndirectArgs = IndirectArgsWithWaitEvents;
|
||||
IndirectArgs indirectArgs;
|
||||
|
||||
Closure(const ApiArgs &apiArgs) : apiArgs(apiArgs), indirectArgs(apiArgs) {}
|
||||
Closure(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : apiArgs(apiArgs), indirectArgs(apiArgs, externalStorage) {}
|
||||
|
||||
ze_result_t instantiateTo(CommandList &executionTarget) const;
|
||||
ze_result_t instantiateTo(CommandList &executionTarget, ClosureExternalStorage &externalStorage) const;
|
||||
};
|
||||
|
||||
template <>
|
||||
@@ -196,9 +231,9 @@ struct Closure<CaptureApi::zeCommandListAppendBarrier> {
|
||||
using IndirectArgs = IndirectArgsWithWaitEvents;
|
||||
IndirectArgs indirectArgs;
|
||||
|
||||
Closure(const ApiArgs &apiArgs) : apiArgs(apiArgs), indirectArgs(apiArgs) {}
|
||||
Closure(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : apiArgs(apiArgs), indirectArgs(apiArgs, externalStorage) {}
|
||||
|
||||
ze_result_t instantiateTo(CommandList &executionTarget) const;
|
||||
ze_result_t instantiateTo(CommandList &executionTarget, ClosureExternalStorage &externalStorage) const;
|
||||
};
|
||||
|
||||
template <>
|
||||
@@ -214,9 +249,9 @@ struct Closure<CaptureApi::zeCommandListAppendWaitOnEvents> {
|
||||
using IndirectArgs = IndirectArgsWithWaitEvents;
|
||||
IndirectArgs indirectArgs;
|
||||
|
||||
Closure(const ApiArgs &apiArgs) : apiArgs(apiArgs), indirectArgs(apiArgs) {}
|
||||
Closure(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : apiArgs(apiArgs), indirectArgs(apiArgs, externalStorage) {}
|
||||
|
||||
ze_result_t instantiateTo(CommandList &executionTarget) const;
|
||||
ze_result_t instantiateTo(CommandList &executionTarget, ClosureExternalStorage &externalStorage) const;
|
||||
};
|
||||
|
||||
template <>
|
||||
@@ -234,9 +269,9 @@ struct Closure<CaptureApi::zeCommandListAppendWriteGlobalTimestamp> {
|
||||
using IndirectArgs = IndirectArgsWithWaitEvents;
|
||||
IndirectArgs indirectArgs;
|
||||
|
||||
Closure(const ApiArgs &apiArgs) : apiArgs(apiArgs), indirectArgs(apiArgs) {}
|
||||
Closure(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : apiArgs(apiArgs), indirectArgs(apiArgs, externalStorage) {}
|
||||
|
||||
ze_result_t instantiateTo(CommandList &executionTarget) const;
|
||||
ze_result_t instantiateTo(CommandList &executionTarget, ClosureExternalStorage &externalStorage) const;
|
||||
};
|
||||
|
||||
template <>
|
||||
@@ -254,7 +289,7 @@ struct Closure<CaptureApi::zeCommandListAppendMemoryRangesBarrier> {
|
||||
} apiArgs;
|
||||
|
||||
struct IndirectArgs : IndirectArgsWithWaitEvents {
|
||||
IndirectArgs(const Closure::ApiArgs &apiArgs) : IndirectArgsWithWaitEvents(apiArgs) {
|
||||
IndirectArgs(const Closure::ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : IndirectArgsWithWaitEvents(apiArgs, externalStorage) {
|
||||
rangeSizes.resize(apiArgs.numRanges);
|
||||
ranges.resize(apiArgs.numRanges);
|
||||
std::copy_n(apiArgs.pRangeSizes, apiArgs.numRanges, rangeSizes.begin());
|
||||
@@ -264,9 +299,9 @@ struct Closure<CaptureApi::zeCommandListAppendMemoryRangesBarrier> {
|
||||
StackVec<const void *, 1> ranges;
|
||||
} indirectArgs;
|
||||
|
||||
Closure(const ApiArgs &apiArgs) : apiArgs(apiArgs), indirectArgs(apiArgs) {}
|
||||
Closure(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : apiArgs(apiArgs), indirectArgs(apiArgs, externalStorage) {}
|
||||
|
||||
ze_result_t instantiateTo(CommandList &executionTarget) const;
|
||||
ze_result_t instantiateTo(CommandList &executionTarget, ClosureExternalStorage &externalStorage) const;
|
||||
};
|
||||
|
||||
template <>
|
||||
@@ -285,16 +320,16 @@ struct Closure<CaptureApi::zeCommandListAppendMemoryFill> {
|
||||
} apiArgs;
|
||||
|
||||
struct IndirectArgs : IndirectArgsWithWaitEvents {
|
||||
IndirectArgs(const Closure::ApiArgs &apiArgs) : IndirectArgsWithWaitEvents(apiArgs) {
|
||||
IndirectArgs(const Closure::ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : IndirectArgsWithWaitEvents(apiArgs, externalStorage) {
|
||||
pattern.resize(apiArgs.patternSize);
|
||||
memcpy_s(pattern.data(), pattern.size(), apiArgs.pattern, apiArgs.patternSize);
|
||||
}
|
||||
StackVec<uint8_t, 16> pattern;
|
||||
} indirectArgs;
|
||||
|
||||
Closure(const ApiArgs &apiArgs) : apiArgs(apiArgs), indirectArgs(apiArgs) {}
|
||||
Closure(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : apiArgs(apiArgs), indirectArgs(apiArgs, externalStorage) {}
|
||||
|
||||
ze_result_t instantiateTo(CommandList &executionTarget) const;
|
||||
ze_result_t instantiateTo(CommandList &executionTarget, ClosureExternalStorage &externalStorage) const;
|
||||
};
|
||||
|
||||
template <>
|
||||
@@ -317,7 +352,7 @@ struct Closure<CaptureApi::zeCommandListAppendMemoryCopyRegion> {
|
||||
} apiArgs;
|
||||
|
||||
struct IndirectArgs : IndirectArgsWithWaitEvents {
|
||||
IndirectArgs(const Closure::ApiArgs &apiArgs) : IndirectArgsWithWaitEvents(apiArgs) {
|
||||
IndirectArgs(const Closure::ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : IndirectArgsWithWaitEvents(apiArgs, externalStorage) {
|
||||
dstRegion = *apiArgs.dstRegion;
|
||||
srcRegion = *apiArgs.srcRegion;
|
||||
}
|
||||
@@ -325,9 +360,9 @@ struct Closure<CaptureApi::zeCommandListAppendMemoryCopyRegion> {
|
||||
ze_copy_region_t srcRegion;
|
||||
} indirectArgs;
|
||||
|
||||
Closure(const ApiArgs &apiArgs) : apiArgs(apiArgs), indirectArgs(apiArgs) {}
|
||||
Closure(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : apiArgs(apiArgs), indirectArgs(apiArgs, externalStorage) {}
|
||||
|
||||
ze_result_t instantiateTo(CommandList &executionTarget) const;
|
||||
ze_result_t instantiateTo(CommandList &executionTarget, ClosureExternalStorage &externalStorage) const;
|
||||
};
|
||||
|
||||
template <>
|
||||
@@ -348,9 +383,9 @@ struct Closure<CaptureApi::zeCommandListAppendMemoryCopyFromContext> {
|
||||
using IndirectArgs = IndirectArgsWithWaitEvents;
|
||||
IndirectArgs indirectArgs;
|
||||
|
||||
Closure(const ApiArgs &apiArgs) : apiArgs(apiArgs), indirectArgs(apiArgs) {}
|
||||
Closure(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : apiArgs(apiArgs), indirectArgs(apiArgs, externalStorage) {}
|
||||
|
||||
ze_result_t instantiateTo(CommandList &executionTarget) const;
|
||||
ze_result_t instantiateTo(CommandList &executionTarget, ClosureExternalStorage &externalStorage) const;
|
||||
};
|
||||
|
||||
template <>
|
||||
@@ -369,9 +404,9 @@ struct Closure<CaptureApi::zeCommandListAppendImageCopy> {
|
||||
using IndirectArgs = IndirectArgsWithWaitEvents;
|
||||
IndirectArgs indirectArgs;
|
||||
|
||||
Closure(const ApiArgs &apiArgs) : apiArgs(apiArgs), indirectArgs(apiArgs) {}
|
||||
Closure(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : apiArgs(apiArgs), indirectArgs(apiArgs, externalStorage) {}
|
||||
|
||||
ze_result_t instantiateTo(CommandList &executionTarget) const;
|
||||
ze_result_t instantiateTo(CommandList &executionTarget, ClosureExternalStorage &externalStorage) const;
|
||||
};
|
||||
|
||||
template <>
|
||||
@@ -390,7 +425,7 @@ struct Closure<CaptureApi::zeCommandListAppendImageCopyRegion> {
|
||||
} apiArgs;
|
||||
|
||||
struct IndirectArgs : IndirectArgsWithWaitEvents {
|
||||
IndirectArgs(const Closure::ApiArgs &apiArgs) : IndirectArgsWithWaitEvents(apiArgs) {
|
||||
IndirectArgs(const Closure::ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : IndirectArgsWithWaitEvents(apiArgs, externalStorage) {
|
||||
dstRegion = *apiArgs.pDstRegion;
|
||||
srcRegion = *apiArgs.pSrcRegion;
|
||||
}
|
||||
@@ -398,9 +433,9 @@ struct Closure<CaptureApi::zeCommandListAppendImageCopyRegion> {
|
||||
ze_image_region_t srcRegion;
|
||||
} indirectArgs;
|
||||
|
||||
Closure(const ApiArgs &apiArgs) : apiArgs(apiArgs), indirectArgs(apiArgs) {}
|
||||
Closure(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : apiArgs(apiArgs), indirectArgs(apiArgs, externalStorage) {}
|
||||
|
||||
ze_result_t instantiateTo(CommandList &executionTarget) const;
|
||||
ze_result_t instantiateTo(CommandList &executionTarget, ClosureExternalStorage &externalStorage) const;
|
||||
};
|
||||
|
||||
template <>
|
||||
@@ -418,15 +453,15 @@ struct Closure<CaptureApi::zeCommandListAppendImageCopyToMemory> {
|
||||
} apiArgs;
|
||||
|
||||
struct IndirectArgs : IndirectArgsWithWaitEvents {
|
||||
IndirectArgs(const Closure::ApiArgs &apiArgs) : IndirectArgsWithWaitEvents(apiArgs) {
|
||||
IndirectArgs(const Closure::ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : IndirectArgsWithWaitEvents(apiArgs, externalStorage) {
|
||||
srcRegion = *apiArgs.pSrcRegion;
|
||||
}
|
||||
ze_image_region_t srcRegion;
|
||||
} indirectArgs;
|
||||
|
||||
Closure(const ApiArgs &apiArgs) : apiArgs(apiArgs), indirectArgs(apiArgs) {}
|
||||
Closure(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : apiArgs(apiArgs), indirectArgs(apiArgs, externalStorage) {}
|
||||
|
||||
ze_result_t instantiateTo(CommandList &executionTarget) const;
|
||||
ze_result_t instantiateTo(CommandList &executionTarget, ClosureExternalStorage &externalStorage) const;
|
||||
};
|
||||
|
||||
template <>
|
||||
@@ -444,15 +479,15 @@ struct Closure<CaptureApi::zeCommandListAppendImageCopyFromMemory> {
|
||||
} apiArgs;
|
||||
|
||||
struct IndirectArgs : IndirectArgsWithWaitEvents {
|
||||
IndirectArgs(const Closure::ApiArgs &apiArgs) : IndirectArgsWithWaitEvents(apiArgs) {
|
||||
IndirectArgs(const Closure::ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : IndirectArgsWithWaitEvents(apiArgs, externalStorage) {
|
||||
dstRegion = *apiArgs.pDstRegion;
|
||||
}
|
||||
ze_image_region_t dstRegion;
|
||||
} indirectArgs;
|
||||
|
||||
Closure(const ApiArgs &apiArgs) : apiArgs(apiArgs), indirectArgs(apiArgs) {}
|
||||
Closure(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : apiArgs(apiArgs), indirectArgs(apiArgs, externalStorage) {}
|
||||
|
||||
ze_result_t instantiateTo(CommandList &executionTarget) const;
|
||||
ze_result_t instantiateTo(CommandList &executionTarget, ClosureExternalStorage &externalStorage) const;
|
||||
};
|
||||
|
||||
template <>
|
||||
@@ -468,9 +503,9 @@ struct Closure<CaptureApi::zeCommandListAppendMemoryPrefetch> {
|
||||
using IndirectArgs = EmptyIndirectArgs;
|
||||
IndirectArgs indirectArgs;
|
||||
|
||||
Closure(const ApiArgs &apiArgs) : apiArgs(apiArgs), indirectArgs(apiArgs) {}
|
||||
Closure(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : apiArgs(apiArgs), indirectArgs(apiArgs, externalStorage) {}
|
||||
|
||||
ze_result_t instantiateTo(CommandList &executionTarget) const;
|
||||
ze_result_t instantiateTo(CommandList &executionTarget, ClosureExternalStorage &externalStorage) const;
|
||||
};
|
||||
|
||||
template <>
|
||||
@@ -488,9 +523,9 @@ struct Closure<CaptureApi::zeCommandListAppendMemAdvise> {
|
||||
using IndirectArgs = EmptyIndirectArgs;
|
||||
IndirectArgs indirectArgs;
|
||||
|
||||
Closure(const ApiArgs &apiArgs) : apiArgs(apiArgs), indirectArgs(apiArgs) {}
|
||||
Closure(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : apiArgs(apiArgs), indirectArgs(apiArgs, externalStorage) {}
|
||||
|
||||
ze_result_t instantiateTo(CommandList &executionTarget) const;
|
||||
ze_result_t instantiateTo(CommandList &executionTarget, ClosureExternalStorage &externalStorage) const;
|
||||
};
|
||||
|
||||
template <>
|
||||
@@ -505,9 +540,9 @@ struct Closure<CaptureApi::zeCommandListAppendSignalEvent> {
|
||||
using IndirectArgs = EmptyIndirectArgs;
|
||||
IndirectArgs indirectArgs;
|
||||
|
||||
Closure(const ApiArgs &apiArgs) : apiArgs(apiArgs), indirectArgs(apiArgs) {}
|
||||
Closure(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : apiArgs(apiArgs), indirectArgs(apiArgs, externalStorage) {}
|
||||
|
||||
ze_result_t instantiateTo(CommandList &executionTarget) const;
|
||||
ze_result_t instantiateTo(CommandList &executionTarget, ClosureExternalStorage &externalStorage) const;
|
||||
};
|
||||
|
||||
template <>
|
||||
@@ -522,9 +557,9 @@ struct Closure<CaptureApi::zeCommandListAppendEventReset> {
|
||||
using IndirectArgs = EmptyIndirectArgs;
|
||||
IndirectArgs indirectArgs;
|
||||
|
||||
Closure(const ApiArgs &apiArgs) : apiArgs(apiArgs), indirectArgs(apiArgs) {}
|
||||
Closure(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : apiArgs(apiArgs), indirectArgs(apiArgs, externalStorage) {}
|
||||
|
||||
ze_result_t instantiateTo(CommandList &executionTarget) const;
|
||||
ze_result_t instantiateTo(CommandList &executionTarget, ClosureExternalStorage &externalStorage) const;
|
||||
};
|
||||
|
||||
template <>
|
||||
@@ -543,7 +578,7 @@ struct Closure<CaptureApi::zeCommandListAppendQueryKernelTimestamps> {
|
||||
} apiArgs;
|
||||
|
||||
struct IndirectArgs : IndirectArgsWithWaitEvents {
|
||||
IndirectArgs(const Closure::ApiArgs &apiArgs) : IndirectArgsWithWaitEvents(apiArgs) {
|
||||
IndirectArgs(const Closure::ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : IndirectArgsWithWaitEvents(apiArgs, externalStorage) {
|
||||
events.resize(apiArgs.numEvents);
|
||||
offsets.resize(apiArgs.numEvents);
|
||||
std::copy_n(apiArgs.phEvents, apiArgs.numEvents, events.begin());
|
||||
@@ -556,9 +591,9 @@ struct Closure<CaptureApi::zeCommandListAppendQueryKernelTimestamps> {
|
||||
StackVec<size_t, 1> offsets;
|
||||
} indirectArgs;
|
||||
|
||||
Closure(const ApiArgs &apiArgs) : apiArgs(apiArgs), indirectArgs(apiArgs) {}
|
||||
Closure(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : apiArgs(apiArgs), indirectArgs(apiArgs, externalStorage) {}
|
||||
|
||||
ze_result_t instantiateTo(CommandList &executionTarget) const;
|
||||
ze_result_t instantiateTo(CommandList &executionTarget, ClosureExternalStorage &externalStorage) const;
|
||||
};
|
||||
|
||||
template <>
|
||||
@@ -576,7 +611,7 @@ struct Closure<CaptureApi::zeCommandListAppendSignalExternalSemaphoreExt> {
|
||||
} apiArgs;
|
||||
|
||||
struct IndirectArgs : IndirectArgsWithWaitEvents {
|
||||
IndirectArgs(const Closure::ApiArgs &apiArgs) : IndirectArgsWithWaitEvents(apiArgs) {
|
||||
IndirectArgs(const Closure::ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : IndirectArgsWithWaitEvents(apiArgs, externalStorage) {
|
||||
semaphores.resize(apiArgs.numSemaphores);
|
||||
std::copy_n(apiArgs.phSemaphores, apiArgs.numSemaphores, semaphores.begin());
|
||||
signalParams = *apiArgs.signalParams;
|
||||
@@ -585,9 +620,9 @@ struct Closure<CaptureApi::zeCommandListAppendSignalExternalSemaphoreExt> {
|
||||
ze_external_semaphore_signal_params_ext_t signalParams;
|
||||
} indirectArgs;
|
||||
|
||||
Closure(const ApiArgs &apiArgs) : apiArgs(apiArgs), indirectArgs(apiArgs) {}
|
||||
Closure(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : apiArgs(apiArgs), indirectArgs(apiArgs, externalStorage) {}
|
||||
|
||||
ze_result_t instantiateTo(CommandList &executionTarget) const;
|
||||
ze_result_t instantiateTo(CommandList &executionTarget, ClosureExternalStorage &externalStorage) const;
|
||||
};
|
||||
|
||||
template <>
|
||||
@@ -605,7 +640,7 @@ struct Closure<CaptureApi::zeCommandListAppendWaitExternalSemaphoreExt> {
|
||||
} apiArgs;
|
||||
|
||||
struct IndirectArgs : IndirectArgsWithWaitEvents {
|
||||
IndirectArgs(const Closure::ApiArgs &apiArgs) : IndirectArgsWithWaitEvents(apiArgs) {
|
||||
IndirectArgs(const Closure::ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : IndirectArgsWithWaitEvents(apiArgs, externalStorage) {
|
||||
semaphores.resize(apiArgs.numSemaphores);
|
||||
std::copy_n(apiArgs.phSemaphores, apiArgs.numSemaphores, semaphores.begin());
|
||||
waitParams = *apiArgs.waitParams;
|
||||
@@ -614,9 +649,9 @@ struct Closure<CaptureApi::zeCommandListAppendWaitExternalSemaphoreExt> {
|
||||
ze_external_semaphore_wait_params_ext_t waitParams;
|
||||
} indirectArgs;
|
||||
|
||||
Closure(const ApiArgs &apiArgs) : apiArgs(apiArgs), indirectArgs(apiArgs) {}
|
||||
Closure(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : apiArgs(apiArgs), indirectArgs(apiArgs, externalStorage) {}
|
||||
|
||||
ze_result_t instantiateTo(CommandList &executionTarget) const;
|
||||
ze_result_t instantiateTo(CommandList &executionTarget, ClosureExternalStorage &externalStorage) const;
|
||||
};
|
||||
|
||||
template <>
|
||||
@@ -636,15 +671,15 @@ struct Closure<CaptureApi::zeCommandListAppendImageCopyToMemoryExt> {
|
||||
} apiArgs;
|
||||
|
||||
struct IndirectArgs : IndirectArgsWithWaitEvents {
|
||||
IndirectArgs(const Closure::ApiArgs &apiArgs) : IndirectArgsWithWaitEvents(apiArgs) {
|
||||
IndirectArgs(const Closure::ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : IndirectArgsWithWaitEvents(apiArgs, externalStorage) {
|
||||
srcRegion = *apiArgs.pSrcRegion;
|
||||
}
|
||||
ze_image_region_t srcRegion;
|
||||
} indirectArgs;
|
||||
|
||||
Closure(const ApiArgs &apiArgs) : apiArgs(apiArgs), indirectArgs(apiArgs) {}
|
||||
Closure(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : apiArgs(apiArgs), indirectArgs(apiArgs, externalStorage) {}
|
||||
|
||||
ze_result_t instantiateTo(CommandList &executionTarget) const;
|
||||
ze_result_t instantiateTo(CommandList &executionTarget, ClosureExternalStorage &externalStorage) const;
|
||||
};
|
||||
|
||||
template <>
|
||||
@@ -664,15 +699,15 @@ struct Closure<CaptureApi::zeCommandListAppendImageCopyFromMemoryExt> {
|
||||
} apiArgs;
|
||||
|
||||
struct IndirectArgs : IndirectArgsWithWaitEvents {
|
||||
IndirectArgs(const Closure::ApiArgs &apiArgs) : IndirectArgsWithWaitEvents(apiArgs) {
|
||||
IndirectArgs(const Closure::ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : IndirectArgsWithWaitEvents(apiArgs, externalStorage) {
|
||||
dstRegion = *apiArgs.pDstRegion;
|
||||
}
|
||||
ze_image_region_t dstRegion;
|
||||
} indirectArgs;
|
||||
|
||||
Closure(const ApiArgs &apiArgs) : apiArgs(apiArgs), indirectArgs(apiArgs) {}
|
||||
Closure(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : apiArgs(apiArgs), indirectArgs(apiArgs, externalStorage) {}
|
||||
|
||||
ze_result_t instantiateTo(CommandList &executionTarget) const;
|
||||
ze_result_t instantiateTo(CommandList &executionTarget, ClosureExternalStorage &externalStorage) const;
|
||||
};
|
||||
|
||||
template <>
|
||||
@@ -688,20 +723,15 @@ struct Closure<CaptureApi::zeCommandListAppendLaunchKernel> {
|
||||
ze_event_handle_t *phWaitEvents;
|
||||
} apiArgs;
|
||||
|
||||
struct IndirectArgs {
|
||||
struct IndirectArgs : IndirectArgsWithWaitEvents {
|
||||
IndirectArgs(const Closure::ApiArgs &apiArgs, ClosureExternalStorage &externalStorage);
|
||||
ze_group_count_t launchKernelArgs;
|
||||
KernelMutableState kernelState;
|
||||
StackVec<ze_event_handle_t, 8> waitEvents;
|
||||
ClosureExternalStorage::KernelStateId kernelState = ClosureExternalStorage::invalidKernelStateId;
|
||||
} indirectArgs;
|
||||
|
||||
Closure(const ApiArgs &apiArgs);
|
||||
Closure(const Closure &) = delete;
|
||||
Closure(Closure &&rhs) = default;
|
||||
Closure &operator=(const Closure &) = delete;
|
||||
Closure &operator=(Closure &&) = delete;
|
||||
~Closure() = default;
|
||||
Closure(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : apiArgs(apiArgs), indirectArgs(apiArgs, externalStorage) {}
|
||||
|
||||
ze_result_t instantiateTo(CommandList &executionTarget) const;
|
||||
ze_result_t instantiateTo(CommandList &executionTarget, ClosureExternalStorage &externalStorage) const;
|
||||
};
|
||||
|
||||
} // namespace L0
|
||||
|
||||
@@ -89,76 +89,76 @@ auto getOptionalData(ContainerT &container) {
|
||||
return container.empty() ? nullptr : container.data();
|
||||
}
|
||||
|
||||
// Replays a recorded zeCommandListAppendMemoryCopy call onto executionTarget and
// returns the ze_result_t of that call. Scalar arguments come from apiArgs.
// NOTE(review): two signatures and two return statements share this span (it reads
// like the before/after rows of a diff). The first passes the wait events through
// getOptionalData(indirectArgs.waitEvents); the second looks the wait list up with
// externalStorage.getEventsWaitList(indirectArgs.waitEvents). Confirm which is current.
ze_result_t Closure<CaptureApi::zeCommandListAppendMemoryCopy>::instantiateTo(L0::CommandList &executionTarget) const {
|
||||
return zeCommandListAppendMemoryCopy(&executionTarget, apiArgs.dstptr, apiArgs.srcptr, apiArgs.size, apiArgs.hSignalEvent, apiArgs.numWaitEvents, const_cast<ze_event_handle_t *>(getOptionalData(indirectArgs.waitEvents)));
|
||||
ze_result_t Closure<CaptureApi::zeCommandListAppendMemoryCopy>::instantiateTo(L0::CommandList &executionTarget, ClosureExternalStorage &externalStorage) const {
|
||||
return zeCommandListAppendMemoryCopy(&executionTarget, apiArgs.dstptr, apiArgs.srcptr, apiArgs.size, apiArgs.hSignalEvent, apiArgs.numWaitEvents, externalStorage.getEventsWaitList(indirectArgs.waitEvents));
|
||||
}
|
||||
|
||||
// Replays a recorded zeCommandListAppendBarrier call onto executionTarget, forwarding
// the captured signal event and wait-event count, and returns the call's result.
// NOTE(review): both a one-argument and a two-argument variant appear here; only the
// second one reads the wait list from externalStorage.getEventsWaitList(...).
ze_result_t Closure<CaptureApi::zeCommandListAppendBarrier>::instantiateTo(L0::CommandList &executionTarget) const {
|
||||
return zeCommandListAppendBarrier(&executionTarget, apiArgs.hSignalEvent, apiArgs.numWaitEvents, const_cast<ze_event_handle_t *>(getOptionalData(indirectArgs.waitEvents)));
|
||||
ze_result_t Closure<CaptureApi::zeCommandListAppendBarrier>::instantiateTo(L0::CommandList &executionTarget, ClosureExternalStorage &externalStorage) const {
|
||||
return zeCommandListAppendBarrier(&executionTarget, apiArgs.hSignalEvent, apiArgs.numWaitEvents, externalStorage.getEventsWaitList(indirectArgs.waitEvents));
|
||||
}
|
||||
|
||||
// Replays a recorded zeCommandListAppendWaitOnEvents call onto executionTarget.
// The event count is apiArgs.numEvents; the event array is taken from
// indirectArgs.waitEvents (directly in the first variant, through
// externalStorage.getEventsWaitList in the second).
// NOTE(review): old and new rows appear interleaved — confirm which variant is live.
ze_result_t Closure<CaptureApi::zeCommandListAppendWaitOnEvents>::instantiateTo(L0::CommandList &executionTarget) const {
|
||||
return zeCommandListAppendWaitOnEvents(&executionTarget, apiArgs.numEvents, const_cast<ze_event_handle_t *>(getOptionalData(indirectArgs.waitEvents)));
|
||||
ze_result_t Closure<CaptureApi::zeCommandListAppendWaitOnEvents>::instantiateTo(L0::CommandList &executionTarget, ClosureExternalStorage &externalStorage) const {
|
||||
return zeCommandListAppendWaitOnEvents(&executionTarget, apiArgs.numEvents, externalStorage.getEventsWaitList(indirectArgs.waitEvents));
|
||||
}
|
||||
|
||||
// Replays a recorded zeCommandListAppendWriteGlobalTimestamp call onto executionTarget
// with the captured destination pointer, signal event and wait events.
// NOTE(review): the span holds two variants; the one taking externalStorage obtains
// the wait list via externalStorage.getEventsWaitList(indirectArgs.waitEvents).
ze_result_t Closure<CaptureApi::zeCommandListAppendWriteGlobalTimestamp>::instantiateTo(L0::CommandList &executionTarget) const {
|
||||
return zeCommandListAppendWriteGlobalTimestamp(&executionTarget, apiArgs.dstptr, apiArgs.hSignalEvent, apiArgs.numWaitEvents, const_cast<ze_event_handle_t *>(getOptionalData(indirectArgs.waitEvents)));
|
||||
ze_result_t Closure<CaptureApi::zeCommandListAppendWriteGlobalTimestamp>::instantiateTo(L0::CommandList &executionTarget, ClosureExternalStorage &externalStorage) const {
|
||||
return zeCommandListAppendWriteGlobalTimestamp(&executionTarget, apiArgs.dstptr, apiArgs.hSignalEvent, apiArgs.numWaitEvents, externalStorage.getEventsWaitList(indirectArgs.waitEvents));
|
||||
}
|
||||
|
||||
// Replays a recorded zeCommandListAppendMemoryRangesBarrier call onto executionTarget.
// Range sizes and range pointers are read from indirectArgs via getOptionalData,
// which yields nullptr for an empty container.
// NOTE(review): two signatures and two trailing argument rows are interleaved; the
// second trailing row resolves wait events through externalStorage.getEventsWaitList.
ze_result_t Closure<CaptureApi::zeCommandListAppendMemoryRangesBarrier>::instantiateTo(L0::CommandList &executionTarget) const {
|
||||
ze_result_t Closure<CaptureApi::zeCommandListAppendMemoryRangesBarrier>::instantiateTo(L0::CommandList &executionTarget, ClosureExternalStorage &externalStorage) const {
|
||||
return zeCommandListAppendMemoryRangesBarrier(&executionTarget, apiArgs.numRanges, getOptionalData(indirectArgs.rangeSizes), const_cast<const void **>(getOptionalData(indirectArgs.ranges)),
|
||||
apiArgs.hSignalEvent, apiArgs.numWaitEvents, const_cast<ze_event_handle_t *>(getOptionalData(indirectArgs.waitEvents)));
|
||||
apiArgs.hSignalEvent, apiArgs.numWaitEvents, externalStorage.getEventsWaitList(indirectArgs.waitEvents));
|
||||
}
|
||||
|
||||
// Replays a recorded zeCommandListAppendMemoryFill call onto executionTarget.
// The fill pattern is read from indirectArgs.pattern (nullptr when that container
// is empty); pattern size and fill size come from apiArgs.
// NOTE(review): old and new rows are interleaved; the later trailing row takes the
// wait list from externalStorage.getEventsWaitList(indirectArgs.waitEvents).
ze_result_t Closure<CaptureApi::zeCommandListAppendMemoryFill>::instantiateTo(L0::CommandList &executionTarget) const {
|
||||
ze_result_t Closure<CaptureApi::zeCommandListAppendMemoryFill>::instantiateTo(L0::CommandList &executionTarget, ClosureExternalStorage &externalStorage) const {
|
||||
return zeCommandListAppendMemoryFill(&executionTarget, apiArgs.ptr, getOptionalData(indirectArgs.pattern), apiArgs.patternSize, apiArgs.size,
|
||||
apiArgs.hSignalEvent, apiArgs.numWaitEvents, const_cast<ze_event_handle_t *>(getOptionalData(indirectArgs.waitEvents)));
|
||||
apiArgs.hSignalEvent, apiArgs.numWaitEvents, externalStorage.getEventsWaitList(indirectArgs.waitEvents));
|
||||
}
|
||||
|
||||
// Replays a recorded zeCommandListAppendMemoryCopyRegion call onto executionTarget.
// The destination and source regions are passed by address from the copies held in
// indirectArgs (dstRegion / srcRegion); pitches and pointers come from apiArgs.
// NOTE(review): two signatures and two trailing argument rows are interleaved —
// confirm which is current. The later row uses externalStorage.getEventsWaitList.
ze_result_t Closure<CaptureApi::zeCommandListAppendMemoryCopyRegion>::instantiateTo(L0::CommandList &executionTarget) const {
|
||||
ze_result_t Closure<CaptureApi::zeCommandListAppendMemoryCopyRegion>::instantiateTo(L0::CommandList &executionTarget, ClosureExternalStorage &externalStorage) const {
|
||||
return zeCommandListAppendMemoryCopyRegion(&executionTarget, apiArgs.dstptr, &indirectArgs.dstRegion, apiArgs.dstPitch, apiArgs.dstSlicePitch,
|
||||
apiArgs.srcptr, &indirectArgs.srcRegion, apiArgs.srcPitch, apiArgs.srcSlicePitch,
|
||||
apiArgs.hSignalEvent, apiArgs.numWaitEvents, const_cast<ze_event_handle_t *>(getOptionalData(indirectArgs.waitEvents)));
|
||||
apiArgs.hSignalEvent, apiArgs.numWaitEvents, externalStorage.getEventsWaitList(indirectArgs.waitEvents));
|
||||
}
|
||||
|
||||
// Replays a recorded zeCommandListAppendMemoryCopyFromContext call onto
// executionTarget using the captured destination, source context, source pointer
// and size from apiArgs.
// NOTE(review): before/after rows appear together; the later trailing row fetches
// the wait list with externalStorage.getEventsWaitList(indirectArgs.waitEvents).
ze_result_t Closure<CaptureApi::zeCommandListAppendMemoryCopyFromContext>::instantiateTo(L0::CommandList &executionTarget) const {
|
||||
ze_result_t Closure<CaptureApi::zeCommandListAppendMemoryCopyFromContext>::instantiateTo(L0::CommandList &executionTarget, ClosureExternalStorage &externalStorage) const {
|
||||
return zeCommandListAppendMemoryCopyFromContext(&executionTarget, apiArgs.dstptr, apiArgs.hContextSrc, apiArgs.srcptr, apiArgs.size,
|
||||
apiArgs.hSignalEvent, apiArgs.numWaitEvents, const_cast<ze_event_handle_t *>(getOptionalData(indirectArgs.waitEvents)));
|
||||
apiArgs.hSignalEvent, apiArgs.numWaitEvents, externalStorage.getEventsWaitList(indirectArgs.waitEvents));
|
||||
}
|
||||
|
||||
// Replays a recorded zeCommandListAppendImageCopy call onto executionTarget with
// the captured destination and source image handles.
// NOTE(review): two signatures and two trailing argument rows are interleaved; the
// later row resolves wait events through externalStorage.getEventsWaitList.
ze_result_t Closure<CaptureApi::zeCommandListAppendImageCopy>::instantiateTo(L0::CommandList &executionTarget) const {
|
||||
ze_result_t Closure<CaptureApi::zeCommandListAppendImageCopy>::instantiateTo(L0::CommandList &executionTarget, ClosureExternalStorage &externalStorage) const {
|
||||
return zeCommandListAppendImageCopy(&executionTarget, apiArgs.hDstImage, apiArgs.hSrcImage,
|
||||
apiArgs.hSignalEvent, apiArgs.numWaitEvents, const_cast<ze_event_handle_t *>(getOptionalData(indirectArgs.waitEvents)));
|
||||
apiArgs.hSignalEvent, apiArgs.numWaitEvents, externalStorage.getEventsWaitList(indirectArgs.waitEvents));
|
||||
}
|
||||
|
||||
// Replays a recorded zeCommandListAppendImageCopyRegion call onto executionTarget.
// Both regions are passed by address from the copies stored in indirectArgs, so the
// call always receives non-null region pointers from this function.
// NOTE(review): old and new rows are interleaved — confirm which variant is live.
ze_result_t Closure<CaptureApi::zeCommandListAppendImageCopyRegion>::instantiateTo(L0::CommandList &executionTarget) const {
|
||||
ze_result_t Closure<CaptureApi::zeCommandListAppendImageCopyRegion>::instantiateTo(L0::CommandList &executionTarget, ClosureExternalStorage &externalStorage) const {
|
||||
return zeCommandListAppendImageCopyRegion(&executionTarget, apiArgs.hDstImage, apiArgs.hSrcImage, &indirectArgs.dstRegion, &indirectArgs.srcRegion,
|
||||
apiArgs.hSignalEvent, apiArgs.numWaitEvents, const_cast<ze_event_handle_t *>(getOptionalData(indirectArgs.waitEvents)));
|
||||
apiArgs.hSignalEvent, apiArgs.numWaitEvents, externalStorage.getEventsWaitList(indirectArgs.waitEvents));
|
||||
}
|
||||
|
||||
// Replays a recorded zeCommandListAppendImageCopyToMemory call onto executionTarget.
// The source region is passed by address from indirectArgs.srcRegion; destination
// pointer and source image handle come from apiArgs.
// NOTE(review): two signatures and two trailing argument rows are interleaved; the
// later row obtains the wait list from externalStorage.getEventsWaitList.
ze_result_t Closure<CaptureApi::zeCommandListAppendImageCopyToMemory>::instantiateTo(L0::CommandList &executionTarget) const {
|
||||
ze_result_t Closure<CaptureApi::zeCommandListAppendImageCopyToMemory>::instantiateTo(L0::CommandList &executionTarget, ClosureExternalStorage &externalStorage) const {
|
||||
return zeCommandListAppendImageCopyToMemory(&executionTarget,
|
||||
apiArgs.dstptr,
|
||||
apiArgs.hSrcImage,
|
||||
&indirectArgs.srcRegion,
|
||||
apiArgs.hSignalEvent, apiArgs.numWaitEvents, const_cast<ze_event_handle_t *>(getOptionalData(indirectArgs.waitEvents)));
|
||||
apiArgs.hSignalEvent, apiArgs.numWaitEvents, externalStorage.getEventsWaitList(indirectArgs.waitEvents));
|
||||
}
|
||||
|
||||
// Replays a recorded zeCommandListAppendImageCopyFromMemory call onto executionTarget.
// The destination region is passed by address from indirectArgs.dstRegion; the
// destination image handle and source pointer come from apiArgs.
// NOTE(review): before/after rows appear together; the later trailing row uses
// externalStorage.getEventsWaitList(indirectArgs.waitEvents).
ze_result_t Closure<CaptureApi::zeCommandListAppendImageCopyFromMemory>::instantiateTo(L0::CommandList &executionTarget) const {
|
||||
ze_result_t Closure<CaptureApi::zeCommandListAppendImageCopyFromMemory>::instantiateTo(L0::CommandList &executionTarget, ClosureExternalStorage &externalStorage) const {
|
||||
return zeCommandListAppendImageCopyFromMemory(&executionTarget,
|
||||
apiArgs.hDstImage,
|
||||
apiArgs.srcptr,
|
||||
&indirectArgs.dstRegion,
|
||||
apiArgs.hSignalEvent, apiArgs.numWaitEvents, const_cast<ze_event_handle_t *>(getOptionalData(indirectArgs.waitEvents)));
|
||||
apiArgs.hSignalEvent, apiArgs.numWaitEvents, externalStorage.getEventsWaitList(indirectArgs.waitEvents));
|
||||
}
|
||||
|
||||
// Replays a recorded zeCommandListAppendMemoryPrefetch call onto executionTarget
// with the captured pointer and size. No events are involved in this call.
// NOTE(review): two signatures are interleaved; the body never references
// externalStorage, so that parameter only keeps the signature uniform here.
ze_result_t Closure<CaptureApi::zeCommandListAppendMemoryPrefetch>::instantiateTo(L0::CommandList &executionTarget) const {
|
||||
ze_result_t Closure<CaptureApi::zeCommandListAppendMemoryPrefetch>::instantiateTo(L0::CommandList &executionTarget, ClosureExternalStorage &externalStorage) const {
|
||||
return zeCommandListAppendMemoryPrefetch(&executionTarget,
|
||||
apiArgs.ptr,
|
||||
apiArgs.size);
|
||||
}
|
||||
|
||||
ze_result_t Closure<CaptureApi::zeCommandListAppendMemAdvise>::instantiateTo(L0::CommandList &executionTarget) const {
|
||||
ze_result_t Closure<CaptureApi::zeCommandListAppendMemAdvise>::instantiateTo(L0::CommandList &executionTarget, ClosureExternalStorage &externalStorage) const {
|
||||
return zeCommandListAppendMemAdvise(&executionTarget,
|
||||
apiArgs.hDevice,
|
||||
apiArgs.ptr,
|
||||
@@ -166,78 +166,73 @@ ze_result_t Closure<CaptureApi::zeCommandListAppendMemAdvise>::instantiateTo(L0:
|
||||
apiArgs.advice);
|
||||
}
|
||||
|
||||
// Replays a recorded zeCommandListAppendSignalEvent call onto executionTarget for
// the captured event handle and returns the call's result.
// NOTE(review): two signatures are interleaved; externalStorage is not used by the body.
ze_result_t Closure<CaptureApi::zeCommandListAppendSignalEvent>::instantiateTo(L0::CommandList &executionTarget) const {
|
||||
ze_result_t Closure<CaptureApi::zeCommandListAppendSignalEvent>::instantiateTo(L0::CommandList &executionTarget, ClosureExternalStorage &externalStorage) const {
|
||||
return zeCommandListAppendSignalEvent(&executionTarget,
|
||||
apiArgs.hEvent);
|
||||
}
|
||||
|
||||
// Replays a recorded zeCommandListAppendEventReset call onto executionTarget for
// the captured event handle and returns the call's result.
// NOTE(review): two signatures are interleaved; externalStorage is not used by the body.
ze_result_t Closure<CaptureApi::zeCommandListAppendEventReset>::instantiateTo(L0::CommandList &executionTarget) const {
|
||||
ze_result_t Closure<CaptureApi::zeCommandListAppendEventReset>::instantiateTo(L0::CommandList &executionTarget, ClosureExternalStorage &externalStorage) const {
|
||||
return zeCommandListAppendEventReset(&executionTarget, apiArgs.hEvent);
|
||||
}
|
||||
|
||||
// Replays a recorded zeCommandListAppendQueryKernelTimestamps call onto executionTarget.
// The queried events and the offsets are read from indirectArgs via getOptionalData
// (nullptr when the container is empty); these are separate from the wait events.
// NOTE(review): two signatures and two trailing argument rows are interleaved; only
// the wait list moves to externalStorage.getEventsWaitList in the later row.
ze_result_t Closure<CaptureApi::zeCommandListAppendQueryKernelTimestamps>::instantiateTo(L0::CommandList &executionTarget) const {
|
||||
ze_result_t Closure<CaptureApi::zeCommandListAppendQueryKernelTimestamps>::instantiateTo(L0::CommandList &executionTarget, ClosureExternalStorage &externalStorage) const {
|
||||
return zeCommandListAppendQueryKernelTimestamps(&executionTarget,
|
||||
apiArgs.numEvents,
|
||||
const_cast<ze_event_handle_t *>(getOptionalData(indirectArgs.events)),
|
||||
apiArgs.dstptr,
|
||||
getOptionalData(indirectArgs.offsets),
|
||||
apiArgs.hSignalEvent, apiArgs.numWaitEvents, const_cast<ze_event_handle_t *>(getOptionalData(indirectArgs.waitEvents)));
|
||||
apiArgs.hSignalEvent, apiArgs.numWaitEvents, externalStorage.getEventsWaitList(indirectArgs.waitEvents));
|
||||
}
|
||||
|
||||
// Replays a recorded zeCommandListAppendSignalExternalSemaphoreExt call onto
// executionTarget. Semaphore handles and the signal parameters are read from
// indirectArgs; const_cast strips constness because this method is const while the
// API takes non-const pointers.
// NOTE(review): two signatures and two trailing argument rows are interleaved; the
// later row takes wait events from externalStorage.getEventsWaitList.
ze_result_t Closure<CaptureApi::zeCommandListAppendSignalExternalSemaphoreExt>::instantiateTo(L0::CommandList &executionTarget) const {
|
||||
ze_result_t Closure<CaptureApi::zeCommandListAppendSignalExternalSemaphoreExt>::instantiateTo(L0::CommandList &executionTarget, ClosureExternalStorage &externalStorage) const {
|
||||
return zeCommandListAppendSignalExternalSemaphoreExt(&executionTarget,
|
||||
apiArgs.numSemaphores,
|
||||
const_cast<ze_external_semaphore_ext_handle_t *>(getOptionalData(indirectArgs.semaphores)),
|
||||
const_cast<ze_external_semaphore_signal_params_ext_t *>(&indirectArgs.signalParams),
|
||||
apiArgs.hSignalEvent, apiArgs.numWaitEvents, const_cast<ze_event_handle_t *>(getOptionalData(indirectArgs.waitEvents)));
|
||||
apiArgs.hSignalEvent, apiArgs.numWaitEvents, externalStorage.getEventsWaitList(indirectArgs.waitEvents));
|
||||
}
|
||||
|
||||
// Replays a recorded zeCommandListAppendWaitExternalSemaphoreExt call onto
// executionTarget. Semaphore handles and the wait parameters are read from
// indirectArgs; const_cast strips constness because this method is const while the
// API takes non-const pointers.
// NOTE(review): two signatures and two trailing argument rows are interleaved; the
// later row takes wait events from externalStorage.getEventsWaitList.
ze_result_t Closure<CaptureApi::zeCommandListAppendWaitExternalSemaphoreExt>::instantiateTo(L0::CommandList &executionTarget) const {
|
||||
ze_result_t Closure<CaptureApi::zeCommandListAppendWaitExternalSemaphoreExt>::instantiateTo(L0::CommandList &executionTarget, ClosureExternalStorage &externalStorage) const {
|
||||
return zeCommandListAppendWaitExternalSemaphoreExt(&executionTarget,
|
||||
apiArgs.numSemaphores,
|
||||
const_cast<ze_external_semaphore_ext_handle_t *>(getOptionalData(indirectArgs.semaphores)),
|
||||
const_cast<ze_external_semaphore_wait_params_ext_t *>(&indirectArgs.waitParams),
|
||||
apiArgs.hSignalEvent, apiArgs.numWaitEvents, const_cast<ze_event_handle_t *>(getOptionalData(indirectArgs.waitEvents)));
|
||||
apiArgs.hSignalEvent, apiArgs.numWaitEvents, externalStorage.getEventsWaitList(indirectArgs.waitEvents));
|
||||
}
|
||||
|
||||
// Replays a recorded zeCommandListAppendImageCopyToMemoryExt call onto executionTarget.
// The source region is passed by address from indirectArgs.srcRegion; destination
// pointer, source image and destination row/slice pitches come from apiArgs.
// NOTE(review): two signatures and two trailing argument rows are interleaved; the
// later row resolves wait events through externalStorage.getEventsWaitList.
ze_result_t Closure<CaptureApi::zeCommandListAppendImageCopyToMemoryExt>::instantiateTo(L0::CommandList &executionTarget) const {
|
||||
ze_result_t Closure<CaptureApi::zeCommandListAppendImageCopyToMemoryExt>::instantiateTo(L0::CommandList &executionTarget, ClosureExternalStorage &externalStorage) const {
|
||||
return zeCommandListAppendImageCopyToMemoryExt(&executionTarget,
|
||||
apiArgs.dstptr,
|
||||
apiArgs.hSrcImage,
|
||||
&indirectArgs.srcRegion,
|
||||
apiArgs.destRowPitch,
|
||||
apiArgs.destSlicePitch,
|
||||
apiArgs.hSignalEvent, apiArgs.numWaitEvents, const_cast<ze_event_handle_t *>(getOptionalData(indirectArgs.waitEvents)));
|
||||
apiArgs.hSignalEvent, apiArgs.numWaitEvents, externalStorage.getEventsWaitList(indirectArgs.waitEvents));
|
||||
}
|
||||
|
||||
// Replays a recorded zeCommandListAppendImageCopyFromMemoryExt call onto executionTarget.
// The destination region is passed by address from indirectArgs.dstRegion; destination
// image, source pointer and source row/slice pitches come from apiArgs.
// NOTE(review): two signatures and two trailing argument rows are interleaved; the
// later row resolves wait events through externalStorage.getEventsWaitList.
ze_result_t Closure<CaptureApi::zeCommandListAppendImageCopyFromMemoryExt>::instantiateTo(L0::CommandList &executionTarget) const {
|
||||
ze_result_t Closure<CaptureApi::zeCommandListAppendImageCopyFromMemoryExt>::instantiateTo(L0::CommandList &executionTarget, ClosureExternalStorage &externalStorage) const {
|
||||
return zeCommandListAppendImageCopyFromMemoryExt(&executionTarget,
|
||||
apiArgs.hDstImage,
|
||||
apiArgs.srcptr,
|
||||
&indirectArgs.dstRegion,
|
||||
apiArgs.srcRowPitch,
|
||||
apiArgs.srcSlicePitch,
|
||||
apiArgs.hSignalEvent, apiArgs.numWaitEvents, const_cast<ze_event_handle_t *>(getOptionalData(indirectArgs.waitEvents)));
|
||||
apiArgs.hSignalEvent, apiArgs.numWaitEvents, externalStorage.getEventsWaitList(indirectArgs.waitEvents));
|
||||
}
|
||||
|
||||
// Capture-time construction for a recorded zeCommandListAppendLaunchKernel call.
// NOTE(review): two constructors are interleaved in this span (it reads like the
// before/after rows of a diff) — confirm which rows are current.
//  - Closure(const ApiArgs &): copies apiArgs, nulls the stored launchKernelArgs and
//    phWaitEvents pointers, copies the pointed-to group count into indirectArgs,
//    copies the kernel's mutable state by value, and copies each wait event handle
//    into indirectArgs.waitEvents.
//  - IndirectArgs(const ApiArgs &, ClosureExternalStorage &): delegates wait-event
//    handling to the IndirectArgsWithWaitEvents base, copies the group count, takes a
//    snapshot of kernel->getMutableState() and hands it to
//    externalStorage.registerKernelState, keeping only the returned id in kernelState.
// Both variants dereference apiArgs.launchKernelArgs without a null check.
Closure<CaptureApi::zeCommandListAppendLaunchKernel>::Closure(const ApiArgs &apiArgs) : apiArgs{apiArgs} {
|
||||
this->apiArgs.launchKernelArgs = nullptr;
|
||||
this->apiArgs.phWaitEvents = nullptr;
|
||||
this->indirectArgs.launchKernelArgs = *apiArgs.launchKernelArgs;
|
||||
Closure<CaptureApi::zeCommandListAppendLaunchKernel>::IndirectArgs::IndirectArgs(const ApiArgs &apiArgs, ClosureExternalStorage &externalStorage) : IndirectArgsWithWaitEvents(apiArgs, externalStorage) {
|
||||
this->launchKernelArgs = *apiArgs.launchKernelArgs;
|
||||
|
||||
auto kernel = static_cast<KernelImp *>(Kernel::fromHandle(apiArgs.kernelHandle));
|
||||
this->indirectArgs.kernelState = kernel->getMutableState();
|
||||
|
||||
this->indirectArgs.waitEvents.reserve(apiArgs.numWaitEvents);
|
||||
for (uint32_t i = 0; i < apiArgs.numWaitEvents; ++i) {
|
||||
this->indirectArgs.waitEvents.push_back(apiArgs.phWaitEvents[i]);
|
||||
}
|
||||
L0::KernelMutableState stateSnapshot;
|
||||
stateSnapshot = kernel->getMutableState();
|
||||
this->kernelState = externalStorage.registerKernelState(std::move(stateSnapshot));
|
||||
}
|
||||
|
||||
// Replays a recorded zeCommandListAppendLaunchKernel call onto executionTarget.
// Before appending, the kernel's mutable state is overwritten with the state saved
// at capture time, so this function modifies the kernel object behind
// apiArgs.kernelHandle as a side effect.
// NOTE(review): two variants are interleaved. The first assigns the by-value
// indirectArgs.kernelState; the second dereferences
// externalStorage.getKernelMutableState(id) with no visible null check — confirm
// that lookup cannot return nullptr for an id stored in a closure.
ze_result_t Closure<CaptureApi::zeCommandListAppendLaunchKernel>::instantiateTo(L0::CommandList &executionTarget) const {
|
||||
ze_result_t Closure<CaptureApi::zeCommandListAppendLaunchKernel>::instantiateTo(L0::CommandList &executionTarget, ClosureExternalStorage &externalStorage) const {
|
||||
auto kernel = static_cast<KernelImp *>(Kernel::fromHandle(apiArgs.kernelHandle));
|
||||
kernel->getMutableState() = this->indirectArgs.kernelState;
|
||||
return zeCommandListAppendLaunchKernel(&executionTarget, apiArgs.kernelHandle, &indirectArgs.launchKernelArgs, apiArgs.hSignalEvent, apiArgs.numWaitEvents, const_cast<ze_event_handle_t *>(getOptionalData(indirectArgs.waitEvents)));
|
||||
kernel->getMutableState() = *externalStorage.getKernelMutableState(this->indirectArgs.kernelState);
|
||||
return zeCommandListAppendLaunchKernel(&executionTarget, apiArgs.kernelHandle, &indirectArgs.launchKernelArgs, apiArgs.hSignalEvent, apiArgs.numWaitEvents, externalStorage.getEventsWaitList(indirectArgs.waitEvents));
|
||||
}
|
||||
|
||||
// Compiler-generated destructor, defined outside the class body.
ExecutableGraph::~ExecutableGraph() = default;
|
||||
@@ -283,10 +278,10 @@ void ExecutableGraph::instantiateFrom(Graph &graph, const GraphInstatiateSetting
|
||||
switch (static_cast<CaptureApi>(cmd.index())) {
|
||||
default:
|
||||
break;
|
||||
#define RR_CAPTURED_API(X) \
|
||||
case CaptureApi::X: \
|
||||
std::get<static_cast<size_t>(CaptureApi::X)>(cmd).instantiateTo(*currCmdList); \
|
||||
DEBUG_BREAK_IF(err != ZE_RESULT_SUCCESS); \
|
||||
#define RR_CAPTURED_API(X) \
|
||||
case CaptureApi::X: \
|
||||
std::get<static_cast<size_t>(CaptureApi::X)>(cmd).instantiateTo(*currCmdList, graph.getExternalStorage()); \
|
||||
DEBUG_BREAK_IF(err != ZE_RESULT_SUCCESS); \
|
||||
break;
|
||||
RR_CAPTURED_APIS()
|
||||
#undef RR_CAPTURED_API
|
||||
|
||||
@@ -81,7 +81,7 @@ struct Graph : _ze_graph_handle_t {
|
||||
|
||||
using ApiArgsT = typename Closure<api>::ApiArgs;
|
||||
auto capturedArgs = ApiArgsT{apiArgs...};
|
||||
commands.push_back(CapturedCommand{Closure<api>(capturedArgs)});
|
||||
commands.push_back(CapturedCommand{Closure<api>(capturedArgs, externalStorage)});
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -145,10 +145,14 @@ struct Graph : _ze_graph_handle_t {
|
||||
void tryJoinOnNextCommand(L0::CommandList &childCmdList, L0::Event &joinEvent);
|
||||
void forkTo(L0::CommandList &childCmdList, Graph *&child, L0::Event &forkEvent);
|
||||
void registerSignallingEventFromPreviousCommand(L0::Event &ev);
|
||||
ClosureExternalStorage &getExternalStorage() {
|
||||
return externalStorage;
|
||||
}
|
||||
|
||||
protected:
|
||||
void unregisterSignallingEvents();
|
||||
|
||||
ClosureExternalStorage externalStorage;
|
||||
std::vector<CapturedCommand> commands;
|
||||
StackVec<Graph *, 16> subGraphs;
|
||||
|
||||
@@ -268,4 +272,9 @@ struct ExecutableGraph : _ze_executable_graph_handle_t {
|
||||
GraphSubmissionChain submissionChain;
|
||||
};
|
||||
|
||||
// Compile-time size budget for a single captured-command closure: 3 * 64 = 192 bytes.
// RR_CAPTURED_API is defined, RR_CAPTURED_APIS() is invoked, and the macro is then
// undefined; presumably RR_CAPTURED_APIS applies it once per captured API — confirm
// against its definition. Each instantiation asserts that sizeof(Closure<CaptureApi::X>)
// is strictly less than maxVariantSize, so a closure of exactly 192 bytes fails too.
constexpr size_t maxVariantSize = 3 * 64;
|
||||
#define RR_CAPTURED_API(X) \
|
||||
static_assert(sizeof(Closure<CaptureApi::X>) < maxVariantSize, #X " is too big for common variant. Please export some of its state to ClosureExternalStorage");
|
||||
RR_CAPTURED_APIS()
|
||||
#undef RR_CAPTURED_API
|
||||
} // namespace L0
|
||||
Reference in New Issue
Block a user