feature: Adding basic record and replay mechanism

Related-To: NEO-15373

Signed-off-by: Chodor, Jaroslaw <jaroslaw.chodor@intel.com>
This commit is contained in:
Chodor, Jaroslaw
2025-07-01 14:43:15 +00:00
committed by Compute-Runtime-Automation
parent 699c55ddd5
commit 58228a36fe
14 changed files with 835 additions and 40 deletions

View File

@@ -7,6 +7,7 @@
# Adds this directory's build script and the graph sources (graph.cpp / graph.h)
# as PRIVATE sources of the target named by L0_STATIC_LIB_NAME.
target_sources(${L0_STATIC_LIB_NAME}
PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/graph.cpp
${CMAKE_CURRENT_SOURCE_DIR}/graph.h
)

View File

@@ -0,0 +1,130 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "level_zero/experimental/source/graph/graph.h"
#include "level_zero/core/source/cmdlist/cmdlist.h"
#include "level_zero/core/source/context/context.h"
namespace L0 {
// Destroys the sub-graphs registered on this graph. Sub-graphs that report
// wasPreallocated() are left alone; every other one is deleted here.
Graph::~Graph() {
    for (Graph *child : subGraphs) {
        if (child->wasPreallocated()) {
            continue;
        }
        delete child;
    }
}
// Starts recording from `captureSrc`.
//
// Remembers the source command list and records, in captureTargetDesc, the
// device handle and command-queue-group ordinal reported by `captureSrc`, so
// that a command list can later be created from that descriptor.
//
// captureSrc - command list whose appended commands are being captured.
// isSubGraph - when true, `captureSrc` is also stored as the execution target
//              of this graph (which is what isSubGraph() later reports).
void Graph::startCapturingFrom(L0::CommandList &captureSrc, bool isSubGraph) {
    this->captureSrc = &captureSrc;

    // Value-initialize the whole descriptor first: only some of its fields are
    // assigned below, and the remaining ones (e.g. flags) would otherwise be
    // read with an indeterminate value when the descriptor is consumed.
    this->captureTargetDesc.desc = {};

    captureSrc.getDeviceHandle(&this->captureTargetDesc.hDevice);
    this->captureTargetDesc.desc.stype = ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC;
    this->captureTargetDesc.desc.pNext = nullptr;
    captureSrc.getOrdinal(&this->captureTargetDesc.desc.commandQueueGroupOrdinal);

    if (isSubGraph) {
        this->executionTarget = &captureSrc;
    }
}
void Graph::stopCapturing() {
this->captureSrc = nullptr;
}
// Captures a zeCommandListAppendMemoryCopy call.
// The scalar arguments are copied as-is; the wait-event handles are copied out
// of the caller's phWaitEvents array into indirectArgs.waitEvents.
Closure<CaptureApi::zeCommandListAppendMemoryCopy>::Closure(const ApiArgs &apiArgs)
    : apiArgs(apiArgs) {
    const uint32_t waitCount = apiArgs.numWaitEvents;
    auto &ownedWaitEvents = this->indirectArgs.waitEvents;
    ownedWaitEvents.reserve(waitCount);
    for (uint32_t idx = 0; idx != waitCount; ++idx) {
        ownedWaitEvents.push_back(apiArgs.phWaitEvents[idx]);
    }
}
// Replays the captured memory copy onto `executionTarget`.
// The wait-event list passed to the API is the copy held in indirectArgs (or
// nullptr when no wait events were captured), not the caller's original array.
// Returns the result of zeCommandListAppendMemoryCopy.
ze_result_t Closure<CaptureApi::zeCommandListAppendMemoryCopy>::instantiateTo(L0::CommandList &executionTarget) const {
    ze_event_handle_t *waitList = nullptr;
    if (apiArgs.numWaitEvents) {
        waitList = const_cast<ze_event_handle_t *>(indirectArgs.waitEvents.data());
    }
    return zeCommandListAppendMemoryCopy(&executionTarget,
                                         apiArgs.dstptr,
                                         apiArgs.srcptr,
                                         apiArgs.size,
                                         apiArgs.hSignalEvent,
                                         apiArgs.numWaitEvents,
                                         waitList);
}
// Captures a zeCommandListAppendBarrier call.
// The scalar arguments are copied as-is; the wait-event handles are copied out
// of the caller's phWaitEvents array into indirectArgs.waitEvents.
Closure<CaptureApi::zeCommandListAppendBarrier>::Closure(const ApiArgs &apiArgs)
    : apiArgs(apiArgs) {
    const uint32_t waitCount = apiArgs.numWaitEvents;
    auto &ownedWaitEvents = this->indirectArgs.waitEvents;
    ownedWaitEvents.reserve(waitCount);
    for (uint32_t idx = 0; idx != waitCount; ++idx) {
        ownedWaitEvents.push_back(apiArgs.phWaitEvents[idx]);
    }
}
// Replays the captured barrier onto `executionTarget`.
// The wait-event list passed to the API is the copy held in indirectArgs (or
// nullptr when no wait events were captured), not the caller's original array.
// Returns the result of zeCommandListAppendBarrier.
ze_result_t Closure<CaptureApi::zeCommandListAppendBarrier>::instantiateTo(L0::CommandList &executionTarget) const {
    ze_event_handle_t *waitList = nullptr;
    if (apiArgs.numWaitEvents) {
        waitList = const_cast<ze_event_handle_t *>(indirectArgs.waitEvents.data());
    }
    return zeCommandListAppendBarrier(&executionTarget,
                                      apiArgs.hSignalEvent,
                                      apiArgs.numWaitEvents,
                                      waitList);
}
// Defaulted in this translation unit (the header only declares it). The owned
// command list and the sub-graphs are held through std::unique_ptr members.
// NOTE(review): presumably defined here so the header does not need the full
// L0::CommandList definition - confirm.
ExecutableGraph::~ExecutableGraph() = default;
// Builds this executable graph from a recorded `graph`.
//
// - Remembers the source graph and its execution target.
// - If the source graph captured any commands, creates a command list from the
//   graph's capture-target descriptor, replays every captured closure into it
//   in capture order, closes it and takes ownership of it (hwCommands).
// - Recursively instantiates one ExecutableGraph per sub-graph of `graph`.
//
// A failing replay of a single command is reported via DEBUG_BREAK_IF only;
// this function has no return value to propagate it through.
void ExecutableGraph::instantiateFrom(Graph &graph) {
    this->src = &graph;
    this->executionTarget = graph.getExecutionTarget();

    if (graph.empty() == false) {
        // maybe_unused: DEBUG_BREAK_IF may compile to nothing in some builds.
        [[maybe_unused]] ze_result_t err = ZE_RESULT_SUCCESS;

        ze_command_list_handle_t cmdListHandle = nullptr;
        src->getContext()->createCommandList(src->getCaptureTargetDesc().hDevice, &src->getCaptureTargetDesc().desc, &cmdListHandle);
        L0::CommandList *recordedCmdList = L0::CommandList::fromHandle(cmdListHandle);
        UNRECOVERABLE_IF(nullptr == recordedCmdList);
        this->hwCommands.reset(recordedCmdList);

        for (const CapturedCommand &cmd : src->getCapturedCommands()) {
            // The variant alternative index equals the CaptureApi enumerator value;
            // the trailing non-closure alternative falls into `default`.
            switch (static_cast<CaptureApi>(cmd.index())) {
            default:
                break;
// Store the replay result so the debug check below actually observes failures.
#define RR_CAPTURED_API(X)                                                                      \
    case CaptureApi::X:                                                                         \
        err = std::get<static_cast<size_t>(CaptureApi::X)>(cmd).instantiateTo(*recordedCmdList); \
        DEBUG_BREAK_IF(err != ZE_RESULT_SUCCESS);                                               \
        break;
                RR_CAPTURED_APIS()
#undef RR_CAPTURED_API
            }
        }
        recordedCmdList->close();
    }

    this->subGraphs.reserve(graph.getSubgraphs().size());
    for (auto &srcSubgraph : graph.getSubgraphs()) {
        auto execSubGraph = std::make_unique<ExecutableGraph>();
        execSubGraph->instantiateFrom(*srcSubgraph);
        this->subGraphs.push_back(std::move(execSubGraph));
    }
}
// Submits this executable graph, then all of its sub-graphs.
//
// executionTarget - command list to submit to; when nullptr, the execution
//                   target stored at instantiation time is used. One of the
//                   two must be non-null (UNRECOVERABLE_IF otherwise).
// pNext           - not read here; forwarded unchanged to sub-graphs.
// hSignalEvent    - optional event signalled for this graph's own work.
// numWaitEvents / phWaitEvents - events this graph's own work waits on.
//
// For an empty graph (no recorded command list) only the wait / signal are
// appended to the execution target; otherwise the recorded command list is
// appended with the given events. Sub-graphs are then executed on their own
// stored execution targets, without events.
//
// Returns ZE_RESULT_SUCCESS, or the first non-success result encountered.
ze_result_t ExecutableGraph::execute(L0::CommandList *executionTarget, void *pNext, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) {
    if (nullptr == executionTarget) {
        executionTarget = this->executionTarget;
    }
    UNRECOVERABLE_IF(nullptr == executionTarget);

    if (this->empty()) {
        if (numWaitEvents) {
            auto res = executionTarget->appendWaitOnEvents(numWaitEvents, phWaitEvents, nullptr, false, true, true, false, false, false);
            if (ZE_RESULT_SUCCESS != res) {
                return res;
            }
        }
        // No early return when there is nothing to signal: sub-graphs below
        // must still run, exactly as they do for the non-empty case.
        if (nullptr != hSignalEvent) {
            auto res = executionTarget->appendSignalEvent(hSignalEvent, false);
            if (ZE_RESULT_SUCCESS != res) {
                return res;
            }
        }
    } else {
        auto commands = this->hwCommands.get();
        ze_command_list_handle_t graphCmdList = commands;
        auto res = executionTarget->appendCommandLists(1, &graphCmdList, hSignalEvent, numWaitEvents, phWaitEvents);
        if (ZE_RESULT_SUCCESS != res) {
            return res;
        }
    }

    for (auto &subGraph : this->subGraphs) {
        auto res = subGraph->execute(nullptr, pNext, nullptr, 0, nullptr);
        if (ZE_RESULT_SUCCESS != res) {
            return res;
        }
    }
    return ZE_RESULT_SUCCESS;
}
} // namespace L0

View File

@@ -7,7 +7,16 @@
#pragma once
#include "level_zero/driver_experimental/zex_graph.h"
#include "shared/source/utilities/stackvec.h"
#include "level_zero/ze_api.h"
#include <memory>
#include <variant>
#include <vector>
// Handle types for graphs and executable graphs: each handle is a pointer to
// the corresponding tag struct.
using ze_graph_handle_t = struct _ze_graph_handle_t *;
using ze_executable_graph_handle_t = struct _ze_executable_graph_handle_t *;
// Empty tag type behind ze_graph_handle_t.
struct _ze_graph_handle_t {};
@@ -19,10 +28,118 @@ namespace L0 {
struct Context;
// X-macro listing every API that can be recorded. Define RR_CAPTURED_API(X)
// before expanding RR_CAPTURED_APIS() and #undef it afterwards.
// The list order is significant: it fixes both the CaptureApi enumerator
// values and the alternative indices of the closure variant, which are
// converted into each other with static_cast.
#define RR_CAPTURED_APIS() \
RR_CAPTURED_API(zeCommandListAppendWriteGlobalTimestamp) \
RR_CAPTURED_API(zeCommandListAppendBarrier) \
RR_CAPTURED_API(zeCommandListAppendMemoryRangesBarrier) \
RR_CAPTURED_API(zeCommandListAppendMemoryCopy) \
RR_CAPTURED_API(zeCommandListAppendMemoryFill) \
RR_CAPTURED_API(zeCommandListAppendMemoryCopyRegion) \
RR_CAPTURED_API(zeCommandListAppendMemoryCopyFromContext) \
RR_CAPTURED_API(zeCommandListAppendImageCopy) \
RR_CAPTURED_API(zeCommandListAppendImageCopyRegion) \
RR_CAPTURED_API(zeCommandListAppendImageCopyToMemory) \
RR_CAPTURED_API(zeCommandListAppendImageCopyFromMemory) \
RR_CAPTURED_API(zeCommandListAppendMemoryPrefetch) \
RR_CAPTURED_API(zeCommandListAppendMemAdvise) \
RR_CAPTURED_API(zeCommandListAppendSignalEvent) \
RR_CAPTURED_API(zeCommandListAppendWaitOnEvents) \
RR_CAPTURED_API(zeCommandListAppendEventReset) \
RR_CAPTURED_API(zeCommandListAppendQueryKernelTimestamps) \
RR_CAPTURED_API(zeCommandListAppendLaunchKernel) \
RR_CAPTURED_API(zeCommandListAppendLaunchCooperativeKernel) \
RR_CAPTURED_API(zeCommandListAppendLaunchKernelIndirect) \
RR_CAPTURED_API(zeCommandListAppendLaunchMultipleKernelsIndirect) \
RR_CAPTURED_API(zeCommandListAppendSignalExternalSemaphoreExt) \
RR_CAPTURED_API(zeCommandListAppendWaitExternalSemaphoreExt) \
RR_CAPTURED_API(zeCommandListAppendImageCopyToMemoryExt) \
RR_CAPTURED_API(zeCommandListAppendImageCopyFromMemoryExt)
// Identifies a capturable API. One enumerator is generated per entry of
// RR_CAPTURED_APIS(), in list order, so the first entry has value 0 and each
// following entry the next integer.
enum class CaptureApi {
#define RR_CAPTURED_API(X) X,
RR_CAPTURED_APIS()
#undef RR_CAPTURED_API
};
struct CommandList;
// Primary Closure template: the fallback for APIs that have no dedicated
// specialization. It stores nothing and cannot be replayed.
template <CaptureApi api>
struct Closure {
    // Specializations that implement capture/replay set this to true.
    inline static constexpr bool isSupported = false;

    // Placeholder argument bundle: accepts (and discards) any argument list so
    // that generic capture code still compiles for unsupported APIs.
    struct ApiArgs {
        // A real parameter pack: `template <typename ArgsT> ApiArgs(ArgsT...)`
        // would instead declare `(ArgsT, ...)`, i.e. a C-style variadic that
        // needs at least one argument and passes the rest through `...`.
        template <typename... ArgsT>
        ApiArgs(ArgsT...) {}
    };

    Closure(const ApiArgs &apiArgs) {}

    // Replaying an unsupported API is a programming error: breaks in debug
    // builds and reports ZE_RESULT_ERROR_UNSUPPORTED_FEATURE.
    ze_result_t instantiateTo(CommandList &executionTarget) const {
        DEBUG_BREAK_IF(true);
        return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
    }
};
// Capture/replay support for zeCommandListAppendMemoryCopy.
template <>
struct Closure<CaptureApi::zeCommandListAppendMemoryCopy> {
    inline static constexpr bool isSupported = true;

    // Arguments of the API call, in the API's parameter order (the struct is
    // brace-initialized positionally from the call's arguments).
    struct ApiArgs {
        ze_command_list_handle_t hCommandList;
        void *dstptr;
        const void *srcptr;
        size_t size;
        ze_event_handle_t hSignalEvent;
        uint32_t numWaitEvents;
        ze_event_handle_t *phWaitEvents;
    };

    // Copies of data that the API receives through pointers.
    struct IndirectArgs {
        StackVec<ze_event_handle_t, 8> waitEvents;
    };

    ApiArgs apiArgs;
    IndirectArgs indirectArgs;

    // Stores the arguments and copies the wait-event handles into indirectArgs.
    Closure(const ApiArgs &apiArgs);

    // Re-issues the captured call on executionTarget and returns its result.
    ze_result_t instantiateTo(CommandList &executionTarget) const;
};
// Capture/replay support for zeCommandListAppendBarrier.
template <>
struct Closure<CaptureApi::zeCommandListAppendBarrier> {
    inline static constexpr bool isSupported = true;

    // Arguments of the API call, in the API's parameter order (the struct is
    // brace-initialized positionally from the call's arguments).
    struct ApiArgs {
        ze_command_list_handle_t hCommandList;
        ze_event_handle_t hSignalEvent;
        uint32_t numWaitEvents;
        ze_event_handle_t *phWaitEvents;
    };

    // Copies of data that the API receives through pointers.
    struct IndirectArgs {
        StackVec<ze_event_handle_t, 8> waitEvents;
    };

    ApiArgs apiArgs;
    IndirectArgs indirectArgs;

    // Stores the arguments and copies the wait-event handles into indirectArgs.
    Closure(const ApiArgs &apiArgs);

    // Re-issues the captured call on executionTarget and returns its result.
    ze_result_t instantiateTo(CommandList &executionTarget) const;
};
// Variant holding one captured call. Alternative i is Closure<X> for the i-th
// entry X of RR_CAPTURED_APIS(), so variant::index() can be cast to CaptureApi.
// Every macro expansion ends with a comma, hence the extra trailing `int`
// alternative that closes the template argument list.
using ClosureVariants = std::variant<
#define RR_CAPTURED_API(X) Closure<CaptureApi::X>,
RR_CAPTURED_APIS()
#undef RR_CAPTURED_API
int>;
// A single recorded command as stored by Graph.
using CapturedCommand = ClosureVariants;
struct Graph : _ze_graph_handle_t {
Graph(L0::Context *ctx, bool preallocated) : ctx(ctx), preallocated(preallocated) {
commands.reserve(16);
}
// Deletes every registered sub-graph that is not flagged as preallocated.
~Graph();
// Graphs are not copyable.
Graph(const Graph &) = delete;
Graph &operator=(const Graph &) = delete;
// Converts an API graph handle to the Graph object it points at.
// This is a plain static downcast; no validation of `handle` is performed.
static Graph *fromHandle(ze_graph_handle_t handle) {
    auto *graph = static_cast<Graph *>(handle);
    return graph;
}
@@ -31,18 +148,102 @@ struct Graph : _ze_graph_handle_t {
return preallocated;
}
// Starts recording from captureSrc and records its device / ordinal in the
// capture-target descriptor; with isSubGraph == true, captureSrc also becomes
// this graph's execution target.
void startCapturingFrom(L0::CommandList &captureSrc, bool isSubGraph);
// Ends recording by clearing the capture source.
void stopCapturing();
// Records one call of the API identified by `api`.
//
// apiArgs - the arguments of the API call, in the API's parameter order; they
//           are used to brace-initialize Closure<api>::ApiArgs.
//
// Returns ZE_RESULT_ERROR_UNSUPPORTED_FEATURE when Closure<api> does not
// support capture (nothing is recorded), ZE_RESULT_SUCCESS otherwise.
template <CaptureApi api, typename... TArgs>
ze_result_t capture([[maybe_unused]] TArgs... apiArgs) {
    using ClosureT = Closure<api>;
    // isSupported is a compile-time constant, so pick the branch at compile
    // time; the recording code is then only instantiated for supported APIs.
    if constexpr (false == ClosureT::isSupported) {
        return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
    } else {
        using ApiArgsT = typename ClosureT::ApiArgs;
        const ApiArgsT capturedArgs{apiArgs...};
        // Build the closure directly inside the stored variant.
        commands.emplace_back(std::in_place_type<ClosureT>, capturedArgs);
        return ZE_RESULT_SUCCESS;
    }
}
const std::vector<CapturedCommand> &getCapturedCommands() {
return commands;
}
// Returns the sub-graphs registered through addSubGraph(), in insertion order.
// Read-only accessor, hence const like the other getters of this class.
const StackVec<Graph *, 16> &getSubgraphs() const {
    return subGraphs;
}
// Returns the context this graph was constructed with.
L0::Context *getContext() const {
    return this->ctx;
}
// Describes the command list that recording started from: its device and the
// descriptor needed to create an equivalent command list later.
struct CaptureTargetDesc {
    ze_device_handle_t hDevice = nullptr;
    // Value-initialized so that fields nobody assigns explicitly are zero
    // instead of indeterminate when the descriptor is consumed.
    ze_command_list_desc_t desc = {};
};
// Returns the device / command-list descriptor recorded when capture started.
const CaptureTargetDesc &getCaptureTargetDesc() const {
    return this->captureTargetDesc;
}
bool empty() const {
return commands.empty();
}
// Returns the stored execution target, or nullptr when none was set.
L0::CommandList *getExecutionTarget() const {
    return this->executionTarget;
}
// A graph counts as a sub-graph exactly when it has a stored execution target.
bool isSubGraph() const {
    return executionTarget != nullptr;
}
// Registers subGraph as a child of this graph. The destructor later deletes
// it unless it reports wasPreallocated().
void addSubGraph(Graph *subGraph) {
    this->subGraphs.push_back(subGraph);
}
protected:
// Captured API calls, in the order capture() appended them.
std::vector<CapturedCommand> commands;
// Child graphs added via addSubGraph(); the destructor deletes those whose
// wasPreallocated() is false.
StackVec<Graph *, 16> subGraphs;
// Command list being recorded from: set by startCapturingFrom(), reset to
// nullptr by stopCapturing().
L0::CommandList *captureSrc = nullptr;
// Device handle and command-list descriptor filled in by startCapturingFrom().
CaptureTargetDesc captureTargetDesc;
// Set by startCapturingFrom() only when called with isSubGraph == true.
L0::CommandList *executionTarget = nullptr;
// Context passed to the constructor; this class never deletes it.
L0::Context *ctx = nullptr;
// Flag passed to the constructor; a parent graph's destructor skips deleting
// sub-graphs that have it set.
bool preallocated = false;
};
// Executable form of a recorded Graph: owns the command list replayed from the
// graph's captured commands plus one ExecutableGraph per sub-graph.
struct ExecutableGraph : _ze_executable_graph_handle_t {
// NOTE(review): the next line has no matching closing brace and is directly
// followed by a second constructor - it looks like a removed line left over
// from the diff view this text was taken from. Confirm against the real file.
ExecutableGraph(Graph *src) {
ExecutableGraph() {
}
// Populates this object (and, recursively, its sub-graphs) from a recorded graph.
void instantiateFrom(Graph &graph);
~ExecutableGraph();
// Converts an API handle to the ExecutableGraph it points at (plain static downcast).
static ExecutableGraph *fromHandle(ze_executable_graph_handle_t handle) {
return static_cast<ExecutableGraph *>(handle);
}
// True when no command list is owned by this object itself.
bool empty() {
return nullptr == hwCommands;
}
// True when an execution target is stored for this graph.
bool isSubGraph() const {
return (nullptr != executionTarget);
}
// Returns the owned executable sub-graphs.
const StackVec<std::unique_ptr<ExecutableGraph>, 16> &getSubgraphs() {
return subGraphs;
}
// Submits this graph and then its sub-graphs; a null executionTarget selects the stored one.
ze_result_t execute(L0::CommandList *executionTarget, void *pNext, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents);
protected:
// Graph this object was instantiated from (not owned).
Graph *src = nullptr;
// Execution target taken from the source graph; may be nullptr.
L0::CommandList *executionTarget = nullptr;
// Owned command list holding the replayed commands; null for an empty graph.
std::unique_ptr<L0::CommandList> hwCommands;
// Owned executable counterparts of the source graph's sub-graphs.
StackVec<std::unique_ptr<ExecutableGraph>, 16> subGraphs;
};
} // namespace L0