mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-03 14:55:24 +08:00
feature: Adding basic record and replay mechanism
Related-To: NEO-15373 Signed-off-by: Chodor, Jaroslaw <jaroslaw.chodor@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
699c55ddd5
commit
58228a36fe
@@ -7,6 +7,7 @@
|
||||
target_sources(${L0_STATIC_LIB_NAME}
|
||||
PRIVATE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/graph.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/graph.h
|
||||
)
|
||||
|
||||
|
||||
130
level_zero/experimental/source/graph/graph.cpp
Normal file
130
level_zero/experimental/source/graph/graph.cpp
Normal file
@@ -0,0 +1,130 @@
|
||||
/*
|
||||
* Copyright (C) 2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "level_zero/experimental/source/graph/graph.h"
|
||||
|
||||
#include "level_zero/core/source/cmdlist/cmdlist.h"
|
||||
#include "level_zero/core/source/context/context.h"
|
||||
|
||||
namespace L0 {
|
||||
|
||||
// Deletes every sub-graph that does not report itself as preallocated;
// preallocated sub-graphs are not deleted here.
Graph::~Graph() {
    for (Graph *subGraph : subGraphs) {
        if (subGraph->wasPreallocated()) {
            continue;
        }
        delete subGraph;
    }
}
|
||||
|
||||
// Remembers `captureSrc` as the command list being recorded and fills in
// `captureTargetDesc` (device handle + command list descriptor) from it.
//
// captureSrc  - command list whose device handle and queue group ordinal are queried.
// isSubGraph  - when true, `captureSrc` is additionally stored as this graph's
//               execution target.
void Graph::startCapturingFrom(L0::CommandList &captureSrc, bool isSubGraph) {
    this->captureSrc = &captureSrc;

    // Zero the whole descriptor first: only stype, pNext and the ordinal are set
    // explicitly below, and the descriptor is later handed to createCommandList,
    // so any remaining member (e.g. flags) must not be left indeterminate.
    this->captureTargetDesc.desc = {};

    captureSrc.getDeviceHandle(&this->captureTargetDesc.hDevice);
    this->captureTargetDesc.desc.stype = ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC;
    this->captureTargetDesc.desc.pNext = nullptr;
    captureSrc.getOrdinal(&this->captureTargetDesc.desc.commandQueueGroupOrdinal);

    if (isSubGraph) {
        this->executionTarget = &captureSrc;
    }
}
|
||||
|
||||
void Graph::stopCapturing() {
|
||||
this->captureSrc = nullptr;
|
||||
}
|
||||
|
||||
// Stores a copy of the call arguments and copies the wait-event handles out of
// the caller's array into `indirectArgs.waitEvents`.
Closure<CaptureApi::zeCommandListAppendMemoryCopy>::Closure(const ApiArgs &apiArgs)
    : apiArgs(apiArgs) {
    auto &capturedWaitEvents = this->indirectArgs.waitEvents;
    const uint32_t waitEventsCount = apiArgs.numWaitEvents;

    capturedWaitEvents.reserve(waitEventsCount);
    for (uint32_t eventIdx = 0; eventIdx < waitEventsCount; ++eventIdx) {
        capturedWaitEvents.push_back(apiArgs.phWaitEvents[eventIdx]);
    }
}
|
||||
|
||||
// Replays the captured memory copy onto `executionTarget` and returns the API result.
// Wait events come from the handles copied into `indirectArgs`, not from the
// caller's original array; a null pointer is passed when there are none.
ze_result_t Closure<CaptureApi::zeCommandListAppendMemoryCopy>::instantiateTo(L0::CommandList &executionTarget) const {
    ze_event_handle_t *waitEvents = nullptr;
    if (apiArgs.numWaitEvents) {
        waitEvents = const_cast<ze_event_handle_t *>(indirectArgs.waitEvents.data());
    }

    return zeCommandListAppendMemoryCopy(&executionTarget,
                                         apiArgs.dstptr,
                                         apiArgs.srcptr,
                                         apiArgs.size,
                                         apiArgs.hSignalEvent,
                                         apiArgs.numWaitEvents,
                                         waitEvents);
}
|
||||
|
||||
// Stores a copy of the call arguments and copies the wait-event handles out of
// the caller's array into `indirectArgs.waitEvents`.
Closure<CaptureApi::zeCommandListAppendBarrier>::Closure(const ApiArgs &apiArgs)
    : apiArgs(apiArgs) {
    auto &capturedWaitEvents = this->indirectArgs.waitEvents;
    const uint32_t waitEventsCount = apiArgs.numWaitEvents;

    capturedWaitEvents.reserve(waitEventsCount);
    for (uint32_t eventIdx = 0; eventIdx < waitEventsCount; ++eventIdx) {
        capturedWaitEvents.push_back(apiArgs.phWaitEvents[eventIdx]);
    }
}
|
||||
|
||||
// Replays the captured barrier onto `executionTarget` and returns the API result.
// Wait events come from the handles copied into `indirectArgs`; a null pointer
// is passed when there are none.
ze_result_t Closure<CaptureApi::zeCommandListAppendBarrier>::instantiateTo(L0::CommandList &executionTarget) const {
    ze_event_handle_t *waitEvents = nullptr;
    if (apiArgs.numWaitEvents) {
        waitEvents = const_cast<ze_event_handle_t *>(indirectArgs.waitEvents.data());
    }

    return zeCommandListAppendBarrier(&executionTarget,
                                      apiArgs.hSignalEvent,
                                      apiArgs.numWaitEvents,
                                      waitEvents);
}
|
||||
|
||||
// Defaulted destructor, defined out of line in this translation unit.
ExecutableGraph::~ExecutableGraph() = default;
|
||||
|
||||
// Builds this executable graph from a recorded `graph`.
//
// - Remembers `graph` as the source and adopts its execution target.
// - If the graph recorded any commands, creates a command list from the graph's
//   capture target descriptor, replays every captured command into it and closes it.
// - Recursively creates one executable sub-graph per source sub-graph.
void ExecutableGraph::instantiateFrom(Graph &graph) {
    this->src = &graph;
    this->executionTarget = graph.getExecutionTarget();

    if (graph.empty() == false) {
        [[maybe_unused]] ze_result_t err = ZE_RESULT_SUCCESS;
        ze_command_list_handle_t cmdListHandle = nullptr;
        src->getContext()->createCommandList(src->getCaptureTargetDesc().hDevice, &src->getCaptureTargetDesc().desc, &cmdListHandle);
        L0::CommandList *hwCommands = L0::CommandList::fromHandle(cmdListHandle);
        UNRECOVERABLE_IF(nullptr == hwCommands);
        this->hwCommands.reset(hwCommands);

        for (const CapturedCommand &cmd : src->getCapturedCommands()) {
            switch (static_cast<CaptureApi>(cmd.index())) {
            default:
                // Variant index without a CaptureApi case - nothing to replay.
                break;
// One case per captured API. The replay result is stored in `err` so that the
// debug check below actually observes failures of instantiateTo.
#define RR_CAPTURED_API(X)                                                                  \
    case CaptureApi::X:                                                                     \
        err = std::get<static_cast<size_t>(CaptureApi::X)>(cmd).instantiateTo(*hwCommands); \
        DEBUG_BREAK_IF(err != ZE_RESULT_SUCCESS);                                           \
        break;
                RR_CAPTURED_APIS()
#undef RR_CAPTURED_API
            }
        }
        hwCommands->close();
    }

    this->subGraphs.reserve(graph.getSubgraphs().size());
    for (auto &srcSubgraph : graph.getSubgraphs()) {
        auto execSubGraph = std::make_unique<ExecutableGraph>();
        execSubGraph->instantiateFrom(*srcSubgraph);
        this->subGraphs.push_back(std::move(execSubGraph));
    }
}
|
||||
|
||||
// Appends this executable graph, followed by all of its sub-graphs, for execution.
//
// executionTarget - command list to append to; when null, the target stored in
//                   this object is used (it is a fatal error if that is null too).
// pNext           - not read here; forwarded unchanged to sub-graphs.
// hSignalEvent    - optional event to signal.
// numWaitEvents / phWaitEvents - optional events to wait on.
//
// Returns the first non-success result encountered, otherwise ZE_RESULT_SUCCESS.
ze_result_t ExecutableGraph::execute(L0::CommandList *executionTarget, void *pNext, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) {
    if (nullptr == executionTarget) {
        executionTarget = this->executionTarget;
    }
    UNRECOVERABLE_IF(nullptr == executionTarget);

    if (this->empty()) {
        // No recorded command list: only the event dependencies are appended.
        if (numWaitEvents) {
            auto res = executionTarget->appendWaitOnEvents(numWaitEvents, phWaitEvents, nullptr, false, true, true, false, false, false);
            if (ZE_RESULT_SUCCESS != res) {
                return res;
            }
        }
        // A missing signal event must not end the call early - sub-graphs below
        // still have to be executed.
        if (nullptr != hSignalEvent) {
            auto res = executionTarget->appendSignalEvent(hSignalEvent, false);
            if (ZE_RESULT_SUCCESS != res) {
                return res;
            }
        }
    } else {
        auto commands = this->hwCommands.get();
        ze_command_list_handle_t graphCmdList = commands;
        auto res = executionTarget->appendCommandLists(1, &graphCmdList, hSignalEvent, numWaitEvents, phWaitEvents);
        if (ZE_RESULT_SUCCESS != res) {
            return res;
        }
    }

    // Each sub-graph is executed on its own stored target (null is passed here)
    // and without additional event dependencies.
    for (auto &subGraph : this->subGraphs) {
        auto res = subGraph->execute(nullptr, pNext, nullptr, 0, nullptr);
        if (ZE_RESULT_SUCCESS != res) {
            return res;
        }
    }
    return ZE_RESULT_SUCCESS;
}
|
||||
|
||||
} // namespace L0
|
||||
@@ -7,7 +7,16 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "level_zero/driver_experimental/zex_graph.h"
|
||||
#include "shared/source/utilities/stackvec.h"
|
||||
|
||||
#include "level_zero/ze_api.h"
|
||||
|
||||
#include <memory>
|
||||
#include <variant>
|
||||
#include <vector>
|
||||
|
||||
typedef struct _ze_graph_handle_t *ze_graph_handle_t;
|
||||
typedef struct _ze_executable_graph_handle_t *ze_executable_graph_handle_t;
|
||||
|
||||
struct _ze_graph_handle_t {
|
||||
};
|
||||
@@ -19,10 +28,118 @@ namespace L0 {
|
||||
|
||||
struct Context;
|
||||
|
||||
// X-macro list of API entry points. Each entry expands RR_CAPTURED_API(name);
// users define RR_CAPTURED_API before expanding this list and undefine it afterwards.
#define RR_CAPTURED_APIS()                                            \
    RR_CAPTURED_API(zeCommandListAppendWriteGlobalTimestamp)          \
    RR_CAPTURED_API(zeCommandListAppendBarrier)                       \
    RR_CAPTURED_API(zeCommandListAppendMemoryRangesBarrier)           \
    RR_CAPTURED_API(zeCommandListAppendMemoryCopy)                    \
    RR_CAPTURED_API(zeCommandListAppendMemoryFill)                    \
    RR_CAPTURED_API(zeCommandListAppendMemoryCopyRegion)              \
    RR_CAPTURED_API(zeCommandListAppendMemoryCopyFromContext)         \
    RR_CAPTURED_API(zeCommandListAppendImageCopy)                     \
    RR_CAPTURED_API(zeCommandListAppendImageCopyRegion)               \
    RR_CAPTURED_API(zeCommandListAppendImageCopyToMemory)             \
    RR_CAPTURED_API(zeCommandListAppendImageCopyFromMemory)           \
    RR_CAPTURED_API(zeCommandListAppendMemoryPrefetch)                \
    RR_CAPTURED_API(zeCommandListAppendMemAdvise)                     \
    RR_CAPTURED_API(zeCommandListAppendSignalEvent)                   \
    RR_CAPTURED_API(zeCommandListAppendWaitOnEvents)                  \
    RR_CAPTURED_API(zeCommandListAppendEventReset)                    \
    RR_CAPTURED_API(zeCommandListAppendQueryKernelTimestamps)         \
    RR_CAPTURED_API(zeCommandListAppendLaunchKernel)                  \
    RR_CAPTURED_API(zeCommandListAppendLaunchCooperativeKernel)       \
    RR_CAPTURED_API(zeCommandListAppendLaunchKernelIndirect)          \
    RR_CAPTURED_API(zeCommandListAppendLaunchMultipleKernelsIndirect) \
    RR_CAPTURED_API(zeCommandListAppendSignalExternalSemaphoreExt)    \
    RR_CAPTURED_API(zeCommandListAppendWaitExternalSemaphoreExt)      \
    RR_CAPTURED_API(zeCommandListAppendImageCopyToMemoryExt)          \
    RR_CAPTURED_API(zeCommandListAppendImageCopyFromMemoryExt)
|
||||
|
||||
enum class CaptureApi {
|
||||
#define RR_CAPTURED_API(X) X,
|
||||
RR_CAPTURED_APIS()
|
||||
#undef RR_CAPTURED_API
|
||||
};
|
||||
|
||||
struct CommandList;
|
||||
|
||||
template <CaptureApi api>
|
||||
struct Closure {
|
||||
inline static constexpr bool isSupported = false;
|
||||
|
||||
struct ApiArgs {
|
||||
template <typename ArgsT>
|
||||
ApiArgs(ArgsT...) {}
|
||||
};
|
||||
|
||||
Closure(const ApiArgs &apiArgs) {}
|
||||
|
||||
ze_result_t instantiateTo(CommandList &executionTarget) const {
|
||||
DEBUG_BREAK_IF(true);
|
||||
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct Closure<CaptureApi::zeCommandListAppendMemoryCopy> {
|
||||
inline static constexpr bool isSupported = true;
|
||||
|
||||
struct ApiArgs {
|
||||
ze_command_list_handle_t hCommandList;
|
||||
void *dstptr;
|
||||
const void *srcptr;
|
||||
size_t size;
|
||||
ze_event_handle_t hSignalEvent;
|
||||
uint32_t numWaitEvents;
|
||||
ze_event_handle_t *phWaitEvents;
|
||||
} apiArgs;
|
||||
|
||||
struct IndirectArgs {
|
||||
StackVec<ze_event_handle_t, 8> waitEvents;
|
||||
} indirectArgs;
|
||||
|
||||
Closure(const ApiArgs &apiArgs);
|
||||
|
||||
ze_result_t instantiateTo(CommandList &executionTarget) const;
|
||||
};
|
||||
|
||||
template <>
|
||||
struct Closure<CaptureApi::zeCommandListAppendBarrier> {
|
||||
inline static constexpr bool isSupported = true;
|
||||
|
||||
struct ApiArgs {
|
||||
ze_command_list_handle_t hCommandList;
|
||||
ze_event_handle_t hSignalEvent;
|
||||
uint32_t numWaitEvents;
|
||||
ze_event_handle_t *phWaitEvents;
|
||||
} apiArgs;
|
||||
|
||||
struct IndirectArgs {
|
||||
StackVec<ze_event_handle_t, 8> waitEvents;
|
||||
} indirectArgs;
|
||||
|
||||
Closure(const ApiArgs &apiArgs);
|
||||
|
||||
ze_result_t instantiateTo(CommandList &executionTarget) const;
|
||||
};
|
||||
|
||||
using ClosureVariants = std::variant<
|
||||
#define RR_CAPTURED_API(X) Closure<CaptureApi::X>,
|
||||
RR_CAPTURED_APIS()
|
||||
#undef RR_CAPTURED_API
|
||||
int>;
|
||||
using CapturedCommand = ClosureVariants;
|
||||
|
||||
struct Graph : _ze_graph_handle_t {
|
||||
Graph(L0::Context *ctx, bool preallocated) : ctx(ctx), preallocated(preallocated) {
|
||||
commands.reserve(16);
|
||||
}
|
||||
|
||||
~Graph();
|
||||
|
||||
Graph(const Graph &) = delete;
|
||||
Graph &operator=(const Graph &) = delete;
|
||||
|
||||
static Graph *fromHandle(ze_graph_handle_t handle) {
|
||||
return static_cast<Graph *>(handle);
|
||||
}
|
||||
@@ -31,18 +148,102 @@ struct Graph : _ze_graph_handle_t {
|
||||
return preallocated;
|
||||
}
|
||||
|
||||
void startCapturingFrom(L0::CommandList &captureSrc, bool isSubGraph);
|
||||
void stopCapturing();
|
||||
|
||||
template <CaptureApi api, typename... TArgs>
|
||||
ze_result_t capture(TArgs... apiArgs) {
|
||||
if (false == Closure<api>::isSupported) {
|
||||
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
|
||||
}
|
||||
|
||||
using ApirArgsT = typename Closure<api>::ApiArgs;
|
||||
auto capturedArgs = ApirArgsT{apiArgs...};
|
||||
commands.push_back(CapturedCommand{Closure<api>(capturedArgs)});
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
const std::vector<CapturedCommand> &getCapturedCommands() {
|
||||
return commands;
|
||||
}
|
||||
|
||||
const StackVec<Graph *, 16> &getSubgraphs() {
|
||||
return subGraphs;
|
||||
}
|
||||
|
||||
L0::Context *getContext() const {
|
||||
return ctx;
|
||||
}
|
||||
|
||||
struct CaptureTargetDesc {
|
||||
ze_device_handle_t hDevice = nullptr;
|
||||
ze_command_list_desc_t desc;
|
||||
};
|
||||
|
||||
const CaptureTargetDesc &getCaptureTargetDesc() const {
|
||||
return captureTargetDesc;
|
||||
}
|
||||
|
||||
bool empty() const {
|
||||
return commands.empty();
|
||||
}
|
||||
|
||||
L0::CommandList *getExecutionTarget() const {
|
||||
return executionTarget;
|
||||
}
|
||||
|
||||
bool isSubGraph() const {
|
||||
return (nullptr != executionTarget);
|
||||
}
|
||||
|
||||
void addSubGraph(Graph *subGraph) {
|
||||
subGraphs.push_back(subGraph);
|
||||
}
|
||||
|
||||
protected:
|
||||
std::vector<CapturedCommand> commands;
|
||||
StackVec<Graph *, 16> subGraphs;
|
||||
|
||||
L0::CommandList *captureSrc = nullptr;
|
||||
CaptureTargetDesc captureTargetDesc;
|
||||
L0::CommandList *executionTarget = nullptr;
|
||||
|
||||
L0::Context *ctx = nullptr;
|
||||
bool preallocated = false;
|
||||
};
|
||||
|
||||
struct ExecutableGraph : _ze_executable_graph_handle_t {
|
||||
ExecutableGraph(Graph *src) {
|
||||
ExecutableGraph() {
|
||||
}
|
||||
|
||||
void instantiateFrom(Graph &graph);
|
||||
|
||||
~ExecutableGraph();
|
||||
|
||||
static ExecutableGraph *fromHandle(ze_executable_graph_handle_t handle) {
|
||||
return static_cast<ExecutableGraph *>(handle);
|
||||
}
|
||||
|
||||
bool empty() {
|
||||
return nullptr == hwCommands;
|
||||
}
|
||||
|
||||
bool isSubGraph() const {
|
||||
return (nullptr != executionTarget);
|
||||
}
|
||||
|
||||
const StackVec<std::unique_ptr<ExecutableGraph>, 16> &getSubgraphs() {
|
||||
return subGraphs;
|
||||
}
|
||||
|
||||
ze_result_t execute(L0::CommandList *executionTarget, void *pNext, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents);
|
||||
|
||||
protected:
|
||||
Graph *src = nullptr;
|
||||
L0::CommandList *executionTarget = nullptr;
|
||||
std::unique_ptr<L0::CommandList> hwCommands;
|
||||
|
||||
StackVec<std::unique_ptr<ExecutableGraph>, 16> subGraphs;
|
||||
};
|
||||
|
||||
} // namespace L0
|
||||
Reference in New Issue
Block a user