compute-runtime/level_zero/core/source/event/event.h

343 lines
11 KiB
C
Raw Normal View History

/*
* Copyright (C) 2020-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "shared/source/helpers/timestamp_packet_size_control.h"
#include "shared/source/memory_manager/multi_graphics_allocation.h"
#include <level_zero/ze_api.h>
#include <atomic>
#include <bitset>
#include <chrono>
#include <limits>
#include <memory>
#include <vector>
struct _ze_event_handle_t {};
struct _ze_event_pool_handle_t {};
namespace NEO {
class CommandStreamReceiver;
class GraphicsAllocation;
class MultiGraphicsAllocation;
struct RootDeviceEnvironment;
} // namespace NEO
namespace L0 {
typedef uint64_t FlushStamp;
struct EventPool;
struct MetricStreamer;
struct ContextImp;
struct Context;
struct DriverHandle;
struct DriverHandleImp;
struct Device;
struct Kernel;
#pragma pack(1)
struct IpcEventPoolData {
uint64_t handle = 0;
size_t numEvents = 0;
uint32_t rootDeviceIndex = 0;
uint32_t maxEventPackets = 0;
uint32_t numDevices = 0;
bool isDeviceEventPoolAllocation = false;
bool isHostVisibleEventPoolAllocation = false;
bool isImplicitScalingCapable = false;
};
#pragma pack()
static_assert(sizeof(IpcEventPoolData) <= ZE_MAX_IPC_HANDLE_SIZE, "IpcEventPoolData is bigger than ZE_MAX_IPC_HANDLE_SIZE");
namespace EventPacketsCount {
inline constexpr uint32_t maxKernelSplit = 3;
inline constexpr uint32_t eventPackets = maxKernelSplit * NEO ::TimestampPacketSizeControl::preferredPacketCount;
} // namespace EventPacketsCount
struct Event : _ze_event_handle_t {
virtual ~Event() = default;
virtual ze_result_t destroy();
virtual ze_result_t hostSignal() = 0;
virtual ze_result_t hostSynchronize(uint64_t timeout) = 0;
virtual ze_result_t queryStatus() = 0;
virtual ze_result_t reset() = 0;
virtual ze_result_t queryKernelTimestamp(ze_kernel_timestamp_result_t *dstptr) = 0;
virtual ze_result_t queryTimestampsExp(Device *device, uint32_t *count, ze_kernel_timestamp_result_t *timestamps) = 0;
enum State : uint32_t {
STATE_SIGNALED = 0u,
HOST_CACHING_DISABLED_PERMANENT = std::numeric_limits<uint32_t>::max() - 2,
HOST_CACHING_DISABLED = std::numeric_limits<uint32_t>::max() - 1,
STATE_CLEARED = std::numeric_limits<uint32_t>::max(),
STATE_INITIAL = STATE_CLEARED
};
template <typename TagSizeT>
static Event *create(EventPool *eventPool, const ze_event_desc_t *desc, Device *device);
static Event *fromHandle(ze_event_handle_t handle) { return static_cast<Event *>(handle); }
inline ze_event_handle_t toHandle() { return this; }
MOCKABLE_VIRTUAL NEO::GraphicsAllocation &getAllocation(Device *device) const;
MOCKABLE_VIRTUAL uint64_t getGpuAddress(Device *device) const;
virtual uint32_t getPacketsInUse() const = 0;
virtual uint32_t getPacketsUsedInLastKernel() = 0;
virtual uint64_t getPacketAddress(Device *device) = 0;
MOCKABLE_VIRTUAL void resetPackets(bool resetAllPackets);
virtual void resetKernelCountAndPacketUsedCount() = 0;
void *getHostAddress() const { return hostAddress; }
virtual void setPacketsInUse(uint32_t value) = 0;
uint32_t getCurrKernelDataIndex() const { return kernelCount - 1; }
MOCKABLE_VIRTUAL void setGpuStartTimestamp();
MOCKABLE_VIRTUAL void setGpuEndTimestamp();
size_t getCompletionFieldOffset() const {
return this->isUsingContextEndOffset() ? this->getContextEndOffset() : 0;
}
uint64_t getCompletionFieldGpuAddress(Device *device) const {
return this->getGpuAddress(device) + getCompletionFieldOffset();
}
void *getCompletionFieldHostAddress() const;
size_t getContextStartOffset() const {
return contextStartOffset;
}
size_t getContextEndOffset() const {
return contextEndOffset;
}
size_t getGlobalStartOffset() const {
return globalStartOffset;
}
size_t getGlobalEndOffset() const {
return globalEndOffset;
}
size_t getSinglePacketSize() const {
return singlePacketSize;
}
size_t getTimestampSizeInDw() const {
return timestampSizeInDw;
}
void setEventTimestampFlag(bool timestampFlag) {
isTimestampEvent = timestampFlag;
}
bool isEventTimestampFlagSet() const {
return isTimestampEvent;
}
void setUsingContextEndOffset(bool usingContextEndOffset) {
this->usingContextEndOffset = usingContextEndOffset;
}
bool isUsingContextEndOffset() const {
return isTimestampEvent || usingContextEndOffset;
}
void setCsr(NEO::CommandStreamReceiver *csr) {
this->csrs[0] = csr;
}
void appendAdditionalCsr(NEO::CommandStreamReceiver *additonalCsr) {
for (const auto &csr : csrs) {
if (csr == additonalCsr) {
return;
}
}
csrs.push_back(additonalCsr);
}
void increaseKernelCount();
uint32_t getKernelCount() const {
return kernelCount;
}
void zeroKernelCount() {
kernelCount = 0;
}
bool getL3FlushForCurrenKernel() {
return l3FlushAppliedOnKernel.test(kernelCount - 1);
}
void setL3FlushForCurrentKernel() {
l3FlushAppliedOnKernel.set(kernelCount - 1);
}
void resetCompletionStatus() {
if (this->isCompleted.load() != HOST_CACHING_DISABLED_PERMANENT) {
this->isCompleted.store(STATE_CLEARED);
}
}
void disableHostCaching(bool disableFromRegularList) {
this->isCompleted.store(disableFromRegularList ? HOST_CACHING_DISABLED_PERMANENT : HOST_CACHING_DISABLED);
}
void setIsCompleted();
bool isAlreadyCompleted() {
return this->isCompleted == STATE_SIGNALED;
}
uint32_t getMaxPacketsCount() const {
return maxPacketCount;
}
void setMaxKernelCount(uint32_t value) {
maxKernelCount = value;
}
uint32_t getMaxKernelCount() const {
return maxKernelCount;
}
void setKernelForPrintf(Kernel *inputKernelPtr) {
kernelWithPrintf = inputKernelPtr;
}
Kernel *getKernelForPrintf() {
return kernelWithPrintf;
}
bool isSignalScope() const {
return !!signalScope;
}
bool isSignalScope(ze_event_scope_flags_t flag) const {
return !!(signalScope & flag);
}
bool isWaitScope() const {
return !!waitScope;
}
void setMetricStreamer(MetricStreamer *metricStreamer) {
this->metricStreamer = metricStreamer;
}
void enableInOrderExecMode(NEO::GraphicsAllocation &inOrderDependenciesAllocation, uint32_t signalValue);
protected:
Event(EventPool *eventPool, int index, Device *device) : device(device), eventPool(eventPool), index(index) {}
uint64_t globalStartTS = 1;
uint64_t globalEndTS = 1;
uint64_t contextStartTS = 1;
uint64_t contextEndTS = 1;
std::chrono::microseconds gpuHangCheckPeriod{500'000};
std::bitset<EventPacketsCount::maxKernelSplit> l3FlushAppliedOnKernel;
size_t contextStartOffset = 0u;
size_t contextEndOffset = 0u;
size_t globalStartOffset = 0u;
size_t globalEndOffset = 0u;
size_t timestampSizeInDw = 0u;
size_t singlePacketSize = 0u;
size_t eventPoolOffset = 0u;
size_t cpuStartTimestamp = 0u;
size_t gpuStartTimestamp = 0u;
size_t gpuEndTimestamp = 0u;
// Metric streamer instance associated with the event.
MetricStreamer *metricStreamer = nullptr;
StackVec<NEO::CommandStreamReceiver *, 1> csrs;
void *hostAddress = nullptr;
Device *device = nullptr;
EventPool *eventPool = nullptr;
Kernel *kernelWithPrintf = nullptr;
NEO::GraphicsAllocation *inOrderExecDataAllocation = nullptr;
uint32_t maxKernelCount = 0;
uint32_t kernelCount = 1u;
uint32_t maxPacketCount = 0;
uint32_t totalEventSize = 0;
uint32_t inOrderExecSignalValue = 0;
ze_event_scope_flags_t signalScope = 0u;
ze_event_scope_flags_t waitScope = 0u;
int index = 0;
std::atomic<State> isCompleted{STATE_INITIAL};
bool isTimestampEvent = false;
bool usingContextEndOffset = false;
bool signalAllEventPackets = false;
bool isFromIpcPool = false;
bool inOrderExecEvent = false;
};
struct EventPool : _ze_event_pool_handle_t {
static EventPool *create(DriverHandle *driver, Context *context, uint32_t numDevices, ze_device_handle_t *deviceHandles, const ze_event_pool_desc_t *desc, ze_result_t &result);
static ze_result_t openEventPoolIpcHandle(const ze_ipc_event_pool_handle_t &ipcEventPoolHandle, ze_event_pool_handle_t *eventPoolHandle,
DriverHandleImp *driver, ContextImp *context, uint32_t numDevices, ze_device_handle_t *deviceHandles);
EventPool(const ze_event_pool_desc_t *desc) : EventPool(desc->count) {
eventPoolFlags = desc->flags;
}
virtual ~EventPool();
MOCKABLE_VIRTUAL ze_result_t destroy();
MOCKABLE_VIRTUAL ze_result_t getIpcHandle(ze_ipc_event_pool_handle_t *ipcHandle);
MOCKABLE_VIRTUAL ze_result_t closeIpcHandle();
MOCKABLE_VIRTUAL ze_result_t createEvent(const ze_event_desc_t *desc, ze_event_handle_t *eventHandle);
static EventPool *fromHandle(ze_event_pool_handle_t handle) {
return static_cast<EventPool *>(handle);
}
inline ze_event_pool_handle_t toHandle() { return this; }
MOCKABLE_VIRTUAL NEO::MultiGraphicsAllocation &getAllocation() { return *eventPoolAllocations; }
uint32_t getEventSize() const { return eventSize; }
void setEventSize(uint32_t size) { eventSize = size; }
void setEventAlignment(uint32_t alignment) { eventAlignment = alignment; }
size_t getNumEvents() const { return numEvents; }
uint32_t getEventMaxPackets() const { return eventPackets; }
size_t getEventPoolSize() const { return eventPoolSize; }
bool isEventPoolTimestampFlagSet() const;
bool isEventPoolDeviceAllocationFlagSet() const {
if (!(eventPoolFlags & ZE_EVENT_POOL_FLAG_HOST_VISIBLE)) {
return true;
}
return false;
}
uint32_t getMaxKernelCount() const {
return maxKernelCount;
}
ze_result_t initialize(DriverHandle *driver, Context *context, uint32_t numDevices, ze_device_handle_t *deviceHandles);
void initializeSizeParameters(uint32_t numDevices, ze_device_handle_t *deviceHandles, DriverHandleImp &driver, const NEO::RootDeviceEnvironment &rootDeviceEnvironment);
Device *getDevice() const { return devices[0]; }
bool getImportedIpcPool() const {
return isImportedIpcPool;
}
bool isImplicitScalingCapableFlagSet() const {
return isImplicitScalingCapable;
}
protected:
EventPool() = default;
EventPool(size_t numEvents) : numEvents(numEvents) {}
std::vector<Device *> devices;
std::unique_ptr<NEO::MultiGraphicsAllocation> eventPoolAllocations;
void *eventPoolPtr = nullptr;
ContextImp *context = nullptr;
size_t numEvents = 1;
size_t eventPoolSize = 0;
uint32_t eventAlignment = 0;
uint32_t eventSize = 0;
uint32_t eventPackets = 0;
uint32_t maxKernelCount = 0;
ze_event_pool_flags_t eventPoolFlags;
bool isDeviceEventPoolAllocation = false;
bool isHostVisibleEventPoolAllocation = false;
bool isImportedIpcPool = false;
bool isShareableEventMemory = false;
bool isImplicitScalingCapable = false;
};
} // namespace L0