// compute-runtime/shared/source/device/device.h
// (listing banner: 315 lines, 13 KiB, C++)

/*
* Copyright (C) 2018-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "shared/source/command_stream/preemption_mode.h"
#include "shared/source/device/device_info.h"
#include "shared/source/helpers/engine_control.h"
#include "shared/source/helpers/engine_node_helper.h"
#include "shared/source/helpers/non_copyable_or_moveable.h"
#include "shared/source/helpers/options.h"
#include "shared/source/os_interface/performance_counters.h"
#include "shared/source/os_interface/product_helper.h"
#include "shared/source/utilities/isa_pool_allocator.h"
#include "shared/source/utilities/reference_tracked_object.h"
#include <array>
#include <mutex>
namespace NEO {
// Forward declarations for types this header only names by pointer or reference.
// Classes (alphabetical).
class BindlessHeapsHelper;
class BuiltIns;
class CompilerInterface;
class CompilerProductHelper;
class Debugger;
class DebuggerL0;
class ExecutionEnvironment;
class GfxCoreHelper;
class GmmClientContext;
class GmmHelper;
class OSTime;
class ProductHelper;
class ReleaseHelper;
class SubDevice;
class SyncBufferHandler;
class UsmMemAllocPoolsManager;
// Structs.
struct PhysicalDevicePciBusInfo;
// Opaque enum declaration; the enumerators are defined elsewhere.
enum class EngineGroupType : uint32_t;
// Atomic state used when choosing a copy engine. The struct is neither copyable
// nor movable (inherited from NonCopyableOrMovableClass).
struct SelectorCopyEngine : NonCopyableOrMovableClass {
    // Starts as false.
    std::atomic<bool> isMainUsed{false};
    // Starts at 0. NOTE(review): presumably a rotating index over copy engines - confirm at the use sites.
    std::atomic<uint32_t> selector{0};
};
// Owning list of command stream receivers.
using CsrContainer = std::vector<std::unique_ptr<CommandStreamReceiver>>;

// Flat list of engine controls, stored by value.
using EnginesT = std::vector<EngineControl>;

// A group type together with the engines filed under it.
struct EngineGroupT {
    EngineGroupType engineGroupType;
    EnginesT engines;
};

// List of engine groups.
using EngineGroupsT = std::vector<EngineGroupT>;
struct SecondaryContexts {
SecondaryContexts() = default;
SecondaryContexts(SecondaryContexts &&in) {
this->engines = std::move(in.engines);
this->regularCounter = in.regularCounter.load();
this->highPriorityCounter = in.highPriorityCounter.load();
this->regularEnginesTotal = in.regularEnginesTotal;
this->highPriorityEnginesTotal = in.highPriorityEnginesTotal;
}
SecondaryContexts(const SecondaryContexts &in) = delete;
SecondaryContexts &operator=(const SecondaryContexts &) = delete;
EngineControl *getEngine(const EngineUsage usage);
EnginesT engines; // vector of secondary EngineControls
std::atomic<uint8_t> regularCounter = 0; // Counter used to assign next regular EngineControl
std::atomic<uint8_t> highPriorityCounter = 0; // Counter used to assign next highPriority EngineControl
std::atomic<uint8_t> assignedContextsCounter = 0; // Counter of assigned contexts in group
uint32_t regularEnginesTotal;
uint32_t highPriorityEnginesTotal;
std::vector<int32_t> npIndices;
std::vector<int32_t> hpIndices;
std::mutex mutex;
};
// Allocations that make up one set of RT dispatch globals.
struct RTDispatchGlobalsInfo {
    // Null until an allocation is assigned.
    GraphicsAllocation *rtDispatchGlobalsArray{nullptr};
    // One entry per tile.
    std::vector<GraphicsAllocation *> rtStacks;
};
// Base class for the runtime's device objects. It aggregates the engines,
// command stream receivers, sub-devices and per-device helpers declared below.
// Reference counting comes from ReferenceTrackedObject<Device>; the class is
// non-copyable and is instantiated through create<>().
class Device : public ReferenceTrackedObject<Device> {
  public:
    Device &operator=(const Device &) = delete;
    Device(const Device &) = delete;
    ~Device() override;

    // Factory: heap-allocates a DeviceT from the forwarded arguments and then runs
    // createDeviceImpl() on it. Returns nullptr (the object is deleted) when that
    // initialization reports failure. The caller receives a raw pointer.
    template <typename DeviceT, typename... ArgsT>
    static DeviceT *create(ArgsT &&...args) {
        DeviceT *device = new DeviceT(std::forward<ArgsT>(args)...);
        return createDeviceInternals(device);
    }

    // Virtual forwarders to the ReferenceTrackedObject implementation, which lets
    // derived device types override internal reference counting.
    virtual void incRefInternal() {
        ReferenceTrackedObject<Device>::incRefInternal();
    }
    virtual unique_ptr_if_unused<Device> decRefInternal() {
        return ReferenceTrackedObject<Device>::decRefInternal();
    }

    // --- Timers and static device description ---
    bool getDeviceAndHostTimer(uint64_t *deviceTimestamp, uint64_t *hostTimestamp) const;
    bool getHostTimer(uint64_t *hostTimestamp) const;
    const HardwareInfo &getHardwareInfo() const;
    const DeviceInfo &getDeviceInfo() const;

    // --- Engine lookup ---
    // NOTE(review): tryGetEngine returns a pointer while getEngine returns a
    // reference; presumably the pointer form reports "no such engine" as nullptr -
    // confirm in the implementation file.
    EngineControl *tryGetEngine(aub_stream::EngineType engineType, EngineUsage engineUsage);
    EngineControl &getEngine(aub_stream::EngineType engineType, EngineUsage engineUsage);
    // Mutable access to the regular engine groups.
    EngineGroupsT &getRegularEngineGroups() {
        return this->regularEngineGroups;
    }
    const EngineGroupT *tryGetRegularEngineGroup(EngineGroupType engineGroupType) const;
    size_t getEngineGroupIndexFromEngineGroupType(EngineGroupType engineGroupType) const;
    EngineControl &getEngine(uint32_t index);
    // Defined inline after the class: allEngines[defaultEngineIndex].
    EngineControl &getDefaultEngine();
    EngineControl &getNextEngineForCommandQueue();
    EngineControl &getInternalEngine();
    EngineControl *getInternalCopyEngine();
    EngineControl *getHpCopyEngine();
    // Defined inline after the class: returns the selectorCopyEngine member.
    SelectorCopyEngine &getSelectorCopyEngine();

    // --- Environment-level helpers (defined out of line) ---
    MemoryManager *getMemoryManager() const;
    GmmHelper *getGmmHelper() const;
    GmmClientContext *getGmmClientContext() const;
    OSTime *getOSTime() const;
    double getProfilingTimerResolution();
    uint64_t getProfilingTimerClock();
    double getPlatformHostTimerResolution() const;
    GFXCORE_FAMILY getRenderCoreFamily() const;
    // Non-owning pointer; null when no performance counters object is held.
    PerformanceCounters *getPerformanceCounters() { return performanceCounters.get(); }

    // --- Preemption and debugging ---
    PreemptionMode getPreemptionMode() const { return preemptionMode; }
    void overridePreemptionMode(PreemptionMode mode) { preemptionMode = mode; }
    Debugger *getDebugger() const;
    DebuggerL0 *getL0Debugger();

    const EnginesT &getAllEngines() const;
    const std::string getDeviceName() const;
    ExecutionEnvironment *getExecutionEnvironment() const { return executionEnvironment; }
    const RootDeviceEnvironment &getRootDeviceEnvironment() const;
    RootDeviceEnvironment &getRootDeviceEnvironmentRef() const;

    // --- Capability queries (defined out of line) ---
    bool isFullRangeSvm() const;
    static bool isBlitSplitEnabled();
    static bool isInitDeviceWithFirstSubmissionEnabled(CommandStreamReceiverType csrType);
    bool isBcsSplitSupported();
    bool isInitDeviceWithFirstSubmissionSupported(CommandStreamReceiverType csrType);
    bool areSharedSystemAllocationsAllowed() const;

    // Type-erased back-pointer to an API-specific device object. The pointer is
    // stored as an integer and cast back on retrieval, so the caller must request
    // the same type that was stored.
    template <typename SpecializedDeviceT>
    void setSpecializedDevice(SpecializedDeviceT *specializedDevice) {
        this->specializedDevice = reinterpret_cast<uintptr_t>(specializedDevice);
    }
    template <typename SpecializedDeviceT>
    SpecializedDeviceT *getSpecializedDevice() const {
        return reinterpret_cast<SpecializedDeviceT *>(specializedDevice);
    }

    MOCKABLE_VIRTUAL CompilerInterface *getCompilerInterface() const;
    BuiltIns *getBuiltIns() const;
    void allocateSyncBufferHandler();

    // --- Root device / sub-device topology ---
    uint32_t getRootDeviceIndex() const {
        return this->rootDeviceIndex;
    }
    uint32_t getNumGenericSubDevices() const;
    Device *getSubDevice(uint32_t deviceId) const;
    Device *getNearestGenericSubDevice(uint32_t deviceId);
    virtual Device *getRootDevice() const = 0;
    DeviceBitfield getDeviceBitfield() const { return deviceBitfield; }
    uint32_t getNumSubDevices() const { return numSubDevices; }
    virtual bool isSubDevice() const = 0;
    bool hasRootCsr() const { return rootCsrCreated; }
    bool isEngineInstanced() const { return engineInstanced; }
    BindlessHeapsHelper *getBindlessHeapsHelper() const;

    // Replaceable factory hook with the signature of PerformanceCounters::create.
    static decltype(&PerformanceCounters::create) createPerformanceCountersFunc;
    // Public data member; see allocateSyncBufferHandler().
    std::unique_ptr<SyncBufferHandler> syncBufferHandler;

    // --- Ray tracing ---
    GraphicsAllocation *getRTMemoryBackedBuffer() { return rtMemoryBackedBuffer; }
    RTDispatchGlobalsInfo *getRTDispatchGlobals(uint32_t maxBvhLevels);
    // Ray tracing counts as initialized once the memory-backed buffer is set.
    bool rayTracingIsInitialized() const { return rtMemoryBackedBuffer != nullptr; }
    void initializeRayTracing(uint32_t maxBvhLevels);
    void allocateRTDispatchGlobals(uint32_t maxBvhLevels);

    uint64_t getGlobalMemorySize(uint32_t deviceBitfield) const;
    const std::vector<SubDevice *> &getSubDevices() const { return subdevices; }

    // --- Device identity (UUID / LUID) ---
    bool getUuid(std::array<uint8_t, ProductHelper::uuidSize> &uuid);
    void generateUuid(std::array<uint8_t, ProductHelper::uuidSize> &uuid);
    void getAdapterLuid(std::array<uint8_t, ProductHelper::luidSize> &luid);
    MOCKABLE_VIRTUAL bool verifyAdapterLuid();
    void getAdapterMask(uint32_t &nodeMask);

    // --- Hardware / product helper accessors ---
    const GfxCoreHelper &getGfxCoreHelper() const;
    const ProductHelper &getProductHelper() const;
    const CompilerProductHelper &getCompilerProductHelper() const;
    ReleaseHelper *getReleaseHelper() const;
    ISAPoolAllocator &getIsaPoolAllocator() {
        return isaPoolAllocator;
    }
    // Non-owning pointer; null when no pools manager has been created.
    UsmMemAllocPoolsManager *getUsmMemAllocPoolsManager() {
        return deviceUsmMemAllocPoolsManager.get();
    }

    MOCKABLE_VIRTUAL void stopDirectSubmissionAndWaitForCompletion();
    bool isAnyDirectSubmissionEnabled();

    // True when a compiler interface is available and either the preemption mode
    // is MidThread or getDebugger() returns a non-null debugger.
    bool isStateSipRequired() const {
        return (getPreemptionMode() == PreemptionMode::MidThread || getDebugger() != nullptr) && getCompilerInterface();
    }

    MOCKABLE_VIRTUAL EngineControl *getSecondaryEngineCsr(EngineTypeUsage engineTypeUsage, bool allocateInterrupt);
    // True for CCS and BCS engine types. Depends only on the argument, hence const.
    bool isSecondaryContextEngineType(aub_stream::EngineType type) const {
        return EngineHelpers::isCcs(type) || EngineHelpers::isBcs(type);
    }

    GraphicsAllocation *getDebugSurface() const { return debugSurface; }
    void setDebugSurface(GraphicsAllocation *debugSurface) { this->debugSurface = debugSurface; }
    const CsrContainer &getSecondaryCsrs() const { return secondaryCsrs; }

    // Public atomic counter, starts at 0.
    std::atomic<uint32_t> debugExecutionCounter = 0;

  protected:
    Device() = delete;
    Device(ExecutionEnvironment *executionEnvironment, const uint32_t rootDeviceIndex);
    MOCKABLE_VIRTUAL void initializeCaps();

    // Second stage of create<>(): runs createDeviceImpl() on the freshly allocated
    // device. On failure the device is deleted and nullptr is returned; otherwise
    // the same pointer is handed back.
    template <typename T>
    static T *createDeviceInternals(T *device) {
        if (false == device->createDeviceImpl()) {
            delete device;
            return nullptr;
        }
        return device;
    }

    // --- Initialization steps (defined out of line) ---
    MOCKABLE_VIRTUAL bool createDeviceImpl();
    bool initDeviceWithEngines();
    void initializeCommonResources();
    bool initDeviceFully();
    virtual bool createEngines();
    void addEngineToEngineGroup(EngineControl &engine);
    MOCKABLE_VIRTUAL bool createEngine(EngineTypeUsage engineTypeUsage);
    MOCKABLE_VIRTUAL bool initializeEngines();
    MOCKABLE_VIRTUAL bool createSecondaryEngine(CommandStreamReceiver *primaryCsr, EngineTypeUsage engineTypeUsage);
    MOCKABLE_VIRTUAL std::unique_ptr<CommandStreamReceiver> createCommandStreamReceiver() const;
    MOCKABLE_VIRTUAL SubDevice *createSubDevice(uint32_t subDeviceIndex);
    MOCKABLE_VIRTUAL SubDevice *createEngineInstancedSubDevice(uint32_t subDeviceIndex, aub_stream::EngineType engineType);
    MOCKABLE_VIRTUAL size_t getMaxParameterSizeFromIGC() const;
    double getPercentOfGlobalMemoryAvailable() const;
    // Default is a no-op; derived classes may override.
    virtual void createBindlessHeapsHelper() {}
    bool createSubDevices();
    bool createGenericSubDevices();
    bool createEngineInstancedSubDevices();
    virtual bool genericSubDevicesAllowed();
    bool engineInstancedSubDevicesAllowed();
    void setAsEngineInstanced();
    void finalizeRayTracing();
    void createSecondaryContexts(const EngineControl &primaryEngine, SecondaryContexts &secondaryEnginesForType, uint32_t contextCount, uint32_t regularPriorityCount, uint32_t highPriorityContextCount);

    // --- State. Declaration order is preserved: it fixes construction order. ---
    DeviceInfo deviceInfo = {};
    std::unique_ptr<PerformanceCounters> performanceCounters;
    CsrContainer commandStreamReceivers;
    EnginesT allEngines;
    std::unordered_map<aub_stream::EngineType, SecondaryContexts> secondaryEngines;
    CsrContainer secondaryCsrs;
    EngineGroupsT regularEngineGroups;
    std::vector<SubDevice *> subdevices;
    PreemptionMode preemptionMode = PreemptionMode::Disabled;
    ExecutionEnvironment *executionEnvironment = nullptr;
    aub_stream::EngineType engineInstancedType = aub_stream::EngineType::NUM_ENGINES;
    uint32_t defaultEngineIndex = 0; // index into allEngines used by getDefaultEngine()
    uint32_t numSubDevices = 0;
    std::atomic_uint32_t regularCommandQueuesCreatedWithinDeviceCount{0};
    // Spelling ("Queueus") kept as is: the name is referenced outside this header.
    std::bitset<8> availableEnginesForCommandQueueusRoundRobin = 0;
    uint32_t queuesPerEngineCount = 1;
    void initializeEngineRoundRobinControls();
    bool hasGenericSubDevices = false;
    bool engineInstanced = false;
    bool rootCsrCreated = false;
    const uint32_t rootDeviceIndex;
    GraphicsAllocation *debugSurface = nullptr;
    SelectorCopyEngine selectorCopyEngine = {};
    EngineControl *hpCopyEngine = nullptr;
    DeviceBitfield deviceBitfield = 1;
    // Integer form of the pointer stored by setSpecializedDevice(); null by default.
    uintptr_t specializedDevice = reinterpret_cast<uintptr_t>(nullptr);
    GraphicsAllocation *rtMemoryBackedBuffer = nullptr;
    std::vector<RTDispatchGlobalsInfo *> rtDispatchGlobalsInfos;
    ISAPoolAllocator isaPoolAllocator;
    std::unique_ptr<UsmMemAllocPoolsManager> deviceUsmMemAllocPoolsManager;

    // Cached UUID. The bytes are zero-initialized so they are never indeterminate,
    // even while isValid is still false.
    struct {
        bool isValid = false;
        std::array<uint8_t, ProductHelper::uuidSize> id{};
    } uuid;
    bool generateUuidFromPciBusInfo(const PhysicalDevicePciBusInfo &pciBusInfo, std::array<uint8_t, ProductHelper::uuidSize> &uuid);
};
// Returns the element of allEngines selected by defaultEngineIndex.
// Indexing is unchecked (operator[]).
inline EngineControl &Device::getDefaultEngine() {
    EngineControl &defaultEngine = this->allEngines[this->defaultEngineIndex];
    return defaultEngine;
}
// Gives mutable access to this device's SelectorCopyEngine member.
inline SelectorCopyEngine &Device::getSelectorCopyEngine() {
    return this->selectorCopyEngine;
}
} // namespace NEO