Files
compute-runtime/shared/source/device/device.cpp
Jitendra Sharma f8c89fe984 Refactor memory manager so as to support device reset
Move out neoDevice dependent pieces of memory manager code into
separate methods. Those methods could be used for recreating a neoDevice
after a device reset is performed.

Related-To: LOCI-2615

Signed-off-by: Jitendra Sharma <jitendra.sharma@intel.com>
2021-10-19 07:16:29 +02:00

587 lines
20 KiB
C++

/*
* Copyright (C) 2018-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/device/device.h"
#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/command_stream/experimental_command_buffer.h"
#include "shared/source/command_stream/preemption.h"
#include "shared/source/execution_environment/root_device_environment.h"
#include "shared/source/gmm_helper/gmm_helper.h"
#include "shared/source/helpers/hw_helper.h"
#include "shared/source/helpers/ray_tracing_helper.h"
#include "shared/source/memory_manager/memory_manager.h"
#include "shared/source/os_interface/driver_info.h"
#include "shared/source/os_interface/os_context.h"
#include "shared/source/os_interface/os_interface.h"
#include "shared/source/os_interface/os_time.h"
#include "shared/source/source_level_debugger/source_level_debugger.h"
#include "shared/source/utilities/software_tags_manager.h"
namespace NEO {
// Definition of the static factory pointer used by createDeviceImpl() to build
// performance counters; it is initialized to PerformanceCounters::create.
decltype(&PerformanceCounters::create) Device::createPerformanceCountersFunc = PerformanceCounters::create;
// Declaration only: the definition is not part of the visible file.
// Device::createCommandStreamReceiver() wraps the returned raw pointer in a unique_ptr.
extern CommandStreamReceiver *createCommandStream(ExecutionEnvironment &executionEnvironment,
uint32_t rootDeviceIndex,
const DeviceBitfield deviceBitfield);
// Stores the execution environment and takes an internal reference on it.
// The matching decRefInternal() is issued by ~Device().
Device::Device(ExecutionEnvironment *executionEnvironment)
    : executionEnvironment(executionEnvironment) {
    // Inside the body the name resolves to the parameter, which designates the
    // same environment the member was just initialized with.
    executionEnvironment->incRefInternal();
}
// Tears the device down in a fixed order: frees the ray-tracing backing
// allocation, shuts down performance counters, flushes and drops the engines,
// deletes the subdevices, resets the sync buffer handler, destroys the command
// stream receivers, waits for memory-manager deletions and finally releases the
// internal execution-environment reference taken by the constructor.
Device::~Device() {
// NOTE(review): getMemoryManager() is dereferenced here, before the
// DEBUG_BREAK_IF below checks executionEnvironment->memoryManager for nullptr;
// presumably both refer to the same manager - confirm the intended order.
getMemoryManager()->freeGraphicsMemory(rtMemoryBackedBuffer);
rtMemoryBackedBuffer = nullptr;
DEBUG_BREAK_IF(nullptr == executionEnvironment->memoryManager.get());
if (performanceCounters) {
performanceCounters->shutdown();
}
// Flush batched submissions on every engine before the engine list is cleared.
for (auto &engine : engines) {
engine.commandStreamReceiver->flushBatchedSubmissions();
}
engines.clear();
// Slots may be nullptr: the sub-device creation methods resize the vector with
// nullptr and only fill the indices enabled by the relevant mask.
for (auto subdevice : subdevices) {
if (subdevice) {
delete subdevice;
}
}
subdevices.clear();
syncBufferHandler.reset();
// commandStreamReceivers holds the owning unique_ptrs (see createEngine()),
// so clearing it destroys the receivers that `engines` referenced by raw pointer.
commandStreamReceivers.clear();
executionEnvironment->memoryManager->waitForDeletions();
executionEnvironment->decRefInternal();
}
// Creates a SubDevice for the given sub-device index via Device::create,
// handing it the execution environment and this device's root device.
SubDevice *Device::createSubDevice(uint32_t subDeviceIndex) {
    auto &rootDevice = *getRootDevice();
    return Device::create<SubDevice>(executionEnvironment, subDeviceIndex, rootDevice);
}
// Same as createSubDevice(), but additionally forwards the engine type to
// Device::create so the SubDevice is built for that single engine.
SubDevice *Device::createEngineInstancedSubDevice(uint32_t subDeviceIndex, aub_stream::EngineType engineType) {
    auto &rootDevice = *getRootDevice();
    return Device::create<SubDevice>(executionEnvironment, subDeviceIndex, rootDevice, engineType);
}
// Rebuilds deviceBitfield from maxNBitValue(hardware sub-device count)
// intersected with the generic sub-devices affinity mask, and stores the number
// of set bits in numSubDevices. A single remaining sub-device is treated as
// "no sub-devices" (numSubDevices becomes 0).
// Returns true when more than one generic sub-device is enabled.
bool Device::genericSubDevicesAllowed() {
    auto &rootDeviceEnvironment = executionEnvironment->rootDeviceEnvironments[getRootDeviceIndex()];
    auto affinityMask = rootDeviceEnvironment->deviceAffinityMask.getGenericSubDevicesMask();
    const uint32_t hwSubDeviceCount = HwHelper::getSubDevicesCount(&getHardwareInfo());

    deviceBitfield = maxNBitValue(hwSubDeviceCount);
    deviceBitfield &= affinityMask;

    const auto enabledSubDevices = static_cast<uint32_t>(deviceBitfield.count());
    numSubDevices = (enabledSubDevices == 1u) ? 0u : enabledSubDevices;
    return numSubDevices != 0;
}
bool Device::engineInstancedSubDevicesAllowed() {
bool notAllowed = !DebugManager.flags.EngineInstancedSubDevices.get();
notAllowed |= engineInstanced;
notAllowed |= (getHardwareInfo().gtSystemInfo.CCSInfo.NumberOfCCSEnabled < 2);
notAllowed |= ((HwHelper::getSubDevicesCount(&getHardwareInfo()) < 2) && (!DebugManager.flags.AllowSingleTileEngineInstancedSubDevices.get()));
if (notAllowed) {
return false;
}
UNRECOVERABLE_IF(deviceBitfield.count() != 1);
uint32_t subDeviceIndex = Math::log2(static_cast<uint32_t>(deviceBitfield.to_ulong()));
auto enginesMask = getRootDeviceEnvironment().deviceAffinityMask.getEnginesMask(subDeviceIndex);
auto ccsCount = getHardwareInfo().gtSystemInfo.CCSInfo.NumberOfCCSEnabled;
numSubDevices = std::min(ccsCount, static_cast<uint32_t>(enginesMask.count()));
if (numSubDevices == 1) {
numSubDevices = 0;
}
return (numSubDevices > 0);
}
bool Device::createEngineInstancedSubDevices() {
UNRECOVERABLE_IF(deviceBitfield.count() != 1);
UNRECOVERABLE_IF(!subdevices.empty());
uint32_t subDeviceIndex = Math::log2(static_cast<uint32_t>(deviceBitfield.to_ulong()));
auto enginesMask = getRootDeviceEnvironment().deviceAffinityMask.getEnginesMask(subDeviceIndex);
auto ccsCount = getHardwareInfo().gtSystemInfo.CCSInfo.NumberOfCCSEnabled;
subdevices.resize(ccsCount, nullptr);
for (uint32_t i = 0; i < ccsCount; i++) {
if (!enginesMask.test(i)) {
continue;
}
auto engineType = static_cast<aub_stream::EngineType>(aub_stream::EngineType::ENGINE_CCS + i);
auto subDevice = createEngineInstancedSubDevice(subDeviceIndex, engineType);
UNRECOVERABLE_IF(!subDevice);
subdevices[i] = subDevice;
}
return true;
}
bool Device::createGenericSubDevices() {
UNRECOVERABLE_IF(!subdevices.empty());
uint32_t subDeviceCount = HwHelper::getSubDevicesCount(&getHardwareInfo());
subdevices.resize(subDeviceCount, nullptr);
for (auto i = 0u; i < subDeviceCount; i++) {
if (!deviceBitfield.test(i)) {
continue;
}
auto subDevice = createSubDevice(i);
if (!subDevice) {
return false;
}
subdevices[i] = subDevice;
}
hasGenericSubDevices = true;
return true;
}
// Creates sub-devices, preferring generic ones and falling back to
// engine-instanced ones. The engine-instanced check only runs when generic
// sub-devices are not allowed.
// Returns the result of the chosen creation method, or true when neither kind
// is allowed (nothing to create).
bool Device::createSubDevices() {
    bool created = true;
    if (genericSubDevicesAllowed()) {
        created = createGenericSubDevices();
    } else if (engineInstancedSubDevicesAllowed()) {
        created = createEngineInstancedSubDevices();
    }
    return created;
}
// Marks this device as engine-instanced when it has no sub-devices and the
// engines affinity mask for its sub-device index has exactly one bit set.
// In that case engineInstancedType becomes ENGINE_CCS + <index of the set bit>.
// It is unrecoverable if the device is already engine-instanced when a match is
// found, or if no enabled CCS index matches the mask.
void Device::setAsEngineInstanced() {
    if (!subdevices.empty()) {
        return;
    }

    UNRECOVERABLE_IF(deviceBitfield.count() != 1);
    uint32_t ownSubDeviceIndex = Math::log2(static_cast<uint32_t>(deviceBitfield.to_ulong()));
    auto allowedEngines = getRootDeviceEnvironment().deviceAffinityMask.getEnginesMask(ownSubDeviceIndex);
    if (allowedEngines.count() != 1) {
        return;
    }

    auto enabledCcsCount = getHardwareInfo().gtSystemInfo.CCSInfo.NumberOfCCSEnabled;
    for (uint32_t ccsIndex = 0; ccsIndex < enabledCcsCount; ++ccsIndex) {
        if (allowedEngines.test(ccsIndex)) {
            UNRECOVERABLE_IF(engineInstanced);
            engineInstanced = true;
            engineInstancedType = static_cast<aub_stream::EngineType>(aub_stream::EngineType::ENGINE_CCS + ccsIndex);
        }
    }
    // The single mask bit must correspond to one of the enabled CCS engines.
    UNRECOVERABLE_IF(!engineInstanced);
}
// Runs the device initialization sequence: sub-devices, preemption mode, GMM,
// debugger, engines, default-engine registration with the memory manager, OS
// time, capabilities, performance counters, optional experimental command
// buffers and software tags, and the bindless heaps helper.
// Returns false when sub-device or engine creation fails, true otherwise.
bool Device::createDeviceImpl() {
if (!createSubDevices()) {
return false;
}
// Returns immediately when sub-devices were created above.
setAsEngineInstanced();
auto &hwInfo = getHardwareInfo();
// Must be set before createEngines(): createEngine() passes preemptionMode to
// the EngineDescriptor and checks it for MidThread.
preemptionMode = PreemptionHelper::getDefaultPreemptionMode(hwInfo);
executionEnvironment->rootDeviceEnvironments[getRootDeviceIndex()]->initGmm();
// Only initialize the debugger when none is available yet.
if (!getDebugger()) {
this->executionEnvironment->rootDeviceEnvironments[getRootDeviceIndex()]->initDebugger();
}
if (!createEngines()) {
return false;
}
getDefaultEngine().osContext->setDefaultContext(true);
for (auto &engine : engines) {
auto commandStreamReceiver = engine.commandStreamReceiver;
commandStreamReceiver->postInitFlagsSetup();
}
// Find the position of the default engine's OsContext among the engines
// registered with the memory manager; the index stays 0 when no entry matches.
uint32_t defaultEngineIndexWithinMemoryManager = 0;
for (auto engineIndex = 0u; engineIndex < executionEnvironment->memoryManager->getRegisteredEnginesCount(); engineIndex++) {
OsContext *engine = executionEnvironment->memoryManager->getRegisteredEngines()[engineIndex].osContext;
if (engine == getDefaultEngine().osContext) {
defaultEngineIndexWithinMemoryManager = engineIndex;
break;
}
}
executionEnvironment->memoryManager->setDefaultEngineIndex(getRootDeviceIndex(), defaultEngineIndexWithinMemoryManager);
getRootDeviceEnvironmentRef().initOsTime();
initializeCaps();
// Performance counters are created only when an OS interface is available and
// the capability table enables instrumentation.
if (getOSTime()->getOSInterface()) {
if (hwInfo.capabilityTable.instrumentationEnabled) {
performanceCounters = createPerformanceCountersFunc(this);
}
}
executionEnvironment->memoryManager->setForce32BitAllocations(getDeviceInfo().force32BitAddressess);
// Debug option: attach an ExperimentalCommandBuffer to every engine's CSR.
if (DebugManager.flags.EnableExperimentalCommandBuffer.get() > 0) {
for (auto &engine : engines) {
auto csr = engine.commandStreamReceiver;
csr->setExperimentalCmdBuffer(std::make_unique<ExperimentalCommandBuffer>(csr, getDeviceInfo().profilingTimerResolution));
}
}
// Debug option: initialize the tags manager once, if it is not initialized yet.
if (DebugManager.flags.EnableSWTags.get() && !getRootDeviceEnvironment().tagsManager->isInitialized()) {
getRootDeviceEnvironment().tagsManager->initialize(*this);
}
createBindlessHeapsHelper();
return true;
}
bool Device::createEngines() {
if (engineInstanced) {
return createEngine(0, {engineInstancedType, EngineUsage::Regular});
}
auto &hwInfo = getHardwareInfo();
auto gpgpuEngines = HwHelper::get(hwInfo.platform.eRenderCoreFamily).getGpgpuEngineInstances(hwInfo);
uint32_t deviceCsrIndex = 0;
for (auto &engine : gpgpuEngines) {
if (!createEngine(deviceCsrIndex++, engine)) {
return false;
}
}
return true;
}
// Appends the engine to the engineGroups entry matching its engine group type.
// The engine is skipped when the HW helper reports it as unsupported for this
// device's bitfield, or when its group is copy-only and the
// EnableBlitterOperationsSupport debug flag equals 0.
void Device::addEngineToEngineGroup(EngineControl &engine) {
    const HardwareInfo &hwInfo = this->getHardwareInfo();
    const HwHelper &helper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily);

    // The group type is resolved first, before any of the filters below run.
    const EngineGroupType groupType = helper.getEngineGroupType(engine.getEngineType(), engine.getEngineUsage(), hwInfo);

    const bool supportedOnThisDevice = helper.isSubDeviceEngineSupported(hwInfo, getDeviceBitfield(), engine.getEngineType());
    if (!supportedOnThisDevice) {
        return;
    }

    if (helper.isCopyOnlyEngineType(groupType)) {
        if (DebugManager.flags.EnableBlitterOperationsSupport.get() == 0) {
            return;
        }
    }

    const auto groupSlot = static_cast<uint32_t>(groupType);
    this->engineGroups[groupSlot].push_back(engine);
}
std::unique_ptr<CommandStreamReceiver> Device::createCommandStreamReceiver() const {
return std::unique_ptr<CommandStreamReceiver>(createCommandStream(*executionEnvironment, getRootDeviceIndex(), getDeviceBitfield()));
}
// Creates a single engine: a command stream receiver, an OsContext registered
// with the memory manager, and the CSR's tag / global fence / (optionally)
// preemption allocations. On success the engine is appended to `engines`,
// possibly to `engineGroups`, and the CSR ownership moves to `commandStreamReceivers`.
//
// deviceCsrIndex  - index recorded as defaultEngineIndex when this is the default engine.
// engineTypeUsage - (engine type, engine usage) pair describing the engine to create.
// Returns false when the CSR cannot be created or one of the allocation steps fails.
bool Device::createEngine(uint32_t deviceCsrIndex, EngineTypeUsage engineTypeUsage) {
const auto &hwInfo = getHardwareInfo();
const auto engineType = engineTypeUsage.first;
const auto engineUsage = engineTypeUsage.second;
// The default engine is the regular-usage engine whose type matches either the
// engine-instanced type (for engine-instanced devices) or the chosen engine type.
const auto defaultEngineType = engineInstanced ? this->engineInstancedType : getChosenEngineType(hwInfo);
const bool isDefaultEngine = defaultEngineType == engineType && engineUsage == EngineUsage::Regular;
const bool createAsEngineInstanced = engineInstanced && EngineHelpers::isCcs(engineType);
std::unique_ptr<CommandStreamReceiver> commandStreamReceiver = createCommandStreamReceiver();
if (!commandStreamReceiver) {
return false;
}
bool internalUsage = (engineTypeUsage.second == EngineUsage::Internal);
if (internalUsage) {
commandStreamReceiver->initializeDefaultsForInternalEngine();
}
if (commandStreamReceiver->needsPageTableManager()) {
commandStreamReceiver->createPageTableManager();
}
bool lowPriority = (engineTypeUsage.second == EngineUsage::LowPriority);
EngineDescriptor engineDescriptor(engineTypeUsage, getDeviceBitfield(), preemptionMode, false, createAsEngineInstanced);
// NOTE(review): osContext is dereferenced below without a nullptr check -
// confirm createAndRegisterOsContext() cannot return nullptr.
auto osContext = executionEnvironment->memoryManager->createAndRegisterOsContext(commandStreamReceiver.get(), engineDescriptor);
if (osContext->isImmediateContextInitializationEnabled(isDefaultEngine)) {
osContext->ensureContextInitialized();
}
commandStreamReceiver->setupContext(*osContext);
// NOTE(review): on the early `return false` paths below the local unique_ptr
// destroys the CSR whose raw pointer was handed to createAndRegisterOsContext()
// above - verify the memory manager does not keep using it.
if (!commandStreamReceiver->initializeTagAllocation()) {
return false;
}
if (!commandStreamReceiver->createGlobalFenceAllocation()) {
return false;
}
if (isDefaultEngine) {
defaultEngineIndex = deviceCsrIndex;
}
// A preemption allocation is only requested for mid-thread preemption.
if (preemptionMode == PreemptionMode::MidThread && !commandStreamReceiver->createPreemptionAllocation()) {
return false;
}
// EngineControl stores raw pointers; the CSR itself is owned by
// commandStreamReceivers once it is moved there at the end.
EngineControl engine{commandStreamReceiver.get(), osContext};
engines.push_back(engine);
// Low-priority and internal engines are not added to the engine groups.
if (!lowPriority && !internalUsage) {
addEngineToEngineGroup(engine);
}
commandStreamReceivers.push_back(std::move(commandStreamReceiver));
return true;
}
// Returns the hardware info held by this device's root device environment.
const HardwareInfo &Device::getHardwareInfo() const {
    auto &rootDeviceEnvironment = getRootDeviceEnvironment();
    return *rootDeviceEnvironment.getHardwareInfo();
}
// Read-only access to the device's DeviceInfo member.
const DeviceInfo &Device::getDeviceInfo() const {
    return this->deviceInfo;
}
// Returns the dynamic device timer resolution reported by OSTime for this
// device's hardware info.
double Device::getProfilingTimerResolution() {
    auto *osTime = getOSTime();
    return osTime->getDynamicDeviceTimerResolution(getHardwareInfo());
}
// Returns the dynamic device timer clock reported by OSTime for this device's
// hardware info.
uint64_t Device::getProfilingTimerClock() {
    auto *osTime = getOSTime();
    return osTime->getDynamicDeviceTimerClock(getHardwareInfo());
}
bool Device::isSimulation() const {
auto &hwInfo = getHardwareInfo();
bool simulation = hwInfo.capabilityTable.isSimulation(hwInfo.platform.usDeviceID);
for (const auto &engine : engines) {
if (engine.commandStreamReceiver->getType() != CommandStreamReceiverType::CSR_HW) {
simulation = true;
}
}
if (hwInfo.featureTable.ftrSimulationMode) {
simulation = true;
}
return simulation;
}
// Returns the host timer resolution from OSTime, or 0.0 when no OSTime object
// is available.
double Device::getPlatformHostTimerResolution() const {
    auto *osTime = getOSTime();
    if (!osTime) {
        return 0.0;
    }
    return osTime->getHostTimerResolution();
}
// Returns the render core family stored in the hardware info's platform data.
GFXCORE_FAMILY Device::getRenderCoreFamily() const {
    const auto &hardwareInfo = getHardwareInfo();
    return hardwareInfo.platform.eRenderCoreFamily;
}
bool Device::isDebuggerActive() const {
return deviceInfo.debuggerActive;
}
const std::vector<EngineControl> *Device::getNonEmptyEngineGroup(size_t index) const {
auto nonEmptyGroupIndex = 0u;
for (auto groupIndex = 0u; groupIndex < CommonConstants::engineGroupCount; groupIndex++) {
const std::vector<EngineControl> *currentGroup = &engineGroups[groupIndex];
if (currentGroup->empty()) {
continue;
}
if (index == nonEmptyGroupIndex) {
return currentGroup;
}
nonEmptyGroupIndex++;
}
return nullptr;
}
// Inverse of getNonEmptyEngineGroup(): returns how many non-empty engine groups
// precede the group of the given type. It is unrecoverable if the type is out of
// range or the group itself is empty.
size_t Device::getIndexOfNonEmptyEngineGroup(EngineGroupType engineGroupType) const {
    const auto targetIndex = static_cast<size_t>(engineGroupType);
    UNRECOVERABLE_IF(targetIndex >= CommonConstants::engineGroupCount);
    UNRECOVERABLE_IF(engineGroups[targetIndex].empty());

    size_t precedingNonEmptyGroups = 0u;
    for (size_t groupIndex = 0; groupIndex < targetIndex; ++groupIndex) {
        precedingNonEmptyGroups += engineGroups[groupIndex].empty() ? 0u : 1u;
    }
    return precedingNonEmptyGroups;
}
// Looks up the engine matching both the given engine type and usage.
// Returns a pointer to the matching entry of `engines`, or nullptr when there is
// no match. When the OverrideInvalidEngineWithDefault debug flag is set, a failed
// lookup falls back to the first engine instead, provided at least one engine exists.
EngineControl *Device::tryGetEngine(aub_stream::EngineType engineType, EngineUsage engineUsage) {
    for (auto &engine : engines) {
        if ((engine.getEngineType() == engineType) &&
            (engine.getEngineUsage() == engineUsage)) {
            return &engine;
        }
    }
    // Guard the fallback: indexing engines[0] on an empty vector is undefined
    // behavior, so report "not found" in that case.
    if (DebugManager.flags.OverrideInvalidEngineWithDefault.get() && !engines.empty()) {
        return &engines[0];
    }
    return nullptr;
}
// Like tryGetEngine(), but a failed lookup is unrecoverable instead of
// returning nullptr.
EngineControl &Device::getEngine(aub_stream::EngineType engineType, EngineUsage engineUsage) {
    EngineControl *found = tryGetEngine(engineType, engineUsage);
    UNRECOVERABLE_IF(found == nullptr);
    return *found;
}
// Returns the engine at the given position in `engines`; an out-of-range index
// is unrecoverable.
EngineControl &Device::getEngine(uint32_t index) {
    const bool outOfRange = index >= engines.size();
    UNRECOVERABLE_IF(outOfRange);
    return engines[index];
}
bool Device::getDeviceAndHostTimer(uint64_t *deviceTimestamp, uint64_t *hostTimestamp) const {
bool retVal = getOSTime()->getCpuTime(hostTimestamp);
if (retVal) {
TimeStampData timeStamp;
retVal = getOSTime()->getCpuGpuTime(&timeStamp);
if (retVal) {
if (DebugManager.flags.EnableDeviceBasedTimestamps.get()) {
auto resolution = getOSTime()->getDynamicDeviceTimerResolution(getHardwareInfo());
*deviceTimestamp = static_cast<uint64_t>(timeStamp.GPUTimeStamp * resolution);
} else
*deviceTimestamp = *hostTimestamp;
}
}
return retVal;
}
bool Device::getHostTimer(uint64_t *hostTimestamp) const {
return getOSTime()->getCpuTime(hostTimestamp);
}
// Returns getNumSubDevices() when this device has a root CSR, otherwise 0.
uint32_t Device::getNumGenericSubDevices() const {
    if (!hasRootCsr()) {
        return 0;
    }
    return getNumSubDevices();
}
// Returns the entry of `subdevices` at deviceId; an out-of-range id is
// unrecoverable. The entry itself can be nullptr for slots that sub-device
// creation left unset.
Device *Device::getSubDevice(uint32_t deviceId) const {
    const bool outOfRange = deviceId >= subdevices.size();
    UNRECOVERABLE_IF(outOfRange);
    return subdevices[deviceId];
}
// Resolves the nearest generic sub-device relative to this device:
//  - engine-instanced device: asks the root device, using the index derived
//    from this device's own bitfield (deviceId is ignored);
//  - device without sub-devices or without a root CSR: the device itself;
//  - otherwise: the sub-device stored at deviceId (out-of-range is unrecoverable).
Device *Device::getNearestGenericSubDevice(uint32_t deviceId) {
    if (engineInstanced) {
        const auto ownSubDeviceIndex = Math::log2(static_cast<uint32_t>(deviceBitfield.to_ulong()));
        return getRootDevice()->getNearestGenericSubDevice(ownSubDeviceIndex);
    }

    const bool delegatesToSubDevice = !subdevices.empty() && hasRootCsr();
    if (!delegatesToSubDevice) {
        return this;
    }

    UNRECOVERABLE_IF(deviceId >= subdevices.size());
    return subdevices[deviceId];
}
// Returns the bindless heaps helper held by the root device environment.
BindlessHeapsHelper *Device::getBindlessHeapsHelper() const {
    auto &rootDeviceEnvironment = getRootDeviceEnvironment();
    return rootDeviceEnvironment.getBindlessHeapsHelper();
}
// Returns the client context of this device's GMM helper.
GmmClientContext *Device::getGmmClientContext() const {
    auto *gmmHelper = getGmmHelper();
    return gmmHelper->getClientContext();
}
// Creates the SyncBufferHandler on first use; later calls are no-ops.
// The check-and-create sequence runs under a function-local static mutex.
void Device::allocateSyncBufferHandler() {
    static std::mutex allocationMutex;
    std::unique_lock<std::mutex> guard(allocationMutex);

    if (syncBufferHandler.get() != nullptr) {
        return;
    }
    syncBufferHandler = std::make_unique<SyncBufferHandler>(*this);
    UNRECOVERABLE_IF(syncBufferHandler.get() == nullptr);
}
uint64_t Device::getGlobalMemorySize(uint32_t deviceBitfield) const {
auto globalMemorySize = getMemoryManager()->isLocalMemorySupported(this->getRootDeviceIndex())
? getMemoryManager()->getLocalMemorySize(this->getRootDeviceIndex(), deviceBitfield)
: getMemoryManager()->getSystemSharedMemory(this->getRootDeviceIndex());
globalMemorySize = std::min(globalMemorySize, getMemoryManager()->getMaxApplicationAddress() + 1);
double percentOfGlobalMemoryAvailable = getPercentOfGlobalMemoryAvailable();
globalMemorySize = static_cast<uint64_t>(static_cast<double>(globalMemorySize) * percentOfGlobalMemoryAvailable);
return globalMemorySize;
}
// Returns the fraction of global memory to expose. When the
// ClDeviceGlobalMemSizeAvailablePercent debug flag differs from -1 it is
// interpreted as a percentage and converted with a factor of 0.01; otherwise
// the memory manager's value for this root device is used.
double Device::getPercentOfGlobalMemoryAvailable() const {
    const auto overridePercent = DebugManager.flags.ClDeviceGlobalMemSizeAvailablePercent.get();
    if (overridePercent == -1) {
        return getMemoryManager()->getPercentOfGlobalMemoryAvailable(this->getRootDeviceIndex());
    }
    return 0.01 * static_cast<double>(overridePercent);
}
// Returns the debugger downcast to SourceLevelDebugger when a debugger exists
// and reports itself as legacy; nullptr otherwise.
NEO::SourceLevelDebugger *Device::getSourceLevelDebugger() {
    auto debugger = getDebugger();
    if (!debugger || !debugger->isLegacy()) {
        return nullptr;
    }
    return static_cast<NEO::SourceLevelDebugger *>(debugger);
}
const std::vector<EngineControl> &Device::getEngines() const {
return this->engines;
}
// Returns the engine used for internal work.
// When the first engine's CSR is not a hardware CSR (CSR_HW) the default engine
// is returned. Otherwise the internal-usage engine of the chosen engine type is
// looked up on the nearest generic sub-device (index 0).
// Assumes at least one engine exists: engines[0] is accessed unconditionally.
EngineControl &Device::getInternalEngine() {
    const bool hardwareCsr = engines[0].commandStreamReceiver->getType() == CommandStreamReceiverType::CSR_HW;
    if (hardwareCsr) {
        auto chosenEngineType = getChosenEngineType(getHardwareInfo());
        auto *genericSubDevice = getNearestGenericSubDevice(0);
        return genericSubDevice->getEngine(chosenEngineType, EngineUsage::Internal);
    }
    return getDefaultEngine();
}
// Returns the first engine whose OsContext has engine type ENGINE_BCS and is an
// internal engine, or nullptr when this device has no such engine.
EngineControl *Device::getInternalCopyEngine() {
    for (auto &engine : engines) {
        OsContext *context = engine.osContext;
        if (context->getEngineType() != aub_stream::ENGINE_BCS) {
            continue;
        }
        if (context->isInternalEngine()) {
            return &engine;
        }
    }
    return nullptr;
}
// Allocates the ray-tracing memory-backed buffer on first call; later calls are
// no-ops. The buffer is a BUFFER allocation sized by
// RayTracingHelper::getTotalMemoryBackedFifoSize() and is freed in ~Device().
void Device::initializeRayTracing() {
    if (rtMemoryBackedBuffer != nullptr) {
        return;
    }
    auto size = RayTracingHelper::getTotalMemoryBackedFifoSize(*this);
    rtMemoryBackedBuffer = getMemoryManager()->allocateGraphicsMemoryWithProperties({getRootDeviceIndex(), size, GraphicsAllocation::AllocationType::BUFFER, getDeviceBitfield()});
}
// Returns the raw OSTime pointer held by the root device environment
// (nullptr when it has not been set).
OSTime *Device::getOSTime() const {
    auto &rootDeviceEnvironment = getRootDeviceEnvironment();
    return rootDeviceEnvironment.osTime.get();
}
} // namespace NEO