/*
 * Copyright (C) 2018-2025 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */
|
|
|
|
#include "shared/source/device/device.h"
|
|
|
|
#include "shared/source/ail/ail_configuration.h"
|
|
#include "shared/source/built_ins/sip.h"
|
|
#include "shared/source/command_stream/command_stream_receiver.h"
|
|
#include "shared/source/command_stream/preemption.h"
|
|
#include "shared/source/command_stream/submission_status.h"
|
|
#include "shared/source/debug_settings/debug_settings_manager.h"
|
|
#include "shared/source/debugger/debugger_l0.h"
|
|
#include "shared/source/device/sub_device.h"
|
|
#include "shared/source/execution_environment/execution_environment.h"
|
|
#include "shared/source/execution_environment/root_device_environment.h"
|
|
#include "shared/source/gmm_helper/gmm_helper.h"
|
|
#include "shared/source/helpers/api_specific_config.h"
|
|
#include "shared/source/helpers/compiler_product_helper.h"
|
|
#include "shared/source/helpers/gfx_core_helper.h"
|
|
#include "shared/source/helpers/ray_tracing_helper.h"
|
|
#include "shared/source/memory_manager/allocation_properties.h"
|
|
#include "shared/source/memory_manager/memory_manager.h"
|
|
#include "shared/source/memory_manager/unified_memory_pooling.h"
|
|
#include "shared/source/os_interface/driver_info.h"
|
|
#include "shared/source/os_interface/os_context.h"
|
|
#include "shared/source/os_interface/os_interface.h"
|
|
#include "shared/source/os_interface/os_time.h"
|
|
#include "shared/source/program/sync_buffer_handler.h"
|
|
#include "shared/source/release_helper/release_helper.h"
|
|
#include "shared/source/sip_external_lib/sip_external_lib.h"
|
|
#include "shared/source/unified_memory/usm_memory_support.h"
|
|
#include "shared/source/utilities/software_tags_manager.h"
|
|
|
|
namespace NEO {
|
|
|
|
// Replaceable factory used by initDeviceFully() to create the performance counters;
// initialized to PerformanceCounters::create.
decltype(&PerformanceCounters::create) Device::createPerformanceCountersFunc = PerformanceCounters::create;
|
|
// Declaration only (no definition is visible in this file); called by
// Device::createCommandStreamReceiver() to obtain a raw CommandStreamReceiver pointer.
extern CommandStreamReceiver *createCommandStream(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield);
|
|
|
|
// Binds the device to its execution environment and root device index.
// Takes an internal reference on the execution environment (dropped again in ~Device),
// sets dummy blit properties on the root device environment and, when the AIL helper
// requests it, overrides the microsecond resolution.
Device::Device(ExecutionEnvironment *executionEnvironment, const uint32_t rootDeviceIndex)
    : executionEnvironment(executionEnvironment), rootDeviceIndex(rootDeviceIndex), isaPoolAllocator(this), deviceTimestampPoolAllocator(this) {
    this->executionEnvironment->incRefInternal();

    auto &rootDeviceEnv = *this->executionEnvironment->rootDeviceEnvironments[rootDeviceIndex];
    rootDeviceEnv.setDummyBlitProperties(rootDeviceIndex);

    auto ailHelper = rootDeviceEnv.getAILConfigurationHelper();
    if (ailHelper && ailHelper->isAdjustMicrosecondResolutionRequired()) {
        microsecondResolution = ailHelper->getMicrosecondResolution();
    }
}
|
|
|
|
// Tears the device down. The statement order is intentionally kept exactly as in the
// original: performance counters, engines, ray tracing, sub-devices, pools, secondary
// CSRs/contexts, primary CSRs, pending deletions, and finally the execution-environment
// reference taken in the constructor.
Device::~Device() {
    auto &memoryManager = executionEnvironment->memoryManager;
    DEBUG_BREAK_IF(nullptr == memoryManager.get());

    if (performanceCounters) {
        performanceCounters->shutdown();
    }

    for (auto &engineControl : allEngines) {
        engineControl.commandStreamReceiver->flushBatchedSubmissions();
    }
    allEngines.clear();
    finalizeRayTracing();

    // Slots for sub-devices that were never created hold nullptr; deleting nullptr is a no-op.
    for (auto *subDevice : subdevices) {
        delete subDevice;
    }
    subdevices.clear();

    syncBufferHandler.reset();
    isaPoolAllocator.releasePools();
    deviceTimestampPoolAllocator.releasePools();
    if (deviceUsmMemAllocPoolsManager) {
        deviceUsmMemAllocPoolsManager->cleanup();
    }

    secondaryCsrs.clear();
    memoryManager->releaseSecondaryOsContexts(this->getRootDeviceIndex());
    commandStreamReceivers.clear();
    memoryManager->waitForDeletions();

    executionEnvironment->decRefInternal();
}
|
|
|
|
// Creates the sub-device with the given index, parented to this device's root device.
SubDevice *Device::createSubDevice(uint32_t subDeviceIndex) {
    auto &rootDevice = *getRootDevice();
    return Device::create<SubDevice>(executionEnvironment, subDeviceIndex, rootDevice);
}
|
|
|
|
bool Device::genericSubDevicesAllowed() {
|
|
auto deviceMask = executionEnvironment->rootDeviceEnvironments[getRootDeviceIndex()]->deviceAffinityMask.getGenericSubDevicesMask();
|
|
uint32_t subDeviceCount = GfxCoreHelper::getSubDevicesCount(&getHardwareInfo());
|
|
deviceBitfield = maxNBitValue(subDeviceCount);
|
|
|
|
if (!executionEnvironment->rootDeviceEnvironments[getRootDeviceIndex()]->isExposeSingleDeviceMode()) {
|
|
deviceBitfield &= deviceMask;
|
|
}
|
|
|
|
numSubDevices = static_cast<uint32_t>(deviceBitfield.count());
|
|
if (numSubDevices == 1 && (executionEnvironment->getDeviceHierarchyMode() != DeviceHierarchyMode::combined || subDeviceCount == 1)) {
|
|
numSubDevices = 0;
|
|
}
|
|
|
|
return (numSubDevices > 0);
|
|
}
|
|
|
|
bool Device::createGenericSubDevices() {
|
|
UNRECOVERABLE_IF(!subdevices.empty());
|
|
uint32_t subDeviceCount = GfxCoreHelper::getSubDevicesCount(&getHardwareInfo());
|
|
|
|
subdevices.resize(subDeviceCount, nullptr);
|
|
|
|
for (auto i = 0u; i < subDeviceCount; i++) {
|
|
if (!deviceBitfield.test(i)) {
|
|
continue;
|
|
}
|
|
auto subDevice = createSubDevice(i);
|
|
if (!subDevice) {
|
|
return false;
|
|
}
|
|
subdevices[i] = subDevice;
|
|
}
|
|
|
|
hasGenericSubDevices = true;
|
|
return true;
|
|
}
|
|
|
|
bool Device::createSubDevices() {
|
|
if (genericSubDevicesAllowed()) {
|
|
return createGenericSubDevices();
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool Device::createDeviceImpl() {
|
|
preemptionMode = PreemptionHelper::getDefaultPreemptionMode(getHardwareInfo());
|
|
|
|
if (!isSubDevice()) {
|
|
// init sub devices first
|
|
if (!createSubDevices()) {
|
|
return false;
|
|
}
|
|
|
|
// initialize common resources once
|
|
if (!initializeCommonResources()) {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
// create engines
|
|
if (!initDeviceWithEngines()) {
|
|
return false;
|
|
}
|
|
|
|
// go back to root-device init
|
|
if (isSubDevice()) {
|
|
return true;
|
|
}
|
|
|
|
if (getL0Debugger()) {
|
|
getL0Debugger()->initialize();
|
|
}
|
|
|
|
// continue proper init for all devices
|
|
return initDeviceFully();
|
|
}
|
|
|
|
bool Device::initDeviceWithEngines() {
|
|
auto &productHelper = getProductHelper();
|
|
if (getDebugger() && productHelper.disableL3CacheForDebug(getHardwareInfo())) {
|
|
getGmmHelper()->forceAllResourcesUncached();
|
|
}
|
|
|
|
getRootDeviceEnvironmentRef().initOsTime();
|
|
|
|
initializeCaps();
|
|
|
|
return createEngines();
|
|
}
|
|
|
|
bool Device::initializeCommonResources() {
|
|
if (getExecutionEnvironment()->isDebuggingEnabled()) {
|
|
const auto rootDeviceIndex = getRootDeviceIndex();
|
|
auto rootDeviceEnvironment = getExecutionEnvironment()->rootDeviceEnvironments[rootDeviceIndex].get();
|
|
rootDeviceEnvironment->initDebuggerL0(this);
|
|
if (rootDeviceEnvironment->debugger == nullptr) {
|
|
NEO::printDebugString(NEO::debugManager.flags.PrintDebugMessages.get(), stderr, "Debug mode is not enabled in the system.\n");
|
|
return false;
|
|
}
|
|
}
|
|
|
|
auto &hwInfo = getHardwareInfo();
|
|
auto &gfxCoreHelper = getGfxCoreHelper();
|
|
auto debugSurfaceSize = gfxCoreHelper.getSipKernelMaxDbgSurfaceSize(hwInfo);
|
|
if (this->isStateSipRequired()) {
|
|
bool ret = SipKernel::initSipKernel(SipKernel::getSipKernelType(*this), *this);
|
|
UNRECOVERABLE_IF(!ret);
|
|
debugSurfaceSize = NEO::SipKernel::getSipKernel(*this, nullptr).getStateSaveAreaSize(this);
|
|
}
|
|
|
|
const bool isDebugSurfaceRequired = getL0Debugger();
|
|
if (isDebugSurfaceRequired) {
|
|
allocateDebugSurface(debugSurfaceSize);
|
|
}
|
|
|
|
bool usmPoolManagerEnabled = ApiSpecificConfig::isDeviceUsmPoolingEnabled() &&
|
|
getProductHelper().isDeviceUsmPoolAllocatorSupported();
|
|
|
|
if (NEO::debugManager.flags.EnableDeviceUsmAllocationPool.get() != -1) {
|
|
usmPoolManagerEnabled = NEO::debugManager.flags.EnableDeviceUsmAllocationPool.get() > 0;
|
|
}
|
|
|
|
if (usmPoolManagerEnabled && NEO::debugManager.flags.ExperimentalUSMAllocationReuseVersion.get() == 2) {
|
|
|
|
RootDeviceIndicesContainer rootDeviceIndices;
|
|
rootDeviceIndices.pushUnique(getRootDeviceIndex());
|
|
std::map<uint32_t, DeviceBitfield> deviceBitfields;
|
|
deviceBitfields.emplace(getRootDeviceIndex(), getDeviceBitfield());
|
|
deviceUsmMemAllocPoolsManager.reset(new UsmMemAllocPoolsManager(getMemoryManager(), rootDeviceIndices, deviceBitfields, this, InternalMemoryType::deviceUnifiedMemory));
|
|
}
|
|
return true;
|
|
}
|
|
|
|
void Device::initUsmReuseLimits() {
|
|
const bool usmDeviceAllocationsCacheEnabled = NEO::ApiSpecificConfig::isDeviceAllocationCacheEnabled() && this->getProductHelper().isDeviceUsmAllocationReuseSupported();
|
|
auto ailConfiguration = this->getAilConfigurationHelper();
|
|
const bool limitDeviceMemoryForReuse = ailConfiguration && ailConfiguration->limitAmountOfDeviceMemoryForRecycling();
|
|
auto fractionOfTotalMemoryForRecycling = (limitDeviceMemoryForReuse || !usmDeviceAllocationsCacheEnabled) ? 0 : 0.08;
|
|
if (debugManager.flags.ExperimentalEnableDeviceAllocationCache.get() != -1) {
|
|
fractionOfTotalMemoryForRecycling = 0.01 * std::min(100, debugManager.flags.ExperimentalEnableDeviceAllocationCache.get());
|
|
}
|
|
const auto totalDeviceMemory = this->getGlobalMemorySize(static_cast<uint32_t>(this->getDeviceBitfield().to_ulong()));
|
|
auto maxAllocationsSavedForReuseSize = static_cast<uint64_t>(fractionOfTotalMemoryForRecycling * totalDeviceMemory);
|
|
|
|
auto limitAllocationsReuseThreshold = static_cast<uint64_t>(0.8 * totalDeviceMemory);
|
|
const auto limitFlagValue = debugManager.flags.ExperimentalUSMAllocationReuseLimitThreshold.get();
|
|
if (limitFlagValue != -1) {
|
|
if (limitFlagValue == 0) {
|
|
limitAllocationsReuseThreshold = UsmReuseInfo::notLimited;
|
|
} else {
|
|
const auto fractionOfTotalMemoryToLimitReuse = limitFlagValue / 100.0;
|
|
limitAllocationsReuseThreshold = static_cast<uint64_t>(fractionOfTotalMemoryToLimitReuse * totalDeviceMemory);
|
|
}
|
|
}
|
|
this->usmReuseInfo.init(maxAllocationsSavedForReuseSize, limitAllocationsReuseThreshold);
|
|
}
|
|
|
|
bool Device::shouldLimitAllocationsReuse() const {
|
|
const bool isIntegratedDevice = getHardwareInfo().capabilityTable.isIntegratedDevice;
|
|
if (isIntegratedDevice) {
|
|
return getMemoryManager()->shouldLimitAllocationsReuse();
|
|
}
|
|
return getMemoryManager()->getUsedLocalMemorySize(getRootDeviceIndex()) >= this->usmReuseInfo.getLimitAllocationsReuseThreshold();
|
|
}
|
|
|
|
// Hands the given pool pointer to the usmMemAllocPool member via reset().
void Device::resetUsmAllocationPool(UsmMemAllocPool *usmMemAllocPool) {
    auto &ownedPool = this->usmMemAllocPool;
    ownedPool.reset(usmMemAllocPool);
}
|
|
|
|
void Device::cleanupUsmAllocationPool() {
|
|
if (usmMemAllocPool) {
|
|
usmMemAllocPool->cleanup();
|
|
}
|
|
}
|
|
|
|
bool Device::initDeviceFully() {
|
|
|
|
if (!getRootDeviceEnvironment().isExposeSingleDeviceMode()) {
|
|
for (auto &subdevice : this->subdevices) {
|
|
if (subdevice && !subdevice->initDeviceFully()) {
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (!initializeEngines()) {
|
|
return false;
|
|
}
|
|
|
|
getDefaultEngine().osContext->setDefaultContext(true);
|
|
|
|
for (auto &engine : allEngines) {
|
|
auto commandStreamReceiver = engine.commandStreamReceiver;
|
|
commandStreamReceiver->postInitFlagsSetup();
|
|
}
|
|
|
|
auto ®isteredEngines = executionEnvironment->memoryManager->getRegisteredEngines(rootDeviceIndex);
|
|
uint32_t defaultEngineIndexWithinMemoryManager = 0;
|
|
for (auto engineIndex = 0u; engineIndex < registeredEngines.size(); engineIndex++) {
|
|
OsContext *engine = registeredEngines[engineIndex].osContext;
|
|
if (engine == getDefaultEngine().osContext) {
|
|
defaultEngineIndexWithinMemoryManager = engineIndex;
|
|
break;
|
|
}
|
|
}
|
|
executionEnvironment->memoryManager->setDefaultEngineIndex(getRootDeviceIndex(), defaultEngineIndexWithinMemoryManager);
|
|
|
|
auto &hwInfo = getHardwareInfo();
|
|
if (getRootDeviceEnvironment().osInterface) {
|
|
if (hwInfo.capabilityTable.instrumentationEnabled) {
|
|
performanceCounters = createPerformanceCountersFunc(this);
|
|
}
|
|
}
|
|
|
|
executionEnvironment->memoryManager->setForce32BitAllocations(getDeviceInfo().force32BitAddresses);
|
|
|
|
if (debugManager.flags.EnableSWTags.get() && !getRootDeviceEnvironment().tagsManager->isInitialized()) {
|
|
getRootDeviceEnvironment().tagsManager->initialize(*this);
|
|
}
|
|
|
|
createBindlessHeapsHelper();
|
|
uuid.isValid = false;
|
|
initUsmReuseLimits();
|
|
|
|
if (getRootDeviceEnvironment().osInterface == nullptr) {
|
|
return true;
|
|
}
|
|
|
|
auto &gfxCoreHelper = getGfxCoreHelper();
|
|
auto &productHelper = getProductHelper();
|
|
if (debugManager.flags.EnableChipsetUniqueUUID.get() != 0) {
|
|
if (gfxCoreHelper.isChipsetUniqueUUIDSupported()) {
|
|
|
|
auto deviceIndex = isSubDevice() ? static_cast<SubDevice *>(this)->getSubDeviceIndex() + 1 : 0;
|
|
uuid.isValid = productHelper.getUuid(getRootDeviceEnvironment().osInterface->getDriverModel(), getRootDevice()->getNumSubDevices(), deviceIndex, uuid.id);
|
|
}
|
|
}
|
|
|
|
if (!uuid.isValid) {
|
|
PhysicalDevicePciBusInfo pciBusInfo = getRootDeviceEnvironment().osInterface->getDriverModel()->getPciBusInfo();
|
|
uuid.isValid = generateUuidFromPciBusInfo(pciBusInfo, uuid.id);
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool Device::createEngines() {
|
|
auto &gfxCoreHelper = getGfxCoreHelper();
|
|
auto gpgpuEngines = gfxCoreHelper.getGpgpuEngineInstances(getRootDeviceEnvironment());
|
|
|
|
for (auto &engine : gpgpuEngines) {
|
|
|
|
if (isSubDevice() && getRootDeviceEnvironment().isExposeSingleDeviceMode() && EngineHelpers::isComputeEngine(engine.first)) {
|
|
continue;
|
|
}
|
|
|
|
if (!createEngine(engine)) {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
if (gfxCoreHelper.areSecondaryContextsSupported()) {
|
|
auto &hwInfo = this->getHardwareInfo();
|
|
|
|
auto hpCopyEngine = getHpCopyEngine();
|
|
|
|
for (auto engineGroupType : {EngineGroupType::compute, EngineGroupType::copy, EngineGroupType::linkedCopy}) {
|
|
auto engineGroup = tryGetRegularEngineGroup(engineGroupType);
|
|
|
|
if (!engineGroup) {
|
|
continue;
|
|
}
|
|
|
|
auto contextCount = gfxCoreHelper.getContextGroupContextsCount();
|
|
bool hpEngineAvailable = false;
|
|
|
|
if (NEO::EngineHelper::isCopyOnlyEngineType(engineGroupType)) {
|
|
hpEngineAvailable = hpCopyEngine != nullptr;
|
|
}
|
|
|
|
auto highPriorityContextCount = gfxCoreHelper.getContextGroupHpContextsCount(engineGroupType, hpEngineAvailable);
|
|
|
|
if (debugManager.flags.OverrideNumHighPriorityContexts.get() != -1) {
|
|
highPriorityContextCount = static_cast<uint32_t>(debugManager.flags.OverrideNumHighPriorityContexts.get());
|
|
}
|
|
|
|
if (getRootDeviceEnvironment().osInterface && getRootDeviceEnvironment().osInterface->getAggregatedProcessCount() > 1) {
|
|
const auto numProcesses = getRootDeviceEnvironment().osInterface->getAggregatedProcessCount();
|
|
|
|
contextCount = std::max(contextCount / numProcesses, 2u);
|
|
highPriorityContextCount = std::max(contextCount / 2, 1u);
|
|
|
|
} else {
|
|
if (engineGroupType == EngineGroupType::compute && hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled > 1) {
|
|
contextCount = contextCount / hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled;
|
|
highPriorityContextCount = highPriorityContextCount / hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled;
|
|
}
|
|
|
|
if (engineGroupType == EngineGroupType::copy || engineGroupType == EngineGroupType::linkedCopy) {
|
|
gfxCoreHelper.adjustCopyEngineRegularContextCount(engineGroup->engines.size(), contextCount);
|
|
}
|
|
}
|
|
for (uint32_t engineIndex = 0; engineIndex < static_cast<uint32_t>(engineGroup->engines.size()); engineIndex++) {
|
|
auto engineType = engineGroup->engines[engineIndex].getEngineType();
|
|
|
|
if ((static_cast<uint32_t>(debugManager.flags.SecondaryContextEngineTypeMask.get()) & (1 << static_cast<uint32_t>(engineType))) == 0) {
|
|
continue;
|
|
}
|
|
|
|
UNRECOVERABLE_IF(secondaryEngines.find(engineType) != secondaryEngines.end());
|
|
auto &secondaryEnginesForType = secondaryEngines[engineType];
|
|
|
|
auto primaryEngine = engineGroup->engines[engineIndex];
|
|
|
|
createSecondaryContexts(primaryEngine, secondaryEnginesForType, contextCount, contextCount - highPriorityContextCount, highPriorityContextCount);
|
|
}
|
|
}
|
|
|
|
if (hpCopyEngine) {
|
|
auto engineType = hpCopyEngine->getEngineType();
|
|
if ((static_cast<uint32_t>(debugManager.flags.SecondaryContextEngineTypeMask.get()) & (1 << static_cast<uint32_t>(engineType))) != 0) {
|
|
|
|
UNRECOVERABLE_IF(secondaryEngines.find(engineType) != secondaryEngines.end());
|
|
auto &secondaryEnginesForType = secondaryEngines[engineType];
|
|
|
|
auto primaryEngine = *hpCopyEngine;
|
|
|
|
auto contextCount = gfxCoreHelper.getContextGroupContextsCount();
|
|
|
|
createSecondaryContexts(primaryEngine, secondaryEnginesForType, contextCount, 0, contextCount);
|
|
}
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
void Device::createSecondaryContexts(const EngineControl &primaryEngine, SecondaryContexts &secondaryEnginesForType, uint32_t contextCount, uint32_t regularPriorityCount, uint32_t highPriorityContextCount) {
|
|
secondaryEnginesForType.regularEnginesTotal = contextCount - highPriorityContextCount;
|
|
secondaryEnginesForType.highPriorityEnginesTotal = highPriorityContextCount;
|
|
secondaryEnginesForType.regularCounter = 0;
|
|
secondaryEnginesForType.highPriorityCounter = 0;
|
|
secondaryEnginesForType.assignedContextsCounter = 1;
|
|
|
|
NEO::EngineTypeUsage engineTypeUsage;
|
|
engineTypeUsage.first = primaryEngine.getEngineType();
|
|
engineTypeUsage.second = primaryEngine.getEngineUsage();
|
|
|
|
UNRECOVERABLE_IF(engineTypeUsage.second != EngineUsage::regular && engineTypeUsage.second != EngineUsage::highPriority);
|
|
|
|
secondaryEnginesForType.engines.push_back(primaryEngine);
|
|
|
|
for (uint32_t i = 1; i < contextCount; i++) {
|
|
|
|
if (i >= contextCount - highPriorityContextCount) {
|
|
engineTypeUsage.second = EngineUsage::highPriority;
|
|
}
|
|
this->createSecondaryEngine(primaryEngine.commandStreamReceiver, engineTypeUsage);
|
|
}
|
|
|
|
primaryEngine.osContext->setContextGroup(true);
|
|
}
|
|
|
|
void Device::allocateDebugSurface(size_t debugSurfaceSize) {
|
|
this->debugSurface = getMemoryManager()->allocateGraphicsMemoryWithProperties(
|
|
{getRootDeviceIndex(), true,
|
|
debugSurfaceSize,
|
|
NEO::AllocationType::debugContextSaveArea,
|
|
false,
|
|
false,
|
|
getDeviceBitfield()});
|
|
|
|
for (auto &subdevice : this->subdevices) {
|
|
if (subdevice) {
|
|
subdevice->debugSurface = this->debugSurface;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Appends the engine to the regular engine group matching its (product-adjusted) group type.
// Nothing is added when the engine is not supported for this device bitfield, when it is a
// copy-only engine and blitter operations are disabled by debug flag, or when the last
// engine of the current group already has the same engine type.
// A new group is opened whenever the group type differs from that of the last group.
void Device::addEngineToEngineGroup(EngineControl &engine) {
    auto &gfxCoreHelper = getGfxCoreHelper();
    const auto engineType = engine.getEngineType();

    EngineGroupType engineGroupType = gfxCoreHelper.getEngineGroupType(engineType, engine.getEngineUsage(), this->getHardwareInfo());
    getProductHelper().adjustEngineGroupType(engineGroupType);

    if (!gfxCoreHelper.isSubDeviceEngineSupported(this->getRootDeviceEnvironment(), getDeviceBitfield(), engineType)) {
        return;
    }

    const bool blitterDisabledForGroup = EngineHelper::isCopyOnlyEngineType(engineGroupType) && debugManager.flags.EnableBlitterOperationsSupport.get() == 0;
    if (blitterDisabledForGroup) {
        return;
    }

    auto &groups = this->regularEngineGroups;
    const bool startNewGroup = groups.empty() || groups.back().engineGroupType != engineGroupType;
    if (startNewGroup) {
        groups.push_back(EngineGroupT{});
        groups.back().engineGroupType = engineGroupType;
    }

    auto &groupEngines = groups.back().engines;
    if (!groupEngines.empty() && groupEngines.back().getEngineType() == engineType) {
        return; // Type already added. Exposing multiple contexts for the same engine is disabled.
    }

    groupEngines.push_back(engine);
}
|
|
|
|
std::unique_ptr<CommandStreamReceiver> Device::createCommandStreamReceiver() const {
|
|
return std::unique_ptr<CommandStreamReceiver>(createCommandStream(*executionEnvironment, getRootDeviceIndex(), getDeviceBitfield()));
|
|
}
|
|
|
|
// Creates one primary engine: a command stream receiver plus a registered OS context,
// then records it in allEngines (and, for regular usage, in the regular engine groups).
// Returns false when the CSR cannot be created or one of its initialization steps fails.
bool Device::createEngine(EngineTypeUsage engineTypeUsage) {
    const auto &hwInfo = getHardwareInfo();
    auto &gfxCoreHelper = getGfxCoreHelper();
    const auto engineType = engineTypeUsage.first;
    const auto engineUsage = engineTypeUsage.second;

    const auto chosenEngineType = getChosenEngineType(hwInfo);
    const bool isDefaultEngine = chosenEngineType == engineType && engineUsage == EngineUsage::regular;

    // Only CCS/BCS engines can be primary; the debug mask (when set) can exclude types.
    bool primaryEngineTypeAllowed = (EngineHelpers::isCcs(engineType) || EngineHelpers::isBcs(engineType));
    if (debugManager.flags.SecondaryContextEngineTypeMask.get() != -1) {
        const bool enabledInMask = (static_cast<uint32_t>(debugManager.flags.SecondaryContextEngineTypeMask.get()) & (1 << static_cast<uint32_t>(engineType))) != 0;
        primaryEngineTypeAllowed = primaryEngineTypeAllowed && enabledInMask;
    }

    const bool usageAllowsPrimary = engineUsage == EngineUsage::regular || engineUsage == EngineUsage::highPriority;
    const bool isPrimaryEngine = primaryEngineTypeAllowed && usageAllowsPrimary;
    const bool useContextGroup = isPrimaryEngine && gfxCoreHelper.areSecondaryContextsSupported();

    UNRECOVERABLE_IF(EngineHelpers::isBcs(engineType) && !hwInfo.capabilityTable.blitterOperationsSupported);

    std::unique_ptr<CommandStreamReceiver> csr = createCommandStreamReceiver();
    if (!csr) {
        return false;
    }

    if (csr->needsPageTableManager()) {
        csr->createPageTableManager();
    }

    EngineDescriptor engineDescriptor(engineTypeUsage, getDeviceBitfield(), preemptionMode, false);

    auto osContext = executionEnvironment->memoryManager->createAndRegisterOsContext(csr.get(), engineDescriptor);
    osContext->setContextGroup(useContextGroup);
    osContext->setIsPrimaryEngine(isPrimaryEngine);
    osContext->setIsDefaultEngine(isDefaultEngine);

    DEBUG_BREAK_IF(getDeviceBitfield().count() > 1 && !osContext->isRootDevice());

    csr->setupContext(*osContext);

    if (osContext->isImmediateContextInitializationEnabled(isDefaultEngine) &&
        !csr->initializeResources(false, this->getPreemptionMode())) {
        return false;
    }

    if (!csr->initializeTagAllocation() || !csr->createGlobalFenceAllocation()) {
        return false;
    }

    EngineControl engine{csr.get(), osContext};
    allEngines.push_back(engine);
    if (engineUsage == EngineUsage::regular) {
        addEngineToEngineGroup(engine);
    }

    // NOTE(review): this keeps the address of a std::vector element; a later push_back into
    // allEngines may reallocate and leave hpCopyEngine dangling — confirm capacity is
    // reserved or the pointer is refreshed elsewhere.
    if (NEO::EngineHelpers::isBcs(engine.osContext->getEngineType()) && engine.osContext->isHighPriority()) {
        hpCopyEngine = &allEngines.back();
    }

    // Ownership of the CSR moves to the device only after everything above succeeded.
    commandStreamReceivers.push_back(std::move(csr));

    return true;
}
|
|
|
|
bool Device::initializeEngines() {
|
|
uint32_t deviceCsrIndex = 0;
|
|
bool defaultEngineAlreadySet = false;
|
|
for (auto &engine : allEngines) {
|
|
bool firstSubmissionDone = false;
|
|
if (engine.osContext->getIsDefaultEngine() && !defaultEngineAlreadySet) {
|
|
defaultEngineAlreadySet = true;
|
|
defaultEngineIndex = deviceCsrIndex;
|
|
|
|
if (engine.osContext->isDebuggableContext() ||
|
|
this->isInitDeviceWithFirstSubmissionSupported(engine.commandStreamReceiver->getType())) {
|
|
if (SubmissionStatus::success != engine.commandStreamReceiver->initializeDeviceWithFirstSubmission(*this)) {
|
|
return false;
|
|
}
|
|
firstSubmissionDone = true;
|
|
}
|
|
}
|
|
|
|
auto &compilerProductHelper = this->getCompilerProductHelper();
|
|
auto heaplessEnabled = compilerProductHelper.isHeaplessModeEnabled(this->getHardwareInfo());
|
|
|
|
bool isHeaplessStateInit = engine.osContext->getIsPrimaryEngine() && compilerProductHelper.isHeaplessStateInitEnabled(heaplessEnabled);
|
|
bool initializeDevice = (engine.osContext->isPartOfContextGroup() || isHeaplessStateInit) && !firstSubmissionDone;
|
|
|
|
if (initializeDevice) {
|
|
engine.commandStreamReceiver->initializeResources(false, this->getPreemptionMode());
|
|
|
|
if (debugManager.flags.DeferStateInitSubmissionToFirstRegularUsage.get() != 1) {
|
|
engine.commandStreamReceiver->initializeDeviceWithFirstSubmission(*this);
|
|
}
|
|
}
|
|
deviceCsrIndex++;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
// Creates a secondary engine (CSR + secondary OS context) attached to `primaryCsr` and
// appends it to secondaryEngines[engineTypeUsage.first].
// The new OS context gets an extra internal reference and the CSR is owned by secondaryCsrs.
// Returns false when the command stream receiver cannot be created.
bool Device::createSecondaryEngine(CommandStreamReceiver *primaryCsr, EngineTypeUsage engineTypeUsage) {
    std::unique_ptr<CommandStreamReceiver> secondaryCsr = createCommandStreamReceiver();
    if (!secondaryCsr) {
        return false;
    }

    auto &primaryOsContext = primaryCsr->getOsContext();
    EngineDescriptor engineDescriptor(engineTypeUsage, primaryCsr->peekDeviceBitfield(), preemptionMode, primaryOsContext.isRootDevice());

    auto secondaryOsContext = executionEnvironment->memoryManager->createAndRegisterSecondaryOsContext(&primaryOsContext, secondaryCsr.get(), engineDescriptor);
    secondaryOsContext->incRefInternal();
    secondaryCsr->setupContext(*secondaryOsContext);
    secondaryCsr->setPrimaryCsr(primaryCsr);

    DEBUG_BREAK_IF(secondaryOsContext->getDeviceBitfield().count() > 1 && !secondaryOsContext->isRootDevice());

    EngineControl secondaryEngine{secondaryCsr.get(), secondaryOsContext};
    secondaryEngines[engineTypeUsage.first].engines.push_back(secondaryEngine);
    secondaryCsrs.push_back(std::move(secondaryCsr));

    return true;
}
|
|
|
|
// Returns an engine from the secondary context group of the requested engine type, or
// nullptr when no group exists for that type or the selected CSR fails to initialize.
// When an interrupt is requested and the first pick is the primary context of the group,
// a second pick is made. A picked engine that is not the primary context of a group has
// its CSR initialized on first use, under the CSR's unique ownership lock.
EngineControl *Device::getSecondaryEngineCsr(EngineTypeUsage engineTypeUsage, bool allocateInterrupt) {
    // Single lookup instead of find() followed by operator[].
    const auto secondaryEnginesIt = secondaryEngines.find(engineTypeUsage.first);
    if (secondaryEnginesIt == secondaryEngines.end()) {
        return nullptr;
    }
    auto &secondaryEnginesForType = secondaryEnginesIt->second;

    const auto isPrimaryContextInGroup = [](const EngineControl *candidate) {
        return candidate->osContext->getIsPrimaryEngine() && candidate->osContext->isPartOfContextGroup();
    };

    auto engineControl = secondaryEnginesForType.getEngine(engineTypeUsage.second);

    if (isPrimaryContextInGroup(engineControl) && allocateInterrupt) {
        // Context 0 is already pre-initialized. We need non-initialized context, to pass context creation flag.
        // If all contexts are already initialized, just take next available. Interrupt request is only a hint.
        engineControl = secondaryEnginesForType.getEngine(engineTypeUsage.second);
    }

    if (!isPrimaryContextInGroup(engineControl)) {
        auto commandStreamReceiver = engineControl->commandStreamReceiver;

        // Held until return so that only one caller performs the lazy initialization.
        auto lock = commandStreamReceiver->obtainUniqueOwnership();

        if (!commandStreamReceiver->isInitialized()) {
            if (commandStreamReceiver->needsPageTableManager()) {
                commandStreamReceiver->createPageTableManager();
            }

            if (!commandStreamReceiver->initializeResources(allocateInterrupt, this->getPreemptionMode())) {
                return nullptr;
            }

            if (!commandStreamReceiver->initializeTagAllocation()) {
                return nullptr;
            }
        }
    }

    return engineControl;
}
|
|
|
|
// Returns the hardware info held by this device's root device environment.
const HardwareInfo &Device::getHardwareInfo() const {
    const auto *hardwareInfo = getRootDeviceEnvironment().getHardwareInfo();
    return *hardwareInfo;
}
|
|
|
|
// Read-only access to the deviceInfo member.
const DeviceInfo &Device::getDeviceInfo() const {
    return this->deviceInfo;
}
|
|
|
|
double Device::getProfilingTimerResolution() {
|
|
return getOSTime()->getDynamicDeviceTimerResolution();
|
|
}
|
|
|
|
uint64_t Device::getProfilingTimerClock() {
|
|
return getOSTime()->getDynamicDeviceTimerClock();
|
|
}
|
|
|
|
bool Device::isBcsSplitSupported() {
|
|
auto &productHelper = getProductHelper();
|
|
auto bcsSplit = productHelper.isBlitSplitEnqueueWARequired(getHardwareInfo()) &&
|
|
Device::isBlitSplitEnabled();
|
|
|
|
if (debugManager.flags.SplitBcsCopy.get() != -1) {
|
|
bcsSplit = debugManager.flags.SplitBcsCopy.get();
|
|
}
|
|
|
|
return bcsSplit;
|
|
}
|
|
|
|
// True when the product requires device initialization with the first submission and
// that mode is enabled for the given CSR type.
bool Device::isInitDeviceWithFirstSubmissionSupported(CommandStreamReceiverType csrType) {
    if (!getProductHelper().isInitDeviceWithFirstSubmissionRequired(getHardwareInfo())) {
        return false;
    }
    return Device::isInitDeviceWithFirstSubmissionEnabled(csrType);
}
|
|
|
|
double Device::getPlatformHostTimerResolution() const {
|
|
if (getOSTime()) {
|
|
return getOSTime()->getHostTimerResolution();
|
|
}
|
|
|
|
return 0.0;
|
|
}
|
|
|
|
// Returns the render core family recorded in the hardware info's platform data.
GFXCORE_FAMILY Device::getRenderCoreFamily() const {
    const auto &platform = this->getHardwareInfo().platform;
    return platform.eRenderCoreFamily;
}
|
|
|
|
// Returns the debugger owned by the root device environment (may be nullptr).
Debugger *Device::getDebugger() const {
    const auto &rootDeviceEnv = getRootDeviceEnvironment();
    return rootDeviceEnv.debugger.get();
}
|
|
|
|
bool Device::areSharedSystemAllocationsAllowed() const {
|
|
if ((debugManager.flags.EnableRecoverablePageFaults.get() == 0) || (debugManager.flags.EnableSharedSystemUsmSupport.get() == 0)) {
|
|
return false;
|
|
}
|
|
uint64_t mask = (UnifiedSharedMemoryFlags::access | UnifiedSharedMemoryFlags::atomicAccess | UnifiedSharedMemoryFlags::concurrentAccess | UnifiedSharedMemoryFlags::concurrentAtomicAccess);
|
|
if (getHardwareInfo().capabilityTable.sharedSystemMemCapabilities & mask) {
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
// Returns the index of the first regular engine group with the given type.
// Hits UNRECOVERABLE_IF when no such group exists.
size_t Device::getEngineGroupIndexFromEngineGroupType(EngineGroupType engineGroupType) const {
    size_t groupIndex = 0;
    while (groupIndex < regularEngineGroups.size()) {
        if (regularEngineGroups[groupIndex].engineGroupType == engineGroupType) {
            return groupIndex;
        }
        groupIndex++;
    }

    UNRECOVERABLE_IF(true);
    return 0;
}
|
|
|
|
// Looks up the engine with the given type and usage.
// When no engine matches and the OverrideInvalidEngineWithDefault debug flag is set, the
// first engine in allEngines is returned instead. Returns nullptr otherwise, including when
// the flag is set but no engine exists at all (indexing an empty vector would be undefined
// behavior).
EngineControl *Device::tryGetEngine(aub_stream::EngineType engineType, EngineUsage engineUsage) {
    for (auto &engine : allEngines) {
        if ((engine.getEngineType() == engineType) &&
            (engine.getEngineUsage() == engineUsage)) {
            return &engine;
        }
    }

    if (debugManager.flags.OverrideInvalidEngineWithDefault.get() && !allEngines.empty()) {
        return &allEngines[0];
    }
    return nullptr;
}
|
|
|
|
// Same lookup as tryGetEngine(), but a missing engine is treated as unrecoverable.
EngineControl &Device::getEngine(aub_stream::EngineType engineType, EngineUsage engineUsage) {
    EngineControl *foundEngine = tryGetEngine(engineType, engineUsage);
    UNRECOVERABLE_IF(foundEngine == nullptr);
    return *foundEngine;
}
|
|
|
|
// Returns the engine at the given position in allEngines; an out-of-range index is
// treated as unrecoverable.
EngineControl &Device::getEngine(uint32_t index) {
    const bool indexOutOfRange = index >= allEngines.size();
    UNRECOVERABLE_IF(indexOutOfRange);
    return allEngines[index];
}
|
|
|
|
bool Device::getDeviceAndHostTimer(uint64_t *deviceTimestamp, uint64_t *hostTimestamp) const {
|
|
TimeStampData timeStamp;
|
|
auto retVal = getOSTime()->getGpuCpuTime(&timeStamp, true);
|
|
if (retVal == TimeQueryStatus::success) {
|
|
*hostTimestamp = timeStamp.cpuTimeinNS;
|
|
if (debugManager.flags.EnableDeviceBasedTimestamps.get()) {
|
|
auto resolution = getOSTime()->getDynamicDeviceTimerResolution();
|
|
*deviceTimestamp = getGfxCoreHelper().getGpuTimeStampInNS(timeStamp.gpuTimeStamp, resolution);
|
|
} else {
|
|
*deviceTimestamp = *hostTimestamp;
|
|
}
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
bool Device::getHostTimer(uint64_t *hostTimestamp) const {
|
|
return getOSTime()->getCpuTime(hostTimestamp);
|
|
}
|
|
|
|
uint32_t Device::getNumGenericSubDevices() const {
|
|
return (hasRootCsr() ? getNumSubDevices() : 0);
|
|
}
|
|
|
|
// Returns the sub-device stored at `deviceId` (may be nullptr for a slot that was never
// created); an out-of-range id is treated as unrecoverable.
Device *Device::getSubDevice(uint32_t deviceId) const {
    const bool idOutOfRange = deviceId >= subdevices.size();
    UNRECOVERABLE_IF(idOutOfRange);
    return subdevices[deviceId];
}
|
|
|
|
// Returns the sub-device at `deviceId` when this device has sub-devices and a root CSR;
// otherwise returns this device itself. An out-of-range id is treated as unrecoverable.
Device *Device::getNearestGenericSubDevice(uint32_t deviceId) {
    const bool canSelectSubDevice = !subdevices.empty() && hasRootCsr();
    if (!canSelectSubDevice) {
        return this;
    }

    UNRECOVERABLE_IF(deviceId >= subdevices.size());
    return subdevices[deviceId];
}
|
|
|
|
// Returns the bindless heaps helper held by the root device environment.
BindlessHeapsHelper *Device::getBindlessHeapsHelper() const {
    const auto &rootDeviceEnv = getRootDeviceEnvironment();
    return rootDeviceEnv.getBindlessHeapsHelper();
}
|
|
|
|
// Returns the client context of this device's GMM helper.
GmmClientContext *Device::getGmmClientContext() const {
    auto *gmmHelper = getGmmHelper();
    return gmmHelper->getClientContext();
}
|
|
|
|
void Device::allocateSyncBufferHandler() {
|
|
static std::mutex mutex;
|
|
std::unique_lock<std::mutex> lock(mutex);
|
|
if (syncBufferHandler.get() == nullptr) {
|
|
syncBufferHandler = std::make_unique<SyncBufferHandler>(*this);
|
|
UNRECOVERABLE_IF(syncBufferHandler.get() == nullptr);
|
|
}
|
|
}
|
|
|
|
uint64_t Device::getGlobalMemorySize(uint32_t deviceBitfield) const {
|
|
auto globalMemorySize = getMemoryManager()->isLocalMemorySupported(this->getRootDeviceIndex())
|
|
? getMemoryManager()->getLocalMemorySize(this->getRootDeviceIndex(), deviceBitfield)
|
|
: getMemoryManager()->getSystemSharedMemory(this->getRootDeviceIndex());
|
|
globalMemorySize = std::min(globalMemorySize, getMemoryManager()->getMaxApplicationAddress() + 1);
|
|
double percentOfGlobalMemoryAvailable = getPercentOfGlobalMemoryAvailable();
|
|
globalMemorySize = static_cast<uint64_t>(static_cast<double>(globalMemorySize) * percentOfGlobalMemoryAvailable);
|
|
|
|
if (debugManager.flags.ClDeviceGlobalMemSizeAvailablePercent.get() == -1 &&
|
|
!getMemoryManager()->isLocalMemorySupported(this->getRootDeviceIndex())) {
|
|
const uint64_t internalResourcesSize = 450 * MemoryConstants::megaByte;
|
|
globalMemorySize = std::max(static_cast<uint64_t>(0), globalMemorySize - internalResourcesSize);
|
|
}
|
|
|
|
return globalMemorySize;
|
|
}
|
|
|
|
double Device::getPercentOfGlobalMemoryAvailable() const {
|
|
if (debugManager.flags.ClDeviceGlobalMemSizeAvailablePercent.get() != -1) {
|
|
return 0.01 * static_cast<double>(debugManager.flags.ClDeviceGlobalMemSizeAvailablePercent.get());
|
|
}
|
|
return getMemoryManager()->getPercentOfGlobalMemoryAvailable(this->getRootDeviceIndex());
|
|
}
|
|
|
|
// Returns the debugger from getDebugger() cast to DebuggerL0, or nullptr when there is none.
NEO::DebuggerL0 *Device::getL0Debugger() {
    auto *currentDebugger = getDebugger();
    if (!currentDebugger) {
        return nullptr;
    }
    return static_cast<NEO::DebuggerL0 *>(currentDebugger);
}
|
|
|
|
const std::vector<EngineControl> &Device::getAllEngines() const {
|
|
return this->allEngines;
|
|
}
|
|
|
|
// Returns (as const) the root device environment stored at this device's root device index.
const RootDeviceEnvironment &Device::getRootDeviceEnvironment() const {
    const auto rootDeviceIndex = getRootDeviceIndex();
    return *executionEnvironment->rootDeviceEnvironments[rootDeviceIndex];
}
|
|
|
|
// Returns a mutable reference to the root device environment stored at this device's root device index.
RootDeviceEnvironment &Device::getRootDeviceEnvironmentRef() const {
    const auto rootDeviceIndex = getRootDeviceIndex();
    return *executionEnvironment->rootDeviceEnvironments[rootDeviceIndex];
}
|
|
|
|
bool Device::isFullRangeSvm() const {
|
|
return getRootDeviceEnvironment().isFullRangeSvm();
|
|
}
|
|
|
|
// Returns the engine used for internal work.
// When the first engine's command stream receiver is not of the hardware type, the default
// engine is returned. Otherwise the internal-usage engine of the chosen engine type is taken
// from getNearestGenericSubDevice(0).
EngineControl &Device::getInternalEngine() {
    const bool usesHardwareCsr =
        this->allEngines[0].commandStreamReceiver->getType() == CommandStreamReceiverType::hardware;

    if (usesHardwareCsr) {
        const auto chosenEngineType = getChosenEngineType(getHardwareInfo());
        return this->getNearestGenericSubDevice(0)->getEngine(chosenEngineType, EngineUsage::internal);
    }

    return this->getDefaultEngine();
}
|
|
|
|
// Picks the next engine for a new command queue from the regular engine group that contains
// the default engine. Each attempt advances regularCommandQueuesCreatedWithinDeviceCount;
// candidates whose bit is not set in availableEnginesForCommandQueueusRoundRobin are skipped.
EngineControl &Device::getNextEngineForCommandQueue() {
    this->initializeEngineRoundRobinControls();

    const auto &defaultEngine = this->getDefaultEngine();
    const auto defaultGroupType = getGfxCoreHelper().getEngineGroupType(defaultEngine.getEngineType(),
                                                                        defaultEngine.getEngineUsage(),
                                                                        this->getHardwareInfo());

    const auto groupIndex = this->getEngineGroupIndexFromEngineGroupType(defaultGroupType);
    auto &candidateGroup = this->getRegularEngineGroups()[groupIndex];

    auto engineIndex = 0u;
    for (;;) {
        engineIndex = (this->regularCommandQueuesCreatedWithinDeviceCount++ / this->queuesPerEngineCount) % candidateGroup.engines.size();
        if (this->availableEnginesForCommandQueueusRoundRobin.test(engineIndex)) {
            break;
        }
    }
    return candidateGroup.engines[engineIndex];
}
|
|
|
|
// Looks up the internal copy engine.
// Returns nullptr when blitter operations are not supported or when no engine in allEngines has
// both the expected engine type and an internal OS context. The expected type is the product's
// default copy engine unless ForceBCSForInternalCopyEngine is set (not -1).
EngineControl *Device::getInternalCopyEngine() {
    if (!getHardwareInfo().capabilityTable.blitterOperationsSupported) {
        return nullptr;
    }

    auto expectedEngine = this->getProductHelper().getDefaultCopyEngine();

    const auto forcedBcsIndex = debugManager.flags.ForceBCSForInternalCopyEngine.get();
    if (forcedBcsIndex != -1) {
        expectedEngine = EngineHelpers::mapBcsIndexToEngineType(forcedBcsIndex, true);
    }

    const auto found = std::find_if(allEngines.begin(), allEngines.end(), [expectedEngine](const EngineControl &candidate) {
        return candidate.osContext->getEngineType() == expectedEngine &&
               candidate.osContext->isInternalEngine();
    });

    return found != allEngines.end() ? &*found : nullptr;
}
|
|
|
|
// Returns the stored hpCopyEngine pointer.
EngineControl *Device::getHpCopyEngine() {
    return this->hpCopyEngine;
}
|
|
|
|
// Returns an RTDispatchGlobalsInfo suitable for `maxBvhLevels`.
// Returns nullptr when no slots exist or `maxBvhLevels` is beyond the last slot. Otherwise the
// slots are scanned from the highest level down to `maxBvhLevels` and the first populated one
// is returned. If none is populated, an allocation for `maxBvhLevels` is attempted and that
// slot's content is returned.
RTDispatchGlobalsInfo *Device::getRTDispatchGlobals(uint32_t maxBvhLevels) {
    const size_t slotCount = rtDispatchGlobalsInfos.size();
    if (slotCount == 0 || maxBvhLevels >= slotCount) {
        return nullptr;
    }

    // Walks level = slotCount - 1 ... maxBvhLevels (inclusive) without unsigned underflow.
    for (size_t level = slotCount; level-- > maxBvhLevels;) {
        auto *existingInfo = rtDispatchGlobalsInfos[level];
        if (existingInfo != nullptr) {
            return existingInfo;
        }
    }

    allocateRTDispatchGlobals(maxBvhLevels);
    return rtDispatchGlobalsInfos[maxBvhLevels];
}
|
|
|
|
void Device::initializeRayTracing(uint32_t maxBvhLevels) {
|
|
initializeRTMemoryBackedBuffer();
|
|
|
|
while (rtDispatchGlobalsInfos.size() <= maxBvhLevels) {
|
|
rtDispatchGlobalsInfos.push_back(nullptr);
|
|
}
|
|
}
|
|
|
|
void Device::initializeRTMemoryBackedBuffer() {
|
|
if (rtMemoryBackedBuffer == nullptr) {
|
|
auto size = RayTracingHelper::getTotalMemoryBackedFifoSize(*this);
|
|
|
|
AllocationProperties allocProps(getRootDeviceIndex(), true, size, AllocationType::buffer, true, getDeviceBitfield());
|
|
auto &productHelper = getProductHelper();
|
|
allocProps.flags.resource48Bit = productHelper.is48bResourceNeededForRayTracing();
|
|
allocProps.flags.isUSMDeviceAllocation = true;
|
|
|
|
rtMemoryBackedBuffer = getMemoryManager()->allocateGraphicsMemoryWithProperties(allocProps);
|
|
}
|
|
}
|
|
|
|
void Device::finalizeRayTracing() {
|
|
getMemoryManager()->freeGraphicsMemory(rtMemoryBackedBuffer);
|
|
rtMemoryBackedBuffer = nullptr;
|
|
|
|
for (size_t i = 0; i < rtDispatchGlobalsInfos.size(); i++) {
|
|
auto rtDispatchGlobalsInfo = rtDispatchGlobalsInfos[i];
|
|
if (rtDispatchGlobalsInfo == nullptr) {
|
|
continue;
|
|
}
|
|
for (size_t j = 0; j < rtDispatchGlobalsInfo->rtStacks.size(); j++) {
|
|
getMemoryManager()->freeGraphicsMemory(rtDispatchGlobalsInfo->rtStacks[j]);
|
|
rtDispatchGlobalsInfo->rtStacks[j] = nullptr;
|
|
}
|
|
|
|
getMemoryManager()->freeGraphicsMemory(rtDispatchGlobalsInfo->rtDispatchGlobalsArray);
|
|
rtDispatchGlobalsInfo->rtDispatchGlobalsArray = nullptr;
|
|
|
|
delete rtDispatchGlobalsInfos[i];
|
|
rtDispatchGlobalsInfos[i] = nullptr;
|
|
}
|
|
}
|
|
|
|
void Device::initializeEngineRoundRobinControls() {
|
|
if (this->availableEnginesForCommandQueueusRoundRobin.any()) {
|
|
return;
|
|
}
|
|
|
|
uint32_t queuesPerEngine = 1u;
|
|
|
|
if (debugManager.flags.CmdQRoundRobindEngineAssignNTo1.get() != -1) {
|
|
queuesPerEngine = debugManager.flags.CmdQRoundRobindEngineAssignNTo1.get();
|
|
}
|
|
|
|
this->queuesPerEngineCount = queuesPerEngine;
|
|
|
|
std::bitset<8> availableEngines = std::numeric_limits<uint8_t>::max();
|
|
|
|
if (debugManager.flags.CmdQRoundRobindEngineAssignBitfield.get() != -1) {
|
|
availableEngines = debugManager.flags.CmdQRoundRobindEngineAssignBitfield.get();
|
|
}
|
|
|
|
this->availableEnginesForCommandQueueusRoundRobin = availableEngines;
|
|
}
|
|
|
|
// Returns the OSTime instance owned by this device's root device environment.
OSTime *Device::getOSTime() const {
    return getRootDeviceEnvironment().osTime.get();
}
|
|
|
|
// Copies the stored device UUID into `uuid`.
// Returns false, leaving `uuid` untouched, when the stored UUID is not marked valid.
// When the hardware reports more than one sub-device but only a single bit is set in
// deviceBitfield, the last UUID byte is replaced with (index of that bit + 1).
bool Device::getUuid(std::array<uint8_t, ProductHelper::uuidSize> &uuid) {
    if (!this->uuid.isValid) {
        return false;
    }

    uuid = this->uuid.id;

    // Bind by reference: only the address is passed on, so copying HardwareInfo is unnecessary.
    const auto &hwInfo = getHardwareInfo();
    const auto subDevicesCount = GfxCoreHelper::getSubDevicesCount(&hwInfo);

    if (subDevicesCount > 1 && deviceBitfield.count() == 1) {
        // In case of no sub devices created (bits set in affinity mask == 1), return the UUID of enabled sub-device.
        uint32_t subDeviceIndex = Math::log2(static_cast<uint32_t>(deviceBitfield.to_ulong()));
        uuid[ProductHelper::uuidSize - 1] = subDeviceIndex + 1;
    }

    return true;
}
|
|
|
|
bool Device::generateUuidFromPciBusInfo(const PhysicalDevicePciBusInfo &pciBusInfo, std::array<uint8_t, ProductHelper::uuidSize> &uuid) {
|
|
if (pciBusInfo.pciDomain != PhysicalDevicePciBusInfo::invalidValue) {
|
|
generateUuid(uuid);
|
|
|
|
/* Device UUID uniquely identifies a device within a system.
|
|
* We generate it based on device information along with PCI information
|
|
* This guarantees uniqueness of UUIDs on a system even when multiple
|
|
* identical Intel GPUs are present.
|
|
*/
|
|
|
|
/* We want to have UUID matching between different GPU APIs (including outside
|
|
* of compute_runtime project - i.e. other than L0 or OCL). This structure definition
|
|
* has been agreed upon by various Intel driver teams.
|
|
*
|
|
* Consult other driver teams before changing this.
|
|
*/
|
|
|
|
struct DeviceUUID {
|
|
uint16_t vendorID;
|
|
uint16_t deviceID;
|
|
uint16_t revisionID;
|
|
uint16_t pciDomain;
|
|
uint8_t pciBus;
|
|
uint8_t pciDev;
|
|
uint8_t pciFunc;
|
|
uint8_t reserved[4];
|
|
uint8_t subDeviceID;
|
|
};
|
|
static_assert(sizeof(DeviceUUID) == ProductHelper::uuidSize);
|
|
|
|
DeviceUUID deviceUUID{};
|
|
memcpy_s(&deviceUUID, sizeof(DeviceUUID), uuid.data(), uuid.size());
|
|
|
|
deviceUUID.pciDomain = static_cast<uint16_t>(pciBusInfo.pciDomain);
|
|
deviceUUID.pciBus = static_cast<uint8_t>(pciBusInfo.pciBus);
|
|
deviceUUID.pciDev = static_cast<uint8_t>(pciBusInfo.pciDevice);
|
|
deviceUUID.pciFunc = static_cast<uint8_t>(pciBusInfo.pciFunction);
|
|
|
|
memcpy_s(uuid.data(), uuid.size(), &deviceUUID, sizeof(DeviceUUID));
|
|
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
// Fills `uuid` from device properties. Byte layout written here:
//   [0..1] vendor id, [2..3] device id, [4..5] revision id, [6..9] root device index,
//   [15] sub-device index + 1 (0 when this is not a sub-device); all other bytes are zero.
void Device::generateUuid(std::array<uint8_t, ProductHelper::uuidSize> &uuid) {
    const auto &hardwareInfo = getHardwareInfo();

    const uint16_t vendorId = static_cast<uint16_t>(getDeviceInfo().vendorId);
    const uint16_t deviceId = static_cast<uint16_t>(hardwareInfo.platform.usDeviceID);
    const uint16_t revisionId = static_cast<uint16_t>(hardwareInfo.platform.usRevId);
    const uint32_t rootDeviceIndex = getRootDeviceIndex();

    uint8_t subDeviceId = 0;
    if (isSubDevice()) {
        subDeviceId = static_cast<uint8_t>(static_cast<SubDevice *>(this)->getSubDeviceIndex() + 1);
    }

    uuid.fill(0);
    memcpy_s(&uuid[0], sizeof(uint32_t), &vendorId, sizeof(vendorId));
    memcpy_s(&uuid[2], sizeof(uint32_t), &deviceId, sizeof(deviceId));
    memcpy_s(&uuid[4], sizeof(uint32_t), &revisionId, sizeof(revisionId));
    memcpy_s(&uuid[6], sizeof(uint32_t), &rootDeviceIndex, sizeof(rootDeviceIndex));
    uuid[15] = subDeviceId;
}
|
|
|
|
void Device::getAdapterMask(uint32_t &nodeMask) {
|
|
if (verifyAdapterLuid()) {
|
|
nodeMask = 1;
|
|
}
|
|
}
|
|
|
|
// Returns the GfxCoreHelper registered in this device's root device environment.
const GfxCoreHelper &Device::getGfxCoreHelper() const {
    const auto &rootDeviceEnvironment = getRootDeviceEnvironment();
    return rootDeviceEnvironment.getHelper<GfxCoreHelper>();
}
|
|
|
|
// Returns the ProductHelper registered in this device's root device environment.
const ProductHelper &Device::getProductHelper() const {
    const auto &rootDeviceEnvironment = getRootDeviceEnvironment();
    return rootDeviceEnvironment.getHelper<ProductHelper>();
}
|
|
|
|
// Returns the CompilerProductHelper registered in this device's root device environment.
const CompilerProductHelper &Device::getCompilerProductHelper() const {
    const auto &rootDeviceEnvironment = getRootDeviceEnvironment();
    return rootDeviceEnvironment.getHelper<CompilerProductHelper>();
}
|
|
|
|
// Returns the ReleaseHelper pointer provided by this device's root device environment.
ReleaseHelper *Device::getReleaseHelper() const {
    const auto &rootDeviceEnvironment = getRootDeviceEnvironment();
    return rootDeviceEnvironment.getReleaseHelper();
}
|
|
|
|
// Returns the AILConfiguration pointer provided by this device's root device environment.
AILConfiguration *Device::getAilConfigurationHelper() const {
    const auto &rootDeviceEnvironment = getRootDeviceEnvironment();
    return rootDeviceEnvironment.getAILConfigurationHelper();
}
|
|
|
|
void Device::stopDirectSubmissionAndWaitForCompletion() {
|
|
for (auto &engine : allEngines) {
|
|
auto csr = engine.commandStreamReceiver;
|
|
if (csr->isAnyDirectSubmissionEnabled()) {
|
|
csr->stopDirectSubmission(true, true);
|
|
}
|
|
}
|
|
}
|
|
|
|
bool Device::isAnyDirectSubmissionEnabled() const {
|
|
return this->isAnyDirectSubmissionEnabledImpl(false);
|
|
}
|
|
|
|
bool Device::isAnyDirectSubmissionLightEnabled() const {
|
|
return this->isAnyDirectSubmissionEnabledImpl(true);
|
|
}
|
|
|
|
bool Device::isAnyDirectSubmissionEnabledImpl(bool light) const {
|
|
for (const auto &engine : allEngines) {
|
|
auto enabled = light ? engine.osContext->isDirectSubmissionLightActive() : engine.commandStreamReceiver->isAnyDirectSubmissionEnabled();
|
|
if (enabled) {
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
void Device::allocateRTDispatchGlobals(uint32_t maxBvhLevels) {
|
|
UNRECOVERABLE_IF(rtDispatchGlobalsInfos.size() < maxBvhLevels + 1);
|
|
UNRECOVERABLE_IF(rtDispatchGlobalsInfos[maxBvhLevels] != nullptr);
|
|
|
|
uint32_t extraBytesLocal = 0;
|
|
uint32_t extraBytesGlobal = 0;
|
|
uint32_t dispatchGlobalsStride = MemoryConstants::pageSize64k;
|
|
UNRECOVERABLE_IF(RayTracingHelper::getDispatchGlobalSize() > dispatchGlobalsStride);
|
|
|
|
bool allocFailed = false;
|
|
|
|
uint32_t tileCount = 1;
|
|
if (this->getNumSubDevices() > 1) {
|
|
// If device encompasses multiple tiles, allocate RTDispatchGlobals for each tile
|
|
tileCount = this->getNumSubDevices();
|
|
}
|
|
|
|
auto dispatchGlobalsSize = tileCount * dispatchGlobalsStride;
|
|
auto rtStackSize = RayTracingHelper::getRTStackSizePerTile(*this, tileCount, maxBvhLevels, extraBytesLocal, extraBytesGlobal);
|
|
|
|
std::unique_ptr<RTDispatchGlobalsInfo> dispatchGlobalsInfo = std::make_unique<RTDispatchGlobalsInfo>();
|
|
|
|
auto &productHelper = getProductHelper();
|
|
|
|
GraphicsAllocation *dispatchGlobalsArrayAllocation = nullptr;
|
|
|
|
AllocationProperties arrayAllocProps(getRootDeviceIndex(), true, dispatchGlobalsSize,
|
|
AllocationType::globalSurface, true, getDeviceBitfield());
|
|
arrayAllocProps.flags.resource48Bit = productHelper.is48bResourceNeededForRayTracing();
|
|
arrayAllocProps.flags.isUSMDeviceAllocation = true;
|
|
dispatchGlobalsArrayAllocation = getMemoryManager()->allocateGraphicsMemoryWithProperties(arrayAllocProps);
|
|
|
|
if (dispatchGlobalsArrayAllocation == nullptr) {
|
|
return;
|
|
}
|
|
|
|
for (unsigned int tile = 0; tile < tileCount; tile++) {
|
|
DeviceBitfield deviceBitfield =
|
|
(tileCount == 1)
|
|
? this->getDeviceBitfield()
|
|
: subdevices[tile]->getDeviceBitfield();
|
|
|
|
AllocationProperties allocProps(getRootDeviceIndex(), true, rtStackSize, AllocationType::buffer, true, deviceBitfield);
|
|
allocProps.flags.resource48Bit = productHelper.is48bResourceNeededForRayTracing();
|
|
allocProps.flags.isUSMDeviceAllocation = true;
|
|
|
|
auto rtStackAllocation = getMemoryManager()->allocateGraphicsMemoryWithProperties(allocProps);
|
|
|
|
if (rtStackAllocation == nullptr) {
|
|
allocFailed = true;
|
|
break;
|
|
}
|
|
|
|
RTDispatchGlobals dispatchGlobals = {0};
|
|
|
|
dispatchGlobals.rtMemBasePtr = rtStackAllocation->getGpuAddress() + rtStackSize;
|
|
dispatchGlobals.callStackHandlerKSP = reinterpret_cast<uint64_t>(nullptr);
|
|
auto releaseHelper = getReleaseHelper();
|
|
dispatchGlobals.stackSizePerRay = releaseHelper ? releaseHelper->getStackSizePerRay() : 0;
|
|
|
|
auto rtStacksPerDss = RayTracingHelper::getNumRtStacksPerDss(*this);
|
|
dispatchGlobals.numDSSRTStacks = rtStacksPerDss;
|
|
dispatchGlobals.maxBVHLevels = maxBvhLevels;
|
|
uint32_t *dispatchGlobalsAsArray = reinterpret_cast<uint32_t *>(&dispatchGlobals);
|
|
dispatchGlobalsAsArray[7] = 1;
|
|
|
|
if (releaseHelper) {
|
|
bool heaplessEnabled = this->getCompilerProductHelper().isHeaplessModeEnabled(this->getHardwareInfo());
|
|
releaseHelper->adjustRTDispatchGlobals(static_cast<void *>(&dispatchGlobals), rtStacksPerDss, heaplessEnabled, maxBvhLevels);
|
|
}
|
|
|
|
MemoryTransferHelper::transferMemoryToAllocation(productHelper.isBlitCopyRequiredForLocalMemory(this->getRootDeviceEnvironment(), *dispatchGlobalsArrayAllocation),
|
|
*this,
|
|
dispatchGlobalsArrayAllocation,
|
|
tile * dispatchGlobalsStride,
|
|
&dispatchGlobals,
|
|
sizeof(RTDispatchGlobals));
|
|
|
|
dispatchGlobalsInfo->rtStacks.push_back(rtStackAllocation);
|
|
}
|
|
|
|
if (allocFailed) {
|
|
for (auto allocation : dispatchGlobalsInfo->rtStacks) {
|
|
getMemoryManager()->freeGraphicsMemory(allocation);
|
|
}
|
|
|
|
getMemoryManager()->freeGraphicsMemory(dispatchGlobalsArrayAllocation);
|
|
return;
|
|
}
|
|
|
|
dispatchGlobalsInfo->rtDispatchGlobalsArray = dispatchGlobalsArrayAllocation;
|
|
rtDispatchGlobalsInfos[maxBvhLevels] = dispatchGlobalsInfo.release();
|
|
}
|
|
|
|
// Returns the memory manager owned by the execution environment.
MemoryManager *Device::getMemoryManager() const {
    const auto &ownedMemoryManager = executionEnvironment->memoryManager;
    return ownedMemoryManager.get();
}
|
|
|
|
// Returns the GmmHelper provided by this device's root device environment.
GmmHelper *Device::getGmmHelper() const {
    const auto &rootDeviceEnvironment = getRootDeviceEnvironment();
    return rootDeviceEnvironment.getGmmHelper();
}
|
|
|
|
// Returns the compiler interface of the root device environment at this device's root index.
CompilerInterface *Device::getCompilerInterface() const {
    auto &rootDeviceEnvironment = executionEnvironment->rootDeviceEnvironments[getRootDeviceIndex()];
    return rootDeviceEnvironment->getCompilerInterface();
}
|
|
|
|
// Returns the SIP external library interface of the root device environment at this device's root index.
SipExternalLib *Device::getSipExternalLibInterface() const {
    auto &rootDeviceEnvironment = executionEnvironment->rootDeviceEnvironments[getRootDeviceIndex()];
    return rootDeviceEnvironment->getSipExternalLibInterface();
}
|
|
|
|
// Returns the built-ins object of the root device environment at this device's root index.
BuiltIns *Device::getBuiltIns() const {
    auto &rootDeviceEnvironment = executionEnvironment->rootDeviceEnvironments[getRootDeviceIndex()];
    return rootDeviceEnvironment->getBuiltIns();
}
|
|
|
|
// Returns the first regular engine group whose type equals `engineGroupType`,
// or nullptr when no such group exists.
const EngineGroupT *Device::tryGetRegularEngineGroup(EngineGroupType engineGroupType) const {
    const auto match = std::find_if(regularEngineGroups.begin(), regularEngineGroups.end(),
                                    [engineGroupType](const EngineGroupT &group) {
                                        return group.engineGroupType == engineGroupType;
                                    });
    if (match == regularEngineGroups.end()) {
        return nullptr;
    }
    return &*match;
}
|
|
|
|
// Selects a secondary engine for the requested usage while holding the internal mutex.
//
// highPriority: returns nullptr when no high-priority engines are configured. Otherwise an index
//   is taken from the reserved HP range until it is exhausted, then from still-unassigned
//   regular contexts, and finally by cycling through the indices already handed out. The chosen
//   context's usage is overridden to highPriority if it is not already.
// regular: returns nullptr when no regular engines are configured. The first request gets
//   index 0, later ones take unassigned contexts, then cycle through the indices already used.
// Any other usage hits DEBUG_BREAK_IF and falls through to engine 0.
EngineControl *SecondaryContexts::getEngine(EngineUsage usage) {
    auto selectedIndex = 0;

    std::lock_guard<std::mutex> guard(mutex);

    switch (usage) {
    case EngineUsage::highPriority: {
        if (highPriorityEnginesTotal == 0) {
            return nullptr;
        }

        if (hpIndices.size() < highPriorityEnginesTotal) {
            // Reserved HP pool still has room: its indices follow the regular ones.
            selectedIndex = (highPriorityCounter.fetch_add(1)) % (highPriorityEnginesTotal);
            selectedIndex += regularEnginesTotal;
            hpIndices.push_back(selectedIndex);
        } else if (assignedContextsCounter < regularEnginesTotal) {
            // Claim a context that has not been assigned yet.
            selectedIndex = assignedContextsCounter.fetch_add(1);
            highPriorityCounter.fetch_add(1);
            hpIndices.push_back(selectedIndex);
        } else {
            // Nothing free: rotate over indices already used for high priority.
            auto reuseSlot = (highPriorityCounter.fetch_add(1)) % (hpIndices.size());
            selectedIndex = hpIndices[reuseSlot];
        }

        auto *selectedContext = engines[selectedIndex].osContext;
        if (selectedContext->getEngineUsage() != EngineUsage::highPriority) {
            selectedContext->overrideEngineUsage(EngineUsage::highPriority);
        }
        break;
    }
    case EngineUsage::regular: {
        if (regularEnginesTotal == 0) {
            return nullptr;
        }

        if (npIndices.size() == 0) {
            // First regular request: index 0 is used.
            regularCounter.fetch_add(1);
            npIndices.push_back(selectedIndex);
        } else if (assignedContextsCounter < regularEnginesTotal) {
            // Claim a context that has not been assigned yet.
            selectedIndex = assignedContextsCounter.fetch_add(1);
            regularCounter.fetch_add(1);
            npIndices.push_back(selectedIndex);
        } else {
            // Nothing free: rotate over indices already used for regular work.
            auto reuseSlot = (regularCounter.fetch_add(1)) % (npIndices.size());
            selectedIndex = npIndices[reuseSlot];
        }
        break;
    }
    default:
        DEBUG_BREAK_IF(true);
        break;
    }

    return &engines[selectedIndex];
}
|
|
|
|
void Device::stopDirectSubmissionForCopyEngine() {
|
|
auto internalBcsEngine = getInternalCopyEngine();
|
|
if (internalBcsEngine == nullptr || getHardwareInfo().featureTable.ftrBcsInfo.count() > 1) {
|
|
return;
|
|
}
|
|
auto regularBcsEngine = tryGetEngine(internalBcsEngine->osContext->getEngineType(), EngineUsage::regular);
|
|
if (regularBcsEngine == nullptr) {
|
|
return;
|
|
}
|
|
auto regularBcs = regularBcsEngine->commandStreamReceiver;
|
|
if (regularBcs->isAnyDirectSubmissionEnabled()) {
|
|
regularBcs->stopDirectSubmission(false, true);
|
|
}
|
|
}
|
|
|
|
// Groups devices by product family and orders the groups.
//
// Null entries in `devices` are skipped. Every other device is moved into the group for its
// product family; groups are created in order of first appearance. The groups are then sorted:
// groups whose first device is not integrated come before integrated ones, and within the same
// kind a higher eProductFamily value comes first.
std::vector<DeviceVector> Device::groupDevices(DeviceVector devices) {
    std::map<PRODUCT_FAMILY, size_t> productsMap;
    std::vector<DeviceVector> outDevices;
    for (auto &device : devices) {
        if (!device) {
            continue;
        }
        const auto productFamily = device->getHardwareInfo().platform.eProductFamily;
        // Single lookup: a new family gets the index of the group appended just below.
        const auto [groupEntry, isNewFamily] = productsMap.try_emplace(productFamily, outDevices.size());
        if (isNewFamily) {
            outDevices.emplace_back();
        }
        outDevices[groupEntry->second].push_back(std::move(device));
    }
    std::sort(outDevices.begin(), outDevices.end(), [](const DeviceVector &lhs, const DeviceVector &rhs) -> bool {
        auto &leftHwInfo = lhs[0]->getHardwareInfo();  // NOLINT(clang-analyzer-cplusplus.Move) - MSVC assumes usage of moved vector
        auto &rightHwInfo = rhs[0]->getHardwareInfo(); // NOLINT(clang-analyzer-cplusplus.Move)
        if (leftHwInfo.capabilityTable.isIntegratedDevice != rightHwInfo.capabilityTable.isIntegratedDevice) {
            return rightHwInfo.capabilityTable.isIntegratedDevice;
        }
        return leftHwInfo.platform.eProductFamily > rightHwInfo.platform.eProductFamily;
    });
    return outDevices;
}
|
|
|
|
} // namespace NEO
|