/*
 * Copyright (C) 2018-2024 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "shared/source/device/device.h"

#include "shared/source/built_ins/sip.h"
#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/command_stream/preemption.h"
#include "shared/source/command_stream/submission_status.h"
#include "shared/source/debug_settings/debug_settings_manager.h"
#include "shared/source/debugger/debugger_l0.h"
#include "shared/source/device/sub_device.h"
#include "shared/source/execution_environment/execution_environment.h"
#include "shared/source/execution_environment/root_device_environment.h"
#include "shared/source/gmm_helper/gmm_helper.h"
#include "shared/source/helpers/api_specific_config.h"
#include "shared/source/helpers/compiler_product_helper.h"
#include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/helpers/ray_tracing_helper.h"
#include "shared/source/memory_manager/allocation_properties.h"
#include "shared/source/memory_manager/memory_manager.h"
#include "shared/source/os_interface/driver_info.h"
#include "shared/source/os_interface/os_context.h"
#include "shared/source/os_interface/os_interface.h"
#include "shared/source/os_interface/os_time.h"
#include "shared/source/program/sync_buffer_handler.h"
#include "shared/source/utilities/software_tags_manager.h"

namespace NEO {

// Function pointer through which initDeviceFully() creates performance counters;
// it is initialized to PerformanceCounters::create.
decltype(&PerformanceCounters::create) Device::createPerformanceCountersFunc = PerformanceCounters::create;

// Defined in another translation unit; createCommandStreamReceiver() wraps its result.
extern CommandStreamReceiver *createCommandStream(ExecutionEnvironment &executionEnvironment,
                                                  uint32_t rootDeviceIndex,
                                                  const DeviceBitfield deviceBitfield);

// Stores the execution environment and root device index, takes an internal
// reference on the execution environment and sets the dummy blit properties
// on the root device environment selected by rootDeviceIndex.
Device::Device(ExecutionEnvironment *executionEnvironment, const uint32_t rootDeviceIndex)
    : executionEnvironment(executionEnvironment), rootDeviceIndex(rootDeviceIndex), isaPoolAllocator(this) {
    // The members were just initialized from the parameters, so both name the same values.
    executionEnvironment->incRefInternal();

    auto &ownRootDeviceEnvironment = *executionEnvironment->rootDeviceEnvironments[rootDeviceIndex];
    ownRootDeviceEnvironment.setDummyBlitProperties(rootDeviceIndex);
}

Device::~Device() { finalizeRayTracing(); DEBUG_BREAK_IF(nullptr == executionEnvironment->memoryManager.get()); if (performanceCounters) { performanceCounters->shutdown(); } for (auto &engine : allEngines) { engine.commandStreamReceiver->flushBatchedSubmissions(); } allEngines.clear(); for (auto subdevice : subdevices) { if (subdevice) { delete subdevice; } } subdevices.clear(); syncBufferHandler.reset(); isaPoolAllocator.releasePools(); secondaryCsrs.clear(); executionEnvironment->memoryManager->releaseSecondaryOsContexts(this->getRootDeviceIndex()); commandStreamReceivers.clear(); executionEnvironment->memoryManager->waitForDeletions(); executionEnvironment->decRefInternal(); } SubDevice *Device::createSubDevice(uint32_t subDeviceIndex) { return Device::create(executionEnvironment, subDeviceIndex, *getRootDevice()); } SubDevice *Device::createEngineInstancedSubDevice(uint32_t subDeviceIndex, aub_stream::EngineType engineType) { return Device::create(executionEnvironment, subDeviceIndex, *getRootDevice(), engineType); } bool Device::genericSubDevicesAllowed() { auto deviceMask = executionEnvironment->rootDeviceEnvironments[getRootDeviceIndex()]->deviceAffinityMask.getGenericSubDevicesMask(); uint32_t subDeviceCount = GfxCoreHelper::getSubDevicesCount(&getHardwareInfo()); deviceBitfield = maxNBitValue(subDeviceCount); deviceBitfield &= deviceMask; numSubDevices = static_cast(deviceBitfield.count()); if (numSubDevices == 1) { numSubDevices = 0; } return (numSubDevices > 0); } bool Device::engineInstancedSubDevicesAllowed() { bool notAllowed = !debugManager.flags.EngineInstancedSubDevices.get(); notAllowed |= engineInstanced; notAllowed |= (getHardwareInfo().gtSystemInfo.CCSInfo.NumberOfCCSEnabled < 2); notAllowed |= ((GfxCoreHelper::getSubDevicesCount(&getHardwareInfo()) < 2) && (!debugManager.flags.AllowSingleTileEngineInstancedSubDevices.get())); if (notAllowed) { return false; } UNRECOVERABLE_IF(deviceBitfield.count() != 1); uint32_t subDeviceIndex = 
Math::log2(static_cast(deviceBitfield.to_ulong())); auto enginesMask = getRootDeviceEnvironment().deviceAffinityMask.getEnginesMask(subDeviceIndex); auto ccsCount = getHardwareInfo().gtSystemInfo.CCSInfo.NumberOfCCSEnabled; numSubDevices = std::min(ccsCount, static_cast(enginesMask.count())); if (numSubDevices == 1) { numSubDevices = 0; } return (numSubDevices > 0); } bool Device::createEngineInstancedSubDevices() { UNRECOVERABLE_IF(deviceBitfield.count() != 1); UNRECOVERABLE_IF(!subdevices.empty()); uint32_t subDeviceIndex = Math::log2(static_cast(deviceBitfield.to_ulong())); auto enginesMask = getRootDeviceEnvironment().deviceAffinityMask.getEnginesMask(subDeviceIndex); auto ccsCount = getHardwareInfo().gtSystemInfo.CCSInfo.NumberOfCCSEnabled; subdevices.resize(ccsCount, nullptr); for (uint32_t i = 0; i < ccsCount; i++) { if (!enginesMask.test(i)) { continue; } auto engineType = static_cast(aub_stream::EngineType::ENGINE_CCS + i); auto subDevice = createEngineInstancedSubDevice(subDeviceIndex, engineType); UNRECOVERABLE_IF(!subDevice); subdevices[i] = subDevice; } return true; } bool Device::createGenericSubDevices() { UNRECOVERABLE_IF(!subdevices.empty()); uint32_t subDeviceCount = GfxCoreHelper::getSubDevicesCount(&getHardwareInfo()); subdevices.resize(subDeviceCount, nullptr); for (auto i = 0u; i < subDeviceCount; i++) { if (!deviceBitfield.test(i)) { continue; } auto subDevice = createSubDevice(i); if (!subDevice) { return false; } subdevices[i] = subDevice; } hasGenericSubDevices = true; return true; } bool Device::createSubDevices() { if (genericSubDevicesAllowed()) { return createGenericSubDevices(); } if (engineInstancedSubDevicesAllowed()) { return createEngineInstancedSubDevices(); } return true; } void Device::setAsEngineInstanced() { if (subdevices.size() > 0) { return; } UNRECOVERABLE_IF(deviceBitfield.count() != 1); uint32_t subDeviceIndex = Math::log2(static_cast(deviceBitfield.to_ulong())); auto enginesMask = 
getRootDeviceEnvironment().deviceAffinityMask.getEnginesMask(subDeviceIndex); if (enginesMask.count() != 1) { return; } auto ccsCount = getHardwareInfo().gtSystemInfo.CCSInfo.NumberOfCCSEnabled; for (uint32_t i = 0; i < ccsCount; i++) { if (!enginesMask.test(i)) { continue; } UNRECOVERABLE_IF(engineInstanced); engineInstanced = true; engineInstancedType = static_cast(aub_stream::EngineType::ENGINE_CCS + i); } UNRECOVERABLE_IF(!engineInstanced); } bool Device::createDeviceImpl() { // init sub devices first if (!createSubDevices()) { return false; } // create engines if (!initDeviceWithEngines()) { return false; } // go back to root-device init if (isSubDevice()) { return true; } // initialize common resources once initializeCommonResources(); // continue proper init for all devices return initDeviceFully(); } bool Device::initDeviceWithEngines() { setAsEngineInstanced(); auto &hwInfo = getHardwareInfo(); preemptionMode = PreemptionHelper::getDefaultPreemptionMode(hwInfo); auto &productHelper = getProductHelper(); if (getDebugger() && productHelper.disableL3CacheForDebug(hwInfo)) { getGmmHelper()->forceAllResourcesUncached(); } getRootDeviceEnvironmentRef().initOsTime(); initializeCaps(); return createEngines(); } void Device::initializeCommonResources() { if (getExecutionEnvironment()->isDebuggingEnabled()) { const auto rootDeviceIndex = getRootDeviceIndex(); auto rootDeviceEnvironment = getExecutionEnvironment()->rootDeviceEnvironments[rootDeviceIndex].get(); rootDeviceEnvironment->initDebuggerL0(this); if (rootDeviceEnvironment->debugger == nullptr) { NEO::printDebugString(NEO::debugManager.flags.PrintDebugMessages.get(), stderr, "Debug mode is not enabled in the system.\n"); } } auto &hwInfo = getHardwareInfo(); auto &gfxCoreHelper = getGfxCoreHelper(); auto debugSurfaceSize = gfxCoreHelper.getSipKernelMaxDbgSurfaceSize(hwInfo); if (this->isStateSipRequired()) { bool ret = SipKernel::initSipKernel(SipKernel::getSipKernelType(*this), *this); 
UNRECOVERABLE_IF(!ret); debugSurfaceSize = NEO::SipKernel::getSipKernel(*this, nullptr).getStateSaveAreaSize(this); } const bool allocateDebugSurface = getL0Debugger(); if (allocateDebugSurface) { debugSurface = getMemoryManager()->allocateGraphicsMemoryWithProperties( {getRootDeviceIndex(), true, debugSurfaceSize, NEO::AllocationType::debugContextSaveArea, false, false, getDeviceBitfield()}); } } bool Device::initDeviceFully() { for (auto &subdevice : this->subdevices) { if (subdevice && !subdevice->initDeviceFully()) { return false; } } if (!initializeEngines()) { return false; } getDefaultEngine().osContext->setDefaultContext(true); for (auto &engine : allEngines) { auto commandStreamReceiver = engine.commandStreamReceiver; commandStreamReceiver->postInitFlagsSetup(); } auto ®isteredEngines = executionEnvironment->memoryManager->getRegisteredEngines(rootDeviceIndex); uint32_t defaultEngineIndexWithinMemoryManager = 0; for (auto engineIndex = 0u; engineIndex < registeredEngines.size(); engineIndex++) { OsContext *engine = registeredEngines[engineIndex].osContext; if (engine == getDefaultEngine().osContext) { defaultEngineIndexWithinMemoryManager = engineIndex; break; } } executionEnvironment->memoryManager->setDefaultEngineIndex(getRootDeviceIndex(), defaultEngineIndexWithinMemoryManager); auto &hwInfo = getHardwareInfo(); if (getRootDeviceEnvironment().osInterface) { if (hwInfo.capabilityTable.instrumentationEnabled) { performanceCounters = createPerformanceCountersFunc(this); } } executionEnvironment->memoryManager->setForce32BitAllocations(getDeviceInfo().force32BitAddressess); if (debugManager.flags.EnableSWTags.get() && !getRootDeviceEnvironment().tagsManager->isInitialized()) { getRootDeviceEnvironment().tagsManager->initialize(*this); } createBindlessHeapsHelper(); if (!isEngineInstanced()) { uuid.isValid = false; if (getRootDeviceEnvironment().osInterface == nullptr) { return true; } auto &gfxCoreHelper = getGfxCoreHelper(); auto &productHelper = 
getProductHelper(); if (debugManager.flags.EnableChipsetUniqueUUID.get() != 0) { if (gfxCoreHelper.isChipsetUniqueUUIDSupported()) { auto deviceIndex = isSubDevice() ? static_cast(this)->getSubDeviceIndex() + 1 : 0; uuid.isValid = productHelper.getUuid(getRootDeviceEnvironment().osInterface->getDriverModel(), getRootDevice()->getNumSubDevices(), deviceIndex, uuid.id); } } if (!uuid.isValid) { PhysicalDevicePciBusInfo pciBusInfo = getRootDeviceEnvironment().osInterface->getDriverModel()->getPciBusInfo(); uuid.isValid = generateUuidFromPciBusInfo(pciBusInfo, uuid.id); } } return true; } bool Device::createEngines() { if (engineInstanced) { return createEngine({engineInstancedType, EngineUsage::regular}); } auto &gfxCoreHelper = getGfxCoreHelper(); auto gpgpuEngines = gfxCoreHelper.getGpgpuEngineInstances(getRootDeviceEnvironment()); for (auto &engine : gpgpuEngines) { if (!createEngine(engine)) { return false; } } if (gfxCoreHelper.areSecondaryContextsSupported()) { for (auto engineGroupType : {EngineGroupType::compute, EngineGroupType::copy, EngineGroupType::linkedCopy}) { auto engineGroup = tryGetRegularEngineGroup(engineGroupType); if (!engineGroup) { continue; } auto contextCount = gfxCoreHelper.getContextGroupContextsCount(); auto highPriorityContextCount = std::min(contextCount / 2, 4u); if (debugManager.flags.OverrideNumHighPriorityContexts.get() != -1) { highPriorityContextCount = static_cast(debugManager.flags.OverrideNumHighPriorityContexts.get()); } for (uint32_t engineIndex = 0; engineIndex < static_cast(engineGroup->engines.size()); engineIndex++) { auto engineType = engineGroup->engines[engineIndex].getEngineType(); if ((static_cast(debugManager.flags.SecondaryContextEngineTypeMask.get()) & (1 << static_cast(engineType))) == 0) { continue; } UNRECOVERABLE_IF(secondaryEngines.find(engineType) != secondaryEngines.end()); auto &secondaryEnginesForType = secondaryEngines[engineType]; auto primaryEngine = engineGroup->engines[engineIndex]; 
secondaryEnginesForType.regularEnginesTotal = contextCount - highPriorityContextCount; secondaryEnginesForType.highPriorityEnginesTotal = highPriorityContextCount; secondaryEnginesForType.regularCounter = 0; secondaryEnginesForType.highPriorityCounter = 0; secondaryEnginesForType.assignedContextsCounter = 1; NEO::EngineTypeUsage engineTypeUsage; engineTypeUsage.first = primaryEngine.getEngineType(); secondaryEnginesForType.engines.push_back(primaryEngine); for (uint32_t i = 1; i < contextCount; i++) { engineTypeUsage.second = EngineUsage::regular; if (i >= contextCount - highPriorityContextCount) { engineTypeUsage.second = EngineUsage::highPriority; } createSecondaryEngine(primaryEngine.commandStreamReceiver, engineTypeUsage); } primaryEngine.osContext->setContextGroup(true); } } } return true; } void Device::addEngineToEngineGroup(EngineControl &engine) { auto &hardwareInfo = this->getHardwareInfo(); auto &gfxCoreHelper = getGfxCoreHelper(); auto &productHelper = getProductHelper(); auto &rootDeviceEnvironment = this->getRootDeviceEnvironment(); EngineGroupType engineGroupType = gfxCoreHelper.getEngineGroupType(engine.getEngineType(), engine.getEngineUsage(), hardwareInfo); productHelper.adjustEngineGroupType(engineGroupType); if (!gfxCoreHelper.isSubDeviceEngineSupported(rootDeviceEnvironment, getDeviceBitfield(), engine.getEngineType())) { return; } if (EngineHelper::isCopyOnlyEngineType(engineGroupType) && debugManager.flags.EnableBlitterOperationsSupport.get() == 0) { return; } if (this->regularEngineGroups.empty() || this->regularEngineGroups.back().engineGroupType != engineGroupType) { this->regularEngineGroups.push_back(EngineGroupT{}); this->regularEngineGroups.back().engineGroupType = engineGroupType; } auto &engines = this->regularEngineGroups.back().engines; if (engines.size() > 0 && engines.back().getEngineType() == engine.getEngineType()) { return; // Type already added. Exposing multiple contexts for the same engine is disabled. 
} engines.push_back(engine); } std::unique_ptr Device::createCommandStreamReceiver() const { return std::unique_ptr(createCommandStream(*executionEnvironment, getRootDeviceIndex(), getDeviceBitfield())); } bool Device::createEngine(EngineTypeUsage engineTypeUsage) { const auto &hwInfo = getHardwareInfo(); auto &gfxCoreHelper = getGfxCoreHelper(); const auto engineType = engineTypeUsage.first; const auto engineUsage = engineTypeUsage.second; const auto defaultEngineType = engineInstanced ? this->engineInstancedType : getChosenEngineType(hwInfo); const bool isDefaultEngine = defaultEngineType == engineType && engineUsage == EngineUsage::regular; const bool createAsEngineInstanced = engineInstanced && EngineHelpers::isCcs(engineType); bool primaryEngineTypeAllowed = (EngineHelpers::isCcs(engineType) || EngineHelpers::isBcs(engineType)); if (debugManager.flags.SecondaryContextEngineTypeMask.get() != -1) { primaryEngineTypeAllowed &= (static_cast(debugManager.flags.SecondaryContextEngineTypeMask.get()) & (1 << static_cast(engineType))) != 0; } const bool isPrimaryEngine = primaryEngineTypeAllowed && (engineUsage == EngineUsage::regular); const bool useContextGroup = isPrimaryEngine && gfxCoreHelper.areSecondaryContextsSupported(); UNRECOVERABLE_IF(EngineHelpers::isBcs(engineType) && !hwInfo.capabilityTable.blitterOperationsSupported); std::unique_ptr commandStreamReceiver = createCommandStreamReceiver(); if (!commandStreamReceiver) { return false; } bool internalUsage = (engineUsage == EngineUsage::internal); if (internalUsage) { commandStreamReceiver->initializeDefaultsForInternalEngine(); } if (commandStreamReceiver->needsPageTableManager()) { commandStreamReceiver->createPageTableManager(); } EngineDescriptor engineDescriptor(engineTypeUsage, getDeviceBitfield(), preemptionMode, false, createAsEngineInstanced); auto osContext = executionEnvironment->memoryManager->createAndRegisterOsContext(commandStreamReceiver.get(), engineDescriptor); 
osContext->setContextGroup(useContextGroup); osContext->setIsPrimaryEngine(isPrimaryEngine); osContext->setIsDefaultEngine(isDefaultEngine); commandStreamReceiver->setupContext(*osContext); if (osContext->isImmediateContextInitializationEnabled(isDefaultEngine)) { if (!commandStreamReceiver->initializeResources(false)) { return false; } } if (!commandStreamReceiver->initializeTagAllocation()) { return false; } if (!commandStreamReceiver->createGlobalFenceAllocation()) { return false; } if (preemptionMode == PreemptionMode::MidThread && !commandStreamReceiver->createPreemptionAllocation()) { return false; } EngineControl engine{commandStreamReceiver.get(), osContext}; allEngines.push_back(engine); if (engineUsage == EngineUsage::regular) { addEngineToEngineGroup(engine); } commandStreamReceivers.push_back(std::move(commandStreamReceiver)); return true; } bool Device::initializeEngines() { uint32_t deviceCsrIndex = 0; bool defaultEngineAlreadySet = false; for (auto &engine : allEngines) { bool firstSubmissionDone = false; if (engine.osContext->getIsDefaultEngine() && !defaultEngineAlreadySet) { defaultEngineAlreadySet = true; defaultEngineIndex = deviceCsrIndex; if (engine.osContext->isDebuggableContext() || this->isInitDeviceWithFirstSubmissionSupported(engine.commandStreamReceiver->getType())) { if (SubmissionStatus::success != engine.commandStreamReceiver->initializeDeviceWithFirstSubmission(*this)) { return false; } firstSubmissionDone = true; } } auto &compilerProductHelper = this->getCompilerProductHelper(); auto heaplessEnabled = compilerProductHelper.isHeaplessModeEnabled(); bool isHeaplessStateInit = engine.osContext->getIsPrimaryEngine() && compilerProductHelper.isHeaplessStateInitEnabled(heaplessEnabled); bool initializeDevice = (engine.osContext->isPartOfContextGroup() || isHeaplessStateInit) && !firstSubmissionDone; if (initializeDevice) { engine.commandStreamReceiver->initializeResources(false); 
engine.commandStreamReceiver->initializeDeviceWithFirstSubmission(*this); } deviceCsrIndex++; } return true; } bool Device::createSecondaryEngine(CommandStreamReceiver *primaryCsr, EngineTypeUsage engineTypeUsage) { auto engineUsage = engineTypeUsage.second; std::unique_ptr commandStreamReceiver = createCommandStreamReceiver(); if (!commandStreamReceiver) { return false; } bool internalUsage = (engineUsage == EngineUsage::internal); if (internalUsage) { commandStreamReceiver->initializeDefaultsForInternalEngine(); } EngineDescriptor engineDescriptor(engineTypeUsage, getDeviceBitfield(), preemptionMode, false, false); auto osContext = executionEnvironment->memoryManager->createAndRegisterSecondaryOsContext(&primaryCsr->getOsContext(), commandStreamReceiver.get(), engineDescriptor); osContext->incRefInternal(); commandStreamReceiver->setupContext(*osContext); commandStreamReceiver->setPrimaryCsr(primaryCsr); EngineControl engine{commandStreamReceiver.get(), osContext}; secondaryEngines[engineTypeUsage.first].engines.push_back(engine); secondaryCsrs.push_back(std::move(commandStreamReceiver)); return true; } EngineControl *Device::getSecondaryEngineCsr(EngineTypeUsage engineTypeUsage, bool allocateInterrupt) { if (secondaryEngines.find(engineTypeUsage.first) == secondaryEngines.end()) { return nullptr; } auto &secondaryEnginesForType = secondaryEngines[engineTypeUsage.first]; auto engineControl = secondaryEnginesForType.getEngine(engineTypeUsage.second); bool isPrimaryContextInGroup = engineControl->osContext->getIsPrimaryEngine() && engineControl->osContext->isPartOfContextGroup(); if (isPrimaryContextInGroup && allocateInterrupt) { // Context 0 is already pre-initialized. We need non-initialized context, to pass context creation flag. // If all contexts are already initialized, just take next available. Interrupt request is only a hint. 
engineControl = secondaryEnginesForType.getEngine(engineTypeUsage.second); } isPrimaryContextInGroup = engineControl->osContext->getIsPrimaryEngine() && engineControl->osContext->isPartOfContextGroup(); if (!isPrimaryContextInGroup) { auto commandStreamReceiver = engineControl->commandStreamReceiver; auto lock = commandStreamReceiver->obtainUniqueOwnership(); if (!commandStreamReceiver->isInitialized()) { if (commandStreamReceiver->needsPageTableManager()) { commandStreamReceiver->createPageTableManager(); } EngineDescriptor engineDescriptor(engineTypeUsage, getDeviceBitfield(), preemptionMode, false, false); if (!commandStreamReceiver->initializeResources(allocateInterrupt)) { return nullptr; } if (!commandStreamReceiver->initializeTagAllocation()) { return nullptr; } if (preemptionMode == PreemptionMode::MidThread && !commandStreamReceiver->createPreemptionAllocation()) { return nullptr; } } } return engineControl; } const HardwareInfo &Device::getHardwareInfo() const { return *getRootDeviceEnvironment().getHardwareInfo(); } const DeviceInfo &Device::getDeviceInfo() const { return deviceInfo; } double Device::getProfilingTimerResolution() { return getOSTime()->getDynamicDeviceTimerResolution(getHardwareInfo()); } uint64_t Device::getProfilingTimerClock() { return getOSTime()->getDynamicDeviceTimerClock(getHardwareInfo()); } bool Device::isBcsSplitSupported() { auto &productHelper = getProductHelper(); auto bcsSplit = productHelper.isBlitSplitEnqueueWARequired(getHardwareInfo()) && Device::isBlitSplitEnabled(); if (debugManager.flags.SplitBcsCopy.get() != -1) { bcsSplit = debugManager.flags.SplitBcsCopy.get(); } return bcsSplit; } bool Device::isInitDeviceWithFirstSubmissionSupported(CommandStreamReceiverType csrType) { return !this->executionEnvironment->areMetricsEnabled() && getProductHelper().isInitDeviceWithFirstSubmissionRequired(getHardwareInfo()) && Device::isInitDeviceWithFirstSubmissionEnabled(csrType); } double Device::getPlatformHostTimerResolution() 
const { if (getOSTime()) { return getOSTime()->getHostTimerResolution(); } return 0.0; } GFXCORE_FAMILY Device::getRenderCoreFamily() const { return this->getHardwareInfo().platform.eRenderCoreFamily; } Debugger *Device::getDebugger() const { return getRootDeviceEnvironment().debugger.get(); } bool Device::areSharedSystemAllocationsAllowed() const { auto sharedSystemAllocationsSupport = static_cast(getHardwareInfo().capabilityTable.sharedSystemMemCapabilities); if (debugManager.flags.EnableSharedSystemUsmSupport.get() != -1) { sharedSystemAllocationsSupport = debugManager.flags.EnableSharedSystemUsmSupport.get(); } return sharedSystemAllocationsSupport; } size_t Device::getEngineGroupIndexFromEngineGroupType(EngineGroupType engineGroupType) const { for (size_t i = 0; i < regularEngineGroups.size(); i++) { if (regularEngineGroups[i].engineGroupType == engineGroupType) { return i; } } UNRECOVERABLE_IF(true); return 0; } EngineControl *Device::tryGetEngine(aub_stream::EngineType engineType, EngineUsage engineUsage) { for (auto &engine : allEngines) { if ((engine.getEngineType() == engineType) && (engine.getEngineUsage() == engineUsage)) { return &engine; } } if (debugManager.flags.OverrideInvalidEngineWithDefault.get()) { return &allEngines[0]; } return nullptr; } EngineControl &Device::getEngine(aub_stream::EngineType engineType, EngineUsage engineUsage) { auto engine = tryGetEngine(engineType, engineUsage); UNRECOVERABLE_IF(!engine); return *engine; } EngineControl &Device::getEngine(uint32_t index) { UNRECOVERABLE_IF(index >= allEngines.size()); return allEngines[index]; } bool Device::getDeviceAndHostTimer(uint64_t *deviceTimestamp, uint64_t *hostTimestamp) const { TimeStampData timeStamp; auto retVal = getOSTime()->getGpuCpuTime(&timeStamp, true); if (retVal) { *hostTimestamp = timeStamp.cpuTimeinNS; if (debugManager.flags.EnableDeviceBasedTimestamps.get()) { auto resolution = getOSTime()->getDynamicDeviceTimerResolution(getHardwareInfo()); *deviceTimestamp = 
getGfxCoreHelper().getGpuTimeStampInNS(timeStamp.gpuTimeStamp, resolution); } else *deviceTimestamp = *hostTimestamp; } return retVal; } bool Device::getHostTimer(uint64_t *hostTimestamp) const { return getOSTime()->getCpuTime(hostTimestamp); } uint32_t Device::getNumGenericSubDevices() const { return (hasRootCsr() ? getNumSubDevices() : 0); } Device *Device::getSubDevice(uint32_t deviceId) const { UNRECOVERABLE_IF(deviceId >= subdevices.size()); return subdevices[deviceId]; } Device *Device::getNearestGenericSubDevice(uint32_t deviceId) { /* * EngineInstanced: Upper level * Generic SubDevice: 'this' * RootCsr Device: Next level SubDevice (generic) */ if (engineInstanced) { return getRootDevice()->getNearestGenericSubDevice(Math::log2(static_cast(deviceBitfield.to_ulong()))); } if (subdevices.empty() || !hasRootCsr()) { return this; } UNRECOVERABLE_IF(deviceId >= subdevices.size()); return subdevices[deviceId]; } BindlessHeapsHelper *Device::getBindlessHeapsHelper() const { return getRootDeviceEnvironment().getBindlessHeapsHelper(); } GmmClientContext *Device::getGmmClientContext() const { return getGmmHelper()->getClientContext(); } void Device::allocateSyncBufferHandler() { static std::mutex mutex; std::unique_lock lock(mutex); if (syncBufferHandler.get() == nullptr) { syncBufferHandler = std::make_unique(*this); UNRECOVERABLE_IF(syncBufferHandler.get() == nullptr); } } uint64_t Device::getGlobalMemorySize(uint32_t deviceBitfield) const { auto globalMemorySize = getMemoryManager()->isLocalMemorySupported(this->getRootDeviceIndex()) ? 
getMemoryManager()->getLocalMemorySize(this->getRootDeviceIndex(), deviceBitfield) : getMemoryManager()->getSystemSharedMemory(this->getRootDeviceIndex()); globalMemorySize = std::min(globalMemorySize, getMemoryManager()->getMaxApplicationAddress() + 1); double percentOfGlobalMemoryAvailable = getPercentOfGlobalMemoryAvailable(); globalMemorySize = static_cast(static_cast(globalMemorySize) * percentOfGlobalMemoryAvailable); if (debugManager.flags.ClDeviceGlobalMemSizeAvailablePercent.get() == -1 && !getMemoryManager()->isLocalMemorySupported(this->getRootDeviceIndex())) { const uint64_t internalResourcesSize = 450 * MemoryConstants::megaByte; globalMemorySize = std::max(static_cast(0), globalMemorySize - internalResourcesSize); } return globalMemorySize; } double Device::getPercentOfGlobalMemoryAvailable() const { if (debugManager.flags.ClDeviceGlobalMemSizeAvailablePercent.get() != -1) { return 0.01 * static_cast(debugManager.flags.ClDeviceGlobalMemSizeAvailablePercent.get()); } return getMemoryManager()->getPercentOfGlobalMemoryAvailable(this->getRootDeviceIndex()); } NEO::DebuggerL0 *Device::getL0Debugger() { auto debugger = getDebugger(); return debugger ? 
static_cast(debugger) : nullptr; } const std::vector &Device::getAllEngines() const { return this->allEngines; } const RootDeviceEnvironment &Device::getRootDeviceEnvironment() const { return *executionEnvironment->rootDeviceEnvironments[getRootDeviceIndex()]; } RootDeviceEnvironment &Device::getRootDeviceEnvironmentRef() const { return *executionEnvironment->rootDeviceEnvironments[getRootDeviceIndex()]; } bool Device::isFullRangeSvm() const { return getRootDeviceEnvironment().isFullRangeSvm(); } EngineControl &Device::getInternalEngine() { if (this->allEngines[0].commandStreamReceiver->getType() != CommandStreamReceiverType::hardware) { return this->getDefaultEngine(); } auto engineType = getChosenEngineType(getHardwareInfo()); return this->getNearestGenericSubDevice(0)->getEngine(engineType, EngineUsage::internal); } EngineControl &Device::getNextEngineForCommandQueue() { this->initializeEngineRoundRobinControls(); const auto &defaultEngine = this->getDefaultEngine(); const auto &hardwareInfo = this->getHardwareInfo(); const auto &gfxCoreHelper = getGfxCoreHelper(); const auto engineGroupType = gfxCoreHelper.getEngineGroupType(defaultEngine.getEngineType(), defaultEngine.getEngineUsage(), hardwareInfo); const auto defaultEngineGroupIndex = this->getEngineGroupIndexFromEngineGroupType(engineGroupType); auto &engineGroup = this->getRegularEngineGroups()[defaultEngineGroupIndex]; auto engineIndex = 0u; do { engineIndex = (this->regularCommandQueuesCreatedWithinDeviceCount++ / this->queuesPerEngineCount) % engineGroup.engines.size(); } while (!this->availableEnginesForCommandQueueusRoundRobin.test(engineIndex)); return engineGroup.engines[engineIndex]; } EngineControl *Device::getInternalCopyEngine() { if (!getHardwareInfo().capabilityTable.blitterOperationsSupported) { return nullptr; } const auto &productHelper = this->getProductHelper(); auto expectedEngine = productHelper.getDefaultCopyEngine(); if (debugManager.flags.ForceBCSForInternalCopyEngine.get() != -1) { 
expectedEngine = EngineHelpers::mapBcsIndexToEngineType(debugManager.flags.ForceBCSForInternalCopyEngine.get(), true); } for (auto &engine : allEngines) { if (engine.osContext->getEngineType() == expectedEngine && engine.osContext->isInternalEngine()) { return &engine; } } return nullptr; } RTDispatchGlobalsInfo *Device::getRTDispatchGlobals(uint32_t maxBvhLevels) { if (rtDispatchGlobalsInfos.size() == 0) { return nullptr; } size_t last = rtDispatchGlobalsInfos.size() - 1; if (maxBvhLevels > last) { return nullptr; } for (size_t i = last; i >= maxBvhLevels; i--) { if (rtDispatchGlobalsInfos[i] != nullptr) { return rtDispatchGlobalsInfos[i]; } if (i == 0) { break; } } allocateRTDispatchGlobals(maxBvhLevels); return rtDispatchGlobalsInfos[maxBvhLevels]; } void Device::initializeRayTracing(uint32_t maxBvhLevels) { if (rtMemoryBackedBuffer == nullptr) { auto size = RayTracingHelper::getTotalMemoryBackedFifoSize(*this); AllocationProperties allocProps(getRootDeviceIndex(), true, size, AllocationType::buffer, true, getDeviceBitfield()); auto &productHelper = getProductHelper(); allocProps.flags.resource48Bit = productHelper.is48bResourceNeededForRayTracing(); allocProps.flags.isUSMDeviceAllocation = true; rtMemoryBackedBuffer = getMemoryManager()->allocateGraphicsMemoryWithProperties(allocProps); } while (rtDispatchGlobalsInfos.size() <= maxBvhLevels) { rtDispatchGlobalsInfos.push_back(nullptr); } } void Device::finalizeRayTracing() { getMemoryManager()->freeGraphicsMemory(rtMemoryBackedBuffer); rtMemoryBackedBuffer = nullptr; for (size_t i = 0; i < rtDispatchGlobalsInfos.size(); i++) { auto rtDispatchGlobalsInfo = rtDispatchGlobalsInfos[i]; if (rtDispatchGlobalsInfo == nullptr) { continue; } for (size_t j = 0; j < rtDispatchGlobalsInfo->rtStacks.size(); j++) { getMemoryManager()->freeGraphicsMemory(rtDispatchGlobalsInfo->rtStacks[j]); rtDispatchGlobalsInfo->rtStacks[j] = nullptr; } getMemoryManager()->freeGraphicsMemory(rtDispatchGlobalsInfo->rtDispatchGlobalsArray); 
rtDispatchGlobalsInfo->rtDispatchGlobalsArray = nullptr; delete rtDispatchGlobalsInfos[i]; rtDispatchGlobalsInfos[i] = nullptr; } } void Device::initializeEngineRoundRobinControls() { if (this->availableEnginesForCommandQueueusRoundRobin.any()) { return; } uint32_t queuesPerEngine = 1u; if (debugManager.flags.CmdQRoundRobindEngineAssignNTo1.get() != -1) { queuesPerEngine = debugManager.flags.CmdQRoundRobindEngineAssignNTo1.get(); } this->queuesPerEngineCount = queuesPerEngine; std::bitset<8> availableEngines = std::numeric_limits::max(); if (debugManager.flags.CmdQRoundRobindEngineAssignBitfield.get() != -1) { availableEngines = debugManager.flags.CmdQRoundRobindEngineAssignBitfield.get(); } this->availableEnginesForCommandQueueusRoundRobin = availableEngines; } OSTime *Device::getOSTime() const { return getRootDeviceEnvironment().osTime.get(); }; bool Device::getUuid(std::array &uuid) { if (this->uuid.isValid) { uuid = this->uuid.id; auto hwInfo = getHardwareInfo(); auto subDevicesCount = GfxCoreHelper::getSubDevicesCount(&hwInfo); if (subDevicesCount > 1 && deviceBitfield.count() == 1) { // In case of no sub devices created (bits set in affinity mask == 1), return the UUID of enabled sub-device. uint32_t subDeviceIndex = Math::log2(static_cast(deviceBitfield.to_ulong())); uuid[ProductHelper::uuidSize - 1] = subDeviceIndex + 1; } } return this->uuid.isValid; } bool Device::generateUuidFromPciBusInfo(const PhysicalDevicePciBusInfo &pciBusInfo, std::array &uuid) { if (pciBusInfo.pciDomain != PhysicalDevicePciBusInfo::invalidValue) { generateUuid(uuid); /* Device UUID uniquely identifies a device within a system. * We generate it based on device information along with PCI information * This guarantees uniqueness of UUIDs on a system even when multiple * identical Intel GPUs are present. */ /* We want to have UUID matching between different GPU APIs (including outside * of compute_runtime project - i.e. other than L0 or OCL). 
This structure definition * has been agreed upon by various Intel driver teams. * * Consult other driver teams before changing this. */ struct DeviceUUID { uint16_t vendorID; uint16_t deviceID; uint16_t revisionID; uint16_t pciDomain; uint8_t pciBus; uint8_t pciDev; uint8_t pciFunc; uint8_t reserved[4]; uint8_t subDeviceID; }; static_assert(sizeof(DeviceUUID) == ProductHelper::uuidSize); DeviceUUID deviceUUID{}; memcpy_s(&deviceUUID, sizeof(DeviceUUID), uuid.data(), uuid.size()); deviceUUID.pciDomain = static_cast(pciBusInfo.pciDomain); deviceUUID.pciBus = static_cast(pciBusInfo.pciBus); deviceUUID.pciDev = static_cast(pciBusInfo.pciDevice); deviceUUID.pciFunc = static_cast(pciBusInfo.pciFunction); memcpy_s(uuid.data(), uuid.size(), &deviceUUID, sizeof(DeviceUUID)); return true; } return false; } void Device::generateUuid(std::array &uuid) { const auto &deviceInfo = getDeviceInfo(); const auto &hardwareInfo = getHardwareInfo(); uint32_t rootDeviceIndex = getRootDeviceIndex(); uint16_t vendorId = static_cast(deviceInfo.vendorId); uint16_t deviceId = static_cast(hardwareInfo.platform.usDeviceID); uint16_t revisionId = static_cast(hardwareInfo.platform.usRevId); uint8_t subDeviceId = isSubDevice() ? 
static_cast(this)->getSubDeviceIndex() + 1 : 0; uuid.fill(0); memcpy_s(&uuid[0], sizeof(uint32_t), &vendorId, sizeof(vendorId)); memcpy_s(&uuid[2], sizeof(uint32_t), &deviceId, sizeof(deviceId)); memcpy_s(&uuid[4], sizeof(uint32_t), &revisionId, sizeof(revisionId)); memcpy_s(&uuid[6], sizeof(uint32_t), &rootDeviceIndex, sizeof(rootDeviceIndex)); uuid[15] = subDeviceId; } void Device::getAdapterMask(uint32_t &nodeMask) { if (verifyAdapterLuid()) { nodeMask = 1; } } const GfxCoreHelper &Device::getGfxCoreHelper() const { return getRootDeviceEnvironment().getHelper(); } const ProductHelper &Device::getProductHelper() const { return getRootDeviceEnvironment().getHelper(); } const CompilerProductHelper &Device::getCompilerProductHelper() const { return getRootDeviceEnvironment().getHelper(); } ReleaseHelper *Device::getReleaseHelper() const { return getRootDeviceEnvironment().getReleaseHelper(); } void Device::stopDirectSubmissionAndWaitForCompletion() { for (auto &engine : allEngines) { auto csr = engine.commandStreamReceiver; if (csr->isAnyDirectSubmissionEnabled()) { auto lock = csr->obtainUniqueOwnership(); csr->stopDirectSubmission(true); } } } bool Device::isAnyDirectSubmissionEnabled() { bool enabled = false; for (auto &engine : allEngines) { auto csr = engine.commandStreamReceiver; enabled |= csr->isAnyDirectSubmissionEnabled(); } return enabled; } void Device::allocateRTDispatchGlobals(uint32_t maxBvhLevels) { UNRECOVERABLE_IF(rtDispatchGlobalsInfos.size() < maxBvhLevels + 1); UNRECOVERABLE_IF(rtDispatchGlobalsInfos[maxBvhLevels] != nullptr); uint32_t extraBytesLocal = 0; uint32_t extraBytesGlobal = 0; uint32_t dispatchGlobalsStride = MemoryConstants::pageSize64k; UNRECOVERABLE_IF(RayTracingHelper::getDispatchGlobalSize() > dispatchGlobalsStride); bool allocFailed = false; uint32_t tileCount = 1; if (this->getNumSubDevices() > 1) { // If device encompasses multiple tiles, allocate RTDispatchGlobals for each tile tileCount = this->getNumSubDevices(); } auto 
dispatchGlobalsSize = tileCount * dispatchGlobalsStride; auto rtStackSize = RayTracingHelper::getRTStackSizePerTile(*this, tileCount, maxBvhLevels, extraBytesLocal, extraBytesGlobal); std::unique_ptr dispatchGlobalsInfo = std::make_unique(); auto &productHelper = getProductHelper(); GraphicsAllocation *dispatchGlobalsArrayAllocation = nullptr; AllocationProperties arrayAllocProps(getRootDeviceIndex(), true, dispatchGlobalsSize, AllocationType::globalSurface, true, getDeviceBitfield()); arrayAllocProps.flags.resource48Bit = productHelper.is48bResourceNeededForRayTracing(); arrayAllocProps.flags.isUSMDeviceAllocation = true; dispatchGlobalsArrayAllocation = getMemoryManager()->allocateGraphicsMemoryWithProperties(arrayAllocProps); if (dispatchGlobalsArrayAllocation == nullptr) { return; } for (unsigned int tile = 0; tile < tileCount; tile++) { DeviceBitfield deviceBitfield = (tileCount == 1) ? this->getDeviceBitfield() : subdevices[tile]->getDeviceBitfield(); AllocationProperties allocProps(getRootDeviceIndex(), true, rtStackSize, AllocationType::buffer, true, deviceBitfield); allocProps.flags.resource48Bit = productHelper.is48bResourceNeededForRayTracing(); allocProps.flags.isUSMDeviceAllocation = true; auto rtStackAllocation = getMemoryManager()->allocateGraphicsMemoryWithProperties(allocProps); if (rtStackAllocation == nullptr) { allocFailed = true; break; } struct RTDispatchGlobals dispatchGlobals = {0}; dispatchGlobals.rtMemBasePtr = rtStackAllocation->getGpuAddress() + rtStackSize; dispatchGlobals.callStackHandlerKSP = reinterpret_cast(nullptr); dispatchGlobals.stackSizePerRay = 0; dispatchGlobals.numDSSRTStacks = RayTracingHelper::stackDssMultiplier; dispatchGlobals.maxBVHLevels = maxBvhLevels; uint32_t *dispatchGlobalsAsArray = reinterpret_cast(&dispatchGlobals); dispatchGlobalsAsArray[7] = 1; MemoryTransferHelper::transferMemoryToAllocation(productHelper.isBlitCopyRequiredForLocalMemory(this->getRootDeviceEnvironment(), *dispatchGlobalsArrayAllocation), 
*this, dispatchGlobalsArrayAllocation, tile * dispatchGlobalsStride, &dispatchGlobals, sizeof(RTDispatchGlobals)); dispatchGlobalsInfo->rtStacks.push_back(rtStackAllocation); } if (allocFailed) { for (auto allocation : dispatchGlobalsInfo->rtStacks) { getMemoryManager()->freeGraphicsMemory(allocation); } getMemoryManager()->freeGraphicsMemory(dispatchGlobalsArrayAllocation); return; } dispatchGlobalsInfo->rtDispatchGlobalsArray = dispatchGlobalsArrayAllocation; rtDispatchGlobalsInfos[maxBvhLevels] = dispatchGlobalsInfo.release(); } MemoryManager *Device::getMemoryManager() const { return executionEnvironment->memoryManager.get(); } GmmHelper *Device::getGmmHelper() const { return getRootDeviceEnvironment().getGmmHelper(); } CompilerInterface *Device::getCompilerInterface() const { return executionEnvironment->rootDeviceEnvironments[getRootDeviceIndex()]->getCompilerInterface(); } BuiltIns *Device::getBuiltIns() const { return executionEnvironment->rootDeviceEnvironments[getRootDeviceIndex()]->getBuiltIns(); } const EngineGroupT *Device::tryGetRegularEngineGroup(EngineGroupType engineGroupType) const { for (auto &engineGroup : regularEngineGroups) { if (engineGroup.engineGroupType == engineGroupType) { return &engineGroup; } } return nullptr; } EngineControl *SecondaryContexts::getEngine(EngineUsage usage) { auto secondaryEngineIndex = 0; std::lock_guard guard(mutex); if (usage == EngineUsage::highPriority) { // Use index from reserved HP pool if (hpIndices.size() < highPriorityEnginesTotal) { secondaryEngineIndex = (highPriorityCounter.fetch_add(1)) % (highPriorityEnginesTotal); secondaryEngineIndex += regularEnginesTotal; hpIndices.push_back(secondaryEngineIndex); } // Check if there is free index else if (assignedContextsCounter < regularEnginesTotal) { secondaryEngineIndex = assignedContextsCounter.fetch_add(1); highPriorityCounter.fetch_add(1); hpIndices.push_back(secondaryEngineIndex); } // Assign from existing indices else { auto index = 
(highPriorityCounter.fetch_add(1)) % (hpIndices.size()); secondaryEngineIndex = hpIndices[index]; } if (engines[secondaryEngineIndex].osContext->getEngineUsage() != EngineUsage::highPriority) { engines[secondaryEngineIndex].osContext->overrideEngineUsage(EngineUsage::highPriority); } } else if (usage == EngineUsage::regular) { if (npIndices.size() == 0) { regularCounter.fetch_add(1); npIndices.push_back(secondaryEngineIndex); } // Check if there is free index else if (assignedContextsCounter < regularEnginesTotal) { secondaryEngineIndex = assignedContextsCounter.fetch_add(1); regularCounter.fetch_add(1); npIndices.push_back(secondaryEngineIndex); } // Assign from existing indices else { auto index = (regularCounter.fetch_add(1)) % (npIndices.size()); secondaryEngineIndex = npIndices[index]; } } else { DEBUG_BREAK_IF(true); } return &engines[secondaryEngineIndex]; } } // namespace NEO