/*
 * Copyright (C) 2020-2023 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "shared/source/command_container/implicit_scaling.h"
#include "shared/source/command_stream/preemption_mode.h"
#include "shared/source/compiler_interface/compiler_cache.h"
#include "shared/source/compiler_interface/compiler_interface.h"
#include "shared/source/debugger/debugger.h"
#include "shared/source/device/device.h"
#include "shared/source/execution_environment/execution_environment.h"
#include "shared/source/execution_environment/root_device_environment.h"
#include "shared/source/helpers/aligned_memory.h"
#include "shared/source/helpers/api_specific_config.h"
#include "shared/source/helpers/basic_math.h"
#include "shared/source/helpers/hw_helper.h"
#include "shared/source/memory_manager/memory_manager.h"
#include "shared/source/os_interface/hw_info_config.h"
#include "shared/source/os_interface/os_interface.h"
#include "shared/source/source_level_debugger/source_level_debugger.h"

// NOTE(review): the name of the system header included here was not legible in
// the reviewed text. <algorithm> and <limits> are listed because std::min and
// std::numeric_limits are used below - restore the original header if it differs.
#include <algorithm>
#include <limits>

namespace NEO {

static const char *spirvWithVersion = "SPIR-V_1.2 ";

// Returns the value of GetMaxOCLParamSize() reported by the compiler interface's
// IGC features-and-workarounds object for this device.
// Returns 0 when no compiler interface is available or when the query yields no
// object; initializeCaps() keeps its default maxParameterSize in that case.
size_t Device::getMaxParameterSizeFromIGC() const {
    CompilerInterface *compilerInterface = getCompilerInterface();
    if (nullptr == compilerInterface) {
        return 0;
    }

    auto igcFtrWa = compilerInterface->getIgcFeaturesAndWorkarounds(*this);
    if (nullptr == igcFtrWa) {
        // Do not dereference a missing features-and-workarounds object.
        return 0;
    }
    return igcFtrWa->GetMaxOCLParamSize();
}

// Fills deviceInfo with the capabilities of this device, combining hardware
// info, helper queries and DebugManager overrides. May also set preemptionMode
// to Disabled when a debugger is active.
//
// NOTE(review): template arguments were not legible in the reviewed text.
// Casts that feed a deviceInfo field use the type of that field; the remaining
// ones are marked individually and should be confirmed against the headers.
void Device::initializeCaps() {
    auto &hwInfo = getHardwareInfo();
    auto addressing32bitAllowed = is64bit;
    // NOTE(review): helper types inferred from the variable names - confirm.
    auto &productHelper = this->getRootDeviceEnvironment().getHelper<ProductHelper>();
    auto &gfxCoreHelper = this->getRootDeviceEnvironment().getHelper<GfxCoreHelper>();

    // OCL 2.1 features: capability table first, then debug-flag overrides.
    bool ocl21FeaturesEnabled = hwInfo.capabilityTable.supportsOcl21Features;
    if (DebugManager.flags.ForceOCLVersion.get() != 0) {
        ocl21FeaturesEnabled = (DebugManager.flags.ForceOCLVersion.get() == 21);
    }
    if (DebugManager.flags.ForceOCL21FeaturesSupport.get() != -1) {
        ocl21FeaturesEnabled = DebugManager.flags.ForceOCL21FeaturesSupport.get();
    }
    if (ocl21FeaturesEnabled) {
        addressing32bitAllowed = false;
    }

    deviceInfo.vendorId = 0x8086;
    deviceInfo.maxReadImageArgs = 128;
    deviceInfo.maxWriteImageArgs = 128;
    deviceInfo.maxParameterSize = 2048; // may be replaced by the IGC-reported value at the end

    deviceInfo.addressBits = 64;
    deviceInfo.ilVersion = spirvWithVersion;

    // copy system info to prevent misaligned reads
    const auto systemInfo = hwInfo.gtSystemInfo;

    deviceInfo.globalMemCachelineSize = 64;

    uint32_t allSubDevicesMask = static_cast<uint32_t>(getDeviceBitfield().to_ulong());
    constexpr uint32_t singleSubDeviceMask = 1;

    deviceInfo.globalMemSize = getGlobalMemorySize(allSubDevicesMask);
    deviceInfo.maxMemAllocSize = getGlobalMemorySize(singleSubDeviceMask); // Allocation can be placed only on one SubDevice

    if (DebugManager.flags.Force32bitAddressing.get() || addressing32bitAllowed || is32bit) {
        double percentOfGlobalMemoryAvailable = getPercentOfGlobalMemoryAvailable();
        deviceInfo.globalMemSize = std::min(deviceInfo.globalMemSize,
                                            static_cast<decltype(deviceInfo.globalMemSize)>(4 * GB * percentOfGlobalMemoryAvailable));
        deviceInfo.addressBits = 32;
        deviceInfo.force32BitAddressess = is64bit;
    }

    deviceInfo.globalMemSize = alignDown(deviceInfo.globalMemSize, MemoryConstants::pageSize);
    deviceInfo.maxMemAllocSize = std::min(deviceInfo.globalMemSize, deviceInfo.maxMemAllocSize); // if globalMemSize was reduced for 32b

    // With implicit scaling on a root device that has several sub-devices, a
    // single allocation may be as large as the whole global memory.
    uint32_t subDeviceCount = gfxCoreHelper.getSubDevicesCount(&getHardwareInfo());
    bool platformImplicitScaling = gfxCoreHelper.platformSupportsImplicitScaling(hwInfo);

    if (((NEO::ImplicitScalingHelper::isImplicitScalingEnabled(
            getDeviceBitfield(), platformImplicitScaling))) &&
        (!isSubDevice()) && (subDeviceCount > 1)) {
        deviceInfo.maxMemAllocSize = deviceInfo.globalMemSize;
    }

    if (!areSharedSystemAllocationsAllowed()) {
        deviceInfo.maxMemAllocSize = ApiSpecificConfig::getReducedMaxAllocSize(deviceInfo.maxMemAllocSize);
        deviceInfo.maxMemAllocSize = std::min(deviceInfo.maxMemAllocSize, gfxCoreHelper.getMaxMemAllocSize());
    }

    // Some specific driver model configurations may impose additional limitations
    // NOTE(review): the numeric_limits type was not legible; the type of
    // maxMemAllocSize is used so that the std::min below compares like types.
    // NOTE(review): this reads root device environment 0, not
    // getRootDeviceEnvironment() - confirm that is intended.
    auto driverModelMaxMemAlloc = std::numeric_limits<decltype(deviceInfo.maxMemAllocSize)>::max();
    if (this->executionEnvironment->rootDeviceEnvironments[0]->osInterface) {
        driverModelMaxMemAlloc = this->executionEnvironment->rootDeviceEnvironments[0]->osInterface->getDriverModel()->getMaxMemAllocSize();
    }
    deviceInfo.maxMemAllocSize = std::min(driverModelMaxMemAlloc, deviceInfo.maxMemAllocSize);

    // Profiling timer: the debug override replaces the resolution and derives
    // the clock from it; otherwise both come from the device queries.
    deviceInfo.profilingTimerResolution = getProfilingTimerResolution();
    if (DebugManager.flags.OverrideProfilingTimerResolution.get() != -1) {
        deviceInfo.profilingTimerResolution = static_cast<decltype(deviceInfo.profilingTimerResolution)>(DebugManager.flags.OverrideProfilingTimerResolution.get());
        deviceInfo.outProfilingTimerClock = static_cast<decltype(deviceInfo.outProfilingTimerClock)>(1000000000.0 / deviceInfo.profilingTimerResolution);
    } else {
        deviceInfo.outProfilingTimerClock = static_cast<decltype(deviceInfo.outProfilingTimerClock)>(getProfilingTimerClock());
    }

    deviceInfo.outProfilingTimerResolution = static_cast<decltype(deviceInfo.outProfilingTimerResolution)>(deviceInfo.profilingTimerResolution);

    constexpr uint64_t maxPixelSize = 16;
    deviceInfo.imageMaxBufferSize = static_cast<decltype(deviceInfo.imageMaxBufferSize)>(deviceInfo.maxMemAllocSize / maxPixelSize);

    deviceInfo.maxNumEUsPerSubSlice = 0;
    deviceInfo.numThreadsPerEU = 0;
    auto simdSizeUsed = DebugManager.flags.UseMaxSimdSizeToDeduceMaxWorkgroupSize.get()
                            ? CommonConstants::maximalSimdSize
                            : gfxCoreHelper.getMinimalSIMDSize();

    // EU counts per (dual) sub-slice: use EuCountPerPoolMin only when it is
    // non-zero and pooled EUs are enabled, otherwise divide the total EU count.
    // NOTE(review): the divisions below assume SubSliceCount and EUCount are
    // non-zero - confirm that gtSystemInfo guarantees this.
    const bool usePerPoolMin = !(systemInfo.EuCountPerPoolMin == 0 || hwInfo.featureTable.flags.ftrPooledEuEnabled == 0);

    deviceInfo.maxNumEUsPerSubSlice = usePerPoolMin
                                          ? systemInfo.EuCountPerPoolMin
                                          : (systemInfo.EUCount / systemInfo.SubSliceCount);
    if (systemInfo.DualSubSliceCount != 0) {
        deviceInfo.maxNumEUsPerDualSubSlice = usePerPoolMin
                                                  ? systemInfo.EuCountPerPoolMin
                                                  : (systemInfo.EUCount / systemInfo.DualSubSliceCount);
    } else {
        deviceInfo.maxNumEUsPerDualSubSlice = deviceInfo.maxNumEUsPerSubSlice;
    }
    deviceInfo.numThreadsPerEU = systemInfo.ThreadCount / systemInfo.EUCount;
    deviceInfo.threadsPerEUConfigs = gfxCoreHelper.getThreadsPerEUConfigs();

    // Max work-group size: threads per (dual) sub-slice times the SIMD size,
    // rounded down to a power of two and capped at 1024 unless overridden.
    // NOTE(review): the cast target type was not legible; uint32_t is assumed
    // (maxWS is later compared with 1024u) - confirm against ProductHelper.
    auto maxWS = productHelper.getMaxThreadsForWorkgroupInDSSOrSS(hwInfo,
                                                                  static_cast<uint32_t>(deviceInfo.maxNumEUsPerSubSlice),
                                                                  static_cast<uint32_t>(deviceInfo.maxNumEUsPerDualSubSlice)) *
                 simdSizeUsed;

    maxWS = Math::prevPowerOfTwo(maxWS);
    deviceInfo.maxWorkGroupSize = std::min(maxWS, 1024u);
    if (DebugManager.flags.OverrideMaxWorkgroupSize.get() != -1) {
        deviceInfo.maxWorkGroupSize = DebugManager.flags.OverrideMaxWorkgroupSize.get();
    }

    deviceInfo.maxWorkItemSizes[0] = deviceInfo.maxWorkGroupSize;
    deviceInfo.maxWorkItemSizes[1] = deviceInfo.maxWorkGroupSize;
    deviceInfo.maxWorkItemSizes[2] = deviceInfo.maxWorkGroupSize;
    deviceInfo.maxSamplers = gfxCoreHelper.getMaxNumSamplers();

    deviceInfo.computeUnitsUsedForScratch = gfxCoreHelper.getComputeUnitsUsedForScratch(this->getRootDeviceEnvironment());
    deviceInfo.maxFrontEndThreads = gfxCoreHelper.getMaxThreadsForVfe(hwInfo);

    deviceInfo.localMemSize = hwInfo.capabilityTable.slmSize * KB;
    if (DebugManager.flags.OverrideSlmSize.get() != -1) {
        deviceInfo.localMemSize = DebugManager.flags.OverrideSlmSize.get() * KB;
    }

    deviceInfo.imageSupport = hwInfo.capabilityTable.supportsImages;
    deviceInfo.image2DMaxWidth = 16384;
    deviceInfo.image2DMaxHeight = 16384;
    deviceInfo.image3DMaxDepth = 2048;
    deviceInfo.imageMaxArraySize = 2048;

    deviceInfo.printfBufferSize = 4 * MB;
    deviceInfo.maxClockFrequency = hwInfo.capabilityTable.maxRenderFrequency;

    deviceInfo.maxSubGroups = gfxCoreHelper.getDeviceSubGroupSizes();

    deviceInfo.vmeAvcSupportsPreemption = hwInfo.capabilityTable.ftrSupportsVmeAvcPreemption;

    // Only a legacy debugger is accepted here; an active one disables preemption.
    NEO::Debugger *debugger = getRootDeviceEnvironment().debugger.get();
    deviceInfo.debuggerActive = false;
    if (debugger) {
        UNRECOVERABLE_IF(!debugger->isLegacy());
        // NOTE(review): cast target was not legible; SourceLevelDebugger is
        // assumed from the isLegacy() check and the included header - confirm.
        deviceInfo.debuggerActive = static_cast<NEO::SourceLevelDebugger *>(debugger)->isDebuggerActive();
    }

    if (deviceInfo.debuggerActive) {
        this->preemptionMode = PreemptionMode::Disabled;
    }

    deviceInfo.name = this->getDeviceName();

    // A non-zero size reported by IGC takes precedence over the 2048 default.
    size_t maxParameterSizeFromIgc = getMaxParameterSizeFromIGC();
    if (maxParameterSizeFromIgc > 0) {
        deviceInfo.maxParameterSize = maxParameterSizeFromIgc;
    }
}

} // namespace NEO