/*
 * Copyright (C) 2018-2023 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "program.h"

#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/compiler_interface/compiler_options.h"
#include "shared/source/compiler_interface/external_functions.h"
#include "shared/source/compiler_interface/intermediate_representations.h"
#include "shared/source/debugger/debugger_l0.h"
#include "shared/source/device/device.h"
#include "shared/source/device_binary_format/elf/elf_encoder.h"
#include "shared/source/device_binary_format/elf/ocl_elf.h"
#include "shared/source/execution_environment/execution_environment.h"
#include "shared/source/execution_environment/root_device_environment.h"
#include "shared/source/helpers/api_specific_config.h"
#include "shared/source/helpers/compiler_options_parser.h"
#include "shared/source/helpers/compiler_product_helper.h"
#include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/helpers/hw_info.h"
#include "shared/source/memory_manager/memory_manager.h"
#include "shared/source/memory_manager/unified_memory_manager.h"
#include "shared/source/os_interface/os_context.h"
#include "shared/source/program/kernel_info.h"

#include "opencl/source/cl_device/cl_device.h"
#include "opencl/source/context/context.h"

#include <algorithm>
#include <cstdlib>
#include <cstring>
#include <memory>
#include <mutex>
#include <sstream>
#include <string>
#include <vector>

namespace NEO {

// Binds the program to a context and a set of devices.
// A program that is not built-in takes an internal reference on its context (dropped in the destructor).
// For every device a build-info entry is created, listing those of its sub-devices that are also
// present in clDevicesIn. Per-root-device tables are sized to cover the highest root device index.
Program::Program(Context *context, bool isBuiltIn, const ClDeviceVector &clDevicesIn) : executionEnvironment(*clDevicesIn[0]->getExecutionEnvironment()),
                                                                                        context(context),
                                                                                        clDevices(clDevicesIn),
                                                                                        isBuiltIn(isBuiltIn) {
    if (this->context && !this->isBuiltIn) {
        this->context->incRefInternal();
    }

    maxRootDeviceIndex = 0;

    for (const auto &device : clDevicesIn) {
        if (device->getRootDeviceIndex() > maxRootDeviceIndex) {
            maxRootDeviceIndex = device->getRootDeviceIndex();
        }
        deviceBuildInfos[device] = {};
        for (auto i = 0u; i < device->getNumSubDevices(); i++) {
            auto subDevice = device->getSubDevice(i);
            if (isDeviceAssociated(*subDevice)) {
                deviceBuildInfos[device].associatedSubDevices.push_back(subDevice);
            }
        }
    }

    buildInfos.resize(maxRootDeviceIndex + 1);
    debuggerInfos.resize(maxRootDeviceIndex + 1);
}

// Builds the internal compiler option string for this program.
// All device-dependent decisions are taken from the first device in clDevices.
std::string Program::getInternalOptions() const {
    auto pClDevice = clDevices[0];
    auto force32BitAddressess = pClDevice->getSharedDeviceInfo().force32BitAddressess;
    auto internalOptions = getOclVersionCompilerInternalOption(pClDevice->getEnabledClVersion());

    if (force32BitAddressess && !isBuiltIn) {
        CompilerOptions::concatenateAppend(internalOptions, CompilerOptions::arch32bit);
    }

    auto &hwInfo = pClDevice->getHardwareInfo();
    const auto &compilerProductHelper = pClDevice->getRootDeviceEnvironment().getHelper<CompilerProductHelper>();
    auto forceToStatelessRequired = compilerProductHelper.isForceToStatelessRequired();
    auto disableStatelessToStatefulOptimization = DebugManager.flags.DisableStatelessToStatefulOptimization.get();

    if ((isBuiltIn && is32bit) || forceToStatelessRequired || disableStatelessToStatefulOptimization) {
        CompilerOptions::concatenateAppend(internalOptions, CompilerOptions::greaterThan4gbBuffersRequired);
    }

    if (ApiSpecificConfig::getBindlessMode()) {
        CompilerOptions::concatenateAppend(internalOptions, CompilerOptions::bindlessMode);
    }

    // The debug flag, when set to anything other than -1, overrides the hardware default.
    auto enableStatelessToStatefulWithOffset = pClDevice->getGfxCoreHelper().isStatelessToStatefulWithOffsetSupported();
    if (DebugManager.flags.EnableStatelessToStatefulBufferOffsetOpt.get() != -1) {
        enableStatelessToStatefulWithOffset = DebugManager.flags.EnableStatelessToStatefulBufferOffsetOpt.get() != 0;
    }

    if (enableStatelessToStatefulWithOffset) {
        CompilerOptions::concatenateAppend(internalOptions, CompilerOptions::hasBufferOffsetArg);
    }

    const auto &productHelper = pClDevice->getProductHelper();
    if (productHelper.isForceEmuInt32DivRemSPWARequired(hwInfo)) {
        CompilerOptions::concatenateAppend(internalOptions, CompilerOptions::forceEmuInt32DivRemSP);
    }

    if (hwInfo.capabilityTable.supportsImages) {
        CompilerOptions::concatenateAppend(internalOptions, CompilerOptions::enableImageSupport);
    }

    if (pClDevice->getDevice().getExecutionEnvironment()->isFP64EmulationEnabled()) {
        CompilerOptions::concatenateAppend(internalOptions, CompilerOptions::enableFP64GenEmu);
    }

    CompilerOptions::concatenateAppend(internalOptions, CompilerOptions::preserveVec3Type);

    auto isDebuggerActive = pClDevice->getDevice().getDebugger() != nullptr;
    CompilerOptions::concatenateAppend(internalOptions, compilerProductHelper.getCachingPolicyOptions(isDebuggerActive));

    return internalOptions;
}

// Releases kernel infos and the constant/global surfaces of every root device, notifies the
// debugger about module destruction and drops the context reference taken in the constructor.
Program::~Program() {
    for (auto i = 0u; i < buildInfos.size(); i++) {
        cleanCurrentKernelInfo(i);
    }

    // A surface known to the context's SVM allocations manager is freed through that manager;
    // any other surface is destroyed by the memory manager.
    auto releaseSurface = [this](GraphicsAllocation *surface) {
        if (nullptr == surface) {
            return;
        }
        auto svmAllocsManager = (nullptr != context) ? context->getSVMAllocsManager() : nullptr;
        if ((nullptr != svmAllocsManager) &&
            (svmAllocsManager->getSVMAlloc(reinterpret_cast<const void *>(surface->getGpuAddress())))) {
            svmAllocsManager->freeSVMAlloc(reinterpret_cast<void *>(surface->getGpuAddress()));
        } else {
            this->executionEnvironment.memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(surface);
        }
    };

    for (const auto &buildInfo : buildInfos) {
        releaseSurface(buildInfo.constantSurface);
        releaseSurface(buildInfo.globalSurface);
    }

    notifyModuleDestroy();

    if (context && !isBuiltIn) {
        context->decRefInternal();
    }
}

// Initializes the program from a user-supplied binary for the root device of clDevice.
// SPIR-V / LLVM bitcode is stored as intermediate representation; a recognized device binary is
// unpacked and either stored as-is or marked for rebuild from its intermediate representation.
// Returns CL_SUCCESS on success, CL_INVALID_BINARY when the binary cannot be used (the SPIR
// path returns whatever processSpirBinary returns).
cl_int Program::createProgramFromBinary(
    const void *pBinary,
    size_t binarySize, ClDevice &clDevice) {

    auto rootDeviceIndex = clDevice.getRootDeviceIndex();
    cl_int retVal = CL_INVALID_BINARY;

    // Drop any state left from a previous binary.
    this->irBinary.reset();
    this->irBinarySize = 0U;
    this->isSpirV = false;
    this->buildInfos[rootDeviceIndex].unpackedDeviceBinary.reset();
    this->buildInfos[rootDeviceIndex].unpackedDeviceBinarySize = 0U;
    this->buildInfos[rootDeviceIndex].packedDeviceBinary.reset();
    this->buildInfos[rootDeviceIndex].packedDeviceBinarySize = 0U;
    this->createdFrom = CreatedFrom::BINARY;

    ArrayRef<const uint8_t> archive(reinterpret_cast<const uint8_t *>(pBinary), binarySize);
    bool isSpirVInput = NEO::isSpirVBitcode(archive);

    if (isSpirVInput || NEO::isLlvmBitcode(archive)) {
        deviceBuildInfos[&clDevice].programBinaryType = CL_PROGRAM_BINARY_TYPE_INTERMEDIATE;
        retVal = processSpirBinary(archive.begin(), archive.size(), isSpirVInput);
    } else if (isAnyDeviceBinaryFormat(archive)) {
        deviceBuildInfos[&clDevice].programBinaryType = CL_PROGRAM_BINARY_TYPE_EXECUTABLE;
        this->isCreatedFromBinary = true;

        auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[rootDeviceIndex];
        auto hwInfo = rootDeviceEnvironment.getHardwareInfo();
        auto productAbbreviation = hardwarePrefix[hwInfo->platform.eProductFamily];

        TargetDevice targetDevice = getTargetDevice(rootDeviceEnvironment);
        std::string decodeErrors;
        std::string decodeWarnings;
        auto singleDeviceBinary = unpackSingleDeviceBinary(archive, ConstStringRef(productAbbreviation, strlen(productAbbreviation)), targetDevice,
                                                           decodeErrors, decodeWarnings);
        if (decodeWarnings.empty() == false) {
            PRINT_DEBUG_STRING(DebugManager.flags.PrintDebugMessages.get(), stderr, "%s\n", decodeWarnings.c_str());
        }

        bool singleDeviceBinaryEmpty = singleDeviceBinary.intermediateRepresentation.empty() && singleDeviceBinary.deviceBinary.empty();
        if (singleDeviceBinaryEmpty || (singleDeviceBinary.deviceBinary.empty() && DebugManager.flags.DisableKernelRecompilation.get())) {
            retVal = CL_INVALID_BINARY;
            PRINT_DEBUG_STRING(DebugManager.flags.PrintDebugMessages.get(), stderr, "%s\n", decodeErrors.c_str());
        } else {
            retVal = CL_SUCCESS;
            this->irBinary = makeCopy(reinterpret_cast<const char *>(singleDeviceBinary.intermediateRepresentation.begin()), singleDeviceBinary.intermediateRepresentation.size());
            this->irBinarySize = singleDeviceBinary.intermediateRepresentation.size();
            this->isSpirV = NEO::isSpirVBitcode(ArrayRef<const uint8_t>(reinterpret_cast<const uint8_t *>(this->irBinary.get()), this->irBinarySize));
            this->options = singleDeviceBinary.buildOptions.str();
            if (singleDeviceBinary.format == NEO::DeviceBinaryFormat::Zebin) {
                this->options += " " + NEO::CompilerOptions::enableZebin.str();
            }

            this->buildInfos[rootDeviceIndex].debugData = makeCopy(reinterpret_cast<const char *>(singleDeviceBinary.debugData.begin()), singleDeviceBinary.debugData.size());
            this->buildInfos[rootDeviceIndex].debugDataSize = singleDeviceBinary.debugData.size();

            this->isGeneratedByIgc = singleDeviceBinary.generator == GeneratorType::Igc;

            auto isVmeUsed = containsVmeUsage(this->buildInfos[rootDeviceIndex].kernelInfoArray);
            bool rebuild = isRebuiltToPatchtokensRequired(&clDevice.getDevice(), archive, this->options, this->isBuiltIn, isVmeUsed);
            rebuild |= DebugManager.flags.RebuildPrecompiledKernels.get();

            // A rebuild needs intermediate representation to start from.
            if (rebuild && 0u == this->irBinarySize) {
                return CL_INVALID_BINARY;
            }
            if ((false == singleDeviceBinary.deviceBinary.empty()) && (false == rebuild)) {
                this->buildInfos[rootDeviceIndex].unpackedDeviceBinary = makeCopy(reinterpret_cast<const char *>(singleDeviceBinary.deviceBinary.begin()), singleDeviceBinary.deviceBinary.size());
                this->buildInfos[rootDeviceIndex].unpackedDeviceBinarySize = singleDeviceBinary.deviceBinary.size();
                this->buildInfos[rootDeviceIndex].packedDeviceBinary = makeCopy(reinterpret_cast<const char *>(archive.begin()), archive.size());
                this->buildInfos[rootDeviceIndex].packedDeviceBinarySize = archive.size();
            } else {
                this->isCreatedFromBinary = false;
                this->requiresRebuild = true;
            }

            switch (singleDeviceBinary.format) {
            default:
                break;
            case DeviceBinaryFormat::OclLibrary:
                deviceBuildInfos[&clDevice].programBinaryType = CL_PROGRAM_BINARY_TYPE_LIBRARY;
                break;
            case DeviceBinaryFormat::OclCompiledObject:
                deviceBuildInfos[&clDevice].programBinaryType = CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT;
                break;
            }
        }
    }

    return retVal;
}

// Sets one specialization constant of a SPIR-V program.
// On first use the ids and sizes of all specialization constants are queried from the compiler
// interface of the first device; the query and the update run under a function-local static mutex.
// Returns CL_INVALID_PROGRAM for non-SPIR-V programs, CL_OUT_OF_HOST_MEMORY when no compiler
// interface is available, CL_INVALID_VALUE when the query fails, otherwise the result of
// updateSpecializationConstant.
cl_int Program::setProgramSpecializationConstant(cl_uint specId, size_t specSize, const void *specValue) {
    if (!isSpirV) {
        return CL_INVALID_PROGRAM;
    }

    static std::mutex mutex;
    std::lock_guard<std::mutex> lock(mutex);

    auto &device = clDevices[0]->getDevice();

    if (!areSpecializationConstantsInitialized) {
        auto pCompilerInterface = device.getCompilerInterface();
        if (nullptr == pCompilerInterface) {
            return CL_OUT_OF_HOST_MEMORY;
        }

        SpecConstantInfo specConstInfo;
        auto retVal = pCompilerInterface->getSpecConstantsInfo(device, ArrayRef<const char>(irBinary.get(), irBinarySize), specConstInfo);

        if (retVal != TranslationOutput::ErrorCode::Success) {
            return CL_INVALID_VALUE;
        }

        this->specConstantsIds.reset(specConstInfo.idsBuffer.release());
        this->specConstantsSizes.reset(specConstInfo.sizesBuffer.release());

        areSpecializationConstantsInitialized = true;
    }

    return updateSpecializationConstant(specId, specSize, specValue);
}

// Stores the value of specialization constant specId.
// Returns CL_INVALID_SPEC_ID when the id is unknown and CL_INVALID_VALUE when specSize does not
// match the size recorded for that id.
cl_int Program::updateSpecializationConstant(cl_uint specId, size_t specSize, const void *specValue) {
    for (uint32_t i = 0; i < specConstantsIds->GetSize<uint32_t>(); i++) {
        if (specConstantsIds->GetMemory<uint32_t>()[i] == specId) {
            if (specConstantsSizes->GetMemory<uint32_t>()[i] == static_cast<uint32_t>(specSize)) {
                // Values are kept widened to 64 bits; unused upper bytes stay zero.
                uint64_t specConstValue = 0u;
                memcpy_s(&specConstValue, sizeof(uint64_t), specValue, specSize);
                specConstantsValues[specId] = specConstValue;
                return CL_SUCCESS;
            } else {
                return CL_INVALID_VALUE;
            }
        }
    }
    return CL_INVALID_SPEC_ID;
}

// Copies the program source into `binary`.
// Returns CL_INVALID_PROGRAM (and an empty `binary`) when the program holds no source code.
cl_int Program::getSource(std::string &binary) const {
    cl_int retVal = CL_INVALID_PROGRAM;
    binary = {};
    if (!sourceCode.empty()) {
        binary = sourceCode;
        retVal = CL_SUCCESS;
    }
    return retVal;
}

// Appends a message to the build log of the given root device.
// Trailing '\0' characters are trimmed first; an empty message is ignored. Consecutive messages
// are separated by a newline.
void Program::updateBuildLog(uint32_t rootDeviceIndex, const char *pErrorString, size_t errorStringSize) {
    auto isTerminator = [](char c) { return c == '\0'; };
    auto errorString = ConstStringRef(pErrorString, errorStringSize).trimEnd(isTerminator);
    if (errorString.empty()) {
        return;
    }

    auto &buildLog = buildInfos[rootDeviceIndex].buildLog;
    if (false == buildLog.empty()) {
        buildLog.append("\n");
    }
    buildLog.append(errorString.begin(), errorString.end());
}

// Returns the build log of the given root device as a C string owned by the program.
const char *Program::getBuildLog(uint32_t rootDeviceIndex) const {
    // Must bind by reference: the returned pointer refers to the stored log, not to a copy.
    auto &currentLog = buildInfos[rootDeviceIndex].buildLog;
    return currentLog.c_str();
}

// Destroys all kernel infos of the given root device together with their kernel allocations.
void Program::cleanCurrentKernelInfo(uint32_t rootDeviceIndex) {
    auto &buildInfo = buildInfos[rootDeviceIndex];
    for (auto &kernelInfo : buildInfo.kernelInfoArray) {
        if (kernelInfo->kernelAllocation) {
            // register cache flush in all csrs where kernel allocation was used
            for (auto &engine : this->executionEnvironment.memoryManager->getRegisteredEngines(rootDeviceIndex)) {
                auto contextId = engine.osContext->getContextId();
                if (kernelInfo->kernelAllocation->isUsedByOsContext(contextId)) {
                    engine.commandStreamReceiver->registerInstructionCacheFlush();
                }
            }

            if (executionEnvironment.memoryManager->isKernelBinaryReuseEnabled()) {
                // Shared kernel binaries are reference counted by kernel name; the allocation is
                // destroyed only when the last user goes away.
                auto lock = executionEnvironment.memoryManager->lockKernelAllocationMap();
                const auto &kernelName = kernelInfo->kernelDescriptor.kernelMetadata.kernelName;
                auto &storedBinaries = executionEnvironment.memoryManager->getKernelAllocationMap();
                auto kernelAllocations = storedBinaries.find(kernelName);
                if (kernelAllocations != storedBinaries.end()) {
                    kernelAllocations->second.reuseCounter--;
                    if (kernelAllocations->second.reuseCounter == 0) {
                        this->executionEnvironment.memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(kernelAllocations->second.kernelAllocation);
                        storedBinaries.erase(kernelAllocations);
                    }
                }
            } else {
                this->executionEnvironment.memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(kernelInfo->kernelAllocation);
            }
        }
        delete kernelInfo;
    }
    buildInfo.kernelInfoArray.clear();
}

// Derives programOptionVersion from the build options and enables non-uniform work-groups for
// version >= 2.0 unless the uniform work-group size option is present.
void Program::updateNonUniformFlag() {
    // Look for -cl-std=CL substring and extract value behind which can be 1.2 2.0 2.1 and convert to value
    auto pos = options.find(clStdOptionName);
    if (pos == std::string::npos) {
        programOptionVersion = 12u; // Default is 1.2
    } else {
        std::stringstream ss{options.c_str() + pos + clStdOptionName.size()};
        uint32_t majorV = 0u, minorV = 0u;
        char dot = 0u;
        ss >> majorV;
        ss >> dot;
        ss >> minorV;
        programOptionVersion = majorV * 10u + minorV;
    }

    if (programOptionVersion >= 20u && (false == CompilerOptions::contains(options, CompilerOptions::uniformWorkgroupSize))) {
        allowNonUniform = true;
    }
}

// Sets allowNonUniform to true only when every input program allows non-uniform work-groups
// (true for an empty input list).
void Program::updateNonUniformFlag(const Program **inputPrograms, size_t numInputPrograms) {
    bool allInputsAllowNonUniform = true;
    for (size_t i = 0; i < numInputPrograms; i++) {
        allInputsAllowNonUniform = allInputsAllowNonUniform && inputPrograms[i]->getAllowNonUniform();
    }
    this->allowNonUniform = allInputsAllowNonUniform;
}

// Replaces the device binary of the given root device with newBinary.
// A packed binary is stored as the packed binary (and additionally copied as the unpacked binary
// when it is also a single-device format); anything else is stored as the unpacked binary only.
void Program::replaceDeviceBinary(std::unique_ptr<char[]> &&newBinary, size_t newBinarySize, uint32_t rootDeviceIndex) {
    auto &buildInfo = this->buildInfos[rootDeviceIndex];
    if (isAnyPackedDeviceBinaryFormat(ArrayRef<const uint8_t>(reinterpret_cast<uint8_t *>(newBinary.get()), newBinarySize))) {
        buildInfo.packedDeviceBinary = std::move(newBinary);
        buildInfo.packedDeviceBinarySize = newBinarySize;
        buildInfo.unpackedDeviceBinary.reset();
        buildInfo.unpackedDeviceBinarySize = 0U;
        if (isAnySingleDeviceBinaryFormat(ArrayRef<const uint8_t>(reinterpret_cast<uint8_t *>(buildInfo.packedDeviceBinary.get()), buildInfo.packedDeviceBinarySize))) {
            buildInfo.unpackedDeviceBinary = makeCopy(buildInfo.packedDeviceBinary.get(), buildInfo.packedDeviceBinarySize);
            buildInfo.unpackedDeviceBinarySize = buildInfo.packedDeviceBinarySize;
        }
    } else {
        buildInfo.packedDeviceBinary.reset();
        buildInfo.packedDeviceBinarySize = 0U;
        buildInfo.unpackedDeviceBinary = std::move(newBinary);
        buildInfo.unpackedDeviceBinarySize = newBinarySize;
    }
}

// Ensures a packed device binary exists for the root device of clDevice.
// It is produced from the unpacked device binary when one exists, otherwise from the intermediate
// representation wrapped in an ELF. Returns CL_INVALID_PROGRAM when neither is available and
// CL_OUT_OF_HOST_MEMORY when packing yields an empty result.
cl_int Program::packDeviceBinary(ClDevice &clDevice) {
    auto rootDeviceIndex = clDevice.getRootDeviceIndex();
    if (nullptr != buildInfos[rootDeviceIndex].packedDeviceBinary) {
        return CL_SUCCESS;
    }

    auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[rootDeviceIndex];

    if (nullptr != this->buildInfos[rootDeviceIndex].unpackedDeviceBinary.get()) {
        SingleDeviceBinary singleDeviceBinary = {};
        singleDeviceBinary.targetDevice = NEO::getTargetDevice(rootDeviceEnvironment);
        singleDeviceBinary.buildOptions = this->options;
        singleDeviceBinary.deviceBinary = ArrayRef<const uint8_t>(reinterpret_cast<uint8_t *>(this->buildInfos[rootDeviceIndex].unpackedDeviceBinary.get()), this->buildInfos[rootDeviceIndex].unpackedDeviceBinarySize);
        singleDeviceBinary.intermediateRepresentation = ArrayRef<const uint8_t>(reinterpret_cast<uint8_t *>(this->irBinary.get()), this->irBinarySize);
        singleDeviceBinary.debugData = ArrayRef<const uint8_t>(reinterpret_cast<uint8_t *>(this->buildInfos[rootDeviceIndex].debugData.get()), this->buildInfos[rootDeviceIndex].debugDataSize);

        std::string packWarnings;
        std::string packErrors;
        auto packedDeviceBinary = NEO::packDeviceBinary(singleDeviceBinary, packErrors, packWarnings);
        if (packedDeviceBinary.empty()) {
            DEBUG_BREAK_IF(true);
            return CL_OUT_OF_HOST_MEMORY;
        }
        this->buildInfos[rootDeviceIndex].packedDeviceBinary = makeCopy(packedDeviceBinary.data(), packedDeviceBinary.size());
        this->buildInfos[rootDeviceIndex].packedDeviceBinarySize = packedDeviceBinary.size();
    } else if (nullptr != this->irBinary.get()) {
        NEO::Elf::ElfEncoder<> elfEncoder(true, true, 1U);
        if (deviceBuildInfos[&clDevice].programBinaryType == CL_PROGRAM_BINARY_TYPE_LIBRARY) {
            elfEncoder.getElfFileHeader().type = NEO::Elf::ET_OPENCL_LIBRARY;
        } else {
            elfEncoder.getElfFileHeader().type = NEO::Elf::ET_OPENCL_OBJECTS;
        }
        elfEncoder.appendSection(NEO::Elf::SHT_OPENCL_SPIRV, NEO::Elf::SectionNamesOpenCl::spirvObject, ArrayRef<const uint8_t>::fromAny(this->irBinary.get(), this->irBinarySize));
        elfEncoder.appendSection(NEO::Elf::SHT_OPENCL_OPTIONS, NEO::Elf::SectionNamesOpenCl::buildOptions, this->options);
        auto elfData = elfEncoder.encode();
        this->buildInfos[rootDeviceIndex].packedDeviceBinary = makeCopy(elfData.data(), elfData.size());
        this->buildInfos[rootDeviceIndex].packedDeviceBinarySize = elfData.size();
    } else {
        return CL_INVALID_PROGRAM;
    }

    return CL_SUCCESS;
}

// Sets the same build status on every device build info of the program.
void Program::setBuildStatus(cl_build_status status) {
    for (auto &deviceBuildInfo : deviceBuildInfos) {
        deviceBuildInfo.second.buildStatus = status;
    }
}

// Marks the given devices and their associated sub-devices as successfully built with binaryType.
// A device whose binary type changes is appended to clDevicesInProgram under lockMutex.
void Program::setBuildStatusSuccess(const ClDeviceVector &deviceVector, cl_program_binary_type binaryType) {
    for (const auto &device : deviceVector) {
        deviceBuildInfos[device].buildStatus = CL_BUILD_SUCCESS;
        if (deviceBuildInfos[device].programBinaryType != binaryType) {
            std::unique_lock<std::mutex> lock(lockMutex);
            clDevicesInProgram.push_back(device);
        }
        deviceBuildInfos[device].programBinaryType = binaryType;
        for (const auto &subDevice : deviceBuildInfos[device].associatedSubDevices) {
            deviceBuildInfos[subDevice].buildStatus = CL_BUILD_SUCCESS;
            if (deviceBuildInfos[subDevice].programBinaryType != binaryType) {
                std::unique_lock<std::mutex> lock(lockMutex);
                clDevicesInProgram.push_back(subDevice);
            }
            deviceBuildInfos[subDevice].programBinaryType = binaryType;
        }
    }
}

// Returns true when at least one of the given kernels reports VME usage.
bool Program::containsVmeUsage(const std::vector<KernelInfo *> &kernelInfos) const {
    for (auto kernelInfo : kernelInfos) {
        if (kernelInfo->isVmeUsed()) {
            return true;
        }
    }
    return false;
}

// Asks the compiler interface of the root device to disable zebin when the build options or the
// source code mention one of the VME extensions. Does nothing when the DontDisableZebinIfVmeUsed
// debug flag is set.
void Program::disableZebinIfVmeEnabled(std::string &options, std::string &internalOptions, const std::string &sourceCode) {

    const char *vmeOptions[] = {"cl_intel_device_side_advanced_vme_enable",
                                "cl_intel_device_side_avc_vme_enable",
                                "cl_intel_device_side_vme_enable"};

    const char *vmeEnabledExtensions[] = {"cl_intel_motion_estimation : enable",
                                          "cl_intel_device_side_avc_motion_estimation : enable",
                                          "cl_intel_advanced_motion_estimation : enable"};

    auto containsVme = [](const auto &data, const auto &patterns) {
        for (const auto &pattern : patterns) {
            auto pos = data.find(pattern);
            if (pos != std::string::npos) {
                return true;
            }
        }
        return false;
    };

    if (DebugManager.flags.DontDisableZebinIfVmeUsed.get() == true) {
        return;
    }

    if (containsVme(options, vmeOptions) || containsVme(sourceCode, vmeEnabledExtensions)) {
        const auto &rootDevice = getDevices()[0]->getDevice().getRootDevice();
        rootDevice->getCompilerInterface()->disableZebin(options, internalOptions);
    }
}

// A callback/user-data pair is invalid only when user data is passed without a callback.
bool Program::isValidCallback(void(CL_CALLBACK *funcNotify)(cl_program program, void *userData), void *userData) {
    return funcNotify != nullptr || userData == nullptr;
}

// Invokes the notification callback, if any, with this program and the user data.
void Program::invokeCallback(void(CL_CALLBACK *funcNotify)(cl_program program, void *userData), void *userData) {
    if (funcNotify != nullptr) {
        (*funcNotify)(this, userData);
    }
}

// Returns true when clDevice is one of the devices this program was created for.
bool Program::isDeviceAssociated(const ClDevice &clDevice) const {
    return std::any_of(clDevices.begin(), clDevices.end(), [&](auto programDevice) { return programDevice == &clDevice; });
}

// Validates a (numDevices, deviceList) pair against allAvailableDevices.
// With no list and numDevices == 0, deviceVectorPtr is redirected to allAvailableDevices.
// Otherwise every listed device is appended to *deviceVectorPtr.
// Returns CL_INVALID_VALUE for an inconsistent pair and CL_INVALID_DEVICE for an unknown device.
cl_int Program::processInputDevices(ClDeviceVector *&deviceVectorPtr, cl_uint numDevices, const cl_device_id *deviceList, const ClDeviceVector &allAvailableDevices) {
    if (deviceList == nullptr) {
        if (numDevices == 0) {
            deviceVectorPtr = const_cast<ClDeviceVector *>(&allAvailableDevices);
        } else {
            return CL_INVALID_VALUE;
        }
    } else {
        if (numDevices == 0) {
            return CL_INVALID_VALUE;
        } else {
            for (auto i = 0u; i < numDevices; i++) {
                auto device = castToObject<ClDevice>(deviceList[i]);
                if (!device || !std::any_of(allAvailableDevices.begin(), allAvailableDevices.end(), [&](auto validDevice) { return validDevice == device; })) {
                    return CL_INVALID_DEVICE;
                }
                deviceVectorPtr->push_back(device);
            }
        }
    }
    return CL_SUCCESS;
}

// Prepends the source-path option with the quoted file name to the build options, unless the
// file name is empty or the CM compiler option is in use.
void Program::prependFilePathToOptions(const std::string &filename) {
    auto isCMCOptionUsed = CompilerOptions::contains(options, CompilerOptions::useCMCompiler);
    if (!filename.empty() && false == isCMCOptionUsed) {
        // Add "-s" flag first so it will be ignored by clang in case the options already have this flag set.
        options = CompilerOptions::generateSourcePath.str() + " " + CompilerOptions::wrapInQuotes(filename) + " " + options;
    }
}

const std::vector<ConstStringRef> Program::internalOptionsToExtract = {CompilerOptions::gtpinRera,
                                                                       CompilerOptions::defaultGrf,
                                                                       CompilerOptions::largeGrf,
                                                                       CompilerOptions::autoGrf,
                                                                       CompilerOptions::greaterThan4gbBuffersRequired,
                                                                       CompilerOptions::numThreadsPerEu};

// Every extractable option is a plain flag except numThreadsPerEu, which carries a value.
bool Program::isFlagOption(ConstStringRef option) {
    if (option == CompilerOptions::numThreadsPerEu) {
        return false;
    }
    return true;
}

// Validates the value of a value-carrying option. Only numThreadsPerEu is recognized: its value
// must be one of the threads-per-EU configurations of the first device.
bool Program::isOptionValueValid(ConstStringRef option, ConstStringRef value) {
    if (option == CompilerOptions::numThreadsPerEu) {
        const auto &threadCounts = clDevices[0]->getSharedDeviceInfo().threadsPerEUConfigs;
        // Parse an owned, NUL-terminated copy: `value` is a non-owning view, so atoi on
        // value.data() could read characters beyond the end of the view.
        const auto requestedThreadCount = atoi(value.str().c_str());
        if (std::find(threadCounts.begin(), threadCounts.end(), requestedThreadCount) != threadCounts.end()) {
            return true;
        }
    }
    return false;
}

Context &Program::getContext() const {
    return *context;
}

Context *Program::getContextPtr() const {
    return context;
}

// Returns the devices recorded by setBuildStatusSuccess, or all program devices when none were
// recorded yet.
const ClDeviceVector &Program::getDevicesInProgram() const {
    if (clDevicesInProgram.empty()) {
        return clDevices;
    } else {
        return clDevicesInProgram;
    }
}

// Registers the module with the L0 debugger of every device that has one (built-in programs are
// skipped): creates the debug zebin, registers its ELF, attaches the module allocations and
// reports the module load address.
void Program::notifyModuleCreate() {
    if (isBuiltIn) {
        return;
    }

    for (const auto &device : clDevices) {
        auto debuggerL0 = device->getDevice().getL0Debugger();
        if (debuggerL0) {
            auto rootDeviceIndex = device->getRootDeviceIndex();
            auto &buildInfo = this->buildInfos[rootDeviceIndex];
            auto refBin = ArrayRef<const uint8_t>(reinterpret_cast<const uint8_t *>(buildInfo.unpackedDeviceBinary.get()), buildInfo.unpackedDeviceBinarySize);

            // NOTE(review): format template argument reconstructed as Zebin from the zebin-specific
            // calls below - confirm.
            if (NEO::isDeviceBinaryFormat<NEO::DeviceBinaryFormat::Zebin>(refBin)) {

                createDebugZebin(rootDeviceIndex);
                NEO::DebugData debugData;
                debugData.vIsa = reinterpret_cast<const char *>(buildInfo.debugData.get());
                debugData.vIsaSize = static_cast<uint32_t>(buildInfo.debugDataSize);
                this->debuggerInfos[rootDeviceIndex].debugElfHandle = debuggerL0->registerElf(&debugData);

                auto allocs = getModuleAllocations(rootDeviceIndex);
                debuggerL0->attachZebinModuleToSegmentAllocations(allocs, this->debuggerInfos[rootDeviceIndex].debugModuleHandle, this->debuggerInfos[rootDeviceIndex].debugElfHandle);
                debuggerL0->notifyModuleLoadAllocations(&device->getDevice(), allocs);

                // The module load address is the lowest GPU address among the module allocations.
                // NOTE(review): assumes allocs is non-empty and holds no null entries; otherwise
                // the dereference below is invalid - confirm with callers.
                auto minGpuAddressAlloc = std::min_element(allocs.begin(), allocs.end(), [](const auto &alloc1, const auto &alloc2) { return alloc1->getGpuAddress() < alloc2->getGpuAddress(); });
                this->debuggerInfos[rootDeviceIndex].moduleLoadAddress = (*minGpuAddressAlloc)->getGpuAddress();
                debuggerL0->notifyModuleCreate(buildInfo.debugData.get(), static_cast<uint32_t>(buildInfo.debugDataSize), this->debuggerInfos[rootDeviceIndex].moduleLoadAddress);
            }
        }
    }
}

// Unregisters the module from the L0 debugger of every device that has one (built-in programs
// are skipped).
void Program::notifyModuleDestroy() {
    if (isBuiltIn) {
        return;
    }

    for (const auto &device : clDevices) {
        auto debuggerL0 = device->getDevice().getL0Debugger();
        if (debuggerL0) {
            auto rootDeviceIndex = device->getRootDeviceIndex();
            auto tempHandle = this->debuggerInfos[rootDeviceIndex].debugModuleHandle;
            // A zero handle means no zebin module was attached for this root device.
            if (tempHandle != 0) {
                debuggerL0->removeZebinModule(tempHandle);
            }
            debuggerL0->notifyModuleDestroy(this->debuggerInfos[rootDeviceIndex].moduleLoadAddress);
        }
    }
}

// Collects the graphics allocations of all kernels of the given root device, followed by the
// global and constant surfaces when present.
// NOTE(review): StackVec element type / capacity reconstructed - confirm against the declaration in program.h.
StackVec<NEO::GraphicsAllocation *, 32> Program::getModuleAllocations(uint32_t rootIndex) {
    StackVec<NEO::GraphicsAllocation *, 32> allocs;
    auto &kernelInfoArray = buildInfos[rootIndex].kernelInfoArray;

    for (const auto &kernelInfo : kernelInfoArray) {
        allocs.push_back(kernelInfo->getGraphicsAllocation());
    }
    GraphicsAllocation *globalsForPatching = getGlobalSurface(rootIndex);
    GraphicsAllocation *constantsForPatching = getConstantSurface(rootIndex);

    if (globalsForPatching) {
        allocs.push_back(globalsForPatching);
    }
    if (constantsForPatching) {
        allocs.push_back(constantsForPatching);
    }
    return allocs;
}

} // namespace NEO