diff --git a/IGC/CMakeLists.txt b/IGC/CMakeLists.txt index ef849a05e..a5af5c444 100644 --- a/IGC/CMakeLists.txt +++ b/IGC/CMakeLists.txt @@ -3223,132 +3223,7 @@ endif() # ==================================== LLVM package ==================================================== -if(TARGET LLVMCore) - # LLVM targets have been already defined. - if(LLVM_USE_PREBUILT) - message(STATUS "[IGC] Using LLVM prebuilt ${LLVM_PACKAGE_VERSION}") - message(STATUS "[IGC] Using LLVM libraries from: ${LLVM_LIBRARY_DIR}") - else() - message(STATUS "[IGC] Using LLVM sources") - endif() - - message(STATUS "[IGC] Using LLVM includes from: ${LLVM_INCLUDE_DIRS}") -else() - - message(STATUS "[IGC] LLVM targets are not defined. Searching for LLVM.") - if(NOT IGC_PREFERRED_LLVM_VERSION) - set(IGC_PREFERRED_LLVM_VERSION "7.0.0") - endif() - - # Look for LLVM sources. - if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/../../llvm_patches) - set(IGC_LLVM_HOME_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../../llvm_patches) - elseif(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/../external/llvm) - set(IGC_LLVM_HOME_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../external/llvm) - endif() - - if(IGC_LLVM_HOME_DIR AND NOT IGC_OPTION__FORCE_SYSTEM_LLVM) - message(STATUS "[IGC] IGC will build LLVM from sources, because ${IGC_LLVM_HOME_DIR} was found. Use IGC_OPTION__FORCE_SYSTEM_LLVM=1 to use system LLVM.") - if(IGC_OPTION__LLVM_DUMP_DIR) - set(LLVM_PREBUILT_PATH ${IGC_OPTION__LLVM_DUMP_DIR}) - include(${IGC_LLVM_HOME_DIR}/llvm_prebuilt.cmake) - else() - message(STATUS "[IGC] LLVM compilation from ${IGC_LLVM_HOME_DIR}") - # Place LLVM build directory inside IGC build directory - add_subdirectory(${IGC_LLVM_HOME_DIR} ${CMAKE_CURRENT_BINARY_DIR}/llvm/build) - endif() - else() - find_package(LLVM ${IGC_PREFERRED_LLVM_VERSION}) - if(LLVM_FOUND) - message(STATUS "[IGC] Using system LLVM ${LLVM_PACKAGE_VERSION}") - set(IGC_BUILD__USING_SYSTEM_LLVM TRUE) - else() - message(FATAL_ERROR "[IGC] Unknown location of LLVM component. 
Couldn't find neither LLVM package in the system nor LLVM source files.") - endif() - endif() -endif() - -if(NOT DEFINED COMMON_CLANG_LIBRARY_NAME) - set(COMMON_CLANG_LIBRARY_NAME opencl-clang) -endif() - -if(WIN32) - igc_arch_get_cpu(_cpuSuffix) - set(COMMON_CLANG_LIBRARY_NAME ${COMMON_CLANG_LIBRARY_NAME}${_cpuSuffix}) - set(COMMON_CLANG_LIB_FULL_NAME "${COMMON_CLANG_LIBRARY_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}") - - message(STATUS "OpenCL Clang library name to load: ${COMMON_CLANG_LIB_FULL_NAME}") - - set_property(DIRECTORY APPEND PROPERTY COMPILE_DEFINITIONS - COMMON_CLANG_LIB_FULL_NAME=\"${COMMON_CLANG_LIB_FULL_NAME}\" - ) -else() - set(COMMON_CLANG_LIB_FULL_NAME "lib${COMMON_CLANG_LIBRARY_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}") -endif() - -if(LLVM_LINK_LLVM_DYLIB) - # LLVM was built and configured in a way that tools (in our case IGC) should be linked - # against single LLVM dynamic library. - - # SET_LLVM_LIB_PATH is a CMake variable that can be passed in to specify the location - # to look for the LLVM .so. In some cases this is useful if multiple LLVM versions are installed - if(SET_LLVM_LIB_PATH) - set(IGC_BUILD__LLVM_LIBS_TO_LINK "${SET_LLVM_LIB_PATH}/libLLVM-${LLVM_VERSION_MAJOR}.so") - message(STATUS "[IGC] Link against specified LLVM dylib ${IGC_BUILD__LLVM_LIBS_TO_LINK}") - else() - find_library(IGC_BUILD__LLVM_LIBS_TO_LINK "libLLVM-${LLVM_VERSION_MAJOR}.so") - if(IGC_BUILD__LLVM_LIBS_TO_LINK) - message(STATUS "[IGC] Link against found LLVM dylib ${IGC_BUILD__LLVM_LIBS_TO_LINK}") - else() - message(FATAL_ERROR "[IGC] Could not find the LLVM dylib. Aborting.") - endif() - endif() - -else() - # LLVM was built into multiple libraries (static or shared). - message(STATUS "[IGC] Link against LLVM static or shared component libs") - - # Link targets/dependencies (in required link order). - # NOTE: Since the libraries are grouped in the same link group (in GCC/CLANG), - # there is no longer need to order in most dependant first manner. 
- set(IGC_BUILD__LLVM_LIBS_TO_LINK - "LLVMipo" - "LLVMIRReader" - "LLVMBitWriter" - "LLVMBinaryFormat" - "LLVMAsmParser" - "LLVMBitReader" - "LLVMLinker" - "LLVMCodeGen" - "LLVMScalarOpts" - "LLVMTransformUtils" - "LLVMAnalysis" - "LLVMTarget" - "LLVMObjCARCOpts" - "LLVMVectorize" - "LLVMInstrumentation" - "LLVMObject" - "LLVMMCParser" - "LLVMProfileData" - "LLVMMC" - "LLVMCore" - "LLVMSupport" - "LLVMDemangle" - ) - - if(LLVM_VERSION_MAJOR GREATER_EQUAL 8) - list(APPEND IGC_BUILD__LLVM_LIBS_TO_LINK - "LLVMInstCombine" - ) - endif() - - if(LLVM_VERSION_MAJOR GREATER_EQUAL 9) - list(APPEND IGC_BUILD__LLVM_LIBS_TO_LINK - "LLVMBitstreamReader" - ) - endif() - -endif() +include("${CMAKE_CURRENT_SOURCE_DIR}/../external/llvm/llvm.cmake") # ==================================== WrapperLLVM package ============================================= diff --git a/IGC/Compiler/tests/CMakeLists.txt b/IGC/Compiler/tests/CMakeLists.txt index ff3d665c3..798eb50e3 100644 --- a/IGC/Compiler/tests/CMakeLists.txt +++ b/IGC/Compiler/tests/CMakeLists.txt @@ -1,6 +1,6 @@ # # -if(NOT TARGET igc_opt OR LLVM_USE_PREBUILT) +if(NOT TARGET igc_opt OR ${IGC_OPTION__LLVM_PREBUILDS}) message("[check-igc] LIT tests disabled. Missing igc_opt target.") elseif(NOT BS_ENABLE_ULT) message("[check-igc] LIT tests disabled. 
Set `BS_ENABLE_ULT` to enable.") diff --git a/IGC/ElfPackager/CMakeLists.txt b/IGC/ElfPackager/CMakeLists.txt index 563ac9d6e..4c6823741 100644 --- a/IGC/ElfPackager/CMakeLists.txt +++ b/IGC/ElfPackager/CMakeLists.txt @@ -41,7 +41,7 @@ if (MSVC) set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} /OPT:REF /OPT:ICF") set(CMAKE_SHARED_LINKER_FLAGS_RELEASE-INTERNAL "${CMAKE_SHARED_LINKER_FLAGS_RELEASE-INTERNAL} /OPT:REF /OPT:ICF") SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Zi") -elseif(NOT IGC_BUILD__USING_SYSTEM_LLVM) +elseif(NOT ${IGC_OPTION__LLVM_FROM_SYSTEM}) set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${LLVM_TOOLS_BINARY_DIR}) set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${LLVM_TOOLS_BINARY_DIR}) set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${LLVM_TOOLS_BINARY_DIR}) diff --git a/IGC/VectorCompiler/CMakeLists.txt b/IGC/VectorCompiler/CMakeLists.txt index 86f343eee..8300cf764 100644 --- a/IGC/VectorCompiler/CMakeLists.txt +++ b/IGC/VectorCompiler/CMakeLists.txt @@ -49,7 +49,7 @@ igc_arch_get_cpu(_cpuSuffix) set(BUILD_EXTERNAL YES) # --- LLVM --- -if(IGC_BUILD__USING_SYSTEM_LLVM OR (WIN32 AND LLVM_USE_PREBUILT)) +if(${IGC_OPTION__LLVM_FROM_SYSTEM} OR (WIN32 AND ${IGC_OPTION__LLVM_PREBUILDS})) message(STATUS "[VC] Using system llvm") # Need to search for llvm-tblgen @@ -65,7 +65,7 @@ if(IGC_BUILD__USING_SYSTEM_LLVM OR (WIN32 AND LLVM_USE_PREBUILT)) # find_package was called by igc cmake, no need to do it again. else() # Prebuilt llvm does not have tblgen... 
- if(LLVM_USE_PREBUILT) + if(${IGC_OPTION__LLVM_PREBUILDS}) message(FATAL_ERROR "[VC] vector compiler with prebuilt llvm is not supported") endif() diff --git a/IGC/VectorCompiler/cmake/spirv.cmake b/IGC/VectorCompiler/cmake/spirv.cmake index 4fc055794..a317cd26e 100644 --- a/IGC/VectorCompiler/cmake/spirv.cmake +++ b/IGC/VectorCompiler/cmake/spirv.cmake @@ -80,7 +80,7 @@ if(DEFINED SPIRVDLL_SRC) message(FATAL_ERROR "[VC] Cannot find SPIRVDLL sources in ${SPIRVDLL_SRC}") endif() set(SPIRV_SOURCES ${SPIRVDLL_SRC}) - if(IGC_OPTION__FORCE_SYSTEM_LLVM) + if(${IGC_OPTION__LLVM_FROM_SYSTEM}) ExternalProject_Add(SPIRVDLL_EX PREFIX ${CMAKE_CURRENT_BINARY_DIR}/SPIRVDLL @@ -100,7 +100,7 @@ if(DEFINED SPIRVDLL_SRC) INSTALL_DIR ${CMAKE_CURRENT_BINARY_DIR}/spirv-install ) - endif(IGC_OPTION__FORCE_SYSTEM_LLVM) + endif(${IGC_OPTION__LLVM_FROM_SYSTEM}) add_dependencies(SPIRVDLL_EX VCCodeGen) install(FILES @@ -158,7 +158,7 @@ else() ${SPRIV_BRANCH_PATCH} ) - if(IGC_OPTION__FORCE_SYSTEM_LLVM) + if(${IGC_OPTION__LLVM_FROM_SYSTEM}) ExternalProject_Add(SPIRVDLL_EX PREFIX ${CMAKE_CURRENT_BINARY_DIR}/SPIRVDLL @@ -178,7 +178,7 @@ else() INSTALL_DIR ${CMAKE_CURRENT_BINARY_DIR}/spirv-install ) - endif(IGC_OPTION__FORCE_SYSTEM_LLVM) + endif(${IGC_OPTION__LLVM_FROM_SYSTEM}) add_dependencies(SPIRVDLL_EX VCCodeGen) diff --git a/documentation/build_ubuntu.md b/documentation/build_ubuntu.md index eda1c650a..0be0729f1 100644 --- a/documentation/build_ubuntu.md +++ b/documentation/build_ubuntu.md @@ -50,7 +50,6 @@ $ git clone https://github.com/intel/vc-intrinsics vc-intrinsics $ git clone -b release/10.x https://github.com/llvm/llvm-project llvm-project $ git clone -b ocl-open-100 https://github.com/intel/opencl-clang llvm-project/llvm/projects/opencl-clang $ git clone -b llvm_release_100 https://github.com/KhronosGroup/SPIRV-LLVM-Translator llvm-project/llvm/projects/llvm-spirv -$ git clone https://github.com/intel/llvm-patches llvm_patches ``` These commands will set up a workspace with LLVM 
10. If you wish to use any other version please refer to the [component revision table](#Revision-table) @@ -59,7 +58,6 @@ Correct directory tree looks like this: |- igc https://github.com/intel/intel-graphics-compiler |- vc-intrinsics https://github.com/intel/vc-intrinsics - |- llvm_patches https://github.com/intel/llvm-patches |- llvm-project https://github.com/llvm/llvm-project |- llvm/projects/opencl-clang https://github.com/intel/opencl-clang |- llvm/projects/llvm-spirv https://github.com/KhronosGroup/SPIRV-LLVM-Translator @@ -173,7 +171,6 @@ Latest known configuration that compiles successfully: |-|-|-| | igc | master | 7d11ff43f42564fdfe2753b4d008abfd56ec9671 | | vc-intrinsics | master | eabcd2022cf868a658b257b8ea6ad62acbbe7dc5 | -| llvm_patches | master | d8b63ab67d688db9e60bca469d58f6aa3ec6b2a1 | | llvm-project | release/11.x | llvmorg-11.0.0 | | opencl-clang | ocl-open-110 | cdacb8a1dba95e8ebc5d948c0e0e574f87b1e861 | | SPIRV-LLVM-Translator | llvm_release_110 | d6dc999eee381158a26f048a333467c9ce7e77f2 | diff --git a/external/llvm/CMakeLists.txt b/external/llvm/CMakeLists.txt new file mode 100644 index 000000000..632813240 --- /dev/null +++ b/external/llvm/CMakeLists.txt @@ -0,0 +1,392 @@ +#===================== begin_copyright_notice ================================== + +#Copyright (c) 2017 Intel Corporation + +#Permission is hereby granted, free of charge, to any person obtaining a +#copy of this software and associated documentation files (the +#"Software"), to deal in the Software without restriction, including +#without limitation the rights to use, copy, modify, merge, publish, +#distribute, sublicense, and/or sell copies of the Software, and to +#permit persons to whom the Software is furnished to do so, subject to +#the following conditions: + +#The above copyright notice and this permission notice shall be included +#in all copies or substantial portions of the Software. 
+ +#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +#OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +#MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +#IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +#CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +#TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +#SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + +#======================= end_copyright_notice ================================== + +# +# LLVM module +# + +cmake_minimum_required(VERSION 3.4.3) + +if(BS_USE_OSDM_BUILD_SYSTEM) + include(${BUILD_SYS_INC}/utils.cmake) + bs_find_patch() + bs_find_python3() +else() + if (NOT PATCH) + find_program(PATCH NAMES patch patch.exe) + endif() + if (NOT PYTHON) + find_program(PYTHON NAMES python3 python python3.exe python.exe) + endif() +endif() + +include(llvm_utils.cmake) + +if(NOT WIN32) + set(LLVM_TARGETS_TO_BUILD "X86" CACHE STRING "desc" FORCE) +else() + set(LLVM_TARGETS_TO_BUILD "" CACHE STRING "desc" FORCE) +endif() +set(LLVM_BUILD_TOOLS true CACHE BOOL "desc" FORCE) +set(LLVM_INCLUDE_TOOLS true CACHE BOOL "desc" FORCE) +set(LLVM_INSTALL_UTILS false CACHE BOOL "desc" FORCE) +set(LLVM_INCLUDE_UTILS true CACHE BOOL "desc" FORCE) # required to run LIT tests +set(LLVM_BUILD_UTILS false CACHE BOOL "desc" FORCE) +set(LLVM_BUILD_EXAMPLES false CACHE BOOL "desc" FORCE) +set(LLVM_INCLUDE_EXAMPLES false CACHE BOOL "desc" FORCE) +set(LLVM_BUILD_TESTS false CACHE BOOL "desc" FORCE) +set(LLVM_INCLUDE_TESTS false CACHE BOOL "desc" FORCE) +set(LLVM_APPEND_VC_REV false CACHE BOOL "desc" FORCE) +set(LLVM_ENABLE_THREADS true CACHE BOOL "desc" FORCE) +set(LLVM_ENABLE_CXX1Y false CACHE BOOL "desc" FORCE) +set(LLVM_ENABLE_PIC true CACHE BOOL "desc" FORCE) +set(LLVM_ENABLE_WARNINGS true CACHE BOOL "desc" FORCE) +set(LLVM_ENABLE_PEDANTIC true CACHE BOOL "desc" FORCE) +set(LLVM_ENABLE_WERROR false CACHE BOOL "desc" 
FORCE) +set(LLVM_ABI_BREAKING_CHECKS FORCE_OFF CACHE BOOL "desc" FORCE) +set(LLVM_BUILD_RUNTIME true CACHE BOOL "desc" FORCE) +set(LLVM_ENABLE_TERMINFO false CACHE BOOL "desc" FORCE) +set(LLVM_ENABLE_DUMP true CACHE BOOL "desc" FORCE) +set(LLVM_BUILD_LLVM_DYLIB false CACHE BOOL "desc" FORCE) +set(LLVM_LINK_LLVM_DYLIB false CACHE BOOL "desc" FORCE) +set(LLVM_ENABLE_PROJECTS "clang" CACHE STRING "desc" FORCE) + +if(DEFINED BUILD_PACKAGE) + if(NOT WIN32) + set(LLVM_BUILD_LLVM_DYLIB true CACHE BOOL "desc" FORCE) + set(LLVM_LINK_LLVM_DYLIB true CACHE BOOL "desc" FORCE) + endif() + set(LLVM_INSTALL_UTILS true CACHE BOOL "desc" FORCE) + set(LLVM_BUILD_UTILS true CACHE BOOL "desc" FORCE) +endif() + + +set(LLVM_OPTIONS + "-DLLVM_TARGETS_TO_BUILD=${LLVM_TARGETS_TO_BUILD}" + "-DLLVM_BUILD_TOOLS=${LLVM_BUILD_TOOLS}" + "-DLLVM_INCLUDE_TOOLS=${LLVM_INCLUDE_TOOLS}" + "-DLLVM_INSTALL_UTILS=${LLVM_INSTALL_UTILS}" + "-DLLVM_INCLUDE_UTILS=${LLVM_INCLUDE_UTILS}" + "-DLLVM_BUILD_UTILS=${LLVM_BUILD_UTILS}" + "-DLLVM_BUILD_EXAMPLES=${LLVM_BUILD_EXAMPLES}" + "-DLLVM_INCLUDE_EXAMPLES=${LLVM_INCLUDE_EXAMPLES}" + "-DLLVM_BUILD_TESTS=${LLVM_BUILD_TESTS}" + "-DLLVM_INCLUDE_TESTS=${LLVM_INCLUDE_TESTS}" + "-DLLVM_APPEND_VC_REV=${LLVM_APPEND_VC_REV}" + "-DLLVM_ENABLE_THREADS=${LLVM_ENABLE_THREADS}" + "-DLLVM_ENABLE_CXX1Y=${LLVM_ENABLE_CXX1Y}" + "-DLLVM_ENABLE_PIC=${LLVM_ENABLE_PIC}" + "-DLLVM_ENABLE_WARNINGS=${LLVM_ENABLE_WARNINGS}" + "-DLLVM_ENABLE_PEDANTIC=${LLVM_ENABLE_PEDANTIC}" + "-DLLVM_ENABLE_WERROR=${LLVM_ENABLE_WERROR}" + "-DLLVM_ABI_BREAKING_CHECKS=${LLVM_ABI_BREAKING_CHECKS}" + "-DLLVM_BUILD_RUNTIME=${LLVM_BUILD_RUNTIME}" + "-DLLVM_ENABLE_TERMINFO=${LLVM_ENABLE_TERMINFO}" + "-DLLVM_ENABLE_DUMP=${LLVM_ENABLE_DUMP}" + "-DLLVM_BUILD_LLVM_DYLIB=${LLVM_BUILD_LLVM_DYLIB}" + "-DLLVM_LINK_LLVM_DYLIB=${LLVM_LINK_LLVM_DYLIB}" + "-DLLVM_ENABLE_PROJECTS=${LLVM_ENABLE_PROJECTS}" +) + +if(DEFINED BUILD_PACKAGE) + set(LLVM_OPTIONS + "${LLVM_OPTIONS}" + 
"-DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}") +endif() + + +if(_ASSERTBUILD) + set(LLVM_ENABLE_ASSERTIONS true CACHE STRING "desc" FORCE) + set(LLVM_OPTIONS + "${LLVM_OPTIONS}" + "-DLLVM_ENABLE_ASSERTIONS=${LLVM_ENABLE_ASSERTIONS}" + ) +endif() + +if(WIN32) + set(LLVM_USE_CRT_DEBUG MTd CACHE STRING "desc" FORCE) + set(LLVM_USE_CRT_RELEASE MT CACHE STRING "desc" FORCE) + set(LLVM_USE_CRT_RELEASEINTERNAL MT CACHE STRING "desc" FORCE) + set(LLVM_OPTIONS + "${LLVM_OPTIONS}" + "-DLLVM_USE_CRT_DEBUG=${LLVM_USE_CRT_DEBUG}" + "-DLLVM_USE_CRT_RELEASE=${LLVM_USE_CRT_RELEASE}" + "-DLLVM_USE_CRT_RELEASEINTERNAL=${LLVM_USE_CRT_RELEASEINTERNAL}" + ) +else() + set(LLVM_ENABLE_EH true CACHE BOOL "desc" FORCE) + set(LLVM_ENABLE_RTTI true CACHE BOOL "desc" FORCE) + if ("${ARCH}" STREQUAL "32") + set(LLVM_BUILD_32_BITS true CACHE BOOL "desc" FORCE) + else() + set(LLVM_BUILD_32_BITS false CACHE BOOL "desc" FORCE) + endif() + set(LLVM_OPTIONS + "${LLVM_OPTIONS}" + "-DLLVM_ENABLE_EH=${LLVM_ENABLE_EH}" + "-DLLVM_ENABLE_RTTI=${LLVM_ENABLE_RTTI}" + "-DLLVM_BUILD_32_BITS=${LLVM_BUILD_32_BITS}" + ) +endif() + +if(COMMON_CLANG_LIBRARY_NAME) + set(LLVM_OPTIONS + "${LLVM_OPTIONS}" + "-DCOMMON_CLANG_LIBRARY_NAME=${COMMON_CLANG_LIBRARY_NAME}" + ) +endif() + +if((IGC_OPTION__LLVM_SOURCES_DIR MATCHES "llvm-project$") OR DEFINED BUILD_PACKAGE) + if(EXISTS ${IGC_OPTION__LLVM_SOURCES_DIR}) + message(STATUS "[LLVM_PATCHER] : Manual read LLVM version from llvm-project/llvm/CMakeLists.txt file") + file(STRINGS ${IGC_OPTION__LLVM_SOURCES_DIR}/llvm/CMakeLists.txt LLVM_VERSION_MAJOR_MATCH REGEX "set\\(LLVM_VERSION_MAJOR [0-9]+\\)") + file(STRINGS ${IGC_OPTION__LLVM_SOURCES_DIR}/llvm/CMakeLists.txt LLVM_VERSION_MINOR_MATCH REGEX "set\\(LLVM_VERSION_MINOR [0-9]+\\)") + file(STRINGS ${IGC_OPTION__LLVM_SOURCES_DIR}/llvm/CMakeLists.txt LLVM_VERSION_PATCH_MATCH REGEX "set\\(LLVM_VERSION_PATCH [0-9]+\\)") + + string(REGEX MATCH "[0-9]+" LLVM_VERSION_MAJOR ${LLVM_VERSION_MAJOR_MATCH}) + string(REGEX MATCH 
"[0-9]+" LLVM_VERSION_MINOR ${LLVM_VERSION_MINOR_MATCH}) + string(REGEX MATCH "[0-9]+" LLVM_VERSION_PATCH ${LLVM_VERSION_PATCH_MATCH}) + + set(IGC_OPTION__LLVM_PREFERRED_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}" PARENT_SCOPE) + + message(STATUS "[LLVM_PATCHER] : In folder ${LLVM_SOURCE_URL} found LLVM version : ${IGC_OPTION__LLVM_PREFERRED_VERSION}") + endif() +endif() + +# Use LLVM stock sources or patched stock sources +if(${IGC_OPTION__LLVM_STOCK_SOURCES}) + set(LLVM_APPLY_PATCHES false) +else() + set(LLVM_APPLY_PATCHES true) +endif() + +if(NOT LLVM_SOURCE_DIR) + set(LLVM_SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/../src/llvm") +endif() + +message(STATUS "[LLVM_PATCHER] : LLVM_SOURCE_URL = ${IGC_OPTION__LLVM_SOURCES_DIR}") +message(STATUS "[LLVM_PATCHER] : LLVM_OPTIONS = ${LLVM_OPTIONS}") + +if(NOT EXISTS ${LLVM_SOURCE_DIR}) + # Copy stock LLVM sources to LLVM_SOURCE_DIR + message(STATUS "[LLVM_PATCHER] : Copying stock LLVM and CLANG sources ${LLVM_SOURCE_URL} to ${LLVM_SOURCE_DIR}/../") + message(STATUS "[LLVM_PATCHER] : Copying stock LLVM and CLANG sources ${IGC_OPTION__LLVM_SOURCES_DIR} to ${LLVM_SOURCE_DIR}") + execute_process(COMMAND ${CMAKE_COMMAND} -E copy_directory ${IGC_OPTION__LLVM_SOURCES_DIR}/.git ${LLVM_SOURCE_DIR}/../.git) + execute_process(COMMAND ${CMAKE_COMMAND} -E copy_directory ${IGC_OPTION__LLVM_SOURCES_DIR}/clang ${LLVM_SOURCE_DIR}/../clang) + execute_process(COMMAND ${CMAKE_COMMAND} -E copy_directory ${IGC_OPTION__LLVM_SOURCES_DIR}/llvm ${LLVM_SOURCE_DIR}) + + FILE(GLOB dirsPatch RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}/releases ${CMAKE_CURRENT_SOURCE_DIR}/releases/*) + list(SORT dirsPatch) + list(REVERSE dirsPatch) + + if(NOT LLVM_VERSION_MAJOR) + string(REGEX MATCH "[0-9]+" LLVM_VERSION_MAJOR ${IGC_OPTION__LLVM_PREFERRED_VERSION}) + endif() + + if(LLVM_APPLY_PATCHES) + # Customization patches will be applied if present. 
+ foreach(dirPatch ${dirsPatch}) + # Apply all patches for the given major version of LLVM; folders whose name contains no number are skipped + string(REGEX MATCH "[0-9]+" LLVM_VER_MAJOR_FOLDER ${dirPatch}) + if(NOT LLVM_VER_MAJOR_FOLDER) + continue() + endif() + if(NOT "${LLVM_VERSION_MAJOR}" STREQUAL "${LLVM_VER_MAJOR_FOLDER}") # exact compare: MATCHES is an unanchored regex search, so major 14 would also accept a folder numbered 4 + continue() + endif() + message(STATUS "[LLVM_PATCHER] : Applying patches for LLVM from version ${dirPatch}") + + file(GLOB LLVM_PATCH_FILES + "${CMAKE_CURRENT_SOURCE_DIR}/releases/${dirPatch}/patches_external/*.patch" + ) + # Sort list of patch files so they are applied in a deterministic order. + if(LLVM_PATCH_FILES) + list(SORT LLVM_PATCH_FILES) + endif() + # Apply customization patches if any; the first failing patch aborts the configure step. + foreach(patch_file ${LLVM_PATCH_FILES}) + message(STATUS "[LLVM_PATCHER] : Apply ${patch_file} file") + execute_process(COMMAND ${PATCH} -d ${LLVM_SOURCE_DIR} -p1 -i ${patch_file} RESULT_VARIABLE rv) + if(NOT rv EQUAL 0) + message(FATAL_ERROR "[LLVM_PATCHER] : error: applying patch '${patch_file}' failed") + endif() + endforeach() + endforeach() + endif() +endif() + + +if(CMAKE_CONFIGURATION_TYPES) + # Multi-configuration generator. + set (CMAKE_CONFIGURATION_TYPES "Debug" "Release" "ReleaseInternal") + message(STATUS "[LLVM_PATCHER] : LLVM_CONFIGURATION_TYPES = ${CMAKE_CONFIGURATION_TYPES}") +else() + # Single-configuration generator. 
+ message(STATUS "[LLVM_PATCHER] : LLVM_BUILD_TYPE = ${CMAKE_BUILD_TYPE}") +endif() + +# Set python interpreter for LLVM +llvm_utils_python_set() +# Do not alter LLVM compilation flags +llvm_utils_push_build_flags() +if(DEFINED BUILD_PACKAGE) + add_subdirectory(${LLVM_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}/src) +else() + add_subdirectory(${LLVM_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}/src EXCLUDE_FROM_ALL) +endif() +llvm_utils_pop_build_flags() +llvm_utils_python_restore() + +# Get version of LLVM that we are currently using: +get_directory_property(LLVM_VERSION_MAJOR DIRECTORY ${LLVM_SOURCE_DIR} DEFINITION "LLVM_VERSION_MAJOR") +get_directory_property(LLVM_VERSION_MINOR DIRECTORY ${LLVM_SOURCE_DIR} DEFINITION "LLVM_VERSION_MINOR") +get_directory_property(LLVM_VERSION_PATCH DIRECTORY ${LLVM_SOURCE_DIR} DEFINITION "LLVM_VERSION_PATCH") +get_directory_property(LLVM_VERSION_SUFFIX DIRECTORY ${LLVM_SOURCE_DIR} DEFINITION "LLVM_VERSION_SUFFIX") +# Make LLVM version accessible in parent scope: +set(LLVM_VERSION_MAJOR ${LLVM_VERSION_MAJOR} PARENT_SCOPE) +set(LLVM_VERSION_MINOR ${LLVM_VERSION_MINOR} PARENT_SCOPE) +set(LLVM_VERSION_PATCH ${LLVM_VERSION_PATCH} PARENT_SCOPE) +set(LLVM_VERSION_SUFFIX ${LLVM_VERSION_SUFFIX} PARENT_SCOPE) + +set(PACKAGE_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}${LLVM_VERSION_SUFFIX}") + +if(NOT (${PACKAGE_VERSION} EQUAL ${IGC_OPTION__LLVM_PREFERRED_VERSION})) + message(FATAL_ERROR "[LLVM_PATCHER] : Expected LLVM version ${IGC_OPTION__LLVM_PREFERRED_VERSION} but found ${PACKAGE_VERSION}.") +endif() + +set(LLVM_INCLUDE_DIRS "${LLVM_SOURCE_DIR}/include;${CMAKE_CURRENT_BINARY_DIR}/src/include") +set(LLVM_INCLUDE_DIRS ${LLVM_INCLUDE_DIRS} PARENT_SCOPE) + +if(DEFINED BUILD_PACKAGE) + if(UNIX) + if(EXISTS "/etc/debian_version") + set(CPACK_GENERATOR "DEB") + elseif(EXISTS "/etc/redhat-release") + set(CPACK_GENERATOR "RPM") + else() + set(CPACK_GENERATOR "TXZ") + set(CPACK_SET_DESTDIR ON) + endif() + + 
set(CPACK_DEBIAN_PACKAGE_MAINTAINER "intel") + set(CPACK_PACKAGE_ARCHITECTURE "x86_64") + set(CPACK_PACKAGE_NAME "intel") + set(CPACK_PACKAGE_VERSION_MAJOR ${LLVM_VERSION_MAJOR}) + set(CPACK_PACKAGE_VERSION_MINOR ${LLVM_VERSION_MINOR}) + set(CPACK_PACKAGE_VERSION_PATCH ${LLVM_VERSION_PATCH}) + set(CPACK_PACKAGE_VERSION_SUFFIX ${LLVM_VERSION_SUFFIX}) + set(CPACK_PACKAGE_VERSION "${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}${CPACK_PACKAGE_VERSION_SUFFIX}") + set(CPACK_PACKAGE_INSTALL_DIRECTORY ${IGC_INSTALL_TIME_ROOT_DIR}) + set(CPACK_DEBIAN_PACKAGE_DESCRIPTION "LLVM patched library version for Intel") + set(CPACK_DEBIAN_PACKAGE_SHLIBDEPS ON) + set(CPACK_DEBIAN_PACKAGE_ARCHITECTURE "amd64") + set(CPACK_RPM_PACKAGE_RELOCATABLE TRUE) + set(CPACK_RPM_PACKAGE_ARCHITECTURE "x86_64") + + if(DEFINED LLVM_PACKAGE_RELEASE) + set(CPACK_DEBIAN_PACKAGE_RELEASE ${LLVM_PACKAGE_RELEASE}) + set(CPACK_RPM_PACKAGE_RELEASE ${LLVM_PACKAGE_RELEASE}) + else() + set(CPACK_DEBIAN_PACKAGE_RELEASE 1) + set(CPACK_RPM_PACKAGE_RELEASE 1) + endif() + + set(CPACK_DEBIAN_INTEL-LLVM-PATCHED-LIBRARY_FILE_NAME "intel-llvm-patched-library_${CPACK_PACKAGE_VERSION}-${CPACK_DEBIAN_PACKAGE_RELEASE}_${CPACK_DEBIAN_PACKAGE_ARCHITECTURE}.deb") + set(CPACK_DEBIAN_INTEL-LLVM-PATCHED-LIBRARY-DEVEL_FILE_NAME "intel-llvm-patched-library-devel_${CPACK_PACKAGE_VERSION}-${CPACK_DEBIAN_PACKAGE_RELEASE}_${CPACK_DEBIAN_PACKAGE_ARCHITECTURE}.deb") + + set(CPACK_RPM_INTEL-LLVM-PATCHED-LIBRARY_FILE_NAME "intel-llvm-patched-library-${CPACK_PACKAGE_VERSION}-${CPACK_RPM_PACKAGE_RELEASE}.${CPACK_RPM_PACKAGE_ARCHITECTURE}.rpm") + set(CPACK_RPM_INTEL-LLVM-PATCHED-LIBRARY-DEVEL_FILE_NAME "intel-llvm-patched-library-devel-${CPACK_PACKAGE_VERSION}-${CPACK_RPM_PACKAGE_RELEASE}.${CPACK_RPM_PACKAGE_ARCHITECTURE}.rpm") + + set(CPACK_ARCHIVE_INTEL-LLVM-PATCHED-LIBRARY_FILE_NAME 
"intel-llvm-patched-library-${CPACK_PACKAGE_VERSION}-${CPACK_DEBIAN_PACKAGE_RELEASE}.${CPACK_PACKAGE_ARCHITECTURE}") + set(CPACK_ARCHIVE_INTEL-LLVM-PATCHED-LIBRARY-DEVEL_FILE_NAME "intel-llvm-patched-library-devel-${CPACK_PACKAGE_VERSION}-${CPACK_DEBIAN_PACKAGE_RELEASE}.${CPACK_PACKAGE_ARCHITECTURE}") + + set(CPACK_DEB_COMPONENT_INSTALL ON) + set(CPACK_RPM_COMPONENT_INSTALL ON) + set(CPACK_ARCHIVE_COMPONENT_INSTALL ON) + set(CPACK_COMPONENTS_ALL intel-llvm-patched-library intel-llvm-patched-library-devel) + + include(CPack) + include(GNUInstallDirs) + + install(FILES $<TARGET_FILE:LLVM> DESTINATION ${CMAKE_INSTALL_FULL_LIBDIR} COMPONENT intel-llvm-patched-library) # NOTE(review): generator expression restored from a bare "$" left by text extraction; confirm the dylib target is named LLVM + set(BUILD_SRC_DIR_LLVM ${LLVM_SOURCE_DIR}/../build/src) + + install(DIRECTORY ${BUILD_SRC_DIR_LLVM}/lib/cmake/llvm + DESTINATION ${CMAKE_INSTALL_FULL_LIBDIR}/cmake + COMPONENT intel-llvm-patched-library + FILES_MATCHING PATTERN *.cmake + PATTERN .svn EXCLUDE + PATTERN LLVMConfig.cmake EXCLUDE + PATTERN LLVMConfigVersion.cmake EXCLUDE + PATTERN LLVM-Config.cmake EXCLUDE + PATTERN LLVMExports.cmake EXCLUDE + PATTERN GetHostTriple.cmake EXCLUDE) + + install(FILES + ${BUILD_SRC_DIR_LLVM}/cmake/modules/CMakeFiles/LLVMConfig.cmake + ${BUILD_SRC_DIR_LLVM}/lib/cmake/llvm/LLVMConfigVersion.cmake + ${LLVM_SOURCE_DIR}/cmake/modules/LLVM-Config.cmake + ${BUILD_SRC_DIR_LLVM}/cmake/modules/CMakeFiles/Export/lib/cmake/llvm/LLVMExports.cmake + DESTINATION ${CMAKE_INSTALL_FULL_LIBDIR}/cmake/llvm + COMPONENT intel-llvm-patched-library) + + set(BIN_PATH ${BUILD_SRC_DIR_LLVM}/bin) # the tools below are executables: PROGRAMS keeps the execute permission that FILES would drop + install(PROGRAMS + ${BIN_PATH}/llvm-link + ${BIN_PATH}/llvm-config + ${BIN_PATH}/llvm-tblgen + ${BIN_PATH}/not + ${BIN_PATH}/FileCheck + ${BIN_PATH}/count + DESTINATION ${CMAKE_INSTALL_FULL_BINDIR} + COMPONENT intel-llvm-patched-library + ) + + install(DIRECTORY ${LLVM_SOURCE_DIR}/include/llvm ${LLVM_SOURCE_DIR}/include/llvm-c DESTINATION ${CMAKE_INSTALL_FULL_INCLUDEDIR} + COMPONENT intel-llvm-patched-library-devel + FILES_MATCHING + PATTERN "*.def" + PATTERN "*.h" 
+ PATTERN "*.td" + PATTERN "*.inc" + PATTERN "LICENSE.TXT" + PATTERN ".svn" EXCLUDE) + + set(LLVM_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/src/include) + + install(DIRECTORY ${LLVM_INCLUDE_DIR}/llvm ${LLVM_INCLUDE_DIR}/llvm-c DESTINATION ${CMAKE_INSTALL_FULL_INCLUDEDIR} + COMPONENT intel-llvm-patched-library-devel + FILES_MATCHING + PATTERN "*.def" + PATTERN "*.h" + PATTERN "*.gen" + PATTERN "*.inc" + PATTERN "CMakeFiles" EXCLUDE + PATTERN "config.h" EXCLUDE + PATTERN ".svn" EXCLUDE) + + endif() +endif() \ No newline at end of file diff --git a/external/llvm/llvm.cmake b/external/llvm/llvm.cmake new file mode 100644 index 000000000..d7bff8f22 --- /dev/null +++ b/external/llvm/llvm.cmake @@ -0,0 +1,212 @@ +#===================== begin_copyright_notice ================================== + +#Copyright (c) 2017 Intel Corporation + +#Permission is hereby granted, free of charge, to any person obtaining a +#copy of this software and associated documentation files (the +#"Software"), to deal in the Software without restriction, including +#without limitation the rights to use, copy, modify, merge, publish, +#distribute, sublicense, and/or sell copies of the Software, and to +#permit persons to whom the Software is furnished to do so, subject to +#the following conditions: + +#The above copyright notice and this permission notice shall be included +#in all copies or substantial portions of the Software. + +#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +#OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +#MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +#IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +#CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +#TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +#SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + +#======================= end_copyright_notice ================================== + +# The way of obtaining LLVM is chosen in the following priority order: +#1. IGC_OPTION__LLVM_SOURCES - build LLVM from sources +# IGC_OPTION__LLVM_STOCK_SOURCES - use LLVM stock sources or patched stock sources // By default False +# IGC_OPTION__LLVM_SOURCES_DIR - path to the LLVM sources folder +#2. IGC_OPTION__LLVM_PREBUILDS - use a prebuilt LLVM toolchain +# IGC_OPTION__LLVM_PREBUILDS_DIR - path to the prebuilt LLVM folder +#3. IGC_OPTION__LLVM_FROM_SYSTEM - use the LLVM found in the system + +# IGC_OPTION__LLVM_PREFERRED_VERSION - which version of LLVM to pick, e.g. "7.1.0" // By default 10.0.0 + +### Normalize the user-provided flags: unset ones become FALSE, and a higher-priority choice switches the lower-priority ones off +if(NOT IGC_OPTION__LLVM_SOURCES) + set(IGC_OPTION__LLVM_SOURCES FALSE) +elseif(${IGC_OPTION__LLVM_SOURCES}) + set(IGC_OPTION__LLVM_PREBUILDS FALSE) + set(IGC_OPTION__LLVM_FROM_SYSTEM FALSE) +endif() + +if(NOT IGC_OPTION__LLVM_PREBUILDS) + set(IGC_OPTION__LLVM_PREBUILDS FALSE) +elseif(${IGC_OPTION__LLVM_PREBUILDS}) + set(IGC_OPTION__LLVM_FROM_SYSTEM FALSE) +endif() + +if(NOT IGC_OPTION__LLVM_FROM_SYSTEM) + set(IGC_OPTION__LLVM_FROM_SYSTEM FALSE) +endif() +### + +if(NOT IGC_OPTION__LLVM_STOCK_SOURCES) + set(IGC_OPTION__LLVM_STOCK_SOURCES FALSE) +endif() + +set(IGC_LLVM_TOOLS_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../external/llvm) + +### Get the preferred version of LLVM unless the user already provided one ### +if(NOT IGC_OPTION__LLVM_PREFERRED_VERSION) + include(${IGC_LLVM_TOOLS_DIR}/llvm_preferred_version.cmake) +endif() + +set(IGC_LOOKING_FOR_LLVM TRUE) + +### Pick, in priority order, the first available way to link with LLVM ### +if(${IGC_OPTION__LLVM_SOURCES} OR (NOT ${IGC_OPTION__LLVM_PREBUILDS} AND NOT ${IGC_OPTION__LLVM_FROM_SYSTEM})) + ### Get the LLVM source code path unless the user already provided one + if(NOT IGC_OPTION__LLVM_SOURCES_DIR) + include(${IGC_LLVM_TOOLS_DIR}/llvm_source_path.cmake) + endif() + + if(NOT EXISTS ${IGC_OPTION__LLVM_SOURCES_DIR}) + if(${IGC_OPTION__LLVM_SOURCES}) + # User wants to build from sources, but we couldn't find them. 
+ message(FATAL_ERROR "[IGC] : Cannot find LLVM sources, please provide sources path by IGC_OPTION__LLVM_SOURCES_DIR flag.") + endif() + else() + set(IGC_LOOKING_FOR_LLVM FALSE) + set(IGC_OPTION__LLVM_SOURCES TRUE) + endif() +endif() +if(${IGC_LOOKING_FOR_LLVM} AND (${IGC_OPTION__LLVM_PREBUILDS} OR (NOT ${IGC_OPTION__LLVM_FROM_SYSTEM}))) + ### Get LLVM prebuilts path + if(NOT IGC_OPTION__LLVM_PREBUILDS_DIR) + include(${IGC_LLVM_TOOLS_DIR}/llvm_prebuilt_path.cmake) + endif() + + if(NOT EXISTS ${IGC_OPTION__LLVM_PREBUILDS_DIR}) + if(${IGC_OPTION__LLVM_PREBUILDS}) + # User wants to build from prebuilts, but we couldn't find them. + message(FATAL_ERROR "[IGC] : Cannot find LLVM prebuilts, please provide path by IGC_OPTION__LLVM_PREBUILDS_DIR flag.") + endif() + else() + set(IGC_LOOKING_FOR_LLVM FALSE) + set(IGC_OPTION__LLVM_PREBUILDS TRUE) + endif() +endif() +if(${IGC_LOOKING_FOR_LLVM}) + # Try to find the LLVM in the system + find_package(LLVM ${IGC_OPTION__LLVM_PREFERRED_VERSION}) + + if(LLVM_FOUND) + set(IGC_OPTION__LLVM_FROM_SYSTEM TRUE) + else() + if(${IGC_OPTION__LLVM_FROM_SYSTEM}) + # User wants to build using LLVM from system, but we couldn't find them. + message(FATAL_ERROR "[IGC] : Cannot find LLVM in system in version ${IGC_OPTION__LLVM_PREFERRED_VERSION}. Please provide other version by flag IGC_OPTION__LLVM_PREFERRED_VERSION or install the missing one.") + else() + message(FATAL_ERROR "[IGC] : Cannot find LLVM sources, prebuilt libraries or even installed package in system. 
Please provide LLVM.") + endif() + endif() +endif() +### + +if(NOT DEFINED COMMON_CLANG_LIBRARY_NAME) + set(COMMON_CLANG_LIBRARY_NAME opencl-clang) +endif() + +if(WIN32) + igc_arch_get_cpu(_cpuSuffix) + set(COMMON_CLANG_LIBRARY_NAME ${COMMON_CLANG_LIBRARY_NAME}${_cpuSuffix}) + set(COMMON_CLANG_LIB_FULL_NAME "${COMMON_CLANG_LIBRARY_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}") + + message(STATUS "OpenCL Clang library name to load: ${COMMON_CLANG_LIB_FULL_NAME}") + + set_property(DIRECTORY APPEND PROPERTY COMPILE_DEFINITIONS + COMMON_CLANG_LIB_FULL_NAME=\"${COMMON_CLANG_LIB_FULL_NAME}\" + ) +else() + set(COMMON_CLANG_LIB_FULL_NAME "lib${COMMON_CLANG_LIBRARY_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}") +endif() + +if(${IGC_OPTION__LLVM_SOURCES}) + message(STATUS "[IGC] IGC will build LLVM from sources.") + message(STATUS "[IGC] LLVM sources folder : ${IGC_OPTION__LLVM_SOURCES_DIR}") + message(STATUS "[IGC] LLVM sources in stock version : ${IGC_OPTION__LLVM_STOCK_SOURCES}") + add_subdirectory(${IGC_LLVM_TOOLS_DIR} ${CMAKE_CURRENT_BINARY_DIR}/llvm/build) +elseif(${IGC_OPTION__LLVM_PREBUILDS}) + message(STATUS "[IGC] IGC will take LLVM prebuilts.") + message(STATUS "[IGC] LLVM prebuilts folder : ${IGC_OPTION__LLVM_PREBUILDS_DIR}") + include(${IGC_LLVM_TOOLS_DIR}/llvm_prebuilt.cmake) # IGC_LLVM_TOOLS_DIR is the directory variable this file sets; IGC_LLVM_PATCHER_DIR is never set in this file +elseif(${IGC_OPTION__LLVM_FROM_SYSTEM}) + message(STATUS "[IGC] IGC will take LLVM from system.") +endif() + +if(LLVM_LINK_LLVM_DYLIB) + # LLVM was built and configured in a way that tools (in our case IGC) should be linked + # against single LLVM dynamic library. + + # SET_LLVM_LIB_PATH is a CMake variable that can be passed in to specify the location + # to look for the LLVM .so. 
In some cases this is useful if multiple LLVM versions are installed + if(SET_LLVM_LIB_PATH) + set(IGC_BUILD__LLVM_LIBS_TO_LINK "${SET_LLVM_LIB_PATH}/libLLVM-${LLVM_VERSION_MAJOR}.so") + message(STATUS "[IGC] Link against specified LLVM dylib ${IGC_BUILD__LLVM_LIBS_TO_LINK}") + else() + find_library(IGC_BUILD__LLVM_LIBS_TO_LINK "libLLVM-${LLVM_VERSION_MAJOR}.so") + if(IGC_BUILD__LLVM_LIBS_TO_LINK) + message(STATUS "[IGC] Link against found LLVM dylib ${IGC_BUILD__LLVM_LIBS_TO_LINK}") + else() + message(FATAL_ERROR "[IGC] Could not find the LLVM dylib. Aborting.") + endif() + endif() + +else() + # LLVM was built into multiple libraries (static or shared). + message(STATUS "[IGC] Link against LLVM static or shared component libs") + + # Link targets/dependencies (in required link order). + # NOTE: Since the libraries are grouped in the same link group (in GCC/CLANG), + # there is no longer need to order in most dependant first manner. + set(IGC_BUILD__LLVM_LIBS_TO_LINK + "LLVMipo" + "LLVMIRReader" + "LLVMBitWriter" + "LLVMBinaryFormat" + "LLVMAsmParser" + "LLVMBitReader" + "LLVMLinker" + "LLVMCodeGen" + "LLVMScalarOpts" + "LLVMTransformUtils" + "LLVMAnalysis" + "LLVMTarget" + "LLVMObjCARCOpts" + "LLVMVectorize" + "LLVMInstrumentation" + "LLVMObject" + "LLVMMCParser" + "LLVMProfileData" + "LLVMMC" + "LLVMCore" + "LLVMSupport" + "LLVMDemangle" + ) + + if(LLVM_VERSION_MAJOR GREATER_EQUAL 8) + list(APPEND IGC_BUILD__LLVM_LIBS_TO_LINK + "LLVMInstCombine" + ) + endif() + + if(LLVM_VERSION_MAJOR GREATER_EQUAL 9) + list(APPEND IGC_BUILD__LLVM_LIBS_TO_LINK + "LLVMBitstreamReader" + ) + endif() + +endif() diff --git a/external/llvm/llvm_prebuilt.cmake b/external/llvm/llvm_prebuilt.cmake new file mode 100644 index 000000000..349073e07 --- /dev/null +++ b/external/llvm/llvm_prebuilt.cmake @@ -0,0 +1,44 @@ +#===================== begin_copyright_notice ================================== + +#Copyright (c) 2017 Intel Corporation + +#Permission is hereby granted, free of charge, to 
any person obtaining a +#copy of this software and associated documentation files (the +#"Software"), to deal in the Software without restriction, including +#without limitation the rights to use, copy, modify, merge, publish, +#distribute, sublicense, and/or sell copies of the Software, and to +#permit persons to whom the Software is furnished to do so, subject to +#the following conditions: + +#The above copyright notice and this permission notice shall be included +#in all copies or substantial portions of the Software. + +#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +#OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +#MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +#IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +#CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +#TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +#SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+
+
+#======================= end_copyright_notice ==================================
+
+if(EXISTS "${IGC_OPTION__LLVM_PREBUILDS_DIR}/include")
+  # Make the CMake modules shipped with the prebuilt LLVM reachable by include().
+  list(APPEND CMAKE_MODULE_PATH "${IGC_OPTION__LLVM_PREBUILDS_DIR}/lib/cmake/llvm/")
+  # Point find_package(LLVM CONFIG) below at the prebuilt tree.
+  set(LLVM_DIR "${IGC_OPTION__LLVM_PREBUILDS_DIR}/lib/cmake/llvm")
+  include("${IGC_OPTION__LLVM_PREBUILDS_DIR}/lib/cmake/llvm/LLVMConfig.cmake")
+  include("${IGC_OPTION__LLVM_PREBUILDS_DIR}/lib/cmake/llvm/AddLLVM.cmake")
+
+  find_package(LLVM REQUIRED CONFIG)
+
+  # Use the headers of the prebuilt tree and record that prebuilts are in use.
+  set(LLVM_INCLUDE_DIRS "${IGC_OPTION__LLVM_PREBUILDS_DIR}/include")
+  set(IGC_OPTION__LLVM_PREBUILDS True)
+  # Report LLVM_PACKAGE_VERSION: PACKAGE_VERSION is a version-file variable and is not set here.
+  message(STATUS "[LLVM_PATCHER\\Prebuilt] : Found prebuilt of llvm in version ${LLVM_PACKAGE_VERSION}")
+endif()
+
+
diff --git a/external/llvm/llvm_prebuilt_path.cmake b/external/llvm/llvm_prebuilt_path.cmake
new file mode 100644
index 000000000..ac966a4d0
--- /dev/null
+++ b/external/llvm/llvm_prebuilt_path.cmake
@@ -0,0 +1,49 @@
+#===================== begin_copyright_notice ==================================
+
+#Copyright (c) 2017 Intel Corporation
+
+#Permission is hereby granted, free of charge, to any person obtaining a
+#copy of this software and associated documentation files (the
+#"Software"), to deal in the Software without restriction, including
+#without limitation the rights to use, copy, modify, merge, publish,
+#distribute, sublicense, and/or sell copies of the Software, and to
+#permit persons to whom the Software is furnished to do so, subject to
+#the following conditions:
+
+#The above copyright notice and this permission notice shall be included
+#in all copies or substantial portions of the Software.
+
+#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+#OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+#MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+#IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+#CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+#TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+#SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+
+#======================= end_copyright_notice ==================================
+
+# The build type selects the sub-directory of the default prebuilt location.
+set(LLVM_BUILD_TYPE ${CMAKE_BUILD_TYPE})
+
+# A defined BUILD_TYPE wins: "release" maps to Release, anything else to Debug.
+if(DEFINED BUILD_TYPE)
+  if("${BUILD_TYPE}" STREQUAL "release")
+    set(LLVM_BUILD_TYPE "Release")
+  else()
+    set(LLVM_BUILD_TYPE "Debug")
+  endif()
+endif()
+
+# Fall back to the default location when no usable path was supplied (unset or
+# empty); this is the same test the including CMakeLists.txt uses for this file.
+if(NOT IGC_OPTION__LLVM_PREBUILDS_DIR)
+  set(IGC_OPTION__LLVM_PREBUILDS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../../prebuild-llvm/${LLVM_BUILD_TYPE}")
+endif()
+
+### Look for config file
+if(NOT EXISTS "${IGC_OPTION__LLVM_PREBUILDS_DIR}/lib/cmake/llvm/LLVMConfig.cmake")
+  ### Not found
+  set(IGC_OPTION__LLVM_PREBUILDS_DIR "empty")
+endif()
diff --git a/external/llvm/llvm_preferred_version.cmake b/external/llvm/llvm_preferred_version.cmake
new file mode 100644
index 000000000..4659ad5af
--- /dev/null
+++ b/external/llvm/llvm_preferred_version.cmake
@@ -0,0 +1,29 @@
+#===================== begin_copyright_notice ==================================
+
+#Copyright (c) 2017 Intel Corporation
+
+#Permission is hereby granted, free of charge, to any person obtaining a
+#copy of this software and associated documentation files (the
+#"Software"), to deal in the Software without restriction, including
+#without limitation the rights to use, copy, modify, merge, publish,
+#distribute, sublicense, and/or sell copies of the Software, and to
+#permit persons to whom the Software is furnished to do so, subject to
+#the following conditions:
+
+#The above copyright notice and this permission notice shall be included
+#in all copies or substantial portions of the Software.
+
+#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+#OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+#MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+#IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+#CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+#TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+#SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+
+#======================= end_copyright_notice ==================================
+# Default LLVM version; only applied when no preferred version is set yet.
+if(NOT IGC_OPTION__LLVM_PREFERRED_VERSION)
+  set(IGC_OPTION__LLVM_PREFERRED_VERSION "10.0.0")
+endif()
diff --git a/external/llvm/llvm_source_path.cmake b/external/llvm/llvm_source_path.cmake
new file mode 100644
index 000000000..1e998a406
--- /dev/null
+++ b/external/llvm/llvm_source_path.cmake
@@ -0,0 +1,43 @@
+#===================== begin_copyright_notice ==================================
+
+#Copyright (c) 2017 Intel Corporation
+
+#Permission is hereby granted, free of charge, to any person obtaining a
+#copy of this software and associated documentation files (the
+#"Software"), to deal in the Software without restriction, including
+#without limitation the rights to use, copy, modify, merge, publish,
+#distribute, sublicense, and/or sell copies of the Software, and to
+#permit persons to whom the Software is furnished to do so, subject to
+#the following conditions:
+
+#The above copyright notice and this permission notice shall be included
+#in all copies or substantial portions of the Software.
+
+#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+#OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+#MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+#IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+#CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+#TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+#SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+
+#======================= end_copyright_notice ==================================
+
+# Default location. NOTE(review): overwrites an already set IGC_OPTION__LLVM_SOURCES_DIR unless BUILD_PACKAGE is defined - confirm.
+if(NOT DEFINED BUILD_PACKAGE)
+  set(IGC_OPTION__LLVM_SOURCES_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../../llvm-project")
+endif()
+
+if(NOT EXISTS "${IGC_OPTION__LLVM_SOURCES_DIR}")
+  # Fallback 1: version-suffixed sources under /opt/src
+  set(IGC_OPTION__LLVM_SOURCES_DIR "/opt/src/llvm-project_${IGC_OPTION__LLVM_PREFERRED_VERSION}")
+endif()
+if(NOT EXISTS "${IGC_OPTION__LLVM_SOURCES_DIR}")
+  # Fallback 2: version-suffixed sources two directories above this one
+  set(IGC_OPTION__LLVM_SOURCES_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../../llvm-project_${IGC_OPTION__LLVM_PREFERRED_VERSION}")
+endif()
+if(NOT EXISTS "${IGC_OPTION__LLVM_SOURCES_DIR}")
+  # Fallback 3: version-suffixed sources four directories above this one
+  set(IGC_OPTION__LLVM_SOURCES_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../../../../llvm-project_${IGC_OPTION__LLVM_PREFERRED_VERSION}")
+endif()
diff --git a/external/llvm/llvm_utils.cmake b/external/llvm/llvm_utils.cmake
new file mode 100644
index 000000000..d1f1e9519
--- /dev/null
+++ b/external/llvm/llvm_utils.cmake
@@ -0,0 +1,138 @@
+#===================== begin_copyright_notice ==================================
+
+#Copyright (c) 2017 Intel Corporation
+
+#Permission is hereby granted, free of charge, to any person obtaining a
+#copy of this software and associated documentation files (the
+#"Software"), to deal in the Software without restriction, including
+#without limitation the rights to use, copy, modify, merge, publish,
+#distribute, sublicense, and/or sell copies of the Software, and to
+#permit persons to whom the Software is furnished to do so, subject to
+#the following conditions:
+
+#The above copyright notice and this permission notice shall be included
+#in all copies or substantial portions of the Software.
+
+#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+#OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+#MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+#IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+#CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+#TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+#SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+
+#======================= end_copyright_notice ==================================
+
+
+#
+# Macro to set python interpreter for LLVM
+#
+# Forces the cached PYTHON_EXECUTABLE to the value of PYTHON. A value that was
+# already cached is kept in PYTHON_EXECUTABLE_BACKUP for llvm_utils_python_restore().
+#
+macro(llvm_utils_python_set)
+  # Unset directory scope PYTHON_EXECUTABLE variable
+  unset(PYTHON_EXECUTABLE)
+  # Check for cached PYTHON_EXECUTABLE variable
+  if(PYTHON_EXECUTABLE)
+    # If cached PYTHON_EXECUTABLE already exists save it to restore
+    set(PYTHON_EXECUTABLE_BACKUP ${PYTHON_EXECUTABLE})
+  endif()
+  # Set python interpreter for LLVM (quoted so the value is always one argument)
+  set(PYTHON_EXECUTABLE "${PYTHON}" CACHE PATH "desc" FORCE)
+  message(STATUS "[LLVM] PYTHON_EXECUTABLE = ${PYTHON_EXECUTABLE}")
+endmacro()
+
+#
+# Macro to restore python interpreter
+#
+# Counterpart of llvm_utils_python_set(): puts the saved cache value back, or
+# removes the cache entry when nothing was saved.
+#
+macro(llvm_utils_python_restore)
+  if(PYTHON_EXECUTABLE_BACKUP)
+    # Restore python interpreter
+    set(PYTHON_EXECUTABLE ${PYTHON_EXECUTABLE_BACKUP} CACHE PATH "desc" FORCE)
+  else()
+    # Clear python interpreter for LLVM
+    unset(PYTHON_EXECUTABLE CACHE)
+  endif()
+endmacro()
+
+#
+# Helper to list the build flag variables handled by the push/pop macros
+#
+# Stores in out_var the global flag variables followed by the per-configuration
+# ones for every entry of CMAKE_CONFIGURATION_TYPES and CMAKE_BUILD_TYPE.
+# Configuration names are upper-cased and de-duplicated: backing up the same
+# configuration twice would replace the saved value with the already cleared one.
+#
+function(llvm_utils_collect_flag_variables out_var)
+  set(flag_variables
+    CMAKE_C_FLAGS
+    CMAKE_CXX_FLAGS
+    CMAKE_SHARED_LINKER_FLAGS
+    CMAKE_EXE_LINKER_FLAGS
+    CMAKE_STATIC_LINKER_FLAGS
+    CMAKE_LOCAL_LINKER_FLAGS
+    CMAKE_MODULE_LINKER_FLAGS
+  )
+
+  set(configuration_suffixes ${CMAKE_CONFIGURATION_TYPES} ${CMAKE_BUILD_TYPE})
+  if(configuration_suffixes)
+    string(TOUPPER "${configuration_suffixes}" configuration_suffixes)
+    list(REMOVE_DUPLICATES configuration_suffixes)
+  endif()
+
+  foreach(configuration_suffix ${configuration_suffixes})
+    # CMAKE_LOCAL_LINKER_FLAGS is only handled globally, not per configuration.
+    foreach(flag_kind C_FLAGS CXX_FLAGS SHARED_LINKER_FLAGS EXE_LINKER_FLAGS STATIC_LINKER_FLAGS MODULE_LINKER_FLAGS)
+      list(APPEND flag_variables CMAKE_${flag_kind}_${configuration_suffix})
+    endforeach()
+  endforeach()
+
+  set(${out_var} ${flag_variables} PARENT_SCOPE)
+endfunction()
+
+#
+# Macro to clear and backup build flags already set
+#
+# Every flag variable is copied to a variable of the same name with a _BACKUP
+# suffix and then unset in the calling scope; llvm_utils_pop_build_flags()
+# reverses this. Set PRINT_DEBUG to log the saved values.
+#
+macro(llvm_utils_push_build_flags)
+  message(STATUS "[LLVM] Clearing build system compilation flags")
+
+  llvm_utils_collect_flag_variables(llvm_utils_flag_variables)
+  foreach(llvm_utils_flag_variable ${llvm_utils_flag_variables})
+    set(${llvm_utils_flag_variable}_BACKUP ${${llvm_utils_flag_variable}})
+    if(PRINT_DEBUG)
+      message(STATUS "[LLVM] llvm_utils_push_build_flags() ${llvm_utils_flag_variable} = ${${llvm_utils_flag_variable}}")
+    endif()
+    unset(${llvm_utils_flag_variable})
+  endforeach()
+  unset(llvm_utils_flag_variables)
+endmacro()
+
+#
+# Macro to restore build flags set previously
+#
+# Copies every _BACKUP variable written by llvm_utils_push_build_flags() back to
+# its flag variable and removes the backup. Set PRINT_DEBUG to log the values.
+#
+macro(llvm_utils_pop_build_flags)
+  message(STATUS "[LLVM] Restoring build system compilation flags")
+
+  llvm_utils_collect_flag_variables(llvm_utils_flag_variables)
+  foreach(llvm_utils_flag_variable ${llvm_utils_flag_variables})
+    set(${llvm_utils_flag_variable} ${${llvm_utils_flag_variable}_BACKUP})
+    if(PRINT_DEBUG)
+      message(STATUS "[LLVM] llvm_utils_pop_build_flags() ${llvm_utils_flag_variable} = ${${llvm_utils_flag_variable}}")
+    endif()
+    unset(${llvm_utils_flag_variable}_BACKUP)
+  endforeach()
+  unset(llvm_utils_flag_variables)
+endmacro()
+
diff --git a/external/llvm/releases/10.0.0/patches_external/BasicBlockUtils-Add-metadata-fixing-in-SplitBlockPre.patch b/external/llvm/releases/10.0.0/patches_external/BasicBlockUtils-Add-metadata-fixing-in-SplitBlockPre.patch new file mode 100644 index
000000000..15c3b57cf --- /dev/null +++ b/external/llvm/releases/10.0.0/patches_external/BasicBlockUtils-Add-metadata-fixing-in-SplitBlockPre.patch @@ -0,0 +1,105 @@ +From eeb816d95f0910bd246e37bb2bb3923acf0edf6b Mon Sep 17 00:00:00 2001 +From: Aleksander Us +Date: Mon, 26 Aug 2019 15:47:41 +0300 +Subject: [PATCH] [BasicBlockUtils] Add metadata fixing in + SplitBlockPredecessors. + +In case when BB is header of some loop and predecessor is latch of +this loop, metadata was not attached to newly created basic block. +This led to loss of loop metadata for other passes. +--- + lib/Transforms/Utils/BasicBlockUtils.cpp | 23 ++++++++---- + test/Transforms/LoopSimplify/loop_metadata.ll | 36 +++++++++++++++++++ + 2 files changed, 52 insertions(+), 7 deletions(-) + create mode 100644 test/Transforms/LoopSimplify/loop_metadata.ll + +diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp +index 5fa371377c8..3a90ae061fb 100644 +--- a/lib/Transforms/Utils/BasicBlockUtils.cpp ++++ b/lib/Transforms/Utils/BasicBlockUtils.cpp +@@ -579,24 +579,33 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, + + // The new block unconditionally branches to the old block. + BranchInst *BI = BranchInst::Create(BB, NewBB); ++ bool IsBBHeader = LI && LI->isLoopHeader(BB); ++ Loop *BBLoop = LI ? LI->getLoopFor(BB) : nullptr; + // Splitting the predecessors of a loop header creates a preheader block. +- if (LI && LI->isLoopHeader(BB)) ++ if (IsBBHeader) + // Using the loop start line number prevents debuggers stepping into the + // loop body for this instruction. +- BI->setDebugLoc(LI->getLoopFor(BB)->getStartLoc()); ++ BI->setDebugLoc(BBLoop->getStartLoc()); + else + BI->setDebugLoc(BB->getFirstNonPHIOrDbg()->getDebugLoc()); + + // Move the edges from Preds to point to NewBB instead of BB. 
+- for (unsigned i = 0, e = Preds.size(); i != e; ++i) { ++ for (BasicBlock *Pred : Preds) { ++ Instruction *PI = Pred->getTerminator(); + // This is slightly more strict than necessary; the minimum requirement + // is that there be no more than one indirectbr branching to BB. And + // all BlockAddress uses would need to be updated. +- assert(!isa(Preds[i]->getTerminator()) && ++ assert(!isa(PI) && + "Cannot split an edge from an IndirectBrInst"); +- assert(!isa(Preds[i]->getTerminator()) && +- "Cannot split an edge from a CallBrInst"); +- Preds[i]->getTerminator()->replaceUsesOfWith(BB, NewBB); ++ assert(!isa(PI) && "Cannot split an edge from a CallBrInst"); ++ if (IsBBHeader && BBLoop->contains(Pred) && BBLoop->isLoopLatch(Pred)) { ++ // Update loop metadata if it exists. ++ if (MDNode *LoopMD = PI->getMetadata(LLVMContext::MD_loop)) { ++ BI->setMetadata(LLVMContext::MD_loop, LoopMD); ++ PI->setMetadata(LLVMContext::MD_loop, nullptr); ++ } ++ } ++ PI->replaceUsesOfWith(BB, NewBB); + } + + // Insert a new PHI node into NewBB for every PHI node in BB and that new PHI +diff --git a/test/Transforms/LoopSimplify/loop_metadata.ll b/test/Transforms/LoopSimplify/loop_metadata.ll +new file mode 100644 +index 00000000000..c15c92fe3ae +--- /dev/null ++++ b/test/Transforms/LoopSimplify/loop_metadata.ll +@@ -0,0 +1,36 @@ ++; RUN: opt -S -loop-simplify < %s | FileCheck %s ++ ++; CHECK: for.cond.loopexit: ++; CHECK: br label %for.cond, !llvm.loop !0 ++; CHECK: br i1 %cmp1, label %for.body1, label %for.cond.loopexit ++ ++define void @foo() { ++entry: ++ br label %for.cond ++ ++for.cond: ; preds = %for.cond1, %entry ++ %j = phi i32 [ 0, %entry ], [ %add, %for.cond1 ] ++ %cmp = icmp ult i32 %j, 8 ++ br i1 %cmp, label %for.body, label %for.end ++ ++for.body: ; preds = %for.cond ++ %dummy1 = add i32 1, 1 ++ %add = add nuw nsw i32 %j, 1 ++ br label %for.cond1 ++ ++for.cond1: ; preds = %for.body1, %for.body ++ %i.0 = phi i32 [ 1, %for.body ], [ %inc, %for.body1 ] ++ %cmp1 = icmp ult 
i32 %i.0, 8 ++ br i1 %cmp1, label %for.body1, label %for.cond, !llvm.loop !0 ++ ++for.body1: ; preds = %for.cond1 ++ %dummy2 = add i32 1, 1 ++ %inc = add nuw nsw i32 %i.0, 1 ++ br label %for.cond1 ++ ++for.end: ; preds = %for.cond ++ ret void ++} ++ ++!0 = distinct !{!0, !1} ++!1 = !{!"llvm.loop.unroll.full"} +-- +2.18.0 + diff --git a/external/llvm/releases/10.0.0/patches_external/IndVarSimplify-Do-not-use-SCEV-expander-for-IVCount-.patch b/external/llvm/releases/10.0.0/patches_external/IndVarSimplify-Do-not-use-SCEV-expander-for-IVCount-.patch new file mode 100644 index 000000000..398c3a6ee --- /dev/null +++ b/external/llvm/releases/10.0.0/patches_external/IndVarSimplify-Do-not-use-SCEV-expander-for-IVCount-.patch @@ -0,0 +1,140 @@ +From 35e218a886f4c066eabd18685240d55270bd5a6d Mon Sep 17 00:00:00 2001 +From: Aleksander Us +Date: Mon, 26 Aug 2019 15:45:47 +0300 +Subject: [PATCH] [IndVarSimplify] Do not use SCEV expander for IVCount in + LFTR when possible. + +SCEV analysis cannot properly cache instruction with poison flags +(for example, add nsw outside of loop will not be reused by expander). +This can lead to generating of additional instructions by SCEV expander. + +Example IR: + + ... + %maxval = add nuw nsw i32 %a1, %a2 + ... +for.body: + ... + %cmp22 = icmp ult i32 %ivadd, %maxval + br i1 %cmp22, label %for.body, label %for.end + ... + +SCEV expander will generate copy of %maxval in preheader but without +nuw/nsw flags. This can be avoided by explicit check that iv count +value gives the same SCEV expressions as calculated by LFTR. 
+--- + lib/Transforms/Scalar/IndVarSimplify.cpp | 12 +++++++++- + test/Transforms/IndVarSimplify/add_nsw.ll | 23 ++++++++++++++++++++ + test/Transforms/IndVarSimplify/lftr-reuse.ll | 9 +++----- + test/Transforms/IndVarSimplify/udiv.ll | 1 + + 4 files changed, 38 insertions(+), 7 deletions(-) + create mode 100644 test/Transforms/IndVarSimplify/add_nsw.ll + +diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp +index f9fc698a4a9..5e04dac8aa6 100644 +--- a/lib/Transforms/Scalar/IndVarSimplify.cpp ++++ b/lib/Transforms/Scalar/IndVarSimplify.cpp +@@ -2375,6 +2375,17 @@ static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB, + if (UsePostInc) + IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType())); + ++ // If computed limit is equal to old limit then do not use SCEV expander ++ // because it can lost NUW/NSW flags and create extra instructions. ++ BranchInst *BI = cast(ExitingBB->getTerminator()); ++ if (ICmpInst *Cmp = dyn_cast(BI->getOperand(0))) { ++ Value *Limit = Cmp->getOperand(0); ++ if (!L->isLoopInvariant(Limit)) ++ Limit = Cmp->getOperand(1); ++ if (SE->getSCEV(Limit) == IVLimit) ++ return Limit; ++ } ++ + // Expand the code for the iteration count. + assert(SE->isLoopInvariant(IVLimit, L) && + "Computed iteration count is not loop invariant!"); +@@ -2383,7 +2394,6 @@ static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB, + // SCEV expression (IVInit) for a pointer type IV value (IndVar). + Type *LimitTy = ExitCount->getType()->isPointerTy() ? 
+ IndVar->getType() : ExitCount->getType(); +- BranchInst *BI = cast(ExitingBB->getTerminator()); + return Rewriter.expandCodeFor(IVLimit, LimitTy, BI); + } + } +diff --git a/test/Transforms/IndVarSimplify/add_nsw.ll b/test/Transforms/IndVarSimplify/add_nsw.ll +new file mode 100644 +index 00000000000..abd1cbb6c51 +--- /dev/null ++++ b/test/Transforms/IndVarSimplify/add_nsw.ll +@@ -0,0 +1,23 @@ ++; RUN: opt -indvars -S %s | FileCheck %s ++ ++target datalayout = "e-p:32:32-i64:64-n8:16:32" ++ ++; CHECK: for.body.preheader: ++; CHECK-NOT: add ++; CHECK: for.body: ++ ++define void @foo(i32 %a1, i32 %a2) { ++entry: ++ %maxval = add nuw nsw i32 %a1, %a2 ++ %cmp = icmp slt i32 %maxval, 1 ++ br i1 %cmp, label %for.end, label %for.body ++ ++for.body: ; preds = %entry, %for.body ++ %j.02 = phi i32 [ 0, %entry ], [ %add31, %for.body ] ++ %add31 = add nuw nsw i32 %j.02, 1 ++ %cmp22 = icmp slt i32 %add31, %maxval ++ br i1 %cmp22, label %for.body, label %for.end ++ ++for.end: ; preds = %for.body ++ ret void ++} +diff --git a/test/Transforms/IndVarSimplify/lftr-reuse.ll b/test/Transforms/IndVarSimplify/lftr-reuse.ll +index 14ae9738696..509d662b767 100644 +--- a/test/Transforms/IndVarSimplify/lftr-reuse.ll ++++ b/test/Transforms/IndVarSimplify/lftr-reuse.ll +@@ -67,11 +67,9 @@ define void @expandOuterRecurrence(i32 %arg) nounwind { + ; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 0, [[SUB1]] + ; CHECK-NEXT: br i1 [[CMP1]], label [[OUTER_PREHEADER:%.*]], label [[EXIT:%.*]] + ; CHECK: outer.preheader: +-; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[ARG]], -1 + ; CHECK-NEXT: br label [[OUTER:%.*]] + ; CHECK: outer: +-; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[TMP0]], [[OUTER_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[OUTER_INC:%.*]] ] +-; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_INC:%.*]], [[OUTER_INC]] ], [ 0, [[OUTER_PREHEADER]] ] ++; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_INC:%.*]], [[OUTER_INC:%.*]] ], [ 0, [[OUTER_PREHEADER]] ] + ; CHECK-NEXT: [[SUB2:%.*]] = sub nsw i32 [[ARG]], [[I]] + ; 
CHECK-NEXT: [[SUB3:%.*]] = sub nsw i32 [[SUB2]], 1 + ; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 0, [[SUB3]] +@@ -81,14 +79,13 @@ define void @expandOuterRecurrence(i32 %arg) nounwind { + ; CHECK: inner: + ; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[INNER_PH]] ], [ [[J_INC:%.*]], [[INNER]] ] + ; CHECK-NEXT: [[J_INC]] = add nuw nsw i32 [[J]], 1 +-; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[J_INC]], [[INDVARS_IV]] ++; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[J_INC]], [[SUB3]] + ; CHECK-NEXT: br i1 [[EXITCOND]], label [[INNER]], label [[OUTER_INC_LOOPEXIT:%.*]] + ; CHECK: outer.inc.loopexit: + ; CHECK-NEXT: br label [[OUTER_INC]] + ; CHECK: outer.inc: + ; CHECK-NEXT: [[I_INC]] = add nuw nsw i32 [[I]], 1 +-; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i32 [[INDVARS_IV]], -1 +-; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp ne i32 [[I_INC]], [[TMP0]] ++; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp ne i32 [[I_INC]], [[SUB1]] + ; CHECK-NEXT: br i1 [[EXITCOND1]], label [[OUTER]], label [[EXIT_LOOPEXIT:%.*]] + ; CHECK: exit.loopexit: + ; CHECK-NEXT: br label [[EXIT]] +diff --git a/test/Transforms/IndVarSimplify/udiv.ll b/test/Transforms/IndVarSimplify/udiv.ll +index b3f2c2a6a66..3530343ef4a 100644 +--- a/test/Transforms/IndVarSimplify/udiv.ll ++++ b/test/Transforms/IndVarSimplify/udiv.ll +@@ -133,6 +133,7 @@ declare i32 @printf(i8* nocapture, ...) 
nounwind + ; CHECK-LABEL: @foo( + ; CHECK: for.body.preheader: + ; CHECK-NOT: udiv ++; CHECK: for.body: + + define void @foo(double* %p, i64 %n) nounwind { + entry: +-- +2.18.0 + diff --git a/external/llvm/releases/10.0.0/patches_external/InstCombine-visitBitCast-do-not-crash-on-weird-bitca.patch b/external/llvm/releases/10.0.0/patches_external/InstCombine-visitBitCast-do-not-crash-on-weird-bitca.patch new file mode 100644 index 000000000..34bff3ccf --- /dev/null +++ b/external/llvm/releases/10.0.0/patches_external/InstCombine-visitBitCast-do-not-crash-on-weird-bitca.patch @@ -0,0 +1,46 @@ +From 381054a989ebd0b585fee46f2a01a7c5de10acf7 Mon Sep 17 00:00:00 2001 +From: Roman Lebedev +Date: Wed, 24 Jun 2020 21:12:09 +0300 +Subject: [PATCH] [InstCombine] visitBitCast(): do not crash on weird `bitcast + <1 x i8*> to i8*` + +Even if we know that RHS of a bitcast is a pointer, +we can't assume LHS is, because it might be +a single-element vector of pointer. +--- + lib/Transforms/InstCombine/InstCombineCasts.cpp | 3 ++- + test/Transforms/InstCombine/bitcast.ll | 6 ++++++ + 2 files changed, 8 insertions(+), 1 deletion(-) + +diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp +index 3750f31e3cf..a8c87ea3558 100644 +--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp ++++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp +@@ -2471,8 +2471,9 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { + if (DestTy == Src->getType()) + return replaceInstUsesWith(CI, Src); + +- if (PointerType *DstPTy = dyn_cast(DestTy)) { ++ if (isa(SrcTy) && isa(DestTy)) { + PointerType *SrcPTy = cast(SrcTy); ++ PointerType *DstPTy = cast(DestTy); + Type *DstElTy = DstPTy->getElementType(); + Type *SrcElTy = SrcPTy->getElementType(); + +diff --git a/test/Transforms/InstCombine/bitcast.ll b/test/Transforms/InstCombine/bitcast.ll +index 0f0cbdb364a..c4ee52f27a8 100644 +--- a/test/Transforms/InstCombine/bitcast.ll ++++ 
b/test/Transforms/InstCombine/bitcast.ll +@@ -561,3 +561,9 @@ define void @constant_fold_vector_to_half() { + store volatile half bitcast (<4 x i4> to half), half* undef + ret void + } ++ ++; Ensure that we do not crash when looking at such a weird bitcast. ++define i8* @bitcast_from_single_element_pointer_vector_to_pointer(<1 x i8*> %ptrvec) { ++ %ptr = bitcast <1 x i8*> %ptrvec to i8* ++ ret i8* %ptr ++} +-- +2.17.1 + diff --git a/external/llvm/releases/10.0.0/patches_external/fix_for_llvm-as_buildbreak.patch b/external/llvm/releases/10.0.0/patches_external/fix_for_llvm-as_buildbreak.patch new file mode 100644 index 000000000..09f9ff825 --- /dev/null +++ b/external/llvm/releases/10.0.0/patches_external/fix_for_llvm-as_buildbreak.patch @@ -0,0 +1,11 @@ +# Description : Fix build break for building llvm-as + +--- a/tools/llvm-as/CMakeLists.txt ++++ b/tools/llvm-as/CMakeLists.txt +@@ -2,6 +2,7 @@ set(LLVM_LINK_COMPONENTS + AsmParser + BitWriter + Core ++ Demangle + Support + ) diff --git a/external/llvm/releases/10.0.0/patches_external/fix_for_llvm-link_buildbreak.patch b/external/llvm/releases/10.0.0/patches_external/fix_for_llvm-link_buildbreak.patch new file mode 100644 index 000000000..ef298ad57 --- /dev/null +++ b/external/llvm/releases/10.0.0/patches_external/fix_for_llvm-link_buildbreak.patch @@ -0,0 +1,12 @@ +# Description : Fix build break for building llvm-link + +--- a/tools/llvm-link/CMakeLists.txt ++++ b/tools/llvm-link/CMakeLists.txt +@@ -8,6 +8,7 @@ set(LLVM_LINK_COMPONENTS + Support + TransformUtils + IPO ++ Demangle + ) + + add_llvm_tool(llvm-link diff --git a/external/llvm/releases/10.0.0/patches_external/fix_for_opt_buildbreak.patch b/external/llvm/releases/10.0.0/patches_external/fix_for_opt_buildbreak.patch new file mode 100644 index 000000000..e0174084a --- /dev/null +++ b/external/llvm/releases/10.0.0/patches_external/fix_for_opt_buildbreak.patch @@ -0,0 +1,12 @@ +# Description : Fix build break for building opt + +--- 
a/tools/opt/CMakeLists.txt ++++ b/tools/opt/CMakeLists.txt +@@ -9,6 +9,7 @@ set(LLVM_LINK_COMPONENTS + CodeGen + Core + Coroutines ++ Demangle + IPO + IRReader + InstCombine diff --git a/external/llvm/releases/11.0.0/patches_external/BasicBlockUtils-Add-metadata-fixing-in-SplitBlockPre.patch b/external/llvm/releases/11.0.0/patches_external/BasicBlockUtils-Add-metadata-fixing-in-SplitBlockPre.patch new file mode 100644 index 000000000..15c3b57cf --- /dev/null +++ b/external/llvm/releases/11.0.0/patches_external/BasicBlockUtils-Add-metadata-fixing-in-SplitBlockPre.patch @@ -0,0 +1,105 @@ +From eeb816d95f0910bd246e37bb2bb3923acf0edf6b Mon Sep 17 00:00:00 2001 +From: Aleksander Us +Date: Mon, 26 Aug 2019 15:47:41 +0300 +Subject: [PATCH] [BasicBlockUtils] Add metadata fixing in + SplitBlockPredecessors. + +In case when BB is header of some loop and predecessor is latch of +this loop, metadata was not attached to newly created basic block. +This led to loss of loop metadata for other passes. +--- + lib/Transforms/Utils/BasicBlockUtils.cpp | 23 ++++++++---- + test/Transforms/LoopSimplify/loop_metadata.ll | 36 +++++++++++++++++++ + 2 files changed, 52 insertions(+), 7 deletions(-) + create mode 100644 test/Transforms/LoopSimplify/loop_metadata.ll + +diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp +index 5fa371377c8..3a90ae061fb 100644 +--- a/lib/Transforms/Utils/BasicBlockUtils.cpp ++++ b/lib/Transforms/Utils/BasicBlockUtils.cpp +@@ -579,24 +579,33 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, + + // The new block unconditionally branches to the old block. + BranchInst *BI = BranchInst::Create(BB, NewBB); ++ bool IsBBHeader = LI && LI->isLoopHeader(BB); ++ Loop *BBLoop = LI ? LI->getLoopFor(BB) : nullptr; + // Splitting the predecessors of a loop header creates a preheader block. 
+- if (LI && LI->isLoopHeader(BB)) ++ if (IsBBHeader) + // Using the loop start line number prevents debuggers stepping into the + // loop body for this instruction. +- BI->setDebugLoc(LI->getLoopFor(BB)->getStartLoc()); ++ BI->setDebugLoc(BBLoop->getStartLoc()); + else + BI->setDebugLoc(BB->getFirstNonPHIOrDbg()->getDebugLoc()); + + // Move the edges from Preds to point to NewBB instead of BB. +- for (unsigned i = 0, e = Preds.size(); i != e; ++i) { ++ for (BasicBlock *Pred : Preds) { ++ Instruction *PI = Pred->getTerminator(); + // This is slightly more strict than necessary; the minimum requirement + // is that there be no more than one indirectbr branching to BB. And + // all BlockAddress uses would need to be updated. +- assert(!isa(Preds[i]->getTerminator()) && ++ assert(!isa(PI) && + "Cannot split an edge from an IndirectBrInst"); +- assert(!isa(Preds[i]->getTerminator()) && +- "Cannot split an edge from a CallBrInst"); +- Preds[i]->getTerminator()->replaceUsesOfWith(BB, NewBB); ++ assert(!isa(PI) && "Cannot split an edge from a CallBrInst"); ++ if (IsBBHeader && BBLoop->contains(Pred) && BBLoop->isLoopLatch(Pred)) { ++ // Update loop metadata if it exists. 
++ if (MDNode *LoopMD = PI->getMetadata(LLVMContext::MD_loop)) { ++ BI->setMetadata(LLVMContext::MD_loop, LoopMD); ++ PI->setMetadata(LLVMContext::MD_loop, nullptr); ++ } ++ } ++ PI->replaceUsesOfWith(BB, NewBB); + } + + // Insert a new PHI node into NewBB for every PHI node in BB and that new PHI +diff --git a/test/Transforms/LoopSimplify/loop_metadata.ll b/test/Transforms/LoopSimplify/loop_metadata.ll +new file mode 100644 +index 00000000000..c15c92fe3ae +--- /dev/null ++++ b/test/Transforms/LoopSimplify/loop_metadata.ll +@@ -0,0 +1,36 @@ ++; RUN: opt -S -loop-simplify < %s | FileCheck %s ++ ++; CHECK: for.cond.loopexit: ++; CHECK: br label %for.cond, !llvm.loop !0 ++; CHECK: br i1 %cmp1, label %for.body1, label %for.cond.loopexit ++ ++define void @foo() { ++entry: ++ br label %for.cond ++ ++for.cond: ; preds = %for.cond1, %entry ++ %j = phi i32 [ 0, %entry ], [ %add, %for.cond1 ] ++ %cmp = icmp ult i32 %j, 8 ++ br i1 %cmp, label %for.body, label %for.end ++ ++for.body: ; preds = %for.cond ++ %dummy1 = add i32 1, 1 ++ %add = add nuw nsw i32 %j, 1 ++ br label %for.cond1 ++ ++for.cond1: ; preds = %for.body1, %for.body ++ %i.0 = phi i32 [ 1, %for.body ], [ %inc, %for.body1 ] ++ %cmp1 = icmp ult i32 %i.0, 8 ++ br i1 %cmp1, label %for.body1, label %for.cond, !llvm.loop !0 ++ ++for.body1: ; preds = %for.cond1 ++ %dummy2 = add i32 1, 1 ++ %inc = add nuw nsw i32 %i.0, 1 ++ br label %for.cond1 ++ ++for.end: ; preds = %for.cond ++ ret void ++} ++ ++!0 = distinct !{!0, !1} ++!1 = !{!"llvm.loop.unroll.full"} +-- +2.18.0 + diff --git a/external/llvm/releases/11.0.0/patches_external/IndVarSimplify-Do-not-use-SCEV-expander-for-IVCount-.patch b/external/llvm/releases/11.0.0/patches_external/IndVarSimplify-Do-not-use-SCEV-expander-for-IVCount-.patch new file mode 100644 index 000000000..398c3a6ee --- /dev/null +++ b/external/llvm/releases/11.0.0/patches_external/IndVarSimplify-Do-not-use-SCEV-expander-for-IVCount-.patch @@ -0,0 +1,140 @@ +From 
35e218a886f4c066eabd18685240d55270bd5a6d Mon Sep 17 00:00:00 2001 +From: Aleksander Us +Date: Mon, 26 Aug 2019 15:45:47 +0300 +Subject: [PATCH] [IndVarSimplify] Do not use SCEV expander for IVCount in + LFTR when possible. + +SCEV analysis cannot properly cache instruction with poison flags +(for example, add nsw outside of loop will not be reused by expander). +This can lead to generating of additional instructions by SCEV expander. + +Example IR: + + ... + %maxval = add nuw nsw i32 %a1, %a2 + ... +for.body: + ... + %cmp22 = icmp ult i32 %ivadd, %maxval + br i1 %cmp22, label %for.body, label %for.end + ... + +SCEV expander will generate copy of %maxval in preheader but without +nuw/nsw flags. This can be avoided by explicit check that iv count +value gives the same SCEV expressions as calculated by LFTR. +--- + lib/Transforms/Scalar/IndVarSimplify.cpp | 12 +++++++++- + test/Transforms/IndVarSimplify/add_nsw.ll | 23 ++++++++++++++++++++ + test/Transforms/IndVarSimplify/lftr-reuse.ll | 9 +++----- + test/Transforms/IndVarSimplify/udiv.ll | 1 + + 4 files changed, 38 insertions(+), 7 deletions(-) + create mode 100644 test/Transforms/IndVarSimplify/add_nsw.ll + +diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp +index f9fc698a4a9..5e04dac8aa6 100644 +--- a/lib/Transforms/Scalar/IndVarSimplify.cpp ++++ b/lib/Transforms/Scalar/IndVarSimplify.cpp +@@ -2375,6 +2375,17 @@ static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB, + if (UsePostInc) + IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType())); + ++ // If computed limit is equal to old limit then do not use SCEV expander ++ // because it can lost NUW/NSW flags and create extra instructions. 
++ BranchInst *BI = cast(ExitingBB->getTerminator()); ++ if (ICmpInst *Cmp = dyn_cast(BI->getOperand(0))) { ++ Value *Limit = Cmp->getOperand(0); ++ if (!L->isLoopInvariant(Limit)) ++ Limit = Cmp->getOperand(1); ++ if (SE->getSCEV(Limit) == IVLimit) ++ return Limit; ++ } ++ + // Expand the code for the iteration count. + assert(SE->isLoopInvariant(IVLimit, L) && + "Computed iteration count is not loop invariant!"); +@@ -2383,7 +2394,6 @@ static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB, + // SCEV expression (IVInit) for a pointer type IV value (IndVar). + Type *LimitTy = ExitCount->getType()->isPointerTy() ? + IndVar->getType() : ExitCount->getType(); +- BranchInst *BI = cast(ExitingBB->getTerminator()); + return Rewriter.expandCodeFor(IVLimit, LimitTy, BI); + } + } +diff --git a/test/Transforms/IndVarSimplify/add_nsw.ll b/test/Transforms/IndVarSimplify/add_nsw.ll +new file mode 100644 +index 00000000000..abd1cbb6c51 +--- /dev/null ++++ b/test/Transforms/IndVarSimplify/add_nsw.ll +@@ -0,0 +1,23 @@ ++; RUN: opt -indvars -S %s | FileCheck %s ++ ++target datalayout = "e-p:32:32-i64:64-n8:16:32" ++ ++; CHECK: for.body.preheader: ++; CHECK-NOT: add ++; CHECK: for.body: ++ ++define void @foo(i32 %a1, i32 %a2) { ++entry: ++ %maxval = add nuw nsw i32 %a1, %a2 ++ %cmp = icmp slt i32 %maxval, 1 ++ br i1 %cmp, label %for.end, label %for.body ++ ++for.body: ; preds = %entry, %for.body ++ %j.02 = phi i32 [ 0, %entry ], [ %add31, %for.body ] ++ %add31 = add nuw nsw i32 %j.02, 1 ++ %cmp22 = icmp slt i32 %add31, %maxval ++ br i1 %cmp22, label %for.body, label %for.end ++ ++for.end: ; preds = %for.body ++ ret void ++} +diff --git a/test/Transforms/IndVarSimplify/lftr-reuse.ll b/test/Transforms/IndVarSimplify/lftr-reuse.ll +index 14ae9738696..509d662b767 100644 +--- a/test/Transforms/IndVarSimplify/lftr-reuse.ll ++++ b/test/Transforms/IndVarSimplify/lftr-reuse.ll +@@ -67,11 +67,9 @@ define void @expandOuterRecurrence(i32 %arg) nounwind { + ; CHECK-NEXT: [[CMP1:%.*]] 
= icmp slt i32 0, [[SUB1]] + ; CHECK-NEXT: br i1 [[CMP1]], label [[OUTER_PREHEADER:%.*]], label [[EXIT:%.*]] + ; CHECK: outer.preheader: +-; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[ARG]], -1 + ; CHECK-NEXT: br label [[OUTER:%.*]] + ; CHECK: outer: +-; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[TMP0]], [[OUTER_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[OUTER_INC:%.*]] ] +-; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_INC:%.*]], [[OUTER_INC]] ], [ 0, [[OUTER_PREHEADER]] ] ++; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_INC:%.*]], [[OUTER_INC:%.*]] ], [ 0, [[OUTER_PREHEADER]] ] + ; CHECK-NEXT: [[SUB2:%.*]] = sub nsw i32 [[ARG]], [[I]] + ; CHECK-NEXT: [[SUB3:%.*]] = sub nsw i32 [[SUB2]], 1 + ; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 0, [[SUB3]] +@@ -81,14 +79,13 @@ define void @expandOuterRecurrence(i32 %arg) nounwind { + ; CHECK: inner: + ; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[INNER_PH]] ], [ [[J_INC:%.*]], [[INNER]] ] + ; CHECK-NEXT: [[J_INC]] = add nuw nsw i32 [[J]], 1 +-; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[J_INC]], [[INDVARS_IV]] ++; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[J_INC]], [[SUB3]] + ; CHECK-NEXT: br i1 [[EXITCOND]], label [[INNER]], label [[OUTER_INC_LOOPEXIT:%.*]] + ; CHECK: outer.inc.loopexit: + ; CHECK-NEXT: br label [[OUTER_INC]] + ; CHECK: outer.inc: + ; CHECK-NEXT: [[I_INC]] = add nuw nsw i32 [[I]], 1 +-; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i32 [[INDVARS_IV]], -1 +-; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp ne i32 [[I_INC]], [[TMP0]] ++; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp ne i32 [[I_INC]], [[SUB1]] + ; CHECK-NEXT: br i1 [[EXITCOND1]], label [[OUTER]], label [[EXIT_LOOPEXIT:%.*]] + ; CHECK: exit.loopexit: + ; CHECK-NEXT: br label [[EXIT]] +diff --git a/test/Transforms/IndVarSimplify/udiv.ll b/test/Transforms/IndVarSimplify/udiv.ll +index b3f2c2a6a66..3530343ef4a 100644 +--- a/test/Transforms/IndVarSimplify/udiv.ll ++++ b/test/Transforms/IndVarSimplify/udiv.ll +@@ -133,6 +133,7 @@ declare i32 @printf(i8* nocapture, ...) 
nounwind + ; CHECK-LABEL: @foo( + ; CHECK: for.body.preheader: + ; CHECK-NOT: udiv ++; CHECK: for.body: + + define void @foo(double* %p, i64 %n) nounwind { + entry: +-- +2.18.0 + diff --git a/external/llvm/releases/11.0.0/patches_external/fix_for_llvm-as_buildbreak.patch b/external/llvm/releases/11.0.0/patches_external/fix_for_llvm-as_buildbreak.patch new file mode 100644 index 000000000..09f9ff825 --- /dev/null +++ b/external/llvm/releases/11.0.0/patches_external/fix_for_llvm-as_buildbreak.patch @@ -0,0 +1,11 @@ +# Description : Fix build break for building llvm-as + +--- a/tools/llvm-as/CMakeLists.txt ++++ b/tools/llvm-as/CMakeLists.txt +@@ -2,6 +2,7 @@ set(LLVM_LINK_COMPONENTS + AsmParser + BitWriter + Core ++ Demangle + Support + ) diff --git a/external/llvm/releases/11.0.0/patches_external/fix_for_llvm-link_buildbreak.patch b/external/llvm/releases/11.0.0/patches_external/fix_for_llvm-link_buildbreak.patch new file mode 100644 index 000000000..ef298ad57 --- /dev/null +++ b/external/llvm/releases/11.0.0/patches_external/fix_for_llvm-link_buildbreak.patch @@ -0,0 +1,12 @@ +# Description : Fix build break for building llvm-link + +--- a/tools/llvm-link/CMakeLists.txt ++++ b/tools/llvm-link/CMakeLists.txt +@@ -8,6 +8,7 @@ set(LLVM_LINK_COMPONENTS + Support + TransformUtils + IPO ++ Demangle + ) + + add_llvm_tool(llvm-link diff --git a/external/llvm/releases/11.0.0/patches_external/fix_for_opt_buildbreak.patch b/external/llvm/releases/11.0.0/patches_external/fix_for_opt_buildbreak.patch new file mode 100644 index 000000000..40ec66876 --- /dev/null +++ b/external/llvm/releases/11.0.0/patches_external/fix_for_opt_buildbreak.patch @@ -0,0 +1,12 @@ +# Description : Fix build break for building opt + +--- a/tools/opt/CMakeLists.txt ++++ b/tools/opt/CMakeLists.txt +@@ -9,6 +9,7 @@ set(LLVM_LINK_COMPONENTS + CodeGen + Core + Coroutines ++ Demangle + Extensions + IPO + IRReader diff --git a/external/llvm/releases/4.0.0/patches_external/2_1-enable-aggressive-combining.patch 
b/external/llvm/releases/4.0.0/patches_external/2_1-enable-aggressive-combining.patch new file mode 100644 index 000000000..d5f8a9048 --- /dev/null +++ b/external/llvm/releases/4.0.0/patches_external/2_1-enable-aggressive-combining.patch @@ -0,0 +1,20 @@ +# Description : Enable aggressive (gep (gep base, idx0), idx1) combining + + +diff -Naur --strip-trailing-cr a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp +--- a/lib/Transforms/InstCombine/InstructionCombining.cpp 2017-10-24 08:51:48.146973901 -0400 ++++ b/lib/Transforms/InstCombine/InstructionCombining.cpp 2017-10-24 08:52:36.064972024 -0400 +@@ -1573,11 +1573,13 @@ + // normalized. + if (SO1->getType() != GO1->getType()) + return nullptr; ++#if 0 + // Only do the combine when GO1 and SO1 are both constants. Only in + // this case, we are sure the cost after the merge is never more than + // that before the merge. + if (!isa(GO1) || !isa(SO1)) + return nullptr; ++#endif + Sum = Builder->CreateAdd(SO1, GO1, PtrOp->getName()+".sum"); + } + diff --git a/external/llvm/releases/4.0.0/patches_external/4_1-non-recursive-sink-hoist-region.patch b/external/llvm/releases/4.0.0/patches_external/4_1-non-recursive-sink-hoist-region.patch new file mode 100644 index 000000000..21cdb4a0b --- /dev/null +++ b/external/llvm/releases/4.0.0/patches_external/4_1-non-recursive-sink-hoist-region.patch @@ -0,0 +1,240 @@ +# Description : Large CFGs can cause a stack overflow due to recursive step +# for each basic block in a region. Instead create a worklist and iterate +# to limit the stack usage + + +diff -Naur --strip-trailing-cr a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp +--- a/lib/Transforms/Scalar/LICM.cpp 2017-02-21 11:01:56.000000000 -0800 ++++ b/lib/Transforms/Scalar/LICM.cpp 2018-03-14 12:43:23.452319700 -0700 +@@ -323,6 +323,30 @@ + return Changed; + } + ++// Does a BFS from a given node to all of its children inside a given loop. 
++// The returned vector of nodes includes the starting point. ++static SmallVector ++collectChildrenInLoop(DomTreeNode *N, const Loop *CurLoop) { ++ SmallVector Worklist; ++ auto add_region_to_worklist = [&](DomTreeNode *DTN) { ++ // Only include subregions in the top level loop. ++ BasicBlock *BB = DTN->getBlock(); ++ if (CurLoop->contains(BB)) ++ Worklist.push_back(DTN); ++ }; ++ ++ add_region_to_worklist(N); ++ ++ for (size_t I = 0; I < Worklist.size(); I++) { ++ DomTreeNode *DTN = Worklist[I]; ++ for (DomTreeNode *Child : DTN->getChildren()) ++ add_region_to_worklist(Child); ++ } ++ ++ return Worklist; ++} ++ ++ + /// Walk the specified region of the CFG (defined by all blocks dominated by + /// the specified block, and that are in the current loop) in reverse depth + /// first order w.r.t the DominatorTree. This allows us to visit uses before +@@ -338,51 +362,53 @@ + CurLoop != nullptr && CurAST != nullptr && SafetyInfo != nullptr && + "Unexpected input to sinkRegion"); + +- BasicBlock *BB = N->getBlock(); +- // If this subregion is not in the top level loop at all, exit. +- if (!CurLoop->contains(BB)) +- return false; ++ // We want to visit children before parents. We will enque all the parents ++ // before their children in the worklist and process the worklist in reverse ++ // order. ++ SmallVector Worklist = collectChildrenInLoop(N, CurLoop); ++ + +- // We are processing blocks in reverse dfo, so process children first. + bool Changed = false; +- const std::vector &Children = N->getChildren(); +- for (DomTreeNode *Child : Children) +- Changed |= +- sinkRegion(Child, AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo, ORE); +- +- // Only need to process the contents of this block if it is not part of a +- // subloop (which would already have been processed). 
+- if (inSubLoop(BB, CurLoop, LI)) +- return Changed; +- +- for (BasicBlock::iterator II = BB->end(); II != BB->begin();) { +- Instruction &I = *--II; +- +- // If the instruction is dead, we would try to sink it because it isn't used +- // in the loop, instead, just delete it. +- if (isInstructionTriviallyDead(&I, TLI)) { +- DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n'); +- ++II; +- CurAST->deleteValue(&I); +- I.eraseFromParent(); +- Changed = true; ++ for (DomTreeNode *DTN : reverse(Worklist)) { ++ BasicBlock *BB = DTN->getBlock(); ++ ++ ++ ++ // Only need to process the contents of this block if it is not part of a ++ // subloop (which would already have been processed). ++ if (inSubLoop(BB, CurLoop, LI)) + continue; +- } + +- // Check to see if we can sink this instruction to the exit blocks +- // of the loop. We can do this if the all users of the instruction are +- // outside of the loop. In this case, it doesn't even matter if the +- // operands of the instruction are loop invariant. +- // +- if (isNotUsedInLoop(I, CurLoop, SafetyInfo) && +- canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo, ORE)) { +- ++II; +- Changed |= sink(I, LI, DT, CurLoop, CurAST, SafetyInfo, ORE); ++ for (BasicBlock::iterator II = BB->end(); II != BB->begin();) { ++ Instruction &I = *--II; ++ ++ // If the instruction is dead, we would try to sink it because it isn't used ++ // in the loop, instead, just delete it. ++ if (isInstructionTriviallyDead(&I, TLI)) { ++ DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n'); ++ ++II; ++ CurAST->deleteValue(&I); ++ I.eraseFromParent(); ++ Changed = true; ++ continue; ++ } ++ ++ // Check to see if we can sink this instruction to the exit blocks ++ // of the loop. We can do this if the all users of the instruction are ++ // outside of the loop. In this case, it doesn't even matter if the ++ // operands of the instruction are loop invariant. 
++ // ++ if (isNotUsedInLoop(I, CurLoop, SafetyInfo) && ++ canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo, ORE)) { ++ ++II; ++ Changed |= sink(I, LI, DT, CurLoop, CurAST, SafetyInfo, ORE); ++ } + } + } + return Changed; + } + ++ + /// Walk the specified region of the CFG (defined by all blocks dominated by + /// the specified block, and that are in the current loop) in depth first + /// order w.r.t the DominatorTree. This allows us to visit definitions before +@@ -397,50 +423,73 @@ + CurLoop != nullptr && CurAST != nullptr && SafetyInfo != nullptr && + "Unexpected input to hoistRegion"); + +- BasicBlock *BB = N->getBlock(); ++ // We want to visit parents before children. We will enque all the parents ++ // before their children in the worklist and process the worklist in order. ++ SmallVector Worklist = collectChildrenInLoop(N, CurLoop); ++ + +- // If this subregion is not in the top level loop at all, exit. +- if (!CurLoop->contains(BB)) +- return false; + +- // Only need to process the contents of this block if it is not part of a +- // subloop (which would already have been processed). + bool Changed = false; +- if (!inSubLoop(BB, CurLoop, LI)) +- for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) { +- Instruction &I = *II++; +- // Try constant folding this instruction. If all the operands are +- // constants, it is technically hoistable, but it would be better to just +- // fold it. +- if (Constant *C = ConstantFoldInstruction( +- &I, I.getModule()->getDataLayout(), TLI)) { +- DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C << '\n'); +- CurAST->copyValue(&I, C); +- I.replaceAllUsesWith(C); +- if (isInstructionTriviallyDead(&I, TLI)) { +- CurAST->deleteValue(&I); ++ for (DomTreeNode *DTN : Worklist) { ++ BasicBlock *BB = DTN->getBlock(); ++ // Only need to process the contents of this block if it is not part of a ++ // subloop (which would already have been processed). 
++ ++ if (!inSubLoop(BB, CurLoop, LI)) ++ for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) { ++ Instruction &I = *II++; ++ // Try constant folding this instruction. If all the operands are ++ // constants, it is technically hoistable, but it would be better to ++ // just fold it. ++ if (Constant *C = ConstantFoldInstruction( ++ &I, I.getModule()->getDataLayout(), TLI)) { ++ DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C << '\n'); ++ CurAST->copyValue(&I, C); ++ I.replaceAllUsesWith(C); ++ if (isInstructionTriviallyDead(&I, TLI)) { ++ CurAST->deleteValue(&I); ++ I.eraseFromParent(); ++ } ++ Changed = true; ++ continue; ++ } ++ ++ // Attempt to remove floating point division out of the loop by ++ // converting it to a reciprocal multiplication. ++ if (I.getOpcode() == Instruction::FDiv && ++ CurLoop->isLoopInvariant(I.getOperand(1)) && ++ I.hasAllowReciprocal()) { ++ auto Divisor = I.getOperand(1); ++ auto One = llvm::ConstantFP::get(Divisor->getType(), 1.0); ++ auto ReciprocalDivisor = BinaryOperator::CreateFDiv(One, Divisor); ++ ReciprocalDivisor->setFastMathFlags(I.getFastMathFlags()); ++ ReciprocalDivisor->insertBefore(&I); ++ ++ auto Product = ++ BinaryOperator::CreateFMul(I.getOperand(0), ReciprocalDivisor); ++ Product->setFastMathFlags(I.getFastMathFlags()); ++ Product->insertAfter(&I); ++ I.replaceAllUsesWith(Product); + I.eraseFromParent(); ++ ++ hoist(*ReciprocalDivisor, DT, CurLoop, SafetyInfo, ORE); ++ Changed = true; ++ continue; + } +- Changed = true; +- continue; +- } + +- // Try hoisting the instruction out to the preheader. We can only do this +- // if all of the operands of the instruction are loop invariant and if it +- // is safe to hoist the instruction. 
+- // +- if (CurLoop->hasLoopInvariantOperands(&I) && +- canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo, ORE) && +- isSafeToExecuteUnconditionally( +- I, DT, CurLoop, SafetyInfo, ORE, +- CurLoop->getLoopPreheader()->getTerminator())) +- Changed |= hoist(I, DT, CurLoop, SafetyInfo, ORE); +- } ++ // Try hoisting the instruction out to the preheader. We can only do ++ // this if all of the operands of the instruction are loop invariant and ++ // if it is safe to hoist the instruction. ++ // ++ if (CurLoop->hasLoopInvariantOperands(&I) && ++ canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo, ORE) && ++ isSafeToExecuteUnconditionally( ++ I, DT, CurLoop, SafetyInfo, ORE, ++ CurLoop->getLoopPreheader()->getTerminator())) ++ Changed |= hoist(I, DT, CurLoop, SafetyInfo, ORE); ++ } ++ } + +- const std::vector &Children = N->getChildren(); +- for (DomTreeNode *Child : Children) +- Changed |= +- hoistRegion(Child, AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo, ORE); + return Changed; + } + diff --git a/external/llvm/releases/4.0.0/patches_external/4_2-Fix_invariant_fdiv_hoisting_in_LICM.patch b/external/llvm/releases/4.0.0/patches_external/4_2-Fix_invariant_fdiv_hoisting_in_LICM.patch new file mode 100644 index 000000000..79880d666 --- /dev/null +++ b/external/llvm/releases/4.0.0/patches_external/4_2-Fix_invariant_fdiv_hoisting_in_LICM.patch @@ -0,0 +1,129 @@ +# Description : FDiv is replaced with multiplication by reciprocal and invariant +# reciprocal is hoisted out of the loop, while multiplication remains +# even if invariant. + + +diff -Naur --strip-trailing-cr a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp +--- a/lib/Transforms/Scalar/LICM.cpp 2018-08-09 14:04:49.868088200 -0700 ++++ b/lib/Transforms/Scalar/LICM.cpp 2018-08-09 13:57:15.100467100 -0700 +@@ -435,59 +435,72 @@ + // Only need to process the contents of this block if it is not part of a + // subloop (which would already have been processed). 
+ +- if (!inSubLoop(BB, CurLoop, LI)) +- for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) { +- Instruction &I = *II++; +- // Try constant folding this instruction. If all the operands are +- // constants, it is technically hoistable, but it would be better to +- // just fold it. +- if (Constant *C = ConstantFoldInstruction( +- &I, I.getModule()->getDataLayout(), TLI)) { +- DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C << '\n'); +- CurAST->copyValue(&I, C); +- I.replaceAllUsesWith(C); +- if (isInstructionTriviallyDead(&I, TLI)) { +- CurAST->deleteValue(&I); +- I.eraseFromParent(); +- } +- Changed = true; +- continue; +- } ++ if (inSubLoop(BB, CurLoop, LI)) ++ continue; + +- // Attempt to remove floating point division out of the loop by +- // converting it to a reciprocal multiplication. +- if (I.getOpcode() == Instruction::FDiv && +- CurLoop->isLoopInvariant(I.getOperand(1)) && +- I.hasAllowReciprocal()) { +- auto Divisor = I.getOperand(1); +- auto One = llvm::ConstantFP::get(Divisor->getType(), 1.0); +- auto ReciprocalDivisor = BinaryOperator::CreateFDiv(One, Divisor); +- ReciprocalDivisor->setFastMathFlags(I.getFastMathFlags()); +- ReciprocalDivisor->insertBefore(&I); +- +- auto Product = +- BinaryOperator::CreateFMul(I.getOperand(0), ReciprocalDivisor); +- Product->setFastMathFlags(I.getFastMathFlags()); +- Product->insertAfter(&I); +- I.replaceAllUsesWith(Product); ++ // Keep track of whether the prefix of instructions visited so far are such ++ // that the next instruction visited is guaranteed to execute if the loop ++ // is entered. ++ bool IsMustExecute = CurLoop->getHeader() == BB; ++ ++ for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) { ++ Instruction &I = *II++; ++ // Try constant folding this instruction. If all the operands are ++ // constants, it is technically hoistable, but it would be better to ++ // just fold it. 
++ if (Constant *C = ConstantFoldInstruction( ++ &I, I.getModule()->getDataLayout(), TLI)) { ++ DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C << '\n'); ++ CurAST->copyValue(&I, C); ++ I.replaceAllUsesWith(C); ++ if (isInstructionTriviallyDead(&I, TLI)) { ++ CurAST->deleteValue(&I); + I.eraseFromParent(); +- +- hoist(*ReciprocalDivisor, DT, CurLoop, SafetyInfo, ORE); +- Changed = true; +- continue; + } ++ Changed = true; ++ continue; ++ } ++ ++ // Try hoisting the instruction out to the preheader. We can only do ++ // this if all of the operands of the instruction are loop invariant and ++ // if it is safe to hoist the instruction. ++ // ++ if (CurLoop->hasLoopInvariantOperands(&I) && ++ canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo, ORE) && ++ (IsMustExecute || ++ isSafeToExecuteUnconditionally( ++ I, DT, CurLoop, SafetyInfo, ORE, ++ CurLoop->getLoopPreheader()->getTerminator()))) { ++ Changed |= hoist(I, DT, CurLoop, SafetyInfo, ORE); ++ continue; ++ } + +- // Try hoisting the instruction out to the preheader. We can only do +- // this if all of the operands of the instruction are loop invariant and +- // if it is safe to hoist the instruction. +- // +- if (CurLoop->hasLoopInvariantOperands(&I) && +- canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo, ORE) && +- isSafeToExecuteUnconditionally( +- I, DT, CurLoop, SafetyInfo, ORE, +- CurLoop->getLoopPreheader()->getTerminator())) +- Changed |= hoist(I, DT, CurLoop, SafetyInfo, ORE); ++ // Attempt to remove floating point division out of the loop by ++ // converting it to a reciprocal multiplication. 
++ if (I.getOpcode() == Instruction::FDiv && ++ CurLoop->isLoopInvariant(I.getOperand(1)) && ++ I.hasAllowReciprocal()) { ++ auto Divisor = I.getOperand(1); ++ auto One = llvm::ConstantFP::get(Divisor->getType(), 1.0); ++ auto ReciprocalDivisor = BinaryOperator::CreateFDiv(One, Divisor); ++ ReciprocalDivisor->setFastMathFlags(I.getFastMathFlags()); ++ ReciprocalDivisor->insertBefore(&I); ++ ++ auto Product = ++ BinaryOperator::CreateFMul(I.getOperand(0), ReciprocalDivisor); ++ Product->setFastMathFlags(I.getFastMathFlags()); ++ Product->insertAfter(&I); ++ I.replaceAllUsesWith(Product); ++ I.eraseFromParent(); ++ ++ hoist(*ReciprocalDivisor, DT, CurLoop, SafetyInfo, ORE); ++ Changed = true; ++ continue; + } ++ ++ if (IsMustExecute) ++ IsMustExecute = isGuaranteedToTransferExecutionToSuccessor(&I); ++ } + } + + return Changed; diff --git a/external/llvm/releases/4.0.0/patches_external/Add-Reassoc-Contract-ApproxFunc-to-FMF.patch b/external/llvm/releases/4.0.0/patches_external/Add-Reassoc-Contract-ApproxFunc-to-FMF.patch new file mode 100644 index 000000000..98195b6da --- /dev/null +++ b/external/llvm/releases/4.0.0/patches_external/Add-Reassoc-Contract-ApproxFunc-to-FMF.patch @@ -0,0 +1,1536 @@ +# Description : Adds AllowReassoc, AllowContract and ApproxFunc to FastMathFlags + + +diff --git a/docs/LangRef.rst b/docs/LangRef.rst +--- a/docs/LangRef.rst ++++ b/docs/LangRef.rst +@@ -2167,11 +2167,11 @@ handlers). + Fast-Math Flags + --------------- + +-LLVM IR floating-point binary ops (:ref:`fadd `, ++LLVM IR floating-point operations (:ref:`fadd `, + :ref:`fsub `, :ref:`fmul `, :ref:`fdiv `, + :ref:`frem `, :ref:`fcmp `) and :ref:`call ` +-instructions have the following flags that can be set to enable +-otherwise unsafe floating point transformations. ++may use the following flags to enable otherwise unsafe ++floating-point transformations. 
+ + ``nnan`` + No NaNs - Allow optimizations to assume the arguments and result are not +@@ -2191,10 +2191,21 @@ otherwise unsafe floating point transformations. + Allow Reciprocal - Allow optimizations to use the reciprocal of an + argument rather than perform division. + ++``contract`` ++ Allow floating-point contraction (e.g. fusing a multiply followed by an ++ addition into a fused multiply-and-add). ++ ++``afn`` ++ Approximate functions - Allow substitution of approximate calculations for ++ functions (sin, log, sqrt, etc). See floating-point intrinsic definitions ++ for places where this can apply to LLVM's intrinsic math functions. ++ ++``reassoc`` ++ Allow reassociation transformations for floating-point instructions. ++ This may dramatically change results in floating point. ++ + ``fast`` +- Fast - Allow algebraically equivalent transformations that may +- dramatically change results in floating point (e.g. reassociate). This +- flag implies all the others. ++ This flag implies all of the others. + + .. _uselistorder: + +@@ -10045,7 +10056,7 @@ Syntax: + """"""" + + This is an overloaded intrinsic. You can use ``llvm.sqrt`` on any +-floating point or vector of floating point type. Not all targets support ++floating-point or vector of floating-point type. Not all targets support + all types however. + + :: +@@ -10059,24 +10070,22 @@ all types however. + Overview: + """"""""" + +-The '``llvm.sqrt``' intrinsics return the sqrt of the specified operand, +-returning the same value as the libm '``sqrt``' functions would. Unlike +-``sqrt`` in libm, however, ``llvm.sqrt`` has undefined behavior for +-negative numbers other than -0.0 (which allows for better optimization, +-because there is no need to worry about errno being set). +-``llvm.sqrt(-0.0)`` is defined to return -0.0 like IEEE sqrt. ++The '``llvm.sqrt``' intrinsics return the square root of the specified value. 
+ + Arguments: + """""""""" + +-The argument and return value are floating point numbers of the same +-type. ++The argument and return value are floating-point numbers of the same type. + + Semantics: + """""""""" + +-This function returns the sqrt of the specified operand if it is a +-nonnegative floating point number. ++Return the same value as a corresponding libm '``sqrt``' function but without ++trapping or setting ``errno``. For types specified by IEEE-754, the result ++matches a conforming libm implementation. ++ ++When specified with the fast-math-flag 'afn', the result may be approximated ++using a less accurate calculation. + + '``llvm.powi.*``' Intrinsic + ^^^^^^^^^^^^^^^^^^^^^^^^^^^ +@@ -10123,7 +10132,7 @@ Syntax: + """"""" + + This is an overloaded intrinsic. You can use ``llvm.sin`` on any +-floating point or vector of floating point type. Not all targets support ++floating-point or vector of floating-point type. Not all targets support + all types however. + + :: +@@ -10142,15 +10151,16 @@ The '``llvm.sin.*``' intrinsics return the sine of the operand. + Arguments: + """""""""" + +-The argument and return value are floating point numbers of the same +-type. ++The argument and return value are floating-point numbers of the same type. + + Semantics: + """""""""" + +-This function returns the sine of the specified operand, returning the +-same values as the libm ``sin`` functions would, and handles error +-conditions in the same way. ++Return the same value as a corresponding libm '``sin``' function but without ++trapping or setting ``errno``. ++ ++When specified with the fast-math-flag 'afn', the result may be approximated ++using a less accurate calculation. + + '``llvm.cos.*``' Intrinsic + ^^^^^^^^^^^^^^^^^^^^^^^^^^ +@@ -10159,7 +10169,7 @@ Syntax: + """"""" + + This is an overloaded intrinsic. You can use ``llvm.cos`` on any +-floating point or vector of floating point type. Not all targets support ++floating-point or vector of floating-point type. 
Not all targets support + all types however. + + :: +@@ -10178,15 +10188,16 @@ The '``llvm.cos.*``' intrinsics return the cosine of the operand. + Arguments: + """""""""" + +-The argument and return value are floating point numbers of the same +-type. ++The argument and return value are floating-point numbers of the same type. + + Semantics: + """""""""" + +-This function returns the cosine of the specified operand, returning the +-same values as the libm ``cos`` functions would, and handles error +-conditions in the same way. ++Return the same value as a corresponding libm '``cos``' function but without ++trapping or setting ``errno``. ++ ++When specified with the fast-math-flag 'afn', the result may be approximated ++using a less accurate calculation. + + '``llvm.pow.*``' Intrinsic + ^^^^^^^^^^^^^^^^^^^^^^^^^^ +@@ -10195,7 +10206,7 @@ Syntax: + """"""" + + This is an overloaded intrinsic. You can use ``llvm.pow`` on any +-floating point or vector of floating point type. Not all targets support ++floating-point or vector of floating-point type. Not all targets support + all types however. + + :: +@@ -10215,15 +10226,16 @@ specified (positive or negative) power. + Arguments: + """""""""" + +-The second argument is a floating point power, and the first is a value +-to raise to that power. ++The arguments and return value are floating-point numbers of the same type. + + Semantics: + """""""""" + +-This function returns the first value raised to the second power, +-returning the same values as the libm ``pow`` functions would, and +-handles error conditions in the same way. ++Return the same value as a corresponding libm '``pow``' function but without ++trapping or setting ``errno``. ++ ++When specified with the fast-math-flag 'afn', the result may be approximated ++using a less accurate calculation. + + '``llvm.exp.*``' Intrinsic + ^^^^^^^^^^^^^^^^^^^^^^^^^^ +@@ -10232,7 +10244,7 @@ Syntax: + """"""" + + This is an overloaded intrinsic. 
You can use ``llvm.exp`` on any +-floating point or vector of floating point type. Not all targets support ++floating-point or vector of floating-point type. Not all targets support + all types however. + + :: +@@ -10251,14 +10263,16 @@ The '``llvm.exp.*``' intrinsics perform the exp function. + Arguments: + """""""""" + +-The argument and return value are floating point numbers of the same +-type. ++The argument and return value are floating-point numbers of the same type. + + Semantics: + """""""""" + +-This function returns the same values as the libm ``exp`` functions +-would, and handles error conditions in the same way. ++Return the same value as a corresponding libm '``exp``' function but without ++trapping or setting ``errno``. ++ ++When specified with the fast-math-flag 'afn', the result may be approximated ++using a less accurate calculation. + + '``llvm.exp2.*``' Intrinsic + ^^^^^^^^^^^^^^^^^^^^^^^^^^^ +@@ -10267,7 +10281,7 @@ Syntax: + """"""" + + This is an overloaded intrinsic. You can use ``llvm.exp2`` on any +-floating point or vector of floating point type. Not all targets support ++floating-point or vector of floating-point type. Not all targets support + all types however. + + :: +@@ -10286,14 +10300,16 @@ The '``llvm.exp2.*``' intrinsics perform the exp2 function. + Arguments: + """""""""" + +-The argument and return value are floating point numbers of the same +-type. ++The argument and return value are floating-point numbers of the same type. + + Semantics: + """""""""" + +-This function returns the same values as the libm ``exp2`` functions +-would, and handles error conditions in the same way. ++Return the same value as a corresponding libm '``exp2``' function but without ++trapping or setting ``errno``. ++ ++When specified with the fast-math-flag 'afn', the result may be approximated ++using a less accurate calculation. 
+ + '``llvm.log.*``' Intrinsic + ^^^^^^^^^^^^^^^^^^^^^^^^^^ +@@ -10302,7 +10318,7 @@ Syntax: + """"""" + + This is an overloaded intrinsic. You can use ``llvm.log`` on any +-floating point or vector of floating point type. Not all targets support ++floating-point or vector of floating-point type. Not all targets support + all types however. + + :: +@@ -10321,14 +10337,16 @@ The '``llvm.log.*``' intrinsics perform the log function. + Arguments: + """""""""" + +-The argument and return value are floating point numbers of the same +-type. ++The argument and return value are floating-point numbers of the same type. + + Semantics: + """""""""" + +-This function returns the same values as the libm ``log`` functions +-would, and handles error conditions in the same way. ++Return the same value as a corresponding libm '``log``' function but without ++trapping or setting ``errno``. ++ ++When specified with the fast-math-flag 'afn', the result may be approximated ++using a less accurate calculation. + + '``llvm.log10.*``' Intrinsic + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +@@ -10337,7 +10355,7 @@ Syntax: + """"""" + + This is an overloaded intrinsic. You can use ``llvm.log10`` on any +-floating point or vector of floating point type. Not all targets support ++floating-point or vector of floating-point type. Not all targets support + all types however. + + :: +@@ -10356,14 +10374,16 @@ The '``llvm.log10.*``' intrinsics perform the log10 function. + Arguments: + """""""""" + +-The argument and return value are floating point numbers of the same +-type. ++The argument and return value are floating-point numbers of the same type. + + Semantics: + """""""""" + +-This function returns the same values as the libm ``log10`` functions +-would, and handles error conditions in the same way. ++Return the same value as a corresponding libm '``log10``' function but without ++trapping or setting ``errno``. 
++ ++When specified with the fast-math-flag 'afn', the result may be approximated ++using a less accurate calculation. + + '``llvm.log2.*``' Intrinsic + ^^^^^^^^^^^^^^^^^^^^^^^^^^^ +@@ -10372,7 +10392,7 @@ Syntax: + """"""" + + This is an overloaded intrinsic. You can use ``llvm.log2`` on any +-floating point or vector of floating point type. Not all targets support ++floating-point or vector of floating-point type. Not all targets support + all types however. + + :: +@@ -10391,14 +10411,16 @@ The '``llvm.log2.*``' intrinsics perform the log2 function. + Arguments: + """""""""" + +-The argument and return value are floating point numbers of the same +-type. ++The argument and return value are floating-point numbers of the same type. + + Semantics: + """""""""" + +-This function returns the same values as the libm ``log2`` functions +-would, and handles error conditions in the same way. ++Return the same value as a corresponding libm '``log2``' function but without ++trapping or setting ``errno``. ++ ++When specified with the fast-math-flag 'afn', the result may be approximated ++using a less accurate calculation. + + '``llvm.fma.*``' Intrinsic + ^^^^^^^^^^^^^^^^^^^^^^^^^^ +@@ -10407,7 +10429,7 @@ Syntax: + """"""" + + This is an overloaded intrinsic. You can use ``llvm.fma`` on any +-floating point or vector of floating point type. Not all targets support ++floating-point or vector of floating-point type. Not all targets support + all types however. + + :: +@@ -10421,20 +10443,21 @@ all types however. + Overview: + """"""""" + +-The '``llvm.fma.*``' intrinsics perform the fused multiply-add +-operation. ++The '``llvm.fma.*``' intrinsics perform the fused multiply-add operation. + + Arguments: + """""""""" + +-The argument and return value are floating point numbers of the same +-type. ++The arguments and return value are floating-point numbers of the same type. 
+ + Semantics: + """""""""" + +-This function returns the same values as the libm ``fma`` functions +-would, and does not set errno. ++Return the same value as a corresponding libm '``fma``' function but without ++trapping or setting ``errno``. ++ ++When specified with the fast-math-flag 'afn', the result may be approximated ++using a less accurate calculation. + + '``llvm.fabs.*``' Intrinsic + ^^^^^^^^^^^^^^^^^^^^^^^^^^^ +diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h +--- a/include/llvm/CodeGen/SelectionDAGNodes.h ++++ b/include/llvm/CodeGen/SelectionDAGNodes.h +@@ -350,6 +350,7 @@ private: + bool NoSignedZeros : 1; + bool AllowReciprocal : 1; + bool VectorReduction : 1; ++ bool AllowContract : 1; + + public: + /// Default constructor turns off all optimization flags. +@@ -363,6 +364,7 @@ public: + NoSignedZeros = false; + AllowReciprocal = false; + VectorReduction = false; ++ AllowContract = false; + } + + // These are mutators for each flag. +@@ -375,6 +377,7 @@ public: + void setNoSignedZeros(bool b) { NoSignedZeros = b; } + void setAllowReciprocal(bool b) { AllowReciprocal = b; } + void setVectorReduction(bool b) { VectorReduction = b; } ++ void setAllowContract(bool b) { AllowContract = b; } + + // These are accessors for each flag. + bool hasNoUnsignedWrap() const { return NoUnsignedWrap; } +@@ -386,6 +389,7 @@ public: + bool hasNoSignedZeros() const { return NoSignedZeros; } + bool hasAllowReciprocal() const { return AllowReciprocal; } + bool hasVectorReduction() const { return VectorReduction; } ++ bool hasAllowContract() const { return AllowContract; } + + /// Clear any flags in this flag set that aren't also set in Flags. 
+ void intersectWith(const SDNodeFlags *Flags) { +@@ -397,6 +401,7 @@ public: + NoInfs &= Flags->NoInfs; + NoSignedZeros &= Flags->NoSignedZeros; + AllowReciprocal &= Flags->AllowReciprocal; ++ AllowContract &= Flags->AllowContract; + } + }; + +diff --git a/include/llvm/IR/Instruction.h b/include/llvm/IR/Instruction.h +--- a/include/llvm/IR/Instruction.h ++++ b/include/llvm/IR/Instruction.h +@@ -279,10 +279,15 @@ public: + /// Determine whether the exact flag is set. + bool isExact() const; + +- /// Set or clear the unsafe-algebra flag on this instruction, which must be an ++ /// Set or clear all fast-math-flags on this instruction, which must be an + /// operator which supports this flag. See LangRef.html for the meaning of + /// this flag. +- void setHasUnsafeAlgebra(bool B); ++ void setFast(bool B); ++ ++ /// Set or clear the reassociation flag on this instruction, which must be ++ /// an operator which supports this flag. See LangRef.html for the meaning of ++ /// this flag. ++ void setHasAllowReassoc(bool B); + + /// Set or clear the no-nans flag on this instruction, which must be an + /// operator which supports this flag. See LangRef.html for the meaning of +@@ -304,6 +309,11 @@ public: + /// this flag. + void setHasAllowReciprocal(bool B); + ++ /// Set or clear the approximate-math-functions flag on this instruction, ++ /// which must be an operator which supports this flag. See LangRef.html for ++ /// the meaning of this flag. ++ void setHasApproxFunc(bool B); ++ + /// Convenience function for setting multiple fast-math flags on this + /// instruction, which must be an operator which supports these flags. See + /// LangRef.html for the meaning of these flags. +@@ -314,8 +324,11 @@ public: + /// LangRef.html for the meaning of these flags. + void copyFastMathFlags(FastMathFlags FMF); + +- /// Determine whether the unsafe-algebra flag is set. +- bool hasUnsafeAlgebra() const; ++ /// Determine whether all fast-math-flags are set. 
++ bool isFast() const; ++ ++ /// Determine whether the allow-reassociation flag is set. ++ bool hasAllowReassoc() const; + + /// Determine whether the no-NaNs flag is set. + bool hasNoNaNs() const; +@@ -329,6 +342,12 @@ public: + /// Determine whether the allow-reciprocal flag is set. + bool hasAllowReciprocal() const; + ++ /// Determine whether the allow-contract flag is set. ++ bool hasAllowContract() const; ++ ++ /// Determine whether the approximate-math-functions flag is set. ++ bool hasApproxFunc() const; ++ + /// Convenience function for getting all the fast-math flags, which must be an + /// operator which supports these flags. See LangRef.html for the meaning of + /// these flags. +diff --git a/include/llvm/IR/Operator.h b/include/llvm/IR/Operator.h +--- a/include/llvm/IR/Operator.h ++++ b/include/llvm/IR/Operator.h +@@ -171,44 +171,61 @@ private: + + unsigned Flags = 0; + +- FastMathFlags(unsigned F) : Flags(F) { } ++ FastMathFlags(unsigned F) { ++ // If all 7 bits are set, turn this into -1. If the number of bits grows, ++ // this must be updated. This is intended to provide some forward binary ++ // compatibility insurance for the meaning of 'fast' in case bits are added. ++ if (F == 0x7F) Flags = ~0U; ++ else Flags = F; ++ } + + public: ++ // This is how the bits are used in Value::SubclassOptionalData so they ++ // should fit there too. ++ // WARNING: We're out of space. SubclassOptionalData only has 7 bits. New ++ // functionality will require a change in how this information is stored. 
+ enum { +- UnsafeAlgebra = (1 << 0), ++ AllowReassoc = (1 << 0), + NoNaNs = (1 << 1), + NoInfs = (1 << 2), + NoSignedZeros = (1 << 3), +- AllowReciprocal = (1 << 4) ++ AllowReciprocal = (1 << 4), ++ AllowContract = (1 << 5), ++ ApproxFunc = (1 << 6) + }; + + FastMathFlags() = default; + +- /// Whether any flag is set + bool any() const { return Flags != 0; } ++ bool none() const { return Flags == 0; } ++ bool all() const { return Flags == ~0U; } + +- /// Set all the flags to false + void clear() { Flags = 0; } ++ void set() { Flags = ~0U; } + + /// Flag queries ++ bool allowReassoc() const { return 0 != (Flags & AllowReassoc); } + bool noNaNs() const { return 0 != (Flags & NoNaNs); } + bool noInfs() const { return 0 != (Flags & NoInfs); } + bool noSignedZeros() const { return 0 != (Flags & NoSignedZeros); } + bool allowReciprocal() const { return 0 != (Flags & AllowReciprocal); } +- bool unsafeAlgebra() const { return 0 != (Flags & UnsafeAlgebra); } ++ bool allowContract() const { return 0 != (Flags & AllowContract); } ++ bool approxFunc() const { return 0 != (Flags & ApproxFunc); } ++ /// 'Fast' means all bits are set. ++ bool isFast() const { return all(); } + + /// Flag setters ++ void setAllowReassoc() { Flags |= AllowReassoc; } + void setNoNaNs() { Flags |= NoNaNs; } + void setNoInfs() { Flags |= NoInfs; } + void setNoSignedZeros() { Flags |= NoSignedZeros; } + void setAllowReciprocal() { Flags |= AllowReciprocal; } +- void setUnsafeAlgebra() { +- Flags |= UnsafeAlgebra; +- setNoNaNs(); +- setNoInfs(); +- setNoSignedZeros(); +- setAllowReciprocal(); ++ // TODO: Change the other set* functions to take a parameter? 
++ void setAllowContract(bool B) { ++ Flags = (Flags & ~AllowContract) | B * AllowContract; + } ++ void setApproxFunc() { Flags |= ApproxFunc; } ++ void setFast() { set(); } + + void operator&=(const FastMathFlags &OtherFlags) { + Flags &= OtherFlags.Flags; +@@ -221,18 +238,21 @@ class FPMathOperator : public Operator { + private: + friend class Instruction; + +- void setHasUnsafeAlgebra(bool B) { ++ /// 'Fast' means all bits are set. ++ void setFast(bool B) { ++ setHasAllowReassoc(B); ++ setHasNoNaNs(B); ++ setHasNoInfs(B); ++ setHasNoSignedZeros(B); ++ setHasAllowReciprocal(B); ++ setHasAllowContract(B); ++ setHasApproxFunc(B); ++ } ++ ++ void setHasAllowReassoc(bool B) { + SubclassOptionalData = +- (SubclassOptionalData & ~FastMathFlags::UnsafeAlgebra) | +- (B * FastMathFlags::UnsafeAlgebra); +- +- // Unsafe algebra implies all the others +- if (B) { +- setHasNoNaNs(true); +- setHasNoInfs(true); +- setHasNoSignedZeros(true); +- setHasAllowReciprocal(true); +- } ++ (SubclassOptionalData & ~FastMathFlags::AllowReassoc) | ++ (B * FastMathFlags::AllowReassoc); + } + + void setHasNoNaNs(bool B) { +@@ -259,6 +279,18 @@ private: + (B * FastMathFlags::AllowReciprocal); + } + ++ void setHasAllowContract(bool B) { ++ SubclassOptionalData = ++ (SubclassOptionalData & ~FastMathFlags::AllowContract) | ++ (B * FastMathFlags::AllowContract); ++ } ++ ++ void setHasApproxFunc(bool B) { ++ SubclassOptionalData = ++ (SubclassOptionalData & ~FastMathFlags::ApproxFunc) | ++ (B * FastMathFlags::ApproxFunc); ++ } ++ + /// Convenience function for setting multiple fast-math flags. + /// FMF is a mask of the bits to set. + void setFastMathFlags(FastMathFlags FMF) { +@@ -272,36 +304,53 @@ private: + } + + public: +- /// Test whether this operation is permitted to be +- /// algebraically transformed, aka the 'A' fast-math property. 
+- bool hasUnsafeAlgebra() const { +- return (SubclassOptionalData & FastMathFlags::UnsafeAlgebra) != 0; ++ /// Test if this operation allows all non-strict floating-point transforms. ++ bool isFast() const { ++ return ((SubclassOptionalData & FastMathFlags::AllowReassoc) != 0 && ++ (SubclassOptionalData & FastMathFlags::NoNaNs) != 0 && ++ (SubclassOptionalData & FastMathFlags::NoInfs) != 0 && ++ (SubclassOptionalData & FastMathFlags::NoSignedZeros) != 0 && ++ (SubclassOptionalData & FastMathFlags::AllowReciprocal) != 0 && ++ (SubclassOptionalData & FastMathFlags::AllowContract) != 0 && ++ (SubclassOptionalData & FastMathFlags::ApproxFunc) != 0); ++ } ++ ++ /// Test if this operation may be simplified with reassociative transforms. ++ bool hasAllowReassoc() const { ++ return (SubclassOptionalData & FastMathFlags::AllowReassoc) != 0; + } + +- /// Test whether this operation's arguments and results are to be +- /// treated as non-NaN, aka the 'N' fast-math property. ++ /// Test if this operation's arguments and results are assumed not-NaN. + bool hasNoNaNs() const { + return (SubclassOptionalData & FastMathFlags::NoNaNs) != 0; + } + +- /// Test whether this operation's arguments and results are to be +- /// treated as NoN-Inf, aka the 'I' fast-math property. ++ /// Test if this operation's arguments and results are assumed not-infinite. + bool hasNoInfs() const { + return (SubclassOptionalData & FastMathFlags::NoInfs) != 0; + } + +- /// Test whether this operation can treat the sign of zero +- /// as insignificant, aka the 'S' fast-math property. ++ /// Test if this operation can ignore the sign of zero. + bool hasNoSignedZeros() const { + return (SubclassOptionalData & FastMathFlags::NoSignedZeros) != 0; + } + +- /// Test whether this operation is permitted to use +- /// reciprocal instead of division, aka the 'R' fast-math property. ++ /// Test if this operation can use reciprocal multiply instead of division. 
+ bool hasAllowReciprocal() const { + return (SubclassOptionalData & FastMathFlags::AllowReciprocal) != 0; + } + ++ /// Test if this operation can be floating-point contracted (FMA). ++ bool hasAllowContract() const { ++ return (SubclassOptionalData & FastMathFlags::AllowContract) != 0; ++ } ++ ++ /// Test if this operation allows approximations of math library functions or ++ /// intrinsics. ++ bool hasApproxFunc() const { ++ return (SubclassOptionalData & FastMathFlags::ApproxFunc) != 0; ++ } ++ + /// Convenience function for getting all the fast-math flags + FastMathFlags getFastMathFlags() const { + return FastMathFlags(SubclassOptionalData); +diff --git a/include/llvm/Transforms/Utils/LoopUtils.h b/include/llvm/Transforms/Utils/LoopUtils.h +--- a/include/llvm/Transforms/Utils/LoopUtils.h ++++ b/include/llvm/Transforms/Utils/LoopUtils.h +@@ -323,15 +323,13 @@ public: + /// not have the "fast-math" property. Such operation requires a relaxed FP + /// mode. + bool hasUnsafeAlgebra() { +- return InductionBinOp && +- !cast(InductionBinOp)->hasUnsafeAlgebra(); ++ return InductionBinOp && !cast(InductionBinOp)->isFast(); + } + + /// Returns induction operator that does not have "fast-math" property + /// and requires FP unsafe mode. 
+ Instruction *getUnsafeAlgebraInst() { +- if (!InductionBinOp || +- cast(InductionBinOp)->hasUnsafeAlgebra()) ++ if (!InductionBinOp || cast(InductionBinOp)->isFast()) + return nullptr; + return InductionBinOp; + } +diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp +--- a/lib/AsmParser/LLLexer.cpp ++++ b/lib/AsmParser/LLLexer.cpp +@@ -548,6 +548,9 @@ lltok::Kind LLLexer::LexIdentifier() { + KEYWORD(ninf); + KEYWORD(nsz); + KEYWORD(arcp); ++ KEYWORD(contract); ++ KEYWORD(reassoc); ++ KEYWORD(afn); + KEYWORD(fast); + KEYWORD(nuw); + KEYWORD(nsw); +diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h +--- a/lib/AsmParser/LLParser.h ++++ b/lib/AsmParser/LLParser.h +@@ -188,11 +188,17 @@ namespace llvm { + FastMathFlags FMF; + while (true) + switch (Lex.getKind()) { +- case lltok::kw_fast: FMF.setUnsafeAlgebra(); Lex.Lex(); continue; ++ case lltok::kw_fast: FMF.setFast(); Lex.Lex(); continue; + case lltok::kw_nnan: FMF.setNoNaNs(); Lex.Lex(); continue; + case lltok::kw_ninf: FMF.setNoInfs(); Lex.Lex(); continue; + case lltok::kw_nsz: FMF.setNoSignedZeros(); Lex.Lex(); continue; + case lltok::kw_arcp: FMF.setAllowReciprocal(); Lex.Lex(); continue; ++ case lltok::kw_contract: ++ FMF.setAllowContract(true); ++ Lex.Lex(); ++ continue; ++ case lltok::kw_reassoc: FMF.setAllowReassoc(); Lex.Lex(); continue; ++ case lltok::kw_afn: FMF.setApproxFunc(); Lex.Lex(); continue; + default: return FMF; + } + return FMF; +diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h +--- a/lib/AsmParser/LLToken.h ++++ b/lib/AsmParser/LLToken.h +@@ -98,6 +98,9 @@ enum Kind { + kw_ninf, + kw_nsz, + kw_arcp, ++ kw_contract, ++ kw_reassoc, ++ kw_afn, + kw_fast, + kw_nuw, + kw_nsw, +diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp +--- a/lib/Bitcode/Reader/BitcodeReader.cpp ++++ b/lib/Bitcode/Reader/BitcodeReader.cpp +@@ -961,8 +961,8 @@ static Comdat::SelectionKind getDecodedComdatSelectionKind(unsigned Val) { + + static 
FastMathFlags getDecodedFastMathFlags(unsigned Val) { + FastMathFlags FMF; +- if (0 != (Val & FastMathFlags::UnsafeAlgebra)) +- FMF.setUnsafeAlgebra(); ++ if (0 != (Val & FastMathFlags::AllowReassoc)) ++ FMF.setAllowReassoc(); + if (0 != (Val & FastMathFlags::NoNaNs)) + FMF.setNoNaNs(); + if (0 != (Val & FastMathFlags::NoInfs)) +@@ -971,6 +971,10 @@ static FastMathFlags getDecodedFastMathFlags(unsigned Val) { + FMF.setNoSignedZeros(); + if (0 != (Val & FastMathFlags::AllowReciprocal)) + FMF.setAllowReciprocal(); ++ if (0 != (Val & FastMathFlags::AllowContract)) ++ FMF.setAllowContract(true); ++ if (0 != (Val & FastMathFlags::ApproxFunc)) ++ FMF.setApproxFunc(); + return FMF; + } + +diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp +--- a/lib/Bitcode/Writer/BitcodeWriter.cpp ++++ b/lib/Bitcode/Writer/BitcodeWriter.cpp +@@ -1316,8 +1316,8 @@ static uint64_t getOptimizationFlags(const Value *V) { + if (PEO->isExact()) + Flags |= 1 << bitc::PEO_EXACT; + } else if (const auto *FPMO = dyn_cast(V)) { +- if (FPMO->hasUnsafeAlgebra()) +- Flags |= FastMathFlags::UnsafeAlgebra; ++ if (FPMO->hasAllowReassoc()) ++ Flags |= FastMathFlags::AllowReassoc; + if (FPMO->hasNoNaNs()) + Flags |= FastMathFlags::NoNaNs; + if (FPMO->hasNoInfs()) +@@ -1326,6 +1326,10 @@ static uint64_t getOptimizationFlags(const Value *V) { + Flags |= FastMathFlags::NoSignedZeros; + if (FPMO->hasAllowReciprocal()) + Flags |= FastMathFlags::AllowReciprocal; ++ if (FPMO->hasAllowContract()) ++ Flags |= FastMathFlags::AllowContract; ++ if (FPMO->hasApproxFunc()) ++ Flags |= FastMathFlags::ApproxFunc; + } + + return Flags; +diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp ++++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +@@ -2446,7 +2446,7 @@ static bool isVectorReductionOp(const User *I) { + case Instruction::FAdd: + case Instruction::FMul: + if 
(const FPMathOperator *FPOp = dyn_cast(Inst)) +- if (FPOp->getFastMathFlags().unsafeAlgebra()) ++ if (FPOp->getFastMathFlags().isFast()) + break; + LLVM_FALLTHROUGH; + default: +@@ -2492,7 +2492,7 @@ static bool isVectorReductionOp(const User *I) { + + if (Inst->getOpcode() == OpCode || isa(U)) { + if (const FPMathOperator *FPOp = dyn_cast(Inst)) +- if (!isa(FPOp) && !FPOp->getFastMathFlags().unsafeAlgebra()) ++ if (!isa(FPOp) && !FPOp->getFastMathFlags().isFast()) + return false; + UsersToVisit.push_back(U); + } else if (const ShuffleVectorInst *ShufInst = +@@ -2583,11 +2583,13 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) { + Flags.setVectorReduction(vec_redux); + if (EnableFMFInDAG) { + Flags.setAllowReciprocal(FMF.allowReciprocal()); ++ Flags.setAllowContract(FMF.allowContract()); + Flags.setNoInfs(FMF.noInfs()); + Flags.setNoNaNs(FMF.noNaNs()); + Flags.setNoSignedZeros(FMF.noSignedZeros()); +- Flags.setUnsafeAlgebra(FMF.unsafeAlgebra()); ++ Flags.setUnsafeAlgebra(FMF.isFast()); + } ++ + SDValue BinNodeValue = DAG.getNode(OpCode, getCurSDLoc(), Op1.getValueType(), + Op1, Op2, &Flags); + setValue(&I, BinNodeValue); +diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp +--- a/lib/IR/AsmWriter.cpp ++++ b/lib/IR/AsmWriter.cpp +@@ -1061,10 +1061,12 @@ static void writeAtomicRMWOperation(raw_ostream &Out, + + static void WriteOptimizationInfo(raw_ostream &Out, const User *U) { + if (const FPMathOperator *FPO = dyn_cast(U)) { +- // Unsafe algebra implies all the others, no need to write them all out +- if (FPO->hasUnsafeAlgebra()) ++ // 'Fast' is an abbreviation for all fast-math-flags. 
++ if (FPO->isFast()) + Out << " fast"; + else { ++ if (FPO->hasAllowReassoc()) ++ Out << " reassoc"; + if (FPO->hasNoNaNs()) + Out << " nnan"; + if (FPO->hasNoInfs()) +@@ -1073,6 +1075,10 @@ static void WriteOptimizationInfo(raw_ostream &Out, const User *U) { + Out << " nsz"; + if (FPO->hasAllowReciprocal()) + Out << " arcp"; ++ if (FPO->hasAllowContract()) ++ Out << " contract"; ++ if (FPO->hasApproxFunc()) ++ Out << " afn"; + } + } + +diff --git a/lib/IR/Instruction.cpp b/lib/IR/Instruction.cpp +--- a/lib/IR/Instruction.cpp ++++ b/lib/IR/Instruction.cpp +@@ -126,9 +126,14 @@ bool Instruction::isExact() const { + return cast(this)->isExact(); + } + +-void Instruction::setHasUnsafeAlgebra(bool B) { ++void Instruction::setFast(bool B) { + assert(isa(this) && "setting fast-math flag on invalid op"); +- cast(this)->setHasUnsafeAlgebra(B); ++ cast(this)->setFast(B); ++} ++ ++void Instruction::setHasAllowReassoc(bool B) { ++ assert(isa(this) && "setting fast-math flag on invalid op"); ++ cast(this)->setHasAllowReassoc(B); + } + + void Instruction::setHasNoNaNs(bool B) { +@@ -151,6 +156,11 @@ void Instruction::setHasAllowReciprocal(bool B) { + cast(this)->setHasAllowReciprocal(B); + } + ++void Instruction::setHasApproxFunc(bool B) { ++ assert(isa(this) && "setting fast-math flag on invalid op"); ++ cast(this)->setHasApproxFunc(B); ++} ++ + void Instruction::setFastMathFlags(FastMathFlags FMF) { + assert(isa(this) && "setting fast-math flag on invalid op"); + cast(this)->setFastMathFlags(FMF); +@@ -161,9 +171,14 @@ void Instruction::copyFastMathFlags(FastMathFlags FMF) { + cast(this)->copyFastMathFlags(FMF); + } + +-bool Instruction::hasUnsafeAlgebra() const { ++bool Instruction::isFast() const { + assert(isa(this) && "getting fast-math flag on invalid op"); +- return cast(this)->hasUnsafeAlgebra(); ++ return cast(this)->isFast(); ++} ++ ++bool Instruction::hasAllowReassoc() const { ++ assert(isa(this) && "getting fast-math flag on invalid op"); ++ return 
cast(this)->hasAllowReassoc(); + } + + bool Instruction::hasNoNaNs() const { +@@ -186,6 +201,16 @@ bool Instruction::hasAllowReciprocal() const { + return cast(this)->hasAllowReciprocal(); + } + ++bool Instruction::hasAllowContract() const { ++ assert(isa(this) && "getting fast-math flag on invalid op"); ++ return cast(this)->hasAllowContract(); ++} ++ ++bool Instruction::hasApproxFunc() const { ++ assert(isa(this) && "getting fast-math flag on invalid op"); ++ return cast(this)->hasApproxFunc(); ++} ++ + FastMathFlags Instruction::getFastMathFlags() const { + assert(isa(this) && "getting fast-math flag on invalid op"); + return cast(this)->getFastMathFlags(); +@@ -540,7 +565,7 @@ bool Instruction::isAssociative() const { + switch (Opcode) { + case FMul: + case FAdd: +- return cast(this)->hasUnsafeAlgebra(); ++ return cast(this)->isFast(); + default: + return false; + } +diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp +--- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp ++++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp +@@ -470,7 +470,7 @@ Value *FAddCombine::performFactorization(Instruction *I) { + return nullptr; + + FastMathFlags Flags; +- Flags.setUnsafeAlgebra(); ++ Flags.setFast(); + if (I0) Flags &= I->getFastMathFlags(); + if (I1) Flags &= I->getFastMathFlags(); + +@@ -499,7 +499,7 @@ Value *FAddCombine::performFactorization(Instruction *I) { + } + + Value *FAddCombine::simplify(Instruction *I) { +- assert(I->hasUnsafeAlgebra() && "Should be in unsafe mode"); ++ assert(I->isFast() && "Expected 'fast' instruction"); + + // Currently we are not able to handle vector type. 
+ if (I->getType()->isVectorTy()) +@@ -1455,7 +1455,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { + } + } + +- if (I.hasUnsafeAlgebra()) { ++ if (I.isFast()) { + if (Value *V = FAddCombine(Builder).simplify(&I)) + return replaceInstUsesWith(I, V); + } +@@ -1788,7 +1788,7 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) { + } + } + +- if (I.hasUnsafeAlgebra()) { ++ if (I.isFast()) { + if (Value *V = FAddCombine(Builder).simplify(&I)) + return replaceInstUsesWith(I, V); + } +diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +--- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp ++++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +@@ -480,7 +480,7 @@ static void detectLog2OfHalf(Value *&Op, Value *&Y, IntrinsicInst *&Log2) { + IntrinsicInst *II = dyn_cast(Op); + if (!II) + return; +- if (II->getIntrinsicID() != Intrinsic::log2 || !II->hasUnsafeAlgebra()) ++ if (II->getIntrinsicID() != Intrinsic::log2 || !II->isFast()) + return; + Log2 = II; + +@@ -491,7 +491,8 @@ static void detectLog2OfHalf(Value *&Op, Value *&Y, IntrinsicInst *&Log2) { + Instruction *I = dyn_cast(OpLog2Of); + if (!I) + return; +- if (I->getOpcode() != Instruction::FMul || !I->hasUnsafeAlgebra()) ++ ++ if (I->getOpcode() != Instruction::FMul || !I->isFast()) + return; + + if (match(I->getOperand(0), m_SpecificFP(0.5))) +@@ -595,7 +596,7 @@ Value *InstCombiner::foldFMulConst(Instruction *FMulOrDiv, Constant *C, + } + + if (R) { +- R->setHasUnsafeAlgebra(true); ++ R->setFast(true); + InsertNewInstWith(R, *InsertBefore); + } + +@@ -616,7 +617,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { + SimplifyFMulInst(Op0, Op1, I.getFastMathFlags(), DL, &TLI, &DT, &AC)) + return replaceInstUsesWith(I, V); + +- bool AllowReassociate = I.hasUnsafeAlgebra(); ++ bool AllowReassociate = I.isFast(); + + // Simplify mul instructions with a constant RHS. 
+ if (isa(Op1)) { +@@ -1314,7 +1315,7 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) { + if (Instruction *R = FoldOpIntoSelect(I, SI)) + return R; + +- bool AllowReassociate = I.hasUnsafeAlgebra(); ++ bool AllowReassociate = I.isFast(); + bool AllowReciprocal = I.hasAllowReciprocal(); + + if (Constant *Op1C = dyn_cast(Op1)) { +diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp +--- a/lib/Transforms/Scalar/Reassociate.cpp ++++ b/lib/Transforms/Scalar/Reassociate.cpp +@@ -128,8 +128,7 @@ XorOpnd::XorOpnd(Value *V) { + static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) { + if (V->hasOneUse() && isa(V) && + cast(V)->getOpcode() == Opcode && +- (!isa(V) || +- cast(V)->hasUnsafeAlgebra())) ++ (!isa(V) || cast(V)->isFast())) + return cast(V); + return nullptr; + } +@@ -139,8 +138,7 @@ static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode1, + if (V->hasOneUse() && isa(V) && + (cast(V)->getOpcode() == Opcode1 || + cast(V)->getOpcode() == Opcode2) && +- (!isa(V) || +- cast(V)->hasUnsafeAlgebra())) ++ (!isa(V) || cast(V)->isFast())) + return cast(V); + return nullptr; + } +@@ -552,7 +550,7 @@ static bool LinearizeExprTree(BinaryOperator *I, + assert((!isa(Op) || + cast(Op)->getOpcode() != Opcode + || (isa(Op) && +- !cast(Op)->hasUnsafeAlgebra())) && ++ !cast(Op)->isFast())) && + "Should have been handled above!"); + assert(Op->hasOneUse() && "Has uses outside the expression tree!"); + +@@ -2005,8 +2003,8 @@ void ReassociatePass::OptimizeInst(Instruction *I) { + if (I->getType()->isVectorTy() && I->getOpcode() == Instruction::Xor) + return; + +- // Don't optimize floating point instructions that don't have unsafe algebra. +- if (I->getType()->isFPOrFPVectorTy() && !I->hasUnsafeAlgebra()) ++ // Don't optimize floating-point instructions unless they are 'fast'. ++ if (I->getType()->isFPOrFPVectorTy() && !I->isFast()) + return; + + // Do not reassociate boolean (i1) expressions. 
We want to preserve the +diff --git a/lib/Transforms/Utils/LoopUtils.cpp b/lib/Transforms/Utils/LoopUtils.cpp +--- a/lib/Transforms/Utils/LoopUtils.cpp ++++ b/lib/Transforms/Utils/LoopUtils.cpp +@@ -425,7 +425,7 @@ RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind, + InstDesc &Prev, bool HasFunNoNaNAttr) { + bool FP = I->getType()->isFloatingPointTy(); + Instruction *UAI = Prev.getUnsafeAlgebraInst(); +- if (!UAI && FP && !I->hasUnsafeAlgebra()) ++ if (!UAI && FP && !I->isFast()) + UAI = I; // Found an unsafe (unvectorizable) algebra instruction. + + switch (I->getOpcode()) { +@@ -638,11 +638,11 @@ Value *RecurrenceDescriptor::createMinMaxOp(IRBuilder<> &Builder, + break; + } + +- // We only match FP sequences with unsafe algebra, so we can unconditionally ++ // We only match FP sequences that are 'fast', so we can unconditionally + // set it on any generated instructions. + IRBuilder<>::FastMathFlagGuard FMFG(Builder); + FastMathFlags FMF; +- FMF.setUnsafeAlgebra(); ++ FMF.setFast(); + Builder.setFastMathFlags(FMF); + + Value *Cmp; +@@ -746,7 +746,7 @@ Value *InductionDescriptor::transform(IRBuilder<> &B, Value *Index, + + // Floating point operations had to be 'fast' to enable the induction. + FastMathFlags Flags; +- Flags.setUnsafeAlgebra(); ++ Flags.setFast(); + + Value *MulExp = B.CreateFMul(StepValue, Index); + if (isa(MulExp)) +diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp +--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp ++++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp +@@ -1054,7 +1054,7 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { + // Example: x = 1000, y = 0.001. + // pow(exp(x), y) = pow(inf, 0.001) = inf, whereas exp(x*y) = exp(1). 
+ auto *OpC = dyn_cast(Op1); +- if (OpC && OpC->hasUnsafeAlgebra() && CI->hasUnsafeAlgebra()) { ++ if (OpC && OpC->isFast() && CI->isFast()) { + LibFunc::Func Func; + Function *OpCCallee = OpC->getCalledFunction(); + if (OpCCallee && TLI->getLibFunc(OpCCallee->getName(), Func) && +@@ -1079,7 +1079,7 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { + LibFunc::sqrtl)) { + // If -ffast-math: + // pow(x, -0.5) -> 1.0 / sqrt(x) +- if (CI->hasUnsafeAlgebra()) { ++ if (CI->isFast()) { + IRBuilder<>::FastMathFlagGuard Guard(B); + B.setFastMathFlags(CI->getFastMathFlags()); + +@@ -1099,7 +1099,7 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { + LibFunc::fabsl)) { + + // In -ffast-math, pow(x, 0.5) -> sqrt(x). +- if (CI->hasUnsafeAlgebra()) { ++ if (CI->isFast()) { + IRBuilder<>::FastMathFlagGuard Guard(B); + B.setFastMathFlags(CI->getFastMathFlags()); + +@@ -1131,7 +1131,7 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { + return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), Op1, "powrecip"); + + // In -ffast-math, generate repeated fmul instead of generating pow(x, n). +- if (CI->hasUnsafeAlgebra()) { ++ if (CI->isFast()) { + APFloat V = abs(Op2C->getValueAPF()); + // We limit to a max of 7 fmul(s). Thus max exponent is 32. + // This transformation applies to integer exponents only. +@@ -1228,9 +1228,9 @@ Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) { + + IRBuilder<>::FastMathFlagGuard Guard(B); + FastMathFlags FMF; +- if (CI->hasUnsafeAlgebra()) { +- // Unsafe algebra sets all fast-math-flags to true. +- FMF.setUnsafeAlgebra(); ++ if (CI->isFast()) { ++ // If the call is 'fast', then anything we create here will also be 'fast'. ++ FMF.setFast(); + } else { + // At a minimum, no-nans-fp-math must be true. 
+ if (!CI->hasNoNaNs()) +@@ -1261,13 +1261,13 @@ Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) { + if (UnsafeFPShrink && hasFloatVersion(Name)) + Ret = optimizeUnaryDoubleFP(CI, B, true); + +- if (!CI->hasUnsafeAlgebra()) ++ if (!CI->isFast()) + return Ret; + Value *Op1 = CI->getArgOperand(0); + auto *OpC = dyn_cast(Op1); + +- // The earlier call must also be unsafe in order to do these transforms. +- if (!OpC || !OpC->hasUnsafeAlgebra()) ++ // The earlier call must also be 'fast' in order to do these transforms. ++ if (!OpC || !OpC->isFast()) + return Ret; + + // log(pow(x,y)) -> y*log(x) +@@ -1277,7 +1277,7 @@ Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) { + + IRBuilder<>::FastMathFlagGuard Guard(B); + FastMathFlags FMF; +- FMF.setUnsafeAlgebra(); ++ FMF.setFast(); + B.setFastMathFlags(FMF); + + LibFunc::Func Func; +@@ -1306,11 +1306,11 @@ Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) { + Callee->getIntrinsicID() == Intrinsic::sqrt)) + Ret = optimizeUnaryDoubleFP(CI, B, true); + +- if (!CI->hasUnsafeAlgebra()) ++ if (!CI->isFast()) + return Ret; + + Instruction *I = dyn_cast(CI->getArgOperand(0)); +- if (!I || I->getOpcode() != Instruction::FMul || !I->hasUnsafeAlgebra()) ++ if (!I || I->getOpcode() != Instruction::FMul || !I->isFast()) + return Ret; + + // We're looking for a repeated factor in a multiplication tree, +@@ -1332,8 +1332,7 @@ Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) { + Value *OtherMul0, *OtherMul1; + if (match(Op0, m_FMul(m_Value(OtherMul0), m_Value(OtherMul1)))) { + // Pattern: sqrt((x * y) * z) +- if (OtherMul0 == OtherMul1 && +- cast(Op0)->hasUnsafeAlgebra()) { ++ if (OtherMul0 == OtherMul1 && cast(Op0)->isFast()) { + // Matched: sqrt((x * x) * z) + RepeatOp = OtherMul0; + OtherOp = Op1; +@@ -1378,8 +1377,8 @@ Value *LibCallSimplifier::optimizeTan(CallInst *CI, IRBuilder<> &B) { + if (!OpC) + return Ret; + +- // Both calls must allow unsafe 
optimizations in order to remove them. +- if (!CI->hasUnsafeAlgebra() || !OpC->hasUnsafeAlgebra()) ++ // Both calls must be 'fast' in order to remove them. ++ if (!CI->isFast() || !OpC->isFast()) + return Ret; + + // tan(atan(x)) -> x +@@ -2017,7 +2016,7 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) { + // Command-line parameter overrides instruction attribute. + if (EnableUnsafeFPShrink.getNumOccurrences() > 0) + UnsafeFPShrink = EnableUnsafeFPShrink; +- else if (isa(CI) && CI->hasUnsafeAlgebra()) ++ else if (isa(CI) && CI->isFast()) + UnsafeFPShrink = true; + + // First, check for intrinsics. +diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp +--- a/lib/Transforms/Vectorize/LoopVectorize.cpp ++++ b/lib/Transforms/Vectorize/LoopVectorize.cpp +@@ -2373,7 +2373,7 @@ Value *InnerLoopVectorizer::getStepVector(Value *Val, int StartIdx, Value *Step, + + // Floating point operations had to be 'fast' to enable the induction. + FastMathFlags Flags; +- Flags.setUnsafeAlgebra(); ++ Flags.setFast(); + + Value *MulOp = Builder.CreateFMul(Cv, Step); + if (isa(MulOp)) +@@ -3593,7 +3593,7 @@ static void cse(BasicBlock *BB) { + static Value *addFastMathFlag(Value *V) { + if (isa(V)) { + FastMathFlags Flags; +- Flags.setUnsafeAlgebra(); ++ Flags.setFast(); + cast(V)->setFastMathFlags(Flags); + } + return V; +@@ -5382,7 +5382,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { + // operations, shuffles, or casts, as they don't change precision or + // semantics. 
+ } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) && +- !I.hasUnsafeAlgebra()) { ++ !I.isFast()) { + DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n"); + Hints->setPotentiallyUnsafe(); + } +diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp +--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp ++++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp +@@ -4266,7 +4266,7 @@ public: + Value *VectorizedTree = nullptr; + IRBuilder<> Builder(ReductionRoot); + FastMathFlags Unsafe; +- Unsafe.setUnsafeAlgebra(); ++ Unsafe.setFast(); + Builder.setFastMathFlags(Unsafe); + unsigned i = 0; + +diff --git a/test/Assembler/fast-math-flags.ll b/test/Assembler/fast-math-flags.ll +--- a/test/Assembler/fast-math-flags.ll ++++ b/test/Assembler/fast-math-flags.ll +@@ -7,6 +7,8 @@ + @vec = external global <3 x float> + @arr = external global [3 x float] + ++declare float @foo(float) ++ + define float @none(float %x, float %y) { + entry: + ; CHECK: %vec = load <3 x float>, <3 x float>* @vec +@@ -74,6 +76,40 @@ entry: + ret float %e + } + ++; CHECK: @contract( ++define float @contract(float %x, float %y) { ++entry: ++; CHECK: %a = fsub contract float %x, %y ++ %a = fsub contract float %x, %y ++; CHECK: %b = fadd contract float %x, %y ++ %b = fadd contract float %x, %y ++; CHECK: %c = fmul contract float %a, %b ++ %c = fmul contract float %a, %b ++ ret float %c ++} ++ ++; CHECK: @reassoc( ++define float @reassoc(float %x, float %y) { ++; CHECK: %a = fsub reassoc float %x, %y ++ %a = fsub reassoc float %x, %y ++; CHECK: %b = fmul reassoc float %x, %y ++ %b = fmul reassoc float %x, %y ++; CHECK: %c = call reassoc float @foo(float %b) ++ %c = call reassoc float @foo(float %b) ++ ret float %c ++} ++ ++; CHECK: @afn( ++define float @afn(float %x, float %y) { ++; CHECK: %a = fdiv afn float %x, %y ++ %a = fdiv afn float %x, %y ++; CHECK: %b = frem afn float %x, %y ++ %b = frem afn float %x, %y ++; CHECK: %c = call afn float 
@foo(float %b) ++ %c = call afn float @foo(float %b) ++ ret float %c ++} ++ + ; CHECK: no_nan_inf + define float @no_nan_inf(float %x, float %y) { + entry: +@@ -118,10 +154,10 @@ entry: + ; CHECK: %arr = load [3 x float], [3 x float]* @arr + %arr = load [3 x float], [3 x float]* @arr + +-; CHECK: %a = fadd nnan ninf float %x, %y +- %a = fadd ninf nnan float %x, %y +-; CHECK: %a_vec = fadd nnan <3 x float> %vec, %vec +- %a_vec = fadd nnan <3 x float> %vec, %vec ++; CHECK: %a = fadd nnan ninf afn float %x, %y ++ %a = fadd ninf nnan afn float %x, %y ++; CHECK: %a_vec = fadd reassoc nnan <3 x float> %vec, %vec ++ %a_vec = fadd reassoc nnan <3 x float> %vec, %vec + ; CHECK: %b = fsub fast float %x, %y + %b = fsub nnan nsz fast float %x, %y + ; CHECK: %b_vec = fsub nnan <3 x float> %vec, %vec +diff --git a/test/Bitcode/compatibility-3.6.ll b/test/Bitcode/compatibility-3.6.ll +--- a/test/Bitcode/compatibility-3.6.ll ++++ b/test/Bitcode/compatibility-3.6.ll +@@ -612,7 +612,9 @@ define void @fastmathflags(float %op1, float %op2) { + %f.arcp = fadd arcp float %op1, %op2 + ; CHECK: %f.arcp = fadd arcp float %op1, %op2 + %f.fast = fadd fast float %op1, %op2 +- ; CHECK: %f.fast = fadd fast float %op1, %op2 ++ ; 'fast' used to be its own bit, but this changed in Oct 2017. ++ ; The binary test file does not have the newer 'contract' and 'afn' bits set, so this is not fully 'fast'. ++ ; CHECK: %f.fast = fadd reassoc nnan ninf nsz arcp float %op1, %op2 + ret void + } + +diff --git a/test/Bitcode/compatibility-3.7.ll b/test/Bitcode/compatibility-3.7.ll +--- a/test/Bitcode/compatibility-3.7.ll ++++ b/test/Bitcode/compatibility-3.7.ll +@@ -656,7 +656,9 @@ define void @fastmathflags(float %op1, float %op2) { + %f.arcp = fadd arcp float %op1, %op2 + ; CHECK: %f.arcp = fadd arcp float %op1, %op2 + %f.fast = fadd fast float %op1, %op2 +- ; CHECK: %f.fast = fadd fast float %op1, %op2 ++ ; 'fast' used to be its own bit, but this changed in Oct 2017. 
++ ; The binary test file does not have the newer 'contract' and 'afn' bits set, so this is not fully 'fast'. ++ ; CHECK: %f.fast = fadd reassoc nnan ninf nsz arcp float %op1, %op2 + ret void + } + +diff --git a/test/Bitcode/compatibility-3.8.ll b/test/Bitcode/compatibility-3.8.ll +--- a/test/Bitcode/compatibility-3.8.ll ++++ b/test/Bitcode/compatibility-3.8.ll +@@ -687,7 +687,9 @@ define void @fastmathflags(float %op1, float %op2) { + %f.arcp = fadd arcp float %op1, %op2 + ; CHECK: %f.arcp = fadd arcp float %op1, %op2 + %f.fast = fadd fast float %op1, %op2 +- ; CHECK: %f.fast = fadd fast float %op1, %op2 ++ ; 'fast' used to be its own bit, but this changed in Oct 2017. ++ ; The binary test file does not have the newer 'contract' and 'afn' bits set, so this is not fully 'fast'. ++ ; CHECK: %f.fast = fadd reassoc nnan ninf nsz arcp float %op1, %op2 + ret void + } + +@@ -700,7 +702,9 @@ declare <4 x double> @fmf3() + ; CHECK-LABEL: fastMathFlagsForCalls( + define void @fastMathFlagsForCalls(float %f, double %d1, <4 x double> %d2) { + %call.fast = call fast float @fmf1() +- ; CHECK: %call.fast = call fast float @fmf1() ++ ; 'fast' used to be its own bit, but this changed in Oct 2017. ++ ; The binary test file does not have the newer 'contract' and 'afn' bits set, so this is not fully 'fast'. ++ ; CHECK: %call.fast = call reassoc nnan ninf nsz arcp float @fmf1() + + ; Throw in some other attributes to make sure those stay in the right places. + +diff --git a/test/Bitcode/compatibility-3.9.ll b/test/Bitcode/compatibility-3.9.ll +--- a/test/Bitcode/compatibility-3.9.ll ++++ b/test/Bitcode/compatibility-3.9.ll +@@ -758,7 +758,9 @@ define void @fastmathflags(float %op1, float %op2) { + %f.arcp = fadd arcp float %op1, %op2 + ; CHECK: %f.arcp = fadd arcp float %op1, %op2 + %f.fast = fadd fast float %op1, %op2 +- ; CHECK: %f.fast = fadd fast float %op1, %op2 ++ ; 'fast' used to be its own bit, but this changed in Oct 2017.
++ ; The binary test file does not have the newer 'contract' and 'afn' bits set, so this is not fully 'fast'. ++ ; CHECK: %f.fast = fadd reassoc nnan ninf nsz arcp float %op1, %op2 + ret void + } + +@@ -771,7 +773,9 @@ declare <4 x double> @fmf3() + ; CHECK-LABEL: fastMathFlagsForCalls( + define void @fastMathFlagsForCalls(float %f, double %d1, <4 x double> %d2) { + %call.fast = call fast float @fmf1() +- ; CHECK: %call.fast = call fast float @fmf1() ++ ; 'fast' used to be its own bit, but this changed in Oct 2017. ++ ; The binary test file does not have the newer 'contract' and 'afn' bits set, so this is not fully 'fast'. ++ ; CHECK: %call.fast = call reassoc nnan ninf nsz arcp float @fmf1() + + ; Throw in some other attributes to make sure those stay in the right places. + +diff --git a/test/Bitcode/compatibility.ll b/test/Bitcode/compatibility.ll +--- a/test/Bitcode/compatibility.ll ++++ b/test/Bitcode/compatibility.ll +@@ -760,6 +760,12 @@ define void @fastmathflags(float %op1, float %op2) { + ; CHECK: %f.nsz = fadd nsz float %op1, %op2 + %f.arcp = fadd arcp float %op1, %op2 + ; CHECK: %f.arcp = fadd arcp float %op1, %op2 ++ %f.contract = fadd contract float %op1, %op2 ++ ; CHECK: %f.contract = fadd contract float %op1, %op2 ++ %f.afn = fadd afn float %op1, %op2 ++ ; CHECK: %f.afn = fadd afn float %op1, %op2 ++ %f.reassoc = fadd reassoc float %op1, %op2 ++ ; CHECK: %f.reassoc = fadd reassoc float %op1, %op2 + %f.fast = fadd fast float %op1, %op2 + ; CHECK: %f.fast = fadd fast float %op1, %op2 + ret void +diff --git a/unittests/IR/IRBuilderTest.cpp b/unittests/IR/IRBuilderTest.cpp +--- a/unittests/IR/IRBuilderTest.cpp ++++ b/unittests/IR/IRBuilderTest.cpp +@@ -144,17 +144,40 @@ TEST_F(IRBuilderTest, FastMathFlags) { + FastMathFlags FMF; + Builder.setFastMathFlags(FMF); + ++ // By default, no flags are set. 
+ F = Builder.CreateFAdd(F, F); + EXPECT_FALSE(Builder.getFastMathFlags().any()); +- +- FMF.setUnsafeAlgebra(); ++ ASSERT_TRUE(isa(F)); ++ FAdd = cast(F); ++ EXPECT_FALSE(FAdd->hasNoNaNs()); ++ EXPECT_FALSE(FAdd->hasNoInfs()); ++ EXPECT_FALSE(FAdd->hasNoSignedZeros()); ++ EXPECT_FALSE(FAdd->hasAllowReciprocal()); ++ EXPECT_FALSE(FAdd->hasAllowContract()); ++ EXPECT_FALSE(FAdd->hasAllowReassoc()); ++ EXPECT_FALSE(FAdd->hasApproxFunc()); ++ ++ // Set all flags in the instruction. ++ FAdd->setFast(true); ++ EXPECT_TRUE(FAdd->hasNoNaNs()); ++ EXPECT_TRUE(FAdd->hasNoInfs()); ++ EXPECT_TRUE(FAdd->hasNoSignedZeros()); ++ EXPECT_TRUE(FAdd->hasAllowReciprocal()); ++ EXPECT_TRUE(FAdd->hasAllowContract()); ++ EXPECT_TRUE(FAdd->hasAllowReassoc()); ++ EXPECT_TRUE(FAdd->hasApproxFunc()); ++ ++ // All flags are set in the builder. ++ FMF.setFast(); + Builder.setFastMathFlags(FMF); + + F = Builder.CreateFAdd(F, F); + EXPECT_TRUE(Builder.getFastMathFlags().any()); ++ EXPECT_TRUE(Builder.getFastMathFlags().all()); + ASSERT_TRUE(isa(F)); + FAdd = cast(F); + EXPECT_TRUE(FAdd->hasNoNaNs()); ++ EXPECT_TRUE(FAdd->isFast()); + + // Now, try it with CreateBinOp + F = Builder.CreateBinOp(Instruction::FAdd, F, F); +@@ -162,21 +185,23 @@ TEST_F(IRBuilderTest, FastMathFlags) { + ASSERT_TRUE(isa(F)); + FAdd = cast(F); + EXPECT_TRUE(FAdd->hasNoNaNs()); ++ EXPECT_TRUE(FAdd->isFast()); + + F = Builder.CreateFDiv(F, F); +- EXPECT_TRUE(Builder.getFastMathFlags().any()); +- EXPECT_TRUE(Builder.getFastMathFlags().UnsafeAlgebra); ++ EXPECT_TRUE(Builder.getFastMathFlags().all()); + ASSERT_TRUE(isa(F)); + FDiv = cast(F); + EXPECT_TRUE(FDiv->hasAllowReciprocal()); + ++ // Clear all FMF in the builder. + Builder.clearFastMathFlags(); + + F = Builder.CreateFDiv(F, F); + ASSERT_TRUE(isa(F)); + FDiv = cast(F); + EXPECT_FALSE(FDiv->hasAllowReciprocal()); +- ++ ++ // Try individual flags. 
+ FMF.clear(); + FMF.setAllowReciprocal(); + Builder.setFastMathFlags(FMF); +@@ -207,7 +232,44 @@ TEST_F(IRBuilderTest, FastMathFlags) { + EXPECT_TRUE(FCmp->hasAllowReciprocal()); + + Builder.clearFastMathFlags(); +- ++ ++ // Test FP-contract ++ FC = Builder.CreateFAdd(F, F); ++ ASSERT_TRUE(isa(FC)); ++ FAdd = cast(FC); ++ EXPECT_FALSE(FAdd->hasAllowContract()); ++ ++ FMF.clear(); ++ FMF.setAllowContract(true); ++ Builder.setFastMathFlags(FMF); ++ ++ FC = Builder.CreateFAdd(F, F); ++ EXPECT_TRUE(Builder.getFastMathFlags().any()); ++ EXPECT_TRUE(Builder.getFastMathFlags().AllowContract); ++ ASSERT_TRUE(isa(FC)); ++ FAdd = cast(FC); ++ EXPECT_TRUE(FAdd->hasAllowContract()); ++ ++ FMF.setApproxFunc(); ++ Builder.clearFastMathFlags(); ++ Builder.setFastMathFlags(FMF); ++ // Now 'afn' and 'contract' are set. ++ F = Builder.CreateFMul(F, F); ++ FAdd = cast(F); ++ EXPECT_TRUE(FAdd->hasApproxFunc()); ++ EXPECT_TRUE(FAdd->hasAllowContract()); ++ EXPECT_FALSE(FAdd->hasAllowReassoc()); ++ ++ FMF.setAllowReassoc(); ++ Builder.clearFastMathFlags(); ++ Builder.setFastMathFlags(FMF); ++ // Now 'afn' and 'contract' and 'reassoc' are set. ++ F = Builder.CreateFMul(F, F); ++ FAdd = cast(F); ++ EXPECT_TRUE(FAdd->hasApproxFunc()); ++ EXPECT_TRUE(FAdd->hasAllowContract()); ++ EXPECT_TRUE(FAdd->hasAllowReassoc()); ++ ++ // Test a call with FMF.
+ auto CalleeTy = FunctionType::get(Type::getFloatTy(Ctx), + /*isVarArg=*/false); diff --git a/external/llvm/releases/4.0.0/patches_external/ConstantFolding-Constant-fold-llvm.sqrt-x-like-other.patch b/external/llvm/releases/4.0.0/patches_external/ConstantFolding-Constant-fold-llvm.sqrt-x-like-other.patch new file mode 100644 index 000000000..317fba131 --- /dev/null +++ b/external/llvm/releases/4.0.0/patches_external/ConstantFolding-Constant-fold-llvm.sqrt-x-like-other.patch @@ -0,0 +1,86 @@ +# Description : Constant fold llvm.sqrt like other intrinsics + + +From 12044b3dd7855fd39334ac5321645b27c6c7b49e Mon Sep 17 00:00:00 2001 +From: Justin Lebar +Date: Sat, 21 Jan 2017 00:59:57 +0000 +Subject: [PATCH] [ConstantFolding] Constant-fold llvm.sqrt(x) like other + intrinsics. + +Summary: +Currently we return undef, but we're in the process of changing the +LangRef so that llvm.sqrt behaves like the other math intrinsics, +matching the return value of the standard libcall but not setting errno. + +This change is legal even without the LangRef change because currently +calling llvm.sqrt(x) where x is negative is spec'ed to be UB. But in +practice it's also safe because we're simply constant-folding fewer +inputs: Inputs >= -0 get constant-folded as before, but inputs < -0 now +aren't constant-folded, because ConstantFoldFP aborts if the host math +function raises an fp exception. 
+ +Reviewers: hfinkel, efriedma, sanjoy + +Subscribers: llvm-commits + +Differential Revision: https://reviews.llvm.org/D28929 + +git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@292692 91177308-0d34-0410-b5e6-96231b3b80d8 +--- + lib/Analysis/ConstantFolding.cpp | 15 ++------------- + test/Transforms/InstCombine/constant-fold-math.ll | 5 +++-- + 2 files changed, 5 insertions(+), 15 deletions(-) + +diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp +index 7386727..db8ac82 100644 +--- a/lib/Analysis/ConstantFolding.cpp ++++ b/lib/Analysis/ConstantFolding.cpp +@@ -1630,6 +1630,8 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty, + return ConstantFoldFP(sin, V, Ty); + case Intrinsic::cos: + return ConstantFoldFP(cos, V, Ty); ++ case Intrinsic::sqrt: ++ return ConstantFoldFP(sqrt, V, Ty); + } + + if (!TLI) +@@ -1683,19 +1685,6 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty, + else if ((Name == "log10" && V > 0 && TLI->has(LibFunc::log10)) || + (Name == "log10f" && V > 0 && TLI->has(LibFunc::log10f))) + return ConstantFoldFP(log10, V, Ty); +- else if (IntrinsicID == Intrinsic::sqrt && +- (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())) { +- if (V >= -0.0) +- return ConstantFoldFP(sqrt, V, Ty); +- else { +- // Unlike the sqrt definitions in C/C++, POSIX, and IEEE-754 - which +- // all guarantee or favor returning NaN - the square root of a +- // negative number is not defined for the LLVM sqrt intrinsic. +- // This is because the intrinsic should only be emitted in place of +- // libm's sqrt function when using "no-nans-fp-math". 
+- return UndefValue::get(Ty); +- } +- } + break; + case 'r': + if ((Name == "round" && TLI->has(LibFunc::round)) || +diff --git a/test/Transforms/InstCombine/constant-fold-math.ll b/test/Transforms/InstCombine/constant-fold-math.ll +index ce8d337..6eb371a 100644 +--- a/test/Transforms/InstCombine/constant-fold-math.ll ++++ b/test/Transforms/InstCombine/constant-fold-math.ll +@@ -45,9 +45,10 @@ define double @constant_fold_fmuladd_f64() #0 { + ret double %x + } + +-; The sqrt intrinsic is undefined for negative inputs besides -0.0. ++; Currently we don't constant-fold intrinsics whose corresponding libcalls ++; raise an fp exception. + ; CHECK-LABEL: @bad_sqrt +-; CHECK-NEXT: ret double undef ++; CHECK-NEXT: call double @llvm.sqrt.f64(double -2 + define double @bad_sqrt() { + %x = call double @llvm.sqrt.f64(double -2.000000e+00) + ret double %x +-- +2.7.4 + diff --git a/external/llvm/releases/4.0.0/patches_external/Enable-gcc-8-build.patch b/external/llvm/releases/4.0.0/patches_external/Enable-gcc-8-build.patch new file mode 100644 index 000000000..f57f8afc4 --- /dev/null +++ b/external/llvm/releases/4.0.0/patches_external/Enable-gcc-8-build.patch @@ -0,0 +1,17 @@ +# Description : Enable LLVM build with gcc 8 + + +diff -Naur --strip-trailing-cr a/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h b/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h +--- a/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h ++++ b/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h +@@ -687,8 +687,8 @@ private: + + uint32_t getTrampolineSize() const { return RemoteTrampolineSize; } + +- Expected> readMem(char *Dst, JITTargetAddress Src, +- uint64_t Size) { ++ Expected> readMem(char *Dst, JITTargetAddress Src, ++ uint64_t Size) { + // Check for an 'out-of-band' error, e.g. from an MM destructor. 
+ if (ExistingError) + return std::move(ExistingError); diff --git a/external/llvm/releases/4.0.0/patches_external/Enabling-test-Offset32-Regression-Fix.patch b/external/llvm/releases/4.0.0/patches_external/Enabling-test-Offset32-Regression-Fix.patch new file mode 100644 index 000000000..a29f57310 --- /dev/null +++ b/external/llvm/releases/4.0.0/patches_external/Enabling-test-Offset32-Regression-Fix.patch @@ -0,0 +1,48 @@ +# Description : Limit recursion depth of constant evolving. + + +diff -Naur --strip-trailing-cr a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp +--- a/lib/Analysis/ScalarEvolution.cpp 2017-10-23 09:53:04.925214744 -0400 ++++ b/lib/Analysis/ScalarEvolution.cpp 2017-10-23 14:26:57.327570973 -0400 +@@ -137,6 +137,10 @@ + cl::desc("Maximum depth of recursive value complexity comparisons"), + cl::init(2)); + ++static cl::opt MaxConstantEvolvingDepth( ++ "scalar-evolution-max-constant-evolving-depth", cl::Hidden, ++ cl::desc("Maximum depth of recursive constant evolving"), ++ cl::init(32)); + //===----------------------------------------------------------------------===// + // SCEV class definitions + //===----------------------------------------------------------------------===// +@@ -6408,7 +6412,10 @@ + /// recursing through each instruction operand until reaching a loop header phi. + static PHINode * + getConstantEvolvingPHIOperands(Instruction *UseInst, const Loop *L, +- DenseMap &PHIMap) { ++ DenseMap &PHIMap, ++ unsigned Depth) { ++ if (Depth > MaxConstantEvolvingDepth) ++ return nullptr; + + // Otherwise, we can evaluate this instruction if all of its operands are + // constant or derived from a PHI node themselves. +@@ -6428,7 +6435,7 @@ + if (!P) { + // Recurse and memoize the results, whether a phi is found or not. + // This recursive call invalidates pointers into PHIMap. 
+- P = getConstantEvolvingPHIOperands(OpInst, L, PHIMap); ++ P = getConstantEvolvingPHIOperands(OpInst, L, PHIMap, Depth + 1); + PHIMap[OpInst] = P; + } + if (!P) +@@ -6455,7 +6462,8 @@ + + // Record non-constant instructions contained by the loop. + DenseMap PHIMap; +- return getConstantEvolvingPHIOperands(I, L, PHIMap); ++ ++ return getConstantEvolvingPHIOperands(I, L, PHIMap, 0); + } + + /// EvaluateExpression - Given an expression that passes the diff --git a/external/llvm/releases/4.0.0/patches_external/Fix-crash-due-to-bad-bitcast.patch b/external/llvm/releases/4.0.0/patches_external/Fix-crash-due-to-bad-bitcast.patch new file mode 100644 index 000000000..c2b424fda --- /dev/null +++ b/external/llvm/releases/4.0.0/patches_external/Fix-crash-due-to-bad-bitcast.patch @@ -0,0 +1,34 @@ +# Description : [SROA] Fix crash due to bad bitcast + + +diff -Naur --strip-trailing-cr a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp +--- a/lib/Transforms/Scalar/SROA.cpp 2017-10-23 15:06:46.139477387 -0400 ++++ b/lib/Transforms/Scalar/SROA.cpp 2017-10-23 15:06:52.595477134 -0400 +@@ -3692,7 +3692,8 @@ + int Idx = 0, Size = Offsets.Splits.size(); + for (;;) { + auto *PartTy = Type::getIntNTy(Ty->getContext(), PartSize * 8); +- auto *PartPtrTy = PartTy->getPointerTo(SI->getPointerAddressSpace()); ++ auto *LoadPartPtrTy = PartTy->getPointerTo(LI->getPointerAddressSpace()); ++ auto *StorePartPtrTy = PartTy->getPointerTo(SI->getPointerAddressSpace()); + + // Either lookup a split load or create one. 
+ LoadInst *PLoad; +@@ -3703,7 +3704,7 @@ + PLoad = IRB.CreateAlignedLoad( + getAdjustedPtr(IRB, DL, LoadBasePtr, + APInt(DL.getPointerSizeInBits(), PartOffset), +- PartPtrTy, LoadBasePtr->getName() + "."), ++ LoadPartPtrTy, LoadBasePtr->getName() + "."), + getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false, + LI->getName()); + } +@@ -3713,7 +3714,7 @@ + StoreInst *PStore = IRB.CreateAlignedStore( + PLoad, getAdjustedPtr(IRB, DL, StoreBasePtr, + APInt(DL.getPointerSizeInBits(), PartOffset), +- PartPtrTy, StoreBasePtr->getName() + "."), ++ StorePartPtrTy, StoreBasePtr->getName() + "."), + getAdjustedAlignment(SI, PartOffset, DL), /*IsVolatile*/ false); + + // Now build a new slice for the alloca. diff --git a/external/llvm/releases/4.0.0/patches_external/Fixed-faulty-PHI-node-update.patch b/external/llvm/releases/4.0.0/patches_external/Fixed-faulty-PHI-node-update.patch new file mode 100644 index 000000000..dfea7a4c5 --- /dev/null +++ b/external/llvm/releases/4.0.0/patches_external/Fixed-faulty-PHI-node-update.patch @@ -0,0 +1,142 @@ + +From 08bf51ee2ac040f0101a0755790df1176e9c07a0 Mon Sep 17 00:00:00 2001 +From: Karl-Johan Karlsson +Date: Tue, 22 May 2018 08:46:48 +0000 +Subject: [PATCH] [LowerSwitch] Fixed faulty PHI node update + +Summary: +When lowerswitch merge several cases into a new default block it's not +updating the PHI nodes accordingly. The code that update the PHI nodes +for the default edge only update the first entry and do not remove the +remaining ones, to make sure the number of entries match the number of +predecessors. + +This is easily fixed by replacing the code that update the PHI node with +the already existing utility function for updating PHI nodes. 
+ +Reviewers: hans, reames, arsenm + +Reviewed By: arsenm + +Subscribers: wdng, llvm-commits + +Differential Revision: https://reviews.llvm.org/D47055 + +git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@332960 91177308-0d34-0410-b5e6-96231b3b80d8 +--- + lib/Transforms/Utils/LowerSwitch.cpp | 18 +++++------ + test/Transforms/Util/lowerswitch.ll | 58 +++++++++++++++++++++++++++++++++++- + 2 files changed, 66 insertions(+), 10 deletions(-) + +diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp +index 441c5fd8b5af..76ad35832dc3 100644 +--- a/lib/Transforms/Utils/LowerSwitch.cpp ++++ b/lib/Transforms/Utils/LowerSwitch.cpp +@@ -512,25 +512,25 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI, + } + } + ++ unsigned NrOfDefaults = (SI->getDefaultDest() == Default) ? 1 : 0; ++ for (auto &Case : SI->cases()) ++ if (Case.getCaseSuccessor() == Default) ++ NrOfDefaults++; ++ + // Create a new, empty default block so that the new hierarchy of + // if-then statements go to this and the PHI nodes are happy. + BasicBlock *NewDefault = BasicBlock::Create(SI->getContext(), "NewDefault"); + F->getBasicBlockList().insert(Default->getIterator(), NewDefault); + BranchInst::Create(Default, NewDefault); + +- // If there is an entry in any PHI nodes for the default edge, make sure +- // to update them as well. +- for (BasicBlock::iterator I = Default->begin(); isa(I); ++I) { +- PHINode *PN = cast(I); +- int BlockIdx = PN->getBasicBlockIndex(OrigBlock); +- assert(BlockIdx != -1 && "Switch didn't go to this successor??"); +- PN->setIncomingBlock((unsigned)BlockIdx, NewDefault); +- } +- + BasicBlock *SwitchBlock = + switchConvert(Cases.begin(), Cases.end(), LowerBound, UpperBound, Val, + OrigBlock, OrigBlock, NewDefault, UnreachableRanges); + ++ // If there are entries in any PHI nodes for the default edge, make sure ++ // to update them as well. 
++ fixPhis(Default, OrigBlock, NewDefault, NrOfDefaults); ++ + // Branch to our shiny new if-then stuff... + BranchInst::Create(SwitchBlock, OrigBlock); + +diff --git a/test/Transforms/Util/lowerswitch.ll b/test/Transforms/Util/lowerswitch.ll +index 1eddb43c1a06..70e1e239b3dd 100644 +--- a/test/Transforms/Util/lowerswitch.ll ++++ b/test/Transforms/Util/lowerswitch.ll +@@ -3,7 +3,7 @@ + ; Test that we don't crash and have a different basic block for each incoming edge. + define void @test0() { + ; CHECK-LABEL: @test0 +-; CHECK: %merge = phi i64 [ 1, %BB3 ], [ 0, %NewDefault ], [ 0, %NodeBlock5 ], [ 0, %LeafBlock1 ] ++; CHECK: %merge = phi i64 [ 1, %BB3 ], [ 0, %NodeBlock5 ], [ 0, %LeafBlock1 ], [ 0, %NewDefault ] + BB1: + switch i32 undef, label %BB2 [ + i32 3, label %BB2 +@@ -186,3 +186,59 @@ define void @test2(i32 %mode) { + ._crit_edge: ; preds = %34, %0 + ret void + } ++ ++; Test that the PHI node in for.cond should have one entry for each predecessor ++; of its parent basic block after lowerswitch merged several cases into a new ++; default block. 
++define void @test3() { ++; CHECK-LABEL: @test3 ++entry: ++ br label %lbl1 ++ ++lbl1: ; preds = %cleanup, %entry ++ br label %lbl2 ++ ++lbl2: ; preds = %cleanup, %lbl1 ++ br label %for.cond ++ ++for.cond: ; preds = %cleanup, %cleanup, %lbl2 ++; CHECK: for.cond: ++; CHECK: phi i16 [ undef, %lbl2 ], [ %b.3, %NewDefault ]{{$}} ++; CHECK: for.cond1: ++ %b.2 = phi i16 [ undef, %lbl2 ], [ %b.3, %cleanup ], [ %b.3, %cleanup ] ++ br label %for.cond1 ++ ++for.cond1: ; preds = %for.inc, %for.cond ++ %b.3 = phi i16 [ %b.2, %for.cond ], [ undef, %for.inc ] ++ %tobool = icmp ne i16 %b.3, 0 ++ br i1 %tobool, label %for.body, label %for.end ++ ++for.body: ; preds = %for.cond1 ++ br i1 undef, label %if.then, label %for.inc ++ ++if.then: ; preds = %for.body ++ br label %cleanup ++ ++for.inc: ; preds = %for.body ++ br label %for.cond1 ++ ++for.end: ; preds = %for.cond1 ++ br i1 undef, label %if.then4, label %for.body7 ++ ++if.then4: ; preds = %for.end ++ br label %cleanup ++ ++for.body7: ; preds = %for.end ++ br label %cleanup ++ ++cleanup: ; preds = %for.body7, %if.then4, %if.then ++ switch i32 undef, label %unreachable [ ++ i32 0, label %for.cond ++ i32 2, label %lbl1 ++ i32 5, label %for.cond ++ i32 3, label %lbl2 ++ ] ++ ++unreachable: ; preds = %cleanup ++ unreachable ++} diff --git a/external/llvm/releases/4.0.0/patches_external/add_win_crt_info.patch b/external/llvm/releases/4.0.0/patches_external/add_win_crt_info.patch new file mode 100644 index 000000000..b159deee3 --- /dev/null +++ b/external/llvm/releases/4.0.0/patches_external/add_win_crt_info.patch @@ -0,0 +1,17 @@ +# Description : Add possibility to use ChooseMSVCCRT-script, when +# include LLVM library. 
+ +--- a/cmake/modules/LLVMConfig.cmake.in ++++ b/cmake/modules/LLVMConfig.cmake.in +@@ -10,6 +10,11 @@ set(LLVM_PACKAGE_VERSION @PACKAGE_VERSION@) + + set(LLVM_BUILD_TYPE @CMAKE_BUILD_TYPE@) + ++set(LLVM_USE_CRT_DEBUG @LLVM_USE_CRT_DEBUG@) ++set(LLVM_USE_CRT_MINSIZEREL @LLVM_USE_CRT_MINSIZEREL@) ++set(LLVM_USE_CRT_RELEASE @LLVM_USE_CRT_RELEASE@) ++set(LLVM_USE_CRT_RELWITHDEBINFO @LLVM_USE_CRT_RELWITHDEBINFO@) ++ + set(LLVM_COMMON_DEPENDS @LLVM_COMMON_DEPENDS@) + + set(LLVM_AVAILABLE_LIBS @LLVM_AVAILABLE_LIBS@) diff --git a/external/llvm/releases/7.0.0/patches_external/BasicBlockUtils-Add-metadata-fixing-in-SplitBlockPre.patch b/external/llvm/releases/7.0.0/patches_external/BasicBlockUtils-Add-metadata-fixing-in-SplitBlockPre.patch new file mode 100644 index 000000000..643b72a09 --- /dev/null +++ b/external/llvm/releases/7.0.0/patches_external/BasicBlockUtils-Add-metadata-fixing-in-SplitBlockPre.patch @@ -0,0 +1,92 @@ +From cb729efafe85052dc43413ad403b5e67ee002303 Mon Sep 17 00:00:00 2001 +From: Aleksander Us +Date: Mon, 26 Aug 2019 15:47:41 +0300 +Subject: [PATCH] [BasicBlockUtils] Add metadata fixing in + SplitBlockPredecessors. + +In case when BB is header of some loop and predecessor is latch of +this loop, metadata was not attached to newly created basic block. +This led to loss of loop metadata for other passes. 
+--- + lib/Transforms/Utils/BasicBlockUtils.cpp | 17 +++++++-- + test/Transforms/LoopSimplify/loop_metadata.ll | 36 +++++++++++++++++++ + 2 files changed, 50 insertions(+), 3 deletions(-) + create mode 100644 test/Transforms/LoopSimplify/loop_metadata.ll + +diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp +index 516a785dce1..6a0a1826df2 100644 +--- a/lib/Transforms/Utils/BasicBlockUtils.cpp ++++ b/lib/Transforms/Utils/BasicBlockUtils.cpp +@@ -507,14 +507,24 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, + BranchInst *BI = BranchInst::Create(BB, NewBB); + BI->setDebugLoc(BB->getFirstNonPHIOrDbg()->getDebugLoc()); + ++ bool IsBBHeader = LI && LI->isLoopHeader(BB); ++ Loop *BBLoop = LI ? LI->getLoopFor(BB) : nullptr; + // Move the edges from Preds to point to NewBB instead of BB. +- for (unsigned i = 0, e = Preds.size(); i != e; ++i) { ++ for (BasicBlock *Pred : Preds) { ++ Instruction *PI = Pred->getTerminator(); + // This is slightly more strict than necessary; the minimum requirement + // is that there be no more than one indirectbr branching to BB. And + // all BlockAddress uses would need to be updated. +- assert(!isa<IndirectBrInst>(Preds[i]->getTerminator()) && ++ assert(!isa<IndirectBrInst>(PI) && + "Cannot split an edge from an IndirectBrInst"); +- Preds[i]->getTerminator()->replaceUsesOfWith(BB, NewBB); ++ if (IsBBHeader && BBLoop->contains(Pred) && BBLoop->isLoopLatch(Pred)) { ++ // Update loop metadata if it exists.
++ if (MDNode *LoopMD = PI->getMetadata(LLVMContext::MD_loop)) { ++ BI->setMetadata(LLVMContext::MD_loop, LoopMD); ++ PI->setMetadata(LLVMContext::MD_loop, nullptr); ++ } ++ } ++ PI->replaceUsesOfWith(BB, NewBB); + } + + // Insert a new PHI node into NewBB for every PHI node in BB and that new PHI +diff --git a/test/Transforms/LoopSimplify/loop_metadata.ll b/test/Transforms/LoopSimplify/loop_metadata.ll +new file mode 100644 +index 00000000000..c15c92fe3ae +--- /dev/null ++++ b/test/Transforms/LoopSimplify/loop_metadata.ll +@@ -0,0 +1,36 @@ ++; RUN: opt -S -loop-simplify < %s | FileCheck %s ++ ++; CHECK: for.cond.loopexit: ++; CHECK: br label %for.cond, !llvm.loop !0 ++; CHECK: br i1 %cmp1, label %for.body1, label %for.cond.loopexit ++ ++define void @foo() { ++entry: ++ br label %for.cond ++ ++for.cond: ; preds = %for.cond1, %entry ++ %j = phi i32 [ 0, %entry ], [ %add, %for.cond1 ] ++ %cmp = icmp ult i32 %j, 8 ++ br i1 %cmp, label %for.body, label %for.end ++ ++for.body: ; preds = %for.cond ++ %dummy1 = add i32 1, 1 ++ %add = add nuw nsw i32 %j, 1 ++ br label %for.cond1 ++ ++for.cond1: ; preds = %for.body1, %for.body ++ %i.0 = phi i32 [ 1, %for.body ], [ %inc, %for.body1 ] ++ %cmp1 = icmp ult i32 %i.0, 8 ++ br i1 %cmp1, label %for.body1, label %for.cond, !llvm.loop !0 ++ ++for.body1: ; preds = %for.cond1 ++ %dummy2 = add i32 1, 1 ++ %inc = add nuw nsw i32 %i.0, 1 ++ br label %for.cond1 ++ ++for.end: ; preds = %for.cond ++ ret void ++} ++ ++!0 = distinct !{!0, !1} ++!1 = !{!"llvm.loop.unroll.full"} +-- +2.18.0 + diff --git a/external/llvm/releases/7.0.0/patches_external/IndVarSimplify-Do-not-use-SCEV-expander-for-IVCount-.patch b/external/llvm/releases/7.0.0/patches_external/IndVarSimplify-Do-not-use-SCEV-expander-for-IVCount-.patch new file mode 100644 index 000000000..c314efc12 --- /dev/null +++ b/external/llvm/releases/7.0.0/patches_external/IndVarSimplify-Do-not-use-SCEV-expander-for-IVCount-.patch @@ -0,0 +1,100 @@ +From 
cbf2de408fa9a89ee446d0159ecd8bb81340f0b4 Mon Sep 17 00:00:00 2001 +From: Aleksander Us +Date: Mon, 26 Aug 2019 15:45:47 +0300 +Subject: [PATCH] [IndVarSimplify] Do not use SCEV expander for IVCount in + LFTR when possible. + +SCEV analysis cannot properly cache instruction with poison flags +(for example, add nsw outside of loop will not be reused by expander). +This can lead to the generation of additional instructions by SCEV expander. + +Example IR: + + ... + %maxval = add nuw nsw i32 %a1, %a2 + ... +for.body: + ... + %cmp22 = icmp ult i32 %ivadd, %maxval + br i1 %cmp22, label %for.body, label %for.end + ... + +SCEV expander will generate copy of %maxval in preheader but without +nuw/nsw flags. This can be avoided by explicit check that iv count +value gives the same SCEV expressions as calculated by LFTR. +--- + lib/Transforms/Scalar/IndVarSimplify.cpp | 13 ++++++++++++- + test/Transforms/IndVarSimplify/add_nsw.ll | 23 +++++++++++++++++++++++ + test/Transforms/IndVarSimplify/udiv.ll | 1 + + 3 files changed, 36 insertions(+), 1 deletion(-) + create mode 100644 test/Transforms/IndVarSimplify/add_nsw.ll + +diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp +index 8656e88b79c..1744b424722 100644 +--- a/lib/Transforms/Scalar/IndVarSimplify.cpp ++++ b/lib/Transforms/Scalar/IndVarSimplify.cpp +@@ -2174,8 +2174,19 @@ static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L, + + IVLimit = SE->getAddExpr(IVInit, IVCount); + } +- // Expand the code for the iteration count. ++ ++ // If computed limit is equal to old limit then do not use SCEV expander ++ // because it can lose NUW/NSW flags and create extra instructions.
+ BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator()); ++ if (ICmpInst *Cmp = dyn_cast<ICmpInst>(BI->getOperand(0))) { ++ Value *Limit = Cmp->getOperand(0); ++ if (!L->isLoopInvariant(Limit)) ++ Limit = Cmp->getOperand(1); ++ if (SE->getSCEV(Limit) == IVLimit) ++ return Limit; ++ } ++ ++ // Expand the code for the iteration count. + IRBuilder<> Builder(BI); + assert(SE->isLoopInvariant(IVLimit, L) && + "Computed iteration count is not loop invariant!"); +diff --git a/test/Transforms/IndVarSimplify/add_nsw.ll b/test/Transforms/IndVarSimplify/add_nsw.ll +new file mode 100644 +index 00000000000..abd1cbb6c51 +--- /dev/null ++++ b/test/Transforms/IndVarSimplify/add_nsw.ll +@@ -0,0 +1,23 @@ ++; RUN: opt -indvars -S %s | FileCheck %s ++ ++target datalayout = "e-p:32:32-i64:64-n8:16:32" ++ ++; CHECK: for.body.preheader: ++; CHECK-NOT: add ++; CHECK: for.body: ++ ++define void @foo(i32 %a1, i32 %a2) { ++entry: ++ %maxval = add nuw nsw i32 %a1, %a2 ++ %cmp = icmp slt i32 %maxval, 1 ++ br i1 %cmp, label %for.end, label %for.body ++ ++for.body: ; preds = %entry, %for.body ++ %j.02 = phi i32 [ 0, %entry ], [ %add31, %for.body ] ++ %add31 = add nuw nsw i32 %j.02, 1 ++ %cmp22 = icmp slt i32 %add31, %maxval ++ br i1 %cmp22, label %for.body, label %for.end ++ ++for.end: ; preds = %for.body ++ ret void ++} +diff --git a/test/Transforms/IndVarSimplify/udiv.ll b/test/Transforms/IndVarSimplify/udiv.ll +index b3f2c2a6a66..3530343ef4a 100644 +--- a/test/Transforms/IndVarSimplify/udiv.ll ++++ b/test/Transforms/IndVarSimplify/udiv.ll +@@ -133,6 +133,7 @@ declare i32 @printf(i8* nocapture, ...)
nounwind + ; CHECK-LABEL: @foo( + ; CHECK: for.body.preheader: + ; CHECK-NOT: udiv ++; CHECK: for.body: + + define void @foo(double* %p, i64 %n) nounwind { + entry: +-- +2.18.0 + diff --git a/external/llvm/releases/7.0.0/patches_external/Use-depth-limit-for-trunc-analysis.patch b/external/llvm/releases/7.0.0/patches_external/Use-depth-limit-for-trunc-analysis.patch new file mode 100644 index 000000000..4f1e90456 --- /dev/null +++ b/external/llvm/releases/7.0.0/patches_external/Use-depth-limit-for-trunc-analysis.patch @@ -0,0 +1,274 @@ +From a4cc6b11f900f62e2570a43980290441cfcbc925 Mon Sep 17 00:00:00 2001 +From: Teresa Johnson +Date: Tue, 12 Mar 2019 18:28:05 +0000 +Subject: [PATCH] Use depth limit for trunc analysis + +Summary: +This fixes an extremely long compile time caused by recursive analysis +of truncs, which were not previously subject to any depth limits unlike +some of the other ops. I decided to use the same control used for +sext/zext, since the routines analyzing these are sometimes mutually +recursive with the trunc analysis. 
+ +Reviewers: mkazantsev, sanjoy + +Subscribers: sanjoy, jdoerfert, llvm-commits + +Tags: #llvm + +Differential Revision: https://reviews.llvm.org/D58994 + +llvm-svn: 355949 +--- + llvm/include/llvm/Analysis/ScalarEvolution.h | 8 ++- + llvm/lib/Analysis/ScalarEvolution.cpp | 65 ++++++++++--------- + .../Analysis/ScalarEvolution/limit-depth.ll | 30 ++++++++- + 3 files changed, 70 insertions(+), 33 deletions(-) + +diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h +index 89918e3c205..b0855e18800 100644 +--- a/include/llvm/Analysis/ScalarEvolution.h ++++ b/include/llvm/Analysis/ScalarEvolution.h +@@ -521,7 +521,7 @@ public: + const SCEV *getConstant(ConstantInt *V); + const SCEV *getConstant(const APInt &Val); + const SCEV *getConstant(Type *Ty, uint64_t V, bool isSigned = false); +- const SCEV *getTruncateExpr(const SCEV *Op, Type *Ty); ++ const SCEV *getTruncateExpr(const SCEV *Op, Type *Ty, unsigned Depth = 0); + const SCEV *getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth = 0); + const SCEV *getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth = 0); + const SCEV *getAnyExtendExpr(const SCEV *Op, Type *Ty); +@@ -619,11 +619,13 @@ public: + + /// Return a SCEV corresponding to a conversion of the input value to the + /// specified type. If the type must be extended, it is zero extended. +- const SCEV *getTruncateOrZeroExtend(const SCEV *V, Type *Ty); ++ const SCEV *getTruncateOrZeroExtend(const SCEV *V, Type *Ty, ++ unsigned Depth = 0); + + /// Return a SCEV corresponding to a conversion of the input value to the + /// specified type. If the type must be extended, it is sign extended. +- const SCEV *getTruncateOrSignExtend(const SCEV *V, Type *Ty); ++ const SCEV *getTruncateOrSignExtend(const SCEV *V, Type *Ty, ++ unsigned Depth = 0); + + /// Return a SCEV corresponding to a conversion of the input value to the + /// specified type. If the type must be extended, it is zero extended. 
The +diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp +index 0e715b8814f..30708125203 100644 +--- a/lib/Analysis/ScalarEvolution.cpp ++++ b/lib/Analysis/ScalarEvolution.cpp +@@ -197,9 +197,9 @@ static cl::opt MaxConstantEvolvingDepth( + cl::desc("Maximum depth of recursive constant evolving"), cl::init(32)); + + static cl::opt +- MaxExtDepth("scalar-evolution-max-ext-depth", cl::Hidden, +- cl::desc("Maximum depth of recursive SExt/ZExt"), +- cl::init(8)); ++ MaxCastDepth("scalar-evolution-max-cast-depth", cl::Hidden, ++ cl::desc("Maximum depth of recursive SExt/ZExt/Trunc"), ++ cl::init(8)); + + static cl::opt + MaxAddRecSize("scalar-evolution-max-add-rec-size", cl::Hidden, +@@ -1221,8 +1221,8 @@ const SCEV *SCEVAddRecExpr::evaluateAtIteration(const SCEV *It, + // SCEV Expression folder implementations + //===----------------------------------------------------------------------===// + +-const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, +- Type *Ty) { ++const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, Type *Ty, ++ unsigned Depth) { + assert(getTypeSizeInBits(Op->getType()) > getTypeSizeInBits(Ty) && + "This is not a truncating conversion!"); + assert(isSCEVable(Ty) && +@@ -1243,15 +1243,23 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, + + // trunc(trunc(x)) --> trunc(x) + if (const SCEVTruncateExpr *ST = dyn_cast(Op)) +- return getTruncateExpr(ST->getOperand(), Ty); ++ return getTruncateExpr(ST->getOperand(), Ty, Depth + 1); + + // trunc(sext(x)) --> sext(x) if widening or trunc(x) if narrowing + if (const SCEVSignExtendExpr *SS = dyn_cast(Op)) +- return getTruncateOrSignExtend(SS->getOperand(), Ty); ++ return getTruncateOrSignExtend(SS->getOperand(), Ty, Depth + 1); + + // trunc(zext(x)) --> zext(x) if widening or trunc(x) if narrowing + if (const SCEVZeroExtendExpr *SZ = dyn_cast(Op)) +- return getTruncateOrZeroExtend(SZ->getOperand(), Ty); ++ return getTruncateOrZeroExtend(SZ->getOperand(), 
Ty, Depth + 1); ++ ++ if (Depth > MaxCastDepth) { ++ SCEV *S = ++ new (SCEVAllocator) SCEVTruncateExpr(ID.Intern(SCEVAllocator), Op, Ty); ++ UniqueSCEVs.InsertNode(S, IP); ++ addToLoopUseLists(S); ++ return S; ++ } + + // trunc(x1 + ... + xN) --> trunc(x1) + ... + trunc(xN) and + // trunc(x1 * ... * xN) --> trunc(x1) * ... * trunc(xN), +@@ -1263,7 +1271,7 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, + unsigned numTruncs = 0; + for (unsigned i = 0, e = CommOp->getNumOperands(); i != e && numTruncs < 2; + ++i) { +- const SCEV *S = getTruncateExpr(CommOp->getOperand(i), Ty); ++ const SCEV *S = getTruncateExpr(CommOp->getOperand(i), Ty, Depth + 1); + if (!isa(CommOp->getOperand(i)) && isa(S)) + numTruncs++; + Operands.push_back(S); +@@ -1287,7 +1295,7 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, + if (const SCEVAddRecExpr *AddRec = dyn_cast(Op)) { + SmallVector Operands; + for (const SCEV *Op : AddRec->operands()) +- Operands.push_back(getTruncateExpr(Op, Ty)); ++ Operands.push_back(getTruncateExpr(Op, Ty, Depth + 1)); + return getAddRecExpr(Operands, AddRec->getLoop(), SCEV::FlagAnyWrap); + } + +@@ -1621,7 +1629,7 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { + ID.AddPointer(Ty); + void *IP = nullptr; + if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; +- if (Depth > MaxExtDepth) { ++ if (Depth > MaxCastDepth) { + SCEV *S = new (SCEVAllocator) SCEVZeroExtendExpr(ID.Intern(SCEVAllocator), + Op, Ty); + UniqueSCEVs.InsertNode(S, IP); +@@ -1639,7 +1647,7 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { + unsigned NewBits = getTypeSizeInBits(Ty); + if (CR.truncate(TruncBits).zeroExtend(NewBits).contains( + CR.zextOrTrunc(NewBits))) +- return getTruncateOrZeroExtend(X, Ty); ++ return getTruncateOrZeroExtend(X, Ty, Depth); + } + + // If the input value is a chrec scev, and we can prove that the value +@@ -1681,9 +1689,9 @@ 
ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { + // Check whether the backedge-taken count can be losslessly casted to + // the addrec's type. The count is always unsigned. + const SCEV *CastedMaxBECount = +- getTruncateOrZeroExtend(MaxBECount, Start->getType()); +- const SCEV *RecastedMaxBECount = +- getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType()); ++ getTruncateOrZeroExtend(MaxBECount, Start->getType(), Depth); ++ const SCEV *RecastedMaxBECount = getTruncateOrZeroExtend( ++ CastedMaxBECount, MaxBECount->getType(), Depth); + if (MaxBECount == RecastedMaxBECount) { + Type *WideTy = IntegerType::get(getContext(), BitWidth * 2); + // Check whether Start+Step*MaxBECount has no unsigned overflow. +@@ -1932,7 +1940,7 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { + void *IP = nullptr; + if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; + // Limit recursion depth. +- if (Depth > MaxExtDepth) { ++ if (Depth > MaxCastDepth) { + SCEV *S = new (SCEVAllocator) SCEVSignExtendExpr(ID.Intern(SCEVAllocator), + Op, Ty); + UniqueSCEVs.InsertNode(S, IP); +@@ -1950,7 +1958,7 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { + unsigned NewBits = getTypeSizeInBits(Ty); + if (CR.truncate(TruncBits).signExtend(NewBits).contains( + CR.sextOrTrunc(NewBits))) +- return getTruncateOrSignExtend(X, Ty); ++ return getTruncateOrSignExtend(X, Ty, Depth); + } + + if (auto *SA = dyn_cast(Op)) { +@@ -2025,9 +2033,9 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { + // Check whether the backedge-taken count can be losslessly casted to + // the addrec's type. The count is always unsigned. 
+ const SCEV *CastedMaxBECount = +- getTruncateOrZeroExtend(MaxBECount, Start->getType()); +- const SCEV *RecastedMaxBECount = +- getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType()); ++ getTruncateOrZeroExtend(MaxBECount, Start->getType(), Depth); ++ const SCEV *RecastedMaxBECount = getTruncateOrZeroExtend( ++ CastedMaxBECount, MaxBECount->getType(), Depth); + if (MaxBECount == RecastedMaxBECount) { + Type *WideTy = IntegerType::get(getContext(), BitWidth * 2); + // Check whether Start+Step*MaxBECount has no signed overflow. +@@ -4017,29 +4025,28 @@ const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS, + return getAddExpr(LHS, getNegativeSCEV(RHS, NegFlags), AddFlags, Depth); + } + +-const SCEV * +-ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V, Type *Ty) { ++const SCEV *ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V, Type *Ty, ++ unsigned Depth) { + Type *SrcTy = V->getType(); + assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() && + "Cannot truncate or zero extend with non-integer arguments!"); + if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty)) + return V; // No conversion + if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty)) +- return getTruncateExpr(V, Ty); +- return getZeroExtendExpr(V, Ty); ++ return getTruncateExpr(V, Ty, Depth); ++ return getZeroExtendExpr(V, Ty, Depth); + } + +-const SCEV * +-ScalarEvolution::getTruncateOrSignExtend(const SCEV *V, +- Type *Ty) { ++const SCEV *ScalarEvolution::getTruncateOrSignExtend(const SCEV *V, Type *Ty, ++ unsigned Depth) { + Type *SrcTy = V->getType(); + assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() && + "Cannot truncate or zero extend with non-integer arguments!"); + if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty)) + return V; // No conversion + if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty)) +- return getTruncateExpr(V, Ty); +- return getSignExtendExpr(V, Ty); ++ return getTruncateExpr(V, Ty, Depth); ++ return getSignExtendExpr(V, Ty, 
Depth); + } + + const SCEV * +diff --git a/test/Analysis/ScalarEvolution/limit-depth.ll b/test/Analysis/ScalarEvolution/limit-depth.ll +index f4154130233..6fdf8c5df97 100644 +--- a/test/Analysis/ScalarEvolution/limit-depth.ll ++++ b/test/Analysis/ScalarEvolution/limit-depth.ll +@@ -1,4 +1,4 @@ +-; RUN: opt -scalar-evolution-max-arith-depth=0 -scalar-evolution-max-ext-depth=0 -analyze -scalar-evolution < %s | FileCheck %s ++; RUN: opt -scalar-evolution-max-arith-depth=0 -scalar-evolution-max-cast-depth=0 -analyze -scalar-evolution < %s | FileCheck %s + + ; Check that depth set to 0 prevents getAddExpr and getMulExpr from making + ; transformations in SCEV. We expect the result to be very straightforward. +@@ -98,3 +98,31 @@ exit: + %ze2 = zext i64 %iv2.inc to i128 + ret void + } ++ ++define void @test_trunc(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f) { ++; CHECK-LABEL: @test_trunc ++; CHECK: %trunc2 = trunc i64 %iv2.inc to i32 ++; CHECK-NEXT: --> {(trunc i64 (1 + {7,+,1}<%loop>) to i32),+,1}<%loop2> ++entry: ++ br label %loop ++ ++loop: ++ %iv = phi i128 [ 6, %entry ], [ %iv.inc, %loop ] ++ %iv.inc = add nsw i128 %iv, 1 ++ %cond = icmp sle i128 %iv.inc, 50 ++ br i1 %cond, label %loop, label %between ++ ++between: ++ %trunc = trunc i128 %iv.inc to i64 ++ br label %loop2 ++ ++loop2: ++ %iv2 = phi i64 [ %trunc, %between ], [ %iv2.inc, %loop2 ] ++ %iv2.inc = add nuw i64 %iv2, 1 ++ %cond2 = icmp sle i64 %iv2.inc, 50 ++ br i1 %cond2, label %loop2, label %exit ++ ++exit: ++ %trunc2 = trunc i64 %iv2.inc to i32 ++ ret void ++} +-- +2.18.0 + diff --git a/external/llvm/releases/7.0.0/patches_external/add_win_crt_info.patch b/external/llvm/releases/7.0.0/patches_external/add_win_crt_info.patch new file mode 100644 index 000000000..b159deee3 --- /dev/null +++ b/external/llvm/releases/7.0.0/patches_external/add_win_crt_info.patch @@ -0,0 +1,17 @@ +# Description : Add possibility to use ChooseMSVCCRT-script, when +# include LLVM library. 
+ +--- a/cmake/modules/LLVMConfig.cmake.in ++++ b/cmake/modules/LLVMConfig.cmake.in +@@ -10,6 +10,11 @@ set(LLVM_PACKAGE_VERSION @PACKAGE_VERSION@) + + set(LLVM_BUILD_TYPE @CMAKE_BUILD_TYPE@) + ++set(LLVM_USE_CRT_DEBUG @LLVM_USE_CRT_DEBUG@) ++set(LLVM_USE_CRT_MINSIZEREL @LLVM_USE_CRT_MINSIZEREL@) ++set(LLVM_USE_CRT_RELEASE @LLVM_USE_CRT_RELEASE@) ++set(LLVM_USE_CRT_RELWITHDEBINFO @LLVM_USE_CRT_RELWITHDEBINFO@) ++ + set(LLVM_COMMON_DEPENDS @LLVM_COMMON_DEPENDS@) + + set(LLVM_AVAILABLE_LIBS @LLVM_AVAILABLE_LIBS@) diff --git a/external/llvm/releases/7.0.0/patches_external/export-utility-to-targets-build-install_1.patch b/external/llvm/releases/7.0.0/patches_external/export-utility-to-targets-build-install_1.patch new file mode 100644 index 000000000..7a94d4fdb --- /dev/null +++ b/external/llvm/releases/7.0.0/patches_external/export-utility-to-targets-build-install_1.patch @@ -0,0 +1,37 @@ +From 3436463bf7021c9986f3a52ea0b699abfcbd0b95 Mon Sep 17 00:00:00 2001 +From: Stefan Granitz +Date: Fri, 11 Jan 2019 19:34:34 +0000 +Subject: [PATCH] [CMake] Export utility targets to the build/install tree + depending on LLVM_BUILD/INSTALL_UTILS + +Summary: +Allow external projects to import test-related targets like FileCheck, count, not etc. and query binary paths, properties, etc. +This would be useful for LLDB, because it reduces the difference between in-tree vs. standalone builds and simplifies CMake logic. 
+ +Reviewers: chapuni, gottesmm, beanz + +Reviewed By: beanz + +Subscribers: mgorny, lldb-commits, llvm-commits, #lldb + +Differential Revision: https://reviews.llvm.org/D56606 + +git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@350959 91177308-0d34-0410-b5e6-96231b3b80d8 +--- + cmake/modules/AddLLVM.cmake | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/cmake/modules/AddLLVM.cmake b/cmake/modules/AddLLVM.cmake +index 9b7d24184fe0..4dbc0ddaf4f0 100644 +--- a/cmake/modules/AddLLVM.cmake ++++ b/cmake/modules/AddLLVM.cmake +@@ -920,6 +920,9 @@ macro(add_llvm_utility name) + DEPENDS ${name} + COMPONENT ${name}) + endif() ++ set_property(GLOBAL APPEND PROPERTY LLVM_EXPORTS ${name}) ++ elseif( LLVM_BUILD_UTILS ) ++ set_property(GLOBAL APPEND PROPERTY LLVM_EXPORTS_BUILDTREE_ONLY ${name}) + endif() + endmacro(add_llvm_utility name) + \ No newline at end of file diff --git a/external/llvm/releases/7.0.0/patches_external/export-utility-to-targets-build-install_2.patch b/external/llvm/releases/7.0.0/patches_external/export-utility-to-targets-build-install_2.patch new file mode 100644 index 000000000..f26b48255 --- /dev/null +++ b/external/llvm/releases/7.0.0/patches_external/export-utility-to-targets-build-install_2.patch @@ -0,0 +1,65 @@ +From 4ce5f86eddc0bf42a971db4509829036d5b17b92 Mon Sep 17 00:00:00 2001 +From: Stefan Granitz +Date: Fri, 1 Feb 2019 13:08:09 +0000 +Subject: [PATCH] [CMake] Add install targets for utilities to LLVM exports if + LLVM_INSTALL_UTILS=ON + +Summary: D56606 was only appending target names to the `LLVM_EXPORTS`/`LLVM_EXPORTS_BUILDTREE_ONLY` properties. Targets showed up correctly in the build-tree `LLVMExports.cmake`, but they were missing in the installed one (as we found in https://bugs.llvm.org/show_bug.cgi?id=40443), because install did not register them explicitly. 
+ +Reviewers: mgorny, smeenai, beanz, gottesmm, dschuff, tstellar, serge-sans-paille + +Reviewed By: smeenai + +Subscribers: llvm-commits + +Differential Revision: https://reviews.llvm.org/D57383 + +git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@352869 91177308-0d34-0410-b5e6-96231b3b80d8 +--- + cmake/modules/AddLLVM.cmake | 32 +++++++++++++++++++++----------- + 1 file changed, 21 insertions(+), 11 deletions(-) + +diff --git a/cmake/modules/AddLLVM.cmake b/cmake/modules/AddLLVM.cmake +index 39781bf34e24..607d6e682b49 100644 +--- a/cmake/modules/AddLLVM.cmake ++++ b/cmake/modules/AddLLVM.cmake +@@ -924,18 +924,28 @@ macro(add_llvm_utility name) + + add_llvm_executable(${name} DISABLE_LLVM_LINK_LLVM_DYLIB ${ARGN}) + set_target_properties(${name} PROPERTIES FOLDER "Utils") +- if( LLVM_INSTALL_UTILS AND LLVM_BUILD_UTILS ) +- install (TARGETS ${name} +- RUNTIME DESTINATION ${LLVM_UTILS_INSTALL_DIR} +- COMPONENT ${name}) +- if (NOT CMAKE_CONFIGURATION_TYPES) +- add_llvm_install_targets(install-${name} +- DEPENDS ${name} +- COMPONENT ${name}) ++ if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY) ++ if (LLVM_INSTALL_UTILS AND LLVM_BUILD_UTILS) ++ if (${name} IN_LIST LLVM_DISTRIBUTION_COMPONENTS OR ++ NOT LLVM_DISTRIBUTION_COMPONENTS) ++ set(export_to_llvmexports EXPORT LLVMExports) ++ set_property(GLOBAL PROPERTY LLVM_HAS_EXPORTS True) ++ endif() ++ ++ install(TARGETS ${name} ++ ${export_to_llvmexports} ++ RUNTIME DESTINATION ${LLVM_UTILS_INSTALL_DIR} ++ COMPONENT ${name}) ++ ++ if (NOT LLVM_ENABLE_IDE) ++ add_llvm_install_targets(install-${name} ++ DEPENDS ${name} ++ COMPONENT ${name}) ++ endif() ++ set_property(GLOBAL APPEND PROPERTY LLVM_EXPORTS ${name}) ++ elseif(LLVM_BUILD_UTILS) ++ set_property(GLOBAL APPEND PROPERTY LLVM_EXPORTS_BUILDTREE_ONLY ${name}) + endif() +- set_property(GLOBAL APPEND PROPERTY LLVM_EXPORTS ${name}) +- elseif( LLVM_BUILD_UTILS ) +- set_property(GLOBAL APPEND PROPERTY LLVM_EXPORTS_BUILDTREE_ONLY ${name}) + endif() + endmacro(add_llvm_utility name) 
+ \ No newline at end of file diff --git a/external/llvm/releases/7.0.0/patches_external/fix-pointer-for-lifetime-intrinsic.patch b/external/llvm/releases/7.0.0/patches_external/fix-pointer-for-lifetime-intrinsic.patch new file mode 100644 index 000000000..e0ba0617f --- /dev/null +++ b/external/llvm/releases/7.0.0/patches_external/fix-pointer-for-lifetime-intrinsic.patch @@ -0,0 +1,15 @@ +diff -Naur a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp +--- a/lib/Transforms/Scalar/SROA.cpp 2018-08-31 17:53:05.000000000 +0200 ++++ b/lib/Transforms/Scalar/SROA.cpp 2018-12-20 18:48:12.788662000 +0100 +@@ -3033,7 +3033,10 @@ + ConstantInt *Size = + ConstantInt::get(cast<IntegerType>(II.getArgOperand(0)->getType()), + NewEndOffset - NewBeginOffset); +- Value *Ptr = getNewAllocaSlicePtr(IRB, OldPtr->getType()); ++ // Lifetime intrinsics always expect an i8* so directly get such a pointer ++ // for the new alloca slice. ++ Type *PointerTy = IRB.getInt8PtrTy(OldPtr->getType()->getPointerAddressSpace()); ++ Value *Ptr = getNewAllocaSlicePtr(IRB, PointerTy); + Value *New; + if (II.getIntrinsicID() == Intrinsic::lifetime_start) + New = IRB.CreateLifetimeStart(Ptr, Size); diff --git a/external/llvm/releases/7.0.0/patches_external/initializaton_order_fiasco-workaround.patch b/external/llvm/releases/7.0.0/patches_external/initializaton_order_fiasco-workaround.patch new file mode 100644 index 000000000..c21646ab4 --- /dev/null +++ b/external/llvm/releases/7.0.0/patches_external/initializaton_order_fiasco-workaround.patch @@ -0,0 +1,53 @@ +# Description : workaround for https://bugs.llvm.org/show_bug.cgi?id=41367 +# When Fixed in Open Source : Needed always +# Category : Bugfix +diff --git a/include/llvm/Support/ManagedStatic.h b/include/llvm/Support/ManagedStatic.h +index b4bf321..3166d59 100644 ++++ b/include/llvm/Support/ManagedStatic.h +--- a/include/llvm/Support/ManagedStatic.h +@@ -17,6 +17,10 @@ + #include <atomic> + #include <cstddef> + ++#if !defined(_MSC_VER) || (_MSC_VER >= 1925) ||
defined(__clang__) ++#define LLVM_USE_CONSTEXPR_CTOR ++#endif ++ + namespace llvm { + + /// object_creator - Helper method for ManagedStatic. +@@ -36,21 +40,33 @@ template struct object_deleter { + /// ManagedStaticBase - Common base class for ManagedStatic instances. + class ManagedStaticBase { + protected: ++#ifdef LLVM_USE_CONSTEXPR_CTOR ++ mutable std::atomic Ptr{}; ++ mutable void (*DeleterFn)(void *) = nullptr; ++ mutable const ManagedStaticBase *Next = nullptr; ++#else + // This should only be used as a static variable, which guarantees that this + // will be zero initialized. + mutable std::atomic Ptr; + mutable void (*DeleterFn)(void*); + mutable const ManagedStaticBase *Next; +- ++#endif + void RegisterManagedStatic(void *(*creator)(), void (*deleter)(void*)) const; + + public: ++#ifdef LLVM_USE_CONSTEXPR_CTOR ++ constexpr ManagedStaticBase() = default; ++#endif + /// isConstructed - Return true if this object has not been created yet. + bool isConstructed() const { return Ptr != nullptr; } + + void destroy() const; + }; + ++// we don't need LLVM_USE_CONSTEXPR_CTOR anymore as it is used only to define ++// a proper constructor and initializers ++#undef LLVM_USE_CONSTEXPR_CTOR ++ + /// ManagedStatic - This transparently changes the behavior of global statics to + /// be lazily constructed on demand (good for reducing startup times of dynamic + /// libraries that link in LLVM components) and for making destruction be diff --git a/external/llvm/releases/8.0.0/patches_external/BasicBlockUtils-Add-metadata-fixing-in-SplitBlockPre.patch b/external/llvm/releases/8.0.0/patches_external/BasicBlockUtils-Add-metadata-fixing-in-SplitBlockPre.patch new file mode 100644 index 000000000..b2324a994 --- /dev/null +++ b/external/llvm/releases/8.0.0/patches_external/BasicBlockUtils-Add-metadata-fixing-in-SplitBlockPre.patch @@ -0,0 +1,92 @@ +From 2e01cb0653db81ce9fd9cc3a8a1f997920cf1b69 Mon Sep 17 00:00:00 2001 +From: Aleksander Us +Date: Mon, 26 Aug 2019 15:47:41 +0300 
+Subject: [PATCH] [BasicBlockUtils] Add metadata fixing in + SplitBlockPredecessors. + +In case when BB is header of some loop and predecessor is latch of +this loop, metadata was not attached to newly created basic block. +This led to loss of loop metadata for other passes. +--- + lib/Transforms/Utils/BasicBlockUtils.cpp | 17 +++++++-- + test/Transforms/LoopSimplify/loop_metadata.ll | 36 +++++++++++++++++++ + 2 files changed, 50 insertions(+), 3 deletions(-) + create mode 100644 test/Transforms/LoopSimplify/loop_metadata.ll + +diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp +index 7da768252fc..c28682df298 100644 +--- a/lib/Transforms/Utils/BasicBlockUtils.cpp ++++ b/lib/Transforms/Utils/BasicBlockUtils.cpp +@@ -536,14 +536,24 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, + BranchInst *BI = BranchInst::Create(BB, NewBB); + BI->setDebugLoc(BB->getFirstNonPHIOrDbg()->getDebugLoc()); + ++ bool IsBBHeader = LI && LI->isLoopHeader(BB); ++ Loop *BBLoop = LI ? LI->getLoopFor(BB) : nullptr; + // Move the edges from Preds to point to NewBB instead of BB. +- for (unsigned i = 0, e = Preds.size(); i != e; ++i) { ++ for (BasicBlock *Pred : Preds) { ++ Instruction *PI = Pred->getTerminator(); + // This is slightly more strict than necessary; the minimum requirement + // is that there be no more than one indirectbr branching to BB. And + // all BlockAddress uses would need to be updated. +- assert(!isa(Preds[i]->getTerminator()) && ++ assert(!isa(PI) && + "Cannot split an edge from an IndirectBrInst"); +- Preds[i]->getTerminator()->replaceUsesOfWith(BB, NewBB); ++ if (IsBBHeader && BBLoop->contains(Pred) && BBLoop->isLoopLatch(Pred)) { ++ // Update loop metadata if it exists. 
++ if (MDNode *LoopMD = PI->getMetadata(LLVMContext::MD_loop)) { ++ BI->setMetadata(LLVMContext::MD_loop, LoopMD); ++ PI->setMetadata(LLVMContext::MD_loop, nullptr); ++ } ++ } ++ PI->replaceUsesOfWith(BB, NewBB); + } + + // Insert a new PHI node into NewBB for every PHI node in BB and that new PHI +diff --git a/test/Transforms/LoopSimplify/loop_metadata.ll b/test/Transforms/LoopSimplify/loop_metadata.ll +new file mode 100644 +index 00000000000..c15c92fe3ae +--- /dev/null ++++ b/test/Transforms/LoopSimplify/loop_metadata.ll +@@ -0,0 +1,36 @@ ++; RUN: opt -S -loop-simplify < %s | FileCheck %s ++ ++; CHECK: for.cond.loopexit: ++; CHECK: br label %for.cond, !llvm.loop !0 ++; CHECK: br i1 %cmp1, label %for.body1, label %for.cond.loopexit ++ ++define void @foo() { ++entry: ++ br label %for.cond ++ ++for.cond: ; preds = %for.cond1, %entry ++ %j = phi i32 [ 0, %entry ], [ %add, %for.cond1 ] ++ %cmp = icmp ult i32 %j, 8 ++ br i1 %cmp, label %for.body, label %for.end ++ ++for.body: ; preds = %for.cond ++ %dummy1 = add i32 1, 1 ++ %add = add nuw nsw i32 %j, 1 ++ br label %for.cond1 ++ ++for.cond1: ; preds = %for.body1, %for.body ++ %i.0 = phi i32 [ 1, %for.body ], [ %inc, %for.body1 ] ++ %cmp1 = icmp ult i32 %i.0, 8 ++ br i1 %cmp1, label %for.body1, label %for.cond, !llvm.loop !0 ++ ++for.body1: ; preds = %for.cond1 ++ %dummy2 = add i32 1, 1 ++ %inc = add nuw nsw i32 %i.0, 1 ++ br label %for.cond1 ++ ++for.end: ; preds = %for.cond ++ ret void ++} ++ ++!0 = distinct !{!0, !1} ++!1 = !{!"llvm.loop.unroll.full"} +-- +2.18.0 + diff --git a/external/llvm/releases/8.0.0/patches_external/IndVarSimplify-Do-not-use-SCEV-expander-for-IVCount-.patch b/external/llvm/releases/8.0.0/patches_external/IndVarSimplify-Do-not-use-SCEV-expander-for-IVCount-.patch new file mode 100644 index 000000000..5da6f83dd --- /dev/null +++ b/external/llvm/releases/8.0.0/patches_external/IndVarSimplify-Do-not-use-SCEV-expander-for-IVCount-.patch @@ -0,0 +1,100 @@ +From 
a64f085d0f1ce0725d2ca896e32ad213515658a0 Mon Sep 17 00:00:00 2001 +From: Aleksander Us +Date: Mon, 26 Aug 2019 15:45:47 +0300 +Subject: [PATCH] [IndVarSimplify] Do not use SCEV expander for IVCount in + LFTR when possible. + +SCEV analysis cannot properly cache instruction with poison flags +(for example, add nsw outside of loop will not be reused by expander). +This can lead to generating of additional instructions by SCEV expander. + +Example IR: + + ... + %maxval = add nuw nsw i32 %a1, %a2 + ... +for.body: + ... + %cmp22 = icmp ult i32 %ivadd, %maxval + br i1 %cmp22, label %for.body, label %for.end + ... + +SCEV expander will generate copy of %maxval in preheader but without +nuw/nsw flags. This can be avoided by explicit check that iv count +value gives the same SCEV expressions as calculated by LFTR. +--- + lib/Transforms/Scalar/IndVarSimplify.cpp | 13 ++++++++++++- + test/Transforms/IndVarSimplify/add_nsw.ll | 23 +++++++++++++++++++++++ + test/Transforms/IndVarSimplify/udiv.ll | 1 + + 3 files changed, 36 insertions(+), 1 deletion(-) + create mode 100644 test/Transforms/IndVarSimplify/add_nsw.ll + +diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp +index 48d8e457ba7..4c7b6b4bbf4 100644 +--- a/lib/Transforms/Scalar/IndVarSimplify.cpp ++++ b/lib/Transforms/Scalar/IndVarSimplify.cpp +@@ -2318,8 +2318,19 @@ static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L, + + IVLimit = SE->getAddExpr(IVInit, IVCount); + } +- // Expand the code for the iteration count. ++ ++ // If computed limit is equal to old limit then do not use SCEV expander ++ // because it can lose NUW/NSW flags and create extra instructions. 
+ BranchInst *BI = cast(L->getExitingBlock()->getTerminator()); ++ if (ICmpInst *Cmp = dyn_cast(BI->getOperand(0))) { ++ Value *Limit = Cmp->getOperand(0); ++ if (!L->isLoopInvariant(Limit)) ++ Limit = Cmp->getOperand(1); ++ if (SE->getSCEV(Limit) == IVLimit) ++ return Limit; ++ } ++ ++ // Expand the code for the iteration count. + IRBuilder<> Builder(BI); + assert(SE->isLoopInvariant(IVLimit, L) && + "Computed iteration count is not loop invariant!"); +diff --git a/test/Transforms/IndVarSimplify/add_nsw.ll b/test/Transforms/IndVarSimplify/add_nsw.ll +new file mode 100644 +index 00000000000..abd1cbb6c51 +--- /dev/null ++++ b/test/Transforms/IndVarSimplify/add_nsw.ll +@@ -0,0 +1,23 @@ ++; RUN: opt -indvars -S %s | FileCheck %s ++ ++target datalayout = "e-p:32:32-i64:64-n8:16:32" ++ ++; CHECK: for.body.preheader: ++; CHECK-NOT: add ++; CHECK: for.body: ++ ++define void @foo(i32 %a1, i32 %a2) { ++entry: ++ %maxval = add nuw nsw i32 %a1, %a2 ++ %cmp = icmp slt i32 %maxval, 1 ++ br i1 %cmp, label %for.end, label %for.body ++ ++for.body: ; preds = %entry, %for.body ++ %j.02 = phi i32 [ 0, %entry ], [ %add31, %for.body ] ++ %add31 = add nuw nsw i32 %j.02, 1 ++ %cmp22 = icmp slt i32 %add31, %maxval ++ br i1 %cmp22, label %for.body, label %for.end ++ ++for.end: ; preds = %for.body ++ ret void ++} +diff --git a/test/Transforms/IndVarSimplify/udiv.ll b/test/Transforms/IndVarSimplify/udiv.ll +index b3f2c2a6a66..3530343ef4a 100644 +--- a/test/Transforms/IndVarSimplify/udiv.ll ++++ b/test/Transforms/IndVarSimplify/udiv.ll +@@ -133,6 +133,7 @@ declare i32 @printf(i8* nocapture, ...) 
nounwind + ; CHECK-LABEL: @foo( + ; CHECK: for.body.preheader: + ; CHECK-NOT: udiv ++; CHECK: for.body: + + define void @foo(double* %p, i64 %n) nounwind { + entry: +-- +2.18.0 + diff --git a/external/llvm/releases/8.0.0/patches_external/Use-depth-limit-for-trunc-analysis.patch b/external/llvm/releases/8.0.0/patches_external/Use-depth-limit-for-trunc-analysis.patch new file mode 100644 index 000000000..35e313aea --- /dev/null +++ b/external/llvm/releases/8.0.0/patches_external/Use-depth-limit-for-trunc-analysis.patch @@ -0,0 +1,274 @@ +From 9ad02e3f95d0f98a800966b2df1f5d9eaf5ff038 Mon Sep 17 00:00:00 2001 +From: Teresa Johnson +Date: Tue, 12 Mar 2019 18:28:05 +0000 +Subject: [PATCH] Use depth limit for trunc analysis + +Summary: +This fixes an extremely long compile time caused by recursive analysis +of truncs, which were not previously subject to any depth limits unlike +some of the other ops. I decided to use the same control used for +sext/zext, since the routines analyzing these are sometimes mutually +recursive with the trunc analysis. 
+ +Reviewers: mkazantsev, sanjoy + +Subscribers: sanjoy, jdoerfert, llvm-commits + +Tags: #llvm + +Differential Revision: https://reviews.llvm.org/D58994 + +llvm-svn: 355949 +--- + llvm/include/llvm/Analysis/ScalarEvolution.h | 8 ++- + llvm/lib/Analysis/ScalarEvolution.cpp | 65 ++++++++++--------- + .../Analysis/ScalarEvolution/limit-depth.ll | 30 ++++++++- + 3 files changed, 70 insertions(+), 33 deletions(-) + +diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h +index 8f4200b07e5..044c369bba2 100644 +--- a/include/llvm/Analysis/ScalarEvolution.h ++++ b/include/llvm/Analysis/ScalarEvolution.h +@@ -521,7 +521,7 @@ public: + const SCEV *getConstant(ConstantInt *V); + const SCEV *getConstant(const APInt &Val); + const SCEV *getConstant(Type *Ty, uint64_t V, bool isSigned = false); +- const SCEV *getTruncateExpr(const SCEV *Op, Type *Ty); ++ const SCEV *getTruncateExpr(const SCEV *Op, Type *Ty, unsigned Depth = 0); + const SCEV *getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth = 0); + const SCEV *getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth = 0); + const SCEV *getAnyExtendExpr(const SCEV *Op, Type *Ty); +@@ -619,11 +619,13 @@ public: + + /// Return a SCEV corresponding to a conversion of the input value to the + /// specified type. If the type must be extended, it is zero extended. +- const SCEV *getTruncateOrZeroExtend(const SCEV *V, Type *Ty); ++ const SCEV *getTruncateOrZeroExtend(const SCEV *V, Type *Ty, ++ unsigned Depth = 0); + + /// Return a SCEV corresponding to a conversion of the input value to the + /// specified type. If the type must be extended, it is sign extended. +- const SCEV *getTruncateOrSignExtend(const SCEV *V, Type *Ty); ++ const SCEV *getTruncateOrSignExtend(const SCEV *V, Type *Ty, ++ unsigned Depth = 0); + + /// Return a SCEV corresponding to a conversion of the input value to the + /// specified type. If the type must be extended, it is zero extended. 
The +diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp +index e5134f2eeda..21ec498339d 100644 +--- a/lib/Analysis/ScalarEvolution.cpp ++++ b/lib/Analysis/ScalarEvolution.cpp +@@ -203,9 +203,9 @@ static cl::opt MaxConstantEvolvingDepth( + cl::desc("Maximum depth of recursive constant evolving"), cl::init(32)); + + static cl::opt +- MaxExtDepth("scalar-evolution-max-ext-depth", cl::Hidden, +- cl::desc("Maximum depth of recursive SExt/ZExt"), +- cl::init(8)); ++ MaxCastDepth("scalar-evolution-max-cast-depth", cl::Hidden, ++ cl::desc("Maximum depth of recursive SExt/ZExt/Trunc"), ++ cl::init(8)); + + static cl::opt + MaxAddRecSize("scalar-evolution-max-add-rec-size", cl::Hidden, +@@ -1219,8 +1219,8 @@ const SCEV *SCEVAddRecExpr::evaluateAtIteration(const SCEV *It, + // SCEV Expression folder implementations + //===----------------------------------------------------------------------===// + +-const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, +- Type *Ty) { ++const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, Type *Ty, ++ unsigned Depth) { + assert(getTypeSizeInBits(Op->getType()) > getTypeSizeInBits(Ty) && + "This is not a truncating conversion!"); + assert(isSCEVable(Ty) && +@@ -1241,15 +1241,23 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, + + // trunc(trunc(x)) --> trunc(x) + if (const SCEVTruncateExpr *ST = dyn_cast(Op)) +- return getTruncateExpr(ST->getOperand(), Ty); ++ return getTruncateExpr(ST->getOperand(), Ty, Depth + 1); + + // trunc(sext(x)) --> sext(x) if widening or trunc(x) if narrowing + if (const SCEVSignExtendExpr *SS = dyn_cast(Op)) +- return getTruncateOrSignExtend(SS->getOperand(), Ty); ++ return getTruncateOrSignExtend(SS->getOperand(), Ty, Depth + 1); + + // trunc(zext(x)) --> zext(x) if widening or trunc(x) if narrowing + if (const SCEVZeroExtendExpr *SZ = dyn_cast(Op)) +- return getTruncateOrZeroExtend(SZ->getOperand(), Ty); ++ return getTruncateOrZeroExtend(SZ->getOperand(), 
Ty, Depth + 1); ++ ++ if (Depth > MaxCastDepth) { ++ SCEV *S = ++ new (SCEVAllocator) SCEVTruncateExpr(ID.Intern(SCEVAllocator), Op, Ty); ++ UniqueSCEVs.InsertNode(S, IP); ++ addToLoopUseLists(S); ++ return S; ++ } + + // trunc(x1 + ... + xN) --> trunc(x1) + ... + trunc(xN) and + // trunc(x1 * ... * xN) --> trunc(x1) * ... * trunc(xN), +@@ -1261,7 +1269,7 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, + unsigned numTruncs = 0; + for (unsigned i = 0, e = CommOp->getNumOperands(); i != e && numTruncs < 2; + ++i) { +- const SCEV *S = getTruncateExpr(CommOp->getOperand(i), Ty); ++ const SCEV *S = getTruncateExpr(CommOp->getOperand(i), Ty, Depth + 1); + if (!isa(CommOp->getOperand(i)) && isa(S)) + numTruncs++; + Operands.push_back(S); +@@ -1285,7 +1293,7 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, + if (const SCEVAddRecExpr *AddRec = dyn_cast(Op)) { + SmallVector Operands; + for (const SCEV *Op : AddRec->operands()) +- Operands.push_back(getTruncateExpr(Op, Ty)); ++ Operands.push_back(getTruncateExpr(Op, Ty, Depth + 1)); + return getAddRecExpr(Operands, AddRec->getLoop(), SCEV::FlagAnyWrap); + } + +@@ -1619,7 +1627,7 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { + ID.AddPointer(Ty); + void *IP = nullptr; + if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; +- if (Depth > MaxExtDepth) { ++ if (Depth > MaxCastDepth) { + SCEV *S = new (SCEVAllocator) SCEVZeroExtendExpr(ID.Intern(SCEVAllocator), + Op, Ty); + UniqueSCEVs.InsertNode(S, IP); +@@ -1637,7 +1645,7 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { + unsigned NewBits = getTypeSizeInBits(Ty); + if (CR.truncate(TruncBits).zeroExtend(NewBits).contains( + CR.zextOrTrunc(NewBits))) +- return getTruncateOrZeroExtend(X, Ty); ++ return getTruncateOrZeroExtend(X, Ty, Depth); + } + + // If the input value is a chrec scev, and we can prove that the value +@@ -1679,9 +1687,9 @@ 
ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { + // Check whether the backedge-taken count can be losslessly casted to + // the addrec's type. The count is always unsigned. + const SCEV *CastedMaxBECount = +- getTruncateOrZeroExtend(MaxBECount, Start->getType()); +- const SCEV *RecastedMaxBECount = +- getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType()); ++ getTruncateOrZeroExtend(MaxBECount, Start->getType(), Depth); ++ const SCEV *RecastedMaxBECount = getTruncateOrZeroExtend( ++ CastedMaxBECount, MaxBECount->getType(), Depth); + if (MaxBECount == RecastedMaxBECount) { + Type *WideTy = IntegerType::get(getContext(), BitWidth * 2); + // Check whether Start+Step*MaxBECount has no unsigned overflow. +@@ -1930,7 +1938,7 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { + void *IP = nullptr; + if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; + // Limit recursion depth. +- if (Depth > MaxExtDepth) { ++ if (Depth > MaxCastDepth) { + SCEV *S = new (SCEVAllocator) SCEVSignExtendExpr(ID.Intern(SCEVAllocator), + Op, Ty); + UniqueSCEVs.InsertNode(S, IP); +@@ -1948,7 +1956,7 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { + unsigned NewBits = getTypeSizeInBits(Ty); + if (CR.truncate(TruncBits).signExtend(NewBits).contains( + CR.sextOrTrunc(NewBits))) +- return getTruncateOrSignExtend(X, Ty); ++ return getTruncateOrSignExtend(X, Ty, Depth); + } + + if (auto *SA = dyn_cast(Op)) { +@@ -2023,9 +2031,9 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { + // Check whether the backedge-taken count can be losslessly casted to + // the addrec's type. The count is always unsigned. 
+ const SCEV *CastedMaxBECount = +- getTruncateOrZeroExtend(MaxBECount, Start->getType()); +- const SCEV *RecastedMaxBECount = +- getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType()); ++ getTruncateOrZeroExtend(MaxBECount, Start->getType(), Depth); ++ const SCEV *RecastedMaxBECount = getTruncateOrZeroExtend( ++ CastedMaxBECount, MaxBECount->getType(), Depth); + if (MaxBECount == RecastedMaxBECount) { + Type *WideTy = IntegerType::get(getContext(), BitWidth * 2); + // Check whether Start+Step*MaxBECount has no signed overflow. +@@ -4022,29 +4030,28 @@ const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS, + return getAddExpr(LHS, getNegativeSCEV(RHS, NegFlags), AddFlags, Depth); + } + +-const SCEV * +-ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V, Type *Ty) { ++const SCEV *ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V, Type *Ty, ++ unsigned Depth) { + Type *SrcTy = V->getType(); + assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() && + "Cannot truncate or zero extend with non-integer arguments!"); + if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty)) + return V; // No conversion + if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty)) +- return getTruncateExpr(V, Ty); +- return getZeroExtendExpr(V, Ty); ++ return getTruncateExpr(V, Ty, Depth); ++ return getZeroExtendExpr(V, Ty, Depth); + } + +-const SCEV * +-ScalarEvolution::getTruncateOrSignExtend(const SCEV *V, +- Type *Ty) { ++const SCEV *ScalarEvolution::getTruncateOrSignExtend(const SCEV *V, Type *Ty, ++ unsigned Depth) { + Type *SrcTy = V->getType(); + assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() && + "Cannot truncate or zero extend with non-integer arguments!"); + if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty)) + return V; // No conversion + if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty)) +- return getTruncateExpr(V, Ty); +- return getSignExtendExpr(V, Ty); ++ return getTruncateExpr(V, Ty, Depth); ++ return getSignExtendExpr(V, Ty, 
Depth); + } + + const SCEV * +diff --git a/test/Analysis/ScalarEvolution/limit-depth.ll b/test/Analysis/ScalarEvolution/limit-depth.ll +index f4154130233..6fdf8c5df97 100644 +--- a/test/Analysis/ScalarEvolution/limit-depth.ll ++++ b/test/Analysis/ScalarEvolution/limit-depth.ll +@@ -1,4 +1,4 @@ +-; RUN: opt -scalar-evolution-max-arith-depth=0 -scalar-evolution-max-ext-depth=0 -analyze -scalar-evolution < %s | FileCheck %s ++; RUN: opt -scalar-evolution-max-arith-depth=0 -scalar-evolution-max-cast-depth=0 -analyze -scalar-evolution < %s | FileCheck %s + + ; Check that depth set to 0 prevents getAddExpr and getMulExpr from making + ; transformations in SCEV. We expect the result to be very straightforward. +@@ -98,3 +98,31 @@ exit: + %ze2 = zext i64 %iv2.inc to i128 + ret void + } ++ ++define void @test_trunc(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f) { ++; CHECK-LABEL: @test_trunc ++; CHECK: %trunc2 = trunc i64 %iv2.inc to i32 ++; CHECK-NEXT: --> {(trunc i64 (1 + {7,+,1}<%loop>) to i32),+,1}<%loop2> ++entry: ++ br label %loop ++ ++loop: ++ %iv = phi i128 [ 6, %entry ], [ %iv.inc, %loop ] ++ %iv.inc = add nsw i128 %iv, 1 ++ %cond = icmp sle i128 %iv.inc, 50 ++ br i1 %cond, label %loop, label %between ++ ++between: ++ %trunc = trunc i128 %iv.inc to i64 ++ br label %loop2 ++ ++loop2: ++ %iv2 = phi i64 [ %trunc, %between ], [ %iv2.inc, %loop2 ] ++ %iv2.inc = add nuw i64 %iv2, 1 ++ %cond2 = icmp sle i64 %iv2.inc, 50 ++ br i1 %cond2, label %loop2, label %exit ++ ++exit: ++ %trunc2 = trunc i64 %iv2.inc to i32 ++ ret void ++} +-- +2.18.0 + diff --git a/external/llvm/releases/8.0.0/patches_external/add_win_crt_info.patch b/external/llvm/releases/8.0.0/patches_external/add_win_crt_info.patch new file mode 100644 index 000000000..b159deee3 --- /dev/null +++ b/external/llvm/releases/8.0.0/patches_external/add_win_crt_info.patch @@ -0,0 +1,17 @@ +# Description : Add possibility to use ChooseMSVCCRT-script, when +# include LLVM library. 
+ +--- a/cmake/modules/LLVMConfig.cmake.in ++++ b/cmake/modules/LLVMConfig.cmake.in +@@ -10,6 +10,11 @@ set(LLVM_PACKAGE_VERSION @PACKAGE_VERSION@) + + set(LLVM_BUILD_TYPE @CMAKE_BUILD_TYPE@) + ++set(LLVM_USE_CRT_DEBUG @LLVM_USE_CRT_DEBUG@) ++set(LLVM_USE_CRT_MINSIZEREL @LLVM_USE_CRT_MINSIZEREL@) ++set(LLVM_USE_CRT_RELEASE @LLVM_USE_CRT_RELEASE@) ++set(LLVM_USE_CRT_RELWITHDEBINFO @LLVM_USE_CRT_RELWITHDEBINFO@) ++ + set(LLVM_COMMON_DEPENDS @LLVM_COMMON_DEPENDS@) + + set(LLVM_AVAILABLE_LIBS @LLVM_AVAILABLE_LIBS@) diff --git a/external/llvm/releases/8.0.0/patches_external/export-utility-to-targets-build-install.patch b/external/llvm/releases/8.0.0/patches_external/export-utility-to-targets-build-install.patch new file mode 100644 index 000000000..01836e0a0 --- /dev/null +++ b/external/llvm/releases/8.0.0/patches_external/export-utility-to-targets-build-install.patch @@ -0,0 +1,65 @@ +From 4ce5f86eddc0bf42a971db4509829036d5b17b92 Mon Sep 17 00:00:00 2001 +From: Stefan Granitz +Date: Fri, 1 Feb 2019 13:08:09 +0000 +Subject: [PATCH] [CMake] Add install targets for utilities to LLVM exports if + LLVM_INSTALL_UTILS=ON + +Summary: D56606 was only appending target names to the `LLVM_EXPORTS`/`LLVM_EXPORTS_BUILDTREE_ONLY` properties. Targets showed up correctly in the build-tree `LLVMExports.cmake`, but they were missing in the installed one (as we found in https://bugs.llvm.org/show_bug.cgi?id=40443), because install did not register them explicitly. 
+ +Reviewers: mgorny, smeenai, beanz, gottesmm, dschuff, tstellar, serge-sans-paille + +Reviewed By: smeenai + +Subscribers: llvm-commits + +Differential Revision: https://reviews.llvm.org/D57383 + +git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@352869 91177308-0d34-0410-b5e6-96231b3b80d8 +--- + cmake/modules/AddLLVM.cmake | 32 +++++++++++++++++++++----------- + 1 file changed, 21 insertions(+), 11 deletions(-) + +diff --git a/cmake/modules/AddLLVM.cmake b/cmake/modules/AddLLVM.cmake +index 39781bf34e24..607d6e682b49 100644 +--- a/cmake/modules/AddLLVM.cmake ++++ b/cmake/modules/AddLLVM.cmake +@@ -911,18 +911,28 @@ macro(add_llvm_utility name) + + add_llvm_executable(${name} DISABLE_LLVM_LINK_LLVM_DYLIB ${ARGN}) + set_target_properties(${name} PROPERTIES FOLDER "Utils") +- if( LLVM_INSTALL_UTILS AND LLVM_BUILD_UTILS ) +- install (TARGETS ${name} +- RUNTIME DESTINATION ${LLVM_UTILS_INSTALL_DIR} +- COMPONENT ${name}) +- if (NOT LLVM_ENABLE_IDE) +- add_llvm_install_targets(install-${name} +- DEPENDS ${name} +- COMPONENT ${name}) ++ if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY) ++ if (LLVM_INSTALL_UTILS AND LLVM_BUILD_UTILS) ++ if (${name} IN_LIST LLVM_DISTRIBUTION_COMPONENTS OR ++ NOT LLVM_DISTRIBUTION_COMPONENTS) ++ set(export_to_llvmexports EXPORT LLVMExports) ++ set_property(GLOBAL PROPERTY LLVM_HAS_EXPORTS True) ++ endif() ++ ++ install(TARGETS ${name} ++ ${export_to_llvmexports} ++ RUNTIME DESTINATION ${LLVM_UTILS_INSTALL_DIR} ++ COMPONENT ${name}) ++ ++ if (NOT LLVM_ENABLE_IDE) ++ add_llvm_install_targets(install-${name} ++ DEPENDS ${name} ++ COMPONENT ${name}) ++ endif() ++ set_property(GLOBAL APPEND PROPERTY LLVM_EXPORTS ${name}) ++ elseif(LLVM_BUILD_UTILS) ++ set_property(GLOBAL APPEND PROPERTY LLVM_EXPORTS_BUILDTREE_ONLY ${name}) + endif() +- set_property(GLOBAL APPEND PROPERTY LLVM_EXPORTS ${name}) +- elseif( LLVM_BUILD_UTILS ) +- set_property(GLOBAL APPEND PROPERTY LLVM_EXPORTS_BUILDTREE_ONLY ${name}) + endif() + endmacro(add_llvm_utility name) + \ No 
newline at end of file diff --git a/external/llvm/releases/8.0.0/patches_external/fix_cast_and_phi_processing.patch b/external/llvm/releases/8.0.0/patches_external/fix_cast_and_phi_processing.patch new file mode 100644 index 000000000..410f1e357 --- /dev/null +++ b/external/llvm/releases/8.0.0/patches_external/fix_cast_and_phi_processing.patch @@ -0,0 +1,66 @@ +diff -Naur a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp +--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp 2019-02-01 15:13:36.350506800 +0100 ++++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp 2019-02-01 15:28:02.040172100 +0100 +@@ -2167,7 +2167,7 @@ + SmallSetVector OldPhiNodes; + + // Find all of the A->B casts and PHI nodes. +- // We need to inpect all related PHI nodes, but PHIs can be cyclic, so ++ // We need to inspect all related PHI nodes, but PHIs can be cyclic, so + // OldPhiNodes is used to track all known PHI nodes, before adding a new + // PHI to PhiWorklist, it is checked against and added to OldPhiNodes first. + PhiWorklist.push_back(PN); +@@ -2242,20 +2242,43 @@ + } + } + ++ // Traverse all accumulated PHI nodes and process its users, ++ // which are Stores and BitcCasts. Without this processing ++ // NewPHI nodes could be replicated and could lead to extra ++ // moves generated after DeSSA. + // If there is a store with type B, change it to type A. +- for (User *U : PN->users()) { +- auto *SI = dyn_cast(U); +- if (SI && SI->isSimple() && SI->getOperand(0) == PN) { +- Builder.SetInsertPoint(SI); +- auto *NewBC = +- cast(Builder.CreateBitCast(NewPNodes[PN], SrcTy)); +- SI->setOperand(0, NewBC); +- Worklist.Add(SI); +- assert(hasStoreUsersOnly(*NewBC)); ++ ++ ++ // Replace users of BitCast B->A with NewPHI. These will help ++ // later to get rid off a closure formed by OldPHI nodes. 
++ Instruction *RetVal = nullptr; ++ for (auto *OldPN : OldPhiNodes) { ++ PHINode *NewPN = NewPNodes[OldPN]; ++ for (User *V : OldPN->users()) { ++ if (auto *SI = dyn_cast(V)) { ++ if (SI->isSimple() && SI->getOperand(0) == OldPN) { ++ Builder.SetInsertPoint(SI); ++ auto *NewBC = ++ cast(Builder.CreateBitCast(NewPN, SrcTy)); ++ SI->setOperand(0, NewBC); ++ Worklist.Add(SI); ++ assert(hasStoreUsersOnly(*NewBC)); ++ } ++ } ++ else if (auto *BCI = dyn_cast(V)) { ++ // Verify it's a B->A cast. ++ Type *TyB = BCI->getOperand(0)->getType(); ++ Type *TyA = BCI->getType(); ++ if (TyA == DestTy && TyB == SrcTy) { ++ Instruction *I = replaceInstUsesWith(*BCI, NewPN); ++ if (BCI == &CI) ++ RetVal = I; ++ } ++ } + } + } + +- return replaceInstUsesWith(CI, NewPNodes[PN]); ++ return RetVal; + } + + Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { diff --git a/external/llvm/releases/8.0.0/patches_external/fix_for_typo_regex.patch b/external/llvm/releases/8.0.0/patches_external/fix_for_typo_regex.patch new file mode 100644 index 000000000..12d4c7e6c --- /dev/null +++ b/external/llvm/releases/8.0.0/patches_external/fix_for_typo_regex.patch @@ -0,0 +1,13 @@ +--- a/tools/llvm-config/CMakeLists.txt ++++ b/tools/llvm-config/CMakeLists.txt +@@ -34,8 +34,8 @@ get_property(COMPILE_FLAGS TARGET llvm-config PROPERTY COMPILE_FLAGS) + # NOTE: We don't want to start extracting any random C/CXX flags that the + # user may add that could affect the ABI. We only want to extract flags + # that have been added by the LLVM build system. +-string(REGEX MATCH "-std=[^ ]\+" LLVM_CXX_STD_FLAG ${CMAKE_CXX_FLAGS}) +-string(REGEX MATCH "-std=[^ ]\+" LLVM_C_STD_FLAG ${CMAKE_C_FLAGS}) ++string(REGEX MATCH "-std=[^ ]\\+" LLVM_CXX_STD_FLAG ${CMAKE_CXX_FLAGS}) ++string(REGEX MATCH "-std=[^ ]\\+" LLVM_C_STD_FLAG ${CMAKE_C_FLAGS}) + + # Use configure_file to create BuildVariables.inc. 
+ set(LLVM_SRC_ROOT ${LLVM_MAIN_SRC_DIR}) diff --git a/external/llvm/releases/9.0.0/patches_external/BasicBlockUtils-Add-metadata-fixing-in-SplitBlockPre.patch b/external/llvm/releases/9.0.0/patches_external/BasicBlockUtils-Add-metadata-fixing-in-SplitBlockPre.patch new file mode 100644 index 000000000..15c3b57cf --- /dev/null +++ b/external/llvm/releases/9.0.0/patches_external/BasicBlockUtils-Add-metadata-fixing-in-SplitBlockPre.patch @@ -0,0 +1,105 @@ +From eeb816d95f0910bd246e37bb2bb3923acf0edf6b Mon Sep 17 00:00:00 2001 +From: Aleksander Us +Date: Mon, 26 Aug 2019 15:47:41 +0300 +Subject: [PATCH] [BasicBlockUtils] Add metadata fixing in + SplitBlockPredecessors. + +In case when BB is header of some loop and predecessor is latch of +this loop, metadata was not attached to newly created basic block. +This led to loss of loop metadata for other passes. +--- + lib/Transforms/Utils/BasicBlockUtils.cpp | 23 ++++++++---- + test/Transforms/LoopSimplify/loop_metadata.ll | 36 +++++++++++++++++++ + 2 files changed, 52 insertions(+), 7 deletions(-) + create mode 100644 test/Transforms/LoopSimplify/loop_metadata.ll + +diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp +index 5fa371377c8..3a90ae061fb 100644 +--- a/lib/Transforms/Utils/BasicBlockUtils.cpp ++++ b/lib/Transforms/Utils/BasicBlockUtils.cpp +@@ -579,24 +579,33 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, + + // The new block unconditionally branches to the old block. + BranchInst *BI = BranchInst::Create(BB, NewBB); ++ bool IsBBHeader = LI && LI->isLoopHeader(BB); ++ Loop *BBLoop = LI ? LI->getLoopFor(BB) : nullptr; + // Splitting the predecessors of a loop header creates a preheader block. +- if (LI && LI->isLoopHeader(BB)) ++ if (IsBBHeader) + // Using the loop start line number prevents debuggers stepping into the + // loop body for this instruction. 
+- BI->setDebugLoc(LI->getLoopFor(BB)->getStartLoc()); ++ BI->setDebugLoc(BBLoop->getStartLoc()); + else + BI->setDebugLoc(BB->getFirstNonPHIOrDbg()->getDebugLoc()); + + // Move the edges from Preds to point to NewBB instead of BB. +- for (unsigned i = 0, e = Preds.size(); i != e; ++i) { ++ for (BasicBlock *Pred : Preds) { ++ Instruction *PI = Pred->getTerminator(); + // This is slightly more strict than necessary; the minimum requirement + // is that there be no more than one indirectbr branching to BB. And + // all BlockAddress uses would need to be updated. +- assert(!isa(Preds[i]->getTerminator()) && ++ assert(!isa(PI) && + "Cannot split an edge from an IndirectBrInst"); +- assert(!isa(Preds[i]->getTerminator()) && +- "Cannot split an edge from a CallBrInst"); +- Preds[i]->getTerminator()->replaceUsesOfWith(BB, NewBB); ++ assert(!isa(PI) && "Cannot split an edge from a CallBrInst"); ++ if (IsBBHeader && BBLoop->contains(Pred) && BBLoop->isLoopLatch(Pred)) { ++ // Update loop metadata if it exists. 
++ if (MDNode *LoopMD = PI->getMetadata(LLVMContext::MD_loop)) { ++ BI->setMetadata(LLVMContext::MD_loop, LoopMD); ++ PI->setMetadata(LLVMContext::MD_loop, nullptr); ++ } ++ } ++ PI->replaceUsesOfWith(BB, NewBB); + } + + // Insert a new PHI node into NewBB for every PHI node in BB and that new PHI +diff --git a/test/Transforms/LoopSimplify/loop_metadata.ll b/test/Transforms/LoopSimplify/loop_metadata.ll +new file mode 100644 +index 00000000000..c15c92fe3ae +--- /dev/null ++++ b/test/Transforms/LoopSimplify/loop_metadata.ll +@@ -0,0 +1,36 @@ ++; RUN: opt -S -loop-simplify < %s | FileCheck %s ++ ++; CHECK: for.cond.loopexit: ++; CHECK: br label %for.cond, !llvm.loop !0 ++; CHECK: br i1 %cmp1, label %for.body1, label %for.cond.loopexit ++ ++define void @foo() { ++entry: ++ br label %for.cond ++ ++for.cond: ; preds = %for.cond1, %entry ++ %j = phi i32 [ 0, %entry ], [ %add, %for.cond1 ] ++ %cmp = icmp ult i32 %j, 8 ++ br i1 %cmp, label %for.body, label %for.end ++ ++for.body: ; preds = %for.cond ++ %dummy1 = add i32 1, 1 ++ %add = add nuw nsw i32 %j, 1 ++ br label %for.cond1 ++ ++for.cond1: ; preds = %for.body1, %for.body ++ %i.0 = phi i32 [ 1, %for.body ], [ %inc, %for.body1 ] ++ %cmp1 = icmp ult i32 %i.0, 8 ++ br i1 %cmp1, label %for.body1, label %for.cond, !llvm.loop !0 ++ ++for.body1: ; preds = %for.cond1 ++ %dummy2 = add i32 1, 1 ++ %inc = add nuw nsw i32 %i.0, 1 ++ br label %for.cond1 ++ ++for.end: ; preds = %for.cond ++ ret void ++} ++ ++!0 = distinct !{!0, !1} ++!1 = !{!"llvm.loop.unroll.full"} +-- +2.18.0 + diff --git a/external/llvm/releases/9.0.0/patches_external/IndVarSimplify-Do-not-use-SCEV-expander-for-IVCount-.patch b/external/llvm/releases/9.0.0/patches_external/IndVarSimplify-Do-not-use-SCEV-expander-for-IVCount-.patch new file mode 100644 index 000000000..398c3a6ee --- /dev/null +++ b/external/llvm/releases/9.0.0/patches_external/IndVarSimplify-Do-not-use-SCEV-expander-for-IVCount-.patch @@ -0,0 +1,140 @@ +From 
35e218a886f4c066eabd18685240d55270bd5a6d Mon Sep 17 00:00:00 2001 +From: Aleksander Us +Date: Mon, 26 Aug 2019 15:45:47 +0300 +Subject: [PATCH] [IndVarSimplify] Do not use SCEV expander for IVCount in + LFTR when possible. + +SCEV analysis cannot properly cache instruction with poison flags +(for example, add nsw outside of loop will not be reused by expander). +This can lead to generating of additional instructions by SCEV expander. + +Example IR: + + ... + %maxval = add nuw nsw i32 %a1, %a2 + ... +for.body: + ... + %cmp22 = icmp ult i32 %ivadd, %maxval + br i1 %cmp22, label %for.body, label %for.end + ... + +SCEV expander will generate copy of %maxval in preheader but without +nuw/nsw flags. This can be avoided by explicit check that iv count +value gives the same SCEV expressions as calculated by LFTR. +--- + lib/Transforms/Scalar/IndVarSimplify.cpp | 12 +++++++++- + test/Transforms/IndVarSimplify/add_nsw.ll | 23 ++++++++++++++++++++ + test/Transforms/IndVarSimplify/lftr-reuse.ll | 9 +++----- + test/Transforms/IndVarSimplify/udiv.ll | 1 + + 4 files changed, 38 insertions(+), 7 deletions(-) + create mode 100644 test/Transforms/IndVarSimplify/add_nsw.ll + +diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp +index f9fc698a4a9..5e04dac8aa6 100644 +--- a/lib/Transforms/Scalar/IndVarSimplify.cpp ++++ b/lib/Transforms/Scalar/IndVarSimplify.cpp +@@ -2375,6 +2375,17 @@ static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB, + if (UsePostInc) + IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType())); + ++ // If computed limit is equal to old limit then do not use SCEV expander ++ // because it can lose NUW/NSW flags and create extra instructions. 
++ BranchInst *BI = cast(ExitingBB->getTerminator()); ++ if (ICmpInst *Cmp = dyn_cast(BI->getOperand(0))) { ++ Value *Limit = Cmp->getOperand(0); ++ if (!L->isLoopInvariant(Limit)) ++ Limit = Cmp->getOperand(1); ++ if (SE->getSCEV(Limit) == IVLimit) ++ return Limit; ++ } ++ + // Expand the code for the iteration count. + assert(SE->isLoopInvariant(IVLimit, L) && + "Computed iteration count is not loop invariant!"); +@@ -2383,7 +2394,6 @@ static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB, + // SCEV expression (IVInit) for a pointer type IV value (IndVar). + Type *LimitTy = ExitCount->getType()->isPointerTy() ? + IndVar->getType() : ExitCount->getType(); +- BranchInst *BI = cast(ExitingBB->getTerminator()); + return Rewriter.expandCodeFor(IVLimit, LimitTy, BI); + } + } +diff --git a/test/Transforms/IndVarSimplify/add_nsw.ll b/test/Transforms/IndVarSimplify/add_nsw.ll +new file mode 100644 +index 00000000000..abd1cbb6c51 +--- /dev/null ++++ b/test/Transforms/IndVarSimplify/add_nsw.ll +@@ -0,0 +1,23 @@ ++; RUN: opt -indvars -S %s | FileCheck %s ++ ++target datalayout = "e-p:32:32-i64:64-n8:16:32" ++ ++; CHECK: for.body.preheader: ++; CHECK-NOT: add ++; CHECK: for.body: ++ ++define void @foo(i32 %a1, i32 %a2) { ++entry: ++ %maxval = add nuw nsw i32 %a1, %a2 ++ %cmp = icmp slt i32 %maxval, 1 ++ br i1 %cmp, label %for.end, label %for.body ++ ++for.body: ; preds = %entry, %for.body ++ %j.02 = phi i32 [ 0, %entry ], [ %add31, %for.body ] ++ %add31 = add nuw nsw i32 %j.02, 1 ++ %cmp22 = icmp slt i32 %add31, %maxval ++ br i1 %cmp22, label %for.body, label %for.end ++ ++for.end: ; preds = %for.body ++ ret void ++} +diff --git a/test/Transforms/IndVarSimplify/lftr-reuse.ll b/test/Transforms/IndVarSimplify/lftr-reuse.ll +index 14ae9738696..509d662b767 100644 +--- a/test/Transforms/IndVarSimplify/lftr-reuse.ll ++++ b/test/Transforms/IndVarSimplify/lftr-reuse.ll +@@ -67,11 +67,9 @@ define void @expandOuterRecurrence(i32 %arg) nounwind { + ; CHECK-NEXT: [[CMP1:%.*]] 
= icmp slt i32 0, [[SUB1]] + ; CHECK-NEXT: br i1 [[CMP1]], label [[OUTER_PREHEADER:%.*]], label [[EXIT:%.*]] + ; CHECK: outer.preheader: +-; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[ARG]], -1 + ; CHECK-NEXT: br label [[OUTER:%.*]] + ; CHECK: outer: +-; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[TMP0]], [[OUTER_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[OUTER_INC:%.*]] ] +-; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_INC:%.*]], [[OUTER_INC]] ], [ 0, [[OUTER_PREHEADER]] ] ++; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_INC:%.*]], [[OUTER_INC:%.*]] ], [ 0, [[OUTER_PREHEADER]] ] + ; CHECK-NEXT: [[SUB2:%.*]] = sub nsw i32 [[ARG]], [[I]] + ; CHECK-NEXT: [[SUB3:%.*]] = sub nsw i32 [[SUB2]], 1 + ; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 0, [[SUB3]] +@@ -81,14 +79,13 @@ define void @expandOuterRecurrence(i32 %arg) nounwind { + ; CHECK: inner: + ; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[INNER_PH]] ], [ [[J_INC:%.*]], [[INNER]] ] + ; CHECK-NEXT: [[J_INC]] = add nuw nsw i32 [[J]], 1 +-; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[J_INC]], [[INDVARS_IV]] ++; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[J_INC]], [[SUB3]] + ; CHECK-NEXT: br i1 [[EXITCOND]], label [[INNER]], label [[OUTER_INC_LOOPEXIT:%.*]] + ; CHECK: outer.inc.loopexit: + ; CHECK-NEXT: br label [[OUTER_INC]] + ; CHECK: outer.inc: + ; CHECK-NEXT: [[I_INC]] = add nuw nsw i32 [[I]], 1 +-; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i32 [[INDVARS_IV]], -1 +-; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp ne i32 [[I_INC]], [[TMP0]] ++; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp ne i32 [[I_INC]], [[SUB1]] + ; CHECK-NEXT: br i1 [[EXITCOND1]], label [[OUTER]], label [[EXIT_LOOPEXIT:%.*]] + ; CHECK: exit.loopexit: + ; CHECK-NEXT: br label [[EXIT]] +diff --git a/test/Transforms/IndVarSimplify/udiv.ll b/test/Transforms/IndVarSimplify/udiv.ll +index b3f2c2a6a66..3530343ef4a 100644 +--- a/test/Transforms/IndVarSimplify/udiv.ll ++++ b/test/Transforms/IndVarSimplify/udiv.ll +@@ -133,6 +133,7 @@ declare i32 @printf(i8* nocapture, ...) 
nounwind + ; CHECK-LABEL: @foo( + ; CHECK: for.body.preheader: + ; CHECK-NOT: udiv ++; CHECK: for.body: + + define void @foo(double* %p, i64 %n) nounwind { + entry: +-- +2.18.0 + diff --git a/external/llvm/releases/9.0.0/patches_external/add_win_crt_info.patch b/external/llvm/releases/9.0.0/patches_external/add_win_crt_info.patch new file mode 100644 index 000000000..b159deee3 --- /dev/null +++ b/external/llvm/releases/9.0.0/patches_external/add_win_crt_info.patch @@ -0,0 +1,17 @@ +# Description : Add possibility to use ChooseMSVCCRT-script, when +# include LLVM library. + +--- a/cmake/modules/LLVMConfig.cmake.in ++++ b/cmake/modules/LLVMConfig.cmake.in +@@ -10,6 +10,11 @@ set(LLVM_PACKAGE_VERSION @PACKAGE_VERSION@) + + set(LLVM_BUILD_TYPE @CMAKE_BUILD_TYPE@) + ++set(LLVM_USE_CRT_DEBUG @LLVM_USE_CRT_DEBUG@) ++set(LLVM_USE_CRT_MINSIZEREL @LLVM_USE_CRT_MINSIZEREL@) ++set(LLVM_USE_CRT_RELEASE @LLVM_USE_CRT_RELEASE@) ++set(LLVM_USE_CRT_RELWITHDEBINFO @LLVM_USE_CRT_RELWITHDEBINFO@) ++ + set(LLVM_COMMON_DEPENDS @LLVM_COMMON_DEPENDS@) + + set(LLVM_AVAILABLE_LIBS @LLVM_AVAILABLE_LIBS@) diff --git a/external/llvm/releases/9.0.0/patches_external/fix_for_llvm-as_buildbreak.patch b/external/llvm/releases/9.0.0/patches_external/fix_for_llvm-as_buildbreak.patch new file mode 100644 index 000000000..09f9ff825 --- /dev/null +++ b/external/llvm/releases/9.0.0/patches_external/fix_for_llvm-as_buildbreak.patch @@ -0,0 +1,11 @@ +# Description : Fix build break for building llvm-as + +--- a/tools/llvm-as/CMakeLists.txt ++++ b/tools/llvm-as/CMakeLists.txt +@@ -2,6 +2,7 @@ set(LLVM_LINK_COMPONENTS + AsmParser + BitWriter + Core ++ Demangle + Support + ) diff --git a/external/llvm/releases/9.0.0/patches_external/fix_for_llvm-link_buildbreak.patch b/external/llvm/releases/9.0.0/patches_external/fix_for_llvm-link_buildbreak.patch new file mode 100644 index 000000000..ef298ad57 --- /dev/null +++ b/external/llvm/releases/9.0.0/patches_external/fix_for_llvm-link_buildbreak.patch @@ -0,0 +1,12 
@@ +# Description : Fix build break for building llvm-link + +--- a/tools/llvm-link/CMakeLists.txt ++++ b/tools/llvm-link/CMakeLists.txt +@@ -8,6 +8,7 @@ set(LLVM_LINK_COMPONENTS + Support + TransformUtils + IPO ++ Demangle + ) + + add_llvm_tool(llvm-link diff --git a/external/llvm/releases/9.0.0/patches_external/fix_for_opt_buildbreak.patch b/external/llvm/releases/9.0.0/patches_external/fix_for_opt_buildbreak.patch new file mode 100644 index 000000000..e0174084a --- /dev/null +++ b/external/llvm/releases/9.0.0/patches_external/fix_for_opt_buildbreak.patch @@ -0,0 +1,12 @@ +# Description : Fix build break for building opt + +--- a/tools/opt/CMakeLists.txt ++++ b/tools/opt/CMakeLists.txt +@@ -9,6 +9,7 @@ set(LLVM_LINK_COMPONENTS + CodeGen + Core + Coroutines ++ Demangle + IPO + IRReader + InstCombine