From ef8b3b5ffd562955fbb78c72c7820cd9cd99d313 Mon Sep 17 00:00:00 2001 From: "Joel E. Denny" Date: Fri, 19 Feb 2021 10:59:52 -0500 Subject: [PATCH] [OpenMP] Fix nvptx CUDA_VERSION conversion As mentioned in PR#49250, without this patch, ptxas for CUDA 9.1 fails in the following two tests: - openmp/libomptarget/test/mapping/lambda_mapping.cpp - openmp/libomptarget/test/offloading/bug49021.cpp The error looks like: ``` ptxas /tmp/lambda_mapping-081ea9.s, line 828; error : Not a name of any known instruction: 'activemask' ``` The problem is that our cmake script converts CUDA version strings incorrectly: 9.1 becomes 9100, but it should be 9010, as shown in `getCudaVersion` in `clang/lib/Driver/ToolChains/Cuda.cpp`. Thus, `openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.cu` inadvertently enables `activemask` because it apparently becomes available in 9.2. This patch fixes the conversion. This patch does not fix the other two tests in PR#49250. Reviewed By: tianshilei1992 Differential Revision: https://reviews.llvm.org/D97012 --- openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt index 5478cd3f6aea..1e64e7745582 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt +++ b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt @@ -172,7 +172,15 @@ foreach(sm ${nvptx_sm_list}) list(GET ptx_feature_list ${itr} ptx_num) set(cuda_flags ${sm_flags}) list(APPEND cuda_flags -Xclang -target-feature -Xclang +ptx${ptx_num}) - list(APPEND cuda_flags "-DCUDA_VERSION=${cuda_version}00") + if("${cuda_version}" MATCHES "^([0-9]+)([0-9])$") + set(cuda_version_major ${CMAKE_MATCH_1}) + set(cuda_version_minor ${CMAKE_MATCH_2}) + else() + libomptarget_error_say( + "Unrecognized CUDA version format: ${cuda_version}") + endif() + list(APPEND cuda_flags + "-DCUDA_VERSION=${cuda_version_major}0${cuda_version_minor}0") set(bc_files "") foreach(src ${cuda_src_files})