From 2981f3a284302bb12b292bcf09e7e09ae2eb696a Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Fri, 7 Jun 2024 08:45:35 -0500 Subject: [PATCH] [Clang] Add timeout for GPU detection utilities (#94751) Summary: The utilities `nvptx-arch` and `amdgpu-arch` are used to support `--offload-arch=native` among other utilities in clang. However, these rely on the GPU drivers to query the features. In certain cases these drivers can become locked up, which will lead to indefinite hangs on any compiler jobs running in the meantime. This patch adds a ten-second timeout period for these utilities before it kills the job and errors out. --- clang/include/clang/Driver/ToolChain.h | 3 ++- clang/lib/Driver/ToolChain.cpp | 8 ++++---- clang/lib/Driver/ToolChains/AMDGPU.cpp | 2 +- clang/lib/Driver/ToolChains/Cuda.cpp | 2 +- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h index a4f9cad98aa8..9789cfacafd7 100644 --- a/clang/include/clang/Driver/ToolChain.h +++ b/clang/include/clang/Driver/ToolChain.h @@ -205,7 +205,8 @@ protected: /// Executes the given \p Executable and returns the stdout. 
llvm::Expected> - executeToolChainProgram(StringRef Executable) const; + executeToolChainProgram(StringRef Executable, + unsigned SecondsToWait = 0) const; void setTripleEnvironment(llvm::Triple::EnvironmentType Env); diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index 0e86bc07e0ea..40ab2e91125d 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -104,7 +104,8 @@ ToolChain::ToolChain(const Driver &D, const llvm::Triple &T, } llvm::Expected> -ToolChain::executeToolChainProgram(StringRef Executable) const { +ToolChain::executeToolChainProgram(StringRef Executable, + unsigned SecondsToWait) const { llvm::SmallString<64> OutputFile; llvm::sys::fs::createTemporaryFile("toolchain-program", "txt", OutputFile); llvm::FileRemover OutputRemover(OutputFile.c_str()); @@ -115,9 +116,8 @@ ToolChain::executeToolChainProgram(StringRef Executable) const { }; std::string ErrorMessage; - if (llvm::sys::ExecuteAndWait(Executable, {}, {}, Redirects, - /* SecondsToWait */ 0, - /*MemoryLimit*/ 0, &ErrorMessage)) + if (llvm::sys::ExecuteAndWait(Executable, {}, {}, Redirects, SecondsToWait, + /*MemoryLimit=*/0, &ErrorMessage)) return llvm::createStringError(std::error_code(), Executable + ": " + ErrorMessage); diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp index 9ffea57b005d..11a98a0ec314 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -877,7 +877,7 @@ AMDGPUToolChain::getSystemGPUArchs(const ArgList &Args) const { else Program = GetProgramPath("amdgpu-arch"); - auto StdoutOrErr = executeToolChainProgram(Program); + auto StdoutOrErr = executeToolChainProgram(Program, /*SecondsToWait=*/10); if (!StdoutOrErr) return StdoutOrErr.takeError(); diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp index bbc8be91fd70..2dfc7457b0ac 100644 --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ 
b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -826,7 +826,7 @@ NVPTXToolChain::getSystemGPUArchs(const ArgList &Args) const { else Program = GetProgramPath("nvptx-arch"); - auto StdoutOrErr = executeToolChainProgram(Program); + auto StdoutOrErr = executeToolChainProgram(Program, /*SecondsToWait=*/10); if (!StdoutOrErr) return StdoutOrErr.takeError();