[Clang] Add timeout for GPU detection utilities (#94751)

Summary:
The utilities `nvptx-arch` and `amdgpu-arch` are used to support
`--offload-arch=native` among other utilities in clang. However, these
rely on the GPU drivers to query the features. In certain cases these
drivers can become locked up, which will lead to indefinate hangs on any
compiler jobs running in the meantime.

This patch adds a ten second timeout period for these utilities before
it kills the job and errors out.
This commit is contained in:
Joseph Huber
2024-06-07 08:45:35 -05:00
committed by GitHub
parent c5fcc2ea55
commit 2981f3a284
4 changed files with 8 additions and 7 deletions

View File

@@ -205,7 +205,8 @@ protected:
/// Executes the given \p Executable and returns the stdout.
llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
executeToolChainProgram(StringRef Executable) const;
executeToolChainProgram(StringRef Executable,
unsigned SecondsToWait = 0) const;
void setTripleEnvironment(llvm::Triple::EnvironmentType Env);

View File

@@ -104,7 +104,8 @@ ToolChain::ToolChain(const Driver &D, const llvm::Triple &T,
}
llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
ToolChain::executeToolChainProgram(StringRef Executable) const {
ToolChain::executeToolChainProgram(StringRef Executable,
unsigned SecondsToWait) const {
llvm::SmallString<64> OutputFile;
llvm::sys::fs::createTemporaryFile("toolchain-program", "txt", OutputFile);
llvm::FileRemover OutputRemover(OutputFile.c_str());
@@ -115,9 +116,8 @@ ToolChain::executeToolChainProgram(StringRef Executable) const {
};
std::string ErrorMessage;
if (llvm::sys::ExecuteAndWait(Executable, {}, {}, Redirects,
/* SecondsToWait */ 0,
/*MemoryLimit*/ 0, &ErrorMessage))
if (llvm::sys::ExecuteAndWait(Executable, {}, {}, Redirects, SecondsToWait,
/*MemoryLimit=*/0, &ErrorMessage))
return llvm::createStringError(std::error_code(),
Executable + ": " + ErrorMessage);

View File

@@ -877,7 +877,7 @@ AMDGPUToolChain::getSystemGPUArchs(const ArgList &Args) const {
else
Program = GetProgramPath("amdgpu-arch");
auto StdoutOrErr = executeToolChainProgram(Program);
auto StdoutOrErr = executeToolChainProgram(Program, /*SecondsToWait=*/10);
if (!StdoutOrErr)
return StdoutOrErr.takeError();

View File

@@ -826,7 +826,7 @@ NVPTXToolChain::getSystemGPUArchs(const ArgList &Args) const {
else
Program = GetProgramPath("nvptx-arch");
auto StdoutOrErr = executeToolChainProgram(Program);
auto StdoutOrErr = executeToolChainProgram(Program, /*SecondsToWait=*/10);
if (!StdoutOrErr)
return StdoutOrErr.takeError();