[sanitizer] Intercept glibc 2.38 __isoc23_* functions

`strtol("0b1", 0, 0)` can be (pre-C23) 0 or (C23) 1.
`sscanf("0b10", "%i", &x)` is similar. glibc 2.38 introduced
`__isoc23_strtol` and `__isoc23_scanf` family functions for binary
compatibility.

When `_ISOC2X_SOURCE` is defined (implied by `_GNU_SOURCE`) or
`__STDC_VERSION__ > 201710L`, `__GLIBC_USE_ISOC2X` is defined to 1 and
these `__isoc23_*` symbols are used.

Add `__isoc23_` versions for the following interceptors:

* sanitizer_common_interceptors.inc implements strtoimax/strtoumax.
  Remove incorrect FIXME about https://github.com/google/sanitizers/issues/321
* asan_interceptors.cpp implements just strtol and strtoll. The default
  `replace_str` mode checks `nptr` is readable and `endptr` is writable.
  atoi reuses the existing strtol interceptor.
* msan_interceptors.cpp implements strtol family functions and their
  `_l` versions. Tested by lib/msan/tests/msan_test.cpp
* sanitizer_common_interceptors.inc implements scanf family functions.

The strtol family interceptors are spread across several files, which is not
great, but this patch (intended for release/17.x) does not attempt to address
the issue.

Add symbols to lib/sanitizer_common/symbolizer/scripts/global_symbols.txt to
support both glibc pre-2.38 and 2.38.

When build bots migrate to glibc 2.38+, we will lose test coverage for
non-isoc23 versions since the existing C++ unittests imply `_GNU_SOURCE`.
Add test/sanitizer_common/TestCases/{strtol.c,scanf.c}.
They catch msan false positive in the absence of the interceptors.

Fix https://github.com/llvm/llvm-project/issues/64388
Fix https://github.com/llvm/llvm-project/issues/64946

Link: https://lists.gnu.org/archive/html/info-gnu/2023-07/msg00010.html
("The GNU C Library version 2.38 is now available")

Reviewed By: #sanitizers, vitalybuka, mgorny

Differential Revision: https://reviews.llvm.org/D158943
This commit is contained in:
Fangrui Song
2023-08-28 00:49:49 -07:00
parent 98cf20f890
commit ad7e250100
6 changed files with 214 additions and 38 deletions

View File

@@ -602,19 +602,34 @@ INTERCEPTOR(char*, strncpy, char *to, const char *from, uptr size) {
return REAL(strncpy)(to, from, size);
}
INTERCEPTOR(long, strtol, const char *nptr, char **endptr, int base) {
void *ctx;
ASAN_INTERCEPTOR_ENTER(ctx, strtol);
ENSURE_ASAN_INITED();
if (!flags()->replace_str) {
return REAL(strtol)(nptr, endptr, base);
}
template <typename Fn>
static ALWAYS_INLINE auto StrtolImpl(void *ctx, Fn real, const char *nptr,
char **endptr, int base)
-> decltype(real(nullptr, nullptr, 0)) {
if (!flags()->replace_str)
return real(nptr, endptr, base);
char *real_endptr;
long result = REAL(strtol)(nptr, &real_endptr, base);
auto res = real(nptr, &real_endptr, base);
StrtolFixAndCheck(ctx, nptr, endptr, real_endptr, base);
return result;
return res;
}
// Defines the ASan interceptor for `func`, a strtol-style function taking
// (nptr, endptr, base) and returning `ret_type`. The generated body enters the
// interceptor, ensures ASan is initialized, and hands REAL(func) to StrtolImpl,
// which performs the actual call.
# define INTERCEPTOR_STRTO_BASE(ret_type, func) \
INTERCEPTOR(ret_type, func, const char *nptr, char **endptr, int base) { \
void *ctx; \
ASAN_INTERCEPTOR_ENTER(ctx, func); \
ENSURE_ASAN_INITED(); \
return StrtolImpl(ctx, REAL(func), nptr, endptr, base); \
}
INTERCEPTOR_STRTO_BASE(long, strtol)
INTERCEPTOR_STRTO_BASE(long long, strtoll)
// The __isoc23_* spellings are only instantiated when SANITIZER_GLIBC is set.
# if SANITIZER_GLIBC
INTERCEPTOR_STRTO_BASE(long, __isoc23_strtol)
INTERCEPTOR_STRTO_BASE(long long, __isoc23_strtoll)
# endif
INTERCEPTOR(int, atoi, const char *nptr) {
void *ctx;
ASAN_INTERCEPTOR_ENTER(ctx, atoi);
@@ -653,19 +668,6 @@ INTERCEPTOR(long, atol, const char *nptr) {
return result;
}
INTERCEPTOR(long long, strtoll, const char *nptr, char **endptr, int base) {
void *ctx;
ASAN_INTERCEPTOR_ENTER(ctx, strtoll);
ENSURE_ASAN_INITED();
if (!flags()->replace_str) {
return REAL(strtoll)(nptr, endptr, base);
}
char *real_endptr;
long long result = REAL(strtoll)(nptr, &real_endptr, base);
StrtolFixAndCheck(ctx, nptr, endptr, real_endptr, base);
return result;
}
INTERCEPTOR(long long, atoll, const char *nptr) {
void *ctx;
ASAN_INTERCEPTOR_ENTER(ctx, atoll);
@@ -766,6 +768,10 @@ void InitializeAsanInterceptors() {
ASAN_INTERCEPT_FUNC(atoll);
ASAN_INTERCEPT_FUNC(strtol);
ASAN_INTERCEPT_FUNC(strtoll);
# if SANITIZER_GLIBC
ASAN_INTERCEPT_FUNC(__isoc23_strtol);
ASAN_INTERCEPT_FUNC(__isoc23_strtoll);
# endif
// Intercept jump-related functions.
ASAN_INTERCEPT_FUNC(longjmp);

View File

@@ -464,6 +464,25 @@ INTERCEPTORS_STRTO_BASE(long long, wcstoll, wchar_t)
INTERCEPTORS_STRTO_BASE(unsigned long, wcstoul, wchar_t)
INTERCEPTORS_STRTO_BASE(unsigned long long, wcstoull, wchar_t)
// glibc-only: instantiate the INTERCEPTORS_STRTO / INTERCEPTORS_STRTO_BASE
// macros for the __isoc23_* spellings of the strto* (char) and wcsto*
// (wchar_t) conversion functions.
// NOTE(review): the commit message says MSan also covers the `_l` variants;
// presumably these macros generate them -- confirm against the macro
// definitions, which are outside this hunk.
#if SANITIZER_GLIBC
INTERCEPTORS_STRTO(double, __isoc23_strtod, char)
INTERCEPTORS_STRTO(float, __isoc23_strtof, char)
INTERCEPTORS_STRTO(long double, __isoc23_strtold, char)
INTERCEPTORS_STRTO_BASE(long, __isoc23_strtol, char)
INTERCEPTORS_STRTO_BASE(long long, __isoc23_strtoll, char)
INTERCEPTORS_STRTO_BASE(unsigned long, __isoc23_strtoul, char)
INTERCEPTORS_STRTO_BASE(unsigned long long, __isoc23_strtoull, char)
INTERCEPTORS_STRTO_BASE(u64, __isoc23_strtouq, char)
INTERCEPTORS_STRTO(double, __isoc23_wcstod, wchar_t)
INTERCEPTORS_STRTO(float, __isoc23_wcstof, wchar_t)
INTERCEPTORS_STRTO(long double, __isoc23_wcstold, wchar_t)
INTERCEPTORS_STRTO_BASE(long, __isoc23_wcstol, wchar_t)
INTERCEPTORS_STRTO_BASE(long long, __isoc23_wcstoll, wchar_t)
INTERCEPTORS_STRTO_BASE(unsigned long, __isoc23_wcstoul, wchar_t)
INTERCEPTORS_STRTO_BASE(unsigned long long, __isoc23_wcstoull, wchar_t)
#endif
#if SANITIZER_NETBSD
#define INTERCEPT_STRTO(func) \
INTERCEPT_FUNCTION(func); \
@@ -1748,6 +1767,24 @@ void InitializeInterceptors() {
INTERCEPT_STRTO(wcstoul);
INTERCEPT_STRTO(wcstoll);
INTERCEPT_STRTO(wcstoull);
// Hook up the __isoc23_* strto*/wcsto* interceptors.
// This must be `#if`, not `#ifdef`: the interceptors themselves are defined
// under `#if SANITIZER_GLIBC`, and the macro is defined (to 0) on non-glibc
// targets as well. With `#ifdef` this block would always be compiled and would
// reference interceptors that were never defined there.
#if SANITIZER_GLIBC
INTERCEPT_STRTO(__isoc23_strtod);
INTERCEPT_STRTO(__isoc23_strtof);
INTERCEPT_STRTO(__isoc23_strtold);
INTERCEPT_STRTO(__isoc23_strtol);
INTERCEPT_STRTO(__isoc23_strtoul);
INTERCEPT_STRTO(__isoc23_strtoll);
INTERCEPT_STRTO(__isoc23_strtoull);
INTERCEPT_STRTO(__isoc23_strtouq);
INTERCEPT_STRTO(__isoc23_wcstod);
INTERCEPT_STRTO(__isoc23_wcstof);
INTERCEPT_STRTO(__isoc23_wcstold);
INTERCEPT_STRTO(__isoc23_wcstol);
INTERCEPT_STRTO(__isoc23_wcstoul);
INTERCEPT_STRTO(__isoc23_wcstoll);
INTERCEPT_STRTO(__isoc23_wcstoull);
#endif
#ifdef SANITIZER_NLDBL_VERSION
INTERCEPT_FUNCTION_VER(vswprintf, SANITIZER_NLDBL_VERSION);
INTERCEPT_FUNCTION_VER(swprintf, SANITIZER_NLDBL_VERSION);

View File

@@ -1491,6 +1491,16 @@ VSCANF_INTERCEPTOR_IMPL(__isoc99_vsscanf, false, str, format, ap)
INTERCEPTOR(int, __isoc99_vfscanf, void *stream, const char *format, va_list ap)
VSCANF_INTERCEPTOR_IMPL(__isoc99_vfscanf, false, stream, format, ap)
// __isoc23_* counterparts of the __isoc99_v*scanf interceptors: same
// signatures, with each body produced by VSCANF_INTERCEPTOR_IMPL.
INTERCEPTOR(int, __isoc23_vscanf, const char *format, va_list ap)
VSCANF_INTERCEPTOR_IMPL(__isoc23_vscanf, false, format, ap)
INTERCEPTOR(int, __isoc23_vsscanf, const char *str, const char *format,
va_list ap)
VSCANF_INTERCEPTOR_IMPL(__isoc23_vsscanf, false, str, format, ap)
INTERCEPTOR(int, __isoc23_vfscanf, void *stream, const char *format, va_list ap)
VSCANF_INTERCEPTOR_IMPL(__isoc23_vfscanf, false, stream, format, ap)
#endif // SANITIZER_INTERCEPT_ISOC99_SCANF
INTERCEPTOR(int, scanf, const char *format, ...)
@@ -1511,6 +1521,15 @@ FORMAT_INTERCEPTOR_IMPL(__isoc99_fscanf, __isoc99_vfscanf, stream, format)
INTERCEPTOR(int, __isoc99_sscanf, const char *str, const char *format, ...)
FORMAT_INTERCEPTOR_IMPL(__isoc99_sscanf, __isoc99_vsscanf, str, format)
// Variadic __isoc23_* scanf entry points. Each body comes from
// FORMAT_INTERCEPTOR_IMPL, which is given the matching __isoc23_v*scanf
// interceptor as its second argument.
INTERCEPTOR(int, __isoc23_scanf, const char *format, ...)
FORMAT_INTERCEPTOR_IMPL(__isoc23_scanf, __isoc23_vscanf, format)
INTERCEPTOR(int, __isoc23_fscanf, void *stream, const char *format, ...)
FORMAT_INTERCEPTOR_IMPL(__isoc23_fscanf, __isoc23_vfscanf, stream, format)
INTERCEPTOR(int, __isoc23_sscanf, const char *str, const char *format, ...)
FORMAT_INTERCEPTOR_IMPL(__isoc23_sscanf, __isoc23_vsscanf, str, format)
#endif
#endif
@@ -1534,7 +1553,13 @@ FORMAT_INTERCEPTOR_IMPL(__isoc99_sscanf, __isoc99_vsscanf, str, format)
COMMON_INTERCEPT_FUNCTION(__isoc99_fscanf); \
COMMON_INTERCEPT_FUNCTION(__isoc99_vscanf); \
COMMON_INTERCEPT_FUNCTION(__isoc99_vsscanf); \
COMMON_INTERCEPT_FUNCTION(__isoc99_vfscanf);
COMMON_INTERCEPT_FUNCTION(__isoc99_vfscanf); \
COMMON_INTERCEPT_FUNCTION(__isoc23_scanf); \
COMMON_INTERCEPT_FUNCTION(__isoc23_sscanf); \
COMMON_INTERCEPT_FUNCTION(__isoc23_fscanf); \
COMMON_INTERCEPT_FUNCTION(__isoc23_vscanf); \
COMMON_INTERCEPT_FUNCTION(__isoc23_vsscanf); \
COMMON_INTERCEPT_FUNCTION(__isoc23_vfscanf);
#else
#define INIT_ISOC99_SCANF
#endif
@@ -3539,30 +3564,26 @@ UNUSED static inline void StrtolFixAndCheck(void *ctx, const char *nptr,
(real_endptr - nptr) + 1 : 0);
}
#if SANITIZER_INTERCEPT_STRTOIMAX
INTERCEPTOR(INTMAX_T, strtoimax, const char *nptr, char **endptr, int base) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strtoimax, nptr, endptr, base);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
template <typename Fn>
static ALWAYS_INLINE auto StrtoimaxImpl(void *ctx, Fn real, const char *nptr,
char **endptr, int base)
-> decltype(real(nullptr, nullptr, 0)) {
char *real_endptr;
INTMAX_T res = REAL(strtoimax)(nptr, &real_endptr, base);
auto res = real(nptr, &real_endptr, base);
StrtolFixAndCheck(ctx, nptr, endptr, real_endptr, base);
return res;
}
INTERCEPTOR(INTMAX_T, strtoimax, const char *nptr, char **endptr, int base) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strtoimax, nptr, endptr, base);
return StrtoimaxImpl(ctx, REAL(strtoimax), nptr, endptr, base);
}
INTERCEPTOR(UINTMAX_T, strtoumax, const char *nptr, char **endptr, int base) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strtoumax, nptr, endptr, base);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
char *real_endptr;
UINTMAX_T res = REAL(strtoumax)(nptr, &real_endptr, base);
StrtolFixAndCheck(ctx, nptr, endptr, real_endptr, base);
return res;
return StrtoimaxImpl(ctx, REAL(strtoumax), nptr, endptr, base);
}
#define INIT_STRTOIMAX \
@@ -3572,6 +3593,25 @@ INTERCEPTOR(UINTMAX_T, strtoumax, const char *nptr, char **endptr, int base) {
#define INIT_STRTOIMAX
#endif
// glibc-only __isoc23_* variants of the strtoimax/strtoumax interceptors.
// Both enter the common interceptor and delegate to StrtoimaxImpl, passing the
// matching REAL() function as the one to call.
#if SANITIZER_INTERCEPT_STRTOIMAX && SANITIZER_GLIBC
INTERCEPTOR(INTMAX_T, __isoc23_strtoimax, const char *nptr, char **endptr, int base) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, __isoc23_strtoimax, nptr, endptr, base);
return StrtoimaxImpl(ctx, REAL(__isoc23_strtoimax), nptr, endptr, base);
}
INTERCEPTOR(UINTMAX_T, __isoc23_strtoumax, const char *nptr, char **endptr, int base) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, __isoc23_strtoumax, nptr, endptr, base);
return StrtoimaxImpl(ctx, REAL(__isoc23_strtoumax), nptr, endptr, base);
}
// Applies COMMON_INTERCEPT_FUNCTION to both interceptors defined above.
# define INIT_STRTOIMAX_C23 \
COMMON_INTERCEPT_FUNCTION(__isoc23_strtoimax); \
COMMON_INTERCEPT_FUNCTION(__isoc23_strtoumax);
#else
// Expands to nothing when the interceptors above are not compiled in.
# define INIT_STRTOIMAX_C23
#endif
#if SANITIZER_INTERCEPT_MBSTOWCS
INTERCEPTOR(SIZE_T, mbstowcs, wchar_t *dest, const char *src, SIZE_T len) {
void *ctx;
@@ -10304,6 +10344,7 @@ static void InitializeCommonInterceptors() {
INIT_GETCWD;
INIT_GET_CURRENT_DIR_NAME;
INIT_STRTOIMAX;
INIT_STRTOIMAX_C23;
INIT_MBSTOWCS;
INIT_MBSNRTOWCS;
INIT_WCSTOMBS;

View File

@@ -36,6 +36,13 @@ __interceptor_pthread_setspecific w
__interceptor_read w
__interceptor_realpath w
__isinf U
__isoc23_sscanf U
__isoc23_strtol U
__isoc23_strtoll U
__isoc23_strtoll_l U
__isoc23_strtoull U
__isoc23_strtoull_l U
__isoc23_vsscanf U
__isoc99_sscanf U
__isoc99_vsscanf U
__lshrdi3 U

View File

@@ -0,0 +1,24 @@
// RUN: %clang -std=c17 %s -o %t && %run %t
/// Test __isoc23_* for glibc 2.38+.
// RUN: %clang -std=c23 %s -o %t && %run %t
#include <assert.h>
#include <stdarg.h>
#include <stdio.h>
// Variadic shim around vsscanf(): packs the trailing arguments into a va_list
// and returns whatever vsscanf() reports for `buf` parsed with `fmt`.
int test_vsscanf(const char *buf, const char *fmt, ...) {
  va_list args;
  int matched;

  va_start(args, fmt);
  matched = vsscanf(buf, fmt, args);
  va_end(args);

  return matched;
}
// Parses "42" once through sscanf() and once through the vsscanf() wrapper,
// checking both the number of converted items and the stored value.
int main(int argc, char **argv) {
  int x, y;

  int direct = sscanf("42", "%d", &x);
  assert(direct == 1);
  assert(x == 42);

  int via_va_list = test_vsscanf("42", "%d", &y);
  assert(via_va_list == 1);
  assert(y == 42);

  return 0;
}

View File

@@ -0,0 +1,61 @@
// RUN: %clang -std=c17 %s -o %t && %run %t
/// Test __isoc23_* for glibc 2.38+.
// RUN: %clang -std=c23 %s -o %t && %run %t
#include <assert.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <wchar.h>
// Calls the integer conversion function `func` on "42" with base 0 and checks
// that it yields 42 and leaves the end pointer on the terminating NUL.
// Wrapped in do/while(0) so `TESTL(f);` is exactly one statement and stays
// valid inside an unbraced if/else.
#define TESTL(func)                     \
  do {                                  \
    char *end;                          \
    long l = (long)func("42", &end, 0); \
    assert(l == 42);                    \
    assert(*end == '\0');               \
  } while (0)
// Calls the floating-point conversion function `func` on "42" and checks that
// the result, truncated to long, is 42 and that the end pointer sits on the
// terminating NUL.
// Wrapped in do/while(0) so `TESTF(f);` is exactly one statement and stays
// valid inside an unbraced if/else.
#define TESTF(func)                  \
  do {                               \
    char *end;                       \
    long l = (long)func("42", &end); \
    assert(l == 42);                 \
    assert(*end == '\0');            \
  } while (0)
// Wide-character counterpart of the integer check: calls `func` on L"42" with
// base 0 and checks that it yields 42 and leaves the end pointer on the
// terminating L'\0'.
// Wrapped in do/while(0) so `WTESTL(f);` is exactly one statement and stays
// valid inside an unbraced if/else.
#define WTESTL(func)                     \
  do {                                   \
    wchar_t *end;                        \
    long l = (long)func(L"42", &end, 0); \
    assert(l == 42);                     \
    assert(*end == L'\0');               \
  } while (0)
// Wide-character counterpart of the floating-point check: calls `func` on
// L"42" and checks that the result, truncated to long, is 42 and that the end
// pointer sits on the terminating L'\0' (a wide literal, since `end` points at
// wchar_t).
// Wrapped in do/while(0) so `WTESTF(f);` is exactly one statement and stays
// valid inside an unbraced if/else.
#define WTESTF(func)                  \
  do {                                \
    wchar_t *end;                     \
    long l = (long)func(L"42", &end); \
    assert(l == 42);                  \
    assert(*end == L'\0');            \
  } while (0)
// Runs each narrow and wide string-to-number conversion function once on the
// input "42" via the TEST* macros; any failed check aborts through assert().
int main() {
  // Narrow-character integer conversions (called with base 0).
  TESTL(strtol);
  TESTL(strtoll);
  TESTL(strtoimax);
  TESTL(strtoul);
  TESTL(strtoull);
  TESTL(strtoumax);

  // Narrow-character floating-point conversions.
  TESTF(strtof);
  TESTF(strtod);
  TESTF(strtold);

  // Wide-character integer conversions (called with base 0).
  WTESTL(wcstol);
  WTESTL(wcstoll);
  WTESTL(wcstoul);
  WTESTL(wcstoull);

  // Wide-character floating-point conversions.
  WTESTF(wcstof);
  WTESTF(wcstod);
  WTESTF(wcstold);

  return 0;
}