diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index 974414c1e452..35ba8c11eca6 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -3085,6 +3085,8 @@ bool Lexer::LexUnicode(Token &Result, uint32_t C, const char *CurPtr) { maybeDiagnoseIDCharCompat(PP->getDiagnostics(), C, makeCharRange(*this, BufferPtr, CurPtr), /*IsFirst=*/true); + maybeDiagnoseUTF8Homoglyph(PP->getDiagnostics(), C, + makeCharRange(*this, BufferPtr, CurPtr)); } MIOpt.ReadToken(); @@ -3879,7 +3881,6 @@ LexNextToken: // We can't just reset CurPtr to BufferPtr because BufferPtr may point to // an escaped newline. --CurPtr; - const char *UTF8StartPtr = CurPtr; llvm::ConversionResult Status = llvm::convertUTF8Sequence((const llvm::UTF8 **)&CurPtr, (const llvm::UTF8 *)BufferEnd, @@ -3894,9 +3895,6 @@ LexNextToken: // (We manually eliminate the tail call to avoid recursion.) goto LexNextToken; } - if (!isLexingRawMode()) - maybeDiagnoseUTF8Homoglyph(PP->getDiagnostics(), CodePoint, - makeCharRange(*this, UTF8StartPtr, CurPtr)); return LexUnicode(Result, CodePoint, CurPtr); } diff --git a/clang/test/Lexer/unicode.c b/clang/test/Lexer/unicode.c index bebab829880c..01285fbc2138 100644 --- a/clang/test/Lexer/unicode.c +++ b/clang/test/Lexer/unicode.c @@ -45,3 +45,8 @@ int ⁠xx‍; // expected-warning@-3 {{identifier contains Unicode character that is invisible in some environments}} int foo​bar = 0; // expected-warning {{identifier contains Unicode character that is invisible in some environments}} int x = foobar; // expected-error {{undeclared identifier}} + +int ∣foo; // expected-error {{non-ASCII character}} +#ifndef PP_ONLY +#define ∶ x // expected-error {{macro name must be an identifier}} +#endif