mirror of
https://github.com/intel/llvm.git
synced 2026-02-03 10:39:35 +08:00
Preprocessor: preserve whitespace in -traditional-cpp mode.
Note that, unlike GNU cpp, we currently do not preserve whitespace in macros (even in -traditional-cpp mode). <rdar://problem/12897179> llvm-svn: 175778
This commit is contained in:
@@ -174,8 +174,8 @@ public:
|
||||
/// SetKeepWhitespaceMode - This method lets clients enable or disable
|
||||
/// whitespace retention mode.
|
||||
void SetKeepWhitespaceMode(bool Val) {
|
||||
assert((!Val || LexingRawMode) &&
|
||||
"Can only enable whitespace retention in raw mode");
|
||||
assert((!Val || LexingRawMode || LangOpts.TraditionalCPP) &&
|
||||
"Can only retain whitespace in raw mode or -traditional-cpp");
|
||||
ExtendedTokenMode = Val ? 2 : 0;
|
||||
}
|
||||
|
||||
@@ -194,6 +194,14 @@ public:
|
||||
ExtendedTokenMode = Mode ? 1 : 0;
|
||||
}
|
||||
|
||||
/// Sets the extended token mode back to its initial value, according to the
|
||||
/// language options and preprocessor. This controls whether the lexer
|
||||
/// produces comment and whitespace tokens.
|
||||
///
|
||||
/// This requires the lexer to have an associated preprocessor. A standalone
|
||||
/// lexer has nothing to reset to.
|
||||
void resetExtendedTokenMode();
|
||||
|
||||
const char *getBufferStart() const { return BufferStart; }
|
||||
|
||||
/// ReadToEndOfLine - Read the rest of the current preprocessor line as an
|
||||
|
||||
@@ -548,7 +548,7 @@ static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok,
|
||||
|
||||
// Tokens that can contain embedded newlines need to adjust our current
|
||||
// line number.
|
||||
if (Tok.getKind() == tok::comment)
|
||||
if (Tok.getKind() == tok::comment || Tok.getKind() == tok::unknown)
|
||||
Callbacks->HandleNewlinesInToken(TokPtr, Len);
|
||||
} else {
|
||||
std::string S = PP.getSpelling(Tok);
|
||||
@@ -556,7 +556,7 @@ static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok,
|
||||
|
||||
// Tokens that can contain embedded newlines need to adjust our current
|
||||
// line number.
|
||||
if (Tok.getKind() == tok::comment)
|
||||
if (Tok.getKind() == tok::comment || Tok.getKind() == tok::unknown)
|
||||
Callbacks->HandleNewlinesInToken(&S[0], S.size());
|
||||
}
|
||||
Callbacks->setEmittedTokensOnThisLine();
|
||||
|
||||
@@ -122,8 +122,15 @@ Lexer::Lexer(FileID FID, const llvm::MemoryBuffer *InputFile, Preprocessor &PP)
|
||||
InitLexer(InputFile->getBufferStart(), InputFile->getBufferStart(),
|
||||
InputFile->getBufferEnd());
|
||||
|
||||
// Default to keeping comments if the preprocessor wants them.
|
||||
SetCommentRetentionState(PP.getCommentRetentionState());
|
||||
resetExtendedTokenMode();
|
||||
}
|
||||
|
||||
void Lexer::resetExtendedTokenMode() {
|
||||
assert(PP && "Cannot reset token mode without a preprocessor");
|
||||
if (LangOpts.TraditionalCPP)
|
||||
SetKeepWhitespaceMode(true);
|
||||
else
|
||||
SetCommentRetentionState(PP->getCommentRetentionState());
|
||||
}
|
||||
|
||||
/// Lexer constructor - Create a new raw lexer object. This object is only
|
||||
@@ -1844,6 +1851,8 @@ void Lexer::LexCharConstant(Token &Result, const char *CurPtr,
|
||||
///
|
||||
bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr) {
|
||||
// Whitespace - Skip it, then return the token after the whitespace.
|
||||
bool SawNewline = isVerticalWhitespace(CurPtr[-1]);
|
||||
|
||||
unsigned char Char = *CurPtr; // Skip consecutive spaces efficiently.
|
||||
while (1) {
|
||||
// Skip horizontal whitespace very aggressively.
|
||||
@@ -1851,7 +1860,7 @@ bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr) {
|
||||
Char = *++CurPtr;
|
||||
|
||||
// Otherwise if we have something other than whitespace, we're done.
|
||||
if (Char != '\n' && Char != '\r')
|
||||
if (!isVerticalWhitespace(Char))
|
||||
break;
|
||||
|
||||
if (ParsingPreprocessorDirective) {
|
||||
@@ -1861,24 +1870,27 @@ bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr) {
|
||||
}
|
||||
|
||||
// ok, but handle newline.
|
||||
// The returned token is at the start of the line.
|
||||
Result.setFlag(Token::StartOfLine);
|
||||
// No leading whitespace seen so far.
|
||||
Result.clearFlag(Token::LeadingSpace);
|
||||
SawNewline = true;
|
||||
Char = *++CurPtr;
|
||||
}
|
||||
|
||||
// If this isn't immediately after a newline, there is leading space.
|
||||
char PrevChar = CurPtr[-1];
|
||||
if (PrevChar != '\n' && PrevChar != '\r')
|
||||
Result.setFlag(Token::LeadingSpace);
|
||||
|
||||
// If the client wants us to return whitespace, return it now.
|
||||
if (isKeepWhitespaceMode()) {
|
||||
FormTokenWithChars(Result, CurPtr, tok::unknown);
|
||||
if (SawNewline)
|
||||
IsAtStartOfLine = true;
|
||||
// FIXME: The next token will not have LeadingSpace set.
|
||||
return true;
|
||||
}
|
||||
|
||||
// If this isn't immediately after a newline, there is leading space.
|
||||
char PrevChar = CurPtr[-1];
|
||||
bool HasLeadingSpace = !isVerticalWhitespace(PrevChar);
|
||||
|
||||
Result.setFlagValue(Token::LeadingSpace, HasLeadingSpace);
|
||||
if (SawNewline)
|
||||
Result.setFlag(Token::StartOfLine);
|
||||
|
||||
BufferPtr = CurPtr;
|
||||
return false;
|
||||
}
|
||||
@@ -2269,7 +2281,6 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr) {
|
||||
// efficiently now. This is safe even in KeepWhitespaceMode because we would
|
||||
// have already returned above with the comment as a token.
|
||||
if (isHorizontalWhitespace(*CurPtr)) {
|
||||
Result.setFlag(Token::LeadingSpace);
|
||||
SkipWhitespace(Result, CurPtr+1);
|
||||
return false;
|
||||
}
|
||||
@@ -2351,7 +2362,7 @@ bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) {
|
||||
FormTokenWithChars(Result, CurPtr, tok::eod);
|
||||
|
||||
// Restore comment saving mode, in case it was disabled for directive.
|
||||
SetCommentRetentionState(PP->getCommentRetentionState());
|
||||
resetExtendedTokenMode();
|
||||
return true; // Have a token.
|
||||
}
|
||||
|
||||
@@ -2718,6 +2729,7 @@ LexNextToken:
|
||||
// whitespace.
|
||||
if (isKeepWhitespaceMode()) {
|
||||
FormTokenWithChars(Result, CurPtr, tok::unknown);
|
||||
// FIXME: The next token will not have LeadingSpace set.
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -2785,7 +2797,7 @@ LexNextToken:
|
||||
|
||||
// Restore comment saving mode, in case it was disabled for directive.
|
||||
if (PP)
|
||||
SetCommentRetentionState(PP->getCommentRetentionState());
|
||||
resetExtendedTokenMode();
|
||||
|
||||
// Since we consumed a newline, we are back at the start of a line.
|
||||
IsAtStartOfLine = true;
|
||||
@@ -2793,8 +2805,7 @@ LexNextToken:
|
||||
Kind = tok::eod;
|
||||
break;
|
||||
}
|
||||
// The returned token is at the start of the line.
|
||||
Result.setFlag(Token::StartOfLine);
|
||||
|
||||
// No leading whitespace seen so far.
|
||||
Result.clearFlag(Token::LeadingSpace);
|
||||
|
||||
|
||||
@@ -269,7 +269,7 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,
|
||||
if (Tok.isNot(tok::raw_identifier)) {
|
||||
CurPPLexer->ParsingPreprocessorDirective = false;
|
||||
// Restore comment saving mode.
|
||||
if (CurLexer) CurLexer->SetCommentRetentionState(KeepComments);
|
||||
if (CurLexer) CurLexer->resetExtendedTokenMode();
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -285,7 +285,7 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,
|
||||
FirstChar != 'i' && FirstChar != 'e') {
|
||||
CurPPLexer->ParsingPreprocessorDirective = false;
|
||||
// Restore comment saving mode.
|
||||
if (CurLexer) CurLexer->SetCommentRetentionState(KeepComments);
|
||||
if (CurLexer) CurLexer->resetExtendedTokenMode();
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -302,7 +302,7 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,
|
||||
if (IdLen >= 20) {
|
||||
CurPPLexer->ParsingPreprocessorDirective = false;
|
||||
// Restore comment saving mode.
|
||||
if (CurLexer) CurLexer->SetCommentRetentionState(KeepComments);
|
||||
if (CurLexer) CurLexer->resetExtendedTokenMode();
|
||||
continue;
|
||||
}
|
||||
memcpy(DirectiveBuf, &DirectiveStr[0], IdLen);
|
||||
@@ -408,7 +408,7 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,
|
||||
|
||||
CurPPLexer->ParsingPreprocessorDirective = false;
|
||||
// Restore comment saving mode.
|
||||
if (CurLexer) CurLexer->SetCommentRetentionState(KeepComments);
|
||||
if (CurLexer) CurLexer->resetExtendedTokenMode();
|
||||
}
|
||||
|
||||
// Finally, if we are out of the conditional (saw an #endif or ran off the end
|
||||
@@ -594,6 +594,7 @@ void Preprocessor::HandleDirective(Token &Result) {
|
||||
// mode. Tell the lexer this so any newlines we see will be converted into an
|
||||
// EOD token (which terminates the directive).
|
||||
CurPPLexer->ParsingPreprocessorDirective = true;
|
||||
if (CurLexer) CurLexer->SetKeepWhitespaceMode(false);
|
||||
|
||||
++NumDirectives;
|
||||
|
||||
@@ -638,14 +639,9 @@ void Preprocessor::HandleDirective(Token &Result) {
|
||||
// and reset to previous state when returning from this function.
|
||||
ResetMacroExpansionHelper helper(this);
|
||||
|
||||
TryAgain:
|
||||
switch (Result.getKind()) {
|
||||
case tok::eod:
|
||||
return; // null directive.
|
||||
case tok::comment:
|
||||
// Handle stuff like "# /*foo*/ define X" in -E -C mode.
|
||||
LexUnexpandedToken(Result);
|
||||
goto TryAgain;
|
||||
case tok::code_completion:
|
||||
if (CodeComplete)
|
||||
CodeComplete->CodeCompleteDirective(
|
||||
|
||||
@@ -4,9 +4,61 @@
|
||||
|
||||
/*
|
||||
RUN: %clang_cc1 -traditional-cpp %s -E -o %t
|
||||
RUN: FileCheck < %t %s
|
||||
RUN: FileCheck -strict-whitespace < %t %s
|
||||
*/
|
||||
|
||||
/* CHECK: foo // bar
|
||||
/* CHECK: {{^}}foo // bar{{$}}
|
||||
*/
|
||||
foo // bar
|
||||
|
||||
|
||||
/* The lines in this file contain hard tab characters and trailing whitespace;
|
||||
* do not change them! */
|
||||
|
||||
/* CHECK: {{^}} indented!{{$}}
|
||||
* CHECK: {{^}}tab separated values{{$}}
|
||||
*/
|
||||
indented!
|
||||
tab separated values
|
||||
|
||||
#define bracket(x) >>>x<<<
|
||||
bracket(| spaces |)
|
||||
/* CHECK: {{^}}>>>| spaces |<<<{{$}}
|
||||
*/
|
||||
|
||||
/* This is still a preprocessing directive. */
|
||||
# define foo bar
|
||||
foo!
|
||||
-
|
||||
foo! foo!
|
||||
/* CHECK: {{^}}bar!{{$}}
|
||||
* CHECK: {{^}} bar! bar! {{$}}
|
||||
*/
|
||||
|
||||
/* Deliberately check a leading newline with spaces on that line. */
|
||||
|
||||
# define foo bar
|
||||
foo!
|
||||
-
|
||||
foo! foo!
|
||||
/* CHECK: {{^}}bar!{{$}}
|
||||
* CHECK: {{^}} bar! bar! {{$}}
|
||||
*/
|
||||
|
||||
/* FIXME: -traditional-cpp should not consider this a preprocessing directive
|
||||
* because the # isn't in the first column.
|
||||
*/
|
||||
#define foo2 bar
|
||||
foo2!
|
||||
/* If this were working, both of these checks would be on.
|
||||
* CHECK-NOT: {{^}} #define foo2 bar{{$}}
|
||||
* CHECK-NOT: {{^}}foo2!{{$}}
|
||||
*/
|
||||
|
||||
/* FIXME: -traditional-cpp should not homogenize whitespace in macros.
|
||||
*/
|
||||
#define bracket2(x) >>> x <<<
|
||||
bracket2(spaces)
|
||||
/* If this were working, this check would be on.
|
||||
* CHECK-NOT: {{^}}>>> spaces <<<{{$}}
|
||||
*/
|
||||
|
||||
Reference in New Issue
Block a user