[refactor][extract] insert semicolons into extracted/inserted code

when needed

This commit implements the semicolon insertion logic into the extract
refactoring. The following rules are used:

- extracting expression: add terminating ';' to the extracted function.
- extracting statements that don't require terminating ';' (e.g. switch): add
  terminating ';' to the callee.
- extracting statements with ';':  move (if possible) the original ';' from the
  callee and add terminating ';'.
- otherwise, add ';' to both places.

Differential Revision: https://reviews.llvm.org/D39441

llvm-svn: 317343
This commit is contained in:
Alex Lorenz
2017-11-03 18:11:22 +00:00
parent 666e23b513
commit ebbbb81266
9 changed files with 450 additions and 22 deletions

View File

@@ -466,6 +466,13 @@ public:
const LangOptions &LangOpts,
unsigned MaxLines = 0);
/// Finds the token that comes right after the given location.
///
/// Returns the next token, or none if the location is inside a macro.
static Optional<Token> findNextToken(SourceLocation Loc,
const SourceManager &SM,
const LangOptions &LangOpts);
/// \brief Checks that the given token is the first token that occurs after
/// the given location (this excludes comments and whitespace). Returns the
/// location immediately after the specified token. If the token is not found

View File

@@ -1212,18 +1212,12 @@ const char *Lexer::SkipEscapedNewLines(const char *P) {
}
}
/// \brief Checks that the given token is the first token that occurs after the
/// given location (this excludes comments and whitespace). Returns the location
/// immediately after the specified token. If the token is not found or the
/// location is inside a macro, the returned source location will be invalid.
SourceLocation Lexer::findLocationAfterToken(SourceLocation Loc,
tok::TokenKind TKind,
const SourceManager &SM,
const LangOptions &LangOpts,
bool SkipTrailingWhitespaceAndNewLine) {
Optional<Token> Lexer::findNextToken(SourceLocation Loc,
const SourceManager &SM,
const LangOptions &LangOpts) {
if (Loc.isMacroID()) {
if (!Lexer::isAtEndOfMacroExpansion(Loc, SM, LangOpts, &Loc))
return SourceLocation();
return None;
}
Loc = Lexer::getLocForEndOfToken(Loc, 0, SM, LangOpts);
@@ -1234,7 +1228,7 @@ SourceLocation Lexer::findLocationAfterToken(SourceLocation Loc,
bool InvalidTemp = false;
StringRef File = SM.getBufferData(LocInfo.first, &InvalidTemp);
if (InvalidTemp)
return SourceLocation();
return None;
const char *TokenBegin = File.data() + LocInfo.second;
@@ -1244,15 +1238,25 @@ SourceLocation Lexer::findLocationAfterToken(SourceLocation Loc,
// Find the token.
Token Tok;
lexer.LexFromRawLexer(Tok);
if (Tok.isNot(TKind))
return Tok;
}
/// \brief Checks that the given token is the first token that occurs after the
/// given location (this excludes comments and whitespace). Returns the location
/// immediately after the specified token. If the token is not found or the
/// location is inside a macro, the returned source location will be invalid.
SourceLocation Lexer::findLocationAfterToken(
SourceLocation Loc, tok::TokenKind TKind, const SourceManager &SM,
const LangOptions &LangOpts, bool SkipTrailingWhitespaceAndNewLine) {
Optional<Token> Tok = findNextToken(Loc, SM, LangOpts);
if (!Tok || Tok->isNot(TKind))
return SourceLocation();
SourceLocation TokenLoc = Tok.getLocation();
SourceLocation TokenLoc = Tok->getLocation();
// Calculate how much whitespace needs to be skipped if any.
unsigned NumWhitespaceChars = 0;
if (SkipTrailingWhitespaceAndNewLine) {
const char *TokenEnd = SM.getCharacterData(TokenLoc) +
Tok.getLength();
const char *TokenEnd = SM.getCharacterData(TokenLoc) + Tok->getLength();
unsigned char C = *TokenEnd;
while (isHorizontalWhitespace(C)) {
C = *(++TokenEnd);
@@ -1269,7 +1273,7 @@ SourceLocation Lexer::findLocationAfterToken(SourceLocation Loc,
}
}
return TokenLoc.getLocWithOffset(Tok.getLength() + NumWhitespaceChars);
return TokenLoc.getLocWithOffset(Tok->getLength() + NumWhitespaceChars);
}
/// getCharAndSizeSlow - Peek a single 'character' from the specified buffer,

View File

@@ -4,7 +4,8 @@ add_clang_library(clangToolingRefactor
ASTSelection.cpp
ASTSelectionRequirements.cpp
AtomicChange.cpp
Extract.cpp
Extract/Extract.cpp
Extract/SourceExtraction.cpp
RefactoringActions.cpp
Rename/RenamingAction.cpp
Rename/SymbolOccurrences.cpp

View File

@@ -14,6 +14,7 @@
//===----------------------------------------------------------------------===//
#include "clang/Tooling/Refactoring/Extract/Extract.h"
#include "SourceExtraction.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/Expr.h"
@@ -145,6 +146,8 @@ ExtractFunction::createSourceReplacements(RefactoringRuleContext &Context) {
PP.SuppressLifetimeQualifiers = true;
PP.SuppressUnwrittenScope = true;
ExtractionSemicolonPolicy Semicolons = ExtractionSemicolonPolicy::compute(
Code[Code.size() - 1], ExtractedRange, SM, LangOpts);
AtomicChange Change(SM, ExtractedDeclLocation);
// Create the replacement for the extracted declaration.
{
@@ -162,8 +165,8 @@ ExtractFunction::createSourceReplacements(RefactoringRuleContext &Context) {
if (IsExpr && !ReturnType->isVoidType())
OS << "return ";
OS << ExtractedCodeRewriter.getRewrittenText(ExtractedRange);
// FIXME: Compute the correct semicolon policy.
OS << ';';
if (Semicolons.isNeededInExtractedFunction())
OS << ';';
OS << "\n}\n\n";
auto Err = Change.insert(SM, ExtractedDeclLocation, OS.str());
if (Err)
@@ -178,7 +181,8 @@ ExtractFunction::createSourceReplacements(RefactoringRuleContext &Context) {
OS << DeclName << '(';
// FIXME: Forward arguments.
OS << ')';
// FIXME: Add semicolon if needed.
if (Semicolons.isNeededInOriginalFunction())
OS << ';';
auto Err = Change.replace(
SM, CharSourceRange::getTokenRange(ExtractedRange), OS.str());

View File

@@ -0,0 +1,112 @@
//===--- SourceExtraction.cpp - Clang refactoring library -----------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "SourceExtraction.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtCXX.h"
#include "clang/AST/StmtObjC.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Lex/Lexer.h"
using namespace clang;
namespace {
/// Returns true if the token at the given location is a semicolon.
bool isSemicolonAtLocation(SourceLocation TokenLoc, const SourceManager &SM,
const LangOptions &LangOpts) {
return Lexer::getSourceText(
CharSourceRange::getTokenRange(TokenLoc, TokenLoc), SM,
LangOpts) == ";";
}
/// Returns true if there should be a semicolon after the given statement.
bool isSemicolonRequiredAfter(const Stmt *S) {
if (isa<CompoundStmt>(S))
return false;
if (const auto *If = dyn_cast<IfStmt>(S))
return isSemicolonRequiredAfter(If->getElse() ? If->getElse()
: If->getThen());
if (const auto *While = dyn_cast<WhileStmt>(S))
return isSemicolonRequiredAfter(While->getBody());
if (const auto *For = dyn_cast<ForStmt>(S))
return isSemicolonRequiredAfter(For->getBody());
if (const auto *CXXFor = dyn_cast<CXXForRangeStmt>(S))
return isSemicolonRequiredAfter(CXXFor->getBody());
if (const auto *ObjCFor = dyn_cast<ObjCForCollectionStmt>(S))
return isSemicolonRequiredAfter(ObjCFor->getBody());
switch (S->getStmtClass()) {
case Stmt::SwitchStmtClass:
case Stmt::CXXTryStmtClass:
case Stmt::ObjCAtSynchronizedStmtClass:
case Stmt::ObjCAutoreleasePoolStmtClass:
case Stmt::ObjCAtTryStmtClass:
return false;
default:
return true;
}
}
/// Returns true if the two source locations are on the same line.
bool areOnSameLine(SourceLocation Loc1, SourceLocation Loc2,
const SourceManager &SM) {
return !Loc1.isMacroID() && !Loc2.isMacroID() &&
SM.getSpellingLineNumber(Loc1) == SM.getSpellingLineNumber(Loc2);
}
} // end anonymous namespace
namespace clang {
namespace tooling {
ExtractionSemicolonPolicy
ExtractionSemicolonPolicy::compute(const Stmt *S, SourceRange &ExtractedRange,
const SourceManager &SM,
const LangOptions &LangOpts) {
auto neededInExtractedFunction = []() {
return ExtractionSemicolonPolicy(true, false);
};
auto neededInOriginalFunction = []() {
return ExtractionSemicolonPolicy(false, true);
};
/// The extracted expression should be terminated with a ';'. The call to
/// the extracted function will replace this expression, so it won't need
/// a terminating ';'.
if (isa<Expr>(S))
return neededInExtractedFunction();
/// Some statements don't need to be terminated with ';'. The call to the
/// extracted function will be a standalone statement, so it should be
/// terminated with a ';'.
bool NeedsSemi = isSemicolonRequiredAfter(S);
if (!NeedsSemi)
return neededInOriginalFunction();
/// Some statements might end at ';'. The extraction will move that ';', so
/// the call to the extracted function should be terminated with a ';'.
SourceLocation End = ExtractedRange.getEnd();
if (isSemicolonAtLocation(End, SM, LangOpts))
return neededInOriginalFunction();
/// Other statements should generally have a trailing ';'. We can try to find
/// it and move it together it with the extracted code.
Optional<Token> NextToken = Lexer::findNextToken(End, SM, LangOpts);
if (NextToken && NextToken->is(tok::semi) &&
areOnSameLine(NextToken->getLocation(), End, SM)) {
ExtractedRange.setEnd(NextToken->getLocation());
return neededInOriginalFunction();
}
/// Otherwise insert semicolons in both places.
return ExtractionSemicolonPolicy(true, true);
}
} // end namespace tooling
} // end namespace clang

View File

@@ -0,0 +1,52 @@
//===--- SourceExtraction.cpp - Clang refactoring library -----------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_LIB_TOOLING_REFACTORING_EXTRACT_SOURCE_EXTRACTION_H
#define LLVM_CLANG_LIB_TOOLING_REFACTORING_EXTRACT_SOURCE_EXTRACTION_H
#include "clang/Basic/LLVM.h"
namespace clang {
class LangOptions;
class SourceManager;
class SourceRange;
class Stmt;
namespace tooling {
/// Determines which semicolons should be inserted during extraction.
class ExtractionSemicolonPolicy {
public:
bool isNeededInExtractedFunction() const {
return IsNeededInExtractedFunction;
}
bool isNeededInOriginalFunction() const { return IsNeededInOriginalFunction; }
/// Returns the semicolon insertion policy that is needed for extraction of
/// the given statement from the given source range.
static ExtractionSemicolonPolicy compute(const Stmt *S,
SourceRange &ExtractedRange,
const SourceManager &SM,
const LangOptions &LangOpts);
private:
ExtractionSemicolonPolicy(bool IsNeededInExtractedFunction,
bool IsNeededInOriginalFunction)
: IsNeededInExtractedFunction(IsNeededInExtractedFunction),
IsNeededInOriginalFunction(IsNeededInOriginalFunction) {}
bool IsNeededInExtractedFunction;
bool IsNeededInOriginalFunction;
};
} // end namespace tooling
} // end namespace clang
#endif // LLVM_CLANG_LIB_TOOLING_REFACTORING_EXTRACT_SOURCE_EXTRACTION_H

View File

@@ -20,10 +20,10 @@ void simpleExtractStmtNoCaptures() {
// CHECK: 1 'astatement' results:
// CHECK: static void extracted() {
// CHECK-NEXT: int a = 1;
// CHECK-NEXT: int b = 2;;{{$}}
// CHECK-NEXT: int b = 2;{{$}}
// CHECK-NEXT: }{{[[:space:]].*}}
// CHECK-NEXT: void simpleExtractStmtNoCaptures() {
// CHECK-NEXT: /*range astatement=->+1:13*/extracted(){{$}}
// CHECK-NEXT: /*range astatement=->+1:13*/extracted();{{$}}
// CHECK-NEXT: }

View File

@@ -0,0 +1,192 @@
// RUN: clang-refactor extract -selection=test:%s %s -- -std=c++11 2>&1 | grep -v CHECK | FileCheck %s
struct Rectangle { int width, height; };
void extractStatement(const Rectangle &r) {
/*range adeclstmt=->+0:59*/int area = r.width * r.height;
}
// CHECK: 1 'adeclstmt' results:
// CHECK: static void extracted() {
// CHECK-NEXT: int area = r.width * r.height;{{$}}
// CHECK-NEXT: }{{[[:space:]].*}}
// CHECK-NEXT: void extractStatement(const Rectangle &r) {
// CHECK-NEXT: /*range adeclstmt=->+0:59*/extracted();{{$}}
// CHECK-NEXT: }
void extractStatementNoSemiIf(const Rectangle &r) {
/*range bextractif=->+2:4*/if (r.width) {
int x = r.height;
}
}
// CHECK: 1 'bextractif' results:
// CHECK: static void extracted() {
// CHECK-NEXT: if (r.width) {
// CHECK-NEXT: int x = r.height;
// CHECK-NEXT: }{{$}}
// CHECK-NEXT: }{{[[:space:]].*}}
// CHECK-NEXT: void extractStatementNoSemiIf(const Rectangle &r) {
// CHECK-NEXT: /*range bextractif=->+2:4*/extracted();{{$}}
// CHECK-NEXT: }
void extractStatementDontExtraneousSemi(const Rectangle &r) {
/*range cextractif=->+2:4*/if (r.width) {
int x = r.height;
} ;
} //^ This semicolon shouldn't be extracted.
// CHECK: 1 'cextractif' results:
// CHECK: static void extracted() {
// CHECK-NEXT: if (r.width) {
// CHECK-NEXT: int x = r.height;
// CHECK-NEXT: }{{$}}
// CHECK-NEXT: }{{[[:space:]].*}}
// CHECK-NEXT: void extractStatementDontExtraneousSemi(const Rectangle &r) {
// CHECK-NEXT: extracted(); ;{{$}}
// CHECK-NEXT: }
void extractStatementNotSemiSwitch() {
/*range dextract=->+5:4*/switch (2) {
case 1:
break;
case 2:
break;
}
}
// CHECK: 1 'dextract' results:
// CHECK: static void extracted() {
// CHECK-NEXT: switch (2) {
// CHECK-NEXT: case 1:
// CHECK-NEXT: break;
// CHECK-NEXT: case 2:
// CHECK-NEXT: break;
// CHECK-NEXT: }{{$}}
// CHECK-NEXT: }{{[[:space:]].*}}
// CHECK-NEXT: void extractStatementNotSemiSwitch() {
// CHECK-NEXT: extracted();{{$}}
// CHECK-NEXT: }
void extractStatementNotSemiWhile() {
/*range eextract=->+2:4*/while (true) {
int x = 0;
}
}
// CHECK: 1 'eextract' results:
// CHECK: static void extracted() {
// CHECK-NEXT: while (true) {
// CHECK-NEXT: int x = 0;
// CHECK-NEXT: }{{$}}
// CHECK-NEXT: }{{[[:space:]].*}}
// CHECK-NEXT: void extractStatementNotSemiWhile() {
// CHECK-NEXT: extracted();{{$}}
// CHECK-NEXT: }
void extractStatementNotSemiFor() {
/*range fextract=->+1:4*/for (int i = 0; i < 10; ++i) {
}
}
// CHECK: 1 'fextract' results:
// CHECK: static void extracted() {
// CHECK-NEXT: for (int i = 0; i < 10; ++i) {
// CHECK-NEXT: }{{$}}
// CHECK-NEXT: }{{[[:space:]].*}}
// CHECK-NEXT: void extractStatementNotSemiFor() {
// CHECK-NEXT: extracted();{{$}}
// CHECK-NEXT: }
struct XS {
int *begin() { return 0; }
int *end() { return 0; }
};
void extractStatementNotSemiRangedFor(XS xs) {
/*range gextract=->+1:4*/for (int i : xs) {
}
}
// CHECK: 1 'gextract' results:
// CHECK: static void extracted() {
// CHECK-NEXT: for (int i : xs) {
// CHECK-NEXT: }{{$}}
// CHECK-NEXT: }{{[[:space:]].*}}
// CHECK-NEXT: void extractStatementNotSemiRangedFor(XS xs) {
// CHECK-NEXT: extracted();{{$}}
// CHECK-NEXT: }
void extractStatementNotSemiRangedTryCatch() {
/*range hextract=->+3:4*/try { int x = 0; }
catch (const int &i) {
int y = i;
}
}
// CHECK: 1 'hextract' results:
// CHECK: static void extracted() {
// CHECK-NEXT: try { int x = 0; }
// CHECK-NEXT: catch (const int &i) {
// CHECK-NEXT: int y = i;
// CHECK-NEXT: }{{$}}
// CHECK-NEXT: }{{[[:space:]].*}}
// CHECK-NEXT: void extractStatementNotSemiRangedTryCatch() {
// CHECK-NEXT: extracted();{{$}}
// CHECK-NEXT: }
void extractCantFindSemicolon() {
/*range iextract=->+1:17*/do {
} while (true)
// Add a semicolon in both the extracted and original function as we don't
// want to extract the semicolon below.
;
}
// CHECK: 1 'iextract' results:
// CHECK: static void extracted() {
// CHECK-NEXT: do {
// CHECK-NEXT: } while (true);{{$}}
// CHECK-NEXT: }{{[[:space:]].*}}
// CHECK-NEXT: void extractCantFindSemicolon() {
// CHECK-NEXT: extracted();{{$}}
// CHECK-NEXT: //
// CHECK-NEXT: //
// CHECK-NEXT: ;
// CHECK-NEXT: }
void extractFindSemicolon() {
/*range jextract=->+1:17*/do {
} while (true) /*grab*/ ;
}
// CHECK: 1 'jextract' results:
// CHECK: static void extracted() {
// CHECK-NEXT: do {
// CHECK-NEXT: } while (true) /*grab*/ ;{{$}}
// CHECK-NEXT: }{{[[:space:]].*}}
// CHECK-NEXT: void extractFindSemicolon() {
// CHECK-NEXT: extracted();{{$}}
// CHECK-NEXT: }
void call();
void careForNonCompoundSemicolons1() {
/*range kextract=->+1:11*/if (true)
call();
}
// CHECK: 1 'kextract' results:
// CHECK: static void extracted() {
// CHECK-NEXT: if (true)
// CHECK-NEXT: call();{{$}}
// CHECK-NEXT: }{{[[:space:]].*}}
// CHECK-NEXT: void careForNonCompoundSemicolons1() {
// CHECK-NEXT: extracted();{{$}}
// CHECK-NEXT: }
void careForNonCompoundSemicolons2() {
/*range lextract=->+3:1*/for (int i = 0; i < 10; ++i)
while (i != 0)
;
// end right here111!
}
// CHECK: 1 'lextract' results:
// CHECK: static void extracted() {
// CHECK-NEXT: for (int i = 0; i < 10; ++i)
// CHECK-NEXT: while (i != 0)
// CHECK-NEXT: ;{{$}}
// CHECK-NEXT: }{{[[:space:]].*}}
// CHECK-NEXT: void careForNonCompoundSemicolons2() {
// CHECK-NEXT: extracted();{{$}}
// CHECK-NEXT: //
// CHECK-NEXT: }

View File

@@ -0,0 +1,56 @@
// RUN: clang-refactor extract -selection=test:%s %s -- 2>&1 | grep -v CHECK | FileCheck %s
@interface NSArray
+ (id)arrayWithObjects:(const id [])objects count:(unsigned long)cnt;
@end
void extractStatementNoSemiObjCFor(NSArray *array) {
/*range astmt=->+2:4*/for (id i in array) {
int x = 0;
}
}
// CHECK: 1 'astmt' results:
// CHECK: static void extracted() {
// CHECK-NEXT: for (id i in array) {
// CHECK-NEXT: int x = 0;
// CHECK-NEXT: }{{$}}
// CHECK-NEXT: }{{[[:space:]].*}}
void extractStatementNoSemiSync() {
id lock;
/*range bstmt=->+2:4*/@synchronized(lock) {
int x = 0;
}
}
// CHECK: 1 'bstmt' results:
// CHECK: static void extracted() {
// CHECK-NEXT: @synchronized(lock) {
// CHECK-NEXT: int x = 0;
// CHECK-NEXT: }{{$}}
// CHECK-NEXT: }{{[[:space:]].*}}
void extractStatementNoSemiAutorel() {
/*range cstmt=->+2:4*/@autoreleasepool {
int x = 0;
}
}
// CHECK: 1 'cstmt' results:
// CHECK: static void extracted() {
// CHECK-NEXT: @autoreleasepool {
// CHECK-NEXT: int x = 0;
// CHECK-NEXT: }{{$}}
// CHECK-NEXT: }{{[[:space:]].*}}
void extractStatementNoSemiTryFinalllllly() {
/*range dstmt=->+3:4*/@try {
int x = 0;
} @finally {
}
}
// CHECK: 1 'dstmt' results:
// CHECK: static void extracted() {
// CHECK-NEXT: @try {
// CHECK-NEXT: int x = 0;
// CHECK-NEXT: } @finally {
// CHECK-NEXT: }{{$}}
// CHECK-NEXT: }{{[[:space:]].*}}