Start implementing a token rewriter. At this point, it just reads in a file

and lets a client iterate over its tokens.

llvm-svn: 57407
This commit is contained in:
Chris Lattner
2008-10-12 05:44:03 +00:00
parent 1782da2f84
commit b6aa53b7de
5 changed files with 131 additions and 1 deletions

View File

@@ -21,6 +21,12 @@ void clang::DoRewriteTest(Preprocessor &PP, const std::string &InFileName,
SourceManager &SM = PP.getSourceManager();
const LangOptions &LangOpts = PP.getLangOptions();
TokenRewriter Rewriter(SM.getMainFileID(), SM, LangOpts);
std::pair<const char*,const char*> File =SM.getBufferData(SM.getMainFileID());
// Create a lexer to lex all the tokens of the main file in raw mode. Even
@@ -37,5 +43,7 @@ void clang::DoRewriteTest(Preprocessor &PP, const std::string &InFileName,
RawLex.LexFromRawLexer(RawTok);
}
for (TokenRewriter::token_iterator I = Rewriter.token_begin(),
E = Rewriter.token_end(); I != E; ++I)
std::cout << PP.getSpelling(*I);
}

View File

@@ -123,6 +123,7 @@
DE4772FC0C10EAEC002239E8 /* CGExpr.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DE4772FB0C10EAEC002239E8 /* CGExpr.cpp */; };
DE47999C0D2EBE1A00706D2D /* SemaExprObjC.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DE47999B0D2EBE1A00706D2D /* SemaExprObjC.cpp */; };
DE4DC79E0EA1C09E00069E5A /* RewriteTest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DE4DC79D0EA1C09E00069E5A /* RewriteTest.cpp */; };
DE4DC7A30EA1C33E00069E5A /* TokenRewriter.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DE4DC7A20EA1C33E00069E5A /* TokenRewriter.cpp */; };
DE5932D10AD60FF400BC794C /* clang.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DE5932CD0AD60FF400BC794C /* clang.cpp */; };
DE5932D20AD60FF400BC794C /* clang.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = DE5932CE0AD60FF400BC794C /* clang.h */; };
DE5932D30AD60FF400BC794C /* PrintParserCallbacks.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DE5932CF0AD60FF400BC794C /* PrintParserCallbacks.cpp */; };
@@ -446,6 +447,7 @@
DE47999B0D2EBE1A00706D2D /* SemaExprObjC.cpp */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 2; lastKnownFileType = sourcecode.cpp.cpp; name = SemaExprObjC.cpp; path = lib/Sema/SemaExprObjC.cpp; sourceTree = "<group>"; tabWidth = 2; };
DE4DC7980EA1BE4400069E5A /* TokenRewriter.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TokenRewriter.h; path = clang/Rewrite/TokenRewriter.h; sourceTree = "<group>"; };
DE4DC79D0EA1C09E00069E5A /* RewriteTest.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = RewriteTest.cpp; path = Driver/RewriteTest.cpp; sourceTree = "<group>"; };
DE4DC7A20EA1C33E00069E5A /* TokenRewriter.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = TokenRewriter.cpp; path = lib/Rewrite/TokenRewriter.cpp; sourceTree = "<group>"; };
DE53370B0CE2D96F00D9A028 /* RewriteRope.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = RewriteRope.h; path = clang/Rewrite/RewriteRope.h; sourceTree = "<group>"; };
DE5932CD0AD60FF400BC794C /* clang.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = clang.cpp; path = Driver/clang.cpp; sourceTree = "<group>"; };
DE5932CE0AD60FF400BC794C /* clang.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = clang.h; path = Driver/clang.h; sourceTree = "<group>"; };
@@ -1073,6 +1075,7 @@
72D16C1E0D9975C400E6DA4A /* HTMLRewrite.cpp */,
DEF7D9F80C9C8B1D0001F598 /* Rewriter.cpp */,
DECAB0CF0DB3C84200E13CCB /* RewriteRope.cpp */,
DE4DC7A20EA1C33E00069E5A /* TokenRewriter.cpp */,
);
name = Rewrite;
sourceTree = "<group>";
@@ -1259,6 +1262,7 @@
3551068C0E9A8546006A4E44 /* ParsePragma.cpp in Sources */,
3551068D0E9A8546006A4E44 /* ParseTentative.cpp in Sources */,
DE4DC79E0EA1C09E00069E5A /* RewriteTest.cpp in Sources */,
DE4DC7A30EA1C33E00069E5A /* TokenRewriter.cpp in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};

View File

@@ -155,6 +155,11 @@ public:
/// directly.
unsigned getRawEncoding() const { return ID; }
/// operator< - Order two SourceLocations by comparing their raw encoded
/// values. This gives SourceLocation the ordering required to be used as a
/// key in ordered containers such as std::map.
bool operator<(const SourceLocation &RHS) const {
  const unsigned LHSRaw = getRawEncoding();
  const unsigned RHSRaw = RHS.getRawEncoding();
  return LHSRaw < RHSRaw;
}
/// getFromRawEncoding - Turn a raw encoding of a SourceLocation object into
/// a real SourceLocation.
static SourceLocation getFromRawEncoding(unsigned Encoding) {

View File

@@ -0,0 +1,60 @@
//===--- TokenRewriter.h - Token-based Rewriter -----------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the TokenRewriter class, which is used for code
// transformations.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_TOKENREWRITER_H
#define LLVM_CLANG_TOKENREWRITER_H
#include "clang/Basic/SourceLocation.h"
#include <list>
#include <map>
namespace clang {
class Token;
class LangOptions;
/// TokenRewriter - Holds the list of raw tokens lexed from a single file and
/// lets clients iterate over them. The token list is populated by the
/// constructor.
class TokenRewriter {
  /// TokenList - This is the list of raw tokens that make up this file.  Each
  /// of these tokens has a unique SourceLocation, which is a FileID.
  std::list<Token> TokenList;

  /// TokenRefTy - This is the type used to refer to a token in the TokenList.
  typedef std::list<Token>::iterator TokenRefTy;

  /// TokenAtLoc - This map indicates which token exists at a specific
  /// SourceLocation.  Since each token has a unique SourceLocation, this is a
  /// one to one map.  The token can return its own location directly, to map
  /// backwards.
  std::map<SourceLocation, TokenRefTy> TokenAtLoc;

  // TokenAtLoc stores iterators into this object's TokenList.  A memberwise
  // copy would duplicate the list but leave the copied map referring to
  // elements of the *original* list, so copying is disabled.
  TokenRewriter(const TokenRewriter&);  // DO NOT IMPLEMENT
  void operator=(const TokenRewriter&); // DO NOT IMPLEMENT
public:
  /// TokenRewriter - This creates a TokenRewriter for the file with the
  /// specified FileID.
  TokenRewriter(unsigned FileID, SourceManager &SM, const LangOptions &LO);

  /// token_iterator - Read-only iterator over the tokens held by this
  /// rewriter, in list order.
  typedef std::list<Token>::const_iterator token_iterator;

  /// token_begin - Return an iterator to the first token in the list.
  token_iterator token_begin() const { return TokenList.begin(); }

  /// token_end - Return the past-the-end iterator of the token list.
  token_iterator token_end() const { return TokenList.end(); }

private:
  /// AddToken - Add the specified token into the Rewriter before the other
  /// position.
  void AddToken(const Token &T, TokenRefTy Where);
};
} // end namespace clang
#endif

View File

@@ -0,0 +1,53 @@
//===--- TokenRewriter.cpp - Token-based code rewriting interface ---------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the TokenRewriter class, which is used for code
// transformations.
//
//===----------------------------------------------------------------------===//
#include "clang/Rewrite/TokenRewriter.h"
#include "clang/Lex/Lexer.h"
#include "clang/Basic/SourceManager.h"
using namespace clang;
/// TokenRewriter - Lex the buffer of the file identified by FileID in raw
/// mode and record every token produced before end-of-file, in order, via
/// AddToken.
TokenRewriter::TokenRewriter(unsigned FileID, SourceManager &SM,
                             const LangOptions &LangOpts) {
  // Ask the source manager for the pair of pointers describing the file's
  // buffer; both are handed to the lexer below.
  std::pair<const char*,const char*> Buffer = SM.getBufferData(FileID);
  const char *BufferFirst = Buffer.first;
  const char *BufferSecond = Buffer.second;

  // Set up a raw-mode lexer whose starting location is offset 0 of the file.
  Lexer RawLex(SourceLocation::getFileLoc(FileID, 0), LangOpts,
               BufferFirst, BufferSecond);

  // Comments and whitespace must come back as tokens too.
  RawLex.SetKeepWhitespaceMode(true);

  // Pull tokens until the lexer reports eof, appending each one to the end of
  // the token list.  The eof token itself is not recorded.
  Token CurTok;
  for (RawLex.LexFromRawLexer(CurTok); CurTok.isNot(tok::eof);
       RawLex.LexFromRawLexer(CurTok))
    AddToken(CurTok, TokenList.end());
}
/// AddToken - Add the specified token into the Rewriter before the other
/// position.
///
/// The token is copied into TokenList immediately before Where, and the
/// iterator to the newly inserted element is registered in TokenAtLoc under
/// the token's own location.  In builds with assertions enabled, inserting a
/// token whose location is already registered trips the assert; otherwise the
/// existing map entry is left untouched.
void TokenRewriter::AddToken(const Token &T, TokenRefTy Where) {
  // Re-point Where at the freshly inserted element so the map refers to it.
  Where = TokenList.insert(Where, T);

  bool InsertSuccess = TokenAtLoc.insert(std::make_pair(T.getLocation(),
                                                        Where)).second;
  assert(InsertSuccess && "Token location already in rewriter!");
  // InsertSuccess is only read by the assert; mark it used so NDEBUG builds
  // do not warn about an unused variable.
  (void)InsertSuccess;
}