diff --git a/clang/AST/SemaExpr.cpp b/clang/AST/SemaExpr.cpp index e56a680f26f4..715cb414b260 100644 --- a/clang/AST/SemaExpr.cpp +++ b/clang/AST/SemaExpr.cpp @@ -22,21 +22,9 @@ #include "clang/Basic/LangOptions.h" #include "clang/Basic/TargetInfo.h" #include "llvm/ADT/SmallString.h" -#include "llvm/ADT/StringExtras.h" using namespace llvm; using namespace clang; -#include - -/// HexDigitValue - Return the value of the specified hex digit, or -1 if it's -/// not valid. -static int HexDigitValue(char C) { - if (C >= '0' && C <= '9') return C-'0'; - if (C >= 'a' && C <= 'f') return C-'a'+10; - if (C >= 'A' && C <= 'F') return C-'A'+10; - return -1; -} - /// ParseStringLiteral - The specified tokens were lexed as pasted string /// fragments (e.g. "foo" "bar" L"baz"). The result string has to handle string /// concatenation ([C99 5.1.1.2, translation phase #6]), so it may come from @@ -47,198 +35,18 @@ Action::ExprResult Sema::ParseStringLiteral(const LexerToken *StringToks, unsigned NumStringToks) { assert(NumStringToks && "Must have at least one string!"); - // Scan all of the string portions, remember the max individual token length, - // computing a bound on the concatenated string length, and see whether any - // piece is a wide-string. If any of the string portions is a wide-string - // literal, the result is a wide-string literal [C99 6.4.5p4]. - unsigned MaxTokenLength = StringToks[0].getLength(); - unsigned SizeBound = StringToks[0].getLength()-2; // -2 for "". - bool AnyWide = StringToks[0].getKind() == tok::wide_string_literal; - - // The common case is that there is only one string fragment. - for (unsigned i = 1; i != NumStringToks; ++i) { - // The string could be shorter than this if it needs cleaning, but this is a - // reasonable bound, which is all we need. - SizeBound += StringToks[i].getLength()-2; // -2 for "". 
+ StringLiteralParser Literal(StringToks, NumStringToks, PP, Context.Target); + if (Literal.hadError) + return ExprResult(true); - // Remember maximum string piece length. - if (StringToks[i].getLength() > MaxTokenLength) - MaxTokenLength = StringToks[i].getLength(); - - // Remember if we see any wide strings. - AnyWide |= StringToks[i].getKind() == tok::wide_string_literal; - } - - - // Include space for the null terminator. - ++SizeBound; - - // TODO: K&R warning: "traditional C rejects string constant concatenation" - - // Get the width in bytes of wchar_t. If no wchar_t strings are used, do not - // query the target. As such, wchar_tByteWidth is only valid if AnyWide=true. - unsigned wchar_tByteWidth = ~0U; - if (AnyWide) - wchar_tByteWidth =Context.Target.getWCharWidth(StringToks[0].getLocation()); - - // The output buffer size needs to be large enough to hold wide characters. - // This is a worst-case assumption which basically corresponds to L"" "long". - if (AnyWide) - SizeBound *= wchar_tByteWidth; - - // Create a temporary buffer to hold the result string data. - SmallString<512> ResultBuf; - ResultBuf.resize(SizeBound); - - // Likewise, but for each string piece. - SmallString<512> TokenBuf; - TokenBuf.resize(MaxTokenLength); - - // Loop over all the strings, getting their spelling, and expanding them to - // wide strings as appropriate. - char *ResultPtr = &ResultBuf[0]; // Next byte to fill in. - - for (unsigned i = 0, e = NumStringToks; i != e; ++i) { - const char *ThisTokBuf = &TokenBuf[0]; - // Get the spelling of the token, which eliminates trigraphs, etc. We know - // that ThisTokBuf points to a buffer that is big enough for the whole token - // and 'spelled' tokens can only shrink. - unsigned ThisTokLen = PP.getSpelling(StringToks[i], ThisTokBuf); - const char *ThisTokEnd = ThisTokBuf+ThisTokLen-1; // Skip end quote. - - // TODO: Input character set mapping support. - - // Skip L marker for wide strings. 
- if (ThisTokBuf[0] == 'L') ++ThisTokBuf; - - assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?"); - ++ThisTokBuf; - - while (ThisTokBuf != ThisTokEnd) { - // Is this a span of non-escape characters? - if (ThisTokBuf[0] != '\\') { - const char *InStart = ThisTokBuf; - do { - ++ThisTokBuf; - } while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\'); - - // Copy the character span over. - unsigned Len = ThisTokBuf-InStart; - if (!AnyWide) { - memcpy(ResultPtr, InStart, Len); - ResultPtr += Len; - } else { - // Note: our internal rep of wide char tokens is always little-endian. - for (; Len; --Len, ++InStart) { - *ResultPtr++ = InStart[0]; - // Add zeros at the end. - for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i) - *ResultPtr++ = 0; - } - } - continue; - } - - // Otherwise, this is an escape character. Skip the '\' char. - ++ThisTokBuf; - - // We know that this character can't be off the end of the buffer, because - // that would have been \", which would not have been the end of string. - unsigned ResultChar = *ThisTokBuf++; - switch (ResultChar) { - // These map to themselves. - case '\\': case '\'': case '"': case '?': break; - - // These have fixed mappings. - case 'a': - // TODO: K&R: the meaning of '\\a' is different in traditional C - ResultChar = 7; - break; - case 'b': - ResultChar = 8; - break; - case 'e': - Diag(StringToks[i].getLocation(), diag::ext_nonstandard_escape, "e"); - ResultChar = 27; - break; - case 'f': - ResultChar = 12; - break; - case 'n': - ResultChar = 10; - break; - case 'r': - ResultChar = 13; - break; - case 't': - ResultChar = 9; - break; - case 'v': - ResultChar = 11; - break; - - //case 'u': case 'U': // FIXME: UCNs. - case 'x': // Hex escape. - if (ThisTokBuf == ThisTokEnd || - (ResultChar = HexDigitValue(*ThisTokBuf)) == ~0U) { - Diag(StringToks[i].getLocation(), diag::err_hex_escape_no_digits); - ResultChar = 0; - break; - } - ++ThisTokBuf; // Consumed one hex digit. 
- - assert(0 && "hex escape: unimp!"); - break; - case '0': case '1': case '2': case '3': - case '4': case '5': case '6': case '7': - // Octal escapes. - assert(0 && "octal escape: unimp!"); - break; - - // Otherwise, these are not valid escapes. - case '(': case '{': case '[': case '%': - // GCC accepts these as extensions. We warn about them as such though. - if (!PP.getLangOptions().NoExtensions) { - Diag(StringToks[i].getLocation(), diag::ext_nonstandard_escape, - std::string()+(char)ResultChar); - break; - } - // FALL THROUGH. - default: - if (isgraph(ThisTokBuf[0])) { - Diag(StringToks[i].getLocation(), diag::ext_unknown_escape, - std::string()+(char)ResultChar); - } else { - Diag(StringToks[i].getLocation(), diag::ext_unknown_escape, - "x"+utohexstr(ResultChar)); - } - } - - // Note: our internal rep of wide char tokens is always little-endian. - *ResultPtr++ = ResultChar & 0xFF; - - if (AnyWide) { - for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i) - *ResultPtr++ = ResultChar >> i*8; - } - } - } - - // Add zero terminator. - *ResultPtr = 0; - if (AnyWide) { - for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i) - *ResultPtr++ = 0; - } - SmallVector StringTokLocs; for (unsigned i = 0; i != NumStringToks; ++i) StringTokLocs.push_back(StringToks[i].getLocation()); - + // FIXME: use factory. - // Pass &StringTokLocs[0], StringTokLocs.size() to factory! 
-  return new StringLiteral(&ResultBuf[0], ResultPtr-&ResultBuf[0], AnyWide);
+  return new StringLiteral(Literal.GetString(), Literal.GetStringLength(),
+                           Literal.AnyWide);
 }
diff --git a/clang/Lex/LiteralSupport.cpp b/clang/Lex/LiteralSupport.cpp
index 1fea9440cd58..62d370af2d54 100644
--- a/clang/Lex/LiteralSupport.cpp
+++ b/clang/Lex/LiteralSupport.cpp
@@ -15,6 +15,7 @@
 #include "clang/Lex/Preprocessor.h"
 #include "clang/Basic/TargetInfo.h"
 #include "clang/Basic/Diagnostic.h"
+#include "llvm/ADT/StringExtras.h"
 using namespace llvm;
 using namespace clang;
 
@@ -273,3 +274,251 @@ void NumericLiteralParser::Diag(SourceLocation Loc, unsigned DiagID,
   PP.Diag(Loc, DiagID, M);
   hadError = true;
 }
+
+///       string-literal: [C99 6.4.5]
+///          " [s-char-sequence] "
+///         L" [s-char-sequence] "
+///       s-char-sequence:
+///         s-char
+///         s-char-sequence s-char
+///       s-char:
+///         any source character except the double quote ",
+///           backslash \, or newline character
+///         escape-character
+///         universal-character-name
+///       escape-character: [C99 6.4.4.4]
+///         \ escape-code
+///         universal-character-name
+///       escape-code:
+///         character-escape-code
+///         octal-escape-code
+///         hex-escape-code
+///       character-escape-code: one of
+///         n t b r f v a
+///         \ ' " ?
+///       octal-escape-code:
+///         octal-digit
+///         octal-digit octal-digit
+///         octal-digit octal-digit octal-digit
+///       hex-escape-code:
+///         x hex-digit
+///         hex-escape-code hex-digit
+///       universal-character-name:
+///         \u hex-quad
+///         \U hex-quad hex-quad
+///       hex-quad:
+///         hex-digit hex-digit hex-digit hex-digit
+
+/// StringLiteralParser - Concatenate the NumStringToks adjacent string-literal
+/// tokens in StringToks into ResultBuf, decoding simple escape sequences along
+/// the way.  If any token is a wide string literal the whole result is wide:
+/// every character is widened to wchar_tByteWidth bytes, low byte first.
+/// Hex and octal escapes are not implemented yet and assert.  Malformed
+/// escapes are reported through Diag(), which sets hadError; extension
+/// warnings are sent directly to the preprocessor so that accepting a
+/// nonstandard escape does not make the whole literal fail to parse.
+StringLiteralParser::
+StringLiteralParser(const LexerToken *StringToks, unsigned NumStringToks,
+                    Preprocessor &pp, TargetInfo &t) :
+  PP(pp), Target(t), hadError(false)
+{
+  // Scan all of the string portions, remember the max individual token length,
+  // computing a bound on the concatenated string length, and see whether any
+  // piece is a wide-string. If any of the string portions is a wide-string
+  // literal, the result is a wide-string literal [C99 6.4.5p4].
+  MaxTokenLength = StringToks[0].getLength();
+  SizeBound = StringToks[0].getLength()-2; // -2 for "".
+  AnyWide = StringToks[0].getKind() == tok::wide_string_literal;
+
+  // The common case is that there is only one string fragment.
+  for (unsigned i = 1; i != NumStringToks; ++i) {
+    // The string could be shorter than this if it needs cleaning, but this is a
+    // reasonable bound, which is all we need.
+    SizeBound += StringToks[i].getLength()-2; // -2 for "".
+
+    // Remember maximum string piece length.
+    if (StringToks[i].getLength() > MaxTokenLength)
+      MaxTokenLength = StringToks[i].getLength();
+
+    // Remember if we see any wide strings.
+    AnyWide |= StringToks[i].getKind() == tok::wide_string_literal;
+  }
+
+
+  // Include space for the null terminator.
+  ++SizeBound;
+
+  // TODO: K&R warning: "traditional C rejects string constant concatenation"
+
+  // Get the width in bytes of wchar_t. If no wchar_t strings are used, do not
+  // query the target. As such, wchar_tByteWidth is only valid if AnyWide=true.
+  // NOTE(review): everything below treats this value as a byte count; confirm
+  // that TargetInfo::getWCharWidth does not report bits.
+  wchar_tByteWidth = ~0U;
+  if (AnyWide)
+    wchar_tByteWidth = Target.getWCharWidth(StringToks[0].getLocation());
+
+  // The output buffer size needs to be large enough to hold wide characters.
+  // This is a worst-case assumption which basically corresponds to L"" "long".
+  if (AnyWide)
+    SizeBound *= wchar_tByteWidth;
+
+  // Size the temporary buffer to hold the result string data.
+  ResultBuf.resize(SizeBound);
+
+  // Likewise, but for each string piece.
+  SmallString<512> TokenBuf;
+  TokenBuf.resize(MaxTokenLength);
+
+  // Loop over all the strings, getting their spelling, and expanding them to
+  // wide strings as appropriate.
+  ResultPtr = &ResultBuf[0]; // Next byte to fill in.
+
+  for (unsigned i = 0, e = NumStringToks; i != e; ++i) {
+    const char *ThisTokBuf = &TokenBuf[0];
+    // Get the spelling of the token, which eliminates trigraphs, etc. We know
+    // that ThisTokBuf points to a buffer that is big enough for the whole token
+    // and 'spelled' tokens can only shrink.
+    unsigned ThisTokLen = PP.getSpelling(StringToks[i], ThisTokBuf);
+    const char *ThisTokEnd = ThisTokBuf+ThisTokLen-1; // Skip end quote.
+
+    // TODO: Input character set mapping support.
+
+    // Skip L marker for wide strings.
+    if (ThisTokBuf[0] == 'L') ++ThisTokBuf;
+
+    assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?");
+    ++ThisTokBuf;
+
+    while (ThisTokBuf != ThisTokEnd) {
+      // Is this a span of non-escape characters?
+      if (ThisTokBuf[0] != '\\') {
+        const char *InStart = ThisTokBuf;
+        do {
+          ++ThisTokBuf;
+        } while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\');
+
+        // Copy the character span over.
+        unsigned Len = ThisTokBuf-InStart;
+        if (!AnyWide) {
+          memcpy(ResultPtr, InStart, Len);
+          ResultPtr += Len;
+        } else {
+          // Note: our internal rep of wide char tokens is always little-endian.
+          for (; Len; --Len, ++InStart) {
+            *ResultPtr++ = InStart[0];
+            // Add zeros at the end.
+            for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i)
+              *ResultPtr++ = 0;
+          }
+        }
+        continue;
+      }
+
+      // Otherwise, this is an escape character. Skip the '\' char.
+      ++ThisTokBuf;
+
+      // We know that this character can't be off the end of the buffer, because
+      // that would have been \", which would not have been the end of string.
+      unsigned ResultChar = *ThisTokBuf++;
+      switch (ResultChar) {
+      // These map to themselves.
+      case '\\': case '\'': case '"': case '?': break;
+
+      // These have fixed mappings.
+      case 'a':
+        // TODO: K&R: the meaning of '\\a' is different in traditional C
+        ResultChar = 7;
+        break;
+      case 'b':
+        ResultChar = 8;
+        break;
+      case 'e':
+        // Extension warning only: bypass Diag() so hadError stays clear.
+        PP.Diag(StringToks[i].getLocation(), diag::ext_nonstandard_escape, "e");
+        ResultChar = 27;
+        break;
+      case 'f':
+        ResultChar = 12;
+        break;
+      case 'n':
+        ResultChar = 10;
+        break;
+      case 'r':
+        ResultChar = 13;
+        break;
+      case 't':
+        ResultChar = 9;
+        break;
+      case 'v':
+        ResultChar = 11;
+        break;
+
+      //case 'u': case 'U': // FIXME: UCNs.
+      case 'x': // Hex escape.
+        if (ThisTokBuf == ThisTokEnd ||
+            (ResultChar = HexDigitValue(*ThisTokBuf)) == ~0U) {
+          Diag(StringToks[i].getLocation(), diag::err_hex_escape_no_digits);
+          ResultChar = 0;
+          break;
+        }
+        ++ThisTokBuf; // Consumed one hex digit.
+
+        assert(0 && "hex escape: unimp!");
+        break;
+      case '0': case '1': case '2': case '3':
+      case '4': case '5': case '6': case '7':
+        // Octal escapes.
+        assert(0 && "octal escape: unimp!");
+        break;
+
+      // Otherwise, these are not valid escapes.
+      case '(': case '{': case '[': case '%':
+        // GCC accepts these as extensions. We warn about them as such though.
+        if (!PP.getLangOptions().NoExtensions) {
+          PP.Diag(StringToks[i].getLocation(), diag::ext_nonstandard_escape,
+                  std::string()+(char)ResultChar);
+          break;
+        }
+        // FALL THROUGH.
+      default:
+        // Decide how to print the escape from the escape character itself;
+        // ThisTokBuf already points at the character after it.
+        if (isgraph((unsigned char)ResultChar)) {
+          PP.Diag(StringToks[i].getLocation(), diag::ext_unknown_escape,
+                  std::string()+(char)ResultChar);
+        } else {
+          PP.Diag(StringToks[i].getLocation(), diag::ext_unknown_escape,
+                  "x"+utohexstr(ResultChar));
+        }
+      }
+
+      // Note: our internal rep of wide char tokens is always little-endian.
+      *ResultPtr++ = ResultChar & 0xFF;
+
+      if (AnyWide) {
+        for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i)
+          *ResultPtr++ = ResultChar >> i*8;
+      }
+    }
+  }
+
+  // Add zero terminator.
+  // NOTE(review): in the wide case the loop below advances ResultPtr past
+  // wchar_tByteWidth-1 terminator bytes, so GetStringLength() counts them,
+  // whereas the narrow terminator is not counted.  Confirm this is intended.
+  *ResultPtr = 0;
+  if (AnyWide) {
+    for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i)
+      *ResultPtr++ = 0;
+  }
+}
+
+/// Diag - Report diagnostic DiagID at Loc through the preprocessor and record
+/// in hadError that parsing failed.
+void StringLiteralParser::Diag(SourceLocation Loc, unsigned DiagID,
+                               const std::string &M) {
+  PP.Diag(Loc, DiagID, M);
+  hadError = true;
+}
+
diff --git a/clang/Sema/SemaExpr.cpp b/clang/Sema/SemaExpr.cpp
index e56a680f26f4..715cb414b260 100644
--- a/clang/Sema/SemaExpr.cpp
+++ b/clang/Sema/SemaExpr.cpp
@@ -22,21 +22,9 @@
 #include "clang/Basic/LangOptions.h"
 #include "clang/Basic/TargetInfo.h"
 #include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
 using namespace llvm;
 using namespace clang;
 
-#include
-
-/// HexDigitValue - Return the value of the specified hex digit, or -1 if it's
-/// not valid.
-static int HexDigitValue(char C) {
-  if (C >= '0' && C <= '9') return C-'0';
-  if (C >= 'a' && C <= 'f') return C-'a'+10;
-  if (C >= 'A' && C <= 'F') return C-'A'+10;
-  return -1;
-}
-
 /// ParseStringLiteral - The specified tokens were lexed as pasted string
 /// fragments (e.g. "foo" "bar" L"baz"). The result string has to handle string
 /// concatenation ([C99 5.1.1.2, translation phase #6]), so it may come from
@@ -47,198 +35,18 @@ Action::ExprResult
 Sema::ParseStringLiteral(const LexerToken *StringToks, unsigned NumStringToks) {
   assert(NumStringToks && "Must have at least one string!");
 
-  // Scan all of the string portions, remember the max individual token length,
-  // computing a bound on the concatenated string length, and see whether any
-  // piece is a wide-string. If any of the string portions is a wide-string
-  // literal, the result is a wide-string literal [C99 6.4.5p4].
-  unsigned MaxTokenLength = StringToks[0].getLength();
-  unsigned SizeBound = StringToks[0].getLength()-2; // -2 for "".
-  bool AnyWide = StringToks[0].getKind() == tok::wide_string_literal;
-
-  // The common case is that there is only one string fragment.
- for (unsigned i = 1; i != NumStringToks; ++i) { - // The string could be shorter than this if it needs cleaning, but this is a - // reasonable bound, which is all we need. - SizeBound += StringToks[i].getLength()-2; // -2 for "". + StringLiteralParser Literal(StringToks, NumStringToks, PP, Context.Target); + if (Literal.hadError) + return ExprResult(true); - // Remember maximum string piece length. - if (StringToks[i].getLength() > MaxTokenLength) - MaxTokenLength = StringToks[i].getLength(); - - // Remember if we see any wide strings. - AnyWide |= StringToks[i].getKind() == tok::wide_string_literal; - } - - - // Include space for the null terminator. - ++SizeBound; - - // TODO: K&R warning: "traditional C rejects string constant concatenation" - - // Get the width in bytes of wchar_t. If no wchar_t strings are used, do not - // query the target. As such, wchar_tByteWidth is only valid if AnyWide=true. - unsigned wchar_tByteWidth = ~0U; - if (AnyWide) - wchar_tByteWidth =Context.Target.getWCharWidth(StringToks[0].getLocation()); - - // The output buffer size needs to be large enough to hold wide characters. - // This is a worst-case assumption which basically corresponds to L"" "long". - if (AnyWide) - SizeBound *= wchar_tByteWidth; - - // Create a temporary buffer to hold the result string data. - SmallString<512> ResultBuf; - ResultBuf.resize(SizeBound); - - // Likewise, but for each string piece. - SmallString<512> TokenBuf; - TokenBuf.resize(MaxTokenLength); - - // Loop over all the strings, getting their spelling, and expanding them to - // wide strings as appropriate. - char *ResultPtr = &ResultBuf[0]; // Next byte to fill in. - - for (unsigned i = 0, e = NumStringToks; i != e; ++i) { - const char *ThisTokBuf = &TokenBuf[0]; - // Get the spelling of the token, which eliminates trigraphs, etc. We know - // that ThisTokBuf points to a buffer that is big enough for the whole token - // and 'spelled' tokens can only shrink. 
- unsigned ThisTokLen = PP.getSpelling(StringToks[i], ThisTokBuf); - const char *ThisTokEnd = ThisTokBuf+ThisTokLen-1; // Skip end quote. - - // TODO: Input character set mapping support. - - // Skip L marker for wide strings. - if (ThisTokBuf[0] == 'L') ++ThisTokBuf; - - assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?"); - ++ThisTokBuf; - - while (ThisTokBuf != ThisTokEnd) { - // Is this a span of non-escape characters? - if (ThisTokBuf[0] != '\\') { - const char *InStart = ThisTokBuf; - do { - ++ThisTokBuf; - } while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\'); - - // Copy the character span over. - unsigned Len = ThisTokBuf-InStart; - if (!AnyWide) { - memcpy(ResultPtr, InStart, Len); - ResultPtr += Len; - } else { - // Note: our internal rep of wide char tokens is always little-endian. - for (; Len; --Len, ++InStart) { - *ResultPtr++ = InStart[0]; - // Add zeros at the end. - for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i) - *ResultPtr++ = 0; - } - } - continue; - } - - // Otherwise, this is an escape character. Skip the '\' char. - ++ThisTokBuf; - - // We know that this character can't be off the end of the buffer, because - // that would have been \", which would not have been the end of string. - unsigned ResultChar = *ThisTokBuf++; - switch (ResultChar) { - // These map to themselves. - case '\\': case '\'': case '"': case '?': break; - - // These have fixed mappings. - case 'a': - // TODO: K&R: the meaning of '\\a' is different in traditional C - ResultChar = 7; - break; - case 'b': - ResultChar = 8; - break; - case 'e': - Diag(StringToks[i].getLocation(), diag::ext_nonstandard_escape, "e"); - ResultChar = 27; - break; - case 'f': - ResultChar = 12; - break; - case 'n': - ResultChar = 10; - break; - case 'r': - ResultChar = 13; - break; - case 't': - ResultChar = 9; - break; - case 'v': - ResultChar = 11; - break; - - //case 'u': case 'U': // FIXME: UCNs. - case 'x': // Hex escape. 
- if (ThisTokBuf == ThisTokEnd || - (ResultChar = HexDigitValue(*ThisTokBuf)) == ~0U) { - Diag(StringToks[i].getLocation(), diag::err_hex_escape_no_digits); - ResultChar = 0; - break; - } - ++ThisTokBuf; // Consumed one hex digit. - - assert(0 && "hex escape: unimp!"); - break; - case '0': case '1': case '2': case '3': - case '4': case '5': case '6': case '7': - // Octal escapes. - assert(0 && "octal escape: unimp!"); - break; - - // Otherwise, these are not valid escapes. - case '(': case '{': case '[': case '%': - // GCC accepts these as extensions. We warn about them as such though. - if (!PP.getLangOptions().NoExtensions) { - Diag(StringToks[i].getLocation(), diag::ext_nonstandard_escape, - std::string()+(char)ResultChar); - break; - } - // FALL THROUGH. - default: - if (isgraph(ThisTokBuf[0])) { - Diag(StringToks[i].getLocation(), diag::ext_unknown_escape, - std::string()+(char)ResultChar); - } else { - Diag(StringToks[i].getLocation(), diag::ext_unknown_escape, - "x"+utohexstr(ResultChar)); - } - } - - // Note: our internal rep of wide char tokens is always little-endian. - *ResultPtr++ = ResultChar & 0xFF; - - if (AnyWide) { - for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i) - *ResultPtr++ = ResultChar >> i*8; - } - } - } - - // Add zero terminator. - *ResultPtr = 0; - if (AnyWide) { - for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i) - *ResultPtr++ = 0; - } - SmallVector StringTokLocs; for (unsigned i = 0; i != NumStringToks; ++i) StringTokLocs.push_back(StringToks[i].getLocation()); - + // FIXME: use factory. - // Pass &StringTokLocs[0], StringTokLocs.size() to factory! 
-  return new StringLiteral(&ResultBuf[0], ResultPtr-&ResultBuf[0], AnyWide);
+  return new StringLiteral(Literal.GetString(), Literal.GetStringLength(),
+                           Literal.AnyWide);
 }
diff --git a/clang/include/clang/Lex/LiteralSupport.h b/clang/include/clang/Lex/LiteralSupport.h
index c89f2fc78489..03f4dfa9cf85 100644
--- a/clang/include/clang/Lex/LiteralSupport.h
+++ b/clang/include/clang/Lex/LiteralSupport.h
@@ -15,12 +15,14 @@
 #define LLVM_CLANG_LITERALSUPPORT_H
 
 #include
+#include "llvm/ADT/SmallString.h"
 
 namespace llvm {
 namespace clang {
 
 class Diagnostic;
 class Preprocessor;
+class LexerToken;
 class SourceLocation;
 class TargetInfo;
 
@@ -91,6 +93,45 @@ private:
     return ptr;
   }
 };
+
+/// StringLiteralParser - Turns one or more adjacent string-literal tokens into
+/// a single buffer of character data.  All of the work happens in the
+/// constructor; afterwards clients check hadError and AnyWide and read the
+/// result through GetString() and GetStringLength().
+class StringLiteralParser {
+  Preprocessor &PP;
+  TargetInfo &Target;
+
+  unsigned MaxTokenLength;     // Length of the longest input token.
+  unsigned SizeBound;          // Upper bound on the result size, in bytes.
+  unsigned wchar_tByteWidth;   // Only valid when AnyWide is true.
+  SmallString<512> ResultBuf;  // Storage for the converted string.
+  char *ResultPtr;             // Cursor: next byte of ResultBuf to fill in.
+public:
+  StringLiteralParser(const LexerToken *StringToks, unsigned NumStringToks,
+                      Preprocessor &PP, TargetInfo &T);
+  bool hadError;  // Set to true by Diag() when a diagnostic is reported.
+  bool AnyWide;   // True if any input token is a wide string literal.
+
+  /// GetString - Return a pointer to the start of the result buffer.
+  const char *GetString() const { return &ResultBuf[0]; }
+  /// GetStringLength - Return the number of bytes between the start of the
+  /// result buffer and the cursor.
+  unsigned GetStringLength() const { return ResultPtr-&ResultBuf[0]; }
+private:
+  /// Diag - Report a diagnostic through the preprocessor and set hadError.
+  void Diag(SourceLocation Loc, unsigned DiagID,
+            const std::string &M = std::string());
+
+  /// HexDigitValue - Return the value of the specified hex digit, or -1 if it's
+  /// not valid.
+  static int HexDigitValue(char C) {
+    if (C >= '0' && C <= '9') return C-'0';
+    if (C >= 'a' && C <= 'f') return C-'a'+10;
+    if (C >= 'A' && C <= 'F') return C-'A'+10;
+    return -1;
+  }
+};
 
 } // end namespace clang
 } // end namespace llvm