diff --git a/clang/AST/SemaExpr.cpp b/clang/AST/SemaExpr.cpp
index e56a680f26f4..715cb414b260 100644
--- a/clang/AST/SemaExpr.cpp
+++ b/clang/AST/SemaExpr.cpp
@@ -22,21 +22,9 @@
 #include "clang/Basic/LangOptions.h"
 #include "clang/Basic/TargetInfo.h"
 #include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
 using namespace llvm;
 using namespace clang;
 
-#include <iostream>
-
-/// HexDigitValue - Return the value of the specified hex digit, or -1 if it's
-/// not valid.
-static int HexDigitValue(char C) {
-  if (C >= '0' && C <= '9') return C-'0';
-  if (C >= 'a' && C <= 'f') return C-'a'+10;
-  if (C >= 'A' && C <= 'F') return C-'A'+10;
-  return -1;
-}
-
 /// ParseStringLiteral - The specified tokens were lexed as pasted string
 /// fragments (e.g. "foo" "bar" L"baz").  The result string has to handle string
 /// concatenation ([C99 5.1.1.2, translation phase #6]), so it may come from
@@ -47,198 +35,18 @@ Action::ExprResult
 Sema::ParseStringLiteral(const LexerToken *StringToks, unsigned NumStringToks) {
   assert(NumStringToks && "Must have at least one string!");
 
-  // Scan all of the string portions, remember the max individual token length,
-  // computing a bound on the concatenated string length, and see whether any
-  // piece is a wide-string.  If any of the string portions is a wide-string
-  // literal, the result is a wide-string literal [C99 6.4.5p4].
-  unsigned MaxTokenLength = StringToks[0].getLength();
-  unsigned SizeBound = StringToks[0].getLength()-2;  // -2 for "".
-  bool AnyWide = StringToks[0].getKind() == tok::wide_string_literal;
-  
-  // The common case is that there is only one string fragment.
-  for (unsigned i = 1; i != NumStringToks; ++i) {
-    // The string could be shorter than this if it needs cleaning, but this is a
-    // reasonable bound, which is all we need.
-    SizeBound += StringToks[i].getLength()-2;  // -2 for "".
+  StringLiteralParser Literal(StringToks, NumStringToks, PP, Context.Target);
+  if (Literal.hadError)
+    return ExprResult(true);
 
-    // Remember maximum string piece length.
-    if (StringToks[i].getLength() > MaxTokenLength) 
-      MaxTokenLength = StringToks[i].getLength();
-    
-    // Remember if we see any wide strings.
-    AnyWide |= StringToks[i].getKind() == tok::wide_string_literal;
-  }
-  
-  
-  // Include space for the null terminator.
-  ++SizeBound;
-  
-  // TODO: K&R warning: "traditional C rejects string constant concatenation"
-  
-  // Get the width in bytes of wchar_t.  If no wchar_t strings are used, do not
-  // query the target.  As such, wchar_tByteWidth is only valid if AnyWide=true.
-  unsigned wchar_tByteWidth = ~0U;
-  if (AnyWide)
-    wchar_tByteWidth =Context.Target.getWCharWidth(StringToks[0].getLocation());
-  
-  // The output buffer size needs to be large enough to hold wide characters.
-  // This is a worst-case assumption which basically corresponds to L"" "long".
-  if (AnyWide)
-    SizeBound *= wchar_tByteWidth;
-  
-  // Create a temporary buffer to hold the result string data.
-  SmallString<512> ResultBuf;
-  ResultBuf.resize(SizeBound);
-  
-  // Likewise, but for each string piece.
-  SmallString<512> TokenBuf;
-  TokenBuf.resize(MaxTokenLength);
-  
-  // Loop over all the strings, getting their spelling, and expanding them to
-  // wide strings as appropriate.
-  char *ResultPtr = &ResultBuf[0];   // Next byte to fill in.
-  
-  for (unsigned i = 0, e = NumStringToks; i != e; ++i) {
-    const char *ThisTokBuf = &TokenBuf[0];
-    // Get the spelling of the token, which eliminates trigraphs, etc.  We know
-    // that ThisTokBuf points to a buffer that is big enough for the whole token
-    // and 'spelled' tokens can only shrink.
-    unsigned ThisTokLen = PP.getSpelling(StringToks[i], ThisTokBuf);
-    const char *ThisTokEnd = ThisTokBuf+ThisTokLen-1;  // Skip end quote.
-    
-    // TODO: Input character set mapping support.
-    
-    // Skip L marker for wide strings.
-    if (ThisTokBuf[0] == 'L') ++ThisTokBuf;
-    
-    assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?");
-    ++ThisTokBuf;
-    
-    while (ThisTokBuf != ThisTokEnd) {
-      // Is this a span of non-escape characters?
-      if (ThisTokBuf[0] != '\\') {
-        const char *InStart = ThisTokBuf;
-        do {
-          ++ThisTokBuf;
-        } while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\');
-        
-        // Copy the character span over.
-        unsigned Len = ThisTokBuf-InStart;
-        if (!AnyWide) {
-          memcpy(ResultPtr, InStart, Len);
-          ResultPtr += Len;
-        } else {
-          // Note: our internal rep of wide char tokens is always little-endian.
-          for (; Len; --Len, ++InStart) {
-            *ResultPtr++ = InStart[0];
-            // Add zeros at the end.
-            for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i)
-              *ResultPtr++ = 0;
-          }
-        }
-        continue;
-      }
-      
-      // Otherwise, this is an escape character.  Skip the '\' char.
-      ++ThisTokBuf;
-      
-      // We know that this character can't be off the end of the buffer, because
-      // that would have been \", which would not have been the end of string.
-      unsigned ResultChar = *ThisTokBuf++;
-      switch (ResultChar) {
-      // These map to themselves.
-      case '\\': case '\'': case '"': case '?': break;
-        
-      // These have fixed mappings.
-      case 'a':
-        // TODO: K&R: the meaning of '\\a' is different in traditional C
-        ResultChar = 7;
-        break;
-      case 'b':
-        ResultChar = 8;
-        break;
-      case 'e':
-        Diag(StringToks[i].getLocation(), diag::ext_nonstandard_escape, "e");
-        ResultChar = 27;
-        break;
-      case 'f':
-        ResultChar = 12;
-        break;
-      case 'n':
-        ResultChar = 10;
-        break;
-      case 'r':
-        ResultChar = 13;
-        break;
-      case 't':
-        ResultChar = 9;
-        break;
-      case 'v':
-        ResultChar = 11;
-        break;
-        
-      //case 'u': case 'U':  // FIXME: UCNs.
-      case 'x': // Hex escape.
-        if (ThisTokBuf == ThisTokEnd ||
-            (ResultChar = HexDigitValue(*ThisTokBuf)) == ~0U) {
-          Diag(StringToks[i].getLocation(), diag::err_hex_escape_no_digits);
-          ResultChar = 0;
-          break;
-        }
-        ++ThisTokBuf; // Consumed one hex digit.
-        
-        assert(0 && "hex escape: unimp!");
-        break;
-      case '0': case '1': case '2': case '3':
-      case '4': case '5': case '6': case '7':
-        // Octal escapes.
-        assert(0 && "octal escape: unimp!");
-        break;
-        
-      // Otherwise, these are not valid escapes.
-      case '(': case '{': case '[': case '%':
-        // GCC accepts these as extensions.  We warn about them as such though.
-        if (!PP.getLangOptions().NoExtensions) {
-          Diag(StringToks[i].getLocation(), diag::ext_nonstandard_escape,
-               std::string()+(char)ResultChar);
-          break;
-        }
-        // FALL THROUGH.
-      default:
-        if (isgraph(ThisTokBuf[0])) {
-          Diag(StringToks[i].getLocation(), diag::ext_unknown_escape,
-               std::string()+(char)ResultChar);
-        } else {
-          Diag(StringToks[i].getLocation(), diag::ext_unknown_escape,
-               "x"+utohexstr(ResultChar));
-        }
-      }
-
-      // Note: our internal rep of wide char tokens is always little-endian.
-      *ResultPtr++ = ResultChar & 0xFF;
-      
-      if (AnyWide) {
-        for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i)
-          *ResultPtr++ = ResultChar >> i*8;
-      }
-    }
-  }
-  
-  // Add zero terminator.
-  *ResultPtr = 0;
-  if (AnyWide) {
-    for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i)
-      *ResultPtr++ = 0;
-  }
-  
   SmallVector<SourceLocation, 4> StringTokLocs;
   for (unsigned i = 0; i != NumStringToks; ++i)
     StringTokLocs.push_back(StringToks[i].getLocation());
-  
+    
   // FIXME: use factory.
-  
   // Pass &StringTokLocs[0], StringTokLocs.size() to factory!
-  return new StringLiteral(&ResultBuf[0], ResultPtr-&ResultBuf[0], AnyWide);
+  return new StringLiteral(Literal.GetString(), Literal.GetStringLength(), 
+                           Literal.AnyWide);
 }
 
 
diff --git a/clang/Lex/LiteralSupport.cpp b/clang/Lex/LiteralSupport.cpp
index 1fea9440cd58..62d370af2d54 100644
--- a/clang/Lex/LiteralSupport.cpp
+++ b/clang/Lex/LiteralSupport.cpp
@@ -15,6 +15,7 @@
 #include "clang/Lex/Preprocessor.h"
 #include "clang/Basic/TargetInfo.h"
 #include "clang/Basic/Diagnostic.h"
+#include "llvm/ADT/StringExtras.h"
 
 using namespace llvm;
 using namespace clang;
@@ -273,3 +274,233 @@ void NumericLiteralParser::Diag(SourceLocation Loc, unsigned DiagID,
   PP.Diag(Loc, DiagID, M);
   hadError = true;
 }
+
+///       string-literal: [C99 6.4.5]
+///          " [s-char-sequence] "
+///         L" [s-char-sequence] "
+///       s-char-sequence:
+///         s-char
+///         s-char-sequence s-char
+///       s-char:
+///         any source character except the double quote ",
+///           backslash \, or newline character
+///         escape-character
+///         universal-character-name
+///       escape-character: [C99 6.4.4.4]
+///         \ escape-code
+///         universal-character-name
+///       escape-code:
+///         character-escape-code
+///         octal-escape-code
+///         hex-escape-code
+///       character-escape-code: one of
+///         n t b r f v a
+///         \ ' " ?
+///       octal-escape-code:
+///         octal-digit
+///         octal-digit octal-digit
+///         octal-digit octal-digit octal-digit
+///       hex-escape-code:
+///         x hex-digit
+///         hex-escape-code hex-digit
+///       universal-character-name:
+///         \u hex-quad
+///         \U hex-quad hex-quad
+///       hex-quad:
+///         hex-digit hex-digit hex-digit hex-digit
+
+StringLiteralParser::
+StringLiteralParser(const LexerToken *StringToks, unsigned NumStringToks,
+                    Preprocessor &pp, TargetInfo &t) :
+  PP(pp), Target(t), hadError(false)  // Start clean; only Diag() sets it.
+{
+  // Scan all of the string portions, remember the max individual token length,
+  // computing a bound on the concatenated string length, and see whether any
+  // piece is a wide-string.  If any of the string portions is a wide-string
+  // literal, the result is a wide-string literal [C99 6.4.5p4].
+  MaxTokenLength = StringToks[0].getLength();
+  SizeBound = StringToks[0].getLength()-2;  // -2 for "".
+  AnyWide = StringToks[0].getKind() == tok::wide_string_literal;
+  
+  // The common case is that there is only one string fragment.
+  for (unsigned i = 1; i != NumStringToks; ++i) {
+    // The string could be shorter than this if it needs cleaning, but this is a
+    // reasonable bound, which is all we need.
+    SizeBound += StringToks[i].getLength()-2;  // -2 for "".
+    
+    // Remember maximum string piece length.
+    if (StringToks[i].getLength() > MaxTokenLength) 
+      MaxTokenLength = StringToks[i].getLength();
+    
+    // Remember if we see any wide strings.
+    AnyWide |= StringToks[i].getKind() == tok::wide_string_literal;
+  }
+  
+  
+  // Include space for the null terminator.
+  ++SizeBound;
+  
+  // TODO: K&R warning: "traditional C rejects string constant concatenation"
+  
+  // Get the width in bytes of wchar_t.  If no wchar_t strings are used, do not
+  // query the target.  As such, wchar_tByteWidth is only valid if AnyWide=true.
+  wchar_tByteWidth = ~0U;
+  if (AnyWide)
+    wchar_tByteWidth = Target.getWCharWidth(StringToks[0].getLocation());
+  
+  // The output buffer size needs to be large enough to hold wide characters.
+  // This is a worst-case assumption which basically corresponds to L"" "long".
+  if (AnyWide)
+    SizeBound *= wchar_tByteWidth;
+  
+  // Size the temporary buffer to hold the result string data.
+  ResultBuf.resize(SizeBound);
+  
+  // Likewise, but for each string piece.
+  SmallString<512> TokenBuf;
+  TokenBuf.resize(MaxTokenLength);
+  
+  // Loop over all the strings, getting their spelling, and expanding them to
+  // wide strings as appropriate.
+  ResultPtr = &ResultBuf[0];   // Next byte to fill in.
+  
+  for (unsigned i = 0, e = NumStringToks; i != e; ++i) {
+    const char *ThisTokBuf = &TokenBuf[0];
+    // Get the spelling of the token, which eliminates trigraphs, etc.  We know
+    // that ThisTokBuf points to a buffer that is big enough for the whole token
+    // and 'spelled' tokens can only shrink.
+    unsigned ThisTokLen = PP.getSpelling(StringToks[i], ThisTokBuf);
+    const char *ThisTokEnd = ThisTokBuf+ThisTokLen-1;  // Skip end quote.
+    
+    // TODO: Input character set mapping support.
+    
+    // Skip L marker for wide strings.
+    if (ThisTokBuf[0] == 'L') ++ThisTokBuf;
+    
+    assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?");
+    ++ThisTokBuf;
+    
+    while (ThisTokBuf != ThisTokEnd) {
+      // Is this a span of non-escape characters?
+      if (ThisTokBuf[0] != '\\') {
+        const char *InStart = ThisTokBuf;
+        do {
+          ++ThisTokBuf;
+        } while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\');
+        
+        // Copy the character span over.
+        unsigned Len = ThisTokBuf-InStart;
+        if (!AnyWide) {
+          memcpy(ResultPtr, InStart, Len);
+          ResultPtr += Len;
+        } else {
+          // Note: our internal rep of wide char tokens is always little-endian.
+          for (; Len; --Len, ++InStart) {
+            *ResultPtr++ = InStart[0];
+            // Add zeros at the end.
+            for (unsigned Byte = 1; Byte != wchar_tByteWidth; ++Byte)
+              *ResultPtr++ = 0;
+          }
+        }
+        continue;
+      }
+      
+      // Otherwise, this is an escape character.  Skip the '\' char.
+      ++ThisTokBuf;
+      
+      // We know that this character can't be off the end of the buffer, because
+      // that would have been \", which would not have been the end of string.
+      unsigned ResultChar = *ThisTokBuf++;
+      switch (ResultChar) {
+        // These map to themselves.
+      case '\\': case '\'': case '"': case '?': break;
+        
+        // These have fixed mappings.
+      case 'a':
+        // TODO: K&R: the meaning of '\\a' is different in traditional C
+        ResultChar = 7;
+        break;
+      case 'b':
+        ResultChar = 8;
+        break;
+      case 'e':
+        Diag(StringToks[i].getLocation(), diag::ext_nonstandard_escape, "e");
+        ResultChar = 27;
+        break;
+      case 'f':
+        ResultChar = 12;
+        break;
+      case 'n':
+        ResultChar = 10;
+        break;
+      case 'r':
+        ResultChar = 13;
+        break;
+      case 't':
+        ResultChar = 9;
+        break;
+      case 'v':
+        ResultChar = 11;
+        break;
+        
+        //case 'u': case 'U':  // FIXME: UCNs.
+      case 'x': // Hex escape.
+        if (ThisTokBuf == ThisTokEnd ||
+            (ResultChar = HexDigitValue(*ThisTokBuf)) == ~0U) {
+          Diag(StringToks[i].getLocation(), diag::err_hex_escape_no_digits);
+          ResultChar = 0;
+          break;
+        }
+        ++ThisTokBuf; // Consumed one hex digit.
+        
+        assert(0 && "hex escape: unimp!");
+        break;
+      case '0': case '1': case '2': case '3':
+      case '4': case '5': case '6': case '7':
+        // Octal escapes.
+        assert(0 && "octal escape: unimp!");
+        break;
+        
+        // Otherwise, these are not valid escapes.
+      case '(': case '{': case '[': case '%':
+        // GCC accepts these as extensions.  We warn about them as such though.
+        if (!PP.getLangOptions().NoExtensions) {
+          Diag(StringToks[i].getLocation(), diag::ext_nonstandard_escape,
+               std::string()+(char)ResultChar);
+          break;
+        }
+        // FALL THROUGH.
+      default:
+        if (isgraph((unsigned char)ResultChar)) {  // Test the escape itself.
+          Diag(StringToks[i].getLocation(), diag::ext_unknown_escape,
+               std::string()+(char)ResultChar);
+        } else {
+          Diag(StringToks[i].getLocation(), diag::ext_unknown_escape,
+               "x"+utohexstr(ResultChar));
+        }
+      }
+      
+      // Note: our internal rep of wide char tokens is always little-endian.
+      *ResultPtr++ = ResultChar & 0xFF;
+      
+      if (AnyWide) {
+        for (unsigned Byte = 1; Byte != wchar_tByteWidth; ++Byte)
+          *ResultPtr++ = ResultChar >> Byte*8;
+      }
+    }
+  }
+  
+  // Add zero terminator; ResultPtr stays put so the length excludes it.
+  *ResultPtr = 0;
+  if (AnyWide) {
+    for (unsigned Byte = 1; Byte != wchar_tByteWidth; ++Byte)
+      ResultPtr[Byte] = 0;
+  }
+}
+
+/// Diag - Report via the preprocessor; every report also sets hadError.
+void StringLiteralParser::Diag(SourceLocation Loc, unsigned DiagID,
+                               const std::string &M) {
+  hadError = true;
+  PP.Diag(Loc, DiagID, M);
+}
diff --git a/clang/Sema/SemaExpr.cpp b/clang/Sema/SemaExpr.cpp
index e56a680f26f4..715cb414b260 100644
--- a/clang/Sema/SemaExpr.cpp
+++ b/clang/Sema/SemaExpr.cpp
@@ -22,21 +22,9 @@
 #include "clang/Basic/LangOptions.h"
 #include "clang/Basic/TargetInfo.h"
 #include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
 using namespace llvm;
 using namespace clang;
 
-#include <iostream>
-
-/// HexDigitValue - Return the value of the specified hex digit, or -1 if it's
-/// not valid.
-static int HexDigitValue(char C) {
-  if (C >= '0' && C <= '9') return C-'0';
-  if (C >= 'a' && C <= 'f') return C-'a'+10;
-  if (C >= 'A' && C <= 'F') return C-'A'+10;
-  return -1;
-}
-
 /// ParseStringLiteral - The specified tokens were lexed as pasted string
 /// fragments (e.g. "foo" "bar" L"baz").  The result string has to handle string
 /// concatenation ([C99 5.1.1.2, translation phase #6]), so it may come from
@@ -47,198 +35,18 @@ Action::ExprResult
 Sema::ParseStringLiteral(const LexerToken *StringToks, unsigned NumStringToks) {
   assert(NumStringToks && "Must have at least one string!");
 
-  // Scan all of the string portions, remember the max individual token length,
-  // computing a bound on the concatenated string length, and see whether any
-  // piece is a wide-string.  If any of the string portions is a wide-string
-  // literal, the result is a wide-string literal [C99 6.4.5p4].
-  unsigned MaxTokenLength = StringToks[0].getLength();
-  unsigned SizeBound = StringToks[0].getLength()-2;  // -2 for "".
-  bool AnyWide = StringToks[0].getKind() == tok::wide_string_literal;
-  
-  // The common case is that there is only one string fragment.
-  for (unsigned i = 1; i != NumStringToks; ++i) {
-    // The string could be shorter than this if it needs cleaning, but this is a
-    // reasonable bound, which is all we need.
-    SizeBound += StringToks[i].getLength()-2;  // -2 for "".
+  StringLiteralParser Literal(StringToks, NumStringToks, PP, Context.Target);
+  if (Literal.hadError)
+    return ExprResult(true);
 
-    // Remember maximum string piece length.
-    if (StringToks[i].getLength() > MaxTokenLength) 
-      MaxTokenLength = StringToks[i].getLength();
-    
-    // Remember if we see any wide strings.
-    AnyWide |= StringToks[i].getKind() == tok::wide_string_literal;
-  }
-  
-  
-  // Include space for the null terminator.
-  ++SizeBound;
-  
-  // TODO: K&R warning: "traditional C rejects string constant concatenation"
-  
-  // Get the width in bytes of wchar_t.  If no wchar_t strings are used, do not
-  // query the target.  As such, wchar_tByteWidth is only valid if AnyWide=true.
-  unsigned wchar_tByteWidth = ~0U;
-  if (AnyWide)
-    wchar_tByteWidth =Context.Target.getWCharWidth(StringToks[0].getLocation());
-  
-  // The output buffer size needs to be large enough to hold wide characters.
-  // This is a worst-case assumption which basically corresponds to L"" "long".
-  if (AnyWide)
-    SizeBound *= wchar_tByteWidth;
-  
-  // Create a temporary buffer to hold the result string data.
-  SmallString<512> ResultBuf;
-  ResultBuf.resize(SizeBound);
-  
-  // Likewise, but for each string piece.
-  SmallString<512> TokenBuf;
-  TokenBuf.resize(MaxTokenLength);
-  
-  // Loop over all the strings, getting their spelling, and expanding them to
-  // wide strings as appropriate.
-  char *ResultPtr = &ResultBuf[0];   // Next byte to fill in.
-  
-  for (unsigned i = 0, e = NumStringToks; i != e; ++i) {
-    const char *ThisTokBuf = &TokenBuf[0];
-    // Get the spelling of the token, which eliminates trigraphs, etc.  We know
-    // that ThisTokBuf points to a buffer that is big enough for the whole token
-    // and 'spelled' tokens can only shrink.
-    unsigned ThisTokLen = PP.getSpelling(StringToks[i], ThisTokBuf);
-    const char *ThisTokEnd = ThisTokBuf+ThisTokLen-1;  // Skip end quote.
-    
-    // TODO: Input character set mapping support.
-    
-    // Skip L marker for wide strings.
-    if (ThisTokBuf[0] == 'L') ++ThisTokBuf;
-    
-    assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?");
-    ++ThisTokBuf;
-    
-    while (ThisTokBuf != ThisTokEnd) {
-      // Is this a span of non-escape characters?
-      if (ThisTokBuf[0] != '\\') {
-        const char *InStart = ThisTokBuf;
-        do {
-          ++ThisTokBuf;
-        } while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\');
-        
-        // Copy the character span over.
-        unsigned Len = ThisTokBuf-InStart;
-        if (!AnyWide) {
-          memcpy(ResultPtr, InStart, Len);
-          ResultPtr += Len;
-        } else {
-          // Note: our internal rep of wide char tokens is always little-endian.
-          for (; Len; --Len, ++InStart) {
-            *ResultPtr++ = InStart[0];
-            // Add zeros at the end.
-            for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i)
-              *ResultPtr++ = 0;
-          }
-        }
-        continue;
-      }
-      
-      // Otherwise, this is an escape character.  Skip the '\' char.
-      ++ThisTokBuf;
-      
-      // We know that this character can't be off the end of the buffer, because
-      // that would have been \", which would not have been the end of string.
-      unsigned ResultChar = *ThisTokBuf++;
-      switch (ResultChar) {
-      // These map to themselves.
-      case '\\': case '\'': case '"': case '?': break;
-        
-      // These have fixed mappings.
-      case 'a':
-        // TODO: K&R: the meaning of '\\a' is different in traditional C
-        ResultChar = 7;
-        break;
-      case 'b':
-        ResultChar = 8;
-        break;
-      case 'e':
-        Diag(StringToks[i].getLocation(), diag::ext_nonstandard_escape, "e");
-        ResultChar = 27;
-        break;
-      case 'f':
-        ResultChar = 12;
-        break;
-      case 'n':
-        ResultChar = 10;
-        break;
-      case 'r':
-        ResultChar = 13;
-        break;
-      case 't':
-        ResultChar = 9;
-        break;
-      case 'v':
-        ResultChar = 11;
-        break;
-        
-      //case 'u': case 'U':  // FIXME: UCNs.
-      case 'x': // Hex escape.
-        if (ThisTokBuf == ThisTokEnd ||
-            (ResultChar = HexDigitValue(*ThisTokBuf)) == ~0U) {
-          Diag(StringToks[i].getLocation(), diag::err_hex_escape_no_digits);
-          ResultChar = 0;
-          break;
-        }
-        ++ThisTokBuf; // Consumed one hex digit.
-        
-        assert(0 && "hex escape: unimp!");
-        break;
-      case '0': case '1': case '2': case '3':
-      case '4': case '5': case '6': case '7':
-        // Octal escapes.
-        assert(0 && "octal escape: unimp!");
-        break;
-        
-      // Otherwise, these are not valid escapes.
-      case '(': case '{': case '[': case '%':
-        // GCC accepts these as extensions.  We warn about them as such though.
-        if (!PP.getLangOptions().NoExtensions) {
-          Diag(StringToks[i].getLocation(), diag::ext_nonstandard_escape,
-               std::string()+(char)ResultChar);
-          break;
-        }
-        // FALL THROUGH.
-      default:
-        if (isgraph(ThisTokBuf[0])) {
-          Diag(StringToks[i].getLocation(), diag::ext_unknown_escape,
-               std::string()+(char)ResultChar);
-        } else {
-          Diag(StringToks[i].getLocation(), diag::ext_unknown_escape,
-               "x"+utohexstr(ResultChar));
-        }
-      }
-
-      // Note: our internal rep of wide char tokens is always little-endian.
-      *ResultPtr++ = ResultChar & 0xFF;
-      
-      if (AnyWide) {
-        for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i)
-          *ResultPtr++ = ResultChar >> i*8;
-      }
-    }
-  }
-  
-  // Add zero terminator.
-  *ResultPtr = 0;
-  if (AnyWide) {
-    for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i)
-      *ResultPtr++ = 0;
-  }
-  
   SmallVector<SourceLocation, 4> StringTokLocs;
   for (unsigned i = 0; i != NumStringToks; ++i)
     StringTokLocs.push_back(StringToks[i].getLocation());
-  
+    
   // FIXME: use factory.
-  
   // Pass &StringTokLocs[0], StringTokLocs.size() to factory!
-  return new StringLiteral(&ResultBuf[0], ResultPtr-&ResultBuf[0], AnyWide);
+  return new StringLiteral(Literal.GetString(), Literal.GetStringLength(), 
+                           Literal.AnyWide);
 }
 
 
diff --git a/clang/include/clang/Lex/LiteralSupport.h b/clang/include/clang/Lex/LiteralSupport.h
index c89f2fc78489..03f4dfa9cf85 100644
--- a/clang/include/clang/Lex/LiteralSupport.h
+++ b/clang/include/clang/Lex/LiteralSupport.h
@@ -15,12 +15,14 @@
 #define LLVM_CLANG_LITERALSUPPORT_H
 
 #include <string>
+#include "llvm/ADT/SmallString.h"
 
 namespace llvm {
 namespace clang {
 
 class Diagnostic;
 class Preprocessor;
+class LexerToken;
 class SourceLocation;
 class TargetInfo;
     
@@ -91,6 +93,37 @@ private:
     return ptr;
   }
 };
+
+/// StringLiteralParser - Concatenates string tokens and decodes their escapes.
+class StringLiteralParser {
+  Preprocessor &PP;
+  TargetInfo &Target;
+  unsigned MaxTokenLength;    // Largest getLength() among the input tokens.
+  unsigned SizeBound;         // Byte size ResultBuf is resized to.
+  unsigned wchar_tByteWidth;  // Queried from Target only when AnyWide is set.
+  SmallString<512> ResultBuf; // Holds the concatenated, decoded string data.
+  char *ResultPtr;            // Cursor: next byte of ResultBuf to fill in.
+public:
+  StringLiteralParser(const LexerToken *StringToks, unsigned NumStringToks,
+                      Preprocessor &PP, TargetInfo &T);
+  bool hadError;  // Set to true by every call to Diag().
+  bool AnyWide;   // True if any input token is a wide string literal.
+  
+  const char *GetString() const { return &ResultBuf[0]; }
+  unsigned GetStringLength() const { return ResultPtr-&ResultBuf[0]; }
+private:
+  void Diag(SourceLocation Loc, unsigned DiagID, 
+            const std::string &M = std::string());
+
+  /// HexDigitValue - Return the value of the specified hex digit, or -1 if it's
+  /// not valid.
+  static int HexDigitValue(char C) {
+    if (C >= '0' && C <= '9') return C-'0';
+    if (C >= 'a' && C <= 'f') return C-'a'+10;
+    if (C >= 'A' && C <= 'F') return C-'A'+10;
+    return -1;
+  }
+};
   
 }  // end namespace clang
 }  // end namespace llvm