initial release
diff --git a/re2/tostring.cc b/re2/tostring.cc
new file mode 100644
index 0000000..9792eac
--- /dev/null
+++ b/re2/tostring.cc
@@ -0,0 +1,333 @@
+// Copyright 2006 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Format a regular expression structure as a string.
+// Tested by parse_unittest.cc
+
+#include "util/util.h"
+#include "re2/regexp.h"
+#include "re2/walker-inl.h"
+
+namespace re2 {
+
+enum {
+  PrecAtom,
+  PrecUnary,
+  PrecConcat,
+  PrecAlternate,
+  PrecEmpty,
+  PrecParen,
+  PrecToplevel,
+};
+
+// Helper function.  See description below.
+static void AppendCCRange(string* t, Rune lo, Rune hi);
+
+// Walker to generate string in s_.
+// The arg pointers are actually integers giving the
+// context precedence.
+// The child_args are always NULL.
+class ToStringWalker : public Regexp::Walker<int> {
+ public:
+  explicit ToStringWalker(string* t) : t_(t) {}
+
+  virtual int PreVisit(Regexp* re, int parent_arg, bool* stop);
+  virtual int PostVisit(Regexp* re, int parent_arg, int pre_arg,
+                        int* child_args, int nchild_args);
+  virtual int ShortVisit(Regexp* re, int parent_arg) {
+    return 0;
+  }
+
+ private:
+  string* t_;  // The string the walker appends to.
+
+  DISALLOW_EVIL_CONSTRUCTORS(ToStringWalker);
+};
+
+string Regexp::ToString() {
+  string t;
+  ToStringWalker w(&t);
+  w.WalkExponential(this, PrecToplevel, 100000);
+  if (w.stopped_early())
+    t += " [truncated]";
+  return t;
+}
+
+#define ToString DontCallToString  // Avoid accidental recursion.
+
+// Visits re before children are processed.
+// Appends ( if needed and passes new precedence to children.
+int ToStringWalker::PreVisit(Regexp* re, int parent_arg, bool* stop) {
+  int prec = parent_arg;
+  int nprec = PrecAtom;
+
+  switch (re->op()) {
+    case kRegexpNoMatch:
+    case kRegexpEmptyMatch:
+    case kRegexpLiteral:
+    case kRegexpAnyChar:
+    case kRegexpAnyByte:
+    case kRegexpBeginLine:
+    case kRegexpEndLine:
+    case kRegexpBeginText:
+    case kRegexpEndText:
+    case kRegexpWordBoundary:
+    case kRegexpNoWordBoundary:
+    case kRegexpCharClass:
+      nprec = PrecAtom;
+      break;
+
+    case kRegexpConcat:
+    case kRegexpLiteralString:
+      if (prec < PrecConcat)
+        t_->append("(?:");
+      nprec = PrecConcat;
+      break;
+
+    case kRegexpAlternate:
+      if (prec < PrecAlternate)
+        t_->append("(?:");
+      nprec = PrecAlternate;
+      break;
+
+    case kRegexpCapture:
+      t_->append("(");
+      if (re->name()) {
+        t_->append("?P<");
+        t_->append(*re->name());
+        t_->append(">");
+      }
+      nprec = PrecParen;
+      break;
+
+    case kRegexpStar:
+    case kRegexpPlus:
+    case kRegexpQuest:
+    case kRegexpRepeat:
+      if (prec < PrecUnary)
+        t_->append("(?:");
+      // The subprecedence here is PrecAtom instead of PrecUnary
+      // because PCRE treats two unary ops in a row as a parse error.
+      nprec = PrecAtom;
+      break;
+  }
+
+  return nprec;
+}
+
+static void AppendLiteral(string *t, Rune r, bool foldcase) {
+  if (r != 0 && r < 0x80 && strchr("(){}[]*+?|.^$\\", r)) {
+    t->append(1, '\\');
+    t->append(1, r);
+  } else if (foldcase && 'a' <= r && r <= 'z') {
+    if ('a' <= r && r <= 'z')
+      r += 'A' - 'a';
+    t->append(1, '[');
+    t->append(1, r);
+    t->append(1, r + 'a' - 'A');
+    t->append(1, ']');
+  } else {
+    AppendCCRange(t, r, r);
+  }
+}
+
+// Visits re after children are processed.
+// For childless regexps, all the work is done here.
+// For regexps with children, append any unary suffixes or ).
+int ToStringWalker::PostVisit(Regexp* re, int parent_arg, int pre_arg,
+                              int* child_args, int nchild_args) {
+  int prec = parent_arg;
+  switch (re->op()) {
+    case kRegexpNoMatch:
+      // There's no simple symbol for "no match", but
+      // [^0-Runemax] excludes everything.
+      t_->append("[^\\x00-\\x{10ffff}]");
+      break;
+
+    case kRegexpEmptyMatch:
+      // Append (?:) to make empty string visible,
+      // unless this is already being parenthesized.
+      if (prec < PrecEmpty)
+        t_->append("(?:)");
+      break;
+
+    case kRegexpLiteral:
+      AppendLiteral(t_, re->rune(), re->parse_flags() & Regexp::FoldCase);
+      break;
+
+    case kRegexpLiteralString:
+      for (int i = 0; i < re->nrunes(); i++)
+        AppendLiteral(t_, re->runes()[i], re->parse_flags() & Regexp::FoldCase);
+      if (prec < PrecConcat)
+        t_->append(")");
+      break;
+
+    case kRegexpConcat:
+      if (prec < PrecConcat)
+        t_->append(")");
+      break;
+
+    case kRegexpAlternate:
+      // Clumsy but workable: the children all appended |
+      // at the end of their strings, so just remove the last one.
+      if ((*t_)[t_->size()-1] == '|')
+        t_->erase(t_->size()-1);
+      else
+        LOG(DFATAL) << "Bad final char: " << t_;
+      if (prec < PrecAlternate)
+        t_->append(")");
+      break;
+
+    case kRegexpStar:
+      t_->append("*");
+      if (re->parse_flags() & Regexp::NonGreedy)
+        t_->append("?");
+      if (prec < PrecUnary)
+        t_->append(")");
+      break;
+
+    case kRegexpPlus:
+      t_->append("+");
+      if (re->parse_flags() & Regexp::NonGreedy)
+        t_->append("?");
+      if (prec < PrecUnary)
+        t_->append(")");
+      break;
+
+    case kRegexpQuest:
+      t_->append("?");
+      if (re->parse_flags() & Regexp::NonGreedy)
+        t_->append("?");
+      if (prec < PrecUnary)
+        t_->append(")");
+      break;
+
+    case kRegexpRepeat:
+      if (re->max() == -1)
+        t_->append(StringPrintf("{%d,}", re->min()));
+      else if (re->min() == re->max())
+        t_->append(StringPrintf("{%d}", re->min()));
+      else
+        t_->append(StringPrintf("{%d,%d}", re->min(), re->max()));
+      if (re->parse_flags() & Regexp::NonGreedy)
+        t_->append("?");
+      if (prec < PrecUnary)
+        t_->append(")");
+      break;
+
+    case kRegexpAnyChar:
+      t_->append(".");
+      break;
+
+    case kRegexpAnyByte:
+      t_->append("\\C");
+      break;
+
+    case kRegexpBeginLine:
+      t_->append("^");
+      break;
+
+    case kRegexpEndLine:
+      t_->append("$");
+      break;
+
+    case kRegexpBeginText:
+      t_->append("(?-m:^)");
+      break;
+
+    case kRegexpEndText:
+      t_->append("(?-m:$)");
+      break;
+
+    case kRegexpWordBoundary:
+      t_->append("\\b");
+      break;
+
+    case kRegexpNoWordBoundary:
+      t_->append("\\B");
+      break;
+
+    case kRegexpCharClass: {
+      if (re->cc()->size() == 0) {
+        // Simplify is supposed to rewrite this to NoMatch.
+        LOG(DFATAL) << "Empty char class in ToString.";
+        t_->append("[^\\x00-\\x{10ffff}]");
+        break;
+      }
+      t_->append("[");
+      // Heuristic: show class as negated if it contains the
+      // non-character 0xFFFE.
+      CharClass* cc = re->cc();
+      if (cc->Contains(0xFFFE)) {
+        cc = cc->Copy();
+        cc->Negate();
+        t_->append("^");
+      }
+      for (CharClass::iterator i = cc->begin(); i != cc->end(); ++i)
+        AppendCCRange(t_, i->lo, i->hi);
+      if (cc != re->cc())
+        delete cc;
+      t_->append("]");
+      break;
+    }
+
+    case kRegexpCapture:
+      t_->append(")");
+      break;
+  }
+
+  // If the parent is an alternation, append the | for it.
+  if (prec == PrecAlternate)
+    t_->append("|");
+
+  return 0;
+}
+
+// Appends a rune for use in a character class to the string t.
+static void AppendCCChar(string* t, Rune r) {
+  if (0x20 <= r && r <= 0x7E) {
+    if (strchr("[]^-\\", r))
+      t->append("\\");
+    t->append(1, r);
+    return;
+  }
+  switch (r) {
+    default:
+      break;
+
+    case '\r':
+      t->append("\\r");
+      return;
+
+    case '\t':
+      t->append("\\t");
+      return;
+
+    case '\n':
+      t->append("\\n");
+      return;
+
+    case '\f':
+      t->append("\\f");
+      return;
+  }
+
+  if (r < 0x100) {
+    StringAppendF(t, "\\x%02x", static_cast<int>(r));
+    return;
+  }
+  StringAppendF(t, "\\x{%x}", static_cast<int>(r));
+}
+
+static void AppendCCRange(string* t, Rune lo, Rune hi) {
+  if (lo > hi)
+    return;
+  AppendCCChar(t, lo);
+  if (lo < hi) {
+    t->append("-");
+    AppendCCChar(t, hi);
+  }
+}
+
+}  // namespace re2