bug fix: handle alternation involving a regexp
ending in \C* (or .* in Latin-1 mode) correctly

Bug only affected calls to Match, not FullMatch or PartialMatch.

R=rsc
CC=re2-dev
http://codereview.appspot.com/1690054
diff --git a/re2/dfa.cc b/re2/dfa.cc
index 3bea979..3f5e4b0 100644
--- a/re2/dfa.cc
+++ b/re2/dfa.cc
@@ -586,6 +586,7 @@
   int n = 0;
   uint needflags = 0;     // flags needed by kInstEmptyWidth instructions
   bool sawmatch = false;  // whether queue contains guaranteed kInstMatch
+  bool sawmark = false;  // whether queue contains a Mark
   if (DebugDFA)
     fprintf(stderr, "WorkqToCachedState %s [%#x]", DumpWorkq(q).c_str(), flag);
   for (Workq::iterator it = q->begin(); it != q->end(); ++it) {
@@ -593,17 +594,26 @@
     if (sawmatch && (kind_ == Prog::kFirstMatch || q->is_mark(id)))
       break;
     if (q->is_mark(id)) {
-      if (n > 0 && inst[n-1] != Mark)
+      if (n > 0 && inst[n-1] != Mark) {
+        sawmark = true;
         inst[n++] = Mark;
+      }
       continue;
     }
     Prog::Inst* ip = prog_->inst(id);
     switch (ip->opcode()) {
       case kInstAltMatch:
+        // This state will continue to a match no matter what
+        // the rest of the input is.  If it is the highest priority match
+        // being considered, return the special FullMatchState
+        // to indicate that it's all matches from here out.
         if (kind_ != Prog::kManyMatch &&
             (kind_ != Prog::kFirstMatch ||
-             (it == q->begin() && ip->greedy(prog_)))) {
+             (it == q->begin() && ip->greedy(prog_))) &&
+            (kind_ != Prog::kLongestMatch || !sawmark)) {
           delete[] inst;
+          if (DebugDFA)
+            fprintf(stderr, " -> FullMatchState\n");
           return FullMatchState;
         }
         // Fall through.
@@ -663,6 +673,8 @@
   // if the state is *not* a matching state.
   if (n == 0 && flag == 0) {
     delete[] inst;
+    if (DebugDFA)
+      fprintf(stderr, " -> DeadState\n");
     return DeadState;
   }
 
diff --git a/re2/testing/dfa_test.cc b/re2/testing/dfa_test.cc
index acb338a..acd6432 100644
--- a/re2/testing/dfa_test.cc
+++ b/re2/testing/dfa_test.cc
@@ -100,7 +100,10 @@
   //LOG(INFO) << s;
   Regexp* re = Regexp::Parse(s.c_str(), Regexp::LikePerl, NULL);
   CHECK(re);
-  for (int i = 17; i < 28; i++) {
+  int max = 28;
+  if (DEBUG_MODE)
+    max = 24;
+  for (int i = 17; i < max; i++) {
     int limit = 1<<i;
     int usage, progusage, dfamem;
     {
diff --git a/re2/testing/random_test.cc b/re2/testing/random_test.cc
index 6d6b5eb..91d2b32 100644
--- a/re2/testing/random_test.cc
+++ b/re2/testing/random_test.cc
@@ -23,6 +23,14 @@
                        const vector<string>& ops,
                        int maxstrlen, const vector<string>& stralphabet,
                        const string& wrapper) {
+  // Limit to smaller test cases in debug mode,
+  // because everything is so much slower.
+  if (DEBUG_MODE) {
+    maxatoms--;
+    maxops--;
+    maxstrlen /= 2;
+  }
+
   ExhaustiveTester t(maxatoms, maxops, alphabet, ops,
                      maxstrlen, stralphabet, wrapper, "");
   t.RandomStrings(FLAGS_stringseed, FLAGS_stringcount);
@@ -35,14 +43,14 @@
 // Tests random small regexps involving literals and egrep operators.
 TEST(Random, SmallEgrepLiterals) {
   RandomTest(5, 5, Explode("abc."), RegexpGenerator::EgrepOps(),
-             20, Explode("abc"),
+             15, Explode("abc"),
              "");
 }
 
 // Tests random bigger regexps involving literals and egrep operators.
 TEST(Random, BigEgrepLiterals) {
   RandomTest(10, 10, Explode("abc."), RegexpGenerator::EgrepOps(),
-             20, Explode("abc"),
+             15, Explode("abc"),
              "");
 }
 
@@ -50,7 +58,7 @@
 // and egrep operators.
 TEST(Random, SmallEgrepCaptures) {
   RandomTest(5, 5, Split(" ", "a (b) ."), RegexpGenerator::EgrepOps(),
-             20, Explode("abc"),
+             15, Explode("abc"),
              "");
 }
 
@@ -58,7 +66,7 @@
 // and egrep operators.
 TEST(Random, BigEgrepCaptures) {
   RandomTest(10, 10, Split(" ", "a (b) ."), RegexpGenerator::EgrepOps(),
-             20, Explode("abc"),
+             15, Explode("abc"),
              "");
 }
 
diff --git a/re2/testing/search_test.cc b/re2/testing/search_test.cc
index 419d7ba..588e5d5 100644
--- a/re2/testing/search_test.cc
+++ b/re2/testing/search_test.cc
@@ -264,6 +264,8 @@
   { "[^\\\\]+", "Aa\\" },
   { "[acegikmoqsuwy]+", "acegikmoqsuwyACEGIKMOQSUWY" },
 
+  // Former bugs.
+  { "a\\C*|ba\\C", "baba" },
 };
 
 TEST(Regexp, SearchTests) {