re2: fix bitstate for empty char classes
Fixes issue 52.
R=rsc
CC=re2-dev
https://codereview.appspot.com/50370043
diff --git a/re2/compile.cc b/re2/compile.cc
index 9cddb71..0eb38e7 100644
--- a/re2/compile.cc
+++ b/re2/compile.cc
@@ -374,6 +374,8 @@
// Given a fragment for a, returns a fragment for a? or a?? (if nongreedy)
Frag Compiler::Quest(Frag a, bool nongreedy) {
+ if (IsNoMatch(a))
+ return Nop();
int id = AllocInst(1);
if (id < 0)
return NoMatch();
@@ -446,6 +448,8 @@
// Given a fragment a, returns a fragment with capturing parens around a.
Frag Compiler::Capture(Frag a, int n) {
+ if (IsNoMatch(a))
+ return NoMatch();
int id = AllocInst(2);
if (id < 0)
return NoMatch();
diff --git a/re2/testing/re2_test.cc b/re2/testing/re2_test.cc
index 0c8896f..e2f9775 100644
--- a/re2/testing/re2_test.cc
+++ b/re2/testing/re2_test.cc
@@ -483,6 +483,21 @@
CHECK(!RE2(empties[i]).Match("abc", 0, 3, RE2::UNANCHORED, NULL, 0));
}
+// Bitstate assumes that kInstFail instructions in
+// alternations or capture groups have been "compiled away".
+TEST(EmptyCharset, BitstateAssumptions) {
+ // Captures trigger use of Bitstate.
+ static const char *nop_empties[] = {
+ "((((()))))" "[^\\S\\s]?",
+ "((((()))))" "([^\\S\\s])?",
+ "((((()))))" "([^\\S\\s]|[^\\S\\s])?",
+ "((((()))))" "(([^\\S\\s]|[^\\S\\s])|)"
+ };
+ StringPiece group[6];
+ for (int i = 0; i < arraysize(nop_empties); i++)
+ CHECK(RE2(nop_empties[i]).Match("", 0, 0, RE2::UNANCHORED, group, 6));
+}
+
// Test that named groups work correctly.
TEST(Capture, NamedGroups) {
{