re2: import various changes from Google's copy

Change-Id: I63bd284d742da393713dfdd0164d2ff38fd44920
Reviewed-on: https://code-review.googlesource.com/1592
Reviewed-by: Russ Cox <[email protected]>
diff --git a/re2/dfa.cc b/re2/dfa.cc
index 5b609e6..0e567ae 100644
--- a/re2/dfa.cc
+++ b/re2/dfa.cc
@@ -2015,6 +2015,7 @@
   // Build minimum prefix.
   State* s = params.start;
   min->clear();
+  MutexLock lock(&mutex_);
   for (int i = 0; i < maxlen; i++) {
     if (previously_visited_states[s] > kMaxEltRepetitions) {
       VLOG(2) << "Hit kMaxEltRepetitions=" << kMaxEltRepetitions
@@ -2024,7 +2025,7 @@
     previously_visited_states[s]++;
 
     // Stop if min is a match.
-    State* ns = RunStateOnByteUnlocked(s, kByteEndText);
+    State* ns = RunStateOnByte(s, kByteEndText);
     if (ns == NULL)  // DFA out of memory
       return false;
     if (ns != DeadState && (ns == FullMatchState || ns->IsMatch()))
@@ -2033,7 +2034,7 @@
     // Try to extend the string with low bytes.
     bool extended = false;
     for (int j = 0; j < 256; j++) {
-      ns = RunStateOnByteUnlocked(s, j);
+      ns = RunStateOnByte(s, j);
       if (ns == NULL)  // DFA out of memory
         return false;
       if (ns == FullMatchState ||
@@ -2063,7 +2064,7 @@
     // Try to extend the string with high bytes.
     bool extended = false;
     for (int j = 255; j >= 0; j--) {
-      State* ns = RunStateOnByteUnlocked(s, j);
+      State* ns = RunStateOnByte(s, j);
       if (ns == NULL)
         return false;
       if (ns == FullMatchState ||
diff --git a/re2/filtered_re2.cc b/re2/filtered_re2.cc
index dd2fd37..d6a1a4e 100644
--- a/re2/filtered_re2.cc
+++ b/re2/filtered_re2.cc
@@ -89,12 +89,17 @@
   return !matching_regexps->empty();
 }
 
+void FilteredRE2::AllPotentials(
+    const vector<int>& atoms,
+    vector<int>* potential_regexps) const {
+  prefilter_tree_->RegexpsGivenStrings(atoms, potential_regexps);
+}
+
 void FilteredRE2::RegexpsGivenStrings(const vector<int>& matched_atoms,
                                       vector<int>* passed_regexps) {
   prefilter_tree_->RegexpsGivenStrings(matched_atoms, passed_regexps);
 }
 
-
 void FilteredRE2::PrintPrefilter(int regexpid) {
   prefilter_tree_->PrintPrefilter(regexpid);
 }
diff --git a/re2/filtered_re2.h b/re2/filtered_re2.h
index e597f44..0f161e2 100644
--- a/re2/filtered_re2.h
+++ b/re2/filtered_re2.h
@@ -67,6 +67,14 @@
                   const vector<int>& atoms,
                   vector<int>* matching_regexps) const;
 
+  // Returns the indices of all potentially matching regexps after first
+  // clearing potential_regexps.
+  // A regexp is potentially matching if it passes the filter.
+  // If a regexp passes the filter it may still not match.
+  // A regexp that does not pass the filter is guaranteed to not match.
+  void AllPotentials(const vector<int>& atoms,
+                     vector<int>* potential_regexps) const;
+
   // The number of regexps added.
   int NumRegexps() const { return re2_vec_.size(); }
 
diff --git a/re2/prefilter_tree.cc b/re2/prefilter_tree.cc
index 22bc374..70f2436 100644
--- a/re2/prefilter_tree.cc
+++ b/re2/prefilter_tree.cc
@@ -109,9 +109,11 @@
       // this trigger. TODO(vsri): Adjust the threshold appropriately,
       // make it a function of total number of nodes?
       bool have_other_guard = true;
-      for (StdIntMap::iterator it = parents->begin(); it != parents->end(); ++it)
+      for (StdIntMap::iterator it = parents->begin();
+           it != parents->end(); ++it) {
         have_other_guard = have_other_guard &&
             (entries_[it->first].propagate_up_at_count > 1);
+      }
 
       if (have_other_guard) {
         for (StdIntMap::iterator it = parents->begin();
@@ -208,7 +210,7 @@
   }
   entries_.resize(node_map_.size());
 
-  // Create parent IntMap for the entries.
+  // Create parent StdIntMap for the entries.
   for (int i = v.size()  - 1; i >= 0; i--) {
     Prefilter* prefilter = v[i];
     if (prefilter == NULL)
@@ -256,8 +258,10 @@
           uniq_child.insert(child_id);
           // To the child, we want to add to parent indices.
           Entry* child_entry = &entries_[child_id];
-          if (child_entry->parents->find(prefilter->unique_id()) == child_entry->parents->end())
+          if (child_entry->parents->find(prefilter->unique_id()) ==
+              child_entry->parents->end()) {
             (*child_entry->parents)[prefilter->unique_id()] = 1;
+          }
         }
         entry->propagate_up_at_count =
             prefilter->op() == Prefilter::AND ? uniq_child.size() : 1;
diff --git a/re2/re2.cc b/re2/re2.cc
index d67ef45..fa30b14 100644
--- a/re2/re2.cc
+++ b/re2/re2.cc
@@ -976,16 +976,23 @@
 // Largest number spec that we are willing to parse
 static const int kMaxNumberLength = 32;
 
-// REQUIRES "buf" must have length at least kMaxNumberLength+1
+// REQUIRES "buf" must have length at least nbuf.
 // Copies "str" into "buf" and null-terminates.
 // Overwrites *np with the new length.
-static const char* TerminateNumber(char* buf, const char* str, int* np) {
+static const char* TerminateNumber(char* buf, int nbuf, const char* str, int* np,
+                                   bool accept_spaces) {
   int n = *np;
   if (n <= 0) return "";
   if (n > 0 && isspace(*str)) {
     // We are less forgiving than the strtoxxx() routines and do not
-    // allow leading spaces.
-    return "";
+    // allow leading spaces. We do allow leading spaces for floats.
+    if (!accept_spaces) {
+      return "";
+    }
+    while (n > 0 && isspace(*str)) {
+      n--;
+      str++;
+    }
   }
 
   // Although buf has a fixed maximum size, we can still handle
@@ -1015,7 +1022,7 @@
     str--;
   }
 
-  if (n > kMaxNumberLength) return "";
+  if (n > nbuf-1) return "";
 
   memmove(buf, str, n);
   if (neg) {
@@ -1032,7 +1039,7 @@
                                int radix) {
   if (n == 0) return false;
   char buf[kMaxNumberLength+1];
-  str = TerminateNumber(buf, str, &n);
+  str = TerminateNumber(buf, sizeof buf, str, &n, false);
   char* end;
   errno = 0;
   long r = strtol(str, &end, radix);
@@ -1049,7 +1056,7 @@
                                 int radix) {
   if (n == 0) return false;
   char buf[kMaxNumberLength+1];
-  str = TerminateNumber(buf, str, &n);
+  str = TerminateNumber(buf, sizeof buf, str, &n, false);
   if (str[0] == '-') {
    // strtoul() will silently accept negative numbers and parse
    // them.  This module is more strict and treats them as errors.
@@ -1121,7 +1128,7 @@
                                    int radix) {
   if (n == 0) return false;
   char buf[kMaxNumberLength+1];
-  str = TerminateNumber(buf, str, &n);
+  str = TerminateNumber(buf, sizeof buf, str, &n, false);
   char* end;
   errno = 0;
   int64 r = strtoll(str, &end, radix);
@@ -1138,7 +1145,7 @@
                                     int radix) {
   if (n == 0) return false;
   char buf[kMaxNumberLength+1];
-  str = TerminateNumber(buf, str, &n);
+  str = TerminateNumber(buf, sizeof buf, str, &n, false);
   if (str[0] == '-') {
     // strtoull() will silently accept negative numbers and parse
     // them.  This module is more strict and treats them as errors.
@@ -1158,19 +1165,17 @@
 static bool parse_double_float(const char* str, int n, bool isfloat, void *dest) {
   if (n == 0) return false;
   static const int kMaxLength = 200;
-  char buf[kMaxLength];
-  if (n >= kMaxLength) return false;
-  memcpy(buf, str, n);
-  buf[n] = '\0';
-  errno = 0;
+  char buf[kMaxLength+1];
+  str = TerminateNumber(buf, sizeof buf, str, &n, true);
   char* end;
+  errno = 0;
   double r;
   if (isfloat) {
-    r = strtof(buf, &end);
+    r = strtof(str, &end);
   } else {
-    r = strtod(buf, &end);
+    r = strtod(str, &end);
   }
-  if (end != buf + n) return false;   // Leftover junk
+  if (end != str + n) return false;   // Leftover junk
   if (errno) return false;
   if (dest == NULL) return true;
   if (isfloat) {
diff --git a/re2/re2.h b/re2/re2.h
index 39fbbb3..71126db 100644
--- a/re2/re2.h
+++ b/re2/re2.h
@@ -397,6 +397,8 @@
   //
   // Returns true iff a match occurred and the extraction happened
   // successfully;  if no match occurs, the string is left unaffected.
+  //
+  // REQUIRES: "text" must not alias any part of "*out".
   static bool Extract(const StringPiece &text,
                       const RE2& pattern,
                       const StringPiece &rewrite,
diff --git a/re2/regexp.cc b/re2/regexp.cc
index c67c3b0..3667fda 100644
--- a/re2/regexp.cc
+++ b/re2/regexp.cc
@@ -43,7 +43,8 @@
       delete[] runes_;
       break;
     case kRegexpCharClass:
-      cc_->Delete();
+      if (cc_)
+        cc_->Delete();
       delete ccb_;
       break;
   }
@@ -211,6 +212,13 @@
   if (nsub == 1)
     return sub[0];
 
+  if (nsub == 0) {
+    if (op == kRegexpAlternate)
+      return new Regexp(kRegexpNoMatch, flags);
+    else
+      return new Regexp(kRegexpEmptyMatch, flags);
+  }
+
   Regexp** subcopy = NULL;
   if (op == kRegexpAlternate && can_factor) {
     // Going to edit sub; make a copy so we don't step on caller.
@@ -916,7 +924,7 @@
 
 CharClass* CharClassBuilder::GetCharClass() {
   CharClass* cc = CharClass::New(ranges_.size());
-  int n = 0;
+  size_t n = 0;
   for (iterator it = begin(); it != end(); ++it)
     cc->ranges_[n++] = *it;
   cc->nranges_ = n;
diff --git a/re2/simplify.cc b/re2/simplify.cc
index 185d6d3..9c0021e 100644
--- a/re2/simplify.cc
+++ b/re2/simplify.cc
@@ -325,7 +325,6 @@
     // General case: x{4,} is xxxx+
     Regexp* nre = new Regexp(kRegexpConcat, f);
     nre->AllocSub(min);
-    VLOG(1) << "Simplify " << min;
     Regexp** nre_subs = nre->sub();
     for (int i = 0; i < min-1; i++)
       nre_subs[i] = re->Incref();
diff --git a/re2/stringpiece.h b/re2/stringpiece.h
index ab9297c..37c73d1 100644
--- a/re2/stringpiece.h
+++ b/re2/stringpiece.h
@@ -138,6 +138,8 @@
 
   int copy(char* buf, size_type n, size_type pos = 0) const;
 
+  bool contains(StringPiece s) const;
+
   int find(const StringPiece& s, size_type pos = 0) const;
   int find(char c, size_type pos = 0) const;
   int rfind(const StringPiece& s, size_type pos = npos) const;
diff --git a/re2/testing/dfa_test.cc b/re2/testing/dfa_test.cc
index 8e95ae4..c8805c4 100644
--- a/re2/testing/dfa_test.cc
+++ b/re2/testing/dfa_test.cc
@@ -42,7 +42,7 @@
   // Check that single-threaded code works.
   {
     //LOG(INFO) << s;
-    Regexp* re = Regexp::Parse(s.c_str(), Regexp::LikePerl, NULL);
+    Regexp* re = Regexp::Parse(s, Regexp::LikePerl, NULL);
     CHECK(re);
     Prog* prog = re->CompileToProg(0);
     CHECK(prog);
@@ -57,7 +57,7 @@
 
   // Build the DFA simultaneously in a bunch of threads.
   for (int i = 0; i < FLAGS_repeat; i++) {
-    Regexp* re = Regexp::Parse(s.c_str(), Regexp::LikePerl, NULL);
+    Regexp* re = Regexp::Parse(s, Regexp::LikePerl, NULL);
     CHECK(re);
     Prog* prog = re->CompileToProg(0);
     CHECK(prog);
@@ -93,7 +93,7 @@
   s += "b";
 
   //LOG(INFO) << s;
-  Regexp* re = Regexp::Parse(s.c_str(), Regexp::LikePerl, NULL);
+  Regexp* re = Regexp::Parse(s, Regexp::LikePerl, NULL);
   CHECK(re);
   int max = 24;
   for (int i = 17; i < max; i++) {
@@ -115,7 +115,7 @@
       continue;
     //LOG(INFO) << StringPrintf("Limit %d: prog used %d, DFA budget %d, total %d\n",
     //                          limit, progusage, dfamem, usage);
-    CHECK_GT(usage, limit*9/10);
+    CHECK_GT(usage, limit*8/10);
     CHECK_LT(usage, limit + (16<<10));  // 16kB of slop okay
   }
   re->Decref();
diff --git a/re2/testing/exhaustive_tester.cc b/re2/testing/exhaustive_tester.cc
index 54de857..0e90f33 100644
--- a/re2/testing/exhaustive_tester.cc
+++ b/re2/testing/exhaustive_tester.cc
@@ -148,7 +148,7 @@
                     int maxstrlen, const vector<string>& stralphabet,
                     const string& wrapper,
                     const string& topwrapper) {
-  if (DEBUG_MODE && FLAGS_quick_debug_mode) {
+  if (RE2_DEBUG_MODE && FLAGS_quick_debug_mode) {
     if (maxatoms > 1)
       maxatoms--;
     if (maxops > 1)
diff --git a/re2/testing/exhaustive_tester.h b/re2/testing/exhaustive_tester.h
index 2e72de6..1facb97 100644
--- a/re2/testing/exhaustive_tester.h
+++ b/re2/testing/exhaustive_tester.h
@@ -13,6 +13,16 @@
 
 namespace re2 {
 
+#if !defined(NDEBUG)
+// We are in a debug build.
+const bool RE2_DEBUG_MODE = true;
+#elif ADDRESS_SANITIZER || MEMORY_SANITIZER || THREAD_SANITIZER
+// Not a debug build, but still under sanitizers.
+const bool RE2_DEBUG_MODE = true;
+#else
+const bool RE2_DEBUG_MODE = false;
+#endif
+
 // Exhaustive regular expression test: generate all regexps within parameters,
 // then generate all strings of a given length over a given alphabet,
 // then check that NFA, DFA, and PCRE agree about whether each regexp matches
diff --git a/re2/testing/possible_match_test.cc b/re2/testing/possible_match_test.cc
index a18b692..4687165 100644
--- a/re2/testing/possible_match_test.cc
+++ b/re2/testing/possible_match_test.cc
@@ -7,6 +7,7 @@
 #include "re2/prog.h"
 #include "re2/re2.h"
 #include "re2/regexp.h"
+#include "re2/testing/exhaustive_tester.h"
 #include "re2/testing/regexp_generator.h"
 #include "re2/testing/string_generator.h"
 
@@ -136,26 +137,26 @@
   // are no valid UTF-8 strings beginning with byte 0xFF.
   EXPECT_FALSE(RE2("[\\s\\S]+", RE2::Latin1).
                PossibleMatchRange(&min, &max, 10))
-    << "min=" << CEscape(min) << ", max=" << CEscape(max);
+      << "min=" << CEscape(min) << ", max=" << CEscape(max);
   EXPECT_FALSE(RE2("[\\0-\xFF]+", RE2::Latin1).
                PossibleMatchRange(&min, &max, 10))
-    << "min=" << CEscape(min) << ", max=" << CEscape(max);
+      << "min=" << CEscape(min) << ", max=" << CEscape(max);
   EXPECT_FALSE(RE2(".+hello", RE2::Latin1).
                PossibleMatchRange(&min, &max, 10))
-    << "min=" << CEscape(min) << ", max=" << CEscape(max);
+      << "min=" << CEscape(min) << ", max=" << CEscape(max);
   EXPECT_FALSE(RE2(".*hello", RE2::Latin1).
                PossibleMatchRange(&min, &max, 10))
-    << "min=" << CEscape(min) << ", max=" << CEscape(max);
+      << "min=" << CEscape(min) << ", max=" << CEscape(max);
   EXPECT_FALSE(RE2(".*", RE2::Latin1).
                PossibleMatchRange(&min, &max, 10))
-    << "min=" << CEscape(min) << ", max=" << CEscape(max);
+      << "min=" << CEscape(min) << ", max=" << CEscape(max);
   EXPECT_FALSE(RE2("\\C*").
                PossibleMatchRange(&min, &max, 10))
-    << "min=" << CEscape(min) << ", max=" << CEscape(max);
+      << "min=" << CEscape(min) << ", max=" << CEscape(max);
 
   // Fails because it's a malformed regexp.
   EXPECT_FALSE(RE2("*hello").PossibleMatchRange(&min, &max, 10))
-    << "min=" << CEscape(min) << ", max=" << CEscape(max);
+      << "min=" << CEscape(min) << ", max=" << CEscape(max);
 }
 
 // Exhaustive test: generate all regexps within parameters,
@@ -224,7 +225,7 @@
   int natom = 3;
   int noperator = 3;
   int stringlen = 5;
-  if (DEBUG_MODE) {
+  if (RE2_DEBUG_MODE) {
     natom = 2;
     noperator = 3;
     stringlen = 3;
diff --git a/re2/testing/random_test.cc b/re2/testing/random_test.cc
index 91d2b32..d67ae64 100644
--- a/re2/testing/random_test.cc
+++ b/re2/testing/random_test.cc
@@ -25,7 +25,7 @@
                        const string& wrapper) {
   // Limit to smaller test cases in debug mode,
   // because everything is so much slower.
-  if (DEBUG_MODE) {
+  if (RE2_DEBUG_MODE) {
     maxatoms--;
     maxops--;
     maxstrlen /= 2;
diff --git a/re2/testing/re2_test.cc b/re2/testing/re2_test.cc
index e2f9775..0598f85 100644
--- a/re2/testing/re2_test.cc
+++ b/re2/testing/re2_test.cc
@@ -519,6 +519,34 @@
   }
 }
 
+TEST(RE2, CapturedGroupTest) {
+  RE2 re("directions from (?P<S>.*) to (?P<D>.*)");
+  int num_groups = re.NumberOfCapturingGroups();
+  EXPECT_EQ(2, num_groups);
+  string args[4];
+  RE2::Arg arg0(&args[0]);
+  RE2::Arg arg1(&args[1]);
+  RE2::Arg arg2(&args[2]);
+  RE2::Arg arg3(&args[3]);
+
+  const RE2::Arg* const matches[4] = {&arg0, &arg1, &arg2, &arg3};
+  EXPECT_TRUE(RE2::FullMatchN("directions from mountain view to san jose",
+                              re, matches, num_groups));
+  const map<string, int>& named_groups = re.NamedCapturingGroups();
+  EXPECT_TRUE(named_groups.find("S") != named_groups.end());
+  EXPECT_TRUE(named_groups.find("D") != named_groups.end());
+
+  // The named group index is 1-based.
+  int source_group_index = named_groups.find("S")->second;
+  int destination_group_index = named_groups.find("D")->second;
+  EXPECT_EQ(1, source_group_index);
+  EXPECT_EQ(2, destination_group_index);
+
+  // The args is zero-based.
+  EXPECT_EQ("mountain view", args[source_group_index - 1]);
+  EXPECT_EQ("san jose", args[destination_group_index - 1]);
+}
+
 TEST(RE2, FullMatchWithNoArgs) {
   CHECK(RE2::FullMatch("h", "h"));
   CHECK(RE2::FullMatch("hello", "hello"));
@@ -696,7 +724,7 @@
 
 TEST(RE2, FullMatchTypeTests) {
   // Type tests
-  string zeros(100, '0');
+  string zeros(1000, '0');
   {
     char c;
     CHECK(RE2::FullMatch("Hello", "(H)ello", &c));
@@ -798,12 +826,13 @@
 }
 
 TEST(RE2, FloatingPointFullMatchTypes) {
-  string zeros(100, '0');
+  string zeros(1000, '0');
   {
     float v;
     CHECK(RE2::FullMatch("100",   "(.*)", &v));  CHECK_EQ(v, 100);
     CHECK(RE2::FullMatch("-100.", "(.*)", &v));  CHECK_EQ(v, -100);
     CHECK(RE2::FullMatch("1e23",  "(.*)", &v));  CHECK_EQ(v, float(1e23));
+    CHECK(RE2::FullMatch(" 100",  "(.*)", &v));  CHECK_EQ(v, 100);
 
     CHECK(RE2::FullMatch(zeros + "1e23",  "(.*)", &v));
     CHECK_EQ(v, float(1e23));
diff --git a/re2/testing/required_prefix_test.cc b/re2/testing/required_prefix_test.cc
index 1f0b216..aed41f7 100644
--- a/re2/testing/required_prefix_test.cc
+++ b/re2/testing/required_prefix_test.cc
@@ -28,7 +28,7 @@
 
   // Otherwise, it should work.
   { "^abc$", true, "abc", false, "(?-m:$)" },
-  { "^abc", "true", "abc", false, "" },
+  { "^abc", true, "abc", false, "" },
   { "^(?i)abc", true, "abc", true, "" },
   { "^abcd*", true, "abc", false, "d*" },
   { "^[Aa][Bb]cd*", true, "ab", true, "cd*" },
diff --git a/re2/testing/tester.cc b/re2/testing/tester.cc
index 63e9cc4..4146513 100644
--- a/re2/testing/tester.cc
+++ b/re2/testing/tester.cc
@@ -246,6 +246,7 @@
   // 2. It treats $ as this weird thing meaning end of string
   //    or before the \n at the end of the string.
   // 3. It doesn't implement POSIX leftmost-longest matching.
+  // 4. It lets \s match vertical tab.
   // MimicsPCRE() detects 1 and 2.
   if ((Engines() & (1<<kEnginePCRE)) && regexp_->MimicsPCRE() &&
       kind_ != Prog::kLongestMatch) {
@@ -343,7 +344,8 @@
                                Prog::kAnchored, Prog::kLongestMatch,
                                result->submatch,
                                &result->skipped, NULL)) {
-          LOG(ERROR) << "Reverse DFA inconsistency: " << CEscape(regexp_str_)
+          LOG(ERROR) << "Reverse DFA inconsistency: "
+                     << CEscape(regexp_str_)
                      << " on " << CEscape(text);
           result->matched = false;
         }
@@ -405,6 +407,14 @@
         break;
       }
 
+      // PCRE 8.34 or so started allowing vertical tab to match \s,
+      // following a change made in Perl 5.18. RE2 does not.
+      if ((regexp_str_.contains("\\s") || regexp_str_.contains("\\S")) &&
+          text.contains("\v")) {
+        result->skipped = true;
+        break;
+      }
+
       const PCRE::Arg **argptr = new const PCRE::Arg*[nsubmatch];
       PCRE::Arg *a = new PCRE::Arg[nsubmatch];
       for (int i = 0; i < nsubmatch; i++) {
diff --git a/util/atomicops.h b/util/atomicops.h
index dd951f2..f2a0867 100644
--- a/util/atomicops.h
+++ b/util/atomicops.h
@@ -11,8 +11,11 @@
 // ACQUIRE - prevents memory accesses from hoisting above the operation.
 // RELEASE - prevents memory accesses from sinking below the operation.
 
-#if (__clang_major__ * 100 + __clang_minor__ >= 303) || \
-	(__GNUC__ * 1000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__ >= 40801)
+#ifndef __has_builtin
+#define __has_builtin(x) 0
+#endif
+
+#if !defined(OS_NACL) && (__has_builtin(__atomic_load_n) || (__GNUC__*10000 + __GNUC_MINOR__*100 + __GNUC_PATCHLEVEL__ >= 40801))
 
 #define ATOMIC_LOAD_RELAXED(x, p) do { (x) = __atomic_load_n((p), __ATOMIC_RELAXED); } while (0)
 #define ATOMIC_LOAD_CONSUME(x, p) do { (x) = __atomic_load_n((p), __ATOMIC_CONSUME); } while (0)
@@ -47,7 +50,7 @@
   __asm__ __volatile__("sfence" : : : "memory");
 }
 
-#elif defined(__ppc__)
+#elif defined(__ppc__) || defined(__powerpc64__)
 
 static inline void WriteMemoryBarrier() {
   __asm__ __volatile__("eieio" : : : "memory");
@@ -65,6 +68,34 @@
   __asm__ __volatile__("dmb st" : : : "memory");
 }
 
+#elif defined(__arm__) && defined(__linux__)
+
+// Linux on ARM puts a suitable memory barrier at a magic address for us to call.
+static inline void WriteMemoryBarrier() {
+  ((void(*)(void))0xffff0fa0)();
+}
+
+#elif defined(__windows__)
+
+// Windows
+inline void WriteMemoryBarrier() {
+  LONG x;
+  ::InterlockedExchange(&x, 0);
+}
+
+#elif defined(OS_NACL)
+
+// Native Client
+inline void WriteMemoryBarrier() {
+  __sync_synchronize();
+}
+
+#elif defined(__mips__)
+
+inline void WriteMemoryBarrier() {
+  __asm__ __volatile__("sync" : : : "memory");
+}
+
 #else
 
 #include "util/mutex.h"
@@ -122,6 +153,12 @@
   __asm__ __volatile__("mb" : : : "memory");
 }
 
+#elif defined(__mips__)
+
+inline void ReadMemoryBarrier() {
+  __asm__ __volatile__("sync" : : : "memory");
+}
+
 #else
 
 static inline void ReadMemoryBarrier() {}
diff --git a/util/sparse_set.h b/util/sparse_set.h
index d047975..ff592a8 100644
--- a/util/sparse_set.h
+++ b/util/sparse_set.h
@@ -51,19 +51,28 @@
 
 namespace re2 {
 
+static bool InitMemory() {
+#ifdef MEMORY_SANITIZER
+  return true;
+#else
+  return RunningOnValgrind();
+#endif
+}
+
 class SparseSet {
  public:
   SparseSet()
-    : size_(0), max_size_(0), sparse_to_dense_(NULL), dense_(NULL), valgrind_(RunningOnValgrind()) {}
+    : size_(0), max_size_(0), sparse_to_dense_(NULL), dense_(NULL),
+      init_memory_(InitMemory()) {}
 
   SparseSet(int max_size) {
     max_size_ = max_size;
     sparse_to_dense_ = new int[max_size];
     dense_ = new int[max_size];
-    valgrind_ = RunningOnValgrind();
+    init_memory_ = InitMemory();
     // Don't need to zero the memory, but do so anyway
     // to appease Valgrind.
-    if (valgrind_) {
+    if (init_memory_) {
       for (int i = 0; i < max_size; i++) {
         dense_[i] = 0xababababU;
         sparse_to_dense_[i] = 0xababababU;
@@ -95,7 +104,7 @@
       int* a = new int[new_max_size];
       if (sparse_to_dense_) {
         memmove(a, sparse_to_dense_, max_size_*sizeof a[0]);
-        if (valgrind_) {
+        if (init_memory_) {
           for (int i = max_size_; i < new_max_size; i++)
             a[i] = 0xababababU;
         }
@@ -106,7 +115,7 @@
       a = new int[new_max_size];
       if (dense_) {
         memmove(a, dense_, size_*sizeof a[0]);
-        if (valgrind_) {
+        if (init_memory_) {
           for (int i = size_; i < new_max_size; i++)
             a[i] = 0xababababU;
         }
@@ -169,7 +178,7 @@
   int max_size_;
   int* sparse_to_dense_;
   int* dense_;
-  bool valgrind_;
+  bool init_memory_;
 
   DISALLOW_COPY_AND_ASSIGN(SparseSet);
 };
diff --git a/util/stringpiece.cc b/util/stringpiece.cc
index 5859526..f9e2294 100644
--- a/util/stringpiece.cc
+++ b/util/stringpiece.cc
@@ -39,6 +39,10 @@
   return ret;
 }
 
+bool StringPiece::contains(StringPiece s) const {
+  return (size_t)find(s, 0) != npos;
+}
+
 int StringPiece::find(const StringPiece& s, size_type pos) const {
   if (length_ < 0 || pos > static_cast<size_type>(length_))
     return npos;