Introduce the shard_cache_mutex option.
Change-Id: I27abca99e6e2047e9ab8720a8de5687b0003502c
Reviewed-on: https://code-review.googlesource.com/c/36310
Reviewed-by: Paul Wankadia <[email protected]>
diff --git a/re2/prog.cc b/re2/prog.cc
index b0ae375..1103111 100644
--- a/re2/prog.cc
+++ b/re2/prog.cc
@@ -115,6 +115,7 @@
inst_(NULL),
onepass_nodes_(NULL),
dfa_mem_(0),
+ shard_cache_mutex_(false),
dfa_first_(NULL),
dfa_longest_(NULL) {
}
diff --git a/re2/prog.h b/re2/prog.h
index 6d48135..3bfc9e9 100644
--- a/re2/prog.h
+++ b/re2/prog.h
@@ -200,6 +200,8 @@
int inst_count(InstOp op) { return inst_count_[op]; }
void set_dfa_mem(int64_t dfa_mem) { dfa_mem_ = dfa_mem; }
int64_t dfa_mem() { return dfa_mem_; }
+ void set_shard_cache_mutex(bool b) { shard_cache_mutex_ = b; }
+ bool shard_cache_mutex() { return shard_cache_mutex_; }
int flags() { return flags_; }
void set_flags(int flags) { flags_ = flags; }
bool anchor_start() { return anchor_start_; }
@@ -399,6 +401,7 @@
uint8_t* onepass_nodes_; // data for OnePass nodes
int64_t dfa_mem_; // Maximum memory for DFAs.
+ bool shard_cache_mutex_; // shard the DFA state cache mutex
DFA* dfa_first_; // DFA cached for kFirstMatch/kManyMatch
DFA* dfa_longest_; // DFA cached for kLongestMatch/kFullMatch
diff --git a/re2/re2.cc b/re2/re2.cc
index f3f96ae..82911bc 100644
--- a/re2/re2.cc
+++ b/re2/re2.cc
@@ -49,6 +49,7 @@
dot_nl_(false),
never_capture_(false),
case_sensitive_(true),
+ shard_cache_mutex_(false),
perl_classes_(false),
word_boundary_(false),
one_line_(false) {
@@ -210,7 +211,9 @@
// one third to the reverse prog, because the forward
// Prog has two DFAs but the reverse prog has one.
prog_ = suffix_regexp_->CompileToProg(options_.max_mem()*2/3);
- if (prog_ == NULL) {
+ if (prog_ != NULL) {
+ prog_->set_shard_cache_mutex(options_.shard_cache_mutex());
+ } else {
if (options_.log_errors())
LOG(ERROR) << "Error compiling '" << trunc(pattern_) << "'";
error_ = new string("pattern too large - compile failed");
@@ -231,7 +234,9 @@
std::call_once(rprog_once_, [](const RE2* re) {
re->rprog_ =
re->suffix_regexp_->CompileToReverseProg(re->options_.max_mem() / 3);
- if (re->rprog_ == NULL) {
+ if (re->rprog_ != NULL) {
+ re->rprog_->set_shard_cache_mutex(re->options_.shard_cache_mutex());
+ } else {
if (re->options_.log_errors())
LOG(ERROR) << "Error reverse compiling '" << trunc(re->pattern_) << "'";
re->error_ = new string("pattern too large - reverse compile failed");
diff --git a/re2/re2.h b/re2/re2.h
index c93de56..5ea8efd 100644
--- a/re2/re2.h
+++ b/re2/re2.h
@@ -560,6 +560,7 @@
// never_capture (false) parse all parens as non-capturing
// case_sensitive (true) match is case-sensitive (regexp can override
// with (?i) unless in posix_syntax mode)
+ // shard_cache_mutex (false) shard the DFA state cache mutex (see below)
//
// The following options are only consulted when posix_syntax == true.
// When posix_syntax == false, these features are always enabled and
@@ -596,6 +597,11 @@
//
// Once a DFA fills its budget, it flushes its cache and starts over.
// If this happens too often, RE2 falls back on the NFA implementation.
+ //
+ // The shard_cache_mutex option causes RE2 to shard the DFA state cache
+ // mutex N ways; N will be determined automatically. This increases the
+ // memory footprint somewhat, so it should not be used unless profiling
+ // shows that reader lock contention is incurring substantial overhead.
// For now, make the default budget something close to Code Search.
static const int kDefaultMaxMem = 8<<20;
@@ -616,6 +622,7 @@
dot_nl_(false),
never_capture_(false),
case_sensitive_(true),
+ shard_cache_mutex_(false),
perl_classes_(false),
word_boundary_(false),
one_line_(false) {
@@ -664,6 +671,9 @@
bool case_sensitive() const { return case_sensitive_; }
void set_case_sensitive(bool b) { case_sensitive_ = b; }
+ bool shard_cache_mutex() const { return shard_cache_mutex_; }
+ void set_shard_cache_mutex(bool b) { shard_cache_mutex_ = b; }
+
bool perl_classes() const { return perl_classes_; }
void set_perl_classes(bool b) { perl_classes_ = b; }
@@ -690,6 +700,7 @@
bool dot_nl_;
bool never_capture_;
bool case_sensitive_;
+ bool shard_cache_mutex_;
bool perl_classes_;
bool word_boundary_;
bool one_line_;
diff --git a/re2/set.cc b/re2/set.cc
index 9890de9..a624d6a 100644
--- a/re2/set.cc
+++ b/re2/set.cc
@@ -99,6 +99,8 @@
delete[] sub;
prog_ = Prog::CompileSet(re, anchor_, options_.max_mem());
+ if (prog_ != NULL)
+ prog_->set_shard_cache_mutex(options_.shard_cache_mutex());
re->Decref();
return prog_ != NULL;
}