// re2/fuzzing/re2_fuzzer.cc - platform/external/regex-re2 - Git at Google

 // Copyright 2016 The RE2 Authors.  All Rights Reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 #include <stddef.h>
 #include <stdint.h>
 #include <map>
 #include <string>

 #include "re2/re2.h"

 using re2::StringPiece;
 using std::string;

 // Accumulator that Test() adds RE2 API results into, so those calls have an
 // observable effect.
 // NOT static, NOT signed.
 // NOTE(review): presumably non-static so the compiler cannot prove the stores
 // dead, and unsigned so that wraparound on `+=` is well-defined -- confirm.
 uint8_t dummy = 0;

 // Compiles |pattern| with |options| and, if the compiled program passes the
 // size and fanout limits below, runs RE2's matching and replacement entry
 // points over |text|. Match results are discarded; the calls are made only
 // to exercise the library.
 void Test(StringPiece pattern, const RE2::Options& options, StringPiece text) {
   RE2 re(pattern, options);
   if (!re.ok()) {
     return;
   }

   // Large programs (forward or reverse) are not worth fuzzing: they can
   // surface as bug reports that are really just fuzzer timeouts.
   if (re.ProgramSize() > 9999 || re.ReverseProgramSize() > 9999) {
     return;
   }

   // The same reasoning applies to programs with a high fanout.
   std::map<int, int> buckets;
   if (re.ProgramFanout(&buckets) > 9 ||
       re.ReverseProgramFanout(&buckets) > 9) {
     return;
   }

   // Each matching call receives four output arguments of a different type.
   {
     StringPiece piece_a, piece_b, piece_c, piece_d;
     RE2::FullMatch(text, re, &piece_a, &piece_b, &piece_c, &piece_d);
   }
   {
     string str_a, str_b, str_c, str_d;
     RE2::PartialMatch(text, re, &str_a, &str_b, &str_c, &str_d);
   }
   {
     StringPiece remaining = text;
     int int_a, int_b, int_c, int_d;
     RE2::Consume(&remaining, re, &int_a, &int_b, &int_c, &int_d);
   }
   {
     StringPiece remaining = text;
     double dbl_a, dbl_b, dbl_c, dbl_d;
     RE2::FindAndConsume(&remaining, re, &dbl_a, &dbl_b, &dbl_c, &dbl_d);
   }

   // Replacement operates on owned copies of the text.
   {
     string first_only = string(text);
     RE2::Replace(&first_only, re, "");
   }
   {
     string every_match = string(text);
     RE2::GlobalReplace(&every_match, re, "");
   }

   // Touch a couple of remaining API functions, folding their results into
   // the global sink.
   dummy += re.NumberOfCapturingGroups();
   dummy += RE2::QuoteMeta(pattern).size();
 }

 // Entry point for libFuzzer.
 extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
   if (size == 0 || size > 999)
     return 0;

   // Crudely limit the use of ., \p and \P.
   // Otherwise, we will waste time on inputs that have long runs of Unicode
   // character classes. The fuzzer has shown itself to be easily capable of
   // generating such patterns that fall within the other limits, but result
   // in timeouts nonetheless. The marginal cost is high - even more so when
   // counted repetition is involved - whereas the marginal benefit is zero.
   int dot = 0;
   int backslash_p = 0;
   for (size_t i = 0; i < size; i++) {
     if (data[i] == '.')
       dot++;
     if (data[i] != '\\')
       continue;
     i++;
     if (i >= size)
       break;
     if (data[i] == 'p' || data[i] == 'P')
       backslash_p++;
   }
   if (dot > 99)
     return 0;
   if (backslash_p > 1)
     return 0;

   // The one-at-a-time hash by Bob Jenkins.
   uint32_t hash = 0;
   for (size_t i = 0; i < size; i++) {
     hash += data[i];
     hash += (hash << 10);
     hash ^= (hash >> 6);
   }
   hash += (hash << 3);
   hash ^= (hash >> 11);
   hash += (hash << 15);

   RE2::Options options;
   options.set_log_errors(false);
   options.set_max_mem(64 << 20);
   options.set_encoding(hash & 1 ? RE2::Options::EncodingLatin1
                                 : RE2::Options::EncodingUTF8);
   options.set_posix_syntax(hash & 2);
   options.set_longest_match(hash & 4);
   options.set_literal(hash & 8);
   options.set_never_nl(hash & 16);
   options.set_dot_nl(hash & 32);
   options.set_never_capture(hash & 64);
   options.set_case_sensitive(hash & 128);
   options.set_perl_classes(hash & 256);
   options.set_word_boundary(hash & 512);
   options.set_one_line(hash & 1024);

   const char* ptr = reinterpret_cast<const char*>(data);
   int len = static_cast<int>(size);

   StringPiece pattern(ptr, len);
   StringPiece text(ptr, len);
   Test(pattern, options, text);

   return 0;
 }
	// Copyright 2016 The RE2 Authors. All Rights Reserved.
	// Use of this source code is governed by a BSD-style
	// license that can be found in the LICENSE file.

	#include <stddef.h>
	#include <stdint.h>
	#include <map>
	#include <string>

	#include "re2/re2.h"

	using re2::StringPiece;
	using std::string;

	// Accumulator that Test() adds RE2 API results into, so those calls have an
	// observable effect.
	// NOT static, NOT signed.
	// NOTE(review): presumably non-static so the compiler cannot prove the stores
	// dead, and unsigned so that wraparound on `+=` is well-defined -- confirm.
	uint8_t dummy = 0;

	// Compiles |pattern| with |options| and, if the compiled program passes the
	// size and fanout limits below, runs RE2's matching and replacement entry
	// points over |text|. Match results are discarded; the calls are made only
	// to exercise the library.
	void Test(StringPiece pattern, const RE2::Options& options, StringPiece text) {
	  RE2 re(pattern, options);
	  if (!re.ok()) {
	    return;
	  }

	  // Large programs (forward or reverse) are not worth fuzzing: they can
	  // surface as bug reports that are really just fuzzer timeouts.
	  if (re.ProgramSize() > 9999 || re.ReverseProgramSize() > 9999) {
	    return;
	  }

	  // The same reasoning applies to programs with a high fanout.
	  std::map<int, int> buckets;
	  if (re.ProgramFanout(&buckets) > 9 ||
	      re.ReverseProgramFanout(&buckets) > 9) {
	    return;
	  }

	  // Each matching call receives four output arguments of a different type.
	  {
	    StringPiece piece_a, piece_b, piece_c, piece_d;
	    RE2::FullMatch(text, re, &piece_a, &piece_b, &piece_c, &piece_d);
	  }
	  {
	    string str_a, str_b, str_c, str_d;
	    RE2::PartialMatch(text, re, &str_a, &str_b, &str_c, &str_d);
	  }
	  {
	    StringPiece remaining = text;
	    int int_a, int_b, int_c, int_d;
	    RE2::Consume(&remaining, re, &int_a, &int_b, &int_c, &int_d);
	  }
	  {
	    StringPiece remaining = text;
	    double dbl_a, dbl_b, dbl_c, dbl_d;
	    RE2::FindAndConsume(&remaining, re, &dbl_a, &dbl_b, &dbl_c, &dbl_d);
	  }

	  // Replacement operates on owned copies of the text.
	  {
	    string first_only = string(text);
	    RE2::Replace(&first_only, re, "");
	  }
	  {
	    string every_match = string(text);
	    RE2::GlobalReplace(&every_match, re, "");
	  }

	  // Touch a couple of remaining API functions, folding their results into
	  // the global sink.
	  dummy += re.NumberOfCapturingGroups();
	  dummy += RE2::QuoteMeta(pattern).size();
	}

	// Entry point for libFuzzer.
	//
	// Parameters:
	//   data - the fuzzer-generated input bytes.
	//   size - the number of bytes in |data|.
	//
	// The input is used as both the regular expression and the text to match.
	// RE2 options are selected by individual bits of a hash of the input.
	// Inputs that are empty, longer than 999 bytes, or that exceed the limits on
	// `.`, \p and \P below are skipped.
	//
	// Returns 0 in every case.
	extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
	  if (size == 0 || size > 999)
	    return 0;

	  // Crudely limit the use of ., \p and \P.
	  // Otherwise, we will waste time on inputs that have long runs of Unicode
	  // character classes. The fuzzer has shown itself to be easily capable of
	  // generating such patterns that fall within the other limits, but result
	  // in timeouts nonetheless. The marginal cost is high - even more so when
	  // counted repetition is involved - whereas the marginal benefit is zero.
	  int dot = 0;
	  int backslash_p = 0;
	  for (size_t i = 0; i < size; i++) {
	    if (data[i] == '.')
	      dot++;
	    if (data[i] != '\\')
	      continue;
	    // Step onto the byte following the backslash; the loop increment then
	    // moves past it, so that byte is never examined on its own.
	    i++;
	    if (i >= size)
	      break;
	    if (data[i] == 'p' || data[i] == 'P')
	      backslash_p++;
	  }
	  if (dot > 99)
	    return 0;
	  if (backslash_p > 1)
	    return 0;

	  // The one-at-a-time hash by Bob Jenkins.
	  uint32_t hash = 0;
	  for (size_t i = 0; i < size; i++) {
	    hash += data[i];
	    hash += (hash << 10);
	    hash ^= (hash >> 6);
	  }
	  hash += (hash << 3);
	  hash ^= (hash >> 11);
	  hash += (hash << 15);

	  // One hash bit per option, so the option set is a deterministic function
	  // of the input bytes.
	  RE2::Options options;
	  options.set_log_errors(false);
	  options.set_max_mem(64 << 20);
	  options.set_encoding(hash & 1 ? RE2::Options::EncodingLatin1
	                                : RE2::Options::EncodingUTF8);
	  options.set_posix_syntax(hash & 2);
	  options.set_longest_match(hash & 4);
	  options.set_literal(hash & 8);
	  options.set_never_nl(hash & 16);
	  options.set_dot_nl(hash & 32);
	  options.set_never_capture(hash & 64);
	  options.set_case_sensitive(hash & 128);
	  options.set_perl_classes(hash & 256);
	  options.set_word_boundary(hash & 512);
	  options.set_one_line(hash & 1024);

	  // size is at most 999 here, so the narrowing to int cannot lose data.
	  const char* ptr = reinterpret_cast<const char*>(data);
	  int len = static_cast<int>(size);

	  // The same bytes are used as the pattern and as the text.
	  StringPiece pattern(ptr, len);
	  StringPiece text(ptr, len);
	  Test(pattern, options, text);

	  return 0;
	}