# vendor/regex-1.10.5/testdata/word-boundary.toml - toolchain/rustc - Git at Google

 # Some of these are cribbed from RE2's test suite.

 # These test \b. Below are tests for \B.
 [[test]]
 name = "wb1"
 regex = '\b'
 haystack = ""
 matches = []
 unicode = false

 [[test]]
 name = "wb2"
 regex = '\b'
 haystack = "a"
 matches = [[0, 0], [1, 1]]
 unicode = false

 [[test]]
 name = "wb3"
 regex = '\b'
 haystack = "ab"
 matches = [[0, 0], [2, 2]]
 unicode = false

 [[test]]
 name = "wb4"
 regex = '^\b'
 haystack = "ab"
 matches = [[0, 0]]
 unicode = false

 [[test]]
 name = "wb5"
 regex = '\b$'
 haystack = "ab"
 matches = [[2, 2]]
 unicode = false

 [[test]]
 name = "wb6"
 regex = '^\b$'
 haystack = "ab"
 matches = []
 unicode = false

 [[test]]
 name = "wb7"
 regex = '\bbar\b'
 haystack = "nobar bar foo bar"
 matches = [[6, 9], [14, 17]]
 unicode = false

 [[test]]
 name = "wb8"
 regex = 'a\b'
 haystack = "faoa x"
 matches = [[3, 4]]
 unicode = false

 [[test]]
 name = "wb9"
 regex = '\bbar'
 haystack = "bar x"
 matches = [[0, 3]]
 unicode = false

 [[test]]
 name = "wb10"
 regex = '\bbar'
 haystack = "foo\nbar x"
 matches = [[4, 7]]
 unicode = false

 [[test]]
 name = "wb11"
 regex = 'bar\b'
 haystack = "foobar"
 matches = [[3, 6]]
 unicode = false

 [[test]]
 name = "wb12"
 regex = 'bar\b'
 haystack = "foobar\nxxx"
 matches = [[3, 6]]
 unicode = false

 [[test]]
 name = "wb13"
 regex = '(?:foo|bar|[A-Z])\b'
 haystack = "foo"
 matches = [[0, 3]]
 unicode = false

 [[test]]
 name = "wb14"
 regex = '(?:foo|bar|[A-Z])\b'
 haystack = "foo\n"
 matches = [[0, 3]]
 unicode = false

 [[test]]
 name = "wb15"
 regex = '\b(?:foo|bar|[A-Z])'
 haystack = "foo"
 matches = [[0, 3]]
 unicode = false

 [[test]]
 name = "wb16"
 regex = '\b(?:foo|bar|[A-Z])\b'
 haystack = "X"
 matches = [[0, 1]]
 unicode = false

 [[test]]
 name = "wb17"
 regex = '\b(?:foo|bar|[A-Z])\b'
 haystack = "XY"
 matches = []
 unicode = false

 [[test]]
 name = "wb18"
 regex = '\b(?:foo|bar|[A-Z])\b'
 haystack = "bar"
 matches = [[0, 3]]
 unicode = false

 [[test]]
 name = "wb19"
 regex = '\b(?:foo|bar|[A-Z])\b'
 haystack = "foo"
 matches = [[0, 3]]
 unicode = false

 [[test]]
 name = "wb20"
 regex = '\b(?:foo|bar|[A-Z])\b'
 haystack = "foo\n"
 matches = [[0, 3]]
 unicode = false

 [[test]]
 name = "wb21"
 regex = '\b(?:foo|bar|[A-Z])\b'
 haystack = "ffoo bbar N x"
 matches = [[10, 11]]
 unicode = false

 [[test]]
 name = "wb22"
 regex = '\b(?:fo|foo)\b'
 haystack = "fo"
 matches = [[0, 2]]
 unicode = false

 [[test]]
 name = "wb23"
 regex = '\b(?:fo|foo)\b'
 haystack = "foo"
 matches = [[0, 3]]
 unicode = false

 [[test]]
 name = "wb24"
 regex = '\b\b'
 haystack = ""
 matches = []
 unicode = false

 [[test]]
 name = "wb25"
 regex = '\b\b'
 haystack = "a"
 matches = [[0, 0], [1, 1]]
 unicode = false

 [[test]]
 name = "wb26"
 regex = '\b$'
 haystack = ""
 matches = []
 unicode = false

 [[test]]
 name = "wb27"
 regex = '\b$'
 haystack = "x"
 matches = [[1, 1]]
 unicode = false

 [[test]]
 name = "wb28"
 regex = '\b$'
 haystack = "y x"
 matches = [[3, 3]]
 unicode = false

 [[test]]
 name = "wb29"
 regex = '(?-u:\b).$'
 haystack = "x"
 matches = [[0, 1]]

 [[test]]
 name = "wb30"
 regex = '^\b(?:fo|foo)\b'
 haystack = "fo"
 matches = [[0, 2]]
 unicode = false

 [[test]]
 name = "wb31"
 regex = '^\b(?:fo|foo)\b'
 haystack = "foo"
 matches = [[0, 3]]
 unicode = false

 [[test]]
 name = "wb32"
 regex = '^\b$'
 haystack = ""
 matches = []
 unicode = false

 [[test]]
 name = "wb33"
 regex = '^\b$'
 haystack = "x"
 matches = []
 unicode = false

 [[test]]
 name = "wb34"
 regex = '^(?-u:\b).$'
 haystack = "x"
 matches = [[0, 1]]

 [[test]]
 name = "wb35"
 regex = '^(?-u:\b).(?-u:\b)$'
 haystack = "x"
 matches = [[0, 1]]

 [[test]]
 name = "wb36"
 regex = '^^^^^\b$$$$$'
 haystack = ""
 matches = []
 unicode = false

 [[test]]
 name = "wb37"
 regex = '^^^^^(?-u:\b).$$$$$'
 haystack = "x"
 matches = [[0, 1]]

 [[test]]
 name = "wb38"
 regex = '^^^^^\b$$$$$'
 haystack = "x"
 matches = []
 unicode = false

 [[test]]
 name = "wb39"
 regex = '^^^^^(?-u:\b\b\b).(?-u:\b\b\b)$$$$$'
 haystack = "x"
 matches = [[0, 1]]

 [[test]]
 name = "wb40"
 regex = '(?-u:\b).+(?-u:\b)'
 haystack = "$$abc$$"
 matches = [[2, 5]]

 [[test]]
 name = "wb41"
 regex = '\b'
 haystack = "a b c"
 matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]]
 unicode = false

 [[test]]
 name = "wb42"
 regex = '\bfoo\b'
 haystack = "zzz foo zzz"
 matches = [[4, 7]]
 unicode = false

 [[test]]
 name = "wb43"
 regex = '\b^'
 haystack = "ab"
 matches = [[0, 0]]
 unicode = false

 [[test]]
 name = "wb44"
 regex = '$\b'
 haystack = "ab"
 matches = [[2, 2]]
 unicode = false


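 # Tests for \B. Note that the ASCII-only \B (unicode = false) is not allowed
 # if UTF-8 mode is enabled, so we have to disable UTF-8 mode (utf8 = false)
 # in the nb* tests below. This is because an ASCII \B can match between the
 # bytes of a multi-byte UTF-8 sequence, i.e. at offsets that are not UTF-8
 # boundaries.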
 [[test]]
 name = "nb1"
 regex = '\Bfoo\B'
 haystack = "n foo xfoox that"
 matches = [[7, 10]]
 unicode = false
 utf8 = false

 [[test]]
 name = "nb2"
 regex = 'a\B'
 haystack = "faoa x"
 matches = [[1, 2]]
 unicode = false
 utf8 = false

 [[test]]
 name = "nb3"
 regex = '\Bbar'
 haystack = "bar x"
 matches = []
 unicode = false
 utf8 = false

 [[test]]
 name = "nb4"
 regex = '\Bbar'
 haystack = "foo\nbar x"
 matches = []
 unicode = false
 utf8 = false

 [[test]]
 name = "nb5"
 regex = 'bar\B'
 haystack = "foobar"
 matches = []
 unicode = false
 utf8 = false

 [[test]]
 name = "nb6"
 regex = 'bar\B'
 haystack = "foobar\nxxx"
 matches = []
 unicode = false
 utf8 = false

 [[test]]
 name = "nb7"
 regex = '(?:foo|bar|[A-Z])\B'
 haystack = "foox"
 matches = [[0, 3]]
 unicode = false
 utf8 = false

 [[test]]
 name = "nb8"
 regex = '(?:foo|bar|[A-Z])\B'
 haystack = "foo\n"
 matches = []
 unicode = false
 utf8 = false

 [[test]]
 name = "nb9"
 regex = '\B'
 haystack = ""
 matches = [[0, 0]]
 unicode = false
 utf8 = false

 [[test]]
 name = "nb10"
 regex = '\B'
 haystack = "x"
 matches = []
 unicode = false
 utf8 = false

 [[test]]
 name = "nb11"
 regex = '\B(?:foo|bar|[A-Z])'
 haystack = "foo"
 matches = []
 unicode = false
 utf8 = false

 [[test]]
 name = "nb12"
 regex = '\B(?:foo|bar|[A-Z])\B'
 haystack = "xXy"
 matches = [[1, 2]]
 unicode = false
 utf8 = false

 [[test]]
 name = "nb13"
 regex = '\B(?:foo|bar|[A-Z])\B'
 haystack = "XY"
 matches = []
 unicode = false
 utf8 = false

 [[test]]
 name = "nb14"
 regex = '\B(?:foo|bar|[A-Z])\B'
 haystack = "XYZ"
 matches = [[1, 2]]
 unicode = false
 utf8 = false

 [[test]]
 name = "nb15"
 regex = '\B(?:foo|bar|[A-Z])\B'
 haystack = "abara"
 matches = [[1, 4]]
 unicode = false
 utf8 = false

 [[test]]
 name = "nb16"
 regex = '\B(?:foo|bar|[A-Z])\B'
 haystack = "xfoo_"
 matches = [[1, 4]]
 unicode = false
 utf8 = false

 [[test]]
 name = "nb17"
 regex = '\B(?:foo|bar|[A-Z])\B'
 haystack = "xfoo\n"
 matches = []
 unicode = false
 utf8 = false

 [[test]]
 name = "nb18"
 regex = '\B(?:foo|bar|[A-Z])\B'
 haystack = "foo bar vNX"
 matches = [[9, 10]]
 unicode = false
 utf8 = false

 [[test]]
 name = "nb19"
 regex = '\B(?:fo|foo)\B'
 haystack = "xfoo"
 matches = [[1, 3]]
 unicode = false
 utf8 = false

 [[test]]
 name = "nb20"
 regex = '\B(?:foo|fo)\B'
 haystack = "xfooo"
 matches = [[1, 4]]
 unicode = false
 utf8 = false

 [[test]]
 name = "nb21"
 regex = '\B\B'
 haystack = ""
 matches = [[0, 0]]
 unicode = false
 utf8 = false

 [[test]]
 name = "nb22"
 regex = '\B\B'
 haystack = "x"
 matches = []
 unicode = false
 utf8 = false

 [[test]]
 name = "nb23"
 regex = '\B$'
 haystack = ""
 matches = [[0, 0]]
 unicode = false
 utf8 = false

 [[test]]
 name = "nb24"
 regex = '\B$'
 haystack = "x"
 matches = []
 unicode = false
 utf8 = false

 [[test]]
 name = "nb25"
 regex = '\B$'
 haystack = "y x"
 matches = []
 unicode = false
 utf8 = false

 [[test]]
 name = "nb26"
 regex = '\B.$'
 haystack = "x"
 matches = []
 unicode = false
 utf8 = false

 [[test]]
 name = "nb27"
 regex = '^\B(?:fo|foo)\B'
 haystack = "fo"
 matches = []
 unicode = false
 utf8 = false

 [[test]]
 name = "nb28"
 regex = '^\B(?:fo|foo)\B'
 haystack = "fo"
 matches = []
 unicode = false
 utf8 = false

 [[test]]
 name = "nb29"
 regex = '^\B'
 haystack = ""
 matches = [[0, 0]]
 unicode = false
 utf8 = false

 [[test]]
 name = "nb30"
 regex = '^\B'
 haystack = "x"
 matches = []
 unicode = false
 utf8 = false

 [[test]]
 name = "nb31"
 regex = '^\B\B'
 haystack = ""
 matches = [[0, 0]]
 unicode = false
 utf8 = false

 [[test]]
 name = "nb32"
 regex = '^\B\B'
 haystack = "x"
 matches = []
 unicode = false
 utf8 = false

 [[test]]
 name = "nb33"
 regex = '^\B$'
 haystack = ""
 matches = [[0, 0]]
 unicode = false
 utf8 = false

 [[test]]
 name = "nb34"
 regex = '^\B$'
 haystack = "x"
 matches = []
 unicode = false
 utf8 = false

 [[test]]
 name = "nb35"
 regex = '^\B.$'
 haystack = "x"
 matches = []
 unicode = false
 utf8 = false

 [[test]]
 name = "nb36"
 regex = '^\B.\B$'
 haystack = "x"
 matches = []
 unicode = false
 utf8 = false

 [[test]]
 name = "nb37"
 regex = '^^^^^\B$$$$$'
 haystack = ""
 matches = [[0, 0]]
 unicode = false
 utf8 = false

 [[test]]
 name = "nb38"
 regex = '^^^^^\B.$$$$$'
 haystack = "x"
 matches = []
 unicode = false
 utf8 = false

 [[test]]
 name = "nb39"
 regex = '^^^^^\B$$$$$'
 haystack = "x"
 matches = []
 unicode = false
 utf8 = false


 # unicode1* and unicode2* work for both Unicode and ASCII because all matches
 # are reported as byte offsets, and « and » do not correspond to word
 # boundaries at either the character or byte level.
 [[test]]
 name = "unicode1"
 regex = '\bx\b'
 haystack = "«x"
 matches = [[2, 3]]

 [[test]]
 name = "unicode1-only-ascii"
 regex = '\bx\b'
 haystack = "«x"
 matches = [[2, 3]]
 unicode = false

 [[test]]
 name = "unicode2"
 regex = '\bx\b'
 haystack = "x»"
 matches = [[0, 1]]

 [[test]]
 name = "unicode2-only-ascii"
 regex = '\bx\b'
 haystack = "x»"
 matches = [[0, 1]]
 unicode = false

 # ASCII word boundaries are completely oblivious to Unicode characters, so
 # even though β is a word character, an ASCII \b treats the position between
 # it and an adjacent ASCII word character as a word boundary. (The ASCII \b
 # only looks at the leading byte of β.) For Unicode \b, the expected matches
 # are precisely inverted.
 [[test]]
 name = "unicode3"
 regex = '\bx\b'
 haystack = 'áxβ'
 matches = []

 [[test]]
 name = "unicode3-only-ascii"
 regex = '\bx\b'
 haystack = 'áxβ'
 matches = [[2, 3]]
 unicode = false

 [[test]]
 name = "unicode4"
 regex = '\Bx\B'
 haystack = 'áxβ'
 matches = [[2, 3]]

 [[test]]
 name = "unicode4-only-ascii"
 regex = '\Bx\B'
 haystack = 'áxβ'
 matches = []
 unicode = false
 utf8 = false

 # The same as the unicode5-not* tests below, but with \b instead of \B as a
 # sanity check.
 [[test]]
 name = "unicode5"
 regex = '\b'
 haystack = "0\U0007EF5E"
 matches = [[0, 0], [1, 1]]

 [[test]]
 name = "unicode5-only-ascii"
 regex = '\b'
 haystack = "0\U0007EF5E"
 matches = [[0, 0], [1, 1]]
 unicode = false
 utf8 = false

 [[test]]
 name = "unicode5-noutf8"
 regex = '\b'
 haystack = '0\xFF\xFF\xFF\xFF'
 matches = [[0, 0], [1, 1]]
 unescape = true
 utf8 = false

 [[test]]
 name = "unicode5-noutf8-only-ascii"
 regex = '\b'
 haystack = '0\xFF\xFF\xFF\xFF'
 matches = [[0, 0], [1, 1]]
 unescape = true
 unicode = false
 utf8 = false

 # Weird special case to ensure that ASCII \B treats each individual code unit
 # as a non-word byte. (The specific codepoint is irrelevant. It's an arbitrary
 # codepoint that uses 4 bytes in its UTF-8 encoding and is not a member of the
 # \w character class.)
 [[test]]
 name = "unicode5-not"
 regex = '\B'
 haystack = "0\U0007EF5E"
 matches = [[5, 5]]

 [[test]]
 name = "unicode5-not-only-ascii"
 regex = '\B'
 haystack = "0\U0007EF5E"
 matches = [[2, 2], [3, 3], [4, 4], [5, 5]]
 unicode = false
 utf8 = false

 # This gets no matches since \B only matches in the presence of valid UTF-8
 # when Unicode is enabled, even when UTF-8 mode is disabled.
 [[test]]
 name = "unicode5-not-noutf8"
 regex = '\B'
 haystack = '0\xFF\xFF\xFF\xFF'
 matches = []
 unescape = true
 utf8 = false

 # But this DOES get matches since \B in ASCII mode only looks at individual
 # bytes.
 [[test]]
 name = "unicode5-not-noutf8-only-ascii"
 regex = '\B'
 haystack = '0\xFF\xFF\xFF\xFF'
 matches = [[2, 2], [3, 3], [4, 4], [5, 5]]
 unescape = true
 unicode = false
 utf8 = false

 # Some tests of no particular significance.
 [[test]]
 name = "unicode6"
 regex = '\b[0-9]+\b'
 haystack = "foo 123 bar 456 quux 789"
 matches = [[4, 7], [12, 15], [21, 24]]

 [[test]]
 name = "unicode7"
 regex = '\b[0-9]+\b'
 haystack = "foo 123 bar a456 quux 789"
 matches = [[4, 7], [22, 25]]

 [[test]]
 name = "unicode8"
 regex = '\b[0-9]+\b'
 haystack = "foo 123 bar 456a quux 789"
 matches = [[4, 7], [22, 25]]

 # A variant of the problem described here:
 # https://github.com/google/re2/blob/89567f5de5b23bb5ad0c26cbafc10bdc7389d1fa/re2/dfa.cc#L658-L667
 [[test]]
 name = "alt-with-assertion-repetition"
 regex = '(?:\b|%)+'
 haystack = "z%"
 bounds = [1, 2]
 anchored = true
 matches = [[1, 1]]
	# Some of these are cribbed from RE2's test suite.

	# These test \b. Below are tests for \B.
	[[test]]
	name = "wb1"
	regex = '\b'
	haystack = ""
	matches = []
	unicode = false

	[[test]]
	name = "wb2"
	regex = '\b'
	haystack = "a"
	matches = [[0, 0], [1, 1]]
	unicode = false

	[[test]]
	name = "wb3"
	regex = '\b'
	haystack = "ab"
	matches = [[0, 0], [2, 2]]
	unicode = false

	[[test]]
	name = "wb4"
	regex = '^\b'
	haystack = "ab"
	matches = [[0, 0]]
	unicode = false

	[[test]]
	name = "wb5"
	regex = '\b$'
	haystack = "ab"
	matches = [[2, 2]]
	unicode = false

	[[test]]
	name = "wb6"
	regex = '^\b$'
	haystack = "ab"
	matches = []
	unicode = false

	[[test]]
	name = "wb7"
	regex = '\bbar\b'
	haystack = "nobar bar foo bar"
	matches = [[6, 9], [14, 17]]
	unicode = false

	[[test]]
	name = "wb8"
	regex = 'a\b'
	haystack = "faoa x"
	matches = [[3, 4]]
	unicode = false

	[[test]]
	name = "wb9"
	regex = '\bbar'
	haystack = "bar x"
	matches = [[0, 3]]
	unicode = false

	[[test]]
	name = "wb10"
	regex = '\bbar'
	haystack = "foo\nbar x"
	matches = [[4, 7]]
	unicode = false

	[[test]]
	name = "wb11"
	regex = 'bar\b'
	haystack = "foobar"
	matches = [[3, 6]]
	unicode = false

	[[test]]
	name = "wb12"
	regex = 'bar\b'
	haystack = "foobar\nxxx"
	matches = [[3, 6]]
	unicode = false

	# wb13-wb23: alternations combined with an ASCII \b (unicode = false). The
	# `|` is the regex alternation operator, so it must appear unescaped inside
	# the TOML literal string; `\|` would instead match a literal pipe.
	[[test]]
	name = "wb13"
	regex = '(?:foo|bar|[A-Z])\b'
	haystack = "foo"
	matches = [[0, 3]]
	unicode = false

	[[test]]
	name = "wb14"
	regex = '(?:foo|bar|[A-Z])\b'
	haystack = "foo\n"
	matches = [[0, 3]]
	unicode = false

	[[test]]
	name = "wb15"
	regex = '\b(?:foo|bar|[A-Z])'
	haystack = "foo"
	matches = [[0, 3]]
	unicode = false

	[[test]]
	name = "wb16"
	regex = '\b(?:foo|bar|[A-Z])\b'
	haystack = "X"
	matches = [[0, 1]]
	unicode = false

	# No match: X and Y are both word bytes, so there is no boundary between
	# them and neither letter is delimited on both sides.
	[[test]]
	name = "wb17"
	regex = '\b(?:foo|bar|[A-Z])\b'
	haystack = "XY"
	matches = []
	unicode = false

	[[test]]
	name = "wb18"
	regex = '\b(?:foo|bar|[A-Z])\b'
	haystack = "bar"
	matches = [[0, 3]]
	unicode = false

	[[test]]
	name = "wb19"
	regex = '\b(?:foo|bar|[A-Z])\b'
	haystack = "foo"
	matches = [[0, 3]]
	unicode = false

	[[test]]
	name = "wb20"
	regex = '\b(?:foo|bar|[A-Z])\b'
	haystack = "foo\n"
	matches = [[0, 3]]
	unicode = false

	# Only the standalone N matches: the foo in "ffoo" and the bar in "bbar"
	# are preceded by a word byte, so there is no leading boundary.
	[[test]]
	name = "wb21"
	regex = '\b(?:foo|bar|[A-Z])\b'
	haystack = "ffoo bbar N x"
	matches = [[10, 11]]
	unicode = false

	[[test]]
	name = "wb22"
	regex = '\b(?:fo|foo)\b'
	haystack = "fo"
	matches = [[0, 2]]
	unicode = false

	# The `fo` branch is tried first but fails the trailing \b (offset 2 lies
	# between two word bytes), so the `foo` branch produces the match.
	[[test]]
	name = "wb23"
	regex = '\b(?:fo|foo)\b'
	haystack = "foo"
	matches = [[0, 3]]
	unicode = false

	[[test]]
	name = "wb24"
	regex = '\b\b'
	haystack = ""
	matches = []
	unicode = false

	[[test]]
	name = "wb25"
	regex = '\b\b'
	haystack = "a"
	matches = [[0, 0], [1, 1]]
	unicode = false

	[[test]]
	name = "wb26"
	regex = '\b$'
	haystack = ""
	matches = []
	unicode = false

	[[test]]
	name = "wb27"
	regex = '\b$'
	haystack = "x"
	matches = [[1, 1]]
	unicode = false

	[[test]]
	name = "wb28"
	regex = '\b$'
	haystack = "y x"
	matches = [[3, 3]]
	unicode = false

	[[test]]
	name = "wb29"
	regex = '(?-u:\b).$'
	haystack = "x"
	matches = [[0, 1]]

	# wb30-wb31: start-anchored variants of the `fo|foo` alternation tests. The
	# `|` must be unescaped so that it acts as alternation rather than matching
	# a literal pipe.
	[[test]]
	name = "wb30"
	regex = '^\b(?:fo|foo)\b'
	haystack = "fo"
	matches = [[0, 2]]
	unicode = false

	[[test]]
	name = "wb31"
	regex = '^\b(?:fo|foo)\b'
	haystack = "foo"
	matches = [[0, 3]]
	unicode = false

	[[test]]
	name = "wb32"
	regex = '^\b$'
	haystack = ""
	matches = []
	unicode = false

	[[test]]
	name = "wb33"
	regex = '^\b$'
	haystack = "x"
	matches = []
	unicode = false

	[[test]]
	name = "wb34"
	regex = '^(?-u:\b).$'
	haystack = "x"
	matches = [[0, 1]]

	[[test]]
	name = "wb35"
	regex = '^(?-u:\b).(?-u:\b)$'
	haystack = "x"
	matches = [[0, 1]]

	[[test]]
	name = "wb36"
	regex = '^^^^^\b$$$$$'
	haystack = ""
	matches = []
	unicode = false

	[[test]]
	name = "wb37"
	regex = '^^^^^(?-u:\b).$$$$$'
	haystack = "x"
	matches = [[0, 1]]

	[[test]]
	name = "wb38"
	regex = '^^^^^\b$$$$$'
	haystack = "x"
	matches = []
	unicode = false

	[[test]]
	name = "wb39"
	regex = '^^^^^(?-u:\b\b\b).(?-u:\b\b\b)$$$$$'
	haystack = "x"
	matches = [[0, 1]]

	[[test]]
	name = "wb40"
	regex = '(?-u:\b).+(?-u:\b)'
	haystack = "$$abc$$"
	matches = [[2, 5]]

	[[test]]
	name = "wb41"
	regex = '\b'
	haystack = "a b c"
	matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]]
	unicode = false

	[[test]]
	name = "wb42"
	regex = '\bfoo\b'
	haystack = "zzz foo zzz"
	matches = [[4, 7]]
	unicode = false

	[[test]]
	name = "wb43"
	regex = '\b^'
	haystack = "ab"
	matches = [[0, 0]]
	unicode = false

	[[test]]
	name = "wb44"
	regex = '$\b'
	haystack = "ab"
	matches = [[2, 2]]
	unicode = false


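	# Tests for \B. Note that the ASCII-only \B (unicode = false) is not allowed
	# if UTF-8 mode is enabled, so we have to disable UTF-8 mode (utf8 = false)
	# in the nb* tests below. This is because an ASCII \B can match between the
	# bytes of a multi-byte UTF-8 sequence, i.e. at offsets that are not UTF-8
	# boundaries.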
	[[test]]
	name = "nb1"
	regex = '\Bfoo\B'
	haystack = "n foo xfoox that"
	matches = [[7, 10]]
	unicode = false
	utf8 = false

	[[test]]
	name = "nb2"
	regex = 'a\B'
	haystack = "faoa x"
	matches = [[1, 2]]
	unicode = false
	utf8 = false

	[[test]]
	name = "nb3"
	regex = '\Bbar'
	haystack = "bar x"
	matches = []
	unicode = false
	utf8 = false

	[[test]]
	name = "nb4"
	regex = '\Bbar'
	haystack = "foo\nbar x"
	matches = []
	unicode = false
	utf8 = false

	[[test]]
	name = "nb5"
	regex = 'bar\B'
	haystack = "foobar"
	matches = []
	unicode = false
	utf8 = false

	[[test]]
	name = "nb6"
	regex = 'bar\B'
	haystack = "foobar\nxxx"
	matches = []
	unicode = false
	utf8 = false

	# nb7-nb8: alternation followed by an ASCII \B. The `|` must be unescaped so
	# that it acts as alternation rather than matching a literal pipe.
	[[test]]
	name = "nb7"
	regex = '(?:foo|bar|[A-Z])\B'
	haystack = "foox"
	matches = [[0, 3]]
	unicode = false
	utf8 = false

	# No match: "foo" is followed by "\n", a non-word byte, so offset 3 is a
	# word boundary and \B fails there.
	[[test]]
	name = "nb8"
	regex = '(?:foo|bar|[A-Z])\B'
	haystack = "foo\n"
	matches = []
	unicode = false
	utf8 = false

	[[test]]
	name = "nb9"
	regex = '\B'
	haystack = ""
	matches = [[0, 0]]
	unicode = false
	utf8 = false

	[[test]]
	name = "nb10"
	regex = '\B'
	haystack = "x"
	matches = []
	unicode = false
	utf8 = false

	# nb11-nb20: alternations wrapped in ASCII \B assertions. The `|` must be
	# unescaped so that it acts as alternation rather than matching a literal
	# pipe.

	# No match: offset 0 sits before a word byte at the start of the haystack,
	# which is a word boundary, so \B fails there.
	[[test]]
	name = "nb11"
	regex = '\B(?:foo|bar|[A-Z])'
	haystack = "foo"
	matches = []
	unicode = false
	utf8 = false

	[[test]]
	name = "nb12"
	regex = '\B(?:foo|bar|[A-Z])\B'
	haystack = "xXy"
	matches = [[1, 2]]
	unicode = false
	utf8 = false

	# No match: X starts at a boundary (start of haystack) and Y ends at one
	# (end of haystack), so neither letter has \B on both sides.
	[[test]]
	name = "nb13"
	regex = '\B(?:foo|bar|[A-Z])\B'
	haystack = "XY"
	matches = []
	unicode = false
	utf8 = false

	[[test]]
	name = "nb14"
	regex = '\B(?:foo|bar|[A-Z])\B'
	haystack = "XYZ"
	matches = [[1, 2]]
	unicode = false
	utf8 = false

	[[test]]
	name = "nb15"
	regex = '\B(?:foo|bar|[A-Z])\B'
	haystack = "abara"
	matches = [[1, 4]]
	unicode = false
	utf8 = false

	# "_" is a word byte, so the position after "foo" is not a boundary.
	[[test]]
	name = "nb16"
	regex = '\B(?:foo|bar|[A-Z])\B'
	haystack = "xfoo_"
	matches = [[1, 4]]
	unicode = false
	utf8 = false

	# "\n" is not a word byte, so the position after "foo" is a boundary.
	[[test]]
	name = "nb17"
	regex = '\B(?:foo|bar|[A-Z])\B'
	haystack = "xfoo\n"
	matches = []
	unicode = false
	utf8 = false

	[[test]]
	name = "nb18"
	regex = '\B(?:foo|bar|[A-Z])\B'
	haystack = "foo bar vNX"
	matches = [[9, 10]]
	unicode = false
	utf8 = false

	# The first branch that satisfies the trailing \B wins: `fo` here ...
	[[test]]
	name = "nb19"
	regex = '\B(?:fo|foo)\B'
	haystack = "xfoo"
	matches = [[1, 3]]
	unicode = false
	utf8 = false

	# ... and `foo` here, since it is listed first and is followed by another
	# word byte.
	[[test]]
	name = "nb20"
	regex = '\B(?:foo|fo)\B'
	haystack = "xfooo"
	matches = [[1, 4]]
	unicode = false
	utf8 = false

	[[test]]
	name = "nb21"
	regex = '\B\B'
	haystack = ""
	matches = [[0, 0]]
	unicode = false
	utf8 = false

	[[test]]
	name = "nb22"
	regex = '\B\B'
	haystack = "x"
	matches = []
	unicode = false
	utf8 = false

	[[test]]
	name = "nb23"
	regex = '\B$'
	haystack = ""
	matches = [[0, 0]]
	unicode = false
	utf8 = false

	[[test]]
	name = "nb24"
	regex = '\B$'
	haystack = "x"
	matches = []
	unicode = false
	utf8 = false

	[[test]]
	name = "nb25"
	regex = '\B$'
	haystack = "y x"
	matches = []
	unicode = false
	utf8 = false

	[[test]]
	name = "nb26"
	regex = '\B.$'
	haystack = "x"
	matches = []
	unicode = false
	utf8 = false

	# nb27-nb28: start-anchored `fo|foo` alternation wrapped in ASCII \B. The
	# `|` must be unescaped so that it acts as alternation rather than matching
	# a literal pipe. Neither test matches: offset 0 sits before a word byte at
	# the start of the haystack, which is a word boundary, so \B fails there.
	[[test]]
	name = "nb27"
	regex = '^\B(?:fo|foo)\B'
	haystack = "fo"
	matches = []
	unicode = false
	utf8 = false

	# NOTE(review): regex and haystack are identical to nb27. By analogy with
	# wb30/wb31 this may have been meant to use haystack "foo" -- confirm.
	[[test]]
	name = "nb28"
	regex = '^\B(?:fo|foo)\B'
	haystack = "fo"
	matches = []
	unicode = false
	utf8 = false

	[[test]]
	name = "nb29"
	regex = '^\B'
	haystack = ""
	matches = [[0, 0]]
	unicode = false
	utf8 = false

	[[test]]
	name = "nb30"
	regex = '^\B'
	haystack = "x"
	matches = []
	unicode = false
	utf8 = false

	[[test]]
	name = "nb31"
	regex = '^\B\B'
	haystack = ""
	matches = [[0, 0]]
	unicode = false
	utf8 = false

	[[test]]
	name = "nb32"
	regex = '^\B\B'
	haystack = "x"
	matches = []
	unicode = false
	utf8 = false

	[[test]]
	name = "nb33"
	regex = '^\B$'
	haystack = ""
	matches = [[0, 0]]
	unicode = false
	utf8 = false

	[[test]]
	name = "nb34"
	regex = '^\B$'
	haystack = "x"
	matches = []
	unicode = false
	utf8 = false

	[[test]]
	name = "nb35"
	regex = '^\B.$'
	haystack = "x"
	matches = []
	unicode = false
	utf8 = false

	[[test]]
	name = "nb36"
	regex = '^\B.\B$'
	haystack = "x"
	matches = []
	unicode = false
	utf8 = false

	[[test]]
	name = "nb37"
	regex = '^^^^^\B$$$$$'
	haystack = ""
	matches = [[0, 0]]
	unicode = false
	utf8 = false

	[[test]]
	name = "nb38"
	regex = '^^^^^\B.$$$$$'
	haystack = "x"
	matches = []
	unicode = false
	utf8 = false

	[[test]]
	name = "nb39"
	regex = '^^^^^\B$$$$$'
	haystack = "x"
	matches = []
	unicode = false
	utf8 = false


	# unicode1* and unicode2* work for both Unicode and ASCII because all matches
	# are reported as byte offsets, and « and » do not correspond to word
	# boundaries at either the character or byte level.
	[[test]]
	name = "unicode1"
	regex = '\bx\b'
	haystack = "«x"
	matches = [[2, 3]]

	[[test]]
	name = "unicode1-only-ascii"
	regex = '\bx\b'
	haystack = "«x"
	matches = [[2, 3]]
	unicode = false

	[[test]]
	name = "unicode2"
	regex = '\bx\b'
	haystack = "x»"
	matches = [[0, 1]]

	[[test]]
	name = "unicode2-only-ascii"
	regex = '\bx\b'
	haystack = "x»"
	matches = [[0, 1]]
	unicode = false

	# ASCII word boundaries are completely oblivious to Unicode characters, so
	# even though β is a word character, an ASCII \b treats the position between
	# it and an adjacent ASCII word character as a word boundary. (The ASCII \b
	# only looks at the leading byte of β.) For Unicode \b, the expected matches
	# are precisely inverted.
	[[test]]
	name = "unicode3"
	regex = '\bx\b'
	haystack = 'áxβ'
	matches = []

	[[test]]
	name = "unicode3-only-ascii"
	regex = '\bx\b'
	haystack = 'áxβ'
	matches = [[2, 3]]
	unicode = false

	[[test]]
	name = "unicode4"
	regex = '\Bx\B'
	haystack = 'áxβ'
	matches = [[2, 3]]

	[[test]]
	name = "unicode4-only-ascii"
	regex = '\Bx\B'
	haystack = 'áxβ'
	matches = []
	unicode = false
	utf8 = false

	# The same as the unicode5-not* tests below, but with \b instead of \B as a
	# sanity check.
	[[test]]
	name = "unicode5"
	regex = '\b'
	haystack = "0\U0007EF5E"
	matches = [[0, 0], [1, 1]]

	[[test]]
	name = "unicode5-only-ascii"
	regex = '\b'
	haystack = "0\U0007EF5E"
	matches = [[0, 0], [1, 1]]
	unicode = false
	utf8 = false

	[[test]]
	name = "unicode5-noutf8"
	regex = '\b'
	haystack = '0\xFF\xFF\xFF\xFF'
	matches = [[0, 0], [1, 1]]
	unescape = true
	utf8 = false

	[[test]]
	name = "unicode5-noutf8-only-ascii"
	regex = '\b'
	haystack = '0\xFF\xFF\xFF\xFF'
	matches = [[0, 0], [1, 1]]
	unescape = true
	unicode = false
	utf8 = false

	# Weird special case to ensure that ASCII \B treats each individual code unit
	# as a non-word byte. (The specific codepoint is irrelevant. It's an arbitrary
	# codepoint that uses 4 bytes in its UTF-8 encoding and is not a member of the
	# \w character class.)
	[[test]]
	name = "unicode5-not"
	regex = '\B'
	haystack = "0\U0007EF5E"
	matches = [[5, 5]]

	[[test]]
	name = "unicode5-not-only-ascii"
	regex = '\B'
	haystack = "0\U0007EF5E"
	matches = [[2, 2], [3, 3], [4, 4], [5, 5]]
	unicode = false
	utf8 = false

	# This gets no matches since \B only matches in the presence of valid UTF-8
	# when Unicode is enabled, even when UTF-8 mode is disabled.
	[[test]]
	name = "unicode5-not-noutf8"
	regex = '\B'
	haystack = '0\xFF\xFF\xFF\xFF'
	matches = []
	unescape = true
	utf8 = false

	# But this DOES get matches since \B in ASCII mode only looks at individual
	# bytes.
	[[test]]
	name = "unicode5-not-noutf8-only-ascii"
	regex = '\B'
	haystack = '0\xFF\xFF\xFF\xFF'
	matches = [[2, 2], [3, 3], [4, 4], [5, 5]]
	unescape = true
	unicode = false
	utf8 = false

	# Some tests of no particular significance.
	[[test]]
	name = "unicode6"
	regex = '\b[0-9]+\b'
	haystack = "foo 123 bar 456 quux 789"
	matches = [[4, 7], [12, 15], [21, 24]]

	[[test]]
	name = "unicode7"
	regex = '\b[0-9]+\b'
	haystack = "foo 123 bar a456 quux 789"
	matches = [[4, 7], [22, 25]]

	[[test]]
	name = "unicode8"
	regex = '\b[0-9]+\b'
	haystack = "foo 123 bar 456a quux 789"
	matches = [[4, 7], [22, 25]]

	# A variant of the problem described here:
	# https://github.com/google/re2/blob/89567f5de5b23bb5ad0c26cbafc10bdc7389d1fa/re2/dfa.cc#L658-L667
	# Anchored search restricted to bounds [1, 2] of "z%". The regex alternates
	# between \b and a literal "%" (the `|` must be unescaped to act as
	# alternation). The expected result is the empty match at offset 1, where
	# \b holds between "z" and "%".
	[[test]]
	name = "alt-with-assertion-repetition"
	regex = '(?:\b|%)+'
	haystack = "z%"
	bounds = [1, 2]
	anchored = true
	matches = [[1, 1]]