# vendor/regex-1.10.6/testdata/anchored.toml - toolchain/rustc - Git at Google

 # These tests are specifically geared toward searches with 'anchored = true'.
 # While they are interesting in their own right, they are particularly
 # important for testing the one-pass DFA since the one-pass DFA can't work in
 # unanchored contexts.
 #
 # Note that "anchored" in this context does not mean "^". Anchored searches are
 # searches whose matches must begin at the start of the search, which may not
 # be at the start of the haystack. That's why anchored searches---and there are
 # some examples below---can still report multiple matches. This occurs when the
 # matches are adjacent to one another.

 # Greedy repetition: one anchored match covers all three adjacent "abc"
 # copies. The second span, [6, 9], lines up with the final "abc", i.e. the
 # capture group as of its last iteration.
 [[test]]
 name = "greedy"
 regex = '(abc)+'
 haystack = "abcabcabc"
 anchored = true
 matches = [[[0, 9], [6, 9]]]

 # An "earliest" search reports a match as soon as one is known, so greediness
 # has no effect: each adjacent "abc" is reported as its own match.
 [[test]]
 name = "greedy-earliest"
 regex = '(abc)+'
 haystack = "abcabcabc"
 anchored = true
 search-kind = "earliest"
 matches = [
     [[0, 3], [0, 3]],
     [[3, 6], [3, 6]],
     [[6, 9], [6, 9]],
 ]

 # The lazy '+?' stops after a single "abc", so the anchored search reports
 # three adjacent matches rather than one long one.
 [[test]]
 name = "nongreedy"
 regex = '(abc)+?'
 haystack = "abcabcabc"
 anchored = true
 matches = [
     [[0, 3], [0, 3]],
     [[3, 6], [3, 6]],
     [[6, 9], [6, 9]],
 ]

 # Under "all" match semantics the longest possible match is always taken, so
 # the lazy '+?' yields the same result as the greedy form.
 [[test]]
 name = "nongreedy-all"
 regex = '(abc)+?'
 haystack = "abcabcabc"
 anchored = true
 match-kind = "all"
 matches = [[[0, 9], [6, 9]]]

 # Offsets are in bytes: each 'β' takes two bytes in UTF-8, so the run of three
 # ends at 6, and '\b' holds there because '☃' is not matched by '\w'.
 [[test]]
 name = "word-boundary-unicode-01"
 regex = '\b\w+\b'
 haystack = 'βββ☃'
 anchored = true
 matches = [[0, 6]]

 # With 'unicode = false' the match ends at offset 3: the trailing 'β' is not
 # counted as a word character, so '\b' holds right after "abc".
 [[test]]
 name = "word-boundary-nounicode-01"
 regex = '\b\w+\b'
 haystack = 'abcβ'
 anchored = true
 unicode = false
 matches = [[0, 3]]

 # An anchored search starting at offset 0 must not find '.c' in 'abc': the only
 # candidate ("bc") begins at offset 1. This test found two different bugs in
 # the PikeVM and the meta engine.
 [[test]]
 name = "no-match-at-start"
 regex = '.c'
 haystack = 'abc'
 anchored = true
 matches = []

 # Variant of no-match-at-start with a non-zero start offset: 'bounds' limits
 # the search to [1, 4], so it begins at the second 'a' of 'aabc'.
 [[test]]
 name = "no-match-at-start-bounds"
 regex = '.c'
 haystack = 'aabc'
 bounds = [1, 4]
 anchored = true
 matches = []

 # Counterpart of no-match-at-start that hits the meta engine's "reverse inner"
 # optimization (no-match-at-start itself hits the "reverse suffix"
 # optimization).
 [[test]]
 name = "no-match-at-start-reverse-inner"
 regex = '.c[a-z]'
 haystack = 'abcz'
 anchored = true
 matches = []

 # Variant of no-match-at-start-reverse-inner with a non-zero start offset:
 # 'bounds' limits the search to [1, 5] within 'aabcz'.
 [[test]]
 name = "no-match-at-start-reverse-inner-bounds"
 regex = '.c[a-z]'
 haystack = 'aabcz'
 bounds = [1, 5]
 anchored = true
 matches = []

 # Counterpart of no-match-at-start that applies to the meta engine's "reverse
 # anchored" optimization.
 [[test]]
 name = "no-match-at-start-reverse-anchored"
 regex = '.c[a-z]$'
 haystack = 'abcz'
 anchored = true
 matches = []

 # Variant of no-match-at-start-reverse-anchored with a non-zero start offset:
 # 'bounds' limits the search to [1, 5] within 'aabcz'.
 [[test]]
 name = "no-match-at-start-reverse-anchored-bounds"
 regex = '.c[a-z]$'
 haystack = 'aabcz'
 bounds = [1, 5]
 anchored = true
 matches = []
	# NOTE(review): everything from this point on repeats the tests above verbatim
	# (identical test names; only the indentation differs). This looks like a
	# copy/extraction artifact -- confirm, and keep only one copy.
	#
	# These tests are specifically geared toward searches with 'anchored = true'.
	# While they are interesting in their own right, they are particularly
	# important for testing the one-pass DFA since the one-pass DFA can't work in
	# unanchored contexts.
	#
	# Note that "anchored" in this context does not mean "^". Anchored searches are
	# searches whose matches must begin at the start of the search, which may not
	# be at the start of the haystack. That's why anchored searches---and there are
	# some examples below---can still report multiple matches. This occurs when the
	# matches are adjacent to one another.

	# Greedy repetition: one anchored match covers all three adjacent "abc"
	# copies. The second span, [6, 9], lines up with the final "abc", i.e. the
	# capture group as of its last iteration.
	[[test]]
	name = "greedy"
	regex = '(abc)+'
	haystack = "abcabcabc"
	anchored = true
	matches = [[[0, 9], [6, 9]]]

	# An "earliest" search reports a match as soon as one is known, so greediness
	# has no effect: each adjacent "abc" is reported as its own match.
	[[test]]
	name = "greedy-earliest"
	regex = '(abc)+'
	haystack = "abcabcabc"
	anchored = true
	search-kind = "earliest"
	matches = [
		[[0, 3], [0, 3]],
		[[3, 6], [3, 6]],
		[[6, 9], [6, 9]],
	]

	# The lazy '+?' stops after a single "abc", so the anchored search reports
	# three adjacent matches rather than one long one.
	[[test]]
	name = "nongreedy"
	regex = '(abc)+?'
	haystack = "abcabcabc"
	anchored = true
	matches = [
		[[0, 3], [0, 3]],
		[[3, 6], [3, 6]],
		[[6, 9], [6, 9]],
	]

	# Under "all" match semantics the longest possible match is always taken, so
	# the lazy '+?' yields the same result as the greedy form.
	[[test]]
	name = "nongreedy-all"
	regex = '(abc)+?'
	haystack = "abcabcabc"
	anchored = true
	match-kind = "all"
	matches = [[[0, 9], [6, 9]]]

	# Offsets are in bytes: each 'β' takes two bytes in UTF-8, so the run of three
	# ends at 6, and '\b' holds there because '☃' is not matched by '\w'.
	[[test]]
	name = "word-boundary-unicode-01"
	regex = '\b\w+\b'
	haystack = 'βββ☃'
	anchored = true
	matches = [[0, 6]]

	# With 'unicode = false' the match ends at offset 3: the trailing 'β' is not
	# counted as a word character, so '\b' holds right after "abc".
	[[test]]
	name = "word-boundary-nounicode-01"
	regex = '\b\w+\b'
	haystack = 'abcβ'
	anchored = true
	unicode = false
	matches = [[0, 3]]

	# An anchored search starting at offset 0 must not find '.c' in 'abc': the only
	# candidate ("bc") begins at offset 1. This test found two different bugs in
	# the PikeVM and the meta engine.
	[[test]]
	name = "no-match-at-start"
	regex = '.c'
	haystack = 'abc'
	anchored = true
	matches = []

	# Variant of no-match-at-start with a non-zero start offset: 'bounds' limits
	# the search to [1, 4], so it begins at the second 'a' of 'aabc'.
	[[test]]
	name = "no-match-at-start-bounds"
	regex = '.c'
	haystack = 'aabc'
	bounds = [1, 4]
	anchored = true
	matches = []

	# Counterpart of no-match-at-start that hits the meta engine's "reverse inner"
	# optimization (no-match-at-start itself hits the "reverse suffix"
	# optimization).
	[[test]]
	name = "no-match-at-start-reverse-inner"
	regex = '.c[a-z]'
	haystack = 'abcz'
	anchored = true
	matches = []

	# Variant of no-match-at-start-reverse-inner with a non-zero start offset:
	# 'bounds' limits the search to [1, 5] within 'aabcz'.
	[[test]]
	name = "no-match-at-start-reverse-inner-bounds"
	regex = '.c[a-z]'
	haystack = 'aabcz'
	bounds = [1, 5]
	anchored = true
	matches = []

	# Counterpart of no-match-at-start that applies to the meta engine's "reverse
	# anchored" optimization.
	[[test]]
	name = "no-match-at-start-reverse-anchored"
	regex = '.c[a-z]$'
	haystack = 'abcz'
	anchored = true
	matches = []

	# Variant of no-match-at-start-reverse-anchored with a non-zero start offset:
	# 'bounds' limits the search to [1, 5] within 'aabcz'.
	[[test]]
	name = "no-match-at-start-reverse-anchored-bounds"
	regex = '.c[a-z]$'
	haystack = 'aabcz'
	bounds = [1, 5]
	anchored = true
	matches = []