| # These tests are specifically geared toward searches with 'anchored = true'. |
| # While they are interesting in their own right, they are particularly |
| # important for testing the one-pass DFA since the one-pass DFA can't work in |
| # unanchored contexts. |
| # |
| # Note that "anchored" in this context does not mean "^". Anchored searches are |
| # searches whose matches must begin at the start of the search, which may not |
| # be at the start of the haystack. That's why anchored searches---and there are |
| # some examples below---can still report multiple matches. This occurs when the |
| # matches are adjacent to one another. |
| |
| [[test]] |
| name = "greedy" |
| regex = '(abc)+' |
| haystack = 'abcabcabc' |
| anchored = true |
| # A single match. The first span is the overall match; the second is the |
| # span of capture group 1, which ends up on the last repetition. |
| matches = [[[0, 9], [6, 9]]] |
| |
| # When an "earliest" search is used, greediness doesn't really exist because |
| # matches are reported as soon as they are known. |
| [[test]] |
| name = "greedy-earliest" |
| regex = '(abc)+' |
| haystack = 'abcabcabc' |
| anchored = true |
| search-kind = "earliest" |
| # Three adjacent matches, one per "abc". In each of them the overall span |
| # and the span of capture group 1 coincide. |
| matches = [ |
| [[0, 3], [0, 3]], |
| [[3, 6], [3, 6]], |
| [[6, 9], [6, 9]], |
| ] |
| |
| [[test]] |
| name = "nongreedy" |
| regex = '(abc)+?' |
| haystack = 'abcabcabc' |
| anchored = true |
| # The lazy '+?' stops after one repetition, so the search reports three |
| # adjacent matches, each beginning exactly where the previous one ended. |
| matches = [ |
| [[0, 3], [0, 3]], |
| [[3, 6], [3, 6]], |
| [[6, 9], [6, 9]], |
| ] |
| |
| # When "all" semantics are used, non-greediness doesn't exist since the longest |
| # possible match is always taken. |
| [[test]] |
| name = "nongreedy-all" |
| regex = '(abc)+?' |
| haystack = 'abcabcabc' |
| anchored = true |
| match-kind = "all" |
| # One match covering the whole haystack, with capture group 1 on the last |
| # repetition, even though the repetition operator is lazy. |
| matches = [[[0, 9], [6, 9]]] |
| |
| [[test]] |
| name = "word-boundary-unicode-01" |
| regex = '\b\w+\b' |
| haystack = "βββ☃" |
| anchored = true |
| # Offsets are byte offsets: each 'β' is two bytes in UTF-8, so the run of |
| # three word characters ends at 6. The trailing '☃' is not a word character. |
| matches = [ |
| [0, 6], |
| ] |
| |
| [[test]] |
| name = "word-boundary-nounicode-01" |
| regex = '\b\w+\b' |
| haystack = "abcβ" |
| anchored = true |
| unicode = false |
| # With Unicode mode off, 'β' is not a word character, so the word ends |
| # after "abc" at offset 3. |
| matches = [ |
| [0, 3], |
| ] |
| |
| # Tests that '.c' doesn't match 'abc' when performing an anchored search from |
| # the beginning of the haystack. This test found two different bugs in the |
| # PikeVM and the meta engine. |
| [[test]] |
| name = "no-match-at-start" |
| regex = '.c' |
| haystack = "abc" |
| anchored = true |
| # The only permitted start is offset 0, where "ab" does not satisfy '.c'. |
| # The "bc" at offset 1 must not be reported. |
| matches = [] |
| |
| # Like above, but at a non-zero start offset. |
| [[test]] |
| name = "no-match-at-start-bounds" |
| regex = '.c' |
| haystack = "aabc" |
| # The search begins at offset 1, so the anchored position is the second 'a'. |
| bounds = [1, 4] |
| anchored = true |
| # "ab" at offset 1 does not satisfy '.c'; "bc" at offset 2 must not count. |
| matches = [] |
| |
| # This is like no-match-at-start, but hits the "reverse inner" optimization |
| # inside the meta engine. (no-match-at-start hits the "reverse suffix" |
| # optimization.) |
| [[test]] |
| name = "no-match-at-start-reverse-inner" |
| regex = '.c[a-z]' |
| haystack = "abcz" |
| anchored = true |
| # "bcz" would satisfy the pattern, but it begins at offset 1 rather than at |
| # the anchored start, so nothing may be reported. |
| matches = [] |
| |
| # Like above, but at a non-zero start offset. |
| [[test]] |
| name = "no-match-at-start-reverse-inner-bounds" |
| regex = '.c[a-z]' |
| haystack = "aabcz" |
| # The search begins at offset 1, so the anchored position is the second 'a'. |
| bounds = [1, 5] |
| anchored = true |
| # "bcz" begins at offset 2, one past the anchored start, so it must not be |
| # reported. |
| matches = [] |
| |
| # Same as no-match-at-start, but applies to the meta engine's "reverse |
| # anchored" optimization. |
| [[test]] |
| name = "no-match-at-start-reverse-anchored" |
| regex = '.c[a-z]$' |
| haystack = "abcz" |
| anchored = true |
| # "bcz" reaches the end of the haystack and so satisfies '$', but it begins |
| # at offset 1 rather than at the anchored start. |
| matches = [] |
| |
| # Like above, but at a non-zero start offset. |
| [[test]] |
| name = "no-match-at-start-reverse-anchored-bounds" |
| regex = '.c[a-z]$' |
| haystack = "aabcz" |
| # The search begins at offset 1, so the anchored position is the second 'a'. |
| bounds = [1, 5] |
| anchored = true |
| # "bcz" ends at the end of the haystack but begins at offset 2, one past the |
| # anchored start, so it must not be reported. |
| matches = [] |