| # These tests are for the "special" word boundary assertions. That is, |
| # \b{start}, \b{end}, \b{start-half}, \b{end-half}. These are specialty |
| # assertions for more niche use cases, but hitting those cases without these |
| # assertions is difficult. For example, \b{start-half} and \b{end-half} are |
| # used to implement the -w/--word-regexp flag in a grep program. |
| |
| # Tests for (?-u:\b{start}) |
| # |
| # Every test in this group sets 'unicode = false'. Going by the expectations, |
| # "𝛃" is not a word character in this mode: a haystack of only "𝛃" has no |
| # matches, and "𝛃b" matches at offset 4, so "𝛃" spans offsets 0..4. |
| |
| [[test]] |
| name = "word-start-ascii-010" |
| regex = '\b{start}' |
| haystack = "a" |
| matches = [[0, 0]] |
| unicode = false |
| |
| [[test]] |
| name = "word-start-ascii-020" |
| regex = '\b{start}' |
| haystack = "a " |
| matches = [[0, 0]] |
| unicode = false |
| |
| [[test]] |
| name = "word-start-ascii-030" |
| regex = '\b{start}' |
| haystack = " a " |
| matches = [[1, 1]] |
| unicode = false |
| |
| # An empty haystack contains no word, hence no word start. |
| [[test]] |
| name = "word-start-ascii-040" |
| regex = '\b{start}' |
| haystack = "" |
| matches = [] |
| unicode = false |
| |
| [[test]] |
| name = "word-start-ascii-050" |
| regex = '\b{start}' |
| haystack = "ab" |
| matches = [[0, 0]] |
| unicode = false |
| |
| [[test]] |
| name = "word-start-ascii-060" |
| regex = '\b{start}' |
| haystack = "𝛃" |
| matches = [] |
| unicode = false |
| |
| # Same haystack as the previous test, searched only within bounds [2, 3]. |
| # Both offsets lie strictly inside the span 0..4 occupied by "𝛃". |
| [[test]] |
| name = "word-start-ascii-060-bounds" |
| regex = '\b{start}' |
| haystack = "𝛃" |
| bounds = [2, 3] |
| matches = [] |
| unicode = false |
| |
| [[test]] |
| name = "word-start-ascii-070" |
| regex = '\b{start}' |
| haystack = " 𝛃 " |
| matches = [] |
| unicode = false |
| |
| # Neither "𝛃" nor "𐆀" starts a word in this mode. |
| [[test]] |
| name = "word-start-ascii-080" |
| regex = '\b{start}' |
| haystack = "𝛃𐆀" |
| matches = [] |
| unicode = false |
| |
| [[test]] |
| name = "word-start-ascii-090" |
| regex = '\b{start}' |
| haystack = "𝛃b" |
| matches = [[4, 4]] |
| unicode = false |
| |
| [[test]] |
| name = "word-start-ascii-110" |
| regex = '\b{start}' |
| haystack = "b𝛃" |
| matches = [[0, 0]] |
| unicode = false |
| |
| # Tests for (?-u:\b{end}) |
| # |
| # Every test in this group sets 'unicode = false'. Going by the expectations, |
| # "𝛃" is not a word character in this mode: a haystack of only "𝛃" has no |
| # matches, and in "b𝛃" the word ends at offset 1, right after "b". |
| |
| [[test]] |
| name = "word-end-ascii-010" |
| regex = '\b{end}' |
| haystack = "a" |
| matches = [[1, 1]] |
| unicode = false |
| |
| [[test]] |
| name = "word-end-ascii-020" |
| regex = '\b{end}' |
| haystack = "a " |
| matches = [[1, 1]] |
| unicode = false |
| |
| [[test]] |
| name = "word-end-ascii-030" |
| regex = '\b{end}' |
| haystack = " a " |
| matches = [[2, 2]] |
| unicode = false |
| |
| # An empty haystack contains no word, hence no word end. |
| [[test]] |
| name = "word-end-ascii-040" |
| regex = '\b{end}' |
| haystack = "" |
| matches = [] |
| unicode = false |
| |
| [[test]] |
| name = "word-end-ascii-050" |
| regex = '\b{end}' |
| haystack = "ab" |
| matches = [[2, 2]] |
| unicode = false |
| |
| [[test]] |
| name = "word-end-ascii-060" |
| regex = '\b{end}' |
| haystack = "𝛃" |
| matches = [] |
| unicode = false |
| |
| # Same haystack as the previous test, searched only within bounds [2, 3]. |
| # Both offsets lie strictly inside the span 0..4 occupied by "𝛃". |
| [[test]] |
| name = "word-end-ascii-060-bounds" |
| regex = '\b{end}' |
| haystack = "𝛃" |
| bounds = [2, 3] |
| matches = [] |
| unicode = false |
| |
| [[test]] |
| name = "word-end-ascii-070" |
| regex = '\b{end}' |
| haystack = " 𝛃 " |
| matches = [] |
| unicode = false |
| |
| # Neither "𝛃" nor "𐆀" ends a word in this mode. |
| [[test]] |
| name = "word-end-ascii-080" |
| regex = '\b{end}' |
| haystack = "𝛃𐆀" |
| matches = [] |
| unicode = false |
| |
| # "b" occupies offsets 4..5 here, so its word end is at offset 5. |
| [[test]] |
| name = "word-end-ascii-090" |
| regex = '\b{end}' |
| haystack = "𝛃b" |
| matches = [[5, 5]] |
| unicode = false |
| |
| [[test]] |
| name = "word-end-ascii-110" |
| regex = '\b{end}' |
| haystack = "b𝛃" |
| matches = [[1, 1]] |
| unicode = false |
| |
| # Tests for \b{start} |
| # |
| # Every test in this group sets 'unicode = true'. Going by the expectations, |
| # "𝛃" is a word character in this mode: a haystack of only "𝛃" matches at |
| # offset 0, and "𝛃b" is a single word whose only start is at offset 0. |
| |
| [[test]] |
| name = "word-start-unicode-010" |
| regex = '\b{start}' |
| haystack = "a" |
| matches = [[0, 0]] |
| unicode = true |
| |
| [[test]] |
| name = "word-start-unicode-020" |
| regex = '\b{start}' |
| haystack = "a " |
| matches = [[0, 0]] |
| unicode = true |
| |
| [[test]] |
| name = "word-start-unicode-030" |
| regex = '\b{start}' |
| haystack = " a " |
| matches = [[1, 1]] |
| unicode = true |
| |
| # An empty haystack contains no word, hence no word start. |
| [[test]] |
| name = "word-start-unicode-040" |
| regex = '\b{start}' |
| haystack = "" |
| matches = [] |
| unicode = true |
| |
| [[test]] |
| name = "word-start-unicode-050" |
| regex = '\b{start}' |
| haystack = "ab" |
| matches = [[0, 0]] |
| unicode = true |
| |
| [[test]] |
| name = "word-start-unicode-060" |
| regex = '\b{start}' |
| haystack = "𝛃" |
| matches = [[0, 0]] |
| unicode = true |
| |
| # Same haystack as the previous test, searched only within bounds [2, 3]. |
| # Both offsets lie strictly inside the span 0..4 occupied by "𝛃", and no |
| # match is expected there. |
| [[test]] |
| name = "word-start-unicode-060-bounds" |
| regex = '\b{start}' |
| haystack = "𝛃" |
| bounds = [2, 3] |
| matches = [] |
| unicode = true |
| |
| [[test]] |
| name = "word-start-unicode-070" |
| regex = '\b{start}' |
| haystack = " 𝛃 " |
| matches = [[1, 1]] |
| unicode = true |
| |
| [[test]] |
| name = "word-start-unicode-080" |
| regex = '\b{start}' |
| haystack = "𝛃𐆀" |
| matches = [[0, 0]] |
| unicode = true |
| |
| [[test]] |
| name = "word-start-unicode-090" |
| regex = '\b{start}' |
| haystack = "𝛃b" |
| matches = [[0, 0]] |
| unicode = true |
| |
| [[test]] |
| name = "word-start-unicode-110" |
| regex = '\b{start}' |
| haystack = "b𝛃" |
| matches = [[0, 0]] |
| unicode = true |
| |
| # Tests for \b{end} |
| # |
| # Every test in this group sets 'unicode = true'. Going by the expectations, |
| # "𝛃" is a word character in this mode (a haystack of only "𝛃" matches at |
| # offset 4), while "𐆀" is not ("𝛃𐆀" also matches only at offset 4). |
| |
| [[test]] |
| name = "word-end-unicode-010" |
| regex = '\b{end}' |
| haystack = "a" |
| matches = [[1, 1]] |
| unicode = true |
| |
| [[test]] |
| name = "word-end-unicode-020" |
| regex = '\b{end}' |
| haystack = "a " |
| matches = [[1, 1]] |
| unicode = true |
| |
| [[test]] |
| name = "word-end-unicode-030" |
| regex = '\b{end}' |
| haystack = " a " |
| matches = [[2, 2]] |
| unicode = true |
| |
| # An empty haystack contains no word, hence no word end. |
| [[test]] |
| name = "word-end-unicode-040" |
| regex = '\b{end}' |
| haystack = "" |
| matches = [] |
| unicode = true |
| |
| [[test]] |
| name = "word-end-unicode-050" |
| regex = '\b{end}' |
| haystack = "ab" |
| matches = [[2, 2]] |
| unicode = true |
| |
| [[test]] |
| name = "word-end-unicode-060" |
| regex = '\b{end}' |
| haystack = "𝛃" |
| matches = [[4, 4]] |
| unicode = true |
| |
| # Same haystack as the previous test, searched only within bounds [2, 3]. |
| # Both offsets lie strictly inside the span 0..4 occupied by "𝛃", and no |
| # match is expected there. |
| [[test]] |
| name = "word-end-unicode-060-bounds" |
| regex = '\b{end}' |
| haystack = "𝛃" |
| bounds = [2, 3] |
| matches = [] |
| unicode = true |
| |
| [[test]] |
| name = "word-end-unicode-070" |
| regex = '\b{end}' |
| haystack = " 𝛃 " |
| matches = [[5, 5]] |
| unicode = true |
| |
| [[test]] |
| name = "word-end-unicode-080" |
| regex = '\b{end}' |
| haystack = "𝛃𐆀" |
| matches = [[4, 4]] |
| unicode = true |
| |
| # "𝛃b" and "b𝛃" are each a single word ending at offset 5. |
| [[test]] |
| name = "word-end-unicode-090" |
| regex = '\b{end}' |
| haystack = "𝛃b" |
| matches = [[5, 5]] |
| unicode = true |
| |
| [[test]] |
| name = "word-end-unicode-110" |
| regex = '\b{end}' |
| haystack = "b𝛃" |
| matches = [[5, 5]] |
| unicode = true |
| |
| # Tests for (?-u:\b{start-half}) |
| # |
| # Every test in this group sets 'unicode = false'. The expectations are |
| # consistent with a match at each codepoint boundary that is not immediately |
| # preceded by a word character, with "𝛃" and "𐆀" counting as non-word |
| # characters in this mode. |
| |
| [[test]] |
| name = "word-start-half-ascii-010" |
| regex = '\b{start-half}' |
| haystack = "a" |
| matches = [[0, 0]] |
| unicode = false |
| |
| [[test]] |
| name = "word-start-half-ascii-020" |
| regex = '\b{start-half}' |
| haystack = "a " |
| matches = [[0, 0], [2, 2]] |
| unicode = false |
| |
| [[test]] |
| name = "word-start-half-ascii-030" |
| regex = '\b{start-half}' |
| haystack = " a " |
| matches = [[0, 0], [1, 1], [3, 3]] |
| unicode = false |
| |
| # Unlike \b{start}, the half assertion is expected to match in an empty |
| # haystack. |
| [[test]] |
| name = "word-start-half-ascii-040" |
| regex = '\b{start-half}' |
| haystack = "" |
| matches = [[0, 0]] |
| unicode = false |
| |
| [[test]] |
| name = "word-start-half-ascii-050" |
| regex = '\b{start-half}' |
| haystack = "ab" |
| matches = [[0, 0]] |
| unicode = false |
| |
| [[test]] |
| name = "word-start-half-ascii-060" |
| regex = '\b{start-half}' |
| haystack = "𝛃" |
| matches = [[0, 0], [4, 4]] |
| unicode = false |
| |
| # Same as the previous test but with 'utf8 = false': matches are then also |
| # expected at offsets 1, 2 and 3, i.e. inside the span 0..4 occupied by "𝛃". |
| [[test]] |
| name = "word-start-half-ascii-060-noutf8" |
| regex = '\b{start-half}' |
| haystack = "𝛃" |
| matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] |
| unicode = false |
| utf8 = false |
| |
| # Searched only within bounds [2, 3], both strictly inside the span 0..4 |
| # occupied by "𝛃". This test does not set 'utf8 = false', and no match is |
| # expected. |
| [[test]] |
| name = "word-start-half-ascii-060-bounds" |
| regex = '\b{start-half}' |
| haystack = "𝛃" |
| bounds = [2, 3] |
| matches = [] |
| unicode = false |
| |
| [[test]] |
| name = "word-start-half-ascii-070" |
| regex = '\b{start-half}' |
| haystack = " 𝛃 " |
| matches = [[0, 0], [1, 1], [5, 5], [6, 6]] |
| unicode = false |
| |
| [[test]] |
| name = "word-start-half-ascii-080" |
| regex = '\b{start-half}' |
| haystack = "𝛃𐆀" |
| matches = [[0, 0], [4, 4], [8, 8]] |
| unicode = false |
| |
| # Offset 5 is excluded because it is immediately preceded by "b". |
| [[test]] |
| name = "word-start-half-ascii-090" |
| regex = '\b{start-half}' |
| haystack = "𝛃b" |
| matches = [[0, 0], [4, 4]] |
| unicode = false |
| |
| # Offset 1 is excluded because it is immediately preceded by "b". |
| [[test]] |
| name = "word-start-half-ascii-110" |
| regex = '\b{start-half}' |
| haystack = "b𝛃" |
| matches = [[0, 0], [5, 5]] |
| unicode = false |
| |
| # Tests for (?-u:\b{end-half}) |
| # |
| # Every test in this group sets 'unicode = false'. The expectations are |
| # consistent with a match at each codepoint boundary that is not immediately |
| # followed by a word character, with "𝛃" and "𐆀" counting as non-word |
| # characters in this mode. |
| |
| [[test]] |
| name = "word-end-half-ascii-010" |
| regex = '\b{end-half}' |
| haystack = "a" |
| matches = [[1, 1]] |
| unicode = false |
| |
| [[test]] |
| name = "word-end-half-ascii-020" |
| regex = '\b{end-half}' |
| haystack = "a " |
| matches = [[1, 1], [2, 2]] |
| unicode = false |
| |
| [[test]] |
| name = "word-end-half-ascii-030" |
| regex = '\b{end-half}' |
| haystack = " a " |
| matches = [[0, 0], [2, 2], [3, 3]] |
| unicode = false |
| |
| # Unlike \b{end}, the half assertion is expected to match in an empty |
| # haystack. |
| [[test]] |
| name = "word-end-half-ascii-040" |
| regex = '\b{end-half}' |
| haystack = "" |
| matches = [[0, 0]] |
| unicode = false |
| |
| [[test]] |
| name = "word-end-half-ascii-050" |
| regex = '\b{end-half}' |
| haystack = "ab" |
| matches = [[2, 2]] |
| unicode = false |
| |
| [[test]] |
| name = "word-end-half-ascii-060" |
| regex = '\b{end-half}' |
| haystack = "𝛃" |
| matches = [[0, 0], [4, 4]] |
| unicode = false |
| |
| # Same haystack as the previous test, searched only within bounds [2, 3]. |
| # Both offsets lie strictly inside the span 0..4 occupied by "𝛃", and no |
| # match is expected there. |
| [[test]] |
| name = "word-end-half-ascii-060-bounds" |
| regex = '\b{end-half}' |
| haystack = "𝛃" |
| bounds = [2, 3] |
| matches = [] |
| unicode = false |
| |
| [[test]] |
| name = "word-end-half-ascii-070" |
| regex = '\b{end-half}' |
| haystack = " 𝛃 " |
| matches = [[0, 0], [1, 1], [5, 5], [6, 6]] |
| unicode = false |
| |
| [[test]] |
| name = "word-end-half-ascii-080" |
| regex = '\b{end-half}' |
| haystack = "𝛃𐆀" |
| matches = [[0, 0], [4, 4], [8, 8]] |
| unicode = false |
| |
| # Offset 4 is excluded because it is immediately followed by "b". |
| [[test]] |
| name = "word-end-half-ascii-090" |
| regex = '\b{end-half}' |
| haystack = "𝛃b" |
| matches = [[0, 0], [5, 5]] |
| unicode = false |
| |
| # Offset 0 is excluded because it is immediately followed by "b". |
| [[test]] |
| name = "word-end-half-ascii-110" |
| regex = '\b{end-half}' |
| haystack = "b𝛃" |
| matches = [[1, 1], [5, 5]] |
| unicode = false |
| |
| # Tests for \b{start-half} |
| # |
| # Every test in this group sets 'unicode = true'. The expectations are |
| # consistent with a match at each codepoint boundary that is not immediately |
| # preceded by a word character, with "𝛃" counting as a word character and |
| # "𐆀" as a non-word character in this mode. |
| |
| [[test]] |
| name = "word-start-half-unicode-010" |
| regex = '\b{start-half}' |
| haystack = "a" |
| matches = [[0, 0]] |
| unicode = true |
| |
| [[test]] |
| name = "word-start-half-unicode-020" |
| regex = '\b{start-half}' |
| haystack = "a " |
| matches = [[0, 0], [2, 2]] |
| unicode = true |
| |
| [[test]] |
| name = "word-start-half-unicode-030" |
| regex = '\b{start-half}' |
| haystack = " a " |
| matches = [[0, 0], [1, 1], [3, 3]] |
| unicode = true |
| |
| # Unlike \b{start}, the half assertion is expected to match in an empty |
| # haystack. |
| [[test]] |
| name = "word-start-half-unicode-040" |
| regex = '\b{start-half}' |
| haystack = "" |
| matches = [[0, 0]] |
| unicode = true |
| |
| [[test]] |
| name = "word-start-half-unicode-050" |
| regex = '\b{start-half}' |
| haystack = "ab" |
| matches = [[0, 0]] |
| unicode = true |
| |
| # Offset 4 is excluded because it is immediately preceded by "𝛃". |
| [[test]] |
| name = "word-start-half-unicode-060" |
| regex = '\b{start-half}' |
| haystack = "𝛃" |
| matches = [[0, 0]] |
| unicode = true |
| |
| # Same haystack as the previous test, searched only within bounds [2, 3]. |
| # Both offsets lie strictly inside the span 0..4 occupied by "𝛃", and no |
| # match is expected there. |
| [[test]] |
| name = "word-start-half-unicode-060-bounds" |
| regex = '\b{start-half}' |
| haystack = "𝛃" |
| bounds = [2, 3] |
| matches = [] |
| unicode = true |
| |
| [[test]] |
| name = "word-start-half-unicode-070" |
| regex = '\b{start-half}' |
| haystack = " 𝛃 " |
| matches = [[0, 0], [1, 1], [6, 6]] |
| unicode = true |
| |
| # Offset 8 matches because it is preceded by "𐆀"; offset 4 does not because |
| # it is preceded by "𝛃". |
| [[test]] |
| name = "word-start-half-unicode-080" |
| regex = '\b{start-half}' |
| haystack = "𝛃𐆀" |
| matches = [[0, 0], [8, 8]] |
| unicode = true |
| |
| [[test]] |
| name = "word-start-half-unicode-090" |
| regex = '\b{start-half}' |
| haystack = "𝛃b" |
| matches = [[0, 0]] |
| unicode = true |
| |
| [[test]] |
| name = "word-start-half-unicode-110" |
| regex = '\b{start-half}' |
| haystack = "b𝛃" |
| matches = [[0, 0]] |
| unicode = true |
| |
| # Tests for \b{end-half} |
| # |
| # Every test in this group sets 'unicode = true'. The expectations are |
| # consistent with a match at each codepoint boundary that is not immediately |
| # followed by a word character, with "𝛃" counting as a word character and |
| # "𐆀" as a non-word character in this mode. |
| |
| [[test]] |
| name = "word-end-half-unicode-010" |
| regex = '\b{end-half}' |
| haystack = "a" |
| matches = [[1, 1]] |
| unicode = true |
| |
| [[test]] |
| name = "word-end-half-unicode-020" |
| regex = '\b{end-half}' |
| haystack = "a " |
| matches = [[1, 1], [2, 2]] |
| unicode = true |
| |
| [[test]] |
| name = "word-end-half-unicode-030" |
| regex = '\b{end-half}' |
| haystack = " a " |
| matches = [[0, 0], [2, 2], [3, 3]] |
| unicode = true |
| |
| # Unlike \b{end}, the half assertion is expected to match in an empty |
| # haystack. |
| [[test]] |
| name = "word-end-half-unicode-040" |
| regex = '\b{end-half}' |
| haystack = "" |
| matches = [[0, 0]] |
| unicode = true |
| |
| [[test]] |
| name = "word-end-half-unicode-050" |
| regex = '\b{end-half}' |
| haystack = "ab" |
| matches = [[2, 2]] |
| unicode = true |
| |
| # Offset 0 is excluded because it is immediately followed by "𝛃". |
| [[test]] |
| name = "word-end-half-unicode-060" |
| regex = '\b{end-half}' |
| haystack = "𝛃" |
| matches = [[4, 4]] |
| unicode = true |
| |
| # Same haystack as the previous test, searched only within bounds [2, 3]. |
| # Both offsets lie strictly inside the span 0..4 occupied by "𝛃", and no |
| # match is expected there. |
| [[test]] |
| name = "word-end-half-unicode-060-bounds" |
| regex = '\b{end-half}' |
| haystack = "𝛃" |
| bounds = [2, 3] |
| matches = [] |
| unicode = true |
| |
| [[test]] |
| name = "word-end-half-unicode-070" |
| regex = '\b{end-half}' |
| haystack = " 𝛃 " |
| matches = [[0, 0], [5, 5], [6, 6]] |
| unicode = true |
| |
| # Offset 4 matches because it is followed by "𐆀"; offset 0 does not because |
| # it is followed by "𝛃". |
| [[test]] |
| name = "word-end-half-unicode-080" |
| regex = '\b{end-half}' |
| haystack = "𝛃𐆀" |
| matches = [[4, 4], [8, 8]] |
| unicode = true |
| |
| [[test]] |
| name = "word-end-half-unicode-090" |
| regex = '\b{end-half}' |
| haystack = "𝛃b" |
| matches = [[5, 5]] |
| unicode = true |
| |
| [[test]] |
| name = "word-end-half-unicode-110" |
| regex = '\b{end-half}' |
| haystack = "b𝛃" |
| matches = [[5, 5]] |
| unicode = true |
| |
| # Specialty tests. |
| |
| # The start state computation special cases \r (to deal with CRLF mode). The |
| # search below begins at offset 4, immediately after the \r, to check that |
| # the correct start state is computed when the pattern starts with a half |
| # word boundary assertion. |
| [[test]] |
| name = "word-start-half-ascii-carriage" |
| regex = '\b{start-half}[a-z]+' |
| haystack = 'ABC\rabc' |
| bounds = [4, 7] |
| matches = [[4, 7]] |
| unescape = true |
| |
| # The start state computation also special cases \n. The search below begins |
| # at offset 4, immediately after the \n, to check that the correct start |
| # state is computed when the pattern starts with a half word boundary |
| # assertion. |
| [[test]] |
| name = "word-start-half-ascii-linefeed" |
| regex = '\b{start-half}[a-z]+' |
| haystack = 'ABC\nabc' |
| bounds = [4, 7] |
| matches = [[4, 7]] |
| unescape = true |
| |
| # Same shape as the \r test ('word-start-half-ascii-carriage'), except that |
| # the line terminator is set to '!' and the search begins immediately after |
| # the '!' in the haystack. |
| [[test]] |
| name = "word-start-half-ascii-customlineterm" |
| regex = '\b{start-half}[a-z]+' |
| haystack = 'ABC!abc' |
| bounds = [4, 7] |
| matches = [[4, 7]] |
| unescape = true |
| line-terminator = '!' |