| # With NUL as the line terminator, (?m)^ and $ anchor next to \x00 bytes. |
| [[test]] |
| name = "nul" |
| regex = '(?m)^[a-z]+$' |
| haystack = '\x00abc\x00' |
| matches = [[1, 4]] |
| unescape = true |
| line-terminator = '\x00' |
| |
| # '.' must not match the configured terminator (\x00 at offset 0), but it must |
| # match \n at [1, 2], since \n is no longer the terminator. |
| [[test]] |
| name = "dot-changes-with-line-terminator" |
| regex = '.' |
| haystack = '\x00\n' |
| matches = [[1, 2]] |
| unescape = true |
| line-terminator = '\x00' |
| |
| # Once the terminator is switched to \x00, \n stops acting as a line boundary, |
| # so ^ and $ cannot match on either side of 'abc' and no match is expected. |
| [[test]] |
| name = "not-line-feed" |
| regex = '(?m)^[a-z]+$' |
| haystack = '\nabc\n' |
| matches = [] |
| unescape = true |
| line-terminator = '\x00' |
| |
| # The terminator may be a non-ASCII byte: \xFF delimits the line here. utf8 is |
| # disabled, presumably because \xFF can never appear in valid UTF-8. |
| [[test]] |
| name = "non-ascii" |
| regex = '(?m)^[a-z]+$' |
| haystack = '\xFFabc\xFF' |
| matches = [[1, 4]] |
| unescape = true |
| line-terminator = '\xFF' |
| utf8 = false |
| |
| # Tricky case: the terminator is \r and the search is bounded to [4, 7], so it |
| # begins right after the \r at offset 3. This ensures that the StartLF |
| # look-behind assertion is tracked when computing the start state. |
| [[test]] |
| name = "carriage" |
| regex = '(?m)^[a-z]+' |
| haystack = 'ABC\rabc' |
| matches = [[4, 7]] |
| bounds = [4, 7] |
| unescape = true |
| line-terminator = '\r' |
| |
| # The terminator may be a word byte: with 'Z' as the terminator, ^ and $ match |
| # next to each 'Z', so 'abc' at [1, 4] is the expected match. |
| [[test]] |
| name = "word-byte" |
| regex = '(?m)^[a-z]+$' |
| haystack = 'ZabcZ' |
| matches = [[1, 4]] |
| unescape = true |
| line-terminator = 'Z' |
| |
| # The terminator may be a non-word byte: with '%' as the terminator, ^ and $ |
| # match next to each '%', so 'abc' at [1, 4] is the expected match. |
| [[test]] |
| name = "non-word-byte" |
| regex = '(?m)^[a-z]+$' |
| haystack = '%abc%' |
| matches = [[1, 4]] |
| unescape = true |
| line-terminator = '%' |
| |
| # Combines a word-byte terminator with \b. ^/$ match next to each 'Z', but 'Z' |
| # and 'a'/'c' are all word bytes, so \b fails there and no match is expected. |
| [[test]] |
| name = "word-boundary" |
| regex = '(?m)^\b[a-z]+\b$' |
| haystack = 'ZabcZ' |
| matches = [] |
| unescape = true |
| line-terminator = 'Z' |
| |
| # Like 'word-boundary', but runs an anchored search over [1, 4], i.e. starting |
| # exactly where ^ matches but \b must not (the preceding byte is 'Z'). |
| [[test]] |
| name = "word-boundary-at" |
| regex = '(?m)^\b[a-z]+\b$' |
| haystack = 'ZabcZ' |
| matches = [] |
| bounds = [1, 4] |
| anchored = true |
| unescape = true |
| line-terminator = 'Z' |
| |
| # Like 'word-boundary-at', but flips the word boundary to a negation. This |
| # in particular tests a tricky case in DFA engines, where they must consider |
| # explicitly that a starting configuration from a custom line terminator may |
| # also require setting the "is from word byte" flag on a state. Otherwise, |
| # it's treated as "not from a word byte," which would result in \B not matching |
| # here when it should. |
| [[test]] |
| name = "not-word-boundary-at" |
| regex = '(?m)^\B[a-z]+\B$' |
| haystack = 'ZabcZ' |
| matches = [[1, 4]] |
| bounds = [1, 4] |
| anchored = true |
| unescape = true |
| line-terminator = 'Z' |