| # Basic Unicode literal support. |
| [[test]] |
| name = "literal1" |
| regex = '☃' |
| haystack = "☃" |
| # A non-ASCII literal is expected to match itself. The span [0, 3] covers the |
| # whole haystack: one character that occupies 3 bytes in UTF-8. |
| matches = [[0, 3]] |
| |
| [[test]] |
| name = "literal2" |
| regex = '☃+' |
| haystack = "☃" |
| # The + repetition applied to a non-ASCII literal still yields a single match |
| # over the one 3-byte character in the haystack. |
| matches = [[0, 3]] |
| |
| [[test]] |
| name = "literal3" |
| regex = '☃+' |
| haystack = "☃" |
| matches = [[0, 3]] |
| # With case-insensitive matching enabled, the expected span is still the whole |
| # 3-byte haystack. |
| case-insensitive = true |
| |
| [[test]] |
| name = "literal4" |
| regex = 'Δ' |
| haystack = "δ" |
| # The pattern is an uppercase Greek delta and the haystack is the lowercase |
| # form (2 bytes in UTF-8); a match is expected under case-insensitive matching. |
| matches = [[0, 2]] |
| case-insensitive = true |
| |
| # Unicode word boundaries. |
| [[test]] |
| name = "wb-100" |
| regex = '\d\b' |
| haystack = "6δ" |
| # No match expected: the data implies \b does not hold between the digit and |
| # the Greek letter that follows it. |
| matches = [] |
| |
| [[test]] |
| name = "wb-200" |
| regex = '\d\b' |
| haystack = "6 " |
| # The digit is followed by a space, so \b is expected to hold and the digit |
| # alone is the match. |
| matches = [[0, 1]] |
| |
| [[test]] |
| name = "wb-300" |
| regex = '\d\B' |
| haystack = "6δ" |
| # \B (not a word boundary) is expected to hold between the digit and the Greek |
| # letter, so the digit alone is the match. |
| matches = [[0, 1]] |
| |
| [[test]] |
| name = "wb-400" |
| regex = '\d\B' |
| haystack = "6 " |
| # No match expected: the digit is followed by a space, so the \B that the |
| # pattern requires after the digit is not satisfied. |
| matches = [] |
| |
| # Unicode character class support. |
| [[test]] |
| name = "class1" |
| regex = '[☃Ⅰ]+' |
| haystack = "☃" |
| # A bracketed class listing two non-ASCII characters. The haystack holds one |
| # of them, a single character of 3 bytes in UTF-8. |
| matches = [[0, 3]] |
| |
| [[test]] |
| name = "class2" |
| regex = '\pN' |
| haystack = "Ⅰ" |
| # \pN is the brace-less, one-letter property form. The Roman numeral in the |
| # haystack (3 bytes in UTF-8) is expected to match it. |
| matches = [[0, 3]] |
| |
| [[test]] |
| name = "class3" |
| regex = '\pN+' |
| haystack = "Ⅰ1Ⅱ2" |
| # The whole haystack is expected to match: two 3-byte Roman numerals and two |
| # ASCII digits give 3 + 1 + 3 + 1 = 8. |
| matches = [[0, 8]] |
| |
| [[test]] |
| name = "class4" |
| regex = '\PN+' |
| haystack = "abⅠ" |
| # \PN is the negated form of \pN: only the two leading ASCII letters are |
| # expected to match, stopping before the Roman numeral at offset 2. |
| matches = [[0, 2]] |
| |
| [[test]] |
| name = "class5" |
| regex = '[\PN]+' |
| haystack = "abⅠ" |
| # The negated property \PN placed inside a bracketed class: only the two |
| # leading ASCII letters are expected to match. |
| matches = [[0, 2]] |
| |
| [[test]] |
| name = "class6" |
| regex = '[^\PN]+' |
| haystack = "abⅠ" |
| # [^\PN] negates an already negated property, so the expected match is the |
| # 3-byte Roman numeral at [2, 5], after the two ASCII letters. |
| matches = [[2, 5]] |
| |
| [[test]] |
| name = "class7" |
| regex = '\p{Lu}+' |
| haystack = "ΛΘΓΔα" |
| # Expected to cover the four uppercase Greek letters (2 bytes each in UTF-8) |
| # and to stop before the final lowercase alpha. |
| matches = [[0, 8]] |
| |
| [[test]] |
| name = "class8" |
| regex = '\p{Lu}+' |
| haystack = "ΛΘΓΔα" |
| matches = [[0, 10]] |
| # With case-insensitive matching the trailing lowercase alpha is expected to |
| # match \p{Lu} as well, so the span is the full 10-byte haystack. |
| case-insensitive = true |
| |
| [[test]] |
| name = "class9" |
| regex = '\pL+' |
| haystack = "ΛΘΓΔα" |
| # \pL is expected to accept both the uppercase and the lowercase letters, so |
| # the whole 10-byte haystack matches. |
| matches = [[0, 10]] |
| |
| [[test]] |
| name = "class10" |
| regex = '\p{Ll}+' |
| haystack = "ΛΘΓΔα" |
| # Only the trailing lowercase alpha (bytes 8 to 10) is expected to match; the |
| # four uppercase letters before it are skipped. |
| matches = [[8, 10]] |
| |
| # Unicode aware "Perl" character classes. |
| [[test]] |
| name = "perl1" |
| regex = '\w+' |
| haystack = "dδd" |
| # \w is expected to include the Greek letter between the two ASCII letters: |
| # 1 + 2 + 1 bytes gives [0, 4]. |
| matches = [[0, 4]] |
| |
| [[test]] |
| name = "perl2" |
| regex = '\w+' |
| haystack = "⥡" |
| # No match expected: the single symbol in the haystack is not supposed to |
| # count as a word character. |
| matches = [] |
| |
| [[test]] |
| name = "perl3" |
| regex = '\W+' |
| haystack = "⥡" |
| # \W (non-word) is expected to match the single symbol, which is 3 bytes in |
| # UTF-8. |
| matches = [[0, 3]] |
| |
| [[test]] |
| name = "perl4" |
| regex = '\d+' |
| haystack = "1२३9" |
| # \d is expected to accept non-ASCII digits too: two ASCII digits plus two |
| # 3-byte digits give 1 + 3 + 3 + 1 = 8. |
| matches = [[0, 8]] |
| |
| [[test]] |
| name = "perl5" |
| regex = '\d+' |
| haystack = "Ⅱ" |
| # No match expected: the test requires that \d does not accept this Roman |
| # numeral character. |
| matches = [] |
| |
| [[test]] |
| name = "perl6" |
| regex = '\D+' |
| haystack = "Ⅱ" |
| # \D (non-digit) is expected to match the single Roman numeral character, |
| # which is 3 bytes in UTF-8. |
| matches = [[0, 3]] |
| |
| [[test]] |
| name = "perl7" |
| regex = '\s+' |
| # U+1680 (Ogham space mark) is a whitespace character that takes 3 bytes in |
| # UTF-8, which is what the expected span [0, 3] requires. It is written as an |
| # escape because the raw character is easy to mistake for an ASCII space, |
| # which would only span [0, 1]. |
| haystack = "\u1680" |
| matches = [[0, 3]] |
| |
| [[test]] |
| name = "perl8" |
| regex = '\s+' |
| haystack = "☃" |
| # No match expected: the haystack is a single symbol that is not supposed to |
| # count as whitespace. |
| matches = [] |
| |
| [[test]] |
| name = "perl9" |
| regex = '\S+' |
| haystack = "☃" |
| # \S (non-whitespace) is expected to match the single symbol, which is 3 bytes |
| # in UTF-8. |
| matches = [[0, 3]] |
| |
| # Specific tests for Unicode general category classes. |
| [[test]] |
| name = "class-gencat1" |
| regex = '\p{Cased_Letter}' |
| # U+FF21 is the fullwidth Latin capital letter A, which takes 3 bytes in UTF-8 |
| # and so produces the expected span [0, 3]. It is written as an escape so it |
| # cannot be confused with, or normalised into, ASCII "A" (which would only |
| # span [0, 1]). |
| haystack = "\uFF21" |
| matches = [[0, 3]] |
| |
| [[test]] |
| name = "class-gencat2" |
| regex = '\p{Close_Punctuation}' |
| haystack = "❯" |
| # The haystack is one character (3 bytes in UTF-8) that is expected to be in |
| # the Close_Punctuation category. |
| matches = [[0, 3]] |
| |
| [[test]] |
| name = "class-gencat3" |
| regex = '\p{Connector_Punctuation}' |
| haystack = "⁀" |
| # The haystack is one character (3 bytes in UTF-8) that is expected to be in |
| # the Connector_Punctuation category. |
| matches = [[0, 3]] |
| |
| [[test]] |
| name = "class-gencat4" |
| regex = '\p{Control}' |
| haystack = "\u009F" |
| # U+009F encodes to 2 bytes in UTF-8 and is expected to be in the Control |
| # category. |
| matches = [[0, 2]] |
| |
| [[test]] |
| name = "class-gencat5" |
| regex = '\p{Currency_Symbol}' |
| # U+FFE1 is the fullwidth pound sign, which takes 3 bytes in UTF-8 and so |
| # produces the expected span [0, 3]. It is written as an escape so it cannot |
| # be confused with, or normalised into, the ordinary pound sign U+00A3 (which |
| # is 2 bytes and would only span [0, 2]). |
| haystack = "\uFFE1" |
| matches = [[0, 3]] |
| |
| [[test]] |
| name = "class-gencat6" |
| regex = '\p{Dash_Punctuation}' |
| haystack = "〰" |
| # The haystack is one character (3 bytes in UTF-8) that is expected to be in |
| # the Dash_Punctuation category. |
| matches = [[0, 3]] |
| |
| [[test]] |
| name = "class-gencat7" |
| regex = '\p{Decimal_Number}' |
| haystack = "𑓙" |
| # The haystack is one character that needs 4 bytes in UTF-8; it is expected |
| # to be in the Decimal_Number category. |
| matches = [[0, 4]] |
| |
| [[test]] |
| name = "class-gencat8" |
| regex = '\p{Enclosing_Mark}' |
| haystack = "\uA672" |
| # U+A672 encodes to 3 bytes in UTF-8 and is expected to be in the |
| # Enclosing_Mark category. |
| matches = [[0, 3]] |
| |
| [[test]] |
| name = "class-gencat9" |
| regex = '\p{Final_Punctuation}' |
| haystack = "⸡" |
| # The haystack is one character (3 bytes in UTF-8) that is expected to be in |
| # the Final_Punctuation category. |
| matches = [[0, 3]] |
| |
| [[test]] |
| name = "class-gencat10" |
| regex = '\p{Format}' |
| haystack = "\U000E007F" |
| # U+E007F encodes to 4 bytes in UTF-8 and is expected to be in the Format |
| # category. |
| matches = [[0, 4]] |
| |
| [[test]] |
| name = "class-gencat11" |
| regex = '\p{Initial_Punctuation}' |
| haystack = "⸜" |
| # The haystack is one character (3 bytes in UTF-8) that is expected to be in |
| # the Initial_Punctuation category. |
| matches = [[0, 3]] |
| |
| [[test]] |
| name = "class-gencat12" |
| regex = '\p{Letter}' |
| haystack = "Έ" |
| # The haystack is one Greek letter (2 bytes in UTF-8) that is expected to |
| # match the broad Letter category. |
| matches = [[0, 2]] |
| |
| [[test]] |
| name = "class-gencat13" |
| regex = '\p{Letter_Number}' |
| haystack = "ↂ" |
| # The haystack is one character (3 bytes in UTF-8) that is expected to be in |
| # the Letter_Number category. |
| matches = [[0, 3]] |
| |
| [[test]] |
| name = "class-gencat14" |
| regex = '\p{Line_Separator}' |
| haystack = "\u2028" |
| # U+2028 encodes to 3 bytes in UTF-8 and is expected to be in the |
| # Line_Separator category. |
| matches = [[0, 3]] |
| |
| [[test]] |
| name = "class-gencat15" |
| regex = '\p{Lowercase_Letter}' |
| haystack = "ϛ" |
| # The haystack is one character (2 bytes in UTF-8) that is expected to be in |
| # the Lowercase_Letter category. |
| matches = [[0, 2]] |
| |
| [[test]] |
| name = "class-gencat16" |
| regex = '\p{Mark}' |
| haystack = "\U000E01EF" |
| # U+E01EF encodes to 4 bytes in UTF-8 and is expected to match the broad Mark |
| # category. |
| matches = [[0, 4]] |
| |
| [[test]] |
| name = "class-gencat17" |
| # NOTE(review): "Math" looks like the binary Math property rather than a |
| # general category name (that would be Math_Symbol) - confirm this test is |
| # meant to sit in the general category group. |
| regex = '\p{Math}' |
| haystack = "⋿" |
| # The haystack is one character (3 bytes in UTF-8) that is expected to match. |
| matches = [[0, 3]] |
| |
| [[test]] |
| name = "class-gencat18" |
| regex = '\p{Modifier_Letter}' |
| haystack = "𖭃" |
| # The haystack is one character that needs 4 bytes in UTF-8; it is expected |
| # to be in the Modifier_Letter category. |
| matches = [[0, 4]] |
| |
| [[test]] |
| name = "class-gencat19" |
| regex = '\p{Modifier_Symbol}' |
| haystack = "🏿" |
| # The haystack is one character that needs 4 bytes in UTF-8; it is expected |
| # to be in the Modifier_Symbol category. |
| matches = [[0, 4]] |
| |
| [[test]] |
| name = "class-gencat20" |
| regex = '\p{Nonspacing_Mark}' |
| haystack = "\U0001E94A" |
| # U+1E94A encodes to 4 bytes in UTF-8 and is expected to be in the |
| # Nonspacing_Mark category. |
| matches = [[0, 4]] |
| |
| [[test]] |
| name = "class-gencat21" |
| regex = '\p{Number}' |
| haystack = "⓿" |
| # The haystack is one character (3 bytes in UTF-8) that is expected to match |
| # the broad Number category. |
| matches = [[0, 3]] |
| |
| [[test]] |
| name = "class-gencat22" |
| regex = '\p{Open_Punctuation}' |
| haystack = "⦅" |
| # The haystack is one character (3 bytes in UTF-8) that is expected to be in |
| # the Open_Punctuation category. |
| matches = [[0, 3]] |
| |
| [[test]] |
| name = "class-gencat23" |
| regex = '\p{Other}' |
| haystack = "\u0BC9" |
| # U+0BC9 encodes to 3 bytes in UTF-8 and is expected to match the broad Other |
| # category. |
| matches = [[0, 3]] |
| |
| [[test]] |
| name = "class-gencat24" |
| regex = '\p{Other_Letter}' |
| haystack = "ꓷ" |
| # The haystack is one character (3 bytes in UTF-8) that is expected to be in |
| # the Other_Letter category. |
| matches = [[0, 3]] |
| |
| [[test]] |
| name = "class-gencat25" |
| regex = '\p{Other_Number}' |
| haystack = "㉏" |
| # The haystack is one character (3 bytes in UTF-8) that is expected to be in |
| # the Other_Number category. |
| matches = [[0, 3]] |
| |
| [[test]] |
| name = "class-gencat26" |
| regex = '\p{Other_Punctuation}' |
| haystack = "𞥞" |
| # The haystack is one character that needs 4 bytes in UTF-8; it is expected |
| # to be in the Other_Punctuation category. |
| matches = [[0, 4]] |
| |
| [[test]] |
| name = "class-gencat27" |
| regex = '\p{Other_Symbol}' |
| haystack = "⅌" |
| # The haystack is one character (3 bytes in UTF-8) that is expected to be in |
| # the Other_Symbol category. |
| matches = [[0, 3]] |
| |
| [[test]] |
| name = "class-gencat28" |
| regex = '\p{Paragraph_Separator}' |
| haystack = "\u2029" |
| # U+2029 encodes to 3 bytes in UTF-8 and is expected to be in the |
| # Paragraph_Separator category. |
| matches = [[0, 3]] |
| |
| [[test]] |
| name = "class-gencat29" |
| regex = '\p{Private_Use}' |
| haystack = "\U0010FFFD" |
| # U+10FFFD encodes to 4 bytes in UTF-8 and is expected to be in the |
| # Private_Use category. |
| matches = [[0, 4]] |
| |
| [[test]] |
| name = "class-gencat30" |
| regex = '\p{Punctuation}' |
| haystack = "𑁍" |
| # The haystack is one character that needs 4 bytes in UTF-8; it is expected |
| # to match the broad Punctuation category. |
| matches = [[0, 4]] |
| |
| [[test]] |
| name = "class-gencat31" |
| regex = '\p{Separator}' |
| haystack = "\u3000" |
| # U+3000 encodes to 3 bytes in UTF-8 and is expected to match the broad |
| # Separator category. |
| matches = [[0, 3]] |
| |
| [[test]] |
| name = "class-gencat32" |
| regex = '\p{Space_Separator}' |
| haystack = "\u205F" |
| # U+205F encodes to 3 bytes in UTF-8 and is expected to be in the |
| # Space_Separator category. |
| matches = [[0, 3]] |
| |
| [[test]] |
| name = "class-gencat33" |
| regex = '\p{Spacing_Mark}' |
| haystack = "\U00016F7E" |
| # U+16F7E encodes to 4 bytes in UTF-8 and is expected to be in the |
| # Spacing_Mark category. |
| matches = [[0, 4]] |
| |
| [[test]] |
| name = "class-gencat34" |
| regex = '\p{Symbol}' |
| haystack = "⯈" |
| # The haystack is one character (3 bytes in UTF-8) that is expected to match |
| # the broad Symbol category. |
| matches = [[0, 3]] |
| |
| [[test]] |
| name = "class-gencat35" |
| regex = '\p{Titlecase_Letter}' |
| haystack = "ῼ" |
| # The haystack is one character (3 bytes in UTF-8) that is expected to be in |
| # the Titlecase_Letter category. |
| matches = [[0, 3]] |
| |
| [[test]] |
| name = "class-gencat36" |
| regex = '\p{Unassigned}' |
| haystack = "\U0010FFFF" |
| # U+10FFFF, the highest Unicode code point, encodes to 4 bytes in UTF-8 and is |
| # expected to be in the Unassigned category. |
| matches = [[0, 4]] |
| |
| [[test]] |
| name = "class-gencat37" |
| regex = '\p{Uppercase_Letter}' |
| haystack = "Ꝋ" |
| # The haystack is one character (3 bytes in UTF-8) that is expected to be in |
| # the Uppercase_Letter category. |
| matches = [[0, 3]] |
| |
| |
| # Tests for Unicode emoji properties. |
| [[test]] |
| name = "class-emoji1" |
| regex = '\p{Emoji}' |
| haystack = "\u23E9" |
| # U+23E9 encodes to 3 bytes in UTF-8 and is expected to have the Emoji |
| # property. |
| matches = [[0, 3]] |
| |
| [[test]] |
| name = "class-emoji2" |
| # The property name is written entirely in lowercase here. |
| regex = '\p{emoji}' |
| haystack = "\U0001F21A" |
| # U+1F21A encodes to 4 bytes in UTF-8 and is expected to match. |
| matches = [[0, 4]] |
| |
| [[test]] |
| name = "class-emoji3" |
| # The property name is written in lowercase with no word separator. |
| regex = '\p{extendedpictographic}' |
| haystack = "\U0001FA6E" |
| # U+1FA6E encodes to 4 bytes in UTF-8 and is expected to match. |
| matches = [[0, 4]] |
| |
| [[test]] |
| name = "class-emoji4" |
| # The property name is written in lowercase with no word separator. |
| regex = '\p{extendedpictographic}' |
| haystack = "\U0001FFFD" |
| # U+1FFFD encodes to 4 bytes in UTF-8 and is expected to match. |
| matches = [[0, 4]] |
| |
| |
| # Tests for Unicode grapheme cluster properties. |
| [[test]] |
| name = "class-gcb1" |
| # Uses the name=value form with the property name spelled out in full. |
| regex = '\p{grapheme_cluster_break=prepend}' |
| haystack = "\U00011D46" |
| # U+11D46 encodes to 4 bytes in UTF-8 and is expected to match. |
| matches = [[0, 4]] |
| |
| [[test]] |
| name = "class-gcb2" |
| # "gcb" is presumably the short alias of grapheme_cluster_break - confirm |
| # against the engine's property alias table. |
| regex = '\p{gcb=regional_indicator}' |
| haystack = "\U0001F1E6" |
| # U+1F1E6 encodes to 4 bytes in UTF-8 and is expected to match. |
| matches = [[0, 4]] |
| |
| [[test]] |
| name = "class-gcb3" |
| # Both sides are abbreviated; "ri" is presumably the short alias of |
| # regional_indicator - confirm against the engine's property alias table. |
| regex = '\p{gcb=ri}' |
| haystack = "\U0001F1E7" |
| # U+1F1E7 encodes to 4 bytes in UTF-8 and is expected to match. |
| matches = [[0, 4]] |
| |
| [[test]] |
| name = "class-gcb4" |
| # Only a single name is given, with no "property=" prefix and no separator. |
| regex = '\p{regionalindicator}' |
| haystack = "\U0001F1FF" |
| # U+1F1FF encodes to 4 bytes in UTF-8 and is expected to match. |
| matches = [[0, 4]] |
| |
| [[test]] |
| name = "class-gcb5" |
| regex = '\p{gcb=lvt}' |
| haystack = "\uC989" |
| # U+C989 encodes to 3 bytes in UTF-8 and is expected to match gcb=lvt. |
| matches = [[0, 3]] |
| |
| [[test]] |
| name = "class-gcb6" |
| regex = '\p{gcb=zwj}' |
| haystack = "\u200D" |
| # U+200D (zero width joiner) encodes to 3 bytes in UTF-8 and is expected to |
| # match gcb=zwj. |
| matches = [[0, 3]] |
| |
| # Tests for Unicode word boundary properties. |
| [[test]] |
| name = "class-word-break1" |
| # Uses the name=value form with both names spelled out in full. |
| regex = '\p{word_break=Hebrew_Letter}' |
| haystack = "\uFB46" |
| # U+FB46 encodes to 3 bytes in UTF-8 and is expected to match. |
| matches = [[0, 3]] |
| |
| [[test]] |
| name = "class-word-break2" |
| # Presumably a shorter spelling of word_break=Hebrew_Letter ("wb" alias, value |
| # in lowercase with no separator) - confirm against the engine's alias table. |
| regex = '\p{wb=hebrewletter}' |
| haystack = "\uFB46" |
| # U+FB46 encodes to 3 bytes in UTF-8 and is expected to match. |
| matches = [[0, 3]] |
| |
| [[test]] |
| name = "class-word-break3" |
| regex = '\p{wb=ExtendNumLet}' |
| haystack = "\uFF3F" |
| # U+FF3F encodes to 3 bytes in UTF-8 and is expected to match |
| # wb=ExtendNumLet. |
| matches = [[0, 3]] |
| |
| [[test]] |
| name = "class-word-break4" |
| regex = '\p{wb=WSegSpace}' |
| haystack = "\u3000" |
| # U+3000 encodes to 3 bytes in UTF-8 and is expected to match wb=WSegSpace. |
| matches = [[0, 3]] |
| |
| [[test]] |
| name = "class-word-break5" |
| regex = '\p{wb=numeric}' |
| haystack = "\U0001E950" |
| # U+1E950 encodes to 4 bytes in UTF-8 and is expected to match wb=numeric. |
| matches = [[0, 4]] |
| |
| # Tests for Unicode sentence boundary properties. |
| [[test]] |
| name = "class-sentence-break1" |
| # Uses the name=value form with the property name spelled out in full. |
| regex = '\p{sentence_break=Lower}' |
| haystack = "\u0469" |
| # U+0469 encodes to 2 bytes in UTF-8 and is expected to match. |
| matches = [[0, 2]] |
| |
| [[test]] |
| name = "class-sentence-break2" |
| # Presumably a shorter spelling of sentence_break=Lower ("sb" alias, value in |
| # lowercase) - confirm against the engine's alias table. |
| regex = '\p{sb=lower}' |
| haystack = "\u0469" |
| # U+0469 encodes to 2 bytes in UTF-8 and is expected to match. |
| matches = [[0, 2]] |
| |
| [[test]] |
| name = "class-sentence-break3" |
| regex = '\p{sb=Close}' |
| haystack = "\uFF60" |
| # U+FF60 encodes to 3 bytes in UTF-8 and is expected to match sb=Close. |
| matches = [[0, 3]] |
| |
| [[test]] |
| name = "class-sentence-break4" |
| regex = '\p{sb=Close}' |
| haystack = "\U0001F677" |
| # U+1F677 encodes to 4 bytes in UTF-8 and is expected to match sb=Close. |
| matches = [[0, 4]] |
| |
| [[test]] |
| name = "class-sentence-break5" |
| regex = '\p{sb=SContinue}' |
| haystack = "\uFF64" |
| # U+FF64 encodes to 3 bytes in UTF-8 and is expected to match sb=SContinue. |
| matches = [[0, 3]] |