# vendor/regex-1.10.6/testdata/utf8.toml - toolchain/rustc - Git at Google

 # These test the UTF-8 modes exposed by regex-automata. Namely, when utf8 is
 # true, then we promise that the haystack is valid UTF-8. (Otherwise behavior
 # is unspecified.) This also corresponds to building the regex engine with the
 # following two guarantees:
 #
 # 1) For any non-empty match reported, its span is guaranteed to correspond to
 # valid UTF-8.
 # 2) All empty or zero-width matches reported must never split a UTF-8
 # encoded codepoint. If the haystack has invalid UTF-8, then this results in
 # unspecified behavior.
 #
 # Guarantee (2) is what we focus our testing on, since (1) is generally
 # guaranteed by regex-syntax's AST-to-HIR translator and is well tested there.
 # Guarantee (2), however, can't be described in the HIR, so the regex engines
 # themselves have to handle that case. Thus, we test it here.
 #
 # Note that it is possible to build a regex that has property (1) but not
 # (2), and vice versa. For example, (1) without (2) is obtained by building
 # the HIR with 'utf8=true' but building the Thompson NFA with 'utf8=false'.
 # We don't test that here because the harness doesn't expose a way to enable
 # or disable UTF-8 mode with that granularity. Instead, those combinations
 # are lightly tested via doc examples. That's not to say that (1) without (2)
 # is uncommon. Indeed, ripgrep uses it because it cannot guarantee that its
 # haystack is valid UTF-8.

 # This tests that an empty regex doesn't split a codepoint.
 [[test]]
 name = "empty-utf8yes"
 regex = ''
 haystack = '☃'
 matches = [[0, 0], [3, 3]]
 unicode = true
 utf8 = true

 # Tests the overlapping case of the above.
 [[test]]
 name = "empty-utf8yes-overlapping"
 regex = ''
 haystack = '☃'
 matches = [[0, 0], [3, 3]]
 unicode = true
 utf8 = true
 match-kind = "all"
 search-kind = "overlapping"

 # This tests that an empty regex DOES split a codepoint when utf8=false.
 # The haystack is a single 3-byte codepoint, so the matches at offsets 1 and
 # 2 fall in the middle of its UTF-8 encoding.
 [[test]]
 name = "empty-utf8no"
 regex = ''
 haystack = '☃'
 matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
 unicode = true
 utf8 = false

 # Tests the overlapping case of the above.
 [[test]]
 name = "empty-utf8no-overlapping"
 regex = ''
 haystack = '☃'
 matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
 unicode = true
 utf8 = false
 match-kind = "all"
 search-kind = "overlapping"

 # This tests that an empty regex doesn't split a codepoint, even if we give
 # it bounds entirely within the codepoint.
 #
 # This is one of the trickier cases and is what motivated the current UTF-8
 # mode design. In particular, at one point, this test failed the 'is_match'
 # variant of the test but not 'find'. This is because the 'is_match' code path
 # is specifically optimized for "was a match found" rather than "where is the
 # match." In the former case, you don't really care about the empty-vs-non-empty
 # matches, and thus, the codepoint splitting filtering logic wasn't getting
 # applied. (In multiple ways across multiple regex engines.) In this way, you
 # can wind up with a situation where 'is_match' says "yes," but 'find' says,
 # "I didn't find anything." Which is... not great.
 #
 # I could have decided to say that providing boundaries that themselves split
 # a codepoint would have unspecified behavior. But I couldn't quite convince
 # myself that such boundaries were the only way to get an inconsistency between
 # 'is_match' and 'find'.
 #
 # Note that I also tried to come up with a test like this that fails without
 # using `bounds`. Specifically, a test where 'is_match' and 'find' disagree.
 # But I couldn't do it, and I'm tempted to conclude it is impossible. The
 # fundamental problem is that you need to simultaneously produce an empty match
 # that splits a codepoint while *not* matching before or after the codepoint.
 [[test]]
 name = "empty-utf8yes-bounds"
 regex = ''
 haystack = '𝛃'
 bounds = [1, 3]
 matches = []
 unicode = true
 utf8 = true

 # Tests the overlapping case of the above.
 [[test]]
 name = "empty-utf8yes-bounds-overlapping"
 regex = ''
 haystack = '𝛃'
 bounds = [1, 3]
 matches = []
 unicode = true
 utf8 = true
 match-kind = "all"
 search-kind = "overlapping"

 # This tests that an empty regex splits a codepoint when the bounds are
 # entirely within the codepoint.
 [[test]]
 name = "empty-utf8no-bounds"
 regex = ''
 haystack = '𝛃'
 bounds = [1, 3]
 matches = [[1, 1], [2, 2], [3, 3]]
 unicode = true
 utf8 = false

 # Tests the overlapping case of the above.
 [[test]]
 name = "empty-utf8no-bounds-overlapping"
 regex = ''
 haystack = '𝛃'
 bounds = [1, 3]
 matches = [[1, 1], [2, 2], [3, 3]]
 unicode = true
 utf8 = false
 match-kind = "all"
 search-kind = "overlapping"

 # In this test, we anchor the search. Since the start position is also a UTF-8
 # boundary, we get a match.
 [[test]]
 name = "empty-utf8yes-anchored"
 regex = ''
 haystack = '𝛃'
 matches = [[0, 0]]
 anchored = true
 unicode = true
 utf8 = true

 # Tests the overlapping case of the above.
 [[test]]
 name = "empty-utf8yes-anchored-overlapping"
 regex = ''
 haystack = '𝛃'
 matches = [[0, 0]]
 anchored = true
 unicode = true
 utf8 = true
 match-kind = "all"
 search-kind = "overlapping"

 # Same as the anchored test above, except with UTF-8 mode disabled. The
 # results differ because this is an anchored search and the test harness
 # always finds all matches: it keeps reporting matches until none are found,
 # and since the search is anchored, matches are reported only so long as they
 # are directly adjacent. With UTF-8 mode enabled, the next anchored search
 # after the match at [0, 0] fails (it would split the codepoint), so iteration
 # stops there and never reaches the last match at [4, 4]. With UTF-8 mode
 # disabled, as here, every offset yields an adjacent empty match, so all of
 # them are reported.
 [[test]]
 name = "empty-utf8no-anchored"
 regex = ''
 haystack = '𝛃'
 matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
 anchored = true
 unicode = true
 utf8 = false

 # Tests the overlapping case of the above.
 #
 # Note that overlapping anchored searches are a little weird, and it's not
 # totally clear what their semantics ought to be. For now, we just test the
 # current behavior of our test shim that implements overlapping search. (This
 # is one of the reasons why we don't really expose regex-level overlapping
 # searches.)
 [[test]]
 name = "empty-utf8no-anchored-overlapping"
 regex = ''
 haystack = '𝛃'
 matches = [[0, 0]]
 anchored = true
 unicode = true
 utf8 = false
 match-kind = "all"
 search-kind = "overlapping"

 # In this test, we anchor the search, but also set bounds. The bounds start the
 # search in the middle of a codepoint, so there should never be a match.
 [[test]]
 name = "empty-utf8yes-anchored-bounds"
 regex = ''
 haystack = '𝛃'
 matches = []
 bounds = [1, 3]
 anchored = true
 unicode = true
 utf8 = true

 # Tests the overlapping case of the above.
 [[test]]
 name = "empty-utf8yes-anchored-bounds-overlapping"
 regex = ''
 haystack = '𝛃'
 matches = []
 bounds = [1, 3]
 anchored = true
 unicode = true
 utf8 = true
 match-kind = "all"
 search-kind = "overlapping"

 # Same as above, except with UTF-8 mode disabled. Without UTF-8 mode enabled,
 # matching within a codepoint is allowed. And remember, as in the anchored test
 # above with UTF-8 mode disabled, iteration will report all adjacent matches.
 # The matches at [0, 0] and [4, 4] are not included because of the bounds of
 # the search.
 [[test]]
 name = "empty-utf8no-anchored-bounds"
 regex = ''
 haystack = '𝛃'
 bounds = [1, 3]
 matches = [[1, 1], [2, 2], [3, 3]]
 anchored = true
 unicode = true
 utf8 = false

 # Tests the overlapping case of the above.
 #
 # Note that overlapping anchored searches are a little weird, and it's not
 # totally clear what their semantics ought to be. For now, we just test the
 # current behavior of our test shim that implements overlapping search. (This
 # is one of the reasons why we don't really expose regex-level overlapping
 # searches.)
 [[test]]
 name = "empty-utf8no-anchored-bounds-overlapping"
 regex = ''
 haystack = '𝛃'
 bounds = [1, 3]
 matches = [[1, 1]]
 anchored = true
 unicode = true
 utf8 = false
 match-kind = "all"
 search-kind = "overlapping"

 # This tests that we find the match at the end of the string when the bounds
 # exclude the first match.
 [[test]]
 name = "empty-utf8yes-startbound"
 regex = ''
 haystack = '𝛃'
 bounds = [1, 4]
 matches = [[4, 4]]
 unicode = true
 utf8 = true

 # Tests the overlapping case of the above.
 [[test]]
 name = "empty-utf8yes-startbound-overlapping"
 regex = ''
 haystack = '𝛃'
 bounds = [1, 4]
 matches = [[4, 4]]
 unicode = true
 utf8 = true
 match-kind = "all"
 search-kind = "overlapping"

 # Same as above, except since UTF-8 mode is disabled, we also find the matches
 # in between that split the codepoint.
 [[test]]
 name = "empty-utf8no-startbound"
 regex = ''
 haystack = '𝛃'
 bounds = [1, 4]
 matches = [[1, 1], [2, 2], [3, 3], [4, 4]]
 unicode = true
 utf8 = false

 # Tests the overlapping case of the above.
 [[test]]
 name = "empty-utf8no-startbound-overlapping"
 regex = ''
 haystack = '𝛃'
 bounds = [1, 4]
 matches = [[1, 1], [2, 2], [3, 3], [4, 4]]
 unicode = true
 utf8 = false
 match-kind = "all"
 search-kind = "overlapping"

 # This tests that we don't find any matches in an anchored search, even when
 # the bounds include a match (at the end).
 [[test]]
 name = "empty-utf8yes-anchored-startbound"
 regex = ''
 haystack = '𝛃'
 bounds = [1, 4]
 matches = []
 anchored = true
 unicode = true
 utf8 = true

 # Tests the overlapping case of the above.
 [[test]]
 name = "empty-utf8yes-anchored-startbound-overlapping"
 regex = ''
 haystack = '𝛃'
 bounds = [1, 4]
 matches = []
 anchored = true
 unicode = true
 utf8 = true
 match-kind = "all"
 search-kind = "overlapping"

 # Same as above, except since UTF-8 mode is disabled, we also find the matches
 # in between that split the codepoint. Even though this is an anchored search,
 # since the matches are adjacent, we find all of them.
 [[test]]
 name = "empty-utf8no-anchored-startbound"
 regex = ''
 haystack = '𝛃'
 bounds = [1, 4]
 matches = [[1, 1], [2, 2], [3, 3], [4, 4]]
 anchored = true
 unicode = true
 utf8 = false

 # Tests the overlapping case of the above.
 #
 # Note that overlapping anchored searches are a little weird, and it's not
 # totally clear what their semantics ought to be. For now, we just test the
 # current behavior of our test shim that implements overlapping search. (This
 # is one of the reasons why we don't really expose regex-level overlapping
 # searches.)
 [[test]]
 name = "empty-utf8no-anchored-startbound-overlapping"
 regex = ''
 haystack = '𝛃'
 bounds = [1, 4]
 matches = [[1, 1]]
 anchored = true
 unicode = true
 utf8 = false
 match-kind = "all"
 search-kind = "overlapping"

 # This tests that we find the match at the end of the haystack in UTF-8 mode
 # when our bounds only include the empty string at the end of the haystack.
 [[test]]
 name = "empty-utf8yes-anchored-endbound"
 regex = ''
 haystack = '𝛃'
 bounds = [4, 4]
 matches = [[4, 4]]
 anchored = true
 unicode = true
 utf8 = true

 # Tests the overlapping case of the above.
 [[test]]
 name = "empty-utf8yes-anchored-endbound-overlapping"
 regex = ''
 haystack = '𝛃'
 bounds = [4, 4]
 matches = [[4, 4]]
 anchored = true
 unicode = true
 utf8 = true
 match-kind = "all"
 search-kind = "overlapping"

 # Same as above, but with UTF-8 mode disabled. Results remain the same since
 # the only possible match does not split a codepoint.
 [[test]]
 name = "empty-utf8no-anchored-endbound"
 regex = ''
 haystack = '𝛃'
 bounds = [4, 4]
 matches = [[4, 4]]
 anchored = true
 unicode = true
 utf8 = false

 # Tests the overlapping case of the above.
 [[test]]
 name = "empty-utf8no-anchored-endbound-overlapping"
 regex = ''
 haystack = '𝛃'
 bounds = [4, 4]
 matches = [[4, 4]]
 anchored = true
 unicode = true
 utf8 = false
 match-kind = "all"
 search-kind = "overlapping"
	# These test the UTF-8 modes exposed by regex-automata. Namely, when utf8 is
	# true, then we promise that the haystack is valid UTF-8. (Otherwise behavior
	# is unspecified.) This also corresponds to building the regex engine with the
	# following two guarantees:
	#
	# 1) For any non-empty match reported, its span is guaranteed to correspond to
	# valid UTF-8.
	# 2) All empty or zero-width matches reported must never split a UTF-8
	# encoded codepoint. If the haystack has invalid UTF-8, then this results in
	# unspecified behavior.
	#
	# Guarantee (2) is what we focus our testing on, since (1) is generally
	# guaranteed by regex-syntax's AST-to-HIR translator and is well tested there.
	# Guarantee (2), however, can't be described in the HIR, so the regex engines
	# themselves have to handle that case. Thus, we test it here.
	#
	# Note that it is possible to build a regex that has property (1) but not
	# (2), and vice versa. For example, (1) without (2) is obtained by building
	# the HIR with 'utf8=true' but building the Thompson NFA with 'utf8=false'.
	# We don't test that here because the harness doesn't expose a way to enable
	# or disable UTF-8 mode with that granularity. Instead, those combinations
	# are lightly tested via doc examples. That's not to say that (1) without (2)
	# is uncommon. Indeed, ripgrep uses it because it cannot guarantee that its
	# haystack is valid UTF-8.

	# This tests that an empty regex doesn't split a codepoint.
	[[test]]
	name = "empty-utf8yes"
	regex = ''
	haystack = '☃'
	matches = [[0, 0], [3, 3]]
	unicode = true
	utf8 = true

	# Tests the overlapping case of the above.
	[[test]]
	name = "empty-utf8yes-overlapping"
	regex = ''
	haystack = '☃'
	matches = [[0, 0], [3, 3]]
	unicode = true
	utf8 = true
	match-kind = "all"
	search-kind = "overlapping"

	# This tests that an empty regex DOES split a codepoint when utf8=false.
	# The haystack is a single 3-byte codepoint, so the matches at offsets 1 and
	# 2 fall in the middle of its UTF-8 encoding.
	[[test]]
	name = "empty-utf8no"
	regex = ''
	haystack = '☃'
	matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
	unicode = true
	utf8 = false

	# Tests the overlapping case of the above.
	[[test]]
	name = "empty-utf8no-overlapping"
	regex = ''
	haystack = '☃'
	matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
	unicode = true
	utf8 = false
	match-kind = "all"
	search-kind = "overlapping"

	# This tests that an empty regex doesn't split a codepoint, even if we give
	# it bounds entirely within the codepoint.
	#
	# This is one of the trickier cases and is what motivated the current UTF-8
	# mode design. In particular, at one point, this test failed the 'is_match'
	# variant of the test but not 'find'. This is because the 'is_match' code path
	# is specifically optimized for "was a match found" rather than "where is the
	# match." In the former case, you don't really care about the empty-vs-non-empty
	# matches, and thus, the codepoint splitting filtering logic wasn't getting
	# applied. (In multiple ways across multiple regex engines.) In this way, you
	# can wind up with a situation where 'is_match' says "yes," but 'find' says,
	# "I didn't find anything." Which is... not great.
	#
	# I could have decided to say that providing boundaries that themselves split
	# a codepoint would have unspecified behavior. But I couldn't quite convince
	# myself that such boundaries were the only way to get an inconsistency between
	# 'is_match' and 'find'.
	#
	# Note that I also tried to come up with a test like this that fails without
	# using `bounds`. Specifically, a test where 'is_match' and 'find' disagree.
	# But I couldn't do it, and I'm tempted to conclude it is impossible. The
	# fundamental problem is that you need to simultaneously produce an empty match
	# that splits a codepoint while not matching before or after the codepoint.
	[[test]]
	name = "empty-utf8yes-bounds"
	regex = ''
	haystack = '𝛃'
	bounds = [1, 3]
	matches = []
	unicode = true
	utf8 = true

	# Tests the overlapping case of the above.
	[[test]]
	name = "empty-utf8yes-bounds-overlapping"
	regex = ''
	haystack = '𝛃'
	bounds = [1, 3]
	matches = []
	unicode = true
	utf8 = true
	match-kind = "all"
	search-kind = "overlapping"

	# This tests that an empty regex splits a codepoint when the bounds are
	# entirely within the codepoint.
	[[test]]
	name = "empty-utf8no-bounds"
	regex = ''
	haystack = '𝛃'
	bounds = [1, 3]
	matches = [[1, 1], [2, 2], [3, 3]]
	unicode = true
	utf8 = false

	# Tests the overlapping case of the above.
	[[test]]
	name = "empty-utf8no-bounds-overlapping"
	regex = ''
	haystack = '𝛃'
	bounds = [1, 3]
	matches = [[1, 1], [2, 2], [3, 3]]
	unicode = true
	utf8 = false
	match-kind = "all"
	search-kind = "overlapping"

	# In this test, we anchor the search. Since the start position is also a UTF-8
	# boundary, we get a match.
	[[test]]
	name = "empty-utf8yes-anchored"
	regex = ''
	haystack = '𝛃'
	matches = [[0, 0]]
	anchored = true
	unicode = true
	utf8 = true

	# Tests the overlapping case of the above.
	[[test]]
	name = "empty-utf8yes-anchored-overlapping"
	regex = ''
	haystack = '𝛃'
	matches = [[0, 0]]
	anchored = true
	unicode = true
	utf8 = true
	match-kind = "all"
	search-kind = "overlapping"

	# Same as the anchored test above, except with UTF-8 mode disabled. The
	# results differ because this is an anchored search and the test harness
	# always finds all matches: it keeps reporting matches until none are found,
	# and since the search is anchored, matches are reported only so long as they
	# are directly adjacent. With UTF-8 mode enabled, the next anchored search
	# after the match at [0, 0] fails (it would split the codepoint), so iteration
	# stops there and never reaches the last match at [4, 4]. With UTF-8 mode
	# disabled, as here, every offset yields an adjacent empty match, so all of
	# them are reported.
	[[test]]
	name = "empty-utf8no-anchored"
	regex = ''
	haystack = '𝛃'
	matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
	anchored = true
	unicode = true
	utf8 = false

	# Tests the overlapping case of the above.
	#
	# Note that overlapping anchored searches are a little weird, and it's not
	# totally clear what their semantics ought to be. For now, we just test the
	# current behavior of our test shim that implements overlapping search. (This
	# is one of the reasons why we don't really expose regex-level overlapping
	# searches.)
	[[test]]
	name = "empty-utf8no-anchored-overlapping"
	regex = ''
	haystack = '𝛃'
	matches = [[0, 0]]
	anchored = true
	unicode = true
	utf8 = false
	match-kind = "all"
	search-kind = "overlapping"

	# In this test, we anchor the search, but also set bounds. The bounds start the
	# search in the middle of a codepoint, so there should never be a match.
	[[test]]
	name = "empty-utf8yes-anchored-bounds"
	regex = ''
	haystack = '𝛃'
	matches = []
	bounds = [1, 3]
	anchored = true
	unicode = true
	utf8 = true

	# Tests the overlapping case of the above.
	[[test]]
	name = "empty-utf8yes-anchored-bounds-overlapping"
	regex = ''
	haystack = '𝛃'
	matches = []
	bounds = [1, 3]
	anchored = true
	unicode = true
	utf8 = true
	match-kind = "all"
	search-kind = "overlapping"

	# Same as above, except with UTF-8 mode disabled. Without UTF-8 mode enabled,
	# matching within a codepoint is allowed. And remember, as in the anchored test
	# above with UTF-8 mode disabled, iteration will report all adjacent matches.
	# The matches at [0, 0] and [4, 4] are not included because of the bounds of
	# the search.
	[[test]]
	name = "empty-utf8no-anchored-bounds"
	regex = ''
	haystack = '𝛃'
	bounds = [1, 3]
	matches = [[1, 1], [2, 2], [3, 3]]
	anchored = true
	unicode = true
	utf8 = false

	# Tests the overlapping case of the above.
	#
	# Note that overlapping anchored searches are a little weird, and it's not
	# totally clear what their semantics ought to be. For now, we just test the
	# current behavior of our test shim that implements overlapping search. (This
	# is one of the reasons why we don't really expose regex-level overlapping
	# searches.)
	[[test]]
	name = "empty-utf8no-anchored-bounds-overlapping"
	regex = ''
	haystack = '𝛃'
	bounds = [1, 3]
	matches = [[1, 1]]
	anchored = true
	unicode = true
	utf8 = false
	match-kind = "all"
	search-kind = "overlapping"

	# This tests that we find the match at the end of the string when the bounds
	# exclude the first match.
	[[test]]
	name = "empty-utf8yes-startbound"
	regex = ''
	haystack = '𝛃'
	bounds = [1, 4]
	matches = [[4, 4]]
	unicode = true
	utf8 = true

	# Tests the overlapping case of the above.
	[[test]]
	name = "empty-utf8yes-startbound-overlapping"
	regex = ''
	haystack = '𝛃'
	bounds = [1, 4]
	matches = [[4, 4]]
	unicode = true
	utf8 = true
	match-kind = "all"
	search-kind = "overlapping"

	# Same as above, except since UTF-8 mode is disabled, we also find the matches
	# in between that split the codepoint.
	[[test]]
	name = "empty-utf8no-startbound"
	regex = ''
	haystack = '𝛃'
	bounds = [1, 4]
	matches = [[1, 1], [2, 2], [3, 3], [4, 4]]
	unicode = true
	utf8 = false

	# Tests the overlapping case of the above.
	[[test]]
	name = "empty-utf8no-startbound-overlapping"
	regex = ''
	haystack = '𝛃'
	bounds = [1, 4]
	matches = [[1, 1], [2, 2], [3, 3], [4, 4]]
	unicode = true
	utf8 = false
	match-kind = "all"
	search-kind = "overlapping"

	# This tests that we don't find any matches in an anchored search, even when
	# the bounds include a match (at the end).
	[[test]]
	name = "empty-utf8yes-anchored-startbound"
	regex = ''
	haystack = '𝛃'
	bounds = [1, 4]
	matches = []
	anchored = true
	unicode = true
	utf8 = true

	# Tests the overlapping case of the above.
	[[test]]
	name = "empty-utf8yes-anchored-startbound-overlapping"
	regex = ''
	haystack = '𝛃'
	bounds = [1, 4]
	matches = []
	anchored = true
	unicode = true
	utf8 = true
	match-kind = "all"
	search-kind = "overlapping"

	# Same as above, except since UTF-8 mode is disabled, we also find the matches
	# in between that split the codepoint. Even though this is an anchored search,
	# since the matches are adjacent, we find all of them.
	[[test]]
	name = "empty-utf8no-anchored-startbound"
	regex = ''
	haystack = '𝛃'
	bounds = [1, 4]
	matches = [[1, 1], [2, 2], [3, 3], [4, 4]]
	anchored = true
	unicode = true
	utf8 = false

	# Tests the overlapping case of the above.
	#
	# Note that overlapping anchored searches are a little weird, and it's not
	# totally clear what their semantics ought to be. For now, we just test the
	# current behavior of our test shim that implements overlapping search. (This
	# is one of the reasons why we don't really expose regex-level overlapping
	# searches.)
	[[test]]
	name = "empty-utf8no-anchored-startbound-overlapping"
	regex = ''
	haystack = '𝛃'
	bounds = [1, 4]
	matches = [[1, 1]]
	anchored = true
	unicode = true
	utf8 = false
	match-kind = "all"
	search-kind = "overlapping"

	# This tests that we find the match at the end of the haystack in UTF-8 mode
	# when our bounds only include the empty string at the end of the haystack.
	[[test]]
	name = "empty-utf8yes-anchored-endbound"
	regex = ''
	haystack = '𝛃'
	bounds = [4, 4]
	matches = [[4, 4]]
	anchored = true
	unicode = true
	utf8 = true

	# Tests the overlapping case of the above.
	[[test]]
	name = "empty-utf8yes-anchored-endbound-overlapping"
	regex = ''
	haystack = '𝛃'
	bounds = [4, 4]
	matches = [[4, 4]]
	anchored = true
	unicode = true
	utf8 = true
	match-kind = "all"
	search-kind = "overlapping"

	# Same as above, but with UTF-8 mode disabled. Results remain the same since
	# the only possible match does not split a codepoint.
	[[test]]
	name = "empty-utf8no-anchored-endbound"
	regex = ''
	haystack = '𝛃'
	bounds = [4, 4]
	matches = [[4, 4]]
	anchored = true
	unicode = true
	utf8 = false

	# Tests the overlapping case of the above.
	[[test]]
	name = "empty-utf8no-anchored-endbound-overlapping"
	regex = ''
	haystack = '𝛃'
	bounds = [4, 4]
	matches = [[4, 4]]
	anchored = true
	unicode = true
	utf8 = false
	match-kind = "all"
	search-kind = "overlapping"