# dictionaries/utf8.dict - platform/external/AFLplusplus - Git at Google

 # https://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt


 # Defines byte order and endianness
 byte_order="\xFE\xFF"

 # Reorder the display of text for RTL reading
 right_to_left="\x20\x2E"

 # Mongolian Vowel Separator (U+180E): invisible; classified as whitespace
 # (general category Zs) before Unicode 6.3.
 # The previous value "\x18\x03" encoded U+1803 (Mongolian Full Stop), which is a visible character.
 invisible_separator="\x18\x0e"

 # Invisible zero-width character.
 word_join="\x20\x60"

 # Reserved code point
 reserved="\xfe\xfe"

 # Invalid code points (noncharacters U+FFFF, U+1FFFF and U+FDD0)
 invalid1="\xff\xff"
 invalid2="\x01\xff\xff"
 # Every byte needs its own \x escape: "\xfdd0" decodes as byte 0xFD followed
 # by the literal text "d0", not as the two bytes FD D0.
 invalid3="\xfd\xd0"

 # unassigned code point
 unassigned="\x0f\xed"

 # illegal low half-surrogate
 illegal_low="\xde\xad"

 # illegal high half-surrogate
 illegal_high="\xda\xad"

 # private use area code point used by Apple for its logo
 apple="\xf8\xff"

 # hostname normalization
 fullwidth_solidus="\xff\x0f"

 # numerical mapping and a value
 bold_eight="\x01\xd7\xd6"

 # U+00DF normalizes to "ss" during IDNA2003's mapping phase,
 # different from its IDNA2008 mapping. See http://www.unicode.org/reports/tr46/
 weird="\x00\xdf"

 # U+FDFD expands by 11x (UTF-8) and 18x (UTF-16) under NFKC/NFKD
 expansion="\xfd\xfd"

 # U+0390 expands by 3x (UTF-8) under NFD
 expansion2="\x03\x90"

 # U+1F82 expands by 4x (UTF-16) under NFD
 expansion3= "\x1F\x82"

 # U+FB2C expands by 3x (UTF-16) under NFC
 expansion4="\xFB\x2C"

 # Lowercase expansion: https://twitter.com/jifa/status/625776454479970304
 low_exp1="\x02\x3a"
 low_exp2="\x02\x3e"
 low_exp3="\x00\xdf"
 low_exp4="\x1e\x9e"

 # Null byte
 null="\x00\x00"
 # Overlong 6-byte UTF-8 encodings of U+0000 and of U+002F ('/').
 # The second entry previously began with the literal text "fc" because its
 # leading \x escape was missing.
 "\xfc\x80\x80\x80\x80\x80"
 "\xfc\x80\x80\x80\x80\xaf"

 # Confusing new lines
 # NOTE(review): U+001B is ESC, not a line break; U+000B (vertical tab) may
 # have been intended - confirm before changing.
 "\x00\x1b"
 # U+0085 NEXT LINE (NEL)
 "\x00\x85"
 # U+2028 LINE SEPARATOR
 "\x20\x28"
 # U+2029 PARAGRAPH SEPARATOR
 "\x20\x29"
	# https://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt


	# Defines byte order and endianness
	byte_order="\xFE\xFF"

	# Reorder the display of text for RTL reading
	right_to_left="\x20\x2E"

	# Mongolian Vowel Separator (U+180E): invisible; classified as whitespace
	# (general category Zs) before Unicode 6.3.
	# The previous value "\x18\x03" encoded U+1803 (Mongolian Full Stop), which is a visible character.
	invisible_separator="\x18\x0e"

	# Invisible zero-width character.
	word_join="\x20\x60"

	# Reserved code point
	reserved="\xfe\xfe"

	# Invalid code points (noncharacters U+FFFF, U+1FFFF and U+FDD0)
	invalid1="\xff\xff"
	invalid2="\x01\xff\xff"
	# Every byte needs its own \x escape: "\xfdd0" decodes as byte 0xFD followed
	# by the literal text "d0", not as the two bytes FD D0.
	invalid3="\xfd\xd0"

	# unassigned code point
	unassigned="\x0f\xed"

	# illegal low half-surrogate
	illegal_low="\xde\xad"

	# illegal high half-surrogate
	illegal_high="\xda\xad"

	# private use area code point used by Apple for its logo
	apple="\xf8\xff"

	# hostname normalization
	fullwidth_solidus="\xff\x0f"

	# numerical mapping and a value
	bold_eight="\x01\xd7\xd6"

	# U+00DF normalizes to "ss" during IDNA2003's mapping phase,
	# different from its IDNA2008 mapping. See http://www.unicode.org/reports/tr46/
	weird="\x00\xdf"

	# U+FDFD expands by 11x (UTF-8) and 18x (UTF-16) under NFKC/NFKD
	expansion="\xfd\xfd"

	# U+0390 expands by 3x (UTF-8) under NFD
	expansion2="\x03\x90"

	# U+1F82 expands by 4x (UTF-16) under NFD
	expansion3= "\x1F\x82"

	# U+FB2C expands by 3x (UTF-16) under NFC
	expansion4="\xFB\x2C"

	# Lowercase expansion: https://twitter.com/jifa/status/625776454479970304
	low_exp1="\x02\x3a"
	low_exp2="\x02\x3e"
	low_exp3="\x00\xdf"
	low_exp4="\x1e\x9e"

	# Null byte
	null="\x00\x00"
	# Overlong 6-byte UTF-8 encodings of U+0000 and of U+002F ('/').
	# The second entry previously began with the literal text "fc" because its
	# leading \x escape was missing.
	"\xfc\x80\x80\x80\x80\x80"
	"\xfc\x80\x80\x80\x80\xaf"

	# Confusing new lines
	# NOTE(review): U+001B is ESC, not a line break; U+000B (vertical tab) may
	# have been intended - confirm before changing.
	"\x00\x1b"
	# U+0085 NEXT LINE (NEL)
	"\x00\x85"
	# U+2028 LINE SEPARATOR
	"\x20\x28"
	# U+2029 PARAGRAPH SEPARATOR
	"\x20\x29"