[USE] Finish converting Unicode positional categories to USE. Even compiles.

commit: 44910cef626e6d03baa4d89d8fbe2c088971902d [log] [tgz]
author: Behdad Esfahbod <[email protected]> Mon Jul 20 18:01:10 2015 +0100
committer: Behdad Esfahbod <[email protected]> Mon Jul 20 18:01:10 2015 +0100
tree: 578abc7ac4191cfba62bd468f383806015bc9698
parent: ad725552521273a1f571f04bc96a04221c3e067a [diff] [blame]
diff --git a/src/gen-use-table.py b/src/gen-use-table.py
index f1484fd..7293b73 100755
--- a/src/gen-use-table.py
+++ b/src/gen-use-table.py

@@ -99,7 +99,7 @@
 	'Number',
 	'Brahmi_Joining_Number',
 	# Indic_Positional_Category
-	'Not_Applicable'
+	'Not_Applicable',
 	'Right',
 	'Left',
 	'Visual_Order_Left',
@@ -121,8 +121,7 @@
 	def __str__(self):
 		return self.name
 	def __eq__(self, other):
-		assert isinstance(other, basestring)
-		return self.name == other
+		return self.name == (other if isinstance(other, basestring) else other.name)
 	def __ne__(self, other):
 		return not (self == other)
 
@@ -230,14 +229,95 @@
 	'VM':	is_VOWEL_MOD,
 }
 
+use_positions = {
+	'F': {
+		'Abv': [Top],
+		'Blw': [Bottom],
+		'Pst': [Right],
+	},
+	'M': {
+		'Abv': [Top],
+		'Blw': [Bottom],
+		'Pst': [Right],
+		'Pre': [Left],
+	},
+	'CM': {
+		'Abv': [Top],
+		'Blw': [Bottom],
+	},
+	'V': {
+		'Abv': [Top, Top_And_Bottom, Top_And_Bottom_And_Right, Top_And_Right],
+		'Blw': [Bottom, Overstruck, Bottom_And_Right],
+		'Pst': [Right],
+		'Pre': [Left, Top_And_Left, Top_And_Left_And_Right, Left_And_Right],
+	},
+	'VM': {
+		'Abv': [Top],
+		'Blw': [Bottom, Overstruck],
+		'Pst': [Right],
+		'Pre': [Left],
+	},
+	'SM': {
+		'Abv': [Top],
+		'Blw': [Bottom],
+	},
+	'H': None,
+	'B': None,
+	'FM': None,
+	'SUB': None,
+}
+
 def map_to_use(data):
 	out = {}
 	items = use_mapping.items()
 	for U,(UISC,UIPC,UGC,UBlock) in data.items():
+
+		# Resolve Indic_Syllabic_Category
+
+		# TODO: These don't have UISC assigned in Unicode 8.0, but
+		# have UIPC
+		if U == 0x17DD: UISC = Vowel_Dependent
+		if 0x1CE2 <= U <= 0x1CE8: UISC = Cantillation_Mark
+
+		# TODO: U+1CED should only be allowed after some of
+		# the nasalization marks, maybe only for U+1CE9..U+1CF1.
+		if U == 0x1CED: UISC = Tone_Mark
+
 		evals = [(k, v(U,UISC,UGC)) for k,v in items]
 		values = [k for k,v in evals if v]
 		assert len(values) == 1, "%s %s %s %s" % (hex(U), UISC, UGC, values)
-		out[U] = (values[0], UBlock)
+		USE = values[0]
+
+		# Resolve Indic_Positional_Category
+
+		# TODO: Not in Unicode 8.0 yet, but in spec.
+		if U == 0x1B6C: UIPC = Bottom
+
+		# TODO: These should die, but have UIPC in Unicode 8.0
+		if U in [0x953, 0x954]: UIPC = Not_Applicable
+
+		# TODO: In USE's override list but not in Unicode 8.0
+		if U == 0x103C: UIPC = Left
+
+		# TODO: These are not in USE's override list that we have, nor are they in Unicode 8.0
+		if 0xA926 <= U <= 0xA92A: UIPC = Top
+		if U == 0x111CA: UIPC = Bottom
+		if U == 0x11300: UIPC = Top
+		if U == 0x1133C: UIPC = Bottom
+		if U == 0x1171E: UIPC = Left # Correct?!
+		if 0x1CF2 <= U <= 0x1CF3: UIPC = Right
+		if 0x1CF8 <= U <= 0x1CF9: UIPC = Top
+
+		assert (UIPC in [Not_Applicable, Visual_Order_Left] or
+			USE in use_positions), "%s %s %s %s %s" % (hex(U), UIPC, USE, UISC, UGC)
+
+		pos_mapping = use_positions.get(USE, None)
+		if pos_mapping:
+			values = [k for k,v in pos_mapping.items() if v and UIPC in v]
+			assert len(values) == 1, "%s %s %s %s %s %s" % (hex(U), UIPC, USE, UISC, UGC, values)
+			USE = USE + values[0]
+
+		out[U] = (USE, UBlock)
 	return out
 
 defaults = ('O', 'No_Block')
@@ -275,7 +355,7 @@
 		print
 		print "  /* %s */" % block
 		if start % 16:
-			print ' ' * (20 + (start % 16 * 4)),
+			print ' ' * (20 + (start % 16 * 6)),
 	num = 0
 	assert start % 8 == 0
 	assert (end+1) % 8 == 0
@@ -286,7 +366,7 @@
 		if u in data:
 			num += 1
 		d = data.get (u, defaults)
-		sys.stdout.write ("%4s," % d[0])
+		sys.stdout.write ("%6s," % d[0])
 
 	total += end - start + 1
 	used += num
@@ -302,7 +382,13 @@
 starts = []
 ends = []
 for k,v in sorted(use_mapping.items()):
+	if k in use_positions and use_positions[k]: continue
 	print "#define %s	USE_%s	/* %s */" % (k, k, v.__name__[3:])
+for k,v in sorted(use_positions.items()):
+	if not v: continue
+	for suf in v.keys():
+		tag = k + suf
+		print "#define %s	USE_%s" % (tag, tag)
 print ""
 print "static const USE_TABLE_ELEMENT_TYPE use_table[] = {"
 for u in uu:
@@ -339,6 +425,15 @@
 page_bits = 12
 print "}; /* Table items: %d; occupancy: %d%% */" % (offset, occupancy)
 print
+for k in sorted(use_mapping.keys()):
+	if k in use_positions and use_positions[k]: continue
+	print "#undef %s" % k
+for k,v in sorted(use_positions.items()):
+	if not v: continue
+	for suf in v.keys():
+		tag = k + suf
+		print "#undef %s" % tag
+print
 print "USE_TABLE_ELEMENT_TYPE"
 print "hb_use_get_categories (hb_codepoint_t u)"
 print "{"
@@ -353,17 +448,14 @@
 		print "      if (hb_in_range (u, 0x%04Xu, 0x%04Xu)) return use_table[u - 0x%04Xu + %s];" % (start, end-1, start, offset)
 	for u,d in singles.items ():
 		if p != u>>page_bits: continue
-		print "      if (unlikely (u == 0x%04Xu)) return %s;" % (u, d[0])
+		print "      if (unlikely (u == 0x%04Xu)) return USE_%s;" % (u, d[0])
 	print "      break;"
 	print ""
 print "    default:"
 print "      break;"
 print "  }"
-print "  return _(x,x);"
+print "  return USE_O;"
 print "}"
-print ""
-for k in sorted(use_mapping.keys()):
-	print "#undef %s" % k
 print
 print "/* == End of generated table == */"
commit	44910cef626e6d03baa4d89d8fbe2c088971902d	[log] [tgz]
author	Behdad Esfahbod <[email protected]>	Mon Jul 20 18:01:10 2015 +0100
committer	Behdad Esfahbod <[email protected]>	Mon Jul 20 18:01:10 2015 +0100
tree	578abc7ac4191cfba62bd468f383806015bc9698
parent	ad725552521273a1f571f04bc96a04221c3e067a [diff] [blame]