[arabic-table] Use segmented table
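Split the single contiguous joining table into per-range segments, each with a joining_offset_* define, and look values up through a generated joining_type() function instead of JOINING_TABLE_FIRST/JOINING_TABLE_LAST. No functional change.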
diff --git a/src/gen-arabic-table.py b/src/gen-arabic-table.py
index 6f2c9d5..1596126 100755
--- a/src/gen-arabic-table.py
+++ b/src/gen-arabic-table.py
@@ -71,42 +71,76 @@
for value,short in short_value.items():
print "#define %s %s" % (short, value)
- keys = values.keys()
- min_u = min(keys)
- max_u = max(keys)
+ uu = sorted(values.keys())
num = len(values)
+
+ last = -1
+ ranges = []
+ for u in uu:
+ if u - last <= 1+16*3:
+ ranges[-1][-1] = u
+ else:
+ ranges.append([u,u])
+ last = u
+
print
print "static const uint8_t joining_table[] ="
print "{"
last_block = None
- for u in range(min_u, max_u+1):
+ offset = 0
+ for start,end in ranges:
- value = values.get(u, "JOINING_TYPE_X")
+ print
+ print "#define joining_offset_0x%04x %d" % (start, offset)
- block = blocks.get(u, last_block)
- if block != last_block:
- print "\n\n /* %s */" % block
- last_block = block
- if u % 32 != 0:
+ for u in range(start, end+1):
+
+ block = blocks.get(u, last_block)
+ value = values.get(u, "JOINING_TYPE_X")
+
+ if block != last_block or u == start:
+ if u != start:
+ print
+ print "\n /* %s */" % block
+ last_block = block
+ if u % 32 != 0:
+ print
+ print " /* %04X */" % (u//32*32), " " * (u % 32),
+
+ if u % 32 == 0:
print
- print " /* %04X */" % u, " " * (u % 32),
+ print " /* %04X */ " % u,
+ sys.stdout.write("%s," % short_value[value])
+ print
- if u % 32 == 0:
- print
- print " /* %04X */ " % u,
- sys.stdout.write("%s," % short_value[value])
+ offset += end - start + 1
print
- print "};"
- print
- print "#define JOINING_TABLE_FIRST 0x%04X" % min_u
- print "#define JOINING_TABLE_LAST 0x%04X" % max_u
+ occupancy = num * 100. / offset
+ print "}; /* Table items: %d; occupancy: %d%% */" % (offset, occupancy)
print
- occupancy = num * 100 / (max_u - min_u + 1)
- # Maintain at least 40% occupancy in the table */
- if occupancy < 40:
- raise Exception ("Table too sparse, please investigate: ", occupancy)
-
+ page_bits = 8
+ print
+ print "static unsigned int"
+ print "joining_type (hb_codepoint_t u)"
+ print "{"
+ print " switch (u >> %d)" % page_bits
+ print " {"
+ pages = set([u>>page_bits for u in [s for s,e in ranges]+[e for s,e in ranges]])
+ for p in sorted(pages):
+ print " case 0x%0X:" % p
+ for (start,end) in ranges:
+ if p not in [start>>page_bits, end>>page_bits]: continue
+ offset = "joining_offset_0x%04x" % start
+ print " if (0x%04X <= u && u <= 0x%04X) return joining_table[u - 0x%04X + %s];" % (start, end, start, offset)
+ print " break;"
+ print ""
+ print " default:"
+ print " break;"
+ print " }"
+ print " return X;"
+ print "}"
+ print
for value,short in short_value.items():
print "#undef %s" % (short)
print
diff --git a/src/hb-ot-shape-complex-arabic-table.hh b/src/hb-ot-shape-complex-arabic-table.hh
index ad119da..877625a 100644
--- a/src/hb-ot-shape-complex-arabic-table.hh
+++ b/src/hb-ot-shape-complex-arabic-table.hh
@@ -28,6 +28,7 @@
static const uint8_t joining_table[] =
{
+#define joining_offset_0x0600 0
/* Arabic */
@@ -48,37 +49,55 @@
/* Arabic Supplement */
- /* 0750 */ D,D,D,D,D,D,D,D,D,R,R,R,D,D,D,D,
+ /* 0740 */ D,D,D,D,D,D,D,D,D,R,R,R,D,D,D,D,
/* 0760 */ D,D,D,D,D,D,D,D,D,D,D,R,R,D,D,D,D,R,D,R,R,D,D,D,R,R,D,D,D,D,D,D,
- /* Thaana */
-
- /* 0780 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
- /* 07A0 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
+#define joining_offset_0x07ca 384
/* NKo */
- /* 07C0 */ X,X,X,X,X,X,X,X,X,X,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,
- /* 07E0 */ D,D,D,D,D,D,D,D,D,D,D,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,C,X,X,X,X,X,
+ /* 07C0 */ D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,
+ /* 07E0 */ D,D,D,D,D,D,D,D,D,D,D,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,C,
- /* Samaritan */
-
- /* 0800 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
- /* 0820 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
+#define joining_offset_0x0840 433
/* Mandaic */
- /* 0840 */ R,D,D,D,D,D,R,D,D,R,D,D,D,D,D,R,D,D,D,D,R,D,U,U,U,X,X,X,X,X,X,X,
- /* 0860 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
- /* 0880 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
+ /* 0840 */ R,D,D,D,D,D,R,D,D,R,D,D,D,D,D,R,D,D,D,D,R,D,U,U,U,
+
+#define joining_offset_0x08a0 458
/* Arabic Extended-A */
/* 08A0 */ D,X,D,D,D,D,D,D,D,D,R,R,R,
-};
-#define JOINING_TABLE_FIRST 0x0600
-#define JOINING_TABLE_LAST 0x08AC
+}; /* Table items: 471; occupancy: 66% */
+
+
+static unsigned int
+joining_type (hb_codepoint_t u)
+{
+ switch (u >> 8)
+ {
+ case 0x6:
+ if (0x0600 <= u && u <= 0x077F) return joining_table[u - 0x0600 + joining_offset_0x0600];
+ break;
+
+ case 0x7:
+ if (0x0600 <= u && u <= 0x077F) return joining_table[u - 0x0600 + joining_offset_0x0600];
+ if (0x07CA <= u && u <= 0x07FA) return joining_table[u - 0x07CA + joining_offset_0x07ca];
+ break;
+
+ case 0x8:
+ if (0x0840 <= u && u <= 0x0858) return joining_table[u - 0x0840 + joining_offset_0x0840];
+ if (0x08A0 <= u && u <= 0x08AC) return joining_table[u - 0x08A0 + joining_offset_0x08a0];
+ break;
+
+ default:
+ break;
+ }
+ return X;
+}
#undef X
#undef R
diff --git a/src/hb-ot-shape-complex-arabic.cc b/src/hb-ot-shape-complex-arabic.cc
index ea6d85c..61a55ef 100644
--- a/src/hb-ot-shape-complex-arabic.cc
+++ b/src/hb-ot-shape-complex-arabic.cc
@@ -57,11 +57,9 @@
static unsigned int get_joining_type (hb_codepoint_t u, hb_unicode_general_category_t gen_cat)
{
- if (likely (hb_in_range<hb_codepoint_t> (u, JOINING_TABLE_FIRST, JOINING_TABLE_LAST))) {
- unsigned int j_type = joining_table[u - JOINING_TABLE_FIRST];
- if (likely (j_type != JOINING_TYPE_X))
- return j_type;
- }
+ unsigned int j_type = joining_type(u);
+ if (likely (j_type != JOINING_TYPE_X))
+ return j_type;
/* Mongolian joining data is not in ArabicJoining.txt yet. */
if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x1800, 0x18AF)))