[indic] Add Javanese support!
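Seems to be working just fine!
Adds a second consonant-medial category (CM2 / OT_CM2), a medial_group rule in the syllable grammar, and explicit categories for U+A982, U+A9BE and U+A9BD.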
diff --git a/src/hb-ot-shape-complex-indic-machine.rl b/src/hb-ot-shape-complex-indic-machine.rl
index e964655..f3f550f 100644
--- a/src/hb-ot-shape-complex-indic-machine.rl
+++ b/src/hb-ot-shape-complex-indic-machine.rl
@@ -51,12 +51,13 @@
A = 10;
NBSP = 11;
DOTTEDCIRCLE = 12;
-RS = 13;
+RS = 13;
Coeng = 14;
Repha = 15;
Ra = 16;
CM = 17;
Avag = 18;
+CM2 = 41; # consonant-medial, second slot; same value as OT_CM2 in hb-ot-shape-complex-indic-private.hh
c = (C | Ra); # is_consonant
n = ((ZWNJ?.RS)? (N.N?)?); # is_consonant_modifier
@@ -73,14 +74,15 @@
place_holder = NBSP | DOTTEDCIRCLE;
halant_group = (z?.h.(ZWJ.N?)?);
final_halant_group = halant_group | h.ZWNJ;
-halant_or_matra_group = (CM.CM* | final_halant_group | (h.ZWJ)? matra_group{0,4});
+medial_group = CM?.CM2?; # at most one CM followed by at most one CM2; may be empty
+halant_or_matra_group = (final_halant_group | (h.ZWJ)? matra_group{0,4}); # second alternative can match empty, so the rules below use this without '?'
-consonant_syllable = Repha? (cn.halant_group){0,4} cn halant_or_matra_group? syllable_tail;
-vowel_syllable = reph? V.n? (ZWJ | (halant_group.cn){0,4} halant_or_matra_group? syllable_tail);
-standalone_cluster = reph? place_holder.n? (halant_group.cn){0,4} halant_or_matra_group? syllable_tail;
+consonant_syllable = Repha? (cn.halant_group){0,4} cn medial_group halant_or_matra_group syllable_tail;
+vowel_syllable = reph? V.n? (ZWJ | (halant_group.cn){0,4} medial_group halant_or_matra_group syllable_tail);
+standalone_cluster = reph? place_holder.n? (halant_group.cn){0,4} medial_group halant_or_matra_group syllable_tail;
avagraha_cluster = avagraha syllable_tail2;
-broken_cluster = reph? n? (halant_group.cn){0,4} halant_or_matra_group syllable_tail;
+broken_cluster = reph? n? (halant_group.cn){0,4} medial_group halant_or_matra_group syllable_tail;
other = any;
main := |*
diff --git a/src/hb-ot-shape-complex-indic-private.hh b/src/hb-ot-shape-complex-indic-private.hh
index cee1572..7101eb8 100644
--- a/src/hb-ot-shape-complex-indic-private.hh
+++ b/src/hb-ot-shape-complex-indic-private.hh
@@ -43,24 +43,25 @@
* Not sure how to avoid duplication. */
enum indic_category_t {
OT_X = 0,
- OT_C,
- OT_V,
- OT_N,
- OT_H,
- OT_ZWNJ,
- OT_ZWJ,
- OT_M,
- OT_SM,
- OT_VD,
- OT_A,
- OT_NBSP,
- OT_DOTTEDCIRCLE, /* Not in the spec, but special in Uniscribe. /Very very/ special! */
- OT_RS, /* Register Shifter, used in Khmer OT spec */
- OT_Coeng,
- OT_Repha,
- OT_Ra, /* Not explicitly listed in the OT spec, but used in the grammar. */
- OT_CM,
- OT_Avag
+ OT_C = 1,
+ OT_V = 2,
+ OT_N = 3,
+ OT_H = 4,
+ OT_ZWNJ = 5,
+ OT_ZWJ = 6,
+ OT_M = 7,
+ OT_SM = 8,
+ OT_VD = 9,
+ OT_A = 10,
+ OT_NBSP = 11,
+ OT_DOTTEDCIRCLE = 12, /* Not in the spec, but special in Uniscribe. /Very very/ special! */
+ OT_RS = 13, /* Register Shifter, used in Khmer OT spec. */
+ OT_Coeng = 14, /* Khmer-style Virama. */
+ OT_Repha = 15, /* Atomically-encoded logical or visual repha. */
+ OT_Ra = 16, /* Not explicitly listed in the OT spec, but used in the grammar. */
+ OT_CM = 17, /* Consonant-Medial. */
+ OT_Avag = 18, /* Avagraha. */
+ OT_CM2 = 41 /* Consonant-Medial, second slot. NOTE(review): passed to FLAG () via MEDIAL_FLAGS; 41 exceeds 31, so confirm FLAG () is well-defined for values >= 32. */
};
/* Visual positions in a syllable from left to right. */
diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index fcce57e..9a8b4e3 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -152,12 +152,14 @@
return is_one_of (info, JOINER_FLAGS);
}
+#define MEDIAL_FLAGS (FLAG (OT_CM) | FLAG (OT_CM2)) /* Either consonant-medial slot. */
+
/* Note:
*
* We treat Vowels and placeholders as if they were consonants. This is safe because Vowels
* cannot happen in a consonant syllable. The plus side however is, we can call the
* consonant syllable logic from the vowel syllable function and get it all right! */
-#define CONSONANT_FLAGS (FLAG (OT_C) | FLAG (OT_CM) | FLAG (OT_Ra) | FLAG (OT_V) | FLAG (OT_NBSP) | FLAG (OT_DOTTEDCIRCLE))
+#define CONSONANT_FLAGS (FLAG (OT_C) | FLAG (OT_Ra) | MEDIAL_FLAGS | FLAG (OT_V) | FLAG (OT_NBSP) | FLAG (OT_DOTTEDCIRCLE))
static inline bool
is_consonant (const hb_glyph_info_t &info)
{
@@ -213,6 +215,9 @@
else if (unlikely (u == 0x200D)) cat = OT_ZWJ;
else if (unlikely (u == 0x25CC)) cat = OT_DOTTEDCIRCLE;
else if (unlikely (u == 0x0A71)) cat = OT_SM; /* GURMUKHI ADDAK. Move it to the end. */
+ else if (unlikely (u == 0xA982)) cat = OT_SM; /* Javanese repha. */
+ else if (unlikely (u == 0xA9BE)) cat = OT_CM2; /* Javanese medial ya. */
+ else if (unlikely (u == 0xA9BD)) { cat = OT_M; pos = POS_POST_C; } /* Javanese vocalic r. */
if (cat == OT_Repha) {
/* There are two kinds of characters marked as Repha:
@@ -931,7 +936,7 @@
indic_position_t last_pos = POS_START;
for (unsigned int i = start; i < end; i++)
{
- if ((FLAG (info[i].indic_category()) & (JOINER_FLAGS | FLAG (OT_N) | FLAG (OT_RS) | HALANT_OR_COENG_FLAGS)))
+ if ((FLAG (info[i].indic_category()) & (JOINER_FLAGS | FLAG (OT_N) | FLAG (OT_RS) | MEDIAL_FLAGS | HALANT_OR_COENG_FLAGS)))
{
info[i].indic_position() = last_pos;
if (unlikely (info[i].indic_category() == OT_H &&