[Indic] Position Khmer Robat
Khmer Robat is a visual Repha.
Logical Repha, as occurs in Malayalam, is still not positioned.
Another 200 Khmer failures fixed. 547 to go. That's better than
Devanagari!
diff --git a/src/hb-ot-shape-complex-indic-machine.rl b/src/hb-ot-shape-complex-indic-machine.rl
index b87d2df..5f565b6 100644
--- a/src/hb-ot-shape-complex-indic-machine.rl
+++ b/src/hb-ot-shape-complex-indic-machine.rl
@@ -40,33 +40,35 @@
# Same order as enum indic_category_t. Not sure how to avoid duplication.
X = 0;
C = 1;
-Ra = 2;
-V = 3;
-N = 4;
-H = 5;
-ZWNJ = 6;
-ZWJ = 7;
-M = 8;
-SM = 9;
-VD = 10;
-A = 11;
-NBSP = 12;
-DOTTEDCIRCLE = 13;
-RS = 14;
-Coeng = 15;
+V = 2;
+N = 3;
+H = 4;
+ZWNJ = 5;
+ZWJ = 6;
+M = 7;
+SM = 8;
+VD = 9;
+A = 10;
+NBSP = 11;
+DOTTEDCIRCLE = 12;
+RS = 13;
+Coeng = 14;
+Repha = 15;
+Ra = 16;
c = C | Ra; # is_consonant
n = (N.N? | ZWNJ?.RS); # is_consonant_modifier
z = ZWJ|ZWNJ; # is_joiner
h = H | Coeng; # is_halant_or_coeng
+reph = (Ra H | Repha); # possible reph
matra_group = M.N?.H?;
syllable_tail = SM? (Coeng (c|V))? (VD VD?)?;
place_holder = NBSP | DOTTEDCIRCLE;
-consonant_syllable = (c.n? (h.z?|z.h))* c.n? A? (h.z? | matra_group*)? syllable_tail;
-vowel_syllable = (Ra H)? V.n? (z?.h.c | ZWJ.c)* matra_group* syllable_tail;
-standalone_cluster = (Ra H)? place_holder.n? (z? h c)* matra_group* syllable_tail;
+consonant_syllable = Repha? (c.n? (h.z?|z.h))* c.n? A? (h.z? | matra_group*)? syllable_tail;
+vowel_syllable = reph? V.n? (z?.h.c | ZWJ.c)* matra_group* syllable_tail;
+standalone_cluster = reph? place_holder.n? (z? h c)* matra_group* syllable_tail;
other = any;
main := |*
diff --git a/src/hb-ot-shape-complex-indic-private.hh b/src/hb-ot-shape-complex-indic-private.hh
index 0541738..9637018 100644
--- a/src/hb-ot-shape-complex-indic-private.hh
+++ b/src/hb-ot-shape-complex-indic-private.hh
@@ -47,7 +47,6 @@
enum indic_category_t {
OT_X = 0,
OT_C,
- OT_Ra, /* Not explicitly listed in the OT spec, but used in the grammar. */
OT_V,
OT_N,
OT_H,
@@ -60,7 +59,9 @@
OT_NBSP,
OT_DOTTEDCIRCLE, /* Not in the spec, but special in Uniscribe. /Very very/ special! */
OT_RS, /* Register Shifter, used in Khmer OT spec */
- OT_Coeng
+ OT_Coeng,
+ OT_Repha,
+ OT_Ra /* Not explicitly listed in the OT spec, but used in the grammar. */
};
/* Visual positions in a syllable from left to right. */
@@ -92,7 +93,7 @@
INDIC_SYLLABIC_CATEGORY_CONSONANT_MEDIAL = OT_C,
INDIC_SYLLABIC_CATEGORY_CONSONANT_PLACEHOLDER = OT_NBSP,
INDIC_SYLLABIC_CATEGORY_CONSONANT_SUBJOINED = OT_C,
- INDIC_SYLLABIC_CATEGORY_CONSONANT_REPHA = OT_C,
+ INDIC_SYLLABIC_CATEGORY_CONSONANT_REPHA = OT_Repha,
INDIC_SYLLABIC_CATEGORY_MODIFYING_LETTER = OT_X,
INDIC_SYLLABIC_CATEGORY_NUKTA = OT_N,
INDIC_SYLLABIC_CATEGORY_REGISTER_SHIFTER = OT_RS,
diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index 3c83ce6..bbf5024 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -282,6 +282,19 @@
if (unlikely (info.codepoint == 0x17D2))
info.indic_category() = OT_Coeng;
+ if (info.indic_category() == OT_Repha) {
+ /* There are two kinds of characters marked as Repha:
+ * - The ones that are GenCat=Mn are already positioned visually, i.e. after the base (e.g. Khmer).
+ * - The ones that are GenCat=Lo are encoded logically, i.e. at the beginning of the syllable (e.g. Malayalam).
+ *
+ * We recategorize the first kind to look like a Nukta so that it is attached to the base directly.
+ */
+ if (_hb_glyph_info_get_general_category (&info) == HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK)
+ info.indic_category() = OT_N;
+ }
+
+
+ /* Assign positions... */
if (is_consonant (info)) {
info.indic_position() = consonant_position (info.codepoint);
if (is_ra (info.codepoint))