[Indic] Position Khmer Robat

It's a visual Repha. Still not positioning logical Repha as occurs in Malayalam. Another 200 Khmer failures fixed. 547 to go. That's better than Devanagari!

commit: db8981f1e0e8625714568c6d0f11f0b317b11d0a [log] [tgz]
author: Behdad Esfahbod <behdad@behdad.org> Tue Jul 17 18:17:30 2012 -0400
committer: Behdad Esfahbod <behdad@behdad.org> Tue Jul 17 23:42:04 2012 -0400
tree: b456d829eed13ba8694e7b25617846f189af22aa
parent: 25bc489498ef7d0beb8fe9ab663e3f0b2f52c9c2 [diff]
diff --git a/src/hb-ot-shape-complex-indic-machine.rl b/src/hb-ot-shape-complex-indic-machine.rl
index b87d2df..5f565b6 100644
--- a/src/hb-ot-shape-complex-indic-machine.rl
+++ b/src/hb-ot-shape-complex-indic-machine.rl

@@ -40,33 +40,35 @@
 # Same order as enum indic_category_t.  Not sure how to avoid duplication.
 X    = 0;
 C    = 1;
-Ra   = 2;
-V    = 3;
-N    = 4;
-H    = 5;
-ZWNJ = 6;
-ZWJ  = 7;
-M    = 8;
-SM   = 9;
-VD   = 10;
-A    = 11;
-NBSP = 12;
-DOTTEDCIRCLE = 13;
-RS   = 14;
-Coeng = 15;
+V    = 2;
+N    = 3;
+H    = 4;
+ZWNJ = 5;
+ZWJ  = 6;
+M    = 7;
+SM   = 8;
+VD   = 9;
+A    = 10;
+NBSP = 11;
+DOTTEDCIRCLE = 12;
+RS   = 13;
+Coeng = 14;
+Repha = 15;
+Ra    = 16;
 
 c = C | Ra;			# is_consonant
 n = (N.N? | ZWNJ?.RS);		# is_consonant_modifier
 z = ZWJ|ZWNJ;			# is_joiner
 h = H | Coeng;			# is_halant_or_coeng
+reph = (Ra H | Repha);		# possible reph
 matra_group = M.N?.H?;
 syllable_tail = SM? (Coeng (c|V))? (VD VD?)?;
 place_holder = NBSP | DOTTEDCIRCLE;
 
 
-consonant_syllable =	(c.n? (h.z?|z.h))* c.n? A? (h.z? | matra_group*)? syllable_tail;
-vowel_syllable =	(Ra H)? V.n? (z?.h.c | ZWJ.c)* matra_group* syllable_tail;
-standalone_cluster =	(Ra H)? place_holder.n? (z? h c)* matra_group* syllable_tail;
+consonant_syllable =	Repha? (c.n? (h.z?|z.h))* c.n? A? (h.z? | matra_group*)? syllable_tail;
+vowel_syllable =	reph? V.n? (z?.h.c | ZWJ.c)* matra_group* syllable_tail;
+standalone_cluster =	reph? place_holder.n? (z? h c)* matra_group* syllable_tail;
 other =			any;
 
 main := |*

diff --git a/src/hb-ot-shape-complex-indic-private.hh b/src/hb-ot-shape-complex-indic-private.hh
index 0541738..9637018 100644
--- a/src/hb-ot-shape-complex-indic-private.hh
+++ b/src/hb-ot-shape-complex-indic-private.hh

@@ -47,7 +47,6 @@
 enum indic_category_t {
   OT_X = 0,
   OT_C,
-  OT_Ra, /* Not explicitly listed in the OT spec, but used in the grammar. */
   OT_V,
   OT_N,
   OT_H,
@@ -60,7 +59,9 @@
   OT_NBSP,
   OT_DOTTEDCIRCLE, /* Not in the spec, but special in Uniscribe. /Very very/ special! */
   OT_RS, /* Register Shifter, used in Khmer OT spec */
-  OT_Coeng
+  OT_Coeng,
+  OT_Repha,
+  OT_Ra /* Not explicitly listed in the OT spec, but used in the grammar. */
 };
 
 /* Visual positions in a syllable from left to right. */
@@ -92,7 +93,7 @@
   INDIC_SYLLABIC_CATEGORY_CONSONANT_MEDIAL	= OT_C,
   INDIC_SYLLABIC_CATEGORY_CONSONANT_PLACEHOLDER	= OT_NBSP,
   INDIC_SYLLABIC_CATEGORY_CONSONANT_SUBJOINED	= OT_C,
-  INDIC_SYLLABIC_CATEGORY_CONSONANT_REPHA	= OT_C,
+  INDIC_SYLLABIC_CATEGORY_CONSONANT_REPHA	= OT_Repha,
   INDIC_SYLLABIC_CATEGORY_MODIFYING_LETTER	= OT_X,
   INDIC_SYLLABIC_CATEGORY_NUKTA			= OT_N,
   INDIC_SYLLABIC_CATEGORY_REGISTER_SHIFTER	= OT_RS,

diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index 3c83ce6..bbf5024 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc

@@ -282,6 +282,19 @@
     if (unlikely (info.codepoint == 0x17D2))
       info.indic_category() = OT_Coeng;
 
+    if (info.indic_category() == OT_Repha) {
+      /* There are two kinds of characters marked as Repha:
+       * - The ones that are GenCat=Mn are already positioned visually, i.e. after base. (e.g. Khmer)
+       * - The ones that are GenCat=Lo are encoded logically, i.e. beginning of syllable. (e.g. Malayalam)
+       *
+       * We recategorize the first kind to look like a Nukta, attached to the base directly.
+       */
+      if (_hb_glyph_info_get_general_category (&info) == HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK)
+        info.indic_category() = OT_N;
+    }
+
+
+    /* Assign positions... */
     if (is_consonant (info)) {
       info.indic_position() = consonant_position (info.codepoint);
       if (is_ra (info.codepoint))
commit	db8981f1e0e8625714568c6d0f11f0b317b11d0a	[log] [tgz]
author	Behdad Esfahbod <behdad@behdad.org>	Tue Jul 17 18:17:30 2012 -0400
committer	Behdad Esfahbod <behdad@behdad.org>	Tue Jul 17 23:42:04 2012 -0400
tree	b456d829eed13ba8694e7b25617846f189af22aa
parent	25bc489498ef7d0beb8fe9ab663e3f0b2f52c9c2 [diff]