[arabic] Implement Unicode Arabic Mark Ordering Algorithm UTR#53

Fixes https://github.com/behdad/harfbuzz/issues/509
diff --git a/src/hb-ot-shape-complex-arabic.cc b/src/hb-ot-shape-complex-arabic.cc
index ed7b3f2..28dd4e1 100644
--- a/src/hb-ot-shape-complex-arabic.cc
+++ b/src/hb-ot-shape-complex-arabic.cc
@@ -613,6 +613,80 @@
   HB_BUFFER_DEALLOCATE_VAR (buffer, arabic_shaping_action);
 }
 
+/* Arabic mark reordering, see http://www.unicode.org/reports/tr53/tr53-1.pdf
+ * Code points that info_is_mcm() treats as Modifier Combining Marks (MCM). */
+static const hb_codepoint_t
+modifier_combining_marks[] =
+{
+  0x0654u, /* ARABIC HAMZA ABOVE */
+  0x0655u, /* ARABIC HAMZA BELOW */
+  0x0658u, /* ARABIC MARK NOON GHUNNA */
+  0x06DCu, /* ARABIC SMALL HIGH SEEN */
+  0x06E3u, /* ARABIC SMALL LOW SEEN */
+  0x06E7u, /* ARABIC SMALL HIGH YEH */
+  0x06E8u, /* ARABIC SMALL HIGH NOON */
+  0x08F3u, /* ARABIC SMALL HIGH WAW */
+};
+
+/* Tells whether info's codepoint appears in the modifier_combining_marks[] table. */
+static inline bool
+info_is_mcm (const hb_glyph_info_t &info)
+{
+  unsigned int n = ARRAY_LENGTH (modifier_combining_marks);
+  while (n && modifier_combining_marks[n - 1] != info.codepoint)
+    n--;
+  return n != 0;
+}
+
+/* Moves the MCM run that starts the ccc=220 marks, then the one that starts the ccc=230
+ * marks, to the front of info[start..end); expects the range sorted by info_cc(). */
+static void
+reorder_marks_arabic (const hb_ot_shape_plan_t *plan,
+		      hb_buffer_t              *buffer,
+		      unsigned int              start,
+		      unsigned int              end)
+{
+  hb_glyph_info_t *info = buffer->info;
+
+  unsigned int i = start;
+  for (unsigned int cc = 220; cc <= 230; cc += 10)
+  {
+    DEBUG_MSG (ARABIC, buffer, "Looking for %d's starting at %d\n", cc, i);
+    while (i < end && info_cc(info[i]) < cc)
+      i++;
+    DEBUG_MSG (ARABIC, buffer, "Looking for %d's stopped at %d\n", cc, i);
+
+    if (i == end)
+      break;
+    if (info_cc(info[i]) > cc) /* No mark of this class in the range. */
+      continue;
+
+    /* Technically we should also check "info_cc(info[j]) == cc"
+     * in the following loop.  But not doing it is safe; we might
+     * end up moving all the 220 MCMs and 230 MCMs together in one
+     * move and be done. */
+    unsigned int j = i;
+    while (j < end && info_is_mcm (info[j]))
+      j++;
+    DEBUG_MSG (ARABIC, buffer, "Found %d's from %d to %d\n", cc, i, j);
+
+    if (i == j)
+      continue;
+
+    /* Shift it!  Rotate info[start..j) so that the MCM run info[i..j) comes first. */
+    DEBUG_MSG (ARABIC, buffer, "Shifting %d's: %d %d\n", cc, i, j);
+    hb_glyph_info_t temp[HB_OT_SHAPE_COMPLEX_MAX_COMBINING_MARKS];
+    assert (j - i <= ARRAY_LENGTH (temp));
+    /* We reorder buffer->info in place, so merge clusters there, not in the out-buffer. */
+    buffer->merge_clusters (start, j);
+    memmove (temp, &info[i], (j - i) * sizeof (hb_glyph_info_t));
+    memmove (&info[start + j - i], &info[start], (i - start) * sizeof (hb_glyph_info_t));
+    memmove (&info[start], temp, (j - i) * sizeof (hb_glyph_info_t));
+    start += j - i; /* The next class's MCMs go right after the ones just moved. */
+    i = j;
+  }
+}
+
 const hb_ot_complex_shaper_t _hb_ot_complex_shaper_arabic =
 {
   "arabic",
@@ -627,6 +701,7 @@
   NULL, /* compose */
   setup_masks_arabic,
   NULL, /* disable_otl */
+  reorder_marks_arabic,
   HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE,
   true, /* fallback_position */
 };
diff --git a/src/hb-ot-shape-complex-default.cc b/src/hb-ot-shape-complex-default.cc
index 42830ab..857980c 100644
--- a/src/hb-ot-shape-complex-default.cc
+++ b/src/hb-ot-shape-complex-default.cc
@@ -41,6 +41,7 @@
   NULL, /* compose */
   NULL, /* setup_masks */
   NULL, /* disable_otl */
+  NULL, /* reorder_marks */
   HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE,
   true, /* fallback_position */
 };
diff --git a/src/hb-ot-shape-complex-hangul.cc b/src/hb-ot-shape-complex-hangul.cc
index 0e74802..63850d3 100644
--- a/src/hb-ot-shape-complex-hangul.cc
+++ b/src/hb-ot-shape-complex-hangul.cc
@@ -426,6 +426,7 @@
   NULL, /* compose */
   setup_masks_hangul,
   NULL, /* disable_otl */
+  NULL, /* reorder_marks */
   HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE,
   false, /* fallback_position */
 };
diff --git a/src/hb-ot-shape-complex-hebrew.cc b/src/hb-ot-shape-complex-hebrew.cc
index 96f2494..b8ddadc 100644
--- a/src/hb-ot-shape-complex-hebrew.cc
+++ b/src/hb-ot-shape-complex-hebrew.cc
@@ -181,6 +181,7 @@
   compose_hebrew,
   NULL, /* setup_masks */
   disable_otl_hebrew,
+  NULL, /* reorder_marks */
   HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE,
   true, /* fallback_position */
 };
diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index 00130e6..31dc1c0 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -1840,6 +1840,7 @@
   compose_indic,
   setup_masks_indic,
   NULL, /* disable_otl */
+  NULL, /* reorder_marks */
   HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE,
   false, /* fallback_position */
 };
diff --git a/src/hb-ot-shape-complex-myanmar.cc b/src/hb-ot-shape-complex-myanmar.cc
index 676d494..4081ed0 100644
--- a/src/hb-ot-shape-complex-myanmar.cc
+++ b/src/hb-ot-shape-complex-myanmar.cc
@@ -524,6 +524,7 @@
   NULL, /* compose */
   NULL, /* setup_masks */
   NULL, /* disable_otl */
+  NULL, /* reorder_marks */
   HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE,
   true, /* fallback_position */
 };
@@ -542,6 +543,7 @@
   NULL, /* compose */
   setup_masks_myanmar,
   NULL, /* disable_otl */
+  NULL, /* reorder_marks */
   HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_EARLY,
   false, /* fallback_position */
 };
diff --git a/src/hb-ot-shape-complex-private.hh b/src/hb-ot-shape-complex-private.hh
index 8fadd7c..9792067 100644
--- a/src/hb-ot-shape-complex-private.hh
+++ b/src/hb-ot-shape-complex-private.hh
@@ -39,6 +39,8 @@
 #define complex_var_u8_1()	var2.u8[3]
 
 
+#define HB_OT_SHAPE_COMPLEX_MAX_COMBINING_MARKS 32 /* Longest mark run that gets sorted/reordered. */
+
 enum hb_ot_shape_zero_width_marks_type_t {
   HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE,
   HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_EARLY,
@@ -154,6 +156,16 @@
    */
   bool (*disable_otl) (const hb_ot_shape_plan_t *plan);
 
+  /* reorder_marks()
+   * Called during shape(), after a run of combining marks has been sorted by combining class.
+   * Shapers can use this to modify the ordering of the marks in buffer->info[start..end).
+   * May be NULL.
+   */
+  void (*reorder_marks) (const hb_ot_shape_plan_t *plan,
+			 hb_buffer_t              *buffer,
+			 unsigned int              start,
+			 unsigned int              end);
+
   hb_ot_shape_zero_width_marks_type_t zero_width_marks;
 
   bool fallback_position;
diff --git a/src/hb-ot-shape-complex-thai.cc b/src/hb-ot-shape-complex-thai.cc
index 924247f..651c47f 100644
--- a/src/hb-ot-shape-complex-thai.cc
+++ b/src/hb-ot-shape-complex-thai.cc
@@ -378,6 +378,7 @@
   NULL, /* compose */
   NULL, /* setup_masks */
   NULL, /* disable_otl */
+  NULL, /* reorder_marks */
   HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE,
   false,/* fallback_position */
 };
diff --git a/src/hb-ot-shape-complex-tibetan.cc b/src/hb-ot-shape-complex-tibetan.cc
index aadf59f..a85ac0f 100644
--- a/src/hb-ot-shape-complex-tibetan.cc
+++ b/src/hb-ot-shape-complex-tibetan.cc
@@ -58,6 +58,7 @@
   NULL, /* compose */
   NULL, /* setup_masks */
   NULL, /* disable_otl */
+  NULL, /* reorder_marks */
   HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE,
   true, /* fallback_position */
 };
diff --git a/src/hb-ot-shape-complex-use.cc b/src/hb-ot-shape-complex-use.cc
index a5ab0ab..ac3a248 100644
--- a/src/hb-ot-shape-complex-use.cc
+++ b/src/hb-ot-shape-complex-use.cc
@@ -607,6 +607,7 @@
   compose_use,
   setup_masks_use,
   NULL, /* disable_otl */
+  NULL, /* reorder_marks */
   HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_EARLY,
   false, /* fallback_position */
 };
diff --git a/src/hb-ot-shape-normalize.cc b/src/hb-ot-shape-normalize.cc
index a514488..fd9e7c2 100644
--- a/src/hb-ot-shape-normalize.cc
+++ b/src/hb-ot-shape-normalize.cc
@@ -345,14 +345,18 @@
       if (_hb_glyph_info_get_modified_combining_class (&buffer->info[end]) == 0)
         break;
 
-    /* We are going to do a O(n^2).  Only do this if the sequence is short. */
-    if (end - i > 10) {
+    /* We are going to do an O(n^2) sort.  Only do this if the sequence is short,
+     * but not too short ;). */
+    if (end - i < 2 || end - i > HB_OT_SHAPE_COMPLEX_MAX_COMBINING_MARKS) {
       i = end;
       continue;
     }
 
     buffer->sort (i, end, compare_combining_class);
 
+    if (plan->shaper->reorder_marks)
+      plan->shaper->reorder_marks (plan, buffer, i, end);
+
     i = end;
   }