[arabic] Implement Unicode Arabic Mark Ordering Algorithm UTR#53
Fixes https://github.com/behdad/harfbuzz/issues/509
diff --git a/src/hb-ot-shape-complex-arabic.cc b/src/hb-ot-shape-complex-arabic.cc
index ed7b3f2..28dd4e1 100644
--- a/src/hb-ot-shape-complex-arabic.cc
+++ b/src/hb-ot-shape-complex-arabic.cc
@@ -613,6 +613,80 @@
HB_BUFFER_DEALLOCATE_VAR (buffer, arabic_shaping_action);
}
+/* http://www.unicode.org/reports/tr53/tr53-1.pdf */
+
+static hb_codepoint_t
+modifier_combining_marks[] =
+{
+ 0x0654u, /* ARABIC HAMZA ABOVE */
+ 0x0655u, /* ARABIC HAMZA BELOW */
+ 0x0658u, /* ARABIC MARK NOON GHUNNA */
+ 0x06DCu, /* ARABIC SMALL HIGH SEEN */
+ 0x06E3u, /* ARABIC SMALL LOW SEEN */
+ 0x06E7u, /* ARABIC SMALL HIGH YEH */
+ 0x06E8u, /* ARABIC SMALL HIGH NOON */
+ 0x08F3u, /* ARABIC SMALL HIGH WAW */
+};
+
+static inline bool
+info_is_mcm (const hb_glyph_info_t &info)
+{
+ hb_codepoint_t u = info.codepoint;
+ for (unsigned int i = 0; i < ARRAY_LENGTH (modifier_combining_marks); i++)
+ if (u == modifier_combining_marks[i])
+ return true;
+ return false;
+}
+
+static void
+reorder_marks_arabic (const hb_ot_shape_plan_t *plan,
+ hb_buffer_t *buffer,
+ unsigned int start,
+ unsigned int end)
+{
+ hb_glyph_info_t *info = buffer->info;
+
+ unsigned int i = start;
+ for (unsigned int cc = 220; cc <= 230; cc += 10)
+ {
+ DEBUG_MSG (ARABIC, buffer, "Looking for %d's starting at %d\n", cc, i);
+ while (i < end && info_cc(info[i]) < cc)
+ i++;
+ DEBUG_MSG (ARABIC, buffer, "Looking for %d's stopped at %d\n", cc, i);
+
+ if (i == end)
+ break;
+
+ if (info_cc(info[i]) > cc)
+ continue;
+
+ /* Technically we should also check "info_cc(info[j]) == cc"
+ * in the following loop. But not doing it is safe; we might
+ * end up moving all the 220 MCMs and 230 MCMs together in one
+ * move and be done. */
+ unsigned int j = i;
+ while (j < end && info_is_mcm (info[j]))
+ j++;
+ DEBUG_MSG (ARABIC, buffer, "Found %d's from %d to %d\n", cc, i, j);
+
+ if (i == j)
+ continue;
+
+ /* Shift it! */
+ DEBUG_MSG (ARABIC, buffer, "Shifting %d's: %d %d\n", cc, i, j);
+ hb_glyph_info_t temp[HB_OT_SHAPE_COMPLEX_MAX_COMBINING_MARKS];
+ assert (j - i <= ARRAY_LENGTH (temp));
+ buffer->merge_out_clusters (start, j);
+ memmove (temp, &info[i], (j - i) * sizeof (hb_glyph_info_t));
+ memmove (&info[start + j - i], &info[start], (i - start) * sizeof (hb_glyph_info_t));
+ memmove (&info[start], temp, (j - i) * sizeof (hb_glyph_info_t));
+
+ start += j - i;
+
+ i = j;
+ }
+}
+
const hb_ot_complex_shaper_t _hb_ot_complex_shaper_arabic =
{
"arabic",
@@ -627,6 +701,7 @@
NULL, /* compose */
setup_masks_arabic,
NULL, /* disable_otl */
+ reorder_marks_arabic,
HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE,
true, /* fallback_position */
};
diff --git a/src/hb-ot-shape-complex-default.cc b/src/hb-ot-shape-complex-default.cc
index 42830ab..857980c 100644
--- a/src/hb-ot-shape-complex-default.cc
+++ b/src/hb-ot-shape-complex-default.cc
@@ -41,6 +41,7 @@
NULL, /* compose */
NULL, /* setup_masks */
NULL, /* disable_otl */
+ NULL, /* reorder_marks */
HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE,
true, /* fallback_position */
};
diff --git a/src/hb-ot-shape-complex-hangul.cc b/src/hb-ot-shape-complex-hangul.cc
index 0e74802..63850d3 100644
--- a/src/hb-ot-shape-complex-hangul.cc
+++ b/src/hb-ot-shape-complex-hangul.cc
@@ -426,6 +426,7 @@
NULL, /* compose */
setup_masks_hangul,
NULL, /* disable_otl */
+ NULL, /* reorder_marks */
HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE,
false, /* fallback_position */
};
diff --git a/src/hb-ot-shape-complex-hebrew.cc b/src/hb-ot-shape-complex-hebrew.cc
index 96f2494..b8ddadc 100644
--- a/src/hb-ot-shape-complex-hebrew.cc
+++ b/src/hb-ot-shape-complex-hebrew.cc
@@ -181,6 +181,7 @@
compose_hebrew,
NULL, /* setup_masks */
disable_otl_hebrew,
+ NULL, /* reorder_marks */
HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE,
true, /* fallback_position */
};
diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
index 00130e6..31dc1c0 100644
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -1840,6 +1840,7 @@
compose_indic,
setup_masks_indic,
NULL, /* disable_otl */
+ NULL, /* reorder_marks */
HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE,
false, /* fallback_position */
};
diff --git a/src/hb-ot-shape-complex-myanmar.cc b/src/hb-ot-shape-complex-myanmar.cc
index 676d494..4081ed0 100644
--- a/src/hb-ot-shape-complex-myanmar.cc
+++ b/src/hb-ot-shape-complex-myanmar.cc
@@ -524,6 +524,7 @@
NULL, /* compose */
NULL, /* setup_masks */
NULL, /* disable_otl */
+ NULL, /* reorder_marks */
HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE,
true, /* fallback_position */
};
@@ -542,6 +543,7 @@
NULL, /* compose */
setup_masks_myanmar,
NULL, /* disable_otl */
+ NULL, /* reorder_marks */
HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_EARLY,
false, /* fallback_position */
};
diff --git a/src/hb-ot-shape-complex-private.hh b/src/hb-ot-shape-complex-private.hh
index 8fadd7c..9792067 100644
--- a/src/hb-ot-shape-complex-private.hh
+++ b/src/hb-ot-shape-complex-private.hh
@@ -39,6 +39,8 @@
#define complex_var_u8_1() var2.u8[3]
+#define HB_OT_SHAPE_COMPLEX_MAX_COMBINING_MARKS 32
+
enum hb_ot_shape_zero_width_marks_type_t {
HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE,
HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_EARLY,
@@ -154,6 +156,16 @@
*/
bool (*disable_otl) (const hb_ot_shape_plan_t *plan);
+ /* reorder_marks()
+ * Called during shape().
+ * Shapers can use to modify ordering of combining marks.
+ * May be NULL.
+ */
+ void (*reorder_marks) (const hb_ot_shape_plan_t *plan,
+ hb_buffer_t *buffer,
+ unsigned int start,
+ unsigned int end);
+
hb_ot_shape_zero_width_marks_type_t zero_width_marks;
bool fallback_position;
diff --git a/src/hb-ot-shape-complex-thai.cc b/src/hb-ot-shape-complex-thai.cc
index 924247f..651c47f 100644
--- a/src/hb-ot-shape-complex-thai.cc
+++ b/src/hb-ot-shape-complex-thai.cc
@@ -378,6 +378,7 @@
NULL, /* compose */
NULL, /* setup_masks */
NULL, /* disable_otl */
+ NULL, /* reorder_marks */
HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE,
false,/* fallback_position */
};
diff --git a/src/hb-ot-shape-complex-tibetan.cc b/src/hb-ot-shape-complex-tibetan.cc
index aadf59f..a85ac0f 100644
--- a/src/hb-ot-shape-complex-tibetan.cc
+++ b/src/hb-ot-shape-complex-tibetan.cc
@@ -58,6 +58,7 @@
NULL, /* compose */
NULL, /* setup_masks */
NULL, /* disable_otl */
+ NULL, /* reorder_marks */
HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE,
true, /* fallback_position */
};
diff --git a/src/hb-ot-shape-complex-use.cc b/src/hb-ot-shape-complex-use.cc
index a5ab0ab..ac3a248 100644
--- a/src/hb-ot-shape-complex-use.cc
+++ b/src/hb-ot-shape-complex-use.cc
@@ -607,6 +607,7 @@
compose_use,
setup_masks_use,
NULL, /* disable_otl */
+ NULL, /* reorder_marks */
HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_EARLY,
false, /* fallback_position */
};
diff --git a/src/hb-ot-shape-normalize.cc b/src/hb-ot-shape-normalize.cc
index a514488..fd9e7c2 100644
--- a/src/hb-ot-shape-normalize.cc
+++ b/src/hb-ot-shape-normalize.cc
@@ -345,14 +345,18 @@
if (_hb_glyph_info_get_modified_combining_class (&buffer->info[end]) == 0)
break;
- /* We are going to do a O(n^2). Only do this if the sequence is short. */
- if (end - i > 10) {
+ /* We are going to do a O(n^2). Only do this if the sequence is short,
+ * but not too short ;). */
+ if (end - i < 2 || end - i > HB_OT_SHAPE_COMPLEX_MAX_COMBINING_MARKS) {
i = end;
continue;
}
buffer->sort (i, end, compare_combining_class);
+ if (plan->shaper->reorder_marks)
+ plan->shaper->reorder_marks (plan, buffer, i, end);
+
i = end;
}