Implement SYRIAC ABBREVIATION MARK with 'stch' feature

The feature is enabled for any character in the Arabic shaper.
We should experiment with using it for Arabic subtending marks.
Though, that has a directionality problem as well, since those
are used with digits...

Fixes https://github.com/behdad/harfbuzz/issues/141
diff --git a/src/hb-ot-shape-complex-arabic.cc b/src/hb-ot-shape-complex-arabic.cc
index c36db46..31e0a11 100644
--- a/src/hb-ot-shape-complex-arabic.cc
+++ b/src/hb-ot-shape-complex-arabic.cc
@@ -28,9 +28,16 @@
 #include "hb-ot-shape-private.hh"
 
 
+#ifndef HB_DEBUG_ARABIC
+#define HB_DEBUG_ARABIC (HB_DEBUG+0)
+#endif
+
+
 /* buffer var allocations */
 #define arabic_shaping_action() complex_var_u8_0() /* arabic shaping action */
 
+#define HB_BUFFER_SCRATCH_FLAG_ARABIC_HAS_STCH HB_BUFFER_SCRATCH_FLAG_COMPLEX0
+
 
 /*
  * Joining types:
@@ -84,7 +91,7 @@
 
 
 /* Same order as the feature array */
-enum {
+enum arabic_action_t {
   ISOL,
   FINA,
   FIN2,
@@ -95,7 +102,11 @@
 
   NONE,
 
-  ARABIC_NUM_FEATURES = NONE
+  ARABIC_NUM_FEATURES = NONE,
+
+  /* We abuse the same byte for other things... */
+  STCH_FIXED,
+  STCH_REPEATING,
 };
 
 static const struct arabic_state_table_entry {
@@ -140,6 +151,11 @@
 		       hb_buffer_t *buffer);
 
 static void
+record_stch (const hb_ot_shape_plan_t *plan,
+	     hb_font_t *font,
+	     hb_buffer_t *buffer);
+
+static void
 collect_features_arabic (hb_ot_shape_planner_t *plan)
 {
   hb_ot_map_builder_t *map = &plan->map;
@@ -165,6 +181,9 @@
 
   map->add_gsub_pause (nuke_joiners);
 
+  map->add_global_bool_feature (HB_TAG('s','t','c','h'));
+  map->add_gsub_pause (record_stch);
+
   map->add_global_bool_feature (HB_TAG('c','c','m','p'));
   map->add_global_bool_feature (HB_TAG('l','o','c','l'));
 
@@ -208,8 +227,10 @@
    * mask_array[NONE] == 0. */
   hb_mask_t mask_array[ARABIC_NUM_FEATURES + 1];
 
-  bool do_fallback;
   arabic_fallback_plan_t *fallback_plan;
+
+  unsigned int do_fallback : 1;
+  unsigned int has_stch : 1;
 };
 
 void *
@@ -220,6 +241,7 @@
     return NULL;
 
   arabic_plan->do_fallback = plan->props.script == HB_SCRIPT_ARABIC;
+  arabic_plan->has_stch = !!plan->map.get_1_mask (HB_TAG ('s','t','c','h'));
   for (unsigned int i = 0; i < ARABIC_NUM_FEATURES; i++) {
     arabic_plan->mask_array[i] = plan->map.get_1_mask (arabic_features[i]);
     arabic_plan->do_fallback = arabic_plan->do_fallback &&
@@ -320,8 +342,6 @@
   hb_glyph_info_t *info = buffer->info;
   for (unsigned int i = 0; i < count; i++)
     info[i].mask |= arabic_plan->mask_array[info[i].arabic_shaping_action()];
-
-  HB_BUFFER_DEALLOCATE_VAR (buffer, arabic_shaping_action);
 }
 
 static void
@@ -371,6 +391,193 @@
   arabic_fallback_plan_shape (fallback_plan, font, buffer);
 }
 
+/*
+ * Stretch feature: "stch".
+ * See example here:
+ * https://www.microsoft.com/typography/OpenTypeDev/syriac/intro.htm
+ * We implement this in a generic way, such that the Arabic subtending
+ * marks can use it as well.
+ */
+
+static void
+record_stch (const hb_ot_shape_plan_t *plan,
+	     hb_font_t *font,
+	     hb_buffer_t *buffer)
+{
+  const arabic_shape_plan_t *arabic_plan = (const arabic_shape_plan_t *) plan->data;
+  if (!arabic_plan->has_stch)
+    return;
+
+  /* 'stch' feature was just applied.  Look for anything that multiplied,
+   * and record it for stch treatment later.  Note that rtlm, frac, etc
+   * are applied before stch, but we assume that they didn't result in
+   * anything multiplying into 5 pieces, so it's safe-ish... */
+
+  unsigned int count = buffer->len;
+  hb_glyph_info_t *info = buffer->info;
+  for (unsigned int i = 0; i < count; i++)
+    if (unlikely (_hb_glyph_info_multiplied (&info[i])))
+    {
+      unsigned int comp = _hb_glyph_info_get_lig_comp (&info[i]);
+      info[i].arabic_shaping_action() = comp % 2 ? STCH_REPEATING : STCH_FIXED;
+      buffer->scratch_flags |= HB_BUFFER_SCRATCH_FLAG_ARABIC_HAS_STCH;
+    }
+}
+
+static void
+apply_stch (const hb_ot_shape_plan_t *plan,
+	    hb_buffer_t              *buffer,
+	    hb_font_t                *font)
+{
+  if (likely (!(buffer->scratch_flags & HB_BUFFER_SCRATCH_FLAG_ARABIC_HAS_STCH)))
+    return;
+
+  /* The Arabic shaper currently always processes in RTL mode, so we should
+   * stretch / position the stretched pieces to the left / preceding glyphs. */
+
+  /* We do a two pass implementation:
+   * First pass calculates the exact number of extra glyphs we need,
+   * We then enlarge buffer to have that much room,
+   * Second pass applies the stretch, copying things to the end of buffer.
+   */
+
+  /* 30 = 2048 / 70.
+   * https://www.microsoft.com/typography/cursivescriptguidelines.mspx */
+  hb_position_t overlap = font->x_scale / 30;
+  DEBUG_MSG (ARABIC, NULL, "overlap for stretching is %d", overlap);
+  int sign = font->x_scale < 0 ? -1 : +1;
+  unsigned int extra_glyphs_needed = 0; // Set during MEASURE, used during CUT
+
+  for (enum step_t { MEASURE, CUT } step = MEASURE; step <= CUT; step = (step_t) (step + 1))
+  {
+    unsigned int count = buffer->len;
+    hb_glyph_info_t *info = buffer->info;
+    hb_glyph_position_t *pos = buffer->pos;
+    unsigned int new_len = count + extra_glyphs_needed; // write head during CUT
+    unsigned int j = new_len;
+    for (unsigned int i = count; i; i--)
+    {
+      if (!hb_in_range<unsigned> (info[i - 1].arabic_shaping_action(), STCH_FIXED, STCH_REPEATING))
+      {
+        if (step == CUT)
+	{
+	  --j;
+	  info[j] = info[i - 1];
+	  pos[j] = pos[i - 1];
+	}
+        continue;
+      }
+
+      /* Yay, justification! */
+
+      hb_position_t w_total = 0; // Total to be filled
+      hb_position_t w_fixed = 0; // Sum of fixed tiles
+      hb_position_t w_repeating = 0; // Sum of repeating tiles
+      int n_fixed = 0;
+      int n_repeating = 0;
+
+      unsigned int end = i;
+      while (i &&
+	     hb_in_range<unsigned> (info[i - 1].arabic_shaping_action(), STCH_FIXED, STCH_REPEATING))
+      {
+	i--;
+	hb_glyph_extents_t extents;
+	if (!font->get_glyph_extents (info[i].codepoint, &extents))
+	  extents.width = 0;
+	extents.width -= overlap;
+	if (info[i].arabic_shaping_action() == STCH_FIXED)
+	{
+	  w_fixed += extents.width;
+	  n_fixed++;
+	}
+	else
+	{
+	  w_repeating += extents.width;
+	  n_repeating++;
+	}
+      }
+      unsigned int start = i;
+      unsigned int context = i;
+      while (context &&
+	     !hb_in_range<unsigned> (info[context - 1].arabic_shaping_action(), STCH_FIXED, STCH_REPEATING) &&
+	     (_hb_glyph_info_is_default_ignorable (&info[context - 1]) ||
+	      HB_UNICODE_GENERAL_CATEGORY_IS_WORD (_hb_glyph_info_get_general_category (&info[context - 1]))))
+      {
+	context--;
+	w_total += pos[context].x_advance;
+      }
+      i++; // Don't touch i again.
+
+      DEBUG_MSG (ARABIC, NULL, "%s stretch at (%d,%d,%d)",
+		 step == MEASURE ? "measuring" : "cutting", context, start, end);
+      DEBUG_MSG (ARABIC, NULL, "rest of word:    count=%d width %d", start - context, w_total);
+      DEBUG_MSG (ARABIC, NULL, "fixed tiles:     count=%d width=%d", n_fixed, w_fixed);
+      DEBUG_MSG (ARABIC, NULL, "repeating tiles: count=%d width=%d", n_repeating, w_repeating);
+
+      /* Number of additional times to repeat each repeating tile. */
+      int n_copies = 0;
+
+      hb_position_t w_remaining = w_total - w_fixed - overlap;
+      if (sign * w_remaining > sign * w_repeating && sign * w_repeating > 0)
+	n_copies = (sign * w_remaining + sign * w_repeating / 2) / (sign * w_repeating) - 1;
+
+      if (step == MEASURE)
+      {
+	extra_glyphs_needed += n_copies * n_repeating;
+	DEBUG_MSG (ARABIC, NULL, "will add extra %d copies of repeating tiles", n_copies);
+      }
+      else
+      {
+        hb_position_t x_offset = -overlap;
+	for (unsigned int k = end; k > start; k--)
+	{
+	  hb_glyph_extents_t extents;
+	  if (!font->get_glyph_extents (info[k - 1].codepoint, &extents))
+	    extents.width = 0;
+	  extents.width -= overlap;
+
+	  unsigned int repeat = 1;
+	  if (info[k - 1].arabic_shaping_action() == STCH_REPEATING)
+	    repeat += n_copies;
+
+	  DEBUG_MSG (ARABIC, NULL, "appending %d copies of glyph %d; j=%d",
+		     repeat, info[k - 1].codepoint, j);
+	  for (unsigned int n = 0; n < repeat; n++)
+	  {
+	    x_offset -= extents.width;
+	    pos[k - 1].x_offset = x_offset;
+	    /* Append copy. */
+	    --j;
+	    info[j] = info[k - 1];
+	    pos[j] = pos[k - 1];
+	  }
+	}
+      }
+    }
+
+    if (step == MEASURE)
+    {
+      if (unlikely (!buffer->ensure (count + extra_glyphs_needed)))
+        break;
+    }
+    else
+    {
+      assert (j == 0);
+      buffer->len = new_len;
+    }
+  }
+}
+
+
+static void
+postprocess_glyphs_arabic (const hb_ot_shape_plan_t *plan,
+			   hb_buffer_t              *buffer,
+			   hb_font_t                *font)
+{
+  apply_stch (plan, buffer, font);
+
+  HB_BUFFER_DEALLOCATE_VAR (buffer, arabic_shaping_action);
+}
 
 const hb_ot_complex_shaper_t _hb_ot_complex_shaper_arabic =
 {
@@ -380,7 +587,7 @@
   data_create_arabic,
   data_destroy_arabic,
   NULL, /* preprocess_text */
-  NULL, /* postprocess_glyphs */
+  postprocess_glyphs_arabic,
   HB_OT_SHAPE_NORMALIZATION_MODE_DEFAULT,
   NULL, /* decompose */
   NULL, /* compose */
diff --git a/src/hb-unicode-private.hh b/src/hb-unicode-private.hh
index 1290453..5d544ec 100644
--- a/src/hb-unicode-private.hh
+++ b/src/hb-unicode-private.hh
@@ -362,5 +362,24 @@
 	 (FLAG (HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK) | \
 	  FLAG (HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK)))
 
+#define HB_UNICODE_GENERAL_CATEGORY_IS_WORD(gen_cat) \
+	(FLAG_SAFE (gen_cat) & \
+	 (FLAG (HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED) | \
+	  FLAG (HB_UNICODE_GENERAL_CATEGORY_PRIVATE_USE) | \
+	  FLAG (HB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER) | \
+	  FLAG (HB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER) | \
+	  FLAG (HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER) | \
+	  FLAG (HB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER) | \
+	  FLAG (HB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER) | \
+	  FLAG (HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK) | \
+	  FLAG (HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK) | \
+	  FLAG (HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK) | \
+	  FLAG (HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER) | \
+	  FLAG (HB_UNICODE_GENERAL_CATEGORY_LETTER_NUMBER) | \
+	  FLAG (HB_UNICODE_GENERAL_CATEGORY_OTHER_NUMBER) | \
+	  FLAG (HB_UNICODE_GENERAL_CATEGORY_CURRENCY_SYMBOL) | \
+	  FLAG (HB_UNICODE_GENERAL_CATEGORY_MODIFIER_SYMBOL) | \
+	  FLAG (HB_UNICODE_GENERAL_CATEGORY_MATH_SYMBOL) | \
+	  FLAG (HB_UNICODE_GENERAL_CATEGORY_OTHER_SYMBOL)))
 
 #endif /* HB_UNICODE_PRIVATE_HH */
diff --git a/test/shaping/texts/in-tree/shaper-arabic/script-syriac/misc/MANIFEST b/test/shaping/texts/in-tree/shaper-arabic/script-syriac/misc/MANIFEST
index ae45bdf..a97a710 100644
--- a/test/shaping/texts/in-tree/shaper-arabic/script-syriac/misc/MANIFEST
+++ b/test/shaping/texts/in-tree/shaper-arabic/script-syriac/misc/MANIFEST
@@ -1 +1,2 @@
+abbreviation-mark.txt
 alaph.txt
diff --git a/test/shaping/texts/in-tree/shaper-arabic/script-syriac/misc/abbreviation-mark.txt b/test/shaping/texts/in-tree/shaper-arabic/script-syriac/misc/abbreviation-mark.txt
new file mode 100644
index 0000000..80a568c
--- /dev/null
+++ b/test/shaping/texts/in-tree/shaper-arabic/script-syriac/misc/abbreviation-mark.txt
@@ -0,0 +1,11 @@
+ܐܒ 
+ܐ܏ 
+ܐ܏ܒ 
+ܐ܏ܒܓ 
+ܐ܏ܒܓܕ 
+ܐ܏ܒܓܕܐ 
+ܐ܏ܒܓܕܐܐܐܐܐܐܐܐܐ 
+ܐ܏ܒܓܕܓܓܓܓܓܓ 
+ܐ܏ܒܓ 
+
+