[unsafe-to-break] Add UNSAFE_TO_BREAK flag

Not all shapers code is updated to set this properly.
GSUB and Arabic shaper are updated.
GPOS and other shapers are NOT.

Fixes https://github.com/behdad/harfbuzz/issues/224
diff --git a/src/hb-buffer-private.hh b/src/hb-buffer-private.hh
index bca308d..e598f9e 100644
--- a/src/hb-buffer-private.hh
+++ b/src/hb-buffer-private.hh
@@ -232,25 +232,31 @@
     for (unsigned int j = 0; j < len; j++)
       info[j].mask |= mask;
   }
-  HB_INTERNAL void set_masks (hb_mask_t value,
-			      hb_mask_t mask,
-			      unsigned int cluster_start,
-			      unsigned int cluster_end);
+  HB_INTERNAL void set_masks (hb_mask_t value, hb_mask_t mask,
+			      unsigned int cluster_start, unsigned int cluster_end);
 
-  HB_INTERNAL void merge_clusters (unsigned int start,
-				   unsigned int end)
+  inline void merge_clusters (unsigned int start, unsigned int end)
   {
     if (end - start < 2)
       return;
     merge_clusters_impl (start, end);
   }
-  HB_INTERNAL void merge_clusters_impl (unsigned int start,
-					unsigned int end);
-  HB_INTERNAL void merge_out_clusters (unsigned int start,
-				       unsigned int end);
+  HB_INTERNAL void merge_clusters_impl (unsigned int start, unsigned int end);
+  HB_INTERNAL void merge_out_clusters (unsigned int start, unsigned int end);
   /* Merge clusters for deleting current glyph, and skip it. */
   HB_INTERNAL void delete_glyph (void);
 
+  inline void unsafe_to_break (unsigned int start,
+			       unsigned int end)
+  {
+    if (end - start < 2)
+      return;
+    unsafe_to_break_impl (start, end);
+  }
+  HB_INTERNAL void unsafe_to_break_impl (unsigned int start, unsigned int end);
+  HB_INTERNAL void unsafe_to_break_from_outbuffer (unsigned int start, unsigned int end);
+
+
   /* Internal methods */
   HB_INTERNAL bool enlarge (unsigned int size);
 
diff --git a/src/hb-buffer-serialize.cc b/src/hb-buffer-serialize.cc
index 85696c5..f4a89ac 100644
--- a/src/hb-buffer-serialize.cc
+++ b/src/hb-buffer-serialize.cc
@@ -145,10 +145,16 @@
 
     if (!(flags & HB_BUFFER_SERIALIZE_FLAG_NO_POSITIONS))
     {
-      p += snprintf (p, ARRAY_LENGTH (b) - (p - b), ",\"dx\":%d,\"dy\":%d",
-		     pos[i].x_offset, pos[i].y_offset);
-      p += snprintf (p, ARRAY_LENGTH (b) - (p - b), ",\"ax\":%d,\"ay\":%d",
-		     pos[i].x_advance, pos[i].y_advance);
+      p += MAX (0, snprintf (p, ARRAY_LENGTH (b) - (p - b), ",\"dx\":%d,\"dy\":%d",
+			     pos[i].x_offset, pos[i].y_offset));
+      p += MAX (0, snprintf (p, ARRAY_LENGTH (b) - (p - b), ",\"ax\":%d,\"ay\":%d",
+			     pos[i].x_advance, pos[i].y_advance));
+    }
+
+    if (flags & HB_BUFFER_SERIALIZE_FLAG_GLYPH_FLAGS)
+    {
+      if (info[i].mask &HB_GLYPH_FLAG_DEFINED)
+	p += MAX (0, snprintf (p, ARRAY_LENGTH (b) - (p - b), ",\"fl\":%u", info[i].mask &HB_GLYPH_FLAG_DEFINED));
     }
 
     if (flags & HB_BUFFER_SERIALIZE_FLAG_GLYPH_EXTENTS)
@@ -156,9 +162,9 @@
       hb_glyph_extents_t extents;
       hb_font_get_glyph_extents(font, info[i].codepoint, &extents);
       p += MAX (0, snprintf (p, ARRAY_LENGTH (b) - (p - b), ",\"xb\":%d,\"yb\":%d",
-        extents.x_bearing, extents.y_bearing));
+		extents.x_bearing, extents.y_bearing));
       p += MAX (0, snprintf (p, ARRAY_LENGTH (b) - (p - b), ",\"w\":%d,\"h\":%d",
-        extents.width, extents.height));
+		extents.width, extents.height));
     }
 
     *p++ = '}';
@@ -226,6 +232,12 @@
 	p += MAX (0, snprintf (p, ARRAY_LENGTH (b) - (p - b), ",%d", pos[i].y_advance));
     }
 
+    if (flags & HB_BUFFER_SERIALIZE_FLAG_GLYPH_FLAGS)
+    {
+      if (info[i].mask &HB_GLYPH_FLAG_DEFINED)
+	p += MAX (0, snprintf (p, ARRAY_LENGTH (b) - (p - b), "#%X", info[i].mask &HB_GLYPH_FLAG_DEFINED));
+    }
+
     if (flags & HB_BUFFER_SERIALIZE_FLAG_GLYPH_EXTENTS)
     {
       hb_glyph_extents_t extents;
diff --git a/src/hb-buffer.cc b/src/hb-buffer.cc
index 3777f7f..afec26f 100644
--- a/src/hb-buffer.cc
+++ b/src/hb-buffer.cc
@@ -637,6 +637,73 @@
   skip_glyph ();
 }
 
+static inline void
+unsafe_to_break_two_infos (hb_glyph_info_t &info1, hb_glyph_info_t &info2)
+{
+  if (info1.cluster == info2.cluster)
+    return;
+  hb_glyph_info_t &unsafe = info1.cluster > info2.cluster ? info1 : info2;
+  unsafe.mask |= HB_GLYPH_FLAG_UNSAFE_TO_BREAK;
+}
+static void
+unsafe_to_break_infos_monotone (hb_glyph_info_t *info, unsigned int start, unsigned int end)
+{
+  for (unsigned int i = start + 1; i < end; i++)
+    unsafe_to_break_two_infos (info[i - 1], info[i]);
+}
+static unsigned int
+infos_min_cluster (const hb_glyph_info_t *info, unsigned int start, unsigned int end,
+		   unsigned int cluster)
+{
+  for (unsigned int i = start; i < end; i++)
+    cluster = MIN (cluster, info[i].cluster);
+  return cluster;
+}
+static void
+infos_unsafe_to_break_cluster (hb_glyph_info_t *info, unsigned int start, unsigned int end,
+			       unsigned int cluster)
+{
+  for (unsigned int i = start; i < end; i++)
+    if (cluster != info[i].cluster)
+      info[i].mask |= HB_GLYPH_FLAG_UNSAFE_TO_BREAK;
+}
+void
+hb_buffer_t::unsafe_to_break_impl (unsigned int start, unsigned int end)
+{
+  if (cluster_level == HB_BUFFER_CLUSTER_LEVEL_CHARACTERS)
+  {
+    unsigned int cluster = (unsigned int) -1;
+    cluster = infos_min_cluster (info, start, end, cluster);
+    infos_unsafe_to_break_cluster (info, start, end, cluster);
+    return;
+  }
+
+  /* The case of monotone clusters can be done faster. */
+  unsafe_to_break_infos_monotone (info, start, end);
+}
+void
+hb_buffer_t::unsafe_to_break_from_outbuffer (unsigned int start, unsigned int end)
+{
+  assert (start <= out_len);
+  assert (idx <= end);
+
+  if (cluster_level == HB_BUFFER_CLUSTER_LEVEL_CHARACTERS)
+  {
+    unsigned int cluster = (unsigned int) -1;
+    cluster = infos_min_cluster (out_info, start, out_len, cluster);
+    cluster = infos_min_cluster (info, idx, end, cluster);
+    infos_unsafe_to_break_cluster (out_info, start, out_len, cluster);
+    infos_unsafe_to_break_cluster (info, idx, end, cluster);
+    return;
+  }
+
+  /* The case of monotone clusters can be done faster. */
+  unsafe_to_break_infos_monotone (out_info, start, out_len);
+  if (start < out_len && idx < end)
+    unsafe_to_break_two_infos (out_info[out_len - 1], info[idx]);
+  unsafe_to_break_infos_monotone (info, idx, end);
+}
+
 void
 hb_buffer_t::guess_segment_properties (void)
 {
diff --git a/src/hb-buffer.h b/src/hb-buffer.h
index bf289c1..995191a 100644
--- a/src/hb-buffer.h
+++ b/src/hb-buffer.h
@@ -63,7 +63,7 @@
  */
 typedef struct hb_glyph_info_t {
   hb_codepoint_t codepoint;
-  hb_mask_t      mask;
+  hb_mask_t      mask; /* Holds hb_glyph_flags_t after hb_shape() */
   uint32_t       cluster;
 
   /*< private >*/
@@ -71,6 +71,11 @@
   hb_var_int_t   var2;
 } hb_glyph_info_t;
 
+typedef enum { /*< flags >*/
+  HB_GLYPH_FLAG_UNSAFE_TO_BREAK		= 0x00000002,
+  HB_GLYPH_FLAG_DEFINED			= 0x00000002 /* OR of all defined flags */
+} hb_glyph_flags_t;
+
 /**
  * hb_glyph_position_t:
  * @x_advance: how much the line advances after drawing this glyph when setting
@@ -403,7 +408,8 @@
   HB_BUFFER_SERIALIZE_FLAG_NO_CLUSTERS		= 0x00000001u,
   HB_BUFFER_SERIALIZE_FLAG_NO_POSITIONS		= 0x00000002u,
   HB_BUFFER_SERIALIZE_FLAG_NO_GLYPH_NAMES	= 0x00000004u,
-  HB_BUFFER_SERIALIZE_FLAG_GLYPH_EXTENTS	= 0x00000008u
+  HB_BUFFER_SERIALIZE_FLAG_GLYPH_EXTENTS	= 0x00000008u,
+  HB_BUFFER_SERIALIZE_FLAG_GLYPH_FLAGS		= 0x00000010u
 } hb_buffer_serialize_flags_t;
 
 /**
diff --git a/src/hb-ot-layout-gsub-table.hh b/src/hb-ot-layout-gsub-table.hh
index 66fcb3f..85be7e7 100644
--- a/src/hb-ot-layout-gsub-table.hh
+++ b/src/hb-ot-layout-gsub-table.hh
@@ -1014,14 +1014,17 @@
     const OffsetArrayOf<Coverage> &lookahead = StructAfter<OffsetArrayOf<Coverage> > (backtrack);
     const ArrayOf<GlyphID> &substitute = StructAfter<ArrayOf<GlyphID> > (lookahead);
 
+  unsigned int start_index = 0, end_index = 0;
     if (match_backtrack (c,
 			 backtrack.len, (USHORT *) backtrack.array,
-			 match_coverage, this) &&
+			 match_coverage, this,
+			 &start_index) &&
         match_lookahead (c,
 			 lookahead.len, (USHORT *) lookahead.array,
 			 match_coverage, this,
-			 1))
+			 1, &end_index))
     {
+      c->buffer->unsafe_to_break_from_outbuffer (start_index, end_index);
       c->replace_glyph_inplace (substitute[index]);
       /* Note: We DON'T decrease buffer->idx.  The main loop does it
        * for us.  This is useful for preventing surprises if someone
diff --git a/src/hb-ot-layout-gsubgpos-private.hh b/src/hb-ot-layout-gsubgpos-private.hh
index 2235d3a..472628a 100644
--- a/src/hb-ot-layout-gsubgpos-private.hh
+++ b/src/hb-ot-layout-gsubgpos-private.hh
@@ -891,7 +891,8 @@
 				    unsigned int count,
 				    const USHORT backtrack[],
 				    match_func_t match_func,
-				    const void *match_data)
+				    const void *match_data,
+				    unsigned int *match_start)
 {
   TRACE_APPLY (NULL);
 
@@ -903,6 +904,8 @@
     if (!skippy_iter.prev ())
       return_trace (false);
 
+  *match_start = skippy_iter.idx;
+
   return_trace (true);
 }
 
@@ -911,7 +914,8 @@
 				    const USHORT lookahead[],
 				    match_func_t match_func,
 				    const void *match_data,
-				    unsigned int offset)
+				    unsigned int offset,
+				    unsigned int *end_index)
 {
   TRACE_APPLY (NULL);
 
@@ -923,6 +927,8 @@
     if (!skippy_iter.next ())
       return_trace (false);
 
+  *end_index = skippy_iter.idx + 1;
+
   return_trace (true);
 }
 
@@ -1145,10 +1151,11 @@
 		      inputCount, input,
 		      lookup_context.funcs.match, lookup_context.match_data,
 		      &match_length, match_positions)
-      && apply_lookup (c,
+      && (c->buffer->unsafe_to_break (c->buffer->idx, c->buffer->idx + match_length),
+	  apply_lookup (c,
 		       inputCount, match_positions,
 		       lookupCount, lookupRecord,
-		       match_length);
+		       match_length));
 }
 
 struct Rule
@@ -1666,7 +1673,7 @@
 					       const LookupRecord lookupRecord[],
 					       ChainContextApplyLookupContext &lookup_context)
 {
-  unsigned int match_length = 0;
+  unsigned int start_index = 0, match_length = 0, end_index = 0;
   unsigned int match_positions[HB_MAX_CONTEXT_LENGTH];
   return match_input (c,
 		      inputCount, input,
@@ -1674,15 +1681,17 @@
 		      &match_length, match_positions)
       && match_backtrack (c,
 			  backtrackCount, backtrack,
-			  lookup_context.funcs.match, lookup_context.match_data[0])
+			  lookup_context.funcs.match, lookup_context.match_data[0],
+			  &start_index)
       && match_lookahead (c,
 			  lookaheadCount, lookahead,
 			  lookup_context.funcs.match, lookup_context.match_data[2],
-			  match_length)
-      && apply_lookup (c,
+			  match_length, &end_index)
+      && (c->buffer->unsafe_to_break_from_outbuffer (start_index, end_index),
+          apply_lookup (c,
 		       inputCount, match_positions,
 		       lookupCount, lookupRecord,
-		       match_length);
+		       match_length));
 }
 
 struct ChainRule
diff --git a/src/hb-ot-map.cc b/src/hb-ot-map.cc
index 014e443..174fd52 100644
--- a/src/hb-ot-map.cc
+++ b/src/hb-ot-map.cc
@@ -191,6 +191,8 @@
 
   /* Allocate bits now */
   unsigned int next_bit = 1;
+  next_bit++; /* Allocate for HB_GLYPH_FLAG_UNSAFE_TO_BREAK */
+
   for (unsigned int i = 0; i < feature_infos.len; i++)
   {
     const feature_info_t *info = &feature_infos[i];
diff --git a/src/hb-ot-shape-complex-arabic.cc b/src/hb-ot-shape-complex-arabic.cc
index 5dbbcd9..ed7b3f2 100644
--- a/src/hb-ot-shape-complex-arabic.cc
+++ b/src/hb-ot-shape-complex-arabic.cc
@@ -321,7 +321,10 @@
     const arabic_state_table_entry *entry = &arabic_state_table[state][this_type];
 
     if (entry->prev_action != NONE && prev != (unsigned int) -1)
+    {
       info[prev].arabic_shaping_action() = entry->prev_action;
+      buffer->unsafe_to_break (prev, i + 1);
+    }
 
     info[i].arabic_shaping_action() = entry->curr_action;