[buffer] Templatize UTF handling Also move UTF routines into a separate file, to be reused from shapers that need it.

diff --git a/src/hb-buffer.cc b/src/hb-buffer.cc
index 5471634..f84511d 100644
--- a/src/hb-buffer.cc
+++ b/src/hb-buffer.cc

@@ -28,6 +28,7 @@
  */
 
 #include "hb-buffer-private.hh"
+#include "hb-utf-private.hh"
 
 #include <string.h>
 
@@ -797,68 +798,44 @@
   buffer->guess_properties ();
 }
 
-#define ADD_UTF(T) \
-	HB_STMT_START { \
-	  if (text_length == -1) { \
-	    text_length = 0; \
-	    const T *p = (const T *) text; \
-	    while (*p) { \
-	      text_length++; \
-	      p++; \
-	    } \
-	  } \
-	  if (item_length == -1) \
-	    item_length = text_length - item_offset; \
-	  buffer->ensure (buffer->len + item_length * sizeof (T) / 4); \
-	  const T *next = (const T *) text + item_offset; \
-	  const T *end = next + item_length; \
-	  while (next < end) { \
-	    hb_codepoint_t u; \
-	    const T *old_next = next; \
-	    next = UTF_NEXT (next, end, u); \
-	    hb_buffer_add (buffer, u, 1,  old_next - (const T *) text); \
-	  } \
-	} HB_STMT_END
-
-
-#define UTF8_COMPUTE(Char, Mask, Len) \
-  if (Char < 128) { Len = 1; Mask = 0x7f; } \
-  else if ((Char & 0xe0) == 0xc0) { Len = 2; Mask = 0x1f; } \
-  else if ((Char & 0xf0) == 0xe0) { Len = 3; Mask = 0x0f; } \
-  else if ((Char & 0xf8) == 0xf0) { Len = 4; Mask = 0x07; } \
-  else Len = 0;
-
-static inline const uint8_t *
-hb_utf8_next (const uint8_t *text,
-	      const uint8_t *end,
-	      hb_codepoint_t *unicode)
+template <typename T>
+static inline void
+hb_buffer_add_utf (hb_buffer_t  *buffer,
+		   const T      *text,
+		   int           text_length,
+		   unsigned int  item_offset,
+		   int           item_length)
 {
-  uint8_t c = *text;
-  unsigned int mask, len;
+  assert (buffer->content_type == HB_BUFFER_CONTENT_TYPE_UNICODE ||
+	  (!buffer->len && buffer->content_type == HB_BUFFER_CONTENT_TYPE_INVALID));
 
-  /* TODO check for overlong sequences? */
+  if (unlikely (hb_object_is_inert (buffer)))
+    return;
 
-  UTF8_COMPUTE (c, mask, len);
-  if (unlikely (!len || (unsigned int) (end - text) < len)) {
-    *unicode = -1;
-    return text + 1;
-  } else {
-    hb_codepoint_t result;
-    unsigned int i;
-    result = c & mask;
-    for (i = 1; i < len; i++)
-      {
-	if (unlikely ((text[i] & 0xc0) != 0x80))
-	  {
-	    *unicode = -1;
-	    return text + 1;
-	  }
-	result <<= 6;
-	result |= (text[i] & 0x3f);
-      }
-    *unicode = result;
-    return text + len;
+  if (text_length == -1) {
+    text_length = 0;
+    const T *p = (const T *) text;
+    while (*p) {
+      text_length++;
+      p++;
+    }
   }
+
+  if (item_length == -1)
+    item_length = text_length - item_offset;
+
+  buffer->ensure (buffer->len + item_length * sizeof (T) / 4);
+
+  const T *next = (const T *) text + item_offset;
+  const T *end = next + item_length;
+  while (next < end) {
+    hb_codepoint_t u;
+    const T *old_next = next;
+    next = hb_utf_next (next, end, &u);
+    hb_buffer_add (buffer, u, 1,  old_next - (const T *) text);
+  }
+
+  buffer->content_type = HB_BUFFER_CONTENT_TYPE_UNICODE;
 }
 
 void
@@ -868,36 +845,7 @@
 		    unsigned int  item_offset,
 		    int           item_length)
 {
-  assert (buffer->content_type == HB_BUFFER_CONTENT_TYPE_UNICODE ||
-	  (!buffer->len && buffer->content_type == HB_BUFFER_CONTENT_TYPE_INVALID));
-  if (unlikely (hb_object_is_inert (buffer)))
-    return;
-  buffer->content_type = HB_BUFFER_CONTENT_TYPE_UNICODE;
-#define UTF_NEXT(S, E, U)	hb_utf8_next (S, E, &(U))
-  ADD_UTF (uint8_t);
-#undef UTF_NEXT
-}
-
-static inline const uint16_t *
-hb_utf16_next (const uint16_t *text,
-	       const uint16_t *end,
-	       hb_codepoint_t *unicode)
-{
-  uint16_t c = *text++;
-
-  if (unlikely (c >= 0xd800 && c < 0xdc00)) {
-    /* high surrogate */
-    uint16_t l;
-    if (text < end && ((l = *text), likely (l >= 0xdc00 && l < 0xe000))) {
-      /* low surrogate */
-      *unicode = ((hb_codepoint_t) ((c) - 0xd800) * 0x400 + (l) - 0xdc00 + 0x10000);
-       text++;
-    } else
-      *unicode = -1;
-  } else
-    *unicode = c;
-
-  return text;
+  hb_buffer_add_utf (buffer, (const uint8_t *) text, text_length, item_offset, item_length);
 }
 
 void
@@ -907,23 +855,7 @@
 		     unsigned int    item_offset,
 		     int            item_length)
 {
-  assert (buffer->content_type == HB_BUFFER_CONTENT_TYPE_UNICODE ||
-	  (!buffer->len && buffer->content_type == HB_BUFFER_CONTENT_TYPE_INVALID));
-  if (unlikely (hb_object_is_inert (buffer)))
-    return;
-  buffer->content_type = HB_BUFFER_CONTENT_TYPE_UNICODE;
-#define UTF_NEXT(S, E, U)	hb_utf16_next (S, E, &(U))
-  ADD_UTF (uint16_t);
-#undef UTF_NEXT
-}
-
-static inline const uint32_t *
-hb_utf32_next (const uint32_t *text,
-	       const uint32_t *end,
-	       hb_codepoint_t *unicode)
-{
-  *unicode = *text;
-  return text + 1;
+  hb_buffer_add_utf (buffer, text, text_length, item_offset, item_length);
 }
 
 void
@@ -933,14 +865,7 @@
 		     unsigned int    item_offset,
 		     int             item_length)
 {
-  assert (buffer->content_type == HB_BUFFER_CONTENT_TYPE_UNICODE ||
-	  (!buffer->len && buffer->content_type == HB_BUFFER_CONTENT_TYPE_INVALID));
-  if (unlikely (hb_object_is_inert (buffer)))
-    return;
-  buffer->content_type = HB_BUFFER_CONTENT_TYPE_UNICODE;
-#define UTF_NEXT(S, E, U)	hb_utf32_next (S, E, &(U))
-  ADD_UTF (uint32_t);
-#undef UTF_NEXT
+  hb_buffer_add_utf (buffer, text, text_length, item_offset, item_length);
 }