[buffer] Templatize UTF handling

Also move the UTF routines into a separate file, so they can be reused by shapers that need them.
diff --git a/src/hb-buffer.cc b/src/hb-buffer.cc index 5471634..f84511d 100644 --- a/src/hb-buffer.cc +++ b/src/hb-buffer.cc
@@ -28,6 +28,7 @@ */ #include "hb-buffer-private.hh" +#include "hb-utf-private.hh" #include <string.h> @@ -797,68 +798,44 @@ buffer->guess_properties (); } -#define ADD_UTF(T) \ - HB_STMT_START { \ - if (text_length == -1) { \ - text_length = 0; \ - const T *p = (const T *) text; \ - while (*p) { \ - text_length++; \ - p++; \ - } \ - } \ - if (item_length == -1) \ - item_length = text_length - item_offset; \ - buffer->ensure (buffer->len + item_length * sizeof (T) / 4); \ - const T *next = (const T *) text + item_offset; \ - const T *end = next + item_length; \ - while (next < end) { \ - hb_codepoint_t u; \ - const T *old_next = next; \ - next = UTF_NEXT (next, end, u); \ - hb_buffer_add (buffer, u, 1, old_next - (const T *) text); \ - } \ - } HB_STMT_END - - -#define UTF8_COMPUTE(Char, Mask, Len) \ - if (Char < 128) { Len = 1; Mask = 0x7f; } \ - else if ((Char & 0xe0) == 0xc0) { Len = 2; Mask = 0x1f; } \ - else if ((Char & 0xf0) == 0xe0) { Len = 3; Mask = 0x0f; } \ - else if ((Char & 0xf8) == 0xf0) { Len = 4; Mask = 0x07; } \ - else Len = 0; - -static inline const uint8_t * -hb_utf8_next (const uint8_t *text, - const uint8_t *end, - hb_codepoint_t *unicode) +template <typename T> +static inline void +hb_buffer_add_utf (hb_buffer_t *buffer, + const T *text, + int text_length, + unsigned int item_offset, + int item_length) { - uint8_t c = *text; - unsigned int mask, len; + assert (buffer->content_type == HB_BUFFER_CONTENT_TYPE_UNICODE || + (!buffer->len && buffer->content_type == HB_BUFFER_CONTENT_TYPE_INVALID)); - /* TODO check for overlong sequences? 
*/ + if (unlikely (hb_object_is_inert (buffer))) + return; - UTF8_COMPUTE (c, mask, len); - if (unlikely (!len || (unsigned int) (end - text) < len)) { - *unicode = -1; - return text + 1; - } else { - hb_codepoint_t result; - unsigned int i; - result = c & mask; - for (i = 1; i < len; i++) - { - if (unlikely ((text[i] & 0xc0) != 0x80)) - { - *unicode = -1; - return text + 1; - } - result <<= 6; - result |= (text[i] & 0x3f); - } - *unicode = result; - return text + len; + if (text_length == -1) { + text_length = 0; + const T *p = (const T *) text; + while (*p) { + text_length++; + p++; + } } + + if (item_length == -1) + item_length = text_length - item_offset; + + buffer->ensure (buffer->len + item_length * sizeof (T) / 4); + + const T *next = (const T *) text + item_offset; + const T *end = next + item_length; + while (next < end) { + hb_codepoint_t u; + const T *old_next = next; + next = hb_utf_next (next, end, &u); + hb_buffer_add (buffer, u, 1, old_next - (const T *) text); + } + + buffer->content_type = HB_BUFFER_CONTENT_TYPE_UNICODE; } void @@ -868,36 +845,7 @@ unsigned int item_offset, int item_length) { - assert (buffer->content_type == HB_BUFFER_CONTENT_TYPE_UNICODE || - (!buffer->len && buffer->content_type == HB_BUFFER_CONTENT_TYPE_INVALID)); - if (unlikely (hb_object_is_inert (buffer))) - return; - buffer->content_type = HB_BUFFER_CONTENT_TYPE_UNICODE; -#define UTF_NEXT(S, E, U) hb_utf8_next (S, E, &(U)) - ADD_UTF (uint8_t); -#undef UTF_NEXT -} - -static inline const uint16_t * -hb_utf16_next (const uint16_t *text, - const uint16_t *end, - hb_codepoint_t *unicode) -{ - uint16_t c = *text++; - - if (unlikely (c >= 0xd800 && c < 0xdc00)) { - /* high surrogate */ - uint16_t l; - if (text < end && ((l = *text), likely (l >= 0xdc00 && l < 0xe000))) { - /* low surrogate */ - *unicode = ((hb_codepoint_t) ((c) - 0xd800) * 0x400 + (l) - 0xdc00 + 0x10000); - text++; - } else - *unicode = -1; - } else - *unicode = c; - - return text; + hb_buffer_add_utf (buffer, 
(const uint8_t *) text, text_length, item_offset, item_length); } void @@ -907,23 +855,7 @@ unsigned int item_offset, int item_length) { - assert (buffer->content_type == HB_BUFFER_CONTENT_TYPE_UNICODE || - (!buffer->len && buffer->content_type == HB_BUFFER_CONTENT_TYPE_INVALID)); - if (unlikely (hb_object_is_inert (buffer))) - return; - buffer->content_type = HB_BUFFER_CONTENT_TYPE_UNICODE; -#define UTF_NEXT(S, E, U) hb_utf16_next (S, E, &(U)) - ADD_UTF (uint16_t); -#undef UTF_NEXT -} - -static inline const uint32_t * -hb_utf32_next (const uint32_t *text, - const uint32_t *end, - hb_codepoint_t *unicode) -{ - *unicode = *text; - return text + 1; + hb_buffer_add_utf (buffer, text, text_length, item_offset, item_length); } void @@ -933,14 +865,7 @@ unsigned int item_offset, int item_length) { - assert (buffer->content_type == HB_BUFFER_CONTENT_TYPE_UNICODE || - (!buffer->len && buffer->content_type == HB_BUFFER_CONTENT_TYPE_INVALID)); - if (unlikely (hb_object_is_inert (buffer))) - return; - buffer->content_type = HB_BUFFER_CONTENT_TYPE_UNICODE; -#define UTF_NEXT(S, E, U) hb_utf32_next (S, E, &(U)) - ADD_UTF (uint32_t); -#undef UTF_NEXT + hb_buffer_add_utf (buffer, text, text_length, item_offset, item_length); }