[buffer] Templatize UTF handling. Also move the UTF routines into a separate file, so that they can be reused from shapers that need them.
diff --git a/src/Makefile.am b/src/Makefile.am index 4aae7ec..d1a94cd 100644 --- a/src/Makefile.am +++ b/src/Makefile.am
@@ -51,6 +51,7 @@ hb-tt-font.cc \ hb-unicode-private.hh \ hb-unicode.cc \ + hb-utf-private.hh \ hb-warning.cc \ $(NULL) HBHEADERS = \
diff --git a/src/hb-buffer.cc b/src/hb-buffer.cc index 5471634..f84511d 100644 --- a/src/hb-buffer.cc +++ b/src/hb-buffer.cc
@@ -28,6 +28,7 @@ */ #include "hb-buffer-private.hh" +#include "hb-utf-private.hh" #include <string.h> @@ -797,68 +798,44 @@ buffer->guess_properties (); } -#define ADD_UTF(T) \ - HB_STMT_START { \ - if (text_length == -1) { \ - text_length = 0; \ - const T *p = (const T *) text; \ - while (*p) { \ - text_length++; \ - p++; \ - } \ - } \ - if (item_length == -1) \ - item_length = text_length - item_offset; \ - buffer->ensure (buffer->len + item_length * sizeof (T) / 4); \ - const T *next = (const T *) text + item_offset; \ - const T *end = next + item_length; \ - while (next < end) { \ - hb_codepoint_t u; \ - const T *old_next = next; \ - next = UTF_NEXT (next, end, u); \ - hb_buffer_add (buffer, u, 1, old_next - (const T *) text); \ - } \ - } HB_STMT_END - - -#define UTF8_COMPUTE(Char, Mask, Len) \ - if (Char < 128) { Len = 1; Mask = 0x7f; } \ - else if ((Char & 0xe0) == 0xc0) { Len = 2; Mask = 0x1f; } \ - else if ((Char & 0xf0) == 0xe0) { Len = 3; Mask = 0x0f; } \ - else if ((Char & 0xf8) == 0xf0) { Len = 4; Mask = 0x07; } \ - else Len = 0; - -static inline const uint8_t * -hb_utf8_next (const uint8_t *text, - const uint8_t *end, - hb_codepoint_t *unicode) +/* Shared implementation behind the UTF-8/16/32 buffer-add entry points: decodes a run of text and appends each decoded code point to buffer. T is the code unit type and selects the hb_utf_next overload used for decoding. text_length is the number of code units in text, or -1 when text is zero-terminated (the units are then counted up to the first zero unit). item_offset and item_length select the sub-range of text to add, in code units; an item_length of -1 means "from item_offset to the end of text". Asserts that the buffer content type is HB_BUFFER_CONTENT_TYPE_UNICODE, or that the buffer is empty with content type HB_BUFFER_CONTENT_TYPE_INVALID; returns without doing anything when the buffer is inert; otherwise sets the content type to HB_BUFFER_CONTENT_TYPE_UNICODE once the text has been added. */ template <typename T> +static inline void +hb_buffer_add_utf (hb_buffer_t *buffer, + const T *text, + int text_length, + unsigned int item_offset, + int item_length) { - uint8_t c = *text; - unsigned int mask, len; + assert (buffer->content_type == HB_BUFFER_CONTENT_TYPE_UNICODE || + (!buffer->len && buffer->content_type == HB_BUFFER_CONTENT_TYPE_INVALID)); - /* TODO check for overlong sequences? 
*/ + if (unlikely (hb_object_is_inert (buffer))) + return; - UTF8_COMPUTE (c, mask, len); - if (unlikely (!len || (unsigned int) (end - text) < len)) { - *unicode = -1; - return text + 1; - } else { - hb_codepoint_t result; - unsigned int i; - result = c & mask; - for (i = 1; i < len; i++) - { - if (unlikely ((text[i] & 0xc0) != 0x80)) - { - *unicode = -1; - return text + 1; - } - result <<= 6; - result |= (text[i] & 0x3f); - } - *unicode = result; - return text + len; + if (text_length == -1) { + text_length = 0; + const T *p = (const T *) text; + while (*p) { + text_length++; + p++; + } } + + if (item_length == -1) + item_length = text_length - item_offset; + /* Reserve room up front for item_length * sizeof (T) / 4 more entries, i.e. one entry per four bytes of input. NOTE(review): presumably a lower bound on the number of code points, with hb_buffer_add growing the buffer further as needed -- confirm. */ + buffer->ensure (buffer->len + item_length * sizeof (T) / 4); + /* Decode one code point per iteration. The last argument passed to hb_buffer_add is the offset, in code units from the start of text, of the sequence that was just decoded. */ + const T *next = (const T *) text + item_offset; + const T *end = next + item_length; + while (next < end) { + hb_codepoint_t u; + const T *old_next = next; + next = hb_utf_next (next, end, &u); + hb_buffer_add (buffer, u, 1, old_next - (const T *) text); + } + + buffer->content_type = HB_BUFFER_CONTENT_TYPE_UNICODE; } void @@ -868,36 +845,7 @@ unsigned int item_offset, int item_length) { - assert (buffer->content_type == HB_BUFFER_CONTENT_TYPE_UNICODE || - (!buffer->len && buffer->content_type == HB_BUFFER_CONTENT_TYPE_INVALID)); - if (unlikely (hb_object_is_inert (buffer))) - return; - buffer->content_type = HB_BUFFER_CONTENT_TYPE_UNICODE; -#define UTF_NEXT(S, E, U) hb_utf8_next (S, E, &(U)) - ADD_UTF (uint8_t); -#undef UTF_NEXT -} - -static inline const uint16_t * -hb_utf16_next (const uint16_t *text, - const uint16_t *end, - hb_codepoint_t *unicode) -{ - uint16_t c = *text++; - - if (unlikely (c >= 0xd800 && c < 0xdc00)) { - /* high surrogate */ - uint16_t l; - if (text < end && ((l = *text), likely (l >= 0xdc00 && l < 0xe000))) { - /* low surrogate */ - *unicode = ((hb_codepoint_t) ((c) - 0xd800) * 0x400 + (l) - 0xdc00 + 0x10000); - text++; - } else - *unicode = -1; - } else - *unicode = c; - - return text; + hb_buffer_add_utf (buffer, 
(const uint8_t *) text, text_length, item_offset, item_length); } void @@ -907,23 +855,7 @@ unsigned int item_offset, int item_length) { - assert (buffer->content_type == HB_BUFFER_CONTENT_TYPE_UNICODE || - (!buffer->len && buffer->content_type == HB_BUFFER_CONTENT_TYPE_INVALID)); - if (unlikely (hb_object_is_inert (buffer))) - return; - buffer->content_type = HB_BUFFER_CONTENT_TYPE_UNICODE; -#define UTF_NEXT(S, E, U) hb_utf16_next (S, E, &(U)) - ADD_UTF (uint16_t); -#undef UTF_NEXT -} - -static inline const uint32_t * -hb_utf32_next (const uint32_t *text, - const uint32_t *end, - hb_codepoint_t *unicode) -{ - *unicode = *text; - return text + 1; + hb_buffer_add_utf (buffer, text, text_length, item_offset, item_length); } void @@ -933,14 +865,7 @@ unsigned int item_offset, int item_length) { - assert (buffer->content_type == HB_BUFFER_CONTENT_TYPE_UNICODE || - (!buffer->len && buffer->content_type == HB_BUFFER_CONTENT_TYPE_INVALID)); - if (unlikely (hb_object_is_inert (buffer))) - return; - buffer->content_type = HB_BUFFER_CONTENT_TYPE_UNICODE; -#define UTF_NEXT(S, E, U) hb_utf32_next (S, E, &(U)) - ADD_UTF (uint32_t); -#undef UTF_NEXT + hb_buffer_add_utf (buffer, text, text_length, item_offset, item_length); }
diff --git a/src/hb-utf-private.hh b/src/hb-utf-private.hh new file mode 100644 index 0000000..829ca50 --- /dev/null +++ b/src/hb-utf-private.hh
@@ -0,0 +1,113 @@ +/* + * Copyright © 2011,2012 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_UTF_PRIVATE_HH +#define HB_UTF_PRIVATE_HH + +#include "hb-private.hh" + + +/* UTF-8: HB_UTF8_COMPUTE inspects the lead byte Char and sets Len to the sequence length that byte announces (1 to 4, or 0 when Char is not a valid lead byte) and Mask to the bits of Char that carry payload. Mask is left untouched when Len is set to 0. */ + +#define HB_UTF8_COMPUTE(Char, Mask, Len) \ + if (Char < 128) { Len = 1; Mask = 0x7f; } \ + else if ((Char & 0xe0) == 0xc0) { Len = 2; Mask = 0x1f; } \ + else if ((Char & 0xf0) == 0xe0) { Len = 3; Mask = 0x0f; } \ + else if ((Char & 0xf8) == 0xf0) { Len = 4; Mask = 0x07; } \ + else Len = 0; +/* Decodes the UTF-8 sequence that starts at text without reading at or past end. On success stores the code point in *unicode and returns the position just after the sequence. On an invalid lead byte, a sequence cut short by end, or a byte that is not a continuation byte, stores -1 in *unicode and returns text + 1 so that decoding resumes at the next byte. The caller must guarantee text < end, because *text is read unconditionally. */ +static inline const uint8_t * +hb_utf_next (const uint8_t *text, + const uint8_t *end, + hb_codepoint_t *unicode) +{ + uint8_t c = *text; + unsigned int mask, len; + + /* TODO check for overlong sequences? Overlong encodings, encoded surrogates and values above 0x10FFFF are currently decoded and returned as-is. 
*/ + + HB_UTF8_COMPUTE (c, mask, len); + if (unlikely (!len || (unsigned int) (end - text) < len)) { + *unicode = -1; + return text + 1; + } else { + hb_codepoint_t result; + unsigned int i; + result = c & mask; + for (i = 1; i < len; i++) + { + if (unlikely ((text[i] & 0xc0) != 0x80)) + { + *unicode = -1; + return text + 1; + } + result <<= 6; + result |= (text[i] & 0x3f); + } + *unicode = result; + return text + len; + } +} + + +/* UTF-16: decodes one code point starting at text and returns the position after the units consumed. A high surrogate that is followed, before end, by a low surrogate is combined with it into a supplementary code point and both units are consumed. A high surrogate without such a low surrogate stores -1 in *unicode and consumes only the high surrogate. Every other unit, including an unpaired low surrogate, is stored unchanged. The caller must guarantee text < end, because *text is read unconditionally. */ + +static inline const uint16_t * +hb_utf_next (const uint16_t *text, + const uint16_t *end, + hb_codepoint_t *unicode) +{ + uint16_t c = *text++; + + if (unlikely (c >= 0xd800 && c < 0xdc00)) { + /* high surrogate */ + uint16_t l; + if (text < end && ((l = *text), likely (l >= 0xdc00 && l < 0xe000))) { + /* low surrogate */ + *unicode = ((hb_codepoint_t) ((c) - 0xd800) * 0x400 + (l) - 0xdc00 + 0x10000); + text++; + } else + *unicode = -1; + } else + *unicode = c; + + return text; +} + + +/* UTF-32: every unit is taken to be one code point. The unit at text is copied to *unicode without any validation, end is not consulted, and text + 1 is returned. */ + +static inline const uint32_t * +hb_utf_next (const uint32_t *text, + const uint32_t *end, + hb_codepoint_t *unicode) +{ + *unicode = *text; + return text + 1; +} + + +#endif /* HB_UTF_PRIVATE_HH */