Add --utf8-clusters option. Also fix cairo cluster generation.
diff --git a/util/hb-shape.cc b/util/hb-shape.cc index a76a778..b22bc1f 100644 --- a/util/hb-shape.cc +++ b/util/hb-shape.cc
@@ -36,7 +36,8 @@ void init (const font_options_t *font_opts); void consume_line (hb_buffer_t *buffer, const char *text, - unsigned int text_len); + unsigned int text_len, + hb_bool_t utf8_clusters); void finish (const font_options_t *font_opts); protected: @@ -57,11 +58,12 @@ void output_buffer_t::consume_line (hb_buffer_t *buffer, const char *text, - unsigned int text_len) + unsigned int text_len, + hb_bool_t utf8_clusters) { line_no++; g_string_set_size (gs, 0); - serialize_line (buffer, line_no, text, text_len, font, gs); + serialize_line (buffer, line_no, text, text_len, font, utf8_clusters, gs); fprintf (fp, "%s", gs->str); }
diff --git a/util/hb-view.hh b/util/hb-view.hh index 68a5dd8..66d955b 100644 --- a/util/hb-view.hh +++ b/util/hb-view.hh
@@ -65,7 +65,7 @@ buffer)) fail (FALSE, "All shapers failed"); - output.consume_line (buffer, text, text_len); + output.consume_line (buffer, text, text_len, shaper.utf8_clusters); } hb_buffer_destroy (buffer);
diff --git a/util/helper-cairo.cc b/util/helper-cairo.cc index abb8c15..9374d9e 100644 --- a/util/helper-cairo.cc +++ b/util/helper-cairo.cc
@@ -301,7 +301,8 @@ hb_buffer_t *buffer, const char *text, unsigned int text_len, - double scale) + double scale, + hb_bool_t utf8_clusters) { memset (l, 0, sizeof (*l)); @@ -349,27 +350,38 @@ hb_bool_t backward = HB_DIRECTION_IS_BACKWARD (hb_buffer_get_direction (buffer)); l->cluster_flags = backward ? CAIRO_TEXT_CLUSTER_FLAG_BACKWARD : (cairo_text_cluster_flags_t) 0; unsigned int cluster = 0; + const char *start = l->utf8, *end = start; l->clusters[cluster].num_glyphs++; if (backward) { for (i = l->num_glyphs - 2; i >= 0; i--) { if (hb_glyph[i].cluster != hb_glyph[i+1].cluster) { g_assert (hb_glyph[i].cluster > hb_glyph[i+1].cluster); - l->clusters[cluster].num_bytes += hb_glyph[i].cluster - hb_glyph[i+1].cluster; + if (utf8_clusters) + end = start + hb_glyph[i].cluster - hb_glyph[i+1].cluster; + else + end = g_utf8_offset_to_pointer (start, hb_glyph[i].cluster - hb_glyph[i+1].cluster); + l->clusters[cluster].num_bytes = end - start; + start = end; cluster++; } l->clusters[cluster].num_glyphs++; } - l->clusters[cluster].num_bytes += text_len - hb_glyph[0].cluster; + l->clusters[cluster].num_bytes = l->utf8 + text_len - start; } else { for (i = 1; i < (int) l->num_glyphs; i++) { if (hb_glyph[i].cluster != hb_glyph[i-1].cluster) { g_assert (hb_glyph[i].cluster > hb_glyph[i-1].cluster); - l->clusters[cluster].num_bytes += hb_glyph[i].cluster - hb_glyph[i-1].cluster; + if (utf8_clusters) + end = start + hb_glyph[i].cluster - hb_glyph[i-1].cluster; + else + end = g_utf8_offset_to_pointer (start, hb_glyph[i].cluster - hb_glyph[i-1].cluster); + l->clusters[cluster].num_bytes = end - start; + start = end; cluster++; } l->clusters[cluster].num_glyphs++; } - l->clusters[cluster].num_bytes += text_len - hb_glyph[i - 1].cluster; + l->clusters[cluster].num_bytes = l->utf8 + text_len - start; } } }
diff --git a/util/helper-cairo.hh b/util/helper-cairo.hh index bc3fe1d..2f2c9d4 100644 --- a/util/helper-cairo.hh +++ b/util/helper-cairo.hh
@@ -75,6 +75,7 @@ hb_buffer_t *buffer, const char *text, unsigned int text_len, - double scale); + double scale, + hb_bool_t utf8_clusters); #endif
diff --git a/util/options.cc b/util/options.cc index e5e76c9..e24a026 100644 --- a/util/options.cc +++ b/util/options.cc
@@ -391,6 +391,7 @@ {"direction", 0, 0, G_OPTION_ARG_STRING, &this->direction, "Set text direction (default: auto)", "ltr/rtl/ttb/btt"}, {"language", 0, 0, G_OPTION_ARG_STRING, &this->language, "Set text language (default: $LANG)", "langstr"}, {"script", 0, 0, G_OPTION_ARG_STRING, &this->script, "Set text script (default: auto)", "ISO-15924 tag"}, + {"utf8-clusters", 0, 0, G_OPTION_ARG_NONE, &this->utf8_clusters, "Use UTF-8 byte indices, not char indices", NULL}, {NULL} }; parser->add_group (entries, @@ -404,9 +405,12 @@ " Comma-separated list of font features to apply to text\n" "\n" " Features can be enabled or disabled, either globally or limited to\n" - " specific character ranges. The range indices refer to the positions\n" - " between Unicode characters. The position before the first character\n" - " is 0, and the position after the first character is 1, and so on.\n" + " specific character ranges.\n" + "\n" + " The range indices refer to the positions between Unicode characters,\n" + " unless the --utf8-clusters is provided, in which case range indices\n" + " refer to UTF-8 byte indices. The position before the first character\n" + " is always 0.\n" "\n" " The format is Python-esque. Here is how it all works:\n" "\n" @@ -716,6 +720,7 @@ void format_options_t::serialize_glyphs (hb_buffer_t *buffer, hb_font_t *font, + hb_bool_t utf8_clusters, GString *gs) { FT_Face ft_face = show_glyph_names ? 
hb_ft_font_get_face (font) : NULL; @@ -739,8 +744,11 @@ } else g_string_append_printf (gs, "%u", info->codepoint); - if (show_clusters) + if (show_clusters) { g_string_append_printf (gs, "=%u", info->cluster); + if (utf8_clusters) + g_string_append (gs, "u8"); + } if (show_positions && (pos->x_offset || pos->y_offset)) { g_string_append_c (gs, '@'); @@ -771,6 +779,7 @@ const char *text, unsigned int text_len, hb_font_t *font, + hb_bool_t utf8_clusters, GString *gs) { if (show_text) { @@ -790,6 +799,6 @@ } serialize_line_no (line_no, gs); - serialize_glyphs (buffer, font, gs); + serialize_glyphs (buffer, font, utf8_clusters, gs); g_string_append_c (gs, '\n'); }
diff --git a/util/options.hh b/util/options.hh index da95017..15d9402 100644 --- a/util/options.hh +++ b/util/options.hh
@@ -140,6 +140,7 @@ features = NULL; num_features = 0; shapers = NULL; + utf8_clusters = false; add_options (parser); } @@ -161,15 +162,16 @@ hb_buffer_reset (buffer); hb_buffer_add_utf8 (buffer, text, text_len, 0, text_len); - /* Reset cluster values to refer to Unicode character index - * instead of UTF-8 index. - * TODO: Add an option for this. */ - unsigned int num_glyphs = hb_buffer_get_length (buffer); - hb_glyph_info_t *info = hb_buffer_get_glyph_infos (buffer, NULL); - for (unsigned int i = 0; i < num_glyphs; i++) - { - info->cluster = i; - info++; + if (!utf8_clusters) { + /* Reset cluster values to refer to Unicode character index + * instead of UTF-8 index. */ + unsigned int num_glyphs = hb_buffer_get_length (buffer); + hb_glyph_info_t *info = hb_buffer_get_glyph_infos (buffer, NULL); + for (unsigned int i = 0; i < num_glyphs; i++) + { + info->cluster = i; + info++; + } } setup_buffer (buffer); @@ -182,6 +184,7 @@ hb_feature_t *features; unsigned int num_features; char **shapers; + hb_bool_t utf8_clusters; }; @@ -285,7 +288,8 @@ virtual void init (const font_options_t *font_opts) = 0; virtual void consume_line (hb_buffer_t *buffer, const char *text, - unsigned int text_len) = 0; + unsigned int text_len, + hb_bool_t utf8_clusters) = 0; virtual void finish (const font_options_t *font_opts) = 0; const char *output_file; @@ -319,6 +323,7 @@ GString *gs); void serialize_glyphs (hb_buffer_t *buffer, hb_font_t *font, + hb_bool_t utf8_clusters, GString *gs); void serialize_line_no (unsigned int line_no, GString *gs); @@ -327,6 +332,7 @@ const char *text, unsigned int text_len, hb_font_t *font, + hb_bool_t utf8_clusters, GString *gs);
diff --git a/util/view-cairo.cc b/util/view-cairo.cc index a03c555..5d8ead7 100644 --- a/util/view-cairo.cc +++ b/util/view-cairo.cc
@@ -36,11 +36,12 @@ void view_cairo_t::consume_line (hb_buffer_t *buffer, const char *text, - unsigned int text_len) + unsigned int text_len, + hb_bool_t utf8_clusters) { direction = hb_buffer_get_direction (buffer); helper_cairo_line_t l; - helper_cairo_line_from_buffer (&l, buffer, text, text_len, scale); + helper_cairo_line_from_buffer (&l, buffer, text, text_len, scale, utf8_clusters); g_array_append_val (lines, l); }
diff --git a/util/view-cairo.hh b/util/view-cairo.hh index 0f4fe94..eec90ea 100644 --- a/util/view-cairo.hh +++ b/util/view-cairo.hh
@@ -43,7 +43,8 @@ void init (const font_options_t *font_opts); void consume_line (hb_buffer_t *buffer, const char *text, - unsigned int text_len); + unsigned int text_len, + hb_bool_t utf8_clusters); void finish (const font_options_t *font_opts); protected: