[util] Make clusters work with char offset instead of UTF-8 offset
This means the --features indices now also refer to character
positions instead of byte positions. The same applies to the
cluster values reported by hb-shape.
An option for byte indices will be added later.
diff --git a/util/options.hh b/util/options.hh
index 444569e..d6322cd 100644
--- a/util/options.hh
+++ b/util/options.hh
@@ -153,6 +153,18 @@
hb_font_t *font, hb_buffer_t *buffer) {
hb_buffer_reset (buffer);
hb_buffer_add_utf8 (buffer, text, text_len, 0, text_len);
+
+ /* Reset cluster values to refer to the Unicode character index
+ * instead of the UTF-8 byte offset.
+ * TODO: Add an option for this. */
+ unsigned int num_glyphs = hb_buffer_get_length (buffer);
+ hb_glyph_info_t *info = hb_buffer_get_glyph_infos (buffer, NULL);
+ for (unsigned int i = 0; i < num_glyphs; i++)
+ {
+ info->cluster = i;
+ info++;
+ }
+
setup_buffer (buffer);
return hb_shape_full (font, buffer, features, num_features, NULL, shapers);
}