New API: hb_buffer_[sg]et_replacement_codepoint()
With this change, invalid UTF-8/16/32 sequences are now replaced with
U+FFFD by default. This can be changed by calling the new API on the buffer.
Previously the replacement value was (hb_codepoint_t)-1.
Note that hb_buffer_clear_contents() does NOT reset the replacement
character.
See discussion here:
https://github.com/behdad/harfbuzz/commit/6f13b6d62daae4989e3cc2fe4b168e5c59650964
New API:
hb_buffer_set_replacement_codepoint()
hb_buffer_get_replacement_codepoint()
diff --git a/src/hb-buffer.cc b/src/hb-buffer.cc
index 242cded..2377ba4 100644
--- a/src/hb-buffer.cc
+++ b/src/hb-buffer.cc
@@ -178,6 +178,7 @@
hb_unicode_funcs_destroy (unicode);
unicode = hb_unicode_funcs_get_default ();
+ replacement = HB_BUFFER_REPLACEMENT_CODEPOINT_DEFAULT;
clear ();
}
@@ -703,6 +704,7 @@
const_cast<hb_unicode_funcs_t *> (&_hb_unicode_funcs_nil),
HB_SEGMENT_PROPERTIES_DEFAULT,
HB_BUFFER_FLAG_DEFAULT,
+ HB_BUFFER_REPLACEMENT_CODEPOINT_DEFAULT,
HB_BUFFER_CONTENT_TYPE_INVALID,
true, /* in_error */
@@ -1048,6 +1050,42 @@
/**
+ * hb_buffer_set_replacement_codepoint:
+ * @buffer: a buffer; the call is a no-op when hb_object_is_inert() is true.
+ * @replacement: the codepoint to store as the buffer's replacement value.
+ *
+ * Sets the codepoint that replaces invalid UTF-8/16/32 input added to @buffer.
+ *
+ * Since: 1.0
+ **/
+void
+hb_buffer_set_replacement_codepoint (hb_buffer_t *buffer,
+ hb_codepoint_t replacement)
+{
+ if (unlikely (hb_object_is_inert (buffer)))
+ return;
+
+ buffer->replacement = replacement;
+}
+
+/**
+ * hb_buffer_get_replacement_codepoint:
+ * @buffer: a buffer.
+ *
+ * Fetches the codepoint @buffer substitutes for invalid UTF-8/16/32 input.
+ *
+ * Return value: the replacement codepoint currently stored in @buffer.
+ *
+ * Since: 1.0
+ **/
+hb_codepoint_t
+hb_buffer_get_replacement_codepoint (hb_buffer_t *buffer)
+{
+ return buffer->replacement;
+}
+
+
+/**
* hb_buffer_reset:
* @buffer: a buffer.
*
@@ -1299,6 +1337,7 @@
int item_length)
{
typedef hb_utf_t<T, true> utf_t;
+ const hb_codepoint_t replacement = buffer->replacement;
assert (buffer->content_type == HB_BUFFER_CONTENT_TYPE_UNICODE ||
(!buffer->len && buffer->content_type == HB_BUFFER_CONTENT_TYPE_INVALID));
@@ -1330,7 +1369,7 @@
while (start < prev && buffer->context_len[0] < buffer->CONTEXT_LENGTH)
{
hb_codepoint_t u;
- prev = utf_t::prev (prev, start, &u);
+ prev = utf_t::prev (prev, start, &u, replacement);
buffer->context[0][buffer->context_len[0]++] = u;
}
}
@@ -1341,7 +1380,7 @@
{
hb_codepoint_t u;
const T *old_next = next;
- next = utf_t::next (next, end, &u);
+ next = utf_t::next (next, end, &u, replacement);
buffer->add (u, old_next - (const T *) text);
}
@@ -1351,7 +1390,7 @@
while (next < end && buffer->context_len[1] < buffer->CONTEXT_LENGTH)
{
hb_codepoint_t u;
- next = utf_t::next (next, end, &u);
+ next = utf_t::next (next, end, &u, replacement);
buffer->context[1][buffer->context_len[1]++] = u;
}