[use] Prohibit visually ambiguous vowel sequences
diff --git a/src/Makefile.am b/src/Makefile.am
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -295,6 +295,7 @@
gen-os2-unicode-ranges.py \
gen-tag-table.py \
gen-use-table.py \
+ gen-vowel-constraints.py \
@@ -316,13 +317,17 @@
$(AM_V_GEN) $(builddir)/$^ > $(srcdir)/hb-ot-shape-complex-use-table.cc \
|| ($(RM) $(srcdir)/hb-ot-shape-complex-use-table.cc; false)
+# Regenerate hb-ot-shape-complex-vowel-constraints.hh in the source directory
+# by running gen-vowel-constraints.py on the "use" and Scripts.txt inputs.
+# If the generator fails, the header is removed and the rule fails.
+vowel-constraints: gen-vowel-constraints.py use Scripts.txt
+ $(AM_V_GEN) $(builddir)/$^ > $(srcdir)/hb-ot-shape-complex-vowel-constraints.hh \
+ || ($(RM) $(srcdir)/hb-ot-shape-complex-vowel-constraints.hh; false)
emoji-table: gen-emoji-table.py emoji-data.txt
$(AM_V_GEN) $(builddir)/$^ > $(srcdir)/hb-unicode-emoji-table.hh \
|| ($(RM) $(srcdir)/hb-unicode-emoji-table.hh; false)
built-sources: $(BUILT_SOURCES)
-.PHONY: unicode-tables arabic-table indic-table tag-table use-table emoji-table built-sources
+.PHONY: unicode-tables arabic-table indic-table tag-table use-table vowel-constraints emoji-table built-sources
$(patsubst %,$(srcdir)/%,$(HB_BASE_RAGEL_GENERATED_sources)) \
diff --git a/src/Makefile.sources b/src/Makefile.sources
--- a/src/Makefile.sources
+++ b/src/Makefile.sources
@@ -142,6 +142,7 @@
hb-ot-shape-complex-use.cc \
hb-ot-shape-complex-use.hh \
hb-ot-shape-complex-use-table.cc \
+ hb-ot-shape-complex-vowel-constraints.hh \
hb-ot-shape-complex.hh \
hb-ot-shape-normalize.hh \
hb-ot-shape-normalize.cc \
diff --git a/src/gen-vowel-constraints.py b/src/gen-vowel-constraints.py
new file mode 100755
--- /dev/null
+++ b/src/gen-vowel-constraints.py
@@ -0,0 +1,286 @@
+"""Generator of the function to prohibit certain vowel sequences.
+It creates ``preprocess_text_vowel_constraints``, which inserts dotted
+circles into sequences prohibited by the USE script development spec.
+This function should be used as the ``preprocess_text`` callback of a
+complex shaper.
+It also creates the helper function ``_output_with_dotted_circle``.
+from __future__ import absolute_import, division, print_function, unicode_literals
+import collections
+ from HTMLParser import HTMLParser
+ def write (s):
+ print (s.encode ('utf-8'), end='')
+except ImportError:
+ from html.parser import HTMLParser
+ def write (s):
+ sys.stdout.flush ()
+ sys.stdout.buffer.write (s.encode ('utf-8'))
+import itertools
+import io
+import sys
+# Exactly two arguments are required: the USE spec file (sys.argv[1]) and
+# Scripts.txt (sys.argv[2]). Otherwise print usage to stderr and exit with 1.
+if len (sys.argv) != 3:
+ print ('usage: ./gen-vowel-constraints.py use Scripts.txt', file=sys.stderr)
+ sys.exit (1)
+ from html import unescape
+ def html_unescape (parser, entity):
+ return unescape (entity)
+except ImportError:
+ def html_unescape (parser, entity):
+ return parser.unescape (entity)
+# Raise AssertionError unless `condition` is truthy. When `message` is given
+# it becomes the exception's argument; otherwise the bare exception class is
+# raised. Used below to validate what was parsed from the USE spec.
+def expect (condition, message=None):
+ if not condition:
+ if message is None:
+ raise AssertionError
+ raise AssertionError (message)
+# Parse Scripts.txt (the second command-line argument). Each data line has
+# the form "<code point or start..end range in hex> ; <script name>", with an
+# optional trailing '#' comment.
+with io.open (sys.argv[2], encoding='utf-8') as f:
+ # The first two lines are kept verbatim; they are echoed into the header
+ # comment of the generated output.
+ scripts_header = [f.readline () for i in range (2)]
+ # Maps each code point to its script name.
+ scripts = {}
+ # Maps each script name to the start of the first range seen for it; used
+ # later as the sort key for the generated per-script cases.
+ script_order = {}
+ for line in f:
+ # Drop everything from the first '#' to the end of the line.
+ j = line.find ('#')
+ if j >= 0:
+ line = line[:j]
+ fields = [x.strip () for x in line.split (';')]
+ # A line without any ';' (blank or comment-only) carries no data.
+ if len (fields) == 1:
+ continue
+ # A single code point is treated as a one-element range.
+ uu = fields[0].split ('..')
+ start = int (uu[0], 16)
+ if len (uu) == 1:
+ end = start
+ else:
+ end = int (uu[1], 16)
+ script = fields[1]
+ for u in range (start, end + 1):
+ scripts[u] = script
+ if script not in script_order:
+ script_order[script] = start
+class ConstraintSet (object):
+ """A set of prohibited code point sequences.
+ Args:
+ constraint (List[int]): A prohibited code point sequence.
+ """
+ def __init__ (self, constraint):
+ # Either a list or a dictionary. As a list of code points, it
+ # represents a prohibited code point sequence. As a dictionary,
+ # it represents a set of prohibited sequences, where each item
+ # represents the set of prohibited sequences starting with the
+ # key (a code point) concatenated with any of the values
+ # (ConstraintSets).
+ self._c = constraint
+ def add (self, constraint):
+ """Add a constraint to this set."""
+ # An empty sequence adds nothing.
+ if not constraint:
+ return
+ first = constraint[0]
+ rest = constraint[1:]
+ if isinstance (self._c, list):
+ # The new sequence is a prefix of (or equal to) the stored one:
+ # keep only the new, shorter-or-equal sequence.
+ if constraint == self._c[:len (constraint)]:
+ self._c = constraint
+ # The stored sequence is not a prefix of the new one: switch to the
+ # dictionary form keyed by the stored sequence's first code point.
+ # (If the stored sequence is a prefix of the new one, nothing changes.)
+ elif self._c != constraint[:len (self._c)]:
+ self._c = {self._c[0]: ConstraintSet (self._c[1:])}
+ # Not an `elif`: this also runs right after the conversion above, so
+ # the new sequence is inserted into the freshly built dictionary.
+ if isinstance (self._c, dict):
+ if first in self._c:
+ self._c[first].add (rest)
+ else:
+ self._c[first] = ConstraintSet (rest)
+ # Return the leading whitespace for nesting level `depth`; the `replace`
+ # call converts spaces to tabs.
+ def _indent (self, depth):
+ return (' ' * depth).replace (' ', '\t')
+ # Render this set as C++ source text. `index` is the offset passed to
+ # `buffer->cur (...)` for the first code point of this set and `depth` is
+ # the indentation level. Because of `index or ''`, offset 0 is printed as
+ # an empty argument list.
+ def __str__ (self, index=0, depth=4):
+ s = []
+ indent = self._indent (depth)
+ if isinstance (self._c, list):
+ # Empty sequence: everything was already matched by the enclosing code.
+ if len (self._c) == 0:
+ s.append ('{}matched = true;\n'.format (indent))
+ # One code point left: `matched` becomes the result of one comparison.
+ elif len (self._c) == 1:
+ s.append ('{}matched = 0x{:04X}u == buffer->cur ({}).codepoint;\n'.format (indent, next (iter (self._c)), index or ''))
+ # Several code points left: emit one `if` that compares them at
+ # successive offsets and, on success, copies glyphs with `next_glyph`
+ # and then outputs U+25CC directly instead of setting `matched`.
+ else:
+ s.append ('{}if (0x{:04X}u == buffer->cur ({}).codepoint &&\n'.format (indent, self._c[0], index))
+ # NOTE(review): the bound below and the number of `next_glyph` calls
+ # further down use len (self._c) and ignore `index`; that equals the
+ # furthest offset read (index + len - 1) only when index == 1 - confirm.
+ s.append ('{}buffer->idx + {} < count &&\n'.format (self._indent (depth + 2), len (self._c)))
+ for i, cp in enumerate (self._c[1:], start=1):
+ s.append ('{}0x{:04X}u == buffer->cur ({}).codepoint{}\n'.format (
+ self._indent (depth + 2), cp, index + i, ')' if i == len (self._c) - 1 else ' &&'))
+ s.append ('{}{{\n'.format (indent))
+ for i in range (len (self._c)):
+ s.append ('{}buffer->next_glyph ();\n'.format (self._indent (depth + 1)))
+ s.append ('{}buffer->output_glyph (0x25CCu);\n'.format (self._indent (depth + 1)))
+ s.append ('{}}}\n'.format (indent))
+ # Dictionary form: emit a `switch` on the code point at offset `index`.
+ else:
+ s.append ('{}switch (buffer->cur ({}).codepoint)\n'.format(indent, index or ''))
+ s.append ('{}{{\n'.format (indent))
+ # Group first code points by the rendered text of their continuation so
+ # that identical bodies share one run of `case` labels.
+ cases = collections.defaultdict (set)
+ for first, rest in sorted (self._c.items ()):
+ cases[rest.__str__ (index + 1, depth + 2)].add (first)
+ # Groups are ordered by their smallest label.
+ for body, labels in sorted (cases.items (), key=lambda b_ls: sorted (b_ls[1])[0]):
+ # Labels are laid out four per line.
+ for i, cp in enumerate (sorted (labels)):
+ if i % 4 == 0:
+ s.append (self._indent (depth + 1))
+ else:
+ s.append (' ')
+ s.append ('case 0x{:04X}u:{}'.format (cp, '\n' if i % 4 == 3 else ''))
+ # Terminate a partially filled last line of labels.
+ if len (labels) % 4 != 0:
+ s.append ('\n')
+ s.append (body)
+ s.append ('{}break;\n'.format (self._indent (depth + 2)))
+ s.append ('{}}}\n'.format (indent))
+ return ''.join (s)
+class USESpecParser (HTMLParser):
+ """A parser for the USE script development spec.
+ Attributes:
+ header (str): The ``updated_at`` timestamp of the spec.
+ constraints (Mapping[str, ConstraintSet]): A map of script names
+ to the scripts' prohibited sequences.
+ """
+ def __init__ (self):
+ HTMLParser.__init__ (self)
+ self.header = ''
+ self.constraints = {}
+ # Whether the next <code> contains the vowel constraints.
+ self._primed = False
+ # Whether the parser is in the <code> element with the constraints.
+ self._in_constraints = False
+ # The text of the constraints.
+ self._constraints = ''
+ # Track the three start tags of interest: the `updated_at` <meta>, the
+ # <a id="ivdvconstraints"> anchor, and the first <code> after that anchor.
+ def handle_starttag (self, tag, attrs):
+ if tag == 'meta':
+ for attr, value in attrs:
+ if attr == 'name' and value == 'updated_at':
+ # Store the raw text of the whole start tag.
+ self.header = self.get_starttag_text ()
+ break
+ elif tag == 'a':
+ for attr, value in attrs:
+ if attr == 'id' and value == 'ivdvconstraints':
+ self._primed = True
+ break
+ elif self._primed and tag == 'code':
+ self._primed = False
+ self._in_constraints = True
+ # Any end tag, not only </code>, stops the capture.
+ def handle_endtag (self, tag):
+ self._in_constraints = False
+ # Accumulate text only while inside the constraints <code> element.
+ def handle_data (self, data):
+ if self._in_constraints:
+ self._constraints += data
+ # Character and entity references are decoded and then handled as data.
+ def handle_charref (self, name):
+ self.handle_data (html_unescape (self, '&#%s;' % name))
+ def handle_entityref (self, name):
+ self.handle_data (html_unescape (self, '&%s;' % name))
+ def parse (self, filename):
+ """Parse the USE script development spec.
+ Args:
+ filename (str): The file name of the spec.
+ """
+ with io.open (filename, encoding='utf-8') as f:
+ self.feed (f.read ())
+ expect (self.header, 'No header found')
+ for line in self._constraints.splitlines ():
+ # Only the part before the first ';' is used: hexadecimal code points
+ # separated by single spaces.
+ constraint = [int (cp, 16) for cp in line.split (';')[0].strip ().split (' ')]
+ expect (2 <= len (constraint), 'Prohibited sequence is too short: {}'.format (constraint))
+ # The script is looked up in the module-level `scripts` map using the
+ # sequence's first code point.
+ script = scripts[constraint[0]]
+ if script in self.constraints:
+ self.constraints[script].add (constraint)
+ else:
+ self.constraints[script] = ConstraintSet (constraint)
+ expect (self.constraints, 'No constraints found')
+# Parse the USE spec given as the first command-line argument.
+use_parser = USESpecParser ()
+use_parser.parse (sys.argv[1])
+# Everything below writes the generated C++ source to standard output,
+# starting with a comment that records the command line and input headers.
+print ('/* == Start of generated functions == */')
+print ('/*')
+print (' * The following functions are generated by running:')
+print (' *')
+print (' * %s use Scripts.txt' % sys.argv[0])
+print (' *')
+print (' * on files with these headers:')
+print (' *')
+print (' * %s' % use_parser.header.strip ())
+for line in scripts_header:
+ print (' * %s' % line.strip ())
+print (' */')
+print ()
+print ()
+# Emit the `_output_with_dotted_circle` helper.
+print ('static void')
+print ('_output_with_dotted_circle (hb_buffer_t *buffer)')
+print ('{')
+print (' hb_glyph_info_t &dottedcircle = buffer->output_glyph (0x25CCu);')
+print (' _hb_glyph_info_reset_continuation (&dottedcircle);')
+print ()
+print (' buffer->next_glyph ();')
+print ('}')
+print ()
+# Emit the opening of `preprocess_text_vowel_constraints`.
+print ('static void')
+print ('preprocess_text_vowel_constraints (const hb_ot_shape_plan_t *plan,')
+print ('\t\t\t\t hb_buffer_t *buffer,')
+print ('\t\t\t\t hb_font_t *font)')
+print ('{')
+print (' /* UGLY UGLY UGLY business of adding dotted-circle in the middle of')
+print (' * vowel-sequences that look like another vowel. Data for each script')
+print (' * collected from the USE script development spec.')
+print (' *')
+print (' * https://github.com/harfbuzz/harfbuzz/issues/1019')
+print (' */')
+print (' bool processed = false;')
+print (' buffer->clear_output ();')
+print (' unsigned int count = buffer->len;')
+print (' switch ((unsigned) buffer->props.script)')
+print (' {')
+# One `case` per script that has constraints, ordered by `script_order`.
+for script, constraints in sorted (use_parser.constraints.items (), key=lambda s_c: script_order[s_c[0]]):
+ print (' case HB_SCRIPT_{}:'.format (script.upper ()))
+ print (' for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)')
+ print (' {')
+ print ('\tbool matched = false;')
+ # The rendered constraint text is emitted through `write`, not `print`.
+ write (str (constraints))
+ print ('\tbuffer->next_glyph ();')
+ print ('\tif (matched) _output_with_dotted_circle (buffer);')
+ print (' }')
+ print (' processed = true;')
+ print (' break;')
+ print ()
+# Emit the default case and the closing part of the function.
+print (' default:')
+print (' break;')
+print (' }')
+print (' if (processed)')
+print (' {')
+print (' if (buffer->idx < count)')
+print (' buffer->next_glyph ();')
+print (' if (likely (buffer->successful))')
+print (' buffer->swap_buffers ();')
+print (' }')
+print ('}')
+print ()
+print ()
+print ('/* == End of generated functions == */')
diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc
--- a/src/hb-ot-shape-complex-indic.cc
+++ b/src/hb-ot-shape-complex-indic.cc
@@ -25,6 +25,7 @@
#include "hb-ot-shape-complex-indic.hh"
+#include "hb-ot-shape-complex-vowel-constraints.hh"
#include "hb-ot-layout.hh"
@@ -331,275 +332,6 @@
free (data);
-static void
-_output_with_dotted_circle (hb_buffer_t *buffer)
- hb_glyph_info_t &dottedcircle = buffer->output_glyph (0x25CCu);
- _hb_glyph_info_reset_continuation (&dottedcircle);
- buffer->next_glyph ();
-static void
-preprocess_text_indic (const hb_ot_shape_plan_t *plan,
- hb_buffer_t *buffer,
- hb_font_t *font)
- /* UGLY UGLY UGLY business of adding dotted-circle in the middle of
- * vowel-sequences that look like another vowel. Data for each script
- * collected from Unicode 11 book, tables named "Vowel Letters" with
- * "Use" and "Do Not Use" columns.
- *
- * https://github.com/harfbuzz/harfbuzz/issues/1019
- */
- bool processed = false;
- buffer->clear_output ();
- unsigned int count = buffer->len;
- switch ((unsigned) buffer->props.script)
- {
- for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
- {
- bool matched = false;
- switch (buffer->cur().codepoint)
- {
- case 0x0905u:
- switch (buffer->cur(1).codepoint)
- {
- case 0x093Au: case 0x093Bu: case 0x093Eu: case 0x0945u:
- case 0x0946u: case 0x0949u: case 0x094Au: case 0x094Bu:
- case 0x094Cu: case 0x094Fu: case 0x0956u: case 0x0957u:
- matched = true;
- break;
- }
- break;
- case 0x0906u:
- switch (buffer->cur(1).codepoint)
- {
- case 0x093Au: case 0x0945u: case 0x0946u: case 0x0947u:
- case 0x0948u:
- matched = true;
- break;
- }
- break;
- case 0x0909u:
- switch (buffer->cur(1).codepoint)
- {
- case 0x0941u:
- matched = true;
- break;
- }
- break;
- case 0x090Fu:
- switch (buffer->cur(1).codepoint)
- {
- case 0x0945u: case 0x0946u: case 0x0947u:
- matched = true;
- break;
- }
- break;
- case 0x0930u:
- if (0x094Du == buffer->cur(1).codepoint &&
- buffer->idx + 2 < count &&
- 0x0907u == buffer->cur(2).codepoint)
- {
- buffer->next_glyph ();
- buffer->next_glyph ();
- buffer->output_glyph (0x25CCu);
- }
- break;
- }
- buffer->next_glyph ();
- if (matched) _output_with_dotted_circle (buffer);
- }
- processed = true;
- break;
- for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
- {
- bool matched = false;
- switch (buffer->cur().codepoint)
- {
- case 0x0985u:
- matched = 0x09BE == buffer->cur(1).codepoint;
- break;
- case 0x098Bu:
- matched = 0x09C3 == buffer->cur(1).codepoint;
- break;
- case 0x098Cu:
- matched = 0x09E2 == buffer->cur(1).codepoint;
- break;
- }
- buffer->next_glyph ();
- if (matched) _output_with_dotted_circle (buffer);
- }
- processed = true;
- break;
- for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
- {
- bool matched = false;
- switch (buffer->cur().codepoint)
- {
- case 0x0A05u:
- switch (buffer->cur(1).codepoint)
- {
- case 0x0A3Eu: case 0x0A48u: case 0x0A4Cu:
- matched = true;
- break;
- }
- break;
- case 0x0A72u:
- switch (buffer->cur(1).codepoint)
- {
- case 0x0A3Fu: case 0x0A40u: case 0x0A47u:
- matched = true;
- break;
- }
- break;
- case 0x0A73u:
- switch (buffer->cur(1).codepoint)
- {
- case 0x0A41u: case 0x0A42u: case 0x0A4Bu:
- matched = true;
- break;
- }
- break;
- }
- buffer->next_glyph ();
- if (matched) _output_with_dotted_circle (buffer);
- }
- processed = true;
- break;
- for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
- {
- bool matched = false;
- switch (buffer->cur().codepoint)
- {
- case 0x0A85u:
- switch (buffer->cur(1).codepoint)
- {
- case 0x0ABEu: case 0x0AC5u: case 0x0AC7u: case 0x0AC8u:
- case 0x0AC9u: case 0x0ACBu: case 0x0ACCu:
- matched = true;
- break;
- }
- break;
- case 0x0AC5u:
- matched = 0x0ABE == buffer->cur(1).codepoint;
- break;
- }
- buffer->next_glyph ();
- if (matched) _output_with_dotted_circle (buffer);
- }
- processed = true;
- break;
- for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
- {
- bool matched = false;
- switch (buffer->cur().codepoint)
- {
- case 0x0B05u:
- matched = 0x0B3E == buffer->cur(1).codepoint;
- break;
- case 0x0B0Fu: case 0x0B13u:
- matched = 0x0B57 == buffer->cur(1).codepoint;
- break;
- }
- buffer->next_glyph ();
- if (matched) _output_with_dotted_circle (buffer);
- }
- processed = true;
- break;
- for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
- {
- bool matched = false;
- switch (buffer->cur().codepoint)
- {
- case 0x0C12u:
- switch (buffer->cur(1).codepoint)
- {
- case 0x0C4Cu: case 0x0C55u:
- matched = true;
- break;
- }
- break;
- case 0x0C3Fu: case 0x0C46u: case 0xC4Au:
- matched = 0x0C55 == buffer->cur(1).codepoint;
- break;
- }
- buffer->next_glyph ();
- if (matched) _output_with_dotted_circle (buffer);
- }
- processed = true;
- break;
- for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
- {
- bool matched = false;
- switch (buffer->cur().codepoint)
- {
- case 0x0C89u: case 0x0C8Bu:
- matched = 0x0CBE == buffer->cur(1).codepoint;
- break;
- case 0x0C92u:
- matched = 0x0CCC == buffer->cur(1).codepoint;
- break;
- }
- buffer->next_glyph ();
- if (matched) _output_with_dotted_circle (buffer);
- }
- processed = true;
- break;
- for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
- {
- bool matched = false;
- switch (buffer->cur().codepoint)
- {
- case 0x0D07u: case 0x0D09u:
- matched = 0x0D57 == buffer->cur(1).codepoint;
- break;
- case 0x0D0Eu:
- matched = 0x0D46 == buffer->cur(1).codepoint;
- break;
- case 0x0D12u:
- switch (buffer->cur(1).codepoint)
- {
- case 0x0D3Eu: case 0x0D57u:
- matched = true;
- break;
- }
- break;
- }
- buffer->next_glyph ();
- if (matched) _output_with_dotted_circle (buffer);
- }
- processed = true;
- break;
- default:
- break;
- }
- if (processed)
- {
- if (buffer->idx < count)
- buffer->next_glyph ();
- if (likely (buffer->successful))
- buffer->swap_buffers ();
- }
static indic_position_t
consonant_position_from_face (const indic_shape_plan_t *indic_plan,
const hb_codepoint_t consonant,
@@ -1884,7 +1616,7 @@
- preprocess_text_indic,
+ preprocess_text_vowel_constraints,
nullptr, /* postprocess_glyphs */
diff --git a/src/hb-ot-shape-complex-use.cc b/src/hb-ot-shape-complex-use.cc
index f9a580c..8c44fe0 100644
--- a/src/hb-ot-shape-complex-use.cc
+++ b/src/hb-ot-shape-complex-use.cc
@@ -28,6 +28,7 @@
#include "hb-ot-shape-complex-use.hh"
#include "hb-ot-shape-complex-arabic.hh"
+#include "hb-ot-shape-complex-vowel-constraints.hh"
/* buffer var allocations */
#define use_category() complex_var_u8_0()
@@ -591,7 +592,7 @@
nullptr, /* override_features */
- nullptr, /* preprocess_text */
+ preprocess_text_vowel_constraints,
nullptr, /* postprocess_glyphs */
nullptr, /* decompose */
diff --git a/src/hb-ot-shape-complex-vowel-constraints.hh b/src/hb-ot-shape-complex-vowel-constraints.hh
new file mode 100644
--- /dev/null
+++ b/src/hb-ot-shape-complex-vowel-constraints.hh
@@ -0,0 +1,434 @@
+/* == Start of generated functions == */
+ * The following functions are generated by running:
+ *
+ * ./gen-vowel-constraints.py use Scripts.txt
+ *
+ * on files with these headers:
+ *
+ * <meta name="updated_at" content="2018-03-27 12:21 AM" />
+ * # Scripts-11.0.0.txt
+ * # Date: 2018-02-21, 05:34:31 GMT
+ */
+static void
+_output_with_dotted_circle (hb_buffer_t *buffer)
+ hb_glyph_info_t &dottedcircle = buffer->output_glyph (0x25CCu);
+ _hb_glyph_info_reset_continuation (&dottedcircle);
+ buffer->next_glyph ();
+static void
+preprocess_text_vowel_constraints (const hb_ot_shape_plan_t *plan,
+ hb_buffer_t *buffer,
+ hb_font_t *font)
+ /* UGLY UGLY UGLY business of adding dotted-circle in the middle of
+ * vowel-sequences that look like another vowel. Data for each script
+ * collected from the USE script development spec.
+ *
+ * https://github.com/harfbuzz/harfbuzz/issues/1019
+ */
+ bool processed = false;
+ buffer->clear_output ();
+ unsigned int count = buffer->len;
+ switch ((unsigned) buffer->props.script)
+ {
+ for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
+ {
+ bool matched = false;
+ switch (buffer->cur ().codepoint)
+ {
+ case 0x0905u:
+ switch (buffer->cur (1).codepoint)
+ {
+ case 0x093Au: case 0x093Bu: case 0x093Eu: case 0x0945u:
+ case 0x0946u: case 0x0949u: case 0x094Au: case 0x094Bu:
+ case 0x094Cu: case 0x094Fu: case 0x0956u: case 0x0957u:
+ matched = true;
+ break;
+ }
+ break;
+ case 0x0906u:
+ switch (buffer->cur (1).codepoint)
+ {
+ case 0x093Au: case 0x0945u: case 0x0946u: case 0x0947u:
+ case 0x0948u:
+ matched = true;
+ break;
+ }
+ break;
+ case 0x0909u:
+ matched = 0x0941u == buffer->cur (1).codepoint;
+ break;
+ case 0x090Fu:
+ switch (buffer->cur (1).codepoint)
+ {
+ case 0x0945u: case 0x0946u: case 0x0947u:
+ matched = true;
+ break;
+ }
+ break;
+ case 0x0930u:
+ if (0x094Du == buffer->cur (1).codepoint &&
+ buffer->idx + 2 < count &&
+ 0x0907u == buffer->cur (2).codepoint)
+ {
+ buffer->next_glyph ();
+ buffer->next_glyph ();
+ buffer->output_glyph (0x25CCu);
+ }
+ break;
+ }
+ buffer->next_glyph ();
+ if (matched) _output_with_dotted_circle (buffer);
+ }
+ processed = true;
+ break;
+ for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
+ {
+ bool matched = false;
+ switch (buffer->cur ().codepoint)
+ {
+ case 0x0985u:
+ matched = 0x09BEu == buffer->cur (1).codepoint;
+ break;
+ case 0x098Bu:
+ matched = 0x09C3u == buffer->cur (1).codepoint;
+ break;
+ case 0x098Cu:
+ matched = 0x09E2u == buffer->cur (1).codepoint;
+ break;
+ }
+ buffer->next_glyph ();
+ if (matched) _output_with_dotted_circle (buffer);
+ }
+ processed = true;
+ break;
+ for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
+ {
+ bool matched = false;
+ switch (buffer->cur ().codepoint)
+ {
+ case 0x0A05u:
+ switch (buffer->cur (1).codepoint)
+ {
+ case 0x0A3Eu: case 0x0A48u: case 0x0A4Cu:
+ matched = true;
+ break;
+ }
+ break;
+ case 0x0A72u:
+ switch (buffer->cur (1).codepoint)
+ {
+ case 0x0A3Fu: case 0x0A40u: case 0x0A47u:
+ matched = true;
+ break;
+ }
+ break;
+ case 0x0A73u:
+ switch (buffer->cur (1).codepoint)
+ {
+ case 0x0A41u: case 0x0A42u: case 0x0A4Bu:
+ matched = true;
+ break;
+ }
+ break;
+ }
+ buffer->next_glyph ();
+ if (matched) _output_with_dotted_circle (buffer);
+ }
+ processed = true;
+ break;
+ for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
+ {
+ bool matched = false;
+ switch (buffer->cur ().codepoint)
+ {
+ case 0x0A85u:
+ switch (buffer->cur (1).codepoint)
+ {
+ case 0x0ABEu: case 0x0AC5u: case 0x0AC7u: case 0x0AC8u:
+ case 0x0AC9u: case 0x0ACBu: case 0x0ACCu:
+ matched = true;
+ break;
+ }
+ break;
+ case 0x0AC5u:
+ matched = 0x0ABEu == buffer->cur (1).codepoint;
+ break;
+ }
+ buffer->next_glyph ();
+ if (matched) _output_with_dotted_circle (buffer);
+ }
+ processed = true;
+ break;
+ for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
+ {
+ bool matched = false;
+ switch (buffer->cur ().codepoint)
+ {
+ case 0x0B05u:
+ matched = 0x0B3Eu == buffer->cur (1).codepoint;
+ break;
+ case 0x0B0Fu: case 0x0B13u:
+ matched = 0x0B57u == buffer->cur (1).codepoint;
+ break;
+ }
+ buffer->next_glyph ();
+ if (matched) _output_with_dotted_circle (buffer);
+ }
+ processed = true;
+ break;
+ for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
+ {
+ bool matched = false;
+ switch (buffer->cur ().codepoint)
+ {
+ case 0x0C12u:
+ switch (buffer->cur (1).codepoint)
+ {
+ case 0x0C4Cu: case 0x0C55u:
+ matched = true;
+ break;
+ }
+ break;
+ case 0x0C3Fu: case 0x0C46u: case 0x0C4Au:
+ matched = 0x0C55u == buffer->cur (1).codepoint;
+ break;
+ }
+ buffer->next_glyph ();
+ if (matched) _output_with_dotted_circle (buffer);
+ }
+ processed = true;
+ break;
+ for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
+ {
+ bool matched = false;
+ switch (buffer->cur ().codepoint)
+ {
+ case 0x0C89u: case 0x0C8Bu:
+ matched = 0x0CBEu == buffer->cur (1).codepoint;
+ break;
+ case 0x0C92u:
+ matched = 0x0CCCu == buffer->cur (1).codepoint;
+ break;
+ }
+ buffer->next_glyph ();
+ if (matched) _output_with_dotted_circle (buffer);
+ }
+ processed = true;
+ break;
+ for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
+ {
+ bool matched = false;
+ switch (buffer->cur ().codepoint)
+ {
+ case 0x0D07u: case 0x0D09u:
+ matched = 0x0D57u == buffer->cur (1).codepoint;
+ break;
+ case 0x0D0Eu:
+ matched = 0x0D46u == buffer->cur (1).codepoint;
+ break;
+ case 0x0D12u:
+ switch (buffer->cur (1).codepoint)
+ {
+ case 0x0D3Eu: case 0x0D57u:
+ matched = true;
+ break;
+ }
+ break;
+ }
+ buffer->next_glyph ();
+ if (matched) _output_with_dotted_circle (buffer);
+ }
+ processed = true;
+ break;
+ for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
+ {
+ bool matched = false;
+ switch (buffer->cur ().codepoint)
+ {
+ case 0x0D85u:
+ switch (buffer->cur (1).codepoint)
+ {
+ case 0x0DCFu: case 0x0DD0u: case 0x0DD1u:
+ matched = true;
+ break;
+ }
+ break;
+ case 0x0D8Bu: case 0x0D8Fu: case 0x0D94u:
+ matched = 0x0DDFu == buffer->cur (1).codepoint;
+ break;
+ case 0x0D8Du:
+ matched = 0x0DD8u == buffer->cur (1).codepoint;
+ break;
+ case 0x0D91u:
+ switch (buffer->cur (1).codepoint)
+ {
+ case 0x0DCAu: case 0x0DD9u: case 0x0DDAu: case 0x0DDCu:
+ case 0x0DDDu:
+ matched = true;
+ break;
+ }
+ break;
+ }
+ buffer->next_glyph ();
+ if (matched) _output_with_dotted_circle (buffer);
+ }
+ processed = true;
+ break;
+ for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
+ {
+ bool matched = false;
+ switch (buffer->cur ().codepoint)
+ {
+ case 0x11005u:
+ matched = 0x11038u == buffer->cur (1).codepoint;
+ break;
+ case 0x1100Bu:
+ matched = 0x1103Eu == buffer->cur (1).codepoint;
+ break;
+ case 0x1100Fu:
+ matched = 0x11042u == buffer->cur (1).codepoint;
+ break;
+ }
+ buffer->next_glyph ();
+ if (matched) _output_with_dotted_circle (buffer);
+ }
+ processed = true;
+ break;
+ for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
+ {
+ bool matched = false;
+ switch (buffer->cur ().codepoint)
+ {
+ case 0x112B0u:
+ switch (buffer->cur (1).codepoint)
+ {
+ case 0x112E0u: case 0x112E5u: case 0x112E6u: case 0x112E7u:
+ case 0x112E8u:
+ matched = true;
+ break;
+ }
+ break;
+ }
+ buffer->next_glyph ();
+ if (matched) _output_with_dotted_circle (buffer);
+ }
+ processed = true;
+ break;
+ for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
+ {
+ bool matched = false;
+ switch (buffer->cur ().codepoint)
+ {
+ case 0x11481u:
+ matched = 0x114B0u == buffer->cur (1).codepoint;
+ break;
+ case 0x1148Bu: case 0x1148Du:
+ matched = 0x114BAu == buffer->cur (1).codepoint;
+ break;
+ case 0x114AAu:
+ switch (buffer->cur (1).codepoint)
+ {
+ case 0x114B5u: case 0x114B6u:
+ matched = true;
+ break;
+ }
+ break;
+ }
+ buffer->next_glyph ();
+ if (matched) _output_with_dotted_circle (buffer);
+ }
+ processed = true;
+ break;
+ for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
+ {
+ bool matched = false;
+ switch (buffer->cur ().codepoint)
+ {
+ case 0x11600u: case 0x11601u:
+ switch (buffer->cur (1).codepoint)
+ {
+ case 0x11639u: case 0x1163Au:
+ matched = true;
+ break;
+ }
+ break;
+ }
+ buffer->next_glyph ();
+ if (matched) _output_with_dotted_circle (buffer);
+ }
+ processed = true;
+ break;
+ for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
+ {
+ bool matched = false;
+ switch (buffer->cur ().codepoint)
+ {
+ case 0x11680u:
+ switch (buffer->cur (1).codepoint)
+ {
+ case 0x116ADu: case 0x116B4u: case 0x116B5u:
+ matched = true;
+ break;
+ }
+ break;
+ case 0x11686u:
+ matched = 0x116B2u == buffer->cur (1).codepoint;
+ break;
+ }
+ buffer->next_glyph ();
+ if (matched) _output_with_dotted_circle (buffer);
+ }
+ processed = true;
+ break;
+ default:
+ break;
+ }
+ if (processed)
+ {
+ if (buffer->idx < count)
+ buffer->next_glyph ();
+ if (likely (buffer->successful))
+ buffer->swap_buffers ();
+ }
+/* == End of generated functions == */
diff --git a/test/shaping/README.md b/test/shaping/README.md
--- a/test/shaping/README.md
+++ b/test/shaping/README.md
@@ -25,10 +25,10 @@
* If the outputs differ, recording fails. Otherwise, it will move the
subset font file into `data/in-house/fonts` and name it after its
hash, and print out the test case input, which you can then redirect
- to an existing or new test file in `data/in-house/tests` using `-o=`,
+ to an existing or new test file in `data/in-house/tests` using `-o`,
-$ ./hb-unicode-encode 41 42 43 627 | ./record-test.sh -o=data/in-house/tests/test-name.test ../../util/hb-shape font.ttf
+$ ./hb-unicode-encode 41 42 43 627 | ./record-test.sh -o data/in-house/tests/test-name.test ../../util/hb-shape font.ttf
If you created a new test file, add it to `data/in-house/Makefile.sources`
diff --git a/test/shaping/data/in-house/fonts/46669c8860cbfea13562a6ca0d83130ee571137b.ttf b/test/shaping/data/in-house/fonts/46669c8860cbfea13562a6ca0d83130ee571137b.ttf
new file mode 100644
--- /dev/null
+++ b/test/shaping/data/in-house/fonts/46669c8860cbfea13562a6ca0d83130ee571137b.ttf
diff --git a/test/shaping/data/in-house/tests/use-vowel-letter-spoofing.tests b/test/shaping/data/in-house/tests/use-vowel-letter-spoofing.tests
new file mode 100644
--- /dev/null
+++ b/test/shaping/data/in-house/tests/use-vowel-letter-spoofing.tests
@@ -0,0 +1,94 @@