[test] Add generated tests for emoji clusters Fixes https://github.com/harfbuzz/harfbuzz/issues/3017 Uses AdobeBlank2.ttf from: https://github.com/adobe-fonts/adobe-blank-2 instead of a dummy empty font so that everything maps to GID 1 and control code points are kept instead of being dropped because there is no space glyph (otherwise we’d need to identify control code points somehow when generating the expectations).

commit: ddf87ffb22d30d6b15083f1f6d5d4fe20417b538 [log] [tgz]
author: Khaled Hosny <khaled@aliftype.com> Thu Jul 29 01:12:46 2021 +0200
committer: Behdad Esfahbod <behdad@behdad.org> Wed Jul 28 18:25:51 2021 -0600
tree: 314fc09189c169b11e2cbacf68b100ad48111e2c
parent: f0a1892ff991581e787193fb81361d68a93933e7 [diff] [blame]
diff --git a/src/gen-emoji-table.py b/src/gen-emoji-table.py
index 1bdd402..dc69ba6 100755
--- a/src/gen-emoji-table.py
+++ b/src/gen-emoji-table.py

@@ -1,16 +1,17 @@
 #!/usr/bin/env python3
 
-"""usage: ./gen-emoji-table.py emoji-data.txt
+"""usage: ./gen-emoji-table.py emoji-data.txt emoji-test.txt
 
 Input file:
 * https://www.unicode.org/Public/UCD/latest/ucd/emoji/emoji-data.txt
+* https://www.unicode.org/Public/emoji/latest/emoji-test.txt
 """
 
 import sys
 from collections import OrderedDict
 import packTab
 
-if len (sys.argv) != 2:
+if len (sys.argv) != 3:
 	sys.exit (__doc__)
 
 f = open(sys.argv[1])
@@ -74,3 +75,37 @@
 print ("#endif /* HB_UNICODE_EMOJI_TABLE_HH */")
 print ()
 print ("/* == End of generated table == */")
+
+
+# Generate test file.
+sequences = []
+with open(sys.argv[2]) as f:
+    for line in f.readlines():
+        if "#" in line:
+            line = line[:line.index("#")]
+        if ";" in line:
+            line = line[:line.index(";")]
+        line = line.strip()
+        if not line:
+            continue
+        line = line.split(" ")
+        if len(line) == 1:
+            continue
+        sequences.append(line)
+
+# Emit CHUNK sequences per test line: too small a number slows the test down,
+# and too large a number overwhelms the test runner.
+CHUNK = 50
+with open("../test/shaping/data/in-house/tests/emoji-clusters.tests", "w") as f:
+    for i in range(0, len(sequences), CHUNK):
+        outputs = []
+        inputs = []
+        cluster = 0
+        for sequence in sequences[i:i + CHUNK]:
+            outputs.append("|".join(f"1={cluster}" for c in sequence))
+            inputs.append(",".join(sequence))
+            cluster += len(sequence)
+
+        f.write("../fonts/AdobeBlank2.ttf:--no-glyph-names --no-positions --font-funcs=ot")
+        f.write(":" + ",".join(inputs))
+        f.write(":[" + "|".join(outputs) + "]\n")
commit	ddf87ffb22d30d6b15083f1f6d5d4fe20417b538	[log] [tgz]
author	Khaled Hosny <khaled@aliftype.com>	Thu Jul 29 01:12:46 2021 +0200
committer	Behdad Esfahbod <behdad@behdad.org>	Wed Jul 28 18:25:51 2021 -0600
tree	314fc09189c169b11e2cbacf68b100ad48111e2c
parent	f0a1892ff991581e787193fb81361d68a93933e7 [diff] [blame]