[test] Add generated tests for emoji clusters

Fixes https://github.com/harfbuzz/harfbuzz/issues/3017

Uses AdobeBlank2.ttf from:

  https://github.com/adobe-fonts/adobe-blank-2

instead of a dummy empty font so that everything maps to GID 1 and
control code points are kept instead of being dropped because there is
no space glyph (otherwise we’d need to identify control code points
somehow when generating the expectations).
diff --git a/src/gen-emoji-table.py b/src/gen-emoji-table.py
index 1bdd402..dc69ba6 100755
--- a/src/gen-emoji-table.py
+++ b/src/gen-emoji-table.py
@@ -1,16 +1,17 @@
 #!/usr/bin/env python3
 
-"""usage: ./gen-emoji-table.py emoji-data.txt
+"""usage: ./gen-emoji-table.py emoji-data.txt emoji-test.txt
 
 Input file:
 * https://www.unicode.org/Public/UCD/latest/ucd/emoji/emoji-data.txt
+* https://www.unicode.org/Public/emoji/latest/emoji-test.txt
 """
 
 import sys
 from collections import OrderedDict
 import packTab
 
-if len (sys.argv) != 2:
+if len (sys.argv) != 3:
 	sys.exit (__doc__)
 
 f = open(sys.argv[1])
@@ -74,3 +75,37 @@
 print ("#endif /* HB_UNICODE_EMOJI_TABLE_HH */")
 print ()
 print ("/* == End of generated table == */")
+
+
+# Generate test file from the multi-code-point sequences in emoji-test.txt.
+# Unicode data files are UTF-8; do not rely on the locale's default encoding.
+sequences = []
+with open(sys.argv[2], encoding="utf-8") as f:
+    for line in f:
+        # Keep only the code point field: drop the "#" comment, then
+        # everything from the first ";" on.
+        codepoints = line.partition("#")[0].partition(";")[0].split()
+        # Blank and comment-only lines yield no code points, and a single
+        # code point is not a sequence; skip both.
+        if len(codepoints) > 1:
+            sequences.append(codepoints)
+
+# Emit CHUNK sequences per test line: too few per line slows the test down,
+# too many overwhelms the test runner.
+CHUNK = 50
+# NOTE: the output path is relative to the current working directory.
+with open("../test/shaping/data/in-house/tests/emoji-clusters.tests", "w") as f:
+    for i in range(0, len(sequences), CHUNK):
+        outputs = []
+        inputs = []
+        cluster = 0
+        for sequence in sequences[i:i + CHUNK]:
+            # Expect one "1=<cluster>" entry per code point (glyph 1), all
+            # sharing the index of the sequence's first code point in the line.
+            outputs.append("|".join([f"1={cluster}"] * len(sequence)))
+            inputs.append(",".join(sequence))
+            cluster += len(sequence)
+
+        f.write("../fonts/AdobeBlank2.ttf:--no-glyph-names --no-positions --font-funcs=ot")
+        f.write(":" + ",".join(inputs))
+        f.write(":[" + "|".join(outputs) + "]\n")