blob: 09d34e5283b2fa6c3c9711bf9a24a28940e09a5b [file] [log] [blame] [edit]
#!/usr/bin/env python3
"""usage: ./gen-emoji-table.py [--rust] emoji-data.txt emoji-test.txt
Input file:
* https://www.unicode.org/Public/UCD/latest/ucd/emoji/emoji-data.txt
* https://www.unicode.org/Public/emoji/latest/emoji-test.txt
"""
import os
import sys
from collections import OrderedDict
import packTab
# Choose the packTab output backend.  An optional leading "--rust" flag
# selects Rust and is removed from sys.argv so the positional arguments
# sit at the same indices either way; without the flag, C is generated.
_backend = "c"
if sys.argv[1:2] == ["--rust"]:
    sys.argv.pop(1)
    _backend = "rust"
language = packTab.languages[_backend]

# Exactly two positional arguments must remain (see the usage text in the
# module docstring); otherwise exit with that docstring as the message.
if len(sys.argv) != 3:
    sys.exit(__doc__)
# Parse emoji-data.txt (first positional argument) into `ranges`: an ordered
# mapping from property name to a list of inclusive (start, end) code point
# ranges, in file order.
#
# The file is opened inside a `with` block so the handle is always closed,
# and decoded explicitly as UTF-8 (the encoding Unicode data files are
# published in) instead of whatever the current locale defaults to.
with open(sys.argv[1], encoding="utf-8") as f:
    # Keep the first ten lines verbatim; they are echoed into the comment
    # banner of the generated output further down in this script.
    header = [f.readline() for _ in range(10)]

    ranges = OrderedDict()
    for line in f:
        line = line.strip()
        # Skip blank lines and full-line comments.
        if not line or line[0] == '#':
            continue

        # Data lines have the shape "<start>[..<end>] ; <property> # comment".
        rang, typ = [s.strip() for s in line.split('#')[0].split(';')[:2]]
        rang = [int(s, 16) for s in rang.split('..')]
        if len(rang) > 1:
            start, end = rang
        else:
            # A single code point is a range of length one.
            start = end = rang[0]

        spans = ranges.setdefault(typ, [])
        if spans and spans[-1][1] == start - 1:
            # This range begins right after the previous one for the same
            # property: extend that entry instead of appending a new one.
            spans[-1] = (spans[-1][0], end)
        else:
            spans.append((start, end))
# Emit the opening marker followed by a comment block that records how the
# tables were produced and quotes the saved header lines of the input file.
_banner = [
    "/* == Start of generated table == */",
    "/*",
    " * The following tables are generated by running:",
    " *",
    " * ./gen-emoji-table.py %semoji-data.txt" %
    ("--rust " if language.name == "rust" else ""),
    " *",
    " * on file with this header:",
    " *",
]
_banner.extend(" * %s" % raw.strip() for raw in header)
_banner.append(" */")
_banner.append("")
for _text in _banner:
    print(_text)

# Language-specific preamble: an include guard plus include for C, lint
# attributes plus a `use` line for Rust.  Empty strings are blank lines.
_PREAMBLE_BY_LANGUAGE = {
    "c": (
        "#ifndef HB_UNICODE_EMOJI_TABLE_HH",
        "#define HB_UNICODE_EMOJI_TABLE_HH",
        "",
        '#include "hb-unicode.hh"',
        "",
    ),
    "rust": (
        "#![allow(unused_parens)]",
        "#![allow(clippy::unnecessary_cast, clippy::unreadable_literal, clippy::double_parens)]",
        "",
        "use crate::hb::unicode::Codepoint;",
        "",
    ),
}
assert language.name in _PREAMBLE_BY_LANGUAGE, "Unknown language: %s" % language.name
for _text in _PREAMBLE_BY_LANGUAGE.get(language.name, ()):
    print(_text)
# Emit a packed lookup table for the one property this script exports.
for typ in ranges:
    if typ != "Extended_Pictographic":
        continue

    # Sparse map handed to packTab: every code point covered by one of the
    # property's inclusive ranges is set to 1.
    arr = {
        cp: 1
        for first, last in ranges[typ]
        for cp in range(first, last + 1)
    }

    sol = packTab.pack_table(arr, 0, compression=9)
    code = packTab.Code('_hb_emoji')
    if language.name == "rust":
        # Rust: generate the raw `_u8` lookup, then print a typed
        # `pub(crate)` wrapper that compares its result against zero.
        sol.genCode(code, 'is_'+typ+'_u8', language=language, private=False)
        code.print_code(language=language, private=False)
        for text in (
            "",
            "#[inline]",
            "pub(crate) fn is_%s (u: Codepoint) -> bool" % typ,
            "{",
            " _hb_emoji_is_%s_u8 (u as usize) != 0" % typ,
            "}",
            "",
        ):
            print(text)
    elif language.name == "c":
        sol.genCode(code, 'is_'+typ, language=language)
        code.print_code(language=language)
        print()
    else:
        assert False, "Unknown language: %s" % language.name

# Footer: a blank line, the closing include guard (C only), and the end marker.
_footer = [""]
if language.name == "c":
    _footer.append("#endif /* HB_UNICODE_EMOJI_TABLE_HH */")
    _footer.append("")
_footer.append("/* == End of generated table == */")
for text in _footer:
    print(text)
# Generate test file.
#
# Collect every multi-code-point sequence listed in emoji-test.txt (second
# positional argument).  The file is decoded explicitly as UTF-8 rather than
# with the locale default.
sequences = []
with open(sys.argv[2], encoding="utf-8") as f:
    for line in f:
        # Keep only the code point field: drop any trailing "# ..." comment
        # and everything from the first ";" on.  split() without an argument
        # tolerates repeated or mixed whitespace between code points.
        fields = line.partition("#")[0].partition(";")[0].split()
        # Blank/comment lines and single-code-point entries are skipped.
        if len(fields) < 2:
            continue
        sequences.append(fields)

# Write one shaping test per sequence.  Each expectation is one "1=0" entry
# per code point in the sequence.
test_path = os.path.join(os.path.dirname(__file__), "..", "test", "shape", "data", "in-house", "tests", "emoji-clusters.tests")
if os.path.isdir(os.path.dirname(test_path)):
    try:
        with open(test_path, "w") as f:
            for sequence in sequences:
                f.write("../fonts/AdobeBlank2.ttf;--no-glyph-names --no-positions --font-funcs=ot")
                f.write(";" + ",".join(sequence))
                f.write(";[" + "|".join("1=0" for _ in sequence) + "]\n")
    except OSError as err:
        # Writing the test file stays best-effort (the table has already been
        # printed to stdout), but report the failure instead of hiding it.
        print("warning: could not write %s: %s" % (test_path, err), file=sys.stderr)