| #!/usr/bin/env python3 |
| |
| """usage: ./gen-emoji-table.py [--rust] emoji-data.txt emoji-test.txt |
| |
| Input file: |
| * https://www.unicode.org/Public/UCD/latest/ucd/emoji/emoji-data.txt |
| * https://www.unicode.org/Public/emoji/latest/emoji-test.txt |
| """ |
| |
| import os |
| import sys |
| from collections import OrderedDict |
| import packTab |
| |
# Pick the packTab output language.  "--rust" is accepted only as the first
# argument and is removed from sys.argv so the positional arguments that
# follow keep the same indices in both modes.
wants_rust = sys.argv[1:2] == ["--rust"]
if wants_rust:
    sys.argv.pop(1)
language = packTab.languages["rust" if wants_rust else "c"]

# After the optional flag, exactly two positional arguments must remain;
# otherwise exit with the module docstring as the usage message.
if len(sys.argv) != 3:
    sys.exit(__doc__)
| |
# Parse the first input file into `header` and `ranges`.
#
# The file is opened in a `with` block so the handle is closed as soon as
# parsing finishes, and it is decoded explicitly as UTF-8 instead of relying
# on the platform's locale encoding.
with open(sys.argv[1], encoding="utf-8") as f:
    # The first ten lines are kept verbatim; they are echoed later inside
    # the banner comment of the generated output.
    header = [f.readline() for _ in range(10)]

    # Maps each property name to a list of inclusive (start, end) code
    # point tuples, in the order they are encountered.
    ranges = OrderedDict()
    for line in f:
        line = line.strip()
        if not line or line[0] == '#':
            continue
        # Drop the trailing "#" comment, then take the first two
        # ";"-separated fields: the code point (range) and the property.
        rang, typ = [s.strip() for s in line.split('#')[0].split(';')[:2]]

        # A field is either a single hex code point or "START..END".
        rang = [int(s, 16) for s in rang.split('..')]
        if len(rang) > 1:
            start, end = rang
        else:
            start = end = rang[0]

        spans = ranges.setdefault(typ, [])
        if spans and spans[-1][1] == start - 1:
            # Directly follows the previous range of this property:
            # extend that range instead of adding a new one.
            spans[-1] = (spans[-1][0], end)
        else:
            spans.append((start, end))
| |
| |
| |
# Banner comment: records the command line that produced the output and
# quotes the header lines captured from the input data file.
rust_flag = "--rust " if language.name == "rust" else ""
for banner_line in (
    "/* == Start of generated table == */",
    "/*",
    " * The following tables are generated by running:",
    " *",
    " * ./gen-emoji-table.py %semoji-data.txt" % rust_flag,
    " *",
    " * on file with this header:",
    " *",
):
    print(banner_line)
for header_line in header:
    print(" * %s" % header_line.strip())
print(" */")
print()

# Language-specific prologue: include guard plus include for C, lint
# attributes plus a `use` line for Rust.  Empty strings are blank lines.
prologue = ()
if language.name == "c":
    prologue = (
        "#ifndef HB_UNICODE_EMOJI_TABLE_HH",
        "#define HB_UNICODE_EMOJI_TABLE_HH",
        "",
        '#include "hb-unicode.hh"',
        "",
    )
elif language.name == "rust":
    prologue = (
        "#![allow(unused_parens)]",
        "#![allow(clippy::unnecessary_cast, clippy::unreadable_literal, clippy::double_parens)]",
        "",
        "use crate::hb::unicode::Codepoint;",
        "",
    )
else:
    assert False, "Unknown language: %s" % language.name
for prologue_line in prologue:
    print(prologue_line)
| |
# Emit a packed lookup table for the Extended_Pictographic property; every
# other property collected in `ranges` is ignored here.
for typ, spans in ranges.items():
    if typ == "Extended_Pictographic":
        # Expand the inclusive ranges into a code point -> 1 mapping.
        members = {
            cp: 1
            for first, last in spans
            for cp in range(first, last + 1)
        }

        # NOTE(review): the second argument (0) is presumably the value
        # for code points absent from the mapping - confirm against packTab.
        sol = packTab.pack_table(members, 0, compression=9)
        code = packTab.Code('_hb_emoji')
        if language.name == "c":
            sol.genCode(code, 'is_' + typ, language=language)
            code.print_code(language=language)
            print()
        elif language.name == "rust":
            # The generated Rust lookup gets a "_u8" suffix; the wrapper
            # printed below calls it and compares the result against 0.
            sol.genCode(code, 'is_' + typ + '_u8', language=language, private=False)
            code.print_code(language=language, private=False)
            print()
            for wrapper_line in (
                "#[inline]",
                "pub(crate) fn is_%s (u: Codepoint) -> bool" % typ,
                "{",
                " _hb_emoji_is_%s_u8 (u as usize) != 0" % typ,
                "}",
            ):
                print(wrapper_line)
            print()
        else:
            assert False, "Unknown language: %s" % language.name
| |
# Footer of the generated output: a blank line, the closing include guard
# (C only, followed by another blank line), then the end marker.
footer = [""]
if language.name == "c":
    footer += ["#endif /* HB_UNICODE_EMOJI_TABLE_HH */", ""]
footer.append("/* == End of generated table == */")
print("\n".join(footer))
| |
| |
# Generate test file.
#
# Collect every entry of the second input file whose code point field holds
# two or more code points.  The file is decoded explicitly as UTF-8 rather
# than with the platform's locale encoding.
sequences = []
with open(sys.argv[2], encoding="utf-8") as f:
    for line in f:
        # Keep only the code point field: drop the "#" comment, then
        # everything from the first ";" on.  split() without an argument
        # tolerates repeated or mixed whitespace between code points.
        fields = line.partition("#")[0].partition(";")[0].split()
        if len(fields) < 2:
            continue
        sequences.append(fields)

# The test file lives in a sibling "test" tree relative to this script; it
# is only (re)written when that directory already exists.
test_path = os.path.join(os.path.dirname(__file__), "..", "test", "shape", "data", "in-house", "tests", "emoji-clusters.tests")
if os.path.isdir(os.path.dirname(test_path)):
    try:
        with open(test_path, "w") as f:
            # One line per sequence: font;options;code points;expected,
            # where the expectation has one "1=0" entry per code point.
            f.writelines(
                "../fonts/AdobeBlank2.ttf;--no-glyph-names --no-positions --font-funcs=ot"
                + ";" + ",".join(sequence)
                + ";[" + "|".join(["1=0"] * len(sequence)) + "]\n"
                for sequence in sequences
            )
    except OSError as e:
        # Writing the test file is best-effort and must not fail table
        # generation.  Report on stderr so stdout (the generated table)
        # stays clean.
        print("warning: could not write %s: %s" % (test_path, e), file=sys.stderr)