blob: 0ee8fecfa48e51388594e1e4c682230b395afd49 [file] [log] [blame]
Ebrahim Byagowi8d199072020-02-19 14:56:55 +03301#!/usr/bin/env python3
Behdad Esfahbod1e8f1952018-10-03 17:46:48 +02002
"""usage: ./gen-emoji-table.py emoji-data.txt emoji-test.txt

Input files:
* https://www.unicode.org/Public/UCD/latest/ucd/emoji/emoji-data.txt
* https://www.unicode.org/Public/emoji/latest/emoji-test.txt
"""
9
Behdad Esfahbod1e8f1952018-10-03 17:46:48 +020010import sys
Behdad Esfahbod1e8f1952018-10-03 17:46:48 +020011from collections import OrderedDict
Behdad Esfahbod1cdd0fa2019-06-26 14:49:15 -070012import packTab
Behdad Esfahbod1e8f1952018-10-03 17:46:48 +020013
# Number of leading lines of emoji-data.txt echoed into the generated header.
HEADER_LINES = 10

# Output path of the generated shaping tests; it is relative to the current
# working directory, exactly as in the original script.
TESTS_PATH = "../test/shaping/data/in-house/tests/emoji-clusters.tests"


def parse_ranges(lines):
    """Collect the code point ranges of every property listed in *lines*.

    Each data line is read as ``<range> ; <property> # comment`` where
    ``<range>`` is either one hexadecimal code point or ``first..last``.
    Blank lines and lines starting with ``#`` are skipped.

    Returns an ``OrderedDict`` mapping each property name, in order of first
    appearance, to a list of inclusive ``(start, end)`` tuples.  A range that
    begins right after the previously recorded one for the same property is
    merged into it.
    """
    ranges = OrderedDict()
    for line in lines:
        line = line.strip()
        if not line or line[0] == '#':
            continue
        rang, typ = [s.strip() for s in line.split('#')[0].split(';')[:2]]

        bounds = [int(s, 16) for s in rang.split('..')]
        if len(bounds) > 1:
            start, end = bounds
        else:
            start = end = bounds[0]

        spans = ranges.setdefault(typ, [])
        if spans and spans[-1][1] == start - 1:
            # Adjacent to the previous range: extend it instead of appending.
            spans[-1] = (spans[-1][0], end)
        else:
            spans.append((start, end))
    return ranges


def print_table(header, ranges):
    """Print the generated C header to standard output.

    *header* is the list of lines echoed into the leading comment and
    *ranges* is the mapping produced by ``parse_ranges``.  Only the
    ``Extended_Pictographic`` property gets a lookup table.
    """
    print("/* == Start of generated table == */")
    print("/*")
    print(" * The following tables are generated by running:")
    print(" *")
    print(" *   ./gen-emoji-table.py emoji-data.txt")
    print(" *")
    print(" * on file with this header:")
    print(" *")
    for l in header:
        print(" * %s" % (l.strip()))
    print(" */")
    print()
    print("#ifndef HB_UNICODE_EMOJI_TABLE_HH")
    print("#define HB_UNICODE_EMOJI_TABLE_HH")
    print()
    print('#include "hb-unicode.hh"')
    print()

    for typ, spans in ranges.items():
        if typ != "Extended_Pictographic":
            continue

        # Map every code point covered by the property to 1; 0 is passed to
        # pack_table as the default for everything else.
        arr = {cp: 1 for start, end in spans for cp in range(start, end + 1)}

        sol = packTab.pack_table(arr, 0, compression=3)
        code = packTab.Code('_hb_emoji')
        sol.genCode(code, 'is_' + typ)
        code.print_c(linkage='static inline')
        print()

    print()
    print("#endif /* HB_UNICODE_EMOJI_TABLE_HH */")
    print()
    print("/* == End of generated table == */")


def parse_sequences(lines):
    """Extract the multi-code-point sequences from emoji-test.txt *lines*.

    Everything from the first ``#`` or ``;`` onwards is dropped from each
    line; what remains is split on whitespace into hexadecimal code point
    strings.  Entries with fewer than two code points are skipped.

    Returns a list of lists of code point strings, in input order.
    """
    sequences = []
    for line in lines:
        # split() rather than split(" ") so that repeated blanks cannot
        # produce empty "code points".
        fields = line.split("#", 1)[0].split(";", 1)[0].split()
        if len(fields) < 2:
            continue
        sequences.append(fields)
    return sequences


def format_test(sequence):
    """Return the test line for one sequence, including the newline.

    The line names the font and options, the comma-separated code points,
    and one ``1=0`` entry per code point in the expected output.
    """
    return (
        "../fonts/AdobeBlank2.ttf:--no-glyph-names --no-positions --font-funcs=ot"
        + ":" + ",".join(sequence)
        + ":[" + "|".join(["1=0"] * len(sequence)) + "]\n"
    )


def main(argv):
    """Generate the emoji table on stdout and write the cluster tests file.

    *argv* is the full argument vector: the script name followed by the
    paths of emoji-data.txt and emoji-test.txt.  Exits with the module
    docstring as the usage message when the argument count is wrong.
    """
    if len(argv) != 3:
        sys.exit(__doc__)

    # The Unicode data files are UTF-8 and carry literal emoji in their
    # comments, so do not rely on the platform's default encoding.  The
    # context manager also closes the handle, which used to be leaked.
    with open(argv[1], encoding="utf-8") as f:
        header = [f.readline() for _ in range(HEADER_LINES)]
        ranges = parse_ranges(f)

    print_table(header, ranges)

    # Generate test file.
    with open(argv[2], encoding="utf-8") as f:
        sequences = parse_sequences(f)

    with open(TESTS_PATH, "w") as f:
        f.writelines(format_test(sequence) for sequence in sequences)


if __name__ == "__main__":
    main(sys.argv)