Ebrahim Byagowi | 8d19907 | 2020-02-19 14:56:55 +0330 | [diff] [blame] | 1 | #!/usr/bin/env python3 |
Behdad Esfahbod | 1e8f195 | 2018-10-03 17:46:48 +0200 | [diff] [blame] | 2 | |
Khaled Hosny | ddf87ff | 2021-07-29 01:12:46 +0200 | [diff] [blame] | 3 | """usage: ./gen-emoji-table.py emoji-data.txt emoji-test.txt |
Ebrahim Byagowi | 08f1d95 | 2020-05-28 15:01:15 +0430 | [diff] [blame] | 4 | |
Input files:
| 6 | * https://www.unicode.org/Public/UCD/latest/ucd/emoji/emoji-data.txt |
Khaled Hosny | ddf87ff | 2021-07-29 01:12:46 +0200 | [diff] [blame] | 7 | * https://www.unicode.org/Public/emoji/latest/emoji-test.txt |
Ebrahim Byagowi | 08f1d95 | 2020-05-28 15:01:15 +0430 | [diff] [blame] | 8 | """ |
| 9 | |
Behdad Esfahbod | 1e8f195 | 2018-10-03 17:46:48 +0200 | [diff] [blame] | 10 | import sys |
Behdad Esfahbod | 1e8f195 | 2018-10-03 17:46:48 +0200 | [diff] [blame] | 11 | from collections import OrderedDict |
Behdad Esfahbod | 1cdd0fa | 2019-06-26 14:49:15 -0700 | [diff] [blame] | 12 | import packTab |
Behdad Esfahbod | 1e8f195 | 2018-10-03 17:46:48 +0200 | [diff] [blame] | 13 | |
# Both input paths are mandatory; otherwise print the usage text and stop.
if len(sys.argv) != 3:
    sys.exit(__doc__)

# Maps each property name found in the data file to a list of inclusive
# (start, end) code point ranges, in the order they appear in the file.
ranges = OrderedDict()

# Decode explicitly as UTF-8 rather than relying on the locale's default
# encoding, and let the context manager close the file when parsing is done.
with open(sys.argv[1], encoding="utf-8") as f:
    # The first ten lines are kept verbatim; they are echoed later into the
    # banner comment of the generated output.
    header = [f.readline() for _ in range(10)]

    for line in f:
        line = line.strip()
        if not line or line[0] == '#':
            continue
        # Anything after '#' is a comment; the first two ';'-separated fields
        # are the code point (or range) and the property name.
        rang, typ = [s.strip() for s in line.split('#')[0].split(';')[:2]]

        # "XXXX..YYYY" is an inclusive range, a lone "XXXX" a single code point.
        rang = [int(s, 16) for s in rang.split('..')]
        if len(rang) > 1:
            start, end = rang
        else:
            start = end = rang[0]

        spans = ranges.setdefault(typ, [])
        if spans and spans[-1][1] == start - 1:
            # Directly follows the previous range: extend it instead of
            # starting a new one.
            spans[-1] = (spans[-1][0], end)
        else:
            spans.append((start, end))
Behdad Esfahbod | 1e8f195 | 2018-10-03 17:46:48 +0200 | [diff] [blame] | 39 | |
| 40 | |
| 41 | |
# Banner comment of the generated file: how it was produced, followed by the
# header lines captured from the data file.
print("/* == Start of generated table == */")
print("/*")
print(" * The following tables are generated by running:")
print(" *")
# The script requires both input files (see the usage text), so the recorded
# command has to list both of them to be reproducible.
print(" * ./gen-emoji-table.py emoji-data.txt emoji-test.txt")
print(" *")
print(" * on file with this header:")
print(" *")
for header_line in header:
    print(" * %s" % (header_line.strip()))
print(" */")
print()
# Include guard and the single include used by the generated header.
print("#ifndef HB_UNICODE_EMOJI_TABLE_HH")
print("#define HB_UNICODE_EMOJI_TABLE_HH")
print()
print('#include "hb-unicode.hh"')
print()
| 59 | |
# Only the Extended_Pictographic property gets a lookup table; every other
# property collected from the data file is ignored here.
for prop, spans in ranges.items():
    if prop != "Extended_Pictographic":
        continue

    # Expand the inclusive ranges into a code point -> 1 mapping; code points
    # that are absent fall back to the default (0) passed to pack_table.
    membership = {
        codepoint: 1
        for first, last in spans
        for codepoint in range(first, last + 1)
    }

    solution = packTab.pack_table(membership, 0, compression=3)
    emitter = packTab.Code('_hb_emoji')
    solution.genCode(emitter, 'is_' + prop)
    emitter.print_c(linkage='static inline')
    print()
Behdad Esfahbod | 1e8f195 | 2018-10-03 17:46:48 +0200 | [diff] [blame] | 73 | |
# Close the include guard and mark the end of the generated section; the
# empty strings produce the blank separator lines.
print(
    "",
    "#endif /* HB_UNICODE_EMOJI_TABLE_HH */",
    "",
    "/* == End of generated table == */",
    sep="\n",
)
Khaled Hosny | ddf87ff | 2021-07-29 01:12:46 +0200 | [diff] [blame] | 78 | |
| 79 | |
# Generate test file.
# Each entry is one multi-code-point sequence from the emoji test file, stored
# as its list of hexadecimal code point strings.
sequences = []
# Decode explicitly as UTF-8 rather than relying on the locale's default
# encoding; the file is streamed line by line instead of being read at once.
with open(sys.argv[2], encoding="utf-8") as f:
    for line in f:
        # Keep only the code point field: drop the '#' comment first, then
        # everything from the first ';' on.
        line = line.partition("#")[0].partition(";")[0]
        # Split on any run of whitespace so repeated spaces cannot produce
        # empty tokens.
        line = line.split()
        # Blank lines and single code points are not sequences; skip them.
        if len(line) < 2:
            continue
        sequences.append(line)
| 93 | |
# Write one shaping test per collected sequence: font and options, the
# comma-separated code points, then one "1=0" entry per code point.
with open("../test/shaping/data/in-house/tests/emoji-clusters.tests", "w") as out:
    for sequence in sequences:
        codepoints = ",".join(sequence)
        expected = "|".join(["1=0"] * len(sequence))
        out.write(
            "../fonts/AdobeBlank2.ttf:--no-glyph-names --no-positions --font-funcs=ot"
            + ":" + codepoints
            + ":[" + expected + "]\n"
        )