Behdad Esfahbod | 3eb936f | 2010-10-05 18:36:58 -0400 | [diff] [blame] | 1 | #!/usr/bin/python |
| 2 | |
| 3 | import sys |
Behdad Esfahbod | ae4a2b9 | 2012-04-10 16:25:08 -0400 | [diff] [blame] | 4 | import os.path |
Behdad Esfahbod | 3eb936f | 2010-10-05 18:36:58 -0400 | [diff] [blame] | 5 | |
# Command line: exactly two data files, ArabicShaping.txt then UnicodeData.txt.
if len(sys.argv) != 3:
    sys.stderr.write("usage: ./gen-arabic-table.py ArabicShaping.txt UnicodeData.txt\n")
    sys.exit(1)

# open() is used instead of file(), which only exists on Python 2.
files = [open(x) for x in sys.argv[1:]]

# The first two lines of the first file are kept as its header; the second
# file gets a fixed placeholder.  Both are echoed into the generated comment
# further down in this script.
headers = [[files[0].readline(), files[0].readline()]]
headers.append(["UnicodeData.txt does not have a header."])

# Skip the remaining preamble of the first file, up to and including the
# first line that contains a run of '#' characters.  readline() returns ''
# at end of file, so stop there instead of looping forever on a file that
# has no such separator line.
while True:
    line = files[0].readline()
    if not line:
        raise Exception("ArabicShaping.txt: header separator line not found")
    if line.find('##################') >= 0:
        break
Behdad Esfahbod | 3eb936f | 2010-10-05 18:36:58 -0400 | [diff] [blame] | 16 | |
Behdad Esfahbod | 14d7841 | 2010-11-17 16:52:58 -0500 | [diff] [blame] | 17 | |
def print_joining_table(f):
    """Print the C `joining_table` array built from ArabicShaping.txt lines.

    `f` is an iterable of text lines (each ending in a newline), positioned
    after the file preamble.  Lines starting with '#' are comments; a comment
    mentioning " characters" (case-insensitively) is remembered and emitted
    as a section comment before the next data row.  Data rows are
    ';'-separated: code point (hex), name, joining type, joining group.
    U+200C and U+200D are skipped.  Gaps between consecutive code points are
    filled with JOINING_TYPE_X entries so the table can be indexed directly.

    After the array, JOINING_TABLE_FIRST / JOINING_TABLE_LAST defines are
    printed.

    Raises Exception if code points are not in ascending order, or if fewer
    than 40% of the slots between the first and last code point are real
    entries.
    """
    # print ("...") with a single argument behaves the same on Python 2 and 3.
    print("")
    print("static const uint8_t joining_table[] =")
    print("{")

    min_u = 0x110000
    max_u = 0
    num = 0
    last = -1
    block = ''
    for line in f:

        if line[0] == '#':
            # str.find() returns -1 (truthy) when the text is absent, so the
            # result must not be used as a boolean; test containment instead.
            # Case is ignored so headings such as "Arabic Characters" match.
            if " characters" in line.lower():
                block = line[2:].strip()
            continue

        fields = [x.strip() for x in line.split(';')]
        if len(fields) == 1:
            # Blank line (or any line without a ';').
            continue

        u = int(fields[0], 16)
        if u == 0x200C or u == 0x200D:
            continue
        if u < last:
            raise Exception("Input data character not sorted", u)
        min_u = min(min_u, u)
        max_u = max(max_u, u)
        num += 1

        if block:
            print("\n /* %s */\n" % block)
            block = ''

        if last != -1:
            # Pad the hole between the previous code point and this one.
            last += 1
            while last < u:
                print(" JOINING_TYPE_X, /* %04X */" % last)
                last += 1
        else:
            last = u

        # Two joining groups get their own table value; everything else is
        # classified by joining type only.
        if fields[3] in ["ALAPH", "DALATH RISH"]:
            value = "JOINING_GROUP_" + fields[3].replace(' ', '_')
        else:
            value = "JOINING_TYPE_" + fields[2]
        print(" %s, /* %s */" % (value, '; '.join(fields)))

    print("")
    print("};")
    print("")
    print("#define JOINING_TABLE_FIRST 0x%04X" % min_u)
    print("#define JOINING_TABLE_LAST 0x%04X" % max_u)
    print("")

    # Floor division keeps the percentage an integer on Python 3 as well.
    occupancy = num * 100 // (max_u - min_u + 1)
    # Maintain at least 40% occupancy in the table
    if occupancy < 40:
        raise Exception("Table too sparse, please investigate: ", occupancy)
| 78 | |
def print_shaping_table(f):
    """Print the C `shaping_table` and `ligature_table` from UnicodeData.txt.

    `f` is an iterable of ';'-separated text lines.  Only lines whose sixth
    field looks like "<shape> HEX [HEX ...]" with shape one of initial,
    medial, isolated or final are used:

    * one code point after the tag: the line's character is recorded as that
      shape of the listed base character;
    * two code points, U+0644 followed by one of U+0622/0623/0625/0627: the
      line's character is recorded as a ligature of that pair;
    * anything else is ignored.

    Lines with fewer than six fields (for example blank lines) are skipped.

    Output: `shaping_table` with one {initial, medial, final, isolated} row
    for every code point between the smallest and largest base character
    (missing forms fall back to the code point itself), the
    SHAPING_TABLE_FIRST / SHAPING_TABLE_LAST defines, and `ligature_table`
    keyed by the shaped first component.

    Raises ValueError if no shaping forms or no ligatures were found (min/max
    of an empty sequence), and Exception if a ligature has a shape other than
    isolated or final.
    """
    shapes = {}
    ligatures = {}
    names = {}
    for line in f:

        fields = [x.strip() for x in line.split(';')]
        # Short lines have no sixth field to inspect; skip them rather than
        # failing with IndexError.
        if len(fields) < 6 or fields[5][0:1] != '<':
            continue

        items = fields[5].split(' ')
        shape, items = items[0][1:-1], tuple(int(x, 16) for x in items[1:])

        if shape not in ['initial', 'medial', 'isolated', 'final']:
            continue

        c = int(fields[0], 16)
        if len(items) != 1:
            # We only care about lam-alef ligatures
            if len(items) != 2 or items[0] != 0x0644 or items[1] not in [0x0622, 0x0623, 0x0625, 0x0627]:
                continue

            # Save ligature
            names[c] = fields[1]
            ligatures.setdefault(items, {})[shape] = c
        else:
            # Save shape.  The base character's label is the common prefix
            # of the names of all its presentation forms.
            base = items[0]
            if base not in names:
                names[base] = fields[1]
            else:
                names[base] = os.path.commonprefix([names[base], fields[1]]).strip()
            shapes.setdefault(base, {})[shape] = c

    # print ("...") with a single argument behaves the same on Python 2 and 3.
    print("")
    print("static const uint16_t shaping_table[][4] =")
    print("{")

    min_u, max_u = min(shapes), max(shapes)
    for u in range(min_u, max_u + 1):
        # Column order of the C table: initial, medial, final, isolated.
        s = [shapes[u][shape] if u in shapes and shape in shapes[u] else u
             for shape in ['initial', 'medial', 'final', 'isolated']]
        value = ', '.join("0x%04X" % c for c in s)
        print(" {%s}, /* U+%04X %s */" % (value, u, names[u] if u in names else ""))

    print("};")
    print("")
    print("#define SHAPING_TABLE_FIRST 0x%04X" % min_u)
    print("#define SHAPING_TABLE_LAST 0x%04X" % max_u)
    print("")

    # Re-key ligatures by the shaped forms of their two components: an
    # isolated ligature is initial+final, a final ligature is medial+final.
    ligas = {}
    for pair in ligatures:
        for shape in ligatures[pair]:
            c = ligatures[pair][shape]
            if shape == 'isolated':
                liga = (shapes[pair[0]]['initial'], shapes[pair[1]]['final'])
            elif shape == 'final':
                liga = (shapes[pair[0]]['medial'], shapes[pair[1]]['final'])
            else:
                raise Exception("Unexpected shape", shape)
            ligas.setdefault(liga[0], []).append((liga[1], c))
    # The inner C array is sized for the first component with most ligatures.
    max_i = max(len(ligas[l]) for l in ligas)
    print("")
    print("static const struct {")
    print(" uint16_t first;")
    print(" struct {")
    print(" uint16_t second;")
    print(" uint16_t ligature;")
    print(" } ligatures[%d];" % max_i)
    print("} ligature_table[] =")
    print("{")
    # sorted() works on both Python 2 lists and Python 3 dict views.
    for first in sorted(ligas):

        print(" { 0x%04X, {" % (first))
        for liga in ligas[first]:
            print(" { 0x%04X, 0x%04X }, /* %s */" % (liga[0], liga[1], names[liga[1]]))
        print(" }},")

    print("};")
    print("")
| 170 | |
| 171 | |
| 172 | |
# Emit the generated file: banner comment, include guard, both tables.
# print ("...") with a single argument behaves the same on Python 2 and 3.
print("/* == Start of generated table == */")
print("/*")
print(" * The following table is generated by running:")
print(" *")
print(" * ./gen-arabic-table.py ArabicShaping.txt UnicodeData.txt")
print(" *")
print(" * on files with these headers:")
print(" *")
# Echo the header lines captured from the input files.
for h in headers:
    for l in h:
        print(" * %s" % (l.strip()))
print(" */")
print("")
print("#ifndef HB_OT_SHAPE_COMPLEX_ARABIC_TABLE_HH")
print("#define HB_OT_SHAPE_COMPLEX_ARABIC_TABLE_HH")
print("")

print_joining_table(files[0])
print_shaping_table(files[1])

# Both inputs have been fully consumed; release the file handles.
for f in files:
    f.close()

print("")
print("#endif /* HB_OT_SHAPE_COMPLEX_ARABIC_TABLE_HH */")
print("")
print("/* == End of generated table == */")
Behdad Esfahbod | 88e7f37 | 2010-12-21 14:18:24 -0500 | [diff] [blame] | 197 | |