blob: ccecb406a0b9b06ff497093d476a6e73e3daf4c8 [file] [log] [blame]
Ebrahim Byagowicab2c2c2018-03-29 12:48:47 +04301#!/usr/bin/env python
2
3from __future__ import print_function, division, absolute_import
Behdad Esfahbod3eb936f2010-10-05 18:36:58 -04004
Ebrahim Byagowi80395f12018-03-29 22:00:41 +04305import io, os.path, sys
Behdad Esfahbod3eb936f2010-10-05 18:36:58 -04006
if len(sys.argv) != 4:
    print("usage: ./gen-arabic-table.py ArabicShaping.txt UnicodeData.txt Blocks.txt", file=sys.stderr)
    sys.exit(1)

# Inputs, in command-line order:
#   files[0] = ArabicShaping.txt, files[1] = UnicodeData.txt, files[2] = Blocks.txt
files = [io.open(x, encoding='utf-8') for x in sys.argv[1:]]

# The first two lines of ArabicShaping.txt and of Blocks.txt are kept so they
# can be echoed in the banner of the generated output.  UnicodeData.txt gets a
# fixed placeholder entry instead.
headers = [[files[0].readline(), files[0].readline()], [files[2].readline(), files[2].readline()]]
headers.append(["UnicodeData.txt does not have a header."])

# Skip the remainder of the ArabicShaping.txt preamble, up to and including the
# '####...' separator line.  readline() returns an empty string at end of file,
# so a missing separator must be detected explicitly or the loop would never
# terminate.
for header_line in iter(files[0].readline, ''):
    if header_line.find('##################') >= 0:
        break
else:
    print("error: no '##################' separator line found in %s" % sys.argv[1], file=sys.stderr)
    sys.exit(1)
Behdad Esfahbod3eb936f2010-10-05 18:36:58 -040017
# Maps every code point covered by a parsed block record to its block name.
blocks = {}


def read_blocks(f):
    """Fill the module-level ``blocks`` mapping from ``;``-separated records.

    ``f`` is an iterable of text lines.  Anything from the first ``#`` on a
    line is discarded.  A line that contains no ``;`` is skipped.  Otherwise
    the first field is either a single hexadecimal code point or a
    ``START..END`` hexadecimal range (inclusive), and the second field is the
    name recorded for every code point in that range.

    Returns None; the result is the mutation of ``blocks``.
    """
    for raw in f:
        # Keep only the part of the line that precedes a '#' comment.
        content = raw.split('#', 1)[0]

        parts = [piece.strip() for piece in content.split(';')]
        if len(parts) == 1:
            continue

        bounds = parts[0].split('..')
        first = int(bounds[0], 16)
        last = first if len(bounds) == 1 else int(bounds[1], 16)

        name = parts[1]
        blocks.update((cp, name) for cp in range(first, last + 1))
Behdad Esfahbod14d78412010-11-17 16:52:58 -050042
def print_joining_table(f):
    """Print the C ``joining_table`` array and ``joining_type()`` lookup function.

    ``f`` is an iterable of ``;``-separated text lines.  Lines starting with
    ``#`` and lines without any ``;`` are ignored.  For every other line,
    field 0 is a hexadecimal code point, field 2 is appended to
    ``JOINING_TYPE_`` and field 3 is checked against the two joining groups
    that get their own ``JOINING_GROUP_`` constant.

    The module-level ``blocks`` mapping must already be populated: it is
    indexed with every code point read from ``f`` (a missing entry raises
    KeyError) and is used to emit the block-name comments.

    Everything is written to standard output; the function returns None.
    """
    # Code point -> name of the C constant describing its joining behaviour.
    values = {}
    for line in f:

        if line.startswith('#'):
            continue

        fields = [x.strip() for x in line.split(';')]
        if len(fields) == 1:
            continue

        u = int(fields[0], 16)

        if fields[3] in ["ALAPH", "DALATH RISH"]:
            value = "JOINING_GROUP_" + fields[3].replace(' ', '_')
        else:
            value = "JOINING_TYPE_" + fields[2]
        values[u] = value

    # Each constant gets a short alias built from the initials of the words
    # after its two-word prefix (e.g. JOINING_GROUP_DALATH_RISH -> DR).
    # Iterating in sorted order keeps the generated text identical from run to
    # run; plain set iteration order depends on string hashing.
    short_value = {}
    for value in sorted(set(values.values()) | {'JOINING_TYPE_X'}):
        short = ''.join(x[0] for x in value.split('_')[2:])
        # Internal sanity check: two constants must never share an alias.
        assert short not in short_value.values(), "duplicate short name %r" % short
        short_value[value] = short
    macros = sorted(short_value.items())

    print()
    for value, short in macros:
        print("#define %s %s" % (short, value))

    uu = sorted(values)
    num = len(values)
    all_blocks = set(blocks[u] for u in uu)

    # Group the code points into dense ranges.  A new range is started only
    # when more than 80 consecutive code points are absent from the input;
    # smaller gaps are filled with JOINING_TYPE_X entries in the table.
    ranges = []
    for u in uu:
        if ranges and u - ranges[-1][1] <= 1 + 16 * 5:
            ranges[-1][1] = u
        else:
            ranges.append([u, u])

    print()
    print("static const uint8_t joining_table[] =")
    print("{")
    last_block = None
    offset = 0
    for start, end in ranges:

        print()
        print("#define joining_offset_0x%04xu %d" % (start, offset))

        for u in range(start, end + 1):

            # Code points missing from `blocks` stay in the previous block.
            block = blocks.get(u, last_block)
            value = values.get(u, "JOINING_TYPE_X")

            if block != last_block or u == start:
                if u != start:
                    print()
                if block in all_blocks:
                    print("\n /* %s */" % block)
                else:
                    print("\n /* FILLER */")
                last_block = block
                if u % 32 != 0:
                    # Row starts mid-way: label it with the row base and pad.
                    print()
                    print(" /* %04X */" % (u//32*32), " " * (u % 32), end="")

            if u % 32 == 0:
                print()
                print(" /* %04X */ " % u, end="")
            print("%s," % short_value[value], end="")
        print()

        offset += end - start + 1
    print()
    occupancy = num * 100. / offset
    print("}; /* Table items: %d; occupancy: %d%% */" % (offset, occupancy))
    print()

    page_bits = 12
    print()
    print("static unsigned int")
    print("joining_type (hb_codepoint_t u)")
    print("{")
    print(" switch (u >> %d)" % page_bits)
    print(" {")
    # Every page a range touches needs a case label, including pages that lie
    # strictly between the range's first and last page.
    pages = set()
    for start, end in ranges:
        pages.update(range(start >> page_bits, (end >> page_bits) + 1))
    for p in sorted(pages):
        print(" case 0x%0Xu:" % p)
        for start, end in ranges:
            if not (start >> page_bits) <= p <= (end >> page_bits):
                continue
            offset_name = "joining_offset_0x%04xu" % start
            print(" if (hb_in_range<hb_codepoint_t> (u, 0x%04Xu, 0x%04Xu)) return joining_table[u - 0x%04Xu + %s];" % (start, end, start, offset_name))
        print(" break;")
        print("")
    print(" default:")
    print(" break;")
    print(" }")
    print(" return X;")
    print("}")
    print()
    for value, short in macros:
        print("#undef %s" % (short))
    print()
Behdad Esfahbod200dfe32014-06-20 16:20:59 -0400150
def print_shaping_table(f):
    """Print the C ``shaping_table`` and ``ligature_table`` definitions.

    ``f`` is an iterable of ``;``-separated text lines.  Field 0 is a
    hexadecimal code point, field 1 its name, and field 5 a decomposition of
    the form ``<shape> HEX [HEX ...]``.  Only the shapes ``initial``,
    ``medial``, ``final`` and ``isolated`` are used.  Single-code-point
    decompositions feed the shaping table.  Two-code-point decompositions are
    kept only for U+0644 followed by one of U+0622, U+0623, U+0625 or U+0627,
    and feed the ligature table.  Lines with fewer than six fields, or whose
    field 5 does not start with ``<``, are ignored.

    Everything is written to standard output; the function returns None.

    Raises:
        ValueError: if no usable shape or ligature record was read.
        KeyError: if a ligature refers to a shape form that was never seen.
        Exception: if a ligature record has a shape other than ``isolated``
            or ``final``.
    """
    shapes = {}     # base code point -> {shape name: presentation form}
    ligatures = {}  # (first, second) base code points -> {shape name: ligature}
    names = {}      # code point -> descriptive name used in output comments
    for line in f:

        fields = [x.strip() for x in line.split(';')]
        # Blank or truncated lines have no decomposition field at all.
        if len(fields) < 6 or fields[5][0:1] != '<':
            continue

        items = fields[5].split(' ')
        shape, items = items[0][1:-1], tuple(int(x, 16) for x in items[1:])

        if shape not in ('initial', 'medial', 'isolated', 'final'):
            continue

        c = int(fields[0], 16)
        if len(items) != 1:
            # We only care about lam-alef ligatures
            if len(items) != 2 or items[0] != 0x0644 or items[1] not in (0x0622, 0x0623, 0x0625, 0x0627):
                continue

            # Save ligature
            names[c] = fields[1]
            ligatures.setdefault(items, {})[shape] = c
        else:
            # Save shape.  The name kept for the base code point is the common
            # prefix of the names of all its presentation forms.
            base = items[0]
            if base not in names:
                names[base] = fields[1]
            else:
                names[base] = os.path.commonprefix([names[base], fields[1]]).strip()
            shapes.setdefault(base, {})[shape] = c

    print()
    print("static const uint16_t shaping_table[][4] =")
    print("{")

    min_u, max_u = min(shapes), max(shapes)
    for u in range(min_u, max_u + 1):
        forms = shapes.get(u, {})
        # Column order of the C table; absent forms are written as 0.
        s = [forms.get(shape, 0) for shape in ('initial', 'medial', 'final', 'isolated')]
        value = ', '.join("0x%04Xu" % c for c in s)
        print(" {%s}, /* U+%04X %s */" % (value, u, names.get(u, "")))

    print("};")
    print()
    print("#define SHAPING_TABLE_FIRST 0x%04Xu" % min_u)
    print("#define SHAPING_TABLE_LAST 0x%04Xu" % max_u)
    print()

    # First presentation form -> list of (second presentation form, ligature).
    # Pairs are visited in sorted order so the generated rows do not depend on
    # dict iteration order or on the order of the input lines.
    ligas = {}
    for pair in sorted(ligatures):
        for shape, c in sorted(ligatures[pair].items()):
            if shape == 'isolated':
                liga = (shapes[pair[0]]['initial'], shapes[pair[1]]['final'])
            elif shape == 'final':
                liga = (shapes[pair[0]]['medial'], shapes[pair[1]]['final'])
            else:
                raise Exception("Unexpected shape", shape)
            ligas.setdefault(liga[0], []).append((liga[1], c))
    max_i = max(len(ligas[l]) for l in ligas)
    print()
    print("static const struct ligature_set_t {")
    print(" uint16_t first;")
    print(" struct ligature_pairs_t {")
    print(" uint16_t second;")
    print(" uint16_t ligature;")
    print(" } ligatures[%d];" % max_i)
    print("} ligature_table[] =")
    print("{")
    for first in sorted(ligas):

        print(" { 0x%04Xu, {" % (first))
        for liga in ligas[first]:
            print(" { 0x%04Xu, 0x%04Xu }, /* %s */" % (liga[0], liga[1], names[liga[1]]))
        print(" }},")

    print("};")
    print()
Behdad Esfahbodae4a2b92012-04-10 16:25:08 -0400240
241
242
# Banner: records how the output was produced and from which input files.
print("/* == Start of generated table == */")
print("/*")
print(" * The following table is generated by running:")
print(" *")
print(" * ./gen-arabic-table.py ArabicShaping.txt UnicodeData.txt Blocks.txt")
print(" *")
print(" * on files with these headers:")
print(" *")
for h in headers:
    for l in h:
        print(" * %s" % (l.strip()))
print(" */")
print()
print("#ifndef HB_OT_SHAPE_COMPLEX_ARABIC_TABLE_HH")
print("#define HB_OT_SHAPE_COMPLEX_ARABIC_TABLE_HH")
print()

try:
    # Blocks must be loaded first: print_joining_table reads the `blocks` mapping.
    read_blocks(files[2])
    print_joining_table(files[0])
    print_shaping_table(files[1])
finally:
    # The input files are not needed past this point; release the handles
    # even if table generation failed.
    for input_file in files:
        input_file.close()

print()
print("#endif /* HB_OT_SHAPE_COMPLEX_ARABIC_TABLE_HH */")
print()
print("/* == End of generated table == */")
267print ("/* == End of generated table == */")