blob: 06817255c92c5dfd134e5390ef3f63eb234c83b4 [file] [log] [blame]
Behdad Esfahbode2c95112015-07-20 11:32:48 +01001#!/usr/bin/python
2
3import sys
4
# The generator needs exactly four input files, in the order shown in the
# usage message; anything else is a usage error.
if len(sys.argv) != 5:
    sys.stderr.write("usage: ./gen-use-table.py IndicSyllabicCategory.txt IndicPositionalCategory.txt UnicodeData.txt Blocks.txt\n")
    sys.exit(1)
8
# Blocks whose characters are dropped from the combined table.
BLACKLISTED_BLOCKS = ["Thai", "Lao", "Tibetan"]

# Input files in command-line order: IndicSyllabicCategory.txt,
# IndicPositionalCategory.txt, UnicodeData.txt, Blocks.txt.
files = [open(x) for x in sys.argv[1:]]

try:
    # The first two lines of each file are kept as its header, except for
    # UnicodeData.txt (index 2), which has none.
    headers = [[f.readline() for i in range(2)] for j, f in enumerate(files) if j != 2]
    headers.append(["UnicodeData.txt does not have a header."])

    # data[i] maps code point -> property value read from file i;
    # values[i] maps property value -> number of code points carrying it.
    data = [{} for f in files]
    values = [{} for f in files]
    for i, f in enumerate(files):
        # UnicodeData.txt (index 2) is read from its third field; every
        # other file from its second.
        value_field = 2 if i == 2 else 1
        for line in f:
            # Drop a trailing '#' comment, if any.
            j = line.find('#')
            if j >= 0:
                line = line[:j]

            fields = [x.strip() for x in line.split(';')]
            if len(fields) == 1:
                # No ';' on the line: blank or comment-only.
                continue

            # The first field is a single code point or "START..END", in hex.
            uu = fields[0].split('..')
            start = int(uu[0], 16)
            end = start if len(uu) == 1 else int(uu[1], 16)

            t = fields[value_field]

            for u in range(start, end + 1):
                data[i][u] = t
            values[i][t] = values[i].get(t, 0) + end - start + 1
finally:
    # Only the parsed dicts are used from here on; release the handles.
    for f in files:
        f.close()
41
# Fallback value for each of the four inputs, in the same order as the
# per-file dicts in `data`.
defaults = ('Other', 'Not_Applicable', 'Cn', 'No_Block')

# TODO Characters that are not in Unicode Indic files, but used in USE
data[0][0x034F] = defaults[0]
data[0][0x2060] = defaults[0]
data[0][0x20F0] = defaults[0]
for u in range(0xFE00, 0xFE0F + 1):
    data[0][u] = defaults[0]

# Merge data into one dict:
for i, v in enumerate(defaults):
    values[i][v] = values[i].get(v, 0) + 1
combined = {}
for i, d in enumerate(data):
    for u, v in d.items():
        if u not in combined:
            # Only the first two inputs (indices 0 and 1) may introduce a
            # code point; the later ones only annotate existing entries.
            if i >= 2:
                continue
            combined[u] = list(defaults)
        combined[u][i] = v
# Drop every code point whose block (slot 3) is blacklisted.
combined = {k: v for k, v in combined.items() if v[3] not in BLACKLISTED_BLOCKS}
data = combined
del combined
num = len(data)
66
Behdad Esfahbod20e246e2015-07-20 15:56:19 +010067
# Every property value name that the predicates below may refer to.  Each
# name later becomes a module-level PropertyValue of the same name.
property_names = [
    # General_Category
    'Cc', 'Cf', 'Cn', 'Co', 'Cs', 'Ll', 'Lm', 'Lo', 'Lt', 'Lu', 'Mc',
    'Me', 'Mn', 'Nd', 'Nl', 'No', 'Pc', 'Pd', 'Pe', 'Pf', 'Pi', 'Po',
    'Ps', 'Sc', 'Sk', 'Sm', 'So', 'Zl', 'Zp', 'Zs',
    # Indic_Syllabic_Category
    'Other',
    'Bindu',
    'Visarga',
    'Avagraha',
    'Nukta',
    'Virama',
    'Pure_Killer',
    'Invisible_Stacker',
    'Vowel_Independent',
    'Vowel_Dependent',
    'Vowel',
    'Consonant_Placeholder',
    'Consonant',
    'Consonant_Dead',
    'Consonant_With_Stacker',
    'Consonant_Prefixed',
    'Consonant_Preceding_Repha',
    'Consonant_Succeeding_Repha',
    'Consonant_Subjoined',
    'Consonant_Medial',
    'Consonant_Final',
    'Consonant_Head_Letter',
    'Modifying_Letter',
    'Tone_Letter',
    'Tone_Mark',
    'Gemination_Mark',
    'Cantillation_Mark',
    'Register_Shifter',
    'Syllable_Modifier',
    'Consonant_Killer',
    'Non_Joiner',
    'Joiner',
    'Number_Joiner',
    'Number',
    'Brahmi_Joining_Number',
    # Indic_Positional_Category
    'Not_Applicable',
    'Right',
    'Left',
    'Visual_Order_Left',
    'Left_And_Right',
    'Top',
    'Bottom',
    'Top_And_Bottom',
    'Top_And_Right',
    'Top_And_Left',
    'Top_And_Left_And_Right',
    'Bottom_And_Left',
    'Bottom_And_Right',
    'Top_And_Bottom_And_Right',
    'Overstruck',
]
126
class PropertyValue(object):
    """A named property value that compares equal to its own name.

    Instances compare equal to a plain string holding the same name and to
    any other object whose ``name`` attribute matches, so values parsed from
    the data files (strings) can be tested against these symbols directly.
    """

    def __init__(self, name_):
        self.name = name_

    def __str__(self):
        return self.name

    def __eq__(self, other):
        # Compare against other.name when present, otherwise against the
        # object itself (the plain-string case).
        return self.name == getattr(other, 'name', other)

    def __ne__(self, other):
        return not (self == other)

    def __hash__(self):
        # Keep hashing consistent with __eq__: a value equal to a string
        # must hash like that string so dict/set membership tests agree.
        return hash(self.name)
Behdad Esfahbod20e246e2015-07-20 15:56:19 +0100136
# Map each property value name to its PropertyValue, then publish every one
# as a module-level name so the predicates can refer to e.g. `Lo` or `Top`.
property_values = {}

for name in property_names:
    value = PropertyValue(name)
    # Both dicts are keyed by plain strings, so the checks are made with the
    # name itself rather than with the PropertyValue object.
    assert name not in property_values, "duplicate property value name: %s" % name
    assert name not in globals(), "property value name shadows a global: %s" % name
    property_values[name] = value
globals().update(property_values)
145
146
def is_BASE(U, UISC, UGC):
    """Predicate for USE category 'B'.

    U is the code point, UISC its Indic_Syllabic_Category and UGC its
    General_Category.
    """
    return (UISC in [Number, Consonant, Consonant_Head_Letter,
                     #SPEC-DRAFT Consonant_Placeholder,
                     Tone_Letter,
                     Vowel_Independent #SPEC-DRAFT
                     ] or
            (UGC == Lo and UISC in [Avagraha, Bindu, Consonant_Final, Consonant_Medial,
                                    Consonant_Subjoined, Vowel, Vowel_Dependent]))
def is_BASE_IND(U, UISC, UGC):
    """Predicate for USE category 'IND'.

    Matches Consonant_Dead and Modifying_Letter, plus General_Category Po
    except for a short list of excluded code points.
    """
    #SPEC-DRAFT return (UISC in [Consonant_Dead, Modifying_Letter] or UGC == Po)
    return (UISC in [Consonant_Dead, Modifying_Letter] or
            (UGC == Po and U not in [0x104E, 0x2022, 0x11A3F, 0x11A45]) or
            False # SPEC-DRAFT-OUTDATED! U == 0x002D
            )
def is_BASE_NUM(U, UISC, UGC):
    """Predicate for USE category 'N': Brahmi_Joining_Number."""
    return UISC == Brahmi_Joining_Number
def is_BASE_OTHER(U, UISC, UGC):
    """Predicate for USE category 'GB'.

    Matches Consonant_Placeholder and a fixed list of code points.
    """
    if UISC == Consonant_Placeholder: return True #SPEC-DRAFT
    #SPEC-DRAFT return U in [0x00A0, 0x00D7, 0x2015, 0x2022, 0x25CC, 0x25FB, 0x25FC, 0x25FD, 0x25FE]
    return U in [0x2015, 0x2022, 0x25FB, 0x25FC, 0x25FD, 0x25FE]
def is_CGJ(U, UISC, UGC):
    """Predicate for USE category 'CGJ': the single code point U+034F."""
    return U == 0x034F
def is_CONS_FINAL(U, UISC, UGC):
    """Predicate for USE category 'F'.

    Matches Consonant_Final whose General_Category is not Lo, and any
    Consonant_Succeeding_Repha.
    """
    return ((UISC == Consonant_Final and UGC != Lo) or
            UISC == Consonant_Succeeding_Repha)
def is_CONS_FINAL_MOD(U, UISC, UGC):
    """Predicate for USE category 'FM': Syllable_Modifier."""
    #SPEC-DRAFT return UISC in [Consonant_Final_Modifier, Syllable_Modifier]
    return UISC == Syllable_Modifier
def is_CONS_MED(U, UISC, UGC):
    """Predicate for USE category 'M': Consonant_Medial that is not Lo."""
    return UISC == Consonant_Medial and UGC != Lo
def is_CONS_MOD(U, UISC, UGC):
    """Predicate for USE category 'CM': Nukta, Gemination_Mark or Consonant_Killer."""
    return UISC in [Nukta, Gemination_Mark, Consonant_Killer]
def is_CONS_SUB(U, UISC, UGC):
    """Predicate for USE category 'SUB': Consonant_Subjoined that is not Lo."""
    #SPEC-DRAFT return UISC == Consonant_Subjoined
    return UISC == Consonant_Subjoined and UGC != Lo
def is_CONS_WITH_STACKER(U, UISC, UGC):
    """Predicate for USE category 'CS': Consonant_With_Stacker."""
    return UISC == Consonant_With_Stacker
def is_HALANT(U, UISC, UGC):
    """Predicate for USE category 'H': Virama or Invisible_Stacker."""
    return UISC in [Virama, Invisible_Stacker]
def is_HALANT_NUM(U, UISC, UGC):
    """Predicate for USE category 'HN': Number_Joiner."""
    return UISC == Number_Joiner
def is_ZWNJ(U, UISC, UGC):
    """Predicate for USE category 'ZWNJ': Non_Joiner."""
    return UISC == Non_Joiner
def is_ZWJ(U, UISC, UGC):
    """Predicate for USE category 'ZWJ': Joiner."""
    return UISC == Joiner
def is_Word_Joiner(U, UISC, UGC):
    """Predicate for USE category 'WJ': the single code point U+2060."""
    return U == 0x2060
def is_OTHER(U, UISC, UGC):
    """Predicate for USE category 'O'.

    Matches Indic_Syllabic_Category Other, minus the code points claimed by
    the SM, CGJ, WJ and VS predicates, so that each character still matches
    exactly one category.
    """
    #SPEC-OUTDATED return UGC == Zs # or any other SCRIPT_COMMON characters
    return (UISC == Other
            and not is_SYM_MOD(U, UISC, UGC)
            and not is_CGJ(U, UISC, UGC)
            and not is_Word_Joiner(U, UISC, UGC)
            and not is_VARIATION_SELECTOR(U, UISC, UGC)
            )
def is_Reserved(U, UISC, UGC):
    """Predicate for USE category 'Rsv': General_Category 'Cn'."""
    return UGC == 'Cn'
def is_REPHA(U, UISC, UGC):
    """Predicate for USE category 'R': Consonant_Preceding_Repha or Consonant_Prefixed."""
    return UISC in [Consonant_Preceding_Repha, Consonant_Prefixed]
def is_SYM(U, UISC, UGC):
    """Predicate for USE category 'S': General_Category So or Sc, except U+25CC."""
    if U == 0x25CC: return False #SPEC-DRAFT
    #SPEC-DRAFT return UGC in [So, Sc] or UISC == Symbol_Letter
    return UGC in [So, Sc]
def is_SYM_MOD(U, UISC, UGC):
    """Predicate for USE category 'SM': code points U+1B6B through U+1B73."""
    # The nine listed code points are contiguous, so a range test suffices.
    return 0x1B6B <= U <= 0x1B73
def is_VARIATION_SELECTOR(U, UISC, UGC):
    """Predicate for USE category 'VS': code points U+FE00 through U+FE0F."""
    return 0xFE00 <= U <= 0xFE0F
def is_VOWEL(U, UISC, UGC):
    """Predicate for USE category 'V'.

    Matches Pure_Killer, and non-Lo Vowel / Vowel_Dependent other than
    U+AA29 (which is_VOWEL_MOD claims instead).
    """
    # https://github.com/roozbehp/unicode-data/issues/6
    return (UISC == Pure_Killer or
            (UGC != Lo and UISC in [Vowel, Vowel_Dependent] and U not in [0xAA29]))
def is_VOWEL_MOD(U, UISC, UGC):
    """Predicate for USE category 'VM'.

    Matches Tone_Mark, Cantillation_Mark, Register_Shifter and Visarga, plus
    non-Lo Bindu and non-Lo U+AA29.
    """
    # https://github.com/roozbehp/unicode-data/issues/6
    return (UISC in [Tone_Mark, Cantillation_Mark, Register_Shifter, Visarga] or
            (UGC != Lo and (UISC == Bindu or U in [0xAA29])))
Behdad Esfahbod20e246e2015-07-20 15:56:19 +0100222
# USE category tag -> predicate deciding whether a character belongs to it.
# map_to_use requires every character to satisfy exactly one predicate.
use_mapping = {
    'B': is_BASE,
    'IND': is_BASE_IND,
    'N': is_BASE_NUM,
    'GB': is_BASE_OTHER,
    'CGJ': is_CGJ,
    'F': is_CONS_FINAL,
    'FM': is_CONS_FINAL_MOD,
    'M': is_CONS_MED,
    'CM': is_CONS_MOD,
    'SUB': is_CONS_SUB,
    'CS': is_CONS_WITH_STACKER,
    'H': is_HALANT,
    'HN': is_HALANT_NUM,
    'ZWNJ': is_ZWNJ,
    'ZWJ': is_ZWJ,
    'WJ': is_Word_Joiner,
    'O': is_OTHER,
    'Rsv': is_Reserved,
    'R': is_REPHA,
    'S': is_SYM,
    'SM': is_SYM_MOD,
    'VS': is_VARIATION_SELECTOR,
    'V': is_VOWEL,
    'VM': is_VOWEL_MOD,
}
249
# USE category tag -> {position suffix: Indic_Positional_Category values}.
# A category mapped to a dict is emitted with a suffix appended (e.g. 'V' +
# 'Abv'); a category mapped to None may carry a positional category but is
# emitted without a suffix.
use_positions = {
    'F': {
        'Abv': [Top],
        'Blw': [Bottom],
        'Pst': [Right],
    },
    'M': {
        'Abv': [Top],
        'Blw': [Bottom, Bottom_And_Left],
        'Pst': [Right],
        'Pre': [Left],
    },
    'CM': {
        'Abv': [Top],
        'Blw': [Bottom],
    },
    'V': {
        'Abv': [Top, Top_And_Bottom, Top_And_Bottom_And_Right, Top_And_Right],
        'Blw': [Bottom, Overstruck, Bottom_And_Right],
        'Pst': [Right],
        'Pre': [Left, Top_And_Left, Top_And_Left_And_Right, Left_And_Right],
    },
    'VM': {
        'Abv': [Top],
        'Blw': [Bottom, Overstruck],
        'Pst': [Right],
        'Pre': [Left],
    },
    'SM': {
        'Abv': [Top],
        'Blw': [Bottom],
    },
    'H': None,
    'B': None,
    'FM': None,
    'SUB': None,
}
287
def map_to_use(data):
    """Translate merged Unicode properties into USE categories.

    data maps code point -> (UISC, UIPC, UGC, UBlock).  Returns a dict
    mapping code point -> (USE category, UBlock); the category has a
    position suffix (e.g. 'Abv') appended whenever use_positions holds a
    non-empty entry for it.

    Raises AssertionError if a code point does not satisfy exactly one
    predicate in use_mapping, if it carries a positional category that its
    USE category does not accept, or if that positional category does not
    select exactly one suffix.
    """
    out = {}
    items = use_mapping.items()
    for U, (UISC, UIPC, UGC, UBlock) in data.items():

        # Resolve Indic_Syllabic_Category

        # TODO: These don't have UISC assigned in Unicode 8.0, but
        # have UIPC
        if U == 0x17DD: UISC = Vowel_Dependent
        if 0x1CE2 <= U <= 0x1CE8: UISC = Cantillation_Mark

        # TODO: https://github.com/harfbuzz/harfbuzz/pull/627
        if 0x1BF2 <= U <= 0x1BF3:
            UISC = Nukta
            UIPC = Bottom

        # TODO: U+1CED should only be allowed after some of
        # the nasalization marks, maybe only for U+1CE9..U+1CF1.
        if U == 0x1CED: UISC = Tone_Mark

        # TODO: https://github.com/harfbuzz/harfbuzz/issues/525
        if U == 0x1A7F:
            UISC = Consonant_Final
            UIPC = Bottom

        # TODO: https://github.com/harfbuzz/harfbuzz/pull/609
        if U == 0x20F0:
            UISC = Cantillation_Mark
            UIPC = Top

        # TODO: https://github.com/harfbuzz/harfbuzz/pull/626
        if U == 0xA8B4: UISC = Consonant_Medial

        # Exactly one predicate must claim the character.
        matches = [k for k, v in items if v(U, UISC, UGC)]
        assert len(matches) == 1, "%s %s %s %s" % (hex(U), UISC, UGC, matches)
        USE = matches[0]

        # Resolve Indic_Positional_Category

        # TODO: Not in Unicode 8.0 yet, but in spec.
        if U == 0x1B6C: UIPC = Bottom

        # TODO: These should die, but have UIPC in Unicode 8.0
        if U in [0x953, 0x954]: UIPC = Not_Applicable

        # TODO: In USE's override list but not in Unicode 8.0
        if U == 0x103C: UIPC = Left

        # TODO: These are not in USE's override list that we have, nor are they in Unicode 8.0
        if 0xA926 <= U <= 0xA92A: UIPC = Top
        if U == 0x111CA: UIPC = Bottom
        if U == 0x11300: UIPC = Top
        if U == 0x1133C: UIPC = Bottom
        if U == 0x1171E: UIPC = Left # Correct?!
        if 0x1CF2 <= U <= 0x1CF3: UIPC = Right
        if 0x1CF8 <= U <= 0x1CF9: UIPC = Top

        # A character with a real position must belong to a category that
        # use_positions knows about.
        assert (UIPC in [Not_Applicable, Visual_Order_Left] or
                USE in use_positions), "%s %s %s %s %s" % (hex(U), UIPC, USE, UISC, UGC)

        pos_mapping = use_positions.get(USE, None)
        if pos_mapping:
            suffixes = [k for k, v in pos_mapping.items() if v and UIPC in v]
            assert len(suffixes) == 1, "%s %s %s %s %s %s" % (hex(U), UIPC, USE, UISC, UGC, suffixes)
            USE = USE + suffixes[0]

        out[U] = (USE, UBlock)
    return out
351
# From here on `data` maps code point -> (USE category, block), and
# `defaults` is the entry used for code points missing from it.
defaults = ('O', 'No_Block')
data = map_to_use(data)

# Banner of the generated file: how it was produced and from which inputs.
print("/* == Start of generated table == */")
print("/*")
print(" * The following table is generated by running:")
print(" *")
print(" * ./gen-use-table.py IndicSyllabicCategory.txt IndicPositionalCategory.txt UnicodeData.txt Blocks.txt")
print(" *")
print(" * on files with these headers:")
print(" *")
for h in headers:
    for l in h:
        print(" * %s" % (l.strip()))
print(" */")
print("")
print('#include "hb-ot-shape-complex-use-private.hh"')
print("")
370
# Running totals across all print_block calls: table slots emitted, slots
# backed by real data, and the block whose heading was printed last.
total = 0
used = 0
last_block = None
def print_block(block, start, end, data):
    """Write the table entries for code points start..end (inclusive).

    block is the block name to print as a heading, or None for filler
    between blocks; the heading is printed only when it differs from the
    previous one.  start must be a multiple of 8 and end + 1 a multiple of
    8 (asserted).  Code points absent from data are written with the
    module-level `defaults` entry.  Updates the module-level counters
    total, used and last_block.
    """
    global total, used, last_block
    if block and block != last_block:
        print("")
        print("")
        print(" /* %s */" % block)
        if start % 16:
            # The block starts mid-row: pad so its first entries line up
            # with the columns they would occupy in a full 16-entry row.
            sys.stdout.write(' ' * (20 + (start % 16 * 6)))
    num = 0
    assert start % 8 == 0
    assert (end + 1) % 8 == 0
    for u in range(start, end + 1):
        if u % 16 == 0:
            # New row of 16 entries, labelled with its first code point.
            print("")
            sys.stdout.write(" /* %04X */" % u)
        if u in data:
            num += 1
        d = data.get(u, defaults)
        sys.stdout.write("%6s," % d[0])

    total += end - start + 1
    used += num
    if block:
        last_block = block
398
# Code points present in the table, in ascending order.
uu = sorted(data.keys())

# State for the table emission loop that follows: last code point written,
# running table offset, and the [start, end) bounds of each emitted range.
last = -100000
num = 0
offset = 0
starts = []
ends = []
# Short aliases for the USE_* constants.  Categories that take a position
# suffix are skipped here and get one alias per suffix in the second loop.
for k, v in sorted(use_mapping.items()):
    if k in use_positions and use_positions[k]: continue
    print("#define %s USE_%s /* %s */" % (k, k, v.__name__[3:]))
for k, v in sorted(use_positions.items()):
    if not v: continue
    for suf in v.keys():
        tag = k + suf
        print("#define %s USE_%s" % (tag, tag))
print("")
print("static const USE_TABLE_ELEMENT_TYPE use_table[] = {")
for u in uu:
    if u <= last:
        # Already covered by a previously emitted range.
        continue
    block = data[u][1]

    # Ranges are emitted in whole groups of 8 code points.
    start = u // 8 * 8
    end = start + 1
    # `data` holds exactly the code points in `uu`, so test membership on
    # the dict rather than scanning the list.
    while end in data and block == data[end][1]:
        end += 1
    end = (end - 1) // 8 * 8 + 7

    if start != last + 1:
        if start - last <= 1 + 16 * 3:
            # Small gap: fill it with default entries and keep one range.
            print_block(None, last + 1, start - 1, data)
            last = start - 1
        else:
            # Large gap: close the current range and open a new one with
            # its own offset into the table.
            if last >= 0:
                ends.append(last + 1)
                offset += ends[-1] - starts[-1]
            print("")
            print("")
            print("#define use_offset_0x%04xu %d" % (start, offset))
            starts.append(start)

    print_block(block, start, end, data)
    last = end
# Close the final range.
ends.append(last + 1)
offset += ends[-1] - starts[-1]
print("")
print("")
occupancy = used * 100. / total
page_bits = 12
print("}; /* Table items: %d; occupancy: %d%% */" % (offset, occupancy))
print("")
# Emit the C lookup function: switch on the page (u >> page_bits), then
# test each emitted range whose start or end falls on that page.
print("USE_TABLE_ELEMENT_TYPE")
print("hb_use_get_category (hb_codepoint_t u)")
print("{")
print(" switch (u >> %d)" % page_bits)
print(" {")
pages = {u >> page_bits for u in starts + ends}
for p in sorted(pages):
    print(" case 0x%0Xu:" % p)
    for (start, end) in zip(starts, ends):
        if p not in [start >> page_bits, end >> page_bits]: continue
        offset = "use_offset_0x%04xu" % start
        # `end` is exclusive, hence end-1 as the inclusive upper bound.
        print(" if (hb_in_range<hb_codepoint_t> (u, 0x%04Xu, 0x%04Xu)) return use_table[u - 0x%04Xu + %s];" % (start, end-1, start, offset))
    print(" break;")
    print("")
print(" default:")
print(" break;")
print(" }")
print(" return USE_O;")
print("}")
print("")
# Remove the short aliases again, mirroring the #define section.
for k in sorted(use_mapping.keys()):
    if k in use_positions and use_positions[k]: continue
    print("#undef %s" % k)
for k, v in sorted(use_positions.items()):
    if not v: continue
    for suf in v.keys():
        tag = k + suf
        print("#undef %s" % tag)
print("")
print("/* == End of generated table == */")
481
# Maintain at least 50% occupancy in the table
if occupancy < 50:
    raise Exception("Table too sparse, please investigate: %s" % occupancy)
Behdad Esfahbode2c95112015-07-20 11:32:48 +0100484 raise Exception ("Table too sparse, please investigate: ", occupancy)