blob: 5726002a149e0aee8f3b757258d9ea495cf209aa [file] [log] [blame]
Ebrahim Byagowia48dd6e2018-03-28 19:08:19 +04301#!/usr/bin/env python
Behdad Esfahbode2c95112015-07-20 11:32:48 +01002
from __future__ import print_function, division, absolute_import

import io
import sys
6
# Exactly four input files are expected after the script name, in the order
# given by the usage message.
if len (sys.argv) != 5:
    print ("usage: ./gen-use-table.py IndicSyllabicCategory.txt IndicPositionalCategory.txt UnicodeData.txt Blocks.txt", file=sys.stderr)
    sys.exit (1)

# Code points whose block is listed here are dropped when the per-file data
# is merged.
BLACKLISTED_BLOCKS = ["Thai", "Lao", "Tibetan"]

# io.open behaves the same on Python 2 and 3 (the `file` builtin exists only
# on Python 2).  The encoding is pinned so that reading does not depend on the
# locale -- NOTE(review): assumes the input files are UTF-8; confirm.
files = [io.open (x, encoding='utf-8') for x in sys.argv[1:]]

# Consume the first two lines of every input except the third one
# (UnicodeData.txt); they are echoed in the generated file's banner.
headers = [[f.readline () for i in range (2)] for j,f in enumerate(files) if j != 2]
headers.append (["UnicodeData.txt does not have a header."])
Behdad Esfahbode2c95112015-07-20 11:32:48 +010017
# Per input file: data[i] maps code point -> property value, and values[i]
# maps property value -> number of code points carrying it.
data = [{} for f in files]
values = [{} for f in files]
for i, f in enumerate (files):
    # The third file (UnicodeData.txt) carries the value of interest in its
    # third field; every other file carries it in the second field.
    value_field = 2 if i == 2 else 1

    for line in f:

        # Strip a trailing '#' comment, if any.
        j = line.find ('#')
        if j >= 0:
            line = line[:j]

        fields = [x.strip () for x in line.split (';')]
        if len (fields) == 1:
            # No ';' separator: nothing to record on this line.
            continue

        # First field is either one hex code point or a "start..end" range.
        uu = fields[0].split ('..')
        start = int (uu[0], 16)
        if len (uu) == 1:
            end = start
        else:
            end = int (uu[1], 16)

        t = fields[value_field]

        for u in range (start, end + 1):
            data[i][u] = t
        # Count the whole (inclusive) range at once.
        values[i][t] = values[i].get (t, 0) + end - start + 1
43
# Default value for each of the four inputs, in input order.
defaults = ('Other', 'Not_Applicable', 'Cn', 'No_Block')

# TODO Characters that are not in Unicode Indic files, but used in USE
data[0][0x034F] = defaults[0]
data[0][0x2060] = defaults[0]
data[0][0x20F0] = defaults[0]
for u in range (0xFE00, 0xFE0F + 1):
    data[0][u] = defaults[0]

# Merge data into one dict:
for i,v in enumerate (defaults):
    values[i][v] = values[i].get (v, 0) + 1
combined = {}
for i,d in enumerate (data):
    for u,v in d.items ():
        if u not in combined:
            # Only the first two inputs may introduce a code point; the
            # later ones merely annotate code points that are already there.
            if i >= 2:
                continue
            combined[u] = list (defaults)
        combined[u][i] = v
# Drop every code point whose block (fourth slot) is blacklisted.
combined = {k:v for k,v in combined.items() if v[3] not in BLACKLISTED_BLOCKS}
data = combined
del combined
num = len (data)
68
Behdad Esfahbod20e246e2015-07-20 15:56:19 +010069
# Every property value name the script knows about.  Each one is later turned
# into a module-level PropertyValue global of the same name.
property_names = [
    # General_Category
    'Cc', 'Cf', 'Cn', 'Co', 'Cs', 'Ll', 'Lm', 'Lo', 'Lt', 'Lu', 'Mc',
    'Me', 'Mn', 'Nd', 'Nl', 'No', 'Pc', 'Pd', 'Pe', 'Pf', 'Pi', 'Po',
    'Ps', 'Sc', 'Sk', 'Sm', 'So', 'Zl', 'Zp', 'Zs',
    # Indic_Syllabic_Category
    'Other',
    'Bindu',
    'Visarga',
    'Avagraha',
    'Nukta',
    'Virama',
    'Pure_Killer',
    'Invisible_Stacker',
    'Vowel_Independent',
    'Vowel_Dependent',
    'Vowel',
    'Consonant_Placeholder',
    'Consonant',
    'Consonant_Dead',
    'Consonant_With_Stacker',
    'Consonant_Prefixed',
    'Consonant_Preceding_Repha',
    'Consonant_Succeeding_Repha',
    'Consonant_Subjoined',
    'Consonant_Medial',
    'Consonant_Final',
    'Consonant_Head_Letter',
    'Modifying_Letter',
    'Tone_Letter',
    'Tone_Mark',
    'Gemination_Mark',
    'Cantillation_Mark',
    'Register_Shifter',
    'Syllable_Modifier',
    'Consonant_Killer',
    'Non_Joiner',
    'Joiner',
    'Number_Joiner',
    'Number',
    'Brahmi_Joining_Number',
    # Indic_Positional_Category
    'Not_Applicable',
    'Right',
    'Left',
    'Visual_Order_Left',
    'Left_And_Right',
    'Top',
    'Bottom',
    'Top_And_Bottom',
    'Top_And_Right',
    'Top_And_Left',
    'Top_And_Left_And_Right',
    'Bottom_And_Left',
    'Bottom_And_Right',
    'Top_And_Bottom_And_Right',
    'Overstruck',
]
128
class PropertyValue(object):
    """A named property value that compares equal to its own name.

    An instance equals a plain string holding the same name, and equals any
    other object whose ``name`` attribute holds the same name.  The hash is the
    hash of the name, so an instance can be looked up in a dict or set keyed
    by name strings.
    """

    # `basestring` exists only on Python 2; Python 3 has a single `str` type.
    try:
        _string_types = (basestring,)
    except NameError:
        _string_types = (str,)

    def __init__(self, name_):
        self.name = name_

    def __str__(self):
        return self.name

    def __eq__(self, other):
        if not isinstance(other, self._string_types):
            try:
                other = other.name
            except AttributeError:
                # Not a string and not name-bearing: let Python fall back
                # to its default comparison instead of raising.
                return NotImplemented
        return self.name == other

    def __ne__(self, other):
        # Python 2 does not derive != from ==.
        return not (self == other)

    def __hash__(self):
        # Defining __eq__ disables the inherited hash on Python 3; hash by
        # name so equal objects (including the bare name string) hash alike.
        return hash(self.name)
Behdad Esfahbod20e246e2015-07-20 15:56:19 +0100138
# name -> PropertyValue for every entry of property_names.  The values are
# also published as module-level globals so the rest of the script can refer
# to them by bare name (e.g. `Lo`, `Virama`, `Top`).
property_values = {}

for name in property_names:
    value = PropertyValue(name)
    # Both containers are keyed by name strings, so test the name itself:
    # this catches duplicate names and clashes with existing globals without
    # depending on how PropertyValue hashes.
    assert name not in property_values
    assert name not in globals()
    property_values[name] = value
globals().update(property_values)
147
148
def is_BASE(U, UISC, UGC):
    """Return True if the character belongs to USE class 'B'.

    U is the code point; UISC and UGC are its Indic_Syllabic_Category and
    General_Category values.
    """
    return (UISC in [Number, Consonant, Consonant_Head_Letter,
                     #SPEC-DRAFT Consonant_Placeholder,
                     Tone_Letter,
                     Vowel_Independent #SPEC-DRAFT
                     ] or
            # Letters (Lo) in categories that are otherwise treated as marks.
            (UGC == Lo and UISC in [Avagraha, Bindu, Consonant_Final, Consonant_Medial,
                                    Consonant_Subjoined, Vowel, Vowel_Dependent]))
def is_BASE_IND(U, UISC, UGC):
    """Return True if the character belongs to USE class 'IND'.

    U is the code point; UISC and UGC are its Indic_Syllabic_Category and
    General_Category values.
    """
    #SPEC-DRAFT return (UISC in [Consonant_Dead, Modifying_Letter] or UGC == Po)
    # SPEC-DRAFT-OUTDATED! U == 0x002D is deliberately not accepted here.
    return (UISC in [Consonant_Dead, Modifying_Letter] or
            # Po punctuation, minus a few explicitly excluded code points.
            (UGC == Po and U not in [0x104E, 0x2022, 0x11A3F, 0x11A45]))
def is_BASE_NUM(U, UISC, UGC):
    """Return True if the character belongs to USE class 'N'.

    Only UISC (the Indic_Syllabic_Category value) is consulted.
    """
    return UISC == Brahmi_Joining_Number
def is_BASE_OTHER(U, UISC, UGC):
    """Return True if the character belongs to USE class 'GB'.

    U is the code point and UISC its Indic_Syllabic_Category value; UGC is
    not consulted.
    """
    if UISC == Consonant_Placeholder:
        return True #SPEC-DRAFT
    #SPEC-DRAFT return U in [0x00A0, 0x00D7, 0x2015, 0x2022, 0x25CC, 0x25FB, 0x25FC, 0x25FD, 0x25FE]
    return U in [0x2015, 0x2022, 0x25FB, 0x25FC, 0x25FD, 0x25FE]
def is_CGJ(U, UISC, UGC):
    """Return True if the character belongs to USE class 'CGJ' (U+034F only).

    Only the code point U is consulted.
    """
    return U == 0x034F
def is_CONS_FINAL(U, UISC, UGC):
    """Return True if the character belongs to USE class 'F'.

    UISC and UGC are the Indic_Syllabic_Category and General_Category values;
    the code point U is not consulted.
    """
    # Consonant_Final letters (Lo) are excluded here.
    return ((UISC == Consonant_Final and UGC != Lo) or
            UISC == Consonant_Succeeding_Repha)
def is_CONS_FINAL_MOD(U, UISC, UGC):
    """Return True if the character belongs to USE class 'FM'.

    Only UISC (the Indic_Syllabic_Category value) is consulted.
    """
    #SPEC-DRAFT return UISC in [Consonant_Final_Modifier, Syllable_Modifier]
    return UISC == Syllable_Modifier
def is_CONS_MED(U, UISC, UGC):
    """Return True if the character belongs to USE class 'M'.

    UISC and UGC are the Indic_Syllabic_Category and General_Category values;
    the code point U is not consulted.
    """
    return UISC == Consonant_Medial and UGC != Lo
def is_CONS_MOD(U, UISC, UGC):
    """Return True if the character belongs to USE class 'CM'.

    Only UISC (the Indic_Syllabic_Category value) is consulted.
    """
    return UISC in [Nukta, Gemination_Mark, Consonant_Killer]
def is_CONS_SUB(U, UISC, UGC):
    """Return True if the character belongs to USE class 'SUB'.

    UISC and UGC are the Indic_Syllabic_Category and General_Category values;
    the code point U is not consulted.
    """
    #SPEC-DRAFT return UISC == Consonant_Subjoined
    return UISC == Consonant_Subjoined and UGC != Lo
def is_CONS_WITH_STACKER(U, UISC, UGC):
    """Return True if the character belongs to USE class 'CS'.

    Only UISC (the Indic_Syllabic_Category value) is consulted.
    """
    return UISC == Consonant_With_Stacker
def is_HALANT(U, UISC, UGC):
    """Return True if the character belongs to USE class 'H'.

    Only UISC (the Indic_Syllabic_Category value) is consulted.
    """
    return UISC in [Virama, Invisible_Stacker]
def is_HALANT_NUM(U, UISC, UGC):
    """Return True if the character belongs to USE class 'HN'.

    Only UISC (the Indic_Syllabic_Category value) is consulted.
    """
    return UISC == Number_Joiner
def is_ZWNJ(U, UISC, UGC):
    """Return True if the character belongs to USE class 'ZWNJ'.

    Only UISC (the Indic_Syllabic_Category value) is consulted.
    """
    return UISC == Non_Joiner
def is_ZWJ(U, UISC, UGC):
    """Return True if the character belongs to USE class 'ZWJ'.

    Only UISC (the Indic_Syllabic_Category value) is consulted.
    """
    return UISC == Joiner
def is_Word_Joiner(U, UISC, UGC):
    """Return True if the character belongs to USE class 'WJ' (U+2060 only).

    Only the code point U is consulted.
    """
    return U == 0x2060
def is_OTHER(U, UISC, UGC):
    """Return True if the character belongs to USE class 'O'.

    That is: its Indic_Syllabic_Category is Other and none of the more
    specific code-point based classes (SM, CGJ, WJ, VS) claims it.
    """
    #SPEC-OUTDATED return UGC == Zs # or any other SCRIPT_COMMON characters
    return (UISC == Other
            and not is_SYM_MOD(U, UISC, UGC)
            and not is_CGJ(U, UISC, UGC)
            and not is_Word_Joiner(U, UISC, UGC)
            and not is_VARIATION_SELECTOR(U, UISC, UGC)
            )
def is_Reserved(U, UISC, UGC):
    """Return True if the character belongs to USE class 'Rsv'.

    Only UGC (the General_Category value) is consulted; it must equal 'Cn'.
    """
    return UGC == 'Cn'
def is_REPHA(U, UISC, UGC):
    """Return True if the character belongs to USE class 'R'.

    Only UISC (the Indic_Syllabic_Category value) is consulted.
    """
    return UISC in [Consonant_Preceding_Repha, Consonant_Prefixed]
def is_SYM(U, UISC, UGC):
    """Return True if the character belongs to USE class 'S'.

    U is the code point and UGC its General_Category value; UISC is not
    consulted.
    """
    if U == 0x25CC:
        return False #SPEC-DRAFT
    #SPEC-DRAFT return UGC in [So, Sc] or UISC == Symbol_Letter
    return UGC in [So, Sc]
def is_SYM_MOD(U, UISC, UGC):
    """Return True if the character belongs to USE class 'SM'.

    Only the code point U is consulted; the class is a fixed list of code
    points.
    """
    return U in [0x1B6B, 0x1B6C, 0x1B6D, 0x1B6E, 0x1B6F, 0x1B70, 0x1B71, 0x1B72, 0x1B73]
def is_VARIATION_SELECTOR(U, UISC, UGC):
    """Return True if the character belongs to USE class 'VS'.

    Only the code point U is consulted; the class is the inclusive range
    U+FE00..U+FE0F.
    """
    return 0xFE00 <= U <= 0xFE0F
def is_VOWEL(U, UISC, UGC):
    """Return True if the character belongs to USE class 'V'.

    U is the code point; UISC and UGC are its Indic_Syllabic_Category and
    General_Category values.
    """
    # https://github.com/roozbehp/unicode-data/issues/6
    # U+AA29 is excluded here; is_VOWEL_MOD accepts it instead.
    return (UISC == Pure_Killer or
            (UGC != Lo and UISC in [Vowel, Vowel_Dependent] and U not in [0xAA29]))
def is_VOWEL_MOD(U, UISC, UGC):
    """Return True if the character belongs to USE class 'VM'.

    U is the code point; UISC and UGC are its Indic_Syllabic_Category and
    General_Category values.
    """
    # https://github.com/roozbehp/unicode-data/issues/6
    # U+AA29 is accepted here by code point (is_VOWEL excludes it).
    return (UISC in [Tone_Mark, Cantillation_Mark, Register_Shifter, Visarga] or
            (UGC != Lo and (UISC == Bindu or U in [0xAA29])))
Behdad Esfahbod20e246e2015-07-20 15:56:19 +0100224
# USE class tag -> predicate taking (U, UISC, UGC).  map_to_use requires that
# exactly one predicate accepts any given character.
use_mapping = {
    'B': is_BASE,
    'IND': is_BASE_IND,
    'N': is_BASE_NUM,
    'GB': is_BASE_OTHER,
    'CGJ': is_CGJ,
    'F': is_CONS_FINAL,
    'FM': is_CONS_FINAL_MOD,
    'M': is_CONS_MED,
    'CM': is_CONS_MOD,
    'SUB': is_CONS_SUB,
    'CS': is_CONS_WITH_STACKER,
    'H': is_HALANT,
    'HN': is_HALANT_NUM,
    'ZWNJ': is_ZWNJ,
    'ZWJ': is_ZWJ,
    'WJ': is_Word_Joiner,
    'O': is_OTHER,
    'Rsv': is_Reserved,
    'R': is_REPHA,
    'S': is_SYM,
    'SM': is_SYM_MOD,
    'VS': is_VARIATION_SELECTOR,
    'V': is_VOWEL,
    'VM': is_VOWEL_MOD,
}
251
# USE class tag -> {position suffix -> Indic_Positional_Category values that
# select it}.  A class mapped to None may carry a positional category but gets
# no suffix appended to its tag.
use_positions = {
    'F': {
        'Abv': [Top],
        'Blw': [Bottom],
        'Pst': [Right],
    },
    'M': {
        'Abv': [Top],
        'Blw': [Bottom, Bottom_And_Left],
        'Pst': [Right],
        'Pre': [Left],
    },
    'CM': {
        'Abv': [Top],
        'Blw': [Bottom],
    },
    'V': {
        'Abv': [Top, Top_And_Bottom, Top_And_Bottom_And_Right, Top_And_Right],
        'Blw': [Bottom, Overstruck, Bottom_And_Right],
        'Pst': [Right],
        'Pre': [Left, Top_And_Left, Top_And_Left_And_Right, Left_And_Right],
    },
    'VM': {
        'Abv': [Top],
        'Blw': [Bottom, Overstruck],
        'Pst': [Right],
        'Pre': [Left],
    },
    'SM': {
        'Abv': [Top],
        'Blw': [Bottom],
    },
    'H': None,
    'B': None,
    'FM': None,
    'SUB': None,
}
289
def map_to_use(data):
    """Map every code point in *data* to its USE category and block.

    data maps a code point to a (UISC, UIPC, UGC, UBlock) sequence.  The
    result maps each code point to a (USE, UBlock) tuple, where USE is the
    tag of the single use_mapping predicate that accepts the character,
    followed by a positional suffix when use_positions defines suffixes for
    that tag.

    Raises AssertionError if a character is accepted by zero or by several
    predicates, if it carries a positional category its class has no entry
    for, or if that category does not select exactly one suffix.
    """
    out = {}
    items = use_mapping.items()
    for U,(UISC,UIPC,UGC,UBlock) in data.items():

        # Resolve Indic_Syllabic_Category

        # TODO: These don't have UISC assigned in Unicode 8.0, but
        # have UIPC
        if U == 0x17DD: UISC = Vowel_Dependent
        if 0x1CE2 <= U <= 0x1CE8: UISC = Cantillation_Mark

        # TODO: https://github.com/harfbuzz/harfbuzz/pull/627
        if 0x1BF2 <= U <= 0x1BF3:
            UISC = Nukta
            UIPC = Bottom

        # TODO: U+1CED should only be allowed after some of
        # the nasalization marks, maybe only for U+1CE9..U+1CF1.
        if U == 0x1CED: UISC = Tone_Mark

        # TODO: https://github.com/harfbuzz/harfbuzz/issues/525
        if U == 0x1A7F:
            UISC = Consonant_Final
            UIPC = Bottom

        # TODO: https://github.com/harfbuzz/harfbuzz/pull/609
        if U == 0x20F0:
            UISC = Cantillation_Mark
            UIPC = Top

        # TODO: https://github.com/harfbuzz/harfbuzz/pull/626
        if U == 0xA8B4: UISC = Consonant_Medial

        # Exactly one class predicate must accept the character.
        matches = [k for k,v in items if v(U,UISC,UGC)]
        assert len(matches) == 1, "%s %s %s %s" % (hex(U), UISC, UGC, matches)
        USE = matches[0]

        # Resolve Indic_Positional_Category

        # TODO: Not in Unicode 8.0 yet, but in spec.
        if U == 0x1B6C: UIPC = Bottom

        # TODO: These should die, but have UIPC in Unicode 8.0
        if U in [0x953, 0x954]: UIPC = Not_Applicable

        # TODO: In USE's override list but not in Unicode 8.0
        if U == 0x103C: UIPC = Left

        # TODO: These are not in USE's override list that we have, nor are they in Unicode 8.0
        if 0xA926 <= U <= 0xA92A: UIPC = Top
        if U == 0x111CA: UIPC = Bottom
        if U == 0x11300: UIPC = Top
        if U == 0x1133C: UIPC = Bottom
        if U == 0x1171E: UIPC = Left # Correct?!
        if 0x1CF2 <= U <= 0x1CF3: UIPC = Right
        if 0x1CF8 <= U <= 0x1CF9: UIPC = Top

        # A real position is only acceptable on a class listed in
        # use_positions (including those listed with None).
        assert (UIPC in [Not_Applicable, Visual_Order_Left] or
                USE in use_positions), "%s %s %s %s %s" % (hex(U), UIPC, USE, UISC, UGC)

        pos_mapping = use_positions.get(USE, None)
        if pos_mapping:
            # Exactly one suffix must claim this positional category.
            suffixes = [k for k,v in pos_mapping.items() if v and UIPC in v]
            assert len(suffixes) == 1, "%s %s %s %s %s %s" % (hex(U), UIPC, USE, UISC, UGC, suffixes)
            USE = USE + suffixes[0]

        out[U] = (USE, UBlock)
    return out
353
# From here on every data entry is a (USE, block) pair; this is the pair used
# for code points that have no entry.
defaults = ('O', 'No_Block')
data = map_to_use(data)

# Banner of the generated file: how it was produced and from which inputs.
print ("/* == Start of generated table == */")
print ("/*")
print (" * The following table is generated by running:")
print (" *")
print (" * ./gen-use-table.py IndicSyllabicCategory.txt IndicPositionalCategory.txt UnicodeData.txt Blocks.txt")
print (" *")
print (" * on files with these headers:")
print (" *")
for h in headers:
    for l in h:
        print (" * %s" % (l.strip()))
print (" */")
print ()
print ('#include "hb-ot-shape-complex-use-private.hh"')
print ()
Behdad Esfahbode2c95112015-07-20 11:32:48 +0100372
# Running statistics updated by print_block: table slots emitted, slots that
# hold real data, and the block whose heading was printed last.
total = 0
used = 0
last_block = None
def print_block (block, start, end, data):
    """Print the table entries for code points start..end (inclusive).

    block is the block name to announce in a comment, or None for filler
    between blocks.  start must be a multiple of 8 and end + 1 must be too.
    data maps code points to (USE, block) pairs; code points missing from it
    are printed with the module-level `defaults` entry.

    Updates the module-level counters `total` and `used`, and `last_block`
    when block is given.  Raises AssertionError on a misaligned range.
    """
    global total, used, last_block
    # NOTE(review): leading whitespace inside the string literals is kept as
    # found; confirm the intended column layout of the generated table.
    if block and block != last_block:
        print ()
        print ()
        print (" /* %s */" % block)
        if start % 16:
            # Block starts mid-row: pad so its entries line up with the
            # columns of a full 16-entry row.
            print (' ' * (20 + (start % 16 * 6)), end='')
    num = 0
    assert start % 8 == 0
    assert (end+1) % 8 == 0
    for u in range (start, end+1):
        if u % 16 == 0:
            # Start of a 16-entry row: label it with its first code point.
            print ()
            print (" /* %04X */" % u, end='')
        if u in data:
            num += 1
        d = data.get (u, defaults)
        print ("%6s," % d[0], end='')

    total += end - start + 1
    used += num
    if block:
        last_block = block
400
# All code points with data, ascending.  sorted() works on both Python 2 and
# 3, whereas dict.keys() has no .sort() on Python 3.
uu = sorted (data)

last = -100000  # last code point emitted so far (sentinel: nothing yet)
num = 0
offset = 0      # number of table items emitted before the current run
starts = []     # first code point of each contiguous run in the table
ends = []       # one past the last code point of each run
# Short aliases used inside the table.  Classes that have positional suffixes
# get one alias per suffixed tag instead of one for the bare tag.
for k,v in sorted(use_mapping.items()):
    if k in use_positions and use_positions[k]: continue
    # v.__name__[3:] is the predicate name without its "is_" prefix.
    print ("#define %s USE_%s /* %s */" % (k, k, v.__name__[3:]))
for k,v in sorted(use_positions.items()):
    if not v: continue
    for suf in v.keys():
        tag = k + suf
        print ("#define %s USE_%s" % (tag, tag))
print ("")
print ("static const USE_TABLE_ELEMENT_TYPE use_table[] = {")
# Emit the table as runs of 8-aligned ranges.  Small gaps between ranges are
# filled with default entries; a large gap closes the current run and opens a
# new one with its own use_offset_* macro.
for u in uu:
    if u <= last:
        # Already covered by a previously printed range.
        continue
    block = data[u][1]

    # Round down to a multiple of 8, then extend over consecutive code points
    # of the same block and round the end up to the next multiple of 8.
    start = u//8*8
    end = start+1
    # uu holds exactly the keys of data, so a dict lookup is equivalent to
    # scanning the list and is constant-time.
    while end in data and block == data[end][1]:
        end += 1
    end = (end-1)//8*8 + 7

    if start != last + 1:
        if start - last <= 1+16*3:
            # Gap of at most three rows: pad it in place.
            print_block (None, last+1, start-1, data)
            last = start-1
        else:
            if last >= 0:
                ends.append (last + 1)
                offset += ends[-1] - starts[-1]
            print ()
            print ()
            print ("#define use_offset_0x%04xu %d" % (start, offset))
            starts.append (start)

    print_block (block, start, end, data)
    last = end
# Close the final run.
ends.append (last + 1)
offset += ends[-1] - starts[-1]
print ()
print ()
occupancy = used * 100. / total
page_bits = 12
print ("}; /* Table items: %d; occupancy: %d%% */" % (offset, occupancy))
print ()
# Emit the C lookup function: switch on the page (u >> page_bits), then test
# each run that starts or ends in that page.
# NOTE(review): leading whitespace inside the string literals is kept as
# found; confirm the intended indentation of the generated C code.
print ("USE_TABLE_ELEMENT_TYPE")
print ("hb_use_get_category (hb_codepoint_t u)")
print ("{")
print (" switch (u >> %d)" % page_bits)
print (" {")
pages = {u>>page_bits for u in starts+ends}
for p in sorted(pages):
    print (" case 0x%0Xu:" % p)
    for (start,end) in zip (starts, ends):
        if p not in [start>>page_bits, end>>page_bits]: continue
        offset = "use_offset_0x%04xu" % start
        # `end` is one past the run, hence end-1 as the inclusive upper bound.
        print (" if (hb_in_range<hb_codepoint_t> (u, 0x%04Xu, 0x%04Xu)) return use_table[u - 0x%04Xu + %s];" % (start, end-1, start, offset))
    print (" break;")
    print ("")
print (" default:")
print (" break;")
print (" }")
print (" return USE_O;")
print ("}")
print ()
# Undefine every short alias again: bare tags for classes without positional
# suffixes, suffixed tags for the others.
for k in sorted(use_mapping.keys()):
    if k in use_positions and use_positions[k]: continue
    print ("#undef %s" % k)
for k,v in sorted(use_positions.items()):
    if not v: continue
    for suf in v.keys():
        tag = k + suf
        print ("#undef %s" % tag)
print ()
print ("/* == End of generated table == */")
Behdad Esfahbode2c95112015-07-20 11:32:48 +0100483
# Maintain at least 50% occupancy in the table: fail the run if fewer than
# half of the emitted slots hold real data.
if occupancy < 50:
    raise Exception ("Table too sparse, please investigate: %s" % occupancy)