blob: 6aa5f881c73fbf82f19c4a4b79e4612c9563db01 [file] [log] [blame]
Ebrahim Byagowia48dd6e2018-03-28 19:08:19 +04301#!/usr/bin/env python
Behdad Esfahbode2c95112015-07-20 11:32:48 +01002
Ebrahim Byagowicab2c2c2018-03-29 12:48:47 +04303from __future__ import print_function, division, absolute_import
4
Ebrahim Byagowi80395f12018-03-29 22:00:41 +04305import io, sys
Behdad Esfahbode2c95112015-07-20 11:32:48 +01006
# The script takes exactly four input files, in the order shown in the usage
# message below.
if len (sys.argv) != 5:
    print ("usage: ./gen-use-table.py IndicSyllabicCategory.txt IndicPositionalCategory.txt UnicodeData.txt Blocks.txt", file=sys.stderr)
    sys.exit (1)

# Block names compared against the Blocks.txt value of each code point.
BLACKLISTED_BLOCKS = ["Thai", "Lao", "Tibetan"]

# Open the inputs one at a time so that whatever was opened is always closed
# again, even if a later open or the parsing below fails.
files = []
try:
    for path in sys.argv[1:]:
        files.append (io.open (path, encoding='utf-8'))

    # The first two lines of every file except the third one (index 2) are
    # kept as headers; the third file gets a fixed placeholder instead.
    headers = [[f.readline () for i in range (2)] for j,f in enumerate(files) if j != 2]
    headers.append (["UnicodeData.txt does not have a header."])

    # data[i]   maps code point -> property value read from file i.
    # values[i] maps property value -> number of code points carrying it.
    data = [{} for f in files]
    values = [{} for f in files]
    for i, f in enumerate (files):
        for line in f:

            # Drop trailing '#' comments.
            j = line.find ('#')
            if j >= 0:
                line = line[:j]

            # Lines without any ';' separator carry no record.
            fields = [x.strip () for x in line.split (';')]
            if len (fields) == 1:
                continue

            # The first field is either a single code point or "start..end",
            # both in hexadecimal.
            uu = fields[0].split ('..')
            start = int (uu[0], 16)
            if len (uu) == 1:
                end = start
            else:
                end = int (uu[1], 16)

            # The value lives in field 1, except for the third file where it
            # lives in field 2.
            t = fields[1 if i != 2 else 2]

            for u in range (start, end + 1):
                data[i][u] = t
            values[i][t] = values[i].get (t, 0) + end - start + 1
finally:
    for f in files:
        f.close ()
43
# Fallback value for each of the four property slots, in the same order as
# the per-file dictionaries in `data`.
defaults = ('Other', 'Not_Applicable', 'Cn', 'No_Block')

# TODO Characters that are not in Unicode Indic files, but used in USE
for u in [0x034F, 0x2060, 0x20F0] + list (range (0xFE00, 0xFE0F + 1)):
    data[0][u] = defaults[0]

# Merge data into one dict:
for i, v in enumerate (defaults):
    values[i].setdefault (v, 0)
    values[i][v] += 1

combined = {}
for i, d in enumerate (data):
    for u, v in d.items ():
        if u not in combined:
            # Only the first two sources may introduce a code point; the
            # later ones merely annotate code points already present.
            if i >= 2:
                continue
            combined[u] = list (defaults)
        combined[u][i] = v

# Drop every code point whose block (slot 3) is blacklisted, then let `data`
# refer to the merged mapping from here on.
data = {cp: props for cp, props in combined.items () if props[3] not in BLACKLISTED_BLOCKS}
del combined
num = len (data)
68
Behdad Esfahbod20e246e2015-07-20 15:56:19 +010069
# Every property value name the script knows about, grouped by the Unicode
# property it belongs to.  Order is significant only in that it is preserved.
property_names = (
    # General_Category
    'Cc Cf Cn Co Cs Ll Lm Lo Lt Lu Mc Me Mn Nd Nl No Pc Pd Pe Pf Pi Po Ps Sc Sk Sm So Zl Zp Zs'.split ()
    +
    # Indic_Syllabic_Category
    [
        'Other', 'Bindu', 'Visarga', 'Avagraha', 'Nukta', 'Virama',
        'Pure_Killer', 'Invisible_Stacker',
        'Vowel_Independent', 'Vowel_Dependent', 'Vowel',
        'Consonant_Placeholder', 'Consonant', 'Consonant_Dead',
        'Consonant_With_Stacker', 'Consonant_Prefixed',
        'Consonant_Preceding_Repha', 'Consonant_Succeeding_Repha',
        'Consonant_Subjoined', 'Consonant_Medial', 'Consonant_Final',
        'Consonant_Head_Letter', 'Consonant_Initial_Postfixed',
        'Modifying_Letter', 'Tone_Letter', 'Tone_Mark',
        'Gemination_Mark', 'Cantillation_Mark', 'Register_Shifter',
        'Syllable_Modifier', 'Consonant_Killer',
        'Non_Joiner', 'Joiner', 'Number_Joiner',
        'Number', 'Brahmi_Joining_Number',
    ]
    +
    # Indic_Positional_Category
    [
        'Not_Applicable', 'Right', 'Left', 'Visual_Order_Left',
        'Left_And_Right', 'Top', 'Bottom', 'Top_And_Bottom',
        'Top_And_Right', 'Top_And_Left', 'Top_And_Left_And_Right',
        'Bottom_And_Left', 'Bottom_And_Right',
        'Top_And_Bottom_And_Right', 'Overstruck',
    ]
)
129
try:
    basestring
except NameError:
    # No `basestring` builtin in this interpreter; fall back to `str`.
    basestring = str

class PropertyValue(object):
    """A named property value that compares equal to its own name.

    An instance is equal to a string holding the same name and to any other
    object whose `name` attribute holds the same name.  It hashes like the
    name string, so instances and plain strings are interchangeable as
    dictionary keys and in membership tests.
    """
    def __init__(self, name_):
        self.name = name_
    def __str__(self):
        return self.name
    def __repr__(self):
        return "%s(%r)" % (type(self).__name__, self.name)
    def __eq__(self, other):
        if isinstance(other, basestring):
            return self.name == other
        try:
            other_name = other.name
        except AttributeError:
            # Unrelated type (None, int, ...): let Python fall back to its
            # default comparison instead of raising from inside `==`.
            return NotImplemented
        return self.name == other_name
    def __ne__(self, other):
        return not (self == other)
    def __hash__(self):
        return hash(str(self))
Behdad Esfahbod20e246e2015-07-20 15:56:19 +0100146
# Build one PropertyValue per known name and expose each of them as a
# module-level name, so the rest of the script can write e.g. `Lo` or `Top`.
property_values = {}

for name in property_names:
    value = PropertyValue(name)
    # Refuse duplicates and names that would clobber an existing global.
    assert not (value in property_values)
    assert not (value in globals())
    property_values[name] = value

globals().update(property_values)
155
156
def is_BASE(U, UISC, UGC):
    """Return True if the character is classified as a USE base (B)."""
    # Consonant_Placeholder is deliberately absent here (SPEC-DRAFT lists
    # it); Vowel_Independent is included as a SPEC-DRAFT deviation.
    if UISC in [Number, Consonant, Consonant_Head_Letter,
                Tone_Letter, Vowel_Independent]:
        return True
    # Otherwise only "Lo" letters with one of these categories qualify.
    return UGC == Lo and UISC in [Avagraha, Bindu, Consonant_Final,
                                  Consonant_Medial, Consonant_Subjoined,
                                  Vowel, Vowel_Dependent]
def is_BASE_IND(U, UISC, UGC):
    """Return True if the character is classified as USE IND."""
    #SPEC-DRAFT return (UISC in [Consonant_Dead, Modifying_Letter] or UGC == Po)
    if UISC in [Consonant_Dead, Modifying_Letter]:
        return True
    # "Po" punctuation qualifies too, apart from a few excluded code points.
    # SPEC-DRAFT-OUTDATED! U == 0x002D is no longer accepted.
    return UGC == Po and U not in [0x104B, 0x104E, 0x2022, 0x11A3F, 0x11A45]
def is_BASE_NUM(U, UISC, UGC):
    """Return True if the character is classified as USE N."""
    return UISC in [Brahmi_Joining_Number]
def is_BASE_OTHER(U, UISC, UGC):
    """Return True if the character is classified as USE GB."""
    #SPEC-DRAFT return U in [0x00A0, 0x00D7, 0x2015, 0x2022, 0x25CC, 0x25FB, 0x25FC, 0x25FD, 0x25FE]
    # Consonant_Placeholder is accepted here as a SPEC-DRAFT deviation.
    return (UISC == Consonant_Placeholder or
            U in [0x2015, 0x2022, 0x25FB, 0x25FC, 0x25FD, 0x25FE])
def is_CGJ(U, UISC, UGC):
    """Return True only for code point U+034F."""
    return 0x034F == U
def is_CONS_FINAL(U, UISC, UGC):
    """Return True if the character is classified as USE F."""
    if UISC == Consonant_Final and UGC != Lo:
        return True
    # Consonant_Initial_Postfixed is new in Unicode 11; not in the spec.
    return UISC in [Consonant_Initial_Postfixed, Consonant_Succeeding_Repha]
def is_CONS_FINAL_MOD(U, UISC, UGC):
    """Return True if the character is classified as USE FM."""
    #SPEC-DRAFT return UISC in [Consonant_Final_Modifier, Syllable_Modifier]
    return UISC in [Syllable_Modifier]
def is_CONS_MED(U, UISC, UGC):
    """Return True if the character is classified as USE M."""
    if not (UISC == Consonant_Medial):
        return False
    # "Lo" letters are excluded.
    return UGC != Lo
def is_CONS_MOD(U, UISC, UGC):
    """Return True if the character is classified as USE CM."""
    consonant_modifiers = [Nukta, Gemination_Mark, Consonant_Killer]
    return UISC in consonant_modifiers
def is_CONS_SUB(U, UISC, UGC):
    """Return True if the character is classified as USE SUB."""
    #SPEC-DRAFT return UISC == Consonant_Subjoined
    if not (UISC == Consonant_Subjoined):
        return False
    # "Lo" letters are excluded.
    return UGC != Lo
def is_CONS_WITH_STACKER(U, UISC, UGC):
    """Return True if the character is classified as USE CS."""
    return UISC in [Consonant_With_Stacker]
def is_HALANT(U, UISC, UGC):
    """Return True if the character is classified as USE H."""
    halant_categories = [Virama, Invisible_Stacker]
    return UISC in halant_categories
def is_HALANT_NUM(U, UISC, UGC):
    """Return True if the character is classified as USE HN."""
    return UISC in [Number_Joiner]
def is_ZWNJ(U, UISC, UGC):
    """Return True if the syllabic category is Non_Joiner."""
    return UISC in [Non_Joiner]
def is_ZWJ(U, UISC, UGC):
    """Return True if the syllabic category is Joiner."""
    return UISC in [Joiner]
def is_Word_Joiner(U, UISC, UGC):
    """Return True only for code point U+2060."""
    return 0x2060 == U
def is_OTHER(U, UISC, UGC):
    """Return True if the character is classified as USE O.

    That is: syllabic category Other, and not claimed by any of the
    code-point-based classes checked below.
    """
    #SPEC-OUTDATED return UGC == Zs # or any other SCRIPT_COMMON characters
    if not (UISC == Other):
        return False
    return not (is_SYM_MOD(U, UISC, UGC)
                or is_CGJ(U, UISC, UGC)
                or is_Word_Joiner(U, UISC, UGC)
                or is_VARIATION_SELECTOR(U, UISC, UGC))
def is_Reserved(U, UISC, UGC):
    """Return True if the General_Category is 'Cn'."""
    reserved_category = 'Cn'
    return UGC == reserved_category
def is_REPHA(U, UISC, UGC):
    """Return True if the character is classified as USE R."""
    repha_categories = [Consonant_Preceding_Repha, Consonant_Prefixed]
    return UISC in repha_categories
def is_SYM(U, UISC, UGC):
    """Return True if the character is classified as USE S."""
    #SPEC-DRAFT return UGC in [So, Sc] or UISC == Symbol_Letter
    # U+25CC is never a symbol here (SPEC-DRAFT deviation).
    return U != 0x25CC and UGC in [So, Sc]
def is_SYM_MOD(U, UISC, UGC):
    """Return True for the code points U+1B6B through U+1B73."""
    return U in range(0x1B6B, 0x1B73 + 1)
def is_VARIATION_SELECTOR(U, UISC, UGC):
    """Return True for the code points U+FE00 through U+FE0F."""
    first, last = 0xFE00, 0xFE0F
    return first <= U and U <= last
def is_VOWEL(U, UISC, UGC):
    """Return True if the character is classified as USE V."""
    if UISC == Pure_Killer:
        return True
    # U+AA29 is excluded here; see
    # https://github.com/roozbehp/unicode-data/issues/6
    return (UGC != Lo
            and UISC in [Vowel, Vowel_Dependent]
            and U != 0xAA29)
def is_VOWEL_MOD(U, UISC, UGC):
    """Return True if the character is classified as USE VM."""
    if UISC in [Tone_Mark, Cantillation_Mark, Register_Shifter, Visarga]:
        return True
    # U+AA29 is accepted here; see
    # https://github.com/roozbehp/unicode-data/issues/6
    return UGC != Lo and (UISC == Bindu or U == 0xAA29)
Behdad Esfahbod20e246e2015-07-20 15:56:19 +0100234
# USE category tag -> predicate (U, UISC, UGC) deciding whether a character
# belongs to that category.
use_mapping = {
    # Bases
    'B': is_BASE, 'IND': is_BASE_IND, 'N': is_BASE_NUM, 'GB': is_BASE_OTHER,
    'CGJ': is_CGJ,
    # Consonant-related marks
    'F': is_CONS_FINAL, 'FM': is_CONS_FINAL_MOD, 'M': is_CONS_MED,
    'CM': is_CONS_MOD, 'SUB': is_CONS_SUB, 'CS': is_CONS_WITH_STACKER,
    # Halants and joiners
    'H': is_HALANT, 'HN': is_HALANT_NUM,
    'ZWNJ': is_ZWNJ, 'ZWJ': is_ZWJ, 'WJ': is_Word_Joiner,
    # Everything else
    'O': is_OTHER, 'Rsv': is_Reserved, 'R': is_REPHA,
    'S': is_SYM, 'SM': is_SYM_MOD, 'VS': is_VARIATION_SELECTOR,
    'V': is_VOWEL, 'VM': is_VOWEL_MOD,
}
261
# USE category tag -> {position suffix -> positional categories that select
# that suffix}.  A value of None marks a category that may carry a positional
# category but never gets a suffix appended.
use_positions = {
    'F': {'Abv': [Top], 'Blw': [Bottom], 'Pst': [Right]},
    'M': {'Abv': [Top], 'Blw': [Bottom, Bottom_And_Left], 'Pst': [Right], 'Pre': [Left]},
    'CM': {'Abv': [Top], 'Blw': [Bottom]},
    'V': {
        'Abv': [Top, Top_And_Bottom, Top_And_Bottom_And_Right, Top_And_Right],
        'Blw': [Bottom, Overstruck, Bottom_And_Right],
        'Pst': [Right],
        'Pre': [Left, Top_And_Left, Top_And_Left_And_Right, Left_And_Right],
    },
    'VM': {'Abv': [Top], 'Blw': [Bottom, Overstruck], 'Pst': [Right], 'Pre': [Left]},
    'SM': {'Abv': [Top], 'Blw': [Bottom]},
    'H': None, 'B': None, 'FM': None, 'SUB': None,
}
299
def map_to_use(data):
    """Classify every code point in `data` into a USE category.

    `data` maps a code point to a four-item sequence unpacked as
    (Indic_Syllabic_Category, Indic_Positional_Category, General_Category,
    block name).  The result maps each code point to a tuple
    (USE tag, block name), where the tag may carry a position suffix.

    Raises AssertionError when a code point matches zero or several
    categories, or when its positional category cannot be resolved.
    """
    out = {}
    classifiers = use_mapping.items()
    for U, props in data.items():
        UISC, UIPC, UGC, UBlock = props

        # Resolve Indic_Syllabic_Category

        if U == 0x17DD:
            # TODO: These don't have UISC assigned in Unicode 8.0, but
            # have UIPC
            UISC = Vowel_Dependent
        elif 0x1CE2 <= U <= 0x1CE8:
            UISC = Cantillation_Mark
        elif 0x1BF2 <= U <= 0x1BF3:
            # TODO: https://github.com/harfbuzz/harfbuzz/pull/627
            UISC = Nukta
            UIPC = Bottom
        elif U == 0x1CED:
            # TODO: U+1CED should only be allowed after some of
            # the nasalization marks, maybe only for U+1CE9..U+1CF1.
            UISC = Tone_Mark
        elif U == 0x1A7F:
            # TODO: https://github.com/harfbuzz/harfbuzz/issues/525
            UISC = Consonant_Final
            UIPC = Bottom
        elif U == 0x20F0:
            # TODO: https://github.com/harfbuzz/harfbuzz/pull/609
            UISC = Cantillation_Mark
            UIPC = Top
        elif U == 0xA8B4:
            # TODO: https://github.com/harfbuzz/harfbuzz/pull/626
            UISC = Consonant_Medial

        # Exactly one predicate must accept the character.
        values = [tag for tag, predicate in classifiers if predicate(U, UISC, UGC)]
        assert len(values) == 1, "%s %s %s %s" % (hex(U), UISC, UGC, values)
        USE = values[0]

        # Resolve Indic_Positional_Category

        if U == 0x1B6C:
            # TODO: Not in Unicode 8.0 yet, but in spec.
            UIPC = Bottom
        elif U in [0x953, 0x954]:
            # TODO: These should die, but have UIPC in Unicode 8.0
            UIPC = Not_Applicable
        elif U == 0x103C:
            # TODO: In USE's override list but not in Unicode 11.0
            UIPC = Left
        # TODO: These are not in USE's override list that we have, nor are they in Unicode 11.0
        elif 0xA926 <= U <= 0xA92A:
            UIPC = Top
        elif U == 0x111CA:
            UIPC = Bottom
        elif U == 0x11300:
            UIPC = Top
        elif U == 0x1171E:
            UIPC = Left # Correct?!
        elif 0x1CF2 <= U <= 0x1CF3:
            UIPC = Right
        elif 0x1CF8 <= U <= 0x1CF9:
            UIPC = Top
        elif U == 0x0A51:
            # https://github.com/roozbehp/unicode-data/issues/8
            UIPC = Bottom

        # A real position is only allowed on categories listed in
        # use_positions.
        assert (UIPC in [Not_Applicable, Visual_Order_Left] or
                USE in use_positions), "%s %s %s %s %s" % (hex(U), UIPC, USE, UISC, UGC)

        # Append the position suffix when the category distinguishes them.
        pos_mapping = use_positions.get(USE, None)
        if pos_mapping:
            values = [suffix for suffix, positions in pos_mapping.items()
                      if positions and UIPC in positions]
            assert len(values) == 1, "%s %s %s %s %s %s" % (hex(U), UIPC, USE, UISC, UGC, values)
            USE = USE + values[0]

        out[U] = (USE, UBlock)
    return out
364
# From here on `data` maps code point -> (USE tag, block name), and
# `defaults` is the entry used for code points missing from it.
defaults = ('O', 'No_Block')
data = map_to_use(data)

# Banner comment describing how the table was generated.
for banner_line in [
    "/* == Start of generated table == */",
    "/*",
    " * The following table is generated by running:",
    " *",
    " * ./gen-use-table.py IndicSyllabicCategory.txt IndicPositionalCategory.txt UnicodeData.txt Blocks.txt",
    " *",
    " * on files with these headers:",
    " *",
]:
    print (banner_line)

# Echo the header lines captured from the input files.
for header in headers:
    for header_line in header:
        print (" * %s" % (header_line.strip()))
print (" */")
print ()
print ('#include "hb-ot-shape-complex-use-private.hh"')
print ()
Behdad Esfahbode2c95112015-07-20 11:32:48 +0100383
# Running statistics updated by print_block():
#   total      - number of table slots printed so far
#   used       - how many of those slots had an entry in the data
#   last_block - name of the block most recently printed
total = 0
used = 0
last_block = None
def print_block (block, start, end, data):
    """Print the table entries for code points `start`..`end` inclusive.

    `block` is the block name to announce in a comment, or None for filler
    rows.  `start` must be a multiple of 8 and `end + 1` as well.  Code
    points missing from `data` are printed using the module-level
    `defaults` entry.  Updates the `total`, `used` and `last_block` globals.
    """
    global total, used, last_block
    announce = block and block != last_block
    if announce:
        print ()
        print ()
        print (" /* %s */" % block)
    column = start % 16
    if column:
        # Pad so a block starting mid-row lines up with its column.
        print (' ' * (20 + (column * 6)), end='')
    populated = 0
    assert start % 8 == 0
    assert (end+1) % 8 == 0
    for u in range (start, end+1):
        if u % 16 == 0:
            # Start a new row, labelled with its first code point.
            print ()
            print (" /* %04X */" % u, end='')
        if u in data:
            populated += 1
            entry = data[u]
        else:
            entry = defaults
        print ("%6s," % entry[0], end='')

    total += end - start + 1
    used += populated
    if block:
        last_block = block
411
# All code points that have an entry, in ascending order.
uu = sorted (data.keys ())

last = -100000
num = 0
offset = 0
# starts[i] / ends[i] delimit the i-th contiguous run stored in the table;
# ends[i] is one past the last code point of the run.
starts = []
ends = []

# Short aliases for the USE_* names, used only inside the table.  Categories
# that take position suffixes get one alias per suffix instead.
for k,v in sorted(use_mapping.items()):
    if k in use_positions and use_positions[k]: continue
    print ("#define %s USE_%s /* %s */" % (k, k, v.__name__[3:]))
for k,v in sorted(use_positions.items()):
    if not v: continue
    for suf in v.keys():
        tag = k + suf
        print ("#define %s USE_%s" % (tag, tag))
print ("")
print ("static const USE_TABLE_ELEMENT_TYPE use_table[] = {")
for u in uu:
    if u <= last:
        continue
    block = data[u][1]

    # Extend the run while consecutive code points exist in the same block,
    # then round both ends to multiples of 8.  `uu` holds exactly the keys
    # of `data`, so test membership against the dict: it is O(1), whereas
    # scanning the sorted list made this loop quadratic.
    start = u//8*8
    end = start+1
    while end in data and block == data[end][1]:
        end += 1
    end = (end-1)//8*8 + 7

    if start != last + 1:
        if start - last <= 1+16*3:
            # Small gap: fill it with default entries rather than starting
            # a new run.
            print_block (None, last+1, start-1, data)
            last = start-1
        else:
            # Large gap: close the previous run (if any) and open a new one
            # with its own offset macro.
            if last >= 0:
                ends.append (last + 1)
                offset += ends[-1] - starts[-1]
            print ()
            print ()
            print ("#define use_offset_0x%04xu %d" % (start, offset))
            starts.append (start)

    print_block (block, start, end, data)
    last = end
ends.append (last + 1)
offset += ends[-1] - starts[-1]
print ()
print ()
occupancy = used * 100. / total
page_bits = 12
print ("}; /* Table items: %d; occupancy: %d%% */" % (offset, occupancy))
print ()
print ("USE_TABLE_ELEMENT_TYPE")
print ("hb_use_get_category (hb_codepoint_t u)")
print ("{")
print (" switch (u >> %d)" % page_bits)
print (" {")
# One `case` per page touched by a run boundary; each case tests every run
# that starts or ends on that page.
pages = {u>>page_bits for u in starts+ends}
for p in sorted(pages):
    print (" case 0x%0Xu:" % p)
    for (start,end) in zip (starts, ends):
        if p not in [start>>page_bits, end>>page_bits]: continue
        # Kept separate from the integer `offset` accumulated above.
        offset_macro = "use_offset_0x%04xu" % start
        print (" if (hb_in_range<hb_codepoint_t> (u, 0x%04Xu, 0x%04Xu)) return use_table[u - 0x%04Xu + %s];" % (start, end-1, start, offset_macro))
    print (" break;")
    print ("")
print (" default:")
print (" break;")
print (" }")
print (" return USE_O;")
print ("}")
print ()
for k in sorted(use_mapping.keys()):
    if k in use_positions and use_positions[k]: continue
    print ("#undef %s" % k)
for k,v in sorted(use_positions.items()):
    if not v: continue
    for suf in v.keys():
        tag = k + suf
        print ("#undef %s" % tag)
print ()
print ("/* == End of generated table == */")

# Maintain at least 50% occupancy in the table
if occupancy < 50:
    raise Exception ("Table too sparse, please investigate: ", occupancy)