/*
 * Copyright © 2009  Red Hat, Inc.
 * Copyright © 2011  Google, Inc.
 *
 *  This is part of HarfBuzz, a text shaping library.
 *
 * Permission is hereby granted, without written agreement and without
 * license or royalty fees, to use, copy, modify, and distribute this
 * software and its documentation for any purpose, provided that the
 * above copyright notice and the following two paragraphs appear in
 * all copies of this software.
 *
 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 *
 * Red Hat Author(s): Behdad Esfahbod
 * Google Author(s): Behdad Esfahbod, Roozbeh Pournader
 */
28
Behdad Esfahbodc77ae402018-08-25 22:36:36 -070029#include "hb.hh"
Behdad Esfahbod8a3511a2009-11-04 19:45:39 -050030
Behdad Esfahbod8a3511a2009-11-04 19:45:39 -050031
/* hb_script_t */
33
34static hb_tag_t
35hb_ot_old_tag_from_script (hb_script_t script)
36{
Behdad Esfahbod10a33292012-11-02 13:38:55 -070037 /* This seems to be accurate as of end of 2012. */
38
Behdad Esfahbod5646dcb2018-10-11 19:39:07 -040039 switch ((hb_tag_t) script)
40 {
Behdad Esfahbod40b5c2e2011-05-13 22:46:36 -040041 case HB_SCRIPT_INVALID: return HB_OT_TAG_DEFAULT_SCRIPT;
42
43 /* KATAKANA and HIRAGANA both map to 'kana' */
Behdad Esfahbod62879ee2011-04-18 23:40:21 -040044 case HB_SCRIPT_HIRAGANA: return HB_TAG('k','a','n','a');
Behdad Esfahbod40b5c2e2011-05-13 22:46:36 -040045
46 /* Spaces at the end are preserved, unlike ISO 15924 */
Behdad Esfahbod62879ee2011-04-18 23:40:21 -040047 case HB_SCRIPT_LAO: return HB_TAG('l','a','o',' ');
48 case HB_SCRIPT_YI: return HB_TAG('y','i',' ',' ');
49 /* Unicode-5.0 additions */
50 case HB_SCRIPT_NKO: return HB_TAG('n','k','o',' ');
51 /* Unicode-5.1 additions */
52 case HB_SCRIPT_VAI: return HB_TAG('v','a','i',' ');
Behdad Esfahbod62879ee2011-04-18 23:40:21 -040053 }
54
55 /* Else, just change first char to lowercase and return */
Behdad Esfahbod76271002014-07-11 14:54:42 -040056 return ((hb_tag_t) script) | 0x20000000u;
Behdad Esfahbod62879ee2011-04-18 23:40:21 -040057}
58
59static hb_script_t
60hb_ot_old_tag_to_script (hb_tag_t tag)
61{
Behdad Esfahbod40b5c2e2011-05-13 22:46:36 -040062 if (unlikely (tag == HB_OT_TAG_DEFAULT_SCRIPT))
63 return HB_SCRIPT_INVALID;
Behdad Esfahbod62879ee2011-04-18 23:40:21 -040064
Behdad Esfahbod40b5c2e2011-05-13 22:46:36 -040065 /* This side of the conversion is fully algorithmic. */
66
67 /* Any spaces at the end of the tag are replaced by repeating the last
68 * letter. Eg 'nko ' -> 'Nkoo' */
Behdad Esfahbod76271002014-07-11 14:54:42 -040069 if (unlikely ((tag & 0x0000FF00u) == 0x00002000u))
70 tag |= (tag >> 8) & 0x0000FF00u; /* Copy second letter to third */
71 if (unlikely ((tag & 0x000000FFu) == 0x00000020u))
72 tag |= (tag >> 8) & 0x000000FFu; /* Copy third letter to fourth */
Behdad Esfahbod40b5c2e2011-05-13 22:46:36 -040073
74 /* Change first char to uppercase and return */
Behdad Esfahbod76271002014-07-11 14:54:42 -040075 return (hb_script_t) (tag & ~0x20000000u);
Behdad Esfahbod62879ee2011-04-18 23:40:21 -040076}
77
78static hb_tag_t
79hb_ot_new_tag_from_script (hb_script_t script)
80{
81 switch ((hb_tag_t) script) {
82 case HB_SCRIPT_BENGALI: return HB_TAG('b','n','g','2');
83 case HB_SCRIPT_DEVANAGARI: return HB_TAG('d','e','v','2');
84 case HB_SCRIPT_GUJARATI: return HB_TAG('g','j','r','2');
85 case HB_SCRIPT_GURMUKHI: return HB_TAG('g','u','r','2');
86 case HB_SCRIPT_KANNADA: return HB_TAG('k','n','d','2');
87 case HB_SCRIPT_MALAYALAM: return HB_TAG('m','l','m','2');
88 case HB_SCRIPT_ORIYA: return HB_TAG('o','r','y','2');
89 case HB_SCRIPT_TAMIL: return HB_TAG('t','m','l','2');
90 case HB_SCRIPT_TELUGU: return HB_TAG('t','e','l','2');
Behdad Esfahbodde796a62012-11-12 17:27:51 -080091 case HB_SCRIPT_MYANMAR: return HB_TAG('m','y','m','2');
Behdad Esfahbod62879ee2011-04-18 23:40:21 -040092 }
93
Behdad Esfahbod40b5c2e2011-05-13 22:46:36 -040094 return HB_OT_TAG_DEFAULT_SCRIPT;
Behdad Esfahbod62879ee2011-04-18 23:40:21 -040095}
96
97static hb_script_t
98hb_ot_new_tag_to_script (hb_tag_t tag)
99{
100 switch (tag) {
101 case HB_TAG('b','n','g','2'): return HB_SCRIPT_BENGALI;
102 case HB_TAG('d','e','v','2'): return HB_SCRIPT_DEVANAGARI;
103 case HB_TAG('g','j','r','2'): return HB_SCRIPT_GUJARATI;
104 case HB_TAG('g','u','r','2'): return HB_SCRIPT_GURMUKHI;
105 case HB_TAG('k','n','d','2'): return HB_SCRIPT_KANNADA;
106 case HB_TAG('m','l','m','2'): return HB_SCRIPT_MALAYALAM;
107 case HB_TAG('o','r','y','2'): return HB_SCRIPT_ORIYA;
108 case HB_TAG('t','m','l','2'): return HB_SCRIPT_TAMIL;
109 case HB_TAG('t','e','l','2'): return HB_SCRIPT_TELUGU;
Behdad Esfahbodde796a62012-11-12 17:27:51 -0800110 case HB_TAG('m','y','m','2'): return HB_SCRIPT_MYANMAR;
Behdad Esfahbod62879ee2011-04-18 23:40:21 -0400111 }
112
113 return HB_SCRIPT_UNKNOWN;
114}
115
David Corbett91067712017-12-08 11:21:14 -0500116void
117hb_ot_tags_from_script (hb_script_t script,
118 hb_tag_t *script_tag_1,
119 hb_tag_t *script_tag_2)
120{
121 unsigned int count = 2;
122 hb_tag_t tags[2];
David Corbett7f1fbfe2018-07-23 21:19:23 -0400123 hb_ot_tags_from_script_and_language (script, HB_LANGUAGE_INVALID, &count, tags, nullptr, nullptr);
David Corbett91067712017-12-08 11:21:14 -0500124 *script_tag_1 = count > 0 ? tags[0] : HB_OT_TAG_DEFAULT_SCRIPT;
125 *script_tag_2 = count > 1 ? tags[1] : HB_OT_TAG_DEFAULT_SCRIPT;
126}
127
/*
 * Complete list at:
 * https://docs.microsoft.com/en-us/typography/opentype/spec/scripttags
 *
 * Most of the script tags are the same as the ISO 15924 tag but lowercased.
 * So we just do that, and handle the exceptional cases in a switch.
 */
Behdad Esfahbod8a3511a2009-11-04 19:45:39 -0500135
David Corbett91067712017-12-08 11:21:14 -0500136static void
137hb_ot_all_tags_from_script (hb_script_t script,
138 unsigned int *count /* IN/OUT */,
139 hb_tag_t *tags /* OUT */)
Behdad Esfahbod8a3511a2009-11-04 19:45:39 -0500140{
David Corbett91067712017-12-08 11:21:14 -0500141 unsigned int i = 0;
Behdad Esfahbod8a3511a2009-11-04 19:45:39 -0500142
David Corbett91067712017-12-08 11:21:14 -0500143 hb_tag_t new_tag = hb_ot_new_tag_from_script (script);
144 if (unlikely (new_tag != HB_OT_TAG_DEFAULT_SCRIPT))
David Corbett28d091d2018-10-11 17:15:22 -0400145 {
146 tags[i++] = new_tag | '3';
147 if (*count > i)
148 tags[i++] = new_tag;
149 }
Behdad Esfahbod5d91c3d2011-03-16 17:36:32 -0300150
David Corbett91067712017-12-08 11:21:14 -0500151 if (*count > i)
152 {
153 hb_tag_t old_tag = hb_ot_old_tag_from_script (script);
154 if (old_tag != HB_OT_TAG_DEFAULT_SCRIPT)
155 tags[i++] = old_tag;
Behdad Esfahbod62879ee2011-04-18 23:40:21 -0400156 }
David Corbett91067712017-12-08 11:21:14 -0500157
158 *count = i;
Behdad Esfahbod8a3511a2009-11-04 19:45:39 -0500159}
160
Behdad Esfahbodb490fa32010-05-21 11:15:07 +0100161hb_script_t
162hb_ot_tag_to_script (hb_tag_t tag)
163{
David Corbett28d091d2018-10-11 17:15:22 -0400164 unsigned char digit = tag & 0x000000FFu;
165 if (unlikely (digit == '2' || digit == '3'))
166 return hb_ot_new_tag_to_script (tag & 0xFFFFFF32);
Behdad Esfahbodb490fa32010-05-21 11:15:07 +0100167
Behdad Esfahbod62879ee2011-04-18 23:40:21 -0400168 return hb_ot_old_tag_to_script (tag);
Behdad Esfahbodb490fa32010-05-21 11:15:07 +0100169}
Behdad Esfahbod8a3511a2009-11-04 19:45:39 -0500170
Behdad Esfahbod62879ee2011-04-18 23:40:21 -0400171
/* hb_language_t */
173
Behdad Esfahbod8a3511a2009-11-04 19:45:39 -0500174static int
Behdad Esfahbod0712e912017-10-29 17:01:47 -0600175lang_compare_first_component (const void *pa,
176 const void *pb)
Behdad Esfahbod8a3511a2009-11-04 19:45:39 -0500177{
Behdad Esfahbod0712e912017-10-29 17:01:47 -0600178 const char *a = (const char *) pa;
179 const char *b = (const char *) pb;
Behdad Esfahbod8a3511a2009-11-04 19:45:39 -0500180 unsigned int da, db;
181 const char *p;
182
Behdad Esfahbode3693b72011-06-15 09:33:52 -0400183 p = strchr (a, '-');
Behdad Esfahbod8a3511a2009-11-04 19:45:39 -0500184 da = p ? (unsigned int) (p - a) : strlen (a);
185
Behdad Esfahbode3693b72011-06-15 09:33:52 -0400186 p = strchr (b, '-');
Behdad Esfahbod8a3511a2009-11-04 19:45:39 -0500187 db = p ? (unsigned int) (p - b) : strlen (b);
188
189 return strncmp (a, b, MAX (da, db));
190}
191
David Corbett91067712017-12-08 11:21:14 -0500192static bool
David Corbett2f1f9612017-12-08 22:45:52 -0500193subtag_matches (const char *lang_str,
194 const char *limit,
195 const char *subtag)
David Corbett91067712017-12-08 11:21:14 -0500196{
197 do {
198 const char *s = strstr (lang_str, subtag);
David Corbett2f1f9612017-12-08 22:45:52 -0500199 if (!s || s >= limit)
David Corbett91067712017-12-08 11:21:14 -0500200 return false;
201 if (!ISALNUM (s[strlen (subtag)]))
David Corbett91067712017-12-08 11:21:14 -0500202 return true;
David Corbett91067712017-12-08 11:21:14 -0500203 lang_str = s + strlen (subtag);
Bruce Mitchener8d1e4792018-10-18 22:18:42 +0700204 } while (true);
David Corbett91067712017-12-08 11:21:14 -0500205}
206
Behdad Esfahbod8a3511a2009-11-04 19:45:39 -0500207static hb_bool_t
208lang_matches (const char *lang_str, const char *spec)
209{
210 unsigned int len = strlen (spec);
211
Behdad Esfahbod40b5c2e2011-05-13 22:46:36 -0400212 return strncmp (lang_str, spec, len) == 0 &&
Behdad Esfahbod8a3511a2009-11-04 19:45:39 -0500213 (lang_str[len] == '\0' || lang_str[len] == '-');
214}
215
David Corbett2f1f9612017-12-08 22:45:52 -0500216typedef struct {
217 char language[4];
218 hb_tag_t tags[HB_OT_MAX_TAGS_PER_LANGUAGE];
219} LangTag;
220
221#include "hb-ot-tag-table.hh"
222
/* The corresponding language IDs for the following IDs are unclear,
 * overlap, or are architecturally weird.  Needs more research. */

/*{"??",	{HB_TAG('B','C','R',' ')}},*/	/* Bible Cree */
/*{"zh?",	{HB_TAG('C','H','N',' ')}},*/	/* Chinese (seen in Microsoft fonts) */
/*{"ar-Syrc?",	{HB_TAG('G','A','R',' ')}},*/	/* Garshuni */
/*{"??",	{HB_TAG('N','G','R',' ')}},*/	/* Nagari */
/*{"??",	{HB_TAG('Y','I','C',' ')}},*/	/* Yi Classic */
/*{"zh?",	{HB_TAG('Z','H','P',' ')}},*/	/* Chinese Phonetic */
232
Behdad Esfahbod8a3511a2009-11-04 19:45:39 -0500233hb_tag_t
234hb_ot_tag_from_language (hb_language_t language)
235{
David Corbett91067712017-12-08 11:21:14 -0500236 unsigned int count = 1;
237 hb_tag_t tags[1];
David Corbett7f1fbfe2018-07-23 21:19:23 -0400238 hb_ot_tags_from_script_and_language (HB_SCRIPT_UNKNOWN, language, nullptr, nullptr, &count, tags);
David Corbett91067712017-12-08 11:21:14 -0500239 return count > 0 ? tags[0] : HB_OT_TAG_DEFAULT_LANGUAGE;
240}
Behdad Esfahbod8a3511a2009-11-04 19:45:39 -0500241
David Corbett91067712017-12-08 11:21:14 -0500242static void
243hb_ot_tags_from_language (const char *lang_str,
244 const char *limit,
245 const char *private_use_subtag,
246 unsigned int *count,
247 hb_tag_t *tags)
248{
249 const char *s;
Behdad Esfahbod8a3511a2009-11-04 19:45:39 -0500250
David Corbett2f1f9612017-12-08 22:45:52 -0500251 /* Check for matches of multiple subtags. */
252 if (hb_ot_tags_from_complex_language (lang_str, limit, count, tags))
253 return;
Sascha Brawer13374282017-01-18 13:51:02 +0100254
David Corbett2f1f9612017-12-08 22:45:52 -0500255 /* Find a language matching in the first component. */
David Corbett7c7cb2a2018-01-20 15:53:09 -0500256 s = strchr (lang_str, '-');
Behdad Esfahbodee5350d2014-07-10 19:06:45 -0400257 {
258 const LangTag *lang_tag;
David Corbett7c7cb2a2018-01-20 15:53:09 -0500259 if (s && limit - lang_str >= 6)
260 {
261 const char *extlang_end = strchr (s + 1, '-');
262 /* If there is an extended language tag, use it. */
263 if (3 == (extlang_end ? extlang_end - s - 1 : strlen (s + 1)) &&
264 ISALPHA (s[1]))
265 lang_str = s + 1;
266 }
Behdad Esfahbodee5350d2014-07-10 19:06:45 -0400267 lang_tag = (LangTag *) bsearch (lang_str, ot_languages,
268 ARRAY_LENGTH (ot_languages), sizeof (LangTag),
Behdad Esfahbod0712e912017-10-29 17:01:47 -0600269 lang_compare_first_component);
Behdad Esfahbodee5350d2014-07-10 19:06:45 -0400270 if (lang_tag)
David Corbett91067712017-12-08 11:21:14 -0500271 {
272 unsigned int i;
273 for (i = 0; i < *count && lang_tag->tags[i] != HB_TAG_NONE; i++)
274 tags[i] = lang_tag->tags[i];
275 *count = i;
276 return;
277 }
Behdad Esfahbodee5350d2014-07-10 19:06:45 -0400278 }
Behdad Esfahbod8a3511a2009-11-04 19:45:39 -0500279
Behdad Esfahbod738d0962011-09-02 13:31:19 -0400280 if (!s)
281 s = lang_str + strlen (lang_str);
282 if (s - lang_str == 3) {
283 /* Assume it's ISO-639-3 and upper-case and use it. */
David Corbett91067712017-12-08 11:21:14 -0500284 tags[0] = hb_tag_from_string (lang_str, s - lang_str) & ~0x20202000u;
285 *count = 1;
286 return;
Behdad Esfahbod738d0962011-09-02 13:31:19 -0400287 }
288
David Corbett91067712017-12-08 11:21:14 -0500289 *count = 0;
290}
291
292static bool
293parse_private_use_subtag (const char *private_use_subtag,
294 unsigned int *count,
295 hb_tag_t *tags,
296 const char *prefix,
297 unsigned char (*normalize) (unsigned char))
298{
299 if (private_use_subtag && count && tags && *count)
300 {
301 const char *s = strstr (private_use_subtag, prefix);
302 if (s)
303 {
304 char tag[4];
305 int i;
306 s += strlen (prefix);
307 for (i = 0; i < 4 && ISALNUM (s[i]); i++)
308 tag[i] = normalize (s[i]);
309 if (i)
310 {
311 for (; i < 4; i++)
312 tag[i] = ' ';
313 tags[0] = HB_TAG (tag[0], tag[1], tag[2], tag[3]);
David Corbett7f1fbfe2018-07-23 21:19:23 -0400314 if ((tags[0] & 0xDFDFDFDF) == HB_OT_TAG_DEFAULT_SCRIPT)
315 tags[0] ^= ~0xDFDFDFDF;
David Corbett91067712017-12-08 11:21:14 -0500316 *count = 1;
317 return false;
318 }
319 }
320 }
321 return true;
322}
323
324/**
David Corbett7f1fbfe2018-07-23 21:19:23 -0400325 * hb_ot_tags_from_script_and_language:
David Corbett91067712017-12-08 11:21:14 -0500326 * @script: an #hb_script_t to convert.
327 * @language: an #hb_language_t to convert.
328 * @script_count: (allow-none): maximum number of script tags to retrieve (IN)
329 * and actual number of script tags retrieved (OUT)
HinTak26092bb2018-10-17 00:54:39 +0100330 * @script_tags: (out) (allow-none): array of size at least @script_count to store the
David Corbett91067712017-12-08 11:21:14 -0500331 * script tag results
332 * @language_count: (allow-none): maximum number of language tags to retrieve
333 * (IN) and actual number of language tags retrieved (OUT)
HinTak26092bb2018-10-17 00:54:39 +0100334 * @language_tags: (out) (allow-none): array of size at least @language_count to store
David Corbett91067712017-12-08 11:21:14 -0500335 * the language tag results
336 *
337 * Converts an #hb_script_t and an #hb_language_t to script and language tags.
338 *
Behdad Esfahbod3d9a0302018-10-18 05:58:17 -0700339 * Since: 2.0.0
David Corbett91067712017-12-08 11:21:14 -0500340 **/
341void
David Corbett7f1fbfe2018-07-23 21:19:23 -0400342hb_ot_tags_from_script_and_language (hb_script_t script,
343 hb_language_t language,
344 unsigned int *script_count /* IN/OUT */,
345 hb_tag_t *script_tags /* OUT */,
346 unsigned int *language_count /* IN/OUT */,
347 hb_tag_t *language_tags /* OUT */)
David Corbett91067712017-12-08 11:21:14 -0500348{
349 bool needs_script = true;
350
351 if (language == HB_LANGUAGE_INVALID)
352 {
353 if (language_count && language_tags && *language_count)
354 *language_count = 0;
Behdad Esfahbodda591f22018-10-11 14:30:15 -0400355 }
356 else
357 {
David Corbett91067712017-12-08 11:21:14 -0500358 const char *lang_str, *s, *limit, *private_use_subtag;
359 bool needs_language;
360
361 lang_str = hb_language_to_string (language);
362 limit = nullptr;
363 private_use_subtag = nullptr;
364 if (lang_str[0] == 'x' && lang_str[1] == '-')
365 {
366 private_use_subtag = lang_str;
367 } else {
368 for (s = lang_str + 1; *s; s++)
369 {
370 if (s[-1] == '-' && s[1] == '-')
371 {
372 if (s[0] == 'x')
373 {
374 private_use_subtag = s;
375 if (!limit)
376 limit = s - 1;
377 break;
378 } else if (!limit)
379 {
380 limit = s - 1;
381 }
382 }
383 }
384 if (!limit)
385 limit = s;
386 }
387
388 needs_script = parse_private_use_subtag (private_use_subtag, script_count, script_tags, "-hbsc", TOLOWER);
389 needs_language = parse_private_use_subtag (private_use_subtag, language_count, language_tags, "-hbot", TOUPPER);
390
391 if (needs_language && language_count && language_tags && *language_count)
392 hb_ot_tags_from_language (lang_str, limit, private_use_subtag, language_count, language_tags);
393 }
394
395 if (needs_script && script_count && script_tags && *script_count)
396 hb_ot_all_tags_from_script (script, script_count, script_tags);
Behdad Esfahbod8a3511a2009-11-04 19:45:39 -0500397}
398
Behdad Esfahbod351f68f2015-06-12 17:46:06 -0700399/**
400 * hb_ot_tag_to_language:
401 *
Ebrahim Byagowif24b0b92018-04-12 13:40:45 +0430402 *
Behdad Esfahbod351f68f2015-06-12 17:46:06 -0700403 *
404 * Return value: (transfer none):
405 *
Behdad Esfahbodb8811422015-09-03 15:53:22 +0430406 * Since: 0.9.2
Behdad Esfahbod351f68f2015-06-12 17:46:06 -0700407 **/
Behdad Esfahbod8a3511a2009-11-04 19:45:39 -0500408hb_language_t
409hb_ot_tag_to_language (hb_tag_t tag)
410{
411 unsigned int i;
Behdad Esfahbod40b5c2e2011-05-13 22:46:36 -0400412
413 if (tag == HB_OT_TAG_DEFAULT_LANGUAGE)
Behdad Esfahboddbdbfe32017-10-15 12:11:08 +0200414 return nullptr;
Behdad Esfahbod8a3511a2009-11-04 19:45:39 -0500415
David Corbett2f1f9612017-12-08 22:45:52 -0500416 {
417 hb_language_t disambiguated_tag = hb_ot_ambiguous_tag_to_language (tag);
418 if (disambiguated_tag != HB_LANGUAGE_INVALID)
419 return disambiguated_tag;
Sascha Brawer13374282017-01-18 13:51:02 +0100420 }
421
Behdad Esfahbod8a3511a2009-11-04 19:45:39 -0500422 for (i = 0; i < ARRAY_LENGTH (ot_languages); i++)
David Corbett91067712017-12-08 11:21:14 -0500423 if (ot_languages[i].tags[0] == tag)
Behdad Esfahbod4c9fe882011-08-26 09:18:53 +0200424 return hb_language_from_string (ot_languages[i].language, -1);
Behdad Esfahbod8a3511a2009-11-04 19:45:39 -0500425
Behdad Esfahbod744970a2011-05-16 18:15:37 -0400426 /* Else return a custom language in the form of "x-hbotABCD" */
Behdad Esfahbod40b5c2e2011-05-13 22:46:36 -0400427 {
428 unsigned char buf[11] = "x-hbot";
429 buf[6] = tag >> 24;
430 buf[7] = (tag >> 16) & 0xFF;
431 buf[8] = (tag >> 8) & 0xFF;
432 buf[9] = tag & 0xFF;
433 if (buf[9] == 0x20)
434 buf[9] = '\0';
435 buf[10] = '\0';
Behdad Esfahbod4c9fe882011-08-26 09:18:53 +0200436 return hb_language_from_string ((char *) buf, -1);
Behdad Esfahbod40b5c2e2011-05-13 22:46:36 -0400437 }
Behdad Esfahbod8a3511a2009-11-04 19:45:39 -0500438}
Behdad Esfahbodacdba3f2010-07-23 15:11:18 -0400439
David Corbett7f1fbfe2018-07-23 21:19:23 -0400440/**
441 * hb_ot_tags_to_script_and_language:
442 * @script_tag: a script tag
443 * @language_tag: a language tag
David Corbettc5510002018-10-11 22:08:14 -0400444 * @script: (allow-none): the #hb_script_t corresponding to @script_tag (OUT).
445 * @language: (allow-none): the #hb_language_t corresponding to @script_tag and
David Corbett7f1fbfe2018-07-23 21:19:23 -0400446 * @language_tag (OUT).
447 *
448 * Converts a script tag and a language tag to an #hb_script_t and an
449 * #hb_language_t.
450 *
Behdad Esfahbod3d9a0302018-10-18 05:58:17 -0700451 * Since: 2.0.0
David Corbett7f1fbfe2018-07-23 21:19:23 -0400452 **/
453void
454hb_ot_tags_to_script_and_language (hb_tag_t script_tag,
455 hb_tag_t language_tag,
456 hb_script_t *script /* OUT */,
457 hb_language_t *language /* OUT */)
458{
459 hb_script_t script_out = hb_ot_tag_to_script (script_tag);
460 if (script)
461 *script = script_out;
Behdad Esfahbod80616642018-10-11 14:16:55 -0400462 if (language)
463 {
David Corbett7f1fbfe2018-07-23 21:19:23 -0400464 unsigned int script_count = 1;
465 hb_tag_t primary_script_tag[1];
Behdad Esfahbod80616642018-10-11 14:16:55 -0400466 hb_ot_tags_from_script_and_language (script_out,
467 HB_LANGUAGE_INVALID,
468 &script_count,
469 primary_script_tag,
470 nullptr, nullptr);
David Corbett7f1fbfe2018-07-23 21:19:23 -0400471 *language = hb_ot_tag_to_language (language_tag);
Behdad Esfahbod80616642018-10-11 14:16:55 -0400472 if (script_count == 0 || primary_script_tag[0] != script_tag)
473 {
David Corbett7f1fbfe2018-07-23 21:19:23 -0400474 unsigned char *buf;
475 const char *lang_str = hb_language_to_string (*language);
476 size_t len = strlen (lang_str);
477 buf = (unsigned char *) malloc (len + 11);
Behdad Esfahbod80616642018-10-11 14:16:55 -0400478 if (unlikely (!buf))
479 {
David Corbett7f1fbfe2018-07-23 21:19:23 -0400480 *language = nullptr;
Behdad Esfahbod80616642018-10-11 14:16:55 -0400481 }
482 else
483 {
David Corbett7f1fbfe2018-07-23 21:19:23 -0400484 memcpy (buf, lang_str, len);
485 if (lang_str[0] != 'x' || lang_str[1] != '-') {
486 buf[len++] = '-';
487 buf[len++] = 'x';
488 }
489 buf[len++] = '-';
490 buf[len++] = 'h';
491 buf[len++] = 'b';
492 buf[len++] = 's';
493 buf[len++] = 'c';
494 buf[len++] = script_tag >> 24;
495 buf[len++] = (script_tag >> 16) & 0xFF;
496 buf[len++] = (script_tag >> 8) & 0xFF;
497 buf[len++] = script_tag & 0xFF;
498 *language = hb_language_from_string ((char *) buf, len);
499 free (buf);
500 }
501 }
502 }
503}
504
#ifdef MAIN
/* Self-test: aborts unless ot_languages is strictly ascending under
 * lang_compare_first_component(), which the bsearch() lookup relies on.
 * Equal neighbours (duplicate first components) are rejected as well. */
static inline void
test_langs_sorted (void)
{
  for (unsigned int i = 1; i < ARRAY_LENGTH (ot_languages); i++)
  {
    int c = lang_compare_first_component (ot_languages[i-1].language, ot_languages[i].language);
    if (c >= 0)
    {
      /* `i` is unsigned, so it is printed with %u. */
      fprintf (stderr, "ot_languages not sorted at index %u: %s %d %s\n",
               i, ot_languages[i-1].language, c, ot_languages[i].language);
      abort();
    }
  }
}

/* Entry point of the standalone self-test build (compiled with -DMAIN). */
int
main (void)
{
  test_langs_sorted ();
  return 0;
}

#endif