/*
 * Copyright (C) 2012 Grigori Goronzy <greg@kinoho.net>
 *
 * Permission to use, copy, modify, and/or distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
| 16 | |
| 17 | #include "hb.hh" |
Behdad Esfahbod | ceb4c21 | 2019-07-02 16:02:13 -0700 | [diff] [blame] | 18 | #include "hb-unicode.hh" |
Behdad Esfahbod | 65392b7 | 2019-05-22 16:21:21 -0400 | [diff] [blame] | 19 | #include "hb-machinery.hh" |
| 20 | |
| 21 | #include "hb-ucd-table.hh" |
| 22 | |
| 23 | static hb_unicode_combining_class_t |
| 24 | hb_ucd_combining_class (hb_unicode_funcs_t *ufuncs HB_UNUSED, |
| 25 | hb_codepoint_t unicode, |
| 26 | void *user_data HB_UNUSED) |
| 27 | { |
| 28 | return (hb_unicode_combining_class_t) _hb_ucd_ccc (unicode); |
| 29 | } |
| 30 | |
| 31 | static hb_unicode_general_category_t |
| 32 | hb_ucd_general_category (hb_unicode_funcs_t *ufuncs HB_UNUSED, |
| 33 | hb_codepoint_t unicode, |
| 34 | void *user_data HB_UNUSED) |
| 35 | { |
| 36 | return (hb_unicode_general_category_t) _hb_ucd_gc (unicode); |
| 37 | } |
| 38 | |
| 39 | static hb_codepoint_t |
| 40 | hb_ucd_mirroring (hb_unicode_funcs_t *ufuncs HB_UNUSED, |
| 41 | hb_codepoint_t unicode, |
| 42 | void *user_data HB_UNUSED) |
| 43 | { |
| 44 | return unicode + _hb_ucd_bmg (unicode); |
| 45 | } |
| 46 | |
| 47 | static hb_script_t |
| 48 | hb_ucd_script (hb_unicode_funcs_t *ufuncs HB_UNUSED, |
| 49 | hb_codepoint_t unicode, |
| 50 | void *user_data HB_UNUSED) |
| 51 | { |
| 52 | return _hb_ucd_sc_map[_hb_ucd_sc (unicode)]; |
| 53 | } |
| 54 | |
| 55 | |
/* Constants for the arithmetic Hangul (de)composition helpers below.
 * *BASE is the codepoint a range is measured from, *COUNT the size of that
 * range.  Note that TBASE itself is never accepted as a trailing part: an
 * offset of 0 from TBASE means "no trailing part" (the L,V case). */
#define SBASE  0xAC00u		/* first precomposed syllable */
#define LBASE  0x1100u		/* first leading (L) part */
#define VBASE  0x1161u		/* first vowel (V) part */
#define TBASE  0x11A7u		/* one below the first trailing (T) part */
#define SCOUNT 11172u		/* number of precomposed syllables */
#define LCOUNT 19u
#define VCOUNT 21u
#define TCOUNT 28u
#define NCOUNT (VCOUNT * TCOUNT)	/* syllables per leading part */
| 65 | |
| 66 | static inline bool |
| 67 | _hb_ucd_decompose_hangul (hb_codepoint_t ab, hb_codepoint_t *a, hb_codepoint_t *b) |
| 68 | { |
| 69 | unsigned si = ab - SBASE; |
| 70 | |
| 71 | if (si >= SCOUNT) |
| 72 | return false; |
| 73 | |
| 74 | if (si % TCOUNT) |
| 75 | { |
| 76 | /* LV,T */ |
| 77 | *a = SBASE + (si / TCOUNT) * TCOUNT; |
| 78 | *b = TBASE + (si % TCOUNT); |
| 79 | return true; |
| 80 | } else { |
| 81 | /* L,V */ |
| 82 | *a = LBASE + (si / NCOUNT); |
| 83 | *b = VBASE + (si % NCOUNT) / TCOUNT; |
| 84 | return true; |
| 85 | } |
| 86 | } |
| 87 | |
| 88 | static inline bool |
| 89 | _hb_ucd_compose_hangul (hb_codepoint_t a, hb_codepoint_t b, hb_codepoint_t *ab) |
| 90 | { |
| 91 | if (a >= SBASE && a < (SBASE + SCOUNT) && b > TBASE && b < (TBASE + TCOUNT) && |
| 92 | !((a - SBASE) % TCOUNT)) |
| 93 | { |
| 94 | /* LV,T */ |
| 95 | *ab = a + (b - TBASE); |
| 96 | return true; |
| 97 | } |
| 98 | else if (a >= LBASE && a < (LBASE + LCOUNT) && b >= VBASE && b < (VBASE + VCOUNT)) |
| 99 | { |
| 100 | /* L,V */ |
| 101 | int li = a - LBASE; |
| 102 | int vi = b - VBASE; |
| 103 | *ab = SBASE + li * NCOUNT + vi * TCOUNT; |
| 104 | return true; |
| 105 | } |
| 106 | else |
| 107 | return false; |
| 108 | } |
| 109 | |
| 110 | static int |
| 111 | _cmp_pair (const void *_key, const void *_item) |
| 112 | { |
| 113 | uint64_t& a = * (uint64_t*) _key; |
| 114 | uint64_t b = (* (uint64_t*) _item) & HB_CODEPOINT_ENCODE3(0x1FFFFFu, 0x1FFFFFu, 0); |
| 115 | |
| 116 | return a < b ? -1 : a > b ? +1 : 0; |
| 117 | } |
Behdad Esfahbod | ec8e635 | 2019-06-24 12:37:23 -0700 | [diff] [blame] | 118 | static int |
| 119 | _cmp_pair_11_7_14 (const void *_key, const void *_item) |
| 120 | { |
| 121 | uint32_t& a = * (uint32_t*) _key; |
| 122 | uint32_t b = (* (uint32_t*) _item) & HB_CODEPOINT_ENCODE3_11_7_14(0x1FFFFFu, 0x1FFFFFu, 0); |
| 123 | |
| 124 | return a < b ? -1 : a > b ? +1 : 0; |
| 125 | } |
Behdad Esfahbod | 65392b7 | 2019-05-22 16:21:21 -0400 | [diff] [blame] | 126 | |
| 127 | static hb_bool_t |
| 128 | hb_ucd_compose (hb_unicode_funcs_t *ufuncs HB_UNUSED, |
| 129 | hb_codepoint_t a, hb_codepoint_t b, hb_codepoint_t *ab, |
| 130 | void *user_data HB_UNUSED) |
| 131 | { |
| 132 | if (_hb_ucd_compose_hangul (a, b, ab)) return true; |
| 133 | |
Behdad Esfahbod | ec8e635 | 2019-06-24 12:37:23 -0700 | [diff] [blame] | 134 | hb_codepoint_t u = 0; |
Behdad Esfahbod | 65392b7 | 2019-05-22 16:21:21 -0400 | [diff] [blame] | 135 | |
Behdad Esfahbod | ec8e635 | 2019-06-24 12:37:23 -0700 | [diff] [blame] | 136 | if ((a & 0xFFFFF800u) == 0x0000u && (b & 0xFFFFFF80) == 0x0300u) |
| 137 | { |
| 138 | uint32_t k = HB_CODEPOINT_ENCODE3_11_7_14 (a, b, 0); |
| 139 | uint32_t *v = (uint32_t*) hb_bsearch (&k, _hb_ucd_dm2_u32_map, |
| 140 | ARRAY_LENGTH (_hb_ucd_dm2_u32_map), |
| 141 | sizeof (*_hb_ucd_dm2_u32_map), |
| 142 | _cmp_pair_11_7_14); |
| 143 | if (likely (!v)) return false; |
| 144 | u = HB_CODEPOINT_DECODE3_11_7_14_3 (*v); |
| 145 | } |
| 146 | else |
| 147 | { |
| 148 | uint64_t k = HB_CODEPOINT_ENCODE3 (a, b, 0); |
| 149 | uint64_t *v = (uint64_t*) hb_bsearch (&k, _hb_ucd_dm2_u64_map, |
| 150 | ARRAY_LENGTH (_hb_ucd_dm2_u64_map), |
| 151 | sizeof (*_hb_ucd_dm2_u64_map), |
| 152 | _cmp_pair); |
| 153 | if (likely (!v)) return false; |
| 154 | u = HB_CODEPOINT_DECODE3_3 (*v); |
| 155 | } |
| 156 | |
Behdad Esfahbod | 65392b7 | 2019-05-22 16:21:21 -0400 | [diff] [blame] | 157 | if (unlikely (!u)) return false; |
Behdad Esfahbod | 65392b7 | 2019-05-22 16:21:21 -0400 | [diff] [blame] | 158 | *ab = u; |
| 159 | return true; |
| 160 | } |
| 161 | |
| 162 | static hb_bool_t |
| 163 | hb_ucd_decompose (hb_unicode_funcs_t *ufuncs HB_UNUSED, |
| 164 | hb_codepoint_t ab, hb_codepoint_t *a, hb_codepoint_t *b, |
| 165 | void *user_data HB_UNUSED) |
| 166 | { |
| 167 | if (_hb_ucd_decompose_hangul (ab, a, b)) return true; |
| 168 | |
| 169 | unsigned i = _hb_ucd_dm (ab); |
| 170 | |
| 171 | if (likely (!i)) return false; |
| 172 | i--; |
| 173 | |
Behdad Esfahbod | 9c933ac | 2019-06-23 17:14:27 -0700 | [diff] [blame] | 174 | if (i < ARRAY_LENGTH (_hb_ucd_dm1_p0_map) + ARRAY_LENGTH (_hb_ucd_dm1_p2_map)) |
Behdad Esfahbod | 65392b7 | 2019-05-22 16:21:21 -0400 | [diff] [blame] | 175 | { |
Behdad Esfahbod | 9c933ac | 2019-06-23 17:14:27 -0700 | [diff] [blame] | 176 | if (i < ARRAY_LENGTH (_hb_ucd_dm1_p0_map)) |
| 177 | *a = _hb_ucd_dm1_p0_map[i]; |
Behdad Esfahbod | 5074d66 | 2019-06-07 14:20:45 -0700 | [diff] [blame] | 178 | else |
| 179 | { |
Behdad Esfahbod | 9c933ac | 2019-06-23 17:14:27 -0700 | [diff] [blame] | 180 | i -= ARRAY_LENGTH (_hb_ucd_dm1_p0_map); |
| 181 | *a = 0x20000 | _hb_ucd_dm1_p2_map[i]; |
Behdad Esfahbod | 5074d66 | 2019-06-07 14:20:45 -0700 | [diff] [blame] | 182 | } |
Behdad Esfahbod | 65392b7 | 2019-05-22 16:21:21 -0400 | [diff] [blame] | 183 | *b = 0; |
| 184 | return true; |
| 185 | } |
Behdad Esfahbod | 9c933ac | 2019-06-23 17:14:27 -0700 | [diff] [blame] | 186 | i -= ARRAY_LENGTH (_hb_ucd_dm1_p0_map) + ARRAY_LENGTH (_hb_ucd_dm1_p2_map); |
Behdad Esfahbod | 65392b7 | 2019-05-22 16:21:21 -0400 | [diff] [blame] | 187 | |
Behdad Esfahbod | ec8e635 | 2019-06-24 12:37:23 -0700 | [diff] [blame] | 188 | if (i < ARRAY_LENGTH (_hb_ucd_dm2_u32_map)) |
| 189 | { |
| 190 | uint32_t v = _hb_ucd_dm2_u32_map[i]; |
| 191 | *a = HB_CODEPOINT_DECODE3_11_7_14_1 (v); |
| 192 | *b = HB_CODEPOINT_DECODE3_11_7_14_2 (v); |
| 193 | return true; |
| 194 | } |
| 195 | i -= ARRAY_LENGTH (_hb_ucd_dm2_u32_map); |
| 196 | |
| 197 | uint64_t v = _hb_ucd_dm2_u64_map[i]; |
Behdad Esfahbod | 65392b7 | 2019-05-22 16:21:21 -0400 | [diff] [blame] | 198 | *a = HB_CODEPOINT_DECODE3_1 (v); |
| 199 | *b = HB_CODEPOINT_DECODE3_2 (v); |
| 200 | return true; |
| 201 | } |
| 202 | |
| 203 | |
#if HB_USE_ATEXIT
/* Declared ahead of the lazy loader, whose create() passes it to atexit(). */
static void free_static_ucd_funcs ();
#endif
| 207 | |
| 208 | static struct hb_ucd_unicode_funcs_lazy_loader_t : hb_unicode_funcs_lazy_loader_t<hb_ucd_unicode_funcs_lazy_loader_t> |
| 209 | { |
| 210 | static hb_unicode_funcs_t *create () |
| 211 | { |
| 212 | hb_unicode_funcs_t *funcs = hb_unicode_funcs_create (nullptr); |
| 213 | |
| 214 | hb_unicode_funcs_set_combining_class_func (funcs, hb_ucd_combining_class, nullptr, nullptr); |
| 215 | hb_unicode_funcs_set_general_category_func (funcs, hb_ucd_general_category, nullptr, nullptr); |
| 216 | hb_unicode_funcs_set_mirroring_func (funcs, hb_ucd_mirroring, nullptr, nullptr); |
| 217 | hb_unicode_funcs_set_script_func (funcs, hb_ucd_script, nullptr, nullptr); |
| 218 | hb_unicode_funcs_set_compose_func (funcs, hb_ucd_compose, nullptr, nullptr); |
| 219 | hb_unicode_funcs_set_decompose_func (funcs, hb_ucd_decompose, nullptr, nullptr); |
| 220 | |
| 221 | hb_unicode_funcs_make_immutable (funcs); |
| 222 | |
| 223 | #if HB_USE_ATEXIT |
| 224 | atexit (free_static_ucd_funcs); |
| 225 | #endif |
| 226 | |
| 227 | return funcs; |
| 228 | } |
| 229 | } static_ucd_funcs; |
| 230 | |
#if HB_USE_ATEXIT
/* atexit() handler registered by the loader's create(): frees the instance
 * held by static_ucd_funcs. */
static void
free_static_ucd_funcs ()
{
  static_ucd_funcs.free_instance ();
}
#endif
| 238 | |
Behdad Esfahbod | 65392b7 | 2019-05-22 16:21:21 -0400 | [diff] [blame] | 239 | hb_unicode_funcs_t * |
| 240 | hb_ucd_get_unicode_funcs () |
| 241 | { |
Behdad Esfahbod | 487879e | 2019-05-24 12:37:53 -0400 | [diff] [blame] | 242 | #ifdef HB_NO_UCD |
| 243 | return hb_unicode_funcs_get_empty (); |
| 244 | #endif |
Behdad Esfahbod | 65392b7 | 2019-05-22 16:21:21 -0400 | [diff] [blame] | 245 | return static_ucd_funcs.get_unconst (); |
| 246 | } |