/*
 * Copyright © 2015 Mozilla Foundation.
 * Copyright © 2015 Google, Inc.
 *
 * This is part of HarfBuzz, a text shaping library.
 *
 * Permission is hereby granted, without written agreement and without
 * license or royalty fees, to use, copy, modify, and distribute this
 * software and its documentation for any purpose, provided that the
 * above copyright notice and the following two paragraphs appear in
 * all copies of this software.
 *
 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 *
 * Mozilla Author(s): Jonathan Kew
 * Google Author(s): Behdad Esfahbod
 */

/* Include guard for the header generated from this Ragel source. */
#ifndef HB_OT_SHAPE_COMPLEX_USE_MACHINE_HH
#define HB_OT_SHAPE_COMPLEX_USE_MACHINE_HH

#include "hb.hh"

%%{
  # Machine declaration.  The alphabet is `unsigned char`; the key fed to the
  # machine is configured with `getkey` inside find_syllables().
  machine use_syllable_machine;
  alphtype unsigned char;
  write data;
}%%

%%{

# Category constants.
# Same order as enum use_category_t.  Not sure how to avoid duplication.
# Commented-out entries are categories that no expression below refers to.

O     = 0; # OTHER

B     = 1; # BASE
IND   = 3; # BASE_IND
N     = 4; # BASE_NUM
GB    = 5; # BASE_OTHER
CGJ   = 6; # CGJ
#F    = 7; # CONS_FINAL
FM    = 8; # CONS_FINAL_MOD
#M    = 9; # CONS_MED
#CM   = 10; # CONS_MOD
SUB   = 11; # CONS_SUB
H     = 12; # HALANT

HN    = 13; # HALANT_NUM
ZWNJ  = 14; # Zero width non-joiner
ZWJ   = 15; # Zero width joiner
WJ    = 16; # Word joiner
Rsv   = 17; # Reserved characters
R     = 18; # REPHA
S     = 19; # SYM
#SM   = 20; # SYM_MOD
VS    = 21; # VARIATION_SELECTOR
#V    = 36; # VOWEL
#VM   = 40; # VOWEL_MOD

FAbv  = 24; # CONS_FINAL_ABOVE
FBlw  = 25; # CONS_FINAL_BELOW
FPst  = 26; # CONS_FINAL_POST
MAbv  = 27; # CONS_MED_ABOVE
MBlw  = 28; # CONS_MED_BELOW
MPst  = 29; # CONS_MED_POST
MPre  = 30; # CONS_MED_PRE
CMAbv = 31; # CONS_MOD_ABOVE
CMBlw = 32; # CONS_MOD_BELOW
VAbv  = 33; # VOWEL_ABOVE / VOWEL_ABOVE_BELOW / VOWEL_ABOVE_BELOW_POST / VOWEL_ABOVE_POST
VBlw  = 34; # VOWEL_BELOW / VOWEL_BELOW_POST
VPst  = 35; # VOWEL_POST UIPC = Right
VPre  = 22; # VOWEL_PRE / VOWEL_PRE_ABOVE / VOWEL_PRE_ABOVE_POST / VOWEL_PRE_POST
VMAbv = 37; # VOWEL_MOD_ABOVE
VMBlw = 38; # VOWEL_MOD_BELOW
VMPst = 39; # VOWEL_MOD_POST
VMPre = 23; # VOWEL_MOD_PRE
SMAbv = 41; # SYM_MOD_ABOVE
SMBlw = 42; # SYM_MOD_BELOW
CS    = 43; # CONS_WITH_STACKER

HVM   = 44; # HALANT_OR_VOWEL_MODIFIER

# Anywhere a halant is accepted, HVM is accepted as well.
h = H | HVM; # https://github.com/harfbuzz/harfbuzz/issues/1102

# Building blocks of a cluster tail, in the order they are concatenated in
# complex_syllable_tail.

# Override: Adhoc ZWJ placement. https://github.com/harfbuzz/harfbuzz/issues/542#issuecomment-353169729
consonant_modifiers = CMAbv* CMBlw* ((ZWJ?.h.ZWJ? B | SUB) VS? CMAbv? CMBlw*)*;
# Override: Allow two MBlw. https://github.com/harfbuzz/harfbuzz/issues/376
medial_consonants = MPre? MAbv? MBlw?.MBlw? MPst?;
dependent_vowels = VPre* VAbv* VBlw* VPst*;
vowel_modifiers = HVM? VMPre* VMAbv* VMBlw* VMPst*;
final_consonants = FAbv* FBlw* FPst* FM?;

complex_syllable_tail =
	consonant_modifiers
	medial_consonants
	dependent_vowels
	vowel_modifiers
	final_consonants
;

# Cluster types.  Each name below is also passed to found_syllable() by the
# scanner at the end of this block.

# A base followed by consonant modifiers and ending in a halant (optionally
# wrapped in ZWJ).
virama_terminated_cluster =
	(R|CS)? (B | GB) VS?
	consonant_modifiers
	ZWJ?.h.ZWJ?
;
# A base followed by the full tail.
standard_cluster =
	(R|CS)? (B | GB) VS?
	complex_syllable_tail
;
# A tail with no base in front of it (only an optional R).
broken_cluster =
	R?
	complex_syllable_tail
;

number_joiner_terminated_cluster = N VS? (HN N VS?)* HN;
numeral_cluster = N VS? (HN N VS?)*;
symbol_cluster = S VS? SMAbv* SMBlw*;
independent_cluster = (IND | O | Rsv | WJ) VS?;
# Catch-all: any single item, so the scanner always has something to match.
other = any;

# Scanner.  Ragel picks the longest match, and on a tie the pattern listed
# first wins, so the order of the alternatives below is significant.
main := |*
	independent_cluster			=> { found_syllable (independent_cluster); };
	virama_terminated_cluster		=> { found_syllable (virama_terminated_cluster); };
	standard_cluster			=> { found_syllable (standard_cluster); };
	number_joiner_terminated_cluster	=> { found_syllable (number_joiner_terminated_cluster); };
	numeral_cluster				=> { found_syllable (numeral_cluster); };
	symbol_cluster				=> { found_syllable (symbol_cluster); };
	broken_cluster				=> { found_syllable (broken_cluster); };
	other					=> { found_syllable (non_cluster); };
*|;


}%%

/*
 * found_syllable(syllable_type): action run by the scanner for each token.
 *
 * Expands inside find_syllables() and relies on these names from that scope:
 * `ts`/`te` (token start / one-past-end indices), `info` (glyph array) and
 * `syllable_serial`.
 *
 * Every glyph in [ts, te) gets syllable() set to the current serial shifted
 * left by 4 bits, OR-ed with `syllable_type`.  The serial is then advanced;
 * on reaching 16 it is reset to 1, so it cycles through 1..15 and is never 0.
 *
 * The fprintf is a compiled-out debugging aid (`if (0)`).  `ts` and `te` are
 * unsigned int, hence the %u conversions.
 */
#define found_syllable(syllable_type) \
  HB_STMT_START { \
    if (0) fprintf (stderr, "syllable %u..%u %s\n", ts, te, #syllable_type); \
    for (unsigned int i = ts; i < te; i++) \
      info[i].syllable() = (syllable_serial << 4) | (syllable_type); \
    syllable_serial++; \
    if (unlikely (syllable_serial == 16)) syllable_serial = 1; \
  } HB_STMT_END

/*
 * find_syllables:
 * @buffer: buffer whose glyph infos are segmented.
 *
 * Runs the use_syllable_machine scanner over buffer->info[0 .. buffer->len),
 * keyed on each glyph's use_category().  For every token the scanner matches,
 * the found_syllable() action writes the syllable() field of the glyphs it
 * covers.  Nothing is returned.
 */
static void
find_syllables (hb_buffer_t *buffer)
{
  /* State variables that the Ragel-generated code refers to by name. */
  unsigned int p, pe, eof;
  unsigned int ts, te, act;
  int cs;

  hb_glyph_info_t *info = buffer->info;

  /* Serial consumed and advanced by found_syllable(); starts at 1. */
  unsigned int syllable_serial = 1;

  %%{
    write init;
    getkey info[p].use_category();
  }%%

  /* Scan the whole buffer: input end and end-of-file coincide. */
  p = 0;
  pe = buffer->len;
  eof = pe;

  %%{
    write exec;
  }%%
}

/* found_syllable is only meaningful inside find_syllables(); do not leak it. */
#undef found_syllable

#endif /* HB_OT_SHAPE_COMPLEX_USE_MACHINE_HH */