/*
 * Copyright © 2015 Mozilla Foundation.
 * Copyright © 2015 Google, Inc.
 *
 * This is part of HarfBuzz, a text shaping library.
 *
 * Permission is hereby granted, without written agreement and without
 * license or royalty fees, to use, copy, modify, and distribute this
 * software and its documentation for any purpose, provided that the
 * above copyright notice and the following two paragraphs appear in
 * all copies of this software.
 *
 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 *
 * Mozilla Author(s): Jonathan Kew
 * Google Author(s): Behdad Esfahbod
 */
| 28 | |
| 29 | #ifndef HB_OT_SHAPE_COMPLEX_USE_MACHINE_HH |
| 30 | #define HB_OT_SHAPE_COMPLEX_USE_MACHINE_HH |
| 31 | |
| 32 | #include "hb-private.hh" |
| 33 | |
%%{
  # Scanner that splits a glyph buffer into syllable clusters.  The input
  # symbol for each position is supplied by the host code through `getkey`
  # and is treated as an unsigned char.
  machine use_syllable_machine;
  alphtype unsigned char;
  write data;
}%%

%%{

# Same order as enum use_category_t.  Not sure how to avoid duplication.
# Commented-out entries are categories the grammar below does not use.

O	= 0; # OTHER

B	= 1; # BASE
IND	= 3; # BASE_IND
N	= 4; # BASE_NUM
GB	= 5; # BASE_OTHER
CGJ	= 6; # CGJ
#F	= 7; # CONS_FINAL
FM	= 8; # CONS_FINAL_MOD
#M	= 9; # CONS_MED
#CM	= 10; # CONS_MOD
SUB	= 11; # CONS_SUB
H	= 12; # HALANT

HN	= 13; # HALANT_NUM
ZWNJ	= 14; # Zero width non-joiner
ZWJ	= 15; # Zero width joiner
WJ	= 16; # Word joiner
Rsv	= 17; # Reserved characters
R	= 18; # REPHA
S	= 19; # SYM
#SM	= 20; # SYM_MOD
VS	= 21; # VARIATION_SELECTOR
#V	= 36; # VOWEL
#VM	= 40; # VOWEL_MOD

FAbv	= 24; # CONS_FINAL_ABOVE
FBlw	= 25; # CONS_FINAL_BELOW
FPst	= 26; # CONS_FINAL_POST
MAbv	= 27; # CONS_MED_ABOVE
MBlw	= 28; # CONS_MED_BELOW
MPst	= 29; # CONS_MED_POST
MPre	= 30; # CONS_MED_PRE
CMAbv	= 31; # CONS_MOD_ABOVE
CMBlw	= 32; # CONS_MOD_BELOW
VAbv	= 33; # VOWEL_ABOVE / VOWEL_ABOVE_BELOW / VOWEL_ABOVE_BELOW_POST / VOWEL_ABOVE_POST
VBlw	= 34; # VOWEL_BELOW / VOWEL_BELOW_POST
VPst	= 35; # VOWEL_POST	UIPC = Right
VPre	= 22; # VOWEL_PRE / VOWEL_PRE_ABOVE / VOWEL_PRE_ABOVE_POST / VOWEL_PRE_POST
VMAbv	= 37; # VOWEL_MOD_ABOVE
VMBlw	= 38; # VOWEL_MOD_BELOW
VMPst	= 39; # VOWEL_MOD_POST
VMPre	= 23; # VOWEL_MOD_PRE
SMAbv	= 41; # SYM_MOD_ABOVE
SMBlw	= 42; # SYM_MOD_BELOW
CS	= 43; # CONS_WITH_STACKER


# Override: Ad hoc ZWJ placement. https://github.com/harfbuzz/harfbuzz/issues/542#issuecomment-353169729
consonant_modifiers = CMAbv* CMBlw* ((ZWJ?.H.ZWJ? B | SUB) VS? CMAbv? CMBlw*)*;
# Override: Allow two MBlw. https://github.com/harfbuzz/harfbuzz/issues/376
medial_consonants = MPre? MAbv? MBlw?.MBlw? MPst?;
dependent_vowels = VPre* VAbv* VBlw* VPst*;
vowel_modifiers = VMPre* VMAbv* VMBlw* VMPst*;
final_consonants = FAbv* FBlw* FPst* FM?;

# A base with its consonant modifiers, ending in a halant that may be
# wrapped in ZWJ on either side.
virama_terminated_cluster =
	(R|CS)? (B | GB) VS?
	consonant_modifiers
	ZWJ?.H.ZWJ?
;
# A base followed, in this fixed order, by every optional tail component.
standard_cluster =
	(R|CS)? (B | GB) VS?
	consonant_modifiers
	medial_consonants
	dependent_vowels
	vowel_modifiers
	final_consonants
;

# Same tail as standard_cluster, but with no base (only an optional R).
broken_cluster =
	R?
	consonant_modifiers
	medial_consonants
	dependent_vowels
	vowel_modifiers
	final_consonants
;

# Numbers joined by HN; the "terminated" form also ends in a trailing HN.
number_joiner_terminated_cluster = N VS? (HN N VS?)* HN;
numeral_cluster = N VS? (HN N VS?)*;
symbol_cluster = S VS? SMAbv* SMBlw*;
independent_cluster = (IND | O | Rsv | WJ) VS?;
other = any;

# Longest-match scanner.  When several patterns match the same length the
# one listed first is chosen, so the order of the entries below matters.
main := |*
	independent_cluster			=> { found_syllable (independent_cluster); };
	virama_terminated_cluster		=> { found_syllable (virama_terminated_cluster); };
	standard_cluster			=> { found_syllable (standard_cluster); };
	number_joiner_terminated_cluster	=> { found_syllable (number_joiner_terminated_cluster); };
	numeral_cluster				=> { found_syllable (numeral_cluster); };
	symbol_cluster				=> { found_syllable (symbol_cluster); };
	broken_cluster				=> { found_syllable (broken_cluster); };
	other					=> { found_syllable (non_cluster); };
*|;


}%%
| 142 | |
/*
 * Scanner action: record one syllable of kind `syllable_type`.
 *
 * Expects `info`, `p`, `last` and `syllable_serial` to be in scope at the
 * expansion site.  Every glyph in [last, p+1) gets its syllable() set to
 * (syllable_serial << 4) | syllable_type; then `last` moves to p+1 and the
 * serial advances, wrapping from 15 back to 1 (it never becomes 0 or 16).
 *
 * The fprintf is a disabled debug trace.  `last` and `p+1` are unsigned,
 * hence the %u conversions.
 */
#define found_syllable(syllable_type) \
  HB_STMT_START { \
    if (0) fprintf (stderr, "syllable %u..%u %s\n", last, p+1, #syllable_type); \
    for (unsigned int i = last; i < p+1; i++) \
      info[i].syllable() = (syllable_serial << 4) | syllable_type; \
    last = p+1; \
    syllable_serial++; \
    if (unlikely (syllable_serial == 16)) syllable_serial = 1; \
  } HB_STMT_END
| 152 | |
/*
 * Run the use_syllable_machine scanner over every glyph in `buffer`.
 * The scanner's actions (found_syllable) write the per-glyph syllable
 * values into buffer->info.
 */
static void
find_syllables (hb_buffer_t *buffer)
{
  hb_glyph_info_t *info = buffer->info;

  /* Scanner cursor and limits: the whole buffer is a single input run. */
  unsigned int p = 0;
  unsigned int pe = buffer->len;
  unsigned int eof = pe;

  /* Scanner state assigned by `write init`; some of it may go unused. */
  unsigned int ts HB_UNUSED;
  unsigned int te HB_UNUSED;
  unsigned int act HB_UNUSED;
  int cs;
  %%{
    write init;
    getkey info[p].use_category();
  }%%

  /* Bookkeeping read and updated by the found_syllable action. */
  unsigned int last = 0;
  unsigned int syllable_serial = 1;
  %%{
    write exec;
  }%%
}
| 173 | |
| 174 | #undef found_syllable |
| 175 | |
| 176 | #endif /* HB_OT_SHAPE_COMPLEX_USE_MACHINE_HH */ |