/*
 * Copyright © 2011,2012 Google, Inc.
 *
 * This is part of HarfBuzz, a text shaping library.
 *
 * Permission is hereby granted, without written agreement and without
 * license or royalty fees, to use, copy, modify, and distribute this
 * software and its documentation for any purpose, provided that the
 * above copyright notice and the following two paragraphs appear in
 * all copies of this software.
 *
 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 *
 * Google Author(s): Behdad Esfahbod
 */

| 27 | #ifndef HB_OT_SHAPE_COMPLEX_INDIC_MACHINE_HH |
| 28 | #define HB_OT_SHAPE_COMPLEX_INDIC_MACHINE_HH |
| 29 | |
Behdad Esfahbod | c77ae40 | 2018-08-25 22:36:36 -0700 | [diff] [blame] | 30 | #include "hb.hh" |
Behdad Esfahbod | 867361c | 2011-06-17 18:35:46 -0400 | [diff] [blame] | 31 | |
%%{
  # Name the state machine, declare its input alphabet type, and have Ragel
  # emit the machine's data tables at this point of the generated file.
  machine indic_syllable_machine;
  alphtype unsigned char;
  write data;
}%%

%%{

# Input symbols of the machine: one numeric category code per glyph.
# Codes 9, 14 and 17 are not named here.
# Same order as enum indic_category_t. Not sure how to avoid duplication.
C            = 1;
V            = 2;
N            = 3;
H            = 4;
ZWNJ         = 5;
ZWJ          = 6;
M            = 7;
SM           = 8;
A            = 10;
PLACEHOLDER  = 11;
DOTTEDCIRCLE = 12;
RS           = 13;
Repha        = 15;
Ra           = 16;
Symbol       = 18;
CS           = 19;

# Basic character classes.
c = (C | Ra);                   # is_consonant
n = ((ZWNJ?.RS)? (N.N?)?);      # is_consonant_modifier
z = ZWJ|ZWNJ;                   # is_joiner
reph = (Ra H | Repha);          # possible reph

# Building blocks shared by the syllable patterns below.
cn = c.ZWJ?.n?;
forced_rakar = ZWJ H ZWJ Ra;
symbol = Symbol.N?;
matra_group = z{0,3}.M.N?.(H | forced_rakar)?;
syllable_tail = (z?.SM.SM?.ZWNJ?)? A{0,3}?;
halant_group = (z?.H.(ZWJ.N?)?);
final_halant_group = halant_group | H.ZWNJ;
halant_or_matra_group = (final_halant_group | matra_group{0,4});

# Up to four (halant_group cn) pairs, then an optional final halant or up to
# four matra groups, then the syllable tail.
complex_syllable_tail = (halant_group.cn){0,4} halant_or_matra_group syllable_tail;


# Top-level token patterns recognised by the scanner.
consonant_syllable = (Repha|CS)? cn complex_syllable_tail;
vowel_syllable     = reph? V.n? (ZWJ | complex_syllable_tail);
standalone_cluster = ((Repha|CS)? PLACEHOLDER | reph? DOTTEDCIRCLE).n? complex_syllable_tail;
symbol_cluster     = symbol syllable_tail;
broken_cluster     = reph? n? complex_syllable_tail;
other              = any;       # fallback: any single input symbol

# Scanner: each matched token runs found_syllable() with the syllable type
# of the pattern that matched; input matched only by `other' is tagged
# non_indic_cluster.
main := |*
  consonant_syllable => { found_syllable (consonant_syllable); };
  vowel_syllable     => { found_syllable (vowel_syllable); };
  standalone_cluster => { found_syllable (standalone_cluster); };
  symbol_cluster     => { found_syllable (symbol_cluster); };
  broken_cluster     => { found_syllable (broken_cluster); };
  other              => { found_syllable (non_indic_cluster); };
*|;


}%%

/*
 * found_syllable (syllable_type)
 *
 * Scanner action: tags every glyph in the half-open token range [ts, te)
 * with the value (syllable_serial << 4) | syllable_type, then advances
 * syllable_serial.  When the serial reaches 16 it is reset to 1, so it
 * never becomes 0 after wrapping.
 *
 * Relies on `ts', `te', `info' and `syllable_serial' being in scope at the
 * expansion site.  syllable_type is presumably expected to fit in the low
 * 4 bits -- the macro itself does not check this.
 *
 * The fprintf is a compiled-out debugging aid; it uses %u because ts and te
 * are unsigned.
 */
#define found_syllable(syllable_type) \
  HB_STMT_START { \
    if (0) fprintf (stderr, "syllable %u..%u %s\n", ts, te, #syllable_type); \
    for (unsigned int i = ts; i < te; i++) \
      info[i].syllable() = (syllable_serial << 4) | syllable_type; \
    syllable_serial++; \
    if (unlikely (syllable_serial == 16)) syllable_serial = 1; \
  } HB_STMT_END

Behdad Esfahbod | 743807a | 2011-07-29 16:37:02 -0400 | [diff] [blame] | 103 | static void |
Behdad Esfahbod | 166b5cf | 2012-09-07 14:55:07 -0400 | [diff] [blame] | 104 | find_syllables (hb_buffer_t *buffer) |
Behdad Esfahbod | 867361c | 2011-06-17 18:35:46 -0400 | [diff] [blame] | 105 | { |
Behdad Esfahbod | 40d5d19 | 2018-10-02 17:04:05 +0200 | [diff] [blame] | 106 | unsigned int p, pe, eof, ts, te, act; |
Behdad Esfahbod | 867361c | 2011-06-17 18:35:46 -0400 | [diff] [blame] | 107 | int cs; |
Behdad Esfahbod | ef24cc8 | 2012-05-09 17:56:03 +0200 | [diff] [blame] | 108 | hb_glyph_info_t *info = buffer->info; |
Behdad Esfahbod | 867361c | 2011-06-17 18:35:46 -0400 | [diff] [blame] | 109 | %%{ |
| 110 | write init; |
Behdad Esfahbod | ef24cc8 | 2012-05-09 17:56:03 +0200 | [diff] [blame] | 111 | getkey info[p].indic_category(); |
Behdad Esfahbod | 867361c | 2011-06-17 18:35:46 -0400 | [diff] [blame] | 112 | }%% |
| 113 | |
| 114 | p = 0; |
Behdad Esfahbod | 76f7681 | 2011-07-07 22:25:25 -0400 | [diff] [blame] | 115 | pe = eof = buffer->len; |
Behdad Esfahbod | 867361c | 2011-06-17 18:35:46 -0400 | [diff] [blame] | 116 | |
Behdad Esfahbod | 327d14e | 2012-08-31 16:49:34 -0400 | [diff] [blame] | 117 | unsigned int syllable_serial = 1; |
Behdad Esfahbod | 867361c | 2011-06-17 18:35:46 -0400 | [diff] [blame] | 118 | %%{ |
| 119 | write exec; |
| 120 | }%% |
| 121 | } |
| 122 | |
Behdad Esfahbod | 867361c | 2011-06-17 18:35:46 -0400 | [diff] [blame] | 123 | #endif /* HB_OT_SHAPE_COMPLEX_INDIC_MACHINE_HH */ |