blob: c5d945d4ecdbbf05b9e7d44300644a1c98472adc [file] [log] [blame]
/*
 * Copyright © 2011,2012 Google, Inc.
 *
 * This is part of HarfBuzz, a text shaping library.
 *
 * Permission is hereby granted, without written agreement and without
 * license or royalty fees, to use, copy, modify, and distribute this
 * software and its documentation for any purpose, provided that the
 * above copyright notice and the following two paragraphs appear in
 * all copies of this software.
 *
 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 *
 * Google Author(s): Behdad Esfahbod
 */
26
27#ifndef HB_OT_SHAPE_COMPLEX_INDIC_MACHINE_HH
28#define HB_OT_SHAPE_COMPLEX_INDIC_MACHINE_HH
29
Behdad Esfahbodc77ae402018-08-25 22:36:36 -070030#include "hb.hh"
Behdad Esfahbod867361c2011-06-17 18:35:46 -040031
/* Ragel section: names the machine, sets its alphabet type to unsigned char,
 * and asks Ragel to emit the machine's static data at this point. */
%%{
  machine indic_syllable_machine;
  alphtype unsigned char;
  write data;
}%%
37
%%{

# Input symbols of the machine: one numeric code per glyph category.
# Codes 9, 14 and 17 have no name here, so they can only be matched by `other`.
# Same order as enum indic_category_t. Not sure how to avoid duplication.
C = 1;
V = 2;
N = 3;
H = 4;
ZWNJ = 5;
ZWJ = 6;
M = 7;
SM = 8;
A = 10;
PLACEHOLDER = 11;
DOTTEDCIRCLE = 12;
RS = 13;
Repha = 15;
Ra = 16;
Symbol= 18;
CS = 19;

# Single-category helpers.
c = (C | Ra); # is_consonant
n = ((ZWNJ?.RS)? (N.N?)?); # is_consonant_modifier
z = ZWJ|ZWNJ; # is_joiner
reph = (Ra H | Repha); # possible reph

# Building blocks reused by the syllable patterns below.
# cn: a consonant, an optional ZWJ, then an optional consonant modifier.
cn = c.ZWJ?.n?;
forced_rakar = ZWJ H ZWJ Ra;
symbol = Symbol.N?;
# matra_group: up to three joiners, one M, an optional N, then optionally
# either H or a forced_rakar sequence.
matra_group = z{0,3}.M.N?.(H | forced_rakar)?;
# syllable_tail: an optional SM run (optionally preceded by a joiner and
# followed by ZWNJ), then up to three A.
syllable_tail = (z?.SM.SM?.ZWNJ?)? A{0,3}?;
halant_group = (z?.H.(ZWJ.N?)?);
final_halant_group = halant_group | H.ZWNJ;
# Either one final halant group or up to four matra groups (possibly none).
halant_or_matra_group = (final_halant_group | matra_group{0,4});

# Up to four (halant_group, cn) pairs, then the halant/matra part, then the tail.
complex_syllable_tail = (halant_group.cn){0,4} halant_or_matra_group syllable_tail;


# Top-level patterns; each name is also the syllable type passed to
# found_syllable() by the scanner below.
consonant_syllable = (Repha|CS)? cn complex_syllable_tail;
vowel_syllable = reph? V.n? (ZWJ | complex_syllable_tail);
standalone_cluster = ((Repha|CS)? PLACEHOLDER | reph? DOTTEDCIRCLE).n? complex_syllable_tail;
symbol_cluster = symbol syllable_tail;
# broken_cluster has no base (no cn, V, PLACEHOLDER or DOTTEDCIRCLE).
broken_cluster = reph? n? complex_syllable_tail;
# Fallback: any single input item.
other = any;

# Scanner. Each alternative tags the matched range through found_syllable();
# input matched only by `other` is tagged non_indic_cluster.
# NOTE(review): how overlapping alternatives are resolved is defined by Ragel's
# scanner rules, so the listing order here is presumably significant. Confirm
# against the Ragel guide before reordering.
main := |*
	consonant_syllable => { found_syllable (consonant_syllable); };
	vowel_syllable => { found_syllable (vowel_syllable); };
	standalone_cluster => { found_syllable (standalone_cluster); };
	symbol_cluster => { found_syllable (symbol_cluster); };
	broken_cluster => { found_syllable (broken_cluster); };
	other => { found_syllable (non_indic_cluster); };
*|;


}%%
93
/* Scanner action: record one matched syllable.
 *
 * Writes (syllable_serial << 4) | syllable_type into info[i].syllable() for
 * every i in the half-open range [ts, te), then increments syllable_serial.
 * When the serial reaches 16 it restarts at 1 (not 0).
 *
 * Must expand in a scope that provides ts, te, info and syllable_serial.
 * The fprintf is compiled but never executed (guarded by `if (0)`); it uses
 * %u because ts and te are unsigned int. */
#define found_syllable(syllable_type) \
  HB_STMT_START { \
    if (0) fprintf (stderr, "syllable %u..%u %s\n", ts, te, #syllable_type); \
    for (unsigned int i = ts; i < te; i++) \
      info[i].syllable() = (syllable_serial << 4) | syllable_type; \
    syllable_serial++; \
    if (unlikely (syllable_serial == 16)) syllable_serial = 1; \
  } HB_STMT_END
Behdad Esfahbod867361c2011-06-17 18:35:46 -0400102
Behdad Esfahbod743807a2011-07-29 16:37:02 -0400103static void
Behdad Esfahbod166b5cf2012-09-07 14:55:07 -0400104find_syllables (hb_buffer_t *buffer)
Behdad Esfahbod867361c2011-06-17 18:35:46 -0400105{
Behdad Esfahbod40d5d192018-10-02 17:04:05 +0200106 unsigned int p, pe, eof, ts, te, act;
Behdad Esfahbod867361c2011-06-17 18:35:46 -0400107 int cs;
Behdad Esfahbodef24cc82012-05-09 17:56:03 +0200108 hb_glyph_info_t *info = buffer->info;
Behdad Esfahbod867361c2011-06-17 18:35:46 -0400109 %%{
110 write init;
Behdad Esfahbodef24cc82012-05-09 17:56:03 +0200111 getkey info[p].indic_category();
Behdad Esfahbod867361c2011-06-17 18:35:46 -0400112 }%%
113
114 p = 0;
Behdad Esfahbod76f76812011-07-07 22:25:25 -0400115 pe = eof = buffer->len;
Behdad Esfahbod867361c2011-06-17 18:35:46 -0400116
Behdad Esfahbod327d14e2012-08-31 16:49:34 -0400117 unsigned int syllable_serial = 1;
Behdad Esfahbod867361c2011-06-17 18:35:46 -0400118 %%{
119 write exec;
120 }%%
121}
122
Behdad Esfahbod867361c2011-06-17 18:35:46 -0400123#endif /* HB_OT_SHAPE_COMPLEX_INDIC_MACHINE_HH */