blob: 35e7ce9067cda93db8e770f519ee41254e22aa0e [file] [log] [blame]
Behdad Esfahbod867361c2011-06-17 18:35:46 -04001/*
Behdad Esfahbod27aba592012-05-24 15:00:01 -04002 * Copyright © 2011,2012 Google, Inc.
Behdad Esfahbod867361c2011-06-17 18:35:46 -04003 *
4 * This is part of HarfBuzz, a text shaping library.
5 *
6 * Permission is hereby granted, without written agreement and without
7 * license or royalty fees, to use, copy, modify, and distribute this
8 * software and its documentation for any purpose, provided that the
9 * above copyright notice and the following two paragraphs appear in
10 * all copies of this software.
11 *
12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
16 * DAMAGE.
17 *
18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
23 *
24 * Google Author(s): Behdad Esfahbod
25 */
26
27#ifndef HB_OT_SHAPE_COMPLEX_INDIC_MACHINE_HH
28#define HB_OT_SHAPE_COMPLEX_INDIC_MACHINE_HH
29
30#include "hb-private.hh"
31
Behdad Esfahbod867361c2011-06-17 18:35:46 -040032%%{
# Declare the scanner named indic_syllable_machine. Its input alphabet is
# unsigned char, and "write data" asks Ragel to emit the machine's static
# data at this point of the generated file.
33 machine indic_syllable_machine;
34 alphtype unsigned char;
35 write data;
36}%%
37
38%%{
39
Behdad Esfahbodc7fe56a2011-06-24 19:05:34 -040040# Same order as enum indic_category_t. Not sure how to avoid duplication.
# Numeric codes of the input alphabet. The values 9 and 14 are not given a
# name in this list.
Behdad Esfahbod867361c2011-06-17 18:35:46 -040041C = 1;
Behdad Esfahboddb8981f2012-07-17 18:17:30 -040042V = 2;
43N = 3;
44H = 4;
45ZWNJ = 5;
46ZWJ = 6;
47M = 7;
48SM = 8;
Behdad Esfahboddb8981f2012-07-17 18:17:30 -040049A = 10;
Behdad Esfahbodcf78dd42014-05-27 17:53:37 -040050PLACEHOLDER = 11;
Behdad Esfahboddb8981f2012-07-17 18:17:30 -040051DOTTEDCIRCLE = 12;
Behdad Esfahbodc16012e2013-10-18 02:27:00 +020052RS = 13;
Behdad Esfahboddb8981f2012-07-17 18:17:30 -040053Repha = 15;
54Ra = 16;
Behdad Esfahbod9cac1332012-11-12 18:41:22 -080055CM = 17;
Behdad Esfahbod9f9bd9b2014-05-23 15:33:13 -040056Symbol= 18;
Behdad Esfahbodf559c632017-10-03 15:20:07 +020057CS = 19;
Behdad Esfahbod867361c2011-06-17 18:35:46 -040058
# Building blocks shared by the cluster patterns further down.
Behdad Esfahbod3756efa2013-10-16 19:06:29 +020059c = (C | Ra); # is_consonant
Behdad Esfahbod552d19b2012-07-18 16:00:49 -040060n = ((ZWNJ?.RS)? (N.N?)?); # is_consonant_modifier
Behdad Esfahbod25bc4892012-07-17 17:53:03 -040061z = ZWJ|ZWNJ; # is_joiner
Behdad Esfahboddb8981f2012-07-17 18:17:30 -040062reph = (Ra H | Repha); # possible reph
Behdad Esfahbod4c3691d2012-07-18 14:23:55 -040063
Behdad Esfahbod4ed717e2012-09-05 17:21:17 -040064cn = c.ZWJ?.n?;
Behdad Esfahbod422ecd22012-07-18 23:25:58 -040065forced_rakar = ZWJ H ZWJ Ra;
Behdad Esfahbod9f9bd9b2014-05-23 15:33:13 -040066symbol = Symbol.N?;
Behdad Esfahboddff0ece2012-07-24 02:30:38 -040067matra_group = z{0,3}.M.N?.(H | forced_rakar)?;
Behdad Esfahbodf6df2512018-01-05 17:27:48 +000068syllable_tail = (z?.SM.SM?.ZWNJ?)? A{0,3}?;
Behdad Esfahbod9761f9d2018-01-05 15:33:11 +000069halant_group = (z?.H.(ZWJ.N?)?);
70final_halant_group = halant_group | H.ZWNJ;
Behdad Esfahbod8b5d6e72016-05-06 15:59:27 +010071medial_group = CM?;
Behdad Esfahbod9761f9d2018-01-05 15:33:11 +000072halant_or_matra_group = (final_halant_group | (H.ZWJ)? matra_group{0,4});
Behdad Esfahbod867361c2011-06-17 18:35:46 -040073
Behdad Esfahbod867361c2011-06-17 18:35:46 -040074
# Top-level cluster patterns. Every {0,4} repetition caps the repeated
# group at four occurrences.
Behdad Esfahbodf559c632017-10-03 15:20:07 +020075consonant_syllable = (Repha|CS)? (cn.halant_group){0,4} cn medial_group halant_or_matra_group syllable_tail;
Behdad Esfahbodc16012e2013-10-18 02:27:00 +020076vowel_syllable = reph? V.n? (ZWJ | (halant_group.cn){0,4} medial_group halant_or_matra_group syllable_tail);
Behdad Esfahbodf559c632017-10-03 15:20:07 +020077standalone_cluster = ((Repha|CS)? PLACEHOLDER | reph? DOTTEDCIRCLE).n? (halant_group.cn){0,4} medial_group halant_or_matra_group syllable_tail;
Behdad Esfahbodddbdfcb2014-05-23 15:39:55 -040078symbol_cluster = symbol syllable_tail;
Behdad Esfahbodc16012e2013-10-18 02:27:00 +020079broken_cluster = reph? n? (halant_group.cn){0,4} medial_group halant_or_matra_group syllable_tail;
# Fallback pattern: exactly one input element of any category.
Behdad Esfahbod4be46ba2012-05-11 14:39:01 +020080other = any;
Behdad Esfahbod743807a2011-07-29 16:37:02 -040081
# Scanner entry point. Ragel scanners take the longest match and, when two
# alternatives match the same length, prefer the one listed first. Each
# action hands the matched pattern's name to the found_syllable() macro.
Behdad Esfahbod4be46ba2012-05-11 14:39:01 +020082main := |*
Behdad Esfahbod327d14e2012-08-31 16:49:34 -040083 consonant_syllable => { found_syllable (consonant_syllable); };
84 vowel_syllable => { found_syllable (vowel_syllable); };
85 standalone_cluster => { found_syllable (standalone_cluster); };
Behdad Esfahbod9f9bd9b2014-05-23 15:33:13 -040086 symbol_cluster => { found_syllable (symbol_cluster); };
Behdad Esfahbod166b5cf2012-09-07 14:55:07 -040087 broken_cluster => { found_syllable (broken_cluster); };
Behdad Esfahbod327d14e2012-08-31 16:49:34 -040088 other => { found_syllable (non_indic_cluster); };
Behdad Esfahbod4be46ba2012-05-11 14:39:01 +020089*|;
Behdad Esfahbod867361c2011-06-17 18:35:46 -040090
Behdad Esfahbod867361c2011-06-17 18:35:46 -040091
92}%%
93
/*
 * found_syllable (syllable_type): tag the range the scanner just matched.
 *
 * Expands in a scope that must provide `info`, `p`, `last` and
 * `syllable_serial` (find_syllables() declares all four).  Every element of
 * info[last .. p] receives the value (syllable_serial << 4) | syllable_type
 * in its syllable() slot; `last` then moves to p+1 so the next match starts
 * where this one ended.
 *
 * The serial is bumped after each match and goes from 15 back to 1, so it
 * never takes the value 0 and always fits in the four bits above the type.
 *
 * The `if (0)` trace is compiled but never executed; flip it to 1 when
 * debugging.  It prints with %u because `last` and `p` are unsigned int.
 */
#define found_syllable(syllable_type) \
  HB_STMT_START { \
    if (0) fprintf (stderr, "syllable %u..%u %s\n", last, p+1, #syllable_type); \
    for (unsigned int i = last; i < p+1; i++) \
      info[i].syllable() = (syllable_serial << 4) | (syllable_type); \
    last = p+1; \
    syllable_serial++; \
    if (unlikely (syllable_serial == 16)) syllable_serial = 1; \
  } HB_STMT_END
Behdad Esfahbod867361c2011-06-17 18:35:46 -0400103
Behdad Esfahbod743807a2011-07-29 16:37:02 -0400104static void
Behdad Esfahbod166b5cf2012-09-07 14:55:07 -0400105find_syllables (hb_buffer_t *buffer)
Behdad Esfahbod867361c2011-06-17 18:35:46 -0400106{
Ebrahim Byagowi2eaba802018-04-24 09:04:15 +0430107 unsigned int p, pe, eof, ts HB_UNUSED, te, act;
Behdad Esfahbod867361c2011-06-17 18:35:46 -0400108 int cs;
Behdad Esfahbodef24cc82012-05-09 17:56:03 +0200109 hb_glyph_info_t *info = buffer->info;
Behdad Esfahbod867361c2011-06-17 18:35:46 -0400110 %%{
111 write init;
Behdad Esfahbodef24cc82012-05-09 17:56:03 +0200112 getkey info[p].indic_category();
Behdad Esfahbod867361c2011-06-17 18:35:46 -0400113 }%%
114
115 p = 0;
Behdad Esfahbod76f76812011-07-07 22:25:25 -0400116 pe = eof = buffer->len;
Behdad Esfahbod867361c2011-06-17 18:35:46 -0400117
Behdad Esfahbodd69d5ce2011-07-04 12:56:38 -0400118 unsigned int last = 0;
Behdad Esfahbod327d14e2012-08-31 16:49:34 -0400119 unsigned int syllable_serial = 1;
Behdad Esfahbod867361c2011-06-17 18:35:46 -0400120 %%{
121 write exec;
122 }%%
123}
124
Behdad Esfahbod867361c2011-06-17 18:35:46 -0400125#endif /* HB_OT_SHAPE_COMPLEX_INDIC_MACHINE_HH */