blob: 4194497261e4f52614acfdd52c3e3facc2994680 [file] [log] [blame]
Behdad Esfahbodc98b7182013-12-31 15:55:40 +08001/*
2 * Copyright © 2013 Google, Inc.
3 *
4 * This is part of HarfBuzz, a text shaping library.
5 *
6 * Permission is hereby granted, without written agreement and without
7 * license or royalty fees, to use, copy, modify, and distribute this
8 * software and its documentation for any purpose, provided that the
9 * above copyright notice and the following two paragraphs appear in
10 * all copies of this software.
11 *
12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
16 * DAMAGE.
17 *
18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
23 *
24 * Google Author(s): Behdad Esfahbod
25 */
26
27#include "hb-ot-shape-complex-private.hh"
28
29
30/* Hangul shaper */
31
32
33static const hb_tag_t hangul_features[] =
34{
35 HB_TAG('l','j','m','o'),
36 HB_TAG('v','j','m','o'),
37 HB_TAG('t','j','m','o'),
38 HB_TAG_NONE
39};
40
41static void
42collect_features_hangul (hb_ot_shape_planner_t *plan)
43{
44 for (const hb_tag_t *script_features = hangul_features; script_features && *script_features; script_features++)
45 plan->map.add_global_bool_feature (*script_features);
46}
47
/* Constants for arithmetic Hangul syllable composition / decomposition. */

/* First code point of each block taking part in the arithmetic.  TBase is
 * one *before* the first trailing consonant: trailing index 0 stands for
 * "no trailing consonant". */
#define SBase 0xAC00
#define LBase 0x1100
#define VBase 0x1161
#define TBase 0x11A7

/* Number of entries in each block (TCount includes the "no T" slot). */
#define LCount 19
#define VCount 21
#define TCount 28
#define NCount (VCount * TCount)
#define SCount (LCount * NCount)

/* Jamo that can be composed into, and precomposed syllables that can be
 * decomposed from, the SBase block. */
#define isCombiningL(u) (hb_in_range<hb_codepoint_t> ((u), LBase, LBase + LCount - 1))
#define isCombiningV(u) (hb_in_range<hb_codepoint_t> ((u), VBase, VBase + VCount - 1))
#define isCombiningT(u) (hb_in_range<hb_codepoint_t> ((u), TBase + 1, TBase + TCount - 1))
#define isCombinedS(u) (hb_in_range<hb_codepoint_t> ((u), SBase, SBase + SCount - 1))

/* Any trailing consonant, whether or not it takes part in composition. */
#define isT(u) (hb_in_ranges<hb_codepoint_t> ((u), 0x11A8, 0x11FF, 0xD7CB, 0xD7FB))
Behdad Esfahbodc98b7182013-12-31 15:55:40 +080064
65static void
66preprocess_text_hangul (const hb_ot_shape_plan_t *plan,
67 hb_buffer_t *buffer,
68 hb_font_t *font)
69{
70 /* Hangul syllables come in two shapes: LV, and LVT. Of those:
71 *
72 * - LV can be precomposed, or decomposed. Lets call those
73 * <LV> and <L,V>,
74 * - LVT can be fully precomposed, partically precomposed, or
75 * fully decomposed. Ie. <LVT>, <LV,T>, or <L,V,T>.
76 *
77 * The composition / decomposition is mechanical. However, not
78 * all <L,V> sequences compose, and not all <LV,T> sequences
79 * compose.
80 *
81 * Here are the specifics:
82 *
83 * - <L>: U+1100..115F, U+A960..A97F
84 * - <V>: U+1160..11A7, U+D7B0..D7C7
Behdad Esfahbod32478652014-01-02 14:01:56 +080085 * - <T>: U+11A8..11FF, U+D7CB..D7FB
Behdad Esfahbodc98b7182013-12-31 15:55:40 +080086 *
87 * - Only the <L,V> sequences for the 11xx ranges combine.
88 * - Only <LV,T> sequences for T in U+11A8..11C3 combine.
89 *
90 * Here is what we want to accomplish in this shaper:
91 *
92 * - If the whole syllable can be precomposed, do that,
93 * - Otherwise, fully decompose.
94 *
95 * That is, of the different possible syllables:
96 *
97 * <L>
98 * <L,V>
99 * <L,V,T>
100 * <LV>
101 * <LVT>
102 * <LV, T>
103 *
104 * - <L> needs no work.
105 *
106 * - <LV> and <LVT> can stay the way they are if the font supports them, otherwise we
107 * should fully decompose them if font supports.
108 *
109 * - <L,V> and <L,V,T> we should compose if the whole thing can be composed.
110 *
111 * - <LV,T> we should compose if the whole thing can be composed, otherwise we should
112 * decompose.
113 */
114
115 buffer->clear_output ();
116 unsigned int count = buffer->len;
117 for (buffer->idx = 0; buffer->idx < count;)
118 {
119 hb_codepoint_t u = buffer->cur().codepoint;
120
121 if (isCombiningL(u) && buffer->idx + 1 < count)
122 {
123 hb_codepoint_t l = u;
124 hb_codepoint_t v = buffer->cur(+1).codepoint;
125 if (isCombiningV(v))
126 {
127 /* Have <L,V> or <L,V,T>. */
128 unsigned int len = 2;
129 unsigned int tindex = 0;
130 if (buffer->idx + 2 < count)
131 {
132 hb_codepoint_t t = buffer->cur(+2).codepoint;
133 if (isCombiningT(t))
134 {
135 len = 3;
136 tindex = t - TBase;
137 }
138 else if (isT (t))
139 {
140 /* Old T jamo. Doesn't combine. Don't combine *anything*. */
141 len = 0;
142 }
143 }
144
145 if (len)
146 {
147 hb_codepoint_t s = SBase + (l - LBase) * NCount + (v - VBase) * TCount + tindex;
148 hb_codepoint_t glyph;
149 if (font->get_glyph (s, 0, &glyph))
150 {
151 buffer->replace_glyphs (len, 1, &s);
152 if (unlikely (buffer->in_error))
153 return;
154 continue;
155 }
156 }
157 }
158 }
159
160 else if (isCombinedS(u))
161 {
162 /* Have <LV>, <LVT>, or <LV,T> */
163 hb_codepoint_t s = u;
164 hb_codepoint_t glyph;
165 bool has_glyph = font->get_glyph (s, 0, &glyph);
166 unsigned int lindex = (s - SBase) / NCount;
167 unsigned int nindex = (s - SBase) % NCount;
Behdad Esfahbodbdb20da2014-01-02 14:04:30 +0800168 unsigned int vindex = nindex / TCount;
169 unsigned int tindex = nindex % TCount;
Behdad Esfahbodc98b7182013-12-31 15:55:40 +0800170
171 if (tindex && has_glyph)
172 goto next; /* <LVT> supported. Nothing to do. */
173
174 if (!tindex &&
175 buffer->idx + 1 < count &&
176 isCombiningT (buffer->cur(+1).codepoint))
177 {
178 /* <LV,T>, try to combine. */
179 tindex = buffer->cur(+1).codepoint - TBase;
180 hb_codepoint_t new_s = s + tindex;
181 if (font->get_glyph (new_s, 0, &glyph))
182 {
183 buffer->replace_glyphs (2, 1, &new_s);
184 if (unlikely (buffer->in_error))
185 return;
186 continue;
187 }
188 }
189
190 /* Otherwise, decompose if font doesn't support <LV>,
191 * or if having non-combining <LV,T>. Note that we
192 * already handled combining <LV,T> above. */
193 if (!has_glyph ||
194 (buffer->idx + 1 < count &&
195 isT (buffer->cur(+1).codepoint)))
196 {
197 hb_codepoint_t decomposed[3] = {LBase + lindex,
198 VBase + vindex,
199 TBase + tindex};
200 if (font->get_glyph (decomposed[0], 0, &glyph) &&
201 font->get_glyph (decomposed[1], 0, &glyph) &&
202 (tindex && font->get_glyph (decomposed[2], 0, &glyph)))
203 {
204 buffer->replace_glyphs (1, tindex ? 3 : 2, decomposed);
205 if (unlikely (buffer->in_error))
206 return;
207 continue;
208 }
209 }
210 }
211
212 next:
213 buffer->next_glyph ();
214 }
215 buffer->swap_buffers ();
216}
217
218const hb_ot_complex_shaper_t _hb_ot_complex_shaper_hangul =
219{
220 "hangul",
221 collect_features_hangul,
222 NULL, /* override_features */
223 NULL, /* data_create */
224 NULL, /* data_destroy */
225 preprocess_text_hangul,
Behdad Esfahbod3d6ca0d2013-12-31 16:04:35 +0800226 HB_OT_SHAPE_NORMALIZATION_MODE_DEFAULT,
Behdad Esfahbodc98b7182013-12-31 15:55:40 +0800227 NULL, /* decompose */
228 NULL, /* compose */
229 NULL, /* setup_masks */
Behdad Esfahbod6300cd72013-12-31 16:38:47 +0800230 HB_OT_SHAPE_ZERO_WIDTH_MARKS_DEFAULT,
Behdad Esfahbodc98b7182013-12-31 15:55:40 +0800231 false, /* fallback_position */
232};