/*
 * Copyright © 2009 Red Hat, Inc.
 * Copyright © 2011 Codethink Limited
 * Copyright © 2010,2011 Google, Inc.
 *
 * This is part of HarfBuzz, a text shaping library.
 *
 * Permission is hereby granted, without written agreement and without
 * license or royalty fees, to use, copy, modify, and distribute this
 * software and its documentation for any purpose, provided that the
 * above copyright notice and the following two paragraphs appear in
 * all copies of this software.
 *
 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 *
 * Red Hat Author(s): Behdad Esfahbod
 * Codethink Author(s): Ryan Lortie
 * Google Author(s): Behdad Esfahbod
 */
30
31#ifndef HB_UNICODE_PRIVATE_HH
32#define HB_UNICODE_PRIVATE_HH
33
Behdad Esfahbodc57d4542011-04-20 18:50:27 -040034#include "hb-private.hh"
Behdad Esfahbodfb194b82011-04-20 02:00:47 -040035
36#include "hb-unicode.h"
Behdad Esfahbodfca368c2011-04-21 18:24:02 -040037#include "hb-object-private.hh"
Behdad Esfahbodfb194b82011-04-20 02:00:47 -040038
Behdad Esfahbodfb194b82011-04-20 02:00:47 -040039
40
41/*
42 * hb_unicode_funcs_t
43 */
44
/* X-macro listing every Unicode callback by name.
 * Define HB_UNICODE_FUNC_IMPLEMENT (name) before expanding this list and
 * #undef it afterwards; each entry expands to one invocation of it. */
#define HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS \
  HB_UNICODE_FUNC_IMPLEMENT (combining_class) \
  HB_UNICODE_FUNC_IMPLEMENT (eastasian_width) \
  HB_UNICODE_FUNC_IMPLEMENT (general_category) \
  HB_UNICODE_FUNC_IMPLEMENT (mirroring) \
  HB_UNICODE_FUNC_IMPLEMENT (script) \
  HB_UNICODE_FUNC_IMPLEMENT (compose) \
  HB_UNICODE_FUNC_IMPLEMENT (decompose) \
  HB_UNICODE_FUNC_IMPLEMENT (decompose_compatibility) \
  /* ^--- Add new callbacks here */
55
/* Simple callbacks are those taking a single hb_codepoint_t and returning a
 * single value.  Each entry is (return_type, name); define
 * HB_UNICODE_FUNC_IMPLEMENT (return_type, name) before expanding this list
 * and #undef it afterwards. */
#define HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS_SIMPLE \
  HB_UNICODE_FUNC_IMPLEMENT (hb_unicode_combining_class_t, combining_class) \
  HB_UNICODE_FUNC_IMPLEMENT (unsigned int, eastasian_width) \
  HB_UNICODE_FUNC_IMPLEMENT (hb_unicode_general_category_t, general_category) \
  HB_UNICODE_FUNC_IMPLEMENT (hb_codepoint_t, mirroring) \
  HB_UNICODE_FUNC_IMPLEMENT (hb_script_t, script) \
  /* ^--- Add new simple callbacks here */
64
Behdad Esfahbod1bc1cb32012-06-16 15:21:55 -040065struct hb_unicode_funcs_t {
Behdad Esfahbodfca368c2011-04-21 18:24:02 -040066 hb_object_header_t header;
Behdad Esfahbod6220e5f2012-06-06 03:30:09 -040067 ASSERT_POD ();
Behdad Esfahbodfca368c2011-04-21 18:24:02 -040068
Behdad Esfahbodfb194b82011-04-20 02:00:47 -040069 hb_unicode_funcs_t *parent;
70
Behdad Esfahbodfca368c2011-04-21 18:24:02 -040071 bool immutable;
Behdad Esfahbodfb194b82011-04-20 02:00:47 -040072
Behdad Esfahbod74703152012-08-01 17:01:59 -040073#define HB_UNICODE_FUNC_IMPLEMENT(return_type, name) \
74 inline return_type name (hb_codepoint_t unicode) { return func.name (this, unicode, user_data.name); }
75HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS_SIMPLE
76#undef HB_UNICODE_FUNC_IMPLEMENT
77
78 inline hb_bool_t compose (hb_codepoint_t a, hb_codepoint_t b,
79 hb_codepoint_t *ab)
80 {
81 *ab = 0;
82 /* XXX, this belongs to indic normalizer. */
83 if ((FLAG (general_category (a)) &
84 (FLAG (HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK) |
85 FLAG (HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK) |
86 FLAG (HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK))))
87 return false;
88 /* XXX, add composition-exclusion exceptions to Indic shaper. */
89 if (a == 0x09AF && b == 0x09BC) { *ab = 0x09DF; return true; }
90 return func.compose (this, a, b, ab, user_data.compose);
91 }
92
93 inline hb_bool_t decompose (hb_codepoint_t ab,
94 hb_codepoint_t *a, hb_codepoint_t *b)
95 {
96 /* XXX FIXME, move these to complex shapers and propagage to normalizer.*/
97 switch (ab) {
98 case 0x0AC9 : return false;
99
100 case 0x0931 : return false;
101 case 0x0B94 : return false;
102
103 /* These ones have Unicode decompositions, but we do it
104 * this way to be close to what Uniscribe does. */
105 case 0x0DDA : *a = 0x0DD9; *b= 0x0DDA; return true;
106 case 0x0DDC : *a = 0x0DD9; *b= 0x0DDC; return true;
107 case 0x0DDD : *a = 0x0DD9; *b= 0x0DDD; return true;
108 case 0x0DDE : *a = 0x0DD9; *b= 0x0DDE; return true;
109
110 case 0x0F77 : *a = 0x0FB2; *b= 0x0F81; return true;
111 case 0x0F79 : *a = 0x0FB3; *b= 0x0F81; return true;
112 case 0x17BE : *a = 0x17C1; *b= 0x17BE; return true;
113 case 0x17BF : *a = 0x17C1; *b= 0x17BF; return true;
114 case 0x17C0 : *a = 0x17C1; *b= 0x17C0; return true;
115 case 0x17C4 : *a = 0x17C1; *b= 0x17C4; return true;
116 case 0x17C5 : *a = 0x17C1; *b= 0x17C5; return true;
117 case 0x1925 : *a = 0x1920; *b= 0x1923; return true;
118 case 0x1926 : *a = 0x1920; *b= 0x1924; return true;
119 case 0x1B3C : *a = 0x1B42; *b= 0x1B3C; return true;
120 case 0x1112E : *a = 0x11127; *b= 0x11131; return true;
121 case 0x1112F : *a = 0x11127; *b= 0x11132; return true;
122#if 0
123 case 0x0B57 : *a = 0xno decomp, -> RIGHT; return true;
124 case 0x1C29 : *a = 0xno decomp, -> LEFT; return true;
125 case 0xA9C0 : *a = 0xno decomp, -> RIGHT; return true;
126 case 0x111BF : *a = 0xno decomp, -> ABOVE; return true;
127#endif
128 }
129 *a = ab; *b = 0;
130 return func.decompose (this, ab, a, b, user_data.decompose);
131 }
132
133 inline unsigned int decompose_compatibility (hb_codepoint_t u,
134 hb_codepoint_t *decomposed)
135 {
136 unsigned int ret = func.decompose_compatibility (this, u, decomposed, user_data.decompose_compatibility);
137 if (ret == 1 && u == decomposed[0]) {
138 decomposed[0] = 0;
139 return 0;
140 }
141 decomposed[ret] = 0;
142 return ret;
143 }
144
Behdad Esfahbodfb194b82011-04-20 02:00:47 -0400145
Behdad Esfahbod208f70f2012-08-01 17:13:10 -0400146 HB_INTERNAL unsigned int
147 modified_combining_class (hb_codepoint_t unicode);
148
149 inline hb_bool_t
150 is_variation_selector (hb_codepoint_t unicode)
151 {
152 return unlikely (hb_in_ranges<hb_codepoint_t> (unicode,
153 0x180B, 0x180D, /* MONGOLIAN FREE VARIATION SELECTOR ONE..THREE */
154 0xFE00, 0xFE0F, /* VARIATION SELECTOR-1..16 */
155 0xE0100, 0xE01EF)); /* VARIATION SELECTOR-17..256 */
156 }
157
158 /* Zero-Width invisible characters:
159 *
160 * 00AD SOFT HYPHEN
161 * 034F COMBINING GRAPHEME JOINER
162 *
163 * 180E MONGOLIAN VOWEL SEPARATOR
164 *
165 * 200B ZERO WIDTH SPACE
166 * 200C ZERO WIDTH NON-JOINER
167 * 200D ZERO WIDTH JOINER
168 * 200E LEFT-TO-RIGHT MARK
169 * 200F RIGHT-TO-LEFT MARK
170 *
171 * 2028 LINE SEPARATOR
172 *
173 * 202A LEFT-TO-RIGHT EMBEDDING
174 * 202B RIGHT-TO-LEFT EMBEDDING
175 * 202C POP DIRECTIONAL FORMATTING
176 * 202D LEFT-TO-RIGHT OVERRIDE
177 * 202E RIGHT-TO-LEFT OVERRIDE
178 *
179 * 2060 WORD JOINER
180 * 2061 FUNCTION APPLICATION
181 * 2062 INVISIBLE TIMES
182 * 2063 INVISIBLE SEPARATOR
183 *
184 * FEFF ZERO WIDTH NO-BREAK SPACE
185 */
186 inline hb_bool_t
187 is_zero_width (hb_codepoint_t ch)
188 {
189 return ((ch & ~0x007F) == 0x2000 && (hb_in_ranges<hb_codepoint_t> (ch,
190 0x200B, 0x200F,
191 0x202A, 0x202E,
192 0x2060, 0x2064) ||
193 (ch == 0x2028))) ||
194 unlikely (ch == 0x0009 ||
195 ch == 0x00AD ||
196 ch == 0x034F ||
197 ch == 0x180E ||
198 ch == 0xFEFF);
199 }
200
201
Behdad Esfahbodfb194b82011-04-20 02:00:47 -0400202 struct {
Behdad Esfahbodc4641722011-07-07 23:47:19 -0400203#define HB_UNICODE_FUNC_IMPLEMENT(name) hb_unicode_##name##_func_t name;
Behdad Esfahbod4b6317c2011-07-07 23:14:42 -0400204 HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS
205#undef HB_UNICODE_FUNC_IMPLEMENT
Behdad Esfahbodc4641722011-07-07 23:47:19 -0400206 } func;
Behdad Esfahbodfb194b82011-04-20 02:00:47 -0400207
208 struct {
Behdad Esfahbod891c4752011-07-07 23:19:27 -0400209#define HB_UNICODE_FUNC_IMPLEMENT(name) void *name;
Behdad Esfahbod4b6317c2011-07-07 23:14:42 -0400210 HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS
211#undef HB_UNICODE_FUNC_IMPLEMENT
Behdad Esfahbodfb194b82011-04-20 02:00:47 -0400212 } user_data;
213
214 struct {
Behdad Esfahbod891c4752011-07-07 23:19:27 -0400215#define HB_UNICODE_FUNC_IMPLEMENT(name) hb_destroy_func_t name;
Behdad Esfahbod4b6317c2011-07-07 23:14:42 -0400216 HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS
217#undef HB_UNICODE_FUNC_IMPLEMENT
Behdad Esfahbodfb194b82011-04-20 02:00:47 -0400218 } destroy;
219};
220
Behdad Esfahbodd4bee9f2011-04-27 09:24:37 -0400221
Behdad Esfahbod5ddd9cc2011-09-16 16:40:44 -0400222#ifdef HAVE_GLIB
Behdad Esfahbodbe4560a2012-06-05 18:14:03 -0400223extern HB_INTERNAL const hb_unicode_funcs_t _hb_glib_unicode_funcs;
224#define _hb_unicode_funcs_default _hb_glib_unicode_funcs
Behdad Esfahbod5ddd9cc2011-09-16 16:40:44 -0400225#elif defined(HAVE_ICU)
Behdad Esfahbodbe4560a2012-06-05 18:14:03 -0400226extern HB_INTERNAL const hb_unicode_funcs_t _hb_icu_unicode_funcs;
227#define _hb_unicode_funcs_default _hb_icu_unicode_funcs
Behdad Esfahbodd4bee9f2011-04-27 09:24:37 -0400228#else
Behdad Esfahbodbc145652012-05-27 10:45:57 -0400229#define HB_UNICODE_FUNCS_NIL 1
Behdad Esfahbod9a5b4212012-07-11 16:35:04 -0400230extern HB_INTERNAL const hb_unicode_funcs_t _hb_unicode_funcs_nil;
Behdad Esfahbodbe4560a2012-06-05 18:14:03 -0400231#define _hb_unicode_funcs_default _hb_unicode_funcs_nil
Behdad Esfahbodd4bee9f2011-04-27 09:24:37 -0400232#endif
233
Behdad Esfahbodfb194b82011-04-20 02:00:47 -0400234
Behdad Esfahbodfb194b82011-04-20 02:00:47 -0400235#endif /* HB_UNICODE_PRIVATE_HH */