blob: bd3250e580adeb59a534e94a5725f79a583bcc8e [file] [log] [blame]
Behdad Esfahboda7c63cd2019-03-30 14:59:40 -07001/*
2 * Copyright © 2007,2008,2009,2010 Red Hat, Inc.
3 * Copyright © 2012,2018 Google, Inc.
4 *
5 * This is part of HarfBuzz, a text shaping library.
6 *
7 * Permission is hereby granted, without written agreement and without
8 * license or royalty fees, to use, copy, modify, and distribute this
9 * software and its documentation for any purpose, provided that the
10 * above copyright notice and the following two paragraphs appear in
11 * all copies of this software.
12 *
13 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
14 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
15 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
16 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
17 * DAMAGE.
18 *
19 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
20 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
21 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
22 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
23 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
24 *
25 * Red Hat Author(s): Behdad Esfahbod
26 * Google Author(s): Behdad Esfahbod
27 */
28
29#ifndef HB_SANITIZE_HH
30#define HB_SANITIZE_HH
31
32#include "hb.hh"
33#include "hb-blob.hh"
34#include "hb-dispatch.hh"
35
36
37/*
38 * Sanitize
39 *
40 *
41 * === Introduction ===
42 *
 * The sanitize machinery is at the core of our zero-cost font loading. We
 * mmap() the font file into memory and create a blob out of it. Font subtables
 * are returned as a readonly sub-blob of the main font blob. These table
 * blobs are then sanitized before use, to ensure invalid memory access does
 * not happen. A typical top-level use of the sanitize API, e.g. to load the
 * 'head' table, looks like:
49 *
50 * hb_blob_t *head_blob = hb_sanitize_context_t ().reference_table<OT::head> (face);
51 *
52 * The blob then can be converted to a head table struct with:
53 *
54 * const head *head_table = head_blob->as<head> ();
55 *
 * reference_table() calls hb_face_reference_table() to load the table blob,
 * sanitizes it, and returns either the sanitized blob, or the empty blob if
 * sanitization failed. The blob->as() function returns the null object of
 * its template type argument if the blob is empty. Otherwise, it just casts
 * the blob contents to the desired type.
61 *
62 * Sanitizing a blob of data with a type T works as follows (with minor
63 * simplification):
64 *
65 * - Cast blob content to T*, call sanitize() method of it,
66 * - If sanitize succeeded, return blob.
67 * - Otherwise, if blob is not writable, try making it writable,
68 * or copy if cannot be made writable in-place,
69 * - Call sanitize() again. Return blob if sanitize succeeded.
70 * - Return empty blob otherwise.
71 *
72 *
73 * === The sanitize() contract ===
74 *
 * The sanitize() method of each object type shall return true if it's safe to
 * call other methods of the object, and false otherwise.
Behdad Esfahboda7c63cd2019-03-30 14:59:40 -070077 *
78 * Note that what sanitize() checks for might align with what the specification
79 * describes as valid table data, but does not have to be. In particular, we
80 * do NOT want to be pedantic and concern ourselves with validity checks that
81 * are irrelevant to our use of the table. On the contrary, we want to be
82 * lenient with error handling and accept invalid data to the extent that it
83 * does not impose extra burden on us.
84 *
 * Based on the sanitize contract, one can see that what we check for depends
 * on how we use the data in other table methods. I.e. if other table methods
 * assume that offsets do NOT point out of the table data block, then that's
 * something sanitize() must check for (GSUB/GPOS/GDEF/etc work this way). On
 * the other hand, if other methods do such checks themselves, then sanitize()
 * does not have to bother with them (glyf/loca work this way). The choice
 * depends on the table structure and sanitize() performance. For example, to
 * check glyf/loca offsets in sanitize() would cost O(num-glyphs). We try hard
 * to avoid such costs during font loading. By postponing such checks to the
 * actual glyph loading, we reduce the sanitize cost to O(1) and total runtime
 * cost to O(used-glyphs). As such, this is preferred.
96 *
97 * The same argument can be made re GSUB/GPOS/GDEF, but there, the table
98 * structure is so complicated that by checking all offsets at sanitize() time,
99 * we make the code much simpler in other methods, as offsets and referenced
100 * objects do not need to be validated at each use site.
101 */
102
/*
 * Tunable limits that bound sanitizing time on really broken fonts.
 * Each one can be overridden by defining it before this point.
 */

/* Maximum number of edit requests accepted per sanitize pass. */
#ifndef HB_SANITIZE_MAX_EDITS
#define HB_SANITIZE_MAX_EDITS 32
#endif

/* The operation budget is the blob length multiplied by this factor... */
#ifndef HB_SANITIZE_MAX_OPS_FACTOR
#define HB_SANITIZE_MAX_OPS_FACTOR 64
#endif

/* ...clamped to be no smaller than this... */
#ifndef HB_SANITIZE_MAX_OPS_MIN
#define HB_SANITIZE_MAX_OPS_MIN 16384
#endif

/* ...and no larger than this. */
#ifndef HB_SANITIZE_MAX_OPS_MAX
#define HB_SANITIZE_MAX_OPS_MAX 0x3FFFFFFF
#endif

/* Upper bound on the running total of subtables visited. */
#ifndef HB_SANITIZE_MAX_SUBTABLES
#define HB_SANITIZE_MAX_SUBTABLES 0x4000
#endif

Behdad Esfahboda7c63cd2019-03-30 14:59:40 -0700120struct hb_sanitize_context_t :
121 hb_dispatch_context_t<hb_sanitize_context_t, bool, HB_DEBUG_SANITIZE>
122{
123 hb_sanitize_context_t () :
Behdad Esfahboda7c63cd2019-03-30 14:59:40 -0700124 start (nullptr), end (nullptr),
Ebrahim Byagowid3836032020-03-02 22:41:08 +0330125 max_ops (0), max_subtables (0),
Garret Riegerace98cc2021-11-08 15:47:56 -0800126 recursion_depth (0),
Behdad Esfahboda7c63cd2019-03-30 14:59:40 -0700127 writable (false), edit_count (0),
128 blob (nullptr),
129 num_glyphs (65536),
130 num_glyphs_set (false) {}
131
132 const char *get_name () { return "SANITIZE"; }
133 template <typename T, typename F>
134 bool may_dispatch (const T *obj HB_UNUSED, const F *format)
135 { return format->sanitize (this); }
Behdad Esfahboda7c63cd2019-03-30 14:59:40 -0700136 static return_t default_return_value () { return true; }
137 static return_t no_dispatch_return_value () { return false; }
138 bool stop_sublookup_iteration (const return_t r) const { return !r; }
139
Ebrahim Byagowid3836032020-03-02 22:41:08 +0330140 bool visit_subtables (unsigned count)
141 {
142 max_subtables += count;
Behdad Esfahboda6502432021-01-24 13:30:06 -0700143 return max_subtables < HB_SANITIZE_MAX_SUBTABLES;
Ebrahim Byagowid3836032020-03-02 22:41:08 +0330144 }
145
Behdad Esfahbodc14efb82019-05-05 09:54:58 -0700146 private:
147 template <typename T, typename ...Ts> auto
Behdad Esfahbod83e3eab2019-05-07 20:58:43 -0700148 _dispatch (const T &obj, hb_priority<1>, Ts&&... ds) HB_AUTO_RETURN
Behdad Esfahbod6d555ce2021-11-02 00:18:22 -0600149 ( obj.sanitize (this, std::forward<Ts> (ds)...) )
Behdad Esfahbodc14efb82019-05-05 09:54:58 -0700150 template <typename T, typename ...Ts> auto
Behdad Esfahbod83e3eab2019-05-07 20:58:43 -0700151 _dispatch (const T &obj, hb_priority<0>, Ts&&... ds) HB_AUTO_RETURN
Behdad Esfahbod6d555ce2021-11-02 00:18:22 -0600152 ( obj.dispatch (this, std::forward<Ts> (ds)...) )
Behdad Esfahbodc14efb82019-05-05 09:54:58 -0700153 public:
154 template <typename T, typename ...Ts> auto
Behdad Esfahbod83e3eab2019-05-07 20:58:43 -0700155 dispatch (const T &obj, Ts&&... ds) HB_AUTO_RETURN
Garret Riegerace98cc2021-11-08 15:47:56 -0800156 ( _dispatch (obj, hb_prioritize, std::forward<Ts> (ds)...) )
157
Behdad Esfahbodc14efb82019-05-05 09:54:58 -0700158
Behdad Esfahboda7c63cd2019-03-30 14:59:40 -0700159 void init (hb_blob_t *b)
160 {
161 this->blob = hb_blob_reference (b);
162 this->writable = false;
163 }
164
165 void set_num_glyphs (unsigned int num_glyphs_)
166 {
167 num_glyphs = num_glyphs_;
168 num_glyphs_set = true;
169 }
170 unsigned int get_num_glyphs () { return num_glyphs; }
171
172 void set_max_ops (int max_ops_) { max_ops = max_ops_; }
173
174 template <typename T>
175 void set_object (const T *obj)
176 {
177 reset_object ();
178
179 if (!obj) return;
180
181 const char *obj_start = (const char *) obj;
182 if (unlikely (obj_start < this->start || this->end <= obj_start))
183 this->start = this->end = nullptr;
184 else
185 {
186 this->start = obj_start;
Behdad Esfahbod2ba984f2019-05-07 23:28:22 -0700187 this->end = obj_start + hb_min (size_t (this->end - obj_start), obj->get_size ());
Behdad Esfahboda7c63cd2019-03-30 14:59:40 -0700188 }
189 }
190
191 void reset_object ()
192 {
193 this->start = this->blob->data;
194 this->end = this->start + this->blob->length;
195 assert (this->start <= this->end); /* Must not overflow. */
196 }
197
198 void start_processing ()
199 {
200 reset_object ();
Behdad Esfahbod4c140432022-11-21 12:56:33 -0700201 unsigned m;
202 if (unlikely (hb_unsigned_mul_overflows (this->end - this->start, HB_SANITIZE_MAX_OPS_FACTOR, &m)))
Marcel Fabian Krügerb28c2822019-12-17 02:58:51 +0100203 this->max_ops = HB_SANITIZE_MAX_OPS_MAX;
204 else
Behdad Esfahbod4c140432022-11-21 12:56:33 -0700205 this->max_ops = hb_clamp (m,
Ebrahim Byagowib3987482020-03-04 11:18:19 +0330206 (unsigned) HB_SANITIZE_MAX_OPS_MIN,
207 (unsigned) HB_SANITIZE_MAX_OPS_MAX);
Behdad Esfahboda7c63cd2019-03-30 14:59:40 -0700208 this->edit_count = 0;
209 this->debug_depth = 0;
Garret Riegerace98cc2021-11-08 15:47:56 -0800210 this->recursion_depth = 0;
Behdad Esfahboda7c63cd2019-03-30 14:59:40 -0700211
212 DEBUG_MSG_LEVEL (SANITIZE, start, 0, +1,
213 "start [%p..%p] (%lu bytes)",
214 this->start, this->end,
215 (unsigned long) (this->end - this->start));
216 }
217
218 void end_processing ()
219 {
220 DEBUG_MSG_LEVEL (SANITIZE, this->start, 0, -1,
221 "end [%p..%p] %u edit requests",
222 this->start, this->end, this->edit_count);
223
224 hb_blob_destroy (this->blob);
225 this->blob = nullptr;
226 this->start = this->end = nullptr;
227 }
228
Behdad Esfahbod25a5b282019-05-10 16:01:39 -0700229 unsigned get_edit_count () { return edit_count; }
230
Behdad Esfahboda7c63cd2019-03-30 14:59:40 -0700231 bool check_range (const void *base,
232 unsigned int len) const
233 {
234 const char *p = (const char *) base;
235 bool ok = !len ||
236 (this->start <= p &&
237 p <= this->end &&
238 (unsigned int) (this->end - p) >= len &&
Garret Rieger8ef42572021-03-31 15:00:46 -0700239 (this->max_ops -= len) > 0);
Behdad Esfahboda7c63cd2019-03-30 14:59:40 -0700240
241 DEBUG_MSG_LEVEL (SANITIZE, p, this->debug_depth+1, 0,
242 "check_range [%p..%p]"
243 " (%d bytes) in [%p..%p] -> %s",
244 p, p + len, len,
245 this->start, this->end,
246 ok ? "OK" : "OUT-OF-RANGE");
247
248 return likely (ok);
249 }
250
251 template <typename T>
252 bool check_range (const T *base,
253 unsigned int a,
254 unsigned int b) const
255 {
Behdad Esfahbod4c140432022-11-21 12:56:33 -0700256 unsigned m;
257 return !hb_unsigned_mul_overflows (a, b, &m) &&
258 this->check_range (base, m);
Behdad Esfahboda7c63cd2019-03-30 14:59:40 -0700259 }
260
261 template <typename T>
262 bool check_range (const T *base,
263 unsigned int a,
264 unsigned int b,
265 unsigned int c) const
266 {
Behdad Esfahbod4c140432022-11-21 12:56:33 -0700267 unsigned m;
268 return !hb_unsigned_mul_overflows (a, b, &m) &&
269 this->check_range (base, m, c);
Behdad Esfahboda7c63cd2019-03-30 14:59:40 -0700270 }
271
272 template <typename T>
273 bool check_array (const T *base, unsigned int len) const
274 {
275 return this->check_range (base, len, hb_static_size (T));
276 }
277
278 template <typename T>
279 bool check_array (const T *base,
280 unsigned int a,
281 unsigned int b) const
282 {
283 return this->check_range (base, a, b, hb_static_size (T));
284 }
285
Garret Rieger64b29db2021-11-09 09:13:14 -0800286 bool check_start_recursion (int max_depth)
Garret Riegerace98cc2021-11-08 15:47:56 -0800287 {
Garret Rieger64b29db2021-11-09 09:13:14 -0800288 if (unlikely (recursion_depth >= max_depth)) return false;
Garret Riegerace98cc2021-11-08 15:47:56 -0800289 return ++recursion_depth;
290 }
291
292 bool end_recursion (bool result)
293 {
294 recursion_depth--;
295 return result;
296 }
297
Behdad Esfahboda7c63cd2019-03-30 14:59:40 -0700298 template <typename Type>
299 bool check_struct (const Type *obj) const
300 { return likely (this->check_range (obj, obj->min_size)); }
301
302 bool may_edit (const void *base, unsigned int len)
303 {
304 if (this->edit_count >= HB_SANITIZE_MAX_EDITS)
305 return false;
306
307 const char *p = (const char *) base;
308 this->edit_count++;
309
310 DEBUG_MSG_LEVEL (SANITIZE, p, this->debug_depth+1, 0,
311 "may_edit(%u) [%p..%p] (%d bytes) in [%p..%p] -> %s",
312 this->edit_count,
313 p, p + len, len,
314 this->start, this->end,
315 this->writable ? "GRANTED" : "DENIED");
316
317 return this->writable;
318 }
319
320 template <typename Type, typename ValueType>
321 bool try_set (const Type *obj, const ValueType &v)
322 {
323 if (this->may_edit (obj, hb_static_size (Type)))
324 {
325 * const_cast<Type *> (obj) = v;
326 return true;
327 }
328 return false;
329 }
330
331 template <typename Type>
332 hb_blob_t *sanitize_blob (hb_blob_t *blob)
333 {
334 bool sane;
335
336 init (blob);
337
338 retry:
339 DEBUG_MSG_FUNC (SANITIZE, start, "start");
340
341 start_processing ();
342
343 if (unlikely (!start))
344 {
345 end_processing ();
346 return blob;
347 }
348
349 Type *t = reinterpret_cast<Type *> (const_cast<char *> (start));
350
351 sane = t->sanitize (this);
352 if (sane)
353 {
354 if (edit_count)
355 {
356 DEBUG_MSG_FUNC (SANITIZE, start, "passed first round with %d edits; going for second round", edit_count);
357
Ebrahim Byagowia0b4ac42019-08-24 17:57:14 +0430358 /* sanitize again to ensure no toe-stepping */
359 edit_count = 0;
Behdad Esfahboda7c63cd2019-03-30 14:59:40 -0700360 sane = t->sanitize (this);
361 if (edit_count) {
362 DEBUG_MSG_FUNC (SANITIZE, start, "requested %d edits in second round; FAILLING", edit_count);
363 sane = false;
364 }
365 }
366 }
367 else
368 {
369 if (edit_count && !writable) {
Ebrahim Byagowia0b4ac42019-08-24 17:57:14 +0430370 start = hb_blob_get_data_writable (blob, nullptr);
Behdad Esfahboda7c63cd2019-03-30 14:59:40 -0700371 end = start + blob->length;
372
373 if (start)
374 {
375 writable = true;
376 /* ok, we made it writable by relocating. try again */
377 DEBUG_MSG_FUNC (SANITIZE, start, "retry");
378 goto retry;
379 }
380 }
381 }
382
383 end_processing ();
384
385 DEBUG_MSG_FUNC (SANITIZE, start, sane ? "PASSED" : "FAILED");
386 if (sane)
387 {
388 hb_blob_make_immutable (blob);
389 return blob;
390 }
391 else
392 {
393 hb_blob_destroy (blob);
394 return hb_blob_get_empty ();
395 }
396 }
397
398 template <typename Type>
399 hb_blob_t *reference_table (const hb_face_t *face, hb_tag_t tableTag = Type::tableTag)
400 {
401 if (!num_glyphs_set)
402 set_num_glyphs (hb_face_get_glyph_count (face));
403 return sanitize_blob<Type> (hb_face_reference_table (face, tableTag));
404 }
405
Behdad Esfahboda7c63cd2019-03-30 14:59:40 -0700406 const char *start, *end;
Ebrahim Byagowid3836032020-03-02 22:41:08 +0330407 mutable int max_ops, max_subtables;
Behdad Esfahboda7c63cd2019-03-30 14:59:40 -0700408 private:
Garret Riegerace98cc2021-11-08 15:47:56 -0800409 int recursion_depth;
Behdad Esfahboda7c63cd2019-03-30 14:59:40 -0700410 bool writable;
411 unsigned int edit_count;
412 hb_blob_t *blob;
413 unsigned int num_glyphs;
414 bool num_glyphs_set;
415};
416
417struct hb_sanitize_with_object_t
418{
419 template <typename T>
420 hb_sanitize_with_object_t (hb_sanitize_context_t *c, const T& obj) : c (c)
421 { c->set_object (obj); }
422 ~hb_sanitize_with_object_t ()
423 { c->reset_object (); }
424
425 private:
426 hb_sanitize_context_t *c;
427};
428
429
430#endif /* HB_SANITIZE_HH */