blob: ab95a71c423cdf82b832c44f17f13a4b1cc8a195 [file] [log] [blame]
Garret Rieger20b02a62022-06-24 18:58:17 +00001/*
2 * Copyright © 2022 Google, Inc.
3 *
4 * This is part of HarfBuzz, a text shaping library.
5 *
6 * Permission is hereby granted, without written agreement and without
7 * license or royalty fees, to use, copy, modify, and distribute this
8 * software and its documentation for any purpose, provided that the
9 * above copyright notice and the following two paragraphs appear in
10 * all copies of this software.
11 *
12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
16 * DAMAGE.
17 *
18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
23 *
24 * Google Author(s): Garret Rieger
25 */
26
Garret Rieger241ebc92022-07-26 00:04:20 +000027#include "../hb-set.hh"
28#include "../hb-priority-queue.hh"
29#include "../hb-serialize.hh"
Garret Riegerce03c352022-07-21 19:07:55 +000030
Garret Rieger20b02a62022-06-24 18:58:17 +000031#ifndef GRAPH_GRAPH_HH
32#define GRAPH_GRAPH_HH
33
34namespace graph {
35
Garret Rieger26749622022-06-24 21:00:54 +000036/**
37 * Represents a serialized table in the form of a graph.
38 * Provides methods for modifying and reordering the graph.
39 */
Garret Rieger20b02a62022-06-24 18:58:17 +000040struct graph_t
41{
42 struct vertex_t
43 {
44 hb_serialize_context_t::object_t obj;
45 int64_t distance = 0 ;
46 int64_t space = 0 ;
47 hb_vector_t<unsigned> parents;
48 unsigned start = 0;
49 unsigned end = 0;
50 unsigned priority = 0;
51
Garret Rieger1405f962022-08-15 23:48:00 +000052 void normalize ()
Garret Rieger07fd0522022-08-15 23:16:51 +000053 {
Garret Rieger1405f962022-08-15 23:48:00 +000054 obj.real_links.qsort ();
55 for (auto& l : obj.real_links)
56 {
57 for (unsigned i = 0; i < l.width; i++)
58 {
59 obj.head[l.position + i] = 0;
60 }
61 }
62 }
63
Garret Riegera3ed9f92022-08-17 23:39:11 +000064 bool equals (const vertex_t& other,
65 const graph_t& graph,
66 const graph_t& other_graph,
67 unsigned depth) const
Garret Rieger1405f962022-08-15 23:48:00 +000068 {
69 if (!(as_bytes () == other.as_bytes ()))
Garret Riegera3ed9f92022-08-17 23:39:11 +000070 {
71 DEBUG_MSG (SUBSET_REPACK, nullptr,
72 "vertex [%lu] bytes != [%lu] bytes, depth = %u",
73 table_size (),
74 other.table_size (),
75 depth);
Garret Rieger07fd0522022-08-15 23:16:51 +000076
Garret Riegera3ed9f92022-08-17 23:39:11 +000077 auto a = as_bytes ();
78 auto b = other.as_bytes ();
79 while (a || b)
80 {
81 DEBUG_MSG (SUBSET_REPACK, nullptr,
82 " 0x%x %s 0x%x", *a, (*a == *b) ? "==" : "!=", *b);
83 a++;
84 b++;
85 }
86 return false;
87 }
88
89 return links_equal (obj.real_links, other.obj.real_links, graph, other_graph, depth);
Garret Rieger07fd0522022-08-15 23:16:51 +000090 }
91
Garret Rieger1405f962022-08-15 23:48:00 +000092 hb_bytes_t as_bytes () const
Garret Rieger07fd0522022-08-15 23:16:51 +000093 {
94 return hb_bytes_t (obj.head, table_size ());
95 }
96
Garret Rieger20b02a62022-06-24 18:58:17 +000097 friend void swap (vertex_t& a, vertex_t& b)
98 {
99 hb_swap (a.obj, b.obj);
100 hb_swap (a.distance, b.distance);
101 hb_swap (a.space, b.space);
102 hb_swap (a.parents, b.parents);
103 hb_swap (a.start, b.start);
104 hb_swap (a.end, b.end);
105 hb_swap (a.priority, b.priority);
106 }
107
Garret Rieger1acd2a82022-08-11 20:22:31 +0000108 hb_hashmap_t<unsigned, unsigned>
109 position_to_index_map () const
110 {
111 hb_hashmap_t<unsigned, unsigned> result;
112
113 for (const auto& l : obj.real_links) {
114 result.set (l.position, l.objidx);
115 }
116
117 return result;
118 }
119
Garret Rieger20b02a62022-06-24 18:58:17 +0000120 bool is_shared () const
121 {
122 return parents.length > 1;
123 }
124
125 unsigned incoming_edges () const
126 {
127 return parents.length;
128 }
129
130 void remove_parent (unsigned parent_index)
131 {
132 for (unsigned i = 0; i < parents.length; i++)
133 {
134 if (parents[i] != parent_index) continue;
135 parents.remove (i);
136 break;
137 }
138 }
139
Garret Rieger1002a3d2022-07-28 20:17:36 +0000140 void remove_real_link (unsigned child_index, const void* offset)
Garret Rieger8d63f602022-07-27 20:36:20 +0000141 {
142 for (unsigned i = 0; i < obj.real_links.length; i++)
143 {
Garret Rieger1002a3d2022-07-28 20:17:36 +0000144 auto& link = obj.real_links[i];
145 if (link.objidx != child_index)
146 continue;
147
148 if ((obj.head + link.position) != offset)
149 continue;
150
Garret Rieger8d63f602022-07-27 20:36:20 +0000151 obj.real_links.remove (i);
Garret Rieger1002a3d2022-07-28 20:17:36 +0000152 return;
Garret Rieger8d63f602022-07-27 20:36:20 +0000153 }
154 }
155
Garret Rieger20b02a62022-06-24 18:58:17 +0000156 void remap_parents (const hb_vector_t<unsigned>& id_map)
157 {
158 for (unsigned i = 0; i < parents.length; i++)
159 parents[i] = id_map[parents[i]];
160 }
161
162 void remap_parent (unsigned old_index, unsigned new_index)
163 {
164 for (unsigned i = 0; i < parents.length; i++)
165 {
166 if (parents[i] == old_index)
167 parents[i] = new_index;
168 }
169 }
170
171 bool is_leaf () const
172 {
173 return !obj.real_links.length && !obj.virtual_links.length;
174 }
175
176 bool raise_priority ()
177 {
178 if (has_max_priority ()) return false;
179 priority++;
180 return true;
181 }
182
183 bool has_max_priority () const {
184 return priority >= 3;
185 }
186
Garret Rieger9db3beb2022-07-25 19:42:58 +0000187 size_t table_size () const {
188 return obj.tail - obj.head;
189 }
190
Garret Rieger20b02a62022-06-24 18:58:17 +0000191 int64_t modified_distance (unsigned order) const
192 {
193 // TODO(garretrieger): once priority is high enough, should try
194 // setting distance = 0 which will force to sort immediately after
195 // it's parent where possible.
196
197 int64_t modified_distance =
198 hb_min (hb_max(distance + distance_modifier (), 0), 0x7FFFFFFFFFF);
199 if (has_max_priority ()) {
200 modified_distance = 0;
201 }
202 return (modified_distance << 18) | (0x003FFFF & order);
203 }
204
205 int64_t distance_modifier () const
206 {
207 if (!priority) return 0;
208 int64_t table_size = obj.tail - obj.head;
209
210 if (priority == 1)
211 return -table_size / 2;
212
213 return -table_size;
214 }
Garret Rieger07fd0522022-08-15 23:16:51 +0000215
216 private:
Garret Riegera3ed9f92022-08-17 23:39:11 +0000217 bool links_equal (const hb_vector_t<hb_serialize_context_t::object_t::link_t>& this_links,
218 const hb_vector_t<hb_serialize_context_t::object_t::link_t>& other_links,
219 const graph_t& graph,
220 const graph_t& other_graph,
221 unsigned depth) const
Garret Rieger07fd0522022-08-15 23:16:51 +0000222 {
223 auto a = this_links.iter ();
224 auto b = other_links.iter ();
225
226 while (a && b)
227 {
228 const auto& link_a = *a;
229 const auto& link_b = *b;
230
231 if (link_a.width != link_b.width ||
232 link_a.is_signed != link_b.is_signed ||
233 link_a.whence != link_b.whence ||
234 link_a.position != link_b.position ||
235 link_a.bias != link_b.bias)
236 return false;
237
Garret Riegera3ed9f92022-08-17 23:39:11 +0000238 if (!graph.vertices_[link_a.objidx].equals (
239 other_graph.vertices_[link_b.objidx], graph, other_graph, depth + 1))
Garret Rieger07fd0522022-08-15 23:16:51 +0000240 return false;
241
242 a++;
243 b++;
244 }
245
246 if (bool (a) != bool (b))
247 return false;
248
249 return true;
250 }
Garret Rieger20b02a62022-06-24 18:58:17 +0000251 };
252
Garret Rieger0083fd12022-08-11 22:09:46 +0000253 template <typename T>
254 struct vertex_and_table_t
255 {
256 vertex_and_table_t () : index (0), vertex (nullptr), table (nullptr)
257 {}
258
259 unsigned index;
260 vertex_t* vertex;
261 T* table;
262
263 operator bool () {
264 return table && vertex;
265 }
266 };
267
Garret Rieger20b02a62022-06-24 18:58:17 +0000268 /*
269 * A topological sorting of an object graph. Ordered
270 * in reverse serialization order (first object in the
271 * serialization is at the end of the list). This matches
272 * the 'packed' object stack used internally in the
273 * serializer
274 */
275 template<typename T>
276 graph_t (const T& objects)
277 : parents_invalid (true),
278 distance_invalid (true),
279 positions_invalid (true),
Garret Rieger5cf2a252022-08-15 22:49:24 +0000280 successful (true),
281 buffers ()
Garret Rieger20b02a62022-06-24 18:58:17 +0000282 {
283 num_roots_for_space_.push (1);
284 bool removed_nil = false;
285 vertices_.alloc (objects.length);
286 vertices_scratch_.alloc (objects.length);
287 for (unsigned i = 0; i < objects.length; i++)
288 {
289 // TODO(grieger): check all links point to valid objects.
290
291 // If this graph came from a serialization buffer object 0 is the
292 // nil object. We don't need it for our purposes here so drop it.
293 if (i == 0 && !objects[i])
294 {
295 removed_nil = true;
296 continue;
297 }
298
299 vertex_t* v = vertices_.push ();
300 if (check_success (!vertices_.in_error ()))
301 v->obj = *objects[i];
302 if (!removed_nil) continue;
303 // Fix indices to account for removed nil object.
304 for (auto& l : v->obj.all_links_writer ()) {
305 l.objidx--;
306 }
307 }
308 }
309
310 ~graph_t ()
311 {
312 vertices_.fini ();
Garret Rieger5cf2a252022-08-15 22:49:24 +0000313 for (char* b : buffers)
314 hb_free (b);
Garret Rieger20b02a62022-06-24 18:58:17 +0000315 }
316
Garret Rieger1405f962022-08-15 23:48:00 +0000317 bool operator== (const graph_t& other) const
Garret Rieger07fd0522022-08-15 23:16:51 +0000318 {
Garret Riegera3ed9f92022-08-17 23:39:11 +0000319 return root ().equals (other.root (), *this, other, 0);
Garret Rieger1405f962022-08-15 23:48:00 +0000320 }
321
322 // Sorts links of all objects in a consistent manner and zeroes all offsets.
323 void normalize ()
324 {
325 for (auto& v : vertices_.writer ())
326 v.normalize ();
Garret Rieger07fd0522022-08-15 23:16:51 +0000327 }
328
Garret Rieger20b02a62022-06-24 18:58:17 +0000329 bool in_error () const
330 {
331 return !successful ||
332 vertices_.in_error () ||
333 num_roots_for_space_.in_error ();
334 }
335
336 const vertex_t& root () const
337 {
338 return vertices_[root_idx ()];
339 }
340
341 unsigned root_idx () const
342 {
343 // Object graphs are in reverse order, the first object is at the end
344 // of the vector. Since the graph is topologically sorted it's safe to
345 // assume the first object has no incoming edges.
346 return vertices_.length - 1;
347 }
348
Garret Riegerb1d38a62022-07-19 23:33:16 +0000349 const hb_serialize_context_t::object_t& object (unsigned i) const
Garret Rieger20b02a62022-06-24 18:58:17 +0000350 {
351 return vertices_[i].obj;
352 }
353
Garret Rieger5cf2a252022-08-15 22:49:24 +0000354 void add_buffer (char* buffer)
355 {
356 buffers.push (buffer);
357 }
358
Garret Rieger20b02a62022-06-24 18:58:17 +0000359 /*
Garret Riegerb00eb772022-08-11 20:33:21 +0000360 * Adds a 16 bit link from parent_id to child_id
361 */
362 template<typename T>
363 void add_link (T* offset,
364 unsigned parent_id,
365 unsigned child_id)
366 {
367 auto& v = vertices_[parent_id];
368 auto* link = v.obj.real_links.push ();
369 link->width = 2;
370 link->objidx = child_id;
371 link->position = (char*) offset - (char*) v.obj.head;
372 vertices_[child_id].parents.push (parent_id);
373 }
374
375 /*
Garret Rieger20b02a62022-06-24 18:58:17 +0000376 * Generates a new topological sorting of graph ordered by the shortest
Garret Rieger65afed02022-07-28 20:54:28 +0000377 * distance to each node if positions are marked as invalid.
378 */
379 void sort_shortest_distance_if_needed ()
380 {
381 if (!positions_invalid) return;
382 sort_shortest_distance ();
383 }
384
385
386 /*
387 * Generates a new topological sorting of graph ordered by the shortest
Garret Rieger20b02a62022-06-24 18:58:17 +0000388 * distance to each node.
389 */
390 void sort_shortest_distance ()
391 {
392 positions_invalid = true;
393
394 if (vertices_.length <= 1) {
395 // Graph of 1 or less doesn't need sorting.
396 return;
397 }
398
399 update_distances ();
400
401 hb_priority_queue_t queue;
402 hb_vector_t<vertex_t> &sorted_graph = vertices_scratch_;
403 if (unlikely (!check_success (sorted_graph.resize (vertices_.length)))) return;
404 hb_vector_t<unsigned> id_map;
405 if (unlikely (!check_success (id_map.resize (vertices_.length)))) return;
406
407 hb_vector_t<unsigned> removed_edges;
408 if (unlikely (!check_success (removed_edges.resize (vertices_.length)))) return;
409 update_parents ();
410
411 queue.insert (root ().modified_distance (0), root_idx ());
412 int new_id = root_idx ();
413 unsigned order = 1;
414 while (!queue.in_error () && !queue.is_empty ())
415 {
416 unsigned next_id = queue.pop_minimum().second;
417
418 hb_swap (sorted_graph[new_id], vertices_[next_id]);
419 const vertex_t& next = sorted_graph[new_id];
420
421 id_map[next_id] = new_id--;
422
423 for (const auto& link : next.obj.all_links ()) {
424 removed_edges[link.objidx]++;
425 if (!(vertices_[link.objidx].incoming_edges () - removed_edges[link.objidx]))
426 // Add the order that the links were encountered to the priority.
427 // This ensures that ties between priorities objects are broken in a consistent
428 // way. More specifically this is set up so that if a set of objects have the same
429 // distance they'll be added to the topological order in the order that they are
430 // referenced from the parent object.
431 queue.insert (vertices_[link.objidx].modified_distance (order++),
432 link.objidx);
433 }
434 }
435
436 check_success (!queue.in_error ());
437 check_success (!sorted_graph.in_error ());
Garret Rieger20b02a62022-06-24 18:58:17 +0000438
439 remap_all_obj_indices (id_map, &sorted_graph);
Garret Rieger20b02a62022-06-24 18:58:17 +0000440 hb_swap (vertices_, sorted_graph);
Garret Riegerfb3f6ad2022-07-29 00:25:19 +0000441
442 if (!check_success (new_id == -1))
443 print_orphaned_nodes ();
Garret Rieger20b02a62022-06-24 18:58:17 +0000444 }
445
Garret Riegerb4f561d2022-07-06 18:49:23 +0000446 /*
447 * Finds the set of nodes (placed into roots) that should be assigned unique spaces.
448 * More specifically this looks for the top most 24 bit or 32 bit links in the graph.
449 * Some special casing is done that is specific to the layout of GSUB/GPOS tables.
450 */
Garret Rieger401066b2022-07-06 18:44:40 +0000451 void find_space_roots (hb_set_t& visited, hb_set_t& roots)
Garret Rieger20b02a62022-06-24 18:58:17 +0000452 {
Garret Rieger401066b2022-07-06 18:44:40 +0000453 int root_index = (int) root_idx ();
454 for (int i = root_index; i >= 0; i--)
Garret Rieger20b02a62022-06-24 18:58:17 +0000455 {
Garret Rieger401066b2022-07-06 18:44:40 +0000456 if (visited.has (i)) continue;
457
Garret Rieger20b02a62022-06-24 18:58:17 +0000458 // Only real links can form 32 bit spaces
459 for (auto& l : vertices_[i].obj.real_links)
460 {
Garret Rieger401066b2022-07-06 18:44:40 +0000461 if (l.is_signed || l.width < 3)
462 continue;
463
464 if (i == root_index && l.width == 3)
465 // Ignore 24bit links from the root node, this skips past the single 24bit
466 // pointer to the lookup list.
467 continue;
468
469 if (l.width == 3)
Garret Rieger20b02a62022-06-24 18:58:17 +0000470 {
Garret Rieger401066b2022-07-06 18:44:40 +0000471 // A 24bit offset forms a root, unless there is 32bit offsets somewhere
Garret Riegerb4f561d2022-07-06 18:49:23 +0000472 // in it's subgraph, then those become the roots instead. This is to make sure
473 // that extension subtables beneath a 24bit lookup become the spaces instead
474 // of the offset to the lookup.
Garret Rieger401066b2022-07-06 18:44:40 +0000475 hb_set_t sub_roots;
476 find_32bit_roots (l.objidx, sub_roots);
477 if (sub_roots) {
478 for (unsigned sub_root_idx : sub_roots) {
479 roots.add (sub_root_idx);
480 find_subgraph (sub_root_idx, visited);
481 }
482 continue;
483 }
Garret Rieger20b02a62022-06-24 18:58:17 +0000484 }
Garret Rieger401066b2022-07-06 18:44:40 +0000485
486 roots.add (l.objidx);
487 find_subgraph (l.objidx, visited);
Garret Rieger20b02a62022-06-24 18:58:17 +0000488 }
489 }
Garret Rieger401066b2022-07-06 18:44:40 +0000490 }
Garret Rieger20b02a62022-06-24 18:58:17 +0000491
Garret Riegerac1a8532022-08-18 00:55:47 +0000492 template <typename T, typename ...Ts>
493 vertex_and_table_t<T> as_table (unsigned parent, const void* offset, Ts... ds)
Garret Rieger0083fd12022-08-11 22:09:46 +0000494 {
Garret Riegerac1a8532022-08-18 00:55:47 +0000495 return as_table_from_index<T> (index_for_offset (parent, offset), std::forward<Ts>(ds)...);
Garret Rieger0083fd12022-08-11 22:09:46 +0000496 }
497
Garret Riegerac1a8532022-08-18 00:55:47 +0000498 template <typename T, typename ...Ts>
499 vertex_and_table_t<T> as_table_from_index (unsigned index, Ts... ds)
Garret Rieger0083fd12022-08-11 22:09:46 +0000500 {
501 if (index >= vertices_.length)
502 return vertex_and_table_t<T> ();
503
504 vertex_and_table_t<T> r;
505 r.vertex = &vertices_[index];
506 r.table = (T*) r.vertex->obj.head;
507 r.index = index;
508 if (!r.table)
509 return vertex_and_table_t<T> ();
510
Garret Riegerac1a8532022-08-18 00:55:47 +0000511 if (!r.table->sanitize (*(r.vertex), std::forward<Ts>(ds)...))
Garret Rieger0083fd12022-08-11 22:09:46 +0000512 return vertex_and_table_t<T> ();
513
514 return r;
515 }
516
Garret Rieger5d824c02022-08-05 01:37:14 +0000517 // Finds the object id of the object pointed to by the offset at 'offset'
518 // within object[node_idx].
519 unsigned index_for_offset (unsigned node_idx, const void* offset) const
Garret Rieger3f7a74f2022-07-19 21:50:13 +0000520 {
521 const auto& node = object (node_idx);
522 if (offset < node.head || offset >= node.tail) return -1;
523
Garret Rieger46b5dbd2022-08-18 01:18:16 +0000524 unsigned length = node.real_links.length;
525 for (unsigned i = 0; i < length; i++)
Garret Rieger3f7a74f2022-07-19 21:50:13 +0000526 {
Garret Rieger46b5dbd2022-08-18 01:18:16 +0000527 // Use direct access for increased performance, this is a hot method.
528 const auto& link = node.real_links.arrayZ[i];
Garret Rieger3f7a74f2022-07-19 21:50:13 +0000529 if (offset != node.head + link.position)
530 continue;
531 return link.objidx;
532 }
533
534 return -1;
535 }
536
Garret Rieger5d824c02022-08-05 01:37:14 +0000537 // Finds the object id of the object pointed to by the offset at 'offset'
538 // within object[node_idx]. Ensures that the returned object is safe to mutate.
539 // That is, if the original child object is shared by parents other than node_idx
540 // it will be duplicated and the duplicate will be returned instead.
541 unsigned mutable_index_for_offset (unsigned node_idx, const void* offset)
542 {
543 unsigned child_idx = index_for_offset (node_idx, offset);
544 auto& child = vertices_[child_idx];
545 for (unsigned p : child.parents)
546 {
547 if (p != node_idx) {
548 return duplicate (node_idx, child_idx);
549 }
550 }
551
552 return child_idx;
553 }
554
Garret Rieger3f7a74f2022-07-19 21:50:13 +0000555
Garret Rieger401066b2022-07-06 18:44:40 +0000556 /*
557 * Assign unique space numbers to each connected subgraph of 24 bit and/or 32 bit offset(s).
558 * Currently, this is implemented specifically tailored to the structure of a GPOS/GSUB
559 * (including with 24bit offsets) table.
560 */
561 bool assign_spaces ()
562 {
Garret Riegerb1d38a62022-07-19 23:33:16 +0000563 update_parents ();
564
Garret Rieger401066b2022-07-06 18:44:40 +0000565 hb_set_t visited;
566 hb_set_t roots;
567 find_space_roots (visited, roots);
568
569 // Mark everything not in the subgraphs of the roots as visited. This prevents
570 // subgraphs from being connected via nodes not in those subgraphs.
Garret Rieger20b02a62022-06-24 18:58:17 +0000571 visited.invert ();
572
573 if (!roots) return false;
574
575 while (roots)
576 {
577 unsigned next = HB_SET_VALUE_INVALID;
578 if (unlikely (!check_success (!roots.in_error ()))) break;
579 if (!roots.next (&next)) break;
580
581 hb_set_t connected_roots;
582 find_connected_nodes (next, roots, visited, connected_roots);
583 if (unlikely (!check_success (!connected_roots.in_error ()))) break;
584
585 isolate_subgraph (connected_roots);
586 if (unlikely (!check_success (!connected_roots.in_error ()))) break;
587
588 unsigned next_space = this->next_space ();
589 num_roots_for_space_.push (0);
590 for (unsigned root : connected_roots)
591 {
592 DEBUG_MSG (SUBSET_REPACK, nullptr, "Subgraph %u gets space %u", root, next_space);
593 vertices_[root].space = next_space;
594 num_roots_for_space_[next_space] = num_roots_for_space_[next_space] + 1;
595 distance_invalid = true;
596 positions_invalid = true;
597 }
598
599 // TODO(grieger): special case for GSUB/GPOS use extension promotions to move 16 bit space
600 // into the 32 bit space as needed, instead of using isolation.
601 }
602
603
604
605 return true;
606 }
607
608 /*
609 * Isolates the subgraph of nodes reachable from root. Any links to nodes in the subgraph
610 * that originate from outside of the subgraph will be removed by duplicating the linked to
611 * object.
612 *
613 * Indices stored in roots will be updated if any of the roots are duplicated to new indices.
614 */
615 bool isolate_subgraph (hb_set_t& roots)
616 {
617 update_parents ();
618 hb_map_t subgraph;
619
620 // incoming edges to root_idx should be all 32 bit in length so we don't need to de-dup these
621 // set the subgraph incoming edge count to match all of root_idx's incoming edges
622 hb_set_t parents;
623 for (unsigned root_idx : roots)
624 {
625 subgraph.set (root_idx, wide_parents (root_idx, parents));
626 find_subgraph (root_idx, subgraph);
627 }
628
629 unsigned original_root_idx = root_idx ();
630 hb_map_t index_map;
631 bool made_changes = false;
632 for (auto entry : subgraph.iter ())
633 {
634 const auto& node = vertices_[entry.first];
635 unsigned subgraph_incoming_edges = entry.second;
636
637 if (subgraph_incoming_edges < node.incoming_edges ())
638 {
639 // Only de-dup objects with incoming links from outside the subgraph.
640 made_changes = true;
641 duplicate_subgraph (entry.first, index_map);
642 }
643 }
644
645 if (!made_changes)
646 return false;
647
648 if (original_root_idx != root_idx ()
649 && parents.has (original_root_idx))
650 {
651 // If the root idx has changed since parents was determined, update root idx in parents
652 parents.add (root_idx ());
653 parents.del (original_root_idx);
654 }
655
656 auto new_subgraph =
657 + subgraph.keys ()
658 | hb_map([&] (unsigned node_idx) {
659 const unsigned *v;
660 if (index_map.has (node_idx, &v)) return *v;
661 return node_idx;
662 })
663 ;
664
665 remap_obj_indices (index_map, new_subgraph);
666 remap_obj_indices (index_map, parents.iter (), true);
667
668 // Update roots set with new indices as needed.
669 unsigned next = HB_SET_VALUE_INVALID;
670 while (roots.next (&next))
671 {
672 const unsigned *v;
673 if (index_map.has (next, &v))
674 {
675 roots.del (next);
676 roots.add (*v);
677 }
678 }
679
680 return true;
681 }
682
683 void find_subgraph (unsigned node_idx, hb_map_t& subgraph)
684 {
685 for (const auto& link : vertices_[node_idx].obj.all_links ())
686 {
687 const unsigned *v;
688 if (subgraph.has (link.objidx, &v))
689 {
690 subgraph.set (link.objidx, *v + 1);
691 continue;
692 }
693 subgraph.set (link.objidx, 1);
694 find_subgraph (link.objidx, subgraph);
695 }
696 }
697
698 void find_subgraph (unsigned node_idx, hb_set_t& subgraph)
699 {
700 if (subgraph.has (node_idx)) return;
701 subgraph.add (node_idx);
702 for (const auto& link : vertices_[node_idx].obj.all_links ())
703 find_subgraph (link.objidx, subgraph);
704 }
705
Garret Rieger9d0b2da2022-07-25 20:46:49 +0000706 size_t find_subgraph_size (unsigned node_idx, hb_set_t& subgraph, unsigned max_depth = -1)
Garret Riegerad0041f2022-07-21 22:50:14 +0000707 {
708 if (subgraph.has (node_idx)) return 0;
709 subgraph.add (node_idx);
710
711 const auto& o = vertices_[node_idx].obj;
712 size_t size = o.tail - o.head;
Garret Rieger9d0b2da2022-07-25 20:46:49 +0000713 if (max_depth == 0)
714 return size;
715
Garret Riegerad0041f2022-07-21 22:50:14 +0000716 for (const auto& link : o.all_links ())
Garret Rieger9d0b2da2022-07-25 20:46:49 +0000717 size += find_subgraph_size (link.objidx, subgraph, max_depth - 1);
Garret Riegerad0041f2022-07-21 22:50:14 +0000718 return size;
719 }
720
Garret Riegerb4f561d2022-07-06 18:49:23 +0000721 /*
722 * Finds the topmost children of 32bit offsets in the subgraph starting
723 * at node_idx. Found indices are placed into 'found'.
724 */
Garret Rieger401066b2022-07-06 18:44:40 +0000725 void find_32bit_roots (unsigned node_idx, hb_set_t& found)
726 {
727 for (const auto& link : vertices_[node_idx].obj.all_links ())
728 {
729 if (!link.is_signed && link.width == 4) {
730 found.add (link.objidx);
731 continue;
732 }
733 find_32bit_roots (link.objidx, found);
734 }
735 }
736
Garret Rieger20b02a62022-06-24 18:58:17 +0000737 /*
Garret Rieger8d63f602022-07-27 20:36:20 +0000738 * Moves the child of old_parent_idx pointed to by old_offset to a new
739 * vertex at the new_offset.
740 */
741 template<typename O>
742 void move_child (unsigned old_parent_idx,
743 const O* old_offset,
744 unsigned new_parent_idx,
745 const O* new_offset)
746 {
Garret Rieger65afed02022-07-28 20:54:28 +0000747 distance_invalid = true;
748 positions_invalid = true;
749
Garret Rieger8d63f602022-07-27 20:36:20 +0000750 auto& old_v = vertices_[old_parent_idx];
751 auto& new_v = vertices_[new_parent_idx];
752
753 unsigned child_id = index_for_offset (old_parent_idx,
754 old_offset);
755
756 auto* new_link = new_v.obj.real_links.push ();
757 new_link->width = O::static_size;
758 new_link->objidx = child_id;
Garret Rieger1002a3d2022-07-28 20:17:36 +0000759 new_link->position = (const char*) new_offset - (const char*) new_v.obj.head;
Garret Rieger8d63f602022-07-27 20:36:20 +0000760
761 auto& child = vertices_[child_id];
762 child.parents.push (new_parent_idx);
763
Garret Rieger3b91fb22022-07-29 20:04:42 +0000764 old_v.remove_real_link (child_id, old_offset);
Garret Rieger8d63f602022-07-27 20:36:20 +0000765 child.remove_parent (old_parent_idx);
766 }
767
768 /*
Garret Rieger20b02a62022-06-24 18:58:17 +0000769 * duplicates all nodes in the subgraph reachable from node_idx. Does not re-assign
770 * links. index_map is updated with mappings from old id to new id. If a duplication has already
771 * been performed for a given index, then it will be skipped.
772 */
773 void duplicate_subgraph (unsigned node_idx, hb_map_t& index_map)
774 {
775 if (index_map.has (node_idx))
776 return;
777
778 index_map.set (node_idx, duplicate (node_idx));
779 for (const auto& l : object (node_idx).all_links ()) {
780 duplicate_subgraph (l.objidx, index_map);
781 }
782 }
783
784 /*
785 * Creates a copy of node_idx and returns it's new index.
786 */
787 unsigned duplicate (unsigned node_idx)
788 {
789 positions_invalid = true;
790 distance_invalid = true;
791
792 auto* clone = vertices_.push ();
793 auto& child = vertices_[node_idx];
794 if (vertices_.in_error ()) {
795 return -1;
796 }
797
798 clone->obj.head = child.obj.head;
799 clone->obj.tail = child.obj.tail;
800 clone->distance = child.distance;
801 clone->space = child.space;
802 clone->parents.reset ();
803
804 unsigned clone_idx = vertices_.length - 2;
805 for (const auto& l : child.obj.real_links)
806 {
807 clone->obj.real_links.push (l);
808 vertices_[l.objidx].parents.push (clone_idx);
809 }
810 for (const auto& l : child.obj.virtual_links)
811 {
812 clone->obj.virtual_links.push (l);
813 vertices_[l.objidx].parents.push (clone_idx);
814 }
815
816 check_success (!clone->obj.real_links.in_error ());
817 check_success (!clone->obj.virtual_links.in_error ());
818
819 // The last object is the root of the graph, so swap back the root to the end.
820 // The root's obj idx does change, however since it's root nothing else refers to it.
821 // all other obj idx's will be unaffected.
822 hb_swap (vertices_[vertices_.length - 2], *clone);
823
824 // Since the root moved, update the parents arrays of all children on the root.
825 for (const auto& l : root ().obj.all_links ())
826 vertices_[l.objidx].remap_parent (root_idx () - 1, root_idx ());
827
828 return clone_idx;
829 }
830
831 /*
832 * Creates a copy of child and re-assigns the link from
833 * parent to the clone. The copy is a shallow copy, objects
834 * linked from child are not duplicated.
835 */
836 bool duplicate (unsigned parent_idx, unsigned child_idx)
837 {
838 update_parents ();
839
840 unsigned links_to_child = 0;
841 for (const auto& l : vertices_[parent_idx].obj.all_links ())
842 {
843 if (l.objidx == child_idx) links_to_child++;
844 }
845
846 if (vertices_[child_idx].incoming_edges () <= links_to_child)
847 {
848 // Can't duplicate this node, doing so would orphan the original one as all remaining links
849 // to child are from parent.
850 DEBUG_MSG (SUBSET_REPACK, nullptr, " Not duplicating %d => %d",
851 parent_idx, child_idx);
852 return false;
853 }
854
855 DEBUG_MSG (SUBSET_REPACK, nullptr, " Duplicating %d => %d",
856 parent_idx, child_idx);
857
858 unsigned clone_idx = duplicate (child_idx);
859 if (clone_idx == (unsigned) -1) return false;
860 // duplicate shifts the root node idx, so if parent_idx was root update it.
861 if (parent_idx == clone_idx) parent_idx++;
862
863 auto& parent = vertices_[parent_idx];
864 for (auto& l : parent.obj.all_links_writer ())
865 {
866 if (l.objidx != child_idx)
867 continue;
868
869 reassign_link (l, parent_idx, clone_idx);
870 }
871
872 return true;
873 }
874
Garret Riegerb1d38a62022-07-19 23:33:16 +0000875
876 /*
877 * Adds a new node to the graph, not connected to anything.
878 */
879 unsigned new_node (char* head, char* tail)
880 {
881 positions_invalid = true;
882 distance_invalid = true;
Garret Riegerb1d38a62022-07-19 23:33:16 +0000883
884 auto* clone = vertices_.push ();
885 if (vertices_.in_error ()) {
886 return -1;
887 }
888
889 clone->obj.head = head;
890 clone->obj.tail = tail;
891 clone->distance = 0;
892 clone->space = 0;
893
894 unsigned clone_idx = vertices_.length - 2;
895
896 // The last object is the root of the graph, so swap back the root to the end.
897 // The root's obj idx does change, however since it's root nothing else refers to it.
898 // all other obj idx's will be unaffected.
899 hb_swap (vertices_[vertices_.length - 2], *clone);
900
901 // Since the root moved, update the parents arrays of all children on the root.
902 for (const auto& l : root ().obj.all_links ())
903 vertices_[l.objidx].remap_parent (root_idx () - 1, root_idx ());
904
905 return clone_idx;
906 }
907
Garret Rieger20b02a62022-06-24 18:58:17 +0000908 /*
909 * Raises the sorting priority of all children.
910 */
911 bool raise_childrens_priority (unsigned parent_idx)
912 {
913 DEBUG_MSG (SUBSET_REPACK, nullptr, " Raising priority of all children of %d",
914 parent_idx);
915 // This operation doesn't change ordering until a sort is run, so no need
916 // to invalidate positions. It does not change graph structure so no need
917 // to update distances or edge counts.
918 auto& parent = vertices_[parent_idx].obj;
919 bool made_change = false;
920 for (auto& l : parent.all_links_writer ())
921 made_change |= vertices_[l.objidx].raise_priority ();
922 return made_change;
923 }
924
Garret Rieger20b02a62022-06-24 18:58:17 +0000925 void print_orphaned_nodes ()
926 {
927 if (!DEBUG_ENABLED(SUBSET_REPACK)) return;
928
929 DEBUG_MSG (SUBSET_REPACK, nullptr, "Graph is not fully connected.");
930 parents_invalid = true;
931 update_parents();
932
933 for (unsigned i = 0; i < root_idx (); i++)
934 {
935 const auto& v = vertices_[i];
936 if (!v.parents)
937 DEBUG_MSG (SUBSET_REPACK, nullptr, "Node %u is orphaned.", i);
938 }
939 }
940
Garret Rieger20b02a62022-06-24 18:58:17 +0000941 unsigned num_roots_for_space (unsigned space) const
942 {
943 return num_roots_for_space_[space];
944 }
945
946 unsigned next_space () const
947 {
948 return num_roots_for_space_.length;
949 }
950
951 void move_to_new_space (const hb_set_t& indices)
952 {
953 num_roots_for_space_.push (0);
954 unsigned new_space = num_roots_for_space_.length - 1;
955
956 for (unsigned index : indices) {
957 auto& node = vertices_[index];
958 num_roots_for_space_[node.space] = num_roots_for_space_[node.space] - 1;
959 num_roots_for_space_[new_space] = num_roots_for_space_[new_space] + 1;
960 node.space = new_space;
961 distance_invalid = true;
962 positions_invalid = true;
963 }
964 }
965
966 unsigned space_for (unsigned index, unsigned* root = nullptr) const
967 {
968 const auto& node = vertices_[index];
969 if (node.space)
970 {
971 if (root != nullptr)
972 *root = index;
973 return node.space;
974 }
975
976 if (!node.parents)
977 {
978 if (root)
979 *root = index;
980 return 0;
981 }
982
983 return space_for (node.parents[0], root);
984 }
985
986 void err_other_error () { this->successful = false; }
987
988 size_t total_size_in_bytes () const {
989 size_t total_size = 0;
990 for (unsigned i = 0; i < vertices_.length; i++) {
991 size_t size = vertices_[i].obj.tail - vertices_[i].obj.head;
992 total_size += size;
993 }
994 return total_size;
995 }
996
997
998 private:
999
1000 /*
Garret Rieger401066b2022-07-06 18:44:40 +00001001 * Returns the numbers of incoming edges that are 24 or 32 bits wide.
Garret Rieger20b02a62022-06-24 18:58:17 +00001002 */
1003 unsigned wide_parents (unsigned node_idx, hb_set_t& parents) const
1004 {
1005 unsigned count = 0;
1006 hb_set_t visited;
1007 for (unsigned p : vertices_[node_idx].parents)
1008 {
1009 if (visited.has (p)) continue;
1010 visited.add (p);
1011
1012 // Only real links can be wide
1013 for (const auto& l : vertices_[p].obj.real_links)
1014 {
Garret Rieger401066b2022-07-06 18:44:40 +00001015 if (l.objidx == node_idx
1016 && (l.width == 3 || l.width == 4)
1017 && !l.is_signed)
Garret Rieger20b02a62022-06-24 18:58:17 +00001018 {
1019 count++;
1020 parents.add (p);
1021 }
1022 }
1023 }
1024 return count;
1025 }
1026
1027 bool check_success (bool success)
1028 { return this->successful && (success || ((void) err_other_error (), false)); }
1029
Garret Rieger70785602022-06-24 19:20:20 +00001030 public:
Garret Rieger20b02a62022-06-24 18:58:17 +00001031 /*
1032 * Creates a map from objid to # of incoming edges.
1033 */
1034 void update_parents ()
1035 {
1036 if (!parents_invalid) return;
1037
1038 for (unsigned i = 0; i < vertices_.length; i++)
1039 vertices_[i].parents.reset ();
1040
1041 for (unsigned p = 0; p < vertices_.length; p++)
1042 {
1043 for (auto& l : vertices_[p].obj.all_links ())
1044 {
1045 vertices_[l.objidx].parents.push (p);
1046 }
1047 }
1048
1049 parents_invalid = false;
1050 }
1051
1052 /*
1053 * compute the serialized start and end positions for each vertex.
1054 */
1055 void update_positions ()
1056 {
1057 if (!positions_invalid) return;
1058
1059 unsigned current_pos = 0;
1060 for (int i = root_idx (); i >= 0; i--)
1061 {
1062 auto& v = vertices_[i];
1063 v.start = current_pos;
1064 current_pos += v.obj.tail - v.obj.head;
1065 v.end = current_pos;
1066 }
1067
1068 positions_invalid = false;
1069 }
1070
1071 /*
1072 * Finds the distance to each object in the graph
1073 * from the initial node.
1074 */
1075 void update_distances ()
1076 {
1077 if (!distance_invalid) return;
1078
1079 // Uses Dijkstra's algorithm to find all of the shortest distances.
1080 // https://en.wikipedia.org/wiki/Dijkstra%27s_algorithm
1081 //
1082 // Implementation Note:
1083 // Since our priority queue doesn't support fast priority decreases
1084 // we instead just add new entries into the queue when a priority changes.
1085 // Redundant ones are filtered out later on by the visited set.
1086 // According to https://www3.cs.stonybrook.edu/~rezaul/papers/TR-07-54.pdf
1087 // for practical performance this is faster then using a more advanced queue
1088 // (such as a fibonacci queue) with a fast decrease priority.
1089 for (unsigned i = 0; i < vertices_.length; i++)
1090 {
1091 if (i == vertices_.length - 1)
1092 vertices_[i].distance = 0;
1093 else
1094 vertices_[i].distance = hb_int_max (int64_t);
1095 }
1096
1097 hb_priority_queue_t queue;
1098 queue.insert (0, vertices_.length - 1);
1099
1100 hb_vector_t<bool> visited;
1101 visited.resize (vertices_.length);
1102
1103 while (!queue.in_error () && !queue.is_empty ())
1104 {
1105 unsigned next_idx = queue.pop_minimum ().second;
1106 if (visited[next_idx]) continue;
1107 const auto& next = vertices_[next_idx];
1108 int64_t next_distance = vertices_[next_idx].distance;
1109 visited[next_idx] = true;
1110
1111 for (const auto& link : next.obj.all_links ())
1112 {
1113 if (visited[link.objidx]) continue;
1114
1115 const auto& child = vertices_[link.objidx].obj;
1116 unsigned link_width = link.width ? link.width : 4; // treat virtual offsets as 32 bits wide
1117 int64_t child_weight = (child.tail - child.head) +
1118 ((int64_t) 1 << (link_width * 8)) * (vertices_[link.objidx].space + 1);
1119 int64_t child_distance = next_distance + child_weight;
1120
1121 if (child_distance < vertices_[link.objidx].distance)
1122 {
1123 vertices_[link.objidx].distance = child_distance;
1124 queue.insert (child_distance, link.objidx);
1125 }
1126 }
1127 }
1128
1129 check_success (!queue.in_error ());
1130 if (!check_success (queue.is_empty ()))
1131 {
1132 print_orphaned_nodes ();
1133 return;
1134 }
1135
1136 distance_invalid = false;
1137 }
1138
Garret Rieger70785602022-06-24 19:20:20 +00001139 private:
Garret Rieger20b02a62022-06-24 18:58:17 +00001140 /*
1141 * Updates a link in the graph to point to a different object. Corrects the
1142 * parents vector on the previous and new child nodes.
1143 */
1144 void reassign_link (hb_serialize_context_t::object_t::link_t& link,
1145 unsigned parent_idx,
1146 unsigned new_idx)
1147 {
1148 unsigned old_idx = link.objidx;
1149 link.objidx = new_idx;
1150 vertices_[old_idx].remove_parent (parent_idx);
1151 vertices_[new_idx].parents.push (parent_idx);
1152 }
1153
1154 /*
1155 * Updates all objidx's in all links using the provided mapping. Corrects incoming edge counts.
1156 */
1157 template<typename Iterator, hb_requires (hb_is_iterator (Iterator))>
1158 void remap_obj_indices (const hb_map_t& id_map,
1159 Iterator subgraph,
1160 bool only_wide = false)
1161 {
1162 if (!id_map) return;
1163 for (unsigned i : subgraph)
1164 {
1165 for (auto& link : vertices_[i].obj.all_links_writer ())
1166 {
1167 const unsigned *v;
1168 if (!id_map.has (link.objidx, &v)) continue;
1169 if (only_wide && !(link.width == 4 && !link.is_signed)) continue;
1170
1171 reassign_link (link, i, *v);
1172 }
1173 }
1174 }
1175
1176 /*
1177 * Updates all objidx's in all links using the provided mapping.
1178 */
1179 void remap_all_obj_indices (const hb_vector_t<unsigned>& id_map,
1180 hb_vector_t<vertex_t>* sorted_graph) const
1181 {
1182 for (unsigned i = 0; i < sorted_graph->length; i++)
1183 {
1184 (*sorted_graph)[i].remap_parents (id_map);
1185 for (auto& link : (*sorted_graph)[i].obj.all_links_writer ())
1186 {
1187 link.objidx = id_map[link.objidx];
1188 }
1189 }
1190 }
1191
1192 /*
1193 * Finds all nodes in targets that are reachable from start_idx, nodes in visited will be skipped.
1194 * For this search the graph is treated as being undirected.
1195 *
1196 * Connected targets will be added to connected and removed from targets. All visited nodes
1197 * will be added to visited.
1198 */
1199 void find_connected_nodes (unsigned start_idx,
1200 hb_set_t& targets,
1201 hb_set_t& visited,
1202 hb_set_t& connected)
1203 {
1204 if (unlikely (!check_success (!visited.in_error ()))) return;
1205 if (visited.has (start_idx)) return;
1206 visited.add (start_idx);
1207
1208 if (targets.has (start_idx))
1209 {
1210 targets.del (start_idx);
1211 connected.add (start_idx);
1212 }
1213
1214 const auto& v = vertices_[start_idx];
1215
1216 // Graph is treated as undirected so search children and parents of start_idx
1217 for (const auto& l : v.obj.all_links ())
1218 find_connected_nodes (l.objidx, targets, visited, connected);
1219
1220 for (unsigned p : v.parents)
1221 find_connected_nodes (p, targets, visited, connected);
1222 }
1223
1224 public:
1225 // TODO(garretrieger): make private, will need to move most of offset overflow code into graph.
1226 hb_vector_t<vertex_t> vertices_;
1227 hb_vector_t<vertex_t> vertices_scratch_;
1228 private:
1229 bool parents_invalid;
1230 bool distance_invalid;
1231 bool positions_invalid;
1232 bool successful;
1233 hb_vector_t<unsigned> num_roots_for_space_;
Garret Rieger5cf2a252022-08-15 22:49:24 +00001234 hb_vector_t<char*> buffers;
Garret Rieger20b02a62022-06-24 18:58:17 +00001235};
1236
1237}
1238
1239#endif // GRAPH_GRAPH_HH