| /* |
| * Copyright (C) 2026 Behdad Esfahbod |
| * |
| * This is part of HarfBuzz, a text shaping library. |
| * |
| * Permission is hereby granted, without written agreement and without |
| * license or royalty fees, to use, copy, modify, and distribute this |
| * software and its documentation for any purpose, provided that the |
| * above copyright notice and the following two paragraphs appear in |
| * all copies of this software. |
| * |
| * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR |
| * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES |
| * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN |
| * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH |
| * DAMAGE. |
| * |
| * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, |
| * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND |
| * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS |
| * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO |
| * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. |
| * |
| * Author(s): Behdad Esfahbod |
| */ |
| |
| #include "hb.hh" |
| |
| #include "hb-gpu.h" |
| #include "hb-gpu.hh" |
| #include "hb-blob.hh" |
| |
| /** |
| * SECTION:hb-gpu |
| * @title: hb-gpu |
| * @short_description: GPU text rendering |
| * @include: hb-gpu.h |
| * |
| * This module provides GPU-side text rendering. Glyph data is |
| * encoded on the CPU into compact blobs that the GPU decodes and |
| * rasterizes directly in the fragment shader, with no intermediate |
| * bitmap atlas. |
| * |
| * Two renderers share the same atlas, vertex stage, and pipeline: |
| * |
| * - Draw, an antialiased monochrome coverage mask for outline |
| * glyphs. |
| * - Paint, a premultiplied-RGBA color accumulator for COLRv0 and |
| * COLRv1 glyphs. Paint also renders plain monochrome outlines |
| * (as a single foreground-colored layer), at some cost over the |
| * Draw path. |
| * |
| * Applications typically pick one renderer per font, based on |
| * whether the font has a color paint table (see |
| * hb_ot_color_has_paint()). The Draw path is meaningfully faster |
| * than Paint on monochrome fonts, so using Paint unconditionally |
| * leaves performance on the table. |
| * |
| * See the [live web demo](https://harfbuzz.github.io/hb-gpu-demo/). |
| * |
| * # Draw |
| * |
| * The draw renderer implements the |
| * [Slug](https://github.com/EricLengyel/Slug) algorithm by Eric |
| * Lengyel. It produces an antialiased coverage mask in the |
| * fragment shader, suitable for compositing over any background |
| * or multiplying with any foreground color. |
| * |
| * ## Encoding |
| * |
| * Each glyph is encoded independently into an opaque blob of |
| * RGBA16I texels (8 bytes each). Typical encoding flow: |
| * |
| * |[<!-- language="plain" --> |
| * hb_gpu_draw_t *draw = hb_gpu_draw_create_or_fail (); |
| * |
| * hb_gpu_draw_glyph (draw, font, gid); |
| * hb_blob_t *blob = hb_gpu_draw_encode (draw); |
| * |
| * // Upload hb_blob_get_data(blob) to the atlas at some offset. |
| * unsigned atlas_offset = upload_to_atlas (blob); |
| * |
| * hb_gpu_draw_recycle_blob (draw, blob); |
| * // repeat for more glyphs... |
| * |
| * hb_gpu_draw_destroy (draw); |
| * ]| |
| * |
| * Create the encoder once and reuse it across glyphs. |
| * hb_gpu_draw_encode() auto-clears the accumulated outlines on |
| * return, so the next call to hb_gpu_draw_glyph() starts fresh; |
| * user configuration (font scale) is preserved. Pass the |
| * encoded blob back via hb_gpu_draw_recycle_blob() after upload |
| * to recycle the buffer across encodes. |
| * |
| * ## Atlas setup |
| * |
| * All encoded blobs are uploaded into a single GL_TEXTURE_BUFFER |
| * backed by a GL_RGBA16I format buffer. Each glyph occupies a |
| * contiguous range of texels at a known offset: |
| * |
| * |[<!-- language="plain" --> |
| * GLuint buf, tex; |
| * glGenBuffers (1, &buf); |
| * glGenTextures (1, &tex); |
| * |
| * glBindBuffer (GL_TEXTURE_BUFFER, buf); |
| * glBufferData (GL_TEXTURE_BUFFER, capacity * 8, NULL, GL_STATIC_DRAW); |
| * |
| * glBindTexture (GL_TEXTURE_BUFFER, tex); |
| * glTexBuffer (GL_TEXTURE_BUFFER, GL_RGBA16I, buf); |
| * |
| * // Upload a glyph blob at 'offset' texels: |
| * glBufferSubData (GL_TEXTURE_BUFFER, |
| * offset * 8, |
| * hb_blob_get_length (blob), |
| * hb_blob_get_data (blob, NULL)); |
| * ]| |
| * |
| * ## Shader compilation |
| * |
| * The library provides GLSL source strings for shared helpers |
| * (atlas access, vertex dilation) and for the draw-renderer stage |
| * code. Prepend a #version directive, concatenate shared and |
| * draw sources in order, and append your own main(): |
| * |
| * |[<!-- language="plain" --> |
| * const char *vert_sources[] = { |
| * "#version 330\n", |
| * hb_gpu_shader_source (HB_GPU_SHADER_STAGE_VERTEX, |
| * HB_GPU_SHADER_LANG_GLSL), |
| * hb_gpu_draw_shader_source (HB_GPU_SHADER_STAGE_VERTEX, |
| * HB_GPU_SHADER_LANG_GLSL), |
| * your_vertex_main |
| * }; |
| * glShaderSource (vert_shader, 4, vert_sources, NULL); |
| * // Same pattern for the fragment shader. |
| * ]| |
| * |
| * hb_gpu_draw_shader_source() returns an empty string for the |
| * vertex stage, so the assembly order above is safe. |
| * |
| * ## Vertex shader |
| * |
| * The vertex library provides one function: |
| * |
| * |[<!-- language="plain" --> |
| * void hb_gpu_dilate (inout vec2 position, inout vec2 texcoord, |
| * vec2 normal, vec4 jac, |
| * mat4 m, vec2 viewport); |
| * ]| |
| * |
| * This expands each glyph quad by approximately half a pixel on |
| * screen to ensure proper antialiasing coverage at the edges. |
| * It must be called in the vertex shader before computing |
| * gl_Position. |
| * |
| * Per-vertex attributes for each glyph quad (2 triangles, 6 |
| * vertices, 4 unique corners): |
| * |
| * - position (vec2): object-space vertex position, computed from |
| * the glyph's bounding box. For corner (cx, cy) where cx,cy |
| * are 0 or 1: |
| * |[<!-- language="plain" --> |
| * pos.x = pen_x + scale * lerp(extent_min_x, extent_max_x, cx) |
| * pos.y = pen_y - scale * lerp(extent_min_y, extent_max_y, cy) |
| * ]| |
| * where scale = font_size / upem. |
| * |
| * - texcoord (vec2): em-space texture coordinates, equal to the |
| * raw extent values: `(ex, ey)` where ex/ey are the glyph's |
| * bounding box corners in font design units. The fragment shader |
| * samples the encoded blob in this coordinate space. |
| * |
| * - normal (vec2): outward normal at each corner. |
| * `(-1, +1)` for (0,0), `(+1, +1)` for (1,0), |
| * `(-1, -1)` for (0,1), `(+1, -1)` for (1,1). |
| * Note: y is negated relative to cx,cy because the common |
| * em-to-object transform flips y. |
| * |
| * - jac (vec4): maps object-space displacements back to |
| * em-space after dilation, stored row-major as |
| * (j00, j01, j10, j11). For the common case of uniform |
| * scaling with y-flip: |
| * |[<!-- language="plain" --> |
| * jac = vec4(emPerPos, 0.0, 0.0, -emPerPos) |
| * ]| |
| * where emPerPos = upem / font_size. For non-trivial |
| * transforms, see the hb_gpu_dilate source. |
| * |
| * - m (mat4): the model-view-projection matrix (uniform). |
| * |
| * - viewport (vec2): the viewport size in pixels (uniform). |
| * |
| * A typical vertex main: |
| * |
| * |[<!-- language="plain" --> |
| * uniform mat4 u_matViewProjection; |
| * uniform vec2 u_viewport; |
| * |
| * in vec2 a_position; |
| * in vec2 a_texcoord; |
| * in vec2 a_normal; |
| * in float a_emPerPos; |
| * in uint a_glyphLoc; |
| * |
| * out vec2 v_texcoord; |
| * flat out uint v_glyphLoc; |
| * |
| * void main () { |
| * vec2 pos = a_position; |
| * vec2 tex = a_texcoord; |
| * vec4 jac = vec4 (a_emPerPos, 0.0, 0.0, -a_emPerPos); |
| * hb_gpu_dilate (pos, tex, a_normal, jac, |
| * u_matViewProjection, u_viewport); |
| * gl_Position = u_matViewProjection * vec4 (pos, 0.0, 1.0); |
| * v_texcoord = tex; |
| * v_glyphLoc = a_glyphLoc; |
| * } |
| * ]| |
| * |
| * ## Font scale |
| * |
| * The encoder works in font design units (upem). For best |
| * results, do not set a scale on the #hb_font_t passed to |
| * hb_gpu_draw_glyph() — leave it at the default (upem). |
| * Scaling is applied later when computing vertex positions: |
| * |
| * |[<!-- language="plain" --> |
| * float scale = font_size / upem; |
| * pos.x = pen_x + scale * extent_x; |
| * pos.y = pen_y - scale * extent_y; |
| * ]| |
| * |
| * If you do set a font scale (e.g. for hinting), the encoded |
| * blob and extents will be in the scaled coordinate space, |
| * and you must adjust emPerPos accordingly. |
| * |
| * ## Dilation |
| * |
| * Each glyph is rendered on a screen-aligned quad whose |
| * corners match the glyph's bounding box. Without dilation, |
| * the antialiased edges at the quad boundary get clipped — the |
| * fragment shader needs at least half a pixel of padding |
| * around the glyph to produce smooth coverage gradients. |
| * |
| * hb_gpu_dilate computes a perspective-correct half-pixel |
| * expansion. It adjusts both the vertex position (expanding |
| * the quad) and the texcoord (so the fragment shader samples |
| * the right location in em-space after expansion). |
| * |
| * The `jac` parameter maps object-space displacements back to |
| * em-space. For the common case of uniform scaling with |
| * y-flip (`pos.y = pen_y - scale * ey`): |
| * |
| * |[<!-- language="plain" --> |
| * float emPerPos = upem / font_size; |
| * jac = vec4(emPerPos, 0.0, 0.0, -emPerPos); |
| * ]| |
| * |
| * For non-uniform scaling or shearing transforms, jac is the |
| * 2x2 inverse of the em-to-object linear transform, stored |
| * row-major. |
| * |
| * ### Static dilation alternative |
| * |
| * Applications that do not need perspective correctness (e.g. |
| * strictly 2D rendering at known sizes) can skip hb_gpu_dilate |
| * and instead expand each quad vertex by a fixed amount along |
| * its normal, based on the minimum expected font size: |
| * |
| * |[<!-- language="plain" --> |
| * float min_ppem = 10.0; // smallest expected size in pixels |
| * float pad = 0.5 * upem / min_ppem; // half-pixel in em units |
| * pos += normal * pad * scale; |
| * texcoord += normal * pad; |
| * ]| |
| * |
| * This over-dilates at larger sizes (wasting fill rate on |
| * transparent pixels) but is simpler to implement and avoids |
| * the need for the MVP matrix and viewport size in the vertex |
| * shader. |
| * |
| * ## Fragment shader |
| * |
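| * The fragment library provides two functions: hb_gpu_draw(), |
| * shown here, and hb_gpu_stem_darken(), described under |
| * "Stem darkening" below. |
| * |
| * |[<!-- language="plain" --> |
| * float hb_gpu_draw (vec2 renderCoord, uint glyphLoc); |
| * ]| |
| * |
| * hb_gpu_draw() requires the `hb_gpu_atlas` uniform to be bound to the |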
| * texture containing the encoded glyph data. By default this |
| * is an `isamplerBuffer` (GL_TEXTURE_BUFFER). For platforms |
| * without texture buffers (e.g. WebGL2), define |
| * `HB_GPU_ATLAS_2D` before including the shader source; the |
| * atlas then becomes an `isampler2D` and a |
| * `uniform int hb_gpu_atlas_width` must also be set to the |
| * texture width. |
| * |
| * Parameters: |
| * |
| * - renderCoord: the interpolated em-space coordinate from |
| * the vertex shader (v_texcoord). |
| * |
| * - glyphLoc: the texel offset of this glyph's encoded blob |
| * in the atlas buffer. Passed as a flat varying from the |
| * vertex shader. |
| * |
| * Returns the coverage in [0, 1]: 1.0 inside the glyph, 0.0 |
| * outside, with antialiased edges. |
| * |
| * A typical fragment main: |
| * |
| * |[<!-- language="plain" --> |
| * in vec2 v_texcoord; |
| * flat in uint v_glyphLoc; |
| * out vec4 fragColor; |
| * |
| * void main () { |
| * float coverage = hb_gpu_draw (v_texcoord, v_glyphLoc); |
| * fragColor = vec4 (0.0, 0.0, 0.0, coverage); |
| * } |
| * ]| |
| * |
| * The coverage value can be used with alpha blending |
| * (GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA) to composite |
| * over a background, or multiplied with any color. |
| * |
| * ## Stem darkening |
| * |
| * |[<!-- language="plain" --> |
| * float hb_gpu_stem_darken (float coverage, float brightness, float ppem); |
| * ]| |
| * |
| * Optional stem darkening adjusts coverage at small sizes so |
| * thin stems are not washed out by gamma correction. |
| * |
| * Parameters: |
| * |
| * - coverage: a coverage / alpha value in [0, 1]. For |
| * hb_gpu_draw() this is the returned coverage; for |
| * hb_gpu_paint() it is the alpha of the returned vec4. |
| * |
| * - brightness: brightness of the color that will end up on |
| * screen for this fragment, in [0, 1], computed as |
| * `dot (straight_color.rgb, vec3 (1.0 / 3.0))`. |
| * |
| * - ppem: pixels per em at this fragment, computed as |
| * `1.0 / max (fwidth (v_texcoord).x, fwidth (v_texcoord).y)`. |
| * |
| * Example — draw path: |
| * |
| * |[<!-- language="plain" --> |
| * float coverage = hb_gpu_draw (v_texcoord, v_glyphLoc); |
| * float brightness = dot (u_foreground.rgb, vec3 (1.0 / 3.0)); |
| * float ppem = 1.0 / max (fwidth (v_texcoord).x, |
| * fwidth (v_texcoord).y); |
| * coverage = hb_gpu_stem_darken (coverage, brightness, ppem); |
| * ]| |
| * |
| * Example — paint path: hb_gpu_paint() returns premultiplied |
| * RGBA, and each paint layer has its own color, so compute |
| * brightness from the fragment's own straight (un-premultiplied) |
| * color rather than any shader uniform. This gives the right |
| * per-layer darkening on a multi-color glyph, and reduces to the |
| * draw-path behaviour on a monochrome glyph (whose single |
| * synthesized layer's color is the foreground uniform). |
| * |
| * |[<!-- language="plain" --> |
| * vec4 c = hb_gpu_paint (v_texcoord, v_glyphLoc, u_foreground); |
| * if (c.a > 0.0) { |
| * float brightness = dot (c.rgb, vec3 (1.0 / 3.0)) / c.a; |
| * float ppem = 1.0 / max (fwidth (v_texcoord).x, |
| * fwidth (v_texcoord).y); |
| * float darkened = hb_gpu_stem_darken (c.a, brightness, ppem); |
| * c *= darkened / c.a; // keep premultiplied RGB and A in sync |
| * } |
| * ]| |
| * |
| * # Paint |
| * |
| * The paint renderer walks the font's paint tree (COLRv0 layers |
| * or COLRv1 paint subtree) and records one layer op per solid or |
| * gradient fill, each with its own clip outline encoded as a Slug |
| * (so the paint renderer reuses the draw renderer internally for |
| * clips). Palette colors and custom overrides are resolved and |
| * baked into the blob at encode time, so the shader does not need |
| * a separate palette uniform. |
| * |
| * Monochrome (non-color) glyphs are handled transparently: |
| * hb_gpu_paint_glyph() synthesizes a single foreground-colored |
| * layer from the glyph's outline, so callers can use the paint |
| * renderer uniformly for any font. This costs more per fragment |
| * than the Draw path, but the rendered result is the same. |
| * |
| * ## Encoding |
| * |
| * |[<!-- language="plain" --> |
| * hb_gpu_paint_t *paint = hb_gpu_paint_create_or_fail (); |
| * |
| * // Optional: select a palette and install any custom-palette |
| * // overrides before hb_gpu_paint_glyph — their values are baked |
| * // into the encoded blob. (The foreground color is NOT set here: |
| * // it stays a shader uniform so dark-mode and color-change |
| * // toggles take effect without re-encoding.) |
| * hb_gpu_paint_set_palette (paint, 0); |
| * hb_gpu_paint_set_custom_palette_color (paint, 2, |
| * HB_COLOR (0xff, 0, 0, 0xff)); |
| * |
| * hb_gpu_paint_glyph (paint, font, gid); |
| * hb_glyph_extents_t ext; |
| * hb_blob_t *blob = hb_gpu_paint_encode (paint, &ext); |
| * |
| * // Upload hb_blob_get_data(blob) to the same atlas used by draw. |
| * unsigned atlas_offset = upload_to_atlas (blob); |
| * |
| * hb_gpu_paint_recycle_blob (paint, blob); |
| * // repeat for more glyphs... |
| * |
| * hb_gpu_paint_destroy (paint); |
| * ]| |
| * |
| * As with the draw encoder, create the paint encoder once and |
| * reuse it. hb_gpu_paint_encode() auto-clears on return; user |
| * configuration (palette, custom-palette overrides) is preserved |
| * across encodes. Because colors are baked, changing any of that |
| * configuration afterwards requires re-encoding the affected |
| * glyphs. The encoded blob's texel format is the same RGBA16I |
| * as the draw renderer's, so both kinds of blobs can coexist in |
| * one atlas at different offsets. |
| * |
| * ## Shader composition |
| * |
| * Same pattern as the draw renderer but using |
| * hb_gpu_paint_shader_source() for the paint-specific helpers. |
| * The vertex stage is identical — hb_gpu_paint_shader_source() |
| * returns an empty string for HB_GPU_SHADER_STAGE_VERTEX, so the |
| * fixed assembly order still works: |
| * |
| * |[<!-- language="plain" --> |
| * const char *frag_sources[] = { |
| * "#version 330\n", |
| * hb_gpu_shader_source (HB_GPU_SHADER_STAGE_FRAGMENT, |
| * HB_GPU_SHADER_LANG_GLSL), |
| * hb_gpu_paint_shader_source (HB_GPU_SHADER_STAGE_FRAGMENT, |
| * HB_GPU_SHADER_LANG_GLSL), |
| * your_fragment_main |
| * }; |
| * glShaderSource (frag_shader, 4, frag_sources, NULL); |
| * ]| |
| * |
| * The atlas setup, vertex attributes, dilation, and font scaling |
| * guidance from the Draw section all apply unchanged. |
| * |
| * ## Fragment shader |
| * |
| * The paint library provides one function: |
| * |
| * |[<!-- language="plain" --> |
| * vec4 hb_gpu_paint (vec2 renderCoord, uint glyphLoc, vec4 foreground); |
| * ]| |
| * |
| * Parameters: |
| * |
| * - renderCoord: the interpolated em-space coordinate from the |
| * vertex shader (v_texcoord), as for hb_gpu_draw(). |
| * |
| * - glyphLoc: the texel offset of this glyph's encoded paint blob |
| * in the atlas, passed as a flat varying from the vertex shader. |
| * |
| * - foreground: the foreground color used to resolve palette |
| * entries that COLRv1 marks as "foreground color". The encoder |
| * only records the is-foreground flag per layer/stop; the actual |
| * color is substituted here at render time, so runtime |
| * foreground-color changes (e.g. a dark-mode toggle) take effect |
| * without re-encoding any glyphs. |
| * |
| * Returns premultiplied RGBA: fully transparent outside the glyph, |
| * composited layers inside. Use (GL_ONE, GL_ONE_MINUS_SRC_ALPHA) |
| * blending to composite over a background. |
| * |
| * A typical fragment main: |
| * |
| * |[<!-- language="plain" --> |
| * uniform vec4 u_foreground; |
| * in vec2 v_texcoord; |
| * flat in uint v_glyphLoc; |
| * out vec4 fragColor; |
| * |
| * void main () { |
| * fragColor = hb_gpu_paint (v_texcoord, v_glyphLoc, u_foreground); |
| * } |
| * ]| |
| * |
| * Stem darkening applies to the paint output the same way it does |
| * for draw; the alpha channel of the returned vec4 carries the |
| * coverage. See the Draw section's Stem darkening notes. |
| **/ |
| |
| |
| #include "hb-gpu-fragment-glsl.hh" |
| #include "hb-gpu-fragment-msl.hh" |
| #include "hb-gpu-fragment-wgsl.hh" |
| #include "hb-gpu-fragment-hlsl.hh" |
| #include "hb-gpu-vertex-glsl.hh" |
| #include "hb-gpu-vertex-msl.hh" |
| #include "hb-gpu-vertex-wgsl.hh" |
| #include "hb-gpu-vertex-hlsl.hh" |
| |
| /** |
| * hb_gpu_shader_source: |
| * @stage: pipeline stage (vertex or fragment) |
| * @lang: shader language variant |
| * |
| * Returns the shared helper shader source used by the hb-gpu |
| * renderers for the specified stage and language. The returned |
| * string is static and must not be freed. |
| * |
| * The shared source defines utilities such as the atlas sampler |
| * and the `hb_gpu_fetch()` accessor for the fragment stage, and |
| * the `hb_gpu_dilate()` helper for the vertex stage. Each |
| * renderer-specific source (for example, |
| * hb_gpu_draw_shader_source()) assumes these helpers are already |
| * in scope — callers should concatenate a `#version` directive, |
| * then this shared source, then the renderer-specific source, |
| * then their own `main()`. |
| * |
| * Return value: (transfer none): |
| * A shader source string, or `NULL` if @stage or @lang is |
| * unsupported. |
| * |
| * XSince: REPLACEME |
| **/ |
| const char * |
| hb_gpu_shader_source (hb_gpu_shader_stage_t stage, |
| hb_gpu_shader_lang_t lang) |
| { |
| switch (stage) { |
| case HB_GPU_SHADER_STAGE_FRAGMENT: |
| switch (lang) { |
| case HB_GPU_SHADER_LANG_GLSL: return hb_gpu_fragment_glsl; |
| case HB_GPU_SHADER_LANG_MSL: return hb_gpu_fragment_msl; |
| case HB_GPU_SHADER_LANG_WGSL: return hb_gpu_fragment_wgsl; |
| case HB_GPU_SHADER_LANG_HLSL: return hb_gpu_fragment_hlsl; |
| default: return nullptr; |
| } |
| case HB_GPU_SHADER_STAGE_VERTEX: |
| switch (lang) { |
| case HB_GPU_SHADER_LANG_GLSL: return hb_gpu_vertex_glsl; |
| case HB_GPU_SHADER_LANG_MSL: return hb_gpu_vertex_msl; |
| case HB_GPU_SHADER_LANG_WGSL: return hb_gpu_vertex_wgsl; |
| case HB_GPU_SHADER_LANG_HLSL: return hb_gpu_vertex_hlsl; |
| default: return nullptr; |
| } |
| default: |
| return nullptr; |
| } |
| } |
| |
| |
| /* ---- Internal blob-recycling helpers (shared by draw and paint) ---- */ |
| |
| void |
| _hb_gpu_blob_data_destroy (void *user_data) |
| { |
| auto *bd = (hb_gpu_blob_data_t *) user_data; |
| hb_free (bd->buf); |
| hb_free (bd); |
| } |
| |
| char * |
| _hb_gpu_blob_acquire (hb_blob_t *recycled, |
| unsigned needed, |
| unsigned *out_capacity, |
| char **out_replaced_buf) |
| { |
| *out_replaced_buf = nullptr; |
| |
| if (recycled && recycled->destroy == _hb_gpu_blob_data_destroy) |
| { |
| auto *bd = (hb_gpu_blob_data_t *) recycled->user_data; |
| if (bd->capacity >= needed) |
| { |
| *out_capacity = bd->capacity; |
| return bd->buf; |
| } |
| /* Grow with a 1.5x ramp to amortize repeated growth. */ |
| unsigned alloc_bytes = needed; |
| if (unlikely (hb_unsigned_add_overflows (needed, needed / 2, |
| &alloc_bytes))) |
| alloc_bytes = needed; |
| char *new_buf = (char *) hb_realloc (bd->buf, alloc_bytes); |
| if (new_buf) |
| { |
| bd->buf = new_buf; |
| bd->capacity = alloc_bytes; |
| *out_capacity = alloc_bytes; |
| return new_buf; |
| } |
| /* Realloc failed. Fall through to a fresh hb_malloc and stash |
| * the old buf for the caller to free after _finalize. */ |
| *out_replaced_buf = bd->buf; |
| } |
| |
| char *buf = (char *) hb_malloc (needed); |
| if (unlikely (!buf)) |
| return nullptr; |
| *out_capacity = needed; |
| return buf; |
| } |
| |
| hb_blob_t * |
| _hb_gpu_blob_finalize (char *buf, |
| unsigned capacity, |
| unsigned length, |
| hb_blob_t *recycled, |
| char *replaced_recycled_buf) |
| { |
| if (recycled && recycled->destroy == _hb_gpu_blob_data_destroy) |
| { |
| auto *bd = (hb_gpu_blob_data_t *) recycled->user_data; |
| if (replaced_recycled_buf && replaced_recycled_buf != buf) |
| hb_free (replaced_recycled_buf); |
| bd->buf = buf; |
| bd->capacity = capacity; |
| recycled->data = (const char *) buf; |
| recycled->length = length; |
| return recycled; |
| } |
| |
| /* No recycled blob to update -- create a fresh one with our |
| * destroy closure so the next recycle round can reuse it. */ |
| hb_gpu_blob_data_t *bd = (hb_gpu_blob_data_t *) hb_malloc (sizeof (*bd)); |
| if (unlikely (!bd)) |
| { |
| hb_free (buf); |
| return nullptr; |
| } |
| bd->buf = buf; |
| bd->capacity = capacity; |
| |
| return hb_blob_create ((const char *) buf, length, |
| HB_MEMORY_MODE_WRITABLE, |
| bd, _hb_gpu_blob_data_destroy); |
| } |
| |
| void |
| _hb_gpu_blob_abort (char *buf, hb_blob_t *recycled) |
| { |
| if (!buf) return; |
| if (recycled && recycled->destroy == _hb_gpu_blob_data_destroy) |
| { |
| auto *bd = (hb_gpu_blob_data_t *) recycled->user_data; |
| if (buf == bd->buf) return; /* owned by the recycled blob */ |
| } |
| hb_free (buf); |
| } |
| |
| void |
| _hb_gpu_blob_recycle (hb_blob_t **slot, hb_blob_t *blob) |
| { |
| hb_blob_destroy (*slot); |
| *slot = nullptr; |
| if (!blob || blob == hb_blob_get_empty ()) |
| return; |
| *slot = blob; |
| } |