/* hb_gpu_fragment_msl: Metal Shading Language source for the shared
 * fragment-shader helpers of the hb-gpu renderers, stored as one
 * NUL-terminated C string.
 *
 * The string is built from adjacent string literals, one per shader
 * line, each ending in "\n".  The text is runtime data: any edit to a
 * literal changes the shader that gets compiled.
 * (Presumably this file is generated from a .msl source -- confirm
 * before editing by hand.)
 *
 * Preprocessor knobs read by the shader text:
 *   HB_GPU_UNITS_PER_EM  defaults to 4 when not already defined; atlas
 *                        coordinates are multiplied by its reciprocal
 *                        (HB_GPU_INV_UNITS).
 *   HB_GPU_NO_MSAA       when defined, _hb_gpu_slug skips the extra
 *                        four-sample path used below 16 ppem.
 *
 * Functions defined by the shader text:
 *   hb_gpu_fetch, hb_gpu_ppem, hb_gpu_stem_darken, plus the
 *   underscore-prefixed helpers _hb_gpu_calc_root_code,
 *   _hb_gpu_solve_horiz_poly, _hb_gpu_solve_vert_poly,
 *   _hb_gpu_calc_coverage, _hb_gpu_decode_glyph, _hb_gpu_curve_counts,
 *   _hb_gpu_slug_single and _hb_gpu_slug.
 *
 * The atlas is passed to these functions explicitly as a
 * `device const short4*` argument.
 *
 * NOTE(review): the comment on _hb_gpu_slug says it takes pixelsPerEm
 * so it can run in non-uniform control flow, yet its MSAA path calls
 * hb_gpu_ppem, which calls fwidth().  Confirm whether that is intended.
 */
static const char *hb_gpu_fragment_msl =
"/*\n"
" * Copyright (C) 2026 Behdad Esfahbod\n"
" *\n"
" * This is part of HarfBuzz, a text shaping library.\n"
" *\n"
" * Permission is hereby granted, without written agreement and without\n"
" * license or royalty fees, to use, copy, modify, and distribute this\n"
" * software and its documentation for any purpose, provided that the\n"
" * above copyright notice and the following two paragraphs appear in\n"
" * all copies of this software.\n"
" *\n"
" * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR\n"
" * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES\n"
" * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN\n"
" * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH\n"
" * DAMAGE.\n"
" *\n"
" * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,\n"
" * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND\n"
" * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS\n"
" * ON AN \"AS IS\" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO\n"
" * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.\n"
" */\n"
"\n"
"\n"
"/* Shared fragment-shader helpers for the hb-gpu renderers.\n"
" *\n"
" * Requires Metal Shading Language 2.0.\n"
" */\n"
"\n"
"\n"
"#ifndef HB_GPU_UNITS_PER_EM\n"
"#define HB_GPU_UNITS_PER_EM 4\n"
"#endif\n"
"\n"
"#define HB_GPU_INV_UNITS float(1.0 / float(HB_GPU_UNITS_PER_EM))\n"
"\n"
"\n"
"int4 hb_gpu_fetch (device const short4* hb_gpu_atlas, int offset)\n"
"{\n"
" return int4 (hb_gpu_atlas[offset]);\n"
"}\n"
"\n"
"uint _hb_gpu_calc_root_code (float y1, float y2, float y3)\n"
"{\n"
" uint i1 = as_type<uint> (y1) >> 31U;\n"
" uint i2 = as_type<uint> (y2) >> 30U;\n"
" uint i3 = as_type<uint> (y3) >> 29U;\n"
"\n"
" uint shift = (i2 & 2U) | (i1 & ~2U);\n"
" shift = (i3 & 4U) | (shift & ~4U);\n"
"\n"
" return (0x2E74U >> shift) & 0x0101U;\n"
"}\n"
"\n"
"float2 _hb_gpu_solve_horiz_poly (float2 a, float2 b, float2 p1)\n"
"{\n"
" float ra = 1.0 / a.y;\n"
" float rb = 0.5 / b.y;\n"
"\n"
" float d = sqrt (max (b.y * b.y - a.y * p1.y, 0.0));\n"
" float t1 = (b.y - d) * ra;\n"
" float t2 = (b.y + d) * ra;\n"
"\n"
" if (a.y == 0.0)\n"
" t1 = t2 = p1.y * rb;\n"
"\n"
" return float2 ((a.x * t1 - b.x * 2.0) * t1 + p1.x,\n"
" (a.x * t2 - b.x * 2.0) * t2 + p1.x);\n"
"}\n"
"\n"
"float2 _hb_gpu_solve_vert_poly (float2 a, float2 b, float2 p1)\n"
"{\n"
" float ra = 1.0 / a.x;\n"
" float rb = 0.5 / b.x;\n"
"\n"
" float d = sqrt (max (b.x * b.x - a.x * p1.x, 0.0));\n"
" float t1 = (b.x - d) * ra;\n"
" float t2 = (b.x + d) * ra;\n"
"\n"
" if (a.x == 0.0)\n"
" t1 = t2 = p1.x * rb;\n"
"\n"
" return float2 ((a.y * t1 - b.y * 2.0) * t1 + p1.y,\n"
" (a.y * t2 - b.y * 2.0) * t2 + p1.y);\n"
"}\n"
"\n"
"float _hb_gpu_calc_coverage (float xcov, float ycov, float xwgt, float ywgt)\n"
"{\n"
" float coverage = max (abs (xcov * xwgt + ycov * ywgt) /\n"
" max (xwgt + ywgt, 1.0 / 65536.0),\n"
" min (abs (xcov), abs (ycov)));\n"
"\n"
" return clamp (coverage, 0.0, 1.0);\n"
"}\n"
"\n"
"/* Decoded glyph band info for a pixel position. */\n"
"struct _hb_gpu_glyph_info\n"
"{\n"
" int glyphLoc;\n"
" int bandBase;\n"
" int2 bandIndex;\n"
" int numHBands;\n"
" int numVBands;\n"
" float2 scale;\n"
"};\n"
"\n"
"_hb_gpu_glyph_info _hb_gpu_decode_glyph (float2 renderCoord, uint glyphLoc_,\n"
" device const short4* hb_gpu_atlas)\n"
"{\n"
" _hb_gpu_glyph_info gi;\n"
" gi.glyphLoc = int (glyphLoc_);\n"
"\n"
" int4 header0 = hb_gpu_fetch (hb_gpu_atlas, gi.glyphLoc);\n"
" int4 header1 = hb_gpu_fetch (hb_gpu_atlas, gi.glyphLoc + 1);\n"
" float4 ext = float4 (header0) * HB_GPU_INV_UNITS;\n"
" gi.numHBands = header1.r;\n"
" gi.numVBands = header1.g;\n"
" gi.scale = float2 (float (header1.b), float (header1.a));\n"
"\n"
" float2 extSize = ext.zw - ext.xy;\n"
" float2 bandScale = float2 (float (gi.numVBands), float (gi.numHBands)) / max (extSize, float2 (1.0 / 65536.0));\n"
" float2 bandOffset = -ext.xy * bandScale;\n"
"\n"
" gi.bandIndex = clamp (int2 (renderCoord * bandScale + bandOffset),\n"
" int2 (0, 0),\n"
" int2 (gi.numVBands - 1, gi.numHBands - 1));\n"
"\n"
" gi.bandBase = gi.glyphLoc + 2;\n"
" return gi;\n"
"}\n"
"\n"
"/* Return pixels per em at this fragment.\n"
" *\n"
" * renderCoord: em-space sample position\n"
" * glyphLoc: texel offset of glyph blob in atlas\n"
" */\n"
"float hb_gpu_ppem (float2 renderCoord, uint glyphLoc_,\n"
" device const short4* hb_gpu_atlas)\n"
"{\n"
" _hb_gpu_glyph_info gi = _hb_gpu_decode_glyph (renderCoord, glyphLoc_, hb_gpu_atlas);\n"
" float2 emsPerPixel = fwidth (renderCoord);\n"
" return min (gi.scale.x, gi.scale.y) /\n"
" max (emsPerPixel.x, emsPerPixel.y);\n"
"}\n"
"\n"
"/* Return per-pixel curve counts: (horizontal, vertical). */\n"
"int2 _hb_gpu_curve_counts (float2 renderCoord, uint glyphLoc_,\n"
" device const short4* hb_gpu_atlas)\n"
"{\n"
" _hb_gpu_glyph_info gi = _hb_gpu_decode_glyph (renderCoord, glyphLoc_, hb_gpu_atlas);\n"
" int hCount = hb_gpu_fetch (hb_gpu_atlas, gi.bandBase + gi.bandIndex.y).r;\n"
" int vCount = hb_gpu_fetch (hb_gpu_atlas, gi.bandBase + gi.numHBands + gi.bandIndex.x).r;\n"
" return int2 (hCount, vCount);\n"
"}\n"
"\n"
"/* Single-sample coverage in [0, 1]. */\n"
"float _hb_gpu_slug_single (float2 renderCoord, float2 pixelsPerEm, uint glyphLoc_,\n"
" device const short4* hb_gpu_atlas)\n"
"{\n"
"\n"
" _hb_gpu_glyph_info gi = _hb_gpu_decode_glyph (renderCoord, glyphLoc_, hb_gpu_atlas);\n"
" int glyphLoc = gi.glyphLoc;\n"
" int bandBase = gi.bandBase;\n"
" int numHBands = gi.numHBands;\n"
"\n"
" float xcov = 0.0;\n"
" float xwgt = 0.0;\n"
"\n"
" int4 hbandData = hb_gpu_fetch (hb_gpu_atlas, bandBase + gi.bandIndex.y);\n"
" int hCurveCount = hbandData.r;\n"
" /* Symmetric: choose rightward (desc) or leftward (asc) sort */\n"
" float hSplit = float (hbandData.a) * HB_GPU_INV_UNITS;\n"
" bool hLeftRay = (renderCoord.x < hSplit);\n"
" int hDataOffset = (hLeftRay ? hbandData.b : hbandData.g) + 32768;\n"
"\n"
" for (int ci = 0; ci < hCurveCount; ci++)\n"
" {\n"
" int curveOffset = hb_gpu_fetch (hb_gpu_atlas, glyphLoc + hDataOffset + ci).r + 32768;\n"
"\n"
" int4 raw12 = hb_gpu_fetch (hb_gpu_atlas, glyphLoc + curveOffset);\n"
" int4 raw3 = hb_gpu_fetch (hb_gpu_atlas, glyphLoc + curveOffset + 1);\n"
"\n"
" float4 q12 = float4 (raw12) * HB_GPU_INV_UNITS;\n"
" float2 q3 = float2 (raw3.rg) * HB_GPU_INV_UNITS;\n"
"\n"
" float4 p12 = q12 - float4 (renderCoord, renderCoord);\n"
" float2 p3 = q3 - renderCoord;\n"
"\n"
" if (hLeftRay) {\n"
" if (min (min (p12.x, p12.z), p3.x) * pixelsPerEm.x > 0.5) break;\n"
" } else {\n"
" if (max (max (p12.x, p12.z), p3.x) * pixelsPerEm.x < -0.5) break;\n"
" }\n"
"\n"
" uint code = _hb_gpu_calc_root_code (p12.y, p12.w, p3.y);\n"
" if (code != 0U)\n"
" {\n"
" float2 a = q12.xy - q12.zw * 2.0 + q3;\n"
" float2 b = q12.xy - q12.zw;\n"
" float2 r = _hb_gpu_solve_horiz_poly (a, b, p12.xy) * pixelsPerEm.x;\n"
" /* For leftward ray: saturate(0.5 - r) counts coverage from the left */\n"
" float2 cov = hLeftRay ? clamp (float2 (0.5) - r, 0.0, 1.0)\n"
" : clamp (r + float2 (0.5), 0.0, 1.0);\n"
"\n"
" if ((code & 1U) != 0U)\n"
" {\n"
" xcov += cov.x;\n"
" xwgt = max (xwgt, clamp (1.0 - abs (r.x) * 2.0, 0.0, 1.0));\n"
" }\n"
"\n"
" if (code > 1U)\n"
" {\n"
" xcov -= cov.y;\n"
" xwgt = max (xwgt, clamp (1.0 - abs (r.y) * 2.0, 0.0, 1.0));\n"
" }\n"
" }\n"
" }\n"
"\n"
" float ycov = 0.0;\n"
" float ywgt = 0.0;\n"
"\n"
" int4 vbandData = hb_gpu_fetch (hb_gpu_atlas, bandBase + numHBands + gi.bandIndex.x);\n"
" int vCurveCount = vbandData.r;\n"
" float vSplit = float (vbandData.a) * HB_GPU_INV_UNITS;\n"
" bool vLeftRay = (renderCoord.y < vSplit);\n"
" int vDataOffset = (vLeftRay ? vbandData.b : vbandData.g) + 32768;\n"
"\n"
" for (int ci = 0; ci < vCurveCount; ci++)\n"
" {\n"
" int curveOffset = hb_gpu_fetch (hb_gpu_atlas, glyphLoc + vDataOffset + ci).r + 32768;\n"
"\n"
" int4 raw12 = hb_gpu_fetch (hb_gpu_atlas, glyphLoc + curveOffset);\n"
" int4 raw3 = hb_gpu_fetch (hb_gpu_atlas, glyphLoc + curveOffset + 1);\n"
"\n"
" float4 q12 = float4 (raw12) * HB_GPU_INV_UNITS;\n"
" float2 q3 = float2 (raw3.rg) * HB_GPU_INV_UNITS;\n"
"\n"
" float4 p12 = q12 - float4 (renderCoord, renderCoord);\n"
" float2 p3 = q3 - renderCoord;\n"
"\n"
" if (vLeftRay) {\n"
" if (min (min (p12.y, p12.w), p3.y) * pixelsPerEm.y > 0.5) break;\n"
" } else {\n"
" if (max (max (p12.y, p12.w), p3.y) * pixelsPerEm.y < -0.5) break;\n"
" }\n"
"\n"
" uint code = _hb_gpu_calc_root_code (p12.x, p12.z, p3.x);\n"
" if (code != 0U)\n"
" {\n"
" float2 a = q12.xy - q12.zw * 2.0 + q3;\n"
" float2 b = q12.xy - q12.zw;\n"
" float2 r = _hb_gpu_solve_vert_poly (a, b, p12.xy) * pixelsPerEm.y;\n"
" float2 cov = vLeftRay ? clamp (float2 (0.5) - r, 0.0, 1.0)\n"
" : clamp (r + float2 (0.5), 0.0, 1.0);\n"
"\n"
" if ((code & 1U) != 0U)\n"
" {\n"
" ycov -= cov.x;\n"
" ywgt = max (ywgt, clamp (1.0 - abs (r.x) * 2.0, 0.0, 1.0));\n"
" }\n"
"\n"
" if (code > 1U)\n"
" {\n"
" ycov += cov.y;\n"
" ywgt = max (ywgt, clamp (1.0 - abs (r.y) * 2.0, 0.0, 1.0));\n"
" }\n"
" }\n"
" }\n"
"\n"
" return _hb_gpu_calc_coverage (xcov, ycov, xwgt, ywgt);\n"
"}\n"
"\n"
"/* Return coverage in [0, 1].\n"
" *\n"
" * renderCoord: em-space sample position\n"
" * glyphLoc: texel offset of glyph blob in atlas\n"
" * hb_gpu_atlas: device pointer to the atlas buffer\n"
" */\n"
"/* The MSAA-aware implementation. Caller supplies pixelsPerEm so\n"
" * this function can be invoked from non-uniform control flow (for\n"
" * example from a paint op-stream branch where a recomputed fwidth\n"
" * would be rejected by strict derivative-uniformity rules). */\n"
"float _hb_gpu_slug (float2 renderCoord, float2 pixelsPerEm, uint glyphLoc_,\n"
" device const short4* hb_gpu_atlas)\n"
"{\n"
" float c = _hb_gpu_slug_single (renderCoord, pixelsPerEm, glyphLoc_, hb_gpu_atlas);\n"
"\n"
"#ifndef HB_GPU_NO_MSAA\n"
" float ppem = hb_gpu_ppem (renderCoord, glyphLoc_, hb_gpu_atlas);\n"
"\n"
" if (ppem < 16.0)\n"
" {\n"
" float2 emsPerPixel = 1.0 / pixelsPerEm;\n"
" float2 d = emsPerPixel * (1.0 / 3.0);\n"
" float msaa = 0.25 *\n"
" (_hb_gpu_slug_single (renderCoord + float2 (-d.x, -d.y), pixelsPerEm, glyphLoc_, hb_gpu_atlas) +\n"
" _hb_gpu_slug_single (renderCoord + float2 ( d.x, -d.y), pixelsPerEm, glyphLoc_, hb_gpu_atlas) +\n"
" _hb_gpu_slug_single (renderCoord + float2 (-d.x, d.y), pixelsPerEm, glyphLoc_, hb_gpu_atlas) +\n"
" _hb_gpu_slug_single (renderCoord + float2 ( d.x, d.y), pixelsPerEm, glyphLoc_, hb_gpu_atlas));\n"
"\n"
" c = mix (c, msaa, smoothstep (16.0, 8.0, ppem));\n"
" }\n"
"#endif\n"
"\n"
" return c;\n"
"}\n"
"\n"
"/* Stem darkening for small sizes.\n"
" *\n"
" * coverage: output of hb_gpu_draw\n"
" * brightness: foreground brightness in [0, 1]\n"
" * ppem: pixels per em at this fragment\n"
" */\n"
"float hb_gpu_stem_darken (float coverage, float brightness, float ppem)\n"
"{\n"
" return pow (coverage,\n"
" mix (pow (2.0, brightness - 0.5), 1.0,\n"
" smoothstep (8.0, 48.0, ppem)));\n"
"}\n"
;