blob: c5ee83ec534a97d5bf82b907d07cd15720863c6c [file] [log] [blame] [edit]
#!/usr/bin/env python3
import hashlib
import math
import os
import shlex
import struct
import sys
from typing import NoReturn
# Trailer magic: write_seed_file() writes these 8 bytes at the very end of
# each seed file, right after the little-endian length of the op stream.
HB_FUZZING_EXTENDED_MAGIC = b"HBSUBFZ2"

# Identifiers for the individual sets an operation can target.
# NOTE(review): presumably passed as ``set_type`` to the emit_set_* helpers
# and mirroring an enum on the consumer side; value 2 is not defined in this
# file -- confirm against the consumer.
HB_SUBSET_SETS_GLYPH_INDEX = 0
HB_SUBSET_SETS_UNICODE = 1
HB_SUBSET_SETS_DROP_TABLE_TAG = 3
HB_SUBSET_SETS_NAME_ID = 4
HB_SUBSET_SETS_NAME_LANG_ID = 5
HB_SUBSET_SETS_LAYOUT_FEATURE_TAG = 6
HB_SUBSET_SETS_LAYOUT_SCRIPT_TAG = 7

# Operation codes: the emit_* helpers write one of these as the leading byte
# of every operation they append to the op stream.
HB_FUZZING_OP_SET_FLAGS = 1
HB_FUZZING_OP_KEEP_EVERYTHING = 2
HB_FUZZING_OP_SET_CLEAR = 3
HB_FUZZING_OP_SET_INVERT = 4
HB_FUZZING_OP_SET_ADD_RANGES = 5
HB_FUZZING_OP_SET_DEL_RANGES = 6
HB_FUZZING_OP_TEXT_ADD = 7
HB_FUZZING_OP_TEXT_DEL = 8
HB_FUZZING_OP_AXIS_PIN_ALL_TO_DEFAULT = 9
HB_FUZZING_OP_AXIS_SET = 10

# Per-axis mode: produced by parse_axis_range() and stored as the single mode
# byte of each record written by emit_axis_records().
HB_FUZZING_AXIS_PIN_TO_DEFAULT = 0
HB_FUZZING_AXIS_SET_RANGE = 1

# Command-line flag spellings mapped to single-bit masks.
# NOTE(review): presumably OR-ed together and handed to emit_set_flags(); the
# bit positions presumably have to match the consumer's flag enum -- confirm.
FLAG_BITS = {
    "--no-hinting": 1 << 0,
    "--retain-gids": 1 << 1,
    "--desubroutinize": 1 << 2,
    "--name-legacy": 1 << 3,
    "--set-overlaps-flag": 1 << 4,
    "--passthrough-tables": 1 << 5,
    "--notdef-outline": 1 << 6,
    "--glyph-names": 1 << 7,
    "--no-prune-unicode-ranges": 1 << 8,
    "--no-layout-closure": 1 << 9,
    "--optimize": 1 << 10,
    "--no-bidi-closure": 1 << 11,
    "--iftb-requirements": 1 << 12,
    "--retain-num-glyphs": 1 << 13,
    "--downgrade-cff2": 1 << 14,
}
class UserError(Exception):
    """Error caused by invalid user input.

    Raised through fail() and caught by main_with_user_errors(), which prints
    the message to stderr and returns exit status 1.
    """
def fail(msg: str) -> NoReturn:
    """Abort the current operation by raising ``UserError(msg)``.

    This function never returns normally. It is annotated ``NoReturn`` so
    that type checkers treat code following a ``fail(...)`` call as
    unreachable.

    Raises:
        UserError: always, carrying *msg* as its message.
    """
    raise UserError(msg)
def pack_u32(value: int) -> bytes:
    """Return *value* as 4 little-endian bytes.

    The value is first reduced modulo 2**32, so negative or oversized
    integers wrap instead of raising.
    """
    wrapped = value & 0xFFFFFFFF
    return struct.pack("<I", wrapped)
def pack_f32(value: float) -> bytes:
    """Return *value* encoded as a 4-byte little-endian single-precision float."""
    encoder = struct.Struct("<f")
    return encoder.pack(value)
def emit_set_flags(ops: bytearray, flags: int) -> None:
    """Append a SET_FLAGS operation to *ops*.

    Layout: the opcode byte followed by *flags* as a little-endian u32.
    """
    ops.append(HB_FUZZING_OP_SET_FLAGS)
    ops.extend(pack_u32(flags))
def emit_keep_everything(ops: bytearray) -> None:
    """Append a KEEP_EVERYTHING operation (a single opcode byte) to *ops*."""
    ops += bytes((HB_FUZZING_OP_KEEP_EVERYTHING,))
def emit_set_clear(ops: bytearray, set_type: int) -> None:
    """Append a SET_CLEAR operation for *set_type* to *ops*.

    Layout: the opcode byte followed by the set-type byte.
    """
    record = bytes([HB_FUZZING_OP_SET_CLEAR, set_type])
    ops.extend(record)
def emit_set_invert(ops: bytearray, set_type: int) -> None:
    """Append a SET_INVERT operation for *set_type* to *ops*.

    Layout: the opcode byte followed by the set-type byte.
    """
    record = bytes([HB_FUZZING_OP_SET_INVERT, set_type])
    ops.extend(record)
def emit_set_ranges(ops: bytearray, opcode: int, set_type: int, ranges: list[tuple[int, int]]) -> None:
    """Append a range-list operation to *ops*.

    Layout: the opcode byte, the set-type byte, the number of ranges as a
    little-endian u32, then for every range its start and end, each as a
    little-endian u32.
    """
    header = bytes([opcode, set_type])
    ops.extend(header)
    ops.extend(pack_u32(len(ranges)))
    for first, last in ranges:
        ops.extend(pack_u32(first))
        ops.extend(pack_u32(last))
def emit_text(ops: bytearray, opcode: int, codepoints: list[int]) -> None:
    """Append a text operation to *ops*.

    Layout: the opcode byte, the number of codepoints as a little-endian
    u32, then every codepoint as a little-endian u32.
    """
    ops += bytes((opcode,))
    ops.extend(pack_u32(len(codepoints)))
    for codepoint in codepoints:
        ops.extend(pack_u32(codepoint))
def emit_pin_all_axes_to_default(ops: bytearray) -> None:
    """Append an AXIS_PIN_ALL_TO_DEFAULT operation (a single opcode byte) to *ops*."""
    ops += bytes((HB_FUZZING_OP_AXIS_PIN_ALL_TO_DEFAULT,))
def emit_axis_records(ops: bytearray, records: list[tuple[int, int, float, float, float]]) -> None:
    """Append an AXIS_SET operation carrying *records* to *ops*.

    Layout: the opcode byte, the record count as a little-endian u32, then
    for every record: the tag (little-endian u32), the mode (one byte) and
    the minimum, middle and maximum values (little-endian f32 each).
    """
    ops.append(HB_FUZZING_OP_AXIS_SET)
    ops.extend(pack_u32(len(records)))
    for axis_tag, axis_mode, low, mid, high in records:
        ops.extend(pack_u32(axis_tag))
        ops.append(axis_mode)
        for bound in (low, mid, high):
            ops.extend(pack_f32(bound))
def parse_decimal_ranges(value: str) -> list[tuple[int, int]]:
    """Parse a comma/space separated list of decimal values and ranges.

    Every non-empty chunk is either a single value ``N``, returned as
    ``(N, N)``, or an inclusive range ``A-B``, returned as ``(A, B)``.

    Args:
        value: the raw option text, e.g. ``"1,3-5 7"``.

    Returns:
        The parsed ``(start, end)`` pairs in input order.

    Raises:
        UserError: for a ``"*"`` chunk (wildcards must be handled by the
            caller), for text that is not a decimal integer, for a negative
            or reversed range, and for values that do not fit in 32 bits.
    """
    ranges: list[tuple[int, int]] = []
    for chunk in value.replace(" ", ",").split(","):
        if not chunk:
            continue
        if chunk == "*":
            fail("Internal error: wildcard range should be handled before parsing.")
        if "-" in chunk:
            start_text, end_text = chunk.split("-", 1)
            try:
                start = int(start_text, 10)
                end = int(end_text, 10)
            except ValueError:
                fail(f"Failed parsing decimal range '{chunk}'.")
        else:
            try:
                start = end = int(chunk, 10)
            except ValueError:
                fail(f"Failed parsing decimal value '{chunk}'.")
        # Ranges are serialised with pack_u32(), which wraps modulo 2**32;
        # reject oversized values here instead of silently aliasing them to
        # a different, smaller id.
        if start < 0 or end < start or end > 0xFFFFFFFF:
            fail(f"Invalid decimal range '{chunk}'.")
        ranges.append((start, end))
    return ranges
def parse_hex_ranges(value: str) -> list[tuple[int, int]]:
    """Parse a comma/space separated list of hexadecimal values and ranges.

    Every non-empty chunk is either a single value ``X``, returned as
    ``(X, X)``, or an inclusive range ``A-B``. Each range must satisfy
    ``0 <= start <= end <= 0x10FFFF``.

    Raises:
        UserError: for a ``"*"`` chunk, for text that is not a hexadecimal
            integer, and for ranges outside the bounds above.
    """
    parsed: list[tuple[int, int]] = []
    for chunk in value.replace(" ", ",").split(","):
        if chunk == "":
            continue
        if chunk == "*":
            fail("Internal error: wildcard range should be handled before parsing.")
        low_text, dash, high_text = chunk.partition("-")
        if dash:
            try:
                low = int(low_text, 16)
                high = int(high_text, 16)
            except ValueError:
                fail(f"Failed parsing unicode range '{chunk}'.")
        else:
            try:
                low = high = int(chunk, 16)
            except ValueError:
                fail(f"Failed parsing unicode value '{chunk}'.")
        if not (0 <= low <= high <= 0x10FFFF):
            fail(f"Invalid unicode range '{chunk}'.")
        parsed.append((low, high))
    return parsed
def parse_tags(value: str) -> list[tuple[int, int]]:
    """Parse a comma/whitespace separated list of tags into single-value ranges.

    Each tag is padded with spaces to four characters, encoded as ASCII and
    read as a big-endian integer ``t``; it is returned as the pair ``(t, t)``.

    Raises:
        UserError: for a ``"*"`` chunk, for tags longer than four characters
            and for tags containing non-ASCII characters.
    """
    result: list[tuple[int, int]] = []
    for chunk in value.replace(",", " ").split():
        if chunk == "*":
            fail("Internal error: wildcard tag set should be handled before parsing.")
        if len(chunk) > 4:
            fail(f"Tag '{chunk}' is longer than 4 bytes.")
        padded = chunk.ljust(4, " ")
        try:
            raw = padded.encode("ascii")
        except UnicodeEncodeError:
            fail(f"Tag '{chunk}' is not ASCII.")
        else:
            tag_value = int.from_bytes(raw, byteorder="big")
        result.append((tag_value, tag_value))
    return result
def parse_text_codepoints(value: str) -> list[int]:
    """Return the codepoint of every character in *value*, in order.

    Raises:
        UserError: if a codepoint falls outside ``0..0x10FFFF``.
    """
    codepoints: list[int] = []
    for ch in value:
        cp = ord(ch)
        if not 0 <= cp <= 0x10FFFF:
            fail(f"Invalid codepoint value {cp}.")
        codepoints.append(cp)
    return codepoints
def parse_tag_value(tag_text: str, kind: str) -> int:
    """Convert a tag of up to four ASCII characters into its integer value.

    The tag is padded with spaces to four characters, encoded as ASCII and
    read as a big-endian integer. *kind* only labels the tag in error
    messages.

    Raises:
        UserError: if the tag is longer than four characters or not ASCII.
    """
    if len(tag_text) > 4:
        fail(f"{kind} '{tag_text}' is longer than 4 bytes.")
    padded = tag_text.ljust(4, " ")
    try:
        encoded = padded.encode("ascii")
    except UnicodeEncodeError:
        fail(f"{kind} '{tag_text}' is not ASCII.")
    else:
        return int.from_bytes(encoded, byteorder="big")
def parse_axis_number(text: str) -> float:
    """Convert *text* to a float.

    Raises:
        UserError: if ``float()`` rejects *text*.
    """
    try:
        number = float(text)
    except ValueError:
        fail(f"Failed parsing axis value '{text}'.")
    else:
        return number
def parse_axis_range(value: str) -> tuple[int, float, float, float]:
    """Parse one axis value into ``(mode, minimum, middle, maximum)``.

    Accepted forms:
      * ``drop``        -> pin-to-default mode with all three values 0.0
      * ``v``           -> set-range mode with minimum = middle = maximum = v
      * ``min:max``     -> set-range mode, middle is NaN
      * ``min:mid:max`` -> set-range mode

    In the colon-separated forms an empty field is encoded as NaN.

    Raises:
        UserError: if a field is not a number or there are more than three
            colon-separated fields.
    """
    if value == "drop":
        return HB_FUZZING_AXIS_PIN_TO_DEFAULT, 0.0, 0.0, 0.0

    def bound(text: str) -> float:
        # An empty field is represented by NaN instead of being parsed.
        return parse_axis_number(text) if text else math.nan

    parts = value.split(":")
    count = len(parts)
    if count == 1:
        point = parse_axis_number(parts[0])
        return HB_FUZZING_AXIS_SET_RANGE, point, point, point
    if count == 2:
        minimum = bound(parts[0])
        maximum = bound(parts[1])
        return HB_FUZZING_AXIS_SET_RANGE, minimum, math.nan, maximum
    if count == 3:
        # The list is built left to right, so fields are validated in order.
        minimum, middle, maximum = [bound(part) for part in parts]
        return HB_FUZZING_AXIS_SET_RANGE, minimum, middle, maximum
    fail(f"Failed parsing axis range '{value}'.")
def parse_variations(value: str) -> tuple[bool, list[tuple[int, int, float, float, float]]]:
    """Parse a comma/space separated list of ``tag=value`` axis settings.

    Returns:
        ``(pin_all, records)``. ``pin_all`` is True when a ``*=drop`` item
        was seen. ``records`` holds one
        ``(tag, mode, minimum, middle, maximum)`` tuple per remaining item,
        in input order.

    Raises:
        UserError: for items without ``=``, for a wildcard tag with a value
            other than ``drop``, and for anything parse_tag_value() or
            parse_axis_range() rejects.
    """
    records: list[tuple[int, int, float, float, float]] = []
    pin_all = False
    for item in filter(None, value.replace(" ", ",").split(",")):
        tag_text, equals, axis_value_text = item.partition("=")
        if not equals:
            fail(f"Unsupported variation item '{item}'. Expected tag=value.")
        if tag_text == "*":
            if axis_value_text != "drop":
                fail("Only *=drop is supported for wildcard axis pinning.")
            pin_all = True
            continue
        tag = parse_tag_value(tag_text, "Axis tag")
        mode, minimum, middle, maximum = parse_axis_range(axis_value_text)
        records.append((tag, mode, minimum, middle, maximum))
    return pin_all, records
def add_set_option(ops: bytearray, set_type: int, modifier: str, value: str, parser) -> None:
    """Translate one set option into operations appended to *ops*.

    *modifier* selects how *value* is combined with the set:
      * ``""``  -> the set is cleared first, then the parsed ranges are added
      * ``"-"`` -> the parsed ranges are removed
      * anything else -> the parsed ranges are added

    The wildcard value ``"*"`` never reaches *parser*: with ``"-"`` it emits
    a clear, otherwise a clear followed by an invert. With the empty
    modifier this means the set is cleared twice in a row.

    Args:
        parser: callable turning *value* into a list of ``(start, end)``
            pairs; nothing is emitted when it returns an empty list.
    """
    removing = modifier == "-"
    if modifier == "":
        emit_set_clear(ops, set_type)
    if value == "*":
        emit_set_clear(ops, set_type)
        if not removing:
            emit_set_invert(ops, set_type)
        return
    ranges = parser(value)
    if not ranges:
        return
    opcode = HB_FUZZING_OP_SET_DEL_RANGES if removing else HB_FUZZING_OP_SET_ADD_RANGES
    emit_set_ranges(ops, opcode, set_type, ranges)
def parse_output_arg(argv: list[str], usage_text: str) -> tuple[str, list[str]]:
    """Consume the leading ``-o PATH`` / help options from *argv*.

    ``argv[0]`` is skipped. Scanning stops at the first argument that is not
    ``-o``, ``-h`` or ``--help``; a ``--`` at that position is dropped.

    Returns:
        ``(out, remaining)`` where ``out`` is the last ``-o`` value seen
        (``""`` if none) and ``remaining`` the unconsumed arguments.

    Raises:
        SystemExit: status 0 after printing *usage_text* for ``-h`` or
            ``--help``; status 1 after printing it to stderr when ``-o`` has
            no value.
    """
    out = ""
    rest = argv[1:]
    index = 0
    total = len(rest)
    while index < total:
        arg = rest[index]
        if arg == "-o":
            if index + 1 >= total:
                print(usage_text, end="", file=sys.stderr)
                raise SystemExit(1)
            out = rest[index + 1]
            index += 2
        elif arg in ("-h", "--help"):
            print(usage_text, end="")
            raise SystemExit(0)
        else:
            if arg == "--":
                # The separator itself is not part of the remaining arguments.
                index += 1
            break
    return out, rest[index:]
def read_text_file(path: str) -> list[int]:
    """Read *path* as UTF-8 text and return the codepoints of its contents."""
    with open(path, "r", encoding="utf-8") as fp:
        text = fp.read()
    return parse_text_codepoints(text)
def read_range_file(path: str, parser) -> list[tuple[int, int]]:
    """Read ranges from the UTF-8 text file *path*, one line at a time.

    Everything from the first ``#`` on a line is dropped as a comment and
    the rest is stripped. Non-empty remainders are handed to *parser*, and
    the results are concatenated in file order.
    """
    collected = []
    with open(path, "r", encoding="utf-8") as fp:
        for raw_line in fp:
            content, _, _ = raw_line.partition("#")
            content = content.strip()
            if not content:
                continue
            collected += parser(content)
    return collected
def resolve_seed_output_path(font_path: str, font_bytes: bytes, out_path: str, default_suffix: str) -> tuple[str, str]:
    """Work out where a seed file should be written.

    The default file name is the SHA-1 hex digest of *font_bytes* followed
    by *default_suffix*. It is used whenever *out_path* does not name a file:

      * empty *out_path*: the ``fonts`` directory next to this module
      * *out_path* ending in a path separator: that directory
      * *out_path* naming an existing directory: that directory
      * anything else: *out_path* itself is the file to write

    Args:
        font_path: currently unused; kept so the signature stays stable.
        font_bytes: font contents, used only to derive the default name.
        out_path: user-supplied output location, possibly empty.
        default_suffix: suffix appended to the digest for the default name.

    Returns:
        ``(display_path, absolute_path)``: the path in the form to show to
        the user, and the absolute path to open.
    """
    default_name = hashlib.sha1(font_bytes).hexdigest() + default_suffix
    if not out_path:
        default_dir = os.path.join(os.path.dirname(__file__), "fonts")
        display_path = os.path.join(default_dir, default_name)
        return display_path, os.path.abspath(display_path)
    # os.altsep is "/" on Windows and None elsewhere; accept both spellings
    # of a trailing separator as "this is a directory".
    separators = tuple(sep for sep in (os.sep, os.altsep) if sep)
    if out_path.endswith(separators):
        display_path = os.path.join(out_path, default_name)
        return display_path, os.path.abspath(display_path)
    abs_out_path = os.path.abspath(out_path)
    if os.path.isdir(abs_out_path):
        # The display path keeps the user's spelling of the directory,
        # whether it was given as a relative or an absolute path.
        display_path = os.path.join(out_path, default_name)
        return display_path, os.path.join(abs_out_path, default_name)
    return out_path, abs_out_path
def format_leading_comment(argv: list[str]) -> bytes:
    """Render *argv* as a ``$ command`` line.

    The arguments are shell-quoted and joined with single spaces, and the
    result is UTF-8 encoded and terminated by a newline.
    """
    command = shlex.join(argv)
    return f"$ {command}\n".encode("utf-8")
def write_seed_file(font_file: str,
                    out_path: str,
                    ops: bytearray,
                    default_suffix: str = ".susbset-seed",
                    leading_comment: bytes = b"") -> str:
    """Write a seed file combining a font with an operation stream.

    File layout, in order: *leading_comment*, the bytes of *font_file*,
    *ops*, the length of *ops* as a little-endian u32, and the 8-byte
    ``HB_FUZZING_EXTENDED_MAGIC`` trailer. Missing parent directories of the
    destination are created.

    Returns:
        The destination path in display form, as computed by
        resolve_seed_output_path().

    Raises:
        UserError: if *font_file* is not an existing regular file.
    """
    # NOTE(review): the default suffix ".susbset-seed" looks like a
    # misspelling of "subset"; it is left unchanged here because it
    # determines the generated file names -- confirm before renaming.
    font_path = os.path.abspath(font_file)
    if not os.path.isfile(font_path):
        fail(f"Font file not found: '{font_path}'.")
    with open(font_path, "rb") as fp:
        font_bytes = fp.read()

    display_path, seed_path = resolve_seed_output_path(font_path, font_bytes, out_path, default_suffix)
    parent = os.path.dirname(seed_path)
    if parent:
        os.makedirs(parent, exist_ok=True)

    with open(seed_path, "wb") as fp:
        sections = (
            leading_comment,
            font_bytes,
            ops,
            pack_u32(len(ops)),
            HB_FUZZING_EXTENDED_MAGIC,
        )
        for section in sections:
            fp.write(section)
    return display_path
def print_recorded_seed_path(seed_path: str) -> None:
    """Print *seed_path* on its own line to standard output."""
    print(seed_path)
def main_with_user_errors(main_fn, argv: list[str]) -> int:
    """Run ``main_fn(argv)`` and convert UserError into an exit status.

    Returns:
        Whatever *main_fn* returns, or 1 after printing the error message to
        stderr when it raises UserError. Other exceptions propagate.
    """
    try:
        status = main_fn(argv)
    except UserError as e:
        print(e, file=sys.stderr)
        return 1
    else:
        return status