blob: 8df6d1cb6a52e8b1367ae0465081328f9fea3ff5 [file] [log] [blame] [edit]
#!/usr/bin/env python3
import xml.etree.ElementTree as ET
import re
import math
# Matches either a single path command letter (group 1) or a number (group 2).
# The number alternative accepts "10", "10.", "10.5" and ".5", each with an
# optional sign and an optional exponent, so compact SVG output such as
# "M.5-.5" tokenizes correctly instead of losing the decimal point or sign.
_PATH_TOKEN_RE = re.compile(
    r'([MmZzLlHhVvCcSsQqTtAa])'
    r'|([+-]?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?)'
)


def tokenize_path_data(d_attribute):
    """
    Split an SVG path 'd' attribute string into a flat list of tokens.

    Tokens are either:
      - single-letter path commands (M, m, L, l, C, c, ...), kept as str
      - numeric values (optional sign, decimals, scientific notation),
        converted to float

    Anything that is neither a command letter nor a number (whitespace,
    commas, ...) is ignored.

    Returns a list such as ['M', 10.0, 20.0, 'L', 30.5, 40.7, 'Z'].
    """
    tokens = []
    for match in _PATH_TOKEN_RE.finditer(d_attribute):
        command_letter = match.group(1)
        if command_letter:
            # A path command like 'M', 'L', etc.
            tokens.append(command_letter)
        else:
            # Otherwise the number alternative matched.
            tokens.append(float(match.group(2)))
    return tokens
def extract_paths_from_svg(svg_data):
    """
    Parse SVG text and tokenize the 'd' attribute of every <path> element.

    Both namespace-qualified paths (the usual case, where the document
    declares xmlns="http://www.w3.org/2000/svg") and unqualified <path>
    elements are recognized, so documents without the default namespace
    are not silently treated as containing no paths.

    Elements are visited in document order; the root element itself is
    never considered. Paths with a missing or empty 'd' attribute are
    skipped.

    Returns a list of token lists, one per path, as produced by
    tokenize_path_data().
    """
    qualified_path_tag = '{http://www.w3.org/2000/svg}path'
    root = ET.fromstring(svg_data)

    all_token_lists = []
    for elem in root.iter():
        # iter() yields the root first; only descendants are wanted.
        if elem is root:
            continue
        if elem.tag != qualified_path_tag and elem.tag != 'path':
            continue
        d_attribute = elem.get('d')
        if d_attribute:
            all_token_lists.append(tokenize_path_data(d_attribute))
    return all_token_lists
def compare_token_lists(tokens_a, tokens_b):
    """
    Compare two path token lists (command strings and float values).

    Returns None when the lists are structurally different: unequal
    lengths, differing command letters, or a command facing a number at
    the same position. Otherwise returns the largest absolute difference
    between corresponding numeric tokens (0 when there are none).
    """
    if len(tokens_a) != len(tokens_b):
        # Unequal token counts can never line up.
        return None

    largest = 0
    for left, right in zip(tokens_a, tokens_b):
        if isinstance(left, str) and isinstance(right, str):
            # Two commands: they must be the very same letter.
            if left != right:
                return None
            continue

        if isinstance(left, float) and isinstance(right, float):
            # Two numbers: track the worst deviation seen so far.
            largest = max(largest, abs(left - right))
            continue

        # Mixed kinds (command vs. number) => structural mismatch.
        return None

    return largest
def compare_svg_files(svg_file_1, svg_file_2):
    """
    Compare the <path> data of two SVG files.

    Returns:
      - 0 if the two files have exactly the same text content;
      - None if they contain a different number of paths, or if any pair
        of corresponding paths differs in command structure;
      - otherwise the maximum absolute difference between corresponding
        numeric values across all paths.
    """
    # Use context managers so both file handles are closed promptly.
    with open(svg_file_1) as f:
        svg_data_1 = f.read()
    with open(svg_file_2) as f:
        svg_data_2 = f.read()

    # Identical text needs no parsing at all.
    if svg_data_1 == svg_data_2:
        return 0

    paths1 = extract_paths_from_svg(svg_data_1)
    paths2 = extract_paths_from_svg(svg_data_2)

    # The files must contain the same number of <path> elements.
    if len(paths1) != len(paths2):
        return None

    # Compare paths pairwise, keeping the worst numeric deviation.
    max_diff = 0
    for tokens1, tokens2 in zip(paths1, paths2):
        ret = compare_token_lists(tokens1, tokens2)
        if ret is None:
            return None
        max_diff = max(max_diff, ret)
    return max_diff
if __name__ == "__main__":
    # Command-line driver: reads pairs of SVG file names from stdin (one
    # whitespace-separated pair per line) and prints one line for every
    # pair whose difference exceeds the tolerance given as the first
    # command-line argument (default 0).
    import sys

    if '--help' in sys.argv[1:]:
        print("Usage: hb-svg-compare TOLERANCE < file_with_svg_pairs.txt")
        sys.exit(1)

    tolerance = 0
    if len(sys.argv) > 1:
        tolerance = float(sys.argv[1])

    for line in sys.stdin:
        fields = line.split()
        if not fields:
            # Skip blank lines (e.g. a trailing newline) instead of
            # failing on the two-name unpack below.
            continue
        svg1, svg2 = fields

        diff = compare_svg_files(svg1, svg2)
        if diff is None:
            # The files differ structurally, not just numerically.
            diff = "DIFF"
        elif diff <= tolerance:
            # Within tolerance: nothing to report for this pair.
            continue
        else:
            diff = f"{diff:.5f}"

        print(f"{diff}\t{svg1}\t{svg2}")
        # Flush stdout to make sure output is immediate
        sys.stdout.flush()