Touch up on previous commit https://github.com/harfbuzz/harfbuzz/pull/1273

commit: 6d40eb8372a2c74a6e1294b44a2b19c99d11e7da [log] [tgz]
author: Behdad Esfahbod <behdad@behdad.org> Tue Oct 23 02:51:42 2018 -0700
committer: Behdad Esfahbod <behdad@behdad.org> Tue Oct 23 02:51:42 2018 -0700
tree: 6b05ebdc3d1a0fe41dbebe64ca9d2c8986c0f0b8
parent: 205737acdc268b1c90cf00bde2d2038519a8bf48 [diff] [blame]
diff --git a/src/gen-vowel-constraints.py b/src/gen-vowel-constraints.py
index bcb5d27..19629ab 100755
--- a/src/gen-vowel-constraints.py
+++ b/src/gen-vowel-constraints.py

@@ -2,12 +2,10 @@
 
 """Generator of the function to prohibit certain vowel sequences.
 
-It creates ``preprocess_text_vowel_constraints``, which inserts dotted
+It creates ``_hb_preprocess_text_vowel_constraints``, which inserts dotted
 circles into sequences prohibited by the USE script development spec.
 This function should be used as the ``preprocess_text`` of an
 ``hb_ot_complex_shaper_t``.
-
-It also creates the helper function ``_output_with_dotted_circle``.
 """
 
 from __future__ import absolute_import, division, print_function, unicode_literals
@@ -27,23 +25,9 @@
 import sys
 
 if len (sys.argv) != 3:
-	print ('usage: ./gen-vowel-constraints.py use Scripts.txt', file=sys.stderr)
+	print ('usage: ./gen-vowel-constraints.py HBIndicVowelConstraints.txt Scripts.txt', file=sys.stderr)
 	sys.exit (1)
 
-try:
-	from html import unescape
-	def html_unescape (parser, entity):
-		return unescape (entity)
-except ImportError:
-	def html_unescape (parser, entity):
-		return parser.unescape (entity)
-
-def expect (condition, message=None):
-	if not condition:
-		if message is None:
-			raise AssertionError
-		raise AssertionError (message)
-
 with io.open (sys.argv[2], encoding='utf-8') as f:
 	scripts_header = [f.readline () for i in range (2)]
 	scripts = {}
@@ -142,74 +126,22 @@
 			s.append ('{}}}\n'.format (indent))
 		return ''.join (s)
 
-class USESpecParser (HTMLParser):
-	"""A parser for the USE script development spec.
-
-	Attributes:
-		header (str): The ``updated_at`` timestamp of the spec.
-		constraints (Mapping[str, ConstraintSet]): A map of script names
-			to the scripts' prohibited sequences.
-	"""
-	def __init__ (self):
-		HTMLParser.__init__ (self)
-		self.header = ''
-		self.constraints = {}
-		# Whether the next <code> contains the vowel constraints.
-		self._primed = False
-		# Whether the parser is in the <code> element with the constraints.
-		self._in_constraints = False
-		# The text of the constraints.
-		self._constraints = ''
-
-	def handle_starttag (self, tag, attrs):
-		if tag == 'meta':
-			for attr, value in attrs:
-				if attr == 'name' and value == 'updated_at':
-					self.header = self.get_starttag_text ()
-					break
-		elif tag == 'a':
-			for attr, value in attrs:
-				if attr == 'id' and value == 'ivdvconstraints':
-					self._primed = True
-					break
-		elif self._primed and tag == 'code':
-			self._primed = False
-			self._in_constraints = True
-
-	def handle_endtag (self, tag):
-		self._in_constraints = False
-
-	def handle_data (self, data):
-		if self._in_constraints:
-			self._constraints += data
-
-	def handle_charref (self, name):
-		self.handle_data (html_unescape (self, '&#%s;' % name))
-
-	def handle_entityref (self, name):
-		self.handle_data (html_unescape (self, '&%s;' % name))
-
-	def parse (self, filename):
-		"""Parse the USE script development spec.
-
-		Args:
-			filename (str): The file name of the spec.
-		"""
-		with io.open (filename, encoding='utf-8') as f:
-			self.feed (f.read ())
-		expect (self.header, 'No header found')
-		for line in self._constraints.splitlines ():
-			constraint = [int (cp, 16) for cp in line.split (';')[0].strip ().split (' ')]
-			expect (2 <= len (constraint), 'Prohibited sequence is too short: {}'.format (constraint))
-			script = scripts[constraint[0]]
-			if script in self.constraints:
-				self.constraints[script].add (constraint)
-			else:
-				self.constraints[script] = ConstraintSet (constraint)
-		expect (self.constraints, 'No constraints found')
-
-use_parser = USESpecParser ()
-use_parser.parse (sys.argv[1])
+constraints = {}
+with io.open (sys.argv[1], encoding='utf-8') as f:
+	constraints_header = [f.readline ().strip () for i in range (2)]
+	for line in f:
+		j = line.find ('#')
+		if j >= 0:
+			line = line[:j]
+		constraint = [int (cp, 16) for cp in line.split (';')[0].split ()]
+		if not constraint: continue
+		assert 2 <= len (constraint), 'Prohibited sequence is too short: {}'.format (constraint)
+		script = scripts[constraint[0]]
+		if script in constraints:
+			constraints[script].add (constraint)
+		else:
+			constraints[script] = ConstraintSet (constraint)
+		assert constraints, 'No constraints found'
 
 print ('/* == Start of generated functions == */')
 print ('/*')
@@ -219,15 +151,15 @@
 print (' *')
 print (' * on files with these headers:')
 print (' *')
-print (' * %s' % use_parser.header.strip ())
+for line in constraints_header:
+	print (' * %s' % line.strip ())
+print (' *')
 for line in scripts_header:
 	print (' * %s' % line.strip ())
 print (' */')
 print ()
-print ('#ifndef HB_OT_SHAPE_COMPLEX_VOWEL_CONSTRAINTS_HH')
-print ('#define HB_OT_SHAPE_COMPLEX_VOWEL_CONSTRAINTS_HH')
+print ('#include "hb-ot-shape-complex-vowel-constraints.hh"')
 print ()
-
 print ('static void')
 print ('_output_with_dotted_circle (hb_buffer_t *buffer)')
 print ('{')
@@ -238,10 +170,10 @@
 print ('}')
 print ()
 
-print ('static void')
-print ('preprocess_text_vowel_constraints (const hb_ot_shape_plan_t *plan,')
-print ('\t\t\t\t   hb_buffer_t              *buffer,')
-print ('\t\t\t\t   hb_font_t                *font)')
+print ('void')
+print ('_hb_preprocess_text_vowel_constraints (const hb_ot_shape_plan_t *plan,')
+print ('\t\t\t\t       hb_buffer_t              *buffer,')
+print ('\t\t\t\t       hb_font_t                *font)')
 print ('{')
 print ('  /* UGLY UGLY UGLY business of adding dotted-circle in the middle of')
 print ('   * vowel-sequences that look like another vowel.  Data for each script')
@@ -255,7 +187,7 @@
 print ('  switch ((unsigned) buffer->props.script)')
 print ('  {')
 
-for script, constraints in sorted (use_parser.constraints.items (), key=lambda s_c: script_order[s_c[0]]):
+for script, constraints in sorted (constraints.items (), key=lambda s_c: script_order[s_c[0]]):
 	print ('    case HB_SCRIPT_{}:'.format (script.upper ()))
 	print ('      for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)')
 	print ('      {')
@@ -281,6 +213,4 @@
 print ('}')
 
 print ()
-print ('#endif /* HB_OT_SHAPE_COMPLEX_VOWEL_CONSTRAINTS_HH */')
-print ()
 print ('/* == End of generated functions == */')
commit	6d40eb8372a2c74a6e1294b44a2b19c99d11e7da	[log] [tgz]
author	Behdad Esfahbod <behdad@behdad.org>	Tue Oct 23 02:51:42 2018 -0700
committer	Behdad Esfahbod <behdad@behdad.org>	Tue Oct 23 02:51:42 2018 -0700
tree	6b05ebdc3d1a0fe41dbebe64ca9d2c8986c0f0b8
parent	205737acdc268b1c90cf00bde2d2038519a8bf48 [diff] [blame]