third_party/amalgamate/amalgamate.py - third_party/inja - Git at Google

 #!/usr/bin/env python

 # amalgamate.py - Amalgamate C source and header files.
 # Copyright (c) 2012, Erik Edlund <erik.edlund@32767.se>
 #
 # Redistribution and use in source and binary forms, with or without modification,
 # are permitted provided that the following conditions are met:
 #
 #  * Redistributions of source code must retain the above copyright notice,
 #  this list of conditions and the following disclaimer.
 #
 #  * Redistributions in binary form must reproduce the above copyright notice,
 #  this list of conditions and the following disclaimer in the documentation
 #  and/or other materials provided with the distribution.
 #
 #  * Neither the name of Erik Edlund, nor the names of its contributors may
 #  be used to endorse or promote products derived from this software without
 #  specific prior written permission.
 #
 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 # ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 # ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 from __future__ import division
 from __future__ import print_function
 from __future__ import unicode_literals

 import argparse
 import datetime
 import json
 import os
 import re
 import sys

 # Loads the amalgamation settings and produces the amalgamated file.
 #
 # Every key of the JSON config file is copied onto the instance as an
 # attribute; the methods below read "target", "sources" and
 # "include_paths" from there.
 class Amalgamation(object):

 	# Prepends self.source_path to file_path if needed, i.e. when
 	# file_path is not already absolute.
 	def actual_path(self, file_path):
 		if os.path.isabs(file_path):
 			return file_path
 		return os.path.join(self.source_path, file_path)

 	# Search included file_path in source_dir (if specified) and then
 	# in each of self.include_paths, in that order.
 	#
 	# source_dir is only skipped when it is None. An empty string is a
 	# valid directory (os.path.dirname() of a file that sits directly
 	# in the source root), so it must still be searched.
 	#
 	# Returns the normalized path of the first candidate that exists
 	# as a file, or None when there is none. Normalizing makes "./a.h",
 	# "a.h" and "sub/../a.h" compare equal, so that membership checks
 	# against self.included_files treat them as the same file.
 	def find_included_file(self, file_path, source_dir):
 		search_dirs = list(self.include_paths)
 		if source_dir is not None:
 			search_dirs.insert(0, source_dir)

 		for search_dir in search_dirs:
 			search_path = os.path.join(search_dir, file_path)
 			if os.path.isfile(self.actual_path(search_path)):
 				return os.path.normpath(search_path)
 		return None

 	# Load the JSON config named by args.config and copy the command
 	# line settings onto the instance. The command line settings are
 	# assigned last, so they win over config keys of the same name.
 	def __init__(self, args):
 		with open(args.config, 'r') as f:
 			config = json.load(f)
 		for key in config:
 			setattr(self, key, config[key])

 		self.verbose = args.verbose == "yes"
 		self.prologue = args.prologue
 		self.source_path = args.source_path
 		self.included_files = []

 	# Generate the amalgamation and write it to the target file.
 	#
 	# The content of the optional prologue file is used as a
 	# strftime() format for the current local time, so date directives
 	# in it are expanded before it is placed at the top of the output.
 	def generate(self):
 		pieces = []

 		if self.prologue:
 			with open(self.prologue, 'r') as f:
 				pieces.append(datetime.datetime.now().strftime(f.read()))

 		if self.verbose:
 			print("Config:")
 			print(" target        = {0}".format(self.target))
 			print(" working_dir   = {0}".format(os.getcwd()))
 			print(" include_paths = {0}".format(self.include_paths))
 		print("Creating amalgamation:")
 		for file_path in self.sources:
 			# Do not check the include paths while processing the source
 			# list, all given source paths must be correct.
 			print(" - processing \"{0}\"".format(file_path))
 			pieces.append(TranslationUnit(file_path, self, True).content)

 		with open(self.target, 'w') as f:
 			f.write("".join(pieces))

 		print("...done!\n")
 		if self.verbose:
 			print("Files processed: {0}".format(self.sources))
 			print("Files included: {0}".format(self.included_files))
 		print("")

 # A single source or header file. Constructing one reads the file and
 # expands, recursively, every #include directive that the owning
 # amalgamation can resolve; the result is available as self.content.
 class TranslationUnit(object):

 	# // C++ comment.
 	cpp_comment_pattern = re.compile(r"//.*?\n")

 	# /* C comment. */
 	c_comment_pattern = re.compile(r"/\*.*?\*/", re.S)

 	# "complex \"stri\\\ng\" value".
 	string_pattern = re.compile("[^']" r'".*?(?<=[^\\])"', re.S)

 	# Handle simple include directives. Support for advanced
 	# directives where macros and defines need to be expanded is
 	# not a concern right now.
 	include_pattern = re.compile(
 		r'#\s*include\s+(<|")(?P<path>.*?)("|>)', re.S)

 	# #pragma once
 	pragma_once_pattern = re.compile(r'#\s*pragma\s+once', re.S)

 	# Try to match pattern in self.content exactly at index. On
 	# success the match is added to contexts and the offset just past
 	# it is returned; otherwise the offset right after the two
 	# characters which triggered the attempt is returned.
 	#
 	# The match is anchored at index on purpose. An unanchored search
 	# would, when nothing starts at index (e.g. at the quote of the
 	# char literal '"'), latch onto the next string further down and
 	# jump over every comment opener in between.
 	def _search_content(self, index, pattern, contexts):
 		match = pattern.match(self.content, index)
 		if match:
 			contexts.append(match)
 			return match.end()
 		return index + 2

 	# Return all the skippable contexts, i.e., comments and strings,
 	# as a list of match objects in order of appearance.
 	def _find_skippable_contexts(self):
 		skippable_contexts = []

 		# Walk through the content char by char, looking at the
 		# current and the previous char, and try to grab a skippable
 		# context with the fitting regular expression when an opener
 		# is seen. The patterns are applied from the previous char
 		# because the string pattern consumes one char before the
 		# quote and the comment openers are two chars long.
 		i = 1
 		content_len = len(self.content)
 		while i < content_len:
 			j = i - 1
 			current = self.content[i]
 			previous = self.content[j]

 			if current == '"':
 				# String value.
 				i = self._search_content(j, self.string_pattern,
 					skippable_contexts)
 			elif current == '*' and previous == '/':
 				# C style comment.
 				i = self._search_content(j, self.c_comment_pattern,
 					skippable_contexts)
 			elif current == '/' and previous == '/':
 				# C++ style comment.
 				i = self._search_content(j, self.cpp_comment_pattern,
 					skippable_contexts)
 			else:
 				# Skip to the next char.
 				i += 1

 		return skippable_contexts

 	# Returns True if the match lies strictly inside one of the given
 	# matches.
 	def _is_within(self, match, matches):
 		return any(
 			m.start() < match.start() and match.end() < m.end()
 			for m in matches)

 	# Return the matches of pattern in self.content which are not
 	# located inside a comment or a string.
 	def _find_active_matches(self, pattern):
 		skippable_contexts = self._find_skippable_contexts()
 		return [m for m in pattern.finditer(self.content)
 			if not self._is_within(m, skippable_contexts)]

 	# Removes pragma once from content. Returns the number of removed
 	# directives.
 	def _process_pragma_once(self):
 		if len(self.content) < len("#include <x>"):
 			return 0

 		pragmas = self._find_active_matches(self.pragma_once_pattern)

 		# Keep everything except the collected directives.
 		pieces = []
 		prev_end = 0
 		for pragma_match in pragmas:
 			pieces.append(self.content[prev_end:pragma_match.start()])
 			prev_end = pragma_match.end()
 		pieces.append(self.content[prev_end:])
 		self.content = "".join(pieces)

 		return len(pragmas)

 	# Include all trivial #include directives into self.content.
 	# Returns the number of directives which could be resolved.
 	def _process_includes(self):
 		if len(self.content) < len("#include <x>"):
 			return 0

 		# Collect the include directives which the amalgamation can
 		# resolve to a file; all others are left untouched.
 		includes = []
 		for include_match in self._find_active_matches(self.include_pattern):
 			# Only quoted includes are searched in the directory of
 			# the including file.
 			search_same_dir = include_match.group(1) == '"'
 			found_included_path = self.amalgamation.find_included_file(
 				include_match.group("path"),
 				self.file_dir if search_same_dir else None)
 			if found_included_path:
 				includes.append((include_match, found_included_path))

 		# Handle all collected include directives.
 		pieces = []
 		prev_end = 0
 		for include_match, found_included_path in includes:
 			pieces.append(self.content[prev_end:include_match.start()])
 			# The directive itself stays in the output as a comment.
 			pieces.append("// {0}\n".format(include_match.group(0)))
 			# Expand every file only once. Constructing the unit adds
 			# it to included_files, so the check has to happen here,
 			# after the preceding includes have been expanded.
 			if found_included_path not in self.amalgamation.included_files:
 				t = TranslationUnit(found_included_path, self.amalgamation, False)
 				pieces.append(t.content)
 			prev_end = include_match.end()
 		pieces.append(self.content[prev_end:])
 		self.content = "".join(pieces)

 		return len(includes)

 	# Make all content processing. "#pragma once" is only stripped
 	# from files which are not roots.
 	def _process(self):
 		if not self.is_root:
 			self._process_pragma_once()
 		self._process_includes()

 	# Read file_path (resolved through amalgamation.actual_path) and
 	# process it. is_root tells whether the file comes from the source
 	# list rather than from an include directive.
 	#
 	# Raises IOError if the resolved path is not an existing file.
 	def __init__(self, file_path, amalgamation, is_root):
 		self.file_path = file_path
 		self.file_dir = os.path.dirname(file_path)
 		self.amalgamation = amalgamation
 		self.is_root = is_root

 		# Registered before processing, so a file which is reached
 		# again while it is being processed is not expanded twice.
 		self.amalgamation.included_files.append(self.file_path)

 		actual_path = self.amalgamation.actual_path(file_path)
 		if not os.path.isfile(actual_path):
 			raise IOError("File not found: \"{0}\"".format(file_path))
 		with open(actual_path, 'r') as f:
 			self.content = f.read()
 		# Processing recurses into the included files; do it after the
 		# file has been closed so a deep include chain does not keep
 		# one open handle per level.
 		self._process()

 # Command line entry point: parse the arguments, load the config they
 # point to and generate the amalgamation.
 def main():
 	usage_parts = (
 		"amalgamate.py",
 		"[-v]",
 		"-c path/to/config.json",
 		"-s path/to/source/dir",
 		"[-p path/to/prologue.(c|h)]",
 	)
 	parser = argparse.ArgumentParser(
 		description="Amalgamate C source and header files.",
 		usage=" ".join(usage_parts))

 	# (flags, keyword arguments) for every supported option.
 	options = (
 		(("-v", "--verbose"), dict(dest="verbose",
 			choices=["yes", "no"], metavar="", help="be verbose")),
 		(("-c", "--config"), dict(dest="config",
 			required=True, metavar="", help="path to a JSON config file")),
 		(("-s", "--source"), dict(dest="source_path",
 			required=True, metavar="", help="source code path")),
 		(("-p", "--prologue"), dict(dest="prologue",
 			required=False, metavar="", help="path to a C prologue file")),
 	)
 	for flags, settings in options:
 		parser.add_argument(*flags, **settings)

 	Amalgamation(parser.parse_args()).generate()

 # Run the tool only when this file is executed as a script, not when
 # it is imported as a module.
 if __name__ == "__main__":
 	main()
	#!/usr/bin/env python

	# amalgamate.py - Amalgamate C source and header files.
	# Copyright (c) 2012, Erik Edlund <erik.edlund@32767.se>
	#
	# Redistribution and use in source and binary forms, with or without modification,
	# are permitted provided that the following conditions are met:
	#
	# * Redistributions of source code must retain the above copyright notice,
	# this list of conditions and the following disclaimer.
	#
	# * Redistributions in binary form must reproduce the above copyright notice,
	# this list of conditions and the following disclaimer in the documentation
	# and/or other materials provided with the distribution.
	#
	# * Neither the name of Erik Edlund, nor the names of its contributors may
	# be used to endorse or promote products derived from this software without
	# specific prior written permission.
	#
	# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
	# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
	# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
	# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
	# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
	# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
	# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
	# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
	# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

	from __future__ import division
	from __future__ import print_function
	from __future__ import unicode_literals

	import argparse
	import datetime
	import json
	import os
	import re
	import sys

	# Loads the amalgamation settings and produces the amalgamated file.
	#
	# Every key of the JSON config file is copied onto the instance as an
	# attribute; the methods below read "target", "sources" and
	# "include_paths" from there.
	class Amalgamation(object):

		# Prepends self.source_path to file_path if needed, i.e. when
		# file_path is not already absolute.
		def actual_path(self, file_path):
			if os.path.isabs(file_path):
				return file_path
			return os.path.join(self.source_path, file_path)

		# Search included file_path in source_dir (if specified) and then
		# in each of self.include_paths, in that order.
		#
		# source_dir is only skipped when it is None. An empty string is a
		# valid directory (os.path.dirname() of a file that sits directly
		# in the source root), so it must still be searched.
		#
		# Returns the normalized path of the first candidate that exists
		# as a file, or None when there is none. Normalizing makes "./a.h",
		# "a.h" and "sub/../a.h" compare equal, so that membership checks
		# against self.included_files treat them as the same file.
		def find_included_file(self, file_path, source_dir):
			search_dirs = list(self.include_paths)
			if source_dir is not None:
				search_dirs.insert(0, source_dir)

			for search_dir in search_dirs:
				search_path = os.path.join(search_dir, file_path)
				if os.path.isfile(self.actual_path(search_path)):
					return os.path.normpath(search_path)
			return None

		# Load the JSON config named by args.config and copy the command
		# line settings onto the instance. The command line settings are
		# assigned last, so they win over config keys of the same name.
		def __init__(self, args):
			with open(args.config, 'r') as f:
				config = json.load(f)
			for key in config:
				setattr(self, key, config[key])

			self.verbose = args.verbose == "yes"
			self.prologue = args.prologue
			self.source_path = args.source_path
			self.included_files = []

		# Generate the amalgamation and write it to the target file.
		#
		# The content of the optional prologue file is used as a
		# strftime() format for the current local time, so date directives
		# in it are expanded before it is placed at the top of the output.
		def generate(self):
			pieces = []

			if self.prologue:
				with open(self.prologue, 'r') as f:
					pieces.append(datetime.datetime.now().strftime(f.read()))

			if self.verbose:
				print("Config:")
				print(" target        = {0}".format(self.target))
				print(" working_dir   = {0}".format(os.getcwd()))
				print(" include_paths = {0}".format(self.include_paths))
			print("Creating amalgamation:")
			for file_path in self.sources:
				# Do not check the include paths while processing the source
				# list, all given source paths must be correct.
				print(" - processing \"{0}\"".format(file_path))
				pieces.append(TranslationUnit(file_path, self, True).content)

			with open(self.target, 'w') as f:
				f.write("".join(pieces))

			print("...done!\n")
			if self.verbose:
				print("Files processed: {0}".format(self.sources))
				print("Files included: {0}".format(self.included_files))
			print("")

	# A single source or header file. Constructing one reads the file and
	# expands, recursively, every #include directive that the owning
	# amalgamation can resolve; the result is available as self.content.
	class TranslationUnit(object):

		# // C++ comment.
		cpp_comment_pattern = re.compile(r"//.*?\n")

		# /* C comment. */
		c_comment_pattern = re.compile(r"/\*.*?\*/", re.S)

		# "complex \"stri\\\ng\" value".
		string_pattern = re.compile("[^']" r'".*?(?<=[^\\])"', re.S)

		# Handle simple include directives. Support for advanced
		# directives where macros and defines need to be expanded is
		# not a concern right now.
		include_pattern = re.compile(
			r'#\s*include\s+(<|")(?P<path>.*?)("|>)', re.S)

		# #pragma once
		pragma_once_pattern = re.compile(r'#\s*pragma\s+once', re.S)

		# Try to match pattern in self.content exactly at index. On
		# success the match is added to contexts and the offset just past
		# it is returned; otherwise the offset right after the two
		# characters which triggered the attempt is returned.
		#
		# The match is anchored at index on purpose. An unanchored search
		# would, when nothing starts at index (e.g. at the quote of the
		# char literal '"'), latch onto the next string further down and
		# jump over every comment opener in between.
		def _search_content(self, index, pattern, contexts):
			match = pattern.match(self.content, index)
			if match:
				contexts.append(match)
				return match.end()
			return index + 2

		# Return all the skippable contexts, i.e., comments and strings,
		# as a list of match objects in order of appearance.
		def _find_skippable_contexts(self):
			skippable_contexts = []

			# Walk through the content char by char, looking at the
			# current and the previous char, and try to grab a skippable
			# context with the fitting regular expression when an opener
			# is seen. The patterns are applied from the previous char
			# because the string pattern consumes one char before the
			# quote and the comment openers are two chars long.
			i = 1
			content_len = len(self.content)
			while i < content_len:
				j = i - 1
				current = self.content[i]
				previous = self.content[j]

				if current == '"':
					# String value.
					i = self._search_content(j, self.string_pattern,
						skippable_contexts)
				elif current == '*' and previous == '/':
					# C style comment.
					i = self._search_content(j, self.c_comment_pattern,
						skippable_contexts)
				elif current == '/' and previous == '/':
					# C++ style comment.
					i = self._search_content(j, self.cpp_comment_pattern,
						skippable_contexts)
				else:
					# Skip to the next char.
					i += 1

			return skippable_contexts

		# Returns True if the match lies strictly inside one of the given
		# matches.
		def _is_within(self, match, matches):
			return any(
				m.start() < match.start() and match.end() < m.end()
				for m in matches)

		# Return the matches of pattern in self.content which are not
		# located inside a comment or a string.
		def _find_active_matches(self, pattern):
			skippable_contexts = self._find_skippable_contexts()
			return [m for m in pattern.finditer(self.content)
				if not self._is_within(m, skippable_contexts)]

		# Removes pragma once from content. Returns the number of removed
		# directives.
		def _process_pragma_once(self):
			if len(self.content) < len("#include <x>"):
				return 0

			pragmas = self._find_active_matches(self.pragma_once_pattern)

			# Keep everything except the collected directives.
			pieces = []
			prev_end = 0
			for pragma_match in pragmas:
				pieces.append(self.content[prev_end:pragma_match.start()])
				prev_end = pragma_match.end()
			pieces.append(self.content[prev_end:])
			self.content = "".join(pieces)

			return len(pragmas)

		# Include all trivial #include directives into self.content.
		# Returns the number of directives which could be resolved.
		def _process_includes(self):
			if len(self.content) < len("#include <x>"):
				return 0

			# Collect the include directives which the amalgamation can
			# resolve to a file; all others are left untouched.
			includes = []
			for include_match in self._find_active_matches(self.include_pattern):
				# Only quoted includes are searched in the directory of
				# the including file.
				search_same_dir = include_match.group(1) == '"'
				found_included_path = self.amalgamation.find_included_file(
					include_match.group("path"),
					self.file_dir if search_same_dir else None)
				if found_included_path:
					includes.append((include_match, found_included_path))

			# Handle all collected include directives.
			pieces = []
			prev_end = 0
			for include_match, found_included_path in includes:
				pieces.append(self.content[prev_end:include_match.start()])
				# The directive itself stays in the output as a comment.
				pieces.append("// {0}\n".format(include_match.group(0)))
				# Expand every file only once. Constructing the unit adds
				# it to included_files, so the check has to happen here,
				# after the preceding includes have been expanded.
				if found_included_path not in self.amalgamation.included_files:
					t = TranslationUnit(found_included_path, self.amalgamation, False)
					pieces.append(t.content)
				prev_end = include_match.end()
			pieces.append(self.content[prev_end:])
			self.content = "".join(pieces)

			return len(includes)

		# Make all content processing. "#pragma once" is only stripped
		# from files which are not roots.
		def _process(self):
			if not self.is_root:
				self._process_pragma_once()
			self._process_includes()

		# Read file_path (resolved through amalgamation.actual_path) and
		# process it. is_root tells whether the file comes from the source
		# list rather than from an include directive.
		#
		# Raises IOError if the resolved path is not an existing file.
		def __init__(self, file_path, amalgamation, is_root):
			self.file_path = file_path
			self.file_dir = os.path.dirname(file_path)
			self.amalgamation = amalgamation
			self.is_root = is_root

			# Registered before processing, so a file which is reached
			# again while it is being processed is not expanded twice.
			self.amalgamation.included_files.append(self.file_path)

			actual_path = self.amalgamation.actual_path(file_path)
			if not os.path.isfile(actual_path):
				raise IOError("File not found: \"{0}\"".format(file_path))
			with open(actual_path, 'r') as f:
				self.content = f.read()
			# Processing recurses into the included files; do it after the
			# file has been closed so a deep include chain does not keep
			# one open handle per level.
			self._process()

	# Command line entry point: parse the arguments, load the config they
	# point to and generate the amalgamation.
	def main():
		description = "Amalgamate C source and header files."
		usage = " ".join([
			"amalgamate.py",
			"[-v]",
			"-c path/to/config.json",
			"-s path/to/source/dir",
			"[-p path/to/prologue.(c|h)]"
		])
		argsparser = argparse.ArgumentParser(
			description=description, usage=usage)

		argsparser.add_argument("-v", "--verbose", dest="verbose",
			choices=["yes", "no"], metavar="", help="be verbose")

		argsparser.add_argument("-c", "--config", dest="config",
			required=True, metavar="", help="path to a JSON config file")

		argsparser.add_argument("-s", "--source", dest="source_path",
			required=True, metavar="", help="source code path")

		argsparser.add_argument("-p", "--prologue", dest="prologue",
			required=False, metavar="", help="path to a C prologue file")

		amalgamation = Amalgamation(argsparser.parse_args())
		amalgamation.generate()

	# Run the tool only when this file is executed as a script, not when
	# it is imported as a module.
	if __name__ == "__main__":
		main()