Merge pull request #745 from googlefonts/master

Interface for hb_subset, skeleton for the hb-subset cli, and basic testing rigging.
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9e067ed..bfe0e30 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -171,6 +171,12 @@
 extract_make_variable(HB_OT_headers ${SRCSOURCES})
 add_prefix_to_list(HB_OT_headers "${PROJECT_SOURCE_DIR}/src/")
 
+extract_make_variable(HB_SUBSET_sources ${SRCSOURCES})
+add_prefix_to_list(HB_SUBSET_sources "${PROJECT_SOURCE_DIR}/src/")
+
+extract_make_variable(HB_SUBSET_headers ${SRCSOURCES})
+add_prefix_to_list(HB_SUBSET_headers "${PROJECT_SOURCE_DIR}/src/")
+
 extract_make_variable(HB_BASE_RAGEL_GENERATED_sources ${SRCSOURCES})
 extract_make_variable(HB_OT_RAGEL_GENERATED_sources ${SRCSOURCES})
 if (IN_HB_DIST)
@@ -185,6 +191,8 @@
 add_prefix_to_list(HB_VIEW_sources "${PROJECT_SOURCE_DIR}/util/")
 extract_make_variable(HB_SHAPE_sources ${UTILSOURCES})
 add_prefix_to_list(HB_SHAPE_sources "${PROJECT_SOURCE_DIR}/util/")
+extract_make_variable(HB_SUBSET_CLI_sources ${UTILSOURCES})
+add_prefix_to_list(HB_SUBSET_CLI_sources "${PROJECT_SOURCE_DIR}/util/")
 extract_make_variable(HB_OT_SHAPE_CLOSURE_sources ${UTILSOURCES})
 add_prefix_to_list(HB_OT_SHAPE_CLOSURE_sources "${PROJECT_SOURCE_DIR}/util/")
 
@@ -246,6 +254,8 @@
   ${HB_FALLBACK_sources}
   ${HB_OT_sources}
   ${HB_OT_RAGEL_GENERATED_sources}
+
+  ${HB_SUBSET_sources}
 )
 
 set (project_extra_sources)
@@ -255,6 +265,7 @@
 
   ${HB_BASE_headers}
   ${HB_OT_headers}
+  ${HB_SUBSET_headers}
 )
 
 
@@ -708,6 +719,9 @@
   add_executable(hb-shape ${HB_SHAPE_sources})
   target_link_libraries(hb-shape harfbuzz)
 
+  add_executable(hb-subset ${HB_SUBSET_CLI_sources})
+  target_link_libraries(hb-subset harfbuzz)
+
   add_executable(hb-ot-shape-closure ${HB_OT_SHAPE_CLOSURE_sources})
   target_link_libraries(hb-ot-shape-closure harfbuzz)
 
diff --git a/configure.ac b/configure.ac
index dec994e..12401f0 100644
--- a/configure.ac
+++ b/configure.ac
@@ -500,6 +500,8 @@
 test/shaping/data/Makefile
 test/shaping/data/in-house/Makefile
 test/shaping/data/text-rendering-tests/Makefile
+test/subset/Makefile
+test/subset/data/Makefile
 docs/Makefile
 docs/version.xml
 ])
diff --git a/src/Makefile.am b/src/Makefile.am
index 833d1f9..dd1c7ae 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -27,7 +27,9 @@
 HBDEPS =
 HBSOURCES =  $(HB_BASE_sources)
 HBSOURCES += $(HB_BASE_RAGEL_GENERATED_sources)
+HBSOURCES += $(HB_SUBSET_sources)
 HBHEADERS = $(HB_BASE_headers)
+HBHEADERS += $(HB_SUBSET_headers)
 HBNODISTHEADERS = $(HB_NODIST_headers)
 
 if HAVE_OT
diff --git a/src/Makefile.sources b/src/Makefile.sources
index 213aa22..0b9beb9 100644
--- a/src/Makefile.sources
+++ b/src/Makefile.sources
@@ -181,6 +181,15 @@
 HB_ICU_sources = hb-icu.cc
 HB_ICU_headers = hb-icu.h
 
+# Sources for libharfbuzz-subset
+HB_SUBSET_sources = \
+	hb-subset.cc \
+	$(NULL)
+
+HB_SUBSET_headers = \
+	hb-subset.h \
+	$(NULL)
+
 HB_GOBJECT_sources = hb-gobject-structs.cc
 HB_GOBJECT_STRUCTS_headers = hb-gobject-structs.h
 HB_GOBJECT_headers = hb-gobject.h $(HB_GOBJECT_STRUCTS_headers)
diff --git a/src/hb-subset.cc b/src/hb-subset.cc
new file mode 100644
index 0000000..62f7f0a
--- /dev/null
+++ b/src/hb-subset.cc
@@ -0,0 +1,155 @@
+/*
+ * Copyright © 2009  Red Hat, Inc.
+ * Copyright © 2012  Google, Inc.
+ *
+ *  This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Garret Rieger, Rod Sheeter
+ */
+
+#include "hb-private.hh"
+
+#include "hb-object-private.hh"
+
+
+struct hb_subset_profile_t {
+  hb_object_header_t header;
+  ASSERT_POD ();
+};
+
+struct hb_subset_input_t {
+  hb_object_header_t header;
+  ASSERT_POD ();
+};
+
+struct hb_subset_face_t {
+  hb_object_header_t header;
+  ASSERT_POD ();
+
+  hb_face_t *face;
+};
+
+
+/**
+ * hb_subset_profile_create:
+ *
+ * Return value: New profile with default settings.
+ *
+ * Since: 1.7.5
+ **/
+hb_subset_profile_t *
+hb_subset_profile_create ()
+{
+  return hb_object_create<hb_subset_profile_t>();
+}
+
+/**
+ * hb_subset_profile_destroy:
+ *
+ * Since: 1.7.5
+ **/
+void
+hb_subset_profile_destroy (hb_subset_profile_t *profile)
+{
+  if (!hb_object_destroy (profile)) return;
+
+  free (profile);
+}
+
+/**
+ * hb_subset_input_create:
+ *
+ * Return value: New subset input.
+ *
+ * Since: 1.7.5
+ **/
+hb_subset_input_t *
+hb_subset_input_create()
+{
+  return hb_object_create<hb_subset_input_t>();
+}
+
+/**
+ * hb_subset_input_destroy:
+ *
+ * Since: 1.7.5
+ **/
+void
+hb_subset_input_destroy(hb_subset_input_t *subset_input)
+{
+  if (!hb_object_destroy (subset_input)) return;
+
+  free (subset_input);
+}
+
+/**
+ * hb_subset_face_create:
+ *
+ * Return value: New subset face.
+ *
+ * Since: 1.7.5
+ **/
+hb_subset_face_t *
+hb_subset_face_create(hb_face_t *face)
+{
+  if (unlikely (!face))
+    face = hb_face_get_empty();
+
+  hb_subset_face_t *subset_face = hb_object_create<hb_subset_face_t> ();
+  if (unlikely (!subset_face)) return subset_face; /* e.g. allocation failure */
+  subset_face->face = hb_face_reference (face); /* dropped in hb_subset_face_destroy */
+  return subset_face;
+}
+
+/**
+ * hb_subset_face_destroy:
+ *
+ * Since: 1.7.5
+ **/
+void
+hb_subset_face_destroy(hb_subset_face_t *subset_face)
+{
+  if (!hb_object_destroy (subset_face)) return;
+
+  hb_face_destroy(subset_face->face);
+  free (subset_face);
+}
+
+/**
+ * hb_subset:
+ * @profile: profile to use for the subsetting.
+ * @input: input to use for the subsetting.
+ * @face: font face data to be subset.
+ * @result: (out): subsetting result; written only when true is returned.
+ *
+ * Subsets a font according to provided profile and input.
+ **/
+hb_bool_t
+hb_subset(hb_subset_profile_t *profile,
+          hb_subset_input_t *input,
+          hb_subset_face_t *face,
+          hb_blob_t **result /* OUT */)
+{
+  /* result is dereferenced below, so a NULL out-pointer is rejected as well. */
+  if (!profile || !input || !face || !result) return false;
+  *result = hb_face_reference_blob(face->face);
+  return true;
+}
diff --git a/src/hb-subset.h b/src/hb-subset.h
new file mode 100644
index 0000000..84c0c3c
--- /dev/null
+++ b/src/hb-subset.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright © 2018  Google
+ *
+ *  This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Rod Sheeter
+ */
+
+#ifndef HB_H_IN
+#error "Include <hb.h> instead."
+#endif
+
+#ifndef HB_SUBSET_H
+#define HB_SUBSET_H
+
+#include "hb-common.h"
+#include "hb-face.h"
+
+HB_BEGIN_DECLS
+
+/*
+ * hb_subset_profile_t
+ * Things that change based on target environment, e.g. OS.
+ * Threadsafe for multiple concurrent subset operations.
+ */
+
+typedef struct hb_subset_profile_t hb_subset_profile_t;
+
+HB_EXTERN hb_subset_profile_t *
+hb_subset_profile_create (void); /* (void), not (): a real prototype for C callers */
+
+HB_EXTERN void
+hb_subset_profile_destroy (hb_subset_profile_t *profile);
+
+/*
+ * hb_subset_input_t
+ * Things that change based on the input. Characters to keep, etc.
+ */
+
+typedef struct hb_subset_input_t hb_subset_input_t;
+
+HB_EXTERN hb_subset_input_t *
+hb_subset_input_create (void); /* (void), not (): a real prototype for C callers */
+
+HB_EXTERN void
+hb_subset_input_destroy (hb_subset_input_t *subset_input);
+
+/*
+ * hb_subset_face_t
+ * Reusable subset-ready plan for a given face. Threadsafe for multiple
+ * concurrent subset operations.
+ */
+
+typedef struct hb_subset_face_t hb_subset_face_t;
+
+HB_EXTERN hb_subset_face_t *
+hb_subset_face_create (hb_face_t *face);
+
+HB_EXTERN void
+hb_subset_face_destroy (hb_subset_face_t *face);
+
+
+HB_EXTERN hb_bool_t
+hb_subset (hb_subset_profile_t *profile,
+           hb_subset_input_t *input,
+           hb_subset_face_t *face,
+           hb_blob_t **result /* OUT */);
+
+HB_END_DECLS
+
+#endif /* HB_SUBSET_H */
diff --git a/src/hb.h b/src/hb.h
index 7402034..e55decf 100644
--- a/src/hb.h
+++ b/src/hb.h
@@ -41,6 +41,7 @@
 #include "hb-set.h"
 #include "hb-shape.h"
 #include "hb-shape-plan.h"
+#include "hb-subset.h"
 #include "hb-unicode.h"
 #include "hb-version.h"
 
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 2c97f4f..d2b1994 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -1,3 +1,4 @@
 add_subdirectory(api)
 add_subdirectory(shaping)
+add_subdirectory(subset)
 add_subdirectory(fuzzing)
diff --git a/test/Makefile.am b/test/Makefile.am
index ad496f5..66b3e6e 100644
--- a/test/Makefile.am
+++ b/test/Makefile.am
@@ -2,7 +2,7 @@
 
 NULL =
 EXTRA_DIST =
-SUBDIRS = api shaping fuzzing
+SUBDIRS = api shaping fuzzing subset
 
 EXTRA_DIST += \
 	CMakeLists.txt \
diff --git a/test/api/Makefile.am b/test/api/Makefile.am
index e22d726..99849fc 100644
--- a/test/api/Makefile.am
+++ b/test/api/Makefile.am
@@ -29,6 +29,7 @@
 	test-object \
 	test-set \
 	test-shape \
+	test-subset \
 	test-unicode \
 	test-version \
 	$(NULL)
diff --git a/test/api/test-subset.c b/test/api/test-subset.c
new file mode 100644
index 0000000..b6986ce
--- /dev/null
+++ b/test/api/test-subset.c
@@ -0,0 +1,67 @@
+/*
+ * Copyright © 2011  Google, Inc.
+ *
+ *  This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Garret Rieger
+ */
+
+#include "hb-test.h"
+
+/* Unit tests for hb-subset.h */
+
+static const char test_data[] = { 0, 0, 1, 0 };
+
+static void
+test_subset (void)
+{
+  hb_blob_t *font_blob = hb_blob_create(test_data, sizeof(test_data),
+					HB_MEMORY_MODE_READONLY, NULL, NULL);
+  hb_face_t *face = hb_face_create(font_blob, 0);
+
+  hb_subset_profile_t *profile = hb_subset_profile_create();
+  hb_subset_input_t *input = hb_subset_input_create();
+  hb_subset_face_t *subset_face = hb_subset_face_create(face);
+
+  hb_blob_t *output;
+  g_assert(hb_subset(profile, input, subset_face, &output));
+
+  unsigned int output_length;
+  const char *output_data = hb_blob_get_data(output, &output_length);
+  g_assert_cmpmem(test_data, 4, output_data, output_length);
+
+  hb_blob_destroy(output);
+  hb_subset_face_destroy(subset_face);
+  hb_subset_input_destroy(input);
+  hb_subset_profile_destroy(profile);
+  hb_face_destroy(face);
+  hb_blob_destroy(font_blob);
+}
+
+int
+main (int argc, char **argv)
+{
+  hb_test_init (&argc, &argv);
+
+  hb_test_add (test_subset);
+
+  return hb_test_run();
+}
diff --git a/test/subset/CMakeLists.txt b/test/subset/CMakeLists.txt
new file mode 100644
index 0000000..0a1e8f9
--- /dev/null
+++ b/test/subset/CMakeLists.txt
@@ -0,0 +1,9 @@
+if (HB_BUILD_UTILS)  # every test below invokes the hb-subset executable
+  file (READ "${CMAKE_CURRENT_SOURCE_DIR}/data/Makefile.sources" SOURCES)  # the list data/Makefile.am includes too
+  extract_make_variable (TESTS ${SOURCES})
+  foreach (test IN ITEMS ${TESTS})  # one CTest entry per suite file
+    add_test (NAME ${test}
+      COMMAND python run-tests.py $<TARGET_FILE:hb-subset> "data/${test}"
+      WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})  # run-tests.py and data/ are given relative to this directory
+  endforeach ()
+endif ()
diff --git a/test/subset/Makefile.am b/test/subset/Makefile.am
new file mode 100644
index 0000000..336d33d
--- /dev/null
+++ b/test/subset/Makefile.am
@@ -0,0 +1,22 @@
+# Process this file with automake to produce Makefile.in
+
+NULL =
+EXTRA_DIST =
+CLEANFILES =
+SUBDIRS = data
+
+# Convenience targets:
+lib:
+	@$(MAKE) $(AM_MAKEFLAGS) -C $(top_builddir)/src lib
+
+EXTRA_DIST += \
+	CMakeLists.txt \
+	run-tests.py \
+	subset_test_suite.py \
+	$(NULL)
+
+CLEANFILES += \
+	subset_test_suite.py[co] \
+	$(NULL)
+
+-include $(top_srcdir)/git.mk
diff --git a/test/subset/data/Makefile.am b/test/subset/data/Makefile.am
new file mode 100644
index 0000000..f1234db
--- /dev/null
+++ b/test/subset/data/Makefile.am
@@ -0,0 +1,23 @@
+# Process this file with automake to produce Makefile.in
+
+NULL =
+EXTRA_DIST =
+CLEANFILES =
+SUBDIRS =
+
+EXTRA_DIST += \
+	$(TESTS) \
+	expected/basics \
+	fonts \
+	profiles \
+	$(NULL)
+
+# Convenience targets:
+lib:
+	@$(MAKE) $(AM_MAKEFLAGS) -C $(top_builddir)/src lib
+
+TEST_EXTENSIONS = .tests
+TESTS_LOG_COMPILER = $(srcdir)/../run-tests.py $(top_builddir)/util/hb-subset$(EXEEXT)
+include Makefile.sources
+
+-include $(top_srcdir)/git.mk
diff --git a/test/subset/data/Makefile.sources b/test/subset/data/Makefile.sources
new file mode 100644
index 0000000..37550b6
--- /dev/null
+++ b/test/subset/data/Makefile.sources
@@ -0,0 +1,9 @@
+TESTS = \
+	tests/basics.tests \
+	$(NULL)
+
+XFAIL_TESTS = \
+	$(NULL)
+
+DISABLED_TESTS = \
+	$(NULL)
diff --git a/test/subset/data/expected/basics/Roboto-Regular.abc.default.62.ttf b/test/subset/data/expected/basics/Roboto-Regular.abc.default.62.ttf
new file mode 100644
index 0000000..9d791f7
--- /dev/null
+++ b/test/subset/data/expected/basics/Roboto-Regular.abc.default.62.ttf
Binary files differ
diff --git a/test/subset/data/fonts/Roboto-Regular.abc.ttf b/test/subset/data/fonts/Roboto-Regular.abc.ttf
new file mode 100644
index 0000000..9d791f7
--- /dev/null
+++ b/test/subset/data/fonts/Roboto-Regular.abc.ttf
Binary files differ
diff --git a/test/subset/data/profiles/default.txt b/test/subset/data/profiles/default.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/subset/data/profiles/default.txt
diff --git a/test/subset/data/tests/basics.tests b/test/subset/data/tests/basics.tests
new file mode 100644
index 0000000..8a7246b
--- /dev/null
+++ b/test/subset/data/tests/basics.tests
@@ -0,0 +1,8 @@
+FONTS:
+Roboto-Regular.abc.ttf
+
+PROFILES:
+default.txt
+
+SUBSETS:
+b
diff --git a/test/subset/generate-expected-outputs.py b/test/subset/generate-expected-outputs.py
new file mode 100755
index 0000000..f6636de
--- /dev/null
+++ b/test/subset/generate-expected-outputs.py
@@ -0,0 +1,41 @@
+#!/usr/bin/env python
+
+# Pre-generates the expected output subset files (via fonttools) for
+# specified subset test suite(s).
+
+import io
+import os
+import sys
+
+from subprocess import check_call
+from subset_test_suite import SubsetTestSuite
+
+
+def usage():
+	# Parenthesised single-argument print behaves the same under Python 2 and 3.
+	print("Usage: generate-expected-outputs.py <test suite file> ...")
+
+
+def generate_expected_output(input_file, unicodes, output_path):
+	# Cuts the reference subset with the fonttools CLI; check_call raises on failure.
+	check_call(["fonttools", "subset", input_file,
+		"--unicodes=%s" % unicodes,
+		"--output-file=%s" % output_path])
+
+
+args = sys.argv[1:]
+if not args:
+	usage()
+
+for path in args:
+	with io.open(path, mode="r", encoding="utf-8") as f:
+		test_suite = SubsetTestSuite(path, f.read())
+		output_directory = test_suite.get_output_directory()
+
+		print("Generating output files for %s" % output_directory)
+		for test in test_suite.tests():
+			unicodes = test.unicodes()
+			font_name = test.get_font_name()
+			print("Creating subset %s/%s" % (output_directory, font_name))
+			generate_expected_output(test.font_path, unicodes,
+				os.path.join(output_directory, font_name))
diff --git a/test/subset/run-tests.py b/test/subset/run-tests.py
new file mode 100755
index 0000000..b005480
--- /dev/null
+++ b/test/subset/run-tests.py
@@ -0,0 +1,86 @@
+#!/usr/bin/env python
+
+# Runs a subsetting test suite. Compares the results of subsetting via harfbuzz
+# to subsetting via fonttools.
+
+from __future__ import print_function
+
+import io
+import os
+import subprocess
+import sys
+import tempfile
+
+from subset_test_suite import SubsetTestSuite
+
+
+def cmd(command):
+	# Runs command; echoes its stderr and returns (stdout, exit code).
+	p = subprocess.Popen (command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+	stdout_data, stderr_data = p.communicate () # drains both pipes; wait() then read() can deadlock
+	print (stderr_data, end="") # file=sys.stderr
+	return stdout_data, p.returncode
+
+def read_binary(file_path):
+	with open(file_path, 'rb') as f:
+		return f.read()
+
+def fail_test(test, cli_args, message):
+	print ('ERROR: %s' % message)
+	print ('Test State:')
+	print ('  test.font_path    %s' % os.path.abspath(test.font_path))
+	print ('  test.profile_path %s' % os.path.abspath(test.profile_path))
+	print ('  test.unicodes	    %s' % test.unicodes())
+	expected_file = os.path.join(test_suite.get_output_directory(),
+				     test.get_font_name())
+	print ('  expected_file	    %s' % os.path.abspath(expected_file))
+	return 1
+
+def run_test(test):
+	out_file = os.path.join(tempfile.mkdtemp(), test.get_font_name() + '-subset.ttf')
+	cli_args = [hb_subset,
+		    "--font-file=" + test.font_path,
+		    "--output-file=" + out_file,
+		    "--unicodes=%s" % test.unicodes()]
+	_, return_code = cmd(cli_args)
+
+	if return_code:
+		return fail_test(test, cli_args, "%s returned %d" % (' '.join(cli_args), return_code))
+
+	expected = read_binary(os.path.join(test_suite.get_output_directory(),
+					    test.get_font_name()))
+	actual = read_binary(out_file)
+
+	if len(actual) != len(expected):
+		return fail_test(test, cli_args, "expected %d bytes, actual %d: %s" % (
+				len(expected), len(actual), ' '.join(cli_args)))
+
+	if not actual == expected:
+		return fail_test(test, cli_args, 'files are the same length but not the same bytes')
+
+	return 0
+
+
+args = sys.argv[1:]
+if not args or sys.argv[1].find('hb-subset') == -1 or not os.path.exists (sys.argv[1]):
+	print ("First argument does not seem to point to usable hb-subset.")
+	sys.exit (1)
+hb_subset, args = args[0], args[1:]
+
+if not len(args):
+	print ("No tests supplied.")
+	sys.exit (1)
+
+fails = 0
+for path in args:
+	with io.open(path, mode="r", encoding="utf-8") as f:
+		print ("Running tests in " + path)
+		test_suite = SubsetTestSuite(path, f.read())
+		for test in test_suite.tests():
+			fails += run_test(test)
+
+if fails != 0:
+	print (str (fails) + " test(s) failed.")
+	sys.exit(1)
+else:
+	print ("All tests passed.")
diff --git a/test/subset/subset_test_suite.py b/test/subset/subset_test_suite.py
new file mode 100644
index 0000000..256e207
--- /dev/null
+++ b/test/subset/subset_test_suite.py
@@ -0,0 +1,82 @@
+#!/usr/bin/env python
+
+import os
+
+# One case of a subset test suite: a font, a subsetting profile,
+# and the text whose characters the subset should keep.
+class Test:
+	def __init__(self, font_path, profile_path, subset):
+		self.subset = subset
+		self.profile_path = profile_path
+		self.font_path = font_path
+
+	def unicodes(self):
+		# Comma-separated uppercase hex code points, one per character of the subset.
+		return ",".join(format(ord(ch), "X") for ch in self.subset)
+
+	def get_font_name(self):
+		# Layout: <font stem>.<profile stem>.<unicodes><font extension>
+		font_file = os.path.basename(self.font_path)
+		profile_file = os.path.basename(self.profile_path)
+		stem, extension = os.path.splitext(font_file)
+		profile_stem = os.path.splitext(profile_file)[0]
+
+		return ".".join((stem, profile_stem, self.unicodes())) + extension
+
+# A group of tests to perform on the subsetter. Each test
+# identifies a font, a subsetting profile, and a subset to be cut.
+class SubsetTestSuite:
+
+	def __init__(self, test_path, definition):
+		self.test_path = test_path
+		self.fonts = set()
+		self.profiles = set()
+		self.subsets = set()
+		self._parse(definition)
+
+	def get_output_directory(self):
+		# Expected outputs live in <data dir>/expected/<suite name>; created on demand.
+		test_name = os.path.splitext(os.path.basename(self.test_path))[0]
+		data_dir = os.path.join(os.path.dirname(self.test_path), "..")
+		output_dir = os.path.normpath(os.path.join(data_dir, "expected", test_name))
+		if not os.path.exists(output_dir):
+			os.mkdir(output_dir)
+		if not os.path.isdir(output_dir):
+			raise Exception("%s is not a directory." % output_dir)  # same type _parse() raises
+
+		return output_dir
+
+	def tests(self):
+		for font in self.fonts:
+			font = os.path.join(self._base_path(), "fonts", font)
+			for profile in self.profiles:
+				profile = os.path.join(self._base_path(), "profiles", profile)
+				for subset in self.subsets:
+					yield Test(font, profile, subset)
+
+	def _base_path(self):
+		return os.path.dirname(os.path.dirname(self.test_path))
+
+	def _parse(self, definition):
+		destinations = {
+				"FONTS:": self.fonts,
+				"PROFILES:": self.profiles,
+				"SUBSETS:": self.subsets
+		}
+
+		current_destination = None
+		for line in definition.splitlines():
+			line = line.strip()
+
+			if line.startswith("#"):
+				continue
+
+			if not line:
+				continue
+
+			if line in destinations:
+				current_destination = destinations[line]
+			elif current_destination is not None:
+				current_destination.add(line)
+			else:
+				raise Exception("Failed to parse test suite file.")
diff --git a/util/Makefile.am b/util/Makefile.am
index e6620a2..cd5e31c 100644
--- a/util/Makefile.am
+++ b/util/Makefile.am
@@ -46,6 +46,9 @@
 hb_shape_SOURCES = $(HB_SHAPE_sources)
 bin_PROGRAMS += hb-shape
 
+hb_subset_SOURCES = $(HB_SUBSET_CLI_sources)
+bin_PROGRAMS += hb-subset
+
 if HAVE_OT
 hb_ot_shape_closure_SOURCES = $(HB_OT_SHAPE_CLOSURE_sources)
 bin_PROGRAMS += hb-ot-shape-closure
diff --git a/util/Makefile.sources b/util/Makefile.sources
index d6c00cc..6c815d2 100644
--- a/util/Makefile.sources
+++ b/util/Makefile.sources
@@ -28,3 +28,10 @@
 	options.hh \
 	main-font-text.hh \
 	$(NULL)
+
+HB_SUBSET_CLI_sources = \
+	hb-subset.cc \
+	options.cc \
+	options.hh \
+	main-font-text.hh \
+	$(NULL)
diff --git a/util/hb-subset.cc b/util/hb-subset.cc
new file mode 100644
index 0000000..808cb04
--- /dev/null
+++ b/util/hb-subset.cc
@@ -0,0 +1,110 @@
+/*
+ * Copyright © 2010  Behdad Esfahbod
+ * Copyright © 2011,2012  Google, Inc.
+ *
+ *  This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Google Author(s): Garret Rieger, Rod Sheeter
+ */
+
+#include <unistd.h>
+
+#include "main-font-text.hh"
+#include "hb-subset.h"
+
+/*
+ * Command line interface to the harfbuzz font subsetter.
+ */
+
+struct subset_consumer_t
+{
+  subset_consumer_t (option_parser_t *parser)
+      : failed (false), options(parser) {}
+
+  void init (hb_buffer_t  *buffer_,
+             const font_options_t *font_opts)
+  {
+    font = hb_font_reference (font_opts->get_font ());
+  }
+
+  void consume_line (const char   *text,
+                     unsigned int  text_len,
+                     const char   *text_before,
+                     const char   *text_after)
+  {
+  }
+
+  hb_bool_t
+  write_file (const char *output_file, hb_blob_t *blob) {
+    /* Writes blob to output_file (truncating it); true only if every byte was written. */
+    unsigned int data_length;
+    const char* data = hb_blob_get_data (blob, &data_length);
+    int fd_out = open(output_file, O_CREAT | O_WRONLY | O_TRUNC, S_IRWXU);
+    if (fd_out == -1) {
+      fprintf(stderr, "Unable to open output file\n");
+      return false;
+    }
+    ssize_t bytes_written = write(fd_out, data, data_length);
+    close(fd_out); /* nothing below needs the descriptor; release it on every path */
+    if (bytes_written == -1) {
+      fprintf(stderr, "Unable to write output file\n");
+      return false;
+    }
+    if ((size_t) bytes_written != data_length) {
+      fprintf(stderr, "Expected %u bytes written, got %ld\n", data_length, (long) bytes_written);
+      return false;
+    }
+    return true;
+  }
+
+  void finish (const font_options_t *font_opts)
+  {
+    // TODO(Q1) check for errors from creates and such
+    hb_subset_profile_t *subset_profile = hb_subset_profile_create();
+    hb_subset_input_t *subset_input = hb_subset_input_create();
+    hb_face_t *face = hb_font_get_face (font);    
+    hb_subset_face_t *subset_face = hb_subset_face_create(face);
+
+    hb_blob_t *result = nullptr;
+    failed = !(hb_subset(subset_profile, subset_input, subset_face, &result)
+               && write_file(options.output_file, result));
+
+    hb_subset_profile_destroy (subset_profile);
+    hb_subset_input_destroy (subset_input);
+    hb_subset_face_destroy (subset_face);
+    hb_blob_destroy (result);
+    hb_font_destroy (font);
+  }
+
+  public:
+  bool failed;
+
+  private:
+  output_options_t options;
+  hb_font_t *font;
+};
+
+int
+main (int argc, char **argv)
+{
+  main_font_text_t<subset_consumer_t, 10, 0> driver;
+  return driver.main (argc, argv);
+}