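Fix header includes and update the benchmark script.

* upb/upb.h no longer includes <setjmp.h>; upb/decode.c and
  upb/json_decode.c now include it directly.
* upb/encode.c now includes "upb/port_def.inc" last.
* benchmarks/compare.py: rename Benchmark()'s `new` flag to `fasttable`,
  pass the same fasttable setting to both the current-tree and baseline
  runs, and comment out the `bazel test` step.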
diff --git a/benchmarks/compare.py b/benchmarks/compare.py
index 9824ca0..48f9704 100755
--- a/benchmarks/compare.py
+++ b/benchmarks/compare.py
@@ -27,17 +27,17 @@
 def Run(cmd):
   subprocess.check_call(cmd, shell=True)
 
-def Benchmark(outbase, bench_cpu=True, runs=12, new=False):
+def Benchmark(outbase, bench_cpu=True, runs=12, fasttable=False):
   tmpfile = "/tmp/bench-output.json"
   Run("rm -rf {}".format(tmpfile))
-  Run("CC=clang bazel test ...")
+  #Run("CC=clang bazel test ...")
+  if fasttable:
+    extra_args = " --//:fasttable_enabled=true"
+  else:
+    extra_args = ""
 
   if bench_cpu:
-    if new:
-      Run("CC=clang bazel build -c opt --copt=-march=native --//:fasttable_enabled=true benchmarks:benchmark")
-    else:
-      Run("CC=clang bazel build -c opt --copt=-march=native benchmarks:benchmark")
-
+    Run("CC=clang bazel build -c opt --copt=-march=native benchmarks:benchmark" + extra_args)
     Run("./bazel-bin/benchmarks/benchmark --benchmark_out_format=json --benchmark_out={} --benchmark_repetitions={}".format(tmpfile, runs))
     with open(tmpfile) as f:
       bench_json = json.load(f)
@@ -51,15 +51,13 @@
         values = (name, run["iterations"], run["cpu_time"])
         print("{} {} {} ns/op".format(*values), file=f)
 
-  if new:
-    Run("CC=clang bazel build -c opt --copt=-g --//:fasttable_enabled=true tests:conformance_upb")
-  else:
-    Run("CC=clang bazel build -c opt --copt=-g tests:conformance_upb")
+  Run("CC=clang bazel build -c opt --copt=-g tests:conformance_upb" + extra_args)
   Run("cp -f bazel-bin/tests/conformance_upb {}.bin".format(outbase))
 
 
 baseline = "master"
 bench_cpu = True
+fasttable = False
 
 if len(sys.argv) > 1:
   baseline = sys.argv[1]
@@ -69,11 +67,11 @@
     pass
 
 # Benchmark our current directory first, since it's more likely to be broken.
-Benchmark("/tmp/new", bench_cpu, new=True)
+Benchmark("/tmp/new", bench_cpu, fasttable=fasttable)
 
 # Benchmark the baseline.
 with GitWorktree(baseline):
-  Benchmark("/tmp/old", bench_cpu)
+  Benchmark("/tmp/old", bench_cpu, fasttable=fasttable)
 
 print()
 print()
diff --git a/upb/decode.c b/upb/decode.c
index 84bdf37..26a8d42 100644
--- a/upb/decode.c
+++ b/upb/decode.c
@@ -1,6 +1,7 @@
 
 #include "upb/decode.h"
 
+#include <setjmp.h>
 #include <string.h>
 
 #include "upb/decode.int.h"
diff --git a/upb/encode.c b/upb/encode.c
index 6d56544..6d36619 100644
--- a/upb/encode.c
+++ b/upb/encode.c
@@ -6,9 +6,11 @@
 #include <string.h>
 
 #include "upb/msg.h"
-#include "upb/port_def.inc"
 #include "upb/upb.h"
 
+/* Must be last. */
+#include "upb/port_def.inc"
+
 #define UPB_PB_VARINT_MAX_LEN 10
 
 UPB_NOINLINE
diff --git a/upb/json_decode.c b/upb/json_decode.c
index 1a18253..bb33744 100644
--- a/upb/json_decode.c
+++ b/upb/json_decode.c
@@ -6,6 +6,7 @@
 #include <inttypes.h>
 #include <limits.h>
 #include <math.h>
+#include <setjmp.h>
 #include <stdlib.h>
 #include <string.h>
 
diff --git a/upb/upb.h b/upb/upb.h
index c432f45..7e7309d 100644
--- a/upb/upb.h
+++ b/upb/upb.h
@@ -6,7 +6,6 @@
 #define UPB_H_
 
 #include <assert.h>
-#include <setjmp.h>
 #include <stdarg.h>
 #include <stdbool.h>
 #include <stddef.h>