Merge "Fix trace naming due to an overwrite permission issue with root permissions"
diff --git a/BUILD b/BUILD
index a349220..a101de8 100644
--- a/BUILD
+++ b/BUILD
@@ -367,11 +367,13 @@
         "include/perfetto/tracing/internal/data_source_internal.h",
         "include/perfetto/tracing/internal/tracing_muxer.h",
         "include/perfetto/tracing/internal/tracing_tls.h",
+        "include/perfetto/tracing/internal/track_event_data_source.h",
         "include/perfetto/tracing/locked_handle.h",
         "include/perfetto/tracing/platform.h",
         "include/perfetto/tracing/trace_writer_base.h",
         "include/perfetto/tracing/tracing.h",
         "include/perfetto/tracing/tracing_backend.h",
+        "include/perfetto/tracing/track_event.h",
     ],
 )
 
@@ -874,6 +876,24 @@
     ],
 )
 
+# GN target: //src/tracing:client_api
+filegroup(
+    name = "src_tracing_client_api",
+    srcs = [
+        "src/tracing/data_source.cc",
+        "src/tracing/internal/in_process_tracing_backend.cc",
+        "src/tracing/internal/in_process_tracing_backend.h",
+        "src/tracing/internal/system_tracing_backend.cc",
+        "src/tracing/internal/system_tracing_backend.h",
+        "src/tracing/internal/tracing_muxer_impl.cc",
+        "src/tracing/internal/tracing_muxer_impl.h",
+        "src/tracing/platform.cc",
+        "src/tracing/tracing.cc",
+        "src/tracing/track_event.cc",
+        "src/tracing/virtual_destructors.cc",
+    ],
+)
+
 # GN target: //src/tracing:common
 filegroup(
     name = "src_tracing_common",
@@ -910,6 +930,14 @@
     ],
 )
 
+# GN target: //src/tracing:platform_posix
+filegroup(
+    name = "src_tracing_platform_posix",
+    srcs = [
+        "src/tracing/platform_posix.cc",
+    ],
+)
+
 # GN target: //src/tracing:tracing
 filegroup(
     name = "src_tracing_tracing",
@@ -1269,6 +1297,14 @@
     ],
 )
 
+# GN target: //protos/perfetto/config:merged_config
+perfetto_cc_proto_library(
+    name = "protos_perfetto_config_merged_config",
+    deps = [
+        ":protos_perfetto_config_merged_config_protos",
+    ],
+)
+
 # GN target: //protos/perfetto/common:lite
 perfetto_cc_proto_library(
     name = "protos_perfetto_common_lite",
@@ -1277,6 +1313,17 @@
     ],
 )
 
+# GN target: //protos/perfetto/config:merged_config
+perfetto_proto_library(
+    name = "protos_perfetto_config_merged_config_protos",
+    srcs = [
+        "protos/perfetto/config/perfetto_config.proto",
+    ],
+    visibility = [
+        "//visibility:public",
+    ],
+)
+
 # GN target: //protos/perfetto/trace/ftrace:lite
 perfetto_cc_proto_library(
     name = "protos_perfetto_trace_ftrace_lite",
@@ -1686,6 +1733,9 @@
     srcs = [
         "protos/perfetto/trace/perfetto_trace.proto",
     ],
+    visibility = [
+        "//visibility:public",
+    ],
 )
 
 # GN target: //protos/perfetto/config/profiling:zero
@@ -1842,6 +1892,9 @@
     srcs = [
         "protos/perfetto/metrics/metrics.proto",
     ],
+    visibility = [
+        "//visibility:public",
+    ],
     deps = [
         ":protos_perfetto_metrics_android_protos",
     ],
@@ -1927,6 +1980,76 @@
 # Public targets
 # ##############################################################################
 
+# GN target: //:libperfetto_client_experimental
+perfetto_cc_library(
+    name = "libperfetto_client_experimental",
+    srcs = [
+        "include/perfetto/tracing.h",
+        ":src_base_base",
+        ":src_base_unix_socket",
+        ":src_ipc_ipc",
+        ":src_protozero_protozero",
+        ":src_tracing_client_api",
+        ":src_tracing_common",
+        ":src_tracing_ipc",
+        ":src_tracing_platform_posix",
+        ":src_tracing_tracing",
+    ],
+    hdrs = [
+        ":include_perfetto_base_base",
+        ":include_perfetto_ext_base_base",
+        ":include_perfetto_ext_ipc_ipc",
+        ":include_perfetto_ext_tracing_core_core",
+        ":include_perfetto_ext_tracing_ipc_ipc",
+        ":include_perfetto_protozero_protozero",
+        ":include_perfetto_tracing_core_core",
+        ":include_perfetto_tracing_tracing",
+    ],
+    visibility = [
+        "//visibility:public",
+    ],
+    deps = [
+        ":protos_perfetto_common_lite",
+        ":protos_perfetto_common_zero",
+        ":protos_perfetto_config_android_lite",
+        ":protos_perfetto_config_android_zero",
+        ":protos_perfetto_config_ftrace_lite",
+        ":protos_perfetto_config_ftrace_zero",
+        ":protos_perfetto_config_gpu_lite",
+        ":protos_perfetto_config_gpu_zero",
+        ":protos_perfetto_config_inode_file_lite",
+        ":protos_perfetto_config_inode_file_zero",
+        ":protos_perfetto_config_lite",
+        ":protos_perfetto_config_power_lite",
+        ":protos_perfetto_config_power_zero",
+        ":protos_perfetto_config_process_stats_lite",
+        ":protos_perfetto_config_process_stats_zero",
+        ":protos_perfetto_config_profiling_lite",
+        ":protos_perfetto_config_profiling_zero",
+        ":protos_perfetto_config_sys_stats_lite",
+        ":protos_perfetto_config_sys_stats_zero",
+        ":protos_perfetto_config_zero",
+        ":protos_perfetto_ipc_ipc",
+        ":protos_perfetto_ipc_wire_protocol",
+        ":protos_perfetto_trace_android_zero",
+        ":protos_perfetto_trace_chrome_zero",
+        ":protos_perfetto_trace_filesystem_zero",
+        ":protos_perfetto_trace_ftrace_zero",
+        ":protos_perfetto_trace_gpu_zero",
+        ":protos_perfetto_trace_interned_data_zero",
+        ":protos_perfetto_trace_minimal_lite",
+        ":protos_perfetto_trace_minimal_zero",
+        ":protos_perfetto_trace_non_minimal_zero",
+        ":protos_perfetto_trace_perfetto_zero",
+        ":protos_perfetto_trace_power_zero",
+        ":protos_perfetto_trace_profiling_zero",
+        ":protos_perfetto_trace_ps_zero",
+        ":protos_perfetto_trace_sys_stats_zero",
+        ":protos_perfetto_trace_track_event_zero",
+        ":protos_perfetto_trace_trusted_lite",
+    ] + PERFETTO_CONFIG.deps.protobuf_lite,
+)
+
 # GN target: //src/perfetto_cmd:perfetto
 perfetto_cc_binary(
     name = "perfetto",
diff --git a/bazel/BUILD b/bazel/BUILD
index 069748d..1bf33a4 100644
--- a/bazel/BUILD
+++ b/bazel/BUILD
@@ -17,3 +17,16 @@
     values = {"cpu": "darwin"},
     visibility = ["//visibility:public"],
 )
+
+config_setting(
+    name = "os_linux",
+    values = {"cpu": "k8"},
+    visibility = ["//visibility:public"],
+)
+
+# Note this config does not imply MSVC.
+config_setting(
+    name = "os_windows",
+    values = {"cpu": "x64_windows"},
+    visibility = ["//visibility:public"],
+)
diff --git a/bazel/jsoncpp.BUILD b/bazel/jsoncpp.BUILD
index 8a33eb8..bef06a1 100644
--- a/bazel/jsoncpp.BUILD
+++ b/bazel/jsoncpp.BUILD
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+load("@perfetto_cfg//:perfetto_cfg.bzl", "PERFETTO_CONFIG")
+
 cc_library(
     name = "jsoncpp",
     srcs = [
@@ -38,7 +40,7 @@
     ],
     copts = [
         "-Isrc/lib_json",
-    ],
+    ] + PERFETTO_CONFIG.deps_copts.jsoncpp,
     defines = [
         "JSON_USE_EXCEPTION=0",
     ],
diff --git a/bazel/linenoise.BUILD b/bazel/linenoise.BUILD
index d2b6053..050e7c5 100644
--- a/bazel/linenoise.BUILD
+++ b/bazel/linenoise.BUILD
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+load("@perfetto_cfg//:perfetto_cfg.bzl", "PERFETTO_CONFIG")
+
 cc_library(
     name = "linenoise",
     srcs = [
@@ -23,5 +25,6 @@
     includes = [
         ".",
     ],
+    copts = PERFETTO_CONFIG.deps_copts.linenoise,
     visibility = ["//visibility:public"],
 )
diff --git a/bazel/rules.bzl b/bazel/rules.bzl
index 87c48c7..8713984 100644
--- a/bazel/rules.bzl
+++ b/bazel/rules.bzl
@@ -21,12 +21,14 @@
 
 def default_cc_args():
     return {
-        "deps": [PERFETTO_CONFIG.root + ":build_config_hdr"],
+        "deps": PERFETTO_CONFIG.deps.build_config,
         "copts": [],
         "includes": ["include"],
         "linkopts": select({
+            "@perfetto//bazel:os_linux": ["-ldl", "-lrt"],
             "@perfetto//bazel:os_osx": [],
-            "//conditions:default": ["-ldl", "-lrt"],
+            "@perfetto//bazel:os_windows": [],
+            "//conditions:default": ["-ldl"],
         }),
     }
 
diff --git a/bazel/sqlite.BUILD b/bazel/sqlite.BUILD
index e028f1f1..8b02c6d 100644
--- a/bazel/sqlite.BUILD
+++ b/bazel/sqlite.BUILD
@@ -49,7 +49,7 @@
     "-DSQLITE_TEMP_STORE=3",
     "-DSQLITE_OMIT_LOAD_EXTENSION",
     "-DSQLITE_OMIT_RANDOMNESS",
-]
+] + PERFETTO_CONFIG.deps_copts.sqlite
 
 cc_library(
     name = "sqlite",
diff --git a/bazel/standalone/perfetto_cfg.bzl b/bazel/standalone/perfetto_cfg.bzl
index e89c7a0..1ee8e52 100644
--- a/bazel/standalone/perfetto_cfg.bzl
+++ b/bazel/standalone/perfetto_cfg.bzl
@@ -28,6 +28,12 @@
     # to allow perfetto embedders (e.g. gapid) and google internal builds to
     # override paths and target names to their own third_party.
     deps = struct(
+        # Target exposing the build config header. It should be a valid
+        # cc_library dependency as it will become a dependency of every
+        # perfetto_cc_library target. It needs to expose a
+        # "perfetto_build_flags.h" file that can be included via:
+        # #include "perfetto_build_flags.h".
+        build_config = ["//:build_config_hdr"],
         zlib = ["@perfetto_dep_zlib//:zlib"],
         jsoncpp = ["@perfetto_dep_jsoncpp//:jsoncpp"],
         linenoise = ["@perfetto_dep_linenoise//:linenoise"],
@@ -39,6 +45,16 @@
         protobuf_full = ["@com_google_protobuf//:protobuf"],
     ),
 
+    # This struct allows embedders to customize the cc_opts for Perfetto
+    # 3rd party dependencies. They only have an effect if the dependencies are
+    # initialized with the Perfetto build files (i.e. via perfetto_deps()).
+    deps_copts = struct(
+        zlib = [],
+        jsoncpp = [],
+        linenoise = [],
+        sqlite = [],
+    ),
+
     # This struct allows the embedder to customize copts and other args passed
     # to rules like cc_binary. Prefixed rules (e.g. perfetto_cc_binary) will
     # look into this struct before falling back on native.cc_binary().
diff --git a/bazel/zlib.BUILD b/bazel/zlib.BUILD
index 1557352..5c723ce 100644
--- a/bazel/zlib.BUILD
+++ b/bazel/zlib.BUILD
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+load("@perfetto_cfg//:perfetto_cfg.bzl", "PERFETTO_CONFIG")
+
 cc_library(
     name = "zlib",
     srcs = [
@@ -48,7 +50,7 @@
     copts = [
         "-DHAVE_HIDDEN",
         "-Isrc",
-    ],
+    ] + PERFETTO_CONFIG.deps_copts.zlib,
     includes = ["zlib"],
     visibility = ["//visibility:public"],
 )
diff --git a/include/perfetto/ext/base/scoped_file.h b/include/perfetto/ext/base/scoped_file.h
index 2fee6a1..24c8970 100644
--- a/include/perfetto/ext/base/scoped_file.h
+++ b/include/perfetto/ext/base/scoped_file.h
@@ -88,7 +88,8 @@
                                   mode_t mode = kInvalidMode) {
   PERFETTO_DCHECK((flags & O_CREAT) == 0 || mode != kInvalidMode);
 #if PERFETTO_BUILDFLAG(PERFETTO_OS_WIN)
-  ScopedFile fd(open(path.c_str(), flags, mode));
+  // Always use O_BINARY on Windows, to avoid silly EOL translations.
+  ScopedFile fd(open(path.c_str(), flags | O_BINARY, mode));
 #else
   // Always open a ScopedFile with O_CLOEXEC so we can safely fork and exec.
   ScopedFile fd(open(path.c_str(), flags | O_CLOEXEC, mode));
diff --git a/include/perfetto/ext/base/string_utils.h b/include/perfetto/ext/base/string_utils.h
index 977673f..df1ed7c 100644
--- a/include/perfetto/ext/base/string_utils.h
+++ b/include/perfetto/ext/base/string_utils.h
@@ -20,6 +20,8 @@
 #include <string>
 #include <vector>
 
+#include "perfetto/ext/base/string_view.h"
+
 namespace perfetto {
 namespace base {
 
@@ -34,6 +36,7 @@
 bool StartsWith(const std::string& str, const std::string& prefix);
 bool EndsWith(const std::string& str, const std::string& suffix);
 bool Contains(const std::string& haystack, const std::string& needle);
+size_t Find(const StringView& needle, const StringView& haystack);
 bool CaseInsensitiveEqual(const std::string& first, const std::string& second);
 std::string Join(const std::vector<std::string>& parts,
                  const std::string& delim);
diff --git a/include/perfetto/profiling/BUILD.gn b/include/perfetto/profiling/BUILD.gn
index a897783..23e19c9 100644
--- a/include/perfetto/profiling/BUILD.gn
+++ b/include/perfetto/profiling/BUILD.gn
@@ -21,3 +21,9 @@
     "symbolizer.h",
   ]
 }
+
+source_set("normalize") {
+  sources = [
+    "normalize.h",
+  ]
+}
diff --git a/src/profiling/memory/ext.h b/include/perfetto/profiling/normalize.h
similarity index 91%
rename from src/profiling/memory/ext.h
rename to include/perfetto/profiling/normalize.h
index b353e98..7b7da1d 100644
--- a/src/profiling/memory/ext.h
+++ b/include/perfetto/profiling/normalize.h
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef SRC_PROFILING_MEMORY_EXT_H_
-#define SRC_PROFILING_MEMORY_EXT_H_
+#ifndef INCLUDE_PERFETTO_PROFILING_NORMALIZE_H_
+#define INCLUDE_PERFETTO_PROFILING_NORMALIZE_H_
 
 // Header only code that gets used in other projects.
 // This is currently used in
@@ -63,4 +63,4 @@
 }  // namespace profiling
 }  // namespace perfetto
 
-#endif  // SRC_PROFILING_MEMORY_EXT_H_
+#endif  // INCLUDE_PERFETTO_PROFILING_NORMALIZE_H_
diff --git a/include/perfetto/profiling/pprof_builder.h b/include/perfetto/profiling/pprof_builder.h
index db6f57b..2ec9515 100644
--- a/include/perfetto/profiling/pprof_builder.h
+++ b/include/perfetto/profiling/pprof_builder.h
@@ -21,6 +21,8 @@
 #include <string>
 #include <vector>
 
+#include "perfetto/trace_processor/trace_processor.h"
+
 // TODO(135923303): do not depend on anything in this file as it will be
 // changed heavily as part of fixing b/135923303.
 namespace perfetto {
@@ -33,11 +35,22 @@
   std::string serialized;
 };
 
+bool TraceToPprof(trace_processor::TraceProcessor*,
+                  std::vector<SerializedProfile>* output,
+                  Symbolizer* symbolizer,
+                  uint64_t pid = 0,
+                  const std::vector<uint64_t>& timestamps = {});
+
 bool TraceToPprof(std::istream* input,
                   std::vector<SerializedProfile>* output,
-                  Symbolizer* symbolizer);
+                  Symbolizer* symbolizer,
+                  uint64_t pid = 0,
+                  const std::vector<uint64_t>& timestamps = {});
 
-bool TraceToPprof(std::istream* input, std::vector<SerializedProfile>* output);
+bool TraceToPprof(std::istream* input,
+                  std::vector<SerializedProfile>* output,
+                  uint64_t pid = 0,
+                  const std::vector<uint64_t>& timestamps = {});
 
 }  // namespace trace_to_text
 }  // namespace perfetto
diff --git a/include/perfetto/trace_processor/basic_types.h b/include/perfetto/trace_processor/basic_types.h
index 8bd946d..4c0f951 100644
--- a/include/perfetto/trace_processor/basic_types.h
+++ b/include/perfetto/trace_processor/basic_types.h
@@ -66,8 +66,8 @@
 
   int Compare(const SqlValue& value) const {
     // TODO(lalitm): this is almost the same as what SQLite does with the
-    // exception of comparisons between long and double - we choose (for
-    // performance reasons) to omit comparisons between them.
+    // exception of comparisons between long and double - we choose (for
+    // performance reasons) to omit comparisons between them.
     if (type != value.type)
       return type - value.type;
 
@@ -75,9 +75,15 @@
       case Type::kNull:
         return 0;
       case Type::kLong:
-        return signbit(long_value - value.long_value);
-      case Type::kDouble:
-        return signbit(double_value - value.double_value);
+        // Compare rather than subtract: the 64-bit difference can overflow and
+        // narrowing it to int can flip the sign or collapse it to zero.
+        if (long_value == value.long_value)
+          return 0;
+        return long_value < value.long_value ? -1 : 1;
+      case Type::kDouble: {
+        double diff = double_value - value.double_value;
+        return diff < 0 ? -1 : (diff > 0 ? 1 : 0);
+      }
       case Type::kString:
         return strcmp(string_value, value.string_value);
       case Type::kBytes: {
@@ -85,7 +91,10 @@
         int ret = memcmp(bytes_value, value.bytes_value, bytes);
         if (ret != 0)
           return ret;
-        return signbit(bytes_count - value.bytes_count);
+        // Order the lengths explicitly: their difference may not fit in an int.
+        if (bytes_count == value.bytes_count)
+          return 0;
+        return bytes_count < value.bytes_count ? -1 : 1;
       }
     }
     PERFETTO_FATAL("For GCC");
diff --git a/include/perfetto/tracing/BUILD.gn b/include/perfetto/tracing/BUILD.gn
index 2385d90..776876c 100644
--- a/include/perfetto/tracing/BUILD.gn
+++ b/include/perfetto/tracing/BUILD.gn
@@ -16,6 +16,7 @@
   public_deps = [
     "../../../gn:default_deps",
     "../../../protos/perfetto/trace:zero",
+    "../../../protos/perfetto/trace/track_event:zero",
     "../base",
     "../protozero",
   ]
@@ -26,10 +27,12 @@
     "internal/data_source_internal.h",
     "internal/tracing_muxer.h",
     "internal/tracing_tls.h",
+    "internal/track_event_data_source.h",
     "locked_handle.h",
     "platform.h",
     "trace_writer_base.h",
     "tracing.h",
     "tracing_backend.h",
+    "track_event.h",
   ]
 }
diff --git a/src/base/event_fd.cc b/src/base/event_fd.cc
index b8f75bf..976db19 100644
--- a/src/base/event_fd.cc
+++ b/src/base/event_fd.cc
@@ -14,6 +14,9 @@
  * limitations under the License.
  */
 
+#include "perfetto/base/build_config.h"
+#if !PERFETTO_BUILDFLAG(PERFETTO_OS_WIN)
+
 #include <stdint.h>
 #include <unistd.h>
 
@@ -74,3 +77,5 @@
 
 }  // namespace base
 }  // namespace perfetto
+
+#endif  // !PERFETTO_BUILDFLAG(PERFETTO_OS_WIN)
diff --git a/src/base/file_utils.cc b/src/base/file_utils.cc
index 74c985a..b8020ad 100644
--- a/src/base/file_utils.cc
+++ b/src/base/file_utils.cc
@@ -22,7 +22,8 @@
 #include "perfetto/ext/base/scoped_file.h"
 #include "perfetto/ext/base/utils.h"
 
-#if !PERFETTO_BUILDFLAG(PERFETTO_OS_WIN)
+#if !PERFETTO_BUILDFLAG(PERFETTO_OS_WIN) || \
+    PERFETTO_BUILDFLAG(PERFETTO_COMPILER_GCC)
 #include <unistd.h>
 #else
 #include <corecrt_io.h>
diff --git a/src/base/pipe.cc b/src/base/pipe.cc
index 5c9c3d7..c61492f 100644
--- a/src/base/pipe.cc
+++ b/src/base/pipe.cc
@@ -14,6 +14,9 @@
  * limitations under the License.
  */
 
+#include "perfetto/base/build_config.h"
+#if !PERFETTO_BUILDFLAG(PERFETTO_OS_WIN)
+
 #include "perfetto/ext/base/pipe.h"
 
 #include <sys/types.h>
@@ -54,3 +57,5 @@
 
 }  // namespace base
 }  // namespace perfetto
+
+#endif  // !PERFETTO_BUILDFLAG(PERFETTO_OS_WIN)
diff --git a/src/base/string_utils.cc b/src/base/string_utils.cc
index c042290..5dc1bc8 100644
--- a/src/base/string_utils.cc
+++ b/src/base/string_utils.cc
@@ -39,6 +39,23 @@
   return haystack.find(needle) != std::string::npos;
 }
 
+// Returns the offset of the first occurrence of |needle| in |haystack|, or
+// std::string::npos if |needle| does not occur. An empty |needle| matches at
+// offset 0.
+size_t Find(const StringView& needle, const StringView& haystack) {
+  if (needle.size() == 0)
+    return 0;
+  if (needle.size() > haystack.size())
+    return std::string::npos;
+  // Use memcmp rather than strncmp: StringViews are length-delimited, so the
+  // comparison must not stop early at a NUL byte.
+  for (size_t i = 0; i + needle.size() <= haystack.size(); ++i) {
+    if (memcmp(haystack.data() + i, needle.data(), needle.size()) == 0)
+      return i;
+  }
+  return std::string::npos;
+}
+
 bool CaseInsensitiveEqual(const std::string& first, const std::string& second) {
   return first.size() == second.size() &&
          std::equal(
diff --git a/src/base/string_utils_unittest.cc b/src/base/string_utils_unittest.cc
index c14415c..c38f746 100644
--- a/src/base/string_utils_unittest.cc
+++ b/src/base/string_utils_unittest.cc
@@ -108,6 +108,38 @@
   EXPECT_EQ(StripChars("foobar", "froab", '_'), "______");
 }
 
+TEST(StringUtilsTest, Contains) {
+  EXPECT_TRUE(Contains("", ""));
+  EXPECT_TRUE(Contains("abc", ""));
+  EXPECT_TRUE(Contains("abc", "a"));
+  EXPECT_TRUE(Contains("abc", "b"));
+  EXPECT_TRUE(Contains("abc", "c"));
+  EXPECT_TRUE(Contains("abc", "ab"));
+  EXPECT_TRUE(Contains("abc", "bc"));
+  EXPECT_TRUE(Contains("abc", "abc"));
+  EXPECT_FALSE(Contains("abc", "d"));
+  EXPECT_FALSE(Contains("abc", "ac"));
+  EXPECT_FALSE(Contains("abc", "abcd"));
+  EXPECT_FALSE(Contains("", "a"));
+  EXPECT_FALSE(Contains("", "abc"));
+}
+
+TEST(StringUtilsTest, Find) {
+  EXPECT_EQ(Find("", ""), 0u);
+  EXPECT_EQ(Find("", "abc"), 0u);
+  EXPECT_EQ(Find("a", "abc"), 0u);
+  EXPECT_EQ(Find("b", "abc"), 1u);
+  EXPECT_EQ(Find("c", "abc"), 2u);
+  EXPECT_EQ(Find("ab", "abc"), 0u);
+  EXPECT_EQ(Find("bc", "abc"), 1u);
+  EXPECT_EQ(Find("abc", "abc"), 0u);
+  EXPECT_EQ(Find("d", "abc"), std::string::npos);
+  EXPECT_EQ(Find("ac", "abc"), std::string::npos);
+  EXPECT_EQ(Find("abcd", "abc"), std::string::npos);
+  EXPECT_EQ(Find("a", ""), std::string::npos);
+  EXPECT_EQ(Find("abc", ""), std::string::npos);
+}
+
 }  // namespace
 }  // namespace base
 }  // namespace perfetto
diff --git a/src/base/temp_file.cc b/src/base/temp_file.cc
index d57d383..d1e77e9 100644
--- a/src/base/temp_file.cc
+++ b/src/base/temp_file.cc
@@ -14,6 +14,9 @@
  * limitations under the License.
  */
 
+#include "perfetto/base/build_config.h"
+#if !PERFETTO_BUILDFLAG(PERFETTO_OS_WIN)
+
 #include "perfetto/ext/base/temp_file.h"
 
 #include <stdlib.h>
@@ -90,3 +93,6 @@
 
 }  // namespace base
 }  // namespace perfetto
+
+
+#endif  // !PERFETTO_BUILDFLAG(PERFETTO_OS_WIN)
diff --git a/src/base/thread_task_runner.cc b/src/base/thread_task_runner.cc
index d0a1036..0576ee9 100644
--- a/src/base/thread_task_runner.cc
+++ b/src/base/thread_task_runner.cc
@@ -14,6 +14,9 @@
  * limitations under the License.
  */
 
+#include "perfetto/base/build_config.h"
+#if !PERFETTO_BUILDFLAG(PERFETTO_OS_WIN)
+
 #include "perfetto/ext/base/thread_task_runner.h"
 
 #include <condition_variable>
@@ -79,3 +82,5 @@
 
 }  // namespace base
 }  // namespace perfetto
+
+#endif  // !PERFETTO_BUILDFLAG(PERFETTO_OS_WIN)
diff --git a/src/base/unix_task_runner.cc b/src/base/unix_task_runner.cc
index 6ef71aa..422947f 100644
--- a/src/base/unix_task_runner.cc
+++ b/src/base/unix_task_runner.cc
@@ -14,9 +14,10 @@
  * limitations under the License.
  */
 
-#include "perfetto/ext/base/unix_task_runner.h"
-
 #include "perfetto/base/build_config.h"
+#if !PERFETTO_BUILDFLAG(PERFETTO_OS_WIN)
+
+#include "perfetto/ext/base/unix_task_runner.h"
 
 #include <errno.h>
 #include <stdlib.h>
@@ -229,3 +230,5 @@
 
 }  // namespace base
 }  // namespace perfetto
+
+#endif  // !PERFETTO_BUILDFLAG(PERFETTO_OS_WIN)
diff --git a/src/profiling/memory/BUILD.gn b/src/profiling/memory/BUILD.gn
index a384fe8..10af659 100644
--- a/src/profiling/memory/BUILD.gn
+++ b/src/profiling/memory/BUILD.gn
@@ -93,6 +93,7 @@
 source_set("proc_utils") {
   deps = [
     "../../../gn:default_deps",
+    "../../../include/perfetto/profiling:normalize",
     "../../base",
   ]
   sources = [
@@ -212,6 +213,7 @@
     "../../../gn:default_deps",
     "../../../gn:gtest_and_gmock",
     "../../../gn:libunwindstack",
+    "../../../include/perfetto/profiling:normalize",
     "../../base",
     "../../base:test_support",
     "../../tracing",
diff --git a/src/profiling/memory/proc_utils.cc b/src/profiling/memory/proc_utils.cc
index 07f85cd..6d0b6f3 100644
--- a/src/profiling/memory/proc_utils.cc
+++ b/src/profiling/memory/proc_utils.cc
@@ -21,7 +21,7 @@
 #include <unistd.h>
 
 #include "perfetto/ext/base/file_utils.h"
-#include "src/profiling/memory/ext.h"
+#include "perfetto/profiling/normalize.h"
 
 namespace perfetto {
 namespace profiling {
diff --git a/src/profiling/memory/proc_utils_unittest.cc b/src/profiling/memory/proc_utils_unittest.cc
index b1881bb..6c0065c 100644
--- a/src/profiling/memory/proc_utils_unittest.cc
+++ b/src/profiling/memory/proc_utils_unittest.cc
@@ -15,7 +15,7 @@
  */
 
 #include "src/profiling/memory/proc_utils.h"
-#include "src/profiling/memory/ext.h"
+#include "perfetto/profiling/normalize.h"
 
 #include "perfetto/ext/base/utils.h"
 #include "test/gtest_and_gmock.h"
diff --git a/src/trace_processor/db/bit_vector.cc b/src/trace_processor/db/bit_vector.cc
index 5e2db89..edf1af6 100644
--- a/src/trace_processor/db/bit_vector.cc
+++ b/src/trace_processor/db/bit_vector.cc
@@ -19,12 +19,19 @@
 namespace perfetto {
 namespace trace_processor {
 
-BitVector::BitVector(uint32_t count, bool value) : inner_(count, value) {}
+BitVector::BitVector() = default;
 
-BitVector::BitVector(std::vector<bool> inner) : inner_(std::move(inner)) {}
+BitVector::BitVector(uint32_t count, bool value) {
+  Resize(count, value);
+}
+
+BitVector::BitVector(std::vector<Block> blocks,
+                     std::vector<uint32_t> counts,
+                     uint32_t size)
+    : size_(size), counts_(std::move(counts)), blocks_(std::move(blocks)) {}
 
 BitVector BitVector::Copy() const {
-  return BitVector(inner_);
+  return BitVector(blocks_, counts_, size_);
 }
 
 }  // namespace trace_processor
diff --git a/src/trace_processor/db/bit_vector.h b/src/trace_processor/db/bit_vector.h
index 4bddd75..bd85b76 100644
--- a/src/trace_processor/db/bit_vector.h
+++ b/src/trace_processor/db/bit_vector.h
@@ -18,8 +18,10 @@
 #define SRC_TRACE_PROCESSOR_DB_BIT_VECTOR_H_
 
 #include <stdint.h>
+#include <stdio.h>
 
 #include <algorithm>
+#include <array>
 #include <vector>
 
 #include "perfetto/base/logging.h"
@@ -29,17 +31,15 @@
 
 // A bitvector which compactly stores a vector of bools using a single bit
 // for each bool.
-// TODO(lalitm): currently this is just a thin wrapper around std::vector<bool>
-// but in the future, we plan to add quite a few optimizations around ranges
-// of set bits.
 class BitVector {
  public:
   // Creates an empty bitvector.
-  BitVector() = default;
+  BitVector();
 
   // Creates a bitvector of |count| size filled with |value|.
   BitVector(uint32_t count, bool value = false);
 
+  // Enable moving bitvectors as they have no unmovable state.
   BitVector(BitVector&&) noexcept = default;
   BitVector& operator=(BitVector&&) = default;
 
@@ -47,21 +47,14 @@
   BitVector Copy() const;
 
   // Returns the size of the bitvector.
-  uint32_t size() const { return static_cast<uint32_t>(inner_.size()); }
+  uint32_t size() const { return static_cast<uint32_t>(size_); }
 
   // Returns whether the bit at |idx| is set.
   bool IsSet(uint32_t idx) const {
     PERFETTO_DCHECK(idx < size());
-    return inner_[idx];
-  }
 
-  // Returns the index of the next set bit at or after index |idx|.
-  // If there is no other set bits, returns |size()|.
-  uint32_t NextSet(uint32_t idx) const {
-    PERFETTO_DCHECK(idx <= inner_.size());
-    auto it = std::find(inner_.begin() + static_cast<ptrdiff_t>(idx),
-                        inner_.end(), true);
-    return static_cast<uint32_t>(std::distance(inner_.begin(), it));
+    Address a = IndexToAddress(idx);
+    return blocks_[a.block_idx].IsSet(a.block_offset);
   }
 
   // Returns the number of set bits in the bitvector.
@@ -70,35 +63,184 @@
   // Returns the number of set bits between the start of the bitvector
   // (inclusive) and the index |end| (exclusive).
   uint32_t GetNumBitsSet(uint32_t end) const {
-    return static_cast<uint32_t>(std::count(
-        inner_.begin(), inner_.begin() + static_cast<ptrdiff_t>(end), true));
+    if (end == 0)
+      return 0;
+
+    // Although the external interface we present uses an exclusive |end|,
+    // internally it's a lot nicer to work with an inclusive |end| (mainly
+    // because we get block rollovers on exclusive ends which means we need
+    // to have if checks to ensure we don't overflow the number of blocks).
+    Address addr = IndexToAddress(end - 1);
+    uint32_t idx = addr.block_idx;
+
+    // Add the number of set bits until the start of the block to the number
+    // of set bits until the end address inside the block.
+    return counts_[idx] + blocks_[idx].GetNumBitsSet(addr.block_offset);
   }
 
-  // Returns the index of the |n|'th set bit.
+  // Returns the index of the |n|th set bit. Should only be called with |n| <
+  // GetNumBitsSet().
   uint32_t IndexOfNthSet(uint32_t n) const {
-    // TODO(lalitm): improve the performance of this method by investigating
-    // AVX instructions.
-    uint32_t offset = 0;
-    for (uint32_t i = NextSet(0); i < size(); i = NextSet(i + 1), ++offset) {
-      if (offset == n)
-        return i;
+    PERFETTO_DCHECK(n < GetNumBitsSet());
+
+    // First search for the block which, up until the start of it, has more than
+    // n bits set. Note that this should never return |counts_.begin()| as
+    // that should always be 0.
+    // TODO(lalitm): investigate whether we can make this faster with small
+    // binary search followed by a linear search instead of binary searching the
+    // full way.
+    auto it = std::upper_bound(counts_.begin(), counts_.end(), n);
+    PERFETTO_DCHECK(it != counts_.begin());
+
+    // Go back one block; the index is 32-bit to match Address::block_idx.
+    uint32_t block_idx =
+        static_cast<uint32_t>(std::distance(counts_.begin(), it) - 1);
+
+    // Figure out how many set bits forward we are looking inside the block
+    // by taking away the number of bits at the start of the block from n.
+    uint32_t set_in_block = n - counts_[block_idx];
+
+    // Compute the address of the bit in the block then convert the full
+    // address back to an index.
+    BlockOffset block_offset = blocks_[block_idx].IndexOfNthSet(set_in_block);
+    return AddressToIndex(Address{block_idx, block_offset});
+  }
+
+  // Sets the bit at index |idx| to true. |idx| must be < size().
+  void Set(uint32_t idx) {
+    PERFETTO_DCHECK(idx < size());
+
+    // Read the old bit first: the counts only change if the bit flips.
+    auto addr = IndexToAddress(idx);
+    bool old_value = blocks_[addr.block_idx].IsSet(addr.block_offset);
+
+    // If the old value was unset, set the bit and add one to the later counts.
+    if (PERFETTO_LIKELY(!old_value)) {
+      blocks_[addr.block_idx].Set(addr.block_offset);
+      uint32_t size = static_cast<uint32_t>(counts_.size());
+      for (uint32_t i = addr.block_idx + 1; i < size; ++i) {
+        counts_[i]++;
+      }
+    }
-    PERFETTO_FATAL("Index out of bounds");
   }
 
-  // Sets the value at index |idx| to |value|.
-  void Set(uint32_t idx, bool value) {
-    PERFETTO_DCHECK(idx < size());
-    inner_[idx] = value;
+  // Sets the bit at index |idx| to false. |idx| must be < size().
+  void Clear(uint32_t idx) {
+    PERFETTO_DCHECK(idx < size());
+
+    // Read the old bit first: the counts only change if the bit flips.
+    auto addr = IndexToAddress(idx);
+    bool old_value = blocks_[addr.block_idx].IsSet(addr.block_offset);
+
+    // If the old value was set, clear the bit and subtract one from the
+    // counts of all the later blocks.
+    if (PERFETTO_LIKELY(old_value)) {
+      blocks_[addr.block_idx].Clear(addr.block_offset);
+      uint32_t size = static_cast<uint32_t>(counts_.size());
+      for (uint32_t i = addr.block_idx + 1; i < size; ++i) {
+        counts_[i]--;
+      }
+    }
   }
 
-  // Appends |value| to the bitvector.
-  void Append(bool value) { inner_.push_back(value); }
+  // Appends true to the bitvector.
+  void AppendTrue() {
+    Address addr = IndexToAddress(size_);
+    uint32_t old_blocks_size = static_cast<uint32_t>(blocks_.size());
+    uint32_t new_blocks_size = addr.block_idx + 1;
+
+    if (PERFETTO_UNLIKELY(new_blocks_size > old_blocks_size)) {
+      uint32_t t = GetNumBitsSet();
+      blocks_.emplace_back();
+      counts_.emplace_back(t);
+    }
+
+    size_++;
+    blocks_[addr.block_idx].Set(addr.block_offset);
+  }
+
+  // Appends false to the bitvector.
+  void AppendFalse() {
+    Address addr = IndexToAddress(size_);
+    uint32_t old_blocks_size = static_cast<uint32_t>(blocks_.size());
+    uint32_t new_blocks_size = addr.block_idx + 1;
+
+    if (PERFETTO_UNLIKELY(new_blocks_size > old_blocks_size)) {
+      uint32_t t = GetNumBitsSet();
+      blocks_.emplace_back();
+      counts_.emplace_back(t);
+    }
+
+    size_++;
+    // We don't need to clear the bit as we ensure that anything after
+    // size_ is always set to false.
+  }
 
   // Resizes the BitVector to the given |size|.
   // Truncates the BitVector if |size| < |size()| or fills the new space with
-  // |value| if |size| > |size()|.
-  void Resize(uint32_t size, bool value = false) { inner_.resize(size, value); }
+  // |value| if |size| > |size()|. Calling this method is a noop if |size| ==
+  // |size()|.
+  void Resize(uint32_t size, bool value = false) {
+    uint32_t old_size = size_;
+    if (size == old_size)
+      return;
+
+    // Empty bitvectors should be memory efficient so we don't keep any data
+    // around in the bitvector.
+    if (size == 0) {
+      blocks_.clear();
+      counts_.clear();
+      size_ = 0;
+      return;
+    }
+
+    // Compute the address of the new last bit in the bitvector.
+    Address last_addr = IndexToAddress(size - 1);
+    uint32_t old_blocks_size = static_cast<uint32_t>(counts_.size());
+    uint32_t new_blocks_size = last_addr.block_idx + 1;
+
+    // Then, resize the block and count vectors to have the correct
+    // number of entries.
+    blocks_.resize(new_blocks_size);
+    counts_.resize(new_blocks_size);
+
+    if (new_blocks_size > old_blocks_size) {
+      // If we've increased the number of blocks, we need to fix the
+      // counts vector by setting the newly added counts to the
+      // count of the bitvector. This matches the empty blocks we just
+      // added. Below, we will actually set the bits in the newly added
+      // blocks and as we do that, we will update the counts.
+      //
+      // Note: as we haven't updated |size_| yet GetNumBitsSet() won't take into
+      // account the newly added blocks yet.
+      uint32_t count = GetNumBitsSet();
+      for (uint32_t i = old_blocks_size; i < new_blocks_size; ++i) {
+        counts_[i] = count;
+      }
+    }
+
+    // Actually update the size before we call |Set| below to ensure Set's
+    // invariants make sense.
+    size_ = size;
+    if (size > old_size) {
+      // Just go through all the newly added bits and set them to true - we
+      // don't need to do this if !value because we always expect the newly
+      // added bits to be zeroed (this is ensured by the else branch).
+      // TODO(lalitm): this is clearly non optimal. Try and have a more
+      // optimized version of this based on setting whole blocks
+      // to 1.
+      if (value) {
+        for (uint32_t i = old_size; i < size; ++i) {
+          Set(i);
+        }
+      }
+    } else {
+      // Throw away all the bits after the new last bit. We do this to make
+      // future lookup, append and resize operations not have to worry about
+      // trailing garbage bits in the last block.
+      blocks_[last_addr.block_idx].ClearAfter(last_addr.block_offset);
+    }
+  }
 
   // Updates the ith set bit of this bitvector with the value of
   // |other.IsSet(i)|.
@@ -116,20 +258,303 @@
   void UpdateSetBits(const BitVector& other) {
     PERFETTO_DCHECK(other.size() == GetNumBitsSet());
 
-    uint32_t offset = 0;
-    for (uint32_t i = NextSet(0); i < size(); i = NextSet(i + 1), ++offset) {
-      if (!other.IsSet(offset))
-        Set(i, false);
+    // Go through each set bit and if |other| has it unset, then unset the
+    // bit taking care to update the index we consider to take into account
+    // the bits we just unset.
+    // TODO(lalitm): add an iterator implementation to remove this
+    // inefficient loop.
+    uint32_t removed = 0;
+    for (uint32_t i = 0, size = other.size(); i < size; ++i) {
+      if (!other.IsSet(i)) {
+        Clear(IndexOfNthSet(i - removed++));
+      }
     }
   }
 
  private:
-  BitVector(std::vector<bool>);
+  // Represents the offset of a bit within a block.
+  struct BlockOffset {
+    uint16_t word_idx;
+    uint16_t bit_idx;
+  };
+
+  // Represents the address of a bit within the bitvector.
+  struct Address {
+    uint32_t block_idx;
+    BlockOffset block_offset;
+  };
+
+  // Represents the smallest collection of bits we can refer to as
+  // one unit.
+  //
+  // Currently, this is implemented as a 64 bit integer as this is the
+  // largest type which we can assume to be present on all platforms.
+  class BitWord {
+   public:
+    static constexpr uint32_t kBits = 64;
+
+    // Returns whether the bit at the given index is set.
+    bool IsSet(uint32_t idx) const {
+      PERFETTO_DCHECK(idx < kBits);
+      return (word >> idx) & 1ull;
+    }
+
+    // Sets the bit at the given index to true.
+    void Set(uint32_t idx) {
+      PERFETTO_DCHECK(idx < kBits);
+
+      // Or the word with a 1 shifted up to |idx|.
+      word |= 1ull << idx;
+    }
+
+    // Sets the bit at the given index to false.
+    void Clear(uint32_t idx) {
+      PERFETTO_DCHECK(idx < kBits);
+
+      // And the word with a mask which has every bit set apart from |idx|.
+      word &= ~(1ull << idx);
+    }
+
+    // Clears all the bits (i.e. sets the word to zero).
+    void ClearAll() { word = 0; }
+
+    // Returns the index of the nth set bit (|n| is zero-based).
+    // Undefined if |n| >= |GetNumBitsSet()|.
+    uint16_t IndexOfNthSet(uint32_t n) const {
+      PERFETTO_DCHECK(n < kBits);
+
+      // The below code is very dense but essentially computes the nth set
+      // bit inside |word| in the "broadword" style of programming (sometimes
+      // referred to as "SIMD within a register").
+      //
+      // Instead of treating a uint64 as an individual unit, broadword
+      // algorithms treat them as a packed vector of uint8. By doing this, they
+      // allow branchless algorithms when considering bits of a uint64.
+      //
+      // In benchmarks, this algorithm was found to be the fastest portable
+      // way of computing the nth set bit (if we were only targeting new
+      // versions of x64, we could also use pdep + ctz but unfortunately
+      // this would fail on WASM - this is about 2.5-3x faster on x64).
+      //
+      // The code below was taken from the paper
+      // http://vigna.di.unimi.it/ftp/papers/Broadword.pdf
+      uint64_t s = word - ((word & 0xAAAAAAAAAAAAAAAA) >> 1);
+      s = (s & 0x3333333333333333) + ((s >> 2) & 0x3333333333333333);
+      s = ((s + (s >> 4)) & 0x0F0F0F0F0F0F0F0F) * L8;
+
+      uint64_t b = (BwLessThan(s, n * L8) >> 7) * L8 >> 53 & ~7ull;
+      uint64_t l = n - ((s << 8) >> b & 0xFF);
+      s = (BwGtZero(((word >> b & 0xFF) * L8) & 0x8040201008040201) >> 7) * L8;
+
+      uint64_t ret = b + ((BwLessThan(s, l * L8) >> 7) * L8 >> 56);
+
+      return static_cast<uint16_t>(ret);
+    }
+
+    // Returns the number of set bits.
+    uint32_t GetNumBitsSet() const {
+      // We use __builtin_popcountll here as it's available natively for the two
+      // targets we care most about (x64 and WASM).
+      return static_cast<uint32_t>(__builtin_popcountll(word));
+    }
+
+    // Returns the number of set bits up to and including the bit at |idx|.
+    uint32_t GetNumBitsSet(uint32_t idx) const {
+      PERFETTO_DCHECK(idx < kBits);
+
+      // We use __builtin_popcountll here as it's available natively for the two
+      // targets we care most about (x64 and WASM).
+      return static_cast<uint32_t>(__builtin_popcountll(WordUntil(idx)));
+    }
+
+    // Retains all bits up to and including the bit at |idx| and clears
+    // all bits after this point.
+    void ClearAfter(uint32_t idx) {
+      PERFETTO_DCHECK(idx < kBits);
+      word = WordUntil(idx);
+    }
+
+   private:
+    // Constant with the low bit of every byte set.
+    static constexpr uint64_t L8 = 0x0101010101010101;
+
+    // Constant with the high bit of every byte set.
+    static constexpr uint64_t H8 = 0x8080808080808080;
+
+    // Returns a packed uint64 encoding whether each byte of x is less
+    // than each corresponding byte of y.
+    // This is computed in the "broadword" style of programming; see
+    // IndexOfNthSet for details on this.
+    static uint64_t BwLessThan(uint64_t x, uint64_t y) {
+      return (((y | H8) - (x & ~H8)) ^ x ^ y) & H8;
+    }
+
+    // Returns a packed uint64 encoding whether each byte of x is greater
+    // than zero.
+    // This is computed in the "broadword" style of programming; see
+    // IndexOfNthSet for details on this.
+    static uint64_t BwGtZero(uint64_t x) { return (((x | H8) - L8) | x) & H8; }
+
+    // Returns the bits up to and including the bit at |idx|.
+    uint64_t WordUntil(uint32_t idx) const {
+      PERFETTO_DCHECK(idx < kBits);
+
+      // To understand what is happening here, consider an example.
+      // Suppose we want to keep all the bits up to the 7th bit in the word
+      //               7th
+      //                |
+      //                v
+      // word: 01010101011111000
+      //
+      // The easiest way to do this would be if we had a mask with only
+      // the bottom 7 bits set:
+      // mask: 00000000001111111
+      //
+      // Start with 1 and shift it up (idx + 1) bits; we get:
+      // top : 00000000010000000
+      uint64_t top = 1ull << ((idx + 1ull) % kBits);
+
+      // We need to handle the case where idx == 63. In this case |top| will be
+      // 1 because 1 << ((idx + 1) % 64) == 1 << (64 % 64) == 1.
+      // In this case, we actually want top == 0. We can do this by shifting
+      // down by (idx + 1) / kBits - this will be a noop for every index other
+      // than idx == 63. This should also be free on intel because of the mod
+      // instruction above.
+      top = top >> ((idx + 1) / kBits);
+
+      // Then if we take away 1, we get precisely the mask we want.
+      uint64_t mask = top - 1u;
+
+      // Finish up by anding the word with the computed mask.
+      return word & mask;
+    }
+
+    uint64_t word = 0;
+  };
+
+  // Represents a group of bits with a bitcount such that it is
+  // efficient to work on these bits.
+  //
+  // On x86 architectures we generally target for trace processor, the
+  // size of a cache line is 64 bytes (or 512 bits). For this reason,
+  // we make the size of the block contain 8 words as 8 * 64 == 512.
+  //
+  // TODO(lalitm): investigate whether we should tune this value for
+  // WASM and ARM.
+  class Block {
+   public:
+    // See class documentation for how these constants are chosen.
+    static constexpr uint32_t kWords = 8;
+    static constexpr uint32_t kBits = kWords * BitWord::kBits;
+
+    // Returns whether the bit at the given address is set.
+    bool IsSet(const BlockOffset& addr) const {
+      PERFETTO_DCHECK(addr.word_idx < kWords);
+
+      return words_[addr.word_idx].IsSet(addr.bit_idx);
+    }
+
+    // Sets the bit at the given address to true.
+    void Set(const BlockOffset& addr) {
+      PERFETTO_DCHECK(addr.word_idx < kWords);
+
+      words_[addr.word_idx].Set(addr.bit_idx);
+    }
+
+    // Sets the bit at the given address to false.
+    void Clear(const BlockOffset& addr) {
+      PERFETTO_DCHECK(addr.word_idx < kWords);
+
+      words_[addr.word_idx].Clear(addr.bit_idx);
+    }
+
+    // Gets the offset of the nth set bit in this block (|n| is zero-based).
+    BlockOffset IndexOfNthSet(uint32_t n) const {
+      uint32_t count = 0;
+      for (uint16_t i = 0; i < kWords; ++i) {
+        // Keep a running count of all the set bits seen so far.
+        uint32_t value = count + words_[i].GetNumBitsSet();
+        if (value <= n) {
+          count = value;
+          continue;
+        }
+
+        // The running count of set bits is more than |n|. That means this word
+        // contains the bit we are looking for.
+
+        // Take away the number of set bits to the start of this word from |n|.
+        uint32_t set_in_atom = n - count;
+
+        // Figure out the index of the set bit inside the word and create the
+        // address of this bit from that.
+        uint16_t bit_idx = words_[i].IndexOfNthSet(set_in_atom);
+        PERFETTO_DCHECK(bit_idx < 64);
+        return BlockOffset{i, bit_idx};
+      }
+      PERFETTO_FATAL("Index out of bounds");
+    }
+
+    // Gets the number of set bits within a block up to and including the bit
+    // at the given address.
+    uint32_t GetNumBitsSet(const BlockOffset& addr) const {
+      PERFETTO_DCHECK(addr.word_idx < kWords);
+
+      // Count all the set bits in each word until we reach the last word
+      // index.
+      uint32_t count = 0;
+      for (uint32_t i = 0; i < addr.word_idx; ++i) {
+        count += words_[i].GetNumBitsSet();
+      }
+
+      // For the last word, only count the bits up to and including the bit
+      // index.
+      return count + words_[addr.word_idx].GetNumBitsSet(addr.bit_idx);
+    }
+
+    // Retains all bits up to and including the bit at |offset| and clears
+    // all bits after this point.
+    void ClearAfter(const BlockOffset& offset) {
+      PERFETTO_DCHECK(offset.word_idx < kWords);
+
+      // In the first word, keep the bits up to and including |offset|.
+      words_[offset.word_idx].ClearAfter(offset.bit_idx);
+
+      // For all subsequent words, we just clear the whole word.
+      for (uint32_t i = offset.word_idx + 1; i < kWords; ++i) {
+        words_[i].ClearAll();
+      }
+    }
+
+   private:
+    std::array<BitWord, kWords> words_{};
+  };
+
+  BitVector(std::vector<Block> blocks,
+            std::vector<uint32_t> counts,
+            uint32_t size);
 
   BitVector(const BitVector&) = delete;
   BitVector& operator=(const BitVector&) = delete;
 
-  std::vector<bool> inner_;
+  static Address IndexToAddress(uint32_t idx) {
+    Address a;
+    a.block_idx = idx / Block::kBits;
+
+    uint16_t bit_idx_inside_block = idx % Block::kBits;
+    a.block_offset.word_idx = bit_idx_inside_block / BitWord::kBits;
+    a.block_offset.bit_idx = bit_idx_inside_block % BitWord::kBits;
+    return a;
+  }
+
+  static uint32_t AddressToIndex(Address addr) {
+    return addr.block_idx * Block::kBits +
+           addr.block_offset.word_idx * BitWord::kBits +
+           addr.block_offset.bit_idx;
+  }
+
+  uint32_t size_ = 0;
+  std::vector<uint32_t> counts_;
+  std::vector<Block> blocks_;
 };
 
 }  // namespace trace_processor
diff --git a/src/trace_processor/db/bit_vector_benchmark.cc b/src/trace_processor/db/bit_vector_benchmark.cc
index d6ed930..50e9baf 100644
--- a/src/trace_processor/db/bit_vector_benchmark.cc
+++ b/src/trace_processor/db/bit_vector_benchmark.cc
@@ -18,68 +18,119 @@
 
 #include "src/trace_processor/db/bit_vector.h"
 
-static void BM_BitVectorAppend(benchmark::State& state) {
-  static constexpr uint32_t kPoolSize = 1024 * 1024;
-  std::vector<bool> bit_pool(kPoolSize);
+namespace {
 
-  static constexpr uint32_t kRandomSeed = 42;
-  std::minstd_rand0 rnd_engine(kRandomSeed);
-  for (uint32_t i = 0; i < kPoolSize; ++i) {
-    bit_pool[i] = rnd_engine() % 2;
-  }
+using perfetto::trace_processor::BitVector;
 
-  perfetto::trace_processor::BitVector bv;
-  uint32_t pool_idx = 0;
+}
+
+static void BM_BitVectorAppendTrue(benchmark::State& state) {
+  BitVector bv;
   for (auto _ : state) {
-    bv.Append(bit_pool[pool_idx]);
-    pool_idx = (pool_idx + 1) % kPoolSize;
+    bv.AppendTrue();
     benchmark::ClobberMemory();
   }
 }
-BENCHMARK(BM_BitVectorAppend);
+BENCHMARK(BM_BitVectorAppendTrue);
+
+static void BM_BitVectorAppendFalse(benchmark::State& state) {
+  BitVector bv;
+  for (auto _ : state) {
+    bv.AppendFalse();
+    benchmark::ClobberMemory();
+  }
+}
+BENCHMARK(BM_BitVectorAppendFalse);
 
 static void BM_BitVectorSet(benchmark::State& state) {
+  static constexpr uint32_t kRandomSeed = 42;
+  std::minstd_rand0 rnd_engine(kRandomSeed);
+
+  uint32_t size = static_cast<uint32_t>(state.range(0));
+
+  BitVector bv;
+  for (uint32_t i = 0; i < size; ++i) {
+    if (rnd_engine() % 2) {
+      bv.AppendTrue();
+    } else {
+      bv.AppendFalse();
+    }
+  }
+
   static constexpr uint32_t kPoolSize = 1024 * 1024;
   std::vector<bool> bit_pool(kPoolSize);
   std::vector<uint32_t> row_pool(kPoolSize);
-
-  static constexpr uint32_t kSize = 123456;
-  perfetto::trace_processor::BitVector bv;
-
-  static constexpr uint32_t kRandomSeed = 42;
-  std::minstd_rand0 rnd_engine(kRandomSeed);
   for (uint32_t i = 0; i < kPoolSize; ++i) {
     bit_pool[i] = rnd_engine() % 2;
-    row_pool[i] = rnd_engine() % kSize;
-  }
-
-  for (uint32_t i = 0; i < kSize; ++i) {
-    bv.Append(rnd_engine() % 2);
+    row_pool[i] = rnd_engine() % size;
   }
 
   uint32_t pool_idx = 0;
   for (auto _ : state) {
-    bv.Set(row_pool[pool_idx], bit_pool[pool_idx]);
+    bv.Set(row_pool[pool_idx]);
     pool_idx = (pool_idx + 1) % kPoolSize;
     benchmark::ClobberMemory();
   }
 }
-BENCHMARK(BM_BitVectorSet);
+BENCHMARK(BM_BitVectorSet)
+    ->Arg(64)
+    ->Arg(512)
+    ->Arg(8192)
+    ->Arg(123456)
+    ->Arg(1234567);
 
-static void BM_BitVectorIndexOfNthSet(benchmark::State& state) {
-  static constexpr uint32_t kPoolSize = 1024 * 1024;
-  std::vector<uint32_t> row_pool(kPoolSize);
-
-  static constexpr uint32_t kSize = 123456;
-  perfetto::trace_processor::BitVector bv;
-
+static void BM_BitVectorClear(benchmark::State& state) {
   static constexpr uint32_t kRandomSeed = 42;
   std::minstd_rand0 rnd_engine(kRandomSeed);
-  for (uint32_t i = 0; i < kSize; ++i) {
-    bool value = rnd_engine() % 2;
-    bv.Append(value);
+
+  uint32_t size = static_cast<uint32_t>(state.range(0));
+
+  BitVector bv;
+  for (uint32_t i = 0; i < size; ++i) {
+    if (rnd_engine() % 2) {
+      bv.AppendTrue();
+    } else {
+      bv.AppendFalse();
+    }
   }
 
+  static constexpr uint32_t kPoolSize = 1024 * 1024;
+  std::vector<uint32_t> row_pool(kPoolSize);
+  for (uint32_t i = 0; i < kPoolSize; ++i) {
+    row_pool[i] = rnd_engine() % size;
+  }
+
+  uint32_t pool_idx = 0;
+  for (auto _ : state) {
+    bv.Clear(row_pool[pool_idx]);
+    pool_idx = (pool_idx + 1) % kPoolSize;
+    benchmark::ClobberMemory();
+  }
+}
+BENCHMARK(BM_BitVectorClear)
+    ->Arg(64)
+    ->Arg(512)
+    ->Arg(8192)
+    ->Arg(123456)
+    ->Arg(1234567);
+
+static void BM_BitVectorIndexOfNthSet(benchmark::State& state) {
+  static constexpr uint32_t kRandomSeed = 42;
+  std::minstd_rand0 rnd_engine(kRandomSeed);
+
+  uint32_t size = static_cast<uint32_t>(state.range(0));
+
+  BitVector bv;
+  for (uint32_t i = 0; i < size; ++i) {
+    if (rnd_engine() % 2) {
+      bv.AppendTrue();
+    } else {
+      bv.AppendFalse();
+    }
+  }
+
+  static constexpr uint32_t kPoolSize = 1024 * 1024;
+  std::vector<uint32_t> row_pool(kPoolSize);
   uint32_t set_bit_count = bv.GetNumBitsSet();
   for (uint32_t i = 0; i < kPoolSize; ++i) {
     row_pool[i] = rnd_engine() % set_bit_count;
@@ -91,18 +142,28 @@
     pool_idx = (pool_idx + 1) % kPoolSize;
   }
 }
-BENCHMARK(BM_BitVectorIndexOfNthSet);
+BENCHMARK(BM_BitVectorIndexOfNthSet)
+    ->Arg(64)
+    ->Arg(512)
+    ->Arg(8192)
+    ->Arg(123456)
+    ->Arg(1234567);
 
 static void BM_BitVectorGetNumBitsSet(benchmark::State& state) {
-  static constexpr uint32_t kSize = 123456;
-  perfetto::trace_processor::BitVector bv;
-  uint32_t count = 0;
-
   static constexpr uint32_t kRandomSeed = 42;
   std::minstd_rand0 rnd_engine(kRandomSeed);
-  for (uint32_t i = 0; i < kSize; ++i) {
+
+  uint32_t size = static_cast<uint32_t>(state.range(0));
+
+  uint32_t count = 0;
+  BitVector bv;
+  for (uint32_t i = 0; i < size; ++i) {
     bool value = rnd_engine() % 2;
-    bv.Append(value);
+    if (value) {
+      bv.AppendTrue();
+    } else {
+      bv.AppendFalse();
+    }
 
     if (value)
       count++;
@@ -114,4 +175,9 @@
   }
   PERFETTO_CHECK(res == count);
 }
-BENCHMARK(BM_BitVectorGetNumBitsSet);
+BENCHMARK(BM_BitVectorGetNumBitsSet)
+    ->Arg(64)
+    ->Arg(512)
+    ->Arg(8192)
+    ->Arg(123456)
+    ->Arg(1234567);
diff --git a/src/trace_processor/db/bit_vector_unittest.cc b/src/trace_processor/db/bit_vector_unittest.cc
index 371b4fe..7e7543b 100644
--- a/src/trace_processor/db/bit_vector_unittest.cc
+++ b/src/trace_processor/db/bit_vector_unittest.cc
@@ -16,97 +16,210 @@
 
 #include "src/trace_processor/db/bit_vector.h"
 
+#include <random>
+
 #include "test/gtest_and_gmock.h"
 
 namespace perfetto {
 namespace trace_processor {
 namespace {
 
-TEST(BitVectorUnittest, Set) {
-  BitVector bv(3, true);
-  bv.Set(0, false);
-  bv.Set(1, true);
+TEST(BitVectorUnittest, CreateAllTrue) {
+  BitVector bv(2049, true);
 
-  ASSERT_EQ(bv.size(), 3u);
-  ASSERT_FALSE(bv.IsSet(0));
+  // Ensure that a selection of interesting bits are set.
+  ASSERT_TRUE(bv.IsSet(0));
   ASSERT_TRUE(bv.IsSet(1));
-  ASSERT_TRUE(bv.IsSet(2));
+  ASSERT_TRUE(bv.IsSet(511));
+  ASSERT_TRUE(bv.IsSet(512));
+  ASSERT_TRUE(bv.IsSet(2047));
+  ASSERT_TRUE(bv.IsSet(2048));
 }
 
-TEST(BitVectorUnittest, Append) {
+TEST(BitVectorUnittest, CreateAllFalse) {
+  BitVector bv(2049, false);
+
+  // Ensure that a selection of interesting bits are cleared.
+  ASSERT_FALSE(bv.IsSet(0));
+  ASSERT_FALSE(bv.IsSet(1));
+  ASSERT_FALSE(bv.IsSet(511));
+  ASSERT_FALSE(bv.IsSet(512));
+  ASSERT_FALSE(bv.IsSet(2047));
+  ASSERT_FALSE(bv.IsSet(2048));
+}
+
+TEST(BitVectorUnittest, Set) {
+  BitVector bv(2049, false);
+  bv.Set(0);
+  bv.Set(1);
+  bv.Set(511);
+  bv.Set(512);
+  bv.Set(2047);
+
+  // Ensure the bits we touched are set.
+  ASSERT_TRUE(bv.IsSet(0));
+  ASSERT_TRUE(bv.IsSet(1));
+  ASSERT_TRUE(bv.IsSet(511));
+  ASSERT_TRUE(bv.IsSet(512));
+  ASSERT_TRUE(bv.IsSet(2047));
+
+  // Ensure that a selection of other interesting bits are
+  // still cleared.
+  ASSERT_FALSE(bv.IsSet(2));
+  ASSERT_FALSE(bv.IsSet(63));
+  ASSERT_FALSE(bv.IsSet(64));
+  ASSERT_FALSE(bv.IsSet(510));
+  ASSERT_FALSE(bv.IsSet(513));
+  ASSERT_FALSE(bv.IsSet(1023));
+  ASSERT_FALSE(bv.IsSet(1024));
+  ASSERT_FALSE(bv.IsSet(2046));
+  ASSERT_FALSE(bv.IsSet(2048));
+  ASSERT_FALSE(bv.IsSet(2048));
+}
+
+TEST(BitVectorUnittest, Clear) {
+  BitVector bv(2049, true);
+  bv.Clear(0);
+  bv.Clear(1);
+  bv.Clear(511);
+  bv.Clear(512);
+  bv.Clear(2047);
+
+  // Ensure the bits we touched are cleared.
+  ASSERT_FALSE(bv.IsSet(0));
+  ASSERT_FALSE(bv.IsSet(1));
+  ASSERT_FALSE(bv.IsSet(511));
+  ASSERT_FALSE(bv.IsSet(512));
+  ASSERT_FALSE(bv.IsSet(2047));
+
+  // Ensure that a selection of other interesting bits are
+  // still set.
+  ASSERT_TRUE(bv.IsSet(2));
+  ASSERT_TRUE(bv.IsSet(63));
+  ASSERT_TRUE(bv.IsSet(64));
+  ASSERT_TRUE(bv.IsSet(510));
+  ASSERT_TRUE(bv.IsSet(513));
+  ASSERT_TRUE(bv.IsSet(1023));
+  ASSERT_TRUE(bv.IsSet(1024));
+  ASSERT_TRUE(bv.IsSet(2046));
+  ASSERT_TRUE(bv.IsSet(2048));
+}
+
+TEST(BitVectorUnittest, AppendToEmpty) {
   BitVector bv;
-  bv.Append(true);
-  bv.Append(false);
+  bv.AppendTrue();
+  bv.AppendFalse();
 
   ASSERT_EQ(bv.size(), 2u);
   ASSERT_TRUE(bv.IsSet(0));
   ASSERT_FALSE(bv.IsSet(1));
 }
 
-TEST(BitVectorUnittest, NextSet) {
-  BitVector bv(6, false);
-  bv.Set(1, true);
-  bv.Set(2, true);
-  bv.Set(4, true);
+TEST(BitVectorUnittest, AppendToExisting) {
+  BitVector bv(2046, false);
+  bv.AppendTrue();
+  bv.AppendFalse();
+  bv.AppendTrue();
+  bv.AppendTrue();
 
-  ASSERT_EQ(bv.NextSet(0), 1u);
-  ASSERT_EQ(bv.NextSet(1), 1u);
-  ASSERT_EQ(bv.NextSet(2), 2u);
-  ASSERT_EQ(bv.NextSet(3), 4u);
-  ASSERT_EQ(bv.NextSet(4), 4u);
-  ASSERT_EQ(bv.NextSet(5), 6u);
+  ASSERT_EQ(bv.size(), 2050u);
+  ASSERT_TRUE(bv.IsSet(2046));
+  ASSERT_FALSE(bv.IsSet(2047));
+  ASSERT_TRUE(bv.IsSet(2048));
+  ASSERT_TRUE(bv.IsSet(2049));
 }
 
 TEST(BitVectorUnittest, GetNumBitsSet) {
-  BitVector bv(6, false);
-  bv.Set(1, true);
-  bv.Set(2, true);
-  bv.Set(4, true);
+  BitVector bv(2049, false);
+  bv.Set(0);
+  bv.Set(1);
+  bv.Set(511);
+  bv.Set(512);
+  bv.Set(2047);
+  bv.Set(2048);
 
-  ASSERT_EQ(bv.GetNumBitsSet(), 3u);
+  ASSERT_EQ(bv.GetNumBitsSet(), 6u);
 
   ASSERT_EQ(bv.GetNumBitsSet(0), 0u);
-  ASSERT_EQ(bv.GetNumBitsSet(1), 0u);
-  ASSERT_EQ(bv.GetNumBitsSet(2), 1u);
+  ASSERT_EQ(bv.GetNumBitsSet(1), 1u);
+  ASSERT_EQ(bv.GetNumBitsSet(2), 2u);
   ASSERT_EQ(bv.GetNumBitsSet(3), 2u);
-  ASSERT_EQ(bv.GetNumBitsSet(4), 2u);
-  ASSERT_EQ(bv.GetNumBitsSet(5), 3u);
-  ASSERT_EQ(bv.GetNumBitsSet(6), 3u);
+  ASSERT_EQ(bv.GetNumBitsSet(511), 2u);
+  ASSERT_EQ(bv.GetNumBitsSet(512), 3u);
+  ASSERT_EQ(bv.GetNumBitsSet(1023), 4u);
+  ASSERT_EQ(bv.GetNumBitsSet(1024), 4u);
+  ASSERT_EQ(bv.GetNumBitsSet(2047), 4u);
+  ASSERT_EQ(bv.GetNumBitsSet(2048), 5u);
+  ASSERT_EQ(bv.GetNumBitsSet(2049), 6u);
 }
 
 TEST(BitVectorUnittest, IndexOfNthSet) {
-  BitVector bv(6, false);
-  bv.Set(1, true);
-  bv.Set(2, true);
-  bv.Set(4, true);
+  BitVector bv(2050, false);
+  bv.Set(0);
+  bv.Set(1);
+  bv.Set(511);
+  bv.Set(512);
+  bv.Set(2047);
+  bv.Set(2048);
 
-  ASSERT_EQ(bv.IndexOfNthSet(0), 1u);
-  ASSERT_EQ(bv.IndexOfNthSet(1), 2u);
-  ASSERT_EQ(bv.IndexOfNthSet(2), 4u);
+  ASSERT_EQ(bv.IndexOfNthSet(0), 0u);
+  ASSERT_EQ(bv.IndexOfNthSet(1), 1u);
+  ASSERT_EQ(bv.IndexOfNthSet(2), 511u);
+  ASSERT_EQ(bv.IndexOfNthSet(3), 512u);
+  ASSERT_EQ(bv.IndexOfNthSet(4), 2047u);
+  ASSERT_EQ(bv.IndexOfNthSet(5), 2048u);
 }
 
 TEST(BitVectorUnittest, Resize) {
   BitVector bv(1, false);
+
   bv.Resize(2, true);
-  bv.Resize(3, false);
-
-  ASSERT_EQ(bv.IsSet(1), true);
-  ASSERT_EQ(bv.IsSet(2), false);
-
-  bv.Resize(2, false);
-
   ASSERT_EQ(bv.size(), 2u);
   ASSERT_EQ(bv.IsSet(1), true);
+
+  bv.Resize(2049, false);
+  ASSERT_EQ(bv.size(), 2049u);
+  ASSERT_EQ(bv.IsSet(2), false);
+  ASSERT_EQ(bv.IsSet(2047), false);
+  ASSERT_EQ(bv.IsSet(2048), false);
+
+  // Set these two bits; the first should be preserved and the
+  // second should disappear.
+  bv.Set(512);
+  bv.Set(513);
+
+  bv.Resize(513, false);
+  ASSERT_EQ(bv.size(), 513u);
+  ASSERT_EQ(bv.IsSet(1), true);
+  ASSERT_EQ(bv.IsSet(512), true);
+  ASSERT_EQ(bv.GetNumBitsSet(), 2u);
+
+  // When we resize up, we need to be sure that the set bit from
+  // before we resized down is not still present as a garbage bit.
+  bv.Resize(514, false);
+  ASSERT_EQ(bv.size(), 514u);
+  ASSERT_EQ(bv.IsSet(513), false);
+  ASSERT_EQ(bv.GetNumBitsSet(), 2u);
+}
+
+TEST(BitVectorUnittest, AppendAfterResizeDown) {
+  BitVector bv(2049, false);
+  bv.Set(2048);
+
+  bv.Resize(2048);
+  bv.AppendFalse();
+  ASSERT_EQ(bv.IsSet(2048), false);
+  ASSERT_EQ(bv.GetNumBitsSet(), 0u);
 }
 
 TEST(BitVectorUnittest, UpdateSetBits) {
   BitVector bv(6, false);
-  bv.Set(1, true);
-  bv.Set(2, true);
-  bv.Set(4, true);
+  bv.Set(1);
+  bv.Set(2);
+  bv.Set(4);
 
   BitVector picker(3u, true);
-  picker.Set(1, false);
+  picker.Clear(1);
 
   bv.UpdateSetBits(picker);
 
@@ -115,6 +228,37 @@
   ASSERT_TRUE(bv.IsSet(4));
 }
 
+TEST(BitVectorUnittest, QueryStressTest) {
+  BitVector bv;
+  std::vector<bool> bool_vec;
+  std::vector<uint32_t> int_vec;
+
+  static constexpr uint32_t kCount = 4096;
+  std::minstd_rand0 rand;
+  for (uint32_t i = 0; i < kCount; ++i) {
+    bool res = rand() % 2u;
+    if (res) {
+      bv.AppendTrue();
+    } else {
+      bv.AppendFalse();
+    }
+    bool_vec.push_back(res);
+    if (res)
+      int_vec.emplace_back(i);
+  }
+
+  for (uint32_t i = 0; i < kCount; ++i) {
+    uint32_t count = static_cast<uint32_t>(std::count(
+        bool_vec.begin(), bool_vec.begin() + static_cast<int32_t>(i), true));
+    ASSERT_EQ(bv.IsSet(i), bool_vec[i]);
+    ASSERT_EQ(bv.GetNumBitsSet(i), count);
+  }
+
+  for (uint32_t i = 0; i < int_vec.size(); ++i) {
+    ASSERT_EQ(bv.IndexOfNthSet(i), int_vec[i]);
+  }
+}
+
 }  // namespace
 }  // namespace trace_processor
 }  // namespace perfetto
diff --git a/src/trace_processor/db/column.cc b/src/trace_processor/db/column.cc
index 9e46036..91b22ef 100644
--- a/src/trace_processor/db/column.cc
+++ b/src/trace_processor/db/column.cc
@@ -66,6 +66,9 @@
     case FilterOp::kGt:
       iv->RemoveIf([this, value](uint32_t row) { return Get(row) <= value; });
       break;
+    case FilterOp::kNeq:
+      iv->RemoveIf([this, value](uint32_t row) { return Get(row) == value; });
+      break;
   }
 }
 
diff --git a/src/trace_processor/db/column.h b/src/trace_processor/db/column.h
index 8930ba8..3f4b717 100644
--- a/src/trace_processor/db/column.h
+++ b/src/trace_processor/db/column.h
@@ -30,8 +30,9 @@
 namespace trace_processor {
 
 // Represents the possible filter operations on a column.
-enum FilterOp {
+enum class FilterOp {
   kEq,
+  kNeq,
   kGt,
   kLt,
 };
@@ -186,6 +187,9 @@
   Constraint lt(SqlValue value) const {
     return Constraint{col_idx_, FilterOp::kLt, value};
   }
+  Constraint neq(SqlValue value) const {
+    return Constraint{col_idx_, FilterOp::kNeq, value};
+  }
 
   // Returns an Order for each Order type for this Column.
   Order ascending() const { return Order{col_idx_, false}; }
diff --git a/src/trace_processor/db/row_map.h b/src/trace_processor/db/row_map.h
index e6e1f81..7b2b10e 100644
--- a/src/trace_processor/db/row_map.h
+++ b/src/trace_processor/db/row_map.h
@@ -82,7 +82,7 @@
     if (compact_) {
       if (row >= bit_vector_.size())
         bit_vector_.Resize(row + 1, false);
-      bit_vector_.Set(row, true);
+      bit_vector_.Set(row);
     } else {
       index_vector_.emplace_back(row);
     }
@@ -109,8 +109,13 @@
   void RemoveIf(Predicate p) {
     if (compact_) {
       const auto& bv = bit_vector_;
-      for (uint32_t i = bv.NextSet(0); i < bv.size(); i = bv.NextSet(i + 1)) {
-        bit_vector_.Set(i, !p(i));
+      uint32_t removed = 0;
+      for (uint32_t i = 0, size = bv.GetNumBitsSet(); i < size; ++i) {
+        uint32_t idx = bv.IndexOfNthSet(i - removed);
+        if (p(idx)) {
+          removed++;
+          bit_vector_.Clear(idx);
+        }
       }
     } else {
       auto it = std::remove_if(index_vector_.begin(), index_vector_.end(), p);
diff --git a/src/trace_processor/db/row_map_benchmark.cc b/src/trace_processor/db/row_map_benchmark.cc
index 505f7e0..89f41a7 100644
--- a/src/trace_processor/db/row_map_benchmark.cc
+++ b/src/trace_processor/db/row_map_benchmark.cc
@@ -41,7 +41,11 @@
   std::minstd_rand0 rnd_engine(kRandomSeed);
   BitVector bv;
   for (uint32_t i = 0; i < size; ++i) {
-    bv.Append(rnd_engine() % 2);
+    if (rnd_engine() % 2) {
+      bv.AppendTrue();
+    } else {
+      bv.AppendFalse();
+    }
   }
   return bv;
 }
@@ -144,7 +148,7 @@
   static constexpr uint32_t kRandomSeed = 123;
   std::minstd_rand0 rnd_engine(kRandomSeed);
   BitVector bv(rm.size(), false);
-  bv.Set(rnd_engine() % bv.size(), true);
+  bv.Set(rnd_engine() % bv.size());
   RowMap selector(std::move(bv));
 
   for (auto _ : state) {
diff --git a/src/trace_processor/db/row_map_unittest.cc b/src/trace_processor/db/row_map_unittest.cc
index 7e375ac..719b533 100644
--- a/src/trace_processor/db/row_map_unittest.cc
+++ b/src/trace_processor/db/row_map_unittest.cc
@@ -27,12 +27,12 @@
 
 std::shared_ptr<RowMap> BitVectorRowMap() {
   BitVector bv;
-  bv.Append(true);
-  bv.Append(false);
-  bv.Append(true);
-  bv.Append(true);
-  bv.Append(false);
-  bv.Append(true);
+  bv.AppendTrue();
+  bv.AppendFalse();
+  bv.AppendTrue();
+  bv.AppendTrue();
+  bv.AppendFalse();
+  bv.AppendTrue();
   return std::shared_ptr<RowMap>(new RowMap(std::move(bv)));
 }
 
@@ -70,10 +70,10 @@
   RowMap row_map = GetParam()->Copy();
 
   BitVector picker_bv;
-  picker_bv.Append(true);
-  picker_bv.Append(false);
-  picker_bv.Append(false);
-  picker_bv.Append(true);
+  picker_bv.AppendTrue();
+  picker_bv.AppendFalse();
+  picker_bv.AppendFalse();
+  picker_bv.AppendTrue();
   RowMap picker(std::move(picker_bv));
 
   auto res = row_map.SelectRows(picker);
diff --git a/src/trace_processor/db/sparse_vector.h b/src/trace_processor/db/sparse_vector.h
index 12a2a60..c2472ac 100644
--- a/src/trace_processor/db/sparse_vector.h
+++ b/src/trace_processor/db/sparse_vector.h
@@ -51,11 +51,11 @@
   // Adds the given value to the SparseVector.
   void Append(T val) {
     data_.emplace_back(val);
-    valid_.Append(true);
+    valid_.AppendTrue();
   }
 
   // Adds a null value to the SparseVector.
-  void AppendNull() { valid_.Append(false); }
+  void AppendNull() { valid_.AppendFalse(); }
 
   // Adds the given optional value to the SparseVector.
   void Append(base::Optional<T> val) {
@@ -76,7 +76,7 @@
       data_[data_idx] = val;
     } else {
       data_.insert(data_.begin() + static_cast<ptrdiff_t>(data_idx), val);
-      valid_.Set(idx, true);
+      valid_.Set(idx);
     }
   }
 
diff --git a/src/trace_processor/export_json.cc b/src/trace_processor/export_json.cc
index fd5ea07..2ff1f6e 100644
--- a/src/trace_processor/export_json.cc
+++ b/src/trace_processor/export_json.cc
@@ -14,6 +14,9 @@
  * limitations under the License.
  */
 
+#include "perfetto/base/build_config.h"
+#if PERFETTO_BUILDFLAG(PERFETTO_TP_JSON)
+
 #include <inttypes.h>
 #include <json/reader.h>
 #include <json/value.h>
@@ -848,3 +851,5 @@
 }  // namespace json
 }  // namespace trace_processor
 }  // namespace perfetto
+
+#endif  // PERFETTO_BUILDFLAG(PERFETTO_TP_JSON)
diff --git a/src/trace_processor/forwarding_trace_parser.cc b/src/trace_processor/forwarding_trace_parser.cc
index b3aced7..5f6a99f 100644
--- a/src/trace_processor/forwarding_trace_parser.cc
+++ b/src/trace_processor/forwarding_trace_parser.cc
@@ -143,14 +143,14 @@
       base::StartsWith(start, "<html>"))
     return kSystraceTraceType;
 
+  // Ctrace is deflate'ed systrace.
+  if (start.find("TRACE:") != std::string::npos)
+    return kCtraceTraceType;
+
   // Systrace with no header or leading HTML.
   if (base::StartsWith(start, " "))
     return kSystraceTraceType;
 
-  // Ctrace is deflate'ed systrace.
-  if (base::StartsWith(start, "TRACE:"))
-    return kCtraceTraceType;
-
   // gzip'ed trace containing one of the other formats.
   if (base::StartsWith(start, "\x1f\x8b"))
     return kGzipTraceType;
diff --git a/src/trace_processor/ftrace_utils.cc b/src/trace_processor/ftrace_utils.cc
index 081ba94..b58fd73 100644
--- a/src/trace_processor/ftrace_utils.cc
+++ b/src/trace_processor/ftrace_utils.cc
@@ -97,6 +97,8 @@
       state_ |= Atom::kParked;
     else if (c == 'N')
       state_ |= Atom::kNoLoad;
+    else if (c == '|')
+      continue;
     else {
       invalid_char = true;
       break;
@@ -202,10 +204,13 @@
   }
 
   int64_t padding = 16 - static_cast<int64_t>(name.size());
-  if (PERFETTO_LIKELY(padding > 0)) {
+  if (padding > 0) {
     writer->AppendChar(' ', static_cast<size_t>(padding));
   }
-  writer->AppendString(name);
+  for (size_t i = 0; i < name.size(); ++i) {
+    char c = name.data()[i];
+    writer->AppendChar(c == '-' ? '_' : c);
+  }
   writer->AppendChar('-');
 
   size_t pre_pid_pos = writer->pos();
diff --git a/src/trace_processor/ftrace_utils_unittest.cc b/src/trace_processor/ftrace_utils_unittest.cc
index dbfe0e4..3675fb2 100644
--- a/src/trace_processor/ftrace_utils_unittest.cc
+++ b/src/trace_processor/ftrace_utils_unittest.cc
@@ -53,6 +53,9 @@
   ASSERT_STREQ(TaskState(4096).ToString().data(), "R+");
   ASSERT_STREQ(TaskState(130).ToString().data(), "DK");
   ASSERT_STREQ(TaskState(258).ToString().data(), "DW");
+
+  ASSERT_EQ(TaskState("D|K").raw_state(), 130);
+  ASSERT_EQ(TaskState("D|W").raw_state(), 258);
 }
 
 }  // namespace
diff --git a/src/trace_processor/fuchsia_trace_parser.cc b/src/trace_processor/fuchsia_trace_parser.cc
index 5313dd3..9afcfe8 100644
--- a/src/trace_processor/fuchsia_trace_parser.cc
+++ b/src/trace_processor/fuchsia_trace_parser.cc
@@ -197,6 +197,9 @@
           UniqueTid utid =
               procs->UpdateThread(static_cast<uint32_t>(tinfo.tid),
                                   static_cast<uint32_t>(tinfo.pid));
+          // TODO(lalitm): make use of this track id.
+          TrackId track_id = context_->track_tracker->InternThreadTrack(utid);
+          perfetto::base::ignore_result(track_id);
           RowId row = context_->event_tracker->PushInstant(ts, name, 0, utid,
                                                            RefType::kRefUtid);
           for (const Arg& arg : args) {
@@ -266,6 +269,9 @@
           UniqueTid utid =
               procs->UpdateThread(static_cast<uint32_t>(tinfo.tid),
                                   static_cast<uint32_t>(tinfo.pid));
+          // TODO(lalitm): make use of this track id.
+          TrackId track_id = context_->track_tracker->InternThreadTrack(utid);
+          perfetto::base::ignore_result(track_id);
           slices->Begin(ts, utid, RefType::kRefUtid, cat, name);
           break;
         }
@@ -273,6 +279,9 @@
           UniqueTid utid =
               procs->UpdateThread(static_cast<uint32_t>(tinfo.tid),
                                   static_cast<uint32_t>(tinfo.pid));
+          // TODO(lalitm): make use of this track id.
+          TrackId track_id = context_->track_tracker->InternThreadTrack(utid);
+          perfetto::base::ignore_result(track_id);
           // TODO(b/131181693): |cat| and |name| are not passed here so that
           // if two slices end at the same timestep, the slices get closed in
           // the correct order regardless of which end event is processed first.
@@ -285,6 +294,9 @@
           UniqueTid utid =
               procs->UpdateThread(static_cast<uint32_t>(tinfo.tid),
                                   static_cast<uint32_t>(tinfo.pid));
+          // TODO(lalitm): make use of this track id.
+          TrackId track_id = context_->track_tracker->InternThreadTrack(utid);
+          perfetto::base::ignore_result(track_id);
           slices->Scoped(ts, utid, RefType::kRefUtid, cat, name, end_ts - ts);
           break;
         }
diff --git a/src/trace_processor/gzip_trace_parser.cc b/src/trace_processor/gzip_trace_parser.cc
index a374019..4dfce0b 100644
--- a/src/trace_processor/gzip_trace_parser.cc
+++ b/src/trace_processor/gzip_trace_parser.cc
@@ -16,9 +16,13 @@
 
 #include "src/trace_processor/gzip_trace_parser.h"
 
+#include <string>
+
 #include <zlib.h>
 
 #include "perfetto/base/logging.h"
+#include "perfetto/ext/base/string_utils.h"
+#include "perfetto/ext/base/string_view.h"
 #include "src/trace_processor/forwarding_trace_parser.h"
 
 namespace perfetto {
@@ -45,13 +49,15 @@
   if (!inner_) {
     inner_.reset(new ForwardingTraceParser(context_));
 
-    // .ctrace files begin with "TRACE:" strip this if present.
-    static const char kSystraceFileHeader[] = "TRACE:\n";
-    if (size >= strlen(kSystraceFileHeader) &&
-        strncmp(reinterpret_cast<char*>(start), kSystraceFileHeader,
-                strlen(kSystraceFileHeader)) == 0) {
-      start += strlen(kSystraceFileHeader);
-      len -= strlen(kSystraceFileHeader);
+    // .ctrace files begin with "TRACE:\n" or "done. TRACE:\n"; skip everything
+    // up to and including that header if it is present.
+    base::StringView beginning(reinterpret_cast<char*>(start), size);
+
+    static const char* kSystraceFileHeader = "TRACE:\n";
+    size_t offset = Find(kSystraceFileHeader, beginning);
+    if (offset != std::string::npos) {
+      start += strlen(kSystraceFileHeader) + offset;
+      len -= strlen(kSystraceFileHeader) + offset;
+    }
   }
 
diff --git a/src/trace_processor/json_trace_parser.cc b/src/trace_processor/json_trace_parser.cc
index 5dc2816..1f0dbb4 100644
--- a/src/trace_processor/json_trace_parser.cc
+++ b/src/trace_processor/json_trace_parser.cc
@@ -14,6 +14,9 @@
  * limitations under the License.
  */
 
+#include "perfetto/base/build_config.h"
+#if PERFETTO_BUILDFLAG(PERFETTO_TP_JSON)
+
 #include "src/trace_processor/json_trace_parser.h"
 
 #include <inttypes.h>
@@ -23,7 +26,6 @@
 #include <limits>
 #include <string>
 
-#include "perfetto/base/build_config.h"
 #include "perfetto/base/logging.h"
 #include "perfetto/ext/base/string_view.h"
 #include "perfetto/ext/base/utils.h"
@@ -31,10 +33,7 @@
 #include "src/trace_processor/process_tracker.h"
 #include "src/trace_processor/slice_tracker.h"
 #include "src/trace_processor/trace_processor_context.h"
-
-#if !PERFETTO_BUILDFLAG(PERFETTO_TP_JSON)
-#error JSON parsing and exporting is not supported in this build configuration
-#endif
+#include "src/trace_processor/track_tracker.h"
 
 namespace perfetto {
 namespace trace_processor {
@@ -88,10 +87,16 @@
 
   switch (phase) {
     case 'B': {  // TRACE_EVENT_BEGIN.
+      // TODO(lalitm): make use of this track id.
+      TrackId track_id = context_->track_tracker->InternThreadTrack(utid);
+      perfetto::base::ignore_result(track_id);
       slice_tracker->Begin(timestamp, utid, RefType::kRefUtid, cat_id, name_id);
       break;
     }
     case 'E': {  // TRACE_EVENT_END.
+      // TODO(lalitm): make use of this track id.
+      TrackId track_id = context_->track_tracker->InternThreadTrack(utid);
+      perfetto::base::ignore_result(track_id);
       slice_tracker->End(timestamp, utid, RefType::kRefUtid, cat_id, name_id);
       break;
     }
@@ -100,6 +105,9 @@
           json_trace_utils::CoerceToNs(value["dur"]);
       if (!opt_dur.has_value())
         return;
+      // TODO(lalitm): make use of this track id.
+      TrackId track_id = context_->track_tracker->InternThreadTrack(utid);
+      perfetto::base::ignore_result(track_id);
       slice_tracker->Scoped(timestamp, utid, RefType::kRefUtid, cat_id, name_id,
                             opt_dur.value());
       break;
@@ -124,3 +132,5 @@
 
 }  // namespace trace_processor
 }  // namespace perfetto
+
+#endif  // PERFETTO_BUILDFLAG(PERFETTO_TP_JSON)
diff --git a/src/trace_processor/json_trace_tokenizer.cc b/src/trace_processor/json_trace_tokenizer.cc
index 274d908..120cc69 100644
--- a/src/trace_processor/json_trace_tokenizer.cc
+++ b/src/trace_processor/json_trace_tokenizer.cc
@@ -14,6 +14,9 @@
  * limitations under the License.
  */
 
+#include "perfetto/base/build_config.h"
+#if PERFETTO_BUILDFLAG(PERFETTO_TP_JSON)
+
 #include "src/trace_processor/json_trace_tokenizer.h"
 
 #include <json/reader.h>
@@ -147,3 +150,5 @@
 
 }  // namespace trace_processor
 }  // namespace perfetto
+
+#endif  // PERFETTO_BUILDFLAG(PERFETTO_TP_JSON)
diff --git a/src/trace_processor/json_trace_utils.cc b/src/trace_processor/json_trace_utils.cc
index 0d6ba70..c4db237 100644
--- a/src/trace_processor/json_trace_utils.cc
+++ b/src/trace_processor/json_trace_utils.cc
@@ -14,17 +14,14 @@
  * limitations under the License.
  */
 
+#include "perfetto/base/build_config.h"
+#if PERFETTO_BUILDFLAG(PERFETTO_TP_JSON)
+
 #include "src/trace_processor/json_trace_utils.h"
 
 #include <json/value.h>
 #include <limits>
 
-#include "perfetto/base/build_config.h"
-
-#if !PERFETTO_BUILDFLAG(PERFETTO_TP_JSON)
-#error JSON parsing and exporting is not supported in this build configuration
-#endif
-
 namespace perfetto {
 namespace trace_processor {
 namespace json_trace_utils {
@@ -83,3 +80,5 @@
 }  // namespace json_trace_utils
 }  // namespace trace_processor
 }  // namespace perfetto
+
+#endif  // PERFETTO_BUILDFLAG(PERFETTO_TP_JSON)
diff --git a/src/trace_processor/metrics/android/heap_profile_callsite_stats.sql b/src/trace_processor/metrics/android/heap_profile_callsite_stats.sql
index 716b17c..f81b3d3 100644
--- a/src/trace_processor/metrics/android/heap_profile_callsite_stats.sql
+++ b/src/trace_processor/metrics/android/heap_profile_callsite_stats.sql
@@ -37,6 +37,21 @@
 SELECT *
 FROM callsite_parser;
 
+-- Join frames with symbols
+CREATE TABLE symbolized_frame AS
+SELECT
+  spf.id AS id,
+  spf.mapping AS mapping,
+  IFNULL(
+    (SELECT name FROM stack_profile_symbol symbol
+      WHERE symbol.symbol_set_id = spf.symbol_set_id
+      LIMIT 1),
+    spf.name
+  ) AS name
+FROM stack_profile_frame spf;
+
+CREATE UNIQUE INDEX symbolized_frame_idx ON symbolized_frame(id);
+
 -- Join with the frames table to get the symbol names.
 -- Output order for position matters (as will be the order in the subsequent aggregate operations).
 -- We use the cross join to force the join order between virtual and non-virtual tables.
@@ -45,16 +60,12 @@
   callsite_id,
   position,
   HeapProfileCallsiteStats_Frame(
-    'name', IFNULL(
-      (SELECT name FROM stack_profile_symbol symbol
-        WHERE symbol.symbol_set_id = spf.symbol_set_id
-        LIMIT 1),
-      spf.name),
+    'name', spf.name,
     'mapping_name', stack_profile_mapping.name
   ) AS frame_proto
 FROM flattened_callsite
 CROSS JOIN stack_profile_callsite
-CROSS JOIN stack_profile_frame spf
+CROSS JOIN symbolized_frame spf
 CROSS JOIN stack_profile_mapping
 WHERE
   flattened_callsite.current_id = stack_profile_callsite.id
diff --git a/src/trace_processor/proto_incremental_state.h b/src/trace_processor/proto_incremental_state.h
index 408b2fd..837325c 100644
--- a/src/trace_processor/proto_incremental_state.h
+++ b/src/trace_processor/proto_incremental_state.h
@@ -82,6 +82,11 @@
 
 }  // namespace proto_incremental_state_internal
 
+struct DefaultFieldName;
+struct BuildIdFieldName;
+struct MappingPathsFieldName;
+struct FunctionNamesFieldName;
+
 // Stores per-packet-sequence incremental state during trace parsing, such as
 // reference timestamps for delta timestamp calculation and interned messages.
 class ProtoIncrementalState {
@@ -161,7 +166,9 @@
     int32_t pid() const { return pid_; }
     int32_t tid() const { return tid_; }
 
-    template <typename MessageType>
+    // Use DefaultFieldName only if there is a single field in InternedData of
+    // the MessageType.
+    template <typename MessageType, typename FieldName = DefaultFieldName>
     InternedDataMap<MessageType>* GetInternedDataMap();
 
    private:
@@ -191,7 +198,9 @@
     InternedDataMap<protos::pbzero::DebugAnnotationName>
         debug_annotation_names_;
     InternedDataMap<protos::pbzero::SourceLocation> source_locations_;
-    InternedDataMap<protos::pbzero::InternedString> interned_strings_;
+    InternedDataMap<protos::pbzero::InternedString> build_ids_;
+    InternedDataMap<protos::pbzero::InternedString> mapping_paths_;
+    InternedDataMap<protos::pbzero::InternedString> function_names_;
     InternedDataMap<protos::pbzero::LogMessageBody> interned_log_messages_;
     InternedDataMap<protos::pbzero::Mapping> mappings_;
     InternedDataMap<protos::pbzero::Frame> frames_;
@@ -253,9 +262,25 @@
 
 template <>
 inline ProtoIncrementalState::InternedDataMap<protos::pbzero::InternedString>*
+ProtoIncrementalState::PacketSequenceState::
+    GetInternedDataMap<protos::pbzero::InternedString, BuildIdFieldName>() {
+  return &build_ids_;
+}
+
+template <>
+inline ProtoIncrementalState::InternedDataMap<protos::pbzero::InternedString>*
 ProtoIncrementalState::PacketSequenceState::GetInternedDataMap<
-    protos::pbzero::InternedString>() {
-  return &interned_strings_;
+    protos::pbzero::InternedString,
+    MappingPathsFieldName>() {
+  return &mapping_paths_;
+}
+
+template <>
+inline ProtoIncrementalState::InternedDataMap<protos::pbzero::InternedString>*
+ProtoIncrementalState::PacketSequenceState::GetInternedDataMap<
+    protos::pbzero::InternedString,
+    FunctionNamesFieldName>() {
+  return &function_names_;
 }
 
 template <>
diff --git a/src/trace_processor/proto_trace_parser.cc b/src/trace_processor/proto_trace_parser.cc
index d8c972a..44099b8 100644
--- a/src/trace_processor/proto_trace_parser.cc
+++ b/src/trace_processor/proto_trace_parser.cc
@@ -36,6 +36,8 @@
 #include "src/trace_processor/heap_profile_tracker.h"
 #include "src/trace_processor/metadata.h"
 #include "src/trace_processor/process_tracker.h"
+#include "src/trace_processor/proto_incremental_state.h"
+#include "src/trace_processor/stack_profile_tracker.h"
 #include "src/trace_processor/syscall_tracker.h"
 #include "src/trace_processor/systrace_parser.h"
 #include "src/trace_processor/trace_processor_context.h"
@@ -132,9 +134,24 @@
       : seq_state_(seq_state) {}
 
   base::Optional<base::StringView> GetString(
-      StackProfileTracker::SourceStringId iid) const override {
-    auto* map =
-        seq_state_->GetInternedDataMap<protos::pbzero::InternedString>();
+      StackProfileTracker::SourceStringId iid,
+      StackProfileTracker::InternedStringType type) const override {
+    ProtoIncrementalState::InternedDataMap<protos::pbzero::InternedString>*
+        map = nullptr;
+    switch (type) {
+      case StackProfileTracker::InternedStringType::kBuildId:
+        map = seq_state_->GetInternedDataMap<protos::pbzero::InternedString,
+                                             BuildIdFieldName>();
+        break;
+      case StackProfileTracker::InternedStringType::kFunctionName:
+        map = seq_state_->GetInternedDataMap<protos::pbzero::InternedString,
+                                             FunctionNamesFieldName>();
+        break;
+      case StackProfileTracker::InternedStringType::kMappingPath:
+        map = seq_state_->GetInternedDataMap<protos::pbzero::InternedString,
+                                             MappingPathsFieldName>();
+        break;
+    }
     auto it = map->find(iid);
     if (it == map->end()) {
       PERFETTO_DLOG("Did not find string %" PRIu64 " in %zu elems", iid,
@@ -1771,6 +1788,9 @@
 
   switch (static_cast<char>(phase)) {
     case 'B': {  // TRACE_EVENT_PHASE_BEGIN.
+      // TODO(lalitm): make use of this track id.
+      TrackId track_id = context_->track_tracker->InternThreadTrack(utid);
+      perfetto::base::ignore_result(track_id);
       auto opt_slice_id = slice_tracker->Begin(
           ts, utid, RefType::kRefUtid, category_id, name_id, args_callback);
       if (opt_slice_id.has_value()) {
@@ -1785,6 +1805,9 @@
       break;
     }
     case 'E': {  // TRACE_EVENT_PHASE_END.
+      // TODO(lalitm): make use of this track id.
+      TrackId track_id = context_->track_tracker->InternThreadTrack(utid);
+      perfetto::base::ignore_result(track_id);
       auto opt_slice_id = slice_tracker->End(
           ts, utid, RefType::kRefUtid, category_id, name_id, args_callback);
       if (opt_slice_id.has_value()) {
@@ -1798,6 +1821,9 @@
       auto duration_ns = legacy_event.duration_us() * 1000;
       if (duration_ns < 0)
         return;
+      // TODO(lalitm): make use of this track id.
+      TrackId track_id = context_->track_tracker->InternThreadTrack(utid);
+      perfetto::base::ignore_result(track_id);
       auto opt_slice_id =
           slice_tracker->Scoped(ts, utid, RefType::kRefUtid, category_id,
                                 name_id, duration_ns, args_callback);
@@ -1823,6 +1849,9 @@
       switch (legacy_event.instant_event_scope()) {
         case LegacyEvent::SCOPE_UNSPECIFIED:
         case LegacyEvent::SCOPE_THREAD: {
+          // TODO(lalitm): make use of this track id.
+          TrackId track_id = context_->track_tracker->InternThreadTrack(utid);
+          perfetto::base::ignore_result(track_id);
           auto opt_slice_id =
               slice_tracker->Scoped(ts, utid, RefType::kRefUtid, category_id,
                                     name_id, duration_ns, args_callback);
@@ -1837,14 +1866,24 @@
           break;
         }
         case LegacyEvent::SCOPE_GLOBAL: {
+          // TODO(lalitm): make use of this track id.
+          TrackId track_id =
+              context_->track_tracker->GetOrCreateChromeGlobalInstantTrack();
+          perfetto::base::ignore_result(track_id);
           slice_tracker->Scoped(ts, /*ref=*/0, RefType::kRefNoRef, category_id,
                                 name_id, duration_ns, args_callback);
           break;
         }
         case LegacyEvent::SCOPE_PROCESS: {
-          slice_tracker->Scoped(ts, procs->GetOrCreateProcess(pid),
-                                RefType::kRefUpid, category_id, name_id,
-                                duration_ns, args_callback);
+          // TODO(lalitm): make use of this track id.
+          UniquePid instant_upid = procs->GetOrCreateProcess(pid);
+          TrackId track_id =
+              context_->track_tracker->InternChromeProcessInstantTrack(
+                  instant_upid);
+          perfetto::base::ignore_result(track_id);
+          slice_tracker->Scoped(ts, instant_upid, RefType::kRefUpid,
+                                category_id, name_id, duration_ns,
+                                args_callback);
           break;
         }
         default: {
@@ -2436,6 +2475,9 @@
       sprintf(fallback, "Event %d", eid);
       name_id = context_->storage->InternString(fallback);
     }
+    // TODO(lalitm): make use of this track id.
+    TrackId track_id = context_->track_tracker->InternThreadTrack(utid);
+    perfetto::base::ignore_result(track_id);
     context_->slice_tracker->Scoped(ts, utid, RefType::kRefUtid, cat_id,
                                     name_id, event.event_duration_ns());
   } else if (event.has_counter_id()) {
diff --git a/src/trace_processor/proto_trace_tokenizer.cc b/src/trace_processor/proto_trace_tokenizer.cc
index 2714db9..e093ae8 100644
--- a/src/trace_processor/proto_trace_tokenizer.cc
+++ b/src/trace_processor/proto_trace_tokenizer.cc
@@ -28,6 +28,7 @@
 #include "src/trace_processor/clock_tracker.h"
 #include "src/trace_processor/event_tracker.h"
 #include "src/trace_processor/process_tracker.h"
+#include "src/trace_processor/proto_incremental_state.h"
 #include "src/trace_processor/stats.h"
 #include "src/trace_processor/trace_blob_view.h"
 #include "src/trace_processor/trace_sorter.h"
@@ -58,7 +59,7 @@
 constexpr uint8_t kTracePacketTag =
     MakeTagLengthDelimited(protos::pbzero::Trace::kPacketFieldNumber);
 
-template <typename MessageType>
+template <typename MessageType, typename FieldName = DefaultFieldName>
 void InternMessage(TraceProcessorContext* context,
                    ProtoIncrementalState::PacketSequenceState* state,
                    TraceBlobView message) {
@@ -77,7 +78,7 @@
   }
   iid = field.as_uint64();
 
-  auto res = state->GetInternedDataMap<MessageType>()->emplace(
+  auto res = state->GetInternedDataMap<MessageType, FieldName>()->emplace(
       iid,
       ProtoIncrementalState::InternedDataView<MessageType>(std::move(message)));
   // If a message with this ID is already interned, its data should not have
@@ -424,17 +425,17 @@
 
   for (auto it = interned_data_decoder.build_ids(); it; ++it) {
     size_t offset = interned_data.offset_of(it->data());
-    InternMessage<protos::pbzero::InternedString>(
+    InternMessage<protos::pbzero::InternedString, BuildIdFieldName>(
         context_, state, interned_data.slice(offset, it->size()));
   }
   for (auto it = interned_data_decoder.mapping_paths(); it; ++it) {
     size_t offset = interned_data.offset_of(it->data());
-    InternMessage<protos::pbzero::InternedString>(
+    InternMessage<protos::pbzero::InternedString, MappingPathsFieldName>(
         context_, state, interned_data.slice(offset, it->size()));
   }
   for (auto it = interned_data_decoder.function_names(); it; ++it) {
     size_t offset = interned_data.offset_of(it->data());
-    InternMessage<protos::pbzero::InternedString>(
+    InternMessage<protos::pbzero::InternedString, FunctionNamesFieldName>(
         context_, state, interned_data.slice(offset, it->size()));
   }
 
diff --git a/src/trace_processor/slice_tracker.cc b/src/trace_processor/slice_tracker.cc
index 70083da..500d23c 100644
--- a/src/trace_processor/slice_tracker.cc
+++ b/src/trace_processor/slice_tracker.cc
@@ -23,6 +23,7 @@
 #include "src/trace_processor/slice_tracker.h"
 #include "src/trace_processor/trace_processor_context.h"
 #include "src/trace_processor/trace_storage.h"
+#include "src/trace_processor/track_tracker.h"
 
 namespace perfetto {
 namespace trace_processor {
@@ -45,6 +46,9 @@
   UniqueTid utid =
       context_->process_tracker->UpdateThread(ftrace_tid, atrace_tgid);
   ftrace_to_atrace_tgid_[ftrace_tid] = atrace_tgid;
+  // TODO(lalitm): make use of this track id.
+  TrackId track_id = context_->track_tracker->InternThreadTrack(utid);
+  perfetto::base::ignore_result(track_id);
   return Begin(timestamp, utid, RefType::kRefUtid, category, name);
 }
 
@@ -137,6 +141,9 @@
   }
   UniqueTid utid =
       context_->process_tracker->UpdateThread(ftrace_tid, actual_tgid);
+  // TODO(lalitm): make use of this track id.
+  TrackId track_id = context_->track_tracker->InternThreadTrack(utid);
+  perfetto::base::ignore_result(track_id);
   return End(timestamp, utid, RefType::kRefUtid);
 }
 
diff --git a/src/trace_processor/sqlite/db_sqlite_table.cc b/src/trace_processor/sqlite/db_sqlite_table.cc
index 8853984..a288918 100644
--- a/src/trace_processor/sqlite/db_sqlite_table.cc
+++ b/src/trace_processor/sqlite/db_sqlite_table.cc
@@ -32,6 +32,8 @@
       return FilterOp::kGt;
     case SQLITE_INDEX_CONSTRAINT_LT:
       return FilterOp::kLt;
+    case SQLITE_INDEX_CONSTRAINT_NE:
+      return FilterOp::kNeq;
     default:
       PERFETTO_FATAL("Currently unsupported constraint");
   }
diff --git a/src/trace_processor/sqlite/sqlite_utils.h b/src/trace_processor/sqlite/sqlite_utils.h
index 83d1181..a30d87c 100644
--- a/src/trace_processor/sqlite/sqlite_utils.h
+++ b/src/trace_processor/sqlite/sqlite_utils.h
@@ -407,7 +407,10 @@
 
   sqlite3_stmt* raw_stmt = nullptr;
   int err = sqlite3_prepare_v2(db, sql, n, &raw_stmt, nullptr);
-
+  if (err != SQLITE_OK) {  // Preparation failed; report why before bailing.
+    PERFETTO_ELOG("Preparing database failed: %s", sqlite3_errmsg(db));
+    return {};  // Value-initialized result signals that nothing was read.
+  }
   ScopedStmt stmt(raw_stmt);
   PERFETTO_DCHECK(sqlite3_column_count(*stmt) == 2);
 
diff --git a/src/trace_processor/stack_profile_tracker.cc b/src/trace_processor/stack_profile_tracker.cc
index 8d0a157..be518f3 100644
--- a/src/trace_processor/stack_profile_tracker.cc
+++ b/src/trace_processor/stack_profile_tracker.cc
@@ -40,13 +40,15 @@
                                         const InternLookup* intern_lookup) {
   std::string path;
   for (SourceStringId str_id : mapping.name_ids) {
-    auto opt_str = FindString(str_id, intern_lookup);
+    auto opt_str =
+        FindString(str_id, intern_lookup, InternedStringType::kMappingPath);
     if (!opt_str)
       break;
     path += "/" + *opt_str;
   }
 
-  auto opt_build_id = FindAndInternString(mapping.build_id, intern_lookup);
+  auto opt_build_id = FindAndInternString(mapping.build_id, intern_lookup,
+                                          InternedStringType::kBuildId);
   if (!opt_build_id) {
     context_->storage->IncrementStats(stats::stackprofile_invalid_string_id);
     PERFETTO_DFATAL("Invalid string.");
@@ -86,7 +88,8 @@
 int64_t StackProfileTracker::AddFrame(SourceFrameId id,
                                       const SourceFrame& frame,
                                       const InternLookup* intern_lookup) {
-  auto opt_str_id = FindAndInternString(frame.name_id, intern_lookup);
+  auto opt_str_id = FindAndInternString(frame.name_id, intern_lookup,
+                                        InternedStringType::kFunctionName);
   if (!opt_str_id) {
     context_->storage->IncrementStats(stats::stackprofile_invalid_string_id);
     PERFETTO_DFATAL("Invalid string.");
@@ -170,11 +173,12 @@
 
 base::Optional<StringId> StackProfileTracker::FindAndInternString(
     SourceStringId id,
-    const InternLookup* intern_lookup) {
+    const InternLookup* intern_lookup,
+    StackProfileTracker::InternedStringType type) {
   if (id == 0)
     return empty_;
 
-  auto opt_str = FindString(id, intern_lookup);
+  auto opt_str = FindString(id, intern_lookup, type);
   if (!opt_str)
     return empty_;
 
@@ -183,18 +187,20 @@
 
 base::Optional<std::string> StackProfileTracker::FindString(
     SourceStringId id,
-    const InternLookup* intern_lookup) {
+    const InternLookup* intern_lookup,
+    StackProfileTracker::InternedStringType type) {
   if (id == 0)
     return "";
 
   auto it = string_map_.find(id);
   if (it == string_map_.end()) {
     if (intern_lookup) {
-      auto str = intern_lookup->GetString(id);
+      auto str = intern_lookup->GetString(id, type);
       if (!str) {
         context_->storage->IncrementStats(
             stats::stackprofile_invalid_string_id);
         PERFETTO_DFATAL("Invalid string.");
+        return base::nullopt;
       }
       return str->ToStdString();
     }
diff --git a/src/trace_processor/stack_profile_tracker.h b/src/trace_processor/stack_profile_tracker.h
index caf5b91..114865c 100644
--- a/src/trace_processor/stack_profile_tracker.h
+++ b/src/trace_processor/stack_profile_tracker.h
@@ -60,6 +60,12 @@
  public:
   using SourceStringId = uint64_t;
 
+  enum class InternedStringType {  // Which interned string set an id is from.
+    kMappingPath,   // Path component of a mapping (mapping_paths).
+    kBuildId,       // Build id of a mapping (build_ids).
+    kFunctionName,  // Name of a frame's function (function_names).
+  };
+
   struct SourceMapping {
     SourceStringId build_id = 0;
     uint64_t exact_offset = 0;
@@ -98,7 +104,8 @@
     virtual ~InternLookup();
 
     virtual base::Optional<base::StringView> GetString(
-        SourceStringId) const = 0;
+        SourceStringId,
+        InternedStringType) const = 0;
     virtual base::Optional<SourceMapping> GetMapping(SourceMappingId) const = 0;
     virtual base::Optional<SourceFrame> GetFrame(SourceFrameId) const = 0;
     virtual base::Optional<SourceCallstack> GetCallstack(
@@ -133,9 +140,11 @@
   // InternedData (for versions newer than Q).
   base::Optional<StringId> FindAndInternString(
       SourceStringId,
-      const InternLookup* intern_lookup);
+      const InternLookup* intern_lookup,
+      InternedStringType type);
   base::Optional<std::string> FindString(SourceStringId,
-                                         const InternLookup* intern_lookup);
+                                         const InternLookup* intern_lookup,
+                                         InternedStringType type);
   base::Optional<int64_t> FindMapping(SourceMappingId,
                                       const InternLookup* intern_lookup);
   base::Optional<int64_t> FindFrame(SourceFrameId,
diff --git a/src/trace_processor/syscall_tracker.h b/src/trace_processor/syscall_tracker.h
index 76523dd..4ad84a1 100644
--- a/src/trace_processor/syscall_tracker.h
+++ b/src/trace_processor/syscall_tracker.h
@@ -24,6 +24,7 @@
 #include "src/trace_processor/slice_tracker.h"
 #include "src/trace_processor/trace_processor_context.h"
 #include "src/trace_processor/trace_storage.h"
+#include "src/trace_processor/track_tracker.h"
 
 namespace perfetto {
 namespace trace_processor {
@@ -50,6 +51,9 @@
   void Enter(int64_t ts, UniqueTid utid, uint32_t syscall_num) {
     StringId name = SyscallNumberToStringId(syscall_num);
     if (!name.is_null()) {
+      // TODO(lalitm): make use of this track id.
+      TrackId track_id = context_->track_tracker->InternThreadTrack(utid);
+      perfetto::base::ignore_result(track_id);
       context_->slice_tracker->Begin(ts, utid, RefType::kRefUtid, 0 /* cat */,
                                      name);
     }
@@ -58,6 +62,9 @@
   void Exit(int64_t ts, UniqueTid utid, uint32_t syscall_num) {
     StringId name = SyscallNumberToStringId(syscall_num);
     if (!name.is_null()) {
+      // TODO(lalitm): make use of this track id.
+      TrackId track_id = context_->track_tracker->InternThreadTrack(utid);
+      perfetto::base::ignore_result(track_id);
       context_->slice_tracker->End(ts, utid, RefType::kRefUtid, 0 /* cat */,
                                    name);
     }
diff --git a/src/trace_processor/syscall_tracker_unittest.cc b/src/trace_processor/syscall_tracker_unittest.cc
index fa5fce5..21861ec 100644
--- a/src/trace_processor/syscall_tracker_unittest.cc
+++ b/src/trace_processor/syscall_tracker_unittest.cc
@@ -52,6 +52,8 @@
  public:
   SyscallTrackerTest() {
     context.storage.reset(new TraceStorage());
+    track_tracker = new TrackTracker(&context);
+    context.track_tracker.reset(track_tracker);
     slice_tracker = new MockSliceTracker(&context);
     context.slice_tracker.reset(slice_tracker);
     context.syscall_tracker.reset(new SyscallTracker(&context));
@@ -60,6 +62,7 @@
  protected:
   TraceProcessorContext context;
   MockSliceTracker* slice_tracker;
+  TrackTracker* track_tracker;
 };
 
 TEST_F(SyscallTrackerTest, ReportUnknownSyscalls) {
diff --git a/src/trace_processor/systrace_parser.h b/src/trace_processor/systrace_parser.h
index f85e1dd..c8c3452 100644
--- a/src/trace_processor/systrace_parser.h
+++ b/src/trace_processor/systrace_parser.h
@@ -99,6 +99,11 @@
   size_t len = str.size();
   *out = {};
 
+  constexpr const char* kClockSyncPrefix = "trace_event_clock_sync:";
+  if (len >= strlen(kClockSyncPrefix) &&
+      strncmp(kClockSyncPrefix, s, strlen(kClockSyncPrefix)) == 0)
+    return SystraceParseResult::kUnsupported;
+
   if (len < 2)
     return SystraceParseResult::kFailure;
 
diff --git a/src/trace_processor/systrace_parser_unittest.cc b/src/trace_processor/systrace_parser_unittest.cc
index 5d6f32e..d1a1158 100644
--- a/src/trace_processor/systrace_parser_unittest.cc
+++ b/src/trace_processor/systrace_parser_unittest.cc
@@ -69,6 +69,13 @@
   ASSERT_EQ(ParseSystraceTracePoint("F|123|foo|456", &result),
             Result::kSuccess);
   EXPECT_EQ(result, SystraceTracePoint::F(123, "foo", 456));
+
+  ASSERT_EQ(ParseSystraceTracePoint("trace_event_clock_sync: parent_ts=0.123\n",
+                                    &result),
+            Result::kUnsupported);
+  ASSERT_EQ(ParseSystraceTracePoint("trace_event_clock_sync: realtime_ts=123\n",
+                                    &result),
+            Result::kUnsupported);
 }
 
 }  // namespace
diff --git a/src/trace_processor/systrace_trace_parser.cc b/src/trace_processor/systrace_trace_parser.cc
index 3d81e52..777f262 100644
--- a/src/trace_processor/systrace_trace_parser.cc
+++ b/src/trace_processor/systrace_trace_parser.cc
@@ -34,7 +34,7 @@
 namespace {
 
 std::string SubstrTrim(const std::string& input, size_t start, size_t end) {
-  auto s = input.substr(start, end - start);
+  std::string s = input.substr(start, end - start);
   s.erase(s.begin(), std::find_if(s.begin(), s.end(),
                                   [](int ch) { return !std::isspace(ch); }));
   s.erase(std::find_if(s.rbegin(), s.rend(),
@@ -44,6 +44,17 @@
   return s;
 }
 
+std::pair<size_t, size_t> FindTask(const std::string& line) {
+  // Returns {start, length} of the task name: first non-space char up to the
+  // first '-' (or end of |line|). The cast avoids UB in isspace() on chars < 0.
+  size_t start = 0;
+  while (start < line.size() &&
+         isspace(static_cast<unsigned char>(line[start])))
+    ++start;
+  size_t end = line.find('-', start);
+  return {start, (end == std::string::npos ? line.size() : end) - start};
+}
+
 }  // namespace
 
 SystraceTraceParser::SystraceTraceParser(TraceProcessorContext* ctx)
@@ -94,6 +105,8 @@
   return util::OkStatus();
 }
 
+// TODO(hjd): This should be more robust to being passed random input.
+// This can happen if we mess up detecting a gzip trace for example.
 util::Status SystraceTraceParser::ParseSingleSystraceEvent(
     const std::string& buffer) {
   // An example line from buffer looks something like the following:
@@ -102,8 +115,12 @@
   // However, sometimes the tgid can be missing and buffer looks like this:
   // <idle>-0     [000] ...2     0.002188: task_newtask: pid=1 ...
 
-  auto task_idx = 16u;
-  std::string task = SubstrTrim(buffer, 0, task_idx);
+  size_t task_start;
+  size_t task_length;
+  std::tie<size_t, size_t>(task_start, task_length) = FindTask(buffer);
+
+  size_t task_idx = task_start + task_length;
+  std::string task = buffer.substr(task_start, task_length);
 
   // Try and figure out whether tgid is present by searching for '(' but only
   // if it occurs before the start of cpu (indiciated by '[') - this is because
@@ -112,6 +129,10 @@
   auto cpu_idx = buffer.find('[', task_idx + 1);
   bool has_tgid = tgid_idx != std::string::npos && tgid_idx < cpu_idx;
 
+  if (cpu_idx == std::string::npos) {
+    return util::Status("Could not find [ in " + buffer);
+  }
+
   auto pid_end = has_tgid ? cpu_idx : tgid_idx;
   std::string pid_str = SubstrTrim(buffer, task_idx + 1, pid_end);
   auto pid = static_cast<uint32_t>(std::stoi(pid_str));
diff --git a/src/trace_processor/tables/track_tables.h b/src/trace_processor/tables/track_tables.h
index fa142ae..76a4417 100644
--- a/src/trace_processor/tables/track_tables.h
+++ b/src/trace_processor/tables/track_tables.h
@@ -39,6 +39,13 @@
 
 PERFETTO_TP_TABLE(PERFETTO_TP_PROCESS_TRACK_TABLE_DEF);
 
+#define PERFETTO_TP_THREAD_TRACK_TABLE_DEF(NAME, PARENT, C) \
+  NAME(ThreadTrackTable, "thread_track")                    \
+  PARENT(PERFETTO_TP_TRACK_TABLE_DEF, C)                    \
+  C(uint32_t, utid)
+
+PERFETTO_TP_TABLE(PERFETTO_TP_THREAD_TRACK_TABLE_DEF);
+
 #define PERFETTO_TP_GPU_TRACK_DEF(NAME, PARENT, C) \
   NAME(GpuTrackTable, "gpu_track")                 \
   PARENT(PERFETTO_TP_TRACK_TABLE_DEF, C)           \
diff --git a/src/trace_processor/trace_processor_shell.cc b/src/trace_processor/trace_processor_shell.cc
index 342613d..d8eb471 100644
--- a/src/trace_processor/trace_processor_shell.cc
+++ b/src/trace_processor/trace_processor_shell.cc
@@ -771,31 +771,34 @@
 Usage: %s [OPTIONS] trace_file.pb
 
 Options:
- -h, --help                      Prints this guide.
- -v, --version                   Prints the version of trace processor.
- -d, --debug                     Enable virtual table debugging.
- -W, --wide                      Prints interactive output with double column
-                                 width.
- -p, --perf-file FILE            Writes the time taken to ingest the trace and
-                                 execute the queries to the given file. Only
-                                 valid with -q or --run-metrics and the file
-                                 will only be written if the execution
-                                 is successful.
- -q, --query-file FILE           Read and execute an SQL query from a file.
- -i, --interactive               Starts interactive mode even after a query file
-                                 is specified with -q or --run-metrics.
- -e, --export FILE               Export the trace into a SQLite database.
- --run-metrics x,y,z             Runs a comma separated list of metrics and
-                                 prints the result as a TraceMetrics proto to
-                                 stdout. The specified can either be in-built
-                                 metrics or SQL/proto files of extension
-                                 metrics.
- --metrics-output=[binary|text]  Allows the output of --run-metrics to be
-                                 specified in either proto binary or proto
-                                 text format (default: text).
- --extra-metrics PATH            Registers all SQL files at the given path to
-                                 the trace processor and extends the builtin
-                                 metrics proto with $PATH/metrics-ext.proto.)",
+ -h, --help                           Prints this guide.
+ -v, --version                        Prints the version of trace processor.
+ -d, --debug                          Enable virtual table debugging.
+ -W, --wide                           Prints interactive output with double
+                                      column width.
+ -p, --perf-file FILE                 Writes the time taken to ingest the trace
+                                      and execute the queries to the given file.
+                                      Only valid with -q or --run-metrics and
+                                      the file will only be written if the
+                                      execution is successful.
+ -q, --query-file FILE                Read and execute an SQL query from a file.
+ -i, --interactive                    Starts interactive mode even after a query
+                                      file is specified with -q or
+                                      --run-metrics.
+ -e, --export FILE                    Export the trace into a SQLite database.
+ --run-metrics x,y,z                  Runs a comma separated list of metrics and
+                                      prints the result as a TraceMetrics proto
+                                      to stdout. The specified can either be
+                                      in-built metrics or SQL/proto files of
+                                      extension metrics.
+ --metrics-output=[binary|text|json]  Allows the output of --run-metrics to be
+                                      specified in either proto binary, proto
+                                      text format or JSON format (default: proto
+                                      text).
+ --extra-metrics PATH                 Registers all SQL files at the given path
+                                      to the trace processor and extends the
+                                      builtin metrics proto with
+                                      $PATH/metrics-ext.proto.)",
                 argv[0]);
 }
 
diff --git a/src/trace_processor/trace_storage.h b/src/trace_processor/trace_storage.h
index df9092b..0799eae 100644
--- a/src/trace_processor/trace_storage.h
+++ b/src/trace_processor/trace_storage.h
@@ -1174,6 +1174,13 @@
     return &process_track_table_;
   }
 
+  const tables::ThreadTrackTable& thread_track_table() const {
+    return thread_track_table_;
+  }
+  tables::ThreadTrackTable* mutable_thread_track_table() {
+    return &thread_track_table_;
+  }
+
   const Slices& slices() const { return slices_; }
   Slices* mutable_slices() { return &slices_; }
 
@@ -1311,6 +1318,7 @@
   tables::TrackTable track_table_{&string_pool_, nullptr};
   tables::GpuTrackTable gpu_track_table_{&string_pool_, &track_table_};
   tables::ProcessTrackTable process_track_table_{&string_pool_, &track_table_};
+  tables::ThreadTrackTable thread_track_table_{&string_pool_, &track_table_};
 
   // Metadata for gpu tracks.
   GpuContexts gpu_contexts_;
diff --git a/src/trace_processor/track_tracker.cc b/src/trace_processor/track_tracker.cc
index 1b093dc..e1763d5 100644
--- a/src/trace_processor/track_tracker.cc
+++ b/src/trace_processor/track_tracker.cc
@@ -30,6 +30,19 @@
       android_source_(context->storage->InternString("android")),
       context_(context) {}
 
+TrackId TrackTracker::InternThreadTrack(UniqueTid utid) {
+  ThreadTrackTuple tuple{utid};
+  auto it = thread_tracks_.find(tuple);
+  if (it != thread_tracks_.end())
+    return it->second;  // Already interned: reuse the cached track id.
+  // No track cached for this utid yet: insert a row and remember its id.
+  tables::ThreadTrackTable::Row row;
+  row.utid = utid;
+  auto id = context_->storage->mutable_thread_track_table()->Insert(row);
+  thread_tracks_[tuple] = id;
+  return id;
+}
+
 TrackId TrackTracker::InternFuchsiaAsyncTrack(StringId name,
                                               int64_t correlation_id) {
   FuchsiaAsyncTrackTuple tuple{correlation_id};
@@ -118,5 +131,25 @@
   return id;
 }
 
+TrackId TrackTracker::InternChromeProcessInstantTrack(UniquePid upid) {
+  auto it = chrome_process_instant_tracks_.find(upid);
+  if (it != chrome_process_instant_tracks_.end())
+    return it->second;  // Already interned: reuse the cached track id.
+  // No track cached for this upid yet: insert a row and remember its id.
+  tables::ProcessTrackTable::Row row;
+  row.upid = upid;
+  auto id = context_->storage->mutable_process_track_table()->Insert(row);
+  chrome_process_instant_tracks_[upid] = id;
+  return id;
+}
+
+TrackId TrackTracker::GetOrCreateChromeGlobalInstantTrack() {
+  if (!chrome_global_instant_track_id_) {  // Inserted on first call only.
+    chrome_global_instant_track_id_ =
+        context_->storage->mutable_track_table()->Insert({});
+  }
+  return *chrome_global_instant_track_id_;
+}
+
 }  // namespace trace_processor
 }  // namespace perfetto
diff --git a/src/trace_processor/track_tracker.h b/src/trace_processor/track_tracker.h
index 704fb40..e64de70 100644
--- a/src/trace_processor/track_tracker.h
+++ b/src/trace_processor/track_tracker.h
@@ -28,6 +28,9 @@
  public:
   explicit TrackTracker(TraceProcessorContext*);
 
+  // Interns a thread track into the storage.
+  TrackId InternThreadTrack(UniqueTid utid);
+
   // Interns a Fuchsia async track into the storage.
   TrackId InternFuchsiaAsyncTrack(StringId name, int64_t correlation_id);
 
@@ -36,14 +39,30 @@
 
   // Interns a Chrome track into the storage.
   TrackId InternChromeTrack(StringId name,
-                            base::Optional<uint32_t> upid,
+                            base::Optional<UniquePid> upid,
                             int64_t source_id,
                             StringId source_scope);
 
   // Interns a Android async track into the storage.
-  TrackId InternAndroidAsyncTrack(StringId name, uint32_t upid, int64_t cookie);
+  TrackId InternAndroidAsyncTrack(StringId name,
+                                  UniquePid upid,
+                                  int64_t cookie);
+
+  // Interns a Chrome process instant track into the storage.
+  TrackId InternChromeProcessInstantTrack(UniquePid upid);
+
+  // Lazily creates the track for Chrome global instant events.
+  TrackId GetOrCreateChromeGlobalInstantTrack();
 
  private:
+  struct ThreadTrackTuple {
+    UniqueTid utid;
+    // Strict ordering by utid so the tuple can serve as a std::map key.
+    friend bool operator<(const ThreadTrackTuple& l,
+                          const ThreadTrackTuple& r) {
+      return l.utid < r.utid;
+    }
+  };
   struct FuchsiaAsyncTrackTuple {
     int64_t correlation_id;
 
@@ -85,10 +104,13 @@
     }
   };
 
+  std::map<ThreadTrackTuple, TrackId> thread_tracks_;
   std::map<FuchsiaAsyncTrackTuple, TrackId> fuchsia_async_tracks_;
   std::map<GpuTrackTuple, TrackId> gpu_tracks_;
   std::map<ChromeTrackTuple, TrackId> chrome_tracks_;
   std::map<AndroidAsyncTrackTuple, TrackId> android_async_tracks_;
+  std::map<UniquePid, TrackId> chrome_process_instant_tracks_;
+  base::Optional<TrackId> chrome_global_instant_track_id_;
 
   StringId source_key_ = 0;
   StringId source_id_key_ = 0;
diff --git a/src/traced/service/builtin_producer.cc b/src/traced/service/builtin_producer.cc
index b11da94..16df2e1 100644
--- a/src/traced/service/builtin_producer.cc
+++ b/src/traced/service/builtin_producer.cc
@@ -66,13 +66,17 @@
   metatrace_dsd.set_will_notify_on_stop(true);
   endpoint_->RegisterDataSource(metatrace_dsd);
 
-  DataSourceDescriptor lazy_heapprofd_dsd;
-  lazy_heapprofd_dsd.set_name(kHeapprofdDataSourceName);
-  endpoint_->RegisterDataSource(lazy_heapprofd_dsd);
+  {
+    DataSourceDescriptor lazy_heapprofd_dsd;
+    lazy_heapprofd_dsd.set_name(kHeapprofdDataSourceName);
+    endpoint_->RegisterDataSource(lazy_heapprofd_dsd);
+  }
 
-  DataSourceDescriptor lazy_java_hprof_dsd;
-  lazy_heapprofd_dsd.set_name(kJavaHprofDataSourceName);
-  endpoint_->RegisterDataSource(lazy_java_hprof_dsd);
+  {
+    DataSourceDescriptor lazy_java_hprof_dsd;
+    lazy_java_hprof_dsd.set_name(kJavaHprofDataSourceName);
+    endpoint_->RegisterDataSource(lazy_java_hprof_dsd);
+  }
 }
 
 void BuiltinProducer::SetupDataSource(DataSourceInstanceID ds_id,
diff --git a/src/tracing/BUILD.gn b/src/tracing/BUILD.gn
index 5a93bbc..fcb67cb 100644
--- a/src/tracing/BUILD.gn
+++ b/src/tracing/BUILD.gn
@@ -252,6 +252,7 @@
     "internal/in_process_tracing_backend.cc",
     "internal/in_process_tracing_backend.h",
     "internal/tracing_muxer_impl.cc",
+    "internal/tracing_muxer_impl.h",
     "platform.cc",
     "tracing.cc",
     "track_event.cc",
diff --git a/tools/gen_bazel b/tools/gen_bazel
index a8643ec..09fd086 100755
--- a/tools/gen_bazel
+++ b/tools/gen_bazel
@@ -47,6 +47,7 @@
 
 # These targets will be exported with public visibility in the generated BUILD.
 public_targets = [
+    '//:libperfetto_client_experimental',
     '//src/perfetto_cmd:perfetto',
     '//src/traced/probes:traced_probes',
     '//src/traced/service:traced',
@@ -66,8 +67,10 @@
 ] + public_targets
 
 # Root proto targets (to force discovery of intermediate proto targets).
+# These targets are marked public.
 proto_targets = [
     '//protos/perfetto/trace:merged_trace',
+    '//protos/perfetto/config:merged_config',
     '//protos/perfetto/metrics:lite',
     '//protos/perfetto/trace:lite',
     '//protos/perfetto/config:lite',
@@ -230,6 +233,9 @@
   deps = [':' + get_sources_label(x) for x in target.proto_deps]
   sources_label.deps = sorted(deps)
 
+  if target.name in proto_targets:
+    sources_label.visibility = ['//visibility:public']
+
   return [plugin_label, sources_label]
 
 
diff --git a/tools/trace_to_text/BUILD.gn b/tools/trace_to_text/BUILD.gn
index 62f9468..44126e7 100644
--- a/tools/trace_to_text/BUILD.gn
+++ b/tools/trace_to_text/BUILD.gn
@@ -126,6 +126,7 @@
     ":pprofbuilder",
     ":symbolizer",
     ":utils",
+    "../../gn:zlib",
   ]
   public_deps = [
     "../../gn:default_deps",
diff --git a/tools/trace_to_text/main.cc b/tools/trace_to_text/main.cc
index 225bf70..52ff38e 100644
--- a/tools/trace_to_text/main.cc
+++ b/tools/trace_to_text/main.cc
@@ -20,10 +20,10 @@
 #include <vector>
 
 #include "perfetto/base/logging.h"
+#include "tools/trace_to_text/symbolize_profile.h"
 #include "tools/trace_to_text/trace_to_profile.h"
 #include "tools/trace_to_text/trace_to_systrace.h"
 #include "tools/trace_to_text/trace_to_text.h"
-#include "tools/trace_to_text/symbolize_profile.h"
 
 #if PERFETTO_BUILDFLAG(PERFETTO_VERSION_GEN)
 #include "perfetto_version.gen.h"
@@ -31,22 +31,22 @@
 #define PERFETTO_GET_GIT_REVISION() "unknown"
 #endif
 
+namespace perfetto {
+namespace trace_to_text {
 namespace {
 
 int Usage(const char* argv0) {
   printf(
-      "Usage: %s systrace|json|text|profile [--truncate start|end] [trace.pb] "
+      "Usage: %s systrace|json|ctrace|text|profile [--truncate start|end] "
+      "[trace.pb] "
       "[trace.txt]\n",
       argv0);
   return 1;
 }
 
-}  // namespace
-
-int main(int argc, char** argv) {
+int Main(int argc, char** argv) {
   std::vector<const char*> positional_args;
-  perfetto::trace_to_text::Keep truncate_keep =
-      perfetto::trace_to_text::Keep::kAll;
+  Keep truncate_keep = Keep::kAll;
   for (int i = 1; i < argc; i++) {
     if (strcmp(argv[i], "-v") == 0 || strcmp(argv[i], "--version") == 0) {
       printf("%s\n", PERFETTO_GET_GIT_REVISION());
@@ -55,9 +55,9 @@
                strcmp(argv[i], "--truncate") == 0) {
       i++;
       if (i <= argc && strcmp(argv[i], "start") == 0) {
-        truncate_keep = perfetto::trace_to_text::Keep::kStart;
+        truncate_keep = Keep::kStart;
       } else if (i <= argc && strcmp(argv[i], "end") == 0) {
-        truncate_keep = perfetto::trace_to_text::Keep::kEnd;
+        truncate_keep = Keep::kEnd;
       } else {
         PERFETTO_ELOG(
             "--truncate must specify whether to keep the end or the "
@@ -105,28 +105,39 @@
   std::string format(positional_args[0]);
 
   if (format == "json")
-    return perfetto::trace_to_text::TraceToSystrace(input_stream, output_stream,
-                                                    truncate_keep,
-                                                    /*wrap_in_json=*/true);
+    return TraceToSystrace(input_stream, output_stream, kSystraceJson,
+                           truncate_keep);
+
   if (format == "systrace")
-    return perfetto::trace_to_text::TraceToSystrace(input_stream, output_stream,
-                                                    truncate_keep,
-                                                    /*wrap_in_json=*/false);
-  if (truncate_keep != perfetto::trace_to_text::Keep::kAll) {
+    return TraceToSystrace(input_stream, output_stream, kSystraceNormal,
+                           truncate_keep);
+
+  if (format == "ctrace")
+    return TraceToSystrace(input_stream, output_stream, kSystraceCompressed,
+                           truncate_keep);
+
+  if (truncate_keep != Keep::kAll) {
     PERFETTO_ELOG(
         "--truncate is unsupported for text|profile|symbolize format.");
     return 1;
   }
 
   if (format == "text")
-    return perfetto::trace_to_text::TraceToText(input_stream, output_stream);
+    return TraceToText(input_stream, output_stream);
 
   if (format == "profile")
-    return perfetto::trace_to_text::TraceToProfile(input_stream, output_stream);
+    return TraceToProfile(input_stream, output_stream);
 
   if (format == "symbolize")
-    return perfetto::trace_to_text::SymbolizeProfile(input_stream,
-                                                     output_stream);
+    return SymbolizeProfile(input_stream, output_stream);
 
   return Usage(argv[0]);
 }
+
+}  // namespace
+}  // namespace trace_to_text
+}  // namespace perfetto
+
+int main(int argc, char** argv) {
+  return perfetto::trace_to_text::Main(argc, argv);
+}
diff --git a/tools/trace_to_text/pprof_builder.cc b/tools/trace_to_text/pprof_builder.cc
index 5d56cf9..1cea63b 100644
--- a/tools/trace_to_text/pprof_builder.cc
+++ b/tools/trace_to_text/pprof_builder.cc
@@ -85,30 +85,14 @@
 using Iterator = trace_processor::TraceProcessor::Iterator;
 
 constexpr const char* kQueryProfiles =
-    "select distinct hpa.upid, hpa.ts from heap_profile_allocation hpa;";
+    "select distinct hpa.upid, hpa.ts, p.pid from heap_profile_allocation hpa, "
+    "process p where p.upid = hpa.upid;";
 
 struct Callsite {
   int64_t id;
   int64_t frame_id;
 };
 
-// Walk tree bottom up and assign the inverse of the frame_ids of the path
-// that was used to reach each node into result.
-void Walk(const std::vector<std::vector<Callsite>> children_map,
-          std::vector<std::vector<int64_t>>* result,
-          std::vector<int64_t> parents,
-          const Callsite& root) {
-  PERFETTO_DCHECK((*result)[static_cast<size_t>(root.id)].empty());
-  parents.push_back(root.frame_id);
-  // pprof stores the frames the other way round that we do, reverse here.
-  (*result)[static_cast<size_t>(root.id)].assign(parents.rbegin(),
-                                                 parents.rend());
-  const std::vector<Callsite>& children =
-      children_map[static_cast<size_t>(root.id)];
-  for (const Callsite& child : children)
-    Walk(children_map, result, parents, child);
-}
-
 // Return map from callsite_id to list of frame_ids that make up the callstack.
 std::vector<std::vector<int64_t>> GetCallsiteToFrames(
     trace_processor::TraceProcessor* tp) {
@@ -120,20 +104,22 @@
     return {};
   }
   int64_t count = count_it.Get(0).long_value;
-  std::vector<std::vector<Callsite>> children(static_cast<size_t>(count));
 
   Iterator it = tp->ExecuteQuery(
-      "select id, parent_id, frame_id from stack_profile_callsite;");
-  std::vector<Callsite> roots;
+      "select id, parent_id, frame_id from stack_profile_callsite order by "
+      "depth;");
+  std::vector<std::vector<int64_t>> result(static_cast<size_t>(count));
   while (it.Next()) {
     int64_t id = it.Get(0).long_value;
     int64_t parent_id = it.Get(1).long_value;
     int64_t frame_id = it.Get(2).long_value;
-    Callsite callsite{id, frame_id};
-    if (parent_id == -1)
-      roots.emplace_back(callsite);
-    else
-      children[static_cast<size_t>(parent_id)].emplace_back(callsite);
+    std::vector<int64_t>& path = result[static_cast<size_t>(id)];
+    path.push_back(frame_id);
+    if (parent_id != -1) {
+      const std::vector<int64_t>& parent_path =
+          result[static_cast<size_t>(parent_id)];
+      path.insert(path.end(), parent_path.begin(), parent_path.end());
+    }
   }
 
   if (!it.Status().ok()) {
@@ -141,13 +127,6 @@
                             it.Status().message().c_str());
     return {};
   }
-
-  std::vector<std::vector<int64_t>> result(static_cast<size_t>(count));
-  auto start = base::GetWallTimeMs();
-  for (const Callsite& root : roots)
-    Walk(children, &result, {}, root);
-  PERFETTO_DLOG("Walked %zu in %llu", children.size(),
-                (base::GetWallTimeMs() - start).count());
   return result;
 }
 
@@ -443,18 +422,28 @@
 
 bool TraceToPprof(std::istream* input,
                   std::vector<SerializedProfile>* output,
-                  Symbolizer* symbolizer) {
+                  Symbolizer* symbolizer,
+                  uint64_t pid,
+                  const std::vector<uint64_t>& timestamps) {
   trace_processor::Config config;
   std::unique_ptr<trace_processor::TraceProcessor> tp =
       trace_processor::TraceProcessor::CreateInstance(config);
 
   if (!ReadTrace(tp.get(), input))
-    return 1;
+    return false;
 
   tp->NotifyEndOfFile();
+  return TraceToPprof(tp.get(), output, symbolizer, pid, timestamps);
+}
+
+bool TraceToPprof(trace_processor::TraceProcessor* tp,
+                  std::vector<SerializedProfile>* output,
+                  Symbolizer* symbolizer,
+                  uint64_t pid,
+                  const std::vector<uint64_t>& timestamps) {
   if (symbolizer) {
     SymbolizeDatabase(
-        tp.get(), symbolizer, [&tp](perfetto::protos::TracePacket packet) {
+        tp, symbolizer, [&tp](perfetto::protos::TracePacket packet) {
           size_t size = static_cast<size_t>(packet.ByteSize());
           std::unique_ptr<uint8_t[]> buf(new uint8_t[size]);
           packet.SerializeToArray(buf.get(), packet.ByteSize());
@@ -489,8 +478,8 @@
   }
 
   int64_t max_symbol_id = max_symbol_id_it.Get(0).long_value;
-  auto callsite_to_frames = GetCallsiteToFrames(tp.get());
-  auto symbol_set_id_to_lines = GetSymbolSetIdToLines(tp.get());
+  auto callsite_to_frames = GetCallsiteToFrames(tp);
+  auto symbol_set_id_to_lines = GetSymbolSetIdToLines(tp);
 
   Iterator it = tp->ExecuteQuery(kQueryProfiles);
   while (it.Next()) {
@@ -498,12 +487,19 @@
                             max_symbol_id);
     uint64_t upid = static_cast<uint64_t>(it.Get(0).long_value);
     uint64_t ts = static_cast<uint64_t>(it.Get(1).long_value);
+    uint64_t profile_pid = static_cast<uint64_t>(it.Get(2).long_value);
+    if ((pid > 0 && profile_pid != pid) ||
+        (!timestamps.empty() && std::find(timestamps.begin(), timestamps.end(),
+                                          ts) == timestamps.end())) {
+      continue;
+    }
+
     std::string pid_query = "select pid from process where upid = ";
     pid_query += std::to_string(upid) + ";";
     Iterator pid_it = tp->ExecuteQuery(pid_query);
     PERFETTO_CHECK(pid_it.Next());
 
-    GProfile profile = builder.GenerateGProfile(tp.get(), upid, ts);
+    GProfile profile = builder.GenerateGProfile(tp, upid, ts);
     output->emplace_back(
         SerializedProfile{static_cast<uint64_t>(pid_it.Get(0).long_value),
                           profile.SerializeAsString()});
@@ -516,8 +512,11 @@
   return true;
 }
 
-bool TraceToPprof(std::istream* input, std::vector<SerializedProfile>* output) {
-  return TraceToPprof(input, output, nullptr);
+bool TraceToPprof(std::istream* input,
+                  std::vector<SerializedProfile>* output,
+                  uint64_t pid,
+                  const std::vector<uint64_t>& timestamps) {
+  return TraceToPprof(input, output, nullptr, pid, timestamps);
 }
 
 }  // namespace trace_to_text
diff --git a/tools/trace_to_text/trace_to_profile.cc b/tools/trace_to_text/trace_to_profile.cc
index e70931d..cc00194 100644
--- a/tools/trace_to_text/trace_to_profile.cc
+++ b/tools/trace_to_text/trace_to_profile.cc
@@ -47,7 +47,10 @@
 namespace perfetto {
 namespace trace_to_text {
 
-int TraceToProfile(std::istream* input, std::ostream* output) {
+int TraceToProfile(std::istream* input,
+                   std::ostream* output,
+                   uint64_t pid,
+                   std::vector<uint64_t> timestamps) {
   std::unique_ptr<Symbolizer> symbolizer;
   auto binary_path = GetPerfettoBinaryPath();
   if (!binary_path.empty()) {
@@ -61,7 +64,7 @@
   }
 
   std::vector<SerializedProfile> profiles;
-  TraceToPprof(input, &profiles, symbolizer.get());
+  TraceToPprof(input, &profiles, symbolizer.get(), pid, timestamps);
   if (profiles.empty()) {
     return 0;
   }
diff --git a/tools/trace_to_text/trace_to_profile.h b/tools/trace_to_text/trace_to_profile.h
index 4e84f8e..629d3ef 100644
--- a/tools/trace_to_text/trace_to_profile.h
+++ b/tools/trace_to_text/trace_to_profile.h
@@ -18,11 +18,15 @@
 #define TOOLS_TRACE_TO_TEXT_TRACE_TO_PROFILE_H_
 
 #include <iostream>
+#include <vector>
 
 namespace perfetto {
 namespace trace_to_text {
 
-int TraceToProfile(std::istream* input, std::ostream* output);
+int TraceToProfile(std::istream* input,
+                   std::ostream* output,
+                   uint64_t pid = 0,
+                   std::vector<uint64_t> timestamps = {});
 
 }  // namespace trace_to_text
 }  // namespace perfetto
diff --git a/tools/trace_to_text/trace_to_systrace.cc b/tools/trace_to_text/trace_to_systrace.cc
index 3e31e34..a7afb5d 100644
--- a/tools/trace_to_text/trace_to_systrace.cc
+++ b/tools/trace_to_text/trace_to_systrace.cc
@@ -25,6 +25,8 @@
 #include <memory>
 #include <utility>
 
+#include <zlib.h>
+
 #include "perfetto/base/build_config.h"
 #include "perfetto/base/logging.h"
 #include "perfetto/ext/base/paged_memory.h"
@@ -46,6 +48,8 @@
 
 namespace {
 
+const size_t kCompressionBufferSize = 500 * 1024;
+
 // Having an empty traceEvents object is necessary for trace viewer to
 // load the json properly.
 const char kTraceHeader[] = R"({
@@ -123,13 +127,79 @@
   }
 }
 
+class TraceWriter {  // Forwards written bytes unmodified to a std::ostream.
+ public:
+  TraceWriter(std::ostream* output) : output_(output) {}
+  virtual ~TraceWriter() = default;
+  // Convenience overload: forwards the string's bytes to the virtual Write().
+  void Write(std::string s) { Write(s.data(), s.size()); }
+  // Sink for raw bytes; virtual so that a subclass can transform the output.
+  virtual void Write(const char* data, size_t sz) {
+    output_->write(data, static_cast<std::streamsize>(sz));
+  }
+
+ private:
+  std::ostream* output_;
+};
+
+class DeflateTraceWriter : public TraceWriter {  // zlib-deflating writer.
+ public:
+  explicit DeflateTraceWriter(std::ostream* output)
+      : TraceWriter(output),
+        buf_(base::PagedMemory::Allocate(kCompressionBufferSize)),
+        start_(static_cast<uint8_t*>(buf_.Get())),
+        end_(start_ + buf_.size()) {
+    CheckEq(deflateInit(&stream_, 9), Z_OK);  // 9 == Z_BEST_COMPRESSION.
+    stream_.next_out = start_;
+    stream_.avail_out = static_cast<unsigned int>(end_ - start_);
+  }
+
+  ~DeflateTraceWriter() override {
+    while (deflate(&stream_, Z_FINISH) != Z_STREAM_END)
+      Flush();  // Out of output space: drain the buffer and keep going.
+    Flush();  // The final deflate() call leaves the stream tail in |buf_|.
+    CheckEq(deflateEnd(&stream_), Z_OK);
+  }
+
+  // Compresses |sz| bytes from |data|, draining |buf_| whenever it fills up.
+  void Write(const char* data, size_t sz) override {
+    stream_.next_in = reinterpret_cast<uint8_t*>(const_cast<char*>(data));
+    stream_.avail_in = static_cast<unsigned int>(sz);
+    while (stream_.avail_in > 0) {
+      CheckEq(deflate(&stream_, Z_NO_FLUSH), Z_OK);
+      if (stream_.avail_out == 0)
+        Flush();
+    }
+  }
+
+ private:
+  void Flush() {  // Hands the bytes pending in |buf_| to the base writer.
+    TraceWriter::Write(reinterpret_cast<char*>(start_),
+                       static_cast<size_t>(stream_.next_out - start_));
+    stream_.next_out = start_;
+    stream_.avail_out = static_cast<unsigned int>(end_ - start_);
+  }
+  // Reports a fatal error when a zlib call returns an unexpected code.
+  void CheckEq(int actual_code, int expected_code) {
+    if (actual_code == expected_code)
+      return;
+    PERFETTO_FATAL("Expected %d got %d: %s", expected_code, actual_code,
+                   stream_.msg ? stream_.msg : "(no zlib message)");
+  }
+
+  z_stream stream_{};
+  base::PagedMemory buf_;
+  uint8_t* const start_;
+  uint8_t* const end_;
+};
+
 class QueryWriter {
  public:
-  QueryWriter(trace_processor::TraceProcessor* tp, std::ostream* output)
+  QueryWriter(trace_processor::TraceProcessor* tp, TraceWriter* trace_writer)
       : tp_(tp),
         buffer_(base::PagedMemory::Allocate(kBufferSize)),
         global_writer_(static_cast<char*>(buffer_.Get()), kBufferSize),
-        output_(output) {}
+        trace_writer_(trace_writer) {}
 
   template <typename Callback>
   bool RunQuery(const std::string& sql, Callback callback) {
@@ -142,7 +212,7 @@
       if (global_writer_.pos() + line_writer.pos() >= global_writer_.size()) {
         fprintf(stderr, "Writing row %" PRIu32 PROGRESS_CHAR, rows);
         auto str = global_writer_.GetStringView();
-        output_->write(str.data(), static_cast<std::streamsize>(str.size()));
+        trace_writer_->Write(str.data(), str.size());
         global_writer_.reset();
       }
       global_writer_.AppendStringView(line_writer.GetStringView());
@@ -157,7 +227,7 @@
 
     // Flush any dangling pieces in the global writer.
     auto str = global_writer_.GetStringView();
-    output_->write(str.data(), static_cast<std::streamsize>(str.size()));
+    trace_writer_->Write(str.data(), str.size());
     global_writer_.reset();
     return true;
   }
@@ -168,15 +238,21 @@
   trace_processor::TraceProcessor* tp_ = nullptr;
   base::PagedMemory buffer_;
   base::StringWriter global_writer_;
-  std::ostream* output_ = nullptr;
+  TraceWriter* trace_writer_;
 };
 
 }  // namespace
 
 int TraceToSystrace(std::istream* input,
                     std::ostream* output,
-                    Keep truncate_keep,
-                    bool wrap_in_json) {
+                    SystraceKind kind,
+                    Keep truncate_keep) {
+  bool wrap_in_json = kind == kSystraceJson;
+  bool compress = kind == kSystraceCompressed;
+
+  std::unique_ptr<TraceWriter> trace_writer(
+      compress ? new DeflateTraceWriter(output) : new TraceWriter(output));
+
   trace_processor::Config config;
   std::unique_ptr<trace_processor::TraceProcessor> tp =
       trace_processor::TraceProcessor::CreateInstance(config);
@@ -186,7 +262,7 @@
   tp->NotifyEndOfFile();
   using Iterator = trace_processor::TraceProcessor::Iterator;
 
-  QueryWriter q_writer(tp.get(), output);
+  QueryWriter q_writer(tp.get(), trace_writer.get());
   if (wrap_in_json) {
     *output << kTraceHeader;
 
@@ -231,18 +307,18 @@
     *output << kFtraceJsonHeader;
   } else {
     *output << "TRACE:\n";
-    *output << kFtraceHeader;
+    trace_writer->Write(kFtraceHeader);
   }
 
   fprintf(stderr, "Converting trace events" PROGRESS_CHAR);
   fflush(stderr);
 
-  static const char kEstimatSql[] = "select count(1) from raw";
+  static const char kEstimateSql[] = "select count(1) from raw";
   uint32_t raw_events = 0;
   auto e_callback = [&raw_events](Iterator* it, base::StringWriter*) {
     raw_events = static_cast<uint32_t>(it->Get(0).long_value);
   };
-  if (!q_writer.RunQuery(kEstimatSql, e_callback))
+  if (!q_writer.RunQuery(kEstimateSql, e_callback))
     return 1;
 
   auto raw_callback = [wrap_in_json](Iterator* it, base::StringWriter* writer) {
diff --git a/tools/trace_to_text/trace_to_systrace.h b/tools/trace_to_text/trace_to_systrace.h
index c1c4659..3862578 100644
--- a/tools/trace_to_text/trace_to_systrace.h
+++ b/tools/trace_to_text/trace_to_systrace.h
@@ -23,11 +23,12 @@
 namespace trace_to_text {
 
 enum class Keep { kStart = 0, kEnd, kAll };
+enum SystraceKind { kSystraceNormal = 0, kSystraceCompressed, kSystraceJson };
 
 int TraceToSystrace(std::istream* input,
                     std::ostream* output,
-                    Keep truncate_keep,
-                    bool wrap_in_json);
+                    SystraceKind kind,
+                    Keep truncate_keep);
 
 }  // namespace trace_to_text
 }  // namespace perfetto
diff --git a/ui/src/assets/common.scss b/ui/src/assets/common.scss
index 1acf957..b5ac993 100644
--- a/ui/src/assets/common.scss
+++ b/ui/src/assets/common.scss
@@ -459,6 +459,28 @@
     }
   }
 
+  // Dark, rounded button with a white label.
+  button {
+    margin-top: 12px;
+    padding: .5rem 1rem;  // top/bottom: .5rem, left/right: 1rem.
+    border-radius: .25rem;
+    font-size: 0.875rem;
+    color: #fff;
+    background-color: #262f3c;
+  }
+
+  // Explanatory text block, 35% wide.
+  .explanation {
+    margin-top: 10px;
+    width: 35%;
+    font-size: 14px;
+  }
+
+  // Vertically centers icons and keeps a gap to whatever follows them.
+  .material-icons {
+    margin-right: 10px;
+    vertical-align: middle;
+  }
 }
 
 .tickbar {
diff --git a/ui/src/common/actions.ts b/ui/src/common/actions.ts
index b3c37c6..13f9f6b 100644
--- a/ui/src/common/actions.ts
+++ b/ui/src/common/actions.ts
@@ -15,7 +15,7 @@
 import {Draft} from 'immer';
 
 import {assertExists} from '../base/logging';
-import {ConvertTrace} from '../controller/trace_converter';
+import {ConvertTrace, ConvertTraceToPprof} from '../controller/trace_converter';
 
 import {
   AdbRecordingTarget,
@@ -95,11 +95,21 @@
     state.videoEnabled = true;
   },
 
+  // TODO(b/141359485): Actions should only modify state.
   convertTraceToJson(
       _: StateDraft, args: {file: Blob, truncate?: 'start'|'end'}): void {
     ConvertTrace(args.file, args.truncate);
   },
 
+  convertTraceToPprof(_: StateDraft, args: {
+    pid: number,
+    src: string|File|ArrayBuffer,
+    ts1: number,
+    ts2?: number
+  }): void {  // Side effect only: starts the conversion, state is unused.
+    ConvertTraceToPprof(args.pid, args.src, args.ts1, args.ts2);
+  },
+
   openTraceFromUrl(state: StateDraft, args: {url: string}): void {
     clearTraceState(state);
     const id = `${state.nextId++}`;
diff --git a/ui/src/controller/globals.ts b/ui/src/controller/globals.ts
index 062770b..179c0ee 100644
--- a/ui/src/controller/globals.ts
+++ b/ui/src/controller/globals.ts
@@ -27,8 +27,9 @@
 
 import {ControllerAny} from './controller';
 
-type PublishKinds = 'OverviewData'|'TrackData'|'Threads'|'QueryResult'|
-    'LegacyTrace'|'SliceDetails'|'CounterDetails'|'HeapDumpDetails'|'Loading'|
+type PublishKinds =
+    'OverviewData'|'TrackData'|'Threads'|'QueryResult'|'LegacyTrace'|
+    'SliceDetails'|'CounterDetails'|'HeapDumpDetails'|'FileDownload'|'Loading'|
     'Search'|'BufferUsage'|'RecordingLog'|'SearchResult';
 
 export interface App {
diff --git a/ui/src/controller/selection_controller.ts b/ui/src/controller/selection_controller.ts
index 9e881b7..99cbfcf 100644
--- a/ui/src/controller/selection_controller.ts
+++ b/ui/src/controller/selection_controller.ts
@@ -124,6 +124,9 @@
   }
 
   async heapDumpDetails(ts: number, upid: number) {
+    const pidValue = await this.args.engine.query(
+        `select pid from process where upid = ${upid}`);
+    const pid = pidValue.columns[0].longValues![0];
     const allocatedMemory = await this.args.engine.query(
         `select sum(size) from heap_profile_allocation where ts <= ${
             ts} and size > 0 and upid = ${upid}`);
@@ -133,7 +136,7 @@
             ts} and upid = ${upid}`);
     const allocatedNotFreed = allocatedNotFreedMemory.columns[0].longValues![0];
     const startTime = fromNs(ts) - globals.state.traceTime.startSec;
-    return {ts: startTime, allocated, allocatedNotFreed};
+    return {ts: startTime, allocated, allocatedNotFreed, tsNs: ts, pid};
   }
 
   async counterDetails(ts: number, rightTs: number, id: number) {
diff --git a/ui/src/controller/trace_controller.ts b/ui/src/controller/trace_controller.ts
index 34a79a4..cfa03f2 100644
--- a/ui/src/controller/trace_controller.ts
+++ b/ui/src/controller/trace_controller.ts
@@ -307,6 +307,40 @@
       }
     }
 
+
+    const upidToProcessTracks = new Map();
+    const rawProcessTracks = await engine.query(`
+      select id, upid, name, maxDepth
+      from process_track
+      join (
+        select ref as id, max(depth) as maxDepth
+        from slice
+        where ref_type = 'track' group by ref
+      ) using(id)
+    `);
+    for (let i = 0; i < rawProcessTracks.numRecords; i++) {
+      const trackId = rawProcessTracks.columns[0].longValues![i];
+      const upid = rawProcessTracks.columns[1].longValues![i];
+      const name = rawProcessTracks.columns[2].stringValues![i];
+      const maxDepth = rawProcessTracks.columns[3].longValues![i];
+      const track = {
+        engineId: this.engineId,
+        kind: 'AsyncSliceTrack',
+        name,
+        config: {
+          trackId,
+          maxDepth,
+        },
+      };
+
+      const tracks = upidToProcessTracks.get(upid);
+      if (tracks) {
+        tracks.push(track);
+      } else {
+        upidToProcessTracks.set(upid, [track]);
+      }
+    }
+
     const heapProfiles = await engine.query(`
       select distinct(upid) from heap_profile_allocation`);
 
@@ -513,6 +547,12 @@
               config: {upid}
             });
           }
+
+          if (upidToProcessTracks.has(upid)) {
+            for (const track of upidToProcessTracks.get(upid)) {
+              tracksToAdd.push(Object.assign(track, {trackGroup: pUuid}));
+            }
+          }
         }
       }
       const counterThreadNames = counterUtids[utid];
diff --git a/ui/src/controller/trace_converter.ts b/ui/src/controller/trace_converter.ts
index 5941cb4..e4648a0 100644
--- a/ui/src/controller/trace_converter.ts
+++ b/ui/src/controller/trace_converter.ts
@@ -53,6 +53,74 @@
   (self as {} as {mod: {}}).mod = mod;
 }
 
+// Runs trace_to_text's 'profile' command on |src| for |pid| at |ts1|[,|ts2|]
+// and offers every file of the first /tmp sub-directory as a download.
+export async function ConvertTraceToPprof(
+    pid: number, src: string|File|ArrayBuffer, ts1: number, ts2?: number) {
+  let blob: Blob;
+  try {
+    blob = await generateBlob(src);
+  } catch (e) {
+    // Report the failure rather than leaving an unhandled rejection.
+    updateStatus(`Trace conversion failed: ${e}`);
+    return;
+  }
+  const mod = trace_to_text({
+    noInitialRun: true,
+    locateFile: (s: string) => s,
+    print: updateStatus,
+    printErr: updateStatus,
+    onRuntimeInitialized: () => {
+      updateStatus('Converting trace');
+      const timestamps = ts2 === undefined ? `${ts1}` : `${ts1},${ts2}`;
+      mod.callMain([
+        'profile', '--pid', `${pid}`, '--timestamps', timestamps,
+        '/fs/trace.proto'
+      ]);
+      updateStatus('Trace conversion completed');
+      // Without any output directory there is nothing to download.
+      const tmpEntries = Object.keys(mod.FS.lookupPath('/tmp/').node.contents);
+      if (tmpEntries.length === 0) {
+        updateStatus('No heap profile was generated');
+        return;
+      }
+      const heapDir = `/tmp/${tmpEntries[0]}`;
+      const heapDumps = Object.keys(mod.FS.lookupPath(heapDir).node.contents);
+      heapDumps.forEach((heapDump, i) => {
+        const path = `${heapDir}/${heapDump}`;
+        const fileContents = mod.FS.lookupPath(path).node.contents;
+        downloadFile(new Blob([fileContents]), `/heap_dump.${i + 1}.${pid}.pb`);
+      });
+      updateStatus('Profile(s) downloaded');
+    },
+    onAbort: () => console.log('ABORT'),
+  });
+  mod.FS.mkdir('/fs');
+  mod.FS.mount(
+      mod.FS.filesystems.WORKERFS, {blobs: [{name: 'trace.proto', data: blob}]},
+      '/fs');
+}
+
+// Normalizes |src| (URL string, ArrayBuffer or File) into a Blob.
+async function generateBlob(src: string|ArrayBuffer|File) {
+  if (src instanceof ArrayBuffer) {
+    return new Blob([new Uint8Array(src, 0, src.byteLength)]);
+  }
+  if (typeof src !== 'string') {
+    return src;  // A File is used as-is.
+  }
+  // A string is fetched as a URL; anything but HTTP 200 is an error.
+  const resp = await fetch(src);
+  if (resp.status !== 200) {
+    throw new Error(`fetch() failed with HTTP error ${resp.status}`);
+  }
+  return resp.blob();
+}
+
+function downloadFile(file: Blob, name: string) {
+  globals.publish('FileDownload', {file, name});  // |name|: download filename.
+}
+
 function updateStatus(msg: {}) {
   console.log(msg);
   globals.dispatch(Actions.updateStatus({
diff --git a/ui/src/frontend/chrome_slice_panel.ts b/ui/src/frontend/chrome_slice_panel.ts
index 9a6f142..97ce65c 100644
--- a/ui/src/frontend/chrome_slice_panel.ts
+++ b/ui/src/frontend/chrome_slice_panel.ts
@@ -31,9 +31,11 @@
               [m('table',
                  [
                    m('tr', m('th', `Name`), m('td', `${sliceInfo.name}`)),
-                   m('tr',
-                     m('th', `Category`),
-                     m('td', `${sliceInfo.category}`)),
+                   (sliceInfo.category === '[NULL]') ?
+                       null :
+                       m('tr',
+                         m('th', `Category`),
+                         m('td', `${sliceInfo.category}`)),
                    m('tr',
                      m('th', `Start time`),
                      m('td', `${timeToCode(sliceInfo.ts)}`)),
diff --git a/ui/src/frontend/globals.ts b/ui/src/frontend/globals.ts
index f9aca97..e62a889 100644
--- a/ui/src/frontend/globals.ts
+++ b/ui/src/frontend/globals.ts
@@ -44,8 +44,10 @@
 
 export interface HeapDumpDetails {
   ts?: number;
+  tsNs?: number;  // Unconverted ns timestamp that |ts| was derived from.
   allocated?: number;
   allocatedNotFreed?: number;
+  pid?: number;  // process.pid looked up for the profile's upid.
 }
 
 export interface QuantizedLoad {
diff --git a/ui/src/frontend/heap_dump_panel.ts b/ui/src/frontend/heap_dump_panel.ts
index d6c14e1..7c31636 100644
--- a/ui/src/frontend/heap_dump_panel.ts
+++ b/ui/src/frontend/heap_dump_panel.ts
@@ -14,6 +14,7 @@
 
 import * as m from 'mithril';
 
+import {Actions} from '../common/actions';
 import {timeToCode} from '../common/time';
 
 import {globals} from './globals';
@@ -22,13 +23,19 @@
 interface HeapDumpDetailsPanelAttrs {}
 
 export class HeapDumpDetailsPanel extends Panel<HeapDumpDetailsPanelAttrs> {
+  private ts = 0;
+  private pid = 0;
+
   view() {
     const heapDumpInfo = globals.heapDumpDetails;
     if (heapDumpInfo && heapDumpInfo.ts && heapDumpInfo.allocated &&
-        heapDumpInfo.allocatedNotFreed) {
+        heapDumpInfo.allocatedNotFreed && heapDumpInfo.tsNs &&
+        heapDumpInfo.pid) {
+      this.ts = heapDumpInfo.tsNs;
+      this.pid = heapDumpInfo.pid;
       return m(
           '.details-panel',
-          m('.details-panel-heading', `Heap Snapshot Details:`),
+          m('.details-panel-heading', `Heap Profile Details:`),
           m(
               '.details-table',
               [m('table',
@@ -47,7 +54,23 @@
                            heapDumpInfo.allocatedNotFreed
                                .toLocaleString()} bytes`)),
                  ])],
-              ));
+              ),
+          m('.explanation',
+            'Heap profile support is in beta. To explore a heap profile,',
+            ' download and open it in ',
+            m(`a[href='https://pprof.corp.google.com']`, 'pprof'),
+            ' (Googlers only) or ',
+            m(`a[href='https://www.speedscope.app']`, 'Speedscope'),
+            '.'),
+          m('button',
+            {
+              onclick: () => {
+                this.downloadPprof();
+              }
+            },
+            m('i.material-icons', 'file_download'),
+            'Download profile'),
+      );
     } else {
       return m(
           '.details-panel',
@@ -55,5 +78,14 @@
     }
   }
 
+  // Dispatches a pprof conversion of the first engine's source for pid/ts.
+  downloadPprof() {
+    const [engine] = Object.values(globals.state.engines);
+    if (!engine) return;
+    // TODO(tneda): add second timestamp
+    const args = {pid: this.pid, ts1: this.ts, src: engine.source};
+    globals.dispatch(Actions.convertTraceToPprof(args));
+  }
+
   renderCanvas() {}
 }
diff --git a/ui/src/frontend/index.ts b/ui/src/frontend/index.ts
index 3790f61..2b1effe 100644
--- a/ui/src/frontend/index.ts
+++ b/ui/src/frontend/index.ts
@@ -128,6 +128,17 @@
     this.redraw();
   }
 
+  // Triggers a browser download of |args.file| via a temporary <a> element.
+  publishFileDownload(args: {file: File, name?: string}) {
+    const objectUrl = URL.createObjectURL(args.file);
+    const anchor = document.createElement('a');
+    anchor.download = args.name === undefined ? args.file.name : args.name;
+    anchor.href = objectUrl;
+    document.body.appendChild(anchor).click();
+    document.body.removeChild(anchor);
+    URL.revokeObjectURL(objectUrl);
+  }
+
   publishLoading(loading: boolean) {
     globals.loading = loading;
     globals.rafScheduler.scheduleRedraw();
diff --git a/ui/src/frontend/keyboard_event_handler.ts b/ui/src/frontend/keyboard_event_handler.ts
index 424c162..3ad39f4 100644
--- a/ui/src/frontend/keyboard_event_handler.ts
+++ b/ui/src/frontend/keyboard_event_handler.ts
@@ -13,10 +13,10 @@
 // limitations under the License.
 
 import {Actions} from '../common/actions';
-import {TimeSpan} from '../common/time';
 
 import {globals} from './globals';
 import {toggleHelp} from './help_modal';
+import {horizontalScrollAndZoomToRange} from './scroll_helper';
 import {executeSearch} from './search_handler';
 
 // Handles all key events than are not handled by the
@@ -67,12 +67,6 @@
   if (startTs !== -1 && endTs !== -1) {
     globals.dispatch(Actions.selectTimeSpan({startTs, endTs}));
     // Zoom into the highlighted time region.
-    const visibleDur = globals.frontendLocalState.visibleWindowTime.end -
-        globals.frontendLocalState.visibleWindowTime.start;
-    const selectDur = endTs - startTs;
-    if (selectDur / visibleDur < 0.05) {
-      globals.frontendLocalState.updateVisibleTime(
-          new TimeSpan(startTs - (selectDur * 2), endTs + (selectDur * 2)));
-    }
+    horizontalScrollAndZoomToRange(startTs, endTs);
   }
 }
diff --git a/ui/src/frontend/scroll_helper.ts b/ui/src/frontend/scroll_helper.ts
new file mode 100644
index 0000000..97f8de6
--- /dev/null
+++ b/ui/src/frontend/scroll_helper.ts
@@ -0,0 +1,83 @@
+// Copyright (C) 2019 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+import {getContainingTrackId} from '../common/state';
+import {fromNs, TimeSpan, toNs} from '../common/time';
+
+import {globals} from './globals';
+
+/**
+ * Re-centers the viewport on |ts| (in ns) when |ts| falls outside the
+ * currently visible window. The width of the window is left unchanged.
+ */
+export function horizontalScrollToTs(ts: number) {
+  const visible = globals.frontendLocalState.visibleWindowTime;
+  const startNs = toNs(visible.start);
+  const endNs = toNs(visible.end);
+  if (!(ts < startNs || ts > endNs)) return;
+  // TODO(taylori): This is an ugly jump, we should do a smooth pan instead.
+  const halfViewNs = (endNs - startNs) / 2;
+  globals.frontendLocalState.updateVisibleTime(
+      new TimeSpan(fromNs(ts - halfViewNs), fromNs(ts + halfViewNs)));
+}
+
+/**
+ * Given a start and end timestamp (in seconds), move the view to center this
+ * range and zoom to a level where the range is 1/5 of the viewport.
+ */
+export function horizontalScrollAndZoomToRange(startTs: number, endTs: number) {
+  // Both timestamps feed TimeSpan unconverted, so compare in seconds too.
+  const {start: viewStart, end: viewEnd} =
+      globals.frontendLocalState.visibleWindowTime;
+  const visibleDur = viewEnd - viewStart;
+  const selectDur = endTs - startTs;
+  if (selectDur / visibleDur < 0.05 || startTs < viewStart ||
+      endTs > viewEnd) {
+    globals.frontendLocalState.updateVisibleTime(
+        new TimeSpan(startTs - (selectDur * 2), endTs + (selectDur * 2)));
+  }
+}
+
+/**
+ * Scrolls the element '#track_<trackId>' into view. If there is no such
+ * element, the element of the containing track (as reported by
+ * getContainingTrackId) is scrolled to instead.
+ */
+export function verticalScrollToTrack(trackId: string|number) {
+  const trackIdString = trackId.toString();
+  let track = document.querySelector('#track_' + trackIdString);
+
+  if (track === null) {
+    // No element for this id; fall back to the track that contains it.
+    const parentTrackId = getContainingTrackId(globals.state, trackIdString);
+    track = parentTrackId ? document.querySelector('#track_' + parentTrackId) :
+                            null;
+  }
+
+  if (track === null) {
+    console.error(`Can't scroll, track (${trackIdString}) not found.`);
+    return;
+  }
+
+  // With block: 'nearest' nothing moves if the track is already in view.
+  track.scrollIntoView({behavior: 'smooth', block: 'nearest'});
+}
+
+/**
+ * Scroll vertically and horizontally to reach track (|trackId|) at |ts| (ns).
+ */
+export function scrollToTrackAndTs(trackId: string|number, ts: number) {
+  verticalScrollToTrack(trackId);
+  horizontalScrollToTs(ts);
+}
diff --git a/ui/src/frontend/search_handler.ts b/ui/src/frontend/search_handler.ts
index 4c64440..351fd27 100644
--- a/ui/src/frontend/search_handler.ts
+++ b/ui/src/frontend/search_handler.ts
@@ -14,10 +14,10 @@
 
 import {searchSegment} from '../base/binary_search';
 import {Actions} from '../common/actions';
-import {getContainingTrackId} from '../common/state';
-import {fromNs, TimeSpan, toNs} from '../common/time';
+import {toNs} from '../common/time';
 
 import {globals} from './globals';
+import {scrollToTrackAndTs} from './scroll_helper';
 
 export function executeSearch(reverse = false) {
   const state = globals.frontendLocalState;
@@ -58,39 +58,11 @@
 }
 
 function moveViewportToCurrentSearch() {
-  // Move viewport if our selection moves outside.
-  const startNs = toNs(globals.frontendLocalState.visibleWindowTime.start);
-  const endNs = toNs(globals.frontendLocalState.visibleWindowTime.end);
   const currentTs = globals.currentSearchResults
                         .tsStarts[globals.frontendLocalState.searchIndex];
-  const currentViewNs = endNs - startNs;
-  if (currentTs < startNs || currentTs > endNs) {
-    // TODO(taylori): This is an ugly jump, we should do a smooth pan instead.
-    globals.frontendLocalState.updateVisibleTime(new TimeSpan(
-        fromNs(currentTs - currentViewNs / 2),
-        fromNs(currentTs + currentViewNs / 2)));
-  }
-
-  // Update vertical (up/down) scroll position
   const trackId = globals.currentSearchResults
                       .trackIds[globals.frontendLocalState.searchIndex];
-  let track = document.querySelector('#track_' + trackId);
-
-  if (!track) {
-    const parentTrackId = getContainingTrackId(globals.state, trackId);
-    if (parentTrackId) {
-      track = document.querySelector('#track_' + parentTrackId);
-    }
-  }
-
-  if (!track) {
-    console.error(`Can't scroll search result track not found (${trackId})`);
-    return;
-  }
-
-  // block: 'nearest' means that it will only scroll if the track is not
-  // currently in view.
-  track.scrollIntoView({behavior: 'smooth', block: 'nearest'});
+  scrollToTrackAndTs(trackId, currentTs);
 }
 
 function selectCurrentSearchResult() {
diff --git a/ui/src/frontend/viewer_page.ts b/ui/src/frontend/viewer_page.ts
index 620981f..63d7a48 100644
--- a/ui/src/frontend/viewer_page.ts
+++ b/ui/src/frontend/viewer_page.ts
@@ -309,9 +309,7 @@
           }));
           break;
         case 'HEAP_DUMP':
-          detailsPanels.push(m(HeapDumpDetailsPanel, {
-            key: 'heap_dump',
-          }));
+          detailsPanels.push(m(HeapDumpDetailsPanel, {key: 'heap_dump'}));
           break;
         case 'CHROME_SLICE':
           detailsPanels.push(m(ChromeSliceDetailsPanel));
diff --git a/ui/src/tracks/all_controller.ts b/ui/src/tracks/all_controller.ts
index 79d3070..ca4f7c8 100644
--- a/ui/src/tracks/all_controller.ts
+++ b/ui/src/tracks/all_controller.ts
@@ -25,3 +25,4 @@
 import './process_summary/controller';
 import './thread_state/controller';
 import './vsync/controller';
+import './async_slices/controller';
diff --git a/ui/src/tracks/all_frontend.ts b/ui/src/tracks/all_frontend.ts
index dfa57fc..3ce3de0 100644
--- a/ui/src/tracks/all_frontend.ts
+++ b/ui/src/tracks/all_frontend.ts
@@ -25,3 +25,4 @@
 import './process_summary/frontend';
 import './thread_state/frontend';
 import './vsync/frontend';
+import './async_slices/frontend';
diff --git a/ui/src/tracks/async_slices/common.ts b/ui/src/tracks/async_slices/common.ts
new file mode 100644
index 0000000..3b7c885
--- /dev/null
+++ b/ui/src/tracks/async_slices/common.ts
@@ -0,0 +1,22 @@
+// Copyright (C) 2019 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+export {Data} from '../chrome_slices/common';
+
+export const SLICE_TRACK_KIND = 'AsyncSliceTrack';
+
+export interface Config {
+  maxDepth: number;  // max(depth) over the slices of this track.
+  trackId: number;   // Matched against slice.ref (ref_type = 'track').
+}
diff --git a/ui/src/tracks/async_slices/controller.ts b/ui/src/tracks/async_slices/controller.ts
new file mode 100644
index 0000000..e56999b
--- /dev/null
+++ b/ui/src/tracks/async_slices/controller.ts
@@ -0,0 +1,143 @@
+// Copyright (C) 2019 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+import {fromNs, toNs} from '../../common/time';
+import {LIMIT} from '../../common/track_data';
+import {
+  TrackController,
+  trackControllerRegistry,
+} from '../../controller/track_controller';
+
+import {Config, Data, SLICE_TRACK_KIND} from './common';
+
+// Loads the slices of the track |config.trackId| for the visible window.
+class AsyncSliceTrackController extends TrackController<Config, Data> {
+  static readonly kind = SLICE_TRACK_KIND;
+  private setup = false;
+
+  async onBoundsChange(start: number, end: number, resolution: number):
+      Promise<Data> {
+    const startNs = toNs(start);
+    const endNs = toNs(end);
+    // Ns in 1px width. We want all slices smaller than 1px to be grouped.
+    const minNs = toNs(resolution);
+
+    if (!this.setup) {
+      await this.query(
+          `create virtual table ${this.tableName('window')} using window;`);
+
+      await this.query(
+          `create view ${this.tableName('small')} as ` +
+          `select ts,dur,depth,name,slice_id from slice ` +
+          `where ref_type = 'track' ` +
+          `and ref = ${this.config.trackId} ` +
+          `and dur < ${minNs} ` +
+          `order by ts;`);
+
+      await this.query(`create virtual table ${this.tableName('span')} using
+      span_join(${this.tableName('small')} PARTITIONED depth,
+      ${this.tableName('window')});`);
+
+      this.setup = true;
+    }
+
+    const windowDurNs = Math.max(1, endNs - startNs);
+
+    await this.query(`update ${this.tableName('window')} set
+    window_start=${startNs},
+    window_dur=${windowDurNs},
+    quantum=${minNs}`);
+
+    await this.query(`drop view if exists ${this.tableName('small')}`);
+    await this.query(`drop view if exists ${this.tableName('big')}`);
+    await this.query(`drop view if exists ${this.tableName('summary')}`);
+
+    await this.query(
+        `create view ${this.tableName('small')} as ` +
+        `select ts,dur,depth,name, slice_id from slice ` +
+        `where ref_type = 'track' ` +
+        `and ref = ${this.config.trackId} ` +
+        `and dur < ${minNs} ` +
+        `order by ts `);
+
+    await this.query(
+        `create view ${this.tableName('big')} as ` +
+        `select ts,dur,depth,name, slice_id from slice ` +
+        `where ref_type = 'track' ` +
+        `and ref = ${this.config.trackId} ` +
+        `and ts >= ${startNs} - dur ` +
+        `and ts <= ${endNs} ` +
+        `and dur >= ${minNs} ` +
+        `order by ts `);
+
+    // So that busy slices never overlap, we use the start of the bucket
+    // as the ts, even though min(ts) would technically be more accurate.
+    await this.query(`create view ${this.tableName('summary')} as select
+      (quantum_ts * ${minNs} + ${startNs}) as ts,
+      ${minNs} as dur,
+      depth,
+      'Busy' as name,
+      -1 as slice_id
+      from ${this.tableName('span')}
+      group by depth, quantum_ts
+      order by ts;`);
+
+    const query = `select * from ${this.tableName('summary')} UNION ` +
+        `select * from ${this.tableName('big')} order by ts limit ${LIMIT}`;
+
+    const rawResult = await this.query(query);
+
+    if (rawResult.error) {
+      throw new Error(`Query error "${query}": ${rawResult.error}`);
+    }
+
+    const numRows = +rawResult.numRecords;
+
+    const slices: Data = {
+      start,
+      end,
+      resolution,
+      length: numRows,
+      strings: [],
+      sliceIds: new Float64Array(numRows),
+      starts: new Float64Array(numRows),
+      ends: new Float64Array(numRows),
+      depths: new Uint16Array(numRows),
+      titles: new Uint16Array(numRows),
+    };
+
+    const stringIndexes = new Map<string, number>();
+    function internString(str: string) {
+      let idx = stringIndexes.get(str);
+      if (idx !== undefined) return idx;
+      idx = slices.strings.length;
+      slices.strings.push(str);
+      stringIndexes.set(str, idx);
+      return idx;
+    }
+
+    for (let row = 0; row < numRows; row++) {
+      const cols = rawResult.columns;
+      const startSec = fromNs(+cols[0].longValues![row]);
+      slices.starts[row] = startSec;
+      slices.ends[row] = startSec + fromNs(+cols[1].longValues![row]);
+      slices.depths[row] = +cols[2].longValues![row];
+      slices.titles[row] = internString(cols[3].stringValues![row]);
+      slices.sliceIds[row] = +cols[4].longValues![row];
+    }
+    return slices;
+  }
+}
+
+trackControllerRegistry.register(AsyncSliceTrackController);
diff --git a/ui/src/tracks/async_slices/frontend.ts b/ui/src/tracks/async_slices/frontend.ts
new file mode 100644
index 0000000..918f831
--- /dev/null
+++ b/ui/src/tracks/async_slices/frontend.ts
@@ -0,0 +1,29 @@
+// Copyright (C) 2019 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+import {TrackState} from '../../common/state';
+import {Track} from '../../frontend/track';
+import {trackRegistry} from '../../frontend/track_registry';
+import {ChromeSliceTrack} from '../chrome_slices/frontend';
+
+import {SLICE_TRACK_KIND} from './common';
+
+export class AsyncSliceTrack extends ChromeSliceTrack {
+  static readonly kind = SLICE_TRACK_KIND;  // 'AsyncSliceTrack'
+  static create(trackState: TrackState): Track {
+    return new AsyncSliceTrack(trackState);  // No instance overrides.
+  }
+}
+
+trackRegistry.register(AsyncSliceTrack);
diff --git a/ui/src/tracks/chrome_slices/common.ts b/ui/src/tracks/chrome_slices/common.ts
index ad240c3..68653da 100644
--- a/ui/src/tracks/chrome_slices/common.ts
+++ b/ui/src/tracks/chrome_slices/common.ts
@@ -30,5 +30,4 @@
   ends: Float64Array;
   depths: Uint16Array;
   titles: Uint16Array;      // Index in |strings|.
-  categories: Uint16Array;  // Index in |strings|.
 }
diff --git a/ui/src/tracks/chrome_slices/controller.ts b/ui/src/tracks/chrome_slices/controller.ts
index 1d44a7c..cbe6486 100644
--- a/ui/src/tracks/chrome_slices/controller.ts
+++ b/ui/src/tracks/chrome_slices/controller.ts
@@ -38,10 +38,8 @@
 
       await this.query(
           `create view ${this.tableName('small')} as ` +
-          `select ts,dur,depth,cat,name,slice_id from slices ` +
+          `select ts,dur,depth,name,slice_id from slice ` +
           `where utid = ${this.config.utid} ` +
-          `and ts >= ${startNs} - dur ` +
-          `and ts <= ${endNs} ` +
           `and dur < ${minNs} ` +
           `order by ts;`);
 
@@ -65,16 +63,14 @@
 
     await this.query(
         `create view ${this.tableName('small')} as ` +
-        `select ts,dur,depth,cat,name, slice_id from slices ` +
+        `select ts,dur,depth,name,slice_id from slice ` +
         `where utid = ${this.config.utid} ` +
-        `and ts >= ${startNs} - dur ` +
-        `and ts <= ${endNs} ` +
         `and dur < ${minNs} ` +
         `order by ts `);
 
     await this.query(
         `create view ${this.tableName('big')} as ` +
-        `select ts,dur,depth,cat,name, slice_id from slices ` +
+        `select ts,dur,depth,name,slice_id from slice ` +
         `where utid = ${this.config.utid} ` +
         `and ts >= ${startNs} - dur ` +
         `and ts <= ${endNs} ` +
@@ -87,11 +83,10 @@
       (quantum_ts * ${minNs} + ${startNs}) as ts,
       ${minNs} as dur,
       depth,
-      cat,
       'Busy' as name,
       -1 as slice_id
       from ${this.tableName('span')}
-      group by cat, depth, quantum_ts
+      group by depth, quantum_ts
       order by ts;`);
 
     const query = `select * from ${this.tableName('summary')} UNION ` +
@@ -116,7 +111,6 @@
       ends: new Float64Array(numRows),
       depths: new Uint16Array(numRows),
       titles: new Uint16Array(numRows),
-      categories: new Uint16Array(numRows),
     };
 
     const stringIndexes = new Map<string, number>();
@@ -135,9 +129,8 @@
       slices.starts[row] = startSec;
       slices.ends[row] = startSec + fromNs(+cols[1].longValues![row]);
       slices.depths[row] = +cols[2].longValues![row];
-      slices.categories[row] = internString(cols[3].stringValues![row]);
-      slices.titles[row] = internString(cols[4].stringValues![row]);
-      slices.sliceIds[row] = +cols[5].longValues![row];
+      slices.titles[row] = internString(cols[3].stringValues![row]);
+      slices.sliceIds[row] = +cols[4].longValues![row];
     }
     return slices;
   }
diff --git a/ui/src/tracks/chrome_slices/frontend.ts b/ui/src/tracks/chrome_slices/frontend.ts
index 5826f67..544d84f 100644
--- a/ui/src/tracks/chrome_slices/frontend.ts
+++ b/ui/src/tracks/chrome_slices/frontend.ts
@@ -34,9 +34,9 @@
   return hash & 0xff;
 }
 
-class ChromeSliceTrack extends Track<Config, Data> {
-  static readonly kind = SLICE_TRACK_KIND;
-  static create(trackState: TrackState): ChromeSliceTrack {
+export class ChromeSliceTrack extends Track<Config, Data> {
+  static readonly kind: string = SLICE_TRACK_KIND;
+  static create(trackState: TrackState): Track {
     return new ChromeSliceTrack(trackState);
   }