Merge "Add a reusable button component."
diff --git a/Android.bp b/Android.bp
index 687d3f5..ec8d768 100644
--- a/Android.bp
+++ b/Android.bp
@@ -4838,6 +4838,7 @@
         "protos/perfetto/trace/perfetto/perfetto_metatrace.proto",
         "protos/perfetto/trace/perfetto/tracing_service_event.proto",
         "protos/perfetto/trace/power/android_energy_estimation_breakdown.proto",
+        "protos/perfetto/trace/power/android_entity_state_residency.proto",
         "protos/perfetto/trace/power/battery_counters.proto",
         "protos/perfetto/trace/power/power_rails.proto",
         "protos/perfetto/trace/profiling/deobfuscation.proto",
@@ -6579,6 +6580,7 @@
     name: "perfetto_protos_perfetto_trace_power_cpp_gen",
     srcs: [
         "protos/perfetto/trace/power/android_energy_estimation_breakdown.proto",
+        "protos/perfetto/trace/power/android_entity_state_residency.proto",
         "protos/perfetto/trace/power/battery_counters.proto",
         "protos/perfetto/trace/power/power_rails.proto",
     ],
@@ -6589,6 +6591,7 @@
     cmd: "mkdir -p $(genDir)/external/perfetto/ && $(location aprotoc) --proto_path=external/perfetto --plugin=protoc-gen-plugin=$(location perfetto_src_protozero_protoc_plugin_cppgen_plugin) --plugin_out=wrapper_namespace=gen:$(genDir)/external/perfetto/ $(in)",
     out: [
         "external/perfetto/protos/perfetto/trace/power/android_energy_estimation_breakdown.gen.cc",
+        "external/perfetto/protos/perfetto/trace/power/android_entity_state_residency.gen.cc",
         "external/perfetto/protos/perfetto/trace/power/battery_counters.gen.cc",
         "external/perfetto/protos/perfetto/trace/power/power_rails.gen.cc",
     ],
@@ -6599,6 +6602,7 @@
     name: "perfetto_protos_perfetto_trace_power_cpp_gen_headers",
     srcs: [
         "protos/perfetto/trace/power/android_energy_estimation_breakdown.proto",
+        "protos/perfetto/trace/power/android_entity_state_residency.proto",
         "protos/perfetto/trace/power/battery_counters.proto",
         "protos/perfetto/trace/power/power_rails.proto",
     ],
@@ -6609,6 +6613,7 @@
     cmd: "mkdir -p $(genDir)/external/perfetto/ && $(location aprotoc) --proto_path=external/perfetto --plugin=protoc-gen-plugin=$(location perfetto_src_protozero_protoc_plugin_cppgen_plugin) --plugin_out=wrapper_namespace=gen:$(genDir)/external/perfetto/ $(in)",
     out: [
         "external/perfetto/protos/perfetto/trace/power/android_energy_estimation_breakdown.gen.h",
+        "external/perfetto/protos/perfetto/trace/power/android_entity_state_residency.gen.h",
         "external/perfetto/protos/perfetto/trace/power/battery_counters.gen.h",
         "external/perfetto/protos/perfetto/trace/power/power_rails.gen.h",
     ],
@@ -6623,6 +6628,7 @@
     name: "perfetto_protos_perfetto_trace_power_lite_gen",
     srcs: [
         "protos/perfetto/trace/power/android_energy_estimation_breakdown.proto",
+        "protos/perfetto/trace/power/android_entity_state_residency.proto",
         "protos/perfetto/trace/power/battery_counters.proto",
         "protos/perfetto/trace/power/power_rails.proto",
     ],
@@ -6632,6 +6638,7 @@
     cmd: "mkdir -p $(genDir)/external/perfetto/ && $(location aprotoc) --proto_path=external/perfetto --cpp_out=lite=true:$(genDir)/external/perfetto/ $(in)",
     out: [
         "external/perfetto/protos/perfetto/trace/power/android_energy_estimation_breakdown.pb.cc",
+        "external/perfetto/protos/perfetto/trace/power/android_entity_state_residency.pb.cc",
         "external/perfetto/protos/perfetto/trace/power/battery_counters.pb.cc",
         "external/perfetto/protos/perfetto/trace/power/power_rails.pb.cc",
     ],
@@ -6642,6 +6649,7 @@
     name: "perfetto_protos_perfetto_trace_power_lite_gen_headers",
     srcs: [
         "protos/perfetto/trace/power/android_energy_estimation_breakdown.proto",
+        "protos/perfetto/trace/power/android_entity_state_residency.proto",
         "protos/perfetto/trace/power/battery_counters.proto",
         "protos/perfetto/trace/power/power_rails.proto",
     ],
@@ -6651,6 +6659,7 @@
     cmd: "mkdir -p $(genDir)/external/perfetto/ && $(location aprotoc) --proto_path=external/perfetto --cpp_out=lite=true:$(genDir)/external/perfetto/ $(in)",
     out: [
         "external/perfetto/protos/perfetto/trace/power/android_energy_estimation_breakdown.pb.h",
+        "external/perfetto/protos/perfetto/trace/power/android_entity_state_residency.pb.h",
         "external/perfetto/protos/perfetto/trace/power/battery_counters.pb.h",
         "external/perfetto/protos/perfetto/trace/power/power_rails.pb.h",
     ],
@@ -6665,6 +6674,7 @@
     name: "perfetto_protos_perfetto_trace_power_zero_gen",
     srcs: [
         "protos/perfetto/trace/power/android_energy_estimation_breakdown.proto",
+        "protos/perfetto/trace/power/android_entity_state_residency.proto",
         "protos/perfetto/trace/power/battery_counters.proto",
         "protos/perfetto/trace/power/power_rails.proto",
     ],
@@ -6675,6 +6685,7 @@
     cmd: "mkdir -p $(genDir)/external/perfetto/ && $(location aprotoc) --proto_path=external/perfetto --plugin=protoc-gen-plugin=$(location protozero_plugin) --plugin_out=wrapper_namespace=pbzero:$(genDir)/external/perfetto/ $(in)",
     out: [
         "external/perfetto/protos/perfetto/trace/power/android_energy_estimation_breakdown.pbzero.cc",
+        "external/perfetto/protos/perfetto/trace/power/android_entity_state_residency.pbzero.cc",
         "external/perfetto/protos/perfetto/trace/power/battery_counters.pbzero.cc",
         "external/perfetto/protos/perfetto/trace/power/power_rails.pbzero.cc",
     ],
@@ -6685,6 +6696,7 @@
     name: "perfetto_protos_perfetto_trace_power_zero_gen_headers",
     srcs: [
         "protos/perfetto/trace/power/android_energy_estimation_breakdown.proto",
+        "protos/perfetto/trace/power/android_entity_state_residency.proto",
         "protos/perfetto/trace/power/battery_counters.proto",
         "protos/perfetto/trace/power/power_rails.proto",
     ],
@@ -6695,6 +6707,7 @@
     cmd: "mkdir -p $(genDir)/external/perfetto/ && $(location aprotoc) --proto_path=external/perfetto --plugin=protoc-gen-plugin=$(location protozero_plugin) --plugin_out=wrapper_namespace=pbzero:$(genDir)/external/perfetto/ $(in)",
     out: [
         "external/perfetto/protos/perfetto/trace/power/android_energy_estimation_breakdown.pbzero.h",
+        "external/perfetto/protos/perfetto/trace/power/android_entity_state_residency.pbzero.h",
         "external/perfetto/protos/perfetto/trace/power/battery_counters.pbzero.h",
         "external/perfetto/protos/perfetto/trace/power/power_rails.pbzero.h",
     ],
@@ -11136,6 +11149,7 @@
         "protos/perfetto/trace/perfetto/perfetto_metatrace.proto",
         "protos/perfetto/trace/perfetto/tracing_service_event.proto",
         "protos/perfetto/trace/power/android_energy_estimation_breakdown.proto",
+        "protos/perfetto/trace/power/android_entity_state_residency.proto",
         "protos/perfetto/trace/power/battery_counters.proto",
         "protos/perfetto/trace/power/power_rails.proto",
         "protos/perfetto/trace/profiling/deobfuscation.proto",
diff --git a/BUILD b/BUILD
index 2232f30..9ab91e9 100644
--- a/BUILD
+++ b/BUILD
@@ -574,6 +574,7 @@
 perfetto_filegroup(
     name = "include_perfetto_public_abi_base",
     srcs = [
+        "include/perfetto/public/abi/atomic.h",
         "include/perfetto/public/abi/export.h",
     ],
 )
@@ -653,6 +654,7 @@
         "include/perfetto/tracing/internal/checked_scope.h",
         "include/perfetto/tracing/internal/compile_time_hash.h",
         "include/perfetto/tracing/internal/data_source_internal.h",
+        "include/perfetto/tracing/internal/data_source_type.h",
         "include/perfetto/tracing/internal/in_process_tracing_backend.h",
         "include/perfetto/tracing/internal/interceptor_trace_writer.h",
         "include/perfetto/tracing/internal/system_tracing_backend.h",
@@ -3992,6 +3994,7 @@
     name = "protos_perfetto_trace_power_protos",
     srcs = [
         "protos/perfetto/trace/power/android_energy_estimation_breakdown.proto",
+        "protos/perfetto/trace/power/android_entity_state_residency.proto",
         "protos/perfetto/trace/power/battery_counters.proto",
         "protos/perfetto/trace/power/power_rails.proto",
     ],
diff --git a/BUILD.gn b/BUILD.gn
index bac6cfc..ac13eb3 100644
--- a/BUILD.gn
+++ b/BUILD.gn
@@ -113,6 +113,9 @@
     "test:client_api_example",
     "test/stress_test",
   ]
+  if (!is_win) {
+    all_targets += [ "examples/shared_lib:example_shlib_data_source" ]
+  }
 }
 
 if (enable_perfetto_trace_processor_json) {
diff --git a/CHANGELOG b/CHANGELOG
index ac77b20..c91f2b5 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -8,6 +8,11 @@
   SDK:
     *
 
+v32.2 - 2023-02-16:
+  SDK:
+    * Fix MSVC warnings.
+
+
 v32.1 - 2023-02-01:
   Trace Processor:
     * Fix build on windows.
diff --git a/examples/README.md b/examples/README.md
index cc39a16..09a6c90 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -3,3 +3,4 @@
 This directory contains examples for integrating Perfetto into your projects.
 
 - Example applications using the [Perfetto SDK](sdk/README.md).
+- Example applications using the [Perfetto shared library](shared_lib/README.md).
diff --git a/examples/shared_lib/BUILD.gn b/examples/shared_lib/BUILD.gn
new file mode 100644
index 0000000..ff9dc55
--- /dev/null
+++ b/examples/shared_lib/BUILD.gn
@@ -0,0 +1,26 @@
+# Copyright (C) 2022 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# On windows C11 (required for atomics) is not really supported, so we can't
+# really build a C executable.
+assert(!is_win)
+
+executable("example_shlib_data_source") {
+  testonly = true
+  deps = [
+    "../../gn:default_deps",
+    "../../src/shared_lib:libperfetto_c",
+  ]
+  sources = [ "example_shlib_data_source.c" ]
+}
diff --git a/examples/shared_lib/README.md b/examples/shared_lib/README.md
new file mode 100644
index 0000000..a28e4ff
--- /dev/null
+++ b/examples/shared_lib/README.md
@@ -0,0 +1,4 @@
+# Perfetto C SDK example project
+
+This directory contains example programs that link with the perfetto dynamic
+tracing library. The interfaces (API and ABI) are not yet stable.
diff --git a/examples/shared_lib/example_shlib_data_source.c b/examples/shared_lib/example_shlib_data_source.c
new file mode 100644
index 0000000..d743cd3
--- /dev/null
+++ b/examples/shared_lib/example_shlib_data_source.c
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <threads.h>
+#include <time.h>
+
+#include "perfetto/public/data_source.h"
+#include "perfetto/public/producer.h"
+
+static struct PerfettoDs custom = PERFETTO_DS_INIT();
+
+int main(void) {
+  struct PerfettoProducerInitArgs args = {0};
+  args.backends = PERFETTO_BACKEND_SYSTEM;
+  PerfettoProducerInit(args);
+
+  PerfettoDsRegister(&custom, "com.example.custom_data_source",
+                     PerfettoDsNoCallbacks());
+
+  for (;;) {
+    PERFETTO_DS_TRACE(custom, ctx) {}
+    thrd_sleep(&(struct timespec){.tv_sec = 1}, NULL);
+  }
+}
diff --git a/gn/standalone/BUILD.gn b/gn/standalone/BUILD.gn
index 329aa97..7ccf966 100644
--- a/gn/standalone/BUILD.gn
+++ b/gn/standalone/BUILD.gn
@@ -88,11 +88,11 @@
     ]
   } else if (!is_clang && !is_win) {
     # Use return std::move(...) for compatibility with old GCC compilers.
-    cflags += [ "-Wno-redundant-move" ]
+    cflags_cc = [ "-Wno-redundant-move" ]
 
     # Use after free detection in GCC is still not good enough: it still fails
     # on very obvious false-positives in trace processor.
-    cflags += [ "-Wno-use-after-free" ]
+    cflags_cc += [ "-Wno-use-after-free" ]
   }
 }
 
diff --git a/include/perfetto/base/time.h b/include/perfetto/base/time.h
index 5849d3e..9913f3c 100644
--- a/include/perfetto/base/time.h
+++ b/include/perfetto/base/time.h
@@ -183,6 +183,10 @@
   return std::chrono::duration_cast<TimeSeconds>(GetBootTimeNs());
 }
 
+inline TimeMillis GetBootTimeMs() {
+  return std::chrono::duration_cast<TimeMillis>(GetBootTimeNs());
+}
+
 inline TimeMillis GetWallTimeMs() {
   return std::chrono::duration_cast<TimeMillis>(GetWallTimeNs());
 }
diff --git a/include/perfetto/ext/base/periodic_task.h b/include/perfetto/ext/base/periodic_task.h
index b948322..e58eb01 100644
--- a/include/perfetto/ext/base/periodic_task.h
+++ b/include/perfetto/ext/base/periodic_task.h
@@ -31,14 +31,17 @@
 // A periodic task utility class. It wraps the logic necessary to do periodic
 // tasks using a TaskRunner, taking care of subtleties like ensuring that
 // outstanding tasks are cancelled after reset/dtor.
-// Tasks are aligned on wall time, this is to ensure that when using multiple
-// periodic tasks, they happen at the same time, minimizing wakeups.
+// Tasks are aligned on wall time (unless they are |one_shot|). This is to
+// ensure that when using multiple periodic tasks, they happen at the same time,
+// minimizing context switches.
 // On Linux/Android it also supports suspend-aware mode (via timerfd). On other
 // operating systems it falls back to PostDelayedTask, which is not
 // suspend-aware.
 // TODO(primiano): this should probably become a periodic timer scheduler, so we
 // can use one FD for everything rather than one FD per task. For now we take
 // the hit of a FD-per-task to keep this low-risk.
+// TODO(primiano): consider renaming this class to TimerTask. When |one_shot|
+// is set, the "Periodic" part of the class name becomes a lie.
 class PeriodicTask {
  public:
   explicit PeriodicTask(base::TaskRunner*);
@@ -49,6 +52,7 @@
     std::function<void()> task = nullptr;
     bool start_first_task_immediately = false;
     bool use_suspend_aware_timer = false;
+    bool one_shot = false;
   };
 
   void Start(Args);
diff --git a/include/perfetto/public/BUILD.gn b/include/perfetto/public/BUILD.gn
index ae2c91d..3b5ab5f 100644
--- a/include/perfetto/public/BUILD.gn
+++ b/include/perfetto/public/BUILD.gn
@@ -18,6 +18,9 @@
 }
 
 source_set("protozero") {
-  sources = [ "stream_writer.h" ]
+  sources = [
+    "pb_utils.h",
+    "stream_writer.h",
+  ]
   public_deps = [ "./abi:protozero" ]
 }
diff --git a/include/perfetto/public/abi/BUILD.gn b/include/perfetto/public/abi/BUILD.gn
index f17098b..6d5d565 100644
--- a/include/perfetto/public/abi/BUILD.gn
+++ b/include/perfetto/public/abi/BUILD.gn
@@ -13,7 +13,10 @@
 # limitations under the License.
 
 source_set("base") {
-  sources = [ "export.h" ]
+  sources = [
+    "atomic.h",
+    "export.h",
+  ]
 }
 
 source_set("protozero") {
diff --git a/include/perfetto/public/abi/atomic.h b/include/perfetto/public/abi/atomic.h
new file mode 100644
index 0000000..5506f8b
--- /dev/null
+++ b/include/perfetto/public/abi/atomic.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef INCLUDE_PERFETTO_PUBLIC_ABI_ATOMIC_H_
+#define INCLUDE_PERFETTO_PUBLIC_ABI_ATOMIC_H_
+
+// Problem: C++11 and C11 use a different syntax for atomics and the C11 syntax
+// is not supported in C++11.
+//
+// This header bridges the gap.
+//
+// This assumes that C++11 atomics are binary compatible with C11 atomics. While
+// this is technically not required by the standards, reasonable compilers
+// appear to guarantee this.
+
+#ifdef __cplusplus
+#include <atomic>
+#else
+#include <stdatomic.h>
+#endif
+
+#ifdef __cplusplus
+#define PERFETTO_ATOMIC(TYPE) std::atomic<TYPE>
+#else
+#define PERFETTO_ATOMIC(TYPE) _Atomic(TYPE)
+#endif
+
+#ifdef __cplusplus
+#define PERFETTO_ATOMIC_LOAD std::atomic_load
+#define PERFETTO_ATOMIC_LOAD_EXPLICIT std::atomic_load_explicit
+#define PERFETTO_ATOMIC_STORE std::atomic_store
+#define PERFETTO_ATOMIC_STORE_EXPLICIT std::atomic_store_explicit
+
+#define PERFETTO_MEMORY_ORDER_ACQ_REL std::memory_order_acq_rel
+#define PERFETTO_MEMORY_ORDER_ACQUIRE std::memory_order_acquire
+#define PERFETTO_MEMORY_ORDER_CONSUME std::memory_order_consume
+#define PERFETTO_MEMORY_ORDER_RELAXED std::memory_order_relaxed
+#define PERFETTO_MEMORY_ORDER_RELEASE std::memory_order_release
+#define PERFETTO_MEMORY_ORDER_SEQ_CST std::memory_order_seq_cst
+#else
+#define PERFETTO_ATOMIC_LOAD atomic_load
+#define PERFETTO_ATOMIC_LOAD_EXPLICIT atomic_load_explicit
+#define PERFETTO_ATOMIC_STORE atomic_store
+#define PERFETTO_ATOMIC_STORE_EXPLICIT atomic_store_explicit
+
+#define PERFETTO_MEMORY_ORDER_ACQ_REL memory_order_acq_rel
+#define PERFETTO_MEMORY_ORDER_ACQUIRE memory_order_acquire
+#define PERFETTO_MEMORY_ORDER_CONSUME memory_order_consume
+#define PERFETTO_MEMORY_ORDER_RELAXED memory_order_relaxed
+#define PERFETTO_MEMORY_ORDER_RELEASE memory_order_release
+#define PERFETTO_MEMORY_ORDER_SEQ_CST memory_order_seq_cst
+#endif
+
+#endif  // INCLUDE_PERFETTO_PUBLIC_ABI_ATOMIC_H_
diff --git a/include/perfetto/public/abi/backend_type.h b/include/perfetto/public/abi/backend_type.h
new file mode 100644
index 0000000..fe5fe48
--- /dev/null
+++ b/include/perfetto/public/abi/backend_type.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef INCLUDE_PERFETTO_PUBLIC_ABI_BACKEND_TYPE_H_
+#define INCLUDE_PERFETTO_PUBLIC_ABI_BACKEND_TYPE_H_
+
+#include <stdint.h>
+
+enum {
+  // The in-process tracing backend. Keeps trace buffers in the process memory.
+  PERFETTO_BACKEND_IN_PROCESS = (1 << 0),
+
+  // The system tracing backend. Connects to the system tracing service (e.g.
+  // on Linux/Android/Mac uses a named UNIX socket).
+  PERFETTO_BACKEND_SYSTEM = (1 << 1),
+};
+
+// Or-combination of one or more of the above PERFETTO_BACKEND_ flags.
+typedef uint32_t PerfettoBackendTypes;
+
+#endif  // INCLUDE_PERFETTO_PUBLIC_ABI_BACKEND_TYPE_H_
diff --git a/include/perfetto/public/abi/data_source_abi.h b/include/perfetto/public/abi/data_source_abi.h
new file mode 100644
index 0000000..3a31650
--- /dev/null
+++ b/include/perfetto/public/abi/data_source_abi.h
@@ -0,0 +1,273 @@
+/*
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef INCLUDE_PERFETTO_PUBLIC_ABI_DATA_SOURCE_ABI_H_
+#define INCLUDE_PERFETTO_PUBLIC_ABI_DATA_SOURCE_ABI_H_
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "perfetto/public/abi/atomic.h"
+#include "perfetto/public/abi/export.h"
+#include "perfetto/public/abi/stream_writer_abi.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Internal representation of a data source type.
+struct PerfettoDsImpl;
+
+// Internal thread local state of a data source type.
+struct PerfettoDsTlsImpl;
+
+// Internal thread local state of a data source instance used for tracing.
+struct PerfettoDsTracerImpl;
+
+// A global atomic boolean that's always false.
+extern PERFETTO_SDK_EXPORT PERFETTO_ATOMIC(bool) perfetto_atomic_false;
+
+// There can be more than one data source instance for each data source type.
+// This index identifies one of them.
+typedef uint32_t PerfettoDsInstanceIndex;
+
+// Creates a data source type.
+//
+// The data source type needs to be registered later with
+// PerfettoDsImplRegister().
+PERFETTO_SDK_EXPORT struct PerfettoDsImpl* PerfettoDsImplCreate(void);
+
+// Called when a data source instance of a specific type is created. `ds_config`
+// points to a serialized perfetto.protos.DataSourceConfig message,
+// `ds_config_size` bytes long. `user_arg` is the value passed to
+// PerfettoDsSetCbUserArg().
+typedef void* (*PerfettoDsOnSetupCb)(PerfettoDsInstanceIndex inst_id,
+                                     void* ds_config,
+                                     size_t ds_config_size,
+                                     void* user_arg);
+
+// Called when tracing starts for a data source instance. `user_arg` is the
+// value passed to PerfettoDsSetCbUserArg(). `inst_ctx` is the return
+// value of PerfettoDsOnSetupCb.
+typedef void (*PerfettoDsOnStartCb)(PerfettoDsInstanceIndex inst_id,
+                                    void* user_arg,
+                                    void* inst_ctx);
+
+// Internal handle used to perform operations from the OnStop callback.
+struct PerfettoDsOnStopArgs;
+
+// Internal handle used to signal when the data source stop operation is
+// complete.
+struct PerfettoDsAsyncStopper;
+
+// Tells the tracing service to postpone the stopping of a data source instance.
+// The returned handle can be used to signal the tracing service when the data
+// source instance can be stopped.
+PERFETTO_SDK_EXPORT struct PerfettoDsAsyncStopper* PerfettoDsOnStopArgsPostpone(
+    struct PerfettoDsOnStopArgs*);
+
+// Tells the tracing service to stop a data source instance (whose stop
+// operation was previously postponed with PerfettoDsOnStopArgsPostpone).
+PERFETTO_SDK_EXPORT void PerfettoDsStopDone(struct PerfettoDsAsyncStopper*);
+
+// Called when tracing stops for a data source instance. `user_arg` is the value
+// passed to PerfettoDsSetCbUserArg(). `inst_ctx` is the return value of
+// PerfettoDsOnSetupCb. `args` can be used to postpone stopping this data source
+// instance.
+typedef void (*PerfettoDsOnStopCb)(PerfettoDsInstanceIndex inst_id,
+                                   void* user_arg,
+                                   void* inst_ctx,
+                                   struct PerfettoDsOnStopArgs* args);
+
+// Creates custom state (either thread local state or incremental state) for
+// instance `inst_id`. `user_arg` is the value passed to
+// PerfettoDsSetCbUserArg().
+typedef void* (*PerfettoDsOnCreateCustomState)(
+    PerfettoDsInstanceIndex inst_id,
+    struct PerfettoDsTracerImpl* tracer,
+    void* user_arg);
+
+// Deletes the previously created custom state `obj`.
+typedef void (*PerfettoDsOnDeleteCustomState)(void* obj);
+
+// Setters for callbacks: can not be called after PerfettoDsImplRegister().
+
+PERFETTO_SDK_EXPORT void PerfettoDsSetOnSetupCallback(struct PerfettoDsImpl*,
+                                                      PerfettoDsOnSetupCb);
+
+PERFETTO_SDK_EXPORT void PerfettoDsSetOnStartCallback(struct PerfettoDsImpl*,
+                                                      PerfettoDsOnStartCb);
+
+PERFETTO_SDK_EXPORT void PerfettoDsSetOnStopCallback(struct PerfettoDsImpl*,
+                                                     PerfettoDsOnStopCb);
+
+// Callbacks for custom per instance thread local state.
+PERFETTO_SDK_EXPORT void PerfettoDsSetOnCreateTls(
+    struct PerfettoDsImpl*,
+    PerfettoDsOnCreateCustomState);
+
+PERFETTO_SDK_EXPORT void PerfettoDsSetOnDeleteTls(
+    struct PerfettoDsImpl*,
+    PerfettoDsOnDeleteCustomState);
+
+// Callbacks for custom per instance thread local incremental state.
+PERFETTO_SDK_EXPORT void PerfettoDsSetOnCreateIncr(
+    struct PerfettoDsImpl*,
+    PerfettoDsOnCreateCustomState);
+
+PERFETTO_SDK_EXPORT void PerfettoDsSetOnDeleteIncr(
+    struct PerfettoDsImpl*,
+    PerfettoDsOnDeleteCustomState);
+
+// Stores the `user_arg` that's going to be passed later to the callbacks for
+// this data source type.
+PERFETTO_SDK_EXPORT void PerfettoDsSetCbUserArg(struct PerfettoDsImpl*,
+                                                void* user_arg);
+
+// Registers the `*ds_impl` data source type.
+//
+// `ds_impl` must be obtained via a call to `PerfettoDsImplCreate()`.
+//
+// `**enabled_ptr` will be set to true when the data source type has been
+// enabled.
+//
+// `descriptor` should point to a serialized
+// perfetto.protos.DataSourceDescriptor message, `descriptor_size` bytes long.
+//
+// Returns `true` in case of success, `false` in case of failure (in which case
+// `ds_impl` is invalid).
+PERFETTO_SDK_EXPORT bool PerfettoDsImplRegister(struct PerfettoDsImpl* ds_impl,
+                                                PERFETTO_ATOMIC(bool) *
+                                                    *enabled_ptr,
+                                                const void* descriptor,
+                                                size_t descriptor_size);
+
+// Updates the descriptor of the `*ds_impl` data source type.
+//
+// `descriptor` should point to a serialized
+// perfetto.protos.DataSourceDescriptor message, `descriptor_size` bytes long.
+PERFETTO_SDK_EXPORT void PerfettoDsImplUpdateDescriptor(
+    struct PerfettoDsImpl* ds_impl,
+    const void* descriptor,
+    size_t descriptor_size);
+
+// Tries to get the `inst_ctx` returned by PerfettoDsOnSetupCb() for the
+// instance with index `inst_id`.
+//
+// If successful, returns a non-null pointer and acquires a lock, which must be
+// released with PerfettoDsImplReleaseInstanceLocked.
+//
+// If unsuccessful (because the instance was destroyed in the meantime) or if
+// PerfettoDsOnSetupCb() returned a null value, returns null and does not
+// acquire any lock.
+PERFETTO_SDK_EXPORT void* PerfettoDsImplGetInstanceLocked(
+    struct PerfettoDsImpl* ds_impl,
+    PerfettoDsInstanceIndex inst_id);
+
+// Releases a lock previously acquired by a PerfettoDsImplGetInstanceLocked()
+// call, which must have returned a non null value.
+PERFETTO_SDK_EXPORT void PerfettoDsImplReleaseInstanceLocked(
+    struct PerfettoDsImpl* ds_impl,
+    PerfettoDsInstanceIndex inst_id);
+
+// Gets the data source thread local instance custom state created by
+// the callback passed to `PerfettoDsSetOnCreateTls`.
+PERFETTO_SDK_EXPORT void* PerfettoDsImplGetCustomTls(
+    struct PerfettoDsImpl* ds_impl,
+    struct PerfettoDsTracerImpl* tracer,
+    PerfettoDsInstanceIndex inst_id);
+
+// Gets the data source thread local instance incremental state created by
+// the callback passed to `PerfettoDsSetOnCreateIncr`.
+PERFETTO_SDK_EXPORT void* PerfettoDsImplGetIncrementalState(
+    struct PerfettoDsImpl* ds_impl,
+    struct PerfettoDsTracerImpl* tracer,
+    PerfettoDsInstanceIndex inst_id);
+
+// Iterator for all the active instances (on this thread) of a data source type.
+struct PerfettoDsImplTracerIterator {
+  // Instance id.
+  PerfettoDsInstanceIndex inst_id;
+  // Caches a pointer to the internal thread local state of the data source
+  // type.
+  struct PerfettoDsTlsImpl* tls;
+  // Pointer to the object used to output trace packets. When nullptr, the
+  // iteration is over.
+  struct PerfettoDsTracerImpl* tracer;
+};
+
+// Start iterating over all the active instances of the data source type
+// (`ds_impl`).
+//
+// If the returned tracer is not nullptr, the user must continue the iteration
+// with PerfettoDsImplTraceIterateNext(), until it is. The iteration can
+// only be interrupted early by calling PerfettoDsImplTraceIterateBreak().
+PERFETTO_SDK_EXPORT struct PerfettoDsImplTracerIterator
+PerfettoDsImplTraceIterateBegin(struct PerfettoDsImpl* ds_impl);
+
+// Advances the iterator to the next active instance of the data source type
+// (`ds_impl`).
+//
+// The user must keep calling PerfettoDsImplTraceIterateNext() until it sets
+// `iterator->tracer` to nullptr. The iteration can only be interrupted early by
+// calling PerfettoDsImplTraceIterateBreak().
+PERFETTO_SDK_EXPORT void PerfettoDsImplTraceIterateNext(
+    struct PerfettoDsImpl* ds_impl,
+    struct PerfettoDsImplTracerIterator* iterator);
+
+// Prematurely interrupts iteration over all the active instances of the data
+// source type (`ds_impl`).
+PERFETTO_SDK_EXPORT void PerfettoDsImplTraceIterateBreak(
+    struct PerfettoDsImpl* ds_impl,
+    struct PerfettoDsImplTracerIterator* iterator);
+
+// Creates a new trace packet on `tracer`. Returns a stream writer that can be
+// used to write data to the packet. The caller must use
+// PerfettoDsTracerImplPacketEnd() when done.
+PERFETTO_SDK_EXPORT struct PerfettoStreamWriter PerfettoDsTracerImplPacketBegin(
+    struct PerfettoDsTracerImpl* tracer);
+
+// Signals that the trace packet created previously on `tracer` with
+// PerfettoDsTracerImplPacketBegin() has been fully written.
+//
+// `writer` should point to the writer returned by
+// PerfettoDsTracerImplPacketBegin() and cannot be used anymore after this call.
+PERFETTO_SDK_EXPORT void PerfettoDsTracerImplPacketEnd(
+    struct PerfettoDsTracerImpl* tracer,
+    struct PerfettoStreamWriter* writer);
+
+// Called when a flush request is complete.
+typedef void (*PerfettoDsTracerOnFlushCb)(void* user_arg);
+
+// Forces a commit of the thread-local tracing data written so far to the
+// service.
+//
+// If `cb` is not NULL, it is called on a dedicated internal thread (with
+// `user_arg`), when flushing is complete. It may never be called (e.g. if the
+// tracing service disconnects).
+//
+// This is almost never required (tracing data is periodically committed as
+// trace pages are filled up) and has a non-negligible performance hit.
+PERFETTO_SDK_EXPORT void PerfettoDsTracerImplFlush(
+    struct PerfettoDsTracerImpl* tracer,
+    PerfettoDsTracerOnFlushCb cb,
+    void* user_arg);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // INCLUDE_PERFETTO_PUBLIC_ABI_DATA_SOURCE_ABI_H_
diff --git a/include/perfetto/public/abi/export.h b/include/perfetto/public/abi/export.h
index d11f5a2..734af85 100644
--- a/include/perfetto/public/abi/export.h
+++ b/include/perfetto/public/abi/export.h
@@ -26,10 +26,23 @@
 #endif
 
 // PERFETTO_SDK_EXPORT: Exports a symbol from the perfetto SDK shared library.
-#if defined(PERFETTO_IMPLEMENTATION)
+//
+// This is controlled by two defines (that likely come from the compiler command
+// line):
+// * PERFETTO_SDK_DISABLE_SHLIB_EXPORT: If this is defined, no export
+//   annotations are added. This might be useful when static linking.
+// * PERFETTO_SHLIB_SDK_IMPLEMENTATION: This must be defined when compiling the
+//   shared library itself (in order to export the symbols), but must be
+//   undefined when compiling objects that use the shared library (in order to
+//   import the symbols).
+#if !defined(PERFETTO_SDK_DISABLE_SHLIB_EXPORT)
+#if defined(PERFETTO_SHLIB_SDK_IMPLEMENTATION)
 #define PERFETTO_SDK_EXPORT PERFETTO_INTERNAL_DLL_EXPORT
 #else
 #define PERFETTO_SDK_EXPORT PERFETTO_INTERNAL_DLL_IMPORT
 #endif
+#else  // defined(PERFETTO_SDK_DISABLE_SHLIB_EXPORT)
+#define PERFETTO_SDK_EXPORT
+#endif  // defined(PERFETTO_SDK_DISABLE_SHLIB_EXPORT)
 
 #endif  // INCLUDE_PERFETTO_PUBLIC_ABI_EXPORT_H_
diff --git a/include/perfetto/public/abi/heap_buffer.h b/include/perfetto/public/abi/heap_buffer.h
new file mode 100644
index 0000000..811ed0b
--- /dev/null
+++ b/include/perfetto/public/abi/heap_buffer.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef INCLUDE_PERFETTO_PUBLIC_ABI_HEAP_BUFFER_H_
+#define INCLUDE_PERFETTO_PUBLIC_ABI_HEAP_BUFFER_H_
+
+#include "perfetto/public/abi/stream_writer_abi.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// A PerfettoHeapBuffer can be used to serialize protobuf data using the
+// PerfettoStreamWriter interface. Stores data on heap allocated buffers, which
+// can be read back with PerfettoHeapBufferCopyInto().
+
+struct PerfettoHeapBuffer;
+
+// Creates a PerfettoHeapBuffer. Takes a pointer to an (uninitialized)
+// PerfettoStreamWriter (owned by the caller). The stream writer can be used
+// later to serialize protobuf data.
+PERFETTO_SDK_EXPORT struct PerfettoHeapBuffer* PerfettoHeapBufferCreate(
+    struct PerfettoStreamWriter*);
+
+// Copies data from the heap buffer to `dst` (up to `size` bytes).
+PERFETTO_SDK_EXPORT void PerfettoHeapBufferCopyInto(
+    struct PerfettoHeapBuffer*,
+    struct PerfettoStreamWriter*,
+    void* dst,
+    size_t size);
+
+// Destroys the heap buffer.
+PERFETTO_SDK_EXPORT void PerfettoHeapBufferDestroy(
+    struct PerfettoHeapBuffer*,
+    struct PerfettoStreamWriter*);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // INCLUDE_PERFETTO_PUBLIC_ABI_HEAP_BUFFER_H_
diff --git a/include/perfetto/public/abi/producer.h b/include/perfetto/public/abi/producer.h
new file mode 100644
index 0000000..f3e7caf
--- /dev/null
+++ b/include/perfetto/public/abi/producer.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef INCLUDE_PERFETTO_PUBLIC_ABI_PRODUCER_H_
+#define INCLUDE_PERFETTO_PUBLIC_ABI_PRODUCER_H_
+
+#include <stdint.h>
+
+#include "perfetto/public/abi/export.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Initializes the global system perfetto producer.
+PERFETTO_SDK_EXPORT void PerfettoProducerSystemInit(void);
+
+// Initializes the global in-process perfetto producer.
+PERFETTO_SDK_EXPORT void PerfettoProducerInProcessInit(void);
+
+// Initializes both the global in-process and system perfetto producer.
+PERFETTO_SDK_EXPORT void PerfettoProducerInProcessAndSystemInit(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // INCLUDE_PERFETTO_PUBLIC_ABI_PRODUCER_H_
diff --git a/include/perfetto/public/abi/tracing_session_abi.h b/include/perfetto/public/abi/tracing_session_abi.h
new file mode 100644
index 0000000..bb044d9
--- /dev/null
+++ b/include/perfetto/public/abi/tracing_session_abi.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef INCLUDE_PERFETTO_PUBLIC_ABI_TRACING_SESSION_ABI_H_
+#define INCLUDE_PERFETTO_PUBLIC_ABI_TRACING_SESSION_ABI_H_
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "perfetto/public/abi/backend_type.h"
+#include "perfetto/public/abi/export.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Opaque pointer to the internal representation of a tracing session.
+struct PerfettoTracingSessionImpl;
+
+PERFETTO_SDK_EXPORT struct PerfettoTracingSessionImpl*
+PerfettoTracingSessionCreate(PerfettoBackendTypes backend);
+
+PERFETTO_SDK_EXPORT void PerfettoTracingSessionSetup(
+    struct PerfettoTracingSessionImpl*,
+    void* cfg_begin,
+    size_t cfg_len);
+
+PERFETTO_SDK_EXPORT void PerfettoTracingSessionStartAsync(
+    struct PerfettoTracingSessionImpl*);
+
+PERFETTO_SDK_EXPORT void PerfettoTracingSessionStartBlocking(
+    struct PerfettoTracingSessionImpl*);
+
+PERFETTO_SDK_EXPORT void PerfettoTracingSessionStopAsync(
+    struct PerfettoTracingSessionImpl*);
+
+PERFETTO_SDK_EXPORT void PerfettoTracingSessionStopBlocking(
+    struct PerfettoTracingSessionImpl*);
+
+// Called back to read pieces of tracing data. `data` points to a chunk of trace
+// data, `size` bytes long. `has_more` is true if there is more tracing data and
+// the callback will be invoked again.
+typedef void (*PerfettoTracingSessionReadCb)(struct PerfettoTracingSessionImpl*,
+                                             const void* data,
+                                             size_t size,
+                                             bool has_more,
+                                             void* user_arg);
+
+// Repeatedly calls cb with data from the tracing session. `user_arg` is passed
+// as is to the callback.
+PERFETTO_SDK_EXPORT void PerfettoTracingSessionReadTraceBlocking(
+    struct PerfettoTracingSessionImpl*,
+    PerfettoTracingSessionReadCb cb,
+    void* user_arg);
+
+PERFETTO_SDK_EXPORT void PerfettoTracingSessionDestroy(
+    struct PerfettoTracingSessionImpl*);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // INCLUDE_PERFETTO_PUBLIC_ABI_TRACING_SESSION_ABI_H_
diff --git a/include/perfetto/public/compiler.h b/include/perfetto/public/compiler.h
index 4d8905b..1ff416d 100644
--- a/include/perfetto/public/compiler.h
+++ b/include/perfetto/public/compiler.h
@@ -17,6 +17,8 @@
 #ifndef INCLUDE_PERFETTO_PUBLIC_COMPILER_H_
 #define INCLUDE_PERFETTO_PUBLIC_COMPILER_H_
 
+#include <stddef.h>
+
 #if defined(__GNUC__) || defined(__clang__)
 #define PERFETTO_LIKELY(_x) __builtin_expect(!!(_x), 1)
 #define PERFETTO_UNLIKELY(_x) __builtin_expect(!!(_x), 0)
@@ -33,4 +35,12 @@
 #define PERFETTO_STATIC_CAST(TYPE, VAL) ((TYPE)(VAL))
 #endif
 
+// PERFETTO_NULL: avoids the -Wzero-as-null-pointer-constant warning when
+// writing code that needs to be compiled as C and C++.
+#ifdef __cplusplus
+#define PERFETTO_NULL nullptr
+#else
+#define PERFETTO_NULL NULL
+#endif
+
 #endif  // INCLUDE_PERFETTO_PUBLIC_COMPILER_H_
diff --git a/include/perfetto/public/data_source.h b/include/perfetto/public/data_source.h
new file mode 100644
index 0000000..7bbb7d9
--- /dev/null
+++ b/include/perfetto/public/data_source.h
@@ -0,0 +1,209 @@
+/*
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef INCLUDE_PERFETTO_PUBLIC_DATA_SOURCE_H_
+#define INCLUDE_PERFETTO_PUBLIC_DATA_SOURCE_H_
+
+#include <malloc.h>
+#include <string.h>
+
+#include "perfetto/public/abi/atomic.h"
+#include "perfetto/public/abi/data_source_abi.h"
+#include "perfetto/public/compiler.h"
+#include "perfetto/public/pb_utils.h"
+
+// A data source type.
+struct PerfettoDs {
+  // Pointer to a (atomic) boolean, which is set to true if there is at
+  // least one enabled instance of this data source type.
+  PERFETTO_ATOMIC(bool) * enabled;
+  struct PerfettoDsImpl* impl;
+};
+
+// Initializes a PerfettoDs struct.
+#define PERFETTO_DS_INIT() \
+  { &perfetto_atomic_false, PERFETTO_NULL }
+
+// All the callbacks are optional and can be NULL if not needed.
+struct PerfettoDsCallbacks {
+  // Instance lifecycle callbacks:
+  PerfettoDsOnSetupCb on_setup_cb;
+  PerfettoDsOnStartCb on_start_cb;
+  PerfettoDsOnStopCb on_stop_cb;
+
+  // These are called to create/delete custom thread-local instance state, which
+  // can be accessed with PerfettoDsGetCustomTls().
+  PerfettoDsOnCreateCustomState on_create_tls_cb;
+  PerfettoDsOnDeleteCustomState on_delete_tls_cb;
+
+  // These are called to create/delete custom thread-local instance incremental
+  // state. Incremental state may be cleared periodically by the tracing service
+  // and can be accessed with PerfettoDsGetIncrementalState().
+  PerfettoDsOnCreateCustomState on_create_incr_cb;
+  PerfettoDsOnDeleteCustomState on_delete_incr_cb;
+
+  // Passed to all the callbacks as the `user_arg` param.
+  void* user_arg;
+};
+
+static inline struct PerfettoDsCallbacks PerfettoDsNoCallbacks(void) {
+  struct PerfettoDsCallbacks ret = {PERFETTO_NULL, PERFETTO_NULL, PERFETTO_NULL,
+                                    PERFETTO_NULL, PERFETTO_NULL, PERFETTO_NULL,
+                                    PERFETTO_NULL, PERFETTO_NULL};
+  return ret;
+}
+
+// Registers the data source type `ds`, named `data_source_name` with the global
+// perfetto producer.
+//
+// `callbacks` are called when certain events happen on the data source type.
+// PerfettoDsNoCallbacks() can be used if callbacks are not needed.
+//
+// TODO(ddiproietto): Accept the full DataSourceDescriptor, not just the
+// data_source_name
+static inline bool PerfettoDsRegister(struct PerfettoDs* ds,
+                                      const char* data_source_name,
+                                      struct PerfettoDsCallbacks callbacks) {
+  struct PerfettoDsImpl* ds_impl;
+  bool success;
+  // Build the DataSourceDescriptor protobuf message.
+  size_t data_source_name_len = strlen(data_source_name);
+  uint8_t* data_source_desc = PERFETTO_STATIC_CAST(
+      uint8_t*, malloc(data_source_name_len + PERFETTO_PB_VARINT_MAX_SIZE_32 +
+                       PERFETTO_PB_VARINT_MAX_SIZE_64));
+  uint8_t* write_ptr = data_source_desc;
+  const int32_t name_field_id = 1;  // perfetto.protos.DataSourceDescriptor.name
+  write_ptr = PerfettoPbWriteVarInt(
+      PerfettoPbMakeTag(name_field_id, PERFETTO_PB_WIRE_TYPE_DELIMITED),
+      write_ptr);
+  write_ptr = PerfettoPbWriteVarInt(data_source_name_len, write_ptr);
+  memcpy(write_ptr, data_source_name, data_source_name_len);
+  write_ptr += data_source_name_len;
+
+  ds_impl = PerfettoDsImplCreate();
+  if (callbacks.on_setup_cb) {
+    PerfettoDsSetOnSetupCallback(ds_impl, callbacks.on_setup_cb);
+  }
+  if (callbacks.on_start_cb) {
+    PerfettoDsSetOnStartCallback(ds_impl, callbacks.on_start_cb);
+  }
+  if (callbacks.on_stop_cb) {
+    PerfettoDsSetOnStopCallback(ds_impl, callbacks.on_stop_cb);
+  }
+  if (callbacks.on_create_tls_cb) {
+    PerfettoDsSetOnCreateTls(ds_impl, callbacks.on_create_tls_cb);
+  }
+  if (callbacks.on_delete_tls_cb) {
+    PerfettoDsSetOnDeleteTls(ds_impl, callbacks.on_delete_tls_cb);
+  }
+  if (callbacks.on_create_incr_cb) {
+    PerfettoDsSetOnCreateIncr(ds_impl, callbacks.on_create_incr_cb);
+  }
+  if (callbacks.on_delete_incr_cb) {
+    PerfettoDsSetOnDeleteIncr(ds_impl, callbacks.on_delete_incr_cb);
+  }
+  if (callbacks.user_arg) {
+    PerfettoDsSetCbUserArg(ds_impl, callbacks.user_arg);
+  }
+
+  success = PerfettoDsImplRegister(
+      ds_impl, &ds->enabled, data_source_desc,
+      PERFETTO_STATIC_CAST(size_t, write_ptr - data_source_desc));
+  free(data_source_desc);
+  if (!success) {
+    return false;
+  }
+  ds->impl = ds_impl;
+  return true;
+}
+
+// Iterator for all the active instances (on this thread) of a data source type.
+struct PerfettoDsTracerIterator {
+  struct PerfettoDsImplTracerIterator impl;
+};
+
+static inline struct PerfettoDsTracerIterator PerfettoDsTraceIterateBegin(
+    struct PerfettoDs* ds) {
+  struct PerfettoDsTracerIterator ret;
+  PERFETTO_ATOMIC(bool)* enabled = ds->enabled;
+  if (PERFETTO_LIKELY(!PERFETTO_ATOMIC_LOAD_EXPLICIT(
+          enabled, PERFETTO_MEMORY_ORDER_RELAXED))) {
+    // Tracing fast path: bail out immediately if the enabled flag is false.
+    ret.impl.tracer = PERFETTO_NULL;
+  } else {
+    // Else, make an ABI call to start iteration over the data source type
+    // active instances.
+    ret.impl = PerfettoDsImplTraceIterateBegin(ds->impl);
+  }
+  return ret;
+}
+
+static inline void PerfettoDsTraceIterateNext(
+    struct PerfettoDs* ds,
+    struct PerfettoDsTracerIterator* iterator) {
+  PerfettoDsImplTraceIterateNext(ds->impl, &iterator->impl);
+}
+
+static inline void PerfettoDsTraceIterateBreak(
+    struct PerfettoDs* ds,
+    struct PerfettoDsTracerIterator* iterator) {
+  if (iterator->impl.tracer) {
+    PerfettoDsImplTraceIterateBreak(ds->impl, &iterator->impl);
+  }
+}
+
+// For loop over the active instances of a data source type.
+//
+// `NAME` is the data source type (struct PerfettoDs).
+//
+// A local variable called `ITERATOR` will be instantiated. It can be used to
+// perform tracing on each instance.
+//
+// N.B. The iteration MUST NOT be interrupted early with `break`.
+// PERFETTO_DS_TRACE_BREAK should be used instead.
+#define PERFETTO_DS_TRACE(NAME, ITERATOR)         \
+  for (struct PerfettoDsTracerIterator ITERATOR = \
+           PerfettoDsTraceIterateBegin(&(NAME));  \
+       (ITERATOR).impl.tracer != PERFETTO_NULL;   \
+       PerfettoDsTraceIterateNext(&(NAME), &(ITERATOR)))
+
+// Used to break the iteration in a PERFETTO_DS_TRACE loop.
+#define PERFETTO_DS_TRACE_BREAK(NAME, ITERATOR)      \
+  PerfettoDsTraceIterateBreak(&(NAME), &(ITERATOR)); \
+  break
+
+static inline void* PerfettoDsGetCustomTls(
+    struct PerfettoDs* ds,
+    struct PerfettoDsTracerIterator* iterator) {
+  return PerfettoDsImplGetCustomTls(ds->impl, iterator->impl.tracer,
+                                    iterator->impl.inst_id);
+}
+
+static inline void* PerfettoDsGetIncrementalState(
+    struct PerfettoDs* ds,
+    struct PerfettoDsTracerIterator* iterator) {
+  return PerfettoDsImplGetIncrementalState(ds->impl, iterator->impl.tracer,
+                                           iterator->impl.inst_id);
+}
+
+static inline void PerfettoDsTracerFlush(
+    struct PerfettoDsTracerIterator* iterator,
+    PerfettoDsTracerOnFlushCb cb,
+    void* ctx) {
+  PerfettoDsTracerImplFlush(iterator->impl.tracer, cb, ctx);
+}
+
+#endif  // INCLUDE_PERFETTO_PUBLIC_DATA_SOURCE_H_
diff --git a/include/perfetto/public/pb_utils.h b/include/perfetto/public/pb_utils.h
new file mode 100644
index 0000000..17cabc1
--- /dev/null
+++ b/include/perfetto/public/pb_utils.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef INCLUDE_PERFETTO_PUBLIC_PB_UTILS_H_
+#define INCLUDE_PERFETTO_PUBLIC_PB_UTILS_H_
+
+#include <stdint.h>
+
+#include "perfetto/public/compiler.h"
+
+// Type of fields that can be found in a protobuf serialized message.
+enum PerfettoPbWireType {
+  PERFETTO_PB_WIRE_TYPE_VARINT = 0,
+  PERFETTO_PB_WIRE_TYPE_DELIMITED = 2,
+};
+
+// Creates a field tag, which encodes the field type and the field id.
+static inline uint32_t PerfettoPbMakeTag(int32_t field_id,
+                                         enum PerfettoPbWireType wire_type) {
+  return ((PERFETTO_STATIC_CAST(uint32_t, field_id)) << 3) |
+         PERFETTO_STATIC_CAST(uint32_t, wire_type);
+}
+
+enum {
+  // Maximum bytes size of a 64-bit integer encoded as a VarInt.
+  PERFETTO_PB_VARINT_MAX_SIZE_64 = 10,
+  // Maximum bytes size of a 32-bit integer encoded as a VarInt.
+  PERFETTO_PB_VARINT_MAX_SIZE_32 = 5,
+};
+
+// Encodes `value` as a VarInt into `*dst`.
+//
+// `dst` must point into a buffer big enough to represent `value`:
+// PERFETTO_PB_VARINT_MAX_SIZE_* can help.
+static inline uint8_t* PerfettoPbWriteVarInt(uint64_t value, uint8_t* dst) {
+  uint8_t byte;
+  while (value >= 0x80) {
+    byte = (value & 0x7f) | 0x80;
+    *dst++ = byte;
+    value >>= 7;
+  }
+  byte = value & 0x7f;
+  *dst++ = byte;
+
+  return dst;
+}
+
+#endif  // INCLUDE_PERFETTO_PUBLIC_PB_UTILS_H_
diff --git a/include/perfetto/public/producer.h b/include/perfetto/public/producer.h
new file mode 100644
index 0000000..a85cf78
--- /dev/null
+++ b/include/perfetto/public/producer.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef INCLUDE_PERFETTO_PUBLIC_PRODUCER_H_
+#define INCLUDE_PERFETTO_PUBLIC_PRODUCER_H_
+
+#include "perfetto/public/abi/backend_type.h"
+#include "perfetto/public/abi/producer.h"
+
+// Arguments for PerfettoProducerInit. This struct is not ABI-stable, fields can
+// be added and rearranged.
+struct PerfettoProducerInitArgs {
+  // Bitwise-or of backends that should be enabled.
+  PerfettoBackendTypes backends;
+};
+
+// Initializes the global perfetto producer.
+static inline void PerfettoProducerInit(struct PerfettoProducerInitArgs args) {
+  if (args.backends & PERFETTO_BACKEND_IN_PROCESS &&
+      args.backends & PERFETTO_BACKEND_SYSTEM) {
+    PerfettoProducerInProcessAndSystemInit();
+  } else if (args.backends & PERFETTO_BACKEND_IN_PROCESS) {
+    PerfettoProducerInProcessInit();
+  } else if (args.backends & PERFETTO_BACKEND_SYSTEM) {
+    PerfettoProducerSystemInit();
+  }
+}
+
+#endif  // INCLUDE_PERFETTO_PUBLIC_PRODUCER_H_
diff --git a/include/perfetto/tracing/BUILD.gn b/include/perfetto/tracing/BUILD.gn
index f73dafd..21abc8e 100644
--- a/include/perfetto/tracing/BUILD.gn
+++ b/include/perfetto/tracing/BUILD.gn
@@ -38,6 +38,7 @@
     "internal/checked_scope.h",
     "internal/compile_time_hash.h",
     "internal/data_source_internal.h",
+    "internal/data_source_type.h",
     "internal/in_process_tracing_backend.h",
     "internal/interceptor_trace_writer.h",
     "internal/system_tracing_backend.h",
diff --git a/include/perfetto/tracing/data_source.h b/include/perfetto/tracing/data_source.h
index 7206776..30f2010 100644
--- a/include/perfetto/tracing/data_source.h
+++ b/include/perfetto/tracing/data_source.h
@@ -31,15 +31,12 @@
 #include <memory>
 #include <mutex>
 
-#include "perfetto/base/build_config.h"
-#include "perfetto/base/compiler.h"
-#include "perfetto/base/export.h"
-#include "perfetto/protozero/message.h"
 #include "perfetto/protozero/message_handle.h"
 #include "perfetto/tracing/buffer_exhausted_policy.h"
 #include "perfetto/tracing/core/forward_decls.h"
 #include "perfetto/tracing/internal/basic_types.h"
 #include "perfetto/tracing/internal/data_source_internal.h"
+#include "perfetto/tracing/internal/data_source_type.h"
 #include "perfetto/tracing/internal/tracing_muxer.h"
 #include "perfetto/tracing/locked_handle.h"
 #include "perfetto/tracing/trace_writer_base.h"
@@ -143,9 +140,7 @@
 
 struct DefaultDataSourceTraits {
   // |IncrementalStateType| can optionally be used store custom per-sequence
-  // incremental data (e.g., interning tables). It should have a Clear() method
-  // for when incremental state needs to be cleared. See
-  // TraceContext::GetIncrementalState().
+  // incremental data (e.g., interning tables).
   using IncrementalStateType = void;
   // |TlsStateType| can optionally be used to store custom per-sequence
   // session data, which is not reset when incremental state is cleared
@@ -171,12 +166,12 @@
 };
 
 // Templated base class meant to be derived by embedders to create a custom data
-// source. DataSourceType must be the type of the derived class itself, e.g.:
-// class MyDataSource : public DataSourceBase<MyDataSource> {...}.
+// source. DerivedDataSource must be the type of the derived class itself, e.g.:
+// class MyDataSource : public DataSource<MyDataSource> {...}.
 //
 // |DataSourceTraits| allows customizing the behavior of the data source. See
 // |DefaultDataSourceTraits|.
-template <typename DataSourceType,
+template <typename DerivedDataSource,
           typename DataSourceTraits = DefaultDataSourceTraits>
 class DataSource : public DataSourceBase {
   struct DefaultTracePointTraits;
@@ -265,14 +260,14 @@
     // immediately before calling this. The caller is supposed to check for its
     // validity before using it. After checking, the handle is guaranteed to
     // remain valid until the handle goes out of scope.
-    LockedHandle<DataSourceType> GetDataSourceLocked() const {
-      auto* internal_state = static_state_.TryGet(instance_index_);
+    LockedHandle<DerivedDataSource> GetDataSourceLocked() const {
+      auto* internal_state = type_.static_state()->TryGet(instance_index_);
       if (!internal_state)
-        return LockedHandle<DataSourceType>();
+        return LockedHandle<DerivedDataSource>();
       std::unique_lock<std::recursive_mutex> lock(internal_state->lock);
-      return LockedHandle<DataSourceType>(
+      return LockedHandle<DerivedDataSource>(
           std::move(lock),
-          static_cast<DataSourceType*>(internal_state->data_source.get()));
+          static_cast<DerivedDataSource*>(internal_state->data_source.get()));
     }
 
     // Post-condition: returned ptr will be non-null.
@@ -283,15 +278,8 @@
     }
 
     typename DataSourceTraits::IncrementalStateType* GetIncrementalState() {
-      // Recreate incremental state data if it has been reset by the service.
-      if (tls_inst_->incremental_state_generation !=
-          static_state_.incremental_state_generation.load(
-              std::memory_order_relaxed)) {
-        tls_inst_->incremental_state.reset();
-        CreateIncrementalState(tls_inst_);
-      }
-      return reinterpret_cast<typename DataSourceTraits::IncrementalStateType*>(
-          tls_inst_->incremental_state.get());
+      return static_cast<typename DataSourceTraits::IncrementalStateType*>(
+          type_.GetIncrementalState(tls_inst_, instance_index_));
     }
 
    private:
@@ -334,7 +322,8 @@
                                 {}) PERFETTO_ALWAYS_INLINE {
     // |instances| is a per-class bitmap that tells:
     // 1. If the data source is enabled at all.
-    // 2. The index of the slot within |static_state_| that holds the instance
+    // 2. The index of the slot within
+    //    internal::DataSourceStaticState::instances that holds the instance
     //    state. In turn this allows to map the data source to the tracing
     //    session and buffers.
     // memory_order_relaxed is okay because:
@@ -361,129 +350,27 @@
   // GetActiveInstances| to make it possible to use custom storage for
   // the data source enabled state. This is, for example, used by TrackEvent to
   // implement per-tracing category enabled states.
-  //
-  // TODO(primiano): all the stuff below should be outlined from the trace
-  // point. Or at least we should have some compile-time traits like
-  // kOptimizeBinarySize / kOptimizeTracingLatency.
   template <typename Traits = DefaultTracePointTraits, typename Lambda>
   static void TraceWithInstances(
-      uint32_t instances,
+      uint32_t cached_instances,
       Lambda tracing_fn,
       typename Traits::TracePointData trace_point_data = {}) {
-    PERFETTO_DCHECK(instances);
-    constexpr auto kMaxDataSourceInstances = internal::kMaxDataSourceInstances;
+    PERFETTO_DCHECK(cached_instances);
 
-    // See tracing_muxer.h for the structure of the TLS.
-    if (PERFETTO_UNLIKELY(!tls_state_)) {
-      // If the TLS hasn't been obtained yet, it's possible that this thread
-      // hasn't observed the initialization of global state like the muxer yet.
-      // To ensure that the thread "sees" the effects of such initialization,
-      // we have to reload |instances| with an acquire fence, ensuring that any
-      // initialization performed before instances was updated is visible
-      // in this thread.
-      instances &= Traits::GetActiveInstances(trace_point_data)
-                       ->load(std::memory_order_acquire);
-      if (!instances)
-        return;
-      tls_state_ = GetOrCreateDataSourceTLS(&static_state_);
-    }
-
-    // |tls_state_| is valid, which means that the current thread must have
-    // observed the initialization of the muxer, and obtaining it without a
-    // fence is safe.
-    auto* tracing_impl = internal::TracingMuxer::Get();
-
-    // Avoid re-entering the trace point recursively.
-    if (PERFETTO_UNLIKELY(tls_state_->root_tls->is_in_trace_point))
+    if (!type_.TracePrologue<DataSourceTraits, Traits>(
+            &tls_state_, &cached_instances, trace_point_data)) {
       return;
-    internal::ScopedReentrancyAnnotator scoped_annotator(*tls_state_->root_tls);
-
-    // TracingTLS::generation is a global monotonic counter that is incremented
-    // every time a tracing session is stopped. We use that as a signal to force
-    // a slow-path garbage collection of all the trace writers for the current
-    // thread and to destroy the ones that belong to tracing sessions that have
-    // ended. This is to avoid having too many TraceWriter instances alive, each
-    // holding onto one chunk of the shared memory buffer.
-    // Rationale why memory_order_relaxed should be fine:
-    // - The TraceWriter object that we use is always constructed and destructed
-    //   on the current thread. There is no risk of accessing a half-initialized
-    //   TraceWriter (which would be really bad).
-    // - In the worst case, in the case of a race on the generation check, we
-    //   might end up using a TraceWriter for the same data source that belongs
-    //   to a stopped session. This is not really wrong, as we don't give any
-    //   guarantee on the global atomicity of the stop. In the worst case the
-    //   service will reject the data commit if this arrives too late.
-
-    if (PERFETTO_UNLIKELY(
-            tls_state_->root_tls->generation !=
-            tracing_impl->generation(std::memory_order_relaxed))) {
-      // Will update root_tls->generation.
-      tracing_impl->DestroyStoppedTraceWritersForCurrentThread();
     }
 
-    for (uint32_t i = 0; i < kMaxDataSourceInstances; i++) {
-      internal::DataSourceState* instance_state =
-          static_state_.TryGetCached(instances, i);
-      if (!instance_state)
-        continue;
-
-      // Even if we passed the check above, the DataSourceInstance might be
-      // still destroyed concurrently while this code runs. The code below is
-      // designed to deal with such race, as follows:
-      // - We don't access the user-defined data source instance state. The only
-      //   bits of state we use are |backend_id| and |buffer_id|.
-      // - Beyond those two integers, we access only the TraceWriter here. The
-      //   TraceWriter is always safe because it lives on the TLS.
-      // - |instance_state| is backed by static storage, so the pointer is
-      //   always valid, even after the data source instance is destroyed.
-      // - In the case of a race-on-destruction, we'll still see the latest
-      //   backend_id and buffer_id and in the worst case keep trying writing
-      //   into the tracing shared memory buffer after stopped. But this isn't
-      //   really any worse than the case of the stop IPC being delayed by the
-      //   kernel scheduler. The tracing service is robust against data commit
-      //   attemps made after tracing is stopped.
-      // There is a theoretical race that would case the wrong behavior w.r.t
-      // writing data in the wrong buffer, but it's so rare that we ignore it:
-      // if the data source is stopped and started kMaxDataSourceInstances
-      // times (so that the same id is recycled) while we are in this function,
-      // we might end up reusing the old data source's backend_id and buffer_id
-      // for the new one, because we don't see the generation change past this
-      // point. But stopping and starting tracing (even once) takes so much
-      // handshaking to make this extremely unrealistic.
-
-      auto& tls_inst = tls_state_->per_instance[i];
-      if (PERFETTO_UNLIKELY(!tls_inst.trace_writer)) {
-        // Here we need an acquire barrier, which matches the release-store made
-        // by TracingMuxerImpl::SetupDataSource(), to ensure that the backend_id
-        // and buffer_id are consistent.
-        instances &= Traits::GetActiveInstances(trace_point_data)
-                         ->load(std::memory_order_acquire);
-        instance_state = static_state_.TryGetCached(instances, i);
-        if (!instance_state || !instance_state->trace_lambda_enabled.load(
-                                   std::memory_order_relaxed))
-          continue;
-        tls_inst.muxer_id_for_testing = instance_state->muxer_id_for_testing;
-        tls_inst.backend_id = instance_state->backend_id;
-        tls_inst.backend_connection_id = instance_state->backend_connection_id;
-        tls_inst.buffer_id = instance_state->buffer_id;
-        tls_inst.startup_target_buffer_reservation =
-            instance_state->startup_target_buffer_reservation.load(
-                std::memory_order_relaxed);
-        tls_inst.data_source_instance_id =
-            instance_state->data_source_instance_id;
-        tls_inst.is_intercepted = instance_state->interceptor_id != 0;
-        tls_inst.trace_writer = tracing_impl->CreateTraceWriter(
-            &static_state_, i, instance_state,
-            DataSourceType::kBufferExhaustedPolicy);
-        CreateIncrementalState(&tls_inst);
-        CreateDataSourceCustomTLS(TraceContext(&tls_inst, i));
-        // Even in the case of out-of-IDs, SharedMemoryArbiterImpl returns a
-        // NullTraceWriter. The returned pointer should never be null.
-        assert(tls_inst.trace_writer);
-      }
-
-      tracing_fn(TraceContext(&tls_inst, i));
+    for (internal::DataSourceType::InstancesIterator it =
+             type_.BeginIteration<Traits>(cached_instances, tls_state_,
+                                          trace_point_data);
+         it.instance;
+         type_.NextIteration<Traits>(&it, tls_state_, trace_point_data)) {
+      tracing_fn(TraceContext(it.instance, it.i));
     }
+
+    type_.TraceEpilogue(tls_state_);
   }
 
   // Registers the data source on all tracing backends, including ones that
@@ -501,25 +388,29 @@
                        const Args&... constructor_args) {
     // Silences -Wunused-variable warning in case the trace method is not used
     // by the translation unit that declares the data source.
-    (void)static_state_;
+    (void)type_;
     (void)tls_state_;
 
     auto factory = [constructor_args...]() {
       return std::unique_ptr<DataSourceBase>(
-          new DataSourceType(constructor_args...));
+          new DerivedDataSource(constructor_args...));
     };
-    auto* tracing_impl = internal::TracingMuxer::Get();
     internal::DataSourceParams params{
-        DataSourceType::kSupportsMultipleInstances,
-        DataSourceType::kRequiresCallbacksUnderLock};
-    return tracing_impl->RegisterDataSource(descriptor, factory, params,
-                                            &static_state_);
+        DerivedDataSource::kSupportsMultipleInstances,
+        DerivedDataSource::kRequiresCallbacksUnderLock};
+    return type_.Register(
+        descriptor, factory, params, DerivedDataSource::kBufferExhaustedPolicy,
+        GetCreateTlsFn(
+            static_cast<typename DataSourceTraits::TlsStateType*>(nullptr)),
+        GetCreateIncrementalStateFn(
+            static_cast<typename DataSourceTraits::IncrementalStateType*>(
+                nullptr)),
+        nullptr);
   }
 
   // Updates the data source descriptor.
   static void UpdateDescriptor(const DataSourceDescriptor& descriptor) {
-    auto* tracing_impl = internal::TracingMuxer::Get();
-    tracing_impl->UpdateDataSourceDescriptor(descriptor, &static_state_);
+    type_.UpdateDescriptor(descriptor);
   }
 
  private:
@@ -539,80 +430,59 @@
     // implement per-category enabled states.
     struct TracePointData {};
     static constexpr std::atomic<uint32_t>* GetActiveInstances(TracePointData) {
-      return &static_state_.valid_instances;
+      return type_.valid_instances();
     }
   };
 
-  // Create the user provided incremental state in the given thread-local
-  // storage. Note: The second parameter here is used to specialize the case
-  // where there is no incremental state type.
   template <typename T>
-  static void CreateIncrementalStateImpl(
+  static internal::DataSourceInstanceThreadLocalState::ObjectWithDeleter
+  CreateIncrementalState(internal::DataSourceInstanceThreadLocalState*,
+                         uint32_t,
+                         void*) {
+    return internal::DataSourceInstanceThreadLocalState::ObjectWithDeleter(
+        reinterpret_cast<void*>(new T()),
+        [](void* p) { delete reinterpret_cast<T*>(p); });
+  }
+
+  // The parameter here is only used to select the overload for the case where
+  // there is no incremental state type.
+  template <typename T>
+  static internal::DataSourceType::CreateIncrementalStateFn
+  GetCreateIncrementalStateFn(const T*) {
+    return &CreateIncrementalState<T>;
+  }
+
+  static internal::DataSourceType::CreateIncrementalStateFn
+  GetCreateIncrementalStateFn(const void*) {
+    return nullptr;
+  }
+
+  template <typename T>
+  static internal::DataSourceInstanceThreadLocalState::ObjectWithDeleter
+  CreateDataSourceCustomTls(
       internal::DataSourceInstanceThreadLocalState* tls_inst,
-      const T*) {
-    PERFETTO_DCHECK(!tls_inst->incremental_state);
-    tls_inst->incremental_state_generation =
-        static_state_.incremental_state_generation.load(
-            std::memory_order_relaxed);
-    tls_inst->incremental_state =
-        internal::DataSourceInstanceThreadLocalState::ObjectWithDeleter(
-            reinterpret_cast<void*>(new T()),
-            [](void* p) { delete reinterpret_cast<T*>(p); });
+      uint32_t instance_index,
+      void*) {
+    return internal::DataSourceInstanceThreadLocalState::ObjectWithDeleter(
+        reinterpret_cast<void*>(new T(TraceContext(tls_inst, instance_index))),
+        [](void* p) { delete reinterpret_cast<T*>(p); });
   }
 
-  static void CreateIncrementalStateImpl(
-      internal::DataSourceInstanceThreadLocalState*,
-      const void*) {}
-
-  static void CreateIncrementalState(
-      internal::DataSourceInstanceThreadLocalState* tls_inst) {
-    CreateIncrementalStateImpl(
-        tls_inst,
-        static_cast<typename DataSourceTraits::IncrementalStateType*>(nullptr));
-  }
-
-  // Create the user provided custom tls state in the given TraceContext's
-  // thread-local storage.  Note: The second parameter here is used to
-  // specialize the case where there is no incremental state type.
+  // The parameter here is only used to select the overload for the case where
+  // there is no tls state type.
   template <typename T>
-  static void CreateDataSourceCustomTLSImpl(const TraceContext& trace_context,
-                                            const T*) {
-    PERFETTO_DCHECK(!trace_context.tls_inst_->data_source_custom_tls);
-    trace_context.tls_inst_->data_source_custom_tls =
-        internal::DataSourceInstanceThreadLocalState::ObjectWithDeleter(
-            reinterpret_cast<void*>(new T(trace_context)),
-            [](void* p) { delete reinterpret_cast<T*>(p); });
+  static internal::DataSourceType::CreateCustomTlsFn GetCreateTlsFn(const T*) {
+    return &CreateDataSourceCustomTls<T>;
   }
 
-  static void CreateDataSourceCustomTLSImpl(const TraceContext&, const void*) {}
-
-  static void CreateDataSourceCustomTLS(const TraceContext& trace_context) {
-    CreateDataSourceCustomTLSImpl(
-        trace_context,
-        static_cast<typename DataSourceTraits::TlsStateType*>(nullptr));
+  static internal::DataSourceType::CreateCustomTlsFn GetCreateTlsFn(
+      const void*) {
+    return nullptr;
   }
 
-  // Note that the returned object is one per-thread per-data-source-type, NOT
-  // per data-source *instance*.
-  static internal::DataSourceThreadLocalState* GetOrCreateDataSourceTLS(
-      internal::DataSourceStaticState* static_state) {
-#if PERFETTO_BUILDFLAG(PERFETTO_OS_IOS)
-    PERFETTO_FATAL("Data source TLS not supported on iOS, see b/158814068");
-#endif
-    auto* tracing_impl = internal::TracingMuxer::Get();
-    internal::TracingTLS* root_tls = tracing_impl->GetOrCreateTracingTLS();
-    internal::DataSourceThreadLocalState* ds_tls =
-        DataSourceTraits::GetDataSourceTLS(static_state, root_tls);
-    // We keep re-initializing as the initialization is idempotent and not worth
-    // the code for extra checks.
-    ds_tls->static_state = static_state;
-    assert(!ds_tls->root_tls || ds_tls->root_tls == root_tls);
-    ds_tls->root_tls = root_tls;
-    return ds_tls;
-  }
-
-  // Static state. Accessed by the static Trace() method fastpaths.
-  static internal::DataSourceStaticState static_state_;
+  // The type of this data source. Accessed by the static Trace() method
+  // fastpaths.
+  static internal::DataSourceType type_;
 
   // This TLS object is a cached raw pointer and has deliberately no destructor.
   // The Platform implementation is supposed to create and manage the lifetime
@@ -626,7 +496,7 @@
 
 // static
 template <typename T, typename D>
-internal::DataSourceStaticState DataSource<T, D>::static_state_;
+internal::DataSourceType DataSource<T, D>::type_;
 // static
 template <typename T, typename D>
 PERFETTO_THREAD_LOCAL internal::DataSourceThreadLocalState*
@@ -651,8 +521,8 @@
 // where a component specific export macro is used.
 #define PERFETTO_DECLARE_DATA_SOURCE_STATIC_MEMBERS_WITH_ATTRS(attrs, ...) \
   template <>                                                              \
-  attrs perfetto::internal::DataSourceStaticState                          \
-      perfetto::DataSource<__VA_ARGS__>::static_state_
+  attrs perfetto::internal::DataSourceType                                 \
+      perfetto::DataSource<__VA_ARGS__>::type_
 
 // This macro must be used once for each data source in one source file to
 // allocate static storage for the data source's static state.
@@ -670,7 +540,7 @@
 // where a component specific export macro is used.
 #define PERFETTO_DEFINE_DATA_SOURCE_STATIC_MEMBERS_WITH_ATTRS(attrs, ...) \
   template <>                                                             \
-  attrs perfetto::internal::DataSourceStaticState                         \
-      perfetto::DataSource<__VA_ARGS__>::static_state_ {}
+  attrs perfetto::internal::DataSourceType                                \
+      perfetto::DataSource<__VA_ARGS__>::type_ {}
 
 #endif  // INCLUDE_PERFETTO_TRACING_DATA_SOURCE_H_
diff --git a/include/perfetto/tracing/internal/data_source_internal.h b/include/perfetto/tracing/internal/data_source_internal.h
index 8cfe5e6..50b235c 100644
--- a/include/perfetto/tracing/internal/data_source_internal.h
+++ b/include/perfetto/tracing/internal/data_source_internal.h
@@ -59,7 +59,7 @@
   // Keep this flag as the first field. This allows the compiler to directly
   // dereference the DataSourceState* pointer in the trace fast-path without
   // doing extra pointr arithmetic.
-  std::atomic<bool> trace_lambda_enabled = false;
+  std::atomic<bool> trace_lambda_enabled{false};
 
   // The overall TracingMuxerImpl instance id, which gets incremented by
   // ResetForTesting.
diff --git a/include/perfetto/tracing/internal/data_source_type.h b/include/perfetto/tracing/internal/data_source_type.h
new file mode 100644
index 0000000..5968223
--- /dev/null
+++ b/include/perfetto/tracing/internal/data_source_type.h
@@ -0,0 +1,316 @@
+#ifndef INCLUDE_PERFETTO_TRACING_INTERNAL_DATA_SOURCE_TYPE_H_
+#define INCLUDE_PERFETTO_TRACING_INTERNAL_DATA_SOURCE_TYPE_H_
+
+#include "perfetto/base/build_config.h"
+#include "perfetto/base/export.h"
+#include "perfetto/tracing/core/forward_decls.h"
+#include "perfetto/tracing/internal/data_source_internal.h"
+#include "perfetto/tracing/internal/tracing_muxer.h"
+
+namespace perfetto {
+namespace internal {
+
+// Represents a data source type (not an instance).
+//
+// All the static state of a DataSource<T> lives here (including
+// DataSourceStaticState).
+//
+// The C shared library API wrapper cannot use DataSource<T>, because it needs
+// to create new data source types at runtime, so it uses this directly.
+//
+// The main reason why this intermediate class exists is to decouple the
+// DataSourceStaticState from the specific DataSource<T>. The C API cannot
+// dynamically create template instances and it needs a way to decouple those at
+// runtime.
+class PERFETTO_EXPORT_COMPONENT DataSourceType {
+ public:
+  // Function pointer type used to create custom per instance thread local
+  // state.
+  using CreateCustomTlsFn =
+      DataSourceInstanceThreadLocalState::ObjectWithDeleter (*)(
+          DataSourceInstanceThreadLocalState* tls_inst,
+          uint32_t instance_index,
+          void* user_arg);
+  // Function pointer type used to create custom per instance thread local
+  // incremental state (which might be cleared periodically by the tracing
+  // service).
+  using CreateIncrementalStateFn =
+      DataSourceInstanceThreadLocalState::ObjectWithDeleter (*)(
+          DataSourceInstanceThreadLocalState* tls_inst,
+          uint32_t instance_index,
+          void* user_arg);
+
+  // Registers the data source type with the central tracing muxer.
+  // * `descriptor` is the data source protobuf descriptor.
+  // * `factory` is a std::function used to create instances of the data source
+  //   type.
+  // * `buffer_exhausted_policy` specifies what to do when the shared memory
+  //   buffer runs out of chunks.
+  // * `create_custom_tls_fn` and `create_incremental_state_fn` are function
+  //   pointers called to create custom state. They will receive `user_arg` as
+  //   an extra param.
+  bool Register(const DataSourceDescriptor& descriptor,
+                TracingMuxer::DataSourceFactory factory,
+                internal::DataSourceParams params,
+                BufferExhaustedPolicy buffer_exhausted_policy,
+                CreateCustomTlsFn create_custom_tls_fn,
+                CreateIncrementalStateFn create_incremental_state_fn,
+                void* user_arg) {
+    buffer_exhausted_policy_ = buffer_exhausted_policy;
+    create_custom_tls_fn_ = create_custom_tls_fn;
+    create_incremental_state_fn_ = create_incremental_state_fn;
+    user_arg_ = user_arg;
+    auto* tracing_impl = TracingMuxer::Get();
+    return tracing_impl->RegisterDataSource(descriptor, factory, params,
+                                            &state_);
+  }
+
+  // Updates the data source type descriptor.
+  void UpdateDescriptor(const DataSourceDescriptor& descriptor) {
+    auto* tracing_impl = TracingMuxer::Get();
+    tracing_impl->UpdateDataSourceDescriptor(descriptor, &state_);
+  }
+
+  // The beginning of a trace point.
+  //
+  // `tls_state` must point to a thread local variable that caches a pointer to
+  // an internal per data source type thread local state.
+  //
+  // `instances` must point to a copy of the current active instances for the
+  // data source type.
+  //
+  // `DataSourceTraits` can be used to customize the thread local storage used
+  // for the data source type.
+  //
+  // `TracePointTraits` and `trace_point_data` are customization points for
+  // getting the active instances bitmap.
+  //
+  // If this returns false, the trace point must be skipped.
+  template <typename DataSourceTraits, typename TracePointTraits>
+  bool TracePrologue(
+      DataSourceThreadLocalState** tls_state,
+      uint32_t* instances,
+      typename TracePointTraits::TracePointData trace_point_data) {
+    // See tracing_muxer.h for the structure of the TLS.
+    if (PERFETTO_UNLIKELY(!*tls_state)) {
+      *tls_state = GetOrCreateDataSourceTLS<DataSourceTraits>();
+      // If the TLS hasn't been obtained yet, it's possible that this thread
+      // hasn't observed the initialization of global state like the muxer yet.
+      // To ensure that the thread "sees" the effects of such initialization,
+      // we have to reload |instances| with an acquire fence, ensuring that any
+      // initialization performed before instances was updated is visible
+      // in this thread.
+      *instances &= TracePointTraits::GetActiveInstances(trace_point_data)
+                        ->load(std::memory_order_acquire);
+      if (!*instances)
+        return false;
+    }
+    auto* tracing_impl = TracingMuxer::Get();
+
+    // Avoid re-entering the trace point recursively.
+    if (PERFETTO_UNLIKELY((*tls_state)->root_tls->is_in_trace_point))
+      return false;
+
+    (*tls_state)->root_tls->is_in_trace_point = true;
+
+    // TracingTLS::generation is a global monotonic counter that is incremented
+    // every time a tracing session is stopped. We use that as a signal to force
+    // a slow-path garbage collection of all the trace writers for the current
+    // thread and to destroy the ones that belong to tracing sessions that have
+    // ended. This is to avoid having too many TraceWriter instances alive, each
+    // holding onto one chunk of the shared memory buffer.
+    // Rationale why memory_order_relaxed should be fine:
+    // - The TraceWriter object that we use is always constructed and destructed
+    //   on the current thread. There is no risk of accessing a half-initialized
+    //   TraceWriter (which would be really bad).
+    // - In the worst case, in the case of a race on the generation check, we
+    //   might end up using a TraceWriter for the same data source that belongs
+    //   to a stopped session. This is not really wrong, as we don't give any
+    //   guarantee on the global atomicity of the stop. In the worst case the
+    //   service will reject the data commit if this arrives too late.
+
+    if (PERFETTO_UNLIKELY(
+            (*tls_state)->root_tls->generation !=
+            tracing_impl->generation(std::memory_order_relaxed))) {
+      // Will update root_tls->generation.
+      tracing_impl->DestroyStoppedTraceWritersForCurrentThread();
+    }
+
+    return true;
+  }
+
+  // Must be called at the end of a trace point that was not skipped.
+  void TraceEpilogue(DataSourceThreadLocalState* tls_state) {
+    tls_state->root_tls->is_in_trace_point = false;
+  }
+
+  struct InstancesIterator {
+    // A bitmap of the currently active instances.
+    uint32_t cached_instances;
+    // The current instance index.
+    uint32_t i;
+    // The current instance. If this is `nullptr`, the iteration is over.
+    DataSourceInstanceThreadLocalState* instance;
+  };
+
+  // Returns an iterator to the active instances of this data source type.
+  //
+  // `cached_instances` is a copy of the bitmap of the active instances for this
+  // data source type (usually just a copy of valid_instances(), but can be
+  // customized).
+  //
+  // `tls_state` is the thread local pointer obtained from TracePrologue.
+  //
+  // `TracePointTraits` and `trace_point_data` are customization points for
+  // getting the active instances bitmap.
+  template <typename TracePointTraits>
+  InstancesIterator BeginIteration(
+      uint32_t cached_instances,
+      DataSourceThreadLocalState* tls_state,
+      typename TracePointTraits::TracePointData trace_point_data)
+      PERFETTO_ALWAYS_INLINE {
+    InstancesIterator it{};
+    it.cached_instances = cached_instances;
+    FirstActiveInstance<TracePointTraits>(&it, tls_state, trace_point_data);
+    return it;
+  }
+
+  // Advances `*iterator` to point to the next active instance of this data
+  // source type.
+  //
+  // `tls_state` is the thread local pointer obtained from TracePrologue.
+  //
+  // `TracePointTraits` and `trace_point_data` are customization points for
+  // getting the active instances bitmap.
+  template <typename TracePointTraits>
+  void NextIteration(InstancesIterator* iterator,
+                     DataSourceThreadLocalState* tls_state,
+                     typename TracePointTraits::TracePointData trace_point_data)
+      PERFETTO_ALWAYS_INLINE {
+    iterator->i++;
+    FirstActiveInstance<TracePointTraits>(iterator, tls_state,
+                                          trace_point_data);
+  }
+
+  void* GetIncrementalState(
+      internal::DataSourceInstanceThreadLocalState* tls_inst,
+      uint32_t instance_index) {
+    // Recreate incremental state data if it has been reset by the service.
+    if (tls_inst->incremental_state_generation !=
+        static_state()->incremental_state_generation.load(
+            std::memory_order_relaxed)) {
+      tls_inst->incremental_state.reset();
+      CreateIncrementalState(tls_inst, instance_index);
+    }
+    return tls_inst->incremental_state.get();
+  }
+
+  std::atomic<uint32_t>* valid_instances() { return &state_.valid_instances; }
+
+  DataSourceStaticState* static_state() { return &state_; }
+
+ private:
+  void CreateIncrementalState(
+      internal::DataSourceInstanceThreadLocalState* tls_inst,
+      uint32_t instance_index) {
+    PERFETTO_DCHECK(create_incremental_state_fn_ != nullptr);
+    tls_inst->incremental_state =
+        create_incremental_state_fn_(tls_inst, instance_index, user_arg_);
+    tls_inst->incremental_state_generation =
+        static_state()->incremental_state_generation.load(
+            std::memory_order_relaxed);
+  }
+
+  void PopulateTlsInst(DataSourceInstanceThreadLocalState* tls_inst,
+                       DataSourceState* instance_state,
+                       uint32_t instance_index);
+
+  // Advances `*iterator` to the first active instance whose index is greater
+  // than or equal to `iterator->i`.
+  template <typename TracePointTraits>
+  void FirstActiveInstance(
+      InstancesIterator* iterator,
+      DataSourceThreadLocalState* tls_state,
+      typename TracePointTraits::TracePointData trace_point_data) {
+    iterator->instance = nullptr;
+    for (; iterator->i < kMaxDataSourceInstances; iterator->i++) {
+      DataSourceState* instance_state =
+          state_.TryGetCached(iterator->cached_instances, iterator->i);
+      if (!instance_state)
+        continue;
+      // Even if we passed the check above, the DataSourceInstance might be
+      // still destroyed concurrently while this code runs. The code below is
+      // designed to deal with such race, as follows:
+      // - We don't access the user-defined data source instance state. The only
+      //   bits of state we use are |backend_id| and |buffer_id|.
+      // - Beyond those two integers, we access only the TraceWriter here. The
+      //   TraceWriter is always safe because it lives on the TLS.
+      // - |instance_state| is backed by static storage, so the pointer is
+      //   always valid, even after the data source instance is destroyed.
+      // - In the case of a race-on-destruction, we'll still see the latest
+      //   backend_id and buffer_id and in the worst case keep trying writing
+      //   into the tracing shared memory buffer after stopped. But this isn't
+      //   really any worse than the case of the stop IPC being delayed by the
+      //   kernel scheduler. The tracing service is robust against data commit
+      //   attempts made after tracing is stopped.
+      // There is a theoretical race that would cause the wrong behavior w.r.t
+      // writing data in the wrong buffer, but it's so rare that we ignore it:
+      // if the data source is stopped and started kMaxDataSourceInstances
+      // times (so that the same id is recycled) while we are in this function,
+      // we might end up reusing the old data source's backend_id and buffer_id
+      // for the new one, because we don't see the generation change past this
+      // point. But stopping and starting tracing (even once) takes so much
+      // handshaking to make this extremely unrealistic.
+
+      auto& tls_inst = tls_state->per_instance[iterator->i];
+      if (PERFETTO_UNLIKELY(!tls_inst.trace_writer)) {
+        // Here we need an acquire barrier, which matches the release-store made
+        // by TracingMuxerImpl::SetupDataSource(), to ensure that the backend_id
+        // and buffer_id are consistent.
+        iterator->cached_instances &=
+            TracePointTraits::GetActiveInstances(trace_point_data)
+                ->load(std::memory_order_acquire);
+        instance_state =
+            state_.TryGetCached(iterator->cached_instances, iterator->i);
+        if (!instance_state || !instance_state->trace_lambda_enabled.load(
+                                   std::memory_order_relaxed))
+          continue;
+        PopulateTlsInst(&tls_inst, instance_state, iterator->i);
+      }
+      iterator->instance = &tls_inst;
+      break;
+    }
+  }
+
+  // Note that the returned object is one per-thread per-data-source-type, NOT
+  // per data-source *instance*.
+  template <typename DataSourceTraits>
+  DataSourceThreadLocalState* GetOrCreateDataSourceTLS() {
+#if PERFETTO_BUILDFLAG(PERFETTO_OS_IOS)
+    PERFETTO_FATAL("Data source TLS not supported on iOS, see b/158814068");
+#endif
+    auto* tracing_impl = TracingMuxer::Get();
+    TracingTLS* root_tls = tracing_impl->GetOrCreateTracingTLS();
+    DataSourceThreadLocalState* ds_tls =
+        DataSourceTraits::GetDataSourceTLS(&state_, root_tls);
+    // We keep re-initializing as the initialization is idempotent and not worth
+    // the code for extra checks.
+    ds_tls->static_state = &state_;
+    assert(!ds_tls->root_tls || ds_tls->root_tls == root_tls);
+    ds_tls->root_tls = root_tls;
+    return ds_tls;
+  }
+
+  DataSourceStaticState state_;
+  BufferExhaustedPolicy buffer_exhausted_policy_{};
+  CreateCustomTlsFn create_custom_tls_fn_ = nullptr;
+  CreateIncrementalStateFn create_incremental_state_fn_ = nullptr;
+  // User defined pointer that carries extra content for the fn_ callbacks
+  // above. Only used in the C shared library.
+  void* user_arg_ = nullptr;
+};
+
+}  // namespace internal
+}  // namespace perfetto
+
+#endif  // INCLUDE_PERFETTO_TRACING_INTERNAL_DATA_SOURCE_TYPE_H_
diff --git a/include/perfetto/tracing/internal/tracing_tls.h b/include/perfetto/tracing/internal/tracing_tls.h
index b2340a4..dc3a7b6 100644
--- a/include/perfetto/tracing/internal/tracing_tls.h
+++ b/include/perfetto/tracing/internal/tracing_tls.h
@@ -81,6 +81,10 @@
   // handlers. See comment in TracingTLS::~TracingTLS().
   bool is_in_trace_point = false;
 
+  // Used inside a trace point (only one trace point per thread can be active at
+  // any time) to cache the instances bitmap.
+  uint32_t cached_instances = 0;
+
   // By default all data source instances have independent thread-local state
   // (see above).
   std::array<DataSourceThreadLocalState, kMaxDataSources> data_sources_tls{};
diff --git a/include/perfetto/tracing/internal/track_event_data_source.h b/include/perfetto/tracing/internal/track_event_data_source.h
index 7b1c87e..730b1e1 100644
--- a/include/perfetto/tracing/internal/track_event_data_source.h
+++ b/include/perfetto/tracing/internal/track_event_data_source.h
@@ -194,10 +194,11 @@
 
 // A generic track event data source which is instantiated once per track event
 // category namespace.
-template <typename DataSourceType, const TrackEventCategoryRegistry* Registry>
+template <typename DerivedDataSource,
+          const TrackEventCategoryRegistry* Registry>
 class TrackEventDataSource
-    : public DataSource<DataSourceType, TrackEventDataSourceTraits> {
-  using Base = DataSource<DataSourceType, TrackEventDataSourceTraits>;
+    : public DataSource<DerivedDataSource, TrackEventDataSourceTraits> {
+  using Base = DataSource<DerivedDataSource, TrackEventDataSourceTraits>;
 
  public:
   static constexpr bool kRequiresCallbacksUnderLock = false;
diff --git a/protos/perfetto/config/data_source_config.proto b/protos/perfetto/config/data_source_config.proto
index 2bb9056..b15cc39 100644
--- a/protos/perfetto/config/data_source_config.proto
+++ b/protos/perfetto/config/data_source_config.proto
@@ -42,7 +42,7 @@
 import "protos/perfetto/config/system_info/system_info.proto";
 
 // The configuration that is passed to each data source when starting tracing.
-// Next id: 121
+// Next id: 123
 message DataSourceConfig {
   enum SessionInitiator {
     SESSION_INITIATOR_UNSPECIFIED = 0;
@@ -69,6 +69,10 @@
   // DO NOT SET in consumer as this will be overridden by the service.
   optional uint32 trace_duration_ms = 3;
 
+  // If true, |trace_duration_ms| should count also time in suspend. This
+  // is propagated from TraceConfig.prefer_suspend_clock_for_duration.
+  optional bool prefer_suspend_clock_for_duration = 122;
+
   // Set by the service to indicate how long it waits after StopDataSource.
   // DO NOT SET in consumer as this will be overridden by the service.
   optional uint32 stop_timeout_ms = 7;
diff --git a/protos/perfetto/config/perfetto_config.proto b/protos/perfetto/config/perfetto_config.proto
index 585a592..8ea2e4b 100644
--- a/protos/perfetto/config/perfetto_config.proto
+++ b/protos/perfetto/config/perfetto_config.proto
@@ -760,6 +760,10 @@
   // Provides a breakdown of energy estimation for various subsystem (e.g. GPU).
   // Available from Android S.
   optional bool collect_energy_estimation_breakdown = 4;
+
+  // Provides a breakdown of time in state for various subsystems.
+  // Available from Android U.
+  optional bool collect_entity_state_residency = 5;
 }
 
 // End of protos/perfetto/config/power/android_power_config.proto
@@ -2378,7 +2382,7 @@
 // Begin of protos/perfetto/config/data_source_config.proto
 
 // The configuration that is passed to each data source when starting tracing.
-// Next id: 121
+// Next id: 123
 message DataSourceConfig {
   enum SessionInitiator {
     SESSION_INITIATOR_UNSPECIFIED = 0;
@@ -2405,6 +2409,10 @@
   // DO NOT SET in consumer as this will be overridden by the service.
   optional uint32 trace_duration_ms = 3;
 
+  // If true, |trace_duration_ms| should count also time in suspend. This
+  // is propagated from TraceConfig.prefer_suspend_clock_for_duration.
+  optional bool prefer_suspend_clock_for_duration = 122;
+
   // Set by the service to indicate how long it waits after StopDataSource.
   // DO NOT SET in consumer as this will be overridden by the service.
   optional uint32 stop_timeout_ms = 7;
@@ -2522,7 +2530,7 @@
 // It contains the general config for the logging buffer(s) and the configs for
 // all the data source being enabled.
 //
-// Next id: 35.
+// Next id: 37.
 message TraceConfig {
   message BufferConfig {
     optional uint32 size_kb = 1;
@@ -2625,6 +2633,15 @@
   // TriggerConfig.trigger_timeout_ms instead.
   optional uint32 duration_ms = 3;
 
+  // If true, tries to use CLOCK_BOOTTIME for duration_ms rather than
+  // CLOCK_MONOTONIC (which doesn't count time in suspend). Supported only on
+  // Linux/Android, no-op on other platforms. This is used when dealing with
+  // long (e.g. 24h) traces, where suspend can inflate them to weeks of
+  // wall-time, making them more likely to hit device reboots (and hence loss).
+  // This option also changes consistently the semantic of
+  // TrigerConfig.stop_delay_ms.
+  optional bool prefer_suspend_clock_for_duration = 36;
+
   // This is set when --dropbox is passed to the Perfetto command line client
   // and enables guardrails that limit resource usage for traces requested
   // by statsd.
@@ -2791,6 +2808,8 @@
 
       // After a trigger is received either in START_TRACING or STOP_TRACING
       // mode then the trace will end |stop_delay_ms| after triggering.
+      // If |prefer_suspend_clock_for_duration| is set, the duration will be
+      // based on wall-clock, counting also time in suspend.
       optional uint32 stop_delay_ms = 3;
 
       // Limits the number of traces this trigger can start/stop in a rolling
diff --git a/protos/perfetto/config/power/android_power_config.proto b/protos/perfetto/config/power/android_power_config.proto
index 82c4bb2..dddcb28 100644
--- a/protos/perfetto/config/power/android_power_config.proto
+++ b/protos/perfetto/config/power/android_power_config.proto
@@ -43,4 +43,8 @@
   // Provides a breakdown of energy estimation for various subsystem (e.g. GPU).
   // Available from Android S.
   optional bool collect_energy_estimation_breakdown = 4;
+
+  // Provides a breakdown of time in state for various subsystems.
+  // Available from Android U.
+  optional bool collect_entity_state_residency = 5;
 }
diff --git a/protos/perfetto/config/trace_config.proto b/protos/perfetto/config/trace_config.proto
index 4338154..e7c9767 100644
--- a/protos/perfetto/config/trace_config.proto
+++ b/protos/perfetto/config/trace_config.proto
@@ -26,7 +26,7 @@
 // It contains the general config for the logging buffer(s) and the configs for
 // all the data source being enabled.
 //
-// Next id: 35.
+// Next id: 37.
 message TraceConfig {
   message BufferConfig {
     optional uint32 size_kb = 1;
@@ -129,6 +129,15 @@
   // TriggerConfig.trigger_timeout_ms instead.
   optional uint32 duration_ms = 3;
 
+  // If true, tries to use CLOCK_BOOTTIME for duration_ms rather than
+  // CLOCK_MONOTONIC (which doesn't count time in suspend). Supported only on
+  // Linux/Android, no-op on other platforms. This is used when dealing with
+  // long (e.g. 24h) traces, where suspend can inflate them to weeks of
+  // wall-time, making them more likely to hit device reboots (and hence loss).
+  // This option also changes consistently the semantic of
+  // TrigerConfig.stop_delay_ms.
+  optional bool prefer_suspend_clock_for_duration = 36;
+
   // This is set when --dropbox is passed to the Perfetto command line client
   // and enables guardrails that limit resource usage for traces requested
   // by statsd.
@@ -295,6 +304,8 @@
 
       // After a trigger is received either in START_TRACING or STOP_TRACING
       // mode then the trace will end |stop_delay_ms| after triggering.
+      // If |prefer_suspend_clock_for_duration| is set, the duration will be
+      // based on wall-clock, counting also time in suspend.
       optional uint32 stop_delay_ms = 3;
 
       // Limits the number of traces this trigger can start/stop in a rolling
diff --git a/protos/perfetto/trace/interned_data/interned_data.proto b/protos/perfetto/trace/interned_data/interned_data.proto
index eddd7b1..59103a7 100644
--- a/protos/perfetto/trace/interned_data/interned_data.proto
+++ b/protos/perfetto/trace/interned_data/interned_data.proto
@@ -53,7 +53,7 @@
 // emitted proactively in advance of referring to them in later packets.
 //
 // Next reserved id: 8 (up to 15).
-// Next id: 29.
+// Next id: 30.
 message InternedData {
   // TODO(eseckler): Replace iid fields inside interned messages with
   // map<iid, message> type fields in InternedData.
@@ -111,4 +111,7 @@
   // This is is NOT the real address. This is to avoid disclosing KASLR through
   // traces.
   repeated InternedString kernel_symbols = 26;
+
+  // Interned string values in the DebugAnnotation proto.
+  repeated InternedString debug_annotation_string_values = 29;
 }
diff --git a/protos/perfetto/trace/perfetto_trace.proto b/protos/perfetto/trace/perfetto_trace.proto
index fc47541..6205554 100644
--- a/protos/perfetto/trace/perfetto_trace.proto
+++ b/protos/perfetto/trace/perfetto_trace.proto
@@ -760,6 +760,10 @@
   // Provides a breakdown of energy estimation for various subsystem (e.g. GPU).
   // Available from Android S.
   optional bool collect_energy_estimation_breakdown = 4;
+
+  // Provides a breakdown of time in state for various subsystems.
+  // Available from Android U.
+  optional bool collect_entity_state_residency = 5;
 }
 
 // End of protos/perfetto/config/power/android_power_config.proto
@@ -2378,7 +2382,7 @@
 // Begin of protos/perfetto/config/data_source_config.proto
 
 // The configuration that is passed to each data source when starting tracing.
-// Next id: 121
+// Next id: 123
 message DataSourceConfig {
   enum SessionInitiator {
     SESSION_INITIATOR_UNSPECIFIED = 0;
@@ -2405,6 +2409,10 @@
   // DO NOT SET in consumer as this will be overridden by the service.
   optional uint32 trace_duration_ms = 3;
 
+  // If true, |trace_duration_ms| should count also time in suspend. This
+  // is propagated from TraceConfig.prefer_suspend_clock_for_duration.
+  optional bool prefer_suspend_clock_for_duration = 122;
+
   // Set by the service to indicate how long it waits after StopDataSource.
   // DO NOT SET in consumer as this will be overridden by the service.
   optional uint32 stop_timeout_ms = 7;
@@ -2522,7 +2530,7 @@
 // It contains the general config for the logging buffer(s) and the configs for
 // all the data source being enabled.
 //
-// Next id: 35.
+// Next id: 37.
 message TraceConfig {
   message BufferConfig {
     optional uint32 size_kb = 1;
@@ -2625,6 +2633,15 @@
   // TriggerConfig.trigger_timeout_ms instead.
   optional uint32 duration_ms = 3;
 
+  // If true, tries to use CLOCK_BOOTTIME for duration_ms rather than
+  // CLOCK_MONOTONIC (which doesn't count time in suspend). Supported only on
+  // Linux/Android, no-op on other platforms. This is used when dealing with
+  // long (e.g. 24h) traces, where suspend can inflate them to weeks of
+  // wall-time, making them more likely to hit device reboots (and hence loss).
+  // This option also consistently changes the semantics of
+  // TriggerConfig.stop_delay_ms.
+  optional bool prefer_suspend_clock_for_duration = 36;
+
   // This is set when --dropbox is passed to the Perfetto command line client
   // and enables guardrails that limit resource usage for traces requested
   // by statsd.
@@ -2791,6 +2808,8 @@
 
       // After a trigger is received either in START_TRACING or STOP_TRACING
       // mode then the trace will end |stop_delay_ms| after triggering.
+      // If |prefer_suspend_clock_for_duration| is set, the duration will be
+      // based on wall-clock, counting also time in suspend.
       optional uint32 stop_delay_ms = 3;
 
       // Limits the number of traces this trigger can start/stop in a rolling
@@ -8656,7 +8675,7 @@
 //     }
 //   }
 //
-// Next ID: 17.
+// Next ID: 18.
 // Reserved ID: 15
 message DebugAnnotation {
   // Name fields are set only for dictionary entries.
@@ -8672,7 +8691,6 @@
     uint64 uint_value = 3;
     int64 int_value = 4;
     double double_value = 5;
-    string string_value = 6;
     // Pointers are stored in a separate type as the JSON output treats them
     // differently from other uint64 values.
     uint64 pointer_value = 7;
@@ -8683,6 +8701,11 @@
     // Legacy instrumentation may not support conversion of nested data to
     // NestedValue yet.
     string legacy_json_value = 9;
+
+    // interned and non-interned variants of strings.
+    string string_value = 6;
+    // Corresponds to |debug_annotation_string_values| field in InternedData.
+    uint64 string_value_iid = 17;
   }
 
   // Used to embed arbitrary proto messages (which are also typically used to
@@ -9850,7 +9873,7 @@
 // emitted proactively in advance of referring to them in later packets.
 //
 // Next reserved id: 8 (up to 15).
-// Next id: 29.
+// Next id: 30.
 message InternedData {
   // TODO(eseckler): Replace iid fields inside interned messages with
   // map<iid, message> type fields in InternedData.
@@ -9908,6 +9931,9 @@
   // This is is NOT the real address. This is to avoid disclosing KASLR through
   // traces.
   repeated InternedString kernel_symbols = 26;
+
+  // Interned string values in the DebugAnnotation proto.
+  repeated InternedString debug_annotation_string_values = 29;
 }
 
 // End of protos/perfetto/trace/interned_data/interned_data.proto
@@ -10184,6 +10210,48 @@
 
 // End of protos/perfetto/trace/power/android_energy_estimation_breakdown.proto
 
+// Begin of protos/perfetto/trace/power/android_entity_state_residency.proto
+
+message EntityStateResidency {
+  message PowerEntityState {
+    // Index corresponding to the entity
+    optional int32 entity_index = 1;
+
+    // Index corresponding to the state
+    optional int32 state_index = 2;
+
+    // Name of the entity
+    optional string entity_name = 3;
+
+    // Name of the state
+    optional string state_name = 4;
+  }
+
+  // This is only emitted at the beginning of the trace.
+  repeated PowerEntityState power_entity_state = 1;
+
+  message StateResidency {
+    // Index corresponding to PowerEntityState.entity_index
+    optional int32 entity_index = 1;
+
+    // Index corresponding to PowerEntityState.state_index
+    optional int32 state_index = 2;
+
+    // Time since boot that this entity has been in this state
+    optional uint64 total_time_in_state_ms = 3;
+
+    // Total number of times since boot that the entity has entered this state
+    optional uint64 total_state_entry_count = 4;
+
+    // Timestamp of the last time the entity entered this state
+    optional uint64 last_entry_timestamp_ms = 5;
+  }
+
+  repeated StateResidency residency = 2;
+}
+
+// End of protos/perfetto/trace/power/android_entity_state_residency.proto
+
 // Begin of protos/perfetto/trace/power/battery_counters.proto
 
 message BatteryCounters {
@@ -11710,7 +11778,7 @@
 // See the [Buffers and Dataflow](/docs/concepts/buffers.md) doc for details.
 //
 // Next reserved id: 14 (up to 15).
-// Next id: 91.
+// Next id: 92.
 message TracePacket {
   // The timestamp of the TracePacket.
   // By default this timestamps refers to the trace clock (CLOCK_BOOTTIME on
@@ -11779,6 +11847,7 @@
     AndroidGameInterventionList android_game_intervention_list = 83;
     StatsdAtom statsd_atom = 84;
     AndroidSystemProperty android_system_property = 86;
+    EntityStateResidency entity_state_residency = 91;
 
     // Only used in profile packets.
     ProfiledFrameSymbols profiled_frame_symbols = 55;
diff --git a/protos/perfetto/trace/power/BUILD.gn b/protos/perfetto/trace/power/BUILD.gn
index ec595de..1d53a00 100644
--- a/protos/perfetto/trace/power/BUILD.gn
+++ b/protos/perfetto/trace/power/BUILD.gn
@@ -18,6 +18,7 @@
   deps = [ "../../common:@TYPE@" ]
   sources = [
     "android_energy_estimation_breakdown.proto",
+    "android_entity_state_residency.proto",
     "battery_counters.proto",
     "power_rails.proto",
   ]
diff --git a/protos/perfetto/trace/power/android_entity_state_residency.proto b/protos/perfetto/trace/power/android_entity_state_residency.proto
new file mode 100644
index 0000000..6ca767d
--- /dev/null
+++ b/protos/perfetto/trace/power/android_entity_state_residency.proto
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+syntax = "proto2";
+package perfetto.protos;
+
+message EntityStateResidency {
+  message PowerEntityState {
+    // Index corresponding to the entity
+    optional int32 entity_index = 1;
+
+    // Index corresponding to the state
+    optional int32 state_index = 2;
+
+    // Name of the entity
+    optional string entity_name = 3;
+
+    // Name of the state
+    optional string state_name = 4;
+  }
+
+  // This is only emitted at the beginning of the trace.
+  repeated PowerEntityState power_entity_state = 1;
+
+  message StateResidency {
+    // Index corresponding to PowerEntityState.entity_index
+    optional int32 entity_index = 1;
+
+    // Index corresponding to PowerEntityState.state_index
+    optional int32 state_index = 2;
+
+    // Time since boot that this entity has been in this state
+    optional uint64 total_time_in_state_ms = 3;
+
+    // Total number of times since boot that the entity has entered this state
+    optional uint64 total_state_entry_count = 4;
+
+    // Timestamp of the last time the entity entered this state
+    optional uint64 last_entry_timestamp_ms = 5;
+  }
+
+  repeated StateResidency residency = 2;
+}
diff --git a/protos/perfetto/trace/trace_packet.proto b/protos/perfetto/trace/trace_packet.proto
index 1df67bd..c5c2de5 100644
--- a/protos/perfetto/trace/trace_packet.proto
+++ b/protos/perfetto/trace/trace_packet.proto
@@ -46,6 +46,7 @@
 import "protos/perfetto/trace/perfetto/perfetto_metatrace.proto";
 import "protos/perfetto/trace/perfetto/tracing_service_event.proto";
 import "protos/perfetto/trace/power/android_energy_estimation_breakdown.proto";
+import "protos/perfetto/trace/power/android_entity_state_residency.proto";
 import "protos/perfetto/trace/power/battery_counters.proto";
 import "protos/perfetto/trace/power/power_rails.proto";
 import "protos/perfetto/trace/statsd/statsd_atom.proto";
@@ -93,7 +94,7 @@
 // See the [Buffers and Dataflow](/docs/concepts/buffers.md) doc for details.
 //
 // Next reserved id: 14 (up to 15).
-// Next id: 91.
+// Next id: 92.
 message TracePacket {
   // The timestamp of the TracePacket.
   // By default this timestamps refers to the trace clock (CLOCK_BOOTTIME on
@@ -162,6 +163,7 @@
     AndroidGameInterventionList android_game_intervention_list = 83;
     StatsdAtom statsd_atom = 84;
     AndroidSystemProperty android_system_property = 86;
+    EntityStateResidency entity_state_residency = 91;
 
     // Only used in profile packets.
     ProfiledFrameSymbols profiled_frame_symbols = 55;
diff --git a/protos/perfetto/trace/track_event/debug_annotation.proto b/protos/perfetto/trace/track_event/debug_annotation.proto
index 35a041d..6dba2ab 100644
--- a/protos/perfetto/trace/track_event/debug_annotation.proto
+++ b/protos/perfetto/trace/track_event/debug_annotation.proto
@@ -57,7 +57,7 @@
 //     }
 //   }
 //
-// Next ID: 17.
+// Next ID: 18.
 // Reserved ID: 15
 message DebugAnnotation {
   // Name fields are set only for dictionary entries.
@@ -73,7 +73,6 @@
     uint64 uint_value = 3;
     int64 int_value = 4;
     double double_value = 5;
-    string string_value = 6;
     // Pointers are stored in a separate type as the JSON output treats them
     // differently from other uint64 values.
     uint64 pointer_value = 7;
@@ -84,6 +83,11 @@
     // Legacy instrumentation may not support conversion of nested data to
     // NestedValue yet.
     string legacy_json_value = 9;
+
+    // interned and non-interned variants of strings.
+    string string_value = 6;
+    // Corresponds to |debug_annotation_string_values| field in InternedData.
+    uint64 string_value_iid = 17;
   }
 
   // Used to embed arbitrary proto messages (which are also typically used to
diff --git a/python/perfetto/trace_processor/api.py b/python/perfetto/trace_processor/api.py
index d884759..c21dd6f 100644
--- a/python/perfetto/trace_processor/api.py
+++ b/python/perfetto/trace_processor/api.py
@@ -367,7 +367,9 @@
     if hasattr(self, 'subprocess'):
       self.subprocess.kill()
       self.subprocess.wait()
-    self.http.conn.close()
+
+    if hasattr(self, 'http'):
+      self.http.conn.close()
 
   def __del__(self):
     self.close()
diff --git a/src/android_internal/power_stats.cc b/src/android_internal/power_stats.cc
index f7badf6..2e3e956 100644
--- a/src/android_internal/power_stats.cc
+++ b/src/android_internal/power_stats.cc
@@ -53,6 +53,10 @@
                                      size_t* size_of_arr) = 0;
   virtual bool GetEnergyConsumed(EnergyEstimationBreakdown* breakdown,
                                  size_t* size_of_arr) = 0;
+  virtual bool GetPowerEntityStates(PowerEntityState* state,
+                                    size_t* size_of_arr) = 0;
+  virtual bool GetPowerEntityStateResidency(PowerEntityStateResidency* state,
+                                            size_t* size_of_arr) = 0;
   virtual ~PowerStatsDataProvider() = default;
 };
 
@@ -64,6 +68,10 @@
                              size_t* size_of_arr) override;
   bool GetEnergyConsumed(EnergyEstimationBreakdown* breakdown,
                          size_t* size_of_arr) override;
+  bool GetPowerEntityStates(PowerEntityState* state,
+                            size_t* size_of_arr) override;
+  bool GetPowerEntityStateResidency(PowerEntityStateResidency* state,
+                                    size_t* size_of_arr) override;
 
   PowerStatsHalDataProvider() = default;
   ~PowerStatsHalDataProvider() override = default;
@@ -84,6 +92,10 @@
                              size_t* size_of_arr) override;
   bool GetEnergyConsumed(EnergyEstimationBreakdown* breakdown,
                          size_t* size_of_arr) override;
+  bool GetPowerEntityStates(PowerEntityState* state,
+                            size_t* size_of_arr) override;
+  bool GetPowerEntityStateResidency(PowerEntityStateResidency* state,
+                                    size_t* size_of_arr) override;
 
   PowerStatsAidlDataProvider() = default;
   ~PowerStatsAidlDataProvider() override = default;
@@ -129,6 +141,16 @@
   return GetDataProvider()->GetEnergyConsumed(breakdown, size_of_arr);
 }
 
+bool GetPowerEntityStates(PowerEntityState* state, size_t* size_of_arr) {
+  return GetDataProvider()->GetPowerEntityStates(state, size_of_arr);
+}
+
+bool GetPowerEntityStateResidency(PowerEntityStateResidency* residency,
+                                  size_t* size_of_arr) {
+  return GetDataProvider()->GetPowerEntityStateResidency(residency,
+                                                         size_of_arr);
+}
+
 /*** Power Stats HAL Implemenation *******************************************/
 
 using android::hardware::hidl_vec;
@@ -218,6 +240,17 @@
   return false;
 }
 
+bool PowerStatsHalDataProvider::GetPowerEntityStates(PowerEntityState*,
+                                                     size_t*) {
+  return false;
+}
+
+bool PowerStatsHalDataProvider::GetPowerEntityStateResidency(
+    PowerEntityStateResidency*,
+    size_t*) {
+  return false;
+}
+
 /*** End of Power Stats HAL Implemenation *************************************/
 
 /*** Power Stats AIDL Implemenation *******************************************/
@@ -381,6 +414,95 @@
   }
   return true;
 }
+
+bool PowerStatsAidlDataProvider::GetPowerEntityStates(
+    PowerEntityState* entity_state,
+    size_t* size_of_arr) {
+  const size_t in_array_size = *size_of_arr;
+  *size_of_arr = 0;
+
+  aidl::IPowerStats* svc = MaybeGetService();
+  if (svc == nullptr) {
+    return false;
+  }
+
+  std::vector<aidl::PowerEntity> entities;
+  android::binder::Status status = svc->getPowerEntityInfo(&entities);
+
+  if (!status.isOk()) {
+    if (status.transactionError() == android::DEAD_OBJECT) {
+      // Service has died.  Reset it to attempt to acquire a new one next time.
+      ResetService();
+    }
+    return false;
+  }
+
+  // Iterate through all entities.
+  for (const auto& entity : entities) {
+    if (*size_of_arr >= in_array_size) {
+      break;
+    }
+
+    // Iterate through all states for this entity.
+    for (const auto& state : entity.states) {
+      if (*size_of_arr >= in_array_size) {
+        break;
+      }
+      auto& cur = entity_state[(*size_of_arr)++];
+      cur.entity_id = entity.id;
+      strlcpy(cur.entity_name, entity.name.c_str(), sizeof(cur.entity_name));
+      cur.state_id = state.id;
+      strlcpy(cur.state_name, state.name.c_str(), sizeof(cur.state_name));
+    }
+  }
+  return true;
+}
+
+bool PowerStatsAidlDataProvider::GetPowerEntityStateResidency(
+    PowerEntityStateResidency* residency,
+    size_t* size_of_arr) {
+  const size_t in_array_size = *size_of_arr;
+  *size_of_arr = 0;
+
+  aidl::IPowerStats* svc = MaybeGetService();
+  if (svc == nullptr) {
+    return false;
+  }
+
+  std::vector<int> ids;
+  std::vector<aidl::StateResidencyResult> entities;
+  android::binder::Status status = svc->getStateResidency(ids, &entities);
+
+  if (!status.isOk()) {
+    if (status.transactionError() == android::DEAD_OBJECT) {
+      // Service has died.  Reset it to attempt to acquire a new one next time.
+      ResetService();
+    }
+    return false;
+  }
+
+  // Iterate through all entities.
+  for (const auto& entity : entities) {
+    if (*size_of_arr >= in_array_size) {
+      break;
+    }
+
+    // Iterate through all states for this entity.
+    for (const auto& stateResidencyData : entity.stateResidencyData) {
+      if (*size_of_arr >= in_array_size) {
+        break;
+      }
+      auto& cur = residency[(*size_of_arr)++];
+      cur.entity_id = entity.id;
+      cur.state_id = stateResidencyData.id;
+      cur.total_time_in_state_ms = stateResidencyData.totalTimeInStateMs;
+      cur.total_state_entry_count = stateResidencyData.totalStateEntryCount;
+      cur.last_entry_timestamp_ms = stateResidencyData.lastEntryTimestampMs;
+    }
+  }
+  return true;
+}
+
 /*** End of Power Stats AIDL Implemenation ************************************/
 
 }  // namespace android_internal
diff --git a/src/android_internal/power_stats.h b/src/android_internal/power_stats.h
index 3225d29..46c78ae 100644
--- a/src/android_internal/power_stats.h
+++ b/src/android_internal/power_stats.h
@@ -81,6 +81,21 @@
   int64_t energy_uws;
 };
 
+struct PowerEntityState {
+  int32_t entity_id;
+  int32_t state_id;
+  char entity_name[64];
+  char state_name[64];
+};
+
+struct PowerEntityStateResidency {
+  int32_t entity_id;
+  int32_t state_id;
+  uint64_t total_time_in_state_ms;
+  uint64_t total_state_entry_count;
+  uint64_t last_entry_timestamp_ms;
+};
+
 extern "C" {
 
 // These functions are not thread safe unless specified otherwise.
@@ -101,6 +116,13 @@
 bool __attribute__((visibility("default")))
 GetEnergyConsumed(EnergyEstimationBreakdown* breakdown, size_t* size_of_arr);
 
+bool __attribute__((visibility("default")))
+GetPowerEntityStates(PowerEntityState* state, size_t* size_of_arr);
+
+bool __attribute__((visibility("default")))
+GetPowerEntityStateResidency(PowerEntityStateResidency* residency,
+                             size_t* size_of_arr);
+
 }  // extern "C"
 
 }  // namespace android_internal
diff --git a/src/base/periodic_task.cc b/src/base/periodic_task.cc
index fcbc9de..eaeba30 100644
--- a/src/base/periodic_task.cc
+++ b/src/base/periodic_task.cc
@@ -33,34 +33,49 @@
 namespace base {
 
 namespace {
-base::ScopedPlatformHandle CreateTimerFd(uint32_t period_ms) {
+
+uint32_t GetNextDelayMs(const TimeMillis& now_ms,
+                        const PeriodicTask::Args& args) {
+  if (args.one_shot)
+    return args.period_ms;
+
+  return args.period_ms -
+         static_cast<uint32_t>(now_ms.count() % args.period_ms);
+}
+
+ScopedPlatformHandle CreateTimerFd(const PeriodicTask::Args& args) {
 #if PERFETTO_BUILDFLAG(PERFETTO_OS_LINUX) || \
     (PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID) && __ANDROID_API__ >= 19)
-  base::ScopedPlatformHandle tfd(
+  ScopedPlatformHandle tfd(
       timerfd_create(CLOCK_BOOTTIME, TFD_CLOEXEC | TFD_NONBLOCK));
-  // The initial phase, aligned on wall clock.
-  uint32_t phase_ms =
-      period_ms -
-      static_cast<uint32_t>(base::GetBootTimeNs().count() % period_ms);
+  uint32_t phase_ms = GetNextDelayMs(GetBootTimeMs(), args);
+
   struct itimerspec its {};
   // The "1 +" is to make sure that we never pass a zero it_value in the
   // unlikely case of phase_ms being 0. That would cause the timer to be
   // considered disarmed by timerfd_settime.
   its.it_value.tv_sec = static_cast<time_t>(phase_ms / 1000u);
   its.it_value.tv_nsec = 1 + static_cast<long>((phase_ms % 1000u) * 1000000u);
-  its.it_interval.tv_sec = static_cast<time_t>(period_ms / 1000u);
-  its.it_interval.tv_nsec = static_cast<long>((period_ms % 1000u) * 1000000u);
+  if (args.one_shot) {
+    its.it_interval.tv_sec = 0;
+    its.it_interval.tv_nsec = 0;
+  } else {
+    const uint32_t period_ms = args.period_ms;
+    its.it_interval.tv_sec = static_cast<time_t>(period_ms / 1000u);
+    its.it_interval.tv_nsec = static_cast<long>((period_ms % 1000u) * 1000000u);
+  }
   if (timerfd_settime(*tfd, 0, &its, nullptr) < 0)
-    return base::ScopedPlatformHandle();
+    return ScopedPlatformHandle();
   return tfd;
 #else
-  base::ignore_result(period_ms);
-  return base::ScopedPlatformHandle();
+  ignore_result(args);
+  return ScopedPlatformHandle();
 #endif
 }
+
 }  // namespace
 
-PeriodicTask::PeriodicTask(base::TaskRunner* task_runner)
+PeriodicTask::PeriodicTask(TaskRunner* task_runner)
     : task_runner_(task_runner), weak_ptr_factory_(this) {}
 
 PeriodicTask::~PeriodicTask() {
@@ -77,7 +92,7 @@
   }
   args_ = std::move(args);
   if (args_.use_suspend_aware_timer) {
-    timer_fd_ = CreateTimerFd(args_.period_ms);
+    timer_fd_ = CreateTimerFd(args_);
     if (timer_fd_) {
       auto weak_this = weak_ptr_factory_.GetWeakPtr();
       task_runner_->AddFileDescriptorWatch(
@@ -99,9 +114,7 @@
   PERFETTO_DCHECK_THREAD(thread_checker_);
   PERFETTO_DCHECK(args_.period_ms > 0);
   PERFETTO_DCHECK(!timer_fd_);
-  uint32_t delay_ms =
-      args_.period_ms -
-      static_cast<uint32_t>(base::GetWallTimeMs().count() % args_.period_ms);
+  uint32_t delay_ms = GetNextDelayMs(GetWallTimeMs(), args_);
   auto weak_this = weak_ptr_factory_.GetWeakPtr();
   task_runner_->PostDelayedTask(
       std::bind(PeriodicTask::RunTaskAndPostNext, weak_this, generation_),
@@ -112,7 +125,7 @@
 // This function can be called in two ways (both from the TaskRunner):
 // 1. When using a timerfd, this task is registered as a FD watch.
 // 2. When using PostDelayedTask, this is the task posted on the TaskRunner.
-void PeriodicTask::RunTaskAndPostNext(base::WeakPtr<PeriodicTask> thiz,
+void PeriodicTask::RunTaskAndPostNext(WeakPtr<PeriodicTask> thiz,
                                       uint32_t generation) {
   if (!thiz || !thiz->args_.task || generation != thiz->generation_)
     return;  // Destroyed or Reset() in the meanwhile.
@@ -126,7 +139,7 @@
     // just need to read() it.
     uint64_t ignored = 0;
     errno = 0;
-    auto rsize = base::Read(*thiz->timer_fd_, &ignored, sizeof(&ignored));
+    auto rsize = Read(*thiz->timer_fd_, &ignored, sizeof(ignored));
     if (rsize != sizeof(uint64_t)) {
       if (errno == EAGAIN)
         return;  // A spurious wakeup. Rare, but can happen, just ignore.
@@ -135,15 +148,21 @@
     }
 #endif
   }
+
+  // Create a copy of the task to deal with either:
+  // 1. one_shot causing a Reset().
+  // 2. task() invoking internally Reset().
+  // That would cause a reset of the args_.task itself, which would invalidate
+  // the task bind state while we are invoking it.
+  auto task = thiz->args_.task;
+
   // The repetition of the if() is to deal with the ResetTimerFd() case above.
-  if (!thiz->timer_fd_) {
+  if (thiz->args_.one_shot) {
+    thiz->Reset();
+  } else if (!thiz->timer_fd_) {
     thiz->PostNextTask();
   }
-  // Create a copy of the task in the unlikely event that the task ends up
-  // up destroying the PeriodicTask object or calling Reset() on it. That would
-  // cause a reset of the args_.task itself, which would invalidate the task
-  // bind state while we are invoking it.
-  auto task = thiz->args_.task;
+
   task();
 }
 
diff --git a/src/base/periodic_task_unittest.cc b/src/base/periodic_task_unittest.cc
index 4919720..9fae406 100644
--- a/src/base/periodic_task_unittest.cc
+++ b/src/base/periodic_task_unittest.cc
@@ -25,6 +25,9 @@
 #include <unistd.h>
 #endif
 
+#include <chrono>
+#include <thread>
+
 namespace perfetto {
 namespace base {
 
@@ -49,6 +52,26 @@
   EXPECT_EQ(num_callbacks, 3u);
 }
 
+TEST(PeriodicTaskTest, OneShot) {
+  TestTaskRunner task_runner;
+  PeriodicTask pt(&task_runner);
+  uint32_t num_callbacks = 0;
+  auto quit_closure = task_runner.CreateCheckpoint("one_shot_done");
+
+  PeriodicTask::Args args;
+  args.use_suspend_aware_timer = true;
+  args.one_shot = true;
+  args.period_ms = 1;
+  args.task = [&] {
+    ASSERT_EQ(++num_callbacks, 1u);
+    quit_closure();
+  };
+  pt.Start(std::move(args));
+  std::this_thread::sleep_for(std::chrono::milliseconds(3));
+  task_runner.RunUntilCheckpoint("one_shot_done");
+  EXPECT_EQ(num_callbacks, 1u);
+}
+
 // Call Reset() from a callback, ensure no further calls are made.
 TEST(PeriodicTaskTest, ResetFromCallback) {
   TestTaskRunner task_runner;
diff --git a/src/perfetto_cmd/perfetto_cmd.cc b/src/perfetto_cmd/perfetto_cmd.cc
index 25bdfbd..be35ea9 100644
--- a/src/perfetto_cmd/perfetto_cmd.cc
+++ b/src/perfetto_cmd/perfetto_cmd.cc
@@ -202,10 +202,10 @@
 
 PerfettoCmd::~PerfettoCmd() {
   PERFETTO_DCHECK(g_perfetto_cmd == this);
+  g_perfetto_cmd = nullptr;
   if (ctrl_c_handler_installed_) {
     task_runner_.RemoveFileDescriptorWatch(ctrl_c_evt_.fd());
   }
-  g_perfetto_cmd = nullptr;
 }
 
 void PerfettoCmd::PrintUsage(const char* argv0) {
@@ -881,11 +881,14 @@
     LogTriggerEvents(PerfettoTriggerAtom::kCmdTrigger, triggers_to_activate_);
 
     bool finished_with_success = false;
+    auto weak_this = weak_factory_.GetWeakPtr();
     TriggerProducer producer(
         &task_runner_,
-        [this, &finished_with_success](bool success) {
+        [weak_this, &finished_with_success](bool success) {
+          if (!weak_this)
+            return;
           finished_with_success = success;
-          task_runner_.Quit();
+          weak_this->task_runner_.Quit();
         },
         &triggers_to_activate_);
     task_runner_.Run();
@@ -1037,6 +1040,10 @@
   }
 
   // Failsafe mechanism to avoid waiting indefinitely if the service hangs.
+  // Note: when using prefer_suspend_clock_for_duration the actual duration
+  // might be < expected_duration_ms_ measured in wall time. But this is fine
+  // because the resulting timeout will be conservative (it will be accurate
+  // if the device never suspends, and will be more lax if it does).
   if (expected_duration_ms_) {
     uint32_t trace_timeout = expected_duration_ms_ + 60000 +
                              trace_config_->flush_timeout_ms() +
@@ -1208,13 +1215,18 @@
       return;
     g_perfetto_cmd->SignalCtrlC();
   });
-  task_runner_.AddFileDescriptorWatch(ctrl_c_evt_.fd(), [this] {
+  auto weak_this = weak_factory_.GetWeakPtr();
+  task_runner_.AddFileDescriptorWatch(ctrl_c_evt_.fd(), [weak_this] {
+    if (!weak_this)
+      return;
     PERFETTO_LOG("SIGINT/SIGTERM received: disabling tracing.");
-    ctrl_c_evt_.Clear();
-    consumer_endpoint_->Flush(0, [this](bool flush_success) {
+    weak_this->ctrl_c_evt_.Clear();
+    weak_this->consumer_endpoint_->Flush(0, [weak_this](bool flush_success) {
+      if (!weak_this)
+        return;
       if (!flush_success)
         PERFETTO_ELOG("Final flush unsuccessful.");
-      consumer_endpoint_->DisableTracing();
+      weak_this->consumer_endpoint_->DisableTracing();
     });
   });
 }
@@ -1249,10 +1261,13 @@
   PERFETTO_DCHECK(trace_config_->write_into_file());
 
   if (stop_trace_once_attached_) {
-    consumer_endpoint_->Flush(0, [this](bool flush_success) {
+    auto weak_this = weak_factory_.GetWeakPtr();
+    consumer_endpoint_->Flush(0, [weak_this](bool flush_success) {
+      if (!weak_this)
+        return;
       if (!flush_success)
         PERFETTO_ELOG("Final flush unsuccessful.");
-      consumer_endpoint_->DisableTracing();
+      weak_this->consumer_endpoint_->DisableTracing();
     });
   }
 }
diff --git a/src/perfetto_cmd/perfetto_cmd.h b/src/perfetto_cmd/perfetto_cmd.h
index b64159e..842ddcc 100644
--- a/src/perfetto_cmd/perfetto_cmd.h
+++ b/src/perfetto_cmd/perfetto_cmd.h
@@ -29,6 +29,7 @@
 #include "perfetto/ext/base/pipe.h"
 #include "perfetto/ext/base/scoped_file.h"
 #include "perfetto/ext/base/unix_task_runner.h"
+#include "perfetto/ext/base/weak_ptr.h"
 #include "perfetto/ext/tracing/core/consumer.h"
 #include "perfetto/ext/tracing/ipc/consumer_ipc_client.h"
 #include "src/android_stats/perfetto_atoms.h"
@@ -162,6 +163,7 @@
   // How long we expect to trace for or 0 if the trace is indefinite.
   uint32_t expected_duration_ms_ = 0;
   bool trace_data_timeout_armed_ = false;
+  base::WeakPtrFactory<PerfettoCmd> weak_factory_{this};
 };
 
 }  // namespace perfetto
diff --git a/src/shared_lib/BUILD.gn b/src/shared_lib/BUILD.gn
index 62bd06c..057b036 100644
--- a/src/shared_lib/BUILD.gn
+++ b/src/shared_lib/BUILD.gn
@@ -12,17 +12,21 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import("../../gn/perfetto_component.gni")
-
-perfetto_component("protozero") {
+shared_library("libperfetto_c") {
   deps = [
     "../../gn:default_deps",
-    "../../include/perfetto/base",
     "../../include/perfetto/protozero",
     "../../include/perfetto/public:protozero",
+    "../tracing:client_api",
+    "../tracing:platform_impl",
   ]
   sources = [
+    "data_source.cc",
+    "heap_buffer.cc",
+    "producer.cc",
     "stream_writer.cc",
     "stream_writer.h",
+    "tracing_session.cc",
   ]
+  defines = [ "PERFETTO_SHLIB_SDK_IMPLEMENTATION" ]
 }
diff --git a/src/shared_lib/data_source.cc b/src/shared_lib/data_source.cc
new file mode 100644
index 0000000..dfab70a
--- /dev/null
+++ b/src/shared_lib/data_source.cc
@@ -0,0 +1,439 @@
+/*
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "perfetto/public/abi/data_source_abi.h"
+
+#include <bitset>
+
+#include "perfetto/tracing/data_source.h"
+#include "perfetto/tracing/internal/basic_types.h"
+#include "protos/perfetto/common/data_source_descriptor.gen.h"
+#include "protos/perfetto/config/data_source_config.gen.h"
+#include "src/shared_lib/stream_writer.h"
+
+namespace {
+
+using ::perfetto::internal::DataSourceInstanceThreadLocalState;
+using ::perfetto::internal::DataSourceThreadLocalState;
+using ::perfetto::internal::DataSourceType;
+
+thread_local DataSourceThreadLocalState*
+    g_tls_cache[perfetto::internal::kMaxDataSources];
+
+}  // namespace
+
+// Implementation of a shared library data source type (there's one of these per
+// type, not per instance).
+//
+// Returned to the C side when invoking PerfettoDsCreateImpl(). The C side only
+// has an opaque pointer to this.
+struct PerfettoDsImpl {
+  // Instance lifecycle callbacks.
+  PerfettoDsOnSetupCb on_setup_cb = nullptr;
+  PerfettoDsOnStartCb on_start_cb = nullptr;
+  PerfettoDsOnStopCb on_stop_cb = nullptr;
+
+  // These are called to create/delete custom thread-local instance state.
+  PerfettoDsOnCreateCustomState on_create_tls_cb = nullptr;
+  PerfettoDsOnDeleteCustomState on_delete_tls_cb = nullptr;
+
+  // These are called to create/delete custom thread-local instance incremental
+  // state.
+  PerfettoDsOnCreateCustomState on_create_incr_cb = nullptr;
+  PerfettoDsOnDeleteCustomState on_delete_incr_cb = nullptr;
+
+  // Passed to all the callbacks as the `user_arg` param. Null until set.
+  void* cb_user_arg = nullptr;
+
+  DataSourceType cpp_type;
+  std::atomic<bool> enabled{false};
+  std::mutex mu;
+  // Guarded by mu. `enabled` mirrors whether any bit in this set is on.
+  std::bitset<perfetto::internal::kMaxDataSourceInstances> enabled_instances;
+
+  bool IsRegistered() {
+    return cpp_type.static_state()->index !=
+           perfetto::internal::kMaxDataSources;
+  }
+};
+
+namespace {
+
+// Represents a global data source instance (there can be more than one of these
+// for a single data source type).
+class ShlibDataSource : public perfetto::DataSourceBase {
+ public:
+  explicit ShlibDataSource(PerfettoDsImpl* type) : type_(*type) {}
+
+  void OnSetup(const SetupArgs& args) override {
+    if (type_.on_setup_cb) {
+      std::vector<uint8_t> serialized_config = args.config->SerializeAsArray();
+      inst_ctx_ = type_.on_setup_cb(
+          args.internal_instance_index, serialized_config.data(),
+          serialized_config.size(), type_.cb_user_arg);
+    }
+    std::lock_guard<std::mutex> lock(type_.mu);
+    const bool was_enabled = type_.enabled_instances.any();
+    type_.enabled_instances.set(args.internal_instance_index);
+    if (!was_enabled && type_.enabled_instances.any()) {
+      type_.enabled.store(true, std::memory_order_release);
+    }
+  }
+
+  void OnStart(const StartArgs& args) override {
+    if (type_.on_start_cb) {
+      type_.on_start_cb(args.internal_instance_index, type_.cb_user_arg,
+                        inst_ctx_);
+    }
+  }
+
+  void OnStop(const StopArgs& args) override {
+    if (type_.on_stop_cb) {
+      type_.on_stop_cb(
+          args.internal_instance_index, type_.cb_user_arg, inst_ctx_,
+          const_cast<PerfettoDsOnStopArgs*>(
+              reinterpret_cast<const PerfettoDsOnStopArgs*>(&args)));
+    }
+
+    std::lock_guard<std::mutex> lock(type_.mu);
+    type_.enabled_instances.reset(args.internal_instance_index);
+    if (type_.enabled_instances.none()) {
+      type_.enabled.store(false, std::memory_order_release);
+    }
+  }
+
+  const PerfettoDsImpl& type() const { return type_; }
+
+  void* inst_ctx() const { return inst_ctx_; }
+
+ private:
+  PerfettoDsImpl& type_;
+  void* inst_ctx_ = nullptr;
+};
+
+struct DataSourceTraits {
+  static DataSourceThreadLocalState* GetDataSourceTLS(
+      perfetto::internal::DataSourceStaticState* static_state,
+      perfetto::internal::TracingTLS* root_tls) {
+    auto* ds_tls = &root_tls->data_sources_tls[static_state->index];
+    // The per-type TLS is either zero-initialized or must have been
+    // initialized for this specific data source type.
+    PERFETTO_DCHECK(!ds_tls->static_state ||
+                    ds_tls->static_state->index == static_state->index);
+    return ds_tls;
+  }
+};
+
+struct TracePointTraits {
+  using TracePointData = DataSourceType*;
+  static std::atomic<uint32_t>* GetActiveInstances(TracePointData s) {
+    return s->valid_instances();
+  }
+};
+
+DataSourceInstanceThreadLocalState::ObjectWithDeleter CreateShlibTls(
+    DataSourceInstanceThreadLocalState* tls_inst,
+    uint32_t inst_idx,
+    void* ctx) {
+  auto* ds_impl = reinterpret_cast<PerfettoDsImpl*>(ctx);
+
+  void* custom_state = ds_impl->on_create_tls_cb(
+      inst_idx, reinterpret_cast<PerfettoDsTracerImpl*>(tls_inst),
+      ds_impl->cb_user_arg);
+  return DataSourceInstanceThreadLocalState::ObjectWithDeleter(
+      custom_state, ds_impl->on_delete_tls_cb);
+}
+
+DataSourceInstanceThreadLocalState::ObjectWithDeleter
+CreateShlibIncrementalState(DataSourceInstanceThreadLocalState* tls_inst,
+                            uint32_t inst_idx,
+                            void* ctx) {
+  auto* ds_impl = reinterpret_cast<PerfettoDsImpl*>(ctx);
+
+  void* custom_state = ds_impl->on_create_incr_cb(
+      inst_idx, reinterpret_cast<PerfettoDsTracerImpl*>(tls_inst),
+      ds_impl->cb_user_arg);
+  return DataSourceInstanceThreadLocalState::ObjectWithDeleter(
+      custom_state, ds_impl->on_delete_incr_cb);
+}
+
+}  // namespace
+
+// Exposed through data_source_abi.h
+std::atomic<bool> perfetto_atomic_false{false};
+
+struct PerfettoDsImpl* PerfettoDsImplCreate() {
+  return new PerfettoDsImpl();
+}
+
+void PerfettoDsSetOnSetupCallback(struct PerfettoDsImpl* ds_impl,
+                                  PerfettoDsOnSetupCb cb) {
+  PERFETTO_CHECK(!ds_impl->IsRegistered());
+  ds_impl->on_setup_cb = cb;
+}
+
+void PerfettoDsSetOnStartCallback(struct PerfettoDsImpl* ds_impl,
+                                  PerfettoDsOnStartCb cb) {
+  PERFETTO_CHECK(!ds_impl->IsRegistered());
+  ds_impl->on_start_cb = cb;
+}
+
+void PerfettoDsSetOnStopCallback(struct PerfettoDsImpl* ds_impl,
+                                 PerfettoDsOnStopCb cb) {
+  PERFETTO_CHECK(!ds_impl->IsRegistered());
+  ds_impl->on_stop_cb = cb;
+}
+
+void PerfettoDsSetOnCreateTls(struct PerfettoDsImpl* ds_impl,
+                              PerfettoDsOnCreateCustomState cb) {
+  PERFETTO_CHECK(!ds_impl->IsRegistered());
+  ds_impl->on_create_tls_cb = cb;
+}
+
+void PerfettoDsSetOnDeleteTls(struct PerfettoDsImpl* ds_impl,
+                              PerfettoDsOnDeleteCustomState cb) {
+  PERFETTO_CHECK(!ds_impl->IsRegistered());
+  ds_impl->on_delete_tls_cb = cb;
+}
+
+void PerfettoDsSetOnCreateIncr(struct PerfettoDsImpl* ds_impl,
+                               PerfettoDsOnCreateCustomState cb) {
+  PERFETTO_CHECK(!ds_impl->IsRegistered());
+  ds_impl->on_create_incr_cb = cb;
+}
+
+void PerfettoDsSetOnDeleteIncr(struct PerfettoDsImpl* ds_impl,
+                               PerfettoDsOnDeleteCustomState cb) {
+  PERFETTO_CHECK(!ds_impl->IsRegistered());
+  ds_impl->on_delete_incr_cb = cb;
+}
+
+void PerfettoDsSetCbUserArg(struct PerfettoDsImpl* ds_impl, void* user_arg) {
+  PERFETTO_CHECK(!ds_impl->IsRegistered());
+  ds_impl->cb_user_arg = user_arg;
+}
+
+bool PerfettoDsImplRegister(struct PerfettoDsImpl* ds_impl,
+                            PERFETTO_ATOMIC(bool) * *enabled_ptr,
+                            const void* descriptor,
+                            size_t descriptor_size) {
+  perfetto::DataSourceDescriptor dsd;
+  dsd.ParseFromArray(descriptor, descriptor_size);
+
+  std::unique_ptr<PerfettoDsImpl> data_source_type(ds_impl);
+
+  auto factory = [ds_impl]() {
+    return std::unique_ptr<perfetto::DataSourceBase>(
+        new ShlibDataSource(ds_impl));
+  };
+
+  DataSourceType::CreateCustomTlsFn create_custom_tls_fn = nullptr;
+  DataSourceType::CreateIncrementalStateFn create_incremental_state_fn =
+      nullptr;
+  void* cb_ctx = nullptr;
+  if (data_source_type->on_create_incr_cb &&
+      data_source_type->on_delete_incr_cb) {
+    create_incremental_state_fn = CreateShlibIncrementalState;
+    cb_ctx = data_source_type.get();
+  }
+  if (data_source_type->on_create_tls_cb &&
+      data_source_type->on_delete_tls_cb) {
+    create_custom_tls_fn = CreateShlibTls;
+    cb_ctx = data_source_type.get();
+  }
+
+  perfetto::internal::DataSourceParams params;
+  params.supports_multiple_instances = true;
+  params.requires_callbacks_under_lock = false;
+  bool success = data_source_type->cpp_type.Register(
+      dsd, factory, params, perfetto::BufferExhaustedPolicy::kDrop,
+      create_custom_tls_fn, create_incremental_state_fn, cb_ctx);
+  if (!success) {
+    return false;
+  }
+  *enabled_ptr = &data_source_type->enabled;
+  perfetto::base::ignore_result(data_source_type.release());
+  return true;
+}
+
+void PerfettoDsImplUpdateDescriptor(struct PerfettoDsImpl* ds_impl,
+                                    const void* descriptor,
+                                    size_t descriptor_size) {
+  perfetto::DataSourceDescriptor dsd;
+  dsd.ParseFromArray(descriptor, descriptor_size);
+
+  ds_impl->cpp_type.UpdateDescriptor(dsd);
+}
+
+PerfettoDsAsyncStopper* PerfettoDsOnStopArgsPostpone(
+    PerfettoDsOnStopArgs* args) {
+  auto* cb = new std::function<void()>();
+  *cb = reinterpret_cast<const ShlibDataSource::StopArgs*>(args)
+            ->HandleStopAsynchronously();
+  return reinterpret_cast<PerfettoDsAsyncStopper*>(cb);
+}
+
+void PerfettoDsStopDone(PerfettoDsAsyncStopper* stopper) {
+  auto* cb = reinterpret_cast<std::function<void()>*>(stopper);
+  (*cb)();
+  delete cb;
+}
+
+void* PerfettoDsImplGetInstanceLocked(struct PerfettoDsImpl* ds_impl,
+                                      PerfettoDsInstanceIndex idx) {
+  auto* internal_state = ds_impl->cpp_type.static_state()->TryGet(idx);
+  if (!internal_state) {
+    return nullptr;
+  }
+  std::unique_lock<std::recursive_mutex> lock(internal_state->lock);
+  auto* data_source =
+      static_cast<ShlibDataSource*>(internal_state->data_source.get());
+  if (!data_source || &data_source->type() != ds_impl) {
+    // No live instance in this slot, or it has been destroyed and recreated
+    // as a different type while we were tracing.
+    return nullptr;
+  }
+  void* inst_ctx = data_source->inst_ctx();
+  if (inst_ctx != nullptr) {
+    lock.release();  // Keep the mutex held; ReleaseInstanceLocked unlocks.
+  }
+  return inst_ctx;
+}
+
+void PerfettoDsImplReleaseInstanceLocked(struct PerfettoDsImpl* ds_impl,
+                                         PerfettoDsInstanceIndex idx) {
+  auto* internal_state = ds_impl->cpp_type.static_state()->TryGet(idx);
+  PERFETTO_CHECK(internal_state);
+  internal_state->lock.unlock();
+}
+
+void* PerfettoDsImplGetCustomTls(struct PerfettoDsImpl*,
+                                 struct PerfettoDsTracerImpl* tracer,
+                                 PerfettoDsInstanceIndex) {
+  auto* tls_inst =
+      reinterpret_cast<DataSourceInstanceThreadLocalState*>(tracer);
+
+  PERFETTO_DCHECK(tls_inst->data_source_custom_tls);
+  return tls_inst->data_source_custom_tls.get();
+}
+
+void* PerfettoDsImplGetIncrementalState(struct PerfettoDsImpl* ds_impl,
+                                        struct PerfettoDsTracerImpl* tracer,
+                                        PerfettoDsInstanceIndex idx) {
+  auto* tls_inst =
+      reinterpret_cast<DataSourceInstanceThreadLocalState*>(tracer);
+
+  return ds_impl->cpp_type.GetIncrementalState(tls_inst, idx);
+}
+
+struct PerfettoDsImplTracerIterator PerfettoDsImplTraceIterateBegin(
+    struct PerfettoDsImpl* ds_impl) {
+  DataSourceThreadLocalState** tls =
+      &g_tls_cache[ds_impl->cpp_type.static_state()->index];
+
+  struct PerfettoDsImplTracerIterator ret = {0, nullptr, nullptr};
+  uint32_t cached_instances =
+      ds_impl->cpp_type.valid_instances()->load(std::memory_order_relaxed);
+  if (!cached_instances) {
+    return ret;
+  }
+  bool res =
+      ds_impl->cpp_type.TracePrologue<DataSourceTraits, TracePointTraits>(
+          tls, &cached_instances, &ds_impl->cpp_type);
+  if (!res) {
+    return ret;
+  }
+  DataSourceType::InstancesIterator it =
+      ds_impl->cpp_type.BeginIteration<TracePointTraits>(cached_instances, *tls,
+                                                         &ds_impl->cpp_type);
+  ret.inst_id = it.i;
+  (*tls)->root_tls->cached_instances = it.cached_instances;
+  ret.tracer = reinterpret_cast<struct PerfettoDsTracerImpl*>(it.instance);
+  if (!ret.tracer) {
+    ds_impl->cpp_type.TraceEpilogue(*tls);
+  }
+
+  ret.tls = reinterpret_cast<struct PerfettoDsTlsImpl*>(*tls);
+  return ret;
+}
+
+void PerfettoDsImplTraceIterateNext(
+    struct PerfettoDsImpl* ds_impl,
+    struct PerfettoDsImplTracerIterator* iterator) {
+  auto* tls = reinterpret_cast<DataSourceThreadLocalState*>(iterator->tls);
+
+  DataSourceType::InstancesIterator it;
+  it.i = iterator->inst_id;
+  it.cached_instances = tls->root_tls->cached_instances;
+  it.instance =
+      reinterpret_cast<DataSourceInstanceThreadLocalState*>(iterator->tracer);
+
+  ds_impl->cpp_type.NextIteration<TracePointTraits>(&it, tls,
+                                                    &ds_impl->cpp_type);
+
+  iterator->inst_id = it.i;
+  tls->root_tls->cached_instances = it.cached_instances;
+  iterator->tracer =
+      reinterpret_cast<struct PerfettoDsTracerImpl*>(it.instance);
+
+  if (!iterator->tracer) {
+    ds_impl->cpp_type.TraceEpilogue(tls);
+  }
+}
+
+void PerfettoDsImplTraceIterateBreak(
+    struct PerfettoDsImpl* ds_impl,
+    struct PerfettoDsImplTracerIterator* iterator) {
+  auto* tls = reinterpret_cast<DataSourceThreadLocalState*>(iterator->tls);
+
+  ds_impl->cpp_type.TraceEpilogue(tls);
+}
+
+struct PerfettoStreamWriter PerfettoDsTracerImplPacketBegin(
+    struct PerfettoDsTracerImpl* tracer) {
+  auto* tls_inst =
+      reinterpret_cast<DataSourceInstanceThreadLocalState*>(tracer);
+
+  auto message_handle = tls_inst->trace_writer->NewTracePacket();
+  struct PerfettoStreamWriter ret;
+  protozero::ScatteredStreamWriter* sw = message_handle.TakeStreamWriter();
+  ret.impl = reinterpret_cast<PerfettoStreamWriterImpl*>(sw);
+  perfetto::UpdateStreamWriter(*sw, &ret);
+  return ret;
+}
+
+void PerfettoDsTracerImplPacketEnd(struct PerfettoDsTracerImpl* tracer,
+                                   struct PerfettoStreamWriter* w) {
+  auto* tls_inst =
+      reinterpret_cast<DataSourceInstanceThreadLocalState*>(tracer);
+  auto* sw = reinterpret_cast<protozero::ScatteredStreamWriter*>(w->impl);
+
+  sw->set_write_ptr(w->write_ptr);
+  tls_inst->trace_writer->FinishTracePacket();
+}
+
+void PerfettoDsTracerImplFlush(struct PerfettoDsTracerImpl* tracer,
+                               PerfettoDsTracerOnFlushCb cb,
+                               void* user_arg) {
+  auto* tls_inst =
+      reinterpret_cast<DataSourceInstanceThreadLocalState*>(tracer);
+
+  std::function<void()> fn;
+  if (cb != nullptr) {
+    fn = [user_arg, cb]() { cb(user_arg); };
+  }
+  tls_inst->trace_writer->Flush(fn);
+}
diff --git a/src/shared_lib/heap_buffer.cc b/src/shared_lib/heap_buffer.cc
new file mode 100644
index 0000000..358dd9f
--- /dev/null
+++ b/src/shared_lib/heap_buffer.cc
@@ -0,0 +1,60 @@
+/*
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "perfetto/public/abi/heap_buffer.h"
+
+#include "perfetto/protozero/scattered_heap_buffer.h"
+#include "src/shared_lib/stream_writer.h"
+
+struct PerfettoHeapBuffer* PerfettoHeapBufferCreate(
+    struct PerfettoStreamWriter* w) {
+  auto* shb = new protozero::ScatteredHeapBuffer(4096, 4096);
+  auto* sw = new protozero::ScatteredStreamWriter(shb);
+  shb->set_writer(sw);
+
+  w->impl = reinterpret_cast<PerfettoStreamWriterImpl*>(sw);
+  perfetto::UpdateStreamWriter(*sw, w);
+  return reinterpret_cast<PerfettoHeapBuffer*>(shb);
+}
+
+void PerfettoHeapBufferCopyInto(struct PerfettoHeapBuffer* buf,
+                                struct PerfettoStreamWriter* w,
+                                void* dst,
+                                size_t size) {
+  auto* shb = reinterpret_cast<protozero::ScatteredHeapBuffer*>(buf);
+  auto* sw = reinterpret_cast<protozero::ScatteredStreamWriter*>(w->impl);
+  sw->set_write_ptr(w->write_ptr);
+
+  uint8_t* dst_ptr = reinterpret_cast<uint8_t*>(dst);
+  for (const protozero::ScatteredHeapBuffer::Slice& slice : shb->GetSlices()) {
+    if (size == 0) {
+      break;
+    }
+    protozero::ContiguousMemoryRange used_range = slice.GetUsedRange();
+    size_t to_copy = std::min(size, used_range.size());
+    memcpy(dst_ptr, used_range.begin, to_copy);
+    dst_ptr += to_copy;
+    size -= to_copy;
+  }
+}
+
+void PerfettoHeapBufferDestroy(struct PerfettoHeapBuffer* buf,
+                               struct PerfettoStreamWriter* w) {
+  auto* shb = reinterpret_cast<protozero::ScatteredHeapBuffer*>(buf);
+  auto* sw = reinterpret_cast<protozero::ScatteredStreamWriter*>(w->impl);
+  delete sw;
+  delete shb;
+}
diff --git a/src/shared_lib/producer.cc b/src/shared_lib/producer.cc
new file mode 100644
index 0000000..04e001e
--- /dev/null
+++ b/src/shared_lib/producer.cc
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "perfetto/public/abi/producer.h"
+
+#include <atomic>
+#include <bitset>
+
+#include "perfetto/tracing/backend_type.h"
+#include "perfetto/tracing/tracing.h"
+
+void PerfettoProducerInProcessInit() {
+  perfetto::TracingInitArgs args;
+  args.backends = perfetto::kInProcessBackend;
+  perfetto::Tracing::Initialize(args);
+}
+
+void PerfettoProducerSystemInit() {
+  perfetto::TracingInitArgs args;
+  args.backends = perfetto::kSystemBackend;
+  perfetto::Tracing::Initialize(args);
+}
+
+void PerfettoProducerInProcessAndSystemInit() {
+  perfetto::TracingInitArgs args;
+  args.backends = perfetto::kInProcessBackend | perfetto::kSystemBackend;
+  perfetto::Tracing::Initialize(args);
+}
diff --git a/src/shared_lib/stream_writer.h b/src/shared_lib/stream_writer.h
index 19a0f37..8a08075 100644
--- a/src/shared_lib/stream_writer.h
+++ b/src/shared_lib/stream_writer.h
@@ -29,7 +29,7 @@
   w->begin = sw.cur_range().begin;
   w->end = sw.cur_range().end;
   w->write_ptr = sw.write_ptr();
-  w->written_previously = sw.written_previously();
+  w->written_previously = static_cast<size_t>(sw.written_previously());
 }
 
 }  // namespace perfetto
diff --git a/src/shared_lib/tracing_session.cc b/src/shared_lib/tracing_session.cc
new file mode 100644
index 0000000..313a88d
--- /dev/null
+++ b/src/shared_lib/tracing_session.cc
@@ -0,0 +1,105 @@
+/*
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "perfetto/public/abi/tracing_session_abi.h"
+
+#include <condition_variable>
+#include <mutex>
+
+#include "perfetto/tracing/backend_type.h"
+#include "perfetto/tracing/tracing.h"
+#include "protos/perfetto/config/trace_config.gen.h"
+
+struct PerfettoTracingSessionImpl* PerfettoTracingSessionCreate(
+    PerfettoBackendTypes backend) {
+  uint32_t backend_type = 0;
+  if (backend & PERFETTO_BACKEND_IN_PROCESS) {
+    backend_type |= perfetto::kInProcessBackend;
+  }
+  if (backend & PERFETTO_BACKEND_SYSTEM) {
+    backend_type |= perfetto::kSystemBackend;
+  }
+  std::unique_ptr<perfetto::TracingSession> tracing_session =
+      perfetto::Tracing::NewTrace(
+          static_cast<perfetto::BackendType>(backend_type));
+  return reinterpret_cast<struct PerfettoTracingSessionImpl*>(
+      tracing_session.release());
+}
+
+void PerfettoTracingSessionSetup(struct PerfettoTracingSessionImpl* session,
+                                 void* cfg_begin,
+                                 size_t cfg_len) {
+  auto* ts = reinterpret_cast<perfetto::TracingSession*>(session);
+  perfetto::TraceConfig cfg;
+  cfg.ParseFromArray(cfg_begin, cfg_len);
+  ts->Setup(cfg);
+}
+
+void PerfettoTracingSessionStartAsync(
+    struct PerfettoTracingSessionImpl* session) {
+  auto* ts = reinterpret_cast<perfetto::TracingSession*>(session);
+  ts->Start();
+}
+
+void PerfettoTracingSessionStartBlocking(
+    struct PerfettoTracingSessionImpl* session) {
+  auto* ts = reinterpret_cast<perfetto::TracingSession*>(session);
+  ts->StartBlocking();
+}
+
+void PerfettoTracingSessionStopAsync(
+    struct PerfettoTracingSessionImpl* session) {
+  auto* ts = reinterpret_cast<perfetto::TracingSession*>(session);
+  ts->Stop();
+}
+
+void PerfettoTracingSessionStopBlocking(
+    struct PerfettoTracingSessionImpl* session) {
+  auto* ts = reinterpret_cast<perfetto::TracingSession*>(session);
+  ts->StopBlocking();
+}
+
+void PerfettoTracingSessionReadTraceBlocking(
+    struct PerfettoTracingSessionImpl* session,
+    PerfettoTracingSessionReadCb callback,
+    void* user_arg) {
+  auto* ts = reinterpret_cast<perfetto::TracingSession*>(session);
+
+  std::mutex mutex;
+  std::condition_variable cv;
+
+  bool all_read = false;
+
+  ts->ReadTrace([&mutex, &all_read, &cv, session, callback, user_arg](
+                    perfetto::TracingSession::ReadTraceCallbackArgs args) {
+    callback(session, static_cast<const void*>(args.data), args.size,
+             args.has_more, user_arg);
+    std::unique_lock<std::mutex> lock(mutex);
+    all_read = !args.has_more;
+    if (all_read)
+      cv.notify_one();
+  });
+
+  {
+    std::unique_lock<std::mutex> lock(mutex);
+    cv.wait(lock, [&all_read] { return all_read; });
+  }
+}
+
+void PerfettoTracingSessionDestroy(struct PerfettoTracingSessionImpl* session) {
+  auto* ts = reinterpret_cast<perfetto::TracingSession*>(session);
+  delete ts;
+}
diff --git a/src/trace_processor/metrics/sql/android/android_startup.sql b/src/trace_processor/metrics/sql/android/android_startup.sql
index bd80e8a..1f75415 100644
--- a/src/trace_processor/metrics/sql/android/android_startup.sql
+++ b/src/trace_processor/metrics/sql/android/android_startup.sql
@@ -151,7 +151,7 @@
           MAIN_THREAD_TIME_FOR_LAUNCH_AND_STATE(launches.startup_id, 'Running'), 0
         ),
         'runnable_dur_ns', IFNULL(
-          MAIN_THREAD_TIME_FOR_LAUNCH_AND_STATE(launches.startup_id, 'R*'), 0
+          MAIN_THREAD_TIME_FOR_LAUNCH_IN_RUNNABLE_STATE(launches.startup_id), 0
         ),
         'uninterruptible_sleep_dur_ns', IFNULL(
           MAIN_THREAD_TIME_FOR_LAUNCH_AND_STATE(launches.startup_id, 'D*'), 0
@@ -320,7 +320,7 @@
         UNION ALL
         SELECT 'Main Thread - Time spent in Runnable state'
           AS slow_cause
-        WHERE MAIN_THREAD_TIME_FOR_LAUNCH_AND_STATE(launches.startup_id, 'R*') > 100e6
+        WHERE MAIN_THREAD_TIME_FOR_LAUNCH_IN_RUNNABLE_STATE(launches.startup_id) > 100e6
 
         UNION ALL
         SELECT 'Main Thread - Time spent in interruptible sleep state'
@@ -361,7 +361,7 @@
         SELECT 'Potential CPU contention with '
           || MOST_ACTIVE_PROCESS_FOR_LAUNCH(launches.startup_id)
           AS slow_cause
-        WHERE MAIN_THREAD_TIME_FOR_LAUNCH_AND_STATE(launches.startup_id, 'R*') > 100e6
+        WHERE MAIN_THREAD_TIME_FOR_LAUNCH_IN_RUNNABLE_STATE(launches.startup_id) > 100e6
           AND MOST_ACTIVE_PROCESS_FOR_LAUNCH(launches.startup_id) IS NOT NULL
 
         UNION ALL
diff --git a/src/trace_processor/metrics/sql/android/startup/thread_state_breakdown.sql b/src/trace_processor/metrics/sql/android/startup/thread_state_breakdown.sql
index b85d40b..b3939bb 100644
--- a/src/trace_processor/metrics/sql/android/startup/thread_state_breakdown.sql
+++ b/src/trace_processor/metrics/sql/android/startup/thread_state_breakdown.sql
@@ -66,6 +66,17 @@
   '
 );
 
+-- Given a launch id, returns the aggregate sum of time spent in runnable state
+-- by the main thread of the process being started up.
+SELECT CREATE_FUNCTION(
+  'MAIN_THREAD_TIME_FOR_LAUNCH_IN_RUNNABLE_STATE(startup_id INT)',
+  'INT',
+  '
+    SELECT IFNULL(MAIN_THREAD_TIME_FOR_LAUNCH_AND_STATE($startup_id, "R"), 0)
+      + IFNULL(MAIN_THREAD_TIME_FOR_LAUNCH_AND_STATE($startup_id, "R+"), 0);
+  '
+);
+
 -- Given a launch id, thread state  and io_wait value, returns the aggregate sum
 -- of time spent in that state by the main thread of the process being started up.
 SELECT CREATE_FUNCTION(
diff --git a/src/trace_processor/stdlib/android/binder.sql b/src/trace_processor/stdlib/android/binder.sql
index 7fb9611..f673d46 100644
--- a/src/trace_processor/stdlib/android/binder.sql
+++ b/src/trace_processor/stdlib/android/binder.sql
@@ -58,7 +58,21 @@
 -- @column server_dur dur of the server txn
 CREATE VIEW android_sync_binder_metrics_by_txn AS
 WITH
-  binder_txn AS (
+  -- Adding MATERIALIZED here matters in cases where there are few/no binder
+  -- transactions in the trace. Our cost estimation is not good enough to allow
+  -- the query planner to see through to this fact. Instead, our cost estimation
+  -- causes repeated queries on this table which is slow because it's an O(n)
+  -- query.
+  --
+  -- We should fix this by doing some (ideally all) of the following:
+  --  1) Add support for columnar tables in SQL which will allow for
+  --     "subsetting" the slice table to only contain binder transactions.
+  --  2) Make this query faster by improving string filtering.
+  --  3) Add caching so that even if these queries happen many times, they are
+  --     fast.
+  --  4) Improve cost estimation algorithm to allow the joins to happen the
+  --     right way around.
+  binder_txn AS MATERIALIZED (
     SELECT
       slice.id AS binder_txn_id,
       process.name AS process_name,
@@ -69,15 +83,11 @@
       slice.dur,
       thread.is_main_thread
     FROM slice
-    INNER JOIN thread_track
-      ON slice.track_id = thread_track.id
-    INNER JOIN thread
-      USING (utid)
-    INNER JOIN process
-      USING (upid)
-    LEFT JOIN slice non_existent ON non_existent.parent_id = slice.id
+    JOIN thread_track ON slice.track_id = thread_track.id
+    JOIN thread USING (utid)
+    JOIN process USING (upid)
     WHERE slice.name = 'binder transaction'
-    AND non_existent.depth IS NULL
+      AND NOT EXISTS(SELECT 1 FROM slice child WHERE child.parent_id = slice.id)
   ),
   binder_reply AS (
     SELECT
@@ -85,20 +95,20 @@
       binder_reply.ts AS server_ts,
       binder_reply.dur AS server_dur,
       binder_reply.id AS binder_reply_id,
-      IIF(aidl.name LIKE 'AIDL::%' AND aidl.depth = binder_reply.depth + 1, aidl.name, NULL) AS aidl_name,
       reply_thread.name AS server_thread,
       reply_process.name AS server_process,
       reply_thread.utid AS server_utid,
-      reply_process.upid AS server_upid
+      reply_process.upid AS server_upid,
+      aidl.name AS aidl_name
     FROM binder_txn
-    INNER JOIN flow binder_flow
-      ON binder_txn.binder_txn_id = binder_flow.slice_out
-    INNER JOIN slice binder_reply
-      ON binder_flow.slice_in = binder_reply.id
-    INNER JOIN thread_track reply_thread_track ON binder_reply.track_id = reply_thread_track.id
-    INNER JOIN thread reply_thread ON reply_thread.utid = reply_thread_track.utid
-    INNER JOIN process reply_process ON reply_process.upid = reply_thread.upid
-    LEFT JOIN slice aidl ON aidl.parent_id = binder_reply.id
+    JOIN flow binder_flow ON binder_txn.binder_txn_id = binder_flow.slice_out
+    JOIN slice binder_reply ON binder_flow.slice_in = binder_reply.id
+    JOIN thread_track reply_thread_track
+      ON binder_reply.track_id = reply_thread_track.id
+    JOIN thread reply_thread ON reply_thread.utid = reply_thread_track.utid
+    JOIN process reply_process ON reply_process.upid = reply_thread.upid
+    LEFT JOIN slice aidl
+      ON aidl.parent_id = binder_reply.id AND aidl.name LIKE 'AIDL::%'
   )
 SELECT
   MIN(aidl_name) AS aidl_name,
@@ -118,7 +128,7 @@
   server_ts,
   server_dur
 FROM binder_reply
-WHERE client_dur >= 0 AND server_dur >= 0 AND client_dur >= server_dur
+WHERE client_dur != -1 AND server_dur != -1 AND client_dur >= server_dur
 GROUP BY
   process_name,
   thread_name,
diff --git a/src/trace_processor/util/BUILD.gn b/src/trace_processor/util/BUILD.gn
index 98ff445..6613533 100644
--- a/src/trace_processor/util/BUILD.gn
+++ b/src/trace_processor/util/BUILD.gn
@@ -124,6 +124,7 @@
     "../../../gn:default_deps",
     "../../../protos/perfetto/common:zero",
     "../../../protos/perfetto/trace/interned_data:zero",
+    "../../../protos/perfetto/trace/profiling:zero",
     "../../../protos/perfetto/trace/track_event:zero",
     "../../../protos/perfetto/trace_processor:zero",
     "../../protozero",
@@ -243,10 +244,15 @@
     "../../../gn:gtest_and_gmock",
     "../../../protos/perfetto/common:zero",
     "../../../protos/perfetto/trace:non_minimal_zero",
+    "../../../protos/perfetto/trace/interned_data:zero",
+    "../../../protos/perfetto/trace/profiling:zero",
     "../../../protos/perfetto/trace/track_event:zero",
     "../../protozero",
     "../../protozero:testing_messages_zero",
     "../importers/proto:gen_cc_track_event_descriptor",
+    "../importers/proto:minimal",
+    "../storage",
+    "../types",
   ]
   if (enable_perfetto_zlib) {
     sources += [ "gzip_utils_unittest.cc" ]
diff --git a/src/trace_processor/util/bump_allocator.cc b/src/trace_processor/util/bump_allocator.cc
index 1c00a8c..4cbf928 100644
--- a/src/trace_processor/util/bump_allocator.cc
+++ b/src/trace_processor/util/bump_allocator.cc
@@ -98,11 +98,14 @@
   return to_erase_chunks;
 }
 
-uint32_t BumpAllocator::PastEndSerializedId() {
+BumpAllocator::AllocId BumpAllocator::PastTheEndId() {
   if (chunks_.empty()) {
-    return AllocId{erased_front_chunks_count_, 0}.Serialize();
+    return AllocId{erased_front_chunks_count_, 0};
   }
-  return AllocId{LastChunkIndex(), chunks_.back().bump_offset}.Serialize();
+  if (chunks_.back().bump_offset == kChunkSize) {
+    return AllocId{LastChunkIndex() + 1, 0};
+  }
+  return AllocId{LastChunkIndex(), chunks_.back().bump_offset};
 }
 
 base::Optional<BumpAllocator::AllocId> BumpAllocator::TryAllocInLastChunk(
diff --git a/src/trace_processor/util/bump_allocator.h b/src/trace_processor/util/bump_allocator.h
index cd5e593..41ab0b1 100644
--- a/src/trace_processor/util/bump_allocator.h
+++ b/src/trace_processor/util/bump_allocator.h
@@ -22,6 +22,7 @@
 #include <cstring>
 #include <limits>
 #include <memory>
+#include <tuple>
 #include "perfetto/ext/base/circular_queue.h"
 #include "perfetto/ext/base/optional.h"
 #include "perfetto/ext/base/utils.h"
@@ -80,17 +81,13 @@
     uint32_t chunk_index : kChunkIndexAllocIdBits;
     uint32_t chunk_offset : kChunkOffsetAllocIdBits;
 
-    uint32_t Serialize() const {
-      return static_cast<uint32_t>(chunk_index) << kChunkOffsetAllocIdBits |
-             chunk_offset;
+    // Comparison operators mainly for sorting.
+    bool operator<(const AllocId& other) const {
+      return std::tie(chunk_index, chunk_offset) <
+             std::tie(other.chunk_index, other.chunk_offset);
     }
-
-    static AllocId FromSerialized(uint32_t serialized) {
-      AllocId id;
-      id.chunk_index = serialized >> kChunkOffsetAllocIdBits;
-      id.chunk_offset = serialized;
-      return id;
-    }
+    bool operator>=(const AllocId& other) const { return !(*this < other); }
+    bool operator>(const AllocId& other) const { return other < *this; }
   };
   static_assert(sizeof(AllocId) == sizeof(uint32_t),
                 "AllocId should be 32-bit in size to allow serialization");
@@ -142,7 +139,7 @@
 
   // Returns a "past the end" serialized AllocId i.e. a serialized value
   // greater than all previously returned AllocIds.
-  uint32_t PastEndSerializedId();
+  AllocId PastTheEndId();
 
   // Returns the number of erased chunks from the start of this allocator.
   //
diff --git a/src/trace_processor/util/bump_allocator_unittest.cc b/src/trace_processor/util/bump_allocator_unittest.cc
index 4608be6..38490fd 100644
--- a/src/trace_processor/util/bump_allocator_unittest.cc
+++ b/src/trace_processor/util/bump_allocator_unittest.cc
@@ -70,26 +70,13 @@
   allocator_.EraseFrontFreeChunks();
 }
 
-TEST_F(BumpAllocatorUnittest, Serialize) {
-  BumpAllocator::AllocId id = allocator_.Alloc(8);
-  ASSERT_EQ(id.Serialize(), 0u);
-  ASSERT_EQ(allocator_.PastEndSerializedId(), 8u);
+TEST_F(BumpAllocatorUnittest, PastEndOnChunkBoundary) {
+  BumpAllocator::AllocId id = allocator_.Alloc(BumpAllocator::kChunkSize);
+  BumpAllocator::AllocId past_end = allocator_.PastTheEndId();
+  ASSERT_GT(past_end, id);
+  ASSERT_EQ(past_end.chunk_index, 1u);
+  ASSERT_EQ(past_end.chunk_offset, 0u);
   allocator_.Free(id);
-
-  id = allocator_.Alloc(8);
-  ASSERT_EQ(id.Serialize(), 8u);
-  allocator_.Free(id);
-
-  id = allocator_.Alloc(BumpAllocator::kChunkSize);
-  ASSERT_EQ(id.Serialize(), BumpAllocator::kChunkSize);
-  allocator_.Free(id);
-}
-
-TEST_F(BumpAllocatorUnittest, HighNumberSerialize) {
-  BumpAllocator::AllocId id = BumpAllocator::AllocId::FromSerialized(1138352);
-  ASSERT_EQ(id.chunk_index, 1138352 / BumpAllocator::kChunkSize);
-  ASSERT_EQ(id.chunk_offset, 1138352 % BumpAllocator::kChunkSize);
-  ASSERT_EQ(id.Serialize(), 1138352u);
 }
 
 TEST_F(BumpAllocatorUnittest, EraseFrontAccounting) {
@@ -105,8 +92,7 @@
   AllocateWriteReadAndFree(8);
   allocator_.EraseFrontFreeChunks();
 
-  auto past_id =
-      BumpAllocator::AllocId::FromSerialized(allocator_.PastEndSerializedId());
+  auto past_id = allocator_.PastTheEndId();
   ASSERT_EQ(past_id.chunk_index, 1u);
   ASSERT_EQ(past_id.chunk_offset, 0u);
 
diff --git a/src/trace_processor/util/debug_annotation_parser.cc b/src/trace_processor/util/debug_annotation_parser.cc
index 7b49d2d..0c2792e 100644
--- a/src/trace_processor/util/debug_annotation_parser.cc
+++ b/src/trace_processor/util/debug_annotation_parser.cc
@@ -15,10 +15,13 @@
  */
 
 #include "src/trace_processor/util/debug_annotation_parser.h"
+
 #include "perfetto/base/build_config.h"
-#include "protos/perfetto/trace/track_event/debug_annotation.pbzero.h"
 #include "src/trace_processor/util/interned_message_view.h"
 
+#include "protos/perfetto/trace/profiling/profile_common.pbzero.h"
+#include "protos/perfetto/trace/track_event/debug_annotation.pbzero.h"
+
 namespace perfetto {
 namespace trace_processor {
 namespace util {
@@ -81,6 +84,15 @@
     delegate.AddDouble(context_name, annotation.double_value());
   } else if (annotation.has_string_value()) {
     delegate.AddString(context_name, annotation.string_value());
+  } else if (annotation.has_string_value_iid()) {
+    auto* decoder = delegate.GetInternedMessage(
+        protos::pbzero::InternedData::kDebugAnnotationStringValues,
+        annotation.string_value_iid());
+    if (!decoder) {
+      return {base::ErrStatus("Debug annotation with invalid string_value_iid"),
+              false};
+    }
+    delegate.AddString(context_name, decoder->str().ToStdString());
   } else if (annotation.has_pointer_value()) {
     delegate.AddPointer(context_name, reinterpret_cast<const void*>(
                                           annotation.pointer_value()));
diff --git a/src/trace_processor/util/debug_annotation_parser_unittest.cc b/src/trace_processor/util/debug_annotation_parser_unittest.cc
index 7244d02..d9a4168 100644
--- a/src/trace_processor/util/debug_annotation_parser_unittest.cc
+++ b/src/trace_processor/util/debug_annotation_parser_unittest.cc
@@ -18,13 +18,19 @@
 
 #include "perfetto/ext/base/string_view.h"
 #include "perfetto/protozero/scattered_heap_buffer.h"
+#include "perfetto/trace_processor/trace_blob.h"
 #include "perfetto/trace_processor/trace_blob_view.h"
 #include "protos/perfetto/common/descriptor.pbzero.h"
+#include "protos/perfetto/trace/interned_data/interned_data.pbzero.h"
+#include "protos/perfetto/trace/profiling/profile_common.pbzero.h"
 #include "protos/perfetto/trace/test_event.pbzero.h"
 #include "protos/perfetto/trace/track_event/debug_annotation.pbzero.h"
 #include "protos/perfetto/trace/track_event/source_location.pbzero.h"
 #include "src/protozero/test/example_proto/test_messages.pbzero.h"
+#include "src/trace_processor/importers/proto/packet_sequence_state.h"
+#include "src/trace_processor/storage/trace_storage.h"
 #include "src/trace_processor/test_messages.descriptor.h"
+#include "src/trace_processor/types/trace_processor_context.h"
 #include "src/trace_processor/util/interned_message_view.h"
 #include "src/trace_processor/util/proto_to_args_parser.h"
 #include "test/gtest_and_gmock.h"
@@ -48,10 +54,14 @@
 class DebugAnnotationParserTest : public ::testing::Test,
                                   public ProtoToArgsParser::Delegate {
  protected:
-  DebugAnnotationParserTest() {}
+  DebugAnnotationParserTest() : sequence_state_(&context_) {
+    context_.storage.reset(new TraceStorage());
+  }
 
   const std::vector<std::string>& args() const { return args_; }
 
+  PacketSequenceState* mutable_seq_state() { return &sequence_state_; }
+
  private:
   using Key = ProtoToArgsParser::Key;
 
@@ -120,14 +130,25 @@
     return ++array_indices_[array_key];
   }
 
-  InternedMessageView* GetInternedMessageView(uint32_t, uint64_t) override {
-    return nullptr;
+  InternedMessageView* GetInternedMessageView(uint32_t field_id,
+                                              uint64_t iid) override {
+    if (field_id !=
+        protos::pbzero::InternedData::kDebugAnnotationStringValuesFieldNumber) {
+      return nullptr;
+    }
+    return sequence_state_.current_generation()->GetInternedMessageView(
+        field_id, iid);
   }
 
-  PacketSequenceStateGeneration* seq_state() final { return nullptr; }
+  PacketSequenceStateGeneration* seq_state() final {
+    return sequence_state_.current_generation().get();
+  }
 
   std::vector<std::string> args_;
   std::map<std::string, size_t> array_indices_;
+
+  TraceProcessorContext context_;
+  PacketSequenceState sequence_state_;
 };
 
 // This test checks that in when an array is nested inside a dict which is
@@ -273,6 +294,33 @@
                           "root.field_string root.field_string value"));
 }
 
+TEST_F(DebugAnnotationParserTest, InternedString) {
+  protozero::HeapBuffered<protos::pbzero::DebugAnnotation> msg;
+  msg->set_name("root");
+
+  protozero::HeapBuffered<protos::pbzero::InternedString> string;
+  string->set_iid(1);
+  string->set_str("foo");
+  std::vector<uint8_t> data_serialized = string.SerializeAsArray();
+
+  mutable_seq_state()->InternMessage(
+      protos::pbzero::InternedData::kDebugAnnotationStringValuesFieldNumber,
+      TraceBlobView(
+          TraceBlob::CopyFrom(data_serialized.data(), data_serialized.size())));
+
+  msg->set_string_value_iid(1);
+
+  DescriptorPool pool;
+  ProtoToArgsParser args_parser(pool);
+  DebugAnnotationParser parser(args_parser);
+
+  auto status = ParseDebugAnnotation(parser, msg, *this);
+  EXPECT_TRUE(status.ok()) << "DebugAnnotationParser::Parse failed with error: "
+                           << status.message();
+
+  EXPECT_THAT(args(), testing::ElementsAre("root root foo"));
+}
+
 }  // namespace
 }  // namespace util
 }  // namespace trace_processor
diff --git a/src/traced/probes/ftrace/cpu_reader.cc b/src/traced/probes/ftrace/cpu_reader.cc
index 9403af4..b4c1758 100644
--- a/src/traced/probes/ftrace/cpu_reader.cc
+++ b/src/traced/probes/ftrace/cpu_reader.cc
@@ -186,9 +186,10 @@
                              metatrace::FTRACE_CPU_READ_CYCLE);
 
   // Work in batches to keep cache locality, and limit memory usage.
-  size_t batch_pages = std::min(parsing_buf_size_pages, max_pages);
   size_t total_pages_read = 0;
   for (bool is_first_batch = true;; is_first_batch = false) {
+    size_t batch_pages =
+        std::min(parsing_buf_size_pages, max_pages - total_pages_read);
     size_t pages_read = ReadAndProcessBatch(
         parsing_buf, batch_pages, is_first_batch, started_data_sources);
 
diff --git a/src/traced/probes/power/android_power_data_source.cc b/src/traced/probes/power/android_power_data_source.cc
index 9d2d9a5..3f2866e 100644
--- a/src/traced/probes/power/android_power_data_source.cc
+++ b/src/traced/probes/power/android_power_data_source.cc
@@ -33,6 +33,7 @@
 #include "protos/perfetto/common/android_energy_consumer_descriptor.pbzero.h"
 #include "protos/perfetto/config/power/android_power_config.pbzero.h"
 #include "protos/perfetto/trace/power/android_energy_estimation_breakdown.pbzero.h"
+#include "protos/perfetto/trace/power/android_entity_state_residency.pbzero.h"
 #include "protos/perfetto/trace/power/battery_counters.pbzero.h"
 #include "protos/perfetto/trace/power/power_rails.pbzero.h"
 #include "protos/perfetto/trace/trace_packet.pbzero.h"
@@ -44,7 +45,7 @@
 constexpr uint32_t kDefaultPollIntervalMs = 1000;
 constexpr size_t kMaxNumRails = 32;
 constexpr size_t kMaxNumEnergyConsumer = 32;
-constexpr size_t kMaxNumPowerEntities = 256;
+constexpr size_t kMaxNumPowerEntities = 1024;
 }  // namespace
 
 // static
@@ -64,6 +65,10 @@
   PERFETTO_LAZY_LOAD(android_internal::GetEnergyConsumerInfo,
                      get_energy_consumer_info_);
   PERFETTO_LAZY_LOAD(android_internal::GetEnergyConsumed, get_energy_consumed_);
+  PERFETTO_LAZY_LOAD(android_internal::GetPowerEntityStates,
+                     get_power_entity_states_);
+  PERFETTO_LAZY_LOAD(android_internal::GetPowerEntityStateResidency,
+                     get_power_entity_state_residency_);
 
   base::Optional<int64_t> GetCounter(android_internal::BatteryCounter counter) {
     if (!get_battery_counter_)
@@ -132,6 +137,37 @@
     energy_breakdown.resize(num_power_entities);
     return energy_breakdown;
   }
+
+  std::vector<android_internal::PowerEntityState> GetPowerEntityStates() {
+    if (!get_power_entity_states_)
+      return std::vector<android_internal::PowerEntityState>();
+
+    std::vector<android_internal::PowerEntityState> entity(
+        kMaxNumPowerEntities);
+    size_t num_power_entities = entity.size();
+    if (!get_power_entity_states_(&entity[0], &num_power_entities)) {
+      PERFETTO_ELOG("Failed to retrieve power entities.");
+      num_power_entities = 0;
+    }
+    entity.resize(num_power_entities);
+    return entity;
+  }
+
+  std::vector<android_internal::PowerEntityStateResidency>
+  GetPowerEntityStateResidency() {
+    if (!get_power_entity_state_residency_)
+      return std::vector<android_internal::PowerEntityStateResidency>();
+
+    std::vector<android_internal::PowerEntityStateResidency> entity(
+        kMaxNumPowerEntities);
+    size_t num_power_entities = entity.size();
+    if (!get_power_entity_state_residency_(&entity[0], &num_power_entities)) {
+      PERFETTO_ELOG("Failed to retrieve power entities.");
+      num_power_entities = 0;
+    }
+    entity.resize(num_power_entities);
+    return entity;
+  }
 };
 
 AndroidPowerDataSource::AndroidPowerDataSource(
@@ -149,6 +185,8 @@
   rails_collection_enabled_ = pcfg.collect_power_rails();
   energy_breakdown_collection_enabled_ =
       pcfg.collect_energy_estimation_breakdown();
+  entity_state_residency_collection_enabled_ =
+      pcfg.collect_entity_state_residency();
 
   if (poll_interval_ms_ == 0)
     poll_interval_ms_ = kDefaultPollIntervalMs;
@@ -211,6 +249,7 @@
   WriteBatteryCounters();
   WritePowerRailsData();
   WriteEnergyEstimationBreakdown();
+  WriteEntityStateResidency();
 
   should_emit_descriptors_ = false;
 }
@@ -341,6 +380,43 @@
   }
 }
 
+void AndroidPowerDataSource::WriteEntityStateResidency() {
+  if (!entity_state_residency_collection_enabled_)
+    return;
+
+  auto packet = writer_->NewTracePacket();
+  packet->set_timestamp(static_cast<uint64_t>(base::GetBootTimeNs().count()));
+  packet->set_sequence_flags(
+      protos::pbzero::TracePacket::SEQ_NEEDS_INCREMENTAL_STATE);
+
+  auto* outer_proto = packet->set_entity_state_residency();
+  if (should_emit_descriptors_) {
+    auto entity_states = lib_->GetPowerEntityStates();
+    if (entity_states.empty()) {
+      // No entities to collect data for. Don't try again.
+      entity_state_residency_collection_enabled_ = false;
+      return;
+    }
+
+    for (const auto& entity_state : entity_states) {
+      auto* entity_state_proto = outer_proto->add_power_entity_state();
+      entity_state_proto->set_entity_index(entity_state.entity_id);
+      entity_state_proto->set_state_index(entity_state.state_id);
+      entity_state_proto->set_entity_name(entity_state.entity_name);
+      entity_state_proto->set_state_name(entity_state.state_name);
+    }
+  }
+
+  for (const auto& residency_data : lib_->GetPowerEntityStateResidency()) {
+    auto* data = outer_proto->add_residency();
+    data->set_entity_index(residency_data.entity_id);
+    data->set_state_index(residency_data.state_id);
+    data->set_total_time_in_state_ms(residency_data.total_time_in_state_ms);
+    data->set_total_state_entry_count(residency_data.total_state_entry_count);
+    data->set_last_entry_timestamp_ms(residency_data.last_entry_timestamp_ms);
+  }
+}
+
 void AndroidPowerDataSource::Flush(FlushRequestID,
                                    std::function<void()> callback) {
   writer_->Flush(callback);
diff --git a/src/traced/probes/power/android_power_data_source.h b/src/traced/probes/power/android_power_data_source.h
index 56b02f6..926fc21 100644
--- a/src/traced/probes/power/android_power_data_source.h
+++ b/src/traced/probes/power/android_power_data_source.h
@@ -57,6 +57,7 @@
   void WriteBatteryCounters();
   void WritePowerRailsData();
   void WriteEnergyEstimationBreakdown();
+  void WriteEntityStateResidency();
 
   // Battery counters.
   std::bitset<8> counters_enabled_;
@@ -67,6 +68,9 @@
   // Energy estimation.
   bool energy_breakdown_collection_enabled_ = false;
 
+  // Entity state residency
+  bool entity_state_residency_collection_enabled_ = false;
+
   uint32_t poll_interval_ms_ = 0;
   bool should_emit_descriptors_ = true;
 
diff --git a/src/traced/probes/probes_producer.cc b/src/traced/probes/probes_producer.cc
index 0d2573a..cc1e744 100644
--- a/src/traced/probes/probes_producer.cc
+++ b/src/traced/probes/probes_producer.cc
@@ -433,6 +433,10 @@
     // We need to ensure this timeout is worse than the worst case
     // time from us starting to traced managing to disable us.
     // See b/236814186#comment8 for context
+    // Note: when using prefer_suspend_clock_for_duration the actual duration
+    // might be < timeout measured in wall time. But this is fine
+    // because the resulting timeout will be conservative (it will be accurate
+    // if the device never suspends, and will be more lax if it does).
     uint32_t timeout =
         2 * (kDefaultFlushTimeoutMs + config.trace_duration_ms() +
              config.stop_timeout_ms());
diff --git a/src/traced/service/builtin_producer.cc b/src/traced/service/builtin_producer.cc
index a2007be..0664b5d 100644
--- a/src/traced/service/builtin_producer.cc
+++ b/src/traced/service/builtin_producer.cc
@@ -46,9 +46,11 @@
 
 constexpr char kHeapprofdDataSourceName[] = "android.heapprofd";
 constexpr char kJavaHprofDataSourceName[] = "android.java_hprof";
+constexpr char kJavaHprofOomDataSourceName[] = "android.java_hprof.oom";
 constexpr char kTracedPerfDataSourceName[] = "linux.perf";
 constexpr char kLazyHeapprofdPropertyName[] = "traced.lazy.heapprofd";
 constexpr char kLazyTracedPerfPropertyName[] = "traced.lazy.traced_perf";
+constexpr char kJavaHprofOomActivePropertyName[] = "traced.oome_heap_session.count";
 
 }  // namespace
 
@@ -64,6 +66,8 @@
     SetAndroidProperty(kLazyHeapprofdPropertyName, "");
   if (!lazy_traced_perf_.instance_ids.empty())
     SetAndroidProperty(kLazyTracedPerfPropertyName, "");
+  if (!java_hprof_oome_instances_.empty())
+    SetAndroidProperty(kJavaHprofOomActivePropertyName, "");
 }
 
 void BuiltinProducer::ConnectInProcess(TracingService* svc) {
@@ -102,6 +106,11 @@
     lazy_traced_perf_dsd.set_name(kTracedPerfDataSourceName);
     endpoint_->RegisterDataSource(lazy_traced_perf_dsd);
   }
+  {
+    DataSourceDescriptor java_hprof_oome_dsd;
+    java_hprof_oome_dsd.set_name(kJavaHprofOomDataSourceName);
+    endpoint_->RegisterDataSource(java_hprof_oome_dsd);
+  }
 }
 
 void BuiltinProducer::SetupDataSource(DataSourceInstanceID ds_id,
@@ -120,6 +129,13 @@
     lazy_traced_perf_.instance_ids.emplace(ds_id);
     return;
   }
+
+  if (ds_config.name() == kJavaHprofOomDataSourceName) {
+    java_hprof_oome_instances_.emplace(ds_id);
+    SetAndroidProperty(kJavaHprofOomActivePropertyName,
+                       std::to_string(java_hprof_oome_instances_.size()));
+    return;
+  }
 }
 
 void BuiltinProducer::StartDataSource(DataSourceInstanceID ds_id,
@@ -152,6 +168,13 @@
 
   MaybeInitiateLazyStop(ds_id, &lazy_heapprofd_, kLazyHeapprofdPropertyName);
   MaybeInitiateLazyStop(ds_id, &lazy_traced_perf_, kLazyTracedPerfPropertyName);
+
+  auto oome_it = java_hprof_oome_instances_.find(ds_id);
+  if (oome_it != java_hprof_oome_instances_.end()) {
+    java_hprof_oome_instances_.erase(oome_it);
+    SetAndroidProperty(kJavaHprofOomActivePropertyName,
+                       std::to_string(java_hprof_oome_instances_.size()));
+  }
 }
 
 void BuiltinProducer::MaybeInitiateLazyStop(DataSourceInstanceID ds_id,
diff --git a/src/traced/service/builtin_producer.h b/src/traced/service/builtin_producer.h
index 9873458..7014d44 100644
--- a/src/traced/service/builtin_producer.h
+++ b/src/traced/service/builtin_producer.h
@@ -34,6 +34,7 @@
 // * perfetto metatrace
 // * lazy heapprofd daemon starter (android only)
 // * lazy traced_perf daemon starter (android only)
+// * java_hprof oom data source counter (android only)
 class BuiltinProducer : public Producer {
  public:
   BuiltinProducer(base::TaskRunner* task_runner, uint32_t lazy_stop_delay_ms);
@@ -83,6 +84,7 @@
   MetatraceState metatrace_;
   LazyAndroidDaemonState lazy_heapprofd_;
   LazyAndroidDaemonState lazy_traced_perf_;
+  std::set<DataSourceInstanceID> java_hprof_oome_instances_;
 
   base::WeakPtrFactory<BuiltinProducer> weak_factory_;  // Keep last.
 };
diff --git a/src/tracing/core/tracing_service_impl.cc b/src/tracing/core/tracing_service_impl.cc
index d304ada..6b587b9 100644
--- a/src/tracing/core/tracing_service_impl.cc
+++ b/src/tracing/core/tracing_service_impl.cc
@@ -282,10 +282,10 @@
     case TraceConfig::STATSD_LOGGING_DISABLED:
       return false;
     case TraceConfig::STATSD_LOGGING_UNSPECIFIED:
-      // For backward compatibility with older versions of perfetto_cmd.
-      return cfg.enable_extra_guardrails();
+      break;
   }
-  PERFETTO_FATAL("For GCC");
+  // For backward compatibility with older versions of perfetto_cmd.
+  return cfg.enable_extra_guardrails();
 }
 
 // Appends `data` (which has `size` bytes), to `*packet`. Splits the data in
@@ -836,10 +836,10 @@
 
   if (cfg.write_into_file()) {
     if (!fd ^ !cfg.output_path().empty()) {
-      tracing_sessions_.erase(tsid);
       MaybeLogUploadEvent(
           tracing_session->config, uuid,
           PerfettoStatsdAtom::kTracedEnableTracingInvalidFdOutputFile);
+      tracing_sessions_.erase(tsid);
       return PERFETTO_SVC_ERR(
           "When write_into_file==true either a FD needs to be passed or "
           "output_path must be populated (but not both)");
@@ -868,6 +868,7 @@
 
   // Initialize the log buffers.
   bool did_allocate_all_buffers = true;
+  bool invalid_buffer_config = false;
 
   // Allocate the trace buffers. Also create a map to translate a consumer
   // relative index (TraceConfig.DataSourceConfig.target_buffer) into the
@@ -884,14 +885,24 @@
       break;
     }
     tracing_session->buffers_index.push_back(global_id);
-    const size_t buf_size_bytes = buffer_cfg.size_kb() * 1024u;
-    total_buf_size_kb += buffer_cfg.size_kb();
+    // TraceBuffer size is limited to 32-bit.
+    const uint32_t buf_size_kb = buffer_cfg.size_kb();
+    const uint64_t buf_size_bytes = buf_size_kb * static_cast<uint64_t>(1024);
+    const size_t buf_size = static_cast<size_t>(buf_size_bytes);
+    if (buf_size_bytes == 0 ||
+        buf_size_bytes > std::numeric_limits<uint32_t>::max() ||
+        buf_size != buf_size_bytes) {
+      invalid_buffer_config = true;
+      did_allocate_all_buffers = false;
+      break;
+    }
+    total_buf_size_kb += buf_size_kb;
     TraceBuffer::OverwritePolicy policy =
         buffer_cfg.fill_policy() == TraceConfig::BufferConfig::DISCARD
             ? TraceBuffer::kDiscard
             : TraceBuffer::kOverwrite;
-    auto it_and_inserted = buffers_.emplace(
-        global_id, TraceBuffer::Create(buf_size_bytes, policy));
+    auto it_and_inserted =
+        buffers_.emplace(global_id, TraceBuffer::Create(buf_size, policy));
     PERFETTO_DCHECK(it_and_inserted.second);  // buffers_.count(global_id) == 0.
     std::unique_ptr<TraceBuffer>& trace_buffer = it_and_inserted.first->second;
     if (!trace_buffer) {
@@ -900,25 +911,29 @@
     }
   }
 
-  UpdateMemoryGuardrail();
-
   // This can happen if either:
   // - All the kMaxTraceBufferID slots are taken.
-  // - OOM, or, more relistically, we exhausted virtual memory.
+  // - OOM, or, more realistically, we exhausted virtual memory.
+  // - The buffer size in the config is invalid.
   // In any case, free all the previously allocated buffers and abort.
-  // TODO(fmayer): add a test to cover this case, this is quite subtle.
   if (!did_allocate_all_buffers) {
     for (BufferID global_id : tracing_session->buffers_index) {
       buffer_ids_.Free(global_id);
       buffers_.erase(global_id);
     }
-    tracing_sessions_.erase(tsid);
     MaybeLogUploadEvent(tracing_session->config, uuid,
                         PerfettoStatsdAtom::kTracedEnableTracingOom);
+    tracing_sessions_.erase(tsid);
+    if (invalid_buffer_config) {
+      return PERFETTO_SVC_ERR(
+          "Failed to allocate tracing buffers: Invalid buffer sizes");
+    }
     return PERFETTO_SVC_ERR(
         "Failed to allocate tracing buffers: OOM or too many buffers");
   }
 
+  UpdateMemoryGuardrail();
+
   consumer->tracing_session_id_ = tsid;
 
   // Setup the data sources on the producers without starting them.
@@ -972,9 +987,12 @@
   tracing_session->state = TracingSession::CONFIGURED;
   PERFETTO_LOG(
       "Configured tracing session %" PRIu64
-      ", #sources:%zu, duration:%d ms, #buffers:%d, total "
+      ", #sources:%zu, duration:%d ms%s, #buffers:%d, total "
       "buffer size:%zu KB, total sessions:%zu, uid:%d session name: \"%s\"",
       tsid, cfg.data_sources().size(), tracing_session->config.duration_ms(),
+      tracing_session->config.prefer_suspend_clock_for_duration()
+          ? " (suspend_clock)"
+          : "",
       cfg.buffers_size(), total_buf_size_kb, tracing_sessions_.size(),
       static_cast<unsigned int>(consumer->uid_),
       cfg.unique_session_name().c_str());
@@ -1174,28 +1192,19 @@
   // Trigger delayed task if the trace is time limited.
   const uint32_t trace_duration_ms = tracing_session->config.duration_ms();
   if (trace_duration_ms > 0) {
-    task_runner_->PostDelayedTask(
-        [weak_this, tsid] {
-          // Skip entirely the flush if the trace session doesn't exist anymore.
-          // This is to prevent misleading error messages to be logged.
-          if (!weak_this)
-            return;
-          auto* tracing_session_ptr = weak_this->GetTracingSession(tsid);
-          if (!tracing_session_ptr)
-            return;
-          // If this trace was using STOP_TRACING triggers and we've seen
-          // one, then the trigger overrides the normal timeout. In this
-          // case we just return and let the other task clean up this trace.
-          if (tracing_session_ptr->config.trigger_config().trigger_mode() ==
-                  TraceConfig::TriggerConfig::STOP_TRACING &&
-              !tracing_session_ptr->received_triggers.empty())
-            return;
-          // In all other cases (START_TRACING or no triggers) we flush
-          // after |trace_duration_ms| unconditionally.
-          weak_this->FlushAndDisableTracing(tsid);
-        },
-        trace_duration_ms);
-  }
+    auto stop_task =
+        std::bind(&TracingServiceImpl::StopOnDurationMsExpiry, weak_this, tsid);
+    if (tracing_session->config.prefer_suspend_clock_for_duration()) {
+      base::PeriodicTask::Args stop_args;
+      stop_args.use_suspend_aware_timer = true;
+      stop_args.period_ms = trace_duration_ms;
+      stop_args.one_shot = true;
+      stop_args.task = std::move(stop_task);
+      tracing_session->timed_stop_task.Start(stop_args);
+    } else {
+      task_runner_->PostDelayedTask(std::move(stop_task), trace_duration_ms);
+    }
+  }  // if (trace_duration_ms > 0).
 
   // Start the periodic drain tasks if we should to save the trace into a file.
   if (tracing_session->config.write_into_file()) {
@@ -1232,6 +1241,29 @@
   return base::OkStatus();
 }
 
+// static
+void TracingServiceImpl::StopOnDurationMsExpiry(
+    base::WeakPtr<TracingServiceImpl> weak_this,
+    TracingSessionID tsid) {
+  // Skip entirely the flush if the trace session doesn't exist anymore.
+  // This is to prevent misleading error messages to be logged.
+  if (!weak_this)
+    return;
+  auto* tracing_session_ptr = weak_this->GetTracingSession(tsid);
+  if (!tracing_session_ptr)
+    return;
+  // If this trace was using STOP_TRACING triggers and we've seen
+  // one, then the trigger overrides the normal timeout. In this
+  // case we just return and let the other task clean up this trace.
+  if (tracing_session_ptr->config.trigger_config().trigger_mode() ==
+          TraceConfig::TriggerConfig::STOP_TRACING &&
+      !tracing_session_ptr->received_triggers.empty())
+    return;
+  // In all other cases (START_TRACING or no triggers) we flush
+  // after |trace_duration_ms| unconditionally.
+  weak_this->FlushAndDisableTracing(tsid);
+}
+
 void TracingServiceImpl::StartDataSourceInstance(
     ProducerEndpointImpl* producer,
     TracingSession* tracing_session,
@@ -2704,6 +2736,16 @@
 
   DataSourceConfig& ds_config = ds_instance->config;
   ds_config.set_trace_duration_ms(tracing_session->config.duration_ms());
+
+  // Rationale for `if (prefer) set_prefer(true)`, rather than `set(prefer)`:
+  // ComputeStartupConfigHash() in tracing_muxer_impl.cc compares hashes of the
+  // DataSourceConfig and expects to know (and clear) the fields generated by
+  // the tracing service. Unconditionally adding a new field breaks backward
+  // compatibility of startup tracing with older SDKs, because the serialization
+  // also propagates unknown fields, breaking the hash matching check.
+  if (tracing_session->config.prefer_suspend_clock_for_duration())
+    ds_config.set_prefer_suspend_clock_for_duration(true);
+
   ds_config.set_stop_timeout_ms(tracing_session->data_source_stop_timeout_ms());
   ds_config.set_enable_extra_guardrails(
       tracing_session->config.enable_extra_guardrails());
@@ -4269,7 +4311,8 @@
       consumer_maybe_null(consumer),
       consumer_uid(consumer->uid_),
       config(new_config),
-      snapshot_periodic_task(task_runner) {
+      snapshot_periodic_task(task_runner),
+      timed_stop_task(task_runner) {
   // all_data_sources_flushed is special because we store up to 64 events of
   // this type. Other events will go through the default case in
   // SnapshotLifecycleEvent() where they will be given a max history of 1.
diff --git a/src/tracing/core/tracing_service_impl.h b/src/tracing/core/tracing_service_impl.h
index 77bdb6b..4800559 100644
--- a/src/tracing/core/tracing_service_impl.h
+++ b/src/tracing/core/tracing_service_impl.h
@@ -641,6 +641,11 @@
     // etc)
     base::PeriodicTask snapshot_periodic_task;
 
+    // Deferred task that stops the trace when |duration_ms| expires. This is
+    // to handle the case of |prefer_suspend_clock_for_duration| which cannot
+    // use PostDelayedTask.
+    base::PeriodicTask timed_stop_task;
+
     // When non-NULL the packets should be post-processed using the filter.
     std::unique_ptr<protozero::MessageFilter> trace_filter;
     uint64_t filter_input_packets = 0;
@@ -759,6 +764,8 @@
                             const std::string& trigger_name);
   size_t PurgeExpiredAndCountTriggerInWindow(int64_t now_ns,
                                              uint64_t trigger_name_hash);
+  static void StopOnDurationMsExpiry(base::WeakPtr<TracingServiceImpl>,
+                                     TracingSessionID);
 
   base::TaskRunner* const task_runner_;
   std::unique_ptr<SharedMemory::Factory> shm_factory_;
diff --git a/src/tracing/core/tracing_service_impl_unittest.cc b/src/tracing/core/tracing_service_impl_unittest.cc
index 3a12a75..8b8c62c 100644
--- a/src/tracing/core/tracing_service_impl_unittest.cc
+++ b/src/tracing/core/tracing_service_impl_unittest.cc
@@ -54,10 +54,12 @@
 using ::testing::AssertionResult;
 using ::testing::AssertionSuccess;
 using ::testing::Contains;
+using ::testing::DoAll;
 using ::testing::Each;
 using ::testing::ElementsAreArray;
 using ::testing::Eq;
 using ::testing::ExplainMatchResult;
+using ::testing::HasSubstr;
 using ::testing::InSequence;
 using ::testing::Invoke;
 using ::testing::InvokeWithoutArgs;
@@ -66,6 +68,7 @@
 using ::testing::Ne;
 using ::testing::Not;
 using ::testing::Property;
+using ::testing::SaveArg;
 using ::testing::StrictMock;
 using ::testing::StringMatchResultListener;
 using ::testing::StrNe;
@@ -4026,4 +4029,25 @@
               Each(Property(&protos::gen::TracePacket::has_timestamp, false)));
 }
 
+TEST_F(TracingServiceImplTest, InvalidBufferSizes) {
+  std::unique_ptr<MockConsumer> consumer = CreateMockConsumer();
+  consumer->Connect(svc.get());
+
+  TraceConfig trace_config;
+  trace_config.add_buffers()->set_size_kb(128);
+  trace_config.add_buffers()->set_size_kb(256);
+  trace_config.add_buffers()->set_size_kb(4 * 1024 * 1024);
+  auto* ds = trace_config.add_data_sources();
+  auto* ds_config = ds->mutable_config();
+  ds_config->set_name("data_source");
+  consumer->EnableTracing(trace_config);
+
+  std::string error;
+  auto checkpoint = task_runner.CreateCheckpoint("tracing_disabled");
+  EXPECT_CALL(*consumer, OnTracingDisabled(_))
+      .WillOnce(DoAll(SaveArg<0>(&error), checkpoint));
+  task_runner.RunUntilCheckpoint("tracing_disabled");
+  EXPECT_THAT(error, HasSubstr("Invalid buffer sizes"));
+}
+
 }  // namespace perfetto
diff --git a/src/tracing/data_source.cc b/src/tracing/data_source.cc
index 0f4a5d1..902c803 100644
--- a/src/tracing/data_source.cc
+++ b/src/tracing/data_source.cc
@@ -15,6 +15,7 @@
  */
 
 #include "perfetto/tracing/data_source.h"
+#include "perfetto/base/logging.h"
 
 namespace perfetto {
 
@@ -26,4 +27,36 @@
 void DataSourceBase::WillClearIncrementalState(
     const ClearIncrementalStateArgs&) {}
 
+namespace internal {
+
+void DataSourceType::PopulateTlsInst(
+    DataSourceInstanceThreadLocalState* tls_inst,
+    DataSourceState* instance_state,
+    uint32_t instance_index) {
+  auto* tracing_impl = TracingMuxer::Get();
+  tls_inst->muxer_id_for_testing = instance_state->muxer_id_for_testing;
+  tls_inst->backend_id = instance_state->backend_id;
+  tls_inst->backend_connection_id = instance_state->backend_connection_id;
+  tls_inst->buffer_id = instance_state->buffer_id;
+  tls_inst->startup_target_buffer_reservation =
+      instance_state->startup_target_buffer_reservation.load(
+          std::memory_order_relaxed);
+  tls_inst->data_source_instance_id = instance_state->data_source_instance_id;
+  tls_inst->is_intercepted = instance_state->interceptor_id != 0;
+  tls_inst->trace_writer = tracing_impl->CreateTraceWriter(
+      &state_, instance_index, instance_state, buffer_exhausted_policy_);
+  if (create_incremental_state_fn_) {
+    PERFETTO_DCHECK(!tls_inst->incremental_state);
+    CreateIncrementalState(tls_inst, instance_index);
+  }
+  if (create_custom_tls_fn_) {
+    tls_inst->data_source_custom_tls =
+        create_custom_tls_fn_(tls_inst, instance_index, user_arg_);
+  }
+  // Even in the case of out-of-IDs, SharedMemoryArbiterImpl returns a
+  // NullTraceWriter. The returned pointer should never be null.
+  PERFETTO_DCHECK(tls_inst->trace_writer);
+}
+
+}  // namespace internal
 }  // namespace perfetto
diff --git a/src/tracing/test/api_integrationtest.cc b/src/tracing/test/api_integrationtest.cc
index 583d281..34cd1dc 100644
--- a/src/tracing/test/api_integrationtest.cc
+++ b/src/tracing/test/api_integrationtest.cc
@@ -724,28 +724,28 @@
     perfetto::Tracing::ResetForTesting();
   }
 
-  template <typename DataSourceType>
+  template <typename DerivedDataSource>
   TestDataSourceHandle* RegisterDataSource(std::string name) {
     perfetto::DataSourceDescriptor dsd;
     dsd.set_name(name);
-    return RegisterDataSource<DataSourceType>(dsd);
+    return RegisterDataSource<DerivedDataSource>(dsd);
   }
 
-  template <typename DataSourceType>
+  template <typename DerivedDataSource>
   TestDataSourceHandle* RegisterDataSource(
       const perfetto::DataSourceDescriptor& dsd) {
     EXPECT_EQ(data_sources_.count(dsd.name()), 0u);
     TestDataSourceHandle* handle = &data_sources_[dsd.name()];
-    DataSourceType::Register(dsd);
+    DerivedDataSource::Register(dsd);
     return handle;
   }
 
-  template <typename DataSourceType>
+  template <typename DerivedDataSource>
   TestDataSourceHandle* UpdateDataSource(
       const perfetto::DataSourceDescriptor& dsd) {
     EXPECT_EQ(data_sources_.count(dsd.name()), 1u);
     TestDataSourceHandle* handle = &data_sources_[dsd.name()];
-    DataSourceType::UpdateDescriptor(dsd);
+    DerivedDataSource::UpdateDescriptor(dsd);
     return handle;
   }
 
@@ -5560,7 +5560,7 @@
   auto* ds_cfg = cfg.add_data_sources()->mutable_config();
   ds_cfg->set_name("my_data_source");
 
-  std::atomic<bool> quit = false;
+  std::atomic<bool> quit{false};
   WaitableTestEvent outside_tracing;
   WaitableTestEvent tracing;
   std::thread t([&] {
diff --git a/test/cts/heapprofd_java_test_cts.cc b/test/cts/heapprofd_java_test_cts.cc
index c2f3f77..ca60161 100644
--- a/test/cts/heapprofd_java_test_cts.cc
+++ b/test/cts/heapprofd_java_test_cts.cc
@@ -235,16 +235,27 @@
 }
 
 TEST(HeapprofdJavaCtsTest, DebuggableAppOom) {
-  if (IsUserBuild()) return;
-
   std::string app_name = "android.perfetto.cts.app.debuggable";
   const auto& packets = TriggerOomHeapDump(app_name, "*");
   AssertGraphPresent(packets);
 }
 
-TEST(HeapprofdJavaCtsTest, DebuggableAppOomNotSelected) {
-  if (IsUserBuild()) return;
+TEST(HeapprofdJavaCtsTest, ProfileableAppOom) {
+  std::string app_name = "android.perfetto.cts.app.profileable";
+  const auto& packets = TriggerOomHeapDump(app_name, "*");
+  AssertGraphPresent(packets);
+}
 
+TEST(HeapprofdJavaCtsTest, ReleaseAppOom) {
+  std::string app_name = "android.perfetto.cts.app.release";
+  const auto& packets = TriggerOomHeapDump(app_name, "*");
+  if (!IsUserBuild())
+    AssertGraphPresent(packets);
+  else
+    AssertNoProfileContents(packets);
+}
+
+TEST(HeapprofdJavaCtsTest, DebuggableAppOomNotSelected) {
   std::string app_name = "android.perfetto.cts.app.debuggable";
   const auto& packets = TriggerOomHeapDump(app_name, "not.this.app");
   AssertNoProfileContents(packets);
diff --git a/test/cts/test_apps/AndroidManifest_profileable.xml b/test/cts/test_apps/AndroidManifest_profileable.xml
index bac8a66..077fd95 100755
--- a/test/cts/test_apps/AndroidManifest_profileable.xml
+++ b/test/cts/test_apps/AndroidManifest_profileable.xml
@@ -59,6 +59,19 @@
                 <category android:name="android.intent.category.LAUNCHER" />
             </intent-filter>
         </activity-alias>
+        <activity
+          android:name="android.perfetto.cts.app.JavaOomActivity"
+          android:exported="true">
+        </activity>
+        <activity-alias
+          android:name="android.perfetto.cts.app.profileable.JavaOomActivity"
+          android:targetActivity="android.perfetto.cts.app.JavaOomActivity"
+          android:exported="true">
+            <intent-filter>
+                <action android:name="android.intent.action.MAIN" />
+                <category android:name="android.intent.category.LAUNCHER" />
+            </intent-filter>
+        </activity-alias>
     </application>
 </manifest>
 
diff --git a/test/cts/test_apps/AndroidManifest_release.xml b/test/cts/test_apps/AndroidManifest_release.xml
index 5bc0f5b..417a539 100755
--- a/test/cts/test_apps/AndroidManifest_release.xml
+++ b/test/cts/test_apps/AndroidManifest_release.xml
@@ -58,5 +58,18 @@
                 <category android:name="android.intent.category.LAUNCHER" />
             </intent-filter>
         </activity-alias>
+        <activity
+          android:name="android.perfetto.cts.app.JavaOomActivity"
+          android:exported="true">
+        </activity>
+        <activity-alias
+          android:name="android.perfetto.cts.app.release.JavaOomActivity"
+          android:targetActivity="android.perfetto.cts.app.JavaOomActivity"
+          android:exported="true">
+            <intent-filter>
+                <action android:name="android.intent.action.MAIN" />
+                <category android:name="android.intent.category.LAUNCHER" />
+            </intent-filter>
+        </activity-alias>
     </application>
 </manifest>
diff --git a/test/trace_processor/diff_tests/include_index.py b/test/trace_processor/diff_tests/include_index.py
index 4367919..06500d8 100644
--- a/test/trace_processor/diff_tests/include_index.py
+++ b/test/trace_processor/diff_tests/include_index.py
@@ -51,6 +51,7 @@
 from diff_tests.memory.tests_metrics import MemoryMetrics
 from diff_tests.network.tests import Network
 from diff_tests.parsing.tests import Parsing
+from diff_tests.parsing.tests_debug_annotation import ParsingDebugAnnotation
 from diff_tests.parsing.tests_memory_counters import ParsingMemoryCounters
 from diff_tests.parsing.tests_rss_stats import ParsingRssStats
 from diff_tests.performance.tests import Performance
@@ -119,6 +120,8 @@
       *MemoryMetrics(index_path, 'memory', 'MemoryMetrics').fetch(),
       *Network(index_path, 'network', 'Network').fetch(),
       *Parsing(index_path, 'parsing', 'Parsing').fetch(),
+      *ParsingDebugAnnotation(index_path, 'parsing',
+                              'ParsingDebugAnnotation').fetch(),
       *ParsingRssStats(index_path, 'parsing', 'ParsingRssStats').fetch(),
       *ParsingMemoryCounters(index_path, 'parsing',
                              'ParsingMemoryCounters').fetch(),
diff --git a/test/trace_processor/diff_tests/parsing/tests_debug_annotation.py b/test/trace_processor/diff_tests/parsing/tests_debug_annotation.py
new file mode 100644
index 0000000..fab40cb
--- /dev/null
+++ b/test/trace_processor/diff_tests/parsing/tests_debug_annotation.py
@@ -0,0 +1,69 @@
+#!/usr/bin/env python3
+# Copyright (C) 2023 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from python.generators.diff_tests.testing import Path, DataPath, Metric
+from python.generators.diff_tests.testing import Csv, Json, TextProto
+from python.generators.diff_tests.testing import DiffTestBlueprint
+from python.generators.diff_tests.testing import TestSuite
+
+
+class ParsingDebugAnnotation(TestSuite):
+  # Verify parsing of interned_string_value in DebugAnnotation proto.
+  def test_interned_string_value(self):
+    return DiffTestBlueprint(
+        trace=TextProto(r"""
+        packet {
+          trusted_packet_sequence_id: 1
+          incremental_state_cleared: true
+          track_descriptor {
+            uuid: 1
+            thread {
+              pid: 5
+              tid: 1
+            }
+          }
+        }
+        packet {
+          trusted_packet_sequence_id: 1
+          timestamp: 2000
+          interned_data {
+            debug_annotation_names {
+                iid: 1
+                name: "key"
+            }
+            debug_annotation_string_values {
+                iid: 1
+                str: "value"
+            }
+          }
+          track_event {
+            track_uuid: 1
+            type: TYPE_INSTANT
+            name: "slice1"
+            debug_annotations {
+              name_iid: 1
+              string_value_iid: 1
+            }
+          }
+        }
+        """),
+        query="""
+          SELECT EXTRACT_ARG(s.arg_set_id, 'debug.key') AS value
+          FROM slice s;
+        """,
+        out=Csv("""
+        "value"
+        "value"
+        """))
diff --git a/test/trace_processor/diff_tests/startup/android_startup.out b/test/trace_processor/diff_tests/startup/android_startup.out
index 752db7b..f804a34 100644
--- a/test/trace_processor/diff_tests/startup/android_startup.out
+++ b/test/trace_processor/diff_tests/startup/android_startup.out
@@ -8,7 +8,7 @@
       dur_ns: 108
       main_thread_by_task_state {
         running_dur_ns: 10
-        runnable_dur_ns: 90
+        runnable_dur_ns: 80
         uninterruptible_sleep_dur_ns: 0
         interruptible_sleep_dur_ns: 10
         uninterruptible_io_sleep_dur_ns: 0
diff --git a/test/trace_processor/diff_tests/startup/android_startup_breakdown.out b/test/trace_processor/diff_tests/startup/android_startup_breakdown.out
index 8b35bb5..132aad8 100644
--- a/test/trace_processor/diff_tests/startup/android_startup_breakdown.out
+++ b/test/trace_processor/diff_tests/startup/android_startup_breakdown.out
@@ -8,7 +8,7 @@
       dur_ns: 108000000000
       main_thread_by_task_state {
         running_dur_ns: 25000000000
-        runnable_dur_ns: 30000000000
+        runnable_dur_ns: 5000000000
         uninterruptible_sleep_dur_ns: 0
         interruptible_sleep_dur_ns: 0
         uninterruptible_io_sleep_dur_ns: 0
diff --git a/test/trace_processor/diff_tests/startup/android_startup_breakdown_slow.out b/test/trace_processor/diff_tests/startup/android_startup_breakdown_slow.out
index 3ed8ce3..2974f88 100644
--- a/test/trace_processor/diff_tests/startup/android_startup_breakdown_slow.out
+++ b/test/trace_processor/diff_tests/startup/android_startup_breakdown_slow.out
@@ -8,7 +8,7 @@
       dur_ns: 108000000000
       main_thread_by_task_state {
         running_dur_ns: 25000000000
-        runnable_dur_ns: 30000000000
+        runnable_dur_ns: 5000000000
         uninterruptible_sleep_dur_ns: 0
         interruptible_sleep_dur_ns: 0
         uninterruptible_io_sleep_dur_ns: 0
diff --git a/test/trace_processor/diff_tests/startup/android_startup_slow.out b/test/trace_processor/diff_tests/startup/android_startup_slow.out
index 333883a..22f2d90 100644
--- a/test/trace_processor/diff_tests/startup/android_startup_slow.out
+++ b/test/trace_processor/diff_tests/startup/android_startup_slow.out
@@ -8,7 +8,7 @@
       dur_ns: 108000000000
       main_thread_by_task_state {
         running_dur_ns: 10000000000
-        runnable_dur_ns: 90000000000
+        runnable_dur_ns: 80000000000
         uninterruptible_sleep_dur_ns: 5000000000
         interruptible_sleep_dur_ns: 5000000000
         uninterruptible_io_sleep_dur_ns: 5000000000
diff --git a/ui/release/channels.json b/ui/release/channels.json
index 6d4559f..8ce6203 100644
--- a/ui/release/channels.json
+++ b/ui/release/channels.json
@@ -6,7 +6,7 @@
     },
     {
       "name": "canary",
-      "rev": "14a08703e3e391b713cf627d24a1ab9b30be99c2"
+      "rev": "8ba16d3e77b34ef1b2892ba2a5724da4af44c5e1"
     },
     {
       "name": "autopush",
diff --git a/ui/src/assets/details.scss b/ui/src/assets/details.scss
index 3faaadd..7e98a1c 100644
--- a/ui/src/assets/details.scss
+++ b/ui/src/assets/details.scss
@@ -430,16 +430,13 @@
     .tag-container {
       height: auto;
       min-height: 34px;
-      border: 2px solid #737679;
-      padding: 8px;
-      margin: 8px;
+      padding: 2px;
+      margin: 2px;
       cursor: text;
       border-radius: 3px;
       display: flex;
       align-items: center;
 
-      box-shadow: 0 2px 6px rgba(25, 25, 25, 0.2);
-
       .chips .chip {
         display: inline-block;
         width: auto;