Merge "Add support of extra cflags for msvc build" into main
diff --git a/Android.bp b/Android.bp
index 3481171..bd2375b 100644
--- a/Android.bp
+++ b/Android.bp
@@ -1278,6 +1278,9 @@
     defaults: [
         "perfetto_defaults",
     ],
+    required: [
+        "perfetto_persistent_cfg.pbtxt",
+    ],
 }
 
 // GN: //src/base:perfetto_base_default_platform
@@ -2378,6 +2381,7 @@
         ":perfetto_src_trace_processor_storage_storage",
         ":perfetto_src_trace_processor_tables_tables",
         ":perfetto_src_trace_processor_types_types",
+        ":perfetto_src_trace_processor_util_build_id",
         ":perfetto_src_trace_processor_util_bump_allocator",
         ":perfetto_src_trace_processor_util_descriptors",
         ":perfetto_src_trace_processor_util_glob",
@@ -2391,7 +2395,6 @@
         ":perfetto_src_trace_processor_util_protozero_to_text",
         ":perfetto_src_trace_processor_util_regex",
         ":perfetto_src_trace_processor_util_sql_argument",
-        ":perfetto_src_trace_processor_util_stack_traces_util",
         ":perfetto_src_trace_processor_util_stdlib",
         ":perfetto_src_trace_processor_util_util",
         ":perfetto_src_trace_processor_util_zip_reader",
@@ -5139,6 +5142,7 @@
         "protos/perfetto/metrics/android/android_boot.proto",
         "protos/perfetto/metrics/android/android_boot_unagg.proto",
         "protos/perfetto/metrics/android/android_frame_timeline_metric.proto",
+        "protos/perfetto/metrics/android/android_garbage_collection_unagg_metric.proto",
         "protos/perfetto/metrics/android/android_sysui_notifications_blocking_calls_metric.proto",
         "protos/perfetto/metrics/android/android_trusty_workqueues.proto",
         "protos/perfetto/metrics/android/anr_metric.proto",
@@ -5226,6 +5230,7 @@
         "protos/perfetto/metrics/android/android_boot.proto",
         "protos/perfetto/metrics/android/android_boot_unagg.proto",
         "protos/perfetto/metrics/android/android_frame_timeline_metric.proto",
+        "protos/perfetto/metrics/android/android_garbage_collection_unagg_metric.proto",
         "protos/perfetto/metrics/android/android_sysui_notifications_blocking_calls_metric.proto",
         "protos/perfetto/metrics/android/android_trusty_workqueues.proto",
         "protos/perfetto/metrics/android/anr_metric.proto",
@@ -5296,6 +5301,7 @@
         "protos/perfetto/metrics/android/android_boot.proto",
         "protos/perfetto/metrics/android/android_boot_unagg.proto",
         "protos/perfetto/metrics/android/android_frame_timeline_metric.proto",
+        "protos/perfetto/metrics/android/android_garbage_collection_unagg_metric.proto",
         "protos/perfetto/metrics/android/android_sysui_notifications_blocking_calls_metric.proto",
         "protos/perfetto/metrics/android/android_trusty_workqueues.proto",
         "protos/perfetto/metrics/android/anr_metric.proto",
@@ -10943,7 +10949,6 @@
     name: "perfetto_src_trace_processor_containers_containers",
     srcs: [
         "src/trace_processor/containers/bit_vector.cc",
-        "src/trace_processor/containers/bit_vector_iterators.cc",
         "src/trace_processor/containers/row_map.cc",
         "src/trace_processor/containers/string_pool.cc",
     ],
@@ -11107,14 +11112,18 @@
         "src/trace_processor/importers/common/event_tracker.cc",
         "src/trace_processor/importers/common/flow_tracker.cc",
         "src/trace_processor/importers/common/global_args_tracker.cc",
+        "src/trace_processor/importers/common/mapping_tracker.cc",
         "src/trace_processor/importers/common/metadata_tracker.cc",
         "src/trace_processor/importers/common/process_tracker.cc",
+        "src/trace_processor/importers/common/sched_event_tracker.cc",
         "src/trace_processor/importers/common/slice_tracker.cc",
         "src/trace_processor/importers/common/slice_translation_table.cc",
         "src/trace_processor/importers/common/stack_profile_tracker.cc",
         "src/trace_processor/importers/common/system_info_tracker.cc",
+        "src/trace_processor/importers/common/thread_state_tracker.cc",
         "src/trace_processor/importers/common/trace_parser.cc",
         "src/trace_processor/importers/common/track_tracker.cc",
+        "src/trace_processor/importers/common/virtual_memory_mapping.cc",
     ],
 }
 
@@ -11143,6 +11152,7 @@
         "src/trace_processor/importers/common/process_tracker_unittest.cc",
         "src/trace_processor/importers/common/slice_tracker_unittest.cc",
         "src/trace_processor/importers/common/slice_translation_table_unittest.cc",
+        "src/trace_processor/importers/common/thread_state_tracker_unittest.cc",
     ],
 }
 
@@ -11171,14 +11181,13 @@
         "src/trace_processor/importers/ftrace/drm_tracker.cc",
         "src/trace_processor/importers/ftrace/ftrace_module_impl.cc",
         "src/trace_processor/importers/ftrace/ftrace_parser.cc",
+        "src/trace_processor/importers/ftrace/ftrace_sched_event_tracker.cc",
         "src/trace_processor/importers/ftrace/ftrace_tokenizer.cc",
         "src/trace_processor/importers/ftrace/gpu_work_period_tracker.cc",
         "src/trace_processor/importers/ftrace/iostat_tracker.cc",
         "src/trace_processor/importers/ftrace/mali_gpu_event_tracker.cc",
         "src/trace_processor/importers/ftrace/pkvm_hyp_cpu_tracker.cc",
         "src/trace_processor/importers/ftrace/rss_stat_tracker.cc",
-        "src/trace_processor/importers/ftrace/sched_event_tracker.cc",
-        "src/trace_processor/importers/ftrace/thread_state_tracker.cc",
         "src/trace_processor/importers/ftrace/v4l2_tracker.cc",
         "src/trace_processor/importers/ftrace/virtio_gpu_tracker.cc",
         "src/trace_processor/importers/ftrace/virtio_video_tracker.cc",
@@ -11198,8 +11207,7 @@
     name: "perfetto_src_trace_processor_importers_ftrace_unittests",
     srcs: [
         "src/trace_processor/importers/ftrace/binder_tracker_unittest.cc",
-        "src/trace_processor/importers/ftrace/sched_event_tracker_unittest.cc",
-        "src/trace_processor/importers/ftrace/thread_state_tracker_unittest.cc",
+        "src/trace_processor/importers/ftrace/ftrace_sched_event_tracker_unittest.cc",
     ],
 }
 
@@ -11649,6 +11657,7 @@
         "src/trace_processor/metrics/sql/android/android_dvfs.sql",
         "src/trace_processor/metrics/sql/android/android_fastrpc.sql",
         "src/trace_processor/metrics/sql/android/android_frame_timeline_metric.sql",
+        "src/trace_processor/metrics/sql/android/android_garbage_collection_unagg.sql",
         "src/trace_processor/metrics/sql/android/android_gpu.sql",
         "src/trace_processor/metrics/sql/android/android_hwcomposer.sql",
         "src/trace_processor/metrics/sql/android/android_hwui_metric.sql",
@@ -12055,6 +12064,11 @@
         "src/trace_processor/perfetto_sql/stdlib/sched/thread_executing_span.sql",
         "src/trace_processor/perfetto_sql/stdlib/sched/thread_level_parallelism.sql",
         "src/trace_processor/perfetto_sql/stdlib/sched/thread_state_flattened.sql",
+        "src/trace_processor/perfetto_sql/stdlib/sched/utilization/general.sql",
+        "src/trace_processor/perfetto_sql/stdlib/sched/utilization/process.sql",
+        "src/trace_processor/perfetto_sql/stdlib/sched/utilization/system.sql",
+        "src/trace_processor/perfetto_sql/stdlib/sched/utilization/thread.sql",
+        "src/trace_processor/perfetto_sql/stdlib/slices/cpu_time.sql",
         "src/trace_processor/perfetto_sql/stdlib/slices/flat_slices.sql",
         "src/trace_processor/perfetto_sql/stdlib/slices/slices.sql",
         "src/trace_processor/perfetto_sql/stdlib/slices/with_context.sql",
@@ -12287,6 +12301,7 @@
     srcs: [
         "src/trace_processor/forwarding_trace_parser_unittest.cc",
         "src/trace_processor/ref_counted_unittest.cc",
+        "src/trace_processor/trace_blob_unittest.cc",
     ],
 }
 
@@ -12313,6 +12328,14 @@
     name: "perfetto_src_trace_processor_unittests",
 }
 
+// GN: //src/trace_processor/util:build_id
+filegroup {
+    name: "perfetto_src_trace_processor_util_build_id",
+    srcs: [
+        "src/trace_processor/util/build_id.cc",
+    ],
+}
+
 // GN: //src/trace_processor/util:bump_allocator
 filegroup {
     name: "perfetto_src_trace_processor_util_bump_allocator",
@@ -12413,14 +12436,6 @@
     ],
 }
 
-// GN: //src/trace_processor/util:stack_traces_util
-filegroup {
-    name: "perfetto_src_trace_processor_util_stack_traces_util",
-    srcs: [
-        "src/trace_processor/util/stack_traces_util.cc",
-    ],
-}
-
 // GN: //src/trace_processor/util:stdlib
 filegroup {
     name: "perfetto_src_trace_processor_util_stdlib",
@@ -13948,6 +13963,7 @@
         ":perfetto_src_trace_processor_types_types",
         ":perfetto_src_trace_processor_types_unittests",
         ":perfetto_src_trace_processor_unittests",
+        ":perfetto_src_trace_processor_util_build_id",
         ":perfetto_src_trace_processor_util_bump_allocator",
         ":perfetto_src_trace_processor_util_descriptors",
         ":perfetto_src_trace_processor_util_glob",
@@ -13961,7 +13977,6 @@
         ":perfetto_src_trace_processor_util_protozero_to_text",
         ":perfetto_src_trace_processor_util_regex",
         ":perfetto_src_trace_processor_util_sql_argument",
-        ":perfetto_src_trace_processor_util_stack_traces_util",
         ":perfetto_src_trace_processor_util_stdlib",
         ":perfetto_src_trace_processor_util_unittests",
         ":perfetto_src_trace_processor_util_util",
@@ -14651,6 +14666,7 @@
         ":perfetto_src_trace_processor_storage_storage",
         ":perfetto_src_trace_processor_tables_tables",
         ":perfetto_src_trace_processor_types_types",
+        ":perfetto_src_trace_processor_util_build_id",
         ":perfetto_src_trace_processor_util_bump_allocator",
         ":perfetto_src_trace_processor_util_descriptors",
         ":perfetto_src_trace_processor_util_glob",
@@ -14664,7 +14680,6 @@
         ":perfetto_src_trace_processor_util_protozero_to_text",
         ":perfetto_src_trace_processor_util_regex",
         ":perfetto_src_trace_processor_util_sql_argument",
-        ":perfetto_src_trace_processor_util_stack_traces_util",
         ":perfetto_src_trace_processor_util_stdlib",
         ":perfetto_src_trace_processor_util_util",
         ":perfetto_src_trace_processor_util_zip_reader",
@@ -14885,6 +14900,7 @@
         ":perfetto_src_trace_processor_storage_storage",
         ":perfetto_src_trace_processor_tables_tables",
         ":perfetto_src_trace_processor_types_types",
+        ":perfetto_src_trace_processor_util_build_id",
         ":perfetto_src_trace_processor_util_bump_allocator",
         ":perfetto_src_trace_processor_util_descriptors",
         ":perfetto_src_trace_processor_util_glob",
@@ -14898,7 +14914,6 @@
         ":perfetto_src_trace_processor_util_protozero_to_text",
         ":perfetto_src_trace_processor_util_regex",
         ":perfetto_src_trace_processor_util_sql_argument",
-        ":perfetto_src_trace_processor_util_stack_traces_util",
         ":perfetto_src_trace_processor_util_stdlib",
         ":perfetto_src_trace_processor_util_util",
         ":perfetto_src_trace_processor_util_zip_reader",
@@ -15550,16 +15565,24 @@
         "soong_zip",
     ],
     cmd: "mkdir -p $(genDir)/$(in) " +
-      "&& $(location aprotoc) " +
+        "&& $(location aprotoc) " +
         "--plugin=$(location protoc-gen-javastream) " +
+        "--javastream_opt=include_filter:perfetto.protos.TracePacket,perfetto.protos.ShellTransition,perfetto.protos.ShellHandlerMappings,perfetto.protos.ProtoLogMessage,perfetto.protos.ProtoLogViewerConfig,perfetto.protos.ShellHandlerMapping,perfetto.protos.ShellHandlerMappings,perfetto.protos.ProtoLogGroup,perfetto.protos.ProtoLogConfig,perfetto.protos.DataSourceConfig,perfetto.protos.InternedString,perfetto.protos.InternedData,perfetto.protos.ProtoLogLevel,perfetto.protos.TestEvent,perfetto.protos.TestEvent.TestPayload,perfetto.protos.TestConfig,perfetto.protos.TestConfig.DummyFields " +
         "--javastream_out=$(genDir)/$(in) " +
         "-Iexternal/protobuf/src " +
         "-Iexternal/perfetto " +
         "-I . $(in) " +
-      "&& $(location soong_zip) " +
+        "&& $(location soong_zip) " +
         "-jar -o $(out) -C $(genDir)/$(in) -D $(genDir)/$(in)",
     data: [
         ":libprotobuf-internal-protos",
     ],
     output_extension: "srcjar",
 }
+
+prebuilt_etc {
+    name: "perfetto_persistent_cfg.pbtxt",
+    filename: "persistent_cfg.pbtxt",
+    sub_dir: "perfetto",
+    src: "persistent_cfg.pbtxt",
+}
diff --git a/Android.bp.extras b/Android.bp.extras
index 9c7b0b0..2ce8e95 100644
--- a/Android.bp.extras
+++ b/Android.bp.extras
@@ -184,16 +184,24 @@
         "soong_zip",
     ],
     cmd: "mkdir -p $(genDir)/$(in) " +
-      "&& $(location aprotoc) " +
+        "&& $(location aprotoc) " +
         "--plugin=$(location protoc-gen-javastream) " +
+        "--javastream_opt=include_filter:perfetto.protos.TracePacket,perfetto.protos.ShellTransition,perfetto.protos.ShellHandlerMappings,perfetto.protos.ProtoLogMessage,perfetto.protos.ProtoLogViewerConfig,perfetto.protos.ShellHandlerMapping,perfetto.protos.ShellHandlerMappings,perfetto.protos.ProtoLogGroup,perfetto.protos.ProtoLogConfig,perfetto.protos.DataSourceConfig,perfetto.protos.InternedString,perfetto.protos.InternedData,perfetto.protos.ProtoLogLevel,perfetto.protos.TestEvent,perfetto.protos.TestEvent.TestPayload,perfetto.protos.TestConfig,perfetto.protos.TestConfig.DummyFields " +
         "--javastream_out=$(genDir)/$(in) " +
         "-Iexternal/protobuf/src " +
         "-Iexternal/perfetto " +
         "-I . $(in) " +
-      "&& $(location soong_zip) " +
+        "&& $(location soong_zip) " +
         "-jar -o $(out) -C $(genDir)/$(in) -D $(genDir)/$(in)",
     data: [
         ":libprotobuf-internal-protos",
     ],
     output_extension: "srcjar",
 }
+
+prebuilt_etc {
+    name: "perfetto_persistent_cfg.pbtxt",
+    filename: "persistent_cfg.pbtxt",
+    sub_dir: "perfetto",
+    src: "persistent_cfg.pbtxt",
+}
diff --git a/BUILD b/BUILD
index 6a44d74..592c1ec 100644
--- a/BUILD
+++ b/BUILD
@@ -266,6 +266,7 @@
         ":src_trace_processor_tables_tables",
         ":src_trace_processor_tables_tables_python",
         ":src_trace_processor_types_types",
+        ":src_trace_processor_util_build_id",
         ":src_trace_processor_util_bump_allocator",
         ":src_trace_processor_util_descriptors",
         ":src_trace_processor_util_glob",
@@ -279,7 +280,6 @@
         ":src_trace_processor_util_protozero_to_text",
         ":src_trace_processor_util_regex",
         ":src_trace_processor_util_sql_argument",
-        ":src_trace_processor_util_stack_traces_util",
         ":src_trace_processor_util_stdlib",
         ":src_trace_processor_util_util",
         ":src_trace_processor_util_zip_reader",
@@ -1354,7 +1354,6 @@
     name = "src_trace_processor_containers_containers",
     srcs = [
         "src/trace_processor/containers/bit_vector.cc",
-        "src/trace_processor/containers/bit_vector_iterators.cc",
         "src/trace_processor/containers/row_map.cc",
         "src/trace_processor/containers/string_pool.cc",
     ],
@@ -1365,7 +1364,6 @@
         ":include_perfetto_public_base",
         ":include_perfetto_public_protozero",
         "src/trace_processor/containers/bit_vector.h",
-        "src/trace_processor/containers/bit_vector_iterators.h",
         "src/trace_processor/containers/null_term_string_view.h",
         "src/trace_processor/containers/row_map.h",
         "src/trace_processor/containers/row_map_algorithms.h",
@@ -1467,6 +1465,7 @@
         "src/trace_processor/importers/common/clock_converter.h",
         "src/trace_processor/importers/common/clock_tracker.cc",
         "src/trace_processor/importers/common/clock_tracker.h",
+        "src/trace_processor/importers/common/create_mapping_params.h",
         "src/trace_processor/importers/common/deobfuscation_mapping_table.cc",
         "src/trace_processor/importers/common/deobfuscation_mapping_table.h",
         "src/trace_processor/importers/common/event_tracker.cc",
@@ -1475,10 +1474,15 @@
         "src/trace_processor/importers/common/flow_tracker.h",
         "src/trace_processor/importers/common/global_args_tracker.cc",
         "src/trace_processor/importers/common/global_args_tracker.h",
+        "src/trace_processor/importers/common/mapping_tracker.cc",
+        "src/trace_processor/importers/common/mapping_tracker.h",
         "src/trace_processor/importers/common/metadata_tracker.cc",
         "src/trace_processor/importers/common/metadata_tracker.h",
         "src/trace_processor/importers/common/process_tracker.cc",
         "src/trace_processor/importers/common/process_tracker.h",
+        "src/trace_processor/importers/common/sched_event_state.h",
+        "src/trace_processor/importers/common/sched_event_tracker.cc",
+        "src/trace_processor/importers/common/sched_event_tracker.h",
         "src/trace_processor/importers/common/slice_tracker.cc",
         "src/trace_processor/importers/common/slice_tracker.h",
         "src/trace_processor/importers/common/slice_translation_table.cc",
@@ -1487,9 +1491,13 @@
         "src/trace_processor/importers/common/stack_profile_tracker.h",
         "src/trace_processor/importers/common/system_info_tracker.cc",
         "src/trace_processor/importers/common/system_info_tracker.h",
+        "src/trace_processor/importers/common/thread_state_tracker.cc",
+        "src/trace_processor/importers/common/thread_state_tracker.h",
         "src/trace_processor/importers/common/trace_parser.cc",
         "src/trace_processor/importers/common/track_tracker.cc",
         "src/trace_processor/importers/common/track_tracker.h",
+        "src/trace_processor/importers/common/virtual_memory_mapping.cc",
+        "src/trace_processor/importers/common/virtual_memory_mapping.h",
     ],
 )
 
@@ -1541,6 +1549,8 @@
         "src/trace_processor/importers/ftrace/ftrace_module_impl.h",
         "src/trace_processor/importers/ftrace/ftrace_parser.cc",
         "src/trace_processor/importers/ftrace/ftrace_parser.h",
+        "src/trace_processor/importers/ftrace/ftrace_sched_event_tracker.cc",
+        "src/trace_processor/importers/ftrace/ftrace_sched_event_tracker.h",
         "src/trace_processor/importers/ftrace/ftrace_tokenizer.cc",
         "src/trace_processor/importers/ftrace/ftrace_tokenizer.h",
         "src/trace_processor/importers/ftrace/gpu_work_period_tracker.cc",
@@ -1553,10 +1563,6 @@
         "src/trace_processor/importers/ftrace/pkvm_hyp_cpu_tracker.h",
         "src/trace_processor/importers/ftrace/rss_stat_tracker.cc",
         "src/trace_processor/importers/ftrace/rss_stat_tracker.h",
-        "src/trace_processor/importers/ftrace/sched_event_tracker.cc",
-        "src/trace_processor/importers/ftrace/sched_event_tracker.h",
-        "src/trace_processor/importers/ftrace/thread_state_tracker.cc",
-        "src/trace_processor/importers/ftrace/thread_state_tracker.h",
         "src/trace_processor/importers/ftrace/v4l2_tracker.cc",
         "src/trace_processor/importers/ftrace/v4l2_tracker.h",
         "src/trace_processor/importers/ftrace/virtio_gpu_tracker.cc",
@@ -1949,6 +1955,7 @@
         "src/trace_processor/metrics/sql/android/android_dvfs.sql",
         "src/trace_processor/metrics/sql/android/android_fastrpc.sql",
         "src/trace_processor/metrics/sql/android/android_frame_timeline_metric.sql",
+        "src/trace_processor/metrics/sql/android/android_garbage_collection_unagg.sql",
         "src/trace_processor/metrics/sql/android/android_gpu.sql",
         "src/trace_processor/metrics/sql/android/android_hwcomposer.sql",
         "src/trace_processor/metrics/sql/android/android_hwui_metric.sql",
@@ -2483,6 +2490,17 @@
     ],
 )
 
+# GN target: //src/trace_processor/perfetto_sql/stdlib/sched/utilization:utilization
+perfetto_filegroup(
+    name = "src_trace_processor_perfetto_sql_stdlib_sched_utilization_utilization",
+    srcs = [
+        "src/trace_processor/perfetto_sql/stdlib/sched/utilization/general.sql",
+        "src/trace_processor/perfetto_sql/stdlib/sched/utilization/process.sql",
+        "src/trace_processor/perfetto_sql/stdlib/sched/utilization/system.sql",
+        "src/trace_processor/perfetto_sql/stdlib/sched/utilization/thread.sql",
+    ],
+)
+
 # GN target: //src/trace_processor/perfetto_sql/stdlib/sched:sched
 perfetto_filegroup(
     name = "src_trace_processor_perfetto_sql_stdlib_sched_sched",
@@ -2498,6 +2516,7 @@
 perfetto_filegroup(
     name = "src_trace_processor_perfetto_sql_stdlib_slices_slices",
     srcs = [
+        "src/trace_processor/perfetto_sql/stdlib/slices/cpu_time.sql",
         "src/trace_processor/perfetto_sql/stdlib/slices/flat_slices.sql",
         "src/trace_processor/perfetto_sql/stdlib/slices/slices.sql",
         "src/trace_processor/perfetto_sql/stdlib/slices/with_context.sql",
@@ -2529,6 +2548,7 @@
         ":src_trace_processor_perfetto_sql_stdlib_pkvm_pkvm",
         ":src_trace_processor_perfetto_sql_stdlib_prelude_prelude",
         ":src_trace_processor_perfetto_sql_stdlib_sched_sched",
+        ":src_trace_processor_perfetto_sql_stdlib_sched_utilization_utilization",
         ":src_trace_processor_perfetto_sql_stdlib_slices_slices",
         ":src_trace_processor_perfetto_sql_stdlib_time_time",
     ],
@@ -2682,6 +2702,15 @@
     ],
 )
 
+# GN target: //src/trace_processor/util:build_id
+perfetto_filegroup(
+    name = "src_trace_processor_util_build_id",
+    srcs = [
+        "src/trace_processor/util/build_id.cc",
+        "src/trace_processor/util/build_id.h",
+    ],
+)
+
 # GN target: //src/trace_processor/util:bump_allocator
 perfetto_filegroup(
     name = "src_trace_processor_util_bump_allocator",
@@ -2801,15 +2830,6 @@
     ],
 )
 
-# GN target: //src/trace_processor/util:stack_traces_util
-perfetto_filegroup(
-    name = "src_trace_processor_util_stack_traces_util",
-    srcs = [
-        "src/trace_processor/util/stack_traces_util.cc",
-        "src/trace_processor/util/stack_traces_util.h",
-    ],
-)
-
 # GN target: //src/trace_processor/util:stdlib
 perfetto_filegroup(
     name = "src_trace_processor_util_stdlib",
@@ -4296,6 +4316,7 @@
         "protos/perfetto/metrics/android/android_boot.proto",
         "protos/perfetto/metrics/android/android_boot_unagg.proto",
         "protos/perfetto/metrics/android/android_frame_timeline_metric.proto",
+        "protos/perfetto/metrics/android/android_garbage_collection_unagg_metric.proto",
         "protos/perfetto/metrics/android/android_sysui_notifications_blocking_calls_metric.proto",
         "protos/perfetto/metrics/android/android_trusty_workqueues.proto",
         "protos/perfetto/metrics/android/anr_metric.proto",
@@ -5626,6 +5647,7 @@
         ":src_trace_processor_tables_tables",
         ":src_trace_processor_tables_tables_python",
         ":src_trace_processor_types_types",
+        ":src_trace_processor_util_build_id",
         ":src_trace_processor_util_bump_allocator",
         ":src_trace_processor_util_descriptors",
         ":src_trace_processor_util_glob",
@@ -5639,7 +5661,6 @@
         ":src_trace_processor_util_protozero_to_text",
         ":src_trace_processor_util_regex",
         ":src_trace_processor_util_sql_argument",
-        ":src_trace_processor_util_stack_traces_util",
         ":src_trace_processor_util_stdlib",
         ":src_trace_processor_util_util",
         ":src_trace_processor_util_zip_reader",
@@ -5797,6 +5818,7 @@
         ":src_trace_processor_tables_tables",
         ":src_trace_processor_tables_tables_python",
         ":src_trace_processor_types_types",
+        ":src_trace_processor_util_build_id",
         ":src_trace_processor_util_bump_allocator",
         ":src_trace_processor_util_descriptors",
         ":src_trace_processor_util_glob",
@@ -5810,7 +5832,6 @@
         ":src_trace_processor_util_protozero_to_text",
         ":src_trace_processor_util_regex",
         ":src_trace_processor_util_sql_argument",
-        ":src_trace_processor_util_stack_traces_util",
         ":src_trace_processor_util_stdlib",
         ":src_trace_processor_util_util",
         ":src_trace_processor_util_zip_reader",
@@ -5888,7 +5909,7 @@
         ":src_profiling_deobfuscator",
         ":src_profiling_symbolizer_symbolize_database",
         ":src_profiling_symbolizer_symbolizer",
-        ":src_trace_processor_util_stack_traces_util",
+        ":src_trace_processor_util_build_id",
         ":src_traceconv_pprofbuilder",
         ":src_traceconv_utils",
     ],
@@ -6020,6 +6041,7 @@
         ":src_trace_processor_tables_tables",
         ":src_trace_processor_tables_tables_python",
         ":src_trace_processor_types_types",
+        ":src_trace_processor_util_build_id",
         ":src_trace_processor_util_bump_allocator",
         ":src_trace_processor_util_descriptors",
         ":src_trace_processor_util_glob",
@@ -6033,7 +6055,6 @@
         ":src_trace_processor_util_protozero_to_text",
         ":src_trace_processor_util_regex",
         ":src_trace_processor_util_sql_argument",
-        ":src_trace_processor_util_stack_traces_util",
         ":src_trace_processor_util_stdlib",
         ":src_trace_processor_util_util",
         ":src_trace_processor_util_zip_reader",
diff --git a/BUILD.gn b/BUILD.gn
index bfadeca..1e61d67 100644
--- a/BUILD.gn
+++ b/BUILD.gn
@@ -48,6 +48,10 @@
   all_targets += [ "src/trace_processor:trace_processor_shell" ]
 }
 
+if (enable_perfetto_trace_processor) {
+  all_targets += [ "src/trace_redaction:trace_redactor" ]
+}
+
 if (enable_perfetto_traceconv) {
   all_targets += [ "src/traceconv" ]
   if (is_cross_compiling) {
diff --git a/CHANGELOG b/CHANGELOG
index d61d0aa..d586980 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -9,6 +9,59 @@
     *
 
 
+v43.1 - 2024-03-05:
+  Tracing service and probes:
+    * Cherry-pick of https://r.android.com/2988674, which fixes the android x86
+      standalone build.
+
+
+v43.0 - 2024-03-05:
+  Tracing service and probes:
+    * Buffers marked as `transfer_on_clone` will be flushed independently when
+      cloning.
+    * ftrace: added drain_buffer_percent option to read kernel ring buffer
+      based on occupancy in addition to existing periodic reads. Requires Linux
+      kernel v6.1+.
+    * ftrace: changed default kernel per-cpu ring buffer sizes if a config
+      doesn't request an explicit size via buffer_size_kb. Added
+      buffer_size_lower_bound option that lets the service choose a bigger ring
+      buffer size than requested.
+    * "linux.process_stats" data source: added options to record: process age,
+      time spent running in userspace, and time spent running in kernel mode,
+      using /proc/pid/stat. See "record_process_age" and
+      "record_process_runtime" options.
+  Trace Processor:
+    * Added `decompress_packets` mode to the traceconv tool.
+    * Support memory mapped file access on Windows.
+    * Deprecated `common` standard library module; it will be removed by v45.
+      Some of the functionality has been migrated to other parts of the
+      standard library.
+  UI:
+    * Added a "defaultPlugins" list to allow control over which plugins are
+      enabled by default.
+    * Added a feature to allow enabling/disabling plugins permanently from the
+      plugins page.
+    * Added plugin load times to the plugin page.
+    * Added scrolling to the pinned track area.
+    * Added commands for expanding and closing all tracks.
+    * Added {expand,collapse}GroupsByPredicate to plugin API.
+    * Added SimpleSliceTrack & SimpleCounterTrack which have the same
+      functionality as debug tracks but may be added on trace load from within
+      plugins.
+    * Added naive track crash containment, which means tracks that crash should
+      not crash the entire UI.
+    * Don't render "idle" kernel thread slices in thread state tracks.
+    * Fixed crash when using autofill in text inputs in Chrome.
+    * Fixed bug where "incomplete" slices were sometimes not rendered.
+    * Fixed crash when calls to CacheStorage fail via promise rejection.
+    * Fixed bug causing slices to occasionally disappear in tracks v2.
+    * Fixed crash in tracksV2 when visible window is negative.
+    * Fixed bug where toggling the sidebar without a trace loaded would either
+      crash the UI or simply not work.
+    * Various TabsV2 QoL improvements.
+    * Various AndroidLongBatteryTracing plugin improvements.
+
+
 v42.0 - 2024-02-02:
   Tracing service and probes:
     * Linux/Android: kernel scheduling data (sched_switch and sched_waking
diff --git a/bazel/standalone/perfetto_cfg.bzl b/bazel/standalone/perfetto_cfg.bzl
index 3946db0..cbabb29 100644
--- a/bazel/standalone/perfetto_cfg.bzl
+++ b/bazel/standalone/perfetto_cfg.bzl
@@ -60,6 +60,7 @@
         protobuf_py = [],
         pandas_py = [],
         tp_vendor_py = [],
+        tp_resolvers_py = [],
 
         # There are multiple configurations for the function name demangling
         # logic in trace processor:
diff --git a/docs/instrumentation/tracing-sdk.md b/docs/instrumentation/tracing-sdk.md
index e0fba60..49320e0 100644
--- a/docs/instrumentation/tracing-sdk.md
+++ b/docs/instrumentation/tracing-sdk.md
@@ -30,7 +30,7 @@
 To start using the Client API, first check out the latest SDK release:
 
 ```bash
-git clone https://android.googlesource.com/platform/external/perfetto -b v41.0
+git clone https://android.googlesource.com/platform/external/perfetto -b v43.1
 ```
 
 The SDK consists of two files, `sdk/perfetto.h` and `sdk/perfetto.cc`. These are
diff --git a/examples/sdk/README.md b/examples/sdk/README.md
index fa92eb5..8c53d85 100644
--- a/examples/sdk/README.md
+++ b/examples/sdk/README.md
@@ -15,7 +15,7 @@
 First, check out the latest Perfetto release:
 
 ```bash
-git clone https://android.googlesource.com/platform/external/perfetto -b v41.0
+git clone https://android.googlesource.com/platform/external/perfetto -b v43.1
 ```
 
 Then, build using CMake:
diff --git a/gn/BUILD.gn b/gn/BUILD.gn
index 6c1775a..f59c182 100644
--- a/gn/BUILD.gn
+++ b/gn/BUILD.gn
@@ -442,5 +442,7 @@
 }
 perfetto_py_library("tp_vendor_py") {
 }
+perfetto_py_library("tp_resolvers_py") {
+}
 perfetto_py_library("protobuf_py") {
 }
diff --git a/gn/perfetto_unittests.gni b/gn/perfetto_unittests.gni
index 836ac9a..f1d46ba 100644
--- a/gn/perfetto_unittests.gni
+++ b/gn/perfetto_unittests.gni
@@ -86,6 +86,4 @@
   perfetto_unittests_targets += [ "src/traced_relay:unittests" ]
 }
 
-if (!is_win) {
-  perfetto_unittests_targets += [ "src/trace_redaction:unittests" ]
-}
+perfetto_unittests_targets += [ "src/trace_redaction:unittests" ]
diff --git a/gn/standalone/BUILD.gn b/gn/standalone/BUILD.gn
index a7b7a7c..98cb01e 100644
--- a/gn/standalone/BUILD.gn
+++ b/gn/standalone/BUILD.gn
@@ -280,10 +280,7 @@
       "-msse2",
       "-mfpmath=sse",
     ]
-    ldflags += [
-      "-m32",
-      "-lgcc",
-    ]
+    ldflags += [ "-m32" ]
   } else if (current_cpu == "arm64") {
     cflags += [ "-fno-omit-frame-pointer" ]
   } else if (current_cpu == "x64") {
diff --git a/include/perfetto/ext/base/file_utils.h b/include/perfetto/ext/base/file_utils.h
index b3d8978..47c5146 100644
--- a/include/perfetto/ext/base/file_utils.h
+++ b/include/perfetto/ext/base/file_utils.h
@@ -102,7 +102,8 @@
                                 const std::string& group_name,
                                 const std::string& mode_bits);
 
-std::optional<size_t> GetFileSize(const std::string& path);
+// Returns the size of the file located at |path|, or nullopt in case of error.
+std::optional<uint64_t> GetFileSize(const std::string& path);
 
 }  // namespace base
 }  // namespace perfetto
diff --git a/include/perfetto/ext/base/scoped_mmap.h b/include/perfetto/ext/base/scoped_mmap.h
index 1af620b..f14afcb 100644
--- a/include/perfetto/ext/base/scoped_mmap.h
+++ b/include/perfetto/ext/base/scoped_mmap.h
@@ -22,6 +22,15 @@
 #include "perfetto/base/build_config.h"
 #include "perfetto/ext/base/scoped_file.h"
 
+#if PERFETTO_BUILDFLAG(PERFETTO_OS_LINUX) ||   \
+    PERFETTO_BUILDFLAG(PERFETTO_OS_APPLE) ||   \
+    PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID) || \
+    PERFETTO_BUILDFLAG(PERFETTO_OS_WIN)
+#define PERFETTO_HAS_MMAP() 1
+#else
+#define PERFETTO_HAS_MMAP() 0
+#endif
+
 namespace perfetto::base {
 
 // RAII wrapper that holds ownership of an mmap()d area and of a file. Calls
@@ -52,6 +61,14 @@
   // return false.
   bool reset() noexcept;
 
+#if PERFETTO_BUILDFLAG(PERFETTO_OS_LINUX) ||   \
+    PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID) || \
+    PERFETTO_BUILDFLAG(PERFETTO_OS_APPLE)
+  // Takes ownership of an mmap()d area that starts at `data`, `size` bytes
+  // long. `data` should not be MAP_FAILED.
+  static ScopedMmap InheritMmappedRange(void* data, size_t size);
+#endif
+
  private:
   ScopedMmap(const ScopedMmap&) = delete;
   ScopedMmap& operator=(const ScopedMmap&) = delete;
diff --git a/include/perfetto/public/abi/data_source_abi.h b/include/perfetto/public/abi/data_source_abi.h
index d20d351..1dece63 100644
--- a/include/perfetto/public/abi/data_source_abi.h
+++ b/include/perfetto/public/abi/data_source_abi.h
@@ -60,6 +60,8 @@
 // PerfettoDsSetCbUserArg(). The return value of this is passed to all other
 // callbacks (for this data source instance) as `inst_ctx` and can be accessed
 // during tracing with PerfettoDsImplGetInstanceLocked().
+//
+// Can be called from any thread.
 typedef void* (*PerfettoDsOnSetupCb)(struct PerfettoDsImpl*,
                                      PerfettoDsInstanceIndex inst_id,
                                      void* ds_config,
@@ -74,6 +76,8 @@
 // Called when tracing starts for a data source instance. `user_arg` is the
 // value passed to PerfettoDsSetCbUserArg(). `inst_ctx` is the return
 // value of PerfettoDsOnSetupCb.
+//
+// Can be called from any thread.
 typedef void (*PerfettoDsOnStartCb)(struct PerfettoDsImpl*,
                                     PerfettoDsInstanceIndex inst_id,
                                     void* user_arg,
@@ -102,6 +106,10 @@
 // PerfettoDsOnSetupCb.`args` can be used to postpone stopping this data source
 // instance. Note that, in general, it's not a good idea to destroy `inst_ctx`
 // here: PerfettoDsOnDestroyCb should be used instead.
+//
+// Can be called from any thread. Blocking this for too long is not a good
+// idea and can cause deadlocks. Use PerfettoDsOnStopArgsPostpone() to postpone
+// disabling the data source instance.
 typedef void (*PerfettoDsOnStopCb)(struct PerfettoDsImpl*,
                                    PerfettoDsInstanceIndex inst_id,
                                    void* user_arg,
@@ -112,6 +120,8 @@
 // that `inst_ctx` (which is the return value of PerfettoDsOnSetupCb) can
 // potentially be destroyed. `user_arg` is the value passed to
 // PerfettoDsSetCbUserArg().
+//
+// Can be called from any thread.
 typedef void (*PerfettoDsOnDestroyCb)(struct PerfettoDsImpl*,
                                       void* user_arg,
                                       void* inst_ctx);
@@ -139,6 +149,10 @@
 // PerfettoDsSetCbUserArg(). `inst_ctx` is the return value of
 // PerfettoDsOnSetupCb. `args` can be used to postpone stopping this data source
 // instance.
+//
+// Can be called from any thread. Blocking this for too long is not a good
+// idea and can cause deadlocks. Use PerfettoDsOnFlushArgsPostpone() to postpone
+// sending the flush acknowledgement to the service.
 typedef void (*PerfettoDsOnFlushCb)(struct PerfettoDsImpl*,
                                     PerfettoDsInstanceIndex inst_id,
                                     void* user_arg,
@@ -175,19 +189,23 @@
                                                       PerfettoDsOnFlushCb);
 
 // Callbacks for custom per instance thread local state.
+//
+// Called from inside a trace point. Trace points inside these will be
+// ignored.
 PERFETTO_SDK_EXPORT void PerfettoDsSetOnCreateTls(
     struct PerfettoDsImpl*,
     PerfettoDsOnCreateCustomState);
-
 PERFETTO_SDK_EXPORT void PerfettoDsSetOnDeleteTls(
     struct PerfettoDsImpl*,
     PerfettoDsOnDeleteCustomState);
 
 // Callbacks for custom per instance thread local incremental state.
+//
+// Called from inside a trace point. Trace points inside these will be
+// ignored.
 PERFETTO_SDK_EXPORT void PerfettoDsSetOnCreateIncr(
     struct PerfettoDsImpl*,
     PerfettoDsOnCreateCustomState);
-
 PERFETTO_SDK_EXPORT void PerfettoDsSetOnDeleteIncr(
     struct PerfettoDsImpl*,
     PerfettoDsOnDeleteCustomState);
diff --git a/include/perfetto/public/data_source.h b/include/perfetto/public/data_source.h
index ccb0470..23fba92 100644
--- a/include/perfetto/public/data_source.h
+++ b/include/perfetto/public/data_source.h
@@ -42,8 +42,11 @@
   { &perfetto_atomic_false, PERFETTO_NULL }
 
 // All the callbacks are optional and can be NULL if not needed.
+//
 struct PerfettoDsParams {
-  // Instance lifecycle callbacks:
+  // Instance lifecycle callbacks.
+  //
+  // Can be called from any thread.
   PerfettoDsOnSetupCb on_setup_cb;
   PerfettoDsOnStartCb on_start_cb;
   PerfettoDsOnStopCb on_stop_cb;
@@ -52,12 +55,18 @@
 
   // These are called to create/delete custom thread-local instance state, which
   // can be accessed with PerfettoDsTracerImplGetCustomTls().
+  //
+  // Called from inside a trace point. Trace points inside these will be
+  // ignored.
   PerfettoDsOnCreateCustomState on_create_tls_cb;
   PerfettoDsOnDeleteCustomState on_delete_tls_cb;
 
   // These are called to create/delete custom thread-local instance incremental
   // state. Incremental state may be cleared periodically by the tracing service
   // and can be accessed with PerfettoDsTracerImplGetIncrementalState().
+  //
+  // Called from inside a trace point. Trace points inside these will be
+  // ignored.
   PerfettoDsOnCreateCustomState on_create_incr_cb;
   PerfettoDsOnDeleteCustomState on_delete_incr_cb;
 
diff --git a/include/perfetto/trace_processor/trace_blob.h b/include/perfetto/trace_processor/trace_blob.h
index 87f327f..2223792 100644
--- a/include/perfetto/trace_processor/trace_blob.h
+++ b/include/perfetto/trace_processor/trace_blob.h
@@ -23,21 +23,15 @@
 #include <memory>
 #include <utility>
 
-#include "perfetto/base/build_config.h"
 #include "perfetto/base/export.h"
-#include "perfetto/base/logging.h"
 #include "perfetto/trace_processor/ref_counted.h"
 
-// TODO(primiano): implement file mmap on Windows.
-#if PERFETTO_BUILDFLAG(PERFETTO_OS_LINUX) || \
-    PERFETTO_BUILDFLAG(PERFETTO_OS_APPLE) || \
-    PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID)
-#define TRACE_PROCESSOR_HAS_MMAP() 1
-#else
-#define TRACE_PROCESSOR_HAS_MMAP() 0
-#endif
-
 namespace perfetto {
+
+namespace base {
+class ScopedMmap;
+}
+
 namespace trace_processor {
 
 // TraceBlob is a move-only buffer that owns a portion of memory containing
@@ -58,14 +52,16 @@
   static TraceBlob Allocate(size_t size);
   static TraceBlob CopyFrom(const void*, size_t size);
   static TraceBlob TakeOwnership(std::unique_ptr<uint8_t[]>, size_t size);
+  static TraceBlob FromMmap(base::ScopedMmap);
 
+  // DEPRECATED: does not work on Windows.
   // Takes ownership of the mmap region. Will call munmap() on destruction.
   static TraceBlob FromMmap(void* data, size_t size);
 
   ~TraceBlob();
 
   // Allow move.
-  TraceBlob(TraceBlob&& other) noexcept { *this = std::move(other); }
+  TraceBlob(TraceBlob&& other) noexcept;
   TraceBlob& operator=(TraceBlob&&) noexcept;
 
   // Disallow copy.
@@ -76,14 +72,14 @@
   size_t size() const { return size_; }
 
  private:
-  enum class Ownership { kNull = 0, kHeapBuf, kMmaped };
+  enum class Ownership { kNullOrMmaped = 0, kHeapBuf };
 
-  TraceBlob(Ownership ownership, uint8_t* data, size_t size)
-      : ownership_(ownership), data_(data), size_(size) {}
+  TraceBlob(Ownership ownership, uint8_t* data, size_t size);
 
-  Ownership ownership_ = Ownership::kNull;
+  Ownership ownership_ = Ownership::kNullOrMmaped;
   uint8_t* data_ = nullptr;
   size_t size_ = 0;
+  std::unique_ptr<base::ScopedMmap> mapping_;
 };
 
 }  // namespace trace_processor
diff --git a/include/perfetto/tracing/data_source.h b/include/perfetto/tracing/data_source.h
index 68f6fd9..8fec450 100644
--- a/include/perfetto/tracing/data_source.h
+++ b/include/perfetto/tracing/data_source.h
@@ -92,6 +92,8 @@
   // OnSetup() is invoked when tracing is configured. In most cases this happens
   // just before starting the trace. In the case of deferred start (see
   // deferred_start in trace_config.proto) start might happen later.
+  //
+  // Can be called from any thread.
   class SetupArgs {
    public:
     // This is valid only within the scope of the OnSetup() call and must not
@@ -111,6 +113,9 @@
     // The index of this data source instance (0..kMaxDataSourceInstances - 1).
     uint32_t internal_instance_index = 0;
   };
+  // Invoked after tracing is actually started.
+  //
+  // Can be called from any thread.
   virtual void OnStart(const StartArgs&);
 
   class PERFETTO_EXPORT_COMPONENT StopArgs {
@@ -140,6 +145,11 @@
     // The index of this data source instance (0..kMaxDataSourceInstances - 1).
     uint32_t internal_instance_index = 0;
   };
+  // Invoked before tracing is stopped.
+  //
+  // Can be called from any thread. Blocking this for too long is not a good
+  // idea and can cause deadlocks. Use HandleAsynchronously() to postpone
+  // disabling the data source instance.
   virtual void OnStop(const StopArgs&);
 
   class ClearIncrementalStateArgs {
@@ -147,6 +157,10 @@
     // The index of this data source instance (0..kMaxDataSourceInstances - 1).
     uint32_t internal_instance_index = 0;
   };
+  // Invoked before marking the thread local per-instance incremental state
+  // outdated.
+  //
+  // Can be called from any thread.
   virtual void WillClearIncrementalState(const ClearIncrementalStateArgs&);
 
   class FlushArgs {
@@ -168,6 +182,10 @@
   // Called when the tracing service requests a Flush. Users can override this
   // to tell other threads to flush their TraceContext for this data source
   // (the library cannot execute code on all the threads on its own).
+  //
+  // Can be called from any thread. Blocking this for too long is not a good
+  // idea and can cause deadlocks. Use HandleAsynchronously() to postpone
+  // sending the flush acknowledgement to the service.
   virtual void OnFlush(const FlushArgs&);
 
   // Determines whether a startup session can be adopted by a service-initiated
diff --git a/infra/git_mirror_bot/mirror_aosp_to_ghub_repo.py b/infra/git_mirror_bot/mirror_aosp_to_ghub_repo.py
index 8a3e061..341a06c 100644
--- a/infra/git_mirror_bot/mirror_aosp_to_ghub_repo.py
+++ b/infra/git_mirror_bot/mirror_aosp_to_ghub_repo.py
@@ -42,7 +42,10 @@
 POLL_PERIOD_SEC = 60
 
 # The actual key is stored into the Google Cloud project metadata.
-ENV = {'GIT_SSH_COMMAND': 'ssh -i ' + os.path.join(CUR_DIR, 'deploy_key')}
+ENV = {
+    'GIT_SSH_COMMAND': 'ssh -i ' + os.path.join(CUR_DIR, 'deploy_key'),
+    'GIT_DIR': WORKDIR,
+}
 
 
 def GitCmd(*args, **kwargs):
@@ -94,14 +97,14 @@
   for line in all_refs.splitlines():
     ref_sha1, ref = line.split()
 
-    FILTER_REGEX = r'(heads/main|heads/releases/.*|tags/v\d+\.\d+)$'
-    m = re.match('refs/' + FILTER_REGEX, ref)
+    FILT_REGEX = r'(heads/main|heads/master|heads/releases/.*|tags/v\d+\.\d+)$'
+    m = re.match('refs/' + FILT_REGEX, ref)
     if m is not None:
       branch = m.group(1)
       current_heads['refs/' + branch] = ref_sha1
       continue
 
-    m = re.match('refs/remotes/upstream/' + FILTER_REGEX, ref)
+    m = re.match('refs/remotes/upstream/' + FILT_REGEX, ref)
     if m is not None:
       branch = m.group(1)
       future_heads['refs/' + branch] = ref_sha1
diff --git a/infra/ui.perfetto.dev/cloudbuild.yaml b/infra/ui.perfetto.dev/cloudbuild.yaml
index 6cfca1b..b8905f6 100644
--- a/infra/ui.perfetto.dev/cloudbuild.yaml
+++ b/infra/ui.perfetto.dev/cloudbuild.yaml
@@ -3,6 +3,11 @@
 - name: gcr.io/$PROJECT_ID/perfetto-ui-builder
   args:
   - 'ui/release/builder_entrypoint.sh'
+  - ''
+  # The extra arg above is load bearing. The builder_entrypoint.sh
+  # script can't handle $1 sometimes being defined (as in
+  # cloudbuild.yaml) and sometimes not.
+
 # Timeout = 30 min (last measured time in Feb 2021: 12 min)
 timeout: 1800s
 options:
diff --git a/infra/ui.perfetto.dev/cloudbuild_release.yaml b/infra/ui.perfetto.dev/cloudbuild_release.yaml
new file mode 100644
index 0000000..bcf6995
--- /dev/null
+++ b/infra/ui.perfetto.dev/cloudbuild_release.yaml
@@ -0,0 +1,10 @@
+# See go/perfetto-ui-autopush for docs on how this works end-to-end.
+steps:
+- name: gcr.io/$PROJECT_ID/perfetto-ui-builder
+  args:
+  - 'ui/release/builder_entrypoint.sh'
+  - $BRANCH_NAME
+# Timeout = 30 min (last measured time in Feb 2021: 12 min)
+timeout: 1800s
+options:
+  machineType: E2_HIGHCPU_32
diff --git a/perfetto.rc b/perfetto.rc
index 18432dc..3f1801d 100644
--- a/perfetto.rc
+++ b/perfetto.rc
@@ -156,3 +156,31 @@
     setprop debug.hwui.skia_use_perfetto_track_events false
     setprop debug.renderengine.skia_tracing_enabled false
     setprop debug.renderengine.skia_use_perfetto_track_events false
+
+##############################################################################
+#  perfetto_persistent_sysui_tracing_for_bugreport - Keeps a persistent active
+#  tracing session, for debugging purposes
+##############################################################################
+
+on property:persist.debug.perfetto.persistent_sysui_tracing_for_bugreport=1 && property:persist.traced.enable=1 && property:ro.debuggable=1
+    # Set by traced after listen()ing on the consumer socket. Without this,
+    # perfetto could try to connect to traced before traced is ready to listen.
+    wait_for_prop sys.trace.traced_started 1
+    start perfetto_persistent_sysui_tracing_for_bugreport
+
+# If something disables traced, we don't want init to respawn the service (that will fail) continuously.
+on property:persist.traced.enable=0
+    stop perfetto_persistent_sysui_tracing_for_bugreport
+
+on property:persist.debug.perfetto.persistent_sysui_tracing_for_bugreport=0
+    setprop persist.debug.perfetto.persistent_sysui_tracing_for_bugreport ""
+
+on property:persist.debug.perfetto.persistent_sysui_tracing_for_bugreport=""
+    stop perfetto_persistent_sysui_tracing_for_bugreport
+
+service perfetto_persistent_sysui_tracing_for_bugreport /system/bin/perfetto -c /system/etc/perfetto/persistent_cfg.pbtxt --txt -o /dev/null
+    disabled
+    timeout_period 86400
+    restart_period 60
+    user nobody
+    group nobody
diff --git a/persistent_cfg.pbtxt b/persistent_cfg.pbtxt
new file mode 100644
index 0000000..7d802c0
--- /dev/null
+++ b/persistent_cfg.pbtxt
@@ -0,0 +1,59 @@
+# Persistent tracing configuration. Only enabled on some devices for debugging
+# purposes when persist.debug.perfetto.persistent_sysui_tracing_for_bugreport=1.
+
+bugreport_score: 5
+bugreport_filename: "sysui.pftrace"
+unique_session_name: "sysui_persistent"
+flush_timeout_ms: 10000
+compression_type: COMPRESSION_TYPE_DEFLATE
+
+# Buffer 0: for android.surfaceflinger.transactions
+buffers {
+  size_kb: 1024
+  fill_policy: RING_BUFFER
+}
+
+# Buffer 1: for android.surfaceflinger.layers
+buffers {
+  size_kb: 409600
+  fill_policy: RING_BUFFER
+  transfer_on_clone: true
+  clear_before_clone: true
+}
+
+# Buffer 2: for com.android.wm.shell.transition
+buffers {
+  size_kb: 32
+  fill_policy: RING_BUFFER
+}
+
+
+data_sources: {
+  config {
+    name: "android.surfaceflinger.transactions"
+    target_buffer: 0
+    surfaceflinger_transactions_config: {
+      mode: MODE_ACTIVE
+    }
+  }
+}
+
+data_sources: {
+  config {
+    name: "android.surfaceflinger.layers"
+    target_buffer: 1
+    surfaceflinger_layers_config: {
+        mode: MODE_GENERATED_BUGREPORT_ONLY
+        trace_flags: TRACE_FLAG_INPUT
+        trace_flags: TRACE_FLAG_BUFFERS
+    }
+  }
+}
+
+data_sources: {
+  config {
+    name: "com.android.wm.shell.transition"
+    target_buffer: 2
+  }
+}
+
diff --git a/protos/perfetto/config/ftrace/ftrace_config.proto b/protos/perfetto/config/ftrace/ftrace_config.proto
index 4d59f61..95db90e 100644
--- a/protos/perfetto/config/ftrace/ftrace_config.proto
+++ b/protos/perfetto/config/ftrace/ftrace_config.proto
@@ -143,9 +143,10 @@
   // Introduced in: Android T.
   optional bool disable_generic_events = 16;
 
-  // The list of syscalls that should be recorded by sys_{enter,exit} ftrace
-  // events. When empty, all syscalls are recorded. If neither sys_{enter,exit}
-  // are enabled, this setting has no effect. Example: ["sys_read", "sys_open"].
+  // The subset of syscalls to record. Enables raw_syscalls/sys_{enter,exit}.
+  // To record all syscalls, leave this unset and add raw_syscalls to
+  // |ftrace_events|.
+  // Example: ["sys_read", "sys_open"].
   // Introduced in: Android U.
   repeated string syscall_events = 18;
 
diff --git a/protos/perfetto/config/perfetto_config.proto b/protos/perfetto/config/perfetto_config.proto
index f158730..b2a240a 100644
--- a/protos/perfetto/config/perfetto_config.proto
+++ b/protos/perfetto/config/perfetto_config.proto
@@ -870,9 +870,10 @@
   // Introduced in: Android T.
   optional bool disable_generic_events = 16;
 
-  // The list of syscalls that should be recorded by sys_{enter,exit} ftrace
-  // events. When empty, all syscalls are recorded. If neither sys_{enter,exit}
-  // are enabled, this setting has no effect. Example: ["sys_read", "sys_open"].
+  // The subset of syscalls to record. Enables raw_syscalls/sys_{enter,exit}.
+  // To record all syscalls, leave this unset and add raw_syscalls to
+  // |ftrace_events|.
+  // Example: ["sys_read", "sys_open"].
   // Introduced in: Android U.
   repeated string syscall_events = 18;
 
@@ -1102,7 +1103,6 @@
 
     DISABLE_ON_DEMAND = 2;
   }
-
   repeated Quirks quirks = 1;
 
   // If enabled all processes will be scanned and dumped when the trace starts.
@@ -1116,14 +1116,9 @@
   // /proc/pid/status and oom_score_adj every X ms.
   // It will also sample /proc/pid/smaps_rollup if scan_smaps_rollup = true.
   // This is required to be > 100ms to avoid excessive CPU usage.
-  // TODO(primiano): add CPU cost for change this value.
   optional uint32 proc_stats_poll_ms = 4;
 
-  // If empty samples stats for all processes. If non empty samples stats only
-  // for processes matching the given string in their argv0 (i.e. the first
-  // entry of /proc/pid/cmdline).
-  // TODO(primiano): implement this feature.
-  // repeated string proc_stats_filter = 5;
+  // id 5 never used
 
   // This is required to be either = 0 or a multiple of |proc_stats_poll_ms|
   // (default: |proc_stats_poll_ms|). If = 0, will be set to
@@ -1131,22 +1126,30 @@
   // multiple.
   optional uint32 proc_stats_cache_ttl_ms = 6;
 
-  // DEPRECATED record_thread_time_in_state
-  reserved 7;
-
-  // DEPRECATED thread_time_in_state_cache_size
-  reserved 8;
-
-  // If true this will resolve filedescriptors for each process so these
-  // can be mapped to their actual device or file.
-  // Requires raw_syscalls/sys_exit ftrace events to be enabled or
+  // Niche feature: If true this will resolve file descriptors for each process
+  // so these can be mapped to their actual device or file.
+  // Requires raw_syscalls/sys_{enter,exit} ftrace events to be enabled or
   // new fds opened after initially scanning a process will not be
   // recognized.
   optional bool resolve_process_fds = 9;
 
-  // If enabled memory stats from /proc/pid/smaps_rollup will be included
-  // in process stats.
+  // If true, output will include memory stats from /proc/pid/smaps_rollup.
   optional bool scan_smaps_rollup = 10;
+
+  // If true: process descriptions will include process age (starttime in
+  // /proc/pid/stat).
+  // Introduced in: perfetto v44.
+  optional bool record_process_age = 11;
+
+  // If true and |proc_stats_poll_ms| is set, process stats will include time
+  // spent running in user/kernel mode (utime/stime in /proc/pid/stat).
+  // Introduced in: perfetto v44.
+  optional bool record_process_runtime = 12;
+
+  // record_thread_time_in_state
+  reserved 7;
+  // thread_time_in_state_cache_size
+  reserved 8;
 }
 
 // End of protos/perfetto/config/process_stats/process_stats_config.proto
diff --git a/protos/perfetto/config/process_stats/process_stats_config.proto b/protos/perfetto/config/process_stats/process_stats_config.proto
index 239513f..a1c960a 100644
--- a/protos/perfetto/config/process_stats/process_stats_config.proto
+++ b/protos/perfetto/config/process_stats/process_stats_config.proto
@@ -29,7 +29,6 @@
 
     DISABLE_ON_DEMAND = 2;
   }
-
   repeated Quirks quirks = 1;
 
   // If enabled all processes will be scanned and dumped when the trace starts.
@@ -43,14 +42,9 @@
   // /proc/pid/status and oom_score_adj every X ms.
   // It will also sample /proc/pid/smaps_rollup if scan_smaps_rollup = true.
   // This is required to be > 100ms to avoid excessive CPU usage.
-  // TODO(primiano): add CPU cost for change this value.
   optional uint32 proc_stats_poll_ms = 4;
 
-  // If empty samples stats for all processes. If non empty samples stats only
-  // for processes matching the given string in their argv0 (i.e. the first
-  // entry of /proc/pid/cmdline).
-  // TODO(primiano): implement this feature.
-  // repeated string proc_stats_filter = 5;
+  // id 5 never used
 
   // This is required to be either = 0 or a multiple of |proc_stats_poll_ms|
   // (default: |proc_stats_poll_ms|). If = 0, will be set to
@@ -58,20 +52,28 @@
   // multiple.
   optional uint32 proc_stats_cache_ttl_ms = 6;
 
-  // DEPRECATED record_thread_time_in_state
-  reserved 7;
-
-  // DEPRECATED thread_time_in_state_cache_size
-  reserved 8;
-
-  // If true this will resolve filedescriptors for each process so these
-  // can be mapped to their actual device or file.
-  // Requires raw_syscalls/sys_exit ftrace events to be enabled or
+  // Niche feature: If true this will resolve file descriptors for each process
+  // so these can be mapped to their actual device or file.
+  // Requires raw_syscalls/sys_{enter,exit} ftrace events to be enabled or
   // new fds opened after initially scanning a process will not be
   // recognized.
   optional bool resolve_process_fds = 9;
 
-  // If enabled memory stats from /proc/pid/smaps_rollup will be included
-  // in process stats.
+  // If true, output will include memory stats from /proc/pid/smaps_rollup.
   optional bool scan_smaps_rollup = 10;
+
+  // If true: process descriptions will include process age (starttime in
+  // /proc/pid/stat).
+  // Introduced in: perfetto v44.
+  optional bool record_process_age = 11;
+
+  // If true and |proc_stats_poll_ms| is set, process stats will include time
+  // spent running in user/kernel mode (utime/stime in /proc/pid/stat).
+  // Introduced in: perfetto v44.
+  optional bool record_process_runtime = 12;
+
+  // record_thread_time_in_state
+  reserved 7;
+  // thread_time_in_state_cache_size
+  reserved 8;
 }
diff --git a/protos/perfetto/metrics/android/BUILD.gn b/protos/perfetto/metrics/android/BUILD.gn
index 9caeaef..33f4195 100644
--- a/protos/perfetto/metrics/android/BUILD.gn
+++ b/protos/perfetto/metrics/android/BUILD.gn
@@ -26,6 +26,7 @@
     "android_boot.proto",
     "android_boot_unagg.proto",
     "android_frame_timeline_metric.proto",
+    "android_garbage_collection_unagg_metric.proto",
     "android_sysui_notifications_blocking_calls_metric.proto",
     "android_trusty_workqueues.proto",
     "anr_metric.proto",
diff --git a/protos/perfetto/metrics/android/android_boot_unagg.proto b/protos/perfetto/metrics/android/android_boot_unagg.proto
index 12bf12b..c209840 100644
--- a/protos/perfetto/metrics/android/android_boot_unagg.proto
+++ b/protos/perfetto/metrics/android/android_boot_unagg.proto
@@ -19,7 +19,11 @@
 package perfetto.protos;
 
 import "protos/perfetto/metrics/android/app_process_starts_metric.proto";
+import "protos/perfetto/metrics/android/android_garbage_collection_unagg_metric.proto";
 
 message AndroidBootUnagg {
+  // Looks at all apps started after boot complete broadcast.
   optional AndroidAppProcessStartsMetric android_app_process_start_metric = 1;
+  // Looks at all GC that occurs after boot complete broadcast.
+  optional AndroidGarbageCollectionUnaggMetric android_post_boot_gc_metric = 2;
 }
diff --git a/protos/perfetto/metrics/android/android_garbage_collection_unagg_metric.proto b/protos/perfetto/metrics/android/android_garbage_collection_unagg_metric.proto
new file mode 100644
index 0000000..075b9a0
--- /dev/null
+++ b/protos/perfetto/metrics/android/android_garbage_collection_unagg_metric.proto
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2024 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+syntax = "proto2";
+
+package perfetto.protos;
+
+message AndroidGarbageCollectionUnaggMetric {
+  message GarbageCollectionEvent {
+    // Name of thread running garbage collection.
+    optional string thread_name = 1;
+    // Name of process running garbage collection.
+    optional string process_name = 2;
+    // Type of garbage collection.
+    optional string gc_type = 3;
+    // Whether garbage collection is mark compact or copying.
+    optional int64 is_mark_compact = 4;
+    // MB reclaimed after garbage collection.
+    optional double reclaimed_mb = 5;
+    // Minimum heap size in MB during garbage collection.
+    optional double min_heap_mb = 6;
+    // Maximum heap size in MB during garbage collection.
+    optional double max_heap_mb = 7;
+    // Memory reclaimed per ms of running time.
+    optional double mb_per_ms_of_running_gc = 8;
+    // Memory reclaimed per ms of wall time.
+    optional double mb_per_ms_of_wall_gc = 9;
+    // Garbage collection wall duration.
+    optional int64 gc_dur = 10;
+    // Garbage collection duration spent executing on CPU.
+    optional int64 gc_running_dur = 11;
+    // Garbage collection duration spent waiting for CPU.
+    optional int64 gc_runnable_dur = 12;
+    // Garbage collection duration spent waiting in the Linux kernel on IO.
+    optional int64 gc_unint_io_dur = 13;
+    // Garbage collection duration spent waiting in the Linux kernel without IO.
+    optional int64 gc_unint_non_io_dur = 14;
+    // Garbage collection duration spent waiting in interruptible sleep.
+    optional int64 gc_int_dur = 15;
+    // ts of the event in trace.
+    optional int64 gc_ts = 16;
+    // pid of the event in trace.
+    optional int64 pid = 17;
+    // tid of the event in trace.
+    optional int64 tid = 18;
+    // monotonic duration of event.
+    optional int64 gc_monotonic_dur = 19;
+  }
+  repeated GarbageCollectionEvent gc_events = 1;
+}
\ No newline at end of file
diff --git a/protos/perfetto/metrics/android/app_process_starts_metric.proto b/protos/perfetto/metrics/android/app_process_starts_metric.proto
index dafce94..e8457df 100644
--- a/protos/perfetto/metrics/android/app_process_starts_metric.proto
+++ b/protos/perfetto/metrics/android/app_process_starts_metric.proto
@@ -18,7 +18,6 @@
 
 package perfetto.protos;
 
-// Looks at all apps started after boot complete broadcast.
 message AndroidAppProcessStartsMetric {
   // Next id : 4
   message ProcessStart {
diff --git a/protos/perfetto/metrics/metrics.proto b/protos/perfetto/metrics/metrics.proto
index 8bd4d38..5130b29 100644
--- a/protos/perfetto/metrics/metrics.proto
+++ b/protos/perfetto/metrics/metrics.proto
@@ -21,6 +21,7 @@
 import "protos/perfetto/metrics/android/ad_services_metric.proto";
 import "protos/perfetto/metrics/android/android_boot.proto";
 import "protos/perfetto/metrics/android/android_boot_unagg.proto";
+import "protos/perfetto/metrics/android/android_garbage_collection_unagg_metric.proto";
 import "protos/perfetto/metrics/android/sysui_notif_shade_list_builder_metric.proto";
 import "protos/perfetto/metrics/android/sysui_update_notif_on_ui_mode_changed_metric.proto";
 import "protos/perfetto/metrics/android/android_frame_timeline_metric.proto";
@@ -286,6 +287,9 @@
   // Android boot unaggregated metrics.
   optional AndroidBootUnagg android_boot_unagg = 62;
 
+  // Android garbage collection metrics
+  optional AndroidGarbageCollectionUnaggMetric android_garbage_collection_unagg = 63;
+
   // Demo extensions.
   extensions 450 to 499;
 
diff --git a/protos/perfetto/metrics/perfetto_merged_metrics.proto b/protos/perfetto/metrics/perfetto_merged_metrics.proto
index 58f9731..eb9ab74 100644
--- a/protos/perfetto/metrics/perfetto_merged_metrics.proto
+++ b/protos/perfetto/metrics/perfetto_merged_metrics.proto
@@ -193,9 +193,55 @@
 
 // End of protos/perfetto/metrics/android/android_boot.proto
 
+// Begin of protos/perfetto/metrics/android/android_garbage_collection_unagg_metric.proto
+
+message AndroidGarbageCollectionUnaggMetric {
+  message GarbageCollectionEvent {
+    // Name of thread running garbage collection.
+    optional string thread_name = 1;
+    // Name of process running garbage collection.
+    optional string process_name = 2;
+    // Type of garbage collection.
+    optional string gc_type = 3;
+    // Whether garbage collection is mark compact or copying.
+    optional int64 is_mark_compact = 4;
+    // MB reclaimed after garbage collection.
+    optional double reclaimed_mb = 5;
+    // Minimum heap size in MB during garbage collection.
+    optional double min_heap_mb = 6;
+    // Maximum heap size in MB during garbage collection.
+    optional double max_heap_mb = 7;
+    // Memory reclaimed per ms of running time.
+    optional double mb_per_ms_of_running_gc = 8;
+    // Memory reclaimed per ms of wall time.
+    optional double mb_per_ms_of_wall_gc = 9;
+    // Garbage collection wall duration.
+    optional int64 gc_dur = 10;
+    // Garbage collection duration spent executing on CPU.
+    optional int64 gc_running_dur = 11;
+    // Garbage collection duration spent waiting for CPU.
+    optional int64 gc_runnable_dur = 12;
+    // Garbage collection duration spent waiting in the Linux kernel on IO.
+    optional int64 gc_unint_io_dur = 13;
+    // Garbage collection duration spent waiting in the Linux kernel without IO.
+    optional int64 gc_unint_non_io_dur = 14;
+    // Garbage collection duration spent waiting in interruptible sleep.
+    optional int64 gc_int_dur = 15;
+    // ts of the event in trace.
+    optional int64 gc_ts = 16;
+    // pid of the event in trace.
+    optional int64 pid = 17;
+    // tid of the event in trace.
+    optional int64 tid = 18;
+    // monotonic duration of event.
+    optional int64 gc_monotonic_dur = 19;
+  }
+  repeated GarbageCollectionEvent gc_events = 1;
+}
+// End of protos/perfetto/metrics/android/android_garbage_collection_unagg_metric.proto
+
 // Begin of protos/perfetto/metrics/android/app_process_starts_metric.proto
 
-// Looks at all apps started after boot complete broadcast.
 message AndroidAppProcessStartsMetric {
   // Next id : 4
   message ProcessStart {
@@ -223,7 +269,10 @@
 // Begin of protos/perfetto/metrics/android/android_boot_unagg.proto
 
 message AndroidBootUnagg {
+  // Looks at all apps started after boot complete broadcast.
   optional AndroidAppProcessStartsMetric android_app_process_start_metric = 1;
+  // Looks at all GC that occurs after boot complete broadcast.
+  optional AndroidGarbageCollectionUnaggMetric android_post_boot_gc_metric = 2;
 }
 
 // End of protos/perfetto/metrics/android/android_boot_unagg.proto
@@ -2642,6 +2691,9 @@
   // Android boot unaggregated metrics.
   optional AndroidBootUnagg android_boot_unagg = 62;
 
+  // Android garbage collection unaggregated metrics.
+  optional AndroidGarbageCollectionUnaggMetric android_garbage_collection_unagg = 63;
+
   // Demo extensions.
   extensions 450 to 499;
 
diff --git a/protos/perfetto/trace/perfetto_trace.proto b/protos/perfetto/trace/perfetto_trace.proto
index 490a9e8..dd4ecff 100644
--- a/protos/perfetto/trace/perfetto_trace.proto
+++ b/protos/perfetto/trace/perfetto_trace.proto
@@ -870,9 +870,10 @@
   // Introduced in: Android T.
   optional bool disable_generic_events = 16;
 
-  // The list of syscalls that should be recorded by sys_{enter,exit} ftrace
-  // events. When empty, all syscalls are recorded. If neither sys_{enter,exit}
-  // are enabled, this setting has no effect. Example: ["sys_read", "sys_open"].
+  // The subset of syscalls to record. Enables raw_syscalls/sys_{enter,exit}.
+  // To record all syscalls, leave this unset and add raw_syscalls to
+  // |ftrace_events|.
+  // Example: ["sys_read", "sys_open"].
   // Introduced in: Android U.
   repeated string syscall_events = 18;
 
@@ -1102,7 +1103,6 @@
 
     DISABLE_ON_DEMAND = 2;
   }
-
   repeated Quirks quirks = 1;
 
   // If enabled all processes will be scanned and dumped when the trace starts.
@@ -1116,14 +1116,9 @@
   // /proc/pid/status and oom_score_adj every X ms.
   // It will also sample /proc/pid/smaps_rollup if scan_smaps_rollup = true.
   // This is required to be > 100ms to avoid excessive CPU usage.
-  // TODO(primiano): add CPU cost for change this value.
   optional uint32 proc_stats_poll_ms = 4;
 
-  // If empty samples stats for all processes. If non empty samples stats only
-  // for processes matching the given string in their argv0 (i.e. the first
-  // entry of /proc/pid/cmdline).
-  // TODO(primiano): implement this feature.
-  // repeated string proc_stats_filter = 5;
+  // id 5 never used
 
   // This is required to be either = 0 or a multiple of |proc_stats_poll_ms|
   // (default: |proc_stats_poll_ms|). If = 0, will be set to
@@ -1131,22 +1126,30 @@
   // multiple.
   optional uint32 proc_stats_cache_ttl_ms = 6;
 
-  // DEPRECATED record_thread_time_in_state
-  reserved 7;
-
-  // DEPRECATED thread_time_in_state_cache_size
-  reserved 8;
-
-  // If true this will resolve filedescriptors for each process so these
-  // can be mapped to their actual device or file.
-  // Requires raw_syscalls/sys_exit ftrace events to be enabled or
+  // Niche feature: If true this will resolve file descriptors for each process
+  // so these can be mapped to their actual device or file.
+  // Requires raw_syscalls/sys_{enter,exit} ftrace events to be enabled or
   // new fds opened after initially scanning a process will not be
   // recognized.
   optional bool resolve_process_fds = 9;
 
-  // If enabled memory stats from /proc/pid/smaps_rollup will be included
-  // in process stats.
+  // If true, output will include memory stats from /proc/pid/smaps_rollup.
   optional bool scan_smaps_rollup = 10;
+
+  // If true: process descriptions will include process age (starttime in
+  // /proc/pid/stat).
+  // Introduced in: perfetto v44.
+  optional bool record_process_age = 11;
+
+  // If true and |proc_stats_poll_ms| is non-zero, process stats will include
+  // time spent running in user/kernel mode (utime/stime in /proc/pid/stat).
+  // Introduced in: perfetto v44.
+  optional bool record_process_runtime = 12;
+
+  // record_thread_time_in_state
+  reserved 7;
+  // thread_time_in_state_cache_size
+  reserved 8;
 }
 
 // End of protos/perfetto/config/process_stats/process_stats_config.proto
@@ -13430,7 +13433,7 @@
 // dedicated message (as opposite to be fields in process_tree.proto) because
 // they are dumped at a different rate than cmdline and thread list.
 // Note: not all of these stats will be present in every ProcessStats message
-// and sometimes processes may be missing . This is because counters are
+// and sometimes processes may be missing. This is because counters are
 // cached to reduce emission of counters which do not change.
 message ProcessStats {
   // Per-thread periodically sampled stats.
@@ -13457,6 +13460,8 @@
   message Process {
     optional int32 pid = 1;
 
+    repeated Thread threads = 11;
+
     // See /proc/[pid]/status in `man 5 proc` for a description of these fields.
     optional uint64 vm_size_kb = 2;
     optional uint64 vm_rss_kb = 3;
@@ -13471,8 +13476,6 @@
 
     optional int64 oom_score_adj = 10;
 
-    repeated Thread threads = 11;
-
     // The peak resident set size is resettable in newer Posix kernels.
     // This field specifies if reset is supported and if the writer had reset
     // the peaks after each process stats recording.
@@ -13492,6 +13495,16 @@
     optional uint64 smr_pss_anon_kb = 18;
     optional uint64 smr_pss_file_kb = 19;
     optional uint64 smr_pss_shmem_kb = 20;
+
+    // Time spent scheduled in user mode in nanoseconds. Parsed from utime in
+    // /proc/pid/stat. Recorded if record_process_runtime config option is set.
+    // Resolution of "clock ticks", usually 10ms.
+    optional uint64 runtime_user_mode = 21;
+
+    // Time spent scheduled in kernel mode in nanoseconds. Parsed from stime in
+    // /proc/pid/stat. Recorded if record_process_runtime config option is set.
+    // Resolution of "clock ticks", usually 10ms.
+    optional uint64 runtime_kernel_mode = 22;
   }
   repeated Process processes = 1;
 
@@ -13542,17 +13555,22 @@
     // and it will contain /proc/pid/comm.
     repeated string cmdline = 3;
 
-    // No longer used as of Apr 2018, when the dedicated |threads| field was
-    // introduced in ProcessTree.
-    repeated Thread threads_deprecated = 4 [deprecated = true];
-
     // The uid for the process, as per /proc/pid/status.
     optional int32 uid = 5;
 
     // The non-root-level process IDs if the process runs in a PID namespace.
-    // Read from the NSpid entry of /proc/<pid>/status, with the first element (
-    // root-level process ID) omitted.
+    // Read from the NSpid entry of /proc/<pid>/status, with the first element
+    // (root-level process ID) omitted.
     repeated int32 nspid = 6;
+
+    // Timestamp of when the process was created, in nanoseconds
+    // from boot. Parsed from starttime in /proc/pid/stat.
+    // Recorded if record_process_age config option is set.
+    // Resolution of "clock ticks", usually 10ms.
+    optional uint64 process_start_from_boot = 7;
+
+    // threads_deprecated
+    reserved 4;
   }
 
   // List of processes and threads in the client. These lists are incremental
diff --git a/protos/perfetto/trace/ps/process_stats.proto b/protos/perfetto/trace/ps/process_stats.proto
index 03759b4..5175625 100644
--- a/protos/perfetto/trace/ps/process_stats.proto
+++ b/protos/perfetto/trace/ps/process_stats.proto
@@ -21,7 +21,7 @@
 // dedicated message (as opposite to be fields in process_tree.proto) because
 // they are dumped at a different rate than cmdline and thread list.
 // Note: not all of these stats will be present in every ProcessStats message
-// and sometimes processes may be missing . This is because counters are
+// and sometimes processes may be missing. This is because counters are
 // cached to reduce emission of counters which do not change.
 message ProcessStats {
   // Per-thread periodically sampled stats.
@@ -48,6 +48,8 @@
   message Process {
     optional int32 pid = 1;
 
+    repeated Thread threads = 11;
+
     // See /proc/[pid]/status in `man 5 proc` for a description of these fields.
     optional uint64 vm_size_kb = 2;
     optional uint64 vm_rss_kb = 3;
@@ -62,8 +64,6 @@
 
     optional int64 oom_score_adj = 10;
 
-    repeated Thread threads = 11;
-
     // The peak resident set size is resettable in newer Posix kernels.
     // This field specifies if reset is supported and if the writer had reset
     // the peaks after each process stats recording.
@@ -83,6 +83,16 @@
     optional uint64 smr_pss_anon_kb = 18;
     optional uint64 smr_pss_file_kb = 19;
     optional uint64 smr_pss_shmem_kb = 20;
+
+    // Time spent scheduled in user mode in nanoseconds. Parsed from utime in
+    // /proc/pid/stat. Recorded if record_process_runtime config option is set.
+    // Resolution of "clock ticks", usually 10ms.
+    optional uint64 runtime_user_mode = 21;
+
+    // Time spent scheduled in kernel mode in nanoseconds. Parsed from stime in
+    // /proc/pid/stat. Recorded if record_process_runtime config option is set.
+    // Resolution of "clock ticks", usually 10ms.
+    optional uint64 runtime_kernel_mode = 22;
   }
   repeated Process processes = 1;
 
diff --git a/protos/perfetto/trace/ps/process_tree.proto b/protos/perfetto/trace/ps/process_tree.proto
index 0784923..0b9f04c 100644
--- a/protos/perfetto/trace/ps/process_tree.proto
+++ b/protos/perfetto/trace/ps/process_tree.proto
@@ -54,17 +54,22 @@
     // and it will contain /proc/pid/comm.
     repeated string cmdline = 3;
 
-    // No longer used as of Apr 2018, when the dedicated |threads| field was
-    // introduced in ProcessTree.
-    repeated Thread threads_deprecated = 4 [deprecated = true];
-
     // The uid for the process, as per /proc/pid/status.
     optional int32 uid = 5;
 
     // The non-root-level process IDs if the process runs in a PID namespace.
-    // Read from the NSpid entry of /proc/<pid>/status, with the first element (
-    // root-level process ID) omitted.
+    // Read from the NSpid entry of /proc/<pid>/status, with the first element
+    // (root-level process ID) omitted.
     repeated int32 nspid = 6;
+
+    // Timestamp of when the process was created, in nanoseconds
+    // from boot. Parsed from starttime in /proc/pid/stat.
+    // Recorded if record_process_age config option is set.
+    // Resolution of "clock ticks", usually 10ms.
+    optional uint64 process_start_from_boot = 7;
+
+    // threads_deprecated
+    reserved 4;
   }
 
   // List of processes and threads in the client. These lists are incremental
diff --git a/python/BUILD b/python/BUILD
index 4700d19..3b65af7 100644
--- a/python/BUILD
+++ b/python/BUILD
@@ -94,6 +94,17 @@
 # GN target: //python:trace_processor_py
 perfetto_py_library(
     name = "trace_processor_py",
+    visibility = [
+        "//visibility:public",
+    ],
+    deps = [
+        ":trace_processor_py_no_resolvers",
+    ] + PERFETTO_CONFIG.deps.tp_resolvers_py,
+)
+
+# GN target: //python:trace_processor_py_no_resolvers
+perfetto_py_library(
+    name = "trace_processor_py_no_resolvers",
     srcs = [
         "perfetto/trace_processor/__init__.py",
         "perfetto/trace_processor/api.py",
diff --git a/python/BUILD.gn b/python/BUILD.gn
index 8c42d14..baaa01b 100644
--- a/python/BUILD.gn
+++ b/python/BUILD.gn
@@ -47,7 +47,7 @@
   main = "example.py"
 }
 
-perfetto_py_library("trace_processor_py") {
+perfetto_py_library("trace_processor_py_no_resolvers") {
   sources = [
     "perfetto/trace_processor/__init__.py",
     "perfetto/trace_processor/api.py",
@@ -69,6 +69,13 @@
   ]
 }
 
+perfetto_py_library("trace_processor_py") {
+  deps = [
+    ":trace_processor_py_no_resolvers",
+    "../gn:tp_resolvers_py",
+  ]
+}
+
 perfetto_py_library("trace_uri_resolver") {
   sources = [
     "perfetto/trace_uri_resolver/__init__.py",
diff --git a/python/generators/sql_processing/utils.py b/python/generators/sql_processing/utils.py
index ba14110..539115e 100644
--- a/python/generators/sql_processing/utils.py
+++ b/python/generators/sql_processing/utils.py
@@ -116,7 +116,8 @@
 
 # Allows for nonstandard object names.
 OBJECT_NAME_ALLOWLIST = {
-    'slices/with_context.sql': ['process_slice', 'thread_slice']
+    'slices/with_context.sql': ['process_slice', 'thread_slice'],
+    'slices/cpu_time.sql': ['thread_slice_cpu_time']
 }
 
 # Given a regex pattern and a string to match against, returns all the
diff --git a/python/generators/trace_processor_table/serialize.py b/python/generators/trace_processor_table/serialize.py
index 2881ce0..2096f55 100644
--- a/python/generators/trace_processor_table/serialize.py
+++ b/python/generators/trace_processor_table/serialize.py
@@ -591,7 +591,7 @@
       if (overlays()[i].row_map().IsIndexVector()) {{
         overlay_layers[i].reset(new column::ArrangementOverlay(
             overlays()[i].row_map().GetIfIndexVector(),
-            Indices::State::kNonmonotonic));
+            column::DataLayerChain::Indices::State::kNonmonotonic));
       }} else if (overlays()[i].row_map().IsBitVector()) {{
         overlay_layers[i].reset(new column::SelectorOverlay(
             overlays()[i].row_map().GetIfBitVector()));
diff --git a/python/perfetto/prebuilts/manifests/tracebox.py b/python/perfetto/prebuilts/manifests/tracebox.py
index 04b3de4..44e8aed 100755
--- a/python/perfetto/prebuilts/manifests/tracebox.py
+++ b/python/perfetto/prebuilts/manifests/tracebox.py
@@ -1,15 +1,15 @@
-# This file has been generated by: tools/roll-prebuilts v41.0
+# This file has been generated by: tools/roll-prebuilts v43.1
 TRACEBOX_MANIFEST = [{
     'arch':
         'mac-amd64',
     'file_name':
         'tracebox',
     'file_size':
-        1515224,
+        1564728,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v41.0/mac-amd64/tracebox',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v43.1/mac-amd64/tracebox',
     'sha256':
-        '5a2cd4a6cce0430c85ca6e648c4058679019ebee01377400af2f12dcb7aecacf',
+        'dde1f657b10376f3fd684d1ce4302fd12c0479b567689f5dace8647375edd08c',
     'platform':
         'darwin',
     'machine': ['x86_64']
@@ -19,11 +19,11 @@
     'file_name':
         'tracebox',
     'file_size':
-        1392792,
+        1459160,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v41.0/mac-arm64/tracebox',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v43.1/mac-arm64/tracebox',
     'sha256':
-        '232e13cc957204079e3e5d89c4c9d84c7e689679e1d7b3f722fabecbfd61b9e6',
+        '349fc531090e134d708bfe2c44330c2f08280aa424f4e9f6d139897c1ad14da3',
     'platform':
         'darwin',
     'machine': ['arm64']
@@ -33,11 +33,11 @@
     'file_name':
         'tracebox',
     'file_size':
-        2241912,
+        2314424,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v41.0/linux-amd64/tracebox',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v43.1/linux-amd64/tracebox',
     'sha256':
-        '679b634ef3b95a6f4e751198a8fe4943e513d568b91782a181662548fda011b0',
+        'e35fd880f483ab26d57d292a7c4d1c9df6393bff7f1e7694e7d3642472c8fff9',
     'platform':
         'linux',
     'machine': ['x86_64']
@@ -47,11 +47,11 @@
     'file_name':
         'tracebox',
     'file_size':
-        1349220,
+        1418968,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v41.0/linux-arm/tracebox',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v43.1/linux-arm/tracebox',
     'sha256':
-        'af00007bb0419748a79ea3dd1c2572ba78f0791e9861c5cf2b72ecea75c74032',
+        '7e550ab781f79fcf548f37a7cc3aaa50dbab235b53c445829815d987eb162843',
     'platform':
         'linux',
     'machine': ['armv6l', 'armv7l', 'armv8l']
@@ -61,11 +61,11 @@
     'file_name':
         'tracebox',
     'file_size':
-        2168944,
+        2221176,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v41.0/linux-arm64/tracebox',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v43.1/linux-arm64/tracebox',
     'sha256':
-        'beeaef4a0b1927c1144ae4b66c81660a74ee2e27e70d875503a33f42c711ca14',
+        '355f2c6e66467a9e81855aa34a16fbe8cd68f01089ec0f5e3074f2011328a97f',
     'platform':
         'linux',
     'machine': ['aarch64']
@@ -75,42 +75,42 @@
     'file_name':
         'tracebox',
     'file_size':
-        1247188,
+        1304280,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v41.0/android-arm/tracebox',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v43.1/android-arm/tracebox',
     'sha256':
-        '93efbb520d1faf71fd99319d778c9d187c4ab06d25d1d2e38806c19724ab2012'
+        'b1c31ea2c07b519c40732416ecf91d8dbe0c04355150598c5ca2434249669a92'
 }, {
     'arch':
         'android-arm64',
     'file_name':
         'tracebox',
     'file_size':
-        1886888,
+        2076144,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v41.0/android-arm64/tracebox',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v43.1/android-arm64/tracebox',
     'sha256':
-        '75d4d4114a1e19b66c94bc315d5948751923adec364bae7d0db4125e980e3109'
+        '28d7476c048123b6d73e1af4f5054dffdc87b67163980454761433bf49626848'
 }, {
     'arch':
         'android-x86',
     'file_name':
         'tracebox',
     'file_size':
-        1869740,
+        2253568,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v41.0/android-x86/tracebox',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v43.1/android-x86/tracebox',
     'sha256':
-        '9518b5083c70c7b3f4b37718786b0362a99745eaa4640256000601c52966344b'
+        '22e61978317ac4ef2934768d9e65bee2b1c7a332bdada5b4c1525d6b0339d4ac'
 }, {
     'arch':
         'android-x64',
     'file_name':
         'tracebox',
     'file_size':
-        2149032,
+        2101752,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v41.0/android-x64/tracebox',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v43.1/android-x64/tracebox',
     'sha256':
-        '93d4d42fe36bce640e2337d27d7aa58cf75b42d659a84dcf900e724f6f9c974c'
+        '6c74f75555dc7bb31e54debe9fc27fc4db960d2382862d1a8a9d0cf03f7d8300'
 }]
diff --git a/python/perfetto/prebuilts/manifests/traceconv.py b/python/perfetto/prebuilts/manifests/traceconv.py
index b1901b7..00cae29 100755
--- a/python/perfetto/prebuilts/manifests/traceconv.py
+++ b/python/perfetto/prebuilts/manifests/traceconv.py
@@ -1,15 +1,15 @@
-# This file has been generated by: tools/roll-prebuilts v41.0
+# This file has been generated by: tools/roll-prebuilts v43.1
 TRACECONV_MANIFEST = [{
     'arch':
         'mac-amd64',
     'file_name':
         'traceconv',
     'file_size':
-        9381704,
+        7790424,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v41.0/mac-amd64/traceconv',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v43.1/mac-amd64/traceconv',
     'sha256':
-        'e5678d6e3eebeb6feecb9693f924c708c02ba78bd0ce0a427d1dd7acd2b37120',
+        '88007b64828e835e0326c11f66f0bba7d8ab117562963086a4f19d8cb060204d',
     'platform':
         'darwin',
     'machine': ['x86_64']
@@ -19,11 +19,11 @@
     'file_name':
         'traceconv',
     'file_size':
-        7976744,
+        7264824,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v41.0/mac-arm64/traceconv',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v43.1/mac-arm64/traceconv',
     'sha256':
-        'a199463232d3e8e37502d955a2bd712b1ab431c0ee1903d667bdf97b7345083c',
+        'be5769279ef8442e80130e4bdb6a0a6aa11305442207ea18ff2cf38b21a71a57',
     'platform':
         'darwin',
     'machine': ['arm64']
@@ -33,11 +33,11 @@
     'file_name':
         'traceconv',
     'file_size':
-        9127632,
+        7885952,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v41.0/linux-amd64/traceconv',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v43.1/linux-amd64/traceconv',
     'sha256':
-        '3c8e7b3cef528684d42f8a550cf38643f73f6ea82f6686f88f5b6af4d4e7bbc6',
+        '51cfdf5060bcd87d08402620d88d0243f7bb39f2878906614d53fa3ddd78dd92',
     'platform':
         'linux',
     'machine': ['x86_64']
@@ -47,11 +47,11 @@
     'file_name':
         'traceconv',
     'file_size':
-        6961192,
+        5919372,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v41.0/linux-arm/traceconv',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v43.1/linux-arm/traceconv',
     'sha256':
-        '9aea9075fdda92a326bc15a2a70bd818036588fce9e201d66cbdb16fac72b83a',
+        '04300b1c4dcec1e01bc23017dab3b406f9f0ffd7dd9ea3723784aa8730762bc9',
     'platform':
         'linux',
     'machine': ['armv6l', 'armv7l', 'armv8l']
@@ -61,11 +61,11 @@
     'file_name':
         'traceconv',
     'file_size':
-        8595032,
+        7588200,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v41.0/linux-arm64/traceconv',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v43.1/linux-arm64/traceconv',
     'sha256':
-        '586dac8bdfc3e2c5fe65b132947300abf6b31c66d873e5bd66a87531730f2ff1',
+        'd3edc1cd7b216e18955135e0e9e767cdd7b1b8b7efa64793aa6b923a6c278d68',
     'platform':
         'linux',
     'machine': ['aarch64']
@@ -75,55 +75,55 @@
     'file_name':
         'traceconv',
     'file_size':
-        6575880,
+        5931120,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v41.0/android-arm/traceconv',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v43.1/android-arm/traceconv',
     'sha256':
-        '22af179fabec5b14d21753702670eee432af65a1244725ee31f0f3b960e2363d'
+        '8c3cb3dc96aa6ca296876b8ed56f8eed8c33e12e756b178360cc145263130e7e'
 }, {
     'arch':
         'android-arm64',
     'file_name':
         'traceconv',
     'file_size':
-        7906536,
+        7546224,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v41.0/android-arm64/traceconv',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v43.1/android-arm64/traceconv',
     'sha256':
-        'fedc807169b33370a5aae778ff001d08b079ed06ed0e846eeb251e5479c8de4f'
+        '180cfd2184d601c8f202b6bcd899cc7f63a8bb384505c1a2c3e889dfbe8bdb6d'
 }, {
     'arch':
         'android-x86',
     'file_name':
         'traceconv',
     'file_size':
-        8771276,
+        8176528,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v41.0/android-x86/traceconv',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v43.1/android-x86/traceconv',
     'sha256':
-        'b6d65ff5b4aaeab5c99b31c2ad766bf0253a57e4b700a53f192db1b97cab7b71'
+        'a4e8ff19daa58726138aa66f5adae74b609fceee403c8cddbaaf46d6d07e4cc8'
 }, {
     'arch':
         'android-x64',
     'file_name':
         'traceconv',
     'file_size':
-        8922400,
+        7767560,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v41.0/android-x64/traceconv',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v43.1/android-x64/traceconv',
     'sha256':
-        '098ccf88ffcfb109b7527ab7eb9309710a9098ccecdbc69d87f7b01fa1ed59bc'
+        '19626b87f8c8d956d3807d24faf5764c6bca289f55732cae2f6753dbec33e7f7'
 }, {
     'arch':
         'windows-amd64',
     'file_name':
         'traceconv.exe',
     'file_size':
-        8405504,
+        7645696,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v41.0/windows-amd64/traceconv.exe',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v43.1/windows-amd64/traceconv.exe',
     'sha256':
-        '65a864f0e61595cef693aaf7b081e22f5d471e20c35bf461c511a884167c839f',
+        '24eb5322f22c0219694789fa04aaa5ad09b0746f8b993fd1713e6b3f7943708a',
     'platform':
         'win32',
     'machine': ['amd64']
diff --git a/python/perfetto/trace_processor/metrics.descriptor b/python/perfetto/trace_processor/metrics.descriptor
index 9783b76..5c04c0a 100644
--- a/python/perfetto/trace_processor/metrics.descriptor
+++ b/python/perfetto/trace_processor/metrics.descriptor
Binary files differ
diff --git a/src/base/file_utils.cc b/src/base/file_utils.cc
index db0105c..53e7657 100644
--- a/src/base/file_utils.cc
+++ b/src/base/file_utils.cc
@@ -408,7 +408,7 @@
 #endif
 }
 
-std::optional<size_t> GetFileSize(const std::string& file_path) {
+std::optional<uint64_t> GetFileSize(const std::string& file_path) {
 #if PERFETTO_BUILDFLAG(PERFETTO_OS_WIN)
   // This does not use base::OpenFile to avoid getting an exclusive lock.
   HANDLE file =
@@ -419,9 +419,10 @@
   }
   LARGE_INTEGER file_size;
   file_size.QuadPart = 0;
-  std::optional<size_t> res;
+  std::optional<uint64_t> res;
   if (GetFileSizeEx(file, &file_size)) {
-    res = static_cast<size_t>(file_size.QuadPart);
+    static_assert(sizeof(decltype(file_size.QuadPart)) <= sizeof(uint64_t));
+    res = static_cast<uint64_t>(file_size.QuadPart);
   }
   CloseHandle(file);
   return res;
@@ -434,7 +435,8 @@
   if (fstat(*fd, &buf) == -1) {
     return std::nullopt;
   }
-  return static_cast<size_t>(buf.st_size);
+  static_assert(sizeof(decltype(buf.st_size)) <= sizeof(uint64_t));
+  return static_cast<uint64_t>(buf.st_size);
 #endif
 }
 
diff --git a/src/base/scoped_mmap.cc b/src/base/scoped_mmap.cc
index c12d33b..4bc97b8 100644
--- a/src/base/scoped_mmap.cc
+++ b/src/base/scoped_mmap.cc
@@ -92,6 +92,9 @@
 }
 
 ScopedMmap& ScopedMmap::operator=(ScopedMmap&& other) noexcept {
+  if (this == &other) {
+    return *this;
+  }
   reset();
   std::swap(ptr_, other.ptr_);
   std::swap(length_, other.length_);
@@ -161,6 +164,18 @@
   return ret;
 }
 
+#if PERFETTO_BUILDFLAG(PERFETTO_OS_LINUX) ||   \
+    PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID) || \
+    PERFETTO_BUILDFLAG(PERFETTO_OS_APPLE)
+// static
+ScopedMmap ScopedMmap::InheritMmappedRange(void* data, size_t size) {
+  ScopedMmap ret;
+  ret.ptr_ = data;
+  ret.length_ = size;
+  return ret;
+}
+#endif
+
 ScopedMmap ReadMmapFilePart(const char* fname, size_t length) {
   return ScopedMmap::FromHandle(OpenFileForMmap(fname), length);
 }
diff --git a/src/base/scoped_mmap_unittest.cc b/src/base/scoped_mmap_unittest.cc
index 2c6b004..ca56bbd 100644
--- a/src/base/scoped_mmap_unittest.cc
+++ b/src/base/scoped_mmap_unittest.cc
@@ -16,6 +16,14 @@
 
 #include "perfetto/ext/base/scoped_mmap.h"
 
+#if PERFETTO_BUILDFLAG(PERFETTO_OS_LINUX) ||   \
+    PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID) || \
+    PERFETTO_BUILDFLAG(PERFETTO_OS_APPLE)
+#include <sys/mman.h>
+#include <unistd.h>
+#endif
+
+#include "perfetto/ext/base/file_utils.h"
 #include "src/base/test/tmp_dir_tree.h"
 #include "test/gtest_and_gmock.h"
 
@@ -83,5 +91,25 @@
   EXPECT_FALSE(mapped.IsValid());
 }
 
+#if PERFETTO_BUILDFLAG(PERFETTO_OS_LINUX) ||   \
+    PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID) || \
+    PERFETTO_BUILDFLAG(PERFETTO_OS_APPLE)
+TEST_F(ScopedMmapTest, InheritMmappedRange) {
+  base::TmpDirTree tmp;
+  tmp.AddFile("f1.txt", "ccccc");
+  ScopedPlatformHandle file(
+      base::OpenFile(tmp.AbsolutePath("f1.txt").c_str(), O_RDONLY));
+  void* ptr = mmap(nullptr, 5, PROT_READ, MAP_PRIVATE, *file, 0);
+  ASSERT_NE(ptr, MAP_FAILED);
+
+  ScopedMmap mapped = ScopedMmap::InheritMmappedRange(ptr, 5);
+  file.reset();
+
+  ASSERT_TRUE(mapped.IsValid());
+  ASSERT_EQ(mapped.length(), 5u);
+  EXPECT_EQ(*static_cast<char*>(mapped.data()), 'c');
+}
+#endif
+
 }  // namespace
 }  // namespace perfetto::base
diff --git a/src/profiling/symbolizer/BUILD.gn b/src/profiling/symbolizer/BUILD.gn
index 722d1cf..2eb2c93 100644
--- a/src/profiling/symbolizer/BUILD.gn
+++ b/src/profiling/symbolizer/BUILD.gn
@@ -49,7 +49,7 @@
       "../../../include/perfetto/trace_processor:trace_processor",
       "../../../protos/perfetto/trace:zero",
       "../../../protos/perfetto/trace/profiling:zero",
-      "../../trace_processor/util:stack_traces_util",
+      "../../trace_processor/util:build_id",
     ]
     sources = [
       "symbolize_database.cc",
diff --git a/src/profiling/symbolizer/local_symbolizer.cc b/src/profiling/symbolizer/local_symbolizer.cc
index eee5076..5efb347 100644
--- a/src/profiling/symbolizer/local_symbolizer.cc
+++ b/src/profiling/symbolizer/local_symbolizer.cc
@@ -19,6 +19,7 @@
 #include <fcntl.h>
 
 #include <cinttypes>
+#include <limits>
 #include <memory>
 #include <optional>
 #include <sstream>
@@ -352,18 +353,22 @@
     return std::nullopt;
   }
   // Openfile opens the file with an exclusive lock on windows.
-  std::optional<size_t> size = base::GetFileSize(symbol_file);
-  if (!size.has_value()) {
+  std::optional<uint64_t> file_size = base::GetFileSize(symbol_file);
+  if (!file_size.has_value()) {
     PERFETTO_PLOG("Failed to get file size %s", symbol_file.c_str());
     return std::nullopt;
   }
 
-  if (*size == 0) {
+  static_assert(sizeof(size_t) <= sizeof(uint64_t));
+  size_t size = static_cast<size_t>(
+      std::min<uint64_t>(std::numeric_limits<size_t>::max(), *file_size));
+
+  if (size == 0) {
     return std::nullopt;
   }
 
   std::optional<BuildIdAndLoadBias> build_id_and_load_bias =
-      GetBuildIdAndLoadBias(symbol_file.c_str(), *size);
+      GetBuildIdAndLoadBias(symbol_file.c_str(), size);
   if (!build_id_and_load_bias)
     return std::nullopt;
   if (build_id_and_load_bias->build_id != build_id) {
diff --git a/src/profiling/symbolizer/symbolize_database.cc b/src/profiling/symbolizer/symbolize_database.cc
index 224874b..a008e1a 100644
--- a/src/profiling/symbolizer/symbolize_database.cc
+++ b/src/profiling/symbolizer/symbolize_database.cc
@@ -28,8 +28,7 @@
 #include "protos/perfetto/trace/profiling/profile_common.pbzero.h"
 #include "protos/perfetto/trace/trace.pbzero.h"
 #include "protos/perfetto/trace/trace_packet.pbzero.h"
-
-#include "src/trace_processor/util/stack_traces_util.h"
+#include "src/trace_processor/util/build_id.h"
 
 namespace perfetto {
 namespace profiling {
@@ -56,32 +55,6 @@
   }
 };
 
-std::string FromHex(const char* str, size_t size) {
-  if (size % 2) {
-    PERFETTO_DFATAL_OR_ELOG("Failed to parse hex %s", str);
-    return "";
-  }
-  std::string result(size / 2, '\0');
-  for (size_t i = 0; i < size; i += 2) {
-    char hex_byte[3];
-    hex_byte[0] = str[i];
-    hex_byte[1] = str[i + 1];
-    hex_byte[2] = '\0';
-    char* end;
-    long int byte = strtol(hex_byte, &end, 16);
-    if (*end != '\0') {
-      PERFETTO_DFATAL_OR_ELOG("Failed to parse hex %s", str);
-      return "";
-    }
-    result[i / 2] = static_cast<char>(byte);
-  }
-  return result;
-}
-
-std::string FromHex(const std::string& str) {
-  return FromHex(str.c_str(), str.size());
-}
-
 std::map<UnsymbolizedMapping, std::vector<uint64_t>> GetUnsymbolizedFrames(
     trace_processor::TraceProcessor* tp) {
   std::map<UnsymbolizedMapping, std::vector<uint64_t>> res;
@@ -89,17 +62,10 @@
   while (it.Next()) {
     int64_t load_bias = it.Get(3).AsLong();
     PERFETTO_CHECK(load_bias >= 0);
-    std::string build_id;
-    // TODO(b/148109467): Remove workaround once all active Chrome versions
-    // write raw bytes instead of a string as build_id.
-    std::string raw_build_id = it.Get(1).AsString();
-    if (!trace_processor::util::IsHexModuleId(base::StringView(raw_build_id))) {
-      build_id = FromHex(raw_build_id);
-    } else {
-      build_id = raw_build_id;
-    }
-    UnsymbolizedMapping unsymbolized_mapping{it.Get(0).AsString(), build_id,
-                                             static_cast<uint64_t>(load_bias)};
+    trace_processor::BuildId build_id =
+        trace_processor::BuildId::FromHex(it.Get(1).AsString());
+    UnsymbolizedMapping unsymbolized_mapping{
+        it.Get(0).AsString(), build_id.raw(), static_cast<uint64_t>(load_bias)};
     int64_t rel_pc = it.Get(2).AsLong();
     res[unsymbolized_mapping].emplace_back(rel_pc);
   }
diff --git a/src/trace_processor/BUILD.gn b/src/trace_processor/BUILD.gn
index 694b515..e37c9c6 100644
--- a/src/trace_processor/BUILD.gn
+++ b/src/trace_processor/BUILD.gn
@@ -132,7 +132,6 @@
     "util:descriptors",
     "util:gzip",
     "util:proto_to_args_parser",
-    "util:stack_traces_util",
   ]
   public_deps = [ "../../include/perfetto/trace_processor:storage" ]
 }
@@ -240,6 +239,7 @@
   sources = [
     "forwarding_trace_parser_unittest.cc",
     "ref_counted_unittest.cc",
+    "trace_blob_unittest.cc",
   ]
   deps = [
     ":storage_minimal",
diff --git a/src/trace_processor/containers/BUILD.gn b/src/trace_processor/containers/BUILD.gn
index 08cddfc..43e9603 100644
--- a/src/trace_processor/containers/BUILD.gn
+++ b/src/trace_processor/containers/BUILD.gn
@@ -22,7 +22,6 @@
 perfetto_component("containers") {
   public = [
     "bit_vector.h",
-    "bit_vector_iterators.h",
     "null_term_string_view.h",
     "row_map.h",
     "row_map_algorithms.h",
@@ -30,7 +29,6 @@
   ]
   sources = [
     "bit_vector.cc",
-    "bit_vector_iterators.cc",
     "row_map.cc",
     "string_pool.cc",
   ]
@@ -66,6 +64,7 @@
       ":containers",
       "../../../gn:benchmark",
       "../../../gn:default_deps",
+      "../../base",
     ]
     sources = [
       "bit_vector_benchmark.cc",
diff --git a/src/trace_processor/containers/bit_vector.cc b/src/trace_processor/containers/bit_vector.cc
index 43a5279..75b31d0 100644
--- a/src/trace_processor/containers/bit_vector.cc
+++ b/src/trace_processor/containers/bit_vector.cc
@@ -16,17 +16,27 @@
 
 #include "src/trace_processor/containers/bit_vector.h"
 
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <initializer_list>
 #include <limits>
+#include <utility>
+#include <vector>
+
+#include "perfetto/base/build_config.h"
+#include "perfetto/base/compiler.h"
+#include "perfetto/base/logging.h"
+#include "perfetto/public/compiler.h"
 
 #include "protos/perfetto/trace_processor/serialization.pbzero.h"
-#include "src/trace_processor/containers/bit_vector_iterators.h"
 
 #if PERFETTO_BUILDFLAG(PERFETTO_X64_CPU_OPT)
 #include <immintrin.h>
 #endif
 
-namespace perfetto {
-namespace trace_processor {
+namespace perfetto::trace_processor {
 namespace {
 
 // This function implements the PDEP instruction in x64 as a loop.
@@ -35,7 +45,7 @@
 // Unfortunately, as we're emulating this in software, it scales with the number
 // of set bits in |mask| rather than being a constant time instruction:
 // therefore, this should be avoided where real instructions are available.
-uint64_t PdepSlow(uint64_t word, uint64_t mask) {
+PERFETTO_ALWAYS_INLINE uint64_t PdepSlow(uint64_t word, uint64_t mask) {
   if (word == 0 || mask == std::numeric_limits<uint64_t>::max())
     return word;
 
@@ -53,7 +63,7 @@
 }
 
 // See |PdepSlow| for information on PDEP.
-uint64_t Pdep(uint64_t word, uint64_t mask) {
+PERFETTO_ALWAYS_INLINE uint64_t Pdep(uint64_t word, uint64_t mask) {
 #if PERFETTO_BUILDFLAG(PERFETTO_X64_CPU_OPT)
   base::ignore_result(PdepSlow);
   return _pdep_u64(word, mask);
@@ -62,6 +72,52 @@
 #endif
 }
 
+// This function implements the PEXT instruction in x64 as a loop.
+// See https://www.felixcloutier.com/x86/pext for details on what PEXT does.
+//
+// Unfortunately, as we're emulating this in software, it scales with the number
+// of set bits in |mask| rather than being a constant time instruction:
+// therefore, this should be avoided where real instructions are available.
+PERFETTO_ALWAYS_INLINE uint64_t PextSlow(uint64_t word, uint64_t mask) {
+  if (word == 0 || mask == std::numeric_limits<uint64_t>::max())
+    return word;
+
+  // This algorithm for calculating PEXT was found to be the fastest "simple"
+  // one among those tested when writing this function.
+  uint64_t result = 0;
+  for (uint64_t bb = 1; mask; bb += bb) {
+    // MSVC doesn't like -mask so work around this by doing 0 - mask.
+    if (word & mask & (0ull - mask)) {
+      result |= bb;
+    }
+    mask &= mask - 1;
+  }
+  return result;
+}
+
+// See |PextSlow| for information on PEXT.
+PERFETTO_ALWAYS_INLINE uint64_t Pext(uint64_t word, uint64_t mask) {
+#if PERFETTO_BUILDFLAG(PERFETTO_X64_CPU_OPT)
+  base::ignore_result(PextSlow);
+  return _pext_u64(word, mask);
+#else
+  return PextSlow(word, mask);
+#endif
+}
+
+// This function implements the tzcnt instruction.
+// See https://www.felixcloutier.com/x86/tzcnt for details on what tzcnt does.
+PERFETTO_ALWAYS_INLINE uint32_t Tzcnt(uint64_t value) {
+#if PERFETTO_BUILDFLAG(PERFETTO_X64_CPU_OPT)
+  return static_cast<uint32_t>(_tzcnt_u64(value));
+#elif defined(__GNUC__) || defined(__clang__)
+  return value ? static_cast<uint32_t>(__builtin_ctzll(value)) : 64u;
+#else
+  unsigned long out;
+  return _BitScanForward64(&out, value) ? static_cast<uint32_t>(out) : 64u;
+#endif
+}
+
 }  // namespace
 
 BitVector::BitVector() = default;
@@ -103,7 +159,7 @@
 
   // Compute the address of the new last bit in the bitvector.
   Address last_addr = IndexToAddress(new_size - 1);
-  uint32_t old_blocks_size = static_cast<uint32_t>(counts_.size());
+  auto old_blocks_size = static_cast<uint32_t>(counts_.size());
   uint32_t new_blocks_size = last_addr.block_idx + 1;
 
   // Resize the block and count vectors to have the correct number of entries.
@@ -158,22 +214,23 @@
 }
 
 BitVector BitVector::Copy() const {
-  return BitVector(words_, counts_, size_);
-}
-
-BitVector::AllBitsIterator BitVector::IterateAllBits() const {
-  return AllBitsIterator(this);
-}
-
-BitVector::SetBitsIterator BitVector::IterateSetBits() const {
-  return SetBitsIterator(this);
+  return {words_, counts_, size_};
 }
 
 void BitVector::Not() {
-  for (uint32_t i = 0; i < words_.size(); ++i) {
-    BitWord(&words_[i]).Not();
+  if (size_ == 0) {
+    return;
   }
 
+  for (uint64_t& word : words_) {
+    BitWord(&word).Not();
+  }
+
+  // Make sure to reset the last block's trailing bits to zero to preserve the
+  // invariant of BitVector.
+  Address last_addr = IndexToAddress(size_ - 1);
+  BlockFromIndex(last_addr.block_idx).ClearAfter(last_addr.block_offset);
+
   for (uint32_t i = 1; i < counts_.size(); ++i) {
     counts_[i] = kBitsInBlock * i - counts_[i];
   }
@@ -184,11 +241,7 @@
   for (uint32_t i = 0; i < words_.size(); ++i) {
     BitWord(&words_[i]).Or(sec.words_[i]);
   }
-
-  for (uint32_t i = 1; i < counts_.size(); ++i) {
-    counts_[i] = counts_[i - 1] +
-                 ConstBlock(&words_[Block::kWords * (i - 1)]).CountSetBits();
-  }
+  UpdateCounts(words_, counts_);
 }
 
 void BitVector::And(const BitVector& sec) {
@@ -196,11 +249,7 @@
   for (uint32_t i = 0; i < words_.size(); ++i) {
     BitWord(&words_[i]).And(sec.words_[i]);
   }
-
-  for (uint32_t i = 1; i < counts_.size(); ++i) {
-    counts_[i] = counts_[i - 1] +
-                 ConstBlock(&words_[Block::kWords * (i - 1)]).CountSetBits();
-  }
+  UpdateCounts(words_, counts_);
 }
 
 void BitVector::UpdateSetBits(const BitVector& update) {
@@ -238,7 +287,7 @@
     if (PERFETTO_UNLIKELY(current == 0))
       continue;
 
-    uint8_t popcount = static_cast<uint8_t>(PERFETTO_POPCOUNT(current));
+    auto popcount = static_cast<uint8_t>(PERFETTO_POPCOUNT(current));
     PERFETTO_DCHECK(popcount >= 1);
 
     // Check if we have enough unused bits from the previous iteration - if so,
@@ -288,26 +337,90 @@
   PERFETTO_DCHECK(update_unused_bits == 0);
   PERFETTO_DCHECK(update_ptr == update_ptr_end);
 
-  for (uint32_t i = 0; i < counts_.size() - 1; ++i) {
-    counts_[i + 1] = counts_[i] + ConstBlockFromIndex(i).CountSetBits();
-  }
+  UpdateCounts(words_, counts_);
 
   // After the loop, we should have precisely the same number of bits
   // set as |update|.
   PERFETTO_DCHECK(update.CountSetBits() == CountSetBits());
 }
 
+void BitVector::SelectBits(const BitVector& mask_bv) {
+  // Verify the precondition on the function: the algorithm relies on this
+  // being the case.
+  PERFETTO_DCHECK(size() <= mask_bv.size());
+
+  // Get the set bits in the mask up to the end of |this|: this will precisely
+  // equal the number of bits in |this| at the end of this function.
+  uint32_t set_bits_in_mask = mask_bv.CountSetBits(size());
+
+  const uint64_t* cur_word = words_.data();
+  const uint64_t* end_word = words_.data() + WordCount(size());
+  const uint64_t* cur_mask = mask_bv.words_.data();
+
+  // Used to track the number of bits already set (i.e. by a previous loop
+  // iteration) in |out_word|.
+  uint32_t out_word_bits = 0;
+  uint64_t* out_word = words_.data();
+  for (; cur_word != end_word; ++cur_word, ++cur_mask) {
+    // Loop invariant: we should always have out_word and out_word_bits set
+    // such that there is room for at least one more bit.
+    PERFETTO_DCHECK(out_word_bits < 64);
+
+    // The crux of this function: efficient parallel extract all bits in |this|
+    // which correspond to set bit positions in |mask_bv|.
+    uint64_t ext = Pext(*cur_word, *cur_mask);
+
+    // If there are no bits in |out_word| from a previous iteration, set it to
+    // |ext|. Otherwise, concat the newly added bits to the top of the existing
+    // bits.
+    *out_word = out_word_bits == 0 ? ext : *out_word | (ext << out_word_bits);
+
+    // Update the number of bits used in |out_word| by adding the number of set
+    // bit positions in |mask|.
+    auto popcount = static_cast<uint32_t>(PERFETTO_POPCOUNT(*cur_mask));
+    out_word_bits += popcount;
+
+    // The below is a branch-free way to increment |out_word| pointer when we've
+    // packed 64 bits into it.
+    bool spillover = out_word_bits > 64;
+    out_word += out_word_bits >= 64;
+    out_word_bits %= 64;
+
+    // If there was any "spillover" bits (i.e. bits which did not fit in the
+    // previous word), add them into the new out_word. Important: we *must* not
+    // touch out_word if there was no spillover: it may point at |cur_word + 1|
+    // (unread) or, after the last word, possibly past the end of |words_|.
+    if (spillover) *out_word = ext >> (popcount - out_word_bits);
+  }
+
+  // Loop post-condition: we must have written as many words as is required
+  // to store |set_bits_in_mask|.
+  PERFETTO_DCHECK(static_cast<uint32_t>(out_word - words_.data()) <=
+                  WordCount(set_bits_in_mask));
+
+  // Resize the BitVector to equal to the number of elements in the mask we
+  // calculated at the start of the loop.
+  Resize(set_bits_in_mask);
+
+  // Fix up the counts to match the new values. The Resize above should ensure
+  // that a) the counts vector is correctly sized, b) the bits after
+  // |set_bits_in_mask| are cleared (allowing this count algorithm to be
+  // accurate).
+  UpdateCounts(words_, counts_);
+}
+
 BitVector BitVector::FromSortedIndexVector(
     const std::vector<int64_t>& indices) {
   // The rest of the algorithm depends on |indices| being non empty.
   if (indices.empty()) {
-    return BitVector();
+    return {};
   }
 
-  // We are creating the smallest BitVector that can have all of the values from
-  // |indices| set. As we assume that |indices| is sorted, the size would be the
-  // last element + 1 and the last bit of the final BitVector will be set.
-  uint32_t size = static_cast<uint32_t>(indices.back() + 1);
+  // We are creating the smallest BitVector that can have all of the values
+  // from |indices| set. As we assume that |indices| is sorted, the size would
+  // be the last element + 1 and the last bit of the final BitVector will be
+  // set.
+  auto size = static_cast<uint32_t>(indices.back() + 1);
 
   uint32_t block_count = BlockCount(size);
   std::vector<uint64_t> words(block_count * Block::kWords);
@@ -318,14 +431,8 @@
   }
 
   std::vector<uint32_t> counts(block_count);
-  for (uint32_t i = 1; i < counts.size(); ++i) {
-    // The number of set bits in each block is the number of set bits before and
-    // in the previous block.
-    counts[i] = counts[i - 1] +
-                ConstBlock(&words[Block::kWords * (i - 1)]).CountSetBits();
-  }
-
-  return BitVector(words, counts, size);
+  UpdateCounts(words, counts);
+  return {words, counts, size};
 }
 
 BitVector BitVector::IntersectRange(uint32_t range_start,
@@ -335,7 +442,7 @@
   uint32_t end_idx = std::min(range_end, size());
 
   if (range_start >= end_idx)
-    return BitVector();
+    return {};
 
   Builder builder(end_idx, range_start);
   uint32_t front_bits = builder.BitsUntilWordBoundaryOrFull();
@@ -361,6 +468,21 @@
   return std::move(builder).Build();
 }
 
+std::vector<uint32_t> BitVector::GetSetBitIndices() const {
+  uint32_t set_bits = CountSetBits();
+  if (set_bits == 0) {
+    return {};
+  }
+  std::vector<uint32_t> res;
+  res.reserve(set_bits);
+  for (uint32_t i = 0; i < size_; i += BitWord::kBits) {
+    for (uint64_t word = words_[i / BitWord::kBits]; word; word &= word - 1) {
+      res.push_back(i + Tzcnt(word));
+    }
+  }
+  return res;
+}
+
 void BitVector::Serialize(
     protos::pbzero::SerializedColumn::BitVector* msg) const {
   msg->set_size(size_);
@@ -394,5 +516,4 @@
   }
 }
 
-}  // namespace trace_processor
-}  // namespace perfetto
+}  // namespace perfetto::trace_processor
diff --git a/src/trace_processor/containers/bit_vector.h b/src/trace_processor/containers/bit_vector.h
index c7d67c5..bcbf1ce 100644
--- a/src/trace_processor/containers/bit_vector.h
+++ b/src/trace_processor/containers/bit_vector.h
@@ -17,33 +17,27 @@
 #ifndef SRC_TRACE_PROCESSOR_CONTAINERS_BIT_VECTOR_H_
 #define SRC_TRACE_PROCESSOR_CONTAINERS_BIT_VECTOR_H_
 
-#include <stddef.h>
-#include <stdint.h>
-#include <stdio.h>
-
 #include <algorithm>
-#include <array>
-#include <cstring>
-#include <optional>
+#include <cstdint>
+#include <initializer_list>
+#include <iterator>
+#include <utility>
 #include <vector>
 
+#include "perfetto/base/compiler.h"
 #include "perfetto/base/logging.h"
+#include "perfetto/public/compiler.h"
 
 namespace perfetto {
-
-namespace protos {
-namespace pbzero {
+namespace protos::pbzero {
 class SerializedColumn_BitVector;
 class SerializedColumn_BitVector_Decoder;
-}  // namespace pbzero
-}  // namespace protos
+}  // namespace protos::pbzero
 
 namespace trace_processor {
-
 namespace internal {
 
 class BaseIterator;
-class AllBitsIterator;
 class SetBitsIterator;
 
 }  // namespace internal
@@ -52,9 +46,6 @@
 // for each bool.
 class BitVector {
  public:
-  using AllBitsIterator = internal::AllBitsIterator;
-  using SetBitsIterator = internal::SetBitsIterator;
-
   static constexpr uint32_t kBitsInWord = 64;
 
   // Builder class which allows efficiently creating a BitVector by appending
@@ -95,7 +86,7 @@
     // Creates a BitVector from this Builder.
     BitVector Build() && {
       if (size_ == 0)
-        return BitVector();
+        return {};
 
       std::vector<uint32_t> counts(BlockCount(size_));
       PERFETTO_CHECK(skipped_blocks_ <= counts.size());
@@ -103,13 +94,13 @@
         counts[i] = counts[i - 1] +
                     ConstBlock(&words_[Block::kWords * (i - 1)]).CountSetBits();
       }
-      return BitVector(std::move(words_), std::move(counts), size_);
+      return {std::move(words_), std::move(counts), size_};
     }
 
     // Returns the number of bits which are in complete words which can be
     // appended to this builder before having to fallback to |Append| due to
     // being close to the end.
-    uint32_t BitsInCompleteWordsUntilFull() {
+    uint32_t BitsInCompleteWordsUntilFull() const {
       uint32_t next_word = WordCount(global_bit_offset_);
       uint32_t end_word = WordFloor(size_);
       uint32_t complete_words = next_word < end_word ? end_word - next_word : 0;
@@ -120,7 +111,7 @@
     // hitting a word boundary (and thus able to use |AppendWord|) or until the
     // BitVector is full (i.e. no more Appends should happen), whichever would
     // happen first.
-    uint32_t BitsUntilWordBoundaryOrFull() {
+    uint32_t BitsUntilWordBoundaryOrFull() const {
       if (global_bit_offset_ == 0 && size_ < BitWord::kBits) {
         return size_;
       }
@@ -132,7 +123,7 @@
     // Returns the number of bits which should be appended using |Append| before
     // hitting a word boundary (and thus able to use |AppendWord|) or until the
     // BitVector is full (i.e. no more Appends should happen).
-    uint32_t BitsUntilFull() { return size_ - global_bit_offset_; }
+    uint32_t BitsUntilFull() const { return size_ - global_bit_offset_; }
 
    private:
     std::vector<uint64_t> words_;
@@ -144,11 +135,14 @@
   // Creates an empty BitVector.
   BitVector();
 
-  explicit BitVector(std::initializer_list<bool> init);
+  BitVector(std::initializer_list<bool> init);
 
   // Creates a BitVector of |count| size filled with |value|.
   explicit BitVector(uint32_t count, bool value = false);
 
+  BitVector(const BitVector&) = delete;
+  BitVector& operator=(const BitVector&) = delete;
+
   // Enable moving BitVectors as they have no unmovable state.
   BitVector(BitVector&&) noexcept = default;
   BitVector& operator=(BitVector&&) = default;
@@ -238,7 +232,7 @@
     if (PERFETTO_LIKELY(!old_value)) {
       BlockFromIndex(addr.block_idx).Set(addr.block_offset);
 
-      uint32_t size = static_cast<uint32_t>(counts_.size());
+      auto size = static_cast<uint32_t>(counts_.size());
       for (uint32_t i = addr.block_idx + 1; i < size; ++i) {
         counts_[i]++;
       }
@@ -259,7 +253,7 @@
     if (PERFETTO_LIKELY(old_value)) {
       BlockFromIndex(addr.block_idx).Clear(addr.block_offset);
 
-      uint32_t size = static_cast<uint32_t>(counts_.size());
+      auto size = static_cast<uint32_t>(counts_.size());
       for (uint32_t i = addr.block_idx + 1; i < size; ++i) {
         counts_[i]--;
       }
@@ -302,7 +296,9 @@
   // As an example, suppose RangeForTesting(3, 7, [](x) { return x < 5 }). This
   // would result in the following BitVector: [0 0 0 1 1 0 0]
   template <typename Filler = bool(uint32_t)>
-  static BitVector RangeForTesting(uint32_t start, uint32_t end, Filler f) {
+  PERFETTO_WARN_UNUSED_RESULT static BitVector RangeForTesting(uint32_t start,
+                                                               uint32_t end,
+                                                               Filler f) {
     // Compute the block index and BitVector index where we start and end
     // working one block at a time.
     uint32_t start_fast_block = BlockCount(start);
@@ -315,8 +311,6 @@
       for (uint32_t i = start; i < end; ++i) {
         bv.Append(f(i));
       }
-      bv.counts_.emplace_back(bv.CountSetBits());
-      bv.size_ = end;
       return bv;
     }
 
@@ -356,11 +350,13 @@
   // - be sorted
   // - have first element >= 0
   // - last value smaller than numeric limit of uint32_t.
-  static BitVector FromSortedIndexVector(const std::vector<int64_t>&);
+  PERFETTO_WARN_UNUSED_RESULT static BitVector FromSortedIndexVector(
+      const std::vector<int64_t>&);
 
   // Creates a BitVector of size `min(range_end, size())` with bits between
   // |start| and |end| filled with corresponding bits from |this| BitVector.
-  BitVector IntersectRange(uint32_t range_start, uint32_t range_end) const;
+  PERFETTO_WARN_UNUSED_RESULT BitVector
+  IntersectRange(uint32_t range_start, uint32_t range_end) const;
 
   // Requests the removal of unused capacity.
   // Matches the semantics of std::vector::shrink_to_fit.
@@ -381,24 +377,19 @@
   // other: 0 1 1 0
   // This will change this to the following:
   // this:  0 1 0 0 1 0 0
-  // TODO(lalitm): investigate whether we should just change this to And.
   void UpdateSetBits(const BitVector& other);
 
-  // Iterate all the bits in the BitVector.
+  // For each set bit position in |other|, selects the value of the bit at the
+  // same position in |this| and stores those values contiguously in |this|.
   //
-  // Usage:
-  // for (auto it = bv.IterateAllBits(); it; it.Next()) {
-  //   ...
-  // }
-  AllBitsIterator IterateAllBits() const;
-
-  // Iterate all the set bits in the BitVector.
+  // Precondition: |this.size()| <= |other.size()|.
   //
-  // Usage:
-  // for (auto it = bv.IterateSetBits(); it; it.Next()) {
-  //   ...
-  // }
-  SetBitsIterator IterateSetBits() const;
+  // For example suppose the following:
+  // this:  1 1 0 0 1 0 1
+  // other: 0 1 0 1 0 1 0 0 1 0
+  // This will change |this| to the following:
+  // this:  1 0 0
+  void SelectBits(const BitVector& other);
 
   // Returns the approximate cost (in bytes) of storing a BitVector with size
   // |n|. This can be used to make decisions about whether using a BitVector is
@@ -411,6 +402,10 @@
     return BlockCount(n) * Block::kBits + BlockCount(n) * sizeof(uint32_t);
   }
 
+  // Returns a vector<uint32_t> containing the indices of all the set bits
+  // in the BitVector.
+  std::vector<uint32_t> GetSetBitIndices() const;
+
   // Serialize internals of BitVector to proto.
   void Serialize(protos::pbzero::SerializedColumn_BitVector* msg) const;
 
@@ -419,8 +414,8 @@
       const protos::pbzero::SerializedColumn_BitVector_Decoder& bv_msg);
 
  private:
+  using SetBitsIterator = internal::SetBitsIterator;
   friend class internal::BaseIterator;
-  friend class internal::AllBitsIterator;
   friend class internal::SetBitsIterator;
 
   // Represents the offset of a bit within a block.
@@ -636,9 +631,6 @@
   // On x86 architectures we generally target for trace processor, the
   // size of a cache line is 64 bytes (or 512 bits). For this reason,
   // we make the size of the block contain 8 atoms as 8 * 64 == 512.
-  //
-  // TODO(lalitm): investigate whether we should tune this value for
-  // WASM and ARM.
   class Block {
    public:
     // See class documentation for how these constants are chosen.
@@ -812,9 +804,6 @@
             std::vector<uint32_t> counts,
             uint32_t size);
 
-  BitVector(const BitVector&) = delete;
-  BitVector& operator=(const BitVector&) = delete;
-
   // Returns the number of 8 elements blocks in the BitVector.
   uint32_t BlockCount() {
     return static_cast<uint32_t>(words_.size()) / Block::kWords;
@@ -876,6 +865,14 @@
     }
   }
 
+  // Iterate all the set bits in the BitVector.
+  //
+  // Usage:
+  // for (auto it = bv.IterateSetBits(); it; it.Next()) {
+  //   ...
+  // }
+  SetBitsIterator IterateSetBits() const;
+
   // Returns the index of the word which would store |idx|.
   static constexpr uint32_t WordFloor(uint32_t idx) {
     return idx / BitWord::kBits;
@@ -925,6 +922,16 @@
     return block_idx * Block::kBits;
   }
 
+  // Updates the counts in |counts| by counting the set bits in |words|.
+  static void UpdateCounts(const std::vector<uint64_t>& words,
+                           std::vector<uint32_t>& counts) {
+    PERFETTO_CHECK(words.size() == counts.size() * Block::kWords);
+    for (uint32_t i = 1; i < counts.size(); ++i) {
+      counts[i] = counts[i - 1] +
+                  ConstBlock(&words[Block::kWords * (i - 1)]).CountSetBits();
+    }
+  }
+
   uint32_t size_ = 0;
   // See class documentation for how these constants are chosen.
   static constexpr uint16_t kWordsInBlock = Block::kWords;
diff --git a/src/trace_processor/containers/bit_vector_benchmark.cc b/src/trace_processor/containers/bit_vector_benchmark.cc
index 55d3307..de4e85d 100644
--- a/src/trace_processor/containers/bit_vector_benchmark.cc
+++ b/src/trace_processor/containers/bit_vector_benchmark.cc
@@ -12,13 +12,15 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+#include <cstdint>
 #include <limits>
 #include <random>
+#include <vector>
 
 #include <benchmark/benchmark.h>
 
+#include "perfetto/base/logging.h"
 #include "src/trace_processor/containers/bit_vector.h"
-#include "src/trace_processor/containers/bit_vector_iterators.h"
 
 namespace {
 
@@ -48,7 +50,7 @@
   }
 }
 
-void UpdateSetBitsArgs(benchmark::internal::Benchmark* b) {
+void UpdateSetBitsSelectBitsArgs(benchmark::internal::Benchmark* b) {
   if (IsBenchmarkFunctionalOnly()) {
     b->Args({64, 50, 50});
   } else {
@@ -199,6 +201,29 @@
 }
 BENCHMARK(BM_BitVectorCountSetBits)->Apply(BitVectorArgs);
 
+static void BM_BitVectorGetSetBitIndices(benchmark::State& state) {
+  static constexpr uint32_t kRandomSeed = 42;
+  std::minstd_rand0 rnd_engine(kRandomSeed);
+
+  auto size = static_cast<uint32_t>(state.range(0));
+  auto set_percentage = static_cast<uint32_t>(state.range(1));
+
+  BitVector bv;
+  for (uint32_t i = 0; i < size; ++i) {
+    bool value = rnd_engine() % 100 < set_percentage;
+    if (value) {
+      bv.AppendTrue();
+    } else {
+      bv.AppendFalse();
+    }
+  }
+
+  for (auto _ : state) {
+    benchmark::DoNotOptimize(bv.GetSetBitIndices());
+  }
+}
+BENCHMARK(BM_BitVectorGetSetBitIndices)->Apply(BitVectorArgs);
+
 static void BM_BitVectorResize(benchmark::State& state) {
   static constexpr uint32_t kRandomSeed = 42;
   std::minstd_rand0 rnd_engine(kRandomSeed);
@@ -265,20 +290,50 @@
       picker_set_bit_count, benchmark::Counter::kIsIterationInvariantRate |
                                 benchmark::Counter::kInvert);
 }
-BENCHMARK(BM_BitVectorUpdateSetBits)->Apply(UpdateSetBitsArgs);
+BENCHMARK(BM_BitVectorUpdateSetBits)->Apply(UpdateSetBitsSelectBitsArgs);
 
-static void BM_BitVectorSetBitsIterator(benchmark::State& state) {
-  uint32_t size = static_cast<uint32_t>(state.range(0));
-  uint32_t set_percentage = static_cast<uint32_t>(state.range(1));
+static void BM_BitVectorSelectBits(benchmark::State& state) {
+  static constexpr uint32_t kRandomSeed = 42;
+  std::minstd_rand0 rnd_engine(kRandomSeed);
 
-  BitVector bv = BvWithSizeAndSetPercentage(size, set_percentage);
-  for (auto _ : state) {
-    for (auto it = bv.IterateSetBits(); it; it.Next()) {
-      benchmark::DoNotOptimize(it.index());
+  auto size = static_cast<uint32_t>(state.range(0));
+  auto set_percentage = static_cast<uint32_t>(state.range(1));
+  auto mask_set_percentage = static_cast<uint32_t>(state.range(2));
+
+  BitVector bv;
+  BitVector mask;
+  for (uint32_t i = 0; i < size; ++i) {
+    bool value = rnd_engine() % 100 < set_percentage;
+    if (value) {
+      bv.AppendTrue();
+    } else {
+      bv.AppendFalse();
+    }
+    bool mask_value = rnd_engine() % 100 < mask_set_percentage;
+    if (mask_value) {
+      mask.AppendTrue();
+    } else {
+      mask.AppendFalse();
     }
   }
+
+  uint32_t set_bit_count = bv.CountSetBits();
+  uint32_t mask_set_bit_count = mask.CountSetBits();
+
+  for (auto _ : state) {
+    BitVector copy = bv.Copy();
+    copy.SelectBits(mask);
+    benchmark::DoNotOptimize(copy);
+  }
+
+  state.counters["s/set bit"] = benchmark::Counter(
+      set_bit_count, benchmark::Counter::kIsIterationInvariantRate |
+                         benchmark::Counter::kInvert);
+  state.counters["s/mask bit"] = benchmark::Counter(
+      mask_set_bit_count, benchmark::Counter::kIsIterationInvariantRate |
+                              benchmark::Counter::kInvert);
 }
-BENCHMARK(BM_BitVectorSetBitsIterator)->Apply(BitVectorArgs);
+BENCHMARK(BM_BitVectorSelectBits)->Apply(UpdateSetBitsSelectBitsArgs);
 
 static void BM_BitVectorFromIndexVector(benchmark::State& state) {
   std::vector<int64_t> indices;
diff --git a/src/trace_processor/containers/bit_vector_iterators.cc b/src/trace_processor/containers/bit_vector_iterators.cc
deleted file mode 100644
index 076a129..0000000
--- a/src/trace_processor/containers/bit_vector_iterators.cc
+++ /dev/null
@@ -1,126 +0,0 @@
-/*
- * Copyright (C) 2019 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "src/trace_processor/containers/bit_vector_iterators.h"
-
-namespace perfetto {
-namespace trace_processor {
-namespace internal {
-
-BaseIterator::BaseIterator(BitVector* bv)
-    : size_(bv->size()), bv_(bv), block_(bv_->words_.data()) {}
-
-BaseIterator::~BaseIterator() {
-  if (size_ > 0) {
-    uint32_t block_idx = bv_->IndexToAddress(index_).block_idx;
-    uint32_t last_block_idx = bv_->BlockCount() - 1;
-
-    // If |index_| == |size_| and the last index was on a block boundary, we
-    // can end up one block past the end of the bitvector. Take the
-    // min of the block index and the last block
-    OnBlockChange(std::min(block_idx, last_block_idx), last_block_idx);
-  }
-}
-
-void BaseIterator::OnBlockChange(uint32_t old_block_idx,
-                                 uint32_t new_block_idx) {
-  if (set_bit_count_diff_ != 0) {
-    // If the count of set bits has changed, go through all the counts between
-    // the old and new blocks and modify them.
-    // We only need to go to new_block and not to the end of the bitvector as
-    // the blocks after new_block will either be updated in a future call to
-    // OnBlockChange or in the destructor.
-    for (uint32_t i = old_block_idx + 1; i <= new_block_idx; ++i) {
-      int32_t new_count =
-          static_cast<int32_t>(bv_->counts_[i]) + set_bit_count_diff_;
-      PERFETTO_DCHECK(new_count >= 0);
-
-      bv_->counts_[i] = static_cast<uint32_t>(new_count);
-    }
-  }
-
-  // Reset the changed flag and cache the new block.
-  is_block_changed_ = false;
-  block_ = bv_->BlockFromIndex(new_block_idx);
-}
-
-AllBitsIterator::AllBitsIterator(const BitVector* bv)
-    : BaseIterator(const_cast<BitVector*>(bv)) {}
-
-SetBitsIterator::SetBitsIterator(const BitVector* bv)
-    : BaseIterator(const_cast<BitVector*>(bv)) {
-  set_bit_count_ = bv->CountSetBits();
-
-  if (set_bit_count_ > 0) {
-    // Read a batch of set bit indices starting at index 0.
-    ReadSetBitBatch(0);
-
-    // Fast forward the iterator to the first index in the freshly read
-    // batch of set bots.
-    SetIndex(batch_[0]);
-  }
-}
-
-void SetBitsIterator::ReadSetBitBatch(uint32_t start_idx) {
-  PERFETTO_DCHECK(set_bit_index_ % kBatchSize == 0);
-
-  uint32_t set_bit_count_until_i = set_bit_index_;
-  for (uint32_t i = start_idx; i < size(); ++i) {
-    auto addr = BitVector::IndexToAddress(i);
-
-    // Compute the count to the end of the block noting that the last block
-    // needs to use |set_bit_count_| and not the next count in the vector
-    // because that is OOB.
-    uint32_t set_bits_to_end_of_block =
-        addr.block_idx == bv().counts_.size() - 1
-            ? set_bit_count_
-            : bv().counts_[addr.block_idx + 1];
-
-    // Optimization: If the count of set bits to the end of the block is the
-    // same as the count to the current index, we can just skip the whole
-    // block without iterating through the bits inside.
-    if (set_bits_to_end_of_block == set_bit_count_until_i) {
-      static constexpr BitVector::BlockOffset kLastBlockOffset = {
-          BitVector::Block::kWords - 1, BitVector::BitWord::kBits - 1};
-
-      i = BitVector::AddressToIndex({addr.block_idx, kLastBlockOffset});
-      continue;
-    }
-
-    // If the bit is not set, just bail out.
-    const BitVector::ConstBlock& block =
-        bv().ConstBlockFromIndex(addr.block_idx);
-    if (!block.IsSet(addr.block_offset))
-      continue;
-
-    // Update |batch_| with the index of the current bit.
-    uint32_t batch_idx = set_bit_count_until_i++ % kBatchSize;
-    batch_[batch_idx] = i;
-
-    // If we've reached as many indicies as the batch can store, just
-    // return.
-    if (PERFETTO_UNLIKELY(batch_idx == kBatchSize - 1))
-      return;
-  }
-
-  // We should only get here when we've managed to read all the set bits.
-  // End of batch should return from the body of the loop.
-  PERFETTO_DCHECK(set_bit_count_until_i == set_bit_count_);
-}
-
-}  // namespace internal
-}  // namespace trace_processor
-}  // namespace perfetto
diff --git a/src/trace_processor/containers/bit_vector_iterators.h b/src/trace_processor/containers/bit_vector_iterators.h
deleted file mode 100644
index e4ee878..0000000
--- a/src/trace_processor/containers/bit_vector_iterators.h
+++ /dev/null
@@ -1,198 +0,0 @@
-/*
- * Copyright (C) 2019 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef SRC_TRACE_PROCESSOR_CONTAINERS_BIT_VECTOR_ITERATORS_H_
-#define SRC_TRACE_PROCESSOR_CONTAINERS_BIT_VECTOR_ITERATORS_H_
-
-#include "src/trace_processor/containers/bit_vector.h"
-
-namespace perfetto {
-namespace trace_processor {
-namespace internal {
-
-// Base iterator class for all iterators on BitVector.
-//
-// This class implements caching of one Block at a time to reduce pointer
-// chasing. It also defers updating counts on Clear calls until the end of each
-// block.
-class BaseIterator {
- public:
-  BaseIterator(BitVector* bv);
-  ~BaseIterator();
-
-  BaseIterator(BaseIterator&&) noexcept = default;
-  BaseIterator& operator=(BaseIterator&&) = default;
-
-  // Sets the current bit the iterator points to.
-  void Set() {
-    if (!IsSet()) {
-      block_.Set(block_offset());
-
-      is_block_changed_ = true;
-      ++set_bit_count_diff_;
-    }
-  }
-
-  // Clears the current bit the iterator points to.
-  void Clear() {
-    if (IsSet()) {
-      block_.Clear(block_offset());
-
-      is_block_changed_ = true;
-      --set_bit_count_diff_;
-    }
-  }
-
-  // Returns whether the current bit the iterator points to is set.
-  bool IsSet() { return BitVector::ConstBlock(block_).IsSet(block_offset()); }
-
-  // Returns the index of the current bit the iterator points to.
-  uint32_t index() const { return index_; }
-
- protected:
-  // Sets the index this iterator points to the given value.
-  //
-  // This method also performs some extra work on block boundaries
-  // as it caches the block to improve performance by reducing pointer
-  // chasing on every IsSet and Clear calls.
-  void SetIndex(uint32_t index) {
-    // We should always move the index forward.
-    PERFETTO_DCHECK(index >= index_);
-
-    uint32_t old_index = index_;
-    index_ = index;
-
-    // If we've reached the end of the iterator, just bail out.
-    if (index >= size_)
-      return;
-
-    uint32_t old_block = bv_->IndexToAddress(old_index).block_idx;
-    uint32_t new_block = bv_->IndexToAddress(index).block_idx;
-
-    // Fast path: we're in the same block so we don't need to do
-    // any other work.
-    if (PERFETTO_LIKELY(old_block == new_block))
-      return;
-
-    // Slow path: we have to change block so this will involve flushing the old
-    // block and counts (if necessary).
-    OnBlockChange(old_block, new_block);
-  }
-
-  // Handles flushing count changes and caches a new block.
-  void OnBlockChange(uint32_t old_block, uint32_t new_block);
-
-  uint32_t size() const { return size_; }
-
-  const BitVector& bv() const { return *bv_; }
-
- private:
-  BaseIterator(const BaseIterator&) = delete;
-  BaseIterator& operator=(const BaseIterator&) = delete;
-
-  BitVector::BlockOffset block_offset() const {
-    uint16_t bit_idx_inside_block = index_ % BitVector::Block::kBits;
-
-    BitVector::BlockOffset bo;
-    bo.word_idx = bit_idx_inside_block / BitVector::BitWord::kBits;
-    bo.bit_idx = bit_idx_inside_block % BitVector::BitWord::kBits;
-    return bo;
-  }
-
-  uint32_t index_ = 0;
-  uint32_t size_ = 0;
-
-  bool is_block_changed_ = false;
-  int32_t set_bit_count_diff_ = 0;
-
-  BitVector* bv_;
-  BitVector::Block block_{bv_->words_.data()};
-};
-
-// Iterator over all the bits in a bitvector.
-class AllBitsIterator : public BaseIterator {
- public:
-  AllBitsIterator(const BitVector*);
-
-  // Increments the iterator to point to the next bit.
-  void Next() { SetIndex(index() + 1); }
-
-  // Increments the iterator to skip the next |n| bits and point to the
-  // following one.
-  // Precondition: n >= 1 & index() + n <= size().
-  void Skip(uint32_t n) {
-    PERFETTO_DCHECK(n >= 1);
-    PERFETTO_DCHECK(index() + n <= size());
-
-    SetIndex(index() + n);
-  }
-
-  // Returns whether the iterator is valid.
-  operator bool() const { return index() < size(); }
-};
-
-// Iterator over all the set bits in a bitvector.
-//
-// This iterator works by first finding a batch of indices of set bits.
-// Then, the fast path involves simply incrementing a counter to go to
-// the next index in this batch. On every batch boundary, we hit the
-// slow path where we need to find another n set bits.
-class SetBitsIterator : public BaseIterator {
- public:
-  SetBitsIterator(const BitVector*);
-
-  // Increments the iterator to point to the next set bit.
-  void Next() {
-    // If we are out of bounds, just bail out.
-    if (PERFETTO_UNLIKELY(++set_bit_index_ >= set_bit_count_))
-      return;
-
-    if (PERFETTO_UNLIKELY(set_bit_index_ % kBatchSize == 0))
-      ReadSetBitBatch(batch_.back() + 1);
-
-    SetIndex(batch_[set_bit_index_ % kBatchSize]);
-  }
-
-  // Returns whether the iterator is valid.
-  operator bool() const { return set_bit_index_ < set_bit_count_; }
-
-  // Returns the index of the bit interms of set bits (i.e. how many times
-  // Next() has been called).
-  uint32_t ordinal() const { return set_bit_index_; }
-
- private:
-  static constexpr uint32_t kBatchSize = 1024;
-
-  // Reads a full batch of set bit indices from the bitvector and stores them
-  // in |batch_| below.
-  //
-  // This batch of indices is used on the fast path to quickly jump between
-  // set bits.
-  void ReadSetBitBatch(uint32_t start_idx);
-
-  uint32_t set_bit_index_ = 0;
-  uint32_t set_bit_count_ = 0;
-
-  // Contains an array of indexes; each index points to a set bit in the
-  // bitvector.
-  std::array<uint32_t, kBatchSize> batch_;
-};
-
-}  // namespace internal
-}  // namespace trace_processor
-}  // namespace perfetto
-
-#endif  // SRC_TRACE_PROCESSOR_CONTAINERS_BIT_VECTOR_ITERATORS_H_
diff --git a/src/trace_processor/containers/bit_vector_unittest.cc b/src/trace_processor/containers/bit_vector_unittest.cc
index ee7452f..6c1a01e 100644
--- a/src/trace_processor/containers/bit_vector_unittest.cc
+++ b/src/trace_processor/containers/bit_vector_unittest.cc
@@ -17,17 +17,21 @@
 #include "src/trace_processor/containers/bit_vector.h"
 
 #include <bitset>
-#include <limits>
+#include <cstdint>
 #include <random>
+#include <utility>
+#include <vector>
 
 #include "perfetto/protozero/scattered_heap_buffer.h"
-#include "protos/perfetto/trace_processor/serialization.pbzero.h"
-#include "src/trace_processor/containers/bit_vector_iterators.h"
 #include "test/gtest_and_gmock.h"
 
-namespace perfetto {
-namespace trace_processor {
+#include "protos/perfetto/trace_processor/serialization.pbzero.h"
+
+namespace perfetto::trace_processor {
 namespace {
+using testing::ElementsAre;
+using testing::IsEmpty;
+using testing::UnorderedElementsAre;
 
 TEST(BitVectorUnittest, CreateAllTrue) {
   BitVector bv(2049, true);
@@ -309,150 +313,101 @@
   }
 }
 
-TEST(BitVectorUnittest, IterateAllBitsConst) {
-  BitVector bv;
-  for (uint32_t i = 0; i < 12345; ++i) {
-    if (i % 7 == 0 || i % 13 == 0) {
-      bv.AppendTrue();
-    } else {
-      bv.AppendFalse();
-    }
-  }
+TEST(BitVectorUnittest, SelectBitsSimple) {
+  BitVector bv = {true, false, true, false, true, true, true};
+  BitVector mask = {true, false, true, true, false, false, true};
+  bv.SelectBits(mask);
 
-  uint32_t i = 0;
-  for (auto it = bv.IterateAllBits(); it; it.Next(), ++i) {
-    ASSERT_EQ(it.IsSet(), i % 7 == 0 || i % 13 == 0);
-    ASSERT_EQ(it.index(), i);
-  }
+  ASSERT_EQ(bv.size(), 4u);
+  ASSERT_EQ(bv.IsSet(0), true);
+  ASSERT_EQ(bv.IsSet(1), true);
+  ASSERT_EQ(bv.IsSet(2), false);
+  ASSERT_EQ(bv.IsSet(3), true);
+  ASSERT_EQ(bv.CountSetBits(), 3u);
 }
 
-TEST(BitVectorUnittest, IterateAllBitsSet) {
-  BitVector bv;
-  for (uint32_t i = 0; i < 12345; ++i) {
-    if (i % 7 == 0 || i % 13 == 0) {
-      bv.AppendTrue();
-    } else {
-      bv.AppendFalse();
-    }
-  }
+TEST(BitVectorUnittest, SelectBitsSmallerMain) {
+  BitVector bv = {true, false, true, false};
+  BitVector mask = {true, false, true, true, false, false, true};
+  bv.SelectBits(mask);
 
-  // Unset every 15th bit.
-  for (auto it = bv.IterateAllBits(); it; it.Next()) {
-    if (it.index() % 15 == 0) {
-      it.Set();
-    }
-  }
-
-  // Go through the iterator manually and check it has updated
-  // to not have every 15th bit set.
-  uint32_t count = 0;
-  for (uint32_t i = 0; i < 12345; ++i) {
-    bool is_set = i % 15 == 0 || i % 7 == 0 || i % 13 == 0;
-
-    ASSERT_EQ(bv.IsSet(i), is_set);
-    ASSERT_EQ(bv.CountSetBits(i), count);
-
-    if (is_set) {
-      ASSERT_EQ(bv.IndexOfNthSet(count++), i);
-    }
-  }
+  ASSERT_EQ(bv.size(), 3u);
+  ASSERT_EQ(bv.IsSet(0), true);
+  ASSERT_EQ(bv.IsSet(1), true);
+  ASSERT_EQ(bv.IsSet(2), false);
+  ASSERT_EQ(bv.CountSetBits(), 2u);
 }
 
-TEST(BitVectorUnittest, IterateAllBitsClear) {
-  BitVector bv;
-  for (uint32_t i = 0; i < 12345; ++i) {
-    if (i % 7 == 0 || i % 13 == 0) {
-      bv.AppendTrue();
-    } else {
-      bv.AppendFalse();
-    }
+TEST(BitVectorUnittest, SelectBitsLarge) {
+  BitVector bv = BitVector::RangeForTesting(
+      0, 813, [](uint32_t idx) { return idx % 7 == 0; });
+  BitVector mask = BitVector::RangeForTesting(
+      0, 813, [](uint32_t idx) { return idx % 3 == 0; });
+  bv.SelectBits(mask);
+
+  BitVector expected = BitVector::RangeForTesting(
+      0, 271u, [](uint32_t idx) { return (idx * 3) % 7 == 0; });
+
+  ASSERT_EQ(bv.size(), 271u);
+  for (uint32_t i = 0; i < expected.size(); ++i) {
+    ASSERT_EQ(expected.IsSet(i), bv.IsSet(i)) << "Index " << i;
+    ASSERT_EQ(expected.CountSetBits(i), bv.CountSetBits(i)) << "Index " << i;
   }
-
-  // Unset every 15th bit.
-  for (auto it = bv.IterateAllBits(); it; it.Next()) {
-    if (it.index() % 15 == 0) {
-      it.Clear();
-    }
-  }
-
-  // Go through the iterator manually and check it has updated
-  // to not have every 15th bit set.
-  uint32_t count = 0;
-  for (uint32_t i = 0; i < 12345; ++i) {
-    bool is_set = i % 15 != 0 && (i % 7 == 0 || i % 13 == 0);
-
-    ASSERT_EQ(bv.IsSet(i), is_set);
-    ASSERT_EQ(bv.CountSetBits(i), count);
-
-    if (is_set) {
-      ASSERT_EQ(bv.IndexOfNthSet(count++), i);
-    }
-  }
+  ASSERT_EQ(expected.CountSetBits(), bv.CountSetBits());
 }
 
-TEST(BitVectorUnittest, IterateSetBitsConst) {
-  BitVector bv;
-  std::vector<uint32_t> set_indices;
-  for (uint32_t i = 0; i < 12345; ++i) {
-    if (i % 7 == 0 || i % 13 == 0) {
-      bv.AppendTrue();
-      set_indices.emplace_back(i);
-    } else {
-      bv.AppendFalse();
-    }
-  }
+TEST(BitVectorUnittest, SelectBitsLargeSmallerMain) {
+  BitVector bv = BitVector::RangeForTesting(
+      0, 279, [](uint32_t idx) { return idx % 7 == 0; });
+  BitVector mask = BitVector::RangeForTesting(
+      0, 813, [](uint32_t idx) { return idx % 3 == 0; });
+  bv.SelectBits(mask);
 
-  uint32_t i = 0;
-  for (auto it = bv.IterateSetBits(); it; it.Next(), ++i) {
-    ASSERT_EQ(it.IsSet(), true);
-    ASSERT_EQ(it.index(), set_indices[i]);
+  BitVector expected = BitVector::RangeForTesting(
+      0, 93, [](uint32_t idx) { return (idx * 3) % 7 == 0; });
+
+  ASSERT_EQ(bv.size(), 93u);
+  for (uint32_t i = 0; i < expected.size(); ++i) {
+    ASSERT_EQ(expected.IsSet(i), bv.IsSet(i)) << "Index " << i;
+    ASSERT_EQ(expected.CountSetBits(i), bv.CountSetBits(i)) << "Index " << i;
   }
-  ASSERT_EQ(i, set_indices.size());
+  ASSERT_EQ(expected.CountSetBits(), bv.CountSetBits());
 }
 
-TEST(BitVectorUnittest, IterateSetBitsClear) {
-  BitVector bv;
-  for (uint32_t i = 0; i < 12345; ++i) {
-    if (i % 7 == 0 || i % 13 == 0) {
-      bv.AppendTrue();
-    } else {
-      bv.AppendFalse();
-    }
+TEST(BitVectorUnittest, SelectBitsDense) {
+  BitVector bv =
+      BitVector::RangeForTesting(0, 279, [](uint32_t) { return true; });
+  BitVector mask =
+      BitVector::RangeForTesting(0, 279, [](uint32_t idx) { return idx < 80; });
+  bv.SelectBits(mask);
+
+  BitVector expected =
+      BitVector::RangeForTesting(0, 80, [](uint32_t) { return true; });
+
+  ASSERT_EQ(bv.size(), 80u);
+  for (uint32_t i = 0; i < expected.size(); ++i) {
+    ASSERT_EQ(expected.IsSet(i), bv.IsSet(i)) << "Index " << i;
+    ASSERT_EQ(expected.CountSetBits(i), bv.CountSetBits(i)) << "Index " << i;
   }
-
-  for (auto it = bv.IterateSetBits(); it; it.Next()) {
-    if (it.index() % 15 == 0) {
-      it.Clear();
-    }
-  }
-
-  // Go through the iterator manually and check it has updated
-  // to not have every 15th bit set.
-  uint32_t count = 0;
-  for (uint32_t i = 0; i < 12345; ++i) {
-    bool is_set = i % 15 != 0 && (i % 7 == 0 || i % 13 == 0);
-
-    ASSERT_EQ(bv.IsSet(i), is_set);
-    ASSERT_EQ(bv.CountSetBits(i), count);
-
-    if (is_set) {
-      ASSERT_EQ(bv.IndexOfNthSet(count++), i);
-    }
-  }
+  ASSERT_EQ(expected.CountSetBits(), bv.CountSetBits());
 }
 
-TEST(BitVectorUnittest, IterateSetBitsStartsCorrectly) {
-  BitVector bv;
-  bv.AppendFalse();
-  bv.AppendTrue();
+TEST(BitVectorUnittest, SelectBitsEnd) {
+  BitVector bv = BitVector::RangeForTesting(
+      0, 279, [](uint32_t idx) { return idx % 7 == 0; });
+  BitVector mask = BitVector::RangeForTesting(
+      0, 813, [](uint32_t idx) { return idx % 3 == 0; });
+  bv.SelectBits(mask);
 
-  auto it = bv.IterateSetBits();
-  ASSERT_TRUE(it);
-  ASSERT_EQ(it.index(), 1u);
-  ASSERT_TRUE(it.IsSet());
+  BitVector expected = BitVector::RangeForTesting(
+      0, 93, [](uint32_t idx) { return (idx * 3) % 7 == 0; });
 
-  it.Next();
-  ASSERT_FALSE(it);
+  ASSERT_EQ(bv.size(), 93u);
+  for (uint32_t i = 0; i < expected.size(); ++i) {
+    ASSERT_EQ(expected.IsSet(i), bv.IsSet(i)) << "Index " << i;
+    ASSERT_EQ(expected.CountSetBits(i), bv.CountSetBits(i)) << "Index " << i;
+  }
+  ASSERT_EQ(expected.CountSetBits(), bv.CountSetBits());
 }
 
 TEST(BitVectorUnittest, IntersectRange) {
@@ -515,6 +470,18 @@
   ASSERT_EQ(intersected.CountSetBits(), 217u);
 }
 
+TEST(BitVectorUnittest, IntersectRangeAppendFalse) {
+  BitVector bv(70u, true);
+  BitVector out = bv.IntersectRange(10, 12u);
+  out.Resize(70u);
+
+  ASSERT_TRUE(out.IsSet(10u));
+  ASSERT_TRUE(out.IsSet(11u));
+  ASSERT_FALSE(out.IsSet(12u));
+  ASSERT_FALSE(out.IsSet(60u));
+  ASSERT_FALSE(out.IsSet(69u));
+}
+
 TEST(BitVectorUnittest, Range) {
   BitVector bv =
       BitVector::RangeForTesting(1, 9, [](uint32_t t) { return t % 3 == 0; });
@@ -688,6 +655,8 @@
 
   EXPECT_FALSE(bv.IsSet(2));
   EXPECT_EQ(bv.CountSetBits(), 9u);
+  EXPECT_THAT(bv.GetSetBitIndices(),
+              UnorderedElementsAre(0u, 1u, 3u, 4u, 5u, 6u, 7u, 8u, 9u));
 }
 
 TEST(BitVectorUnittest, NotBig) {
@@ -698,6 +667,14 @@
   EXPECT_EQ(bv.CountSetBits(), 820u);
 }
 
+TEST(BitVectorUnittest, NotAppendAfter) {
+  BitVector bv(30);
+  bv.Not();
+  bv.AppendFalse();
+
+  ASSERT_FALSE(bv.IsSet(30));
+}
+
 TEST(BitVectorUnittest, Or) {
   BitVector bv{1, 1, 0, 0};
   BitVector bv_second{1, 0, 1, 0};
@@ -740,31 +717,23 @@
     if (res)
       int_vec.emplace_back(i);
   }
+}
 
-  auto all_it = bv.IterateAllBits();
-  for (uint32_t i = 0; i < kCount; ++i) {
-    uint32_t count = static_cast<uint32_t>(std::count(
-        bool_vec.begin(), bool_vec.begin() + static_cast<int32_t>(i), true));
-    ASSERT_EQ(bv.IsSet(i), bool_vec[i]);
-    ASSERT_EQ(bv.CountSetBits(i), count);
+TEST(BitVectorUnittest, GetSetBitIndices) {
+  BitVector bv = {true, false, true, false, true, true, false, false};
+  ASSERT_THAT(bv.GetSetBitIndices(), ElementsAre(0u, 2u, 4u, 5u));
+}
 
-    ASSERT_TRUE(all_it);
-    ASSERT_EQ(all_it.IsSet(), bool_vec[i]);
-    ASSERT_EQ(all_it.index(), i);
-    all_it.Next();
-  }
-  ASSERT_FALSE(all_it);
+TEST(BitVectorUnittest, GetSetBitIndicesIntersectRange) {
+  BitVector bv(130u, true);
+  BitVector out = bv.IntersectRange(10, 12);
+  ASSERT_THAT(out.GetSetBitIndices(), ElementsAre(10, 11));
+}
 
-  auto set_it = bv.IterateSetBits();
-  for (uint32_t i = 0; i < int_vec.size(); ++i) {
-    ASSERT_EQ(bv.IndexOfNthSet(i), int_vec[i]);
-
-    ASSERT_TRUE(set_it);
-    ASSERT_EQ(set_it.IsSet(), true);
-    ASSERT_EQ(set_it.index(), int_vec[i]);
-    set_it.Next();
-  }
-  ASSERT_FALSE(set_it);
+TEST(BitVectorUnittest, UpdateSetBitsGetSetBitIndices) {
+  BitVector bv(130u, true);
+  bv.UpdateSetBits(BitVector(60u));
+  ASSERT_THAT(bv.GetSetBitIndices(), IsEmpty());
 }
 
 TEST(BitVectorUnittest, SerializeSimple) {
@@ -800,5 +769,4 @@
 }
 
 }  // namespace
-}  // namespace trace_processor
-}  // namespace perfetto
+}  // namespace perfetto::trace_processor
diff --git a/src/trace_processor/containers/row_map.cc b/src/trace_processor/containers/row_map.cc
index f7925ae..3ae6ee7 100644
--- a/src/trace_processor/containers/row_map.cc
+++ b/src/trace_processor/containers/row_map.cc
@@ -15,8 +15,16 @@
  */
 
 #include "src/trace_processor/containers/row_map.h"
-#include <unordered_set>
 
+#include <algorithm>
+#include <cstdint>
+#include <unordered_set>
+#include <utility>
+#include <variant>
+#include <vector>
+
+#include "perfetto/base/logging.h"
+#include "src/trace_processor/containers/bit_vector.h"
 #include "src/trace_processor/containers/row_map_algorithms.h"
 
 namespace perfetto {
@@ -69,20 +77,16 @@
 
 RowMap Select(const BitVector& bv, Range selector) {
   PERFETTO_DCHECK(selector.end <= bv.CountSetBits());
-
+  if (selector.empty()) {
+    return {};
+  }
   // If we're simply selecting every element in the bitvector, just
   // return a copy of the BitVector without iterating.
-  BitVector ret = bv.Copy();
   if (selector.start == 0 && selector.end == bv.CountSetBits()) {
-    return RowMap(std::move(ret));
+    return RowMap(bv.Copy());
   }
-
-  for (auto it = ret.IterateSetBits(); it; it.Next()) {
-    auto set_idx = it.ordinal();
-    if (set_idx < selector.start || set_idx >= selector.end)
-      it.Clear();
-  }
-  return RowMap(std::move(ret));
+  return RowMap(bv.IntersectRange(bv.IndexOfNthSet(selector.start),
+                                  bv.IndexOfNthSet(selector.end - 1) + 1));
 }
 
 RowMap Select(const BitVector& bv, const BitVector& selector) {
@@ -230,26 +234,26 @@
 RowMap::RowMap(IndexVector vec) : data_(vec) {}
 
 RowMap RowMap::Copy() const {
-  if (auto* range = std::get_if<Range>(&data_)) {
+  if (const auto* range = std::get_if<Range>(&data_)) {
     return RowMap(*range);
   }
-  if (auto* bv = std::get_if<BitVector>(&data_)) {
+  if (const auto* bv = std::get_if<BitVector>(&data_)) {
     return RowMap(bv->Copy());
   }
-  if (auto* vec = std::get_if<IndexVector>(&data_)) {
+  if (const auto* vec = std::get_if<IndexVector>(&data_)) {
     return RowMap(*vec);
   }
   NoVariantMatched();
 }
 
 OutputIndex RowMap::Max() const {
-  if (auto* range = std::get_if<Range>(&data_)) {
+  if (const auto* range = std::get_if<Range>(&data_)) {
     return range->end;
   }
-  if (auto* bv = std::get_if<BitVector>(&data_)) {
+  if (const auto* bv = std::get_if<BitVector>(&data_)) {
     return bv->size();
   }
-  if (auto* vec = std::get_if<IndexVector>(&data_)) {
+  if (const auto* vec = std::get_if<IndexVector>(&data_)) {
     return vec->empty() ? 0 : *std::max_element(vec->begin(), vec->end()) + 1;
   }
   NoVariantMatched();
@@ -272,14 +276,15 @@
 }
 
 RowMap::Iterator::Iterator(const RowMap* rm) : rm_(rm) {
-  if (auto* range = std::get_if<Range>(&rm_->data_)) {
+  if (const auto* range = std::get_if<Range>(&rm_->data_)) {
     ordinal_ = range->start;
     return;
   }
-  if (auto* bv = std::get_if<BitVector>(&rm_->data_)) {
-    set_bits_it_.reset(new BitVector::SetBitsIterator(bv->IterateSetBits()));
+  if (const auto* bv = std::get_if<BitVector>(&rm_->data_)) {
+    results_ = bv->GetSetBitIndices();
     return;
   }
 }
+
 }  // namespace trace_processor
 }  // namespace perfetto
diff --git a/src/trace_processor/containers/row_map.h b/src/trace_processor/containers/row_map.h
index b8f1d3d..88fe7a1 100644
--- a/src/trace_processor/containers/row_map.h
+++ b/src/trace_processor/containers/row_map.h
@@ -17,17 +17,18 @@
 #ifndef SRC_TRACE_PROCESSOR_CONTAINERS_ROW_MAP_H_
 #define SRC_TRACE_PROCESSOR_CONTAINERS_ROW_MAP_H_
 
-#include <stdint.h>
-
-#include <memory>
+#include <algorithm>
+#include <cstdint>
+#include <iterator>
 #include <numeric>
 #include <optional>
+#include <utility>
 #include <variant>
 #include <vector>
 
+#include "perfetto/base/compiler.h"
 #include "perfetto/base/logging.h"
 #include "src/trace_processor/containers/bit_vector.h"
-#include "src/trace_processor/containers/bit_vector_iterators.h"
 
 namespace perfetto {
 namespace trace_processor {
@@ -82,16 +83,16 @@
 
   struct Range {
     Range(OutputIndex start_index, OutputIndex end_index)
-        : start(start_index), end(end_index) {}
+        : start(start_index), end(end_index) {
+      PERFETTO_DCHECK(start_index <= end_index);
+    }
     Range() : start(0), end(0) {}
 
-    OutputIndex start = 0;  // This is an inclusive index.
-    OutputIndex end = 0;    // This is an exclusive index.
+    OutputIndex start;  // This is an inclusive index.
+    OutputIndex end;    // This is an exclusive index.
 
-    uint32_t size() const {
-      PERFETTO_DCHECK(end >= start);
-      return end - start;
-    }
+    bool empty() const { return size() == 0; }
+    uint32_t size() const { return end - start; }
     inline bool Contains(uint32_t val) const {
       return val >= start && val < end;
     }
@@ -106,29 +107,24 @@
    public:
     explicit Iterator(const RowMap* rm);
 
+    Iterator(const Iterator&) = delete;
+    Iterator& operator=(const Iterator&) = delete;
+
     Iterator(Iterator&&) noexcept = default;
     Iterator& operator=(Iterator&&) = default;
 
     // Forwards the iterator to the next row of the RowMap.
-    void Next() {
-      if (std::get_if<Range>(&rm_->data_)) {
-        ++ordinal_;
-      } else if (std::get_if<BitVector>(&rm_->data_)) {
-        set_bits_it_->Next();
-      } else if (std::get_if<IndexVector>(&rm_->data_)) {
-        ++ordinal_;
-      }
-    }
+    void Next() { ++ordinal_; }
 
     // Returns if the iterator is still valid.
-    operator bool() const {
-      if (auto* range = std::get_if<Range>(&rm_->data_)) {
+    explicit operator bool() const {
+      if (const auto* range = std::get_if<Range>(&rm_->data_)) {
         return ordinal_ < range->end;
       }
       if (std::get_if<BitVector>(&rm_->data_)) {
-        return bool(*set_bits_it_);
+        return ordinal_ < results_.size();
       }
-      if (auto* vec = std::get_if<IndexVector>(&rm_->data_)) {
+      if (const auto* vec = std::get_if<IndexVector>(&rm_->data_)) {
         return ordinal_ < vec->size();
       }
       PERFETTO_FATAL("Didn't match any variant type.");
@@ -140,9 +136,9 @@
         return ordinal_;
       }
       if (std::get_if<BitVector>(&rm_->data_)) {
-        return set_bits_it_->index();
+        return results_[ordinal_];
       }
-      if (auto* vec = std::get_if<IndexVector>(&rm_->data_)) {
+      if (const auto* vec = std::get_if<IndexVector>(&rm_->data_)) {
         return (*vec)[ordinal_];
       }
       PERFETTO_FATAL("Didn't match any variant type.");
@@ -150,26 +146,21 @@
 
     // Returns the row of the index the iterator points to.
     InputRow row() const {
-      if (auto* range = std::get_if<Range>(&rm_->data_)) {
+      if (const auto* range = std::get_if<Range>(&rm_->data_)) {
         return ordinal_ - range->start;
       }
-      if (std::get_if<BitVector>(&rm_->data_)) {
-        return set_bits_it_->ordinal();
-      }
-      if (std::get_if<IndexVector>(&rm_->data_)) {
+      if (std::get_if<BitVector>(&rm_->data_) ||
+          std::get_if<IndexVector>(&rm_->data_)) {
         return ordinal_;
       }
       PERFETTO_FATAL("Didn't match any variant type.");
     }
 
    private:
-    Iterator(const Iterator&) = delete;
-    Iterator& operator=(const Iterator&) = delete;
-
     // Ordinal will not be used for BitVector based RowMap.
     uint32_t ordinal_ = 0;
-    // Not nullptr for BitVector based RowMap.
-    std::unique_ptr<BitVector::SetBitsIterator> set_bits_it_;
+    // Indices of the set bits for a BitVector based RowMap; empty otherwise.
+    std::vector<uint32_t> results_;
 
     const RowMap* rm_ = nullptr;
   };
@@ -209,13 +200,13 @@
   // Returns the size of the RowMap; that is the number of indices in the
   // RowMap.
   uint32_t size() const {
-    if (auto* range = std::get_if<Range>(&data_)) {
+    if (const auto* range = std::get_if<Range>(&data_)) {
       return range->size();
     }
-    if (auto* bv = std::get_if<BitVector>(&data_)) {
+    if (const auto* bv = std::get_if<BitVector>(&data_)) {
       return bv->CountSetBits();
     }
-    if (auto* vec = std::get_if<IndexVector>(&data_)) {
+    if (const auto* vec = std::get_if<IndexVector>(&data_)) {
       return static_cast<uint32_t>(vec->size());
     }
     NoVariantMatched();
@@ -226,13 +217,13 @@
 
   // Returns the index at the given |row|.
   OutputIndex Get(InputRow row) const {
-    if (auto* range = std::get_if<Range>(&data_)) {
+    if (const auto* range = std::get_if<Range>(&data_)) {
       return GetRange(*range, row);
     }
-    if (auto* bv = std::get_if<BitVector>(&data_)) {
+    if (const auto* bv = std::get_if<BitVector>(&data_)) {
       return GetBitVector(*bv, row);
     }
-    if (auto* vec = std::get_if<IndexVector>(&data_)) {
+    if (const auto* vec = std::get_if<IndexVector>(&data_)) {
       return GetIndexVector(*vec, row);
     }
     NoVariantMatched();
@@ -240,19 +231,15 @@
 
   // Returns the vector of all indices in the RowMap.
   std::vector<OutputIndex> GetAllIndices() const {
-    if (auto* range = std::get_if<Range>(&data_)) {
+    if (const auto* range = std::get_if<Range>(&data_)) {
       std::vector<uint32_t> res(range->size());
       std::iota(res.begin(), res.end(), range->start);
       return res;
     }
-    if (auto* bv = std::get_if<BitVector>(&data_)) {
-      std::vector<uint32_t> res;
-      for (auto it = bv->IterateSetBits(); it; it.Next()) {
-        res.push_back(it.index());
-      }
-      return res;
+    if (const auto* bv = std::get_if<BitVector>(&data_)) {
+      return bv->GetSetBitIndices();
     }
-    if (auto* vec = std::get_if<IndexVector>(&data_)) {
+    if (const auto* vec = std::get_if<IndexVector>(&data_)) {
       return *vec;
     }
     NoVariantMatched();
@@ -263,13 +250,13 @@
 
   // Returns whether the RowMap contains the given index.
   bool Contains(OutputIndex index) const {
-    if (auto* range = std::get_if<Range>(&data_)) {
+    if (const auto* range = std::get_if<Range>(&data_)) {
       return index >= range->start && index < range->end;
     }
-    if (auto* bv = std::get_if<BitVector>(&data_)) {
+    if (const auto* bv = std::get_if<BitVector>(&data_)) {
       return index < bv->size() && bv->IsSet(index);
     }
-    if (auto* vec = std::get_if<IndexVector>(&data_)) {
+    if (const auto* vec = std::get_if<IndexVector>(&data_)) {
       return std::find(vec->begin(), vec->end(), index) != vec->end();
     }
     NoVariantMatched();
@@ -277,17 +264,17 @@
 
   // Returns the first row of the given |index| in the RowMap.
   std::optional<InputRow> RowOf(OutputIndex index) const {
-    if (auto* range = std::get_if<Range>(&data_)) {
+    if (const auto* range = std::get_if<Range>(&data_)) {
       if (index < range->start || index >= range->end)
         return std::nullopt;
       return index - range->start;
     }
-    if (auto* bv = std::get_if<BitVector>(&data_)) {
+    if (const auto* bv = std::get_if<BitVector>(&data_)) {
       return index < bv->size() && bv->IsSet(index)
                  ? std::make_optional(bv->CountSetBits(index))
                  : std::nullopt;
     }
-    if (auto* vec = std::get_if<IndexVector>(&data_)) {
+    if (const auto* vec = std::get_if<IndexVector>(&data_)) {
       auto it = std::find(vec->begin(), vec->end(), index);
       return it != vec->end() ? std::make_optional(static_cast<InputRow>(
                                     std::distance(vec->begin(), it)))
@@ -362,7 +349,7 @@
 
     // If the selector is empty, just return an empty RowMap.
     if (size == 0u)
-      return RowMap();
+      return {};
 
     // If the selector is just picking a single row, just return that row
     // without any additional overhead.
@@ -398,69 +385,16 @@
   // Clears this RowMap by resetting it to a newly constructed state.
   void Clear() { *this = RowMap(); }
 
-  template <typename Comparator = bool(uint32_t, uint32_t)>
-  void StableSort(IndexVector* out, Comparator c) const {
-    if (auto* range = std::get_if<Range>(&data_)) {
-      std::stable_sort(out->begin(), out->end(),
-                       [range, c](uint32_t a, uint32_t b) {
-                         return c(GetRange(*range, a), GetRange(*range, b));
-                       });
-      return;
-    }
-    if (auto* bv = std::get_if<BitVector>(&data_)) {
-      std::stable_sort(out->begin(), out->end(),
-                       [&bv, c](uint32_t a, uint32_t b) {
-                         return c(GetBitVector(*bv, a), GetBitVector(*bv, b));
-                       });
-      return;
-    }
-    if (auto* vec = std::get_if<IndexVector>(&data_)) {
-      std::stable_sort(
-          out->begin(), out->end(), [vec, c](uint32_t a, uint32_t b) {
-            return c(GetIndexVector(*vec, a), GetIndexVector(*vec, b));
-          });
-      return;
-    }
-    NoVariantMatched();
-  }
-
-  // Filters the indices in |out| by keeping those which meet |p|.
-  template <typename Predicate = bool(OutputIndex)>
-  void Filter(Predicate p) {
-    if (auto* range = std::get_if<Range>(&data_)) {
-      data_ = FilterRange(p, *range);
-      return;
-    }
-    if (auto* bv = std::get_if<BitVector>(&data_)) {
-      for (auto it = bv->IterateSetBits(); it; it.Next()) {
-        if (!p(it.index()))
-          it.Clear();
-      }
-      return;
-    }
-    if (auto* vec = std::get_if<IndexVector>(&data_)) {
-      auto ret = std::remove_if(vec->begin(), vec->end(),
-                                [p](uint32_t i) { return !p(i); });
-      vec->erase(ret, vec->end());
-      return;
-    }
-    NoVariantMatched();
-  }
-
   // Converts this RowMap to an index vector in the most efficient way
   // possible.
-  std::vector<uint32_t> TakeAsIndexVector() const&& {
-    if (auto* range = std::get_if<Range>(&data_)) {
+  std::vector<uint32_t> TakeAsIndexVector() && {
+    if (const auto* range = std::get_if<Range>(&data_)) {
       std::vector<uint32_t> rm(range->size());
       std::iota(rm.begin(), rm.end(), range->start);
       return rm;
     }
-    if (auto* bv = std::get_if<BitVector>(&data_)) {
-      std::vector<uint32_t> rm(bv->CountSetBits());
-      for (auto it = bv->IterateSetBits(); it; it.Next()) {
-        rm[it.ordinal()] = it.index();
-      }
-      return rm;
+    if (const auto* bv = std::get_if<BitVector>(&data_)) {
+      return bv->GetSetBitIndices();
     }
     if (auto* vec = std::get_if<IndexVector>(&data_)) {
       return std::move(*vec);
@@ -530,7 +464,7 @@
     bv.Set(row);
   }
 
-  PERFETTO_NORETURN void NoVariantMatched() const {
+  PERFETTO_NORETURN static void NoVariantMatched() {
     PERFETTO_FATAL("Didn't match any variant type.");
   }
 
diff --git a/src/trace_processor/containers/row_map_algorithms.h b/src/trace_processor/containers/row_map_algorithms.h
index 6b79141..808ea5f 100644
--- a/src/trace_processor/containers/row_map_algorithms.h
+++ b/src/trace_processor/containers/row_map_algorithms.h
@@ -17,11 +17,11 @@
 #ifndef SRC_TRACE_PROCESSOR_CONTAINERS_ROW_MAP_ALGORITHMS_H_
 #define SRC_TRACE_PROCESSOR_CONTAINERS_ROW_MAP_ALGORITHMS_H_
 
+#include <cstdint>
 #include <vector>
 
 #include "perfetto/base/logging.h"
 #include "src/trace_processor/containers/bit_vector.h"
-#include "src/trace_processor/containers/bit_vector_iterators.h"
 
 // This file contains fundamental algorithms used by RowMap.
 //
@@ -51,11 +51,7 @@
 inline std::vector<uint32_t> SelectBvWithIvByConvertToIv(
     const BitVector& bv,
     const std::vector<uint32_t>& selector) {
-  std::vector<uint32_t> bv_conv(bv.CountSetBits());
-  for (auto it = bv.IterateSetBits(); it; it.Next()) {
-    bv_conv[it.ordinal()] = it.index();
-  }
-  return SelectIvWithIv(bv_conv, selector);
+  return SelectIvWithIv(bv.GetSetBitIndices(), selector);
 }
 
 // Returns a vector containing elements from |bv| by selecting indices from
diff --git a/src/trace_processor/db/column/arrangement_overlay.cc b/src/trace_processor/db/column/arrangement_overlay.cc
index b9e3000..4f4af85 100644
--- a/src/trace_processor/db/column/arrangement_overlay.cc
+++ b/src/trace_processor/db/column/arrangement_overlay.cc
@@ -71,8 +71,8 @@
       op != FilterOp::kRegex) {
     Range inner_res = inner_->OrderedIndexSearchValidated(
         op, sql_val,
-        Indices{arrangement_->data() + in.start, in.size(),
-                arrangement_state_});
+        OrderedIndices{arrangement_->data() + in.start, in.size(),
+                       arrangement_state_});
     return RangeOrBitVector(
         Range(inner_res.start + in.start, inner_res.end + in.start));
   }
@@ -122,31 +122,22 @@
   return RangeOrBitVector(std::move(builder).Build());
 }
 
-RangeOrBitVector ArrangementOverlay::ChainImpl::IndexSearchValidated(
+void ArrangementOverlay::ChainImpl::IndexSearchValidated(
     FilterOp op,
     SqlValue sql_val,
-    Indices indices) const {
+    Indices& indices) const {
   PERFETTO_TP_TRACE(metatrace::Category::DB,
                     "ArrangementOverlay::ChainImpl::IndexSearch");
 
-  std::vector<uint32_t> storage_iv(indices.size);
-  // Should be SIMD optimized.
-  for (uint32_t i = 0; i < indices.size; ++i) {
-    storage_iv[i] = (*arrangement_)[indices.data[i]];
+  for (Indices::Token& token : indices.tokens) {
+    token.index = (*arrangement_)[token.index];
   }
-
-  // If both the arrangment passed indices are monotonic, we know that this
-  // state was not lost.
-  if (indices.state == Indices::State::kMonotonic) {
-    return inner_->IndexSearchValidated(
-        op, sql_val,
-        Indices{storage_iv.data(), static_cast<uint32_t>(storage_iv.size()),
-                arrangement_state_});
-  }
-  return inner_->IndexSearchValidated(
-      op, sql_val,
-      Indices{storage_iv.data(), static_cast<uint32_t>(storage_iv.size()),
-              Indices::State::kNonmonotonic});
+  // Monotonic input inherits the arrangement's state; non-monotonic input
+  // stays non-monotonic regardless of the arrangement.
+  const bool was_monotonic = indices.state == Indices::State::kMonotonic;
+  indices.state =
+      was_monotonic ? arrangement_state_ : Indices::State::kNonmonotonic;
+  inner_->IndexSearchValidated(op, sql_val, indices);
 }
 
 void ArrangementOverlay::ChainImpl::StableSort(SortToken* start,
diff --git a/src/trace_processor/db/column/arrangement_overlay.h b/src/trace_processor/db/column/arrangement_overlay.h
index bca884f..2bc61cb 100644
--- a/src/trace_processor/db/column/arrangement_overlay.h
+++ b/src/trace_processor/db/column/arrangement_overlay.h
@@ -35,7 +35,7 @@
 class ArrangementOverlay final : public DataLayer {
  public:
   ArrangementOverlay(const std::vector<uint32_t>* arrangement,
-                     Indices::State arrangement_state);
+                     DataLayerChain::Indices::State arrangement_state);
   ~ArrangementOverlay() override;
 
   std::unique_ptr<DataLayerChain> MakeChain(
@@ -59,13 +59,11 @@
 
     RangeOrBitVector SearchValidated(FilterOp, SqlValue, Range) const override;
 
-    RangeOrBitVector IndexSearchValidated(FilterOp,
-                                          SqlValue,
-                                          Indices) const override;
+    void IndexSearchValidated(FilterOp, SqlValue, Indices&) const override;
 
     Range OrderedIndexSearchValidated(FilterOp,
                                       SqlValue,
-                                      Indices) const override {
+                                      const OrderedIndices&) const override {
       PERFETTO_FATAL(
           "OrderedIndexSearch can't be called on ArrangementOverlay");
     }
@@ -91,7 +89,7 @@
 
   std::unique_ptr<DataLayerChain> inner_;
   const std::vector<uint32_t>* arrangement_;
-  const Indices::State arrangement_state_;
+  const DataLayerChain::Indices::State arrangement_state_;
 };
 
 }  // namespace perfetto::trace_processor::column
diff --git a/src/trace_processor/db/column/arrangement_overlay_unittest.cc b/src/trace_processor/db/column/arrangement_overlay_unittest.cc
index 07f13c0..3a26f0c 100644
--- a/src/trace_processor/db/column/arrangement_overlay_unittest.cc
+++ b/src/trace_processor/db/column/arrangement_overlay_unittest.cc
@@ -18,6 +18,7 @@
 
 #include <array>
 #include <cstdint>
+#include <utility>
 #include <vector>
 
 #include "perfetto/trace_processor/basic_types.h"
@@ -35,6 +36,9 @@
 using testing::ElementsAre;
 using testing::IsEmpty;
 
+using Indices = DataLayerChain::Indices;
+using OrderedIndices = DataLayerChain::OrderedIndices;
+
 TEST(ArrangementOverlay, SingleSearch) {
   std::vector<uint32_t> arrangement{1, 1, 2, 2, 3, 3, 4, 4, 1, 1};
   auto fake = FakeStorageChain::SearchSubset(5, std::vector<uint32_t>{1, 2});
@@ -97,13 +101,10 @@
   ArrangementOverlay storage(&arrangement, Indices::State::kNonmonotonic);
   auto chain = storage.MakeChain(std::move(fake));
 
-  std::vector<uint32_t> table_idx{7u, 1u, 3u};
-  RangeOrBitVector res = chain->IndexSearch(
-      FilterOp::kGe, SqlValue::Long(0u),
-      Indices{table_idx.data(), static_cast<uint32_t>(table_idx.size()),
-              Indices::State::kNonmonotonic});
-
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(1u));
+  Indices indices = Indices::CreateWithIndexPayloadForTesting(
+      {7u, 1u, 3u}, Indices::State::kNonmonotonic);
+  chain->IndexSearch(FilterOp::kGe, SqlValue::Long(0u), indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices), ElementsAre(1u));
 }
 
 TEST(ArrangementOverlay, OrderingSearch) {
@@ -116,7 +117,6 @@
 
   RangeOrBitVector res =
       chain->Search(FilterOp::kGe, SqlValue::Long(0u), Range(0, 5));
-
   ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(3, 4));
 }
 
diff --git a/src/trace_processor/db/column/data_layer.cc b/src/trace_processor/db/column/data_layer.cc
index c8f8b68..634cbce 100644
--- a/src/trace_processor/db/column/data_layer.cc
+++ b/src/trace_processor/db/column/data_layer.cc
@@ -113,8 +113,9 @@
   PERFETTO_FATAL("For GCC");
 }
 
-ArrangementOverlay::ArrangementOverlay(const std::vector<uint32_t>* arrangement,
-                                       Indices::State arrangement_state)
+ArrangementOverlay::ArrangementOverlay(
+    const std::vector<uint32_t>* arrangement,
+    DataLayerChain::Indices::State arrangement_state)
     : DataLayer(Impl::kArrangement),
       arrangement_(arrangement),
       arrangement_state_(arrangement_state) {}
diff --git a/src/trace_processor/db/column/data_layer.h b/src/trace_processor/db/column/data_layer.h
index 8f34845..c8d74a8 100644
--- a/src/trace_processor/db/column/data_layer.h
+++ b/src/trace_processor/db/column/data_layer.h
@@ -20,6 +20,8 @@
 #include <cstdint>
 #include <memory>
 #include <string>
+#include <utility>
+#include <vector>
 
 #include "perfetto/base/compiler.h"
 #include "perfetto/base/logging.h"
@@ -107,6 +109,62 @@
   };
   using StorageProto = protos::pbzero::SerializedColumn_Storage;
 
+  // Index vector related data required to Filter using IndexSearch.
+  struct Indices {
+    enum class State {
+      // We can't guarantee that data is in monotonic order.
+      kNonmonotonic,
+      // Data is in monotonic order.
+      kMonotonic,
+    };
+    // Contains an index to an element in the chain and an opaque payload class
+    // which can be set to whatever the user of the chain requires.
+    struct Token {
+      // An index pointing to an element in this chain. Indicates the element
+      // at this index should be filtered.
+      uint32_t index;
+
+      // An opaque value which can be set to some value meaningful to the
+      // caller. While the exact meaning of |payload| should not be depended
+      // upon, implementations are free to make assumptions that |payload| will
+      // be strictly monotonic.
+      uint32_t payload;
+
+      struct PayloadComparator {
+        bool operator()(const Token& a, const Token& b) const {
+          return a.payload < b.payload;
+        }
+      };
+    };
+    static Indices Create(const std::vector<uint32_t>& raw, State state) {
+      std::vector<Token> tokens;
+      tokens.reserve(raw.size());  // One token (payload == index) per entry.
+      for (uint32_t r : raw) {
+        tokens.push_back(Token{r, r});
+      }
+      return Indices{std::move(tokens), state};
+    }
+    static Indices CreateWithIndexPayloadForTesting(
+        const std::vector<uint32_t>& raw,
+        State state) {
+      std::vector<Token> tokens;
+      tokens.reserve(raw.size());  // One token (payload == position) per entry.
+      for (uint32_t i = 0; i < raw.size(); ++i) {
+        tokens.push_back(Token{raw[i], i});
+      }
+      return Indices{std::move(tokens), state};
+    }
+    std::vector<Token> tokens;
+    State state = State::kNonmonotonic;
+  };
+
+  // Index vector related data required to Filter using OrderedIndexSearch.
+  struct OrderedIndices {
+    const uint32_t* data = nullptr;
+    uint32_t size = 0;
+    Indices::State state = Indices::State::kNonmonotonic;
+  };
+
   virtual ~DataLayerChain();
 
   // Start of public API.
@@ -130,14 +188,15 @@
   //    to positions in the storage.
   //
   // Notes for implementors:
-  //  * Implementations should ensure that the return value *only* includes
-  //    positions in |range| as callers will expect this to be true and can
-  //    optimize based on this.
+  //  * Implementations should ensure that the return value is empty or *only*
+  //    includes positions in |range|. Callers are free to assume this and can
+  //    optimize based on it.
   //  * Implementations should ensure that, if they return a BitVector, it is
   //    precisely of size |range.end|.
   PERFETTO_ALWAYS_INLINE RangeOrBitVector Search(FilterOp op,
                                                  SqlValue value,
                                                  Range range) const {
+    PERFETTO_DCHECK(range.end <= size());
     switch (ValidateSearchConstraints(op, value)) {
       case SearchValidationResult::kAllData:
         return RangeOrBitVector(range);
@@ -163,24 +222,27 @@
   // Notes for implementors:
   //  * Implementations should ensure that, if they return a BitVector, it is
   //    precisely of size |indices_count|.
-  PERFETTO_ALWAYS_INLINE RangeOrBitVector IndexSearch(FilterOp op,
-                                                      SqlValue value,
-                                                      Indices indices) const {
+  PERFETTO_ALWAYS_INLINE void IndexSearch(FilterOp op,
+                                          SqlValue value,
+                                          Indices& indices) const {
     switch (ValidateSearchConstraints(op, value)) {
       case SearchValidationResult::kAllData:
-        return RangeOrBitVector(Range(0, indices.size));
+        return;
       case SearchValidationResult::kNoData:
-        return RangeOrBitVector(Range());
+        indices.tokens.clear();
+        return;
       case SearchValidationResult::kOk:
-        return IndexSearchValidated(op, value, indices);
+        IndexSearchValidated(op, value, indices);
+        return;
     }
     PERFETTO_FATAL("For GCC");
   }
 
   // Searches for elements which match |op| and |value| at the positions given
-  // by indices data.
+  // by OrderedIndices data.
   //
-  // Returns a Range into Indices data of indices that pass the constraint.
+  // Returns a Range into OrderedIndices data of indices that pass the
+  // constraint.
   //
   // Notes for callers:
   //  * Should not be called on:
@@ -190,9 +252,10 @@
   //      result.
   //  * Callers should note that the return value of this function corresponds
   //    to positions in |indices| *not* positions in the storage.
-  PERFETTO_ALWAYS_INLINE Range OrderedIndexSearch(FilterOp op,
-                                                  SqlValue value,
-                                                  Indices indices) const {
+  PERFETTO_ALWAYS_INLINE Range
+  OrderedIndexSearch(FilterOp op,
+                     SqlValue value,
+                     const OrderedIndices& indices) const {
     switch (ValidateSearchConstraints(op, value)) {
       case SearchValidationResult::kAllData:
         return {0, indices.size};
@@ -254,15 +317,13 @@
 
   // Post-validated implementation of |IndexSearch|. See |IndexSearch|'s
   // documentation.
-  virtual RangeOrBitVector IndexSearchValidated(FilterOp,
-                                                SqlValue,
-                                                Indices) const = 0;
+  virtual void IndexSearchValidated(FilterOp, SqlValue, Indices&) const = 0;
 
   // Post-validated implementation of |OrderedIndexSearch|. See
   // |OrderedIndexSearch|'s documentation.
   virtual Range OrderedIndexSearchValidated(FilterOp,
                                             SqlValue,
-                                            Indices) const = 0;
+                                            const OrderedIndices&) const = 0;
 };
 
 }  // namespace perfetto::trace_processor::column
diff --git a/src/trace_processor/db/column/dense_null_overlay.cc b/src/trace_processor/db/column/dense_null_overlay.cc
index d9a7fb1..e171ec0 100644
--- a/src/trace_processor/db/column/dense_null_overlay.cc
+++ b/src/trace_processor/db/column/dense_null_overlay.cc
@@ -21,6 +21,7 @@
 #include <iterator>
 #include <memory>
 #include <utility>
+#include <vector>
 
 #include "perfetto/base/logging.h"
 #include "perfetto/trace_processor/basic_types.h"
@@ -109,8 +110,8 @@
     // |non_null_| which matches the range. Then, resize to |in.end| as this
     // is mandated by the API contract of |Storage::Search|.
     Range inner_range = std::move(inner_res).TakeIfRange();
-    PERFETTO_DCHECK(inner_range.end <= in.end);
-    PERFETTO_DCHECK(inner_range.start >= in.start);
+    PERFETTO_DCHECK(inner_range.empty() || inner_range.end <= in.end);
+    PERFETTO_DCHECK(inner_range.empty() || inner_range.start >= in.start);
     res = non_null_->IntersectRange(inner_range.start, inner_range.end);
     res.Resize(in.end, false);
   } else {
@@ -135,84 +136,68 @@
   return RangeOrBitVector(std::move(res));
 }
 
-RangeOrBitVector DenseNullOverlay::ChainImpl::IndexSearchValidated(
-    FilterOp op,
-    SqlValue sql_val,
-    Indices indices) const {
+void DenseNullOverlay::ChainImpl::IndexSearchValidated(FilterOp op,
+                                                       SqlValue sql_val,
+                                                       Indices& indices) const {
   PERFETTO_TP_TRACE(metatrace::Category::DB,
                     "DenseNullOverlay::ChainImpl::IndexSearch");
 
   if (op == FilterOp::kIsNull) {
+    // Partition the vector into all the null indices followed by all the
+    // non-null indices.
+    auto non_null_it = std::stable_partition(
+        indices.tokens.begin(), indices.tokens.end(),
+        [this](const Indices::Token& t) { return !non_null_->IsSet(t.index); });
+
+    // IndexSearch |inner_| with a vector containing a copy of the non-null
+    // indices.
+    Indices non_null{{non_null_it, indices.tokens.end()}, indices.state};
+    inner_->IndexSearch(op, sql_val, non_null);
+
+    // Replace all the original non-null positions with the result from calling
+    // IndexSearch. Use insert()'s return value: it invalidates the old end().
+    indices.tokens.erase(non_null_it, indices.tokens.end());
+    auto new_non_null_it =
+        indices.tokens.insert(indices.tokens.end(), non_null.tokens.begin(),
+                              non_null.tokens.end());
+
+    // Merge the two sorted index ranges together using the payload as the
+    // comparator. This is a required post-condition of IndexSearch.
+    std::inplace_merge(indices.tokens.begin(), new_non_null_it,
+                       indices.tokens.end(),
+                       Indices::Token::PayloadComparator());
+    return;
+  }
+
+  auto keep_only_non_null = [this, &indices]() {
+    indices.tokens.erase(
+        std::remove_if(indices.tokens.begin(), indices.tokens.end(),
+                       [this](const Indices::Token& idx) {
+                         return !non_null_->IsSet(idx.index);
+                       }),
+        indices.tokens.end());
+    return;
+  };
+  if (op == FilterOp::kIsNotNull) {
     switch (inner_->ValidateSearchConstraints(op, sql_val)) {
-      case SearchValidationResult::kNoData: {
-        BitVector::Builder null_indices(indices.size);
-        for (const uint32_t* it = indices.data;
-             it != indices.data + indices.size; it++) {
-          null_indices.Append(!non_null_->IsSet(*it));
-        }
-        // There is no need to search in underlying storage. We should just
-        // check if the index is set in |non_null_|.
-        return RangeOrBitVector(std::move(null_indices).Build());
-      }
+      case SearchValidationResult::kNoData:
+        indices.tokens.clear();
+        return;
       case SearchValidationResult::kAllData:
-        return RangeOrBitVector(Range(0, indices.size));
-      case SearchValidationResult::kOk:
-        break;
-    }
-  } else if (op == FilterOp::kIsNotNull) {
-    switch (inner_->ValidateSearchConstraints(op, sql_val)) {
-      case SearchValidationResult::kNoData: {
-        BitVector::Builder non_null_indices(indices.size);
-        for (const uint32_t* it = indices.data;
-             it != indices.data + indices.size; it++) {
-          non_null_indices.Append(non_null_->IsSet(*it));
-        }
-        // There is no need to search in underlying storage. We should just
-        // check if the index is set in |non_null_|.
-        return RangeOrBitVector(std::move(non_null_indices).Build());
-      }
-      case SearchValidationResult::kAllData:
-        return RangeOrBitVector(Range(0, indices.size));
+        keep_only_non_null();
+        return;
       case SearchValidationResult::kOk:
         break;
     }
   }
-
-  RangeOrBitVector inner_res =
-      inner_->IndexSearchValidated(op, sql_val, indices);
-  if (inner_res.IsRange()) {
-    Range inner_range = std::move(inner_res).TakeIfRange();
-    BitVector::Builder builder(indices.size, inner_range.start);
-    for (uint32_t i = inner_range.start; i < inner_range.end; ++i) {
-      builder.Append(non_null_->IsSet(indices.data[i]));
-    }
-    return RangeOrBitVector(std::move(builder).Build());
-  }
-
-  BitVector::Builder builder(indices.size);
-  for (uint32_t i = 0; i < indices.size; ++i) {
-    builder.Append(non_null_->IsSet(indices.data[i]));
-  }
-  BitVector non_null = std::move(builder).Build();
-
-  BitVector res = std::move(inner_res).TakeIfBitVector();
-
-  if (op == FilterOp::kIsNull) {
-    BitVector null = std::move(non_null);
-    null.Not();
-    res.Or(null);
-  } else {
-    res.And(non_null);
-  }
-
-  PERFETTO_DCHECK(res.size() == indices.size);
-  return RangeOrBitVector(std::move(res));
+  keep_only_non_null();
+  inner_->IndexSearchValidated(op, sql_val, indices);
 }
 
 Range DenseNullOverlay::ChainImpl::OrderedIndexSearchValidated(
     FilterOp op,
     SqlValue sql_val,
-    Indices indices) const {
+    const OrderedIndices& indices) const {
   // For NOT EQUAL the further analysis needs to be done by the caller.
   PERFETTO_CHECK(op != FilterOp::kNe);
 
@@ -247,7 +232,8 @@
 
   Range inner_range = inner_->OrderedIndexSearchValidated(
       op, sql_val,
-      Indices{first_non_null, non_null_size, Indices::State::kNonmonotonic});
+      OrderedIndices{first_non_null, non_null_size,
+                     Indices::State::kNonmonotonic});
   return {inner_range.start + non_null_offset,
           inner_range.end + non_null_offset};
 }
diff --git a/src/trace_processor/db/column/dense_null_overlay.h b/src/trace_processor/db/column/dense_null_overlay.h
index 0f881fb..b27beb5 100644
--- a/src/trace_processor/db/column/dense_null_overlay.h
+++ b/src/trace_processor/db/column/dense_null_overlay.h
@@ -54,13 +54,11 @@
 
     RangeOrBitVector SearchValidated(FilterOp, SqlValue, Range) const override;
 
-    RangeOrBitVector IndexSearchValidated(FilterOp,
-                                          SqlValue,
-                                          Indices) const override;
+    void IndexSearchValidated(FilterOp, SqlValue, Indices&) const override;
 
     Range OrderedIndexSearchValidated(FilterOp,
                                       SqlValue,
-                                      Indices) const override;
+                                      const OrderedIndices&) const override;
 
     void StableSort(SortToken* start,
                     SortToken* end,
diff --git a/src/trace_processor/db/column/dense_null_overlay_unittest.cc b/src/trace_processor/db/column/dense_null_overlay_unittest.cc
index 0cf6276..efda157 100644
--- a/src/trace_processor/db/column/dense_null_overlay_unittest.cc
+++ b/src/trace_processor/db/column/dense_null_overlay_unittest.cc
@@ -18,6 +18,7 @@
 
 #include <cstdint>
 #include <memory>
+#include <utility>
 #include <vector>
 
 #include "perfetto/trace_processor/basic_types.h"
@@ -35,6 +36,9 @@
 using testing::ElementsAre;
 using testing::IsEmpty;
 
+using Indices = DataLayerChain::Indices;
+using OrderedIndices = DataLayerChain::OrderedIndices;
+
 TEST(DenseNullOverlay, NoFilteringSearch) {
   std::vector<uint32_t> data{0, 1, 0, 1, 0};
   auto numeric = std::make_unique<NumericStorage<uint32_t>>(
@@ -103,12 +107,10 @@
   DenseNullOverlay storage(&bv);
   auto chain = storage.MakeChain(numeric->MakeChain());
 
-  std::vector<uint32_t> index({5, 2, 3, 4, 1});
-  auto res = chain->IndexSearch(
-      FilterOp::kGe, SqlValue::Long(0),
-      Indices{index.data(), static_cast<uint32_t>(index.size()),
-              Indices::State::kNonmonotonic});
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(0, 2, 3));
+  Indices indices = Indices::CreateWithIndexPayloadForTesting(
+      {5, 2, 3, 4, 1}, Indices::State::kNonmonotonic);
+  chain->IndexSearch(FilterOp::kGe, SqlValue::Long(0), indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices), ElementsAre(0, 2, 3));
 }
 
 TEST(DenseNullOverlay, IsNullIndexSearch) {
@@ -118,12 +120,11 @@
   DenseNullOverlay storage(&bv);
   auto chain = storage.MakeChain(std::move(fake));
 
-  std::vector<uint32_t> index({5, 2, 3, 4, 1});
-  auto res = chain->IndexSearch(
-      FilterOp::kIsNull, SqlValue(),
-      Indices{index.data(), static_cast<uint32_t>(index.size()),
-              Indices::State::kMonotonic});
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(0, 1, 2, 3));
+  Indices indices = Indices::CreateWithIndexPayloadForTesting(
+      {5, 2, 3, 4, 1}, Indices::State::kNonmonotonic);
+  chain->IndexSearch(FilterOp::kIsNull, SqlValue(), indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices),
+              ElementsAre(0, 1, 2, 3));
 }
 
 TEST(DenseNullOverlay, OrderedIndexSearch) {
@@ -134,7 +135,7 @@
   auto chain = storage.MakeChain(std::move(fake));
 
   std::vector<uint32_t> indices_vec({0, 2, 4, 1, 3, 5});
-  Indices indices{indices_vec.data(), 6, Indices::State::kNonmonotonic};
+  OrderedIndices indices{indices_vec.data(), 6, Indices::State::kNonmonotonic};
 
   Range res = chain->OrderedIndexSearch(FilterOp::kIsNull, SqlValue(), indices);
   ASSERT_EQ(res.start, 0u);
diff --git a/src/trace_processor/db/column/dummy_storage.cc b/src/trace_processor/db/column/dummy_storage.cc
index 54d7c61..27b464f 100644
--- a/src/trace_processor/db/column/dummy_storage.cc
+++ b/src/trace_processor/db/column/dummy_storage.cc
@@ -43,15 +43,16 @@
   PERFETTO_FATAL("Shouldn't be called");
 }
 
-RangeOrBitVector DummyStorage::ChainImpl::IndexSearchValidated(FilterOp,
-                                                               SqlValue,
-                                                               Indices) const {
+void DummyStorage::ChainImpl::IndexSearchValidated(FilterOp,
+                                                   SqlValue,
+                                                   Indices&) const {
   PERFETTO_FATAL("Shouldn't be called");
 }
 
-Range DummyStorage::ChainImpl::OrderedIndexSearchValidated(FilterOp,
-                                                           SqlValue,
-                                                           Indices) const {
+Range DummyStorage::ChainImpl::OrderedIndexSearchValidated(
+    FilterOp,
+    SqlValue,
+    const OrderedIndices&) const {
   PERFETTO_FATAL("Shouldn't be called");
 }
 
diff --git a/src/trace_processor/db/column/dummy_storage.h b/src/trace_processor/db/column/dummy_storage.h
index ced41a9..80674bb 100644
--- a/src/trace_processor/db/column/dummy_storage.h
+++ b/src/trace_processor/db/column/dummy_storage.h
@@ -43,13 +43,11 @@
 
     RangeOrBitVector SearchValidated(FilterOp, SqlValue, Range) const override;
 
-    RangeOrBitVector IndexSearchValidated(FilterOp,
-                                          SqlValue,
-                                          Indices) const override;
+    void IndexSearchValidated(FilterOp, SqlValue, Indices&) const override;
 
     Range OrderedIndexSearchValidated(FilterOp,
                                       SqlValue,
-                                      Indices) const override;
+                                      const OrderedIndices&) const override;
 
     void StableSort(SortToken* start,
                     SortToken* end,
diff --git a/src/trace_processor/db/column/fake_storage.cc b/src/trace_processor/db/column/fake_storage.cc
index 9fbc3ae..f587c77 100644
--- a/src/trace_processor/db/column/fake_storage.cc
+++ b/src/trace_processor/db/column/fake_storage.cc
@@ -82,32 +82,39 @@
   PERFETTO_FATAL("For GCC");
 }
 
-RangeOrBitVector FakeStorageChain::IndexSearchValidated(FilterOp,
-                                                        SqlValue,
-                                                        Indices indices) const {
+void FakeStorageChain::IndexSearchValidated(FilterOp,
+                                            SqlValue,
+                                            Indices& indices) const {
   switch (strategy_) {
     case kAll:
-      return RangeOrBitVector(Range(0, indices.size));
+      return;
     case kNone:
-      return RangeOrBitVector(Range());
+      indices.tokens.clear();
+      return;
     case kRange:
-    case kBitVector: {
-      BitVector::Builder builder(indices.size);
-      for (const uint32_t* it = indices.data; it != indices.data + indices.size;
-           ++it) {
-        bool in_range = strategy_ == kRange && range_.Contains(*it);
-        bool in_bv = strategy_ == kBitVector && bit_vector_.IsSet(*it);
-        builder.Append(in_range || in_bv);
-      }
-      return RangeOrBitVector(std::move(builder).Build());
-    }
+      indices.tokens.erase(
+          std::remove_if(indices.tokens.begin(), indices.tokens.end(),
+                         [this](const Indices::Token& token) {
+                           return !range_.Contains(token.index);
+                         }),
+          indices.tokens.end());
+      return;
+    case kBitVector:
+      indices.tokens.erase(
+          std::remove_if(indices.tokens.begin(), indices.tokens.end(),
+                         [this](const Indices::Token& token) {
+                           return !bit_vector_.IsSet(token.index);
+                         }),
+          indices.tokens.end());
+      return;
   }
   PERFETTO_FATAL("For GCC");
 }
 
-Range FakeStorageChain::OrderedIndexSearchValidated(FilterOp,
-                                                    SqlValue,
-                                                    Indices indices) const {
+Range FakeStorageChain::OrderedIndexSearchValidated(
+    FilterOp,
+    SqlValue,
+    const OrderedIndices& indices) const {
   if (strategy_ == kAll) {
     return {0, indices.size};
   }
diff --git a/src/trace_processor/db/column/fake_storage.h b/src/trace_processor/db/column/fake_storage.h
index aca6890..bc02300 100644
--- a/src/trace_processor/db/column/fake_storage.h
+++ b/src/trace_processor/db/column/fake_storage.h
@@ -82,11 +82,11 @@
 
   RangeOrBitVector SearchValidated(FilterOp, SqlValue, Range) const override;
 
-  RangeOrBitVector IndexSearchValidated(FilterOp,
-                                        SqlValue,
-                                        Indices) const override;
+  void IndexSearchValidated(FilterOp, SqlValue, Indices&) const override;
 
-  Range OrderedIndexSearchValidated(FilterOp, SqlValue, Indices) const override;
+  Range OrderedIndexSearchValidated(FilterOp,
+                                    SqlValue,
+                                    const OrderedIndices&) const override;
 
   void StableSort(SortToken* start,
                   SortToken* end,
diff --git a/src/trace_processor/db/column/id_storage.cc b/src/trace_processor/db/column/id_storage.cc
index 6fdb800..e3f4d8a 100644
--- a/src/trace_processor/db/column/id_storage.cc
+++ b/src/trace_processor/db/column/id_storage.cc
@@ -21,7 +21,6 @@
 #include <functional>
 #include <iterator>
 #include <limits>
-#include <memory>
 #include <string>
 #include <utility>
 
@@ -41,40 +40,13 @@
 namespace {
 
 template <typename Comparator>
-RangeOrBitVector IndexSearchWithComparator(uint32_t val,
-                                           const uint32_t* indices,
-                                           uint32_t indices_size,
-                                           Comparator comparator) {
-  // Slow path: we compare <64 elements and append to get us to a word
-  // boundary.
-  const uint32_t* ptr = indices;
-  BitVector::Builder builder(indices_size);
-  uint32_t front_elements = builder.BitsUntilWordBoundaryOrFull();
-  for (uint32_t i = 0; i < front_elements; ++i) {
-    builder.Append(comparator(ptr[i], val));
-  }
-  ptr += front_elements;
-
-  // Fast path: we compare as many groups of 64 elements as we can.
-  // This should be very easy for the compiler to auto-vectorize.
-  uint32_t fast_path_elements = builder.BitsInCompleteWordsUntilFull();
-  for (uint32_t i = 0; i < fast_path_elements; i += BitVector::kBitsInWord) {
-    uint64_t word = 0;
-    // This part should be optimised by SIMD and is expected to be fast.
-    for (uint32_t k = 0; k < BitVector::kBitsInWord; ++k) {
-      bool comp_result = comparator(ptr[i + k], val);
-      word |= static_cast<uint64_t>(comp_result) << k;
-    }
-    builder.AppendWord(word);
-  }
-  ptr += fast_path_elements;
-
-  // Slow path: we compare <64 elements and append to fill the Builder.
-  uint32_t back_elements = builder.BitsUntilFull();
-  for (uint32_t i = 0; i < back_elements; ++i) {
-    builder.Append(comparator(ptr[i], val));
-  }
-  return RangeOrBitVector(std::move(builder).Build());
+void IndexSearchWithComparator(uint32_t val, DataLayerChain::Indices& indices) {
+  // Keeps only the tokens whose index satisfies Comparator()(index, val),
+  // preserving their relative order.
+  using Token = DataLayerChain::Indices::Token;
+  auto fails = [val](const Token& t) { return !Comparator()(t.index, val); };
+  auto& toks = indices.tokens;
+  toks.erase(std::remove_if(toks.begin(), toks.end(), fails), toks.end());
 }
 
 }  // namespace
@@ -226,14 +198,13 @@
   return RangeOrBitVector(BinarySearchIntrinsic(op, val, search_range));
 }
 
-RangeOrBitVector IdStorage::ChainImpl::IndexSearchValidated(
-    FilterOp op,
-    SqlValue sql_val,
-    Indices indices) const {
+void IdStorage::ChainImpl::IndexSearchValidated(FilterOp op,
+                                                SqlValue sql_val,
+                                                Indices& indices) const {
   PERFETTO_TP_TRACE(
       metatrace::Category::DB, "IdStorage::ChainImpl::IndexSearch",
-      [indices, op](metatrace::Record* r) {
-        r->AddArg("Count", std::to_string(indices.size));
+      [&indices, op](metatrace::Record* r) {
+        r->AddArg("Count", std::to_string(indices.tokens.size()));
         r->AddArg("Op", std::to_string(static_cast<uint32_t>(op)));
       });
 
@@ -241,35 +212,30 @@
   // requires special logic.
   if (sql_val.type == SqlValue::kDouble) {
     switch (utils::CompareIntColumnWithDouble(op, &sql_val)) {
+      case SearchValidationResult::kAllData:
+        return;
+      case SearchValidationResult::kNoData:
+        indices.tokens.clear();
+        return;
       case SearchValidationResult::kOk:
         break;
-      case SearchValidationResult::kAllData:
-        return RangeOrBitVector(Range(0, indices.size));
-      case SearchValidationResult::kNoData:
-        return RangeOrBitVector(Range());
     }
   }
 
   auto val = static_cast<uint32_t>(sql_val.AsLong());
   switch (op) {
     case FilterOp::kEq:
-      return IndexSearchWithComparator(val, indices.data, indices.size,
-                                       std::equal_to<>());
+      return IndexSearchWithComparator<std::equal_to<>>(val, indices);
     case FilterOp::kNe:
-      return IndexSearchWithComparator(val, indices.data, indices.size,
-                                       std::not_equal_to<>());
+      return IndexSearchWithComparator<std::not_equal_to<>>(val, indices);
     case FilterOp::kLe:
-      return IndexSearchWithComparator(val, indices.data, indices.size,
-                                       std::less_equal<>());
+      return IndexSearchWithComparator<std::less_equal<>>(val, indices);
     case FilterOp::kLt:
-      return IndexSearchWithComparator(val, indices.data, indices.size,
-                                       std::less<>());
+      return IndexSearchWithComparator<std::less<>>(val, indices);
     case FilterOp::kGt:
-      return IndexSearchWithComparator(val, indices.data, indices.size,
-                                       std::greater<>());
+      return IndexSearchWithComparator<std::greater<>>(val, indices);
     case FilterOp::kGe:
-      return IndexSearchWithComparator(val, indices.data, indices.size,
-                                       std::greater_equal<>());
+      return IndexSearchWithComparator<std::greater_equal<>>(val, indices);
     case FilterOp::kIsNotNull:
     case FilterOp::kIsNull:
     case FilterOp::kGlob:
@@ -279,9 +245,10 @@
   PERFETTO_FATAL("FilterOp not matched");
 }
 
-Range IdStorage::ChainImpl::OrderedIndexSearchValidated(FilterOp op,
-                                                        SqlValue sql_val,
-                                                        Indices indices) const {
+Range IdStorage::ChainImpl::OrderedIndexSearchValidated(
+    FilterOp op,
+    SqlValue sql_val,
+    const OrderedIndices& indices) const {
   PERFETTO_DCHECK(op != FilterOp::kNe);
 
   PERFETTO_TP_TRACE(
@@ -305,10 +272,9 @@
   }
   auto val = static_cast<uint32_t>(sql_val.AsLong());
 
-  // Indices are monotonic non contiguous values if OrderedIndexSearch was
-  // called.
-  // Look for the first and last index and find the result of looking for this
-  // range in IdStorage.
+  // OrderedIndices are monotonic non contiguous values if OrderedIndexSearch
+  // was called. Look for the first and last index and find the result of
+  // looking for this range in IdStorage.
   Range indices_range(indices.data[0], indices.data[indices.size - 1] + 1);
   Range bin_search_ret = BinarySearchIntrinsic(op, val, indices_range);
 
@@ -327,13 +293,13 @@
     case FilterOp::kEq:
       return {val, val + (range.start <= val && val < range.end)};
     case FilterOp::kLe:
-      return {range.start, std::min(val + 1, range.end)};
+      return {range.start, std::clamp(val + 1, range.start, range.end)};
     case FilterOp::kLt:
-      return {range.start, std::min(val, range.end)};
+      return {range.start, std::clamp(val, range.start, range.end)};
     case FilterOp::kGe:
-      return {std::max(val, range.start), range.end};
+      return {std::clamp(val, range.start, range.end), range.end};
     case FilterOp::kGt:
-      return {std::max(val + 1, range.start), range.end};
+      return {std::clamp(val + 1, range.start, range.end), range.end};
     case FilterOp::kIsNotNull:
     case FilterOp::kNe:
     case FilterOp::kIsNull:
diff --git a/src/trace_processor/db/column/id_storage.h b/src/trace_processor/db/column/id_storage.h
index e6e25ab..51b9f0d 100644
--- a/src/trace_processor/db/column/id_storage.h
+++ b/src/trace_processor/db/column/id_storage.h
@@ -53,13 +53,11 @@
 
     RangeOrBitVector SearchValidated(FilterOp, SqlValue, Range) const override;
 
-    RangeOrBitVector IndexSearchValidated(FilterOp,
-                                          SqlValue,
-                                          Indices) const override;
+    void IndexSearchValidated(FilterOp, SqlValue, Indices&) const override;
 
     Range OrderedIndexSearchValidated(FilterOp,
                                       SqlValue,
-                                      Indices) const override;
+                                      const OrderedIndices&) const override;
 
     void StableSort(SortToken* start,
                     SortToken* end,
diff --git a/src/trace_processor/db/column/id_storage_unittest.cc b/src/trace_processor/db/column/id_storage_unittest.cc
index 48ce18e..2d6fd44 100644
--- a/src/trace_processor/db/column/id_storage_unittest.cc
+++ b/src/trace_processor/db/column/id_storage_unittest.cc
@@ -42,6 +42,9 @@
 using testing::ElementsAre;
 using testing::IsEmpty;
 
+using Indices = DataLayerChain::Indices;
+using OrderedIndices = DataLayerChain::OrderedIndices;
+
 TEST(IdStorage, InvalidSearchConstraints) {
   IdStorage storage;
   auto chain = storage.MakeChain();
@@ -214,32 +217,35 @@
   IdStorage storage;
   auto chain = storage.MakeChain();
   SqlValue val = SqlValue::Long(5);
-  std::vector<uint32_t> indices_vec{5, 4, 3, 9, 8, 7};
-  Indices indices{indices_vec.data(), 6, Indices::State::kNonmonotonic};
 
-  FilterOp op = FilterOp::kEq;
-  auto res = chain->IndexSearch(op, val, indices);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(0));
+  auto common_indices = Indices::CreateWithIndexPayloadForTesting(
+      {5, 4, 3, 9, 8, 7}, Indices::State::kNonmonotonic);
 
-  op = FilterOp::kNe;
-  res = chain->IndexSearch(op, val, indices);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(1, 2, 3, 4, 5));
+  auto indices = common_indices;
+  chain->IndexSearch(FilterOp::kEq, val, indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices), ElementsAre(0));
 
-  op = FilterOp::kLe;
-  res = chain->IndexSearch(op, val, indices);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(0, 1, 2));
+  indices = common_indices;
+  chain->IndexSearch(FilterOp::kNe, val, indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices),
+              ElementsAre(1, 2, 3, 4, 5));
 
-  op = FilterOp::kLt;
-  res = chain->IndexSearch(op, val, indices);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(1, 2));
+  indices = common_indices;
+  chain->IndexSearch(FilterOp::kLe, val, indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices), ElementsAre(0, 1, 2));
 
-  op = FilterOp::kGe;
-  res = chain->IndexSearch(op, val, indices);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(0, 3, 4, 5));
+  indices = common_indices;
+  chain->IndexSearch(FilterOp::kLt, val, indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices), ElementsAre(1, 2));
 
-  op = FilterOp::kGt;
-  res = chain->IndexSearch(op, val, indices);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(3, 4, 5));
+  indices = common_indices;
+  chain->IndexSearch(FilterOp::kGe, val, indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices),
+              ElementsAre(0, 3, 4, 5));
+
+  indices = common_indices;
+  chain->IndexSearch(FilterOp::kGt, val, indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices), ElementsAre(3, 4, 5));
 }
 
 TEST(IdStorage, OrderedIndexSearch) {
@@ -247,7 +253,7 @@
   auto chain = storage.MakeChain();
 
   std::vector<uint32_t> indices_vec{0, 1, 2, 4, 4};
-  Indices indices{indices_vec.data(), 5, Indices::State::kMonotonic};
+  OrderedIndices indices{indices_vec.data(), 5, Indices::State::kMonotonic};
 
   Range range =
       chain->OrderedIndexSearch(FilterOp::kEq, SqlValue::Long(2), indices);
@@ -274,17 +280,11 @@
 TEST(IdStorage, IndexSearchEqTooBig) {
   IdStorage storage;
   auto chain = storage.MakeChain();
-  std::vector<uint32_t> indices{1, 3, 5, 7, 9, 11, 2, 4};
 
-  BitVector bv =
-      chain
-          ->IndexSearch(
-              FilterOp::kEq, SqlValue::Long(20),
-              Indices{indices.data(), static_cast<uint32_t>(indices.size()),
-                      Indices::State::kMonotonic})
-          .TakeIfBitVector();
-
-  ASSERT_EQ(bv.CountSetBits(), 0u);
+  auto indices = Indices::CreateWithIndexPayloadForTesting(
+      {1, 3, 5, 7, 9, 11, 2, 4}, Indices::State::kNonmonotonic);
+  chain->IndexSearch(FilterOp::kEq, SqlValue::Long(20), indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices), IsEmpty());
 }
 
 TEST(IdStorage, SearchWithIdAsDoubleSimple) {
diff --git a/src/trace_processor/db/column/null_overlay.cc b/src/trace_processor/db/column/null_overlay.cc
index 7984656..fa1e679 100644
--- a/src/trace_processor/db/column/null_overlay.cc
+++ b/src/trace_processor/db/column/null_overlay.cc
@@ -48,7 +48,7 @@
   BitVector res;
   if (storage_result.IsRange()) {
     Range range = std::move(storage_result).TakeIfRange();
-    if (range.size() > 0) {
+    if (!range.empty()) {
       res = non_null.IntersectRange(non_null.IndexOfNthSet(range.start),
                                     non_null.IndexOfNthSet(range.end - 1) + 1);
 
@@ -162,76 +162,74 @@
   return RangeOrBitVector(std::move(res));
 }
 
-RangeOrBitVector NullOverlay::ChainImpl::IndexSearchValidated(
-    FilterOp op,
-    SqlValue sql_val,
-    Indices indices) const {
+void NullOverlay::ChainImpl::IndexSearchValidated(FilterOp op,
+                                                  SqlValue sql_val,
+                                                  Indices& indices) const {
   PERFETTO_TP_TRACE(metatrace::Category::DB,
                     "NullOverlay::ChainImpl::IndexSearch");
 
   if (op == FilterOp::kIsNull) {
-    switch (inner_->ValidateSearchConstraints(op, sql_val)) {
-      case SearchValidationResult::kNoData: {
-        BitVector::Builder null_indices(indices.size);
-        for (const uint32_t* it = indices.data;
-             it != indices.data + indices.size; it++) {
-          null_indices.Append(!non_null_->IsSet(*it));
-        }
-        // There is no need to search in underlying storage. We should just
-        // check if the index is set in |non_null_|.
-        return RangeOrBitVector(std::move(null_indices).Build());
-      }
-      case SearchValidationResult::kAllData:
-        return RangeOrBitVector(Range(0, indices.size));
-      case SearchValidationResult::kOk:
-        break;
+    // Partition the vector into all the null indices followed by all the
+    // non-null indices.
+    auto non_null_it = std::stable_partition(
+        indices.tokens.begin(), indices.tokens.end(),
+        [this](const Indices::Token& t) { return !non_null_->IsSet(t.index); });
+
+    // IndexSearch |inner_| with a vector containing a copy of the (translated)
+    // non-null indices.
+    Indices non_null{{non_null_it, indices.tokens.end()}, indices.state};
+    for (auto& token : non_null.tokens) {
+      token.index = non_null_->CountSetBits(token.index);
     }
-  } else if (op == FilterOp::kIsNotNull) {
+    inner_->IndexSearch(op, sql_val, non_null);
+
+    // Replace all the original non-null positions with the result from calling
+    // IndexSearch.
+    auto new_non_null_it =
+        indices.tokens.erase(non_null_it, indices.tokens.end());
+    indices.tokens.insert(new_non_null_it, non_null.tokens.begin(),
+                          non_null.tokens.end());
+
+    // Merge the two sorted index ranges together using the payload as the
+    // comparator. This is a required post-condition of IndexSearch.
+    std::inplace_merge(indices.tokens.begin(), new_non_null_it,
+                       indices.tokens.end(),
+                       Indices::Token::PayloadComparator());
+    return;
+  }
+
+  auto keep_only_non_null = [this, &indices]() {
+    indices.tokens.erase(
+        std::remove_if(indices.tokens.begin(), indices.tokens.end(),
+                       [this](const Indices::Token& idx) {
+                         return !non_null_->IsSet(idx.index);
+                       }),
+        indices.tokens.end());
+    return;
+  };
+  if (op == FilterOp::kIsNotNull) {
     switch (inner_->ValidateSearchConstraints(op, sql_val)) {
-      case SearchValidationResult::kNoData: {
-        BitVector::Builder non_null_indices(indices.size);
-        for (const uint32_t* it = indices.data;
-             it != indices.data + indices.size; it++) {
-          non_null_indices.Append(non_null_->IsSet(*it));
-        }
-        // There is no need to search in underlying storage. We should just
-        // check if the index is set in |non_null_|.
-        return RangeOrBitVector(std::move(non_null_indices).Build());
-      }
+      case SearchValidationResult::kNoData:
+        indices.tokens.clear();
+        return;
       case SearchValidationResult::kAllData:
-        return RangeOrBitVector(Range(0, indices.size));
+        keep_only_non_null();
+        return;
       case SearchValidationResult::kOk:
         break;
     }
   }
-
-  BitVector::Builder storage_non_null(indices.size);
-  std::vector<uint32_t> storage_iv;
-  storage_iv.reserve(indices.size);
-  for (const uint32_t* it = indices.data; it != indices.data + indices.size;
-       it++) {
-    bool is_non_null = non_null_->IsSet(*it);
-    if (is_non_null) {
-      storage_iv.push_back(non_null_->CountSetBits(*it));
-    }
-    storage_non_null.Append(is_non_null);
+  keep_only_non_null();
+  for (auto& token : indices.tokens) {
+    token.index = non_null_->CountSetBits(token.index);
   }
-  RangeOrBitVector range_or_bv = inner_->IndexSearchValidated(
-      op, sql_val,
-      Indices{storage_iv.data(), static_cast<uint32_t>(storage_iv.size()),
-              indices.state});
-  BitVector res =
-      ReconcileStorageResult(op, std::move(storage_non_null).Build(),
-                             std::move(range_or_bv), Range(0, indices.size));
-
-  PERFETTO_DCHECK(res.size() == indices.size);
-  return RangeOrBitVector(std::move(res));
+  inner_->IndexSearchValidated(op, sql_val, indices);
 }
 
 Range NullOverlay::ChainImpl::OrderedIndexSearchValidated(
     FilterOp op,
     SqlValue sql_val,
-    Indices indices) const {
+    const OrderedIndices& indices) const {
   // For NOT EQUAL the translation or results from EQUAL needs to be done by the
   // caller.
   PERFETTO_CHECK(op != FilterOp::kNe);
@@ -272,7 +270,8 @@
   }
 
   Range inner_range = inner_->OrderedIndexSearchValidated(
-      op, sql_val, Indices{storage_iv.data(), non_null_size, indices.state});
+      op, sql_val,
+      OrderedIndices{storage_iv.data(), non_null_size, indices.state});
   return {inner_range.start + non_null_offset,
           inner_range.end + non_null_offset};
 }
diff --git a/src/trace_processor/db/column/null_overlay.h b/src/trace_processor/db/column/null_overlay.h
index 99f6256..625959d 100644
--- a/src/trace_processor/db/column/null_overlay.h
+++ b/src/trace_processor/db/column/null_overlay.h
@@ -53,13 +53,11 @@
 
     RangeOrBitVector SearchValidated(FilterOp, SqlValue, Range) const override;
 
-    RangeOrBitVector IndexSearchValidated(FilterOp,
-                                          SqlValue,
-                                          Indices) const override;
+    void IndexSearchValidated(FilterOp, SqlValue, Indices&) const override;
 
     Range OrderedIndexSearchValidated(FilterOp,
                                       SqlValue,
-                                      Indices) const override;
+                                      const OrderedIndices&) const override;
 
     void StableSort(SortToken* start,
                     SortToken* end,
diff --git a/src/trace_processor/db/column/null_overlay_unittest.cc b/src/trace_processor/db/column/null_overlay_unittest.cc
index ceb6c9f..8f15746 100644
--- a/src/trace_processor/db/column/null_overlay_unittest.cc
+++ b/src/trace_processor/db/column/null_overlay_unittest.cc
@@ -18,6 +18,7 @@
 
 #include <cstdint>
 #include <memory>
+#include <utility>
 #include <vector>
 
 #include "perfetto/trace_processor/basic_types.h"
@@ -35,6 +36,9 @@
 using testing::ElementsAre;
 using testing::IsEmpty;
 
+using Indices = DataLayerChain::Indices;
+using OrderedIndices = DataLayerChain::OrderedIndices;
+
 TEST(NullOverlay, SingleSearch) {
   BitVector bv{0, 1, 0, 1, 1, 1};
   auto fake = FakeStorageChain::SearchSubset(4, std::vector<uint32_t>{1, 2});
@@ -139,12 +143,10 @@
   NullOverlay storage(&bv);
   auto chain = storage.MakeChain(std::move(fake));
 
-  std::vector<uint32_t> table_idx{1, 5, 2};
-  auto res =
-      chain->IndexSearch(FilterOp::kGt, SqlValue::Long(0),
-                         Indices{table_idx.data(), uint32_t(table_idx.size()),
-                                 Indices::State::kNonmonotonic});
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(0, 1, 2));
+  Indices indices = Indices::CreateWithIndexPayloadForTesting(
+      {1, 5, 2}, Indices::State::kNonmonotonic);
+  chain->IndexSearch(FilterOp::kGt, SqlValue::Long(0), indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices), ElementsAre(0, 1, 2));
 }
 
 TEST(NullOverlay, IndexSearchPartialElements) {
@@ -153,12 +155,10 @@
   NullOverlay storage(&bv);
   auto chain = storage.MakeChain(std::move(fake));
 
-  std::vector<uint32_t> table_idx{1, 4, 2};
-  auto res =
-      chain->IndexSearch(FilterOp::kGt, SqlValue::Long(0),
-                         Indices{table_idx.data(), uint32_t(table_idx.size()),
-                                 Indices::State::kNonmonotonic});
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(0, 2));
+  Indices indices = Indices::CreateWithIndexPayloadForTesting(
+      {1, 4, 2}, Indices::State::kNonmonotonic);
+  chain->IndexSearch(FilterOp::kGt, SqlValue::Long(0), indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices), ElementsAre(0, 2));
 }
 
 TEST(NullOverlay, IndexSearchIsNullOpEmptyRes) {
@@ -167,12 +167,10 @@
   NullOverlay storage(&bv);
   auto chain = storage.MakeChain(std::move(fake));
 
-  std::vector<uint32_t> table_idx{0, 3, 5, 4, 2};
-  auto res =
-      chain->IndexSearch(FilterOp::kIsNull, SqlValue(),
-                         Indices{table_idx.data(), uint32_t(table_idx.size()),
-                                 Indices::State::kNonmonotonic});
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(0, 1, 3));
+  Indices indices = Indices::CreateWithIndexPayloadForTesting(
+      {0, 3, 5, 4, 2}, Indices::State::kNonmonotonic);
+  chain->IndexSearch(FilterOp::kIsNull, SqlValue(), indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices), ElementsAre(0, 1, 3));
 }
 
 TEST(NullOverlay, IndexSearchIsNullOp) {
@@ -181,12 +179,11 @@
   NullOverlay storage(&bv);
   auto chain = storage.MakeChain(std::move(fake));
 
-  std::vector<uint32_t> table_idx{0, 3, 2, 4, 5};
-  auto res =
-      chain->IndexSearch(FilterOp::kIsNull, SqlValue(),
-                         Indices{table_idx.data(), uint32_t(table_idx.size()),
-                                 Indices::State::kNonmonotonic});
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(0, 1, 3, 4));
+  Indices indices = Indices::CreateWithIndexPayloadForTesting(
+      {0, 3, 2, 4, 5}, Indices::State::kNonmonotonic);
+  chain->IndexSearch(FilterOp::kIsNull, SqlValue(), indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices),
+              ElementsAre(0, 1, 3, 4));
 }
 
 TEST(NullOverlay, IndexSearchIsNotNullOp) {
@@ -195,12 +192,10 @@
   NullOverlay storage(&bv);
   auto chain = storage.MakeChain(std::move(fake));
 
-  std::vector<uint32_t> table_idx{0, 3, 4};
-  auto res =
-      chain->IndexSearch(FilterOp::kIsNotNull, SqlValue(),
-                         Indices{table_idx.data(), uint32_t(table_idx.size()),
-                                 Indices::State::kNonmonotonic});
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), IsEmpty());
+  Indices indices = Indices::CreateWithIndexPayloadForTesting(
+      {0, 3, 4}, Indices::State::kNonmonotonic);
+  chain->IndexSearch(FilterOp::kIsNotNull, SqlValue(), indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices), IsEmpty());
 }
 
 TEST(NullOverlay, OrderedIndexSearch) {
@@ -214,8 +209,8 @@
   // Passing values on final data
   // NULL, NULL, 0, 1, 1
   std::vector<uint32_t> table_idx{0, 4, 5, 1, 3};
-  Indices indices{table_idx.data(), uint32_t(table_idx.size()),
-                  Indices::State::kNonmonotonic};
+  OrderedIndices indices{table_idx.data(), uint32_t(table_idx.size()),
+                         Indices::State::kNonmonotonic};
 
   Range res = chain->OrderedIndexSearch(FilterOp::kIsNull, SqlValue(), indices);
   ASSERT_EQ(res.start, 0u);
diff --git a/src/trace_processor/db/column/numeric_storage.cc b/src/trace_processor/db/column/numeric_storage.cc
index 529e93d..46ab450 100644
--- a/src/trace_processor/db/column/numeric_storage.cc
+++ b/src/trace_processor/db/column/numeric_storage.cc
@@ -22,11 +22,11 @@
 #include <cstdint>
 #include <functional>
 #include <limits>
-#include <memory>
 #include <optional>
 #include <string>
 #include <utility>
 #include <variant>
+#include <vector>
 
 #include "perfetto/base/logging.h"
 #include "perfetto/public/compiler.h"
@@ -43,6 +43,9 @@
 namespace perfetto::trace_processor::column {
 namespace {
 
+using Indices = DataLayerChain::Indices;
+using OrderedIndices = DataLayerChain::OrderedIndices;
+
 using NumericValue = std::variant<uint32_t, int32_t, int64_t, double>;
 
 // Using the fact that binary operators in std are operators() of classes, we
@@ -135,7 +138,9 @@
 }
 
 template <typename T>
-uint32_t TypedLowerBoundExtrinsic(T val, const T* data, Indices indices) {
+uint32_t TypedLowerBoundExtrinsic(T val,
+                                  const T* data,
+                                  OrderedIndices indices) {
   const auto* lower = std::lower_bound(
       indices.data, indices.data + indices.size, val,
       [data](uint32_t index, T value) { return data[index] < value; });
@@ -144,7 +149,7 @@
 
 uint32_t LowerBoundExtrinsic(const void* vector_ptr,
                              NumericValue val,
-                             Indices indices) {
+                             OrderedIndices indices) {
   if (const auto* u32 = std::get_if<uint32_t>(&val)) {
     const auto* start =
         static_cast<const std::vector<uint32_t>*>(vector_ptr)->data();
@@ -170,7 +175,7 @@
 
 uint32_t UpperBoundExtrinsic(const void* vector_ptr,
                              NumericValue val,
-                             Indices indices) {
+                             OrderedIndices indices) {
   return std::visit(
       [vector_ptr, indices](auto val_data) {
         using T = decltype(val_data);
@@ -458,49 +463,51 @@
   return RangeOrBitVector(LinearSearchInternal(op, val, search_range));
 }
 
-RangeOrBitVector NumericStorageBase::ChainImpl::IndexSearchValidated(
+void NumericStorageBase::ChainImpl::IndexSearchValidated(
     FilterOp op,
     SqlValue sql_val,
-    Indices indices) const {
-  PERFETTO_DCHECK(*std::max_element(indices.data, indices.data + indices.size) <
-                  size());
-
+    Indices& indices) const {
   PERFETTO_TP_TRACE(
       metatrace::Category::DB, "NumericStorage::ChainImpl::IndexSearch",
-      [indices, op](metatrace::Record* r) {
-        r->AddArg("Count", std::to_string(indices.size));
+      [&indices, op](metatrace::Record* r) {
+        r->AddArg("Count", std::to_string(indices.tokens.size()));
         r->AddArg("Op", std::to_string(static_cast<uint32_t>(op)));
       });
 
   // Mismatched types - value is double and column is int.
   if (sql_val.type == SqlValue::kDouble &&
       storage_type_ != ColumnType::kDouble) {
-    auto ret_opt =
-        utils::CanReturnEarly(IntColumnWithDouble(op, &sql_val), indices.size);
-    if (ret_opt) {
-      return RangeOrBitVector(*ret_opt);
+    if (utils::CanReturnEarly(IntColumnWithDouble(op, &sql_val), indices)) {
+      return;
     }
   }
 
   // Mismatched types - column is double and value is int.
   if (sql_val.type != SqlValue::kDouble &&
       storage_type_ == ColumnType::kDouble) {
-    auto ret_opt =
-        utils::CanReturnEarly(DoubleColumnWithInt(op, &sql_val), indices.size);
-    if (ret_opt) {
-      return RangeOrBitVector(*ret_opt);
+    if (utils::CanReturnEarly(DoubleColumnWithInt(op, &sql_val), indices)) {
+      return;
     }
   }
 
   NumericValue val = GetNumericTypeVariant(storage_type_, sql_val);
-  return RangeOrBitVector(
-      IndexSearchInternal(op, val, indices.data, indices.size));
+  std::visit(
+      [this, &indices, op](auto val) {
+        using T = decltype(val);
+        auto* start = static_cast<const std::vector<T>*>(vector_ptr_)->data();
+        std::visit(
+            [start, &indices, val](auto comparator) {
+              utils::IndexSearchWithComparator(val, start, indices, comparator);
+            },
+            GetFilterOpVariant<T>(op));
+      },
+      val);
 }
 
 Range NumericStorageBase::ChainImpl::OrderedIndexSearchValidated(
     FilterOp op,
     SqlValue sql_val,
-    Indices indices) const {
+    const OrderedIndices& indices) const {
   PERFETTO_DCHECK(*std::max_element(indices.data, indices.data + indices.size) <
                   size());
 
@@ -604,27 +611,6 @@
   return std::move(builder).Build();
 }
 
-BitVector NumericStorageBase::ChainImpl::IndexSearchInternal(
-    FilterOp op,
-    NumericValue val,
-    const uint32_t* indices,
-    uint32_t indices_count) const {
-  BitVector::Builder builder(indices_count);
-  std::visit(
-      [this, indices, op, &builder](auto val) {
-        using T = decltype(val);
-        auto* start = static_cast<const std::vector<T>*>(vector_ptr_)->data();
-        std::visit(
-            [start, indices, val, &builder](auto comparator) {
-              utils::IndexSearchWithComparator(val, start, indices, comparator,
-                                               builder);
-            },
-            GetFilterOpVariant<T>(op));
-      },
-      val);
-  return std::move(builder).Build();
-}
-
 Range NumericStorageBase::ChainImpl::BinarySearchIntrinsic(
     FilterOp op,
     NumericValue val,
diff --git a/src/trace_processor/db/column/numeric_storage.h b/src/trace_processor/db/column/numeric_storage.h
index 2dfafc6..f7cb51c 100644
--- a/src/trace_processor/db/column/numeric_storage.h
+++ b/src/trace_processor/db/column/numeric_storage.h
@@ -42,13 +42,11 @@
 
     RangeOrBitVector SearchValidated(FilterOp, SqlValue, Range) const override;
 
-    RangeOrBitVector IndexSearchValidated(FilterOp,
-                                          SqlValue,
-                                          Indices) const override;
+    void IndexSearchValidated(FilterOp, SqlValue, Indices&) const override;
 
     Range OrderedIndexSearchValidated(FilterOp,
                                       SqlValue,
-                                      Indices) const override;
+                                      const OrderedIndices&) const override;
 
     void Serialize(StorageProto*) const override;
 
@@ -63,11 +61,6 @@
 
     BitVector LinearSearchInternal(FilterOp op, NumericValue val, Range) const;
 
-    BitVector IndexSearchInternal(FilterOp op,
-                                  NumericValue value,
-                                  const uint32_t* indices,
-                                  uint32_t indices_count) const;
-
     Range BinarySearchIntrinsic(FilterOp op,
                                 NumericValue val,
                                 Range search_range) const;
diff --git a/src/trace_processor/db/column/numeric_storage_unittest.cc b/src/trace_processor/db/column/numeric_storage_unittest.cc
index 534338c..2b20306 100644
--- a/src/trace_processor/db/column/numeric_storage_unittest.cc
+++ b/src/trace_processor/db/column/numeric_storage_unittest.cc
@@ -42,6 +42,9 @@
 using testing::ElementsAre;
 using testing::IsEmpty;
 
+using Indices = DataLayerChain::Indices;
+using OrderedIndices = DataLayerChain::OrderedIndices;
+
 TEST(NumericStorage, InvalidSearchConstraintsGeneralChecks) {
   std::vector<uint32_t> data_vec(128);
   std::iota(data_vec.begin(), data_vec.end(), 0);
@@ -244,27 +247,36 @@
   auto chain = storage.MakeChain();
 
   // -5, -3, -3, 3, 5, 0
-  std::vector<uint32_t> indices_vec{0, 4, 4, 5, 1, 6};
-  Indices indices{indices_vec.data(), 6, Indices::State::kMonotonic};
+  Indices common_indices = Indices::CreateWithIndexPayloadForTesting(
+      {0, 4, 4, 5, 1, 6}, Indices::State::kNonmonotonic);
   SqlValue val = SqlValue::Long(3);
 
-  auto res = chain->IndexSearch(FilterOp::kEq, val, indices);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(3));
+  auto indices = common_indices;
+  chain->IndexSearch(FilterOp::kEq, val, indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices), ElementsAre(3));
 
-  res = chain->IndexSearch(FilterOp::kNe, val, indices);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(0, 1, 2, 4, 5));
+  indices = common_indices;
+  chain->IndexSearch(FilterOp::kNe, val, indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices),
+              ElementsAre(0, 1, 2, 4, 5));
 
-  res = chain->IndexSearch(FilterOp::kLt, val, indices);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(0, 1, 2, 5));
+  indices = common_indices;
+  chain->IndexSearch(FilterOp::kLt, val, indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices),
+              ElementsAre(0, 1, 2, 5));
 
-  res = chain->IndexSearch(FilterOp::kLe, val, indices);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(0, 1, 2, 3, 5));
+  indices = common_indices;
+  chain->IndexSearch(FilterOp::kLe, val, indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices),
+              ElementsAre(0, 1, 2, 3, 5));
 
-  res = chain->IndexSearch(FilterOp::kGt, val, indices);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(4));
+  indices = common_indices;
+  chain->IndexSearch(FilterOp::kGt, val, indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices), ElementsAre(4));
 
-  res = chain->IndexSearch(FilterOp::kGe, val, indices);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(3, 4));
+  indices = common_indices;
+  chain->IndexSearch(FilterOp::kGe, val, indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices), ElementsAre(3, 4));
 }
 
 TEST(NumericStorage, IndexSearchCompareWithNegative) {
@@ -273,27 +285,35 @@
   auto chain = storage.MakeChain();
 
   // -5, -3, -3, 3, 5, 0
-  std::vector<uint32_t> indices_vec{0, 4, 4, 5, 1, 6};
-  Indices indices{indices_vec.data(), 6, Indices::State::kMonotonic};
+  Indices common_indices = Indices::CreateWithIndexPayloadForTesting(
+      {0, 4, 4, 5, 1, 6}, Indices::State::kNonmonotonic);
   SqlValue val = SqlValue::Long(-3);
 
-  auto res = chain->IndexSearch(FilterOp::kEq, val, indices);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(1, 2));
+  auto indices = common_indices;
+  chain->IndexSearch(FilterOp::kEq, val, indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices), ElementsAre(1, 2));
 
-  res = chain->IndexSearch(FilterOp::kNe, val, indices);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(0, 3, 4, 5));
+  indices = common_indices;
+  chain->IndexSearch(FilterOp::kNe, val, indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices),
+              ElementsAre(0, 3, 4, 5));
 
-  res = chain->IndexSearch(FilterOp::kLt, val, indices);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(0));
+  indices = common_indices;
+  chain->IndexSearch(FilterOp::kLt, val, indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices), ElementsAre(0));
 
-  res = chain->IndexSearch(FilterOp::kLe, val, indices);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(0, 1, 2));
+  indices = common_indices;
+  chain->IndexSearch(FilterOp::kLe, val, indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices), ElementsAre(0, 1, 2));
 
-  res = chain->IndexSearch(FilterOp::kGt, val, indices);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(3, 4, 5));
+  indices = common_indices;
+  chain->IndexSearch(FilterOp::kGt, val, indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices), ElementsAre(3, 4, 5));
 
-  res = chain->IndexSearch(FilterOp::kGe, val, indices);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(1, 2, 3, 4, 5));
+  indices = common_indices;
+  chain->IndexSearch(FilterOp::kGe, val, indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices),
+              ElementsAre(1, 2, 3, 4, 5));
 }
 
 TEST(NumericStorage, SearchFast) {
@@ -348,8 +368,8 @@
 TEST(NumericStorage, OrderedIndexSearch) {
   std::vector<uint32_t> data_vec{30, 40, 50, 60, 90, 80, 70, 0, 10, 20};
   std::vector<uint32_t> sorted_order_vec{7, 8, 9, 0, 1, 2, 3, 6, 5, 4};
-  Indices sorted_order{sorted_order_vec.data(), 10,
-                       Indices::State::kNonmonotonic};
+  OrderedIndices sorted_order{sorted_order_vec.data(), 10,
+                              Indices::State::kNonmonotonic};
 
   NumericStorage<uint32_t> storage(&data_vec, ColumnType::kUint32, false);
   auto chain = storage.MakeChain();
@@ -417,27 +437,36 @@
   auto chain = storage.MakeChain();
 
   // -5, -3, -3, 3, 5, 0
-  std::vector<uint32_t> indices_vec{0, 4, 4, 5, 1, 6};
-  Indices indices{indices_vec.data(), 6, Indices::State::kMonotonic};
+  Indices common_indices = Indices::CreateWithIndexPayloadForTesting(
+      {0, 4, 4, 5, 1, 6}, Indices::State::kNonmonotonic);
   SqlValue val = SqlValue::Double(3);
 
-  auto res = chain->IndexSearch(FilterOp::kEq, val, indices);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(3));
+  auto indices = common_indices;
+  chain->IndexSearch(FilterOp::kEq, val, indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices), ElementsAre(3));
 
-  res = chain->IndexSearch(FilterOp::kNe, val, indices);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(0, 1, 2, 4, 5));
+  indices = common_indices;
+  chain->IndexSearch(FilterOp::kNe, val, indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices),
+              ElementsAre(0, 1, 2, 4, 5));
 
-  res = chain->IndexSearch(FilterOp::kLt, val, indices);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(0, 1, 2, 5));
+  indices = common_indices;
+  chain->IndexSearch(FilterOp::kLt, val, indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices),
+              ElementsAre(0, 1, 2, 5));
 
-  res = chain->IndexSearch(FilterOp::kLe, val, indices);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(0, 1, 2, 3, 5));
+  indices = common_indices;
+  chain->IndexSearch(FilterOp::kLe, val, indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices),
+              ElementsAre(0, 1, 2, 3, 5));
 
-  res = chain->IndexSearch(FilterOp::kGt, val, indices);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(4));
+  indices = common_indices;
+  chain->IndexSearch(FilterOp::kGt, val, indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices), ElementsAre(4));
 
-  res = chain->IndexSearch(FilterOp::kGe, val, indices);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(3, 4));
+  indices = common_indices;
+  chain->IndexSearch(FilterOp::kGe, val, indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices), ElementsAre(3, 4));
 }
 
 TEST(NumericStorage, SearchInt32WithDouble) {
@@ -498,27 +527,36 @@
   auto chain = storage.MakeChain();
 
   // -5, -3, -3, 3, 5, 0
-  std::vector<uint32_t> indices_vec{0, 4, 4, 5, 1, 6};
-  Indices indices{indices_vec.data(), 6, Indices::State::kMonotonic};
+  Indices common_indices = Indices::CreateWithIndexPayloadForTesting(
+      {0, 4, 4, 5, 1, 6}, Indices::State::kNonmonotonic);
   SqlValue val = SqlValue::Double(1.5);
 
-  auto res = chain->IndexSearch(FilterOp::kEq, val, indices);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), IsEmpty());
+  auto indices = common_indices;
+  chain->IndexSearch(FilterOp::kEq, val, indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices), IsEmpty());
 
-  res = chain->IndexSearch(FilterOp::kNe, val, indices);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(0, 1, 2, 3, 4, 5));
+  indices = common_indices;
+  chain->IndexSearch(FilterOp::kNe, val, indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices),
+              ElementsAre(0, 1, 2, 3, 4, 5));
 
-  res = chain->IndexSearch(FilterOp::kLt, val, indices);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(0, 1, 2, 5));
+  indices = common_indices;
+  chain->IndexSearch(FilterOp::kLt, val, indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices),
+              ElementsAre(0, 1, 2, 5));
 
-  res = chain->IndexSearch(FilterOp::kLe, val, indices);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(0, 1, 2, 5));
+  indices = common_indices;
+  chain->IndexSearch(FilterOp::kLe, val, indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices),
+              ElementsAre(0, 1, 2, 5));
 
-  res = chain->IndexSearch(FilterOp::kGt, val, indices);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(3, 4));
+  indices = common_indices;
+  chain->IndexSearch(FilterOp::kGt, val, indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices), ElementsAre(3, 4));
 
-  res = chain->IndexSearch(FilterOp::kGe, val, indices);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(3, 4));
+  indices = common_indices;
+  chain->IndexSearch(FilterOp::kGe, val, indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices), ElementsAre(3, 4));
 }
 
 TEST(NumericStorage, IndexSearchInt32WithNegDouble) {
@@ -527,27 +565,34 @@
   auto chain = storage.MakeChain();
 
   // -5, -3, -3, 3, 5, 0
-  std::vector<uint32_t> indices_vec{0, 4, 4, 5, 1, 6};
-  Indices indices{indices_vec.data(), 6, Indices::State::kMonotonic};
+  Indices common_indices = Indices::CreateWithIndexPayloadForTesting(
+      {0, 4, 4, 5, 1, 6}, Indices::State::kNonmonotonic);
   SqlValue val = SqlValue::Double(-2.5);
 
-  auto res = chain->IndexSearch(FilterOp::kEq, val, indices);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), IsEmpty());
+  auto indices = common_indices;
+  chain->IndexSearch(FilterOp::kEq, val, indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices), IsEmpty());
 
-  res = chain->IndexSearch(FilterOp::kNe, val, indices);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(0, 1, 2, 3, 4, 5));
+  indices = common_indices;
+  chain->IndexSearch(FilterOp::kNe, val, indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices),
+              ElementsAre(0, 1, 2, 3, 4, 5));
 
-  res = chain->IndexSearch(FilterOp::kLt, val, indices);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(0, 1, 2));
+  indices = common_indices;
+  chain->IndexSearch(FilterOp::kLt, val, indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices), ElementsAre(0, 1, 2));
 
-  res = chain->IndexSearch(FilterOp::kLe, val, indices);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(0, 1, 2));
+  indices = common_indices;
+  chain->IndexSearch(FilterOp::kLe, val, indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices), ElementsAre(0, 1, 2));
 
-  res = chain->IndexSearch(FilterOp::kGt, val, indices);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(3, 4, 5));
+  indices = common_indices;
+  chain->IndexSearch(FilterOp::kGt, val, indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices), ElementsAre(3, 4, 5));
 
-  res = chain->IndexSearch(FilterOp::kGe, val, indices);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(3, 4, 5));
+  indices = common_indices;
+  chain->IndexSearch(FilterOp::kGe, val, indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices), ElementsAre(3, 4, 5));
 }
 
 TEST(NumericStorage, SearchUint32WithNegDouble) {
@@ -581,27 +626,36 @@
   NumericStorage<uint32_t> storage(&data_vec, ColumnType::kInt32, false);
   auto chain = storage.MakeChain();
 
-  std::vector<uint32_t> indices_vec{0, 4, 4, 5, 1, 6};
-  Indices indices{indices_vec.data(), 6, Indices::State::kMonotonic};
+  Indices common_indices = Indices::CreateWithIndexPayloadForTesting(
+      {0, 4, 4, 5, 1, 6}, Indices::State::kNonmonotonic);
   SqlValue val = SqlValue::Double(-2.5);
 
-  auto res = chain->IndexSearch(FilterOp::kEq, val, indices);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), IsEmpty());
+  auto indices = common_indices;
+  chain->IndexSearch(FilterOp::kEq, val, indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices), IsEmpty());
 
-  res = chain->IndexSearch(FilterOp::kNe, val, indices);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(0, 1, 2, 3, 4, 5));
+  indices = common_indices;
+  chain->IndexSearch(FilterOp::kNe, val, indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices),
+              ElementsAre(0, 1, 2, 3, 4, 5));
 
-  res = chain->IndexSearch(FilterOp::kLt, val, indices);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), IsEmpty());
+  indices = common_indices;
+  chain->IndexSearch(FilterOp::kLt, val, indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices), IsEmpty());
 
-  res = chain->IndexSearch(FilterOp::kLe, val, indices);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), IsEmpty());
+  indices = common_indices;
+  chain->IndexSearch(FilterOp::kLe, val, indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices), IsEmpty());
 
-  res = chain->IndexSearch(FilterOp::kGt, val, indices);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(0, 1, 2, 3, 4, 5));
+  indices = common_indices;
+  chain->IndexSearch(FilterOp::kGt, val, indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices),
+              ElementsAre(0, 1, 2, 3, 4, 5));
 
-  res = chain->IndexSearch(FilterOp::kGe, val, indices);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(0, 1, 2, 3, 4, 5));
+  indices = common_indices;
+  chain->IndexSearch(FilterOp::kGe, val, indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices),
+              ElementsAre(0, 1, 2, 3, 4, 5));
 }
 
 TEST(NumericStorage, DoubleColumnWithIntThatCantBeRepresentedAsDouble) {
diff --git a/src/trace_processor/db/column/range_overlay.cc b/src/trace_processor/db/column/range_overlay.cc
index 0b8275b..84f19ce 100644
--- a/src/trace_processor/db/column/range_overlay.cc
+++ b/src/trace_processor/db/column/range_overlay.cc
@@ -65,6 +65,9 @@
   auto inner_res = inner_->SearchValidated(op, sql_val, inner_search_range);
   if (inner_res.IsRange()) {
     Range inner_res_range = std::move(inner_res).TakeIfRange();
+    if (inner_res_range.empty()) {
+      return RangeOrBitVector(Range());
+    }
     return RangeOrBitVector(Range(inner_res_range.start - range_->start,
                                   inner_res_range.end - range_->start));
   }
@@ -105,34 +108,30 @@
   return RangeOrBitVector(std::move(builder).Build());
 }
 
-RangeOrBitVector RangeOverlay::ChainImpl::IndexSearchValidated(
-    FilterOp op,
-    SqlValue sql_val,
-    Indices indices) const {
+void RangeOverlay::ChainImpl::IndexSearchValidated(FilterOp op,
+                                                   SqlValue sql_val,
+                                                   Indices& indices) const {
   PERFETTO_TP_TRACE(metatrace::Category::DB, "RangeOverlay::IndexSearch");
-
-  std::vector<uint32_t> storage_iv(indices.size);
-  // Should be SIMD optimized.
-  for (uint32_t i = 0; i < indices.size; ++i) {
-    storage_iv[i] = indices.data[i] + range_->start;
+  for (auto& token : indices.tokens) {
+    token.index += range_->start;
   }
-  return inner_->IndexSearchValidated(
-      op, sql_val, Indices{storage_iv.data(), indices.size, indices.state});
+  inner_->IndexSearchValidated(op, sql_val, indices);
 }
 
 Range RangeOverlay::ChainImpl::OrderedIndexSearchValidated(
     FilterOp op,
     SqlValue sql_val,
-    Indices indices) const {
+    const OrderedIndices& indices) const {
   PERFETTO_TP_TRACE(metatrace::Category::DB, "RangeOverlay::IndexSearch");
 
-  std::vector<uint32_t> storage_iv(indices.size);
   // Should be SIMD optimized.
+  std::vector<uint32_t> storage_iv(indices.size);
   for (uint32_t i = 0; i < indices.size; ++i) {
     storage_iv[i] = indices.data[i] + range_->start;
   }
   return inner_->OrderedIndexSearchValidated(
-      op, sql_val, Indices{storage_iv.data(), indices.size, indices.state});
+      op, sql_val,
+      OrderedIndices{storage_iv.data(), indices.size, indices.state});
 }
 
 void RangeOverlay::ChainImpl::StableSort(SortToken* start,
diff --git a/src/trace_processor/db/column/range_overlay.h b/src/trace_processor/db/column/range_overlay.h
index 200d5d4..3adb319 100644
--- a/src/trace_processor/db/column/range_overlay.h
+++ b/src/trace_processor/db/column/range_overlay.h
@@ -50,13 +50,11 @@
 
     RangeOrBitVector SearchValidated(FilterOp, SqlValue, Range) const override;
 
-    RangeOrBitVector IndexSearchValidated(FilterOp p,
-                                          SqlValue,
-                                          Indices) const override;
+    void IndexSearchValidated(FilterOp p, SqlValue, Indices&) const override;
 
     Range OrderedIndexSearchValidated(FilterOp,
                                       SqlValue,
-                                      Indices) const override;
+                                      const OrderedIndices&) const override;
 
     void StableSort(SortToken* start,
                     SortToken* end,
diff --git a/src/trace_processor/db/column/range_overlay_unittest.cc b/src/trace_processor/db/column/range_overlay_unittest.cc
index 96c2f80..a965be3 100644
--- a/src/trace_processor/db/column/range_overlay_unittest.cc
+++ b/src/trace_processor/db/column/range_overlay_unittest.cc
@@ -35,6 +35,9 @@
 using testing::IsEmpty;
 using Range = Range;
 
+using Indices = DataLayerChain::Indices;
+using OrderedIndices = DataLayerChain::OrderedIndices;
+
 TEST(RangeOverlay, SearchSingle) {
   Range range(3, 8);
   RangeOverlay storage(&range);
@@ -96,12 +99,10 @@
   RangeOverlay storage(&range);
   auto chain = storage.MakeChain(std::move(fake));
 
-  std::vector<uint32_t> table_idx{1u, 0u, 3u};
-  RangeOrBitVector res = chain->IndexSearch(
-      FilterOp::kGe, SqlValue::Long(0u),
-      Indices{table_idx.data(), static_cast<uint32_t>(table_idx.size()),
-              Indices::State::kNonmonotonic});
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(1u));
+  Indices indices = Indices::CreateWithIndexPayloadForTesting(
+      {1u, 0u, 3u}, Indices::State::kNonmonotonic);
+  chain->IndexSearch(FilterOp::kGe, SqlValue::Long(0u), indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices), ElementsAre(1u));
 }
 
 TEST(RangeOverlay, StableSort) {
diff --git a/src/trace_processor/db/column/selector_overlay.cc b/src/trace_processor/db/column/selector_overlay.cc
index ff1e463..999e4bb 100644
--- a/src/trace_processor/db/column/selector_overlay.cc
+++ b/src/trace_processor/db/column/selector_overlay.cc
@@ -56,8 +56,8 @@
   PERFETTO_TP_TRACE(metatrace::Category::DB,
                     "SelectorOverlay::ChainImpl::Search");
 
-  // Figure out the bounds of the indices in the underlying storage and search
-  // it.
+  // Figure out the bounds of the input range in the underlying storage and
+  // search it.
   uint32_t start_idx = selector_->IndexOfNthSet(in.start);
   uint32_t end_idx = selector_->IndexOfNthSet(in.end - 1) + 1;
 
@@ -65,6 +65,9 @@
       inner_->SearchValidated(op, sql_val, Range(start_idx, end_idx));
   if (storage_result.IsRange()) {
     Range storage_range = std::move(storage_result).TakeIfRange();
+    if (storage_range.empty()) {
+      return RangeOrBitVector(Range());
+    }
     uint32_t out_start = selector_->CountSetBits(storage_range.start);
     uint32_t out_end = selector_->CountSetBits(storage_range.end);
     return RangeOrBitVector(Range(out_start, out_end));
@@ -72,46 +75,32 @@
 
   BitVector storage_bitvector = std::move(storage_result).TakeIfBitVector();
   PERFETTO_DCHECK(storage_bitvector.size() <= selector_->size());
-
-  // TODO(b/283763282): implement ParallelExtractBits to optimize this
-  // operation.
-  BitVector::Builder res(in.end);
-  for (auto it = selector_->IterateSetBits();
-       it && it.index() < storage_bitvector.size(); it.Next()) {
-    res.Append(storage_bitvector.IsSet(it.index()));
+  storage_bitvector.SelectBits(*selector_);
+  if (storage_bitvector.size() == 0) {
+    return RangeOrBitVector(std::move(storage_bitvector));
   }
-  return RangeOrBitVector(std::move(res).Build());
+  PERFETTO_DCHECK(storage_bitvector.size() == in.end);
+  return RangeOrBitVector(std::move(storage_bitvector));
 }
 
-RangeOrBitVector SelectorOverlay::ChainImpl::IndexSearchValidated(
-    FilterOp op,
-    SqlValue sql_val,
-    Indices indices) const {
-  PERFETTO_DCHECK(
-      indices.size == 0 ||
-      *std::max_element(indices.data, indices.data + indices.size) <=
-          selector_->size());
-  // TODO(b/307482437): Use OrderedIndexSearch if arrangement orders storage.
-
+void SelectorOverlay::ChainImpl::IndexSearchValidated(FilterOp op,
+                                                      SqlValue sql_val,
+                                                      Indices& indices) const {
   PERFETTO_TP_TRACE(metatrace::Category::DB,
                     "SelectorOverlay::ChainImpl::IndexSearch");
 
   // To go from TableIndexVector to StorageIndexVector we need to find index in
   // |selector_| by looking only into set bits.
-  std::vector<uint32_t> storage_iv(indices.size);
-  for (uint32_t i = 0; i < indices.size; ++i) {
-    storage_iv[i] = selector_->IndexOfNthSet(indices.data[i]);
+  for (auto& token : indices.tokens) {
+    token.index = selector_->IndexOfNthSet(token.index);
   }
-  return inner_->IndexSearchValidated(
-      op, sql_val,
-      Indices{storage_iv.data(), static_cast<uint32_t>(storage_iv.size()),
-              indices.state});
+  return inner_->IndexSearchValidated(op, sql_val, indices);
 }
 
 Range SelectorOverlay::ChainImpl::OrderedIndexSearchValidated(
     FilterOp op,
     SqlValue sql_val,
-    Indices indices) const {
+    const OrderedIndices& indices) const {
   // To go from TableIndexVector to StorageIndexVector we need to find index in
   // |selector_| by looking only into set bits.
   std::vector<uint32_t> inner_indices(indices.size);
@@ -120,8 +109,9 @@
   }
   return inner_->OrderedIndexSearchValidated(
       op, sql_val,
-      Indices{inner_indices.data(), static_cast<uint32_t>(inner_indices.size()),
-              indices.state});
+      OrderedIndices{inner_indices.data(),
+                     static_cast<uint32_t>(inner_indices.size()),
+                     indices.state});
 }
 
 void SelectorOverlay::ChainImpl::StableSort(SortToken* start,
diff --git a/src/trace_processor/db/column/selector_overlay.h b/src/trace_processor/db/column/selector_overlay.h
index 04f178a..82e2fe3 100644
--- a/src/trace_processor/db/column/selector_overlay.h
+++ b/src/trace_processor/db/column/selector_overlay.h
@@ -54,13 +54,11 @@
 
     RangeOrBitVector SearchValidated(FilterOp, SqlValue, Range) const override;
 
-    RangeOrBitVector IndexSearchValidated(FilterOp p,
-                                          SqlValue,
-                                          Indices) const override;
+    void IndexSearchValidated(FilterOp p, SqlValue, Indices&) const override;
 
     Range OrderedIndexSearchValidated(FilterOp,
                                       SqlValue,
-                                      Indices) const override;
+                                      const OrderedIndices&) const override;
 
     void StableSort(SortToken* start,
                     SortToken* end,
diff --git a/src/trace_processor/db/column/selector_overlay_unittest.cc b/src/trace_processor/db/column/selector_overlay_unittest.cc
index 11d20f2..0abae4c 100644
--- a/src/trace_processor/db/column/selector_overlay_unittest.cc
+++ b/src/trace_processor/db/column/selector_overlay_unittest.cc
@@ -34,6 +34,9 @@
 using testing::ElementsAre;
 using testing::IsEmpty;
 
+using Indices = DataLayerChain::Indices;
+using OrderedIndices = DataLayerChain::OrderedIndices;
+
 TEST(SelectorOverlay, SingleSearch) {
   BitVector selector{0, 1, 1, 0, 0, 1, 1, 0};
   auto fake = FakeStorageChain::SearchSubset(8, Range(2, 5));
@@ -94,12 +97,10 @@
   SelectorOverlay storage(&selector);
   auto chain = storage.MakeChain(std::move(fake));
 
-  std::vector<uint32_t> table_idx{1u, 0u, 3u};
-  RangeOrBitVector res = chain->IndexSearch(
-      FilterOp::kGe, SqlValue::Long(0u),
-      Indices{table_idx.data(), static_cast<uint32_t>(table_idx.size()),
-              Indices::State::kNonmonotonic});
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(1u));
+  auto indices = Indices::CreateWithIndexPayloadForTesting(
+      {1u, 0u, 3u}, Indices::State::kNonmonotonic);
+  chain->IndexSearch(FilterOp::kGe, SqlValue::Long(0u), indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices), ElementsAre(1u));
 }
 
 TEST(SelectorOverlay, OrderedIndexSearchTrivial) {
@@ -111,8 +112,8 @@
   std::vector<uint32_t> table_idx{1u, 0u, 2u};
   Range res = chain->OrderedIndexSearch(
       FilterOp::kGe, SqlValue::Long(0u),
-      Indices{table_idx.data(), static_cast<uint32_t>(table_idx.size()),
-              Indices::State::kNonmonotonic});
+      OrderedIndices{table_idx.data(), static_cast<uint32_t>(table_idx.size()),
+                     Indices::State::kNonmonotonic});
   ASSERT_EQ(res.start, 0u);
   ASSERT_EQ(res.end, 3u);
 }
@@ -126,8 +127,8 @@
   std::vector<uint32_t> table_idx{1u, 0u, 2u};
   Range res = chain->OrderedIndexSearch(
       FilterOp::kGe, SqlValue::Long(0u),
-      Indices{table_idx.data(), static_cast<uint32_t>(table_idx.size()),
-              Indices::State::kNonmonotonic});
+      OrderedIndices{table_idx.data(), static_cast<uint32_t>(table_idx.size()),
+                     Indices::State::kNonmonotonic});
   ASSERT_EQ(res.size(), 0u);
 }
 
diff --git a/src/trace_processor/db/column/set_id_storage.cc b/src/trace_processor/db/column/set_id_storage.cc
index 780761c..23b63b8 100644
--- a/src/trace_processor/db/column/set_id_storage.cc
+++ b/src/trace_processor/db/column/set_id_storage.cc
@@ -194,76 +194,66 @@
   return RangeOrBitVector(BinarySearchIntrinsic(op, val, search_range));
 }
 
-RangeOrBitVector SetIdStorage::ChainImpl::IndexSearchValidated(
-    FilterOp op,
-    SqlValue sql_val,
-    Indices indices) const {
+void SetIdStorage::ChainImpl::IndexSearchValidated(FilterOp op,
+                                                   SqlValue sql_val,
+                                                   Indices& indices) const {
   PERFETTO_TP_TRACE(
       metatrace::Category::DB, "SetIdStorage::ChainImpl::IndexSearch",
-      [indices, op](metatrace::Record* r) {
-        r->AddArg("Count", std::to_string(indices.size));
+      [&indices, op](metatrace::Record* r) {
+        r->AddArg("Count", std::to_string(indices.tokens.size()));
         r->AddArg("Op", std::to_string(static_cast<uint32_t>(op)));
       });
 
   // It's a valid filter operation if |sql_val| is a double, although it
   // requires special logic.
   if (sql_val.type == SqlValue::kDouble) {
-    switch (utils::CompareIntColumnWithDouble(op, &sql_val)) {
-      case SearchValidationResult::kOk:
-        break;
-      case SearchValidationResult::kAllData:
-        return RangeOrBitVector(Range(0, indices.size));
-      case SearchValidationResult::kNoData:
-        return RangeOrBitVector(Range());
+    if (utils::CanReturnEarly(utils::CompareIntColumnWithDouble(op, &sql_val),
+                              indices)) {
+      return;
     }
   }
 
-  auto val = static_cast<uint32_t>(sql_val.AsLong());
-  BitVector::Builder builder(indices.size);
-
   // TODO(mayzner): Instead of utils::IndexSearchWithComparator, use the
   // property of SetId data - that for each index i, data[i] <= i.
+  auto val = static_cast<uint32_t>(sql_val.AsLong());
   switch (op) {
     case FilterOp::kEq:
-      utils::IndexSearchWithComparator(val, values_->data(), indices.data,
-                                       std::equal_to<>(), builder);
+      utils::IndexSearchWithComparator(val, values_->data(), indices,
+                                       std::equal_to<>());
       break;
     case FilterOp::kNe:
-      utils::IndexSearchWithComparator(val, values_->data(), indices.data,
-                                       std::not_equal_to<>(), builder);
+      utils::IndexSearchWithComparator(val, values_->data(), indices,
+                                       std::not_equal_to<>());
       break;
     case FilterOp::kLe:
-      utils::IndexSearchWithComparator(val, values_->data(), indices.data,
-                                       std::less_equal<>(), builder);
+      utils::IndexSearchWithComparator(val, values_->data(), indices,
+                                       std::less_equal<>());
       break;
     case FilterOp::kLt:
-      utils::IndexSearchWithComparator(val, values_->data(), indices.data,
-                                       std::less<>(), builder);
+      utils::IndexSearchWithComparator(val, values_->data(), indices,
+                                       std::less<>());
       break;
     case FilterOp::kGt:
-      utils::IndexSearchWithComparator(val, values_->data(), indices.data,
-                                       std::greater<>(), builder);
+      utils::IndexSearchWithComparator(val, values_->data(), indices,
+                                       std::greater<>());
       break;
     case FilterOp::kGe:
-      utils::IndexSearchWithComparator(val, values_->data(), indices.data,
-                                       std::greater_equal<>(), builder);
+      utils::IndexSearchWithComparator(val, values_->data(), indices,
+                                       std::greater_equal<>());
       break;
     case FilterOp::kIsNotNull:
-      return RangeOrBitVector(Range(0, indices.size));
     case FilterOp::kIsNull:
-      return RangeOrBitVector(Range());
     case FilterOp::kGlob:
     case FilterOp::kRegex:
       PERFETTO_FATAL("Illegal argument");
   }
-  return RangeOrBitVector(std::move(builder).Build());
 }
 
 Range SetIdStorage::ChainImpl::OrderedIndexSearchValidated(
     FilterOp op,
     SqlValue sql_val,
-    Indices indices) const {
-  // Indices are monotonic non-contiguous values.
+    const OrderedIndices& indices) const {
+  // OrderedIndices are monotonic non-contiguous values.
   auto res = SearchValidated(
       op, sql_val, Range(indices.data[0], indices.data[indices.size - 1] + 1));
   PERFETTO_CHECK(res.IsRange());
diff --git a/src/trace_processor/db/column/set_id_storage.h b/src/trace_processor/db/column/set_id_storage.h
index 64f14a3..e5bdb76 100644
--- a/src/trace_processor/db/column/set_id_storage.h
+++ b/src/trace_processor/db/column/set_id_storage.h
@@ -52,13 +52,11 @@
 
     RangeOrBitVector SearchValidated(FilterOp, SqlValue, Range) const override;
 
-    RangeOrBitVector IndexSearchValidated(FilterOp,
-                                          SqlValue,
-                                          Indices) const override;
+    void IndexSearchValidated(FilterOp, SqlValue, Indices&) const override;
 
     Range OrderedIndexSearchValidated(FilterOp,
                                       SqlValue,
-                                      Indices) const override;
+                                      const OrderedIndices&) const override;
 
     void StableSort(SortToken* start,
                     SortToken* end,
diff --git a/src/trace_processor/db/column/set_id_storage_unittest.cc b/src/trace_processor/db/column/set_id_storage_unittest.cc
index 99371dc..7af21ac 100644
--- a/src/trace_processor/db/column/set_id_storage_unittest.cc
+++ b/src/trace_processor/db/column/set_id_storage_unittest.cc
@@ -44,6 +44,9 @@
 using testing::ElementsAre;
 using testing::IsEmpty;
 
+using Indices = DataLayerChain::Indices;
+using OrderedIndices = DataLayerChain::OrderedIndices;
+
 TEST(SetIdStorage, SearchSingle) {
   std::vector<uint32_t> storage_data{0, 0, 2, 2, 4, 4, 6, 6};
   SetIdStorage storage(&storage_data);
@@ -176,32 +179,32 @@
   auto chain = storage.MakeChain();
   SqlValue val = SqlValue::Long(4);
   // 6, 4, 2, 0
-  std::vector<uint32_t> indices_vec{6, 4, 2, 0};
-  Indices indices{indices_vec.data(), 4, Indices::State::kNonmonotonic};
+  auto common_indices = Indices::CreateWithIndexPayloadForTesting(
+      {6, 4, 2, 0}, Indices::State::kNonmonotonic);
 
-  FilterOp op = FilterOp::kEq;
-  auto res = chain->IndexSearch(op, val, indices);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(1));
+  auto indices = common_indices;
+  chain->IndexSearch(FilterOp::kEq, val, indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices), ElementsAre(1));
 
-  op = FilterOp::kNe;
-  res = chain->IndexSearch(op, val, indices);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(0, 2, 3));
+  indices = common_indices;
+  chain->IndexSearch(FilterOp::kNe, val, indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices), ElementsAre(0, 2, 3));
 
-  op = FilterOp::kLe;
-  res = chain->IndexSearch(op, val, indices);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(1, 2, 3));
+  indices = common_indices;
+  chain->IndexSearch(FilterOp::kLe, val, indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices), ElementsAre(1, 2, 3));
 
-  op = FilterOp::kLt;
-  res = chain->IndexSearch(op, val, indices);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(2, 3));
+  indices = common_indices;
+  chain->IndexSearch(FilterOp::kLt, val, indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices), ElementsAre(2, 3));
 
-  op = FilterOp::kGe;
-  res = chain->IndexSearch(op, val, indices);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(0, 1));
+  indices = common_indices;
+  chain->IndexSearch(FilterOp::kGe, val, indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices), ElementsAre(0, 1));
 
-  op = FilterOp::kGt;
-  res = chain->IndexSearch(op, val, indices);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(0));
+  indices = common_indices;
+  chain->IndexSearch(FilterOp::kGt, val, indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices), ElementsAre(0));
 }
 
 TEST(SetIdStorage, OrderedIndexSearchSimple) {
@@ -211,7 +214,7 @@
 
   // 0, 2, 2, 4
   std::vector<uint32_t> indices_vec{0, 3, 3, 5};
-  Indices indices{indices_vec.data(), 4, Indices::State::kMonotonic};
+  OrderedIndices indices{indices_vec.data(), 4, Indices::State::kMonotonic};
 
   Range range =
       chain->OrderedIndexSearch(FilterOp::kEq, SqlValue::Long(2), indices);
@@ -284,13 +287,10 @@
   auto chain = storage.MakeChain();
 
   // {0, 3, 3, 6, 9, 9, 0, 3}
-  std::vector<uint32_t> indices_vec{1, 3, 5, 7, 9, 11, 2, 4};
-  Indices indices{indices_vec.data(), 8, Indices::State::kMonotonic};
-
-  BitVector bv = chain->IndexSearch(FilterOp::kEq, SqlValue::Long(10), indices)
-                     .TakeIfBitVector();
-
-  ASSERT_EQ(bv.CountSetBits(), 0u);
+  auto indices = Indices::CreateWithIndexPayloadForTesting(
+      {1, 3, 5, 7, 9, 11, 2, 4}, Indices::State::kNonmonotonic);
+  chain->IndexSearch(FilterOp::kEq, SqlValue::Long(10), indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices), IsEmpty());
 }
 
 TEST(SetIdStorage, SearchWithIdAsSimpleDoubleIsInt) {
diff --git a/src/trace_processor/db/column/string_storage.cc b/src/trace_processor/db/column/string_storage.cc
index 05f5932..12d2517 100644
--- a/src/trace_processor/db/column/string_storage.cc
+++ b/src/trace_processor/db/column/string_storage.cc
@@ -76,20 +76,20 @@
 };
 
 struct NotEqual {
-  bool operator()(StringPool::Id lhs, StringPool::Id rhs) {
+  bool operator()(StringPool::Id lhs, StringPool::Id rhs) const {
     return lhs != StringPool::Id::Null() && lhs != rhs;
   }
 };
 
 struct Glob {
-  bool operator()(StringPool::Id lhs, util::GlobMatcher& matcher) const {
+  bool operator()(StringPool::Id lhs, const util::GlobMatcher& matcher) const {
     return lhs != StringPool::Id::Null() && matcher.Matches(pool_->Get(lhs));
   }
   const StringPool* pool_;
 };
 
 struct GlobFullStringPool {
-  GlobFullStringPool(StringPool* pool, util::GlobMatcher& matcher)
+  GlobFullStringPool(StringPool* pool, const util::GlobMatcher& matcher)
       : pool_(pool), matches_(pool->MaxSmallStringId().raw_id()) {
     PERFETTO_DCHECK(!pool->HasLargeString());
     for (auto it = pool->CreateIterator(); it; ++it) {
@@ -97,7 +97,7 @@
       matches_[id.raw_id()] = matcher.Matches(pool->Get(id));
     }
   }
-  bool operator()(StringPool::Id lhs, StringPool::Id) {
+  bool operator()(StringPool::Id lhs, StringPool::Id) const {
     return lhs != StringPool::Id::Null() && matches_[lhs.raw_id()];
   }
   StringPool* pool_;
@@ -105,7 +105,7 @@
 };
 
 struct Regex {
-  bool operator()(StringPool::Id lhs, regex::Regex& pattern) const {
+  bool operator()(StringPool::Id lhs, const regex::Regex& pattern) const {
     return lhs != StringPool::Id::Null() &&
            pattern.Search(pool_->Get(lhs).c_str());
   }
@@ -122,7 +122,7 @@
           id != StringPool::Id::Null() && regex.Search(pool_->Get(id).c_str());
     }
   }
-  bool operator()(StringPool::Id lhs, StringPool::Id) {
+  bool operator()(StringPool::Id lhs, StringPool::Id) const {
     return matches_[lhs.raw_id()];
   }
   StringPool* pool_;
@@ -145,11 +145,12 @@
                              const StringPool::Id* data,
                              NullTermStringView val,
                              Range search_range) {
-  Less comp{pool};
   const auto* lower =
       std::lower_bound(data + search_range.start, data + search_range.end, val,
-                       [comp](StringPool::Id id, NullTermStringView val) {
-                         return comp(id, val);
+                       [pool](StringPool::Id id, NullTermStringView val) {
+                         // TODO(b/328408877): Remove this hack after the
+                         // migration.
+                         return pool->Get(id) < val;
                        });
   return static_cast<uint32_t>(std::distance(data, lower));
 }
@@ -158,11 +159,12 @@
                              const StringPool::Id* data,
                              NullTermStringView val,
                              Range search_range) {
-  Greater comp{pool};
   const auto* upper =
       std::upper_bound(data + search_range.start, data + search_range.end, val,
-                       [comp](NullTermStringView val, StringPool::Id id) {
-                         return comp(id, val);
+                       [pool](NullTermStringView val, StringPool::Id id) {
+                         // TODO(b/328408877): Remove this hack after the
+                         // migration.
+                         return val < pool->Get(id);
                        });
   return static_cast<uint32_t>(std::distance(data, upper));
 }
@@ -172,11 +174,12 @@
                              NullTermStringView val,
                              const uint32_t* indices,
                              uint32_t indices_count) {
-  Less comp{pool};
   const auto* lower =
       std::lower_bound(indices, indices + indices_count, val,
-                       [comp, data](uint32_t index, NullTermStringView val) {
-                         return comp(data[index], val);
+                       [pool, data](uint32_t index, NullTermStringView val) {
+                         // TODO(b/328408877): Remove this hack after the
+                         // migration.
+                         return pool->Get(data[index]) < val;
                        });
   return static_cast<uint32_t>(std::distance(indices, lower));
 }
@@ -186,11 +189,12 @@
                              NullTermStringView val,
                              const uint32_t* indices,
                              uint32_t indices_count) {
-  Greater comp{pool};
   const auto* upper =
       std::upper_bound(indices, indices + indices_count, val,
-                       [comp, data](NullTermStringView val, uint32_t index) {
-                         return comp(data[index], val);
+                       [pool, data](NullTermStringView val, uint32_t index) {
+                         // TODO(b/328408877): Remove this hack after the
+                         // migration.
+                         return val < pool->Get(data[index]);
                        });
   return static_cast<uint32_t>(std::distance(indices, upper));
 }
@@ -320,16 +324,35 @@
       case FilterOp::kGe:
       case FilterOp::kGt:
       case FilterOp::kLe:
-      case FilterOp::kLt:
-        return RangeOrBitVector(
-            BinarySearchIntrinsic(op, sql_val, search_range));
+      case FilterOp::kLt: {
+        auto first_non_null = static_cast<uint32_t>(std::distance(
+            data_->begin(),
+            std::partition_point(data_->begin() + search_range.start,
+                                 data_->begin() + search_range.end,
+                                 [](StringPool::Id id) {
+                                   return id == StringPool::Id::Null();
+                                 })));
+        return RangeOrBitVector(BinarySearchIntrinsic(
+            op, sql_val,
+            {std::max(search_range.start, first_non_null), search_range.end}));
+      }
       case FilterOp::kNe: {
         // Not equal is a special operation on binary search, as it doesn't
         // define a range, and rather just `not` range returned with `equal`
-        // operation.
-        Range r = BinarySearchIntrinsic(FilterOp::kEq, sql_val, search_range);
-        BitVector bv(r.start, true);
-        bv.Resize(r.end);
+        // operation on non-null values.
+        auto first_non_null = static_cast<uint32_t>(std::distance(
+            data_->begin(),
+            std::partition_point(data_->begin() + search_range.start,
+                                 data_->begin() + search_range.end,
+                                 [](StringPool::Id id) {
+                                   return id == StringPool::Id::Null();
+                                 })));
+        Range ret = BinarySearchIntrinsic(
+            FilterOp::kEq, sql_val,
+            {std::max(search_range.start, first_non_null), search_range.end});
+        BitVector bv(first_non_null, false);
+        bv.Resize(ret.start, true);
+        bv.Resize(ret.end, false);
         bv.Resize(search_range.end, true);
         return RangeOrBitVector(std::move(bv));
       }
@@ -345,19 +368,71 @@
   return RangeOrBitVector(LinearSearch(op, sql_val, search_range));
 }
 
-RangeOrBitVector StringStorage::ChainImpl::IndexSearchValidated(
-    FilterOp op,
-    SqlValue sql_val,
-    Indices indices) const {
-  PERFETTO_DCHECK(indices.size <= size());
+void StringStorage::ChainImpl::IndexSearchValidated(FilterOp op,
+                                                    SqlValue sql_val,
+                                                    Indices& indices) const {
+  PERFETTO_DCHECK(indices.tokens.size() <= size());
   PERFETTO_TP_TRACE(
       metatrace::Category::DB, "StringStorage::ChainImpl::IndexSearch",
-      [indices, op](metatrace::Record* r) {
-        r->AddArg("Count", std::to_string(indices.size));
+      [&indices, op](metatrace::Record* r) {
+        r->AddArg("Count", std::to_string(indices.tokens.size()));
         r->AddArg("Op", std::to_string(static_cast<uint32_t>(op)));
       });
-  return RangeOrBitVector(
-      IndexSearchInternal(op, sql_val, indices.data, indices.size));
+
+  StringPool::Id val =
+      (op == FilterOp::kIsNull || op == FilterOp::kIsNotNull)
+          ? StringPool::Id::Null()
+          : string_pool_->InternString(base::StringView(sql_val.AsString()));
+  const StringPool::Id* start = data_->data();
+  switch (op) {
+    case FilterOp::kEq:
+      utils::IndexSearchWithComparator(val, start, indices, std::equal_to<>());
+      break;
+    case FilterOp::kNe:
+      utils::IndexSearchWithComparator(val, start, indices, NotEqual());
+      break;
+    case FilterOp::kLe:
+      utils::IndexSearchWithComparator(string_pool_->Get(val), start, indices,
+                                       LessEqual{string_pool_});
+      break;
+    case FilterOp::kLt:
+      utils::IndexSearchWithComparator(string_pool_->Get(val), start, indices,
+                                       Less{string_pool_});
+      break;
+    case FilterOp::kGt:
+      utils::IndexSearchWithComparator(string_pool_->Get(val), start, indices,
+                                       Greater{string_pool_});
+      break;
+    case FilterOp::kGe:
+      utils::IndexSearchWithComparator(string_pool_->Get(val), start, indices,
+                                       GreaterEqual{string_pool_});
+      break;
+    case FilterOp::kGlob: {
+      util::GlobMatcher matcher =
+          util::GlobMatcher::FromPattern(sql_val.AsString());
+      if (matcher.IsEquality()) {
+        utils::IndexSearchWithComparator(val, start, indices,
+                                         std::equal_to<>());
+        break;
+      }
+      utils::IndexSearchWithComparator(std::move(matcher), start, indices,
+                                       Glob{string_pool_});
+      break;
+    }
+    case FilterOp::kRegex: {
+      base::StatusOr<regex::Regex> regex =
+          regex::Regex::Create(sql_val.AsString());
+      utils::IndexSearchWithComparator(std::move(regex.value()), start, indices,
+                                       Regex{string_pool_});
+      break;
+    }
+    case FilterOp::kIsNull:
+      utils::IndexSearchWithComparator(val, start, indices, IsNull());
+      break;
+    case FilterOp::kIsNotNull:
+      utils::IndexSearchWithComparator(val, start, indices, IsNotNull());
+      break;
+  }
 }
 
 BitVector StringStorage::ChainImpl::LinearSearch(FilterOp op,
@@ -452,7 +527,7 @@
 Range StringStorage::ChainImpl::OrderedIndexSearchValidated(
     FilterOp op,
     SqlValue sql_val,
-    Indices indices) const {
+    const OrderedIndices& indices) const {
   StringPool::Id val =
       (op == FilterOp::kIsNull || op == FilterOp::kIsNotNull)
           ? StringPool::Id::Null()
@@ -509,75 +584,6 @@
   PERFETTO_FATAL("For GCC");
 }
 
-RangeOrBitVector StringStorage::ChainImpl::IndexSearchInternal(
-    FilterOp op,
-    SqlValue sql_val,
-    const uint32_t* indices,
-    uint32_t indices_size) const {
-  StringPool::Id val =
-      (op == FilterOp::kIsNull || op == FilterOp::kIsNotNull)
-          ? StringPool::Id::Null()
-          : string_pool_->InternString(base::StringView(sql_val.AsString()));
-  const StringPool::Id* start = data_->data();
-
-  BitVector::Builder builder(indices_size);
-
-  switch (op) {
-    case FilterOp::kEq:
-      utils::IndexSearchWithComparator(val, start, indices, std::equal_to<>(),
-                                       builder);
-      break;
-    case FilterOp::kNe:
-      utils::IndexSearchWithComparator(val, start, indices, NotEqual(),
-                                       builder);
-      break;
-    case FilterOp::kLe:
-      utils::IndexSearchWithComparator(string_pool_->Get(val), start, indices,
-                                       LessEqual{string_pool_}, builder);
-      break;
-    case FilterOp::kLt:
-      utils::IndexSearchWithComparator(string_pool_->Get(val), start, indices,
-                                       Less{string_pool_}, builder);
-      break;
-    case FilterOp::kGt:
-      utils::IndexSearchWithComparator(string_pool_->Get(val), start, indices,
-                                       Greater{string_pool_}, builder);
-      break;
-    case FilterOp::kGe:
-      utils::IndexSearchWithComparator(string_pool_->Get(val), start, indices,
-                                       GreaterEqual{string_pool_}, builder);
-      break;
-    case FilterOp::kGlob: {
-      util::GlobMatcher matcher =
-          util::GlobMatcher::FromPattern(sql_val.AsString());
-      if (matcher.IsEquality()) {
-        utils::IndexSearchWithComparator(val, start, indices, std::equal_to<>(),
-                                         builder);
-        break;
-      }
-      utils::IndexSearchWithComparator(std::move(matcher), start, indices,
-                                       Glob{string_pool_}, builder);
-      break;
-    }
-    case FilterOp::kRegex: {
-      base::StatusOr<regex::Regex> regex =
-          regex::Regex::Create(sql_val.AsString());
-      utils::IndexSearchWithComparator(std::move(regex.value()), start, indices,
-                                       Regex{string_pool_}, builder);
-      break;
-    }
-    case FilterOp::kIsNull:
-      utils::IndexSearchWithComparator(val, start, indices, IsNull(), builder);
-      break;
-    case FilterOp::kIsNotNull:
-      utils::IndexSearchWithComparator(val, start, indices, IsNotNull(),
-                                       builder);
-      break;
-  }
-
-  return RangeOrBitVector(std::move(builder).Build());
-}
-
 Range StringStorage::ChainImpl::BinarySearchIntrinsic(
     FilterOp op,
     SqlValue sql_val,
diff --git a/src/trace_processor/db/column/string_storage.h b/src/trace_processor/db/column/string_storage.h
index d61aef5..f037bd1 100644
--- a/src/trace_processor/db/column/string_storage.h
+++ b/src/trace_processor/db/column/string_storage.h
@@ -55,13 +55,11 @@
 
     RangeOrBitVector SearchValidated(FilterOp, SqlValue, Range) const override;
 
-    RangeOrBitVector IndexSearchValidated(FilterOp,
-                                          SqlValue,
-                                          Indices) const override;
+    void IndexSearchValidated(FilterOp, SqlValue, Indices&) const override;
 
     Range OrderedIndexSearchValidated(FilterOp,
                                       SqlValue,
-                                      Indices) const override;
+                                      const OrderedIndices&) const override;
 
     void StableSort(SortToken* start,
                     SortToken* end,
diff --git a/src/trace_processor/db/column/string_storage_unittest.cc b/src/trace_processor/db/column/string_storage_unittest.cc
index 303ab66..e664ae1 100644
--- a/src/trace_processor/db/column/string_storage_unittest.cc
+++ b/src/trace_processor/db/column/string_storage_unittest.cc
@@ -35,6 +35,9 @@
 using testing::ElementsAre;
 using testing::IsEmpty;
 
+using Indices = DataLayerChain::Indices;
+using OrderedIndices = DataLayerChain::OrderedIndices;
+
 TEST(StringStorage, SearchOneElement) {
   std::vector<std::string> strings{"cheese",  "pasta", "pizza",
                                    "pierogi", "onion", "fries"};
@@ -161,44 +164,48 @@
   auto chain = storage.MakeChain();
   SqlValue val = SqlValue::String("pierogi");
   // "fries", "onion", "pierogi", NULL, "pizza", "pasta", "cheese"
-  std::vector<uint32_t> indices_vec{6, 5, 4, 3, 2, 1, 0};
-  Indices indices{indices_vec.data(), 7, Indices::State::kNonmonotonic};
+  Indices common_indices = Indices::CreateWithIndexPayloadForTesting(
+      {6, 5, 4, 3, 2, 1, 0}, Indices::State::kNonmonotonic);
 
-  FilterOp op = FilterOp::kEq;
-  auto res = chain->IndexSearch(op, val, indices);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(2));
+  auto indices = common_indices;
+  chain->IndexSearch(FilterOp::kEq, val, indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices), ElementsAre(2));
 
-  op = FilterOp::kNe;
-  res = chain->IndexSearch(op, val, indices);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(0, 1, 4, 5, 6));
+  indices = common_indices;
+  chain->IndexSearch(FilterOp::kNe, val, indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices),
+              ElementsAre(0, 1, 4, 5, 6));
 
-  op = FilterOp::kLt;
-  res = chain->IndexSearch(op, val, indices);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(0, 1, 5, 6));
+  indices = common_indices;
+  chain->IndexSearch(FilterOp::kLt, val, indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices),
+              ElementsAre(0, 1, 5, 6));
 
-  op = FilterOp::kLe;
-  res = chain->IndexSearch(op, val, indices);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(0, 1, 2, 5, 6));
+  indices = common_indices;
+  chain->IndexSearch(FilterOp::kLe, val, indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices),
+              ElementsAre(0, 1, 2, 5, 6));
 
-  op = FilterOp::kGt;
-  res = chain->IndexSearch(op, val, indices);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(4));
+  indices = common_indices;
+  chain->IndexSearch(FilterOp::kGt, val, indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices), ElementsAre(4));
 
-  op = FilterOp::kGe;
-  res = chain->IndexSearch(op, val, indices);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(2, 4));
+  indices = common_indices;
+  chain->IndexSearch(FilterOp::kGe, val, indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices), ElementsAre(2, 4));
 
-  op = FilterOp::kIsNull;
-  res = chain->IndexSearch(op, SqlValue(), indices);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(3));
+  indices = common_indices;
+  chain->IndexSearch(FilterOp::kIsNull, SqlValue(), indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices), ElementsAre(3));
 
-  op = FilterOp::kIsNotNull;
-  res = chain->IndexSearch(op, SqlValue(), indices);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(0, 1, 2, 4, 5, 6));
+  indices = common_indices;
+  chain->IndexSearch(FilterOp::kIsNotNull, SqlValue(), indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices),
+              ElementsAre(0, 1, 2, 4, 5, 6));
 
-  op = FilterOp::kGlob;
-  res = chain->IndexSearch(op, SqlValue::String("p*"), indices);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(2, 4, 5));
+  indices = common_indices;
+  chain->IndexSearch(FilterOp::kGlob, SqlValue::String("p*"), indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices), ElementsAre(2, 4, 5));
 }
 
 #if !PERFETTO_BUILDFLAG(PERFETTO_OS_WIN)
@@ -241,10 +248,38 @@
 }
 #endif
 
+TEST(StringStorage, SearchEmptyString) {
+  std::vector<std::string> strings{"", "apple"};
+  std::vector<StringPool::Id> ids(3, StringPool::Id::Null());
+  StringPool pool;
+  for (const auto& string : strings) {
+    ids.push_back(pool.InternString(base::StringView(string)));
+  }
+  StringStorage storage(&pool, &ids, true);
+  auto chain = storage.MakeChain();
+
+  auto res = chain->Search(FilterOp::kEq, SqlValue::String(""), {0, 5});
+  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(3));
+}
+
+TEST(StringStorage, SearchEmptyStringIsNull) {
+  std::vector<std::string> strings{"", "apple"};
+  std::vector<StringPool::Id> ids(3, StringPool::Id::Null());
+  StringPool pool;
+  for (const auto& string : strings) {
+    ids.push_back(pool.InternString(base::StringView(string)));
+  }
+  StringStorage storage(&pool, &ids, true);
+  auto chain = storage.MakeChain();
+
+  auto res = chain->Search(FilterOp::kIsNull, SqlValue(), {0, 5});
+  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(0, 1, 2));
+}
+
 TEST(StringStorage, SearchSorted) {
   std::vector<std::string> strings{"apple",    "burger",   "cheese",
                                    "doughnut", "eggplant", "fries"};
-  std::vector<StringPool::Id> ids;
+  std::vector<StringPool::Id> ids(3, StringPool::Id::Null());
   StringPool pool;
   for (const auto& string : strings) {
     ids.push_back(pool.InternString(base::StringView(string)));
@@ -252,35 +287,36 @@
   StringStorage storage(&pool, &ids, true);
   auto chain = storage.MakeChain();
   SqlValue val = SqlValue::String("cheese");
-  Range filter_range(0, 6);
+  // 1:NULL, 2:NULL, 3:apple, 4:burger, 5:cheese, 6:doughnut, 7:eggplant
+  Range filter_range(1, 8);
 
   FilterOp op = FilterOp::kEq;
   auto res = chain->Search(op, val, filter_range);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(2));
+  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(5));
 
   op = FilterOp::kNe;
   res = chain->Search(op, val, filter_range);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(0, 1, 3, 4, 5));
+  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(3, 4, 6, 7));
 
   op = FilterOp::kLt;
   res = chain->Search(op, val, filter_range);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(0, 1));
+  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(3, 4));
 
   op = FilterOp::kLe;
   res = chain->Search(op, val, filter_range);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(0, 1, 2));
+  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(3, 4, 5));
 
   op = FilterOp::kGt;
   res = chain->Search(op, val, filter_range);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(3, 4, 5));
+  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(6, 7));
 
   op = FilterOp::kGe;
   res = chain->Search(op, val, filter_range);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(2, 3, 4, 5));
+  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(5, 6, 7));
 
   op = FilterOp::kGlob;
   res = chain->Search(op, SqlValue::String("*e"), filter_range);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(0, 2));
+  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(3, 5));
 }
 
 TEST(StringStorage, IndexSearchSorted) {
@@ -294,43 +330,42 @@
   StringStorage storage(&pool, &ids, true);
   auto chain = storage.MakeChain();
   SqlValue val = SqlValue::String("cheese");
-  // fries, eggplant, cheese, burger
-  std::vector<uint32_t> indices_vec{5, 4, 2, 1};
-  Indices indices{indices_vec.data(), 4, Indices::State::kNonmonotonic};
+  Indices common_indices = Indices::CreateWithIndexPayloadForTesting(
+      {5, 4, 2, 1}, Indices::State::kNonmonotonic);
 
-  FilterOp op = FilterOp::kEq;
-  auto res = chain->IndexSearch(op, val, indices);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(2));
+  auto indices = common_indices;
+  chain->IndexSearch(FilterOp::kEq, val, indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices), ElementsAre(2));
 
-  op = FilterOp::kNe;
-  res = chain->IndexSearch(op, val, indices);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(0, 1, 3));
+  indices = common_indices;
+  chain->IndexSearch(FilterOp::kNe, val, indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices), ElementsAre(0, 1, 3));
 
-  op = FilterOp::kLt;
-  res = chain->IndexSearch(op, val, indices);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(3));
+  indices = common_indices;
+  chain->IndexSearch(FilterOp::kLt, val, indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices), ElementsAre(3));
 
-  op = FilterOp::kLe;
-  res = chain->IndexSearch(op, val, indices);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(2, 3));
+  indices = common_indices;
+  chain->IndexSearch(FilterOp::kLe, val, indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices), ElementsAre(2, 3));
 
-  op = FilterOp::kGt;
-  res = chain->IndexSearch(op, val, indices);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(0, 1));
+  indices = common_indices;
+  chain->IndexSearch(FilterOp::kGt, val, indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices), ElementsAre(0, 1));
 
-  op = FilterOp::kGe;
-  res = chain->IndexSearch(op, val, indices);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(0, 1, 2));
+  indices = common_indices;
+  chain->IndexSearch(FilterOp::kGe, val, indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices), ElementsAre(0, 1, 2));
 
-  op = FilterOp::kGlob;
-  res = chain->IndexSearch(op, SqlValue::String("*e"), indices);
-  ASSERT_THAT(utils::ToIndexVectorForTests(res), ElementsAre(2));
+  indices = common_indices;
+  chain->IndexSearch(FilterOp::kGlob, SqlValue::String("*e"), indices);
+  ASSERT_THAT(utils::ExtractPayloadForTesting(indices), ElementsAre(2));
 }
 
 TEST(StringStorage, OrderedIndexSearch) {
   std::vector<std::string> strings{"cheese",  "pasta", "pizza",
                                    "pierogi", "onion", "fries"};
-  std::vector<StringPool::Id> ids;
+  std::vector<StringPool::Id> ids(1, StringPool::Id::Null());
   StringPool pool;
   for (const auto& string : strings) {
     ids.push_back(pool.InternString(base::StringView(string)));
@@ -338,9 +373,8 @@
   StringStorage storage(&pool, &ids);
   auto chain = storage.MakeChain();
   SqlValue val = SqlValue::String("pierogi");
-  // cheese, fries, onion, pasta, pierogi, pizza
-  std::vector<uint32_t> indices_vec{0, 5, 4, 1, 3, 2};
-  Indices indices{indices_vec.data(), 6, Indices::State::kNonmonotonic};
+  std::vector<uint32_t> indices_vec{0, 6, 5, 2, 4, 3};
+  OrderedIndices indices{indices_vec.data(), 6, Indices::State::kNonmonotonic};
 
   FilterOp op = FilterOp::kEq;
   Range res = chain->OrderedIndexSearch(op, val, indices);
@@ -366,6 +400,31 @@
   res = chain->OrderedIndexSearch(op, val, indices);
   ASSERT_EQ(res.start, 4u);
   ASSERT_EQ(res.end, 6u);
+
+  op = FilterOp::kIsNull;
+  res = chain->OrderedIndexSearch(op, val, indices);
+  ASSERT_EQ(res.start, 0u);
+  ASSERT_EQ(res.end, 1u);
+}
+
+TEST(StringStorage, OrderedIndexSearchLowerBoundWithNulls) {
+  std::vector<std::string> strings{"cheese",  "pasta", "pizza",
+                                   "pierogi", "onion", "fries"};
+  std::vector<StringPool::Id> ids(3, StringPool::Id::Null());
+  StringPool pool;
+  for (const auto& string : strings) {
+    ids.push_back(pool.InternString(base::StringView(string)));
+  }
+  StringStorage storage(&pool, &ids);
+  auto chain = storage.MakeChain();
+
+  // NULL, NULL, cheese, onion
+  std::vector<uint32_t> indices_vec{0, 2, 3, 7};
+  OrderedIndices indices{indices_vec.data(), 4, Indices::State::kNonmonotonic};
+  auto res = chain->OrderedIndexSearch(FilterOp::kEq,
+                                       SqlValue::String("cheese"), indices);
+  ASSERT_EQ(res.start, 2u);
+  ASSERT_EQ(res.end, 3u);
 }
 
 TEST(StringStorage, OrderedIndexSearchIsNull) {
@@ -380,7 +439,7 @@
   auto chain = storage.MakeChain();
 
   std::vector<uint32_t> indices_vec{0, 2, 5, 7};
-  Indices indices{indices_vec.data(), 4, Indices::State::kNonmonotonic};
+  OrderedIndices indices{indices_vec.data(), 4, Indices::State::kNonmonotonic};
   auto res = chain->OrderedIndexSearch(FilterOp::kIsNull, SqlValue(), indices);
   ASSERT_EQ(res.start, 0u);
   ASSERT_EQ(res.end, 2u);
@@ -398,7 +457,7 @@
   auto chain = storage.MakeChain();
 
   std::vector<uint32_t> indices_vec{0, 2, 5, 7};
-  Indices indices{indices_vec.data(), 4, Indices::State::kNonmonotonic};
+  OrderedIndices indices{indices_vec.data(), 4, Indices::State::kNonmonotonic};
   auto res =
       chain->OrderedIndexSearch(FilterOp::kIsNotNull, SqlValue(), indices);
   ASSERT_EQ(res.start, 2u);
diff --git a/src/trace_processor/db/column/types.h b/src/trace_processor/db/column/types.h
index 803c6ce..b59084b 100644
--- a/src/trace_processor/db/column/types.h
+++ b/src/trace_processor/db/column/types.h
@@ -19,6 +19,7 @@
 #include <cstdint>
 #include <utility>
 #include <variant>
+#include <vector>
 
 #include "perfetto/base/logging.h"
 #include "perfetto/trace_processor/basic_types.h"
@@ -113,21 +114,6 @@
   kDummy,
 };
 
-// Index vector related data required to Filter using IndexSearch.
-struct Indices {
-  enum class State {
-    // We can't guarantee that data is in monotonic order.
-    kNonmonotonic,
-    // Data is in monotonic order.
-    // TODO(b/307482437): Use this to optimise filtering if storage is sorted.
-    kMonotonic
-  };
-
-  const uint32_t* data = nullptr;
-  uint32_t size = 0;
-  State state = Indices::State::kNonmonotonic;
-};
-
 }  // namespace perfetto::trace_processor
 
 #endif  // SRC_TRACE_PROCESSOR_DB_COLUMN_TYPES_H_
diff --git a/src/trace_processor/db/column/utils.cc b/src/trace_processor/db/column/utils.cc
index efec000..e24f3bf 100644
--- a/src/trace_processor/db/column/utils.cc
+++ b/src/trace_processor/db/column/utils.cc
@@ -79,6 +79,16 @@
 }
 
 std::vector<uint32_t> ExtractPayloadForTesting(
+    const DataLayerChain::Indices& indices) {
+  std::vector<uint32_t> payload;
+  payload.reserve(indices.tokens.size());
+  for (const auto& token : indices.tokens) {
+    payload.push_back(token.payload);
+  }
+  return payload;
+}
+
+std::vector<uint32_t> ExtractPayloadForTesting(
     std::vector<column::DataLayerChain::SortToken>& tokens) {
   std::vector<uint32_t> payload;
   payload.reserve(tokens.size());
@@ -113,4 +123,18 @@
   PERFETTO_FATAL("For GCC");
 }
 
+bool CanReturnEarly(SearchValidationResult res,
+                    DataLayerChain::Indices& indices) {
+  switch (res) {
+    case SearchValidationResult::kOk:
+      return false;
+    case SearchValidationResult::kAllData:
+      return true;
+    case SearchValidationResult::kNoData:
+      indices.tokens.clear();
+      return true;
+  }
+  PERFETTO_FATAL("For GCC");
+}
+
 }  // namespace perfetto::trace_processor::column::utils
diff --git a/src/trace_processor/db/column/utils.h b/src/trace_processor/db/column/utils.h
index b1c71a3..14c6456 100644
--- a/src/trace_processor/db/column/utils.h
+++ b/src/trace_processor/db/column/utils.h
@@ -16,6 +16,7 @@
 #ifndef SRC_TRACE_PROCESSOR_DB_COLUMN_UTILS_H_
 #define SRC_TRACE_PROCESSOR_DB_COLUMN_UTILS_H_
 
+#include <algorithm>
 #include <cstdint>
 #include <functional>
 #include <optional>
@@ -65,28 +66,14 @@
 template <typename Comparator, typename ValType, typename DataType>
 void IndexSearchWithComparator(ValType val,
                                const DataType* data_ptr,
-                               const uint32_t* indices,
-                               Comparator comparator,
-                               BitVector::Builder& builder) {
-  // Fast path: we compare as many groups of 64 elements as we can.
-  // This should be very easy for the compiler to auto-vectorize.
-  const uint32_t* cur_idx = indices;
-  uint32_t fast_path_elements = builder.BitsInCompleteWordsUntilFull();
-  for (uint32_t i = 0; i < fast_path_elements; i += BitVector::kBitsInWord) {
-    uint64_t word = 0;
-    // This part should be optimised by SIMD and is expected to be fast.
-    for (uint32_t k = 0; k < BitVector::kBitsInWord; ++k, ++cur_idx) {
-      bool comp_result = comparator(*(data_ptr + *cur_idx), val);
-      word |= static_cast<uint64_t>(comp_result) << k;
-    }
-    builder.AppendWord(word);
-  }
-
-  // Slow path: we compare <64 elements and append to fill the Builder.
-  uint32_t back_elements = builder.BitsUntilFull();
-  for (uint32_t i = 0; i < back_elements; ++i, ++cur_idx) {
-    builder.Append(comparator(*(data_ptr + *cur_idx), val));
-  }
+                               DataLayerChain::Indices& indices,
+                               Comparator comparator) {
+  // Drop every token whose indexed element fails |comparator| against |val|.
+  auto& toks = indices.tokens;
+  auto rejected = [&](const DataLayerChain::Indices::Token& token) {
+    return !comparator(data_ptr[token.index], val);
+  };
+  toks.erase(std::remove_if(toks.begin(), toks.end(), rejected), toks.end());
 }
 
 template <typename T>
@@ -136,11 +123,18 @@
 std::optional<Range> CanReturnEarly(SearchValidationResult,
                                     uint32_t indices_size);
 
+// If the validation result doesn't require further search, will modify
+// |indices| to match and return true. Otherwise returns false.
+bool CanReturnEarly(SearchValidationResult res,
+                    DataLayerChain::Indices& indices);
+
 std::vector<uint32_t> ExtractPayloadForTesting(
     std::vector<column::DataLayerChain::SortToken>&);
 
 std::vector<uint32_t> ToIndexVectorForTests(RangeOrBitVector&);
 
+std::vector<uint32_t> ExtractPayloadForTesting(const DataLayerChain::Indices&);
+
 }  // namespace perfetto::trace_processor::column::utils
 
 #endif  // SRC_TRACE_PROCESSOR_DB_COLUMN_UTILS_H_
diff --git a/src/trace_processor/db/column_storage_overlay.h b/src/trace_processor/db/column_storage_overlay.h
index c13c095..3e68687 100644
--- a/src/trace_processor/db/column_storage_overlay.h
+++ b/src/trace_processor/db/column_storage_overlay.h
@@ -20,6 +20,7 @@
 #include <stdint.h>
 
 #include <optional>
+#include <utility>
 #include <vector>
 
 #include "src/trace_processor/containers/bit_vector.h"
@@ -41,7 +42,7 @@
   // Allows efficient iteration over the rows of a ColumnStorageOverlay.
   class Iterator {
    public:
-    Iterator(RowMap::Iterator it) : it_(std::move(it)) {}
+    explicit Iterator(RowMap::Iterator it) : it_(std::move(it)) {}
 
     Iterator(Iterator&&) noexcept = default;
     Iterator& operator=(Iterator&&) = default;
@@ -50,7 +51,7 @@
     void Next() { return it_.Next(); }
 
     // Returns if the iterator is still valid.
-    operator bool() const { return it_; }
+    explicit operator bool() const { return bool(it_); }
 
     // Returns the index pointed to by this iterator.
     OutputIndex index() const { return it_.index(); }
diff --git a/src/trace_processor/db/query_executor.cc b/src/trace_processor/db/query_executor.cc
index bd15245..0dd3aa9 100644
--- a/src/trace_processor/db/query_executor.cc
+++ b/src/trace_processor/db/query_executor.cc
@@ -19,6 +19,7 @@
 #include <utility>
 #include <vector>
 
+#include <sys/types.h>
 #include "perfetto/base/logging.h"
 #include "perfetto/trace_processor/basic_types.h"
 #include "src/trace_processor/containers/bit_vector.h"
@@ -116,36 +117,17 @@
   // Create outmost TableIndexVector.
   std::vector<uint32_t> table_indices = std::move(*rm).TakeAsIndexVector();
 
-  RangeOrBitVector matched = chain.IndexSearch(
-      c.op, c.value,
-      Indices{table_indices.data(), static_cast<uint32_t>(table_indices.size()),
-              Indices::State::kMonotonic});
+  using Indices = column::DataLayerChain::Indices;
+  Indices indices = Indices::Create(table_indices, Indices::State::kMonotonic);
+  chain.IndexSearch(c.op, c.value, indices);
 
-  if (matched.IsBitVector()) {
-    BitVector res = std::move(matched).TakeIfBitVector();
-    uint32_t i = 0;
-    table_indices.erase(
-        std::remove_if(table_indices.begin(), table_indices.end(),
-                       [&i, &res](uint32_t) { return !res.IsSet(i++); }),
-        table_indices.end());
-    *rm = RowMap(std::move(table_indices));
-    return;
+  PERFETTO_DCHECK(indices.tokens.size() <= table_indices.size());
+  for (uint32_t i = 0; i < indices.tokens.size(); ++i) {
+    table_indices[i] = indices.tokens[i].payload;
   }
-
-  Range res = std::move(matched).TakeIfRange();
-  if (res.size() == 0) {
-    rm->Clear();
-    return;
-  }
-  if (res.size() == table_indices.size()) {
-    return;
-  }
-
-  PERFETTO_DCHECK(res.end <= table_indices.size());
-  std::vector<uint32_t> res_as_iv(
-      table_indices.begin() + static_cast<int>(res.start),
-      table_indices.begin() + static_cast<int>(res.end));
-  *rm = RowMap(std::move(res_as_iv));
+  table_indices.resize(indices.tokens.size());
+  PERFETTO_DCHECK(std::is_sorted(table_indices.begin(), table_indices.end()));
+  *rm = RowMap(std::move(table_indices));
 }
 
 RowMap QueryExecutor::FilterLegacy(const Table* table,
diff --git a/src/trace_processor/db/query_executor_benchmark.cc b/src/trace_processor/db/query_executor_benchmark.cc
index dd1e8a9..8502e04 100644
--- a/src/trace_processor/db/query_executor_benchmark.cc
+++ b/src/trace_processor/db/query_executor_benchmark.cc
@@ -103,22 +103,27 @@
   return base::SplitString(table_csv, "\n");
 }
 
+StringPool::Id StripAndIntern(StringPool& pool, const std::string& data) {
+  std::string no_lead = base::StripPrefix(data, "\"");  // Leading quote off.
+  return pool.InternString(base::StringView(base::StripSuffix(no_lead, "\"")));
+}
+
 SliceTable::Row GetSliceTableRow(const std::string& string_row,
                                  StringPool& pool) {
   std::vector<std::string> row_vec = SplitCSVLine(string_row);
   SliceTable::Row row;
-  PERFETTO_CHECK(row_vec.size() >= 12);
+  PERFETTO_CHECK(row_vec.size() >= 14);
   row.ts = *base::StringToInt64(row_vec[2]);
   row.dur = *base::StringToInt64(row_vec[3]);
   row.track_id = ThreadTrackTable::Id(*base::StringToUInt32(row_vec[4]));
-  row.category = pool.InternString(base::StringView(row_vec[5]));
-  row.name = pool.InternString(base::StringView(row_vec[6]));
+  row.category = StripAndIntern(pool, row_vec[5]);
+  row.name = StripAndIntern(pool, row_vec[6]);
   row.depth = *base::StringToUInt32(row_vec[7]);
   row.stack_id = *base::StringToInt32(row_vec[8]);
   row.parent_stack_id = *base::StringToInt32(row_vec[9]);
-  row.parent_id = base::StringToUInt32(row_vec[11]).has_value()
+  row.parent_id = base::StringToUInt32(row_vec[10]).has_value()
                       ? std::make_optional<SliceTable::Id>(
-                            *base::StringToUInt32(row_vec[11]))
+                            *base::StringToUInt32(row_vec[10]))
                       : std::nullopt;
   row.arg_set_id = *base::StringToUInt32(row_vec[11]);
   row.thread_ts = base::StringToInt64(row_vec[12]);
@@ -236,6 +241,10 @@
       benchmark::Counter(static_cast<double>(table.table_.row_count()),
                          benchmark::Counter::kIsIterationInvariantRate |
                              benchmark::Counter::kInvert);
+  state.counters["s/out"] = benchmark::Counter(
+      static_cast<double>(table.table_.QueryToRowMap(c, {}).size()),
+      benchmark::Counter::kIsIterationInvariantRate |
+          benchmark::Counter::kInvert);
 }
 
 void BenchmarkSliceTableSort(benchmark::State& state,
@@ -261,6 +270,10 @@
       benchmark::Counter(static_cast<double>(table.table_.row_count()),
                          benchmark::Counter::kIsIterationInvariantRate |
                              benchmark::Counter::kInvert);
+  state.counters["s/out"] = benchmark::Counter(
+      static_cast<double>(table.table_.QueryToRowMap({c}, {}).size()),
+      benchmark::Counter::kIsIterationInvariantRate |
+          benchmark::Counter::kInvert);
 }
 
 void BenchmarkFtraceEventTableFilter(benchmark::State& state,
@@ -273,6 +286,10 @@
       benchmark::Counter(static_cast<double>(table.table_.row_count()),
                          benchmark::Counter::kIsIterationInvariantRate |
                              benchmark::Counter::kInvert);
+  state.counters["s/out"] = benchmark::Counter(
+      static_cast<double>(table.table_.QueryToRowMap({c}, {}).size()),
+      benchmark::Counter::kIsIterationInvariantRate |
+          benchmark::Counter::kInvert);
 }
 
 void BenchmarkFtraceEventTableSort(benchmark::State& state,
@@ -289,7 +306,7 @@
 
 void BM_QESliceTableTrackIdEq(benchmark::State& state) {
   SliceTableForBenchmark table(state);
-  BenchmarkSliceTableFilter(state, table, {table.table_.track_id().eq(100)});
+  BenchmarkSliceTableFilter(state, table, {table.table_.track_id().eq(1213)});
 }
 
 BENCHMARK(BM_QESliceTableTrackIdEq);
@@ -304,29 +321,33 @@
 
 void BM_QESliceTableParentIdEq(benchmark::State& state) {
   SliceTableForBenchmark table(state);
-  BenchmarkSliceTableFilter(state, table, {table.table_.parent_id().eq(88)});
+  BenchmarkSliceTableFilter(state, table, {table.table_.parent_id().eq(26711)});
 }
 
 BENCHMARK(BM_QESliceTableParentIdEq);
 
 void BM_QESliceTableNameEq(benchmark::State& state) {
   SliceTableForBenchmark table(state);
-  BenchmarkSliceTableFilter(state, table, {table.table_.name().eq("cheese")});
+  BenchmarkSliceTableFilter(
+      state, table,
+      {table.table_.name().eq("MarkFromReadBarrierWithMeasurements")});
 }
 
 BENCHMARK(BM_QESliceTableNameEq);
 
 void BM_QESliceTableNameGlobNoStars(benchmark::State& state) {
   SliceTableForBenchmark table(state);
-  BenchmarkSliceTableFilter(state, table, {table.table_.name().glob("cheese")});
+  BenchmarkSliceTableFilter(
+      state, table,
+      {table.table_.name().glob("MarkFromReadBarrierWithMeasurements")});
 }
 
 BENCHMARK(BM_QESliceTableNameGlobNoStars);
 
 void BM_QESliceTableNameGlob(benchmark::State& state) {
   SliceTableForBenchmark table(state);
-  BenchmarkSliceTableFilter(state, table,
-                            {table.table_.name().glob("chee*se")});
+  BenchmarkSliceTableFilter(
+      state, table, {table.table_.name().glob("HIDL::IMapper::unlock::*")});
 }
 
 BENCHMARK(BM_QESliceTableNameGlob);
@@ -341,7 +362,9 @@
 
 void BM_QESliceTableSorted(benchmark::State& state) {
   SliceTableForBenchmark table(state);
-  BenchmarkSliceTableFilter(state, table, {table.table_.ts().gt(1000)});
+  BenchmarkSliceTableFilter(state, table,
+                            {table.table_.ts().gt(1738923505854),
+                             table.table_.ts().lt(1738950140556)});
 }
 
 BENCHMARK(BM_QESliceTableSorted);
@@ -349,7 +372,7 @@
 void BM_QEFilterWithSparseSelector(benchmark::State& state) {
   ExpectedFrameTimelineTableForBenchmark table(state);
   BenchmarkExpectedFrameTableFilter(state, table,
-                                    table.table_.track_id().eq(88));
+                                    table.table_.track_id().eq(1445));
 }
 
 BENCHMARK(BM_QEFilterWithSparseSelector);
@@ -379,8 +402,8 @@
   SliceTableForBenchmark table(state);
   BenchmarkSliceTableFilter(
       state, table,
-      {table.table_.ts().ge(1740530419866), table.table_.ts().le(1740530474097),
-       table.table_.track_id().eq(100)});
+      {table.table_.ts().ge(1738923505854), table.table_.ts().le(1738950140556),
+       table.table_.track_id().eq(1422)});
 }
 
 BENCHMARK(BM_QESliceTableTsAndTrackId);
@@ -388,7 +411,8 @@
 void BM_QEFilterOneElement(benchmark::State& state) {
   SliceTableForBenchmark table(state);
   BenchmarkSliceTableFilter(
-      state, table, {table.table_.id().eq(10), table.table_.dur().eq(100)});
+      state, table,
+      {table.table_.id().eq(11732), table.table_.track_id().eq(1422)});
 }
 
 BENCHMARK(BM_QEFilterOneElement);
@@ -407,6 +431,10 @@
       static_cast<double>(slice_sorted_with_duration.row_count()),
       benchmark::Counter::kIsIterationInvariantRate |
           benchmark::Counter::kInvert);
+  state.counters["s/out"] = benchmark::Counter(
+      static_cast<double>(table.table_.QueryToRowMap({c}, {}).size()),
+      benchmark::Counter::kIsIterationInvariantRate |
+          benchmark::Counter::kInvert);
 }
 
 BENCHMARK(BM_QEFilterWithArrangement);
@@ -422,6 +450,10 @@
       benchmark::Counter(static_cast<double>(table.table_.row_count()),
                          benchmark::Counter::kIsIterationInvariantRate |
                              benchmark::Counter::kInvert);
+  state.counters["s/out"] = benchmark::Counter(
+      static_cast<double>(table.table_.QueryToRowMap({c}, {}).size()),
+      benchmark::Counter::kIsIterationInvariantRate |
+          benchmark::Counter::kInvert);
 }
 BENCHMARK(BM_QEDenseNullFilter);
 
@@ -436,6 +468,10 @@
       benchmark::Counter(static_cast<double>(table.table_.row_count()),
                          benchmark::Counter::kIsIterationInvariantRate |
                              benchmark::Counter::kInvert);
+  state.counters["s/out"] = benchmark::Counter(
+      static_cast<double>(table.table_.QueryToRowMap({c}, {}).size()),
+      benchmark::Counter::kIsIterationInvariantRate |
+          benchmark::Counter::kInvert);
 }
 BENCHMARK(BM_QEDenseNullFilterIsNull);
 
@@ -471,10 +507,32 @@
       static_cast<double>(slice_sorted_with_duration.row_count()),
       benchmark::Counter::kIsIterationInvariantRate |
           benchmark::Counter::kInvert);
+  state.counters["s/out"] = benchmark::Counter(
+      static_cast<double>(table.table_.QueryToRowMap({c}, {}).size()),
+      benchmark::Counter::kIsIterationInvariantRate |
+          benchmark::Counter::kInvert);
 }
 
 BENCHMARK(BM_QEFilterOrderedArrangement);
 
+void BM_QESliceFilterIndexSearchOneElement(benchmark::State& state) {
+  SliceTableForBenchmark table(state);
+  auto& slices = table.table_;  // Shorthand for the table under test.
+  BenchmarkSliceTableFilter(
+      state, table, {slices.track_id().eq(1422), slices.id().eq(11732)});
+}
+
+BENCHMARK(BM_QESliceFilterIndexSearchOneElement);
+
+void BM_QESliceFilterIndexSearch(benchmark::State& state) {
+  SliceTableForBenchmark table(state);
+  auto& t = table.table_;  // Shorthand for the table under test.
+  BenchmarkSliceTableFilter(
+      state, table, {t.track_id().eq(1422), t.name().eq("notifyFramePending")});
+}
+
+BENCHMARK(BM_QESliceFilterIndexSearch);
+
 void BM_QESliceSortNumericAsc(benchmark::State& state) {
   SliceTableForBenchmark table(state);
   BenchmarkSliceTableSort(state, table, {table.table_.track_id().ascending()});
diff --git a/src/trace_processor/db/query_executor_unittest.cc b/src/trace_processor/db/query_executor_unittest.cc
index f0d8f03..161a106 100644
--- a/src/trace_processor/db/query_executor_unittest.cc
+++ b/src/trace_processor/db/query_executor_unittest.cc
@@ -52,6 +52,8 @@
 using ArrangementOverlay = column::ArrangementOverlay;
 using SelectorOverlay = column::SelectorOverlay;
 
+using Indices = column::DataLayerChain::Indices;
+
 TEST(QueryExecutor, OnlyStorageRange) {
   std::vector<int64_t> storage_data{1, 2, 3, 4, 5};
   column::NumericStorage<int64_t> storage(&storage_data, ColumnType::kInt64,
diff --git a/src/trace_processor/db/table.cc b/src/trace_processor/db/table.cc
index 7bedbb7..641c745 100644
--- a/src/trace_processor/db/table.cc
+++ b/src/trace_processor/db/table.cc
@@ -157,7 +157,7 @@
     if (table.overlays_[i].row_map().IsIndexVector()) {
       overlay_layers[i].reset(new column::ArrangementOverlay(
           table.overlays_[i].row_map().GetIfIndexVector(),
-          Indices::State::kNonmonotonic));
+          column::DataLayerChain::Indices::State::kNonmonotonic));
     } else if (table.overlays_[i].row_map().IsBitVector()) {
       overlay_layers[i].reset(new column::SelectorOverlay(
           table.overlays_[i].row_map().GetIfBitVector()));
diff --git a/src/trace_processor/db/table.h b/src/trace_processor/db/table.h
index eab9f92..7d53106 100644
--- a/src/trace_processor/db/table.h
+++ b/src/trace_processor/db/table.h
@@ -23,7 +23,6 @@
 #include <utility>
 #include <vector>
 
-#include "perfetto/base/compiler.h"
 #include "perfetto/base/logging.h"
 #include "perfetto/trace_processor/basic_types.h"
 #include "perfetto/trace_processor/ref_counted.h"
@@ -33,7 +32,6 @@
 #include "src/trace_processor/db/column/data_layer.h"
 #include "src/trace_processor/db/column/types.h"
 #include "src/trace_processor/db/column_storage_overlay.h"
-#include "src/trace_processor/db/query_executor.h"
 
 namespace perfetto::trace_processor {
 
@@ -77,7 +75,7 @@
     }
 
     // Returns whether the row the iterator is pointing at is valid.
-    explicit operator bool() const { return its_[0]; }
+    explicit operator bool() const { return bool(its_[0]); }
 
     // Returns the value at the current row for column |col_idx|.
     SqlValue Get(uint32_t col_idx) const {
diff --git a/src/trace_processor/importers/common/BUILD.gn b/src/trace_processor/importers/common/BUILD.gn
index 823edd8..5108dca 100644
--- a/src/trace_processor/importers/common/BUILD.gn
+++ b/src/trace_processor/importers/common/BUILD.gn
@@ -28,6 +28,7 @@
     "clock_converter.h",
     "clock_tracker.cc",
     "clock_tracker.h",
+    "create_mapping_params.h",
     "deobfuscation_mapping_table.cc",
     "deobfuscation_mapping_table.h",
     "event_tracker.cc",
@@ -36,10 +37,15 @@
     "flow_tracker.h",
     "global_args_tracker.cc",
     "global_args_tracker.h",
+    "mapping_tracker.cc",
+    "mapping_tracker.h",
     "metadata_tracker.cc",
     "metadata_tracker.h",
     "process_tracker.cc",
     "process_tracker.h",
+    "sched_event_state.h",
+    "sched_event_tracker.cc",
+    "sched_event_tracker.h",
     "slice_tracker.cc",
     "slice_tracker.h",
     "slice_translation_table.cc",
@@ -48,9 +54,13 @@
     "stack_profile_tracker.h",
     "system_info_tracker.cc",
     "system_info_tracker.h",
+    "thread_state_tracker.cc",
+    "thread_state_tracker.h",
     "trace_parser.cc",
     "track_tracker.cc",
     "track_tracker.h",
+    "virtual_memory_mapping.cc",
+    "virtual_memory_mapping.h",
   ]
   public_deps = [
     ":trace_parser_hdr",
@@ -70,8 +80,8 @@
     "../../storage",
     "../../tables:tables",
     "../../types",
+    "../../util:build_id",
     "../../util:profiler_util",
-    "../../util:stack_traces_util",
     "../fuchsia:fuchsia_record",
     "../systrace:systrace_line",
   ]
@@ -105,6 +115,7 @@
     "process_tracker_unittest.cc",
     "slice_tracker_unittest.cc",
     "slice_translation_table_unittest.cc",
+    "thread_state_tracker_unittest.cc",
   ]
   testonly = true
   deps = [
diff --git a/src/trace_processor/importers/common/create_mapping_params.h b/src/trace_processor/importers/common/create_mapping_params.h
new file mode 100644
index 0000000..7aba456
--- /dev/null
+++ b/src/trace_processor/importers/common/create_mapping_params.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (C) 2024 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SRC_TRACE_PROCESSOR_IMPORTERS_COMMON_CREATE_MAPPING_PARAMS_H_
+#define SRC_TRACE_PROCESSOR_IMPORTERS_COMMON_CREATE_MAPPING_PARAMS_H_
+
+#include <cstddef>
+#include <cstdint>
+#include <optional>
+#include <string>
+#include <tuple>
+
+#include "perfetto/ext/base/hash.h"
+#include "src/trace_processor/importers/common/address_range.h"
+#include "src/trace_processor/util/build_id.h"
+
+namespace perfetto {
+namespace trace_processor {
+
+struct CreateMappingParams {
+  AddressRange memory_range;
+  // This is the offset into the file that has been mapped at
+  // memory_range.start().
+  uint64_t exact_offset = 0;
+  // This is the offset into the file where the ELF header starts. We assume
+  // all file mappings are ELF files and thus this offset is 0.
+  uint64_t start_offset = 0;
+  // This can only be read out of the actual ELF file.
+  uint64_t load_bias = 0;
+  std::string name;
+  std::optional<BuildId> build_id;  // Only hashed when present (see Hasher).
+
+  auto ToTuple() const {  // Tuple of references to every field, in order.
+    return std::tie(memory_range, exact_offset, start_offset, load_bias, name,
+                    build_id);
+  }
+
+  bool operator==(const CreateMappingParams& o) const {  // Field-wise equality.
+    return ToTuple() == o.ToTuple();
+  }
+
+  struct Hasher {  // Hash functor so this struct can be used as a map key.
+    size_t operator()(const CreateMappingParams& p) const {
+      base::Hasher h;
+      h.UpdateAll(p.memory_range.start(), p.memory_range.end(), p.exact_offset,
+                  p.start_offset, p.load_bias, p.name);
+      if (p.build_id) {
+        h.Update(*p.build_id);
+      }
+      return static_cast<size_t>(h.digest());
+    }
+  };
+};
+
+}  // namespace trace_processor
+}  // namespace perfetto
+
+#endif  // SRC_TRACE_PROCESSOR_IMPORTERS_COMMON_CREATE_MAPPING_PARAMS_H_
diff --git a/src/trace_processor/importers/common/mapping_tracker.cc b/src/trace_processor/importers/common/mapping_tracker.cc
new file mode 100644
index 0000000..13b8274
--- /dev/null
+++ b/src/trace_processor/importers/common/mapping_tracker.cc
@@ -0,0 +1,169 @@
+/*
+ * Copyright (C) 2024 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "src/trace_processor/importers/common/mapping_tracker.h"
+
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <utility>
+
+#include "perfetto/ext/base/string_view.h"
+#include "src/trace_processor/importers/common/address_range.h"
+#include "src/trace_processor/storage/trace_storage.h"
+#include "src/trace_processor/types/trace_processor_context.h"
+#include "src/trace_processor/util/build_id.h"
+
+namespace perfetto {
+namespace trace_processor {
+namespace {
+
+bool IsKernelModule(base::StringView name) {
+  return !name.StartsWith("[kernel.kallsyms]");  // Anything else is a module.
+}
+
+}  // namespace
+
+JitDelegate::~JitDelegate() = default;
+
+template <typename MappingImpl>
+MappingImpl& MappingTracker::AddMapping(std::unique_ptr<MappingImpl> mapping) {
+  // Grab the raw pointer first: ownership moves into |mappings_by_id_| below.
+  MappingImpl* ptr = mapping.get();
+  PERFETTO_CHECK(
+      mappings_by_id_.Insert(ptr->mapping_id(), std::move(mapping)).second);
+
+  // Also index the mapping by (name, build id) so FindMappings can locate it.
+  NameAndBuildId key{base::StringView(ptr->name()), ptr->build_id()};
+  mappings_by_name_and_build_id_[std::move(key)].push_back(ptr);
+  return *ptr;
+}
+
+KernelMemoryMapping& MappingTracker::CreateKernelMemoryMapping(
+    CreateMappingParams params) {
+  // TODO(carlscab): Guess build_id if not provided. Some tools like simpleperf
+  // add a mapping file_name -> build_id that we could use here.
+
+  const bool is_module = IsKernelModule(base::StringView(params.name));
+  // Only one non-module (main kernel) mapping is kept; repeats reuse it.
+  if (!is_module && kernel_ != nullptr) {
+    PERFETTO_CHECK(params.memory_range == kernel_->memory_range());
+    return *kernel_;
+  }
+
+  std::unique_ptr<KernelMemoryMapping> mapping(
+      new KernelMemoryMapping(context_, std::move(params)));
+  // Modules are indexed by address range; otherwise this becomes |kernel_|.
+  if (is_module) {
+    // TODO(carlscab): Overlaps not supported (for now?). Should be fine for
+    // kernel.
+    PERFETTO_CHECK(
+        kernel_modules_.Emplace(mapping->memory_range(), mapping.get()));
+  } else {
+    kernel_ = mapping.get();
+  }
+
+  return AddMapping(std::move(mapping));
+}
+
+UserMemoryMapping& MappingTracker::CreateUserMemoryMapping(
+    UniquePid upid,
+    CreateMappingParams params) {
+  // TODO(carlscab): Guess build_id if not provided. Some tools like simpleperf
+  // add a mapping file_name -> build_id that we could use here.
+  // Save the range now: |params| is moved into the mapping just below.
+  const AddressRange mapping_range = params.memory_range;
+  std::unique_ptr<UserMemoryMapping> mapping(
+      new UserMemoryMapping(context_, upid, std::move(params)));
+  // TODO(carlscab): Overlaps not supported (for now?).
+  PERFETTO_CHECK(user_memory_[upid].Emplace(mapping_range, mapping.get()));
+  // A mapping that overlaps a registered jitted range gets that delegate.
+  jit_delegates_[upid].ForOverlaps(
+      mapping_range, [&](std::pair<const AddressRange, JitDelegate*>& entry) {
+        const auto& jit_range = entry.first;
+        JitDelegate* jit_delegate = entry.second;
+        PERFETTO_CHECK(jit_range.Contains(mapping_range));
+        mapping->SetJitDelegate(jit_delegate);
+      });
+
+  return AddMapping(std::move(mapping));
+}
+
+KernelMemoryMapping* MappingTracker::FindKernelMappingForAddress(
+    uint64_t address) const {
+  // Modules are consulted first; the main kernel image is the fallback.
+  auto module_it = kernel_modules_.Find(address);
+  if (module_it != kernel_modules_.end()) {
+    return module_it->second;
+  }
+  const bool in_kernel = kernel_ && kernel_->memory_range().Contains(address);
+  return in_kernel ? kernel_ : nullptr;
+}
+
+UserMemoryMapping* MappingTracker::FindUserMappingForAddress(
+    UniquePid upid,
+    uint64_t address) const {
+  if (auto* vm = user_memory_.Find(upid)) {
+    auto it = vm->Find(address);
+    if (it != vm->end()) {
+      return it->second;
+    }
+  }
+  // No registered mapping: let the delegate of a jitted range create one.
+  auto* delegates = jit_delegates_.Find(upid);
+  if (delegates == nullptr) {
+    return nullptr;
+  }
+  auto it = delegates->Find(address);
+  return it != delegates->end() ? it->second->CreateMapping() : nullptr;
+}
+
+std::vector<VirtualMemoryMapping*> MappingTracker::FindMappings(
+    base::StringView name,
+    const BuildId& build_id) const {
+  // Copies the bucket for this exact (name, build id) key; empty if absent.
+  auto* bucket = mappings_by_name_and_build_id_.Find({name, build_id});
+  if (!bucket)
+    return {};
+  return *bucket;
+}
+
+VirtualMemoryMapping& MappingTracker::InternMemoryMapping(
+    CreateMappingParams params) {
+  // Identical params always resolve to the mapping created the first time.
+  if (auto** existing = interned_mappings_.Find(params)) {
+    return **existing;
+  }
+  auto created = std::unique_ptr<VirtualMemoryMapping>(
+      new VirtualMemoryMapping(context_, params));
+  interned_mappings_.Insert(std::move(params), created.get());
+  return AddMapping(std::move(created));
+}
+
+void MappingTracker::AddJitRange(UniquePid upid,
+                                 AddressRange jit_range,
+                                 JitDelegate* delegate) {
+  // TODO(carlscab): Deal with overlaps
+  jit_delegates_[upid].DeleteOverlapsAndEmplace(jit_range, delegate);
+  auto attach = [&](std::pair<const AddressRange, UserMemoryMapping*>& entry) {
+    PERFETTO_CHECK(jit_range.Contains(entry.first));
+    entry.second->SetJitDelegate(delegate);
+  };
+  user_memory_[upid].ForOverlaps(jit_range, attach);
+}
+
+}  // namespace trace_processor
+}  // namespace perfetto
diff --git a/src/trace_processor/importers/common/mapping_tracker.h b/src/trace_processor/importers/common/mapping_tracker.h
new file mode 100644
index 0000000..95dc355
--- /dev/null
+++ b/src/trace_processor/importers/common/mapping_tracker.h
@@ -0,0 +1,168 @@
+/*
+ * Copyright (C) 2024 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SRC_TRACE_PROCESSOR_IMPORTERS_COMMON_MAPPING_TRACKER_H_
+#define SRC_TRACE_PROCESSOR_IMPORTERS_COMMON_MAPPING_TRACKER_H_
+
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <optional>
+#include <vector>
+
+#include "perfetto/ext/base/flat_hash_map.h"
+#include "perfetto/ext/base/hash.h"
+#include "perfetto/ext/base/string_view.h"
+#include "src/trace_processor/importers/common/address_range.h"
+#include "src/trace_processor/importers/common/virtual_memory_mapping.h"
+#include "src/trace_processor/storage/trace_storage.h"
+#include "src/trace_processor/types/trace_processor_context.h"
+#include "src/trace_processor/util/build_id.h"
+
+namespace perfetto {
+namespace trace_processor {
+
+// Api used to forward frame interning requests for frames that fall in a
+// jitted memory region.
+// MappingTracker allows other trackers to register ranges of memory for
+// which they need to control when a new frame is created. Jitted code can
+// move in memory over time, so the same program counter might refer to
+// different functions at different points in time. MappingTracker does
+// not keep track of such moves but instead delegates the creation of jitted
+// frames to a delegate.
+class JitDelegate {
+ public:
+  virtual ~JitDelegate();
+  // Forward frame interning request.
+  // Implementations are free to intern the frame as needed.
+  // Returns frame_id, and whether a new row was created or not.
+  virtual std::pair<FrameId, bool> InternFrame(
+      VirtualMemoryMapping* mapping,
+      uint64_t rel_pc,
+      base::StringView function_name) = 0;
+
+  // Simpleperf does not emit mmap events for jitted ranges (actually for non
+  // file backed executable mappings). So have a way to generate a mapping on
+  // the fly for FindMapping requests in a jitted region with no associated
+  // mapping.
+  virtual UserMemoryMapping* CreateMapping() = 0;
+};
+
+// Keeps track of all aspects relative to memory mappings.
+// This class keeps track of 3 types of mappings: UserMemoryMapping,
+// KernelMemoryMapping and others. The others are used to represent mapping
+// where we do not have enough information to determine what type of
+// mapping (user, kernel) we are dealing with. This is usually the case with
+// data sources that do not provide enough information about the mappings.
+//
+// TODO(carlscab): Hopefully we can slowly get rid of cases where these other
+// mappings are needed. The biggest blocker right now is determining the upid.
+// We could infer this from the actual samples that use said mapping (those
+// usually have a pid attached). So we would need to have a "fake" mapping that
+// actually materializes when we see a sample with a pid.
+//
+// ATTENTION: No overlaps allowed (for now). Eventually the order in which
+// mappings are created will matter as newer mappings will delete old ones.
+// This is how tools like linux perf behave, mmap events have a timestamp
+// associated and there are no "delete events" just new mmap events that
+// overlap (to be deleted) mappings.
+class MappingTracker {
+ public:
+  explicit MappingTracker(TraceProcessorContext* context) : context_(context) {}
+
+  // Create a new kernel space mapping. Returned reference will be valid for the
+  // duration of this instance.
+  KernelMemoryMapping& CreateKernelMemoryMapping(CreateMappingParams params);
+
+  // Create a new user space mapping. Returned reference will be valid for the
+  // duration of this instance.
+  UserMemoryMapping& CreateUserMemoryMapping(UniquePid upid,
+                                             CreateMappingParams params);
+
+  // Return the "other" mapping for |params|, creating it on first use. The
+  // returned reference will be valid for the duration of this instance.
+  VirtualMemoryMapping& InternMemoryMapping(CreateMappingParams params);
+
+  // Given an absolute address find the kernel mapping where this address
+  // belongs to. Returns `nullptr` if none is found.
+  KernelMemoryMapping* FindKernelMappingForAddress(uint64_t address) const;
+
+  // Given an absolute address find the user mapping where this address
+  // belongs to. Returns `nullptr` if none is found.
+  UserMemoryMapping* FindUserMappingForAddress(UniquePid upid,
+                                               uint64_t address) const;
+  // Returns all mappings registered under exactly this name and build id.
+  std::vector<VirtualMemoryMapping*> FindMappings(
+      base::StringView name,
+      const BuildId& build_id) const;
+
+  // Marks a range of memory as containing jitted code.
+  // If the added region overlaps with other existing ranges the latter are all
+  // deleted.
+  // Jitted ranges will only be applied to UserMemoryMappings.
+  void AddJitRange(UniquePid upid, AddressRange range, JitDelegate* delegate);
+
+ private:
+  template <typename MappingImpl>
+  MappingImpl& AddMapping(std::unique_ptr<MappingImpl> mapping);
+
+  TraceProcessorContext* const context_;
+  base::FlatHashMap<MappingId, std::unique_ptr<VirtualMemoryMapping>>
+      mappings_by_id_;
+
+  base::FlatHashMap<CreateMappingParams,
+                    VirtualMemoryMapping*,
+                    CreateMappingParams::Hasher>
+      interned_mappings_;
+
+  struct NameAndBuildId {
+    base::StringView name;
+    std::optional<BuildId> build_id;
+
+    bool operator==(const NameAndBuildId& o) const {
+      return name == o.name && build_id == o.build_id;
+    }
+
+    bool operator!=(const NameAndBuildId& o) const { return !(*this == o); }
+
+    struct Hasher {
+      size_t operator()(const NameAndBuildId& o) const {
+        base::Hasher hasher;
+        hasher.Update(o.name);
+        if (o.build_id) {
+          hasher.Update(*o.build_id);
+        }
+        return static_cast<size_t>(hasher.digest());
+      }
+    };
+  };
+  base::FlatHashMap<NameAndBuildId,
+                    std::vector<VirtualMemoryMapping*>,
+                    NameAndBuildId::Hasher>
+      mappings_by_name_and_build_id_;
+
+  base::FlatHashMap<UniquePid, AddressRangeMap<UserMemoryMapping*>>
+      user_memory_;
+  AddressRangeMap<KernelMemoryMapping*> kernel_modules_;  // Modules, by range.
+  KernelMemoryMapping* kernel_ = nullptr;  // Non-module kernel mapping, if any.
+
+  base::FlatHashMap<UniquePid, AddressRangeMap<JitDelegate*>> jit_delegates_;
+};
+
+}  // namespace trace_processor
+}  // namespace perfetto
+
+#endif  // SRC_TRACE_PROCESSOR_IMPORTERS_COMMON_MAPPING_TRACKER_H_
diff --git a/src/trace_processor/importers/common/sched_event_state.h b/src/trace_processor/importers/common/sched_event_state.h
new file mode 100644
index 0000000..4ebc6ce
--- /dev/null
+++ b/src/trace_processor/importers/common/sched_event_state.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2024 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SRC_TRACE_PROCESSOR_IMPORTERS_COMMON_SCHED_EVENT_STATE_H_
+#define SRC_TRACE_PROCESSOR_IMPORTERS_COMMON_SCHED_EVENT_STATE_H_
+
+#include <iosfwd>
+
+#include "src/trace_processor/storage/trace_storage.h"
+#include "src/trace_processor/types/version_number.h"
+
+namespace perfetto {
+namespace trace_processor {
+
+// Responsible for keeping the state of pending sched events.
+class SchedEventState {
+ public:
+  // Information retained from the preceding sched_switch seen on a given cpu.
+  struct PendingSchedInfo {
+    // The pending scheduling slice that the next event will complete.
+    uint32_t pending_slice_storage_idx = std::numeric_limits<uint32_t>::max();
+
+    // pid/utid/prio corresponding to the last sched_switch seen on this cpu
+    // (its "next_*" fields). There is some duplication with respect to the
+    // slices storage, but we don't always have a slice when decoding events in
+    // the compact format.
+    uint32_t last_pid = std::numeric_limits<uint32_t>::max();
+    UniqueTid last_utid = std::numeric_limits<UniqueTid>::max();
+    int32_t last_prio = std::numeric_limits<int32_t>::max();
+  };
+
+  SchedEventState() {
+    // Pre-allocate space for 128 CPUs, which should be enough for most hosts.
+    // It's OK if this number is too small, the vector will be grown on-demand.
+    pending_sched_per_cpu_.reserve(128);
+  }
+  SchedEventState(const SchedEventState&) = delete;
+  ~SchedEventState() = default;
+
+  // Entry for `cpu`, created on demand; growth may invalidate older pointers.
+  PendingSchedInfo* GetPendingSchedInfoForCpu(uint32_t cpu) {
+    if (PERFETTO_UNLIKELY(cpu >= pending_sched_per_cpu_.size())) {
+      pending_sched_per_cpu_.resize(cpu + 1);
+    }
+    return &pending_sched_per_cpu_[cpu];
+  }
+
+ private:
+  // Indexed by cpu number; entries are created by GetPendingSchedInfoForCpu.
+  std::vector<PendingSchedInfo> pending_sched_per_cpu_;
+};
+
+}  // namespace trace_processor
+}  // namespace perfetto
+
+#endif  // SRC_TRACE_PROCESSOR_IMPORTERS_COMMON_SCHED_EVENT_STATE_H_
diff --git a/src/trace_processor/util/stack_traces_util.cc b/src/trace_processor/importers/common/sched_event_tracker.cc
similarity index 71%
rename from src/trace_processor/util/stack_traces_util.cc
rename to src/trace_processor/importers/common/sched_event_tracker.cc
index a255560..e541d58 100644
--- a/src/trace_processor/util/stack_traces_util.cc
+++ b/src/trace_processor/importers/common/sched_event_tracker.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2022 The Android Open Source Project
+ * Copyright (C) 2024 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,17 +14,12 @@
  * limitations under the License.
  */
 
-#include "src/trace_processor/util/stack_traces_util.h"
-#include "perfetto/ext/base/string_view.h"
+#include "src/trace_processor/importers/common/sched_event_tracker.h"
 
 namespace perfetto {
 namespace trace_processor {
-namespace util {
 
-bool IsHexModuleId(base::StringView module) {
-  return module.size() == 33;
-}
+SchedEventTracker::~SchedEventTracker() = default;
 
-}  // namespace util
 }  // namespace trace_processor
 }  // namespace perfetto
diff --git a/src/trace_processor/importers/common/sched_event_tracker.h b/src/trace_processor/importers/common/sched_event_tracker.h
new file mode 100644
index 0000000..68a2926
--- /dev/null
+++ b/src/trace_processor/importers/common/sched_event_tracker.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright (C) 2024 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SRC_TRACE_PROCESSOR_IMPORTERS_COMMON_SCHED_EVENT_TRACKER_H_
+#define SRC_TRACE_PROCESSOR_IMPORTERS_COMMON_SCHED_EVENT_TRACKER_H_
+
+#include "perfetto/ext/base/string_view.h"
+#include "perfetto/ext/base/utils.h"
+#include "src/trace_processor/importers/common/event_tracker.h"
+#include "src/trace_processor/importers/common/process_tracker.h"
+#include "src/trace_processor/importers/common/system_info_tracker.h"
+#include "src/trace_processor/importers/common/thread_state_tracker.h"
+#include "src/trace_processor/storage/trace_storage.h"
+#include "src/trace_processor/types/destructible.h"
+#include "src/trace_processor/types/task_state.h"
+#include "src/trace_processor/types/trace_processor_context.h"
+
+namespace perfetto {
+namespace trace_processor {
+
+// Tracks sched events and stores them into the storage as sched slices.
+class SchedEventTracker : public Destructible {
+ public:
+  PERFETTO_ALWAYS_INLINE
+  SchedEventTracker(TraceProcessorContext* context) : context_(context) {}
+  SchedEventTracker(const SchedEventTracker&) = delete;
+  ~SchedEventTracker() override;
+
+  PERFETTO_ALWAYS_INLINE
+  uint32_t AddStartSlice(uint32_t cpu,
+                         int64_t ts,
+                         UniqueTid next_utid,
+                         int32_t next_prio) {
+    // Open a new scheduling slice, corresponding to the task that was
+    // just switched to. Set the duration to -1, to indicate that the event is
+    // not finished. Duration will be updated later after event finish.
+    auto* sched = context_->storage->mutable_sched_slice_table();
+    auto row_and_id = sched->Insert(
+        {ts, /* duration */ -1, cpu, next_utid, kNullStringId, next_prio});
+    SchedId sched_id = row_and_id.id;
+    return *sched->id().IndexOf(sched_id);
+  }
+
+  PERFETTO_ALWAYS_INLINE
+  bool UpdateEventTrackerTimestamp(int64_t ts,
+                                   const char* event_name,
+                                   size_t stats) {
+    // At this stage all events should be globally timestamp ordered.
+    if (ts < context_->event_tracker->max_timestamp()) {
+      PERFETTO_ELOG(
+          "%s event out of order by %.4f ms, skipping", event_name,
+          static_cast<double>(context_->event_tracker->max_timestamp() - ts) /
+              1e6);
+      context_->storage->IncrementStats(stats);
+      return false;
+    }
+    context_->event_tracker->UpdateMaxTimestamp(ts);
+    return true;
+  }
+
+  PERFETTO_ALWAYS_INLINE
+  void ClosePendingSlice(uint32_t pending_slice_idx,
+                         int64_t ts,
+                         StringId prev_state) {
+    auto* slices = context_->storage->mutable_sched_slice_table();
+
+    int64_t duration = ts - slices->ts()[pending_slice_idx];
+    slices->mutable_dur()->Set(pending_slice_idx, duration);
+
+    // `prev_state` arrives as an interned string, so it is stored as-is.
+    // NOTE(review): narrowing of the raw state to uint16 is done in
+    // TaskStateToStringId, presumably where callers get `prev_state`.
+    slices->mutable_end_state()->Set(pending_slice_idx, prev_state);
+  }
+
+  PERFETTO_ALWAYS_INLINE
+  StringId TaskStateToStringId(int64_t task_state_int) {
+    using ftrace_utils::TaskState;
+
+    std::optional<VersionNumber> kernel_version =
+        SystemInfoTracker::GetOrCreate(context_)->GetKernelVersion();
+    TaskState task_state = TaskState::FromRawPrevState(
+        static_cast<uint16_t>(task_state_int), kernel_version);
+    return task_state.is_valid()
+               ? context_->storage->InternString(task_state.ToString().data())
+               : kNullStringId;
+  }
+
+ private:
+  TraceProcessorContext* const context_;
+};
+
+}  // namespace trace_processor
+}  // namespace perfetto
+
+#endif  // SRC_TRACE_PROCESSOR_IMPORTERS_COMMON_SCHED_EVENT_TRACKER_H_
diff --git a/src/trace_processor/importers/common/stack_profile_tracker.cc b/src/trace_processor/importers/common/stack_profile_tracker.cc
index ad57523..799dc29 100644
--- a/src/trace_processor/importers/common/stack_profile_tracker.cc
+++ b/src/trace_processor/importers/common/stack_profile_tracker.cc
@@ -16,35 +16,18 @@
 
 #include "src/trace_processor/importers/common/stack_profile_tracker.h"
 
-#include "perfetto/ext/base/string_utils.h"
+#include <cstddef>
+#include <cstdint>
+
 #include "perfetto/ext/base/string_view.h"
 #include "src/trace_processor/storage/trace_storage.h"
 #include "src/trace_processor/tables/profiler_tables_py.h"
 #include "src/trace_processor/types/trace_processor_context.h"
 #include "src/trace_processor/util/profiler_util.h"
-#include "src/trace_processor/util/stack_traces_util.h"
 
 namespace perfetto {
 namespace trace_processor {
 
-namespace {
-std::string CleanBuildId(base::StringView build_id) {
-  if (build_id.empty()) {
-    return build_id.ToStdString();
-  }
-  // If the build_id is 33 characters long, we assume it's a Breakpad debug
-  // identifier which is already in Hex and doesn't need conversion.
-  // TODO(b/148109467): Remove workaround once all active Chrome versions
-  // write raw bytes instead of a string as build_id.
-  if (util::IsHexModuleId(build_id)) {
-    return build_id.ToStdString();
-  }
-
-  return base::ToHex(build_id.data(), build_id.size());
-}
-
-}  // namespace
-
 std::vector<FrameId> StackProfileTracker::JavaFramesForName(
     NameInPackage name) const {
   if (const auto* frames = java_frames_for_name_.Find(name); frames) {
@@ -53,50 +36,6 @@
   return {};
 }
 
-std::vector<MappingId> StackProfileTracker::FindMappingRow(
-    StringId name,
-    StringId build_id) const {
-  if (const auto* mappings =
-          mappings_by_name_and_build_id_.Find(std::make_pair(name, build_id));
-      mappings) {
-    return *mappings;
-  }
-  return {};
-}
-
-std::vector<FrameId> StackProfileTracker::FindFrameIds(MappingId mapping_id,
-                                                       uint64_t rel_pc) const {
-  if (const auto* frames =
-          frame_by_mapping_and_rel_pc_.Find(std::make_pair(mapping_id, rel_pc));
-      frames) {
-    return *frames;
-  }
-  return {};
-}
-
-MappingId StackProfileTracker::InternMapping(
-    const CreateMappingParams& params) {
-  tables::StackProfileMappingTable::Row row;
-  row.build_id = InternBuildId(params.build_id);
-  row.exact_offset = static_cast<int64_t>(params.exact_offset);
-  row.start_offset = static_cast<int64_t>(params.start_offset);
-  row.start = static_cast<int64_t>(params.start);
-  row.end = static_cast<int64_t>(params.end);
-  row.load_bias = static_cast<int64_t>(params.load_bias);
-  row.name = context_->storage->InternString(params.name);
-
-  if (MappingId* id = mapping_unique_row_index_.Find(row); id) {
-    return *id;
-  }
-
-  MappingId mapping_id =
-      context_->storage->mutable_stack_profile_mapping_table()->Insert(row).id;
-  mapping_unique_row_index_.Insert(row, mapping_id);
-  mappings_by_name_and_build_id_[{row.name, row.build_id}].push_back(
-      mapping_id);
-  return mapping_id;
-}
-
 CallsiteId StackProfileTracker::InternCallsite(
     std::optional<CallsiteId> parent_callsite_id,
     FrameId frame_id,
@@ -113,22 +52,12 @@
   return callsite_id;
 }
 
-FrameId StackProfileTracker::InternFrame(MappingId mapping_id,
-                                         uint64_t rel_pc,
-                                         base::StringView function_name) {
-  tables::StackProfileFrameTable::Row row;
-  row.mapping = mapping_id;
-  row.rel_pc = static_cast<int64_t>(rel_pc);
-  row.name = context_->storage->InternString(function_name);
-
-  if (FrameId* id = frame_unique_row_index_.Find(row); id) {
-    return *id;
-  }
-
-  FrameId frame_id =
-      context_->storage->mutable_stack_profile_frame_table()->Insert(row).id;
-  frame_unique_row_index_.Insert(row, frame_id);
-  frame_by_mapping_and_rel_pc_[{mapping_id, rel_pc}].push_back(frame_id);
+void StackProfileTracker::OnFrameCreated(FrameId frame_id) {
+  auto frame =
+      *context_->storage->stack_profile_frame_table().FindById(frame_id);
+  const MappingId mapping_id = frame.mapping();
+  const StringId name_id = frame.name();
+  const auto function_name = context_->storage->GetString(name_id);
 
   if (function_name.find('.') != base::StringView::npos) {
     // Java frames always contain a '.'
@@ -139,21 +68,14 @@
     std::optional<std::string> package =
         PackageFromLocation(context_->storage.get(), mapping_name);
     if (package) {
-      NameInPackage nip{row.name, context_->storage->InternString(
-                                      base::StringView(*package))};
+      NameInPackage nip{
+          name_id, context_->storage->InternString(base::StringView(*package))};
       java_frames_for_name_[nip].push_back(frame_id);
     } else if (mapping_name.find("/memfd:") == 0) {
-      NameInPackage nip{row.name, context_->storage->InternString("memfd")};
+      NameInPackage nip{name_id, context_->storage->InternString("memfd")};
       java_frames_for_name_[nip].push_back(frame_id);
     }
   }
-
-  return frame_id;
-}
-
-StringId StackProfileTracker::InternBuildId(base::StringView build_id) {
-  return context_->storage->InternString(
-      base::StringView(CleanBuildId(build_id)));
 }
 
 }  // namespace trace_processor
diff --git a/src/trace_processor/importers/common/stack_profile_tracker.h b/src/trace_processor/importers/common/stack_profile_tracker.h
index a1067b8..b018f74 100644
--- a/src/trace_processor/importers/common/stack_profile_tracker.h
+++ b/src/trace_processor/importers/common/stack_profile_tracker.h
@@ -20,12 +20,11 @@
 #include <cstdint>
 #include <optional>
 #include <tuple>
-#include <utility>
 #include <vector>
 
 #include "perfetto/ext/base/flat_hash_map.h"
 #include "perfetto/ext/base/hash.h"
-#include "perfetto/ext/base/string_view.h"
+
 #include "src/trace_processor/storage/trace_storage.h"
 #include "src/trace_processor/tables/profiler_tables_py.h"
 
@@ -52,64 +51,39 @@
 
 class StackProfileTracker {
  public:
-  struct CreateMappingParams {
-    base::StringView build_id;
-    uint64_t exact_offset;
-    uint64_t start_offset;
-    uint64_t start;
-    uint64_t end;
-    uint64_t load_bias;
-    base::StringView name;
-  };
-
   explicit StackProfileTracker(TraceProcessorContext* context)
       : context_(context) {}
 
   std::vector<FrameId> JavaFramesForName(NameInPackage name) const;
-  std::vector<MappingId> FindMappingRow(StringId name, StringId build_id) const;
-  std::vector<FrameId> FindFrameIds(MappingId mapping_id,
-                                    uint64_t rel_pc) const;
 
-  MappingId InternMapping(const CreateMappingParams& params);
   CallsiteId InternCallsite(std::optional<CallsiteId> parent_callsite_id,
                             FrameId frame_id,
                             uint32_t depth);
-  FrameId InternFrame(MappingId mapping_id,
-                      uint64_t rel_pc,
-                      base::StringView function_name);
+
+  void OnFrameCreated(FrameId frame_id);
 
  private:
-  StringId InternBuildId(base::StringView build_id);
-
   TraceProcessorContext* const context_;
-  base::FlatHashMap<tables::StackProfileMappingTable::Row, MappingId>
-      mapping_unique_row_index_;
   base::FlatHashMap<tables::StackProfileCallsiteTable::Row, CallsiteId>
       callsite_unique_row_index_;
-  base::FlatHashMap<tables::StackProfileFrameTable::Row, FrameId>
-      frame_unique_row_index_;
 
-  struct MappingHasher {
-    size_t operator()(const std::pair<StringId, StringId>& o) const {
-      return static_cast<size_t>(
-          base::Hasher::Combine(o.first.raw_id(), o.second.raw_id()));
-    }
-  };
-  base::FlatHashMap<std::pair<StringId, StringId>,
-                    std::vector<MappingId>,
-                    MappingHasher>
-      mappings_by_name_and_build_id_;
+  struct FrameKey {
+    MappingId mapping_id;
+    uint64_t rel_pc;
 
-  struct FrameHasher {
-    size_t operator()(const std::pair<MappingId, uint64_t>& o) const {
-      return static_cast<size_t>(
-          base::Hasher::Combine(o.first.value, o.second));
+    bool operator==(const FrameKey& o) const {
+      return mapping_id == o.mapping_id && rel_pc == o.rel_pc;
     }
+
+    bool operator!=(const FrameKey& o) const { return !(*this == o); }
+
+    struct Hasher {
+      size_t operator()(const FrameKey& o) const {
+        return static_cast<size_t>(
+            base::Hasher::Combine(o.mapping_id.value, o.rel_pc));
+      }
+    };
   };
-  base::FlatHashMap<std::pair<MappingId, uint64_t>,
-                    std::vector<FrameId>,
-                    FrameHasher>
-      frame_by_mapping_and_rel_pc_;
 
   base::FlatHashMap<NameInPackage, std::vector<FrameId>, NameInPackage::Hasher>
       java_frames_for_name_;
diff --git a/src/trace_processor/importers/ftrace/thread_state_tracker.cc b/src/trace_processor/importers/common/thread_state_tracker.cc
similarity index 92%
rename from src/trace_processor/importers/ftrace/thread_state_tracker.cc
rename to src/trace_processor/importers/common/thread_state_tracker.cc
index 7eda59e..9c3ee3c 100644
--- a/src/trace_processor/importers/ftrace/thread_state_tracker.cc
+++ b/src/trace_processor/importers/common/thread_state_tracker.cc
@@ -13,7 +13,8 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#include "src/trace_processor/importers/ftrace/thread_state_tracker.h"
+
+#include "src/trace_processor/importers/common/thread_state_tracker.h"
 #include <optional>
 
 namespace perfetto {
@@ -140,6 +141,20 @@
     row.irq_context = CommonFlagsToIrqContext(*common_flags);
   }
 
+  if (waker_utid.has_value() && HasPreviousRowNumbersForUtid(*waker_utid)) {
+    auto waker_row =
+        RowNumToRef(prev_row_numbers_for_thread_[*waker_utid]->last_row);
+
+    // We expect all wakers to be Running. But there are 2 cases where this
+    // might not be true:
+    // 1. At the start of a trace the 'waker CPU' has not yet started
+    // emitting events.
+    // 2. Data loss.
+    if (IsRunning(waker_row.state())) {
+      row.waker_id = std::make_optional(waker_row.id());
+    }
+  }
+
   auto row_num = storage_->mutable_thread_state_table()->Insert(row).row_number;
 
   if (utid >= prev_row_numbers_for_thread_.size()) {
diff --git a/src/trace_processor/importers/ftrace/thread_state_tracker.h b/src/trace_processor/importers/common/thread_state_tracker.h
similarity index 95%
rename from src/trace_processor/importers/ftrace/thread_state_tracker.h
rename to src/trace_processor/importers/common/thread_state_tracker.h
index 839ad31..2b7206b 100644
--- a/src/trace_processor/importers/ftrace/thread_state_tracker.h
+++ b/src/trace_processor/importers/common/thread_state_tracker.h
@@ -13,8 +13,9 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#ifndef SRC_TRACE_PROCESSOR_IMPORTERS_FTRACE_THREAD_STATE_TRACKER_H_
-#define SRC_TRACE_PROCESSOR_IMPORTERS_FTRACE_THREAD_STATE_TRACKER_H_
+
+#ifndef SRC_TRACE_PROCESSOR_IMPORTERS_COMMON_THREAD_STATE_TRACKER_H_
+#define SRC_TRACE_PROCESSOR_IMPORTERS_COMMON_THREAD_STATE_TRACKER_H_
 
 #include "src/trace_processor/storage/trace_storage.h"
 #include "src/trace_processor/types/destructible.h"
@@ -104,4 +105,4 @@
 }  // namespace trace_processor
 }  // namespace perfetto
 
-#endif  // SRC_TRACE_PROCESSOR_IMPORTERS_FTRACE_THREAD_STATE_TRACKER_H_
+#endif  // SRC_TRACE_PROCESSOR_IMPORTERS_COMMON_THREAD_STATE_TRACKER_H_
diff --git a/src/trace_processor/importers/ftrace/thread_state_tracker_unittest.cc b/src/trace_processor/importers/common/thread_state_tracker_unittest.cc
similarity index 98%
rename from src/trace_processor/importers/ftrace/thread_state_tracker_unittest.cc
rename to src/trace_processor/importers/common/thread_state_tracker_unittest.cc
index 1b0e1a3..ea6794a 100644
--- a/src/trace_processor/importers/ftrace/thread_state_tracker_unittest.cc
+++ b/src/trace_processor/importers/common/thread_state_tracker_unittest.cc
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-#include "src/trace_processor/importers/ftrace/thread_state_tracker.h"
+#include "src/trace_processor/importers/common/thread_state_tracker.h"
 
 #include <algorithm>
 
diff --git a/src/trace_processor/importers/common/virtual_memory_mapping.cc b/src/trace_processor/importers/common/virtual_memory_mapping.cc
new file mode 100644
index 0000000..60166f59
--- /dev/null
+++ b/src/trace_processor/importers/common/virtual_memory_mapping.cc
@@ -0,0 +1,123 @@
+/*
+ * Copyright (C) 2024 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "src/trace_processor/importers/common/virtual_memory_mapping.h"
+
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <optional>
+#include <string>
+#include <utility>
+
+#include "perfetto/ext/base/string_view.h"
+#include "src/trace_processor/importers/common/address_range.h"
+#include "src/trace_processor/importers/common/mapping_tracker.h"
+#include "src/trace_processor/importers/common/stack_profile_tracker.h"
+#include "src/trace_processor/storage/trace_storage.h"
+#include "src/trace_processor/tables/profiler_tables_py.h"
+#include "src/trace_processor/types/trace_processor_context.h"
+#include "src/trace_processor/util/build_id.h"
+
+namespace perfetto {
+namespace trace_processor {
+namespace {
+
+MappingId CreateMapping(TraceProcessorContext* context,
+                        const CreateMappingParams& params) {
+  // A missing build id is interned as the empty string.
+  const std::string hex_build_id =
+      params.build_id ? params.build_id->ToHex() : std::string();
+  auto* storage = context->storage.get();
+  const auto& range = params.memory_range;
+  // Braced initialisation keeps the two InternString calls in written order.
+  tables::StackProfileMappingTable::Row row{
+      storage->InternString(base::StringView(hex_build_id)),
+      static_cast<int64_t>(params.exact_offset),
+      static_cast<int64_t>(params.start_offset),
+      static_cast<int64_t>(range.start()), static_cast<int64_t>(range.end()),
+      static_cast<int64_t>(params.load_bias),
+      storage->InternString(base::StringView(params.name))};
+  return storage->mutable_stack_profile_mapping_table()->Insert(row).id;
+}
+
+}  // namespace
+
+VirtualMemoryMapping::VirtualMemoryMapping(TraceProcessorContext* context,
+                                           CreateMappingParams params)
+    : context_(context),
+      mapping_id_(CreateMapping(context, params)),
+      memory_range_(params.memory_range),
+      offset_(params.exact_offset),
+      load_bias_(params.load_bias),
+      name_(std::move(params.name)),
+      build_id_(std::move(params.build_id)) {}
+
+VirtualMemoryMapping::~VirtualMemoryMapping() = default;
+
+KernelMemoryMapping::KernelMemoryMapping(TraceProcessorContext* context,
+                                         CreateMappingParams params)
+    : VirtualMemoryMapping(context, std::move(params)) {}
+
+KernelMemoryMapping::~KernelMemoryMapping() = default;
+
+UserMemoryMapping::UserMemoryMapping(TraceProcessorContext* context,
+                                     UniquePid upid,
+                                     CreateMappingParams params)
+    : VirtualMemoryMapping(context, std::move(params)), upid_(upid) {}
+
+UserMemoryMapping::~UserMemoryMapping() = default;
+
+FrameId VirtualMemoryMapping::InternFrame(uint64_t rel_pc,
+                                          base::StringView function_name) {
+  auto [id, is_new] =
+      is_jitted() ? jit_delegate_->InternFrame(this, rel_pc, function_name)
+                  : InternFrameImpl(rel_pc, function_name);
+  if (!is_new) return id;
+  // New frame: index it by `rel_pc` and notify the stack profile tracker.
+  frames_by_rel_pc_[rel_pc].push_back(id);
+  context_->stack_profile_tracker->OnFrameCreated(id);
+  return id;
+}
+
+std::vector<FrameId> VirtualMemoryMapping::FindFrameIds(uint64_t rel_pc) const {
+  // An unknown `rel_pc` yields an empty result.
+  const std::vector<FrameId>* known = frames_by_rel_pc_.Find(rel_pc);
+  if (!known) return {};
+  return *known;
+}
+
+std::pair<FrameId, bool> VirtualMemoryMapping::InternFrameImpl(
+    uint64_t rel_pc,
+    base::StringView function_name) {
+  // Frames are deduplicated per mapping on (rel_pc, interned name).
+  const StringId name_id = context_->storage->InternString(function_name);
+  const FrameKey key{rel_pc, name_id};
+  FrameId* existing = interned_frames_.Find(key);
+  if (existing != nullptr) {
+    return std::make_pair(*existing, false);
+  }
+  // Not seen before: add a row to the frame table and remember its id.
+  tables::StackProfileFrameTable::Row row{name_id, mapping_id_,
+                                          static_cast<int64_t>(rel_pc)};
+  auto* frames = context_->storage->mutable_stack_profile_frame_table();
+  const FrameId new_id = frames->Insert(row).id;
+  interned_frames_.Insert(key, new_id);
+  return std::make_pair(new_id, true);
+}
+
+}  // namespace trace_processor
+}  // namespace perfetto
diff --git a/src/trace_processor/importers/common/virtual_memory_mapping.h b/src/trace_processor/importers/common/virtual_memory_mapping.h
new file mode 100644
index 0000000..7b8ef58
--- /dev/null
+++ b/src/trace_processor/importers/common/virtual_memory_mapping.h
@@ -0,0 +1,152 @@
+
+/*
+ * Copyright (C) 2024 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SRC_TRACE_PROCESSOR_IMPORTERS_COMMON_VIRTUAL_MEMORY_MAPPING_H_
+#define SRC_TRACE_PROCESSOR_IMPORTERS_COMMON_VIRTUAL_MEMORY_MAPPING_H_
+
+#include <cstddef>
+#include <cstdint>
+#include <optional>
+#include <string>
+#include <vector>
+
+#include "perfetto/ext/base/flat_hash_map.h"
+#include "perfetto/ext/base/hash.h"
+#include "perfetto/ext/base/string_view.h"
+#include "src/trace_processor/importers/common/address_range.h"
+#include "src/trace_processor/importers/common/create_mapping_params.h"
+#include "src/trace_processor/storage/trace_storage.h"
+#include "src/trace_processor/types/trace_processor_context.h"
+#include "src/trace_processor/util/build_id.h"
+
+namespace perfetto {
+namespace trace_processor {
+
+// TODO(carlscab): Reconsider whether jit is the best abstraction here. All we
+// really care about is mapping a `rel_pc` to a symbol (aka symbolization) and
+// whether this is constant.
+class JitDelegate;
+
+// Represents a mapping in virtual memory.
+class VirtualMemoryMapping {
+ public:
+  virtual ~VirtualMemoryMapping();
+  // Range of virtual memory this mapping covers.
+  AddressRange memory_range() const { return memory_range_; }
+  MappingId mapping_id() const { return mapping_id_; }
+  // This name could be the path of the underlying file mapped into memory.
+  const std::string& name() const { return name_; }
+  // For file mappings, this is the offset into the file for the first byte in
+  // the mapping.
+  uint64_t offset() const { return offset_; }
+  // If the mapped file is an executable or shared library this will return the
+  // load bias, if known. Returns 0 otherwise.
+  uint64_t load_bias() const { return load_bias_; }
+  // If the mapped file is an executable or shared library this will return its
+  // build id, if known.
+  const std::optional<BuildId>& build_id() const { return build_id_; }
+
+  // Whether this maps to a region that holds jitted code.
+  bool is_jitted() const { return jit_delegate_ != nullptr; }
+
+  // Converts an absolute address into a relative one.
+  uint64_t ToRelativePc(uint64_t address) const {
+    return address - memory_range_.start() + offset_ + load_bias_;
+  }
+
+  // Interns a frame for the given `rel_pc` and name. Note that if the mapping
+  // `is_jitted()` the same `rel_pc` value can resolve to different frames (as
+  // jitted functions can be created and deleted over time), so for such
+  // mappings the returned `FrameId` should not be cached.
+  FrameId InternFrame(uint64_t rel_pc, base::StringView function_name);
+
+  // Returns all frames ever created in this mapping for the given `rel_pc`.
+  std::vector<FrameId> FindFrameIds(uint64_t rel_pc) const;
+
+ protected:
+  VirtualMemoryMapping(TraceProcessorContext* context,
+                       CreateMappingParams params);
+
+ private:
+  friend class MappingTracker;
+
+  std::pair<FrameId, bool> InternFrameImpl(uint64_t rel_pc,
+                                           base::StringView function_name);
+
+  void SetJitDelegate(JitDelegate* jit_delegate) {
+    jit_delegate_ = jit_delegate;
+  }
+
+  TraceProcessorContext* const context_;
+  const MappingId mapping_id_;
+  const AddressRange memory_range_;
+  const uint64_t offset_;
+  const uint64_t load_bias_;
+  const std::string name_;
+  std::optional<BuildId> const build_id_;
+  JitDelegate* jit_delegate_ = nullptr;
+
+  struct FrameKey {
+    uint64_t rel_pc;
+    // It doesn't seem to make too much sense to key on name, as for the same
+    // mapping and same rel_pc the name should always be the same. But who knows
+    // how producers behave.
+    StringId name_id;
+
+    bool operator==(const FrameKey& o) const {
+      return rel_pc == o.rel_pc && name_id == o.name_id;
+    }
+
+    struct Hasher {
+      size_t operator()(const FrameKey& k) const {
+        return static_cast<size_t>(
+            base::Hasher::Combine(k.rel_pc, k.name_id.raw_id()));
+      }
+    };
+  };
+  base::FlatHashMap<FrameKey, FrameId, FrameKey::Hasher> interned_frames_;
+  base::FlatHashMap<uint64_t, std::vector<FrameId>> frames_by_rel_pc_;
+};
+
+class KernelMemoryMapping : public VirtualMemoryMapping {
+ public:
+  ~KernelMemoryMapping() override;
+
+ private:
+  friend class MappingTracker;
+  KernelMemoryMapping(TraceProcessorContext* context,
+                      CreateMappingParams params);
+};
+
+class UserMemoryMapping : public VirtualMemoryMapping {
+ public:
+  ~UserMemoryMapping() override;
+  UniquePid upid() const { return upid_; }
+
+ private:
+  friend class MappingTracker;
+  UserMemoryMapping(TraceProcessorContext* context,
+                    UniquePid upid,
+                    CreateMappingParams params);
+
+  const UniquePid upid_;
+};
+
+}  // namespace trace_processor
+}  // namespace perfetto
+
+#endif  // SRC_TRACE_PROCESSOR_IMPORTERS_COMMON_VIRTUAL_MEMORY_MAPPING_H_
diff --git a/src/trace_processor/importers/ftrace/BUILD.gn b/src/trace_processor/importers/ftrace/BUILD.gn
index c618978..3a533d1 100644
--- a/src/trace_processor/importers/ftrace/BUILD.gn
+++ b/src/trace_processor/importers/ftrace/BUILD.gn
@@ -37,6 +37,8 @@
     "ftrace_module_impl.h",
     "ftrace_parser.cc",
     "ftrace_parser.h",
+    "ftrace_sched_event_tracker.cc",
+    "ftrace_sched_event_tracker.h",
     "ftrace_tokenizer.cc",
     "ftrace_tokenizer.h",
     "gpu_work_period_tracker.cc",
@@ -49,10 +51,6 @@
     "pkvm_hyp_cpu_tracker.h",
     "rss_stat_tracker.cc",
     "rss_stat_tracker.h",
-    "sched_event_tracker.cc",
-    "sched_event_tracker.h",
-    "thread_state_tracker.cc",
-    "thread_state_tracker.h",
     "v4l2_tracker.cc",
     "v4l2_tracker.h",
     "virtio_gpu_tracker.cc",
@@ -99,8 +97,7 @@
   testonly = true
   sources = [
     "binder_tracker_unittest.cc",
-    "sched_event_tracker_unittest.cc",
-    "thread_state_tracker_unittest.cc",
+    "ftrace_sched_event_tracker_unittest.cc",
   ]
   deps = [
     "../../../../gn:default_deps",
diff --git a/src/trace_processor/importers/ftrace/ftrace_parser.cc b/src/trace_processor/importers/ftrace/ftrace_parser.cc
index 951c03d..6948fc3 100644
--- a/src/trace_processor/importers/ftrace/ftrace_parser.cc
+++ b/src/trace_processor/importers/ftrace/ftrace_parser.cc
@@ -26,9 +26,9 @@
 #include "src/trace_processor/importers/common/metadata_tracker.h"
 #include "src/trace_processor/importers/common/parser_types.h"
 #include "src/trace_processor/importers/common/process_tracker.h"
+#include "src/trace_processor/importers/common/thread_state_tracker.h"
 #include "src/trace_processor/importers/common/track_tracker.h"
 #include "src/trace_processor/importers/ftrace/binder_tracker.h"
-#include "src/trace_processor/importers/ftrace/thread_state_tracker.h"
 #include "src/trace_processor/importers/ftrace/v4l2_tracker.h"
 #include "src/trace_processor/importers/ftrace/virtio_video_tracker.h"
 #include "src/trace_processor/importers/i2c/i2c_tracker.h"
@@ -1120,10 +1120,11 @@
   }
 
   using protos::pbzero::FtraceEvent;
-  SchedEventTracker* sched_tracker = SchedEventTracker::GetOrCreate(context_);
-  sched_tracker->PushSchedSwitchCompact(cpu, ts, data.prev_state,
-                                        static_cast<uint32_t>(data.next_pid),
-                                        data.next_prio, data.next_comm);
+  FtraceSchedEventTracker* ftrace_sched_tracker =
+      FtraceSchedEventTracker::GetOrCreate(context_);
+  ftrace_sched_tracker->PushSchedSwitchCompact(
+      cpu, ts, data.prev_state, static_cast<uint32_t>(data.next_pid),
+      data.next_prio, data.next_comm);
   return util::OkStatus();
 }
 
@@ -1138,8 +1139,9 @@
     return util::OkStatus();
   }
   using protos::pbzero::FtraceEvent;
-  SchedEventTracker* sched_tracker = SchedEventTracker::GetOrCreate(context_);
-  sched_tracker->PushSchedWakingCompact(
+  FtraceSchedEventTracker* ftrace_sched_tracker =
+      FtraceSchedEventTracker::GetOrCreate(context_);
+  ftrace_sched_tracker->PushSchedWakingCompact(
       cpu, ts, static_cast<uint32_t>(data.pid), data.target_cpu, data.prio,
       data.comm, data.common_flags);
   return util::OkStatus();
@@ -1321,7 +1323,7 @@
   protos::pbzero::SchedSwitchFtraceEvent::Decoder ss(blob.data, blob.size);
   uint32_t prev_pid = static_cast<uint32_t>(ss.prev_pid());
   uint32_t next_pid = static_cast<uint32_t>(ss.next_pid());
-  SchedEventTracker::GetOrCreate(context_)->PushSchedSwitch(
+  FtraceSchedEventTracker::GetOrCreate(context_)->PushSchedSwitch(
       cpu, timestamp, prev_pid, ss.prev_comm(), ss.prev_prio(), ss.prev_state(),
       next_pid, ss.next_comm(), ss.next_prio());
 }
diff --git a/src/trace_processor/importers/ftrace/ftrace_parser.h b/src/trace_processor/importers/ftrace/ftrace_parser.h
index f3234bd..b3764f1 100644
--- a/src/trace_processor/importers/ftrace/ftrace_parser.h
+++ b/src/trace_processor/importers/ftrace/ftrace_parser.h
@@ -26,12 +26,12 @@
 #include "src/trace_processor/importers/common/trace_parser.h"
 #include "src/trace_processor/importers/ftrace/drm_tracker.h"
 #include "src/trace_processor/importers/ftrace/ftrace_descriptors.h"
+#include "src/trace_processor/importers/ftrace/ftrace_sched_event_tracker.h"
 #include "src/trace_processor/importers/ftrace/gpu_work_period_tracker.h"
 #include "src/trace_processor/importers/ftrace/iostat_tracker.h"
 #include "src/trace_processor/importers/ftrace/mali_gpu_event_tracker.h"
 #include "src/trace_processor/importers/ftrace/pkvm_hyp_cpu_tracker.h"
 #include "src/trace_processor/importers/ftrace/rss_stat_tracker.h"
-#include "src/trace_processor/importers/ftrace/sched_event_tracker.h"
 #include "src/trace_processor/importers/ftrace/virtio_gpu_tracker.h"
 #include "src/trace_processor/types/trace_processor_context.h"
 
diff --git a/src/trace_processor/importers/ftrace/sched_event_tracker.cc b/src/trace_processor/importers/ftrace/ftrace_sched_event_tracker.cc
similarity index 67%
rename from src/trace_processor/importers/ftrace/sched_event_tracker.cc
rename to src/trace_processor/importers/ftrace/ftrace_sched_event_tracker.cc
index 09459c4..36bf028 100644
--- a/src/trace_processor/importers/ftrace/sched_event_tracker.cc
+++ b/src/trace_processor/importers/ftrace/ftrace_sched_event_tracker.cc
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-#include "src/trace_processor/importers/ftrace/sched_event_tracker.h"
+#include "src/trace_processor/importers/ftrace/ftrace_sched_event_tracker.h"
 
 #include <math.h>
 
@@ -22,9 +22,11 @@
 #include "src/trace_processor/importers/common/args_tracker.h"
 #include "src/trace_processor/importers/common/event_tracker.h"
 #include "src/trace_processor/importers/common/process_tracker.h"
+#include "src/trace_processor/importers/common/sched_event_tracker.h"
+#include "src/trace_processor/importers/common/sched_event_state.h"
 #include "src/trace_processor/importers/common/system_info_tracker.h"
+#include "src/trace_processor/importers/common/thread_state_tracker.h"
 #include "src/trace_processor/importers/ftrace/ftrace_descriptors.h"
-#include "src/trace_processor/importers/ftrace/thread_state_tracker.h"
 #include "src/trace_processor/storage/stats.h"
 #include "src/trace_processor/types/task_state.h"
 #include "src/trace_processor/types/trace_processor_context.h"
@@ -36,9 +38,8 @@
 namespace perfetto {
 namespace trace_processor {
 
-SchedEventTracker::SchedEventTracker(TraceProcessorContext* context)
-    : waker_utid_id_(context->storage->InternString("waker_utid")),
-      context_(context) {
+FtraceSchedEventTracker::FtraceSchedEventTracker(TraceProcessorContext* context)
+    : context_(context) {
   // pre-parse sched_switch
   auto* switch_descriptor = GetMessageDescriptorForId(
       protos::pbzero::FtraceEvent::kSchedSwitchFieldNumber);
@@ -60,15 +61,11 @@
         context->storage->InternString(waking_descriptor->fields[i].name);
   }
   sched_waking_id_ = context->storage->InternString(waking_descriptor->name);
-
-  // Pre-allocate space for 128 CPUs, which should be enough for most hosts.
-  // It's OK if this number is too small, the vector will be grown on-demand.
-  pending_sched_per_cpu_.reserve(128);
 }
 
-SchedEventTracker::~SchedEventTracker() = default;
+FtraceSchedEventTracker::~FtraceSchedEventTracker() = default;
 
-void SchedEventTracker::PushSchedSwitch(uint32_t cpu,
+void FtraceSchedEventTracker::PushSchedSwitch(uint32_t cpu,
                                         int64_t ts,
                                         uint32_t prev_pid,
                                         base::StringView prev_comm,
@@ -77,16 +74,10 @@
                                         uint32_t next_pid,
                                         base::StringView next_comm,
                                         int32_t next_prio) {
-  // At this stage all events should be globally timestamp ordered.
-  if (ts < context_->event_tracker->max_timestamp()) {
-    PERFETTO_ELOG(
-        "sched_switch event out of order by %.4f ms, skipping",
-        static_cast<double>(context_->event_tracker->max_timestamp() - ts) /
-            1e6);
-    context_->storage->IncrementStats(stats::sched_switch_out_of_order);
+  if (!context_->sched_event_tracker->UpdateEventTrackerTimestamp(ts,
+      "sched_switch",stats::sched_switch_out_of_order)) {
     return;
   }
-  context_->event_tracker->UpdateMaxTimestamp(ts);
 
   StringId next_comm_id = context_->storage->InternString(next_comm);
   UniqueTid next_utid = context_->process_tracker->UpdateThreadName(
@@ -94,16 +85,18 @@
 
   // First use this data to close the previous slice.
   bool prev_pid_match_prev_next_pid = false;
-  auto* pending_sched = PendingSchedByCPU(cpu);
+  auto* pending_sched = sched_event_state_.GetPendingSchedInfoForCpu(cpu);
   uint32_t pending_slice_idx = pending_sched->pending_slice_storage_idx;
-  StringId prev_state_string_id = TaskStateToStringId(prev_state);
+  StringId prev_state_string_id = context_->sched_event_tracker
+                                      ->TaskStateToStringId(prev_state);
   if (prev_state_string_id == kNullStringId) {
     context_->storage->IncrementStats(stats::task_state_invalid);
   }
   if (pending_slice_idx < std::numeric_limits<uint32_t>::max()) {
     prev_pid_match_prev_next_pid = prev_pid == pending_sched->last_pid;
     if (PERFETTO_LIKELY(prev_pid_match_prev_next_pid)) {
-      ClosePendingSlice(pending_slice_idx, ts, prev_state_string_id);
+      context_->sched_event_tracker->ClosePendingSlice(pending_slice_idx, ts,
+          prev_state_string_id);
     } else {
       // If the pids are not consistent, make a note of this.
       context_->storage->IncrementStats(stats::mismatched_sched_switch_tids);
@@ -117,9 +110,11 @@
   UniqueTid prev_utid = context_->process_tracker->UpdateThreadName(
       prev_pid, prev_comm_id, ThreadNamePriority::kFtrace);
 
-  auto new_slice_idx = AddRawEventAndStartSlice(
-      cpu, ts, prev_utid, prev_pid, prev_comm_id, prev_prio, prev_state,
-      next_utid, next_pid, next_comm_id, next_prio);
+  AddRawSchedSwitchEvent(cpu, ts, prev_utid, prev_pid, prev_comm_id, prev_prio,
+                         prev_state, next_pid, next_comm_id, next_prio);
+
+  auto new_slice_idx = context_->sched_event_tracker
+                           ->AddStartSlice(cpu, ts, next_utid, next_prio);
 
   // Finally, update the info for the next sched switch on this CPU.
   pending_sched->pending_slice_storage_idx = new_slice_idx;
@@ -132,27 +127,21 @@
       ts, cpu, prev_utid, prev_state_string_id, next_utid);
 }
 
-void SchedEventTracker::PushSchedSwitchCompact(uint32_t cpu,
-                                               int64_t ts,
-                                               int64_t prev_state,
-                                               uint32_t next_pid,
-                                               int32_t next_prio,
-                                               StringId next_comm_id) {
-  // At this stage all events should be globally timestamp ordered.
-  if (ts < context_->event_tracker->max_timestamp()) {
-    PERFETTO_ELOG(
-        "sched_switch event out of order by %.4f ms, skipping",
-        static_cast<double>(context_->event_tracker->max_timestamp() - ts) /
-            1e6);
-    context_->storage->IncrementStats(stats::sched_switch_out_of_order);
+void FtraceSchedEventTracker::PushSchedSwitchCompact(uint32_t cpu,
+                                                     int64_t ts,
+                                                     int64_t prev_state,
+                                                     uint32_t next_pid,
+                                                     int32_t next_prio,
+                                                     StringId next_comm_id) {
+  if (!context_->sched_event_tracker->UpdateEventTrackerTimestamp(ts, 
+      "sched_switch", stats::sched_switch_out_of_order)) {
     return;
   }
-  context_->event_tracker->UpdateMaxTimestamp(ts);
 
   UniqueTid next_utid = context_->process_tracker->UpdateThreadName(
       next_pid, next_comm_id, ThreadNamePriority::kFtrace);
 
-  auto* pending_sched = PendingSchedByCPU(cpu);
+  auto* pending_sched = sched_event_state_.GetPendingSchedInfoForCpu(cpu);
 
   // If we're processing the first compact event for this cpu, don't start a
   // slice since we're missing the "prev_*" fields. The successive events will
@@ -172,12 +161,14 @@
   // Close the pending slice if any (we won't have one when processing the first
   // two compact events for a given cpu).
   uint32_t pending_slice_idx = pending_sched->pending_slice_storage_idx;
-  StringId prev_state_string_id = TaskStateToStringId(prev_state);
+  StringId prev_state_string_id = context_->sched_event_tracker
+                                      ->TaskStateToStringId(prev_state);
   if (prev_state_string_id == kNullStringId) {
     context_->storage->IncrementStats(stats::task_state_invalid);
   }
   if (pending_slice_idx < std::numeric_limits<uint32_t>::max())
-    ClosePendingSlice(pending_slice_idx, ts, prev_state_string_id);
+    context_->sched_event_tracker->ClosePendingSlice(pending_slice_idx, ts,
+        prev_state_string_id);
 
   // Use the previous event's values to infer this event's "prev_*" fields.
   // There are edge cases, but this assumption should still produce sensible
@@ -192,9 +183,10 @@
       context_->storage->thread_table().name()[prev_utid].value_or(
           kNullStringId);
 
-  auto new_slice_idx = AddRawEventAndStartSlice(
-      cpu, ts, prev_utid, prev_pid, prev_comm_id, prev_prio, prev_state,
-      next_utid, next_pid, next_comm_id, next_prio);
+  AddRawSchedSwitchEvent(cpu, ts, prev_utid, prev_pid, prev_comm_id, prev_prio,
+      prev_state, next_pid, next_comm_id, next_prio);
+  auto new_slice_idx = context_->sched_event_tracker
+                           ->AddStartSlice(cpu, ts, next_utid, next_prio);
 
   // Finally, update the info for the next sched switch on this CPU.
   pending_sched->pending_slice_storage_idx = new_slice_idx;
@@ -209,30 +201,24 @@
 
 // Processes a sched_waking that was decoded from a compact representation,
 // adding to the raw and instants tables.
-void SchedEventTracker::PushSchedWakingCompact(uint32_t cpu,
-                                               int64_t ts,
-                                               uint32_t wakee_pid,
-                                               uint16_t target_cpu,
-                                               uint16_t prio,
-                                               StringId comm_id,
-                                               uint16_t common_flags) {
-  // At this stage all events should be globally timestamp ordered.
-  if (ts < context_->event_tracker->max_timestamp()) {
-    PERFETTO_ELOG(
-        "sched_waking event out of order by %.4f ms, skipping",
-        static_cast<double>(context_->event_tracker->max_timestamp() - ts) /
-            1e6);
-    context_->storage->IncrementStats(stats::sched_waking_out_of_order);
+void FtraceSchedEventTracker::PushSchedWakingCompact(uint32_t cpu,
+                                                     int64_t ts,
+                                                     uint32_t wakee_pid,
+                                                     uint16_t target_cpu,
+                                                     uint16_t prio,
+                                                     StringId comm_id,
+                                                     uint16_t common_flags) {
+  if (!context_->sched_event_tracker->UpdateEventTrackerTimestamp(ts,
+      "sched_waking", stats::sched_waking_out_of_order)) {
     return;
   }
-  context_->event_tracker->UpdateMaxTimestamp(ts);
 
   // We infer the task that emitted the event (i.e. common_pid) from the
   // scheduling slices. Drop the event if we haven't seen any sched_switch
   // events for this cpu yet.
   // Note that if sched_switch wasn't enabled, we will have to skip all
   // compact waking events.
-  auto* pending_sched = PendingSchedByCPU(cpu);
+  auto* pending_sched = sched_event_state_.GetPendingSchedInfoForCpu(cpu);
   if (pending_sched->last_utid == std::numeric_limits<UniqueTid>::max()) {
     context_->storage->IncrementStats(stats::compact_sched_waking_skipped);
     return;
@@ -269,14 +255,13 @@
 }
 
 PERFETTO_ALWAYS_INLINE
-uint32_t SchedEventTracker::AddRawEventAndStartSlice(uint32_t cpu,
+void FtraceSchedEventTracker::AddRawSchedSwitchEvent(uint32_t cpu,
                                                      int64_t ts,
                                                      UniqueTid prev_utid,
                                                      uint32_t prev_pid,
                                                      StringId prev_comm_id,
                                                      int32_t prev_prio,
                                                      int64_t prev_state,
-                                                     UniqueTid next_utid,
                                                      uint32_t next_pid,
                                                      StringId next_comm_id,
                                                      int32_t next_prio) {
@@ -305,42 +290,6 @@
     add_raw_arg(SS::kNextPidFieldNumber, Variadic::Integer(next_pid));
     add_raw_arg(SS::kNextPrioFieldNumber, Variadic::Integer(next_prio));
   }
-
-  // Open a new scheduling slice, corresponding to the task that was
-  // just switched to. Set the duration to -1, to indicate that the event is not
-  // finished. Duration will be updated later after event finish.
-  auto* sched = context_->storage->mutable_sched_slice_table();
-  auto row_and_id = sched->Insert(
-      {ts, /* duration */ -1, cpu, next_utid, kNullStringId, next_prio});
-  SchedId sched_id = row_and_id.id;
-  return *sched->id().IndexOf(sched_id);
-}
-
-StringId SchedEventTracker::TaskStateToStringId(int64_t task_state_int) {
-  using ftrace_utils::TaskState;
-
-  std::optional<VersionNumber> kernel_version =
-      SystemInfoTracker::GetOrCreate(context_)->GetKernelVersion();
-  TaskState task_state = TaskState::FromRawPrevState(
-      static_cast<uint16_t>(task_state_int), kernel_version);
-  return task_state.is_valid()
-             ? context_->storage->InternString(task_state.ToString().data())
-             : kNullStringId;
-}
-
-PERFETTO_ALWAYS_INLINE
-void SchedEventTracker::ClosePendingSlice(uint32_t pending_slice_idx,
-                                          int64_t ts,
-                                          StringId prev_state) {
-  auto* slices = context_->storage->mutable_sched_slice_table();
-
-  int64_t duration = ts - slices->ts()[pending_slice_idx];
-  slices->mutable_dur()->Set(pending_slice_idx, duration);
-
-  // We store the state as a uint16 as we only consider values up to 2048
-  // when unpacking the information inside; this allows savings of 48 bits
-  // per slice.
-  slices->mutable_end_state()->Set(pending_slice_idx, prev_state);
 }
 
 }  // namespace trace_processor
diff --git a/src/trace_processor/importers/ftrace/ftrace_sched_event_tracker.h b/src/trace_processor/importers/ftrace/ftrace_sched_event_tracker.h
new file mode 100644
index 0000000..bc90532
--- /dev/null
+++ b/src/trace_processor/importers/ftrace/ftrace_sched_event_tracker.h
@@ -0,0 +1,115 @@
+/*
+ * Copyright (C) 2019 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SRC_TRACE_PROCESSOR_IMPORTERS_FTRACE_FTRACE_SCHED_EVENT_TRACKER_H_
+#define SRC_TRACE_PROCESSOR_IMPORTERS_FTRACE_FTRACE_SCHED_EVENT_TRACKER_H_
+
+#include <array>
+#include <limits>
+
+#include "perfetto/ext/base/string_view.h"
+#include "perfetto/ext/base/utils.h"
+#include "src/trace_processor/importers/common/sched_event_tracker.h"
+#include "src/trace_processor/importers/common/sched_event_state.h"
+#include "src/trace_processor/storage/trace_storage.h"
+#include "src/trace_processor/types/destructible.h"
+#include "src/trace_processor/types/trace_processor_context.h"
+
+namespace perfetto {
+namespace trace_processor {
+
+class EventTracker;
+
+// Processes sched_switch and compact sched_waking events parsed from ftrace.
+class FtraceSchedEventTracker : public Destructible {
+ public:
+  explicit FtraceSchedEventTracker(TraceProcessorContext*);
+  ~FtraceSchedEventTracker() override;
+
+  FtraceSchedEventTracker(
+      const FtraceSchedEventTracker& ftrace_sched_event_tracker) = delete;
+  FtraceSchedEventTracker& operator=(
+      const FtraceSchedEventTracker& ftrace_sched_event_tracker) = delete;
+
+  static FtraceSchedEventTracker* GetOrCreate(TraceProcessorContext* context) {
+    if (!context->ftrace_sched_tracker) {  // Lazily created on first call.
+      context->ftrace_sched_tracker.reset(new FtraceSchedEventTracker(context));
+    }
+    return static_cast<FtraceSchedEventTracker*>(
+        context->ftrace_sched_tracker.get());
+  }
+
+  // This method is called when a sched_switch event is seen in the trace.
+  // Virtual so that tests can override it with a mock.
+  virtual void PushSchedSwitch(uint32_t cpu,
+                               int64_t timestamp,
+                               uint32_t prev_pid,
+                               base::StringView prev_comm,
+                               int32_t prev_prio,
+                               int64_t prev_state,
+                               uint32_t next_pid,
+                               base::StringView next_comm,
+                               int32_t next_prio);
+
+  void AddRawSchedSwitchEvent(uint32_t cpu,
+                              int64_t ts,
+                              UniqueTid prev_utid,
+                              uint32_t prev_pid,
+                              StringId prev_comm_id,
+                              int32_t prev_prio,
+                              int64_t prev_state,
+                              uint32_t next_pid,
+                              StringId next_comm_id,
+                              int32_t next_prio);  // Raw event only, no slice.
+
+  // This method is called when parsing a sched_switch encoded in the compact
+  // format.
+  void PushSchedSwitchCompact(uint32_t cpu,
+                              int64_t ts,
+                              int64_t prev_state,
+                              uint32_t next_pid,
+                              int32_t next_prio,
+                              StringId next_comm_id);
+
+  // This method is called when parsing a sched_waking encoded in the compact
+  // format. Note that the default encoding is handled by
+  // |EventTracker::PushInstant|.
+  void PushSchedWakingCompact(uint32_t cpu,
+                              int64_t ts,
+                              uint32_t wakee_pid,
+                              uint16_t target_cpu,
+                              uint16_t prio,
+                              StringId comm_id,
+                              uint16_t common_flags);
+
+ private:
+  static constexpr uint8_t kSchedSwitchMaxFieldId = 7;
+  std::array<StringId, kSchedSwitchMaxFieldId + 1> sched_switch_field_ids_;
+  StringId sched_switch_id_;  // Presumably the interned sched_switch name.
+
+  static constexpr uint8_t kSchedWakingMaxFieldId = 5;
+  std::array<StringId, kSchedWakingMaxFieldId + 1> sched_waking_field_ids_;
+  StringId sched_waking_id_;  // Interned name of the sched_waking descriptor.
+
+  TraceProcessorContext* const context_;
+
+  SchedEventState sched_event_state_;  // Holds the per-cpu pending sched info.
+};
+
+}  // namespace trace_processor
+}  // namespace perfetto
+
+#endif  // SRC_TRACE_PROCESSOR_IMPORTERS_FTRACE_FTRACE_SCHED_EVENT_TRACKER_H_
diff --git a/src/trace_processor/importers/ftrace/sched_event_tracker_unittest.cc b/src/trace_processor/importers/ftrace/ftrace_sched_event_tracker_unittest.cc
similarity index 93%
rename from src/trace_processor/importers/ftrace/sched_event_tracker_unittest.cc
rename to src/trace_processor/importers/ftrace/ftrace_sched_event_tracker_unittest.cc
index 5d275d6..a085078 100644
--- a/src/trace_processor/importers/ftrace/sched_event_tracker_unittest.cc
+++ b/src/trace_processor/importers/ftrace/ftrace_sched_event_tracker_unittest.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2020 The Android Open Source Project
+ * Copyright (C) 2023 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,11 +14,12 @@
  * limitations under the License.
  */
 
-#include "src/trace_processor/importers/ftrace/sched_event_tracker.h"
+#include "src/trace_processor/importers/ftrace/ftrace_sched_event_tracker.h"
 
 #include "perfetto/base/logging.h"
 #include "src/trace_processor/importers/common/args_tracker.h"
 #include "src/trace_processor/importers/common/event_tracker.h"
+#include "src/trace_processor/importers/common/sched_event_tracker.h"
 #include "src/trace_processor/importers/common/process_tracker.h"
 #include "test/gtest_and_gmock.h"
 
@@ -39,12 +40,13 @@
     context.args_tracker.reset(new ArgsTracker(&context));
     context.event_tracker.reset(new EventTracker(&context));
     context.process_tracker.reset(new ProcessTracker(&context));
-    sched_tracker = SchedEventTracker::GetOrCreate(&context);
+    context.sched_event_tracker.reset(new SchedEventTracker(&context));
+    sched_tracker = FtraceSchedEventTracker::GetOrCreate(&context);
   }
 
  protected:
   TraceProcessorContext context;
-  SchedEventTracker* sched_tracker;
+  FtraceSchedEventTracker* sched_tracker;
 };
 
 TEST_F(SchedEventTrackerTest, InsertSecondSched) {
diff --git a/src/trace_processor/importers/ftrace/sched_event_tracker.h b/src/trace_processor/importers/ftrace/sched_event_tracker.h
deleted file mode 100644
index 745b7c3..0000000
--- a/src/trace_processor/importers/ftrace/sched_event_tracker.h
+++ /dev/null
@@ -1,139 +0,0 @@
-/*
- * Copyright (C) 2019 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef SRC_TRACE_PROCESSOR_IMPORTERS_FTRACE_SCHED_EVENT_TRACKER_H_
-#define SRC_TRACE_PROCESSOR_IMPORTERS_FTRACE_SCHED_EVENT_TRACKER_H_
-
-#include <array>
-#include <limits>
-
-#include "perfetto/ext/base/string_view.h"
-#include "perfetto/ext/base/utils.h"
-#include "src/trace_processor/storage/trace_storage.h"
-#include "src/trace_processor/types/destructible.h"
-#include "src/trace_processor/types/trace_processor_context.h"
-
-namespace perfetto {
-namespace trace_processor {
-
-class EventTracker;
-
-// Tracks sched events and stores them into the storage as sched slices.
-class SchedEventTracker : public Destructible {
- public:
-  // Declared public for testing only.
-  explicit SchedEventTracker(TraceProcessorContext*);
-  SchedEventTracker(const SchedEventTracker&) = delete;
-  SchedEventTracker& operator=(const SchedEventTracker&) = delete;
-  ~SchedEventTracker() override;
-  static SchedEventTracker* GetOrCreate(TraceProcessorContext* context) {
-    if (!context->sched_tracker) {
-      context->sched_tracker.reset(new SchedEventTracker(context));
-    }
-    return static_cast<SchedEventTracker*>(context->sched_tracker.get());
-  }
-
-  // This method is called when a sched_switch event is seen in the trace.
-  // Virtual for testing.
-  virtual void PushSchedSwitch(uint32_t cpu,
-                               int64_t timestamp,
-                               uint32_t prev_pid,
-                               base::StringView prev_comm,
-                               int32_t prev_prio,
-                               int64_t prev_state,
-                               uint32_t next_pid,
-                               base::StringView next_comm,
-                               int32_t next_prio);
-
-  // This method is called when parsing a sched_switch encoded in the compact
-  // format.
-  void PushSchedSwitchCompact(uint32_t cpu,
-                              int64_t ts,
-                              int64_t prev_state,
-                              uint32_t next_pid,
-                              int32_t next_prio,
-                              StringId next_comm_id);
-
-  // This method is called when parsing a sched_waking encoded in the compact
-  // format. Note that the default encoding is handled by
-  // |EventTracker::PushInstant|.
-  void PushSchedWakingCompact(uint32_t cpu,
-                              int64_t ts,
-                              uint32_t wakee_pid,
-                              uint16_t target_cpu,
-                              uint16_t prio,
-                              StringId comm_id,
-                              uint16_t common_flags);
-
- private:
-  // Information retained from the preceding sched_switch seen on a given cpu.
-  struct PendingSchedInfo {
-    // The pending scheduling slice that the next event will complete.
-    uint32_t pending_slice_storage_idx = std::numeric_limits<uint32_t>::max();
-
-    // pid/utid/prio corresponding to the last sched_switch seen on this cpu
-    // (its "next_*" fields). There is some duplication with respect to the
-    // slices storage, but we don't always have a slice when decoding events in
-    // the compact format.
-    uint32_t last_pid = std::numeric_limits<uint32_t>::max();
-    UniqueTid last_utid = std::numeric_limits<UniqueTid>::max();
-    int32_t last_prio = std::numeric_limits<int32_t>::max();
-  };
-
-  uint32_t AddRawEventAndStartSlice(uint32_t cpu,
-                                    int64_t ts,
-                                    UniqueTid prev_utid,
-                                    uint32_t prev_pid,
-                                    StringId prev_comm_id,
-                                    int32_t prev_prio,
-                                    int64_t prev_state,
-                                    UniqueTid next_utid,
-                                    uint32_t next_pid,
-                                    StringId next_comm_id,
-                                    int32_t next_prio);
-
-  StringId TaskStateToStringId(int64_t task_state);
-
-  void ClosePendingSlice(uint32_t slice_idx, int64_t ts, StringId prev_state);
-
-  // Information retained from the preceding sched_switch seen on a given cpu.
-  std::vector<PendingSchedInfo> pending_sched_per_cpu_;
-
-  // Get the sched info for the given CPU, resizing the vector if necessary.
-  PendingSchedInfo* PendingSchedByCPU(uint32_t cpu) {
-    if (PERFETTO_UNLIKELY(cpu >= pending_sched_per_cpu_.size())) {
-      pending_sched_per_cpu_.resize(cpu + 1);
-    }
-    return &pending_sched_per_cpu_[cpu];
-  }
-
-  static constexpr uint8_t kSchedSwitchMaxFieldId = 7;
-  std::array<StringId, kSchedSwitchMaxFieldId + 1> sched_switch_field_ids_;
-  StringId sched_switch_id_;
-
-  static constexpr uint8_t kSchedWakingMaxFieldId = 5;
-  std::array<StringId, kSchedWakingMaxFieldId + 1> sched_waking_field_ids_;
-  StringId sched_waking_id_;
-
-  StringId waker_utid_id_;
-
-  TraceProcessorContext* const context_;
-};
-
-}  // namespace trace_processor
-}  // namespace perfetto
-
-#endif  // SRC_TRACE_PROCESSOR_IMPORTERS_FTRACE_SCHED_EVENT_TRACKER_H_
diff --git a/src/trace_processor/importers/fuchsia/fuchsia_parser_unittest.cc b/src/trace_processor/importers/fuchsia/fuchsia_parser_unittest.cc
index d644742..9f90bed 100644
--- a/src/trace_processor/importers/fuchsia/fuchsia_parser_unittest.cc
+++ b/src/trace_processor/importers/fuchsia/fuchsia_parser_unittest.cc
@@ -31,7 +31,7 @@
 #include "src/trace_processor/importers/common/slice_tracker.h"
 #include "src/trace_processor/importers/common/stack_profile_tracker.h"
 #include "src/trace_processor/importers/common/track_tracker.h"
-#include "src/trace_processor/importers/ftrace/sched_event_tracker.h"
+#include "src/trace_processor/importers/ftrace/ftrace_sched_event_tracker.h"
 #include "src/trace_processor/importers/proto/additional_modules.h"
 #include "src/trace_processor/importers/proto/default_modules.h"
 #include "src/trace_processor/importers/proto/proto_trace_parser.h"
@@ -91,10 +91,10 @@
 using ::testing::Return;
 using ::testing::ReturnRef;
 using ::testing::UnorderedElementsAreArray;
-class MockSchedEventTracker : public SchedEventTracker {
+class MockSchedEventTracker : public FtraceSchedEventTracker {
  public:
   explicit MockSchedEventTracker(TraceProcessorContext* context)
-      : SchedEventTracker(context) {}
+      : FtraceSchedEventTracker(context) {}
 
   MOCK_METHOD(void,
               PushSchedSwitch,
@@ -244,7 +244,7 @@
     event_ = new MockEventTracker(&context_);
     context_.event_tracker.reset(event_);
     sched_ = new MockSchedEventTracker(&context_);
-    context_.sched_tracker.reset(sched_);
+    context_.ftrace_sched_tracker.reset(sched_);
     process_ = new NiceMock<MockProcessTracker>(&context_);
     context_.process_tracker.reset(process_);
     slice_ = new NiceMock<MockSliceTracker>(&context_);
diff --git a/src/trace_processor/importers/perf/BUILD.gn b/src/trace_processor/importers/perf/BUILD.gn
index 387468f..960cd27 100644
--- a/src/trace_processor/importers/perf/BUILD.gn
+++ b/src/trace_processor/importers/perf/BUILD.gn
@@ -28,6 +28,7 @@
   ]
   deps = [
     "../../../../gn:default_deps",
+    "../../../../protos/perfetto/trace/profiling:zero",
     "../../importers/common",
     "../../importers/common:parser_types",
     "../../sorter",
@@ -47,6 +48,8 @@
     ":perf",
     "../../../../gn:default_deps",
     "../../../../gn:gtest_and_gmock",
+    "../../../../protos/perfetto/trace/profiling:zero",
     "../../../base",
+    "../../importers/common",
   ]
 }
diff --git a/src/trace_processor/importers/perf/perf_data_parser.cc b/src/trace_processor/importers/perf/perf_data_parser.cc
index 11a5a13..bb79209 100644
--- a/src/trace_processor/importers/perf/perf_data_parser.cc
+++ b/src/trace_processor/importers/perf/perf_data_parser.cc
@@ -22,6 +22,7 @@
 #include "perfetto/base/logging.h"
 #include "perfetto/ext/base/string_utils.h"
 #include "perfetto/trace_processor/trace_blob_view.h"
+#include "src/trace_processor/importers/common/mapping_tracker.h"
 #include "src/trace_processor/importers/common/process_tracker.h"
 #include "src/trace_processor/importers/perf/perf_data_reader.h"
 #include "src/trace_processor/importers/perf/perf_data_tracker.h"
@@ -58,7 +59,8 @@
 
   // First instruction pointer in the callchain should be from kernel space, so
   // it shouldn't be available in mappings.
-  if (tracker_->FindMapping(*sample.pid, sample.callchain.front()).ok()) {
+  if (context_->mapping_tracker->FindUserMappingForAddress(
+          *sample.pid, sample.callchain.front())) {
     context_->storage->IncrementStats(stats::perf_samples_skipped);
     return;
   }
@@ -70,19 +72,22 @@
 
   std::vector<FramesTable::Row> frame_rows;
   for (uint32_t i = 1; i < sample.callchain.size(); i++) {
-    auto mapping = tracker_->FindMapping(*sample.pid, sample.callchain[i]);
-    if (!mapping.ok()) {
+    UserMemoryMapping* mapping =
+        context_->mapping_tracker->FindUserMappingForAddress(
+            *sample.pid, sample.callchain[i]);
+    if (!mapping) {
       context_->storage->IncrementStats(stats::perf_samples_skipped);
       return;
     }
     FramesTable::Row new_row;
     std::string mock_name =
-        base::StackString<1024>("%" PRIu64,
-                                sample.callchain[i] - mapping->start)
+        base::StackString<1024>(
+            "%" PRIu64, sample.callchain[i] - mapping->memory_range().start())
             .ToStdString();
     new_row.name = context_->storage->InternString(mock_name.c_str());
-    new_row.mapping = mapping->id;
-    new_row.rel_pc = static_cast<int64_t>(sample.callchain[i] - mapping->start);
+    new_row.mapping = mapping->mapping_id();
+    new_row.rel_pc =
+        static_cast<int64_t>(mapping->ToRelativePc(sample.callchain[i]));
     frame_rows.push_back(new_row);
   }
 
diff --git a/src/trace_processor/importers/perf/perf_data_tokenizer.cc b/src/trace_processor/importers/perf/perf_data_tokenizer.cc
index be1ec02..334148a 100644
--- a/src/trace_processor/importers/perf/perf_data_tokenizer.cc
+++ b/src/trace_processor/importers/perf/perf_data_tokenizer.cc
@@ -15,6 +15,7 @@
  */
 
 #include "src/trace_processor/importers/perf/perf_data_tokenizer.h"
+
 #include <cstdint>
 #include <cstring>
 #include <vector>
@@ -31,9 +32,29 @@
 #include "src/trace_processor/storage/stats.h"
 #include "src/trace_processor/util/status_macros.h"
 
+#include "protos/perfetto/trace/profiling/profile_packet.pbzero.h"
+
 namespace perfetto {
 namespace trace_processor {
 namespace perf_importer {
+namespace {
+protos::pbzero::Profiling::CpuMode GetCpuMode(const perf_event_header& header) {
+  switch (header.misc & kPerfRecordMiscCpumodeMask) {
+    case PERF_RECORD_MISC_KERNEL:
+      return protos::pbzero::Profiling::MODE_KERNEL;
+    case PERF_RECORD_MISC_USER:
+      return protos::pbzero::Profiling::MODE_USER;
+    case PERF_RECORD_MISC_HYPERVISOR:
+      return protos::pbzero::Profiling::MODE_HYPERVISOR;
+    case PERF_RECORD_MISC_GUEST_KERNEL:
+      return protos::pbzero::Profiling::MODE_GUEST_KERNEL;
+    case PERF_RECORD_MISC_GUEST_USER:
+      return protos::pbzero::Profiling::MODE_GUEST_USER;
+    default:
+      return protos::pbzero::Profiling::MODE_UNKNOWN;
+  }
+}
+}  // namespace
 
 PerfDataTokenizer::PerfDataTokenizer(TraceProcessorContext* ctx)
     : context_(ctx),
@@ -124,6 +145,7 @@
                        sizeof(PerfDataTracker::Mmap2Record::Numeric));
         auto record = ParseMmap2Record(record_size);
         RETURN_IF_ERROR(record.status());
+        record->cpu_mode = GetCpuMode(ev_header);
         tracker_->PushMmap2Record(*record);
         break;
       }
diff --git a/src/trace_processor/importers/perf/perf_data_tracker.cc b/src/trace_processor/importers/perf/perf_data_tracker.cc
index 0c9b209..c670258 100644
--- a/src/trace_processor/importers/perf/perf_data_tracker.cc
+++ b/src/trace_processor/importers/perf/perf_data_tracker.cc
@@ -15,12 +15,53 @@
  */
 
 #include "src/trace_processor/importers/perf/perf_data_tracker.h"
+
+#include <optional>
+
 #include "perfetto/base/status.h"
+#include "src/trace_processor/importers/common/address_range.h"
+#include "src/trace_processor/importers/common/mapping_tracker.h"
+#include "src/trace_processor/importers/common/process_tracker.h"
 #include "src/trace_processor/storage/stats.h"
+#include "src/trace_processor/storage/trace_storage.h"
+
+#include "protos/perfetto/trace/profiling/profile_packet.pbzero.h"
 
 namespace perfetto {
 namespace trace_processor {
 namespace perf_importer {
+namespace {
+
+bool IsInKernel(protos::pbzero::Profiling::CpuMode cpu_mode) {
+  switch (cpu_mode) {
+    case protos::pbzero::Profiling::MODE_UNKNOWN:
+      PERFETTO_CHECK(false);
+    case protos::pbzero::Profiling::MODE_GUEST_KERNEL:
+    case protos::pbzero::Profiling::MODE_KERNEL:
+      return true;
+    case protos::pbzero::Profiling::MODE_USER:
+    case protos::pbzero::Profiling::MODE_HYPERVISOR:
+    case protos::pbzero::Profiling::MODE_GUEST_USER:
+      return false;
+  }
+  PERFETTO_CHECK(false);
+}
+
+CreateMappingParams BuildCreateMappingParams(
+    PerfDataTracker::Mmap2Record record) {
+  return {AddressRange::FromStartAndSize(record.num.addr, record.num.len),
+          record.num.pgoff,
+          // start_offset: This is the offset into the file where the ELF header
+          // starts. We assume all file mappings are ELF files and thus this
+          // offset is 0.
+          0,
+          // load_bias: This can only be read out of the actual ELF file, which
+          // we do not have here, so we set it to 0. When symbolizing we will
+          // hopefully have the real load bias and we can compensate there for a
+          // possible mismatch.
+          0, record.filename, std::nullopt};
+}
+}  // namespace
 
 PerfDataTracker::~PerfDataTracker() = default;
 
@@ -48,31 +89,15 @@
 }
 
 void PerfDataTracker::PushMmap2Record(Mmap2Record record) {
-  const auto mappings =
-      context_->storage->mutable_stack_profile_mapping_table();
-  MappingTable::Row row;
-  row.start = static_cast<int64_t>(record.num.addr);
-  row.end = static_cast<int64_t>(record.num.addr + record.num.len);
-  row.name = context_->storage->InternString(record.filename.c_str());
-  MappingTable::Id id = mappings->Insert(row).id;
-  MmapRange mmap2_range{record.num.addr, record.num.addr + record.num.len, id};
-  mmap2_ranges_[record.num.pid].push_back(mmap2_range);
-}
-
-base::StatusOr<PerfDataTracker::MmapRange> PerfDataTracker::FindMapping(
-    uint32_t pid,
-    uint64_t ips) {
-  auto vec = mmap2_ranges_.Find(pid);
-  if (!vec) {
-    return base::ErrStatus("Sample pid not found in mappings.");
+  if (IsInKernel(record.cpu_mode)) {
+    context_->mapping_tracker->CreateKernelMemoryMapping(
+        BuildCreateMappingParams(std::move(record)));
+  } else {
+    UniquePid upid =
+        context_->process_tracker->GetOrCreateProcess(record.num.pid);
+    context_->mapping_tracker->CreateUserMemoryMapping(
+        upid, BuildCreateMappingParams(std::move(record)));
   }
-
-  for (const auto& range : *vec) {
-    if (ips >= range.start && ips < range.end) {
-      return range;
-    }
-  }
-  return base::ErrStatus("No mapping for callstack frame instruction pointer");
 }
 
 base::StatusOr<PerfDataTracker::PerfSample> PerfDataTracker::ParseSample(
diff --git a/src/trace_processor/importers/perf/perf_data_tracker.h b/src/trace_processor/importers/perf/perf_data_tracker.h
index 0ab99aa..11258ed 100644
--- a/src/trace_processor/importers/perf/perf_data_tracker.h
+++ b/src/trace_processor/importers/perf/perf_data_tracker.h
@@ -25,6 +25,7 @@
 #include "perfetto/ext/base/flat_hash_map.h"
 #include "perfetto/ext/base/status_or.h"
 #include "perfetto/ext/base/string_utils.h"
+#include "protos/perfetto/trace/profiling/profile_packet.pbzero.h"
 #include "src/trace_processor/importers/perf/perf_data_reader.h"
 #include "src/trace_processor/importers/perf/perf_event.h"
 #include "src/trace_processor/storage/trace_storage.h"
@@ -76,14 +77,10 @@
       uint32_t prot;
       uint32_t flags;
     };
+    protos::pbzero::Profiling::CpuMode cpu_mode;
     Numeric num;
     std::string filename;
   };
-  struct MmapRange {
-    uint64_t start;
-    uint64_t end;
-    MappingTable::Id id;
-  };
 
   PerfDataTracker(const PerfDataTracker&) = delete;
   PerfDataTracker& operator=(const PerfDataTracker&) = delete;
@@ -103,14 +100,11 @@
   base::StatusOr<PerfSample> ParseSample(
       perfetto::trace_processor::perf_importer::Reader&);
 
-  base::StatusOr<MmapRange> FindMapping(uint32_t pid, uint64_t ips);
-
  private:
   const perf_event_attr* FindAttrWithId(uint64_t id) const;
   TraceProcessorContext* context_;
   std::vector<AttrAndIds> attrs_;
 
-  base::FlatHashMap</*pid=*/uint32_t, std::vector<MmapRange>> mmap2_ranges_;
   uint64_t common_sample_type_;
 };
 }  // namespace perf_importer
diff --git a/src/trace_processor/importers/perf/perf_data_tracker_unittest.cc b/src/trace_processor/importers/perf/perf_data_tracker_unittest.cc
index 6c59be6..3cbdc80 100644
--- a/src/trace_processor/importers/perf/perf_data_tracker_unittest.cc
+++ b/src/trace_processor/importers/perf/perf_data_tracker_unittest.cc
@@ -22,16 +22,35 @@
 #include <vector>
 
 #include "perfetto/base/build_config.h"
+#include "protos/perfetto/trace/profiling/profile_packet.pbzero.h"
+#include "src/trace_processor/importers/common/address_range.h"
+#include "src/trace_processor/importers/common/mapping_tracker.h"
+#include "src/trace_processor/importers/common/process_tracker.h"
+#include "src/trace_processor/importers/common/stack_profile_tracker.h"
 #include "src/trace_processor/importers/perf/perf_event.h"
 #include "test/gtest_and_gmock.h"
 
 namespace perfetto {
 namespace trace_processor {
 namespace perf_importer {
+namespace {
 
-TEST(PerfDataTrackerUnittest, ComputeCommonSampleType) {
-  TraceProcessorContext context;
-  PerfDataTracker* tracker = PerfDataTracker::GetOrCreate(&context);
+class PerfDataTrackerUnittest : public testing::Test {
+ public:
+  PerfDataTrackerUnittest() {
+    context_.storage = std::make_unique<TraceStorage>();
+    context_.process_tracker = std::make_unique<ProcessTracker>(&context_);
+    context_.stack_profile_tracker =
+        std::make_unique<StackProfileTracker>(&context_);
+    context_.mapping_tracker = std::make_unique<MappingTracker>(&context_);
+  }
+
+ protected:
+  TraceProcessorContext context_;
+};
+
+TEST_F(PerfDataTrackerUnittest, ComputeCommonSampleType) {
+  PerfDataTracker* tracker = PerfDataTracker::GetOrCreate(&context_);
 
   PerfDataTracker::AttrAndIds attr_and_ids;
   attr_and_ids.attr.sample_type =
@@ -46,16 +65,15 @@
   EXPECT_FALSE(tracker->common_sample_type() & PERF_SAMPLE_CALLCHAIN);
 }
 
-TEST(PerfDataTrackerUnittest, FindMapping) {
-  TraceProcessorContext context;
-  context.storage = std::make_unique<TraceStorage>();
-  PerfDataTracker* tracker = PerfDataTracker::GetOrCreate(&context);
+TEST_F(PerfDataTrackerUnittest, FindMapping) {
+  PerfDataTracker* tracker = PerfDataTracker::GetOrCreate(&context_);
 
   PerfDataTracker::Mmap2Record rec;
   rec.filename = "file1";
   rec.num.addr = 1000;
   rec.num.len = 100;
   rec.num.pid = 1;
+  rec.cpu_mode = protos::pbzero::Profiling::MODE_USER;
   tracker->PushMmap2Record(rec);
 
   rec.num.addr = 2000;
@@ -64,32 +82,33 @@
   rec.num.addr = 3000;
   tracker->PushMmap2Record(rec);
 
-  auto res_status = tracker->FindMapping(1, 2050);
-  EXPECT_TRUE(res_status.ok());
-  EXPECT_EQ(res_status->start, 2000u);
-  EXPECT_EQ(res_status->end, 2100u);
+  UserMemoryMapping* mapping =
+      context_.mapping_tracker->FindUserMappingForAddress(
+          context_.process_tracker->GetOrCreateProcess(1), 2050);
+  ASSERT_NE(mapping, nullptr);
+  EXPECT_EQ(mapping->memory_range().start(), 2000u);
+  EXPECT_EQ(mapping->memory_range().end(), 2100u);
 }
 
-TEST(PerfDataTrackerUnittest, FindMappingFalse) {
-  TraceProcessorContext context;
-  context.storage = std::make_unique<TraceStorage>();
-  PerfDataTracker* tracker = PerfDataTracker::GetOrCreate(&context);
+TEST_F(PerfDataTrackerUnittest, FindMappingFalse) {
+  PerfDataTracker* tracker = PerfDataTracker::GetOrCreate(&context_);
 
   PerfDataTracker::Mmap2Record rec;
   rec.filename = "file1";
   rec.num.addr = 1000;
   rec.num.len = 100;
   rec.num.pid = 1;
+  rec.cpu_mode = protos::pbzero::Profiling::MODE_USER;
   tracker->PushMmap2Record(rec);
 
-  auto res_status = tracker->FindMapping(2, 2050);
-  EXPECT_FALSE(res_status.ok());
+  UserMemoryMapping* mapping =
+      context_.mapping_tracker->FindUserMappingForAddress(
+          context_.process_tracker->GetOrCreateProcess(2), 2050);
+  EXPECT_EQ(mapping, nullptr);
 }
 
-TEST(PerfDataTrackerUnittest, ParseSampleTrivial) {
-  TraceProcessorContext context;
-  context.storage = std::make_unique<TraceStorage>();
-  PerfDataTracker* tracker = PerfDataTracker::GetOrCreate(&context);
+TEST_F(PerfDataTrackerUnittest, ParseSampleTrivial) {
+  PerfDataTracker* tracker = PerfDataTracker::GetOrCreate(&context_);
 
   PerfDataTracker::AttrAndIds attr_and_ids;
   attr_and_ids.attr.sample_type = PERF_SAMPLE_TIME;
@@ -107,10 +126,8 @@
   EXPECT_EQ(parsed_sample->ts, 100u);
 }
 
-TEST(PerfDataTrackerUnittest, ParseSampleCallchain) {
-  TraceProcessorContext context;
-  context.storage = std::make_unique<TraceStorage>();
-  PerfDataTracker* tracker = PerfDataTracker::GetOrCreate(&context);
+TEST_F(PerfDataTrackerUnittest, ParseSampleCallchain) {
+  PerfDataTracker* tracker = PerfDataTracker::GetOrCreate(&context_);
 
   PerfDataTracker::AttrAndIds attr_and_ids;
   attr_and_ids.attr.sample_type = PERF_SAMPLE_CALLCHAIN;
@@ -137,10 +154,8 @@
   EXPECT_EQ(parsed_sample->callchain.size(), 3u);
 }
 
-TEST(PerfDataTrackerUnittest, ParseSampleWithoutId) {
-  TraceProcessorContext context;
-  context.storage = std::make_unique<TraceStorage>();
-  PerfDataTracker* tracker = PerfDataTracker::GetOrCreate(&context);
+TEST_F(PerfDataTrackerUnittest, ParseSampleWithoutId) {
+  PerfDataTracker* tracker = PerfDataTracker::GetOrCreate(&context_);
 
   PerfDataTracker::AttrAndIds attr_and_ids;
   attr_and_ids.attr.sample_type = PERF_SAMPLE_TID | PERF_SAMPLE_TIME |
@@ -177,10 +192,8 @@
   EXPECT_EQ(sample.ts, parsed_sample->ts);
 }
 
-TEST(PerfDataTrackerUnittest, ParseSampleWithId) {
-  TraceProcessorContext context;
-  context.storage = std::make_unique<TraceStorage>();
-  PerfDataTracker* tracker = PerfDataTracker::GetOrCreate(&context);
+TEST_F(PerfDataTrackerUnittest, ParseSampleWithId) {
+  PerfDataTracker* tracker = PerfDataTracker::GetOrCreate(&context_);
 
   PerfDataTracker::AttrAndIds attr_and_ids;
   attr_and_ids.attr.sample_type = PERF_SAMPLE_CPU | PERF_SAMPLE_TID |
@@ -222,6 +235,7 @@
   EXPECT_EQ(100u, parsed_sample->ts);
 }
 
+}  // namespace
 }  // namespace perf_importer
 }  // namespace trace_processor
 }  // namespace perfetto
diff --git a/src/trace_processor/importers/perf/perf_event.h b/src/trace_processor/importers/perf/perf_event.h
index 53f5f05..c60709f 100644
--- a/src/trace_processor/importers/perf/perf_event.h
+++ b/src/trace_processor/importers/perf/perf_event.h
@@ -223,4 +223,15 @@
   PERF_SAMPLE_MAX = 1U << 25, /* non-ABI */
 };
 
+constexpr auto kPerfRecordMiscCpumodeMask = 0x7;
+
+enum perf_record_misc {
+  PERF_RECORD_MISC_CPUMODE_UNKNOWN = 0,
+  PERF_RECORD_MISC_KERNEL = 1,
+  PERF_RECORD_MISC_USER = 2,
+  PERF_RECORD_MISC_HYPERVISOR = 3,
+  PERF_RECORD_MISC_GUEST_KERNEL = 4,
+  PERF_RECORD_MISC_GUEST_USER = 5,
+};
+
 #endif  // SRC_TRACE_PROCESSOR_IMPORTERS_PERF_PERF_EVENT_H_
diff --git a/src/trace_processor/importers/proto/BUILD.gn b/src/trace_processor/importers/proto/BUILD.gn
index 2eaa5fe..2e542d0 100644
--- a/src/trace_processor/importers/proto/BUILD.gn
+++ b/src/trace_processor/importers/proto/BUILD.gn
@@ -91,9 +91,9 @@
     "../../storage",
     "../../tables",
     "../../types",
+    "../../util:build_id",
     "../../util:gzip",
     "../../util:profiler_util",
-    "../../util:stack_traces_util",
     "../common",
     "../common:parser_types",
     "../ftrace:minimal",
@@ -181,7 +181,6 @@
     "../../util:profiler_util",
     "../../util:proto_profiler",
     "../../util:proto_to_args_parser",
-    "../../util:stack_traces_util",
     "../common",
     "../common:parser_types",
     "../etw:full",
@@ -276,7 +275,6 @@
     "../../types",
     "../../util:descriptors",
     "../../util:profiler_util",
-    "../../util:stack_traces_util",
     "../common",
     "../ftrace:full",
   ]
diff --git a/src/trace_processor/importers/proto/chrome_system_probes_parser.h b/src/trace_processor/importers/proto/chrome_system_probes_parser.h
index b011fe5..ca68814 100644
--- a/src/trace_processor/importers/proto/chrome_system_probes_parser.h
+++ b/src/trace_processor/importers/proto/chrome_system_probes_parser.h
@@ -43,7 +43,7 @@
   // Maps a proto field number for memcounters in ProcessStats::Process to
   // their StringId. Keep kProcStatsProcessSize equal to 1 + max proto field
   // id of ProcessStats::Process. Also update SystemProbesParser.
-  static constexpr size_t kProcStatsProcessSize = 21;
+  static constexpr size_t kProcStatsProcessSize = 23;
   std::array<StringId, kProcStatsProcessSize> proc_stats_process_names_{};
 };
 
diff --git a/src/trace_processor/importers/proto/profile_module.cc b/src/trace_processor/importers/proto/profile_module.cc
index 61596d3..b80cae7 100644
--- a/src/trace_processor/importers/proto/profile_module.cc
+++ b/src/trace_processor/importers/proto/profile_module.cc
@@ -24,6 +24,7 @@
 #include "src/trace_processor/importers/common/clock_tracker.h"
 #include "src/trace_processor/importers/common/deobfuscation_mapping_table.h"
 #include "src/trace_processor/importers/common/event_tracker.h"
+#include "src/trace_processor/importers/common/mapping_tracker.h"
 #include "src/trace_processor/importers/common/process_tracker.h"
 #include "src/trace_processor/importers/common/stack_profile_tracker.h"
 #include "src/trace_processor/importers/proto/packet_sequence_state.h"
@@ -36,8 +37,8 @@
 #include "src/trace_processor/storage/trace_storage.h"
 #include "src/trace_processor/tables/profiler_tables_py.h"
 #include "src/trace_processor/types/trace_processor_context.h"
+#include "src/trace_processor/util/build_id.h"
 #include "src/trace_processor/util/profiler_util.h"
-#include "src/trace_processor/util/stack_traces_util.h"
 
 #include "protos/perfetto/common/builtin_clock.pbzero.h"
 #include "protos/perfetto/common/perf_events.pbzero.h"
@@ -428,19 +429,11 @@
 
 void ProfileModule::ParseModuleSymbols(ConstBytes blob) {
   protos::pbzero::ModuleSymbols::Decoder module_symbols(blob.data, blob.size);
-  StringId build_id;
-  // TODO(b/148109467): Remove workaround once all active Chrome versions
-  // write raw bytes instead of a string as build_id.
-  if (util::IsHexModuleId(module_symbols.build_id())) {
-    build_id = context_->storage->InternString(module_symbols.build_id());
-  } else {
-    build_id = context_->storage->InternString(base::StringView(base::ToHex(
-        module_symbols.build_id().data, module_symbols.build_id().size)));
-  }
+  BuildId build_id = BuildId::FromRaw(module_symbols.build_id());
 
-  auto mapping_ids = context_->stack_profile_tracker->FindMappingRow(
-      context_->storage->InternString(module_symbols.path()), build_id);
-  if (mapping_ids.empty()) {
+  auto mappings =
+      context_->mapping_tracker->FindMappings(module_symbols.path(), build_id);
+  if (mappings.empty()) {
     context_->storage->IncrementStats(stats::stackprofile_invalid_mapping_id);
     return;
   }
@@ -467,12 +460,11 @@
       continue;
     }
     bool frame_found = false;
-    for (MappingId mapping_id : mapping_ids) {
+    for (VirtualMemoryMapping* mapping : mappings) {
       context_->args_translation_table->AddNativeSymbolTranslationRule(
-          mapping_id, address_symbols.address(), last_location);
+          mapping->mapping_id(), address_symbols.address(), last_location);
       std::vector<FrameId> frame_ids =
-          context_->stack_profile_tracker->FindFrameIds(
-              mapping_id, address_symbols.address());
+          mapping->FindFrameIds(address_symbols.address());
 
       for (const FrameId frame_id : frame_ids) {
         auto* frames = context_->storage->mutable_stack_profile_frame_table();
diff --git a/src/trace_processor/importers/proto/profile_packet_sequence_state.cc b/src/trace_processor/importers/proto/profile_packet_sequence_state.cc
index 9d889ef..31841fc 100644
--- a/src/trace_processor/importers/proto/profile_packet_sequence_state.cc
+++ b/src/trace_processor/importers/proto/profile_packet_sequence_state.cc
@@ -19,6 +19,8 @@
 #include "perfetto/base/flat_set.h"
 #include "perfetto/base/logging.h"
 #include "perfetto/ext/base/string_view.h"
+#include "src/trace_processor/importers/common/address_range.h"
+#include "src/trace_processor/importers/common/mapping_tracker.h"
 #include "src/trace_processor/importers/common/process_tracker.h"
 #include "src/trace_processor/importers/common/stack_profile_tracker.h"
 #include "src/trace_processor/importers/proto/packet_sequence_state.h"
@@ -28,6 +30,7 @@
 #include "src/trace_processor/storage/stats.h"
 #include "src/trace_processor/storage/trace_storage.h"
 #include "src/trace_processor/types/trace_processor_context.h"
+#include "src/trace_processor/util/build_id.h"
 
 namespace perfetto {
 namespace trace_processor {
@@ -68,17 +71,16 @@
 
 void ProfilePacketSequenceState::AddMapping(SourceMappingId id,
                                             const SourceMapping& mapping) {
-  StackProfileTracker::CreateMappingParams params;
+  CreateMappingParams params;
   if (std::string* str = strings_.Find(mapping.build_id); str) {
-    params.build_id = base::StringView(*str);
+    params.build_id = BuildId::FromRaw(*str);
   } else {
     context_->storage->IncrementStats(stats::stackprofile_invalid_string_id);
     return;
   }
   params.exact_offset = mapping.exact_offset;
   params.start_offset = mapping.start_offset;
-  params.start = mapping.start;
-  params.end = mapping.end;
+  params.memory_range = AddressRange(mapping.start, mapping.end);
   params.load_bias = mapping.load_bias;
 
   std::vector<base::StringView> path_components;
@@ -93,16 +95,16 @@
       break;
     }
   }
-  std::string path = ProfilePacketUtils::MakeMappingName(path_components);
-  params.name = base::StringView(path);
-  MappingId mapping_id = context_->stack_profile_tracker->InternMapping(params);
-  mappings_.Insert(id, mapping_id);
+
+  params.name = ProfilePacketUtils::MakeMappingName(path_components);
+  mappings_.Insert(
+      id, &context_->mapping_tracker->InternMemoryMapping(std::move(params)));
 }
 
 void ProfilePacketSequenceState::AddFrame(SourceFrameId id,
                                           const SourceFrame& frame) {
-  MappingId* mapping_id = mappings_.Find(frame.mapping_id);
-  if (!mapping_id) {
+  VirtualMemoryMapping** mapping = mappings_.Find(frame.mapping_id);
+  if (!mapping) {
     context_->storage->IncrementStats(stats::stackprofile_invalid_mapping_id);
     return;
   }
@@ -113,9 +115,8 @@
     return;
   }
 
-  FrameId frame_id = context_->stack_profile_tracker->InternFrame(
-      *mapping_id, frame.rel_pc, base::StringView(*function_name));
-
+  FrameId frame_id =
+      (*mapping)->InternFrame(frame.rel_pc, base::StringView(*function_name));
   frames_.Insert(id, frame_id);
 }
 
diff --git a/src/trace_processor/importers/proto/profile_packet_sequence_state.h b/src/trace_processor/importers/proto/profile_packet_sequence_state.h
index 678aab2..99661da 100644
--- a/src/trace_processor/importers/proto/profile_packet_sequence_state.h
+++ b/src/trace_processor/importers/proto/profile_packet_sequence_state.h
@@ -23,7 +23,6 @@
 
 #include "perfetto/ext/base/hash.h"
 #include "perfetto/ext/base/string_view.h"
-#include "protos/perfetto/trace/profiling/profile_common.pbzero.h"
 #include "src/trace_processor/importers/proto/packet_sequence_state_generation.h"
 #include "src/trace_processor/importers/proto/stack_profile_sequence_state.h"
 #include "src/trace_processor/storage/trace_storage.h"
@@ -31,6 +30,8 @@
 namespace perfetto {
 namespace trace_processor {
 
+class VirtualMemoryMapping;
+
 // Keeps sequence specific state for profile packets.
 class ProfilePacketSequenceState final
     : public PacketSequenceStateGeneration::InternedDataTracker {
@@ -126,7 +127,7 @@
   TraceProcessorContext* const context_;
 
   base::FlatHashMap<SourceStringId, std::string> strings_;
-  base::FlatHashMap<SourceMappingId, MappingId> mappings_;
+  base::FlatHashMap<SourceMappingId, VirtualMemoryMapping*> mappings_;
   base::FlatHashMap<SourceFrameId, FrameId> frames_;
   base::FlatHashMap<SourceCallstackId, CallsiteId> callstacks_;
 
diff --git a/src/trace_processor/importers/proto/profile_packet_sequence_state_unittest.cc b/src/trace_processor/importers/proto/profile_packet_sequence_state_unittest.cc
index ab947fa..c9cd6e3 100644
--- a/src/trace_processor/importers/proto/profile_packet_sequence_state_unittest.cc
+++ b/src/trace_processor/importers/proto/profile_packet_sequence_state_unittest.cc
@@ -18,8 +18,9 @@
 
 #include <memory>
 
-#include "src/trace_processor/importers/proto/packet_sequence_state.h"
+#include "src/trace_processor/importers/common/mapping_tracker.h"
 #include "src/trace_processor/importers/common/stack_profile_tracker.h"
+#include "src/trace_processor/importers/proto/packet_sequence_state.h"
 #include "src/trace_processor/types/trace_processor_context.h"
 #include "test/gtest_and_gmock.h"
 
@@ -58,6 +59,7 @@
  public:
   HeapProfileTrackerDupTest() {
     context.storage.reset(new TraceStorage());
+    context.mapping_tracker.reset(new MappingTracker(&context));
     context.stack_profile_tracker.reset(new StackProfileTracker(&context));
     packet_sequence_state.reset(new PacketSequenceState(&context));
 
@@ -196,6 +198,7 @@
 TEST(HeapProfileTrackerTest, SourceMappingPath) {
   TraceProcessorContext context;
   context.storage.reset(new TraceStorage());
+  context.mapping_tracker.reset(new MappingTracker(&context));
   context.stack_profile_tracker.reset(new StackProfileTracker(&context));
   PacketSequenceState pss(&context);
   ProfilePacketSequenceState& ppss =
@@ -229,6 +232,7 @@
 TEST(HeapProfileTrackerTest, Functional) {
   TraceProcessorContext context;
   context.storage.reset(new TraceStorage());
+  context.mapping_tracker.reset(new MappingTracker(&context));
   context.stack_profile_tracker.reset(new StackProfileTracker(&context));
 
   PacketSequenceState pss(&context);
diff --git a/src/trace_processor/importers/proto/proto_trace_parser_unittest.cc b/src/trace_processor/importers/proto/proto_trace_parser_unittest.cc
index 148bf60..e612709 100644
--- a/src/trace_processor/importers/proto/proto_trace_parser_unittest.cc
+++ b/src/trace_processor/importers/proto/proto_trace_parser_unittest.cc
@@ -25,12 +25,13 @@
 #include "src/trace_processor/importers/common/clock_tracker.h"
 #include "src/trace_processor/importers/common/event_tracker.h"
 #include "src/trace_processor/importers/common/flow_tracker.h"
+#include "src/trace_processor/importers/common/mapping_tracker.h"
 #include "src/trace_processor/importers/common/metadata_tracker.h"
 #include "src/trace_processor/importers/common/process_tracker.h"
 #include "src/trace_processor/importers/common/slice_tracker.h"
 #include "src/trace_processor/importers/common/stack_profile_tracker.h"
 #include "src/trace_processor/importers/common/track_tracker.h"
-#include "src/trace_processor/importers/ftrace/sched_event_tracker.h"
+#include "src/trace_processor/importers/ftrace/ftrace_sched_event_tracker.h"
 #include "src/trace_processor/importers/proto/additional_modules.h"
 #include "src/trace_processor/importers/proto/default_modules.h"
 #include "src/trace_processor/importers/proto/proto_trace_parser.h"
@@ -106,10 +107,10 @@
 }
 }  // namespace
 
-class MockSchedEventTracker : public SchedEventTracker {
+class MockSchedEventTracker : public FtraceSchedEventTracker {
  public:
   explicit MockSchedEventTracker(TraceProcessorContext* context)
-      : SchedEventTracker(context) {}
+      : FtraceSchedEventTracker(context) {}
 
   MOCK_METHOD(void,
               PushSchedSwitch,
@@ -252,6 +253,7 @@
     context_.track_tracker.reset(new TrackTracker(&context_));
     context_.global_args_tracker.reset(
         new GlobalArgsTracker(context_.storage.get()));
+    context_.mapping_tracker.reset(new MappingTracker(&context_));
     context_.stack_profile_tracker.reset(new StackProfileTracker(&context_));
     context_.args_tracker.reset(new ArgsTracker(&context_));
     context_.args_translation_table.reset(new ArgsTranslationTable(storage_));
@@ -260,7 +262,7 @@
     event_ = new MockEventTracker(&context_);
     context_.event_tracker.reset(event_);
     sched_ = new MockSchedEventTracker(&context_);
-    context_.sched_tracker.reset(sched_);
+    context_.ftrace_sched_tracker.reset(sched_);
     process_ = new NiceMock<MockProcessTracker>(&context_);
     context_.process_tracker.reset(process_);
     slice_ = new NiceMock<MockSliceTracker>(&context_);
diff --git a/src/trace_processor/importers/proto/stack_profile_sequence_state.cc b/src/trace_processor/importers/proto/stack_profile_sequence_state.cc
index e503a09..a469f92 100644
--- a/src/trace_processor/importers/proto/stack_profile_sequence_state.cc
+++ b/src/trace_processor/importers/proto/stack_profile_sequence_state.cc
@@ -23,6 +23,8 @@
 #include "perfetto/ext/base/string_view.h"
 #include "protos/perfetto/trace/interned_data/interned_data.pbzero.h"
 #include "protos/perfetto/trace/profiling/profile_common.pbzero.h"
+#include "src/trace_processor/importers/common/address_range.h"
+#include "src/trace_processor/importers/common/mapping_tracker.h"
 #include "src/trace_processor/importers/common/stack_profile_tracker.h"
 #include "src/trace_processor/importers/proto/packet_sequence_state.h"
 #include "src/trace_processor/importers/proto/packet_sequence_state_generation.h"
@@ -30,6 +32,7 @@
 #include "src/trace_processor/storage/stats.h"
 #include "src/trace_processor/storage/trace_storage.h"
 #include "src/trace_processor/types/trace_processor_context.h"
+#include "src/trace_processor/util/build_id.h"
 
 namespace perfetto {
 namespace trace_processor {
@@ -48,21 +51,23 @@
 
 std::optional<MappingId> StackProfileSequenceState::FindOrInsertMapping(
     uint64_t iid) {
-  if (MappingId* id = cached_mappings_.Find(iid); id) {
-    return *id;
+  if (VirtualMemoryMapping* mapping = FindOrInsertMappingImpl(iid); mapping) {
+    return mapping->mapping_id();
+  }
+  return std::nullopt;
+}
+
+VirtualMemoryMapping* StackProfileSequenceState::FindOrInsertMappingImpl(
+    uint64_t iid) {
+  if (auto ptr = cached_mappings_.Find(iid); ptr) {
+    return *ptr;
   }
   auto* decoder =
       LookupInternedMessage<protos::pbzero::InternedData::kMappingsFieldNumber,
                             protos::pbzero::Mapping>(iid);
   if (!decoder) {
     context_->storage->IncrementStats(stats::stackprofile_invalid_mapping_id);
-    return std::nullopt;
-  }
-
-  std::optional<base::StringView> build_id =
-      LookupInternedBuildId(decoder->build_id());
-  if (!build_id) {
-    return std::nullopt;
+    return nullptr;
   }
 
   std::vector<base::StringView> path_components;
@@ -75,19 +80,26 @@
     }
     path_components.push_back(*str);
   }
-  std::string path = ProfilePacketUtils::MakeMappingName(path_components);
 
-  StackProfileTracker::CreateMappingParams params;
-  params.build_id = *build_id;
+  CreateMappingParams params;
+  std::optional<base::StringView> build_id =
+      LookupInternedBuildId(decoder->build_id());
+  if (!build_id) {
+    return nullptr;
+  }
+  params.build_id = BuildId::FromRaw(*build_id);
+
+  params.memory_range = AddressRange(decoder->start(), decoder->end());
   params.exact_offset = decoder->exact_offset();
   params.start_offset = decoder->start_offset();
-  params.start = decoder->start();
-  params.end = decoder->end();
   params.load_bias = decoder->load_bias();
-  params.name = base::StringView(path);
-  MappingId mapping_id = context_->stack_profile_tracker->InternMapping(params);
-  cached_mappings_.Insert(iid, mapping_id);
-  return mapping_id;
+  params.name = ProfilePacketUtils::MakeMappingName(path_components);
+
+  VirtualMemoryMapping& mapping =
+      context_->mapping_tracker->InternMemoryMapping(std::move(params));
+
+  cached_mappings_.Insert(iid, &mapping);
+  return &mapping;
 }
 
 std::optional<base::StringView>
@@ -110,11 +122,6 @@
 
 std::optional<base::StringView>
 StackProfileSequenceState::LookupInternedMappingPath(uint64_t iid) {
-  // This should really be an error (value not set) or at the very least return
-  // a null string, but for backward compatibility use an empty string instead.
-  if (iid == 0) {
-    return "";
-  }
   auto* decoder = LookupInternedMessage<
       protos::pbzero::InternedData::kMappingPathsFieldNumber,
       protos::pbzero::InternedString>(iid);
@@ -158,7 +165,7 @@
 
   cached_callstacks_.Insert(iid, *parent_callsite_id);
 
-  return *parent_callsite_id;
+  return parent_callsite_id;
 }
 
 std::optional<FrameId> StackProfileSequenceState::FindOrInsertFrame(
@@ -174,9 +181,9 @@
     return std::nullopt;
   }
 
-  std::optional<MappingId> mapping_id =
-      FindOrInsertMapping(decoder->mapping_id());
-  if (!mapping_id) {
+  VirtualMemoryMapping* mapping =
+      FindOrInsertMappingImpl(decoder->mapping_id());
+  if (!mapping) {
     return std::nullopt;
   }
 
@@ -190,9 +197,7 @@
     function_name = *func;
   }
 
-  FrameId frame_id = context_->stack_profile_tracker->InternFrame(
-      *mapping_id, decoder->rel_pc(), function_name);
-
+  FrameId frame_id = mapping->InternFrame(decoder->rel_pc(), function_name);
   cached_frames_.Insert(iid, frame_id);
 
   return frame_id;
diff --git a/src/trace_processor/importers/proto/stack_profile_sequence_state.h b/src/trace_processor/importers/proto/stack_profile_sequence_state.h
index 7a7879a..82de785 100644
--- a/src/trace_processor/importers/proto/stack_profile_sequence_state.h
+++ b/src/trace_processor/importers/proto/stack_profile_sequence_state.h
@@ -30,6 +30,7 @@
 namespace trace_processor {
 
 class TraceProcessorContext;
+class VirtualMemoryMapping;
 
 class StackProfileSequenceState final
     : public PacketSequenceStateGeneration::InternedDataTracker {
@@ -44,13 +45,15 @@
   std::optional<CallsiteId> FindOrInsertCallstack(uint64_t iid);
 
  private:
+  // Returns `nullptr` if none could be found.
+  VirtualMemoryMapping* FindOrInsertMappingImpl(uint64_t iid);
   std::optional<base::StringView> LookupInternedBuildId(uint64_t iid);
   std::optional<base::StringView> LookupInternedMappingPath(uint64_t iid);
   std::optional<base::StringView> LookupInternedFunctionName(uint64_t iid);
   std::optional<FrameId> FindOrInsertFrame(uint64_t iid);
 
   TraceProcessorContext* const context_;
-  base::FlatHashMap<uint64_t, MappingId> cached_mappings_;
+  base::FlatHashMap<uint64_t, VirtualMemoryMapping*> cached_mappings_;
   base::FlatHashMap<uint64_t, CallsiteId> cached_callstacks_;
   base::FlatHashMap<uint64_t, FrameId> cached_frames_;
 };
diff --git a/src/trace_processor/importers/proto/system_probes_parser.cc b/src/trace_processor/importers/proto/system_probes_parser.cc
index 6bad9b7..09adb4a 100644
--- a/src/trace_processor/importers/proto/system_probes_parser.cc
+++ b/src/trace_processor/importers/proto/system_probes_parser.cc
@@ -16,13 +16,12 @@
 
 #include "src/trace_processor/importers/proto/system_probes_parser.h"
 
-#include <set>
-
 #include "perfetto/base/logging.h"
 #include "perfetto/ext/base/string_utils.h"
 #include "perfetto/ext/base/string_view.h"
 #include "perfetto/ext/traced/sys_stats_counters.h"
 #include "perfetto/protozero/proto_decoder.h"
+#include "src/trace_processor/importers/common/clock_tracker.h"
 #include "src/trace_processor/importers/common/event_tracker.h"
 #include "src/trace_processor/importers/common/metadata_tracker.h"
 #include "src/trace_processor/importers/common/process_tracker.h"
@@ -31,6 +30,7 @@
 #include "src/trace_processor/storage/metadata.h"
 #include "src/trace_processor/types/trace_processor_context.h"
 
+#include "protos/perfetto/common/builtin_clock.pbzero.h"
 #include "protos/perfetto/trace/ps/process_stats.pbzero.h"
 #include "protos/perfetto/trace/ps/process_tree.pbzero.h"
 #include "protos/perfetto/trace/system_info.pbzero.h"
@@ -164,6 +164,12 @@
       context->storage->InternString("mem.smaps.pss.file");
   proc_stats_process_names_[ProcessStats::Process::kSmrPssShmemKbFieldNumber] =
       context->storage->InternString("mem.smaps.pss.shmem");
+  proc_stats_process_names_
+      [ProcessStats::Process::kRuntimeUserModeFieldNumber] =
+          context->storage->InternString("runtime.user_ns");
+  proc_stats_process_names_
+      [ProcessStats::Process::kRuntimeKernelModeFieldNumber] =
+          context->storage->InternString("runtime.kernel_ns");
 
   using PsiResource = protos::pbzero::SysStats::PsiSample::PsiResource;
   sys_stats_psi_resource_names_[PsiResource::PSI_RESOURCE_UNSPECIFIED] =
@@ -517,6 +523,15 @@
       context_->process_tracker->SetProcessUid(
           upid, static_cast<uint32_t>(proc.uid()));
     }
+
+    if (proc.process_start_from_boot() > 0) {
+      base::StatusOr<int64_t> start_ts = context_->clock_tracker->ToTraceTime(
+          protos::pbzero::BUILTIN_CLOCK_BOOTTIME,
+          static_cast<int64_t>(proc.process_start_from_boot()));
+      if (start_ts.ok()) {
+        context_->process_tracker->SetStartTsIfUnset(upid, *start_ts);
+      }
+    }
   }
 
   for (auto it = ps.threads(); it; ++it) {
@@ -545,8 +560,6 @@
 void SystemProbesParser::ParseProcessStats(int64_t ts, ConstBytes blob) {
   using Process = protos::pbzero::ProcessStats::Process;
   protos::pbzero::ProcessStats::Decoder stats(blob.data, blob.size);
-  const auto kOomScoreAdjFieldNumber =
-      protos::pbzero::ProcessStats::Process::kOomScoreAdjFieldNumber;
   for (auto it = stats.processes(); it; ++it) {
     // Maps a process counter field it to its value.
     // E.g., 4 := 1024 -> "mem.rss.anon" := 1024.
@@ -574,9 +587,13 @@
       if (is_counter_field) {
         // Memory counters are in KB, keep values in bytes in the trace
         // processor.
-        counter_values[fld.id()] = fld.id() == kOomScoreAdjFieldNumber
-                                       ? fld.as_int64()
-                                       : fld.as_int64() * 1024;
+        int64_t value = fld.as_int64();
+        if (fld.id() != Process::kOomScoreAdjFieldNumber &&
+            fld.id() != Process::kRuntimeUserModeFieldNumber &&
+            fld.id() != Process::kRuntimeKernelModeFieldNumber) {
+          value = value * 1024;  // KB -> B
+        }
+        counter_values[fld.id()] = value;
         has_counter[fld.id()] = true;
       } else {
         // Chrome fields are processed by ChromeSystemProbesParser.
diff --git a/src/trace_processor/importers/proto/system_probes_parser.h b/src/trace_processor/importers/proto/system_probes_parser.h
index 8893ccf..0378128 100644
--- a/src/trace_processor/importers/proto/system_probes_parser.h
+++ b/src/trace_processor/importers/proto/system_probes_parser.h
@@ -72,7 +72,7 @@
   // their StringId. Keep kProcStatsProcessSize equal to 1 + max proto field
   // id of ProcessStats::Process. Also update the value in
   // ChromeSystemProbesParser.
-  static constexpr size_t kProcStatsProcessSize = 21;
+  static constexpr size_t kProcStatsProcessSize = 23;
   std::array<StringId, kProcStatsProcessSize> proc_stats_process_names_{};
 
   // Maps a SysStats::PsiSample::PsiResource type to its StringId.
diff --git a/src/trace_processor/importers/systrace/systrace_line_parser.cc b/src/trace_processor/importers/systrace/systrace_line_parser.cc
index ccf711e..7bfb844 100644
--- a/src/trace_processor/importers/systrace/systrace_line_parser.cc
+++ b/src/trace_processor/importers/systrace/systrace_line_parser.cc
@@ -23,10 +23,10 @@
 #include "src/trace_processor/importers/common/event_tracker.h"
 #include "src/trace_processor/importers/common/process_tracker.h"
 #include "src/trace_processor/importers/common/slice_tracker.h"
+#include "src/trace_processor/importers/common/thread_state_tracker.h"
 #include "src/trace_processor/importers/common/track_tracker.h"
 #include "src/trace_processor/importers/ftrace/binder_tracker.h"
-#include "src/trace_processor/importers/ftrace/sched_event_tracker.h"
-#include "src/trace_processor/importers/ftrace/thread_state_tracker.h"
+#include "src/trace_processor/importers/ftrace/ftrace_sched_event_tracker.h"
 #include "src/trace_processor/importers/systrace/systrace_parser.h"
 #include "src/trace_processor/types/task_state.h"
 
@@ -107,7 +107,7 @@
       return util::Status("Could not parse sched_switch");
     }
 
-    SchedEventTracker::GetOrCreate(context_)->PushSchedSwitch(
+    FtraceSchedEventTracker::GetOrCreate(context_)->PushSchedSwitch(
         line.cpu, line.ts, prev_pid.value(), prev_comm, prev_prio.value(),
         prev_state, next_pid.value(), next_comm, next_prio.value());
   } else if (line.event_name == "tracing_mark_write" ||
diff --git a/src/trace_processor/metrics/sql/android/BUILD.gn b/src/trace_processor/metrics/sql/android/BUILD.gn
index c92ba32..0aa6e64 100644
--- a/src/trace_processor/metrics/sql/android/BUILD.gn
+++ b/src/trace_processor/metrics/sql/android/BUILD.gn
@@ -35,6 +35,7 @@
     "android_dvfs.sql",
     "android_fastrpc.sql",
     "android_frame_timeline_metric.sql",
+    "android_garbage_collection_unagg.sql",
     "android_gpu.sql",
     "android_hwcomposer.sql",
     "android_hwui_metric.sql",
diff --git a/src/trace_processor/metrics/sql/android/android_boot_unagg.sql b/src/trace_processor/metrics/sql/android/android_boot_unagg.sql
index 2444b22..a1ba096 100644
--- a/src/trace_processor/metrics/sql/android/android_boot_unagg.sql
+++ b/src/trace_processor/metrics/sql/android/android_boot_unagg.sql
@@ -15,6 +15,8 @@
 --
 
 INCLUDE PERFETTO MODULE android.app_process_starts;
+INCLUDE PERFETTO MODULE android.garbage_collection;
+INCLUDE PERFETTO MODULE android.suspend;
 
 DROP VIEW IF EXISTS android_boot_unagg_output;
 CREATE PERFETTO VIEW android_boot_unagg_output AS
@@ -71,6 +73,35 @@
             ASC LIMIT 1 )
             AND reason = "service"
         )
-    )
-  )
+    )),
+    'android_post_boot_gc_metric', (SELECT AndroidGarbageCollectionUnaggMetric(
+        'gc_events', (
+            SELECT RepeatedField(
+                AndroidGarbageCollectionUnaggMetric_GarbageCollectionEvent(
+                    'thread_name', thread_name,
+                    'process_name', process_name,
+                    'gc_type', gc_type,
+                    'is_mark_compact', is_mark_compact,
+                    'reclaimed_mb', reclaimed_mb,
+                    'min_heap_mb', min_heap_mb,
+                    'max_heap_mb', max_heap_mb,
+                    'mb_per_ms_of_running_gc', reclaimed_mb/(gc_running_dur/1e6),
+                    'mb_per_ms_of_wall_gc', reclaimed_mb/(gc_dur/1e6),
+                    'gc_dur', gc_dur,
+                    'gc_running_dur', gc_running_dur,
+                    'gc_runnable_dur', gc_runnable_dur,
+                    'gc_unint_io_dur', gc_unint_io_dur,
+                    'gc_unint_non_io_dur', gc_unint_non_io_dur,
+                    'gc_int_dur', gc_int_dur,
+                    'gc_ts', gc_ts,
+                    'tid', tid,
+                    'pid', pid,
+                    'gc_monotonic_dur', _extract_duration_without_suspend(gc_ts, gc_dur)
+                )
+            ) FROM android_garbage_collection_events WHERE gc_ts > (SELECT COALESCE(MIN(ts), 0)
+                FROM thread_slice WHERE name GLOB "*android.intent.action.USER_UNLOCKED*" ORDER BY ts
+                ASC LIMIT 1
+            )
+        )
+    ))
 );
diff --git a/src/trace_processor/metrics/sql/android/android_garbage_collection_unagg.sql b/src/trace_processor/metrics/sql/android/android_garbage_collection_unagg.sql
new file mode 100644
index 0000000..ee54b7e
--- /dev/null
+++ b/src/trace_processor/metrics/sql/android/android_garbage_collection_unagg.sql
@@ -0,0 +1,47 @@
+--
+-- Copyright 2024 The Android Open Source Project
+--
+-- Licensed under the Apache License, Version 2.0 (the "License");
+-- you may not use this file except in compliance with the License.
+-- You may obtain a copy of the License at
+--
+--     https://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+
+INCLUDE PERFETTO MODULE android.garbage_collection;
+INCLUDE PERFETTO MODULE android.suspend;
+
+DROP VIEW IF EXISTS android_garbage_collection_unagg_output;
+CREATE PERFETTO VIEW android_garbage_collection_unagg_output AS
+SELECT AndroidGarbageCollectionUnaggMetric(
+  'gc_events', (
+    SELECT RepeatedField(
+      AndroidGarbageCollectionUnaggMetric_GarbageCollectionEvent(
+        'thread_name', thread_name,
+        'process_name', process_name,
+        'gc_type', gc_type,
+        'is_mark_compact', is_mark_compact,
+        'reclaimed_mb', reclaimed_mb,
+        'min_heap_mb', min_heap_mb,
+        'max_heap_mb', max_heap_mb,
+        'mb_per_ms_of_running_gc', reclaimed_mb/(gc_running_dur/1e6),
+        'mb_per_ms_of_wall_gc', reclaimed_mb/(gc_dur/1e6),
+        'gc_dur', gc_dur,
+        'gc_running_dur', gc_running_dur,
+        'gc_runnable_dur', gc_runnable_dur,
+        'gc_unint_io_dur', gc_unint_io_dur,
+        'gc_unint_non_io_dur', gc_unint_non_io_dur,
+        'gc_int_dur', gc_int_dur,
+        'gc_ts', gc_ts,
+        'tid', tid,
+        'pid', pid,
+        'gc_monotonic_dur', _extract_duration_without_suspend(gc_ts, gc_dur)
+      )) FROM android_garbage_collection_events
+  )
+);
diff --git a/src/trace_processor/perfetto_sql/engine/perfetto_sql_engine.cc b/src/trace_processor/perfetto_sql/engine/perfetto_sql_engine.cc
index 86d96bc..6d7f965 100644
--- a/src/trace_processor/perfetto_sql/engine/perfetto_sql_engine.cc
+++ b/src/trace_processor/perfetto_sql/engine/perfetto_sql_engine.cc
@@ -499,15 +499,9 @@
   runtime_tables_.Insert(create_table.name, std::move(table));
   base::StackString<1024> create("CREATE VIRTUAL TABLE %s USING runtime_table",
                                  create_table.name.c_str());
-  auto status =
-      Execute(SqlSource::FromTraceProcessorImplementation(create.ToStdString()))
-          .status();
-  if (!status.ok()) {
-    // If the registration of the table with SQLite failed, erase the state
-    // we hold.
-    PERFETTO_CHECK(runtime_tables_.Erase(create_table.name));
-  }
-  return status;
+  return Execute(
+             SqlSource::FromTraceProcessorImplementation(create.ToStdString()))
+      .status();
 }
 
 base::Status PerfettoSqlEngine::ExecuteCreateView(
@@ -734,16 +728,10 @@
 
   base::StackString<1024> create(
       "CREATE VIRTUAL TABLE %s USING runtime_table_function", fn_name.c_str());
-  auto status = Execute(cf.sql.RewriteAllIgnoreExisting(
-                            SqlSource::FromTraceProcessorImplementation(
-                                create.ToStdString())))
-                    .status();
-  if (!status.ok()) {
-    // If the registration of the table with SQLite failed, erase the state
-    // we hold.
-    PERFETTO_CHECK(runtime_table_fn_states_.Erase(lower_name));
-  }
-  return status;
+  return Execute(cf.sql.RewriteAllIgnoreExisting(
+                     SqlSource::FromTraceProcessorImplementation(
+                         create.ToStdString())))
+      .status();
 }
 
 base::Status PerfettoSqlEngine::ExecuteCreateMacro(
diff --git a/src/trace_processor/perfetto_sql/intrinsics/table_functions/ancestor.cc b/src/trace_processor/perfetto_sql/intrinsics/table_functions/ancestor.cc
index 7680b24..f102b1f 100644
--- a/src/trace_processor/perfetto_sql/intrinsics/table_functions/ancestor.cc
+++ b/src/trace_processor/perfetto_sql/intrinsics/table_functions/ancestor.cc
@@ -70,6 +70,10 @@
     // Update the loop variable by looking up the next parent_id.
     maybe_parent_id = ref.parent_id();
   }
+  // We traverse the tree in reverse id order. To ensure we meet the
+  // requirements of the extension vectors being sorted, ensure that we reverse
+  // the row numbers to be in id order.
+  std::reverse(row_numbers_accumulator.begin(), row_numbers_accumulator.end());
   return base::OkStatus();
 }
 
diff --git a/src/trace_processor/perfetto_sql/stdlib/android/thread.sql b/src/trace_processor/perfetto_sql/stdlib/android/thread.sql
index d8c0b74..d7a8d44 100644
--- a/src/trace_processor/perfetto_sql/stdlib/android/thread.sql
+++ b/src/trace_processor/perfetto_sql/stdlib/android/thread.sql
@@ -18,7 +18,7 @@
 SELECT STR_SPLIT(STR_SPLIT(STR_SPLIT(STR_SPLIT($thread_name, "-", 0), "[", 0), ":", 0), " ", 0);
 
 -- Per process stats of threads created in a process
-CREATE PERFETTO FUNCTION android_thread_creation_spam(
+CREATE PERFETTO FUNCTION _android_thread_creation_spam(
   -- Minimum duration between creating and destroying a thread before their the
   -- thread creation event is considered. If NULL, considers all thread creations.
   min_thread_dur FLOAT,
diff --git a/src/trace_processor/perfetto_sql/stdlib/sched/BUILD.gn b/src/trace_processor/perfetto_sql/stdlib/sched/BUILD.gn
index fa5ba77..642b822 100644
--- a/src/trace_processor/perfetto_sql/stdlib/sched/BUILD.gn
+++ b/src/trace_processor/perfetto_sql/stdlib/sched/BUILD.gn
@@ -15,6 +15,7 @@
 import("../../../../../gn/perfetto_sql.gni")
 
 perfetto_sql_source_set("sched") {
+  deps = [ "utilization" ]
   sources = [
     "states.sql",
     "thread_executing_span.sql",
diff --git a/src/trace_processor/perfetto_sql/stdlib/sched/states.sql b/src/trace_processor/perfetto_sql/stdlib/sched/states.sql
index b991692..0a7c7ee 100644
--- a/src/trace_processor/perfetto_sql/stdlib/sched/states.sql
+++ b/src/trace_processor/perfetto_sql/stdlib/sched/states.sql
@@ -70,3 +70,80 @@
     ELSE ''
   END
 );
+
+-- The time a thread spent in each scheduling state during its lifetime.
+CREATE PERFETTO TABLE sched_thread_time_in_state(
+  -- Utid of the thread.
+  utid INT,
+  -- Total time of the thread, summed over all of its scheduling states.
+  total_runtime INT,
+  -- One of the scheduling states of kernel thread.
+  state STRING,
+  -- Total time spent in the scheduling state.
+  time_in_state INT,
+  -- Percentage of time thread spent in scheduling state in [0-100] range.
+  percentage_in_state INT
+) AS
+WITH total_dur AS (
+  SELECT
+    utid,
+    sum(dur) AS sum_dur
+  FROM thread_state
+  GROUP BY 1
+),
+summed AS (
+  SELECT
+    utid,
+    state,
+    sum(dur) AS time_in_state
+  FROM thread_state group by 1, 2
+)
+SELECT
+  utid,
+  sum_dur AS total_runtime,
+  state,
+  time_in_state,
+  (time_in_state*100)/(sum_dur) AS percentage_in_state
+FROM summed JOIN total_dur USING (utid);
+
+CREATE PERFETTO MACRO _case_for_state(state Expr)
+RETURNS Expr AS
+MAX(CASE WHEN state = $state THEN percentage_in_state END);
+
+-- Summary of the time a thread spent in each scheduling state, as percentages
+-- (each in the [0, 100] range). The sum over all states might be smaller than
+-- 100, as those values are rounded down.
+CREATE PERFETTO TABLE sched_percentage_of_time_in_state(
+  -- Utid of the thread.
+  utid INT,
+  -- Percentage of time thread spent in running ('Running') state in [0, 100]
+  -- range.
+  running INT,
+  -- Percentage of time thread spent in runnable ('R') state in [0, 100]
+  -- range.
+  runnable INT,
+  -- Percentage of time thread spent in preempted runnable ('R+') state in
+  -- [0, 100] range.
+  runnable_preempted INT,
+  -- Percentage of time thread spent in sleeping ('S') state in [0, 100] range.
+  sleeping INT,
+  -- Percentage of time thread spent in uninterruptible sleep ('D') state in
+  -- [0, 100] range.
+  uninterruptible_sleep INT,
+  -- Percentage of time thread spent in other ('T', 't', 'X', 'Z', 'x', 'I',
+  -- 'K', 'W', 'P', 'N') states in [0, 100] range.
+  other INT
+) AS
+SELECT
+  utid,
+  _case_for_state!('Running') AS running,
+  _case_for_state!('R') AS runnable,
+  _case_for_state!('R+') AS runnable_preempted,
+  _case_for_state!('S') AS sleeping,
+  _case_for_state!('D') AS uninterruptible_sleep,
+  SUM(
+    CASE WHEN state IN ('T', 't', 'X', 'Z', 'x', 'I', 'K', 'W', 'P', 'N')
+    THEN time_in_state END
+  ) * 100/total_runtime AS other
+FROM sched_thread_time_in_state
+GROUP BY utid;
\ No newline at end of file
diff --git a/src/trace_processor/perfetto_sql/stdlib/sched/utilization/BUILD.gn b/src/trace_processor/perfetto_sql/stdlib/sched/utilization/BUILD.gn
new file mode 100644
index 0000000..d4be2cc
--- /dev/null
+++ b/src/trace_processor/perfetto_sql/stdlib/sched/utilization/BUILD.gn
@@ -0,0 +1,24 @@
+# Copyright (C) 2024 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import("../../../../../../gn/perfetto_sql.gni")
+
+perfetto_sql_source_set("utilization") {
+  sources = [
+    "general.sql",
+    "process.sql",
+    "system.sql",
+    "thread.sql",
+  ]
+}
diff --git a/src/trace_processor/perfetto_sql/stdlib/sched/utilization/general.sql b/src/trace_processor/perfetto_sql/stdlib/sched/utilization/general.sql
new file mode 100644
index 0000000..629e5a3
--- /dev/null
+++ b/src/trace_processor/perfetto_sql/stdlib/sched/utilization/general.sql
@@ -0,0 +1,75 @@
+--
+-- Copyright 2024 The Android Open Source Project
+--
+-- Licensed under the Apache License, Version 2.0 (the "License");
+-- you may not use this file except in compliance with the License.
+-- You may obtain a copy of the License at
+--
+--     https://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+
+-- Returns the start timestamp of the |size|-wide partition that contains |ts|.
+CREATE PERFETTO FUNCTION _partition_start(ts INT, size INT) RETURNS INT AS
+-- Int division truncates: rounds non-negative |ts| down to a multiple of |size|.
+SELECT ($ts/$size)*$size;
+
+-- Returns the number of |size|-wide partitions from the one containing
+-- TRACE_START() through the one containing TRACE_END(), inclusive.
+CREATE PERFETTO FUNCTION _partition_count(size INT) RETURNS INT AS
+SELECT
+    (_partition_start(TRACE_END(), $size) -
+    _partition_start(TRACE_START(), $size))/$size + 1;
+
+-- Returns a table of partitions, with the first partition containing
+-- TRACE_START() and the last one containing TRACE_END().
+CREATE PERFETTO FUNCTION _partitions(size INT)
+RETURNS TABLE (ts INT, ts_end INT) AS
+WITH no_ends AS (
+SELECT
+    _partition_start(TRACE_START(), $size) + (id * $size) AS ts
+-- sched is only a source of ids here (NOTE(review): presumably 0..N-1 in scan
+-- order; confirm). If it has too few rows, results would be invalid anyway.
+FROM sched
+LIMIT _partition_count($size))
+SELECT ts, ts + $size AS ts_end FROM no_ends;
+
+-- Splits |intervals| along |partitions|: one row per overlapped partition,
+-- clipped to it, so intervals crossing a boundary count in every partition.
+CREATE PERFETTO MACRO _interval_partitions(
+  -- Requires |ts| and |ts_end| columns.
+  partitions TableOrSubquery,
+  -- Requires |ts| and |ts_end| columns.
+  intervals TableOrSubquery
+) RETURNS TableOrSubquery AS (
+SELECT
+  p.ts AS partition_ts,
+  IIF(i.ts_end < p.ts_end, i.ts_end, p.ts_end) AS ts_end,
+  IIF(i.ts < p.ts, p.ts, i.ts) AS ts
+FROM $intervals i
+JOIN $partitions p
+ON (i.ts < p.ts_end AND (p.ts < i.ts_end OR p.ts <= i.ts)));
+
+-- Returns a table of utilization per given period.
+-- Utilization is calculated as sum of average utilization of each CPU in each
+-- period, which is defined as a multiple of |interval|. For this reason
+-- first and last period might have lower than real utilization.
+CREATE PERFETTO MACRO _sched_avg_utilization_per_period(
+  -- Length of the period on which utilization should be averaged.
+  interval Expr,
+  -- Either sched table or its filtered down version.
+  sched_table TableOrSubquery
+)
+-- The returned table has the schema (ts UINT32, utilization DOUBLE,
+-- unnormalized_utilization DOUBLE).
+RETURNS TableOrSubquery AS (
+SELECT
+  partition_ts AS ts,
+  SUM(ts_end - ts)/(cast_double!($interval) * (SELECT MAX(cpu) + 1 FROM sched)) AS utilization,
+  SUM(ts_end - ts)/cast_double!($interval) AS unnormalized_utilization
+FROM _interval_partitions!(_partitions($interval), $sched_table)
+GROUP BY 1);
diff --git a/src/trace_processor/perfetto_sql/stdlib/sched/utilization/process.sql b/src/trace_processor/perfetto_sql/stdlib/sched/utilization/process.sql
new file mode 100644
index 0000000..8437cb4
--- /dev/null
+++ b/src/trace_processor/perfetto_sql/stdlib/sched/utilization/process.sql
@@ -0,0 +1,72 @@
+--
+-- Copyright 2024 The Android Open Source Project
+--
+-- Licensed under the Apache License, Version 2.0 (the "License");
+-- you may not use this file except in compliance with the License.
+-- You may obtain a copy of the License at
+--
+--     https://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+
+INCLUDE PERFETTO MODULE sched.utilization.general;
+INCLUDE PERFETTO MODULE time.conversion;
+
+-- Returns a table of process utilization per given period.
+-- Utilization is calculated as sum of average utilization of each CPU in each
+-- period, which is defined as a multiple of |interval|. For this reason
+-- first and last period might have lower than real utilization.
+CREATE PERFETTO FUNCTION sched_process_utilization_per_period(
+    -- Length of the period on which utilization should be averaged.
+    interval INT,
+    -- Upid of the process.
+    upid INT
+)
+RETURNS TABLE(
+  -- Timestamp of start of a period.
+  ts INT,
+  -- Sum of average utilization over period.
+  -- Note: as the data is normalized, the values will be in the
+  -- [0, 1] range.
+  utilization DOUBLE,
+  -- Sum of average utilization over all CPUs over period.
+  -- Note: as the data is unnormalized, the values will be in the
+  -- [0, cpu_count] range.
+  unnormalized_utilization DOUBLE
+) AS
+WITH sched_for_upid AS (
+  SELECT
+    ts,
+    ts_end,
+    utid
+  FROM sched
+  JOIN thread USING (utid)
+  JOIN process USING (upid)
+  WHERE upid = $upid AND utid != 0)
+SELECT * FROM _sched_avg_utilization_per_period!($interval, sched_for_upid);
+
+-- Returns a table of process utilization per second.
+-- Utilization is calculated as sum of average utilization of each CPU in each
+-- one-second period. For this reason first and last second might have lower
+-- than real utilization.
+CREATE PERFETTO FUNCTION sched_process_utilization_per_second(
+  -- Upid of the process.
+  upid INT
+)
+RETURNS TABLE (
+  -- Timestamp of start of a second.
+  ts INT,
+  -- Sum of average utilization over period.
+  -- Note: as the data is normalized, the values will be in the
+  -- [0, 1] range.
+  utilization DOUBLE,
+  -- Sum of average utilization over all CPUs over period.
+  -- Note: as the data is unnormalized, the values will be in the
+  -- [0, cpu_count] range.
+  unnormalized_utilization DOUBLE
+) AS
+SELECT * FROM sched_process_utilization_per_period(time_from_s(1), $upid);
\ No newline at end of file
diff --git a/src/trace_processor/perfetto_sql/stdlib/sched/utilization/system.sql b/src/trace_processor/perfetto_sql/stdlib/sched/utilization/system.sql
new file mode 100644
index 0000000..7dd671d
--- /dev/null
+++ b/src/trace_processor/perfetto_sql/stdlib/sched/utilization/system.sql
@@ -0,0 +1,67 @@
+--
+-- Copyright 2024 The Android Open Source Project
+--
+-- Licensed under the Apache License, Version 2.0 (the "License");
+-- you may not use this file except in compliance with the License.
+-- You may obtain a copy of the License at
+--
+--     https://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+
+INCLUDE PERFETTO MODULE sched.utilization.general;
+INCLUDE PERFETTO MODULE time.conversion;
+
+-- The purpose of this module is to provide high level aggregates of system
+-- utilization, akin to /proc/stat results.
+
+-- Returns a table of system utilization per given period.
+-- Utilization is calculated as sum of average utilization of each CPU in each
+-- period, which is defined as a multiple of |interval|. For this reason
+-- first and last period might have lower than real utilization.
+CREATE PERFETTO FUNCTION sched_utilization_per_period(
+  -- Length of the period over which utilization should be averaged.
+  interval INT)
+RETURNS TABLE (
+  -- Timestamp of start of a period.
+  ts INT,
+  -- Sum of average utilization over period.
+  -- Note: as the data is normalized, the values will be in the
+  -- [0, 1] range.
+  utilization DOUBLE,
+  -- Sum of average utilization over all CPUs over period.
+  -- Note: as the data is unnormalized, the values will be in the
+  -- [0, cpu_count] range.
+  unnormalized_utilization DOUBLE
+) AS
+SELECT *
+FROM _sched_avg_utilization_per_period!(
+  $interval,
+  (SELECT * FROM sched WHERE utid != 0)
+);
+
+-- Table with system utilization per second.
+-- Utilization is calculated as the sum of average utilization of each CPU
+-- every second. For this reason the first and last second might have lower
+-- than real utilization.
+CREATE PERFETTO TABLE sched_utilization_per_second(
+  -- Timestamp of start of a second.
+  ts INT,
+  -- Sum of average utilization over period.
+  -- Note: as the data is normalized, the values will be in the
+  -- [0, 1] range.
+  utilization DOUBLE,
+  -- Sum of average utilization over all CPUs over period.
+  -- Note: as the data is unnormalized, the values will be in the
+  -- [0, cpu_count] range.
+  unnormalized_utilization DOUBLE
+) AS
+SELECT
+  ts,
+  utilization,
+  unnormalized_utilization
+FROM sched_utilization_per_period(time_from_s(1));
\ No newline at end of file
diff --git a/src/trace_processor/perfetto_sql/stdlib/sched/utilization/thread.sql b/src/trace_processor/perfetto_sql/stdlib/sched/utilization/thread.sql
new file mode 100644
index 0000000..ae1d3f6
--- /dev/null
+++ b/src/trace_processor/perfetto_sql/stdlib/sched/utilization/thread.sql
@@ -0,0 +1,70 @@
+--
+-- Copyright 2024 The Android Open Source Project
+--
+-- Licensed under the Apache License, Version 2.0 (the "License");
+-- you may not use this file except in compliance with the License.
+-- You may obtain a copy of the License at
+--
+--     https://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+
+INCLUDE PERFETTO MODULE sched.utilization.general;
+INCLUDE PERFETTO MODULE time.conversion;
+
+-- Returns a table of thread utilization per given period.
+-- Utilization is calculated as sum of average utilization of each CPU in each
+-- period, which is defined as a multiple of |interval|. For this reason
+-- first and last period might have lower than real utilization.
+CREATE PERFETTO FUNCTION sched_thread_utilization_per_period(
+    -- Length of the period over which utilization should be averaged.
+    interval INT,
+    -- Utid of the thread.
+    utid INT
+)
+RETURNS TABLE(
+  -- Timestamp of start of a period.
+  ts INT,
+  -- Sum of average utilization over period.
+  -- Note: as the data is normalized, the values will be in the
+  -- [0, 1] range.
+  utilization DOUBLE,
+  -- Sum of average utilization over all CPUs over period.
+  -- Note: as the data is unnormalized, the values will be in the
+  -- [0, cpu_count] range.
+  unnormalized_utilization DOUBLE
+) AS
+WITH sched_for_utid AS (
+  SELECT
+    ts,
+    ts_end,
+    utid
+  FROM sched
+  WHERE utid = $utid
+) SELECT * FROM _sched_avg_utilization_per_period!($interval, sched_for_utid);
+
+-- Returns a table of thread utilization per second.
+-- Utilization is calculated as sum of average utilization of each CPU in each
+-- period, where each period is one second long. For this reason the first
+-- and last period might have lower than real utilization.
+CREATE PERFETTO FUNCTION sched_thread_utilization_per_second(
+  -- Utid of the thread.
+  utid INT
+)
+RETURNS TABLE (
+  -- Timestamp of start of a second.
+  ts INT,
+  -- Sum of average utilization over period.
+  -- Note: as the data is normalized, the values will be in the
+  -- [0, 1] range.
+  utilization DOUBLE,
+  -- Sum of average utilization over all CPUs over period.
+  -- Note: as the data is unnormalized, the values will be in the
+  -- [0, cpu_count] range.
+  unnormalized_utilization DOUBLE
+) AS
+SELECT * FROM sched_thread_utilization_per_period(time_from_s(1), $utid);
\ No newline at end of file
diff --git a/src/trace_processor/perfetto_sql/stdlib/slices/BUILD.gn b/src/trace_processor/perfetto_sql/stdlib/slices/BUILD.gn
index 2e5f02f..f69eafd 100644
--- a/src/trace_processor/perfetto_sql/stdlib/slices/BUILD.gn
+++ b/src/trace_processor/perfetto_sql/stdlib/slices/BUILD.gn
@@ -16,6 +16,7 @@
 
 perfetto_sql_source_set("slices") {
   sources = [
+    "cpu_time.sql",
     "flat_slices.sql",
     "slices.sql",
     "with_context.sql",
diff --git a/src/trace_processor/perfetto_sql/stdlib/slices/cpu_time.sql b/src/trace_processor/perfetto_sql/stdlib/slices/cpu_time.sql
new file mode 100644
index 0000000..4a36d9c
--- /dev/null
+++ b/src/trace_processor/perfetto_sql/stdlib/slices/cpu_time.sql
@@ -0,0 +1,72 @@
+--
+-- Copyright 2024 The Android Open Source Project
+--
+-- Licensed under the Apache License, Version 2.0 (the "License");
+-- you may not use this file except in compliance with the License.
+-- You may obtain a copy of the License at
+--
+--     https://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+
+-- TODO(mayzner): Replace with good implementation of interval intersect.
+CREATE PERFETTO MACRO _interval_intersect_partition_utid(
+  left_table TableOrSubquery,
+  right_table TableOrSubquery
+)
+RETURNS TableOrSubquery AS
+(
+  WITH on_left AS (
+    SELECT
+      B.ts,
+      IIF(
+        A.ts + A.dur <= B.ts + B.dur,
+        A.ts + A.dur - B.ts, B.dur) AS dur,
+      A.id AS left_id,
+      B.id as right_id
+    FROM $left_table A
+    JOIN $right_table B ON (A.ts <= B.ts AND A.ts + A.dur > B.ts AND A.utid = B.utid)
+  ), on_right AS (
+    SELECT
+      B.ts,
+      IIF(
+        A.ts + A.dur <= B.ts + B.dur,
+        A.ts + A.dur - B.ts, B.dur) AS dur,
+      B.id as left_id,
+      A.id AS right_id
+    FROM $right_table A
+    -- Unlike on_left, the join below uses a strict A.ts < B.ts. This avoids
+    -- double counting pairs of intervals that start at exactly the same
+    -- timestamp, as those pairs are already emitted by on_left.
+    JOIN $left_table B ON (A.ts < B.ts AND A.ts + A.dur > B.ts AND A.utid = B.utid)
+  )
+  SELECT * FROM on_left
+  UNION ALL
+  SELECT * FROM on_right
+);
+
+-- Time each thread slice spent running on CPU. Requires scheduling data to be
+-- available in the trace; slices with no overlapping sched row are omitted.
+CREATE PERFETTO TABLE thread_slice_cpu_time(
+    -- Slice id.
+    id INT,
+    -- Total time the slice's thread was running on CPU during the slice.
+    cpu_time INT) AS
+WITH slice_with_utid AS (
+  SELECT
+      slice.id,
+      slice.ts,
+      slice.dur,
+      utid
+  FROM slice
+  JOIN thread_track ON slice.track_id = thread_track.id
+  JOIN thread USING (utid)
+  WHERE utid != 0)
+SELECT left_id AS id, SUM(dur) AS cpu_time
+FROM _interval_intersect_partition_utid!(slice_with_utid, sched)
+GROUP BY 1
+ORDER BY 1;
\ No newline at end of file
diff --git a/src/trace_processor/read_trace.cc b/src/trace_processor/read_trace.cc
index 64a400b..06bd39d 100644
--- a/src/trace_processor/read_trace.cc
+++ b/src/trace_processor/read_trace.cc
@@ -35,12 +35,6 @@
 #include "protos/perfetto/trace/trace.pbzero.h"
 #include "protos/perfetto/trace/trace_packet.pbzero.h"
 
-#if TRACE_PROCESSOR_HAS_MMAP()
-#include <stdlib.h>
-#include <sys/mman.h>
-#include <unistd.h>
-#endif
-
 namespace perfetto {
 namespace trace_processor {
 namespace {
diff --git a/src/trace_processor/read_trace_internal.cc b/src/trace_processor/read_trace_internal.cc
index 4f56cc0..a632134 100644
--- a/src/trace_processor/read_trace_internal.cc
+++ b/src/trace_processor/read_trace_internal.cc
@@ -19,6 +19,7 @@
 #include "perfetto/base/logging.h"
 #include "perfetto/ext/base/file_utils.h"
 #include "perfetto/ext/base/scoped_file.h"
+#include "perfetto/ext/base/scoped_mmap.h"
 #include "perfetto/ext/base/utils.h"
 #include "perfetto/protozero/proto_utils.h"
 #include "perfetto/trace_processor/trace_processor.h"
@@ -34,11 +35,6 @@
 #include "protos/perfetto/trace/trace.pbzero.h"
 #include "protos/perfetto/trace/trace_packet.pbzero.h"
 
-#if TRACE_PROCESSOR_HAS_MMAP()
-#include <stdlib.h>
-#include <sys/mman.h>
-#include <unistd.h>
-#endif
 namespace perfetto {
 namespace trace_processor {
 namespace {
@@ -78,41 +74,36 @@
     TraceProcessor* tp,
     const char* filename,
     const std::function<void(uint64_t parsed_size)>& progress_callback) {
-  base::ScopedFile fd(base::OpenFile(filename, O_RDONLY));
-  if (!fd)
-    return util::ErrStatus("Could not open trace file (path: %s)", filename);
-
   uint64_t bytes_read = 0;
 
-#if TRACE_PROCESSOR_HAS_MMAP()
+#if PERFETTO_HAS_MMAP()
   char* no_mmap = getenv("TRACE_PROCESSOR_NO_MMAP");
-  uint64_t whole_size_64 = static_cast<uint64_t>(lseek(*fd, 0, SEEK_END));
-  lseek(*fd, 0, SEEK_SET);
   bool use_mmap = !no_mmap || *no_mmap != '1';
-  if (sizeof(size_t) < 8 && whole_size_64 > 2147483648ULL)
-    use_mmap = false;  // Cannot use mmap on 32-bit systems for files > 2GB.
 
   if (use_mmap) {
-    const size_t whole_size = static_cast<size_t>(whole_size_64);
-    void* file_mm = mmap(nullptr, whole_size, PROT_READ, MAP_PRIVATE, *fd, 0);
-    if (file_mm != MAP_FAILED) {
-      TraceBlobView whole_mmap(TraceBlob::FromMmap(file_mm, whole_size));
+    base::ScopedMmap mapped = base::ReadMmapWholeFile(filename);
+    if (mapped.IsValid()) {
+      size_t length = mapped.length();
+      TraceBlobView whole_mmap(TraceBlob::FromMmap(std::move(mapped)));
       // Parse the file in chunks so we get some status update on stdio.
       static constexpr size_t kMmapChunkSize = 128ul * 1024 * 1024;
-      while (bytes_read < whole_size_64) {
+      while (bytes_read < length) {
         progress_callback(bytes_read);
         const size_t bytes_read_z = static_cast<size_t>(bytes_read);
-        size_t slice_size = std::min(whole_size - bytes_read_z, kMmapChunkSize);
+        size_t slice_size = std::min(length - bytes_read_z, kMmapChunkSize);
         TraceBlobView slice = whole_mmap.slice_off(bytes_read_z, slice_size);
         RETURN_IF_ERROR(tp->Parse(std::move(slice)));
         bytes_read += slice_size;
       }  // while (slices)
-    }    // if (!MAP_FAILED)
-  }      // if (use_mmap)
+    }  // if (mapped.IsValid())
+  }  // if (use_mmap)
   if (bytes_read == 0)
     PERFETTO_LOG("Cannot use mmap on this system. Falling back on read()");
-#endif  // TRACE_PROCESSOR_HAS_MMAP()
+#endif  // PERFETTO_HAS_MMAP()
   if (bytes_read == 0) {
+    base::ScopedFile fd(base::OpenFile(filename, O_RDONLY));
+    if (!fd)
+      return util::ErrStatus("Could not open trace file (path: %s)", filename);
     RETURN_IF_ERROR(
         ReadTraceUsingRead(tp, *fd, &bytes_read, progress_callback));
   }
diff --git a/src/trace_processor/tables/sched_tables.py b/src/trace_processor/tables/sched_tables.py
index d65c8be..0af8014 100644
--- a/src/trace_processor/tables/sched_tables.py
+++ b/src/trace_processor/tables/sched_tables.py
@@ -82,7 +82,7 @@
         C('thread_state_id', CppInt64()),
         C('irq_context', CppOptional(CppUint32())),
         C('utid', CppUint32()),
-        C('waker_utid', CppUint32()),
+        C('waker_utid', CppUint32())
     ],
     tabledoc=TableDoc(
         doc='''
@@ -104,7 +104,7 @@
                 '''
                   The unique thread id of the thread which caused a wakeup of
                   this thread.
-                '''
+                ''',
         }))
 
 THREAD_STATE_TABLE = Table(
@@ -120,6 +120,7 @@
         C('io_wait', CppOptional(CppUint32())),
         C('blocked_function', CppOptional(CppString())),
         C('waker_utid', CppOptional(CppUint32())),
+        C('waker_id', CppOptional(CppSelfTableId())),
         C('irq_context', CppOptional(CppUint32())),
     ],
     tabledoc=TableDoc(
@@ -155,6 +156,10 @@
                 '''
                   The unique thread id of the thread which caused a wakeup of
                   this thread.
+                ''',
+            'waker_id':
+                '''
+                  The unique thread state id which caused a wakeup of this thread.
                 '''
         }))
 
diff --git a/src/trace_processor/trace_blob.cc b/src/trace_processor/trace_blob.cc
index a77017b..aebbc11 100644
--- a/src/trace_processor/trace_blob.cc
+++ b/src/trace_processor/trace_blob.cc
@@ -19,16 +19,20 @@
 #include <stdlib.h>
 #include <string.h>
 
+#if PERFETTO_BUILDFLAG(PERFETTO_OS_LINUX) ||   \
+    PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID) || \
+    PERFETTO_BUILDFLAG(PERFETTO_OS_APPLE)
+#include <sys/mman.h>
+#endif
+
 #include <algorithm>
 
 #include "perfetto/base/compiler.h"
 #include "perfetto/base/logging.h"
+#include "perfetto/ext/base/scoped_mmap.h"
 #include "perfetto/ext/base/utils.h"
 #include "perfetto/trace_processor/basic_types.h"
-
-#if TRACE_PROCESSOR_HAS_MMAP()
-#include <sys/mman.h>
-#endif
+#include "perfetto/trace_processor/ref_counted.h"
 
 namespace perfetto {
 namespace trace_processor {
@@ -56,10 +60,24 @@
 }
 
 // static
+TraceBlob TraceBlob::FromMmap(base::ScopedMmap mapped) {
+  PERFETTO_CHECK(mapped.IsValid());
+  TraceBlob blob(Ownership::kNullOrMmaped, static_cast<uint8_t*>(mapped.data()),
+                 mapped.length());
+  blob.mapping_ = std::make_unique<base::ScopedMmap>(std::move(mapped));
+  return blob;
+}
+
+// static
 TraceBlob TraceBlob::FromMmap(void* data, size_t size) {
-#if TRACE_PROCESSOR_HAS_MMAP()
-  PERFETTO_CHECK(data && data != MAP_FAILED);
-  return TraceBlob(Ownership::kMmaped, static_cast<uint8_t*>(data), size);
+#if PERFETTO_BUILDFLAG(PERFETTO_OS_LINUX) ||   \
+    PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID) || \
+    PERFETTO_BUILDFLAG(PERFETTO_OS_APPLE)
+  PERFETTO_CHECK(data);
+  TraceBlob blob(Ownership::kNullOrMmaped, static_cast<uint8_t*>(data), size);
+  blob.mapping_ = std::make_unique<base::ScopedMmap>(
+      base::ScopedMmap::InheritMmappedRange(data, size));
+  return blob;
 #else
   base::ignore_result(data);
   base::ignore_result(size);
@@ -67,42 +85,47 @@
 #endif
 }
 
+TraceBlob::TraceBlob(Ownership ownership, uint8_t* data, size_t size)
+    : ownership_(ownership), data_(data), size_(size) {}
+
 TraceBlob::~TraceBlob() {
   switch (ownership_) {
     case Ownership::kHeapBuf:
       delete[] data_;
       break;
 
-    case Ownership::kMmaped:
-#if TRACE_PROCESSOR_HAS_MMAP()
-      PERFETTO_CHECK(munmap(data_, size_) == 0);
-#else
-      PERFETTO_FATAL("mmap not supported");
-#endif
-      break;
-
-    case Ownership::kNull:
-      // Nothing to do.
+    case Ownership::kNullOrMmaped:
+      if (mapping_) {
+        PERFETTO_CHECK(mapping_->reset());
+      }
       break;
   }
   data_ = nullptr;
   size_ = 0;
 }
 
-TraceBlob& TraceBlob::operator=(TraceBlob&& other) noexcept {
-  if (this == &other)
-    return *this;
-  static_assert(sizeof(*this) == base::AlignUp<sizeof(void*)>(
-                                     sizeof(data_) + sizeof(size_) +
-                                     sizeof(ownership_) + sizeof(RefCounted)),
-                "TraceBlob move operator needs updating");
+TraceBlob::TraceBlob(TraceBlob&& other) noexcept
+    : RefCounted(std::move(other)) {
+  static_assert(
+      sizeof(*this) == base::AlignUp<sizeof(void*)>(
+                           sizeof(data_) + sizeof(size_) + sizeof(ownership_) +
+                           sizeof(mapping_) + sizeof(RefCounted)),
+      "TraceBlob move constructor needs updating");
   data_ = other.data_;
   size_ = other.size_;
   ownership_ = other.ownership_;
+  mapping_ = std::move(other.mapping_);
   other.data_ = nullptr;
   other.size_ = 0;
-  other.ownership_ = Ownership::kNull;
-  RefCounted::operator=(std::move(other));
+  other.ownership_ = Ownership::kNullOrMmaped;
+  other.mapping_ = nullptr;
+}
+
+TraceBlob& TraceBlob::operator=(TraceBlob&& other) noexcept {
+  if (this == &other)
+    return *this;
+  this->~TraceBlob();
+  new (this) TraceBlob(std::move(other));
   return *this;
 }
 
diff --git a/src/trace_processor/util/stack_traces_util.cc b/src/trace_processor/trace_blob_unittest.cc
similarity index 68%
copy from src/trace_processor/util/stack_traces_util.cc
copy to src/trace_processor/trace_blob_unittest.cc
index a255560..5063445 100644
--- a/src/trace_processor/util/stack_traces_util.cc
+++ b/src/trace_processor/trace_blob_unittest.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2022 The Android Open Source Project
+ * Copyright (C) 2024 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,17 +14,21 @@
  * limitations under the License.
  */
 
-#include "src/trace_processor/util/stack_traces_util.h"
-#include "perfetto/ext/base/string_view.h"
+#include "perfetto/trace_processor/trace_blob.h"
+
+#include "test/gtest_and_gmock.h"
 
 namespace perfetto {
 namespace trace_processor {
-namespace util {
+namespace {
 
-bool IsHexModuleId(base::StringView module) {
-  return module.size() == 33;
+TEST(TraceBlob, MoveAssignment) {
+  TraceBlob b1 = TraceBlob::Allocate(16);
+  TraceBlob b2 = TraceBlob::Allocate(16);
+
+  b1 = std::move(b2);
 }
 
-}  // namespace util
+}  // namespace
 }  // namespace trace_processor
 }  // namespace perfetto
diff --git a/src/trace_processor/trace_processor_context.cc b/src/trace_processor/trace_processor_context.cc
index c7cea38..2ee7b3d 100644
--- a/src/trace_processor/trace_processor_context.cc
+++ b/src/trace_processor/trace_processor_context.cc
@@ -27,8 +27,10 @@
 #include "src/trace_processor/importers/common/event_tracker.h"
 #include "src/trace_processor/importers/common/flow_tracker.h"
 #include "src/trace_processor/importers/common/global_args_tracker.h"
+#include "src/trace_processor/importers/common/mapping_tracker.h"
 #include "src/trace_processor/importers/common/metadata_tracker.h"
 #include "src/trace_processor/importers/common/process_tracker.h"
+#include "src/trace_processor/importers/common/sched_event_tracker.h"
 #include "src/trace_processor/importers/common/slice_tracker.h"
 #include "src/trace_processor/importers/common/slice_translation_table.h"
 #include "src/trace_processor/importers/common/stack_profile_tracker.h"
diff --git a/src/trace_processor/trace_processor_shell.cc b/src/trace_processor/trace_processor_shell.cc
index 9ad4635..692b105 100644
--- a/src/trace_processor/trace_processor_shell.cc
+++ b/src/trace_processor/trace_processor_shell.cc
@@ -1672,7 +1672,7 @@
     RETURN_IF_ERROR(ExportTraceToDatabase(options.sqlite_file_path));
   }
 
-  if (options.enable_httpd || options.enable_stdiod) {
+  if (options.enable_httpd) {
 #if PERFETTO_HAS_SIGNAL_H()
     if (options.metatrace_path.empty()) {
       // Restore the default signal handler to allow the user to terminate
@@ -1688,18 +1688,16 @@
     }
 #endif
 
-    if (options.enable_httpd) {
 #if PERFETTO_BUILDFLAG(PERFETTO_TP_HTTPD)
-      RunHttpRPCServer(std::move(tp), options.port_number);
-      PERFETTO_FATAL("Should never return");
+    RunHttpRPCServer(std::move(tp), options.port_number);
+    PERFETTO_FATAL("Should never return");
 #else
-      PERFETTO_FATAL("HTTP not available");
+    PERFETTO_FATAL("HTTP not available");
 #endif
-    }
+  }
 
-    if (options.enable_stdiod) {
-      return RunStdioRpcServer(std::move(tp));
-    }
+  if (options.enable_stdiod) {
+    return RunStdioRpcServer(std::move(tp));
   }
 
   if (options.launch_shell) {
diff --git a/src/trace_processor/trace_processor_storage_impl.cc b/src/trace_processor/trace_processor_storage_impl.cc
index 05b88c0..b0c7a15 100644
--- a/src/trace_processor/trace_processor_storage_impl.cc
+++ b/src/trace_processor/trace_processor_storage_impl.cc
@@ -26,8 +26,10 @@
 #include "src/trace_processor/importers/common/clock_tracker.h"
 #include "src/trace_processor/importers/common/event_tracker.h"
 #include "src/trace_processor/importers/common/flow_tracker.h"
+#include "src/trace_processor/importers/common/mapping_tracker.h"
 #include "src/trace_processor/importers/common/metadata_tracker.h"
 #include "src/trace_processor/importers/common/process_tracker.h"
+#include "src/trace_processor/importers/common/sched_event_tracker.h"
 #include "src/trace_processor/importers/common/slice_tracker.h"
 #include "src/trace_processor/importers/common/slice_translation_table.h"
 #include "src/trace_processor/importers/common/stack_profile_tracker.h"
@@ -59,9 +61,11 @@
       new SliceTranslationTable(context_.storage.get()));
   context_.flow_tracker.reset(new FlowTracker(&context_));
   context_.event_tracker.reset(new EventTracker(&context_));
+  context_.sched_event_tracker.reset(new SchedEventTracker(&context_));
   context_.process_tracker.reset(new ProcessTracker(&context_));
   context_.clock_tracker.reset(new ClockTracker(&context_));
   context_.clock_converter.reset(new ClockConverter(&context_));
+  context_.mapping_tracker.reset(new MappingTracker(&context_));
   context_.perf_sample_tracker.reset(new PerfSampleTracker(&context_));
   context_.stack_profile_tracker.reset(new StackProfileTracker(&context_));
   context_.metadata_tracker.reset(new MetadataTracker(context_.storage.get()));
diff --git a/src/trace_processor/types/trace_processor_context.h b/src/trace_processor/types/trace_processor_context.h
index ad4e148..26d1ff5 100644
--- a/src/trace_processor/types/trace_processor_context.h
+++ b/src/trace_processor/types/trace_processor_context.h
@@ -55,11 +55,13 @@
 class StackProfileTracker;
 class HeapGraphTracker;
 class PerfSampleTracker;
+class MappingTracker;
 class MetadataTracker;
 class PacketAnalyzer;
 class ProtoImporterModule;
 class TrackEventModule;
 class ProcessTracker;
+class SchedEventTracker;
 class SliceTracker;
 class SliceTranslationTable;
 class FlowTracker;
@@ -98,8 +100,10 @@
   std::unique_ptr<FlowTracker> flow_tracker;
   std::unique_ptr<ProcessTracker> process_tracker;
   std::unique_ptr<EventTracker> event_tracker;
+  std::unique_ptr<SchedEventTracker> sched_event_tracker;
   std::unique_ptr<ClockTracker> clock_tracker;
   std::unique_ptr<ClockConverter> clock_converter;
+  std::unique_ptr<MappingTracker> mapping_tracker;
   std::unique_ptr<PerfSampleTracker> perf_sample_tracker;
   std::unique_ptr<StackProfileTracker> stack_profile_tracker;
   std::unique_ptr<MetadataTracker> metadata_tracker;
@@ -112,7 +116,6 @@
   std::unique_ptr<Destructible> android_probes_tracker;  // AndroidProbesTracker
   std::unique_ptr<Destructible> binder_tracker;          // BinderTracker
   std::unique_ptr<Destructible> heap_graph_tracker;      // HeapGraphTracker
-  std::unique_ptr<Destructible> sched_tracker;           // SchedEventTracker
   std::unique_ptr<Destructible> syscall_tracker;         // SyscallTracker
   std::unique_ptr<Destructible> system_info_tracker;     // SystemInfoTracker
   std::unique_ptr<Destructible> v4l2_tracker;            // V4l2Tracker
@@ -125,6 +128,8 @@
   std::unique_ptr<Destructible>
       shell_transitions_tracker;             // ShellTransitionsTracker
   std::unique_ptr<Destructible> v8_tracker;  // V8Tracker
+  std::unique_ptr<Destructible>
+      ftrace_sched_tracker;  // FtraceSchedEventTracker
 
   // These fields are trace readers which will be called by |forwarding_parser|
   // once the format of the trace is discovered. They are placed here as they
diff --git a/src/trace_processor/util/BUILD.gn b/src/trace_processor/util/BUILD.gn
index 8420a06..13aefe5 100644
--- a/src/trace_processor/util/BUILD.gn
+++ b/src/trace_processor/util/BUILD.gn
@@ -61,6 +61,17 @@
   }
 }
 
+source_set("build_id") {
+  sources = [
+    "build_id.cc",
+    "build_id.h",
+  ]
+  deps = [
+    "../../../gn:default_deps",
+    "../../../include/perfetto/ext/base:base",
+  ]
+}
+
 source_set("profiler_util") {
   sources = [
     "profiler_util.cc",
@@ -74,18 +85,6 @@
   ]
 }
 
-source_set("stack_traces_util") {
-  sources = [
-    "stack_traces_util.cc",
-    "stack_traces_util.h",
-  ]
-  deps = [
-    "../../../gn:default_deps",
-    "../../../include/perfetto/ext/base:base",
-    "../../../protos/perfetto/trace/profiling:zero",
-  ]
-}
-
 source_set("protozero_to_text") {
   sources = [
     "protozero_to_text.cc",
diff --git a/src/trace_processor/util/build_id.cc b/src/trace_processor/util/build_id.cc
new file mode 100644
index 0000000..20d76dd
--- /dev/null
+++ b/src/trace_processor/util/build_id.cc
@@ -0,0 +1,119 @@
+/*
+ * Copyright (C) 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "src/trace_processor/util/build_id.h"
+
+#include <cctype>
+#include <cstddef>
+#include <string>
+
+#include "perfetto/base/logging.h"
+#include "perfetto/ext/base/string_utils.h"
+#include "perfetto/ext/base/string_view.h"
+
+namespace perfetto {
+namespace trace_processor {
+namespace {
+uint8_t HexToBinary(char c) {  // One hex digit (either case) -> value 0..15.
+  switch (c) {
+    case '0':
+      return 0;
+    case '1':
+      return 1;
+    case '2':
+      return 2;
+    case '3':
+      return 3;
+    case '4':
+      return 4;
+    case '5':
+      return 5;
+    case '6':
+      return 6;
+    case '7':
+      return 7;
+    case '8':
+      return 8;
+    case '9':
+      return 9;
+    case 'a':
+    case 'A':
+      return 10;
+    case 'b':
+    case 'B':
+      return 11;
+    case 'c':
+    case 'C':
+      return 12;
+    case 'd':
+    case 'D':
+      return 13;
+    case 'e':
+    case 'E':
+      return 14;
+    case 'f':
+    case 'F':
+      return 15;
+    default:
+      PERFETTO_CHECK(false);  // |c| is not a hex digit.
+  }
+}
+
+std::string HexToBinary(base::StringView hex) {  // Packs hex digits into bytes.
+  std::string res;
+  res.reserve((hex.size() + 1) / 2);  // Two digits per byte, rounded up.
+  auto it = hex.begin();
+
+  if (hex.size() % 2 != 0) {  // Odd length: first digit becomes its own byte.
+    res.push_back(static_cast<char>(HexToBinary(*it)));
+    ++it;
+  }
+
+  while (it != hex.end()) {  // Remaining digit count is even here.
+    int v = (HexToBinary(*it++) << 4);  // High nibble.
+    v += HexToBinary(*it++);  // Low nibble.
+    res.push_back(static_cast<char>(v));
+  }
+  return res;
+}
+
+// Returns whether |module| has the length (exactly 33 characters) of a hex
+// Chrome module id, to decide whether to convert the module to/from hex.
+// TODO(b/148109467): Remove workaround once all active Chrome versions
+// write raw bytes instead of a string as build_id.
+bool IsHexModuleId(base::StringView module) {
+  return module.size() == 33;
+}
+
+}  // namespace
+
+// static
+BuildId BuildId::FromHex(base::StringView data) {
+  if (IsHexModuleId(data)) {  // 33-character ids are stored verbatim.
+    return BuildId(data.ToStdString());
+  }
+  return BuildId(HexToBinary(data));  // Otherwise store the decoded bytes.
+}
+
+std::string BuildId::ToHex() const {
+  if (IsHexModuleId(base::StringView(raw_))) {  // 33-byte ids: returned as-is.
+    return raw_;
+  }
+  return base::ToHex(raw_.data(), raw_.size());  // Otherwise hex-encode bytes.
+}
+
+}  // namespace trace_processor
+}  // namespace perfetto
diff --git a/src/trace_processor/util/build_id.h b/src/trace_processor/util/build_id.h
new file mode 100644
index 0000000..64ec9ac
--- /dev/null
+++ b/src/trace_processor/util/build_id.h
@@ -0,0 +1,84 @@
+/*
+ * Copyright (C) 2019 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SRC_TRACE_PROCESSOR_UTIL_BUILD_ID_H_
+#define SRC_TRACE_PROCESSOR_UTIL_BUILD_ID_H_
+
+#include <string>
+#include <utility>
+
+#include "perfetto/ext/base/hash.h"
+#include "perfetto/ext/base/string_view.h"
+
+namespace perfetto {
+namespace trace_processor {
+
+// Represents the unique identifier of an executable, shared library, or module.
+// For example for ELF files this is the id stored in the .note.gnu.build-id
+// section. Sometimes a breakpad module id is used.
+// This class abstracts away the details of where this id comes from and how it
+// is converted to a StringId which is the representation used by tables in
+// trace_processor.
+class BuildId {
+ public:
+  // Allow hashing with base::Hash.
+  static constexpr bool kHashable = true;
+  size_t size() const { return raw_.size(); }  // Length of the stored id.
+  const char* data() const { return raw_.data(); }  // Start of the stored id.
+
+  static BuildId FromHex(base::StringView data);  // Builds from hex text.
+
+  static BuildId FromRaw(base::StringView data) {  // Stores |data| unchanged.
+    return BuildId(data.ToStdString());
+  }
+  static BuildId FromRaw(std::string data) { return BuildId(std::move(data)); }
+  static BuildId FromRaw(const uint8_t* data, size_t size) {
+    return BuildId(std::string(reinterpret_cast<const char*>(data), size));
+  }
+
+  BuildId(const BuildId&) = default;
+  BuildId(BuildId&&) = default;
+
+  BuildId& operator=(const BuildId&) = default;
+  BuildId& operator=(BuildId&&) = default;
+
+  bool operator==(const BuildId& o) const { return raw_ == o.raw_; }
+
+  bool operator!=(const BuildId& o) const { return !(*this == o); }
+
+  bool operator<(const BuildId& o) const { return raw_ < o.raw_; }
+
+  std::string ToHex() const;  // Hex text form of the stored id.
+
+  const std::string& raw() const { return raw_; }  // The id exactly as stored.
+
+ private:
+  explicit BuildId(std::string data) : raw_(std::move(data)) {}
+  std::string raw_;  // The id as handed to the constructor.
+};
+
+}  // namespace trace_processor
+}  // namespace perfetto
+
+template <>  // std::hash specialization for BuildId.
+struct std::hash<perfetto::trace_processor::BuildId> {
+  std::size_t operator()(
+      const perfetto::trace_processor::BuildId& o) const noexcept {
+    return perfetto::base::Hasher::Combine(o);  // Delegates to base::Hasher.
+  }
+};
+
+#endif  // SRC_TRACE_PROCESSOR_UTIL_BUILD_ID_H_
diff --git a/src/trace_processor/util/glob.cc b/src/trace_processor/util/glob.cc
index f930683..3f44f36 100644
--- a/src/trace_processor/util/glob.cc
+++ b/src/trace_processor/util/glob.cc
@@ -78,7 +78,7 @@
   trailing_star_ = !pattern.empty() && empty_segment;
 }
 
-bool GlobMatcher::Matches(base::StringView in) {
+bool GlobMatcher::Matches(base::StringView in) const {
   // If there are no segments, that means the pattern is either '' or '*'
   // (or '**', '***' etc which is really the same as '*'). This means
   // we are match if either a) there is a leading star (== trailing star) or
@@ -115,10 +115,10 @@
   // sequentially with possibly some characters separating them. To handle
   // this, we just need to iteratively find each segment, starting from the
   // previous segment.
-  Segment* segment_start = segments_.begin() + (leading_star_ ? 0 : 1);
-  Segment* segment_end = segments_.end() - (trailing_star_ ? 0 : 1);
+  const Segment* segment_start = segments_.begin() + (leading_star_ ? 0 : 1);
+  const Segment* segment_end = segments_.end() - (trailing_star_ ? 0 : 1);
   size_t find_idx = leading_star_ ? 0 : segments_.front().matched_chars;
-  for (Segment* segment = segment_start; segment < segment_end; ++segment) {
+  for (const auto* segment = segment_start; segment < segment_end; ++segment) {
     size_t pos = Find(in, *segment, find_idx);
     if (pos == base::StringView::npos) {
       return false;
@@ -131,7 +131,8 @@
   return true;
 }
 
-bool GlobMatcher::StartsWithSlow(base::StringView in, const Segment& segment) {
+bool GlobMatcher::StartsWithSlow(base::StringView in,
+                                 const Segment& segment) const {
   base::StringView pattern = segment.pattern;
   for (uint32_t i = 0, p = 0; p < pattern.size(); ++i, ++p) {
     // We've run out of characters to consume in the input but still have more
diff --git a/src/trace_processor/util/glob.h b/src/trace_processor/util/glob.h
index b5bce6e..518cca8 100644
--- a/src/trace_processor/util/glob.h
+++ b/src/trace_processor/util/glob.h
@@ -80,7 +80,7 @@
 
   // Checks the provided string against the pattern and returns whether it
   // matches.
-  bool Matches(base::StringView input);
+  bool Matches(base::StringView input) const;
 
   // Returns whether the comparison should really be an equality comparison.
   bool IsEquality() {
@@ -110,7 +110,7 @@
 
   // Returns whether |input| starts with the pattern in |segment| following
   // glob matching rules.
-  bool StartsWith(base::StringView input, const Segment& segment) {
+  bool StartsWith(base::StringView input, const Segment& segment) const {
     if (!contains_char_class_or_question_) {
       return input.StartsWith(segment.pattern);
     }
@@ -119,7 +119,7 @@
 
   // Returns whether |input| ends with the pattern in |segment| following
   // glob matching rules.
-  bool EndsWith(base::StringView input, const Segment& segment) {
+  bool EndsWith(base::StringView input, const Segment& segment) const {
     if (!contains_char_class_or_question_) {
       return input.EndsWith(segment.pattern);
     }
@@ -131,7 +131,9 @@
   // Returns the index where |input| matches the pattern in |segment|
   // following glob matching rules or base::StringView::npos, if no such index
   // exists.
-  size_t Find(base::StringView input, const Segment& segment, size_t start) {
+  size_t Find(base::StringView input,
+              const Segment& segment,
+              size_t start) const {
     if (!contains_char_class_or_question_) {
       return input.find(segment.pattern, start);
     }
@@ -151,7 +153,7 @@
   // Matches |in| against the given character class.
   static bool MatchesCharacterClass(char input, base::StringView char_class);
 
-  bool StartsWithSlow(base::StringView input, const Segment& segment);
+  bool StartsWithSlow(base::StringView input, const Segment& segment) const;
 
   // IMPORTANT: this should *not* be modified after the constructor as we store
   // pointers to the data inside here.
diff --git a/src/trace_processor/util/stack_traces_util.h b/src/trace_processor/util/stack_traces_util.h
deleted file mode 100644
index 1171786..0000000
--- a/src/trace_processor/util/stack_traces_util.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright (C) 2022 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef SRC_TRACE_PROCESSOR_UTIL_STACK_TRACES_UTIL_H_
-#define SRC_TRACE_PROCESSOR_UTIL_STACK_TRACES_UTIL_H_
-
-#include "perfetto/ext/base/string_view.h"
-
-namespace perfetto {
-namespace trace_processor {
-namespace util {
-
-// Returns whether this string is of a hex chrome module or not to decide
-// whether to convert the module to/from hex.
-// TODO(b/148109467): Remove workaround once all active Chrome versions
-// write raw bytes instead of a string as build_id.
-bool IsHexModuleId(base::StringView module);
-
-}  // namespace util
-}  // namespace trace_processor
-}  // namespace perfetto
-
-#endif  // SRC_TRACE_PROCESSOR_UTIL_STACK_TRACES_UTIL_H_
diff --git a/src/trace_redaction/BUILD.gn b/src/trace_redaction/BUILD.gn
index cff9588..f65c588 100644
--- a/src/trace_redaction/BUILD.gn
+++ b/src/trace_redaction/BUILD.gn
@@ -14,6 +14,18 @@
 
 import("../../gn/test.gni")
 
+# The main entry point for external processes. This is separate from
+# "trace_redaction" because tests also depend on "trace_redaction".
+executable("trace_redactor") {
+  sources = [ "main.cc" ]
+  deps = [
+    ":trace_redaction",
+    "../../gn:default_deps",
+    "../../include/perfetto/base",
+    "../../include/perfetto/ext/base",
+  ]
+}
+
 source_set("trace_redaction") {
   sources = [
     "find_package_uid.cc",
diff --git a/src/trace_redaction/find_package_uid.cc b/src/trace_redaction/find_package_uid.cc
index d069fe2..0c46ff5 100644
--- a/src/trace_redaction/find_package_uid.cc
+++ b/src/trace_redaction/find_package_uid.cc
@@ -17,7 +17,9 @@
 #include "src/trace_redaction/find_package_uid.h"
 
 #include "perfetto/ext/base/status_or.h"
-#include "perfetto/ext/base/string_utils.h"
+#include "perfetto/ext/base/string_view.h"
+#include "src/trace_redaction/trace_redaction_framework.h"
+
 #include "protos/perfetto/trace/android/packages_list.pbzero.h"
 #include "protos/perfetto/trace/trace_packet.pbzero.h"
 
diff --git a/src/trace_redaction/find_package_uid.h b/src/trace_redaction/find_package_uid.h
index 4ae34d4..d43b2af 100644
--- a/src/trace_redaction/find_package_uid.h
+++ b/src/trace_redaction/find_package_uid.h
@@ -18,9 +18,10 @@
 #define SRC_TRACE_REDACTION_FIND_PACKAGE_UID_H_
 
 #include "perfetto/ext/base/status_or.h"
-#include "protos/perfetto/trace/trace_packet.pbzero.h"
 #include "src/trace_redaction/trace_redaction_framework.h"
 
+#include "protos/perfetto/trace/trace_packet.pbzero.h"
+
 namespace perfetto::trace_redaction {
 
 // Writes the uid for the package matching `Context.package_name`. Returns
diff --git a/src/trace_redaction/find_package_uid_unittest.cc b/src/trace_redaction/find_package_uid_unittest.cc
index 44f0a45..5f390ab 100644
--- a/src/trace_redaction/find_package_uid_unittest.cc
+++ b/src/trace_redaction/find_package_uid_unittest.cc
@@ -15,18 +15,18 @@
  * limitations under the License.
  */
 
-#include "find_package_uid.h"
+#include "src/trace_redaction/find_package_uid.h"
 
 #include <cstdint>
 #include <string>
 
-#include "protos/perfetto/trace/android/packages_list.gen.h"
-#include "protos/perfetto/trace/ps/process_tree.gen.h"
-#include "protos/perfetto/trace/ps/process_tree.pbzero.h"
-#include "protos/perfetto/trace/trace_packet.gen.h"
 #include "src/base/test/status_matchers.h"
 #include "test/gtest_and_gmock.h"
 
+#include "protos/perfetto/trace/android/packages_list.gen.h"
+#include "protos/perfetto/trace/ps/process_tree.gen.h"
+#include "protos/perfetto/trace/trace_packet.gen.h"
+
 namespace perfetto::trace_redaction {
 
 namespace {
diff --git a/src/trace_redaction/main.cc b/src/trace_redaction/main.cc
new file mode 100644
index 0000000..24ad116
--- /dev/null
+++ b/src/trace_redaction/main.cc
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2024 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "perfetto/base/logging.h"
+#include "perfetto/base/status.h"
+#include "src/trace_redaction/find_package_uid.h"
+#include "src/trace_redaction/prune_package_list.h"
+#include "src/trace_redaction/trace_redaction_framework.h"
+#include "src/trace_redaction/trace_redactor.h"
+
+namespace perfetto::trace_redaction {
+
+// Assembles the redaction pipeline for |package_name| and runs it from the
+// |input| trace to the |output| trace.
+static base::Status Main(std::string_view input,
+                         std::string_view output,
+                         std::string_view package_name) {
+  TraceRedactor pipeline;
+  Context ctx;
+  ctx.package_name = package_name;
+
+  // Collectors.
+  pipeline.collectors()->emplace_back(new FindPackageUid());
+
+  // TODO(vaage): Add all builders.
+
+  // Transforms.
+  pipeline.transformers()->emplace_back(new PrunePackageList());
+
+  return pipeline.Redact(input, output, &ctx);
+}
+
+}  // namespace perfetto::trace_redaction
+
+int main(int argc, char** argv) {
+  // Process exit codes.
+  constexpr int kSuccess = 0;
+  constexpr int kFailure = 1;
+  constexpr int kInvalidArgs = 2;
+
+  // Expect exactly: <binary> <input file> <output file> <package name>.
+  if (argc != 4) {
+    PERFETTO_ELOG(
+        "Invalid arguments: %s <input file> <output file> <package name>",
+        argv[0]);
+    return kInvalidArgs;
+  }
+
+  auto status = perfetto::trace_redaction::Main(argv[1], argv[2], argv[3]);
+  if (!status.ok()) {
+    PERFETTO_ELOG("Unexpected error: %s", status.c_message());
+    return kFailure;
+  }
+  return kSuccess;
+}
diff --git a/src/trace_redaction/populate_allow_lists.cc b/src/trace_redaction/populate_allow_lists.cc
index b14b1f6..1ccaa00 100644
--- a/src/trace_redaction/populate_allow_lists.cc
+++ b/src/trace_redaction/populate_allow_lists.cc
@@ -44,6 +44,10 @@
       protos::pbzero::TracePacket::kAndroidSystemPropertyFieldNumber,
       protos::pbzero::TracePacket::kSynchronizationMarkerFieldNumber,
       protos::pbzero::TracePacket::kFtraceEventsFieldNumber,
+
+      // Keep the package list. There are some metrics and stdlib queries that
+      // depend on the package list.
+      protos::pbzero::TracePacket::kPackagesListFieldNumber,
   };
 
   return base::OkStatus();
diff --git a/src/trace_redaction/prune_package_list.cc b/src/trace_redaction/prune_package_list.cc
index 52c1394..83d2355 100644
--- a/src/trace_redaction/prune_package_list.cc
+++ b/src/trace_redaction/prune_package_list.cc
@@ -16,7 +16,12 @@
 
 #include "src/trace_redaction/prune_package_list.h"
 
+#include <string>
+
+#include "perfetto/base/status.h"
+
 #include "protos/perfetto/trace/android/packages_list.gen.h"
+#include "protos/perfetto/trace/trace_packet.gen.h"
 
 namespace perfetto::trace_redaction {
 
diff --git a/src/trace_redaction/prune_package_list.h b/src/trace_redaction/prune_package_list.h
index cf1298f..24d9ec2 100644
--- a/src/trace_redaction/prune_package_list.h
+++ b/src/trace_redaction/prune_package_list.h
@@ -19,6 +19,7 @@
 
 #include <string>
 
+#include "perfetto/base/status.h"
 #include "src/trace_redaction/trace_redaction_framework.h"
 
 namespace perfetto::trace_redaction {
diff --git a/src/trace_redaction/prune_package_list_unittest.cc b/src/trace_redaction/prune_package_list_unittest.cc
index 6a9af6c..0f78cba 100644
--- a/src/trace_redaction/prune_package_list_unittest.cc
+++ b/src/trace_redaction/prune_package_list_unittest.cc
@@ -16,13 +16,15 @@
  */
 
 #include <cstdint>
+#include <memory>
 #include <string>
 
+#include "src/trace_redaction/prune_package_list.h"
+#include "test/gtest_and_gmock.h"
+
 #include "protos/perfetto/trace/android/packages_list.gen.h"
 #include "protos/perfetto/trace/ps/process_tree.gen.h"
 #include "protos/perfetto/trace/trace_packet.gen.h"
-#include "src/trace_redaction/prune_package_list.h"
-#include "test/gtest_and_gmock.h"
 
 namespace perfetto::trace_redaction {
 
diff --git a/src/trace_redaction/scrub_trace_packet.cc b/src/trace_redaction/scrub_trace_packet.cc
index e4f03cf..e56acf9 100644
--- a/src/trace_redaction/scrub_trace_packet.cc
+++ b/src/trace_redaction/scrub_trace_packet.cc
@@ -14,8 +14,12 @@
  * limitations under the License.
  */
 
+#include <string>
+
 #include "src/trace_redaction/scrub_trace_packet.h"
 
+#include "perfetto/base/status.h"
+
 namespace perfetto::trace_redaction {
 // The TracePacket message has a simple structure. At its core its one sub
 // message (e.g. ProcessTree) and some additional context (e.g. timestamp).
diff --git a/src/trace_redaction/scrub_trace_packet.h b/src/trace_redaction/scrub_trace_packet.h
index 06710d3..fbf89ca 100644
--- a/src/trace_redaction/scrub_trace_packet.h
+++ b/src/trace_redaction/scrub_trace_packet.h
@@ -19,8 +19,6 @@
 
 #include "src/trace_redaction/trace_redaction_framework.h"
 
-#include "protos/perfetto/trace/trace_packet.pbzero.h"
-
 namespace perfetto::trace_redaction {
 
 // Drops whole trace packets based on an allow-list (e.g. retain ProcessTree
diff --git a/src/trace_redaction/scrub_trace_packet_unittest.cc b/src/trace_redaction/scrub_trace_packet_unittest.cc
index 82b99d4..f203165 100644
--- a/src/trace_redaction/scrub_trace_packet_unittest.cc
+++ b/src/trace_redaction/scrub_trace_packet_unittest.cc
@@ -21,6 +21,7 @@
 #include "test/gtest_and_gmock.h"
 
 #include "protos/perfetto/trace/ps/process_tree.gen.h"
+#include "protos/perfetto/trace/trace_packet.gen.h"
 #include "protos/perfetto/trace/trace_packet.pbzero.h"
 
 namespace perfetto::trace_redaction {
diff --git a/src/trace_redaction/trace_redaction_framework.h b/src/trace_redaction/trace_redaction_framework.h
index b481961..76b8594 100644
--- a/src/trace_redaction/trace_redaction_framework.h
+++ b/src/trace_redaction/trace_redaction_framework.h
@@ -20,12 +20,11 @@
 #include <cstdint>
 #include <optional>
 #include <string>
-#include <vector>
 
 #include "perfetto/base/flat_set.h"
+#include "perfetto/base/status.h"
 #include "perfetto/ext/base/status_or.h"
 
-#include "protos/perfetto/trace/trace_packet.gen.h"
 #include "protos/perfetto/trace/trace_packet.pbzero.h"
 
 namespace perfetto::trace_redaction {
diff --git a/src/trace_redaction/trace_redactor.cc b/src/trace_redaction/trace_redactor.cc
index 122d2d9..94168e6 100644
--- a/src/trace_redaction/trace_redactor.cc
+++ b/src/trace_redaction/trace_redactor.cc
@@ -16,10 +16,7 @@
 
 #include "src/trace_redaction/trace_redactor.h"
 
-#include <sys/mman.h>
-#include <unistd.h>
 #include <cstddef>
-#include <cstdio>
 #include <string>
 #include <string_view>
 #include <vector>
@@ -27,6 +24,7 @@
 #include "perfetto/base/status.h"
 #include "perfetto/ext/base/file_utils.h"
 #include "perfetto/ext/base/scoped_file.h"
+#include "perfetto/ext/base/scoped_mmap.h"
 #include "perfetto/protozero/scattered_heap_buffer.h"
 #include "perfetto/trace_processor/trace_blob.h"
 #include "perfetto/trace_processor/trace_blob_view.h"
@@ -35,33 +33,10 @@
 #include "protos/perfetto/trace/trace.pbzero.h"
 
 namespace perfetto::trace_redaction {
-namespace {
 
 using Trace = protos::pbzero::Trace;
 using TracePacket = protos::pbzero::TracePacket;
 
-// Basic scoped objects (similar to base::ScopedResource) to make sure page
-// resources are cleaned-up.
-struct MappedFile {
-  void* start_address;
-  size_t size;
-
-  MappedFile() : start_address(MAP_FAILED), size(0) {}
-
-  ~MappedFile() {
-    if (start_address != MAP_FAILED) {
-      munmap(start_address, size);
-    }
-  }
-
-  MappedFile(const MappedFile&) = delete;
-  MappedFile& operator=(const MappedFile&) = delete;
-  MappedFile(MappedFile&&) = delete;
-  MappedFile& operator=(MappedFile&&) = delete;
-};
-
-}  // namespace
-
 TraceRedactor::TraceRedactor() = default;
 
 TraceRedactor::~TraceRedactor() = default;
@@ -70,35 +45,15 @@
                                    std::string_view dest_filename,
                                    Context* context) const {
   const std::string source_filename_str(source_filename);
-
-  base::ScopedFile file_handle(base::OpenFile(source_filename_str, O_RDONLY));
-
-  if (!file_handle) {
-    return base::ErrStatus("Failed to read trace from disk: %s",
+  base::ScopedMmap mapped =
+      base::ReadMmapWholeFile(source_filename_str.c_str());
+  if (!mapped.IsValid()) {
+    return base::ErrStatus("Failed to map pages for trace (%s)",
                            source_filename_str.c_str());
   }
 
-  auto file_size_offset = lseek(*file_handle, 0, SEEK_END);
-
-  if (file_size_offset <= 0) {
-    return base::ErrStatus("Could not determine trace size (%s)",
-                           source_filename_str.c_str());
-  }
-
-  lseek(*file_handle, 0, SEEK_SET);
-
-  MappedFile page;
-  page.size = static_cast<size_t>(file_size_offset);
-  page.start_address =
-      mmap(nullptr, page.size, PROT_READ, MAP_PRIVATE, *file_handle, 0);
-
-  if (page.start_address == MAP_FAILED) {
-    return base::ErrStatus("Failed to map pages for trace (%zu bytes)",
-                           page.size);
-  }
-
   trace_processor::TraceBlobView whole_view(
-      trace_processor::TraceBlob::FromMmap(page.start_address, page.size));
+      trace_processor::TraceBlob::FromMmap(std::move(mapped)));
 
   if (auto status = Collect(context, whole_view); !status.ok()) {
     return status;
diff --git a/src/trace_redaction/trace_redactor.h b/src/trace_redaction/trace_redactor.h
index 99a8832..82ec371 100644
--- a/src/trace_redaction/trace_redactor.h
+++ b/src/trace_redaction/trace_redactor.h
@@ -17,8 +17,12 @@
 #ifndef SRC_TRACE_REDACTION_TRACE_REDACTOR_H_
 #define SRC_TRACE_REDACTION_TRACE_REDACTOR_H_
 
+#include <memory>
+#include <string>
 #include <string_view>
+#include <vector>
 
+#include "perfetto/base/status.h"
 #include "perfetto/trace_processor/trace_blob_view.h"
 #include "src/trace_redaction/trace_redaction_framework.h"
 
diff --git a/src/trace_redaction/trace_redactor_integrationtest.cc b/src/trace_redaction/trace_redactor_integrationtest.cc
index 8e10095..50074b1 100644
--- a/src/trace_redaction/trace_redactor_integrationtest.cc
+++ b/src/trace_redaction/trace_redactor_integrationtest.cc
@@ -14,11 +14,20 @@
  * limitations under the License.
  */
 
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <vector>
+
+#include "perfetto/base/status.h"
 #include "perfetto/ext/base/file_utils.h"
 #include "perfetto/ext/base/temp_file.h"
+#include "src/base/test/tmp_dir_tree.h"
 #include "src/base/test/utils.h"
 #include "src/trace_redaction/find_package_uid.h"
 #include "src/trace_redaction/prune_package_list.h"
+#include "src/trace_redaction/trace_redaction_framework.h"
 #include "src/trace_redaction/trace_redactor.h"
 #include "test/gtest_and_gmock.h"
 
@@ -36,9 +45,6 @@
 constexpr std::string_view kTracePath =
     "test/data/trace-redaction-general.pftrace";
 
-constexpr std::string_view kPackageName =
-    "com.Unity.com.unity.multiplayer.samples.coop";
-
 constexpr uint64_t kPackageUid = 10252;
 
 class TraceRedactorIntegrationTest : public testing::Test {
@@ -49,13 +55,10 @@
  protected:
   void SetUp() override {
     src_trace_ = base::GetTestDataPath(std::string(kTracePath));
-    dest_trace_ = std::make_unique<base::TempFile>(base::TempFile::Create());
   }
 
   const std::string& src_trace() const { return src_trace_; }
 
-  const std::string& dest_trace() const { return dest_trace_->path(); }
-
   std::vector<protozero::ConstBytes> GetPackageInfos(
       const Trace::Decoder& trace) const {
     std::vector<protozero::ConstBytes> infos;
@@ -74,9 +77,8 @@
     return infos;
   }
 
- private:
   std::string src_trace_;
-  std::unique_ptr<base::TempFile> dest_trace_;
+  base::TmpDirTree tmp_dir_;
 };
 
 TEST_F(TraceRedactorIntegrationTest, FindsPackageAndFiltersPackageList) {
@@ -85,39 +87,91 @@
   redaction.transformers()->emplace_back(new PrunePackageList());
 
   Context context;
-  context.package_name = kPackageName;
+  context.package_name = "com.Unity.com.unity.multiplayer.samples.coop";
 
-  auto result = redaction.Redact(src_trace(), dest_trace(), &context);
+  auto result = redaction.Redact(
+      src_trace(), tmp_dir_.AbsolutePath("dst.pftrace"), &context);
+  tmp_dir_.TrackFile("dst.pftrace");
 
   ASSERT_TRUE(result.ok()) << result.message();
 
   std::string redacted_buffer;
-  ASSERT_TRUE(base::ReadFile(dest_trace(), &redacted_buffer));
+  ASSERT_TRUE(
+      base::ReadFile(tmp_dir_.AbsolutePath("dst.pftrace"), &redacted_buffer));
 
   Trace::Decoder redacted_trace(redacted_buffer);
   std::vector<protozero::ConstBytes> infos = GetPackageInfos(redacted_trace);
 
+  ASSERT_TRUE(context.package_uid.has_value());
+  ASSERT_EQ(NormalizeUid(context.package_uid.value()),
+            NormalizeUid(kPackageUid));
+
   // It is possible for two packages_list to appear in the trace. The
   // find_package_uid will stop after the first one is found. Package uids are
   // appear as n * 1,000,000 where n is some integer. It is also possible for
   // two packages_list to contain copies of each other - for example
   // "com.Unity.com.unity.multiplayer.samples.coop" appears in both
   // packages_list.
-  ASSERT_GE(infos.size(), 1u);
+  ASSERT_EQ(infos.size(), 2u);
 
-  for (const auto& info_buffer : infos) {
-    PackageInfo::Decoder info(info_buffer);
+  std::array<PackageInfo::Decoder, 2> decoders = {
+      PackageInfo::Decoder(infos[0]), PackageInfo::Decoder(infos[1])};
 
+  for (auto& decoder : decoders) {
+    ASSERT_TRUE(decoder.has_name());
+    ASSERT_EQ(decoder.name().ToStdString(),
+              "com.Unity.com.unity.multiplayer.samples.coop");
+
+    ASSERT_TRUE(decoder.has_uid());
+    ASSERT_EQ(NormalizeUid(decoder.uid()), NormalizeUid(kPackageUid));
+  }
+}
+
+// It is possible for multiple packages to share a uid. The names will appear
+// across multiple package lists. The only time the package name appears is in
+// the package list, so there is no way to differentiate these packages (only
+// the uid is used later), so each entry should remain.
+TEST_F(TraceRedactorIntegrationTest, RetainsAllInstancesOfUid) {
+  TraceRedactor redaction;
+  redaction.collectors()->emplace_back(new FindPackageUid());
+  redaction.transformers()->emplace_back(new PrunePackageList());
+
+  Context context;
+  context.package_name = "com.google.android.networkstack.tethering";
+
+  auto result = redaction.Redact(
+      src_trace(), tmp_dir_.AbsolutePath("dst.pftrace"), &context);
+  tmp_dir_.TrackFile("dst.pftrace");
+  ASSERT_TRUE(result.ok()) << result.message();
+
+  std::string redacted_buffer;
+  ASSERT_TRUE(
+      base::ReadFile(tmp_dir_.AbsolutePath("dst.pftrace"), &redacted_buffer));
+
+  Trace::Decoder redacted_trace(redacted_buffer);
+  std::vector<protozero::ConstBytes> infos = GetPackageInfos(redacted_trace);
+
+  ASSERT_EQ(infos.size(), 8u);
+
+  std::array<std::string, 8> package_names;
+
+  for (size_t i = 0; i < infos.size(); ++i) {
+    PackageInfo::Decoder info(infos[i]);
     ASSERT_TRUE(info.has_name());
-    ASSERT_EQ(info.name().ToStdString(), kPackageName);
-
-    ASSERT_TRUE(info.has_uid());
-    ASSERT_EQ(NormalizeUid(info.uid()), NormalizeUid(kPackageUid));
+    package_names[i] = info.name().ToStdString();
   }
 
-  ASSERT_TRUE(context.package_uid.has_value());
-  ASSERT_EQ(NormalizeUid(context.package_uid.value()),
-            NormalizeUid(kPackageUid));
+  std::sort(package_names.begin(), package_names.end());
+  ASSERT_EQ(package_names[0], "com.google.android.cellbroadcastservice");
+  ASSERT_EQ(package_names[1], "com.google.android.cellbroadcastservice");
+  ASSERT_EQ(package_names[2], "com.google.android.networkstack");
+  ASSERT_EQ(package_names[3], "com.google.android.networkstack");
+  ASSERT_EQ(package_names[4],
+            "com.google.android.networkstack.permissionconfig");
+  ASSERT_EQ(package_names[5],
+            "com.google.android.networkstack.permissionconfig");
+  ASSERT_EQ(package_names[6], "com.google.android.networkstack.tethering");
+  ASSERT_EQ(package_names[7], "com.google.android.networkstack.tethering");
 }
 
 }  // namespace
diff --git a/src/traced/probes/ftrace/ftrace_config_muxer.cc b/src/traced/probes/ftrace/ftrace_config_muxer.cc
index 6a55f8c..7fcfef1 100644
--- a/src/traced/probes/ftrace/ftrace_config_muxer.cc
+++ b/src/traced/probes/ftrace/ftrace_config_muxer.cc
@@ -460,6 +460,12 @@
     }
   }
 
+  // recording a subset of syscalls -> enable the backing events
+  if (request.syscall_events_size() > 0) {
+    InsertEvent("raw_syscalls", "sys_enter", &events);
+    InsertEvent("raw_syscalls", "sys_exit", &events);
+  }
+
   // function_graph tracer emits two builtin ftrace events
   if (request.enable_function_graph()) {
     InsertEvent("ftrace", "funcgraph_entry", &events);
diff --git a/src/traced/probes/ps/process_stats_data_source.cc b/src/traced/probes/ps/process_stats_data_source.cc
index c157b2c..9a27d29 100644
--- a/src/traced/probes/ps/process_stats_data_source.cc
+++ b/src/traced/probes/ps/process_stats_data_source.cc
@@ -21,11 +21,11 @@
 
 #include <algorithm>
 #include <array>
+#include <optional>
 
 #include "perfetto/base/task_runner.h"
 #include "perfetto/base/time.h"
 #include "perfetto/ext/base/file_utils.h"
-#include "perfetto/ext/base/hash.h"
 #include "perfetto/ext/base/metatrace.h"
 #include "perfetto/ext/base/scoped_file.h"
 #include "perfetto/ext/base/string_splitter.h"
@@ -105,6 +105,41 @@
   return namespaced;
 }
 
+struct ProcessRuntimes {
+  uint64_t utime = 0;      // runtime in user mode, ns once parsed
+  uint64_t stime = 0;      // runtime in kernel mode, ns once parsed
+  uint64_t starttime = 0;  // process start relative to boot, ns once parsed
+};
+
+// Extracts utime (field 14), stime (15) and starttime (22) from the contents
+// of /proc/pid/stat, scaled from clock ticks to ns; nullopt on any failure.
+// Field 2 (comm) can itself contain spaces and ')', so scanning starts after
+// the last ')' rather than trying to skip comm with %s.
+std::optional<ProcessRuntimes> ParseProcessRuntimes(
+    const std::string& proc_stat) {
+  ProcessRuntimes ret = {};
+  size_t comm_end = proc_stat.rfind(')');
+  // clang-format off
+  if (comm_end == std::string::npos ||
+      sscanf(proc_stat.c_str() + comm_end + 1,
+             " %*c %*d %*d %*d %*d %*d %*u %*u "
+             "%*u %*u %*u %" SCNu64 " %" SCNu64 " %*d %*d %*d %*d %*d "
+             "%*d %" SCNu64 "",
+             &ret.utime, &ret.stime, &ret.starttime) != 3) {
+    PERFETTO_DLOG("empty or unexpected /proc/pid/stat contents");
+    return std::nullopt;
+  }
+  // clang-format on
+  int64_t tickrate = sysconf(_SC_CLK_TCK);
+  if (tickrate <= 0)
+    return std::nullopt;
+  uint64_t ns_per_tick = 1'000'000'000ULL / static_cast<uint64_t>(tickrate);
+  ret.utime *= ns_per_tick;
+  ret.stime *= ns_per_tick;
+  ret.starttime *= ns_per_tick;
+  return ret;
+}
+
 // Note: conversions intentionally not checking that the full string was
 // numerical as calling code depends on discarding suffixes in cases such as:
 // * "92 kB" -> 92
@@ -141,6 +176,8 @@
   dump_all_procs_on_start_ = cfg.scan_all_processes_on_start();
   resolve_process_fds_ = cfg.resolve_process_fds();
   scan_smaps_rollup_ = cfg.scan_smaps_rollup();
+  record_process_age_ = cfg.record_process_age();
+  record_process_runtime_ = cfg.record_process_runtime();
 
   enable_on_demand_dumps_ = true;
   for (auto quirk = cfg.quirks(); quirk; ++quirk) {
@@ -193,7 +230,9 @@
   base::FlatSet<int32_t> pids;
   while (int32_t pid = ReadNextNumericDir(*proc_dir)) {
     std::string pid_status = ReadProcPidFile(pid, "status");
-    bool namespaced_process = WriteProcess(pid, pid_status);
+    std::string pid_stat =
+        record_process_age_ ? ReadProcPidFile(pid, "stat") : "";
+    bool namespaced_process = WriteProcess(pid, pid_status, pid_stat);
 
     base::StackString<128> task_path("/proc/%d/task", pid);
     base::ScopedDir task_dir(opendir(task_path.c_str()));
@@ -305,7 +344,9 @@
     // We need to read the status file if |pid| is non-main thread.
     const std::string& proc_status_tgid =
         (tgid == tid ? proc_status : ReadProcPidFile(tgid, "status"));
-    WriteProcess(tgid, proc_status_tgid);
+    const std::string& proc_stat =
+        record_process_age_ ? ReadProcPidFile(tgid, "stat") : "";
+    WriteProcess(tgid, proc_status_tgid, proc_stat);
   }
   if (pid != tgid) {
     PERFETTO_DCHECK(!seen_pids_.count(pid));
@@ -315,14 +356,16 @@
 
 // Returns true if the process is within a PID namespace.
 bool ProcessStatsDataSource::WriteProcess(int32_t pid,
-                                          const std::string& proc_status) {
+                                          const std::string& proc_status,
+                                          const std::string& proc_stat) {
   PERFETTO_DCHECK(ToInt32(ProcStatusEntry(proc_status, "Pid:")) == pid);
 
   // pid might've been reused for a non-main thread before our procfs read
   if (PERFETTO_UNLIKELY(pid != ToInt32(ProcStatusEntry(proc_status, "Tgid:"))))
     return false;
 
-  auto* proc = GetOrCreatePsTree()->add_processes();
+  protos::pbzero::ProcessTree::Process* proc =
+      GetOrCreatePsTree()->add_processes();
   proc->set_pid(pid);
   proc->set_ppid(ToInt32(ProcStatusEntry(proc_status, "PPid:")));
   // Uid will have multiple entries, only return first (real uid).
@@ -336,14 +379,22 @@
       // Some kernels can miss the NUL terminator due to a bug. b/147438623.
       cmdline.push_back('\0');
     }
-    for (base::StringSplitter ss(&cmdline[0], cmdline.size(), '\0');
+    for (base::StringSplitter ss(cmdline.data(), cmdline.size(), '\0');
          ss.Next();) {
       proc->add_cmdline(ss.cur_token());
     }
   } else {
     // Nothing in cmdline so use the thread name instead (which is == "comm").
-    proc->add_cmdline(ProcStatusEntry(proc_status, "Name:").c_str());
+    proc->add_cmdline(ProcStatusEntry(proc_status, "Name:"));
   }
+
+  if (record_process_age_ && !proc_stat.empty()) {
+    std::optional<ProcessRuntimes> times = ParseProcessRuntimes(proc_stat);
+    if (times.has_value()) {
+      proc->set_process_start_from_boot(times->starttime);
+    }
+  }
+
   seen_pids_.insert({pid, pid});
   return namespaced;
 }
@@ -371,7 +422,7 @@
 
   if (record_thread_names_) {
     std::string thread_name = ProcStatusEntry(proc_status, "Name:");
-    thread->set_name(thread_name.c_str());
+    thread->set_name(thread_name);
   }
   seen_pids_.insert({tid, tgid});
 }
@@ -484,9 +535,18 @@
   base::FlatSet<int32_t> pids;
   while (int32_t pid = ReadNextNumericDir(*proc_dir)) {
     cur_ps_stats_process_ = nullptr;
-
     uint32_t pid_u = static_cast<uint32_t>(pid);
-    if (skip_stats_for_pids_.size() > pid_u && skip_stats_for_pids_[pid_u])
+
+    // optional /proc/pid/stat fields
+    if (record_process_runtime_) {
+      std::string proc_stat = ReadProcPidFile(pid, "stat");
+      if (WriteProcessRuntimes(pid, proc_stat)) {
+        pids.insert(pid);
+      }
+    }
+
+    // memory counters
+    if (skip_mem_for_pids_.size() > pid_u && skip_mem_for_pids_[pid_u])
       continue;
 
     std::string proc_status = ReadProcPidFile(pid, "status");
@@ -502,9 +562,9 @@
       // If WriteMemCounters() fails the pid is very likely a kernel thread
       // that has a valid /proc/[pid]/status but no memory values. In this
       // case avoid keep polling it over and over.
-      if (skip_stats_for_pids_.size() <= pid_u)
-        skip_stats_for_pids_.resize(pid_u + 1);
-      skip_stats_for_pids_[pid_u] = true;
+      if (skip_mem_for_pids_.size() <= pid_u)
+        skip_mem_for_pids_.resize(pid_u + 1);
+      skip_mem_for_pids_[pid_u] = true;
       continue;
     }
 
@@ -530,6 +590,25 @@
   WriteProcessTree(pids);
 }
 
+bool ProcessStatsDataSource::WriteProcessRuntimes(
+    int32_t pid,
+    const std::string& proc_stat) {  // Contents of /proc/<pid>/stat.
+  std::optional<ProcessRuntimes> times = ParseProcessRuntimes(proc_stat);
+  if (!times.has_value())
+    return false;  // No runtimes could be extracted from |proc_stat|.
+  // Emit only values that differ from the cache (which defaults to uint64 max).
+  CachedProcessStats& cached = process_stats_cache_[pid];
+  if (times->utime != cached.runtime_user_mode_ns) {
+    GetOrCreateStatsProcess(pid)->set_runtime_user_mode(times->utime);
+    cached.runtime_user_mode_ns = times->utime;  // Remember what was emitted.
+  }
+  if (times->stime != cached.runtime_kernel_mode_ns) {
+    GetOrCreateStatsProcess(pid)->set_runtime_kernel_mode(times->stime);
+    cached.runtime_kernel_mode_ns = times->stime;  // Remember what was emitted.
+  }
+  return true;  // Extracted OK, even if no value changed.
+}
+
 // Returns true if the stats for the given |pid| have been written, false it
 // it failed (e.g., |pid| was a kernel thread and, as such, didn't report any
 // memory counters).
@@ -723,7 +802,7 @@
 void ProcessStatsDataSource::ClearIncrementalState() {
   PERFETTO_DLOG("ProcessStatsDataSource clearing incremental state.");
   seen_pids_.clear();
-  skip_stats_for_pids_.clear();
+  skip_mem_for_pids_.clear();
 
   cache_ticks_ = 0;
   process_stats_cache_.clear();
diff --git a/src/traced/probes/ps/process_stats_data_source.h b/src/traced/probes/ps/process_stats_data_source.h
index 1ebdf38..9ec28bc 100644
--- a/src/traced/probes/ps/process_stats_data_source.h
+++ b/src/traced/probes/ps/process_stats_data_source.h
@@ -90,6 +90,8 @@
     uint32_t smr_pss_anon_kb = std::numeric_limits<uint32_t>::max();
     uint32_t smr_pss_file_kb = std::numeric_limits<uint32_t>::max();
     uint32_t smr_pss_shmem_kb = std::numeric_limits<uint32_t>::max();
+    uint64_t runtime_user_mode_ns = std::numeric_limits<uint64_t>::max();
+    uint64_t runtime_kernel_mode_ns = std::numeric_limits<uint64_t>::max();
     // file descriptors
     base::FlatSet<uint64_t> seen_fds;
   };
@@ -105,7 +107,9 @@
   protos::pbzero::ProcessStats_Process* GetOrCreateStatsProcess(int32_t pid);
 
   // Functions for snapshotting process/thread long-term info and relationships.
-  bool WriteProcess(int32_t pid, const std::string& proc_status);
+  bool WriteProcess(int32_t pid,
+                    const std::string& proc_status,
+                    const std::string& proc_stat);
   void WriteThread(int32_t tid, int32_t tgid);
   void WriteDetailedThread(int32_t tid,
                            int32_t tgid,
@@ -115,11 +119,10 @@
   // Functions for periodically sampling process stats/counters.
   static void Tick(base::WeakPtr<ProcessStatsDataSource>);
   void WriteAllProcessStats();
+  bool WriteProcessRuntimes(int32_t pid, const std::string& proc_stat);
   bool WriteMemCounters(int32_t pid, const std::string& proc_status);
   void WriteFds(int32_t pid);
   void WriteSingleFd(int32_t pid, uint64_t fd);
-  bool ShouldWriteThreadStats(int32_t pid);
-  void WriteThreadStats(int32_t pid, int32_t tid);
 
   // Scans /proc/pid/status and writes the ProcessTree packet for input pids.
   void WriteProcessTree(const base::FlatSet<int32_t>&);
@@ -151,6 +154,8 @@
   bool dump_all_procs_on_start_ = false;
   bool resolve_process_fds_ = false;
   bool scan_smaps_rollup_ = false;
+  bool record_process_age_ = false;
+  bool record_process_runtime_ = false;
 
   // This set contains PIDs as per the Linux kernel notion of a PID (which is
   // really a TID). In practice this set will contain all TIDs for all processes
@@ -159,14 +164,10 @@
     int32_t pid;
     int32_t tgid;
 
-    inline SeenPid(int32_t _pid, int32_t _tgid = 0) : pid(_pid), tgid(_tgid) {}
+    SeenPid(int32_t _pid, int32_t _tgid = 0) : pid(_pid), tgid(_tgid) {}
     // TODO(rsavitski): add comparator support to FlatSet
-    inline bool operator==(const SeenPid& other) const {
-      return pid == other.pid;
-    }
-    inline bool operator<(const SeenPid& other) const {
-      return pid < other.pid;
-    }
+    bool operator==(const SeenPid& other) const { return pid == other.pid; }
+    bool operator<(const SeenPid& other) const { return pid < other.pid; }
   };
   base::FlatSet<SeenPid> seen_pids_;
 
@@ -175,7 +176,7 @@
   uint64_t cache_ticks_ = 0;
   protos::pbzero::ProcessStats* cur_ps_stats_ = nullptr;
   protos::pbzero::ProcessStats_Process* cur_ps_stats_process_ = nullptr;
-  std::vector<bool> skip_stats_for_pids_;
+  std::vector<bool> skip_mem_for_pids_;
 
   // Cached process stats per process. Cleared every |cache_ttl_ticks_| *
   // |poll_period_ms_| ms.
diff --git a/src/traced/probes/ps/process_stats_data_source_unittest.cc b/src/traced/probes/ps/process_stats_data_source_unittest.cc
index b329623..421983b 100644
--- a/src/traced/probes/ps/process_stats_data_source_unittest.cc
+++ b/src/traced/probes/ps/process_stats_data_source_unittest.cc
@@ -18,10 +18,11 @@
 
 #include <dirent.h>
 
+#include <memory>
+
 #include "perfetto/ext/base/file_utils.h"
 #include "perfetto/ext/base/string_utils.h"
 #include "perfetto/ext/base/temp_file.h"
-#include "perfetto/protozero/scattered_heap_buffer.h"
 #include "perfetto/tracing/core/data_source_config.h"
 #include "src/base/test/test_task_runner.h"
 #include "src/tracing/core/trace_writer_for_testing.h"
@@ -44,6 +45,26 @@
 namespace perfetto {
 namespace {
 
+std::string ToProcStatString(uint64_t utime_ticks,          // stat field 14
+                             uint64_t stime_ticks,          // stat field 15
+                             uint64_t starttime_ticks) {  // stat field 22
+  return base::StackString<512>{  // Fake /proc/<pid>/stat line for the mocks.
+      "9346 (comm) S 9245 9245 9245 0 -1 4194304 1006608 10781 8130 5 %" PRIu64
+      " %" PRIu64 " 115 25 20 0 15 0 %" PRIu64
+      " 1206684979200 7065 18446744073709551615 94632071671808 94632198091600 "
+      "140725574671488 0 0 0 0 2 4608 0 0 0 17 3 0 0 0 0 0 94632203476992 "
+      "94632203968624 94632219561984 140725574677889 140725574678594 "
+      "140725574678594 140725574680553 0",
+      utime_ticks, stime_ticks, starttime_ticks}
+      .ToStdString();
+}
+
+uint64_t NsPerClockTick() {
+  int64_t tickrate = sysconf(_SC_CLK_TCK);  // Clock ticks per second.
+  PERFETTO_CHECK(tickrate > 0);  // sysconf() returns -1 on error.
+  return 1'000'000'000ULL / static_cast<uint64_t>(tickrate);  // Integer div.
+}
+
 class TestProcessStatsDataSource : public ProcessStatsDataSource {
  public:
   TestProcessStatsDataSource(base::TaskRunner* task_runner,
@@ -66,12 +87,10 @@
 
   std::unique_ptr<TestProcessStatsDataSource> GetProcessStatsDataSource(
       const DataSourceConfig& cfg) {
-    auto writer =
-        std::unique_ptr<TraceWriterForTesting>(new TraceWriterForTesting());
+    auto writer = std::make_unique<TraceWriterForTesting>();
     writer_raw_ = writer.get();
-    return std::unique_ptr<TestProcessStatsDataSource>(
-        new TestProcessStatsDataSource(&task_runner_, 0, std::move(writer),
-                                       cfg));
+    return std::make_unique<TestProcessStatsDataSource>(&task_runner_, 0,
+                                                        std::move(writer), cfg);
   }
 
   base::TestTaskRunner task_runner_;
@@ -121,13 +140,13 @@
 TEST_F(ProcessStatsDataSourceTest, DontRescanCachedPIDsAndTIDs) {
   // assertion helpers
   auto expected_process = [](int pid) {
-    return [pid](protos::gen::ProcessTree::Process process) {
+    return [pid](const protos::gen::ProcessTree::Process& process) {
       return process.pid() == pid && process.cmdline_size() > 0 &&
              process.cmdline()[0] == "proc_" + std::to_string(pid);
     };
   };
   auto expected_thread = [](int tid) {
-    return [tid](protos::gen::ProcessTree::Thread thread) {
+    return [tid](const protos::gen::ProcessTree::Thread& thread) {
       return thread.tid() == tid && thread.tgid() == tid / 10 * 10 &&
              thread.name() == "thread_" + std::to_string(tid);
     };
@@ -320,6 +339,7 @@
   ProcessStatsConfig cfg;
   cfg.set_proc_stats_poll_ms(1);
   cfg.set_resolve_process_fds(true);
+  cfg.set_record_process_runtime(true);
   cfg.add_quirks(ProcessStatsConfig::DISABLE_ON_DEMAND);
   ds_config.set_process_stats_config_raw(cfg.SerializeAsString());
   auto data_source = GetProcessStatsDataSource(ds_config);
@@ -352,7 +372,7 @@
 
   auto checkpoint = task_runner_.CreateCheckpoint("all_done");
 
-  const auto fake_proc_path = fake_proc.path();
+  const std::string& fake_proc_path = fake_proc.path();
   EXPECT_CALL(*data_source, OpenProcDir())
       .WillRepeatedly(Invoke([&fake_proc_path] {
         return base::ScopedDir(opendir(fake_proc_path.c_str()));
@@ -365,18 +385,24 @@
   int iter = 0;
   for (int pid : kPids) {
     EXPECT_CALL(*data_source, ReadProcPidFile(pid, "status"))
-        .WillRepeatedly(
-            Invoke([checkpoint, &iter](int32_t p, const std::string&) {
-              base::StackString<1024> ret(
-                  "Name:	pid_10\nVmSize:	 %d kB\nVmRSS:\t%d  kB\n",
-                  p * 100 + iter * 10 + 1, p * 100 + iter * 10 + 2);
-              return ret.ToStdString();
-            }));
+        .WillRepeatedly(Invoke([&iter](int32_t p, const std::string&) {
+          return base::StackString<1024>{
+              "Name:	pid_10\nVmSize:	 %d kB\nVmRSS:\t%d  kB\n",
+              p * 100 + iter * 10 + 1, p * 100 + iter * 10 + 2}
+              .ToStdString();
+        }));
 
     // By default scan_smaps_rollup is off and /proc/<pid>/smaps_rollup
     // shouldn't be read.
     EXPECT_CALL(*data_source, ReadProcPidFile(pid, "smaps_rollup")).Times(0);
 
+    EXPECT_CALL(*data_source, ReadProcPidFile(pid, "stat"))
+        .WillRepeatedly(Invoke([&iter](int32_t p, const std::string&) {
+          return ToProcStatString(static_cast<uint64_t>(p * 100 + iter * 10),
+                                  static_cast<uint64_t>(p * 200 + iter * 20),
+                                  /*starttime_ticks=*/0);
+        }));
+
     EXPECT_CALL(*data_source, ReadProcPidFile(pid, "oom_score_adj"))
         .WillRepeatedly(Invoke(
             [checkpoint, kPids, &iter](int32_t inner_pid, const std::string&) {
@@ -404,19 +430,24 @@
   iter = 0;
   for (const auto& proc_counters : processes) {
     int32_t pid = proc_counters.pid();
-    ASSERT_EQ(static_cast<int>(proc_counters.vm_size_kb()),
+    EXPECT_EQ(static_cast<int>(proc_counters.vm_size_kb()),
               pid * 100 + iter * 10 + 1);
-    ASSERT_EQ(static_cast<int>(proc_counters.vm_rss_kb()),
+    EXPECT_EQ(static_cast<int>(proc_counters.vm_rss_kb()),
               pid * 100 + iter * 10 + 2);
-    ASSERT_EQ(static_cast<int>(proc_counters.oom_score_adj()),
+    EXPECT_EQ(static_cast<int>(proc_counters.oom_score_adj()),
               pid * 100 + iter * 10 + 3);
-    ASSERT_EQ(proc_counters.fds().size(), base::ArraySize(kFds));
+    EXPECT_EQ(proc_counters.fds().size(), base::ArraySize(kFds));
     for (const auto& fd_path : proc_counters.fds()) {
-      ASSERT_THAT(kFds, Contains(fd_path.fd()));
-      ASSERT_EQ(fd_path.path(), kDevice);
+      EXPECT_THAT(kFds, Contains(fd_path.fd()));
+      EXPECT_EQ(fd_path.path(), kDevice);
     }
-    if (pid == kPids[base::ArraySize(kPids) - 1])
+    EXPECT_EQ(proc_counters.runtime_user_mode(),
+              static_cast<uint64_t>(pid * 100 + iter * 10) * NsPerClockTick());
+    EXPECT_EQ(proc_counters.runtime_kernel_mode(),
+              static_cast<uint64_t>(pid * 200 + iter * 20) * NsPerClockTick());
+    if (pid == kPids[base::ArraySize(kPids) - 1]) {
       iter++;
+    }
   }
 
   // Cleanup |fake_proc|. TempDir checks that the directory is empty.
@@ -621,5 +652,34 @@
     base::Rmdir(*path);
 }
 
+TEST_F(ProcessStatsDataSourceTest, WriteProcessStartFromBoot) {
+  DataSourceConfig ds_config;
+  ProcessStatsConfig cfg;
+  cfg.set_record_process_age(true);  // Option under test.
+  ds_config.set_process_stats_config_raw(cfg.SerializeAsString());
+  auto data_source = GetProcessStatsDataSource(ds_config);
+
+  const char* status =
+      "Name: foo\nTgid:\t42\nPid:   42\nPPid:  17\nUid:  43 44 45 56\n";
+
+  EXPECT_CALL(*data_source, ReadProcPidFile(42, "status"))
+      .WillOnce(Return(status));
+  EXPECT_CALL(*data_source, ReadProcPidFile(42, "stat"))
+      .WillOnce(Return(ToProcStatString(0, 0, 15842)));
+  EXPECT_CALL(*data_source, ReadProcPidFile(42, "cmdline"))
+      .WillOnce(Return(std::string("foo\0bar\0baz\0", 12)));
+
+  data_source->OnPids({42});
+
+  auto trace = writer_raw_->GetAllTracePackets();
+  ASSERT_EQ(trace.size(), 1u);
+  auto ps_tree = trace[0].process_tree();
+  ASSERT_EQ(ps_tree.processes_size(), 1);
+  auto first_process = ps_tree.processes()[0];
+  ASSERT_EQ(first_process.pid(), 42);
+  // The fake stat gives starttime in ticks; the trace field is expected in ns.
+  EXPECT_EQ(first_process.process_start_from_boot(), 15842 * NsPerClockTick());
+}
+
 }  // namespace
 }  // namespace perfetto
diff --git a/test/cmdline_integrationtest.cc b/test/cmdline_integrationtest.cc
index 3ac6f22..6fd2b75 100644
--- a/test/cmdline_integrationtest.cc
+++ b/test/cmdline_integrationtest.cc
@@ -961,13 +961,24 @@
                    /*use_explicit_clone=*/true);
 }
 
-// Regression test for b/279753347 .
+// Regression test for b/279753347: --save-for-bugreport would create an empty
+// file if no session with bugreport_score was active.
 TEST_F(PerfettoCmdlineTest, UnavailableBugreportLeavesNoEmptyFiles) {
   ScopedFileRemove remove_on_test_exit(GetBugreportTracePath());
   Exec perfetto_br_proc = ExecPerfetto({"--save-for-bugreport"});
   StartServiceIfRequiredNoNewExecsAfterThis();
   perfetto_br_proc.Run(&stderr_);
-  ASSERT_FALSE(base::FileExists(GetBugreportTracePath()));
+  // No file exists. Great.
+  if (!base::FileExists(GetBugreportTracePath())) {
+    return;
+  }
+  // A file exists. There are two possibilities:
+  // 1. There was a bugreport_score session.
+  // 2. There was no bugreport_score session and we're hitting b/279753347.
+  //
+  // Let's check that we're not hitting b/279753347, by checking that the file
+  // is not empty.
+  EXPECT_NE(base::GetFileSize(GetBugreportTracePath()), 0);
 }
 
 // Tests that SaveTraceForBugreport() works also if the trace has triggers
diff --git a/test/data/ui-screenshots/ui-android_trace_30s_expand_camera.png.sha256 b/test/data/ui-screenshots/ui-android_trace_30s_expand_camera.png.sha256
index 590d1c0..57df59f 100644
--- a/test/data/ui-screenshots/ui-android_trace_30s_expand_camera.png.sha256
+++ b/test/data/ui-screenshots/ui-android_trace_30s_expand_camera.png.sha256
@@ -1 +1 @@
-71f0799ce780f36dbdeb601ba535ee012b7323afc57254a5b383704ec384da62
\ No newline at end of file
+d12dc4f32f544a32bca4302314d7eb4984a3e6473d78298435396c7f81522564
\ No newline at end of file
diff --git a/test/data/ui-screenshots/ui-android_trace_30s_load.png.sha256 b/test/data/ui-screenshots/ui-android_trace_30s_load.png.sha256
index fd6b0fe..c8ecc01 100644
--- a/test/data/ui-screenshots/ui-android_trace_30s_load.png.sha256
+++ b/test/data/ui-screenshots/ui-android_trace_30s_load.png.sha256
@@ -1 +1 @@
-725ff7289949bda82fc4c2d454e1ee0b4218b3d56f20c1098d6fa7914ef19bfc
\ No newline at end of file
+2273d163c13ea0e296492ba30a2669c70d0e5ab050317c17f108a1836052c908
\ No newline at end of file
diff --git a/test/data/ui-screenshots/ui-routing_open_invalid_trace_from_blank_page.png.sha256 b/test/data/ui-screenshots/ui-routing_open_invalid_trace_from_blank_page.png.sha256
index 98f2571..48ede01 100644
--- a/test/data/ui-screenshots/ui-routing_open_invalid_trace_from_blank_page.png.sha256
+++ b/test/data/ui-screenshots/ui-routing_open_invalid_trace_from_blank_page.png.sha256
@@ -1 +1 @@
-bd495271f97ffe17851f6605acaf24463c2363cb6a4bcf8e1b3fd78b9e18353f
\ No newline at end of file
+d9539daa1cf9a5b621377ed5e4be164adfee64f9f320caa7b0d1abc584276427
\ No newline at end of file
diff --git a/test/trace_processor/diff_tests/metrics/android/android_boot_unagg.out b/test/trace_processor/diff_tests/metrics/android/android_boot_unagg.out
index 568da0b..cb0d712 100644
--- a/test/trace_processor/diff_tests/metrics/android/android_boot_unagg.out
+++ b/test/trace_processor/diff_tests/metrics/android/android_boot_unagg.out
@@ -214,4 +214,80 @@
       total_dur: 583162190
     }
   }
+  android_post_boot_gc_metric {
+    gc_events {
+      thread_name: "HeapTaskDaemon"
+      process_name: "com.google.android.deskclock"
+      gc_type: "full"
+      is_mark_compact: 1
+      reclaimed_mb: 3.1430000000000002
+      min_heap_mb: 2.754000
+      max_heap_mb: 5.897000
+      mb_per_ms_of_running_gc: 0.3262420579054222
+      mb_per_ms_of_wall_gc: 0.004718018639325977
+      gc_dur: 666169475
+      gc_running_dur: 9633951
+      gc_runnable_dur: 44371461
+      gc_unint_io_dur: 0
+      gc_unint_non_io_dur: 0
+      gc_int_dur: 0
+      gc_ts: 39041818011
+      tid: 3185
+      pid: 3162
+      gc_monotonic_dur: 666169475
+    }
+    gc_events {
+      thread_name: "HeapTaskDaemon"
+      process_name: "com.google.android.apps.nexuslauncher"
+      gc_type: "full"
+      is_mark_compact: 1
+      reclaimed_mb: 10.640000
+      min_heap_mb: 6.861000
+      max_heap_mb: 17.501000
+      mb_per_ms_of_running_gc: 93.68918788028213
+      mb_per_ms_of_wall_gc: 0.18073523433053545
+      gc_dur: 58870646
+      gc_running_dur: 113567
+      gc_runnable_dur: 0
+      gc_unint_io_dur: 0
+      gc_unint_non_io_dur: 0
+      gc_int_dur: 0
+      gc_ts: 39849615741
+      tid: 2534
+      pid: 2523
+      gc_monotonic_dur: 58870646
+    }
+    gc_events {
+      thread_name: "HeapTaskDaemon"
+      process_name: "com.google.android.apps.wellbeing"
+      gc_type: "full"
+      is_mark_compact: 1
+      gc_dur: 168393585
+      gc_running_dur: 1714233
+      gc_runnable_dur: 28482219
+      gc_unint_io_dur: 0
+      gc_unint_non_io_dur: 0
+      gc_int_dur: -1
+      gc_ts: 40022150450
+      tid: 2917
+      pid: 2909
+      gc_monotonic_dur: 168393585
+    }
+    gc_events {
+      thread_name: "HeapTaskDaemon"
+      process_name: "com.google.android.euicc"
+      gc_type: "full"
+      is_mark_compact: 1
+      gc_dur: 148630605
+      gc_running_dur: 4148763
+      gc_runnable_dur: -1
+      gc_unint_io_dur: 0
+      gc_unint_non_io_dur: 0
+      gc_int_dur: 0
+      gc_ts: 40041913430
+      tid: 2865
+      pid: 2855
+      gc_monotonic_dur: 148630605
+    }
+  }
 }
\ No newline at end of file
diff --git a/test/trace_processor/diff_tests/metrics/android/android_garbage_collection_unagg.out b/test/trace_processor/diff_tests/metrics/android/android_garbage_collection_unagg.out
new file mode 100644
index 0000000..1341536
--- /dev/null
+++ b/test/trace_processor/diff_tests/metrics/android/android_garbage_collection_unagg.out
@@ -0,0 +1,76 @@
+android_garbage_collection_unagg {
+  gc_events {
+    thread_name: "HeapTaskDaemon"
+    process_name: "com.google.android.deskclock"
+    gc_type: "full"
+    is_mark_compact: 1
+    reclaimed_mb: 3.1430000000000002
+    min_heap_mb: 2.754000
+    max_heap_mb: 5.897000
+    mb_per_ms_of_running_gc: 0.3262420579054222
+    mb_per_ms_of_wall_gc: 0.004718018639325977
+    gc_dur: 666169475
+    gc_running_dur: 9633951
+    gc_runnable_dur: 44371461
+    gc_unint_io_dur: 0
+    gc_unint_non_io_dur: 0
+    gc_int_dur: 0
+    gc_ts: 39041818011
+    tid: 3185
+    pid: 3162
+    gc_monotonic_dur: 666169475
+  }
+  gc_events {
+    thread_name: "HeapTaskDaemon"
+    process_name: "com.google.android.apps.nexuslauncher"
+    gc_type: "full"
+    is_mark_compact: 1
+    reclaimed_mb: 10.640000
+    min_heap_mb: 6.861000
+    max_heap_mb: 17.501000
+    mb_per_ms_of_running_gc: 93.68918788028213
+    mb_per_ms_of_wall_gc: 0.18073523433053545
+    gc_dur: 58870646
+    gc_running_dur: 113567
+    gc_runnable_dur: 0
+    gc_unint_io_dur: 0
+    gc_unint_non_io_dur: 0
+    gc_int_dur: 0
+    gc_ts: 39849615741
+    tid: 2534
+    pid: 2523
+    gc_monotonic_dur: 58870646
+  }
+  gc_events {
+    thread_name: "HeapTaskDaemon"
+    process_name: "com.google.android.apps.wellbeing"
+    gc_type: "full"
+    is_mark_compact: 1
+    gc_dur: 168393585
+    gc_running_dur: 1714233
+    gc_runnable_dur: 28482219
+    gc_unint_io_dur: 0
+    gc_unint_non_io_dur: 0
+    gc_int_dur: -1
+    gc_ts: 40022150450
+    tid: 2917
+    pid: 2909
+    gc_monotonic_dur: 168393585
+  }
+  gc_events {
+    thread_name: "HeapTaskDaemon"
+    process_name: "com.google.android.euicc"
+    gc_type: "full"
+    is_mark_compact: 1
+    gc_dur: 148630605
+    gc_running_dur: 4148763
+    gc_runnable_dur: -1
+    gc_unint_io_dur: 0
+    gc_unint_non_io_dur: 0
+    gc_int_dur: 0
+    gc_ts: 40041913430
+    tid: 2865
+    pid: 2855
+    gc_monotonic_dur: 148630605
+  }
+}
\ No newline at end of file
diff --git a/test/trace_processor/diff_tests/metrics/android/tests.py b/test/trace_processor/diff_tests/metrics/android/tests.py
index cb0c0a2..f5de58f 100644
--- a/test/trace_processor/diff_tests/metrics/android/tests.py
+++ b/test/trace_processor/diff_tests/metrics/android/tests.py
@@ -229,4 +229,10 @@
       trace=DataPath('android_postboot_unlock.pftrace'),
       query=Metric("android_app_process_starts"),
       out=Path('android_app_process_starts.out')
-    )
\ No newline at end of file
+    )
+
+  def test_android_garbage_collection(self):
+    return DiffTestBlueprint(
+        trace=DataPath('android_postboot_unlock.pftrace'),
+        query=Metric('android_garbage_collection_unagg'),
+        out=Path('android_garbage_collection_unagg.out'))
\ No newline at end of file
diff --git a/test/trace_processor/diff_tests/parser/process_tracking/tests.py b/test/trace_processor/diff_tests/parser/process_tracking/tests.py
index f1413bd..22eb3b8 100644
--- a/test/trace_processor/diff_tests/parser/process_tracking/tests.py
+++ b/test/trace_processor/diff_tests/parser/process_tracking/tests.py
@@ -226,3 +226,91 @@
         "tid","pid","pname","tname"
         19999,"[NULL]","[NULL]","real_name"
         """))
+
+  def test_process_stats_process_runtime(self):
+    return DiffTestBlueprint(
+        trace=TextProto(r"""
+        packet {
+          first_packet_on_sequence: true
+          timestamp: 1088821452006028
+          incremental_state_cleared: true
+          process_tree {
+            processes {
+              pid: 9301
+              ppid: 9251
+              uid: 304336
+              nspid: 4
+              nspid: 1
+              cmdline: "/bin/command"
+              process_start_from_boot: 157620000000
+            }
+            collection_end_timestamp: 1088821520810204
+          }
+          trusted_uid: 304336
+          trusted_packet_sequence_id: 3
+          trusted_pid: 1137063
+          previous_packet_dropped: true
+        }
+        packet {
+          timestamp: 1088821520899054
+          process_stats {
+            processes {
+              pid: 9301
+              runtime_user_mode: 16637390000000
+              runtime_kernel_mode: 1327800000000
+              vm_size_kb: 1188971644
+              vm_locked_kb: 0
+              vm_hwm_kb: 1180568
+              vm_rss_kb: 1100672
+              rss_anon_kb: 1045332
+              rss_file_kb: 46848
+              rss_shmem_kb: 8492
+              vm_swap_kb: 163936
+              oom_score_adj: 300
+            }
+            collection_end_timestamp: 1088821539659978
+          }
+          trusted_uid: 304336
+          trusted_packet_sequence_id: 3
+          trusted_pid: 1137063
+        }
+        packet {
+          timestamp: 1088821786436938
+          process_stats {
+            processes {
+              pid: 9301
+              runtime_user_mode: 16638280000000
+              runtime_kernel_mode: 1327860000000
+              vm_size_kb: 1188979836
+              vm_locked_kb: 0
+              vm_hwm_kb: 1180568
+              vm_rss_kb: 895428
+              rss_anon_kb: 832028
+              rss_file_kb: 46848
+              rss_shmem_kb: 16552
+              vm_swap_kb: 163936
+              oom_score_adj: 300
+            }
+            collection_end_timestamp: 1088821817629747
+          }
+          trusted_uid: 304336
+          trusted_packet_sequence_id: 3
+          trusted_pid: 1137063
+        }
+        """),
+        query="""
+        select c.ts, c.value, pct.name, p.pid, p.start_ts, p.cmdline
+        from counter c
+          join process_counter_track pct on (c.track_id = pct.id)
+          join process p using (upid)
+        where pct.name in ("runtime.user_ns", "runtime.kernel_ns")
+          and p.pid = 9301
+        order by ts asc, pct.name asc
+        """,
+        out=Csv("""
+        "ts","value","name","pid","start_ts","cmdline"
+        1088821520899054,1327800000000.000000,"runtime.kernel_ns",9301,157620000000,"/bin/command"
+        1088821520899054,16637390000000.000000,"runtime.user_ns",9301,157620000000,"/bin/command"
+        1088821786436938,1327860000000.000000,"runtime.kernel_ns",9301,157620000000,"/bin/command"
+        1088821786436938,16638280000000.000000,"runtime.user_ns",9301,157620000000,"/bin/command"
+        """))
diff --git a/test/trace_processor/diff_tests/stdlib/android/tests.py b/test/trace_processor/diff_tests/stdlib/android/tests.py
index 956ac87..07801ad 100644
--- a/test/trace_processor/diff_tests/stdlib/android/tests.py
+++ b/test/trace_processor/diff_tests/stdlib/android/tests.py
@@ -357,7 +357,7 @@
         trace=DataPath('android_monitor_contention_trace.atr'),
         query="""
       INCLUDE PERFETTO MODULE android.thread;
-      SELECT * FROM ANDROID_THREAD_CREATION_SPAM(1e9, 1e9);
+      SELECT * FROM _android_thread_creation_spam(1e9, 1e9);
       """,
         out=Csv("""
       "process_name","pid","thread_name_prefix","max_count_per_sec"
diff --git a/test/trace_processor/diff_tests/stdlib/sched/tests.py b/test/trace_processor/diff_tests/stdlib/sched/tests.py
index ee96746..48a8814 100644
--- a/test/trace_processor/diff_tests/stdlib/sched/tests.py
+++ b/test/trace_processor/diff_tests/stdlib/sched/tests.py
@@ -45,6 +45,7 @@
         trace=Path('../../common/synth_1.py'),
         query="""
       INCLUDE PERFETTO MODULE sched.thread_level_parallelism;
+
       SELECT * FROM sched_active_cpu_count;
       """,
         out=Csv("""
@@ -58,3 +59,155 @@
       250,2
       390,2
       """))
+
+  def test_sched_utilization_per_second(self):
+    return DiffTestBlueprint(
+        trace=DataPath('example_android_trace_30s.pb'),
+        query="""
+        INCLUDE PERFETTO MODULE sched.utilization.system;
+
+        SELECT * FROM sched_utilization_per_second;
+        """,
+        out=Csv("""
+        "ts","utilization","unnormalized_utilization"
+        70000000000,0.004545,0.036362
+        71000000000,0.022596,0.180764
+        72000000000,0.163393,1.307146
+        73000000000,0.452122,3.616972
+        74000000000,0.525557,4.204453
+        75000000000,0.388632,3.109057
+        76000000000,0.425447,3.403579
+        77000000000,0.201112,1.608896
+        78000000000,0.280247,2.241977
+        79000000000,0.345228,2.761827
+        80000000000,0.303258,2.426064
+        81000000000,0.487522,3.900172
+        82000000000,0.080542,0.644336
+        83000000000,0.362450,2.899601
+        84000000000,0.076438,0.611501
+        85000000000,0.110689,0.885514
+        86000000000,0.681488,5.451901
+        87000000000,0.808331,6.466652
+        88000000000,0.941768,7.534142
+        89000000000,0.480556,3.844446
+        90000000000,0.453268,3.626142
+        91000000000,0.280310,2.242478
+        92000000000,0.006381,0.051049
+        93000000000,0.030991,0.247932
+        94000000000,0.031981,0.255845
+        95000000000,0.027931,0.223446
+        96000000000,0.063066,0.504529
+        97000000000,0.023847,0.190773
+        98000000000,0.011291,0.090328
+        99000000000,0.024065,0.192518
+        100000000000,0.001964,0.015711
+        """))
+
+  def test_sched_process_utilization_per_second(self):
+    return DiffTestBlueprint(
+        trace=DataPath('example_android_trace_30s.pb'),
+        query="""
+        INCLUDE PERFETTO MODULE sched.utilization.process;
+
+        SELECT *
+        FROM sched_process_utilization_per_second(10);
+        """,
+        out=Csv("""
+        "ts","utilization","unnormalized_utilization"
+        72000000000,0.000187,0.001495
+        73000000000,0.000182,0.001460
+        77000000000,0.000072,0.000579
+        78000000000,0.000275,0.002204
+        82000000000,0.000300,0.002404
+        83000000000,0.000004,0.000034
+        87000000000,0.000133,0.001065
+        88000000000,0.000052,0.000416
+        89000000000,0.000212,0.001697
+        92000000000,0.000207,0.001658
+        97000000000,0.000353,0.002823
+        """))
+
+  def test_sched_thread_utilization_per_second(self):
+    return DiffTestBlueprint(
+        trace=DataPath('example_android_trace_30s.pb'),
+        query="""
+        INCLUDE PERFETTO MODULE sched.utilization.thread;
+
+        SELECT *
+        FROM sched_thread_utilization_per_second(10);
+        """,
+        out=Csv("""
+        "ts","utilization","unnormalized_utilization"
+        70000000000,0.000024,0.000195
+        72000000000,0.000025,0.000200
+        73000000000,0.000053,0.000420
+        74000000000,0.000044,0.000352
+        75000000000,0.000058,0.000461
+        76000000000,0.000075,0.000603
+        77000000000,0.000051,0.000407
+        78000000000,0.000047,0.000374
+        79000000000,0.000049,0.000396
+        80000000000,0.000084,0.000673
+        81000000000,0.000041,0.000329
+        82000000000,0.000048,0.000383
+        83000000000,0.000040,0.000323
+        84000000000,0.000018,0.000145
+        85000000000,0.000053,0.000421
+        86000000000,0.000121,0.000972
+        87000000000,0.000049,0.000392
+        88000000000,0.000036,0.000285
+        89000000000,0.000033,0.000266
+        90000000000,0.000050,0.000401
+        91000000000,0.000025,0.000201
+        92000000000,0.000009,0.000071
+        """))
+
+  def test_sched_thread_time_in_state(self):
+    return DiffTestBlueprint(
+        trace=DataPath('example_android_trace_30s.pb'),
+        query="""
+        INCLUDE PERFETTO MODULE sched.states;
+
+        SELECT *
+        FROM sched_thread_time_in_state
+        ORDER BY utid, state
+        LIMIT 10;
+        """,
+        out=Csv("""
+        "utid","total_runtime","state","time_in_state","percentage_in_state"
+        1,27540674878,"D",596720,0
+        1,27540674878,"R",1988438,0
+        1,27540674878,"R+",2435415,0
+        1,27540674878,"Running",23098223,0
+        1,27540674878,"S",27512556082,99
+        2,27761417087,"D",833039830,3
+        2,27761417087,"R+",2931096,0
+        2,27761417087,"Running",92350845,0
+        2,27761417087,"S",26833095316,96
+        3,29374171050,"R",140800325,0
+        """))
+
+  def test_sched_percentage_of_time_in_state(self):
+    return DiffTestBlueprint(
+        trace=DataPath('example_android_trace_30s.pb'),
+        query="""
+        INCLUDE PERFETTO MODULE sched.states;
+
+        SELECT *
+        FROM sched_percentage_of_time_in_state
+        ORDER BY utid
+        LIMIT 10;
+        """,
+        out=Csv("""
+        "utid","running","runnable","runnable_preempted","sleeping","uninterruptible_sleep","other"
+        1,0,0,0,99,0,"[NULL]"
+        2,0,"[NULL]",0,96,3,"[NULL]"
+        3,5,0,0,93,"[NULL]","[NULL]"
+        4,100,"[NULL]","[NULL]","[NULL]","[NULL]",0
+        5,0,0,0,99,0,"[NULL]"
+        6,0,"[NULL]",0,99,"[NULL]","[NULL]"
+        7,0,0,0,99,"[NULL]","[NULL]"
+        8,0,0,0,98,0,"[NULL]"
+        9,0,"[NULL]","[NULL]",99,"[NULL]","[NULL]"
+        10,0,"[NULL]",0,99,"[NULL]","[NULL]"
+        """))
diff --git a/test/trace_processor/diff_tests/stdlib/slices/tests.py b/test/trace_processor/diff_tests/stdlib/slices/tests.py
index e3db459..053dabf 100644
--- a/test/trace_processor/diff_tests/stdlib/slices/tests.py
+++ b/test/trace_processor/diff_tests/stdlib/slices/tests.py
@@ -91,4 +91,31 @@
         "ThreadControllerImpl::RunTask",174796099970797,186000,0
         "Looper.dispatch: jy3(null)",174800056530797,1368000,0
         "ThreadControllerImpl::RunTask",174800107962797,132000,0
-      """))
\ No newline at end of file
+      """))
+
+  def test_thread_slice_cpu_time(self):
+    # ORDER BY makes the LIMIT deterministic: without it SQLite is free to
+    # return any 10 rows of thread_slice_cpu_time, in any order.
+    return DiffTestBlueprint(
+        trace=DataPath('example_android_trace_30s.pb'),
+        query="""
+        INCLUDE PERFETTO MODULE slices.cpu_time;
+
+        SELECT *
+        FROM thread_slice_cpu_time
+        ORDER BY id
+        LIMIT 10;
+        """,
+        out=Csv("""
+        "id","cpu_time"
+        0,178646
+        1,119740
+        2,58073
+        3,98698
+        4,121979
+        5,45000
+        6,35104
+        7,33333
+        8,46926
+        9,17865
+        """))
diff --git a/test/trace_processor/diff_tests/tables/tests_sched.py b/test/trace_processor/diff_tests/tables/tests_sched.py
index 399cb55..46ff428 100644
--- a/test/trace_processor/diff_tests/tables/tests_sched.py
+++ b/test/trace_processor/diff_tests/tables/tests_sched.py
@@ -102,6 +102,24 @@
         9,"spurious_sched_wakeup",1737211563344,8999,0,178,1195
         """))
 
+  def test_sched_waker_id(self):
+    return DiffTestBlueprint(
+        trace=DataPath('sched_wakeup_trace.atr'),
+        query="""
+        SELECT parent.id
+        FROM thread_state parent
+        JOIN thread_state child
+          ON parent.utid = child.waker_utid AND child.ts BETWEEN parent.ts AND parent.ts + parent.dur
+        WHERE child.id = 15750
+        UNION ALL
+        SELECT waker_id AS id FROM thread_state WHERE id = 15750
+        """,
+        out=Csv("""
+        "id"
+        15748
+        15748
+        """))
+
   def test_raw_common_flags(self):
     return DiffTestBlueprint(
         trace=DataPath('sched_wakeup_trace.atr'),
diff --git a/tools/cpu_profile b/tools/cpu_profile
index 328c6e6..3403e38 100755
--- a/tools/cpu_profile
+++ b/tools/cpu_profile
@@ -37,18 +37,18 @@
 
 
 # ----- Amalgamator: begin of python/perfetto/prebuilts/manifests/traceconv.py
-# This file has been generated by: tools/roll-prebuilts v41.0
+# This file has been generated by: tools/roll-prebuilts v43.1
 TRACECONV_MANIFEST = [{
     'arch':
         'mac-amd64',
     'file_name':
         'traceconv',
     'file_size':
-        9381704,
+        7790424,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v41.0/mac-amd64/traceconv',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v43.1/mac-amd64/traceconv',
     'sha256':
-        'e5678d6e3eebeb6feecb9693f924c708c02ba78bd0ce0a427d1dd7acd2b37120',
+        '88007b64828e835e0326c11f66f0bba7d8ab117562963086a4f19d8cb060204d',
     'platform':
         'darwin',
     'machine': ['x86_64']
@@ -58,11 +58,11 @@
     'file_name':
         'traceconv',
     'file_size':
-        7976744,
+        7264824,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v41.0/mac-arm64/traceconv',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v43.1/mac-arm64/traceconv',
     'sha256':
-        'a199463232d3e8e37502d955a2bd712b1ab431c0ee1903d667bdf97b7345083c',
+        'be5769279ef8442e80130e4bdb6a0a6aa11305442207ea18ff2cf38b21a71a57',
     'platform':
         'darwin',
     'machine': ['arm64']
@@ -72,11 +72,11 @@
     'file_name':
         'traceconv',
     'file_size':
-        9127632,
+        7885952,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v41.0/linux-amd64/traceconv',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v43.1/linux-amd64/traceconv',
     'sha256':
-        '3c8e7b3cef528684d42f8a550cf38643f73f6ea82f6686f88f5b6af4d4e7bbc6',
+        '51cfdf5060bcd87d08402620d88d0243f7bb39f2878906614d53fa3ddd78dd92',
     'platform':
         'linux',
     'machine': ['x86_64']
@@ -86,11 +86,11 @@
     'file_name':
         'traceconv',
     'file_size':
-        6961192,
+        5919372,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v41.0/linux-arm/traceconv',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v43.1/linux-arm/traceconv',
     'sha256':
-        '9aea9075fdda92a326bc15a2a70bd818036588fce9e201d66cbdb16fac72b83a',
+        '04300b1c4dcec1e01bc23017dab3b406f9f0ffd7dd9ea3723784aa8730762bc9',
     'platform':
         'linux',
     'machine': ['armv6l', 'armv7l', 'armv8l']
@@ -100,11 +100,11 @@
     'file_name':
         'traceconv',
     'file_size':
-        8595032,
+        7588200,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v41.0/linux-arm64/traceconv',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v43.1/linux-arm64/traceconv',
     'sha256':
-        '586dac8bdfc3e2c5fe65b132947300abf6b31c66d873e5bd66a87531730f2ff1',
+        'd3edc1cd7b216e18955135e0e9e767cdd7b1b8b7efa64793aa6b923a6c278d68',
     'platform':
         'linux',
     'machine': ['aarch64']
@@ -114,55 +114,55 @@
     'file_name':
         'traceconv',
     'file_size':
-        6575880,
+        5931120,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v41.0/android-arm/traceconv',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v43.1/android-arm/traceconv',
     'sha256':
-        '22af179fabec5b14d21753702670eee432af65a1244725ee31f0f3b960e2363d'
+        '8c3cb3dc96aa6ca296876b8ed56f8eed8c33e12e756b178360cc145263130e7e'
 }, {
     'arch':
         'android-arm64',
     'file_name':
         'traceconv',
     'file_size':
-        7906536,
+        7546224,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v41.0/android-arm64/traceconv',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v43.1/android-arm64/traceconv',
     'sha256':
-        'fedc807169b33370a5aae778ff001d08b079ed06ed0e846eeb251e5479c8de4f'
+        '180cfd2184d601c8f202b6bcd899cc7f63a8bb384505c1a2c3e889dfbe8bdb6d'
 }, {
     'arch':
         'android-x86',
     'file_name':
         'traceconv',
     'file_size':
-        8771276,
+        8176528,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v41.0/android-x86/traceconv',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v43.1/android-x86/traceconv',
     'sha256':
-        'b6d65ff5b4aaeab5c99b31c2ad766bf0253a57e4b700a53f192db1b97cab7b71'
+        'a4e8ff19daa58726138aa66f5adae74b609fceee403c8cddbaaf46d6d07e4cc8'
 }, {
     'arch':
         'android-x64',
     'file_name':
         'traceconv',
     'file_size':
-        8922400,
+        7767560,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v41.0/android-x64/traceconv',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v43.1/android-x64/traceconv',
     'sha256':
-        '098ccf88ffcfb109b7527ab7eb9309710a9098ccecdbc69d87f7b01fa1ed59bc'
+        '19626b87f8c8d956d3807d24faf5764c6bca289f55732cae2f6753dbec33e7f7'
 }, {
     'arch':
         'windows-amd64',
     'file_name':
         'traceconv.exe',
     'file_size':
-        8405504,
+        7645696,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v41.0/windows-amd64/traceconv.exe',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v43.1/windows-amd64/traceconv.exe',
     'sha256':
-        '65a864f0e61595cef693aaf7b081e22f5d471e20c35bf461c511a884167c839f',
+        '24eb5322f22c0219694789fa04aaa5ad09b0746f8b993fd1713e6b3f7943708a',
     'platform':
         'win32',
     'machine': ['amd64']
diff --git a/tools/gen_android_bp b/tools/gen_android_bp
index 25157e0..561657d 100755
--- a/tools/gen_android_bp
+++ b/tools/gen_android_bp
@@ -282,6 +282,7 @@
         ('min_sdk_version', 'S'),
     ],
     'libperfetto': [('export_include_dirs', {'include', buildflags_dir}),],
+    'perfetto': [('required', {'perfetto_persistent_cfg.pbtxt'}),],
 }
 
 
diff --git a/tools/gen_bazel b/tools/gen_bazel
index 31c127a..14775fc 100755
--- a/tools/gen_bazel
+++ b/tools/gen_bazel
@@ -150,6 +150,7 @@
 public_python_targets = [
     '//python:batch_trace_processor',
     '//python:trace_processor_py',
+    '//python:trace_processor_py_no_resolvers',
 ]
 
 # These are Python targets which are exposed by default.
@@ -165,6 +166,7 @@
     '//gn:pandas_py': ['PERFETTO_CONFIG.deps.pandas_py'],
     '//gn:protobuf_py': ['PERFETTO_CONFIG.deps.protobuf_py'],
     '//gn:tp_vendor_py': ['PERFETTO_CONFIG.deps.tp_vendor_py'],
+    '//gn:tp_resolvers_py': ['PERFETTO_CONFIG.deps.tp_resolvers_py'],
 }
 
 # Additional arguments
diff --git a/tools/heap_profile b/tools/heap_profile
index 2df0fa3..0ab79a6 100755
--- a/tools/heap_profile
+++ b/tools/heap_profile
@@ -34,18 +34,18 @@
 
 
 # ----- Amalgamator: begin of python/perfetto/prebuilts/manifests/traceconv.py
-# This file has been generated by: tools/roll-prebuilts v41.0
+# This file has been generated by: tools/roll-prebuilts v43.1
 TRACECONV_MANIFEST = [{
     'arch':
         'mac-amd64',
     'file_name':
         'traceconv',
     'file_size':
-        9381704,
+        7790424,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v41.0/mac-amd64/traceconv',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v43.1/mac-amd64/traceconv',
     'sha256':
-        'e5678d6e3eebeb6feecb9693f924c708c02ba78bd0ce0a427d1dd7acd2b37120',
+        '88007b64828e835e0326c11f66f0bba7d8ab117562963086a4f19d8cb060204d',
     'platform':
         'darwin',
     'machine': ['x86_64']
@@ -55,11 +55,11 @@
     'file_name':
         'traceconv',
     'file_size':
-        7976744,
+        7264824,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v41.0/mac-arm64/traceconv',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v43.1/mac-arm64/traceconv',
     'sha256':
-        'a199463232d3e8e37502d955a2bd712b1ab431c0ee1903d667bdf97b7345083c',
+        'be5769279ef8442e80130e4bdb6a0a6aa11305442207ea18ff2cf38b21a71a57',
     'platform':
         'darwin',
     'machine': ['arm64']
@@ -69,11 +69,11 @@
     'file_name':
         'traceconv',
     'file_size':
-        9127632,
+        7885952,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v41.0/linux-amd64/traceconv',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v43.1/linux-amd64/traceconv',
     'sha256':
-        '3c8e7b3cef528684d42f8a550cf38643f73f6ea82f6686f88f5b6af4d4e7bbc6',
+        '51cfdf5060bcd87d08402620d88d0243f7bb39f2878906614d53fa3ddd78dd92',
     'platform':
         'linux',
     'machine': ['x86_64']
@@ -83,11 +83,11 @@
     'file_name':
         'traceconv',
     'file_size':
-        6961192,
+        5919372,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v41.0/linux-arm/traceconv',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v43.1/linux-arm/traceconv',
     'sha256':
-        '9aea9075fdda92a326bc15a2a70bd818036588fce9e201d66cbdb16fac72b83a',
+        '04300b1c4dcec1e01bc23017dab3b406f9f0ffd7dd9ea3723784aa8730762bc9',
     'platform':
         'linux',
     'machine': ['armv6l', 'armv7l', 'armv8l']
@@ -97,11 +97,11 @@
     'file_name':
         'traceconv',
     'file_size':
-        8595032,
+        7588200,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v41.0/linux-arm64/traceconv',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v43.1/linux-arm64/traceconv',
     'sha256':
-        '586dac8bdfc3e2c5fe65b132947300abf6b31c66d873e5bd66a87531730f2ff1',
+        'd3edc1cd7b216e18955135e0e9e767cdd7b1b8b7efa64793aa6b923a6c278d68',
     'platform':
         'linux',
     'machine': ['aarch64']
@@ -111,55 +111,55 @@
     'file_name':
         'traceconv',
     'file_size':
-        6575880,
+        5931120,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v41.0/android-arm/traceconv',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v43.1/android-arm/traceconv',
     'sha256':
-        '22af179fabec5b14d21753702670eee432af65a1244725ee31f0f3b960e2363d'
+        '8c3cb3dc96aa6ca296876b8ed56f8eed8c33e12e756b178360cc145263130e7e'
 }, {
     'arch':
         'android-arm64',
     'file_name':
         'traceconv',
     'file_size':
-        7906536,
+        7546224,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v41.0/android-arm64/traceconv',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v43.1/android-arm64/traceconv',
     'sha256':
-        'fedc807169b33370a5aae778ff001d08b079ed06ed0e846eeb251e5479c8de4f'
+        '180cfd2184d601c8f202b6bcd899cc7f63a8bb384505c1a2c3e889dfbe8bdb6d'
 }, {
     'arch':
         'android-x86',
     'file_name':
         'traceconv',
     'file_size':
-        8771276,
+        8176528,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v41.0/android-x86/traceconv',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v43.1/android-x86/traceconv',
     'sha256':
-        'b6d65ff5b4aaeab5c99b31c2ad766bf0253a57e4b700a53f192db1b97cab7b71'
+        'a4e8ff19daa58726138aa66f5adae74b609fceee403c8cddbaaf46d6d07e4cc8'
 }, {
     'arch':
         'android-x64',
     'file_name':
         'traceconv',
     'file_size':
-        8922400,
+        7767560,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v41.0/android-x64/traceconv',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v43.1/android-x64/traceconv',
     'sha256':
-        '098ccf88ffcfb109b7527ab7eb9309710a9098ccecdbc69d87f7b01fa1ed59bc'
+        '19626b87f8c8d956d3807d24faf5764c6bca289f55732cae2f6753dbec33e7f7'
 }, {
     'arch':
         'windows-amd64',
     'file_name':
         'traceconv.exe',
     'file_size':
-        8405504,
+        7645696,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v41.0/windows-amd64/traceconv.exe',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v43.1/windows-amd64/traceconv.exe',
     'sha256':
-        '65a864f0e61595cef693aaf7b081e22f5d471e20c35bf461c511a884167c839f',
+        '24eb5322f22c0219694789fa04aaa5ad09b0746f8b993fd1713e6b3f7943708a',
     'platform':
         'win32',
     'machine': ['amd64']
diff --git a/tools/open_trace_in_ui b/tools/open_trace_in_ui
index 0bf8ea7..289b44a 100755
--- a/tools/open_trace_in_ui
+++ b/tools/open_trace_in_ui
@@ -41,14 +41,14 @@
 
   def do_GET(self):
     if self.path != '/' + self.server.expected_fname:
-      self.send_error(404, "File not found")
+      self.send_error(404, 'File not found')
       return
 
     self.server.fname_get_completed = True
     super().do_GET()
 
   def do_POST(self):
-    self.send_error(404, "File not found")
+    self.send_error(404, 'File not found')
 
 
 def prt(msg, colors=ANSI.END):
@@ -77,23 +77,31 @@
 
 
 def main():
-  examples = '\n'.join(
-      [ANSI.BOLD + 'Usage:' + ANSI.END, '  -i path/trace_file_name [-n]'])
+  examples = '\n'.join([
+      ANSI.BOLD + 'Examples:' + ANSI.END,
+      '  tools/open_trace_in_ui trace.pftrace',
+  ])
   parser = argparse.ArgumentParser(
       epilog=examples, formatter_class=argparse.RawTextHelpFormatter)
 
-  help = 'Input trace filename'
-  parser.add_argument('-i', '--trace', help=help)
+  parser.add_argument('positional_trace', metavar='trace', nargs='?')
   parser.add_argument(
       '-n', '--no-open-browser', action='store_true', default=False)
   parser.add_argument('--origin', default='https://ui.perfetto.dev')
+  parser.add_argument(
+      '-i', '--trace', help='input filename (overrides positional argument)')
 
   args = parser.parse_args()
-  trace_file = args.trace
   open_browser = not args.no_open_browser
 
+  trace_file = None
+  if args.positional_trace is not None:
+    trace_file = args.positional_trace
+  if args.trace is not None:
+    trace_file = args.trace
+
   if trace_file is None:
-    prt('Please specify trace file name with -i/--trace argument', ANSI.RED)
+    prt('Please specify trace file name', ANSI.RED)
     sys.exit(1)
   elif not os.path.exists(trace_file):
     prt('%s not found ' % trace_file, ANSI.RED)
diff --git a/tools/record_android_trace b/tools/record_android_trace
index b0f6e40..7199429 100755
--- a/tools/record_android_trace
+++ b/tools/record_android_trace
@@ -33,18 +33,18 @@
 
 
 # ----- Amalgamator: begin of python/perfetto/prebuilts/manifests/tracebox.py
-# This file has been generated by: tools/roll-prebuilts v41.0
+# This file has been generated by: tools/roll-prebuilts v43.1
 TRACEBOX_MANIFEST = [{
     'arch':
         'mac-amd64',
     'file_name':
         'tracebox',
     'file_size':
-        1515224,
+        1564728,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v41.0/mac-amd64/tracebox',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v43.1/mac-amd64/tracebox',
     'sha256':
-        '5a2cd4a6cce0430c85ca6e648c4058679019ebee01377400af2f12dcb7aecacf',
+        'dde1f657b10376f3fd684d1ce4302fd12c0479b567689f5dace8647375edd08c',
     'platform':
         'darwin',
     'machine': ['x86_64']
@@ -54,11 +54,11 @@
     'file_name':
         'tracebox',
     'file_size':
-        1392792,
+        1459160,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v41.0/mac-arm64/tracebox',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v43.1/mac-arm64/tracebox',
     'sha256':
-        '232e13cc957204079e3e5d89c4c9d84c7e689679e1d7b3f722fabecbfd61b9e6',
+        '349fc531090e134d708bfe2c44330c2f08280aa424f4e9f6d139897c1ad14da3',
     'platform':
         'darwin',
     'machine': ['arm64']
@@ -68,11 +68,11 @@
     'file_name':
         'tracebox',
     'file_size':
-        2241912,
+        2314424,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v41.0/linux-amd64/tracebox',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v43.1/linux-amd64/tracebox',
     'sha256':
-        '679b634ef3b95a6f4e751198a8fe4943e513d568b91782a181662548fda011b0',
+        'e35fd880f483ab26d57d292a7c4d1c9df6393bff7f1e7694e7d3642472c8fff9',
     'platform':
         'linux',
     'machine': ['x86_64']
@@ -82,11 +82,11 @@
     'file_name':
         'tracebox',
     'file_size':
-        1349220,
+        1418968,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v41.0/linux-arm/tracebox',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v43.1/linux-arm/tracebox',
     'sha256':
-        'af00007bb0419748a79ea3dd1c2572ba78f0791e9861c5cf2b72ecea75c74032',
+        '7e550ab781f79fcf548f37a7cc3aaa50dbab235b53c445829815d987eb162843',
     'platform':
         'linux',
     'machine': ['armv6l', 'armv7l', 'armv8l']
@@ -96,11 +96,11 @@
     'file_name':
         'tracebox',
     'file_size':
-        2168944,
+        2221176,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v41.0/linux-arm64/tracebox',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v43.1/linux-arm64/tracebox',
     'sha256':
-        'beeaef4a0b1927c1144ae4b66c81660a74ee2e27e70d875503a33f42c711ca14',
+        '355f2c6e66467a9e81855aa34a16fbe8cd68f01089ec0f5e3074f2011328a97f',
     'platform':
         'linux',
     'machine': ['aarch64']
@@ -110,44 +110,44 @@
     'file_name':
         'tracebox',
     'file_size':
-        1247188,
+        1304280,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v41.0/android-arm/tracebox',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v43.1/android-arm/tracebox',
     'sha256':
-        '93efbb520d1faf71fd99319d778c9d187c4ab06d25d1d2e38806c19724ab2012'
+        'b1c31ea2c07b519c40732416ecf91d8dbe0c04355150598c5ca2434249669a92'
 }, {
     'arch':
         'android-arm64',
     'file_name':
         'tracebox',
     'file_size':
-        1886888,
+        2076144,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v41.0/android-arm64/tracebox',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v43.1/android-arm64/tracebox',
     'sha256':
-        '75d4d4114a1e19b66c94bc315d5948751923adec364bae7d0db4125e980e3109'
+        '28d7476c048123b6d73e1af4f5054dffdc87b67163980454761433bf49626848'
 }, {
     'arch':
         'android-x86',
     'file_name':
         'tracebox',
     'file_size':
-        1869740,
+        2253568,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v41.0/android-x86/tracebox',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v43.1/android-x86/tracebox',
     'sha256':
-        '9518b5083c70c7b3f4b37718786b0362a99745eaa4640256000601c52966344b'
+        '22e61978317ac4ef2934768d9e65bee2b1c7a332bdada5b4c1525d6b0339d4ac'
 }, {
     'arch':
         'android-x64',
     'file_name':
         'tracebox',
     'file_size':
-        2149032,
+        2101752,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v41.0/android-x64/tracebox',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v43.1/android-x64/tracebox',
     'sha256':
-        '93d4d42fe36bce640e2337d27d7aa58cf75b42d659a84dcf900e724f6f9c974c'
+        '6c74f75555dc7bb31e54debe9fc27fc4db960d2382862d1a8a9d0cf03f7d8300'
 }]
 
 # ----- Amalgamator: end of python/perfetto/prebuilts/manifests/tracebox.py
diff --git a/tools/tracebox b/tools/tracebox
index 23061c0..5cb2e1b 100755
--- a/tools/tracebox
+++ b/tools/tracebox
@@ -30,18 +30,18 @@
 
 
 # ----- Amalgamator: begin of python/perfetto/prebuilts/manifests/tracebox.py
-# This file has been generated by: tools/roll-prebuilts v41.0
+# This file has been generated by: tools/roll-prebuilts v43.1
 TRACEBOX_MANIFEST = [{
     'arch':
         'mac-amd64',
     'file_name':
         'tracebox',
     'file_size':
-        1515224,
+        1564728,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v41.0/mac-amd64/tracebox',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v43.1/mac-amd64/tracebox',
     'sha256':
-        '5a2cd4a6cce0430c85ca6e648c4058679019ebee01377400af2f12dcb7aecacf',
+        'dde1f657b10376f3fd684d1ce4302fd12c0479b567689f5dace8647375edd08c',
     'platform':
         'darwin',
     'machine': ['x86_64']
@@ -51,11 +51,11 @@
     'file_name':
         'tracebox',
     'file_size':
-        1392792,
+        1459160,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v41.0/mac-arm64/tracebox',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v43.1/mac-arm64/tracebox',
     'sha256':
-        '232e13cc957204079e3e5d89c4c9d84c7e689679e1d7b3f722fabecbfd61b9e6',
+        '349fc531090e134d708bfe2c44330c2f08280aa424f4e9f6d139897c1ad14da3',
     'platform':
         'darwin',
     'machine': ['arm64']
@@ -65,11 +65,11 @@
     'file_name':
         'tracebox',
     'file_size':
-        2241912,
+        2314424,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v41.0/linux-amd64/tracebox',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v43.1/linux-amd64/tracebox',
     'sha256':
-        '679b634ef3b95a6f4e751198a8fe4943e513d568b91782a181662548fda011b0',
+        'e35fd880f483ab26d57d292a7c4d1c9df6393bff7f1e7694e7d3642472c8fff9',
     'platform':
         'linux',
     'machine': ['x86_64']
@@ -79,11 +79,11 @@
     'file_name':
         'tracebox',
     'file_size':
-        1349220,
+        1418968,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v41.0/linux-arm/tracebox',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v43.1/linux-arm/tracebox',
     'sha256':
-        'af00007bb0419748a79ea3dd1c2572ba78f0791e9861c5cf2b72ecea75c74032',
+        '7e550ab781f79fcf548f37a7cc3aaa50dbab235b53c445829815d987eb162843',
     'platform':
         'linux',
     'machine': ['armv6l', 'armv7l', 'armv8l']
@@ -93,11 +93,11 @@
     'file_name':
         'tracebox',
     'file_size':
-        2168944,
+        2221176,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v41.0/linux-arm64/tracebox',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v43.1/linux-arm64/tracebox',
     'sha256':
-        'beeaef4a0b1927c1144ae4b66c81660a74ee2e27e70d875503a33f42c711ca14',
+        '355f2c6e66467a9e81855aa34a16fbe8cd68f01089ec0f5e3074f2011328a97f',
     'platform':
         'linux',
     'machine': ['aarch64']
@@ -107,44 +107,44 @@
     'file_name':
         'tracebox',
     'file_size':
-        1247188,
+        1304280,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v41.0/android-arm/tracebox',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v43.1/android-arm/tracebox',
     'sha256':
-        '93efbb520d1faf71fd99319d778c9d187c4ab06d25d1d2e38806c19724ab2012'
+        'b1c31ea2c07b519c40732416ecf91d8dbe0c04355150598c5ca2434249669a92'
 }, {
     'arch':
         'android-arm64',
     'file_name':
         'tracebox',
     'file_size':
-        1886888,
+        2076144,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v41.0/android-arm64/tracebox',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v43.1/android-arm64/tracebox',
     'sha256':
-        '75d4d4114a1e19b66c94bc315d5948751923adec364bae7d0db4125e980e3109'
+        '28d7476c048123b6d73e1af4f5054dffdc87b67163980454761433bf49626848'
 }, {
     'arch':
         'android-x86',
     'file_name':
         'tracebox',
     'file_size':
-        1869740,
+        2253568,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v41.0/android-x86/tracebox',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v43.1/android-x86/tracebox',
     'sha256':
-        '9518b5083c70c7b3f4b37718786b0362a99745eaa4640256000601c52966344b'
+        '22e61978317ac4ef2934768d9e65bee2b1c7a332bdada5b4c1525d6b0339d4ac'
 }, {
     'arch':
         'android-x64',
     'file_name':
         'tracebox',
     'file_size':
-        2149032,
+        2101752,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v41.0/android-x64/tracebox',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v43.1/android-x64/tracebox',
     'sha256':
-        '93d4d42fe36bce640e2337d27d7aa58cf75b42d659a84dcf900e724f6f9c974c'
+        '6c74f75555dc7bb31e54debe9fc27fc4db960d2382862d1a8a9d0cf03f7d8300'
 }]
 
 # ----- Amalgamator: end of python/perfetto/prebuilts/manifests/tracebox.py
diff --git a/tools/traceconv b/tools/traceconv
index 20831c0..6ad6a9c 100755
--- a/tools/traceconv
+++ b/tools/traceconv
@@ -30,18 +30,18 @@
 
 
 # ----- Amalgamator: begin of python/perfetto/prebuilts/manifests/traceconv.py
-# This file has been generated by: tools/roll-prebuilts v41.0
+# This file has been generated by: tools/roll-prebuilts v43.1
 TRACECONV_MANIFEST = [{
     'arch':
         'mac-amd64',
     'file_name':
         'traceconv',
     'file_size':
-        9381704,
+        7790424,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v41.0/mac-amd64/traceconv',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v43.1/mac-amd64/traceconv',
     'sha256':
-        'e5678d6e3eebeb6feecb9693f924c708c02ba78bd0ce0a427d1dd7acd2b37120',
+        '88007b64828e835e0326c11f66f0bba7d8ab117562963086a4f19d8cb060204d',
     'platform':
         'darwin',
     'machine': ['x86_64']
@@ -51,11 +51,11 @@
     'file_name':
         'traceconv',
     'file_size':
-        7976744,
+        7264824,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v41.0/mac-arm64/traceconv',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v43.1/mac-arm64/traceconv',
     'sha256':
-        'a199463232d3e8e37502d955a2bd712b1ab431c0ee1903d667bdf97b7345083c',
+        'be5769279ef8442e80130e4bdb6a0a6aa11305442207ea18ff2cf38b21a71a57',
     'platform':
         'darwin',
     'machine': ['arm64']
@@ -65,11 +65,11 @@
     'file_name':
         'traceconv',
     'file_size':
-        9127632,
+        7885952,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v41.0/linux-amd64/traceconv',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v43.1/linux-amd64/traceconv',
     'sha256':
-        '3c8e7b3cef528684d42f8a550cf38643f73f6ea82f6686f88f5b6af4d4e7bbc6',
+        '51cfdf5060bcd87d08402620d88d0243f7bb39f2878906614d53fa3ddd78dd92',
     'platform':
         'linux',
     'machine': ['x86_64']
@@ -79,11 +79,11 @@
     'file_name':
         'traceconv',
     'file_size':
-        6961192,
+        5919372,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v41.0/linux-arm/traceconv',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v43.1/linux-arm/traceconv',
     'sha256':
-        '9aea9075fdda92a326bc15a2a70bd818036588fce9e201d66cbdb16fac72b83a',
+        '04300b1c4dcec1e01bc23017dab3b406f9f0ffd7dd9ea3723784aa8730762bc9',
     'platform':
         'linux',
     'machine': ['armv6l', 'armv7l', 'armv8l']
@@ -93,11 +93,11 @@
     'file_name':
         'traceconv',
     'file_size':
-        8595032,
+        7588200,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v41.0/linux-arm64/traceconv',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v43.1/linux-arm64/traceconv',
     'sha256':
-        '586dac8bdfc3e2c5fe65b132947300abf6b31c66d873e5bd66a87531730f2ff1',
+        'd3edc1cd7b216e18955135e0e9e767cdd7b1b8b7efa64793aa6b923a6c278d68',
     'platform':
         'linux',
     'machine': ['aarch64']
@@ -107,55 +107,55 @@
     'file_name':
         'traceconv',
     'file_size':
-        6575880,
+        5931120,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v41.0/android-arm/traceconv',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v43.1/android-arm/traceconv',
     'sha256':
-        '22af179fabec5b14d21753702670eee432af65a1244725ee31f0f3b960e2363d'
+        '8c3cb3dc96aa6ca296876b8ed56f8eed8c33e12e756b178360cc145263130e7e'
 }, {
     'arch':
         'android-arm64',
     'file_name':
         'traceconv',
     'file_size':
-        7906536,
+        7546224,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v41.0/android-arm64/traceconv',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v43.1/android-arm64/traceconv',
     'sha256':
-        'fedc807169b33370a5aae778ff001d08b079ed06ed0e846eeb251e5479c8de4f'
+        '180cfd2184d601c8f202b6bcd899cc7f63a8bb384505c1a2c3e889dfbe8bdb6d'
 }, {
     'arch':
         'android-x86',
     'file_name':
         'traceconv',
     'file_size':
-        8771276,
+        8176528,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v41.0/android-x86/traceconv',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v43.1/android-x86/traceconv',
     'sha256':
-        'b6d65ff5b4aaeab5c99b31c2ad766bf0253a57e4b700a53f192db1b97cab7b71'
+        'a4e8ff19daa58726138aa66f5adae74b609fceee403c8cddbaaf46d6d07e4cc8'
 }, {
     'arch':
         'android-x64',
     'file_name':
         'traceconv',
     'file_size':
-        8922400,
+        7767560,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v41.0/android-x64/traceconv',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v43.1/android-x64/traceconv',
     'sha256':
-        '098ccf88ffcfb109b7527ab7eb9309710a9098ccecdbc69d87f7b01fa1ed59bc'
+        '19626b87f8c8d956d3807d24faf5764c6bca289f55732cae2f6753dbec33e7f7'
 }, {
     'arch':
         'windows-amd64',
     'file_name':
         'traceconv.exe',
     'file_size':
-        8405504,
+        7645696,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v41.0/windows-amd64/traceconv.exe',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v43.1/windows-amd64/traceconv.exe',
     'sha256':
-        '65a864f0e61595cef693aaf7b081e22f5d471e20c35bf461c511a884167c839f',
+        '24eb5322f22c0219694789fa04aaa5ad09b0746f8b993fd1713e6b3f7943708a',
     'platform':
         'win32',
     'machine': ['amd64']
diff --git a/ui/README.md b/ui/README.md
index e64d31c..ec77793 100644
--- a/ui/README.md
+++ b/ui/README.md
@@ -6,6 +6,9 @@
 $ git clone https://android.googlesource.com/platform/external/perfetto/
 $ cd perfetto
 
+# Install build dependencies
+tools/install-build-deps --ui
+
 # Will build into ./out/ui by default. Can be changed with --out path/
 # The final bundle will be available at ./ui/out/dist/.
 # The build script creates a symlink from ./ui/out to $OUT_PATH/ui/.
diff --git a/ui/release/build_all_channels.py b/ui/release/build_all_channels.py
index 5549421..69301fe 100755
--- a/ui/release/build_all_channels.py
+++ b/ui/release/build_all_channels.py
@@ -99,6 +99,8 @@
   parser = argparse.ArgumentParser()
   parser.add_argument('--upload', action='store_true')
   parser.add_argument('--tmp', default='/tmp/perfetto_ui')
+  parser.add_argument('--branch_only')
+
   args = parser.parse_args()
 
   # Read the releases.json, which maps channel names to git refs, e.g.:
@@ -107,20 +109,24 @@
   with open(pjoin(CUR_DIR, 'channels.json')) as f:
     channels = json.load(f)['channels']
 
+  if args.branch_only:
+    channels = [{'name': 'branch', 'rev': args.branch_only}]
+
   merged_dist_dir = pjoin(args.tmp, 'dist')
   check_call_and_log(['rm', '-rf', merged_dist_dir])
   shutil.os.makedirs(merged_dist_dir)
   channel_map = build_all_channels(channels, args.tmp, merged_dist_dir)
 
-  print('Updating index in ' + merged_dist_dir)
-  with open(pjoin(merged_dist_dir, 'index.html'), 'r+') as f:
-    index_html = f.read()
-    f.seek(0, 0)
-    f.truncate()
-    index_html = re.sub(r"data-perfetto_version='[^']*'",
-                        "data-perfetto_version='%s'" % json.dumps(channel_map),
-                        index_html)
-    f.write(index_html)
+  if not args.branch_only:
+    print('Updating index in ' + merged_dist_dir)
+    with open(pjoin(merged_dist_dir, 'index.html'), 'r+') as f:
+      index_html = f.read()
+      f.seek(0, 0)
+      f.truncate()
+      index_html = re.sub(
+          r"data-perfetto_version='[^']*'",
+          "data-perfetto_version='%s'" % json.dumps(channel_map), index_html)
+      f.write(index_html)
 
   if not args.upload:
     return
diff --git a/ui/release/builder_entrypoint.sh b/ui/release/builder_entrypoint.sh
index 7a940b8..27340ea 100755
--- a/ui/release/builder_entrypoint.sh
+++ b/ui/release/builder_entrypoint.sh
@@ -30,11 +30,22 @@
 # support yet triggering from Gerrit.
 
 cd /workspace/
+
 ls -A1 | xargs rm -rf
 UPSTREAM="https://android.googlesource.com/platform/external/perfetto.git"
 git clone $UPSTREAM upstream
 
 cd upstream/
+
+# infra/ui.perfetto.dev/cloudbuild_release.yaml sets $1 to the branch
+# name.
+EXTRA_ARGS=""
+if [[ ! -z $1 ]]; then
+  git checkout $1
+  EXTRA_ARGS="--branch_only=$1"
+fi
+
 git rev-parse HEAD
 mkdir /workspace/tmp
-python3 -u "$CUR_DUR/build_all_channels.py" --upload --tmp=/workspace/tmp
+python3 -u "$CUR_DUR/build_all_channels.py" \
+        --upload --tmp=/workspace/tmp $EXTRA_ARGS
diff --git a/ui/release/channels.json b/ui/release/channels.json
index 778c4d8..2e0ec2d 100644
--- a/ui/release/channels.json
+++ b/ui/release/channels.json
@@ -6,7 +6,7 @@
     },
     {
       "name": "canary",
-      "rev": "f62b11e6b5d34d0b0f1af3d60eb4d40a57b652b6"
+      "rev": "dc270ece0d4ff6106505f6340c5440666e785c0e"
     },
     {
       "name": "autopush",
diff --git a/ui/src/assets/common.scss b/ui/src/assets/common.scss
index 7ce3c1b..b1943e4 100644
--- a/ui/src/assets/common.scss
+++ b/ui/src/assets/common.scss
@@ -12,8 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-@use "sass:math";
-
 @import "widgets/theme";
 @import "typefaces";
 @import "fonts";
@@ -45,17 +43,6 @@
   content: $content;
 }
 
-@mixin track_shell_title() {
-  font-size: 14px;
-  max-height: 30px;
-  overflow: hidden;
-  text-align: left;
-  overflow-wrap: break-word;
-  font-family: "Roboto Condensed", sans-serif;
-  font-weight: 300;
-  line-break: anywhere;
-}
-
 * {
   box-sizing: border-box;
   -webkit-tap-highlight-color: transparent;
@@ -159,18 +146,6 @@
 
 .page {
   grid-area: page;
-  position: relative;
-  display: flex;
-  flex-direction: column;
-  overflow: hidden;
-}
-
-.split-panel {
-  flex: 1;
-  display: flex;
-  flex-flow: row;
-  position: relative;
-  overflow: hidden;
 }
 
 .alerts {
@@ -377,169 +352,6 @@
   }
 }
 
-.track-content.pf-track-content-error {
-  // Necessary trig because we have a 45deg stripes
-  $pattern-density: 1px * math.sqrt(2);
-  $pattern-col: #ddd;
-
-  // box-shadow: inset 0 0 0 5px red;
-  background: repeating-linear-gradient(
-    -45deg,
-    $pattern-col,
-    $pattern-col $pattern-density,
-    white $pattern-density,
-    white $pattern-density * 2
-  );
-}
-
-.track {
-  display: grid;
-  grid-template-columns: auto 1fr;
-  grid-template-rows: 1fr 0;
-
-  &::after {
-    display: block;
-    content: "";
-    grid-column: 1 / span 2;
-    border-top: 1px solid var(--track-border-color);
-    margin-top: -1px;
-    z-index: 2;
-  }
-
-  .track-shell {
-    @include transition();
-    padding-left: 10px;
-    display: grid;
-    cursor: grab;
-    grid-template-areas: "title buttons";
-    grid-template-columns: 1fr auto;
-    align-items: center;
-    width: var(--track-shell-width);
-    background: #fff;
-    border-right: 1px solid #c7d0db;
-    overflow: hidden;
-
-    &.drag {
-      background-color: #eee;
-      box-shadow: 0 4px 12px -4px #999 inset;
-    }
-    &.drop-before {
-      box-shadow: 0 4px 2px -1px hsl(213, 40%, 50%) inset;
-    }
-    &.drop-after {
-      box-shadow: 0 -4px 2px -1px hsl(213, 40%, 50%) inset;
-    }
-
-    &.selected {
-      background-color: #ebeef9;
-    }
-
-    &.alternating-thread-track {
-      background: hsl(214, 22%, 95%);
-    }
-
-    .chip {
-      background-color: #bed6ff;
-      border-radius: $pf-border-radius;
-      font-size: smaller;
-      padding: 0 0.1rem;
-      margin-left: 1ch;
-    }
-
-    h1 {
-      grid-area: title;
-      color: hsl(213, 22%, 30%);
-      @include track_shell_title();
-    }
-    .track-buttons {
-      grid-area: buttons;
-      display: flex;
-      height: 100%;
-      align-items: center;
-    }
-    .track-button {
-      @include transition();
-      color: rgb(60, 86, 136);
-      cursor: pointer;
-      width: 22px;
-      font-size: 18px;
-      opacity: 0;
-    }
-
-    .track-button.show {
-      opacity: 1;
-    }
-    .track-button.full-height {
-      display: flex;
-      height: 100%;
-      align-items: center;
-      justify-content: center;
-
-      &:hover {
-        background-color: #ebeef9;
-      }
-    }
-
-    &:hover .track-button {
-      opacity: 1;
-    }
-    &.flash {
-      background-color: #ffe263;
-    }
-  }
-}
-
-.pinned-panel-container {
-  max-height: 50%;
-  box-shadow: 1px 3px 15px rgba(23, 32, 44, 0.3);
-  z-index: 1;
-  flex-grow: 0;
-  flex-shrink: 0;
-  overflow: hidden;
-  overflow-y: auto;
-}
-
-.scrolling-panel-container {
-  overflow-x: hidden;
-  overflow-y: auto;
-  flex: 1 1 auto;
-  will-change: transform; // Force layer creation.
-}
-
-.details-panel-container {
-  box-shadow: #0000003b 0px 0px 3px 1px;
-  overflow: auto;
-}
-
-.header-panel-container {
-  overflow: visible;
-  box-shadow: 1px 3px 15px rgba(23, 32, 44, 0.3);
-  z-index: 2;
-}
-
-.pan-and-zoom-content {
-  flex: 1;
-  position: relative;
-  display: flex;
-  flex-flow: column nowrap;
-}
-
-.overview-timeline {
-  height: 70px;
-}
-
-.time-axis-panel {
-  height: 22px;
-}
-
-.tickbar {
-  height: 5px;
-}
-
-.notes-panel {
-  height: 20px;
-}
-
 .x-scrollable {
   overflow-x: auto;
 }
@@ -594,15 +406,6 @@
   margin: auto 0 auto 1rem;
 }
 
-.debug-panel-border {
-  position: absolute;
-  top: 0;
-  height: 100%;
-  width: 100%;
-  border: 1px solid rgba(69, 187, 73, 0.5);
-  pointer-events: none;
-}
-
 .perf-stats {
   --stroke-color: hsl(217, 39%, 94%);
   position: fixed;
@@ -649,117 +452,6 @@
   }
 }
 
-.track-group-panel {
-  --collapsed-transparent: hsla(190, 49%, 97%, 0);
-  --expanded-transparent: hsl(215, 22%, 19%, 0);
-  display: grid;
-  grid-template-columns: auto 1fr;
-  grid-template-rows: 1fr;
-  transition: background-color 0.4s, color 0.4s;
-  height: 40px;
-  &::after {
-    display: block;
-    content: "";
-    grid-column: 1 / span 2;
-    border-top: 1px solid var(--track-border-color);
-    margin-top: -1px;
-  }
-  &[collapsed="true"] {
-    background-color: var(--collapsed-transparent);
-    .shell {
-      border-right: 1px solid #c7d0db;
-      background-color: var(--collapsed-background);
-    }
-    .track-button {
-      color: rgb(60, 86, 136);
-    }
-  }
-  &[collapsed="false"] {
-    background-color: var(--expanded-transparent);
-    color: white;
-    font-weight: bold;
-    .shell.flash {
-      color: #121212;
-    }
-    .track-button {
-      color: white;
-    }
-    span.chip {
-      color: #121212;
-    }
-  }
-  .shell {
-    padding: 4px 4px;
-    display: grid;
-    grid-template-areas: "fold-button title buttons check";
-    grid-template-columns: 28px 1fr auto 20px;
-    align-items: center;
-    line-height: 1;
-    width: var(--track-shell-width);
-    min-height: 40px;
-    transition: background-color 0.4s;
-
-    .track-title {
-      user-select: text;
-    }
-
-    .track-subtitle {
-      font-size: 0.6rem;
-      font-weight: normal;
-      overflow: hidden;
-      white-space: nowrap;
-      text-overflow: ellipsis;
-      // Maximum width according to grid-template-columns value for .shell
-      width: calc(var(--track-shell-width) - 56px);
-    }
-
-    .chip {
-      background-color: #bed6ff;
-      border-radius: 3px;
-      font-size: smaller;
-      padding: 0 0.1rem;
-      margin-left: 1ch;
-    }
-
-    .title-wrapper {
-      grid-area: title;
-      overflow: hidden;
-    }
-    h1 {
-      @include track_shell_title();
-    }
-    .fold-button {
-      grid-area: fold-button;
-    }
-    .track-button {
-      font-size: 20px;
-    }
-    &:hover {
-      cursor: pointer;
-      .fold-button {
-        color: hsl(45, 100%, 48%);
-      }
-    }
-    &.flash {
-      background-color: #ffe263;
-    }
-    &.selected {
-      background-color: #ebeef9;
-    }
-  }
-  .track-content {
-    display: grid;
-    span {
-      @include track_shell_title();
-      align-self: center;
-    }
-  }
-}
-
-.time-selection-panel {
-  height: 10px;
-}
-
 .cookie-consent {
   position: absolute;
   z-index: 10;
diff --git a/ui/src/assets/panel_container.scss b/ui/src/assets/panel_container.scss
index 9790dd0..6163353 100644
--- a/ui/src/assets/panel_container.scss
+++ b/ui/src/assets/panel_container.scss
@@ -12,42 +12,39 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-.panel-container {
-  position: relative;
-  // display: grid;
-  // grid-template-columns: 1fr;
-  // grid-template-rows: 1fr;
-  // grid-template-areas: "space";
-}
-
-// In the scrolling case, since the canvas is overdrawn and continuously
-// repositioned, we need the canvas to be in a div with overflow hidden and
-// height equaling the total height of the content to prevent scrolling
-// height from growing.
-.scroll-limiter {
-  position: absolute;
-  top: 0;
-  left: 0;
-  overflow: hidden;
-  height: 100%;
-}
-
-canvas.main-canvas {
-  z-index: -1;
-}
-
-.panels {
-  position: relative;
+.pf-panel-container {
+  // We need to drag over this element for various reasons, so just disable
+  // selection over the entire thing.
+  // TODO(stevegolton): If we enable this, we can get scrolling while dragging,
+  // so we might want to enable this here and disable selection in titles
+  // instead.
   user-select: none;
-}
 
-.panel {
-  position: relative; // Otherwise canvas covers panel dom.
+  .pf-panels {
+    // Make this a positioned element so .pf-scroll-limiter is positioned
+    // relative to this element.
+    position: relative;
 
-  &.sticky {
-    position: sticky;
-    z-index: 3;
-    top: 0;
-    background-color: hsl(215, 22%, 19%);
+    // In the scrolling case, since the canvas is overdrawn and continuously
+    // repositioned, we need the canvas to be in a div with overflow hidden and
+    // height equalling the total height of the content to prevent scrolling
+    // height from growing.
+    .pf-scroll-limiter {
+      position: absolute;
+      top: 0;
+      left: 0;
+      bottom: 0;
+      overflow: hidden;
+
+      // Make this overlay invisible to pointer events.
+      pointer-events: none;
+    }
+
+    .pf-panel {
+      &.pf-sticky {
+        position: sticky;
+        top: 0;
+      }
+    }
   }
 }
diff --git a/ui/src/assets/perfetto.scss b/ui/src/assets/perfetto.scss
index 0cf20b8..8bb1cdc 100644
--- a/ui/src/assets/perfetto.scss
+++ b/ui/src/assets/perfetto.scss
@@ -15,6 +15,8 @@
 @import "typefaces";
 @import "common";
 @import "panel_container";
+@import "viewer_page";
+@import "track_panel";
 @import "home_page";
 @import "query_page";
 @import "metrics_page";
diff --git a/ui/src/assets/track_panel.scss b/ui/src/assets/track_panel.scss
new file mode 100644
index 0000000..0091fba
--- /dev/null
+++ b/ui/src/assets/track_panel.scss
@@ -0,0 +1,235 @@
+// Copyright (C) 2024 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+@use "sass:math";
+
+@mixin track_shell_title() {
+  font-size: 14px;
+  max-height: 30px;
+  overflow: hidden;
+  text-align: left;
+  overflow-wrap: break-word;
+  font-family: "Roboto Condensed", sans-serif;
+  font-weight: 300;
+  line-break: anywhere;
+}
+
+.track-content.pf-track-content-error {
+  // Necessary trig because we have 45deg stripes.
+  $pattern-density: 1px * math.sqrt(2);
+  $pattern-col: #ddd;
+
+  // box-shadow: inset 0 0 0 5px red;
+  background: repeating-linear-gradient(
+    -45deg,
+    $pattern-col,
+    $pattern-col $pattern-density,
+    white $pattern-density,
+    white $pattern-density * 2
+  );
+}
+
+.track {
+  display: grid;
+  grid-template-columns: auto 1fr;
+  grid-template-rows: 1fr 0;
+
+  &::after {
+    display: block;
+    content: "";
+    grid-column: 1 / span 2;
+    border-top: 1px solid var(--track-border-color);
+    margin-top: -1px;
+    z-index: 2;
+  }
+
+  .track-shell {
+    @include transition();
+    padding-left: 10px;
+    display: grid;
+    cursor: grab;
+    grid-template-areas: "title buttons";
+    grid-template-columns: 1fr auto;
+    align-items: center;
+    width: var(--track-shell-width);
+    border-right: 1px solid #c7d0db;
+    overflow: hidden;
+
+    &.drag {
+      background-color: #eee;
+      box-shadow: 0 4px 12px -4px #999 inset;
+    }
+    &.drop-before {
+      box-shadow: 0 4px 2px -1px hsl(213, 40%, 50%) inset;
+    }
+    &.drop-after {
+      box-shadow: 0 -4px 2px -1px hsl(213, 40%, 50%) inset;
+    }
+
+    &.selected {
+      background-color: #ebeef9;
+    }
+
+    .chip {
+      background-color: #bed6ff;
+      border-radius: $pf-border-radius;
+      font-size: smaller;
+      padding: 0 0.1rem;
+      margin-left: 1ch;
+    }
+
+    h1 {
+      grid-area: title;
+      color: hsl(213, 22%, 30%);
+      @include track_shell_title();
+    }
+    .track-buttons {
+      grid-area: buttons;
+      display: flex;
+      height: 100%;
+      align-items: center;
+    }
+    .track-button {
+      @include transition();
+      color: rgb(60, 86, 136);
+      cursor: pointer;
+      width: 22px;
+      font-size: 18px;
+      opacity: 0;
+    }
+
+    .track-button.show {
+      opacity: 1;
+    }
+    .track-button.full-height {
+      display: flex;
+      height: 100%;
+      align-items: center;
+      justify-content: center;
+
+      &:hover {
+        background-color: #ebeef9;
+      }
+    }
+
+    &:hover .track-button {
+      opacity: 1;
+    }
+    &.flash {
+      background-color: #ffe263;
+    }
+  }
+}
+
+.track-group-panel {
+  display: grid;
+  grid-template-columns: auto 1fr;
+  grid-template-rows: 1fr;
+  height: 40px;
+  &::after {
+    display: block;
+    content: "";
+    grid-column: 1 / span 2;
+    border-top: 1px solid var(--track-border-color);
+    margin-top: -1px;
+  }
+  &[collapsed="true"] {
+    background-color: var(--collapsed-background);
+    .shell {
+      border-right: 1px solid #c7d0db;
+    }
+    .track-button {
+      color: rgb(60, 86, 136);
+    }
+  }
+  &[collapsed="false"] {
+    background-color: var(--expanded-background);
+    color: white;
+    font-weight: bold;
+    .shell.flash {
+      color: #121212;
+    }
+    .track-button {
+      color: white;
+    }
+    span.chip {
+      color: #121212;
+    }
+  }
+  .shell {
+    padding: 4px 4px;
+    display: grid;
+    grid-template-areas: "fold-button title buttons check";
+    grid-template-columns: 28px 1fr auto 20px;
+    align-items: center;
+    line-height: 1;
+    width: var(--track-shell-width);
+    min-height: 40px;
+
+    .track-title {
+      user-select: text;
+    }
+
+    .track-subtitle {
+      font-size: 0.6rem;
+      font-weight: normal;
+      overflow: hidden;
+      white-space: nowrap;
+      text-overflow: ellipsis;
+      // Maximum width according to grid-template-columns value for .shell
+      width: calc(var(--track-shell-width) - 56px);
+    }
+
+    .chip {
+      background-color: #bed6ff;
+      border-radius: 3px;
+      font-size: smaller;
+      padding: 0 0.1rem;
+      margin-left: 1ch;
+    }
+
+    .title-wrapper {
+      grid-area: title;
+      overflow: hidden;
+    }
+    h1 {
+      @include track_shell_title();
+    }
+    .fold-button {
+      grid-area: fold-button;
+    }
+    .track-button {
+      font-size: 20px;
+    }
+    &:hover {
+      cursor: pointer;
+      .fold-button {
+        color: hsl(45, 100%, 48%);
+      }
+    }
+    &.flash {
+      background-color: #ffe263;
+    }
+    &.selected {
+      background-color: #ebeef9;
+    }
+  }
+  .track-content {
+    display: grid;
+    span {
+      @include track_shell_title();
+      align-self: center;
+    }
+  }
+}
diff --git a/ui/src/assets/viewer_page.scss b/ui/src/assets/viewer_page.scss
new file mode 100644
index 0000000..3fdd737
--- /dev/null
+++ b/ui/src/assets/viewer_page.scss
@@ -0,0 +1,76 @@
+// Copyright (C) 2024 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+.viewer-page {
+  position: relative;
+  display: flex;
+  flex-direction: column;
+  overflow: hidden;
+
+  .pinned-panel-container {
+    max-height: 50%;
+    box-shadow: 1px 3px 15px rgba(23, 32, 44, 0.3);
+    z-index: 1;
+    flex-grow: 0;
+    flex-shrink: 0;
+    overflow: hidden;
+    overflow-y: auto;
+  }
+
+  .scrolling-panel-container {
+    overflow-x: hidden;
+    overflow-y: auto;
+    flex: 1 1 auto;
+    will-change: transform; // Force layer creation.
+  }
+
+  .details-panel-container {
+    box-shadow: #0000003b 0px 0px 3px 1px;
+    overflow: auto;
+  }
+
+  .header-panel-container {
+    overflow: visible;
+    box-shadow: 1px 3px 15px rgba(23, 32, 44, 0.3);
+    z-index: 2;
+  }
+
+  .pan-and-zoom-content {
+    flex: 1;
+    position: relative;
+    display: flex;
+    flex-flow: column nowrap;
+    overflow: hidden;
+  }
+
+  .overview-timeline {
+    height: 70px;
+  }
+
+  .time-axis-panel {
+    height: 22px;
+  }
+
+  .tickbar {
+    height: 5px;
+  }
+
+  .notes-panel {
+    height: 20px;
+  }
+
+  .time-selection-panel {
+    height: 10px;
+  }
+}
diff --git a/ui/src/base/classnames.ts b/ui/src/base/classnames.ts
index b2f6007..d69a244 100644
--- a/ui/src/base/classnames.ts
+++ b/ui/src/base/classnames.ts
@@ -18,8 +18,14 @@
 type ArgType = string|false|undefined|ArgType[];
 
 // Join class names together into valid HTML class attributes
-// Falsey elements are ignored
+// Falsy elements are ignored
 // Nested arrays are flattened
-export function classNames(...args: ArgType[]): string {
-  return args.flat().filter((x) => x).join(' ');
+// If all elements are falsy, returns undefined
+export function classNames(...args: ArgType[]): string|undefined {
+  const filtered = args.flat().filter((x) => x);
+  if (filtered.length === 0) {
+    return undefined;
+  } else {
+    return filtered.join(' ');
+  }
 }
diff --git a/ui/src/base/dom_utils.ts b/ui/src/base/dom_utils.ts
index 5caf9d9..1c20a34 100644
--- a/ui/src/base/dom_utils.ts
+++ b/ui/src/base/dom_utils.ts
@@ -31,7 +31,7 @@
 // Throws if the element is not an HTMLElement.
 export function toHTMLElement(el: Element): HTMLElement {
   if (!(el instanceof HTMLElement)) {
-    throw new Error('Element is not an HTLMElement');
+    throw new Error('Element is not an HTMLElement');
   }
   return el as HTMLElement;
 }
diff --git a/ui/src/common/canvas_utils.ts b/ui/src/common/canvas_utils.ts
index e0111f9..3d94ee4 100644
--- a/ui/src/common/canvas_utils.ts
+++ b/ui/src/common/canvas_utils.ts
@@ -175,3 +175,14 @@
     ctx.fillText(text2, x + paddingPx, y + paddingPx + yOffsetPx);
   }
 }
+
+export function canvasClip(
+  ctx: CanvasRenderingContext2D,
+  x: number,
+  y: number,
+  w: number,
+  h: number): void {
+  ctx.beginPath();
+  ctx.rect(x, y, w, h);
+  ctx.clip();
+}
diff --git a/ui/src/common/registry.ts b/ui/src/common/registry.ts
index f20ad5c..4edd5a5 100644
--- a/ui/src/common/registry.ts
+++ b/ui/src/common/registry.ts
@@ -61,6 +61,10 @@
     return registrant;
   }
 
+  tryGet(kind: string): T|undefined {
+    return this.registry.get(kind);
+  }
+
   // Support iteration: for (const foo of fooRegistry.values()) { ... }
   * values() {
     yield* this.registry.values();
diff --git a/ui/src/common/track_cache.ts b/ui/src/common/track_cache.ts
index c531bf2..613ba3d 100644
--- a/ui/src/common/track_cache.ts
+++ b/ui/src/common/track_cache.ts
@@ -98,7 +98,7 @@
   // Look up track into for a given track's URI.
   // Returns |undefined| if no track can be found.
   resolveTrackInfo(uri: string): TrackDescriptor|undefined {
-    return this.trackRegistry.get(uri);
+    return this.trackRegistry.tryGet(uri);
   }
 
   // Creates a new track using |uri| and |params| or retrieves a cached track if
diff --git a/ui/src/controller/cpu_profile_controller.ts b/ui/src/controller/cpu_profile_controller.ts
index a251442..081812a 100644
--- a/ui/src/controller/cpu_profile_controller.ts
+++ b/ui/src/controller/cpu_profile_controller.ts
@@ -116,7 +116,7 @@
             WHERE symbol.symbol_set_id = spf.symbol_set_id
             LIMIT 1
           ),
-          COALESCE(spf.deobfuscated_name, spf.name)
+          COALESCE(spf.deobfuscated_name, spf.name, "")
         ) AS name,
         spm.name AS mapping
       FROM cpu_profile_stack_sample AS samples
diff --git a/ui/src/frontend/aggregation_tab.ts b/ui/src/frontend/aggregation_tab.ts
index 30dd743..4c96c21 100644
--- a/ui/src/frontend/aggregation_tab.ts
+++ b/ui/src/frontend/aggregation_tab.ts
@@ -65,15 +65,6 @@
       }
     }
 
-    // Add this after all aggregation panels, to make it appear after 'Slices'
-    if (globals.selectedFlows.length > 0) {
-      views.push({
-        key: 'selected_flows',
-        name: 'Flow Events',
-        content: m(FlowEventsAreaSelectedPanel),
-      });
-    }
-
     const pivotTableState = globals.state.nonSerializableState.pivotTable;
     if (pivotTableState.selectionArea !== undefined) {
       views.push({
@@ -86,6 +77,15 @@
       });
     }
 
+    // Add this after all aggregation panels, to make it appear after 'Slices'
+    if (globals.selectedFlows.length > 0) {
+      views.push({
+        key: 'selected_flows',
+        name: 'Flow Events',
+        content: m(FlowEventsAreaSelectedPanel),
+      });
+    }
+
     return views;
   }
 
diff --git a/ui/src/frontend/notes_panel.ts b/ui/src/frontend/notes_panel.ts
index 5efacf7..8ffd3a3 100644
--- a/ui/src/frontend/notes_panel.ts
+++ b/ui/src/frontend/notes_panel.ts
@@ -67,7 +67,7 @@
 
   constructor(readonly key: string) {}
 
-  get mithril(): m.Children {
+  render(): m.Children {
     const allCollapsed = Object.values(globals.state.trackGroups)
       .every((group) => group.collapsed);
 
@@ -83,7 +83,7 @@
           this.hoveredX = currentTargetOffset(e).x - TRACK_SHELL_WIDTH;
           raf.scheduleRedraw();
         },
-        mouseenter: (e: MouseEvent) => {
+        onmouseenter: (e: MouseEvent) => {
           this.hoveredX = currentTargetOffset(e).x - TRACK_SHELL_WIDTH;
           raf.scheduleRedraw();
         },
diff --git a/ui/src/frontend/overview_timeline_panel.ts b/ui/src/frontend/overview_timeline_panel.ts
index 0accdb8..b1cc984 100644
--- a/ui/src/frontend/overview_timeline_panel.ts
+++ b/ui/src/frontend/overview_timeline_panel.ts
@@ -94,7 +94,7 @@
       .removeEventListener('mousemove', this.boundOnMouseMove);
   }
 
-  get mithril(): m.Children {
+  render(): m.Children {
     return m('.overview-timeline', {
       oncreate: (vnode) => this.oncreate(vnode),
       onupdate: (vnode) => this.onupdate(vnode),
diff --git a/ui/src/frontend/panel_container.ts b/ui/src/frontend/panel_container.ts
index 9b19db0..52ecf86 100644
--- a/ui/src/frontend/panel_container.ts
+++ b/ui/src/frontend/panel_container.ts
@@ -41,6 +41,7 @@
 } from './flow_events_renderer';
 import {globals} from './globals';
 import {PanelSize} from './panel';
+import {canvasClip} from '../common/canvas_utils';
 
 // If the panel container scrolls, the backing canvas height is
 // SCROLLING_CANVAS_OVERDRAW_FACTOR * parent container height.
@@ -48,7 +49,7 @@
 
 export interface Panel {
   kind: 'panel';
-  mithril: m.Children;
+  render(): m.Children;
   selectable: boolean;
   key: string;
   trackKey?: string;
@@ -67,7 +68,7 @@
 
 export type PanelOrGroup = Panel|PanelGroup;
 
-export interface Attrs {
+export interface PanelContainerAttrs {
   panels: PanelOrGroup[];
   doesScroll: boolean;
   kind: 'TRACKS'|'OVERVIEW';
@@ -83,7 +84,7 @@
   y: number;
 }
 
-export class PanelContainer implements m.ClassComponent<Attrs>,
+export class PanelContainer implements m.ClassComponent<PanelContainerAttrs>,
                                        PerfStatsSource {
   // These values are updated with proper values in oncreate.
   private parentWidth = 0;
@@ -108,7 +109,7 @@
   // Attrs received in the most recent mithril redraw. We receive a new vnode
   // with new attrs on every redraw, and we cache it here so that resize
   // listeners and canvas redraw callbacks can access it.
-  private attrs: Attrs;
+  private attrs: PanelContainerAttrs;
 
   private ctx?: CanvasRenderingContext2D;
 
@@ -116,6 +117,7 @@
 
   private readonly SCROLL_LIMITER_REF = 'scroll-limiter';
   private readonly PANELS_REF = 'panels';
+  private readonly OVERLAY_CANVAS_REF = 'canvas';
 
   get canvasOverdrawFactor() {
     return this.attrs.doesScroll ? SCROLLING_CANVAS_OVERDRAW_FACTOR : 1;
@@ -188,12 +190,12 @@
     globals.timeline.selectArea(area.start, area.end, tracks);
   }
 
-  constructor(vnode: m.CVnode<Attrs>) {
+  constructor(vnode: m.CVnode<PanelContainerAttrs>) {
     this.attrs = vnode.attrs;
     this.flowEventsRenderer = new FlowEventsRenderer();
     this.trash = new Trash();
 
-    const onRedraw = () => this.redrawCanvas();
+    const onRedraw = () => this.renderCanvas();
     raf.addRedrawCallback(onRedraw);
     this.trash.addCallback(() => {
       raf.removeRedrawCallback(onRedraw);
@@ -205,9 +207,9 @@
     });
   }
 
-  oncreate({dom}: m.CVnodeDOM<Attrs>) {
+  oncreate({dom}: m.CVnodeDOM<PanelContainerAttrs>) {
     // Save the canvas context in the state.
-    const canvas = dom.querySelector('.main-canvas') as HTMLCanvasElement;
+    const canvas = findRef(dom, this.OVERLAY_CANVAS_REF) as HTMLCanvasElement;
     const ctx = canvas.getContext('2d');
     if (!ctx) {
       throw Error('Cannot create canvas context');
@@ -226,7 +228,7 @@
       if (parentSizeChanged) {
         this.updateCanvasDimensions();
         this.repositionCanvas();
-        this.redrawCanvas();
+        this.renderCanvas();
       }
     }));
 
@@ -252,12 +254,7 @@
   renderPanel(node: Panel, key: string, extraClass = ''): m.Vnode {
     assertFalse(this.panelByKey.has(key));
     this.panelByKey.set(key, node);
-    const mithril = node.mithril;
-
-    return m(`.panel${extraClass}`, {key, 'data-key': key},
-      perfDebug() ?
-        [mithril, m('.debug-panel-border')] :
-        mithril);
+    return m(`.pf-panel${extraClass}`, {key, 'data-key': key}, node.render());
   }
 
   // Render a tree of panels into one vnode. Argument `path` is used to build
@@ -269,30 +266,30 @@
         'div',
         {key: path},
         this.renderPanel(
-          node.header, `${path}-header`, node.collapsed ? '' : '.sticky'),
+          node.header, `${path}-header`, node.collapsed ? '' : '.pf-sticky'),
         ...node.childTracks.map(
           (child, index) => this.renderTree(child, `${path}-${index}`)));
     }
     return this.renderPanel(node, assertExists(node.key));
   }
 
-  view({attrs}: m.CVnode<Attrs>) {
+  view({attrs}: m.CVnode<PanelContainerAttrs>) {
     this.attrs = attrs;
     this.panelByKey.clear();
     const children = attrs.panels.map(
       (panel, index) => this.renderTree(panel, `track-tree-${index}`));
 
-    return m('.panel-container', {className: attrs.className},
-      m('.panels', {ref: this.PANELS_REF},
-        m('.scroll-limiter', {ref: this.SCROLL_LIMITER_REF},
-          m('canvas.main-canvas'),
+    return m('.pf-panel-container', {className: attrs.className},
+      m('.pf-panels', {ref: this.PANELS_REF},
+        m('.pf-scroll-limiter', {ref: this.SCROLL_LIMITER_REF},
+          m('canvas.pf-overlay-canvas', {ref: this.OVERLAY_CANVAS_REF}),
         ),
         children,
       ),
     );
   }
 
-  onupdate({dom}: m.CVnodeDOM<Attrs>) {
+  onupdate({dom}: m.CVnodeDOM<PanelContainerAttrs>) {
     const totalPanelHeightChanged = this.readPanelHeightsFromDom(dom);
     const parentSizeChanged = this.readParentSizeFromDom(dom);
     const canvasSizeShouldChange =
@@ -304,7 +301,7 @@
         globals.timeline.updateLocalLimits(
           0, this.parentWidth - TRACK_SHELL_WIDTH);
       }
-      this.redrawCanvas();
+      this.renderCanvas();
     }
   }
 
@@ -364,7 +361,7 @@
     this.panelContainerTop = domRect.y;
     this.panelContainerHeight = domRect.height;
 
-    dom.querySelectorAll('.panel').forEach((panelElement) => {
+    dom.querySelectorAll('.pf-panel').forEach((panelElement) => {
       const key = assertExists(panelElement.getAttribute('data-key'));
       const panel = assertExists(this.panelByKey.get(key));
 
@@ -389,7 +386,7 @@
     return yEnd > 0 && yStart < this.canvasHeight;
   }
 
-  private redrawCanvas() {
+  private renderCanvas() {
     const redrawStart = debugNow();
     if (!this.ctx) return;
     this.ctx.clearRect(0, 0, this.parentWidth, this.canvasHeight);
@@ -480,6 +477,12 @@
     const canvasYStart =
         Math.floor(this.scrollTop - this.getCanvasOverdrawHeightPerSide());
     this.ctx.translate(TRACK_SHELL_WIDTH, -canvasYStart);
+
+    // Clip off any drawing happening outside the bounds of the timeline area
+    canvasClip(
+      this.ctx,
+      0, 0, this.parentWidth - TRACK_SHELL_WIDTH, this.totalPanelHeight);
+
     this.ctx.strokeRect(
       startX,
       selectedTracksMaxY,
@@ -499,6 +502,16 @@
     }
     renderStats.addValue(renderTime);
 
+    // Draw a green box around the whole panel
+    ctx.strokeStyle = 'rgba(69, 187, 73, 0.5)';
+    const lineWidth = 1;
+    ctx.lineWidth = lineWidth;
+    ctx.strokeRect(
+      lineWidth/2,
+      lineWidth/2,
+      size.width - lineWidth,
+      size.height - lineWidth);
+
     const statW = 300;
     ctx.fillStyle = 'hsl(97, 100%, 96%)';
     ctx.fillRect(size.width - statW, size.height - 20, statW, 20);
diff --git a/ui/src/frontend/tickmark_panel.ts b/ui/src/frontend/tickmark_panel.ts
index 7600d05..2ed6249 100644
--- a/ui/src/frontend/tickmark_panel.ts
+++ b/ui/src/frontend/tickmark_panel.ts
@@ -35,7 +35,7 @@
 
   constructor(readonly key: string) {}
 
-  get mithril(): m.Children {
+  render(): m.Children {
     return m('.tickbar');
   }
 
diff --git a/ui/src/frontend/time_axis_panel.ts b/ui/src/frontend/time_axis_panel.ts
index 43e7244..ec918b8 100644
--- a/ui/src/frontend/time_axis_panel.ts
+++ b/ui/src/frontend/time_axis_panel.ts
@@ -40,7 +40,7 @@
 
   constructor(readonly key: string) {}
 
-  get mithril() {
+  render(): m.Children {
     return m('.time-axis-panel');
   }
 
diff --git a/ui/src/frontend/time_selection_panel.ts b/ui/src/frontend/time_selection_panel.ts
index 67358b3..ee7838e 100644
--- a/ui/src/frontend/time_selection_panel.ts
+++ b/ui/src/frontend/time_selection_panel.ts
@@ -138,7 +138,7 @@
 
   constructor(readonly key: string) {}
 
-  get mithril(): m.Children {
+  render(): m.Children {
     return m('.time-selection-panel');
   }
 
diff --git a/ui/src/frontend/track_group_panel.ts b/ui/src/frontend/track_group_panel.ts
index 58071ce..3a83c81 100644
--- a/ui/src/frontend/track_group_panel.ts
+++ b/ui/src/frontend/track_group_panel.ts
@@ -12,7 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-import {hex} from 'color-convert';
 import m from 'mithril';
 
 import {Icons} from '../base/semantic_icons';
@@ -24,18 +23,14 @@
 import {TrackTags} from '../public';
 
 import {
-  COLLAPSED_BACKGROUND,
-  EXPANDED_BACKGROUND,
   TRACK_SHELL_WIDTH,
 } from './css_constants';
 import {globals} from './globals';
 import {drawGridLines} from './gridline_helper';
 import {PanelSize} from './panel';
 import {Panel} from './panel_container';
-import {CrashButton, renderChips, TrackContent} from './track_panel';
-import {
-  drawVerticalLineAtTime,
-} from './vertical_line_helper';
+import {CrashButton, renderChips, renderHoveredCursorVertical, renderHoveredNoteVertical, renderNoteVerticals, renderWakeupVertical, TrackContent} from './track_panel';
+import {canvasClip} from '../common/canvas_utils';
 
 interface Attrs {
   trackGroupId: string;
@@ -58,7 +53,7 @@
     this.key = attrs.key;
   }
 
-  get mithril(): m.Children {
+  render(): m.Children {
     const {
       trackGroupId,
       title,
@@ -187,86 +182,37 @@
       trackFSM: track,
     } = this.attrs;
 
-    ctx.fillStyle = collapsed ? COLLAPSED_BACKGROUND : EXPANDED_BACKGROUND;
-    ctx.fillRect(0, 0, size.width, size.height);
-
     if (!collapsed) return;
 
-    this.highlightIfTrackSelected(ctx, size);
-
+    ctx.save();
+    canvasClip(
+      ctx, TRACK_SHELL_WIDTH, 0, size.width - TRACK_SHELL_WIDTH, size.height);
     drawGridLines(
       ctx,
       size.width,
       size.height);
 
-    ctx.save();
-    ctx.translate(TRACK_SHELL_WIDTH, 0);
     if (track) {
+      ctx.save();
+      ctx.translate(TRACK_SHELL_WIDTH, 0);
       const trackSize = {...size, width: size.width - TRACK_SHELL_WIDTH};
       if (!track.getError()) {
         track.update();
         track.track.render(ctx, trackSize);
       }
+      ctx.restore();
     }
-    ctx.restore();
 
     this.highlightIfTrackSelected(ctx, size);
 
     const {visibleTimeScale} = globals.timeline;
     // Draw vertical line when hovering on the notes panel.
-    if (globals.state.hoveredNoteTimestamp !== -1n) {
-      drawVerticalLineAtTime(
-        ctx,
-        visibleTimeScale,
-        globals.state.hoveredNoteTimestamp,
-        size.height,
-        `#aaa`);
-    }
-    if (globals.state.hoverCursorTimestamp !== -1n) {
-      drawVerticalLineAtTime(
-        ctx,
-        visibleTimeScale,
-        globals.state.hoverCursorTimestamp,
-        size.height,
-        `#344596`);
-    }
+    renderHoveredNoteVertical(ctx, visibleTimeScale, size);
+    renderHoveredCursorVertical(ctx, visibleTimeScale, size);
+    renderWakeupVertical(ctx, visibleTimeScale, size);
+    renderNoteVerticals(ctx, visibleTimeScale, size);
 
-    if (globals.state.currentSelection !== null) {
-      if (globals.state.currentSelection.kind === 'SLICE' &&
-          globals.sliceDetails.wakeupTs !== undefined) {
-        drawVerticalLineAtTime(
-          ctx,
-          visibleTimeScale,
-          globals.sliceDetails.wakeupTs,
-          size.height,
-          `black`);
-      }
-    }
-    // All marked areas should have semi-transparent vertical lines
-    // marking the start and end.
-    for (const note of Object.values(globals.state.notes)) {
-      if (note.noteType === 'AREA') {
-        const transparentNoteColor =
-            'rgba(' + hex.rgb(note.color.substr(1)).toString() + ', 0.65)';
-        drawVerticalLineAtTime(
-          ctx,
-          visibleTimeScale,
-          globals.state.areas[note.areaId].start,
-          size.height,
-          transparentNoteColor,
-          1);
-        drawVerticalLineAtTime(
-          ctx,
-          visibleTimeScale,
-          globals.state.areas[note.areaId].end,
-          size.height,
-          transparentNoteColor,
-          1);
-      } else if (note.noteType === 'DEFAULT') {
-        drawVerticalLineAtTime(
-          ctx, visibleTimeScale, note.timestamp, size.height, note.color);
-      }
-    }
+    ctx.restore();
   }
 }
 
diff --git a/ui/src/frontend/track_panel.ts b/ui/src/frontend/track_panel.ts
index 697e45a..bb0b664 100644
--- a/ui/src/frontend/track_panel.ts
+++ b/ui/src/frontend/track_panel.ts
@@ -36,6 +36,8 @@
 import {classNames} from '../base/classnames';
 import {Button} from '../widgets/button';
 import {Popup} from '../widgets/popup';
+import {canvasClip} from '../common/canvas_utils';
+import {TimeScale} from './time_scale';
 
 function getTitleSize(title: string): string|undefined {
   const length = title.length;
@@ -133,7 +135,7 @@
   view({attrs}: m.CVnode<TrackShellAttrs>) {
     // The shell should be highlighted if the current search result is inside
     // this track.
-    let highlightClass = '';
+    let highlightClass = undefined;
     const searchIndex = globals.state.searchIndex;
     if (searchIndex !== -1) {
       const trackKey = globals.currentSearchResults.trackKeys[searchIndex];
@@ -142,12 +144,14 @@
       }
     }
 
-    const dragClass = this.dragging ? `drag` : '';
-    const dropClass = this.dropping ? `drop-${this.dropping}` : '';
     return m(
       `.track-shell[draggable=true]`,
       {
-        class: `${highlightClass} ${dragClass} ${dropClass}`,
+        className: classNames(
+          highlightClass,
+          this.dragging && 'drag',
+          this.dropping && `drop-${this.dropping}`,
+        ),
         ondragstart: (e: DragEvent) => this.ondragstart(e, attrs.trackKey),
         ondragend: this.ondragend.bind(this),
         ondragover: this.ondragover.bind(this),
@@ -421,7 +425,7 @@
     return this.attrs.trackKey;
   }
 
-  get mithril(): m.Children {
+  render(): m.Children {
     const attrs = this.attrs;
 
     if (attrs.trackFSM) {
@@ -472,6 +476,8 @@
 
   renderCanvas(ctx: CanvasRenderingContext2D, size: PanelSize) {
     ctx.save();
+    canvasClip(
+      ctx, TRACK_SHELL_WIDTH, 0, size.width - TRACK_SHELL_WIDTH, size.height);
 
     drawGridLines(
       ctx,
@@ -480,6 +486,7 @@
 
     const track = this.attrs.trackFSM;
 
+    ctx.save();
     ctx.translate(TRACK_SHELL_WIDTH, 0);
     if (track !== undefined) {
       const trackSize = {...size, width: size.width - TRACK_SHELL_WIDTH};
@@ -496,59 +503,12 @@
 
     const {visibleTimeScale} = globals.timeline;
     // Draw vertical line when hovering on the notes panel.
-    if (globals.state.hoveredNoteTimestamp !== -1n) {
-      drawVerticalLineAtTime(
-        ctx,
-        visibleTimeScale,
-        globals.state.hoveredNoteTimestamp,
-        size.height,
-        `#aaa`);
-    }
-    if (globals.state.hoverCursorTimestamp !== -1n) {
-      drawVerticalLineAtTime(
-        ctx,
-        visibleTimeScale,
-        globals.state.hoverCursorTimestamp,
-        size.height,
-        `#344596`);
-    }
+    renderHoveredNoteVertical(ctx, visibleTimeScale, size);
+    renderHoveredCursorVertical(ctx, visibleTimeScale, size);
+    renderWakeupVertical(ctx, visibleTimeScale, size);
+    renderNoteVerticals(ctx, visibleTimeScale, size);
 
-    if (globals.state.currentSelection !== null) {
-      if (globals.state.currentSelection.kind === 'SLICE' &&
-          globals.sliceDetails.wakeupTs !== undefined) {
-        drawVerticalLineAtTime(
-          ctx,
-          visibleTimeScale,
-          globals.sliceDetails.wakeupTs,
-          size.height,
-          `black`);
-      }
-    }
-    // All marked areas should have semi-transparent vertical lines
-    // marking the start and end.
-    for (const note of Object.values(globals.state.notes)) {
-      if (note.noteType === 'AREA') {
-        const transparentNoteColor =
-            'rgba(' + hex.rgb(note.color.substr(1)).toString() + ', 0.65)';
-        drawVerticalLineAtTime(
-          ctx,
-          visibleTimeScale,
-          globals.state.areas[note.areaId].start,
-          size.height,
-          transparentNoteColor,
-          1);
-        drawVerticalLineAtTime(
-          ctx,
-          visibleTimeScale,
-          globals.state.areas[note.areaId].end,
-          size.height,
-          transparentNoteColor,
-          1);
-      } else if (note.noteType === 'DEFAULT') {
-        drawVerticalLineAtTime(
-          ctx, visibleTimeScale, note.timestamp, size.height, note.color);
-      }
-    }
+    ctx.restore();
   }
 
   getSliceRect(tStart: time, tDur: time, depth: number): SliceRect|undefined {
@@ -558,3 +518,71 @@
     return this.attrs.trackFSM.track.getSliceRect?.(tStart, tDur, depth);
   }
 }
+
+export function renderHoveredCursorVertical(
+  ctx: CanvasRenderingContext2D, visibleTimeScale: TimeScale, size: PanelSize) {
+  if (globals.state.hoverCursorTimestamp !== -1n) {
+    drawVerticalLineAtTime(
+      ctx,
+      visibleTimeScale,
+      globals.state.hoverCursorTimestamp,
+      size.height,
+      `#344596`);
+  }
+}
+
+export function renderHoveredNoteVertical(
+  ctx: CanvasRenderingContext2D, visibleTimeScale: TimeScale, size: PanelSize) {
+  if (globals.state.hoveredNoteTimestamp !== -1n) {
+    drawVerticalLineAtTime(
+      ctx,
+      visibleTimeScale,
+      globals.state.hoveredNoteTimestamp,
+      size.height,
+      `#aaa`);
+  }
+}
+
+export function renderWakeupVertical(
+  ctx: CanvasRenderingContext2D, visibleTimeScale: TimeScale, size: PanelSize) {
+  if (globals.state.currentSelection !== null) {
+    if (globals.state.currentSelection.kind === 'SLICE' &&
+      globals.sliceDetails.wakeupTs !== undefined) {
+      drawVerticalLineAtTime(
+        ctx,
+        visibleTimeScale,
+        globals.sliceDetails.wakeupTs,
+        size.height,
+        `black`);
+    }
+  }
+}
+
+export function renderNoteVerticals(
+  ctx: CanvasRenderingContext2D, visibleTimeScale: TimeScale, size: PanelSize) {
+  // All marked areas should have semi-transparent vertical lines
+  // marking the start and end.
+  for (const note of Object.values(globals.state.notes)) {
+    if (note.noteType === 'AREA') {
+      const transparentNoteColor = 'rgba(' + hex.rgb(note.color.substr(1)).toString() + ', 0.65)';
+      drawVerticalLineAtTime(
+        ctx,
+        visibleTimeScale,
+        globals.state.areas[note.areaId].start,
+        size.height,
+        transparentNoteColor,
+        1);
+      drawVerticalLineAtTime(
+        ctx,
+        visibleTimeScale,
+        globals.state.areas[note.areaId].end,
+        size.height,
+        transparentNoteColor,
+        1);
+    } else if (note.noteType === 'DEFAULT') {
+      drawVerticalLineAtTime(
+        ctx, visibleTimeScale, note.timestamp, size.height, note.color);
+    }
+  }
+}
+
diff --git a/ui/src/frontend/viewer_page.ts b/ui/src/frontend/viewer_page.ts
index 973fdaf..91f7490 100644
--- a/ui/src/frontend/viewer_page.ts
+++ b/ui/src/frontend/viewer_page.ts
@@ -14,7 +14,7 @@
 
 import m from 'mithril';
 
-import {getScrollbarWidth} from '../base/dom_utils';
+import {findRef, getScrollbarWidth, toHTMLElement} from '../base/dom_utils';
 import {clamp} from '../base/math_utils';
 import {Time} from '../base/time';
 import {Actions} from '../common/actions';
@@ -39,6 +39,7 @@
 import {DISMISSED_PANNING_HINT_KEY} from './topbar';
 import {TrackGroupPanel} from './track_group_panel';
 import {TrackPanel} from './track_panel';
+import {assertExists} from '../base/logging';
 
 const OVERVIEW_PANEL_FLAG = featureFlags.register({
   id: 'overviewVisible',
@@ -88,6 +89,8 @@
   private notesPanel = new NotesPanel('notes');
   private tickmarkPanel = new TickmarkPanel('searchTickmarks');
 
+  private readonly PAN_ZOOM_CONTENT_REF = 'pan-and-zoom-content';
+
   oncreate(vnode: m.CVnodeDOM) {
     const timeline = globals.timeline;
     const updateDimensions = () => {
@@ -107,8 +110,8 @@
     // Once ResizeObservers are out, we can stop accessing the window here.
     window.addEventListener('resize', this.onResize);
 
-    const panZoomEl =
-        vnode.dom.querySelector('.pan-and-zoom-content') as HTMLElement;
+    const panZoomElRaw = findRef(vnode.dom, this.PAN_ZOOM_CONTENT_REF);
+    const panZoomEl = toHTMLElement(assertExists(panZoomElRaw));
 
     this.zoomContent = new PanAndZoomHandler({
       element: panZoomEl,
@@ -279,53 +282,52 @@
     }
 
     const result = m(
-      '.page',
-      m('.split-panel',
-        m('.pan-and-zoom-content',
-          {
-            onclick: () => {
-              // We don't want to deselect when panning/drag selecting.
-              if (this.keepCurrentSelection) {
-                this.keepCurrentSelection = false;
-                return;
-              }
-              globals.makeSelection(Actions.deselect({}));
-            },
+      '.page.viewer-page',
+      m('.pan-and-zoom-content',
+        {
+          ref: this.PAN_ZOOM_CONTENT_REF,
+          onclick: () => {
+            // We don't want to deselect when panning/drag selecting.
+            if (this.keepCurrentSelection) {
+              this.keepCurrentSelection = false;
+              return;
+            }
+            globals.makeSelection(Actions.deselect({}));
           },
-          m(PanelContainer, {
-            className: 'header-panel-container',
-            doesScroll: false,
-            panels: [
-              ...overviewPanel,
-              this.timeAxisPanel,
-              this.timeSelectionPanel,
-              this.notesPanel,
-              this.tickmarkPanel,
-            ],
-            kind: 'OVERVIEW',
+        },
+        m(PanelContainer, {
+          className: 'header-panel-container',
+          doesScroll: false,
+          panels: [
+            ...overviewPanel,
+            this.timeAxisPanel,
+            this.timeSelectionPanel,
+            this.notesPanel,
+            this.tickmarkPanel,
+          ],
+          kind: 'OVERVIEW',
+        }),
+        m(PanelContainer, {
+          className: 'pinned-panel-container',
+          doesScroll: true,
+          panels: globals.state.pinnedTracks.map((key) => {
+            const trackBundle = this.resolveTrack(key);
+            return new TrackPanel({
+              trackKey: key,
+              title: trackBundle.title,
+              tags: trackBundle.tags,
+              trackFSM: trackBundle.trackFSM,
+              revealOnCreate: true,
+            });
           }),
-          m(PanelContainer, {
-            className: 'pinned-panel-container',
-            doesScroll: true,
-            panels: globals.state.pinnedTracks.map((key) => {
-              const trackBundle = this.resolveTrack(key);
-              return new TrackPanel({
-                trackKey: key,
-                title: trackBundle.title,
-                tags: trackBundle.tags,
-                trackFSM: trackBundle.trackFSM,
-                revealOnCreate: true,
-              });
-            }),
-            kind: 'TRACKS',
-          }),
-          m(PanelContainer, {
-            className: 'scrolling-panel-container',
-            doesScroll: true,
-            panels: scrollingPanels,
-            kind: 'TRACKS',
-          }),
-        ),
+          kind: 'TRACKS',
+        }),
+        m(PanelContainer, {
+          className: 'scrolling-panel-container',
+          doesScroll: true,
+          panels: scrollingPanels,
+          kind: 'TRACKS',
+        }),
       ),
       this.renderTabPanel());
 
diff --git a/ui/src/test/ui_integrationtest.ts b/ui/src/test/ui_integrationtest.ts
index d876ab8..861a798 100644
--- a/ui/src/test/ui_integrationtest.ts
+++ b/ui/src/test/ui_integrationtest.ts
@@ -86,7 +86,7 @@
   });
 
   test('expand_camera', async () => {
-    await page.click('.main-canvas');
+    await page.click('.pf-overlay-canvas');
     await page.click('h1[title="com.google.android.GoogleCamera 5506"]');
     await page.evaluate(() => {
       document.querySelector('.scrolling-panel-container')!.scrollTo(0, 400);
@@ -114,7 +114,7 @@
 
   test('expand_browser_proc', async () => {
     const page = await getPage();
-    await page.click('.main-canvas');
+    await page.click('.pf-overlay-canvas');
     await page.click('h1[title="Browser 12685"]');
     await waitForPerfettoIdle(page);
   });
diff --git a/ui/src/tracks/ftrace/index.ts b/ui/src/tracks/ftrace/index.ts
index 24dd886..c3616c6 100644
--- a/ui/src/tracks/ftrace/index.ts
+++ b/ui/src/tracks/ftrace/index.ts
@@ -165,10 +165,6 @@
       },
     });
 
-    if (await this.hasFtrace(ctx.engine)) {
-      ctx.addDefaultTab(ftraceTabUri);
-    }
-
     ctx.registerCommand({
       id: 'perfetto.FtraceRaw#ShowFtraceTab',
       name: 'Show Ftrace Tab',
@@ -178,18 +174,6 @@
     });
   }
 
-  private async hasFtrace(engine: EngineProxy): Promise<boolean> {
-    // Check if we have any ftrace events at all
-    const query = `
-      select
-        *
-      from ftrace_event
-      limit 1`;
-
-    const res = await engine.query(query);
-    return res.numRows() > 0;
-  }
-
   private async lookupCpuCores(engine: EngineProxy): Promise<number[]> {
     const query = 'select distinct cpu from ftrace_event';