Merge "bigtrace: Add minikube integration" into main
diff --git a/Android.bp b/Android.bp
index 11c9b2a..279cfe9 100644
--- a/Android.bp
+++ b/Android.bp
@@ -13163,6 +13163,7 @@
         "src/trace_processor/perfetto_sql/stdlib/deprecated/v42/common/slices.sql",
         "src/trace_processor/perfetto_sql/stdlib/deprecated/v42/common/timestamps.sql",
         "src/trace_processor/perfetto_sql/stdlib/export/to_firefox_profile.sql",
+        "src/trace_processor/perfetto_sql/stdlib/graphs/critical_path.sql",
         "src/trace_processor/perfetto_sql/stdlib/graphs/dominator_tree.sql",
         "src/trace_processor/perfetto_sql/stdlib/graphs/partition.sql",
         "src/trace_processor/perfetto_sql/stdlib/graphs/scan.sql",
diff --git a/BUILD b/BUILD
index 24543ae..cf94216 100644
--- a/BUILD
+++ b/BUILD
@@ -2662,6 +2662,7 @@
 perfetto_filegroup(
     name = "src_trace_processor_perfetto_sql_stdlib_graphs_graphs",
     srcs = [
+        "src/trace_processor/perfetto_sql/stdlib/graphs/critical_path.sql",
         "src/trace_processor/perfetto_sql/stdlib/graphs/dominator_tree.sql",
         "src/trace_processor/perfetto_sql/stdlib/graphs/partition.sql",
         "src/trace_processor/perfetto_sql/stdlib/graphs/scan.sql",
diff --git a/src/trace_processor/perfetto_sql/stdlib/graphs/BUILD.gn b/src/trace_processor/perfetto_sql/stdlib/graphs/BUILD.gn
index 1710494..5167835 100644
--- a/src/trace_processor/perfetto_sql/stdlib/graphs/BUILD.gn
+++ b/src/trace_processor/perfetto_sql/stdlib/graphs/BUILD.gn
@@ -16,6 +16,7 @@
 
 perfetto_sql_source_set("graphs") {
   sources = [
+    "critical_path.sql",
     "dominator_tree.sql",
     "partition.sql",
     "scan.sql",
diff --git a/src/trace_processor/perfetto_sql/stdlib/graphs/critical_path.sql b/src/trace_processor/perfetto_sql/stdlib/graphs/critical_path.sql
new file mode 100644
index 0000000..652f8c0
--- /dev/null
+++ b/src/trace_processor/perfetto_sql/stdlib/graphs/critical_path.sql
@@ -0,0 +1,190 @@
+--
+-- Copyright 2024 The Android Open Source Project
+--
+-- Licensed under the Apache License, Version 2.0 (the "License");
+-- you may not use this file except in compliance with the License.
+-- You may obtain a copy of the License at
+--
+--     https://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+
+INCLUDE PERFETTO MODULE graphs.search;
+
+-- Computes critical paths, the dependency graph of a task.
+-- The critical path is a set of edges reachable from a root node with the sum of the edge
+-- weights just exceeding the root node capacity. This ensures that the tasks in the critical path
+-- completely 'cover' the root capacity.
+-- Typically, every node represents a point in time on some task where it transitioned from
+-- idle to active state.
+--
+-- Example usage on traces with Linux sched information:
+-- ```
+-- -- Compute the userspace critical path from every task sleep.
+-- SELECT * FROM
+--   _critical_path!(
+--   _wakeup_userspace_edges,
+--   (SELECT id AS root_node_id, id - prev_id AS capacity FROM _wakeup_graph WHERE prev_id IS NOT NULL));
+-- ```
+CREATE PERFETTO MACRO _critical_path(
+  -- A table/view/subquery corresponding to a directed graph on which the
+  -- reachability search should be performed. This table must have the columns
+  -- "source_node_id", "dest_node_id" and "edge_weight" corresponding to the two nodes on
+  -- either end of the edges in the graph and the edge weight.
+  --
+  -- Note: the columns must contain uint32 similar to ids in trace processor
+  -- tables (i.e. the values should be relatively dense and close to zero). The
+  -- implementation makes assumptions on this for performance reasons and, if
+  -- these criteria are not met, it can lead to enormous amounts of memory being
+  -- allocated.
+  -- An edge weight is the absolute difference between the node ids forming the edge.
+  graph_table TableOrSubQuery,
+  -- A table/view/subquery corresponding to start nodes to |graph_table| which will be the
+  -- roots of the reachability trees. This table must have the columns
+  -- "root_node_id" and "capacity" corresponding to the starting node id and the capacity
+  -- of the root node to contain edge weights.
+  --
+  -- Note: the columns must contain uint32 similar to ids in trace processor
+  -- tables (i.e. the values should be relatively dense and close to zero). The
+  -- implementation makes assumptions on this for performance reasons and, if
+  -- these criteria are not met, it can lead to enormous amounts of memory being
+  -- allocated.
+  root_table TableOrSubQuery)
+  -- The returned table has the schema (root_id UINT32, parent_id UINT32, id UINT32).
+  -- |root_id| is the id of the root where the critical path computation started.
+  -- |id| is the id of a node in the critical path and |parent_id| is the predecessor of |id|.
+RETURNS TableOrSubQuery
+AS (
+  WITH
+    _edges AS (
+      SELECT source_node_id, dest_node_id, edge_weight FROM $graph_table
+    ),
+    _roots AS (
+      SELECT
+        root_node_id,
+        capacity AS root_target_weight
+      FROM $root_table
+    ),
+    _search_bounds AS (
+      SELECT MIN(root_node_id - root_target_weight) AS min_wakeup,
+             MAX(root_node_id + root_target_weight) AS max_wakeup
+      FROM _roots
+    ),
+    _graph AS (
+      SELECT
+        source_node_id,
+        COALESCE(dest_node_id, source_node_id) AS dest_node_id,
+        edge_weight
+      FROM _edges
+      JOIN _search_bounds
+      WHERE source_node_id BETWEEN min_wakeup AND max_wakeup AND source_node_id IS NOT NULL
+    )
+  SELECT DISTINCT
+    root_node_id AS root_id,
+    parent_node_id AS parent_id,
+    node_id AS id
+  FROM graph_reachable_weight_bounded_dfs !(_graph, _roots, 1) cr
+);
+
+-- Turns critical path nodes into intervals ending at the next node's ts, clipped to the root's idle span.
+CREATE PERFETTO MACRO _critical_path_to_intervals(critical_path_table TableOrSubquery,
+                                                  node_table TableOrSubquery)
+RETURNS TableOrSubquery
+AS (
+  WITH flat_tasks AS (
+    SELECT
+      node.ts,
+      cr.root_id,
+      cr.id,
+      LEAD(node.ts) OVER (PARTITION BY cr.root_id ORDER BY cr.id) - node.ts AS dur
+    FROM $critical_path_table cr
+    JOIN $node_table node USING(id)
+  ), span_starts AS (
+    SELECT
+      MAX(cr.ts, idle.ts - idle_dur) AS ts,
+      idle.ts AS idle_end_ts,
+      cr.ts + cr.dur AS cr_end_ts,
+      cr.id,
+      cr.root_id
+    FROM flat_tasks cr
+    JOIN $node_table idle ON cr.root_id = idle.id
+  )
+  SELECT
+    ts,
+    MIN(cr_end_ts, idle_end_ts) - ts AS dur,
+    id,
+    root_id
+  FROM span_starts
+  WHERE MIN(idle_end_ts, cr_end_ts) - ts > 0
+);
+
+-- Computes critical paths, the dependency graph of a task and returns a flattened view suitable
+-- for displaying in a UI track without any overlapping intervals.
+-- See the _critical_path MACRO above.
+--
+-- Example usage on traces with Linux sched information:
+-- ```
+-- -- Compute the userspace critical path from every task sleep.
+-- SELECT * FROM
+--   _critical_path_intervals!(
+--   _wakeup_userspace_edges,
+--   (SELECT id AS root_node_id, id - prev_id AS capacity FROM _wakeup_graph WHERE prev_id IS NOT NULL),
+--  _wakeup_intervals);
+-- ```
+CREATE PERFETTO MACRO _critical_path_intervals(
+  -- A table/view/subquery corresponding to a directed graph on which the
+  -- reachability search should be performed. This table must have the columns
+  -- "source_node_id", "dest_node_id" and "edge_weight" corresponding to the two nodes on
+  -- either end of the edges in the graph and the edge weight.
+  --
+  -- Note: the columns must contain uint32 similar to ids in trace processor
+  -- tables (i.e. the values should be relatively dense and close to zero). The
+  -- implementation makes assumptions on this for performance reasons and, if
+  -- these criteria are not met, it can lead to enormous amounts of memory being
+  -- allocated.
+  -- An edge weight is the absolute difference between the node ids forming the edge.
+  graph_table TableOrSubQuery,
+  -- A table/view/subquery corresponding to start nodes to |graph_table| which will be the
+  -- roots of the reachability trees. This table must have the columns
+  -- "root_node_id" and "capacity" corresponding to the starting node id and the capacity
+  -- of the root node to contain edge weights.
+  --
+  -- Note: the columns must contain uint32 similar to ids in trace processor
+  -- tables (i.e. the values should be relatively dense and close to zero). The
+  -- implementation makes assumptions on this for performance reasons and, if
+  -- these criteria are not met, it can lead to enormous amounts of memory being
+  -- allocated.
+  root_table TableOrSubQuery,
+  -- A table/view/subquery corresponding to the idle to active transition points on a task.
+  -- This table must have the columns, "id", "ts", "dur" and "idle_dur". ts and dur are the
+  -- timestamp when the task became active and how long it was active for respectively. idle_dur
+  -- is the duration it was idle for before it became active at "ts".
+  --
+  -- Note: the columns must contain uint32 similar to ids in trace processor
+  -- tables (i.e. the values should be relatively dense and close to zero). The
+  -- implementation makes assumptions on this for performance reasons and, if
+  -- these criteria are not met, it can lead to enormous amounts of memory being
+  -- allocated.
+  -- There should be one row for every node id encountered in the |graph_table|.
+  interval_table TableOrSubQuery)
+-- The returned table has the schema (root_id UINT32, id UINT32, ts INT64, dur INT64).
+-- |root_id| is the id of the root node whose critical path the interval belongs to.
+-- |id| is the id of the node from |interval_table| that the interval is attributed to;
+-- |ts| and |dur| are the start and duration of that interval. Each root in |root_table| is
+-- also returned as a row with |id| = |root_id| and its own |ts| and |dur| from |interval_table|.
+RETURNS TableOrSubQuery
+AS (
+  WITH _critical_path_nodes AS (
+    SELECT root_id, id FROM _critical_path!($graph_table, $root_table)
+  ) SELECT root_id, id, ts, dur
+    FROM _critical_path_to_intervals !(_critical_path_nodes, $interval_table)
+    UNION ALL
+    SELECT node.id AS root_id, node.id, node.ts, node.dur
+    FROM $interval_table node
+    JOIN $root_table ON root_node_id = id
+);
diff --git a/src/trace_processor/perfetto_sql/stdlib/sched/thread_executing_span.sql b/src/trace_processor/perfetto_sql/stdlib/sched/thread_executing_span.sql
index e3a92e2..58cda1e 100644
--- a/src/trace_processor/perfetto_sql/stdlib/sched/thread_executing_span.sql
+++ b/src/trace_processor/perfetto_sql/stdlib/sched/thread_executing_span.sql
@@ -14,7 +14,9 @@
 -- limitations under the License.
 --
 
+INCLUDE PERFETTO MODULE graphs.critical_path;
 INCLUDE PERFETTO MODULE graphs.search;
+INCLUDE PERFETTO MODULE intervals.overlap;
 
 -- A 'thread_executing_span' is thread_state span starting with a runnable slice
 -- until the next runnable slice that's woken up by a process (as opposed
@@ -42,12 +44,13 @@
   thread_state.state,
   thread_state.utid,
   thread_state.waker_id,
-  thread_state.waker_utid
+  thread_state.waker_utid,
+  IIF(thread_state.irq_context = 0 OR thread_state.irq_context IS NULL,
+      IFNULL(thread_state.io_wait, 0), 1) AS is_irq
 FROM thread_state
 WHERE
   thread_state.dur != -1
-  AND thread_state.waker_utid IS NOT NULL
-  AND (thread_state.irq_context = 0 OR thread_state.irq_context IS NULL);
+  AND thread_state.waker_id IS NOT NULL;
 
 -- Similar to |_runnable_state| but finds the first runnable state at thread.
 CREATE PERFETTO TABLE _first_runnable_state
@@ -66,14 +69,15 @@
   thread_state.state,
   thread_state.utid,
   thread_state.waker_id,
-  thread_state.waker_utid
+  thread_state.waker_utid,
+  IIF(thread_state.irq_context = 0 OR thread_state.irq_context IS NULL,
+      IFNULL(thread_state.io_wait, 0), 1) AS is_irq
 FROM thread_state
 JOIN first_state
   USING (id)
 WHERE
   thread_state.dur != -1
-  AND thread_state.state = 'R'
-  AND (thread_state.irq_context = 0 OR thread_state.irq_context IS NULL);
+  AND thread_state.state = 'R';
 
 --
 -- Finds all sleep states including interruptible (S) and uninterruptible (D).
@@ -155,7 +159,8 @@
       r.utid AS utid,
       r.waker_id,
       r.waker_utid,
-      s.ts AS prev_end_ts
+      s.ts AS prev_end_ts,
+      is_irq
     FROM _runnable_state r
     JOIN _sleep_state s
       ON s.utid = r.utid AND (s.ts + s.dur = r.ts)
@@ -168,15 +173,14 @@
       r.utid AS utid,
       r.waker_id,
       r.waker_utid,
-      NULL AS prev_end_ts
+      NULL AS prev_end_ts,
+      is_irq
     FROM _first_runnable_state r
     LEFT JOIN _first_sleep_state s
       ON s.utid = r.utid
   )
 SELECT
-  all_wakeups.*,
-  LAG(id) OVER (PARTITION BY utid ORDER BY ts) AS prev_id,
-  IFNULL(LEAD(prev_end_ts) OVER (PARTITION BY utid ORDER BY ts), thread_end.end_ts) AS end_ts
+  all_wakeups.*, thread_end.end_ts AS thread_end_ts
 FROM all_wakeups
 LEFT JOIN _thread_end_ts thread_end
   USING (utid);
@@ -203,189 +207,211 @@
 SELECT id, waker_id FROM y WHERE state = 'Running' ORDER BY waker_id;
 
 --
--- Builds the parent-child chain from all thread_executing_spans. The parent is the waker and
--- child is the wakee.
+-- Builds the waker and prev relationships for all thread_executing_spans.
 --
--- Note that this doesn't include the roots. We'll compute the roots below.
--- This two step process improves performance because it's more efficient to scan
--- parent and find a child between than to scan child and find the parent it lies between.
 CREATE PERFETTO TABLE _wakeup_graph
 AS
-SELECT
-  _wakeup_map.id AS waker_id,
-  prev_id,
-  prev_end_ts,
-  _wakeup.id AS id,
-  _wakeup.ts AS ts,
-  _wakeup.end_ts,
-  IIF(_wakeup.state IS NULL OR _wakeup.state = 'S', 0, 1) AS is_kernel,
-  _wakeup.utid,
-  _wakeup.state,
-  _wakeup.blocked_function
-FROM _wakeup
-JOIN _wakeup_map USING(waker_id)
-ORDER BY id;
-
--- The inverse of thread_executing_spans. All the sleeping periods between thread_executing_spans.
-CREATE PERFETTO TABLE _sleep
-AS
 WITH
-  x AS (
+  _wakeup_events AS (
     SELECT
-      id,
-      ts,
-      prev_end_ts,
       utid,
-      state,
-      blocked_function
-    FROM _wakeup_graph
+      thread_end_ts,
+      IIF(is_irq, 'IRQ', state) AS idle_state,
+      blocked_function AS idle_reason,
+      _wakeup.id,
+      IIF(is_irq, NULL, _wakeup_map.id) AS waker_id,
+      _wakeup.ts,
+      prev_end_ts AS idle_ts,
+      IIF(is_irq OR _wakeup_map.id IS NULL OR (state IS NOT NULL AND state != 'S'), 1, 0)
+        AS is_idle_reason_self
+    FROM _wakeup
+    LEFT JOIN _wakeup_map
+      USING (waker_id)
   )
 SELECT
-  ts - prev_end_ts AS dur,
-  prev_end_ts AS ts,
-  id AS root_node_id,
-  utid AS critical_path_utid,
-  id AS critical_path_id,
-  ts - prev_end_ts AS critical_path_blocked_dur,
-  state AS critical_path_blocked_state,
-  blocked_function AS critical_path_blocked_function
-FROM x
-WHERE ts IS NOT NULL;
+  utid,
+  id,
+  waker_id,
+  ts,
+  idle_state,
+  idle_reason,
+  ts - idle_ts AS idle_dur,
+  is_idle_reason_self,
+  LAG(id) OVER (PARTITION BY utid ORDER BY ts) AS prev_id,
+  LEAD(id) OVER (PARTITION BY utid ORDER BY ts) AS next_id,
+  IFNULL(LEAD(idle_ts) OVER (PARTITION BY utid ORDER BY ts), thread_end_ts) - ts AS dur,
+  LEAD(is_idle_reason_self) OVER (PARTITION BY utid ORDER BY ts) AS is_next_idle_reason_self
+FROM _wakeup_events
+ORDER BY id;
 
--- Given a set of critical paths identified by their |root_node_ids|, flattens
--- the critical path tasks such that there are no overlapping intervals. The end of a
--- task in the critical path is the start of the following task in the critical path.
-CREATE PERFETTO MACRO _flatten_critical_path_tasks(_critical_path_table TableOrSubquery)
-RETURNS TableOrSubquery
-AS (
-  WITH
-    x AS (
-      SELECT
-        LEAD(ts) OVER (PARTITION BY root_node_id ORDER BY node_id) AS ts,
-        node_id,
-        ts AS node_ts,
-        root_node_id,
-        utid AS node_utid,
-        _wakeup_graph.prev_end_ts
-      FROM $_critical_path_table
-      JOIN _wakeup_graph
-        ON node_id = id
-    )
-  SELECT node_ts AS ts, root_node_id, node_id, ts - node_ts AS dur, node_utid, prev_end_ts FROM x
-);
+-- Userspace critical path edges: node -> prev_id if is_idle_reason_self, else waker_id (self if NULL).
+CREATE PERFETTO VIEW _wakeup_userspace_edges
+AS
+SELECT
+  id AS source_node_id,
+  COALESCE(IIF(is_idle_reason_self, prev_id, waker_id), id) AS dest_node_id,
+  id - COALESCE(IIF(is_idle_reason_self, prev_id, waker_id), id) AS edge_weight
+FROM _wakeup_graph;
+
+-- Kernel critical path edges: node -> waker_id, or a zero-weight self edge when waker_id is NULL.
+CREATE PERFETTO VIEW _wakeup_kernel_edges
+AS
+SELECT
+  id AS source_node_id,
+  COALESCE(waker_id, id) AS dest_node_id,
+  id - COALESCE(waker_id, id) AS edge_weight
+FROM _wakeup_graph;
+
+-- Per-node id, ts, dur and idle_dur from _wakeup_graph, used as the critical path interval table.
+CREATE PERFETTO VIEW _wakeup_intervals
+AS
+SELECT id, ts, dur, idle_dur FROM _wakeup_graph;
 
 -- Converts a table with <ts, dur, utid> columns to a unique set of wakeup roots <id> that
 -- completely cover the time intervals.
-CREATE PERFETTO MACRO _intervals_to_roots(source_table TableOrSubQuery)
+CREATE PERFETTO MACRO _intervals_to_roots(_source_table TableOrSubQuery,
+                                          _node_table TableOrSubQuery)
 RETURNS TableOrSubQuery
 AS (
-  WITH source AS (
-    SELECT * FROM $source_table
-  ), thread_bounds AS (
-    SELECT utid, MIN(ts) AS min_start, MAX(ts) AS max_start FROM _wakeup_graph GROUP BY utid
-  ), start AS (
-    SELECT
-      _wakeup_graph.utid, max(_wakeup_graph.id) AS start_id, source.ts, source.dur
-      FROM _wakeup_graph
-      JOIN thread_bounds
-        USING (utid)
-      JOIN source
-        ON source.utid = _wakeup_graph.utid AND MAX(source.ts, min_start) >= _wakeup_graph.ts
-     GROUP BY source.ts, source.utid
-  ), end AS (
-    SELECT
-      _wakeup_graph.utid, min(_wakeup_graph.id) AS end_id, source.ts, source.dur
-      FROM _wakeup_graph
-      JOIN thread_bounds
-          USING (utid)
-      JOIN source ON source.utid = _wakeup_graph.utid
-          AND MIN((source.ts + source.dur), max_start) <= _wakeup_graph.ts
-     GROUP BY source.ts, source.utid
-  ), bound AS (
-    SELECT start.utid, start.ts, start.dur, start_id, end_id
-      FROM start
-      JOIN end ON start.ts = end.ts AND start.dur = end.dur AND start.utid = end.utid
-  )
-  SELECT DISTINCT _wakeup_graph.id FROM bound
-  JOIN _wakeup_graph ON _wakeup_graph.id BETWEEN start_id AND end_id
-);
-
--- Flattens overlapping tasks within a critical path and flattens overlapping critical paths.
-CREATE PERFETTO MACRO _flatten_critical_paths(critical_path_table TableOrSubquery, sleeping_table TableOrSubquery)
-RETURNS TableOrSubquery
-AS (
-  WITH
-    span_starts AS (
+  WITH _interval_to_root_nodes AS (
+      SELECT * FROM $_node_table
+    ),
+    _source AS (
+      SELECT * FROM $_source_table
+    ),
+    _thread_bounds AS (
+      SELECT utid, MIN(ts) AS min_start, MAX(ts) AS max_start
+      FROM _interval_to_root_nodes
+      GROUP BY utid
+    ),
+    _start AS (
       SELECT
-        cr.node_utid AS utid,
-        MAX(cr.ts, sleep.ts) AS ts,
-        sleep.ts + sleep.dur AS sleep_end_ts,
-        cr.ts + cr.dur AS cr_end_ts,
-        cr.node_id AS id,
-        cr.root_node_id AS root_id,
-        cr.prev_end_ts AS prev_end_ts,
-        critical_path_utid,
-        critical_path_id,
-        critical_path_blocked_dur,
-        critical_path_blocked_state,
-        critical_path_blocked_function
-      FROM
-        _flatten_critical_path_tasks!($critical_path_table) cr
-      JOIN $sleeping_table sleep
-        USING (root_node_id)
+        _interval_to_root_nodes.utid,
+        MAX(_interval_to_root_nodes.id) AS _start_id,
+        _source.ts,
+        _source.dur
+      FROM _interval_to_root_nodes
+      JOIN _thread_bounds USING (utid)
+      JOIN _source
+        ON _source.utid = _interval_to_root_nodes.utid
+          AND MAX(_source.ts, min_start) >= _interval_to_root_nodes.ts
+      GROUP BY _source.ts, _source.utid
+    ),
+    _end AS (
+      SELECT
+        _interval_to_root_nodes.utid,
+        MIN(_interval_to_root_nodes.id) AS _end_id,
+        _source.ts,
+        _source.dur
+      FROM _interval_to_root_nodes
+      JOIN _thread_bounds USING (utid)
+      JOIN _source
+        ON _source.utid = _interval_to_root_nodes.utid
+          AND MIN((_source.ts + _source.dur), max_start) <= _interval_to_root_nodes.ts
+      GROUP BY _source.ts, _source.utid
+    ),
+    _bound AS (
+      SELECT _start.utid, _start.ts, _start.dur, _start_id, _end_id
+      FROM _start
+      JOIN _end
+        ON _start.ts = _end.ts AND _start.dur = _end.dur AND _start.utid = _end.utid
     )
-  SELECT
-    ts,
-    MIN(cr_end_ts, sleep_end_ts) - ts AS dur,
-    utid,
-    id,
-    root_id,
-    prev_end_ts,
-    critical_path_utid,
-    critical_path_id,
-    critical_path_blocked_dur,
-    critical_path_blocked_state,
-    critical_path_blocked_function
-  FROM span_starts
-  WHERE MIN(sleep_end_ts, cr_end_ts) - ts > 0
+  SELECT DISTINCT id AS root_node_id, id - COALESCE(prev_id, id) AS capacity
+  FROM _bound
+  JOIN _interval_to_root_nodes
+    ON _interval_to_root_nodes.id BETWEEN _start_id AND _end_id
+      AND _interval_to_root_nodes.utid = _bound.utid
 );
 
--- Generates a critical path.
-CREATE PERFETTO MACRO _critical_path(
-        graph_table TableOrSubquery, root_table TableOrSubquery, sleeping_table TableOrSubquery)
-RETURNS TableOrSubquery
+-- Adjusts the userspace critical path such that any interval that includes a kernel stall
+-- (is_next_idle_reason_self is set on its node) gets the next id, the root id of the kernel
+-- critical path, and sets |parent_id| to |root_id|. This ensures that the merge step
+-- associates the userspace and kernel critical paths on the same interval correctly.
+CREATE PERFETTO MACRO _critical_path_userspace_adjusted(_critical_path_table TableOrSubQuery,
+                                                        _node_table TableOrSubQuery)
+RETURNS TableOrSubQuery
+AS (
+    SELECT
+      cr.root_id,
+      cr.root_id AS parent_id,
+      IIF(node.is_next_idle_reason_self, node.next_id, cr.id) AS id,
+      cr.ts,
+      cr.dur
+    FROM (SELECT * FROM $_critical_path_table) cr
+    JOIN $_node_table node
+      USING (id)
+);
+
+-- Clips the start and end of each kernel critical path interval to the userspace interval whose
+-- id equals its |parent_id| under the same root, so it is completely bounded within that interval.
+CREATE PERFETTO MACRO _critical_path_kernel_adjusted(_userspace_critical_path_table TableOrSubQuery,
+                                                     _kernel_critical_path_table TableOrSubQuery,
+                                                     _node_table TableOrSubQuery)
+RETURNS TableOrSubQuery
+AS (
+    SELECT
+      kernel_cr.root_id,
+      kernel_cr.root_id AS parent_id,
+      kernel_cr.id,
+      MAX(kernel_cr.ts, userspace_cr.ts) AS ts,
+      MIN(kernel_cr.ts + kernel_cr.dur, userspace_cr.ts + userspace_cr.dur)
+        - MAX(kernel_cr.ts, userspace_cr.ts) AS dur
+    FROM $_kernel_critical_path_table kernel_cr
+    JOIN $_node_table node
+      ON kernel_cr.parent_id = node.id
+    JOIN $_userspace_critical_path_table userspace_cr
+      ON userspace_cr.id = kernel_cr.parent_id AND userspace_cr.root_id = kernel_cr.root_id
+);
+
+-- Merge the kernel and userspace critical path such that the corresponding kernel critical path
+-- has priority over the userspace critical path it overlaps.
+CREATE PERFETTO MACRO _critical_path_merged(_userspace_critical_path_table TableOrSubQuery,
+                                            _kernel_critical_path_table TableOrSubQuery,
+                                            _node_table TableOrSubQuery)
+RETURNS TableOrSubQuery
+AS (
+WITH _userspace_critical_path AS (
+  SELECT DISTINCT *
+  FROM _critical_path_userspace_adjusted!(
+    $_userspace_critical_path_table,
+    $_node_table)
+  ),
+  _merged_critical_path AS (
+    SELECT * FROM _userspace_critical_path
+     UNION ALL
+    SELECT DISTINCT *
+    FROM _critical_path_kernel_adjusted!(
+      _userspace_critical_path,
+      $_kernel_critical_path_table,
+      $_node_table)
+    WHERE id != parent_id
+    ),
+    _roots_critical_path AS (
+      SELECT root_id, MIN(ts) AS root_ts, MAX(ts + dur) - MIN(ts) AS root_dur
+      FROM _userspace_critical_path
+      GROUP BY root_id
+    ),
+    _roots_and_merged_critical_path AS (
+      SELECT
+        root_id,
+        root_ts,
+        root_dur,
+        parent_id,
+        id,
+        ts,
+        dur
+      FROM _merged_critical_path
+      JOIN _roots_critical_path USING(root_id)
     )
-  SELECT
-    ts,
-    dur,
-    root_id,
-    id,
-    utid,
-    critical_path_utid,
-    critical_path_id,
-    critical_path_blocked_dur,
-    critical_path_blocked_state,
-    critical_path_blocked_function
-  FROM _flatten_critical_paths!(critical_path, $sleeping_table)
-  UNION ALL
-  -- Add roots
-  SELECT
-    ts,
-    end_ts - ts AS dur,
-    id AS root_id,
-    id,
-    utid,
-    utid AS critical_path_utid,
-    NULL AS critical_path_id,
-    NULL AS critical_path_blocked_dur,
-    NULL AS critical_path_blocked_state,
-    NULL AS critical_path_blocked_function
-  FROM $root_table
-  ORDER BY root_id
+    SELECT
+      flat.root_id,
+      flat.id,
+      flat.ts,
+      flat.dur
+    FROM
+    _intervals_flatten!(_roots_and_merged_critical_path) flat
+    WHERE flat.dur > 0
+    GROUP BY flat.root_id, flat.ts
 );
 
 -- Generates the critical path for only the set of roots <id> passed in.
@@ -394,96 +420,92 @@
 -- binder transactions. It might be more efficient to generate the _critical_path
 -- for the entire trace, see _thread_executing_span_critical_path_all, but for a
 -- per-process susbset of binder txns for instance, this is likely faster.
-CREATE PERFETTO MACRO _critical_path_by_roots(roots_table TableOrSubQuery)
+CREATE PERFETTO MACRO _critical_path_by_roots(_roots_table TableOrSubQuery,
+                                              _node_table TableOrSubQuery)
 RETURNS TableOrSubQuery
 AS (
-  WITH roots AS (
-    SELECT * FROM $roots_table
-  ), root_bounds AS (
-    SELECT MIN(id) AS min_root_id, MAX(id) AS max_root_id FROM roots
-  ), wakeup_bounds AS (
-    SELECT COALESCE(_wakeup_graph.prev_id, min_root_id) AS min_wakeup, max_root_id AS max_wakeup
-    FROM root_bounds
-    JOIN _wakeup_graph ON id = min_root_id
-  ) SELECT
-      id,
-      ts,
-      dur,
-      utid,
-      critical_path_id,
-      critical_path_blocked_dur,
-      critical_path_blocked_state,
-      critical_path_blocked_function,
-      critical_path_utid
-      FROM
-        _critical_path
-        !(
+  WITH _userspace_critical_path_by_roots AS (
+    SELECT *
+    FROM
+      _critical_path_intervals
+        !(_wakeup_userspace_edges,
+          $_roots_table,
+          _wakeup_intervals)
+  ),
+  _kernel_nodes AS (
+    SELECT id, root_id FROM _userspace_critical_path_by_roots
+    JOIN $_node_table node USING (id) WHERE is_idle_reason_self = 1
+  ),
+  _kernel_critical_path_by_roots AS (
+    SELECT _kernel_nodes.root_id, cr.root_id AS parent_id, cr.id, cr.ts, cr.dur
+    FROM
+      _critical_path_intervals
+        !(_wakeup_kernel_edges,
           (
-            SELECT
-              id AS source_node_id,
-              COALESCE(waker_id, id) AS dest_node_id,
-              id - COALESCE(waker_id, id) AS edge_weight
-            FROM _wakeup_graph
-            JOIN wakeup_bounds WHERE id BETWEEN min_wakeup AND max_wakeup
+           SELECT graph.id AS root_node_id, graph.id - COALESCE(graph.prev_id, graph.id) AS capacity
+           FROM _kernel_nodes
+           JOIN _wakeup_graph graph USING(id)
           ),
-          (
-            SELECT
-              _wakeup_graph.id AS root_node_id,
-              _wakeup_graph.id - COALESCE(prev_id, _wakeup_graph.id) AS root_target_weight,
-              id,
-              ts,
-              end_ts,
-              utid
-            FROM _wakeup_graph
-            JOIN (SELECT * FROM roots) USING (id)
-          ),
-          _sleep));
+          _wakeup_intervals)
+          cr
+    JOIN _kernel_nodes
+      ON _kernel_nodes.id = cr.root_id
+  ) SELECT * FROM _critical_path_merged!(
+    _userspace_critical_path_by_roots,
+    _kernel_critical_path_by_roots,
+    $_node_table)
+);
 
 -- Generates the critical path for only the time intervals for the utids given.
 -- Currently expensive because of naive interval_intersect implementation.
 -- Prefer _critical_paths_by_roots for performance. This is useful for a small
 -- set of intervals, e.g app startups in a trace.
-CREATE PERFETTO MACRO _critical_path_by_intervals(intervals_table TableOrSubQuery)
+CREATE PERFETTO MACRO _critical_path_by_intervals(_intervals_table TableOrSubQuery,
+                                                  _node_table TableOrSubQuery)
 RETURNS TableOrSubQuery AS (
-WITH span_starts AS (
+  WITH interval_nodes AS (
+    SELECT * FROM $_node_table
+  ), span_starts AS (
     SELECT
-      id,
+      interval_nodes.utid AS root_utid,
+      root_id,
+      span.id,
       MAX(span.ts, intervals.ts) AS ts,
-      MIN(span.ts + span.dur, intervals.ts + intervals.dur) AS end_ts,
-      span.utid,
-      critical_path_id,
-      critical_path_blocked_dur,
-      critical_path_blocked_state,
-      critical_path_blocked_function,
-      critical_path_utid
-    FROM _critical_path_by_roots!(_intervals_to_roots!($intervals_table)) span
+      MIN(span.ts + span.dur, intervals.ts + intervals.dur) AS end_ts
+    FROM _critical_path_by_roots!(
+      _intervals_to_roots!($_intervals_table, $_node_table),
+      interval_nodes) span
+    JOIN interval_nodes ON interval_nodes.id = root_id
     -- TODO(zezeozue): Replace with interval_intersect when partitions are supported
-    JOIN (SELECT * FROM $intervals_table) intervals ON span.critical_path_utid = intervals.utid
+    JOIN (SELECT * FROM $_intervals_table) intervals ON interval_nodes.utid = intervals.utid
         AND ((span.ts BETWEEN intervals.ts AND intervals.ts + intervals.dur)
              OR (intervals.ts BETWEEN span.ts AND span.ts + span.dur))
-) SELECT
-      id,
-      ts,
-      end_ts - ts AS dur,
-      utid,
-      critical_path_id,
-      critical_path_blocked_dur,
-      critical_path_blocked_state,
-      critical_path_blocked_function,
-      critical_path_utid
-   FROM span_starts);
+  ) SELECT
+      span_starts.root_utid,
+      span_starts.root_id,
+      span_starts.id,
+      span_starts.ts,
+      span_starts.end_ts - span_starts.ts AS dur,
+      interval_nodes.utid AS utid
+    FROM span_starts
+    JOIN interval_nodes USING(id));
 
 -- Generates the critical path for a given utid over the <ts, dur> interval.
 -- The duration of a thread executing span in the critical path is the range between the
 -- start of the thread_executing_span and the start of the next span in the critical path.
 CREATE PERFETTO FUNCTION _thread_executing_span_critical_path(
   -- Utid of the thread to compute the critical path for.
-  critical_path_utid INT,
+  root_utid INT,
   -- Timestamp.
   ts LONG,
   -- Duration.
   dur LONG)
 RETURNS TABLE(
+  -- Thread Utid the critical path was filtered to.
+  root_utid INT,
+  -- Id of thread executing span following the sleeping thread state for which the critical path is
+  -- computed.
+  root_id INT,
   -- Id of the first (runnable) thread state in thread_executing_span.
   id INT,
   -- Timestamp of first thread_state in thread_executing_span.
@@ -491,53 +513,8 @@
   -- Duration of thread_executing_span.
   dur LONG,
   -- Utid of thread with thread_state.
-  utid INT,
-  -- Id of thread executing span following the sleeping thread state for which the critical path is computed.
-  critical_path_id INT,
-  -- Critical path duration.
-  critical_path_blocked_dur LONG,
-  -- Sleeping thread state in critical path.
-  critical_path_blocked_state STRING,
-  -- Kernel blocked_function of the critical path.
-  critical_path_blocked_function STRING,
-  -- Thread Utid the critical path was filtered to.
-  critical_path_utid INT
+  utid INT
 ) AS
-SELECT * FROM _critical_path_by_intervals!((SELECT $critical_path_utid AS utid, $ts as ts, $dur AS dur));
-
--- Generates the critical path for all threads for the entire trace duration.
--- The duration of a thread executing span in the critical path is the range between the
--- start of the thread_executing_span and the start of the next span in the critical path.
-CREATE PERFETTO FUNCTION _thread_executing_span_critical_path_all()
-RETURNS
-  TABLE(
-    -- Id of the first (runnable) thread state in thread_executing_span.
-    id INT,
-    -- Timestamp of first thread_state in thread_executing_span.
-    ts LONG,
-    -- Duration of thread_executing_span.
-    dur LONG,
-    -- Utid of thread with thread_state.
-    utid INT,
-    -- Id of thread executing span following the sleeping thread state for which the critical path is computed.
-    critical_path_id INT,
-    -- Critical path duration.
-    critical_path_blocked_dur LONG,
-    -- Sleeping thread state in critical path.
-    critical_path_blocked_state STRING,
-    -- Kernel blocked_function of the critical path.
-    critical_path_blocked_function STRING,
-    -- Thread Utid the critical path was filtered to.
-    critical_path_utid INT)
-AS
-SELECT
-  id,
-  ts,
-  dur,
-  utid,
-  critical_path_id,
-  critical_path_blocked_dur,
-  critical_path_blocked_state,
-  critical_path_blocked_function,
-  critical_path_utid
-FROM _critical_path_by_roots!((SELECT id FROM _wakeup_graph));
+SELECT * FROM _critical_path_by_intervals!(
+  (SELECT $root_utid AS utid, $ts as ts, $dur AS dur),
+  _wakeup_graph);
diff --git a/src/trace_processor/perfetto_sql/stdlib/sched/thread_executing_span_with_slice.sql b/src/trace_processor/perfetto_sql/stdlib/sched/thread_executing_span_with_slice.sql
index 74f73a5..31a4532 100644
--- a/src/trace_processor/perfetto_sql/stdlib/sched/thread_executing_span_with_slice.sql
+++ b/src/trace_processor/perfetto_sql/stdlib/sched/thread_executing_span_with_slice.sql
@@ -17,14 +17,94 @@
 INCLUDE PERFETTO MODULE slices.flat_slices;
 INCLUDE PERFETTO MODULE sched.thread_executing_span;
 
-CREATE PERFETTO TABLE _critical_path_all AS
-SELECT * FROM  _thread_executing_span_critical_path_all();
+CREATE PERFETTO TABLE _critical_path_userspace
+AS
+SELECT *
+FROM
+  _critical_path_intervals
+    !(_wakeup_userspace_edges,
+      (SELECT id AS root_node_id, id - COALESCE(prev_id, id) AS capacity FROM _wakeup_graph),
+      _wakeup_intervals);
+
+CREATE PERFETTO TABLE _critical_path_kernel
+AS
+WITH _kernel_nodes AS (
+  SELECT id, root_id FROM _critical_path_userspace
+  JOIN _wakeup_graph USING (id) WHERE is_idle_reason_self = 1
+)
+SELECT _kernel_nodes.root_id, cr.root_id AS parent_id, cr.id, cr.ts, cr.dur
+FROM
+  _critical_path_intervals
+    !(_wakeup_kernel_edges,
+      (
+        SELECT graph.id AS root_node_id, graph.id - COALESCE(graph.prev_id, graph.id) AS capacity
+        FROM _kernel_nodes
+        JOIN _wakeup_graph graph USING(id)
+      ),
+      _wakeup_intervals) cr
+JOIN _kernel_nodes
+  ON _kernel_nodes.id = cr.root_id;
+
+CREATE PERFETTO TABLE _critical_path_userspace_adjusted AS
+SELECT DISTINCT * FROM _critical_path_userspace_adjusted!(_critical_path_userspace, _wakeup_graph);
+
+CREATE PERFETTO TABLE _critical_path_kernel_adjusted AS
+SELECT DISTINCT * FROM _critical_path_kernel_adjusted!(_critical_path_userspace_adjusted, _critical_path_kernel, _wakeup_graph);
+
+CREATE PERFETTO TABLE _critical_path_merged_adjusted AS
+  SELECT root_id, parent_id, id, ts, dur FROM _critical_path_userspace_adjusted
+  UNION ALL
+  SELECT root_id, parent_id, id, ts, dur FROM _critical_path_kernel_adjusted WHERE id != parent_id;
+
+CREATE PERFETTO TABLE _critical_path_roots AS
+  SELECT root_id, min(ts) AS root_ts, max(ts + dur) - min(ts) AS root_dur
+  FROM _critical_path_userspace_adjusted
+  GROUP BY root_id;
+
+CREATE PERFETTO TABLE _critical_path_roots_and_merged AS
+  WITH roots_and_merged_critical_path AS (
+      SELECT
+        root_id,
+        root_ts,
+        root_dur,
+        parent_id,
+        id,
+        ts,
+        dur
+      FROM _critical_path_merged_adjusted
+      JOIN _critical_path_roots USING(root_id)
+    )
+    SELECT
+      flat.root_id,
+      flat.id,
+      flat.ts,
+      flat.dur
+    FROM
+    _intervals_flatten!(roots_and_merged_critical_path) flat
+    WHERE flat.dur > 0
+    GROUP BY flat.root_id, flat.ts;
+
+CREATE PERFETTO TABLE _critical_path_all
+AS
+SELECT
+  ROW_NUMBER() OVER(ORDER BY cr.ts) AS id,
+  cr.ts,
+  cr.dur,
+  cr.ts + cr.dur AS ts_end,
+  id_graph.utid,
+  id_graph.utid AS c0,
+  root_id_graph.utid AS root_utid
+  FROM _critical_path_roots_and_merged cr
+  JOIN _wakeup_graph id_graph ON cr.id = id_graph.id
+  JOIN _wakeup_graph root_id_graph ON cr.root_id = root_id_graph.id ORDER BY cr.ts;
 
 -- Limited thread_state view that will later be span joined with the |_thread_executing_span_graph|.
 CREATE PERFETTO VIEW _span_thread_state_view
-AS SELECT id AS thread_state_id, ts, dur, utid, state, blocked_function as function, io_wait, cpu FROM thread_state;
+AS
+SELECT id AS thread_state_id, ts, dur, utid, state, blocked_function AS function, io_wait, cpu
+FROM thread_state;
 
--- Limited slice_view that will later be span joined with the |_thread_executing_span_graph|.
+-- Limited slice_view that will later be span joined with the critical path.
 CREATE PERFETTO VIEW _span_slice_view
 AS
 SELECT
@@ -36,50 +116,49 @@
   utid
 FROM _slice_flattened;
 
-CREATE VIRTUAL TABLE _span_thread_state_slice_view
+-- Thread states span left joined with flattened slices, partitioned by utid.
+CREATE VIRTUAL TABLE _span_thread_state_slice_sp
 USING
   SPAN_LEFT_JOIN(
     _span_thread_state_view PARTITIONED utid,
     _span_slice_view PARTITIONED utid);
 
--- |_thread_executing_span_graph| span joined with thread_state information.
-CREATE VIRTUAL TABLE _span_critical_path_thread_state_slice_sp
-USING
-  SPAN_JOIN(
-    _critical_path_all PARTITIONED utid,
-    _span_thread_state_slice_view PARTITIONED utid);
-
--- |_thread_executing_span_graph| + thread_state view joined with critical_path information.
-CREATE PERFETTO TABLE _critical_path_thread_state_slice AS
-WITH span_starts AS (
-    SELECT
-      span.id,
-      span.utid,
-      span.critical_path_id,
-      span.critical_path_blocked_dur,
-      span.critical_path_blocked_state,
-      span.critical_path_blocked_function,
-      span.critical_path_utid,
-      thread_state_id,
-      MAX(thread_state.ts, span.ts) AS ts,
-      span.ts + span.dur AS span_end_ts,
-      thread_state.ts + thread_state.dur AS thread_state_end_ts,
-      thread_state.state,
-      thread_state.function,
-      thread_state.cpu,
-      thread_state.io_wait,
-      thread_state.slice_id,
-      thread_state.slice_name,
-      thread_state.slice_depth
-    FROM _critical_path_all span
-    JOIN _span_critical_path_thread_state_slice_sp thread_state USING(id)
-  )
+CREATE PERFETTO TABLE _span_thread_state_slice
+AS
 SELECT
-  id,
-  thread_state_id,
+  ROW_NUMBER() OVER(ORDER BY ts) AS id,
   ts,
-  MIN(span_end_ts, thread_state_end_ts) - ts AS dur,
+  dur,
+  ts + dur AS ts_end,
   utid,
+  utid AS c0,
+  thread_state_id,
+  state,
+  function,
+  cpu,
+  io_wait,
+  slice_id,
+  slice_name,
+  slice_depth
+  FROM _span_thread_state_slice_sp WHERE dur > 0 ORDER BY ts;
+
+CREATE PERFETTO TABLE _critical_path_thread_state_slice_raw
+AS
+SELECT
+  cr.id AS cr_id,
+  th.id AS th_id
+FROM __intrinsic_ii_with_interval_tree('_critical_path_all', 'c0') cr
+JOIN __intrinsic_ii_with_interval_tree('_span_thread_state_slice', 'c0') th
+  USING (c0)
+WHERE cr.ts < th.ts_end AND cr.ts_end > th.ts;
+
+CREATE PERFETTO TABLE _critical_path_thread_state_slice
+AS
+SELECT
+  max(cr.ts, th.ts) as ts,
+  min(cr.ts + cr.dur, th.ts + th.dur) - max(cr.ts, th.ts) as dur,
+  cr.utid,
+  thread_state_id,
   state,
   function,
   cpu,
@@ -87,13 +166,12 @@
   slice_id,
   slice_name,
   slice_depth,
-  critical_path_id,
-  critical_path_blocked_dur,
-  critical_path_blocked_state,
-  critical_path_blocked_function,
-  critical_path_utid
-FROM span_starts
-WHERE MIN(span_end_ts, thread_state_end_ts) - ts > 0;
+  root_utid
+FROM _critical_path_thread_state_slice_raw raw
+JOIN _critical_path_all cr
+  ON cr.id = raw.cr_id
+JOIN _span_thread_state_slice th
+  ON th.id = raw.th_id;
 
 -- Flattened slices span joined with their thread_states. This contains the 'self' information
 -- without 'critical_path' (blocking) information.
@@ -108,7 +186,7 @@
     slice_id AS self_slice_id,
     ts,
     dur,
-    utid AS critical_path_utid,
+    utid AS root_utid,
     state AS self_state,
     blocked_function AS self_function,
     cpu AS self_cpu,
@@ -123,8 +201,8 @@
 CREATE VIRTUAL TABLE _self_and_critical_path_sp
 USING
   SPAN_JOIN(
-    _self_view PARTITIONED critical_path_utid,
-    _critical_path_thread_state_slice PARTITIONED critical_path_utid);
+    _self_view PARTITIONED root_utid,
+    _critical_path_thread_state_slice PARTITIONED root_utid);
 
 -- Returns a view of |_self_and_critical_path_sp| unpivoted over the following columns:
 -- self thread_state.
@@ -138,7 +216,7 @@
 -- critical_path slice_stack (enabled with |enable_critical_path_slice|).
 -- running cpu (if one exists).
 -- A 'stack' is the group of resulting unpivoted rows sharing the same timestamp.
-CREATE PERFETTO FUNCTION _critical_path_stack(critical_path_utid INT, ts LONG, dur LONG, enable_process_name INT, enable_thread_name INT, enable_self_slice INT, enable_critical_path_slice INT)
+CREATE PERFETTO FUNCTION _critical_path_stack(root_utid INT, ts LONG, dur LONG, enable_process_name INT, enable_thread_name INT, enable_self_slice INT, enable_critical_path_slice INT)
 RETURNS
   TABLE(
     id INT,
@@ -148,8 +226,8 @@
     stack_depth INT,
     name STRING,
     table_name STRING,
-    critical_path_utid INT) AS
-  -- Spans filtered to the query time window and critical_path_utid.
+    root_utid INT) AS
+  -- Spans filtered to the query time window and root_utid.
   -- This is a preliminary step that gets the start and end ts of all the rows
   -- so that we can chop the ends of each interval correctly if it overlaps with the query time interval.
   WITH relevant_spans_starts AS (
@@ -172,9 +250,9 @@
       utid,
       MAX(ts, $ts) AS ts,
       MIN(ts + dur, $ts + $dur) AS end_ts,
-      critical_path_utid
+      root_utid
     FROM _self_and_critical_path_sp
-    WHERE dur > 0 AND critical_path_utid = $critical_path_utid
+    WHERE dur > 0 AND root_utid = $root_utid
   ),
   -- This is the final step that gets the |dur| of each span from the start and
   -- and end ts of the previous step.
@@ -201,7 +279,7 @@
       utid,
       ts,
       end_ts - ts AS dur,
-      critical_path_utid,
+      root_utid,
       utid
     FROM relevant_spans_starts
     WHERE dur > 0
@@ -213,11 +291,11 @@
       self_thread_state_id AS id,
       ts,
       dur,
-      critical_path_utid AS utid,
+      root_utid AS utid,
       0 AS stack_depth,
       'thread_state: ' || self_state AS name,
       'thread_state' AS table_name,
-      critical_path_utid
+      root_utid
     FROM relevant_spans
     UNION ALL
     -- Builds the self kernel blocked_function
@@ -225,11 +303,11 @@
       self_thread_state_id AS id,
       ts,
       dur,
-      critical_path_utid AS utid,
+      root_utid AS utid,
       1 AS stack_depth,
       IIF(self_state GLOB 'R*', NULL, 'kernel function: ' || self_function) AS name,
       'thread_state' AS table_name,
-      critical_path_utid
+      root_utid
     FROM relevant_spans
     UNION ALL
     -- Builds the self kernel io_wait
@@ -237,11 +315,11 @@
       self_thread_state_id AS id,
       ts,
       dur,
-      critical_path_utid AS utid,
+      root_utid AS utid,
       2 AS stack_depth,
       IIF(self_state GLOB 'R*', NULL, 'io_wait: ' || self_io_wait) AS name,
       'thread_state' AS table_name,
-      critical_path_utid
+      root_utid
     FROM relevant_spans
     UNION ALL
     -- Builds the self process_name
@@ -253,10 +331,10 @@
       3 AS stack_depth,
       IIF($enable_process_name, 'process_name: ' || process.name, NULL) AS name,
       'thread_state' AS table_name,
-      critical_path_utid
+      root_utid
     FROM relevant_spans
     LEFT JOIN thread
-      ON thread.utid = critical_path_utid
+      ON thread.utid = root_utid
     LEFT JOIN process
       USING (upid)
     -- Builds the self thread_name
@@ -269,10 +347,10 @@
       4 AS stack_depth,
       IIF($enable_thread_name, 'thread_name: ' || thread.name, NULL) AS name,
       'thread_state' AS table_name,
-      critical_path_utid
+      root_utid
     FROM relevant_spans
     LEFT JOIN thread
-      ON thread.utid = critical_path_utid
+      ON thread.utid = root_utid
     JOIN process
       USING (upid)
     UNION ALL
@@ -281,11 +359,11 @@
       anc.id,
       slice.ts,
       slice.dur,
-      critical_path_utid AS utid,
+      root_utid AS utid,
       anc.depth + 5 AS stack_depth,
       IIF($enable_self_slice, anc.name, NULL) AS name,
       'slice' AS table_name,
-      critical_path_utid
+      root_utid
     FROM relevant_spans slice
     JOIN ancestor_slice(self_slice_id) anc WHERE anc.dur != -1
     UNION ALL
@@ -294,11 +372,11 @@
       self_slice_id AS id,
       ts,
       dur,
-      critical_path_utid AS utid,
+      root_utid AS utid,
       self_slice_depth + 5 AS stack_depth,
       IIF($enable_self_slice, self_slice_name, NULL) AS name,
       'slice' AS table_name,
-      critical_path_utid
+      root_utid
     FROM relevant_spans slice
     -- Ordering by stack depth is important to ensure the items can
     -- be renedered in the UI as a debug track in the order in which
@@ -310,9 +388,9 @@
   -- each self slice stack has variable depth and the depth in each stack
   -- most be contiguous in order to efficiently generate a pprof in the future.
   critical_path_start_depth AS MATERIALIZED (
-    SELECT critical_path_utid, ts, MAX(stack_depth) + 1 AS start_depth
+    SELECT root_utid, ts, MAX(stack_depth) + 1 AS start_depth
     FROM self_stack
-    GROUP BY critical_path_utid, ts
+    GROUP BY root_utid, ts
   ),
   critical_path_span AS MATERIALIZED (
     SELECT
@@ -325,15 +403,15 @@
       slice_depth,
       spans.ts,
       spans.dur,
-      spans.critical_path_utid,
+      spans.root_utid,
       utid,
       start_depth
     FROM relevant_spans spans
     JOIN critical_path_start_depth
       ON
-        critical_path_start_depth.critical_path_utid = spans.critical_path_utid
+        critical_path_start_depth.root_utid = spans.root_utid
         AND critical_path_start_depth.ts = spans.ts
-    WHERE critical_path_start_depth.critical_path_utid = $critical_path_utid AND spans.critical_path_utid != spans.utid
+    WHERE critical_path_start_depth.root_utid = $root_utid AND spans.root_utid != spans.utid
   ),
   -- 2. Builds the 'critical_path' stack of items as an ordered UNION ALL
   critical_path_stack AS MATERIALIZED (
@@ -346,7 +424,7 @@
       start_depth AS stack_depth,
       'blocking thread_state: ' || state AS name,
       'thread_state' AS table_name,
-      critical_path_utid
+      root_utid
     FROM critical_path_span
     UNION ALL
     -- Builds the critical_path process_name
@@ -358,7 +436,7 @@
       start_depth + 1 AS stack_depth,
       'blocking process_name: ' || process.name,
       'thread_state' AS table_name,
-      critical_path_utid
+      root_utid
     FROM critical_path_span
     JOIN thread USING (utid)
     LEFT JOIN process USING (upid)
@@ -372,7 +450,7 @@
       start_depth + 2 AS stack_depth,
       'blocking thread_name: ' || thread.name,
       'thread_state' AS table_name,
-      critical_path_utid
+      root_utid
     FROM critical_path_span
     JOIN thread USING (utid)
     UNION ALL
@@ -385,7 +463,7 @@
       start_depth + 3 AS stack_depth,
       'blocking kernel_function: ' || function,
       'thread_state' AS table_name,
-      critical_path_utid
+      root_utid
     FROM critical_path_span
     JOIN thread USING (utid)
     UNION ALL
@@ -398,7 +476,7 @@
       start_depth + 4 AS stack_depth,
       'blocking io_wait: ' || io_wait,
       'thread_state' AS table_name,
-      critical_path_utid
+      root_utid
     FROM critical_path_span
     JOIN thread USING (utid)
     UNION ALL
@@ -411,7 +489,7 @@
       anc.depth + start_depth + 5 AS stack_depth,
       IIF($enable_critical_path_slice, anc.name, NULL) AS name,
       'slice' AS table_name,
-      critical_path_utid
+      root_utid
     FROM critical_path_span slice
     JOIN ancestor_slice(slice_id) anc WHERE anc.dur != -1
     UNION ALL
@@ -424,7 +502,7 @@
       slice_depth + start_depth + 5 AS stack_depth,
       IIF($enable_critical_path_slice, slice_name, NULL) AS name,
       'slice' AS table_name,
-      critical_path_utid
+      root_utid
     FROM critical_path_span slice
     -- Ordering is also important as in the 'self' step above.
     ORDER BY stack_depth
@@ -434,16 +512,16 @@
   -- the critical_path stack and self stack. The self stack depth is
   -- already computed and materialized in |critical_path_start_depth|.
   cpu_start_depth_raw AS (
-    SELECT critical_path_utid, ts, MAX(stack_depth) + 1 AS start_depth
+    SELECT root_utid, ts, MAX(stack_depth) + 1 AS start_depth
     FROM critical_path_stack
-    GROUP BY critical_path_utid, ts
+    GROUP BY root_utid, ts
     UNION ALL
     SELECT * FROM critical_path_start_depth
   ),
   cpu_start_depth AS (
-    SELECT critical_path_utid, ts, MAX(start_depth) AS start_depth
+    SELECT root_utid, ts, MAX(start_depth) AS start_depth
     FROM cpu_start_depth_raw
-    GROUP BY critical_path_utid, ts
+    GROUP BY root_utid, ts
   ),
   -- 3. Builds the 'CPU' stack for 'Running' states in either the self or critical path stack.
   cpu_stack AS (
@@ -455,13 +533,13 @@
       start_depth AS stack_depth,
       'cpu: ' || cpu AS name,
       'thread_state' AS table_name,
-      spans.critical_path_utid
+      spans.root_utid
     FROM relevant_spans spans
     JOIN cpu_start_depth
       ON
-        cpu_start_depth.critical_path_utid = spans.critical_path_utid
+        cpu_start_depth.root_utid = spans.root_utid
         AND cpu_start_depth.ts = spans.ts
-    WHERE cpu_start_depth.critical_path_utid = $critical_path_utid AND state = 'Running' OR self_state = 'Running'
+    WHERE cpu_start_depth.root_utid = $root_utid AND (state = 'Running' OR self_state = 'Running') -- Parenthesized: AND binds tighter than OR.
   ),
   merged AS (
     SELECT * FROM self_stack
@@ -478,7 +556,7 @@
 -- critical_path thread_name, critical_path slice_stack, running_cpu.
 CREATE PERFETTO FUNCTION _thread_executing_span_critical_path_stack(
   -- Thread utid to filter critical paths to.
-  critical_path_utid INT,
+  root_utid INT,
   -- Timestamp of start of time range to filter critical paths to.
   ts LONG,
   -- Duration of time range to filter critical paths to.
@@ -500,25 +578,25 @@
     -- Table name of entity in the critical path (could be either slice or thread_state).
     table_name STRING,
     -- Utid of the thread the critical path was filtered to.
-    critical_path_utid INT
+    root_utid INT
 ) AS
-SELECT * FROM _critical_path_stack($critical_path_utid, $ts, $dur, 1, 1, 1, 1);
+SELECT * FROM _critical_path_stack($root_utid, $ts, $dur, 1, 1, 1, 1);
 
 -- Returns a pprof aggregation of the stacks in |_critical_path_stack|.
-CREATE PERFETTO FUNCTION _critical_path_graph(graph_title STRING, critical_path_utid INT, ts LONG, dur LONG, enable_process_name INT, enable_thread_name INT, enable_self_slice INT, enable_critical_path_slice INT)
+CREATE PERFETTO FUNCTION _critical_path_graph(graph_title STRING, root_utid INT, ts LONG, dur LONG, enable_process_name INT, enable_thread_name INT, enable_self_slice INT, enable_critical_path_slice INT)
 RETURNS TABLE(pprof BYTES)
 AS
 WITH
   stack AS MATERIALIZED (
     SELECT
       ts,
-      dur - IFNULL(LEAD(dur) OVER (PARTITION BY critical_path_utid, ts ORDER BY stack_depth), 0) AS dur,
+      dur - IFNULL(LEAD(dur) OVER (PARTITION BY root_utid, ts ORDER BY stack_depth), 0) AS dur,
       name,
       utid,
-      critical_path_utid,
+      root_utid,
       stack_depth
     FROM
-      _critical_path_stack($critical_path_utid, $ts, $dur, $enable_process_name, $enable_thread_name, $enable_self_slice, $enable_critical_path_slice)
+      _critical_path_stack($root_utid, $ts, $dur, $enable_process_name, $enable_thread_name, $enable_self_slice, $enable_critical_path_slice)
   ),
   graph AS (
     SELECT CAT_STACKS($graph_title) AS stack
@@ -531,7 +609,7 @@
       cr.utid,
       cr.stack_depth,
       CAT_STACKS(graph.stack, cr.name) AS stack,
-      cr.critical_path_utid
+      cr.root_utid
     FROM stack cr, graph
     WHERE stack_depth = 0
     UNION ALL
@@ -542,11 +620,11 @@
       child.utid,
       child.stack_depth,
       CAT_STACKS(stack, child.name) AS stack,
-      child.critical_path_utid
+      child.root_utid
     FROM stack child
     JOIN parent
       ON
-        parent.critical_path_utid = child.critical_path_utid
+        parent.root_utid = child.root_utid
         AND parent.ts = child.ts
         AND child.stack_depth = parent.stack_depth + 1
   ),
@@ -560,7 +638,7 @@
   -- Descriptive name for the graph.
   graph_title STRING,
   -- Thread utid to filter critical paths to.
-  critical_path_utid INT,
+  root_utid INT,
   -- Timestamp of start of time range to filter critical paths to.
   ts INT,
   -- Duration of time range to filter critical paths to.
@@ -570,4 +648,4 @@
   pprof BYTES
 )
 AS
-SELECT * FROM _critical_path_graph($graph_title, $critical_path_utid, $ts, $dur, 1, 1, 1, 1);
+SELECT * FROM _critical_path_graph($graph_title, $root_utid, $ts, $dur, 1, 1, 1, 1);
diff --git a/test/trace_processor/diff_tests/include_index.py b/test/trace_processor/diff_tests/include_index.py
index 534344f..b57a139 100644
--- a/test/trace_processor/diff_tests/include_index.py
+++ b/test/trace_processor/diff_tests/include_index.py
@@ -110,6 +110,7 @@
 from diff_tests.stdlib.counters.tests import StdlibCounterIntervals
 from diff_tests.stdlib.dynamic_tables.tests import DynamicTables
 from diff_tests.stdlib.export.tests import ExportTests
+from diff_tests.stdlib.graphs.critical_path_tests import CriticalPathTests
 from diff_tests.stdlib.graphs.dominator_tree_tests import DominatorTree
 from diff_tests.stdlib.graphs.partition_tests import GraphPartitionTests
 from diff_tests.stdlib.graphs.scan_tests import GraphScanTests
@@ -275,6 +276,7 @@
       *AndroidStdlib(index_path, 'stdlib/android', 'AndroidStdlib').fetch(),
       *LinuxCpu(index_path, 'stdlib/linux/cpu', 'LinuxCpu').fetch(),
       *DominatorTree(index_path, 'stdlib/graphs', 'DominatorTree').fetch(),
+      *CriticalPathTests(index_path, 'stdlib/graphs', 'CriticalPath').fetch(),
       *GraphScanTests(index_path, 'stdlib/graphs', 'GraphScan').fetch(),
       *ExportTests(index_path, 'stdlib/export', 'ExportTests').fetch(),
       *Frames(index_path, 'stdlib/android', 'Frames').fetch(),
diff --git a/test/trace_processor/diff_tests/stdlib/graphs/critical_path_tests.py b/test/trace_processor/diff_tests/stdlib/graphs/critical_path_tests.py
new file mode 100644
index 0000000..7b746aa
--- /dev/null
+++ b/test/trace_processor/diff_tests/stdlib/graphs/critical_path_tests.py
@@ -0,0 +1,104 @@
+#!/usr/bin/env python3
+# Copyright (C) 2024 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from python.generators.diff_tests.testing import DataPath
+from python.generators.diff_tests.testing import Csv
+from python.generators.diff_tests.testing import DiffTestBlueprint
+from python.generators.diff_tests.testing import TestSuite
+
+class CriticalPathTests(TestSuite):
+
+  def test_critical_path_empty(self):
+    return DiffTestBlueprint(
+        trace=DataPath('counters.json'),
+        query="""
+          INCLUDE PERFETTO MODULE graphs.critical_path;
+
+          WITH edge AS (
+            SELECT 0 as source_node_id, 0 AS dest_node_id
+            WHERE FALSE
+          ), root AS (
+            SELECT 0 as root_node_id, 0 AS capacity
+            WHERE FALSE
+          )
+          SELECT * FROM _critical_path!(
+            (SELECT *, source_node_id - dest_node_id AS edge_weight FROM edge),
+            root
+          );
+        """,
+        out=Csv("""
+        "root_id","parent_id","id"
+        """))
+
+  def test_critical_path(self):
+    return DiffTestBlueprint(
+        trace=DataPath('counters.json'),
+        query="""
+          INCLUDE PERFETTO MODULE graphs.critical_path;
+
+          WITH edge(source_node_id, dest_node_id) AS (
+            values(8, 7), (7, 6), (6, 5), (6, 4), (4, 1), (5, 3), (3, 0)
+          ), root(root_node_id, capacity) AS (
+            values(8, 6)
+          )
+          SELECT * FROM _critical_path!(
+            (SELECT *, source_node_id - dest_node_id AS edge_weight FROM edge),
+            root
+          );
+        """,
+        out=Csv("""
+        "root_id","parent_id","id"
+        8,"[NULL]",8
+        8,3,0
+        8,5,3
+        8,6,5
+        8,7,6
+        8,8,7
+        """))
+
+  def test_critical_path_intervals(self):
+    return DiffTestBlueprint(
+        trace=DataPath('counters.json'),
+        query="""
+          INCLUDE PERFETTO MODULE graphs.critical_path;
+
+          WITH edge(source_node_id, dest_node_id) AS (
+            values(8, 7), (7, 6), (6, 5), (6, 4), (4, 1), (5, 3), (3, 0)
+          ), root(root_node_id, capacity) AS (
+            values(8, 6)
+          ), interval(id, ts, dur, idle_dur) AS (
+            values(8, 8, 1, 6),
+                  (7, 7, 1, 1),
+                  (6, 6, 1, 1),
+                  (5, 5, 1, 1),
+                  (4, 4, 1, 1),
+                  (3, 3, 1, 1),
+                  (2, 2, 1, 1),
+                  (1, 1, 1, 1)
+          )
+          SELECT * FROM _critical_path_intervals!(
+            (SELECT *, source_node_id - dest_node_id AS edge_weight FROM edge),
+            root,
+            interval
+          );
+        """,
+        out=Csv("""
+        "root_id","id","ts","dur"
+        8,3,3,2
+        8,5,5,1
+        8,6,6,1
+        8,7,7,1
+        8,8,8,1
+        """))
diff --git a/test/trace_processor/diff_tests/tables/tests_sched.py b/test/trace_processor/diff_tests/tables/tests_sched.py
index 7944b2d..3c3d2dd 100644
--- a/test/trace_processor/diff_tests/tables/tests_sched.py
+++ b/test/trace_processor/diff_tests/tables/tests_sched.py
@@ -149,30 +149,30 @@
         SELECT
           waker_id,
           prev_id,
-          prev_end_ts,
+          ts - idle_dur AS idle_ts,
           id,
           ts,
-          end_ts,
-          is_kernel,
+          ts + dur AS next_idle_ts ,
+          is_idle_reason_self,
           utid,
-          state,
-          blocked_function
+          idle_state,
+          idle_reason
         FROM _wakeup_graph
         ORDER BY ts
         LIMIT 10
         """,
         out=Csv("""
-        "waker_id","prev_id","prev_end_ts","id","ts","end_ts","is_kernel","utid","state","blocked_function"
-        "[NULL]","[NULL]","[NULL]",5,1735489812571,1735489896509,0,304,"[NULL]","[NULL]"
+        "waker_id","prev_id","idle_ts","id","ts","next_idle_ts","is_idle_reason_self","utid","idle_state","idle_reason"
+        "[NULL]","[NULL]","[NULL]",5,1735489812571,1735489896509,1,304,"[NULL]","[NULL]"
+        "[NULL]","[NULL]","[NULL]",6,1735489833977,1735489886440,1,297,"[NULL]","[NULL]"
         6,"[NULL]","[NULL]",11,1735489876788,1735489953773,0,428,"[NULL]","[NULL]"
         5,"[NULL]","[NULL]",12,1735489879097,1735490217277,0,243,"[NULL]","[NULL]"
         11,"[NULL]","[NULL]",17,1735489933912,1735490587658,0,230,"[NULL]","[NULL]"
-        "[NULL]","[NULL]","[NULL]",20,1735489972385,1735489995809,0,298,"[NULL]","[NULL]"
-        "[NULL]",20,1735489995809,25,1735489999987,1735490055966,0,298,"S","[NULL]"
+        "[NULL]","[NULL]","[NULL]",20,1735489972385,1735489995809,1,298,"[NULL]","[NULL]"
+        "[NULL]",20,1735489995809,25,1735489999987,1735490055966,1,298,"S","[NULL]"
         25,"[NULL]","[NULL]",28,1735490039439,1735490610238,0,421,"[NULL]","[NULL]"
         25,"[NULL]","[NULL]",29,1735490042084,1735490068213,0,420,"[NULL]","[NULL]"
         25,"[NULL]","[NULL]",30,1735490045825,1735491418790,0,1,"[NULL]","[NULL]"
-        17,"[NULL]","[NULL]",41,1735490544063,1735490598211,0,427,"[NULL]","[NULL]"
         """))
 
   def test_thread_executing_span_graph_contains_forked_states(self):
@@ -185,7 +185,7 @@
           waker_id,
           prev_id
         FROM _wakeup_graph
-          WHERE ts = 1735842081507 AND end_ts = 1735842081507 + 293868
+          WHERE ts = 1735842081507 AND ts + dur = 1735842081507 + 293868
         """,
         out=Csv("""
         "id","waker_id","prev_id"
@@ -209,11 +209,11 @@
         trace=DataPath('sched_wakeup_trace.atr'),
         query="""
         INCLUDE PERFETTO MODULE sched.thread_executing_span;
-        SELECT ts,end_ts FROM _wakeup_graph
-          WHERE end_ts IS NULL OR ts IS NULL
+        SELECT ts,dur FROM _wakeup_graph
+          WHERE dur IS NULL OR ts IS NULL
         """,
         out=Csv("""
-        "ts","end_ts"
+        "ts","dur"
         """))
 
   def test_thread_executing_span_graph_accepts_null_irq_context(self):
@@ -225,50 +225,7 @@
         """,
         out=Csv("""
         "count"
-        17
-        """))
-
-  def test_thread_executing_span_flatten_critical_path_tasks(self):
-    return DiffTestBlueprint(
-        trace=DataPath('sched_switch_original.pb'),
-        query="""
-        INCLUDE PERFETTO MODULE sched.thread_executing_span;
-
-        CREATE PERFETTO TABLE graph AS
-        SELECT
-          id AS source_node_id,
-          COALESCE(waker_id, id) AS dest_node_id,
-          id - COALESCE(waker_id, id) AS edge_weight
-        FROM _wakeup_graph;
-
-        CREATE PERFETTO TABLE roots AS
-        SELECT
-          _wakeup_graph.id AS root_node_id,
-          _wakeup_graph.id - COALESCE(prev_id, _wakeup_graph.id) AS root_target_weight,
-          id,
-          ts,
-          end_ts,
-          utid
-        FROM _wakeup_graph LIMIT 10;
-
-        CREATE PERFETTO TABLE critical_path AS
-        SELECT * FROM graph_reachable_weight_bounded_dfs!(graph, roots, 1);
-
-        SELECT * FROM _flatten_critical_path_tasks!(critical_path);
-        """,
-        out=Csv("""
-        "ts","root_node_id","node_id","dur","node_utid","prev_end_ts"
-        807082868359903,29,29,"[NULL]",8,"[NULL]"
-        807082871734539,35,35,"[NULL]",9,"[NULL]"
-        807082871734539,38,35,45052,9,"[NULL]"
-        807082871779591,38,38,"[NULL]",5,807082871764903
-        807082878623081,45,45,"[NULL]",9,807082871805424
-        807082947156994,57,57,"[NULL]",9,807082878865945
-        807082947246838,62,62,"[NULL]",6,807082879179539
-        807082947261525,63,63,"[NULL]",12,"[NULL]"
-        807082947267463,64,64,"[NULL]",13,"[NULL]"
-        807082947278140,65,65,"[NULL]",14,"[NULL]"
-        807082947288765,66,66,"[NULL]",15,"[NULL]"
+        30
         """))
 
   def test_thread_executing_span_intervals_to_roots_edge_case(self):
@@ -278,21 +235,22 @@
         INCLUDE PERFETTO MODULE sched.thread_executing_span;
 
         SELECT * FROM
-        _intervals_to_roots!((SELECT 1477 AS utid, trace_start() AS ts, trace_end() - trace_start() AS dur))
+        _intervals_to_roots!((SELECT 1477 AS utid, trace_start() AS ts, trace_end() - trace_start() AS dur), _wakeup_graph)
+        ORDER BY root_node_id
         LIMIT 10;
         """,
         out=Csv("""
-        "id"
-        11889
-        11892
-        11893
-        11896
-        11897
-        11900
-        11911
-        11916
-        11917
-        11921
+        "root_node_id","capacity"
+        11889,0
+        11980,91
+        12057,77
+        12254,197
+        12521,267
+        12672,151
+        12796,124
+        12802,6
+        12827,25
+        12833,6
         """))
 
   def test_thread_executing_span_intervals_to_roots(self):
@@ -302,21 +260,15 @@
         INCLUDE PERFETTO MODULE sched.thread_executing_span;
 
         SELECT * FROM
-        _intervals_to_roots!((SELECT 1477 AS utid, 1737362149192 AS ts, CAST(2e7 AS INT) AS dur))
+        _intervals_to_roots!((SELECT 1477 AS utid, 1737362149192 AS ts, CAST(2e7 AS INT) AS dur), _wakeup_graph)
+        ORDER BY root_node_id
         LIMIT 10;
         """,
         out=Csv("""
-        "id"
-        11980
-        11983
-        11984
-        11989
-        11990
-        11991
-        11992
-        11993
-        12001
-        12006
+        "root_node_id","capacity"
+        11980,91
+        12057,77
+        12254,197
         """))
 
   def test_thread_executing_span_flatten_critical_paths(self):
@@ -338,20 +290,26 @@
           _wakeup_graph.id - COALESCE(prev_id, _wakeup_graph.id) AS root_target_weight,
           id,
           ts,
-          end_ts,
+          dur,
           utid
         FROM _wakeup_graph;
 
         CREATE PERFETTO TABLE critical_path AS
-        SELECT * FROM graph_reachable_weight_bounded_dfs!(graph, roots, 1);
+        SELECT root_node_id AS root_id, node_id AS id, root_node_id AS parent_id FROM graph_reachable_weight_bounded_dfs!(graph, roots, 1);
 
-        SELECT * FROM _flatten_critical_paths!(critical_path, _sleep);
+        SELECT * FROM _critical_path_to_intervals!(critical_path, _wakeup_graph);
         """,
         out=Csv("""
-        "ts","dur","utid","id","root_id","prev_end_ts","critical_path_utid","critical_path_id","critical_path_blocked_dur","critical_path_blocked_state","critical_path_blocked_function"
-        807082871764903,14688,9,35,38,"[NULL]",5,38,14688,"S","[NULL]"
-        807082947156994,351302,9,57,76,807082878865945,5,76,68858913,"S","[NULL]"
-        807083031589763,324114,21,127,130,"[NULL]",5,130,80026987,"S","[NULL]"
+        "ts","dur","id","root_id"
+        807082871764903,14688,35,38
+        807082871805424,6817657,38,45
+        807082947223556,23282,60,62
+        807082947156994,351302,57,76
+        807082947593348,4229115,76,96
+        807082959078401,95105,105,107
+        807082951886890,79702873,1,130
+        807083031589763,324114,127,130
+        807082947219546,85059279,1,135
         """))
 
   def test_thread_executing_span_critical_path(self):
@@ -360,47 +318,49 @@
         query="""
         INCLUDE PERFETTO MODULE sched.thread_executing_span;
 
-        CREATE PERFETTO TABLE graph AS
-        SELECT
-          id AS source_node_id,
-          COALESCE(waker_id, id) AS dest_node_id,
-          id - COALESCE(waker_id, id) AS edge_weight
-        FROM _wakeup_graph;
-
-        CREATE PERFETTO TABLE roots AS
-        SELECT
-          _wakeup_graph.id AS root_node_id,
-          _wakeup_graph.id - COALESCE(prev_id, _wakeup_graph.id) AS root_target_weight,
-          id,
-          ts,
-          end_ts,
-          utid
-        FROM _wakeup_graph;
-
-        SELECT * FROM _critical_path!(graph, roots, _sleep);
+        SELECT * FROM _critical_path_intervals!(_wakeup_kernel_edges, (SELECT id AS root_node_id, id - COALESCE(prev_id, id)  AS capacity FROM _wakeup_graph), _wakeup_graph) ORDER BY root_id;
         """,
         out=Csv("""
-        "ts","dur","root_id","id","utid","critical_path_utid","critical_path_id","critical_path_blocked_dur","critical_path_blocked_state","critical_path_blocked_function"
-        807082868359903,81302,29,29,8,8,"[NULL]","[NULL]","[NULL]","[NULL]"
-        807082871734539,70885,35,35,9,9,"[NULL]","[NULL]","[NULL]","[NULL]"
-        807082871764903,14688,38,35,9,5,38,14688,"S","[NULL]"
-        807082871779591,55729,38,38,5,5,"[NULL]","[NULL]","[NULL]","[NULL]"
-        807082878623081,242864,45,45,9,9,"[NULL]","[NULL]","[NULL]","[NULL]"
-        807082947156994,436354,57,57,9,9,"[NULL]","[NULL]","[NULL]","[NULL]"
-        807082947246838,1038854,62,62,6,6,"[NULL]","[NULL]","[NULL]","[NULL]"
-        807082947261525,293594,63,63,12,12,"[NULL]","[NULL]","[NULL]","[NULL]"
-        807082947267463,228958,64,64,13,13,"[NULL]","[NULL]","[NULL]","[NULL]"
-        807082947278140,54114,65,65,14,14,"[NULL]","[NULL]","[NULL]","[NULL]"
-        807082947288765,338802,66,66,15,15,"[NULL]","[NULL]","[NULL]","[NULL]"
-        807082947294182,296875,67,67,16,16,"[NULL]","[NULL]","[NULL]","[NULL]"
-        807082947156994,351302,76,57,9,5,76,68858913,"S","[NULL]"
-        807082947508296,122083,76,76,5,5,"[NULL]","[NULL]","[NULL]","[NULL]"
-        807082951822463,104427,96,96,9,9,"[NULL]","[NULL]","[NULL]","[NULL]"
-        807082959173506,215104,107,107,6,6,"[NULL]","[NULL]","[NULL]","[NULL]"
-        807083031589763,436198,127,127,21,21,"[NULL]","[NULL]","[NULL]","[NULL]"
-        807083031589763,324114,130,127,21,5,130,80026987,"S","[NULL]"
-        807083031913877,166302,130,130,5,5,"[NULL]","[NULL]","[NULL]","[NULL]"
-        807083032278825,208490,135,135,2,2,"[NULL]","[NULL]","[NULL]","[NULL]"
+        "root_id","id","ts","dur"
+        1,1,807082862885423,169601892
+        2,2,807082862913183,280521
+        13,13,807082864992767,6772136
+        14,14,807082865019382,14160157
+        17,17,807082865084902,272865
+        29,29,807082868359903,81302
+        35,35,807082871734539,70885
+        38,35,807082871764903,14688
+        38,38,807082871779591,6869792
+        45,38,807082871805424,6817657
+        45,45,807082878623081,242864
+        55,55,807082946856213,609219
+        57,57,807082947156994,436354
+        60,60,807082947223556,83577300
+        62,60,807082947223556,23282
+        62,62,807082947246838,2000260
+        63,63,807082947261525,293594
+        64,64,807082947267463,228958
+        65,65,807082947278140,54114
+        66,66,807082947288765,338802
+        67,67,807082947294182,296875
+        76,57,807082947156994,351302
+        76,76,807082947508296,4378594
+        93,93,807082951711161,2494011
+        96,76,807082947593348,4229115
+        96,96,807082951822463,104427
+        105,105,807082959078401,184115
+        107,105,807082959078401,95105
+        107,107,807082959173506,73362507
+        111,111,807082962662412,149011
+        114,114,807082967942309,334114
+        127,127,807083031589763,436198
+        130,1,807082951886890,79702873
+        130,127,807083031589763,324114
+        130,130,807083031913877,166302
+        135,1,807082947219546,85059279
+        135,135,807083032278825,208490
+        139,139,807083032634138,340625
+        142,142,807083032991378,89218
         """))
 
   def test_thread_executing_span_critical_path_by_roots(self):
@@ -409,20 +369,19 @@
         query="""
         INCLUDE PERFETTO MODULE sched.thread_executing_span;
 
-        SELECT * FROM _critical_path_by_roots!(_intervals_to_roots!((SELECT 6 AS utid, trace_start() AS ts, trace_end() - trace_start() AS dur)));
+        SELECT * FROM _critical_path_by_roots!(_intervals_to_roots!((SELECT 6 AS utid, trace_start() AS ts, trace_end() - trace_start() AS dur), _wakeup_graph), _wakeup_graph);
         """,
         out=Csv("""
-        "id","ts","dur","utid","critical_path_id","critical_path_blocked_dur","critical_path_blocked_state","critical_path_blocked_function","critical_path_utid"
-        62,807082947246838,1038854,6,"[NULL]","[NULL]","[NULL]","[NULL]",6
-        63,807082947261525,293594,12,"[NULL]","[NULL]","[NULL]","[NULL]",12
-        64,807082947267463,228958,13,"[NULL]","[NULL]","[NULL]","[NULL]",13
-        65,807082947278140,54114,14,"[NULL]","[NULL]","[NULL]","[NULL]",14
-        66,807082947288765,338802,15,"[NULL]","[NULL]","[NULL]","[NULL]",15
-        67,807082947294182,296875,16,"[NULL]","[NULL]","[NULL]","[NULL]",16
-        57,807082947156994,351302,9,76,68858913,"S","[NULL]",5
-        76,807082947508296,122083,5,"[NULL]","[NULL]","[NULL]","[NULL]",5
-        96,807082951822463,104427,9,"[NULL]","[NULL]","[NULL]","[NULL]",9
-        107,807082959173506,215104,6,"[NULL]","[NULL]","[NULL]","[NULL]",6
+        "root_id","id","ts","dur"
+        14,14,807082865019382,14160157
+        62,60,807082947223556,23282
+        62,62,807082947246838,2000260
+        107,105,807082959078401,95105
+        107,139,807082959173506,73362507
+        139,139,807083032536013,98125
+        139,142,807083032634138,340625
+        142,142,807083032974763,16615
+        142,142,807083032991378,89218
         """))
 
   def test_thread_executing_span_critical_path_by_intervals(self):
@@ -431,12 +390,19 @@
         query="""
         INCLUDE PERFETTO MODULE sched.thread_executing_span;
 
-        SELECT * FROM _critical_path_by_intervals!((SELECT 6 AS utid, trace_start() AS ts, trace_end() - trace_start() AS dur));
+        SELECT * FROM _critical_path_by_intervals!((SELECT 6 AS utid, trace_start() AS ts, trace_end() - trace_start() AS dur), _wakeup_graph);
         """,
         out=Csv("""
-        "id","ts","dur","utid","critical_path_id","critical_path_blocked_dur","critical_path_blocked_state","critical_path_blocked_function","critical_path_utid"
-        62,807082947246838,1038854,6,"[NULL]","[NULL]","[NULL]","[NULL]",6
-        107,807082959173506,215104,6,"[NULL]","[NULL]","[NULL]","[NULL]",6
+        "root_utid","root_id","id","ts","dur","utid"
+        6,14,14,807082865019382,14160157,6
+        6,62,60,807082947223556,23282,11
+        6,62,62,807082947246838,2000260,6
+        6,107,105,807082959078401,95105,18
+        6,107,139,807082959173506,73362507,6
+        6,139,139,807083032536013,98125,6
+        6,139,142,807083032634138,340625,6
+        6,142,142,807083032974763,16615,6
+        6,142,142,807083032991378,89218,6
         """))
 
   def test_thread_executing_span_critical_path_utid(self):
@@ -445,31 +411,28 @@
         query="""
         INCLUDE PERFETTO MODULE sched.thread_executing_span;
         SELECT
+          root_id,
+          root_utid,
           id,
           ts,
           dur,
-          utid,
-          critical_path_id,
-          critical_path_blocked_dur,
-          critical_path_blocked_state,
-          critical_path_blocked_function,
-          critical_path_utid
+          utid
         FROM _thread_executing_span_critical_path((select utid from thread where tid = 3487), start_ts, end_ts), trace_bounds
         ORDER BY ts
         LIMIT 10
         """,
         out=Csv("""
-        "id","ts","dur","utid","critical_path_id","critical_path_blocked_dur","critical_path_blocked_state","critical_path_blocked_function","critical_path_utid"
-        11889,1737349401439,7705561,1477,"[NULL]","[NULL]","[NULL]","[NULL]",1477
-        11952,1737357107000,547583,1480,11980,547583,"S","[NULL]",1477
-        11980,1737357654583,8430762,1477,"[NULL]","[NULL]","[NULL]","[NULL]",1477
-        12052,1737366085345,50400,91,12057,50400,"S","[NULL]",1477
-        12057,1737366135745,6635927,1477,"[NULL]","[NULL]","[NULL]","[NULL]",1477
-        12081,1737372771672,12798314,1488,12254,12798314,"S","[NULL]",1477
-        12254,1737385569986,21830622,1477,"[NULL]","[NULL]","[NULL]","[NULL]",1477
-        12517,1737407400608,241267,91,12521,241267,"S","[NULL]",1477
-        12521,1737407641875,1830015,1477,"[NULL]","[NULL]","[NULL]","[NULL]",1477
-        12669,1737409471890,68590,91,12672,68590,"S","[NULL]",1477
+        "root_id","root_utid","id","ts","dur","utid"
+        11889,1477,11889,1737349401439,7705561,1477
+        11980,1477,11952,1737357107000,547583,1480
+        11980,1477,11980,1737357654583,8430762,1477
+        12057,1477,12052,1737366085345,50400,91
+        12057,1477,12057,1737366135745,6635927,1477
+        12254,1477,12251,1737372771672,12594070,1488
+        12254,1477,12251,1737385365742,204244,1488
+        12254,1477,12254,1737385569986,21830622,1477
+        12521,1477,12517,1737407400608,241267,91
+        12521,1477,12521,1737407641875,1830015,1477
         """))
 
   def test_thread_executing_span_critical_path_stack(self):
@@ -485,13 +448,13 @@
           stack_depth,
           name,
           table_name,
-          critical_path_utid
+          root_utid
         FROM _thread_executing_span_critical_path_stack((select utid from thread where tid = 3487), start_ts, end_ts), trace_bounds
         WHERE ts = 1737500355691
         ORDER BY utid, id
         """,
         out=Csv("""
-        "id","ts","dur","utid","stack_depth","name","table_name","critical_path_utid"
+        "id","ts","dur","utid","stack_depth","name","table_name","root_utid"
         4271,1737500355691,1456753,1477,5,"bindApplication","slice",1477
         13120,1737500355691,1456753,1477,0,"thread_state: S","thread_state",1477
         13120,1737500355691,1456753,1477,1,"[NULL]","thread_state",1477
@@ -518,164 +481,164 @@
             message_type="perfetto.third_party.perftools.profiles.Profile",
             post_processing=PrintProfileProto,
             contents="""
-        Sample:
-        Values: 0
-        Stack:
-        bindApplication (0x0)
-        thread_name: rs.media.module (0x0)
-        process_name: com.android.providers.media.module (0x0)
-        thread_state: R (0x0)
-        critical path (0x0)
+            Sample:
+            Values: 0
+            Stack:
+            bindApplication (0x0)
+            thread_name: rs.media.module (0x0)
+            process_name: com.android.providers.media.module (0x0)
+            thread_state: R (0x0)
+            critical path (0x0)
 
-        Sample:
-        Values: 0
-        Stack:
-        bindApplication (0x0)
-        thread_name: rs.media.module (0x0)
-        process_name: com.android.providers.media.module (0x0)
-        thread_state: S (0x0)
-        critical path (0x0)
+            Sample:
+            Values: 0
+            Stack:
+            bindApplication (0x0)
+            thread_name: rs.media.module (0x0)
+            process_name: com.android.providers.media.module (0x0)
+            thread_state: S (0x0)
+            critical path (0x0)
 
-        Sample:
-        Values: 0
-        Stack:
-        binder reply (0x0)
-        blocking thread_name: binder:553_3 (0x0)
-        blocking process_name: /system/bin/mediaserver (0x0)
-        blocking thread_state: Running (0x0)
-        binder transaction (0x0)
-        bindApplication (0x0)
-        thread_name: rs.media.module (0x0)
-        process_name: com.android.providers.media.module (0x0)
-        thread_state: S (0x0)
-        critical path (0x0)
+            Sample:
+            Values: 0
+            Stack:
+            binder reply (0x0)
+            blocking thread_name: binder:553_3 (0x0)
+            blocking process_name: /system/bin/mediaserver (0x0)
+            blocking thread_state: Running (0x0)
+            binder transaction (0x0)
+            bindApplication (0x0)
+            thread_name: rs.media.module (0x0)
+            process_name: com.android.providers.media.module (0x0)
+            thread_state: S (0x0)
+            critical path (0x0)
 
-        Sample:
-        Values: 0
-        Stack:
-        binder transaction (0x0)
-        bindApplication (0x0)
-        thread_name: rs.media.module (0x0)
-        process_name: com.android.providers.media.module (0x0)
-        thread_state: S (0x0)
-        critical path (0x0)
+            Sample:
+            Values: 0
+            Stack:
+            binder transaction (0x0)
+            bindApplication (0x0)
+            thread_name: rs.media.module (0x0)
+            process_name: com.android.providers.media.module (0x0)
+            thread_state: S (0x0)
+            critical path (0x0)
 
-        Sample:
-        Values: 0
-        Stack:
-        blocking process_name: /system/bin/mediaserver (0x0)
-        blocking thread_state: Running (0x0)
-        binder transaction (0x0)
-        bindApplication (0x0)
-        thread_name: rs.media.module (0x0)
-        process_name: com.android.providers.media.module (0x0)
-        thread_state: S (0x0)
-        critical path (0x0)
+            Sample:
+            Values: 0
+            Stack:
+            blocking process_name: /system/bin/mediaserver (0x0)
+            blocking thread_state: Running (0x0)
+            binder transaction (0x0)
+            bindApplication (0x0)
+            thread_name: rs.media.module (0x0)
+            process_name: com.android.providers.media.module (0x0)
+            thread_state: S (0x0)
+            critical path (0x0)
 
-        Sample:
-        Values: 0
-        Stack:
-        blocking thread_name: binder:553_3 (0x0)
-        blocking process_name: /system/bin/mediaserver (0x0)
-        blocking thread_state: Running (0x0)
-        binder transaction (0x0)
-        bindApplication (0x0)
-        thread_name: rs.media.module (0x0)
-        process_name: com.android.providers.media.module (0x0)
-        thread_state: S (0x0)
-        critical path (0x0)
+            Sample:
+            Values: 0
+            Stack:
+            blocking thread_name: binder:553_3 (0x0)
+            blocking process_name: /system/bin/mediaserver (0x0)
+            blocking thread_state: Running (0x0)
+            binder transaction (0x0)
+            bindApplication (0x0)
+            thread_name: rs.media.module (0x0)
+            process_name: com.android.providers.media.module (0x0)
+            thread_state: S (0x0)
+            critical path (0x0)
 
-        Sample:
-        Values: 0
-        Stack:
-        blocking thread_state: Running (0x0)
-        binder transaction (0x0)
-        bindApplication (0x0)
-        thread_name: rs.media.module (0x0)
-        process_name: com.android.providers.media.module (0x0)
-        thread_state: S (0x0)
-        critical path (0x0)
+            Sample:
+            Values: 0
+            Stack:
+            blocking thread_state: Running (0x0)
+            binder transaction (0x0)
+            bindApplication (0x0)
+            thread_name: rs.media.module (0x0)
+            process_name: com.android.providers.media.module (0x0)
+            thread_state: S (0x0)
+            critical path (0x0)
 
-        Sample:
-        Values: 0
-        Stack:
-        process_name: com.android.providers.media.module (0x0)
-        thread_state: R (0x0)
-        critical path (0x0)
+            Sample:
+            Values: 0
+            Stack:
+            process_name: com.android.providers.media.module (0x0)
+            thread_state: R (0x0)
+            critical path (0x0)
 
-        Sample:
-        Values: 0
-        Stack:
-        process_name: com.android.providers.media.module (0x0)
-        thread_state: S (0x0)
-        critical path (0x0)
+            Sample:
+            Values: 0
+            Stack:
+            process_name: com.android.providers.media.module (0x0)
+            thread_state: S (0x0)
+            critical path (0x0)
 
-        Sample:
-        Values: 0
-        Stack:
-        thread_name: rs.media.module (0x0)
-        process_name: com.android.providers.media.module (0x0)
-        thread_state: R (0x0)
-        critical path (0x0)
+            Sample:
+            Values: 0
+            Stack:
+            thread_name: rs.media.module (0x0)
+            process_name: com.android.providers.media.module (0x0)
+            thread_state: R (0x0)
+            critical path (0x0)
 
-        Sample:
-        Values: 0
-        Stack:
-        thread_name: rs.media.module (0x0)
-        process_name: com.android.providers.media.module (0x0)
-        thread_state: S (0x0)
-        critical path (0x0)
+            Sample:
+            Values: 0
+            Stack:
+            thread_name: rs.media.module (0x0)
+            process_name: com.android.providers.media.module (0x0)
+            thread_state: S (0x0)
+            critical path (0x0)
 
-        Sample:
-        Values: 0
-        Stack:
-        thread_state: R (0x0)
-        critical path (0x0)
+            Sample:
+            Values: 0
+            Stack:
+            thread_state: R (0x0)
+            critical path (0x0)
 
-        Sample:
-        Values: 0
-        Stack:
-        thread_state: S (0x0)
-        critical path (0x0)
+            Sample:
+            Values: 0
+            Stack:
+            thread_state: S (0x0)
+            critical path (0x0)
 
-        Sample:
-        Values: 1101
-        Stack:
-        binder transaction (0x0)
-        bindApplication (0x0)
-        thread_name: rs.media.module (0x0)
-        process_name: com.android.providers.media.module (0x0)
-        thread_state: R (0x0)
-        critical path (0x0)
+            Sample:
+            Values: 1101
+            Stack:
+            binder transaction (0x0)
+            bindApplication (0x0)
+            thread_name: rs.media.module (0x0)
+            process_name: com.android.providers.media.module (0x0)
+            thread_state: R (0x0)
+            critical path (0x0)
 
-        Sample:
-        Values: 13010
-        Stack:
-        cpu: 0 (0x0)
-        binder reply (0x0)
-        blocking thread_name: binder:553_3 (0x0)
-        blocking process_name: /system/bin/mediaserver (0x0)
-        blocking thread_state: Running (0x0)
-        binder transaction (0x0)
-        bindApplication (0x0)
-        thread_name: rs.media.module (0x0)
-        process_name: com.android.providers.media.module (0x0)
-        thread_state: S (0x0)
-        critical path (0x0)
+            Sample:
+            Values: 13010
+            Stack:
+            cpu: 0 (0x0)
+            binder reply (0x0)
+            blocking thread_name: binder:553_3 (0x0)
+            blocking process_name: /system/bin/mediaserver (0x0)
+            blocking thread_state: Running (0x0)
+            binder transaction (0x0)
+            bindApplication (0x0)
+            thread_name: rs.media.module (0x0)
+            process_name: com.android.providers.media.module (0x0)
+            thread_state: S (0x0)
+            critical path (0x0)
 
-        Sample:
-        Values: 1889
-        Stack:
-        cpu: 0 (0x0)
-        blocking thread_name: binder:553_3 (0x0)
-        blocking process_name: /system/bin/mediaserver (0x0)
-        blocking thread_state: Running (0x0)
-        binder transaction (0x0)
-        bindApplication (0x0)
-        thread_name: rs.media.module (0x0)
-        process_name: com.android.providers.media.module (0x0)
-        thread_state: S (0x0)
-        critical path (0x0)
+            Sample:
+            Values: 1889
+            Stack:
+            cpu: 0 (0x0)
+            blocking thread_name: binder:553_3 (0x0)
+            blocking process_name: /system/bin/mediaserver (0x0)
+            blocking thread_state: Running (0x0)
+            binder transaction (0x0)
+            bindApplication (0x0)
+            thread_name: rs.media.module (0x0)
+            process_name: com.android.providers.media.module (0x0)
+            thread_state: S (0x0)
+            critical path (0x0)
         """))
 
   # Test machine_id ID of the sched table.
diff --git a/ui/src/assets/widgets/flamegraph.scss b/ui/src/assets/widgets/flamegraph.scss
index 825060b..4071cf9 100644
--- a/ui/src/assets/widgets/flamegraph.scss
+++ b/ui/src/assets/widgets/flamegraph.scss
@@ -13,8 +13,8 @@
 // limitations under the License.
 
 .pf-flamegraph {
-  overflow: auto;
   height: 100%;
+  overflow-y: hidden;
 
   .loading-container {
     font-size: larger;
@@ -39,6 +39,22 @@
       }
     }
   }
+
+  .popup-anchor {
+    width: 0px;
+    height: 0px;
+    position: absolute;
+  }
+
+  canvas {
+    user-select: none;
+  }
+
+  .canvas-container {
+    height: 100%;
+    position: relative;
+    overflow-y: auto;
+  }
 }
 
 .pf-flamegraph-filter-bar-popup-content {
@@ -46,3 +62,32 @@
   width: max-content;
   font-family: "Roboto Condensed", sans-serif;
 }
+
+.pf-flamegraph-tooltip-popup {
+  width: max-content;
+  font-family: "Roboto Condensed", sans-serif;
+  font-size: 15px;
+  display: flex;
+  flex-direction: column;
+  padding: 4px;
+
+  .tooltip-text-line {
+    display: flex;
+    cursor: text;
+    padding-top: 4px;
+    gap: 4px;
+  }
+
+  .tooltip-text {
+    cursor: text;
+  }
+
+  .tooltip-bold-text {
+    font-weight: 600;
+    cursor: text;
+  }
+
+  .pf-button-bar {
+    padding-top: 16px;
+  }
+}
diff --git a/ui/src/common/recordingV2/recording_interfaces_v2.ts b/ui/src/common/recordingV2/recording_interfaces_v2.ts
index 805def8..954a145 100644
--- a/ui/src/common/recordingV2/recording_interfaces_v2.ts
+++ b/ui/src/common/recordingV2/recording_interfaces_v2.ts
@@ -75,11 +75,11 @@
 }
 
 export interface ChromeTargetInfo extends TargetInfoBase {
-  targetType: 'CHROME' | 'CHROME_OS';
+  targetType: 'CHROME' | 'CHROME_OS' | 'WINDOWS';
 }
 
 export interface HostOsTargetInfo extends TargetInfoBase {
-  targetType: 'LINUX' | 'MACOS' | 'WINDOWS';
+  targetType: 'LINUX' | 'MACOS';
 }
 
 // Holds information about a target. It's used by the UI and the logic which
diff --git a/ui/src/common/recordingV2/target_factories/chrome_target_factory.ts b/ui/src/common/recordingV2/target_factories/chrome_target_factory.ts
index f68d2f4..68630ee 100644
--- a/ui/src/common/recordingV2/target_factories/chrome_target_factory.ts
+++ b/ui/src/common/recordingV2/target_factories/chrome_target_factory.ts
@@ -22,6 +22,7 @@
   EXTENSION_ID,
   EXTENSION_NOT_INSTALLED,
   isCrOS,
+  isWindows,
 } from '../recording_utils';
 import {targetFactoryRegistry} from '../target_factory_registry';
 import {ChromeTarget} from '../targets/chrome_target';
@@ -55,6 +56,11 @@
     if (isCrOS(navigator.userAgent)) {
       this.targets.push(new ChromeTarget('ChromeOS', 'CHROME_OS'));
     }
+    // Pass through the chrome target since it launches ETW on Windows through
+    // the same path as when we start chrome tracing.
+    if (isWindows(navigator.userAgent)) {
+      this.targets.push(new ChromeTarget('Windows Desktop', 'WINDOWS'));
+    }
   }
 
   connectNewTarget(): Promise<RecordingTargetV2> {
diff --git a/ui/src/common/recordingV2/target_factories/host_os_target_factory.ts b/ui/src/common/recordingV2/target_factories/host_os_target_factory.ts
index 24eac91..09e73e7 100644
--- a/ui/src/common/recordingV2/target_factories/host_os_target_factory.ts
+++ b/ui/src/common/recordingV2/target_factories/host_os_target_factory.ts
@@ -18,7 +18,7 @@
   RecordingTargetV2,
   TargetFactory,
 } from '../recording_interfaces_v2';
-import {isLinux, isMacOs, isWindows} from '../recording_utils';
+import {isLinux, isMacOs} from '../recording_utils';
 import {targetFactoryRegistry} from '../target_factory_registry';
 import {HostOsTarget} from '../targets/host_os_target';
 
@@ -80,10 +80,6 @@
 }
 
-// We instantiate the host target factory only on Mac, Linux, and Windows.
+// We instantiate the host target factory only on Mac and Linux.
-if (
-  isMacOs(navigator.userAgent) ||
-  isLinux(navigator.userAgent) ||
-  isWindows(navigator.userAgent)
-) {
+if (isMacOs(navigator.userAgent) || isLinux(navigator.userAgent)) {
   targetFactoryRegistry.register(new HostOsTargetFactory());
 }
diff --git a/ui/src/common/recordingV2/targets/chrome_target.ts b/ui/src/common/recordingV2/targets/chrome_target.ts
index 1072a08..9baaf96 100644
--- a/ui/src/common/recordingV2/targets/chrome_target.ts
+++ b/ui/src/common/recordingV2/targets/chrome_target.ts
@@ -27,7 +27,7 @@
 
   constructor(
     private name: string,
-    private targetType: 'CHROME' | 'CHROME_OS',
+    private targetType: 'CHROME' | 'CHROME_OS' | 'WINDOWS',
   ) {}
 
   getInfo(): ChromeTargetInfo {
diff --git a/ui/src/common/recordingV2/targets/host_os_target.ts b/ui/src/common/recordingV2/targets/host_os_target.ts
index 06962f8..7b32fcc 100644
--- a/ui/src/common/recordingV2/targets/host_os_target.ts
+++ b/ui/src/common/recordingV2/targets/host_os_target.ts
@@ -26,13 +26,12 @@
 import {
   isLinux,
   isMacOs,
-  isWindows,
   WEBSOCKET_CLOSED_ABNORMALLY_CODE,
 } from '../recording_utils';
 import {TracedTracingSession} from '../traced_tracing_session';
 
 export class HostOsTarget implements RecordingTargetV2 {
-  private readonly targetType: 'LINUX' | 'MACOS' | 'WINDOWS';
+  private readonly targetType: 'LINUX' | 'MACOS';
   private readonly name: string;
   private websocket: WebSocket;
   private streams = new Set<HostOsByteStream>();
@@ -50,9 +49,6 @@
     } else if (isLinux(navigator.userAgent)) {
       this.name = 'Linux';
       this.targetType = 'LINUX';
-    } else if (isWindows(navigator.userAgent)) {
-      this.name = 'Windows Desktop';
-      this.targetType = 'WINDOWS';
     } else {
       throw new RecordingError(
         'Host OS target created on an unsupported operating system.',
diff --git a/ui/src/core_plugins/process_summary/process_summary_track.ts b/ui/src/core_plugins/process_summary/process_summary_track.ts
index f4d50b4..ca0277f 100644
--- a/ui/src/core_plugins/process_summary/process_summary_track.ts
+++ b/ui/src/core_plugins/process_summary/process_summary_track.ts
@@ -107,7 +107,7 @@
   }
 
   async onUpdate(): Promise<void> {
-    this.fetcher.requestDataForCurrentTime();
+    await this.fetcher.requestDataForCurrentTime();
   }
 
   async onBoundsChange(
diff --git a/ui/src/frontend/record_page_v2.ts b/ui/src/frontend/record_page_v2.ts
index d8f841a..b1db4ce 100644
--- a/ui/src/frontend/record_page_v2.ts
+++ b/ui/src/frontend/record_page_v2.ts
@@ -75,7 +75,7 @@
 function isChromeTargetInfo(
   targetInfo: TargetInfo,
 ): targetInfo is ChromeTargetInfo {
-  return ['CHROME', 'CHROME_OS'].includes(targetInfo.targetType);
+  return ['CHROME', 'CHROME_OS', 'WINDOWS'].includes(targetInfo.targetType);
 }
 
 function RecordHeader() {
@@ -316,7 +316,7 @@
 
 function RecordingSnippet(targetInfo: TargetInfo) {
   // We don't need commands to start tracing on chrome
-  if (isChromeTargetInfo(targetInfo) || targetInfo.targetType === 'WINDOWS') {
+  if (isChromeTargetInfo(targetInfo)) {
     if (controller.getState() > RecordingState.AUTH_P2) {
       // If the UI has started tracing, don't display a message guiding the user
       // to start recording.
diff --git a/ui/src/widgets/flamegraph.ts b/ui/src/widgets/flamegraph.ts
index f46e74a..bd8181b 100644
--- a/ui/src/widgets/flamegraph.ts
+++ b/ui/src/widgets/flamegraph.ts
@@ -25,13 +25,11 @@
 import {Spinner} from './spinner';
 import {TagInput} from './tag_input';
 import {scheduleFullRedraw} from './raf';
+import {Button, ButtonBar} from './button';
 
-const ROLLOVER_FONT_STYLE = '12px Roboto Condensed';
 const LABEL_FONT_STYLE = '12px Roboto Mono';
-const NODE_HEIGHT = 18;
+const NODE_HEIGHT = 20;
 const MIN_PIXEL_DISPLAYED = 1;
-const TOOLTOP_PADDING_PX = 8;
-const TOOLTIP_OFFSET_PX = 4;
 const FILTER_COMMON_TEXT = `
 - "Show Frame: foo" or "SF: foo" to show only frames containing "foo"
 - "Hide Frame: foo" or "HF: foo" to hide all frames containing "foo"
@@ -157,7 +155,7 @@
   private filterFocus: boolean = false;
   private filterChangeFail: boolean = false;
 
-  private zoomRegionMonitor = new Monitor([() => this.attrs.data]);
+  private dataChangeMonitor = new Monitor([() => this.attrs.data]);
   private zoomRegion?: ZoomRegion;
 
   private renderNodesMonitor = new Monitor([
@@ -167,6 +165,13 @@
   ]);
   private renderNodes?: ReadonlyArray<RenderNode>;
 
+  private tooltipPos?: {
+    node: RenderNode;
+    x: number;
+    state: 'HOVER' | 'CLICK' | 'DECLICK';
+  };
+  private lastClickedNode?: RenderNode;
+
   private hoveredX?: number;
   private hoveredY?: number;
 
@@ -179,6 +184,11 @@
 
   view({attrs}: m.Vnode<FlamegraphAttrs, this>): void | m.Children {
     this.attrs = attrs;
+    if (this.dataChangeMonitor.ifStateChanged()) {
+      this.zoomRegion = undefined;
+      this.lastClickedNode = undefined;
+      this.tooltipPos = undefined;
+    }
     if (attrs.data === undefined) {
       return m(
         '.pf-flamegraph',
@@ -202,44 +212,120 @@
     return m(
       '.pf-flamegraph',
       this.renderFilterBar(attrs),
-      m(`canvas[ref=canvas]`, {
-        style: `height:${canvasHeight}px; width:100%`,
-        onmousemove: ({offsetX, offsetY}: MouseEvent) => {
-          this.hoveredX = offsetX;
-          this.hoveredY = offsetY;
-          scheduleFullRedraw();
-        },
-        onmouseout: () => {
-          this.hoveredX = undefined;
-          this.hoveredY = undefined;
-          document.body.style.cursor = 'default';
-          scheduleFullRedraw();
-        },
-        onclick: ({offsetX, offsetY}: MouseEvent) => {
-          const renderNode = this.renderNodes?.find((n) =>
-            isHovered(offsetX, offsetY, n),
-          );
-          // TODO(lalitm): ignore merged nodes for now as we haven't quite
-          // figured out the UX for this.
-          if (renderNode?.source.kind === 'MERGED') {
-            return;
-          }
-          this.zoomRegion = renderNode?.source;
-          scheduleFullRedraw();
-        },
-      }),
+      m(
+        '.canvas-container',
+        m(
+          Popup,
+          {
+            trigger: m('.popup-anchor', {
+              style: {
+                left: this.tooltipPos?.x + 'px',
+                top: this.tooltipPos?.node.y + 'px',
+              },
+            }),
+            position: PopupPosition.Bottom,
+            isOpen:
+              this.tooltipPos?.state === 'HOVER' ||
+              this.tooltipPos?.state === 'CLICK',
+            className: 'pf-flamegraph-tooltip-popup',
+            offset: NODE_HEIGHT,
+          },
+          this.renderTooltip(),
+        ),
+        m(`canvas[ref=canvas]`, {
+          style: `height:${canvasHeight}px; width:100%`,
+          onmousemove: ({offsetX, offsetY}: MouseEvent) => {
+            scheduleFullRedraw();
+            this.hoveredX = offsetX;
+            this.hoveredY = offsetY;
+            if (this.tooltipPos?.state === 'CLICK') { // A clicked tooltip stays put; hovering does not move it.
+              return;
+            }
+            const renderNode = this.renderNodes?.find((n) =>
+              isIntersecting(offsetX, offsetY, n),
+            );
+            if (renderNode === undefined) {
+              this.tooltipPos = undefined; // Pointer is over no node: drop the tooltip.
+              return;
+            }
+            if (
+              isIntersecting(
+                this.tooltipPos?.x,
+                this.tooltipPos?.node.y,
+                renderNode,
+              )
+            ) { // The existing tooltip anchor already lies inside this node: leave position and state alone.
+              return;
+            }
+            this.tooltipPos = { // Pointer entered a different node: start a fresh hover tooltip here.
+              x: offsetX,
+              node: renderNode,
+              state: 'HOVER',
+            };
+          },
+          onmouseout: () => {
+            this.hoveredX = undefined;
+            this.hoveredY = undefined;
+            document.body.style.cursor = 'default';
+            if (
+              this.tooltipPos?.state === 'HOVER' ||
+              this.tooltipPos?.state === 'DECLICK'
+            ) {
+              this.tooltipPos = undefined; // Only HOVER/DECLICK tooltips are cleared on leave; a CLICK tooltip is kept.
+            }
+            scheduleFullRedraw();
+          },
+          onclick: ({offsetX, offsetY}: MouseEvent) => {
+            const renderNode = this.renderNodes?.find((n) =>
+              isIntersecting(offsetX, offsetY, n),
+            );
+            this.lastClickedNode = renderNode; // May be undefined, which clears the previous click highlight.
+            if (renderNode === undefined) {
+              this.tooltipPos = undefined; // Clicked empty space: dismiss any tooltip.
+            } else if (
+              isIntersecting(
+                this.tooltipPos?.x,
+                this.tooltipPos?.node.y,
+                renderNode,
+              )
+            ) { // Clicked the node the current tooltip is anchored in: toggle instead of re-anchoring.
+              this.tooltipPos!.state = // Non-null is safe: the intersection test above fails when tooltipPos is undefined.
+                this.tooltipPos?.state === 'CLICK' ? 'DECLICK' : 'CLICK';
+            } else {
+              this.tooltipPos = { // Clicked some other node: anchor a new CLICK tooltip at the click position.
+                x: offsetX,
+                node: renderNode,
+                state: 'CLICK',
+              };
+            }
+            scheduleFullRedraw();
+          },
+          ondblclick: ({offsetX, offsetY}: MouseEvent) => {
+            const renderNode = this.renderNodes?.find((n) =>
+              isIntersecting(offsetX, offsetY, n),
+            );
+            // TODO(lalitm): ignore merged nodes for now as we haven't quite
+            // figured out the UX for this.
+            if (renderNode?.source.kind === 'MERGED') {
+              return;
+            }
+            this.zoomRegion = renderNode?.source; // Double-click on empty space stores undefined, i.e. no zoom region.
+            scheduleFullRedraw();
+          },
+        }),
+      ),
     );
   }
 
   oncreate({dom}: m.VnodeDOM<FlamegraphAttrs, this>) {
-    this.renderCanvas(dom);
+    this.drawCanvas(dom); // Draw onto the canvas element looked up under dom.
   }
 
   onupdate({dom}: m.VnodeDOM<FlamegraphAttrs, this>) {
-    this.renderCanvas(dom);
+    this.drawCanvas(dom); // Redraw the canvas element looked up under dom.
   }
 
-  private renderCanvas(dom: Element) {
+  private drawCanvas(dom: Element) {
     const canvas = findRef(dom, 'canvas');
     if (canvas === null || !(canvas instanceof HTMLCanvasElement)) {
       return;
@@ -252,21 +338,24 @@
     canvas.height = canvas.offsetHeight * devicePixelRatio;
     this.canvasWidth = canvas.offsetWidth;
 
-    if (this.zoomRegionMonitor.ifStateChanged()) {
-      this.zoomRegion = undefined;
-    }
     if (this.renderNodesMonitor.ifStateChanged()) {
-      this.renderNodes =
-        this.attrs.data === undefined
-          ? undefined
-          : computeRenderNodes(
-              this.attrs.data,
-              this.zoomRegion ?? {
-                queryXStart: 0,
-                queryXEnd: this.attrs.data.allRootsCumulativeValue,
-              },
-              canvas.offsetWidth,
-            );
+      if (this.attrs.data === undefined) {
+        this.renderNodes = undefined;
+        this.lastClickedNode = undefined;
+      } else {
+        this.renderNodes = computeRenderNodes(
+          this.attrs.data,
+          this.zoomRegion ?? {
+            queryXStart: 0,
+            queryXEnd: this.attrs.data.allRootsCumulativeValue,
+          },
+          canvas.offsetWidth,
+        );
+        this.lastClickedNode = this.renderNodes?.find((n) =>
+          isIntersecting(this.lastClickedNode?.x, this.lastClickedNode?.y, n),
+        );
+      }
+      this.tooltipPos = undefined;
     }
     if (this.attrs.data === undefined || this.renderNodes === undefined) {
       return;
@@ -293,8 +382,10 @@
     for (let i = 0; i < this.renderNodes.length; i++) {
       const node = this.renderNodes[i];
       const {x, y, width: width, source, state} = node;
-      const hover = isHovered(this.hoveredX, this.hoveredY, node);
-      hoveredNode = hover ? node : hoveredNode;
+      const hover = isIntersecting(this.hoveredX, this.hoveredY, node);
+      if (hover) {
+        hoveredNode = node;
+      }
       let name: string;
       if (source.kind === 'ROOT') {
         name = `root: ${displaySize(allRootsCumulativeValue, unit)}`;
@@ -316,92 +407,32 @@
         y + (NODE_HEIGHT - 1) / 2,
         maxLabelWidth,
       );
+      if (this.lastClickedNode?.x === x && this.lastClickedNode?.y === y) {
+        ctx.strokeStyle = 'blue';
+        ctx.lineWidth = 2;
+        ctx.beginPath();
+        ctx.moveTo(x, y);
+        ctx.lineTo(x + width, y);
+        ctx.lineTo(x + width, y + NODE_HEIGHT - 1);
+        ctx.lineTo(x, y + NODE_HEIGHT - 1);
+        ctx.lineTo(x, y);
+        ctx.stroke();
+      }
+      ctx.strokeStyle = 'white';
+      ctx.lineWidth = 0.5;
       ctx.beginPath();
       ctx.moveTo(x + width, y);
       ctx.lineTo(x + width, y + NODE_HEIGHT);
       ctx.stroke();
     }
-    if (hoveredNode !== undefined) {
-      this.drawTooltip(
-        ctx,
-        canvas.offsetWidth,
-        canvas.offsetHeight,
-        hoveredNode,
-      );
-    }
-    const kind = hoveredNode?.source.kind;
-    if (kind === 'ROOT' || kind === 'NODE') {
-      canvas.style.cursor = 'pointer';
-    } else {
+    if (hoveredNode === undefined) {
       canvas.style.cursor = 'default';
+    } else {
+      canvas.style.cursor = 'pointer';
     }
     ctx.restore();
   }
 
-  private drawTooltip(
-    ctx: CanvasRenderingContext2D,
-    canvasWidth: number,
-    canvasHeight: number,
-    node: RenderNode,
-  ) {
-    ctx.font = ROLLOVER_FONT_STYLE;
-    ctx.textBaseline = 'top';
-
-    const {unit} = assertExists(this.selectedMetric);
-    const {nodes, allRootsCumulativeValue} = assertExists(this.attrs.data);
-    const nodeSource = node.source;
-    let lines: string[];
-    if (nodeSource.kind === 'NODE') {
-      const {name, cumulativeValue, selfValue} = nodes[nodeSource.queryIdx];
-      const cdisp = displaySize(cumulativeValue, unit);
-      const cpercentage = (cumulativeValue / allRootsCumulativeValue) * 100;
-      const sdisp = displaySize(selfValue, unit);
-      const spercentage = (selfValue / allRootsCumulativeValue) * 100;
-      lines = [
-        name,
-        `Cumulative: ${cdisp} (${cpercentage.toFixed(2)}%)`,
-        `Self: ${sdisp} (${spercentage.toFixed(2)}%)`,
-      ];
-    } else if (nodeSource.kind === 'ROOT') {
-      lines = [
-        'root',
-        `Cumulative: ${allRootsCumulativeValue} (100%)`,
-        'Self: 0',
-      ];
-    } else {
-      lines = ['(merged)', 'Too small to show, use filters'];
-    }
-    const measured = ctx.measureText(lines.join('\n'));
-
-    const heightSample = ctx.measureText(
-      'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ',
-    );
-    const lineHeight = Math.round(heightSample.actualBoundingBoxDescent * 1.5);
-
-    const rectWidth = measured.width + 2 * TOOLTOP_PADDING_PX;
-    const rectHeight = lineHeight * lines.length + 2 * TOOLTOP_PADDING_PX;
-
-    let rectXStart = assertExists(this.hoveredX) + TOOLTIP_OFFSET_PX;
-    let rectYStart = assertExists(this.hoveredY) + TOOLTIP_OFFSET_PX;
-    if (rectXStart + rectWidth > canvasWidth) {
-      rectXStart = canvasWidth - rectWidth;
-    }
-    if (rectYStart + rectHeight > canvasHeight) {
-      rectYStart = canvasHeight - rectHeight;
-    }
-    ctx.fillStyle = 'rgba(255, 255, 255, 0.9)';
-    ctx.fillRect(rectXStart, rectYStart, rectWidth, rectHeight);
-    ctx.fillStyle = 'hsl(200, 50%, 40%)';
-    ctx.textAlign = 'left';
-    for (let i = 0; i < lines.length; i++) {
-      ctx.fillText(
-        lines[i],
-        rectXStart + TOOLTOP_PADDING_PX,
-        rectYStart + TOOLTOP_PADDING_PX + i * lineHeight,
-      );
-    }
-  }
-
   private renderFilterBar(attrs: FlamegraphAttrs) {
     const self = this;
     return m(
@@ -476,6 +507,96 @@
     );
   }
 
+  private renderTooltip() { // Builds the popup content for the node held in tooltipPos.
+    if (this.tooltipPos === undefined) {
+      return undefined; // No tooltip tracked: the popup gets no content.
+    }
+    const {node} = this.tooltipPos;
+    if (node.source.kind === 'MERGED') { // Merged nodes get an explanatory message only, no values or buttons.
+      return m(
+        'div',
+        m('.tooltip-bold-text', '(merged)'),
+        m('.tooltip-text', 'Nodes too small to show, please use filters'),
+      );
+    }
+    const {nodes, allRootsCumulativeValue} = assertExists(this.attrs.data);
+    const {unit} = assertExists(this.selectedMetric);
+    if (node.source.kind === 'ROOT') { // Root shows just the cumulative value across all roots, no buttons.
+      return m(
+        'div',
+        m('.tooltip-bold-text', 'root'),
+        m(
+          '.tooltip-text-line',
+          m('.tooltip-bold-text', 'Cumulative:'),
+          m('.tooltip-text', displaySize(allRootsCumulativeValue, unit)),
+        ),
+      );
+    }
+    const {queryIdx} = node.source; // Reached only after the MERGED and ROOT early returns above.
+    const {name, cumulativeValue, selfValue} = nodes[queryIdx];
+    return m(
+      'div',
+      m('.tooltip-bold-text', name),
+      m(
+        '.tooltip-text-line',
+        m('.tooltip-bold-text', 'Cumulative:'),
+        m('.tooltip-text', displaySize(cumulativeValue, unit)),
+      ),
+      m(
+        '.tooltip-text-line',
+        m('.tooltip-bold-text', 'Self:'),
+        m('.tooltip-text', displaySize(selfValue, unit)),
+      ),
+      m(
+        ButtonBar,
+        {},
+        m(Button, {
+          label: 'Zoom',
+          onclick: () => {
+            this.zoomRegion = node.source; // Same assignment the canvas double-click handler makes for this node.
+            scheduleFullRedraw();
+          },
+        }),
+        m(Button, {
+          label: 'Show Stack',
+          onclick: () => {
+            this.rawFilters = [...this.rawFilters, `Show Stack: ${name}`]; // Append to a new array rather than mutating in place.
+            this.attrs.onFiltersChanged(computeFilters(this.rawFilters)); // Report the updated filter set to the owner.
+            this.tooltipPos = undefined; // Close the tooltip once the filter is applied.
+            scheduleFullRedraw();
+          },
+        }),
+        m(Button, {
+          label: 'Hide Stack',
+          onclick: () => {
+            this.rawFilters = [...this.rawFilters, `Hide Stack: ${name}`]; // Same flow as Show Stack with a different filter prefix.
+            this.attrs.onFiltersChanged(computeFilters(this.rawFilters));
+            this.tooltipPos = undefined;
+            scheduleFullRedraw();
+          },
+        }),
+        m(Button, {
+          label: 'Show Frame',
+          onclick: () => {
+            this.rawFilters = [...this.rawFilters, `Show Frame: ${name}`]; // Same flow as Show Stack with a different filter prefix.
+            this.attrs.onFiltersChanged(computeFilters(this.rawFilters));
+            this.tooltipPos = undefined;
+            scheduleFullRedraw();
+          },
+        }),
+        m(Button, {
+          label: 'Hide Frame',
+          onclick: () => {
+            this.rawFilters = [...this.rawFilters, `Hide Frame: ${name}`]; // Same flow as Show Stack with a different filter prefix.
+            this.attrs.onFiltersChanged(computeFilters(this.rawFilters));
+            this.tooltipPos = undefined;
+            scheduleFullRedraw();
+          },
+        }),
+      ),
+    );
+  }
+
   private get selectedMetric() {
     return this.attrs.metrics.find(
       (x) => x.name === this.attrs.selectedMetricName,
@@ -578,7 +699,7 @@
   return 'NORMAL';
 }
 
-function isHovered(
+function isIntersecting(
   needleX: number | undefined,
   needleY: number | undefined,
   {x, y, width}: RenderNode,
@@ -588,9 +709,9 @@
   }
   return (
     needleX >= x &&
-    needleX <= x + width &&
+    needleX < x + width &&
     needleY >= y &&
-    needleY <= y + NODE_HEIGHT
+    needleY < y + NODE_HEIGHT
   );
 }