stdlib: Add support for DSU dependent devices

Add support for devices that depend on DSU for power estimates. The
callflow will short circuit device-specific tables that are unnecessary
for that device's calculation. For example, a device that depends on DSU
calculations will short circuit tables that use CPU dependent
calculations.

Runtime of existing Wattson test cases remains the same before and after
this patch (~4300ms).

SQL modules callflow:
 ┌────────┐   ┌────────┐
 │cpu_idle│   │cpu_freq│
 └┬───────┘   └───────┬┘
  │                   │
  │  ┌─────────────┐  │     ┌───────┐
  └─►│cpu_freq_idle│◄─┘     │devfreq│
     └──────┬──────┘        └───┬───┘
            │                   │
       ┌────▼────┐              │
    ┌──┤cpu_split├──────────┐   │
    │  └─────────┘          │   │
    │                       │   │
┌───▼────────────┐ ┌────────▼───▼───┐
│w_cpu_dependence│ │w_dsu_dependence│
└───┬────────────┘ └────────┬───────┘
    │                       │
    │      ┌─────────┐      │
    └─────►│estimates│◄─────┘
           └─────────┘

Bug: 370829192
Test: tools/diff_test_trace_processor.py out/linux/trace_processor_shell --name-filter '.*wattson.*'
Change-Id: I8eb611450605837ed88aea5f5d2934c572e11730
Signed-off-by: Samuel Wu <wusamuel@google.com>
diff --git a/Android.bp b/Android.bp
index f4bc565..e6cb2a6 100644
--- a/Android.bp
+++ b/Android.bp
@@ -13662,6 +13662,7 @@
         "src/trace_processor/perfetto_sql/stdlib/wattson/curves/idle_attribution.sql",
         "src/trace_processor/perfetto_sql/stdlib/wattson/curves/utils.sql",
         "src/trace_processor/perfetto_sql/stdlib/wattson/curves/w_cpu_dependence.sql",
+        "src/trace_processor/perfetto_sql/stdlib/wattson/curves/w_dsu_dependence.sql",
         "src/trace_processor/perfetto_sql/stdlib/wattson/device_infos.sql",
         "src/trace_processor/perfetto_sql/stdlib/wattson/system_state.sql",
     ],
diff --git a/BUILD b/BUILD
index 4c57533..caacf6e 100644
--- a/BUILD
+++ b/BUILD
@@ -3083,6 +3083,7 @@
         "src/trace_processor/perfetto_sql/stdlib/wattson/curves/idle_attribution.sql",
         "src/trace_processor/perfetto_sql/stdlib/wattson/curves/utils.sql",
         "src/trace_processor/perfetto_sql/stdlib/wattson/curves/w_cpu_dependence.sql",
+        "src/trace_processor/perfetto_sql/stdlib/wattson/curves/w_dsu_dependence.sql",
         "src/trace_processor/perfetto_sql/stdlib/wattson/device_infos.sql",
         "src/trace_processor/perfetto_sql/stdlib/wattson/system_state.sql",
     ],
diff --git a/src/trace_processor/perfetto_sql/stdlib/wattson/BUILD.gn b/src/trace_processor/perfetto_sql/stdlib/wattson/BUILD.gn
index 69b9e24..185d6ef 100644
--- a/src/trace_processor/perfetto_sql/stdlib/wattson/BUILD.gn
+++ b/src/trace_processor/perfetto_sql/stdlib/wattson/BUILD.gn
@@ -26,6 +26,7 @@
     "curves/idle_attribution.sql",
     "curves/utils.sql",
     "curves/w_cpu_dependence.sql",
+    "curves/w_dsu_dependence.sql",
     "device_infos.sql",
     "system_state.sql",
   ]
diff --git a/src/trace_processor/perfetto_sql/stdlib/wattson/curves/estimates.sql b/src/trace_processor/perfetto_sql/stdlib/wattson/curves/estimates.sql
index 76b3745..b2162fd 100644
--- a/src/trace_processor/perfetto_sql/stdlib/wattson/curves/estimates.sql
+++ b/src/trace_processor/perfetto_sql/stdlib/wattson/curves/estimates.sql
@@ -16,8 +16,43 @@
 INCLUDE PERFETTO MODULE wattson.cpu_split;
 INCLUDE PERFETTO MODULE wattson.curves.utils;
 INCLUDE PERFETTO MODULE wattson.curves.w_cpu_dependence;
+INCLUDE PERFETTO MODULE wattson.curves.w_dsu_dependence;
 INCLUDE PERFETTO MODULE wattson.device_infos;
 
+-- One of the two tables will be empty, depending on whether the device is
+-- dependent on devfreq or a different CPU's frequency
+CREATE PERFETTO VIEW _curves_w_dependencies(
+  ts LONG,
+  dur LONG,
+  freq_0 INT,
+  idle_0 INT,
+  freq_1 INT,
+  idle_1 INT,
+  freq_2 INT,
+  idle_2 INT,
+  freq_3 INT,
+  idle_3 INT,
+  cpu0_curve FLOAT,
+  cpu1_curve FLOAT,
+  cpu2_curve FLOAT,
+  cpu3_curve FLOAT,
+  cpu4_curve FLOAT,
+  cpu5_curve FLOAT,
+  cpu6_curve FLOAT,
+  cpu7_curve FLOAT,
+  l3_hit_count INT,
+  l3_miss_count INT,
+  no_static INT,
+  all_cpu_deep_idle INT,
+  dependent_freq INT,
+  dependent_policy INT
+) AS
+-- Table that is dependent on a different CPU's frequency
+SELECT * FROM _w_cpu_dependence
+UNION ALL
+-- Table that is dependent on devfreq frequency
+SELECT * FROM _w_dsu_dependence;
+
 -- Final table showing the curves per CPU per slice
 CREATE PERFETTO TABLE _system_state_curves
 AS
@@ -47,7 +82,7 @@
     0,
     base.l3_miss_count * l3_miss_lut.curve_value
   ) as l3_miss_value
-FROM _w_cpu_dependence as base
+FROM _curves_w_dependencies as base
 -- LUT for 2D dependencies
 LEFT JOIN _filtered_curves_2d lut0 ON
   lut0.freq_khz = base.freq_0 AND
diff --git a/src/trace_processor/perfetto_sql/stdlib/wattson/curves/w_cpu_dependence.sql b/src/trace_processor/perfetto_sql/stdlib/wattson/curves/w_cpu_dependence.sql
index 54c8cdd..70b6ea2 100644
--- a/src/trace_processor/perfetto_sql/stdlib/wattson/curves/w_cpu_dependence.sql
+++ b/src/trace_processor/perfetto_sql/stdlib/wattson/curves/w_cpu_dependence.sql
@@ -37,9 +37,28 @@
       static_7
     ) as max_static_vote
   FROM _w_independent_cpus_calc
+  -- _skip_devfreq_for_calc short circuits this table if devfreq is needed
+  JOIN _skip_devfreq_for_calc
 )
 SELECT
-  *,
+  ts,
+  dur,
+  freq_0, idle_0,
+  freq_1, idle_1,
+  freq_2, idle_2,
+  freq_3, idle_3,
+  cpu0_curve,
+  cpu1_curve,
+  cpu2_curve,
+  cpu3_curve,
+  cpu4_curve,
+  cpu5_curve,
+  cpu6_curve,
+  cpu7_curve,
+  l3_hit_count,
+  l3_miss_count,
+  no_static,
+  all_cpu_deep_idle,
   CASE max_static_vote
     WHEN -1 THEN _get_min_freq_vote()
     WHEN static_4 THEN freq_4
@@ -57,4 +76,3 @@
     ELSE 4
   END dependent_policy
 FROM max_power_tbl;
-
diff --git a/src/trace_processor/perfetto_sql/stdlib/wattson/curves/w_dsu_dependence.sql b/src/trace_processor/perfetto_sql/stdlib/wattson/curves/w_dsu_dependence.sql
new file mode 100644
index 0000000..dbaf4ac
--- /dev/null
+++ b/src/trace_processor/perfetto_sql/stdlib/wattson/curves/w_dsu_dependence.sql
@@ -0,0 +1,89 @@
+--
+-- Copyright 2024 The Android Open Source Project
+--
+-- Licensed under the Apache License, Version 2.0 (the "License");
+-- you may not use this file except in compliance with the License.
+-- You may obtain a copy of the License at
+--
+--     https://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+
+INCLUDE PERFETTO MODULE intervals.intersect;
+INCLUDE PERFETTO MODULE linux.devfreq;
+INCLUDE PERFETTO MODULE wattson.cpu_split;
+INCLUDE PERFETTO MODULE wattson.curves.utils;
+INCLUDE PERFETTO MODULE wattson.device_infos;
+
+CREATE PERFETTO TABLE _cpu_curves AS
+SELECT
+  ts, dur,
+  freq_0, idle_0,
+  freq_1, idle_1,
+  freq_2, idle_2,
+  freq_3, idle_3,
+  lut4.curve_value as cpu4_curve,
+  lut5.curve_value as cpu5_curve,
+  lut6.curve_value as cpu6_curve,
+  lut7.curve_value as cpu7_curve,
+  l3_hit_count, l3_miss_count,
+  no_static,
+  MIN(
+    no_static,
+    IFNULL(idle_4, 1),
+    IFNULL(idle_5, 1),
+    IFNULL(idle_6, 1),
+    IFNULL(idle_7, 1)
+  ) as all_cpu_deep_idle
+FROM _w_independent_cpus_calc as base
+-- Cross join: an empty _use_devfreq_for_calc (devfreq unneeded) empties this table
+JOIN _use_devfreq_for_calc
+LEFT JOIN _filtered_curves_1d lut4 ON
+  base.freq_4 = lut4.freq_khz AND
+  base.idle_4 = lut4.idle
+LEFT JOIN _filtered_curves_1d lut5 ON
+  base.freq_5 = lut5.freq_khz AND
+  base.idle_5 = lut5.idle
+LEFT JOIN _filtered_curves_1d lut6 ON
+  base.freq_6 = lut6.freq_khz AND
+  base.idle_6 = lut6.idle
+LEFT JOIN _filtered_curves_1d lut7 ON
+  base.freq_7 = lut7.freq_khz AND
+  base.idle_7 = lut7.idle;
+
+CREATE PERFETTO TABLE _w_dsu_dependence AS
+SELECT
+  c.ts, c.dur,
+  c.freq_0, c.idle_0,
+  c.freq_1, c.idle_1,
+  c.freq_2, c.idle_2,
+  c.freq_3, c.idle_3,
+  -- NULL columns needed to match columns of _w_cpu_dependence before UNION
+  NULL as cpu0_curve,
+  NULL as cpu1_curve,
+  NULL as cpu2_curve,
+  NULL as cpu3_curve,
+  c.cpu4_curve,
+  c.cpu5_curve,
+  c.cpu6_curve,
+  c.cpu7_curve,
+  c.l3_hit_count,
+  c.l3_miss_count,
+  c.no_static,
+  c.all_cpu_deep_idle,
+  d.dsu_freq as dependent_freq,
+  255 as dependent_policy
+FROM _interval_intersect!(
+  (
+    _ii_subquery!(_cpu_curves),
+    _ii_subquery!(linux_devfreq_dsu_counter)
+  ),
+  ()
+) ii
+JOIN _cpu_curves AS c ON c._auto_id = id_0
+JOIN linux_devfreq_dsu_counter AS d on d._auto_id = id_1;
+
diff --git a/src/trace_processor/perfetto_sql/stdlib/wattson/device_infos.sql b/src/trace_processor/perfetto_sql/stdlib/wattson/device_infos.sql
index b9a66a2..6af3c85 100644
--- a/src/trace_processor/perfetto_sql/stdlib/wattson/device_infos.sql
+++ b/src/trace_processor/perfetto_sql/stdlib/wattson/device_infos.sql
@@ -98,7 +98,9 @@
   ("Tensor G4", 4, 4),
   ("Tensor G4", 5, 4),
   ("Tensor G4", 6, 4),
-  ("Tensor G4", 7, 7)
+  ("Tensor G4", 7, 7),
+  -- need 255 policy to match devfreq
+  ("Tensor G4", 255, 255)
 )
 select * from data;
 
@@ -140,3 +142,26 @@
 FROM _device_min_volt_vote as vote_tbl
 JOIN _wattson_device as device
 WHERE vote_tbl.device = device.name;
+
+-- Devices that require using devfreq
+CREATE PERFETTO TABLE _use_devfreq
+AS
+WITH data(device) AS (
+  VALUES
+  ("Tensor G4")
+)
+select * from data;
+
+-- Creates non-empty table if device needs devfreq
+CREATE PERFETTO TABLE _use_devfreq_for_calc AS
+SELECT TRUE AS devfreq_necessary
+FROM _use_devfreq as d
+JOIN _wattson_device as device
+ON d.device = device.name;
+
+-- Creates empty table if device needs devfreq; inverse of _use_devfreq_for_calc.
+-- Anti-join gives at most one row per device row, however long _use_devfreq is.
+CREATE PERFETTO TABLE _skip_devfreq_for_calc AS
+SELECT FALSE AS devfreq_necessary
+FROM _wattson_device as device
+WHERE device.name NOT IN (SELECT d.device FROM _use_devfreq as d);
diff --git a/test/trace_processor/diff_tests/stdlib/wattson/tests.py b/test/trace_processor/diff_tests/stdlib/wattson/tests.py
index b758b96..663b5aa 100644
--- a/test/trace_processor/diff_tests/stdlib/wattson/tests.py
+++ b/test/trace_processor/diff_tests/stdlib/wattson/tests.py
@@ -342,3 +342,71 @@
             2.434878,172,172
             2.256320,414,414
             """))
+
+  # Tests that DSU devfreq calculations are merged correctly
+  def test_wattson_dsu_devfreq(self):
+    return DiffTestBlueprint(
+        trace=DataPath('wattson_tk4_pcmark.pb'),
+        query=("""
+            INCLUDE PERFETTO MODULE wattson.curves.w_dsu_dependence;
+            SELECT * FROM _cpu_curves
+            WHERE ts > 4108586775197
+            LIMIT 20
+            """),
+        out=Csv("""
+            "ts","dur","freq_0","idle_0","freq_1","idle_1","freq_2","idle_2","freq_3","idle_3","cpu4_curve","cpu5_curve","cpu6_curve","cpu7_curve","l3_hit_count","l3_miss_count","no_static","all_cpu_deep_idle"
+            4108586789603,35685,1950000,0,1950000,-1,1950000,-1,1950000,-1,674.240000,674.240000,674.240000,3327.560000,14718,5837,-1,-1
+            4108586825288,30843,1950000,-1,1950000,-1,1950000,-1,1950000,-1,674.240000,674.240000,674.240000,3327.560000,12721,5045,-1,-1
+            4108586856131,13387,1950000,-1,1950000,-1,1950000,-1,1950000,-1,674.240000,674.240000,674.240000,99.470000,5521,2189,-1,-1
+            4108586869518,22542,1950000,-1,1950000,-1,1950000,-1,1950000,-1,674.240000,674.240000,674.240000,3327.560000,9297,3687,-1,-1
+            4108586892060,2482,1950000,-1,1950000,-1,1950000,-1,1950000,0,674.240000,674.240000,674.240000,3327.560000,1023,406,-1,-1
+            4108586894542,68563,1950000,-1,1950000,-1,1950000,-1,1950000,-1,674.240000,674.240000,674.240000,3327.560000,28279,11216,-1,-1
+            4108586963105,59652,1950000,-1,1950000,-1,1950000,-1,1950000,0,674.240000,674.240000,674.240000,3327.560000,24603,9758,-1,-1
+            4108587022757,3743,1950000,0,1950000,-1,1950000,-1,1950000,0,674.240000,674.240000,674.240000,3327.560000,1543,612,-1,-1
+            4108587026500,15992,1950000,-1,1950000,-1,1950000,-1,1950000,0,674.240000,674.240000,674.240000,3327.560000,6595,2616,-1,-1
+            4108587042492,15625,1950000,-1,1950000,-1,1950000,-1,1950000,0,674.240000,674.240000,674.240000,99.470000,6444,2556,-1,-1
+            4108587058117,8138,1950000,-1,1950000,-1,1950000,-1,1950000,0,674.240000,674.240000,674.240000,3327.560000,3356,1331,-1,-1
+            4108587066255,80566,1950000,-1,1950000,-1,1950000,-1,1950000,-1,674.240000,674.240000,674.240000,3327.560000,33229,13179,-1,-1
+            4108587146821,19572,1950000,-1,1950000,-1,1950000,-1,1950000,-1,674.240000,674.240000,674.240000,99.470000,8072,3201,-1,-1
+            4108587166393,219116,1950000,-1,1950000,-1,1950000,-1,1950000,-1,674.240000,674.240000,674.240000,3327.560000,90375,35845,-1,-1
+            4108587385509,81991,1950000,-1,1950000,0,1950000,-1,1950000,-1,674.240000,674.240000,674.240000,3327.560000,33817,13413,-1,-1
+            4108587467500,90413,1950000,-1,1950000,0,1950000,0,1950000,-1,674.240000,674.240000,674.240000,3327.560000,37291,14790,-1,-1
+            4108587557913,92896,1950000,0,1950000,0,1950000,0,1950000,-1,674.240000,674.240000,674.240000,3327.560000,38315,15196,-1,-1
+            4108587650809,95296,1950000,-1,1950000,0,1950000,0,1950000,-1,674.240000,674.240000,674.240000,3327.560000,39305,15589,-1,-1
+            4108587746105,12451,1950000,0,1950000,0,1950000,0,1950000,-1,674.240000,674.240000,674.240000,3327.560000,5135,2036,-1,-1
+            4108587758556,28524,1950000,0,1950000,0,1950000,-1,1950000,-1,674.240000,674.240000,674.240000,3327.560000,11764,4666,-1,-1
+            """))
+
+  # Tests that DSU devfreq calculations are merged correctly
+  def test_wattson_dsu_devfreq(self):
+    return DiffTestBlueprint(
+        trace=DataPath('wattson_tk4_pcmark.pb'),
+        query=("""
+            INCLUDE PERFETTO MODULE wattson.curves.estimates;
+            SELECT * FROM _system_state_mw
+            WHERE ts > 4108586775197
+            LIMIT 20
+            """),
+        out=Csv("""
+            "ts","dur","cpu0_mw","cpu1_mw","cpu2_mw","cpu3_mw","cpu4_mw","cpu5_mw","cpu6_mw","cpu7_mw","dsu_scu_mw"
+            4108586789603,35685,2.670000,205.600000,205.600000,205.600000,674.240000,674.240000,674.240000,3327.560000,1166.695271
+            4108586825288,30843,205.600000,205.600000,205.600000,205.600000,674.240000,674.240000,674.240000,3327.560000,1166.698554
+            4108586856131,13387,205.600000,205.600000,205.600000,205.600000,674.240000,674.240000,674.240000,99.470000,1166.545753
+            4108586869518,22542,205.600000,205.600000,205.600000,205.600000,674.240000,674.240000,674.240000,3327.560000,1166.655587
+            4108586892060,2482,205.600000,205.600000,205.600000,2.670000,674.240000,674.240000,674.240000,3327.560000,1166.164641
+            4108586894542,68563,205.600000,205.600000,205.600000,205.600000,674.240000,674.240000,674.240000,3327.560000,1166.746124
+            4108586963105,59652,205.600000,205.600000,205.600000,2.670000,674.240000,674.240000,674.240000,3327.560000,1166.716706
+            4108587022757,3743,2.670000,205.600000,205.600000,2.670000,674.240000,674.240000,674.240000,3327.560000,1166.170321
+            4108587026500,15992,205.600000,205.600000,205.600000,2.670000,674.240000,674.240000,674.240000,3327.560000,1166.620056
+            4108587042492,15625,205.600000,205.600000,205.600000,2.670000,674.240000,674.240000,674.240000,99.470000,1166.668234
+            4108587058117,8138,205.600000,205.600000,205.600000,2.670000,674.240000,674.240000,674.240000,3327.560000,1166.555033
+            4108587066255,80566,205.600000,205.600000,205.600000,205.600000,674.240000,674.240000,674.240000,3327.560000,1166.717766
+            4108587146821,19572,205.600000,205.600000,205.600000,205.600000,674.240000,674.240000,674.240000,99.470000,1166.626795
+            4108587166393,219116,205.600000,205.600000,205.600000,205.600000,674.240000,674.240000,674.240000,3327.560000,1166.750356
+            4108587385509,81991,205.600000,2.670000,205.600000,205.600000,674.240000,674.240000,674.240000,3327.560000,1166.743880
+            4108587467500,90413,205.600000,2.670000,2.670000,205.600000,674.240000,674.240000,674.240000,3327.560000,1166.736713
+            4108587557913,92896,2.670000,2.670000,2.670000,205.600000,674.240000,674.240000,674.240000,3327.560000,1166.730805
+            4108587650809,95296,205.600000,2.670000,2.670000,205.600000,674.240000,674.240000,674.240000,3327.560000,1166.740927
+            4108587746105,12451,2.670000,2.670000,2.670000,205.600000,674.240000,674.240000,674.240000,3327.560000,1166.556475
+            4108587758556,28524,2.670000,2.670000,205.600000,205.600000,674.240000,674.240000,674.240000,3327.560000,1166.680924
+            """))