stdlib: Add support for DSU dependent devices
Add support for devices that depend on DSU for power estimates. The
callflow will short circuit device-specific tables that are unnecessary
for that device's calculation. For example, a device that depends on DSU
calculations will short circuit tables that use CPU dependent
calculations.
Runtime of existing Wattson test cases remains the same before and after
this patch (~4300ms).
SQL modules callflow:
 ┌────────┐   ┌────────┐
 │cpu_idle│   │cpu_freq│
 └┬───────┘   └───────┬┘
  │                   │
  │  ┌─────────────┐  │     ┌───────┐
  └─►│cpu_freq_idle│◄─┘     │devfreq│
     └──────┬──────┘        └───┬───┘
            │                   │
       ┌────▼────┐              │
    ┌──┤cpu_split├──────────┐   │
    │  └─────────┘          │   │
    │                       │   │
┌───▼────────────┐ ┌────────▼───▼───┐
│w_cpu_dependence│ │w_dsu_dependence│
└───┬────────────┘ └────────┬───────┘
    │                       │
    │      ┌─────────┐      │
    └─────►│estimates│◄─────┘
           └─────────┘
Bug: 370829192
Test: tools/diff_test_trace_processor.py out/linux/trace_processor_shell --name-filter '.*wattson.*'
Change-Id: I8eb611450605837ed88aea5f5d2934c572e11730
Signed-off-by: Samuel Wu <wusamuel@google.com>
diff --git a/Android.bp b/Android.bp
index f4bc565..e6cb2a6 100644
--- a/Android.bp
+++ b/Android.bp
@@ -13662,6 +13662,7 @@
"src/trace_processor/perfetto_sql/stdlib/wattson/curves/idle_attribution.sql",
"src/trace_processor/perfetto_sql/stdlib/wattson/curves/utils.sql",
"src/trace_processor/perfetto_sql/stdlib/wattson/curves/w_cpu_dependence.sql",
+ "src/trace_processor/perfetto_sql/stdlib/wattson/curves/w_dsu_dependence.sql",
"src/trace_processor/perfetto_sql/stdlib/wattson/device_infos.sql",
"src/trace_processor/perfetto_sql/stdlib/wattson/system_state.sql",
],
diff --git a/BUILD b/BUILD
index 4c57533..caacf6e 100644
--- a/BUILD
+++ b/BUILD
@@ -3083,6 +3083,7 @@
"src/trace_processor/perfetto_sql/stdlib/wattson/curves/idle_attribution.sql",
"src/trace_processor/perfetto_sql/stdlib/wattson/curves/utils.sql",
"src/trace_processor/perfetto_sql/stdlib/wattson/curves/w_cpu_dependence.sql",
+ "src/trace_processor/perfetto_sql/stdlib/wattson/curves/w_dsu_dependence.sql",
"src/trace_processor/perfetto_sql/stdlib/wattson/device_infos.sql",
"src/trace_processor/perfetto_sql/stdlib/wattson/system_state.sql",
],
diff --git a/src/trace_processor/perfetto_sql/stdlib/wattson/BUILD.gn b/src/trace_processor/perfetto_sql/stdlib/wattson/BUILD.gn
index 69b9e24..185d6ef 100644
--- a/src/trace_processor/perfetto_sql/stdlib/wattson/BUILD.gn
+++ b/src/trace_processor/perfetto_sql/stdlib/wattson/BUILD.gn
@@ -26,6 +26,7 @@
"curves/idle_attribution.sql",
"curves/utils.sql",
"curves/w_cpu_dependence.sql",
+ "curves/w_dsu_dependence.sql",
"device_infos.sql",
"system_state.sql",
]
diff --git a/src/trace_processor/perfetto_sql/stdlib/wattson/curves/estimates.sql b/src/trace_processor/perfetto_sql/stdlib/wattson/curves/estimates.sql
index 76b3745..b2162fd 100644
--- a/src/trace_processor/perfetto_sql/stdlib/wattson/curves/estimates.sql
+++ b/src/trace_processor/perfetto_sql/stdlib/wattson/curves/estimates.sql
@@ -16,8 +16,43 @@
INCLUDE PERFETTO MODULE wattson.cpu_split;
INCLUDE PERFETTO MODULE wattson.curves.utils;
INCLUDE PERFETTO MODULE wattson.curves.w_cpu_dependence;
+INCLUDE PERFETTO MODULE wattson.curves.w_dsu_dependence;
INCLUDE PERFETTO MODULE wattson.device_infos;
+-- One of the two tables is expected to be empty, depending on whether the device
+-- is dependent on devfreq or on a different CPU's frequency.
+CREATE PERFETTO VIEW _curves_w_dependencies(
+ ts LONG,
+ dur LONG,
+ freq_0 INT,
+ idle_0 INT,
+ freq_1 INT,
+ idle_1 INT,
+ freq_2 INT,
+ idle_2 INT,
+ freq_3 INT,
+ idle_3 INT,
+ cpu0_curve FLOAT,
+ cpu1_curve FLOAT,
+ cpu2_curve FLOAT,
+ cpu3_curve FLOAT,
+ cpu4_curve FLOAT,
+ cpu5_curve FLOAT,
+ cpu6_curve FLOAT,
+ cpu7_curve FLOAT,
+ l3_hit_count INT,
+ l3_miss_count INT,
+ no_static INT,
+ all_cpu_deep_idle INT,
+ dependent_freq INT,
+ dependent_policy INT
+) AS
+-- Table that is dependent on a different CPU's frequency
+SELECT * FROM _w_cpu_dependence
+UNION ALL
+-- Table that is dependent on devfreq frequency (SELECT * relies on column order)
+SELECT * FROM _w_dsu_dependence;
+
-- Final table showing the curves per CPU per slice
CREATE PERFETTO TABLE _system_state_curves
AS
@@ -47,7 +82,7 @@
0,
base.l3_miss_count * l3_miss_lut.curve_value
) as l3_miss_value
-FROM _w_cpu_dependence as base
+FROM _curves_w_dependencies as base
-- LUT for 2D dependencies
LEFT JOIN _filtered_curves_2d lut0 ON
lut0.freq_khz = base.freq_0 AND
diff --git a/src/trace_processor/perfetto_sql/stdlib/wattson/curves/w_cpu_dependence.sql b/src/trace_processor/perfetto_sql/stdlib/wattson/curves/w_cpu_dependence.sql
index 54c8cdd..70b6ea2 100644
--- a/src/trace_processor/perfetto_sql/stdlib/wattson/curves/w_cpu_dependence.sql
+++ b/src/trace_processor/perfetto_sql/stdlib/wattson/curves/w_cpu_dependence.sql
@@ -37,9 +37,28 @@
static_7
) as max_static_vote
FROM _w_independent_cpus_calc
+ -- _skip_devfreq_for_calc short circuits this table if devfreq is needed
+ JOIN _skip_devfreq_for_calc
)
SELECT
- *,
+ ts,
+ dur,
+ freq_0, idle_0,
+ freq_1, idle_1,
+ freq_2, idle_2,
+ freq_3, idle_3,
+ cpu0_curve,
+ cpu1_curve,
+ cpu2_curve,
+ cpu3_curve,
+ cpu4_curve,
+ cpu5_curve,
+ cpu6_curve,
+ cpu7_curve,
+ l3_hit_count,
+ l3_miss_count,
+ no_static,
+ all_cpu_deep_idle,
CASE max_static_vote
WHEN -1 THEN _get_min_freq_vote()
WHEN static_4 THEN freq_4
@@ -57,4 +76,3 @@
ELSE 4
END dependent_policy
FROM max_power_tbl;
-
diff --git a/src/trace_processor/perfetto_sql/stdlib/wattson/curves/w_dsu_dependence.sql b/src/trace_processor/perfetto_sql/stdlib/wattson/curves/w_dsu_dependence.sql
new file mode 100644
index 0000000..dbaf4ac
--- /dev/null
+++ b/src/trace_processor/perfetto_sql/stdlib/wattson/curves/w_dsu_dependence.sql
@@ -0,0 +1,89 @@
+--
+-- Copyright 2024 The Android Open Source Project
+--
+-- Licensed under the Apache License, Version 2.0 (the "License");
+-- you may not use this file except in compliance with the License.
+-- You may obtain a copy of the License at
+--
+-- https://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+
+INCLUDE PERFETTO MODULE intervals.intersect;
+INCLUDE PERFETTO MODULE linux.devfreq;
+INCLUDE PERFETTO MODULE wattson.cpu_split;
+INCLUDE PERFETTO MODULE wattson.curves.utils;
+INCLUDE PERFETTO MODULE wattson.device_infos;
+
+CREATE PERFETTO TABLE _cpu_curves AS
+SELECT
+ ts, dur,
+ freq_0, idle_0,
+ freq_1, idle_1,
+ freq_2, idle_2,
+ freq_3, idle_3,
+ lut4.curve_value as cpu4_curve,
+ lut5.curve_value as cpu5_curve,
+ lut6.curve_value as cpu6_curve,
+ lut7.curve_value as cpu7_curve,
+ l3_hit_count, l3_miss_count,
+ no_static,
+ MIN( -- multi-argument MIN: smallest of the listed values; a NULL idle counts as 1
+ no_static,
+ IFNULL(idle_4, 1),
+ IFNULL(idle_5, 1),
+ IFNULL(idle_6, 1),
+ IFNULL(idle_7, 1)
+ ) as all_cpu_deep_idle
+FROM _w_independent_cpus_calc as base
+-- Cross join: this table is empty unless _use_devfreq_for_calc has a row (devfreq needed)
+JOIN _use_devfreq_for_calc
+LEFT JOIN _filtered_curves_1d lut4 ON
+ base.freq_4 = lut4.freq_khz AND
+ base.idle_4 = lut4.idle
+LEFT JOIN _filtered_curves_1d lut5 ON
+ base.freq_5 = lut5.freq_khz AND
+ base.idle_5 = lut5.idle
+LEFT JOIN _filtered_curves_1d lut6 ON
+ base.freq_6 = lut6.freq_khz AND
+ base.idle_6 = lut6.idle
+LEFT JOIN _filtered_curves_1d lut7 ON
+ base.freq_7 = lut7.freq_khz AND
+ base.idle_7 = lut7.idle;
+
+CREATE PERFETTO TABLE _w_dsu_dependence AS
+SELECT
+ c.ts, c.dur,
+ c.freq_0, c.idle_0,
+ c.freq_1, c.idle_1,
+ c.freq_2, c.idle_2,
+ c.freq_3, c.idle_3,
+ -- NULL columns needed to match columns of _w_cpu_dependence before UNION ALL
+ NULL as cpu0_curve,
+ NULL as cpu1_curve,
+ NULL as cpu2_curve,
+ NULL as cpu3_curve,
+ c.cpu4_curve,
+ c.cpu5_curve,
+ c.cpu6_curve,
+ c.cpu7_curve,
+ c.l3_hit_count,
+ c.l3_miss_count,
+ c.no_static,
+ c.all_cpu_deep_idle,
+ d.dsu_freq as dependent_freq,
+ 255 as dependent_policy -- matches the 255 policy entry added for devfreq in wattson.device_infos
+FROM _interval_intersect!(
+ (
+ _ii_subquery!(_cpu_curves),
+ _ii_subquery!(linux_devfreq_dsu_counter)
+ ),
+ ()
+) ii
+JOIN _cpu_curves AS c ON c._auto_id = id_0
+JOIN linux_devfreq_dsu_counter AS d on d._auto_id = id_1;
+
diff --git a/src/trace_processor/perfetto_sql/stdlib/wattson/device_infos.sql b/src/trace_processor/perfetto_sql/stdlib/wattson/device_infos.sql
index b9a66a2..6af3c85 100644
--- a/src/trace_processor/perfetto_sql/stdlib/wattson/device_infos.sql
+++ b/src/trace_processor/perfetto_sql/stdlib/wattson/device_infos.sql
@@ -98,7 +98,9 @@
("Tensor G4", 4, 4),
("Tensor G4", 5, 4),
("Tensor G4", 6, 4),
- ("Tensor G4", 7, 7)
+ ("Tensor G4", 7, 7),
+ -- need 255 policy to match devfreq
+ ("Tensor G4", 255, 255)
)
select * from data;
@@ -140,3 +142,26 @@
FROM _device_min_volt_vote as vote_tbl
JOIN _wattson_device as device
WHERE vote_tbl.device = device.name;
+
+-- Devices that require using devfreq; names are matched against _wattson_device.name
+CREATE PERFETTO TABLE _use_devfreq
+AS
+WITH data(device) AS (
+ VALUES
+ ("Tensor G4")
+)
+select * from data;
+
+-- Creates non-empty table if device is listed in _use_devfreq (needs devfreq), else empty
+CREATE PERFETTO TABLE _use_devfreq_for_calc AS
+SELECT TRUE AS devfreq_necessary
+FROM _use_devfreq as d
+JOIN _wattson_device as device
+ON d.device = device.name;
+
+-- Creates empty table if device needs devfreq; inverse of _use_devfreq_for_calc.
+-- NOT EXISTS (not a != join) keeps one row per device however many are listed.
+CREATE PERFETTO TABLE _skip_devfreq_for_calc AS
+SELECT FALSE AS devfreq_necessary
+FROM _wattson_device as device
+WHERE NOT EXISTS (SELECT 1 FROM _use_devfreq d WHERE d.device = device.name);
diff --git a/test/trace_processor/diff_tests/stdlib/wattson/tests.py b/test/trace_processor/diff_tests/stdlib/wattson/tests.py
index b758b96..663b5aa 100644
--- a/test/trace_processor/diff_tests/stdlib/wattson/tests.py
+++ b/test/trace_processor/diff_tests/stdlib/wattson/tests.py
@@ -342,3 +342,71 @@
2.434878,172,172
2.256320,414,414
"""))
+
+ # Tests the intermediate _cpu_curves table from wattson.curves.w_dsu_dependence
+ def test_wattson_dsu_devfreq(self):
+ return DiffTestBlueprint(
+ trace=DataPath('wattson_tk4_pcmark.pb'),
+ query=("""
+ INCLUDE PERFETTO MODULE wattson.curves.w_dsu_dependence;
+ SELECT * FROM _cpu_curves
+ WHERE ts > 4108586775197
+ LIMIT 20
+ """),
+ out=Csv("""
+ "ts","dur","freq_0","idle_0","freq_1","idle_1","freq_2","idle_2","freq_3","idle_3","cpu4_curve","cpu5_curve","cpu6_curve","cpu7_curve","l3_hit_count","l3_miss_count","no_static","all_cpu_deep_idle"
+ 4108586789603,35685,1950000,0,1950000,-1,1950000,-1,1950000,-1,674.240000,674.240000,674.240000,3327.560000,14718,5837,-1,-1
+ 4108586825288,30843,1950000,-1,1950000,-1,1950000,-1,1950000,-1,674.240000,674.240000,674.240000,3327.560000,12721,5045,-1,-1
+ 4108586856131,13387,1950000,-1,1950000,-1,1950000,-1,1950000,-1,674.240000,674.240000,674.240000,99.470000,5521,2189,-1,-1
+ 4108586869518,22542,1950000,-1,1950000,-1,1950000,-1,1950000,-1,674.240000,674.240000,674.240000,3327.560000,9297,3687,-1,-1
+ 4108586892060,2482,1950000,-1,1950000,-1,1950000,-1,1950000,0,674.240000,674.240000,674.240000,3327.560000,1023,406,-1,-1
+ 4108586894542,68563,1950000,-1,1950000,-1,1950000,-1,1950000,-1,674.240000,674.240000,674.240000,3327.560000,28279,11216,-1,-1
+ 4108586963105,59652,1950000,-1,1950000,-1,1950000,-1,1950000,0,674.240000,674.240000,674.240000,3327.560000,24603,9758,-1,-1
+ 4108587022757,3743,1950000,0,1950000,-1,1950000,-1,1950000,0,674.240000,674.240000,674.240000,3327.560000,1543,612,-1,-1
+ 4108587026500,15992,1950000,-1,1950000,-1,1950000,-1,1950000,0,674.240000,674.240000,674.240000,3327.560000,6595,2616,-1,-1
+ 4108587042492,15625,1950000,-1,1950000,-1,1950000,-1,1950000,0,674.240000,674.240000,674.240000,99.470000,6444,2556,-1,-1
+ 4108587058117,8138,1950000,-1,1950000,-1,1950000,-1,1950000,0,674.240000,674.240000,674.240000,3327.560000,3356,1331,-1,-1
+ 4108587066255,80566,1950000,-1,1950000,-1,1950000,-1,1950000,-1,674.240000,674.240000,674.240000,3327.560000,33229,13179,-1,-1
+ 4108587146821,19572,1950000,-1,1950000,-1,1950000,-1,1950000,-1,674.240000,674.240000,674.240000,99.470000,8072,3201,-1,-1
+ 4108587166393,219116,1950000,-1,1950000,-1,1950000,-1,1950000,-1,674.240000,674.240000,674.240000,3327.560000,90375,35845,-1,-1
+ 4108587385509,81991,1950000,-1,1950000,0,1950000,-1,1950000,-1,674.240000,674.240000,674.240000,3327.560000,33817,13413,-1,-1
+ 4108587467500,90413,1950000,-1,1950000,0,1950000,0,1950000,-1,674.240000,674.240000,674.240000,3327.560000,37291,14790,-1,-1
+ 4108587557913,92896,1950000,0,1950000,0,1950000,0,1950000,-1,674.240000,674.240000,674.240000,3327.560000,38315,15196,-1,-1
+ 4108587650809,95296,1950000,-1,1950000,0,1950000,0,1950000,-1,674.240000,674.240000,674.240000,3327.560000,39305,15589,-1,-1
+ 4108587746105,12451,1950000,0,1950000,0,1950000,0,1950000,-1,674.240000,674.240000,674.240000,3327.560000,5135,2036,-1,-1
+ 4108587758556,28524,1950000,0,1950000,0,1950000,-1,1950000,-1,674.240000,674.240000,674.240000,3327.560000,11764,4666,-1,-1
+ """))
+
+ # Tests the final _system_state_mw estimates; the name must differ from the test above so both run
+ def test_wattson_dsu_devfreq_system_state(self):
+ return DiffTestBlueprint(
+ trace=DataPath('wattson_tk4_pcmark.pb'),
+ query=("""
+ INCLUDE PERFETTO MODULE wattson.curves.estimates;
+ SELECT * FROM _system_state_mw
+ WHERE ts > 4108586775197
+ LIMIT 20
+ """),
+ out=Csv("""
+ "ts","dur","cpu0_mw","cpu1_mw","cpu2_mw","cpu3_mw","cpu4_mw","cpu5_mw","cpu6_mw","cpu7_mw","dsu_scu_mw"
+ 4108586789603,35685,2.670000,205.600000,205.600000,205.600000,674.240000,674.240000,674.240000,3327.560000,1166.695271
+ 4108586825288,30843,205.600000,205.600000,205.600000,205.600000,674.240000,674.240000,674.240000,3327.560000,1166.698554
+ 4108586856131,13387,205.600000,205.600000,205.600000,205.600000,674.240000,674.240000,674.240000,99.470000,1166.545753
+ 4108586869518,22542,205.600000,205.600000,205.600000,205.600000,674.240000,674.240000,674.240000,3327.560000,1166.655587
+ 4108586892060,2482,205.600000,205.600000,205.600000,2.670000,674.240000,674.240000,674.240000,3327.560000,1166.164641
+ 4108586894542,68563,205.600000,205.600000,205.600000,205.600000,674.240000,674.240000,674.240000,3327.560000,1166.746124
+ 4108586963105,59652,205.600000,205.600000,205.600000,2.670000,674.240000,674.240000,674.240000,3327.560000,1166.716706
+ 4108587022757,3743,2.670000,205.600000,205.600000,2.670000,674.240000,674.240000,674.240000,3327.560000,1166.170321
+ 4108587026500,15992,205.600000,205.600000,205.600000,2.670000,674.240000,674.240000,674.240000,3327.560000,1166.620056
+ 4108587042492,15625,205.600000,205.600000,205.600000,2.670000,674.240000,674.240000,674.240000,99.470000,1166.668234
+ 4108587058117,8138,205.600000,205.600000,205.600000,2.670000,674.240000,674.240000,674.240000,3327.560000,1166.555033
+ 4108587066255,80566,205.600000,205.600000,205.600000,205.600000,674.240000,674.240000,674.240000,3327.560000,1166.717766
+ 4108587146821,19572,205.600000,205.600000,205.600000,205.600000,674.240000,674.240000,674.240000,99.470000,1166.626795
+ 4108587166393,219116,205.600000,205.600000,205.600000,205.600000,674.240000,674.240000,674.240000,3327.560000,1166.750356
+ 4108587385509,81991,205.600000,2.670000,205.600000,205.600000,674.240000,674.240000,674.240000,3327.560000,1166.743880
+ 4108587467500,90413,205.600000,2.670000,2.670000,205.600000,674.240000,674.240000,674.240000,3327.560000,1166.736713
+ 4108587557913,92896,2.670000,2.670000,2.670000,205.600000,674.240000,674.240000,674.240000,3327.560000,1166.730805
+ 4108587650809,95296,205.600000,2.670000,2.670000,205.600000,674.240000,674.240000,674.240000,3327.560000,1166.740927
+ 4108587746105,12451,2.670000,2.670000,2.670000,205.600000,674.240000,674.240000,674.240000,3327.560000,1166.556475
+ 4108587758556,28524,2.670000,2.670000,205.600000,205.600000,674.240000,674.240000,674.240000,3327.560000,1166.680924
+ """))