blob: 422f1e0d10b737108866d958d579ebff37bab2d2 [file]
// Copyright (C) 2026 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Summary table for the GPU Compute tab.
//
// Shows a sortable, double-click-navigable list of every compute kernel
// launch in the trace. Each row contains the kernel's demangled name,
// duration, compute/memory throughput, register count, and grid size
// rendered as relative percent-bars so hot kernels stand out visually.
import m from 'mithril';
import type {Engine} from '../../trace_processor/engine';
import {
renderPercentBar,
formatNumber,
COMPUTE_RENDER_STAGE_CATEGORY,
} from './details';
import {Icons} from '../../base/semantic_icons';
import {Button} from '../../widgets/button';
import {Icon} from '../../widgets/icon';
import type {GpuComputeContext} from './index';
import {adjustSeconds} from './humanize';
// Per-kernel row returned by {@link fetchKernelSummaryRows}.
//
// The metric fields are typed `number | string | null`; the consumers in
// this file pass them through `Number(...)` before doing arithmetic.
export type SummaryRow = {
// Slice id of the kernel launch (`gpu_slice.id`).
id: number;
// First non-NULL of the `kernel_demangled_name` arg, the `kernel_name`
// arg, and the slice name.
demangledName: string;
// First available "duration" metric counter value, falling back to the
// slice's `dur` column.
durationNSecNum: number | string | null;
// First available "compute_throughput" metric counter value.
computePct: number | string | null;
// First available "memory_throughput" metric counter value.
memoryPct: number | string | null;
// Value of the `registers_per_thread` arg.
registersPerThread: number | string | null;
// Product of the `launch.grid_size.{x,y,z}` args (y and z default to 1).
gridSize: number | string | null;
};
// Number of table rows rendered per page; also the step applied by the
// previous/next pagination buttons.
const PAGE_SIZE = 100;
// Component state holding the fetched rows and per-column max values.
type SummaryState = {
// Rows to display; stays undefined until `oninit` has populated it.
rows?: SummaryRow[];
// Largest finite value seen in each column, used as the 100% reference for
// the relative bars. Undefined when the column has no finite value.
maxDurationNSec?: number;
maxComputePct?: number;
maxMemoryPct?: number;
maxRegisters?: number;
maxGridSize?: number;
// Maps a slice id to the zero-based position the row had when the rows
// were loaded; this is the number shown in the ID column.
launchIndexBySliceId: Map<number, number>;
// Column currently sorted by; `null` renders the rows in stored order.
sortKey: SortKey | null;
// True when the active column is sorted from largest to smallest.
sortDescending: boolean;
// Index (into the sorted rows) of the first row on the visible page.
pageOffset: number;
};
// Draws a percent bar filled to `val / max` (clamped to 0–100%).
//
// When either number is not finite, or `max` is not positive, there is
// nothing to scale against: an empty bar carrying `label` is drawn if the
// label is non-blank, otherwise a plain `—` is returned.
const renderRelPercentBar = (
  val?: number,
  max?: number,
  label?: string,
): m.Children => {
  const numerator = Number(val);
  const denominator = Number(max);
  const scalable =
    Number.isFinite(numerator) &&
    Number.isFinite(denominator) &&
    denominator > 0;

  if (!scalable) {
    if (typeof label !== 'string' || label.trim() === '') {
      return '—';
    }
    return renderPercentBar(0, null, false, label);
  }

  // Keep the value inside [0, max] before converting it to a percentage.
  const bounded = Math.max(0, Math.min(numerator, denominator));
  const percent = Math.max(0, Math.min(100, (bounded / denominator) * 100));
  return renderPercentBar(percent, null, false, label ?? '');
};
// =============================================================================
// Data fetching
// =============================================================================
// Fetches one summary row per compute kernel launch, ordered by slice
// timestamp.
//
// The query is built from two CTEs:
//  - `compute_slices`: `gpu_slice` rows whose `render_stage_category` equals
//    COMPUTE_RENDER_STAGE_CATEGORY and whose track exists in `gpu_track`.
//  - `counter_vals`: non-zero counter samples from `gpu_counter_track`
//    tracks named after one of the well-known metric ids, whose timestamp
//    lies in `[ts, ts + dur)` of a compute slice (or anywhere at/after `ts`
//    when the slice has no duration).
// For every role (duration, compute throughput, memory throughput) the
// first metric id that has a sample for the slice wins. Duration falls back
// to the slice's own `dur`. The remaining columns come from the slice's arg
// set.
//
// @param ctx Supplies `sectionRegistry.getWellKnownMetricIds(role)`.
// @param engine Trace processor engine used to run the query.
// @returns One {@link SummaryRow} per compute slice; SQL NULLs become `null`.
export async function fetchKernelSummaryRows(
  ctx: GpuComputeContext,
  engine: Engine,
): Promise<SummaryRow[]> {
  const durationNames = ctx.sectionRegistry.getWellKnownMetricIds('duration');
  const computeNames =
    ctx.sectionRegistry.getWellKnownMetricIds('compute_throughput');
  const memoryNames =
    ctx.sectionRegistry.getWellKnownMetricIds('memory_throughput');
  const allNames = [...durationNames, ...computeNames, ...memoryNames];

  // Metric ids are spliced into the SQL text as string literals, so an
  // embedded single quote has to be doubled to keep the literal well-formed.
  const sqlLiteral = (name: string): string =>
    `'${String(name).replace(/'/g, "''")}'`;

  // Build the IN clause for counter name filtering.
  const inClause = allNames.map(sqlLiteral).join(', ');

  // Build COALESCE expression that picks the first available metric per role.
  const coalesceExpr = (names: string[]): string => {
    if (names.length === 0) return 'NULL';
    const parts = names.map(
      (n) =>
        `(SELECT cv.value FROM counter_vals cv WHERE cv.slice_id = cs.id AND cv.metric_name = ${sqlLiteral(n)} LIMIT 1)`,
    );
    return names.length === 1 ? parts[0] : `COALESCE(${parts.join(', ')})`;
  };

  const sql = `
    WITH compute_slices AS (
      SELECT s.id, s.ts, s.dur, s.arg_set_id, s.name
      FROM gpu_slice s
      INNER JOIN gpu_track tr ON tr.id = s.track_id
      WHERE s.render_stage_category = ${COMPUTE_RENDER_STAGE_CATEGORY}
    ),
    counter_vals AS (
      SELECT
        cs.id AS slice_id,
        tc.name AS metric_name,
        c.value
      FROM compute_slices cs
      INNER JOIN counter c ON c.ts >= cs.ts AND (cs.dur IS NULL OR c.ts < cs.ts + cs.dur)
      INNER JOIN gpu_counter_track tc ON tc.id = c.track_id
        AND tc.name IN (${inClause})
      WHERE c.value <> 0
    )
    SELECT
      cs.id AS id,
      COALESCE(
        EXTRACT_ARG(cs.arg_set_id, 'kernel_demangled_name'),
        EXTRACT_ARG(cs.arg_set_id, 'kernel_name'),
        cs.name
      ) AS demangledName,
      CAST(EXTRACT_ARG(cs.arg_set_id, 'registers_per_thread') AS REAL) AS registers_per_thread,
      CAST(EXTRACT_ARG(cs.arg_set_id, 'launch.grid_size.x') AS REAL)
        * CAST(COALESCE(EXTRACT_ARG(cs.arg_set_id, 'launch.grid_size.y'), 1) AS REAL)
        * CAST(COALESCE(EXTRACT_ARG(cs.arg_set_id, 'launch.grid_size.z'), 1) AS REAL) AS grid_size,
      COALESCE(
        ${coalesceExpr(durationNames)},
        CAST(cs.dur AS REAL)
      ) AS durationNSecNum,
      ${coalesceExpr(computeNames)} AS computePct,
      ${coalesceExpr(memoryNames)} AS memoryPct
    FROM compute_slices cs
    ORDER BY cs.ts ASC;
  `;

  const result = await engine.query(sql);
  const list: SummaryRow[] = [];
  for (const iter = result.iter({}); iter.valid(); iter.next()) {
    // Reads a metric column, normalising a missing value to `null`.
    const metric = (column: string): number | string | null =>
      (iter.get(column) as number | string | null) ?? null;
    list.push({
      id: Number(iter.get('id')),
      demangledName: String(iter.get('demangledName') ?? ''),
      durationNSecNum: metric('durationNSecNum'),
      computePct: metric('computePct'),
      memoryPct: metric('memoryPct'),
      registersPerThread: metric('registers_per_thread'),
      gridSize: metric('grid_size'),
    });
  }
  return list;
}
// =============================================================================
// Summary section component
// =============================================================================
// Attrs accepted by {@link KernelSummarySection}.
export interface SummarySectionAttrs extends m.Attributes {
// Plugin context; the component reads its terminology registry/id and the
// `humanizeMetrics` flag, and hands it to the row fetch.
ctx: GpuComputeContext;
// Engine used to query the rows when `prefetchedRows` is not supplied.
engine: Engine;
// NOTE(review): not read by the component code in this file — confirm
// whether callers still need to pass it.
sliceId?: number;
// Invoked with the row's slice id when a table row is double-clicked.
openSliceInDetail?: (sliceId: number) => void;
// Rows to display as-is; when present no query is issued on init.
prefetchedRows?: SummaryRow[];
}
// Column keys that the table can be sorted by.
//
// `id` sorts by launch index (falling back to the slice id), `name` by the
// demangled name, and the remaining keys by the matching numeric column of
// {@link SummaryRow}.
type SortKey =
| 'id'
| 'name'
| 'duration'
| 'compute'
| 'memory'
| 'registers'
| 'grid_size';
// Returns the sortable primitive for `key` from a row.
//
// - `id` yields the row's launch index, or the raw slice id when the row
//   is absent from `launchIndex`.
// - `name` yields the demangled name.
// - Every other key yields the column converted with `Number(...)`, or
//   `undefined` when the column is null/undefined. Without that guard
//   `Number(null)` would be `0` and rows with no measurement would sort
//   among genuine zeros instead of being treated as missing.
function getSortableValue(
  r: SummaryRow,
  key: SortKey,
  launchIndex: Map<number, number>,
): number | string | undefined {
  const numeric = (
    raw: number | string | null | undefined,
  ): number | undefined => (raw == null ? undefined : Number(raw));

  switch (key) {
    case 'id':
      return launchIndex.get(r.id) ?? r.id;
    case 'name':
      return r.demangledName ?? '';
    case 'duration':
      return numeric(r.durationNSecNum);
    case 'compute':
      return numeric(r.computePct);
    case 'memory':
      return numeric(r.memoryPct);
    case 'registers':
      return numeric(r.registersPerThread);
    case 'grid_size':
      return numeric(r.gridSize);
  }
}
// Orders two summary rows by the column identified by `key`.
//
// A value counts as missing when it is null/undefined or a non-finite
// number. If exactly one side is missing, the missing row goes last when
// sorting descending and first when sorting ascending. Two finite numbers
// are compared numerically; anything else is compared as text with
// `localeCompare`. `descending` flips the result of both comparisons.
function compare(
  a: SummaryRow,
  b: SummaryRow,
  key: SortKey,
  descending: boolean,
  launchIndex: Map<number, number>,
): number {
  type Sortable = number | string | undefined;
  const isFiniteNumber = (v: Sortable): v is number =>
    typeof v === 'number' && Number.isFinite(v);
  const isMissing = (v: Sortable): boolean =>
    v == null || (typeof v === 'number' && !Number.isFinite(v));

  const left = getSortableValue(a, key, launchIndex);
  const right = getSortableValue(b, key, launchIndex);

  // Exactly one side has no usable value: keep real data grouped together.
  const leftMissing = isMissing(left);
  if (leftMissing !== isMissing(right)) {
    const missingAfter = leftMissing ? 1 : -1;
    return descending ? missingAfter : -missingAfter;
  }

  if (isFiniteNumber(left) && isFiniteNumber(right)) {
    const sign = Math.sign(Number(left) - Number(right));
    return descending ? -sign : sign;
  }

  // Text comparison for names (and for pairs that are both missing).
  const order = String(left ?? '').localeCompare(String(right ?? ''));
  return descending ? -order : order;
}
// Mithril component that renders the summary table.
//
// On init it takes `attrs.prefetchedRows` when supplied, otherwise fetches
// all kernel launches via {@link fetchKernelSummaryRows}. It then records
// each row's launch index and computes the per-column maxima that scale the
// relative bars. The view renders a sortable, paginated `<table>` whose rows
// can be double-clicked to open the kernel's detail view.
export const KernelSummarySection: m.Component<
  SummarySectionAttrs,
  SummaryState
> = {
  async oninit({attrs, state}) {
    // These are set before any await so `view` can run while rows load.
    state.launchIndexBySliceId = new Map();
    state.sortKey = 'id';
    state.sortDescending = false;
    state.pageOffset = 0;

    const prefetched = attrs.prefetchedRows;
    // Work on a private copy: the caller's array must not be reordered or
    // shared with this component's state.
    const rows =
      prefetched != null
        ? prefetched.slice()
        : await fetchKernelSummaryRows(attrs.ctx, attrs.engine);

    // Build launch-order map so the ID column shows 0, 1, 2, …
    // Rows stay in the order received; `view` applies the active sort.
    rows.forEach((row, zeroBasedIndex) =>
      state.launchIndexBySliceId.set(row.id, zeroBasedIndex),
    );

    // Largest finite value of a column, or undefined if there is none.
    // A plain loop is used because spreading a very large array into
    // `Math.max` can exceed the engine's argument limit.
    const finiteMax = (
      values: ReadonlyArray<number | string | null>,
    ): number | undefined => {
      let best: number | undefined;
      for (const raw of values) {
        const n = Number(raw);
        if (Number.isFinite(n) && (best === undefined || n > best)) {
          best = n;
        }
      }
      return best;
    };

    // Per-column max values drive the relative percent-bar widths
    state.rows = rows;
    state.maxDurationNSec = finiteMax(rows.map((r) => r.durationNSecNum));
    state.maxComputePct = finiteMax(rows.map((r) => r.computePct));
    state.maxMemoryPct = finiteMax(rows.map((r) => r.memoryPct));
    state.maxRegisters = finiteMax(rows.map((r) => r.registersPerThread));
    state.maxGridSize = finiteMax(rows.map((r) => r.gridSize));

    // Mithril does not redraw on its own when a lifecycle hook's promise
    // settles, so request one once the fetched rows are in place.
    if (prefetched == null) {
      m.redraw();
    }
  },

  view({state, attrs}) {
    const terminology = attrs.ctx.terminologyRegistry.get(
      attrs.ctx.terminologyId,
    );
    const rows = state.rows ?? [];

    // Formats a raw metric value into a display label with optional unit.
    const label = (
      val: number | string | null | undefined,
      unit?: string,
    ): string => {
      if (val == null || val === 'null' || val === 'undefined') {
        return '—';
      }
      // Humanize seconds when enabled
      if (unit === 'nsecond' && Number.isFinite(Number(val))) {
        if (attrs.ctx.humanizeMetrics) {
          const {value: v, unit: u} = adjustSeconds(Number(val) / 1e9);
          return `${formatNumber(v)} ${u}`;
        }
        return `${formatNumber(Number(val))} nsecond`;
      }
      const text = Number.isFinite(Number(val))
        ? String(formatNumber(Number(val)))
        : String(val);
      return unit ? `${text} ${unit}` : text;
    };

    // Sort a copy so `state.rows` keeps its stored order.
    const {sortKey, sortDescending, launchIndexBySliceId} = state;
    const sortedRows = sortKey
      ? rows
          .slice()
          .sort((a, b) =>
            compare(a, b, sortKey, sortDescending, launchIndexBySliceId),
          )
      : rows;

    // Clicking the active column flips direction; a new column starts
    // descending. Either way the table returns to the first page.
    const onSort = (key: SortKey) => {
      if (state.sortKey === key) {
        state.sortDescending = !state.sortDescending;
      } else {
        state.sortKey = key;
        state.sortDescending = true;
      }
      state.pageOffset = 0;
    };

    // Up/down arrow indicator for the active sort column
    const arrowIconFor = (key: SortKey) => {
      if (state.sortKey !== key) return null;
      const icon = state.sortDescending ? 'expand_more' : 'expand_less';
      return m(
        'i',
        {class: 'pf-icon pf-left-icon', style: 'margin-left:6px;'},
        icon,
      );
    };

    const headerCell = (text: string, key: SortKey) =>
      m(
        'th.pf-gpu-compute__summary-th',
        {
          onclick: () => onSort(key),
          title: 'Sort',
        },
        [text, arrowIconFor(key)],
      );

    // One table cell holding a relative bar for `value` against `max`.
    const barCell = (
      value: number | string | null,
      max: number | undefined,
      text: string,
    ) =>
      m(
        'td.pf-gpu-compute__summary-td',
        renderRelPercentBar(Number(value), max, text),
      );

    const pageStart = state.pageOffset;
    const pageEnd = Math.min(pageStart + PAGE_SIZE, sortedRows.length);
    const pageRows = sortedRows.slice(pageStart, pageEnd);

    return m(
      '.pf-gpu-compute',
      m('table.pf-gpu-compute__summary-table', [
        m('caption.pf-gpu-compute__summary-caption', [
          m('.pf-gpu-compute__summary-caption-row', [
            m(Icon, {
              icon: Icons.Help,
              title: 'About this table',
              style: 'font-size:16px;',
            }),
            m(
              'span',
              'This table shows all results in the report. Use the column headers to sort the results in this report. Double-Click a result to see detailed metrics.',
            ),
          ]),
        ]),
        m('colgroup', [
          m('col', {style: 'width:5%'}),
          m('col', {style: 'width:25%'}),
          m('col', {style: 'width:14%'}),
          m('col', {style: 'width:14%'}),
          m('col', {style: 'width:14%'}),
          m('col', {style: 'width:14%'}),
          m('col', {style: 'width:14%'}),
        ]),
        m(
          'thead',
          m('tr.pf-gpu-compute__summary-thead-row', [
            headerCell('ID', 'id'),
            headerCell('Demangled Name', 'name'),
            headerCell('Duration', 'duration'),
            headerCell('Compute Throughput', 'compute'),
            headerCell('Memory Throughput', 'memory'),
            headerCell('# Registers', 'registers'),
            headerCell(`${terminology.grid.title} Size`, 'grid_size'),
          ]),
        ),
        m(
          'tbody',
          pageRows.map((r) =>
            m(
              'tr.pf-gpu-compute__summary-row',
              {
                ondblclick: () => attrs.openSliceInDetail?.(r.id),
              },
              [
                m(
                  'td.pf-gpu-compute__summary-td',
                  String(launchIndexBySliceId.get(r.id) ?? r.id),
                ),
                m(
                  'td.pf-gpu-compute__summary-td.pf-gpu-compute__summary-td--name',
                  {title: r.demangledName},
                  r.demangledName,
                ),
                // The raw value goes to `label` so a missing duration shows
                // `—` rather than being coerced to 0 first.
                barCell(
                  r.durationNSecNum,
                  state.maxDurationNSec,
                  label(r.durationNSecNum, 'nsecond'),
                ),
                barCell(r.computePct, state.maxComputePct, label(r.computePct)),
                barCell(r.memoryPct, state.maxMemoryPct, label(r.memoryPct)),
                barCell(
                  r.registersPerThread,
                  state.maxRegisters,
                  label(r.registersPerThread),
                ),
                barCell(r.gridSize, state.maxGridSize, label(r.gridSize)),
              ],
            ),
          ),
        ),
      ]),
      // Pagination controls only appear when there is more than one page.
      sortedRows.length > PAGE_SIZE &&
        m('.pf-gpu-compute__summary-pagination', [
          m(Button, {
            icon: Icons.PrevPage,
            disabled: state.pageOffset === 0,
            onclick: () => {
              state.pageOffset = Math.max(0, state.pageOffset - PAGE_SIZE);
            },
          }),
          m('span', `${pageStart + 1}–${pageEnd} of ${sortedRows.length}`),
          m(Button, {
            icon: Icons.NextPage,
            disabled: state.pageOffset + PAGE_SIZE >= sortedRows.length,
            onclick: () => {
              // Clamp so the final page still shows a full PAGE_SIZE rows.
              state.pageOffset = Math.min(
                state.pageOffset + PAGE_SIZE,
                sortedRows.length - PAGE_SIZE,
              );
            },
          }),
        ]),
    );
  },
};