blob: 25c64cb3dd1799fcf9a694621001f29ad905a9a4 [file] [log] [blame]
// Copyright (C) 2024 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
import {assertUnreachable} from '../base/logging';
import {getOrCreate} from '../base/utils';
import {ColumnType, SqlValue} from './query_result';
/**
 * A dataset defines a set of rows in TraceProcessor and a schema of the
 * resultant columns. Dataset implementations describe how to get the data in
 * different ways - e.g. 'source' datasets define a dataset as a table name (or
 * select statement) + filters, whereas a 'union' dataset defines a dataset as
 * the union of other datasets.
 *
 * The idea is that users can build arbitrarily complex trees of datasets, then
 * at any point call `optimize()` to create the smallest possible tree that
 * represents the same dataset, and `query()` which produces a select statement
 * for the resultant dataset.
 *
 * Users can also use the `schema` property and `implements()` to get and test
 * the schema of a given dataset.
 */
export interface Dataset {
  /**
   * Get or calculate the resultant schema of this dataset, i.e. the columns
   * (and their types) that this dataset can provide.
   */
  readonly schema: DatasetSchema;
  /**
   * Produce a query for this dataset.
   *
   * @param schema - The schema to use for extracting columns - if undefined,
   * the most specific possible schema is evaluated from the dataset first and
   * used instead.
   * @returns The text of a SQL select statement.
   */
  query(schema?: DatasetSchema): string;
  /**
   * Optimizes a dataset into the smallest possible expression.
   *
   * For example by combining elements of union data sets that have the same src
   * and similar filters into a single set.
   *
   * For example, the following 'union' dataset...
   *
   * ```
   * {
   *   union: [
   *     {
   *       src: 'foo',
   *       schema: {
   *         'a': NUM,
   *         'b': NUM,
   *       },
   *       filter: {col: 'a', eq: 1},
   *     },
   *     {
   *       src: 'foo',
   *       schema: {
   *         'a': NUM,
   *         'b': NUM,
   *       },
   *       filter: {col: 'a', eq: 2},
   *     },
   *   ]
   * }
   * ```
   *
   * ...will be combined into a single 'source' dataset...
   *
   * ```
   * {
   *   src: 'foo',
   *   schema: {
   *     'a': NUM,
   *     'b': NUM,
   *   },
   *   filter: {col: 'a', in: [1, 2]},
   * }
   * ```
   *
   * @returns The optimized dataset. This may be the dataset itself when there
   * is nothing to optimize.
   */
  optimize(): Dataset;
  /**
   * Returns true if this dataset implements a given schema, i.e. every column
   * named in `schema` is present in this dataset's schema with the same type.
   *
   * @param schema - The schema to test against.
   */
  implements(schema: DatasetSchema): boolean;
}
/**
 * Defines a list of columns and types that define the shape of the data
 * represented by a dataset. Each key is a column name and each value is the
 * type of that column.
 */
export type DatasetSchema = Record<string, ColumnType>;
/**
 * A filter used to express that a column must equal a value.
 */
interface EqFilter {
  /** Name of the column the filter applies to. */
  readonly col: string;
  /** The value the column must be equal to. */
  readonly eq: SqlValue;
}
/**
 * A filter used to express that a column must be one of a set of values.
 */
interface InFilter {
  /** Name of the column the filter applies to. */
  readonly col: string;
  /** The values the column is allowed to take. */
  readonly in: ReadonlyArray<SqlValue>;
}
/**
 * Union of all filter types. The variants are told apart by which of the
 * `eq` / `in` properties is present.
 */
type Filter = EqFilter | InFilter;
/**
 * Named arguments for a SourceDataset.
 */
interface SourceDatasetConfig {
  /** The source of the rows: a table name or a select statement. */
  readonly src: string;
  /** The columns (and their types) exposed by the source. */
  readonly schema: DatasetSchema;
  /** Optional filter restricting the rows; when omitted no filter is applied. */
  readonly filter?: Filter;
}
/**
 * Defines a dataset with a source SQL select statement or table name, a
 * schema describing the columns, and an optional filter.
 */
export class SourceDataset implements Dataset {
  readonly src: string;
  readonly schema: DatasetSchema;
  readonly filter?: Filter;

  constructor(config: SourceDatasetConfig) {
    this.src = config.src;
    this.schema = config.schema;
    this.filter = config.filter;
  }

  /**
   * Produce a select statement for this dataset.
   *
   * The selected columns are the keys of `schema`, the source is wrapped in
   * parentheses and the filter (if any) is appended as a where clause.
   *
   * @param schema - The columns to select. Defaults to this dataset's own
   * schema.
   * @returns The text of the select statement.
   */
  query(schema?: DatasetSchema): string {
    schema = schema ?? this.schema;
    const cols = Object.keys(schema);
    const whereClause = this.filterToQuery();
    // trim() drops the trailing space left behind when there is no filter.
    return `select ${cols.join(', ')} from (${this.src}) ${whereClause}`.trim();
  }

  /**
   * A source dataset is already minimal, so this returns the dataset itself.
   */
  optimize() {
    // Cannot optimize SourceDataset
    return this;
  }

  /**
   * Returns true if every column of `schema` exists in this dataset's schema
   * with exactly the same type.
   *
   * @param schema - The schema to test against.
   */
  implements(schema: DatasetSchema): boolean {
    return Object.entries(schema).every(([name, kind]) => {
      return name in this.schema && this.schema[name] === kind;
    });
  }

  /**
   * Renders the filter as a SQL where clause.
   *
   * @returns The where clause, or an empty string if there is no filter.
   */
  private filterToQuery(): string {
    const filter = this.filter;
    if (filter === undefined) {
      return '';
    }
    if ('eq' in filter) {
      // Note: an `eq` of null renders as `= null`, which SQL never evaluates
      // to true.
      return `where ${filter.col} = ${SourceDataset.toSqlLiteral(filter.eq)}`;
    }
    if ('in' in filter) {
      const values = filter.in.map((v) => SourceDataset.toSqlLiteral(v));
      return `where ${filter.col} in (${values.join(',')})`;
    }
    return assertUnreachable(filter);
  }

  /**
   * Renders a filter value as a SQL literal so that it can be embedded in the
   * text of a query.
   *
   * - strings are wrapped in single quotes, with embedded quotes doubled;
   * - byte arrays become blob literals (x'...');
   * - null becomes `null`;
   * - anything else (numbers, bigints) is written out verbatim.
   */
  private static toSqlLiteral(value: unknown): string {
    if (value === null || value === undefined) {
      return 'null';
    }
    if (typeof value === 'string') {
      return `'${value.replace(/'/g, "''")}'`;
    }
    if (value instanceof Uint8Array) {
      const hex = Array.from(value, (byte) =>
        byte.toString(16).padStart(2, '0'),
      ).join('');
      return `x'${hex}'`;
    }
    return String(value);
  }
}
/**
 * A dataset that represents the union of multiple datasets.
 *
 * The schema of a union is the set of columns that all of its members have in
 * common, and its query is the `union all` of the queries of its members.
 */
export class UnionDataset implements Dataset {
  constructor(readonly union: ReadonlyArray<Dataset>) {}

  /**
   * The columns supported by every member of the union, i.e. the columns that
   * appear in all member schemas with the same type. Empty if the union has no
   * members.
   */
  get schema(): DatasetSchema {
    let common: DatasetSchema | undefined = undefined;
    for (const ds of this.union) {
      const dsSchema = ds.schema;
      if (common === undefined) {
        // First member: start from its schema as-is.
        common = dsSchema;
        continue;
      }
      // Keep only the columns this member also has, with the same type.
      const narrowed: DatasetSchema = {};
      for (const [key, kind] of Object.entries(common)) {
        if (key in dsSchema && dsSchema[key] === kind) {
          narrowed[key] = kind;
        }
      }
      common = narrowed;
    }
    return common ?? {};
  }

  /**
   * Produce a select statement for this union by joining the queries of all
   * members with `union all`.
   *
   * @param schema - The columns to select from every member. Defaults to the
   * schema of this union (the columns common to all members).
   * @returns The text of the query; an empty string if the union has no
   * members.
   */
  query(schema?: DatasetSchema): string {
    schema = schema ?? this.schema;
    return this.union
      .map((dataset) => dataset.query(schema))
      .join(' union all ');
  }

  /**
   * Optimizes this union by optimizing every member and then merging source
   * datasets that read from the same src.
   *
   * Source datasets are only merged when doing so keeps the set of matched
   * rows intact: they must share the same src and either all be unfiltered or
   * all filter on the same column. Filtered and unfiltered datasets, or
   * datasets filtering on different columns, are left as separate members of
   * the union because a single eq/in filter cannot express their union.
   *
   * @returns The only remaining dataset if the union collapses to one member,
   * otherwise a new UnionDataset.
   */
  optimize(): Dataset {
    // Recursively optimize each dataset of this union
    const optimizedUnion = this.union.map((ds) => ds.optimize());

    // Find all source datasets and combine them based on src and filter column
    const mergeGroups = new Map<string, SourceDataset[]>();
    const otherDatasets: Dataset[] = [];
    for (const e of optimizedUnion) {
      if (e instanceof SourceDataset) {
        // JSON keeps (src, col) unambiguous and distinguishes "no filter"
        // (null) from any real column name.
        const key = JSON.stringify([e.src, e.filter?.col ?? null]);
        const group = getOrCreate(mergeGroups, key, () => []);
        group.push(e);
      } else {
        otherDatasets.push(e);
      }
    }

    const mergedSrcSets = Array.from(mergeGroups.values()).map((srcGroup) => {
      if (srcGroup.length === 1) return srcGroup[0];

      // Combine schema across all members in the group
      const combinedSchema: DatasetSchema = {};
      for (const {schema} of srcGroup) {
        Object.assign(combinedSchema, schema);
      }

      // Merge the filters of the group. Every filter in a group targets the
      // same column; an unfiltered group produces no filter at all.
      const inFilters: InFilter[] = [];
      for (const {filter} of srcGroup) {
        if (filter === undefined) continue;
        if ('eq' in filter) {
          inFilters.push({col: filter.col, in: [filter.eq]});
        } else {
          inFilters.push(filter);
        }
      }

      return new SourceDataset({
        src: srcGroup[0].src,
        schema: combinedSchema,
        filter: mergeFilters(inFilters),
      });
    });

    const finalUnion = [...mergedSrcSets, ...otherDatasets];
    if (finalUnion.length === 1) {
      return finalUnion[0];
    } else {
      return new UnionDataset(finalUnion);
    }
  }

  /**
   * Returns true if every column of `schema` exists in the schema of this
   * union with exactly the same type.
   *
   * @param schema - The schema to test against.
   */
  implements(schema: DatasetSchema): boolean {
    // The schema getter walks every member, so evaluate it only once.
    const own = this.schema;
    return Object.entries(schema).every(([name, kind]) => {
      return name in own && own[name] === kind;
    });
  }
}
/**
 * Collapses a list of 'in' filters into a single 'in' filter.
 *
 * The merged filter targets the column of the first filter in the list and
 * holds every distinct value found across all of the filters, in the order in
 * which the values are first seen. The columns of the remaining filters are
 * not inspected, so callers are expected to pass filters that all target the
 * same column.
 *
 * @param filters - The filters to merge.
 * @returns The merged filter, or undefined if `filters` is empty.
 */
function mergeFilters(filters: InFilter[]): InFilter | undefined {
  if (filters.length === 0) return undefined;

  const head = filters[0];
  // A Set drops repeated values while keeping first-seen order.
  const distinct = new Set(head.in);
  for (const filter of filters.slice(1)) {
    for (const value of filter.in) {
      distinct.add(value);
    }
  }
  return {col: head.col, in: [...distinct]};
}