| // Copyright (C) 2024 The Android Open Source Project |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| import {UnionDataset, SourceDataset, UnionDatasetWithLineage} from './dataset'; |
| import { |
| BLOB, |
| BLOB_NULL, |
| LONG, |
| LONG_NULL, |
| NUM, |
| NUM_NULL, |
| STR, |
| STR_NULL, |
| UNKNOWN, |
| } from './query_result'; |
| |
| test('get query for simple dataset', () => { |
| const dataset = new SourceDataset({ |
| src: 'slice', |
| schema: {id: NUM}, |
| }); |
| |
| expect(dataset.query()).toEqual(`SELECT |
| id |
| FROM (slice)`); |
| }); |
| |
| test("get query for simple dataset with 'eq' filter", () => { |
| const dataset = new SourceDataset({ |
| src: 'slice', |
| schema: {id: NUM}, |
| filter: { |
| col: 'id', |
| eq: 123, |
| }, |
| }); |
| |
| expect(dataset.query()).toEqual(`SELECT |
| id |
| FROM (slice) |
| WHERE id = 123`); |
| }); |
| |
| test("get query for simple dataset with an 'in' filter", () => { |
| const dataset = new SourceDataset({ |
| src: 'slice', |
| schema: {id: NUM}, |
| filter: { |
| col: 'id', |
| in: [123, 456], |
| }, |
| }); |
| |
| expect(dataset.query()).toEqual(`SELECT |
| id |
| FROM (slice) |
| WHERE id IN (123, 456)`); |
| }); |
| |
| test('get query with column mapping', () => { |
| const dataset = new SourceDataset({ |
| src: 'slice', |
| schema: {id: NUM, name: STR}, |
| select: {id: 'id', name: 'slice_name'}, |
| }); |
| |
| expect(dataset.query()).toEqual(`SELECT |
| id, |
| slice_name AS name |
| FROM (slice)`); |
| }); |
| |
| test('get query with partial column mapping', () => { |
| const dataset = new SourceDataset({ |
| src: 'slice', |
| schema: {id: NUM, name: STR, dur: LONG}, |
| select: {id: 'id', name: 'slice_name', dur: 'dur'}, |
| }); |
| |
| // Only 'name' is mapped, 'id' and 'dur' use their original names |
| expect(dataset.query()).toEqual(`SELECT |
| id, |
| slice_name AS name, |
| dur |
| FROM (slice)`); |
| }); |
| |
| test('get query with column mapping and filter', () => { |
| const dataset = new SourceDataset({ |
| src: 'slice', |
| schema: {id: NUM, name: STR}, |
| select: {id: 'slice_id', name: 'slice_name'}, |
| filter: { |
| col: 'id', |
| eq: 123, |
| }, |
| }); |
| |
| expect(dataset.query()).toEqual(`SELECT |
| slice_id AS id, |
| slice_name AS name |
| FROM (slice) |
| WHERE id = 123`); |
| }); |
| |
| test('get query with single join', () => { |
| const dataset = new SourceDataset({ |
| src: 'slice', |
| schema: {id: NUM, name: STR, tid: NUM}, |
| joins: { |
| thread: {from: 'thread USING (utid)'}, |
| }, |
| }); |
| |
| expect(dataset.query()).toEqual(`SELECT |
| id, |
| name, |
| tid |
| FROM (slice) JOIN thread AS thread USING (utid)`); |
| }); |
| |
| test('get query with multiple joins', () => { |
| const dataset = new SourceDataset({ |
| src: 'slice', |
| schema: {id: NUM, name: STR, process_name: STR}, |
| joins: { |
| thread: {from: 'thread USING (utid)'}, |
| process: {from: 'process USING (upid)'}, |
| }, |
| }); |
| |
| expect(dataset.query()).toEqual(`SELECT |
| id, |
| name, |
| process_name |
| FROM (slice) JOIN thread AS thread USING (utid) JOIN process AS process USING (upid)`); |
| }); |
| |
| test('get query with joins and filter', () => { |
| const dataset = new SourceDataset({ |
| src: 'slice', |
| schema: {id: NUM, name: STR}, |
| joins: { |
| thread: {from: 'thread USING (utid)'}, |
| }, |
| filter: { |
| col: 'id', |
| eq: 123, |
| }, |
| }); |
| |
| expect(dataset.query()).toEqual(`SELECT |
| id, |
| name |
| FROM (slice) JOIN thread AS thread USING (utid) |
| WHERE id = 123`); |
| }); |
| |
| test('get query with joins, column mapping, and filter', () => { |
| const dataset = new SourceDataset({ |
| src: 'slice', |
| schema: {id: NUM, thread_name: STR}, |
| select: {id: 'slice_id', thread_name: 'thread.name'}, |
| joins: { |
| thread: {from: 'thread USING (utid)'}, |
| }, |
| filter: { |
| col: 'id', |
| in: [123, 456], |
| }, |
| }); |
| |
| expect(dataset.query()).toEqual(`SELECT |
| slice_id AS id, |
| thread.name AS thread_name |
| FROM (slice) JOIN thread AS thread USING (utid) |
| WHERE id IN (123, 456)`); |
| }); |
| |
| test('get query with select using object format', () => { |
| const dataset = new SourceDataset({ |
| src: 'slice', |
| schema: {id: NUM, thread_name: STR}, |
| select: { |
| id: 'id', |
| thread_name: {expr: 'thread.name', join: 'thread'}, |
| }, |
| joins: { |
| thread: {from: 'thread USING (utid)'}, |
| }, |
| }); |
| |
| expect(dataset.query()).toEqual(`SELECT |
| id, |
| thread.name AS thread_name |
| FROM (slice) JOIN thread AS thread USING (utid)`); |
| }); |
| |
| test('get query with mixed select formats', () => { |
| const dataset = new SourceDataset({ |
| src: 'slice', |
| schema: {id: NUM, name: STR, thread_name: STR}, |
| select: { |
| id: 'slice_id', |
| name: 'slice.name', |
| thread_name: {expr: 'thread.name', join: 'thread'}, |
| }, |
| joins: { |
| thread: {from: 'thread USING (utid)'}, |
| }, |
| }); |
| |
| expect(dataset.query()).toEqual(`SELECT |
| slice_id AS id, |
| slice.name AS name, |
| thread.name AS thread_name |
| FROM (slice) JOIN thread AS thread USING (utid)`); |
| }); |
| |
| test('unique joins not referenced in select are omitted', () => { |
| const dataset = new SourceDataset({ |
| src: 'slice', |
| schema: {id: NUM, name: STR}, |
| joins: { |
| thread: {from: 'thread USING (utid)', unique: true}, |
| process: {from: 'process USING (upid)', unique: true}, |
| }, |
| }); |
| |
| // Neither join is referenced, so both should be omitted |
| expect(dataset.query()).toEqual(`SELECT |
| id, |
| name |
| FROM (slice)`); |
| }); |
| |
| test('non-unique joins are always included', () => { |
| const dataset = new SourceDataset({ |
| src: 'slice', |
| schema: {id: NUM, name: STR}, |
| joins: { |
| thread: {from: 'thread USING (utid)', unique: false}, |
| process: {from: 'process USING (upid)'}, |
| }, |
| }); |
| |
| // Both joins should be included even though not referenced |
| expect(dataset.query()).toEqual(`SELECT |
| id, |
| name |
| FROM (slice) JOIN thread AS thread USING (utid) JOIN process AS process USING (upid)`); |
| }); |
| |
| test('unique joins referenced in select are included', () => { |
| const dataset = new SourceDataset({ |
| src: 'slice', |
| schema: {id: NUM, thread_name: STR}, |
| select: { |
| id: 'id', |
| thread_name: {expr: 'thread.name', join: 'thread'}, |
| }, |
| joins: { |
| thread: {from: 'thread USING (utid)', unique: true}, |
| process: {from: 'process USING (upid)', unique: true}, |
| }, |
| }); |
| |
| // Only 'thread' join is referenced, so 'process' should be omitted |
| expect(dataset.query()).toEqual(`SELECT |
| id, |
| thread.name AS thread_name |
| FROM (slice) JOIN thread AS thread USING (utid)`); |
| }); |
| |
| test('mixed unique and non-unique joins', () => { |
| const dataset = new SourceDataset({ |
| src: 'slice', |
| schema: {id: NUM, name: STR}, |
| joins: { |
| thread: {from: 'thread USING (utid)', unique: true}, |
| process: {from: 'process USING (upid)', unique: false}, |
| }, |
| }); |
| |
| // 'thread' is unique and not referenced, so omitted |
| // 'process' is not unique, so included |
| expect(dataset.query()).toEqual(`SELECT |
| id, |
| name |
| FROM (slice) JOIN process AS process USING (upid)`); |
| }); |
| |
| test('union query with column elimination', () => { |
| const dataset = UnionDataset.create([ |
| new SourceDataset({ |
| src: 'slice', |
| schema: {id: NUM, name: STR, dur: LONG}, |
| filter: {col: 'id', eq: 123}, |
| }), |
| new SourceDataset({ |
| src: 'slice', |
| schema: {id: NUM, name: STR, dur: LONG}, |
| filter: {col: 'id', eq: 456}, |
| }), |
| ]); |
| |
| // When querying with a subset of columns, only those columns are selected |
| expect(dataset.query({id: NUM, name: STR})).toEqual(`SELECT |
| id, |
| name |
| FROM (slice) |
| WHERE id IN (123, 456)`); |
| }); |
| |
| test('union query with join elimination', () => { |
| const dataset = UnionDataset.create([ |
| new SourceDataset({ |
| src: 'slice', |
| schema: {id: NUM, name: STR, thread_name: STR}, |
| select: { |
| id: 'id', |
| name: 'name', |
| thread_name: {expr: 'thread.name', join: 'thread'}, |
| }, |
| joins: { |
| thread: {from: 'thread USING (utid)', unique: true}, |
| }, |
| filter: {col: 'id', eq: 123}, |
| }), |
| new SourceDataset({ |
| src: 'slice', |
| schema: {id: NUM, name: STR, thread_name: STR}, |
| select: { |
| id: 'id', |
| name: 'name', |
| thread_name: {expr: 'thread.name', join: 'thread'}, |
| }, |
| joins: { |
| thread: {from: 'thread USING (utid)', unique: true}, |
| }, |
| filter: {col: 'id', eq: 456}, |
| }), |
| ]); |
| |
| // When querying without thread_name, the unique thread join should be eliminated |
| expect(dataset.query({id: NUM, name: STR})).toEqual(`SELECT |
| id, |
| name |
| FROM (slice) |
| WHERE id IN (123, 456)`); |
| |
| // When querying with thread_name, the thread join should be included |
| expect(dataset.query({id: NUM, name: STR, thread_name: STR})).toEqual(`SELECT |
| id, |
| name, |
| thread.name AS thread_name |
| FROM (slice) JOIN thread AS thread USING (utid) |
| WHERE id IN (123, 456)`); |
| }); |
| |
| test('get query for union dataset', () => { |
| const dataset = UnionDataset.create([ |
| new SourceDataset({ |
| src: 'slice', |
| schema: {id: NUM}, |
| filter: { |
| col: 'id', |
| eq: 123, |
| }, |
| }), |
| new SourceDataset({ |
| src: 'slice', |
| schema: {id: NUM}, |
| filter: { |
| col: 'id', |
| eq: 456, |
| }, |
| }), |
| ]); |
| |
| // Query automatically optimizes the union into a single source with IN filter |
| expect(dataset.query()).toEqual(`SELECT |
| id |
| FROM (slice) |
| WHERE id IN (123, 456)`); |
| }); |
| |
| test('union dataset batches large numbers of unions', () => { |
| const datasets = []; |
| for (let i = 0; i < 800; i++) { |
| datasets.push( |
| new SourceDataset({ |
| src: 'foo', |
| schema: {bar: NUM}, |
| filter: { |
| col: 'some_id', |
| eq: i, |
| }, |
| }), |
| ); |
| } |
| |
| const query = UnionDataset.create(datasets).query(); |
| |
| // After optimization, all 800 datasets are merged into a single source with |
| // an IN filter, so the query should just be a simple SELECT with WHERE IN. |
| expect(query).toContain('SELECT\n bar\nFROM (foo)\nWHERE some_id IN ('); |
| |
| // The IN clause should contain all 800 values |
| const inMatch = query.match(/IN \(([\d, ]+)\)/); |
| expect(inMatch).toBeTruthy(); |
| if (inMatch) { |
| const values = inMatch[1].split(','); |
| expect(values.length).toBe(800); |
| } |
| }); |
| |
| test('implements', () => { |
| const dataset = new SourceDataset({ |
| src: 'slice', |
| schema: {id: NUM, ts: LONG}, |
| }); |
| |
| expect(dataset.implements({id: NUM})).toBe(true); |
| expect(dataset.implements({id: NUM, ts: LONG})).toBe(true); |
| expect(dataset.implements({id: NUM, ts: LONG, name: STR})).toBe(false); |
| expect(dataset.implements({id: LONG})).toBe(false); |
| }); |
| |
| test('implements with relaxed compat checks on optional types', () => { |
| expect( |
| new SourceDataset({ |
| src: 'slice', |
| schema: {foo: NUM_NULL, bar: LONG_NULL, baz: STR_NULL, qux: BLOB_NULL}, |
| }).implements({ |
| foo: NUM_NULL, |
| bar: LONG_NULL, |
| baz: STR_NULL, |
| qux: BLOB_NULL, |
| }), |
| ).toBe(true); |
| |
| expect( |
| new SourceDataset({ |
| src: 'slice', |
| schema: {foo: NUM, bar: LONG, baz: STR, qux: BLOB}, |
| }).implements({ |
| foo: NUM_NULL, |
| bar: LONG_NULL, |
| baz: STR_NULL, |
| qux: BLOB_NULL, |
| }), |
| ).toBe(true); |
| |
| expect( |
| new SourceDataset({ |
| src: 'slice', |
| schema: {foo: NUM_NULL, bar: LONG_NULL, baz: STR_NULL, qux: BLOB_NULL}, |
| }).implements({ |
| foo: NUM, |
| bar: LONG, |
| baz: STR, |
| qux: BLOB, |
| }), |
| ).toBe(false); |
| }); |
| |
| test('find the schema of a simple dataset', () => { |
| const dataset = new SourceDataset({ |
| src: 'slice', |
| schema: {id: NUM, ts: LONG}, |
| }); |
| |
| expect(dataset.schema).toMatchObject({id: NUM, ts: LONG}); |
| }); |
| |
| test('find the schema of a union where source sets differ in their names', () => { |
| const dataset = UnionDataset.create([ |
| new SourceDataset({ |
| src: 'slice', |
| schema: {foo: NUM}, |
| }), |
| new SourceDataset({ |
| src: 'slice', |
| schema: {bar: NUM}, |
| }), |
| ]); |
| |
| expect(dataset.schema).toMatchObject({}); |
| }); |
| |
| test('find the schema of a union with differing source sets', () => { |
| const dataset = UnionDataset.create([ |
| new SourceDataset({ |
| src: 'slice', |
| schema: {foo: NUM}, |
| }), |
| new SourceDataset({ |
| src: 'slice', |
| schema: {foo: LONG}, |
| }), |
| ]); |
| |
| expect(dataset.schema).toMatchObject({}); |
| }); |
| |
| test('find the schema of a union with one column in common', () => { |
| const dataset = UnionDataset.create([ |
| new SourceDataset({ |
| src: 'slice', |
| schema: {foo: NUM, bar: NUM}, |
| }), |
| new SourceDataset({ |
| src: 'slice', |
| schema: {foo: NUM, baz: NUM}, |
| }), |
| ]); |
| |
| expect(dataset.schema).toMatchObject({foo: NUM}); |
| }); |
| |
| test('optimize a union dataset', () => { |
| const dataset = UnionDataset.create([ |
| new SourceDataset({ |
| src: 'slice', |
| schema: {id: NUM}, |
| filter: { |
| col: 'track_id', |
| eq: 123, |
| }, |
| }), |
| new SourceDataset({ |
| src: 'slice', |
| schema: {id: NUM}, |
| filter: { |
| col: 'track_id', |
| eq: 456, |
| }, |
| }), |
| ]); |
| |
| // Optimization happens automatically in query() |
| expect(dataset.query()).toEqual(`SELECT |
| id |
| FROM (slice) |
| WHERE track_id IN (123, 456)`); |
| }); |
| |
| test('optimize a union dataset with different types of filters', () => { |
| const dataset = UnionDataset.create([ |
| new SourceDataset({ |
| src: 'slice', |
| schema: {id: NUM}, |
| filter: { |
| col: 'track_id', |
| eq: 123, |
| }, |
| }), |
| new SourceDataset({ |
| src: 'slice', |
| schema: {id: NUM}, |
| filter: { |
| col: 'track_id', |
| in: [456, 789], |
| }, |
| }), |
| ]); |
| |
| // Optimization merges all values into a single IN filter |
| expect(dataset.query()).toEqual(`SELECT |
| id |
| FROM (slice) |
| WHERE track_id IN (123, 456, 789)`); |
| }); |
| |
| test('optimize a union dataset with different schemas', () => { |
| const dataset = UnionDataset.create([ |
| new SourceDataset({ |
| src: 'slice', |
| schema: {foo: NUM}, |
| }), |
| new SourceDataset({ |
| src: 'slice', |
| schema: {bar: NUM}, |
| }), |
| ]); |
| |
| // When querying with the union schema (which is empty {}), we get an empty |
| // SELECT. But we can query with a specific schema to get columns. |
| expect(dataset.query({foo: NUM, bar: NUM})).toEqual(`SELECT |
| foo, |
| bar |
| FROM (slice)`); |
| }); |
| |
| test('union type widening', () => { |
| const dataset = UnionDataset.create([ |
| new SourceDataset({ |
| src: 'slice', |
| schema: {foo: NUM, bar: STR_NULL, baz: BLOB, missing: UNKNOWN}, |
| }), |
| new SourceDataset({ |
| src: 'slice', |
| schema: {foo: NUM_NULL, bar: STR, baz: LONG}, |
| }), |
| ]); |
| |
| expect(dataset.schema).toEqual({ |
| foo: NUM_NULL, |
| bar: STR_NULL, |
| baz: UNKNOWN, |
| }); |
| }); |
| |
| test('union dataset keeps datasets with different joins separate', () => { |
| const datasetA = new SourceDataset({ |
| src: 'slice', |
| schema: {id: NUM}, |
| joins: { |
| thread: {from: 'thread USING (utid)'}, |
| }, |
| }); |
| |
| const datasetB = new SourceDataset({ |
| src: 'slice', |
| schema: {id: NUM}, |
| // No joins |
| }); |
| |
| const union = UnionDataset.create([datasetA, datasetB]); |
| const sql = union.query(); |
| |
| // Should contain UNION ALL because they cannot be merged |
| expect(sql).toContain('UNION ALL'); |
| |
| // One part should have the join |
| expect(sql).toContain('JOIN thread'); |
| }); |
| |
| test('union dataset merges datasets with same joins', () => { |
| const datasetA = new SourceDataset({ |
| src: 'slice', |
| schema: {id: NUM}, |
| joins: { |
| thread: {from: 'thread USING (utid)'}, |
| }, |
| filter: {col: 'id', eq: 1}, |
| }); |
| |
| const datasetB = new SourceDataset({ |
| src: 'slice', |
| schema: {id: NUM}, |
| joins: { |
| thread: {from: 'thread USING (utid)'}, |
| }, |
| filter: {col: 'id', eq: 2}, |
| }); |
| |
| const union = UnionDataset.create([datasetA, datasetB]); |
| const sql = union.query(); |
| |
| // Should NOT contain UNION ALL, they should merge |
| expect(sql).not.toContain('UNION ALL'); |
| expect(sql).toContain('WHERE id IN (1, 2)'); |
| }); |
| |
| test('union dataset with lineage keeps datasets with different joins separate', () => { |
| const datasetA = new SourceDataset({ |
| src: 'slice', |
| schema: {id: NUM}, |
| joins: { |
| thread: {from: 'thread USING (utid)'}, |
| }, |
| }); |
| |
| const datasetB = new SourceDataset({ |
| src: 'slice', |
| schema: {id: NUM}, |
| // No joins |
| }); |
| |
| const union = UnionDatasetWithLineage.create([datasetA, datasetB]); |
| const sql = union.query(); |
| |
| // Should contain UNION ALL because they cannot be merged |
| expect(sql).toContain('UNION ALL'); |
| |
| // One part should have the join |
| expect(sql).toContain('JOIN thread'); |
| }); |
| |
| test('union dataset with lineage merges datasets with same joins', () => { |
| const datasetA = new SourceDataset({ |
| src: 'slice', |
| schema: {id: NUM}, |
| joins: { |
| thread: {from: 'thread USING (utid)'}, |
| }, |
| filter: {col: 'id', eq: 1}, |
| }); |
| |
| const datasetB = new SourceDataset({ |
| src: 'slice', |
| schema: {id: NUM}, |
| joins: { |
| thread: {from: 'thread USING (utid)'}, |
| }, |
| filter: {col: 'id', eq: 2}, |
| }); |
| |
| const union = UnionDatasetWithLineage.create([datasetA, datasetB]); |
| const sql = union.query(); |
| |
| // Should NOT contain UNION ALL, they should merge |
| expect(sql).not.toContain('UNION ALL'); |
| expect(sql).toContain('WHERE id IN (1, 2)'); |
| }); |