| /* |
| * Copyright (C) 2022 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #include "src/trace_processor/db/view.h" |
| |
| #include <stddef.h> |
| #include <stdint.h> |
| #include <algorithm> |
| #include <iterator> |
| #include <limits> |
| #include <map> |
| #include <memory> |
| #include <string> |
| #include <vector> |
| |
| #include "perfetto/base/compiler.h" |
| #include "perfetto/base/flat_set.h" |
| #include "perfetto/ext/base/flat_hash_map.h" |
| #include "perfetto/ext/base/string_view.h" |
| #include "perfetto/trace_processor/basic_types.h" |
| #include "src/trace_processor/containers/row_map.h" |
| #include "src/trace_processor/db/column.h" |
| #include "src/trace_processor/db/table.h" |
| #include "src/trace_processor/db/typed_column.h" |
| |
| namespace perfetto { |
| namespace trace_processor { |
| |
| View::View() = default; |
| |
| View::View(std::unique_ptr<TableNode> root, |
| std::vector<SourceColumn> source_col_by_output_idx, |
| Table::Schema schema) |
| : root_node_(std::move(root)), |
| source_col_by_output_idx_(std::move(source_col_by_output_idx)), |
| schema_(std::move(schema)) {} |
| |
| View::~View() = default; |
| |
| base::Status View::Create(Table* root_table, |
| const char* root_table_name, |
| std::initializer_list<JoinTable> joins, |
| std::initializer_list<OutputColumn> cols, |
| View* view) { |
| // Insert the node for the root table; the column indices being nullopt |
| // indicates this is the root. |
| std::unique_ptr<TableNode> root_node( |
| new TableNode{root_table, base::nullopt, base::nullopt, JoinFlag::kNoFlag, |
| TableNode::Children{}}); |
| base::FlatHashMap<base::StringView, TableNode*> node_map; |
| node_map.Insert(root_table_name, root_node.get()); |
| |
| // Verify that all the joins are well-formed and build the join-tree |
| // structure. |
| for (const JoinTable& join : joins) { |
| // Verify that the previous table was previously defined (either by |
| // the root or prior join). |
| TableNode** prev_node_it = node_map.Find(join.prev_table_name); |
| if (!prev_node_it) { |
| return base::ErrStatus( |
| "View has table %s joining with table %s which was not " |
| "previously defined", |
| join.table_name, join.prev_table_name); |
| } |
| TableNode* prev_node = *prev_node_it; |
| |
| // Verify that the previous table's column exists. |
| base::Optional<uint32_t> opt_prev_col_idx = |
| prev_node->table->GetColumnIndexByName(join.prev_col); |
| if (!opt_prev_col_idx) { |
| return base::ErrStatus( |
| "View references column %s in table %s which does not exist", |
| join.prev_col, join.prev_table_name); |
| } |
| |
| // Verify that the current table's column exists. |
| base::Optional<uint32_t> opt_col_idx = |
| join.table->GetColumnIndexByName(join.col); |
| if (!opt_col_idx) { |
| return base::ErrStatus( |
| "View references column %s in table %s which does not exist", |
| join.col, join.table_name); |
| } |
| |
| // TODO(lalitm): add some extra checks about the columns being joined here |
| // (i.e. right column being an id, left column being non-nullable, neither |
| // column being a dummy column etc, neither column is hidden etc.). |
| |
| // Build the node and insert it into the map. |
| prev_node->children.emplace_back( |
| new TableNode{join.table, *opt_col_idx, *opt_prev_col_idx, |
| join.join_flags, TableNode::Children{}}); |
| |
| auto it_and_inserted = |
| node_map.Insert(join.table_name, prev_node->children.back().get()); |
| if (!it_and_inserted.second) { |
| return base::ErrStatus("View has duplicate table name %s", |
| join.table_name); |
| } |
| } |
| |
| // Verify that there all the output columns are well formed. |
| { |
| base::FlatSet<base::StringView> col_names; |
| for (const OutputColumn& col : cols) { |
| auto it_and_inserted = col_names.insert(col.col_name); |
| if (!it_and_inserted.second) { |
| return base::ErrStatus("View has duplicate column %s", col.col_name); |
| } |
| |
| TableNode** node_it = node_map.Find(col.source_table_name); |
| if (!node_it) { |
| return base::ErrStatus( |
| "View references table %s as source for column %s which does " |
| "not exist", |
| col.source_table_name, col.col_name); |
| } |
| |
| const TableNode* node = *node_it; |
| if (!node->table->GetColumnIndexByName(col.source_col_name)) { |
| return base::ErrStatus( |
| "View references column %s in table %s as source for column %s " |
| "which does not exist", |
| col.source_col_name, col.source_table_name, col.col_name); |
| } |
| } |
| } |
| |
| // Build the schema of the output table and a mapping from each output column |
| // to the source column which generates it. |
| std::vector<SourceColumn> source_col_by_output_idx(cols.size()); |
| Table::Schema schema; |
| for (const OutputColumn& col : cols) { |
| const TableNode* node = *node_map.Find(col.source_table_name); |
| uint32_t table_col_idx = |
| *node->table->GetColumnIndexByName(col.source_col_name); |
| |
| const Column& table_col = node->table->GetColumn(table_col_idx); |
| PERFETTO_DCHECK(!table_col.IsHidden()); |
| |
| // TODO(lalitm): if the view specifies the right hand side table as |
| // the source for a joined column, we should be able to use the left hand |
| // side instead. Add this as a future optimization or detect it and |
| // error out. |
| |
| base::StringView source_table_name(col.source_table_name); |
| schema.columns.emplace_back(Table::Schema::Column{ |
| col.col_name, table_col.type(), |
| source_table_name == root_table_name ? table_col.IsId() : false, |
| source_table_name == root_table_name ? table_col.IsSorted() : false, |
| table_col.IsHidden(), |
| source_table_name == root_table_name ? table_col.IsSetId() : false}); |
| |
| uint32_t output_idx = static_cast<uint32_t>(schema.columns.size() - 1); |
| source_col_by_output_idx[output_idx] = {node, table_col_idx}; |
| } |
| |
| *view = View(std::move(root_node), std::move(source_col_by_output_idx), |
| std::move(schema)); |
| return base::OkStatus(); |
| } |
| |
| View::View(Table* root_table, |
| const char* root_table_name, |
| std::initializer_list<JoinTable> joins, |
| std::initializer_list<OutputColumn> columns) { |
| base::Status status = Create(root_table, root_table_name, std::move(joins), |
| std::move(columns), this); |
| if (!status.ok()) { |
| PERFETTO_FATAL("Failed building view: %s", status.c_message()); |
| } |
| } |
| |
| Table View::Query(const std::vector<Constraint>& cs, |
| const std::vector<Order>& ob, |
| const BitVector& cols_used) const { |
| PERFETTO_DCHECK(cols_used.size() == schema_.columns.size()); |
| |
| TableNode* root = root_node_.get(); |
| |
| // Below is the core algorithm which does joining and querying simultaneously. |
| // We do this to allow optimizations on which way to order the join and |
| // filter based on the join type, constraints, row counters etc. |
| // |
| // The algorithm is implemented by the |QueryHelper| class for the purposes |
| // of sharing a bunch of temporary state between the different stages of the |
| // algorithm. |
| |
| // The constructor for query helper builds all the temporary state: |
| // essentially a copy of the join tree with metadata about which tables are |
| // used, which tables remove rows from parents and generates the initial |
| // output tables and RowMaps. |
| QueryHelper helper(root, source_col_by_output_idx_, cs, cols_used); |
| |
| // |FilterAndJoinRecursive| is responsible for filtering all relevant tables |
| // which have a constraint necessary for them, materializing any tables |
| // participating in the join and computing the "child" table and "parent" |
| // RowMap. |
| // |
| // It does *not* propogate the RowMap downwards: this is done by |
| // |ApplyRowMapRecursive|. We don't do this because it would be very |
| // inefficient to constantly propogate the RowMap at every level in the middle |
| // of a DFS (at its heart, this function is a post-order DFS). |
| helper.FilterAndJoinRecursive(root); |
| |
| // |ApplyRowMapRecursive| is responsible for recursively propogating the |
| // join RowMaps downwards. This is necessary because if you have |
| // |
| // A JOIN B JOIN C |
| // |
| // |FilterAndJoinRecursive| will compute the final state of A but only |
| // intermediate states for B and C: for B, it will filter out all rows which |
| // don't exist in C and for C it will simply leave as-is. The fact that |
| // every row in A now has a corresponding row in B and similarily with C |
| // is the job of this function. |
| // |
| // ApplyRowMapRecursive then pushes down the RowMap representing the join A |
| // and B and applies that to B. Finally, it selects the B-C RowMap with the |
| // A-B RowMap and applies this to C's table. |
| helper.ApplyRowMapRecursive(root); |
| |
| // |BuildTable| converts the intermediate tables from the above and generates |
| // a cohesive table matching the schema of this view. Any "not used" columns |
| // are simply replaced with dummy columns who cannot be queried which saves |
| // the cost of doing unnecessary joins. |
| Table filtered = |
| helper.BuildTable(root, schema_, source_col_by_output_idx_, cols_used); |
| |
| // The final step is simply to sort the table resulting from filtering. |
| // |
| // TODO(lalitm): we could be more efficient about this and sort the source |
| // tables *before* we join. However, given sorts are relatively rare, we don't |
| // do this yet. |
| return filtered.Sort(ob); |
| } |
| |
| View::QueryHelper::QueryHelper( |
| TableNode* root_node, |
| const std::vector<SourceColumn>& source_col_by_output_idx, |
| const std::vector<Constraint>& cs, |
| const BitVector& cols_used) |
| : state_(BuildNodeStateMap(root_node, |
| source_col_by_output_idx, |
| cs, |
| cols_used)) {} |
| |
| void View::QueryHelper::FilterAndJoinRecursive(TableNode* node) { |
| NodeState& state = *state_.Find(node); |
| |
| // TODO(lalitm): instead of computing the left table straight away here, we |
| // could more intelligently figure out whether doing the join first is more |
| // efficient. |
| state.output = state.output.Filter(state.cs); |
| |
| const Table& left_table = state.output; |
| RowMap left_rm(0, left_table.row_count()); |
| for (const auto& child : node->children) { |
| NodeState* child_state = state_.Find(child.get()); |
| |
| // If we have no rows, just bail out to minimize work done. |
| if (left_rm.empty()) |
| break; |
| |
| // If the table is not used and doesn't remove any rows in the parent, we |
| // can just rely on the default RowMap. |
| if (!child_state->is_used && !child_state->removes_parent_rows) |
| break; |
| |
| // Recurse on the child table so we now the contents of the right table |
| // before we filter any further. |
| FilterAndJoinRecursive(child.get()); |
| |
| // If the right table is empty, the left table cannot possibly join |
| // without removing rows. |
| const Table& right_table = child_state->output; |
| if (right_table.row_count() == 0) { |
| left_rm = RowMap(); |
| break; |
| } |
| |
| const auto& left_col = *TypedColumn<BaseId>::FromColumn( |
| &state.output.GetColumn(*child->parent_join_col_idx)); |
| const auto& right_col = *IdColumn<BaseId>::FromColumn( |
| &child_state->output.GetColumn(*child->join_col_idx)); |
| |
| // The core join loop. This function iterates through every row in |
| // the left table and figures out whether to keep it if the row |
| // also exists in the right table. While doing this, it also figures |
| // out the row number in the right table for every row in the left table. |
| std::vector<uint32_t> right_rm_iv; |
| right_rm_iv.reserve(left_rm.size()); |
| left_col.overlay().FilterInto(&left_rm, [&](uint32_t idx) { |
| // Check if the right table has the value from the left table. |
| base::Optional<uint32_t> opt_idx = |
| right_col.IndexOf(left_col.GetAtIdx(idx)); |
| |
| // If it doesn't, return false indicating that this row should be |
| // removed from the left table. |
| if (!opt_idx) |
| return false; |
| |
| // If the row does exist, then keep track of the index of the row |
| // for applying to the right table and return true to also keep this |
| // row in the left table. |
| right_rm_iv.emplace_back(*opt_idx); |
| return true; |
| }); |
| child_state->parent_join_rm = RowMap(std::move(right_rm_iv)); |
| } |
| state.output = state.output.Apply(std::move(left_rm)); |
| } |
| |
| void View::QueryHelper::ApplyRowMapRecursive(TableNode* node, RowMap rm) { |
| NodeState& state = *state_.Find(node); |
| for (const auto& child : node->children) { |
| const NodeState& child_state = *state_.Find(child.get()); |
| // If the child table is not used, then we don't need to recurse any |
| // further. |
| if (!child_state.is_used) |
| break; |
| ApplyRowMapRecursive(child.get(), |
| child_state.parent_join_rm.SelectRows(rm)); |
| } |
| state.output = state.output.Apply(std::move(rm)); |
| } |
| |
| View::QueryHelper::NodeStateMap View::QueryHelper::BuildNodeStateMap( |
| TableNode* root_node, |
| const std::vector<SourceColumn>& source_col_by_output_idx, |
| const std::vector<Constraint>& cs, |
| const BitVector& cols_used) { |
| // Populate the map contains all the nodes in the tree. |
| base::FlatHashMap<const TableNode*, QueryHelper::NodeState> node_state; |
| PostOrderDfs(root_node, [&node_state](TableNode* node) { |
| node_state.Insert(node, |
| QueryHelper::NodeState{ |
| {}, false, false, node->table->Copy(), RowMap()}); |
| }); |
| |
| // For each constraint, add the translated constraint to the relevant table's |
| // constraint set. |
| for (const Constraint& c : cs) { |
| const auto& source_col = source_col_by_output_idx[c.col_idx]; |
| auto& metadata = *node_state.Find(source_col.first); |
| metadata.cs.emplace_back(Constraint{source_col.second, c.op, c.value}); |
| } |
| |
| // For each used column, mark the associated table as being used. |
| for (auto it = cols_used.IterateSetBits(); it; it.Next()) { |
| const auto& source = source_col_by_output_idx[it.index()]; |
| node_state.Find(source.first)->is_used = true; |
| } |
| |
| // For each node, figure out whether ti will cause parent rows |
| // to be removed. |
| for (auto it = node_state.GetIterator(); it; ++it) { |
| // The below logic doesn't make sense on the root node. |
| if (it.key() == root_node) |
| continue; |
| |
| // A join will retain (i.e. *not* remove parent rows) if one of the |
| // following is true: |
| // a) the child (right-side of join) table contains every id |
| // which could exist in the parent (left-side) table. |
| // TODO(lalitm): add more conditions here. |
| bool join_retains_parent_rows = |
| (it.key()->join_flags & JoinFlag::kIdAlwaysPresent) != 0; |
| |
| // However, if this table has constraints, then we could always remove the |
| // parents rows even if the join would normally retain all rows. |
| it.value().removes_parent_rows = |
| !it.value().cs.empty() || !join_retains_parent_rows; |
| } |
| |
| // Do a DFS on the node tree and propogate up the is_used and |
| // removes_parent_rows boolean. In other words, if a table is used by SQLite, |
| // every ancestor must also be used as we need to join with every table on the |
| // path between the root and used table. Similarily, if a table removes parent |
| // rows, then it does this recursively upwards. |
| PostOrderDfs(root_node, [&node_state](TableNode* node) { |
| NodeState& state = *node_state.Find(node); |
| for (const auto& child : node->children) { |
| const NodeState& child_metadata = *node_state.Find(child.get()); |
| state.is_used |= child_metadata.is_used; |
| state.removes_parent_rows |= child_metadata.removes_parent_rows; |
| } |
| }); |
| |
| return node_state; |
| } |
| |
| Table View::QueryHelper::BuildTable( |
| TableNode* root, |
| const Table::Schema& schema, |
| const std::vector<SourceColumn>& source_col_by_output_idx, |
| const BitVector& cols_used) { |
| NodeState& root_state = *state_.Find(root); |
| |
| Table output(root->table->string_pool_); |
| output.row_count_ = root_state.output.row_count(); |
| output.overlays_.emplace_back(ColumnStorageOverlay(output.row_count_)); |
| |
| std::map<std::pair<const TableNode*, uint32_t>, uint32_t> cached_rm; |
| for (auto it = cols_used.IterateAllBits(); it; it.Next()) { |
| const char* col_name = schema.columns[it.index()].name.c_str(); |
| if (!it.IsSet()) { |
| output.columns_.emplace_back( |
| Column::DummyColumn(col_name, &output, it.index())); |
| continue; |
| } |
| |
| const auto& source_col = source_col_by_output_idx[it.index()]; |
| |
| Table& node_table = state_.Find(source_col.first)->output; |
| const Column& table_col = node_table.GetColumn(source_col.second); |
| |
| auto it_and_inserted = cached_rm.emplace( |
| std::make_pair(source_col.first, table_col.overlay_index()), |
| static_cast<uint32_t>(output.overlays_.size())); |
| if (it_and_inserted.second) { |
| output.overlays_.emplace_back( |
| std::move(node_table.overlays_[table_col.overlay_index()])); |
| } |
| |
| uint32_t rm_idx = it_and_inserted.first->second; |
| output.columns_.emplace_back( |
| Column(table_col, &output, it.index(), rm_idx, col_name)); |
| } |
| return output; |
| } |
| |
| uint32_t View::GetColumnCount() const { |
| return static_cast<uint32_t>(schema_.columns.size()); |
| } |
| |
| uint32_t View::EstimateRowCount() const { |
| uint32_t count = 0; |
| PostOrderDfs(root_node_.get(), [&count](TableNode* node) { |
| count = std::max(node->table->row_count(), count); |
| }); |
| return count; |
| } |
| |
| } // namespace trace_processor |
| } // namespace perfetto |