tp: migrate ThreadTable, ProcessTable, AndroidLogsTable to Python
This CL introduces the actual conversion of Python-generated tables to
C++ and migrates three of the trickier tables over.
Change-Id: I443ea5ce823868ec974a442de3a55b83307ffa49
diff --git a/python/generators/trace_processor_table/serialize.py b/python/generators/trace_processor_table/serialize.py
new file mode 100644
index 0000000..43ca5cd
--- /dev/null
+++ b/python/generators/trace_processor_table/serialize.py
@@ -0,0 +1,249 @@
+# Copyright (C) 2022 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import List
+from typing import Optional
+
+from python.generators.trace_processor_table.public import Alias
+from python.generators.trace_processor_table.public import Column
+from python.generators.trace_processor_table.public import ColumnFlag
+from python.generators.trace_processor_table.public import Table
+from python.generators.trace_processor_table.util import parse_type
+from python.generators.trace_processor_table.util import typed_column_type
+from python.generators.trace_processor_table.util import to_cpp_flags
+
+
+class ColumnSerializer:
+ """Functions for serializing a single Column in a table into C++.
+
+ Each method returns one C++ source fragment for the column. A method that
+ returns None contributes nothing for this column to that part of the
+ generated class; TableSerializer.foreach_col drops such results.
+ """
+
+ # Caches the column found at `col_index` of `table` together with the
+ # values every fragment needs: its name, its flags, its typed column type
+ # and its C++ value type (including optionality).
+ def __init__(self, table: Table, col_index: int):
+ self.col_index = col_index
+ self.col = table.columns[col_index]
+ self.name = self.col.name
+ self.flags = self.col.flags
+ self.typed_column_type = typed_column_type(table, self.col)
+ self.cpp_type = parse_type(table, self.col.type).cpp_type_with_optionality()
+
+ # Constant mapping the column name to its position in the table; emitted
+ # for every column (used inside `struct ColumnIndex`).
+ def colindex(self) -> str:
+ return f' static constexpr uint32_t {self.name} = {self.col_index};'
+
+ # `using` alias from the column name to its typed column type; emitted for
+ # every column (used inside `struct ColumnType`).
+ def coltype_enum(self) -> str:
+ return f' using {self.name} = {self.typed_column_type};'
+
+ # Member declaration of the Row struct. The auto-added id and type columns
+ # are not Row members, so None is returned for them.
+ def row_field(self) -> Optional[str]:
+ if self.col._is_auto_added_id or self.col._is_auto_added_type:
+ return None
+ return f' {self.cpp_type} {self.name};'
+
+ # Parameter of the Row constructor, named `in_<name>` and defaulted to a
+ # value-initialized (`{}`) instance. Skipped for auto-added columns.
+ def row_param(self) -> Optional[str]:
+ if self.col._is_auto_added_id or self.col._is_auto_added_type:
+ return None
+ return f'{self.cpp_type} in_{self.name} = {{}}'
+
+ # Member initializer of the Row constructor which moves the `in_<name>`
+ # parameter into the field. Skipped for auto-added columns.
+ def row_initializer(self) -> Optional[str]:
+ if self.col._is_auto_added_id or self.col._is_auto_added_type:
+ return None
+ return f'{self.name}(std::move(in_{self.name}))'
+
+ # Constant holding the flags of the column (used inside
+ # `struct ColumnFlag`): the default flags of the column type, OR-ed with the
+ # explicitly requested flags when any were given. Skipped for auto-added
+ # columns.
+ def flag(self) -> Optional[str]:
+ if self.col._is_auto_added_id or self.col._is_auto_added_type:
+ return None
+ default = f'ColumnType::{self.name}::default_flags()'
+ if self.flags == ColumnFlag.NONE:
+ flags = default
+ else:
+ flags = f'static_cast<uint32_t>({to_cpp_flags(self.flags)}) | {default}'
+ return f'''
+ static constexpr uint32_t {self.name} = {flags};
+ '''
+
+ # Member initializer of the table constructor which creates the backing
+ # ColumnStorage for the column via `Create<false>()`. Skipped for auto-added
+ # columns.
+ def storage_init(self) -> Optional[str]:
+ if self.col._is_auto_added_id or self.col._is_auto_added_type:
+ return None
+
+ storage = f'ColumnStorage<ColumnType::{self.name}::stored_type>'
+ # TODO(lalitm): add support for dense columns.
+ return f'''{self.name}_({storage}::Create<false>())'''
+
+ # Statement of the table constructor body which registers the column in
+ # `columns_`, pointing it at its storage member and its ColumnFlag constant.
+ # Skipped for auto-added columns.
+ def column_init(self) -> Optional[str]:
+ if self.col._is_auto_added_id or self.col._is_auto_added_type:
+ return None
+ return f'''
+ columns_.emplace_back("{self.name}", &{self.name}_, ColumnFlag::{self.name},
+ this, static_cast<uint32_t>(columns_.size()),
+ overlay_count);
+ '''
+
+ # Statement of ShrinkToFit() for the storage of the column. Unlike the other
+ # storage related fragments only the auto-added id column is skipped, so the
+ # auto-added type column still emits `type_.ShrinkToFit();`.
+ # NOTE(review): presumably `type_` is storage owned by the base class (the
+ # generated Insert() appends to it) - confirm.
+ def shrink_to_fit(self) -> Optional[str]:
+ if self.col._is_auto_added_id:
+ return None
+ return f' {self.name}_.ShrinkToFit();'
+
+ # Statement of Insert() which appends the value taken from the row to the
+ # column through its mutable accessor. Skipped for auto-added columns.
+ def append(self) -> Optional[str]:
+ if self.col._is_auto_added_id or self.col._is_auto_added_type:
+ return None
+ return f' mutable_{self.name}()->Append(std::move(row.{self.name}));'
+
+ # Const accessor returning the typed column. Never returns None: it is
+ # emitted for every column, including the auto-added ones.
+ def accessor(self) -> Optional[str]:
+ inner = f'columns_[ColumnIndex::{self.name}]'
+ return f'''
+ const {self.typed_column_type}& {self.name}() const {{
+ return static_cast<const ColumnType::{self.name}&>({inner});
+ }}
+ '''
+
+ # Accessor returning a mutable pointer to the typed column. Skipped for
+ # auto-added columns.
+ def mutable_accessor(self) -> Optional[str]:
+ if self.col._is_auto_added_id or self.col._is_auto_added_type:
+ return None
+ return f'''
+ {self.typed_column_type}* mutable_{self.name}() {{
+ return static_cast<ColumnType::{self.name}*>(
+ &columns_[ColumnIndex::{self.name}]);
+ }}
+ '''
+
+ # Private member declaration of the ColumnStorage backing the column.
+ # Skipped for auto-added columns.
+ def storage(self) -> Optional[str]:
+ if self.col._is_auto_added_id or self.col._is_auto_added_type:
+ return None
+ name = self.name
+ return f' ColumnStorage<ColumnType::{name}::stored_type> {name}_;'
+
+
+class TableSerializer(object):
+ """Functions for serializing a single Table into C++.
+
+ The generated class derives from macros_internal::MacroTable and is
+ assembled by serialize() from the fragments produced by one
+ ColumnSerializer per column.
+ """
+
+ # Keeps the table and its C++ class name and creates one ColumnSerializer
+ # per column, in column order.
+ def __init__(self, table: Table):
+ self.table = table
+ self.table_name = table.class_name
+ self.column_serializers = [
+ ColumnSerializer(table, i) for i in range(len(table.columns))
+ ]
+
+ # Applies `serialize_fn` (an unbound ColumnSerializer method) to every
+ # column serializer and joins the results with `delimiter`. Falsy results
+ # (None or empty) are dropped; leading newlines and trailing whitespace are
+ # removed from each fragment and the joined text is stripped at both ends.
+ def foreach_col(self, serialize_fn, delimiter='\n') -> str:
+ lines = []
+ for c in self.column_serializers:
+ serialized = serialize_fn(c)
+ if serialized:
+ lines.append(serialized.lstrip('\n').rstrip())
+ return delimiter.join(lines).strip()
+
+ # Returns the definition of the nested `Id` struct (derived from BaseId)
+ # together with a static_assert that it is trivially destructible. The text
+ # does not depend on the table.
+ def id_defn(self) -> str:
+ return '''
+ struct Id : public BaseId {
+ Id() = default;
+ explicit constexpr Id(uint32_t v) : BaseId(v) {}
+ };
+ static_assert(std::is_trivially_destructible<Id>::value,
+ "Inheritance used without trivial destruction");
+ '''
+
+ # Returns the definition of the nested `Row` struct: a constructor taking
+ # one defaulted parameter per non auto-added column, which sets `type_` to
+ # the SQL name of the table, followed by one field per such column.
+ def row_struct(self) -> str:
+ param = self.foreach_col(
+ ColumnSerializer.row_param, delimiter=',\n ')
+ row_init = self.foreach_col(
+ ColumnSerializer.row_initializer, delimiter=',\n ')
+ return f'''
+ struct Row : public macros_internal::RootParentTable::Row {{
+ Row({param})
+ : macros_internal::RootParentTable::Row(nullptr),
+ {row_init} {{
+ type_ = "{self.table.sql_name}";
+ }}
+ {self.foreach_col(ColumnSerializer.row_field)}
+ }};
+ '''
+
+ # Returns the table constructor: it initializes the storage of every non
+ # auto-added column and then registers those columns in `columns_`, passing
+ # `overlay_count` (computed as `overlays_.size() - 1`) to each of them.
+ def constructor(self) -> str:
+ col_init = self.foreach_col(
+ ColumnSerializer.storage_init, delimiter=',\n ')
+ return f'''
+ explicit {self.table_name}(StringPool* pool)
+ : macros_internal::MacroTable(pool, nullptr),
+ {col_init} {{
+ uint32_t overlay_count = static_cast<uint32_t>(overlays_.size()) - 1;
+ {self.foreach_col(ColumnSerializer.column_init)}
+ }}
+ '''
+
+ # Returns the complete C++ class for the table: the Id, ColumnIndex,
+ # ColumnType, Row, IdAndRow and ColumnFlag definitions, the constructor and
+ # destructor declaration, Name(), ShrinkToFit(), Insert(), the column
+ # accessors and the private storage members. Leading and trailing newlines
+ # of the result are removed.
+ def serialize(self) -> str:
+ return f'''
+class {self.table_name} : public macros_internal::MacroTable {{
+ public:
+ {self.id_defn().lstrip()}
+ struct ColumnIndex {{
+ {self.foreach_col(ColumnSerializer.colindex)}
+ }};
+ struct ColumnType {{
+ {self.foreach_col(ColumnSerializer.coltype_enum)}
+ }};
+ {self.row_struct().strip()}
+ struct IdAndRow {{
+ uint32_t row;
+ }};
+ struct ColumnFlag {{
+ {self.foreach_col(ColumnSerializer.flag)}
+ }};
+
+ {self.constructor().strip()}
+ ~{self.table_name}() override;
+
+ static const char* Name() {{ return "{self.table.sql_name}"; }}
+
+ void ShrinkToFit() {{
+ {self.foreach_col(ColumnSerializer.shrink_to_fit)}
+ }}
+
+ IdAndRow Insert(const Row& row) {{
+ uint32_t row_number = row_count();
+ type_.Append(string_pool_->InternString(row.type()));
+ {self.foreach_col(ColumnSerializer.append)}
+ UpdateSelfOverlayAfterInsert();
+ return IdAndRow{{row_number}};
+ }}
+
+ {self.foreach_col(ColumnSerializer.accessor)}
+
+ {self.foreach_col(ColumnSerializer.mutable_accessor)}
+
+ private:
+ {self.foreach_col(ColumnSerializer.storage)}
+}};
+ '''.strip('\n')
+
+
+def serialize_header(ifdef_guard: str, tables: List[Table],
+ include_paths: List[str]) -> str:
+ """Serializes a table header file containing the given set of tables.
+
+ Args:
+ ifdef_guard: Macro name used for the #ifndef/#define include guard.
+ tables: Tables to serialize. They are emitted in the given order,
+ separated by a blank line; no reordering is done here.
+ include_paths: Paths emitted as `#include "<path>"` lines after the
+ fixed include of src/trace_processor/tables/macros.h.
+
+ Returns:
+ The text of the header, with the table classes wrapped in the
+ perfetto::trace_processor::tables namespaces and with surrounding
+ whitespace stripped.
+ """
+ include_paths_str = '\n'.join([f'#include "{i}"' for i in include_paths])
+ tables_str = '\n\n'.join([TableSerializer(t).serialize() for t in tables])
+ return f'''
+#ifndef {ifdef_guard}
+#define {ifdef_guard}
+
+#include "src/trace_processor/tables/macros.h"
+
+{include_paths_str}
+
+namespace perfetto {{
+namespace trace_processor {{
+namespace tables {{
+
+{tables_str.strip()}
+
+}} // namespace tables
+}} // namespace trace_processor
+}} // namespace perfetto
+
+#endif // {ifdef_guard}
+ '''.strip()
diff --git a/python/generators/trace_processor_table/util.py b/python/generators/trace_processor_table/util.py
index 38f64d1..7acfea7 100644
--- a/python/generators/trace_processor_table/util.py
+++ b/python/generators/trace_processor_table/util.py
@@ -14,7 +14,11 @@
import dataclasses
from dataclasses import dataclass
+from typing import Dict
+from typing import List
+from typing import Set
from typing import Optional
+from typing import Union
from python.generators.trace_processor_table.public import Alias
from python.generators.trace_processor_table.public import Column
@@ -31,7 +35,7 @@
from python.generators.trace_processor_table.public import Table
-@dataclass()
+@dataclass
class ParsedType:
"""Result of parsing a CppColumnType into its parts."""
cpp_type: str
@@ -41,6 +45,24 @@
is_self_id: bool = False
id_table: Optional[Table] = None
+ def cpp_type_with_optionality(self) -> str:
+   """Returns the C++ type, wrapped in base::Optional for optional columns."""
+
+   # Ids pointing into ThreadTable and ProcessTable are deliberately exposed
+   # as plain uint32_t. Both tables predate the C++ macro tables, so a lot of
+   # existing code treats utid and upid as uint32 (e.g. by indexing directly
+   # into vectors with them). Fixing all of that code was judged too costly
+   # for the trivial benefit, so the illusion is maintained here.
+   legacy_tables = ('ThreadTable', 'ProcessTable')
+   is_legacy_id = self.id_table and self.id_table.class_name in legacy_tables
+   base_type = 'uint32_t' if is_legacy_id else self.cpp_type
+   return f'base::Optional<{base_type}>' if self.is_optional else base_type
+
+
def public_sql_name_for_table(table: Table) -> str:
"""Extracts SQL name for the table which should be publicised."""
@@ -92,7 +114,7 @@
Column('type', CppString(), ColumnFlag.NONE, _is_auto_added_type=True),
]
public_sql_name = public_sql_name_for_table(table)
- new_cols_doc = {
+ new_cols_doc: Dict[str, Union[ColumnDoc, str]] = {
'id':
ColumnDoc(doc=f'Unique idenitifier for this {public_sql_name}.'),
'type':
@@ -105,3 +127,71 @@
table,
columns=auto_cols + table.columns,
tabledoc=dataclasses.replace(table.tabledoc, columns=new_cols_doc))
+
+
+def find_table_deps(table: Table) -> Set[str]:
+  """Returns the class names of all the tables this table depends on.
+
+  A table "depends" on another one when, in C++, that other table has to be
+  defined (or included) before this table is defined. This is the case for
+  every column whose parsed type refers to an id table."""
+  id_tables = (parse_type(table, col.type).id_table for col in table.columns)
+  return {id_table.class_name for id_table in id_tables if id_table}
+
+
+def topological_sort_tables(tables: List[Table]) -> List[Table]:
+  """Topologically sorts a list of tables (i.e. dependencies appear earlier).
+
+  See [1] for information on a topological sort. We do this to allow
+  dependencies to be processed and appear earlier than their dependents.
+
+  Dependencies which are not part of `tables` are ignored: it is up to the
+  caller to make sure any external deps are handled correctly.
+
+  The result is deterministic for a given input: the dependencies of a table
+  are visited in sorted name order instead of set iteration order, which can
+  differ between interpreter runs because of string hash randomization.
+
+  [1] https://en.wikipedia.org/wiki/Topological_sorting"""
+  tables_by_name: Dict[str, Table] = {t.class_name: t for t in tables}
+  visited: Set[str] = set()
+  result: List[Table] = []
+
+  # Topological sorting is really just a DFS where we put the nodes in the list
+  # after any dependencies.
+  def dfs(table_class_name: str) -> None:
+    table = tables_by_name.get(table_class_name)
+    # If the table is not found, that might be because it's not in this list of
+    # tables. Just ignore this as it's up to the caller to make sure any
+    # external deps are handled correctly.
+    if not table or table.class_name in visited:
+      return
+    # Mark the table before recursing so that a table reachable through
+    # several paths (or through itself) is only processed once.
+    visited.add(table.class_name)
+
+    # Sorting makes the visiting order, and so the output, reproducible.
+    for dep in sorted(find_table_deps(table)):
+      dfs(dep)
+    result.append(table)
+
+  for table in tables:
+    dfs(table.class_name)
+  return result
+
+
+def to_cpp_flags(raw_flag: ColumnFlag) -> str:
+  """Renders a ColumnFlag as the C++ flag expression it represents.
+
+  The SORTED and SET_ID flags present in `raw_flag` are translated to their
+  `Column::Flag` constants and joined with ' | '.
+
+  ColumnFlag.NONE is not a valid argument: for such columns the default
+  flags of the column should be implicitly used instead."""
+
+  assert raw_flag != ColumnFlag.NONE
+  # (Python flag, C++ constant) pairs; their order fixes the output order.
+  cpp_names = (
+      (ColumnFlag.SORTED, 'Column::Flag::kSorted'),
+      (ColumnFlag.SET_ID, 'Column::Flag::kSetId'),
+  )
+  return ' | '.join(name for flag, name in cpp_names if flag in raw_flag)
+
+
+def typed_column_type(table: Table, col: Column) -> str:
+  """Returns the C++ column class (TypedColumn or IdColumn) for a column.
+
+  The auto-added id column maps to IdColumn over its plain C++ type; every
+  other column maps to TypedColumn over its C++ type with optionality."""
+
+  parsed_type = parse_type(table, col.type)
+  if not col._is_auto_added_id:
+    return f'TypedColumn<{parsed_type.cpp_type_with_optionality()}>'
+  return f'IdColumn<{parsed_type.cpp_type}>'