tp: migrate ThreadTable, ProcessTable, AndroidLogsTable to Python

This CL introduces the actual conversion of Python-generated tables to
C++ and migrates three of the trickier tables over to Python definitions.

Change-Id: I443ea5ce823868ec974a442de3a55b83307ffa49
diff --git a/python/generators/trace_processor_table/serialize.py b/python/generators/trace_processor_table/serialize.py
new file mode 100644
index 0000000..43ca5cd
--- /dev/null
+++ b/python/generators/trace_processor_table/serialize.py
@@ -0,0 +1,249 @@
+# Copyright (C) 2022 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import List
+from typing import Optional
+
+from python.generators.trace_processor_table.public import Alias
+from python.generators.trace_processor_table.public import Column
+from python.generators.trace_processor_table.public import ColumnFlag
+from python.generators.trace_processor_table.public import Table
+from python.generators.trace_processor_table.util import parse_type
+from python.generators.trace_processor_table.util import typed_column_type
+from python.generators.trace_processor_table.util import to_cpp_flags
+
+
+class ColumnSerializer:
+  """Functions for serializing a single Column in a table into C++."""
+
+  def __init__(self, table: Table, col_index: int):
+    self.col_index = col_index
+    self.col = table.columns[col_index]
+    self.name = self.col.name
+    self.flags = self.col.flags
+    self.typed_column_type = typed_column_type(table, self.col)
+    self.cpp_type = parse_type(table, self.col.type).cpp_type_with_optionality()
+
+  def colindex(self) -> str:
+    return f'    static constexpr uint32_t {self.name} = {self.col_index};'
+
+  def coltype_enum(self) -> str:
+    return f'    using {self.name} = {self.typed_column_type};'
+
+  def row_field(self) -> Optional[str]:
+    if self.col._is_auto_added_id or self.col._is_auto_added_type:
+      return None
+    return f'    {self.cpp_type} {self.name};'
+
+  def row_param(self) -> Optional[str]:
+    if self.col._is_auto_added_id or self.col._is_auto_added_type:
+      return None
+    return f'{self.cpp_type} in_{self.name} = {{}}'
+
+  def row_initializer(self) -> Optional[str]:
+    if self.col._is_auto_added_id or self.col._is_auto_added_type:
+      return None
+    return f'{self.name}(std::move(in_{self.name}))'
+
+  def flag(self) -> Optional[str]:
+    if self.col._is_auto_added_id or self.col._is_auto_added_type:
+      return None
+    default = f'ColumnType::{self.name}::default_flags()'
+    if self.flags == ColumnFlag.NONE:
+      flags = default
+    else:
+      flags = f'static_cast<uint32_t>({to_cpp_flags(self.flags)}) | {default}'
+    return f'''
+      static constexpr uint32_t {self.name} = {flags};
+    '''
+
+  def storage_init(self) -> Optional[str]:
+    if self.col._is_auto_added_id or self.col._is_auto_added_type:
+      return None
+
+    storage = f'ColumnStorage<ColumnType::{self.name}::stored_type>'
+    # TODO(lalitm): add support for dense columns.
+    return f'''{self.name}_({storage}::Create<false>())'''
+
+  def column_init(self) -> Optional[str]:
+    if self.col._is_auto_added_id or self.col._is_auto_added_type:
+      return None
+    return f'''
+    columns_.emplace_back("{self.name}", &{self.name}_, ColumnFlag::{self.name},
+                          this, static_cast<uint32_t>(columns_.size()),
+                          overlay_count);
+    '''
+
+  def shrink_to_fit(self) -> Optional[str]:
+    if self.col._is_auto_added_id:
+      return None
+    return f'    {self.name}_.ShrinkToFit();'
+
+  def append(self) -> Optional[str]:
+    if self.col._is_auto_added_id or self.col._is_auto_added_type:
+      return None
+    return f'    mutable_{self.name}()->Append(std::move(row.{self.name}));'
+
+  def accessor(self) -> Optional[str]:
+    inner = f'columns_[ColumnIndex::{self.name}]'
+    return f'''
+  const {self.typed_column_type}& {self.name}() const {{
+    return static_cast<const ColumnType::{self.name}&>({inner});
+  }}
+  '''
+
+  def mutable_accessor(self) -> Optional[str]:
+    if self.col._is_auto_added_id or self.col._is_auto_added_type:
+      return None
+    return f'''
+  {self.typed_column_type}* mutable_{self.name}() {{
+    return static_cast<ColumnType::{self.name}*>(
+        &columns_[ColumnIndex::{self.name}]);
+  }}
+  '''
+
+  def storage(self) -> Optional[str]:
+    if self.col._is_auto_added_id or self.col._is_auto_added_type:
+      return None
+    name = self.name
+    return f'  ColumnStorage<ColumnType::{name}::stored_type> {name}_;'
+
+
+class TableSerializer(object):
+  """Functions for serializing a single Table into C++."""
+
+  def __init__(self, table: Table):
+    self.table = table
+    self.table_name = table.class_name
+    self.column_serializers = [
+        ColumnSerializer(table, i) for i in range(len(table.columns))
+    ]
+
+  def foreach_col(self, serialize_fn, delimiter='\n') -> str:
+    lines = []
+    for c in self.column_serializers:
+      serialized = serialize_fn(c)
+      if serialized:
+        lines.append(serialized.lstrip('\n').rstrip())
+    return delimiter.join(lines).strip()
+
+  def id_defn(self) -> str:
+    return '''
+  struct Id : public BaseId {
+    Id() = default;
+    explicit constexpr Id(uint32_t v) : BaseId(v) {}
+  };
+  static_assert(std::is_trivially_destructible<Id>::value,
+                "Inheritance used without trivial destruction");
+    '''
+
+  def row_struct(self) -> str:
+    param = self.foreach_col(
+        ColumnSerializer.row_param, delimiter=',\n        ')
+    row_init = self.foreach_col(
+        ColumnSerializer.row_initializer, delimiter=',\n          ')
+    return f'''
+  struct Row : public macros_internal::RootParentTable::Row {{
+    Row({param})
+        : macros_internal::RootParentTable::Row(nullptr),
+          {row_init} {{
+      type_ = "{self.table.sql_name}";
+    }}
+    {self.foreach_col(ColumnSerializer.row_field)}
+  }};
+    '''
+
+  def constructor(self) -> str:
+    col_init = self.foreach_col(
+        ColumnSerializer.storage_init, delimiter=',\n        ')
+    return f'''
+  explicit {self.table_name}(StringPool* pool)
+      : macros_internal::MacroTable(pool, nullptr),
+        {col_init} {{
+    uint32_t overlay_count = static_cast<uint32_t>(overlays_.size()) - 1;
+    {self.foreach_col(ColumnSerializer.column_init)}
+  }}
+    '''
+
+  def serialize(self) -> str:
+    return f'''
+class {self.table_name} : public macros_internal::MacroTable {{
+ public:
+  {self.id_defn().lstrip()}
+  struct ColumnIndex {{
+    {self.foreach_col(ColumnSerializer.colindex)}
+  }};
+  struct ColumnType {{
+    {self.foreach_col(ColumnSerializer.coltype_enum)}
+  }};
+  {self.row_struct().strip()}
+  struct IdAndRow {{
+    uint32_t row;
+  }};
+  struct ColumnFlag {{
+    {self.foreach_col(ColumnSerializer.flag)}
+  }};
+
+  {self.constructor().strip()}
+  ~{self.table_name}() override;
+
+  static const char* Name() {{ return "{self.table.sql_name}"; }}
+
+  void ShrinkToFit() {{
+    {self.foreach_col(ColumnSerializer.shrink_to_fit)}
+  }}
+
+  IdAndRow Insert(const Row& row) {{
+    uint32_t row_number = row_count();
+    type_.Append(string_pool_->InternString(row.type()));
+    {self.foreach_col(ColumnSerializer.append)}
+    UpdateSelfOverlayAfterInsert();
+    return IdAndRow{{row_number}};
+  }}
+
+  {self.foreach_col(ColumnSerializer.accessor)}
+
+  {self.foreach_col(ColumnSerializer.mutable_accessor)}
+
+ private:
+  {self.foreach_col(ColumnSerializer.storage)}
+}};
+  '''.strip('\n')
+
+
+def serialize_header(ifdef_guard: str, tables: List[Table],
+                     include_paths: List[str]) -> str:
+  """Serializes a table header file containing the given set of tables."""
+  include_paths_str = '\n'.join([f'#include "{i}"' for i in include_paths])
+  tables_str = '\n\n'.join([TableSerializer(t).serialize() for t in tables])
+  return f'''
+#ifndef {ifdef_guard}
+#define {ifdef_guard}
+
+#include "src/trace_processor/tables/macros.h"
+
+{include_paths_str}
+
+namespace perfetto {{
+namespace trace_processor {{
+namespace tables {{
+
+{tables_str.strip()}
+
+}}  // namespace tables
+}}  // namespace trace_processor
+}}  // namespace perfetto
+
+#endif  // {ifdef_guard}
+  '''.strip()
diff --git a/python/generators/trace_processor_table/util.py b/python/generators/trace_processor_table/util.py
index 38f64d1..7acfea7 100644
--- a/python/generators/trace_processor_table/util.py
+++ b/python/generators/trace_processor_table/util.py
@@ -14,7 +14,11 @@
 
 import dataclasses
 from dataclasses import dataclass
+from typing import Dict
+from typing import List
+from typing import Set
 from typing import Optional
+from typing import Union
 
 from python.generators.trace_processor_table.public import Alias
 from python.generators.trace_processor_table.public import Column
@@ -31,7 +35,7 @@
 from python.generators.trace_processor_table.public import Table
 
 
-@dataclass()
+@dataclass
 class ParsedType:
   """Result of parsing a CppColumnType into its parts."""
   cpp_type: str
@@ -41,6 +45,24 @@
   is_self_id: bool = False
   id_table: Optional[Table] = None
 
+  def cpp_type_with_optionality(self) -> str:
+    """Returns the C++ type, wrapped in base::Optional if necessary."""
+
+    # ThreadTable and ProcessTable are special for legacy reasons as they were
+    # around even before the advent of C++ macro tables. Because of this a lot
+    # of code was written assuming that upid and utid were uint32 (e.g. indexing
+    # directly into vectors using them) and it was decided this behaviour was
+    # too expensive in engineering cost to fix given the trivial benefit. For
+    # this reason, continue to maintain this illusion.
+    if self.id_table and (self.id_table.class_name == 'ThreadTable' or
+                          self.id_table.class_name == 'ProcessTable'):
+      cpp_type = 'uint32_t'
+    else:
+      cpp_type = self.cpp_type
+    if self.is_optional:
+      return f'base::Optional<{cpp_type}>'
+    return cpp_type
+
 
 def public_sql_name_for_table(table: Table) -> str:
   """Extracts SQL name for the table which should be publicised."""
@@ -92,7 +114,7 @@
       Column('type', CppString(), ColumnFlag.NONE, _is_auto_added_type=True),
   ]
   public_sql_name = public_sql_name_for_table(table)
-  new_cols_doc = {
+  new_cols_doc: Dict[str, Union[ColumnDoc, str]] = {
       'id':
           ColumnDoc(doc=f'Unique idenitifier for this {public_sql_name}.'),
       'type':
@@ -105,3 +127,71 @@
       table,
       columns=auto_cols + table.columns,
       tabledoc=dataclasses.replace(table.tabledoc, columns=new_cols_doc))
+
+
+def find_table_deps(table: Table) -> Set[str]:
+  """Finds all the other table class names this table depends on.
+
+  By "depends", we mean this table in C++ would need the dependency to be
+  defined (or included) before this table is defined."""
+  deps: Set[str] = set()
+  for c in table.columns:
+    id_table = parse_type(table, c.type).id_table
+    if id_table:
+      deps.add(id_table.class_name)
+  return deps
+
+
+def topological_sort_tables(tables: List[Table]) -> List[Table]:
+  """Topologically sorts a list of tables (i.e. dependencies appear earlier).
+
+  See [1] for information on a topological sort. Dependencies are visited in
+  sorted name order so the output is identical on every run of the generator.
+
+  [1] https://en.wikipedia.org/wiki/Topological_sorting"""
+  tables_by_name: Dict[str, Table] = {t.class_name: t for t in tables}
+  visited: Set[str] = set()
+  result: List[Table] = []
+
+  # Topological sorting is really just a DFS where we put the nodes in the list
+  # after any dependencies.
+  def dfs(table_class_name: str):
+    table = tables_by_name.get(table_class_name)
+    # If the table is not found, that might be because it's not in this list of
+    # tables. Just ignore this as it's up to the caller to make sure any
+    # external deps are handled correctly.
+    if not table or table.class_name in visited:
+      return
+    visited.add(table.class_name)
+    # Sets iterate in arbitrary order; sort the deps to keep the output stable.
+    for dep in sorted(find_table_deps(table)):
+      dfs(dep)
+    result.append(table)
+
+  for table in tables:
+    dfs(table.class_name)
+  return result
+
+
+def to_cpp_flags(raw_flag: ColumnFlag) -> str:
+  """Converts a ColumnFlag to the C++ flags which it represents
+
+  It is not valid to call this function with ColumnFlag.NONE as in this case
+  defaults for that column should be implicitly used."""
+
+  assert raw_flag != ColumnFlag.NONE
+  flags = []
+  if ColumnFlag.SORTED in raw_flag:
+    flags.append('Column::Flag::kSorted')
+  if ColumnFlag.SET_ID in raw_flag:
+    flags.append('Column::Flag::kSetId')
+  return ' | '.join(flags)
+
+
+def typed_column_type(table: Table, col: Column) -> str:
+  """Returns the TypedColumn/IdColumn C++ type for a given column."""
+
+  parsed = parse_type(table, col.type)
+  if col._is_auto_added_id:
+    return f'IdColumn<{parsed.cpp_type}>'
+  return f'TypedColumn<{parsed.cpp_type_with_optionality()}>'