tp: migrate ThreadTable, ProcessTable, AndroidLogsTable to Python

This CL introduces the actual conversion of Python-generated tables to
C++ and migrates three of the trickier tables over.

Change-Id: I443ea5ce823868ec974a442de3a55b83307ffa49
diff --git a/Android.bp b/Android.bp
index 000a6b4..50cb5bc 100644
--- a/Android.bp
+++ b/Android.bp
@@ -9933,6 +9933,34 @@
     ],
 }
 
+// GN: //src/trace_processor/tables:py_tables_unittest
+genrule {
+    name: "perfetto_src_trace_processor_tables_py_tables_unittest",
+    srcs: [
+        "src/trace_processor/tables/py_tables_unittest.py",
+    ],
+    tools: [
+        "perfetto_src_trace_processor_tables_py_tables_unittest_binary",
+    ],
+    cmd: "$(location perfetto_src_trace_processor_tables_py_tables_unittest_binary) --gen-dir=$(genDir) --inputs $(in) --outputs $(out)",
+    out: [
+        "src/trace_processor/tables/py_tables_unittest_py.h",
+    ],
+}
+
+// GN: //src/trace_processor/tables:py_tables_unittest
+python_binary_host {
+    name: "perfetto_src_trace_processor_tables_py_tables_unittest_binary",
+    srcs: [
+        "python/generators/trace_processor_table/public.py",
+        "python/generators/trace_processor_table/serialize.py",
+        "python/generators/trace_processor_table/util.py",
+        "src/trace_processor/tables/py_tables_unittest.py",
+        "tools/gen_tp_table_headers.py",
+    ],
+    main: "tools/gen_tp_table_headers.py",
+}
+
 // GN: //src/trace_processor/tables:tables
 filegroup {
     name: "perfetto_src_trace_processor_tables_tables",
@@ -9963,6 +9991,7 @@
     name: "perfetto_src_trace_processor_tables_tables_python_binary",
     srcs: [
         "python/generators/trace_processor_table/public.py",
+        "python/generators/trace_processor_table/serialize.py",
         "python/generators/trace_processor_table/util.py",
         "src/trace_processor/tables/android_tables.py",
         "src/trace_processor/tables/metadata_tables.py",
@@ -9976,6 +10005,7 @@
     name: "perfetto_src_trace_processor_tables_unittests",
     srcs: [
         "src/trace_processor/tables/macros_unittest.cc",
+        "src/trace_processor/tables/py_tables_unittest.cc",
     ],
 }
 
@@ -11684,6 +11714,7 @@
         "perfetto_src_trace_processor_metrics_gen_cc_metrics_descriptor",
         "perfetto_src_trace_processor_metrics_sql_gen_amalgamated_sql_metrics",
         "perfetto_src_trace_processor_stdlib_gen_amalgamated_stdlib",
+        "perfetto_src_trace_processor_tables_py_tables_unittest",
         "perfetto_src_trace_processor_tables_tables_python",
         "perfetto_src_traced_probes_ftrace_test_messages_cpp_gen_headers",
         "perfetto_src_traced_probes_ftrace_test_messages_lite_gen_headers",
diff --git a/infra/perfetto.dev/src/gen_sql_tables_reference.js b/infra/perfetto.dev/src/gen_sql_tables_reference.js
index 4b94cc8..9af1d32 100644
--- a/infra/perfetto.dev/src/gen_sql_tables_reference.js
+++ b/infra/perfetto.dev/src/gen_sql_tables_reference.js
@@ -192,15 +192,19 @@
 
 function overrideCppTablesWithJsonTables(cpp, json) {
   const out = [];
-  var jsonLookup = new Map(json.map(i => [i.name, i]));
+  const jsonAdded = new Set();
+  for (const table of json) {
+    out.push(table);
+    jsonAdded.add(table.name);
+  }
   for (const table of cpp) {
-    const jsonTable = jsonLookup.get(table.name);
-    out.push(jsonTable === undefined ? table : jsonTable);
+    if (!jsonAdded.has(table.name)) {
+      out.push(table);
+    }
   }
   return out;
 }
 
-
 function genLink(table) {
   return `[${table.name}](#${table.name})`;
 }
diff --git a/python/BUILD b/python/BUILD
index 292126f..4700d19 100644
--- a/python/BUILD
+++ b/python/BUILD
@@ -61,6 +61,7 @@
     name = "trace_processor_table_generator",
     srcs = [
         "generators/trace_processor_table/public.py",
+        "generators/trace_processor_table/serialize.py",
         "generators/trace_processor_table/util.py",
     ],
 )
diff --git a/python/BUILD.gn b/python/BUILD.gn
index 3ea6191..6dcf9ed 100644
--- a/python/BUILD.gn
+++ b/python/BUILD.gn
@@ -17,6 +17,7 @@
 perfetto_py_library("trace_processor_table_generator") {
   sources = [
     "generators/trace_processor_table/public.py",
+    "generators/trace_processor_table/serialize.py",
     "generators/trace_processor_table/util.py",
   ]
 }
diff --git a/python/generators/trace_processor_table/serialize.py b/python/generators/trace_processor_table/serialize.py
new file mode 100644
index 0000000..43ca5cd
--- /dev/null
+++ b/python/generators/trace_processor_table/serialize.py
@@ -0,0 +1,249 @@
+# Copyright (C) 2022 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import List
+from typing import Optional
+
+from python.generators.trace_processor_table.public import Alias
+from python.generators.trace_processor_table.public import Column
+from python.generators.trace_processor_table.public import ColumnFlag
+from python.generators.trace_processor_table.public import Table
+from python.generators.trace_processor_table.util import parse_type
+from python.generators.trace_processor_table.util import typed_column_type
+from python.generators.trace_processor_table.util import to_cpp_flags
+
+
+class ColumnSerializer:
+  """Functions for serializing a single Column in a table into C++."""
+
+  def __init__(self, table: Table, col_index: int):
+    self.col_index = col_index
+    self.col = table.columns[col_index]
+    self.name = self.col.name
+    self.flags = self.col.flags
+    self.typed_column_type = typed_column_type(table, self.col)
+    self.cpp_type = parse_type(table, self.col.type).cpp_type_with_optionality()
+
+  def colindex(self) -> str:
+    return f'    static constexpr uint32_t {self.name} = {self.col_index};'
+
+  def coltype_enum(self) -> str:
+    return f'    using {self.name} = {self.typed_column_type};'
+
+  def row_field(self) -> Optional[str]:
+    if self.col._is_auto_added_id or self.col._is_auto_added_type:
+      return None
+    return f'    {self.cpp_type} {self.name};'
+
+  def row_param(self) -> Optional[str]:
+    if self.col._is_auto_added_id or self.col._is_auto_added_type:
+      return None
+    return f'{self.cpp_type} in_{self.name} = {{}}'
+
+  def row_initializer(self) -> Optional[str]:
+    if self.col._is_auto_added_id or self.col._is_auto_added_type:
+      return None
+    return f'{self.name}(std::move(in_{self.name}))'
+
+  def flag(self) -> Optional[str]:
+    if self.col._is_auto_added_id or self.col._is_auto_added_type:
+      return None
+    default = f'ColumnType::{self.name}::default_flags()'
+    if self.flags == ColumnFlag.NONE:
+      flags = default
+    else:
+      flags = f'static_cast<uint32_t>({to_cpp_flags(self.flags)}) | {default}'
+    return f'''
+      static constexpr uint32_t {self.name} = {flags};
+    '''
+
+  def storage_init(self) -> Optional[str]:
+    if self.col._is_auto_added_id or self.col._is_auto_added_type:
+      return None
+
+    storage = f'ColumnStorage<ColumnType::{self.name}::stored_type>'
+    # TODO(lalitm): add support for dense columns.
+    return f'''{self.name}_({storage}::Create<false>())'''
+
+  def column_init(self) -> Optional[str]:
+    if self.col._is_auto_added_id or self.col._is_auto_added_type:
+      return None
+    return f'''
+    columns_.emplace_back("{self.name}", &{self.name}_, ColumnFlag::{self.name},
+                          this, static_cast<uint32_t>(columns_.size()),
+                          overlay_count);
+    '''
+
+  def shrink_to_fit(self) -> Optional[str]:
+    if self.col._is_auto_added_id:
+      return None
+    return f'    {self.name}_.ShrinkToFit();'
+
+  def append(self) -> Optional[str]:
+    if self.col._is_auto_added_id or self.col._is_auto_added_type:
+      return None
+    return f'    mutable_{self.name}()->Append(std::move(row.{self.name}));'
+
+  def accessor(self) -> Optional[str]:
+    inner = f'columns_[ColumnIndex::{self.name}]'
+    return f'''
+  const {self.typed_column_type}& {self.name}() const {{
+    return static_cast<const ColumnType::{self.name}&>({inner});
+  }}
+  '''
+
+  def mutable_accessor(self) -> Optional[str]:
+    if self.col._is_auto_added_id or self.col._is_auto_added_type:
+      return None
+    return f'''
+  {self.typed_column_type}* mutable_{self.name}() {{
+    return static_cast<ColumnType::{self.name}*>(
+        &columns_[ColumnIndex::{self.name}]);
+  }}
+  '''
+
+  def storage(self) -> Optional[str]:
+    if self.col._is_auto_added_id or self.col._is_auto_added_type:
+      return None
+    name = self.name
+    return f'  ColumnStorage<ColumnType::{name}::stored_type> {name}_;'
+
+
+class TableSerializer(object):
+  """Functions for serializing a single Table into C++."""
+
+  def __init__(self, table: Table):
+    self.table = table
+    self.table_name = table.class_name
+    self.column_serializers = [
+        ColumnSerializer(table, i) for i in range(len(table.columns))
+    ]
+
+  def foreach_col(self, serialize_fn, delimiter='\n') -> str:
+    lines = []
+    for c in self.column_serializers:
+      serialized = serialize_fn(c)
+      if serialized:
+        lines.append(serialized.lstrip('\n').rstrip())
+    return delimiter.join(lines).strip()
+
+  def id_defn(self) -> str:
+    return '''
+  struct Id : public BaseId {
+    Id() = default;
+    explicit constexpr Id(uint32_t v) : BaseId(v) {}
+  };
+  static_assert(std::is_trivially_destructible<Id>::value,
+                "Inheritance used without trivial destruction");
+    '''
+
+  def row_struct(self) -> str:
+    param = self.foreach_col(
+        ColumnSerializer.row_param, delimiter=',\n        ')
+    row_init = self.foreach_col(
+        ColumnSerializer.row_initializer, delimiter=',\n          ')
+    return f'''
+  struct Row : public macros_internal::RootParentTable::Row {{
+    Row({param})
+        : macros_internal::RootParentTable::Row(nullptr),
+          {row_init} {{
+      type_ = "{self.table.sql_name}";
+    }}
+    {self.foreach_col(ColumnSerializer.row_field)}
+  }};
+    '''
+
+  def constructor(self) -> str:
+    col_init = self.foreach_col(
+        ColumnSerializer.storage_init, delimiter=',\n        ')
+    return f'''
+  explicit {self.table_name}(StringPool* pool)
+      : macros_internal::MacroTable(pool, nullptr),
+        {col_init} {{
+    uint32_t overlay_count = static_cast<uint32_t>(overlays_.size()) - 1;
+    {self.foreach_col(ColumnSerializer.column_init)}
+  }}
+    '''
+
+  def serialize(self) -> str:
+    return f'''
+class {self.table_name} : public macros_internal::MacroTable {{
+ public:
+  {self.id_defn().lstrip()}
+  struct ColumnIndex {{
+    {self.foreach_col(ColumnSerializer.colindex)}
+  }};
+  struct ColumnType {{
+    {self.foreach_col(ColumnSerializer.coltype_enum)}
+  }};
+  {self.row_struct().strip()}
+  struct IdAndRow {{
+    uint32_t row;
+  }};
+  struct ColumnFlag {{
+    {self.foreach_col(ColumnSerializer.flag)}
+  }};
+
+  {self.constructor().strip()}
+  ~{self.table_name}() override;
+
+  static const char* Name() {{ return "{self.table.sql_name}"; }}
+
+  void ShrinkToFit() {{
+    {self.foreach_col(ColumnSerializer.shrink_to_fit)}
+  }}
+
+  IdAndRow Insert(const Row& row) {{
+    uint32_t row_number = row_count();
+    type_.Append(string_pool_->InternString(row.type()));
+    {self.foreach_col(ColumnSerializer.append)}
+    UpdateSelfOverlayAfterInsert();
+    return IdAndRow{{row_number}};
+  }}
+
+  {self.foreach_col(ColumnSerializer.accessor)}
+
+  {self.foreach_col(ColumnSerializer.mutable_accessor)}
+
+ private:
+  {self.foreach_col(ColumnSerializer.storage)}
+}};
+  '''.strip('\n')
+
+
+def serialize_header(ifdef_guard: str, tables: List[Table],
+                     include_paths: List[str]) -> str:
+  """Serializes a table header file containing the given set of tables."""
+  include_paths_str = '\n'.join([f'#include "{i}"' for i in include_paths])
+  tables_str = '\n\n'.join([TableSerializer(t).serialize() for t in tables])
+  return f'''
+#ifndef {ifdef_guard}
+#define {ifdef_guard}
+
+#include "src/trace_processor/tables/macros.h"
+
+{include_paths_str}
+
+namespace perfetto {{
+namespace trace_processor {{
+namespace tables {{
+
+{tables_str.strip()}
+
+}}  // namespace tables
+}}  // namespace trace_processor
+}}  // namespace perfetto
+
+#endif  // {ifdef_guard}
+  '''.strip()
diff --git a/python/generators/trace_processor_table/util.py b/python/generators/trace_processor_table/util.py
index 38f64d1..7acfea7 100644
--- a/python/generators/trace_processor_table/util.py
+++ b/python/generators/trace_processor_table/util.py
@@ -14,7 +14,11 @@
 
 import dataclasses
 from dataclasses import dataclass
+from typing import Dict
+from typing import List
+from typing import Set
 from typing import Optional
+from typing import Union
 
 from python.generators.trace_processor_table.public import Alias
 from python.generators.trace_processor_table.public import Column
@@ -31,7 +35,7 @@
 from python.generators.trace_processor_table.public import Table
 
 
-@dataclass()
+@dataclass
 class ParsedType:
   """Result of parsing a CppColumnType into its parts."""
   cpp_type: str
@@ -41,6 +45,24 @@
   is_self_id: bool = False
   id_table: Optional[Table] = None
 
+  def cpp_type_with_optionality(self) -> str:
+    """Returns the C++ type, wrapped in base::Optional if necessary."""
+
+    # ThreadTable and ProcessTable are special for legacy reasons as they were
+    # around even before the advent of C++ macro tables. Because of this a lot
+    # of code was written assuming that upid and utid were uint32 (e.g. indexing
+    # directly into vectors using them) and it was decided this behaviour was
+    # too expensive in engineering cost to fix given the trivial benefit. For
+    # this reason, continue to maintain this illusion.
+    if self.id_table and (self.id_table.class_name == 'ThreadTable' or
+                          self.id_table.class_name == 'ProcessTable'):
+      cpp_type = 'uint32_t'
+    else:
+      cpp_type = self.cpp_type
+    if self.is_optional:
+      return f'base::Optional<{cpp_type}>'
+    return cpp_type
+
 
 def public_sql_name_for_table(table: Table) -> str:
   """Extracts SQL name for the table which should be publicised."""
@@ -92,7 +114,7 @@
       Column('type', CppString(), ColumnFlag.NONE, _is_auto_added_type=True),
   ]
   public_sql_name = public_sql_name_for_table(table)
-  new_cols_doc = {
+  new_cols_doc: Dict[str, Union[ColumnDoc, str]] = {
       'id':
           ColumnDoc(doc=f'Unique idenitifier for this {public_sql_name}.'),
       'type':
@@ -105,3 +127,71 @@
       table,
       columns=auto_cols + table.columns,
       tabledoc=dataclasses.replace(table.tabledoc, columns=new_cols_doc))
+
+
+def find_table_deps(table: Table) -> Set[str]:
+  """Finds all the other table class names this table depends on.
+
+  By "depends", we mean this table in C++ would need the dependency to be
+  defined (or included) before this table is defined."""
+  deps: Set[str] = set()
+  for c in table.columns:
+    id_table = parse_type(table, c.type).id_table
+    if id_table:
+      deps.add(id_table.class_name)
+  return deps
+
+
+def topological_sort_tables(tables: List[Table]) -> List[Table]:
+  """Topologically sorts a list of tables (i.e. dependencies appear earlier).
+
+  See [1] for information on a topological sort. We do this to allow
+  dependencies to be processed and appear earlier than their dependents.
+
+  [1] https://en.wikipedia.org/wiki/Topological_sorting"""
+  tables_by_name: dict[str, Table] = dict((t.class_name, t) for t in tables)
+  visited: Set[str] = set()
+  result: List[Table] = []
+
+  # Topological sorting is really just a DFS where we put the nodes in the list
+  # after any dependencies.
+  def dfs(table_class_name: str):
+    table = tables_by_name.get(table_class_name)
+    # If the table is not found, that might be because it's not in this list of
+    # tables. Just ignore this as it's up to the caller to make sure any
+    # external deps are handled correctly.
+    if not table or table.class_name in visited:
+      return
+    visited.add(table.class_name)
+
+    for dep in find_table_deps(table):
+      dfs(dep)
+    result.append(table)
+
+  for table in tables:
+    dfs(table.class_name)
+  return result
+
+
+def to_cpp_flags(raw_flag: ColumnFlag) -> str:
+  """Converts a ColumnFlag to the C++ flags which it represents
+
+  It is not valid to call this function with ColumnFlag.NONE as in this case
+  defaults for that column should be implicitly used."""
+
+  assert raw_flag != ColumnFlag.NONE
+  flags = []
+  if ColumnFlag.SORTED in raw_flag:
+    flags.append('Column::Flag::kSorted')
+  if ColumnFlag.SET_ID in raw_flag:
+    flags.append('Column::Flag::kSetId')
+  return ' | '.join(flags)
+
+
+def typed_column_type(table: Table, col: Column) -> str:
+  """Returns the TypedColumn/IdColumn C++ type for a given column."""
+
+  parsed = parse_type(table, col.type)
+  if col._is_auto_added_id:
+    return f'IdColumn<{parsed.cpp_type}>'
+  return f'TypedColumn<{parsed.cpp_type_with_optionality()}>'
diff --git a/src/trace_processor/db/column_storage.h b/src/trace_processor/db/column_storage.h
index ac3f8a2..70c8385 100644
--- a/src/trace_processor/db/column_storage.h
+++ b/src/trace_processor/db/column_storage.h
@@ -78,7 +78,7 @@
 
   base::Optional<T> Get(uint32_t idx) const { return nv_.Get(idx); }
   void Append(T val) { nv_.Append(val); }
-  void Append(base::Optional<T> val) { nv_.Append(val); }
+  void Append(base::Optional<T> val) { nv_.Append(std::move(val)); }
   void Set(uint32_t idx, T val) { nv_.Set(idx, val); }
   uint32_t size() const { return nv_.size(); }
   bool IsDense() const { return nv_.IsDense(); }
diff --git a/src/trace_processor/storage/trace_storage.h b/src/trace_processor/storage/trace_storage.h
index 4bb97a8..325f3b0 100644
--- a/src/trace_processor/storage/trace_storage.h
+++ b/src/trace_processor/storage/trace_storage.h
@@ -862,8 +862,8 @@
   tables::ArgTable arg_table_{&string_pool_, nullptr};
 
   // Information about all the threads and processes in the trace.
-  tables::ThreadTable thread_table_{&string_pool_, nullptr};
-  tables::ProcessTable process_table_{&string_pool_, nullptr};
+  tables::ThreadTable thread_table_{&string_pool_};
+  tables::ProcessTable process_table_{&string_pool_};
   tables::FiledescriptorTable filedescriptor_table_{&string_pool_, nullptr};
 
   // Slices coming from userspace events (e.g. Chromium TRACE_EVENT macros).
@@ -899,7 +899,7 @@
 
   tables::CpuFreqTable cpu_freq_table_{&string_pool_, nullptr};
 
-  tables::AndroidLogTable android_log_table_{&string_pool_, nullptr};
+  tables::AndroidLogTable android_log_table_{&string_pool_};
 
   tables::AndroidDumpstateTable android_dumpstate_table_{&string_pool_,
                                                          nullptr};
diff --git a/src/trace_processor/tables/BUILD.gn b/src/trace_processor/tables/BUILD.gn
index d59f749..17a540b 100644
--- a/src/trace_processor/tables/BUILD.gn
+++ b/src/trace_processor/tables/BUILD.gn
@@ -46,10 +46,19 @@
   ]
 }
 
+perfetto_tp_tables("py_tables_unittest") {
+  sources = [ "py_tables_unittest.py" ]
+  generate_docs = true
+}
+
 source_set("unittests") {
   testonly = true
-  sources = [ "macros_unittest.cc" ]
+  sources = [
+    "macros_unittest.cc",
+    "py_tables_unittest.cc",
+  ]
   deps = [
+    ":py_tables_unittest",
     ":tables",
     "../../../gn:default_deps",
     "../../../gn:gtest_and_gmock",
diff --git a/src/trace_processor/tables/android_tables.h b/src/trace_processor/tables/android_tables.h
index 7184bc2..1f46c16 100644
--- a/src/trace_processor/tables/android_tables.h
+++ b/src/trace_processor/tables/android_tables.h
@@ -24,28 +24,6 @@
 namespace trace_processor {
 namespace tables {
 
-// Log entries from Android logcat.
-//
-// NOTE: this table is not sorted by timestamp. This is why we omit the
-// sorted flag on the ts column.
-//
-// @param ts timestamp of log entry.
-// @param utid thread writing the log entry {@joinable thread.utid}.
-// @param prio priority of the log. 3=DEBUG, 4=INFO, 5=WARN, 6=ERROR.
-// @param tag tag of the log entry.
-// @param msg content of the log entry.
-// @tablegroup Events
-#define PERFETTO_TP_ANDROID_LOG_TABLE_DEF(NAME, PARENT, C) \
-  NAME(AndroidLogTable, "android_logs")                    \
-  PERFETTO_TP_ROOT_TABLE(PARENT, C)                        \
-  C(int64_t, ts)                                           \
-  C(uint32_t, utid)                                        \
-  C(uint32_t, prio)                                        \
-  C(base::Optional<StringPool::Id>, tag)                   \
-  C(StringPool::Id, msg)
-
-PERFETTO_TP_TABLE(PERFETTO_TP_ANDROID_LOG_TABLE_DEF);
-
 // A table presenting all game modes and interventions
 // of games installed on the system.
 // This is generated by the game_mode_intervention data-source.
diff --git a/src/trace_processor/tables/metadata_tables.h b/src/trace_processor/tables/metadata_tables.h
index 3193d91..76768ed 100644
--- a/src/trace_processor/tables/metadata_tables.h
+++ b/src/trace_processor/tables/metadata_tables.h
@@ -60,87 +60,6 @@
 
 PERFETTO_TP_TABLE(PERFETTO_TP_METADATA_TABLE_DEF);
 
-// Contains information of threads seen during the trace
-//
-// @name thread
-// @param utid             {uint32_t} Unique thread id. This is != the OS tid.
-//                         This is a monotonic number associated to each thread.
-//                         The OS thread id (tid) cannot be used as primary key
-//                         because tids and pids are recycled by most kernels.
-// @param tid              The OS id for this thread. Note: this is *not*
-//                         unique over the lifetime of the trace so cannot be
-//                         used as a primary key. Use |utid| instead.
-// @param name             The name of the thread. Can be populated from many
-//                         sources (e.g. ftrace, /proc scraping, track event
-//                         etc).
-// @param start_ts         The start timestamp of this thread (if known). Is
-//                         null in most cases unless a thread creation event is
-//                         enabled (e.g. task_newtask ftrace event on
-//                         Linux/Android).
-// @param end_ts           The end timestamp of this thread (if known). Is
-//                         null in most cases unless a thread destruction event
-//                         is enabled (e.g. sched_process_free ftrace event on
-//                         Linux/Android).
-// @param upid             {@joinable process.upid} The process hosting this
-//                         thread.
-// @param is_main_thread   Boolean indicating if this thread is the main thread
-//                         in the process.
-#define PERFETTO_TP_THREAD_TABLE_DEF(NAME, PARENT, C) \
-  NAME(ThreadTable, "internal_thread")                \
-  PERFETTO_TP_ROOT_TABLE(PARENT, C)                   \
-  C(uint32_t, tid)                                    \
-  C(base::Optional<StringPool::Id>, name)             \
-  C(base::Optional<int64_t>, start_ts)                \
-  C(base::Optional<int64_t>, end_ts)                  \
-  C(base::Optional<uint32_t>, upid)                   \
-  C(base::Optional<uint32_t>, is_main_thread)
-
-PERFETTO_TP_TABLE(PERFETTO_TP_THREAD_TABLE_DEF);
-
-// Contains information of processes seen during the trace
-//
-// @name process
-// @param upid            {uint32_t} Unique process id. This is != the OS pid.
-//                        This is a monotonic number associated to each process.
-//                        The OS process id (pid) cannot be used as primary key
-//                        because tids and pids are recycled by most kernels.
-// @param pid             The OS id for this process. Note: this is *not*
-//                        unique over the lifetime of the trace so cannot be
-//                        used as a primary key. Use |upid| instead.
-// @param name            The name of the process. Can be populated from many
-//                        sources (e.g. ftrace, /proc scraping, track event
-//                        etc).
-// @param start_ts        The start timestamp of this process (if known). Is
-//                        null in most cases unless a process creation event is
-//                        enabled (e.g. task_newtask ftrace event on
-//                        Linux/Android).
-// @param end_ts          The end timestamp of this process (if known). Is
-//                        null in most cases unless a process destruction event
-//                        is enabled (e.g. sched_process_free ftrace event on
-//                        Linux/Android).
-// @param parent_upid     {@joinable process.upid} The upid of the process which
-//                        caused this process to be spawned.
-// @param uid             {@joinable package_list.uid} The Unix user id of the
-//                        process.
-// @param android_appid   Android appid of this process.
-// @param cmdline         /proc/cmdline for this process.
-// @param arg_set_id      {@joinable args.arg_set_id} Extra args for this
-//                        process.
-#define PERFETTO_TP_PROCESS_TABLE_DEF(NAME, PARENT, C) \
-  NAME(ProcessTable, "internal_process")               \
-  PERFETTO_TP_ROOT_TABLE(PARENT, C)                    \
-  C(uint32_t, pid)                                     \
-  C(base::Optional<StringPool::Id>, name)              \
-  C(base::Optional<int64_t>, start_ts)                 \
-  C(base::Optional<int64_t>, end_ts)                   \
-  C(base::Optional<uint32_t>, parent_upid)             \
-  C(base::Optional<uint32_t>, uid)                     \
-  C(base::Optional<uint32_t>, android_appid)           \
-  C(base::Optional<StringPool::Id>, cmdline)           \
-  C(uint32_t, arg_set_id)
-
-PERFETTO_TP_TABLE(PERFETTO_TP_PROCESS_TABLE_DEF);
-
 // Contains information of filedescriptors collected during the trace
 //
 // @name filedescriptor
diff --git a/src/trace_processor/tables/py_tables_unittest.cc b/src/trace_processor/tables/py_tables_unittest.cc
new file mode 100644
index 0000000..8966159
--- /dev/null
+++ b/src/trace_processor/tables/py_tables_unittest.cc
@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2019 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "src/trace_processor/tables/py_tables_unittest_py.h"
+
+#include "test/gtest_and_gmock.h"
+
+namespace perfetto {
+namespace trace_processor {
+namespace tables {
+
+TestEventTable::~TestEventTable() = default;
+TestArgsTable::~TestArgsTable() = default;
+
+namespace {
+
+class PyTablesUnittest : public ::testing::Test {
+ protected:
+  StringPool pool_;
+
+  TestEventTable event_{&pool_};
+  TestArgsTable args_{&pool_};
+};
+
+TEST_F(PyTablesUnittest, EventTableProprties) {
+  ASSERT_STREQ(TestEventTable::Name(), "event");
+
+  ASSERT_EQ(TestEventTable::ColumnIndex::id, 0u);
+  ASSERT_EQ(TestEventTable::ColumnIndex::type, 1u);
+  ASSERT_EQ(TestEventTable::ColumnIndex::ts, 2u);
+  ASSERT_EQ(TestEventTable::ColumnIndex::arg_set_id, 3u);
+
+  ASSERT_EQ(TestEventTable::ColumnFlag::ts,
+            Column::Flag::kSorted | Column::Flag::kNonNull);
+  ASSERT_EQ(TestEventTable::ColumnFlag::arg_set_id, Column::Flag::kNonNull);
+}
+
+TEST_F(PyTablesUnittest, ArgsTableProprties) {
+  ASSERT_STREQ(TestArgsTable::Name(), "args");
+
+  ASSERT_EQ(TestArgsTable::ColumnIndex::id, 0u);
+  ASSERT_EQ(TestArgsTable::ColumnIndex::type, 1u);
+  ASSERT_EQ(TestArgsTable::ColumnIndex::arg_set_id, 2u);
+
+  ASSERT_EQ(TestArgsTable::ColumnFlag::arg_set_id, Column::Flag::kSorted |
+                                                       Column::Flag::kSetId |
+                                                       Column::Flag::kNonNull);
+}
+
+TEST_F(PyTablesUnittest, InsertEvent) {
+  event_.Insert(TestEventTable::Row(100, 0));
+
+  ASSERT_EQ(event_.type().GetString(0).ToStdString(), "event");
+  ASSERT_EQ(event_.ts()[0], 100);
+  ASSERT_EQ(event_.arg_set_id()[0], 0u);
+}
+
+TEST_F(PyTablesUnittest, InsertEventSpecifyCols) {
+  TestEventTable::Row row;
+  row.ts = 100;
+  row.arg_set_id = 0;
+  event_.Insert(row);
+
+  ASSERT_EQ(event_.type().GetString(0).ToStdString(), "event");
+  ASSERT_EQ(event_.ts()[0], 100);
+  ASSERT_EQ(event_.arg_set_id()[0], 0u);
+}
+
+TEST_F(PyTablesUnittest, MutableColumn) {
+  event_.Insert(TestEventTable::Row(100, 0));
+
+  ASSERT_EQ((*event_.mutable_ts())[0], 100);
+  ASSERT_EQ((*event_.mutable_arg_set_id())[0], 0u);
+}
+
+TEST_F(PyTablesUnittest, ShrinkToFit) {
+  event_.Insert(TestEventTable::Row(100, 0));
+  event_.ShrinkToFit();
+
+  // Unfortunately given the loose restrictions on shrink_to_fit provided by the
+  // standard library, we cannot really assert anything. Just call the method to
+  // ensure it doesn't cause crashes.
+}
+
+}  // namespace
+}  // namespace tables
+}  // namespace trace_processor
+}  // namespace perfetto
diff --git a/src/trace_processor/tables/py_tables_unittest.py b/src/trace_processor/tables/py_tables_unittest.py
new file mode 100644
index 0000000..d167137
--- /dev/null
+++ b/src/trace_processor/tables/py_tables_unittest.py
@@ -0,0 +1,46 @@
+# Copyright (C) 2022 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Contains tables for unittesting."""
+
+from python.generators.trace_processor_table.public import Column as C
+from python.generators.trace_processor_table.public import ColumnFlag
+from python.generators.trace_processor_table.public import CppInt64
+from python.generators.trace_processor_table.public import Table
+from python.generators.trace_processor_table.public import TableDoc
+from python.generators.trace_processor_table.public import CppUint32
+
+EVENT_TABLE = Table(
+    class_name="TestEventTable",
+    sql_name="event",
+    columns=[
+        C("ts", CppInt64(), flags=ColumnFlag.SORTED),
+        C("arg_set_id", CppUint32()),
+    ],
+    tabledoc=TableDoc(doc='', group='', columns={}))
+
+ARGS_TABLE = Table(
+    class_name="TestArgsTable",
+    sql_name="args",
+    columns=[
+        C("arg_set_id",
+          CppUint32(),
+          flags=ColumnFlag.SET_ID | ColumnFlag.SORTED),
+    ],
+    tabledoc=TableDoc(doc='', group='', columns={}))
+
+# Keep this list sorted.
+ALL_TABLES = [
+    ARGS_TABLE,
+    EVENT_TABLE,
+]
diff --git a/tools/gen_tp_table_headers.py b/tools/gen_tp_table_headers.py
index 5c91490..52ebe35 100755
--- a/tools/gen_tp_table_headers.py
+++ b/tools/gen_tp_table_headers.py
@@ -16,25 +16,50 @@
 import argparse
 from dataclasses import dataclass
 import os
+import re
 import runpy
 import sys
 from typing import List
+from typing import Set
 
 # Allow importing of root-relative modules.
 ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 sys.path.append(os.path.join(ROOT_DIR))
 
-from python.generators.trace_processor_table.public import Table  #pylint: disable=wrong-import-position)
+#pylint: disable=wrong-import-position
+from python.generators.trace_processor_table.public import Alias
+from python.generators.trace_processor_table.public import Table
+from python.generators.trace_processor_table.serialize import serialize_header
+from python.generators.trace_processor_table.util import find_table_deps
+from python.generators.trace_processor_table.util import augment_table_with_auto_cols
+from python.generators.trace_processor_table.util import topological_sort_tables
+#pylint: enable=wrong-import-position
 
 
 @dataclass
-class GeneratorArg:
-  """Represents a Python module to be converted to a header."""
-  in_path: str
+class Header:
+  """One Python table module's output header and the tables it defines."""
   out_path: str
+  relout_path: str  # out_path relative to --gen-dir; used for #includes.
   tables: List[Table]
 
 
+def normalize_table_for_serialization(table: Table) -> Table:
+  """Convert a user-defined table into the form needed by the serializer.
+
+  1. Adds the 'id' and 'type' columns.
+  2. Removes any alias columns (for now these are handled in SQL, not C++;
+     this may change in the future).
+  """
+  normalized = augment_table_with_auto_cols(table)
+  kept_columns = []
+  for column in normalized.columns:
+    if not isinstance(column.type, Alias):
+      kept_columns.append(column)
+  normalized.columns = kept_columns
+  return normalized
+
+
 def main():
   """Main function."""
   parser = argparse.ArgumentParser()
@@ -46,14 +71,40 @@
   if len(args.inputs) != len(args.outputs):
     raise Exception('Number of inputs must match number of outputs')
 
-  gen_args = []
+  headers: List[Header] = []
   for (in_path, out_path) in zip(args.inputs, args.outputs):
     tables = runpy.run_path(in_path)['ALL_TABLES']
-    gen_args.append(GeneratorArg(in_path, out_path, tables))
+    relout_path = os.path.relpath(out_path, args.gen_dir)
+    headers.append(Header(out_path, relout_path, tables))
 
-  for arg in gen_args:
-    # TODO(lalitm): fill this header with useful content.
-    with open(arg.out_path, 'w', encoding='utf8') as out:
+  # Build a mapping from table class name to the output path of the header
+  # which will be generated for it. This is used to include one header into
+  # another for Id dependencies.
+  table_class_name_to_relout = {}
+  for header in headers:
+    for table in header.tables:
+      table_class_name_to_relout[table.class_name] = header.relout_path
+
+  for header in headers:
+    # Topologically sort the tables in this header to ensure that any deps are
+    # defined *before* the table itself.
+    sorted_tables = topological_sort_tables(
+        [normalize_table_for_serialization(table) for table in header.tables])
+
+    # Collect the headers of every table these tables depend on so they can be
+    # #include-ed below. set.update() mutates in place (union() returns a copy).
+    header_relout_deps: Set[str] = set()
+    for table in sorted_tables:
+      header_relout_deps.update(
+          table_class_name_to_relout[c] for c in find_table_deps(table))
+    header_relout_deps.discard(header.relout_path)  # Never include ourself.
+
+    with open(header.out_path, 'w', encoding='utf8') as out:
+      # Include guards must be valid C++ identifiers, so '-' is replaced too.
+      ifdef_guard = re.sub(r'[^a-zA-Z0-9_]', '_',
+                           header.relout_path).upper() + '_'
+      out.write(serialize_header(ifdef_guard, sorted_tables,
+                                 sorted(header_relout_deps)))
       out.write('\n')