tp: add docs generation to Python generated C++ tables

Add docs generation for Python-based C++ tables and migrate the thread
and process tables to use it.

Change-Id: I30955618fbe9ec6217074fbde87484200aac935f
diff --git a/Android.bp b/Android.bp
index 345137e..000a6b4 100644
--- a/Android.bp
+++ b/Android.bp
@@ -9963,6 +9963,7 @@
     name: "perfetto_src_trace_processor_tables_tables_python_binary",
     srcs: [
         "python/generators/trace_processor_table/public.py",
+        "python/generators/trace_processor_table/util.py",
         "src/trace_processor/tables/android_tables.py",
         "src/trace_processor/tables/metadata_tables.py",
         "tools/gen_tp_table_headers.py",
diff --git a/gn/perfetto_tp_tables.gni b/gn/perfetto_tp_tables.gni
index 2cdc43e..77fd80d 100644
--- a/gn/perfetto_tp_tables.gni
+++ b/gn/perfetto_tp_tables.gni
@@ -17,6 +17,7 @@
 template("perfetto_tp_tables") {
   config_name = target_name + "_config"
   action_name = target_name
+  docs_name = target_name + "_docs"
 
   config(config_name) {
     include_dirs = [ root_gen_dir ]
@@ -46,4 +47,15 @@
       perfetto_action_type_for_generator = [ "tp_tables" ]
     }
   }
+
+  if (defined(invoker.generate_docs) && invoker.generate_docs) {
+    action(docs_name) {
+      sources = invoker.sources
+      script = "$perfetto_root_path/tools/gen_tp_table_docs.py"
+      deps = [ "$perfetto_root_path/python:trace_processor_table_generator" ]
+      outputs = [ "$target_gen_dir/$docs_name.json" ]
+      args = [ "--out" ] + rebase_path(outputs, root_build_dir) +
+             rebase_path(invoker.sources, root_build_dir)
+    }
+  }
 }
diff --git a/infra/perfetto.dev/BUILD.gn b/infra/perfetto.dev/BUILD.gn
index d452ae8..6e514f1 100644
--- a/infra/perfetto.dev/BUILD.gn
+++ b/infra/perfetto.dev/BUILD.gn
@@ -285,8 +285,13 @@
 }
 
 nodejs_script("gen_sql_tables_md") {
+  python_label = "../../src/trace_processor/tables:tables_python_docs"
+  python_docs_json = get_label_info(python_label, "target_gen_dir") + "/" +
+                     get_label_info(python_label, "name") + ".json"
+
   script = "src/gen_sql_tables_reference.js"
   inputs = src_sql_tables
+  deps = [ python_label ]
   outputs = [ sql_tables_md ]
   args = [
     "-o",
@@ -298,6 +303,10 @@
       rebase_path(file, root_build_dir),
     ]
   }
+  args += [
+    "-j",
+    rebase_path(python_docs_json, root_build_dir),
+  ]
 }
 
 md_to_html("gen_sql_tables_html") {
diff --git a/infra/perfetto.dev/build.js b/infra/perfetto.dev/build.js
index c6601cf..2b2b07f 100644
--- a/infra/perfetto.dev/build.js
+++ b/infra/perfetto.dev/build.js
@@ -61,10 +61,11 @@
   ninjaBuild();
 
   if (args.watch) {
-    watchDir('infra/perfetto.dev/src/assets');
-    watchDir('src/trace_processor/tables');
-    watchDir('protos');
     watchDir('docs');
+    watchDir('infra/perfetto.dev/src/assets');
+    watchDir('protos');
+    watchDir('python');
+    watchDir('src/trace_processor/tables');
   }
   if (args.serve) {
     startServer();
diff --git a/infra/perfetto.dev/src/gen_sql_tables_reference.js b/infra/perfetto.dev/src/gen_sql_tables_reference.js
index 516dd91..4b94cc8 100644
--- a/infra/perfetto.dev/src/gen_sql_tables_reference.js
+++ b/infra/perfetto.dev/src/gen_sql_tables_reference.js
@@ -25,6 +25,7 @@
 function singleLineComment(comment) {
   comment = comment || '';
   comment = comment.trim();
+  comment = comment.replaceAll('|', '\\|');
   comment = comment.replace(/\.\n/g, '<br>');
   comment = comment.replace(/\n/g, ' ');
   return comment;
@@ -185,6 +186,20 @@
   return tables;
 }
 
+function parseTablesInJson(filePath) {  // Returns the parsed JSON as-is.
+  return JSON.parse(fs.readFileSync(filePath, 'UTF8'));
+}
+
+function overrideCppTablesWithJsonTables(cpp, json) {
+  // Output keeps the order of |cpp|; a JSON table replaces the C++ table with
+  // the same name. JSON tables with no same-named C++ table are not emitted.
+  const jsonLookup = new Map(json.map(i => [i.name, i]));
+  return cpp.map(table => {
+    const jsonTable = jsonLookup.get(table.name);
+    return jsonTable === undefined ? table : jsonTable;
+  });
+}
+
 
 function genLink(table) {
   return `[${table.name}](#${table.name})`;
@@ -222,15 +237,22 @@
 function main() {
   const inFile = argv['i'];
   const outFile = argv['o'];
+  const jsonFile = argv['j'];
   if (!inFile) {
-    console.error('Usage: -i hdr1.h -i hdr2.h -[-o out.md]');
+    console.error('Usage: -i hdr1.h -i hdr2.h -j tbls.json -[-o out.md]');
     process.exit(1);
   }
 
   // Can be either a string (-i single) or an array (-i one -i two).
   const inFiles = (inFile instanceof Array) ? inFile : [inFile];
+  const cppTables =
+      Array.prototype.concat(...inFiles.map(parseTablesInCppFile));
 
-  const tables = Array.prototype.concat(...inFiles.map(parseTablesInCppFile));
+  // Can be either a string (-j single) or an array (-j one -j two).
+  const jsonFiles = (jsonFile instanceof Array) ? jsonFile : [jsonFile];
+  const jsonTables =
+      Array.prototype.concat(...jsonFiles.map(parseTablesInJson));
+  const tables = overrideCppTablesWithJsonTables(cppTables, jsonTables)
 
   // Resolve parents.
   const tablesIndex = {};    // 'TP_SCHED_SLICE_TABLE_DEF' -> table
diff --git a/python/BUILD b/python/BUILD
index 915ec2f..292126f 100644
--- a/python/BUILD
+++ b/python/BUILD
@@ -61,6 +61,7 @@
     name = "trace_processor_table_generator",
     srcs = [
         "generators/trace_processor_table/public.py",
+        "generators/trace_processor_table/util.py",
     ],
 )
 
diff --git a/python/BUILD.gn b/python/BUILD.gn
index 4dc8fad..3ea6191 100644
--- a/python/BUILD.gn
+++ b/python/BUILD.gn
@@ -15,7 +15,10 @@
 import("../gn/perfetto_python.gni")
 
 perfetto_py_library("trace_processor_table_generator") {
-  sources = [ "generators/trace_processor_table/public.py" ]
+  sources = [
+    "generators/trace_processor_table/public.py",
+    "generators/trace_processor_table/util.py",
+  ]
 }
 
 perfetto_py_library("trace_processor_stdlib_docs") {
diff --git a/python/generators/trace_processor_table/public.py b/python/generators/trace_processor_table/public.py
index df69c86..6b326e8 100644
--- a/python/generators/trace_processor_table/public.py
+++ b/python/generators/trace_processor_table/public.py
@@ -87,16 +87,32 @@
 
   Attributes:
     doc: Freeform docstring for the table.
+    group: The group of tables this table belongs to. Examples include "Tracks",
+    "Events", "ART Heap Graphs" etc: see the docs page for all the existing
+    groups.
     columns: Documentation for each table column.
-    real_table_name: The real name of the table in SQL. Should be
-    specified if wrapping the table with a view.
-    group: The group of tables this table should be assciated
-    with.
+    skip_id_and_type: Skips publishing these columns in the documentation.
+    Should only be used when these columns
+    are not meaningful or are aliased to something better.
   """
   doc: str
+  group: str
   columns: Dict[str, Union[ColumnDoc, str]]
-  real_sql_name: Optional[str] = None
-  group: Optional[str] = None
+  skip_id_and_type: bool = False
+
+
+@dataclass
+class WrappingSqlView:
+  """
+  Specifies information about the SQL view wrapping a table.
+
+  Useful for tables which are not exposed directly to SQL but are instead
+  wrapped with a SQL view.
+
+  Attributes:
+    view_name: The name of the SQL view exposed to SQL.
+  """
+  view_name: str
 
 
 @dataclass
@@ -108,12 +124,16 @@
     class_name: Name of the C++ table class.
     sql_name: Name of the table in SQL.
     columns: The columns in this table.
-    tabledoc: Documentation for this table.
+    wrapping_sql_view: See |WrappingSqlView|.
+    tabledoc: Documentation for this table. Can include
+    documentation overrides for auto-added columns (i.e.
+    id and type) and aliases added in |wrapping_sql_view|.
   """
   class_name: str
   sql_name: str
   columns: List[Column]
   tabledoc: TableDoc
+  wrapping_sql_view: Optional[WrappingSqlView] = None
 
 
 @dataclass
@@ -151,3 +171,13 @@
 @dataclass
 class CppSelfTableId(CppColumnType):
   """Represents the Id C++ type."""
+
+
+@dataclass
+class Alias(CppColumnType):
+  """Represents a column which aliases another column.
+
+  Aliasing refers to re-exporting a column with a different name. This is
+  especially useful for exporting "id" columns under names which associate them
+  with the table name: e.g. exporting thread.id as thread.utid."""
+  underlying_column: str  # Name of the aliased column in the same table.
diff --git a/python/generators/trace_processor_table/util.py b/python/generators/trace_processor_table/util.py
new file mode 100644
index 0000000..38f64d1
--- /dev/null
+++ b/python/generators/trace_processor_table/util.py
@@ -0,0 +1,107 @@
+# Copyright (C) 2022 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import dataclasses
+from dataclasses import dataclass
+from typing import Optional
+
+from python.generators.trace_processor_table.public import Alias
+from python.generators.trace_processor_table.public import Column
+from python.generators.trace_processor_table.public import ColumnDoc
+from python.generators.trace_processor_table.public import ColumnFlag
+from python.generators.trace_processor_table.public import CppColumnType
+from python.generators.trace_processor_table.public import CppInt32
+from python.generators.trace_processor_table.public import CppInt64
+from python.generators.trace_processor_table.public import CppOptional
+from python.generators.trace_processor_table.public import CppSelfTableId
+from python.generators.trace_processor_table.public import CppString
+from python.generators.trace_processor_table.public import CppTableId
+from python.generators.trace_processor_table.public import CppUint32
+from python.generators.trace_processor_table.public import Table
+
+
+@dataclass()
+class ParsedType:
+  """Result of parsing a CppColumnType into its parts."""
+  cpp_type: str  # C++ type name, e.g. 'int64_t' or 'StringPool::Id'.
+  is_optional: bool = False  # True when wrapped in CppOptional.
+  is_alias: bool = False  # True for Alias columns.
+  alias_underlying_name: Optional[str] = None  # Name of the aliased column.
+  is_self_id: bool = False  # True for CppSelfTableId.
+  id_table: Optional[Table] = None  # Set for CppTableId / CppSelfTableId.
+
+
+def public_sql_name_for_table(table: Table) -> str:
+  """Returns the name to publicise: the wrapping view's if set, else sql_name."""
+
+  wrapping_view = table.wrapping_sql_view
+  return wrapping_view.view_name if wrapping_view else table.sql_name
+
+
+def parse_type(table: Table, col_type: CppColumnType) -> ParsedType:
+  """Parses a CppColumnType into its constituent parts. |table| is the table
+  owning the column: it resolves self-id types and Alias lookups."""
+  if isinstance(col_type, CppInt64):
+    return ParsedType('int64_t')
+  if isinstance(col_type, CppInt32):
+    return ParsedType('int32_t')
+  if isinstance(col_type, CppUint32):
+    return ParsedType('uint32_t')
+  if isinstance(col_type, CppString):
+    return ParsedType('StringPool::Id')
+
+  if isinstance(col_type, Alias):  # Typed like the column it re-exports.
+    col = next(c for c in table.columns if c.name == col_type.underlying_column)
+    return ParsedType(
+        parse_type(table, col.type).cpp_type,
+        is_alias=True,
+        alias_underlying_name=col.name)
+
+  if isinstance(col_type, CppTableId):
+    return ParsedType(
+        f'{col_type.table.class_name}::Id', id_table=col_type.table)
+
+  if isinstance(col_type, CppSelfTableId):
+    return ParsedType(
+        f'{table.class_name}::Id', is_self_id=True, id_table=table)
+
+  if isinstance(col_type, CppOptional):  # Same as the inner type, but optional.
+    inner = parse_type(table, col_type.inner)
+    assert not inner.is_optional, 'Nested optional not allowed'
+    return dataclasses.replace(inner, is_optional=True)
+
+  raise Exception(f'Unknown type {col_type}')
+
+
+def augment_table_with_auto_cols(table: Table) -> Table:
+  """Returns a copy of |table| with the auto-added id and type columns (and
+  default docs for them, which the table's own docs override) prepended."""
+  auto_cols = [
+      Column('id', CppSelfTableId(), ColumnFlag.SORTED, _is_auto_added_id=True),
+      Column('type', CppString(), ColumnFlag.NONE, _is_auto_added_type=True),
+  ]
+  public_sql_name = public_sql_name_for_table(table)
+  new_cols_doc = {
+      'id':
+          ColumnDoc(doc=f'Unique identifier for this {public_sql_name}.'),
+      'type':
+          ColumnDoc(doc='''
+                The name of the "most-specific" child table containing this row.
+              '''),
+  }
+  new_cols_doc.update(table.tabledoc.columns)  # Table-provided docs win.
+  return dataclasses.replace(
+      table,
+      columns=auto_cols + table.columns,
+      tabledoc=dataclasses.replace(table.tabledoc, columns=new_cols_doc))
diff --git a/src/trace_processor/tables/BUILD.gn b/src/trace_processor/tables/BUILD.gn
index 8a00fe3..d59f749 100644
--- a/src/trace_processor/tables/BUILD.gn
+++ b/src/trace_processor/tables/BUILD.gn
@@ -20,6 +20,7 @@
     "android_tables.py",
     "metadata_tables.py",
   ]
+  generate_docs = true
 }
 
 source_set("tables") {
diff --git a/src/trace_processor/tables/metadata_tables.py b/src/trace_processor/tables/metadata_tables.py
index 2beea03..18f0324 100644
--- a/src/trace_processor/tables/metadata_tables.py
+++ b/src/trace_processor/tables/metadata_tables.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 """Contains metadata tables for a wide range of usecases."""
 
+from python.generators.trace_processor_table.public import Alias
 from python.generators.trace_processor_table.public import Column as C
 from python.generators.trace_processor_table.public import ColumnDoc
 from python.generators.trace_processor_table.public import CppInt64
@@ -23,11 +24,13 @@
 from python.generators.trace_processor_table.public import CppTableId
 from python.generators.trace_processor_table.public import CppUint32
 from python.generators.trace_processor_table.public import CppSelfTableId
+from python.generators.trace_processor_table.public import WrappingSqlView
 
 PROCESS_TABLE = Table(
     class_name='ProcessTable',
     sql_name='internal_process',
     columns=[
+        C('upid', Alias(underlying_column='id')),
         C('pid', CppUint32()),
         C('name', CppOptional(CppString())),
         C('start_ts', CppOptional(CppInt64())),
@@ -38,9 +41,11 @@
         C('cmdline', CppOptional(CppString())),
         C('arg_set_id', CppUint32()),
     ],
+    wrapping_sql_view=WrappingSqlView(view_name='process',),
     tabledoc=TableDoc(
         doc='Contains information of processes seen during the trace',
-        real_sql_name='process',
+        group='Misc',
+        skip_id_and_type=True,
         columns={
             'upid':
                 '''
@@ -94,6 +99,7 @@
     class_name='ThreadTable',
     sql_name='internal_thread',
     columns=[
+        C('utid', Alias(underlying_column='id')),
         C('tid', CppUint32()),
         C('name', CppOptional(CppString())),
         C('start_ts', CppOptional(CppInt64())),
@@ -101,47 +107,49 @@
         C('upid', CppOptional(CppTableId(PROCESS_TABLE))),
         C('is_main_thread', CppOptional(CppUint32())),
     ],
+    wrapping_sql_view=WrappingSqlView(view_name='thread',),
     tabledoc=TableDoc(
         doc='Contains information of threads seen during the trace',
-        real_sql_name='thread',
+        group='Misc',
+        skip_id_and_type=True,
         columns={
             'utid':
-                ColumnDoc('''
+                '''
                   Unique thread id. This is != the OS tid. This is a monotonic
                   number associated to each thread. The OS thread id (tid)
                   cannot be used as primary key because tids and pids are
                   recycled by most kernels.
-                '''),
+                ''',
             'tid':
-                ColumnDoc('''
+                '''
                   The OS id for this thread. Note: this is *not* unique over the
                   lifetime of the trace so cannot be used as a primary key. Use
                   |utid| instead.
-                '''),
+                ''',
             'name':
-                ColumnDoc('''
+                '''
                   The name of the thread. Can be populated from many sources
                   (e.g. ftrace, /proc scraping, track event etc).
-                '''),
+                ''',
             'start_ts':
-                ColumnDoc('''
+                '''
                   The start timestamp of this thread (if known). Is null in most
                   cases unless a thread creation event is enabled (e.g.
                   task_newtask ftrace event on Linux/Android).
-                '''),
+                ''',
             'end_ts':
-                ColumnDoc('''
+                '''
                   The end timestamp of this thread (if known). Is null in most
                   cases unless a thread destruction event is enabled (e.g.
                   sched_process_free ftrace event on Linux/Android).
-                '''),
+                ''',
             'upid':
-                ColumnDoc('The process hosting this thread.'),
+                'The process hosting this thread.',
             'is_main_thread':
-                ColumnDoc('''
+                '''
                   Boolean indicating if this thread is the main thread
                   in the process.
-                ''')
+                '''
         }))
 
 # Keep this list sorted.
diff --git a/tools/gen_tp_table_docs.py b/tools/gen_tp_table_docs.py
new file mode 100755
index 0000000..b144a94
--- /dev/null
+++ b/tools/gen_tp_table_docs.py
@@ -0,0 +1,123 @@
+#!/usr/bin/env python3
+# Copyright (C) 2022 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import json
+import os
+import runpy
+import sys
+from typing import Any
+from typing import Dict
+from typing import List
+from typing import Optional
+from typing import Union
+
+# Allow importing of root-relative modules.
+ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+sys.path.append(os.path.join(ROOT_DIR))
+
+from python.generators.trace_processor_table.public import Column
+from python.generators.trace_processor_table.public import ColumnDoc
+from python.generators.trace_processor_table.public import Table
+import python.generators.trace_processor_table.util as util
+
+
+def gen_json_for_column(table: Table, col: Column,
+                        doc: Union[ColumnDoc, str]) -> Optional[Dict[str, Any]]:
+  """Generates the JSON docs dict for a column, or None if it is skipped."""
+
+  # id and type columns should be skipped if the table specifies so.
+  is_skippable_col = col._is_auto_added_id or col._is_auto_added_type
+  if table.tabledoc.skip_id_and_type and is_skippable_col:
+    return None
+
+  # Column docs come in two forms: a plain string (just the comment) or a
+  # ColumnDoc, which can additionally carry a "table.column" joinable
+  # reference that is split into its two parts here.
+  if isinstance(doc, ColumnDoc):
+    comment = doc.doc
+    if doc.joinable:
+      join_table, join_type = doc.joinable.split('.')
+    else:
+      join_table, join_type = None, None
+  elif isinstance(doc, str):
+    comment = doc
+    join_table, join_type = None, None
+  else:
+    raise Exception('Unknown column documentation type')
+
+  parsed_type = util.parse_type(table, col.type)
+  docs_type = parsed_type.cpp_type
+  if docs_type == 'StringPool::Id':
+    docs_type = 'string'
+
+  ref_class_name = None
+  if parsed_type.id_table and not col._is_auto_added_id:
+    id_table_name = util.public_sql_name_for_table(parsed_type.id_table)
+    ref_class_name = parsed_type.id_table.class_name
+
+    # A joinable must not also be specified: id columns join on the table's id.
+    assert join_table is None
+    assert join_type is None
+
+    join_table = id_table_name
+    join_type = "id"
+
+  return {
+      'name': col.name,
+      'type': docs_type,
+      'comment': comment,
+      'optional': parsed_type.is_optional,
+      'refTableCppName': ref_class_name,
+      'joinTable': join_table,
+      'joinCol': join_type,  # join_type holds the column name to join on.
+  }
+
+
+def main():  # Writes the JSON docs for all input tables to --out.
+  parser = argparse.ArgumentParser()
+  parser.add_argument('--out', required=True)
+  parser.add_argument('inputs', nargs='*')
+  args = parser.parse_args()
+  # Each input is a Python file run via runpy; its ALL_TABLES are collected.
+  tables: List[Table] = []
+  for in_path in args.inputs:
+    for table in runpy.run_path(in_path)['ALL_TABLES']:
+      tables.append(util.augment_table_with_auto_cols(table))
+  # One JSON object per table. Undocumented columns raise KeyError here.
+  table_docs = []
+  for table in tables:
+    doc = table.tabledoc
+    cols = (
+        gen_json_for_column(table, c, doc.columns[c.name])
+        for c in table.columns)
+    table_docs.append({
+        'name': util.public_sql_name_for_table(table),
+        'cppClassName': table.class_name,
+        'defMacro': table.class_name,
+        'comment': doc.doc,
+        'parent': None,
+        'parentDefName': '',
+        'tablegroup': doc.group,
+        'cols': [c for c in cols if c]  # Skipped columns come back as None.
+    })
+
+  with open(args.out, 'w') as out:
+    json.dump(table_docs, out, indent=2)
+    out.write('\n')
+
+
+if __name__ == '__main__':
+  sys.exit(main())  # sys.exit, not the site-provided exit() helper.