tp: Cleanup of presubmit/docs generating code

Change-Id: I4a845ed35bf0e4db5acba6079c239c9a47c7662f
diff --git a/python/generators/stdlib_docs/extractor.py b/python/generators/sql_processing/docs_extractor.py
similarity index 91%
rename from python/generators/stdlib_docs/extractor.py
rename to python/generators/sql_processing/docs_extractor.py
index fce88f2..52fbd66 100644
--- a/python/generators/stdlib_docs/extractor.py
+++ b/python/generators/sql_processing/docs_extractor.py
@@ -17,10 +17,10 @@
 from re import Match
 from typing import List, Optional, Tuple
 
-from python.generators.stdlib_docs.utils import ObjKind
-from python.generators.stdlib_docs.utils import extract_comment
-from python.generators.stdlib_docs.utils import match_pattern
-from python.generators.stdlib_docs.utils import PATTERN_BY_KIND
+from python.generators.sql_processing.utils import ObjKind
+from python.generators.sql_processing.utils import extract_comment
+from python.generators.sql_processing.utils import match_pattern
+from python.generators.sql_processing.utils import PATTERN_BY_KIND
 
 
 class DocsExtractor:
diff --git a/python/generators/stdlib_docs/parse.py b/python/generators/sql_processing/docs_parse.py
similarity index 93%
rename from python/generators/stdlib_docs/parse.py
rename to python/generators/sql_processing/docs_parse.py
index 26b3c49..079a600 100644
--- a/python/generators/stdlib_docs/parse.py
+++ b/python/generators/sql_processing/docs_parse.py
@@ -17,18 +17,18 @@
 from dataclasses import dataclass
 import re
 import sys
-from typing import Any, Dict, List, Optional, Set, Tuple, Union
+from typing import Any, Dict, List, Optional, Set, Tuple
 
-from python.generators.stdlib_docs.extractor import DocsExtractor
-from python.generators.stdlib_docs.utils import ObjKind
-from python.generators.stdlib_docs.utils import ARG_ANNOTATION_PATTERN
-from python.generators.stdlib_docs.utils import NAME_AND_TYPE_PATTERN
-from python.generators.stdlib_docs.utils import FUNCTION_RETURN_PATTERN
-from python.generators.stdlib_docs.utils import COLUMN_ANNOTATION_PATTERN
+from python.generators.sql_processing.docs_extractor import DocsExtractor
+from python.generators.sql_processing.utils import ObjKind
+from python.generators.sql_processing.utils import ARG_ANNOTATION_PATTERN
+from python.generators.sql_processing.utils import NAME_AND_TYPE_PATTERN
+from python.generators.sql_processing.utils import FUNCTION_RETURN_PATTERN
+from python.generators.sql_processing.utils import COLUMN_ANNOTATION_PATTERN
 
 
 def is_internal(name: str) -> bool:
-  return re.match(r'^internal_.*', name, re.IGNORECASE)
+  return re.match(r'^internal_.*', name, re.IGNORECASE) is not None
 
 
 def is_snake_case(s: str) -> bool:
@@ -264,7 +264,7 @@
   def __init__(self, path: str, module: str):
     super().__init__(path, module)
 
-  def parse(self, doc: DocsExtractor.Extract) -> TableFunction:
+  def parse(self, doc: DocsExtractor.Extract) -> Optional[TableFunction]:
     self.name, args, columns, _ = doc.obj_match
 
     # Ignore internal functions.
diff --git a/python/generators/stdlib_docs/utils.py b/python/generators/sql_processing/utils.py
similarity index 72%
rename from python/generators/stdlib_docs/utils.py
rename to python/generators/sql_processing/utils.py
index bc01c32..345c800 100644
--- a/python/generators/stdlib_docs/utils.py
+++ b/python/generators/sql_processing/utils.py
@@ -16,7 +16,7 @@
 import re
 from typing import Dict, List
 
-NAME = r'[a-zA-Z_\d]+'
+NAME = r'[a-zA-Z_\d\{\}]+'
 ANY_WORDS = r'[^\s].*'
 ANY_NON_QUOTE = r'[^\']*.*'
 TYPE = r'[A-Z]+'
@@ -25,16 +25,19 @@
 
 CREATE_TABLE_VIEW_PATTERN = (
     # Match create table/view and catch type
-    fr'CREATE{WS}(?:VIRTUAL )?{WS}(TABLE|VIEW){WS}(?:IF NOT EXISTS)?{WS}'
+    fr'^CREATE{WS}(?:VIRTUAL )?{WS}(TABLE|VIEW){WS}(?:IF NOT EXISTS)?{WS}'
     # Catch the name
     fr'{WS}({NAME}){WS}(?:AS|USING)?{WS}.*')
 
+DROP_TABLE_VIEW_PATTERN = (fr'^DROP{WS}(TABLE|VIEW){WS}IF{WS}EXISTS{WS}'
+                           fr'({NAME});$')
+
 CREATE_FUNCTION_PATTERN = (
-    # Function name: we are matching everything [A-Z]* between ' and ).
+    # Function name.
     fr"CREATE{WS}PERFETTO{WS}FUNCTION{WS}({NAME}){WS}"
-    # Args: anything before closing bracket.
+    # Args: anything in the brackets.
     fr"{WS}\({WS}({ANY_WORDS}){WS}\){WS}"
-    # Type: [A-Z]* between two '.
+    # Type: word after RETURNS.
     fr"{WS}RETURNS{WS}({TYPE}){WS}AS{WS}"
     # Sql: Anything between ' and ');. We are catching \'.
     fr"{WS}({SQL});")
@@ -50,6 +53,14 @@
     # Sql: Anything between ' and ');. We are catching \'.
     fr"{WS}'{WS}({SQL}){WS}'{WS}\){WS};")
 
+COLUMN_ANNOTATION_PATTERN = fr'^\s*({NAME})\s*({ANY_WORDS})'
+
+NAME_AND_TYPE_PATTERN = fr'\s*({NAME})\s+({TYPE})\s*'
+
+ARG_ANNOTATION_PATTERN = fr'\s*{NAME_AND_TYPE_PATTERN}\s+({ANY_WORDS})'
+
+FUNCTION_RETURN_PATTERN = fr'^\s*({TYPE})\s+({ANY_WORDS})'
+
 
 class ObjKind(str, Enum):
   table_view = 'table_view'
@@ -63,13 +74,16 @@
     ObjKind.view_function: CREATE_VIEW_FUNCTION_PATTERN,
 }
 
-COLUMN_ANNOTATION_PATTERN = fr'^\s*({NAME})\s*({ANY_WORDS})'
 
-NAME_AND_TYPE_PATTERN = fr'\s*({NAME})\s+({TYPE})\s*'
-
-ARG_ANNOTATION_PATTERN = fr'\s*{NAME_AND_TYPE_PATTERN}\s+({ANY_WORDS})'
-
-FUNCTION_RETURN_PATTERN = fr'^\s*({TYPE})\s+({ANY_WORDS})'
+# Given a regex pattern and a string to match against, finds all matches
+# (with re.MULTILINE). Returns a dictionary from the 0-based line number
+# where each match starts to the tuple of that match's captured groups.
+def match_pattern(pattern: str, file_str: str) -> Dict[int, re.Match]:
+  line_number_to_matches = {}
+  for match in re.finditer(pattern, file_str, re.MULTILINE):
+    line_id = file_str[:match.start()].count('\n')
+    line_number_to_matches[line_id] = match.groups()
+  return line_number_to_matches
 
 
 # Given a list of lines in a text and the line number, scans backwards to find
@@ -88,12 +102,25 @@
   return comments
 
 
-# Given a regex pattern and a string to match against, returns all the
-# matching positions. Specifically, it returns a dictionary from the line
-# number of the match to the regex match object.
-def match_pattern(pattern: str, file_str: str) -> Dict[int, re.Match]:
-  line_number_to_matches = {}
-  for match in re.finditer(pattern, file_str):
-    line_id = file_str[:match.start()].count('\n')
-    line_number_to_matches[line_id] = match.groups()
-  return line_number_to_matches
+# Given an SQL string, checks whether any banned word is used in a non-comment
+# line and returns one error string per offending line.
+def check_banned_words(sql: str, path: str) -> List[str]:
+  lines = [l.strip() for l in sql.split('\n')]
+  errors = []
+
+  # Ban the use of LIKE and CREATE_FUNCTION in non-comment lines.
+  for line in lines:
+    if line.startswith('--'):
+      continue
+
+    if 'like' in line.casefold():
+      errors.append(
+          'LIKE is banned in trace processor metrics. Prefer GLOB instead.\n'
+          f'Offending file: {path}\n')
+      continue
+
+    if 'create_function' in line.casefold():
+      errors.append('CREATE_FUNCTION is deprecated in trace processor. '
+                    'Prefer CREATE PERFETTO FUNCTION instead.\n'
+                    f'Offending file: {path}')
+  return errors