tp: Cleanup of presubmit/docs generating code
Change-Id: I4a845ed35bf0e4db5acba6079c239c9a47c7662f
diff --git a/python/generators/stdlib_docs/extractor.py b/python/generators/sql_processing/docs_extractor.py
similarity index 91%
rename from python/generators/stdlib_docs/extractor.py
rename to python/generators/sql_processing/docs_extractor.py
index fce88f2..52fbd66 100644
--- a/python/generators/stdlib_docs/extractor.py
+++ b/python/generators/sql_processing/docs_extractor.py
@@ -17,10 +17,10 @@
from re import Match
from typing import List, Optional, Tuple
-from python.generators.stdlib_docs.utils import ObjKind
-from python.generators.stdlib_docs.utils import extract_comment
-from python.generators.stdlib_docs.utils import match_pattern
-from python.generators.stdlib_docs.utils import PATTERN_BY_KIND
+from python.generators.sql_processing.utils import ObjKind
+from python.generators.sql_processing.utils import extract_comment
+from python.generators.sql_processing.utils import match_pattern
+from python.generators.sql_processing.utils import PATTERN_BY_KIND
class DocsExtractor:
diff --git a/python/generators/stdlib_docs/parse.py b/python/generators/sql_processing/docs_parse.py
similarity index 93%
rename from python/generators/stdlib_docs/parse.py
rename to python/generators/sql_processing/docs_parse.py
index 26b3c49..079a600 100644
--- a/python/generators/stdlib_docs/parse.py
+++ b/python/generators/sql_processing/docs_parse.py
@@ -17,18 +17,18 @@
from dataclasses import dataclass
import re
import sys
-from typing import Any, Dict, List, Optional, Set, Tuple, Union
+from typing import Any, Dict, List, Optional, Set, Tuple
-from python.generators.stdlib_docs.extractor import DocsExtractor
-from python.generators.stdlib_docs.utils import ObjKind
-from python.generators.stdlib_docs.utils import ARG_ANNOTATION_PATTERN
-from python.generators.stdlib_docs.utils import NAME_AND_TYPE_PATTERN
-from python.generators.stdlib_docs.utils import FUNCTION_RETURN_PATTERN
-from python.generators.stdlib_docs.utils import COLUMN_ANNOTATION_PATTERN
+from python.generators.sql_processing.docs_extractor import DocsExtractor
+from python.generators.sql_processing.utils import ObjKind
+from python.generators.sql_processing.utils import ARG_ANNOTATION_PATTERN
+from python.generators.sql_processing.utils import NAME_AND_TYPE_PATTERN
+from python.generators.sql_processing.utils import FUNCTION_RETURN_PATTERN
+from python.generators.sql_processing.utils import COLUMN_ANNOTATION_PATTERN
def is_internal(name: str) -> bool:
- return re.match(r'^internal_.*', name, re.IGNORECASE)
+ return re.match(r'^internal_.*', name, re.IGNORECASE) is not None
def is_snake_case(s: str) -> bool:
@@ -264,7 +264,7 @@
def __init__(self, path: str, module: str):
super().__init__(path, module)
- def parse(self, doc: DocsExtractor.Extract) -> TableFunction:
+ def parse(self, doc: DocsExtractor.Extract) -> Optional[TableFunction]:
self.name, args, columns, _ = doc.obj_match
# Ignore internal functions.
diff --git a/python/generators/stdlib_docs/utils.py b/python/generators/sql_processing/utils.py
similarity index 72%
rename from python/generators/stdlib_docs/utils.py
rename to python/generators/sql_processing/utils.py
index bc01c32..345c800 100644
--- a/python/generators/stdlib_docs/utils.py
+++ b/python/generators/sql_processing/utils.py
@@ -16,7 +16,7 @@
import re
from typing import Dict, List
-NAME = r'[a-zA-Z_\d]+'
+NAME = r'[a-zA-Z_\d\{\}]+'
ANY_WORDS = r'[^\s].*'
ANY_NON_QUOTE = r'[^\']*.*'
TYPE = r'[A-Z]+'
@@ -25,16 +25,19 @@
CREATE_TABLE_VIEW_PATTERN = (
# Match create table/view and catch type
- fr'CREATE{WS}(?:VIRTUAL )?{WS}(TABLE|VIEW){WS}(?:IF NOT EXISTS)?{WS}'
+ fr'^CREATE{WS}(?:VIRTUAL )?{WS}(TABLE|VIEW){WS}(?:IF NOT EXISTS)?{WS}'
# Catch the name
fr'{WS}({NAME}){WS}(?:AS|USING)?{WS}.*')
+DROP_TABLE_VIEW_PATTERN = (fr'^DROP{WS}(TABLE|VIEW){WS}IF{WS}EXISTS{WS}'
+ fr'({NAME});$')
+
CREATE_FUNCTION_PATTERN = (
- # Function name: we are matching everything [A-Z]* between ' and ).
+ # Function name.
fr"CREATE{WS}PERFETTO{WS}FUNCTION{WS}({NAME}){WS}"
- # Args: anything before closing bracket.
+ # Args: anything in the brackets.
fr"{WS}\({WS}({ANY_WORDS}){WS}\){WS}"
- # Type: [A-Z]* between two '.
+ # Type: word after RETURNS.
fr"{WS}RETURNS{WS}({TYPE}){WS}AS{WS}"
# Sql: Anything between ' and ');. We are catching \'.
fr"{WS}({SQL});")
@@ -50,6 +53,14 @@
# Sql: Anything between ' and ');. We are catching \'.
fr"{WS}'{WS}({SQL}){WS}'{WS}\){WS};")
+COLUMN_ANNOTATION_PATTERN = fr'^\s*({NAME})\s*({ANY_WORDS})'
+
+NAME_AND_TYPE_PATTERN = fr'\s*({NAME})\s+({TYPE})\s*'
+
+ARG_ANNOTATION_PATTERN = fr'\s*{NAME_AND_TYPE_PATTERN}\s+({ANY_WORDS})'
+
+FUNCTION_RETURN_PATTERN = fr'^\s*({TYPE})\s+({ANY_WORDS})'
+
class ObjKind(str, Enum):
table_view = 'table_view'
@@ -63,13 +74,16 @@
ObjKind.view_function: CREATE_VIEW_FUNCTION_PATTERN,
}
-COLUMN_ANNOTATION_PATTERN = fr'^\s*({NAME})\s*({ANY_WORDS})'
-NAME_AND_TYPE_PATTERN = fr'\s*({NAME})\s+({TYPE})\s*'
-
-ARG_ANNOTATION_PATTERN = fr'\s*{NAME_AND_TYPE_PATTERN}\s+({ANY_WORDS})'
-
-FUNCTION_RETURN_PATTERN = fr'^\s*({TYPE})\s+({ANY_WORDS})'
+# Given a regex pattern and a string to match against, returns all the
+# matching positions. Specifically, it returns a dictionary from the line
+# number (zero-based) where a match starts to that match's captured groups.
+def match_pattern(pattern: str, file_str: str) -> Dict[int, re.Match]:
+ line_number_to_matches = {}
+ for match in re.finditer(pattern, file_str, re.MULTILINE):
+ line_id = file_str[:match.start()].count('\n')
+ line_number_to_matches[line_id] = match.groups()
+ return line_number_to_matches
# Given a list of lines in a text and the line number, scans backwards to find
@@ -88,12 +102,25 @@
return comments
-# Given a regex pattern and a string to match against, returns all the
-# matching positions. Specifically, it returns a dictionary from the line
-# number of the match to the regex match object.
-def match_pattern(pattern: str, file_str: str) -> Dict[int, re.Match]:
- line_number_to_matches = {}
- for match in re.finditer(pattern, file_str):
- line_id = file_str[:match.start()].count('\n')
- line_number_to_matches[line_id] = match.groups()
- return line_number_to_matches
+# Given a SQL string, checks whether any banned word is used on a non-comment
+# line and returns a list with one error string per offending line.
+def check_banned_words(sql: str, path: str) -> List[str]:
+ lines = [l.strip() for l in sql.split('\n')]
+ errors = []
+
+  # Ban the use of LIKE and CREATE_FUNCTION in non-comment lines.
+ for line in lines:
+ if line.startswith('--'):
+ continue
+
+ if 'like' in line.casefold():
+ errors.append(
+ 'LIKE is banned in trace processor metrics. Prefer GLOB instead.\n'
+ f'Offending file: {path}\n')
+ continue
+
+ if 'create_function' in line.casefold():
+ errors.append('CREATE_FUNCTION is deprecated in trace processor. '
+ 'Prefer CREATE PERFETTO FUNCTION instead.\n'
+ f'Offending file: {path}')
+ return errors