#!/usr/bin/env python3
# Copyright (C) 2022 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the 'License');
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an 'AS IS' BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
| 15 | |
| 16 | from dataclasses import dataclass |
| 17 | from re import Match |
| 18 | from typing import List, Optional, Tuple |
| 19 | |
Anna Mayzner | 1a84bf0 | 2023-08-16 09:56:21 +0000 | [diff] [blame] | 20 | from python.generators.sql_processing.utils import ObjKind |
| 21 | from python.generators.sql_processing.utils import extract_comment |
| 22 | from python.generators.sql_processing.utils import match_pattern |
| 23 | from python.generators.sql_processing.utils import PATTERN_BY_KIND |
Lalit Maganti | 6b2ac69 | 2023-05-23 01:24:29 +0100 | [diff] [blame] | 24 | |
| 25 | |
class DocsExtractor:
  """Extracts documentation for views/tables/functions from SQL.

  For every object definition matched in the SQL source, the `--` comment
  lines returned by `extract_comment` for that definition are parsed into a
  free-text description followed by zero or more `@key value` annotations.
  """
  path: str
  module_name: str
  sql: str

  @dataclass
  class Annotation:
    """A single `@key value` entry found in a documentation comment."""
    # First space-delimited token of the annotation line, including the
    # leading '@'.
    key: str
    # Remaining text of the annotation line; non-annotation comment lines
    # that follow are appended to it, separated by a single space.
    value: str

  @dataclass
  class Extract:
    """Extracted documentation for a single view/table/function."""
    obj_kind: ObjKind
    obj_match: Match

    description: str
    annotations: List['DocsExtractor.Annotation']

  def __init__(self, path: str, module_name: str, sql: str):
    """Stores the inputs and pre-splits the SQL into lines.

    Args:
      path: Stored as-is on the instance.
      module_name: Stored as-is on the instance.
      sql: Full SQL text to extract documentation from.
    """
    self.path = path
    self.module_name = module_name
    self.sql = sql

    # Split once up-front: comment lookup is done by line number.
    self.sql_lines = sql.split("\n")
    # Initialised empty; no method in this class appends to it.
    self.errors = []

  def extract(self) -> List[Extract]:
    """Returns the extracts for all supported object kinds.

    Results are grouped by kind in a fixed order: tables/views, then
    functions, then table functions.
    """
    extracted = []
    extracted += self._extract_for_kind(ObjKind.table_view)
    extracted += self._extract_for_kind(ObjKind.function)
    extracted += self._extract_for_kind(ObjKind.table_function)
    return extracted

  def _extract_for_kind(self, kind: ObjKind) -> List[Extract]:
    """Returns one extract per definition of `kind`, ordered by line number."""
    line_number_to_matches = match_pattern(PATTERN_BY_KIND[kind], self.sql)
    extracts = []
    # Sort on the line number only so Match objects are never compared.
    for line_number, match in sorted(
        line_number_to_matches.items(), key=lambda item: item[0]):
      comment_lines = extract_comment(self.sql_lines, line_number)
      e = self._extract_from_comment(kind, match, comment_lines)
      if e is not None:
        extracts.append(e)
    return extracts

  def _extract_from_comment(self, kind: ObjKind, match: Match,
                            comment_lines: List[str]) -> Optional[Extract]:
    """Parses comment lines into a description and a list of annotations.

    Lines before the first `@annotation` are joined into the description
    (each followed by a single space). Once an annotation has been seen,
    non-annotation lines are appended to the value of the latest annotation.
    Lines containing only dashes and whitespace are skipped.

    Args:
      kind: Kind of the documented object; stored on the returned extract.
      match: Match for the object definition; stored on the returned extract.
      comment_lines: Comment lines, each of which must start with '--'.

    Returns:
      The populated extract.
    """
    extract = DocsExtractor.Extract(kind, match, '', [])
    for line in comment_lines:
      assert line.startswith('--')

      # Remove the comment marker. lstrip() takes a *set of characters*, so
      # this drops every leading dash (not just the first two), which also
      # makes separator lines such as '-----' count as empty below.
      stripped = line.lstrip('-').lstrip()

      # Ignore lines which only contain '--'.
      if not stripped:
        continue

      # Check if the line is an annotation.
      if not stripped.startswith('@'):
        # We are not in an annotation: if we haven't seen an annotation yet,
        # we must still be parsing the description. Just add to that.
        if not extract.annotations:
          extract.description += stripped + " "
          continue

        # Otherwise, add to the latest annotation.
        extract.annotations[-1].value += " " + stripped
        continue

      # This line is an annotation: find its name and add a new entry.
      # partition() (unlike a two-way split) tolerates an annotation with no
      # value, e.g. '-- @foo', yielding an empty value instead of raising.
      annotation, _, rest = stripped.partition(' ')
      extract.annotations.append(DocsExtractor.Annotation(annotation, rest))
    return extract