| /* |
| * Copyright (C) 2018 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #ifndef SRC_TRACE_PROCESSOR_SQLITE_SQL_SOURCE_H_ |
| #define SRC_TRACE_PROCESSOR_SQLITE_SQL_SOURCE_H_ |
| |
| #include <cstdint> |
| #include <optional> |
| #include <string> |
| #include <string_view> |
| #include <tuple> |
| #include <vector> |
| |
| #include "perfetto/base/logging.h" |
| |
| namespace perfetto { |
| namespace trace_processor { |
| |
| // An SQL string which retains knowledge of the source of the SQL (i.e. stdlib |
| // module, ExecuteQuery etc). It also supports "rewriting" parts or all of the |
| // SQL string with a different string which is useful in cases where SQL is |
| // substituted such as macros or function inlining. |
| class SqlSource { |
| public: |
| class Rewriter; |
| |
| // Creates a SqlSource instance wrapping SQL passed to |
| // |TraceProcessor::ExecuteQuery|. |
| static SqlSource FromExecuteQuery(std::string sql); |
| |
| // Creates a SqlSource instance wrapping SQL executed when running a metric. |
| static SqlSource FromMetric(std::string sql, const std::string& metric_file); |
| |
| // Creates a SqlSource instance wrapping SQL executed when running a metric |
| // file (i.e. with RUN_METRIC). |
| static SqlSource FromMetricFile(std::string sql, |
| const std::string& metric_file); |
| |
| // Creates a SqlSource instance wrapping SQL executed when including a module. |
| static SqlSource FromModuleInclude(std::string sql, |
| const std::string& module); |
| |
| // Creates a SqlSource instance wrapping SQL which is an internal |
| // implementation detail of trace processor. |
| static SqlSource FromTraceProcessorImplementation(std::string sql); |
| |
| // Returns this SqlSource instance as a string which can be appended as a |
| // "traceback" frame to an error message. Callers should pass an |offset| |
| // parameter which indicates the exact location of the error in the SQL |
| // string. 0 and |sql().size()| are both valid offset positions and correspond |
| // to the start and end of the source respectively. |
| // |
| // Specifically, this string will include: |
| // a) context about the source of the SQL |
| // b) line and column number of the error |
| // c) a snippet of the SQL and a caret (^) character pointing to the location |
| // of the error. |
| std::string AsTraceback(uint32_t offset) const; |
| |
| // Same as |AsTraceback| but for offsets which come from SQLite instead of |
| // from trace processor tokenization or parsing. |
| std::string AsTracebackForSqliteOffset(std::optional<uint32_t> offset) const; |
| |
| // Creates a SqlSource instance with the SQL taken as a substring starting |
| // at |offset| with |len| characters. |
| SqlSource Substr(uint32_t offset, uint32_t len) const; |
| |
| // Rewrites the SQL backing |this| to SQL from |source| ignoring any existing |
| // rewrites in |this|. |
| // |
| // This is useful when PerfettoSQL statements are transpiled into SQLite |
| // statements but we want to preserve the context of the original statement. |
| SqlSource RewriteAllIgnoreExisting(SqlSource source) const; |
| |
| // Returns the SQL string backing this SqlSource instance; |
| const std::string& sql() const { return root_.rewritten_sql; } |
| |
| // Returns the original SQL string backing this SqlSource instance; |
| const std::string& original_sql() const { return root_.original_sql; } |
| |
| // Returns whether this SqlSource has been rewritten. |
| bool IsRewritten() const { return root_.IsRewritten(); } |
| |
| private: |
| struct Rewrite; |
| |
| // Represents a tree of SQL rewrites, preserving the source for each rewrite. |
| // |
| // Suppose that we have the following situation: |
| // User: `SELECT foo!(a) FROM bar!(slice) a` |
| // foo : `$1.x, $1.y` |
| // bar : `(SELECT baz!($1) FROM $1)` |
| // baz : `$1.x, $1.y, $1.z` |
| // |
| // We want to expand this to |
| // ```SELECT a.x, a.y FROM (SELECT slice.x, slice.y, slice.z FROM slice) a``` |
| // while retaining information about the source of the rewrite. |
| // |
| // For example, the string `a.x, a.y` came from foo, `slice.x, slice.y, |
| // slice.z` came from bar, which itself recursively came from baz etc. |
| // |
| // The purpose of this class is to keep track of the information required for |
| // this "tree" of rewrites (i.e. expansions). In the example above, the tree |
| // would look as follows: |
| // User |
| // / | |
| // foo bar |
| // / |
| // baz |
| // |
| // The properties in each of these nodes is as follows: |
| // User { |
| // original_sql: "SELECT foo!(a) FROM bar!(slice) a" |
| // rewritten_sql: "SELECT a.x, a.y FROM (SELECT slice.x, slice.y, slice.z |
| // FROM slice) a" |
| // rewrites: [ |
| // {original_sql_start: 7, original_sql_end: 14, node: foo}, |
| // {original_sql_start: 20, original_sql_end: 31, node: bar}] |
| // ] |
| // } |
| // foo { |
| // original_sql: "$1.x, $1.y" |
| // rewritten_sql: "a.x, a.y" |
| // rewrites: [] |
| // } |
| // bar { |
| // original_sql: "(SELECT baz!($1) FROM $1 LIMIT 1)" |
| // rewritten_sql: "(SELECT slice.x, slice.y, slice.z FROM slice)" |
| // rewrites: [{original_sql_start: 8, original_sql_end: 16, node: baz}] |
| // } |
| // baz { |
| // original_sql = "$1.x, $1.y, $1.z" |
| // rewritten_sql = "slice.x, slice.y, slice.z" |
| // rewrites: [] |
| // } |
| struct Node { |
| std::string name; |
| bool include_traceback_header = false; |
| uint32_t line = 1; |
| uint32_t col = 1; |
| |
| // The original SQL string used to create this node. |
| std::string original_sql; |
| |
| // The list of rewrites which are applied to |original_sql| ordered by the |
| // offsets. |
| std::vector<Rewrite> rewrites; |
| |
| // The SQL string which is the result of applying |rewrites| to |
| // |original_sql|. See |SqlSource::ApplyRewrites| for details on how this is |
| // computed. |
| std::string rewritten_sql; |
| |
| // Returns the "traceback" for this node and all recursive nodes. See |
| // |SqlSource::AsTraceback| for details. |
| std::string AsTraceback(uint32_t rewritten_offset) const; |
| |
| // Returns the "traceback" for this node only. See |SqlSource::AsTraceback| |
| // for details. |
| std::string SelfTraceback(uint32_t rewritten_offset, |
| uint32_t original_offset) const; |
| |
| Node Substr(uint32_t rewritten_offset, uint32_t rewritten_len) const; |
| |
| bool IsRewritten() const { |
| PERFETTO_CHECK(rewrites.empty() == (original_sql == rewritten_sql)); |
| return !rewrites.empty(); |
| } |
| |
| // Given a |rewritten_offset| for this node, returns the offset into the |
| // |original_sql| which matches that |rewritten_offset|. |
| // |
| // IMPORTANT: if |rewritten_offset| is *inside* a rewrite, the original |
| // offset will point to the *start of the rewrite*. For example, if |
| // we have: |
| // original_sql: "SELECT foo!(a) FROM slice a" |
| // rewritten_sql: "SELECT a.x, a.y FROM slice a" |
| // rewrites: [ |
| // { |
| // original_sql_start: 7, |
| // original_sql_end: 14, |
| // rewritten_sql_start: 7, |
| // rewritten_sql_end: 15, |
| // node: foo |
| // } |
| // ] |
| // then: |
| // RewrittenOffsetToOriginalOffset(7) == 7 // 7 = start of foo |
| // RewrittenOffsetToOriginalOffset(14) == 7 // 7 = start of foo |
| // RewrittenOffsetToOriginalOffset(15) == 14 // 14 = end of foo |
| // RewrittenOffsetToOriginalOffset(16) == 15 |
| uint32_t RewrittenOffsetToOriginalOffset(uint32_t rewritten_offset) const; |
| |
| // Given an |original_offset| for this node, returns the index of a |
| // rewrite whose original range contains |original_offset|. |
| // Returns std::nullopt if there is no such rewrite. |
| std::optional<uint32_t> RewriteForOriginalOffset( |
| uint32_t original_offset) const; |
| }; |
| |
| // Defines a rewrite. See the documentation for |SqlSource::Node| for details |
| // on this. |
| struct Rewrite { |
| // The start and end offsets in |original_sql|. |
| uint32_t original_sql_start; |
| uint32_t original_sql_end; |
| |
| // The start and end offsets in |rewritten_sql|. |
| uint32_t rewritten_sql_start; |
| uint32_t rewritten_sql_end; |
| |
| // Node containing the SQL which replaces the segment of SQL in |
| // |original_sql|. |
| Node rewrite_node; |
| }; |
| |
| SqlSource(); |
| explicit SqlSource(Node); |
| SqlSource(std::string sql, std::string name, bool include_traceback_header); |
| |
| static std::string ApplyRewrites(const std::string&, |
| const std::vector<Rewrite>&); |
| |
| Node root_; |
| }; |
| |
| // Used to rewrite a SqlSource using SQL from other SqlSources. |
| class SqlSource::Rewriter { |
| public: |
| // Creates a Rewriter object which can be used to rewrite the SQL backing |
| // |source|. |
| // |
| // Note that rewrites of portions of the SQL which have already been rewritten |
| // is supported but *only in limited cases*. Specifically, the new rewrite |
| // must not cross the boundary of any existing rewrite. |
| // |
| // For example, if we have: |
| // SqlSource { |
| // original_sql: "SELECT foo!(a) FROM bar!(slice) a" |
| // rewritten_sql: "SELECT a.x, a.y FROM (SELECT slice.x FROM slice) a" |
| // } |
| // then the following are valid: |
| // # Replaces "SELECT " with "INSERT ". Valid because it does not touch |
| // # any rewrite. |
| // Rewrite(0, 7, "INSERT ") |
| // |
| // # Replaces "a.x, a." with "a.z, ". Valid because it only touches the |
| // # contents of the existing "foo" rewrite. |
| // Rewrite(7, 14, "a.z, ") |
| // while the following are invalid: |
| // # Fails to replace "SELECT a" with "I". Invalid because it affects both |
| // # non-rewritten source and the "foo" rewrite. |
| // Rewrite(0, 8, "I") |
| // |
| // # Fails to replace "a.x, a.y FROM (" with "(". Invalid because it affects |
| // # the "foo" rewrite, non-rewritten source and the "bar" rewrite. |
| // Rewrite(7, 23, "(") |
| explicit Rewriter(SqlSource source); |
| |
| // Replaces the SQL in |source.rewritten_sql| between |rewritten_start| and |
| // |rewritten_end| with the contents of |rewrite|. |
| // |
| // Note that calls to Rewrite must be monontonic and non-overlapping. i.e. |
| // if Rewrite(0, 10) is called, the next |rewritten_end| must be greater than |
| // or equal to 10. |
| // |
| // Note also that all offsets passed to this function correspond to offsets |
| // into |source.rewritten_sql|: past calls to rewrite do not affect future |
| // offsets. |
| void Rewrite(uint32_t rewritten_start, |
| uint32_t rewritten_end, |
| SqlSource rewrite); |
| |
| // Returns the rewritten SqlSource instance. |
| SqlSource Build() &&; |
| |
| private: |
| explicit Rewriter(Node); |
| |
| Node orig_; |
| std::vector<SqlSource::Rewriter> nested_; |
| std::vector<SqlSource::Rewrite> non_nested_; |
| }; |
| |
| } // namespace trace_processor |
| } // namespace perfetto |
| |
| #endif // SRC_TRACE_PROCESSOR_SQLITE_SQL_SOURCE_H_ |