// blob: c3f111be377b0558fa8c423ebb75bcfef0d5b201 [file] [log] [blame]
/*
* Copyright (C) 2018 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef SRC_TRACE_PROCESSOR_SQLITE_SQL_SOURCE_H_
#define SRC_TRACE_PROCESSOR_SQLITE_SQL_SOURCE_H_
#include <cstdint>
#include <optional>
#include <string>
#include <string_view>
#include <tuple>
#include <vector>
#include "perfetto/base/logging.h"
namespace perfetto {
namespace trace_processor {
// An SQL string which retains knowledge of the source of the SQL (i.e. stdlib
// module, ExecuteQuery etc). It also supports "rewriting" parts or all of the
// SQL string with a different string which is useful in cases where SQL is
// substituted such as macros or function inlining.
class SqlSource {
public:
// Helper used to build a rewritten SqlSource; declared after this class.
class Rewriter;
// Creates a SqlSource instance wrapping SQL passed to
// |TraceProcessor::ExecuteQuery|.
static SqlSource FromExecuteQuery(std::string sql);
// Creates a SqlSource instance wrapping SQL executed when running a metric.
// |metric_file| identifies the metric the SQL belongs to.
static SqlSource FromMetric(std::string sql, const std::string& metric_file);
// Creates a SqlSource instance wrapping SQL executed when running a metric
// file (i.e. with RUN_METRIC).
static SqlSource FromMetricFile(std::string sql,
const std::string& metric_file);
// Creates a SqlSource instance wrapping SQL executed when including a module.
// |module| identifies the module being included.
static SqlSource FromModuleInclude(std::string sql,
const std::string& module);
// Creates a SqlSource instance wrapping SQL which is an internal
// implementation detail of trace processor.
static SqlSource FromTraceProcessorImplementation(std::string sql);
// Returns this SqlSource instance as a string which can be appended as a
// "traceback" frame to an error message. Callers should pass an |offset|
// parameter which indicates the exact location of the error in the SQL
// string. 0 and |sql().size()| are both valid offset positions and correspond
// to the start and end of the source respectively.
//
// Specifically, this string will include:
// a) context about the source of the SQL
// b) line and column number of the error
// c) a snippet of the SQL and a caret (^) character pointing to the location
// of the error.
std::string AsTraceback(uint32_t offset) const;
// Same as |AsTraceback| but for offsets which come from SQLite instead of
// from trace processor tokenization or parsing.
//
// NOTE(review): how std::nullopt is handled is not visible in this header;
// confirm against the implementation before relying on it.
std::string AsTracebackForSqliteOffset(std::optional<uint32_t> offset) const;
// Creates a SqlSource instance with the SQL taken as a substring starting
// at |offset| with |len| characters.
//
// NOTE(review): presumably |offset| and |len| index into |sql()| (i.e. the
// rewritten SQL), mirroring the parameter names of |Node::Substr| - confirm.
SqlSource Substr(uint32_t offset, uint32_t len) const;
// Rewrites the SQL backing |this| to SQL from |source| ignoring any existing
// rewrites in |this|.
//
// This is useful when PerfettoSQL statements are transpiled into SQLite
// statements but we want to preserve the context of the original statement.
SqlSource RewriteAllIgnoreExisting(SqlSource source) const;
// Returns the SQL string backing this SqlSource instance. This is the
// *rewritten* SQL of the root node, i.e. the SQL after all rewrites have been
// applied; it is equal to |original_sql()| when there are no rewrites.
const std::string& sql() const { return root_.rewritten_sql; }
// Returns the original SQL string backing this SqlSource instance, i.e. the
// SQL of the root node before any rewrites were applied.
const std::string& original_sql() const { return root_.original_sql; }
// Returns whether this SqlSource has been rewritten (i.e. whether the root
// node has at least one rewrite).
bool IsRewritten() const { return root_.IsRewritten(); }
private:
// Forward declaration: |Node| holds a vector of |Rewrite| and |Rewrite| in
// turn holds a |Node|, so one of the two has to be declared ahead of its
// definition.
struct Rewrite;
// Represents a tree of SQL rewrites, preserving the source for each rewrite.
//
// Suppose that we have the following situation:
// User: `SELECT foo!(a) FROM bar!(slice) a`
// foo : `$1.x, $1.y`
// bar : `(SELECT baz!($1) FROM $1)`
// baz : `$1.x, $1.y, $1.z`
//
// We want to expand this to
// ```SELECT a.x, a.y FROM (SELECT slice.x, slice.y, slice.z FROM slice) a```
// while retaining information about the source of the rewrite.
//
// For example, the string `a.x, a.y` came from foo, `slice.x, slice.y,
// slice.z` came from bar, which itself recursively came from baz etc.
//
// The purpose of this class is to keep track of the information required for
// this "tree" of rewrites (i.e. expansions). In the example above, the tree
// would look as follows:
//            User
//           /    \
//         foo    bar
//                 |
//                baz
//
// The properties in each of these nodes is as follows:
// User {
//   original_sql: "SELECT foo!(a) FROM bar!(slice) a"
//   rewritten_sql: "SELECT a.x, a.y FROM (SELECT slice.x, slice.y, slice.z
//                   FROM slice) a"
//   rewrites: [
//     {original_sql_start: 7, original_sql_end: 14, node: foo},
//     {original_sql_start: 20, original_sql_end: 31, node: bar}
//   ]
// }
// foo {
//   original_sql: "$1.x, $1.y"
//   rewritten_sql: "a.x, a.y"
//   rewrites: []
// }
// bar {
//   original_sql: "(SELECT baz!($1) FROM $1)"
//   rewritten_sql: "(SELECT slice.x, slice.y, slice.z FROM slice)"
//   rewrites: [{original_sql_start: 8, original_sql_end: 16, node: baz}]
// }
// baz {
//   original_sql: "$1.x, $1.y, $1.z"
//   rewritten_sql: "slice.x, slice.y, slice.z"
//   rewrites: []
// }
struct Node {
// NOTE(review): presumably a human-readable label for where this SQL came
// from, used when formatting tracebacks - confirm in the implementation.
std::string name;
// NOTE(review): presumably controls whether the traceback for this node is
// prefixed with a header line - confirm in the implementation.
bool include_traceback_header = false;
// NOTE(review): presumably the 1-based line and column at which
// |original_sql| starts within its enclosing source (so that substrings
// report positions relative to the full source) - confirm.
uint32_t line = 1;
uint32_t col = 1;
// The original SQL string used to create this node.
std::string original_sql;
// The list of rewrites which are applied to |original_sql| ordered by the
// offsets.
std::vector<Rewrite> rewrites;
// The SQL string which is the result of applying |rewrites| to
// |original_sql|. See |SqlSource::ApplyRewrites| for details on how this is
// computed.
std::string rewritten_sql;
// Returns the "traceback" for this node and all recursive nodes. See
// |SqlSource::AsTraceback| for details. |rewritten_offset| is an offset into
// |rewritten_sql|.
std::string AsTraceback(uint32_t rewritten_offset) const;
// Returns the "traceback" for this node only. See |SqlSource::AsTraceback|
// for details. Takes the location both as an offset into |rewritten_sql|
// and as the matching offset into |original_sql|.
std::string SelfTraceback(uint32_t rewritten_offset,
uint32_t original_offset) const;
// Returns a node for the part of this node covering |rewritten_len|
// characters of |rewritten_sql| starting at |rewritten_offset|.
Node Substr(uint32_t rewritten_offset, uint32_t rewritten_len) const;
// Returns whether any rewrite has been applied to this node.
bool IsRewritten() const {
// Invariant: a node has no rewrites if and only if its rewritten SQL is
// identical to its original SQL. Note that this compares the two strings on
// every call.
PERFETTO_CHECK(rewrites.empty() == (original_sql == rewritten_sql));
return !rewrites.empty();
}
// Given a |rewritten_offset| for this node, returns the offset into the
// |original_sql| which matches that |rewritten_offset|.
//
// IMPORTANT: if |rewritten_offset| is *inside* a rewrite, the original
// offset will point to the *start of the rewrite*. For example, if
// we have:
//   original_sql: "SELECT foo!(a) FROM slice a"
//   rewritten_sql: "SELECT a.x, a.y FROM slice a"
//   rewrites: [
//     {
//       original_sql_start: 7,
//       original_sql_end: 14,
//       rewritten_sql_start: 7,
//       rewritten_sql_end: 15,
//       node: foo
//     }
//   ]
// then:
//   RewrittenOffsetToOriginalOffset(7) == 7    // 7 = start of foo
//   RewrittenOffsetToOriginalOffset(14) == 7   // 7 = start of foo
//   RewrittenOffsetToOriginalOffset(15) == 14  // 14 = end of foo
//   RewrittenOffsetToOriginalOffset(16) == 15
uint32_t RewrittenOffsetToOriginalOffset(uint32_t rewritten_offset) const;
// Given an |original_offset| for this node, returns the index of a
// rewrite whose original range contains |original_offset|.
// Returns std::nullopt if there is no such rewrite.
std::optional<uint32_t> RewriteForOriginalOffset(
uint32_t original_offset) const;
};
// Defines a rewrite. See the documentation for |SqlSource::Node| for details
// on this.
//
// As the examples on |SqlSource::Node| show, both ranges are half-open: the
// start offset is the first replaced character and the end offset is one past
// the last (e.g. `foo!(a)` at characters 7..13 is recorded as start 7, end
// 14).
struct Rewrite {
// The start and end offsets in |original_sql|.
uint32_t original_sql_start;
uint32_t original_sql_end;
// The start and end offsets in |rewritten_sql|.
uint32_t rewritten_sql_start;
uint32_t rewritten_sql_end;
// Node containing the SQL which replaces the segment of SQL in
// |original_sql|.
Node rewrite_node;
};
// Constructors are private: instances are created through the static
// From*() factories above (or derived from an existing instance).
SqlSource();
// Wraps an already-built node tree as the root of a SqlSource.
explicit SqlSource(Node);
// NOTE(review): presumably initialises the root node's SQL, |name| and
// |include_traceback_header| from the arguments - confirm in the
// implementation.
SqlSource(std::string sql, std::string name, bool include_traceback_header);
// Computes the rewritten SQL for a node from its original SQL and its list of
// rewrites (see |Node::rewritten_sql|).
static std::string ApplyRewrites(const std::string&,
const std::vector<Rewrite>&);
// Root of the rewrite tree; |sql()| and |original_sql()| read from this node.
Node root_;
};
// Used to rewrite a SqlSource using SQL from other SqlSources.
class SqlSource::Rewriter {
public:
// Creates a Rewriter object which can be used to rewrite the SQL backing
// |source|.
//
// Note that rewrites of portions of the SQL which have already been rewritten
// are supported but *only in limited cases*. Specifically, the new rewrite
// must not cross the boundary of any existing rewrite.
//
// For example, if we have:
//   SqlSource {
//     original_sql: "SELECT foo!(a) FROM bar!(slice) a"
//     rewritten_sql: "SELECT a.x, a.y FROM (SELECT slice.x FROM slice) a"
//   }
// then the following are valid:
//   # Replaces "SELECT " with "INSERT ". Valid because it does not touch
//   # any rewrite.
//   Rewrite(0, 7, "INSERT ")
//
//   # Replaces "a.x, a." with "a.z, ". Valid because it only touches the
//   # contents of the existing "foo" rewrite.
//   Rewrite(7, 14, "a.z, ")
// while the following are invalid:
//   # Fails to replace "SELECT a" with "I". Invalid because it affects both
//   # non-rewritten source and the "foo" rewrite.
//   Rewrite(0, 8, "I")
//
//   # Fails to replace "a.x, a.y FROM (" with "(". Invalid because it affects
//   # the "foo" rewrite, non-rewritten source and the "bar" rewrite.
//   Rewrite(7, 22, "(")
explicit Rewriter(SqlSource source);
// Replaces the SQL in |source.rewritten_sql| between |rewritten_start| and
// |rewritten_end| with the contents of |rewrite|.
//
// Note that calls to Rewrite must be monotonic and non-overlapping. i.e.
// if Rewrite(0, 10) is called, the next |rewritten_end| must be greater than
// or equal to 10.
// NOTE(review): "non-overlapping" implies the next |rewritten_start| must
// also be greater than or equal to 10 - confirm against the implementation.
//
// Note also that all offsets passed to this function correspond to offsets
// into |source.rewritten_sql|: past calls to rewrite do not affect future
// offsets.
void Rewrite(uint32_t rewritten_start,
uint32_t rewritten_end,
SqlSource rewrite);
// Returns the rewritten SqlSource instance.
//
// This function is rvalue-ref-qualified, so it can only be invoked on an
// rvalue Rewriter, e.g. `std::move(rewriter).Build()`.
SqlSource Build() &&;
private:
// Creates a Rewriter directly from a node rather than from a SqlSource.
explicit Rewriter(Node);
// The node being rewritten.
Node orig_;
// NOTE(review): presumably one Rewriter per existing rewrite of |orig_|,
// collecting new rewrites which fall inside that existing rewrite - confirm
// in the implementation.
std::vector<SqlSource::Rewriter> nested_;
// NOTE(review): presumably the new rewrites which apply to SQL of |orig_|
// that is not inside any existing rewrite - confirm in the implementation.
std::vector<SqlSource::Rewrite> non_nested_;
};
} // namespace trace_processor
} // namespace perfetto
#endif // SRC_TRACE_PROCESSOR_SQLITE_SQL_SOURCE_H_