blob: c496b4cbe13274f6efcf79e937205772c36e5f7e [file] [log] [blame]
/*
* Copyright (C) 2023 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef SRC_TRACE_PROCESSOR_SQLITE_SQLITE_TOKENIZER_H_
#define SRC_TRACE_PROCESSOR_SQLITE_SQLITE_TOKENIZER_H_
#include <cstdint>
#include <optional>
#include <string>
#include <string_view>
#include <utility>

#include "src/trace_processor/sqlite/sql_source.h"
namespace perfetto {
namespace trace_processor {
// Token types that |SqliteTokenizer| can return.
//
// Every enumerator matches the SQLite definition of the same name 1:1, which
// is why the numeric values are sparse. Do not renumber them independently of
// SQLite.
enum class SqliteTokenType : uint32_t {
  // Punctuation.
  TK_SEMI = 1,
  TK_LP = 22,
  TK_RP = 23,
  TK_COMMA = 25,

  // Comparison operators.
  TK_NE = 52,
  TK_EQ = 53,
  TK_GT = 54,
  TK_LE = 55,
  TK_LT = 56,
  TK_GE = 57,

  // Identifiers.
  TK_ID = 59,

  // Bitwise, arithmetic and other operators.
  TK_BITAND = 102,
  TK_BITOR = 103,
  TK_LSHIFT = 104,
  TK_RSHIFT = 105,
  TK_PLUS = 106,
  TK_MINUS = 107,
  TK_STAR = 108,
  TK_SLASH = 109,
  TK_REM = 110,
  TK_CONCAT = 111,
  TK_PTR = 112,
  TK_BITNOT = 114,

  // Literals, the dot token and variables.
  TK_STRING = 117,
  TK_DOT = 141,
  TK_FLOAT = 153,
  TK_BLOB = 154,
  TK_INTEGER = 155,
  TK_VARIABLE = 156,

  // Whitespace and illegal tokens.
  TK_SPACE = 183,
  TK_ILLEGAL = 184,

  // Not a SQLite value: a single generic constant standing in for all SQLite
  // keywords, as we do not care about distinguishing between the vast majority
  // of them.
  TK_GENERIC_KEYWORD = 1000,
};
// Tokenizes SQL statements according to SQLite SQL language specification:
// https://www2.sqlite.org/hlr40000.html
//
// Usage of this class:
//  SqliteTokenizer tzr(std::move(my_sql_source));
//  for (auto t = tzr.Next(); t.token_type != SqliteTokenType::TK_SEMI;
//       t = tzr.Next()) {
//    // Handle t here
//  }
//
// Instances can be neither copied nor moved; use Reset() to point an existing
// tokenizer at a different SqlSource.
class SqliteTokenizer {
 public:
  // A single SQL token according to the SQLite standard.
  struct Token {
    // The string contents of the token.
    std::string_view str;

    // The type of the token.
    SqliteTokenType token_type = SqliteTokenType::TK_ILLEGAL;

    // Two tokens compare equal when both their string contents and their type
    // are equal.
    bool operator==(const Token& o) const {
      return str == o.str && token_type == o.token_type;
    }

    // Returns true if the token is a semicolon or has empty string contents.
    // Const-qualified because it does not modify the token, which allows it to
    // be called through the `const Token&` arguments used by the tokenizer.
    bool IsTerminal() const {
      return token_type == SqliteTokenType::TK_SEMI || str.empty();
    }
  };

  // Selects whether the |end| token of a [start, end] token range is itself
  // part of the range.
  enum class EndToken {
    // The range stops just before the end token.
    kExclusive,
    // The range also covers the end token.
    kInclusive,
  };

  // Creates a tokenizer which tokenizes |sql|.
  explicit SqliteTokenizer(SqlSource sql);

  // Returns the next SQL token.
  Token Next();

  // Returns the next SQL token which is not of type TK_SPACE.
  Token NextNonWhitespace();

  // Returns the next SQL token which is terminal (see Token::IsTerminal).
  Token NextTerminal();

  // Returns an SqlSource containing all the tokens between |start| and |end|.
  //
  // Note: |start| and |end| must both have been previously returned by this
  // tokenizer. If |end_token| == kInclusive, the end token is also included
  // in the substring.
  SqlSource Substr(const Token& start,
                   const Token& end,
                   EndToken end_token = EndToken::kExclusive) const;

  // Returns an SqlSource containing only the SQL backing |token|.
  //
  // Note: |token| must have been previously returned by this tokenizer.
  SqlSource SubstrToken(const Token& token) const;

  // Returns a traceback error message for the SqlSource backing this tokenizer
  // pointing to |token|. See SqlSource::AsTraceback for more information about
  // this method.
  //
  // Note: |token| must have been previously returned by this tokenizer.
  std::string AsTraceback(const Token& token) const;

  // Replaces the SQL in |rewriter| between |start| and |end| with the contents
  // of |rewrite|. If |end_token| == kInclusive, the end token is also included
  // in the rewrite.
  void Rewrite(SqlSource::Rewriter& rewriter,
               const Token& start,
               const Token& end,
               SqlSource rewrite,
               EndToken end_token = EndToken::kExclusive) const;

  // Replaces the SQL in |rewriter| backing |token| with the contents of
  // |rewrite|.
  void RewriteToken(SqlSource::Rewriter& rewriter,
                    const Token& token,
                    SqlSource rewrite) const;

  // Resets this tokenizer to tokenize |source|. Any previous returned tokens
  // are invalidated.
  void Reset(SqlSource source) {
    source_ = std::move(source);
    // Restart tokenization from the beginning of the new source.
    offset_ = 0;
  }

 private:
  // Copying and moving are both disallowed.
  SqliteTokenizer(const SqliteTokenizer&) = delete;
  SqliteTokenizer& operator=(const SqliteTokenizer&) = delete;
  SqliteTokenizer(SqliteTokenizer&&) = delete;
  SqliteTokenizer& operator=(SqliteTokenizer&&) = delete;

  // The SQL being tokenized.
  SqlSource source_;

  // Current position of the tokenizer; set back to zero by Reset().
  uint32_t offset_ = 0;
};
} // namespace trace_processor
} // namespace perfetto
#endif // SRC_TRACE_PROCESSOR_SQLITE_SQLITE_TOKENIZER_H_