// blob: d2cb76f2e8adad5aa5824f28571bd62271773cc6 [file] [log] [blame]
// Copyright (c) 2019 Pantor. All rights reserved.
#ifndef INCLUDE_INJA_PARSER_HPP_
#define INCLUDE_INJA_PARSER_HPP_
#include <limits>
#include <string>
#include <utility>
#include <vector>
#include "config.hpp"
#include "exceptions.hpp"
#include "function_storage.hpp"
#include "lexer.hpp"
#include "template.hpp"
#include "node.hpp"
#include "token.hpp"
#include "utils.hpp"
#include <nlohmann/json.hpp>
namespace inja {
class ParserStatic {
ParserStatic() {
function_storage.add_builtin("at", 2, Node::Op::At);
function_storage.add_builtin("default", 2, Node::Op::Default);
function_storage.add_builtin("divisibleBy", 2, Node::Op::DivisibleBy);
function_storage.add_builtin("even", 1, Node::Op::Even);
function_storage.add_builtin("first", 1, Node::Op::First);
function_storage.add_builtin("float", 1, Node::Op::Float);
function_storage.add_builtin("int", 1, Node::Op::Int);
function_storage.add_builtin("last", 1, Node::Op::Last);
function_storage.add_builtin("length", 1, Node::Op::Length);
function_storage.add_builtin("lower", 1, Node::Op::Lower);
function_storage.add_builtin("max", 1, Node::Op::Max);
function_storage.add_builtin("min", 1, Node::Op::Min);
function_storage.add_builtin("odd", 1, Node::Op::Odd);
function_storage.add_builtin("range", 1, Node::Op::Range);
function_storage.add_builtin("round", 2, Node::Op::Round);
function_storage.add_builtin("sort", 1, Node::Op::Sort);
function_storage.add_builtin("upper", 1, Node::Op::Upper);
function_storage.add_builtin("exists", 1, Node::Op::Exists);
function_storage.add_builtin("existsIn", 2, Node::Op::ExistsInObject);
function_storage.add_builtin("isBoolean", 1, Node::Op::IsBoolean);
function_storage.add_builtin("isNumber", 1, Node::Op::IsNumber);
function_storage.add_builtin("isInteger", 1, Node::Op::IsInteger);
function_storage.add_builtin("isFloat", 1, Node::Op::IsFloat);
function_storage.add_builtin("isObject", 1, Node::Op::IsObject);
function_storage.add_builtin("isArray", 1, Node::Op::IsArray);
function_storage.add_builtin("isString", 1, Node::Op::IsString);
}
public:
ParserStatic(const ParserStatic &) = delete;
ParserStatic &operator=(const ParserStatic &) = delete;
static const ParserStatic &get_instance() {
static ParserStatic instance;
return instance;
}
FunctionStorage function_storage;
};
/*!
 * \brief Class for parsing an inja Template.
 *
 * The parser pulls tokens from its Lexer and appends Node instructions to
 * Template::nodes. Forward jumps of `if`/`else` blocks and loop boundaries are
 * patched once the matching `else`/`endif`/`endfor` is reached; the pending
 * node indices are kept in if_stack and loop_stack.
 *
 * All syntax errors are reported by throwing ParserError (see
 * throw_parser_error()).
 */
class Parser {
  /// Bookkeeping for one open `if` block.
  struct IfData {
    using jump_t = size_t;

    /// Marker stored in prev_cond_jump after a plain `else`: there is no
    /// conditional jump left whose destination still has to be filled in.
    static jump_t no_jump() { return std::numeric_limits<jump_t>::max(); }

    /// Index of the ConditionalJump node that still needs a destination.
    jump_t prev_cond_jump;
    /// Indices of the Jump nodes that must be pointed at the `endif`.
    std::vector<jump_t> uncond_jumps;

    explicit IfData(jump_t condJump) : prev_cond_jump(condJump) {}
  };

  const ParserStatic &parser_static;
  const ParserConfig &config;
  Lexer lexer;
  TemplateStorage &template_storage;

  Token tok;                    // current token
  Token peek_tok;               // one-token lookahead, valid iff have_peek_tok
  bool have_peek_tok {false};

  std::vector<IfData> if_stack;    // open `if` blocks, innermost last
  std::vector<size_t> loop_stack;  // indices of StartLoop nodes of open loops

  /// Throws ParserError carrying \p message and the lexer's current position.
  [[noreturn]] void throw_parser_error(const std::string &message) {
    throw ParserError(message, lexer.current_position());
  }

  /// Advances tok, consuming the lookahead token first if one was fetched.
  void get_next_token() {
    if (have_peek_tok) {
      tok = peek_tok;
      have_peek_tok = false;
    } else {
      tok = lexer.scan();
    }
  }

  /// Fills peek_tok with the token after tok (at most once per token).
  void get_peek_token() {
    if (!have_peek_tok) {
      peek_tok = lexer.scan();
      have_peek_tok = true;
    }
  }

public:
  /*!
   * \param parser_config       parser options (notation, include handling); kept by reference
   * \param lexer_config        configuration handed to the internal Lexer
   * \param included_templates  storage that receives templates pulled in by `include`; kept by reference
   */
  explicit Parser(const ParserConfig &parser_config, const LexerConfig &lexer_config,
                  TemplateStorage &included_templates)
      // initializers listed in member declaration order
      : parser_static(ParserStatic::get_instance()), config(parser_config), lexer(lexer_config),
        template_storage(included_templates) {}

  /*!
   * Parses `and-expression [ "or" and-expression ]` starting at the current
   * token and appends the resulting nodes to \p tmpl.
   * \return false if no expression could be parsed.
   */
  bool parse_expression(Template &tmpl) {
    if (!parse_expression_and(tmpl)) {
      return false;
    }
    if (tok.kind != Token::Kind::Id || tok.text != static_cast<decltype(tok.text)>("or")) {
      return true;
    }
    get_next_token();
    if (!parse_expression_and(tmpl)) {
      return false;
    }
    append_function(tmpl, Node::Op::Or, 2);
    return true;
  }

  /*!
   * Parses `not-expression [ "and" not-expression ]`.
   * \return false if an operand could not be parsed.
   */
  bool parse_expression_and(Template &tmpl) {
    if (!parse_expression_not(tmpl)) {
      return false;
    }
    if (tok.kind != Token::Kind::Id || tok.text != static_cast<decltype(tok.text)>("and")) {
      return true;
    }
    get_next_token();
    if (!parse_expression_not(tmpl)) {
      return false;
    }
    append_function(tmpl, Node::Op::And, 2);
    return true;
  }

  /*!
   * Parses any number of leading `not` keywords followed by a comparison.
   * \return false if the operand could not be parsed.
   */
  bool parse_expression_not(Template &tmpl) {
    if (tok.kind == Token::Kind::Id && tok.text == static_cast<decltype(tok.text)>("not")) {
      get_next_token();
      if (!parse_expression_not(tmpl)) {
        return false;
      }
      append_function(tmpl, Node::Op::Not, 1);
      return true;
    } else {
      return parse_expression_comparison(tmpl);
    }
  }

  /*!
   * Parses `datum [ operator datum ]` where operator is `in` or one of the
   * comparison tokens (==, >, <, <=, >=, !=).
   * \return false if an operand could not be parsed.
   */
  bool parse_expression_comparison(Template &tmpl) {
    if (!parse_expression_datum(tmpl)) {
      return false;
    }
    Node::Op op;
    switch (tok.kind) {
    case Token::Kind::Id:
      if (tok.text == static_cast<decltype(tok.text)>("in")) {
        op = Node::Op::In;
      } else {
        return true;
      }
      break;
    case Token::Kind::Equal:
      op = Node::Op::Equal;
      break;
    case Token::Kind::GreaterThan:
      op = Node::Op::Greater;
      break;
    case Token::Kind::LessThan:
      op = Node::Op::Less;
      break;
    case Token::Kind::LessEqual:
      op = Node::Op::LessEqual;
      break;
    case Token::Kind::GreaterEqual:
      op = Node::Op::GreaterEqual;
      break;
    case Token::Kind::NotEqual:
      op = Node::Op::Different;
      break;
    default:
      // no operator follows: the single datum is the whole comparison
      return true;
    }
    get_next_token();
    if (!parse_expression_datum(tmpl)) {
      return false;
    }
    append_function(tmpl, op, 2);
    return true;
  }

  /*!
   * Parses a single operand: a parenthesized expression, a function call, a
   * variable lookup or a JSON literal.
   *
   * JSON literals are not interpreted token by token. The loop only tracks
   * bracket/brace nesting; once the literal is complete, the source text from
   * its first to its last token is handed to json::parse in one piece.
   *
   * \return false if the current token cannot start an operand.
   */
  bool parse_expression_datum(Template &tmpl) {
    nonstd::string_view json_first;  // first token of the JSON literal being collected
    size_t bracket_level = 0;
    size_t brace_level = 0;
    for (;;) {
      switch (tok.kind) {
      case Token::Kind::LeftParen: {
        get_next_token();
        if (!parse_expression(tmpl)) {
          return false;
        }
        if (tok.kind != Token::Kind::RightParen) {
          throw_parser_error("unmatched '('");
        }
        get_next_token();
        return true;
      }
      case Token::Kind::Id:
        get_peek_token();
        if (peek_tok.kind == Token::Kind::LeftParen) {
          // function call, parse arguments
          Token func_token = tok;
          get_next_token(); // id
          get_next_token(); // leftParen
          unsigned int num_args = 0;
          if (tok.kind == Token::Kind::RightParen) {
            // no args
            get_next_token();
          } else {
            for (;;) {
              if (!parse_expression(tmpl)) {
                throw_parser_error("expected expression, got '" + tok.describe() + "'");
              }
              num_args += 1;
              if (tok.kind == Token::Kind::RightParen) {
                get_next_token();
                break;
              }
              if (tok.kind != Token::Kind::Comma) {
                throw_parser_error("expected ')' or ',', got '" + tok.describe() + "'");
              }
              get_next_token();
            }
          }
          auto op = parser_static.function_storage.find_builtin(func_token.text, num_args);
          if (op != Node::Op::Nop) {
            // swap arguments for default(); see comment in RenderTo()
            if (op == Node::Op::Default) {
              std::swap(tmpl.nodes.back(), *(tmpl.nodes.rbegin() + 1));
            }
            append_function(tmpl, op, num_args);
            return true;
          } else {
            // not a built-in with this name/arity: emit a user callback
            append_callback(tmpl, func_token.text, num_args);
            return true;
          }
        } else if (tok.text == static_cast<decltype(tok.text)>("true") ||
                   tok.text == static_cast<decltype(tok.text)>("false") ||
                   tok.text == static_cast<decltype(tok.text)>("null")) {
          // true, false, null are json literals
          if (brace_level == 0 && bracket_level == 0) {
            json_first = tok.text;
            goto returnJson;
          }
          break;
        } else {
          // any other identifier is a variable lookup (pointer or dot notation per config)
          auto flag = config.notation == ElementNotation::Pointer ? Node::Flag::ValueLookupPointer : Node::Flag::ValueLookupDot;
          tmpl.nodes.emplace_back(Node::Op::Push, tok.text, flag);
          get_next_token();
          return true;
        }
      // json passthrough
      case Token::Kind::Number:
      case Token::Kind::String:
        if (brace_level == 0 && bracket_level == 0) {
          json_first = tok.text;
          goto returnJson;
        }
        break;
      case Token::Kind::Comma:
      case Token::Kind::Colon:
        // separators are only valid inside an array or object literal
        if (brace_level == 0 && bracket_level == 0) {
          throw_parser_error("unexpected token '" + tok.describe() + "'");
        }
        break;
      case Token::Kind::LeftBracket:
        if (brace_level == 0 && bracket_level == 0) {
          json_first = tok.text;
        }
        bracket_level += 1;
        break;
      case Token::Kind::LeftBrace:
        if (brace_level == 0 && bracket_level == 0) {
          json_first = tok.text;
        }
        brace_level += 1;
        break;
      case Token::Kind::RightBracket:
        if (bracket_level == 0) {
          throw_parser_error("unexpected ']'");
        }
        bracket_level -= 1;
        if (brace_level == 0 && bracket_level == 0) {
          goto returnJson;
        }
        break;
      case Token::Kind::RightBrace:
        if (brace_level == 0) {
          throw_parser_error("unexpected '}'");
        }
        brace_level -= 1;
        if (brace_level == 0 && bracket_level == 0) {
          goto returnJson;
        }
        break;
      default:
        if (brace_level != 0) {
          throw_parser_error("unmatched '{'");
        }
        if (bracket_level != 0) {
          throw_parser_error("unmatched '['");
        }
        return false;
      }
      get_next_token();
    }

  returnJson:
    // bridge across all intermediate tokens
    // (the pointer arithmetic assumes json_first and tok.text are views into
    // the same contiguous buffer)
    nonstd::string_view json_text(json_first.data(), tok.text.data() - json_first.data() + tok.text.size());
    tmpl.nodes.emplace_back(Node::Op::Push, json::parse(json_text), Node::Flag::ValueImmediate);
    get_next_token();
    return true;
  }

  /*!
   * Parses one statement (`if`, `else`, `else if`, `endif`, `for`, `endfor`,
   * `include`) whose keyword is the current token.
   *
   * \param tmpl  template receiving the generated nodes
   * \param path  directory prefix prepended to the name given to `include`
   * \return false if the current token is not a known statement keyword or
   *         if the condition / iterable expression could not be parsed.
   */
  bool parse_statement(Template &tmpl, nonstd::string_view path) {
    if (tok.kind != Token::Kind::Id) {
      return false;
    }
    if (tok.text == static_cast<decltype(tok.text)>("if")) {
      get_next_token();
      // evaluate expression
      if (!parse_expression(tmpl)) {
        return false;
      }
      // start a new if block on if stack
      if_stack.emplace_back(static_cast<IfData::jump_t>(tmpl.nodes.size()));
      // conditional jump; destination will be filled in by else or endif
      tmpl.nodes.emplace_back(Node::Op::ConditionalJump);
    } else if (tok.text == static_cast<decltype(tok.text)>("endif")) {
      if (if_stack.empty()) {
        throw_parser_error("endif without matching if");
      }
      auto &if_data = if_stack.back();
      get_next_token();
      // previous conditional jump jumps here
      if (if_data.prev_cond_jump != IfData::no_jump()) {
        tmpl.nodes[if_data.prev_cond_jump].args = tmpl.nodes.size();
      }
      // update all previous unconditional jumps to here
      for (size_t i : if_data.uncond_jumps) {
        tmpl.nodes[i].args = tmpl.nodes.size();
      }
      // pop if stack
      if_stack.pop_back();
    } else if (tok.text == static_cast<decltype(tok.text)>("else")) {
      if (if_stack.empty()) {
        throw_parser_error("else without matching if");
      }
      auto &if_data = if_stack.back();
      // A plain else leaves no conditional jump to patch. A further else in
      // the same block would otherwise index tmpl.nodes with the marker value.
      if (if_data.prev_cond_jump == IfData::no_jump()) {
        throw_parser_error("else after else in the same if block");
      }
      get_next_token();
      // end previous block with unconditional jump to endif; destination will be
      // filled in by endif
      if_data.uncond_jumps.push_back(tmpl.nodes.size());
      tmpl.nodes.emplace_back(Node::Op::Jump);
      // previous conditional jump jumps here
      tmpl.nodes[if_data.prev_cond_jump].args = tmpl.nodes.size();
      if_data.prev_cond_jump = IfData::no_jump();
      // chained else if
      if (tok.kind == Token::Kind::Id && tok.text == static_cast<decltype(tok.text)>("if")) {
        get_next_token();
        // evaluate expression
        if (!parse_expression(tmpl)) {
          return false;
        }
        // update "previous jump"
        if_data.prev_cond_jump = tmpl.nodes.size();
        // conditional jump; destination will be filled in by else or endif
        tmpl.nodes.emplace_back(Node::Op::ConditionalJump);
      }
    } else if (tok.text == static_cast<decltype(tok.text)>("for")) {
      get_next_token();
      // options: for a in arr; for a, b in obj
      if (tok.kind != Token::Kind::Id) {
        throw_parser_error("expected id, got '" + tok.describe() + "'");
      }
      Token value_token = tok;
      get_next_token();
      Token key_token;
      if (tok.kind == Token::Kind::Comma) {
        // two identifiers: the first one is the key, the second one the value
        get_next_token();
        if (tok.kind != Token::Kind::Id) {
          throw_parser_error("expected id, got '" + tok.describe() + "'");
        }
        key_token = std::move(value_token);
        value_token = tok;
        get_next_token();
      }
      if (tok.kind != Token::Kind::Id || tok.text != static_cast<decltype(tok.text)>("in")) {
        throw_parser_error("expected 'in', got '" + tok.describe() + "'");
      }
      get_next_token();
      if (!parse_expression(tmpl)) {
        return false;
      }
      // remember the StartLoop node so endfor can link both ends of the loop
      loop_stack.push_back(tmpl.nodes.size());
      tmpl.nodes.emplace_back(Node::Op::StartLoop);
      if (!key_token.text.empty()) {
        tmpl.nodes.back().value = key_token.text;
      }
      tmpl.nodes.back().str = static_cast<std::string>(value_token.text);
      tmpl.nodes.back().view = value_token.text;
    } else if (tok.text == static_cast<decltype(tok.text)>("endfor")) {
      get_next_token();
      if (loop_stack.empty()) {
        throw_parser_error("endfor without matching for");
      }
      // update loop with EndLoop index (for empty case)
      tmpl.nodes[loop_stack.back()].args = tmpl.nodes.size();
      tmpl.nodes.emplace_back(Node::Op::EndLoop);
      tmpl.nodes.back().args = loop_stack.back() + 1; // loop body
      loop_stack.pop_back();
    } else if (tok.text == static_cast<decltype(tok.text)>("include")) {
      get_next_token();
      if (tok.kind != Token::Kind::String) {
        throw_parser_error("expected string, got '" + tok.describe() + "'");
      }
      // build the relative path
      json json_name = json::parse(tok.text);
      std::string pathname = static_cast<std::string>(path);
      pathname += json_name.get_ref<const std::string &>();
      if (pathname.compare(0, 2, "./") == 0) {
        pathname.erase(0, 2);
      }
      // sys::path::remove_dots(pathname, true, sys::path::Style::posix);
      if (config.search_included_templates_in_files && template_storage.find(pathname) == template_storage.end()) {
        // Register the template before parsing it, so that an include of the
        // same pathname encountered while parsing finds it already stored.
        template_storage.emplace(pathname, Template(load_file(pathname)));
        parse_into_template(template_storage.at(pathname), pathname);
      }
      // generate a reference node
      tmpl.nodes.emplace_back(Node::Op::Include, json(pathname), Node::Flag::ValueImmediate);
      get_next_token();
    } else {
      return false;
    }
    return true;
  }

  /*!
   * Appends operation \p op taking \p num_args arguments. If the last node is
   * a Push, that node is turned into \p op instead of adding a new node.
   */
  void append_function(Template &tmpl, Node::Op op, unsigned int num_args) {
    // we can merge with back-to-back push
    if (!tmpl.nodes.empty()) {
      Node &last = tmpl.nodes.back();
      if (last.op == Node::Op::Push) {
        last.op = op;
        last.args = num_args;
        return;
      }
    }
    // otherwise just add it to the end
    tmpl.nodes.emplace_back(op, num_args);
  }

  /*!
   * Appends a Callback node for the user function \p name taking \p num_args
   * arguments. If the last node is a Push of an immediate value, that node is
   * turned into the Callback instead of adding a new node.
   */
  void append_callback(Template &tmpl, nonstd::string_view name, unsigned int num_args) {
    // we can merge with back-to-back push value (not lookup)
    if (!tmpl.nodes.empty()) {
      Node &last = tmpl.nodes.back();
      if (last.op == Node::Op::Push && (last.flags & Node::Flag::ValueMask) == Node::Flag::ValueImmediate) {
        last.op = Node::Op::Callback;
        last.args = num_args;
        last.str = static_cast<std::string>(name);
        last.view = name;
        return;
      }
    }
    // otherwise just add it to the end
    tmpl.nodes.emplace_back(Node::Op::Callback, num_args);
    tmpl.nodes.back().str = static_cast<std::string>(name);
    tmpl.nodes.back().view = name;
  }

  /*!
   * Parses tmpl.content and appends the generated nodes to tmpl.nodes.
   *
   * \param tmpl  template whose content is scanned and whose nodes are filled
   * \param path  directory prefix used to resolve `include` statements
   * \throws ParserError on any syntax error, including `if`/`for` blocks that
   *         are still open at the end of the input.
   */
  void parse_into(Template &tmpl, nonstd::string_view path) {
    lexer.start(tmpl.content);
    for (;;) {
      get_next_token();
      switch (tok.kind) {
      case Token::Kind::Eof:
        if (!if_stack.empty()) {
          throw_parser_error("unmatched if");
        }
        if (!loop_stack.empty()) {
          throw_parser_error("unmatched for");
        }
        return;
      case Token::Kind::Text:
        tmpl.nodes.emplace_back(Node::Op::PrintText, tok.text, 0u);
        break;
      case Token::Kind::StatementOpen:
        get_next_token();
        if (!parse_statement(tmpl, path)) {
          throw_parser_error("expected statement, got '" + tok.describe() + "'");
        }
        if (tok.kind != Token::Kind::StatementClose) {
          throw_parser_error("expected statement close, got '" + tok.describe() + "'");
        }
        break;
      case Token::Kind::LineStatementOpen:
        get_next_token();
        // NOTE(review): unlike the StatementOpen case the result is not
        // checked here; a token left unparsed is caught by the close check below.
        parse_statement(tmpl, path);
        if (tok.kind != Token::Kind::LineStatementClose && tok.kind != Token::Kind::Eof) {
          throw_parser_error("expected line statement close, got '" + tok.describe() + "'");
        }
        break;
      case Token::Kind::ExpressionOpen:
        get_next_token();
        if (!parse_expression(tmpl)) {
          throw_parser_error("expected expression, got '" + tok.describe() + "'");
        }
        append_function(tmpl, Node::Op::PrintValue, 1);
        if (tok.kind != Token::Kind::ExpressionClose) {
          throw_parser_error("expected expression close, got '" + tok.describe() + "'");
        }
        break;
      case Token::Kind::CommentOpen:
        get_next_token();
        if (tok.kind != Token::Kind::CommentClose) {
          throw_parser_error("expected comment close, got '" + tok.describe() + "'");
        }
        break;
      default:
        throw_parser_error("unexpected token '" + tok.describe() + "'");
        break;
      }
    }
  }

  /*!
   * Parses \p input into a new Template.
   * \param path  directory prefix used to resolve `include` statements
   */
  Template parse(nonstd::string_view input, nonstd::string_view path) {
    auto result = Template(static_cast<std::string>(input));
    parse_into(result, path);
    return result;
  }

  /// Parses \p input with "./" as the include path prefix.
  Template parse(nonstd::string_view input) {
    return parse(input, "./");
  }

  /*!
   * Parses \p tmpl with a fresh Parser that shares this parser's
   * configuration and template storage.
   * \param filename  name of the template; everything up to and including the
   *                  last '/' or '\\' is used as the include path prefix
   *                  (empty if the name contains no separator)
   */
  void parse_into_template(Template& tmpl, nonstd::string_view filename) {
    nonstd::string_view path = filename.substr(0, filename.find_last_of("/\\") + 1);
    // StringRef path = sys::path::parent_path(filename);
    Parser sub_parser(config, lexer.get_config(), template_storage);
    sub_parser.parse_into(tmpl, path);
  }

  /// Returns the complete content of \p filename, opened via open_file_or_throw.
  std::string load_file(nonstd::string_view filename) {
    std::ifstream file = open_file_or_throw(static_cast<std::string>(filename));
    std::string text((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
    return text;
  }
};
} // namespace inja
#endif // INCLUDE_INJA_PARSER_HPP_