| /* |
| * Copyright (C) 2020 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #ifndef SRC_TRACE_PROCESSOR_UTIL_JSON_PARSER_H_ |
| #define SRC_TRACE_PROCESSOR_UTIL_JSON_PARSER_H_ |
| |
| #include <algorithm> |
| #include <cctype> |
| #include <cmath> |
| #include <cstddef> |
| #include <cstdint> |
| #include <cstring> |
| #include <limits> |
| #include <string> |
| #include <string_view> |
| #include <variant> |
| #include <vector> |
| |
| #include "perfetto/base/logging.h" |
| #include "perfetto/base/status.h" |
| #include "perfetto/ext/base/variant.h" |
| #include "perfetto/public/compiler.h" |
| |
| namespace perfetto::trace_processor::json { |
| |
| // Represents a JSON null value. |
| struct Null {}; |
| // Represents a JSON object, holding its raw string content. |
| struct Object { |
| std::string_view contents; |
| }; |
| // Represents a JSON array, holding its raw string content. |
| struct Array { |
| std::string_view contents; |
| }; |
| // A variant type representing any valid JSON value. |
| using JsonValue = |
| std::variant<Null, bool, int64_t, double, std::string_view, Object, Array>; |
| |
| namespace internal { |
| |
| // Internal return codes for parsing functions. |
| enum class ReturnCode : uint8_t { |
| kOk, |
| kError, |
| kIncompleteInput, |
| }; |
| |
| // Advances the |cur| pointer past any JSON whitespace characters. |
| // Returns false if |end| is reached before any non-whitespace character. |
| inline bool SkipWhitespace(const char*& cur, const char* end) { |
| while (cur != end && |
| (*cur == ' ' || *cur == '\t' || *cur == '\n' || *cur == '\r')) { |
| ++cur; |
| } |
| return cur != end; |
| } |
| |
| // Processes escape sequences within a string segment and appends the unescaped |
| // result to |res|. |
| // |start| and |end| define the string segment (excluding initial/final quotes). |
| // Sets |status| on error. |
| inline ReturnCode UnescapeString(const char* start, |
| const char* end, |
| std::string& res, |
| base::Status& status) { |
| PERFETTO_DCHECK(start != end); |
| // Pre-allocate string capacity, assuming most characters are not escaped. |
| res.reserve(static_cast<size_t>(end - start)); |
| for (const char* it = start; it != end; ++it) { |
| if (*it == '\\') { |
| ++it; |
| PERFETTO_DCHECK(it != end); |
| switch (*it) { |
| case '"': |
| res += '"'; |
| break; |
| case '\\': |
| res += '\\'; |
| break; |
| case '/': |
| res += '/'; |
| break; |
| case 'b': |
| res += '\b'; |
| break; |
| case 'f': |
| res += '\f'; |
| break; |
| case 'n': |
| res += '\n'; |
| break; |
| case 'r': |
| res += '\r'; |
| break; |
| case 't': |
| res += '\t'; |
| break; |
| case 'u': { |
| // Ensure 4 hex digits follow. |
| PERFETTO_DCHECK(it + 4 != end); |
| uint32_t cp = 0; |
| // Parse the 4 hex digits into a code point. |
| for (int j = 0; j < 4; ++j) { |
| char hex = *++it; |
| cp <<= 4; |
| if (hex >= '0' && hex <= '9') { |
| cp += static_cast<uint32_t>(hex - '0'); |
| } else if (hex >= 'a' && hex <= 'f') { |
| cp += static_cast<uint32_t>(hex - 'a' + 10); |
| } else if (hex >= 'A' && hex <= 'F') { |
| cp += static_cast<uint32_t>(hex - 'A' + 10); |
| } else { |
| status = base::ErrStatus("Invalid escape sequence: \\u%c%c%c%c", |
| it[-3], it[-2], it[-1], hex); |
| return ReturnCode::kError; |
| } |
| } |
| // Encode the code point as UTF-8. |
| if (cp <= 0x7F) { |
| // 1-byte sequence |
| res += static_cast<char>(cp); |
| } else if (cp <= 0x7FF) { |
| // 2-byte sequence |
| res += static_cast<char>(0xC0 | (cp >> 6)); |
| res += static_cast<char>(0x80 | (cp & 0x3F)); |
| } else if (cp <= 0xFFFF) { |
| // 3-byte sequence |
| // Check for surrogate pairs, which are not supported directly. |
| if (cp >= 0xD800 && cp <= 0xDFFF) { |
| status = base::ErrStatus( |
| "Invalid escape sequence: \\u%c%c%c%c (code point %u is " |
| "reserved for surrogate pairs)", |
| it[-3], it[-2], it[-1], *it, cp); |
| return ReturnCode::kError; |
| } |
| res += static_cast<char>(0xE0 | (cp >> 12)); |
| res += static_cast<char>(0x80 | ((cp >> 6) & 0x3F)); |
| res += static_cast<char>(0x80 | (cp & 0x3F)); |
| } else { |
| // Code points > 0xFFFF are not supported by \uXXXX in JSON |
| // (they require surrogate pairs). |
| status = base::ErrStatus( |
| "Invalid escape sequence: \\u%c%c%c%c (code point %u > 0xFFFF)", |
| it[-3], it[-2], it[-1], *it, cp); |
| return ReturnCode::kError; |
| } |
| break; |
| } |
| default: |
| // As per JSON spec, other escaped characters are themselves. |
| // However, strict parsers might error here. This one is lenient. |
| // res += *it; // This line was effectively a no-op as it was inside |
| // `default: break;` |
| break; |
| } |
| } else { |
| res += *it; |
| } |
| } |
| return ReturnCode::kOk; |
| } |
| |
| // Scans a JSON string from |start| to |end|, updating |out| to point after the |
| // closing quote. |
| // |str| will view the content of the string (without quotes). |
| // |has_escapes| is set if escape sequences are present. |
| // Sets |err| on parsing errors. |
| inline ReturnCode ScanString(const char* start, |
| const char* end, |
| const char*& out, |
| std::string_view& str, |
| bool& has_escapes, |
| base::Status& err) { |
| const char* cur = start; |
| PERFETTO_DCHECK(cur != end); |
| // Expect a string to start with a double quote. |
| if (PERFETTO_UNLIKELY(*cur != '"')) { |
| err = base::ErrStatus("Expected '\"' at the start of string. Got '%c'", |
| *start); |
| return ReturnCode::kError; |
| } |
| // Start searching for the closing quote from the character after the opening |
| // quote. |
| const char* str_start = ++cur; |
| for (;;) { |
| // Find the next double quote. |
| cur = static_cast<const char*>( |
| memchr(cur, '"', static_cast<size_t>(end - cur))); |
| // If no quote is found, the input is incomplete. |
| if (PERFETTO_UNLIKELY(!cur)) { |
| return ReturnCode::kIncompleteInput; |
| } |
| // Fast path: if the character before the quote is not a backslash, we |
| // found the closing quote. |
| if (PERFETTO_LIKELY(cur[-1] != '\\')) { |
| break; |
| } |
| // Slow path: count consecutive backslashes before the quote. If the count |
| // is even, the quote is not escaped and closes the string. If odd, the |
| // quote is escaped and we continue searching. |
| size_t backslash_count = 1; // We already know cur[-1] == '\\' |
| const char* p = cur - 2; |
| while (p >= str_start && *p == '\\') { |
| ++backslash_count; |
| --p; |
| } |
| // If the backslash count is even, the quote closes the string. |
| if ((backslash_count & 1) == 0) { |
| break; |
| } |
| // The quote is escaped, continue searching. |
| ++cur; |
| } |
| // Check if there are any backslashes in the string (indicating escapes). |
| has_escapes = |
| memchr(str_start, '\\', static_cast<size_t>(cur - str_start)) != nullptr; |
| str = std::string_view(str_start, static_cast<size_t>(cur - str_start)); |
| out = cur + 1; |
| return ReturnCode::kOk; |
| } |
| |
| // Parses a JSON string, handling escape sequences if necessary. |
| // |start| and |end| define the input buffer. |out| is updated to point after |
| // the string. |str| receives the string_view of the parsed string (potentially |
| // unescaped). |unescaped_str| is used as a buffer if unescaping is needed. Sets |
| // |status| on error. |
| inline ReturnCode ParseString(const char* start, |
| const char* end, |
| const char*& out, |
| std::string_view& str, |
| std::string& unescaped_str, |
| base::Status& status) { |
| const char* cur = start; |
| PERFETTO_DCHECK(start != end); |
| |
| bool key_has_escapes = false; |
| // First, scan the string to identify its boundaries and check for escapes. |
| if (auto e = ScanString(cur, end, cur, str, key_has_escapes, status); |
| e != ReturnCode::kOk) { |
| return e; |
| } |
| // If escape sequences were found, unescape the string. |
| if (PERFETTO_UNLIKELY(key_has_escapes)) { |
| unescaped_str.clear(); // Clear previous unescaped content. |
| if (auto e = internal::UnescapeString(str.data(), str.data() + str.size(), |
| unescaped_str, status); |
| e != ReturnCode::kOk) { |
| return e; |
| } |
| // Update |str| to point to the unescaped version. |
| str = unescaped_str; |
| } |
| out = cur; |
| return ReturnCode::kOk; |
| } |
| |
| // Scans input from |start| to |end| to find the end of a block delimited by |
| // |open_delim| and |close_delim| (e.g., '{' and '}'). |
| // Handles nested delimiters and strings correctly. |
| // |out| is updated to point after the |close_delim|. |
| // Sets |status| on error. |
| inline ReturnCode ScanToEndOfDelimitedBlock(const char* start, |
| const char* end, |
| char open_delim, |
| char close_delim, |
| const char*& out, |
| base::Status& status) { |
| PERFETTO_DCHECK(start != end); |
| PERFETTO_DCHECK(*start == open_delim); |
| |
| // Start scanning after the opening delimiter. |
| const char* cur = start + 1; |
| // Balance of open/close delimiters. |
| uint32_t bal = 1; |
| // Dummy for ScanString. |
| std::string_view sv; |
| bool has_escapes; |
| while (cur != end) { |
| char c = *cur; |
| if (c == '"') { |
| // If a string starts, scan past it. |
| if (auto e = ScanString(cur, end, cur, sv, has_escapes, status); |
| e != ReturnCode::kOk) { |
| return e; |
| } |
| } else if (c == open_delim) { |
| // Nested opening delimiter. |
| ++cur; |
| ++bal; |
| } else if (c == close_delim) { |
| // Closing delimiter. |
| ++cur; |
| if (PERFETTO_LIKELY(--bal == 0)) { |
| // If balance is zero, block end is found. |
| out = cur; |
| return ReturnCode::kOk; |
| } |
| } else { |
| // Other characters, just advance. |
| ++cur; |
| } |
| } |
| // Reached end without closing delimiter. |
| return ReturnCode::kIncompleteInput; |
| } |
| |
| // Converts a string representation of an integer to int64_t. |
| // |start| and |end| define the string segment for the number. |
| // |out| stores the parsed integer. |
| // Sets |status| on overflow or invalid format. |
| inline ReturnCode StringToInt64(const char* start, |
| const char* end, |
| int64_t& out, |
| base::Status& status) { |
| const char* cur = start; |
| PERFETTO_DCHECK(start != end); |
| |
| bool negative = false; |
| if (*cur == '-') { |
| negative = true; |
| cur++; |
| } |
| // After a potential sign, there must be at least one digit. |
| PERFETTO_DCHECK(cur != end); |
| |
| out = 0; |
| // Precompute limits for overflow checking. |
| const int64_t kAbsMaxDiv10 = std::numeric_limits<int64_t>::max() / 10; |
| const int kAbsMaxMod10 = std::numeric_limits<int64_t>::max() % 10; |
| for (; cur != end; ++cur) { |
| // Should only be called with valid digits. |
| PERFETTO_DCHECK(std::isdigit(*cur)); |
| int digit = *cur - '0'; |
| // Check for overflow before multiplication and addition. |
| if (out > kAbsMaxDiv10 || (out == kAbsMaxDiv10 && digit > kAbsMaxMod10)) { |
| // Special case for INT64_MIN, which is -(INT64_MAX + 1). |
| if (negative && out == kAbsMaxDiv10 && digit == kAbsMaxMod10 + 1) { |
| // This sequence of operations correctly forms INT64_MIN when negated |
| // later. |
| } else { |
| status = base::ErrStatus("Integer overflow parsing '%.*s'", |
| int(end - start), start); |
| return ReturnCode::kError; |
| } |
| } |
| out = out * 10 + digit; |
| } |
| if (negative) { |
| out = -out; |
| } |
| return ReturnCode::kOk; |
| } |
| |
| // Converts a string representation of a floating-point number to double. |
| // |start| and |end| define the string segment for the number. |
| // |out| stores the parsed double. |
| // Sets |status| on overflow, underflow (to 0.0), NaN, or invalid format. |
| inline ReturnCode StringToDouble(const char* start, |
| const char* end, |
| double& out, |
| base::Status& status) { |
| const char* cur = start; |
| PERFETTO_DCHECK(cur != end); |
| |
| bool negative = false; |
| if (*cur == '-') { |
| negative = true; |
| ++cur; |
| } |
| PERFETTO_DCHECK(cur != end); |
| |
| // Parse integer part. |
| int64_t int_part = 0; |
| for (; cur != end && std::isdigit(*cur); ++cur) { |
| int_part = int_part * 10 + (*cur - '0'); |
| } |
| // Parse fractional part. |
| double fraction = 0; |
| if (cur != end && *cur == '.') { |
| ++cur; |
| int64_t fract_int = 0; |
| uint64_t divisor = 1; |
| for (; cur != end && std::isdigit(*cur); ++cur) { |
| fract_int = (*cur - '0') + fract_int * 10; |
| divisor *= 10; |
| } |
| fraction = static_cast<double>(fract_int) / static_cast<double>(divisor); |
| } |
| // Parse exponent part. |
| int64_t exponent_part = 0; |
| bool exp_neg = false; |
| if (cur != end && (*cur == 'e' || *cur == 'E')) { |
| ++cur; |
| if (cur != end && (*cur == '+' || *cur == '-')) { |
| exp_neg = *cur++ == '-'; |
| } |
| PERFETTO_DCHECK(cur != end); |
| for (; cur != end && std::isdigit(*cur); ++cur) { |
| exponent_part = exponent_part * 10 + (*cur - '0'); |
| } |
| } |
| // Combine parts. |
| out = static_cast<double>(int_part) + fraction; |
| if (exp_neg) { |
| out /= std::pow(10, static_cast<double>(exponent_part)); |
| } else if (exponent_part > 0) { |
| out *= std::pow(10, static_cast<double>(exponent_part)); |
| } |
| // Check for infinity or NaN, which indicates an overflow/underflow during pow |
| // or multiplication. |
| if (std::isinf(out) || std::isnan(out)) { |
| status = base::ErrStatus("Double overflow/underflow parsing '%.*s'", |
| int(end - start), start); |
| return ReturnCode::kError; |
| } |
| out = negative ? -out : out; |
| return ReturnCode::kOk; |
| } |
| |
| // Parses a JSON number, which can be an integer or a double. |
| // |start| and |end| define the input buffer, |cur| points to the start of the |
| // number. |out| is updated to point after the parsed number. |out_num| stores |
| // the parsed JsonValue (either int64_t or double). Sets |status| on error. |
| inline ReturnCode ParseNumber(const char* start, |
| const char* end, |
| const char*& out, |
| JsonValue& out_num, |
| base::Status& status) { |
| const char* cur = start; |
| PERFETTO_DCHECK(cur != end); |
| |
| bool is_int_like = true; |
| // Skip optional minus sign. |
| cur += *cur == '-'; |
| // Handle leading zero: only allowed if it's the only digit before '.', 'e', |
| // or end. |
| if (cur != end && *cur == '0') { |
| ++cur; |
| // "01" is invalid. |
| if (cur != end && std::isdigit(*cur)) { |
| status = base::ErrStatus("Invalid number: leading zero in '%.*s'", |
| int(end - start), start); |
| return ReturnCode::kError; |
| } |
| } else if (cur != end && *cur >= '1' && *cur <= '9') { |
| ++cur; |
| while (cur != end && std::isdigit(*cur)) { |
| ++cur; |
| } |
| } else if (cur != end) { |
| status = base::ErrStatus("Invalid number: expected digit in '%.*s'", |
| int(end - start), start); |
| return ReturnCode::kError; |
| } |
| // Check for fractional part. |
| if (cur != end && *cur == '.') { |
| is_int_like = false; |
| const char* frac_start_pos = ++cur; |
| while (cur != end && std::isdigit(*cur)) { |
| ++cur; |
| } |
| // Must have at least one digit after '.'. |
| if (cur != end && cur == frac_start_pos) { |
| status = |
| base::ErrStatus("Invalid number: expected digit after '.' in '%.*s'", |
| int(end - start), start); |
| return ReturnCode::kError; |
| } |
| } |
| // Check for exponent part. |
| if (cur != end && (*cur == 'e' || *cur == 'E')) { |
| is_int_like = false; |
| ++cur; |
| // Optional sign for exponent. |
| cur += cur != end && (*cur == '+' || *cur == '-'); |
| const char* exp_start_pos = cur; |
| while (cur != end && std::isdigit(*cur)) { |
| ++cur; |
| } |
| // Must have at least one digit after 'e' or 'E' (and optional sign). |
| if (cur != end && cur == exp_start_pos) { |
| status = |
| base::ErrStatus("Invalid number: expected digit after 'e' in '%.*s'", |
| int(end - start), start); |
| return ReturnCode::kError; |
| } |
| } |
| // If end is reached before any non-numeric character, input is incomplete. |
| if (PERFETTO_UNLIKELY(cur == end)) { |
| return ReturnCode::kIncompleteInput; |
| } |
| |
| // Attempt to parse as int64_t if it looked like an integer. |
| if (is_int_like) { |
| int64_t i_val; |
| // The segment [start, cur) contains the number string. |
| if (auto e = StringToInt64(start, cur, i_val, status); |
| e == ReturnCode::kOk) { |
| out_num = i_val; |
| out = cur; |
| return ReturnCode::kOk; |
| } |
| // If StringToInt64 failed (e.g. overflow), status is already set. |
| // We might still try to parse as double if it's a large integer. |
| // JSON spec doesn't limit integer precision, but we store as int64 or |
| // double. If it overflows int64, it MUST be parsed as double. |
| } |
| |
| // Parse as double (either because it wasn't int-like or int parsing |
| // failed/overflowed). |
| double d_val; |
| if (auto e = StringToDouble(start, cur, d_val, status); |
| e != ReturnCode::kOk) { |
| // If StringToInt64 failed AND StringToDouble failed, return the error from |
| // StringToDouble. |
| return e; |
| } |
| out_num = d_val; |
| out = cur; |
| return ReturnCode::kOk; |
| } |
| |
| } // namespace internal |
| |
| // Public return codes for the Iterator. |
| enum class ReturnCode : uint8_t { |
| kOk = uint8_t(internal::ReturnCode::kOk), |
| kError = uint8_t(internal::ReturnCode::kError), |
| kIncompleteInput = uint8_t(internal::ReturnCode::kIncompleteInput), |
| // Indicates the end of the current JSON object or array scope. |
| kEndOfScope = 3, |
| }; |
| |
| // Parses the next JSON value from the input stream. |
| // |cur| is an in/out parameter pointing to the current position in the buffer. |
| // |end| points to the end of the buffer. |
| // |value| stores the parsed JsonValue. |
| // |unescaped_str| is a buffer for unescaping strings. |
| // Sets |status| on error. |
| inline ReturnCode ParseValue(const char*& cur, |
| const char* end, |
| JsonValue& value, |
| std::string& unescaped_str, |
| base::Status& status) { |
| const char* start = cur; |
| PERFETTO_CHECK(start != end); |
| switch (*cur) { |
| case '{': { |
| auto e = internal::ScanToEndOfDelimitedBlock(start, end, '{', '}', cur, |
| status); |
| value = Object{std::string_view(start, static_cast<size_t>(cur - start))}; |
| return static_cast<ReturnCode>(e); |
| } |
| case '[': { |
| auto e = internal::ScanToEndOfDelimitedBlock(start, end, '[', ']', cur, |
| status); |
| value = Array{std::string_view(start, static_cast<size_t>(cur - start))}; |
| return static_cast<ReturnCode>(e); |
| } |
| case '"': |
| value = std::string_view(); |
| return static_cast<ReturnCode>(internal::ParseString( |
| start, end, cur, base::unchecked_get<std::string_view>(value), |
| unescaped_str, status)); |
| case 't': |
| if (static_cast<size_t>(end - start) < 4) { |
| return ReturnCode::kIncompleteInput; |
| } |
| if (std::string_view(start, 4) != "true") { |
| status = |
| base::ErrStatus("Invalid token: expected 'true' but got '%.*s'", |
| std::min(4, static_cast<int>(end - start)), start); |
| return ReturnCode::kError; |
| } |
| cur += 4; |
| value = true; |
| return ReturnCode::kOk; |
| case 'f': |
| if (static_cast<size_t>(end - start) < 5) { |
| return ReturnCode::kIncompleteInput; |
| } |
| if (std::string_view(start, 5) != "false") { |
| status = |
| base::ErrStatus("Invalid token: expected 'false' but got '%.*s'", |
| std::min(5, static_cast<int>(end - start)), start); |
| return ReturnCode::kError; |
| } |
| cur += 5; |
| value = false; |
| return ReturnCode::kOk; |
| case 'n': |
| if (static_cast<size_t>(end - start) < 4) { |
| return ReturnCode::kIncompleteInput; |
| } |
| if (std::string_view(start, 4) != "null") { |
| status = |
| base::ErrStatus("Invalid token: expected 'null' but got '%.*s'", |
| std::min(4, static_cast<int>(end - start)), start); |
| return ReturnCode::kError; |
| } |
| cur += 4; |
| value = Null{}; |
| return ReturnCode::kOk; |
| default: |
| return static_cast<ReturnCode>( |
| internal::ParseNumber(start, end, cur, value, status)); |
| } |
| } |
| |
| // An iterator-style parser for JSON. |
| // Allows for token-by-token processing of a JSON structure. |
| class Iterator { |
| public: |
| // Type of JSON structure currently being parsed (object or array). |
| enum class ParseType : uint8_t { |
| kObject, |
| kArray, |
| }; |
| |
| // Resets the iterator to parse a new JSON string. |
| // |begin| and |end| define the JSON string to be parsed. |
| void Reset(const char* begin, const char* end) { |
| cur_ = begin; |
| end_ = end; |
| parse_stack_.clear(); |
| status_ = base::OkStatus(); |
| } |
| |
| // Initializes parsing. Expects the input to start with '{' or '['. |
| // Returns true on success, false on failure (e.g., not starting with { or [). |
| bool ParseStart() { |
| const char* cur = cur_; |
| // Skip any leading whitespace. |
| if (PERFETTO_UNLIKELY(!internal::SkipWhitespace(cur, end_))) { |
| // Reached end of input while expecting '{' or '['. |
| status_ = base::ErrStatus( |
| "Expected '{' or '[' at the start. Input is empty or whitespace " |
| "only."); |
| return false; |
| } |
| // Determine if it's an object or array and push to stack. |
| if (*cur == '{') { |
| parse_stack_.push_back(ParseType::kObject); |
| } else if (*cur == '[') { |
| parse_stack_.push_back(ParseType::kArray); |
| } else { |
| status_ = |
| base::ErrStatus("Expected '{' or '[' at the start. Got '%c'", *cur); |
| return false; |
| } |
| // Skip whitespace after the opening bracket. |
| if (PERFETTO_UNLIKELY(!internal::SkipWhitespace(++cur, end_))) { |
| return false; |
| } |
| cur_ = cur; |
| return true; |
| } |
| |
| // Parses the next key-value field in an object without recursing into nested |
| // objects/arrays. Assumes the iterator is currently inside an object. The |
| // parsed key is available via `key()` and value via `value()`. Returns kOk on |
| // success, kEndOfScope if '}' is found, or an error code. |
| ReturnCode ParseObjectFieldWithoutRecursing() { |
| PERFETTO_DCHECK(!parse_stack_.empty()); |
| PERFETTO_DCHECK(parse_stack_.back() == ParseType::kObject); |
| const char* cur = cur_; |
| // Check for the end of the object. |
| if (PERFETTO_UNLIKELY(*cur == '}')) { |
| if (auto e = OnEndOfScope(cur); e != ReturnCode::kOk) { |
| return e; |
| } |
| cur_ = cur; |
| return ReturnCode::kEndOfScope; |
| } |
| // Parse the field (key: value). |
| if (auto e = ParseObjectFieldUntilValue(cur); e != ReturnCode::kOk) { |
| return e; |
| } |
| // Parse the value itself. |
| if (auto e = ParseValue(cur, end_, value_, unescaped_str_value_, status_); |
| PERFETTO_UNLIKELY(e != ReturnCode::kOk)) { |
| return e; |
| } |
| // Handle comma or closing brace after the value. |
| if (auto e = OnPostValue(cur); e != ReturnCode::kOk) { |
| return e; |
| } |
| cur_ = cur; |
| return ReturnCode::kOk; |
| } |
| |
| // Parses the next element. If it's an object or array, it recurses by pushing |
| // onto the parse stack. Otherwise, it parses the primitive value. |
| // The parsed key (if in an object) or value is available. |
| // Returns kOk on success, kEndOfScope if '}' or ']' is found, or an error |
| // code. |
| ReturnCode ParseAndRecurse() { |
| PERFETTO_DCHECK(!parse_stack_.empty()); |
| const char* cur = cur_; |
| // Check for end of current scope (object or array). |
| if (PERFETTO_UNLIKELY(*cur == '}' || *cur == ']')) { |
| if (auto e = OnEndOfScope(cur); e != ReturnCode::kOk) { |
| return e; |
| } |
| cur_ = cur; |
| return ReturnCode::kEndOfScope; |
| } |
| // If current scope is an object, parse the key first. |
| if (PERFETTO_LIKELY(parse_stack_.back() == ParseType::kObject)) { |
| if (auto e = ParseObjectFieldUntilValue(cur); |
| PERFETTO_UNLIKELY(e != ReturnCode::kOk)) { |
| return e; |
| } |
| } else { |
| if (PERFETTO_UNLIKELY(!internal::SkipWhitespace(cur, end_))) { |
| return ReturnCode::kIncompleteInput; |
| } |
| } |
| |
| // If the value is a new object or array, push to stack. |
| if (*cur == '{') { |
| parse_stack_.push_back(ParseType::kObject); |
| // Value becomes an empty Object marker; its content isn't scanned yet |
| // here. |
| value_ = Object{std::string_view()}; |
| if (PERFETTO_UNLIKELY(!internal::SkipWhitespace(++cur, end_))) { |
| return ReturnCode::kIncompleteInput; |
| } |
| cur_ = cur; |
| return ReturnCode::kOk; |
| } |
| if (*cur == '[') { |
| parse_stack_.push_back(ParseType::kArray); |
| // Value becomes an empty Array marker. |
| value_ = Array{std::string_view()}; |
| if (PERFETTO_UNLIKELY(!internal::SkipWhitespace(++cur, end_))) { |
| return ReturnCode::kIncompleteInput; |
| } |
| cur_ = cur; |
| return ReturnCode::kOk; |
| } |
| // Otherwise, parse the primitive value. |
| if (auto e = ParseValue(cur, end_, value_, unescaped_str_value_, status_); |
| PERFETTO_UNLIKELY(e != ReturnCode::kOk)) { |
| return e; |
| } |
| // Handle comma or closing brace/bracket after the value. |
| if (auto e = OnPostValue(cur); e != ReturnCode::kOk) { |
| return e; |
| } |
| cur_ = cur; |
| return ReturnCode::kOk; |
| } |
| |
| // Captures the raw JSON bytes of the object or array scope just entered by |
| // ParseAndRecurse, including the surrounding `{}`/`[]`. Pops the parse stack, |
| // consumes any trailing `,`, advances `cur_` past the closing delimiter, and |
| // updates `value()` to be the fully-populated Object/Array variant. Must be |
| // called when `value()` is an empty Object{}/Array{} marker (i.e. we just |
| // entered a nested scope and haven't iterated into it). |
| ReturnCode CollectCurrentScope(std::string_view& out) { |
| PERFETTO_DCHECK(!parse_stack_.empty()); |
| bool is_obj = parse_stack_.back() == ParseType::kObject; |
| PERFETTO_DCHECK((is_obj && std::holds_alternative<Object>(value_)) || |
| (!is_obj && std::holds_alternative<Array>(value_))); |
| // ParseAndRecurse advanced `cur_` past the opener and any trailing |
| // whitespace. Step back over that whitespace to find the `{`/`[`. |
| const char open = is_obj ? '{' : '['; |
| const char* start = cur_ - 1; |
| while (*start == ' ' || *start == '\t' || *start == '\n' || |
| *start == '\r') { |
| --start; |
| } |
| PERFETTO_DCHECK(*start == open); |
| const char* cur = start; |
| if (auto e = internal::ScanToEndOfDelimitedBlock( |
| start, end_, is_obj ? '{' : '[', is_obj ? '}' : ']', cur, status_); |
| PERFETTO_UNLIKELY(e != internal::ReturnCode::kOk)) { |
| return static_cast<ReturnCode>(e); |
| } |
| out = std::string_view(start, static_cast<size_t>(cur - start)); |
| parse_stack_.pop_back(); |
| cur_ = cur; |
| if (!parse_stack_.empty()) { |
| if (auto e = OnPostValue(cur_); PERFETTO_UNLIKELY(e != ReturnCode::kOk)) { |
| return e; |
| } |
| } |
| if (is_obj) { |
| value_ = Object{out}; |
| } else { |
| value_ = Array{out}; |
| } |
| return ReturnCode::kOk; |
| } |
| |
| // Returns the key of the last parsed object field. |
| std::string_view key() const { return key_; } |
| // Returns the value of the last parsed field or array element. |
| const JsonValue& value() const { return value_; } |
| // Returns the current parsing position in the input buffer. |
| const char* cur() const { return cur_; } |
| // Returns the status of the last operation (Ok or an error). |
| const base::Status& status() const { return status_; } |
| |
| // Returns true if the entire JSON structure has been parsed (parse stack is |
| // empty). |
| bool eof() const { return parse_stack_.empty(); } |
| // Returns the current parse stack (e.g., for debugging or context). |
| const std::vector<ParseType>& parse_stack() const { return parse_stack_; } |
| |
| private: |
| // Parses an object field up to the value (i.e., "key": ). |
| // |cur| is advanced past the ':'. |
| ReturnCode ParseObjectFieldUntilValue(const char*& cur) { |
| // Skip whitespace before the key. |
| if (PERFETTO_UNLIKELY(!internal::SkipWhitespace(cur, end_))) { |
| return ReturnCode::kIncompleteInput; |
| } |
| // Expect a string key. |
| if (PERFETTO_UNLIKELY(*cur != '"')) { |
| status_ = |
| base::ErrStatus("Expected '\"' at the start of key. Got '%c'", *cur); |
| return ReturnCode::kError; |
| } |
| if (auto e = internal::ParseString(cur, end_, cur, key_, unescaped_key_, |
| status_); |
| PERFETTO_UNLIKELY(e != internal::ReturnCode::kOk)) { |
| return static_cast<ReturnCode>(e); |
| } |
| // Skip whitespace after the key. |
| if (PERFETTO_UNLIKELY(!internal::SkipWhitespace(cur, end_))) { |
| return ReturnCode::kIncompleteInput; |
| } |
| // Expect a colon separator. |
| if (PERFETTO_UNLIKELY(*cur != ':')) { |
| status_ = base::ErrStatus("Expected ':' after key '%.*s'. Got '%c'", |
| int(key_.size()), key_.data(), *cur); |
| return ReturnCode::kError; |
| } |
| // Skip whitespace after the colon. |
| if (PERFETTO_UNLIKELY(!internal::SkipWhitespace(++cur, end_))) { |
| return ReturnCode::kIncompleteInput; |
| } |
| return ReturnCode::kOk; |
| } |
| |
| // Handles characters after a parsed value (',' or closing '}' or ']'). |
| // |cur| is advanced past the delimiter and subsequent whitespace. |
| ReturnCode OnPostValue(const char*& cur) { |
| PERFETTO_DCHECK(!parse_stack_.empty()); |
| // Skip whitespace after the value. |
| if (PERFETTO_UNLIKELY(!internal::SkipWhitespace(cur, end_))) { |
| return ReturnCode::kIncompleteInput; |
| } |
| // Determine expected end character based on current scope. |
| char end_char = parse_stack_.back() == ParseType::kObject ? '}' : ']'; |
| // If comma, consume it and skip whitespace. |
| if (PERFETTO_LIKELY(*cur == ',')) { |
| ++cur; |
| if (PERFETTO_UNLIKELY(!internal::SkipWhitespace(cur, end_))) { |
| return ReturnCode::kIncompleteInput; |
| } |
| } else if (PERFETTO_UNLIKELY(*cur != end_char)) { |
| // If not a comma, it must be the end character for the current scope. |
| status_ = base::ErrStatus("Expected ',' or '%c' after value. Got '%c'", |
| end_char, *cur); |
| // If we are in an object, the key_ context is relevant. |
| if (parse_stack_.back() == ParseType::kObject && !key_.empty()) { |
| status_ = base::ErrStatus( |
| "Expected ',' or '%c' after value for key '%.*s'. Got '%c'", |
| end_char, int(key_.size()), key_.data(), *cur); |
| } |
| return ReturnCode::kError; |
| } |
| // If it was end_char, it will be handled by ParseAndRecurse or |
| // ParseObjectFieldWithoutRecursing in the next iteration, or by |
| // OnEndOfScope. |
| return ReturnCode::kOk; |
| } |
| |
| // Handles the end of a scope ('}' or ']'), pops from parse stack. |
| // |cur| is advanced past the closing delimiter. |
| ReturnCode OnEndOfScope(const char*& cur) { |
| if (PERFETTO_UNLIKELY(parse_stack_.empty())) { |
| status_ = base::ErrStatus("Parse stack is empty on end of scope"); |
| return ReturnCode::kError; |
| } |
| ++cur; // Consume '}' or ']'. |
| parse_stack_.pop_back(); |
| // If not at the end of the entire JSON (i.e., stack is not empty), |
| // then this scope was nested. We need to handle post-value for the parent. |
| if (!parse_stack_.empty()) { |
| if (auto e = OnPostValue(cur); e != ReturnCode::kOk) { |
| return e; |
| } |
| } |
| return ReturnCode::kOk; |
| } |
| |
| // Pointer to the current parsing position in the input buffer. |
| const char* cur_; |
| // Pointer to the end of the input buffer. |
| const char* end_; |
| // Holds the most recently parsed object key. |
| std::string_view key_; |
| // Buffer for unescaped key string, if key_ contains escapes. |
| std::string unescaped_key_; |
| // Buffer for unescaped value string, if value_ (as string_view) contains |
| // escapes. |
| std::string unescaped_str_value_; |
| // Holds the most recently parsed JSON value. |
| JsonValue value_; |
| // Stores the success/failure status of parsing operations. |
| base::Status status_; |
| // Stack to keep track of nested JSON structures (objects/arrays). |
| std::vector<ParseType> parse_stack_; |
| }; |
| |
| } // namespace perfetto::trace_processor::json |
| |
| #endif // SRC_TRACE_PROCESSOR_UTIL_JSON_PARSER_H_ |