| // Protocol Buffers - Google's data interchange format |
| // Copyright 2008 Google Inc. All rights reserved. |
| // https://developers.google.com/protocol-buffers/ |
| // |
| // Redistribution and use in source and binary forms, with or without |
| // modification, are permitted provided that the following conditions are |
| // met: |
| // |
| // * Redistributions of source code must retain the above copyright |
| // notice, this list of conditions and the following disclaimer. |
| // * Redistributions in binary form must reproduce the above |
| // copyright notice, this list of conditions and the following disclaimer |
| // in the documentation and/or other materials provided with the |
| // distribution. |
| // * Neither the name of Google Inc. nor the names of its |
| // contributors may be used to endorse or promote products derived from |
| // this software without specific prior written permission. |
| // |
| // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| |
| #ifndef GOOGLE_PROTOBUF_PARSE_CONTEXT_H__ |
| #define GOOGLE_PROTOBUF_PARSE_CONTEXT_H__ |
| |
| #include <string> |
| |
| #include <google/protobuf/port.h> |
| |
| #if GOOGLE_PROTOBUF_ENABLE_EXPERIMENTAL_PARSER |
| #include <google/protobuf/io/coded_stream.h> |
| #include <google/protobuf/wire_format_lite.h> |
| #include <google/protobuf/stubs/common.h> |
| #include "third_party/absl/base/optimization.h" |
| #include <google/protobuf/stubs/strutil.h> |
| #include "util/coding/varint.h" |
| |
| #include <google/protobuf/port_def.inc> |
| |
| namespace google { |
| namespace protobuf { |
| |
| class UnknownFieldSet; |
| class DescriptorPool; |
| class MessageFactory; |
| |
| namespace internal { |
| |
| // Template code below needs to know about the existence of these functions. |
| void WriteVarint(uint32 num, uint64 val, std::string* s); |
| void WriteLengthDelimited(uint32 num, StringPiece val, std::string* s); |
| // Inline because it is just forwarding to s->WriteVarint |
| inline void WriteVarint(uint32 num, uint64 val, UnknownFieldSet* s); |
| inline void WriteLengthDelimited(uint32 num, StringPiece val, |
| UnknownFieldSet* s); |
| |
| |
| // ParseContext contains state that needs to be preserved across buffer seams. |
| |
| class ParseContext; |
| |
| // The parser works by composing elementary parse functions, that are generated |
| // by the compiler, together to perform the full parse. To accomplish this the |
| // functionality of the elementary parse function is slightly increased which |
| // allows it to become composable. |
| |
| // The basic abstraction ParseContext is designed for is a slight modification |
| // of the ZeroCopyInputStream (ZCIS) abstraction. A ZCIS presents a serialized |
| // stream as a series of buffers that concatenate to the full stream. |
| // Pictorially a ZCIS presents a stream in chunks like so |
| // [---------------------------------------------------------------] |
| // [---------------------] chunk 1 |
| // [----------------------------] chunk 2 |
| // chunk 3 [--------------] |
| // |
| // Where the '-' represent the bytes which are vertically lined up with the |
| // bytes of the stream. |
// ParseContext requires its input to be presented similarly, with the extra
// property that the last kSlopBytes of a chunk overlap with the first
// kSlopBytes of the next chunk, or if there is no next chunk it is at least
// still valid to read those bytes. Again, pictorially, we now have
| // |
| // [---------------------------------------------------------------] |
| // [-------------------....] chunk 1 |
| // [------------------------....] chunk 2 |
| // chunk 3 [------------------..**] |
| // chunk 4 [--****] |
| // Here '-' mean the bytes of the stream or chunk and '.' means bytes past the |
| // chunk that match up with the start of the next chunk. Above each chunk has |
| // 4 '.' after the chunk. In the case these 'overflow' bytes represents bytes |
| // past the stream, indicated by '*' above, their values are unspecified. It is |
| // still legal to read them (ie. should not segfault). Reading past the |
| // end should be detected by the user and indicated as an error. |
| // |
| // The reason for this, admittedly, unconventional invariant is to ruthlessly |
| // optimize the protobuf parser. Having an overlap helps in two important ways. |
// Firstly, it alleviates having to perform bounds checks if a piece of code
| // will never read more than kSlopBytes. Secondly, and more importantly, the |
| // protobuf wireformat is such that there is always a fresh start of a tag |
| // within kSlopBytes. This allows the parser to exit parsing a chunk leaving |
| // the parse on a position inside the overlap where a fresh tag starts. |
| |
| // The elementary parse function has the following signature |
| |
| typedef const char* (*ParseFunc)(const char* ptr, const char* end, void* object, |
| ParseContext* ctx); |
| |
| // which parses the serialized data stored in the range [ptr, end) into object. |
| // A parse function together with its object forms a callable closure. |
// which parses the serialized data stored in the range [ptr, end) into object.
// A parse function together with its object forms a callable closure.
struct ParseClosure {
  ParseFunc func;  // compiler-generated elementary parse function
  void* object;    // the object (message, string, ...) being parsed into

  // Pre-conditions
  //   ptr < end is a non-empty range where ptr points to the start of a tag
  //   and it's okay to read the bytes in [end, end + kSlopBytes).
  //   Those slop bytes will contain the bytes of the next chunk if the stream
  //   continues, or are undefined, in which case the parse is guaranteed to
  //   fail.
  //
  // Post-conditions
  //   Parses all tag/value pairs starting before end; if a group end tag is
  //   encountered, returns the pointer just after that tag.
  //   If a group end is encountered it is verified against the start-group
  //   tag that was pushed, and the stack is popped.
  //   Otherwise it parses the entire range; if end falls inside one of the
  //   children, those children are pushed on the stack (via ParseContext) so
  //   the parse can resume in the next chunk.
  //
  //   If an element is popped from the stack (it ended on the correct end
  //   group), returns the pointer after the end-group tag (possibly in the
  //   overlap, but the start of the end-group tag is before end).
  //   If the stack is the same or deeper, returns a pointer in the overlap
  //   region (end <= retval < end + kSlopBytes).
  //   All tag/value pairs in [begin, retval) are parsed, and retval points to
  //   the start of a tag.
  const char* operator()(const char* ptr, const char* end, ParseContext* ctx) {
    ABSL_ASSERT(ptr < end);
    return func(ptr, end, object, ctx);
  }
};
| |
| // To fully parse a stream, a driver loop repeatedly calls the parse function |
// at the top of the stack, popping and resuming parsing of the parent message
| // according to the recursive structure of the wireformat. This loop will also |
| // need to provide new buffer chunks and align the ptr correctly over the seams. |
| // The point of this framework is that chunk refresh logic is located in the |
| // outer loop, while the inner loop is almost free of it. The two code paths in |
| // the parse code dealing with seams are located in fallback paths whose checks |
| // are folded with input limit checks that are necessary anyway. In other words, |
| // all the parser code that deals with seams is located in what would otherwise |
| // be error paths of a parser that wouldn't need to deal with seams. |
| |
| class ParseContext { |
| public: |
| enum { |
| // Tag is atmost 5 bytes, varint is atmost 10 resulting in 15 bytes. We |
| // choose |
| // 16 bytes for the obvious reason of alignment. |
| kSlopBytes = 16, |
| // Inlined stack size |
| kInlinedDepth = 15, |
| }; |
| |
| // Arghh!!! here be tech-debt dragons |
| struct ExtraParseData { |
| const DescriptorPool* pool = nullptr; |
| MessageFactory* factory = nullptr; |
| |
| // payload is used for MessageSetItem and maps |
| std::string payload; |
| bool (*parse_map)(const char* begin, const char* end, void* map_field, |
| ParseContext* ctx); |
| |
| void SetEnumValidator(bool (*validator)(int), void* unknown, |
| int field_num) { |
| enum_validator = validator; |
| unknown_fields = unknown; |
| field_number = field_num; |
| } |
| void SetEnumValidatorArg(bool (*validator)(const void*, int), |
| const void* arg, void* unknown, int field_num) { |
| arg_enum_validator = {validator, arg}; |
| unknown_fields = unknown; |
| field_number = field_num; |
| } |
| template <typename Unknown> |
| bool ValidateEnum(int val) const { |
| if (enum_validator(val)) return true; |
| WriteVarint(field_number, val, static_cast<Unknown*>(unknown_fields)); |
| return false; |
| } |
| template <typename Unknown> |
| bool ValidateEnumArg(int val) const { |
| if (arg_enum_validator(val)) return true; |
| WriteVarint(field_number, val, static_cast<Unknown*>(unknown_fields)); |
| return false; |
| } |
| |
| void SetFieldName(const void* name) { |
| unknown_fields = const_cast<void*>(name); |
| } |
| const char* FieldName() const { |
| return static_cast<const char*>(unknown_fields); |
| } |
| |
| union { |
| bool (*enum_validator)(int); |
| struct { |
| bool operator()(int val) const { return validator(arg, val); } |
| bool (*validator)(const void*, int); |
| const void* arg; |
| } arg_enum_validator; |
| }; |
| void* unknown_fields; |
| int field_number; |
| // 0 means no aliasing. If not zero aliasing is the delta between the |
| // ptr and the buffer that needs to be aliased. If the value is |
| // kNoDelta (1) this means delta is actually 0 (we're working directly in |
| // the buffer). |
| enum { kNoDelta = 1 }; |
| std::uintptr_t aliasing = 0; |
| }; |
| |
| ExtraParseData& extra_parse_data() { return extra_parse_data_; } |
| const ExtraParseData& extra_parse_data() const { return extra_parse_data_; } |
| |
| // Helpers to detect if a parse of length delimited field is completed. |
| bool AtLimit() const { return limit_ == 0; } |
| int32 CurrentLimit() const { return limit_; } |
| |
| // Initializes ParseContext with a specific recursion limit (rec_limit) |
| explicit ParseContext(int rec_limit) |
| : depth_(rec_limit), |
| start_depth_(rec_limit), |
| stack_(inline_stack_ + kInlinedDepth - rec_limit), |
| inlined_depth_(std::max(0, rec_limit - kInlinedDepth)) {} |
| |
| ~ParseContext() { |
| if (inlined_depth_ == -1) delete stack_; |
| } |
| |
| void StartParse(ParseClosure parser) { parser_ = parser; } |
| |
| // Parses a chunk of memory given the current state of parse context (ie. |
| // the active parser and stack) and overrun. |
| // Pre-condition: |
| // chunk_ is not empty. |
| // limit_ > 0 (limit from begin) or -1 (no limit) |
| // Post-condition: |
| // returns true on success with overrun_ptr adjusted to the new value, or |
| // false is the parse is finished. False means either a parse failure or |
| // or because the top-level was terminated on a 0 or end-group tag in which |
| // case overrun points to the position after the ending tag. You can call |
| // EndedOnTag() to find if the parse failed due to an error or ended on |
| // terminating tag. |
| bool ParseRange(StringPiece chunk, int* overrun_ptr) { |
| ABSL_ASSERT(!chunk.empty()); |
| int& overrun = *overrun_ptr; |
| if (overrun >= chunk.size()) { |
| // This case can easily happen in patch buffers and we like to inline |
| // this case. |
| overrun -= chunk.size(); |
| return true; |
| } |
| auto res = ParseRangeWithLimit(chunk.begin() + overrun, chunk.end()); |
| overrun = res.second; |
| return res.first; |
| } |
| |
| bool ValidEnd(int overrun) { return depth_ == start_depth_ && overrun == 0; } |
| bool EndedOnTag() const { return last_tag_minus_1_ != 0; } |
| uint32 LastTag() const { return last_tag_minus_1_ + 1; } |
| |
| // Generically verifies for the slop region [begin, begin + kSlopBytes) if |
| // the parse will be terminated by 0 or end-group tag. If true than you can |
| // safely parse the slop region without having to load more data. |
| bool ParseEndsInSlopRegion(const char* begin, int overrun) const; |
| |
| // Should only be called by Parse code. |
| |
| ////////////////////////////////////////////////////////////////////////////// |
| // Fast path helpers. These helpers maintain the state in parse context |
| // through recursive calls. The whole design is to make this as minimal as |
| // possible. Only recursion depth and limit are maintained at every recursion. |
| ////////////////////////////////////////////////////////////////////////////// |
| |
| bool ParseExactRange(ParseClosure parser, const char* begin, |
| const char* end) { |
| if (PROTOBUF_PREDICT_FALSE(--depth_ < 0)) return false; |
| auto old_limit = limit_; |
| limit_ = 0; |
| auto ptr = begin; |
| if (ptr < end) ptr = parser(ptr, end, this); |
| if (ptr != end || EndedOnTag()) return false; |
| limit_ = old_limit; |
| ++depth_; |
| return true; |
| } |
| |
| // Returns a pair of the pointer the parse is left and a boolean indicating |
| // if the group is still continuing. |
| std::pair<const char*, bool> ParseGroup(uint32 tag, ParseClosure parser, |
| const char* begin, const char* end, |
| int* depth) { |
| if (PROTOBUF_PREDICT_FALSE(--depth_ < 0)) return {}; |
| *depth = depth_; |
| auto ptr = begin; |
| if (ptr < end) ptr = parser(ptr, end, this); |
| if (ptr == nullptr) return {}; |
| if (!EndedOnTag()) { |
| // The group hasn't been terminated by an end-group and thus continues, |
| // hence it must have ended because it crossed "end". |
| ABSL_ASSERT(ptr >= end); |
| return {ptr, true}; |
| } |
| // Verify that the terminating tag matches the start group tag. As an extra |
| // subtlety it could have been terminated by an end-group tag but in a |
| // length delimited sub field of the group. So we must also check that depth |
| // matches, if it doesn't match it means a length delimited subfield got |
| // terminated by an end group which is an error. |
| if (tag != last_tag_minus_1_ || *depth != depth_) return {}; |
| last_tag_minus_1_ = 0; // It must always be cleared. |
| ++depth_; |
| return {ptr, false}; |
| } |
| |
| void EndGroup(uint32 tag) { |
| ABSL_ASSERT(tag == 0 || (tag & 7) == 4); |
| // Because of the above assert last_tag_minus_1 is never set to 0, and the |
| // caller can verify the child parser was terminated, by comparing to 0. |
| last_tag_minus_1_ = tag - 1; |
| } |
| |
| ////////////////////////////////////////////////////////////////////////////// |
| // Slow path helper functions when a child crosses the "end" of range. |
| // This is either an error (if limit_ = 0) OR we need to store state. |
| // These functions manage the task of updating the state correctly. |
| ////////////////////////////////////////////////////////////////////////////// |
| |
| // Helper function called by generated code in case of a length delimited |
| // field that is going to cross the boundary. |
| const char* StoreAndTailCall(const char* ptr, const char* end, |
| ParseClosure current_parser, |
| ParseClosure child_parser, int32 size) { |
| // if size was bigger than 2GB we should fail |
| if (size < 0) return nullptr; |
| // At this point ptr could be past end. Hence a malicious size could |
| // overflow. |
| int64 safe_new_limit = size - static_cast<int64>(end - ptr); |
| if (safe_new_limit > INT_MAX) return nullptr; |
| ABSL_ASSERT(safe_new_limit > 0); // only call this if it's crossing end |
| int32 new_limit = static_cast<int32>(safe_new_limit); |
| int32 delta; |
| if (limit_ != -1) { |
| if (PROTOBUF_PREDICT_FALSE(new_limit > limit_)) return nullptr; |
| delta = limit_ - new_limit; |
| } else { |
| delta = -1; // special value |
| } |
| limit_ = new_limit; |
| // Save the current closure on the stack. |
| if (!Push(current_parser, delta)) return nullptr; |
| // Ensure the active state is set correctly. |
| parser_ = child_parser; |
| return ptr < end ? child_parser(ptr, end, this) : ptr; |
| } |
| |
| // Helper function for a child group that has crossed the boundary. |
| bool StoreGroup(ParseClosure current_parser, ParseClosure child_parser, |
| int depth, uint32 tag) { |
| // The group must still read an end-group tag, so it can't be at a limit. |
| // By having this check we ensure that when limit_ = 0 we can't end in some |
| // deeper recursion. Hence ParseExactRange does not need to check for |
| // matching depth. |
| if (limit_ == 0) return false; |
| if (depth == depth_) { |
| // This child group is the active parser. The fast path code assumes |
| // everything will be parsed within a chunk and doesn't modify |
| // parse context in this case. We need to make the child parser active. |
| parser_ = child_parser; |
| } |
| if (ABSL_PREDICT_FALSE(depth < inlined_depth_)) SwitchStack(); |
| stack_[depth] = {current_parser, static_cast<int32>(~(tag >> 3))}; |
| return true; |
| } |
| |
| private: |
| // This the "active" or current parser. |
| ParseClosure parser_; |
| // The context keeps an internal stack to keep track of the recursive |
| // part of the parse state. |
| // Current depth of the active parser, depth counts down. |
| // This is used to limit recursion depth (to prevent overflow on malicious |
| // data), but is also used to index in stack_ to store the current state. |
| int depth_; |
| int32 limit_ = -1; |
| |
| // A state is on the stack to save it, in order to continue parsing after |
| // child is done. |
| struct State { |
| ParseClosure parser; |
| // This element describes how to adjust the parse state after finishing |
| // the child. If the child was a length delimited field, delta describes |
| // the limit relative to the child's limit (hence >= 0). |
| // If child was a sub group limit contains ~field num (hence < 0) in order |
| // to verify the group ended on a correct end tag. No limit adjusting. |
| // Note above the sign of delta is meaningful |
| int32 delta_or_group_num; |
| }; |
| int start_depth_; |
| // This is used to return the end group (or 0 tag) that terminated the parse. |
| // Actually it contains last_tag minus 1. Which is either the start group tag |
| // or -1. This member should always be zero and the caller should immediately |
| // check this member to verify what state the parser ended on and clear its |
| // value. |
| uint32 last_tag_minus_1_ = 0; |
| |
| ExtraParseData extra_parse_data_; |
| State* stack_; |
| State inline_stack_[kInlinedDepth]; |
| int inlined_depth_; |
| |
| bool Push(ParseClosure parser, int32 delta) { |
| ABSL_ASSERT(delta >= -1); // Make sure it's a valid len-delim |
| if (PROTOBUF_PREDICT_FALSE(--depth_ < 0)) return false; |
| if (ABSL_PREDICT_FALSE(depth_ < inlined_depth_)) SwitchStack(); |
| stack_[depth_] = {parser, delta}; |
| return true; |
| } |
| |
| State Pop() { return stack_[depth_++]; } |
| |
| void SwitchStack(); |
| |
| // Parses a chunk of memory given the current state of parse context (ie. |
| // the active parser and stack). |
| // Pre-condition: |
| // begin < end (non-empty range) |
| // limit_ > 0 (limit from begin) or -1 (no limit) |
| // Post-condition: |
| // returns either (true, overrun) for a successful parse that can continue, |
| // or (false, overrun) for a parse that can't continue. Either due to a |
| // corrupt data (parse failure) or because the top-level was terminated on a |
| // 0 or end-group tag in which case overrun points to the position after the |
| // end. |
| std::pair<bool, int> ParseRangeWithLimit(const char* begin, const char* end); |
| }; |
| |
| // This is wrapper to parse a sequence of buffers without the overlap property, |
| // like the sequence given by ZeroCopyInputStream (ZCIS) or ByteSource. This is |
| // done by copying data around the seams, hence the name EpsCopyParser. |
| // Pictorially if ZCIS presents a stream in chunks like so |
| // [---------------------------------------------------------------] |
| // [---------------------] chunk 1 |
| // [----------------------------] chunk 2 |
| // chunk 3 [--------------] |
// where '-' depicts bytes of the stream or chunks vertically aligned with the
| // corresponding bytes between stream and chunk. |
| // |
| // This class will present chunks to the ParseContext like this |
| // [-----------------....] chunk 1 |
| // [----....] patch |
| // [------------------------....] chunk 2 |
| // [----....] patch |
| // chunk 3 [----------....] |
| // patch [----****] |
| // by using a fixed size buffer to patch over the seams. This requires |
// copying of an "epsilon" neighborhood around the seams. In the picture above
// dots mean bytes beyond the end of the new chunks. Each chunk is kSlopBytes
// smaller than its original chunk (above depicted as 4 dots) and the number
// of chunks is doubled because each seam in the original stream introduces a
// new patch.
| // |
| // The algorithm is simple but not entirely trivial. Two complications arise |
| // 1) The original chunk could be less than kSlopBytes. Hence we can't simply |
| // chop the last kSlopBytes of a chunk. |
// 2) In some (infrequent) use cases, we don't necessarily parse until the end
| // of a stream, but instead the parse is terminated by 0 or end-group tag. If |
| // this is allowed we must take care to leave the underlying stream at a |
| // position precisely after the terminating tag. If this happens in the slop |
| // region of a buffer we will already have loaded the next buffer. Not all |
| // streams allow backing up to a previous buffer blocking us from leaving the |
| // stream in the proper state. If terminating on 0 is allowed (in the old parser |
| // this means a call to MergePartialFromCodedStream without a subsequent call to |
| // ConsumedEntireMessage), this algorithm needs to ensure the parse won't end |
| // in the slop region before moving the next buffer. |
| // |
| // The core idea of EpsCopyParser is to parse ranges except the last kSlopBytes |
| // and store those in the patch buffer, until the next parse provides additional |
| // data to fill the slop region. So parsing a range means first parsing the slop |
| // bytes of the previous range using the new range to provide slop bytes for the |
| // patch, followed by parsing the actual range except the last kSlopBytes and |
| // store those. If no more data is available a call to Done finishes the parse |
| // by parsing the remaining slopbytes. |
| // |
| // In order to deal with problem 1, we need to deal with the case that a new |
| // chunk can be less or equal than kSlopBytes big. We can just copy the chunk |
| // to the end and return (buffer, chunk->size). Pictorially |
| // [--------] chunk 1 |
| // [--] chunk 2 |
| // [---] chunk 3 |
| // will become |
| // [----....] chunk 1 |
| // [--....] patch (not full range of the patch buffer, only two hyphens) |
| // [--] chunk 2 (too small so never parsed directly) |
| // [---....] patch (not full range of the buffer, only three hyphens) |
| // [---] chunk 3 (too small so never parsed directly) |
| // [----****] patch (full range, last bytes are garbage) |
| // Because of this the source (the dots in above) can overlap with the |
| // destination buffer and so we have to use memmove. |
| // |
| // To solve problem 2, we use a generic parser together with knowledge of the |
| // nesting from the side stack to verify if the parse will be terminated in the |
| // slop region. If it terminates inside the slop region, we just parse it as |
| // well. See ParseEndsInSlopRegion in ParseContext for the implementation. This |
| // is only done if ensure_non_negative_skip is true, if it's false Skip() could |
| // return a negative number. |
template <bool ensure_non_negative_skip>
class EpsCopyParser {
 public:
  // Registers the root parser with the context. The context's lifetime must
  // exceed this object's; EpsCopyParser does not own it.
  EpsCopyParser(ParseClosure parser, ParseContext* ctx) : ctx_(ctx) {
    ctx_->StartParse(parser);
  }

  // Parse the bytes as provided by the non-empty range.
  // Returns true on a successful parse ready to accept more data; if there is
  // no more data call Done() to finish the parse.
  // Returns false if the parse is terminated. Termination is either due to a
  // parse error or due to termination on an end-group or 0 tag. You can call
  // EndedOnTag() on the underlying ParseContext to find out if the parse ended
  // correctly on a terminating tag.
  bool Parse(StringPiece range) {
    ABSL_ASSERT(!range.empty());
    auto size = range.size();
    if (size > kSlopBytes) {
      // The buffer is large enough to be able to parse the (size - kSlopBytes)
      // prefix directly. However we still need to parse the data in buffer_,
      // that holds the slop region of the previous buffer, patched with the
      // first kSlopBytes of this range.
      if (overrun_ == kSlopBytes) {
        // We overran the whole slop region of the previous buffer.
        // Optimization: we can skip the patch buffer entirely.
        overrun_ = 0;
      } else {
        // Fill the second half of the patch buffer from the new range, then
        // parse the leftover slop bytes of the previous buffer.
        std::memcpy(buffer_ + kSlopBytes, range.begin(), kSlopBytes);
        if (!ParseRange({buffer_, kSlopBytes}, 0)) return false;
      }
      // Hold back the last kSlopBytes; they become the next patch region.
      range.remove_suffix(kSlopBytes);
    } else {
      // Chunk is too small to parse directly (problem 1 in the class
      // comment): stash it entirely in the patch buffer.
      std::memcpy(buffer_ + kSlopBytes, range.begin(), size);
      range = {buffer_, size};
    }
    // NOTE(review): size is unsigned, so when size <= kSlopBytes the delta
    // below relies on unsigned wraparound converting back to a negative int —
    // presumably intentional, but worth confirming.
    if (!ParseRange(range, size - kSlopBytes)) return false;
    // Source and destination may overlap when range points into buffer_
    // (small-chunk case above), hence memmove rather than memcpy.
    std::memmove(buffer_, range.end(), kSlopBytes);
    if (ensure_non_negative_skip &&
        ctx_->ParseEndsInSlopRegion(buffer_, overrun_)) {
      // We care about leaving the stream at the right place and the stream
      // will indeed terminate, so just parse it (problem 2 in the class
      // comment).
      auto res = ParseRange({buffer_, kSlopBytes}, size);
      ABSL_ASSERT(!res);  // a terminating parse must report "can't continue"
      return false;
    }
    return true;
  }

  // Finish the parse by parsing the remaining slop data and verify success.
  bool Done() {
    return ParseRange({buffer_, kSlopBytes}, 0) && ctx_->ValidEnd(overrun_);
  }

  // If the parse was terminated by an end-group or 0 tag, Skip returns the
  // offset where the parse left off relative to the start of the last range
  // parsed.
  // NOTE: This could be negative unless ensure_non_negative_skip is true.
  int Skip() {
    // The reason for ensure_non_negative_skip and ParseEndsInSlopRegion is
    // that the following assert holds, which implies the stream doesn't need
    // to back up.
    ABSL_ASSERT(!ensure_non_negative_skip || overrun_ >= 0);
    return overrun_;
  }

 private:
  constexpr static int kSlopBytes = ParseContext::kSlopBytes;
  // overrun_ stores where in the slop region of the previous parse the parse
  // was left off. This is used to start the parse of the next region at the
  // correct point. Initially overrun_ is set to kSlopBytes, which means
  // the parse starts at precisely the beginning of the new buffer provided.
  int overrun_ = kSlopBytes;
  // The first kSlopBytes of buffer_ contains the slop region of the previously
  // parsed range; the second half is patched in from the next range.
  char buffer_[2 * kSlopBytes] = {};
  ParseContext* ctx_;  // not owned

  // Forwards to ParseContext::ParseRange; on termination, delta adjusts
  // overrun_ so it is relative to the start of the last caller-provided range.
  bool ParseRange(StringPiece range, int delta) {
    auto res = ctx_->ParseRange(range, &overrun_);
    if (!res) overrun_ += delta;
    return res;
  }
};
| |
| // Add any of the following lines to debug which parse function is failing. |
| |
| #define GOOGLE_PROTOBUF_ASSERT_RETURN(predicate, ret) \ |
| if (!(predicate)) { \ |
| /* raise(SIGINT); */ \ |
| /* GOOGLE_LOG(ERROR) << "Parse failure"; */ \ |
| return ret; \ |
| } |
| |
| #define GOOGLE_PROTOBUF_PARSER_ASSERT(predicate) \ |
| GOOGLE_PROTOBUF_ASSERT_RETURN(predicate, nullptr) |
| |
// Parses a single tag/value pair starting at 'begin' and dispatches it to
// field_parser (a policy object with AddVarint/AddFixed64/AddLengthDelimited/
// StartGroup/EndGroup/AddFixed32 members).
// Returns {ptr, false} to continue the loop at ptr, or {ptr, true} when the
// caller must stop: either the parse ended on a 0/end-group tag, or a
// length-delimited field / group crossed the chunk boundary and its state was
// stored in ctx. A nullptr first element signals a parse error.
// NOTE: the fixed32/fixed64 reads below are unconditional; they rely on the
// kSlopBytes overlap invariant making reads past 'end' legal.
template <typename T>
std::pair<const char*, bool> FieldParser(uint64 tag, ParseClosure parent,
                                         T field_parser, const char* begin,
                                         const char* end, ParseContext* ctx) {
  auto ptr = begin;
  uint32 number = tag >> 3;
  if (ABSL_PREDICT_FALSE(number == 0)) {
    // Field number 0 is only legal as the lone 0 byte (terminator).
    GOOGLE_PROTOBUF_ASSERT_RETURN(tag == 0, {});
    // Special case scenario of 0 termination.
    ctx->EndGroup(tag);
    return {ptr, true};
  }
  using WireType = internal::WireFormatLite::WireType;
  switch (tag & 7) {  // low 3 bits of the tag encode the wire type
    case WireType::WIRETYPE_VARINT: {
      uint64 value;
      ptr = Varint::Parse64(ptr, &value);
      GOOGLE_PROTOBUF_ASSERT_RETURN(ptr != nullptr, {});
      field_parser.AddVarint(number, value);
      break;
    }
    case WireType::WIRETYPE_FIXED64: {
      uint64 value = io::UnalignedLoad<uint64>(ptr);
      ptr += 8;
      field_parser.AddFixed64(number, value);
      break;
    }
    case WireType::WIRETYPE_LENGTH_DELIMITED: {
      uint32 size;
      ptr = Varint::Parse32(ptr, &size);
      GOOGLE_PROTOBUF_ASSERT_RETURN(ptr != nullptr, {});
      ParseClosure child = field_parser.AddLengthDelimited(number, size);
      if (size > end - ptr) {
        // Field crosses the chunk boundary: save state and tail-call into the
        // child; the caller must stop iterating this chunk.
        return {ctx->StoreAndTailCall(ptr, end, parent, child, size), true};
      }
      // Field fits in this chunk: parse it exactly and continue after it.
      auto newend = ptr + size;
      GOOGLE_PROTOBUF_ASSERT_RETURN(ctx->ParseExactRange(child, ptr, newend),
                                    {});
      ptr = newend;
      break;
    }
    case WireType::WIRETYPE_START_GROUP: {
      int depth;
      ParseClosure child = field_parser.StartGroup(number);
      auto res = ctx->ParseGroup(tag, child, ptr, end, &depth);
      ptr = res.first;
      GOOGLE_PROTOBUF_ASSERT_RETURN(ptr != nullptr, {});
      if (res.second) {
        // Group crossed the boundary; store it for resumption and stop.
        GOOGLE_PROTOBUF_ASSERT_RETURN(
            ctx->StoreGroup(parent, child, depth, tag), {});
        return {ptr, true};
      }
      break;
    }
    case WireType::WIRETYPE_END_GROUP: {
      // Terminates the enclosing group; validity is checked by the caller
      // (ParseGroup compares the recorded tag).
      field_parser.EndGroup(number);
      ctx->EndGroup(tag);
      return {ptr, true};
    }
    case WireType::WIRETYPE_FIXED32: {
      uint32 value = io::UnalignedLoad<uint32>(ptr);
      ptr += 4;
      field_parser.AddFixed32(number, value);
      break;
    }
    default:
      // Wire types 6 and 7 are not defined.
      GOOGLE_PROTOBUF_ASSERT_RETURN(false, {});
  }
  ABSL_ASSERT(ptr != nullptr);
  return {ptr, false};
}
| |
| template <typename T> |
| const char* WireFormatParser(ParseClosure parent, T field_parser, |
| const char* begin, const char* end, |
| ParseContext* ctx) { |
| auto ptr = begin; |
| while (ptr < end) { |
| uint32 tag; |
| ptr = Varint::Parse32(ptr, &tag); |
| GOOGLE_PROTOBUF_PARSER_ASSERT(ptr != nullptr); |
| auto res = FieldParser(tag, parent, field_parser, ptr, end, ctx); |
| ptr = res.first; |
| GOOGLE_PROTOBUF_PARSER_ASSERT(ptr != nullptr); |
| if (res.second) return ptr; |
| } |
| return ptr; |
| } |
| |
// Here are the elementary parsers for length delimited subfields that contain
// plain data (ie not a protobuf). These are trivial as they don't recurse,
// except for the UnknownGroupLiteParse that parses a group into a string.
// Some functions need extra arguments that the function signature allows,
// these are passed through variables in ParseContext::ExtraParseData that the
// caller needs to set prior to the call.
//
// All parsers below share the same shape:
//   const char* Parser(const char* begin, const char* end, void* object,
//                      ParseContext* ctx);
// where [begin, end) is the input span and `object` points to the field
// being populated. NOTE(review): judging by the
// GOOGLE_PROTOBUF_PARSER_ASSERT(ptr != nullptr) checks in WireFormatParser, a
// nullptr return appears to signal parse failure — confirm against the
// definitions.

// The null parser does not do anything, but is useful as a substitute.
const char* NullParser(const char* begin, const char* end, void* object,
                       ParseContext*);

// Helper for verification of utf8
bool VerifyUTF8(StringPiece s, ParseContext* ctx);
// All the string parsers with or without UTF checking and for all CTypes
// (std::string, Cord, StringPiece). The UTF8 variants check, the
// UTF8Verify variants verify (via VerifyUTF8), the plain ones do neither.
const char* StringParser(const char* begin, const char* end, void* object,
                         ParseContext*);
const char* CordParser(const char* begin, const char* end, void* object,
                       ParseContext*);
const char* StringPieceParser(const char* begin, const char* end, void* object,
                              ParseContext*);
const char* StringParserUTF8(const char* begin, const char* end, void* object,
                             ParseContext*);
const char* CordParserUTF8(const char* begin, const char* end, void* object,
                           ParseContext*);
const char* StringPieceParserUTF8(const char* begin, const char* end,
                                  void* object, ParseContext*);
const char* StringParserUTF8Verify(const char* begin, const char* end,
                                   void* object, ParseContext*);
const char* CordParserUTF8Verify(const char* begin, const char* end,
                                 void* object, ParseContext*);
const char* StringPieceParserUTF8Verify(const char* begin, const char* end,
                                        void* object, ParseContext*);
// Parsers that also eat the slopbytes if possible. Can only be called in a
// ParseContext where limit_ is set properly.
const char* GreedyStringParser(const char* begin, const char* end, void* object,
                       ParseContext*);
const char* GreedyStringParserUTF8(const char* begin, const char* end, void* object,
                       ParseContext*);
const char* GreedyStringParserUTF8Verify(const char* begin, const char* end,
                                   void* object, ParseContext*);

// This is the only recursive parser.
const char* UnknownGroupLiteParse(const char* begin, const char* end,
                                  void* object, ParseContext* ctx);
// This is a helper for the UnknownGroupLiteParse but is actually also
// useful in the generated code. It uses overload on string* vs
// UnknownFieldSet* to make the generated code isomorphic between full and lite.
std::pair<const char*, bool> UnknownFieldParse(uint32 tag, ParseClosure parent,
                                               const char* begin,
                                               const char* end, std::string* unknown,
                                               ParseContext* ctx);

// The packed parsers parse repeated numeric primitives directly into the
// corresponding field

// These are packed varints (int32/int64/uint32/uint64/sint32/sint64/bool).
const char* PackedInt32Parser(const char* begin, const char* end, void* object,
                              ParseContext* ctx);
const char* PackedUInt32Parser(const char* begin, const char* end, void* object,
                               ParseContext* ctx);
const char* PackedInt64Parser(const char* begin, const char* end, void* object,
                              ParseContext* ctx);
const char* PackedUInt64Parser(const char* begin, const char* end, void* object,
                               ParseContext* ctx);
const char* PackedSInt32Parser(const char* begin, const char* end, void* object,
                               ParseContext* ctx);
const char* PackedSInt64Parser(const char* begin, const char* end, void* object,
                               ParseContext* ctx);
const char* PackedBoolParser(const char* begin, const char* end, void* object,
                             ParseContext* ctx);

// Enums in proto3 do not require verification
const char* PackedEnumParser(const char* begin, const char* end, void* object,
                             ParseContext* ctx);
// Enums in proto2 require verification. So an additional verification function
// needs to be passed into ExtraParseData.
// If it's a generated verification function we only need the function pointer.
const char* PackedValidEnumParserLite(const char* begin, const char* end,
                                      void* object, ParseContext* ctx);
// If it's reflective we need a function that takes an additional argument.
const char* PackedValidEnumParserLiteArg(const char* begin, const char* end,
                                         void* object, ParseContext* ctx);

// These are the packed fixed field parsers
// (fixed32/sfixed32/fixed64/sfixed64/float/double).
const char* PackedFixed32Parser(const char* begin, const char* end,
                                void* object, ParseContext* ctx);
const char* PackedSFixed32Parser(const char* begin, const char* end,
                                 void* object, ParseContext* ctx);
const char* PackedFixed64Parser(const char* begin, const char* end,
                                void* object, ParseContext* ctx);
const char* PackedSFixed64Parser(const char* begin, const char* end,
                                 void* object, ParseContext* ctx);
const char* PackedFloatParser(const char* begin, const char* end, void* object,
                              ParseContext* ctx);
const char* PackedDoubleParser(const char* begin, const char* end, void* object,
                               ParseContext* ctx);

// Map key/values are stored in a MapEntry length delimited field. If this
// crosses a seam we fallback to first store in payload. The object points
// to a MapField in which we parse the payload upon done (we detect this when
// this function is called with limit_ == 0), by calling parse_map (also stored
// in ctx) on the resulting string.
const char* SlowMapEntryParser(const char* begin, const char* end, void* object,
                               internal::ParseContext* ctx);
| |
| } // namespace internal |
| } // namespace protobuf |
| } // namespace google |
| |
| #include <google/protobuf/port_undef.inc> |
| |
| #endif // GOOGLE_PROTOBUF_ENABLE_EXPERIMENTAL_PARSER |
| #endif // GOOGLE_PROTOBUF_PARSE_CONTEXT_H__ |