// Source: src/google/protobuf/wire_format_lite.cc (third_party/protobuf, Git at Google)

 // Protocol Buffers - Google's data interchange format
 // Copyright 2008 Google Inc.  All rights reserved.
 //
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file or at
 // https://developers.google.com/open-source/licenses/bsd

 // Author: kenton@google.com (Kenton Varda)
 //  Based on original Protocol Buffers design by
 //  Sanjay Ghemawat, Jeff Dean, and others.

 #include "google/protobuf/wire_format_lite.h"

 #include <limits>
 #include <stack>
 #include <string>
 #include <vector>

 #include "absl/log/absl_check.h"
 #include "absl/log/absl_log.h"
 #include "absl/strings/cord.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_format.h"
 #include "utf8_validity.h"


 // Must be included last.
 #include "google/protobuf/port_def.inc"

 namespace google {
 namespace protobuf {
 namespace internal {

 #if !defined(_MSC_VER) || (_MSC_VER >= 1900 && _MSC_VER < 1912)
 // Namespace-scope definitions of the MessageSet tag constants. No initializer
 // is given here, so the values come from the declarations in the class body.
 // Old versions of MSVC don't like definitions of inline constants, while GCC
 // requires them; that is what the compiler-version guard above selects on.
 const int WireFormatLite::kMessageSetItemStartTag;
 const int WireFormatLite::kMessageSetItemEndTag;
 const int WireFormatLite::kMessageSetTypeIdTag;
 const int WireFormatLite::kMessageSetMessageTag;

 #endif

 // Namespace-scope definitions of the fixed-width encoded-size constants. The
 // values themselves are supplied where the members are declared; these lines
 // only provide a definition for code that odr-uses the constants.
 constexpr size_t WireFormatLite::kFixed32Size;
 constexpr size_t WireFormatLite::kFixed64Size;
 constexpr size_t WireFormatLite::kSFixed32Size;
 constexpr size_t WireFormatLite::kSFixed64Size;
 constexpr size_t WireFormatLite::kFloatSize;
 constexpr size_t WireFormatLite::kDoubleSize;
 constexpr size_t WireFormatLite::kBoolSize;

 // Total number of bytes taken by the four MessageSet item tags (item start,
 // item end, type id, message), each measured as a 32-bit varint with
 // StaticVarintSize32.
 // IBM xlC requires prefixing constants with WireFormatLite::
 const size_t WireFormatLite::kMessageSetItemTagsSize =
     io::CodedOutputStream::StaticVarintSize32<
         WireFormatLite::kMessageSetItemStartTag>::value +
     io::CodedOutputStream::StaticVarintSize32<
         WireFormatLite::kMessageSetItemEndTag>::value +
     io::CodedOutputStream::StaticVarintSize32<
         WireFormatLite::kMessageSetTypeIdTag>::value +
     io::CodedOutputStream::StaticVarintSize32<
         WireFormatLite::kMessageSetMessageTag>::value;

 // Lookup table from a field type (used as the array index) to the CppType
 // listed for it. The trailing comment on each row names the field type that
 // row corresponds to. Index 0 is not a field type; it holds a placeholder.
 const WireFormatLite::CppType
     WireFormatLite::kFieldTypeToCppTypeMap[MAX_FIELD_TYPE + 1] = {
         static_cast<CppType>(0),  // 0 is reserved for errors

         CPPTYPE_DOUBLE,   // TYPE_DOUBLE
         CPPTYPE_FLOAT,    // TYPE_FLOAT
         CPPTYPE_INT64,    // TYPE_INT64
         CPPTYPE_UINT64,   // TYPE_UINT64
         CPPTYPE_INT32,    // TYPE_INT32
         CPPTYPE_UINT64,   // TYPE_FIXED64
         CPPTYPE_UINT32,   // TYPE_FIXED32
         CPPTYPE_BOOL,     // TYPE_BOOL
         CPPTYPE_STRING,   // TYPE_STRING
         CPPTYPE_MESSAGE,  // TYPE_GROUP
         CPPTYPE_MESSAGE,  // TYPE_MESSAGE
         CPPTYPE_STRING,   // TYPE_BYTES
         CPPTYPE_UINT32,   // TYPE_UINT32
         CPPTYPE_ENUM,     // TYPE_ENUM
         CPPTYPE_INT32,    // TYPE_SFIXED32
         CPPTYPE_INT64,    // TYPE_SFIXED64
         CPPTYPE_INT32,    // TYPE_SINT32
         CPPTYPE_INT64,    // TYPE_SINT64
 };

 // Lookup table from a field type (used as the array index) to the wire type
 // listed for it. The trailing comment on each row names the field type that
 // row corresponds to. Index 0 is not a field type and maps to an invalid wire
 // type (-1).
 const WireFormatLite::WireType
     WireFormatLite::kWireTypeForFieldType[MAX_FIELD_TYPE + 1] = {
         static_cast<WireFormatLite::WireType>(-1),  // invalid
         WireFormatLite::WIRETYPE_FIXED64,           // TYPE_DOUBLE
         WireFormatLite::WIRETYPE_FIXED32,           // TYPE_FLOAT
         WireFormatLite::WIRETYPE_VARINT,            // TYPE_INT64
         WireFormatLite::WIRETYPE_VARINT,            // TYPE_UINT64
         WireFormatLite::WIRETYPE_VARINT,            // TYPE_INT32
         WireFormatLite::WIRETYPE_FIXED64,           // TYPE_FIXED64
         WireFormatLite::WIRETYPE_FIXED32,           // TYPE_FIXED32
         WireFormatLite::WIRETYPE_VARINT,            // TYPE_BOOL
         WireFormatLite::WIRETYPE_LENGTH_DELIMITED,  // TYPE_STRING
         WireFormatLite::WIRETYPE_START_GROUP,       // TYPE_GROUP
         WireFormatLite::WIRETYPE_LENGTH_DELIMITED,  // TYPE_MESSAGE
         WireFormatLite::WIRETYPE_LENGTH_DELIMITED,  // TYPE_BYTES
         WireFormatLite::WIRETYPE_VARINT,            // TYPE_UINT32
         WireFormatLite::WIRETYPE_VARINT,            // TYPE_ENUM
         WireFormatLite::WIRETYPE_FIXED32,           // TYPE_SFIXED32
         WireFormatLite::WIRETYPE_FIXED64,           // TYPE_SFIXED64
         WireFormatLite::WIRETYPE_VARINT,            // TYPE_SINT32
         WireFormatLite::WIRETYPE_VARINT,            // TYPE_SINT64
 };

 // Consumes and discards the payload of a single field whose tag has already
 // been read from `input`.
 //
 // Returns false when the tag carries field number 0, when the wire type is
 // END_GROUP or unrecognized, when the payload cannot be read, or when a group
 // is not closed by an END_GROUP tag with the same field number.
 bool WireFormatLite::SkipField(io::CodedInputStream* input, uint32_t tag) {
   const int field_number = WireFormatLite::GetTagFieldNumber(tag);
   // Field number 0 is illegal.
   if (field_number == 0) return false;

   switch (WireFormatLite::GetTagWireType(tag)) {
     case WireFormatLite::WIRETYPE_VARINT: {
       uint64_t discarded;
       return input->ReadVarint64(&discarded);
     }
     case WireFormatLite::WIRETYPE_FIXED64: {
       uint64_t discarded;
       return input->ReadLittleEndian64(&discarded);
     }
     case WireFormatLite::WIRETYPE_FIXED32: {
       uint32_t discarded;
       return input->ReadLittleEndian32(&discarded);
     }
     case WireFormatLite::WIRETYPE_LENGTH_DELIMITED: {
       uint32_t payload_size;
       return input->ReadVarint32(&payload_size) && input->Skip(payload_size);
     }
     case WireFormatLite::WIRETYPE_START_GROUP: {
       if (!input->IncrementRecursionDepth()) return false;
       if (!SkipMessage(input)) return false;
       input->DecrementRecursionDepth();
       // The tag that terminated the nested message must close this group.
       const uint32_t expected_end_tag = WireFormatLite::MakeTag(
           field_number, WireFormatLite::WIRETYPE_END_GROUP);
       return input->LastTagWas(expected_end_tag);
     }
     case WireFormatLite::WIRETYPE_END_GROUP:
     default:
       // An END_GROUP here has no matching start; anything else is unknown.
       return false;
   }
 }

 // Copying variant of SkipField(): consumes one field from `input` (its tag
 // has already been read) and re-emits the tag and payload to `output`.
 //
 // Returns false when the tag carries field number 0, when the wire type is
 // END_GROUP or unrecognized, when the payload cannot be read, or when a group
 // is not closed by a matching END_GROUP tag. For length-delimited fields and
 // groups, bytes may already have been written to `output` when false is
 // returned.
 bool WireFormatLite::SkipField(io::CodedInputStream* input, uint32_t tag,
                                io::CodedOutputStream* output) {
   const int field_number = WireFormatLite::GetTagFieldNumber(tag);
   // Field number 0 is illegal.
   if (field_number == 0) return false;

   switch (WireFormatLite::GetTagWireType(tag)) {
     case WireFormatLite::WIRETYPE_VARINT: {
       uint64_t varint;
       if (!input->ReadVarint64(&varint)) return false;
       output->WriteVarint32(tag);
       output->WriteVarint64(varint);
       return true;
     }
     case WireFormatLite::WIRETYPE_FIXED64: {
       uint64_t fixed;
       if (!input->ReadLittleEndian64(&fixed)) return false;
       output->WriteVarint32(tag);
       output->WriteLittleEndian64(fixed);
       return true;
     }
     case WireFormatLite::WIRETYPE_FIXED32: {
       uint32_t fixed;
       if (!input->ReadLittleEndian32(&fixed)) return false;
       output->WriteVarint32(tag);
       output->WriteLittleEndian32(fixed);
       return true;
     }
     case WireFormatLite::WIRETYPE_LENGTH_DELIMITED: {
       uint32_t payload_size;
       if (!input->ReadVarint32(&payload_size)) return false;
       // The header goes out before the payload has been read.
       output->WriteVarint32(tag);
       output->WriteVarint32(payload_size);
       // TODO: Provide API to prevent extra string copying.
       std::string payload;
       if (!input->ReadString(&payload, payload_size)) return false;
       output->WriteString(payload);
       return true;
     }
     case WireFormatLite::WIRETYPE_START_GROUP: {
       output->WriteVarint32(tag);
       if (!input->IncrementRecursionDepth()) return false;
       if (!SkipMessage(input, output)) return false;
       input->DecrementRecursionDepth();
       // The tag that terminated the nested message must close this group.
       const uint32_t expected_end_tag = WireFormatLite::MakeTag(
           field_number, WireFormatLite::WIRETYPE_END_GROUP);
       return input->LastTagWas(expected_end_tag);
     }
     case WireFormatLite::WIRETYPE_END_GROUP:
     default:
       // An END_GROUP here has no matching start; anything else is unknown.
       return false;
   }
 }

 // Skips fields from `input` until the message ends. The message ends either
 // when ReadTag() returns 0 (end of input) or when an END_GROUP tag is read;
 // both cases return true. Returns false as soon as a field cannot be skipped.
 bool WireFormatLite::SkipMessage(io::CodedInputStream* input) {
   uint32_t tag;
   while ((tag = input->ReadTag()) != 0) {
     // An END_GROUP tag must be the end of the message.
     const bool closes_group = WireFormatLite::GetTagWireType(tag) ==
                               WireFormatLite::WIRETYPE_END_GROUP;
     if (closes_group) return true;

     if (!SkipField(input, tag)) return false;
   }
   // End of input.  This is a valid place to end, so return true.
   return true;
 }

 // Copying variant of SkipMessage(): skips fields from `input` until the
 // message ends, re-emitting every field to `output`. When the message is
 // terminated by an END_GROUP tag, that tag is copied to `output` as well.
 // Returns true at end of input or at END_GROUP, false as soon as a field
 // cannot be skipped.
 bool WireFormatLite::SkipMessage(io::CodedInputStream* input,
                                  io::CodedOutputStream* output) {
   uint32_t tag;
   while ((tag = input->ReadTag()) != 0) {
     const bool closes_group = WireFormatLite::GetTagWireType(tag) ==
                               WireFormatLite::WIRETYPE_END_GROUP;
     if (closes_group) {
       // Must be the end of the message; forward the terminator too.
       output->WriteVarint32(tag);
       return true;
     }

     if (!SkipField(input, tag, output)) return false;
   }
   // End of input.  This is a valid place to end, so return true.
   return true;
 }

 // Skips one field (whose tag has already been read) by delegating to
 // WireFormatLite::SkipField; the field's contents are discarded.
 bool FieldSkipper::SkipField(io::CodedInputStream* input, uint32_t tag) {
   return WireFormatLite::SkipField(input, tag);
 }

 // Skips the rest of the current message by delegating to
 // WireFormatLite::SkipMessage; the contents are discarded.
 bool FieldSkipper::SkipMessage(io::CodedInputStream* input) {
   return WireFormatLite::SkipMessage(input);
 }

 // Deliberately a no-op: this skipper keeps no record of the field number or
 // the value it is given.
 void FieldSkipper::SkipUnknownEnum(int /* field_number */, int /* value */) {
   // Nothing.
 }

 // Skips one field (whose tag has already been read) while copying its tag and
 // payload into unknown_fields_, via the copying WireFormatLite::SkipField.
 bool CodedOutputStreamFieldSkipper::SkipField(io::CodedInputStream* input,
                                               uint32_t tag) {
   return WireFormatLite::SkipField(input, tag, unknown_fields_);
 }

 // Skips the rest of the current message while copying every skipped field
 // into unknown_fields_, via the copying WireFormatLite::SkipMessage.
 bool CodedOutputStreamFieldSkipper::SkipMessage(io::CodedInputStream* input) {
   return WireFormatLite::SkipMessage(input, unknown_fields_);
 }

 void CodedOutputStreamFieldSkipper::SkipUnknownEnum(int field_number,
                                                     int value) {
   unknown_fields_->WriteVarint32(field_number);
   unknown_fields_->WriteVarint64(value);
 }

 // Reads a packed (length-prefixed) run of enum values from `input`.
 //
 // Each value accepted by `is_valid` (or every value, when `is_valid` is null)
 // is appended to `values`. Each rejected value is written to
 // `unknown_fields_stream` as a varint field numbered `field_number`.
 //
 // Returns false if the length prefix or any element cannot be read; in the
 // latter case the pushed limit is not popped.
 bool WireFormatLite::ReadPackedEnumPreserveUnknowns(
     io::CodedInputStream* input, int field_number, bool (*is_valid)(int),
     io::CodedOutputStream* unknown_fields_stream, RepeatedField<int>* values) {
   uint32_t packed_bytes;
   if (!input->ReadVarint32(&packed_bytes)) return false;

   // Tag used for every rejected element; it does not depend on the element.
   const uint32_t unknown_tag = WireFormatLite::MakeTag(
       field_number, WireFormatLite::WIRETYPE_VARINT);

   const io::CodedInputStream::Limit saved_limit =
       input->PushLimit(packed_bytes);
   while (input->BytesUntilLimit() > 0) {
     int element;
     if (!ReadPrimitive<int, WireFormatLite::TYPE_ENUM>(input, &element)) {
       return false;
     }
     const bool accepted = (is_valid == nullptr) || is_valid(element);
     if (!accepted) {
       unknown_fields_stream->WriteVarint32(unknown_tag);
       unknown_fields_stream->WriteVarint32(element);
       continue;
     }
     values->Add(element);
   }
   input->PopLimit(saved_limit);
   return true;
 }

 #if !defined(ABSL_IS_LITTLE_ENDIAN)

 // Overload set used by WriteArray() when ABSL_IS_LITTLE_ENDIAN is not
 // defined. Each overload writes one value of its type to `dest` by forwarding
 // to the corresponding WireFormatLite::Write*NoTagToArray routine, so the
 // template can encode any supported element type through a single name.
 namespace {
 // float elements.
 void EncodeFixedSizeValue(float v, uint8_t* dest) {
   WireFormatLite::WriteFloatNoTagToArray(v, dest);
 }

 // double elements.
 void EncodeFixedSizeValue(double v, uint8_t* dest) {
   WireFormatLite::WriteDoubleNoTagToArray(v, dest);
 }

 // fixed32 elements.
 void EncodeFixedSizeValue(uint32_t v, uint8_t* dest) {
   WireFormatLite::WriteFixed32NoTagToArray(v, dest);
 }

 // fixed64 elements.
 void EncodeFixedSizeValue(uint64_t v, uint8_t* dest) {
   WireFormatLite::WriteFixed64NoTagToArray(v, dest);
 }

 // sfixed32 elements.
 void EncodeFixedSizeValue(int32_t v, uint8_t* dest) {
   WireFormatLite::WriteSFixed32NoTagToArray(v, dest);
 }

 // sfixed64 elements.
 void EncodeFixedSizeValue(int64_t v, uint8_t* dest) {
   WireFormatLite::WriteSFixed64NoTagToArray(v, dest);
 }

 // bool elements.
 void EncodeFixedSizeValue(bool v, uint8_t* dest) {
   WireFormatLite::WriteBoolNoTagToArray(v, dest);
 }
 }  // anonymous namespace

 #endif  // !defined(ABSL_IS_LITTLE_ENDIAN)

 // Writes the `n` fixed-width elements starting at `a` to `output`, without
 // any tag or length prefix.
 //
 // When ABSL_IS_LITTLE_ENDIAN is defined the array's memory is written as-is
 // in a single call. Otherwise the elements are encoded one at a time with
 // EncodeFixedSizeValue() into a stack buffer and flushed in batches.
 template <typename CType>
 static void WriteArray(const CType* a, int n, io::CodedOutputStream* output) {
 #if defined(ABSL_IS_LITTLE_ENDIAN)
   const char* const raw_bytes = reinterpret_cast<const char*>(a);
   output->WriteRaw(raw_bytes, n * sizeof(CType));
 #else
   const int kBatch = 128;  // Elements encoded per WriteRaw() call.
   uint8_t scratch[sizeof(CType) * kBatch];
   for (int start = 0; start < n; start += kBatch) {
     const int remaining = n - start;
     const int batch = (remaining < kBatch) ? remaining : kBatch;
     uint8_t* cursor = scratch;
     for (int k = 0; k < batch; ++k, cursor += sizeof(CType)) {
       EncodeFixedSizeValue(a[start + k], cursor);
     }
     output->WriteRaw(scratch, batch * sizeof(CType));
   }
 #endif
 }

 // Typed entry points over the WriteArray<> template. Each one writes the `n`
 // elements starting at `a` to `output`; no tag or length prefix is written by
 // these functions.

 // Writes `n` float values.
 void WireFormatLite::WriteFloatArray(const float* a, int n,
                                      io::CodedOutputStream* output) {
   WriteArray<float>(a, n, output);
 }

 // Writes `n` double values.
 void WireFormatLite::WriteDoubleArray(const double* a, int n,
                                       io::CodedOutputStream* output) {
   WriteArray<double>(a, n, output);
 }

 // Writes `n` fixed32 values.
 void WireFormatLite::WriteFixed32Array(const uint32_t* a, int n,
                                        io::CodedOutputStream* output) {
   WriteArray<uint32_t>(a, n, output);
 }

 // Writes `n` fixed64 values.
 void WireFormatLite::WriteFixed64Array(const uint64_t* a, int n,
                                        io::CodedOutputStream* output) {
   WriteArray<uint64_t>(a, n, output);
 }

 // Writes `n` sfixed32 values.
 void WireFormatLite::WriteSFixed32Array(const int32_t* a, int n,
                                         io::CodedOutputStream* output) {
   WriteArray<int32_t>(a, n, output);
 }

 // Writes `n` sfixed64 values.
 void WireFormatLite::WriteSFixed64Array(const int64_t* a, int n,
                                         io::CodedOutputStream* output) {
   WriteArray<int64_t>(a, n, output);
 }

 // Writes `n` bool values.
 void WireFormatLite::WriteBoolArray(const bool* a, int n,
                                     io::CodedOutputStream* output) {
   WriteArray<bool>(a, n, output);
 }

 // Tagged scalar writers. Every function below writes the tag for
 // `field_number` with the wire type shown in its body, and then the value
 // through the matching Write*NoTag routine.

 // int32: VARINT wire type.
 void WireFormatLite::WriteInt32(int field_number, int32_t value,
                                 io::CodedOutputStream* output) {
   WriteTag(field_number, WIRETYPE_VARINT, output);
   WriteInt32NoTag(value, output);
 }
 // int64: VARINT wire type.
 void WireFormatLite::WriteInt64(int field_number, int64_t value,
                                 io::CodedOutputStream* output) {
   WriteTag(field_number, WIRETYPE_VARINT, output);
   WriteInt64NoTag(value, output);
 }
 // uint32: VARINT wire type.
 void WireFormatLite::WriteUInt32(int field_number, uint32_t value,
                                  io::CodedOutputStream* output) {
   WriteTag(field_number, WIRETYPE_VARINT, output);
   WriteUInt32NoTag(value, output);
 }
 // uint64: VARINT wire type.
 void WireFormatLite::WriteUInt64(int field_number, uint64_t value,
                                  io::CodedOutputStream* output) {
   WriteTag(field_number, WIRETYPE_VARINT, output);
   WriteUInt64NoTag(value, output);
 }
 // sint32: VARINT wire type.
 void WireFormatLite::WriteSInt32(int field_number, int32_t value,
                                  io::CodedOutputStream* output) {
   WriteTag(field_number, WIRETYPE_VARINT, output);
   WriteSInt32NoTag(value, output);
 }
 // sint64: VARINT wire type.
 void WireFormatLite::WriteSInt64(int field_number, int64_t value,
                                  io::CodedOutputStream* output) {
   WriteTag(field_number, WIRETYPE_VARINT, output);
   WriteSInt64NoTag(value, output);
 }
 // fixed32: FIXED32 wire type.
 void WireFormatLite::WriteFixed32(int field_number, uint32_t value,
                                   io::CodedOutputStream* output) {
   WriteTag(field_number, WIRETYPE_FIXED32, output);
   WriteFixed32NoTag(value, output);
 }
 // fixed64: FIXED64 wire type.
 void WireFormatLite::WriteFixed64(int field_number, uint64_t value,
                                   io::CodedOutputStream* output) {
   WriteTag(field_number, WIRETYPE_FIXED64, output);
   WriteFixed64NoTag(value, output);
 }
 // sfixed32: FIXED32 wire type.
 void WireFormatLite::WriteSFixed32(int field_number, int32_t value,
                                    io::CodedOutputStream* output) {
   WriteTag(field_number, WIRETYPE_FIXED32, output);
   WriteSFixed32NoTag(value, output);
 }
 // sfixed64: FIXED64 wire type.
 void WireFormatLite::WriteSFixed64(int field_number, int64_t value,
                                    io::CodedOutputStream* output) {
   WriteTag(field_number, WIRETYPE_FIXED64, output);
   WriteSFixed64NoTag(value, output);
 }
 // float: FIXED32 wire type.
 void WireFormatLite::WriteFloat(int field_number, float value,
                                 io::CodedOutputStream* output) {
   WriteTag(field_number, WIRETYPE_FIXED32, output);
   WriteFloatNoTag(value, output);
 }
 // double: FIXED64 wire type.
 void WireFormatLite::WriteDouble(int field_number, double value,
                                  io::CodedOutputStream* output) {
   WriteTag(field_number, WIRETYPE_FIXED64, output);
   WriteDoubleNoTag(value, output);
 }
 // bool: VARINT wire type.
 void WireFormatLite::WriteBool(int field_number, bool value,
                                io::CodedOutputStream* output) {
   WriteTag(field_number, WIRETYPE_VARINT, output);
   WriteBoolNoTag(value, output);
 }
 // enum (passed as int): VARINT wire type.
 void WireFormatLite::WriteEnum(int field_number, int value,
                                io::CodedOutputStream* output) {
   WriteTag(field_number, WIRETYPE_VARINT, output);
   WriteEnumNoTag(value, output);
 }

 // Largest int32_t value, held as a size_t so it can be compared directly
 // against std::string::size() in the string/bytes writers below.
 constexpr size_t kInt32MaxSize = std::numeric_limits<int32_t>::max();

 // Writes `value` as a length-delimited field: tag, varint length, then the
 // bytes (copied through CodedOutputStream::WriteString). CHECK-fails if the
 // string is longer than kInt32MaxSize; the tag has already been written when
 // that check runs.
 void WireFormatLite::WriteString(int field_number, const std::string& value,
                                  io::CodedOutputStream* output) {
   // String is for UTF-8 text only
   WriteTag(field_number, WIRETYPE_LENGTH_DELIMITED, output);
   ABSL_CHECK_LE(value.size(), kInt32MaxSize);
   output->WriteVarint32(value.size());
   output->WriteString(value);
 }
 // Same framing as WriteString() (tag, varint length, bytes), but the payload
 // is handed to CodedOutputStream::WriteRawMaybeAliased instead of
 // WriteString. CHECK-fails if the string is longer than kInt32MaxSize.
 // NOTE(review): the name suggests the stream may reference value's buffer
 // rather than copy it, so `value` may need to outlive the write; confirm
 // against CodedOutputStream.
 void WireFormatLite::WriteStringMaybeAliased(int field_number,
                                              const std::string& value,
                                              io::CodedOutputStream* output) {
   // String is for UTF-8 text only
   WriteTag(field_number, WIRETYPE_LENGTH_DELIMITED, output);
   ABSL_CHECK_LE(value.size(), kInt32MaxSize);
   output->WriteVarint32(value.size());
   output->WriteRawMaybeAliased(value.data(), value.size());
 }
 // Writes `value` as a length-delimited field: tag, varint length, then the
 // bytes. The statements are the same as in WriteString(). CHECK-fails if the
 // payload is longer than kInt32MaxSize; the tag has already been written when
 // that check runs.
 void WireFormatLite::WriteBytes(int field_number, const std::string& value,
                                 io::CodedOutputStream* output) {
   WriteTag(field_number, WIRETYPE_LENGTH_DELIMITED, output);
   ABSL_CHECK_LE(value.size(), kInt32MaxSize);
   output->WriteVarint32(value.size());
   output->WriteString(value);
 }
 // Same framing as WriteBytes() (tag, varint length, bytes), but the payload
 // is handed to CodedOutputStream::WriteRawMaybeAliased instead of
 // WriteString. CHECK-fails if the payload is longer than kInt32MaxSize.
 // NOTE(review): the name suggests the stream may reference value's buffer
 // rather than copy it, so `value` may need to outlive the write; confirm
 // against CodedOutputStream.
 void WireFormatLite::WriteBytesMaybeAliased(int field_number,
                                             const std::string& value,
                                             io::CodedOutputStream* output) {
   WriteTag(field_number, WIRETYPE_LENGTH_DELIMITED, output);
   ABSL_CHECK_LE(value.size(), kInt32MaxSize);
   output->WriteVarint32(value.size());
   output->WriteRawMaybeAliased(value.data(), value.size());
 }


 // Writes `value` as a group field: a START_GROUP tag, the message body via
 // SerializeWithCachedSizes, and a closing END_GROUP tag carrying the same
 // field number. No length prefix is written.
 void WireFormatLite::WriteGroup(int field_number, const MessageLite& value,
                                 io::CodedOutputStream* output) {
   WriteTag(field_number, WIRETYPE_START_GROUP, output);
   value.SerializeWithCachedSizes(output);
   WriteTag(field_number, WIRETYPE_END_GROUP, output);
 }

 // Writes `value` as a length-delimited sub-message: tag, then the message's
 // cached size as a varint, then the body via SerializeWithCachedSizes.
 // The length comes from GetCachedSize(); this function does not compute it.
 void WireFormatLite::WriteMessage(int field_number, const MessageLite& value,
                                   io::CodedOutputStream* output) {
   WriteTag(field_number, WIRETYPE_LENGTH_DELIMITED, output);
   output->WriteVarint32(value.GetCachedSize());
   value.SerializeWithCachedSizes(output);
 }

 // Array-style group writer: emits a START_GROUP tag at `target`, serializes
 // `value` with _InternalSerialize, then emits the matching END_GROUP tag.
 // Returns the write position after the END_GROUP tag.
 uint8_t* WireFormatLite::InternalWriteGroup(int field_number,
                                             const MessageLite& value,
                                             uint8_t* target,
                                             io::EpsCopyOutputStream* stream) {
   // EnsureSpace() is called before each tag write and may return a different
   // pointer, so its result replaces `target`.
   target = stream->EnsureSpace(target);
   target = WriteTagToArray(field_number, WIRETYPE_START_GROUP, target);
   target = value._InternalSerialize(target, stream);
   target = stream->EnsureSpace(target);
   return WriteTagToArray(field_number, WIRETYPE_END_GROUP, target);
 }

 // Array-style sub-message writer: emits the LENGTH_DELIMITED tag, then
 // `cached_size` as a varint length, then the body via _InternalSerialize.
 // The caller supplies `cached_size`; it is written as given. Returns the
 // write position after the serialized body.
 uint8_t* WireFormatLite::InternalWriteMessage(int field_number,
                                               const MessageLite& value,
                                               int cached_size, uint8_t* target,
                                               io::EpsCopyOutputStream* stream) {
   // A single EnsureSpace() call precedes both the tag and the length varint.
   target = stream->EnsureSpace(target);
   target = WriteTagToArray(field_number, WIRETYPE_LENGTH_DELIMITED, target);
   target = io::CodedOutputStream::WriteVarint32ToArray(
       static_cast<uint32_t>(cached_size), target);
   return value._InternalSerialize(target, stream);
 }

 // Serializes `value` starting at the stream's current write position and
 // moves that position to wherever serialization finished. The size argument
 // is accepted for interface compatibility and is not used.
 void WireFormatLite::WriteSubMessageMaybeToArray(
     int /*size*/, const MessageLite& value, io::CodedOutputStream* output) {
   uint8_t* const write_begin = output->Cur();
   uint8_t* const write_end =
       value._InternalSerialize(write_begin, output->EpsCopy());
   output->SetCur(write_end);
 }

 // Writes `value` as a group field: START_GROUP tag, the message body through
 // WriteSubMessageMaybeToArray, and a closing END_GROUP tag with the same
 // field number.
 void WireFormatLite::WriteGroupMaybeToArray(int field_number,
                                             const MessageLite& value,
                                             io::CodedOutputStream* output) {
   WriteTag(field_number, WIRETYPE_START_GROUP, output);
   // The cached size is passed along, although the callee ignores it.
   WriteSubMessageMaybeToArray(value.GetCachedSize(), value, output);
   WriteTag(field_number, WIRETYPE_END_GROUP, output);
 }

 // Writes `value` as a length-delimited sub-message: tag, the message's cached
 // size as a varint, then the body through WriteSubMessageMaybeToArray.
 void WireFormatLite::WriteMessageMaybeToArray(int field_number,
                                               const MessageLite& value,
                                               io::CodedOutputStream* output) {
   WriteTag(field_number, WIRETYPE_LENGTH_DELIMITED, output);
   // Read the cached size once and use it for both the prefix and the call.
   const int cached_size = value.GetCachedSize();
   output->WriteVarint32(cached_size);
   WriteSubMessageMaybeToArray(cached_size, value, output);
 }

 // Reads a varint length followed by that many bytes from `input` into
 // `*value`. Returns false if either read fails.
 // The separate declaration carries the PROTOBUF_NDEBUG_INLINE attribute.
 PROTOBUF_NDEBUG_INLINE static bool ReadBytesToString(
     io::CodedInputStream* input, std::string* value);
 inline static bool ReadBytesToString(io::CodedInputStream* input,
                                      std::string* value) {
   uint32_t payload_size;
   if (!input->ReadVarint32(&payload_size)) return false;
   return input->ReadString(value, payload_size);
 }

 // Reads a length-prefixed byte string from `input` into `*value`. Returns
 // false if the length varint or the payload cannot be read.
 bool WireFormatLite::ReadBytes(io::CodedInputStream* input,
                                std::string* value) {
   return ReadBytesToString(input, value);
 }

 // Reads a length-prefixed byte string from `input` into the string `*p`
 // points at. If `*p` currently points at the object returned by
 // GetEmptyStringAlreadyInited(), a fresh std::string is allocated with `new`
 // and stored in `*p` first, so that object is never written to.
 // NOTE(review): the allocation is not freed here; presumably the caller
 // takes ownership of `*p`. Confirm at call sites.
 bool WireFormatLite::ReadBytes(io::CodedInputStream* input, std::string** p) {
   std::string*& destination = *p;
   const bool is_shared_empty =
       (destination == &GetEmptyStringAlreadyInited());
   if (is_shared_empty) {
     destination = new std::string();
   }
   return ReadBytesToString(input, destination);
 }

 // Logs an ERROR explaining that a string field holds invalid UTF-8.
 //
 // message_name:    optional; when non-empty (and field_name is non-empty)
 //                  the field is reported as 'message_name.field_name'.
 // field_name:      optional; when empty, no field name appears in the log.
 // operation_str:   verb inserted after "when " (e.g. "parsing").
 // emit_stacktrace: unused in this build.
 void PrintUTF8ErrorLog(absl::string_view message_name,
                        absl::string_view field_name, const char* operation_str,
                        bool emit_stacktrace) {
   (void)emit_stacktrace;  // Parameter is used by Google-internal code.
   const std::string stacktrace;  // Always empty in this build.

   // Leading-space, single-quoted field reference, or empty when unnamed.
   std::string field_reference;
   if (!field_name.empty()) {
     field_reference =
         message_name.empty()
             ? absl::StrCat(" '", field_name, "'")
             : absl::StrCat(" '", message_name, ".", field_name, "'");
   }

   ABSL_LOG(ERROR) << absl::StrCat(
       "String field", field_reference,
       " contains invalid UTF-8 data "
       "when ",
       operation_str,
       " a protocol buffer. Use the 'bytes' type if you intend to "
       "send raw bytes. ",
       stacktrace);
 }

 // Checks that the `size` bytes at `data` are structurally valid UTF-8.
 // Returns true when they are. Otherwise logs an error that names
 // `field_name` and the operation (`op`) in progress, and returns false.
 bool WireFormatLite::VerifyUtf8String(const char* data, int size, Operation op,
                                       const char* field_name) {
   if (utf8_range::IsStructurallyValid({data, static_cast<size_t>(size)})) {
     return true;
   }

   // Map the operation to the verb used in the log message.
   const char* operation_verb = nullptr;
   switch (op) {
     case PARSE:
       operation_verb = "parsing";
       break;
     case SERIALIZE:
       operation_verb = "serializing";
       break;
       // no default case: have the compiler warn if a case is not covered.
   }
   PrintUTF8ErrorLog("", field_name, operation_verb, false);
   return false;
 }

 // Returns the total number of bytes needed to varint-encode the `n` 32-bit
 // values at `data`.
 //
 // Template parameters:
 //   ZigZag       - values are ZigZag-encoded before being sized.
 //   SignExtended - values with the top bit set are sized as 10-byte varints.
 //   T            - a 4-byte integer type (enforced by static_assert).
 //
 // this code is deliberately written such that clang makes it into really
 // efficient SSE code.
 template <bool ZigZag, bool SignExtended, typename T>
 static size_t VarintSize(const T* data, const int n) {
   static_assert(sizeof(T) == 4, "This routine only works for 32 bit integers");
   // is_unsigned<T> => !ZigZag
   static_assert(
       (std::is_unsigned<T>::value ^ ZigZag) || std::is_signed<T>::value,
       "Cannot ZigZag encode unsigned types");
   // is_unsigned<T> => !SignExtended
   static_assert(
       (std::is_unsigned<T>::value ^ SignExtended) || std::is_signed<T>::value,
       "Cannot SignExtended unsigned types");
   static_assert(!(SignExtended && ZigZag),
                 "Cannot SignExtended and ZigZag on the same type");
   // This approach is only faster when vectorized, and the vectorized
   // implementation only works in units of the platform's vector width, and is
   // only faster once a certain number of iterations are used. Normally the
   // compiler generates two loops - one partially unrolled vectorized loop that
   // processes big chunks, and a second "epilogue" scalar loop to finish up the
   // remainder. This is done manually here so that the faster scalar
   // implementation is used for small inputs and for the epilogue.
   // n rounded down to a multiple of 32: the element count for the first loop.
   int vectorN = n & -32;
   // Every element in the first loop needs at least one byte, so start there.
   uint32_t sum = vectorN;
   // Number of first-loop elements with the top bit set (SignExtended only).
   uint32_t msb_sum = 0;
   int i = 0;
   for (; i < vectorN; i++) {
     uint32_t x = data[i];
     if (ZigZag) {
       x = WireFormatLite::ZigZagEncode32(x);
     } else if (SignExtended) {
       msb_sum += x >> 31;
     }
     // clang is so smart that it produces optimal SIMD sequence unrolling
     // the loop 8 ints at a time. With a sequence of 4
     // cmpres = cmpgt x, sizeclass  ( -1 or 0)
     // sum = sum - cmpres
     // One extra byte for each 7-bit boundary the value crosses.
     if (x > 0x7F) sum++;
     if (x > 0x3FFF) sum++;
     if (x > 0x1FFFFF) sum++;
     if (x > 0xFFFFFFF) sum++;
   }
 // Clang is not smart enough to see that this loop doesn't run many times
 // NOLINTNEXTLINE(google3-runtime-pragma-loop-hint): b/315043579
 #pragma clang loop vectorize(disable) unroll(disable) interleave(disable)
   // Remainder (fewer than 32 elements): size each with the scalar routines.
   for (; i < n; i++) {
     uint32_t x = data[i];
     if (ZigZag) {
       sum += WireFormatLite::SInt32Size(x);
     } else if (SignExtended) {
       sum += WireFormatLite::Int32Size(x);
     } else {
       sum += WireFormatLite::UInt32Size(x);
     }
   }
   // Top-bit-set values were counted as 5 bytes above; adding 5 more for each
   // brings them to 10 bytes.
   if (SignExtended) sum += msb_sum * 5;
   return sum;
 }

 // Returns the total number of bytes needed to varint-encode the `n` 64-bit
 // values at `data`.
 //
 // Template parameters:
 //   ZigZag - values are ZigZag-encoded before being sized.
 //   T      - an 8-byte integer type (enforced by static_assert).
 template <bool ZigZag, typename T>
 static size_t VarintSize64(const T* data, const int n) {
   static_assert(sizeof(T) == 8, "This routine only works for 64 bit integers");
   // is_unsigned<T> => !ZigZag
   static_assert(!ZigZag || !std::is_unsigned<T>::value,
                 "Cannot ZigZag encode unsigned types");
   // n rounded down to a multiple of 32: the element count for the first loop.
   int vectorN = n & -32;
   // Every element in the first loop needs at least one byte, so start there.
   uint64_t sum = vectorN;
   int i = 0;
   for (; i < vectorN; i++) {
     uint64_t x = data[i];
     if (ZigZag) {
       x = WireFormatLite::ZigZagEncode64(x);
     }
     // First step is a binary search, we can't branch in sse so we use the
     // result of the compare to adjust sum and appropriately. This code is
     // written to make clang recognize the vectorization.
     // tmp is all ones when x needs more than 35 bits, otherwise zero. When it
     // is all ones, 5 bytes are added and x is shifted down by 35 bits, so the
     // four comparisons below size only the remaining high part.
     uint64_t tmp = x >= (static_cast<uint64_t>(1) << 35) ? -1 : 0;
     sum += 5 & tmp;
     x >>= 35 & tmp;
     if (x > 0x7F) sum++;
     if (x > 0x3FFF) sum++;
     if (x > 0x1FFFFF) sum++;
     if (x > 0xFFFFFFF) sum++;
   }
 // Clang is not smart enough to see that this loop doesn't run many times
 // NOLINTNEXTLINE(google3-runtime-pragma-loop-hint): b/315043579
 #pragma clang loop vectorize(disable) unroll(disable) interleave(disable)
   // Remainder (fewer than 32 elements): size each with the scalar routines.
   for (; i < n; i++) {
     uint64_t x = data[i];
     if (ZigZag) {
       sum += WireFormatLite::SInt64Size(x);
     } else {
       sum += WireFormatLite::UInt64Size(x);
     }
   }
   return sum;
 }

 // On machines without a vector count-leading-zeros instruction such as SVE CLZ
 // on arm or VPLZCNT on x86, SSE or AVX2 instructions can allow vectorization of
 // the size calculation loop. GCC does not detect this autovectorization
 // opportunity, so only enable for clang.
 // When last tested, AVX512-vectorized lzcnt was slower than the SSE/AVX2
 // implementation, so __AVX512CD__ is not checked.
 #if defined(__SSE__) && defined(__clang__)
 // Total varint-encoded size of all elements of an int32 repeated field.
 // Uses the vectorizable helper with SignExtended = true, ZigZag = false.
 size_t WireFormatLite::Int32Size(const RepeatedField<int32_t>& value) {
   return VarintSize<false, true>(value.data(), value.size());
 }

 // Total varint-encoded size of all elements of a uint32 repeated field.
 // Uses the vectorizable helper with neither ZigZag nor SignExtended.
 size_t WireFormatLite::UInt32Size(const RepeatedField<uint32_t>& value) {
   return VarintSize<false, false>(value.data(), value.size());
 }

 // Total varint-encoded size of all elements of an sint32 repeated field.
 // Uses the vectorizable helper with ZigZag = true.
 size_t WireFormatLite::SInt32Size(const RepeatedField<int32_t>& value) {
   return VarintSize<true, false>(value.data(), value.size());
 }

 // Total varint-encoded size of all elements of an enum repeated field.
 // Uses the same helper configuration as Int32Size (SignExtended = true).
 size_t WireFormatLite::EnumSize(const RepeatedField<int>& value) {
   // On ILP64, sizeof(int) == 8, which would require a different template.
   return VarintSize<false, true>(value.data(), value.size());
 }

 #else  // !(defined(__SSE__) && defined(__clang__))

 // Total varint-encoded size of all elements of an int32 repeated field,
 // computed by summing the scalar Int32Size() of each element.
 size_t WireFormatLite::Int32Size(const RepeatedField<int32_t>& value) {
   size_t total = 0;
   for (const int32_t element : value) {
     total += Int32Size(element);
   }
   return total;
 }

 // Total varint-encoded size of all elements of a uint32 repeated field,
 // computed by summing the scalar UInt32Size() of each element.
 size_t WireFormatLite::UInt32Size(const RepeatedField<uint32_t>& value) {
   size_t total = 0;
   for (const uint32_t element : value) {
     total += UInt32Size(element);
   }
   return total;
 }

 // Total varint-encoded size of all elements of an sint32 repeated field,
 // computed by summing the scalar SInt32Size() of each element.
 size_t WireFormatLite::SInt32Size(const RepeatedField<int32_t>& value) {
   size_t total = 0;
   for (const int32_t element : value) {
     total += SInt32Size(element);
   }
   return total;
 }

 // Total varint-encoded size of all elements of an enum repeated field,
 // computed by summing the scalar EnumSize() of each element.
 size_t WireFormatLite::EnumSize(const RepeatedField<int>& value) {
   size_t total = 0;
   for (const int element : value) {
     total += EnumSize(element);
   }
   return total;
 }

 #endif

 // Micro benchmarks show that the vectorizable loop only starts beating
 // the normal loop when 256-bit vector registers are available.
 #if defined(__AVX2__) && defined(__clang__)
 // Total varint-encoded size of all elements of an int64 repeated field.
 // Uses the vectorizable 64-bit helper without ZigZag encoding.
 size_t WireFormatLite::Int64Size(const RepeatedField<int64_t>& value) {
   return VarintSize64<false>(value.data(), value.size());
 }

 // Total varint-encoded size of all elements of a uint64 repeated field.
 // Uses the vectorizable 64-bit helper without ZigZag encoding.
 size_t WireFormatLite::UInt64Size(const RepeatedField<uint64_t>& value) {
   return VarintSize64<false>(value.data(), value.size());
 }

 // Total varint-encoded size of all elements of an sint64 repeated field.
 // Uses the vectorizable 64-bit helper with ZigZag encoding.
 size_t WireFormatLite::SInt64Size(const RepeatedField<int64_t>& value) {
   return VarintSize64<true>(value.data(), value.size());
 }

 #else

 // Total varint-encoded size of all elements of an int64 repeated field,
 // computed by summing the scalar Int64Size() of each element.
 size_t WireFormatLite::Int64Size(const RepeatedField<int64_t>& value) {
   size_t total = 0;
   for (const int64_t element : value) {
     total += Int64Size(element);
   }
   return total;
 }

 // Total varint-encoded size of all elements of a uint64 repeated field,
 // computed by summing the scalar UInt64Size() of each element.
 size_t WireFormatLite::UInt64Size(const RepeatedField<uint64_t>& value) {
   size_t total = 0;
   for (const uint64_t element : value) {
     total += UInt64Size(element);
   }
   return total;
 }

 // Total varint-encoded size of all elements of an sint64 repeated field,
 // computed by summing the scalar SInt64Size() of each element.
 size_t WireFormatLite::SInt64Size(const RepeatedField<int64_t>& value) {
   size_t total = 0;
   for (const int64_t element : value) {
     total += SInt64Size(element);
   }
   return total;
 }

 #endif

 }  // namespace internal
 }  // namespace protobuf
 }  // namespace google

 #include "google/protobuf/port_undef.inc"