// upb/wire/decode_internal.h - third_party/protobuf - Git at Google

 /*
  * Copyright (c) 2009-2021, Google LLC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
  *     * Redistributions of source code must retain the above copyright
  *       notice, this list of conditions and the following disclaimer.
  *     * Redistributions in binary form must reproduce the above copyright
  *       notice, this list of conditions and the following disclaimer in the
  *       documentation and/or other materials provided with the distribution.
  *     * Neither the name of Google LLC nor the
  *       names of its contributors may be used to endorse or promote products
  *       derived from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT,
  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */

 /*
  * Internal implementation details of the decoder that are shared between
  * decode.c and decode_fast.c.
  */

 #ifndef UPB_WIRE_DECODE_INTERNAL_H_
 #define UPB_WIRE_DECODE_INTERNAL_H_

 #include "upb/mem/arena_internal.h"
 #include "upb/message/internal.h"
 #include "upb/wire/decode.h"
 #include "upb/wire/eps_copy_input_stream.h"
 #include "utf8_range.h"

 // Must be last.
 #include "upb/port/def.inc"

 // Value held in upb_Decoder.end_group when no END_GROUP field number is
 // recorded. The replacement list is fully parenthesized so the macro behaves
 // as a single operand next to any operator (e.g. `sizeof DECODE_NOGROUP`).
 #define DECODE_NOGROUP ((uint32_t)-1)

 // Decoder state shared between decode.c and decode_fast.c.
 typedef struct upb_Decoder {
   // Must remain the first member: _upb_Decoder_BufferFlipCallback() casts the
   // upb_EpsCopyInputStream* it receives back to the enclosing upb_Decoder*.
   upb_EpsCopyInputStream input;
   const upb_ExtensionRegistry* extreg;
   // NULL when no unknown data is pending; the buffer-flip callback only saves
   // bytes when this is non-NULL.
   const char* unknown;       // Start of unknown data, preserve at buffer flip
   upb_Message* unknown_msg;  // Pointer to preserve data to
   int depth;                 // Tracks recursion depth to bound stack usage.
   uint32_t end_group;  // field number of END_GROUP tag, else DECODE_NOGROUP.
   uint16_t options;
   bool missing_required;
   // Passed to _upb_Message_AddUnknown() when unknown bytes are preserved.
   upb_Arena arena;
   upb_DecodeStatus status;
   // NOTE(review): presumably the longjmp() target used by
   // _upb_FastDecoder_ErrorJmp() -- confirm against its definition.
   jmp_buf err;

   // The two fields below exist only in builds where NDEBUG is not defined.
 #ifndef NDEBUG
   const char* debug_tagstart;
   const char* debug_valstart;
 #endif
 } upb_Decoder;

 /* Error function that will abort decoding with longjmp(). We can't declare this
  * UPB_NORETURN, even though it is appropriate, because if we do then compilers
  * will "helpfully" refuse to tailcall to it
  * (see: https://stackoverflow.com/a/55657013), which will defeat a major goal
  * of our optimizations. That is also why we must define it in a separate file,
  * otherwise the compiler will see that it calls longjmp() and deduce that it is
  * noreturn. */
 const char* _upb_FastDecoder_ErrorJmp(upb_Decoder* d, int status);

 extern const uint8_t upb_utf8_offsets[];

 // Checks the `len` bytes starting at `ptr`.
 //
 // Returns true immediately when no byte has its high bit set. Otherwise the
 // remaining input, beginning at the 8-byte word or single byte where a high
 // bit was first seen, is handed to utf8_range2() and the result is true only
 // when that call returns 0.
 UPB_INLINE
 bool _upb_Decoder_VerifyUtf8Inline(const char* ptr, int len) {
   const char* const limit = ptr + len;
   bool high_bit_seen = false;

   // Word-at-a-time scan: a single mask test covers eight bytes.
   for (; limit - ptr >= 8; ptr += 8) {
     uint64_t word;
     memcpy(&word, ptr, sizeof(word));
     if ((word & 0x8080808080808080) != 0) {
       high_bit_seen = true;
       break;  // ptr stays at the start of the offending word.
     }
   }

   // Byte-at-a-time scan over whatever the word loop left behind.
   for (; !high_bit_seen && ptr < limit; ptr++) {
     if ((*ptr & 0x80) != 0) {
       high_bit_seen = true;
       break;  // ptr stays on the offending byte.
     }
   }

   if (!high_bit_seen) return true;

   return utf8_range2((const unsigned char*)ptr, limit - ptr) == 0;
 }

 const char* _upb_Decoder_CheckRequired(upb_Decoder* d, const char* ptr,
                                        const upb_Message* msg,
                                        const upb_MiniTable* l);

 /* Packs a mini-table pointer together with its table_mask into one integer:
  * the pointer is moved up by 8 bits and table_mask is OR'ed into the result.
  * (Assumes table_mask fits in the low 8 bits -- confirm against the
  * upb_MiniTable definition.) decode_totablep() recovers the pointer.
  *
  * x86-64 pointers always have the high 16 bits matching. So we can shift
  * left 8 and right 8 without loss of information.
  *
  * The shift is carried out on uintptr_t: left-shifting a signed value is
  * undefined behavior in C when the value is negative or the result is not
  * representable, whereas the unsigned shift is always defined. */
 UPB_INLINE intptr_t decode_totable(const upb_MiniTable* tablep) {
   const uintptr_t shifted = (uintptr_t)tablep << 8;
   return (intptr_t)(shifted | tablep->table_mask);
 }

 /* Recovers the mini-table pointer from a value built by decode_totable():
  * the low 8 bits are shifted out and what remains is reinterpreted as the
  * pointer. */
 UPB_INLINE const upb_MiniTable* decode_totablep(intptr_t table) {
   const intptr_t pointer_bits = table >> 8;
   return (const upb_MiniTable*)pointer_bits;
 }

 const char* _upb_Decoder_IsDoneFallback(upb_EpsCopyInputStream* e,
                                         const char* ptr, int overrun);

 /* Forwards to upb_EpsCopyInputStream_IsDoneWithCallback() on the decoder's
  * input stream, supplying _upb_Decoder_IsDoneFallback as the callback, and
  * returns that call's result. */
 UPB_INLINE bool _upb_Decoder_IsDone(upb_Decoder* d, const char** ptr) {
   upb_EpsCopyInputStream* const stream = &d->input;
   return upb_EpsCopyInputStream_IsDoneWithCallback(
       stream, ptr, &_upb_Decoder_IsDoneFallback);
 }

 /* Buffer-flip hook for the decoder's input stream.
  *
  * `e` is cast back to the upb_Decoder that embeds it. A NULL `old_end` is
  * reported through _upb_FastDecoder_ErrorJmp() as malformed input. When
  * unknown data is being tracked (d->unknown is non-NULL), the bytes from
  * d->unknown up to `old_end` are appended to d->unknown_msg and tracking
  * restarts at `new_start`; a failed append is reported as out-of-memory.
  *
  * Returns `new_start`. */
 UPB_INLINE const char* _upb_Decoder_BufferFlipCallback(
     upb_EpsCopyInputStream* e, const char* old_end, const char* new_start) {
   upb_Decoder* const dec = (upb_Decoder*)e;

   if (old_end == NULL) {
     _upb_FastDecoder_ErrorJmp(dec, kUpb_DecodeStatus_Malformed);
   }

   // Nothing to preserve unless unknown-field bytes are being tracked.
   if (dec->unknown == NULL) return new_start;

   const bool saved = _upb_Message_AddUnknown(
       dec->unknown_msg, dec->unknown, old_end - dec->unknown, &dec->arena);
   if (!saved) {
     _upb_FastDecoder_ErrorJmp(dec, kUpb_DecodeStatus_OutOfMemory);
   }

   dec->unknown = new_start;
   return new_start;
 }

 #if UPB_FASTTABLE
 /* Hands control to the fast-table parser selected by `tag`.
  *
  * The low 8 bits of `table` are used as a mask over `tag`; the masked value
  * (stated via UPB_ASSUME to be a multiple of 8) divided by 8 is the index
  * into fasttable[]. The chosen entry's field_data XOR `tag` is passed as the
  * final argument, and the parser is invoked through a UPB_MUSTTAIL return. */
 UPB_INLINE
 const char* _upb_FastDecoder_TagDispatch(upb_Decoder* d, const char* ptr,
                                          upb_Message* msg, intptr_t table,
                                          uint64_t hasbits, uint64_t tag) {
   const upb_MiniTable* mini_table = decode_totablep(table);
   const uint8_t low_mask = (uint8_t)table;
   const size_t masked_tag = tag & low_mask;
   UPB_ASSUME((masked_tag & 7) == 0);
   const size_t slot = masked_tag >> 3;
   const uint64_t data = mini_table->fasttable[slot].field_data ^ tag;
   UPB_MUSTTAIL return mini_table->fasttable[slot].field_parser(
       d, ptr, msg, table, hasbits, data);
 }
 #endif

 /* Reads the two bytes at `ptr` as a host-byte-order uint16_t (via memcpy, so
  * `ptr` needs no particular alignment) and returns the value widened to
  * 32 bits. */
 UPB_INLINE uint32_t _upb_FastDecoder_LoadTag(const char* ptr) {
   uint16_t first_two;
   memcpy(&first_two, ptr, sizeof(first_two));
   return (uint32_t)first_two;
 }

 #include "upb/port/undef.inc"

 #endif /* UPB_WIRE_DECODE_INTERNAL_H_ */
	/*
	* Copyright (c) 2009-2021, Google LLC
	* All rights reserved.
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions are met:
	* * Redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer.
	* * Redistributions in binary form must reproduce the above copyright
	* notice, this list of conditions and the following disclaimer in the
	* documentation and/or other materials provided with the distribution.
	* * Neither the name of Google LLC nor the
	* names of its contributors may be used to endorse or promote products
	* derived from this software without specific prior written permission.
	*
	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
	* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	* ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT,
	* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
	* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
	* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
	* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
	* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	*/

	/*
	* Internal implementation details of the decoder that are shared between
	* decode.c and decode_fast.c.
	*/

	#ifndef UPB_WIRE_DECODE_INTERNAL_H_
	#define UPB_WIRE_DECODE_INTERNAL_H_

	#include "upb/mem/arena_internal.h"
	#include "upb/message/internal.h"
	#include "upb/wire/decode.h"
	#include "upb/wire/eps_copy_input_stream.h"
	#include "utf8_range.h"

	// Must be last.
	#include "upb/port/def.inc"

	// Value held in upb_Decoder.end_group when no END_GROUP field number is
	// recorded. The replacement list is fully parenthesized so the macro behaves
	// as a single operand next to any operator (e.g. `sizeof DECODE_NOGROUP`).
	#define DECODE_NOGROUP ((uint32_t)-1)

	// Decoder state shared between decode.c and decode_fast.c.
	typedef struct upb_Decoder {
	// Must remain the first member: _upb_Decoder_BufferFlipCallback() casts the
	// upb_EpsCopyInputStream* it receives back to the enclosing upb_Decoder*.
	upb_EpsCopyInputStream input;
	const upb_ExtensionRegistry* extreg;
	// NULL when no unknown data is pending; the buffer-flip callback only saves
	// bytes when this is non-NULL.
	const char* unknown; // Start of unknown data, preserve at buffer flip
	upb_Message* unknown_msg; // Pointer to preserve data to
	int depth; // Tracks recursion depth to bound stack usage.
	uint32_t end_group; // field number of END_GROUP tag, else DECODE_NOGROUP.
	uint16_t options;
	bool missing_required;
	// Passed to _upb_Message_AddUnknown() when unknown bytes are preserved.
	upb_Arena arena;
	upb_DecodeStatus status;
	// NOTE(review): presumably the longjmp() target used by
	// _upb_FastDecoder_ErrorJmp() -- confirm against its definition.
	jmp_buf err;

	// The two fields below exist only in builds where NDEBUG is not defined.
	#ifndef NDEBUG
	const char* debug_tagstart;
	const char* debug_valstart;
	#endif
	} upb_Decoder;

	/* Error function that will abort decoding with longjmp(). We can't declare this
	* UPB_NORETURN, even though it is appropriate, because if we do then compilers
	* will "helpfully" refuse to tailcall to it
	* (see: https://stackoverflow.com/a/55657013), which will defeat a major goal
	* of our optimizations. That is also why we must define it in a separate file,
	* otherwise the compiler will see that it calls longjmp() and deduce that it is
	* noreturn. */
	const char* _upb_FastDecoder_ErrorJmp(upb_Decoder* d, int status);

	extern const uint8_t upb_utf8_offsets[];

	// Checks the `len` bytes starting at `ptr`.
	//
	// Returns true immediately when no byte has its high bit set. Otherwise the
	// remaining input, beginning at the 8-byte word or single byte where a high
	// bit was first seen, is handed to utf8_range2() and the result is true only
	// when that call returns 0.
	UPB_INLINE
	bool _upb_Decoder_VerifyUtf8Inline(const char* ptr, int len) {
	  const char* const limit = ptr + len;
	  bool high_bit_seen = false;

	  // Word-at-a-time scan: a single mask test covers eight bytes.
	  for (; limit - ptr >= 8; ptr += 8) {
	    uint64_t word;
	    memcpy(&word, ptr, sizeof(word));
	    if ((word & 0x8080808080808080) != 0) {
	      high_bit_seen = true;
	      break;  // ptr stays at the start of the offending word.
	    }
	  }

	  // Byte-at-a-time scan over whatever the word loop left behind.
	  for (; !high_bit_seen && ptr < limit; ptr++) {
	    if ((*ptr & 0x80) != 0) {
	      high_bit_seen = true;
	      break;  // ptr stays on the offending byte.
	    }
	  }

	  if (!high_bit_seen) return true;

	  return utf8_range2((const unsigned char*)ptr, limit - ptr) == 0;
	}

	const char* _upb_Decoder_CheckRequired(upb_Decoder* d, const char* ptr,
	const upb_Message* msg,
	const upb_MiniTable* l);

	/* Packs a mini-table pointer together with its table_mask into one integer:
	 * the pointer is moved up by 8 bits and table_mask is OR'ed into the result.
	 * (Assumes table_mask fits in the low 8 bits -- confirm against the
	 * upb_MiniTable definition.) decode_totablep() recovers the pointer.
	 *
	 * x86-64 pointers always have the high 16 bits matching. So we can shift
	 * left 8 and right 8 without loss of information.
	 *
	 * The shift is carried out on uintptr_t: left-shifting a signed value is
	 * undefined behavior in C when the value is negative or the result is not
	 * representable, whereas the unsigned shift is always defined. */
	UPB_INLINE intptr_t decode_totable(const upb_MiniTable* tablep) {
	  const uintptr_t shifted = (uintptr_t)tablep << 8;
	  return (intptr_t)(shifted | tablep->table_mask);
	}

	/* Recovers the mini-table pointer from a value built by decode_totable():
	 * the low 8 bits are shifted out and what remains is reinterpreted as the
	 * pointer. */
	UPB_INLINE const upb_MiniTable* decode_totablep(intptr_t table) {
	  const intptr_t pointer_bits = table >> 8;
	  return (const upb_MiniTable*)pointer_bits;
	}

	const char* _upb_Decoder_IsDoneFallback(upb_EpsCopyInputStream* e,
	const char* ptr, int overrun);

	/* Forwards to upb_EpsCopyInputStream_IsDoneWithCallback() on the decoder's
	 * input stream, supplying _upb_Decoder_IsDoneFallback as the callback, and
	 * returns that call's result. */
	UPB_INLINE bool _upb_Decoder_IsDone(upb_Decoder* d, const char** ptr) {
	  upb_EpsCopyInputStream* const stream = &d->input;
	  return upb_EpsCopyInputStream_IsDoneWithCallback(
	      stream, ptr, &_upb_Decoder_IsDoneFallback);
	}

	/* Buffer-flip hook for the decoder's input stream.
	 *
	 * `e` is cast back to the upb_Decoder that embeds it. A NULL `old_end` is
	 * reported through _upb_FastDecoder_ErrorJmp() as malformed input. When
	 * unknown data is being tracked (d->unknown is non-NULL), the bytes from
	 * d->unknown up to `old_end` are appended to d->unknown_msg and tracking
	 * restarts at `new_start`; a failed append is reported as out-of-memory.
	 *
	 * Returns `new_start`. */
	UPB_INLINE const char* _upb_Decoder_BufferFlipCallback(
	    upb_EpsCopyInputStream* e, const char* old_end, const char* new_start) {
	  upb_Decoder* const dec = (upb_Decoder*)e;

	  if (old_end == NULL) {
	    _upb_FastDecoder_ErrorJmp(dec, kUpb_DecodeStatus_Malformed);
	  }

	  // Nothing to preserve unless unknown-field bytes are being tracked.
	  if (dec->unknown == NULL) return new_start;

	  const bool saved = _upb_Message_AddUnknown(
	      dec->unknown_msg, dec->unknown, old_end - dec->unknown, &dec->arena);
	  if (!saved) {
	    _upb_FastDecoder_ErrorJmp(dec, kUpb_DecodeStatus_OutOfMemory);
	  }

	  dec->unknown = new_start;
	  return new_start;
	}

	#if UPB_FASTTABLE
	/* Hands control to the fast-table parser selected by `tag`.
	 *
	 * The low 8 bits of `table` are used as a mask over `tag`; the masked value
	 * (stated via UPB_ASSUME to be a multiple of 8) divided by 8 is the index
	 * into fasttable[]. The chosen entry's field_data XOR `tag` is passed as the
	 * final argument, and the parser is invoked through a UPB_MUSTTAIL return. */
	UPB_INLINE
	const char* _upb_FastDecoder_TagDispatch(upb_Decoder* d, const char* ptr,
	                                         upb_Message* msg, intptr_t table,
	                                         uint64_t hasbits, uint64_t tag) {
	  const upb_MiniTable* mini_table = decode_totablep(table);
	  const uint8_t low_mask = (uint8_t)table;
	  const size_t masked_tag = tag & low_mask;
	  UPB_ASSUME((masked_tag & 7) == 0);
	  const size_t slot = masked_tag >> 3;
	  const uint64_t data = mini_table->fasttable[slot].field_data ^ tag;
	  UPB_MUSTTAIL return mini_table->fasttable[slot].field_parser(
	      d, ptr, msg, table, hasbits, data);
	}
	#endif

	/* Reads the two bytes at `ptr` as a host-byte-order uint16_t (via memcpy, so
	 * `ptr` needs no particular alignment) and returns the value widened to
	 * 32 bits. */
	UPB_INLINE uint32_t _upb_FastDecoder_LoadTag(const char* ptr) {
	  uint16_t first_two;
	  memcpy(&first_two, ptr, sizeof(first_two));
	  return (uint32_t)first_two;
	}

	#include "upb/port/undef.inc"

	#endif /* UPB_WIRE_DECODE_INTERNAL_H_ */