| // Protocol Buffers - Google's data interchange format |
| // Copyright 2023 Google LLC. All rights reserved. |
| // |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file or at |
| // https://developers.google.com/open-source/licenses/bsd |
| |
| #ifndef UPB_WIRE_EPS_COPY_INPUT_STREAM_H_ |
| #define UPB_WIRE_EPS_COPY_INPUT_STREAM_H_ |
| |
#include <limits.h>
#include <string.h>

#include "upb/mem/arena.h"

// Must be last.
#include "upb/port/def.inc"
| |
| #ifdef __cplusplus |
| extern "C" { |
| #endif |
| |
// Upper bound, in bytes, on the wire-format size of a single protobuf field.
//
// The decoder wants to perform only one bounds check per field. To make that
// possible, once upb_EpsCopyInputStream_IsDone() has been called the stream
// guarantees that this many bytes can be read without any further bounds
// check; when necessary the stream copies data into a patch buffer to uphold
// this invariant.
#define kUpb_EpsCopyInputStream_SlopBytes 16
| |
// Reserved values of upb_EpsCopyInputStream.aliasing. Any other value stored
// in that field is a byte delta that maps a parsing pointer back to the
// corresponding address in the original input buffer.
enum {
  // Aliasing was not requested when the stream was initialized.
  kUpb_EpsCopyInputStream_NoAliasing = 0,

  // NOTE(review): not referenced anywhere in this header; the name suggests
  // "parsing from the patch buffer" -- confirm against other users.
  kUpb_EpsCopyInputStream_OnPatch = 1,

  // Aliasing is enabled and the parsing pointer already points into the input
  // buffer, so the effective delta is zero.
  kUpb_EpsCopyInputStream_NoDelta = 2
};
| |
| typedef struct { |
| const char* end; // Can read up to SlopBytes bytes beyond this. |
| const char* limit_ptr; // For bounds checks, = end + UPB_MIN(limit, 0) |
| uintptr_t aliasing; |
| int limit; // Submessage limit relative to end |
| bool error; // To distinguish between EOF and error. |
| char patch[kUpb_EpsCopyInputStream_SlopBytes * 2]; |
| } upb_EpsCopyInputStream; |
| |
| // Returns true if the stream is in the error state. A stream enters the error |
| // state when the user reads past a limit (caught in IsDone()) or the |
| // ZeroCopyInputStream returns an error. |
| UPB_INLINE bool upb_EpsCopyInputStream_IsError(upb_EpsCopyInputStream* e) { |
| return e->error; |
| } |
| |
| typedef const char* upb_EpsCopyInputStream_BufferFlipCallback( |
| upb_EpsCopyInputStream* e, const char* old_end, const char* new_start); |
| |
| typedef const char* upb_EpsCopyInputStream_IsDoneFallbackFunc( |
| upb_EpsCopyInputStream* e, const char* ptr, int overrun); |
| |
| // Initializes a upb_EpsCopyInputStream using the contents of the buffer |
| // [*ptr, size]. Updates `*ptr` as necessary to guarantee that at least |
| // kUpb_EpsCopyInputStream_SlopBytes are available to read. |
| UPB_INLINE void upb_EpsCopyInputStream_Init(upb_EpsCopyInputStream* e, |
| const char** ptr, size_t size, |
| bool enable_aliasing) { |
| if (size <= kUpb_EpsCopyInputStream_SlopBytes) { |
| memset(&e->patch, 0, 32); |
| if (size) memcpy(&e->patch, *ptr, size); |
| e->aliasing = enable_aliasing ? (uintptr_t)*ptr - (uintptr_t)e->patch |
| : kUpb_EpsCopyInputStream_NoAliasing; |
| *ptr = e->patch; |
| e->end = *ptr + size; |
| e->limit = 0; |
| } else { |
| e->end = *ptr + size - kUpb_EpsCopyInputStream_SlopBytes; |
| e->limit = kUpb_EpsCopyInputStream_SlopBytes; |
| e->aliasing = enable_aliasing ? kUpb_EpsCopyInputStream_NoDelta |
| : kUpb_EpsCopyInputStream_NoAliasing; |
| } |
| e->limit_ptr = e->end; |
| e->error = false; |
| } |
| |
// Result of classifying a stream position with
// upb_EpsCopyInputStream_IsDoneStatus().
typedef enum {
  // The position is exactly at the current limit.
  kUpb_IsDoneStatus_Done,

  // The position is before the current limit; parsing can continue.
  kUpb_IsDoneStatus_NotDone,

  // The position is not at a limit, and the stream must go through the
  // fallback path (flip to a new buffer) before more data can be read.
  kUpb_IsDoneStatus_NeedFallback,
} upb_IsDoneStatus;
| |
| // Returns the status of the current stream position. This is a low-level |
| // function, it is simpler to call upb_EpsCopyInputStream_IsDone() if possible. |
| UPB_INLINE upb_IsDoneStatus upb_EpsCopyInputStream_IsDoneStatus( |
| upb_EpsCopyInputStream* e, const char* ptr, int* overrun) { |
| *overrun = ptr - e->end; |
| if (UPB_LIKELY(ptr < e->limit_ptr)) { |
| return kUpb_IsDoneStatus_NotDone; |
| } else if (UPB_LIKELY(*overrun == e->limit)) { |
| return kUpb_IsDoneStatus_Done; |
| } else { |
| return kUpb_IsDoneStatus_NeedFallback; |
| } |
| } |
| |
| // Returns true if the stream has hit a limit, either the current delimited |
| // limit or the overall end-of-stream. As a side effect, this function may flip |
| // the pointer to a new buffer if there are less than |
| // kUpb_EpsCopyInputStream_SlopBytes of data to be read in the current buffer. |
| // |
| // Postcondition: if the function returns false, there are at least |
| // kUpb_EpsCopyInputStream_SlopBytes of data available to read at *ptr. |
| UPB_INLINE bool upb_EpsCopyInputStream_IsDoneWithCallback( |
| upb_EpsCopyInputStream* e, const char** ptr, |
| upb_EpsCopyInputStream_IsDoneFallbackFunc* func) { |
| int overrun; |
| switch (upb_EpsCopyInputStream_IsDoneStatus(e, *ptr, &overrun)) { |
| case kUpb_IsDoneStatus_Done: |
| return true; |
| case kUpb_IsDoneStatus_NotDone: |
| return false; |
| case kUpb_IsDoneStatus_NeedFallback: |
| *ptr = func(e, *ptr, overrun); |
| return *ptr == NULL; |
| } |
| UPB_UNREACHABLE(); |
| } |
| |
| const char* _upb_EpsCopyInputStream_IsDoneFallbackNoCallback( |
| upb_EpsCopyInputStream* e, const char* ptr, int overrun); |
| |
| // A simpler version of IsDoneWithCallback() that does not support a buffer flip |
| // callback. Useful in cases where we do not need to insert custom logic at |
| // every buffer flip. |
| // |
| // If this returns true, the user must call upb_EpsCopyInputStream_IsError() |
| // to distinguish between EOF and error. |
| UPB_INLINE bool upb_EpsCopyInputStream_IsDone(upb_EpsCopyInputStream* e, |
| const char** ptr) { |
| return upb_EpsCopyInputStream_IsDoneWithCallback( |
| e, ptr, _upb_EpsCopyInputStream_IsDoneFallbackNoCallback); |
| } |
| |
| // Returns the total number of bytes that are safe to read from the current |
| // buffer without reading uninitialized or unallocated memory. |
| // |
| // Note that this check does not respect any semantic limits on the stream, |
| // either limits from PushLimit() or the overall stream end, so some of these |
| // bytes may have unpredictable, nonsense values in them. The guarantee is only |
| // that the bytes are valid to read from the perspective of the C language |
| // (ie. you can read without triggering UBSAN or ASAN). |
| UPB_INLINE size_t upb_EpsCopyInputStream_BytesAvailable( |
| upb_EpsCopyInputStream* e, const char* ptr) { |
| return (e->end - ptr) + kUpb_EpsCopyInputStream_SlopBytes; |
| } |
| |
| // Returns true if the given delimited field size is valid (it does not extend |
| // beyond any previously-pushed limits). `ptr` should point to the beginning |
| // of the field data, after the delimited size. |
| // |
| // Note that this does *not* guarantee that all of the data for this field is in |
| // the current buffer. |
| UPB_INLINE bool upb_EpsCopyInputStream_CheckSize( |
| const upb_EpsCopyInputStream* e, const char* ptr, int size) { |
| UPB_ASSERT(size >= 0); |
| return ptr - e->end + size <= e->limit; |
| } |
| |
| UPB_INLINE bool _upb_EpsCopyInputStream_CheckSizeAvailable( |
| upb_EpsCopyInputStream* e, const char* ptr, int size, bool submessage) { |
| // This is one extra branch compared to the more normal: |
| // return (size_t)(end - ptr) < size; |
| // However it is one less computation if we are just about to use "ptr + len": |
| // https://godbolt.org/z/35YGPz |
| // In microbenchmarks this shows a small improvement. |
| uintptr_t uptr = (uintptr_t)ptr; |
| uintptr_t uend = (uintptr_t)e->limit_ptr; |
| uintptr_t res = uptr + (size_t)size; |
| if (!submessage) uend += kUpb_EpsCopyInputStream_SlopBytes; |
| // NOTE: this check depends on having a linear address space. This is not |
| // technically guaranteed by uintptr_t. |
| bool ret = res >= uptr && res <= uend; |
| if (size < 0) UPB_ASSERT(!ret); |
| return ret; |
| } |
| |
| // Returns true if the given delimited field size is valid (it does not extend |
| // beyond any previously-pushed limited) *and* all of the data for this field is |
| // available to be read in the current buffer. |
| // |
| // If the size is negative, this function will always return false. This |
| // property can be useful in some cases. |
| UPB_INLINE bool upb_EpsCopyInputStream_CheckDataSizeAvailable( |
| upb_EpsCopyInputStream* e, const char* ptr, int size) { |
| return _upb_EpsCopyInputStream_CheckSizeAvailable(e, ptr, size, false); |
| } |
| |
| // Returns true if the given sub-message size is valid (it does not extend |
| // beyond any previously-pushed limited) *and* all of the data for this |
| // sub-message is available to be parsed in the current buffer. |
| // |
| // This implies that all fields from the sub-message can be parsed from the |
| // current buffer while maintaining the invariant that we always have at least |
| // kUpb_EpsCopyInputStream_SlopBytes of data available past the beginning of |
| // any individual field start. |
| // |
| // If the size is negative, this function will always return false. This |
| // property can be useful in some cases. |
| UPB_INLINE bool upb_EpsCopyInputStream_CheckSubMessageSizeAvailable( |
| upb_EpsCopyInputStream* e, const char* ptr, int size) { |
| return _upb_EpsCopyInputStream_CheckSizeAvailable(e, ptr, size, true); |
| } |
| |
| // Returns true if aliasing_enabled=true was passed to |
| // upb_EpsCopyInputStream_Init() when this stream was initialized. |
| UPB_INLINE bool upb_EpsCopyInputStream_AliasingEnabled( |
| upb_EpsCopyInputStream* e) { |
| return e->aliasing != kUpb_EpsCopyInputStream_NoAliasing; |
| } |
| |
| // Returns true if aliasing_enabled=true was passed to |
| // upb_EpsCopyInputStream_Init() when this stream was initialized *and* we can |
| // alias into the region [ptr, size] in an input buffer. |
| UPB_INLINE bool upb_EpsCopyInputStream_AliasingAvailable( |
| upb_EpsCopyInputStream* e, const char* ptr, size_t size) { |
| // When EpsCopyInputStream supports streaming, this will need to become a |
| // runtime check. |
| return upb_EpsCopyInputStream_CheckDataSizeAvailable(e, ptr, size) && |
| e->aliasing >= kUpb_EpsCopyInputStream_NoDelta; |
| } |
| |
| // Returns a pointer into an input buffer that corresponds to the parsing |
| // pointer `ptr`. The returned pointer may be the same as `ptr`, but also may |
| // be different if we are currently parsing out of the patch buffer. |
| // |
| // REQUIRES: Aliasing must be available for the given pointer. If the input is a |
| // flat buffer and aliasing is enabled, then aliasing will always be available. |
| UPB_INLINE const char* upb_EpsCopyInputStream_GetAliasedPtr( |
| upb_EpsCopyInputStream* e, const char* ptr) { |
| UPB_ASSUME(upb_EpsCopyInputStream_AliasingAvailable(e, ptr, 0)); |
| uintptr_t delta = |
| e->aliasing == kUpb_EpsCopyInputStream_NoDelta ? 0 : e->aliasing; |
| return (const char*)((uintptr_t)ptr + delta); |
| } |
| |
| // Reads string data from the input, aliasing into the input buffer instead of |
| // copying. The parsing pointer is passed in `*ptr`, and will be updated if |
| // necessary to point to the actual input buffer. Returns the new parsing |
| // pointer, which will be advanced past the string data. |
| // |
| // REQUIRES: Aliasing must be available for this data region (test with |
| // upb_EpsCopyInputStream_AliasingAvailable(). |
| UPB_INLINE const char* upb_EpsCopyInputStream_ReadStringAliased( |
| upb_EpsCopyInputStream* e, const char** ptr, size_t size) { |
| UPB_ASSUME(upb_EpsCopyInputStream_AliasingAvailable(e, *ptr, size)); |
| const char* ret = *ptr + size; |
| *ptr = upb_EpsCopyInputStream_GetAliasedPtr(e, *ptr); |
| UPB_ASSUME(ret != NULL); |
| return ret; |
| } |
| |
| // Skips `size` bytes of data from the input and returns a pointer past the end. |
| // Returns NULL on end of stream or error. |
| UPB_INLINE const char* upb_EpsCopyInputStream_Skip(upb_EpsCopyInputStream* e, |
| const char* ptr, int size) { |
| if (!upb_EpsCopyInputStream_CheckDataSizeAvailable(e, ptr, size)) return NULL; |
| return ptr + size; |
| } |
| |
| // Copies `size` bytes of data from the input `ptr` into the buffer `to`, and |
| // returns a pointer past the end. Returns NULL on end of stream or error. |
| UPB_INLINE const char* upb_EpsCopyInputStream_Copy(upb_EpsCopyInputStream* e, |
| const char* ptr, void* to, |
| int size) { |
| if (!upb_EpsCopyInputStream_CheckDataSizeAvailable(e, ptr, size)) return NULL; |
| memcpy(to, ptr, size); |
| return ptr + size; |
| } |
| |
| // Reads string data from the stream and advances the pointer accordingly. |
| // If aliasing was enabled when the stream was initialized, then the returned |
| // pointer will point into the input buffer if possible, otherwise new data |
| // will be allocated from arena and copied into. We may be forced to copy even |
| // if aliasing was enabled if the input data spans input buffers. |
| // |
| // Returns NULL if memory allocation failed, or we reached a premature EOF. |
| UPB_INLINE const char* upb_EpsCopyInputStream_ReadString( |
| upb_EpsCopyInputStream* e, const char** ptr, size_t size, |
| upb_Arena* arena) { |
| if (upb_EpsCopyInputStream_AliasingAvailable(e, *ptr, size)) { |
| return upb_EpsCopyInputStream_ReadStringAliased(e, ptr, size); |
| } else { |
| // We need to allocate and copy. |
| if (!upb_EpsCopyInputStream_CheckDataSizeAvailable(e, *ptr, size)) { |
| return NULL; |
| } |
| UPB_ASSERT(arena); |
| char* data = (char*)upb_Arena_Malloc(arena, size); |
| if (!data) return NULL; |
| const char* ret = upb_EpsCopyInputStream_Copy(e, *ptr, data, size); |
| *ptr = data; |
| return ret; |
| } |
| } |
| |
| UPB_INLINE void _upb_EpsCopyInputStream_CheckLimit(upb_EpsCopyInputStream* e) { |
| UPB_ASSERT(e->limit_ptr == e->end + UPB_MIN(0, e->limit)); |
| } |
| |
| // Pushes a limit onto the stack of limits for the current stream. The limit |
| // will extend for `size` bytes beyond the position in `ptr`. Future calls to |
| // upb_EpsCopyInputStream_IsDone() will return `true` when the stream position |
| // reaches this limit. |
| // |
| // Returns a delta that the caller must store and supply to PopLimit() below. |
| UPB_INLINE int upb_EpsCopyInputStream_PushLimit(upb_EpsCopyInputStream* e, |
| const char* ptr, int size) { |
| int limit = size + (int)(ptr - e->end); |
| int delta = e->limit - limit; |
| _upb_EpsCopyInputStream_CheckLimit(e); |
| UPB_ASSERT(limit <= e->limit); |
| e->limit = limit; |
| e->limit_ptr = e->end + UPB_MIN(0, limit); |
| _upb_EpsCopyInputStream_CheckLimit(e); |
| return delta; |
| } |
| |
| // Pops the last limit that was pushed on this stream. This may only be called |
| // once IsDone() returns true. The user must pass the delta that was returned |
| // from PushLimit(). |
| UPB_INLINE void upb_EpsCopyInputStream_PopLimit(upb_EpsCopyInputStream* e, |
| const char* ptr, |
| int saved_delta) { |
| UPB_ASSERT(ptr - e->end == e->limit); |
| _upb_EpsCopyInputStream_CheckLimit(e); |
| e->limit += saved_delta; |
| e->limit_ptr = e->end + UPB_MIN(0, e->limit); |
| _upb_EpsCopyInputStream_CheckLimit(e); |
| } |
| |
| UPB_INLINE const char* _upb_EpsCopyInputStream_IsDoneFallbackInline( |
| upb_EpsCopyInputStream* e, const char* ptr, int overrun, |
| upb_EpsCopyInputStream_BufferFlipCallback* callback) { |
| if (overrun < e->limit) { |
| // Need to copy remaining data into patch buffer. |
| UPB_ASSERT(overrun < kUpb_EpsCopyInputStream_SlopBytes); |
| const char* old_end = ptr; |
| const char* new_start = &e->patch[0] + overrun; |
| memset(e->patch + kUpb_EpsCopyInputStream_SlopBytes, 0, |
| kUpb_EpsCopyInputStream_SlopBytes); |
| memcpy(e->patch, e->end, kUpb_EpsCopyInputStream_SlopBytes); |
| ptr = new_start; |
| e->end = &e->patch[kUpb_EpsCopyInputStream_SlopBytes]; |
| e->limit -= kUpb_EpsCopyInputStream_SlopBytes; |
| e->limit_ptr = e->end + e->limit; |
| UPB_ASSERT(ptr < e->limit_ptr); |
| if (e->aliasing != kUpb_EpsCopyInputStream_NoAliasing) { |
| e->aliasing = (uintptr_t)old_end - (uintptr_t)new_start; |
| } |
| return callback(e, old_end, new_start); |
| } else { |
| UPB_ASSERT(overrun > e->limit); |
| e->error = true; |
| return callback(e, NULL, NULL); |
| } |
| } |
| |
| typedef const char* upb_EpsCopyInputStream_ParseDelimitedFunc( |
| upb_EpsCopyInputStream* e, const char* ptr, void* ctx); |
| |
| // Tries to perform a fast-path handling of the given delimited message data. |
| // If the sub-message beginning at `*ptr` and extending for `len` is short and |
| // fits within this buffer, calls `func` with `ctx` as a parameter, where the |
| // pushing and popping of limits is handled automatically and with lower cost |
| // than the normal PushLimit()/PopLimit() sequence. |
| UPB_FORCEINLINE bool upb_EpsCopyInputStream_TryParseDelimitedFast( |
| upb_EpsCopyInputStream* e, const char** ptr, int len, |
| upb_EpsCopyInputStream_ParseDelimitedFunc* func, void* ctx) { |
| if (!upb_EpsCopyInputStream_CheckSubMessageSizeAvailable(e, *ptr, len)) { |
| return false; |
| } |
| |
| // Fast case: Sub-message is <128 bytes and fits in the current buffer. |
| // This means we can preserve limit/limit_ptr verbatim. |
| const char* saved_limit_ptr = e->limit_ptr; |
| int saved_limit = e->limit; |
| e->limit_ptr = *ptr + len; |
| e->limit = e->limit_ptr - e->end; |
| UPB_ASSERT(e->limit_ptr == e->end + UPB_MIN(0, e->limit)); |
| *ptr = func(e, *ptr, ctx); |
| e->limit_ptr = saved_limit_ptr; |
| e->limit = saved_limit; |
| UPB_ASSERT(e->limit_ptr == e->end + UPB_MIN(0, e->limit)); |
| return true; |
| } |
| |
| #ifdef __cplusplus |
| } /* extern "C" */ |
| #endif |
| |
| #include "upb/port/undef.inc" |
| |
| #endif // UPB_WIRE_EPS_COPY_INPUT_STREAM_H_ |