blob: 2307f364a7c72598d26b687a7205d3416acfe8ad [file] [log] [blame]
// Protocol Buffers - Google's data interchange format
// Copyright 2023 Google LLC. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd
#ifndef UPB_WIRE_EPS_COPY_INPUT_STREAM_H_
#define UPB_WIRE_EPS_COPY_INPUT_STREAM_H_
#include <limits.h>
#include <string.h>
#include "upb/mem/arena.h"
// Must be last.
#include "upb/port/def.inc"
#ifdef __cplusplus
extern "C" {
#endif
// The maximum number of bytes a single protobuf field can take up in the
// wire format. We only want to do one bounds check per field, so the input
// stream guarantees that after upb_EpsCopyInputStream_IsDone() is called,
// the decoder can read this many bytes without performing another bounds
// check. The stream will copy into a patch buffer as necessary to guarantee
// this invariant.
//
// The patch buffer embedded in upb_EpsCopyInputStream is sized at twice this
// value.
#define kUpb_EpsCopyInputStream_SlopBytes 16
// Parsing state for a single input buffer. All fields are set up by
// upb_EpsCopyInputStream_Init() and maintained by the functions in this file.
typedef struct {
const char* end; // Can read up to SlopBytes bytes beyond this.
const char* limit_ptr; // For bounds checks, = end + UPB_MIN(limit, 0)
uintptr_t input_delta; // Diff between the original input pointer and patch
int limit; // Submessage limit relative to end
bool error; // To distinguish between EOF and error.
bool aliasing; // Copy of the `enable_aliasing` argument given to Init().
// Scratch space that input is copied into when fewer than SlopBytes remain;
// the unused tail is zero-filled.
char patch[kUpb_EpsCopyInputStream_SlopBytes * 2];
} upb_EpsCopyInputStream;
// Reports whether the stream is in the error state.
//
// The error flag is what lets a caller tell a genuine failure apart from an
// ordinary end-of-stream once IsDone() has returned true. Within this file
// the flag is raised by the IsDone() fallback when the parse position has run
// past the current limit.
UPB_INLINE bool upb_EpsCopyInputStream_IsError(upb_EpsCopyInputStream* e) {
  const bool in_error_state = e->error;
  return in_error_state;
}
// Signature of the callback invoked by
// _upb_EpsCopyInputStream_IsDoneFallbackInline(). After the remaining input
// has been moved into the patch buffer it is called with the old parse
// position (`old_end`) and the matching position in the patch buffer
// (`new_start`); when the stream enters the error state it is called with
// (NULL, NULL). Its return value becomes the return value of the fallback.
typedef const char* upb_EpsCopyInputStream_BufferFlipCallback(
upb_EpsCopyInputStream* e, const char* old_end, const char* new_start);
// Signature of the slow-path function used by
// upb_EpsCopyInputStream_IsDoneWithCallback() when the status check reports
// kUpb_IsDoneStatus_NeedFallback. It receives the current parse pointer and
// `overrun` (= ptr - e->end). The pointer it returns replaces the caller's
// parse pointer; a NULL return is reported to the caller as "done".
typedef const char* upb_EpsCopyInputStream_IsDoneFallbackFunc(
upb_EpsCopyInputStream* e, const char* ptr, int overrun);
// Initializes a upb_EpsCopyInputStream using the `size` bytes of input that
// start at `*ptr`. Updates `*ptr` as necessary to guarantee that at least
// kUpb_EpsCopyInputStream_SlopBytes are available to read.
//
//   e:               stream to initialize; every field is overwritten except
//                    that `patch` is only written for short inputs.
//   ptr:             in/out parse pointer. Redirected into `e->patch` when the
//                    input is at most kUpb_EpsCopyInputStream_SlopBytes long,
//                    otherwise left unchanged.
//   size:            number of input bytes at `*ptr`.
//   enable_aliasing: stored in `e->aliasing`.
UPB_INLINE void upb_EpsCopyInputStream_Init(upb_EpsCopyInputStream* e,
                                            const char** ptr, size_t size,
                                            bool enable_aliasing) {
  if (size <= kUpb_EpsCopyInputStream_SlopBytes) {
    // Short input: parse out of a zero-padded copy so that reading slop bytes
    // past the data stays inside `patch`. Use sizeof so the fill always
    // matches the array's declared size.
    memset(e->patch, 0, sizeof(e->patch));
    if (size) {
      memcpy(e->patch, *ptr, size);
    }
    // Remember how to translate patch-buffer pointers back to the input.
    e->input_delta = (uintptr_t)*ptr - (uintptr_t)e->patch;
    *ptr = e->patch;
    e->end = *ptr + size;
    e->limit = 0;
  } else {
    // Long input: parse in place. `end` sits SlopBytes before the real end of
    // the buffer, so the final SlopBytes of input serve as the slop region.
    e->end = *ptr + size - kUpb_EpsCopyInputStream_SlopBytes;
    e->limit = kUpb_EpsCopyInputStream_SlopBytes;
    e->input_delta = 0;
  }
  e->aliasing = enable_aliasing;
  e->limit_ptr = e->end;
  e->error = false;
}
// Result of upb_EpsCopyInputStream_IsDoneStatus().
typedef enum {
// The current stream position is at a limit.
kUpb_IsDoneStatus_Done,
// The current stream position is not at a limit.
kUpb_IsDoneStatus_NotDone,
// The current stream position is not at a limit, and the stream needs to
// be flipped to a new buffer before more data can be read. The fallback that
// handles this status may instead put the stream into the error state (see
// _upb_EpsCopyInputStream_IsDoneFallbackInline()).
kUpb_IsDoneStatus_NeedFallback,
} upb_IsDoneStatus;
// Classifies the parse position `ptr` relative to the stream's limits. This
// is the low-level primitive; prefer upb_EpsCopyInputStream_IsDone() where
// possible.
//
// `*overrun` is always set to `ptr - e->end`.
//
// Returns:
//   kUpb_IsDoneStatus_NotDone       if `ptr` is before `e->limit_ptr`.
//   kUpb_IsDoneStatus_Done          if `ptr` sits exactly on the limit
//                                   (`*overrun == e->limit`).
//   kUpb_IsDoneStatus_NeedFallback  in every other case.
UPB_INLINE upb_IsDoneStatus upb_EpsCopyInputStream_IsDoneStatus(
    upb_EpsCopyInputStream* e, const char* ptr, int* overrun) {
  *overrun = ptr - e->end;
  if (UPB_LIKELY(ptr < e->limit_ptr)) return kUpb_IsDoneStatus_NotDone;
  if (UPB_LIKELY(*overrun == e->limit)) return kUpb_IsDoneStatus_Done;
  return kUpb_IsDoneStatus_NeedFallback;
}
// Returns true if the stream has hit a limit, either the current delimited
// limit or the overall end-of-stream. As a side effect, this function may
// flip `*ptr` to a new buffer if there are fewer than
// kUpb_EpsCopyInputStream_SlopBytes of data to be read in the current buffer.
//
// When the status check asks for the fallback, `func` is called with the
// current pointer and overrun; its result is stored in `*ptr`, and a NULL
// result is reported as "done".
//
// Postcondition: if the function returns false, there are at least
// kUpb_EpsCopyInputStream_SlopBytes of data available to read at *ptr.
UPB_INLINE bool upb_EpsCopyInputStream_IsDoneWithCallback(
    upb_EpsCopyInputStream* e, const char** ptr,
    upb_EpsCopyInputStream_IsDoneFallbackFunc* func) {
  int overrun;
  switch (upb_EpsCopyInputStream_IsDoneStatus(e, *ptr, &overrun)) {
    case kUpb_IsDoneStatus_NotDone:
      return false;
    case kUpb_IsDoneStatus_Done:
      return true;
    case kUpb_IsDoneStatus_NeedFallback: {
      const char* next = func(e, *ptr, overrun);
      *ptr = next;
      return next == NULL;
    }
  }
  UPB_UNREACHABLE();
}
// Out-of-line fallback passed to IsDoneWithCallback() by
// upb_EpsCopyInputStream_IsDone(). Only declared here; the definition is not
// part of this header.
const char* _upb_EpsCopyInputStream_IsDoneFallbackNoCallback(
upb_EpsCopyInputStream* e, const char* ptr, int overrun);
// Convenience form of IsDoneWithCallback() for callers that do not need to
// run custom logic at every buffer flip: the fallback is fixed to
// _upb_EpsCopyInputStream_IsDoneFallbackNoCallback().
//
// If this returns true, the user must call upb_EpsCopyInputStream_IsError()
// to distinguish between EOF and error.
UPB_INLINE bool upb_EpsCopyInputStream_IsDone(upb_EpsCopyInputStream* e,
                                              const char** ptr) {
  const bool done = upb_EpsCopyInputStream_IsDoneWithCallback(
      e, ptr, _upb_EpsCopyInputStream_IsDoneFallbackNoCallback);
  return done;
}
// Returns the total number of bytes that are safe to read from the current
// buffer starting at `ptr`, without reading uninitialized or unallocated
// memory: the distance from `ptr` to `e->end` plus the slop region.
//
// Note that this does not respect any semantic limits on the stream (limits
// from PushLimit() or the overall stream end), so some of these bytes may
// hold unpredictable, nonsense values. The guarantee is only that the bytes
// are valid to read from the perspective of the C language (ie. you can read
// them without triggering UBSAN or ASAN).
UPB_INLINE size_t upb_EpsCopyInputStream_BytesAvailable(
    upb_EpsCopyInputStream* e, const char* ptr) {
  const size_t readable =
      (size_t)((e->end - ptr) + kUpb_EpsCopyInputStream_SlopBytes);
  return readable;
}
// Returns true if a delimited field of `size` bytes beginning at `ptr` ends
// at or before the stream's current limit, i.e. it does not extend beyond any
// previously-pushed limit. `ptr` should point to the beginning of the field
// data, after the delimited size. `size` must be non-negative.
//
// Note that this does *not* guarantee that all of the data for this field is
// in the current buffer.
UPB_INLINE bool upb_EpsCopyInputStream_CheckSize(
    const upb_EpsCopyInputStream* e, const char* ptr, int size) {
  UPB_ASSERT(size >= 0);
  // Both sides are offsets relative to e->end.
  if ((ptr - e->end) + size > e->limit) return false;
  return true;
}
// Shared implementation of the "size available" checks. Returns true when
// `ptr + size` neither wraps around nor passes the permitted bound. The bound
// is `e->limit_ptr` for sub-messages and
// `e->limit_ptr + kUpb_EpsCopyInputStream_SlopBytes` otherwise.
//
// A negative `size` converts to a huge unsigned offset and is expected to be
// rejected; this is asserted.
UPB_INLINE bool _upb_EpsCopyInputStream_CheckSizeAvailable(
    upb_EpsCopyInputStream* e, const char* ptr, int size, bool submessage) {
  // Computing the end address and comparing it costs one more branch than
  //   return (size_t)(end - ptr) < size;
  // but saves a computation when the caller is about to use "ptr + len":
  //   https://godbolt.org/z/35YGPz
  // In microbenchmarks this shows a small improvement.
  //
  // NOTE: this check depends on having a linear address space. This is not
  // technically guaranteed by uintptr_t.
  const uintptr_t start = (uintptr_t)ptr;
  const uintptr_t finish = start + (size_t)size;
  const uintptr_t bound =
      (uintptr_t)e->limit_ptr +
      (submessage ? 0 : kUpb_EpsCopyInputStream_SlopBytes);
  const bool wrapped = finish < start;
  const bool fits = !wrapped && finish <= bound;
  if (size < 0) UPB_ASSERT(!fits);
  return fits;
}
// Returns true if all `size` bytes of a delimited field starting at `ptr` are
// available to be read in the current buffer, i.e. `ptr + size` lies no more
// than kUpb_EpsCopyInputStream_SlopBytes past the stream's `limit_ptr`.
//
// If the size is negative, this function will always return false. This
// property can be useful in some cases.
UPB_INLINE bool upb_EpsCopyInputStream_CheckDataSizeAvailable(
    upb_EpsCopyInputStream* e, const char* ptr, int size) {
  const bool is_submessage = false;
  return _upb_EpsCopyInputStream_CheckSizeAvailable(e, ptr, size,
                                                    is_submessage);
}
// Returns true if the given sub-message size is valid (it does not extend
// beyond any previously-pushed limits) *and* all of the data for this
// sub-message is available to be parsed in the current buffer. Concretely,
// `ptr + size` must not pass the stream's `limit_ptr`.
//
// This implies that all fields from the sub-message can be parsed from the
// current buffer while maintaining the invariant that we always have at least
// kUpb_EpsCopyInputStream_SlopBytes of data available past the beginning of
// any individual field start.
//
// If the size is negative, this function will always return false. This
// property can be useful in some cases.
UPB_INLINE bool upb_EpsCopyInputStream_CheckSubMessageSizeAvailable(
    upb_EpsCopyInputStream* e, const char* ptr, int size) {
  const bool is_submessage = true;
  return _upb_EpsCopyInputStream_CheckSizeAvailable(e, ptr, size,
                                                    is_submessage);
}
// Returns true if enable_aliasing=true was passed to
// upb_EpsCopyInputStream_Init() when this stream was initialized.
UPB_INLINE bool upb_EpsCopyInputStream_AliasingEnabled(
    upb_EpsCopyInputStream* e) {
  const bool aliasing_on = e->aliasing;
  return aliasing_on;
}
// Returns true if enable_aliasing=true was passed to
// upb_EpsCopyInputStream_Init() when this stream was initialized *and* we can
// alias into the region [ptr, ptr + size) in an input buffer.
//
// Always returns false for a `size` larger than INT_MAX: the availability
// check works on `int` sizes, and a silently truncated value could make an
// oversized region look like it fits.
UPB_INLINE bool upb_EpsCopyInputStream_AliasingAvailable(
    upb_EpsCopyInputStream* e, const char* ptr, size_t size) {
  if (size > (size_t)INT_MAX) return false;
  // When EpsCopyInputStream supports streaming, this will need to become a
  // runtime check.
  return e->aliasing &&
         upb_EpsCopyInputStream_CheckDataSizeAvailable(e, ptr, (int)size);
}
// Translates the parsing pointer `ptr` into the corresponding pointer in the
// input buffer by adding the stream's `input_delta`. The result may equal
// `ptr`, but may also differ if we are currently parsing out of the patch
// buffer.
UPB_INLINE const char* upb_EpsCopyInputStream_GetInputPtr(
    upb_EpsCopyInputStream* e, const char* ptr) {
  const uintptr_t parse_addr = (uintptr_t)ptr;
  const uintptr_t input_addr = parse_addr + e->input_delta;
  return (const char*)input_addr;
}
// Same translation as upb_EpsCopyInputStream_GetInputPtr(): returns the
// pointer into an input buffer that corresponds to the parsing pointer `ptr`,
// which may differ from `ptr` when parsing out of the patch buffer.
//
// REQUIRES: Aliasing must be available for the given pointer. If the input is
// a flat buffer and aliasing is enabled, then aliasing will always be
// available.
UPB_INLINE const char* upb_EpsCopyInputStream_GetAliasedPtr(
    upb_EpsCopyInputStream* e, const char* ptr) {
  UPB_ASSUME(upb_EpsCopyInputStream_AliasingAvailable(e, ptr, 0));
  const char* input_ptr = upb_EpsCopyInputStream_GetInputPtr(e, ptr);
  return input_ptr;
}
// Reads string data from the input, aliasing into the input buffer instead of
// copying. The parsing pointer is passed in `*ptr`; on return `*ptr` points
// at the string data in the actual input buffer. Returns the new parsing
// pointer, which is the original `*ptr` advanced past the string data.
//
// REQUIRES: Aliasing must be available for this data region (test with
// upb_EpsCopyInputStream_AliasingAvailable()).
UPB_INLINE const char* upb_EpsCopyInputStream_ReadStringAliased(
    upb_EpsCopyInputStream* e, const char** ptr, size_t size) {
  const char* const parse_pos = *ptr;
  UPB_ASSUME(upb_EpsCopyInputStream_AliasingAvailable(e, parse_pos, size));
  const char* const after = parse_pos + size;
  *ptr = upb_EpsCopyInputStream_GetAliasedPtr(e, parse_pos);
  UPB_ASSUME(after != NULL);
  return after;
}
// Skips `size` bytes of data from the input and returns a pointer past the
// end. Returns NULL if upb_EpsCopyInputStream_CheckDataSizeAvailable()
// rejects the size (end of stream or error).
UPB_INLINE const char* upb_EpsCopyInputStream_Skip(upb_EpsCopyInputStream* e,
                                                   const char* ptr, int size) {
  const bool ok = upb_EpsCopyInputStream_CheckDataSizeAvailable(e, ptr, size);
  return ok ? ptr + size : NULL;
}
// Copies `size` bytes of data from the input `ptr` into the buffer `to`, and
// returns a pointer past the end of the copied input. Returns NULL, without
// writing to `to`, if upb_EpsCopyInputStream_CheckDataSizeAvailable() rejects
// the size (end of stream or error).
UPB_INLINE const char* upb_EpsCopyInputStream_Copy(upb_EpsCopyInputStream* e,
                                                   const char* ptr, void* to,
                                                   int size) {
  if (upb_EpsCopyInputStream_CheckDataSizeAvailable(e, ptr, size)) {
    memcpy(to, ptr, size);
    return ptr + size;
  }
  return NULL;
}
// Reads string data from the stream and advances the pointer accordingly.
// If aliasing was enabled when the stream was initialized, then the returned
// data will point into the input buffer if possible, otherwise new data
// will be allocated from `arena` and copied into. We may be forced to copy
// even if aliasing was enabled if the input data spans input buffers.
//
// On success `*ptr` is updated to point at the string data (aliased or
// copied) and the return value is the parsing pointer advanced past the
// string.
//
// Returns NULL if memory allocation failed, if we reached a premature EOF, or
// if `size` exceeds INT_MAX (the bounds checks operate on `int` sizes, so a
// larger value would otherwise be truncated before being checked).
UPB_INLINE const char* upb_EpsCopyInputStream_ReadString(
    upb_EpsCopyInputStream* e, const char** ptr, size_t size,
    upb_Arena* arena) {
  if (size > (size_t)INT_MAX) return NULL;
  if (upb_EpsCopyInputStream_AliasingAvailable(e, *ptr, size)) {
    return upb_EpsCopyInputStream_ReadStringAliased(e, ptr, size);
  }
  // We need to allocate and copy.
  if (!upb_EpsCopyInputStream_CheckDataSizeAvailable(e, *ptr, (int)size)) {
    return NULL;
  }
  UPB_ASSERT(arena);
  char* data = (char*)upb_Arena_Malloc(arena, size);
  if (!data) return NULL;
  // The availability check above already passed, so copy directly.
  memcpy(data, *ptr, size);
  const char* ret = *ptr + size;
  *ptr = data;
  return ret;
}
// Debug check of the stream invariant tying `limit_ptr` to `limit`:
// limit_ptr must equal end + min(limit, 0).
UPB_INLINE void _upb_EpsCopyInputStream_CheckLimit(upb_EpsCopyInputStream* e) {
  UPB_ASSERT(e->end + UPB_MIN(e->limit, 0) == e->limit_ptr);
}
// Pushes a limit onto the stack of limits for the current stream. The limit
// will extend for `size` bytes beyond the position in `ptr`. Future calls to
// upb_EpsCopyInputStream_IsDone() will return `true` when the stream position
// reaches this limit.
//
// The new limit must not lie beyond the current one (asserted).
//
// Returns a delta (the previous limit minus the new one) that the caller must
// store and supply to PopLimit() below.
UPB_INLINE int upb_EpsCopyInputStream_PushLimit(upb_EpsCopyInputStream* e,
                                                const char* ptr, int size) {
  _upb_EpsCopyInputStream_CheckLimit(e);
  const int outer_limit = e->limit;
  // Limits are stored relative to e->end.
  const int new_limit = size + (int)(ptr - e->end);
  UPB_ASSERT(new_limit <= outer_limit);
  e->limit = new_limit;
  e->limit_ptr = e->end + UPB_MIN(0, new_limit);
  _upb_EpsCopyInputStream_CheckLimit(e);
  return outer_limit - new_limit;
}
// Pops the last limit that was pushed on this stream. This may only be called
// once IsDone() returns true, i.e. when `ptr` sits exactly on the current
// limit (asserted). The user must pass the delta that was returned from
// PushLimit(); it is added back onto the limit and `limit_ptr` is recomputed.
UPB_INLINE void upb_EpsCopyInputStream_PopLimit(upb_EpsCopyInputStream* e,
                                                const char* ptr,
                                                int saved_delta) {
  UPB_ASSERT(ptr - e->end == e->limit);
  _upb_EpsCopyInputStream_CheckLimit(e);
  const int restored_limit = e->limit + saved_delta;
  e->limit = restored_limit;
  e->limit_ptr = e->end + UPB_MIN(0, restored_limit);
  _upb_EpsCopyInputStream_CheckLimit(e);
}
// Slow path for IsDone(), taken when the parse position `ptr` is past
// `e->end` (by `overrun` bytes) but not exactly on the limit.
//
// If `overrun` is at or beyond the current limit, the stream is put into the
// error state and `callback(e, NULL, NULL)` is returned.
//
// Otherwise the SlopBytes of input starting at `e->end` are copied to the
// front of the patch buffer, the back half of the patch buffer is zeroed, and
// `end`, `limit`, `limit_ptr` and `input_delta` are rebased onto the patch
// buffer. The callback is then invoked with the old parse position and the
// equivalent position inside the patch buffer, and its result is returned.
UPB_INLINE const char* _upb_EpsCopyInputStream_IsDoneFallbackInline(
    upb_EpsCopyInputStream* e, const char* ptr, int overrun,
    upb_EpsCopyInputStream_BufferFlipCallback* callback) {
  if (overrun >= e->limit) {
    // Read past the limit.
    UPB_ASSERT(overrun > e->limit);
    e->error = true;
    return callback(e, NULL, NULL);
  }

  // Need to copy remaining data into patch buffer.
  UPB_ASSERT(overrun < kUpb_EpsCopyInputStream_SlopBytes);
  char* const patch_head = &e->patch[0];
  char* const patch_tail = &e->patch[kUpb_EpsCopyInputStream_SlopBytes];
  const char* const old_end = ptr;
  const char* const new_start = patch_head + overrun;
  memset(patch_tail, 0, kUpb_EpsCopyInputStream_SlopBytes);
  memcpy(patch_head, e->end, kUpb_EpsCopyInputStream_SlopBytes);
  e->end = patch_tail;
  e->limit -= kUpb_EpsCopyInputStream_SlopBytes;
  e->limit_ptr = e->end + e->limit;
  UPB_ASSERT(new_start < e->limit_ptr);
  e->input_delta = (uintptr_t)old_end - (uintptr_t)new_start;
  return callback(e, old_end, new_start);
}
// Signature of the parse function handed to
// upb_EpsCopyInputStream_TryParseDelimitedFast(). It is called with the
// stream, the start of the delimited data, and the caller-supplied `ctx`; the
// pointer it returns becomes the caller's new parse pointer.
typedef const char* upb_EpsCopyInputStream_ParseDelimitedFunc(
upb_EpsCopyInputStream* e, const char* ptr, void* ctx);
// Tries to perform a fast-path handling of the given delimited message data.
// If the sub-message beginning at `*ptr` and extending for `len` bytes fits
// within the current buffer, calls `func` with `ctx` as a parameter, where
// the pushing and popping of limits is handled automatically and with lower
// cost than the normal PushLimit()/PopLimit() sequence.
//
// Returns false, leaving the stream and `*ptr` untouched, when the
// sub-message is not fully available. Otherwise stores the result of `func`
// in `*ptr` and returns true.
UPB_FORCEINLINE bool upb_EpsCopyInputStream_TryParseDelimitedFast(
    upb_EpsCopyInputStream* e, const char** ptr, int len,
    upb_EpsCopyInputStream_ParseDelimitedFunc* func, void* ctx) {
  if (!upb_EpsCopyInputStream_CheckSubMessageSizeAvailable(e, *ptr, len)) {
    return false;
  }

  // Fast case: the sub-message fits in the current buffer, so the enclosing
  // limit/limit_ptr can simply be saved and restored verbatim around the call.
  const char* const outer_limit_ptr = e->limit_ptr;
  const int outer_limit = e->limit;

  const char* const sub_end = *ptr + len;
  e->limit_ptr = sub_end;
  e->limit = sub_end - e->end;
  UPB_ASSERT(e->limit_ptr == e->end + UPB_MIN(0, e->limit));

  *ptr = func(e, *ptr, ctx);

  e->limit_ptr = outer_limit_ptr;
  e->limit = outer_limit;
  UPB_ASSERT(e->limit_ptr == e->end + UPB_MIN(0, e->limit));
  return true;
}
#ifdef __cplusplus
} /* extern "C" */
#endif
#include "upb/port/undef.inc"
#endif // UPB_WIRE_EPS_COPY_INPUT_STREAM_H_