// blob: f8bc2401f0fee1e4a54b6221b6c7746ee62d6a5f [file] [log] [blame]
// Protocol Buffers - Google's data interchange format
// Copyright 2025 Google LLC. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd
#ifndef UPB_WIRE_INTERNAL_EPS_COPY_INPUT_STREAM_H_
#define UPB_WIRE_INTERNAL_EPS_COPY_INPUT_STREAM_H_
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include "upb/base/error_handler.h"
#include "upb/base/string_view.h"
// Must be last.
#include "upb/port/def.inc"
#ifdef __cplusplus
extern "C" {
#endif
// The maximum number of bytes a single protobuf field can take up in the
// wire format. We only want to do one bounds check per field, so the input
// stream guarantees that after upb_EpsCopyInputStream_IsDone() is called,
// the decoder can read this many bytes without performing another bounds
// check. The stream will copy into a patch buffer as necessary to guarantee
// this invariant. Since tags can only be up to 5 bytes, and a max-length scalar
// field can be 10 bytes, only 15 is required; but sizing up to 16 permits more
// efficient fixed-size copies.
//
// The patch buffer in struct upb_EpsCopyInputStream is declared as twice this
// many bytes.
#define kUpb_EpsCopyInputStream_SlopBytes 16
// Parsing state for one input buffer. The decoder reads through a parsing
// pointer that points either into the caller's buffer or into `patch`, a
// private copy used when the input is no larger than the slop region (see
// upb_EpsCopyInputStream_InitWithErrorHandler()).
struct upb_EpsCopyInputStream {
const char* end; // Can read up to SlopBytes bytes beyond this.
const char* limit_ptr; // For bounds checks, = end + UPB_MIN(limit, 0)
// Added to a parsing pointer to obtain the matching address in the original
// input; zero when parsing directly out of the input buffer.
uintptr_t input_delta; // Diff between the original input pointer and patch
const char* buffer_start; // Pointer to the original input buffer
const char* capture_start; // If non-NULL, the start of the captured region.
ptrdiff_t limit; // Submessage limit relative to end
upb_ErrorHandler* err; // Error handler to use when things go wrong.
bool error; // To distinguish between EOF and error.
#ifndef NDEBUG
// Debug-only budget: how many bytes may still be consumed before the next
// bounds check. Reset by BoundsChecked(), cleared by BoundsHit(), and
// decremented by ConsumeBytes().
int guaranteed_bytes;
#endif
// Allocate double the size of what's required; this permits a fixed-size copy
// from the input buffer, regardless of how many bytes actually remain in the
// input buffer.
char patch[kUpb_EpsCopyInputStream_SlopBytes * 2];
};
// Forward declaration; the definition appears later in this header. It is
// declared here because upb_EpsCopyInputStream_InitWithErrorHandler() calls it
// before that definition.
UPB_INLINE void UPB_PRIVATE(upb_EpsCopyInputStream_BoundsChecked)(
struct upb_EpsCopyInputStream* e);
UPB_INLINE bool upb_EpsCopyInputStream_IsError(
struct upb_EpsCopyInputStream* e) {
return e->error;
}
// Initializes `e` to parse the `size` bytes starting at `*ptr`, recording
// `err` as the stream's error handler (NULL is accepted; see
// upb_EpsCopyInputStream_Init()).
//
// If the input is no larger than the slop region, it is copied into the
// zero-filled patch buffer and `*ptr` is redirected to that copy, so reading
// slop bytes past `end` stays inside the patch buffer. Otherwise parsing
// happens in place: `end` is placed SlopBytes before the end of the input and
// the initial limit is SlopBytes past `end`, i.e. the true end of the input.
UPB_INLINE void upb_EpsCopyInputStream_InitWithErrorHandler(
    struct upb_EpsCopyInputStream* e, const char** ptr, size_t size,
    upb_ErrorHandler* err) {
  e->buffer_start = *ptr;
  e->capture_start = NULL;
  e->err = err;
  if (size <= kUpb_EpsCopyInputStream_SlopBytes) {
    // Size the clear from the array itself rather than from a literal, so it
    // cannot drift out of sync with the patch buffer's declaration.
    memset(e->patch, 0, sizeof(e->patch));
    // `*ptr` may be NULL for an empty input, so only copy when there is data.
    if (size) memcpy(e->patch, *ptr, size);
    // Remember how far the patch buffer is from the real input so parsing
    // pointers can later be mapped back to input addresses.
    e->input_delta = (uintptr_t)*ptr - (uintptr_t)e->patch;
    *ptr = e->patch;
    e->end = *ptr + size;
    e->limit = 0;
  } else {
    e->end = *ptr + size - kUpb_EpsCopyInputStream_SlopBytes;
    e->limit = kUpb_EpsCopyInputStream_SlopBytes;
    e->input_delta = 0;
  }
  e->limit_ptr = e->end;
  e->error = false;
  UPB_PRIVATE(upb_EpsCopyInputStream_BoundsChecked)(e);
}
UPB_INLINE void upb_EpsCopyInputStream_Init(struct upb_EpsCopyInputStream* e,
const char** ptr, size_t size) {
upb_EpsCopyInputStream_InitWithErrorHandler(e, ptr, size, NULL);
}
// Returns true only when `e` is non-NULL and has an error handler installed.
UPB_ATTR_CONST
UPB_INLINE bool upb_EpsCopyInputStream_HasErrorHandler(
    const struct upb_EpsCopyInputStream* e) {
  if (!e) return false;
  return e->err != NULL;
}
// Call this function to signal an error. If an error handler is set, it will be
// called and the function will never return. Otherwise, returns NULL to
// indicate an error.
//
// This header only declares the function; it is not defined here.
const char* UPB_PRIVATE(upb_EpsCopyInputStream_ReturnError)(
struct upb_EpsCopyInputStream* e);
// Returns `ptr` unchanged. Before returning, it passes two conditions to
// UPB_MAYBE_ASSUME: "this stream has an error handler" and "`ptr` is
// non-NULL".
// NOTE(review): presumably the second condition is assumed only when the first
// holds, which would match ReturnError() never returning when a handler is
// set -- confirm against the definition of UPB_MAYBE_ASSUME.
UPB_INLINE const char* UPB_PRIVATE(upb_EpsCopyInputStream_AssumeResult)(
struct upb_EpsCopyInputStream* e, const char* ptr) {
UPB_MAYBE_ASSUME(upb_EpsCopyInputStream_HasErrorHandler(e), ptr != NULL);
return ptr;
}
////////////////////////////////////////////////////////////////////////////////
// Debug checks that attempt to ensure that no code paths will overrun the slop
// bytes even in the worst case. Since we are frequently parsing varints, it's
// possible that the user is trying to parse too many varints before calling
// upb_EpsCopyInputStream_IsDone(), but this error case is not detected because
// the varints are short. These checks ensure that we will not overrun the slop
// bytes, even if each varint is its maximum possible length.
// Debug-only bookkeeping: resets the budget of bytes that may be consumed
// before the next bounds check to the full slop size. Does nothing when NDEBUG
// is defined.
UPB_INLINE void UPB_PRIVATE(upb_EpsCopyInputStream_BoundsChecked)(
    struct upb_EpsCopyInputStream* e) {
#ifdef NDEBUG
  (void)e;
#else
  e->guaranteed_bytes = kUpb_EpsCopyInputStream_SlopBytes;
#endif
}
// Debug-only bookkeeping: clears the budget of bytes that may be consumed, so
// any ConsumeBytes() call with a positive count asserts until the budget is
// reset by BoundsChecked(). Does nothing when NDEBUG is defined.
UPB_INLINE void UPB_PRIVATE(upb_EpsCopyInputStream_BoundsHit)(
    struct upb_EpsCopyInputStream* e) {
#ifdef NDEBUG
  (void)e;
#else
  e->guaranteed_bytes = 0;
#endif
}
// Signals the maximum number of bytes that the operation about to be performed
// may consume. In debug builds this asserts that the remaining budget covers
// `n` and then deducts `n` from it; a NULL stream is ignored. Does nothing
// when NDEBUG is defined.
UPB_INLINE void UPB_PRIVATE(upb_EpsCopyInputStream_ConsumeBytes)(
    struct upb_EpsCopyInputStream* e, int n) {
#ifdef NDEBUG
  (void)e;
  (void)n;
#else
  if (!e) return;
  UPB_ASSERT(n <= e->guaranteed_bytes);
  e->guaranteed_bytes -= n;
#endif
}
////////////////////////////////////////////////////////////////////////////////
// Result of checking the current stream position against the current limit,
// as returned by the IsDoneStatus() helper below.
typedef enum {
// The current stream position is at a limit.
kUpb_IsDoneStatus_Done,
// The current stream position is not at a limit.
kUpb_IsDoneStatus_NotDone,
// The current stream position is not at a limit, and the stream needs to
// be flipped to a new buffer before more data can be read.
kUpb_IsDoneStatus_NeedFallback,
} upb_IsDoneStatus;
// Returns the status of the current stream position. This is a low-level
// function, it is simpler to call upb_EpsCopyInputStream_IsDone() if possible.
//
// `*overrun` always receives `ptr - e->end`, narrowed to int. The debug
// bounds-check budget is reset when the result is NotDone and cleared
// otherwise.
UPB_INLINE upb_IsDoneStatus UPB_PRIVATE(upb_EpsCopyInputStream_IsDoneStatus)(
    struct upb_EpsCopyInputStream* e, const char* ptr, int* overrun) {
  // Keep the full-width distance for the limit comparison: `e->limit` is a
  // ptrdiff_t, so comparing it against a value already narrowed to int could
  // miss a genuine match when the distance does not fit in an int.
  const ptrdiff_t past_end = ptr - e->end;
  *overrun = (int)past_end;
  if (UPB_LIKELY(ptr < e->limit_ptr)) {
    UPB_PRIVATE(upb_EpsCopyInputStream_BoundsChecked)(e);
    return kUpb_IsDoneStatus_NotDone;
  }
  // At or past limit_ptr: either exactly at the limit, or the slow path is
  // needed to decide.
  UPB_PRIVATE(upb_EpsCopyInputStream_BoundsHit)(e);
  if (UPB_LIKELY(past_end == e->limit)) {
    return kUpb_IsDoneStatus_Done;
  }
  return kUpb_IsDoneStatus_NeedFallback;
}
// Out-of-line slow path used by upb_EpsCopyInputStream_IsDone() when the
// status is kUpb_IsDoneStatus_NeedFallback. Only declared in this header.
// IsDone() stores the returned pointer as the new parsing pointer and reports
// "done" when that pointer is NULL.
const char* UPB_PRIVATE(upb_EpsCopyInputStream_IsDoneFallback)(
struct upb_EpsCopyInputStream* e, const char* ptr, int overrun);
UPB_INLINE bool upb_EpsCopyInputStream_IsDone(struct upb_EpsCopyInputStream* e,
const char** ptr) {
int overrun;
switch (UPB_PRIVATE(upb_EpsCopyInputStream_IsDoneStatus)(e, *ptr, &overrun)) {
case kUpb_IsDoneStatus_Done:
UPB_PRIVATE(upb_EpsCopyInputStream_BoundsHit)(e);
return true;
case kUpb_IsDoneStatus_NotDone:
UPB_PRIVATE(upb_EpsCopyInputStream_BoundsChecked)(e);
return false;
case kUpb_IsDoneStatus_NeedFallback:
*ptr =
UPB_PRIVATE(upb_EpsCopyInputStream_IsDoneFallback)(e, *ptr, overrun);
if (*ptr) {
UPB_PRIVATE(upb_EpsCopyInputStream_BoundsChecked)(e);
} else {
UPB_PRIVATE(upb_EpsCopyInputStream_BoundsHit)(e);
}
return *ptr == NULL;
}
UPB_UNREACHABLE();
}
// Returns true if `size` bytes starting at `ptr` do not extend past the
// current limit. `size` must be non-negative.
UPB_INLINE bool upb_EpsCopyInputStream_CheckSize(
    const struct upb_EpsCopyInputStream* e, const char* ptr, int size) {
  UPB_ASSERT(size >= 0);
  // Both `limit` and the current position are expressed relative to `end`, so
  // their difference is the number of bytes left before the limit.
  const ptrdiff_t bytes_to_limit = e->limit - (ptr - e->end);
  return size <= bytes_to_limit;
}
// Returns a pointer into an input buffer that corresponds to the parsing
// pointer `ptr`. The returned pointer may be the same as `ptr`, but also may
// be different if we are currently parsing out of the patch buffer.
//
// `e->input_delta` is zero when parsing directly from the input, and is the
// distance from the patch buffer to the input otherwise (see
// upb_EpsCopyInputStream_InitWithErrorHandler()), so `ptr + input_delta` is
// the input address in both cases.
UPB_INLINE const char* UPB_PRIVATE(upb_EpsCopyInputStream_GetInputPtr)(
struct upb_EpsCopyInputStream* e, const char* ptr) {
// This somewhat silly looking add-and-subtract behavior provides provenance
// from the original input buffer's pointer. After optimization it produces
// the same assembly as just casting `(uintptr_t)ptr+input_delta`
// https://godbolt.org/z/zosG88oPn
//
// `position` is the byte offset of the result from the start of the input
// buffer.
size_t position =
(uintptr_t)ptr + e->input_delta - (uintptr_t)e->buffer_start;
return e->buffer_start + position;
}
// Begins a capture at `ptr` by remembering the corresponding address in the
// original input buffer. No capture may already be in progress.
UPB_INLINE void upb_EpsCopyInputStream_StartCapture(
    struct upb_EpsCopyInputStream* e, const char* ptr) {
  UPB_ASSERT(e->capture_start == NULL);
  const char* const input_pos =
      UPB_PRIVATE(upb_EpsCopyInputStream_GetInputPtr)(e, ptr);
  e->capture_start = input_pos;
}
// Ends the capture begun by upb_EpsCopyInputStream_StartCapture() and stores
// the captured region -- from the capture start up to the input position
// corresponding to `ptr` -- in `*sv`. A capture must be in progress.
//
// Returns true on success. If `ptr` lies beyond the current limit, signals an
// error and returns false; in that case `*sv` is not written and the capture
// is left open.
UPB_INLINE bool upb_EpsCopyInputStream_EndCapture(
    struct upb_EpsCopyInputStream* e, const char* ptr, upb_StringView* sv) {
  UPB_ASSERT(e->capture_start != NULL);
  if (ptr - e->end > e->limit) {
    // ReturnError() yields a `const char*`; report the failure as an explicit
    // bool instead of relying on an implicit pointer-to-bool conversion.
    UPB_PRIVATE(upb_EpsCopyInputStream_ReturnError)(e);
    return false;
  }
  const char* end = UPB_PRIVATE(upb_EpsCopyInputStream_GetInputPtr)(e, ptr);
  sv->data = e->capture_start;
  sv->size = end - sv->data;
  e->capture_start = NULL;
  return true;
}
// Reads `size` bytes at `ptr` as a string view that points into the original
// input buffer (never into the patch buffer). On success stores the view in
// `*sv` and returns the parsing pointer advanced past the string. If the
// string does not fit, signals an error and returns the result of
// ReturnError(); `*sv` is then left untouched.
UPB_INLINE const char* upb_EpsCopyInputStream_ReadStringAlwaysAlias(
    struct upb_EpsCopyInputStream* e, const char* ptr, size_t size,
    upb_StringView* sv) {
  UPB_ASSERT(size <= PTRDIFF_MAX);
  // The string must lie entirely within the input buffer. When parsing
  // directly from the input, the slop bytes past `end` are real input bytes
  // and may be used. When parsing from the patch buffer, the slop bytes do not
  // exist in the original input, so the string may not extend into them.
  const bool parsing_from_input = (e->input_delta == 0);
  const char* readable_end = e->end;
  if (parsing_from_input) readable_end += kUpb_EpsCopyInputStream_SlopBytes;

  const ptrdiff_t available = readable_end - ptr;
  if (available < (ptrdiff_t)size) {
    // For the moment, we consider this an error. In a multi-buffer world, it
    // could be that the requested string extends into the next buffer, which
    // is not an error and should be recoverable.
    return UPB_PRIVATE(upb_EpsCopyInputStream_ReturnError)(e);
  }

  *sv = upb_StringView_FromDataAndSize(
      UPB_PRIVATE(upb_EpsCopyInputStream_GetInputPtr)(e, ptr), size);
  return ptr + size;
}
// Reads `size` bytes at `ptr` as a string view built directly from the
// parsing pointer (it is not translated back to the original input buffer).
// On success stores the view in `*sv` and returns the parsing pointer advanced
// past the string. If the string does not fit in the current buffer, signals
// an error and returns the result of ReturnError(); `*sv` is then left
// untouched.
UPB_INLINE const char* upb_EpsCopyInputStream_ReadStringEphemeral(
    struct upb_EpsCopyInputStream* e, const char* ptr, size_t size,
    upb_StringView* sv) {
  UPB_ASSERT(size <= PTRDIFF_MAX);
  // The string may use everything up to the end of the slop bytes.
  const char* const buffer_end = e->end + kUpb_EpsCopyInputStream_SlopBytes;
  const ptrdiff_t available = buffer_end - ptr;
  if (available < (ptrdiff_t)size) {
    // For the moment, we consider this an error. In a multi-buffer world, it
    // could be that the requested string extends into the next buffer, which
    // is not an error and should be recoverable.
    return UPB_PRIVATE(upb_EpsCopyInputStream_ReturnError)(e);
  }
  *sv = upb_StringView_FromDataAndSize(ptr, size);
  return ptr + size;
}
// Asserts the invariant tying `limit_ptr` to `limit`: `limit_ptr` equals
// `end + limit` when the limit is negative, and `end` otherwise.
UPB_INLINE void UPB_PRIVATE(upb_EpsCopyInputStream_CheckLimit)(
    struct upb_EpsCopyInputStream* e) {
  UPB_ASSERT(e->limit_ptr == e->end + (e->limit < 0 ? e->limit : 0));
}
// Installs a new limit `size` bytes past `ptr` and returns the difference
// between the previous limit and the new one. The caller passes that value to
// upb_EpsCopyInputStream_PopLimit() to restore the previous limit.
//
// If the new limit lies beyond the previous one (negative return value), an
// error is signaled via ReturnError(); the new limit has already been
// installed at that point and the negative delta is still returned.
UPB_INLINE ptrdiff_t upb_EpsCopyInputStream_PushLimit(
    struct upb_EpsCopyInputStream* e, const char* ptr, size_t size) {
  UPB_ASSERT(size <= PTRDIFF_MAX);
  // Limits are stored relative to `end`.
  const ptrdiff_t pushed = (ptr - e->end) + (ptrdiff_t)size;
  const ptrdiff_t restore_delta = e->limit - pushed;

  UPB_PRIVATE(upb_EpsCopyInputStream_CheckLimit)(e);
  e->limit = pushed;
  e->limit_ptr = e->end + (pushed < 0 ? pushed : 0);
  UPB_PRIVATE(upb_EpsCopyInputStream_CheckLimit)(e);

  if (UPB_UNLIKELY(restore_delta < 0)) {
    UPB_PRIVATE(upb_EpsCopyInputStream_ReturnError)(e);
  }
  return restore_delta;
}
// Pops the last limit that was pushed on this stream. This may only be called
// once IsDone() returns true, i.e. when `ptr` sits exactly at the current
// limit. The user must pass the delta that was returned from PushLimit().
UPB_INLINE void upb_EpsCopyInputStream_PopLimit(
    struct upb_EpsCopyInputStream* e, const char* ptr, ptrdiff_t saved_delta) {
  UPB_ASSERT(ptr - e->end == e->limit);
  UPB_PRIVATE(upb_EpsCopyInputStream_CheckLimit)(e);

  // Adding the saved delta back yields the limit that was in force before the
  // matching PushLimit().
  const ptrdiff_t restored = e->limit + saved_delta;
  e->limit = restored;
  e->limit_ptr = e->end + (restored < 0 ? restored : 0);

  UPB_PRIVATE(upb_EpsCopyInputStream_CheckLimit)(e);
}
// Callback type used by upb_EpsCopyInputStream_TryParseDelimitedFast() to
// parse a length-delimited region. It receives the stream, the parsing pointer
// at the start of the region, the region's size in bytes, and the caller's
// opaque `ctx`. Its return value becomes the caller's new parsing pointer.
typedef const char* upb_EpsCopyInputStream_ParseDelimitedFunc(
struct upb_EpsCopyInputStream* e, const char* ptr, int size, void* ctx);
// Attempts to parse the `size`-byte delimited region starting at `*ptr`
// without going through PushLimit()/PopLimit().
//
// Returns false, leaving `e` and `*ptr` untouched, if the region extends past
// `e->limit_ptr`. Otherwise narrows the limit to the end of the region, calls
// `func(e, *ptr, size, ctx)`, stores its result in `*ptr`, restores the
// previous limit, and returns true.
UPB_FORCEINLINE bool upb_EpsCopyInputStream_TryParseDelimitedFast(
    struct upb_EpsCopyInputStream* e, const char** ptr, size_t size,
    upb_EpsCopyInputStream_ParseDelimitedFunc* func, void* ctx) {
  UPB_ASSERT(size <= PTRDIFF_MAX);
  if ((ptrdiff_t)size > e->limit_ptr - *ptr) {
    return false;
  }
  // Fast case: the sub-message ends at or before the current limit pointer.
  // The narrowed limit is therefore never past `end`, so limit/limit_ptr can
  // be saved here and restored verbatim afterwards.
  const char* saved_limit_ptr = e->limit_ptr;
  // `limit` is a ptrdiff_t; saving it in a narrower type would truncate it on
  // platforms where ptrdiff_t is wider than int and restore a wrong limit.
  ptrdiff_t saved_limit = e->limit;
  e->limit_ptr = *ptr + size;
  e->limit = e->limit_ptr - e->end;
  UPB_ASSERT(e->limit_ptr == e->end + UPB_MIN(0, e->limit));
  // The callback's size parameter is an int; the conversion is made explicit.
  *ptr = func(e, *ptr, (int)size, ctx);
  e->limit_ptr = saved_limit_ptr;
  e->limit = saved_limit;
  UPB_ASSERT(e->limit_ptr == e->end + UPB_MIN(0, e->limit));
  return true;
}
#ifdef __cplusplus
} /* extern "C" */
#endif
#include "upb/port/undef.inc"
#endif // UPB_WIRE_INTERNAL_EPS_COPY_INPUT_STREAM_H_