Auto-generate files after cl/697008785
diff --git a/php/ext/google/protobuf/php-upb.c b/php/ext/google/protobuf/php-upb.c
index d320460..bbe1408 100644
--- a/php/ext/google/protobuf/php-upb.c
+++ b/php/ext/google/protobuf/php-upb.c
@@ -6331,7 +6331,7 @@
const uint32_t tag =
((uint32_t)field->UPB_PRIVATE(number) << 3) | kUpb_WireType_Varint;
upb_Message* unknown_msg =
- field->UPB_PRIVATE(mode) & kUpb_LabelFlags_IsExtension ? d->unknown_msg
+ field->UPB_PRIVATE(mode) & kUpb_LabelFlags_IsExtension ? d->original_msg
: msg;
char buf[2 * kUpb_Decoder_EncodeVarint32MaxSize];
char* end = buf;
@@ -7118,7 +7118,7 @@
if (UPB_UNLIKELY(!ext)) {
_upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory);
}
- d->unknown_msg = msg;
+ d->original_msg = msg;
msg = (upb_Message*)&ext->data;
if (upb_MiniTableField_IsSubMessage(&ext->ext->UPB_PRIVATE(field))) {
ext_sub.UPB_PRIVATE(submsg) =
@@ -7163,7 +7163,8 @@
// Since unknown fields are the uncommon case, we do a little extra work here
// to walk backwards through the buffer to find the field start. This frees
// up a register in the fast paths (when the field is known), which leads to
- // significant speedups in benchmarks.
+ // significant speedups in benchmarks. Note that ptr may point into the slop
+ // space, beyond the normal end of the input buffer.
const char* start = ptr;
if (wire_type == kUpb_WireType_Delimited) ptr += val.size;
@@ -7189,15 +7190,20 @@
start = _upb_Decoder_ReverseSkipVarint(start, tag);
assert(start == d->debug_tagstart);
+ const char* input_start =
+ upb_EpsCopyInputStream_GetInputPtr(&d->input, start);
if (wire_type == kUpb_WireType_StartGroup) {
- d->unknown = start;
- d->unknown_msg = msg;
ptr = _upb_Decoder_DecodeUnknownGroup(d, ptr, field_number);
- start = d->unknown;
- d->unknown = NULL;
}
- if (!UPB_PRIVATE(_upb_Message_AddUnknown)(msg, start, ptr - start,
- &d->arena)) {
+ // Normally, bounds checks for fixed or varint fields are performed after
+ // the field is parsed; it's OK for the field to overrun the end of the
+ // buffer, because it'll just read into slop space. However, because this
+ // path reads bytes from the input buffer rather than the patch buffer,
+ // bounds checks are needed before adding the unknown field.
+ _upb_Decoder_IsDone(d, &ptr);
+ const char* input_ptr = upb_EpsCopyInputStream_GetInputPtr(&d->input, ptr);
+ if (!UPB_PRIVATE(_upb_Message_AddUnknown)(
+ msg, input_start, input_ptr - input_start, &d->arena)) {
_upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory);
}
} else if (wire_type == kUpb_WireType_StartGroup) {
@@ -7330,7 +7336,6 @@
options & kUpb_DecodeOption_AliasString);
decoder.extreg = extreg;
- decoder.unknown = NULL;
decoder.depth = depth ? depth : kUpb_WireFormat_DefaultDepthLimit;
decoder.end_group = DECODE_NOGROUP;
decoder.options = (uint16_t)options;
diff --git a/php/ext/google/protobuf/php-upb.h b/php/ext/google/protobuf/php-upb.h
index 94f9042..5882d6e 100644
--- a/php/ext/google/protobuf/php-upb.h
+++ b/php/ext/google/protobuf/php-upb.h
@@ -763,18 +763,13 @@
// this invariant.
#define kUpb_EpsCopyInputStream_SlopBytes 16
-enum {
- kUpb_EpsCopyInputStream_NoAliasing = 0,
- kUpb_EpsCopyInputStream_OnPatch = 1,
- kUpb_EpsCopyInputStream_NoDelta = 2
-};
-
typedef struct {
const char* end; // Can read up to SlopBytes bytes beyond this.
const char* limit_ptr; // For bounds checks, = end + UPB_MIN(limit, 0)
- uintptr_t aliasing;
+ uintptr_t input_delta; // Diff between the original input pointer and patch
int limit; // Submessage limit relative to end
bool error; // To distinguish between EOF and error.
+ bool aliasing;
char patch[kUpb_EpsCopyInputStream_SlopBytes * 2];
} upb_EpsCopyInputStream;
@@ -800,17 +795,16 @@
if (size <= kUpb_EpsCopyInputStream_SlopBytes) {
memset(&e->patch, 0, 32);
if (size) memcpy(&e->patch, *ptr, size);
- e->aliasing = enable_aliasing ? (uintptr_t)*ptr - (uintptr_t)e->patch
- : kUpb_EpsCopyInputStream_NoAliasing;
+ e->input_delta = (uintptr_t)*ptr - (uintptr_t)e->patch;
*ptr = e->patch;
e->end = *ptr + size;
e->limit = 0;
} else {
e->end = *ptr + size - kUpb_EpsCopyInputStream_SlopBytes;
e->limit = kUpb_EpsCopyInputStream_SlopBytes;
- e->aliasing = enable_aliasing ? kUpb_EpsCopyInputStream_NoDelta
- : kUpb_EpsCopyInputStream_NoAliasing;
+ e->input_delta = 0;
}
+ e->aliasing = enable_aliasing;
e->limit_ptr = e->end;
e->error = false;
}
@@ -953,7 +947,7 @@
// upb_EpsCopyInputStream_Init() when this stream was initialized.
UPB_INLINE bool upb_EpsCopyInputStream_AliasingEnabled(
upb_EpsCopyInputStream* e) {
- return e->aliasing != kUpb_EpsCopyInputStream_NoAliasing;
+ return e->aliasing;
}
// Returns true if aliasing_enabled=true was passed to
@@ -963,8 +957,16 @@
upb_EpsCopyInputStream* e, const char* ptr, size_t size) {
// When EpsCopyInputStream supports streaming, this will need to become a
// runtime check.
- return upb_EpsCopyInputStream_CheckDataSizeAvailable(e, ptr, size) &&
- e->aliasing >= kUpb_EpsCopyInputStream_NoDelta;
+ return e->aliasing &&
+ upb_EpsCopyInputStream_CheckDataSizeAvailable(e, ptr, size);
+}
+
+// Returns a pointer into an input buffer that corresponds to the parsing
+// pointer `ptr`. The returned pointer may be the same as `ptr`, but also may
+// be different if we are currently parsing out of the patch buffer.
+UPB_INLINE const char* upb_EpsCopyInputStream_GetInputPtr(
+ upb_EpsCopyInputStream* e, const char* ptr) {
+ return (const char*)(((uintptr_t)ptr) + e->input_delta);
}
// Returns a pointer into an input buffer that corresponds to the parsing
@@ -976,9 +978,7 @@
UPB_INLINE const char* upb_EpsCopyInputStream_GetAliasedPtr(
upb_EpsCopyInputStream* e, const char* ptr) {
UPB_ASSUME(upb_EpsCopyInputStream_AliasingAvailable(e, ptr, 0));
- uintptr_t delta =
- e->aliasing == kUpb_EpsCopyInputStream_NoDelta ? 0 : e->aliasing;
- return (const char*)((uintptr_t)ptr + delta);
+ return upb_EpsCopyInputStream_GetInputPtr(e, ptr);
}
// Reads string data from the input, aliasing into the input buffer instead of
@@ -1092,9 +1092,7 @@
e->limit -= kUpb_EpsCopyInputStream_SlopBytes;
e->limit_ptr = e->end + e->limit;
UPB_ASSERT(ptr < e->limit_ptr);
- if (e->aliasing != kUpb_EpsCopyInputStream_NoAliasing) {
- e->aliasing = (uintptr_t)old_end - (uintptr_t)new_start;
- }
+ e->input_delta = (uintptr_t)old_end - (uintptr_t)new_start;
return callback(e, old_end, new_start);
} else {
UPB_ASSERT(overrun > e->limit);
@@ -14267,17 +14265,18 @@
#ifndef UPB_WIRE_INTERNAL_DECODER_H_
#define UPB_WIRE_INTERNAL_DECODER_H_
+#include <stddef.h>
+
#include "utf8_range.h"
// Must be last.
-#define DECODE_NOGROUP (uint32_t) - 1
+#define DECODE_NOGROUP (uint32_t)-1
typedef struct upb_Decoder {
upb_EpsCopyInputStream input;
const upb_ExtensionRegistry* extreg;
- const char* unknown; // Start of unknown data, preserve at buffer flip
- upb_Message* unknown_msg; // Pointer to preserve data to
+ upb_Message* original_msg; // Pointer to preserve data to
int depth; // Tracks recursion depth to bound stack usage.
uint32_t end_group; // field number of END_GROUP tag, else DECODE_NOGROUP.
uint16_t options;
@@ -14337,14 +14336,6 @@
upb_EpsCopyInputStream* e, const char* old_end, const char* new_start) {
upb_Decoder* d = (upb_Decoder*)e;
if (!old_end) _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed);
-
- if (d->unknown) {
- if (!UPB_PRIVATE(_upb_Message_AddUnknown)(
- d->unknown_msg, d->unknown, old_end - d->unknown, &d->arena)) {
- _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory);
- }
- d->unknown = new_start;
- }
return new_start;
}
diff --git a/ruby/ext/google/protobuf_c/ruby-upb.c b/ruby/ext/google/protobuf_c/ruby-upb.c
index debe573..188598f 100644
--- a/ruby/ext/google/protobuf_c/ruby-upb.c
+++ b/ruby/ext/google/protobuf_c/ruby-upb.c
@@ -6331,7 +6331,7 @@
const uint32_t tag =
((uint32_t)field->UPB_PRIVATE(number) << 3) | kUpb_WireType_Varint;
upb_Message* unknown_msg =
- field->UPB_PRIVATE(mode) & kUpb_LabelFlags_IsExtension ? d->unknown_msg
+ field->UPB_PRIVATE(mode) & kUpb_LabelFlags_IsExtension ? d->original_msg
: msg;
char buf[2 * kUpb_Decoder_EncodeVarint32MaxSize];
char* end = buf;
@@ -7118,7 +7118,7 @@
if (UPB_UNLIKELY(!ext)) {
_upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory);
}
- d->unknown_msg = msg;
+ d->original_msg = msg;
msg = (upb_Message*)&ext->data;
if (upb_MiniTableField_IsSubMessage(&ext->ext->UPB_PRIVATE(field))) {
ext_sub.UPB_PRIVATE(submsg) =
@@ -7163,7 +7163,8 @@
// Since unknown fields are the uncommon case, we do a little extra work here
// to walk backwards through the buffer to find the field start. This frees
// up a register in the fast paths (when the field is known), which leads to
- // significant speedups in benchmarks.
+ // significant speedups in benchmarks. Note that ptr may point into the slop
+ // space, beyond the normal end of the input buffer.
const char* start = ptr;
if (wire_type == kUpb_WireType_Delimited) ptr += val.size;
@@ -7189,15 +7190,20 @@
start = _upb_Decoder_ReverseSkipVarint(start, tag);
assert(start == d->debug_tagstart);
+ const char* input_start =
+ upb_EpsCopyInputStream_GetInputPtr(&d->input, start);
if (wire_type == kUpb_WireType_StartGroup) {
- d->unknown = start;
- d->unknown_msg = msg;
ptr = _upb_Decoder_DecodeUnknownGroup(d, ptr, field_number);
- start = d->unknown;
- d->unknown = NULL;
}
- if (!UPB_PRIVATE(_upb_Message_AddUnknown)(msg, start, ptr - start,
- &d->arena)) {
+ // Normally, bounds checks for fixed or varint fields are performed after
+ // the field is parsed; it's OK for the field to overrun the end of the
+ // buffer, because it'll just read into slop space. However, because this
+ // path reads bytes from the input buffer rather than the patch buffer,
+ // bounds checks are needed before adding the unknown field.
+ _upb_Decoder_IsDone(d, &ptr);
+ const char* input_ptr = upb_EpsCopyInputStream_GetInputPtr(&d->input, ptr);
+ if (!UPB_PRIVATE(_upb_Message_AddUnknown)(
+ msg, input_start, input_ptr - input_start, &d->arena)) {
_upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory);
}
} else if (wire_type == kUpb_WireType_StartGroup) {
@@ -7330,7 +7336,6 @@
options & kUpb_DecodeOption_AliasString);
decoder.extreg = extreg;
- decoder.unknown = NULL;
decoder.depth = depth ? depth : kUpb_WireFormat_DefaultDepthLimit;
decoder.end_group = DECODE_NOGROUP;
decoder.options = (uint16_t)options;
diff --git a/ruby/ext/google/protobuf_c/ruby-upb.h b/ruby/ext/google/protobuf_c/ruby-upb.h
index c63adc7..c860581 100755
--- a/ruby/ext/google/protobuf_c/ruby-upb.h
+++ b/ruby/ext/google/protobuf_c/ruby-upb.h
@@ -765,18 +765,13 @@
// this invariant.
#define kUpb_EpsCopyInputStream_SlopBytes 16
-enum {
- kUpb_EpsCopyInputStream_NoAliasing = 0,
- kUpb_EpsCopyInputStream_OnPatch = 1,
- kUpb_EpsCopyInputStream_NoDelta = 2
-};
-
typedef struct {
const char* end; // Can read up to SlopBytes bytes beyond this.
const char* limit_ptr; // For bounds checks, = end + UPB_MIN(limit, 0)
- uintptr_t aliasing;
+ uintptr_t input_delta; // Diff between the original input pointer and patch
int limit; // Submessage limit relative to end
bool error; // To distinguish between EOF and error.
+ bool aliasing;
char patch[kUpb_EpsCopyInputStream_SlopBytes * 2];
} upb_EpsCopyInputStream;
@@ -802,17 +797,16 @@
if (size <= kUpb_EpsCopyInputStream_SlopBytes) {
memset(&e->patch, 0, 32);
if (size) memcpy(&e->patch, *ptr, size);
- e->aliasing = enable_aliasing ? (uintptr_t)*ptr - (uintptr_t)e->patch
- : kUpb_EpsCopyInputStream_NoAliasing;
+ e->input_delta = (uintptr_t)*ptr - (uintptr_t)e->patch;
*ptr = e->patch;
e->end = *ptr + size;
e->limit = 0;
} else {
e->end = *ptr + size - kUpb_EpsCopyInputStream_SlopBytes;
e->limit = kUpb_EpsCopyInputStream_SlopBytes;
- e->aliasing = enable_aliasing ? kUpb_EpsCopyInputStream_NoDelta
- : kUpb_EpsCopyInputStream_NoAliasing;
+ e->input_delta = 0;
}
+ e->aliasing = enable_aliasing;
e->limit_ptr = e->end;
e->error = false;
}
@@ -955,7 +949,7 @@
// upb_EpsCopyInputStream_Init() when this stream was initialized.
UPB_INLINE bool upb_EpsCopyInputStream_AliasingEnabled(
upb_EpsCopyInputStream* e) {
- return e->aliasing != kUpb_EpsCopyInputStream_NoAliasing;
+ return e->aliasing;
}
// Returns true if aliasing_enabled=true was passed to
@@ -965,8 +959,16 @@
upb_EpsCopyInputStream* e, const char* ptr, size_t size) {
// When EpsCopyInputStream supports streaming, this will need to become a
// runtime check.
- return upb_EpsCopyInputStream_CheckDataSizeAvailable(e, ptr, size) &&
- e->aliasing >= kUpb_EpsCopyInputStream_NoDelta;
+ return e->aliasing &&
+ upb_EpsCopyInputStream_CheckDataSizeAvailable(e, ptr, size);
+}
+
+// Returns a pointer into an input buffer that corresponds to the parsing
+// pointer `ptr`. The returned pointer may be the same as `ptr`, but also may
+// be different if we are currently parsing out of the patch buffer.
+UPB_INLINE const char* upb_EpsCopyInputStream_GetInputPtr(
+ upb_EpsCopyInputStream* e, const char* ptr) {
+ return (const char*)(((uintptr_t)ptr) + e->input_delta);
}
// Returns a pointer into an input buffer that corresponds to the parsing
@@ -978,9 +980,7 @@
UPB_INLINE const char* upb_EpsCopyInputStream_GetAliasedPtr(
upb_EpsCopyInputStream* e, const char* ptr) {
UPB_ASSUME(upb_EpsCopyInputStream_AliasingAvailable(e, ptr, 0));
- uintptr_t delta =
- e->aliasing == kUpb_EpsCopyInputStream_NoDelta ? 0 : e->aliasing;
- return (const char*)((uintptr_t)ptr + delta);
+ return upb_EpsCopyInputStream_GetInputPtr(e, ptr);
}
// Reads string data from the input, aliasing into the input buffer instead of
@@ -1094,9 +1094,7 @@
e->limit -= kUpb_EpsCopyInputStream_SlopBytes;
e->limit_ptr = e->end + e->limit;
UPB_ASSERT(ptr < e->limit_ptr);
- if (e->aliasing != kUpb_EpsCopyInputStream_NoAliasing) {
- e->aliasing = (uintptr_t)old_end - (uintptr_t)new_start;
- }
+ e->input_delta = (uintptr_t)old_end - (uintptr_t)new_start;
return callback(e, old_end, new_start);
} else {
UPB_ASSERT(overrun > e->limit);
@@ -14269,17 +14267,18 @@
#ifndef UPB_WIRE_INTERNAL_DECODER_H_
#define UPB_WIRE_INTERNAL_DECODER_H_
+#include <stddef.h>
+
#include "utf8_range.h"
// Must be last.
-#define DECODE_NOGROUP (uint32_t) - 1
+#define DECODE_NOGROUP (uint32_t)-1
typedef struct upb_Decoder {
upb_EpsCopyInputStream input;
const upb_ExtensionRegistry* extreg;
- const char* unknown; // Start of unknown data, preserve at buffer flip
- upb_Message* unknown_msg; // Pointer to preserve data to
+ upb_Message* original_msg; // Pointer to preserve data to
int depth; // Tracks recursion depth to bound stack usage.
uint32_t end_group; // field number of END_GROUP tag, else DECODE_NOGROUP.
uint16_t options;
@@ -14339,14 +14338,6 @@
upb_EpsCopyInputStream* e, const char* old_end, const char* new_start) {
upb_Decoder* d = (upb_Decoder*)e;
if (!old_end) _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed);
-
- if (d->unknown) {
- if (!UPB_PRIVATE(_upb_Message_AddUnknown)(
- d->unknown_msg, d->unknown, old_end - d->unknown, &d->arena)) {
- _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory);
- }
- d->unknown = new_start;
- }
return new_start;
}