blob: a77aea3dad6f031cd40f0efd42e52090566d4387 [file] [log] [blame]
Adam Cozzette501ecec2023-09-26 14:36:20 -07001// Protocol Buffers - Google's data interchange format
2// Copyright 2023 Google LLC. All rights reserved.
3//
4// Use of this source code is governed by a BSD-style
5// license that can be found in the LICENSE file or at
6// https://developers.google.com/open-source/licenses/bsd
7
8// Fast decoder: ~3x the speed of decode.c, but requires x86-64/ARM64.
9// Also the table size grows by 2x.
10//
11// Could potentially be ported to other 64-bit archs that pass at least six
12// arguments in registers and have 8 unused high bits in pointers.
13//
14// The overall design is to create specialized functions for every possible
15// field type (eg. oneof boolean field with a 1 byte tag) and then dispatch
16// to the specialized function as quickly as possible.
17
18#include "upb/wire/decode_fast.h"
19
Eric Salo07fba1d2023-09-29 14:50:56 -070020#include "upb/message/array.h"
21#include "upb/message/internal/array.h"
Adam Cozzette501ecec2023-09-26 14:36:20 -070022#include "upb/message/internal/types.h"
Eric Salod0c85632023-11-28 20:50:48 -080023#include "upb/mini_table/sub.h"
Adam Cozzette501ecec2023-09-26 14:36:20 -070024#include "upb/wire/internal/decode.h"
25
26// Must be last.
27#include "upb/port/def.inc"
28
29#if UPB_FASTTABLE
30
// Parameter list shared by every parsing function in this file.  The x86-64
// calling convention keeps all six of these in registers.
#define UPB_PARSE_PARAMS                                             \
  upb_Decoder *d, const char *ptr, upb_Message *msg, intptr_t table, \
      uint64_t hasbits, uint64_t data

// Forwards the six parameters above, by name, to another parsing function.
#define UPB_PARSE_ARGS d, ptr, msg, table, hasbits, data
38
// Leaves the fast path: returns whatever the generic decoder produces for the
// current position, passing 0 as `data`.  The message `m` is consumed only by
// the optional debugging aids inside the macro.
#define RETURN_GENERIC(m)                                 \
  /* Uncomment either of these for debugging purposes. */ \
  /* fprintf(stderr, m); */                               \
  /*__builtin_trap(); */                                  \
  return _upb_FastDecoder_DecodeGeneric(d, ptr, msg, table, hasbits, 0);
44
// Field cardinality.  The generator macros below paste the suffix letter onto
// `CARD_` to pick which specialization of a parser body is compiled.
typedef enum {
  CARD_s = 0,  // Singular (optional, non-repeated).
  CARD_o = 1,  // Oneof.
  CARD_r = 2,  // Repeated.
  CARD_p = 3,  // Packed repeated.
} upb_card;
51
52UPB_NOINLINE
53static const char* fastdecode_isdonefallback(UPB_PARSE_PARAMS) {
54 int overrun = data;
55 ptr = _upb_EpsCopyInputStream_IsDoneFallbackInline(
56 &d->input, ptr, overrun, _upb_Decoder_BufferFlipCallback);
57 data = _upb_FastDecoder_LoadTag(ptr);
58 UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS);
59}
60
61UPB_FORCEINLINE
62static const char* fastdecode_dispatch(UPB_PARSE_PARAMS) {
63 int overrun;
64 switch (upb_EpsCopyInputStream_IsDoneStatus(&d->input, ptr, &overrun)) {
65 case kUpb_IsDoneStatus_Done:
66 *(uint32_t*)msg |= hasbits; // Sync hasbits.
67 const upb_MiniTable* l = decode_totablep(table);
68 return UPB_UNLIKELY(l->required_count)
69 ? _upb_Decoder_CheckRequired(d, ptr, msg, l)
70 : ptr;
71 case kUpb_IsDoneStatus_NotDone:
72 break;
73 case kUpb_IsDoneStatus_NeedFallback:
74 data = overrun;
75 UPB_MUSTTAIL return fastdecode_isdonefallback(UPB_PARSE_ARGS);
76 }
77
78 // Read two bytes of tag data (for a one-byte tag, the high byte is junk).
79 data = _upb_FastDecoder_LoadTag(ptr);
80 UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS);
81}
82
83UPB_FORCEINLINE
84static bool fastdecode_checktag(uint16_t data, int tagbytes) {
85 if (tagbytes == 1) {
86 return (data & 0xff) == 0;
87 } else {
88 return data == 0;
89 }
90}
91
92UPB_FORCEINLINE
93static const char* fastdecode_longsize(const char* ptr, int* size) {
94 int i;
95 UPB_ASSERT(*size & 0x80);
96 *size &= 0xff;
97 for (i = 0; i < 3; i++) {
98 ptr++;
99 size_t byte = (uint8_t)ptr[-1];
100 *size += (byte - 1) << (7 + 7 * i);
101 if (UPB_LIKELY((byte & 0x80) == 0)) return ptr;
102 }
103 ptr++;
104 size_t byte = (uint8_t)ptr[-1];
105 // len is limited by 2gb not 4gb, hence 8 and not 16 as normally expected
106 // for a 32 bit varint.
107 if (UPB_UNLIKELY(byte >= 8)) return NULL;
108 *size += (byte - 1) << 28;
109 return ptr;
110}
111
112UPB_FORCEINLINE
113static const char* fastdecode_delimited(
114 upb_Decoder* d, const char* ptr,
115 upb_EpsCopyInputStream_ParseDelimitedFunc* func, void* ctx) {
116 ptr++;
117
118 // Sign-extend so varint greater than one byte becomes negative, causing
119 // fast delimited parse to fail.
120 int len = (int8_t)ptr[-1];
121
122 if (!upb_EpsCopyInputStream_TryParseDelimitedFast(&d->input, &ptr, len, func,
123 ctx)) {
124 // Slow case: Sub-message is >=128 bytes and/or exceeds the current buffer.
125 // If it exceeds the buffer limit, limit/limit_ptr will change during
126 // sub-message parsing, so we need to preserve delta, not limit.
127 if (UPB_UNLIKELY(len & 0x80)) {
128 // Size varint >1 byte (length >= 128).
129 ptr = fastdecode_longsize(ptr, &len);
130 if (!ptr) {
131 // Corrupt wire format: size exceeded INT_MAX.
132 return NULL;
133 }
134 }
135 if (!upb_EpsCopyInputStream_CheckSize(&d->input, ptr, len)) {
136 // Corrupt wire format: invalid limit.
137 return NULL;
138 }
139 int delta = upb_EpsCopyInputStream_PushLimit(&d->input, ptr, len);
140 ptr = func(&d->input, ptr, ctx);
141 upb_EpsCopyInputStream_PopLimit(&d->input, ptr, delta);
142 }
143 return ptr;
144}
145
146/* singular, oneof, repeated field handling ***********************************/
147
148typedef struct {
149 upb_Array* arr;
150 void* end;
151} fastdecode_arr;
152
153typedef enum {
154 FD_NEXT_ATLIMIT,
155 FD_NEXT_SAMEFIELD,
156 FD_NEXT_OTHERFIELD
157} fastdecode_next;
158
159typedef struct {
160 void* dst;
161 fastdecode_next next;
162 uint32_t tag;
163} fastdecode_nextret;
164
165UPB_FORCEINLINE
166static void* fastdecode_resizearr(upb_Decoder* d, void* dst,
167 fastdecode_arr* farr, int valbytes) {
168 if (UPB_UNLIKELY(dst == farr->end)) {
Eric Salo3ce2c572023-11-27 10:45:25 -0800169 size_t old_capacity = farr->arr->UPB_PRIVATE(capacity);
Eric Salo8324c902023-11-17 17:15:35 -0800170 size_t old_bytes = old_capacity * valbytes;
171 size_t new_capacity = old_capacity * 2;
172 size_t new_bytes = new_capacity * valbytes;
Adam Cozzette501ecec2023-09-26 14:36:20 -0700173 char* old_ptr = _upb_array_ptr(farr->arr);
174 char* new_ptr = upb_Arena_Realloc(&d->arena, old_ptr, old_bytes, new_bytes);
175 uint8_t elem_size_lg2 = __builtin_ctz(valbytes);
Eric Salo3ce2c572023-11-27 10:45:25 -0800176 UPB_PRIVATE(_upb_Array_SetTaggedPtr)(farr->arr, new_ptr, elem_size_lg2);
177 farr->arr->UPB_PRIVATE(capacity) = new_capacity;
Eric Salo8324c902023-11-17 17:15:35 -0800178 dst = (void*)(new_ptr + (old_capacity * valbytes));
179 farr->end = (void*)(new_ptr + (new_capacity * valbytes));
Adam Cozzette501ecec2023-09-26 14:36:20 -0700180 }
181 return dst;
182}
183
184UPB_FORCEINLINE
185static bool fastdecode_tagmatch(uint32_t tag, uint64_t data, int tagbytes) {
186 if (tagbytes == 1) {
187 return (uint8_t)tag == (uint8_t)data;
188 } else {
189 return (uint16_t)tag == (uint16_t)data;
190 }
191}
192
193UPB_FORCEINLINE
194static void fastdecode_commitarr(void* dst, fastdecode_arr* farr,
195 int valbytes) {
196 farr->arr->size =
197 (size_t)((char*)dst - (char*)_upb_array_ptr(farr->arr)) / valbytes;
198}
199
200UPB_FORCEINLINE
201static fastdecode_nextret fastdecode_nextrepeated(upb_Decoder* d, void* dst,
202 const char** ptr,
203 fastdecode_arr* farr,
204 uint64_t data, int tagbytes,
205 int valbytes) {
206 fastdecode_nextret ret;
207 dst = (char*)dst + valbytes;
208
209 if (UPB_LIKELY(!_upb_Decoder_IsDone(d, ptr))) {
210 ret.tag = _upb_FastDecoder_LoadTag(*ptr);
211 if (fastdecode_tagmatch(ret.tag, data, tagbytes)) {
212 ret.next = FD_NEXT_SAMEFIELD;
213 } else {
214 fastdecode_commitarr(dst, farr, valbytes);
215 ret.next = FD_NEXT_OTHERFIELD;
216 }
217 } else {
218 fastdecode_commitarr(dst, farr, valbytes);
219 ret.next = FD_NEXT_ATLIMIT;
220 }
221
222 ret.dst = dst;
223 return ret;
224}
225
226UPB_FORCEINLINE
227static void* fastdecode_fieldmem(upb_Message* msg, uint64_t data) {
228 size_t ofs = data >> 48;
229 return (char*)msg + ofs;
230}
231
232UPB_FORCEINLINE
233static void* fastdecode_getfield(upb_Decoder* d, const char* ptr,
234 upb_Message* msg, uint64_t* data,
235 uint64_t* hasbits, fastdecode_arr* farr,
236 int valbytes, upb_card card) {
237 switch (card) {
238 case CARD_s: {
239 uint8_t hasbit_index = *data >> 24;
240 // Set hasbit and return pointer to scalar field.
241 *hasbits |= 1ull << hasbit_index;
242 return fastdecode_fieldmem(msg, *data);
243 }
244 case CARD_o: {
245 uint16_t case_ofs = *data >> 32;
246 uint32_t* oneof_case = UPB_PTR_AT(msg, case_ofs, uint32_t);
247 uint8_t field_number = *data >> 24;
248 *oneof_case = field_number;
249 return fastdecode_fieldmem(msg, *data);
250 }
251 case CARD_r: {
252 // Get pointer to upb_Array and allocate/expand if necessary.
253 uint8_t elem_size_lg2 = __builtin_ctz(valbytes);
254 upb_Array** arr_p = fastdecode_fieldmem(msg, *data);
255 char* begin;
256 *(uint32_t*)msg |= *hasbits;
257 *hasbits = 0;
258 if (UPB_LIKELY(!*arr_p)) {
Eric Salo3ce2c572023-11-27 10:45:25 -0800259 farr->arr = UPB_PRIVATE(_upb_Array_New)(&d->arena, 8, elem_size_lg2);
Adam Cozzette501ecec2023-09-26 14:36:20 -0700260 *arr_p = farr->arr;
261 } else {
262 farr->arr = *arr_p;
263 }
264 begin = _upb_array_ptr(farr->arr);
Eric Salo3ce2c572023-11-27 10:45:25 -0800265 farr->end = begin + (farr->arr->UPB_PRIVATE(capacity) * valbytes);
Adam Cozzette501ecec2023-09-26 14:36:20 -0700266 *data = _upb_FastDecoder_LoadTag(ptr);
267 return begin + (farr->arr->size * valbytes);
268 }
269 default:
270 UPB_UNREACHABLE();
271 }
272}
273
274UPB_FORCEINLINE
275static bool fastdecode_flippacked(uint64_t* data, int tagbytes) {
276 *data ^= (0x2 ^ 0x0); // Patch data to match packed wiretype.
277 return fastdecode_checktag(*data, tagbytes);
278}
279
// Tag guard shared by the varint and fixed-width parsers.  If `data` fails the
// tag check and the field is repeated, the wire-type bit is flipped once; when
// that makes the tag match, control tail-calls `func` (the parser for the
// other encoding).  Any other mismatch goes to the generic decoder.
#define FASTDECODE_CHECKPACKED(tagbytes, card, func)        \
  if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { \
    if (card == CARD_r) {                                   \
      if (fastdecode_flippacked(&data, tagbytes)) {         \
        UPB_MUSTTAIL return func(UPB_PARSE_ARGS);           \
      }                                                     \
    }                                                       \
    RETURN_GENERIC("packed check tag mismatch\n");          \
  }
287
288/* varint fields **************************************************************/
289
290UPB_FORCEINLINE
291static uint64_t fastdecode_munge(uint64_t val, int valbytes, bool zigzag) {
292 if (valbytes == 1) {
293 return val != 0;
294 } else if (zigzag) {
295 if (valbytes == 4) {
296 uint32_t n = val;
297 return (n >> 1) ^ -(int32_t)(n & 1);
298 } else if (valbytes == 8) {
299 return (val >> 1) ^ -(int64_t)(val & 1);
300 }
301 UPB_UNREACHABLE();
302 }
303 return val;
304}
305
306UPB_FORCEINLINE
307static const char* fastdecode_varint64(const char* ptr, uint64_t* val) {
308 ptr++;
309 *val = (uint8_t)ptr[-1];
310 if (UPB_UNLIKELY(*val & 0x80)) {
311 int i;
312 for (i = 0; i < 8; i++) {
313 ptr++;
314 uint64_t byte = (uint8_t)ptr[-1];
315 *val += (byte - 1) << (7 + 7 * i);
316 if (UPB_LIKELY((byte & 0x80) == 0)) goto done;
317 }
318 ptr++;
319 uint64_t byte = (uint8_t)ptr[-1];
320 if (byte > 1) {
321 return NULL;
322 }
323 *val += (byte - 1) << 63;
324 }
325done:
326 UPB_ASSUME(ptr != NULL);
327 return ptr;
328}
329
// Parser body for a varint field encoded one value per tag (singular, oneof,
// or unpacked repeated).  `packed` names the packed-encoding parser to switch
// to when the tag check says so.  For repeated fields the body loops for as
// long as the following tag is the same field.
#define FASTDECODE_UNPACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \
                                  valbytes, card, zigzag, packed)              \
  uint64_t value;                                                              \
  void* dst;                                                                   \
  fastdecode_arr farr;                                                         \
                                                                               \
  FASTDECODE_CHECKPACKED(tagbytes, card, packed);                              \
                                                                               \
  dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, valbytes,     \
                            card);                                             \
  if (card == CARD_r && UPB_UNLIKELY(!dst)) {                                  \
    RETURN_GENERIC("need array resize\n");                                     \
  }                                                                            \
                                                                               \
  next_value:                                                                  \
  if (card == CARD_r) {                                                        \
    dst = fastdecode_resizearr(d, dst, &farr, valbytes);                       \
  }                                                                            \
                                                                               \
  ptr += tagbytes;                                                             \
  ptr = fastdecode_varint64(ptr, &value);                                      \
  if (ptr == NULL) {                                                           \
    _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed);                 \
  }                                                                            \
  value = fastdecode_munge(value, valbytes, zigzag);                           \
  memcpy(dst, &value, valbytes);                                               \
                                                                               \
  if (card == CARD_r) {                                                        \
    fastdecode_nextret step = fastdecode_nextrepeated(                         \
        d, dst, &ptr, &farr, data, tagbytes, valbytes);                        \
    switch (step.next) {                                                       \
      case FD_NEXT_SAMEFIELD:                                                  \
        dst = step.dst;                                                        \
        goto next_value;                                                       \
      case FD_NEXT_OTHERFIELD:                                                 \
        data = step.tag;                                                       \
        UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS);      \
      case FD_NEXT_ATLIMIT:                                                    \
        return ptr;                                                            \
    }                                                                          \
  }                                                                            \
                                                                               \
  UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS);
373
374typedef struct {
375 uint8_t valbytes;
376 bool zigzag;
377 void* dst;
378 fastdecode_arr farr;
379} fastdecode_varintdata;
380
381UPB_FORCEINLINE
382static const char* fastdecode_topackedvarint(upb_EpsCopyInputStream* e,
383 const char* ptr, void* ctx) {
384 upb_Decoder* d = (upb_Decoder*)e;
385 fastdecode_varintdata* data = ctx;
386 void* dst = data->dst;
387 uint64_t val;
388
389 while (!_upb_Decoder_IsDone(d, &ptr)) {
390 dst = fastdecode_resizearr(d, dst, &data->farr, data->valbytes);
391 ptr = fastdecode_varint64(ptr, &val);
392 if (ptr == NULL) return NULL;
393 val = fastdecode_munge(val, data->valbytes, data->zigzag);
394 memcpy(dst, &val, data->valbytes);
395 dst = (char*)dst + data->valbytes;
396 }
397
398 fastdecode_commitarr(dst, &data->farr, data->valbytes);
399 return ptr;
400}
401
// Parser body for a packed repeated varint field.  `unpacked` names the parser
// for the one-value-per-tag encoding, used when the tag check says so.  The
// whole length-delimited region is consumed by fastdecode_topackedvarint().
#define FASTDECODE_PACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \
                                valbytes, zigzag, unpacked)                  \
  fastdecode_varintdata packed_ctx = {valbytes, zigzag};                     \
                                                                             \
  FASTDECODE_CHECKPACKED(tagbytes, CARD_r, unpacked);                        \
                                                                             \
  packed_ctx.dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits,         \
                                       &packed_ctx.farr, valbytes, CARD_r);  \
  if (UPB_UNLIKELY(!packed_ctx.dst)) {                                       \
    RETURN_GENERIC("need array resize\n");                                   \
  }                                                                          \
                                                                             \
  ptr += tagbytes;                                                           \
  ptr = fastdecode_delimited(d, ptr, &fastdecode_topackedvarint,             \
                             &packed_ctx);                                   \
                                                                             \
  if (UPB_UNLIKELY(ptr == NULL)) {                                           \
    _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed);               \
  }                                                                          \
                                                                             \
  UPB_MUSTTAIL return fastdecode_dispatch(d, ptr, msg, table, hasbits, 0);
422
// Selects the varint parser body by cardinality: CARD_p gets the packed body,
// every other cardinality gets the unpacked one.
#define FASTDECODE_VARINT(d, ptr, msg, table, hasbits, data, tagbytes,     \
                          valbytes, card, zigzag, unpacked, packed)        \
  if (card != CARD_p) {                                                    \
    FASTDECODE_UNPACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \
                              valbytes, card, zigzag, packed);             \
  } else {                                                                 \
    FASTDECODE_PACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes,   \
                            valbytes, zigzag, unpacked);                   \
  }
432
433#define z_ZZ true
434#define b_ZZ false
435#define v_ZZ false
436
437/* Generate all combinations:
438 * {s,o,r,p} x {b1,v4,z4,v8,z8} x {1bt,2bt} */
439
440#define F(card, type, valbytes, tagbytes) \
441 UPB_NOINLINE \
442 const char* upb_p##card##type##valbytes##_##tagbytes##bt(UPB_PARSE_PARAMS) { \
443 FASTDECODE_VARINT(d, ptr, msg, table, hasbits, data, tagbytes, valbytes, \
444 CARD_##card, type##_ZZ, \
445 upb_pr##type##valbytes##_##tagbytes##bt, \
446 upb_pp##type##valbytes##_##tagbytes##bt); \
447 }
448
449#define TYPES(card, tagbytes) \
450 F(card, b, 1, tagbytes) \
451 F(card, v, 4, tagbytes) \
452 F(card, v, 8, tagbytes) \
453 F(card, z, 4, tagbytes) \
454 F(card, z, 8, tagbytes)
455
456#define TAGBYTES(card) \
457 TYPES(card, 1) \
458 TYPES(card, 2)
459
460TAGBYTES(s)
461TAGBYTES(o)
462TAGBYTES(r)
463TAGBYTES(p)
464
465#undef z_ZZ
466#undef b_ZZ
467#undef v_ZZ
468#undef o_ONEOF
469#undef s_ONEOF
470#undef r_ONEOF
471#undef F
472#undef TYPES
473#undef TAGBYTES
474#undef FASTDECODE_UNPACKEDVARINT
475#undef FASTDECODE_PACKEDVARINT
476#undef FASTDECODE_VARINT
477
478/* fixed fields ***************************************************************/
479
// Parser body for a fixed-width field encoded one value per tag (singular,
// oneof, or unpacked repeated).  The `valbytes` bytes after the tag are copied
// verbatim into the destination.  For repeated fields the body loops for as
// long as the following tag is the same field.
#define FASTDECODE_UNPACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \
                                 valbytes, card, packed)                      \
  void* dst;                                                                  \
  fastdecode_arr farr;                                                        \
                                                                              \
  FASTDECODE_CHECKPACKED(tagbytes, card, packed)                              \
                                                                              \
  dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, valbytes,    \
                            card);                                            \
  if (card == CARD_r && UPB_UNLIKELY(!dst)) {                                 \
    RETURN_GENERIC("couldn't allocate array in arena\n");                     \
  }                                                                           \
                                                                              \
  next_value:                                                                 \
  if (card == CARD_r) {                                                       \
    dst = fastdecode_resizearr(d, dst, &farr, valbytes);                      \
  }                                                                           \
                                                                              \
  ptr += tagbytes;                                                            \
  memcpy(dst, ptr, valbytes);                                                 \
  ptr += valbytes;                                                            \
                                                                              \
  if (card == CARD_r) {                                                       \
    fastdecode_nextret step = fastdecode_nextrepeated(                        \
        d, dst, &ptr, &farr, data, tagbytes, valbytes);                       \
    switch (step.next) {                                                      \
      case FD_NEXT_SAMEFIELD:                                                 \
        dst = step.dst;                                                       \
        goto next_value;                                                      \
      case FD_NEXT_OTHERFIELD:                                                \
        data = step.tag;                                                      \
        UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS);     \
      case FD_NEXT_ATLIMIT:                                                   \
        return ptr;                                                           \
    }                                                                         \
  }                                                                           \
                                                                              \
  UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS);
520
// Parser body for a packed repeated fixed-width field.  Reads the region
// length, requires it to be fully available and a multiple of `valbytes`,
// then copies the whole region into the array in one memcpy.
//
// Error handling:
//  - a length varint rejected by fastdecode_longsize() (NULL) -> Malformed;
//  - region not available or not a multiple of `valbytes`     -> Malformed;
//  - array allocation or resize failure                       -> OutOfMemory.
//
// NOTE(review): an array that already exists is resized to `elems` and
// overwritten from its first slot, rather than appended to -- confirm that
// this is the intended behaviour.
#define FASTDECODE_PACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \
                               valbytes, unpacked)                          \
  FASTDECODE_CHECKPACKED(tagbytes, CARD_r, unpacked)                        \
                                                                            \
  ptr += tagbytes;                                                          \
  int size = (uint8_t)ptr[0];                                               \
  ptr++;                                                                    \
  if (size & 0x80) {                                                        \
    ptr = fastdecode_longsize(ptr, &size);                                  \
    if (UPB_UNLIKELY(ptr == NULL)) {                                        \
      /* Corrupt wire format: size exceeded INT_MAX. */                     \
      _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed);            \
    }                                                                       \
  }                                                                         \
                                                                            \
  if (UPB_UNLIKELY(!upb_EpsCopyInputStream_CheckDataSizeAvailable(          \
                       &d->input, ptr, size) ||                             \
                   (size % valbytes) != 0)) {                               \
    _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed);              \
  }                                                                         \
                                                                            \
  upb_Array** arr_p = fastdecode_fieldmem(msg, data);                       \
  upb_Array* arr = *arr_p;                                                  \
  uint8_t elem_size_lg2 = __builtin_ctz(valbytes);                          \
  int elems = size / valbytes;                                              \
                                                                            \
  if (UPB_LIKELY(!arr)) {                                                   \
    *arr_p = arr =                                                          \
        UPB_PRIVATE(_upb_Array_New)(&d->arena, elems, elem_size_lg2);       \
    if (!arr) {                                                             \
      _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory);          \
    }                                                                       \
  } else if (UPB_UNLIKELY(                                                  \
                 !_upb_Array_ResizeUninitialized(arr, elems, &d->arena))) { \
    /* Without this check the memcpy below could overrun the old buffer. */ \
    _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory);            \
  }                                                                         \
                                                                            \
  char* dst = _upb_array_ptr(arr);                                          \
  memcpy(dst, ptr, size);                                                   \
  arr->size = elems;                                                        \
                                                                            \
  ptr += size;                                                              \
  UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS);
559
// Selects the fixed-width parser body by cardinality: CARD_p gets the packed
// body, every other cardinality gets the unpacked one.
#define FASTDECODE_FIXED(d, ptr, msg, table, hasbits, data, tagbytes,     \
                         valbytes, card, unpacked, packed)                \
  if (card != CARD_p) {                                                   \
    FASTDECODE_UNPACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \
                             valbytes, card, packed);                     \
  } else {                                                                \
    FASTDECODE_PACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes,   \
                           valbytes, unpacked);                           \
  }
569
570/* Generate all combinations:
571 * {s,o,r,p} x {f4,f8} x {1bt,2bt} */
572
573#define F(card, valbytes, tagbytes) \
574 UPB_NOINLINE \
575 const char* upb_p##card##f##valbytes##_##tagbytes##bt(UPB_PARSE_PARAMS) { \
576 FASTDECODE_FIXED(d, ptr, msg, table, hasbits, data, tagbytes, valbytes, \
577 CARD_##card, upb_ppf##valbytes##_##tagbytes##bt, \
578 upb_prf##valbytes##_##tagbytes##bt); \
579 }
580
581#define TYPES(card, tagbytes) \
582 F(card, 4, tagbytes) \
583 F(card, 8, tagbytes)
584
585#define TAGBYTES(card) \
586 TYPES(card, 1) \
587 TYPES(card, 2)
588
589TAGBYTES(s)
590TAGBYTES(o)
591TAGBYTES(r)
592TAGBYTES(p)
593
594#undef F
595#undef TYPES
596#undef TAGBYTES
597#undef FASTDECODE_UNPACKEDFIXED
598#undef FASTDECODE_PACKEDFIXED
599
600/* string fields **************************************************************/
601
602typedef const char* fastdecode_copystr_func(struct upb_Decoder* d,
603 const char* ptr, upb_Message* msg,
604 const upb_MiniTable* table,
605 uint64_t hasbits,
606 upb_StringView* dst);
607
608UPB_NOINLINE
609static const char* fastdecode_verifyutf8(upb_Decoder* d, const char* ptr,
610 upb_Message* msg, intptr_t table,
611 uint64_t hasbits, uint64_t data) {
612 upb_StringView* dst = (upb_StringView*)data;
613 if (!_upb_Decoder_VerifyUtf8Inline(dst->data, dst->size)) {
614 _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_BadUtf8);
615 }
616 UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS);
617}
618
// Slow-path body for string fields.  `ptr` points at the first byte of the
// length varint and `dst` at the string view to fill.  The length is decoded
// in full, checked against the input, and the bytes are read through the
// input stream (which may copy them into the arena).
//
// Error handling:
//  - a length varint rejected by fastdecode_longsize() (NULL) -> Malformed;
//  - a length the input stream rejects                        -> Malformed;
//  - a failed string read                                     -> OutOfMemory.
// `dst->size` is zeroed before either Malformed exit.
#define FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, validate_utf8) \
  int size = (uint8_t)ptr[0]; /* Could plumb through hasbits. */               \
  ptr++;                                                                       \
  if (size & 0x80) {                                                           \
    ptr = fastdecode_longsize(ptr, &size);                                     \
    if (UPB_UNLIKELY(ptr == NULL)) {                                           \
      /* Corrupt wire format: size exceeded INT_MAX. */                        \
      dst->size = 0;                                                           \
      _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed);               \
    }                                                                          \
  }                                                                            \
                                                                               \
  if (UPB_UNLIKELY(!upb_EpsCopyInputStream_CheckSize(&d->input, ptr, size))) { \
    dst->size = 0;                                                             \
    _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed);                 \
  }                                                                            \
                                                                               \
  const char* s_ptr = ptr;                                                     \
  ptr = upb_EpsCopyInputStream_ReadString(&d->input, &s_ptr, size, &d->arena); \
  if (!ptr) _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory);       \
  dst->data = s_ptr;                                                           \
  dst->size = size;                                                            \
                                                                               \
  if (validate_utf8) {                                                         \
    data = (uint64_t)dst;                                                      \
    UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS);                 \
  } else {                                                                     \
    UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS);                   \
  }
643
644UPB_NOINLINE
645static const char* fastdecode_longstring_utf8(struct upb_Decoder* d,
646 const char* ptr, upb_Message* msg,
647 intptr_t table, uint64_t hasbits,
648 uint64_t data) {
649 upb_StringView* dst = (upb_StringView*)data;
650 FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, true);
651}
652
653UPB_NOINLINE
654static const char* fastdecode_longstring_noutf8(
655 struct upb_Decoder* d, const char* ptr, upb_Message* msg, intptr_t table,
656 uint64_t hasbits, uint64_t data) {
657 upb_StringView* dst = (upb_StringView*)data;
658 FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, false);
659}
660
661UPB_FORCEINLINE
662static void fastdecode_docopy(upb_Decoder* d, const char* ptr, uint32_t size,
Mike Kruskal8699fde2023-11-10 17:05:31 -0800663 int copy, char* data, size_t data_offset,
664 upb_StringView* dst) {
Adam Cozzette501ecec2023-09-26 14:36:20 -0700665 d->arena.head.ptr += copy;
Mike Kruskal8699fde2023-11-10 17:05:31 -0800666 dst->data = data + data_offset;
Adam Cozzette501ecec2023-09-26 14:36:20 -0700667 UPB_UNPOISON_MEMORY_REGION(data, copy);
668 memcpy(data, ptr, copy);
Mike Kruskal8699fde2023-11-10 17:05:31 -0800669 UPB_POISON_MEMORY_REGION(data + data_offset + size,
670 copy - data_offset - size);
Adam Cozzette501ecec2023-09-26 14:36:20 -0700671}
672
// Parser body for string fields when aliasing the input is not available, so
// the bytes are copied into the arena.
//
// Strings with a one-byte length are copied in fixed 16/32/64/128-byte chunks
// when enough room is available; otherwise control jumps to `long_string`,
// which commits the array (repeated fields) and hands over to the out-of-line
// slow path with `ptr` rewound to the length byte.
#define FASTDECODE_COPYSTRING(d, ptr, msg, table, hasbits, data, tagbytes,    \
                              card, validate_utf8)                            \
  upb_StringView* dst;                                                        \
  fastdecode_arr farr;                                                        \
  int64_t len;                                                                \
  size_t arena_avail;                                                         \
  size_t copy_avail;                                                          \
  char* out;                                                                  \
                                                                              \
  UPB_ASSERT(!upb_EpsCopyInputStream_AliasingAvailable(&d->input, ptr, 0));   \
  UPB_ASSERT(fastdecode_checktag(data, tagbytes));                            \
                                                                              \
  dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr,              \
                            sizeof(upb_StringView), card);                    \
                                                                              \
  next_string:                                                                \
  if (card == CARD_r) {                                                       \
    dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_StringView));        \
  }                                                                           \
                                                                              \
  len = (uint8_t)ptr[tagbytes];                                               \
  ptr += tagbytes + 1;                                                        \
  dst->size = len;                                                            \
                                                                              \
  out = d->arena.head.ptr;                                                    \
  arena_avail = _upb_ArenaHas(&d->arena);                                     \
  copy_avail = UPB_MIN(                                                       \
      arena_avail, upb_EpsCopyInputStream_BytesAvailable(&d->input, ptr));    \
                                                                              \
  if (UPB_LIKELY(len <= 15 - tagbytes)) {                                     \
    if (arena_avail < 16) goto long_string;                                   \
    fastdecode_docopy(d, ptr - tagbytes - 1, len, 16, out, tagbytes + 1,      \
                      dst);                                                   \
  } else if (UPB_LIKELY(len <= 32)) {                                         \
    if (UPB_UNLIKELY(copy_avail < 32)) goto long_string;                      \
    fastdecode_docopy(d, ptr, len, 32, out, 0, dst);                          \
  } else if (UPB_LIKELY(len <= 64)) {                                         \
    if (UPB_UNLIKELY(copy_avail < 64)) goto long_string;                      \
    fastdecode_docopy(d, ptr, len, 64, out, 0, dst);                          \
  } else if (UPB_LIKELY(len < 128)) {                                         \
    if (UPB_UNLIKELY(copy_avail < 128)) goto long_string;                     \
    fastdecode_docopy(d, ptr, len, 128, out, 0, dst);                         \
  } else {                                                                    \
    goto long_string;                                                         \
  }                                                                           \
                                                                              \
  ptr += len;                                                                 \
                                                                              \
  if (card == CARD_r) {                                                       \
    if (validate_utf8 &&                                                      \
        !_upb_Decoder_VerifyUtf8Inline(dst->data, dst->size)) {               \
      _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_BadUtf8);                \
    }                                                                         \
    fastdecode_nextret step = fastdecode_nextrepeated(                        \
        d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_StringView));         \
    switch (step.next) {                                                      \
      case FD_NEXT_SAMEFIELD:                                                 \
        dst = step.dst;                                                       \
        goto next_string;                                                     \
      case FD_NEXT_OTHERFIELD:                                                \
        data = step.tag;                                                      \
        UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS);     \
      case FD_NEXT_ATLIMIT:                                                   \
        return ptr;                                                           \
    }                                                                         \
  }                                                                           \
                                                                              \
  if (card != CARD_r && validate_utf8) {                                      \
    data = (uint64_t)dst;                                                     \
    UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS);                \
  }                                                                           \
                                                                              \
  UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS);                    \
                                                                              \
  long_string:                                                                \
  if (card == CARD_r) {                                                       \
    fastdecode_commitarr(dst + 1, &farr, sizeof(upb_StringView));             \
  }                                                                           \
  ptr--;                                                                      \
  if (validate_utf8) {                                                        \
    UPB_MUSTTAIL return fastdecode_longstring_utf8(d, ptr, msg, table,        \
                                                   hasbits, (uint64_t)dst);   \
  } else {                                                                    \
    UPB_MUSTTAIL return fastdecode_longstring_noutf8(d, ptr, msg, table,      \
                                                     hasbits, (uint64_t)dst); \
  }
759
// Parser body for string fields when the input can be aliased.
//
//  - Tag mismatch: generic decoder.
//  - Aliasing not available at all: tail-call `copyfunc` (the copying parser).
//  - The one-byte length is read sign-extended, so a multi-byte length shows
//    up as negative.  If aliasing is not available for that size, the
//    out-of-line long-string path takes over with `ptr` rewound to the length
//    byte.
//  - Otherwise the string view is pointed straight at the input bytes.
//
// For repeated fields the element handed to the long-string path is committed
// first (`dst + 1`), as FASTDECODE_COPYSTRING does.  Without that, the array
// size would never include this element, nor any earlier one parsed in the
// same run.
#define FASTDECODE_STRING(d, ptr, msg, table, hasbits, data, tagbytes, card,  \
                          copyfunc, validate_utf8)                            \
  upb_StringView* dst;                                                        \
  fastdecode_arr farr;                                                        \
  int64_t size;                                                               \
                                                                              \
  if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) {                   \
    RETURN_GENERIC("string field tag mismatch\n");                            \
  }                                                                           \
                                                                              \
  if (UPB_UNLIKELY(                                                           \
          !upb_EpsCopyInputStream_AliasingAvailable(&d->input, ptr, 0))) {    \
    UPB_MUSTTAIL return copyfunc(UPB_PARSE_ARGS);                             \
  }                                                                           \
                                                                              \
  dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr,              \
                            sizeof(upb_StringView), card);                    \
                                                                              \
  again:                                                                      \
  if (card == CARD_r) {                                                       \
    dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_StringView));        \
  }                                                                           \
                                                                              \
  size = (int8_t)ptr[tagbytes];                                               \
  ptr += tagbytes + 1;                                                        \
                                                                              \
  if (UPB_UNLIKELY(                                                           \
          !upb_EpsCopyInputStream_AliasingAvailable(&d->input, ptr, size))) { \
    if (card == CARD_r) {                                                     \
      fastdecode_commitarr(dst + 1, &farr, sizeof(upb_StringView));           \
    }                                                                         \
    ptr--;                                                                    \
    if (validate_utf8) {                                                      \
      return fastdecode_longstring_utf8(d, ptr, msg, table, hasbits,          \
                                        (uint64_t)dst);                       \
    } else {                                                                  \
      return fastdecode_longstring_noutf8(d, ptr, msg, table, hasbits,        \
                                          (uint64_t)dst);                     \
    }                                                                         \
  }                                                                           \
                                                                              \
  dst->data = ptr;                                                            \
  dst->size = size;                                                           \
  ptr = upb_EpsCopyInputStream_ReadStringAliased(&d->input, &dst->data,       \
                                                 dst->size);                  \
                                                                              \
  if (card == CARD_r) {                                                       \
    if (validate_utf8 &&                                                      \
        !_upb_Decoder_VerifyUtf8Inline(dst->data, dst->size)) {               \
      _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_BadUtf8);                \
    }                                                                         \
    fastdecode_nextret ret = fastdecode_nextrepeated(                         \
        d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_StringView));         \
    switch (ret.next) {                                                       \
      case FD_NEXT_SAMEFIELD:                                                 \
        dst = ret.dst;                                                        \
        goto again;                                                           \
      case FD_NEXT_OTHERFIELD:                                                \
        data = ret.tag;                                                       \
        UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS);     \
      case FD_NEXT_ATLIMIT:                                                   \
        return ptr;                                                           \
    }                                                                         \
  }                                                                           \
                                                                              \
  if (card != CARD_r && validate_utf8) {                                      \
    data = (uint64_t)dst;                                                     \
    UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS);                \
  }                                                                           \
                                                                              \
  UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS);
828
829/* Generate all combinations:
830 * {p,c} x {s,o,r} x {s, b} x {1bt,2bt} */
831
832#define s_VALIDATE true
833#define b_VALIDATE false
834
835#define F(card, tagbytes, type) \
836 UPB_NOINLINE \
837 const char* upb_c##card##type##_##tagbytes##bt(UPB_PARSE_PARAMS) { \
838 FASTDECODE_COPYSTRING(d, ptr, msg, table, hasbits, data, tagbytes, \
839 CARD_##card, type##_VALIDATE); \
840 } \
841 const char* upb_p##card##type##_##tagbytes##bt(UPB_PARSE_PARAMS) { \
842 FASTDECODE_STRING(d, ptr, msg, table, hasbits, data, tagbytes, \
843 CARD_##card, upb_c##card##type##_##tagbytes##bt, \
844 type##_VALIDATE); \
845 }
846
847#define UTF8(card, tagbytes) \
848 F(card, tagbytes, s) \
849 F(card, tagbytes, b)
850
851#define TAGBYTES(card) \
852 UTF8(card, 1) \
853 UTF8(card, 2)
854
855TAGBYTES(s)
856TAGBYTES(o)
857TAGBYTES(r)
858
859#undef s_VALIDATE
860#undef b_VALIDATE
861#undef F
862#undef TAGBYTES
863#undef FASTDECODE_LONGSTRING
864#undef FASTDECODE_COPYSTRING
865#undef FASTDECODE_STRING
866
867/* message fields *************************************************************/
868
869UPB_INLINE
870upb_Message* decode_newmsg_ceil(upb_Decoder* d, const upb_MiniTable* l,
871 int msg_ceil_bytes) {
872 size_t size = l->size + sizeof(upb_Message_Internal);
873 char* msg_data;
874 if (UPB_LIKELY(msg_ceil_bytes > 0 &&
875 _upb_ArenaHas(&d->arena) >= msg_ceil_bytes)) {
876 UPB_ASSERT(size <= (size_t)msg_ceil_bytes);
877 msg_data = d->arena.head.ptr;
878 d->arena.head.ptr += size;
879 UPB_UNPOISON_MEMORY_REGION(msg_data, msg_ceil_bytes);
880 memset(msg_data, 0, msg_ceil_bytes);
881 UPB_POISON_MEMORY_REGION(msg_data + size, msg_ceil_bytes - size);
882 } else {
883 msg_data = (char*)upb_Arena_Malloc(&d->arena, size);
884 memset(msg_data, 0, size);
885 }
886 return msg_data + sizeof(upb_Message_Internal);
887}
888
889typedef struct {
890 intptr_t table;
891 upb_Message* msg;
892} fastdecode_submsgdata;
893
894UPB_FORCEINLINE
895static const char* fastdecode_tosubmsg(upb_EpsCopyInputStream* e,
896 const char* ptr, void* ctx) {
897 upb_Decoder* d = (upb_Decoder*)e;
898 fastdecode_submsgdata* submsg = ctx;
899 ptr = fastdecode_dispatch(d, ptr, submsg->msg, submsg->table, 0, 0);
900 UPB_ASSUME(ptr != NULL);
901 return ptr;
902}
903
// Parser body for sub-message fields.
//
// `d->depth` is decremented on entry (reaching 0 is a MaxDepthExceeded error)
// and incremented again on every exit path below.  The sub-message's
// mini-table is looked up through the index in bits 16..23 of `data`; if it
// has no fast table (table_mask == (uint8_t)-1) the generic decoder takes
// over.  For singular fields the pending hasbits are flushed to the message
// before the sub-message is parsed.  Repeated fields always get a fresh
// message per element; other cardinalities reuse an existing one.
#define FASTDECODE_SUBMSG(d, ptr, msg, table, hasbits, data, tagbytes,    \
                          msg_ceil_bytes, card)                           \
                                                                          \
  if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) {               \
    RETURN_GENERIC("submessage field tag mismatch\n");                    \
  }                                                                       \
                                                                          \
  if (--d->depth == 0) {                                                  \
    _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_MaxDepthExceeded);     \
  }                                                                       \
                                                                          \
  upb_Message** dst;                                                      \
  uint32_t sub_index = (data >> 16) & 0xff;                               \
  const upb_MiniTable* parent_mt = decode_totablep(table);                \
  const upb_MiniTable* sub_mt =                                           \
      upb_MiniTableSub_Message(parent_mt->subs[sub_index]);               \
  fastdecode_submsgdata sub = {decode_totable(sub_mt)};                   \
  fastdecode_arr farr;                                                    \
                                                                          \
  if (sub_mt->table_mask == (uint8_t)-1) {                                \
    d->depth++;                                                           \
    RETURN_GENERIC("submessage doesn't have fast tables.");               \
  }                                                                       \
                                                                          \
  dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr,          \
                            sizeof(upb_Message*), card);                  \
                                                                          \
  if (card == CARD_s) {                                                   \
    *(uint32_t*)msg |= hasbits;                                           \
    hasbits = 0;                                                          \
  }                                                                       \
                                                                          \
  next_submsg:                                                            \
  if (card == CARD_r) {                                                   \
    dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_Message*));      \
  }                                                                       \
                                                                          \
  sub.msg = *dst;                                                         \
                                                                          \
  if (card == CARD_r || UPB_LIKELY(!sub.msg)) {                           \
    *dst = sub.msg = decode_newmsg_ceil(d, sub_mt, msg_ceil_bytes);       \
  }                                                                       \
                                                                          \
  ptr += tagbytes;                                                        \
  ptr = fastdecode_delimited(d, ptr, fastdecode_tosubmsg, &sub);          \
                                                                          \
  if (UPB_UNLIKELY(ptr == NULL || d->end_group != DECODE_NOGROUP)) {      \
    _upb_FastDecoder_ErrorJmp(d, kUpb_DecodeStatus_Malformed);            \
  }                                                                       \
                                                                          \
  if (card == CARD_r) {                                                   \
    fastdecode_nextret step = fastdecode_nextrepeated(                    \
        d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_Message*));       \
    switch (step.next) {                                                  \
      case FD_NEXT_SAMEFIELD:                                             \
        dst = step.dst;                                                   \
        goto next_submsg;                                                 \
      case FD_NEXT_OTHERFIELD:                                            \
        d->depth++;                                                       \
        data = step.tag;                                                  \
        UPB_MUSTTAIL return _upb_FastDecoder_TagDispatch(UPB_PARSE_ARGS); \
      case FD_NEXT_ATLIMIT:                                               \
        d->depth++;                                                       \
        return ptr;                                                       \
    }                                                                     \
  }                                                                       \
                                                                          \
  d->depth++;                                                             \
  UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS);
973
974#define F(card, tagbytes, size_ceil, ceil_arg) \
975 const char* upb_p##card##m_##tagbytes##bt_max##size_ceil##b( \
976 UPB_PARSE_PARAMS) { \
977 FASTDECODE_SUBMSG(d, ptr, msg, table, hasbits, data, tagbytes, ceil_arg, \
978 CARD_##card); \
979 }
980
981#define SIZES(card, tagbytes) \
982 F(card, tagbytes, 64, 64) \
983 F(card, tagbytes, 128, 128) \
984 F(card, tagbytes, 192, 192) \
985 F(card, tagbytes, 256, 256) \
986 F(card, tagbytes, max, -1)
987
988#define TAGBYTES(card) \
989 SIZES(card, 1) \
990 SIZES(card, 2)
991
992TAGBYTES(s)
993TAGBYTES(o)
994TAGBYTES(r)
995
996#undef TAGBYTES
997#undef SIZES
998#undef F
999#undef FASTDECODE_SUBMSG
1000
1001#endif /* UPB_FASTTABLE */