blob: d50bd99f9c421b9a41cb910e71cfec6dd23c8cb4 [file] [log] [blame]
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001/* Amalgamated source file */
2#include "ruby-upb.h"
3/*
Joshua Habermandd69a482021-05-17 22:40:33 -07004 * Copyright (c) 2009-2021, Google LLC
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 * * Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * * Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * * Neither the name of Google LLC nor the
15 * names of its contributors may be used to endorse or promote products
16 * derived from this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY
22 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
25 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30/*
31 * This is where we define macros used across upb.
32 *
33 * All of these macros are undef'd in port_undef.inc to avoid leaking them to
34 * users.
35 *
36 * The correct usage is:
37 *
38 * #include "upb/foobar.h"
39 * #include "upb/baz.h"
40 *
41 * // MUST be last included header.
42 * #include "upb/port_def.inc"
43 *
44 * // Code for this file.
45 * // <...>
46 *
47 * // Can be omitted for .c files, required for .h.
48 * #include "upb/port_undef.inc"
49 *
50 * This file is private and must not be included by users!
51 */
Joshua Haberman9abf6e22021-01-13 12:16:25 -080052
/* Refuse to build unless the toolchain reports at least C99, C++11, or
 * MSVC 2015 (_MSC_VER 1900). */
#if (!defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L) && \
    (!defined(__cplusplus) || __cplusplus < 201103L) &&           \
    (!defined(_MSC_VER) || _MSC_VER < 1900)
#error upb requires C99 or C++11 or MSVC >= 2015.
#endif
58
59#include <stdint.h>
60#include <stddef.h>
61
/* UPB_SIZE(size32, size64): expands to `size32` when UINTPTR_MAX is
 * 0xffffffff (32-bit pointers) and to `size64` otherwise. */
#if UINTPTR_MAX != 0xffffffff
#define UPB_SIZE(size32, size64) size64
#else
#define UPB_SIZE(size32, size64) size32
#endif
67
/* UPB_PTR_AT(msg, ofs, type): pointer to a `type` located `ofs` bytes past
 * `msg`.
 *
 * If we always read/write as a consistent type to each address, this shouldn't
 * violate aliasing.
 */
#define UPB_PTR_AT(msg, ofs, type) ((type*)((char*)(msg) + (ofs)))

/* UPB_READ_ONEOF(): yields the `fieldtype` value stored at `offset` when the
 * int stored at `case_offset` equals `case_val`, and `default` otherwise.
 * The whole expansion is parenthesized so it composes safely inside a larger
 * expression (e.g. `x + UPB_READ_ONEOF(...)`). */
#define UPB_READ_ONEOF(msg, fieldtype, offset, case_offset, case_val, default) \
  (*UPB_PTR_AT(msg, case_offset, int) == (case_val)                            \
       ? *UPB_PTR_AT(msg, offset, fieldtype)                                   \
       : (default))

/* UPB_WRITE_ONEOF(): stores `case_val` into the int at `case_offset`, then
 * stores `value` into the field at `offset`.  Wrapped in do/while(0) so that
 * both stores stay together when the macro is the body of an unbraced
 * if/else/loop.  Callers terminate the invocation with a semicolon. */
#define UPB_WRITE_ONEOF(msg, fieldtype, offset, value, case_offset, case_val) \
  do {                                                                        \
    *UPB_PTR_AT(msg, case_offset, int) = (case_val);                          \
    *UPB_PTR_AT(msg, offset, fieldtype) = (value);                            \
  } while (0)

#define UPB_MAPTYPE_STRING 0
83
/* UPB_INLINE: inline if possible, emit standalone code if required. */
#if defined(__cplusplus)
#define UPB_INLINE inline
#elif defined(__GNUC__) || defined(__clang__)
#define UPB_INLINE static __inline__
#else
#define UPB_INLINE static
#endif

/* Alignment helpers.
 *   UPB_ALIGN_UP(size, align):   round `size` up to a multiple of `align`.
 *   UPB_ALIGN_DOWN(size, align): round `size` down to a multiple of `align`.
 *   UPB_ALIGN_MALLOC(size):      UPB_ALIGN_UP with a fixed alignment of 16.
 *   UPB_ALIGN_OF(type):          offset of a `type` member placed after a
 *                                single char, i.e. its alignment. */
#define UPB_ALIGN_UP(size, align) \
  ((((size) + (align) - 1) / (align)) * (align))
#define UPB_ALIGN_DOWN(size, align) (((size) / (align)) * (align))
#define UPB_ALIGN_MALLOC(size) UPB_ALIGN_UP(size, 16)
#define UPB_ALIGN_OF(type) offsetof(struct { char c; type member; }, member)
97
/* Hints to the compiler about likely/unlikely branches.  Without GCC/Clang
 * builtins the macros simply evaluate their argument. */
#if !defined(__GNUC__) && !defined(__clang__)
#define UPB_LIKELY(x) (x)
#define UPB_UNLIKELY(x) (x)
#else
#define UPB_LIKELY(x) __builtin_expect((x), 1)
#define UPB_UNLIKELY(x) __builtin_expect((x), 0)
#endif
106
/* Macros for function attributes on compilers that support them.  On
 * compilers that are neither GNU-compatible nor MSVC they all expand to
 * nothing. */
#if defined(__GNUC__)
#define UPB_FORCEINLINE __inline__ __attribute__((always_inline))
#define UPB_NOINLINE __attribute__((noinline))
#define UPB_NORETURN __attribute__((__noreturn__))
#define UPB_PRINTF(str, first_vararg) \
  __attribute__((format(printf, str, first_vararg)))
#elif defined(_MSC_VER)
#define UPB_NOINLINE
#define UPB_FORCEINLINE
#define UPB_NORETURN __declspec(noreturn)
#define UPB_PRINTF(str, first_vararg)
#else /* !defined(__GNUC__) */
#define UPB_FORCEINLINE
#define UPB_NOINLINE
#define UPB_NORETURN
#define UPB_PRINTF(str, first_vararg)
#endif

/* Note: both arguments may be evaluated twice; avoid side effects. */
#define UPB_MAX(x, y) ((x) > (y) ? (x) : (y))
#define UPB_MIN(x, y) ((x) < (y) ? (x) : (y))

/* Marks a variable or expression as intentionally unused.  The argument is
 * parenthesized so that a compound expression is discarded as a whole. */
#define UPB_UNUSED(var) ((void)(var))
129
/* UPB_ASSUME(): in release mode, we tell the compiler to assume this is true.
 * In debug mode it is a plain assert().
 *
 * Every form is a single statement (do/while(0) or an expression) so the
 * macro is safe as the body of an unbraced if/else: a bare `if` expansion
 * would capture a following `else`. */
#ifdef NDEBUG
#ifdef __GNUC__
#define UPB_ASSUME(expr)                    \
  do {                                      \
    if (!(expr)) __builtin_unreachable();   \
  } while (0)
#elif defined _MSC_VER
#define UPB_ASSUME(expr)          \
  do {                            \
    if (!(expr)) __assume(0);     \
  } while (0)
#else
#define UPB_ASSUME(expr) do {} while (0 && (expr))
#endif
#else
#define UPB_ASSUME(expr) assert(expr)
#endif

/* UPB_ASSERT(): in release mode, we use the expression without letting it be
 * evaluated.  This prevents "unused variable" warnings.  `0` is used rather
 * than `false` so the macro does not depend on <stdbool.h>. */
#ifdef NDEBUG
#define UPB_ASSERT(expr) do {} while (0 && (expr))
#else
#define UPB_ASSERT(expr) assert(expr)
#endif

/* UPB_UNREACHABLE(): asserts in debug builds; on GCC/Clang additionally tells
 * the optimizer the point cannot be reached. */
#if defined(__GNUC__) || defined(__clang__)
#define UPB_UNREACHABLE() do { assert(0); __builtin_unreachable(); } while(0)
#else
#define UPB_UNREACHABLE() do { assert(0); } while(0)
#endif
157
/* UPB_SETJMP() / UPB_LONGJMP(): avoid setting/restoring signal mask.
 * On Apple platforms the underscore-prefixed variants are used. */
#ifndef __APPLE__
#define UPB_SETJMP(buf) setjmp(buf)
#define UPB_LONGJMP(buf, val) longjmp(buf, val)
#else
#define UPB_SETJMP(buf) _setjmp(buf)
#define UPB_LONGJMP(buf, val) _longjmp(buf, val)
#endif

/* UPB_PTRADD(ptr, ofs): add pointer while avoiding "NULL + 0" UB.  When
 * `ofs` is zero the pointer is returned untouched. */
#define UPB_PTRADD(ptr, ofs) ((ofs) ? (ptr) + (ofs) : (ptr))
169
/* Configure whether fasttable is switched on or not. *************************/

/* UPB_HAS_ATTRIBUTE is a temporary helper: it is #undef'd again right after
 * UPB_MUSTTAIL has been defined. */
#ifndef __has_attribute
#define UPB_HAS_ATTRIBUTE(x) 0
#else
#define UPB_HAS_ATTRIBUTE(x) __has_attribute(x)
#endif

#if !UPB_HAS_ATTRIBUTE(musttail)
#define UPB_MUSTTAIL
#else
#define UPB_MUSTTAIL __attribute__((musttail))
#endif

#undef UPB_HAS_ATTRIBUTE

/* This check is not fully robust: it does not require that we have "musttail"
 * support available. We need tail calls to avoid consuming arbitrary amounts
 * of stack space.
 *
 * GCC/Clang can mostly be trusted to generate tail calls as long as
 * optimization is enabled, but, debug builds will not generate tail calls
 * unless "musttail" is available.
 *
 * We should probably either:
 *   1. require that the compiler supports musttail.
 *   2. add some fallback code for when musttail isn't available (ie. return
 *      instead of tail calling). This is safe and portable, but this comes at
 *      a CPU cost.
 */
#if defined(__GNUC__) && (defined(__x86_64__) || defined(__aarch64__))
#define UPB_FASTTABLE_SUPPORTED 1
#else
#define UPB_FASTTABLE_SUPPORTED 0
#endif

/* define UPB_ENABLE_FASTTABLE to force fast table support.
 * This is useful when we want to ensure we are really getting fasttable,
 * for example for testing or benchmarking. */
#if defined(UPB_ENABLE_FASTTABLE)
#if !UPB_FASTTABLE_SUPPORTED
#error fasttable is x86-64/ARM64 only and requires GCC or Clang.
#endif
#define UPB_FASTTABLE 1
/* Define UPB_TRY_ENABLE_FASTTABLE to use fasttable if possible.
 * This is useful for releasing code that might be used on multiple platforms,
 * for example the PHP or Ruby C extensions. */
#elif defined(UPB_TRY_ENABLE_FASTTABLE)
#define UPB_FASTTABLE UPB_FASTTABLE_SUPPORTED
#else
#define UPB_FASTTABLE 0
#endif

/* UPB_FASTTABLE_INIT() allows protos compiled for fasttable to gracefully
 * degrade to non-fasttable if we are using UPB_TRY_ENABLE_FASTTABLE: in that
 * one configuration its arguments are dropped, otherwise they are passed
 * through unchanged. */
#if defined(UPB_TRY_ENABLE_FASTTABLE) && !UPB_FASTTABLE
#define UPB_FASTTABLE_INIT(...)
#else
#define UPB_FASTTABLE_INIT(...) __VA_ARGS__
#endif

#undef UPB_FASTTABLE_SUPPORTED
232
/* ASAN poisoning (for arena) *************************************************/

/* UPB_ASAN is 1 when the compiler defines __SANITIZE_ADDRESS__ and 0
 * otherwise.  Without ASAN the (un)poison macros only evaluate and discard
 * their arguments. */
#if !defined(__SANITIZE_ADDRESS__)
#define UPB_ASAN 0
#define UPB_POISON_MEMORY_REGION(addr, size) \
  ((void)(addr), (void)(size))
#define UPB_UNPOISON_MEMORY_REGION(addr, size) \
  ((void)(addr), (void)(size))
#else
#define UPB_ASAN 1
#ifdef __cplusplus
extern "C" {
#endif
void __asan_poison_memory_region(void const volatile *addr, size_t size);
void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
#ifdef __cplusplus
}  /* extern "C" */
#endif
#define UPB_POISON_MEMORY_REGION(addr, size) \
  __asan_poison_memory_region((addr), (size))
#define UPB_UNPOISON_MEMORY_REGION(addr, size) \
  __asan_unpoison_memory_region((addr), (size))
#endif
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800256
/* Disable proto2 enum checking (TEMPORARY) ***********************************/
258
/* UPB_TREAT_PROTO2_ENUMS_LIKE_PROTO3 is 1 exactly when the build defines
 * UPB_DISABLE_PROTO2_ENUM_CHECKING, and 0 otherwise. */
#ifndef UPB_DISABLE_PROTO2_ENUM_CHECKING
#define UPB_TREAT_PROTO2_ENUMS_LIKE_PROTO3 0
#else
#define UPB_TREAT_PROTO2_ENUMS_LIKE_PROTO3 1
#endif
264
Joshua Habermandd69a482021-05-17 22:40:33 -0700265/** upb/decode.c ************************************************************/
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800266
267#include <setjmp.h>
268#include <string.h>
269
270
271/* Must be last. */
272
273/* Maps descriptor type -> elem_size_lg2. */
274static const uint8_t desctype_to_elem_size_lg2[] = {
Joshua Habermanf41049a2022-01-21 14:41:25 -0800275 -1, /* invalid descriptor type */
276 3, /* DOUBLE */
277 2, /* FLOAT */
278 3, /* INT64 */
279 3, /* UINT64 */
280 2, /* INT32 */
281 3, /* FIXED64 */
282 2, /* FIXED32 */
283 0, /* BOOL */
284 UPB_SIZE(3, 4), /* STRING */
285 UPB_SIZE(2, 3), /* GROUP */
286 UPB_SIZE(2, 3), /* MESSAGE */
287 UPB_SIZE(3, 4), /* BYTES */
288 2, /* UINT32 */
289 2, /* ENUM */
290 2, /* SFIXED32 */
291 3, /* SFIXED64 */
292 2, /* SINT32 */
293 3, /* SINT64 */
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800294};
295
296/* Maps descriptor type -> upb map size. */
297static const uint8_t desctype_to_mapsize[] = {
298 -1, /* invalid descriptor type */
299 8, /* DOUBLE */
300 4, /* FLOAT */
301 8, /* INT64 */
302 8, /* UINT64 */
303 4, /* INT32 */
304 8, /* FIXED64 */
305 4, /* FIXED32 */
306 1, /* BOOL */
307 UPB_MAPTYPE_STRING, /* STRING */
Joshua Habermanf41049a2022-01-21 14:41:25 -0800308 sizeof(void*), /* GROUP */
309 sizeof(void*), /* MESSAGE */
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800310 UPB_MAPTYPE_STRING, /* BYTES */
311 4, /* UINT32 */
312 4, /* ENUM */
313 4, /* SFIXED32 */
314 8, /* SFIXED64 */
315 4, /* SINT32 */
316 8, /* SINT64 */
317};
318
Joshua Habermanf41049a2022-01-21 14:41:25 -0800319static const unsigned FIXED32_OK_MASK = (1 << kUpb_FieldType_Float) |
320 (1 << kUpb_FieldType_Fixed32) |
321 (1 << kUpb_FieldType_SFixed32);
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800322
Joshua Habermanf41049a2022-01-21 14:41:25 -0800323static const unsigned FIXED64_OK_MASK = (1 << kUpb_FieldType_Double) |
324 (1 << kUpb_FieldType_Fixed64) |
325 (1 << kUpb_FieldType_SFixed64);
326
/* Fake field types for MessageSet, numbered directly after the real
 * descriptor types used to index the op tables. */
#define TYPE_MSGSET_ITEM 19
#define TYPE_MSGSET_TYPE_ID 20
#define TYPE_COUNT 20

/* Op: an action to be performed for a wire-type/field-type combination.
 * Macro arguments are parenthesized so that compound expressions such as
 * `1 << 1` produce the documented op value. */
#define OP_UNKNOWN -1 /* Unknown field. */
#define OP_MSGSET_ITEM -2
#define OP_MSGSET_TYPEID -3
#define OP_SCALAR_LG2(n) (n) /* n in [0, 2, 3] => op in [0, 2, 3] */
#define OP_ENUM 1
#define OP_STRING 4
#define OP_BYTES 5
#define OP_SUBMSG 6
/* Scalar fields use only ops above. Repeated fields can use any op.  */
#define OP_FIXPCK_LG2(n) ((n) + 5) /* n in [2, 3] => op in [7, 8] */
#define OP_VARPCK_LG2(n) ((n) + 9) /* n in [0, 2, 3] => op in [9, 11, 12] */
#define OP_PACKED_ENUM 13
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800345
Joshua Habermanf41049a2022-01-21 14:41:25 -0800346static const int8_t varint_ops[] = {
347 OP_UNKNOWN, /* field not found */
348 OP_UNKNOWN, /* DOUBLE */
349 OP_UNKNOWN, /* FLOAT */
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800350 OP_SCALAR_LG2(3), /* INT64 */
351 OP_SCALAR_LG2(3), /* UINT64 */
352 OP_SCALAR_LG2(2), /* INT32 */
Joshua Habermanf41049a2022-01-21 14:41:25 -0800353 OP_UNKNOWN, /* FIXED64 */
354 OP_UNKNOWN, /* FIXED32 */
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800355 OP_SCALAR_LG2(0), /* BOOL */
Joshua Habermanf41049a2022-01-21 14:41:25 -0800356 OP_UNKNOWN, /* STRING */
357 OP_UNKNOWN, /* GROUP */
358 OP_UNKNOWN, /* MESSAGE */
359 OP_UNKNOWN, /* BYTES */
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800360 OP_SCALAR_LG2(2), /* UINT32 */
Joshua Habermanf41049a2022-01-21 14:41:25 -0800361 OP_ENUM, /* ENUM */
362 OP_UNKNOWN, /* SFIXED32 */
363 OP_UNKNOWN, /* SFIXED64 */
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800364 OP_SCALAR_LG2(2), /* SINT32 */
365 OP_SCALAR_LG2(3), /* SINT64 */
Joshua Habermanf41049a2022-01-21 14:41:25 -0800366 OP_UNKNOWN, /* MSGSET_ITEM */
367 OP_MSGSET_TYPEID, /* MSGSET TYPEID */
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800368};
369
Joshua Habermanf41049a2022-01-21 14:41:25 -0800370static const int8_t delim_ops[] = {
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800371 /* For non-repeated field type. */
Joshua Habermanf41049a2022-01-21 14:41:25 -0800372 OP_UNKNOWN, /* field not found */
373 OP_UNKNOWN, /* DOUBLE */
374 OP_UNKNOWN, /* FLOAT */
375 OP_UNKNOWN, /* INT64 */
376 OP_UNKNOWN, /* UINT64 */
377 OP_UNKNOWN, /* INT32 */
378 OP_UNKNOWN, /* FIXED64 */
379 OP_UNKNOWN, /* FIXED32 */
380 OP_UNKNOWN, /* BOOL */
381 OP_STRING, /* STRING */
382 OP_UNKNOWN, /* GROUP */
383 OP_SUBMSG, /* MESSAGE */
384 OP_BYTES, /* BYTES */
385 OP_UNKNOWN, /* UINT32 */
386 OP_UNKNOWN, /* ENUM */
387 OP_UNKNOWN, /* SFIXED32 */
388 OP_UNKNOWN, /* SFIXED64 */
389 OP_UNKNOWN, /* SINT32 */
390 OP_UNKNOWN, /* SINT64 */
391 OP_UNKNOWN, /* MSGSET_ITEM */
392 OP_UNKNOWN, /* MSGSET TYPEID */
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800393 /* For repeated field type. */
394 OP_FIXPCK_LG2(3), /* REPEATED DOUBLE */
395 OP_FIXPCK_LG2(2), /* REPEATED FLOAT */
396 OP_VARPCK_LG2(3), /* REPEATED INT64 */
397 OP_VARPCK_LG2(3), /* REPEATED UINT64 */
398 OP_VARPCK_LG2(2), /* REPEATED INT32 */
399 OP_FIXPCK_LG2(3), /* REPEATED FIXED64 */
400 OP_FIXPCK_LG2(2), /* REPEATED FIXED32 */
401 OP_VARPCK_LG2(0), /* REPEATED BOOL */
402 OP_STRING, /* REPEATED STRING */
403 OP_SUBMSG, /* REPEATED GROUP */
404 OP_SUBMSG, /* REPEATED MESSAGE */
405 OP_BYTES, /* REPEATED BYTES */
406 OP_VARPCK_LG2(2), /* REPEATED UINT32 */
Joshua Habermanf41049a2022-01-21 14:41:25 -0800407 OP_PACKED_ENUM, /* REPEATED ENUM */
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800408 OP_FIXPCK_LG2(2), /* REPEATED SFIXED32 */
409 OP_FIXPCK_LG2(3), /* REPEATED SFIXED64 */
410 OP_VARPCK_LG2(2), /* REPEATED SINT32 */
411 OP_VARPCK_LG2(3), /* REPEATED SINT64 */
Joshua Habermanf41049a2022-01-21 14:41:25 -0800412 /* Omitting MSGSET_*, because we never emit a repeated msgset type */
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800413};
414
/* A value just read off the wire, viewed as whichever representation the
 * current op needs. */
typedef union wireval {
  bool bool_val;       /* written by decode_munge() for bool fields */
  uint32_t uint32_val; /* 32-bit view */
  uint64_t uint64_val; /* 64-bit view; varints are decoded into this */
  uint32_t size;       /* byte length used by the string/submsg/packed ops */
} wireval;
421
Joshua Habermanf41049a2022-01-21 14:41:25 -0800422static const char* decode_msg(upb_Decoder* d, const char* ptr, upb_Message* msg,
423 const upb_MiniTable* layout);
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800424
Joshua Habermanf41049a2022-01-21 14:41:25 -0800425UPB_NORETURN static void* decode_err(upb_Decoder* d, upb_DecodeStatus status) {
426 assert(status != kUpb_DecodeStatus_Ok);
427 UPB_LONGJMP(d->err, status);
428}
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800429
Joshua Habermanf41049a2022-01-21 14:41:25 -0800430const char* fastdecode_err(upb_Decoder* d, int status) {
431 assert(status != kUpb_DecodeStatus_Ok);
432 UPB_LONGJMP(d->err, status);
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800433 return NULL;
434}
Joshua Habermanf41049a2022-01-21 14:41:25 -0800435static void decode_verifyutf8(upb_Decoder* d, const char* buf, int len) {
436 if (!decode_verifyutf8_inl(buf, len))
437 decode_err(d, kUpb_DecodeStatus_BadUtf8);
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800438}
439
Joshua Habermanf41049a2022-01-21 14:41:25 -0800440static bool decode_reserve(upb_Decoder* d, upb_Array* arr, size_t elem) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800441 bool need_realloc = arr->size - arr->len < elem;
442 if (need_realloc && !_upb_array_realloc(arr, arr->len + elem, &d->arena)) {
Joshua Habermanf41049a2022-01-21 14:41:25 -0800443 decode_err(d, kUpb_DecodeStatus_OutOfMemory);
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800444 }
445 return need_realloc;
446}
447
448typedef struct {
Joshua Habermanf41049a2022-01-21 14:41:25 -0800449 const char* ptr;
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800450 uint64_t val;
451} decode_vret;
452
453UPB_NOINLINE
Joshua Habermanf41049a2022-01-21 14:41:25 -0800454static decode_vret decode_longvarint64(const char* ptr, uint64_t val) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800455 decode_vret ret = {NULL, 0};
456 uint64_t byte;
457 int i;
458 for (i = 1; i < 10; i++) {
459 byte = (uint8_t)ptr[i];
460 val += (byte - 1) << (i * 7);
461 if (!(byte & 0x80)) {
462 ret.ptr = ptr + i + 1;
463 ret.val = val;
464 return ret;
465 }
466 }
467 return ret;
468}
469
470UPB_FORCEINLINE
Joshua Habermanf41049a2022-01-21 14:41:25 -0800471static const char* decode_varint64(upb_Decoder* d, const char* ptr,
472 uint64_t* val) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800473 uint64_t byte = (uint8_t)*ptr;
474 if (UPB_LIKELY((byte & 0x80) == 0)) {
475 *val = byte;
476 return ptr + 1;
477 } else {
478 decode_vret res = decode_longvarint64(ptr, byte);
Joshua Habermanf41049a2022-01-21 14:41:25 -0800479 if (!res.ptr) return decode_err(d, kUpb_DecodeStatus_Malformed);
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800480 *val = res.val;
481 return res.ptr;
482 }
483}
484
485UPB_FORCEINLINE
Joshua Habermanf41049a2022-01-21 14:41:25 -0800486static const char* decode_tag(upb_Decoder* d, const char* ptr, uint32_t* val) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800487 uint64_t byte = (uint8_t)*ptr;
488 if (UPB_LIKELY((byte & 0x80) == 0)) {
489 *val = byte;
490 return ptr + 1;
491 } else {
Joshua Habermanf41049a2022-01-21 14:41:25 -0800492 const char* start = ptr;
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800493 decode_vret res = decode_longvarint64(ptr, byte);
Joshua Habermanf41049a2022-01-21 14:41:25 -0800494 if (!res.ptr || res.ptr - start > 5 || res.val > UINT32_MAX) {
495 return decode_err(d, kUpb_DecodeStatus_Malformed);
496 }
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800497 *val = res.val;
Joshua Habermanf41049a2022-01-21 14:41:25 -0800498 return res.ptr;
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800499 }
500}
501
Joshua Habermanf41049a2022-01-21 14:41:25 -0800502static void decode_munge_int32(wireval* val) {
503 if (!_upb_IsLittleEndian()) {
504 /* The next stage will memcpy(dst, &val, 4) */
505 val->uint32_val = val->uint64_val;
506 }
507}
508
509static void decode_munge(int type, wireval* val) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800510 switch (type) {
Joshua Habermanf41049a2022-01-21 14:41:25 -0800511 case kUpb_FieldType_Bool:
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800512 val->bool_val = val->uint64_val != 0;
513 break;
Joshua Habermanf41049a2022-01-21 14:41:25 -0800514 case kUpb_FieldType_SInt32: {
515 uint32_t n = val->uint64_val;
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800516 val->uint32_val = (n >> 1) ^ -(int32_t)(n & 1);
517 break;
518 }
Joshua Habermanf41049a2022-01-21 14:41:25 -0800519 case kUpb_FieldType_SInt64: {
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800520 uint64_t n = val->uint64_val;
521 val->uint64_val = (n >> 1) ^ -(int64_t)(n & 1);
522 break;
523 }
Joshua Habermanf41049a2022-01-21 14:41:25 -0800524 case kUpb_FieldType_Int32:
525 case kUpb_FieldType_UInt32:
526 case kUpb_FieldType_Enum:
527 decode_munge_int32(val);
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800528 break;
529 }
530}
531
Joshua Habermanf41049a2022-01-21 14:41:25 -0800532static upb_Message* decode_newsubmsg(upb_Decoder* d,
533 const upb_MiniTable_Sub* subs,
534 const upb_MiniTable_Field* field) {
535 const upb_MiniTable* subl = subs[field->submsg_index].submsg;
536 return _upb_Message_New_inl(subl, &d->arena);
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800537}
538
539UPB_NOINLINE
Joshua Habermanf41049a2022-01-21 14:41:25 -0800540const char* decode_isdonefallback(upb_Decoder* d, const char* ptr,
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800541 int overrun) {
Joshua Habermanf41049a2022-01-21 14:41:25 -0800542 int status;
543 ptr = decode_isdonefallback_inl(d, ptr, overrun, &status);
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800544 if (ptr == NULL) {
Joshua Habermanf41049a2022-01-21 14:41:25 -0800545 return decode_err(d, status);
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800546 }
547 return ptr;
548}
549
Joshua Habermanf41049a2022-01-21 14:41:25 -0800550static const char* decode_readstr(upb_Decoder* d, const char* ptr, int size,
551 upb_StringView* str) {
552 if (d->options & kUpb_DecodeOption_AliasString) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800553 str->data = ptr;
554 } else {
Joshua Habermanf41049a2022-01-21 14:41:25 -0800555 char* data = upb_Arena_Malloc(&d->arena, size);
556 if (!data) return decode_err(d, kUpb_DecodeStatus_OutOfMemory);
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800557 memcpy(data, ptr, size);
558 str->data = data;
559 }
560 str->size = size;
561 return ptr + size;
562}
563
564UPB_FORCEINLINE
Joshua Habermanf41049a2022-01-21 14:41:25 -0800565static const char* decode_tosubmsg2(upb_Decoder* d, const char* ptr,
566 upb_Message* submsg,
567 const upb_MiniTable* subl, int size) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800568 int saved_delta = decode_pushlimit(d, ptr, size);
Joshua Habermanf41049a2022-01-21 14:41:25 -0800569 if (--d->depth < 0) return decode_err(d, kUpb_DecodeStatus_MaxDepthExceeded);
570 ptr = decode_msg(d, ptr, submsg, subl);
571 if (d->end_group != DECODE_NOGROUP)
572 return decode_err(d, kUpb_DecodeStatus_Malformed);
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800573 decode_poplimit(d, ptr, saved_delta);
574 d->depth++;
575 return ptr;
576}
577
578UPB_FORCEINLINE
Joshua Habermanf41049a2022-01-21 14:41:25 -0800579static const char* decode_tosubmsg(upb_Decoder* d, const char* ptr,
580 upb_Message* submsg,
581 const upb_MiniTable_Sub* subs,
582 const upb_MiniTable_Field* field, int size) {
583 return decode_tosubmsg2(d, ptr, submsg, subs[field->submsg_index].submsg,
584 size);
585}
586
587UPB_FORCEINLINE
588static const char* decode_group(upb_Decoder* d, const char* ptr,
589 upb_Message* submsg, const upb_MiniTable* subl,
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800590 uint32_t number) {
Joshua Habermanf41049a2022-01-21 14:41:25 -0800591 if (--d->depth < 0) return decode_err(d, kUpb_DecodeStatus_MaxDepthExceeded);
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800592 if (decode_isdone(d, &ptr)) {
Joshua Habermanf41049a2022-01-21 14:41:25 -0800593 return decode_err(d, kUpb_DecodeStatus_Malformed);
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800594 }
595 ptr = decode_msg(d, ptr, submsg, subl);
Joshua Habermanf41049a2022-01-21 14:41:25 -0800596 if (d->end_group != number) return decode_err(d, kUpb_DecodeStatus_Malformed);
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800597 d->end_group = DECODE_NOGROUP;
598 d->depth++;
599 return ptr;
600}
601
602UPB_FORCEINLINE
Joshua Habermanf41049a2022-01-21 14:41:25 -0800603static const char* decode_togroup(upb_Decoder* d, const char* ptr,
604 upb_Message* submsg,
605 const upb_MiniTable_Sub* subs,
606 const upb_MiniTable_Field* field) {
607 const upb_MiniTable* subl = subs[field->submsg_index].submsg;
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800608 return decode_group(d, ptr, submsg, subl, field->number);
609}
610
/* Writes `val` at `ptr` as a varint: 7 bits per byte, least-significant group
 * first, with bit 0x80 set on every byte except the last.  Always writes at
 * least one byte and returns the position just past the last byte written. */
static char* encode_varint32(uint32_t val, char* ptr) {
  for (;;) {
    uint8_t low7 = val & 0x7fU;
    val >>= 7;
    if (val == 0) {
      *ptr++ = low7;
      return ptr;
    }
    *ptr++ = (uint8_t)(low7 | 0x80U);
  }
}
620
Joshua Haberman7ecf43f2022-03-14 13:11:29 -0700621static void upb_Decode_AddUnknownVarints(upb_Decoder* d, upb_Message* msg,
622 uint32_t val1, uint32_t val2) {
623 char buf[20];
624 char* end = buf;
625 end = encode_varint32(val1, end);
626 end = encode_varint32(val2, end);
627
628 if (!_upb_Message_AddUnknown(msg, buf, end - buf, &d->arena)) {
629 decode_err(d, kUpb_DecodeStatus_OutOfMemory);
630 }
631}
632
Joshua Habermanf41049a2022-01-21 14:41:25 -0800633UPB_NOINLINE
634static bool decode_checkenum_slow(upb_Decoder* d, const char* ptr,
635 upb_Message* msg, const upb_MiniTable_Enum* e,
636 const upb_MiniTable_Field* field,
637 uint32_t v) {
638 // OPT: binary search long lists?
639 int n = e->value_count;
640 for (int i = 0; i < n; i++) {
641 if ((uint32_t)e->values[i] == v) return true;
642 }
643
644 // Unrecognized enum goes into unknown fields.
645 // For packed fields the tag could be arbitrarily far in the past, so we
Joshua Haberman7ecf43f2022-03-14 13:11:29 -0700646 // just re-encode the tag and value here.
Joshua Habermanf41049a2022-01-21 14:41:25 -0800647 uint32_t tag = ((uint32_t)field->number << 3) | kUpb_WireType_Varint;
Joshua Haberman7ecf43f2022-03-14 13:11:29 -0700648 upb_Decode_AddUnknownVarints(d, msg, tag, v);
Joshua Habermanf41049a2022-01-21 14:41:25 -0800649 return false;
650}
651
652UPB_FORCEINLINE
653static bool decode_checkenum(upb_Decoder* d, const char* ptr, upb_Message* msg,
654 const upb_MiniTable_Enum* e,
655 const upb_MiniTable_Field* field, wireval* val) {
656 uint32_t v = val->uint32_val;
657
658 if (UPB_LIKELY(v < 64) && UPB_LIKELY(((1ULL << v) & e->mask))) return true;
659
660 return decode_checkenum_slow(d, ptr, msg, e, field, v);
661}
662
663UPB_NOINLINE
664static const char* decode_enum_toarray(upb_Decoder* d, const char* ptr,
665 upb_Message* msg, upb_Array* arr,
666 const upb_MiniTable_Sub* subs,
667 const upb_MiniTable_Field* field,
668 wireval* val) {
669 const upb_MiniTable_Enum* e = subs[field->submsg_index].subenum;
670 if (!decode_checkenum(d, ptr, msg, e, field, val)) return ptr;
671 void* mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->len * 4, void);
672 arr->len++;
673 memcpy(mem, val, 4);
674 return ptr;
675}
676
677UPB_FORCEINLINE
678static const char* decode_fixed_packed(upb_Decoder* d, const char* ptr,
679 upb_Array* arr, wireval* val,
680 const upb_MiniTable_Field* field,
681 int lg2) {
682 int mask = (1 << lg2) - 1;
683 size_t count = val->size >> lg2;
684 if ((val->size & mask) != 0) {
685 // Length isn't a round multiple of elem size.
686 return decode_err(d, kUpb_DecodeStatus_Malformed);
687 }
688 decode_reserve(d, arr, count);
689 void* mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << lg2, void);
690 arr->len += count;
691 // Note: if/when the decoder supports multi-buffer input, we will need to
692 // handle buffer seams here.
693 if (_upb_IsLittleEndian()) {
694 memcpy(mem, ptr, val->size);
695 ptr += val->size;
696 } else {
697 const char* end = ptr + val->size;
698 char* dst = mem;
699 while (ptr < end) {
700 if (lg2 == 2) {
701 uint32_t val;
702 memcpy(&val, ptr, sizeof(val));
703 val = _upb_BigEndian_Swap32(val);
704 memcpy(dst, &val, sizeof(val));
705 } else {
706 UPB_ASSERT(lg2 == 3);
707 uint64_t val;
708 memcpy(&val, ptr, sizeof(val));
709 val = _upb_BigEndian_Swap64(val);
710 memcpy(dst, &val, sizeof(val));
711 }
712 ptr += 1 << lg2;
713 dst += 1 << lg2;
714 }
715 }
716
717 return ptr;
718}
719
720UPB_FORCEINLINE
721static const char* decode_varint_packed(upb_Decoder* d, const char* ptr,
722 upb_Array* arr, wireval* val,
723 const upb_MiniTable_Field* field,
724 int lg2) {
725 int scale = 1 << lg2;
726 int saved_limit = decode_pushlimit(d, ptr, val->size);
727 char* out = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << lg2, void);
728 while (!decode_isdone(d, &ptr)) {
729 wireval elem;
730 ptr = decode_varint64(d, ptr, &elem.uint64_val);
731 decode_munge(field->descriptortype, &elem);
732 if (decode_reserve(d, arr, 1)) {
733 out = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << lg2, void);
734 }
735 arr->len++;
736 memcpy(out, &elem, scale);
737 out += scale;
738 }
739 decode_poplimit(d, ptr, saved_limit);
740 return ptr;
741}
742
743UPB_NOINLINE
744static const char* decode_enum_packed(upb_Decoder* d, const char* ptr,
745 upb_Message* msg, upb_Array* arr,
746 const upb_MiniTable_Sub* subs,
747 const upb_MiniTable_Field* field,
748 wireval* val) {
749 const upb_MiniTable_Enum* e = subs[field->submsg_index].subenum;
750 int saved_limit = decode_pushlimit(d, ptr, val->size);
751 char* out = UPB_PTR_AT(_upb_array_ptr(arr), arr->len * 4, void);
752 while (!decode_isdone(d, &ptr)) {
753 wireval elem;
754 ptr = decode_varint64(d, ptr, &elem.uint64_val);
755 decode_munge_int32(&elem);
756 if (!decode_checkenum(d, ptr, msg, e, field, &elem)) {
757 continue;
758 }
759 if (decode_reserve(d, arr, 1)) {
760 out = UPB_PTR_AT(_upb_array_ptr(arr), arr->len * 4, void);
761 }
762 arr->len++;
763 memcpy(out, &elem, 4);
764 out += 4;
765 }
766 decode_poplimit(d, ptr, saved_limit);
767 return ptr;
768}
769
770static const char* decode_toarray(upb_Decoder* d, const char* ptr,
771 upb_Message* msg,
772 const upb_MiniTable_Sub* subs,
773 const upb_MiniTable_Field* field,
774 wireval* val, int op) {
775 upb_Array** arrp = UPB_PTR_AT(msg, field->offset, void);
776 upb_Array* arr = *arrp;
777 void* mem;
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800778
779 if (arr) {
780 decode_reserve(d, arr, 1);
781 } else {
782 size_t lg2 = desctype_to_elem_size_lg2[field->descriptortype];
Joshua Habermanf41049a2022-01-21 14:41:25 -0800783 arr = _upb_Array_New(&d->arena, 4, lg2);
784 if (!arr) return decode_err(d, kUpb_DecodeStatus_OutOfMemory);
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800785 *arrp = arr;
786 }
787
788 switch (op) {
789 case OP_SCALAR_LG2(0):
790 case OP_SCALAR_LG2(2):
791 case OP_SCALAR_LG2(3):
792 /* Append scalar value. */
793 mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << op, void);
794 arr->len++;
Joshua Haberman9d578a32021-08-02 15:32:01 -0700795 memcpy(mem, val, 1 << op);
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800796 return ptr;
797 case OP_STRING:
Joshua Haberman9d578a32021-08-02 15:32:01 -0700798 decode_verifyutf8(d, ptr, val->size);
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800799 /* Fallthrough. */
800 case OP_BYTES: {
801 /* Append bytes. */
Joshua Habermanf41049a2022-01-21 14:41:25 -0800802 upb_StringView* str = (upb_StringView*)_upb_array_ptr(arr) + arr->len;
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800803 arr->len++;
Joshua Haberman9d578a32021-08-02 15:32:01 -0700804 return decode_readstr(d, ptr, val->size, str);
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800805 }
806 case OP_SUBMSG: {
807 /* Append submessage / group. */
Joshua Habermanf41049a2022-01-21 14:41:25 -0800808 upb_Message* submsg = decode_newsubmsg(d, subs, field);
809 *UPB_PTR_AT(_upb_array_ptr(arr), arr->len * sizeof(void*), upb_Message*) =
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800810 submsg;
811 arr->len++;
Joshua Habermanf41049a2022-01-21 14:41:25 -0800812 if (UPB_UNLIKELY(field->descriptortype == kUpb_FieldType_Group)) {
813 return decode_togroup(d, ptr, submsg, subs, field);
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800814 } else {
Joshua Habermanf41049a2022-01-21 14:41:25 -0800815 return decode_tosubmsg(d, ptr, submsg, subs, field, val->size);
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800816 }
817 }
818 case OP_FIXPCK_LG2(2):
Joshua Habermanf41049a2022-01-21 14:41:25 -0800819 case OP_FIXPCK_LG2(3):
820 return decode_fixed_packed(d, ptr, arr, val, field,
821 op - OP_FIXPCK_LG2(0));
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800822 case OP_VARPCK_LG2(0):
823 case OP_VARPCK_LG2(2):
Joshua Habermanf41049a2022-01-21 14:41:25 -0800824 case OP_VARPCK_LG2(3):
825 return decode_varint_packed(d, ptr, arr, val, field,
826 op - OP_VARPCK_LG2(0));
827 case OP_ENUM:
828 return decode_enum_toarray(d, ptr, msg, arr, subs, field, val);
829 case OP_PACKED_ENUM:
830 return decode_enum_packed(d, ptr, msg, arr, subs, field, val);
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800831 default:
832 UPB_UNREACHABLE();
833 }
834}
835
Joshua Habermanf41049a2022-01-21 14:41:25 -0800836static const char* decode_tomap(upb_Decoder* d, const char* ptr,
837 upb_Message* msg, const upb_MiniTable_Sub* subs,
838 const upb_MiniTable_Field* field,
839 wireval* val) {
840 upb_Map** map_p = UPB_PTR_AT(msg, field->offset, upb_Map*);
841 upb_Map* map = *map_p;
842 upb_MapEntry ent;
843 const upb_MiniTable* entry = subs[field->submsg_index].submsg;
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800844
845 if (!map) {
846 /* Lazily create map. */
Joshua Habermanf41049a2022-01-21 14:41:25 -0800847 const upb_MiniTable_Field* key_field = &entry->fields[0];
848 const upb_MiniTable_Field* val_field = &entry->fields[1];
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800849 char key_size = desctype_to_mapsize[key_field->descriptortype];
850 char val_size = desctype_to_mapsize[val_field->descriptortype];
851 UPB_ASSERT(key_field->offset == 0);
Joshua Habermanf41049a2022-01-21 14:41:25 -0800852 UPB_ASSERT(val_field->offset == sizeof(upb_StringView));
853 map = _upb_Map_New(&d->arena, key_size, val_size);
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800854 *map_p = map;
855 }
856
857 /* Parse map entry. */
858 memset(&ent, 0, sizeof(ent));
859
Joshua Habermanf41049a2022-01-21 14:41:25 -0800860 if (entry->fields[1].descriptortype == kUpb_FieldType_Message ||
861 entry->fields[1].descriptortype == kUpb_FieldType_Group) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800862 /* Create proactively to handle the case where it doesn't appear. */
Joshua Habermanf41049a2022-01-21 14:41:25 -0800863 ent.v.val =
864 upb_value_ptr(_upb_Message_New(entry->subs[0].submsg, &d->arena));
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800865 }
866
Joshua Haberman7ecf43f2022-03-14 13:11:29 -0700867 const char* start = ptr;
Joshua Habermanf41049a2022-01-21 14:41:25 -0800868 ptr = decode_tosubmsg(d, ptr, &ent.k, subs, field, val->size);
Joshua Haberman7ecf43f2022-03-14 13:11:29 -0700869 // check if ent had any unknown fields
870 size_t size;
871 upb_Message_GetUnknown(&ent.k, &size);
872 if (size != 0) {
873 uint32_t tag = ((uint32_t)field->number << 3) | kUpb_WireType_Delimited;
874 upb_Decode_AddUnknownVarints(d, msg, tag, (uint32_t)(ptr - start));
875 if (!_upb_Message_AddUnknown(msg, start, ptr - start, &d->arena)) {
876 decode_err(d, kUpb_DecodeStatus_OutOfMemory);
877 }
878 } else {
879 _upb_Map_Set(map, &ent.k, map->key_size, &ent.v, map->val_size, &d->arena);
880 }
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800881 return ptr;
882}
883
Joshua Habermanf41049a2022-01-21 14:41:25 -0800884static const char* decode_tomsg(upb_Decoder* d, const char* ptr,
885 upb_Message* msg, const upb_MiniTable_Sub* subs,
886 const upb_MiniTable_Field* field, wireval* val,
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800887 int op) {
Joshua Habermanf41049a2022-01-21 14:41:25 -0800888 void* mem = UPB_PTR_AT(msg, field->offset, void);
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800889 int type = field->descriptortype;
890
Joshua Habermanf41049a2022-01-21 14:41:25 -0800891 if (UPB_UNLIKELY(op == OP_ENUM) &&
892 !decode_checkenum(d, ptr, msg, subs[field->submsg_index].subenum, field,
893 val)) {
894 return ptr;
895 }
896
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800897 /* Set presence if necessary. */
Joshua Haberman9d578a32021-08-02 15:32:01 -0700898 if (field->presence > 0) {
899 _upb_sethas_field(msg, field);
900 } else if (field->presence < 0) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800901 /* Oneof case */
Joshua Habermanf41049a2022-01-21 14:41:25 -0800902 uint32_t* oneof_case = _upb_oneofcase_field(msg, field);
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800903 if (op == OP_SUBMSG && *oneof_case != field->number) {
904 memset(mem, 0, sizeof(void*));
905 }
906 *oneof_case = field->number;
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800907 }
908
909 /* Store into message. */
910 switch (op) {
911 case OP_SUBMSG: {
Joshua Habermanf41049a2022-01-21 14:41:25 -0800912 upb_Message** submsgp = mem;
913 upb_Message* submsg = *submsgp;
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800914 if (!submsg) {
Joshua Habermanf41049a2022-01-21 14:41:25 -0800915 submsg = decode_newsubmsg(d, subs, field);
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800916 *submsgp = submsg;
917 }
Joshua Habermanf41049a2022-01-21 14:41:25 -0800918 if (UPB_UNLIKELY(type == kUpb_FieldType_Group)) {
919 ptr = decode_togroup(d, ptr, submsg, subs, field);
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800920 } else {
Joshua Habermanf41049a2022-01-21 14:41:25 -0800921 ptr = decode_tosubmsg(d, ptr, submsg, subs, field, val->size);
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800922 }
923 break;
924 }
925 case OP_STRING:
Joshua Haberman9d578a32021-08-02 15:32:01 -0700926 decode_verifyutf8(d, ptr, val->size);
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800927 /* Fallthrough. */
928 case OP_BYTES:
Joshua Haberman9d578a32021-08-02 15:32:01 -0700929 return decode_readstr(d, ptr, val->size, mem);
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800930 case OP_SCALAR_LG2(3):
Joshua Haberman9d578a32021-08-02 15:32:01 -0700931 memcpy(mem, val, 8);
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800932 break;
Joshua Habermanf41049a2022-01-21 14:41:25 -0800933 case OP_ENUM:
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800934 case OP_SCALAR_LG2(2):
Joshua Haberman9d578a32021-08-02 15:32:01 -0700935 memcpy(mem, val, 4);
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800936 break;
937 case OP_SCALAR_LG2(0):
Joshua Haberman9d578a32021-08-02 15:32:01 -0700938 memcpy(mem, val, 1);
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800939 break;
940 default:
941 UPB_UNREACHABLE();
942 }
943
944 return ptr;
945}
946
Joshua Habermanf41049a2022-01-21 14:41:25 -0800947UPB_NOINLINE
948const char* decode_checkrequired(upb_Decoder* d, const char* ptr,
949 const upb_Message* msg,
950 const upb_MiniTable* l) {
951 assert(l->required_count);
952 if (UPB_LIKELY((d->options & kUpb_DecodeOption_CheckRequired) == 0)) {
953 return ptr;
954 }
955 uint64_t msg_head;
956 memcpy(&msg_head, msg, 8);
957 msg_head = _upb_BigEndian_Swap64(msg_head);
958 if (upb_MiniTable_requiredmask(l) & ~msg_head) {
959 d->missing_required = true;
960 }
961 return ptr;
962}
963
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800964UPB_FORCEINLINE
Joshua Habermanf41049a2022-01-21 14:41:25 -0800965static bool decode_tryfastdispatch(upb_Decoder* d, const char** ptr,
966 upb_Message* msg,
967 const upb_MiniTable* layout) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -0800968#if UPB_FASTTABLE
969 if (layout && layout->table_mask != (unsigned char)-1) {
970 uint16_t tag = fastdecode_loadtag(*ptr);
971 intptr_t table = decode_totable(layout);
972 *ptr = fastdecode_tagdispatch(d, *ptr, msg, table, 0, tag);
973 return true;
974 }
975#endif
976 return false;
977}
978
Joshua Habermanf41049a2022-01-21 14:41:25 -0800979static const char* decode_msgset(upb_Decoder* d, const char* ptr,
980 upb_Message* msg,
981 const upb_MiniTable* layout) {
982 // We create a temporary upb_MiniTable here and abuse its fields as temporary
983 // storage, to avoid creating lots of MessageSet-specific parsing code-paths:
984 // 1. We store 'layout' in item_layout.subs. We will need this later as
985 // a key to look up extensions for this MessageSet.
986 // 2. We use item_layout.fields as temporary storage to store the extension
987 // we
988 // found when parsing the type id.
989 upb_MiniTable item_layout = {
990 .subs = (const upb_MiniTable_Sub[]){{.submsg = layout}},
991 .fields = NULL,
992 .size = 0,
993 .field_count = 0,
Joshua Haberman7ecf43f2022-03-14 13:11:29 -0700994 .ext = kUpb_ExtMode_IsMessageSet_ITEM,
Joshua Habermanf41049a2022-01-21 14:41:25 -0800995 .dense_below = 0,
996 .table_mask = -1};
997 return decode_group(d, ptr, msg, &item_layout, 1);
998}
999
1000static const upb_MiniTable_Field* decode_findfield(upb_Decoder* d,
1001 const upb_MiniTable* l,
1002 uint32_t field_number,
1003 int* last_field_index) {
1004 static upb_MiniTable_Field none = {0, 0, 0, 0, 0, 0};
1005 if (l == NULL) return &none;
1006
1007 size_t idx = ((size_t)field_number) - 1; // 0 wraps to SIZE_MAX
1008 if (idx < l->dense_below) {
1009 /* Fastest case: index into dense fields. */
1010 goto found;
1011 }
1012
1013 if (l->dense_below < l->field_count) {
1014 /* Linear search non-dense fields. Resume scanning from last_field_index
1015 * since fields are usually in order. */
1016 int last = *last_field_index;
1017 for (idx = last; idx < l->field_count; idx++) {
1018 if (l->fields[idx].number == field_number) {
1019 goto found;
1020 }
1021 }
1022
1023 for (idx = l->dense_below; idx < last; idx++) {
1024 if (l->fields[idx].number == field_number) {
1025 goto found;
1026 }
1027 }
1028 }
1029
1030 if (d->extreg) {
1031 switch (l->ext) {
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07001032 case kUpb_ExtMode_Extendable: {
Joshua Habermanf41049a2022-01-21 14:41:25 -08001033 const upb_MiniTable_Extension* ext =
1034 _upb_extreg_get(d->extreg, l, field_number);
1035 if (ext) return &ext->field;
1036 break;
1037 }
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07001038 case kUpb_ExtMode_IsMessageSet:
Joshua Habermanf41049a2022-01-21 14:41:25 -08001039 if (field_number == _UPB_MSGSET_ITEM) {
1040 static upb_MiniTable_Field item = {0, 0, 0, 0, TYPE_MSGSET_ITEM, 0};
1041 return &item;
1042 }
1043 break;
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07001044 case kUpb_ExtMode_IsMessageSet_ITEM:
Joshua Habermanf41049a2022-01-21 14:41:25 -08001045 switch (field_number) {
1046 case _UPB_MSGSET_TYPEID: {
1047 static upb_MiniTable_Field type_id = {
1048 0, 0, 0, 0, TYPE_MSGSET_TYPE_ID, 0};
1049 return &type_id;
1050 }
1051 case _UPB_MSGSET_MESSAGE:
1052 if (l->fields) {
1053 // We saw type_id previously and succeeded in looking up msg.
1054 return l->fields;
1055 } else {
1056 // TODO: out of order MessageSet.
1057 // This is a very rare case: all serializers will emit in-order
1058 // MessageSets. To hit this case there has to be some kind of
1059 // re-ordering proxy. We should eventually handle this case, but
1060 // not today.
1061 }
1062 break;
1063 }
1064 }
1065 }
1066
1067 return &none; /* Unknown field. */
1068
1069found:
1070 UPB_ASSERT(l->fields[idx].number == field_number);
1071 *last_field_index = idx;
1072 return &l->fields[idx];
1073}
1074
1075UPB_FORCEINLINE
1076static const char* decode_wireval(upb_Decoder* d, const char* ptr,
1077 const upb_MiniTable_Field* field,
1078 int wire_type, wireval* val, int* op) {
1079 switch (wire_type) {
1080 case kUpb_WireType_Varint:
1081 ptr = decode_varint64(d, ptr, &val->uint64_val);
1082 *op = varint_ops[field->descriptortype];
1083 decode_munge(field->descriptortype, val);
1084 return ptr;
1085 case kUpb_WireType_32Bit:
1086 memcpy(&val->uint32_val, ptr, 4);
1087 val->uint32_val = _upb_BigEndian_Swap32(val->uint32_val);
1088 *op = OP_SCALAR_LG2(2);
1089 if (((1 << field->descriptortype) & FIXED32_OK_MASK) == 0) {
1090 *op = OP_UNKNOWN;
1091 }
1092 return ptr + 4;
1093 case kUpb_WireType_64Bit:
1094 memcpy(&val->uint64_val, ptr, 8);
1095 val->uint64_val = _upb_BigEndian_Swap64(val->uint64_val);
1096 *op = OP_SCALAR_LG2(3);
1097 if (((1 << field->descriptortype) & FIXED64_OK_MASK) == 0) {
1098 *op = OP_UNKNOWN;
1099 }
1100 return ptr + 8;
1101 case kUpb_WireType_Delimited: {
1102 int ndx = field->descriptortype;
1103 uint64_t size;
1104 if (upb_FieldMode_Get(field) == kUpb_FieldMode_Array) ndx += TYPE_COUNT;
1105 ptr = decode_varint64(d, ptr, &size);
1106 if (size >= INT32_MAX || ptr - d->end + (int32_t)size > d->limit) {
1107 break; /* Length overflow. */
1108 }
1109 *op = delim_ops[ndx];
1110 val->size = size;
1111 return ptr;
1112 }
1113 case kUpb_WireType_StartGroup:
1114 val->uint32_val = field->number;
1115 if (field->descriptortype == kUpb_FieldType_Group) {
1116 *op = OP_SUBMSG;
1117 } else if (field->descriptortype == TYPE_MSGSET_ITEM) {
1118 *op = OP_MSGSET_ITEM;
1119 } else {
1120 *op = OP_UNKNOWN;
1121 }
1122 return ptr;
1123 default:
1124 break;
1125 }
1126 return decode_err(d, kUpb_DecodeStatus_Malformed);
1127}
1128
1129UPB_FORCEINLINE
1130static const char* decode_known(upb_Decoder* d, const char* ptr,
1131 upb_Message* msg, const upb_MiniTable* layout,
1132 const upb_MiniTable_Field* field, int op,
1133 wireval* val) {
1134 const upb_MiniTable_Sub* subs = layout->subs;
1135 uint8_t mode = field->mode;
1136
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07001137 if (UPB_UNLIKELY(mode & kUpb_LabelFlags_IsExtension)) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08001138 const upb_MiniTable_Extension* ext_layout =
1139 (const upb_MiniTable_Extension*)field;
1140 upb_Message_Extension* ext =
1141 _upb_Message_Getorcreateext(msg, ext_layout, &d->arena);
1142 if (UPB_UNLIKELY(!ext)) return decode_err(d, kUpb_DecodeStatus_OutOfMemory);
1143 msg = &ext->data;
1144 subs = &ext->ext->sub;
1145 }
1146
1147 switch (mode & kUpb_FieldMode_Mask) {
1148 case kUpb_FieldMode_Array:
1149 return decode_toarray(d, ptr, msg, subs, field, val, op);
1150 case kUpb_FieldMode_Map:
1151 return decode_tomap(d, ptr, msg, subs, field, val);
1152 case kUpb_FieldMode_Scalar:
1153 return decode_tomsg(d, ptr, msg, subs, field, val, op);
1154 default:
1155 UPB_UNREACHABLE();
1156 }
1157}
1158
/* Walks backwards from `ptr` over the varint whose decoded value is `val` and
 * returns a pointer to its first byte.
 *
 * Bytes are consumed from the end towards the start, so the 7-bit groups are
 * accumulated most-significant first.  The caller must guarantee that a
 * varint encoding `val` really ends at `ptr`; nothing else stops the walk. */
static const char* decode_reverse_skip_varint(const char* ptr, uint32_t val) {
  uint32_t accumulated = 0;
  for (;;) {
    --ptr;
    accumulated = (accumulated << 7) | (uint32_t)(*ptr & 0x7f);
    if (accumulated == val) return ptr;
  }
}
1168
1169static const char* decode_unknown(upb_Decoder* d, const char* ptr,
1170 upb_Message* msg, int field_number,
1171 int wire_type, wireval val) {
1172 if (field_number == 0) return decode_err(d, kUpb_DecodeStatus_Malformed);
1173
1174 // Since unknown fields are the uncommon case, we do a little extra work here
1175 // to walk backwards through the buffer to find the field start. This frees
1176 // up a register in the fast paths (when the field is known), which leads to
1177 // significant speedups in benchmarks.
1178 const char* start = ptr;
1179
1180 if (wire_type == kUpb_WireType_Delimited) ptr += val.size;
1181 if (msg) {
1182 switch (wire_type) {
1183 case kUpb_WireType_Varint:
1184 case kUpb_WireType_Delimited:
1185 start--;
1186 while (start[-1] & 0x80) start--;
1187 break;
1188 case kUpb_WireType_32Bit:
1189 start -= 4;
1190 break;
1191 case kUpb_WireType_64Bit:
1192 start -= 8;
1193 break;
1194 default:
1195 break;
1196 }
1197
1198 assert(start == d->debug_valstart);
1199 uint32_t tag = ((uint32_t)field_number << 3) | wire_type;
1200 start = decode_reverse_skip_varint(start, tag);
1201 assert(start == d->debug_tagstart);
1202
1203 if (wire_type == kUpb_WireType_StartGroup) {
1204 d->unknown = start;
1205 d->unknown_msg = msg;
1206 ptr = decode_group(d, ptr, NULL, NULL, field_number);
1207 start = d->unknown;
1208 d->unknown_msg = NULL;
1209 d->unknown = NULL;
1210 }
1211 if (!_upb_Message_AddUnknown(msg, start, ptr - start, &d->arena)) {
1212 return decode_err(d, kUpb_DecodeStatus_OutOfMemory);
1213 }
1214 } else if (wire_type == kUpb_WireType_StartGroup) {
1215 ptr = decode_group(d, ptr, NULL, NULL, field_number);
1216 }
1217 return ptr;
1218}
1219
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001220UPB_NOINLINE
Joshua Habermanf41049a2022-01-21 14:41:25 -08001221static const char* decode_msg(upb_Decoder* d, const char* ptr, upb_Message* msg,
1222 const upb_MiniTable* layout) {
Joshua Haberman9d578a32021-08-02 15:32:01 -07001223 int last_field_index = 0;
Joshua Habermanf41049a2022-01-21 14:41:25 -08001224
1225#if UPB_FASTTABLE
1226 // The first time we want to skip fast dispatch, because we may have just been
1227 // invoked by the fast parser to handle a case that it bailed on.
1228 if (!decode_isdone(d, &ptr)) goto nofast;
1229#endif
1230
1231 while (!decode_isdone(d, &ptr)) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001232 uint32_t tag;
Joshua Habermanf41049a2022-01-21 14:41:25 -08001233 const upb_MiniTable_Field* field;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001234 int field_number;
1235 int wire_type;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001236 wireval val;
1237 int op;
1238
Joshua Habermanf41049a2022-01-21 14:41:25 -08001239 if (decode_tryfastdispatch(d, &ptr, msg, layout)) break;
1240
1241#if UPB_FASTTABLE
1242 nofast:
1243#endif
1244
1245#ifndef NDEBUG
1246 d->debug_tagstart = ptr;
1247#endif
1248
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001249 UPB_ASSERT(ptr < d->limit_ptr);
1250 ptr = decode_tag(d, ptr, &tag);
1251 field_number = tag >> 3;
1252 wire_type = tag & 7;
1253
Joshua Habermanf41049a2022-01-21 14:41:25 -08001254#ifndef NDEBUG
1255 d->debug_valstart = ptr;
1256#endif
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001257
Joshua Habermanf41049a2022-01-21 14:41:25 -08001258 if (wire_type == kUpb_WireType_EndGroup) {
1259 d->end_group = field_number;
1260 return ptr;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001261 }
1262
Joshua Habermanf41049a2022-01-21 14:41:25 -08001263 field = decode_findfield(d, layout, field_number, &last_field_index);
1264 ptr = decode_wireval(d, ptr, field, wire_type, &val, &op);
1265
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001266 if (op >= 0) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08001267 ptr = decode_known(d, ptr, msg, layout, field, op, &val);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001268 } else {
Joshua Habermanf41049a2022-01-21 14:41:25 -08001269 switch (op) {
1270 case OP_UNKNOWN:
1271 ptr = decode_unknown(d, ptr, msg, field_number, wire_type, val);
1272 break;
1273 case OP_MSGSET_ITEM:
1274 ptr = decode_msgset(d, ptr, msg, layout);
1275 break;
1276 case OP_MSGSET_TYPEID: {
1277 const upb_MiniTable_Extension* ext = _upb_extreg_get(
1278 d->extreg, layout->subs[0].submsg, val.uint64_val);
1279 if (ext) ((upb_MiniTable*)layout)->fields = &ext->field;
1280 break;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001281 }
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001282 }
1283 }
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001284 }
Joshua Habermanf41049a2022-01-21 14:41:25 -08001285
1286 return UPB_UNLIKELY(layout && layout->required_count)
1287 ? decode_checkrequired(d, ptr, msg, layout)
1288 : ptr;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001289}
1290
Joshua Habermanf41049a2022-01-21 14:41:25 -08001291const char* fastdecode_generic(struct upb_Decoder* d, const char* ptr,
1292 upb_Message* msg, intptr_t table,
1293 uint64_t hasbits, uint64_t data) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001294 (void)data;
1295 *(uint32_t*)msg |= hasbits;
1296 return decode_msg(d, ptr, msg, decode_totablep(table));
1297}
1298
Joshua Habermanf41049a2022-01-21 14:41:25 -08001299static upb_DecodeStatus decode_top(struct upb_Decoder* d, const char* buf,
1300 void* msg, const upb_MiniTable* l) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001301 if (!decode_tryfastdispatch(d, &buf, msg, l)) {
1302 decode_msg(d, buf, msg, l);
1303 }
Joshua Habermanf41049a2022-01-21 14:41:25 -08001304 if (d->end_group != DECODE_NOGROUP) return kUpb_DecodeStatus_Malformed;
1305 if (d->missing_required) return kUpb_DecodeStatus_MissingRequired;
1306 return kUpb_DecodeStatus_Ok;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001307}
1308
Joshua Habermanf41049a2022-01-21 14:41:25 -08001309upb_DecodeStatus upb_Decode(const char* buf, size_t size, void* msg,
1310 const upb_MiniTable* l,
1311 const upb_ExtensionRegistry* extreg, int options,
1312 upb_Arena* arena) {
1313 upb_Decoder state;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001314 unsigned depth = (unsigned)options >> 16;
1315
Joshua Habermanf41049a2022-01-21 14:41:25 -08001316 if (size <= 16) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001317 memset(&state.patch, 0, 32);
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07001318 if (size) memcpy(&state.patch, buf, size);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001319 buf = state.patch;
1320 state.end = buf + size;
1321 state.limit = 0;
Joshua Habermanf41049a2022-01-21 14:41:25 -08001322 options &= ~kUpb_DecodeOption_AliasString; // Can't alias patch buf.
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001323 } else {
1324 state.end = buf + size - 16;
1325 state.limit = 16;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001326 }
1327
Joshua Habermanf41049a2022-01-21 14:41:25 -08001328 state.extreg = extreg;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001329 state.limit_ptr = state.end;
1330 state.unknown_msg = NULL;
1331 state.depth = depth ? depth : 64;
1332 state.end_group = DECODE_NOGROUP;
Joshua Habermanf41049a2022-01-21 14:41:25 -08001333 state.options = (uint16_t)options;
1334 state.missing_required = false;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001335 state.arena.head = arena->head;
1336 state.arena.last_size = arena->last_size;
Joshua Habermandd69a482021-05-17 22:40:33 -07001337 state.arena.cleanup_metadata = arena->cleanup_metadata;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001338 state.arena.parent = arena;
1339
Joshua Habermanf41049a2022-01-21 14:41:25 -08001340 upb_DecodeStatus status = UPB_SETJMP(state.err);
1341 if (UPB_LIKELY(status == kUpb_DecodeStatus_Ok)) {
1342 status = decode_top(&state, buf, msg, l);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001343 }
1344
1345 arena->head.ptr = state.arena.head.ptr;
1346 arena->head.end = state.arena.head.end;
Joshua Habermandd69a482021-05-17 22:40:33 -07001347 arena->cleanup_metadata = state.arena.cleanup_metadata;
Joshua Habermanf41049a2022-01-21 14:41:25 -08001348 return status;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001349}
1350
Joshua Habermanf41049a2022-01-21 14:41:25 -08001351#undef OP_UNKNOWN
1352#undef OP_SKIP
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001353#undef OP_SCALAR_LG2
1354#undef OP_FIXPCK_LG2
1355#undef OP_VARPCK_LG2
1356#undef OP_STRING
Joshua Habermanf41049a2022-01-21 14:41:25 -08001357#undef OP_BYTES
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001358#undef OP_SUBMSG
Joshua Habermandd69a482021-05-17 22:40:33 -07001359
1360/** upb/encode.c ************************************************************/
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001361/* We encode backwards, to avoid pre-computing lengths (one-pass encode). */
1362
1363
1364#include <setjmp.h>
1365#include <string.h>
1366
1367
1368/* Must be last. */
1369
1370#define UPB_PB_VARINT_MAX_LEN 10
1371
1372UPB_NOINLINE
/* Writes `val` to `buf` as a base-128 varint, least-significant group first,
 * and returns the number of bytes written (1 to 10 for a 64-bit value).
 * `buf` must have room for the full encoding. */
static size_t encode_varint64(uint64_t val, char* buf) {
  size_t len = 0;
  for (;;) {
    uint8_t group = (uint8_t)(val & 0x7fU);
    val >>= 7;
    if (val == 0) {
      /* Final group: no continuation bit. */
      buf[len++] = group;
      return len;
    }
    buf[len++] = (uint8_t)(group | 0x80U);
  }
}
1383
/* ZigZag-encodes a signed 32-bit value: 0, -1, 1, -2, ... map to
 * 0, 1, 2, 3, ...  The sign mask is built with unsigned arithmetic because
 * right-shifting a negative signed value is implementation-defined in C. */
static uint32_t encode_zz32(int32_t n) {
  const uint32_t bits = (uint32_t)n;
  const uint32_t sign_mask = 0u - (bits >> 31); /* all ones iff n < 0 */
  return (bits << 1) ^ sign_mask;
}
/* ZigZag-encodes a signed 64-bit value: 0, -1, 1, -2, ... map to
 * 0, 1, 2, 3, ...  The sign mask is built with unsigned arithmetic because
 * right-shifting a negative signed value is implementation-defined in C. */
static uint64_t encode_zz64(int64_t n) {
  const uint64_t bits = (uint64_t)n;
  const uint64_t sign_mask = (uint64_t)0 - (bits >> 63); /* all ones iff n < 0 */
  return (bits << 1) ^ sign_mask;
}
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001390
1391typedef struct {
1392 jmp_buf err;
Joshua Habermanf41049a2022-01-21 14:41:25 -08001393 upb_alloc* alloc;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001394 char *buf, *ptr, *limit;
1395 int options;
1396 int depth;
1397 _upb_mapsorter sorter;
1398} upb_encstate;
1399
/* Returns the smallest power of two that is >= `bytes`, with a minimum of
 * 128.
 *
 * If no such power of two is representable in size_t, `bytes` itself is
 * returned instead of overflowing (the doubling used to wrap to 0 and loop
 * forever). */
static size_t upb_roundup_pow2(size_t bytes) {
  size_t ret = 128;
  while (ret < bytes) {
    if (ret > SIZE_MAX / 2) return bytes; /* Doubling would overflow. */
    ret *= 2;
  }
  return ret;
}
1407
Joshua Habermanf41049a2022-01-21 14:41:25 -08001408UPB_NORETURN static void encode_err(upb_encstate* e) { UPB_LONGJMP(e->err, 1); }
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001409
1410UPB_NOINLINE
Joshua Habermanf41049a2022-01-21 14:41:25 -08001411static void encode_growbuffer(upb_encstate* e, size_t bytes) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001412 size_t old_size = e->limit - e->buf;
1413 size_t new_size = upb_roundup_pow2(bytes + (e->limit - e->ptr));
Joshua Habermanf41049a2022-01-21 14:41:25 -08001414 char* new_buf = upb_realloc(e->alloc, e->buf, old_size, new_size);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001415
1416 if (!new_buf) encode_err(e);
1417
1418 /* We want previous data at the end, realloc() put it at the beginning. */
1419 if (old_size > 0) {
1420 memmove(new_buf + new_size - old_size, e->buf, old_size);
1421 }
1422
1423 e->ptr = new_buf + new_size - (e->limit - e->ptr);
1424 e->limit = new_buf + new_size;
1425 e->buf = new_buf;
1426
1427 e->ptr -= bytes;
1428}
1429
1430/* Call to ensure that at least "bytes" bytes are available for writing at
1431 * e->ptr. Returns false if the bytes could not be allocated. */
1432UPB_FORCEINLINE
Joshua Habermanf41049a2022-01-21 14:41:25 -08001433static void encode_reserve(upb_encstate* e, size_t bytes) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001434 if ((size_t)(e->ptr - e->buf) < bytes) {
1435 encode_growbuffer(e, bytes);
1436 return;
1437 }
1438
1439 e->ptr -= bytes;
1440}
1441
1442/* Writes the given bytes to the buffer, handling reserve/advance. */
Joshua Habermanf41049a2022-01-21 14:41:25 -08001443static void encode_bytes(upb_encstate* e, const void* data, size_t len) {
1444 if (len == 0) return; /* memcpy() with zero size is UB */
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001445 encode_reserve(e, len);
1446 memcpy(e->ptr, data, len);
1447}
1448
Joshua Habermanf41049a2022-01-21 14:41:25 -08001449static void encode_fixed64(upb_encstate* e, uint64_t val) {
1450 val = _upb_BigEndian_Swap64(val);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001451 encode_bytes(e, &val, sizeof(uint64_t));
1452}
1453
Joshua Habermanf41049a2022-01-21 14:41:25 -08001454static void encode_fixed32(upb_encstate* e, uint32_t val) {
1455 val = _upb_BigEndian_Swap32(val);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001456 encode_bytes(e, &val, sizeof(uint32_t));
1457}
1458
1459UPB_NOINLINE
Joshua Habermanf41049a2022-01-21 14:41:25 -08001460static void encode_longvarint(upb_encstate* e, uint64_t val) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001461 size_t len;
Joshua Habermanf41049a2022-01-21 14:41:25 -08001462 char* start;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001463
1464 encode_reserve(e, UPB_PB_VARINT_MAX_LEN);
1465 len = encode_varint64(val, e->ptr);
1466 start = e->ptr + UPB_PB_VARINT_MAX_LEN - len;
1467 memmove(start, e->ptr, len);
1468 e->ptr = start;
1469}
1470
1471UPB_FORCEINLINE
Joshua Habermanf41049a2022-01-21 14:41:25 -08001472static void encode_varint(upb_encstate* e, uint64_t val) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001473 if (val < 128 && e->ptr != e->buf) {
1474 --e->ptr;
1475 *e->ptr = val;
1476 } else {
1477 encode_longvarint(e, val);
1478 }
1479}
1480
Joshua Habermanf41049a2022-01-21 14:41:25 -08001481static void encode_double(upb_encstate* e, double d) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001482 uint64_t u64;
1483 UPB_ASSERT(sizeof(double) == sizeof(uint64_t));
1484 memcpy(&u64, &d, sizeof(uint64_t));
1485 encode_fixed64(e, u64);
1486}
1487
Joshua Habermanf41049a2022-01-21 14:41:25 -08001488static void encode_float(upb_encstate* e, float d) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001489 uint32_t u32;
1490 UPB_ASSERT(sizeof(float) == sizeof(uint32_t));
1491 memcpy(&u32, &d, sizeof(uint32_t));
1492 encode_fixed32(e, u32);
1493}
1494
Joshua Habermanf41049a2022-01-21 14:41:25 -08001495static void encode_tag(upb_encstate* e, uint32_t field_number,
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001496 uint8_t wire_type) {
1497 encode_varint(e, (field_number << 3) | wire_type);
1498}
1499
Joshua Habermanf41049a2022-01-21 14:41:25 -08001500static void encode_fixedarray(upb_encstate* e, const upb_Array* arr,
1501 size_t elem_size, uint32_t tag) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001502 size_t bytes = arr->len * elem_size;
1503 const char* data = _upb_array_constptr(arr);
1504 const char* ptr = data + bytes - elem_size;
Joshua Habermanf41049a2022-01-21 14:41:25 -08001505
1506 if (tag || !_upb_IsLittleEndian()) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001507 while (true) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08001508 if (elem_size == 4) {
1509 uint32_t val;
1510 memcpy(&val, ptr, sizeof(val));
1511 val = _upb_BigEndian_Swap32(val);
1512 encode_bytes(e, &val, elem_size);
1513 } else {
1514 UPB_ASSERT(elem_size == 8);
1515 uint64_t val;
1516 memcpy(&val, ptr, sizeof(val));
1517 val = _upb_BigEndian_Swap64(val);
1518 encode_bytes(e, &val, elem_size);
1519 }
1520
1521 if (tag) encode_varint(e, tag);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001522 if (ptr == data) break;
1523 ptr -= elem_size;
1524 }
1525 } else {
1526 encode_bytes(e, data, bytes);
1527 }
1528}
1529
Joshua Habermanf41049a2022-01-21 14:41:25 -08001530static void encode_message(upb_encstate* e, const upb_Message* msg,
1531 const upb_MiniTable* m, size_t* size);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001532
Joshua Habermanf41049a2022-01-21 14:41:25 -08001533static void encode_scalar(upb_encstate* e, const void* _field_mem,
1534 const upb_MiniTable_Sub* subs,
1535 const upb_MiniTable_Field* f) {
1536 const char* field_mem = _field_mem;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001537 int wire_type;
1538
1539#define CASE(ctype, type, wtype, encodeval) \
1540 { \
Joshua Habermanf41049a2022-01-21 14:41:25 -08001541 ctype val = *(ctype*)field_mem; \
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001542 encode_##type(e, encodeval); \
1543 wire_type = wtype; \
1544 break; \
1545 }
1546
1547 switch (f->descriptortype) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08001548 case kUpb_FieldType_Double:
1549 CASE(double, double, kUpb_WireType_64Bit, val);
1550 case kUpb_FieldType_Float:
1551 CASE(float, float, kUpb_WireType_32Bit, val);
1552 case kUpb_FieldType_Int64:
1553 case kUpb_FieldType_UInt64:
1554 CASE(uint64_t, varint, kUpb_WireType_Varint, val);
1555 case kUpb_FieldType_UInt32:
1556 CASE(uint32_t, varint, kUpb_WireType_Varint, val);
1557 case kUpb_FieldType_Int32:
1558 case kUpb_FieldType_Enum:
1559 CASE(int32_t, varint, kUpb_WireType_Varint, (int64_t)val);
1560 case kUpb_FieldType_SFixed64:
1561 case kUpb_FieldType_Fixed64:
1562 CASE(uint64_t, fixed64, kUpb_WireType_64Bit, val);
1563 case kUpb_FieldType_Fixed32:
1564 case kUpb_FieldType_SFixed32:
1565 CASE(uint32_t, fixed32, kUpb_WireType_32Bit, val);
1566 case kUpb_FieldType_Bool:
1567 CASE(bool, varint, kUpb_WireType_Varint, val);
1568 case kUpb_FieldType_SInt32:
1569 CASE(int32_t, varint, kUpb_WireType_Varint, encode_zz32(val));
1570 case kUpb_FieldType_SInt64:
1571 CASE(int64_t, varint, kUpb_WireType_Varint, encode_zz64(val));
1572 case kUpb_FieldType_String:
1573 case kUpb_FieldType_Bytes: {
1574 upb_StringView view = *(upb_StringView*)field_mem;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001575 encode_bytes(e, view.data, view.size);
1576 encode_varint(e, view.size);
Joshua Habermanf41049a2022-01-21 14:41:25 -08001577 wire_type = kUpb_WireType_Delimited;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001578 break;
1579 }
Joshua Habermanf41049a2022-01-21 14:41:25 -08001580 case kUpb_FieldType_Group: {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001581 size_t size;
Joshua Habermanf41049a2022-01-21 14:41:25 -08001582 void* submsg = *(void**)field_mem;
1583 const upb_MiniTable* subm = subs[f->submsg_index].submsg;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001584 if (submsg == NULL) {
1585 return;
1586 }
1587 if (--e->depth == 0) encode_err(e);
Joshua Habermanf41049a2022-01-21 14:41:25 -08001588 encode_tag(e, f->number, kUpb_WireType_EndGroup);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001589 encode_message(e, submsg, subm, &size);
Joshua Habermanf41049a2022-01-21 14:41:25 -08001590 wire_type = kUpb_WireType_StartGroup;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001591 e->depth++;
1592 break;
1593 }
Joshua Habermanf41049a2022-01-21 14:41:25 -08001594 case kUpb_FieldType_Message: {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001595 size_t size;
Joshua Habermanf41049a2022-01-21 14:41:25 -08001596 void* submsg = *(void**)field_mem;
1597 const upb_MiniTable* subm = subs[f->submsg_index].submsg;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001598 if (submsg == NULL) {
1599 return;
1600 }
1601 if (--e->depth == 0) encode_err(e);
1602 encode_message(e, submsg, subm, &size);
1603 encode_varint(e, size);
Joshua Habermanf41049a2022-01-21 14:41:25 -08001604 wire_type = kUpb_WireType_Delimited;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001605 e->depth++;
1606 break;
1607 }
1608 default:
1609 UPB_UNREACHABLE();
1610 }
1611#undef CASE
1612
1613 encode_tag(e, f->number, wire_type);
1614}
1615
Joshua Habermanf41049a2022-01-21 14:41:25 -08001616static void encode_array(upb_encstate* e, const upb_Message* msg,
1617 const upb_MiniTable_Sub* subs,
1618 const upb_MiniTable_Field* f) {
1619 const upb_Array* arr = *UPB_PTR_AT(msg, f->offset, upb_Array*);
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07001620 bool packed = f->mode & kUpb_LabelFlags_IsPacked;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001621 size_t pre_len = e->limit - e->ptr;
1622
1623 if (arr == NULL || arr->len == 0) {
1624 return;
1625 }
1626
1627#define VARINT_CASE(ctype, encode) \
1628 { \
Joshua Habermanf41049a2022-01-21 14:41:25 -08001629 const ctype* start = _upb_array_constptr(arr); \
1630 const ctype* ptr = start + arr->len; \
1631 uint32_t tag = packed ? 0 : (f->number << 3) | kUpb_WireType_Varint; \
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001632 do { \
1633 ptr--; \
1634 encode_varint(e, encode); \
1635 if (tag) encode_varint(e, tag); \
1636 } while (ptr != start); \
1637 } \
1638 break;
1639
1640#define TAG(wire_type) (packed ? 0 : (f->number << 3 | wire_type))
1641
1642 switch (f->descriptortype) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08001643 case kUpb_FieldType_Double:
1644 encode_fixedarray(e, arr, sizeof(double), TAG(kUpb_WireType_64Bit));
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001645 break;
Joshua Habermanf41049a2022-01-21 14:41:25 -08001646 case kUpb_FieldType_Float:
1647 encode_fixedarray(e, arr, sizeof(float), TAG(kUpb_WireType_32Bit));
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001648 break;
Joshua Habermanf41049a2022-01-21 14:41:25 -08001649 case kUpb_FieldType_SFixed64:
1650 case kUpb_FieldType_Fixed64:
1651 encode_fixedarray(e, arr, sizeof(uint64_t), TAG(kUpb_WireType_64Bit));
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001652 break;
Joshua Habermanf41049a2022-01-21 14:41:25 -08001653 case kUpb_FieldType_Fixed32:
1654 case kUpb_FieldType_SFixed32:
1655 encode_fixedarray(e, arr, sizeof(uint32_t), TAG(kUpb_WireType_32Bit));
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001656 break;
Joshua Habermanf41049a2022-01-21 14:41:25 -08001657 case kUpb_FieldType_Int64:
1658 case kUpb_FieldType_UInt64:
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001659 VARINT_CASE(uint64_t, *ptr);
Joshua Habermanf41049a2022-01-21 14:41:25 -08001660 case kUpb_FieldType_UInt32:
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001661 VARINT_CASE(uint32_t, *ptr);
Joshua Habermanf41049a2022-01-21 14:41:25 -08001662 case kUpb_FieldType_Int32:
1663 case kUpb_FieldType_Enum:
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001664 VARINT_CASE(int32_t, (int64_t)*ptr);
Joshua Habermanf41049a2022-01-21 14:41:25 -08001665 case kUpb_FieldType_Bool:
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001666 VARINT_CASE(bool, *ptr);
Joshua Habermanf41049a2022-01-21 14:41:25 -08001667 case kUpb_FieldType_SInt32:
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001668 VARINT_CASE(int32_t, encode_zz32(*ptr));
Joshua Habermanf41049a2022-01-21 14:41:25 -08001669 case kUpb_FieldType_SInt64:
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001670 VARINT_CASE(int64_t, encode_zz64(*ptr));
Joshua Habermanf41049a2022-01-21 14:41:25 -08001671 case kUpb_FieldType_String:
1672 case kUpb_FieldType_Bytes: {
1673 const upb_StringView* start = _upb_array_constptr(arr);
1674 const upb_StringView* ptr = start + arr->len;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001675 do {
1676 ptr--;
1677 encode_bytes(e, ptr->data, ptr->size);
1678 encode_varint(e, ptr->size);
Joshua Habermanf41049a2022-01-21 14:41:25 -08001679 encode_tag(e, f->number, kUpb_WireType_Delimited);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001680 } while (ptr != start);
1681 return;
1682 }
Joshua Habermanf41049a2022-01-21 14:41:25 -08001683 case kUpb_FieldType_Group: {
1684 const void* const* start = _upb_array_constptr(arr);
1685 const void* const* ptr = start + arr->len;
1686 const upb_MiniTable* subm = subs[f->submsg_index].submsg;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001687 if (--e->depth == 0) encode_err(e);
1688 do {
1689 size_t size;
1690 ptr--;
Joshua Habermanf41049a2022-01-21 14:41:25 -08001691 encode_tag(e, f->number, kUpb_WireType_EndGroup);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001692 encode_message(e, *ptr, subm, &size);
Joshua Habermanf41049a2022-01-21 14:41:25 -08001693 encode_tag(e, f->number, kUpb_WireType_StartGroup);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001694 } while (ptr != start);
1695 e->depth++;
1696 return;
1697 }
Joshua Habermanf41049a2022-01-21 14:41:25 -08001698 case kUpb_FieldType_Message: {
1699 const void* const* start = _upb_array_constptr(arr);
1700 const void* const* ptr = start + arr->len;
1701 const upb_MiniTable* subm = subs[f->submsg_index].submsg;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001702 if (--e->depth == 0) encode_err(e);
1703 do {
1704 size_t size;
1705 ptr--;
1706 encode_message(e, *ptr, subm, &size);
1707 encode_varint(e, size);
Joshua Habermanf41049a2022-01-21 14:41:25 -08001708 encode_tag(e, f->number, kUpb_WireType_Delimited);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001709 } while (ptr != start);
1710 e->depth++;
1711 return;
1712 }
1713 }
1714#undef VARINT_CASE
1715
1716 if (packed) {
1717 encode_varint(e, e->limit - e->ptr - pre_len);
Joshua Habermanf41049a2022-01-21 14:41:25 -08001718 encode_tag(e, f->number, kUpb_WireType_Delimited);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001719 }
1720}
1721
Joshua Habermanf41049a2022-01-21 14:41:25 -08001722static void encode_mapentry(upb_encstate* e, uint32_t number,
1723 const upb_MiniTable* layout,
1724 const upb_MapEntry* ent) {
1725 const upb_MiniTable_Field* key_field = &layout->fields[0];
1726 const upb_MiniTable_Field* val_field = &layout->fields[1];
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001727 size_t pre_len = e->limit - e->ptr;
1728 size_t size;
Joshua Habermanf41049a2022-01-21 14:41:25 -08001729 encode_scalar(e, &ent->v, layout->subs, val_field);
1730 encode_scalar(e, &ent->k, layout->subs, key_field);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001731 size = (e->limit - e->ptr) - pre_len;
1732 encode_varint(e, size);
Joshua Habermanf41049a2022-01-21 14:41:25 -08001733 encode_tag(e, number, kUpb_WireType_Delimited);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001734}
1735
Joshua Habermanf41049a2022-01-21 14:41:25 -08001736static void encode_map(upb_encstate* e, const upb_Message* msg,
1737 const upb_MiniTable_Sub* subs,
1738 const upb_MiniTable_Field* f) {
1739 const upb_Map* map = *UPB_PTR_AT(msg, f->offset, const upb_Map*);
1740 const upb_MiniTable* layout = subs[f->submsg_index].submsg;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001741 UPB_ASSERT(layout->field_count == 2);
1742
1743 if (map == NULL) return;
1744
Joshua Habermanf41049a2022-01-21 14:41:25 -08001745 if (e->options & kUpb_Encode_Deterministic) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001746 _upb_sortedmap sorted;
1747 _upb_mapsorter_pushmap(&e->sorter, layout->fields[0].descriptortype, map,
1748 &sorted);
Joshua Habermanf41049a2022-01-21 14:41:25 -08001749 upb_MapEntry ent;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001750 while (_upb_sortedmap_next(&e->sorter, map, &sorted, &ent)) {
1751 encode_mapentry(e, f->number, layout, &ent);
1752 }
1753 _upb_mapsorter_popmap(&e->sorter, &sorted);
1754 } else {
1755 upb_strtable_iter i;
1756 upb_strtable_begin(&i, &map->table);
Joshua Habermanf41049a2022-01-21 14:41:25 -08001757 for (; !upb_strtable_done(&i); upb_strtable_next(&i)) {
1758 upb_StringView key = upb_strtable_iter_key(&i);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001759 const upb_value val = upb_strtable_iter_value(&i);
Joshua Habermanf41049a2022-01-21 14:41:25 -08001760 upb_MapEntry ent;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001761 _upb_map_fromkey(key, &ent.k, map->key_size);
1762 _upb_map_fromvalue(val, &ent.v, map->val_size);
1763 encode_mapentry(e, f->number, layout, &ent);
1764 }
1765 }
1766}
1767
Joshua Habermanf41049a2022-01-21 14:41:25 -08001768static bool encode_shouldencode(upb_encstate* e, const upb_Message* msg,
1769 const upb_MiniTable_Sub* subs,
1770 const upb_MiniTable_Field* f) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001771 if (f->presence == 0) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08001772 /* Proto3 presence or map/array. */
1773 const void* mem = UPB_PTR_AT(msg, f->offset, void);
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07001774 switch (f->mode >> kUpb_FieldRep_Shift) {
1775 case kUpb_FieldRep_1Byte: {
Joshua Habermanf41049a2022-01-21 14:41:25 -08001776 char ch;
1777 memcpy(&ch, mem, 1);
1778 return ch != 0;
1779 }
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07001780#if UINTPTR_MAX == 0xffffffff
1781 case kUpb_FieldRep_Pointer:
1782#endif
1783 case kUpb_FieldRep_4Byte: {
Joshua Habermanf41049a2022-01-21 14:41:25 -08001784 uint32_t u32;
1785 memcpy(&u32, mem, 4);
1786 return u32 != 0;
1787 }
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07001788#if UINTPTR_MAX != 0xffffffff
1789 case kUpb_FieldRep_Pointer:
1790#endif
1791 case kUpb_FieldRep_8Byte: {
Joshua Habermanf41049a2022-01-21 14:41:25 -08001792 uint64_t u64;
1793 memcpy(&u64, mem, 8);
1794 return u64 != 0;
1795 }
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07001796 case kUpb_FieldRep_StringView: {
Joshua Habermanf41049a2022-01-21 14:41:25 -08001797 const upb_StringView* str = (const upb_StringView*)mem;
1798 return str->size != 0;
1799 }
1800 default:
1801 UPB_UNREACHABLE();
1802 }
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001803 } else if (f->presence > 0) {
1804 /* Proto2 presence: hasbit. */
Joshua Habermanf41049a2022-01-21 14:41:25 -08001805 return _upb_hasbit_field(msg, f);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001806 } else {
1807 /* Field is in a oneof. */
Joshua Habermanf41049a2022-01-21 14:41:25 -08001808 return _upb_getoneofcase_field(msg, f) == f->number;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001809 }
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001810}
1811
Joshua Habermanf41049a2022-01-21 14:41:25 -08001812static void encode_field(upb_encstate* e, const upb_Message* msg,
1813 const upb_MiniTable_Sub* subs,
1814 const upb_MiniTable_Field* field) {
1815 switch (upb_FieldMode_Get(field)) {
1816 case kUpb_FieldMode_Array:
1817 encode_array(e, msg, subs, field);
1818 break;
1819 case kUpb_FieldMode_Map:
1820 encode_map(e, msg, subs, field);
1821 break;
1822 case kUpb_FieldMode_Scalar:
1823 encode_scalar(e, UPB_PTR_AT(msg, field->offset, void), subs, field);
1824 break;
1825 default:
1826 UPB_UNREACHABLE();
1827 }
1828}
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001829
Joshua Habermanf41049a2022-01-21 14:41:25 -08001830/* message MessageSet {
1831 * repeated group Item = 1 {
1832 * required int32 type_id = 2;
1833 * required string message = 3;
1834 * }
1835 * } */
1836static void encode_msgset_item(upb_encstate* e,
1837 const upb_Message_Extension* ext) {
1838 size_t size;
1839 encode_tag(e, 1, kUpb_WireType_EndGroup);
1840 encode_message(e, ext->data.ptr, ext->ext->sub.submsg, &size);
1841 encode_varint(e, size);
1842 encode_tag(e, 3, kUpb_WireType_Delimited);
1843 encode_varint(e, ext->ext->field.number);
1844 encode_tag(e, 2, kUpb_WireType_Varint);
1845 encode_tag(e, 1, kUpb_WireType_StartGroup);
1846}
1847
1848static void encode_message(upb_encstate* e, const upb_Message* msg,
1849 const upb_MiniTable* m, size_t* size) {
1850 size_t pre_len = e->limit - e->ptr;
1851
1852 if ((e->options & kUpb_Encode_CheckRequired) && m->required_count) {
1853 uint64_t msg_head;
1854 memcpy(&msg_head, msg, 8);
1855 msg_head = _upb_BigEndian_Swap64(msg_head);
1856 if (upb_MiniTable_requiredmask(m) & ~msg_head) {
1857 encode_err(e);
1858 }
1859 }
1860
1861 if ((e->options & kUpb_Encode_SkipUnknown) == 0) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001862 size_t unknown_size;
Joshua Habermanf41049a2022-01-21 14:41:25 -08001863 const char* unknown = upb_Message_GetUnknown(msg, &unknown_size);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001864
1865 if (unknown) {
1866 encode_bytes(e, unknown, unknown_size);
1867 }
1868 }
1869
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07001870 if (m->ext != kUpb_ExtMode_NonExtendable) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08001871 /* Encode all extensions together. Unlike C++, we do not attempt to keep
1872 * these in field number order relative to normal fields or even to each
1873 * other. */
1874 size_t ext_count;
1875 const upb_Message_Extension* ext = _upb_Message_Getexts(msg, &ext_count);
Joshua Habermanf41049a2022-01-21 14:41:25 -08001876 if (ext_count) {
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07001877 const upb_Message_Extension* end = ext + ext_count;
Joshua Habermanf41049a2022-01-21 14:41:25 -08001878 for (; ext != end; ext++) {
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07001879 if (UPB_UNLIKELY(m->ext == kUpb_ExtMode_IsMessageSet)) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08001880 encode_msgset_item(e, ext);
1881 } else {
1882 encode_field(e, &ext->data, &ext->ext->sub, &ext->ext->field);
1883 }
1884 }
1885 }
1886 }
1887
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07001888 if (m->field_count) {
1889 const upb_MiniTable_Field* f = &m->fields[m->field_count];
1890 const upb_MiniTable_Field* first = &m->fields[0];
1891 while (f != first) {
1892 f--;
1893 if (encode_shouldencode(e, msg, m->subs, f)) {
1894 encode_field(e, msg, m->subs, f);
1895 }
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001896 }
1897 }
1898
1899 *size = (e->limit - e->ptr) - pre_len;
1900}
1901
Joshua Habermanf41049a2022-01-21 14:41:25 -08001902char* upb_Encode(const void* msg, const upb_MiniTable* l, int options,
1903 upb_Arena* arena, size_t* size) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001904 upb_encstate e;
1905 unsigned depth = (unsigned)options >> 16;
1906
Joshua Habermanf41049a2022-01-21 14:41:25 -08001907 e.alloc = upb_Arena_Alloc(arena);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001908 e.buf = NULL;
1909 e.limit = NULL;
1910 e.ptr = NULL;
1911 e.depth = depth ? depth : 64;
1912 e.options = options;
1913 _upb_mapsorter_init(&e.sorter);
Joshua Habermanf41049a2022-01-21 14:41:25 -08001914 char* ret = NULL;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001915
1916 if (UPB_SETJMP(e.err)) {
1917 *size = 0;
1918 ret = NULL;
1919 } else {
1920 encode_message(&e, msg, l, size);
1921 *size = e.limit - e.ptr;
1922 if (*size == 0) {
1923 static char ch;
1924 ret = &ch;
1925 } else {
1926 UPB_ASSERT(e.ptr);
1927 ret = e.ptr;
1928 }
1929 }
1930
1931 _upb_mapsorter_destroy(&e.sorter);
1932 return ret;
1933}
1934
Joshua Habermandd69a482021-05-17 22:40:33 -07001935/** upb/msg.c ************************************************************/
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001936
1937
Joshua Habermanf41049a2022-01-21 14:41:25 -08001938/** upb_Message
1939 * *******************************************************************/
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001940
Joshua Habermanf41049a2022-01-21 14:41:25 -08001941static const size_t overhead = sizeof(upb_Message_InternalData);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001942
Joshua Habermanf41049a2022-01-21 14:41:25 -08001943static const upb_Message_Internal* upb_Message_Getinternal_const(
1944 const upb_Message* msg) {
1945 ptrdiff_t size = sizeof(upb_Message_Internal);
1946 return (upb_Message_Internal*)((char*)msg - size);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001947}
1948
Joshua Habermanf41049a2022-01-21 14:41:25 -08001949upb_Message* _upb_Message_New(const upb_MiniTable* l, upb_Arena* a) {
1950 return _upb_Message_New_inl(l, a);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001951}
1952
Joshua Habermanf41049a2022-01-21 14:41:25 -08001953void _upb_Message_Clear(upb_Message* msg, const upb_MiniTable* l) {
1954 void* mem = UPB_PTR_AT(msg, -sizeof(upb_Message_Internal), char);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001955 memset(mem, 0, upb_msg_sizeof(l));
1956}
1957
Joshua Habermanf41049a2022-01-21 14:41:25 -08001958static bool realloc_internal(upb_Message* msg, size_t need, upb_Arena* arena) {
1959 upb_Message_Internal* in = upb_Message_Getinternal(msg);
Joshua Haberman9d578a32021-08-02 15:32:01 -07001960 if (!in->internal) {
1961 /* No internal data, allocate from scratch. */
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07001962 size_t size = UPB_MAX(128, _upb_Log2CeilingSize(need + overhead));
Joshua Habermanf41049a2022-01-21 14:41:25 -08001963 upb_Message_InternalData* internal = upb_Arena_Malloc(arena, size);
Joshua Haberman9d578a32021-08-02 15:32:01 -07001964 if (!internal) return false;
1965 internal->size = size;
1966 internal->unknown_end = overhead;
1967 internal->ext_begin = size;
1968 in->internal = internal;
1969 } else if (in->internal->ext_begin - in->internal->unknown_end < need) {
1970 /* Internal data is too small, reallocate. */
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07001971 size_t new_size = _upb_Log2CeilingSize(in->internal->size + need);
Joshua Haberman9d578a32021-08-02 15:32:01 -07001972 size_t ext_bytes = in->internal->size - in->internal->ext_begin;
1973 size_t new_ext_begin = new_size - ext_bytes;
Joshua Habermanf41049a2022-01-21 14:41:25 -08001974 upb_Message_InternalData* internal =
1975 upb_Arena_Realloc(arena, in->internal, in->internal->size, new_size);
Joshua Haberman9d578a32021-08-02 15:32:01 -07001976 if (!internal) return false;
1977 if (ext_bytes) {
1978 /* Need to move extension data to the end. */
Joshua Habermanf41049a2022-01-21 14:41:25 -08001979 char* ptr = (char*)internal;
Joshua Haberman9d578a32021-08-02 15:32:01 -07001980 memmove(ptr + new_ext_begin, ptr + internal->ext_begin, ext_bytes);
1981 }
1982 internal->ext_begin = new_ext_begin;
1983 internal->size = new_size;
1984 in->internal = internal;
1985 }
1986 UPB_ASSERT(in->internal->ext_begin - in->internal->unknown_end >= need);
1987 return true;
1988}
1989
Joshua Habermanf41049a2022-01-21 14:41:25 -08001990bool _upb_Message_AddUnknown(upb_Message* msg, const char* data, size_t len,
1991 upb_Arena* arena) {
Joshua Haberman9d578a32021-08-02 15:32:01 -07001992 if (!realloc_internal(msg, len, arena)) return false;
Joshua Habermanf41049a2022-01-21 14:41:25 -08001993 upb_Message_Internal* in = upb_Message_Getinternal(msg);
Joshua Haberman9d578a32021-08-02 15:32:01 -07001994 memcpy(UPB_PTR_AT(in->internal, in->internal->unknown_end, char), data, len);
1995 in->internal->unknown_end += len;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08001996 return true;
1997}
1998
Joshua Habermanf41049a2022-01-21 14:41:25 -08001999void _upb_Message_DiscardUnknown_shallow(upb_Message* msg) {
2000 upb_Message_Internal* in = upb_Message_Getinternal(msg);
Joshua Haberman9d578a32021-08-02 15:32:01 -07002001 if (in->internal) {
2002 in->internal->unknown_end = overhead;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002003 }
2004}
2005
Joshua Habermanf41049a2022-01-21 14:41:25 -08002006const char* upb_Message_GetUnknown(const upb_Message* msg, size_t* len) {
2007 const upb_Message_Internal* in = upb_Message_Getinternal_const(msg);
Joshua Haberman9d578a32021-08-02 15:32:01 -07002008 if (in->internal) {
2009 *len = in->internal->unknown_end - overhead;
2010 return (char*)(in->internal + 1);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002011 } else {
2012 *len = 0;
2013 return NULL;
2014 }
2015}
2016
Joshua Habermanf41049a2022-01-21 14:41:25 -08002017const upb_Message_Extension* _upb_Message_Getexts(const upb_Message* msg,
2018 size_t* count) {
2019 const upb_Message_Internal* in = upb_Message_Getinternal_const(msg);
Joshua Haberman9d578a32021-08-02 15:32:01 -07002020 if (in->internal) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08002021 *count = (in->internal->size - in->internal->ext_begin) /
2022 sizeof(upb_Message_Extension);
Joshua Haberman9d578a32021-08-02 15:32:01 -07002023 return UPB_PTR_AT(in->internal, in->internal->ext_begin, void);
2024 } else {
2025 *count = 0;
2026 return NULL;
2027 }
2028}
2029
Joshua Habermanf41049a2022-01-21 14:41:25 -08002030const upb_Message_Extension* _upb_Message_Getext(
2031 const upb_Message* msg, const upb_MiniTable_Extension* e) {
Joshua Haberman9d578a32021-08-02 15:32:01 -07002032 size_t n;
Joshua Habermanf41049a2022-01-21 14:41:25 -08002033 const upb_Message_Extension* ext = _upb_Message_Getexts(msg, &n);
Joshua Haberman9d578a32021-08-02 15:32:01 -07002034
2035 /* For now we use linear search exclusively to find extensions. If this
2036 * becomes an issue due to messages with lots of extensions, we can introduce
2037 * a table of some sort. */
2038 for (size_t i = 0; i < n; i++) {
2039 if (ext[i].ext == e) {
2040 return &ext[i];
2041 }
2042 }
2043
2044 return NULL;
2045}
2046
Joshua Habermanf41049a2022-01-21 14:41:25 -08002047void _upb_Message_Clearext(upb_Message* msg,
2048 const upb_MiniTable_Extension* ext_l) {
2049 upb_Message_Internal* in = upb_Message_Getinternal(msg);
2050 if (!in->internal) return;
2051 const upb_Message_Extension* base =
2052 UPB_PTR_AT(in->internal, in->internal->ext_begin, void);
2053 upb_Message_Extension* ext =
2054 (upb_Message_Extension*)_upb_Message_Getext(msg, ext_l);
2055 if (ext) {
2056 *ext = *base;
2057 in->internal->ext_begin += sizeof(upb_Message_Extension);
2058 }
2059}
2060
2061upb_Message_Extension* _upb_Message_Getorcreateext(
2062 upb_Message* msg, const upb_MiniTable_Extension* e, upb_Arena* arena) {
2063 upb_Message_Extension* ext =
2064 (upb_Message_Extension*)_upb_Message_Getext(msg, e);
Joshua Haberman9d578a32021-08-02 15:32:01 -07002065 if (ext) return ext;
Joshua Habermanf41049a2022-01-21 14:41:25 -08002066 if (!realloc_internal(msg, sizeof(upb_Message_Extension), arena)) return NULL;
2067 upb_Message_Internal* in = upb_Message_Getinternal(msg);
2068 in->internal->ext_begin -= sizeof(upb_Message_Extension);
Joshua Haberman9d578a32021-08-02 15:32:01 -07002069 ext = UPB_PTR_AT(in->internal, in->internal->ext_begin, void);
Joshua Habermanf41049a2022-01-21 14:41:25 -08002070 memset(ext, 0, sizeof(upb_Message_Extension));
Joshua Haberman9d578a32021-08-02 15:32:01 -07002071 ext->ext = e;
2072 return ext;
2073}
2074
Joshua Habermanf41049a2022-01-21 14:41:25 -08002075size_t upb_Message_ExtensionCount(const upb_Message* msg) {
2076 size_t count;
2077 _upb_Message_Getexts(msg, &count);
2078 return count;
2079}
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002080
Joshua Habermanf41049a2022-01-21 14:41:25 -08002081/** upb_Array *****************************************************************/
2082
2083bool _upb_array_realloc(upb_Array* arr, size_t min_size, upb_Arena* arena) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002084 size_t new_size = UPB_MAX(arr->size, 4);
2085 int elem_size_lg2 = arr->data & 7;
2086 size_t old_bytes = arr->size << elem_size_lg2;
2087 size_t new_bytes;
2088 void* ptr = _upb_array_ptr(arr);
2089
2090 /* Log2 ceiling of size. */
2091 while (new_size < min_size) new_size *= 2;
2092
2093 new_bytes = new_size << elem_size_lg2;
Joshua Habermanf41049a2022-01-21 14:41:25 -08002094 ptr = upb_Arena_Realloc(arena, ptr, old_bytes, new_bytes);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002095
2096 if (!ptr) {
2097 return false;
2098 }
2099
2100 arr->data = _upb_tag_arrptr(ptr, elem_size_lg2);
2101 arr->size = new_size;
2102 return true;
2103}
2104
Joshua Habermanf41049a2022-01-21 14:41:25 -08002105static upb_Array* getorcreate_array(upb_Array** arr_ptr, int elem_size_lg2,
2106 upb_Arena* arena) {
2107 upb_Array* arr = *arr_ptr;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002108 if (!arr) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08002109 arr = _upb_Array_New(arena, 4, elem_size_lg2);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002110 if (!arr) return NULL;
2111 *arr_ptr = arr;
2112 }
2113 return arr;
2114}
2115
Joshua Habermanf41049a2022-01-21 14:41:25 -08002116void* _upb_Array_Resize_fallback(upb_Array** arr_ptr, size_t size,
2117 int elem_size_lg2, upb_Arena* arena) {
2118 upb_Array* arr = getorcreate_array(arr_ptr, elem_size_lg2, arena);
2119 return arr && _upb_Array_Resize(arr, size, arena) ? _upb_array_ptr(arr)
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002120 : NULL;
2121}
2122
Joshua Habermanf41049a2022-01-21 14:41:25 -08002123bool _upb_Array_Append_fallback(upb_Array** arr_ptr, const void* value,
2124 int elem_size_lg2, upb_Arena* arena) {
2125 upb_Array* arr = getorcreate_array(arr_ptr, elem_size_lg2, arena);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002126 if (!arr) return false;
2127
2128 size_t elems = arr->len;
2129
Joshua Habermanf41049a2022-01-21 14:41:25 -08002130 if (!_upb_Array_Resize(arr, elems + 1, arena)) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002131 return false;
2132 }
2133
Joshua Habermanf41049a2022-01-21 14:41:25 -08002134 char* data = _upb_array_ptr(arr);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002135 memcpy(data + (elems << elem_size_lg2), value, 1 << elem_size_lg2);
2136 return true;
2137}
2138
Joshua Habermanf41049a2022-01-21 14:41:25 -08002139/** upb_Map *******************************************************************/
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002140
Joshua Habermanf41049a2022-01-21 14:41:25 -08002141upb_Map* _upb_Map_New(upb_Arena* a, size_t key_size, size_t value_size) {
2142 upb_Map* map = upb_Arena_Malloc(a, sizeof(upb_Map));
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002143
2144 if (!map) {
2145 return NULL;
2146 }
2147
Joshua Habermandd69a482021-05-17 22:40:33 -07002148 upb_strtable_init(&map->table, 4, a);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002149 map->key_size = key_size;
2150 map->val_size = value_size;
2151
2152 return map;
2153}
2154
Joshua Habermanf41049a2022-01-21 14:41:25 -08002155static void _upb_mapsorter_getkeys(const void* _a, const void* _b, void* a_key,
2156 void* b_key, size_t size) {
2157 const upb_tabent* const* a = _a;
2158 const upb_tabent* const* b = _b;
2159 upb_StringView a_tabkey = upb_tabstrview((*a)->key);
2160 upb_StringView b_tabkey = upb_tabstrview((*b)->key);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002161 _upb_map_fromkey(a_tabkey, a_key, size);
2162 _upb_map_fromkey(b_tabkey, b_key, size);
2163}
2164
Joshua Habermanf41049a2022-01-21 14:41:25 -08002165#define UPB_COMPARE_INTEGERS(a, b) ((a) < (b) ? -1 : ((a) == (b) ? 0 : 1))
2166
2167static int _upb_mapsorter_cmpi64(const void* _a, const void* _b) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002168 int64_t a, b;
2169 _upb_mapsorter_getkeys(_a, _b, &a, &b, 8);
Joshua Habermanf41049a2022-01-21 14:41:25 -08002170 return UPB_COMPARE_INTEGERS(a, b);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002171}
2172
Joshua Habermanf41049a2022-01-21 14:41:25 -08002173static int _upb_mapsorter_cmpu64(const void* _a, const void* _b) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002174 uint64_t a, b;
2175 _upb_mapsorter_getkeys(_a, _b, &a, &b, 8);
Joshua Habermanf41049a2022-01-21 14:41:25 -08002176 return UPB_COMPARE_INTEGERS(a, b);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002177}
2178
Joshua Habermanf41049a2022-01-21 14:41:25 -08002179static int _upb_mapsorter_cmpi32(const void* _a, const void* _b) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002180 int32_t a, b;
2181 _upb_mapsorter_getkeys(_a, _b, &a, &b, 4);
Joshua Habermanf41049a2022-01-21 14:41:25 -08002182 return UPB_COMPARE_INTEGERS(a, b);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002183}
2184
Joshua Habermanf41049a2022-01-21 14:41:25 -08002185static int _upb_mapsorter_cmpu32(const void* _a, const void* _b) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002186 uint32_t a, b;
2187 _upb_mapsorter_getkeys(_a, _b, &a, &b, 4);
Joshua Habermanf41049a2022-01-21 14:41:25 -08002188 return UPB_COMPARE_INTEGERS(a, b);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002189}
2190
Joshua Habermanf41049a2022-01-21 14:41:25 -08002191static int _upb_mapsorter_cmpbool(const void* _a, const void* _b) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002192 bool a, b;
2193 _upb_mapsorter_getkeys(_a, _b, &a, &b, 1);
Joshua Habermanf41049a2022-01-21 14:41:25 -08002194 return UPB_COMPARE_INTEGERS(a, b);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002195}
2196
Joshua Habermanf41049a2022-01-21 14:41:25 -08002197static int _upb_mapsorter_cmpstr(const void* _a, const void* _b) {
2198 upb_StringView a, b;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002199 _upb_mapsorter_getkeys(_a, _b, &a, &b, UPB_MAPTYPE_STRING);
2200 size_t common_size = UPB_MIN(a.size, b.size);
2201 int cmp = memcmp(a.data, b.data, common_size);
Joshua Habermanf41049a2022-01-21 14:41:25 -08002202 if (cmp) return -cmp;
2203 return UPB_COMPARE_INTEGERS(a.size, b.size);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002204}
2205
Joshua Habermanf41049a2022-01-21 14:41:25 -08002206#undef UPB_COMPARE_INTEGERS
2207
2208bool _upb_mapsorter_pushmap(_upb_mapsorter* s, upb_FieldType key_type,
2209 const upb_Map* map, _upb_sortedmap* sorted) {
2210 int map_size = _upb_Map_Size(map);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002211 sorted->start = s->size;
2212 sorted->pos = sorted->start;
2213 sorted->end = sorted->start + map_size;
2214
2215 /* Grow s->entries if necessary. */
2216 if (sorted->end > s->cap) {
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07002217 s->cap = _upb_Log2CeilingSize(sorted->end);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002218 s->entries = realloc(s->entries, s->cap * sizeof(*s->entries));
2219 if (!s->entries) return false;
2220 }
2221
2222 s->size = sorted->end;
2223
2224 /* Copy non-empty entries from the table to s->entries. */
Joshua Habermanf41049a2022-01-21 14:41:25 -08002225 upb_tabent const** dst = &s->entries[sorted->start];
2226 const upb_tabent* src = map->table.t.entries;
2227 const upb_tabent* end = src + upb_table_size(&map->table.t);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002228 for (; src < end; src++) {
2229 if (!upb_tabent_isempty(src)) {
2230 *dst = src;
2231 dst++;
2232 }
2233 }
2234 UPB_ASSERT(dst == &s->entries[sorted->end]);
2235
2236 /* Sort entries according to the key type. */
2237
Joshua Habermanf41049a2022-01-21 14:41:25 -08002238 int (*compar)(const void*, const void*);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002239
2240 switch (key_type) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08002241 case kUpb_FieldType_Int64:
2242 case kUpb_FieldType_SFixed64:
2243 case kUpb_FieldType_SInt64:
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002244 compar = _upb_mapsorter_cmpi64;
2245 break;
Joshua Habermanf41049a2022-01-21 14:41:25 -08002246 case kUpb_FieldType_UInt64:
2247 case kUpb_FieldType_Fixed64:
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002248 compar = _upb_mapsorter_cmpu64;
2249 break;
Joshua Habermanf41049a2022-01-21 14:41:25 -08002250 case kUpb_FieldType_Int32:
2251 case kUpb_FieldType_SInt32:
2252 case kUpb_FieldType_SFixed32:
2253 case kUpb_FieldType_Enum:
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002254 compar = _upb_mapsorter_cmpi32;
2255 break;
Joshua Habermanf41049a2022-01-21 14:41:25 -08002256 case kUpb_FieldType_UInt32:
2257 case kUpb_FieldType_Fixed32:
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002258 compar = _upb_mapsorter_cmpu32;
2259 break;
Joshua Habermanf41049a2022-01-21 14:41:25 -08002260 case kUpb_FieldType_Bool:
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002261 compar = _upb_mapsorter_cmpbool;
2262 break;
Joshua Habermanf41049a2022-01-21 14:41:25 -08002263 case kUpb_FieldType_String:
2264 case kUpb_FieldType_Bytes:
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002265 compar = _upb_mapsorter_cmpstr;
2266 break;
2267 default:
2268 UPB_UNREACHABLE();
2269 }
2270
2271 qsort(&s->entries[sorted->start], map_size, sizeof(*s->entries), compar);
2272 return true;
2273}
Joshua Habermandd69a482021-05-17 22:40:33 -07002274
/** upb_ExtensionRegistry
 * ****************************************************************/
Joshua Haberman9d578a32021-08-02 15:32:01 -07002277
Joshua Habermanf41049a2022-01-21 14:41:25 -08002278struct upb_ExtensionRegistry {
2279 upb_Arena* arena;
2280 upb_strtable exts; /* Key is upb_MiniTable* concatenated with fieldnum. */
Joshua Haberman9d578a32021-08-02 15:32:01 -07002281};
2282
/* Byte length of a registry key: a upb_MiniTable pointer followed by a 32-bit
 * field number (see extreg_key()). */
#define EXTREG_KEY_SIZE (sizeof(upb_MiniTable*) + sizeof(uint32_t))
Joshua Haberman9d578a32021-08-02 15:32:01 -07002284
Joshua Habermanf41049a2022-01-21 14:41:25 -08002285static void extreg_key(char* buf, const upb_MiniTable* l, uint32_t fieldnum) {
Joshua Haberman9d578a32021-08-02 15:32:01 -07002286 memcpy(buf, &l, sizeof(l));
2287 memcpy(buf + sizeof(l), &fieldnum, sizeof(fieldnum));
2288}
2289
Joshua Habermanf41049a2022-01-21 14:41:25 -08002290upb_ExtensionRegistry* upb_ExtensionRegistry_New(upb_Arena* arena) {
2291 upb_ExtensionRegistry* r = upb_Arena_Malloc(arena, sizeof(*r));
Joshua Haberman9d578a32021-08-02 15:32:01 -07002292 if (!r) return NULL;
2293 r->arena = arena;
2294 if (!upb_strtable_init(&r->exts, 8, arena)) return NULL;
2295 return r;
2296}
2297
Joshua Habermanf41049a2022-01-21 14:41:25 -08002298bool _upb_extreg_add(upb_ExtensionRegistry* r,
2299 const upb_MiniTable_Extension** e, size_t count) {
Joshua Haberman9d578a32021-08-02 15:32:01 -07002300 char buf[EXTREG_KEY_SIZE];
Joshua Habermanf41049a2022-01-21 14:41:25 -08002301 const upb_MiniTable_Extension** start = e;
2302 const upb_MiniTable_Extension** end = UPB_PTRADD(e, count);
Joshua Haberman9d578a32021-08-02 15:32:01 -07002303 for (; e < end; e++) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08002304 const upb_MiniTable_Extension* ext = *e;
2305 extreg_key(buf, ext->extendee, ext->field.number);
Joshua Haberman9d578a32021-08-02 15:32:01 -07002306 if (!upb_strtable_insert(&r->exts, buf, EXTREG_KEY_SIZE,
Joshua Habermanf41049a2022-01-21 14:41:25 -08002307 upb_value_constptr(ext), r->arena)) {
Joshua Haberman9d578a32021-08-02 15:32:01 -07002308 goto failure;
2309 }
2310 }
2311 return true;
2312
2313failure:
2314 /* Back out the entries previously added. */
2315 for (end = e, e = start; e < end; e++) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08002316 const upb_MiniTable_Extension* ext = *e;
2317 extreg_key(buf, ext->extendee, ext->field.number);
2318 upb_strtable_remove2(&r->exts, buf, EXTREG_KEY_SIZE, NULL);
Joshua Haberman9d578a32021-08-02 15:32:01 -07002319 }
2320 return false;
2321}
2322
Joshua Habermanf41049a2022-01-21 14:41:25 -08002323const upb_MiniTable_Extension* _upb_extreg_get(const upb_ExtensionRegistry* r,
2324 const upb_MiniTable* l,
2325 uint32_t num) {
Joshua Haberman9d578a32021-08-02 15:32:01 -07002326 char buf[EXTREG_KEY_SIZE];
2327 upb_value v;
2328 extreg_key(buf, l, num);
2329 if (upb_strtable_lookup2(&r->exts, buf, EXTREG_KEY_SIZE, &v)) {
2330 return upb_value_getconstptr(v);
2331 } else {
2332 return NULL;
2333 }
2334}
2335
/** upb/table.c ************************************************************/
/*
 * upb_table Implementation
 *
 * Implementation is heavily inspired by Lua's ltable.c.
 */
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002342
2343#include <string.h>
2344
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002345
2346/* Must be last. */
2347
/* Upper bound applied by log2ceil(); as a power-of-two exponent this is 64k. */
#define UPB_MAXARRSIZE 16
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002349
/* Number of elements in the array |x| (from Chromium).
 * The second divisor is 1 when sizeof(x) is an exact multiple of the element
 * size and 0 otherwise, so an argument that is not a whole array of elements
 * turns into a division by zero. */
#define ARRAY_SIZE(x) \
  ((sizeof(x) / sizeof(0 [x])) / ((size_t)(!(sizeof(x) % sizeof(0 [x])))))
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002353
/* Fraction of a hash table's slots that may be occupied before it counts as
 * full; init() derives max_count from it. */
static const double MAX_LOAD = 0.85;
2355
/* Minimum utilization of the array part of a mixed hash/array table. It
 * trades speed against memory usage (not in a straightforward way, because of
 * cache effects): the lower the value, the more memory is used. */
static const double MIN_DENSITY = 0.1;
2360
/* Returns true if |v| has at most one bit set. Note that 0 is reported as a
 * power of two. */
static bool is_pow2(uint64_t v) {
  if (v == 0) return true;
  return (v & (v - 1)) == 0;
}
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002362
Joshua Habermandd69a482021-05-17 22:40:33 -07002363static upb_value _upb_value_val(uint64_t val) {
2364 upb_value ret;
2365 _upb_value_setval(&ret, val);
2366 return ret;
2367}
2368
2369static int log2ceil(uint64_t v) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002370 int ret = 0;
2371 bool pow2 = is_pow2(v);
2372 while (v >>= 1) ret++;
Joshua Habermanf41049a2022-01-21 14:41:25 -08002373 ret = pow2 ? ret : ret + 1; /* Ceiling. */
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002374 return UPB_MIN(UPB_MAXARRSIZE, ret);
2375}
2376
Joshua Habermanf41049a2022-01-21 14:41:25 -08002377char* upb_strdup2(const char* s, size_t len, upb_Arena* a) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002378 size_t n;
Joshua Habermanf41049a2022-01-21 14:41:25 -08002379 char* p;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002380
2381 /* Prevent overflow errors. */
2382 if (len == SIZE_MAX) return NULL;
2383 /* Always null-terminate, even if binary data; but don't rely on the input to
2384 * have a null-terminating byte since it may be a raw binary buffer. */
2385 n = len + 1;
Joshua Habermanf41049a2022-01-21 14:41:25 -08002386 p = upb_Arena_Malloc(a, n);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002387 if (p) {
2388 memcpy(p, s, len);
2389 p[len] = 0;
2390 }
2391 return p;
2392}
2393
/* The lookup key of either a strtable or an inttable. Exactly one member is
 * meaningful: |num| (filled by intkey()) or |str| (filled by strkey2()). */
typedef union {
  /* Integer key. */
  uintptr_t num;
  /* String key: |len| bytes starting at |str|. */
  struct {
    const char* str;
    size_t len;
  } str;
} lookupkey_t;
2402
Joshua Habermanf41049a2022-01-21 14:41:25 -08002403static lookupkey_t strkey2(const char* str, size_t len) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002404 lookupkey_t k;
2405 k.str.str = str;
2406 k.str.len = len;
2407 return k;
2408}
2409
2410static lookupkey_t intkey(uintptr_t key) {
2411 lookupkey_t k;
2412 k.num = key;
2413 return k;
2414}
2415
2416typedef uint32_t hashfunc_t(upb_tabkey key);
2417typedef bool eqlfunc_t(upb_tabkey k1, lookupkey_t k2);
2418
/* Base table (shared code) ***************************************************/
2420
/* Hash of an integer key: simply the key truncated to its low 32 bits. */
static uint32_t upb_inthash(uintptr_t key) {
  const uint32_t low_bits = (uint32_t)key;
  return low_bits;
}
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002422
Joshua Habermanf41049a2022-01-21 14:41:25 -08002423static const upb_tabent* upb_getentry(const upb_table* t, uint32_t hash) {
Joshua Habermandd69a482021-05-17 22:40:33 -07002424 return t->entries + (hash & t->mask);
2425}
2426
Joshua Habermanf41049a2022-01-21 14:41:25 -08002427static bool upb_arrhas(upb_tabval key) { return key.val != (uint64_t)-1; }
Joshua Habermandd69a482021-05-17 22:40:33 -07002428
Joshua Habermanf41049a2022-01-21 14:41:25 -08002429static bool isfull(upb_table* t) { return t->count == t->max_count; }
Joshua Habermandd69a482021-05-17 22:40:33 -07002430
Joshua Habermanf41049a2022-01-21 14:41:25 -08002431static bool init(upb_table* t, uint8_t size_lg2, upb_Arena* a) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002432 size_t bytes;
2433
2434 t->count = 0;
2435 t->size_lg2 = size_lg2;
2436 t->mask = upb_table_size(t) ? upb_table_size(t) - 1 : 0;
2437 t->max_count = upb_table_size(t) * MAX_LOAD;
2438 bytes = upb_table_size(t) * sizeof(upb_tabent);
2439 if (bytes > 0) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08002440 t->entries = upb_Arena_Malloc(a, bytes);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002441 if (!t->entries) return false;
Joshua Habermandd69a482021-05-17 22:40:33 -07002442 memset(t->entries, 0, bytes);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002443 } else {
2444 t->entries = NULL;
2445 }
2446 return true;
2447}
2448
Joshua Habermanf41049a2022-01-21 14:41:25 -08002449static upb_tabent* emptyent(upb_table* t, upb_tabent* e) {
2450 upb_tabent* begin = t->entries;
2451 upb_tabent* end = begin + upb_table_size(t);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002452 for (e = e + 1; e < end; e++) {
2453 if (upb_tabent_isempty(e)) return e;
2454 }
2455 for (e = begin; e < end; e++) {
2456 if (upb_tabent_isempty(e)) return e;
2457 }
2458 UPB_ASSERT(false);
2459 return NULL;
2460}
2461
Joshua Habermanf41049a2022-01-21 14:41:25 -08002462static upb_tabent* getentry_mutable(upb_table* t, uint32_t hash) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002463 return (upb_tabent*)upb_getentry(t, hash);
2464}
2465
Joshua Habermanf41049a2022-01-21 14:41:25 -08002466static const upb_tabent* findentry(const upb_table* t, lookupkey_t key,
2467 uint32_t hash, eqlfunc_t* eql) {
2468 const upb_tabent* e;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002469
2470 if (t->size_lg2 == 0) return NULL;
2471 e = upb_getentry(t, hash);
2472 if (upb_tabent_isempty(e)) return NULL;
2473 while (1) {
2474 if (eql(e->key, key)) return e;
2475 if ((e = e->next) == NULL) return NULL;
2476 }
2477}
2478
Joshua Habermanf41049a2022-01-21 14:41:25 -08002479static upb_tabent* findentry_mutable(upb_table* t, lookupkey_t key,
2480 uint32_t hash, eqlfunc_t* eql) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002481 return (upb_tabent*)findentry(t, key, hash, eql);
2482}
2483
Joshua Habermanf41049a2022-01-21 14:41:25 -08002484static bool lookup(const upb_table* t, lookupkey_t key, upb_value* v,
2485 uint32_t hash, eqlfunc_t* eql) {
2486 const upb_tabent* e = findentry(t, key, hash, eql);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002487 if (e) {
2488 if (v) {
2489 _upb_value_setval(v, e->val.val);
2490 }
2491 return true;
2492 } else {
2493 return false;
2494 }
2495}
2496
2497/* The given key must not already exist in the table. */
Joshua Habermanf41049a2022-01-21 14:41:25 -08002498static void insert(upb_table* t, lookupkey_t key, upb_tabkey tabkey,
2499 upb_value val, uint32_t hash, hashfunc_t* hashfunc,
2500 eqlfunc_t* eql) {
2501 upb_tabent* mainpos_e;
2502 upb_tabent* our_e;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002503
2504 UPB_ASSERT(findentry(t, key, hash, eql) == NULL);
2505
2506 t->count++;
2507 mainpos_e = getentry_mutable(t, hash);
2508 our_e = mainpos_e;
2509
2510 if (upb_tabent_isempty(mainpos_e)) {
2511 /* Our main position is empty; use it. */
2512 our_e->next = NULL;
2513 } else {
2514 /* Collision. */
Joshua Habermanf41049a2022-01-21 14:41:25 -08002515 upb_tabent* new_e = emptyent(t, mainpos_e);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002516 /* Head of collider's chain. */
Joshua Habermanf41049a2022-01-21 14:41:25 -08002517 upb_tabent* chain = getentry_mutable(t, hashfunc(mainpos_e->key));
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002518 if (chain == mainpos_e) {
2519 /* Existing ent is in its main position (it has the same hash as us, and
Joshua Habermanf41049a2022-01-21 14:41:25 -08002520 * is the head of our chain). Insert to new ent and append to this chain.
2521 */
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002522 new_e->next = mainpos_e->next;
2523 mainpos_e->next = new_e;
2524 our_e = new_e;
2525 } else {
2526 /* Existing ent is not in its main position (it is a node in some other
2527 * chain). This implies that no existing ent in the table has our hash.
2528 * Evict it (updating its chain) and use its ent for head of our chain. */
Joshua Habermanf41049a2022-01-21 14:41:25 -08002529 *new_e = *mainpos_e; /* copies next. */
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002530 while (chain->next != mainpos_e) {
2531 chain = (upb_tabent*)chain->next;
2532 UPB_ASSERT(chain);
2533 }
2534 chain->next = new_e;
2535 our_e = mainpos_e;
2536 our_e->next = NULL;
2537 }
2538 }
2539 our_e->key = tabkey;
2540 our_e->val.val = val.val;
2541 UPB_ASSERT(findentry(t, key, hash, eql) == our_e);
2542}
2543
Joshua Habermanf41049a2022-01-21 14:41:25 -08002544static bool rm(upb_table* t, lookupkey_t key, upb_value* val,
2545 upb_tabkey* removed, uint32_t hash, eqlfunc_t* eql) {
2546 upb_tabent* chain = getentry_mutable(t, hash);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002547 if (upb_tabent_isempty(chain)) return false;
2548 if (eql(chain->key, key)) {
2549 /* Element to remove is at the head of its chain. */
2550 t->count--;
2551 if (val) _upb_value_setval(val, chain->val.val);
2552 if (removed) *removed = chain->key;
2553 if (chain->next) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08002554 upb_tabent* move = (upb_tabent*)chain->next;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002555 *chain = *move;
Joshua Habermanf41049a2022-01-21 14:41:25 -08002556 move->key = 0; /* Make the slot empty. */
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002557 } else {
Joshua Habermanf41049a2022-01-21 14:41:25 -08002558 chain->key = 0; /* Make the slot empty. */
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002559 }
2560 return true;
2561 } else {
2562 /* Element to remove is either in a non-head position or not in the
2563 * table. */
2564 while (chain->next && !eql(chain->next->key, key)) {
2565 chain = (upb_tabent*)chain->next;
2566 }
2567 if (chain->next) {
2568 /* Found element to remove. */
Joshua Habermanf41049a2022-01-21 14:41:25 -08002569 upb_tabent* rm = (upb_tabent*)chain->next;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002570 t->count--;
2571 if (val) _upb_value_setval(val, chain->next->val.val);
2572 if (removed) *removed = rm->key;
Joshua Habermanf41049a2022-01-21 14:41:25 -08002573 rm->key = 0; /* Make the slot empty. */
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002574 chain->next = rm->next;
2575 return true;
2576 } else {
2577 /* Element to remove is not in the table. */
2578 return false;
2579 }
2580 }
2581}
2582
Joshua Habermanf41049a2022-01-21 14:41:25 -08002583static size_t next(const upb_table* t, size_t i) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002584 do {
Joshua Habermanf41049a2022-01-21 14:41:25 -08002585 if (++i >= upb_table_size(t)) return SIZE_MAX - 1; /* Distinct from -1. */
2586 } while (upb_tabent_isempty(&t->entries[i]));
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002587
2588 return i;
2589}
2590
Joshua Habermanf41049a2022-01-21 14:41:25 -08002591static size_t begin(const upb_table* t) { return next(t, -1); }
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002592
/* upb_strtable ***************************************************************/

/* A simple "subclass" of upb_table that only adds a hash function for strings.
 */
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002597
Joshua Habermanf41049a2022-01-21 14:41:25 -08002598static upb_tabkey strcopy(lookupkey_t k2, upb_Arena* a) {
2599 uint32_t len = (uint32_t)k2.str.len;
2600 char* str = upb_Arena_Malloc(a, k2.str.len + sizeof(uint32_t) + 1);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002601 if (str == NULL) return 0;
2602 memcpy(str, &len, sizeof(uint32_t));
2603 if (k2.str.len) memcpy(str + sizeof(uint32_t), k2.str.str, k2.str.len);
2604 str[sizeof(uint32_t) + k2.str.len] = '\0';
2605 return (uintptr_t)str;
2606}
2607
/* Adapted from ABSL's wyhash. */
2609
/* Reads 8 bytes from |p| in native byte order; |p| need not be aligned. */
static uint64_t UnalignedLoad64(const void* p) {
  uint64_t loaded;
  memcpy(&loaded, p, sizeof(loaded));
  return loaded;
}
2615
/* Reads 4 bytes from |p| in native byte order; |p| need not be aligned. */
static uint32_t UnalignedLoad32(const void* p) {
  uint32_t loaded;
  memcpy(&loaded, p, sizeof(loaded));
  return loaded;
}
2621
2622#if defined(_MSC_VER) && defined(_M_X64)
2623#include <intrin.h>
2624#endif
2625
/* Computes v0 * v1, returning the low 64 bits of the product and storing the
 * high 64 bits in |*out_high|. The portable fallback drops the carries out of
 * the partial sums, so its result is not always the exact product. */
static uint64_t upb_umul128(uint64_t v0, uint64_t v1, uint64_t* out_high) {
#ifdef __SIZEOF_INT128__
  const __uint128_t product = (__uint128_t)v0 * v1;
  *out_high = (uint64_t)(product >> 64);
  return (uint64_t)product;
#elif defined(_MSC_VER) && defined(_M_X64)
  return _umul128(v0, v1, out_high);
#else
  /* Schoolbook multiplication on 32-bit halves. */
  const uint64_t v0_hi = v0 >> 32;
  const uint64_t v0_lo = v0 & 0xffffffff;
  const uint64_t v1_hi = v1 >> 32;
  const uint64_t v1_lo = v1 & 0xffffffff;
  const uint64_t cross_a = v0_hi * v1_lo;
  const uint64_t cross_b = v0_lo * v1_hi;
  /* Omit carry bit, for mixing we do not care about exact numerical
   * precision. */
  *out_high = v0_hi * v1_hi + ((cross_a >> 32) + (cross_b >> 32));
  return v0_lo * v1_lo + ((cross_a << 32) + (cross_b << 32));
#endif
}
2652
/* Mixes two words: multiplies them with upb_umul128() and XORs the two
 * 64-bit halves of the result together. */
static uint64_t WyhashMix(uint64_t v0, uint64_t v1) {
  uint64_t hi;
  const uint64_t lo = upb_umul128(v0, v1, &hi);
  return hi ^ lo;
}
2658
/* Hashes the |len| bytes at |data|.
 *
 * seed: initial value mixed into the hash state.
 * salt: constants mixed in at each step; indices 0 through 4 are read.
 *
 * Returns the 64-bit hash. */
static uint64_t Wyhash(const void* data, size_t len, uint64_t seed,
                       const uint64_t salt[]) {
  const uint8_t* ptr = (const uint8_t*)data;
  const uint64_t starting_length = (uint64_t)len;
  uint64_t state = seed ^ salt[0];
  uint64_t a;
  uint64_t b;

  if (len > 64) {
    /* More than 64 bytes: consume 64-byte chunks, feeding the first half of
     * each chunk into |state| and the second half into an independent second
     * state, then fold the two states together. */
    uint64_t second_state = state;

    do {
      const uint64_t w0 = UnalignedLoad64(ptr);
      const uint64_t w1 = UnalignedLoad64(ptr + 8);
      const uint64_t w2 = UnalignedLoad64(ptr + 16);
      const uint64_t w3 = UnalignedLoad64(ptr + 24);
      const uint64_t w4 = UnalignedLoad64(ptr + 32);
      const uint64_t w5 = UnalignedLoad64(ptr + 40);
      const uint64_t w6 = UnalignedLoad64(ptr + 48);
      const uint64_t w7 = UnalignedLoad64(ptr + 56);

      /* Both mixes on each line read the state's previous value; the
       * assignment happens only after both have been computed. */
      state = WyhashMix(w0 ^ salt[1], w1 ^ state) ^
              WyhashMix(w2 ^ salt[2], w3 ^ state);
      second_state = WyhashMix(w4 ^ salt[3], w5 ^ second_state) ^
                     WyhashMix(w6 ^ salt[4], w7 ^ second_state);

      ptr += 64;
      len -= 64;
    } while (len > 64);

    state ^= second_state;
  }

  /* At most 64 bytes remain; consume 16 bytes at a time while more than 16
   * are left. */
  for (; len > 16; ptr += 16, len -= 16) {
    state = WyhashMix(UnalignedLoad64(ptr) ^ salt[1],
                      UnalignedLoad64(ptr + 8) ^ state);
  }

  /* At most 16 bytes remain. */
  if (len > 8) {
    /* 9..16 bytes: A is the first 64 bits of the input and B the last 64
     * bits. They overlap in the middle when fewer than 16 bytes are left. */
    a = UnalignedLoad64(ptr);
    b = UnalignedLoad64(ptr + len - 8);
  } else if (len > 3) {
    /* 4..8 bytes: A is the first 32 bits and B the last 32 bits. */
    a = UnalignedLoad32(ptr);
    b = UnalignedLoad32(ptr + len - 4);
  } else if (len > 0) {
    /* 1..3 bytes: pack the first, middle and last byte into A. */
    a = ((ptr[0] << 16) | (ptr[len >> 1] << 8) | ptr[len - 1]);
    b = 0;
  } else {
    a = 0;
    b = 0;
  }

  return WyhashMix(WyhashMix(a ^ salt[1], b ^ state),
                   salt[1] ^ starting_length);
}
2737
/* Salt constants passed to Wyhash() by _upb_Hash(). */
const uint64_t kWyhashSalt[5] = {
    0x243F6A8885A308D3ULL,
    0x13198A2E03707344ULL,
    0xA4093822299F31D0ULL,
    0x082EFA98EC4E6C89ULL,
    0x452821E638D01377ULL,
};
2742
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07002743uint32_t _upb_Hash(const void* p, size_t n, uint64_t seed) {
2744 return Wyhash(p, n, seed, kWyhashSalt);
2745}
2746
/* _upb_Hash() with a seed of zero. */
static uint32_t _upb_Hash_NoSeed(const char* p, size_t n) {
  const uint64_t no_seed = 0;
  return _upb_Hash(p, n, no_seed);
}
2750
2751static uint32_t strhash(upb_tabkey key) {
2752 uint32_t len;
Joshua Habermanf41049a2022-01-21 14:41:25 -08002753 char* str = upb_tabstr(key, &len);
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07002754 return _upb_Hash_NoSeed(str, len);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002755}
2756
2757static bool streql(upb_tabkey k1, lookupkey_t k2) {
2758 uint32_t len;
Joshua Habermanf41049a2022-01-21 14:41:25 -08002759 char* str = upb_tabstr(k1, &len);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002760 return len == k2.str.len && (len == 0 || memcmp(str, k2.str.str, len) == 0);
2761}
2762
Joshua Habermanf41049a2022-01-21 14:41:25 -08002763bool upb_strtable_init(upb_strtable* t, size_t expected_size, upb_Arena* a) {
2764 // Multiply by approximate reciprocal of MAX_LOAD (0.85), with pow2
2765 // denominator.
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002766 size_t need_entries = (expected_size + 1) * 1204 / 1024;
2767 UPB_ASSERT(need_entries >= expected_size * 0.85);
Joshua Habermanf41049a2022-01-21 14:41:25 -08002768 int size_lg2 = _upb_Log2Ceiling(need_entries);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002769 return init(&t->t, size_lg2, a);
2770}
2771
Joshua Habermanf41049a2022-01-21 14:41:25 -08002772void upb_strtable_clear(upb_strtable* t) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002773 size_t bytes = upb_table_size(&t->t) * sizeof(upb_tabent);
2774 t->t.count = 0;
2775 memset((char*)t->t.entries, 0, bytes);
2776}
2777
Joshua Habermanf41049a2022-01-21 14:41:25 -08002778bool upb_strtable_resize(upb_strtable* t, size_t size_lg2, upb_Arena* a) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002779 upb_strtable new_table;
2780 upb_strtable_iter i;
2781
Joshua Habermanf41049a2022-01-21 14:41:25 -08002782 if (!init(&new_table.t, size_lg2, a)) return false;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002783 upb_strtable_begin(&i, t);
Joshua Habermanf41049a2022-01-21 14:41:25 -08002784 for (; !upb_strtable_done(&i); upb_strtable_next(&i)) {
2785 upb_StringView key = upb_strtable_iter_key(&i);
Joshua Habermandd69a482021-05-17 22:40:33 -07002786 upb_strtable_insert(&new_table, key.data, key.size,
2787 upb_strtable_iter_value(&i), a);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002788 }
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002789 *t = new_table;
2790 return true;
2791}
2792
Joshua Habermanf41049a2022-01-21 14:41:25 -08002793bool upb_strtable_insert(upb_strtable* t, const char* k, size_t len,
2794 upb_value v, upb_Arena* a) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002795 lookupkey_t key;
2796 upb_tabkey tabkey;
2797 uint32_t hash;
2798
2799 if (isfull(&t->t)) {
2800 /* Need to resize. New table of double the size, add old elements to it. */
2801 if (!upb_strtable_resize(t, t->t.size_lg2 + 1, a)) {
2802 return false;
2803 }
2804 }
2805
2806 key = strkey2(k, len);
2807 tabkey = strcopy(key, a);
2808 if (tabkey == 0) return false;
2809
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07002810 hash = _upb_Hash_NoSeed(key.str.str, key.str.len);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002811 insert(&t->t, key, tabkey, v, hash, &strhash, &streql);
2812 return true;
2813}
2814
Joshua Habermanf41049a2022-01-21 14:41:25 -08002815bool upb_strtable_lookup2(const upb_strtable* t, const char* key, size_t len,
2816 upb_value* v) {
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07002817 uint32_t hash = _upb_Hash_NoSeed(key, len);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002818 return lookup(&t->t, strkey2(key, len), v, hash, &streql);
2819}
2820
Joshua Habermanf41049a2022-01-21 14:41:25 -08002821bool upb_strtable_remove2(upb_strtable* t, const char* key, size_t len,
2822 upb_value* val) {
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07002823 uint32_t hash = _upb_Hash_NoSeed(key, len);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002824 upb_tabkey tabkey;
Joshua Habermandd69a482021-05-17 22:40:33 -07002825 return rm(&t->t, strkey2(key, len), val, &tabkey, hash, &streql);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002826}
2827
/* Iteration */
2829
Joshua Habermanf41049a2022-01-21 14:41:25 -08002830void upb_strtable_begin(upb_strtable_iter* i, const upb_strtable* t) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002831 i->t = t;
2832 i->index = begin(&t->t);
2833}
2834
Joshua Habermanf41049a2022-01-21 14:41:25 -08002835void upb_strtable_next(upb_strtable_iter* i) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002836 i->index = next(&i->t->t, i->index);
2837}
2838
Joshua Habermanf41049a2022-01-21 14:41:25 -08002839bool upb_strtable_done(const upb_strtable_iter* i) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002840 if (!i->t) return true;
2841 return i->index >= upb_table_size(&i->t->t) ||
2842 upb_tabent_isempty(str_tabent(i));
2843}
2844
Joshua Habermanf41049a2022-01-21 14:41:25 -08002845upb_StringView upb_strtable_iter_key(const upb_strtable_iter* i) {
2846 upb_StringView key;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002847 uint32_t len;
2848 UPB_ASSERT(!upb_strtable_done(i));
2849 key.data = upb_tabstr(str_tabent(i)->key, &len);
2850 key.size = len;
2851 return key;
2852}
2853
Joshua Habermanf41049a2022-01-21 14:41:25 -08002854upb_value upb_strtable_iter_value(const upb_strtable_iter* i) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002855 UPB_ASSERT(!upb_strtable_done(i));
2856 return _upb_value_val(str_tabent(i)->val.val);
2857}
2858
Joshua Habermanf41049a2022-01-21 14:41:25 -08002859void upb_strtable_iter_setdone(upb_strtable_iter* i) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002860 i->t = NULL;
2861 i->index = SIZE_MAX;
2862}
2863
Joshua Habermanf41049a2022-01-21 14:41:25 -08002864bool upb_strtable_iter_isequal(const upb_strtable_iter* i1,
2865 const upb_strtable_iter* i2) {
2866 if (upb_strtable_done(i1) && upb_strtable_done(i2)) return true;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002867 return i1->t == i2->t && i1->index == i2->index;
2868}
2869
/* upb_inttable ***************************************************************/

/* For inttables we use a hybrid structure where small keys are kept in an
 * array and large keys are put in the hash table. */
2874
2875static uint32_t inthash(upb_tabkey key) { return upb_inthash(key); }
2876
Joshua Habermanf41049a2022-01-21 14:41:25 -08002877static bool inteql(upb_tabkey k1, lookupkey_t k2) { return k1 == k2.num; }
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002878
Joshua Habermanf41049a2022-01-21 14:41:25 -08002879static upb_tabval* mutable_array(upb_inttable* t) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002880 return (upb_tabval*)t->array;
2881}
2882
Joshua Habermanf41049a2022-01-21 14:41:25 -08002883static upb_tabval* inttable_val(upb_inttable* t, uintptr_t key) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002884 if (key < t->array_size) {
2885 return upb_arrhas(t->array[key]) ? &(mutable_array(t)[key]) : NULL;
2886 } else {
Joshua Habermanf41049a2022-01-21 14:41:25 -08002887 upb_tabent* e =
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002888 findentry_mutable(&t->t, intkey(key), upb_inthash(key), &inteql);
2889 return e ? &e->val : NULL;
2890 }
2891}
2892
Joshua Habermanf41049a2022-01-21 14:41:25 -08002893static const upb_tabval* inttable_val_const(const upb_inttable* t,
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002894 uintptr_t key) {
2895 return inttable_val((upb_inttable*)t, key);
2896}
2897
Joshua Habermanf41049a2022-01-21 14:41:25 -08002898size_t upb_inttable_count(const upb_inttable* t) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002899 return t->t.count + t->array_count;
2900}
2901
Joshua Habermanf41049a2022-01-21 14:41:25 -08002902static void check(upb_inttable* t) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002903 UPB_UNUSED(t);
2904#if defined(UPB_DEBUG_TABLE) && !defined(NDEBUG)
2905 {
2906 /* This check is very expensive (makes inserts/deletes O(N)). */
2907 size_t count = 0;
2908 upb_inttable_iter i;
2909 upb_inttable_begin(&i, t);
Joshua Habermanf41049a2022-01-21 14:41:25 -08002910 for (; !upb_inttable_done(&i); upb_inttable_next(&i), count++) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002911 UPB_ASSERT(upb_inttable_lookup(t, upb_inttable_iter_key(&i), NULL));
2912 }
2913 UPB_ASSERT(count == upb_inttable_count(t));
2914 }
2915#endif
2916}
2917
Joshua Habermanf41049a2022-01-21 14:41:25 -08002918bool upb_inttable_sizedinit(upb_inttable* t, size_t asize, int hsize_lg2,
2919 upb_Arena* a) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002920 size_t array_bytes;
2921
2922 if (!init(&t->t, hsize_lg2, a)) return false;
2923 /* Always make the array part at least 1 long, so that we know key 0
2924 * won't be in the hash part, which simplifies things. */
2925 t->array_size = UPB_MAX(1, asize);
2926 t->array_count = 0;
2927 array_bytes = t->array_size * sizeof(upb_value);
Joshua Habermanf41049a2022-01-21 14:41:25 -08002928 t->array = upb_Arena_Malloc(a, array_bytes);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002929 if (!t->array) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002930 return false;
2931 }
2932 memset(mutable_array(t), 0xff, array_bytes);
2933 check(t);
2934 return true;
2935}
2936
Joshua Habermanf41049a2022-01-21 14:41:25 -08002937bool upb_inttable_init(upb_inttable* t, upb_Arena* a) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002938 return upb_inttable_sizedinit(t, 0, 4, a);
2939}
2940
Joshua Habermanf41049a2022-01-21 14:41:25 -08002941bool upb_inttable_insert(upb_inttable* t, uintptr_t key, upb_value val,
2942 upb_Arena* a) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002943 upb_tabval tabval;
2944 tabval.val = val.val;
Joshua Habermanf41049a2022-01-21 14:41:25 -08002945 UPB_ASSERT(
2946 upb_arrhas(tabval)); /* This will reject (uint64_t)-1. Fix this. */
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002947
2948 if (key < t->array_size) {
2949 UPB_ASSERT(!upb_arrhas(t->array[key]));
2950 t->array_count++;
2951 mutable_array(t)[key].val = val.val;
2952 } else {
2953 if (isfull(&t->t)) {
2954 /* Need to resize the hash part, but we re-use the array part. */
2955 size_t i;
2956 upb_table new_table;
2957
2958 if (!init(&new_table, t->t.size_lg2 + 1, a)) {
2959 return false;
2960 }
2961
2962 for (i = begin(&t->t); i < upb_table_size(&t->t); i = next(&t->t, i)) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08002963 const upb_tabent* e = &t->t.entries[i];
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002964 uint32_t hash;
2965 upb_value v;
2966
2967 _upb_value_setval(&v, e->val.val);
2968 hash = upb_inthash(e->key);
2969 insert(&new_table, intkey(e->key), e->key, v, hash, &inthash, &inteql);
2970 }
2971
2972 UPB_ASSERT(t->t.count == new_table.count);
2973
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002974 t->t = new_table;
2975 }
2976 insert(&t->t, intkey(key), key, val, upb_inthash(key), &inthash, &inteql);
2977 }
2978 check(t);
2979 return true;
2980}
2981
Joshua Habermanf41049a2022-01-21 14:41:25 -08002982bool upb_inttable_lookup(const upb_inttable* t, uintptr_t key, upb_value* v) {
2983 const upb_tabval* table_v = inttable_val_const(t, key);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002984 if (!table_v) return false;
2985 if (v) _upb_value_setval(v, table_v->val);
2986 return true;
2987}
2988
Joshua Habermanf41049a2022-01-21 14:41:25 -08002989bool upb_inttable_replace(upb_inttable* t, uintptr_t key, upb_value val) {
2990 upb_tabval* table_v = inttable_val(t, key);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002991 if (!table_v) return false;
2992 table_v->val = val.val;
2993 return true;
2994}
2995
Joshua Habermanf41049a2022-01-21 14:41:25 -08002996bool upb_inttable_remove(upb_inttable* t, uintptr_t key, upb_value* val) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08002997 bool success;
2998 if (key < t->array_size) {
2999 if (upb_arrhas(t->array[key])) {
3000 upb_tabval empty = UPB_TABVALUE_EMPTY_INIT;
3001 t->array_count--;
3002 if (val) {
3003 _upb_value_setval(val, t->array[key].val);
3004 }
3005 mutable_array(t)[key] = empty;
3006 success = true;
3007 } else {
3008 success = false;
3009 }
3010 } else {
3011 success = rm(&t->t, intkey(key), val, NULL, upb_inthash(key), &inteql);
3012 }
3013 check(t);
3014 return success;
3015}
3016
Joshua Habermanf41049a2022-01-21 14:41:25 -08003017void upb_inttable_compact(upb_inttable* t, upb_Arena* a) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003018 /* A power-of-two histogram of the table keys. */
3019 size_t counts[UPB_MAXARRSIZE + 1] = {0};
3020
3021 /* The max key in each bucket. */
3022 uintptr_t max[UPB_MAXARRSIZE + 1] = {0};
3023
3024 upb_inttable_iter i;
3025 size_t arr_count;
3026 int size_lg2;
3027 upb_inttable new_t;
3028
3029 upb_inttable_begin(&i, t);
3030 for (; !upb_inttable_done(&i); upb_inttable_next(&i)) {
3031 uintptr_t key = upb_inttable_iter_key(&i);
3032 int bucket = log2ceil(key);
3033 max[bucket] = UPB_MAX(max[bucket], key);
3034 counts[bucket]++;
3035 }
3036
3037 /* Find the largest power of two that satisfies the MIN_DENSITY
3038 * definition (while actually having some keys). */
3039 arr_count = upb_inttable_count(t);
3040
3041 for (size_lg2 = ARRAY_SIZE(counts) - 1; size_lg2 > 0; size_lg2--) {
3042 if (counts[size_lg2] == 0) {
3043 /* We can halve again without losing any entries. */
3044 continue;
3045 } else if (arr_count >= (1 << size_lg2) * MIN_DENSITY) {
3046 break;
3047 }
3048
3049 arr_count -= counts[size_lg2];
3050 }
3051
3052 UPB_ASSERT(arr_count <= upb_inttable_count(t));
3053
3054 {
3055 /* Insert all elements into new, perfectly-sized table. */
Joshua Habermanf41049a2022-01-21 14:41:25 -08003056 size_t arr_size = max[size_lg2] + 1; /* +1 so arr[max] will fit. */
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003057 size_t hash_count = upb_inttable_count(t) - arr_count;
3058 size_t hash_size = hash_count ? (hash_count / MAX_LOAD) + 1 : 0;
3059 int hashsize_lg2 = log2ceil(hash_size);
3060
3061 upb_inttable_sizedinit(&new_t, arr_size, hashsize_lg2, a);
3062 upb_inttable_begin(&i, t);
3063 for (; !upb_inttable_done(&i); upb_inttable_next(&i)) {
3064 uintptr_t k = upb_inttable_iter_key(&i);
Joshua Habermandd69a482021-05-17 22:40:33 -07003065 upb_inttable_insert(&new_t, k, upb_inttable_iter_value(&i), a);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003066 }
3067 UPB_ASSERT(new_t.array_size == arr_size);
3068 UPB_ASSERT(new_t.t.size_lg2 == hashsize_lg2);
3069 }
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003070 *t = new_t;
3071}
3072
3073/* Iteration. */
3074
Joshua Habermanf41049a2022-01-21 14:41:25 -08003075static const upb_tabent* int_tabent(const upb_inttable_iter* i) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003076 UPB_ASSERT(!i->array_part);
3077 return &i->t->t.entries[i->index];
3078}
3079
Joshua Habermanf41049a2022-01-21 14:41:25 -08003080static upb_tabval int_arrent(const upb_inttable_iter* i) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003081 UPB_ASSERT(i->array_part);
3082 return i->t->array[i->index];
3083}
3084
Joshua Habermanf41049a2022-01-21 14:41:25 -08003085void upb_inttable_begin(upb_inttable_iter* i, const upb_inttable* t) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003086 i->t = t;
3087 i->index = -1;
3088 i->array_part = true;
3089 upb_inttable_next(i);
3090}
3091
Joshua Habermanf41049a2022-01-21 14:41:25 -08003092void upb_inttable_next(upb_inttable_iter* iter) {
3093 const upb_inttable* t = iter->t;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003094 if (iter->array_part) {
3095 while (++iter->index < t->array_size) {
3096 if (upb_arrhas(int_arrent(iter))) {
3097 return;
3098 }
3099 }
3100 iter->array_part = false;
3101 iter->index = begin(&t->t);
3102 } else {
3103 iter->index = next(&t->t, iter->index);
3104 }
3105}
3106
Joshua Habermanf41049a2022-01-21 14:41:25 -08003107bool upb_inttable_next2(const upb_inttable* t, uintptr_t* key, upb_value* val,
3108 intptr_t* iter) {
3109 intptr_t i = *iter;
3110 if (i < t->array_size) {
3111 while (++i < t->array_size) {
3112 upb_tabval ent = t->array[i];
3113 if (upb_arrhas(ent)) {
3114 *key = i;
3115 *val = _upb_value_val(ent.val);
3116 *iter = i;
3117 return true;
3118 }
3119 }
3120 }
3121
3122 size_t tab_idx = next(&t->t, i == -1 ? -1 : i - t->array_size);
3123 if (tab_idx < upb_table_size(&t->t)) {
3124 upb_tabent* ent = &t->t.entries[tab_idx];
3125 *key = ent->key;
3126 *val = _upb_value_val(ent->val.val);
3127 *iter = tab_idx + t->array_size;
3128 return true;
3129 }
3130
3131 return false;
3132}
3133
3134void upb_inttable_removeiter(upb_inttable* t, intptr_t* iter) {
3135 intptr_t i = *iter;
3136 if (i < t->array_size) {
3137 t->array_count--;
3138 mutable_array(t)[i].val = -1;
3139 } else {
3140 upb_tabent* ent = &t->t.entries[i - t->array_size];
3141 upb_tabent* prev = NULL;
3142
3143 // Linear search, not great.
3144 upb_tabent* end = &t->t.entries[upb_table_size(&t->t)];
3145 for (upb_tabent* e = t->t.entries; e != end; e++) {
3146 if (e->next == ent) {
3147 prev = e;
3148 break;
3149 }
3150 }
3151
3152 if (prev) {
3153 prev->next = ent->next;
3154 }
3155
3156 t->t.count--;
3157 ent->key = 0;
3158 ent->next = NULL;
3159 }
3160}
3161
3162bool upb_strtable_next2(const upb_strtable* t, upb_StringView* key,
3163 upb_value* val, intptr_t* iter) {
3164 size_t tab_idx = next(&t->t, *iter);
3165 if (tab_idx < upb_table_size(&t->t)) {
3166 upb_tabent* ent = &t->t.entries[tab_idx];
3167 uint32_t len;
3168 key->data = upb_tabstr(ent->key, &len);
3169 key->size = len;
3170 *val = _upb_value_val(ent->val.val);
3171 *iter = tab_idx;
3172 return true;
3173 }
3174
3175 return false;
3176}
3177
3178void upb_strtable_removeiter(upb_strtable* t, intptr_t* iter) {
3179 intptr_t i = *iter;
3180 upb_tabent* ent = &t->t.entries[i];
3181 upb_tabent* prev = NULL;
3182
3183 // Linear search, not great.
3184 upb_tabent* end = &t->t.entries[upb_table_size(&t->t)];
3185 for (upb_tabent* e = t->t.entries; e != end; e++) {
3186 if (e->next == ent) {
3187 prev = e;
3188 break;
3189 }
3190 }
3191
3192 if (prev) {
3193 prev->next = ent->next;
3194 }
3195
3196 t->t.count--;
3197 ent->key = 0;
3198 ent->next = NULL;
3199}
3200
3201bool upb_inttable_done(const upb_inttable_iter* i) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003202 if (!i->t) return true;
3203 if (i->array_part) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08003204 return i->index >= i->t->array_size || !upb_arrhas(int_arrent(i));
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003205 } else {
3206 return i->index >= upb_table_size(&i->t->t) ||
3207 upb_tabent_isempty(int_tabent(i));
3208 }
3209}
3210
Joshua Habermanf41049a2022-01-21 14:41:25 -08003211uintptr_t upb_inttable_iter_key(const upb_inttable_iter* i) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003212 UPB_ASSERT(!upb_inttable_done(i));
3213 return i->array_part ? i->index : int_tabent(i)->key;
3214}
3215
Joshua Habermanf41049a2022-01-21 14:41:25 -08003216upb_value upb_inttable_iter_value(const upb_inttable_iter* i) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003217 UPB_ASSERT(!upb_inttable_done(i));
Joshua Habermanf41049a2022-01-21 14:41:25 -08003218 return _upb_value_val(i->array_part ? i->t->array[i->index].val
3219 : int_tabent(i)->val.val);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003220}
3221
Joshua Habermanf41049a2022-01-21 14:41:25 -08003222void upb_inttable_iter_setdone(upb_inttable_iter* i) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003223 i->t = NULL;
3224 i->index = SIZE_MAX;
3225 i->array_part = false;
3226}
3227
Joshua Habermanf41049a2022-01-21 14:41:25 -08003228bool upb_inttable_iter_isequal(const upb_inttable_iter* i1,
3229 const upb_inttable_iter* i2) {
3230 if (upb_inttable_done(i1) && upb_inttable_done(i2)) return true;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003231 return i1->t == i2->t && i1->index == i2->index &&
3232 i1->array_part == i2->array_part;
3233}
3234
Joshua Habermandd69a482021-05-17 22:40:33 -07003235/** upb/upb.c ************************************************************/
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003236#include <errno.h>
Joshua Habermanf41049a2022-01-21 14:41:25 -08003237#include <float.h>
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003238#include <stdarg.h>
3239#include <stddef.h>
3240#include <stdint.h>
3241#include <stdio.h>
3242#include <stdlib.h>
3243#include <string.h>
3244
3245
Joshua Habermanf41049a2022-01-21 14:41:25 -08003246// Must be last.
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003247
Joshua Habermanf41049a2022-01-21 14:41:25 -08003248/* upb_Status *****************************************************************/
3249
3250void upb_Status_Clear(upb_Status* status) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003251 if (!status) return;
3252 status->ok = true;
3253 status->msg[0] = '\0';
3254}
3255
Joshua Habermanf41049a2022-01-21 14:41:25 -08003256bool upb_Status_IsOk(const upb_Status* status) { return status->ok; }
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003257
Joshua Habermanf41049a2022-01-21 14:41:25 -08003258const char* upb_Status_ErrorMessage(const upb_Status* status) {
3259 return status->msg;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003260}
3261
Joshua Habermanf41049a2022-01-21 14:41:25 -08003262void upb_Status_SetErrorMessage(upb_Status* status, const char* msg) {
3263 if (!status) return;
3264 status->ok = false;
3265 strncpy(status->msg, msg, _kUpb_Status_MaxMessage - 1);
3266 status->msg[_kUpb_Status_MaxMessage - 1] = '\0';
3267}
3268
3269void upb_Status_SetErrorFormat(upb_Status* status, const char* fmt, ...) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003270 va_list args;
3271 va_start(args, fmt);
Joshua Habermanf41049a2022-01-21 14:41:25 -08003272 upb_Status_VSetErrorFormat(status, fmt, args);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003273 va_end(args);
3274}
3275
Joshua Habermanf41049a2022-01-21 14:41:25 -08003276void upb_Status_VSetErrorFormat(upb_Status* status, const char* fmt,
3277 va_list args) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003278 if (!status) return;
3279 status->ok = false;
3280 vsnprintf(status->msg, sizeof(status->msg), fmt, args);
Joshua Habermanf41049a2022-01-21 14:41:25 -08003281 status->msg[_kUpb_Status_MaxMessage - 1] = '\0';
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003282}
3283
Joshua Habermanf41049a2022-01-21 14:41:25 -08003284void upb_Status_VAppendErrorFormat(upb_Status* status, const char* fmt,
3285 va_list args) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003286 size_t len;
3287 if (!status) return;
3288 status->ok = false;
3289 len = strlen(status->msg);
3290 vsnprintf(status->msg + len, sizeof(status->msg) - len, fmt, args);
Joshua Habermanf41049a2022-01-21 14:41:25 -08003291 status->msg[_kUpb_Status_MaxMessage - 1] = '\0';
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003292}
3293
3294/* upb_alloc ******************************************************************/
3295
Joshua Habermanf41049a2022-01-21 14:41:25 -08003296static void* upb_global_allocfunc(upb_alloc* alloc, void* ptr, size_t oldsize,
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003297 size_t size) {
3298 UPB_UNUSED(alloc);
3299 UPB_UNUSED(oldsize);
3300 if (size == 0) {
3301 free(ptr);
3302 return NULL;
3303 } else {
3304 return realloc(ptr, size);
3305 }
3306}
3307
/* Extracts the cleanup-counter pointer from |cleanup_metadata| by clearing
 * the low tag bit. */
static uint32_t* upb_cleanup_pointer(uintptr_t cleanup_metadata) {
  const uintptr_t tag_bit = 0x1;
  return (uint32_t*)(cleanup_metadata & ~tag_bit);
}
3311
/* Returns the "has initial block" flag, stored in the low bit of
 * |cleanup_metadata|. */
static bool upb_cleanup_has_initial_block(uintptr_t cleanup_metadata) {
  return (cleanup_metadata & 0x1) != 0;
}
3315
/* Packs the cleanup-counter pointer and the "has initial block" flag into a
 * single word; the flag occupies the low bit. */
static uintptr_t upb_cleanup_metadata(uint32_t* cleanup,
                                      bool has_initial_block) {
  const uintptr_t bits = (uintptr_t)cleanup;
  return bits | has_initial_block;
}
3320
/* Allocator instance whose callback is upb_global_allocfunc(). */
upb_alloc upb_alloc_global = {&upb_global_allocfunc};
3322
Joshua Habermanf41049a2022-01-21 14:41:25 -08003323/* upb_Arena ******************************************************************/
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003324
3325/* Be conservative and choose 16 in case anyone is using SSE. */
3326
/* Header placed at the start of every block handed to upb_Arena_addblock(). */
struct mem_block {
  /* Next block in the root arena's freelist. */
  struct mem_block* next;
  /* Block size in bytes, stored truncated to 32 bits. */
  uint32_t size;
  /* Number of cleanup_ent records stored at the tail of the block. */
  uint32_t cleanups;
  /* Data follows. */
};
3333
/* One registered cleanup: arena_dofree() calls cleanup(ud) for each record
 * when the owning block is freed. */
typedef struct cleanup_ent {
  upb_CleanupFunc* cleanup;
  void* ud;
} cleanup_ent;
3338
/* Bytes reserved at the start of each block for the mem_block header,
 * rounded up to a multiple of 16; usable data begins at this offset. */
static const size_t memblock_reserve = UPB_ALIGN_UP(sizeof(mem_block), 16);
3340
Joshua Habermanf41049a2022-01-21 14:41:25 -08003341static upb_Arena* arena_findroot(upb_Arena* a) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003342 /* Path splitting keeps time complexity down, see:
3343 * https://en.wikipedia.org/wiki/Disjoint-set_data_structure */
3344 while (a->parent != a) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08003345 upb_Arena* next = a->parent;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003346 a->parent = next->parent;
3347 a = next;
3348 }
3349 return a;
3350}
3351
Joshua Habermanf41049a2022-01-21 14:41:25 -08003352static void upb_Arena_addblock(upb_Arena* a, upb_Arena* root, void* ptr,
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003353 size_t size) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08003354 mem_block* block = ptr;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003355
3356 /* The block is for arena |a|, but should appear in the freelist of |root|. */
3357 block->next = root->freelist;
3358 block->size = (uint32_t)size;
3359 block->cleanups = 0;
3360 root->freelist = block;
3361 a->last_size = block->size;
3362 if (!root->freelist_tail) root->freelist_tail = block;
3363
3364 a->head.ptr = UPB_PTR_AT(block, memblock_reserve, char);
3365 a->head.end = UPB_PTR_AT(block, size, char);
Joshua Habermandd69a482021-05-17 22:40:33 -07003366 a->cleanup_metadata = upb_cleanup_metadata(
3367 &block->cleanups, upb_cleanup_has_initial_block(a->cleanup_metadata));
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003368
3369 UPB_POISON_MEMORY_REGION(a->head.ptr, a->head.end - a->head.ptr);
3370}
3371
Joshua Habermanf41049a2022-01-21 14:41:25 -08003372static bool upb_Arena_Allocblock(upb_Arena* a, size_t size) {
3373 upb_Arena* root = arena_findroot(a);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003374 size_t block_size = UPB_MAX(size, a->last_size * 2) + memblock_reserve;
Joshua Habermanf41049a2022-01-21 14:41:25 -08003375 mem_block* block = upb_malloc(root->block_alloc, block_size);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003376
3377 if (!block) return false;
Joshua Habermanf41049a2022-01-21 14:41:25 -08003378 upb_Arena_addblock(a, root, block, block_size);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003379 return true;
3380}
3381
Joshua Habermanf41049a2022-01-21 14:41:25 -08003382void* _upb_Arena_SlowMalloc(upb_Arena* a, size_t size) {
3383 if (!upb_Arena_Allocblock(a, size)) return NULL; /* Out of memory. */
3384 UPB_ASSERT(_upb_ArenaHas(a) >= size);
3385 return upb_Arena_Malloc(a, size);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003386}
3387
Joshua Habermanf41049a2022-01-21 14:41:25 -08003388static void* upb_Arena_doalloc(upb_alloc* alloc, void* ptr, size_t oldsize,
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003389 size_t size) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08003390 upb_Arena* a = (upb_Arena*)alloc; /* upb_alloc is initial member. */
3391 return upb_Arena_Realloc(a, ptr, oldsize, size);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003392}
3393
3394/* Public Arena API ***********************************************************/
3395
Joshua Habermanf41049a2022-01-21 14:41:25 -08003396upb_Arena* arena_initslow(void* mem, size_t n, upb_alloc* alloc) {
3397 const size_t first_block_overhead = sizeof(upb_Arena) + memblock_reserve;
3398 upb_Arena* a;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003399
3400 /* We need to malloc the initial block. */
3401 n = first_block_overhead + 256;
3402 if (!alloc || !(mem = upb_malloc(alloc, n))) {
3403 return NULL;
3404 }
3405
Joshua Habermanf41049a2022-01-21 14:41:25 -08003406 a = UPB_PTR_AT(mem, n - sizeof(*a), upb_Arena);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003407 n -= sizeof(*a);
3408
Joshua Habermanf41049a2022-01-21 14:41:25 -08003409 a->head.alloc.func = &upb_Arena_doalloc;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003410 a->block_alloc = alloc;
3411 a->parent = a;
3412 a->refcount = 1;
3413 a->freelist = NULL;
3414 a->freelist_tail = NULL;
Joshua Habermandd69a482021-05-17 22:40:33 -07003415 a->cleanup_metadata = upb_cleanup_metadata(NULL, false);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003416
Joshua Habermanf41049a2022-01-21 14:41:25 -08003417 upb_Arena_addblock(a, a, mem, n);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003418
3419 return a;
3420}
3421
Joshua Habermanf41049a2022-01-21 14:41:25 -08003422upb_Arena* upb_Arena_Init(void* mem, size_t n, upb_alloc* alloc) {
3423 upb_Arena* a;
3424
3425 if (n) {
3426 /* Align initial pointer up so that we return properly-aligned pointers. */
3427 void* aligned = (void*)UPB_ALIGN_UP((uintptr_t)mem, 16);
3428 size_t delta = (uintptr_t)aligned - (uintptr_t)mem;
3429 n = delta <= n ? n - delta : 0;
3430 mem = aligned;
3431 }
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003432
3433 /* Round block size down to alignof(*a) since we will allocate the arena
3434 * itself at the end. */
Joshua Habermanf41049a2022-01-21 14:41:25 -08003435 n = UPB_ALIGN_DOWN(n, UPB_ALIGN_OF(upb_Arena));
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003436
Joshua Habermanf41049a2022-01-21 14:41:25 -08003437 if (UPB_UNLIKELY(n < sizeof(upb_Arena))) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003438 return arena_initslow(mem, n, alloc);
3439 }
3440
Joshua Habermanf41049a2022-01-21 14:41:25 -08003441 a = UPB_PTR_AT(mem, n - sizeof(*a), upb_Arena);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003442
Joshua Habermanf41049a2022-01-21 14:41:25 -08003443 a->head.alloc.func = &upb_Arena_doalloc;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003444 a->block_alloc = alloc;
3445 a->parent = a;
3446 a->refcount = 1;
3447 a->last_size = UPB_MAX(128, n);
3448 a->head.ptr = mem;
3449 a->head.end = UPB_PTR_AT(mem, n - sizeof(*a), char);
3450 a->freelist = NULL;
Joshua Habermandd69a482021-05-17 22:40:33 -07003451 a->cleanup_metadata = upb_cleanup_metadata(NULL, true);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003452
3453 return a;
3454}
3455
Joshua Habermanf41049a2022-01-21 14:41:25 -08003456static void arena_dofree(upb_Arena* a) {
3457 mem_block* block = a->freelist;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003458 UPB_ASSERT(a->parent == a);
3459 UPB_ASSERT(a->refcount == 0);
3460
3461 while (block) {
3462 /* Load first since we are deleting block. */
Joshua Habermanf41049a2022-01-21 14:41:25 -08003463 mem_block* next = block->next;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003464
3465 if (block->cleanups > 0) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08003466 cleanup_ent* end = UPB_PTR_AT(block, block->size, void);
3467 cleanup_ent* ptr = end - block->cleanups;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003468
3469 for (; ptr < end; ptr++) {
3470 ptr->cleanup(ptr->ud);
3471 }
3472 }
3473
3474 upb_free(a->block_alloc, block);
3475 block = next;
3476 }
3477}
3478
Joshua Habermanf41049a2022-01-21 14:41:25 -08003479void upb_Arena_Free(upb_Arena* a) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003480 a = arena_findroot(a);
3481 if (--a->refcount == 0) arena_dofree(a);
3482}
3483
Joshua Habermanf41049a2022-01-21 14:41:25 -08003484bool upb_Arena_AddCleanup(upb_Arena* a, void* ud, upb_CleanupFunc* func) {
3485 cleanup_ent* ent;
Joshua Habermandd69a482021-05-17 22:40:33 -07003486 uint32_t* cleanups = upb_cleanup_pointer(a->cleanup_metadata);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003487
Joshua Habermanf41049a2022-01-21 14:41:25 -08003488 if (!cleanups || _upb_ArenaHas(a) < sizeof(cleanup_ent)) {
3489 if (!upb_Arena_Allocblock(a, 128)) return false; /* Out of memory. */
3490 UPB_ASSERT(_upb_ArenaHas(a) >= sizeof(cleanup_ent));
Joshua Habermandd69a482021-05-17 22:40:33 -07003491 cleanups = upb_cleanup_pointer(a->cleanup_metadata);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003492 }
3493
3494 a->head.end -= sizeof(cleanup_ent);
3495 ent = (cleanup_ent*)a->head.end;
Joshua Habermandd69a482021-05-17 22:40:33 -07003496 (*cleanups)++;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003497 UPB_UNPOISON_MEMORY_REGION(ent, sizeof(cleanup_ent));
3498
3499 ent->cleanup = func;
3500 ent->ud = ud;
3501
3502 return true;
3503}
3504
Joshua Habermanf41049a2022-01-21 14:41:25 -08003505bool upb_Arena_Fuse(upb_Arena* a1, upb_Arena* a2) {
3506 upb_Arena* r1 = arena_findroot(a1);
3507 upb_Arena* r2 = arena_findroot(a2);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003508
Joshua Habermanf41049a2022-01-21 14:41:25 -08003509 if (r1 == r2) return true; /* Already fused. */
Joshua Habermandd69a482021-05-17 22:40:33 -07003510
3511 /* Do not fuse initial blocks since we cannot lifetime extend them. */
3512 if (upb_cleanup_has_initial_block(r1->cleanup_metadata)) return false;
3513 if (upb_cleanup_has_initial_block(r2->cleanup_metadata)) return false;
3514
3515 /* Only allow fuse with a common allocator */
3516 if (r1->block_alloc != r2->block_alloc) return false;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003517
3518 /* We want to join the smaller tree to the larger tree.
3519 * So swap first if they are backwards. */
3520 if (r1->refcount < r2->refcount) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08003521 upb_Arena* tmp = r1;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003522 r1 = r2;
3523 r2 = tmp;
3524 }
3525
3526 /* r1 takes over r2's freelist and refcount. */
3527 r1->refcount += r2->refcount;
3528 if (r2->freelist_tail) {
3529 UPB_ASSERT(r2->freelist_tail->next == NULL);
3530 r2->freelist_tail->next = r1->freelist;
3531 r1->freelist = r2->freelist;
3532 }
3533 r2->parent = r1;
Joshua Habermandd69a482021-05-17 22:40:33 -07003534 return true;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003535}
Joshua Habermandd69a482021-05-17 22:40:33 -07003536
Joshua Habermanf41049a2022-01-21 14:41:25 -08003537/* Miscellaneous utilities ****************************************************/
3538
/* Replaces every ',' in the NUL-terminated string |p| with '.', in place.
 *
 * printf() is dependent on locales; sadly there is no easy and portable way
 * to avoid this.  This little post-processing step will translate 1,2 -> 1.2
 * since JSON needs the latter.  Arguably a hack, but it is simple and the
 * alternatives are far more complicated, platform-dependent, and/or larger
 * in code size. */
static void upb_FixLocale(char* p) {
  while (*p != '\0') {
    if (*p == ',') *p = '.';
    p++;
  }
}
3549
Joshua Habermanf41049a2022-01-21 14:41:25 -08003550void _upb_EncodeRoundTripDouble(double val, char* buf, size_t size) {
3551 assert(size >= kUpb_RoundTripBufferSize);
3552 snprintf(buf, size, "%.*g", DBL_DIG, val);
3553 if (strtod(buf, NULL) != val) {
3554 snprintf(buf, size, "%.*g", DBL_DIG + 2, val);
3555 assert(strtod(buf, NULL) == val);
3556 }
3557 upb_FixLocale(buf);
3558}
3559
3560void _upb_EncodeRoundTripFloat(float val, char* buf, size_t size) {
3561 assert(size >= kUpb_RoundTripBufferSize);
3562 snprintf(buf, size, "%.*g", FLT_DIG, val);
3563 if (strtof(buf, NULL) != val) {
3564 snprintf(buf, size, "%.*g", FLT_DIG + 3, val);
3565 assert(strtof(buf, NULL) == val);
3566 }
3567 upb_FixLocale(buf);
3568}
3569
Joshua Habermandd69a482021-05-17 22:40:33 -07003570/** upb/decode_fast.c ************************************************************/
3571// Fast decoder: ~3x the speed of decode.c, but requires x86-64/ARM64.
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003572// Also the table size grows by 2x.
3573//
Joshua Habermandd69a482021-05-17 22:40:33 -07003574// Could potentially be ported to other 64-bit archs that pass at least six
3575// arguments in registers and have 8 unused high bits in pointers.
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003576//
3577// The overall design is to create specialized functions for every possible
3578// field type (eg. oneof boolean field with a 1 byte tag) and then dispatch
3579// to the specialized function as quickly as possible.
3580
3581
3582
3583/* Must be last. */
3584
3585#if UPB_FASTTABLE
3586
// The standard set of arguments passed to each parsing function.
// Thanks to x86-64 calling conventions, these will stay in registers.
#define UPB_PARSE_PARAMS                                                 \
  upb_Decoder *d, const char *ptr, upb_Message *msg, intptr_t table, \
      uint64_t hasbits, uint64_t data

// The matching argument list, for forwarding UPB_PARSE_PARAMS unchanged to
// another parsing function.
#define UPB_PARSE_ARGS d, ptr, msg, table, hasbits, data

// Returns from the current parsing function by calling fastdecode_generic()
// with data == 0.  The argument |m| is only used by the commented-out
// debugging aid below.
#define RETURN_GENERIC(m)                                 \
  /* Uncomment either of these for debugging purposes. */ \
  /* fprintf(stderr, m); */                               \
  /*__builtin_trap(); */                                  \
  return fastdecode_generic(d, ptr, msg, table, hasbits, 0);
3600
/* Field cardinality used to select a specialized parsing function. */
typedef enum {
  CARD_s = 0, /* Singular (optional, non-repeated) */
  CARD_o = 1, /* Oneof */
  CARD_r = 2, /* Repeated */
  CARD_p = 3  /* Packed Repeated */
} upb_card;
3607
3608UPB_NOINLINE
Joshua Habermanf41049a2022-01-21 14:41:25 -08003609static const char* fastdecode_isdonefallback(UPB_PARSE_PARAMS) {
Joshua Habermandd69a482021-05-17 22:40:33 -07003610 int overrun = data;
Joshua Habermanf41049a2022-01-21 14:41:25 -08003611 int status;
3612 ptr = decode_isdonefallback_inl(d, ptr, overrun, &status);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003613 if (ptr == NULL) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08003614 return fastdecode_err(d, status);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003615 }
Joshua Habermandd69a482021-05-17 22:40:33 -07003616 data = fastdecode_loadtag(ptr);
3617 UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003618}
3619
3620UPB_FORCEINLINE
Joshua Habermanf41049a2022-01-21 14:41:25 -08003621static const char* fastdecode_dispatch(UPB_PARSE_PARAMS) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003622 if (UPB_UNLIKELY(ptr >= d->limit_ptr)) {
3623 int overrun = ptr - d->end;
3624 if (UPB_LIKELY(overrun == d->limit)) {
3625 // Parse is finished.
3626 *(uint32_t*)msg |= hasbits; // Sync hasbits.
Joshua Habermanf41049a2022-01-21 14:41:25 -08003627 const upb_MiniTable* l = decode_totablep(table);
3628 return UPB_UNLIKELY(l->required_count)
3629 ? decode_checkrequired(d, ptr, msg, l)
3630 : ptr;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003631 } else {
Joshua Habermandd69a482021-05-17 22:40:33 -07003632 data = overrun;
3633 UPB_MUSTTAIL return fastdecode_isdonefallback(UPB_PARSE_ARGS);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003634 }
3635 }
3636
3637 // Read two bytes of tag data (for a one-byte tag, the high byte is junk).
Joshua Habermandd69a482021-05-17 22:40:33 -07003638 data = fastdecode_loadtag(ptr);
3639 UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003640}
3641
3642UPB_FORCEINLINE
Joshua Habermandd69a482021-05-17 22:40:33 -07003643static bool fastdecode_checktag(uint16_t data, int tagbytes) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003644 if (tagbytes == 1) {
3645 return (data & 0xff) == 0;
3646 } else {
Joshua Habermandd69a482021-05-17 22:40:33 -07003647 return data == 0;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003648 }
3649}
3650
3651UPB_FORCEINLINE
Joshua Habermanf41049a2022-01-21 14:41:25 -08003652static const char* fastdecode_longsize(const char* ptr, int* size) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003653 int i;
3654 UPB_ASSERT(*size & 0x80);
3655 *size &= 0xff;
3656 for (i = 0; i < 3; i++) {
3657 ptr++;
3658 size_t byte = (uint8_t)ptr[-1];
3659 *size += (byte - 1) << (7 + 7 * i);
3660 if (UPB_LIKELY((byte & 0x80) == 0)) return ptr;
3661 }
3662 ptr++;
3663 size_t byte = (uint8_t)ptr[-1];
3664 // len is limited by 2gb not 4gb, hence 8 and not 16 as normally expected
3665 // for a 32 bit varint.
3666 if (UPB_UNLIKELY(byte >= 8)) return NULL;
3667 *size += (byte - 1) << 28;
3668 return ptr;
3669}
3670
3671UPB_FORCEINLINE
Joshua Habermanf41049a2022-01-21 14:41:25 -08003672static bool fastdecode_boundscheck(const char* ptr, size_t len,
3673 const char* end) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003674 uintptr_t uptr = (uintptr_t)ptr;
3675 uintptr_t uend = (uintptr_t)end + 16;
3676 uintptr_t res = uptr + len;
3677 return res < uptr || res > uend;
3678}
3679
3680UPB_FORCEINLINE
Joshua Habermanf41049a2022-01-21 14:41:25 -08003681static bool fastdecode_boundscheck2(const char* ptr, size_t len,
3682 const char* end) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003683 // This is one extra branch compared to the more normal:
3684 // return (size_t)(end - ptr) < size;
3685 // However it is one less computation if we are just about to use "ptr + len":
3686 // https://godbolt.org/z/35YGPz
3687 // In microbenchmarks this shows an overall 4% improvement.
3688 uintptr_t uptr = (uintptr_t)ptr;
3689 uintptr_t uend = (uintptr_t)end;
3690 uintptr_t res = uptr + len;
3691 return res < uptr || res > uend;
3692}
3693
/* Callback invoked by fastdecode_delimited() on the payload of a
 * length-delimited region; its return value becomes the new parse pointer. */
typedef const char* fastdecode_delimfunc(upb_Decoder* d, const char* ptr,
                                         void* ctx);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003696
3697UPB_FORCEINLINE
Joshua Habermanf41049a2022-01-21 14:41:25 -08003698static const char* fastdecode_delimited(upb_Decoder* d, const char* ptr,
3699 fastdecode_delimfunc* func, void* ctx) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003700 ptr++;
3701 int len = (int8_t)ptr[-1];
3702 if (fastdecode_boundscheck2(ptr, len, d->limit_ptr)) {
3703 // Slow case: Sub-message is >=128 bytes and/or exceeds the current buffer.
3704 // If it exceeds the buffer limit, limit/limit_ptr will change during
3705 // sub-message parsing, so we need to preserve delta, not limit.
3706 if (UPB_UNLIKELY(len & 0x80)) {
3707 // Size varint >1 byte (length >= 128).
3708 ptr = fastdecode_longsize(ptr, &len);
3709 if (!ptr) {
3710 // Corrupt wire format: size exceeded INT_MAX.
3711 return NULL;
3712 }
3713 }
3714 if (ptr - d->end + (int)len > d->limit) {
3715 // Corrupt wire format: invalid limit.
3716 return NULL;
3717 }
3718 int delta = decode_pushlimit(d, ptr, len);
3719 ptr = func(d, ptr, ctx);
3720 decode_poplimit(d, ptr, delta);
3721 } else {
3722 // Fast case: Sub-message is <128 bytes and fits in the current buffer.
3723 // This means we can preserve limit/limit_ptr verbatim.
Joshua Habermanf41049a2022-01-21 14:41:25 -08003724 const char* saved_limit_ptr = d->limit_ptr;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003725 int saved_limit = d->limit;
3726 d->limit_ptr = ptr + len;
3727 d->limit = d->limit_ptr - d->end;
3728 UPB_ASSERT(d->limit_ptr == d->end + UPB_MIN(0, d->limit));
3729 ptr = func(d, ptr, ctx);
3730 d->limit_ptr = saved_limit_ptr;
3731 d->limit = saved_limit;
3732 UPB_ASSERT(d->limit_ptr == d->end + UPB_MIN(0, d->limit));
3733 }
3734 return ptr;
3735}
3736
3737/* singular, oneof, repeated field handling ***********************************/
3738
3739typedef struct {
Joshua Habermanf41049a2022-01-21 14:41:25 -08003740 upb_Array* arr;
3741 void* end;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003742} fastdecode_arr;
3743
/* Outcome reported by fastdecode_nextrepeated() after one element of a
 * repeated field has been stored. */
typedef enum {
  FD_NEXT_ATLIMIT,   /* decode_isdone() reported that parsing is done. */
  FD_NEXT_SAMEFIELD, /* The next tag matches the field being parsed. */
  FD_NEXT_OTHERFIELD /* The next tag does not match the field being parsed. */
} fastdecode_next;
3749
3750typedef struct {
Joshua Habermanf41049a2022-01-21 14:41:25 -08003751 void* dst;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003752 fastdecode_next next;
3753 uint32_t tag;
3754} fastdecode_nextret;
3755
3756UPB_FORCEINLINE
Joshua Habermanf41049a2022-01-21 14:41:25 -08003757static void* fastdecode_resizearr(upb_Decoder* d, void* dst,
3758 fastdecode_arr* farr, int valbytes) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003759 if (UPB_UNLIKELY(dst == farr->end)) {
3760 size_t old_size = farr->arr->size;
3761 size_t old_bytes = old_size * valbytes;
3762 size_t new_size = old_size * 2;
3763 size_t new_bytes = new_size * valbytes;
Joshua Habermanf41049a2022-01-21 14:41:25 -08003764 char* old_ptr = _upb_array_ptr(farr->arr);
3765 char* new_ptr = upb_Arena_Realloc(&d->arena, old_ptr, old_bytes, new_bytes);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003766 uint8_t elem_size_lg2 = __builtin_ctz(valbytes);
3767 farr->arr->size = new_size;
3768 farr->arr->data = _upb_array_tagptr(new_ptr, elem_size_lg2);
3769 dst = (void*)(new_ptr + (old_size * valbytes));
3770 farr->end = (void*)(new_ptr + (new_size * valbytes));
3771 }
3772 return dst;
3773}
3774
3775UPB_FORCEINLINE
3776static bool fastdecode_tagmatch(uint32_t tag, uint64_t data, int tagbytes) {
3777 if (tagbytes == 1) {
3778 return (uint8_t)tag == (uint8_t)data;
3779 } else {
3780 return (uint16_t)tag == (uint16_t)data;
3781 }
3782}
3783
3784UPB_FORCEINLINE
Joshua Habermanf41049a2022-01-21 14:41:25 -08003785static void fastdecode_commitarr(void* dst, fastdecode_arr* farr,
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003786 int valbytes) {
3787 farr->arr->len =
Joshua Habermanf41049a2022-01-21 14:41:25 -08003788 (size_t)((char*)dst - (char*)_upb_array_ptr(farr->arr)) / valbytes;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003789}
3790
3791UPB_FORCEINLINE
Joshua Habermanf41049a2022-01-21 14:41:25 -08003792static fastdecode_nextret fastdecode_nextrepeated(upb_Decoder* d, void* dst,
3793 const char** ptr,
3794 fastdecode_arr* farr,
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003795 uint64_t data, int tagbytes,
3796 int valbytes) {
3797 fastdecode_nextret ret;
Joshua Habermanf41049a2022-01-21 14:41:25 -08003798 dst = (char*)dst + valbytes;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003799
3800 if (UPB_LIKELY(!decode_isdone(d, ptr))) {
3801 ret.tag = fastdecode_loadtag(*ptr);
3802 if (fastdecode_tagmatch(ret.tag, data, tagbytes)) {
3803 ret.next = FD_NEXT_SAMEFIELD;
3804 } else {
3805 fastdecode_commitarr(dst, farr, valbytes);
3806 ret.next = FD_NEXT_OTHERFIELD;
3807 }
3808 } else {
3809 fastdecode_commitarr(dst, farr, valbytes);
3810 ret.next = FD_NEXT_ATLIMIT;
3811 }
3812
3813 ret.dst = dst;
3814 return ret;
3815}
3816
3817UPB_FORCEINLINE
Joshua Habermanf41049a2022-01-21 14:41:25 -08003818static void* fastdecode_fieldmem(upb_Message* msg, uint64_t data) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003819 size_t ofs = data >> 48;
Joshua Habermanf41049a2022-01-21 14:41:25 -08003820 return (char*)msg + ofs;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003821}
3822
3823UPB_FORCEINLINE
Joshua Habermanf41049a2022-01-21 14:41:25 -08003824static void* fastdecode_getfield(upb_Decoder* d, const char* ptr,
3825 upb_Message* msg, uint64_t* data,
3826 uint64_t* hasbits, fastdecode_arr* farr,
3827 int valbytes, upb_card card) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003828 switch (card) {
3829 case CARD_s: {
3830 uint8_t hasbit_index = *data >> 24;
3831 // Set hasbit and return pointer to scalar field.
3832 *hasbits |= 1ull << hasbit_index;
3833 return fastdecode_fieldmem(msg, *data);
3834 }
3835 case CARD_o: {
3836 uint16_t case_ofs = *data >> 32;
Joshua Habermanf41049a2022-01-21 14:41:25 -08003837 uint32_t* oneof_case = UPB_PTR_AT(msg, case_ofs, uint32_t);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003838 uint8_t field_number = *data >> 24;
3839 *oneof_case = field_number;
3840 return fastdecode_fieldmem(msg, *data);
3841 }
3842 case CARD_r: {
Joshua Habermanf41049a2022-01-21 14:41:25 -08003843 // Get pointer to upb_Array and allocate/expand if necessary.
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003844 uint8_t elem_size_lg2 = __builtin_ctz(valbytes);
Joshua Habermanf41049a2022-01-21 14:41:25 -08003845 upb_Array** arr_p = fastdecode_fieldmem(msg, *data);
3846 char* begin;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003847 *(uint32_t*)msg |= *hasbits;
3848 *hasbits = 0;
3849 if (UPB_LIKELY(!*arr_p)) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08003850 farr->arr = _upb_Array_New(&d->arena, 8, elem_size_lg2);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003851 *arr_p = farr->arr;
3852 } else {
3853 farr->arr = *arr_p;
3854 }
3855 begin = _upb_array_ptr(farr->arr);
3856 farr->end = begin + (farr->arr->size * valbytes);
3857 *data = fastdecode_loadtag(ptr);
3858 return begin + (farr->arr->len * valbytes);
3859 }
3860 default:
3861 UPB_UNREACHABLE();
3862 }
3863}
3864
3865UPB_FORCEINLINE
Joshua Habermanf41049a2022-01-21 14:41:25 -08003866static bool fastdecode_flippacked(uint64_t* data, int tagbytes) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003867 *data ^= (0x2 ^ 0x0); // Patch data to match packed wiretype.
3868 return fastdecode_checktag(*data, tagbytes);
3869}
3870
/* Tag guard placed at the top of the numeric field parsers.  Expects `data`
 * and the UPB_PARSE_ARGS variables to be in scope.
 *
 * If `data` fails fastdecode_checktag() and the field is repeated, `data` is
 * patched with fastdecode_flippacked(); when the patched value matches,
 * control tail-calls `func` (the parser for the other encoding).  In every
 * other mismatch case the field is handed off via RETURN_GENERIC. */
#define FASTDECODE_CHECKPACKED(tagbytes, card, func) \
  if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { \
    if (card == CARD_r && fastdecode_flippacked(&data, tagbytes)) { \
      UPB_MUSTTAIL return func(UPB_PARSE_ARGS); \
    } \
    RETURN_GENERIC("packed check tag mismatch\n"); \
  }
3878
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003879/* varint fields **************************************************************/
3880
3881UPB_FORCEINLINE
3882static uint64_t fastdecode_munge(uint64_t val, int valbytes, bool zigzag) {
3883 if (valbytes == 1) {
3884 return val != 0;
3885 } else if (zigzag) {
3886 if (valbytes == 4) {
3887 uint32_t n = val;
3888 return (n >> 1) ^ -(int32_t)(n & 1);
3889 } else if (valbytes == 8) {
3890 return (val >> 1) ^ -(int64_t)(val & 1);
3891 }
3892 UPB_UNREACHABLE();
3893 }
3894 return val;
3895}
3896
3897UPB_FORCEINLINE
Joshua Habermanf41049a2022-01-21 14:41:25 -08003898static const char* fastdecode_varint64(const char* ptr, uint64_t* val) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003899 ptr++;
3900 *val = (uint8_t)ptr[-1];
3901 if (UPB_UNLIKELY(*val & 0x80)) {
3902 int i;
3903 for (i = 0; i < 8; i++) {
3904 ptr++;
3905 uint64_t byte = (uint8_t)ptr[-1];
3906 *val += (byte - 1) << (7 + 7 * i);
3907 if (UPB_LIKELY((byte & 0x80) == 0)) goto done;
3908 }
3909 ptr++;
3910 uint64_t byte = (uint8_t)ptr[-1];
3911 if (byte > 1) {
3912 return NULL;
3913 }
3914 *val += (byte - 1) << 63;
3915 }
3916done:
3917 UPB_ASSUME(ptr != NULL);
3918 return ptr;
3919}
3920
/* Function body for parsers of non-packed varint fields (singular, oneof or
 * unpacked repeated).
 *
 * After the tag guard it obtains the destination from fastdecode_getfield(),
 * then, per element: grows the array if needed (repeated only), skips the
 * tag, decodes and munges the varint, and copies its low `valbytes` bytes to
 * the destination.  Repeated fields loop while the next tag matches; every
 * other path ends in a tail call or returns `ptr` at the limit. */
#define FASTDECODE_UNPACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \
                                  valbytes, card, zigzag, packed) \
  uint64_t val; \
  void* dst; \
  fastdecode_arr farr; \
  \
  FASTDECODE_CHECKPACKED(tagbytes, card, packed); \
  \
  dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, valbytes, \
                            card); \
  if (card == CARD_r && UPB_UNLIKELY(!dst)) { \
    RETURN_GENERIC("need array resize\n"); \
  } \
  \
  again: \
  if (card == CARD_r) { \
    dst = fastdecode_resizearr(d, dst, &farr, valbytes); \
  } \
  \
  ptr += tagbytes; \
  ptr = fastdecode_varint64(ptr, &val); \
  if (ptr == NULL) return fastdecode_err(d, kUpb_DecodeStatus_Malformed); \
  val = fastdecode_munge(val, valbytes, zigzag); \
  memcpy(dst, &val, valbytes); \
  \
  if (card == CARD_r) { \
    fastdecode_nextret ret = fastdecode_nextrepeated( \
        d, dst, &ptr, &farr, data, tagbytes, valbytes); \
    switch (ret.next) { \
      case FD_NEXT_SAMEFIELD: \
        dst = ret.dst; \
        goto again; \
      case FD_NEXT_OTHERFIELD: \
        data = ret.tag; \
        UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); \
      case FD_NEXT_ATLIMIT: \
        return ptr; \
    } \
  } \
  \
  UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003964
3965typedef struct {
3966 uint8_t valbytes;
3967 bool zigzag;
Joshua Habermanf41049a2022-01-21 14:41:25 -08003968 void* dst;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003969 fastdecode_arr farr;
3970} fastdecode_varintdata;
3971
3972UPB_FORCEINLINE
Joshua Habermanf41049a2022-01-21 14:41:25 -08003973static const char* fastdecode_topackedvarint(upb_Decoder* d, const char* ptr,
3974 void* ctx) {
3975 fastdecode_varintdata* data = ctx;
3976 void* dst = data->dst;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003977 uint64_t val;
3978
3979 while (!decode_isdone(d, &ptr)) {
3980 dst = fastdecode_resizearr(d, dst, &data->farr, data->valbytes);
3981 ptr = fastdecode_varint64(ptr, &val);
3982 if (ptr == NULL) return NULL;
3983 val = fastdecode_munge(val, data->valbytes, data->zigzag);
3984 memcpy(dst, &val, data->valbytes);
Joshua Habermanf41049a2022-01-21 14:41:25 -08003985 dst = (char*)dst + data->valbytes;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08003986 }
3987
3988 fastdecode_commitarr(dst, &data->farr, data->valbytes);
3989 return ptr;
3990}
3991
/* Function body for parsers of packed repeated varint fields.
 *
 * After the tag guard it prepares a fastdecode_varintdata context whose
 * destination comes from fastdecode_getfield(), skips the tag and lets
 * fastdecode_delimited() run fastdecode_topackedvarint() over the payload.
 * A NULL result is reported as kUpb_DecodeStatus_Malformed. */
#define FASTDECODE_PACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \
                                valbytes, zigzag, unpacked) \
  fastdecode_varintdata ctx = {valbytes, zigzag}; \
  \
  FASTDECODE_CHECKPACKED(tagbytes, CARD_r, unpacked); \
  \
  ctx.dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &ctx.farr, \
                                valbytes, CARD_r); \
  if (UPB_UNLIKELY(!ctx.dst)) { \
    RETURN_GENERIC("need array resize\n"); \
  } \
  \
  ptr += tagbytes; \
  ptr = fastdecode_delimited(d, ptr, &fastdecode_topackedvarint, &ctx); \
  \
  if (UPB_UNLIKELY(ptr == NULL)) { \
    return fastdecode_err(d, kUpb_DecodeStatus_Malformed); \
  } \
  \
  UPB_MUSTTAIL return fastdecode_dispatch(d, ptr, msg, table, hasbits, 0);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08004012
/* Selects the varint parser body by cardinality: CARD_p expands the packed
 * body (falling back to `unpacked` on a tag flip), every other cardinality
 * expands the unpacked body (falling back to `packed`). */
#define FASTDECODE_VARINT(d, ptr, msg, table, hasbits, data, tagbytes, \
                          valbytes, card, zigzag, unpacked, packed) \
  if (card == CARD_p) { \
    FASTDECODE_PACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \
                            valbytes, zigzag, unpacked); \
  } else { \
    FASTDECODE_UNPACKEDVARINT(d, ptr, msg, table, hasbits, data, tagbytes, \
                              valbytes, card, zigzag, packed); \
  }
4022
Joshua Haberman9abf6e22021-01-13 12:16:25 -08004023#define z_ZZ true
4024#define b_ZZ false
4025#define v_ZZ false
4026
4027/* Generate all combinations:
4028 * {s,o,r,p} x {b1,v4,z4,v8,z8} x {1bt,2bt} */
4029
4030#define F(card, type, valbytes, tagbytes) \
4031 UPB_NOINLINE \
Joshua Habermanf41049a2022-01-21 14:41:25 -08004032 const char* upb_p##card##type##valbytes##_##tagbytes##bt(UPB_PARSE_PARAMS) { \
Joshua Habermandd69a482021-05-17 22:40:33 -07004033 FASTDECODE_VARINT(d, ptr, msg, table, hasbits, data, tagbytes, valbytes, \
4034 CARD_##card, type##_ZZ, \
4035 upb_pr##type##valbytes##_##tagbytes##bt, \
4036 upb_pp##type##valbytes##_##tagbytes##bt); \
Joshua Haberman9abf6e22021-01-13 12:16:25 -08004037 }
4038
4039#define TYPES(card, tagbytes) \
4040 F(card, b, 1, tagbytes) \
4041 F(card, v, 4, tagbytes) \
4042 F(card, v, 8, tagbytes) \
4043 F(card, z, 4, tagbytes) \
4044 F(card, z, 8, tagbytes)
4045
4046#define TAGBYTES(card) \
4047 TYPES(card, 1) \
4048 TYPES(card, 2)
4049
4050TAGBYTES(s)
4051TAGBYTES(o)
4052TAGBYTES(r)
4053TAGBYTES(p)
4054
4055#undef z_ZZ
4056#undef b_ZZ
4057#undef v_ZZ
4058#undef o_ONEOF
4059#undef s_ONEOF
4060#undef r_ONEOF
4061#undef F
4062#undef TYPES
4063#undef TAGBYTES
Joshua Habermandd69a482021-05-17 22:40:33 -07004064#undef FASTDECODE_UNPACKEDVARINT
4065#undef FASTDECODE_PACKEDVARINT
4066#undef FASTDECODE_VARINT
Joshua Haberman9abf6e22021-01-13 12:16:25 -08004067
Joshua Haberman9abf6e22021-01-13 12:16:25 -08004068/* fixed fields ***************************************************************/
4069
/* Function body for parsers of non-packed fixed-width fields (singular,
 * oneof or unpacked repeated).
 *
 * After the tag guard it obtains the destination from fastdecode_getfield(),
 * then, per element: grows the array if needed (repeated only), skips the
 * tag and copies `valbytes` raw bytes from the input.  Repeated fields loop
 * while the next tag matches; every other path ends in a tail call or
 * returns `ptr` at the limit. */
#define FASTDECODE_UNPACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \
                                 valbytes, card, packed) \
  void* dst; \
  fastdecode_arr farr; \
  \
  FASTDECODE_CHECKPACKED(tagbytes, card, packed) \
  \
  dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, valbytes, \
                            card); \
  if (card == CARD_r && UPB_UNLIKELY(!dst)) { \
    RETURN_GENERIC("couldn't allocate array in arena\n"); \
  } \
  \
  again: \
  if (card == CARD_r) { \
    dst = fastdecode_resizearr(d, dst, &farr, valbytes); \
  } \
  \
  ptr += tagbytes; \
  memcpy(dst, ptr, valbytes); \
  ptr += valbytes; \
  \
  if (card == CARD_r) { \
    fastdecode_nextret ret = fastdecode_nextrepeated( \
        d, dst, &ptr, &farr, data, tagbytes, valbytes); \
    switch (ret.next) { \
      case FD_NEXT_SAMEFIELD: \
        dst = ret.dst; \
        goto again; \
      case FD_NEXT_OTHERFIELD: \
        data = ret.tag; \
        UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); \
      case FD_NEXT_ATLIMIT: \
        return ptr; \
    } \
  } \
  \
  UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08004110
/* Function body for parsers of packed repeated fixed-width fields.
 *
 * After the tag guard it reads the payload size, validates that the payload
 * is readable (16 bytes of slack past d->limit_ptr are tolerated) and is a
 * whole number of `valbytes`-sized elements, makes the array hold exactly
 * that many elements, and copies the payload in with a single memcpy.
 *
 * Errors:
 *  - kUpb_DecodeStatus_Malformed if the multi-byte size is rejected by
 *    fastdecode_longsize() (it returns NULL), the payload is out of bounds,
 *    or the size is not a multiple of `valbytes`.
 *  - kUpb_DecodeStatus_OutOfMemory if the array cannot be created or
 *    resized.
 *
 * NOTE(review): an existing array is resized to `elems` and overwritten from
 * its start rather than appended to -- confirm this is intended. */
#define FASTDECODE_PACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \
                               valbytes, unpacked) \
  FASTDECODE_CHECKPACKED(tagbytes, CARD_r, unpacked) \
  \
  ptr += tagbytes; \
  int size = (uint8_t)ptr[0]; \
  ptr++; \
  if (size & 0x80) { \
    ptr = fastdecode_longsize(ptr, &size); \
    /* A NULL ptr would otherwise pass the bounds check below and be */ \
    /* used as the memcpy source. */ \
    if (UPB_UNLIKELY(ptr == NULL)) { \
      return fastdecode_err(d, kUpb_DecodeStatus_Malformed); \
    } \
  } \
  \
  if (UPB_UNLIKELY(fastdecode_boundscheck(ptr, size, d->limit_ptr) || \
                   (size % valbytes) != 0)) { \
    return fastdecode_err(d, kUpb_DecodeStatus_Malformed); \
  } \
  \
  upb_Array** arr_p = fastdecode_fieldmem(msg, data); \
  upb_Array* arr = *arr_p; \
  uint8_t elem_size_lg2 = __builtin_ctz(valbytes); \
  int elems = size / valbytes; \
  \
  if (UPB_LIKELY(!arr)) { \
    *arr_p = arr = _upb_Array_New(&d->arena, elems, elem_size_lg2); \
    if (!arr) { \
      return fastdecode_err(d, kUpb_DecodeStatus_OutOfMemory); \
    } \
  } else if (UPB_UNLIKELY(!_upb_Array_Resize(arr, elems, &d->arena))) { \
    /* NOTE(review): assumes _upb_Array_Resize() reports failure by */ \
    /* returning false -- confirm against its declaration. */ \
    return fastdecode_err(d, kUpb_DecodeStatus_OutOfMemory); \
  } \
  \
  char* dst = _upb_array_ptr(arr); \
  memcpy(dst, ptr, size); \
  arr->len = elems; \
  \
  ptr += size; \
  UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS);
4147
/* Selects the fixed-width parser body by cardinality: CARD_p expands the
 * packed body and forwards `unpacked` as its fallback; every other
 * cardinality expands the unpacked body and forwards `packed`. */
#define FASTDECODE_FIXED(d, ptr, msg, table, hasbits, data, tagbytes, \
                         valbytes, card, unpacked, packed) \
  if (card == CARD_p) { \
    FASTDECODE_PACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \
                           valbytes, unpacked); \
  } else { \
    FASTDECODE_UNPACKEDFIXED(d, ptr, msg, table, hasbits, data, tagbytes, \
                             valbytes, card, packed); \
  }
4157
Joshua Haberman9abf6e22021-01-13 12:16:25 -08004158/* Generate all combinations:
4159 * {s,o,r,p} x {f4,f8} x {1bt,2bt} */
4160
Joshua Habermandd69a482021-05-17 22:40:33 -07004161#define F(card, valbytes, tagbytes) \
4162 UPB_NOINLINE \
Joshua Habermanf41049a2022-01-21 14:41:25 -08004163 const char* upb_p##card##f##valbytes##_##tagbytes##bt(UPB_PARSE_PARAMS) { \
Joshua Habermandd69a482021-05-17 22:40:33 -07004164 FASTDECODE_FIXED(d, ptr, msg, table, hasbits, data, tagbytes, valbytes, \
4165 CARD_##card, upb_ppf##valbytes##_##tagbytes##bt, \
4166 upb_prf##valbytes##_##tagbytes##bt); \
Joshua Haberman9abf6e22021-01-13 12:16:25 -08004167 }
4168
4169#define TYPES(card, tagbytes) \
4170 F(card, 4, tagbytes) \
4171 F(card, 8, tagbytes)
4172
4173#define TAGBYTES(card) \
4174 TYPES(card, 1) \
4175 TYPES(card, 2)
4176
4177TAGBYTES(s)
4178TAGBYTES(o)
4179TAGBYTES(r)
4180TAGBYTES(p)
4181
4182#undef F
4183#undef TYPES
4184#undef TAGBYTES
Joshua Habermandd69a482021-05-17 22:40:33 -07004185#undef FASTDECODE_UNPACKEDFIXED
4186#undef FASTDECODE_PACKEDFIXED
Joshua Haberman9abf6e22021-01-13 12:16:25 -08004187
4188/* string fields **************************************************************/
4189
Joshua Habermanf41049a2022-01-21 14:41:25 -08004190typedef const char* fastdecode_copystr_func(struct upb_Decoder* d,
4191 const char* ptr, upb_Message* msg,
4192 const upb_MiniTable* table,
4193 uint64_t hasbits,
4194 upb_StringView* dst);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08004195
4196UPB_NOINLINE
Joshua Habermanf41049a2022-01-21 14:41:25 -08004197static const char* fastdecode_verifyutf8(upb_Decoder* d, const char* ptr,
4198 upb_Message* msg, intptr_t table,
Joshua Habermandd69a482021-05-17 22:40:33 -07004199 uint64_t hasbits, uint64_t data) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08004200 upb_StringView* dst = (upb_StringView*)data;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08004201 if (!decode_verifyutf8_inl(dst->data, dst->size)) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08004202 return fastdecode_err(d, kUpb_DecodeStatus_BadUtf8);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08004203 }
Joshua Habermandd69a482021-05-17 22:40:33 -07004204 UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08004205}
4206
/* Function body of the out-of-line string parsers.  On entry `ptr` points at
 * the first byte of the string's size and `dst` at the upb_StringView to
 * fill.
 *
 * The size is decoded (multi-byte sizes via fastdecode_longsize()) and the
 * payload is bounds-checked against d->limit_ptr (16 bytes of slack are
 * tolerated).  The string is then either aliased into the input buffer
 * (kUpb_DecodeOption_AliasString) or copied into arena memory.  Finally
 * control tail-calls fastdecode_verifyutf8() when `validate_utf8` is set,
 * or fastdecode_dispatch() otherwise.
 *
 * Errors:
 *  - kUpb_DecodeStatus_Malformed (with dst->size reset to 0) if the size is
 *    rejected by fastdecode_longsize() or the payload is out of bounds.
 *  - kUpb_DecodeStatus_OutOfMemory if the copy cannot be allocated. */
#define FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, validate_utf8) \
  int size = (uint8_t)ptr[0]; /* Could plumb through hasbits. */ \
  ptr++; \
  if (size & 0x80) { \
    ptr = fastdecode_longsize(ptr, &size); \
    /* A NULL ptr would otherwise pass the bounds check below and be */ \
    /* aliased or copied from. */ \
    if (UPB_UNLIKELY(ptr == NULL)) { \
      dst->size = 0; \
      return fastdecode_err(d, kUpb_DecodeStatus_Malformed); \
    } \
  } \
  \
  if (UPB_UNLIKELY(fastdecode_boundscheck(ptr, size, d->limit_ptr))) { \
    dst->size = 0; \
    return fastdecode_err(d, kUpb_DecodeStatus_Malformed); \
  } \
  \
  if (d->options & kUpb_DecodeOption_AliasString) { \
    dst->data = ptr; \
    dst->size = size; \
  } else { \
    /* Named `copy` so it does not shadow the enclosing `data` parameter. */ \
    char* copy = upb_Arena_Malloc(&d->arena, size); \
    if (!copy) { \
      return fastdecode_err(d, kUpb_DecodeStatus_OutOfMemory); \
    } \
    memcpy(copy, ptr, size); \
    dst->data = copy; \
    dst->size = size; \
  } \
  \
  ptr += size; \
  if (validate_utf8) { \
    data = (uint64_t)dst; \
    UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \
  } else { \
    UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); \
  }
4239
Joshua Haberman9abf6e22021-01-13 12:16:25 -08004240UPB_NOINLINE
Joshua Habermanf41049a2022-01-21 14:41:25 -08004241static const char* fastdecode_longstring_utf8(struct upb_Decoder* d,
4242 const char* ptr, upb_Message* msg,
Joshua Habermandd69a482021-05-17 22:40:33 -07004243 intptr_t table, uint64_t hasbits,
4244 uint64_t data) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08004245 upb_StringView* dst = (upb_StringView*)data;
Joshua Habermandd69a482021-05-17 22:40:33 -07004246 FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, true);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08004247}
4248
4249UPB_NOINLINE
Joshua Habermanf41049a2022-01-21 14:41:25 -08004250static const char* fastdecode_longstring_noutf8(
4251 struct upb_Decoder* d, const char* ptr, upb_Message* msg, intptr_t table,
4252 uint64_t hasbits, uint64_t data) {
4253 upb_StringView* dst = (upb_StringView*)data;
Joshua Habermandd69a482021-05-17 22:40:33 -07004254 FASTDECODE_LONGSTRING(d, ptr, msg, table, hasbits, dst, false);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08004255}
4256
4257UPB_FORCEINLINE
Joshua Habermanf41049a2022-01-21 14:41:25 -08004258static void fastdecode_docopy(upb_Decoder* d, const char* ptr, uint32_t size,
4259 int copy, char* data, upb_StringView* dst) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08004260 d->arena.head.ptr += copy;
4261 dst->data = data;
4262 UPB_UNPOISON_MEMORY_REGION(data, copy);
4263 memcpy(data, ptr, copy);
4264 UPB_POISON_MEMORY_REGION(data + size, copy - size);
4265}
4266
/* Function body for the copying string parsers (used when strings must not
 * alias the input buffer).
 *
 * Strings whose one-byte size is below 128 are copied with a fixed-size
 * memcpy of 16, 32, 64 or 128 bytes straight into the arena's head region,
 * provided both the arena and the input buffer (plus 16 bytes of slack) have
 * that much room.  Anything else -- larger sizes or insufficient room -- goes
 * to `longstr`, which commits the array length for repeated fields and
 * tail-calls the out-of-line long-string parser. */
#define FASTDECODE_COPYSTRING(d, ptr, msg, table, hasbits, data, tagbytes, \
                              card, validate_utf8) \
  upb_StringView* dst; \
  fastdecode_arr farr; \
  int64_t size; \
  size_t arena_has; \
  size_t common_has; \
  char* buf; \
  \
  UPB_ASSERT((d->options & kUpb_DecodeOption_AliasString) == 0); \
  UPB_ASSERT(fastdecode_checktag(data, tagbytes)); \
  \
  dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \
                            sizeof(upb_StringView), card); \
  \
  again: \
  if (card == CARD_r) { \
    dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_StringView)); \
  } \
  \
  size = (uint8_t)ptr[tagbytes]; \
  ptr += tagbytes + 1; \
  dst->size = size; \
  \
  buf = d->arena.head.ptr; \
  arena_has = _upb_ArenaHas(&d->arena); \
  common_has = UPB_MIN(arena_has, (d->end - ptr) + 16); \
  \
  if (UPB_LIKELY(size <= 15 - tagbytes)) { \
    /* Tag, size byte and string together fit in one 16-byte copy. */ \
    if (arena_has < 16) goto longstr; \
    d->arena.head.ptr += 16; \
    memcpy(buf, ptr - tagbytes - 1, 16); \
    dst->data = buf + tagbytes + 1; \
  } else if (UPB_LIKELY(size <= 32)) { \
    if (UPB_UNLIKELY(common_has < 32)) goto longstr; \
    fastdecode_docopy(d, ptr, size, 32, buf, dst); \
  } else if (UPB_LIKELY(size <= 64)) { \
    if (UPB_UNLIKELY(common_has < 64)) goto longstr; \
    fastdecode_docopy(d, ptr, size, 64, buf, dst); \
  } else if (UPB_LIKELY(size < 128)) { \
    if (UPB_UNLIKELY(common_has < 128)) goto longstr; \
    fastdecode_docopy(d, ptr, size, 128, buf, dst); \
  } else { \
    goto longstr; \
  } \
  \
  ptr += size; \
  \
  if (card == CARD_r) { \
    if (validate_utf8 && !decode_verifyutf8_inl(dst->data, dst->size)) { \
      return fastdecode_err(d, kUpb_DecodeStatus_BadUtf8); \
    } \
    fastdecode_nextret ret = fastdecode_nextrepeated( \
        d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_StringView)); \
    switch (ret.next) { \
      case FD_NEXT_SAMEFIELD: \
        dst = ret.dst; \
        goto again; \
      case FD_NEXT_OTHERFIELD: \
        data = ret.tag; \
        UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); \
      case FD_NEXT_ATLIMIT: \
        return ptr; \
    } \
  } \
  \
  if (card != CARD_r && validate_utf8) { \
    data = (uint64_t)dst; \
    UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \
  } \
  \
  UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS); \
  \
  longstr: \
  if (card == CARD_r) { \
    /* Count the slot the long-string parser is about to fill. */ \
    fastdecode_commitarr(dst + 1, &farr, sizeof(upb_StringView)); \
  } \
  ptr--; /* Step back onto the size byte. */ \
  if (validate_utf8) { \
    UPB_MUSTTAIL return fastdecode_longstring_utf8(d, ptr, msg, table, \
                                                   hasbits, (uint64_t)dst); \
  } else { \
    UPB_MUSTTAIL return fastdecode_longstring_noutf8(d, ptr, msg, table, \
                                                     hasbits, (uint64_t)dst); \
  }
4352
/* Function body for the aliasing string parsers.
 *
 * When kUpb_DecodeOption_AliasString is not set, control tail-calls
 * `copyfunc` instead.  Otherwise strings with a one-byte size that lie
 * within d->end (plus 16 bytes of slack) are aliased directly into the input
 * buffer; anything else is handed to the out-of-line long-string parser.
 *
 * For repeated fields the array length is committed before that hand-off
 * (dst + 1 counts the slot the long-string parser fills), matching the
 * `longstr` path of FASTDECODE_COPYSTRING.  Without this, the elements
 * appended in this loop would not be reflected in the array's length. */
#define FASTDECODE_STRING(d, ptr, msg, table, hasbits, data, tagbytes, card, \
                          copyfunc, validate_utf8) \
  upb_StringView* dst; \
  fastdecode_arr farr; \
  int64_t size; \
  \
  if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) { \
    RETURN_GENERIC("string field tag mismatch\n"); \
  } \
  \
  if (UPB_UNLIKELY((d->options & kUpb_DecodeOption_AliasString) == 0)) { \
    UPB_MUSTTAIL return copyfunc(UPB_PARSE_ARGS); \
  } \
  \
  dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr, \
                            sizeof(upb_StringView), card); \
  \
  again: \
  if (card == CARD_r) { \
    dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_StringView)); \
  } \
  \
  /* Sign-extended: a size byte with the high bit set becomes negative. */ \
  size = (int8_t)ptr[tagbytes]; \
  ptr += tagbytes + 1; \
  dst->data = ptr; \
  dst->size = size; \
  \
  if (UPB_UNLIKELY(fastdecode_boundscheck(ptr, size, d->end))) { \
    if (card == CARD_r) { \
      fastdecode_commitarr(dst + 1, &farr, sizeof(upb_StringView)); \
    } \
    ptr--; /* Step back onto the size byte. */ \
    if (validate_utf8) { \
      return fastdecode_longstring_utf8(d, ptr, msg, table, hasbits, \
                                        (uint64_t)dst); \
    } else { \
      return fastdecode_longstring_noutf8(d, ptr, msg, table, hasbits, \
                                          (uint64_t)dst); \
    } \
  } \
  \
  ptr += size; \
  \
  if (card == CARD_r) { \
    if (validate_utf8 && !decode_verifyutf8_inl(dst->data, dst->size)) { \
      return fastdecode_err(d, kUpb_DecodeStatus_BadUtf8); \
    } \
    fastdecode_nextret ret = fastdecode_nextrepeated( \
        d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_StringView)); \
    switch (ret.next) { \
      case FD_NEXT_SAMEFIELD: \
        dst = ret.dst; \
        if (UPB_UNLIKELY((d->options & kUpb_DecodeOption_AliasString) == 0)) { \
          /* Buffer flipped and we can't alias any more. Bounce to */ \
          /* copyfunc(), but via dispatch since we need to reload table */ \
          /* data also. */ \
          fastdecode_commitarr(dst, &farr, sizeof(upb_StringView)); \
          data = ret.tag; \
          UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); \
        } \
        goto again; \
      case FD_NEXT_OTHERFIELD: \
        data = ret.tag; \
        UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS); \
      case FD_NEXT_ATLIMIT: \
        return ptr; \
    } \
  } \
  \
  if (card != CARD_r && validate_utf8) { \
    data = (uint64_t)dst; \
    UPB_MUSTTAIL return fastdecode_verifyutf8(UPB_PARSE_ARGS); \
  } \
  \
  UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08004425
4426/* Generate all combinations:
4427 * {p,c} x {s,o,r} x {s, b} x {1bt,2bt} */
4428
4429#define s_VALIDATE true
4430#define b_VALIDATE false
4431
Joshua Habermandd69a482021-05-17 22:40:33 -07004432#define F(card, tagbytes, type) \
4433 UPB_NOINLINE \
Joshua Habermanf41049a2022-01-21 14:41:25 -08004434 const char* upb_c##card##type##_##tagbytes##bt(UPB_PARSE_PARAMS) { \
Joshua Habermandd69a482021-05-17 22:40:33 -07004435 FASTDECODE_COPYSTRING(d, ptr, msg, table, hasbits, data, tagbytes, \
4436 CARD_##card, type##_VALIDATE); \
4437 } \
Joshua Habermanf41049a2022-01-21 14:41:25 -08004438 const char* upb_p##card##type##_##tagbytes##bt(UPB_PARSE_PARAMS) { \
Joshua Habermandd69a482021-05-17 22:40:33 -07004439 FASTDECODE_STRING(d, ptr, msg, table, hasbits, data, tagbytes, \
4440 CARD_##card, upb_c##card##type##_##tagbytes##bt, \
4441 type##_VALIDATE); \
Joshua Haberman9abf6e22021-01-13 12:16:25 -08004442 }
4443
4444#define UTF8(card, tagbytes) \
4445 F(card, tagbytes, s) \
4446 F(card, tagbytes, b)
4447
4448#define TAGBYTES(card) \
4449 UTF8(card, 1) \
4450 UTF8(card, 2)
4451
4452TAGBYTES(s)
4453TAGBYTES(o)
4454TAGBYTES(r)
4455
4456#undef s_VALIDATE
4457#undef b_VALIDATE
4458#undef F
4459#undef TAGBYTES
Joshua Habermandd69a482021-05-17 22:40:33 -07004460#undef FASTDECODE_LONGSTRING
4461#undef FASTDECODE_COPYSTRING
4462#undef FASTDECODE_STRING
Joshua Haberman9abf6e22021-01-13 12:16:25 -08004463
4464/* message fields *************************************************************/
4465
4466UPB_INLINE
Joshua Habermanf41049a2022-01-21 14:41:25 -08004467upb_Message* decode_newmsg_ceil(upb_Decoder* d, const upb_MiniTable* l,
4468 int msg_ceil_bytes) {
4469 size_t size = l->size + sizeof(upb_Message_Internal);
4470 char* msg_data;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08004471 if (UPB_LIKELY(msg_ceil_bytes > 0 &&
Joshua Habermanf41049a2022-01-21 14:41:25 -08004472 _upb_ArenaHas(&d->arena) >= msg_ceil_bytes)) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08004473 UPB_ASSERT(size <= (size_t)msg_ceil_bytes);
4474 msg_data = d->arena.head.ptr;
4475 d->arena.head.ptr += size;
4476 UPB_UNPOISON_MEMORY_REGION(msg_data, msg_ceil_bytes);
4477 memset(msg_data, 0, msg_ceil_bytes);
4478 UPB_POISON_MEMORY_REGION(msg_data + size, msg_ceil_bytes - size);
4479 } else {
Joshua Habermanf41049a2022-01-21 14:41:25 -08004480 msg_data = (char*)upb_Arena_Malloc(&d->arena, size);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08004481 memset(msg_data, 0, size);
4482 }
Joshua Habermanf41049a2022-01-21 14:41:25 -08004483 return msg_data + sizeof(upb_Message_Internal);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08004484}
4485
4486typedef struct {
4487 intptr_t table;
Joshua Habermanf41049a2022-01-21 14:41:25 -08004488 upb_Message* msg;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08004489} fastdecode_submsgdata;
4490
4491UPB_FORCEINLINE
Joshua Habermanf41049a2022-01-21 14:41:25 -08004492static const char* fastdecode_tosubmsg(upb_Decoder* d, const char* ptr,
4493 void* ctx) {
4494 fastdecode_submsgdata* submsg = ctx;
Joshua Habermandd69a482021-05-17 22:40:33 -07004495 ptr = fastdecode_dispatch(d, ptr, submsg->msg, submsg->table, 0, 0);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08004496 UPB_ASSUME(ptr != NULL);
4497 return ptr;
4498}
4499
/*
 * Body shared by every fast-path sub-message field parser.  It is expanded
 * inside a function taking UPB_PARSE_PARAMS and always leaves that function
 * through a `return`.
 *
 *   tagbytes        number of bytes occupied by the field tag.
 *   msg_ceil_bytes  allocation bound forwarded to decode_newmsg_ceil().
 *   card            CARD_s, CARD_o or CARD_r.
 *
 * Steps:
 *   1. If the tag in `data` does not match, fall back via RETURN_GENERIC.
 *   2. Consume one level of d->depth; fail with MaxDepthExceeded at zero.
 *   3. Look up the sub-message mini-table through bits 16..23 of `data`; if
 *      it has no fast table (table_mask == (uint8_t)-1) fall back via
 *      RETURN_GENERIC.
 *   4. Locate the destination slot, creating a new sub-message when the field
 *      is repeated or the slot is still empty, and parse the delimited body
 *      with fastdecode_tosubmsg().
 *   5. For repeated fields keep consuming consecutive elements of the same
 *      field; otherwise dispatch to the next field.
 *
 * Depth accounting: every exit taken after step 2, other than an error
 * return, restores d->depth with `d->depth++`.  That includes the fallback in
 * step 3, which previously left the depth decremented, so each such fallback
 * permanently lowered the remaining depth budget of the decoder.
 */
#define FASTDECODE_SUBMSG(d, ptr, msg, table, hasbits, data, tagbytes,    \
                          msg_ceil_bytes, card)                           \
                                                                          \
  if (UPB_UNLIKELY(!fastdecode_checktag(data, tagbytes))) {               \
    RETURN_GENERIC("submessage field tag mismatch\n");                    \
  }                                                                       \
                                                                          \
  if (--d->depth == 0) {                                                  \
    return fastdecode_err(d, kUpb_DecodeStatus_MaxDepthExceeded);         \
  }                                                                       \
                                                                          \
  upb_Message** dst;                                                      \
  uint32_t submsg_idx = (data >> 16) & 0xff;                              \
  const upb_MiniTable* tablep = decode_totablep(table);                   \
  const upb_MiniTable* subtablep = tablep->subs[submsg_idx].submsg;       \
  fastdecode_submsgdata submsg = {decode_totable(subtablep)};             \
  fastdecode_arr farr;                                                    \
                                                                          \
  if (subtablep->table_mask == (uint8_t)-1) {                             \
    /* Undo the depth decrement above before leaving the fast path. */    \
    d->depth++;                                                           \
    RETURN_GENERIC("submessage doesn't have fast tables.");               \
  }                                                                       \
                                                                          \
  dst = fastdecode_getfield(d, ptr, msg, &data, &hasbits, &farr,          \
                            sizeof(upb_Message*), card);                  \
                                                                          \
  if (card == CARD_s) {                                                   \
    /* Flush pending hasbits into the first 32-bit word of the message. */ \
    *(uint32_t*)msg |= hasbits;                                           \
    hasbits = 0;                                                          \
  }                                                                       \
                                                                          \
  again:                                                                  \
  if (card == CARD_r) {                                                   \
    dst = fastdecode_resizearr(d, dst, &farr, sizeof(upb_Message*));      \
  }                                                                       \
                                                                          \
  submsg.msg = *dst;                                                      \
                                                                          \
  /* Repeated elements always get a fresh message; a singular field    */ \
  /* reuses the message already stored in the slot, if there is one.   */ \
  if (card == CARD_r || UPB_LIKELY(!submsg.msg)) {                        \
    *dst = submsg.msg = decode_newmsg_ceil(d, subtablep, msg_ceil_bytes); \
  }                                                                       \
                                                                          \
  ptr += tagbytes;                                                        \
  ptr = fastdecode_delimited(d, ptr, fastdecode_tosubmsg, &submsg);       \
                                                                          \
  if (UPB_UNLIKELY(ptr == NULL || d->end_group != DECODE_NOGROUP)) {      \
    return fastdecode_err(d, kUpb_DecodeStatus_Malformed);                \
  }                                                                       \
                                                                          \
  if (card == CARD_r) {                                                   \
    fastdecode_nextret ret = fastdecode_nextrepeated(                     \
        d, dst, &ptr, &farr, data, tagbytes, sizeof(upb_Message*));       \
    switch (ret.next) {                                                   \
      case FD_NEXT_SAMEFIELD:                                             \
        dst = ret.dst;                                                    \
        goto again;                                                       \
      case FD_NEXT_OTHERFIELD:                                            \
        d->depth++;                                                       \
        data = ret.tag;                                                   \
        UPB_MUSTTAIL return fastdecode_tagdispatch(UPB_PARSE_ARGS);       \
      case FD_NEXT_ATLIMIT:                                               \
        d->depth++;                                                       \
        return ptr;                                                       \
    }                                                                     \
  }                                                                       \
                                                                          \
  d->depth++;                                                             \
  UPB_MUSTTAIL return fastdecode_dispatch(UPB_PARSE_ARGS);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08004567
Joshua Habermandd69a482021-05-17 22:40:33 -07004568#define F(card, tagbytes, size_ceil, ceil_arg) \
Joshua Habermanf41049a2022-01-21 14:41:25 -08004569 const char* upb_p##card##m_##tagbytes##bt_max##size_ceil##b( \
Joshua Habermandd69a482021-05-17 22:40:33 -07004570 UPB_PARSE_PARAMS) { \
4571 FASTDECODE_SUBMSG(d, ptr, msg, table, hasbits, data, tagbytes, ceil_arg, \
4572 CARD_##card); \
Joshua Haberman9abf6e22021-01-13 12:16:25 -08004573 }
4574
4575#define SIZES(card, tagbytes) \
Joshua Habermanf41049a2022-01-21 14:41:25 -08004576 F(card, tagbytes, 64, 64) \
Joshua Haberman9abf6e22021-01-13 12:16:25 -08004577 F(card, tagbytes, 128, 128) \
4578 F(card, tagbytes, 192, 192) \
4579 F(card, tagbytes, 256, 256) \
4580 F(card, tagbytes, max, -1)
4581
4582#define TAGBYTES(card) \
Joshua Habermanf41049a2022-01-21 14:41:25 -08004583 SIZES(card, 1) \
Joshua Haberman9abf6e22021-01-13 12:16:25 -08004584 SIZES(card, 2)
4585
4586TAGBYTES(s)
4587TAGBYTES(o)
4588TAGBYTES(r)
4589
4590#undef TAGBYTES
4591#undef SIZES
4592#undef F
Joshua Habermandd69a482021-05-17 22:40:33 -07004593#undef FASTDECODE_SUBMSG
Joshua Haberman9abf6e22021-01-13 12:16:25 -08004594
Joshua Habermanf41049a2022-01-21 14:41:25 -08004595#endif /* UPB_FASTTABLE */
Joshua Habermandd69a482021-05-17 22:40:33 -07004596
4597/** bazel-out/k8-fastbuild/bin/external/com_google_protobuf/google/protobuf/descriptor.upb.c ************************************************************//* This file was generated by upbc (the upb compiler) from the input
Joshua Haberman9abf6e22021-01-13 12:16:25 -08004598 * file:
4599 *
4600 * google/protobuf/descriptor.proto
4601 *
4602 * Do not edit -- your changes will be discarded when the file is
4603 * regenerated. */
4604
4605#include <stddef.h>
4606
4607
Joshua Habermanf41049a2022-01-21 14:41:25 -08004608static const upb_MiniTable_Sub google_protobuf_FileDescriptorSet_submsgs[1] = {
4609 {.submsg = &google_protobuf_FileDescriptorProto_msginit},
Joshua Haberman9abf6e22021-01-13 12:16:25 -08004610};
4611
Joshua Habermanf41049a2022-01-21 14:41:25 -08004612static const upb_MiniTable_Field google_protobuf_FileDescriptorSet__fields[1] = {
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07004613 {1, UPB_SIZE(0, 0), 0, 0, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
Joshua Haberman9abf6e22021-01-13 12:16:25 -08004614};
4615
Joshua Habermanf41049a2022-01-21 14:41:25 -08004616const upb_MiniTable google_protobuf_FileDescriptorSet_msginit = {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08004617 &google_protobuf_FileDescriptorSet_submsgs[0],
4618 &google_protobuf_FileDescriptorSet__fields[0],
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07004619 UPB_SIZE(8, 8), 1, kUpb_ExtMode_NonExtendable, 1, 255, 0,
Joshua Haberman9abf6e22021-01-13 12:16:25 -08004620};
4621
Joshua Habermanf41049a2022-01-21 14:41:25 -08004622static const upb_MiniTable_Sub google_protobuf_FileDescriptorProto_submsgs[6] = {
4623 {.submsg = &google_protobuf_DescriptorProto_msginit},
4624 {.submsg = &google_protobuf_EnumDescriptorProto_msginit},
4625 {.submsg = &google_protobuf_FieldDescriptorProto_msginit},
4626 {.submsg = &google_protobuf_FileOptions_msginit},
4627 {.submsg = &google_protobuf_ServiceDescriptorProto_msginit},
4628 {.submsg = &google_protobuf_SourceCodeInfo_msginit},
Joshua Haberman9abf6e22021-01-13 12:16:25 -08004629};
4630
Joshua Habermanf41049a2022-01-21 14:41:25 -08004631static const upb_MiniTable_Field google_protobuf_FileDescriptorProto__fields[12] = {
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07004632 {1, UPB_SIZE(4, 8), 1, 0, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
4633 {2, UPB_SIZE(12, 24), 2, 0, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
4634 {3, UPB_SIZE(36, 72), 0, 0, 12, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
4635 {4, UPB_SIZE(40, 80), 0, 0, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
4636 {5, UPB_SIZE(44, 88), 0, 1, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
4637 {6, UPB_SIZE(48, 96), 0, 4, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
4638 {7, UPB_SIZE(52, 104), 0, 2, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
4639 {8, UPB_SIZE(28, 56), 3, 3, 11, kUpb_FieldMode_Scalar | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
4640 {9, UPB_SIZE(32, 64), 4, 5, 11, kUpb_FieldMode_Scalar | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
4641 {10, UPB_SIZE(56, 112), 0, 0, 5, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
4642 {11, UPB_SIZE(60, 120), 0, 0, 5, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
4643 {12, UPB_SIZE(20, 40), 5, 0, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
Joshua Haberman9abf6e22021-01-13 12:16:25 -08004644};
4645
Joshua Habermanf41049a2022-01-21 14:41:25 -08004646const upb_MiniTable google_protobuf_FileDescriptorProto_msginit = {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08004647 &google_protobuf_FileDescriptorProto_submsgs[0],
4648 &google_protobuf_FileDescriptorProto__fields[0],
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07004649 UPB_SIZE(64, 128), 12, kUpb_ExtMode_NonExtendable, 12, 255, 0,
Joshua Haberman9abf6e22021-01-13 12:16:25 -08004650};
4651
Joshua Habermanf41049a2022-01-21 14:41:25 -08004652static const upb_MiniTable_Sub google_protobuf_DescriptorProto_submsgs[7] = {
4653 {.submsg = &google_protobuf_DescriptorProto_msginit},
4654 {.submsg = &google_protobuf_DescriptorProto_ExtensionRange_msginit},
4655 {.submsg = &google_protobuf_DescriptorProto_ReservedRange_msginit},
4656 {.submsg = &google_protobuf_EnumDescriptorProto_msginit},
4657 {.submsg = &google_protobuf_FieldDescriptorProto_msginit},
4658 {.submsg = &google_protobuf_MessageOptions_msginit},
4659 {.submsg = &google_protobuf_OneofDescriptorProto_msginit},
4660};
4661
4662static const upb_MiniTable_Field google_protobuf_DescriptorProto__fields[10] = {
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07004663 {1, UPB_SIZE(4, 8), 1, 0, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
4664 {2, UPB_SIZE(16, 32), 0, 4, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
4665 {3, UPB_SIZE(20, 40), 0, 0, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
4666 {4, UPB_SIZE(24, 48), 0, 3, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
4667 {5, UPB_SIZE(28, 56), 0, 1, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
4668 {6, UPB_SIZE(32, 64), 0, 4, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
4669 {7, UPB_SIZE(12, 24), 2, 5, 11, kUpb_FieldMode_Scalar | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
4670 {8, UPB_SIZE(36, 72), 0, 6, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
4671 {9, UPB_SIZE(40, 80), 0, 2, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
4672 {10, UPB_SIZE(44, 88), 0, 0, 12, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
Joshua Habermanf41049a2022-01-21 14:41:25 -08004673};
4674
4675const upb_MiniTable google_protobuf_DescriptorProto_msginit = {
4676 &google_protobuf_DescriptorProto_submsgs[0],
4677 &google_protobuf_DescriptorProto__fields[0],
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07004678 UPB_SIZE(48, 96), 10, kUpb_ExtMode_NonExtendable, 10, 255, 0,
Joshua Habermanf41049a2022-01-21 14:41:25 -08004679};
4680
4681static const upb_MiniTable_Sub google_protobuf_DescriptorProto_ExtensionRange_submsgs[1] = {
4682 {.submsg = &google_protobuf_ExtensionRangeOptions_msginit},
4683};
4684
4685static const upb_MiniTable_Field google_protobuf_DescriptorProto_ExtensionRange__fields[3] = {
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07004686 {1, UPB_SIZE(4, 4), 1, 0, 5, kUpb_FieldMode_Scalar | (kUpb_FieldRep_4Byte << kUpb_FieldRep_Shift)},
4687 {2, UPB_SIZE(8, 8), 2, 0, 5, kUpb_FieldMode_Scalar | (kUpb_FieldRep_4Byte << kUpb_FieldRep_Shift)},
4688 {3, UPB_SIZE(12, 16), 3, 0, 11, kUpb_FieldMode_Scalar | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
Joshua Habermanf41049a2022-01-21 14:41:25 -08004689};
4690
4691const upb_MiniTable google_protobuf_DescriptorProto_ExtensionRange_msginit = {
4692 &google_protobuf_DescriptorProto_ExtensionRange_submsgs[0],
4693 &google_protobuf_DescriptorProto_ExtensionRange__fields[0],
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07004694 UPB_SIZE(16, 24), 3, kUpb_ExtMode_NonExtendable, 3, 255, 0,
Joshua Habermanf41049a2022-01-21 14:41:25 -08004695};
4696
4697static const upb_MiniTable_Field google_protobuf_DescriptorProto_ReservedRange__fields[2] = {
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07004698 {1, UPB_SIZE(4, 4), 1, 0, 5, kUpb_FieldMode_Scalar | (kUpb_FieldRep_4Byte << kUpb_FieldRep_Shift)},
4699 {2, UPB_SIZE(8, 8), 2, 0, 5, kUpb_FieldMode_Scalar | (kUpb_FieldRep_4Byte << kUpb_FieldRep_Shift)},
Joshua Habermanf41049a2022-01-21 14:41:25 -08004700};
4701
4702const upb_MiniTable google_protobuf_DescriptorProto_ReservedRange_msginit = {
4703 NULL,
4704 &google_protobuf_DescriptorProto_ReservedRange__fields[0],
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07004705 UPB_SIZE(16, 16), 2, kUpb_ExtMode_NonExtendable, 2, 255, 0,
Joshua Habermanf41049a2022-01-21 14:41:25 -08004706};
4707
4708static const upb_MiniTable_Sub google_protobuf_ExtensionRangeOptions_submsgs[1] = {
4709 {.submsg = &google_protobuf_UninterpretedOption_msginit},
4710};
4711
4712static const upb_MiniTable_Field google_protobuf_ExtensionRangeOptions__fields[1] = {
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07004713 {999, UPB_SIZE(0, 0), 0, 0, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
Joshua Habermanf41049a2022-01-21 14:41:25 -08004714};
4715
4716const upb_MiniTable google_protobuf_ExtensionRangeOptions_msginit = {
4717 &google_protobuf_ExtensionRangeOptions_submsgs[0],
4718 &google_protobuf_ExtensionRangeOptions__fields[0],
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07004719 UPB_SIZE(8, 8), 1, kUpb_ExtMode_Extendable, 0, 255, 0,
Joshua Habermanf41049a2022-01-21 14:41:25 -08004720};
4721
4722static const upb_MiniTable_Sub google_protobuf_FieldDescriptorProto_submsgs[3] = {
4723 {.submsg = &google_protobuf_FieldOptions_msginit},
4724 {.subenum = &google_protobuf_FieldDescriptorProto_Label_enuminit},
4725 {.subenum = &google_protobuf_FieldDescriptorProto_Type_enuminit},
4726};
4727
4728static const upb_MiniTable_Field google_protobuf_FieldDescriptorProto__fields[11] = {
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07004729 {1, UPB_SIZE(24, 24), 1, 0, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
4730 {2, UPB_SIZE(32, 40), 2, 0, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
4731 {3, UPB_SIZE(12, 12), 3, 0, 5, kUpb_FieldMode_Scalar | (kUpb_FieldRep_4Byte << kUpb_FieldRep_Shift)},
4732 {4, UPB_SIZE(4, 4), 4, 1, 14, kUpb_FieldMode_Scalar | (kUpb_FieldRep_4Byte << kUpb_FieldRep_Shift)},
4733 {5, UPB_SIZE(8, 8), 5, 2, 14, kUpb_FieldMode_Scalar | (kUpb_FieldRep_4Byte << kUpb_FieldRep_Shift)},
4734 {6, UPB_SIZE(40, 56), 6, 0, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
4735 {7, UPB_SIZE(48, 72), 7, 0, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
4736 {8, UPB_SIZE(64, 104), 8, 0, 11, kUpb_FieldMode_Scalar | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
4737 {9, UPB_SIZE(16, 16), 9, 0, 5, kUpb_FieldMode_Scalar | (kUpb_FieldRep_4Byte << kUpb_FieldRep_Shift)},
4738 {10, UPB_SIZE(56, 88), 10, 0, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
4739 {17, UPB_SIZE(20, 20), 11, 0, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
Joshua Habermanf41049a2022-01-21 14:41:25 -08004740};
4741
4742const upb_MiniTable google_protobuf_FieldDescriptorProto_msginit = {
4743 &google_protobuf_FieldDescriptorProto_submsgs[0],
4744 &google_protobuf_FieldDescriptorProto__fields[0],
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07004745 UPB_SIZE(72, 112), 11, kUpb_ExtMode_NonExtendable, 10, 255, 0,
Joshua Habermanf41049a2022-01-21 14:41:25 -08004746};
4747
4748static const upb_MiniTable_Sub google_protobuf_OneofDescriptorProto_submsgs[1] = {
4749 {.submsg = &google_protobuf_OneofOptions_msginit},
4750};
4751
4752static const upb_MiniTable_Field google_protobuf_OneofDescriptorProto__fields[2] = {
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07004753 {1, UPB_SIZE(4, 8), 1, 0, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
4754 {2, UPB_SIZE(12, 24), 2, 0, 11, kUpb_FieldMode_Scalar | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
Joshua Habermanf41049a2022-01-21 14:41:25 -08004755};
4756
4757const upb_MiniTable google_protobuf_OneofDescriptorProto_msginit = {
4758 &google_protobuf_OneofDescriptorProto_submsgs[0],
4759 &google_protobuf_OneofDescriptorProto__fields[0],
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07004760 UPB_SIZE(16, 32), 2, kUpb_ExtMode_NonExtendable, 2, 255, 0,
Joshua Habermanf41049a2022-01-21 14:41:25 -08004761};
4762
4763static const upb_MiniTable_Sub google_protobuf_EnumDescriptorProto_submsgs[3] = {
4764 {.submsg = &google_protobuf_EnumDescriptorProto_EnumReservedRange_msginit},
4765 {.submsg = &google_protobuf_EnumOptions_msginit},
4766 {.submsg = &google_protobuf_EnumValueDescriptorProto_msginit},
4767};
4768
4769static const upb_MiniTable_Field google_protobuf_EnumDescriptorProto__fields[5] = {
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07004770 {1, UPB_SIZE(4, 8), 1, 0, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
4771 {2, UPB_SIZE(16, 32), 0, 2, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
4772 {3, UPB_SIZE(12, 24), 2, 1, 11, kUpb_FieldMode_Scalar | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
4773 {4, UPB_SIZE(20, 40), 0, 0, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
4774 {5, UPB_SIZE(24, 48), 0, 0, 12, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
Joshua Habermanf41049a2022-01-21 14:41:25 -08004775};
4776
4777const upb_MiniTable google_protobuf_EnumDescriptorProto_msginit = {
4778 &google_protobuf_EnumDescriptorProto_submsgs[0],
4779 &google_protobuf_EnumDescriptorProto__fields[0],
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07004780 UPB_SIZE(32, 64), 5, kUpb_ExtMode_NonExtendable, 5, 255, 0,
Joshua Habermanf41049a2022-01-21 14:41:25 -08004781};
4782
4783static const upb_MiniTable_Field google_protobuf_EnumDescriptorProto_EnumReservedRange__fields[2] = {
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07004784 {1, UPB_SIZE(4, 4), 1, 0, 5, kUpb_FieldMode_Scalar | (kUpb_FieldRep_4Byte << kUpb_FieldRep_Shift)},
4785 {2, UPB_SIZE(8, 8), 2, 0, 5, kUpb_FieldMode_Scalar | (kUpb_FieldRep_4Byte << kUpb_FieldRep_Shift)},
Joshua Habermanf41049a2022-01-21 14:41:25 -08004786};
4787
4788const upb_MiniTable google_protobuf_EnumDescriptorProto_EnumReservedRange_msginit = {
4789 NULL,
4790 &google_protobuf_EnumDescriptorProto_EnumReservedRange__fields[0],
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07004791 UPB_SIZE(16, 16), 2, kUpb_ExtMode_NonExtendable, 2, 255, 0,
Joshua Habermanf41049a2022-01-21 14:41:25 -08004792};
4793
4794static const upb_MiniTable_Sub google_protobuf_EnumValueDescriptorProto_submsgs[1] = {
4795 {.submsg = &google_protobuf_EnumValueOptions_msginit},
4796};
4797
4798static const upb_MiniTable_Field google_protobuf_EnumValueDescriptorProto__fields[3] = {
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07004799 {1, UPB_SIZE(8, 8), 1, 0, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
4800 {2, UPB_SIZE(4, 4), 2, 0, 5, kUpb_FieldMode_Scalar | (kUpb_FieldRep_4Byte << kUpb_FieldRep_Shift)},
4801 {3, UPB_SIZE(16, 24), 3, 0, 11, kUpb_FieldMode_Scalar | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
Joshua Habermanf41049a2022-01-21 14:41:25 -08004802};
4803
4804const upb_MiniTable google_protobuf_EnumValueDescriptorProto_msginit = {
4805 &google_protobuf_EnumValueDescriptorProto_submsgs[0],
4806 &google_protobuf_EnumValueDescriptorProto__fields[0],
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07004807 UPB_SIZE(24, 32), 3, kUpb_ExtMode_NonExtendable, 3, 255, 0,
Joshua Habermanf41049a2022-01-21 14:41:25 -08004808};
4809
4810static const upb_MiniTable_Sub google_protobuf_ServiceDescriptorProto_submsgs[2] = {
4811 {.submsg = &google_protobuf_MethodDescriptorProto_msginit},
4812 {.submsg = &google_protobuf_ServiceOptions_msginit},
4813};
4814
4815static const upb_MiniTable_Field google_protobuf_ServiceDescriptorProto__fields[3] = {
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07004816 {1, UPB_SIZE(4, 8), 1, 0, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
4817 {2, UPB_SIZE(16, 32), 0, 0, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
4818 {3, UPB_SIZE(12, 24), 2, 1, 11, kUpb_FieldMode_Scalar | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
Joshua Habermanf41049a2022-01-21 14:41:25 -08004819};
4820
4821const upb_MiniTable google_protobuf_ServiceDescriptorProto_msginit = {
4822 &google_protobuf_ServiceDescriptorProto_submsgs[0],
4823 &google_protobuf_ServiceDescriptorProto__fields[0],
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07004824 UPB_SIZE(24, 48), 3, kUpb_ExtMode_NonExtendable, 3, 255, 0,
Joshua Habermanf41049a2022-01-21 14:41:25 -08004825};
4826
4827static const upb_MiniTable_Sub google_protobuf_MethodDescriptorProto_submsgs[1] = {
4828 {.submsg = &google_protobuf_MethodOptions_msginit},
4829};
4830
4831static const upb_MiniTable_Field google_protobuf_MethodDescriptorProto__fields[6] = {
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07004832 {1, UPB_SIZE(4, 8), 1, 0, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
4833 {2, UPB_SIZE(12, 24), 2, 0, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
4834 {3, UPB_SIZE(20, 40), 3, 0, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
4835 {4, UPB_SIZE(28, 56), 4, 0, 11, kUpb_FieldMode_Scalar | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
4836 {5, UPB_SIZE(1, 1), 5, 0, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
4837 {6, UPB_SIZE(2, 2), 6, 0, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
Joshua Habermanf41049a2022-01-21 14:41:25 -08004838};
4839
4840const upb_MiniTable google_protobuf_MethodDescriptorProto_msginit = {
4841 &google_protobuf_MethodDescriptorProto_submsgs[0],
4842 &google_protobuf_MethodDescriptorProto__fields[0],
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07004843 UPB_SIZE(32, 64), 6, kUpb_ExtMode_NonExtendable, 6, 255, 0,
Joshua Habermanf41049a2022-01-21 14:41:25 -08004844};
4845
4846static const upb_MiniTable_Sub google_protobuf_FileOptions_submsgs[2] = {
4847 {.submsg = &google_protobuf_UninterpretedOption_msginit},
4848 {.subenum = &google_protobuf_FileOptions_OptimizeMode_enuminit},
4849};
4850
4851static const upb_MiniTable_Field google_protobuf_FileOptions__fields[21] = {
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07004852 {1, UPB_SIZE(20, 24), 1, 0, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
4853 {8, UPB_SIZE(28, 40), 2, 0, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
4854 {9, UPB_SIZE(4, 4), 3, 1, 14, kUpb_FieldMode_Scalar | (kUpb_FieldRep_4Byte << kUpb_FieldRep_Shift)},
4855 {10, UPB_SIZE(8, 8), 4, 0, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
4856 {11, UPB_SIZE(36, 56), 5, 0, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
4857 {16, UPB_SIZE(9, 9), 6, 0, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
4858 {17, UPB_SIZE(10, 10), 7, 0, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
4859 {18, UPB_SIZE(11, 11), 8, 0, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
4860 {20, UPB_SIZE(12, 12), 9, 0, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
4861 {23, UPB_SIZE(13, 13), 10, 0, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
4862 {27, UPB_SIZE(14, 14), 11, 0, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
4863 {31, UPB_SIZE(15, 15), 12, 0, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
4864 {36, UPB_SIZE(44, 72), 13, 0, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
4865 {37, UPB_SIZE(52, 88), 14, 0, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
4866 {39, UPB_SIZE(60, 104), 15, 0, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
4867 {40, UPB_SIZE(68, 120), 16, 0, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
4868 {41, UPB_SIZE(76, 136), 17, 0, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
4869 {42, UPB_SIZE(16, 16), 18, 0, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
4870 {44, UPB_SIZE(84, 152), 19, 0, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
4871 {45, UPB_SIZE(92, 168), 20, 0, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
4872 {999, UPB_SIZE(100, 184), 0, 0, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
Joshua Habermanf41049a2022-01-21 14:41:25 -08004873};
4874
4875const upb_MiniTable google_protobuf_FileOptions_msginit = {
4876 &google_protobuf_FileOptions_submsgs[0],
4877 &google_protobuf_FileOptions__fields[0],
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07004878 UPB_SIZE(104, 192), 21, kUpb_ExtMode_Extendable, 1, 255, 0,
Joshua Habermanf41049a2022-01-21 14:41:25 -08004879};
4880
4881static const upb_MiniTable_Sub google_protobuf_MessageOptions_submsgs[1] = {
4882 {.submsg = &google_protobuf_UninterpretedOption_msginit},
4883};
4884
4885static const upb_MiniTable_Field google_protobuf_MessageOptions__fields[5] = {
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07004886 {1, UPB_SIZE(1, 1), 1, 0, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
4887 {2, UPB_SIZE(2, 2), 2, 0, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
4888 {3, UPB_SIZE(3, 3), 3, 0, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
4889 {7, UPB_SIZE(4, 4), 4, 0, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
4890 {999, UPB_SIZE(8, 8), 0, 0, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
Joshua Habermanf41049a2022-01-21 14:41:25 -08004891};
4892
4893const upb_MiniTable google_protobuf_MessageOptions_msginit = {
4894 &google_protobuf_MessageOptions_submsgs[0],
4895 &google_protobuf_MessageOptions__fields[0],
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07004896 UPB_SIZE(16, 16), 5, kUpb_ExtMode_Extendable, 3, 255, 0,
Joshua Habermanf41049a2022-01-21 14:41:25 -08004897};
4898
4899static const upb_MiniTable_Sub google_protobuf_FieldOptions_submsgs[3] = {
4900 {.submsg = &google_protobuf_UninterpretedOption_msginit},
4901 {.subenum = &google_protobuf_FieldOptions_CType_enuminit},
4902 {.subenum = &google_protobuf_FieldOptions_JSType_enuminit},
4903};
4904
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07004905static const upb_MiniTable_Field google_protobuf_FieldOptions__fields[8] = {
4906 {1, UPB_SIZE(4, 4), 1, 1, 14, kUpb_FieldMode_Scalar | (kUpb_FieldRep_4Byte << kUpb_FieldRep_Shift)},
4907 {2, UPB_SIZE(12, 12), 2, 0, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
4908 {3, UPB_SIZE(13, 13), 3, 0, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
4909 {5, UPB_SIZE(14, 14), 4, 0, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
4910 {6, UPB_SIZE(8, 8), 5, 2, 14, kUpb_FieldMode_Scalar | (kUpb_FieldRep_4Byte << kUpb_FieldRep_Shift)},
4911 {10, UPB_SIZE(15, 15), 6, 0, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
4912 {15, UPB_SIZE(16, 16), 7, 0, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
4913 {999, UPB_SIZE(20, 24), 0, 0, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
Joshua Habermanf41049a2022-01-21 14:41:25 -08004914};
4915
4916const upb_MiniTable google_protobuf_FieldOptions_msginit = {
4917 &google_protobuf_FieldOptions_submsgs[0],
4918 &google_protobuf_FieldOptions__fields[0],
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07004919 UPB_SIZE(24, 32), 8, kUpb_ExtMode_Extendable, 3, 255, 0,
Joshua Habermanf41049a2022-01-21 14:41:25 -08004920};
4921
4922static const upb_MiniTable_Sub google_protobuf_OneofOptions_submsgs[1] = {
4923 {.submsg = &google_protobuf_UninterpretedOption_msginit},
4924};
4925
4926static const upb_MiniTable_Field google_protobuf_OneofOptions__fields[1] = {
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07004927 {999, UPB_SIZE(0, 0), 0, 0, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
Joshua Habermanf41049a2022-01-21 14:41:25 -08004928};
4929
4930const upb_MiniTable google_protobuf_OneofOptions_msginit = {
4931 &google_protobuf_OneofOptions_submsgs[0],
4932 &google_protobuf_OneofOptions__fields[0],
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07004933 UPB_SIZE(8, 8), 1, kUpb_ExtMode_Extendable, 0, 255, 0,
Joshua Habermanf41049a2022-01-21 14:41:25 -08004934};
4935
4936static const upb_MiniTable_Sub google_protobuf_EnumOptions_submsgs[1] = {
4937 {.submsg = &google_protobuf_UninterpretedOption_msginit},
4938};
4939
4940static const upb_MiniTable_Field google_protobuf_EnumOptions__fields[3] = {
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07004941 {2, UPB_SIZE(1, 1), 1, 0, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
4942 {3, UPB_SIZE(2, 2), 2, 0, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
4943 {999, UPB_SIZE(4, 8), 0, 0, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
Joshua Habermanf41049a2022-01-21 14:41:25 -08004944};
4945
4946const upb_MiniTable google_protobuf_EnumOptions_msginit = {
4947 &google_protobuf_EnumOptions_submsgs[0],
4948 &google_protobuf_EnumOptions__fields[0],
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07004949 UPB_SIZE(8, 16), 3, kUpb_ExtMode_Extendable, 0, 255, 0,
Joshua Habermanf41049a2022-01-21 14:41:25 -08004950};
4951
4952static const upb_MiniTable_Sub google_protobuf_EnumValueOptions_submsgs[1] = {
4953 {.submsg = &google_protobuf_UninterpretedOption_msginit},
4954};
4955
4956static const upb_MiniTable_Field google_protobuf_EnumValueOptions__fields[2] = {
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07004957 {1, UPB_SIZE(1, 1), 1, 0, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
4958 {999, UPB_SIZE(4, 8), 0, 0, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
Joshua Habermanf41049a2022-01-21 14:41:25 -08004959};
4960
4961const upb_MiniTable google_protobuf_EnumValueOptions_msginit = {
4962 &google_protobuf_EnumValueOptions_submsgs[0],
4963 &google_protobuf_EnumValueOptions__fields[0],
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07004964 UPB_SIZE(8, 16), 2, kUpb_ExtMode_Extendable, 1, 255, 0,
Joshua Habermanf41049a2022-01-21 14:41:25 -08004965};
4966
4967static const upb_MiniTable_Sub google_protobuf_ServiceOptions_submsgs[1] = {
4968 {.submsg = &google_protobuf_UninterpretedOption_msginit},
4969};
4970
4971static const upb_MiniTable_Field google_protobuf_ServiceOptions__fields[2] = {
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07004972 {33, UPB_SIZE(1, 1), 1, 0, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
4973 {999, UPB_SIZE(4, 8), 0, 0, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
Joshua Habermanf41049a2022-01-21 14:41:25 -08004974};
4975
4976const upb_MiniTable google_protobuf_ServiceOptions_msginit = {
4977 &google_protobuf_ServiceOptions_submsgs[0],
4978 &google_protobuf_ServiceOptions__fields[0],
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07004979 UPB_SIZE(8, 16), 2, kUpb_ExtMode_Extendable, 0, 255, 0,
Joshua Habermanf41049a2022-01-21 14:41:25 -08004980};
4981
4982static const upb_MiniTable_Sub google_protobuf_MethodOptions_submsgs[2] = {
4983 {.submsg = &google_protobuf_UninterpretedOption_msginit},
4984 {.subenum = &google_protobuf_MethodOptions_IdempotencyLevel_enuminit},
4985};
4986
4987static const upb_MiniTable_Field google_protobuf_MethodOptions__fields[3] = {
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07004988 {33, UPB_SIZE(8, 8), 1, 0, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
4989 {34, UPB_SIZE(4, 4), 2, 1, 14, kUpb_FieldMode_Scalar | (kUpb_FieldRep_4Byte << kUpb_FieldRep_Shift)},
4990 {999, UPB_SIZE(12, 16), 0, 0, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
Joshua Habermanf41049a2022-01-21 14:41:25 -08004991};
4992
4993const upb_MiniTable google_protobuf_MethodOptions_msginit = {
4994 &google_protobuf_MethodOptions_submsgs[0],
4995 &google_protobuf_MethodOptions__fields[0],
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07004996 UPB_SIZE(16, 24), 3, kUpb_ExtMode_Extendable, 0, 255, 0,
Joshua Habermanf41049a2022-01-21 14:41:25 -08004997};
4998
4999static const upb_MiniTable_Sub google_protobuf_UninterpretedOption_submsgs[1] = {
5000 {.submsg = &google_protobuf_UninterpretedOption_NamePart_msginit},
5001};
5002
5003static const upb_MiniTable_Field google_protobuf_UninterpretedOption__fields[7] = {
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07005004 {2, UPB_SIZE(56, 80), 0, 0, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
5005 {3, UPB_SIZE(32, 32), 1, 0, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
5006 {4, UPB_SIZE(8, 8), 2, 0, 4, kUpb_FieldMode_Scalar | (kUpb_FieldRep_8Byte << kUpb_FieldRep_Shift)},
5007 {5, UPB_SIZE(16, 16), 3, 0, 3, kUpb_FieldMode_Scalar | (kUpb_FieldRep_8Byte << kUpb_FieldRep_Shift)},
5008 {6, UPB_SIZE(24, 24), 4, 0, 1, kUpb_FieldMode_Scalar | (kUpb_FieldRep_8Byte << kUpb_FieldRep_Shift)},
5009 {7, UPB_SIZE(40, 48), 5, 0, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
5010 {8, UPB_SIZE(48, 64), 6, 0, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
Joshua Habermanf41049a2022-01-21 14:41:25 -08005011};
5012
5013const upb_MiniTable google_protobuf_UninterpretedOption_msginit = {
5014 &google_protobuf_UninterpretedOption_submsgs[0],
5015 &google_protobuf_UninterpretedOption__fields[0],
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07005016 UPB_SIZE(64, 96), 7, kUpb_ExtMode_NonExtendable, 0, 255, 0,
Joshua Habermanf41049a2022-01-21 14:41:25 -08005017};
5018
5019static const upb_MiniTable_Field google_protobuf_UninterpretedOption_NamePart__fields[2] = {
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07005020 {1, UPB_SIZE(4, 8), 1, 0, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
5021 {2, UPB_SIZE(1, 1), 2, 0, 8, kUpb_FieldMode_Scalar | (kUpb_FieldRep_1Byte << kUpb_FieldRep_Shift)},
Joshua Habermanf41049a2022-01-21 14:41:25 -08005022};
5023
5024const upb_MiniTable google_protobuf_UninterpretedOption_NamePart_msginit = {
5025 NULL,
5026 &google_protobuf_UninterpretedOption_NamePart__fields[0],
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07005027 UPB_SIZE(16, 32), 2, kUpb_ExtMode_NonExtendable, 2, 255, 2,
Joshua Habermanf41049a2022-01-21 14:41:25 -08005028};
5029
5030static const upb_MiniTable_Sub google_protobuf_SourceCodeInfo_submsgs[1] = {
5031 {.submsg = &google_protobuf_SourceCodeInfo_Location_msginit},
5032};
5033
5034static const upb_MiniTable_Field google_protobuf_SourceCodeInfo__fields[1] = {
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07005035 {1, UPB_SIZE(0, 0), 0, 0, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
Joshua Habermanf41049a2022-01-21 14:41:25 -08005036};
5037
5038const upb_MiniTable google_protobuf_SourceCodeInfo_msginit = {
5039 &google_protobuf_SourceCodeInfo_submsgs[0],
5040 &google_protobuf_SourceCodeInfo__fields[0],
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07005041 UPB_SIZE(8, 8), 1, kUpb_ExtMode_NonExtendable, 1, 255, 0,
Joshua Habermanf41049a2022-01-21 14:41:25 -08005042};
5043
5044static const upb_MiniTable_Field google_protobuf_SourceCodeInfo_Location__fields[5] = {
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07005045 {1, UPB_SIZE(20, 40), 0, 0, 5, kUpb_FieldMode_Array | kUpb_LabelFlags_IsPacked | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
5046 {2, UPB_SIZE(24, 48), 0, 0, 5, kUpb_FieldMode_Array | kUpb_LabelFlags_IsPacked | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
5047 {3, UPB_SIZE(4, 8), 1, 0, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
5048 {4, UPB_SIZE(12, 24), 2, 0, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
5049 {6, UPB_SIZE(28, 56), 0, 0, 12, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
Joshua Habermanf41049a2022-01-21 14:41:25 -08005050};
5051
5052const upb_MiniTable google_protobuf_SourceCodeInfo_Location_msginit = {
5053 NULL,
5054 &google_protobuf_SourceCodeInfo_Location__fields[0],
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07005055 UPB_SIZE(32, 64), 5, kUpb_ExtMode_NonExtendable, 4, 255, 0,
Joshua Habermanf41049a2022-01-21 14:41:25 -08005056};
5057
5058static const upb_MiniTable_Sub google_protobuf_GeneratedCodeInfo_submsgs[1] = {
5059 {.submsg = &google_protobuf_GeneratedCodeInfo_Annotation_msginit},
5060};
5061
5062static const upb_MiniTable_Field google_protobuf_GeneratedCodeInfo__fields[1] = {
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07005063 {1, UPB_SIZE(0, 0), 0, 0, 11, kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
Joshua Habermanf41049a2022-01-21 14:41:25 -08005064};
5065
5066const upb_MiniTable google_protobuf_GeneratedCodeInfo_msginit = {
5067 &google_protobuf_GeneratedCodeInfo_submsgs[0],
5068 &google_protobuf_GeneratedCodeInfo__fields[0],
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07005069 UPB_SIZE(8, 8), 1, kUpb_ExtMode_NonExtendable, 1, 255, 0,
Joshua Habermanf41049a2022-01-21 14:41:25 -08005070};
5071
5072static const upb_MiniTable_Field google_protobuf_GeneratedCodeInfo_Annotation__fields[4] = {
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07005073 {1, UPB_SIZE(20, 32), 0, 0, 5, kUpb_FieldMode_Array | kUpb_LabelFlags_IsPacked | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift)},
5074 {2, UPB_SIZE(12, 16), 1, 0, 12, kUpb_FieldMode_Scalar | (kUpb_FieldRep_StringView << kUpb_FieldRep_Shift)},
5075 {3, UPB_SIZE(4, 4), 2, 0, 5, kUpb_FieldMode_Scalar | (kUpb_FieldRep_4Byte << kUpb_FieldRep_Shift)},
5076 {4, UPB_SIZE(8, 8), 3, 0, 5, kUpb_FieldMode_Scalar | (kUpb_FieldRep_4Byte << kUpb_FieldRep_Shift)},
Joshua Habermanf41049a2022-01-21 14:41:25 -08005077};
5078
5079const upb_MiniTable google_protobuf_GeneratedCodeInfo_Annotation_msginit = {
5080 NULL,
5081 &google_protobuf_GeneratedCodeInfo_Annotation__fields[0],
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07005082 UPB_SIZE(24, 48), 4, kUpb_ExtMode_NonExtendable, 4, 255, 0,
Joshua Habermanf41049a2022-01-21 14:41:25 -08005083};
5084
5085static const upb_MiniTable *messages_layout[27] = {
5086 &google_protobuf_FileDescriptorSet_msginit,
5087 &google_protobuf_FileDescriptorProto_msginit,
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005088 &google_protobuf_DescriptorProto_msginit,
5089 &google_protobuf_DescriptorProto_ExtensionRange_msginit,
5090 &google_protobuf_DescriptorProto_ReservedRange_msginit,
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005091 &google_protobuf_ExtensionRangeOptions_msginit,
Joshua Habermanf41049a2022-01-21 14:41:25 -08005092 &google_protobuf_FieldDescriptorProto_msginit,
5093 &google_protobuf_OneofDescriptorProto_msginit,
5094 &google_protobuf_EnumDescriptorProto_msginit,
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005095 &google_protobuf_EnumDescriptorProto_EnumReservedRange_msginit,
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005096 &google_protobuf_EnumValueDescriptorProto_msginit,
Joshua Habermanf41049a2022-01-21 14:41:25 -08005097 &google_protobuf_ServiceDescriptorProto_msginit,
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005098 &google_protobuf_MethodDescriptorProto_msginit,
Joshua Habermanf41049a2022-01-21 14:41:25 -08005099 &google_protobuf_FileOptions_msginit,
5100 &google_protobuf_MessageOptions_msginit,
5101 &google_protobuf_FieldOptions_msginit,
5102 &google_protobuf_OneofOptions_msginit,
5103 &google_protobuf_EnumOptions_msginit,
5104 &google_protobuf_EnumValueOptions_msginit,
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005105 &google_protobuf_ServiceOptions_msginit,
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005106 &google_protobuf_MethodOptions_msginit,
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005107 &google_protobuf_UninterpretedOption_msginit,
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005108 &google_protobuf_UninterpretedOption_NamePart_msginit,
Joshua Habermanf41049a2022-01-21 14:41:25 -08005109 &google_protobuf_SourceCodeInfo_msginit,
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005110 &google_protobuf_SourceCodeInfo_Location_msginit,
Joshua Habermanf41049a2022-01-21 14:41:25 -08005111 &google_protobuf_GeneratedCodeInfo_msginit,
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005112 &google_protobuf_GeneratedCodeInfo_Annotation_msginit,
5113};
5114
Joshua Habermanf41049a2022-01-21 14:41:25 -08005115const upb_MiniTable_Enum google_protobuf_FieldDescriptorProto_Type_enuminit = {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005116 NULL,
Joshua Habermanf41049a2022-01-21 14:41:25 -08005117 0x7fffeULL,
5118 0,
5119};
5120
5121const upb_MiniTable_Enum google_protobuf_FieldDescriptorProto_Label_enuminit = {
5122 NULL,
5123 0xeULL,
5124 0,
5125};
5126
5127const upb_MiniTable_Enum google_protobuf_FileOptions_OptimizeMode_enuminit = {
5128 NULL,
5129 0xeULL,
5130 0,
5131};
5132
5133const upb_MiniTable_Enum google_protobuf_FieldOptions_CType_enuminit = {
5134 NULL,
5135 0x7ULL,
5136 0,
5137};
5138
5139const upb_MiniTable_Enum google_protobuf_FieldOptions_JSType_enuminit = {
5140 NULL,
5141 0x7ULL,
5142 0,
5143};
5144
5145const upb_MiniTable_Enum google_protobuf_MethodOptions_IdempotencyLevel_enuminit = {
5146 NULL,
5147 0x7ULL,
5148 0,
5149};
5150
5151static const upb_MiniTable_Enum *enums_layout[6] = {
5152 &google_protobuf_FieldDescriptorProto_Type_enuminit,
5153 &google_protobuf_FieldDescriptorProto_Label_enuminit,
5154 &google_protobuf_FileOptions_OptimizeMode_enuminit,
5155 &google_protobuf_FieldOptions_CType_enuminit,
5156 &google_protobuf_FieldOptions_JSType_enuminit,
5157 &google_protobuf_MethodOptions_IdempotencyLevel_enuminit,
5158};
5159
5160const upb_MiniTable_File google_protobuf_descriptor_proto_upb_file_layout = {
5161 messages_layout,
5162 enums_layout,
5163 NULL,
5164 27,
5165 6,
5166 0,
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005167};
5168
5169
5170
/** upb/def.c ************************************************************/
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005172
5173#include <ctype.h>
5174#include <errno.h>
5175#include <setjmp.h>
5176#include <stdlib.h>
5177#include <string.h>
5178
5179
5180/* Must be last. */
5181
/* Length-prefixed string.  `str` is declared with one element, but per the
 * member comment the character data extends past the end of the struct.
 *
 * NOTE(review): this is deliberately NOT converted to a C99 flexible array
 * member, because doing so would change sizeof(str_t), which allocation code
 * elsewhere in this file may depend on. */
typedef struct {
  size_t len;
  char str[1]; /* Null-terminated string data follows. */
} str_t;
5186
Joshua Habermanf41049a2022-01-21 14:41:25 -08005187/* The upb core does not generally have a concept of default instances. However
5188 * for descriptor options we make an exception since the max size is known and
5189 * modest (<200 bytes). All types can share a default instance since it is
5190 * initialized to zeroes.
5191 *
5192 * We have to allocate an extra pointer for upb's internal metadata. */
5193static const char opt_default_buf[_UPB_MAXOPT_SIZE + sizeof(void*)] = {0};
5194static const char* opt_default = &opt_default_buf[sizeof(void*)];
5195
5196struct upb_FieldDef {
5197 const google_protobuf_FieldOptions* opts;
5198 const upb_FileDef* file;
5199 const upb_MessageDef* msgdef;
5200 const char* full_name;
5201 const char* json_name;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005202 union {
5203 int64_t sint;
5204 uint64_t uint;
5205 double dbl;
5206 float flt;
5207 bool boolean;
Joshua Habermanf41049a2022-01-21 14:41:25 -08005208 str_t* str;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005209 } defaultval;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005210 union {
Joshua Habermanf41049a2022-01-21 14:41:25 -08005211 const upb_OneofDef* oneof;
5212 const upb_MessageDef* extension_scope;
5213 } scope;
5214 union {
5215 const upb_MessageDef* msgdef;
5216 const upb_EnumDef* enumdef;
5217 const google_protobuf_FieldDescriptorProto* unresolved;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005218 } sub;
5219 uint32_t number_;
5220 uint16_t index_;
Joshua Habermanf41049a2022-01-21 14:41:25 -08005221 uint16_t layout_index; /* Index into msgdef->layout->fields or file->exts */
5222 bool has_default;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005223 bool is_extension_;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005224 bool packed_;
5225 bool proto3_optional_;
Joshua Habermanf41049a2022-01-21 14:41:25 -08005226 bool has_json_name_;
5227 upb_FieldType type_;
5228 upb_Label label_;
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07005229#if UINTPTR_MAX == 0xffffffff
5230 uint32_t padding; // Increase size to a multiple of 8.
5231#endif
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005232};
5233
Joshua Habermanf41049a2022-01-21 14:41:25 -08005234struct upb_ExtensionRange {
5235 const google_protobuf_ExtensionRangeOptions* opts;
5236 int32_t start;
5237 int32_t end;
5238};
5239
5240struct upb_MessageDef {
5241 const google_protobuf_MessageOptions* opts;
5242 const upb_MiniTable* layout;
5243 const upb_FileDef* file;
5244 const upb_MessageDef* containing_type;
5245 const char* full_name;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005246
5247 /* Tables for looking up fields by number and name. */
5248 upb_inttable itof;
5249 upb_strtable ntof;
5250
Joshua Habermanf41049a2022-01-21 14:41:25 -08005251 /* All nested defs.
Joshua Haberman5ef010c2022-01-23 16:13:12 -08005252 * MEM: We could save some space here by putting nested defs in a contiguous
5253 * region and calculating counts from offsets or vice-versa. */
Joshua Habermanf41049a2022-01-21 14:41:25 -08005254 const upb_FieldDef* fields;
5255 const upb_OneofDef* oneofs;
5256 const upb_ExtensionRange* ext_ranges;
5257 const upb_MessageDef* nested_msgs;
5258 const upb_EnumDef* nested_enums;
5259 const upb_FieldDef* nested_exts;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005260 int field_count;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005261 int real_oneof_count;
Joshua Habermanf41049a2022-01-21 14:41:25 -08005262 int oneof_count;
5263 int ext_range_count;
5264 int nested_msg_count;
5265 int nested_enum_count;
5266 int nested_ext_count;
5267 bool in_message_set;
5268 upb_WellKnown well_known_type;
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07005269#if UINTPTR_MAX == 0xffffffff
5270 uint32_t padding; // Increase size to a multiple of 8.
5271#endif
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005272};
5273
Joshua Habermanf41049a2022-01-21 14:41:25 -08005274struct upb_EnumDef {
5275 const google_protobuf_EnumOptions* opts;
5276 const upb_MiniTable_Enum* layout; // Only for proto2.
5277 const upb_FileDef* file;
5278 const upb_MessageDef* containing_type; // Could be merged with "file".
5279 const char* full_name;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005280 upb_strtable ntoi;
5281 upb_inttable iton;
Joshua Habermanf41049a2022-01-21 14:41:25 -08005282 const upb_EnumValueDef* values;
5283 int value_count;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005284 int32_t defaultval;
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07005285#if UINTPTR_MAX == 0xffffffff
5286 uint32_t padding; // Increase size to a multiple of 8.
5287#endif
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005288};
5289
Joshua Habermanf41049a2022-01-21 14:41:25 -08005290struct upb_EnumValueDef {
5291 const google_protobuf_EnumValueOptions* opts;
5292 const upb_EnumDef* parent;
5293 const char* full_name;
5294 int32_t number;
5295};
5296
5297struct upb_OneofDef {
5298 const google_protobuf_OneofOptions* opts;
5299 const upb_MessageDef* parent;
5300 const char* full_name;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005301 int field_count;
5302 bool synthetic;
Joshua Habermanf41049a2022-01-21 14:41:25 -08005303 const upb_FieldDef** fields;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005304 upb_strtable ntof;
5305 upb_inttable itof;
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07005306#if UINTPTR_MAX == 0xffffffff
5307 uint32_t padding; // Increase size to a multiple of 8.
5308#endif
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005309};
5310
Joshua Habermanf41049a2022-01-21 14:41:25 -08005311struct upb_FileDef {
5312 const google_protobuf_FileOptions* opts;
5313 const char* name;
5314 const char* package;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005315
Joshua Habermanf41049a2022-01-21 14:41:25 -08005316 const upb_FileDef** deps;
5317 const int32_t* public_deps;
5318 const int32_t* weak_deps;
5319 const upb_MessageDef* top_lvl_msgs;
5320 const upb_EnumDef* top_lvl_enums;
5321 const upb_FieldDef* top_lvl_exts;
5322 const upb_ServiceDef* services;
5323 const upb_MiniTable_Extension** ext_layouts;
5324 const upb_DefPool* symtab;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005325
5326 int dep_count;
Joshua Habermanf41049a2022-01-21 14:41:25 -08005327 int public_dep_count;
5328 int weak_dep_count;
5329 int top_lvl_msg_count;
5330 int top_lvl_enum_count;
5331 int top_lvl_ext_count;
5332 int service_count;
5333 int ext_count; /* All exts in the file. */
5334 upb_Syntax syntax;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005335};
5336
Joshua Habermanf41049a2022-01-21 14:41:25 -08005337struct upb_MethodDef {
5338 const google_protobuf_MethodOptions* opts;
5339 upb_ServiceDef* service;
5340 const char* full_name;
5341 const upb_MessageDef* input_type;
5342 const upb_MessageDef* output_type;
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07005343 int index;
Joshua Habermanf41049a2022-01-21 14:41:25 -08005344 bool client_streaming;
5345 bool server_streaming;
5346};
5347
5348struct upb_ServiceDef {
5349 const google_protobuf_ServiceOptions* opts;
5350 const upb_FileDef* file;
5351 const char* full_name;
5352 upb_MethodDef* methods;
5353 int method_count;
5354 int index;
5355};
5356
5357struct upb_DefPool {
5358 upb_Arena* arena;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005359 upb_strtable syms; /* full_name -> packed def ptr */
Joshua Habermanf41049a2022-01-21 14:41:25 -08005360 upb_strtable files; /* file_name -> upb_FileDef* */
5361 upb_inttable exts; /* upb_MiniTable_Extension* -> upb_FieldDef* */
5362 upb_ExtensionRegistry* extreg;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005363 size_t bytes_loaded;
5364};
5365
/* Inside a symtab we store tagged pointers to specific def types.
 *
 * The tag occupies the low three bits of the pointer (UPB_DEFTYPE_MASK).  Tag
 * values are intentionally reused across the three kinds of table, because
 * each table only ever holds the def types listed in its own group below. */
typedef enum {
  UPB_DEFTYPE_MASK = 7,

  /* Only inside symtab table. */
  UPB_DEFTYPE_EXT = 0,
  UPB_DEFTYPE_MSG = 1,
  UPB_DEFTYPE_ENUM = 2,
  UPB_DEFTYPE_ENUMVAL = 3,
  UPB_DEFTYPE_SERVICE = 4,

  /* Only inside message table. */
  UPB_DEFTYPE_FIELD = 0,
  UPB_DEFTYPE_ONEOF = 1,
  UPB_DEFTYPE_FIELD_JSONNAME = 2,

  /* Only inside file table. */
  UPB_DEFTYPE_FILE = 0,
  UPB_DEFTYPE_LAYOUT = 1
} upb_deftype_t;
5386
/* Sentinel field-type value; presumably means "no type given in the
 * descriptor" -- confirm at the use sites. */
#define FIELD_TYPE_UNSPECIFIED 0
5388
5389static upb_deftype_t deftype(upb_value v) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005390 uintptr_t num = (uintptr_t)upb_value_getconstptr(v);
Joshua Habermanf41049a2022-01-21 14:41:25 -08005391 return num & UPB_DEFTYPE_MASK;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005392}
5393
Joshua Habermanf41049a2022-01-21 14:41:25 -08005394static const void* unpack_def(upb_value v, upb_deftype_t type) {
5395 uintptr_t num = (uintptr_t)upb_value_getconstptr(v);
5396 return (num & UPB_DEFTYPE_MASK) == type
5397 ? (const void*)(num & ~UPB_DEFTYPE_MASK)
5398 : NULL;
5399}
5400
5401static upb_value pack_def(const void* ptr, upb_deftype_t type) {
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07005402 // Our 3-bit pointer tagging requires all pointers to be multiples of 8.
5403 // The arena will always yield 8-byte-aligned addresses, however we put
5404 // the defs into arrays. For each element in the array to be 8-byte-aligned,
5405 // the sizes of each def type must also be a multiple of 8.
5406 //
5407 // If any of these asserts fail, we need to add or remove padding on 32-bit
5408 // machines (64-bit machines will have 8-byte alignment already due to
5409 // pointers, which all of these structs have).
5410 UPB_ASSERT((sizeof(upb_FieldDef) & UPB_DEFTYPE_MASK) == 0);
5411 UPB_ASSERT((sizeof(upb_MessageDef) & UPB_DEFTYPE_MASK) == 0);
5412 UPB_ASSERT((sizeof(upb_EnumDef) & UPB_DEFTYPE_MASK) == 0);
5413 UPB_ASSERT((sizeof(upb_EnumValueDef) & UPB_DEFTYPE_MASK) == 0);
5414 UPB_ASSERT((sizeof(upb_ServiceDef) & UPB_DEFTYPE_MASK) == 0);
5415 UPB_ASSERT((sizeof(upb_OneofDef) & UPB_DEFTYPE_MASK) == 0);
Joshua Habermanf41049a2022-01-21 14:41:25 -08005416 uintptr_t num = (uintptr_t)ptr;
5417 UPB_ASSERT((num & UPB_DEFTYPE_MASK) == 0);
5418 num |= type;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005419 return upb_value_constptr((const void*)num);
5420}
5421
/* isalpha() etc. from <ctype.h> are locale-dependent, which we don't want. */

/* Returns true iff low <= c <= high (all compared as unsigned bytes). */
static bool upb_isbetween(uint8_t c, uint8_t low, uint8_t high) {
  return c >= low && c <= high;
}
5426
/* Sets bit 0x20 of `ch`. */
static char upb_ascii_lower(char ch) {
  // Per ASCII this will lower-case a letter.  If the result is a letter, the
  // input was definitely a letter.  If the output is not a letter, this may
  // have transformed the character unpredictably.
  return ch | 0x20;
}
5433
/* Returns true iff `c` is an ASCII letter (either case) or an underscore. */
static bool upb_isletter(char c) {
  char lower = upb_ascii_lower(c);
  return upb_isbetween(lower, 'a', 'z') || c == '_';
}
5438
/* Returns true iff `c` is an ASCII letter, an underscore, or an ASCII digit. */
static bool upb_isalphanum(char c) {
  return upb_isletter(c) || upb_isbetween(c, '0', '9');
}
5442
/* Returns the part of `fullname` after its last '.', or `fullname` itself if
 * it contains no '.'.  The result points into `fullname` (no copy is made).
 * Returns NULL if `fullname` is NULL. */
static const char* shortdefname(const char* fullname) {
  const char* last_dot;

  if (fullname == NULL) return NULL;

  last_dot = strrchr(fullname, '.');
  /* No '.' in the name: return the full string.  Otherwise return one past
   * the last '.'. */
  return last_dot == NULL ? fullname : last_dot + 1;
}
5456
5457/* All submessage fields are lower than all other fields.
5458 * Secondly, fields are increasing in order. */
Joshua Habermanf41049a2022-01-21 14:41:25 -08005459uint32_t field_rank(const upb_FieldDef* f) {
5460 uint32_t ret = upb_FieldDef_Number(f);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005461 const uint32_t high_bit = 1 << 30;
5462 UPB_ASSERT(ret < high_bit);
Joshua Habermanf41049a2022-01-21 14:41:25 -08005463 if (!upb_FieldDef_IsSubMessage(f)) ret |= high_bit;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005464 return ret;
5465}
5466
Joshua Habermanf41049a2022-01-21 14:41:25 -08005467int cmp_fields(const void* p1, const void* p2) {
5468 const upb_FieldDef* f1 = *(upb_FieldDef* const*)p1;
5469 const upb_FieldDef* f2 = *(upb_FieldDef* const*)p2;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005470 return field_rank(f1) - field_rank(f2);
5471}
5472
Joshua Habermanf41049a2022-01-21 14:41:25 -08005473static void upb_Status_setoom(upb_Status* status) {
5474 upb_Status_SetErrorMessage(status, "out of memory");
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005475}
5476
Joshua Habermanf41049a2022-01-21 14:41:25 -08005477static void assign_msg_wellknowntype(upb_MessageDef* m) {
5478 const char* name = upb_MessageDef_FullName(m);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005479 if (name == NULL) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08005480 m->well_known_type = kUpb_WellKnown_Unspecified;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005481 return;
5482 }
5483 if (!strcmp(name, "google.protobuf.Any")) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08005484 m->well_known_type = kUpb_WellKnown_Any;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005485 } else if (!strcmp(name, "google.protobuf.FieldMask")) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08005486 m->well_known_type = kUpb_WellKnown_FieldMask;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005487 } else if (!strcmp(name, "google.protobuf.Duration")) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08005488 m->well_known_type = kUpb_WellKnown_Duration;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005489 } else if (!strcmp(name, "google.protobuf.Timestamp")) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08005490 m->well_known_type = kUpb_WellKnown_Timestamp;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005491 } else if (!strcmp(name, "google.protobuf.DoubleValue")) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08005492 m->well_known_type = kUpb_WellKnown_DoubleValue;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005493 } else if (!strcmp(name, "google.protobuf.FloatValue")) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08005494 m->well_known_type = kUpb_WellKnown_FloatValue;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005495 } else if (!strcmp(name, "google.protobuf.Int64Value")) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08005496 m->well_known_type = kUpb_WellKnown_Int64Value;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005497 } else if (!strcmp(name, "google.protobuf.UInt64Value")) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08005498 m->well_known_type = kUpb_WellKnown_UInt64Value;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005499 } else if (!strcmp(name, "google.protobuf.Int32Value")) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08005500 m->well_known_type = kUpb_WellKnown_Int32Value;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005501 } else if (!strcmp(name, "google.protobuf.UInt32Value")) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08005502 m->well_known_type = kUpb_WellKnown_UInt32Value;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005503 } else if (!strcmp(name, "google.protobuf.BoolValue")) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08005504 m->well_known_type = kUpb_WellKnown_BoolValue;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005505 } else if (!strcmp(name, "google.protobuf.StringValue")) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08005506 m->well_known_type = kUpb_WellKnown_StringValue;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005507 } else if (!strcmp(name, "google.protobuf.BytesValue")) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08005508 m->well_known_type = kUpb_WellKnown_BytesValue;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005509 } else if (!strcmp(name, "google.protobuf.Value")) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08005510 m->well_known_type = kUpb_WellKnown_Value;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005511 } else if (!strcmp(name, "google.protobuf.ListValue")) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08005512 m->well_known_type = kUpb_WellKnown_ListValue;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005513 } else if (!strcmp(name, "google.protobuf.Struct")) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08005514 m->well_known_type = kUpb_WellKnown_Struct;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005515 } else {
Joshua Habermanf41049a2022-01-21 14:41:25 -08005516 m->well_known_type = kUpb_WellKnown_Unspecified;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005517 }
5518}
5519
/* upb_EnumDef ****************************************************************/
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005521
Joshua Habermanf41049a2022-01-21 14:41:25 -08005522const google_protobuf_EnumOptions* upb_EnumDef_Options(const upb_EnumDef* e) {
5523 return e->opts;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005524}
5525
Joshua Habermanf41049a2022-01-21 14:41:25 -08005526bool upb_EnumDef_HasOptions(const upb_EnumDef* e) {
5527 return e->opts != (void*)opt_default;
5528}
5529
5530const char* upb_EnumDef_FullName(const upb_EnumDef* e) { return e->full_name; }
5531
5532const char* upb_EnumDef_Name(const upb_EnumDef* e) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005533 return shortdefname(e->full_name);
5534}
5535
Joshua Habermanf41049a2022-01-21 14:41:25 -08005536const upb_FileDef* upb_EnumDef_File(const upb_EnumDef* e) { return e->file; }
5537
5538const upb_MessageDef* upb_EnumDef_ContainingType(const upb_EnumDef* e) {
5539 return e->containing_type;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005540}
5541
Joshua Habermanf41049a2022-01-21 14:41:25 -08005542int32_t upb_EnumDef_Default(const upb_EnumDef* e) {
5543 UPB_ASSERT(upb_EnumDef_FindValueByNumber(e, e->defaultval));
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005544 return e->defaultval;
5545}
5546
Joshua Habermanf41049a2022-01-21 14:41:25 -08005547int upb_EnumDef_ValueCount(const upb_EnumDef* e) { return e->value_count; }
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005548
Joshua Habermanf41049a2022-01-21 14:41:25 -08005549const upb_EnumValueDef* upb_EnumDef_FindValueByNameWithSize(
5550 const upb_EnumDef* def, const char* name, size_t len) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005551 upb_value v;
Joshua Habermanf41049a2022-01-21 14:41:25 -08005552 return upb_strtable_lookup2(&def->ntoi, name, len, &v)
5553 ? upb_value_getconstptr(v)
5554 : NULL;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005555}
5556
Joshua Habermanf41049a2022-01-21 14:41:25 -08005557const upb_EnumValueDef* upb_EnumDef_FindValueByNumber(const upb_EnumDef* def,
5558 int32_t num) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005559 upb_value v;
Joshua Habermanf41049a2022-01-21 14:41:25 -08005560 return upb_inttable_lookup(&def->iton, num, &v) ? upb_value_getconstptr(v)
5561 : NULL;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005562}
5563
Joshua Habermanf41049a2022-01-21 14:41:25 -08005564bool upb_EnumDef_CheckNumber(const upb_EnumDef* e, int32_t num) {
5565 // We could use upb_EnumDef_FindValueByNumber(e, num) != NULL, but we expect
5566 // this to be faster (especially for small numbers).
5567 return upb_MiniTable_Enum_CheckValue(e->layout, num);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005568}
5569
Joshua Habermanf41049a2022-01-21 14:41:25 -08005570const upb_EnumValueDef* upb_EnumDef_Value(const upb_EnumDef* e, int i) {
5571 UPB_ASSERT(0 <= i && i < e->value_count);
5572 return &e->values[i];
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005573}
5574
/* upb_EnumValueDef ***********************************************************/
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005576
Joshua Habermanf41049a2022-01-21 14:41:25 -08005577const google_protobuf_EnumValueOptions* upb_EnumValueDef_Options(
5578 const upb_EnumValueDef* e) {
5579 return e->opts;
5580}
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005581
Joshua Habermanf41049a2022-01-21 14:41:25 -08005582bool upb_EnumValueDef_HasOptions(const upb_EnumValueDef* e) {
5583 return e->opts != (void*)opt_default;
5584}
5585
5586const upb_EnumDef* upb_EnumValueDef_Enum(const upb_EnumValueDef* ev) {
5587 return ev->parent;
5588}
5589
5590const char* upb_EnumValueDef_FullName(const upb_EnumValueDef* ev) {
5591 return ev->full_name;
5592}
5593
5594const char* upb_EnumValueDef_Name(const upb_EnumValueDef* ev) {
5595 return shortdefname(ev->full_name);
5596}
5597
5598int32_t upb_EnumValueDef_Number(const upb_EnumValueDef* ev) {
5599 return ev->number;
5600}
5601
5602uint32_t upb_EnumValueDef_Index(const upb_EnumValueDef* ev) {
5603 // Compute index in our parent's array.
5604 return ev - ev->parent->values;
5605}
5606
/* upb_ExtensionRange *********************************************************/
5609
5610const google_protobuf_ExtensionRangeOptions* upb_ExtensionRange_Options(
5611 const upb_ExtensionRange* r) {
5612 return r->opts;
5613}
5614
5615bool upb_ExtensionRange_HasOptions(const upb_ExtensionRange* r) {
5616 return r->opts != (void*)opt_default;
5617}
5618
5619int32_t upb_ExtensionRange_Start(const upb_ExtensionRange* e) {
5620 return e->start;
5621}
5622
5623int32_t upb_ExtensionRange_End(const upb_ExtensionRange* e) { return e->end; }
5624
5625/* upb_FieldDef ***************************************************************/
5626
5627const google_protobuf_FieldOptions* upb_FieldDef_Options(
5628 const upb_FieldDef* f) {
5629 return f->opts;
5630}
5631
5632bool upb_FieldDef_HasOptions(const upb_FieldDef* f) {
5633 return f->opts != (void*)opt_default;
5634}
5635
5636const char* upb_FieldDef_FullName(const upb_FieldDef* f) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005637 return f->full_name;
5638}
5639
Joshua Habermanf41049a2022-01-21 14:41:25 -08005640upb_CType upb_FieldDef_CType(const upb_FieldDef* f) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005641 switch (f->type_) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08005642 case kUpb_FieldType_Double:
5643 return kUpb_CType_Double;
5644 case kUpb_FieldType_Float:
5645 return kUpb_CType_Float;
5646 case kUpb_FieldType_Int64:
5647 case kUpb_FieldType_SInt64:
5648 case kUpb_FieldType_SFixed64:
5649 return kUpb_CType_Int64;
5650 case kUpb_FieldType_Int32:
5651 case kUpb_FieldType_SFixed32:
5652 case kUpb_FieldType_SInt32:
5653 return kUpb_CType_Int32;
5654 case kUpb_FieldType_UInt64:
5655 case kUpb_FieldType_Fixed64:
5656 return kUpb_CType_UInt64;
5657 case kUpb_FieldType_UInt32:
5658 case kUpb_FieldType_Fixed32:
5659 return kUpb_CType_UInt32;
5660 case kUpb_FieldType_Enum:
5661 return kUpb_CType_Enum;
5662 case kUpb_FieldType_Bool:
5663 return kUpb_CType_Bool;
5664 case kUpb_FieldType_String:
5665 return kUpb_CType_String;
5666 case kUpb_FieldType_Bytes:
5667 return kUpb_CType_Bytes;
5668 case kUpb_FieldType_Group:
5669 case kUpb_FieldType_Message:
5670 return kUpb_CType_Message;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005671 }
5672 UPB_UNREACHABLE();
5673}
5674
Joshua Habermanf41049a2022-01-21 14:41:25 -08005675upb_FieldType upb_FieldDef_Type(const upb_FieldDef* f) { return f->type_; }
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005676
Joshua Habermanf41049a2022-01-21 14:41:25 -08005677uint32_t upb_FieldDef_Index(const upb_FieldDef* f) { return f->index_; }
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005678
Joshua Habermanf41049a2022-01-21 14:41:25 -08005679upb_Label upb_FieldDef_Label(const upb_FieldDef* f) { return f->label_; }
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005680
Joshua Habermanf41049a2022-01-21 14:41:25 -08005681uint32_t upb_FieldDef_Number(const upb_FieldDef* f) { return f->number_; }
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005682
Joshua Habermanf41049a2022-01-21 14:41:25 -08005683bool upb_FieldDef_IsExtension(const upb_FieldDef* f) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005684 return f->is_extension_;
5685}
5686
Joshua Habermanf41049a2022-01-21 14:41:25 -08005687bool upb_FieldDef_IsPacked(const upb_FieldDef* f) { return f->packed_; }
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005688
Joshua Habermanf41049a2022-01-21 14:41:25 -08005689const char* upb_FieldDef_Name(const upb_FieldDef* f) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005690 return shortdefname(f->full_name);
5691}
5692
Joshua Habermanf41049a2022-01-21 14:41:25 -08005693const char* upb_FieldDef_JsonName(const upb_FieldDef* f) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005694 return f->json_name;
5695}
5696
Joshua Habermanf41049a2022-01-21 14:41:25 -08005697bool upb_FieldDef_HasJsonName(const upb_FieldDef* f) {
5698 return f->has_json_name_;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005699}
5700
Joshua Habermanf41049a2022-01-21 14:41:25 -08005701const upb_FileDef* upb_FieldDef_File(const upb_FieldDef* f) { return f->file; }
5702
5703const upb_MessageDef* upb_FieldDef_ContainingType(const upb_FieldDef* f) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005704 return f->msgdef;
5705}
5706
Joshua Habermanf41049a2022-01-21 14:41:25 -08005707const upb_MessageDef* upb_FieldDef_ExtensionScope(const upb_FieldDef* f) {
5708 return f->is_extension_ ? f->scope.extension_scope : NULL;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005709}
5710
Joshua Habermanf41049a2022-01-21 14:41:25 -08005711const upb_OneofDef* upb_FieldDef_ContainingOneof(const upb_FieldDef* f) {
5712 return f->is_extension_ ? NULL : f->scope.oneof;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005713}
5714
Joshua Habermanf41049a2022-01-21 14:41:25 -08005715const upb_OneofDef* upb_FieldDef_RealContainingOneof(const upb_FieldDef* f) {
5716 const upb_OneofDef* oneof = upb_FieldDef_ContainingOneof(f);
5717 if (!oneof || upb_OneofDef_IsSynthetic(oneof)) return NULL;
5718 return oneof;
5719}
5720
5721upb_MessageValue upb_FieldDef_Default(const upb_FieldDef* f) {
5722 UPB_ASSERT(!upb_FieldDef_IsSubMessage(f));
5723 upb_MessageValue ret;
5724
5725 switch (upb_FieldDef_CType(f)) {
5726 case kUpb_CType_Bool:
5727 return (upb_MessageValue){.bool_val = f->defaultval.boolean};
5728 case kUpb_CType_Int64:
5729 return (upb_MessageValue){.int64_val = f->defaultval.sint};
5730 case kUpb_CType_UInt64:
5731 return (upb_MessageValue){.uint64_val = f->defaultval.uint};
5732 case kUpb_CType_Enum:
5733 case kUpb_CType_Int32:
5734 return (upb_MessageValue){.int32_val = (int32_t)f->defaultval.sint};
5735 case kUpb_CType_UInt32:
5736 return (upb_MessageValue){.uint32_val = (uint32_t)f->defaultval.uint};
5737 case kUpb_CType_Float:
5738 return (upb_MessageValue){.float_val = f->defaultval.flt};
5739 case kUpb_CType_Double:
5740 return (upb_MessageValue){.double_val = f->defaultval.dbl};
5741 case kUpb_CType_String:
5742 case kUpb_CType_Bytes: {
5743 str_t* str = f->defaultval.str;
5744 if (str) {
5745 return (upb_MessageValue){
5746 .str_val = (upb_StringView){.data = str->str, .size = str->len}};
5747 } else {
5748 return (upb_MessageValue){
5749 .str_val = (upb_StringView){.data = NULL, .size = 0}};
5750 }
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005751 }
Joshua Habermanf41049a2022-01-21 14:41:25 -08005752 default:
5753 UPB_UNREACHABLE();
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005754 }
Joshua Habermanf41049a2022-01-21 14:41:25 -08005755
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005756 return ret;
5757}
5758
Joshua Habermanf41049a2022-01-21 14:41:25 -08005759const upb_MessageDef* upb_FieldDef_MessageSubDef(const upb_FieldDef* f) {
5760 return upb_FieldDef_CType(f) == kUpb_CType_Message ? f->sub.msgdef : NULL;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005761}
5762
Joshua Habermanf41049a2022-01-21 14:41:25 -08005763const upb_EnumDef* upb_FieldDef_EnumSubDef(const upb_FieldDef* f) {
5764 return upb_FieldDef_CType(f) == kUpb_CType_Enum ? f->sub.enumdef : NULL;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005765}
5766
Joshua Habermanf41049a2022-01-21 14:41:25 -08005767const upb_MiniTable_Field* upb_FieldDef_MiniTable(const upb_FieldDef* f) {
5768 UPB_ASSERT(!upb_FieldDef_IsExtension(f));
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005769 return &f->msgdef->layout->fields[f->layout_index];
5770}
5771
Joshua Habermanf41049a2022-01-21 14:41:25 -08005772const upb_MiniTable_Extension* _upb_FieldDef_ExtensionMiniTable(
5773 const upb_FieldDef* f) {
5774 UPB_ASSERT(upb_FieldDef_IsExtension(f));
5775 return f->file->ext_layouts[f->layout_index];
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005776}
5777
Joshua Habermanf41049a2022-01-21 14:41:25 -08005778bool _upb_FieldDef_IsProto3Optional(const upb_FieldDef* f) {
5779 return f->proto3_optional_;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005780}
5781
Joshua Habermanf41049a2022-01-21 14:41:25 -08005782bool upb_FieldDef_IsSubMessage(const upb_FieldDef* f) {
5783 return upb_FieldDef_CType(f) == kUpb_CType_Message;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005784}
5785
Joshua Habermanf41049a2022-01-21 14:41:25 -08005786bool upb_FieldDef_IsString(const upb_FieldDef* f) {
5787 return upb_FieldDef_CType(f) == kUpb_CType_String ||
5788 upb_FieldDef_CType(f) == kUpb_CType_Bytes;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005789}
5790
Joshua Habermanf41049a2022-01-21 14:41:25 -08005791bool upb_FieldDef_IsRepeated(const upb_FieldDef* f) {
5792 return upb_FieldDef_Label(f) == kUpb_Label_Repeated;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005793}
5794
Joshua Habermanf41049a2022-01-21 14:41:25 -08005795bool upb_FieldDef_IsPrimitive(const upb_FieldDef* f) {
5796 return !upb_FieldDef_IsString(f) && !upb_FieldDef_IsSubMessage(f);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005797}
5798
Joshua Habermanf41049a2022-01-21 14:41:25 -08005799bool upb_FieldDef_IsMap(const upb_FieldDef* f) {
5800 return upb_FieldDef_IsRepeated(f) && upb_FieldDef_IsSubMessage(f) &&
5801 upb_MessageDef_IsMapEntry(upb_FieldDef_MessageSubDef(f));
5802}
5803
5804bool upb_FieldDef_HasDefault(const upb_FieldDef* f) { return f->has_default; }
5805
5806bool upb_FieldDef_HasSubDef(const upb_FieldDef* f) {
5807 return upb_FieldDef_IsSubMessage(f) ||
5808 upb_FieldDef_CType(f) == kUpb_CType_Enum;
5809}
5810
5811bool upb_FieldDef_HasPresence(const upb_FieldDef* f) {
5812 if (upb_FieldDef_IsRepeated(f)) return false;
5813 return upb_FieldDef_IsSubMessage(f) || upb_FieldDef_ContainingOneof(f) ||
5814 f->file->syntax == kUpb_Syntax_Proto2;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005815}
5816
/* True when low <= x <= high (both bounds inclusive). */
static bool between(int32_t x, int32_t low, int32_t high) {
  if (x < low) return false;
  return x <= high;
}
5820
/* True when `label` lies in the inclusive range [1, 3]. */
bool upb_FieldDef_checklabel(int32_t label) {
  const bool in_range = between(label, 1, 3);
  return in_range;
}
/* True when `type` lies in the inclusive range [1, 11]. */
bool upb_FieldDef_checktype(int32_t type) {
  const bool in_range = between(type, 1, 11);
  return in_range;
}
/* True when `fmt` lies in the inclusive range [1, 3]. */
bool upb_FieldDef_checkintfmt(int32_t fmt) {
  const bool in_range = between(fmt, 1, 3);
  return in_range;
}
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005824
/* True when `type` lies in the inclusive range [1, 18]. */
bool upb_FieldDef_checkdescriptortype(int32_t type) {
  const bool in_range = between(type, 1, 18);
  return in_range;
}
5828
Joshua Habermanf41049a2022-01-21 14:41:25 -08005829/* upb_MessageDef
5830 * *****************************************************************/
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005831
Joshua Habermanf41049a2022-01-21 14:41:25 -08005832const google_protobuf_MessageOptions* upb_MessageDef_Options(
5833 const upb_MessageDef* m) {
5834 return m->opts;
5835}
5836
5837bool upb_MessageDef_HasOptions(const upb_MessageDef* m) {
5838 return m->opts != (void*)opt_default;
5839}
5840
5841const char* upb_MessageDef_FullName(const upb_MessageDef* m) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005842 return m->full_name;
5843}
5844
Joshua Habermanf41049a2022-01-21 14:41:25 -08005845const upb_FileDef* upb_MessageDef_File(const upb_MessageDef* m) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005846 return m->file;
5847}
5848
Joshua Habermanf41049a2022-01-21 14:41:25 -08005849const upb_MessageDef* upb_MessageDef_ContainingType(const upb_MessageDef* m) {
5850 return m->containing_type;
5851}
5852
5853const char* upb_MessageDef_Name(const upb_MessageDef* m) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005854 return shortdefname(m->full_name);
5855}
5856
Joshua Habermanf41049a2022-01-21 14:41:25 -08005857upb_Syntax upb_MessageDef_Syntax(const upb_MessageDef* m) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005858 return m->file->syntax;
5859}
5860
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07005861const upb_FieldDef* upb_MessageDef_FindFieldByNumber(const upb_MessageDef* m,
5862 uint32_t i) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005863 upb_value val;
Joshua Habermandd69a482021-05-17 22:40:33 -07005864 return upb_inttable_lookup(&m->itof, i, &val) ? upb_value_getconstptr(val)
5865 : NULL;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005866}
5867
Joshua Habermanf41049a2022-01-21 14:41:25 -08005868const upb_FieldDef* upb_MessageDef_FindFieldByNameWithSize(
5869 const upb_MessageDef* m, const char* name, size_t len) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005870 upb_value val;
5871
5872 if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
5873 return NULL;
5874 }
5875
5876 return unpack_def(val, UPB_DEFTYPE_FIELD);
5877}
5878
Joshua Habermanf41049a2022-01-21 14:41:25 -08005879const upb_OneofDef* upb_MessageDef_FindOneofByNameWithSize(
5880 const upb_MessageDef* m, const char* name, size_t len) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005881 upb_value val;
5882
5883 if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
5884 return NULL;
5885 }
5886
5887 return unpack_def(val, UPB_DEFTYPE_ONEOF);
5888}
5889
Joshua Habermanf41049a2022-01-21 14:41:25 -08005890bool upb_MessageDef_FindByNameWithSize(const upb_MessageDef* m,
5891 const char* name, size_t len,
5892 const upb_FieldDef** out_f,
5893 const upb_OneofDef** out_o) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005894 upb_value val;
5895
5896 if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
5897 return false;
5898 }
5899
Joshua Habermanf41049a2022-01-21 14:41:25 -08005900 const upb_FieldDef* f = unpack_def(val, UPB_DEFTYPE_FIELD);
5901 const upb_OneofDef* o = unpack_def(val, UPB_DEFTYPE_ONEOF);
5902 if (out_f) *out_f = f;
5903 if (out_o) *out_o = o;
5904 return f || o; /* False if this was a JSON name. */
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005905}
5906
Joshua Habermanf41049a2022-01-21 14:41:25 -08005907const upb_FieldDef* upb_MessageDef_FindByJsonNameWithSize(
5908 const upb_MessageDef* m, const char* name, size_t len) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005909 upb_value val;
Joshua Habermanf41049a2022-01-21 14:41:25 -08005910 const upb_FieldDef* f;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005911
5912 if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
5913 return NULL;
5914 }
5915
5916 f = unpack_def(val, UPB_DEFTYPE_FIELD);
5917 if (!f) f = unpack_def(val, UPB_DEFTYPE_FIELD_JSONNAME);
5918
5919 return f;
5920}
5921
Joshua Habermanf41049a2022-01-21 14:41:25 -08005922int upb_MessageDef_numfields(const upb_MessageDef* m) { return m->field_count; }
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005923
Joshua Habermanf41049a2022-01-21 14:41:25 -08005924int upb_MessageDef_numoneofs(const upb_MessageDef* m) { return m->oneof_count; }
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005925
Joshua Habermanf41049a2022-01-21 14:41:25 -08005926int upb_MessageDef_numrealoneofs(const upb_MessageDef* m) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005927 return m->real_oneof_count;
5928}
5929
Joshua Habermanf41049a2022-01-21 14:41:25 -08005930int upb_MessageDef_ExtensionRangeCount(const upb_MessageDef* m) {
5931 return m->ext_range_count;
5932}
5933
5934int upb_MessageDef_FieldCount(const upb_MessageDef* m) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005935 return m->field_count;
5936}
5937
Joshua Habermanf41049a2022-01-21 14:41:25 -08005938int upb_MessageDef_OneofCount(const upb_MessageDef* m) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005939 return m->oneof_count;
5940}
5941
Joshua Habermanf41049a2022-01-21 14:41:25 -08005942int upb_MessageDef_NestedMessageCount(const upb_MessageDef* m) {
5943 return m->nested_msg_count;
5944}
5945
5946int upb_MessageDef_NestedEnumCount(const upb_MessageDef* m) {
5947 return m->nested_enum_count;
5948}
5949
5950int upb_MessageDef_NestedExtensionCount(const upb_MessageDef* m) {
5951 return m->nested_ext_count;
5952}
5953
5954int upb_MessageDef_realoneofcount(const upb_MessageDef* m) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005955 return m->real_oneof_count;
5956}
5957
Joshua Habermanf41049a2022-01-21 14:41:25 -08005958const upb_MiniTable* upb_MessageDef_MiniTable(const upb_MessageDef* m) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005959 return m->layout;
5960}
5961
Joshua Habermanf41049a2022-01-21 14:41:25 -08005962const upb_ExtensionRange* upb_MessageDef_ExtensionRange(const upb_MessageDef* m,
5963 int i) {
5964 UPB_ASSERT(0 <= i && i < m->ext_range_count);
5965 return &m->ext_ranges[i];
5966}
5967
5968const upb_FieldDef* upb_MessageDef_Field(const upb_MessageDef* m, int i) {
5969 UPB_ASSERT(0 <= i && i < m->field_count);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005970 return &m->fields[i];
5971}
5972
Joshua Habermanf41049a2022-01-21 14:41:25 -08005973const upb_OneofDef* upb_MessageDef_Oneof(const upb_MessageDef* m, int i) {
5974 UPB_ASSERT(0 <= i && i < m->oneof_count);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005975 return &m->oneofs[i];
5976}
5977
Joshua Habermanf41049a2022-01-21 14:41:25 -08005978const upb_MessageDef* upb_MessageDef_NestedMessage(const upb_MessageDef* m,
5979 int i) {
5980 UPB_ASSERT(0 <= i && i < m->nested_msg_count);
5981 return &m->nested_msgs[i];
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005982}
5983
Joshua Habermanf41049a2022-01-21 14:41:25 -08005984const upb_EnumDef* upb_MessageDef_NestedEnum(const upb_MessageDef* m, int i) {
5985 UPB_ASSERT(0 <= i && i < m->nested_enum_count);
5986 return &m->nested_enums[i];
5987}
5988
5989const upb_FieldDef* upb_MessageDef_NestedExtension(const upb_MessageDef* m,
5990 int i) {
5991 UPB_ASSERT(0 <= i && i < m->nested_ext_count);
5992 return &m->nested_exts[i];
5993}
5994
5995upb_WellKnown upb_MessageDef_WellKnownType(const upb_MessageDef* m) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08005996 return m->well_known_type;
5997}
5998
Joshua Habermanf41049a2022-01-21 14:41:25 -08005999/* upb_OneofDef ***************************************************************/
6000
6001const google_protobuf_OneofOptions* upb_OneofDef_Options(
6002 const upb_OneofDef* o) {
6003 return o->opts;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006004}
6005
Joshua Habermanf41049a2022-01-21 14:41:25 -08006006bool upb_OneofDef_HasOptions(const upb_OneofDef* o) {
6007 return o->opts != (void*)opt_default;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006008}
6009
Joshua Habermanf41049a2022-01-21 14:41:25 -08006010const char* upb_OneofDef_Name(const upb_OneofDef* o) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006011 return shortdefname(o->full_name);
6012}
6013
Joshua Habermanf41049a2022-01-21 14:41:25 -08006014const upb_MessageDef* upb_OneofDef_ContainingType(const upb_OneofDef* o) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006015 return o->parent;
6016}
6017
Joshua Habermanf41049a2022-01-21 14:41:25 -08006018int upb_OneofDef_FieldCount(const upb_OneofDef* o) { return o->field_count; }
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006019
Joshua Habermanf41049a2022-01-21 14:41:25 -08006020const upb_FieldDef* upb_OneofDef_Field(const upb_OneofDef* o, int i) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006021 UPB_ASSERT(i < o->field_count);
6022 return o->fields[i];
6023}
6024
Joshua Habermanf41049a2022-01-21 14:41:25 -08006025int upb_OneofDef_numfields(const upb_OneofDef* o) { return o->field_count; }
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006026
Joshua Habermanf41049a2022-01-21 14:41:25 -08006027uint32_t upb_OneofDef_Index(const upb_OneofDef* o) {
6028 // Compute index in our parent's array.
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006029 return o - o->parent->oneofs;
6030}
6031
Joshua Habermanf41049a2022-01-21 14:41:25 -08006032bool upb_OneofDef_IsSynthetic(const upb_OneofDef* o) { return o->synthetic; }
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006033
Joshua Habermanf41049a2022-01-21 14:41:25 -08006034const upb_FieldDef* upb_OneofDef_LookupNameWithSize(const upb_OneofDef* o,
6035 const char* name,
6036 size_t length) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006037 upb_value val;
Joshua Habermanf41049a2022-01-21 14:41:25 -08006038 return upb_strtable_lookup2(&o->ntof, name, length, &val)
6039 ? upb_value_getptr(val)
6040 : NULL;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006041}
6042
Joshua Habermanf41049a2022-01-21 14:41:25 -08006043const upb_FieldDef* upb_OneofDef_LookupNumber(const upb_OneofDef* o,
6044 uint32_t num) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006045 upb_value val;
Joshua Habermandd69a482021-05-17 22:40:33 -07006046 return upb_inttable_lookup(&o->itof, num, &val) ? upb_value_getptr(val)
6047 : NULL;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006048}
6049
Joshua Habermanf41049a2022-01-21 14:41:25 -08006050/* upb_FileDef ****************************************************************/
6051
6052const google_protobuf_FileOptions* upb_FileDef_Options(const upb_FileDef* f) {
6053 return f->opts;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006054}
6055
Joshua Habermanf41049a2022-01-21 14:41:25 -08006056bool upb_FileDef_HasOptions(const upb_FileDef* f) {
6057 return f->opts != (void*)opt_default;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006058}
6059
Joshua Habermanf41049a2022-01-21 14:41:25 -08006060const char* upb_FileDef_Name(const upb_FileDef* f) { return f->name; }
6061
6062const char* upb_FileDef_Package(const upb_FileDef* f) { return f->package; }
6063
6064upb_Syntax upb_FileDef_Syntax(const upb_FileDef* f) { return f->syntax; }
6065
6066int upb_FileDef_TopLevelMessageCount(const upb_FileDef* f) {
6067 return f->top_lvl_msg_count;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006068}
6069
Joshua Habermanf41049a2022-01-21 14:41:25 -08006070int upb_FileDef_DependencyCount(const upb_FileDef* f) { return f->dep_count; }
6071
6072int upb_FileDef_PublicDependencyCount(const upb_FileDef* f) {
6073 return f->public_dep_count;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006074}
6075
Joshua Habermanf41049a2022-01-21 14:41:25 -08006076int upb_FileDef_WeakDependencyCount(const upb_FileDef* f) {
6077 return f->weak_dep_count;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006078}
6079
Joshua Habermanf41049a2022-01-21 14:41:25 -08006080const int32_t* _upb_FileDef_PublicDependencyIndexes(const upb_FileDef* f) {
6081 return f->public_deps;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006082}
6083
Joshua Habermanf41049a2022-01-21 14:41:25 -08006084const int32_t* _upb_FileDef_WeakDependencyIndexes(const upb_FileDef* f) {
6085 return f->weak_deps;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006086}
6087
Joshua Habermanf41049a2022-01-21 14:41:25 -08006088int upb_FileDef_TopLevelEnumCount(const upb_FileDef* f) {
6089 return f->top_lvl_enum_count;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006090}
6091
Joshua Habermanf41049a2022-01-21 14:41:25 -08006092int upb_FileDef_TopLevelExtensionCount(const upb_FileDef* f) {
6093 return f->top_lvl_ext_count;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006094}
6095
Joshua Habermanf41049a2022-01-21 14:41:25 -08006096int upb_FileDef_ServiceCount(const upb_FileDef* f) { return f->service_count; }
6097
6098const upb_FileDef* upb_FileDef_Dependency(const upb_FileDef* f, int i) {
6099 UPB_ASSERT(0 <= i && i < f->dep_count);
6100 return f->deps[i];
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006101}
6102
Joshua Habermanf41049a2022-01-21 14:41:25 -08006103const upb_FileDef* upb_FileDef_PublicDependency(const upb_FileDef* f, int i) {
6104 UPB_ASSERT(0 <= i && i < f->public_dep_count);
6105 return f->deps[f->public_deps[i]];
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006106}
6107
Joshua Habermanf41049a2022-01-21 14:41:25 -08006108const upb_FileDef* upb_FileDef_WeakDependency(const upb_FileDef* f, int i) {
6109 UPB_ASSERT(0 <= i && i < f->public_dep_count);
6110 return f->deps[f->weak_deps[i]];
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006111}
6112
Joshua Habermanf41049a2022-01-21 14:41:25 -08006113const upb_MessageDef* upb_FileDef_TopLevelMessage(const upb_FileDef* f, int i) {
6114 UPB_ASSERT(0 <= i && i < f->top_lvl_msg_count);
6115 return &f->top_lvl_msgs[i];
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006116}
6117
Joshua Habermanf41049a2022-01-21 14:41:25 -08006118const upb_EnumDef* upb_FileDef_TopLevelEnum(const upb_FileDef* f, int i) {
6119 UPB_ASSERT(0 <= i && i < f->top_lvl_enum_count);
6120 return &f->top_lvl_enums[i];
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006121}
6122
Joshua Habermanf41049a2022-01-21 14:41:25 -08006123const upb_FieldDef* upb_FileDef_TopLevelExtension(const upb_FileDef* f, int i) {
6124 UPB_ASSERT(0 <= i && i < f->top_lvl_ext_count);
6125 return &f->top_lvl_exts[i];
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006126}
6127
Joshua Habermanf41049a2022-01-21 14:41:25 -08006128const upb_ServiceDef* upb_FileDef_Service(const upb_FileDef* f, int i) {
6129 UPB_ASSERT(0 <= i && i < f->service_count);
6130 return &f->services[i];
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006131}
6132
Joshua Habermanf41049a2022-01-21 14:41:25 -08006133const upb_DefPool* upb_FileDef_Pool(const upb_FileDef* f) { return f->symtab; }
6134
6135/* upb_MethodDef **************************************************************/
6136
6137const google_protobuf_MethodOptions* upb_MethodDef_Options(
6138 const upb_MethodDef* m) {
6139 return m->opts;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006140}
6141
Joshua Habermanf41049a2022-01-21 14:41:25 -08006142bool upb_MethodDef_HasOptions(const upb_MethodDef* m) {
6143 return m->opts != (void*)opt_default;
6144}
6145
6146const char* upb_MethodDef_FullName(const upb_MethodDef* m) {
6147 return m->full_name;
6148}
6149
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07006150int upb_MethodDef_Index(const upb_MethodDef* m) { return m->index; }
6151
Joshua Habermanf41049a2022-01-21 14:41:25 -08006152const char* upb_MethodDef_Name(const upb_MethodDef* m) {
6153 return shortdefname(m->full_name);
6154}
6155
6156const upb_ServiceDef* upb_MethodDef_Service(const upb_MethodDef* m) {
6157 return m->service;
6158}
6159
6160const upb_MessageDef* upb_MethodDef_InputType(const upb_MethodDef* m) {
6161 return m->input_type;
6162}
6163
6164const upb_MessageDef* upb_MethodDef_OutputType(const upb_MethodDef* m) {
6165 return m->output_type;
6166}
6167
6168bool upb_MethodDef_ClientStreaming(const upb_MethodDef* m) {
6169 return m->client_streaming;
6170}
6171
6172bool upb_MethodDef_ServerStreaming(const upb_MethodDef* m) {
6173 return m->server_streaming;
6174}
6175
6176/* upb_ServiceDef *************************************************************/
6177
6178const google_protobuf_ServiceOptions* upb_ServiceDef_Options(
6179 const upb_ServiceDef* s) {
6180 return s->opts;
6181}
6182
6183bool upb_ServiceDef_HasOptions(const upb_ServiceDef* s) {
6184 return s->opts != (void*)opt_default;
6185}
6186
6187const char* upb_ServiceDef_FullName(const upb_ServiceDef* s) {
6188 return s->full_name;
6189}
6190
6191const char* upb_ServiceDef_Name(const upb_ServiceDef* s) {
6192 return shortdefname(s->full_name);
6193}
6194
6195int upb_ServiceDef_Index(const upb_ServiceDef* s) { return s->index; }
6196
6197const upb_FileDef* upb_ServiceDef_File(const upb_ServiceDef* s) {
6198 return s->file;
6199}
6200
6201int upb_ServiceDef_MethodCount(const upb_ServiceDef* s) {
6202 return s->method_count;
6203}
6204
6205const upb_MethodDef* upb_ServiceDef_Method(const upb_ServiceDef* s, int i) {
6206 return i < 0 || i >= s->method_count ? NULL : &s->methods[i];
6207}
6208
6209const upb_MethodDef* upb_ServiceDef_FindMethodByName(const upb_ServiceDef* s,
6210 const char* name) {
6211 for (int i = 0; i < s->method_count; i++) {
6212 if (strcmp(name, upb_MethodDef_Name(&s->methods[i])) == 0) {
6213 return &s->methods[i];
6214 }
6215 }
6216 return NULL;
6217}
6218
6219/* upb_DefPool ****************************************************************/
6220
6221void upb_DefPool_Free(upb_DefPool* s) {
6222 upb_Arena_Free(s->arena);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006223 upb_gfree(s);
6224}
6225
Joshua Habermanf41049a2022-01-21 14:41:25 -08006226upb_DefPool* upb_DefPool_New(void) {
6227 upb_DefPool* s = upb_gmalloc(sizeof(*s));
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006228
6229 if (!s) {
6230 return NULL;
6231 }
6232
Joshua Habermanf41049a2022-01-21 14:41:25 -08006233 s->arena = upb_Arena_New();
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006234 s->bytes_loaded = 0;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006235
Joshua Habermandd69a482021-05-17 22:40:33 -07006236 if (!upb_strtable_init(&s->syms, 32, s->arena) ||
Joshua Habermanf41049a2022-01-21 14:41:25 -08006237 !upb_strtable_init(&s->files, 4, s->arena) ||
6238 !upb_inttable_init(&s->exts, s->arena)) {
6239 goto err;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006240 }
Joshua Habermanf41049a2022-01-21 14:41:25 -08006241
6242 s->extreg = upb_ExtensionRegistry_New(s->arena);
6243 if (!s->extreg) goto err;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006244 return s;
Joshua Habermanf41049a2022-01-21 14:41:25 -08006245
6246err:
6247 upb_Arena_Free(s->arena);
6248 upb_gfree(s);
6249 return NULL;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006250}
6251
Joshua Habermanf41049a2022-01-21 14:41:25 -08006252static const void* symtab_lookup(const upb_DefPool* s, const char* sym,
6253 upb_deftype_t type) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006254 upb_value v;
Joshua Habermanf41049a2022-01-21 14:41:25 -08006255 return upb_strtable_lookup(&s->syms, sym, &v) ? unpack_def(v, type) : NULL;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006256}
6257
Joshua Habermanf41049a2022-01-21 14:41:25 -08006258static const void* symtab_lookup2(const upb_DefPool* s, const char* sym,
6259 size_t size, upb_deftype_t type) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006260 upb_value v;
Joshua Habermanf41049a2022-01-21 14:41:25 -08006261 return upb_strtable_lookup2(&s->syms, sym, size, &v) ? unpack_def(v, type)
6262 : NULL;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006263}
6264
Joshua Habermanf41049a2022-01-21 14:41:25 -08006265const upb_MessageDef* upb_DefPool_FindMessageByName(const upb_DefPool* s,
6266 const char* sym) {
6267 return symtab_lookup(s, sym, UPB_DEFTYPE_MSG);
6268}
6269
6270const upb_MessageDef* upb_DefPool_FindMessageByNameWithSize(
6271 const upb_DefPool* s, const char* sym, size_t len) {
6272 return symtab_lookup2(s, sym, len, UPB_DEFTYPE_MSG);
6273}
6274
6275const upb_EnumDef* upb_DefPool_FindEnumByName(const upb_DefPool* s,
6276 const char* sym) {
6277 return symtab_lookup(s, sym, UPB_DEFTYPE_ENUM);
6278}
6279
6280const upb_EnumValueDef* upb_DefPool_FindEnumByNameval(const upb_DefPool* s,
6281 const char* sym) {
6282 return symtab_lookup(s, sym, UPB_DEFTYPE_ENUMVAL);
6283}
6284
6285const upb_FileDef* upb_DefPool_FindFileByName(const upb_DefPool* s,
6286 const char* name) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006287 upb_value v;
Joshua Habermanf41049a2022-01-21 14:41:25 -08006288 return upb_strtable_lookup(&s->files, name, &v)
6289 ? unpack_def(v, UPB_DEFTYPE_FILE)
6290 : NULL;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006291}
6292
Joshua Habermanf41049a2022-01-21 14:41:25 -08006293const upb_FileDef* upb_DefPool_FindFileByNameWithSize(const upb_DefPool* s,
6294 const char* name,
6295 size_t len) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006296 upb_value v;
Joshua Habermanf41049a2022-01-21 14:41:25 -08006297 return upb_strtable_lookup2(&s->files, name, len, &v)
6298 ? unpack_def(v, UPB_DEFTYPE_FILE)
6299 : NULL;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006300}
6301
Joshua Habermanf41049a2022-01-21 14:41:25 -08006302const upb_FieldDef* upb_DefPool_FindExtensionByNameWithSize(
6303 const upb_DefPool* s, const char* name, size_t size) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006304 upb_value v;
Joshua Habermanf41049a2022-01-21 14:41:25 -08006305 if (!upb_strtable_lookup2(&s->syms, name, size, &v)) return NULL;
6306
6307 switch (deftype(v)) {
6308 case UPB_DEFTYPE_FIELD:
6309 return unpack_def(v, UPB_DEFTYPE_FIELD);
6310 case UPB_DEFTYPE_MSG: {
6311 const upb_MessageDef* m = unpack_def(v, UPB_DEFTYPE_MSG);
6312 return m->in_message_set ? &m->nested_exts[0] : NULL;
6313 }
6314 default:
6315 break;
6316 }
6317
6318 return NULL;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006319}
6320
Joshua Habermanf41049a2022-01-21 14:41:25 -08006321const upb_FieldDef* upb_DefPool_FindExtensionByName(const upb_DefPool* s,
6322 const char* sym) {
6323 return upb_DefPool_FindExtensionByNameWithSize(s, sym, strlen(sym));
6324}
6325
6326const upb_ServiceDef* upb_DefPool_FindServiceByName(const upb_DefPool* s,
6327 const char* name) {
6328 return symtab_lookup(s, name, UPB_DEFTYPE_SERVICE);
6329}
6330
6331const upb_ServiceDef* upb_DefPool_FindServiceByNameWithSize(
6332 const upb_DefPool* s, const char* name, size_t size) {
6333 return symtab_lookup2(s, name, size, UPB_DEFTYPE_SERVICE);
6334}
6335
6336const upb_FileDef* upb_DefPool_FindFileContainingSymbol(const upb_DefPool* s,
6337 const char* name) {
6338 upb_value v;
6339 // TODO(haberman): non-extension fields and oneofs.
6340 if (upb_strtable_lookup(&s->syms, name, &v)) {
6341 switch (deftype(v)) {
6342 case UPB_DEFTYPE_EXT: {
6343 const upb_FieldDef* f = unpack_def(v, UPB_DEFTYPE_EXT);
6344 return upb_FieldDef_File(f);
6345 }
6346 case UPB_DEFTYPE_MSG: {
6347 const upb_MessageDef* m = unpack_def(v, UPB_DEFTYPE_MSG);
6348 return upb_MessageDef_File(m);
6349 }
6350 case UPB_DEFTYPE_ENUM: {
6351 const upb_EnumDef* e = unpack_def(v, UPB_DEFTYPE_ENUM);
6352 return upb_EnumDef_File(e);
6353 }
6354 case UPB_DEFTYPE_ENUMVAL: {
6355 const upb_EnumValueDef* ev = unpack_def(v, UPB_DEFTYPE_ENUMVAL);
6356 return upb_EnumDef_File(upb_EnumValueDef_Enum(ev));
6357 }
6358 case UPB_DEFTYPE_SERVICE: {
6359 const upb_ServiceDef* service = unpack_def(v, UPB_DEFTYPE_SERVICE);
6360 return upb_ServiceDef_File(service);
6361 }
6362 default:
6363 UPB_UNREACHABLE();
6364 }
6365 }
6366
6367 const char* last_dot = strrchr(name, '.');
6368 if (last_dot) {
6369 const upb_MessageDef* parent =
6370 upb_DefPool_FindMessageByNameWithSize(s, name, last_dot - name);
6371 if (parent) {
6372 const char* shortname = last_dot + 1;
6373 if (upb_MessageDef_FindByNameWithSize(parent, shortname,
6374 strlen(shortname), NULL, NULL)) {
6375 return upb_MessageDef_File(parent);
6376 }
6377 }
6378 }
6379
6380 return NULL;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006381}
6382
6383/* Code to build defs from descriptor protos. *********************************/
6384
6385/* There is a question of how much validation to do here. It will be difficult
6386 * to perfectly match the amount of validation performed by proto2. But since
6387 * this code is used to directly build defs from Ruby (for example) we do need
6388 * to validate important constraints like uniqueness of names and numbers. */
6389
/* Reports an out-of-memory error through symtab_oomerr() when |x| evaluates
 * to false/NULL.  Requires a `symtab_addctx* ctx` in the enclosing scope.
 *
 * The body is wrapped in do { } while (0) so that `CHK_OOM(x);` is exactly one
 * statement: it cannot capture a following `else`, and it composes safely
 * with unbraced if/else at the call site. */
#define CHK_OOM(x)        \
  do {                    \
    if (!(x)) {           \
      symtab_oomerr(ctx); \
    }                     \
  } while (0)
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006394
6395typedef struct {
Joshua Habermanf41049a2022-01-21 14:41:25 -08006396 upb_DefPool* symtab;
6397 upb_FileDef* file; /* File we are building. */
6398 upb_Arena* arena; /* Allocate defs here. */
6399 upb_Arena* tmp_arena; /* For temporary allocations. */
6400 const upb_MiniTable_File* layout; /* NULL if we should build layouts. */
6401 int enum_count; /* Count of enums built so far. */
6402 int msg_count; /* Count of messages built so far. */
6403 int ext_count; /* Count of extensions built so far. */
6404 upb_Status* status; /* Record errors here. */
6405 jmp_buf err; /* longjmp() on error. */
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006406} symtab_addctx;
6407
Joshua Habermanf41049a2022-01-21 14:41:25 -08006408UPB_NORETURN UPB_NOINLINE UPB_PRINTF(2, 3) static void symtab_errf(
6409 symtab_addctx* ctx, const char* fmt, ...) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006410 va_list argp;
6411 va_start(argp, fmt);
Joshua Habermanf41049a2022-01-21 14:41:25 -08006412 upb_Status_VSetErrorFormat(ctx->status, fmt, argp);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006413 va_end(argp);
6414 UPB_LONGJMP(ctx->err, 1);
6415}
6416
Joshua Habermanf41049a2022-01-21 14:41:25 -08006417UPB_NORETURN UPB_NOINLINE static void symtab_oomerr(symtab_addctx* ctx) {
6418 upb_Status_setoom(ctx->status);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006419 UPB_LONGJMP(ctx->err, 1);
6420}
6421
Joshua Habermanf41049a2022-01-21 14:41:25 -08006422void* symtab_alloc(symtab_addctx* ctx, size_t bytes) {
6423 if (bytes == 0) return NULL;
6424 void* ret = upb_Arena_Malloc(ctx->arena, bytes);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006425 if (!ret) symtab_oomerr(ctx);
6426 return ret;
6427}
6428
/* Copies the options of |proto| verbatim into |target|.
 *
 * Serialize + parse is used as the deep copy: the source options are
 * serialized into ctx->tmp_arena and then parsed into ctx->arena.  Either
 * step failing is reported via CHK_OOM.  When |proto| carries no options,
 * |target| is pointed at the shared |opt_default| instead.
 *
 * Requires `ctx` and `opt_default` to be visible at the expansion site. */
#define SET_OPTIONS(target, desc_type, options_type, proto)                   \
  if (google_protobuf_##desc_type##_has_options(proto)) {                     \
    size_t size;                                                              \
    char* pb = google_protobuf_##options_type##_serialize(                    \
        google_protobuf_##desc_type##_options(proto), ctx->tmp_arena, &size); \
    CHK_OOM(pb);                                                              \
    target = google_protobuf_##options_type##_parse(pb, size, ctx->arena);    \
    CHK_OOM(target);                                                          \
  } else {                                                                    \
    target = (const google_protobuf_##options_type*)opt_default;              \
  }
6442
6443static void check_ident(symtab_addctx* ctx, upb_StringView name, bool full) {
6444 const char* str = name.data;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006445 size_t len = name.size;
6446 bool start = true;
6447 size_t i;
6448 for (i = 0; i < len; i++) {
6449 char c = str[i];
6450 if (c == '.') {
6451 if (start || !full) {
6452 symtab_errf(ctx, "invalid name: unexpected '.' (%.*s)", (int)len, str);
6453 }
6454 start = true;
6455 } else if (start) {
6456 if (!upb_isletter(c)) {
6457 symtab_errf(
6458 ctx,
6459 "invalid name: path components must start with a letter (%.*s)",
6460 (int)len, str);
6461 }
6462 start = false;
6463 } else {
6464 if (!upb_isalphanum(c)) {
6465 symtab_errf(ctx, "invalid name: non-alphanumeric character (%.*s)",
6466 (int)len, str);
6467 }
6468 }
6469 }
6470 if (start) {
6471 symtab_errf(ctx, "invalid name: empty part (%.*s)", (int)len, str);
6472 }
6473}
6474
/* Returns n / d rounded up to the next whole number. */
static size_t div_round_up(size_t n, size_t d) {
  const size_t biased = n + d - 1;
  return biased / d;
}
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006476
Joshua Habermanf41049a2022-01-21 14:41:25 -08006477static size_t upb_MessageValue_sizeof(upb_CType type) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006478 switch (type) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08006479 case kUpb_CType_Double:
6480 case kUpb_CType_Int64:
6481 case kUpb_CType_UInt64:
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006482 return 8;
Joshua Habermanf41049a2022-01-21 14:41:25 -08006483 case kUpb_CType_Enum:
6484 case kUpb_CType_Int32:
6485 case kUpb_CType_UInt32:
6486 case kUpb_CType_Float:
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006487 return 4;
Joshua Habermanf41049a2022-01-21 14:41:25 -08006488 case kUpb_CType_Bool:
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006489 return 1;
Joshua Habermanf41049a2022-01-21 14:41:25 -08006490 case kUpb_CType_Message:
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006491 return sizeof(void*);
Joshua Habermanf41049a2022-01-21 14:41:25 -08006492 case kUpb_CType_Bytes:
6493 case kUpb_CType_String:
6494 return sizeof(upb_StringView);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006495 }
6496 UPB_UNREACHABLE();
6497}
6498
Joshua Habermanf41049a2022-01-21 14:41:25 -08006499static uint8_t upb_msg_fielddefsize(const upb_FieldDef* f) {
6500 if (upb_MessageDef_IsMapEntry(upb_FieldDef_ContainingType(f))) {
6501 upb_MapEntry ent;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006502 UPB_ASSERT(sizeof(ent.k) == sizeof(ent.v));
6503 return sizeof(ent.k);
Joshua Habermanf41049a2022-01-21 14:41:25 -08006504 } else if (upb_FieldDef_IsRepeated(f)) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006505 return sizeof(void*);
6506 } else {
Joshua Habermanf41049a2022-01-21 14:41:25 -08006507 return upb_MessageValue_sizeof(upb_FieldDef_CType(f));
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006508 }
6509}
6510
Joshua Habermanf41049a2022-01-21 14:41:25 -08006511static uint32_t upb_MiniTable_place(symtab_addctx* ctx, upb_MiniTable* l,
6512 size_t size, const upb_MessageDef* m) {
6513 size_t ofs = UPB_ALIGN_UP(l->size, size);
6514 size_t next = ofs + size;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006515
Joshua Habermanf41049a2022-01-21 14:41:25 -08006516 if (next > UINT16_MAX) {
6517 symtab_errf(ctx, "size of message %s exceeded max size of %zu bytes",
6518 upb_MessageDef_FullName(m), (size_t)UINT16_MAX);
6519 }
6520
6521 l->size = next;
6522 return ofs;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006523}
6524
Joshua Habermanf41049a2022-01-21 14:41:25 -08006525static int field_number_cmp(const void* p1, const void* p2) {
6526 const upb_MiniTable_Field* f1 = p1;
6527 const upb_MiniTable_Field* f2 = p2;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006528 return f1->number - f2->number;
6529}
6530
Joshua Habermanf41049a2022-01-21 14:41:25 -08006531static void assign_layout_indices(const upb_MessageDef* m, upb_MiniTable* l,
6532 upb_MiniTable_Field* fields) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006533 int i;
Joshua Habermanf41049a2022-01-21 14:41:25 -08006534 int n = upb_MessageDef_numfields(m);
Joshua Haberman9d578a32021-08-02 15:32:01 -07006535 int dense_below = 0;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006536 for (i = 0; i < n; i++) {
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07006537 upb_FieldDef* f =
6538 (upb_FieldDef*)upb_MessageDef_FindFieldByNumber(m, fields[i].number);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006539 UPB_ASSERT(f);
6540 f->layout_index = i;
Joshua Haberman9d578a32021-08-02 15:32:01 -07006541 if (i < UINT8_MAX && fields[i].number == i + 1 &&
Joshua Habermanf41049a2022-01-21 14:41:25 -08006542 (i == 0 || fields[i - 1].number == i)) {
Joshua Haberman9d578a32021-08-02 15:32:01 -07006543 dense_below = i + 1;
6544 }
6545 }
6546 l->dense_below = dense_below;
6547}
6548
Joshua Habermanf41049a2022-01-21 14:41:25 -08006549static uint8_t map_descriptortype(const upb_FieldDef* f) {
6550 uint8_t type = upb_FieldDef_Type(f);
6551 /* See TableDescriptorType() in upbc/generator.cc for details and
6552 * rationale of these exceptions. */
6553 if (type == kUpb_FieldType_String && f->file->syntax == kUpb_Syntax_Proto2) {
6554 return kUpb_FieldType_Bytes;
6555 } else if (type == kUpb_FieldType_Enum &&
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07006556 (f->sub.enumdef->file->syntax == kUpb_Syntax_Proto3 ||
6557 UPB_TREAT_PROTO2_ENUMS_LIKE_PROTO3 ||
6558 // TODO(https://github.com/protocolbuffers/upb/issues/541):
6559 // fix map enum values to check for unknown enum values and put
6560 // them in the unknown field set.
6561 upb_MessageDef_IsMapEntry(upb_FieldDef_ContainingType(f)))) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08006562 return kUpb_FieldType_Int32;
Joshua Haberman9d578a32021-08-02 15:32:01 -07006563 }
Joshua Habermanf41049a2022-01-21 14:41:25 -08006564 return type;
6565}
Joshua Haberman9d578a32021-08-02 15:32:01 -07006566
Joshua Habermanf41049a2022-01-21 14:41:25 -08006567static void fill_fieldlayout(upb_MiniTable_Field* field,
6568 const upb_FieldDef* f) {
6569 field->number = upb_FieldDef_Number(f);
6570 field->descriptortype = map_descriptortype(f);
6571
6572 if (upb_FieldDef_IsMap(f)) {
6573 field->mode =
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07006574 kUpb_FieldMode_Map | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift);
Joshua Habermanf41049a2022-01-21 14:41:25 -08006575 } else if (upb_FieldDef_IsRepeated(f)) {
6576 field->mode =
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07006577 kUpb_FieldMode_Array | (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift);
Joshua Haberman9d578a32021-08-02 15:32:01 -07006578 } else {
Joshua Habermanf41049a2022-01-21 14:41:25 -08006579 /* Maps descriptor type -> elem_size_lg2. */
6580 static const uint8_t sizes[] = {
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07006581 -1, /* invalid descriptor type */
6582 kUpb_FieldRep_8Byte, /* DOUBLE */
6583 kUpb_FieldRep_4Byte, /* FLOAT */
6584 kUpb_FieldRep_8Byte, /* INT64 */
6585 kUpb_FieldRep_8Byte, /* UINT64 */
6586 kUpb_FieldRep_4Byte, /* INT32 */
6587 kUpb_FieldRep_8Byte, /* FIXED64 */
6588 kUpb_FieldRep_4Byte, /* FIXED32 */
6589 kUpb_FieldRep_1Byte, /* BOOL */
6590 kUpb_FieldRep_StringView, /* STRING */
6591 kUpb_FieldRep_Pointer, /* GROUP */
6592 kUpb_FieldRep_Pointer, /* MESSAGE */
6593 kUpb_FieldRep_StringView, /* BYTES */
6594 kUpb_FieldRep_4Byte, /* UINT32 */
6595 kUpb_FieldRep_4Byte, /* ENUM */
6596 kUpb_FieldRep_4Byte, /* SFIXED32 */
6597 kUpb_FieldRep_8Byte, /* SFIXED64 */
6598 kUpb_FieldRep_4Byte, /* SINT32 */
6599 kUpb_FieldRep_8Byte, /* SINT64 */
Joshua Habermanf41049a2022-01-21 14:41:25 -08006600 };
6601 field->mode = kUpb_FieldMode_Scalar |
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07006602 (sizes[field->descriptortype] << kUpb_FieldRep_Shift);
Joshua Haberman9d578a32021-08-02 15:32:01 -07006603 }
6604
Joshua Habermanf41049a2022-01-21 14:41:25 -08006605 if (upb_FieldDef_IsPacked(f)) {
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07006606 field->mode |= kUpb_LabelFlags_IsPacked;
Joshua Habermanf41049a2022-01-21 14:41:25 -08006607 }
6608
6609 if (upb_FieldDef_IsExtension(f)) {
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07006610 field->mode |= kUpb_LabelFlags_IsExtension;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006611 }
6612}
6613
6614/* This function is the dynamic equivalent of message_layout.{cc,h} in upbc.
6615 * It computes a dynamic layout for all of the fields in |m|. */
Joshua Habermanf41049a2022-01-21 14:41:25 -08006616static void make_layout(symtab_addctx* ctx, const upb_MessageDef* m) {
6617 upb_MiniTable* l = (upb_MiniTable*)m->layout;
6618 size_t field_count = upb_MessageDef_numfields(m);
6619 size_t sublayout_count = 0;
6620 upb_MiniTable_Sub* subs;
6621 upb_MiniTable_Field* fields;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006622
Joshua Habermanf41049a2022-01-21 14:41:25 -08006623 memset(l, 0, sizeof(*l) + sizeof(_upb_FastTable_Entry));
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006624
Joshua Habermandd69a482021-05-17 22:40:33 -07006625 /* Count sub-messages. */
6626 for (size_t i = 0; i < field_count; i++) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08006627 const upb_FieldDef* f = &m->fields[i];
6628 if (upb_FieldDef_IsSubMessage(f)) {
6629 sublayout_count++;
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07006630 }
6631 if (upb_FieldDef_CType(f) == kUpb_CType_Enum &&
6632 f->sub.enumdef->file->syntax == kUpb_Syntax_Proto2) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08006633 sublayout_count++;
Joshua Habermandd69a482021-05-17 22:40:33 -07006634 }
6635 }
6636
6637 fields = symtab_alloc(ctx, field_count * sizeof(*fields));
Joshua Habermanf41049a2022-01-21 14:41:25 -08006638 subs = symtab_alloc(ctx, sublayout_count * sizeof(*subs));
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006639
Joshua Habermanf41049a2022-01-21 14:41:25 -08006640 l->field_count = upb_MessageDef_numfields(m);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006641 l->fields = fields;
Joshua Habermanf41049a2022-01-21 14:41:25 -08006642 l->subs = subs;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006643 l->table_mask = 0;
Joshua Habermanf41049a2022-01-21 14:41:25 -08006644 l->required_count = 0;
6645
6646 if (upb_MessageDef_ExtensionRangeCount(m) > 0) {
6647 if (google_protobuf_MessageOptions_message_set_wire_format(m->opts)) {
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07006648 l->ext = kUpb_ExtMode_IsMessageSet;
Joshua Habermanf41049a2022-01-21 14:41:25 -08006649 } else {
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07006650 l->ext = kUpb_ExtMode_Extendable;
Joshua Habermanf41049a2022-01-21 14:41:25 -08006651 }
6652 } else {
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07006653 l->ext = kUpb_ExtMode_NonExtendable;
Joshua Habermanf41049a2022-01-21 14:41:25 -08006654 }
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006655
6656 /* TODO(haberman): initialize fast tables so that reflection-based parsing
6657 * can get the same speeds as linked-in types. */
6658 l->fasttable[0].field_parser = &fastdecode_generic;
6659 l->fasttable[0].field_data = 0;
6660
Joshua Habermanf41049a2022-01-21 14:41:25 -08006661 if (upb_MessageDef_IsMapEntry(m)) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006662 /* TODO(haberman): refactor this method so this special case is more
6663 * elegant. */
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07006664 const upb_FieldDef* key = upb_MessageDef_FindFieldByNumber(m, 1);
6665 const upb_FieldDef* val = upb_MessageDef_FindFieldByNumber(m, 2);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006666 fields[0].number = 1;
6667 fields[1].number = 2;
Joshua Habermanf41049a2022-01-21 14:41:25 -08006668 fields[0].mode = kUpb_FieldMode_Scalar;
6669 fields[1].mode = kUpb_FieldMode_Scalar;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006670 fields[0].presence = 0;
6671 fields[1].presence = 0;
Joshua Habermanf41049a2022-01-21 14:41:25 -08006672 fields[0].descriptortype = map_descriptortype(key);
6673 fields[1].descriptortype = map_descriptortype(val);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006674 fields[0].offset = 0;
Joshua Habermanf41049a2022-01-21 14:41:25 -08006675 fields[1].offset = sizeof(upb_StringView);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006676 fields[1].submsg_index = 0;
6677
Joshua Habermanf41049a2022-01-21 14:41:25 -08006678 if (upb_FieldDef_CType(val) == kUpb_CType_Message) {
6679 subs[0].submsg = upb_FieldDef_MessageSubDef(val)->layout;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006680 }
6681
Joshua Habermanf41049a2022-01-21 14:41:25 -08006682 upb_FieldDef* fielddefs = (upb_FieldDef*)&m->fields[0];
6683 UPB_ASSERT(fielddefs[0].number_ == 1);
6684 UPB_ASSERT(fielddefs[1].number_ == 2);
6685 fielddefs[0].layout_index = 0;
6686 fielddefs[1].layout_index = 1;
6687
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006688 l->field_count = 2;
Joshua Habermanf41049a2022-01-21 14:41:25 -08006689 l->size = 2 * sizeof(upb_StringView);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006690 l->size = UPB_ALIGN_UP(l->size, 8);
Joshua Habermanf41049a2022-01-21 14:41:25 -08006691 l->dense_below = 2;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006692 return;
6693 }
6694
6695 /* Allocate data offsets in three stages:
6696 *
6697 * 1. hasbits.
6698 * 2. regular fields.
6699 * 3. oneof fields.
6700 *
6701 * OPT: There is a lot of room for optimization here to minimize the size.
6702 */
6703
Joshua Habermanf41049a2022-01-21 14:41:25 -08006704 /* Assign hasbits for required fields first. */
6705 size_t hasbit = 0;
6706
6707 for (int i = 0; i < m->field_count; i++) {
6708 const upb_FieldDef* f = &m->fields[i];
6709 upb_MiniTable_Field* field = &fields[upb_FieldDef_Index(f)];
6710 if (upb_FieldDef_Label(f) == kUpb_Label_Required) {
6711 field->presence = ++hasbit;
6712 if (hasbit >= 63) {
6713 symtab_errf(ctx, "Message with >=63 required fields: %s",
6714 upb_MessageDef_FullName(m));
6715 }
6716 l->required_count++;
6717 }
6718 }
6719
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006720 /* Allocate hasbits and set basic field attributes. */
Joshua Habermanf41049a2022-01-21 14:41:25 -08006721 sublayout_count = 0;
6722 for (int i = 0; i < m->field_count; i++) {
6723 const upb_FieldDef* f = &m->fields[i];
6724 upb_MiniTable_Field* field = &fields[upb_FieldDef_Index(f)];
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006725
Joshua Haberman9d578a32021-08-02 15:32:01 -07006726 fill_fieldlayout(field, f);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006727
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07006728 if (field->descriptortype == kUpb_FieldType_Message ||
6729 field->descriptortype == kUpb_FieldType_Group) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08006730 field->submsg_index = sublayout_count++;
6731 subs[field->submsg_index].submsg = upb_FieldDef_MessageSubDef(f)->layout;
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07006732 } else if (field->descriptortype == kUpb_FieldType_Enum) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08006733 field->submsg_index = sublayout_count++;
6734 subs[field->submsg_index].subenum = upb_FieldDef_EnumSubDef(f)->layout;
6735 UPB_ASSERT(subs[field->submsg_index].subenum);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006736 }
6737
Joshua Habermanf41049a2022-01-21 14:41:25 -08006738 if (upb_FieldDef_Label(f) == kUpb_Label_Required) {
6739 /* Hasbit was already assigned. */
6740 } else if (upb_FieldDef_HasPresence(f) &&
6741 !upb_FieldDef_RealContainingOneof(f)) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006742 /* We don't use hasbit 0, so that 0 can indicate "no presence" in the
6743 * table. This wastes one hasbit, but we don't worry about it for now. */
6744 field->presence = ++hasbit;
6745 } else {
6746 field->presence = 0;
6747 }
6748 }
6749
6750 /* Account for space used by hasbits. */
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07006751 l->size = hasbit ? div_round_up(hasbit + 1, 8) : 0;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006752
6753 /* Allocate non-oneof fields. */
Joshua Habermanf41049a2022-01-21 14:41:25 -08006754 for (int i = 0; i < m->field_count; i++) {
6755 const upb_FieldDef* f = &m->fields[i];
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006756 size_t field_size = upb_msg_fielddefsize(f);
Joshua Habermanf41049a2022-01-21 14:41:25 -08006757 size_t index = upb_FieldDef_Index(f);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006758
Joshua Habermanf41049a2022-01-21 14:41:25 -08006759 if (upb_FieldDef_RealContainingOneof(f)) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006760 /* Oneofs are handled separately below. */
6761 continue;
6762 }
6763
Joshua Habermanf41049a2022-01-21 14:41:25 -08006764 fields[index].offset = upb_MiniTable_place(ctx, l, field_size, m);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006765 }
6766
6767 /* Allocate oneof fields. Each oneof field consists of a uint32 for the case
6768 * and space for the actual data. */
Joshua Habermanf41049a2022-01-21 14:41:25 -08006769 for (int i = 0; i < m->oneof_count; i++) {
6770 const upb_OneofDef* o = &m->oneofs[i];
6771 size_t case_size = sizeof(uint32_t); /* Could potentially optimize this. */
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006772 size_t field_size = 0;
6773 uint32_t case_offset;
6774 uint32_t data_offset;
6775
Joshua Habermanf41049a2022-01-21 14:41:25 -08006776 if (upb_OneofDef_IsSynthetic(o)) continue;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006777
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07006778 if (o->field_count == 0) {
6779 symtab_errf(ctx, "Oneof must have at least one field (%s)", o->full_name);
6780 }
6781
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006782 /* Calculate field size: the max of all field sizes. */
Joshua Habermanf41049a2022-01-21 14:41:25 -08006783 for (int j = 0; j < o->field_count; j++) {
6784 const upb_FieldDef* f = o->fields[j];
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006785 field_size = UPB_MAX(field_size, upb_msg_fielddefsize(f));
6786 }
6787
6788 /* Align and allocate case offset. */
Joshua Habermanf41049a2022-01-21 14:41:25 -08006789 case_offset = upb_MiniTable_place(ctx, l, case_size, m);
6790 data_offset = upb_MiniTable_place(ctx, l, field_size, m);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006791
Joshua Habermanf41049a2022-01-21 14:41:25 -08006792 for (int i = 0; i < o->field_count; i++) {
6793 const upb_FieldDef* f = o->fields[i];
6794 fields[upb_FieldDef_Index(f)].offset = data_offset;
6795 fields[upb_FieldDef_Index(f)].presence = ~case_offset;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006796 }
6797 }
6798
6799 /* Size of the entire structure should be a multiple of its greatest
6800 * alignment. TODO: track overall alignment for real? */
6801 l->size = UPB_ALIGN_UP(l->size, 8);
6802
6803 /* Sort fields by number. */
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07006804 if (fields) {
6805 qsort(fields, upb_MessageDef_numfields(m), sizeof(*fields),
6806 field_number_cmp);
6807 }
Joshua Haberman9d578a32021-08-02 15:32:01 -07006808 assign_layout_indices(m, l, fields);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006809}
6810
Joshua Habermanf41049a2022-01-21 14:41:25 -08006811static char* strviewdup(symtab_addctx* ctx, upb_StringView view) {
6812 char* ret = upb_strdup2(view.data, view.size, ctx->arena);
6813 CHK_OOM(ret);
6814 return ret;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006815}
6816
/* Returns true when the |n| bytes at |a| are exactly the NUL-terminated
 * string |b| (same length, same bytes). */
static bool streql2(const char* a, size_t n, const char* b) {
  if (strlen(b) != n) {
    return false;
  }
  return memcmp(a, b, n) == 0;
}
6820
Joshua Habermanf41049a2022-01-21 14:41:25 -08006821static bool streql_view(upb_StringView view, const char* b) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006822 return streql2(view.data, view.size, b);
6823}
6824
Joshua Habermanf41049a2022-01-21 14:41:25 -08006825static const char* makefullname(symtab_addctx* ctx, const char* prefix,
6826 upb_StringView name) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006827 if (prefix) {
6828 /* ret = prefix + '.' + name; */
6829 size_t n = strlen(prefix);
Joshua Habermanf41049a2022-01-21 14:41:25 -08006830 char* ret = symtab_alloc(ctx, n + name.size + 2);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006831 strcpy(ret, prefix);
6832 ret[n] = '.';
6833 memcpy(&ret[n + 1], name.data, name.size);
6834 ret[n + 1 + name.size] = '\0';
6835 return ret;
6836 } else {
6837 return strviewdup(ctx, name);
6838 }
6839}
6840
Joshua Habermanf41049a2022-01-21 14:41:25 -08006841static void finalize_oneofs(symtab_addctx* ctx, upb_MessageDef* m) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006842 int i;
6843 int synthetic_count = 0;
Joshua Habermanf41049a2022-01-21 14:41:25 -08006844 upb_OneofDef* mutable_oneofs = (upb_OneofDef*)m->oneofs;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006845
6846 for (i = 0; i < m->oneof_count; i++) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08006847 upb_OneofDef* o = &mutable_oneofs[i];
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006848
6849 if (o->synthetic && o->field_count != 1) {
6850 symtab_errf(ctx, "Synthetic oneofs must have one field, not %d: %s",
Joshua Habermanf41049a2022-01-21 14:41:25 -08006851 o->field_count, upb_OneofDef_Name(o));
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006852 }
6853
6854 if (o->synthetic) {
6855 synthetic_count++;
6856 } else if (synthetic_count != 0) {
6857 symtab_errf(ctx, "Synthetic oneofs must be after all other oneofs: %s",
Joshua Habermanf41049a2022-01-21 14:41:25 -08006858 upb_OneofDef_Name(o));
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006859 }
6860
Joshua Habermanf41049a2022-01-21 14:41:25 -08006861 o->fields = symtab_alloc(ctx, sizeof(upb_FieldDef*) * o->field_count);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006862 o->field_count = 0;
6863 }
6864
6865 for (i = 0; i < m->field_count; i++) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08006866 const upb_FieldDef* f = &m->fields[i];
6867 upb_OneofDef* o = (upb_OneofDef*)upb_FieldDef_ContainingOneof(f);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006868 if (o) {
6869 o->fields[o->field_count++] = f;
6870 }
6871 }
6872
6873 m->real_oneof_count = m->oneof_count - synthetic_count;
6874}
6875
/* Converts the field name |name| to its JSON form and writes it to |buf|.
 *
 * The transformation follows the spec:
 *   1. upper case all letters after an underscore.
 *   2. remove all underscores.
 *
 * At most |len| bytes are written to |buf|; when the output does not fit it
 * is truncated and, if |len| > 0, still NUL-terminated.  With |len| == 0
 * nothing is written, so |buf| may be NULL (useful for measuring).
 *
 * Returns the number of bytes the complete result needs, including the
 * terminating NUL.  A NULL |name| produces an empty string and returns 0. */
size_t getjsonname(const char* name, char* buf, size_t len) {
  size_t src, dst = 0;
  bool ucase_next = false;

/* Appends one byte if it fits; the last available slot always receives the
 * terminator.  Wrapped in do/while(0) so it is a single statement. */
#define WRITE(byte)          \
  do {                       \
    ++dst;                   \
    if (dst < len)           \
      buf[dst - 1] = (byte); \
    else if (dst == len)     \
      buf[dst - 1] = '\0';   \
  } while (0)

  if (!name) {
    WRITE('\0');
    return 0;
  }

  for (src = 0; name[src]; src++) {
    if (name[src] == '_') {
      ucase_next = true;
      continue;
    }

    if (ucase_next) {
      /* toupper() is undefined for negative arguments other than EOF, so the
       * byte must go through unsigned char first. */
      WRITE((char)toupper((unsigned char)name[src]));
      ucase_next = false;
    } else {
      WRITE(name[src]);
    }
  }

  WRITE('\0');
  return dst;

#undef WRITE
}
6915
Joshua Habermanf41049a2022-01-21 14:41:25 -08006916static char* makejsonname(symtab_addctx* ctx, const char* name) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006917 size_t size = getjsonname(name, NULL, 0);
6918 char* json_name = symtab_alloc(ctx, size);
6919 getjsonname(name, json_name, size);
6920 return json_name;
6921}
6922
Joshua Habermanf41049a2022-01-21 14:41:25 -08006923/* Adds a symbol |v| to the symtab, which must be a def pointer previously
6924 * packed with pack_def(). The def's pointer to upb_FileDef* must be set before
6925 * adding, so we know which entries to remove if building this file fails. */
6926static void symtab_add(symtab_addctx* ctx, const char* name, upb_value v) {
6927 // TODO: table should support an operation "tryinsert" to avoid the double
6928 // lookup.
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006929 if (upb_strtable_lookup(&ctx->symtab->syms, name, NULL)) {
6930 symtab_errf(ctx, "duplicate symbol '%s'", name);
6931 }
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006932 size_t len = strlen(name);
Joshua Habermandd69a482021-05-17 22:40:33 -07006933 CHK_OOM(upb_strtable_insert(&ctx->symtab->syms, name, len, v,
6934 ctx->symtab->arena));
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006935}
6936
/* Shortens the dotted name held in base[0, *len) by one trailing component.
 *
 * On return *len is the index of the last '.' found at a position > 0, or 0
 * if there is none.  Returns false only when *len was already 0, i.e. there
 * was nothing left to remove. */
static bool remove_component(char* base, size_t* len) {
  size_t pos = *len;
  if (pos == 0) return false;

  /* Scan backwards from the last character; index 0 is never inspected. */
  while (--pos > 0) {
    if (base[pos] == '.') break;
  }

  *len = pos;
  return true;
}
6950
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006951/* Given a symbol and the base symbol inside which it is defined, find the
6952 * symbol's definition in t. */
Joshua Habermanf41049a2022-01-21 14:41:25 -08006953static const void* symtab_resolveany(symtab_addctx* ctx,
6954 const char* from_name_dbg,
6955 const char* base, upb_StringView sym,
6956 upb_deftype_t* type) {
6957 const upb_strtable* t = &ctx->symtab->syms;
6958 if (sym.size == 0) goto notfound;
6959 upb_value v;
6960 if (sym.data[0] == '.') {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006961 /* Symbols starting with '.' are absolute, so we do a single lookup.
6962 * Slice to omit the leading '.' */
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006963 if (!upb_strtable_lookup2(t, sym.data + 1, sym.size - 1, &v)) {
6964 goto notfound;
6965 }
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006966 } else {
Joshua Habermanf41049a2022-01-21 14:41:25 -08006967 /* Remove components from base until we find an entry or run out. */
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07006968 size_t baselen = base ? strlen(base) : 0;
6969 char* tmp = malloc(sym.size + baselen + 1);
Joshua Habermanf41049a2022-01-21 14:41:25 -08006970 while (1) {
6971 char* p = tmp;
6972 if (baselen) {
6973 memcpy(p, base, baselen);
6974 p[baselen] = '.';
6975 p += baselen + 1;
6976 }
6977 memcpy(p, sym.data, sym.size);
6978 p += sym.size;
6979 if (upb_strtable_lookup2(t, tmp, p - tmp, &v)) {
6980 break;
6981 }
6982 if (!remove_component(tmp, &baselen)) {
6983 free(tmp);
6984 goto notfound;
6985 }
6986 }
6987 free(tmp);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006988 }
6989
Joshua Habermanf41049a2022-01-21 14:41:25 -08006990 *type = deftype(v);
6991 return unpack_def(v, *type);
6992
Joshua Haberman9abf6e22021-01-13 12:16:25 -08006993notfound:
Joshua Habermanf41049a2022-01-21 14:41:25 -08006994 symtab_errf(ctx, "couldn't resolve name '" UPB_STRINGVIEW_FORMAT "'",
6995 UPB_STRINGVIEW_ARGS(sym));
6996}
6997
6998static const void* symtab_resolve(symtab_addctx* ctx, const char* from_name_dbg,
6999 const char* base, upb_StringView sym,
7000 upb_deftype_t type) {
7001 upb_deftype_t found_type;
7002 const void* ret =
7003 symtab_resolveany(ctx, from_name_dbg, base, sym, &found_type);
7004 if (ret && found_type != type) {
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07007005 symtab_errf(ctx,
7006 "type mismatch when resolving %s: couldn't find "
7007 "name " UPB_STRINGVIEW_FORMAT " with type=%d",
7008 from_name_dbg, UPB_STRINGVIEW_ARGS(sym), (int)type);
Joshua Habermanf41049a2022-01-21 14:41:25 -08007009 }
7010 return ret;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007011}
7012
7013static void create_oneofdef(
Joshua Habermanf41049a2022-01-21 14:41:25 -08007014 symtab_addctx* ctx, upb_MessageDef* m,
7015 const google_protobuf_OneofDescriptorProto* oneof_proto,
7016 const upb_OneofDef* _o) {
7017 upb_OneofDef* o = (upb_OneofDef*)_o;
7018 upb_StringView name = google_protobuf_OneofDescriptorProto_name(oneof_proto);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007019 upb_value v;
7020
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007021 o->parent = m;
7022 o->full_name = makefullname(ctx, m->full_name, name);
7023 o->field_count = 0;
7024 o->synthetic = false;
7025
Joshua Habermanf41049a2022-01-21 14:41:25 -08007026 SET_OPTIONS(o->opts, OneofDescriptorProto, OneofOptions, oneof_proto);
7027
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07007028 upb_value existing_v;
7029 if (upb_strtable_lookup2(&m->ntof, name.data, name.size, &existing_v)) {
7030 symtab_errf(ctx, "duplicate oneof name (%s)", o->full_name);
7031 }
7032
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007033 v = pack_def(o, UPB_DEFTYPE_ONEOF);
Joshua Habermandd69a482021-05-17 22:40:33 -07007034 CHK_OOM(upb_strtable_insert(&m->ntof, name.data, name.size, v, ctx->arena));
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007035
Joshua Habermandd69a482021-05-17 22:40:33 -07007036 CHK_OOM(upb_inttable_init(&o->itof, ctx->arena));
7037 CHK_OOM(upb_strtable_init(&o->ntof, 4, ctx->arena));
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007038}
7039
Joshua Habermanf41049a2022-01-21 14:41:25 -08007040static str_t* newstr(symtab_addctx* ctx, const char* data, size_t len) {
7041 str_t* ret = symtab_alloc(ctx, sizeof(*ret) + len);
7042 CHK_OOM(ret);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007043 ret->len = len;
7044 if (len) memcpy(ret->str, data, len);
7045 ret->str[len] = '\0';
7046 return ret;
7047}
7048
/* Reads one character from the cursor `*src` into `*ch` and advances the
 * cursor.  Returns false, leaving `*src` and `*ch` untouched, when the cursor
 * has already reached `end`. */
static bool upb_DefPool_TryGetChar(const char** src, const char* end,
                                   char* ch) {
  const char* cur = *src;
  if (cur == end) {
    return false;
  }
  *ch = *cur;
  *src = cur + 1;
  return true;
}
7056
7057static char upb_DefPool_TryGetHexDigit(symtab_addctx* ctx,
7058 const upb_FieldDef* f, const char** src,
7059 const char* end) {
7060 char ch;
7061 if (!upb_DefPool_TryGetChar(src, end, &ch)) return -1;
7062 if ('0' <= ch && ch <= '9') {
7063 return ch - '0';
7064 }
7065 ch = upb_ascii_lower(ch);
7066 if ('a' <= ch && ch <= 'f') {
7067 return ch - 'a' + 0xa;
7068 }
7069 *src -= 1; // Char wasn't actually a hex digit.
7070 return -1;
7071}
7072
7073static char upb_DefPool_ParseHexEscape(symtab_addctx* ctx,
7074 const upb_FieldDef* f, const char** src,
7075 const char* end) {
7076 char hex_digit = upb_DefPool_TryGetHexDigit(ctx, f, src, end);
7077 if (hex_digit < 0) {
7078 symtab_errf(ctx,
7079 "\\x cannot be followed by non-hex digit in field '%s' default",
7080 upb_FieldDef_FullName(f));
7081 return 0;
7082 }
7083 unsigned int ret = hex_digit;
7084 while ((hex_digit = upb_DefPool_TryGetHexDigit(ctx, f, src, end)) >= 0) {
7085 ret = (ret << 4) | hex_digit;
7086 }
7087 if (ret > 0xff) {
7088 symtab_errf(ctx, "Value of hex escape in field %s exceeds 8 bits",
7089 upb_FieldDef_FullName(f));
7090 return 0;
7091 }
7092 return ret;
7093}
7094
/* Consumes one octal digit at `*src` and returns its value (0..7).  Returns -1
 * without consuming anything when the input is exhausted or the next character
 * is not an octal digit. */
char upb_DefPool_TryGetOctalDigit(const char** src, const char* end) {
  char ch;
  if (!upb_DefPool_TryGetChar(src, end, &ch)) return -1;
  if (ch < '0' || ch > '7') {
    *src -= 1;  // Char wasn't actually an octal digit.
    return -1;
  }
  return ch - '0';
}
7104
7105static char upb_DefPool_ParseOctalEscape(symtab_addctx* ctx,
7106 const upb_FieldDef* f,
7107 const char** src, const char* end) {
7108 char ch = 0;
7109 for (int i = 0; i < 3; i++) {
7110 char digit;
7111 if ((digit = upb_DefPool_TryGetOctalDigit(src, end)) >= 0) {
7112 ch = (ch << 3) | digit;
7113 }
7114 }
7115 return ch;
7116}
7117
7118static char upb_DefPool_ParseEscape(symtab_addctx* ctx, const upb_FieldDef* f,
7119 const char** src, const char* end) {
7120 char ch;
7121 if (!upb_DefPool_TryGetChar(src, end, &ch)) {
7122 symtab_errf(ctx, "unterminated escape sequence in field %s",
7123 upb_FieldDef_FullName(f));
7124 return 0;
7125 }
7126 switch (ch) {
7127 case 'a':
7128 return '\a';
7129 case 'b':
7130 return '\b';
7131 case 'f':
7132 return '\f';
7133 case 'n':
7134 return '\n';
7135 case 'r':
7136 return '\r';
7137 case 't':
7138 return '\t';
7139 case 'v':
7140 return '\v';
7141 case '\\':
7142 return '\\';
7143 case '\'':
7144 return '\'';
7145 case '\"':
7146 return '\"';
7147 case '?':
7148 return '\?';
7149 case 'x':
7150 case 'X':
7151 return upb_DefPool_ParseHexEscape(ctx, f, src, end);
7152 case '0':
7153 case '1':
7154 case '2':
7155 case '3':
7156 case '4':
7157 case '5':
7158 case '6':
7159 case '7':
7160 *src -= 1;
7161 return upb_DefPool_ParseOctalEscape(ctx, f, src, end);
7162 }
7163 symtab_errf(ctx, "Unknown escape sequence: \\%c", ch);
7164}
7165
7166static str_t* unescape(symtab_addctx* ctx, const upb_FieldDef* f,
7167 const char* data, size_t len) {
7168 // Size here is an upper bound; escape sequences could ultimately shrink it.
7169 str_t* ret = symtab_alloc(ctx, sizeof(*ret) + len);
7170 char* dst = &ret->str[0];
7171 const char* src = data;
7172 const char* end = data + len;
7173
7174 while (src < end) {
7175 if (*src == '\\') {
7176 src++;
7177 *dst++ = upb_DefPool_ParseEscape(ctx, f, &src, end);
7178 } else {
7179 *dst++ = *src++;
7180 }
7181 }
7182
7183 ret->len = dst - &ret->str[0];
7184 return ret;
7185}
7186
7187static void parse_default(symtab_addctx* ctx, const char* str, size_t len,
7188 upb_FieldDef* f) {
7189 char* end;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007190 char nullz[64];
7191 errno = 0;
7192
Joshua Habermanf41049a2022-01-21 14:41:25 -08007193 switch (upb_FieldDef_CType(f)) {
7194 case kUpb_CType_Int32:
7195 case kUpb_CType_Int64:
7196 case kUpb_CType_UInt32:
7197 case kUpb_CType_UInt64:
7198 case kUpb_CType_Double:
7199 case kUpb_CType_Float:
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007200 /* Standard C number parsing functions expect null-terminated strings. */
7201 if (len >= sizeof(nullz) - 1) {
7202 symtab_errf(ctx, "Default too long: %.*s", (int)len, str);
7203 }
7204 memcpy(nullz, str, len);
7205 nullz[len] = '\0';
7206 str = nullz;
7207 break;
7208 default:
7209 break;
7210 }
7211
Joshua Habermanf41049a2022-01-21 14:41:25 -08007212 switch (upb_FieldDef_CType(f)) {
7213 case kUpb_CType_Int32: {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007214 long val = strtol(str, &end, 0);
7215 if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || *end) {
7216 goto invalid;
7217 }
7218 f->defaultval.sint = val;
7219 break;
7220 }
Joshua Habermanf41049a2022-01-21 14:41:25 -08007221 case kUpb_CType_Enum: {
7222 const upb_EnumDef* e = f->sub.enumdef;
7223 const upb_EnumValueDef* ev =
7224 upb_EnumDef_FindValueByNameWithSize(e, str, len);
7225 if (!ev) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007226 goto invalid;
7227 }
Joshua Habermanf41049a2022-01-21 14:41:25 -08007228 f->defaultval.sint = ev->number;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007229 break;
7230 }
Joshua Habermanf41049a2022-01-21 14:41:25 -08007231 case kUpb_CType_Int64: {
Joshua Habermandd69a482021-05-17 22:40:33 -07007232 long long val = strtoll(str, &end, 0);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007233 if (val > INT64_MAX || val < INT64_MIN || errno == ERANGE || *end) {
7234 goto invalid;
7235 }
7236 f->defaultval.sint = val;
7237 break;
7238 }
Joshua Habermanf41049a2022-01-21 14:41:25 -08007239 case kUpb_CType_UInt32: {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007240 unsigned long val = strtoul(str, &end, 0);
7241 if (val > UINT32_MAX || errno == ERANGE || *end) {
7242 goto invalid;
7243 }
7244 f->defaultval.uint = val;
7245 break;
7246 }
Joshua Habermanf41049a2022-01-21 14:41:25 -08007247 case kUpb_CType_UInt64: {
Joshua Habermandd69a482021-05-17 22:40:33 -07007248 unsigned long long val = strtoull(str, &end, 0);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007249 if (val > UINT64_MAX || errno == ERANGE || *end) {
7250 goto invalid;
7251 }
7252 f->defaultval.uint = val;
7253 break;
7254 }
Joshua Habermanf41049a2022-01-21 14:41:25 -08007255 case kUpb_CType_Double: {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007256 double val = strtod(str, &end);
7257 if (errno == ERANGE || *end) {
7258 goto invalid;
7259 }
7260 f->defaultval.dbl = val;
7261 break;
7262 }
Joshua Habermanf41049a2022-01-21 14:41:25 -08007263 case kUpb_CType_Float: {
Joshua Habermandd69a482021-05-17 22:40:33 -07007264 float val = strtof(str, &end);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007265 if (errno == ERANGE || *end) {
7266 goto invalid;
7267 }
7268 f->defaultval.flt = val;
7269 break;
7270 }
Joshua Habermanf41049a2022-01-21 14:41:25 -08007271 case kUpb_CType_Bool: {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007272 if (streql2(str, len, "false")) {
7273 f->defaultval.boolean = false;
7274 } else if (streql2(str, len, "true")) {
7275 f->defaultval.boolean = true;
7276 } else {
Joshua Habermanf41049a2022-01-21 14:41:25 -08007277 goto invalid;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007278 }
7279 break;
7280 }
Joshua Habermanf41049a2022-01-21 14:41:25 -08007281 case kUpb_CType_String:
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007282 f->defaultval.str = newstr(ctx, str, len);
7283 break;
Joshua Habermanf41049a2022-01-21 14:41:25 -08007284 case kUpb_CType_Bytes:
7285 f->defaultval.str = unescape(ctx, f, str, len);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007286 break;
Joshua Habermanf41049a2022-01-21 14:41:25 -08007287 case kUpb_CType_Message:
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007288 /* Should not have a default value. */
7289 symtab_errf(ctx, "Message should not have a default (%s)",
Joshua Habermanf41049a2022-01-21 14:41:25 -08007290 upb_FieldDef_FullName(f));
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007291 }
7292
7293 return;
7294
7295invalid:
Joshua Habermanf41049a2022-01-21 14:41:25 -08007296 symtab_errf(ctx, "Invalid default '%.*s' for field %s of type %d", (int)len,
7297 str, upb_FieldDef_FullName(f), (int)upb_FieldDef_Type(f));
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007298}
7299
Joshua Habermanf41049a2022-01-21 14:41:25 -08007300static void set_default_default(symtab_addctx* ctx, upb_FieldDef* f) {
7301 switch (upb_FieldDef_CType(f)) {
7302 case kUpb_CType_Int32:
7303 case kUpb_CType_Int64:
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007304 f->defaultval.sint = 0;
7305 break;
Joshua Habermanf41049a2022-01-21 14:41:25 -08007306 case kUpb_CType_UInt64:
7307 case kUpb_CType_UInt32:
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007308 f->defaultval.uint = 0;
7309 break;
Joshua Habermanf41049a2022-01-21 14:41:25 -08007310 case kUpb_CType_Double:
7311 case kUpb_CType_Float:
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007312 f->defaultval.dbl = 0;
7313 break;
Joshua Habermanf41049a2022-01-21 14:41:25 -08007314 case kUpb_CType_String:
7315 case kUpb_CType_Bytes:
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007316 f->defaultval.str = newstr(ctx, NULL, 0);
7317 break;
Joshua Habermanf41049a2022-01-21 14:41:25 -08007318 case kUpb_CType_Bool:
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007319 f->defaultval.boolean = false;
7320 break;
Joshua Habermanf41049a2022-01-21 14:41:25 -08007321 case kUpb_CType_Enum:
7322 f->defaultval.sint = f->sub.enumdef->values[0].number;
7323 case kUpb_CType_Message:
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007324 break;
7325 }
7326}
7327
7328static void create_fielddef(
Joshua Habermanf41049a2022-01-21 14:41:25 -08007329 symtab_addctx* ctx, const char* prefix, upb_MessageDef* m,
7330 const google_protobuf_FieldDescriptorProto* field_proto,
7331 const upb_FieldDef* _f, bool is_extension) {
7332 upb_FieldDef* f = (upb_FieldDef*)_f;
7333 upb_StringView name;
7334 const char* full_name;
7335 const char* json_name;
7336 const char* shortname;
7337 int32_t field_number;
7338
7339 f->file = ctx->file; /* Must happen prior to symtab_add(). */
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007340
7341 if (!google_protobuf_FieldDescriptorProto_has_name(field_proto)) {
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07007342 symtab_errf(ctx, "field has no name");
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007343 }
7344
7345 name = google_protobuf_FieldDescriptorProto_name(field_proto);
7346 check_ident(ctx, name, false);
7347 full_name = makefullname(ctx, prefix, name);
7348 shortname = shortdefname(full_name);
7349
7350 if (google_protobuf_FieldDescriptorProto_has_json_name(field_proto)) {
7351 json_name = strviewdup(
7352 ctx, google_protobuf_FieldDescriptorProto_json_name(field_proto));
Joshua Habermanf41049a2022-01-21 14:41:25 -08007353 f->has_json_name_ = true;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007354 } else {
7355 json_name = makejsonname(ctx, shortname);
Joshua Habermanf41049a2022-01-21 14:41:25 -08007356 f->has_json_name_ = false;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007357 }
7358
7359 field_number = google_protobuf_FieldDescriptorProto_number(field_proto);
7360
Joshua Habermanf41049a2022-01-21 14:41:25 -08007361 f->full_name = full_name;
7362 f->json_name = json_name;
7363 f->label_ = (int)google_protobuf_FieldDescriptorProto_label(field_proto);
7364 f->number_ = field_number;
7365 f->scope.oneof = NULL;
7366 f->proto3_optional_ =
7367 google_protobuf_FieldDescriptorProto_proto3_optional(field_proto);
7368
7369 bool has_type = google_protobuf_FieldDescriptorProto_has_type(field_proto);
7370 bool has_type_name =
7371 google_protobuf_FieldDescriptorProto_has_type_name(field_proto);
7372
7373 f->type_ = (int)google_protobuf_FieldDescriptorProto_type(field_proto);
7374
7375 if (has_type) {
7376 switch (f->type_) {
7377 case kUpb_FieldType_Message:
7378 case kUpb_FieldType_Group:
7379 case kUpb_FieldType_Enum:
7380 if (!has_type_name) {
7381 symtab_errf(ctx, "field of type %d requires type name (%s)",
7382 (int)f->type_, full_name);
7383 }
7384 break;
7385 default:
7386 if (has_type_name) {
7387 symtab_errf(ctx, "invalid type for field with type_name set (%s, %d)",
7388 full_name, (int)f->type_);
7389 }
7390 }
7391 } else if (has_type_name) {
7392 f->type_ =
7393 FIELD_TYPE_UNSPECIFIED; // We'll fill this in in resolve_fielddef().
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007394 }
7395
Joshua Habermanf41049a2022-01-21 14:41:25 -08007396 if (!is_extension) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007397 /* direct message field. */
Joshua Habermanf41049a2022-01-21 14:41:25 -08007398 upb_value v, field_v, json_v, existing_v;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007399 size_t json_size;
7400
Joshua Habermanf41049a2022-01-21 14:41:25 -08007401 if (field_number <= 0 || field_number > kUpb_MaxFieldNumber) {
7402 symtab_errf(ctx, "invalid field number (%u)", field_number);
7403 }
7404
7405 f->index_ = f - m->fields;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007406 f->msgdef = m;
7407 f->is_extension_ = false;
7408
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007409 field_v = pack_def(f, UPB_DEFTYPE_FIELD);
7410 json_v = pack_def(f, UPB_DEFTYPE_FIELD_JSONNAME);
7411 v = upb_value_constptr(f);
7412 json_size = strlen(json_name);
7413
Joshua Habermanf41049a2022-01-21 14:41:25 -08007414 if (upb_strtable_lookup(&m->ntof, shortname, &existing_v)) {
7415 symtab_errf(ctx, "duplicate field name (%s)", shortname);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007416 }
7417
Joshua Habermanf41049a2022-01-21 14:41:25 -08007418 CHK_OOM(upb_strtable_insert(&m->ntof, name.data, name.size, field_v,
7419 ctx->arena));
7420
7421 if (strcmp(shortname, json_name) != 0) {
7422 if (upb_strtable_lookup(&m->ntof, json_name, &v)) {
7423 symtab_errf(ctx, "duplicate json_name (%s)", json_name);
7424 } else {
7425 CHK_OOM(upb_strtable_insert(&m->ntof, json_name, json_size, json_v,
7426 ctx->arena));
7427 }
7428 }
7429
7430 if (upb_inttable_lookup(&m->itof, field_number, NULL)) {
7431 symtab_errf(ctx, "duplicate field number (%u)", field_number);
7432 }
7433
7434 CHK_OOM(upb_inttable_insert(&m->itof, field_number, v, ctx->arena));
7435
7436 if (ctx->layout) {
7437 const upb_MiniTable_Field* fields = m->layout->fields;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007438 int count = m->layout->field_count;
7439 bool found = false;
Joshua Habermanf41049a2022-01-21 14:41:25 -08007440 for (int i = 0; i < count; i++) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007441 if (fields[i].number == field_number) {
7442 f->layout_index = i;
7443 found = true;
7444 break;
7445 }
7446 }
7447 UPB_ASSERT(found);
7448 }
7449 } else {
7450 /* extension field. */
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007451 f->is_extension_ = true;
Joshua Habermanf41049a2022-01-21 14:41:25 -08007452 f->scope.extension_scope = m;
7453 symtab_add(ctx, full_name, pack_def(f, UPB_DEFTYPE_EXT));
7454 f->layout_index = ctx->ext_count++;
7455 if (ctx->layout) {
7456 UPB_ASSERT(ctx->file->ext_layouts[f->layout_index]->field.number ==
7457 field_number);
7458 }
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007459 }
7460
Joshua Habermanf41049a2022-01-21 14:41:25 -08007461 if (f->type_ < kUpb_FieldType_Double || f->type_ > kUpb_FieldType_SInt64) {
7462 symtab_errf(ctx, "invalid type for field %s (%d)", f->full_name, f->type_);
7463 }
7464
7465 if (f->label_ < kUpb_Label_Optional || f->label_ > kUpb_Label_Repeated) {
7466 symtab_errf(ctx, "invalid label for field %s (%d)", f->full_name,
7467 f->label_);
7468 }
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007469
7470 /* We can't resolve the subdef or (in the case of extensions) the containing
7471 * message yet, because it may not have been defined yet. We stash a pointer
7472 * to the field_proto until later when we can properly resolve it. */
7473 f->sub.unresolved = field_proto;
7474
Joshua Habermanf41049a2022-01-21 14:41:25 -08007475 if (f->label_ == kUpb_Label_Required &&
7476 f->file->syntax == kUpb_Syntax_Proto3) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007477 symtab_errf(ctx, "proto3 fields cannot be required (%s)", f->full_name);
7478 }
7479
7480 if (google_protobuf_FieldDescriptorProto_has_oneof_index(field_proto)) {
7481 int oneof_index =
7482 google_protobuf_FieldDescriptorProto_oneof_index(field_proto);
Joshua Habermanf41049a2022-01-21 14:41:25 -08007483 upb_OneofDef* oneof;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007484 upb_value v = upb_value_constptr(f);
7485
Joshua Habermanf41049a2022-01-21 14:41:25 -08007486 if (upb_FieldDef_Label(f) != kUpb_Label_Optional) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007487 symtab_errf(ctx, "fields in oneof must have OPTIONAL label (%s)",
7488 f->full_name);
7489 }
7490
7491 if (!m) {
7492 symtab_errf(ctx, "oneof_index provided for extension field (%s)",
7493 f->full_name);
7494 }
7495
7496 if (oneof_index >= m->oneof_count) {
7497 symtab_errf(ctx, "oneof_index out of range (%s)", f->full_name);
7498 }
7499
Joshua Habermanf41049a2022-01-21 14:41:25 -08007500 oneof = (upb_OneofDef*)&m->oneofs[oneof_index];
7501 f->scope.oneof = oneof;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007502
7503 oneof->field_count++;
7504 if (f->proto3_optional_) {
7505 oneof->synthetic = true;
7506 }
Joshua Habermandd69a482021-05-17 22:40:33 -07007507 CHK_OOM(upb_inttable_insert(&oneof->itof, f->number_, v, ctx->arena));
7508 CHK_OOM(
7509 upb_strtable_insert(&oneof->ntof, name.data, name.size, v, ctx->arena));
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007510 } else {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007511 if (f->proto3_optional_) {
7512 symtab_errf(ctx, "field with proto3_optional was not in a oneof (%s)",
7513 f->full_name);
7514 }
7515 }
7516
Joshua Habermanf41049a2022-01-21 14:41:25 -08007517 SET_OPTIONS(f->opts, FieldDescriptorProto, FieldOptions, field_proto);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007518
Joshua Habermanf41049a2022-01-21 14:41:25 -08007519 if (google_protobuf_FieldOptions_has_packed(f->opts)) {
7520 f->packed_ = google_protobuf_FieldOptions_packed(f->opts);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007521 } else {
7522 /* Repeated fields default to packed for proto3 only. */
Joshua Habermanf41049a2022-01-21 14:41:25 -08007523 f->packed_ = upb_FieldDef_IsPrimitive(f) &&
7524 f->label_ == kUpb_Label_Repeated &&
7525 f->file->syntax == kUpb_Syntax_Proto3;
7526 }
7527}
7528
7529static void create_service(
7530 symtab_addctx* ctx, const google_protobuf_ServiceDescriptorProto* svc_proto,
7531 const upb_ServiceDef* _s) {
7532 upb_ServiceDef* s = (upb_ServiceDef*)_s;
7533 upb_StringView name;
7534 const google_protobuf_MethodDescriptorProto* const* methods;
7535 size_t i, n;
7536
7537 s->file = ctx->file; /* Must happen prior to symtab_add. */
7538
7539 name = google_protobuf_ServiceDescriptorProto_name(svc_proto);
7540 check_ident(ctx, name, false);
7541 s->full_name = makefullname(ctx, ctx->file->package, name);
7542 symtab_add(ctx, s->full_name, pack_def(s, UPB_DEFTYPE_SERVICE));
7543
7544 methods = google_protobuf_ServiceDescriptorProto_method(svc_proto, &n);
7545
7546 s->method_count = n;
7547 s->methods = symtab_alloc(ctx, sizeof(*s->methods) * n);
7548
7549 SET_OPTIONS(s->opts, ServiceDescriptorProto, ServiceOptions, svc_proto);
7550
7551 for (i = 0; i < n; i++) {
7552 const google_protobuf_MethodDescriptorProto* method_proto = methods[i];
7553 upb_MethodDef* m = (upb_MethodDef*)&s->methods[i];
7554 upb_StringView name =
7555 google_protobuf_MethodDescriptorProto_name(method_proto);
7556
7557 m->service = s;
7558 m->full_name = makefullname(ctx, s->full_name, name);
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07007559 m->index = i;
Joshua Habermanf41049a2022-01-21 14:41:25 -08007560 m->client_streaming =
7561 google_protobuf_MethodDescriptorProto_client_streaming(method_proto);
7562 m->server_streaming =
7563 google_protobuf_MethodDescriptorProto_server_streaming(method_proto);
7564 m->input_type = symtab_resolve(
7565 ctx, m->full_name, m->full_name,
7566 google_protobuf_MethodDescriptorProto_input_type(method_proto),
7567 UPB_DEFTYPE_MSG);
7568 m->output_type = symtab_resolve(
7569 ctx, m->full_name, m->full_name,
7570 google_protobuf_MethodDescriptorProto_output_type(method_proto),
7571 UPB_DEFTYPE_MSG);
7572
7573 SET_OPTIONS(m->opts, MethodDescriptorProto, MethodOptions, method_proto);
7574 }
7575}
7576
/* Returns the number of set bits in `x`. */
static int count_bits_debug(uint64_t x) {
  // For assertions only, speed does not matter.
  int bits = 0;
  for (; x != 0; x &= x - 1) {
    bits++;  // Each iteration clears the lowest set bit.
  }
  return bits;
}
7586
/* qsort() comparator ordering int32_t values ascending; returns -1, 0 or 1. */
static int compare_int32(const void* a_ptr, const void* b_ptr) {
  const int32_t lhs = *(int32_t*)a_ptr;
  const int32_t rhs = *(int32_t*)b_ptr;
  return (lhs > rhs) - (lhs < rhs);
}
7592
Joshua Habermanf41049a2022-01-21 14:41:25 -08007593upb_MiniTable_Enum* create_enumlayout(symtab_addctx* ctx,
7594 const upb_EnumDef* e) {
7595 int n = 0;
7596 uint64_t mask = 0;
7597
7598 for (int i = 0; i < e->value_count; i++) {
7599 uint32_t val = (uint32_t)e->values[i].number;
7600 if (val < 64) {
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07007601 mask |= 1ULL << val;
Joshua Habermanf41049a2022-01-21 14:41:25 -08007602 } else {
7603 n++;
7604 }
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007605 }
7606
Joshua Habermanf41049a2022-01-21 14:41:25 -08007607 int32_t* values = symtab_alloc(ctx, sizeof(*values) * n);
7608
7609 if (n) {
7610 int32_t* p = values;
7611
7612 // Add values outside the bitmask range to the list, as described in the
7613 // comments for upb_MiniTable_Enum.
7614 for (int i = 0; i < e->value_count; i++) {
7615 int32_t val = e->values[i].number;
7616 if ((uint32_t)val >= 64) {
7617 *p++ = val;
7618 }
7619 }
7620 UPB_ASSERT(p == values + n);
7621 }
7622
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07007623 // Enums can have duplicate values; we must sort+uniq them.
7624 if (values) qsort(values, n, sizeof(*values), &compare_int32);
7625
7626 int dst = 0;
7627 for (int i = 0; i < n; dst++) {
7628 int32_t val = values[i];
7629 while (i < n && values[i] == val) i++; // Skip duplicates.
7630 values[dst] = val;
7631 }
7632 n = dst;
7633
Joshua Habermanf41049a2022-01-21 14:41:25 -08007634 UPB_ASSERT(upb_inttable_count(&e->iton) == n + count_bits_debug(mask));
7635
7636 upb_MiniTable_Enum* layout = symtab_alloc(ctx, sizeof(*layout));
7637 layout->value_count = n;
7638 layout->mask = mask;
7639 layout->values = values;
7640
7641 return layout;
7642}
7643
7644static void create_enumvaldef(
7645 symtab_addctx* ctx, const char* prefix,
7646 const google_protobuf_EnumValueDescriptorProto* val_proto, upb_EnumDef* e,
7647 int i) {
7648 upb_EnumValueDef* val = (upb_EnumValueDef*)&e->values[i];
7649 upb_StringView name =
7650 google_protobuf_EnumValueDescriptorProto_name(val_proto);
7651 upb_value v = upb_value_constptr(val);
7652
7653 val->parent = e; /* Must happen prior to symtab_add(). */
7654 val->full_name = makefullname(ctx, prefix, name);
7655 val->number = google_protobuf_EnumValueDescriptorProto_number(val_proto);
7656 symtab_add(ctx, val->full_name, pack_def(val, UPB_DEFTYPE_ENUMVAL));
7657
7658 SET_OPTIONS(val->opts, EnumValueDescriptorProto, EnumValueOptions, val_proto);
7659
7660 if (i == 0 && e->file->syntax == kUpb_Syntax_Proto3 && val->number != 0) {
7661 symtab_errf(ctx, "for proto3, the first enum value must be zero (%s)",
7662 e->full_name);
7663 }
7664
7665 CHK_OOM(upb_strtable_insert(&e->ntoi, name.data, name.size, v, ctx->arena));
7666
7667 // Multiple enumerators can have the same number, first one wins.
7668 if (!upb_inttable_lookup(&e->iton, val->number, NULL)) {
7669 CHK_OOM(upb_inttable_insert(&e->iton, val->number, v, ctx->arena));
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007670 }
7671}
7672
7673static void create_enumdef(
Joshua Habermanf41049a2022-01-21 14:41:25 -08007674 symtab_addctx* ctx, const char* prefix,
7675 const google_protobuf_EnumDescriptorProto* enum_proto,
7676 const upb_MessageDef* containing_type, const upb_EnumDef* _e) {
7677 upb_EnumDef* e = (upb_EnumDef*)_e;
7678 ;
7679 const google_protobuf_EnumValueDescriptorProto* const* values;
7680 upb_StringView name;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007681 size_t i, n;
7682
Joshua Habermanf41049a2022-01-21 14:41:25 -08007683 e->file = ctx->file; /* Must happen prior to symtab_add() */
7684 e->containing_type = containing_type;
7685
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007686 name = google_protobuf_EnumDescriptorProto_name(enum_proto);
7687 check_ident(ctx, name, false);
7688
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007689 e->full_name = makefullname(ctx, prefix, name);
7690 symtab_add(ctx, e->full_name, pack_def(e, UPB_DEFTYPE_ENUM));
7691
7692 values = google_protobuf_EnumDescriptorProto_value(enum_proto, &n);
Joshua Habermandd69a482021-05-17 22:40:33 -07007693 CHK_OOM(upb_strtable_init(&e->ntoi, n, ctx->arena));
7694 CHK_OOM(upb_inttable_init(&e->iton, ctx->arena));
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007695
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007696 e->defaultval = 0;
Joshua Habermanf41049a2022-01-21 14:41:25 -08007697 e->value_count = n;
7698 e->values = symtab_alloc(ctx, sizeof(*e->values) * n);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007699
7700 if (n == 0) {
7701 symtab_errf(ctx, "enums must contain at least one value (%s)",
7702 e->full_name);
7703 }
7704
Joshua Habermanf41049a2022-01-21 14:41:25 -08007705 SET_OPTIONS(e->opts, EnumDescriptorProto, EnumOptions, enum_proto);
7706
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007707 for (i = 0; i < n; i++) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08007708 create_enumvaldef(ctx, prefix, values[i], e, i);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007709 }
7710
Joshua Habermandd69a482021-05-17 22:40:33 -07007711 upb_inttable_compact(&e->iton, ctx->arena);
Joshua Habermanf41049a2022-01-21 14:41:25 -08007712
7713 if (e->file->syntax == kUpb_Syntax_Proto2) {
7714 if (ctx->layout) {
7715 UPB_ASSERT(ctx->enum_count < ctx->layout->enum_count);
7716 e->layout = ctx->layout->enums[ctx->enum_count++];
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07007717 UPB_ASSERT(upb_inttable_count(&e->iton) ==
Joshua Habermanf41049a2022-01-21 14:41:25 -08007718 e->layout->value_count + count_bits_debug(e->layout->mask));
7719 } else {
7720 e->layout = create_enumlayout(ctx, e);
7721 }
7722 } else {
7723 e->layout = NULL;
7724 }
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007725}
7726
Joshua Habermanf41049a2022-01-21 14:41:25 -08007727static void msgdef_create_nested(
7728 symtab_addctx* ctx, const google_protobuf_DescriptorProto* msg_proto,
7729 upb_MessageDef* m);
7730
7731static void create_msgdef(symtab_addctx* ctx, const char* prefix,
7732 const google_protobuf_DescriptorProto* msg_proto,
7733 const upb_MessageDef* containing_type,
7734 const upb_MessageDef* _m) {
7735 upb_MessageDef* m = (upb_MessageDef*)_m;
7736 const google_protobuf_OneofDescriptorProto* const* oneofs;
7737 const google_protobuf_FieldDescriptorProto* const* fields;
7738 const google_protobuf_DescriptorProto_ExtensionRange* const* ext_ranges;
7739 size_t i, n_oneof, n_field, n_ext_range;
7740 upb_StringView name;
7741
7742 m->file = ctx->file; /* Must happen prior to symtab_add(). */
7743 m->containing_type = containing_type;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007744
7745 name = google_protobuf_DescriptorProto_name(msg_proto);
7746 check_ident(ctx, name, false);
7747
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007748 m->full_name = makefullname(ctx, prefix, name);
7749 symtab_add(ctx, m->full_name, pack_def(m, UPB_DEFTYPE_MSG));
7750
7751 oneofs = google_protobuf_DescriptorProto_oneof_decl(msg_proto, &n_oneof);
7752 fields = google_protobuf_DescriptorProto_field(msg_proto, &n_field);
Joshua Habermanf41049a2022-01-21 14:41:25 -08007753 ext_ranges =
7754 google_protobuf_DescriptorProto_extension_range(msg_proto, &n_ext_range);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007755
Joshua Habermandd69a482021-05-17 22:40:33 -07007756 CHK_OOM(upb_inttable_init(&m->itof, ctx->arena));
7757 CHK_OOM(upb_strtable_init(&m->ntof, n_oneof + n_field, ctx->arena));
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007758
Joshua Habermanf41049a2022-01-21 14:41:25 -08007759 if (ctx->layout) {
7760 /* create_fielddef() below depends on this being set. */
7761 UPB_ASSERT(ctx->msg_count < ctx->layout->msg_count);
7762 m->layout = ctx->layout->msgs[ctx->msg_count++];
7763 UPB_ASSERT(n_field == m->layout->field_count);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007764 } else {
7765 /* Allocate now (to allow cross-linking), populate later. */
Joshua Habermanf41049a2022-01-21 14:41:25 -08007766 m->layout =
7767 symtab_alloc(ctx, sizeof(*m->layout) + sizeof(_upb_FastTable_Entry));
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007768 }
7769
Joshua Habermanf41049a2022-01-21 14:41:25 -08007770 SET_OPTIONS(m->opts, DescriptorProto, MessageOptions, msg_proto);
7771
7772 m->oneof_count = n_oneof;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007773 m->oneofs = symtab_alloc(ctx, sizeof(*m->oneofs) * n_oneof);
7774 for (i = 0; i < n_oneof; i++) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08007775 create_oneofdef(ctx, m, oneofs[i], &m->oneofs[i]);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007776 }
7777
Joshua Habermanf41049a2022-01-21 14:41:25 -08007778 m->field_count = n_field;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007779 m->fields = symtab_alloc(ctx, sizeof(*m->fields) * n_field);
7780 for (i = 0; i < n_field; i++) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08007781 create_fielddef(ctx, m->full_name, m, fields[i], &m->fields[i],
7782 /* is_extension= */ false);
7783 }
7784
7785 m->ext_range_count = n_ext_range;
7786 m->ext_ranges = symtab_alloc(ctx, sizeof(*m->ext_ranges) * n_ext_range);
7787 for (i = 0; i < n_ext_range; i++) {
7788 const google_protobuf_DescriptorProto_ExtensionRange* r = ext_ranges[i];
7789 upb_ExtensionRange* r_def = (upb_ExtensionRange*)&m->ext_ranges[i];
7790 int32_t start = google_protobuf_DescriptorProto_ExtensionRange_start(r);
7791 int32_t end = google_protobuf_DescriptorProto_ExtensionRange_end(r);
7792 int32_t max =
7793 google_protobuf_MessageOptions_message_set_wire_format(m->opts)
7794 ? INT32_MAX
7795 : kUpb_MaxFieldNumber + 1;
7796
7797 // A full validation would also check that each range is disjoint, and that
7798 // none of the fields overlap with the extension ranges, but we are just
7799 // sanity checking here.
7800 if (start < 1 || end <= start || end > max) {
7801 symtab_errf(ctx, "Extension range (%d, %d) is invalid, message=%s\n",
7802 (int)start, (int)end, m->full_name);
7803 }
7804
7805 r_def->start = start;
7806 r_def->end = end;
7807 SET_OPTIONS(r_def->opts, DescriptorProto_ExtensionRange,
7808 ExtensionRangeOptions, r);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007809 }
7810
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007811 finalize_oneofs(ctx, m);
7812 assign_msg_wellknowntype(m);
Joshua Habermandd69a482021-05-17 22:40:33 -07007813 upb_inttable_compact(&m->itof, ctx->arena);
Joshua Habermanf41049a2022-01-21 14:41:25 -08007814 msgdef_create_nested(ctx, msg_proto, m);
7815}
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007816
Joshua Habermanf41049a2022-01-21 14:41:25 -08007817static void msgdef_create_nested(
7818 symtab_addctx* ctx, const google_protobuf_DescriptorProto* msg_proto,
7819 upb_MessageDef* m) {
7820 size_t n;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007821
Joshua Habermanf41049a2022-01-21 14:41:25 -08007822 const google_protobuf_EnumDescriptorProto* const* enums =
7823 google_protobuf_DescriptorProto_enum_type(msg_proto, &n);
7824 m->nested_enum_count = n;
7825 m->nested_enums = symtab_alloc(ctx, sizeof(*m->nested_enums) * n);
7826 for (size_t i = 0; i < n; i++) {
7827 m->nested_enum_count = i + 1;
7828 create_enumdef(ctx, m->full_name, enums[i], m, &m->nested_enums[i]);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007829 }
7830
Joshua Habermanf41049a2022-01-21 14:41:25 -08007831 const google_protobuf_FieldDescriptorProto* const* exts =
7832 google_protobuf_DescriptorProto_extension(msg_proto, &n);
7833 m->nested_ext_count = n;
7834 m->nested_exts = symtab_alloc(ctx, sizeof(*m->nested_exts) * n);
7835 for (size_t i = 0; i < n; i++) {
7836 create_fielddef(ctx, m->full_name, m, exts[i], &m->nested_exts[i],
7837 /* is_extension= */ true);
7838 ((upb_FieldDef*)&m->nested_exts[i])->index_ = i;
7839 }
7840
7841 const google_protobuf_DescriptorProto* const* msgs =
7842 google_protobuf_DescriptorProto_nested_type(msg_proto, &n);
7843 m->nested_msg_count = n;
7844 m->nested_msgs = symtab_alloc(ctx, sizeof(*m->nested_msgs) * n);
7845 for (size_t i = 0; i < n; i++) {
7846 create_msgdef(ctx, m->full_name, msgs[i], m, &m->nested_msgs[i]);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007847 }
7848}
7849
Joshua Habermanf41049a2022-01-21 14:41:25 -08007850static void resolve_subdef(symtab_addctx* ctx, const char* prefix,
7851 upb_FieldDef* f) {
7852 const google_protobuf_FieldDescriptorProto* field_proto = f->sub.unresolved;
7853 upb_StringView name =
7854 google_protobuf_FieldDescriptorProto_type_name(field_proto);
7855 bool has_name =
7856 google_protobuf_FieldDescriptorProto_has_type_name(field_proto);
7857 switch ((int)f->type_) {
7858 case FIELD_TYPE_UNSPECIFIED: {
7859 // Type was not specified and must be inferred.
7860 UPB_ASSERT(has_name);
7861 upb_deftype_t type;
7862 const void* def =
7863 symtab_resolveany(ctx, f->full_name, prefix, name, &type);
7864 switch (type) {
7865 case UPB_DEFTYPE_ENUM:
7866 f->sub.enumdef = def;
7867 f->type_ = kUpb_FieldType_Enum;
7868 break;
7869 case UPB_DEFTYPE_MSG:
7870 f->sub.msgdef = def;
7871 f->type_ = kUpb_FieldType_Message; // It appears there is no way of
7872 // this being a group.
7873 break;
7874 default:
7875 symtab_errf(ctx, "Couldn't resolve type name for field %s",
7876 f->full_name);
7877 }
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007878 }
Joshua Habermanf41049a2022-01-21 14:41:25 -08007879 case kUpb_FieldType_Message:
7880 case kUpb_FieldType_Group:
7881 UPB_ASSERT(has_name);
7882 f->sub.msgdef =
7883 symtab_resolve(ctx, f->full_name, prefix, name, UPB_DEFTYPE_MSG);
7884 break;
7885 case kUpb_FieldType_Enum:
7886 UPB_ASSERT(has_name);
7887 f->sub.enumdef =
7888 symtab_resolve(ctx, f->full_name, prefix, name, UPB_DEFTYPE_ENUM);
7889 break;
7890 default:
7891 // No resolution necessary.
7892 break;
7893 }
7894}
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007895
Joshua Habermanf41049a2022-01-21 14:41:25 -08007896static void resolve_extension(
7897 symtab_addctx* ctx, const char* prefix, upb_FieldDef* f,
7898 const google_protobuf_FieldDescriptorProto* field_proto) {
7899 if (!google_protobuf_FieldDescriptorProto_has_extendee(field_proto)) {
7900 symtab_errf(ctx, "extension for field '%s' had no extendee", f->full_name);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007901 }
7902
Joshua Habermanf41049a2022-01-21 14:41:25 -08007903 upb_StringView name =
7904 google_protobuf_FieldDescriptorProto_extendee(field_proto);
7905 const upb_MessageDef* m =
7906 symtab_resolve(ctx, f->full_name, prefix, name, UPB_DEFTYPE_MSG);
7907 f->msgdef = m;
7908
7909 bool found = false;
7910
7911 for (int i = 0, n = m->ext_range_count; i < n; i++) {
7912 const upb_ExtensionRange* r = &m->ext_ranges[i];
7913 if (r->start <= f->number_ && f->number_ < r->end) {
7914 found = true;
7915 break;
7916 }
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007917 }
7918
Joshua Habermanf41049a2022-01-21 14:41:25 -08007919 if (!found) {
7920 symtab_errf(ctx,
7921 "field number %u in extension %s has no extension range in "
7922 "message %s",
7923 (unsigned)f->number_, f->full_name, f->msgdef->full_name);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007924 }
7925
Joshua Habermanf41049a2022-01-21 14:41:25 -08007926 const upb_MiniTable_Extension* ext = ctx->file->ext_layouts[f->layout_index];
7927 if (ctx->layout) {
7928 UPB_ASSERT(upb_FieldDef_Number(f) == ext->field.number);
7929 } else {
7930 upb_MiniTable_Extension* mut_ext = (upb_MiniTable_Extension*)ext;
7931 fill_fieldlayout(&mut_ext->field, f);
7932 mut_ext->field.presence = 0;
7933 mut_ext->field.offset = 0;
7934 mut_ext->field.submsg_index = 0;
7935 mut_ext->extendee = f->msgdef->layout;
7936 mut_ext->sub.submsg = f->sub.msgdef->layout;
7937 }
7938
7939 CHK_OOM(upb_inttable_insert(&ctx->symtab->exts, (uintptr_t)ext,
7940 upb_value_constptr(f), ctx->arena));
7941}
7942
7943static void resolve_default(
7944 symtab_addctx* ctx, upb_FieldDef* f,
7945 const google_protobuf_FieldDescriptorProto* field_proto) {
7946 // Have to delay resolving of the default value until now because of the enum
7947 // case, since enum defaults are specified with a label.
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007948 if (google_protobuf_FieldDescriptorProto_has_default_value(field_proto)) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08007949 upb_StringView defaultval =
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007950 google_protobuf_FieldDescriptorProto_default_value(field_proto);
7951
Joshua Habermanf41049a2022-01-21 14:41:25 -08007952 if (f->file->syntax == kUpb_Syntax_Proto3) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007953 symtab_errf(ctx, "proto3 fields cannot have explicit defaults (%s)",
7954 f->full_name);
7955 }
7956
Joshua Habermanf41049a2022-01-21 14:41:25 -08007957 if (upb_FieldDef_IsSubMessage(f)) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007958 symtab_errf(ctx, "message fields cannot have explicit defaults (%s)",
7959 f->full_name);
7960 }
7961
7962 parse_default(ctx, defaultval.data, defaultval.size, f);
Joshua Habermanf41049a2022-01-21 14:41:25 -08007963 f->has_default = true;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007964 } else {
7965 set_default_default(ctx, f);
Joshua Habermanf41049a2022-01-21 14:41:25 -08007966 f->has_default = false;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08007967 }
7968}
7969
Joshua Habermanf41049a2022-01-21 14:41:25 -08007970static void resolve_fielddef(symtab_addctx* ctx, const char* prefix,
7971 upb_FieldDef* f) {
7972 // We have to stash this away since resolve_subdef() may overwrite it.
7973 const google_protobuf_FieldDescriptorProto* field_proto = f->sub.unresolved;
7974
7975 resolve_subdef(ctx, prefix, f);
7976 resolve_default(ctx, f, field_proto);
7977
7978 if (f->is_extension_) {
7979 resolve_extension(ctx, prefix, f, field_proto);
7980 }
7981}
7982
7983static void resolve_msgdef(symtab_addctx* ctx, upb_MessageDef* m) {
7984 for (int i = 0; i < m->field_count; i++) {
7985 resolve_fielddef(ctx, m->full_name, (upb_FieldDef*)&m->fields[i]);
7986 }
7987
Joshua Habermanf41049a2022-01-21 14:41:25 -08007988 m->in_message_set = false;
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07007989 for (int i = 0; i < m->nested_ext_count; i++) {
7990 upb_FieldDef* ext = (upb_FieldDef*)&m->nested_exts[i];
7991 resolve_fielddef(ctx, m->full_name, ext);
Joshua Habermanf41049a2022-01-21 14:41:25 -08007992 if (ext->type_ == kUpb_FieldType_Message &&
7993 ext->label_ == kUpb_Label_Optional && ext->sub.msgdef == m &&
7994 google_protobuf_MessageOptions_message_set_wire_format(
7995 ext->msgdef->opts)) {
7996 m->in_message_set = true;
7997 }
7998 }
7999
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07008000 if (!ctx->layout) make_layout(ctx, m);
8001
Joshua Habermanf41049a2022-01-21 14:41:25 -08008002 for (int i = 0; i < m->nested_msg_count; i++) {
8003 resolve_msgdef(ctx, (upb_MessageDef*)&m->nested_msgs[i]);
8004 }
8005}
8006
8007static int count_exts_in_msg(const google_protobuf_DescriptorProto* msg_proto) {
8008 size_t n;
8009 google_protobuf_DescriptorProto_extension(msg_proto, &n);
8010 int ext_count = n;
8011
8012 const google_protobuf_DescriptorProto* const* nested_msgs =
8013 google_protobuf_DescriptorProto_nested_type(msg_proto, &n);
8014 for (size_t i = 0; i < n; i++) {
8015 ext_count += count_exts_in_msg(nested_msgs[i]);
8016 }
8017
8018 return ext_count;
8019}
8020
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008021static void build_filedef(
Joshua Habermanf41049a2022-01-21 14:41:25 -08008022 symtab_addctx* ctx, upb_FileDef* file,
8023 const google_protobuf_FileDescriptorProto* file_proto) {
8024 const google_protobuf_DescriptorProto* const* msgs;
8025 const google_protobuf_EnumDescriptorProto* const* enums;
8026 const google_protobuf_FieldDescriptorProto* const* exts;
8027 const google_protobuf_ServiceDescriptorProto* const* services;
8028 const upb_StringView* strs;
8029 const int32_t* public_deps;
8030 const int32_t* weak_deps;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008031 size_t i, n;
8032
Joshua Haberman9d578a32021-08-02 15:32:01 -07008033 file->symtab = ctx->symtab;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008034
Joshua Habermanf41049a2022-01-21 14:41:25 -08008035 /* Count all extensions in the file, to build a flat array of layouts. */
8036 google_protobuf_FileDescriptorProto_extension(file_proto, &n);
8037 int ext_count = n;
8038 msgs = google_protobuf_FileDescriptorProto_message_type(file_proto, &n);
8039 for (int i = 0; i < n; i++) {
8040 ext_count += count_exts_in_msg(msgs[i]);
8041 }
8042 file->ext_count = ext_count;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008043
Joshua Habermanf41049a2022-01-21 14:41:25 -08008044 if (ctx->layout) {
8045 /* We are using the ext layouts that were passed in. */
8046 file->ext_layouts = ctx->layout->exts;
8047 if (ctx->layout->ext_count != file->ext_count) {
8048 symtab_errf(ctx, "Extension count did not match layout (%d vs %d)",
8049 ctx->layout->ext_count, file->ext_count);
8050 }
8051 } else {
8052 /* We are building ext layouts from scratch. */
8053 file->ext_layouts =
8054 symtab_alloc(ctx, sizeof(*file->ext_layouts) * file->ext_count);
8055 upb_MiniTable_Extension* ext =
8056 symtab_alloc(ctx, sizeof(*ext) * file->ext_count);
8057 for (int i = 0; i < file->ext_count; i++) {
8058 file->ext_layouts[i] = &ext[i];
8059 }
8060 }
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008061
8062 if (!google_protobuf_FileDescriptorProto_has_name(file_proto)) {
8063 symtab_errf(ctx, "File has no name");
8064 }
8065
8066 file->name =
8067 strviewdup(ctx, google_protobuf_FileDescriptorProto_name(file_proto));
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008068
8069 if (google_protobuf_FileDescriptorProto_has_package(file_proto)) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08008070 upb_StringView package =
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008071 google_protobuf_FileDescriptorProto_package(file_proto);
8072 check_ident(ctx, package, true);
8073 file->package = strviewdup(ctx, package);
8074 } else {
8075 file->package = NULL;
8076 }
8077
8078 if (google_protobuf_FileDescriptorProto_has_syntax(file_proto)) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08008079 upb_StringView syntax =
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008080 google_protobuf_FileDescriptorProto_syntax(file_proto);
8081
8082 if (streql_view(syntax, "proto2")) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08008083 file->syntax = kUpb_Syntax_Proto2;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008084 } else if (streql_view(syntax, "proto3")) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08008085 file->syntax = kUpb_Syntax_Proto3;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008086 } else {
Joshua Habermanf41049a2022-01-21 14:41:25 -08008087 symtab_errf(ctx, "Invalid syntax '" UPB_STRINGVIEW_FORMAT "'",
8088 UPB_STRINGVIEW_ARGS(syntax));
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008089 }
8090 } else {
Joshua Habermanf41049a2022-01-21 14:41:25 -08008091 file->syntax = kUpb_Syntax_Proto2;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008092 }
8093
8094 /* Read options. */
Joshua Habermanf41049a2022-01-21 14:41:25 -08008095 SET_OPTIONS(file->opts, FileDescriptorProto, FileOptions, file_proto);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008096
8097 /* Verify dependencies. */
8098 strs = google_protobuf_FileDescriptorProto_dependency(file_proto, &n);
Joshua Habermanf41049a2022-01-21 14:41:25 -08008099 file->dep_count = n;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008100 file->deps = symtab_alloc(ctx, sizeof(*file->deps) * n);
8101
8102 for (i = 0; i < n; i++) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08008103 upb_StringView str = strs[i];
8104 file->deps[i] =
8105 upb_DefPool_FindFileByNameWithSize(ctx->symtab, str.data, str.size);
8106 if (!file->deps[i]) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008107 symtab_errf(ctx,
Joshua Habermanf41049a2022-01-21 14:41:25 -08008108 "Depends on file '" UPB_STRINGVIEW_FORMAT
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008109 "', but it has not been loaded",
Joshua Habermanf41049a2022-01-21 14:41:25 -08008110 UPB_STRINGVIEW_ARGS(str));
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008111 }
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008112 }
8113
Joshua Habermanf41049a2022-01-21 14:41:25 -08008114 public_deps =
8115 google_protobuf_FileDescriptorProto_public_dependency(file_proto, &n);
8116 file->public_dep_count = n;
8117 file->public_deps = symtab_alloc(ctx, sizeof(*file->public_deps) * n);
8118 int32_t* mutable_public_deps = (int32_t*)file->public_deps;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008119 for (i = 0; i < n; i++) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08008120 if (public_deps[i] >= file->dep_count) {
8121 symtab_errf(ctx, "public_dep %d is out of range", (int)public_deps[i]);
8122 }
8123 mutable_public_deps[i] = public_deps[i];
8124 }
8125
8126 weak_deps =
8127 google_protobuf_FileDescriptorProto_weak_dependency(file_proto, &n);
8128 file->weak_dep_count = n;
8129 file->weak_deps = symtab_alloc(ctx, sizeof(*file->weak_deps) * n);
8130 int32_t* mutable_weak_deps = (int32_t*)file->weak_deps;
8131 for (i = 0; i < n; i++) {
8132 if (weak_deps[i] >= file->dep_count) {
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07008133 symtab_errf(ctx, "weak_dep %d is out of range", (int)weak_deps[i]);
Joshua Habermanf41049a2022-01-21 14:41:25 -08008134 }
8135 mutable_weak_deps[i] = weak_deps[i];
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008136 }
8137
8138 /* Create enums. */
8139 enums = google_protobuf_FileDescriptorProto_enum_type(file_proto, &n);
Joshua Habermanf41049a2022-01-21 14:41:25 -08008140 file->top_lvl_enum_count = n;
8141 file->top_lvl_enums = symtab_alloc(ctx, sizeof(*file->top_lvl_enums) * n);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008142 for (i = 0; i < n; i++) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08008143 create_enumdef(ctx, file->package, enums[i], NULL, &file->top_lvl_enums[i]);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008144 }
8145
8146 /* Create extensions. */
8147 exts = google_protobuf_FileDescriptorProto_extension(file_proto, &n);
Joshua Habermanf41049a2022-01-21 14:41:25 -08008148 file->top_lvl_ext_count = n;
8149 file->top_lvl_exts = symtab_alloc(ctx, sizeof(*file->top_lvl_exts) * n);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008150 for (i = 0; i < n; i++) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08008151 create_fielddef(ctx, file->package, NULL, exts[i], &file->top_lvl_exts[i],
8152 /* is_extension= */ true);
8153 ((upb_FieldDef*)&file->top_lvl_exts[i])->index_ = i;
8154 }
8155
8156 /* Create messages. */
8157 msgs = google_protobuf_FileDescriptorProto_message_type(file_proto, &n);
8158 file->top_lvl_msg_count = n;
8159 file->top_lvl_msgs = symtab_alloc(ctx, sizeof(*file->top_lvl_msgs) * n);
8160 for (i = 0; i < n; i++) {
8161 create_msgdef(ctx, file->package, msgs[i], NULL, &file->top_lvl_msgs[i]);
8162 }
8163
8164 /* Create services. */
8165 services = google_protobuf_FileDescriptorProto_service(file_proto, &n);
8166 file->service_count = n;
8167 file->services = symtab_alloc(ctx, sizeof(*file->services) * n);
8168 for (i = 0; i < n; i++) {
8169 create_service(ctx, services[i], &file->services[i]);
8170 ((upb_ServiceDef*)&file->services[i])->index = i;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008171 }
8172
8173 /* Now that all names are in the table, build layouts and resolve refs. */
Joshua Habermanf41049a2022-01-21 14:41:25 -08008174 for (i = 0; i < (size_t)file->top_lvl_ext_count; i++) {
8175 resolve_fielddef(ctx, file->package, (upb_FieldDef*)&file->top_lvl_exts[i]);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008176 }
8177
Joshua Habermanf41049a2022-01-21 14:41:25 -08008178 for (i = 0; i < (size_t)file->top_lvl_msg_count; i++) {
8179 resolve_msgdef(ctx, (upb_MessageDef*)&file->top_lvl_msgs[i]);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008180 }
8181
Joshua Habermanf41049a2022-01-21 14:41:25 -08008182 if (file->ext_count) {
8183 CHK_OOM(_upb_extreg_add(ctx->symtab->extreg, file->ext_layouts,
8184 file->ext_count));
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008185 }
8186}
8187
Joshua Habermanf41049a2022-01-21 14:41:25 -08008188static void remove_filedef(upb_DefPool* s, upb_FileDef* file) {
8189 intptr_t iter = UPB_INTTABLE_BEGIN;
8190 upb_StringView key;
8191 upb_value val;
8192 while (upb_strtable_next2(&s->syms, &key, &val, &iter)) {
8193 const upb_FileDef* f;
8194 switch (deftype(val)) {
8195 case UPB_DEFTYPE_EXT:
8196 f = upb_FieldDef_File(unpack_def(val, UPB_DEFTYPE_EXT));
8197 break;
8198 case UPB_DEFTYPE_MSG:
8199 f = upb_MessageDef_File(unpack_def(val, UPB_DEFTYPE_MSG));
8200 break;
8201 case UPB_DEFTYPE_ENUM:
8202 f = upb_EnumDef_File(unpack_def(val, UPB_DEFTYPE_ENUM));
8203 break;
8204 case UPB_DEFTYPE_ENUMVAL:
8205 f = upb_EnumDef_File(
8206 upb_EnumValueDef_Enum(unpack_def(val, UPB_DEFTYPE_ENUMVAL)));
8207 break;
8208 case UPB_DEFTYPE_SERVICE:
8209 f = upb_ServiceDef_File(unpack_def(val, UPB_DEFTYPE_SERVICE));
8210 break;
8211 default:
8212 UPB_UNREACHABLE();
8213 }
8214
8215 if (f == file) upb_strtable_removeiter(&s->syms, &iter);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008216 }
8217}
8218
Joshua Habermanf41049a2022-01-21 14:41:25 -08008219static const upb_FileDef* _upb_DefPool_AddFile(
8220 upb_DefPool* s, const google_protobuf_FileDescriptorProto* file_proto,
8221 const upb_MiniTable_File* layout, upb_Status* status) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008222 symtab_addctx ctx;
Joshua Habermanf41049a2022-01-21 14:41:25 -08008223 upb_StringView name = google_protobuf_FileDescriptorProto_name(file_proto);
8224 upb_value v;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008225
Joshua Habermanf41049a2022-01-21 14:41:25 -08008226 if (upb_strtable_lookup2(&s->files, name.data, name.size, &v)) {
8227 if (unpack_def(v, UPB_DEFTYPE_FILE)) {
8228 upb_Status_SetErrorFormat(status, "duplicate file name (%.*s)",
8229 UPB_STRINGVIEW_ARGS(name));
8230 return NULL;
8231 }
8232 const upb_MiniTable_File* registered = unpack_def(v, UPB_DEFTYPE_LAYOUT);
8233 UPB_ASSERT(registered);
8234 if (layout && layout != registered) {
8235 upb_Status_SetErrorFormat(
8236 status, "tried to build with a different layout (filename=%.*s)",
8237 UPB_STRINGVIEW_ARGS(name));
8238 return NULL;
8239 }
8240 layout = registered;
Joshua Haberman9d578a32021-08-02 15:32:01 -07008241 }
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008242
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008243 ctx.symtab = s;
Joshua Habermanf41049a2022-01-21 14:41:25 -08008244 ctx.layout = layout;
8245 ctx.msg_count = 0;
8246 ctx.enum_count = 0;
8247 ctx.ext_count = 0;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008248 ctx.status = status;
Joshua Haberman9d578a32021-08-02 15:32:01 -07008249 ctx.file = NULL;
Joshua Habermanf41049a2022-01-21 14:41:25 -08008250 ctx.arena = upb_Arena_New();
8251 ctx.tmp_arena = upb_Arena_New();
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008252
Joshua Habermanf41049a2022-01-21 14:41:25 -08008253 if (!ctx.arena || !ctx.tmp_arena) {
8254 if (ctx.arena) upb_Arena_Free(ctx.arena);
8255 if (ctx.tmp_arena) upb_Arena_Free(ctx.tmp_arena);
8256 upb_Status_setoom(status);
Joshua Haberman9d578a32021-08-02 15:32:01 -07008257 return NULL;
8258 }
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008259
8260 if (UPB_UNLIKELY(UPB_SETJMP(ctx.err))) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08008261 UPB_ASSERT(!upb_Status_IsOk(status));
Joshua Haberman9d578a32021-08-02 15:32:01 -07008262 if (ctx.file) {
8263 remove_filedef(s, ctx.file);
8264 ctx.file = NULL;
8265 }
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008266 } else {
Joshua Haberman9d578a32021-08-02 15:32:01 -07008267 ctx.file = symtab_alloc(&ctx, sizeof(*ctx.file));
8268 build_filedef(&ctx, ctx.file, file_proto);
8269 upb_strtable_insert(&s->files, name.data, name.size,
Joshua Habermanf41049a2022-01-21 14:41:25 -08008270 pack_def(ctx.file, UPB_DEFTYPE_FILE), ctx.arena);
8271 UPB_ASSERT(upb_Status_IsOk(status));
8272 upb_Arena_Fuse(s->arena, ctx.arena);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008273 }
8274
Joshua Habermanf41049a2022-01-21 14:41:25 -08008275 upb_Arena_Free(ctx.arena);
8276 upb_Arena_Free(ctx.tmp_arena);
Joshua Haberman9d578a32021-08-02 15:32:01 -07008277 return ctx.file;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008278}
8279
Joshua Habermanf41049a2022-01-21 14:41:25 -08008280const upb_FileDef* upb_DefPool_AddFile(
8281 upb_DefPool* s, const google_protobuf_FileDescriptorProto* file_proto,
8282 upb_Status* status) {
8283 return _upb_DefPool_AddFile(s, file_proto, NULL, status);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008284}
8285
8286/* Include here since we want most of this file to be stdio-free. */
8287#include <stdio.h>
8288
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07008289bool _upb_DefPool_LoadDefInitEx(upb_DefPool* s, const _upb_DefPool_Init* init,
8290 bool rebuild_minitable) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008291 /* Since this function should never fail (it would indicate a bug in upb) we
8292 * print errors to stderr instead of returning error status to the user. */
Joshua Habermanf41049a2022-01-21 14:41:25 -08008293 _upb_DefPool_Init** deps = init->deps;
8294 google_protobuf_FileDescriptorProto* file;
8295 upb_Arena* arena;
8296 upb_Status status;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008297
Joshua Habermanf41049a2022-01-21 14:41:25 -08008298 upb_Status_Clear(&status);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008299
Joshua Habermanf41049a2022-01-21 14:41:25 -08008300 if (upb_DefPool_FindFileByName(s, init->filename)) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008301 return true;
8302 }
8303
Joshua Habermanf41049a2022-01-21 14:41:25 -08008304 arena = upb_Arena_New();
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008305
8306 for (; *deps; deps++) {
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07008307 if (!_upb_DefPool_LoadDefInitEx(s, *deps, rebuild_minitable)) goto err;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008308 }
8309
8310 file = google_protobuf_FileDescriptorProto_parse_ex(
Joshua Habermanf41049a2022-01-21 14:41:25 -08008311 init->descriptor.data, init->descriptor.size, NULL,
8312 kUpb_DecodeOption_AliasString, arena);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008313 s->bytes_loaded += init->descriptor.size;
8314
8315 if (!file) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08008316 upb_Status_SetErrorFormat(
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008317 &status,
8318 "Failed to parse compiled-in descriptor for file '%s'. This should "
8319 "never happen.",
8320 init->filename);
8321 goto err;
8322 }
8323
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07008324 const upb_MiniTable_File* mt = rebuild_minitable ? NULL : init->layout;
8325 if (!_upb_DefPool_AddFile(s, file, mt, &status)) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08008326 goto err;
8327 }
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008328
Joshua Habermanf41049a2022-01-21 14:41:25 -08008329 upb_Arena_Free(arena);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008330 return true;
8331
8332err:
Joshua Habermanf41049a2022-01-21 14:41:25 -08008333 fprintf(stderr,
8334 "Error loading compiled-in descriptor for file '%s' (this should "
8335 "never happen): %s\n",
8336 init->filename, upb_Status_ErrorMessage(&status));
8337 upb_Arena_Free(arena);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008338 return false;
8339}
8340
Joshua Habermanf41049a2022-01-21 14:41:25 -08008341size_t _upb_DefPool_BytesLoaded(const upb_DefPool* s) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008342 return s->bytes_loaded;
8343}
8344
Joshua Habermanf41049a2022-01-21 14:41:25 -08008345upb_Arena* _upb_DefPool_Arena(const upb_DefPool* s) { return s->arena; }
8346
8347const upb_FieldDef* _upb_DefPool_FindExtensionByMiniTable(
8348 const upb_DefPool* s, const upb_MiniTable_Extension* ext) {
8349 upb_value v;
8350 bool ok = upb_inttable_lookup(&s->exts, (uintptr_t)ext, &v);
8351 UPB_ASSERT(ok);
8352 return upb_value_getconstptr(v);
8353}
8354
8355const upb_FieldDef* upb_DefPool_FindExtensionByNumber(const upb_DefPool* s,
8356 const upb_MessageDef* m,
8357 int32_t fieldnum) {
8358 const upb_MiniTable* l = upb_MessageDef_MiniTable(m);
8359 const upb_MiniTable_Extension* ext = _upb_extreg_get(s->extreg, l, fieldnum);
8360 return ext ? _upb_DefPool_FindExtensionByMiniTable(s, ext) : NULL;
8361}
8362
8363bool _upb_DefPool_registerlayout(upb_DefPool* s, const char* filename,
8364 const upb_MiniTable_File* file) {
8365 if (upb_DefPool_FindFileByName(s, filename)) return false;
8366 upb_value v = pack_def(file, UPB_DEFTYPE_LAYOUT);
8367 return upb_strtable_insert(&s->files, filename, strlen(filename), v,
8368 s->arena);
8369}
8370
8371const upb_ExtensionRegistry* upb_DefPool_ExtensionRegistry(
8372 const upb_DefPool* s) {
8373 return s->extreg;
8374}
8375
8376const upb_FieldDef** upb_DefPool_GetAllExtensions(const upb_DefPool* s,
8377 const upb_MessageDef* m,
8378 size_t* count) {
8379 size_t n = 0;
8380 intptr_t iter = UPB_INTTABLE_BEGIN;
8381 uintptr_t key;
8382 upb_value val;
8383 // This is O(all exts) instead of O(exts for m). If we need this to be
8384 // efficient we may need to make extreg into a two-level table, or have a
8385 // second per-message index.
8386 while (upb_inttable_next2(&s->exts, &key, &val, &iter)) {
8387 const upb_FieldDef* f = upb_value_getconstptr(val);
8388 if (upb_FieldDef_ContainingType(f) == m) n++;
8389 }
8390 const upb_FieldDef** exts = malloc(n * sizeof(*exts));
8391 iter = UPB_INTTABLE_BEGIN;
8392 size_t i = 0;
8393 while (upb_inttable_next2(&s->exts, &key, &val, &iter)) {
8394 const upb_FieldDef* f = upb_value_getconstptr(val);
8395 if (upb_FieldDef_ContainingType(f) == m) exts[i++] = f;
8396 }
8397 *count = n;
8398 return exts;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008399}
8400
8401#undef CHK_OOM
8402
Joshua Habermandd69a482021-05-17 22:40:33 -07008403/** upb/reflection.c ************************************************************/
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008404
8405#include <string.h>
8406
8407
Joshua Habermanf41049a2022-01-21 14:41:25 -08008408static size_t get_field_size(const upb_MiniTable_Field* f) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008409 static unsigned char sizes[] = {
Joshua Habermanf41049a2022-01-21 14:41:25 -08008410 0, /* 0 */
8411 8, /* kUpb_FieldType_Double */
8412 4, /* kUpb_FieldType_Float */
8413 8, /* kUpb_FieldType_Int64 */
8414 8, /* kUpb_FieldType_UInt64 */
8415 4, /* kUpb_FieldType_Int32 */
8416 8, /* kUpb_FieldType_Fixed64 */
8417 4, /* kUpb_FieldType_Fixed32 */
8418 1, /* kUpb_FieldType_Bool */
8419 sizeof(upb_StringView), /* kUpb_FieldType_String */
8420 sizeof(void*), /* kUpb_FieldType_Group */
8421 sizeof(void*), /* kUpb_FieldType_Message */
8422 sizeof(upb_StringView), /* kUpb_FieldType_Bytes */
8423 4, /* kUpb_FieldType_UInt32 */
8424 4, /* kUpb_FieldType_Enum */
8425 4, /* kUpb_FieldType_SFixed32 */
8426 8, /* kUpb_FieldType_SFixed64 */
8427 4, /* kUpb_FieldType_SInt32 */
8428 8, /* kUpb_FieldType_SInt64 */
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008429 };
Joshua Habermanf41049a2022-01-21 14:41:25 -08008430 return upb_IsRepeatedOrMap(f) ? sizeof(void*) : sizes[f->descriptortype];
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008431}
8432
8433/* Strings/bytes are special-cased in maps. */
/* Size table with one slot per C type; the per-slot comments give the
 * intended kUpb_CType_* index.  String and bytes are listed as 0 because they
 * are special-cased in maps.  Declared const like the neighbouring
 * _upb_CTypeo_sizelg2 table: it is a lookup table and should not be
 * writable. */
static const char _upb_CTypeo_mapsize[12] = {
    0,
    1,             /* kUpb_CType_Bool */
    4,             /* kUpb_CType_Float */
    4,             /* kUpb_CType_Int32 */
    4,             /* kUpb_CType_UInt32 */
    4,             /* kUpb_CType_Enum */
    sizeof(void*), /* kUpb_CType_Message */
    8,             /* kUpb_CType_Double */
    8,             /* kUpb_CType_Int64 */
    8,             /* kUpb_CType_UInt64 */
    0,             /* kUpb_CType_String */
    0,             /* kUpb_CType_Bytes */
};
8448
Joshua Habermanf41049a2022-01-21 14:41:25 -08008449static const char _upb_CTypeo_sizelg2[12] = {
8450 0,
8451 0, /* kUpb_CType_Bool */
8452 2, /* kUpb_CType_Float */
8453 2, /* kUpb_CType_Int32 */
8454 2, /* kUpb_CType_UInt32 */
8455 2, /* kUpb_CType_Enum */
8456 UPB_SIZE(2, 3), /* kUpb_CType_Message */
8457 3, /* kUpb_CType_Double */
8458 3, /* kUpb_CType_Int64 */
8459 3, /* kUpb_CType_UInt64 */
8460 UPB_SIZE(3, 4), /* kUpb_CType_String */
8461 UPB_SIZE(3, 4), /* kUpb_CType_Bytes */
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008462};
8463
Joshua Habermanf41049a2022-01-21 14:41:25 -08008464/** upb_Message
8465 * *******************************************************************/
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008466
Joshua Habermanf41049a2022-01-21 14:41:25 -08008467upb_Message* upb_Message_New(const upb_MessageDef* m, upb_Arena* a) {
8468 return _upb_Message_New(upb_MessageDef_MiniTable(m), a);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008469}
8470
Joshua Habermanf41049a2022-01-21 14:41:25 -08008471static bool in_oneof(const upb_MiniTable_Field* field) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008472 return field->presence < 0;
8473}
8474
Joshua Habermanf41049a2022-01-21 14:41:25 -08008475static upb_MessageValue _upb_Message_Getraw(const upb_Message* msg,
8476 const upb_FieldDef* f) {
8477 const upb_MiniTable_Field* field = upb_FieldDef_MiniTable(f);
8478 const char* mem = UPB_PTR_AT(msg, field->offset, char);
8479 upb_MessageValue val = {0};
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008480 memcpy(&val, mem, get_field_size(field));
8481 return val;
8482}
8483
Joshua Habermanf41049a2022-01-21 14:41:25 -08008484bool upb_Message_Has(const upb_Message* msg, const upb_FieldDef* f) {
8485 assert(upb_FieldDef_HasPresence(f));
8486 if (upb_FieldDef_IsExtension(f)) {
8487 const upb_MiniTable_Extension* ext = _upb_FieldDef_ExtensionMiniTable(f);
8488 return _upb_Message_Getext(msg, ext) != NULL;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008489 } else {
Joshua Habermanf41049a2022-01-21 14:41:25 -08008490 const upb_MiniTable_Field* field = upb_FieldDef_MiniTable(f);
8491 if (in_oneof(field)) {
8492 return _upb_getoneofcase_field(msg, field) == field->number;
8493 } else if (field->presence > 0) {
8494 return _upb_hasbit_field(msg, field);
8495 } else {
8496 UPB_ASSERT(field->descriptortype == kUpb_FieldType_Message ||
8497 field->descriptortype == kUpb_FieldType_Group);
8498 return _upb_Message_Getraw(msg, f).msg_val != NULL;
8499 }
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008500 }
8501}
8502
Joshua Habermanf41049a2022-01-21 14:41:25 -08008503const upb_FieldDef* upb_Message_WhichOneof(const upb_Message* msg,
8504 const upb_OneofDef* o) {
8505 const upb_FieldDef* f = upb_OneofDef_Field(o, 0);
8506 if (upb_OneofDef_IsSynthetic(o)) {
8507 UPB_ASSERT(upb_OneofDef_FieldCount(o) == 1);
8508 return upb_Message_Has(msg, f) ? f : NULL;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008509 } else {
Joshua Habermanf41049a2022-01-21 14:41:25 -08008510 const upb_MiniTable_Field* field = upb_FieldDef_MiniTable(f);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008511 uint32_t oneof_case = _upb_getoneofcase_field(msg, field);
Joshua Habermanf41049a2022-01-21 14:41:25 -08008512 f = oneof_case ? upb_OneofDef_LookupNumber(o, oneof_case) : NULL;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008513 UPB_ASSERT((f != NULL) == (oneof_case != 0));
8514 return f;
8515 }
8516}
8517
Joshua Habermanf41049a2022-01-21 14:41:25 -08008518upb_MessageValue upb_Message_Get(const upb_Message* msg,
8519 const upb_FieldDef* f) {
8520 if (upb_FieldDef_IsExtension(f)) {
8521 const upb_Message_Extension* ext =
8522 _upb_Message_Getext(msg, _upb_FieldDef_ExtensionMiniTable(f));
8523 if (ext) {
8524 upb_MessageValue val;
8525 memcpy(&val, &ext->data, sizeof(val));
8526 return val;
8527 } else if (upb_FieldDef_IsRepeated(f)) {
8528 return (upb_MessageValue){.array_val = NULL};
8529 }
8530 } else if (!upb_FieldDef_HasPresence(f) || upb_Message_Has(msg, f)) {
8531 return _upb_Message_Getraw(msg, f);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008532 }
Joshua Habermanf41049a2022-01-21 14:41:25 -08008533 return upb_FieldDef_Default(f);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008534}
8535
Joshua Habermanf41049a2022-01-21 14:41:25 -08008536upb_MutableMessageValue upb_Message_Mutable(upb_Message* msg,
8537 const upb_FieldDef* f,
8538 upb_Arena* a) {
8539 UPB_ASSERT(upb_FieldDef_IsSubMessage(f) || upb_FieldDef_IsRepeated(f));
8540 if (upb_FieldDef_HasPresence(f) && !upb_Message_Has(msg, f)) {
8541 // We need to skip the upb_Message_Get() call in this case.
8542 goto make;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008543 }
Joshua Habermanf41049a2022-01-21 14:41:25 -08008544
8545 upb_MessageValue val = upb_Message_Get(msg, f);
8546 if (val.array_val) {
8547 return (upb_MutableMessageValue){.array = (upb_Array*)val.array_val};
8548 }
8549
8550 upb_MutableMessageValue ret;
8551make:
8552 if (!a) return (upb_MutableMessageValue){.array = NULL};
8553 if (upb_FieldDef_IsMap(f)) {
8554 const upb_MessageDef* entry = upb_FieldDef_MessageSubDef(f);
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07008555 const upb_FieldDef* key =
8556 upb_MessageDef_FindFieldByNumber(entry, kUpb_MapEntry_KeyFieldNumber);
8557 const upb_FieldDef* value =
8558 upb_MessageDef_FindFieldByNumber(entry, kUpb_MapEntry_ValueFieldNumber);
Joshua Habermanf41049a2022-01-21 14:41:25 -08008559 ret.map =
8560 upb_Map_New(a, upb_FieldDef_CType(key), upb_FieldDef_CType(value));
8561 } else if (upb_FieldDef_IsRepeated(f)) {
8562 ret.array = upb_Array_New(a, upb_FieldDef_CType(f));
8563 } else {
8564 UPB_ASSERT(upb_FieldDef_IsSubMessage(f));
8565 ret.msg = upb_Message_New(upb_FieldDef_MessageSubDef(f), a);
8566 }
8567
8568 val.array_val = ret.array;
8569 upb_Message_Set(msg, f, val, a);
8570
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008571 return ret;
8572}
8573
Joshua Habermanf41049a2022-01-21 14:41:25 -08008574bool upb_Message_Set(upb_Message* msg, const upb_FieldDef* f,
8575 upb_MessageValue val, upb_Arena* a) {
8576 if (upb_FieldDef_IsExtension(f)) {
8577 upb_Message_Extension* ext = _upb_Message_Getorcreateext(
8578 msg, _upb_FieldDef_ExtensionMiniTable(f), a);
8579 if (!ext) return false;
8580 memcpy(&ext->data, &val, sizeof(val));
8581 } else {
8582 const upb_MiniTable_Field* field = upb_FieldDef_MiniTable(f);
8583 char* mem = UPB_PTR_AT(msg, field->offset, char);
8584 memcpy(mem, &val, get_field_size(field));
8585 if (field->presence > 0) {
8586 _upb_sethas_field(msg, field);
8587 } else if (in_oneof(field)) {
8588 *_upb_oneofcase_field(msg, field) = field->number;
8589 }
8590 }
8591 return true;
8592}
8593
8594void upb_Message_ClearField(upb_Message* msg, const upb_FieldDef* f) {
8595 if (upb_FieldDef_IsExtension(f)) {
8596 _upb_Message_Clearext(msg, _upb_FieldDef_ExtensionMiniTable(f));
8597 } else {
8598 const upb_MiniTable_Field* field = upb_FieldDef_MiniTable(f);
8599 char* mem = UPB_PTR_AT(msg, field->offset, char);
8600
8601 if (field->presence > 0) {
8602 _upb_clearhas_field(msg, field);
8603 } else if (in_oneof(field)) {
8604 uint32_t* oneof_case = _upb_oneofcase_field(msg, field);
8605 if (*oneof_case != field->number) return;
8606 *oneof_case = 0;
8607 }
8608
8609 memset(mem, 0, get_field_size(field));
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008610 }
8611}
8612
Joshua Habermanf41049a2022-01-21 14:41:25 -08008613void upb_Message_Clear(upb_Message* msg, const upb_MessageDef* m) {
8614 _upb_Message_Clear(msg, upb_MessageDef_MiniTable(m));
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008615}
8616
Joshua Habermanf41049a2022-01-21 14:41:25 -08008617bool upb_Message_Next(const upb_Message* msg, const upb_MessageDef* m,
8618 const upb_DefPool* ext_pool, const upb_FieldDef** out_f,
8619 upb_MessageValue* out_val, size_t* iter) {
8620 size_t i = *iter;
8621 size_t n = upb_MessageDef_FieldCount(m);
8622 const upb_MessageValue zero = {0};
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008623 UPB_UNUSED(ext_pool);
Joshua Habermanf41049a2022-01-21 14:41:25 -08008624
8625 /* Iterate over normal fields, returning the first one that is set. */
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008626 while (++i < n) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08008627 const upb_FieldDef* f = upb_MessageDef_Field(m, i);
8628 upb_MessageValue val = _upb_Message_Getraw(msg, f);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008629
8630 /* Skip field if unset or empty. */
Joshua Habermanf41049a2022-01-21 14:41:25 -08008631 if (upb_FieldDef_HasPresence(f)) {
8632 if (!upb_Message_Has(msg, f)) continue;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008633 } else {
Joshua Habermanf41049a2022-01-21 14:41:25 -08008634 upb_MessageValue test = val;
8635 if (upb_FieldDef_IsString(f) && !upb_FieldDef_IsRepeated(f)) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008636 /* Clear string pointer, only size matters (ptr could be non-NULL). */
8637 test.str_val.data = NULL;
8638 }
8639 /* Continue if NULL or 0. */
8640 if (memcmp(&test, &zero, sizeof(test)) == 0) continue;
8641
8642 /* Continue on empty array or map. */
Joshua Habermanf41049a2022-01-21 14:41:25 -08008643 if (upb_FieldDef_IsMap(f)) {
8644 if (upb_Map_Size(test.map_val) == 0) continue;
8645 } else if (upb_FieldDef_IsRepeated(f)) {
8646 if (upb_Array_Size(test.array_val) == 0) continue;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008647 }
8648 }
8649
8650 *out_val = val;
8651 *out_f = f;
8652 *iter = i;
8653 return true;
8654 }
Joshua Habermanf41049a2022-01-21 14:41:25 -08008655
8656 if (ext_pool) {
8657 /* Return any extensions that are set. */
8658 size_t count;
8659 const upb_Message_Extension* ext = _upb_Message_Getexts(msg, &count);
8660 if (i - n < count) {
8661 ext += count - 1 - (i - n);
8662 memcpy(out_val, &ext->data, sizeof(*out_val));
8663 *out_f = _upb_DefPool_FindExtensionByMiniTable(ext_pool, ext->ext);
8664 *iter = i;
8665 return true;
8666 }
8667 }
8668
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008669 *iter = i;
8670 return false;
8671}
8672
Joshua Habermanf41049a2022-01-21 14:41:25 -08008673bool _upb_Message_DiscardUnknown(upb_Message* msg, const upb_MessageDef* m,
8674 int depth) {
8675 size_t iter = kUpb_Message_Begin;
8676 const upb_FieldDef* f;
8677 upb_MessageValue val;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008678 bool ret = true;
8679
8680 if (--depth == 0) return false;
8681
Joshua Habermanf41049a2022-01-21 14:41:25 -08008682 _upb_Message_DiscardUnknown_shallow(msg);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008683
Joshua Habermanf41049a2022-01-21 14:41:25 -08008684 while (upb_Message_Next(msg, m, NULL /*ext_pool*/, &f, &val, &iter)) {
8685 const upb_MessageDef* subm = upb_FieldDef_MessageSubDef(f);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008686 if (!subm) continue;
Joshua Habermanf41049a2022-01-21 14:41:25 -08008687 if (upb_FieldDef_IsMap(f)) {
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07008688 const upb_FieldDef* val_f = upb_MessageDef_FindFieldByNumber(subm, 2);
Joshua Habermanf41049a2022-01-21 14:41:25 -08008689 const upb_MessageDef* val_m = upb_FieldDef_MessageSubDef(val_f);
8690 upb_Map* map = (upb_Map*)val.map_val;
8691 size_t iter = kUpb_Map_Begin;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008692
8693 if (!val_m) continue;
8694
Joshua Habermanf41049a2022-01-21 14:41:25 -08008695 while (upb_MapIterator_Next(map, &iter)) {
8696 upb_MessageValue map_val = upb_MapIterator_Value(map, iter);
8697 if (!_upb_Message_DiscardUnknown((upb_Message*)map_val.msg_val, val_m,
8698 depth)) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008699 ret = false;
8700 }
8701 }
Joshua Habermanf41049a2022-01-21 14:41:25 -08008702 } else if (upb_FieldDef_IsRepeated(f)) {
8703 const upb_Array* arr = val.array_val;
8704 size_t i, n = upb_Array_Size(arr);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008705 for (i = 0; i < n; i++) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08008706 upb_MessageValue elem = upb_Array_Get(arr, i);
8707 if (!_upb_Message_DiscardUnknown((upb_Message*)elem.msg_val, subm,
8708 depth)) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008709 ret = false;
8710 }
8711 }
8712 } else {
Joshua Habermanf41049a2022-01-21 14:41:25 -08008713 if (!_upb_Message_DiscardUnknown((upb_Message*)val.msg_val, subm,
8714 depth)) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008715 ret = false;
8716 }
8717 }
8718 }
8719
8720 return ret;
8721}
8722
Joshua Habermanf41049a2022-01-21 14:41:25 -08008723bool upb_Message_DiscardUnknown(upb_Message* msg, const upb_MessageDef* m,
8724 int maxdepth) {
8725 return _upb_Message_DiscardUnknown(msg, m, maxdepth);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008726}
8727
Joshua Habermanf41049a2022-01-21 14:41:25 -08008728/** upb_Array *****************************************************************/
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008729
Joshua Habermanf41049a2022-01-21 14:41:25 -08008730upb_Array* upb_Array_New(upb_Arena* a, upb_CType type) {
8731 return _upb_Array_New(a, 4, _upb_CTypeo_sizelg2[type]);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008732}
8733
Joshua Habermanf41049a2022-01-21 14:41:25 -08008734size_t upb_Array_Size(const upb_Array* arr) { return arr->len; }
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008735
Joshua Habermanf41049a2022-01-21 14:41:25 -08008736upb_MessageValue upb_Array_Get(const upb_Array* arr, size_t i) {
8737 upb_MessageValue ret;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008738 const char* data = _upb_array_constptr(arr);
8739 int lg2 = arr->data & 7;
8740 UPB_ASSERT(i < arr->len);
8741 memcpy(&ret, data + (i << lg2), 1 << lg2);
8742 return ret;
8743}
8744
Joshua Habermanf41049a2022-01-21 14:41:25 -08008745void upb_Array_Set(upb_Array* arr, size_t i, upb_MessageValue val) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008746 char* data = _upb_array_ptr(arr);
8747 int lg2 = arr->data & 7;
8748 UPB_ASSERT(i < arr->len);
8749 memcpy(data + (i << lg2), &val, 1 << lg2);
8750}
8751
Joshua Habermanf41049a2022-01-21 14:41:25 -08008752bool upb_Array_Append(upb_Array* arr, upb_MessageValue val, upb_Arena* arena) {
8753 if (!upb_Array_Resize(arr, arr->len + 1, arena)) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008754 return false;
8755 }
Joshua Habermanf41049a2022-01-21 14:41:25 -08008756 upb_Array_Set(arr, arr->len - 1, val);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008757 return true;
8758}
8759
Joshua Habermanf41049a2022-01-21 14:41:25 -08008760void upb_Array_Move(upb_Array* arr, size_t dst_idx, size_t src_idx,
8761 size_t count) {
8762 char* data = _upb_array_ptr(arr);
8763 int lg2 = arr->data & 7;
8764 memmove(&data[dst_idx << lg2], &data[src_idx << lg2], count << lg2);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008765}
8766
Joshua Habermanf41049a2022-01-21 14:41:25 -08008767bool upb_Array_Insert(upb_Array* arr, size_t i, size_t count,
8768 upb_Arena* arena) {
8769 UPB_ASSERT(i <= arr->len);
8770 UPB_ASSERT(count + arr->len >= count);
8771 size_t oldsize = arr->len;
8772 if (!upb_Array_Resize(arr, arr->len + count, arena)) {
8773 return false;
8774 }
8775 upb_Array_Move(arr, i + count, i, oldsize - i);
8776 return true;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008777}
8778
Joshua Habermanf41049a2022-01-21 14:41:25 -08008779/*
8780 * i end arr->len
8781 * |------------|XXXXXXXX|--------|
8782 */
8783void upb_Array_Delete(upb_Array* arr, size_t i, size_t count) {
8784 size_t end = i + count;
8785 UPB_ASSERT(i <= end);
8786 UPB_ASSERT(end <= arr->len);
8787 upb_Array_Move(arr, i, end, arr->len - end);
8788 arr->len -= count;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008789}
8790
Joshua Habermanf41049a2022-01-21 14:41:25 -08008791bool upb_Array_Resize(upb_Array* arr, size_t size, upb_Arena* arena) {
8792 return _upb_Array_Resize(arr, size, arena);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008793}
8794
Joshua Habermanf41049a2022-01-21 14:41:25 -08008795/** upb_Map *******************************************************************/
8796
8797upb_Map* upb_Map_New(upb_Arena* a, upb_CType key_type, upb_CType value_type) {
8798 return _upb_Map_New(a, _upb_CTypeo_mapsize[key_type],
8799 _upb_CTypeo_mapsize[value_type]);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008800}
8801
Joshua Habermanf41049a2022-01-21 14:41:25 -08008802size_t upb_Map_Size(const upb_Map* map) { return _upb_Map_Size(map); }
8803
8804bool upb_Map_Get(const upb_Map* map, upb_MessageValue key,
8805 upb_MessageValue* val) {
8806 return _upb_Map_Get(map, &key, map->key_size, val, map->val_size);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008807}
8808
Joshua Habermanf41049a2022-01-21 14:41:25 -08008809void upb_Map_Clear(upb_Map* map) { _upb_Map_Clear(map); }
8810
8811bool upb_Map_Set(upb_Map* map, upb_MessageValue key, upb_MessageValue val,
8812 upb_Arena* arena) {
8813 return _upb_Map_Set(map, &key, map->key_size, &val, map->val_size, arena);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008814}
8815
Joshua Habermanf41049a2022-01-21 14:41:25 -08008816bool upb_Map_Delete(upb_Map* map, upb_MessageValue key) {
8817 return _upb_Map_Delete(map, &key, map->key_size);
8818}
8819
8820bool upb_MapIterator_Next(const upb_Map* map, size_t* iter) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008821 return _upb_map_next(map, iter);
8822}
8823
Joshua Habermanf41049a2022-01-21 14:41:25 -08008824bool upb_MapIterator_Done(const upb_Map* map, size_t iter) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008825 upb_strtable_iter i;
Joshua Habermanf41049a2022-01-21 14:41:25 -08008826 UPB_ASSERT(iter != kUpb_Map_Begin);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008827 i.t = &map->table;
8828 i.index = iter;
8829 return upb_strtable_done(&i);
8830}
8831
8832/* Returns the key and value for this entry of the map. */
Joshua Habermanf41049a2022-01-21 14:41:25 -08008833upb_MessageValue upb_MapIterator_Key(const upb_Map* map, size_t iter) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008834 upb_strtable_iter i;
Joshua Habermanf41049a2022-01-21 14:41:25 -08008835 upb_MessageValue ret;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008836 i.t = &map->table;
8837 i.index = iter;
8838 _upb_map_fromkey(upb_strtable_iter_key(&i), &ret, map->key_size);
8839 return ret;
8840}
8841
Joshua Habermanf41049a2022-01-21 14:41:25 -08008842upb_MessageValue upb_MapIterator_Value(const upb_Map* map, size_t iter) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008843 upb_strtable_iter i;
Joshua Habermanf41049a2022-01-21 14:41:25 -08008844 upb_MessageValue ret;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008845 i.t = &map->table;
8846 i.index = iter;
8847 _upb_map_fromvalue(upb_strtable_iter_value(&i), &ret, map->val_size);
8848 return ret;
8849}
8850
Joshua Habermanf41049a2022-01-21 14:41:25 -08008851/* void upb_MapIterator_SetValue(upb_Map *map, size_t iter, upb_MessageValue
8852 * value); */
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008853
Joshua Habermandd69a482021-05-17 22:40:33 -07008854/** upb/json_decode.c ************************************************************/
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008855
8856#include <errno.h>
8857#include <float.h>
8858#include <inttypes.h>
8859#include <limits.h>
8860#include <math.h>
8861#include <setjmp.h>
8862#include <stdlib.h>
8863#include <string.h>
8864
8865
8866/* Special header, must be included last. */
8867
8868typedef struct {
8869 const char *ptr, *end;
Joshua Habermanf41049a2022-01-21 14:41:25 -08008870 upb_Arena* arena; /* TODO: should we have a tmp arena for tmp data? */
8871 const upb_DefPool* symtab;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008872 int depth;
Joshua Habermanf41049a2022-01-21 14:41:25 -08008873 upb_Status* status;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008874 jmp_buf err;
8875 int line;
Joshua Habermanf41049a2022-01-21 14:41:25 -08008876 const char* line_begin;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008877 bool is_first;
8878 int options;
Joshua Habermanf41049a2022-01-21 14:41:25 -08008879 const upb_FieldDef* debug_field;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008880} jsondec;
8881
/* Kind of the next JSON value, as classified by jsondec_rawpeek() from its
 * first character. */
enum {
  JD_OBJECT, /* '{' */
  JD_ARRAY,  /* '[' */
  JD_STRING, /* '"' */
  JD_NUMBER, /* '-' or a digit */
  JD_TRUE,   /* 't' */
  JD_FALSE,  /* 'f' */
  JD_NULL    /* 'n' */
};
8883
8884/* Forward declarations of mutually-recursive functions. */
Joshua Habermanf41049a2022-01-21 14:41:25 -08008885static void jsondec_wellknown(jsondec* d, upb_Message* msg,
8886 const upb_MessageDef* m);
8887static upb_MessageValue jsondec_value(jsondec* d, const upb_FieldDef* f);
8888static void jsondec_wellknownvalue(jsondec* d, upb_Message* msg,
8889 const upb_MessageDef* m);
8890static void jsondec_object(jsondec* d, upb_Message* msg,
8891 const upb_MessageDef* m);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008892
Joshua Habermanf41049a2022-01-21 14:41:25 -08008893static bool jsondec_streql(upb_StringView str, const char* lit) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008894 return str.size == strlen(lit) && memcmp(str.data, lit, str.size) == 0;
8895}
8896
Joshua Habermanf41049a2022-01-21 14:41:25 -08008897static bool jsondec_isnullvalue(const upb_FieldDef* f) {
8898 return upb_FieldDef_CType(f) == kUpb_CType_Enum &&
8899 strcmp(upb_EnumDef_FullName(upb_FieldDef_EnumSubDef(f)),
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008900 "google.protobuf.NullValue") == 0;
8901}
8902
Joshua Habermanf41049a2022-01-21 14:41:25 -08008903static bool jsondec_isvalue(const upb_FieldDef* f) {
8904 return (upb_FieldDef_CType(f) == kUpb_CType_Message &&
8905 upb_MessageDef_WellKnownType(upb_FieldDef_MessageSubDef(f)) ==
8906 kUpb_WellKnown_Value) ||
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008907 jsondec_isnullvalue(f);
8908}
8909
Joshua Habermanf41049a2022-01-21 14:41:25 -08008910UPB_NORETURN static void jsondec_err(jsondec* d, const char* msg) {
8911 upb_Status_SetErrorFormat(d->status, "Error parsing JSON @%d:%d: %s", d->line,
8912 (int)(d->ptr - d->line_begin), msg);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008913 UPB_LONGJMP(d->err, 1);
8914}
8915
8916UPB_PRINTF(2, 3)
Joshua Habermanf41049a2022-01-21 14:41:25 -08008917UPB_NORETURN static void jsondec_errf(jsondec* d, const char* fmt, ...) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008918 va_list argp;
Joshua Habermanf41049a2022-01-21 14:41:25 -08008919 upb_Status_SetErrorFormat(d->status, "Error parsing JSON @%d:%d: ", d->line,
8920 (int)(d->ptr - d->line_begin));
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008921 va_start(argp, fmt);
Joshua Habermanf41049a2022-01-21 14:41:25 -08008922 upb_Status_VAppendErrorFormat(d->status, fmt, argp);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008923 va_end(argp);
8924 UPB_LONGJMP(d->err, 1);
8925}
8926
Joshua Habermanf41049a2022-01-21 14:41:25 -08008927static void jsondec_skipws(jsondec* d) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008928 while (d->ptr != d->end) {
8929 switch (*d->ptr) {
8930 case '\n':
8931 d->line++;
8932 d->line_begin = d->ptr;
8933 /* Fallthrough. */
8934 case '\r':
8935 case '\t':
8936 case ' ':
8937 d->ptr++;
8938 break;
8939 default:
8940 return;
8941 }
8942 }
8943 jsondec_err(d, "Unexpected EOF");
8944}
8945
Joshua Habermanf41049a2022-01-21 14:41:25 -08008946static bool jsondec_tryparsech(jsondec* d, char ch) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008947 if (d->ptr == d->end || *d->ptr != ch) return false;
8948 d->ptr++;
8949 return true;
8950}
8951
Joshua Habermanf41049a2022-01-21 14:41:25 -08008952static void jsondec_parselit(jsondec* d, const char* lit) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008953 size_t avail = d->end - d->ptr;
8954 size_t len = strlen(lit);
8955 if (avail < len || memcmp(d->ptr, lit, len) != 0) {
8956 jsondec_errf(d, "Expected: '%s'", lit);
8957 }
8958 d->ptr += len;
8959}
8960
Joshua Habermanf41049a2022-01-21 14:41:25 -08008961static void jsondec_wsch(jsondec* d, char ch) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008962 jsondec_skipws(d);
8963 if (!jsondec_tryparsech(d, ch)) {
8964 jsondec_errf(d, "Expected: '%c'", ch);
8965 }
8966}
8967
Joshua Habermanf41049a2022-01-21 14:41:25 -08008968static void jsondec_true(jsondec* d) { jsondec_parselit(d, "true"); }
8969static void jsondec_false(jsondec* d) { jsondec_parselit(d, "false"); }
8970static void jsondec_null(jsondec* d) { jsondec_parselit(d, "null"); }
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008971
Joshua Habermanf41049a2022-01-21 14:41:25 -08008972static void jsondec_entrysep(jsondec* d) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008973 jsondec_skipws(d);
8974 jsondec_parselit(d, ":");
8975}
8976
Joshua Habermanf41049a2022-01-21 14:41:25 -08008977static int jsondec_rawpeek(jsondec* d) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08008978 switch (*d->ptr) {
8979 case '{':
8980 return JD_OBJECT;
8981 case '[':
8982 return JD_ARRAY;
8983 case '"':
8984 return JD_STRING;
8985 case '-':
8986 case '0':
8987 case '1':
8988 case '2':
8989 case '3':
8990 case '4':
8991 case '5':
8992 case '6':
8993 case '7':
8994 case '8':
8995 case '9':
8996 return JD_NUMBER;
8997 case 't':
8998 return JD_TRUE;
8999 case 'f':
9000 return JD_FALSE;
9001 case 'n':
9002 return JD_NULL;
9003 default:
9004 jsondec_errf(d, "Unexpected character: '%c'", *d->ptr);
9005 }
9006}
9007
9008/* JSON object/array **********************************************************/
9009
9010/* These are used like so:
9011 *
9012 * jsondec_objstart(d);
9013 * while (jsondec_objnext(d)) {
9014 * ...
9015 * }
9016 * jsondec_objend(d) */
9017
Joshua Habermanf41049a2022-01-21 14:41:25 -08009018static int jsondec_peek(jsondec* d) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009019 jsondec_skipws(d);
9020 return jsondec_rawpeek(d);
9021}
9022
Joshua Habermanf41049a2022-01-21 14:41:25 -08009023static void jsondec_push(jsondec* d) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009024 if (--d->depth < 0) {
9025 jsondec_err(d, "Recursion limit exceeded");
9026 }
9027 d->is_first = true;
9028}
9029
Joshua Habermanf41049a2022-01-21 14:41:25 -08009030static bool jsondec_seqnext(jsondec* d, char end_ch) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009031 bool is_first = d->is_first;
9032 d->is_first = false;
9033 jsondec_skipws(d);
9034 if (*d->ptr == end_ch) return false;
9035 if (!is_first) jsondec_parselit(d, ",");
9036 return true;
9037}
9038
Joshua Habermanf41049a2022-01-21 14:41:25 -08009039static void jsondec_arrstart(jsondec* d) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009040 jsondec_push(d);
9041 jsondec_wsch(d, '[');
9042}
9043
Joshua Habermanf41049a2022-01-21 14:41:25 -08009044static void jsondec_arrend(jsondec* d) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009045 d->depth++;
9046 jsondec_wsch(d, ']');
9047}
9048
Joshua Habermanf41049a2022-01-21 14:41:25 -08009049static bool jsondec_arrnext(jsondec* d) { return jsondec_seqnext(d, ']'); }
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009050
Joshua Habermanf41049a2022-01-21 14:41:25 -08009051static void jsondec_objstart(jsondec* d) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009052 jsondec_push(d);
9053 jsondec_wsch(d, '{');
9054}
9055
Joshua Habermanf41049a2022-01-21 14:41:25 -08009056static void jsondec_objend(jsondec* d) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009057 d->depth++;
9058 jsondec_wsch(d, '}');
9059}
9060
Joshua Habermanf41049a2022-01-21 14:41:25 -08009061static bool jsondec_objnext(jsondec* d) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009062 if (!jsondec_seqnext(d, '}')) return false;
9063 if (jsondec_peek(d) != JD_STRING) {
9064 jsondec_err(d, "Object must start with string");
9065 }
9066 return true;
9067}
9068
9069/* JSON number ****************************************************************/
9070
Joshua Habermanf41049a2022-01-21 14:41:25 -08009071static bool jsondec_tryskipdigits(jsondec* d) {
9072 const char* start = d->ptr;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009073
9074 while (d->ptr < d->end) {
9075 if (*d->ptr < '0' || *d->ptr > '9') {
9076 break;
9077 }
9078 d->ptr++;
9079 }
9080
9081 return d->ptr != start;
9082}
9083
Joshua Habermanf41049a2022-01-21 14:41:25 -08009084static void jsondec_skipdigits(jsondec* d) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009085 if (!jsondec_tryskipdigits(d)) {
9086 jsondec_err(d, "Expected one or more digits");
9087 }
9088}
9089
Joshua Habermanf41049a2022-01-21 14:41:25 -08009090static double jsondec_number(jsondec* d) {
9091 const char* start = d->ptr;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009092
9093 assert(jsondec_rawpeek(d) == JD_NUMBER);
9094
9095 /* Skip over the syntax of a number, as specified by JSON. */
9096 if (*d->ptr == '-') d->ptr++;
9097
9098 if (jsondec_tryparsech(d, '0')) {
9099 if (jsondec_tryskipdigits(d)) {
9100 jsondec_err(d, "number cannot have leading zero");
9101 }
9102 } else {
9103 jsondec_skipdigits(d);
9104 }
9105
9106 if (d->ptr == d->end) goto parse;
9107 if (jsondec_tryparsech(d, '.')) {
9108 jsondec_skipdigits(d);
9109 }
9110 if (d->ptr == d->end) goto parse;
9111
9112 if (*d->ptr == 'e' || *d->ptr == 'E') {
9113 d->ptr++;
9114 if (d->ptr == d->end) {
9115 jsondec_err(d, "Unexpected EOF in number");
9116 }
9117 if (*d->ptr == '+' || *d->ptr == '-') {
9118 d->ptr++;
9119 }
9120 jsondec_skipdigits(d);
9121 }
9122
9123parse:
9124 /* Having verified the syntax of a JSON number, use strtod() to parse
9125 * (strtod() accepts a superset of JSON syntax). */
9126 errno = 0;
9127 {
9128 char* end;
9129 double val = strtod(start, &end);
9130 assert(end == d->ptr);
9131
9132 /* Currently the min/max-val conformance tests fail if we check this. Does
9133 * this mean the conformance tests are wrong or strtod() is wrong, or
9134 * something else? Investigate further. */
9135 /*
9136 if (errno == ERANGE) {
9137 jsondec_err(d, "Number out of range");
9138 }
9139 */
9140
9141 if (val > DBL_MAX || val < -DBL_MAX) {
9142 jsondec_err(d, "Number out of range");
9143 }
9144
9145 return val;
9146 }
9147}
9148
9149/* JSON string ****************************************************************/
9150
Joshua Habermanf41049a2022-01-21 14:41:25 -08009151static char jsondec_escape(jsondec* d) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009152 switch (*d->ptr++) {
9153 case '"':
9154 return '\"';
9155 case '\\':
9156 return '\\';
9157 case '/':
9158 return '/';
9159 case 'b':
9160 return '\b';
9161 case 'f':
9162 return '\f';
9163 case 'n':
9164 return '\n';
9165 case 'r':
9166 return '\r';
9167 case 't':
9168 return '\t';
9169 default:
9170 jsondec_err(d, "Invalid escape char");
9171 }
9172}
9173
Joshua Habermanf41049a2022-01-21 14:41:25 -08009174static uint32_t jsondec_codepoint(jsondec* d) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009175 uint32_t cp = 0;
Joshua Habermanf41049a2022-01-21 14:41:25 -08009176 const char* end;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009177
9178 if (d->end - d->ptr < 4) {
9179 jsondec_err(d, "EOF inside string");
9180 }
9181
9182 end = d->ptr + 4;
9183 while (d->ptr < end) {
9184 char ch = *d->ptr++;
9185 if (ch >= '0' && ch <= '9') {
9186 ch -= '0';
9187 } else if (ch >= 'a' && ch <= 'f') {
9188 ch = ch - 'a' + 10;
9189 } else if (ch >= 'A' && ch <= 'F') {
9190 ch = ch - 'A' + 10;
9191 } else {
9192 jsondec_err(d, "Invalid hex digit");
9193 }
9194 cp = (cp << 4) | ch;
9195 }
9196
9197 return cp;
9198}
9199
9200/* Parses a \uXXXX unicode escape (possibly a surrogate pair). */
Joshua Habermanf41049a2022-01-21 14:41:25 -08009201static size_t jsondec_unicode(jsondec* d, char* out) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009202 uint32_t cp = jsondec_codepoint(d);
9203 if (cp >= 0xd800 && cp <= 0xdbff) {
9204 /* Surrogate pair: two 16-bit codepoints become a 32-bit codepoint. */
9205 uint32_t high = cp;
9206 uint32_t low;
9207 jsondec_parselit(d, "\\u");
9208 low = jsondec_codepoint(d);
9209 if (low < 0xdc00 || low > 0xdfff) {
9210 jsondec_err(d, "Invalid low surrogate");
9211 }
9212 cp = (high & 0x3ff) << 10;
9213 cp |= (low & 0x3ff);
9214 cp += 0x10000;
9215 } else if (cp >= 0xdc00 && cp <= 0xdfff) {
9216 jsondec_err(d, "Unpaired low surrogate");
9217 }
9218
9219 /* Write to UTF-8 */
9220 if (cp <= 0x7f) {
9221 out[0] = cp;
9222 return 1;
9223 } else if (cp <= 0x07FF) {
9224 out[0] = ((cp >> 6) & 0x1F) | 0xC0;
9225 out[1] = ((cp >> 0) & 0x3F) | 0x80;
9226 return 2;
9227 } else if (cp <= 0xFFFF) {
9228 out[0] = ((cp >> 12) & 0x0F) | 0xE0;
9229 out[1] = ((cp >> 6) & 0x3F) | 0x80;
9230 out[2] = ((cp >> 0) & 0x3F) | 0x80;
9231 return 3;
9232 } else if (cp < 0x10FFFF) {
9233 out[0] = ((cp >> 18) & 0x07) | 0xF0;
9234 out[1] = ((cp >> 12) & 0x3f) | 0x80;
9235 out[2] = ((cp >> 6) & 0x3f) | 0x80;
9236 out[3] = ((cp >> 0) & 0x3f) | 0x80;
9237 return 4;
9238 } else {
9239 jsondec_err(d, "Invalid codepoint");
9240 }
9241}
9242
Joshua Habermanf41049a2022-01-21 14:41:25 -08009243static void jsondec_resize(jsondec* d, char** buf, char** end, char** buf_end) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009244 size_t oldsize = *buf_end - *buf;
9245 size_t len = *end - *buf;
9246 size_t size = UPB_MAX(8, 2 * oldsize);
9247
Joshua Habermanf41049a2022-01-21 14:41:25 -08009248 *buf = upb_Arena_Realloc(d->arena, *buf, len, size);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009249 if (!*buf) jsondec_err(d, "Out of memory");
9250
9251 *end = *buf + len;
9252 *buf_end = *buf + size;
9253}
9254
Joshua Habermanf41049a2022-01-21 14:41:25 -08009255static upb_StringView jsondec_string(jsondec* d) {
9256 char* buf = NULL;
9257 char* end = NULL;
9258 char* buf_end = NULL;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009259
9260 jsondec_skipws(d);
9261
9262 if (*d->ptr++ != '"') {
9263 jsondec_err(d, "Expected string");
9264 }
9265
9266 while (d->ptr < d->end) {
9267 char ch = *d->ptr++;
9268
9269 if (end == buf_end) {
9270 jsondec_resize(d, &buf, &end, &buf_end);
9271 }
9272
9273 switch (ch) {
9274 case '"': {
Joshua Habermanf41049a2022-01-21 14:41:25 -08009275 upb_StringView ret;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009276 ret.data = buf;
9277 ret.size = end - buf;
Joshua Habermanf41049a2022-01-21 14:41:25 -08009278 *end = '\0'; /* Needed for possible strtod(). */
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009279 return ret;
9280 }
9281 case '\\':
9282 if (d->ptr == d->end) goto eof;
9283 if (*d->ptr == 'u') {
9284 d->ptr++;
9285 if (buf_end - end < 4) {
9286 /* Allow space for maximum-sized code point (4 bytes). */
9287 jsondec_resize(d, &buf, &end, &buf_end);
9288 }
9289 end += jsondec_unicode(d, end);
9290 } else {
9291 *end++ = jsondec_escape(d);
9292 }
9293 break;
9294 default:
9295 if ((unsigned char)*d->ptr < 0x20) {
9296 jsondec_err(d, "Invalid char in JSON string");
9297 }
9298 *end++ = ch;
9299 break;
9300 }
9301 }
9302
9303eof:
9304 jsondec_err(d, "EOF inside string");
9305}
9306
Joshua Habermanf41049a2022-01-21 14:41:25 -08009307static void jsondec_skipval(jsondec* d) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009308 switch (jsondec_peek(d)) {
9309 case JD_OBJECT:
9310 jsondec_objstart(d);
9311 while (jsondec_objnext(d)) {
9312 jsondec_string(d);
9313 jsondec_entrysep(d);
9314 jsondec_skipval(d);
9315 }
9316 jsondec_objend(d);
9317 break;
9318 case JD_ARRAY:
9319 jsondec_arrstart(d);
9320 while (jsondec_arrnext(d)) {
9321 jsondec_skipval(d);
9322 }
9323 jsondec_arrend(d);
9324 break;
9325 case JD_TRUE:
9326 jsondec_true(d);
9327 break;
9328 case JD_FALSE:
9329 jsondec_false(d);
9330 break;
9331 case JD_NULL:
9332 jsondec_null(d);
9333 break;
9334 case JD_STRING:
9335 jsondec_string(d);
9336 break;
9337 case JD_NUMBER:
9338 jsondec_number(d);
9339 break;
9340 }
9341}
9342
9343/* Base64 decoding for bytes fields. ******************************************/
9344
/* Maps one base64 character to its 6-bit value.
 *
 * Both the standard alphabet ('+', '/') and the URL-safe variant ('-', '_')
 * are accepted.  Any other byte maps to -1, which converts to an unsigned
 * value with every bit set; callers shift and OR the results together and
 * detect bad input by the high bit of the combined value.
 *
 * The argument is indexed as an unsigned char so that bytes >= 0x80 stay
 * inside the 256-entry table even on platforms where plain char is signed. */
static unsigned int jsondec_base64_tablelookup(const char ch) {
  /* Table includes the normal base64 chars plus the URL-safe variant. */
  static const signed char table[256] = {
      /* 0x00 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      /* 0x10 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      /* 0x20 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, 62, -1, 63,
      /* 0x30 */ 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,
      /* 0x40 */ -1, 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14,
      /* 0x50 */ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, 63,
      /* 0x60 */ -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
      /* 0x70 */ 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
      /* 0x80 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      /* 0x90 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      /* 0xA0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      /* 0xB0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      /* 0xC0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      /* 0xD0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      /* 0xE0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      /* 0xF0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1};

  /* Sign-extend return value so high bit will be set on any unexpected char. */
  return table[(unsigned char)ch];
}
9389
Joshua Habermanf41049a2022-01-21 14:41:25 -08009390static char* jsondec_partialbase64(jsondec* d, const char* ptr, const char* end,
9391 char* out) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009392 int32_t val = -1;
9393
9394 switch (end - ptr) {
9395 case 2:
9396 val = jsondec_base64_tablelookup(ptr[0]) << 18 |
9397 jsondec_base64_tablelookup(ptr[1]) << 12;
9398 out[0] = val >> 16;
9399 out += 1;
9400 break;
9401 case 3:
9402 val = jsondec_base64_tablelookup(ptr[0]) << 18 |
9403 jsondec_base64_tablelookup(ptr[1]) << 12 |
9404 jsondec_base64_tablelookup(ptr[2]) << 6;
9405 out[0] = val >> 16;
9406 out[1] = (val >> 8) & 0xff;
9407 out += 2;
9408 break;
9409 }
9410
9411 if (val < 0) {
9412 jsondec_err(d, "Corrupt base64");
9413 }
9414
9415 return out;
9416}
9417
Joshua Habermanf41049a2022-01-21 14:41:25 -08009418static size_t jsondec_base64(jsondec* d, upb_StringView str) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009419 /* We decode in place. This is safe because this is a new buffer (not
9420 * aliasing the input) and because base64 decoding shrinks 4 bytes into 3. */
Joshua Habermanf41049a2022-01-21 14:41:25 -08009421 char* out = (char*)str.data;
9422 const char* ptr = str.data;
9423 const char* end = ptr + str.size;
9424 const char* end4 = ptr + (str.size & -4); /* Round down to multiple of 4. */
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009425
9426 for (; ptr < end4; ptr += 4, out += 3) {
9427 int val = jsondec_base64_tablelookup(ptr[0]) << 18 |
9428 jsondec_base64_tablelookup(ptr[1]) << 12 |
9429 jsondec_base64_tablelookup(ptr[2]) << 6 |
9430 jsondec_base64_tablelookup(ptr[3]) << 0;
9431
9432 if (val < 0) {
9433 /* Junk chars or padding. Remove trailing padding, if any. */
9434 if (end - ptr == 4 && ptr[3] == '=') {
9435 if (ptr[2] == '=') {
9436 end -= 2;
9437 } else {
9438 end -= 1;
9439 }
9440 }
9441 break;
9442 }
9443
9444 out[0] = val >> 16;
9445 out[1] = (val >> 8) & 0xff;
9446 out[2] = val & 0xff;
9447 }
9448
9449 if (ptr < end) {
9450 /* Process remaining chars. We do not require padding. */
9451 out = jsondec_partialbase64(d, ptr, end, out);
9452 }
9453
9454 return out - str.data;
9455}
9456
9457/* Low-level integer parsing **************************************************/
9458
9459/* We use these hand-written routines instead of strto[u]l() because the "long
9460 * long" variants aren't in c89. Also our version allows setting a ptr limit. */
9461
Joshua Habermanf41049a2022-01-21 14:41:25 -08009462static const char* jsondec_buftouint64(jsondec* d, const char* ptr,
9463 const char* end, uint64_t* val) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009464 uint64_t u64 = 0;
9465 while (ptr < end) {
9466 unsigned ch = *ptr - '0';
9467 if (ch >= 10) break;
9468 if (u64 > UINT64_MAX / 10 || u64 * 10 > UINT64_MAX - ch) {
9469 jsondec_err(d, "Integer overflow");
9470 }
9471 u64 *= 10;
9472 u64 += ch;
9473 ptr++;
9474 }
9475
9476 *val = u64;
9477 return ptr;
9478}
9479
Joshua Habermanf41049a2022-01-21 14:41:25 -08009480static const char* jsondec_buftoint64(jsondec* d, const char* ptr,
9481 const char* end, int64_t* val) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009482 bool neg = false;
9483 uint64_t u64;
9484
9485 if (ptr != end && *ptr == '-') {
9486 ptr++;
9487 neg = true;
9488 }
9489
9490 ptr = jsondec_buftouint64(d, ptr, end, &u64);
9491 if (u64 > (uint64_t)INT64_MAX + neg) {
9492 jsondec_err(d, "Integer overflow");
9493 }
9494
9495 *val = neg ? -u64 : u64;
9496 return ptr;
9497}
9498
Joshua Habermanf41049a2022-01-21 14:41:25 -08009499static uint64_t jsondec_strtouint64(jsondec* d, upb_StringView str) {
9500 const char* end = str.data + str.size;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009501 uint64_t ret;
9502 if (jsondec_buftouint64(d, str.data, end, &ret) != end) {
9503 jsondec_err(d, "Non-number characters in quoted integer");
9504 }
9505 return ret;
9506}
9507
Joshua Habermanf41049a2022-01-21 14:41:25 -08009508static int64_t jsondec_strtoint64(jsondec* d, upb_StringView str) {
9509 const char* end = str.data + str.size;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009510 int64_t ret;
9511 if (jsondec_buftoint64(d, str.data, end, &ret) != end) {
9512 jsondec_err(d, "Non-number characters in quoted integer");
9513 }
9514 return ret;
9515}
9516
9517/* Primitive value types ******************************************************/
9518
9519/* Parse INT32 or INT64 value. */
Joshua Habermanf41049a2022-01-21 14:41:25 -08009520static upb_MessageValue jsondec_int(jsondec* d, const upb_FieldDef* f) {
9521 upb_MessageValue val;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009522
9523 switch (jsondec_peek(d)) {
9524 case JD_NUMBER: {
9525 double dbl = jsondec_number(d);
9526 if (dbl > 9223372036854774784.0 || dbl < -9223372036854775808.0) {
9527 jsondec_err(d, "JSON number is out of range.");
9528 }
Joshua Habermanf41049a2022-01-21 14:41:25 -08009529 val.int64_val = dbl; /* must be guarded, overflow here is UB */
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009530 if (val.int64_val != dbl) {
9531 jsondec_errf(d, "JSON number was not integral (%f != %" PRId64 ")", dbl,
9532 val.int64_val);
9533 }
9534 break;
9535 }
9536 case JD_STRING: {
Joshua Habermanf41049a2022-01-21 14:41:25 -08009537 upb_StringView str = jsondec_string(d);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009538 val.int64_val = jsondec_strtoint64(d, str);
9539 break;
9540 }
9541 default:
9542 jsondec_err(d, "Expected number or string");
9543 }
9544
Joshua Habermanf41049a2022-01-21 14:41:25 -08009545 if (upb_FieldDef_CType(f) == kUpb_CType_Int32 ||
9546 upb_FieldDef_CType(f) == kUpb_CType_Enum) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009547 if (val.int64_val > INT32_MAX || val.int64_val < INT32_MIN) {
9548 jsondec_err(d, "Integer out of range.");
9549 }
9550 val.int32_val = (int32_t)val.int64_val;
9551 }
9552
9553 return val;
9554}
9555
9556/* Parse UINT32 or UINT64 value. */
Joshua Habermanf41049a2022-01-21 14:41:25 -08009557static upb_MessageValue jsondec_uint(jsondec* d, const upb_FieldDef* f) {
9558 upb_MessageValue val = {0};
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009559
9560 switch (jsondec_peek(d)) {
9561 case JD_NUMBER: {
9562 double dbl = jsondec_number(d);
9563 if (dbl > 18446744073709549568.0 || dbl < 0) {
9564 jsondec_err(d, "JSON number is out of range.");
9565 }
Joshua Habermanf41049a2022-01-21 14:41:25 -08009566 val.uint64_val = dbl; /* must be guarded, overflow here is UB */
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009567 if (val.uint64_val != dbl) {
9568 jsondec_errf(d, "JSON number was not integral (%f != %" PRIu64 ")", dbl,
9569 val.uint64_val);
9570 }
9571 break;
9572 }
9573 case JD_STRING: {
Joshua Habermanf41049a2022-01-21 14:41:25 -08009574 upb_StringView str = jsondec_string(d);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009575 val.uint64_val = jsondec_strtouint64(d, str);
9576 break;
9577 }
9578 default:
9579 jsondec_err(d, "Expected number or string");
9580 }
9581
Joshua Habermanf41049a2022-01-21 14:41:25 -08009582 if (upb_FieldDef_CType(f) == kUpb_CType_UInt32) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009583 if (val.uint64_val > UINT32_MAX) {
9584 jsondec_err(d, "Integer out of range.");
9585 }
9586 val.uint32_val = (uint32_t)val.uint64_val;
9587 }
9588
9589 return val;
9590}
9591
9592/* Parse DOUBLE or FLOAT value. */
Joshua Habermanf41049a2022-01-21 14:41:25 -08009593static upb_MessageValue jsondec_double(jsondec* d, const upb_FieldDef* f) {
9594 upb_StringView str;
9595 upb_MessageValue val = {0};
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009596
9597 switch (jsondec_peek(d)) {
9598 case JD_NUMBER:
9599 val.double_val = jsondec_number(d);
9600 break;
9601 case JD_STRING:
9602 str = jsondec_string(d);
9603 if (jsondec_streql(str, "NaN")) {
9604 val.double_val = NAN;
9605 } else if (jsondec_streql(str, "Infinity")) {
9606 val.double_val = INFINITY;
9607 } else if (jsondec_streql(str, "-Infinity")) {
9608 val.double_val = -INFINITY;
9609 } else {
9610 val.double_val = strtod(str.data, NULL);
9611 }
9612 break;
9613 default:
9614 jsondec_err(d, "Expected number or string");
9615 }
9616
Joshua Habermanf41049a2022-01-21 14:41:25 -08009617 if (upb_FieldDef_CType(f) == kUpb_CType_Float) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009618 if (val.double_val != INFINITY && val.double_val != -INFINITY &&
9619 (val.double_val > FLT_MAX || val.double_val < -FLT_MAX)) {
9620 jsondec_err(d, "Float out of range");
9621 }
9622 val.float_val = val.double_val;
9623 }
9624
9625 return val;
9626}
9627
9628/* Parse STRING or BYTES value. */
Joshua Habermanf41049a2022-01-21 14:41:25 -08009629static upb_MessageValue jsondec_strfield(jsondec* d, const upb_FieldDef* f) {
9630 upb_MessageValue val;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009631 val.str_val = jsondec_string(d);
Joshua Habermanf41049a2022-01-21 14:41:25 -08009632 if (upb_FieldDef_CType(f) == kUpb_CType_Bytes) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009633 val.str_val.size = jsondec_base64(d, val.str_val);
9634 }
9635 return val;
9636}
9637
Joshua Habermanf41049a2022-01-21 14:41:25 -08009638static upb_MessageValue jsondec_enum(jsondec* d, const upb_FieldDef* f) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009639 switch (jsondec_peek(d)) {
9640 case JD_STRING: {
Joshua Habermanf41049a2022-01-21 14:41:25 -08009641 upb_StringView str = jsondec_string(d);
9642 const upb_EnumDef* e = upb_FieldDef_EnumSubDef(f);
9643 const upb_EnumValueDef* ev =
9644 upb_EnumDef_FindValueByNameWithSize(e, str.data, str.size);
9645 upb_MessageValue val;
9646 if (ev) {
9647 val.int32_val = upb_EnumValueDef_Number(ev);
9648 } else {
9649 if (d->options & upb_JsonDecode_IgnoreUnknown) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009650 val.int32_val = 0;
9651 } else {
Joshua Habermanf41049a2022-01-21 14:41:25 -08009652 jsondec_errf(d, "Unknown enumerator: '" UPB_STRINGVIEW_FORMAT "'",
9653 UPB_STRINGVIEW_ARGS(str));
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009654 }
9655 }
9656 return val;
9657 }
9658 case JD_NULL: {
9659 if (jsondec_isnullvalue(f)) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08009660 upb_MessageValue val;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009661 jsondec_null(d);
9662 val.int32_val = 0;
9663 return val;
9664 }
9665 }
9666 /* Fallthrough. */
9667 default:
9668 return jsondec_int(d, f);
9669 }
9670}
9671
Joshua Habermanf41049a2022-01-21 14:41:25 -08009672static upb_MessageValue jsondec_bool(jsondec* d, const upb_FieldDef* f) {
9673 bool is_map_key = upb_FieldDef_Number(f) == 1 &&
9674 upb_MessageDef_IsMapEntry(upb_FieldDef_ContainingType(f));
9675 upb_MessageValue val;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009676
9677 if (is_map_key) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08009678 upb_StringView str = jsondec_string(d);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009679 if (jsondec_streql(str, "true")) {
9680 val.bool_val = true;
9681 } else if (jsondec_streql(str, "false")) {
9682 val.bool_val = false;
9683 } else {
9684 jsondec_err(d, "Invalid boolean map key");
9685 }
9686 } else {
9687 switch (jsondec_peek(d)) {
9688 case JD_TRUE:
9689 val.bool_val = true;
9690 jsondec_true(d);
9691 break;
9692 case JD_FALSE:
9693 val.bool_val = false;
9694 jsondec_false(d);
9695 break;
9696 default:
9697 jsondec_err(d, "Expected true or false");
9698 }
9699 }
9700
9701 return val;
9702}
9703
9704/* Composite types (array/message/map) ****************************************/
9705
Joshua Habermanf41049a2022-01-21 14:41:25 -08009706static void jsondec_array(jsondec* d, upb_Message* msg, const upb_FieldDef* f) {
9707 upb_Array* arr = upb_Message_Mutable(msg, f, d->arena).array;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009708
9709 jsondec_arrstart(d);
9710 while (jsondec_arrnext(d)) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08009711 upb_MessageValue elem = jsondec_value(d, f);
9712 upb_Array_Append(arr, elem, d->arena);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009713 }
9714 jsondec_arrend(d);
9715}
9716
Joshua Habermanf41049a2022-01-21 14:41:25 -08009717static void jsondec_map(jsondec* d, upb_Message* msg, const upb_FieldDef* f) {
9718 upb_Map* map = upb_Message_Mutable(msg, f, d->arena).map;
9719 const upb_MessageDef* entry = upb_FieldDef_MessageSubDef(f);
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07009720 const upb_FieldDef* key_f = upb_MessageDef_FindFieldByNumber(entry, 1);
9721 const upb_FieldDef* val_f = upb_MessageDef_FindFieldByNumber(entry, 2);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009722
9723 jsondec_objstart(d);
9724 while (jsondec_objnext(d)) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08009725 upb_MessageValue key, val;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009726 key = jsondec_value(d, key_f);
9727 jsondec_entrysep(d);
9728 val = jsondec_value(d, val_f);
Joshua Habermanf41049a2022-01-21 14:41:25 -08009729 upb_Map_Set(map, key, val, d->arena);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009730 }
9731 jsondec_objend(d);
9732}
9733
Joshua Habermanf41049a2022-01-21 14:41:25 -08009734static void jsondec_tomsg(jsondec* d, upb_Message* msg,
9735 const upb_MessageDef* m) {
9736 if (upb_MessageDef_WellKnownType(m) == kUpb_WellKnown_Unspecified) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009737 jsondec_object(d, msg, m);
9738 } else {
9739 jsondec_wellknown(d, msg, m);
9740 }
9741}
9742
Joshua Habermanf41049a2022-01-21 14:41:25 -08009743static upb_MessageValue jsondec_msg(jsondec* d, const upb_FieldDef* f) {
9744 const upb_MessageDef* m = upb_FieldDef_MessageSubDef(f);
9745 upb_Message* msg = upb_Message_New(m, d->arena);
9746 upb_MessageValue val;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009747
9748 jsondec_tomsg(d, msg, m);
9749 val.msg_val = msg;
9750 return val;
9751}
9752
Joshua Habermanf41049a2022-01-21 14:41:25 -08009753static void jsondec_field(jsondec* d, upb_Message* msg,
9754 const upb_MessageDef* m) {
9755 upb_StringView name;
9756 const upb_FieldDef* f;
9757 const upb_FieldDef* preserved;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009758
9759 name = jsondec_string(d);
9760 jsondec_entrysep(d);
Joshua Habermanf41049a2022-01-21 14:41:25 -08009761
9762 if (name.size >= 2 && name.data[0] == '[' &&
9763 name.data[name.size - 1] == ']') {
9764 f = upb_DefPool_FindExtensionByNameWithSize(d->symtab, name.data + 1,
9765 name.size - 2);
9766 if (f && upb_FieldDef_ContainingType(f) != m) {
9767 jsondec_errf(
9768 d, "Extension %s extends message %s, but was seen in message %s",
9769 upb_FieldDef_FullName(f),
9770 upb_MessageDef_FullName(upb_FieldDef_ContainingType(f)),
9771 upb_MessageDef_FullName(m));
9772 }
9773 } else {
9774 f = upb_MessageDef_FindByJsonNameWithSize(m, name.data, name.size);
9775 }
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009776
9777 if (!f) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08009778 if ((d->options & upb_JsonDecode_IgnoreUnknown) == 0) {
9779 jsondec_errf(d, "No such field: " UPB_STRINGVIEW_FORMAT,
9780 UPB_STRINGVIEW_ARGS(name));
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009781 }
9782 jsondec_skipval(d);
9783 return;
9784 }
9785
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009786 if (jsondec_peek(d) == JD_NULL && !jsondec_isvalue(f)) {
9787 /* JSON "null" indicates a default value, so no need to set anything. */
9788 jsondec_null(d);
9789 return;
9790 }
9791
Joshua Habermanf41049a2022-01-21 14:41:25 -08009792 if (upb_FieldDef_RealContainingOneof(f) &&
9793 upb_Message_WhichOneof(msg, upb_FieldDef_ContainingOneof(f))) {
Joshua Habermandd69a482021-05-17 22:40:33 -07009794 jsondec_err(d, "More than one field for this oneof.");
9795 }
9796
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009797 preserved = d->debug_field;
9798 d->debug_field = f;
9799
Joshua Habermanf41049a2022-01-21 14:41:25 -08009800 if (upb_FieldDef_IsMap(f)) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009801 jsondec_map(d, msg, f);
Joshua Habermanf41049a2022-01-21 14:41:25 -08009802 } else if (upb_FieldDef_IsRepeated(f)) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009803 jsondec_array(d, msg, f);
Joshua Habermanf41049a2022-01-21 14:41:25 -08009804 } else if (upb_FieldDef_IsSubMessage(f)) {
9805 upb_Message* submsg = upb_Message_Mutable(msg, f, d->arena).msg;
9806 const upb_MessageDef* subm = upb_FieldDef_MessageSubDef(f);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009807 jsondec_tomsg(d, submsg, subm);
9808 } else {
Joshua Habermanf41049a2022-01-21 14:41:25 -08009809 upb_MessageValue val = jsondec_value(d, f);
9810 upb_Message_Set(msg, f, val, d->arena);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009811 }
9812
9813 d->debug_field = preserved;
9814}
9815
Joshua Habermanf41049a2022-01-21 14:41:25 -08009816static void jsondec_object(jsondec* d, upb_Message* msg,
9817 const upb_MessageDef* m) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009818 jsondec_objstart(d);
9819 while (jsondec_objnext(d)) {
9820 jsondec_field(d, msg, m);
9821 }
9822 jsondec_objend(d);
9823}
9824
Joshua Habermanf41049a2022-01-21 14:41:25 -08009825static upb_MessageValue jsondec_value(jsondec* d, const upb_FieldDef* f) {
9826 switch (upb_FieldDef_CType(f)) {
9827 case kUpb_CType_Bool:
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009828 return jsondec_bool(d, f);
Joshua Habermanf41049a2022-01-21 14:41:25 -08009829 case kUpb_CType_Float:
9830 case kUpb_CType_Double:
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009831 return jsondec_double(d, f);
Joshua Habermanf41049a2022-01-21 14:41:25 -08009832 case kUpb_CType_UInt32:
9833 case kUpb_CType_UInt64:
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009834 return jsondec_uint(d, f);
Joshua Habermanf41049a2022-01-21 14:41:25 -08009835 case kUpb_CType_Int32:
9836 case kUpb_CType_Int64:
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009837 return jsondec_int(d, f);
Joshua Habermanf41049a2022-01-21 14:41:25 -08009838 case kUpb_CType_String:
9839 case kUpb_CType_Bytes:
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009840 return jsondec_strfield(d, f);
Joshua Habermanf41049a2022-01-21 14:41:25 -08009841 case kUpb_CType_Enum:
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009842 return jsondec_enum(d, f);
Joshua Habermanf41049a2022-01-21 14:41:25 -08009843 case kUpb_CType_Message:
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009844 return jsondec_msg(d, f);
9845 default:
9846 UPB_UNREACHABLE();
9847 }
9848}
9849
9850/* Well-known types ***********************************************************/
9851
Joshua Habermanf41049a2022-01-21 14:41:25 -08009852static int jsondec_tsdigits(jsondec* d, const char** ptr, size_t digits,
9853 const char* after) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009854 uint64_t val;
Joshua Habermanf41049a2022-01-21 14:41:25 -08009855 const char* p = *ptr;
9856 const char* end = p + digits;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009857 size_t after_len = after ? strlen(after) : 0;
9858
Joshua Habermanf41049a2022-01-21 14:41:25 -08009859 UPB_ASSERT(digits <= 9); /* int can't overflow. */
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009860
9861 if (jsondec_buftouint64(d, p, end, &val) != end ||
9862 (after_len && memcmp(end, after, after_len) != 0)) {
9863 jsondec_err(d, "Malformed timestamp");
9864 }
9865
9866 UPB_ASSERT(val < INT_MAX);
9867
9868 *ptr = end + after_len;
9869 return (int)val;
9870}
9871
Joshua Habermanf41049a2022-01-21 14:41:25 -08009872static int jsondec_nanos(jsondec* d, const char** ptr, const char* end) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009873 uint64_t nanos = 0;
Joshua Habermanf41049a2022-01-21 14:41:25 -08009874 const char* p = *ptr;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009875
9876 if (p != end && *p == '.') {
Joshua Habermanf41049a2022-01-21 14:41:25 -08009877 const char* nano_end = jsondec_buftouint64(d, p + 1, end, &nanos);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009878 int digits = (int)(nano_end - p - 1);
9879 int exp_lg10 = 9 - digits;
9880 if (digits > 9) {
9881 jsondec_err(d, "Too many digits for partial seconds");
9882 }
9883 while (exp_lg10--) nanos *= 10;
9884 *ptr = nano_end;
9885 }
9886
9887 UPB_ASSERT(nanos < INT_MAX);
9888
9889 return (int)nanos;
9890}
9891
/* Returns the number of days between 1970-01-01 and the given calendar date
 * (year `y`, month `m` in 1..12, day-of-month `d`), so that
 * jsondec_epochdays(1970, 1, 1) == 0.
 *
 * The computation shifts the calendar so that the year starts in March,
 * which places the leap day at the end of the shifted year.  All arithmetic
 * is done in uint32_t and converted to int on return. */
int jsondec_epochdays(int y, int m, int d) {
  const uint32_t base_year = 4800; /* Before min year, multiple of 400. */
  const uint32_t march_month = m - 3; /* March-based month; wraps for Jan/Feb. */
  /* Jan/Feb wrapped around above: they belong to the previous shifted year. */
  const uint32_t borrow = march_month > (uint32_t)m ? 1 : 0;
  const uint32_t shifted_year = y + base_year - borrow;
  const uint32_t month_index = march_month + (borrow ? 12 : 0);
  /* Days from March 1 to the first of the month, via a fixed-point fit. */
  const uint32_t days_before_month = (month_index * 62719 + 769) / 2048;
  const uint32_t leap_days =
      shifted_year / 4 - shifted_year / 100 + shifted_year / 400;

  return shifted_year * 365 + leap_days + days_before_month + (d - 1) - 2472632;
}
9903
/* Converts a calendar date and time of day to seconds relative to
 * 1970-01-01T00:00:00, using jsondec_epochdays() for the date part. */
static int64_t jsondec_unixtime(int y, int m, int d, int h, int min, int s) {
  const int64_t days = jsondec_epochdays(y, m, d);
  return days * 86400 + h * 3600 + min * 60 + s;
}
9907
Joshua Habermanf41049a2022-01-21 14:41:25 -08009908static void jsondec_timestamp(jsondec* d, upb_Message* msg,
9909 const upb_MessageDef* m) {
9910 upb_MessageValue seconds;
9911 upb_MessageValue nanos;
9912 upb_StringView str = jsondec_string(d);
9913 const char* ptr = str.data;
9914 const char* end = ptr + str.size;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009915
9916 if (str.size < 20) goto malformed;
9917
9918 {
9919 /* 1972-01-01T01:00:00 */
9920 int year = jsondec_tsdigits(d, &ptr, 4, "-");
9921 int mon = jsondec_tsdigits(d, &ptr, 2, "-");
9922 int day = jsondec_tsdigits(d, &ptr, 2, "T");
9923 int hour = jsondec_tsdigits(d, &ptr, 2, ":");
9924 int min = jsondec_tsdigits(d, &ptr, 2, ":");
9925 int sec = jsondec_tsdigits(d, &ptr, 2, NULL);
9926
9927 seconds.int64_val = jsondec_unixtime(year, mon, day, hour, min, sec);
9928 }
9929
9930 nanos.int32_val = jsondec_nanos(d, &ptr, end);
9931
9932 {
9933 /* [+-]08:00 or Z */
Joshua Habermancf28f282021-02-03 17:59:10 -08009934 int ofs_hour = 0;
9935 int ofs_min = 0;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009936 bool neg = false;
9937
9938 if (ptr == end) goto malformed;
9939
9940 switch (*ptr++) {
9941 case '-':
9942 neg = true;
9943 /* fallthrough */
9944 case '+':
9945 if ((end - ptr) != 5) goto malformed;
Joshua Habermancf28f282021-02-03 17:59:10 -08009946 ofs_hour = jsondec_tsdigits(d, &ptr, 2, ":");
9947 ofs_min = jsondec_tsdigits(d, &ptr, 2, NULL);
9948 ofs_min = ((ofs_hour * 60) + ofs_min) * 60;
9949 seconds.int64_val += (neg ? ofs_min : -ofs_min);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009950 break;
9951 case 'Z':
9952 if (ptr != end) goto malformed;
9953 break;
9954 default:
9955 goto malformed;
9956 }
9957 }
9958
9959 if (seconds.int64_val < -62135596800) {
9960 jsondec_err(d, "Timestamp out of range");
9961 }
9962
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07009963 upb_Message_Set(msg, upb_MessageDef_FindFieldByNumber(m, 1), seconds,
Joshua Habermanf41049a2022-01-21 14:41:25 -08009964 d->arena);
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07009965 upb_Message_Set(msg, upb_MessageDef_FindFieldByNumber(m, 2), nanos, d->arena);
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009966 return;
9967
9968malformed:
9969 jsondec_err(d, "Malformed timestamp");
9970}
9971
Joshua Habermanf41049a2022-01-21 14:41:25 -08009972static void jsondec_duration(jsondec* d, upb_Message* msg,
9973 const upb_MessageDef* m) {
9974 upb_MessageValue seconds;
9975 upb_MessageValue nanos;
9976 upb_StringView str = jsondec_string(d);
9977 const char* ptr = str.data;
9978 const char* end = ptr + str.size;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009979 const int64_t max = (uint64_t)3652500 * 86400;
9980
9981 /* "3.000000001s", "3s", etc. */
9982 ptr = jsondec_buftoint64(d, ptr, end, &seconds.int64_val);
9983 nanos.int32_val = jsondec_nanos(d, &ptr, end);
9984
9985 if (end - ptr != 1 || *ptr != 's') {
9986 jsondec_err(d, "Malformed duration");
9987 }
9988
9989 if (seconds.int64_val < -max || seconds.int64_val > max) {
9990 jsondec_err(d, "Duration out of range");
9991 }
9992
9993 if (seconds.int64_val < 0) {
Joshua Habermanf41049a2022-01-21 14:41:25 -08009994 nanos.int32_val = -nanos.int32_val;
Joshua Haberman9abf6e22021-01-13 12:16:25 -08009995 }
9996
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07009997 upb_Message_Set(msg, upb_MessageDef_FindFieldByNumber(m, 1), seconds,
Joshua Habermanf41049a2022-01-21 14:41:25 -08009998 d->arena);
Joshua Haberman7ecf43f2022-03-14 13:11:29 -07009999 upb_Message_Set(msg, upb_MessageDef_FindFieldByNumber(m, 2), nanos, d->arena);
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010000}
10001
Joshua Habermanf41049a2022-01-21 14:41:25 -080010002static void jsondec_listvalue(jsondec* d, upb_Message* msg,
10003 const upb_MessageDef* m) {
Joshua Haberman7ecf43f2022-03-14 13:11:29 -070010004 const upb_FieldDef* values_f = upb_MessageDef_FindFieldByNumber(m, 1);
Joshua Habermanf41049a2022-01-21 14:41:25 -080010005 const upb_MessageDef* value_m = upb_FieldDef_MessageSubDef(values_f);
10006 upb_Array* values = upb_Message_Mutable(msg, values_f, d->arena).array;
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010007
10008 jsondec_arrstart(d);
10009 while (jsondec_arrnext(d)) {
Joshua Habermanf41049a2022-01-21 14:41:25 -080010010 upb_Message* value_msg = upb_Message_New(value_m, d->arena);
10011 upb_MessageValue value;
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010012 value.msg_val = value_msg;
Joshua Habermanf41049a2022-01-21 14:41:25 -080010013 upb_Array_Append(values, value, d->arena);
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010014 jsondec_wellknownvalue(d, value_msg, value_m);
10015 }
10016 jsondec_arrend(d);
10017}
10018
Joshua Habermanf41049a2022-01-21 14:41:25 -080010019static void jsondec_struct(jsondec* d, upb_Message* msg,
10020 const upb_MessageDef* m) {
Joshua Haberman7ecf43f2022-03-14 13:11:29 -070010021 const upb_FieldDef* fields_f = upb_MessageDef_FindFieldByNumber(m, 1);
Joshua Habermanf41049a2022-01-21 14:41:25 -080010022 const upb_MessageDef* entry_m = upb_FieldDef_MessageSubDef(fields_f);
Joshua Haberman7ecf43f2022-03-14 13:11:29 -070010023 const upb_FieldDef* value_f = upb_MessageDef_FindFieldByNumber(entry_m, 2);
Joshua Habermanf41049a2022-01-21 14:41:25 -080010024 const upb_MessageDef* value_m = upb_FieldDef_MessageSubDef(value_f);
10025 upb_Map* fields = upb_Message_Mutable(msg, fields_f, d->arena).map;
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010026
10027 jsondec_objstart(d);
10028 while (jsondec_objnext(d)) {
Joshua Habermanf41049a2022-01-21 14:41:25 -080010029 upb_MessageValue key, value;
10030 upb_Message* value_msg = upb_Message_New(value_m, d->arena);
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010031 key.str_val = jsondec_string(d);
10032 value.msg_val = value_msg;
Joshua Habermanf41049a2022-01-21 14:41:25 -080010033 upb_Map_Set(fields, key, value, d->arena);
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010034 jsondec_entrysep(d);
10035 jsondec_wellknownvalue(d, value_msg, value_m);
10036 }
10037 jsondec_objend(d);
10038}
10039
Joshua Habermanf41049a2022-01-21 14:41:25 -080010040static void jsondec_wellknownvalue(jsondec* d, upb_Message* msg,
10041 const upb_MessageDef* m) {
10042 upb_MessageValue val;
10043 const upb_FieldDef* f;
10044 upb_Message* submsg;
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010045
10046 switch (jsondec_peek(d)) {
10047 case JD_NUMBER:
10048 /* double number_value = 2; */
Joshua Haberman7ecf43f2022-03-14 13:11:29 -070010049 f = upb_MessageDef_FindFieldByNumber(m, 2);
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010050 val.double_val = jsondec_number(d);
10051 break;
10052 case JD_STRING:
10053 /* string string_value = 3; */
Joshua Haberman7ecf43f2022-03-14 13:11:29 -070010054 f = upb_MessageDef_FindFieldByNumber(m, 3);
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010055 val.str_val = jsondec_string(d);
10056 break;
10057 case JD_FALSE:
10058 /* bool bool_value = 4; */
Joshua Haberman7ecf43f2022-03-14 13:11:29 -070010059 f = upb_MessageDef_FindFieldByNumber(m, 4);
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010060 val.bool_val = false;
10061 jsondec_false(d);
10062 break;
10063 case JD_TRUE:
10064 /* bool bool_value = 4; */
Joshua Haberman7ecf43f2022-03-14 13:11:29 -070010065 f = upb_MessageDef_FindFieldByNumber(m, 4);
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010066 val.bool_val = true;
10067 jsondec_true(d);
10068 break;
10069 case JD_NULL:
10070 /* NullValue null_value = 1; */
Joshua Haberman7ecf43f2022-03-14 13:11:29 -070010071 f = upb_MessageDef_FindFieldByNumber(m, 1);
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010072 val.int32_val = 0;
10073 jsondec_null(d);
10074 break;
Joshua Habermanf41049a2022-01-21 14:41:25 -080010075 /* Note: these cases return, because upb_Message_Mutable() is enough. */
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010076 case JD_OBJECT:
10077 /* Struct struct_value = 5; */
Joshua Haberman7ecf43f2022-03-14 13:11:29 -070010078 f = upb_MessageDef_FindFieldByNumber(m, 5);
Joshua Habermanf41049a2022-01-21 14:41:25 -080010079 submsg = upb_Message_Mutable(msg, f, d->arena).msg;
10080 jsondec_struct(d, submsg, upb_FieldDef_MessageSubDef(f));
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010081 return;
10082 case JD_ARRAY:
10083 /* ListValue list_value = 6; */
Joshua Haberman7ecf43f2022-03-14 13:11:29 -070010084 f = upb_MessageDef_FindFieldByNumber(m, 6);
Joshua Habermanf41049a2022-01-21 14:41:25 -080010085 submsg = upb_Message_Mutable(msg, f, d->arena).msg;
10086 jsondec_listvalue(d, submsg, upb_FieldDef_MessageSubDef(f));
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010087 return;
10088 default:
10089 UPB_UNREACHABLE();
10090 }
10091
Joshua Habermanf41049a2022-01-21 14:41:25 -080010092 upb_Message_Set(msg, f, val, d->arena);
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010093}
10094
Joshua Habermanf41049a2022-01-21 14:41:25 -080010095static upb_StringView jsondec_mask(jsondec* d, const char* buf,
10096 const char* end) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010097 /* FieldMask fields grow due to inserted '_' characters, so we can't do the
10098 * transform in place. */
Joshua Habermanf41049a2022-01-21 14:41:25 -080010099 const char* ptr = buf;
10100 upb_StringView ret;
10101 char* out;
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010102
10103 ret.size = end - ptr;
10104 while (ptr < end) {
10105 ret.size += (*ptr >= 'A' && *ptr <= 'Z');
10106 ptr++;
10107 }
10108
Joshua Habermanf41049a2022-01-21 14:41:25 -080010109 out = upb_Arena_Malloc(d->arena, ret.size);
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010110 ptr = buf;
10111 ret.data = out;
10112
10113 while (ptr < end) {
10114 char ch = *ptr++;
10115 if (ch >= 'A' && ch <= 'Z') {
10116 *out++ = '_';
10117 *out++ = ch + 32;
10118 } else if (ch == '_') {
10119 jsondec_err(d, "field mask may not contain '_'");
10120 } else {
10121 *out++ = ch;
10122 }
10123 }
10124
10125 return ret;
10126}
10127
Joshua Habermanf41049a2022-01-21 14:41:25 -080010128static void jsondec_fieldmask(jsondec* d, upb_Message* msg,
10129 const upb_MessageDef* m) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010130 /* repeated string paths = 1; */
Joshua Haberman7ecf43f2022-03-14 13:11:29 -070010131 const upb_FieldDef* paths_f = upb_MessageDef_FindFieldByNumber(m, 1);
Joshua Habermanf41049a2022-01-21 14:41:25 -080010132 upb_Array* arr = upb_Message_Mutable(msg, paths_f, d->arena).array;
10133 upb_StringView str = jsondec_string(d);
10134 const char* ptr = str.data;
10135 const char* end = ptr + str.size;
10136 upb_MessageValue val;
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010137
10138 while (ptr < end) {
Joshua Habermanf41049a2022-01-21 14:41:25 -080010139 const char* elem_end = memchr(ptr, ',', end - ptr);
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010140 if (elem_end) {
10141 val.str_val = jsondec_mask(d, ptr, elem_end);
10142 ptr = elem_end + 1;
10143 } else {
10144 val.str_val = jsondec_mask(d, ptr, end);
10145 ptr = end;
10146 }
Joshua Habermanf41049a2022-01-21 14:41:25 -080010147 upb_Array_Append(arr, val, d->arena);
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010148 }
10149}
10150
Joshua Habermanf41049a2022-01-21 14:41:25 -080010151static void jsondec_anyfield(jsondec* d, upb_Message* msg,
10152 const upb_MessageDef* m) {
10153 if (upb_MessageDef_WellKnownType(m) == kUpb_WellKnown_Unspecified) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010154 /* For regular types: {"@type": "[user type]", "f1": <V1>, "f2": <V2>}
10155 * where f1, f2, etc. are the normal fields of this type. */
10156 jsondec_field(d, msg, m);
10157 } else {
10158 /* For well-known types: {"@type": "[well-known type]", "value": <X>}
10159 * where <X> is whatever encoding the WKT normally uses. */
Joshua Habermanf41049a2022-01-21 14:41:25 -080010160 upb_StringView str = jsondec_string(d);
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010161 jsondec_entrysep(d);
10162 if (!jsondec_streql(str, "value")) {
10163 jsondec_err(d, "Key for well-known type must be 'value'");
10164 }
10165 jsondec_wellknown(d, msg, m);
10166 }
10167}
10168
Joshua Habermanf41049a2022-01-21 14:41:25 -080010169static const upb_MessageDef* jsondec_typeurl(jsondec* d, upb_Message* msg,
10170 const upb_MessageDef* m) {
Joshua Haberman7ecf43f2022-03-14 13:11:29 -070010171 const upb_FieldDef* type_url_f = upb_MessageDef_FindFieldByNumber(m, 1);
Joshua Habermanf41049a2022-01-21 14:41:25 -080010172 const upb_MessageDef* type_m;
10173 upb_StringView type_url = jsondec_string(d);
10174 const char* end = type_url.data + type_url.size;
10175 const char* ptr = end;
10176 upb_MessageValue val;
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010177
10178 val.str_val = type_url;
Joshua Habermanf41049a2022-01-21 14:41:25 -080010179 upb_Message_Set(msg, type_url_f, val, d->arena);
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010180
10181 /* Find message name after the last '/' */
Joshua Habermanf41049a2022-01-21 14:41:25 -080010182 while (ptr > type_url.data && *--ptr != '/') {
10183 }
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010184
10185 if (ptr == type_url.data || ptr == end) {
10186 jsondec_err(d, "Type url must have at least one '/' and non-empty host");
10187 }
10188
10189 ptr++;
Joshua Habermanf41049a2022-01-21 14:41:25 -080010190 type_m = upb_DefPool_FindMessageByNameWithSize(d->symtab, ptr, end - ptr);
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010191
10192 if (!type_m) {
10193 jsondec_err(d, "Type was not found");
10194 }
10195
10196 return type_m;
10197}
10198
Joshua Habermanf41049a2022-01-21 14:41:25 -080010199static void jsondec_any(jsondec* d, upb_Message* msg, const upb_MessageDef* m) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010200 /* string type_url = 1;
10201 * bytes value = 2; */
Joshua Haberman7ecf43f2022-03-14 13:11:29 -070010202 const upb_FieldDef* value_f = upb_MessageDef_FindFieldByNumber(m, 2);
Joshua Habermanf41049a2022-01-21 14:41:25 -080010203 upb_Message* any_msg;
10204 const upb_MessageDef* any_m = NULL;
10205 const char* pre_type_data = NULL;
10206 const char* pre_type_end = NULL;
10207 upb_MessageValue encoded;
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010208
10209 jsondec_objstart(d);
10210
10211 /* Scan looking for "@type", which is not necessarily first. */
10212 while (!any_m && jsondec_objnext(d)) {
Joshua Habermanf41049a2022-01-21 14:41:25 -080010213 const char* start = d->ptr;
10214 upb_StringView name = jsondec_string(d);
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010215 jsondec_entrysep(d);
10216 if (jsondec_streql(name, "@type")) {
10217 any_m = jsondec_typeurl(d, msg, m);
10218 if (pre_type_data) {
10219 pre_type_end = start;
10220 while (*pre_type_end != ',') pre_type_end--;
10221 }
10222 } else {
10223 if (!pre_type_data) pre_type_data = start;
10224 jsondec_skipval(d);
10225 }
10226 }
10227
10228 if (!any_m) {
10229 jsondec_err(d, "Any object didn't contain a '@type' field");
10230 }
10231
Joshua Habermanf41049a2022-01-21 14:41:25 -080010232 any_msg = upb_Message_New(any_m, d->arena);
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010233
10234 if (pre_type_data) {
10235 size_t len = pre_type_end - pre_type_data + 1;
Joshua Habermanf41049a2022-01-21 14:41:25 -080010236 char* tmp = upb_Arena_Malloc(d->arena, len);
10237 const char* saved_ptr = d->ptr;
10238 const char* saved_end = d->end;
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010239 memcpy(tmp, pre_type_data, len - 1);
10240 tmp[len - 1] = '}';
10241 d->ptr = tmp;
10242 d->end = tmp + len;
10243 d->is_first = true;
10244 while (jsondec_objnext(d)) {
10245 jsondec_anyfield(d, any_msg, any_m);
10246 }
10247 d->ptr = saved_ptr;
10248 d->end = saved_end;
10249 }
10250
10251 while (jsondec_objnext(d)) {
10252 jsondec_anyfield(d, any_msg, any_m);
10253 }
10254
10255 jsondec_objend(d);
10256
Joshua Habermanf41049a2022-01-21 14:41:25 -080010257 encoded.str_val.data = upb_Encode(any_msg, upb_MessageDef_MiniTable(any_m), 0,
10258 d->arena, &encoded.str_val.size);
10259 upb_Message_Set(msg, value_f, encoded, d->arena);
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010260}
10261
Joshua Habermanf41049a2022-01-21 14:41:25 -080010262static void jsondec_wrapper(jsondec* d, upb_Message* msg,
10263 const upb_MessageDef* m) {
Joshua Haberman7ecf43f2022-03-14 13:11:29 -070010264 const upb_FieldDef* value_f = upb_MessageDef_FindFieldByNumber(m, 1);
Joshua Habermanf41049a2022-01-21 14:41:25 -080010265 upb_MessageValue val = jsondec_value(d, value_f);
10266 upb_Message_Set(msg, value_f, val, d->arena);
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010267}
10268
Joshua Habermanf41049a2022-01-21 14:41:25 -080010269static void jsondec_wellknown(jsondec* d, upb_Message* msg,
10270 const upb_MessageDef* m) {
10271 switch (upb_MessageDef_WellKnownType(m)) {
10272 case kUpb_WellKnown_Any:
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010273 jsondec_any(d, msg, m);
10274 break;
Joshua Habermanf41049a2022-01-21 14:41:25 -080010275 case kUpb_WellKnown_FieldMask:
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010276 jsondec_fieldmask(d, msg, m);
10277 break;
Joshua Habermanf41049a2022-01-21 14:41:25 -080010278 case kUpb_WellKnown_Duration:
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010279 jsondec_duration(d, msg, m);
10280 break;
Joshua Habermanf41049a2022-01-21 14:41:25 -080010281 case kUpb_WellKnown_Timestamp:
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010282 jsondec_timestamp(d, msg, m);
10283 break;
Joshua Habermanf41049a2022-01-21 14:41:25 -080010284 case kUpb_WellKnown_Value:
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010285 jsondec_wellknownvalue(d, msg, m);
10286 break;
Joshua Habermanf41049a2022-01-21 14:41:25 -080010287 case kUpb_WellKnown_ListValue:
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010288 jsondec_listvalue(d, msg, m);
10289 break;
Joshua Habermanf41049a2022-01-21 14:41:25 -080010290 case kUpb_WellKnown_Struct:
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010291 jsondec_struct(d, msg, m);
10292 break;
Joshua Habermanf41049a2022-01-21 14:41:25 -080010293 case kUpb_WellKnown_DoubleValue:
10294 case kUpb_WellKnown_FloatValue:
10295 case kUpb_WellKnown_Int64Value:
10296 case kUpb_WellKnown_UInt64Value:
10297 case kUpb_WellKnown_Int32Value:
10298 case kUpb_WellKnown_UInt32Value:
10299 case kUpb_WellKnown_StringValue:
10300 case kUpb_WellKnown_BytesValue:
10301 case kUpb_WellKnown_BoolValue:
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010302 jsondec_wrapper(d, msg, m);
10303 break;
10304 default:
10305 UPB_UNREACHABLE();
10306 }
10307}
10308
Joshua Habermanf41049a2022-01-21 14:41:25 -080010309bool upb_JsonDecode(const char* buf, size_t size, upb_Message* msg,
10310 const upb_MessageDef* m, const upb_DefPool* symtab,
10311 int options, upb_Arena* arena, upb_Status* status) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010312 jsondec d;
Joshua Habermandd69a482021-05-17 22:40:33 -070010313
10314 if (size == 0) return true;
10315
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010316 d.ptr = buf;
10317 d.end = buf + size;
10318 d.arena = arena;
Joshua Habermanf41049a2022-01-21 14:41:25 -080010319 d.symtab = symtab;
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010320 d.status = status;
10321 d.options = options;
10322 d.depth = 64;
10323 d.line = 1;
10324 d.line_begin = d.ptr;
10325 d.debug_field = NULL;
10326 d.is_first = false;
10327
10328 if (UPB_SETJMP(d.err)) return false;
10329
10330 jsondec_tomsg(&d, msg, m);
10331 return true;
10332}
10333
Joshua Habermandd69a482021-05-17 22:40:33 -070010334/** upb/json_encode.c ************************************************************/
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010335
10336#include <ctype.h>
10337#include <float.h>
10338#include <inttypes.h>
10339#include <math.h>
10340#include <setjmp.h>
10341#include <stdarg.h>
10342#include <stdio.h>
10343#include <string.h>
10344
10345
10346/* Must be last. */
10347
10348typedef struct {
10349 char *buf, *ptr, *end;
10350 size_t overflow;
10351 int indent_depth;
10352 int options;
Joshua Habermanf41049a2022-01-21 14:41:25 -080010353 const upb_DefPool* ext_pool;
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010354 jmp_buf err;
Joshua Habermanf41049a2022-01-21 14:41:25 -080010355 upb_Status* status;
10356 upb_Arena* arena;
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010357} jsonenc;
10358
Joshua Habermanf41049a2022-01-21 14:41:25 -080010359static void jsonenc_msg(jsonenc* e, const upb_Message* msg,
10360 const upb_MessageDef* m);
10361static void jsonenc_scalar(jsonenc* e, upb_MessageValue val,
10362 const upb_FieldDef* f);
10363static void jsonenc_msgfield(jsonenc* e, const upb_Message* msg,
10364 const upb_MessageDef* m);
10365static void jsonenc_msgfields(jsonenc* e, const upb_Message* msg,
10366 const upb_MessageDef* m, bool first);
10367static void jsonenc_value(jsonenc* e, const upb_Message* msg,
10368 const upb_MessageDef* m);
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010369
Joshua Habermanf41049a2022-01-21 14:41:25 -080010370UPB_NORETURN static void jsonenc_err(jsonenc* e, const char* msg) {
10371 upb_Status_SetErrorMessage(e->status, msg);
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010372 longjmp(e->err, 1);
10373}
10374
10375UPB_PRINTF(2, 3)
Joshua Habermanf41049a2022-01-21 14:41:25 -080010376UPB_NORETURN static void jsonenc_errf(jsonenc* e, const char* fmt, ...) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010377 va_list argp;
10378 va_start(argp, fmt);
Joshua Habermanf41049a2022-01-21 14:41:25 -080010379 upb_Status_VSetErrorFormat(e->status, fmt, argp);
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010380 va_end(argp);
10381 longjmp(e->err, 1);
10382}
10383
Joshua Habermanf41049a2022-01-21 14:41:25 -080010384static upb_Arena* jsonenc_arena(jsonenc* e) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010385 /* Create lazily, since it's only needed for Any */
10386 if (!e->arena) {
Joshua Habermanf41049a2022-01-21 14:41:25 -080010387 e->arena = upb_Arena_New();
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010388 }
10389 return e->arena;
10390}
10391
Joshua Habermanf41049a2022-01-21 14:41:25 -080010392static void jsonenc_putbytes(jsonenc* e, const void* data, size_t len) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010393 size_t have = e->end - e->ptr;
10394 if (UPB_LIKELY(have >= len)) {
10395 memcpy(e->ptr, data, len);
10396 e->ptr += len;
10397 } else {
Joshua Habermandd69a482021-05-17 22:40:33 -070010398 if (have) {
10399 memcpy(e->ptr, data, have);
10400 e->ptr += have;
10401 }
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010402 e->overflow += (len - have);
10403 }
10404}
10405
Joshua Habermanf41049a2022-01-21 14:41:25 -080010406static void jsonenc_putstr(jsonenc* e, const char* str) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010407 jsonenc_putbytes(e, str, strlen(str));
10408}
10409
10410UPB_PRINTF(2, 3)
Joshua Habermanf41049a2022-01-21 14:41:25 -080010411static void jsonenc_printf(jsonenc* e, const char* fmt, ...) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010412 size_t n;
10413 size_t have = e->end - e->ptr;
10414 va_list args;
10415
10416 va_start(args, fmt);
10417 n = vsnprintf(e->ptr, have, fmt, args);
10418 va_end(args);
10419
10420 if (UPB_LIKELY(have > n)) {
10421 e->ptr += n;
10422 } else {
Joshua Habermandd69a482021-05-17 22:40:33 -070010423 e->ptr = UPB_PTRADD(e->ptr, have);
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010424 e->overflow += (n - have);
10425 }
10426}
10427
Joshua Habermanf41049a2022-01-21 14:41:25 -080010428static void jsonenc_nanos(jsonenc* e, int32_t nanos) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010429 int digits = 9;
10430
10431 if (nanos == 0) return;
10432 if (nanos < 0 || nanos >= 1000000000) {
10433 jsonenc_err(e, "error formatting timestamp as JSON: invalid nanos");
10434 }
10435
10436 while (nanos % 1000 == 0) {
10437 nanos /= 1000;
10438 digits -= 3;
10439 }
10440
10441 jsonenc_printf(e, ".%.*" PRId32, digits, nanos);
10442}
10443
Joshua Habermanf41049a2022-01-21 14:41:25 -080010444static void jsonenc_timestamp(jsonenc* e, const upb_Message* msg,
10445 const upb_MessageDef* m) {
Joshua Haberman7ecf43f2022-03-14 13:11:29 -070010446 const upb_FieldDef* seconds_f = upb_MessageDef_FindFieldByNumber(m, 1);
10447 const upb_FieldDef* nanos_f = upb_MessageDef_FindFieldByNumber(m, 2);
Joshua Habermanf41049a2022-01-21 14:41:25 -080010448 int64_t seconds = upb_Message_Get(msg, seconds_f).int64_val;
10449 int32_t nanos = upb_Message_Get(msg, nanos_f).int32_val;
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010450 int L, N, I, J, K, hour, min, sec;
10451
10452 if (seconds < -62135596800) {
10453 jsonenc_err(e,
10454 "error formatting timestamp as JSON: minimum acceptable value "
10455 "is 0001-01-01T00:00:00Z");
10456 } else if (seconds > 253402300799) {
10457 jsonenc_err(e,
10458 "error formatting timestamp as JSON: maximum acceptable value "
10459 "is 9999-12-31T23:59:59Z");
10460 }
10461
10462 /* Julian Day -> Y/M/D, Algorithm from:
10463 * Fliegel, H. F., and Van Flandern, T. C., "A Machine Algorithm for
10464 * Processing Calendar Dates," Communications of the Association of
10465 * Computing Machines, vol. 11 (1968), p. 657. */
Joshua Haberman7ecf43f2022-03-14 13:11:29 -070010466 seconds += 62135596800; // Ensure seconds is positive.
10467 L = (int)(seconds / 86400) - 719162 + 68569 + 2440588;
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010468 N = 4 * L / 146097;
10469 L = L - (146097 * N + 3) / 4;
10470 I = 4000 * (L + 1) / 1461001;
10471 L = L - 1461 * I / 4 + 31;
10472 J = 80 * L / 2447;
10473 K = L - 2447 * J / 80;
10474 L = J / 11;
10475 J = J + 2 - 12 * L;
10476 I = 100 * (N - 49) + I + L;
10477
10478 sec = seconds % 60;
10479 min = (seconds / 60) % 60;
10480 hour = (seconds / 3600) % 24;
10481
10482 jsonenc_printf(e, "\"%04d-%02d-%02dT%02d:%02d:%02d", I, J, K, hour, min, sec);
10483 jsonenc_nanos(e, nanos);
10484 jsonenc_putstr(e, "Z\"");
10485}
10486
Joshua Habermanf41049a2022-01-21 14:41:25 -080010487static void jsonenc_duration(jsonenc* e, const upb_Message* msg,
10488 const upb_MessageDef* m) {
Joshua Haberman7ecf43f2022-03-14 13:11:29 -070010489 const upb_FieldDef* seconds_f = upb_MessageDef_FindFieldByNumber(m, 1);
10490 const upb_FieldDef* nanos_f = upb_MessageDef_FindFieldByNumber(m, 2);
Joshua Habermanf41049a2022-01-21 14:41:25 -080010491 int64_t seconds = upb_Message_Get(msg, seconds_f).int64_val;
10492 int32_t nanos = upb_Message_Get(msg, nanos_f).int32_val;
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010493
10494 if (seconds > 315576000000 || seconds < -315576000000 ||
10495 (seconds < 0) != (nanos < 0)) {
10496 jsonenc_err(e, "bad duration");
10497 }
10498
10499 if (nanos < 0) {
10500 nanos = -nanos;
10501 }
10502
10503 jsonenc_printf(e, "\"%" PRId64, seconds);
10504 jsonenc_nanos(e, nanos);
10505 jsonenc_putstr(e, "s\"");
10506}
10507
Joshua Habermanf41049a2022-01-21 14:41:25 -080010508static void jsonenc_enum(int32_t val, const upb_FieldDef* f, jsonenc* e) {
10509 const upb_EnumDef* e_def = upb_FieldDef_EnumSubDef(f);
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010510
Joshua Habermanf41049a2022-01-21 14:41:25 -080010511 if (strcmp(upb_EnumDef_FullName(e_def), "google.protobuf.NullValue") == 0) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010512 jsonenc_putstr(e, "null");
10513 } else {
Joshua Habermanf41049a2022-01-21 14:41:25 -080010514 const upb_EnumValueDef* ev = upb_EnumDef_FindValueByNumber(e_def, val);
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010515
Joshua Habermanf41049a2022-01-21 14:41:25 -080010516 if (ev) {
10517 jsonenc_printf(e, "\"%s\"", upb_EnumValueDef_Name(ev));
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010518 } else {
10519 jsonenc_printf(e, "%" PRId32, val);
10520 }
10521 }
10522}
10523
Joshua Habermanf41049a2022-01-21 14:41:25 -080010524static void jsonenc_bytes(jsonenc* e, upb_StringView str) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010525 /* This is the regular base64, not the "web-safe" version. */
10526 static const char base64[] =
10527 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
Joshua Habermanf41049a2022-01-21 14:41:25 -080010528 const unsigned char* ptr = (unsigned char*)str.data;
10529 const unsigned char* end = UPB_PTRADD(ptr, str.size);
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010530 char buf[4];
10531
10532 jsonenc_putstr(e, "\"");
10533
10534 while (end - ptr >= 3) {
10535 buf[0] = base64[ptr[0] >> 2];
10536 buf[1] = base64[((ptr[0] & 0x3) << 4) | (ptr[1] >> 4)];
10537 buf[2] = base64[((ptr[1] & 0xf) << 2) | (ptr[2] >> 6)];
10538 buf[3] = base64[ptr[2] & 0x3f];
10539 jsonenc_putbytes(e, buf, 4);
10540 ptr += 3;
10541 }
10542
10543 switch (end - ptr) {
10544 case 2:
10545 buf[0] = base64[ptr[0] >> 2];
10546 buf[1] = base64[((ptr[0] & 0x3) << 4) | (ptr[1] >> 4)];
10547 buf[2] = base64[(ptr[1] & 0xf) << 2];
10548 buf[3] = '=';
10549 jsonenc_putbytes(e, buf, 4);
10550 break;
10551 case 1:
10552 buf[0] = base64[ptr[0] >> 2];
10553 buf[1] = base64[((ptr[0] & 0x3) << 4)];
10554 buf[2] = '=';
10555 buf[3] = '=';
10556 jsonenc_putbytes(e, buf, 4);
10557 break;
10558 }
10559
10560 jsonenc_putstr(e, "\"");
10561}
10562
Joshua Habermanf41049a2022-01-21 14:41:25 -080010563static void jsonenc_stringbody(jsonenc* e, upb_StringView str) {
10564 const char* ptr = str.data;
10565 const char* end = UPB_PTRADD(ptr, str.size);
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010566
10567 while (ptr < end) {
10568 switch (*ptr) {
10569 case '\n':
10570 jsonenc_putstr(e, "\\n");
10571 break;
10572 case '\r':
10573 jsonenc_putstr(e, "\\r");
10574 break;
10575 case '\t':
10576 jsonenc_putstr(e, "\\t");
10577 break;
10578 case '\"':
10579 jsonenc_putstr(e, "\\\"");
10580 break;
10581 case '\f':
10582 jsonenc_putstr(e, "\\f");
10583 break;
10584 case '\b':
10585 jsonenc_putstr(e, "\\b");
10586 break;
10587 case '\\':
10588 jsonenc_putstr(e, "\\\\");
10589 break;
10590 default:
10591 if ((uint8_t)*ptr < 0x20) {
10592 jsonenc_printf(e, "\\u%04x", (int)(uint8_t)*ptr);
10593 } else {
10594 /* This could be a non-ASCII byte. We rely on the string being valid
10595 * UTF-8. */
10596 jsonenc_putbytes(e, ptr, 1);
10597 }
10598 break;
10599 }
10600 ptr++;
10601 }
10602}
10603
Joshua Habermanf41049a2022-01-21 14:41:25 -080010604static void jsonenc_string(jsonenc* e, upb_StringView str) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010605 jsonenc_putstr(e, "\"");
10606 jsonenc_stringbody(e, str);
10607 jsonenc_putstr(e, "\"");
10608}
10609
Joshua Habermanf41049a2022-01-21 14:41:25 -080010610static bool upb_JsonEncode_HandleSpecialDoubles(jsonenc* e, double val) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010611 if (val == INFINITY) {
10612 jsonenc_putstr(e, "\"Infinity\"");
10613 } else if (val == -INFINITY) {
10614 jsonenc_putstr(e, "\"-Infinity\"");
10615 } else if (val != val) {
10616 jsonenc_putstr(e, "\"NaN\"");
10617 } else {
Joshua Habermanf41049a2022-01-21 14:41:25 -080010618 return false;
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010619 }
Joshua Habermanf41049a2022-01-21 14:41:25 -080010620 return true;
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010621}
10622
Joshua Habermanf41049a2022-01-21 14:41:25 -080010623static void upb_JsonEncode_Double(jsonenc* e, double val) {
10624 if (upb_JsonEncode_HandleSpecialDoubles(e, val)) return;
10625 char buf[32];
10626 _upb_EncodeRoundTripDouble(val, buf, sizeof(buf));
10627 jsonenc_putstr(e, buf);
10628}
10629
10630static void upb_JsonEncode_Float(jsonenc* e, float val) {
10631 if (upb_JsonEncode_HandleSpecialDoubles(e, val)) return;
10632 char buf[32];
10633 _upb_EncodeRoundTripFloat(val, buf, sizeof(buf));
10634 jsonenc_putstr(e, buf);
10635}
10636
10637static void jsonenc_wrapper(jsonenc* e, const upb_Message* msg,
10638 const upb_MessageDef* m) {
Joshua Haberman7ecf43f2022-03-14 13:11:29 -070010639 const upb_FieldDef* val_f = upb_MessageDef_FindFieldByNumber(m, 1);
Joshua Habermanf41049a2022-01-21 14:41:25 -080010640 upb_MessageValue val = upb_Message_Get(msg, val_f);
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010641 jsonenc_scalar(e, val, val_f);
10642}
10643
Joshua Habermanf41049a2022-01-21 14:41:25 -080010644static const upb_MessageDef* jsonenc_getanymsg(jsonenc* e,
10645 upb_StringView type_url) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010646 /* Find last '/', if any. */
Joshua Habermanf41049a2022-01-21 14:41:25 -080010647 const char* end = type_url.data + type_url.size;
10648 const char* ptr = end;
10649 const upb_MessageDef* ret;
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010650
10651 if (!e->ext_pool) {
10652 jsonenc_err(e, "Tried to encode Any, but no symtab was provided");
10653 }
10654
10655 if (type_url.size == 0) goto badurl;
10656
10657 while (true) {
10658 if (--ptr == type_url.data) {
10659 /* Type URL must contain at least one '/', with host before. */
10660 goto badurl;
10661 }
10662 if (*ptr == '/') {
10663 ptr++;
10664 break;
10665 }
10666 }
10667
Joshua Habermanf41049a2022-01-21 14:41:25 -080010668 ret = upb_DefPool_FindMessageByNameWithSize(e->ext_pool, ptr, end - ptr);
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010669
10670 if (!ret) {
10671 jsonenc_errf(e, "Couldn't find Any type: %.*s", (int)(end - ptr), ptr);
10672 }
10673
10674 return ret;
10675
10676badurl:
Joshua Habermanf41049a2022-01-21 14:41:25 -080010677 jsonenc_errf(e, "Bad type URL: " UPB_STRINGVIEW_FORMAT,
10678 UPB_STRINGVIEW_ARGS(type_url));
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010679}
10680
Joshua Habermanf41049a2022-01-21 14:41:25 -080010681static void jsonenc_any(jsonenc* e, const upb_Message* msg,
10682 const upb_MessageDef* m) {
Joshua Haberman7ecf43f2022-03-14 13:11:29 -070010683 const upb_FieldDef* type_url_f = upb_MessageDef_FindFieldByNumber(m, 1);
10684 const upb_FieldDef* value_f = upb_MessageDef_FindFieldByNumber(m, 2);
Joshua Habermanf41049a2022-01-21 14:41:25 -080010685 upb_StringView type_url = upb_Message_Get(msg, type_url_f).str_val;
10686 upb_StringView value = upb_Message_Get(msg, value_f).str_val;
10687 const upb_MessageDef* any_m = jsonenc_getanymsg(e, type_url);
10688 const upb_MiniTable* any_layout = upb_MessageDef_MiniTable(any_m);
10689 upb_Arena* arena = jsonenc_arena(e);
10690 upb_Message* any = upb_Message_New(any_m, arena);
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010691
Joshua Habermanf41049a2022-01-21 14:41:25 -080010692 if (upb_Decode(value.data, value.size, any, any_layout, NULL, 0, arena) !=
10693 kUpb_DecodeStatus_Ok) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010694 jsonenc_err(e, "Error decoding message in Any");
10695 }
10696
10697 jsonenc_putstr(e, "{\"@type\":");
10698 jsonenc_string(e, type_url);
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010699
Joshua Habermanf41049a2022-01-21 14:41:25 -080010700 if (upb_MessageDef_WellKnownType(any_m) == kUpb_WellKnown_Unspecified) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010701 /* Regular messages: {"@type": "...","foo": 1, "bar": 2} */
Joshua Habermandd69a482021-05-17 22:40:33 -070010702 jsonenc_msgfields(e, any, any_m, false);
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010703 } else {
10704 /* Well-known type: {"@type": "...","value": <well-known encoding>} */
Joshua Habermandd69a482021-05-17 22:40:33 -070010705 jsonenc_putstr(e, ",\"value\":");
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010706 jsonenc_msgfield(e, any, any_m);
10707 }
10708
10709 jsonenc_putstr(e, "}");
10710}
10711
Joshua Habermanf41049a2022-01-21 14:41:25 -080010712static void jsonenc_putsep(jsonenc* e, const char* str, bool* first) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010713 if (*first) {
10714 *first = false;
10715 } else {
10716 jsonenc_putstr(e, str);
10717 }
10718}
10719
Joshua Habermanf41049a2022-01-21 14:41:25 -080010720static void jsonenc_fieldpath(jsonenc* e, upb_StringView path) {
10721 const char* ptr = path.data;
10722 const char* end = ptr + path.size;
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010723
10724 while (ptr < end) {
10725 char ch = *ptr;
10726
10727 if (ch >= 'A' && ch <= 'Z') {
10728 jsonenc_err(e, "Field mask element may not have upper-case letter.");
10729 } else if (ch == '_') {
10730 if (ptr == end - 1 || *(ptr + 1) < 'a' || *(ptr + 1) > 'z') {
10731 jsonenc_err(e, "Underscore must be followed by a lowercase letter.");
10732 }
10733 ch = *++ptr - 32;
10734 }
10735
10736 jsonenc_putbytes(e, &ch, 1);
10737 ptr++;
10738 }
10739}
10740
Joshua Habermanf41049a2022-01-21 14:41:25 -080010741static void jsonenc_fieldmask(jsonenc* e, const upb_Message* msg,
10742 const upb_MessageDef* m) {
Joshua Haberman7ecf43f2022-03-14 13:11:29 -070010743 const upb_FieldDef* paths_f = upb_MessageDef_FindFieldByNumber(m, 1);
Joshua Habermanf41049a2022-01-21 14:41:25 -080010744 const upb_Array* paths = upb_Message_Get(msg, paths_f).array_val;
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010745 bool first = true;
10746 size_t i, n = 0;
10747
Joshua Habermanf41049a2022-01-21 14:41:25 -080010748 if (paths) n = upb_Array_Size(paths);
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010749
10750 jsonenc_putstr(e, "\"");
10751
10752 for (i = 0; i < n; i++) {
10753 jsonenc_putsep(e, ",", &first);
Joshua Habermanf41049a2022-01-21 14:41:25 -080010754 jsonenc_fieldpath(e, upb_Array_Get(paths, i).str_val);
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010755 }
10756
10757 jsonenc_putstr(e, "\"");
10758}
10759
Joshua Habermanf41049a2022-01-21 14:41:25 -080010760static void jsonenc_struct(jsonenc* e, const upb_Message* msg,
10761 const upb_MessageDef* m) {
Joshua Haberman7ecf43f2022-03-14 13:11:29 -070010762 const upb_FieldDef* fields_f = upb_MessageDef_FindFieldByNumber(m, 1);
Joshua Habermanf41049a2022-01-21 14:41:25 -080010763 const upb_Map* fields = upb_Message_Get(msg, fields_f).map_val;
10764 const upb_MessageDef* entry_m = upb_FieldDef_MessageSubDef(fields_f);
Joshua Haberman7ecf43f2022-03-14 13:11:29 -070010765 const upb_FieldDef* value_f = upb_MessageDef_FindFieldByNumber(entry_m, 2);
Joshua Habermanf41049a2022-01-21 14:41:25 -080010766 size_t iter = kUpb_Map_Begin;
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010767 bool first = true;
10768
10769 jsonenc_putstr(e, "{");
10770
10771 if (fields) {
Joshua Habermanf41049a2022-01-21 14:41:25 -080010772 while (upb_MapIterator_Next(fields, &iter)) {
10773 upb_MessageValue key = upb_MapIterator_Key(fields, iter);
10774 upb_MessageValue val = upb_MapIterator_Value(fields, iter);
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010775
10776 jsonenc_putsep(e, ",", &first);
10777 jsonenc_string(e, key.str_val);
10778 jsonenc_putstr(e, ":");
Joshua Habermanf41049a2022-01-21 14:41:25 -080010779 jsonenc_value(e, val.msg_val, upb_FieldDef_MessageSubDef(value_f));
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010780 }
10781 }
10782
10783 jsonenc_putstr(e, "}");
10784}
10785
Joshua Habermanf41049a2022-01-21 14:41:25 -080010786static void jsonenc_listvalue(jsonenc* e, const upb_Message* msg,
10787 const upb_MessageDef* m) {
Joshua Haberman7ecf43f2022-03-14 13:11:29 -070010788 const upb_FieldDef* values_f = upb_MessageDef_FindFieldByNumber(m, 1);
Joshua Habermanf41049a2022-01-21 14:41:25 -080010789 const upb_MessageDef* values_m = upb_FieldDef_MessageSubDef(values_f);
10790 const upb_Array* values = upb_Message_Get(msg, values_f).array_val;
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010791 size_t i;
10792 bool first = true;
10793
10794 jsonenc_putstr(e, "[");
10795
10796 if (values) {
Joshua Habermanf41049a2022-01-21 14:41:25 -080010797 const size_t size = upb_Array_Size(values);
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010798 for (i = 0; i < size; i++) {
Joshua Habermanf41049a2022-01-21 14:41:25 -080010799 upb_MessageValue elem = upb_Array_Get(values, i);
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010800
10801 jsonenc_putsep(e, ",", &first);
10802 jsonenc_value(e, elem.msg_val, values_m);
10803 }
10804 }
10805
10806 jsonenc_putstr(e, "]");
10807}
10808
Joshua Habermanf41049a2022-01-21 14:41:25 -080010809static void jsonenc_value(jsonenc* e, const upb_Message* msg,
10810 const upb_MessageDef* m) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010811 /* TODO(haberman): do we want a reflection method to get oneof case? */
Joshua Habermanf41049a2022-01-21 14:41:25 -080010812 size_t iter = kUpb_Message_Begin;
10813 const upb_FieldDef* f;
10814 upb_MessageValue val;
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010815
Joshua Habermanf41049a2022-01-21 14:41:25 -080010816 if (!upb_Message_Next(msg, m, NULL, &f, &val, &iter)) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010817 jsonenc_err(e, "No value set in Value proto");
10818 }
10819
Joshua Habermanf41049a2022-01-21 14:41:25 -080010820 switch (upb_FieldDef_Number(f)) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010821 case 1:
10822 jsonenc_putstr(e, "null");
10823 break;
10824 case 2:
Joshua Habermanf41049a2022-01-21 14:41:25 -080010825 upb_JsonEncode_Double(e, val.double_val);
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010826 break;
10827 case 3:
10828 jsonenc_string(e, val.str_val);
10829 break;
10830 case 4:
10831 jsonenc_putstr(e, val.bool_val ? "true" : "false");
10832 break;
10833 case 5:
Joshua Habermanf41049a2022-01-21 14:41:25 -080010834 jsonenc_struct(e, val.msg_val, upb_FieldDef_MessageSubDef(f));
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010835 break;
10836 case 6:
Joshua Habermanf41049a2022-01-21 14:41:25 -080010837 jsonenc_listvalue(e, val.msg_val, upb_FieldDef_MessageSubDef(f));
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010838 break;
10839 }
10840}
10841
Joshua Habermanf41049a2022-01-21 14:41:25 -080010842static void jsonenc_msgfield(jsonenc* e, const upb_Message* msg,
10843 const upb_MessageDef* m) {
10844 switch (upb_MessageDef_WellKnownType(m)) {
10845 case kUpb_WellKnown_Unspecified:
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010846 jsonenc_msg(e, msg, m);
10847 break;
Joshua Habermanf41049a2022-01-21 14:41:25 -080010848 case kUpb_WellKnown_Any:
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010849 jsonenc_any(e, msg, m);
10850 break;
Joshua Habermanf41049a2022-01-21 14:41:25 -080010851 case kUpb_WellKnown_FieldMask:
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010852 jsonenc_fieldmask(e, msg, m);
10853 break;
Joshua Habermanf41049a2022-01-21 14:41:25 -080010854 case kUpb_WellKnown_Duration:
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010855 jsonenc_duration(e, msg, m);
10856 break;
Joshua Habermanf41049a2022-01-21 14:41:25 -080010857 case kUpb_WellKnown_Timestamp:
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010858 jsonenc_timestamp(e, msg, m);
10859 break;
Joshua Habermanf41049a2022-01-21 14:41:25 -080010860 case kUpb_WellKnown_DoubleValue:
10861 case kUpb_WellKnown_FloatValue:
10862 case kUpb_WellKnown_Int64Value:
10863 case kUpb_WellKnown_UInt64Value:
10864 case kUpb_WellKnown_Int32Value:
10865 case kUpb_WellKnown_UInt32Value:
10866 case kUpb_WellKnown_StringValue:
10867 case kUpb_WellKnown_BytesValue:
10868 case kUpb_WellKnown_BoolValue:
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010869 jsonenc_wrapper(e, msg, m);
10870 break;
Joshua Habermanf41049a2022-01-21 14:41:25 -080010871 case kUpb_WellKnown_Value:
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010872 jsonenc_value(e, msg, m);
10873 break;
Joshua Habermanf41049a2022-01-21 14:41:25 -080010874 case kUpb_WellKnown_ListValue:
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010875 jsonenc_listvalue(e, msg, m);
10876 break;
Joshua Habermanf41049a2022-01-21 14:41:25 -080010877 case kUpb_WellKnown_Struct:
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010878 jsonenc_struct(e, msg, m);
10879 break;
10880 }
10881}
10882
Joshua Habermanf41049a2022-01-21 14:41:25 -080010883static void jsonenc_scalar(jsonenc* e, upb_MessageValue val,
10884 const upb_FieldDef* f) {
10885 switch (upb_FieldDef_CType(f)) {
10886 case kUpb_CType_Bool:
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010887 jsonenc_putstr(e, val.bool_val ? "true" : "false");
10888 break;
Joshua Habermanf41049a2022-01-21 14:41:25 -080010889 case kUpb_CType_Float:
10890 upb_JsonEncode_Float(e, val.float_val);
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010891 break;
Joshua Habermanf41049a2022-01-21 14:41:25 -080010892 case kUpb_CType_Double:
10893 upb_JsonEncode_Double(e, val.double_val);
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010894 break;
Joshua Habermanf41049a2022-01-21 14:41:25 -080010895 case kUpb_CType_Int32:
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010896 jsonenc_printf(e, "%" PRId32, val.int32_val);
10897 break;
Joshua Habermanf41049a2022-01-21 14:41:25 -080010898 case kUpb_CType_UInt32:
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010899 jsonenc_printf(e, "%" PRIu32, val.uint32_val);
10900 break;
Joshua Habermanf41049a2022-01-21 14:41:25 -080010901 case kUpb_CType_Int64:
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010902 jsonenc_printf(e, "\"%" PRId64 "\"", val.int64_val);
10903 break;
Joshua Habermanf41049a2022-01-21 14:41:25 -080010904 case kUpb_CType_UInt64:
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010905 jsonenc_printf(e, "\"%" PRIu64 "\"", val.uint64_val);
10906 break;
Joshua Habermanf41049a2022-01-21 14:41:25 -080010907 case kUpb_CType_String:
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010908 jsonenc_string(e, val.str_val);
10909 break;
Joshua Habermanf41049a2022-01-21 14:41:25 -080010910 case kUpb_CType_Bytes:
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010911 jsonenc_bytes(e, val.str_val);
10912 break;
Joshua Habermanf41049a2022-01-21 14:41:25 -080010913 case kUpb_CType_Enum:
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010914 jsonenc_enum(val.int32_val, f, e);
10915 break;
Joshua Habermanf41049a2022-01-21 14:41:25 -080010916 case kUpb_CType_Message:
10917 jsonenc_msgfield(e, val.msg_val, upb_FieldDef_MessageSubDef(f));
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010918 break;
10919 }
10920}
10921
Joshua Habermanf41049a2022-01-21 14:41:25 -080010922static void jsonenc_mapkey(jsonenc* e, upb_MessageValue val,
10923 const upb_FieldDef* f) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010924 jsonenc_putstr(e, "\"");
10925
Joshua Habermanf41049a2022-01-21 14:41:25 -080010926 switch (upb_FieldDef_CType(f)) {
10927 case kUpb_CType_Bool:
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010928 jsonenc_putstr(e, val.bool_val ? "true" : "false");
10929 break;
Joshua Habermanf41049a2022-01-21 14:41:25 -080010930 case kUpb_CType_Int32:
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010931 jsonenc_printf(e, "%" PRId32, val.int32_val);
10932 break;
Joshua Habermanf41049a2022-01-21 14:41:25 -080010933 case kUpb_CType_UInt32:
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010934 jsonenc_printf(e, "%" PRIu32, val.uint32_val);
10935 break;
Joshua Habermanf41049a2022-01-21 14:41:25 -080010936 case kUpb_CType_Int64:
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010937 jsonenc_printf(e, "%" PRId64, val.int64_val);
10938 break;
Joshua Habermanf41049a2022-01-21 14:41:25 -080010939 case kUpb_CType_UInt64:
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010940 jsonenc_printf(e, "%" PRIu64, val.uint64_val);
10941 break;
Joshua Habermanf41049a2022-01-21 14:41:25 -080010942 case kUpb_CType_String:
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010943 jsonenc_stringbody(e, val.str_val);
10944 break;
10945 default:
10946 UPB_UNREACHABLE();
10947 }
10948
10949 jsonenc_putstr(e, "\":");
10950}
10951
Joshua Habermanf41049a2022-01-21 14:41:25 -080010952static void jsonenc_array(jsonenc* e, const upb_Array* arr,
10953 const upb_FieldDef* f) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010954 size_t i;
Joshua Habermanf41049a2022-01-21 14:41:25 -080010955 size_t size = arr ? upb_Array_Size(arr) : 0;
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010956 bool first = true;
10957
10958 jsonenc_putstr(e, "[");
10959
10960 for (i = 0; i < size; i++) {
10961 jsonenc_putsep(e, ",", &first);
Joshua Habermanf41049a2022-01-21 14:41:25 -080010962 jsonenc_scalar(e, upb_Array_Get(arr, i), f);
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010963 }
10964
10965 jsonenc_putstr(e, "]");
10966}
10967
Joshua Habermanf41049a2022-01-21 14:41:25 -080010968static void jsonenc_map(jsonenc* e, const upb_Map* map, const upb_FieldDef* f) {
10969 const upb_MessageDef* entry = upb_FieldDef_MessageSubDef(f);
Joshua Haberman7ecf43f2022-03-14 13:11:29 -070010970 const upb_FieldDef* key_f = upb_MessageDef_FindFieldByNumber(entry, 1);
10971 const upb_FieldDef* val_f = upb_MessageDef_FindFieldByNumber(entry, 2);
Joshua Habermanf41049a2022-01-21 14:41:25 -080010972 size_t iter = kUpb_Map_Begin;
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010973 bool first = true;
10974
10975 jsonenc_putstr(e, "{");
10976
10977 if (map) {
Joshua Habermanf41049a2022-01-21 14:41:25 -080010978 while (upb_MapIterator_Next(map, &iter)) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010979 jsonenc_putsep(e, ",", &first);
Joshua Habermanf41049a2022-01-21 14:41:25 -080010980 jsonenc_mapkey(e, upb_MapIterator_Key(map, iter), key_f);
10981 jsonenc_scalar(e, upb_MapIterator_Value(map, iter), val_f);
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010982 }
10983 }
10984
10985 jsonenc_putstr(e, "}");
10986}
10987
Joshua Habermanf41049a2022-01-21 14:41:25 -080010988static void jsonenc_fieldval(jsonenc* e, const upb_FieldDef* f,
10989 upb_MessageValue val, bool* first) {
10990 const char* name;
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010991
10992 jsonenc_putsep(e, ",", first);
Joshua Haberman9abf6e22021-01-13 12:16:25 -080010993
Joshua Habermanf41049a2022-01-21 14:41:25 -080010994 if (upb_FieldDef_IsExtension(f)) {
10995 // TODO: For MessageSet, I would have expected this to print the message
10996 // name here, but Python doesn't appear to do this. We should do more
10997 // research here about what various implementations do.
10998 jsonenc_printf(e, "\"[%s]\":", upb_FieldDef_FullName(f));
10999 } else {
11000 if (e->options & upb_JsonEncode_UseProtoNames) {
11001 name = upb_FieldDef_Name(f);
11002 } else {
11003 name = upb_FieldDef_JsonName(f);
11004 }
11005 jsonenc_printf(e, "\"%s\":", name);
11006 }
11007
11008 if (upb_FieldDef_IsMap(f)) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -080011009 jsonenc_map(e, val.map_val, f);
Joshua Habermanf41049a2022-01-21 14:41:25 -080011010 } else if (upb_FieldDef_IsRepeated(f)) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -080011011 jsonenc_array(e, val.array_val, f);
11012 } else {
11013 jsonenc_scalar(e, val, f);
11014 }
11015}
11016
Joshua Habermanf41049a2022-01-21 14:41:25 -080011017static void jsonenc_msgfields(jsonenc* e, const upb_Message* msg,
11018 const upb_MessageDef* m, bool first) {
11019 upb_MessageValue val;
11020 const upb_FieldDef* f;
Joshua Haberman9abf6e22021-01-13 12:16:25 -080011021
Joshua Habermanf41049a2022-01-21 14:41:25 -080011022 if (e->options & upb_JsonEncode_EmitDefaults) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -080011023 /* Iterate over all fields. */
11024 int i = 0;
Joshua Habermanf41049a2022-01-21 14:41:25 -080011025 int n = upb_MessageDef_FieldCount(m);
Joshua Haberman9abf6e22021-01-13 12:16:25 -080011026 for (i = 0; i < n; i++) {
Joshua Habermanf41049a2022-01-21 14:41:25 -080011027 f = upb_MessageDef_Field(m, i);
11028 if (!upb_FieldDef_HasPresence(f) || upb_Message_Has(msg, f)) {
11029 jsonenc_fieldval(e, f, upb_Message_Get(msg, f), &first);
Joshua Haberman9abf6e22021-01-13 12:16:25 -080011030 }
11031 }
11032 } else {
11033 /* Iterate over non-empty fields. */
Joshua Habermanf41049a2022-01-21 14:41:25 -080011034 size_t iter = kUpb_Message_Begin;
11035 while (upb_Message_Next(msg, m, e->ext_pool, &f, &val, &iter)) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -080011036 jsonenc_fieldval(e, f, val, &first);
11037 }
11038 }
11039}
11040
Joshua Habermanf41049a2022-01-21 14:41:25 -080011041static void jsonenc_msg(jsonenc* e, const upb_Message* msg,
11042 const upb_MessageDef* m) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -080011043 jsonenc_putstr(e, "{");
Joshua Habermandd69a482021-05-17 22:40:33 -070011044 jsonenc_msgfields(e, msg, m, true);
Joshua Haberman9abf6e22021-01-13 12:16:25 -080011045 jsonenc_putstr(e, "}");
11046}
11047
Joshua Habermanf41049a2022-01-21 14:41:25 -080011048static size_t jsonenc_nullz(jsonenc* e, size_t size) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -080011049 size_t ret = e->ptr - e->buf + e->overflow;
11050
11051 if (size > 0) {
11052 if (e->ptr == e->end) e->ptr--;
11053 *e->ptr = '\0';
11054 }
11055
11056 return ret;
11057}
11058
Joshua Habermanf41049a2022-01-21 14:41:25 -080011059size_t upb_JsonEncode(const upb_Message* msg, const upb_MessageDef* m,
11060 const upb_DefPool* ext_pool, int options, char* buf,
11061 size_t size, upb_Status* status) {
Joshua Haberman9abf6e22021-01-13 12:16:25 -080011062 jsonenc e;
11063
11064 e.buf = buf;
11065 e.ptr = buf;
Joshua Habermandd69a482021-05-17 22:40:33 -070011066 e.end = UPB_PTRADD(buf, size);
Joshua Haberman9abf6e22021-01-13 12:16:25 -080011067 e.overflow = 0;
11068 e.options = options;
11069 e.ext_pool = ext_pool;
11070 e.status = status;
11071 e.arena = NULL;
11072
11073 if (setjmp(e.err)) return -1;
11074
11075 jsonenc_msgfield(&e, msg, m);
Joshua Habermanf41049a2022-01-21 14:41:25 -080011076 if (e.arena) upb_Arena_Free(e.arena);
Joshua Haberman9abf6e22021-01-13 12:16:25 -080011077 return jsonenc_nullz(&e, size);
11078}
Joshua Habermandd69a482021-05-17 22:40:33 -070011079
11080/** upb/port_undef.inc ************************************************************/
Joshua Haberman9abf6e22021-01-13 12:16:25 -080011081/* See port_def.inc. This should #undef all macros #defined there. */
11082
Joshua Haberman9abf6e22021-01-13 12:16:25 -080011083#undef UPB_SIZE
11084#undef UPB_PTR_AT
11085#undef UPB_READ_ONEOF
11086#undef UPB_WRITE_ONEOF
Joshua Habermandd69a482021-05-17 22:40:33 -070011087#undef UPB_MAPTYPE_STRING
Joshua Haberman9abf6e22021-01-13 12:16:25 -080011088#undef UPB_INLINE
11089#undef UPB_ALIGN_UP
11090#undef UPB_ALIGN_DOWN
11091#undef UPB_ALIGN_MALLOC
11092#undef UPB_ALIGN_OF
Joshua Habermandd69a482021-05-17 22:40:33 -070011093#undef UPB_LIKELY
11094#undef UPB_UNLIKELY
Joshua Haberman9abf6e22021-01-13 12:16:25 -080011095#undef UPB_FORCEINLINE
11096#undef UPB_NOINLINE
11097#undef UPB_NORETURN
Joshua Habermandd69a482021-05-17 22:40:33 -070011098#undef UPB_PRINTF
Joshua Haberman9abf6e22021-01-13 12:16:25 -080011099#undef UPB_MAX
11100#undef UPB_MIN
11101#undef UPB_UNUSED
11102#undef UPB_ASSUME
11103#undef UPB_ASSERT
11104#undef UPB_UNREACHABLE
Joshua Habermandd69a482021-05-17 22:40:33 -070011105#undef UPB_SETJMP
11106#undef UPB_LONGJMP
11107#undef UPB_PTRADD
11108#undef UPB_MUSTTAIL
11109#undef UPB_FASTTABLE_SUPPORTED
11110#undef UPB_FASTTABLE
11111#undef UPB_FASTTABLE_INIT
Joshua Haberman9abf6e22021-01-13 12:16:25 -080011112#undef UPB_POISON_MEMORY_REGION
11113#undef UPB_UNPOISON_MEMORY_REGION
11114#undef UPB_ASAN
Joshua Haberman7ecf43f2022-03-14 13:11:29 -070011115#undef UPB_TREAT_PROTO2_ENUMS_LIKE_PROTO3