| /* |
| * Copyright (c) 2009-2021, Google LLC |
| * All rights reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions are met: |
| * * Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * * Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution. |
| * * Neither the name of Google LLC nor the |
| * names of its contributors may be used to endorse or promote products |
| * derived from this software without specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT, |
| * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
| * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
| * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
| * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
| * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| |
| #include "upb/mini_table.h" |
| |
| #include <inttypes.h> |
| #include <setjmp.h> |
| |
| #include "upb/msg_internal.h" |
| #include "upb/upb.h" |
| |
| // Must be last. |
| #include "upb/port_def.inc" |
| |
// Field type codes as written into the encoded mini-table string.  The encoder
// emits the base-92 character for one of these values per field.
typedef enum {
  kUpb_EncodedType_Double = 0,
  kUpb_EncodedType_Float = 1,
  kUpb_EncodedType_Fixed32 = 2,
  kUpb_EncodedType_Fixed64 = 3,
  kUpb_EncodedType_SFixed32 = 4,
  kUpb_EncodedType_SFixed64 = 5,
  kUpb_EncodedType_Int32 = 6,
  kUpb_EncodedType_UInt32 = 7,
  kUpb_EncodedType_SInt32 = 8,
  kUpb_EncodedType_Int64 = 9,
  kUpb_EncodedType_UInt64 = 10,
  kUpb_EncodedType_SInt64 = 11,
  kUpb_EncodedType_Enum = 12,
  kUpb_EncodedType_Bool = 13,
  kUpb_EncodedType_Bytes = 14,
  kUpb_EncodedType_String = 15,
  kUpb_EncodedType_Group = 16,
  kUpb_EncodedType_Message = 17,

  // A repeated field is encoded as (singular type code + RepeatedBase).
  kUpb_EncodedType_RepeatedBase = 20,
} upb_EncodedType;
| |
// Bit flags that may follow a field's type character, encoded as a base-92
// varint in the modifier character range.
typedef enum {
  // Emitted by the encoder when a field's packed setting differs from the
  // message-level default.
  kUpb_EncodedFieldModifier_IsUnpacked = 1 << 0,
  // Not referenced by any code in this file.
  kUpb_EncodedFieldModifier_JspbString = 1 << 1,
  // upb only.
  kUpb_EncodedFieldModifier_IsProto3Singular = 1 << 2,
  kUpb_EncodedFieldModifier_IsRequired = 1 << 3,
} upb_EncodedFieldModifier;
| |
// Character ranges that partition the encoded mini-table alphabet.  The
// decoder dispatches on which range each input character falls into.
enum {
  // Field type characters.
  kUpb_EncodedValue_MinField = ' ',
  kUpb_EncodedValue_MaxField = 'K',
  // Digits of a modifier varint.
  kUpb_EncodedValue_MinModifier = 'L',
  kUpb_EncodedValue_MaxModifier = '[',
  // Ends the field list; everything after it describes oneofs.
  kUpb_EncodedValue_End = '^',
  // Digits of a field-number skip varint.
  kUpb_EncodedValue_MinSkip = '_',
  kUpb_EncodedValue_MaxSkip = '~',
  // Inside the oneof section: separates one oneof from the next.
  kUpb_EncodedValue_OneofSeparator = '~',
  // Inside the oneof section: separates field numbers within a oneof.
  kUpb_EncodedValue_FieldSeparator = '|',
  // Digits of a oneof member's field-number varint.
  kUpb_EncodedValue_MinOneofField = ' ',
  kUpb_EncodedValue_MaxOneofField = 'b',
};
| |
// Maps a base-92 digit value in [0, 92) to its character.  The alphabet is
// ' ' through '~' in ascending order with the double quote, single quote and
// backslash left out.
char upb_ToBase92(int8_t ch) {
  static const char kAlphabet[] =
      " !#$%&()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ"
      "[]^_`abcdefghijklmnopqrstuvwxyz{|}~";

  UPB_ASSERT(0 <= ch && ch < 92);
  return kAlphabet[ch];
}
| |
// Maps a character back to its base-92 digit value, or -1 if the character is
// not part of the alphabet (outside ' '..'~', or one of the three excluded
// characters: double quote, single quote, backslash).
char upb_FromBase92(uint8_t ch) {
  if (ch < ' ' || ch > '~') return -1;
  if (ch == '"' || ch == '\'' || ch == '\\') return -1;

  // Every excluded character that sorts below `ch` shifts its value down by
  // one relative to its plain distance from ' '.
  int excluded_below = (ch > '"') + (ch > '\'') + (ch > '\\');
  return (char)(ch - ' ' - excluded_below);
}
| |
| /** upb_MtDataEncoder *********************************************************/ |
| |
// Encoder state kept inside upb_MtDataEncoder's `internal` storage.
typedef struct {
  char* buf_start;  // Only for checking kUpb_MtDataEncoder_MinSize.
  uint64_t msg_mod;         // Message modifiers passed to StartMessage().
  uint32_t last_field_num;  // Number of the most recently encoded field.
  // Tracks which separator, if any, must precede the next oneof output.
  enum {
    kUpb_OneofState_NotStarted,
    kUpb_OneofState_StartedOneof,
    kUpb_OneofState_EmittedOneofField,
  } oneof_state;
} upb_MtDataEncoderInternal;
| |
| static upb_MtDataEncoderInternal* upb_MtDataEncoder_GetInternal( |
| upb_MtDataEncoder* e, char* buf_start) { |
| upb_MtDataEncoderInternal* ret = (upb_MtDataEncoderInternal*)e->internal; |
| ret->buf_start = buf_start; |
| return ret; |
| } |
| |
| static char* upb_MtDataEncoder_Put(upb_MtDataEncoder* e, char* ptr, char ch) { |
| upb_MtDataEncoderInternal* in = (upb_MtDataEncoderInternal*)e->internal; |
| UPB_ASSERT(ptr - in->buf_start < kUpb_MtDataEncoder_MinSize); |
| if (ptr == e->end) return NULL; |
| *ptr++ = upb_ToBase92(ch); |
| return ptr; |
| } |
| |
| static char* upb_MtDataEncoder_PutBase92Varint(upb_MtDataEncoder* e, char* ptr, |
| uint32_t val, int min, int max) { |
| int shift = _upb_Log2Ceiling(upb_FromBase92(max) - upb_FromBase92(min) + 1); |
| UPB_ASSERT(shift <= 6); |
| uint32_t mask = (1 << shift) - 1; |
| do { |
| uint32_t bits = val & mask; |
| ptr = upb_MtDataEncoder_Put(e, ptr, bits + upb_FromBase92(min)); |
| if (!ptr) return NULL; |
| val >>= shift; |
| } while (val); |
| return ptr; |
| } |
| |
| char* upb_MtDataEncoder_StartMessage(upb_MtDataEncoder* e, char* ptr, |
| uint64_t msg_mod) { |
| upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); |
| in->msg_mod = msg_mod; |
| in->last_field_num = 0; |
| in->oneof_state = kUpb_OneofState_NotStarted; |
| return ptr; |
| } |
| |
| char* upb_MtDataEncoder_PutField(upb_MtDataEncoder* e, char* ptr, |
| upb_FieldType type, uint32_t field_num, |
| uint64_t field_mod) { |
| static const char kUpb_TypeToEncoded[] = { |
| [kUpb_FieldType_Double] = kUpb_EncodedType_Double, |
| [kUpb_FieldType_Float] = kUpb_EncodedType_Float, |
| [kUpb_FieldType_Int64] = kUpb_EncodedType_Int64, |
| [kUpb_FieldType_UInt64] = kUpb_EncodedType_UInt64, |
| [kUpb_FieldType_Int32] = kUpb_EncodedType_Int32, |
| [kUpb_FieldType_Fixed64] = kUpb_EncodedType_Fixed64, |
| [kUpb_FieldType_Fixed32] = kUpb_EncodedType_Fixed32, |
| [kUpb_FieldType_Bool] = kUpb_EncodedType_Bool, |
| [kUpb_FieldType_String] = kUpb_EncodedType_String, |
| [kUpb_FieldType_Group] = kUpb_EncodedType_Group, |
| [kUpb_FieldType_Message] = kUpb_EncodedType_Message, |
| [kUpb_FieldType_Bytes] = kUpb_EncodedType_Bytes, |
| [kUpb_FieldType_UInt32] = kUpb_EncodedType_UInt32, |
| [kUpb_FieldType_Enum] = kUpb_EncodedType_Enum, |
| [kUpb_FieldType_SFixed32] = kUpb_EncodedType_SFixed32, |
| [kUpb_FieldType_SFixed64] = kUpb_EncodedType_SFixed64, |
| [kUpb_FieldType_SInt32] = kUpb_EncodedType_SInt32, |
| [kUpb_FieldType_SInt64] = kUpb_EncodedType_SInt64, |
| }; |
| |
| upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); |
| if (field_num <= in->last_field_num) return NULL; |
| if (in->last_field_num + 1 != field_num) { |
| // Put skip. |
| UPB_ASSERT(field_num > in->last_field_num); |
| uint32_t skip = field_num - in->last_field_num; |
| ptr = upb_MtDataEncoder_PutBase92Varint( |
| e, ptr, skip, kUpb_EncodedValue_MinSkip, kUpb_EncodedValue_MaxSkip); |
| if (!ptr) return NULL; |
| } |
| in->last_field_num = field_num; |
| |
| // Put field type. |
| int encoded_type = kUpb_TypeToEncoded[type]; |
| if (field_mod & kUpb_FieldModifier_IsRepeated) { |
| // Repeated fields shift the type number up (unlike other modifiers which |
| // are bit flags). |
| encoded_type += kUpb_EncodedType_RepeatedBase; |
| } |
| ptr = upb_MtDataEncoder_Put(e, ptr, encoded_type); |
| if (!ptr) return NULL; |
| |
| uint32_t encoded_modifiers = 0; |
| if (field_mod & kUpb_FieldModifier_IsProto3Singular) { |
| encoded_modifiers |= kUpb_EncodedFieldModifier_IsProto3Singular; |
| } |
| if (field_mod & kUpb_FieldModifier_IsRequired) { |
| encoded_modifiers |= kUpb_EncodedFieldModifier_IsRequired; |
| } |
| if ((field_mod & kUpb_FieldModifier_IsPacked) != |
| (in->msg_mod & kUpb_MessageModifier_DefaultIsPacked)) { |
| encoded_modifiers |= kUpb_EncodedFieldModifier_IsUnpacked; |
| } |
| if (encoded_modifiers) { |
| ptr = upb_MtDataEncoder_PutBase92Varint(e, ptr, encoded_modifiers, |
| kUpb_EncodedValue_MinModifier, |
| kUpb_EncodedValue_MaxModifier); |
| } |
| return ptr; |
| } |
| |
| char* upb_MtDataEncoder_StartOneof(upb_MtDataEncoder* e, char* ptr) { |
| upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); |
| if (in->oneof_state == kUpb_OneofState_NotStarted) { |
| ptr = upb_MtDataEncoder_Put(e, ptr, upb_FromBase92(kUpb_EncodedValue_End)); |
| } else { |
| ptr = upb_MtDataEncoder_Put( |
| e, ptr, upb_FromBase92(kUpb_EncodedValue_OneofSeparator)); |
| } |
| in->oneof_state = kUpb_OneofState_StartedOneof; |
| return ptr; |
| } |
| |
| char* upb_MtDataEncoder_PutOneofField(upb_MtDataEncoder* e, char* ptr, |
| uint32_t field_num) { |
| upb_MtDataEncoderInternal* in = upb_MtDataEncoder_GetInternal(e, ptr); |
| if (in->oneof_state == kUpb_OneofState_EmittedOneofField) { |
| ptr = upb_MtDataEncoder_Put( |
| e, ptr, upb_FromBase92(kUpb_EncodedValue_FieldSeparator)); |
| if (!ptr) return NULL; |
| } |
| ptr = upb_MtDataEncoder_PutBase92Varint(e, ptr, field_num, upb_ToBase92(0), |
| upb_ToBase92(63)); |
| in->oneof_state = kUpb_OneofState_EmittedOneofField; |
| return ptr; |
| } |
| |
| const upb_MiniTable_Field* upb_MiniTable_FindFieldByNumber( |
| const upb_MiniTable* table, uint32_t number) { |
| int n = table->field_count; |
| for (int i = 0; i < n; i++) { |
| if (table->fields[i].number == number) { |
| return &table->fields[i]; |
| } |
| } |
| return NULL; |
| } |
| |
| /** Data decoder **************************************************************/ |
| |
// Kind of storage a layout item reserves in the message.
// Note: we sort by this number when calculating layout order.
typedef enum {
  kUpb_LayoutItemType_OneofCase,   // Oneof case.
  kUpb_LayoutItemType_OneofField,  // Oneof field data.
  kUpb_LayoutItemType_Field,       // Non-oneof field data.

  kUpb_LayoutItemType_Max = kUpb_LayoutItemType_Field,
} upb_LayoutItemType;
| |
// Marks the end of a oneof's linked list of fields, and an empty list while a
// oneof is being decoded.
#define kUpb_LayoutItem_IndexSentinel ((uint16_t)-1)

// One region of message storage awaiting an offset: a regular field, the
// shared data slot of a oneof, or a oneof's case slot.
typedef struct {
  // Index of the corresponding field.  When this is a oneof field, the field's
  // offset will be the index of the next field in a linked list.
  uint16_t field_index;
  uint16_t offset;          // Filled in when offsets are assigned.
  upb_FieldRep rep;         // Storage representation; drives size/alignment.
  upb_LayoutItemType type;
} upb_LayoutItem;
| |
// Growable array of layout items.
typedef struct {
  upb_LayoutItem* data;
  size_t size;      // Number of items in use.
  size_t capacity;  // Number of items `data` has room for.
} upb_LayoutItemVector;
| |
// State shared by all decoding steps.
typedef struct {
  const char* end;               // One past the last byte of encoded input.
  upb_MiniTable* table;          // Table being built; NULL for extensions.
  upb_MiniTable_Field* fields;   // Field array being filled in.
  upb_MiniTablePlatform platform;  // Selects 32- or 64-bit sizes/alignments.
  upb_LayoutItemVector vec;      // Layout items collected for placement.
  upb_Arena* arena;              // Allocator for the table, fields and subs.
  upb_Status* status;            // Receives the error message on failure.
  jmp_buf err;                   // Jump target used to abort on error.
} upb_MtDecoder;
| |
// Stores "Error building mini table: " followed by the formatted message in
// d->status, then aborts decoding by jumping to d->err.  Never returns.
UPB_PRINTF(2, 3)
UPB_NORETURN static void upb_MtDecoder_ErrorFormat(upb_MtDecoder* d,
                                                   const char* fmt, ...) {
  va_list argp;
  upb_Status_SetErrorMessage(d->status, "Error building mini table: ");
  va_start(argp, fmt);
  upb_Status_VAppendErrorFormat(d->status, fmt, argp);
  va_end(argp);
  UPB_LONGJMP(d->err, 1);
}
| |
| static void upb_MtDecoder_CheckOutOfMemory(upb_MtDecoder* d, const void* ptr) { |
| if (!ptr) upb_MtDecoder_ErrorFormat(d, "Out of memory"); |
| } |
| |
// In each field's offset, we temporarily store a presence classifier:
enum PresenceClass {
  kNoPresence = 0,        // No hasbit (repeated or proto3-singular fields).
  kHasbitPresence = 1,    // Gets a regular hasbit.
  kRequiredPresence = 2,  // Required; hasbits for these are assigned first.
  kOneofBase = 3,
  // Negative values refer to a specific oneof with that number.  Positive
  // values >= kOneofBase indicate that this field is in a oneof, and specify
  // the next field in this oneof's linked list.
  // NOTE(review): the code in this file only stores (field index +
  // kOneofBase) or kUpb_LayoutItem_IndexSentinel here; no negative value is
  // written anywhere in view -- confirm whether that part is still current.
};
| |
| static const char* upb_MiniTable_DecodeBase92Varint(upb_MtDecoder* d, |
| const char* ptr, |
| char first_ch, uint8_t min, |
| uint8_t max, |
| uint32_t* out_val) { |
| uint32_t val = 0; |
| uint32_t shift = 0; |
| const int bits_per_char = |
| _upb_Log2Ceiling(upb_FromBase92(max) - upb_FromBase92(min)); |
| char ch = first_ch; |
| while (1) { |
| uint32_t bits = upb_FromBase92(ch) - upb_FromBase92(min); |
| UPB_ASSERT(shift < 32 - bits_per_char); |
| val |= bits << shift; |
| if (ptr == d->end || *ptr < min || max < *ptr) { |
| *out_val = val; |
| return ptr; |
| } |
| ch = *ptr++; |
| shift += bits_per_char; |
| } |
| } |
| |
| static bool upb_MiniTable_HasSub(char type, uint64_t msg_modifiers) { |
| return type == kUpb_EncodedType_Message || type == kUpb_EncodedType_Group || |
| (type == kUpb_EncodedType_Enum && |
| (msg_modifiers & kUpb_MessageModifier_HasClosedEnums)); |
| } |
| |
| static void upb_MiniTable_SetField(upb_MtDecoder* d, uint8_t ch, |
| upb_MiniTable_Field* field, |
| uint64_t msg_modifiers, |
| uint32_t* sub_count) { |
| static const char kUpb_EncodedToFieldRep[] = { |
| [kUpb_EncodedType_Double] = kUpb_FieldRep_8Byte, |
| [kUpb_EncodedType_Float] = kUpb_FieldRep_4Byte, |
| [kUpb_EncodedType_Int64] = kUpb_FieldRep_8Byte, |
| [kUpb_EncodedType_UInt64] = kUpb_FieldRep_8Byte, |
| [kUpb_EncodedType_Int32] = kUpb_FieldRep_4Byte, |
| [kUpb_EncodedType_Fixed64] = kUpb_FieldRep_8Byte, |
| [kUpb_EncodedType_Fixed32] = kUpb_FieldRep_4Byte, |
| [kUpb_EncodedType_Bool] = kUpb_FieldRep_1Byte, |
| [kUpb_EncodedType_String] = kUpb_FieldRep_StringView, |
| [kUpb_EncodedType_Group] = kUpb_FieldRep_Pointer, |
| [kUpb_EncodedType_Message] = kUpb_FieldRep_Pointer, |
| [kUpb_EncodedType_Bytes] = kUpb_FieldRep_StringView, |
| [kUpb_EncodedType_UInt32] = kUpb_FieldRep_4Byte, |
| [kUpb_EncodedType_Enum] = kUpb_FieldRep_4Byte, |
| [kUpb_EncodedType_SFixed32] = kUpb_FieldRep_4Byte, |
| [kUpb_EncodedType_SFixed64] = kUpb_FieldRep_8Byte, |
| [kUpb_EncodedType_SInt32] = kUpb_FieldRep_4Byte, |
| [kUpb_EncodedType_SInt64] = kUpb_FieldRep_8Byte, |
| }; |
| |
| static const char kUpb_EncodedToType[] = { |
| [kUpb_EncodedType_Double] = kUpb_FieldType_Double, |
| [kUpb_EncodedType_Float] = kUpb_FieldType_Float, |
| [kUpb_EncodedType_Int64] = kUpb_FieldType_Int64, |
| [kUpb_EncodedType_UInt64] = kUpb_FieldType_UInt64, |
| [kUpb_EncodedType_Int32] = kUpb_FieldType_Int32, |
| [kUpb_EncodedType_Fixed64] = kUpb_FieldType_Fixed64, |
| [kUpb_EncodedType_Fixed32] = kUpb_FieldType_Fixed32, |
| [kUpb_EncodedType_Bool] = kUpb_FieldType_Bool, |
| [kUpb_EncodedType_String] = kUpb_FieldType_String, |
| [kUpb_EncodedType_Group] = kUpb_FieldType_Group, |
| [kUpb_EncodedType_Message] = kUpb_FieldType_Message, |
| [kUpb_EncodedType_Bytes] = kUpb_FieldType_Bytes, |
| [kUpb_EncodedType_UInt32] = kUpb_FieldType_UInt32, |
| [kUpb_EncodedType_Enum] = kUpb_FieldType_Enum, |
| [kUpb_EncodedType_SFixed32] = kUpb_FieldType_SFixed32, |
| [kUpb_EncodedType_SFixed64] = kUpb_FieldType_SFixed64, |
| [kUpb_EncodedType_SInt32] = kUpb_FieldType_SInt32, |
| [kUpb_EncodedType_SInt64] = kUpb_FieldType_SInt64, |
| }; |
| |
| int8_t type = upb_FromBase92(ch); |
| if (ch >= upb_ToBase92(kUpb_EncodedType_RepeatedBase)) { |
| type -= kUpb_EncodedType_RepeatedBase; |
| field->mode = kUpb_FieldMode_Array; |
| field->mode |= kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift; |
| field->offset = kNoPresence; |
| } else { |
| field->mode = kUpb_FieldMode_Scalar; |
| field->mode |= kUpb_EncodedToFieldRep[type] << kUpb_FieldRep_Shift; |
| field->offset = kHasbitPresence; |
| } |
| if (type >= 18) { |
| upb_MtDecoder_ErrorFormat(d, "Invalid field type: %d", (int)type); |
| UPB_UNREACHABLE(); |
| } |
| field->descriptortype = kUpb_EncodedToType[type]; |
| if (upb_MiniTable_HasSub(type, msg_modifiers)) { |
| field->submsg_index = sub_count ? (*sub_count)++ : 0; |
| } |
| } |
| |
| static void upb_MtDecoder_ModifyField(upb_MtDecoder* d, uint32_t mod, |
| upb_MiniTable_Field* field) { |
| if (mod & kUpb_EncodedFieldModifier_IsUnpacked) { |
| field->mode &= ~kUpb_LabelFlags_IsPacked; |
| } else { |
| field->mode |= kUpb_LabelFlags_IsPacked; |
| } |
| |
| bool singular = mod & kUpb_EncodedFieldModifier_IsProto3Singular; |
| bool required = mod & kUpb_EncodedFieldModifier_IsRequired; |
| |
| // Validate. |
| if ((singular || required) && field->offset != kHasbitPresence) { |
| upb_MtDecoder_ErrorFormat( |
| d, "Invalid modifier(s) for repeated field %" PRIu32, field->number); |
| UPB_UNREACHABLE(); |
| } |
| if (singular && required) { |
| upb_MtDecoder_ErrorFormat( |
| d, "Field %" PRIu32 " cannot be both singular and required", |
| field->number); |
| UPB_UNREACHABLE(); |
| } |
| |
| if (singular) field->offset = kNoPresence; |
| if (required) field->offset = kRequiredPresence; |
| } |
| |
| static void upb_MtDecoder_PushItem(upb_MtDecoder* d, upb_LayoutItem item) { |
| if (d->vec.size == d->vec.capacity) { |
| size_t new_cap = UPB_MAX(8, d->vec.size * 2); |
| d->vec.data = realloc(d->vec.data, new_cap * sizeof(*d->vec.data)); |
| upb_MtDecoder_CheckOutOfMemory(d, d->vec.data); |
| d->vec.capacity = new_cap; |
| } |
| d->vec.data[d->vec.size++] = item; |
| } |
| |
| static void upb_MtDecoder_PushOneof(upb_MtDecoder* d, upb_LayoutItem item) { |
| if (item.field_index == kUpb_LayoutItem_IndexSentinel) { |
| upb_MtDecoder_ErrorFormat(d, "Empty oneof"); |
| UPB_UNREACHABLE(); |
| } |
| item.field_index -= kOneofBase; |
| |
| // Push oneof data. |
| item.type = kUpb_LayoutItemType_OneofField; |
| upb_MtDecoder_PushItem(d, item); |
| |
| // Push oneof case. |
| item.rep = kUpb_FieldRep_4Byte; // Field Number. |
| item.type = kUpb_LayoutItemType_OneofCase; |
| upb_MtDecoder_PushItem(d, item); |
| } |
| |
| static const char* upb_MtDecoder_DecodeOneofField(upb_MtDecoder* d, |
| const char* ptr, |
| char first_ch, |
| upb_LayoutItem* item) { |
| uint32_t field_num; |
| ptr = upb_MiniTable_DecodeBase92Varint( |
| d, ptr, first_ch, kUpb_EncodedValue_MinOneofField, |
| kUpb_EncodedValue_MaxOneofField, &field_num); |
| upb_MiniTable_Field* f = |
| (void*)upb_MiniTable_FindFieldByNumber(d->table, field_num); |
| |
| if (!f) { |
| upb_MtDecoder_ErrorFormat(d, |
| "Couldn't add field number %" PRIu32 |
| " to oneof, no such field number.", |
| field_num); |
| UPB_UNREACHABLE(); |
| } |
| if (f->offset != kHasbitPresence) { |
| upb_MtDecoder_ErrorFormat( |
| d, |
| "Cannot add repeated, required, or singular field %" PRIu32 |
| " to oneof.", |
| field_num); |
| UPB_UNREACHABLE(); |
| } |
| |
| // Oneof storage must be large enough to accommodate the largest member. |
| item->rep = UPB_MAX(item->rep, f->mode >> kUpb_FieldRep_Shift); |
| // Prepend this field to the linked list. |
| f->offset = item->field_index; |
| item->field_index = (f - d->fields) + kOneofBase; |
| return ptr; |
| } |
| |
| static const char* upb_MtDecoder_DecodeOneofs(upb_MtDecoder* d, |
| const char* ptr) { |
| upb_LayoutItem item = {.rep = 0, |
| .field_index = kUpb_LayoutItem_IndexSentinel}; |
| while (ptr < d->end) { |
| char ch = *ptr++; |
| if (ch == kUpb_EncodedValue_FieldSeparator) { |
| // Field separator, no action needed. |
| } else if (ch == kUpb_EncodedValue_OneofSeparator) { |
| // End of oneof. |
| upb_MtDecoder_PushOneof(d, item); |
| item.field_index = kUpb_LayoutItem_IndexSentinel; // Move to next oneof. |
| } else { |
| ptr = upb_MtDecoder_DecodeOneofField(d, ptr, ch, &item); |
| } |
| } |
| |
| // Push final oneof. |
| upb_MtDecoder_PushOneof(d, item); |
| return ptr; |
| } |
| |
| static const char* upb_MtDecoder_ParseModifier(upb_MtDecoder* d, |
| const char* ptr, char first_ch, |
| uint16_t field_count, |
| uint64_t* msg_modifiers) { |
| uint32_t mod; |
| ptr = upb_MiniTable_DecodeBase92Varint(d, ptr, first_ch, |
| kUpb_EncodedValue_MinModifier, |
| kUpb_EncodedValue_MaxModifier, &mod); |
| if (field_count == 0) { |
| if (!d->table) { |
| upb_MtDecoder_ErrorFormat(d, "Extensions cannot have message modifiers"); |
| UPB_UNREACHABLE(); |
| } |
| *msg_modifiers = mod; |
| } else { |
| upb_MiniTable_Field* field = &d->fields[field_count - 1]; |
| upb_MtDecoder_ModifyField(d, mod, field); |
| } |
| |
| return ptr; |
| } |
| |
| static void upb_MtDecoder_AllocateSubs(upb_MtDecoder* d, uint32_t sub_count) { |
| size_t subs_bytes = sizeof(*d->table->subs) * sub_count; |
| d->table->subs = upb_Arena_Malloc(d->arena, subs_bytes); |
| upb_MtDecoder_CheckOutOfMemory(d, d->table->subs); |
| } |
| |
| static void upb_MtDecoder_Parse(upb_MtDecoder* d, const char* ptr, size_t len, |
| void* fields, size_t field_size, |
| uint16_t* field_count, uint32_t* sub_count) { |
| uint64_t msg_modifiers = 0; |
| uint32_t last_field_number = 0; |
| bool need_dense_below = d->table != NULL; |
| |
| d->end = UPB_PTRADD(ptr, len); |
| |
| while (ptr < d->end) { |
| char ch = *ptr++; |
| if (ch <= kUpb_EncodedValue_MaxField) { |
| upb_MiniTable_Field* field = fields; |
| *field_count += 1; |
| fields = (char*)fields + field_size; |
| field->number = ++last_field_number; |
| upb_MiniTable_SetField(d, ch, field, msg_modifiers, sub_count); |
| } else if (kUpb_EncodedValue_MinModifier <= ch && |
| ch <= kUpb_EncodedValue_MaxModifier) { |
| ptr = |
| upb_MtDecoder_ParseModifier(d, ptr, ch, *field_count, &msg_modifiers); |
| } else if (ch == kUpb_EncodedValue_End) { |
| if (!d->table) { |
| upb_MtDecoder_ErrorFormat(d, "Extensions cannot have oneofs."); |
| UPB_UNREACHABLE(); |
| } |
| ptr = upb_MtDecoder_DecodeOneofs(d, ptr); |
| } else if (kUpb_EncodedValue_MinSkip <= ch && |
| ch <= kUpb_EncodedValue_MaxSkip) { |
| if (need_dense_below) { |
| d->table->dense_below = d->table->field_count; |
| need_dense_below = false; |
| } |
| uint32_t skip; |
| ptr = upb_MiniTable_DecodeBase92Varint(d, ptr, ch, |
| kUpb_EncodedValue_MinSkip, |
| kUpb_EncodedValue_MaxSkip, &skip); |
| last_field_number += skip; |
| last_field_number--; // Next field seen will increment. |
| } |
| } |
| |
| if (need_dense_below) { |
| d->table->dense_below = d->table->field_count; |
| } |
| } |
| |
| static void upb_MtDecoder_ParseMessage(upb_MtDecoder* d, const char* data, |
| size_t len) { |
| // Buffer length is an upper bound on the number of fields. We will return |
| // what we don't use. |
| d->fields = upb_Arena_Malloc(d->arena, sizeof(*d->fields) * len); |
| upb_MtDecoder_CheckOutOfMemory(d, d->fields); |
| |
| uint32_t sub_count = 0; |
| d->table->field_count = 0; |
| d->table->fields = d->fields; |
| upb_MtDecoder_Parse(d, data, len, d->fields, sizeof(*d->fields), |
| &d->table->field_count, &sub_count); |
| |
| upb_Arena_ShrinkLast(d->arena, d->fields, sizeof(*d->fields) * len, |
| sizeof(*d->fields) * d->table->field_count); |
| d->table->fields = d->fields; |
| upb_MtDecoder_AllocateSubs(d, sub_count); |
| } |
| |
| int upb_MtDecoder_CompareFields(const void* _a, const void* _b) { |
| const upb_LayoutItem* a = _a; |
| const upb_LayoutItem* b = _b; |
| // Currently we just sort by: |
| // 1. rep (smallest fields first) |
| // 2. type (oneof cases first) |
| // 2. field_index (smallest numbers first) |
| // The main goal of this is to reduce space lost to padding. |
| // Later we may have more subtle reasons to prefer a different ordering. |
| const int rep_bits = _upb_Log2Ceiling(kUpb_FieldRep_Max); |
| const int type_bits = _upb_Log2Ceiling(kUpb_LayoutItemType_Max); |
| const int idx_bits = (sizeof(a->field_index) * 8); |
| UPB_ASSERT(idx_bits + rep_bits + type_bits < 32); |
| #define UPB_COMBINE(rep, ty, idx) (((rep << type_bits) | ty) << idx_bits) | idx |
| uint32_t a_packed = UPB_COMBINE(a->rep, a->type, a->field_index); |
| uint32_t b_packed = UPB_COMBINE(b->rep, b->type, b->field_index); |
| assert(a_packed != b_packed); |
| #undef UPB_COMBINE |
| return a_packed < b_packed ? -1 : 1; |
| } |
| |
| static bool upb_MtDecoder_SortLayoutItems(upb_MtDecoder* d) { |
| // Add items for all non-oneof fields (oneofs were already added). |
| int n = d->table->field_count; |
| for (int i = 0; i < n; i++) { |
| upb_MiniTable_Field* f = &d->fields[i]; |
| if (f->offset >= kOneofBase) continue; |
| upb_LayoutItem item = {.field_index = i, |
| .rep = f->mode >> kUpb_FieldRep_Shift, |
| .type = kUpb_LayoutItemType_Field}; |
| upb_MtDecoder_PushItem(d, item); |
| } |
| |
| if (d->vec.size) { |
| qsort(d->vec.data, d->vec.size, sizeof(*d->vec.data), |
| upb_MtDecoder_CompareFields); |
| } |
| |
| return true; |
| } |
| |
// Returns n / d rounded up to the next whole number.
static size_t upb_MiniTable_DivideRoundUp(size_t n, size_t d) {
  const size_t biased = n + (d - 1);
  return biased / d;
}
| |
| static void upb_MtDecoder_AssignHasbits(upb_MiniTable* ret) { |
| int n = ret->field_count; |
| int last_hasbit = 0; // 0 cannot be used. |
| |
| // First assign required fields, which must have the lowest hasbits. |
| for (int i = 0; i < n; i++) { |
| upb_MiniTable_Field* field = (upb_MiniTable_Field*)&ret->fields[i]; |
| if (field->offset == kRequiredPresence) { |
| field->presence = ++last_hasbit; |
| } |
| } |
| ret->required_count = last_hasbit; |
| |
| // Next assign non-required hasbit fields. |
| for (int i = 0; i < n; i++) { |
| upb_MiniTable_Field* field = (upb_MiniTable_Field*)&ret->fields[i]; |
| if (field->offset == kHasbitPresence) { |
| field->presence = ++last_hasbit; |
| } |
| } |
| |
| ret->size = upb_MiniTable_DivideRoundUp(last_hasbit, 8); |
| } |
| |
| size_t upb_MtDecoder_SizeOfRep(upb_FieldRep rep, |
| upb_MiniTablePlatform platform) { |
| static const uint8_t kRepToSize32[] = { |
| [kUpb_FieldRep_1Byte] = 1, [kUpb_FieldRep_4Byte] = 4, |
| [kUpb_FieldRep_Pointer] = 4, [kUpb_FieldRep_StringView] = 8, |
| [kUpb_FieldRep_8Byte] = 8, |
| }; |
| static const uint8_t kRepToSize64[] = { |
| [kUpb_FieldRep_1Byte] = 1, [kUpb_FieldRep_4Byte] = 4, |
| [kUpb_FieldRep_Pointer] = 8, [kUpb_FieldRep_StringView] = 16, |
| [kUpb_FieldRep_8Byte] = 8, |
| }; |
| UPB_ASSERT(sizeof(upb_StringView) == |
| UPB_SIZE(kRepToSize32, kRepToSize64)[kUpb_FieldRep_StringView]); |
| return platform == kUpb_MiniTablePlatform_32Bit ? kRepToSize32[rep] |
| : kRepToSize64[rep]; |
| } |
| |
| size_t upb_MtDecoder_AlignOfRep(upb_FieldRep rep, |
| upb_MiniTablePlatform platform) { |
| static const uint8_t kRepToAlign32[] = { |
| [kUpb_FieldRep_1Byte] = 1, [kUpb_FieldRep_4Byte] = 4, |
| [kUpb_FieldRep_Pointer] = 4, [kUpb_FieldRep_StringView] = 4, |
| [kUpb_FieldRep_8Byte] = 8, |
| }; |
| static const uint8_t kRepToAlign64[] = { |
| [kUpb_FieldRep_1Byte] = 1, [kUpb_FieldRep_4Byte] = 4, |
| [kUpb_FieldRep_Pointer] = 8, [kUpb_FieldRep_StringView] = 8, |
| [kUpb_FieldRep_8Byte] = 8, |
| }; |
| UPB_ASSERT(UPB_ALIGN_OF(upb_StringView) == |
| UPB_SIZE(kRepToAlign32, kRepToAlign64)[kUpb_FieldRep_StringView]); |
| return platform == kUpb_MiniTablePlatform_32Bit ? kRepToAlign32[rep] |
| : kRepToAlign64[rep]; |
| } |
| |
| size_t upb_MtDecoder_Place(upb_MtDecoder* d, upb_FieldRep rep) { |
| size_t size = upb_MtDecoder_SizeOfRep(rep, d->platform); |
| size_t align = upb_MtDecoder_AlignOfRep(rep, d->platform); |
| size_t ret = UPB_ALIGN_UP(d->table->size, align); |
| d->table->size = ret + size; |
| return ret; |
| } |
| |
// Places every layout item in the message and copies the resulting offsets
// into the fields.  Always returns true.
//
// On entry each oneof member's `offset` holds a link to the next member of
// its oneof (field index + kOneofBase), or kUpb_LayoutItem_IndexSentinel at
// the end of the list.  The three passes below must run in this order because
// the last one overwrites those links.
static bool upb_MtDecoder_AssignOffsets(upb_MtDecoder* d) {
  upb_LayoutItem* end = UPB_PTRADD(d->vec.data, d->vec.size);

  // Compute offsets.
  for (upb_LayoutItem* item = d->vec.data; item < end; item++) {
    item->offset = upb_MtDecoder_Place(d, item->rep);
  }

  // Assign oneof case offsets.  We must do these first, since assigning
  // actual offsets will overwrite the links of the linked list.
  for (upb_LayoutItem* item = d->vec.data; item < end; item++) {
    if (item->type != kUpb_LayoutItemType_OneofCase) continue;
    upb_MiniTable_Field* f = &d->fields[item->field_index];
    while (true) {
      // Every member of the oneof records the bitwise complement of the case
      // slot's offset as its presence.
      f->presence = ~item->offset;
      if (f->offset == kUpb_LayoutItem_IndexSentinel) break;
      UPB_ASSERT(f->offset - kOneofBase < d->table->field_count);
      f = &d->fields[f->offset - kOneofBase];
    }
  }

  // Assign offsets.
  for (upb_LayoutItem* item = d->vec.data; item < end; item++) {
    upb_MiniTable_Field* f = &d->fields[item->field_index];
    switch (item->type) {
      case kUpb_LayoutItemType_OneofField:
        // All members share the oneof's data slot.  Read each link before
        // overwriting it with the real offset.
        while (true) {
          uint16_t next_offset = f->offset;
          f->offset = item->offset;
          if (next_offset == kUpb_LayoutItem_IndexSentinel) break;
          f = &d->fields[next_offset - kOneofBase];
        }
        break;
      case kUpb_LayoutItemType_Field:
        f->offset = item->offset;
        break;
      default:
        // Oneof case items were handled in the previous pass.
        break;
    }
  }
  return true;
}
| |
| upb_MiniTable* upb_MiniTable_BuildWithBuf(const char* data, size_t len, |
| upb_MiniTablePlatform platform, |
| upb_Arena* arena, void** buf, |
| size_t* buf_size, |
| upb_Status* status) { |
| upb_MtDecoder decoder = { |
| .platform = platform, |
| .vec = |
| { |
| .data = *buf, |
| .capacity = *buf_size / sizeof(*decoder.vec.data), |
| .size = 0, |
| }, |
| .arena = arena, |
| .status = status, |
| .table = upb_Arena_Malloc(arena, sizeof(*decoder.table)), |
| }; |
| |
| if (UPB_SETJMP(decoder.err)) { |
| decoder.table = NULL; |
| goto done; |
| } |
| |
| upb_MtDecoder_CheckOutOfMemory(&decoder, decoder.table); |
| |
| decoder.table->size = 0; |
| decoder.table->field_count = 0; |
| decoder.table->ext = kUpb_ExtMode_NonExtendable; |
| decoder.table->dense_below = 0; |
| decoder.table->table_mask = 0; |
| decoder.table->required_count = 0; |
| |
| upb_MtDecoder_ParseMessage(&decoder, data, len); |
| upb_MtDecoder_AssignHasbits(decoder.table); |
| upb_MtDecoder_SortLayoutItems(&decoder); |
| upb_MtDecoder_AssignOffsets(&decoder); |
| |
| done: |
| *buf = decoder.vec.data; |
| *buf_size = decoder.vec.capacity / sizeof(*decoder.vec.data); |
| return decoder.table; |
| } |
| |
| upb_MiniTable* upb_MiniTable_BuildMessageSet(upb_MiniTablePlatform platform, |
| upb_Arena* arena) { |
| upb_MiniTable* ret = upb_Arena_Malloc(arena, sizeof(*ret)); |
| if (!ret) return NULL; |
| |
| ret->size = 0; |
| ret->field_count = 0; |
| ret->ext = kUpb_ExtMode_IsMessageSet; |
| ret->dense_below = 0; |
| ret->table_mask = 0; |
| ret->required_count = 0; |
| return ret; |
| } |
| |
| upb_MiniTable* upb_MiniTable_BuildMapEntry(upb_FieldType key_type, |
| upb_FieldType value_type, |
| upb_MiniTablePlatform platform, |
| upb_Arena* arena) { |
| upb_MiniTable* ret = upb_Arena_Malloc(arena, sizeof(*ret)); |
| upb_MiniTable_Field* fields = upb_Arena_Malloc(arena, sizeof(*fields) * 2); |
| if (!ret || !fields) return NULL; |
| |
| upb_MiniTable_Sub* subs = NULL; |
| if (value_type == kUpb_FieldType_Message || |
| value_type == kUpb_FieldType_Group) { |
| subs = upb_Arena_Malloc(arena, sizeof(*subs)); |
| if (!subs) return NULL; |
| } |
| |
| size_t field_size = |
| upb_MtDecoder_SizeOfRep(kUpb_FieldRep_StringView, platform); |
| |
| fields[0].number = 1; |
| fields[1].number = 2; |
| fields[0].mode = kUpb_FieldMode_Scalar; |
| fields[1].mode = kUpb_FieldMode_Scalar; |
| fields[0].presence = 0; |
| fields[1].presence = 0; |
| fields[0].descriptortype = key_type; |
| fields[1].descriptortype = value_type; |
| fields[0].offset = 0; |
| fields[1].offset = field_size; |
| fields[1].submsg_index = 0; |
| |
| ret->size = UPB_ALIGN_UP(2 * field_size, 8); |
| ret->field_count = 2; |
| ret->ext = kUpb_ExtMode_NonExtendable | kUpb_ExtMode_IsMapEntry; |
| ret->dense_below = 2; |
| ret->table_mask = 0; |
| ret->required_count = 0; |
| ret->subs = subs; |
| ret->fields = fields; |
| return ret; |
| } |
| |
| upb_MiniTable_Extension* upb_MiniTable_BuildExtensions(const char* data, |
| size_t len, |
| size_t* ext_count, |
| upb_Arena* arena, |
| upb_Status* status) { |
| upb_MtDecoder decoder = { |
| .arena = arena, |
| .status = status, |
| .table = NULL, |
| }; |
| |
| upb_MiniTable_Extension* exts; |
| |
| if (UPB_SETJMP(decoder.err)) { |
| exts = NULL; |
| *ext_count = 0; |
| goto done; |
| } |
| |
| uint16_t count = 0; |
| exts = upb_Arena_Malloc(arena, len); |
| upb_MtDecoder_CheckOutOfMemory(&decoder, exts); |
| upb_MtDecoder_Parse(&decoder, data, len, exts, sizeof(*exts), &count, NULL); |
| upb_Arena_ShrinkLast(arena, exts, sizeof(*exts) * len, sizeof(*exts) * count); |
| |
| done: |
| *ext_count = count; |
| return exts; |
| } |
| |
| upb_MiniTable* upb_MiniTable_Build(const char* data, size_t len, |
| upb_MiniTablePlatform platform, |
| upb_Arena* arena, upb_Status* status) { |
| void* buf = NULL; |
| size_t size = 0; |
| upb_MiniTable* ret = upb_MiniTable_BuildWithBuf(data, len, platform, arena, |
| &buf, &size, status); |
| free(buf); |
| return ret; |
| } |
| |
| void upb_MiniTable_SetSubMessage(upb_MiniTable* table, |
| upb_MiniTable_Field* field, |
| const upb_MiniTable* sub) { |
| UPB_ASSERT((uintptr_t)table->fields <= (uintptr_t)field && |
| (uintptr_t)field < |
| (uintptr_t)(table->fields + table->field_count)); |
| if (sub->ext & kUpb_ExtMode_IsMapEntry) { |
| field->mode = |
| (kUpb_FieldRep_Pointer << kUpb_FieldRep_Shift) | kUpb_FieldMode_Map; |
| } |
| upb_MiniTable_Sub* table_sub = (void*)&table->subs[field->submsg_index]; |
| table_sub->submsg = sub; |
| } |
| |
| void upb_MiniTable_SetSubEnum(upb_MiniTable* table, upb_MiniTable_Field* field, |
| const upb_MiniTable_Enum* sub) { |
| UPB_ASSERT((uintptr_t)table->fields <= (uintptr_t)field && |
| (uintptr_t)field < |
| (uintptr_t)(table->fields + table->field_count)); |
| upb_MiniTable_Sub* table_sub = (void*)&table->subs[field->submsg_index]; |
| table_sub->subenum = sub; |
| } |