| // Protocol Buffers - Google's data interchange format |
| // Copyright 2023 Google LLC. All rights reserved. |
| // |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file or at |
| // https://developers.google.com/open-source/licenses/bsd |
| |
| #include "upb/reflection/internal/def_builder.h" |
| |
| #include <string.h> |
| |
| #include "upb/base/internal/log2.h" |
| #include "upb/base/upcast.h" |
| #include "upb/mem/alloc.h" |
| #include "upb/message/copy.h" |
| #include "upb/reflection/def_pool.h" |
| #include "upb/reflection/def_type.h" |
| #include "upb/reflection/field_def.h" |
| #include "upb/reflection/file_def.h" |
| #include "upb/reflection/internal/strdup2.h" |
| #include "upb/wire/decode.h" |
| |
| // Must be last. |
| #include "upb/port/def.inc" |
| |
| /* The upb core does not generally have a concept of default instances. However |
| * for descriptor options we make an exception since the max size is known and |
| * modest (<200 bytes). All types can share a default instance since it is |
| * initialized to zeroes. |
| * |
| * We have to allocate an extra pointer for upb's internal metadata. */ |
| static UPB_ALIGN_AS(8) const |
| char opt_default_buf[_UPB_MAXOPT_SIZE + sizeof(void*)] = {0}; |
| const char* kUpbDefOptDefault = &opt_default_buf[sizeof(void*)]; |
| |
// Returns the last dot-separated component of `fullname`.
//
// The result points into `fullname` itself: one past the final '.', or the
// start of the string when it contains no '.'. A NULL input yields NULL.
const char* _upb_DefBuilder_FullToShort(const char* fullname) {
  if (!fullname) return NULL;

  const char* last_dot = strrchr(fullname, '.');
  return last_dot ? last_dot + 1 : fullname;
}
| |
| void _upb_DefBuilder_FailJmp(upb_DefBuilder* ctx) { UPB_LONGJMP(ctx->err, 1); } |
| |
| void _upb_DefBuilder_Errf(upb_DefBuilder* ctx, const char* fmt, ...) { |
| va_list argp; |
| va_start(argp, fmt); |
| upb_Status_VSetErrorFormat(ctx->status, fmt, argp); |
| va_end(argp); |
| _upb_DefBuilder_FailJmp(ctx); |
| } |
| |
| void _upb_DefBuilder_OomErr(upb_DefBuilder* ctx) { |
| upb_Status_SetErrorMessage(ctx->status, "out of memory"); |
| _upb_DefBuilder_FailJmp(ctx); |
| } |
| |
| // Verify a relative identifier string. The loop is branchless for speed. |
| static void _upb_DefBuilder_CheckIdentNotFull(upb_DefBuilder* ctx, |
| upb_StringView name) { |
| bool good = name.size > 0; |
| |
| for (size_t i = 0; i < name.size; i++) { |
| const char c = name.data[i]; |
| const char d = c | 0x20; // force lowercase |
| const bool is_alpha = (('a' <= d) & (d <= 'z')) | (c == '_'); |
| const bool is_numer = ('0' <= c) & (c <= '9') & (i != 0); |
| |
| good &= is_alpha | is_numer; |
| } |
| |
| if (!good) _upb_DefBuilder_CheckIdentSlow(ctx, name, false); |
| } |
| |
| const char* _upb_DefBuilder_MakeFullName(upb_DefBuilder* ctx, |
| const char* prefix, |
| upb_StringView name) { |
| _upb_DefBuilder_CheckIdentNotFull(ctx, name); |
| if (prefix) { |
| // ret = prefix + '.' + name; |
| size_t n = strlen(prefix); |
| char* ret = _upb_DefBuilder_Alloc(ctx, n + name.size + 2); |
| strcpy(ret, prefix); |
| ret[n] = '.'; |
| memcpy(&ret[n + 1], name.data, name.size); |
| ret[n + 1 + name.size] = '\0'; |
| return ret; |
| } else { |
| char* ret = upb_strdup2(name.data, name.size, ctx->arena); |
| if (!ret) _upb_DefBuilder_OomErr(ctx); |
| return ret; |
| } |
| } |
| |
// Shortens the dotted path base[0..*len) by dropping its last component.
//
// Scans backwards from the final character for a '.', and sets *len to the
// index of that '.' so the dot itself is dropped too. Index 0 is never
// inspected: if no '.' is found at an index > 0, *len becomes 0.
//
// Returns false only when *len is already 0 (nothing left to remove).
static bool remove_component(char* base, size_t* len) {
  size_t pos = *len;
  if (pos == 0) return false;

  while (--pos > 0) {
    if (base[pos] == '.') break;
  }

  *len = pos;
  return true;
}
| |
| const void* _upb_DefBuilder_ResolveAny(upb_DefBuilder* ctx, |
| const char* from_name_dbg, |
| const char* base, upb_StringView sym, |
| upb_deftype_t* type) { |
| if (sym.size == 0) goto notfound; |
| upb_value v; |
| if (sym.data[0] == '.') { |
| // Symbols starting with '.' are absolute, so we do a single lookup. |
| // Slice to omit the leading '.' |
| if (!_upb_DefPool_LookupSym(ctx->symtab, sym.data + 1, sym.size - 1, &v)) { |
| goto notfound; |
| } |
| } else { |
| // Remove components from base until we find an entry or run out. |
| size_t baselen = base ? strlen(base) : 0; |
| char* tmp = upb_gmalloc(sym.size + baselen + 1); |
| while (1) { |
| char* p = tmp; |
| if (baselen) { |
| memcpy(p, base, baselen); |
| p[baselen] = '.'; |
| p += baselen + 1; |
| } |
| memcpy(p, sym.data, sym.size); |
| p += sym.size; |
| if (_upb_DefPool_LookupSym(ctx->symtab, tmp, p - tmp, &v)) { |
| break; |
| } |
| if (!remove_component(tmp, &baselen)) { |
| upb_gfree(tmp); |
| goto notfound; |
| } |
| } |
| upb_gfree(tmp); |
| } |
| |
| *type = _upb_DefType_Type(v); |
| return _upb_DefType_Unpack(v, *type); |
| |
| notfound: |
| _upb_DefBuilder_Errf(ctx, "couldn't resolve name '" UPB_STRINGVIEW_FORMAT "'", |
| UPB_STRINGVIEW_ARGS(sym)); |
| } |
| |
| const void* _upb_DefBuilder_Resolve(upb_DefBuilder* ctx, |
| const char* from_name_dbg, const char* base, |
| upb_StringView sym, upb_deftype_t type) { |
| upb_deftype_t found_type; |
| const void* ret = |
| _upb_DefBuilder_ResolveAny(ctx, from_name_dbg, base, sym, &found_type); |
| if (ret && found_type != type) { |
| _upb_DefBuilder_Errf(ctx, |
| "type mismatch when resolving %s: couldn't find " |
| "name " UPB_STRINGVIEW_FORMAT " with type=%d", |
| from_name_dbg, UPB_STRINGVIEW_ARGS(sym), (int)type); |
| } |
| return ret; |
| } |
| |
// Sets the 0x20 bit of `ch`, which in ASCII turns an upper-case letter into
// its lower-case form. If the result is a letter, the input was a letter too;
// a non-letter result says nothing useful about the input, since other
// characters are changed arbitrarily.
static char upb_ascii_lower(char ch) {
  const char kCaseBit = 0x20;
  return ch | kCaseBit;
}
| |
// Locale-independent range test, used instead of the locale-dependent
// isalpha() family from <ctype.h>. Returns true when `c` lies in the inclusive
// range [low, high]; all three values are compared as unsigned bytes.
static bool upb_isbetween(uint8_t c, uint8_t low, uint8_t high) {
  return !(c < low || high < c);
}
| |
// Returns true when `c` is '_' or an ASCII letter of either case.
static bool upb_isletter(char c) {
  if (c == '_') return true;
  return upb_isbetween(upb_ascii_lower(c), 'a', 'z');
}
| |
// Returns true when `c` is an ASCII digit or satisfies upb_isletter()
// (an ASCII letter or '_').
static bool upb_isalphanum(char c) {
  const bool is_digit = upb_isbetween(c, '0', '9');
  return is_digit || upb_isletter(c);
}
| |
// Reads one character from the range [*src, end).
//
// If the range is non-empty, stores the character in *ch, advances *src by one
// and returns true. If *src == end, returns false and leaves both *src and *ch
// untouched.
static bool TryGetChar(const char** src, const char* end, char* ch) {
  const char* cursor = *src;
  if (cursor == end) return false;

  *ch = *cursor;
  *src = cursor + 1;
  return true;
}
| |
// Consumes one hexadecimal digit (0-9, a-f, A-F) from [*src, end) and returns
// its value in the range 0..15.
//
// Returns -1 at end of input or when the next character is not a hex digit; in
// both cases *src ends up where it started.
static int TryGetHexDigit(const char** src, const char* end) {
  char ch;
  if (!TryGetChar(src, end, &ch)) return -1;

  if (ch >= '0' && ch <= '9') return ch - '0';

  const char lower = upb_ascii_lower(ch);
  if (lower >= 'a' && lower <= 'f') return lower - 'a' + 0xa;

  // Not a hex digit after all: give the character back.
  *src -= 1;
  return -1;
}
| |
| static char upb_DefBuilder_ParseHexEscape(upb_DefBuilder* ctx, |
| const upb_FieldDef* f, |
| const char** src, const char* end) { |
| int hex_digit = TryGetHexDigit(src, end); |
| if (hex_digit < 0) { |
| _upb_DefBuilder_Errf( |
| ctx, "\\x must be followed by at least one hex digit (field='%s')", |
| upb_FieldDef_FullName(f)); |
| return 0; |
| } |
| unsigned int ret = hex_digit; |
| while ((hex_digit = TryGetHexDigit(src, end)) >= 0) { |
| ret = (ret << 4) | hex_digit; |
| } |
| if (ret > 0xff) { |
| _upb_DefBuilder_Errf(ctx, "Value of hex escape in field %s exceeds 8 bits", |
| upb_FieldDef_FullName(f)); |
| return 0; |
| } |
| return ret; |
| } |
| |
| static char TryGetOctalDigit(const char** src, const char* end) { |
| char ch; |
| if (!TryGetChar(src, end, &ch)) return -1; |
| if ('0' <= ch && ch <= '7') { |
| return ch - '0'; |
| } |
| *src -= 1; // Char wasn't actually an octal digit. |
| return -1; |
| } |
| |
| static char upb_DefBuilder_ParseOctalEscape(upb_DefBuilder* ctx, |
| const upb_FieldDef* f, |
| const char** src, const char* end) { |
| char ch = 0; |
| for (int i = 0; i < 3; i++) { |
| char digit; |
| if ((digit = TryGetOctalDigit(src, end)) >= 0) { |
| ch = (ch << 3) | digit; |
| } |
| } |
| return ch; |
| } |
| |
| char _upb_DefBuilder_ParseEscape(upb_DefBuilder* ctx, const upb_FieldDef* f, |
| const char** src, const char* end) { |
| char ch; |
| if (!TryGetChar(src, end, &ch)) { |
| _upb_DefBuilder_Errf(ctx, "unterminated escape sequence in field %s", |
| upb_FieldDef_FullName(f)); |
| return 0; |
| } |
| switch (ch) { |
| case 'a': |
| return '\a'; |
| case 'b': |
| return '\b'; |
| case 'f': |
| return '\f'; |
| case 'n': |
| return '\n'; |
| case 'r': |
| return '\r'; |
| case 't': |
| return '\t'; |
| case 'v': |
| return '\v'; |
| case '\\': |
| return '\\'; |
| case '\'': |
| return '\''; |
| case '\"': |
| return '\"'; |
| case '?': |
| return '\?'; |
| case 'x': |
| case 'X': |
| return upb_DefBuilder_ParseHexEscape(ctx, f, src, end); |
| case '0': |
| case '1': |
| case '2': |
| case '3': |
| case '4': |
| case '5': |
| case '6': |
| case '7': |
| *src -= 1; |
| return upb_DefBuilder_ParseOctalEscape(ctx, f, src, end); |
| } |
| _upb_DefBuilder_Errf(ctx, "Unknown escape sequence: \\%c", ch); |
| } |
| |
| void _upb_DefBuilder_CheckIdentSlow(upb_DefBuilder* ctx, upb_StringView name, |
| bool full) { |
| const char* str = name.data; |
| const size_t len = name.size; |
| bool start = true; |
| for (size_t i = 0; i < len; i++) { |
| const char c = str[i]; |
| if (c == '.') { |
| if (start || !full) { |
| _upb_DefBuilder_Errf( |
| ctx, "invalid name: unexpected '.' (" UPB_STRINGVIEW_FORMAT ")", |
| UPB_STRINGVIEW_ARGS(name)); |
| } |
| start = true; |
| } else if (start) { |
| if (!upb_isletter(c)) { |
| _upb_DefBuilder_Errf(ctx, |
| "invalid name: path components must start with a " |
| "letter (" UPB_STRINGVIEW_FORMAT ")", |
| UPB_STRINGVIEW_ARGS(name)); |
| } |
| start = false; |
| } else if (!upb_isalphanum(c)) { |
| _upb_DefBuilder_Errf( |
| ctx, |
| "invalid name: non-alphanumeric character (" UPB_STRINGVIEW_FORMAT |
| ")", |
| UPB_STRINGVIEW_ARGS(name)); |
| } |
| } |
| if (start) { |
| _upb_DefBuilder_Errf(ctx, |
| "invalid name: empty part (" UPB_STRINGVIEW_FORMAT ")", |
| UPB_STRINGVIEW_ARGS(name)); |
| } |
| |
| // We should never reach this point. |
| UPB_ASSERT(false); |
| } |
| |
| upb_StringView _upb_DefBuilder_MakeKey(upb_DefBuilder* ctx, |
| const UPB_DESC(FeatureSet*) parent, |
| upb_StringView key) { |
| size_t need = key.size + sizeof(void*); |
| if (ctx->tmp_buf_size < need) { |
| ctx->tmp_buf_size = UPB_MAX(64, upb_Log2Ceiling(need)); |
| ctx->tmp_buf = upb_Arena_Malloc(ctx->tmp_arena, ctx->tmp_buf_size); |
| if (!ctx->tmp_buf) _upb_DefBuilder_OomErr(ctx); |
| } |
| |
| memcpy(ctx->tmp_buf, &parent, sizeof(void*)); |
| memcpy(ctx->tmp_buf + sizeof(void*), key.data, key.size); |
| return upb_StringView_FromDataAndSize(ctx->tmp_buf, need); |
| } |
| |
| bool _upb_DefBuilder_GetOrCreateFeatureSet(upb_DefBuilder* ctx, |
| const UPB_DESC(FeatureSet*) parent, |
| upb_StringView key, |
| UPB_DESC(FeatureSet**) set) { |
| upb_StringView k = _upb_DefBuilder_MakeKey(ctx, parent, key); |
| upb_value v; |
| if (upb_strtable_lookup2(&ctx->feature_cache, k.data, k.size, &v)) { |
| *set = upb_value_getptr(v); |
| return false; |
| } |
| |
| *set = (UPB_DESC(FeatureSet*))upb_Message_DeepClone( |
| UPB_UPCAST(parent), UPB_DESC_MINITABLE(FeatureSet), ctx->arena); |
| if (!*set) _upb_DefBuilder_OomErr(ctx); |
| |
| v = upb_value_ptr(*set); |
| if (!upb_strtable_insert(&ctx->feature_cache, k.data, k.size, v, |
| ctx->tmp_arena)) { |
| _upb_DefBuilder_OomErr(ctx); |
| } |
| |
| return true; |
| } |
| |
| const UPB_DESC(FeatureSet*) |
| _upb_DefBuilder_DoResolveFeatures(upb_DefBuilder* ctx, |
| const UPB_DESC(FeatureSet*) parent, |
| const UPB_DESC(FeatureSet*) child, |
| bool is_implicit) { |
| assert(parent); |
| if (!child) return parent; |
| |
| if (child && !is_implicit && |
| upb_FileDef_Syntax(ctx->file) != kUpb_Syntax_Editions) { |
| _upb_DefBuilder_Errf(ctx, "Features can only be specified for editions"); |
| } |
| |
| UPB_DESC(FeatureSet*) resolved; |
| size_t child_size; |
| const char* child_bytes = |
| UPB_DESC(FeatureSet_serialize)(child, ctx->tmp_arena, &child_size); |
| if (!child_bytes) _upb_DefBuilder_OomErr(ctx); |
| |
| upb_StringView key = upb_StringView_FromDataAndSize(child_bytes, child_size); |
| if (!_upb_DefBuilder_GetOrCreateFeatureSet(ctx, parent, key, &resolved)) { |
| return resolved; |
| } |
| |
| upb_DecodeStatus dec_status = |
| upb_Decode(child_bytes, child_size, UPB_UPCAST(resolved), |
| UPB_DESC_MINITABLE(FeatureSet), NULL, 0, ctx->arena); |
| if (dec_status != kUpb_DecodeStatus_Ok) _upb_DefBuilder_OomErr(ctx); |
| |
| return resolved; |
| } |