Added map sorting to binary and text encoders.
For the binary encoder, sorting is off by default.
For the text encoder, sorting is on by default.
Both defaults can be explicitly overridden.
This grows code size a bit. I think we could potentially
shave this (and other map-related code size) by having
the generated code inject a function pointer to the map-related
parsing/serialization code if maps are present.
FILE SIZE VM SIZE
-------------- --------------
+86% +1.07Ki +71% +768 upb/msg.c
[NEW] +391 [NEW] +344 _upb_mapsorter_pushmap
[NEW] +158 [NEW] +112 _upb_mapsorter_cmpstr
[NEW] +111 [NEW] +64 _upb_mapsorter_cmpbool
[NEW] +110 [NEW] +64 _upb_mapsorter_cmpi32
[NEW] +110 [NEW] +64 _upb_mapsorter_cmpi64
[NEW] +110 [NEW] +64 _upb_mapsorter_cmpu32
[NEW] +110 [NEW] +64 _upb_mapsorter_cmpu64
-3.6% -8 -4.3% -8 _upb_map_new
+9.5% +464 +9.2% +424 upb/text_encode.c
[NEW] +656 [NEW] +616 txtenc_mapentry
+15% +32 +20% +32 upb_text_encode
-20.1% -224 -20.7% -224 txtenc_msg
+5.7% +342 +5.3% +296 upb/encode.c
[NEW] +344 [NEW] +304 encode_mapentry
[NEW] +246 [NEW] +208 upb_encode_ex
[NEW] +41 [NEW] +16 upb_encode_ex.ch
+0.7% +8 +0.7% +8 encode_scalar
-1.0% -32 -1.0% -32 encode_message
[DEL] -38 [DEL] -16 upb_encode.ch
[DEL] -227 [DEL] -192 upb_encode
+2.0% +152 +2.2% +152 upb/decode.c
+44% +128 +44% +128 [section .rodata]
+3.4% +24 +3.4% +24 _GLOBAL_OFFSET_TABLE_
+0.6% +107 +0.3% +48 upb/def.c
[NEW] +100 [NEW] +48 upb_fielddef_descriptortype
+7.1% +7 [ = ] 0 upb_fielddef_defaultint32
+2.9% +24 +2.9% +24 [section .dynsym]
+1.2% +24 [ = ] 0 [section .symtab]
+3.2% +16 +3.2% +16 [section .plt]
[NEW] +16 [NEW] +16 memcmp@plt
+0.5% +16 +0.6% +16 tests/conformance_upb.c
+1.5% +16 +1.6% +16 DoTestIo
+0.1% +16 +0.1% +16 upb/json_decode.c
+0.4% +16 +0.4% +16 jsondec_wellknown
+3.0% +8 +3.0% +8 [section .got.plt]
+3.0% +8 +3.0% +8 _GLOBAL_OFFSET_TABLE_
+1.6% +7 +1.6% +7 [section .dynstr]
+1.8% +4 +1.8% +4 [section .hash]
+0.5% +3 +0.5% +3 [LOAD #2 [RX]]
+2.8% +2 +2.8% +2 [section .gnu.version]
-60.0% -1.74Ki [ = ] 0 [Unmapped]
+0.3% +496 +1.4% +1.74Ki TOTAL
diff --git a/benchmarks/compare.py b/benchmarks/compare.py
index 9bdf787..8d62d94 100755
--- a/benchmarks/compare.py
+++ b/benchmarks/compare.py
@@ -58,7 +58,7 @@
baseline = "master"
-bench_cpu = True
+bench_cpu = False
fasttable = False
if len(sys.argv) > 1:
diff --git a/tests/bindings/lua/test_upb.lua b/tests/bindings/lua/test_upb.lua
index 84178aa..0f4a3d2 100644
--- a/tests/bindings/lua/test_upb.lua
+++ b/tests/bindings/lua/test_upb.lua
@@ -91,6 +91,69 @@
assert_equal(12, msg2.map_int32_int32[6])
end
+function test_map_sorting()
+ function msg_with_int32_entries(start, expand)
+ local msg = test_messages_proto3.TestAllTypesProto3()
+ for i=start,start + 8 do
+ msg.map_int32_int32[i] = i * 2
+ end
+
+ if expand then
+ for i=start+20,200 do
+ msg.map_int32_int32[i] = i
+ end
+ for i=start+20,200 do
+ msg.map_int32_int32[i] = nil
+ end
+ end
+ return msg
+ end
+
+ function msg_with_msg_entries(expand)
+ local msg = test_messages_proto3.TestAllTypesProto3()
+ -- 8! = 40320 possible orderings makes it overwhelmingly likely that two
+ -- random orderings will be different.
+ for i=1,8 do
+ local submsg = test_messages_proto3.TestAllTypesProto3.NestedMessage()
+ submsg.corecursive = msg_with_int32_entries(i, expand)
+ msg.map_string_nested_message[tostring(i)] = submsg
+ end
+
+ expand = false
+ if expand then
+ for i=21,2000 do
+ local submsg = test_messages_proto3.TestAllTypesProto3.NestedMessage()
+ submsg.corecursive = msg_with_int32_entries(i, expand)
+ msg.map_string_nested_message[tostring(i)] = submsg
+ end
+ for i=21,2000 do
+ msg.map_string_nested_message[tostring(i)] = nil
+ end
+ end
+ return msg
+ end
+
+ -- Create two messages with the same contents but (hopefully) different
+ -- map table orderings.
+ local msg = msg_with_msg_entries(false)
+ local msg2 = msg_with_msg_entries(true)
+
+ local text1 = upb.text_encode(msg)
+ local text2 = upb.text_encode(msg2)
+ assert_equal(text1, text2)
+
+ local binary1 = upb.encode(msg, {upb.ENCODE_DETERMINISTIC})
+ local binary2 = upb.encode(msg2, {upb.ENCODE_DETERMINISTIC})
+ assert_equal(binary1, binary2)
+
+ -- Non-sorted map should compare different.
+ local text3 = upb.text_encode(msg, {upb.TXTENC_NOSORT})
+ assert_not_equal(text1, text3)
+
+ local binary3 = upb.encode(msg)
+ assert_not_equal(binary1, binary3)
+end
+
function test_utf8()
local proto2_msg = test_messages_proto2.TestAllTypesProto2()
proto2_msg.optional_string = "\xff"
diff --git a/upb/bindings/lua/def.c b/upb/bindings/lua/def.c
index 3bf1d96..ca277c8 100644
--- a/upb/bindings/lua/def.c
+++ b/upb/bindings/lua/def.c
@@ -53,12 +53,12 @@
/* lupb_msgdef_pushsubmsgdef()
*
* Pops the msgdef wrapper at the top of the stack and replaces it with a msgdef
- * wrapper for field |f| of this msgdef.
+ * wrapper for field |f| of this msgdef (submsg may not be direct, for example it
+ * may be the submessage of the map value).
*/
void lupb_msgdef_pushsubmsgdef(lua_State *L, const upb_fielddef *f) {
const upb_msgdef *m = upb_fielddef_msgsubdef(f);
assert(m);
- assert(upb_fielddef_containingtype(f) == lupb_msgdef_check(L, -1));
lupb_wrapper_pushwrapper(L, -1, m, LUPB_MSGDEF);
lua_replace(L, -2); /* Replace msgdef with submsgdef. */
}
@@ -337,6 +337,26 @@
return 1;
}
+static bool lupb_msgdef_pushnested(lua_State *L, int msgdef, int name) {
+ const upb_msgdef *m = lupb_msgdef_check(L, msgdef);
+ lupb_wrapper_pushsymtab(L, msgdef);
+ upb_symtab *symtab = lupb_symtab_check(L, -1);
+ lua_pop(L, 1);
+
+ /* Construct full package.Message.SubMessage name. */
+ lua_pushstring(L, upb_msgdef_fullname(m));
+ lua_pushstring(L, ".");
+ lua_pushvalue(L, name);
+ lua_concat(L, 3);
+ const char *nested_name = lua_tostring(L, -1);
+
+ /* Try lookup. */
+ const upb_msgdef *nested = upb_symtab_lookupmsg(symtab, nested_name);
+ if (!nested) return false;
+ lupb_wrapper_pushwrapper(L, msgdef, nested, LUPB_MSGDEF);
+ return true;
+}
+
/* lupb_msgdef_field()
*
* Handles:
@@ -430,6 +450,13 @@
return 1;
}
+static int lupb_msgdef_index(lua_State *L) {
+ if (!lupb_msgdef_pushnested(L, 1, 2)) {
+ luaL_error(L, "No such nested message");
+ }
+ return 1;
+}
+
static int lupb_msgoneofiter_next(lua_State *L) {
const upb_msgdef *m = lupb_msgdef_check(L, lua_upvalueindex(1));
int *index = lua_touserdata(L, lua_upvalueindex(2));
@@ -471,6 +498,7 @@
static const struct luaL_Reg lupb_msgdef_mm[] = {
{"__call", lupb_msg_pushnew},
+ {"__index", lupb_msgdef_index},
{"__len", lupb_msgdef_fieldcount},
{"__tostring", lupb_msgdef_tostring},
{NULL, NULL}
diff --git a/upb/bindings/lua/msg.c b/upb/bindings/lua/msg.c
index 6a2ecb3..0c4c35f 100644
--- a/upb/bindings/lua/msg.c
+++ b/upb/bindings/lua/msg.c
@@ -187,6 +187,13 @@
upb_arena_fuse(to_arena, from_arena);
}
+static void lupb_arena_fuseobjs(lua_State *L, int to, int from) {
+ lua_getiuservalue(L, to, LUPB_ARENA_INDEX);
+ lua_getiuservalue(L, from, LUPB_ARENA_INDEX);
+ lupb_arena_fuse(L, lua_absindex(L, -2), lua_absindex(L, -1));
+ lua_pop(L, 2);
+}
+
static int lupb_arena_gc(lua_State *L) {
upb_arena *a = lupb_arena_check(L, 1);
upb_arena_free(a);
@@ -398,6 +405,10 @@
upb_array_set(larray->arr, n, msgval);
}
+ if (larray->type == UPB_TYPE_MESSAGE) {
+ lupb_arena_fuseobjs(L, 1, 3);
+ }
+
return 0; /* 1 for chained assignments? */
}
@@ -535,6 +546,9 @@
} else {
upb_msgval val = lupb_tomsgval(L, lmap->value_type, 3, 1, LUPB_COPY);
upb_map_set(map, key, val, lupb_arenaget(L, 1));
+ if (lmap->value_type == UPB_TYPE_MESSAGE) {
+ lupb_arena_fuseobjs(L, 1, 3);
+ }
}
return 0;
@@ -600,21 +614,26 @@
return msg->msg;
}
-static const upb_fielddef *lupb_msg_checkfield(lua_State *L, int msg,
- int field) {
+static const upb_msgdef *lupb_msg_getmsgdef(lua_State *L, int msg) {
+ lua_getiuservalue(L, msg, LUPB_MSGDEF_INDEX);
+ const upb_msgdef *m = lupb_msgdef_check(L, -1);
+ lua_pop(L, 1);
+ return m;
+}
+
+static const upb_fielddef *lupb_msg_tofield(lua_State *L, int msg, int field) {
size_t len;
const char *fieldname = luaL_checklstring(L, field, &len);
- const upb_msgdef *m;
- const upb_fielddef *f;
+ const upb_msgdef *m = lupb_msg_getmsgdef(L, msg);
+ return upb_msgdef_ntof(m, fieldname, len);
+}
- lua_getiuservalue(L, msg, LUPB_MSGDEF_INDEX);
- m = lupb_msgdef_check(L, -1);
- f = upb_msgdef_ntof(m, fieldname, len);
+static const upb_fielddef *lupb_msg_checkfield(lua_State *L, int msg,
+ int field) {
+ const upb_fielddef *f = lupb_msg_tofield(L, msg, field);
if (f == NULL) {
- luaL_error(L, "no such field '%s'", fieldname);
+ luaL_error(L, "no such field '%s'", lua_tostring(L, field));
}
- lua_pop(L, 1);
-
return f;
}
@@ -813,10 +832,7 @@
}
if (merge_arenas) {
- lua_getiuservalue(L, 1, LUPB_ARENA_INDEX);
- lua_getiuservalue(L, 3, LUPB_ARENA_INDEX);
- lupb_arena_fuse(L, lua_absindex(L, -2), lua_absindex(L, -1));
- lua_pop(L, 2);
+ lupb_arena_fuseobjs(L, 1, 3);
}
upb_msg_set(msg, f, msgval, lupb_arenaget(L, 1));
@@ -908,23 +924,74 @@
}
/**
+ * lupb_msg_textencode()
+ *
+ * Handles:
+ * text_string = upb.text_encode(msg, {upb.TXTENC_SINGLELINE})
+ */
+static int lupb_textencode(lua_State *L) {
+ int argcount = lua_gettop(L);
+ upb_msg *msg = lupb_msg_check(L, 1);
+ const upb_msgdef *m;
+ char buf[1024];
+ size_t size;
+ int options = 0;
+
+ lua_getiuservalue(L, 1, LUPB_MSGDEF_INDEX);
+ m = lupb_msgdef_check(L, -1);
+
+ if (argcount > 1) {
+ size_t len = lua_rawlen(L, 2);
+ for (size_t i = 1; i <= len; i++) {
+ lua_rawgeti(L, 2, i);
+ options |= lupb_checkuint32(L, -1);
+ lua_pop(L, 1);
+ }
+ }
+
+ size = upb_text_encode(msg, m, NULL, options, buf, sizeof(buf));
+
+ if (size < sizeof(buf)) {
+ lua_pushlstring(L, buf, size);
+ } else {
+ char *ptr = malloc(size + 1);
+ upb_text_encode(msg, m, NULL, options, ptr, size + 1);
+ lua_pushlstring(L, ptr, size);
+ free(ptr);
+ }
+
+ return 1;
+}
+
+/**
* lupb_encode()
*
* Handles:
* bin_string = upb.encode(msg)
*/
static int lupb_encode(lua_State *L) {
+ int argcount = lua_gettop(L);
const upb_msg *msg = lupb_msg_check(L, 1);
const upb_msglayout *layout;
upb_arena *arena = lupb_arena_pushnew(L);
size_t size;
char *result;
+ int options = 0;
+
+ if (argcount > 1) {
+ size_t len = lua_rawlen(L, 2);
+ for (size_t i = 1; i <= len; i++) {
+ lua_rawgeti(L, 2, i);
+ options |= lupb_checkuint32(L, -1);
+ lua_pop(L, 1);
+ }
+ }
lua_getiuservalue(L, 1, LUPB_MSGDEF_INDEX);
layout = upb_msgdef_layout(lupb_msgdef_check(L, -1));
lua_pop(L, 1);
- result = upb_encode(msg, (const void*)layout, arena, &size);
+ result = upb_encode_ex(msg, (const void*)layout, options, arena, &size);
if (!result) {
lua_pushstring(L, "Error encoding protobuf.");
@@ -936,11 +1003,17 @@
return 1;
}
+static void lupb_setfieldi(lua_State *L, const char *field, int i) {
+ lua_pushinteger(L, i);
+ lua_setfield(L, -2, field);
+}
+
static const struct luaL_Reg lupb_msg_toplevel_m[] = {
{"Array", lupb_array_new},
{"Map", lupb_map_new},
{"decode", lupb_decode},
{"encode", lupb_encode},
+ {"text_encode", lupb_textencode},
{NULL, NULL}
};
@@ -952,5 +1025,11 @@
lupb_register_type(L, LUPB_MAP, NULL, lupb_map_mm);
lupb_register_type(L, LUPB_MSG, NULL, lupb_msg_mm);
+ lupb_setfieldi(L, "TXTENC_SINGLELINE", UPB_TXTENC_SINGLELINE);
+ lupb_setfieldi(L, "TXTENC_SKIPUNKNOWN", UPB_TXTENC_SKIPUNKNOWN);
+ lupb_setfieldi(L, "TXTENC_NOSORT", UPB_TXTENC_NOSORT);
+
+ lupb_setfieldi(L, "ENCODE_DETERMINISTIC", UPB_ENCODE_DETERMINISTIC);
+
lupb_cacheinit(L);
}
diff --git a/upb/bindings/lua/upb.c b/upb/bindings/lua/upb.c
index 3630ae2..c247c03 100644
--- a/upb/bindings/lua/upb.c
+++ b/upb/bindings/lua/upb.c
@@ -84,6 +84,24 @@
}
#endif
+/* We use this function as the __index metamethod when a type has both methods
+ * and an __index metamethod. */
+int lupb_indexmm(lua_State *L) {
+ /* Look up in __index table (which is a closure param). */
+ lua_pushvalue(L, 2);
+ lua_rawget(L, lua_upvalueindex(1));
+ if (!lua_isnil(L, -1)) {
+ return 1;
+ }
+
+ /* Not found, chain to user __index metamethod. */
+ lua_pushvalue(L, lua_upvalueindex(2));
+ lua_pushvalue(L, 1);
+ lua_pushvalue(L, 2);
+ lua_call(L, 2, 1);
+ return 1;
+}
+
void lupb_register_type(lua_State *L, const char *name, const luaL_Reg *m,
const luaL_Reg *mm) {
luaL_newmetatable(L, name);
@@ -93,14 +111,17 @@
}
if (m) {
- /* Methods go in the mt's __index method. This implies that you can'
- * implement __index and also have methods. */
- lua_getfield(L, -1, "__index");
- lupb_assert(L, lua_isnil(L, -1));
- lua_pop(L, 1);
-
- lua_createtable(L, 0, 0);
+ lua_createtable(L, 0, 0); /* __index table */
lupb_setfuncs(L, m);
+
+ /* Methods go in the mt's __index slot. If the user also specified an
+ * __index metamethod, use our custom lupb_indexmm() that can check both. */
+ lua_getfield(L, -2, "__index");
+ if (lua_isnil(L, -1)) {
+ lua_pop(L, 1);
+ } else {
+ lua_pushcclosure(L, &lupb_indexmm, 2);
+ }
lua_setfield(L, -2, "__index");
}
diff --git a/upb/decode.h b/upb/decode.h
index 00419ab..eff4b8b 100644
--- a/upb/decode.h
+++ b/upb/decode.h
@@ -15,6 +15,8 @@
#endif
enum {
+ /* If set, strings will alias the input buffer instead of copying into the
+ * arena. */
UPB_DECODE_ALIAS = 1,
};
diff --git a/upb/encode.c b/upb/encode.c
index 6d36619..c1fe02d 100644
--- a/upb/encode.c
+++ b/upb/encode.c
@@ -32,6 +32,8 @@
jmp_buf err;
upb_alloc *alloc;
char *buf, *ptr, *limit;
+ int options;
+ _upb_mapsorter sorter;
} upb_encstate;
static size_t upb_roundup_pow2(size_t bytes) {
@@ -341,31 +343,48 @@
}
}
+static void encode_mapentry(upb_encstate *e, uint32_t number,
+ const upb_msglayout *layout,
+ const upb_map_entry *ent) {
+ const upb_msglayout_field *key_field = &layout->fields[0];
+ const upb_msglayout_field *val_field = &layout->fields[1];
+ size_t pre_len = e->limit - e->ptr;
+ size_t size;
+ encode_scalar(e, &ent->v, layout, val_field, false);
+ encode_scalar(e, &ent->k, layout, key_field, false);
+ size = (e->limit - e->ptr) - pre_len;
+ encode_varint(e, size);
+ encode_tag(e, number, UPB_WIRE_TYPE_DELIMITED);
+}
+
static void encode_map(upb_encstate *e, const char *field_mem,
const upb_msglayout *m, const upb_msglayout_field *f) {
const upb_map *map = *(const upb_map**)field_mem;
- const upb_msglayout *entry = m->submsgs[f->submsg_index];
- const upb_msglayout_field *key_field = &entry->fields[0];
- const upb_msglayout_field *val_field = &entry->fields[1];
- upb_strtable_iter i;
- if (map == NULL) {
- return;
- }
+ const upb_msglayout *layout = m->submsgs[f->submsg_index];
+ UPB_ASSERT(layout->field_count == 2);
- upb_strtable_begin(&i, &map->table);
- for(; !upb_strtable_done(&i); upb_strtable_next(&i)) {
- size_t pre_len = e->limit - e->ptr;
- size_t size;
- upb_strview key = upb_strtable_iter_key(&i);
- const upb_value val = upb_strtable_iter_value(&i);
+ if (map == NULL) return;
+
+ if (e->options & UPB_ENCODE_DETERMINISTIC) {
+ _upb_sortedmap sorted;
+ _upb_mapsorter_pushmap(&e->sorter, layout->fields[0].descriptortype, map,
+ &sorted);
upb_map_entry ent;
- _upb_map_fromkey(key, &ent.k, map->key_size);
- _upb_map_fromvalue(val, &ent.v, map->val_size);
- encode_scalar(e, &ent.v, entry, val_field, false);
- encode_scalar(e, &ent.k, entry, key_field, false);
- size = (e->limit - e->ptr) - pre_len;
- encode_varint(e, size);
- encode_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED);
+ while (_upb_sortedmap_next(&e->sorter, map, &sorted, &ent)) {
+ encode_mapentry(e, f->number, layout, &ent);
+ }
+ _upb_mapsorter_popmap(&e->sorter, &sorted);
+ } else {
+ upb_strtable_iter i;
+ upb_strtable_begin(&i, &map->table);
+ for(; !upb_strtable_done(&i); upb_strtable_next(&i)) {
+ upb_strview key = upb_strtable_iter_key(&i);
+ const upb_value val = upb_strtable_iter_value(&i);
+ upb_map_entry ent;
+ _upb_map_fromkey(key, &ent.k, map->key_size);
+ _upb_map_fromvalue(val, &ent.v, map->val_size);
+ encode_mapentry(e, f->number, layout, &ent);
+ }
}
}
@@ -414,28 +433,32 @@
*size = (e->limit - e->ptr) - pre_len;
}
-char *upb_encode(const void *msg, const upb_msglayout *m, upb_arena *arena,
- size_t *size) {
+char *upb_encode_ex(const void *msg, const upb_msglayout *m, int options,
+ upb_arena *arena, size_t *size) {
upb_encstate e;
e.alloc = upb_arena_alloc(arena);
e.buf = NULL;
e.limit = NULL;
e.ptr = NULL;
+ e.options = options;
+ _upb_mapsorter_init(&e.sorter);
+ char *ret = NULL;
if (UPB_SETJMP(e.err)) {
*size = 0;
- return NULL;
- }
-
- encode_message(&e, msg, m, size);
-
- *size = e.limit - e.ptr;
-
- if (*size == 0) {
- static char ch;
- return &ch;
+ ret = NULL;
} else {
- UPB_ASSERT(e.ptr);
- return e.ptr;
+ encode_message(&e, msg, m, size);
+ *size = e.limit - e.ptr;
+ if (*size == 0) {
+ static char ch;
+ ret = &ch;
+ } else {
+ UPB_ASSERT(e.ptr);
+ ret = e.ptr;
+ }
}
+
+ _upb_mapsorter_destroy(&e.sorter);
+ return ret;
}
diff --git a/upb/encode.h b/upb/encode.h
index 6842777..8ff51ab 100644
--- a/upb/encode.h
+++ b/upb/encode.h
@@ -7,12 +7,32 @@
#include "upb/msg.h"
+/* Must be last. */
+#include "upb/port_def.inc"
+
#ifdef __cplusplus
extern "C" {
#endif
-char *upb_encode(const void *msg, const upb_msglayout *l, upb_arena *arena,
- size_t *size);
+enum {
+ /* If set, the results of serializing will be deterministic across all
+ * instances of this binary. There are no guarantees across different
+ * binary builds.
+ *
+ * If your proto contains maps, the encoder will need to malloc()/free()
+ * memory during encode. */
+ UPB_ENCODE_DETERMINISTIC = 1,
+};
+
+char *upb_encode_ex(const void *msg, const upb_msglayout *l, int options,
+ upb_arena *arena, size_t *size);
+
+UPB_INLINE char *upb_encode(const void *msg, const upb_msglayout *l,
+ upb_arena *arena, size_t *size) {
+ return upb_encode_ex(msg, l, 0, arena, size);
+}
+
+#include "upb/port_undef.inc"
#ifdef __cplusplus
} /* extern "C" */
diff --git a/upb/msg.c b/upb/msg.c
index 1dbb8b0..876a06d 100644
--- a/upb/msg.c
+++ b/upb/msg.c
@@ -139,3 +139,118 @@
return map;
}
+
+static void _upb_mapsorter_getkeys(const void *_a, const void *_b, void *a_key,
+ void *b_key, size_t size) {
+ const upb_tabent *const*a = _a;
+ const upb_tabent *const*b = _b;
+ upb_strview a_tabkey = upb_tabstrview((*a)->key);
+ upb_strview b_tabkey = upb_tabstrview((*b)->key);
+ _upb_map_fromkey(a_tabkey, a_key, size);
+ _upb_map_fromkey(b_tabkey, b_key, size);
+}
+
+static int _upb_mapsorter_cmpi64(const void *_a, const void *_b) {
+ int64_t a, b;
+ _upb_mapsorter_getkeys(_a, _b, &a, &b, 8);
+ return a - b;
+}
+
+static int _upb_mapsorter_cmpu64(const void *_a, const void *_b) {
+ uint64_t a, b;
+ _upb_mapsorter_getkeys(_a, _b, &a, &b, 8);
+ return a - b;
+}
+
+static int _upb_mapsorter_cmpi32(const void *_a, const void *_b) {
+ int32_t a, b;
+ _upb_mapsorter_getkeys(_a, _b, &a, &b, 4);
+ return a - b;
+}
+
+static int _upb_mapsorter_cmpu32(const void *_a, const void *_b) {
+ uint32_t a, b;
+ _upb_mapsorter_getkeys(_a, _b, &a, &b, 4);
+ return a - b;
+}
+
+static int _upb_mapsorter_cmpbool(const void *_a, const void *_b) {
+ bool a, b;
+ _upb_mapsorter_getkeys(_a, _b, &a, &b, 1);
+ return a - b;
+}
+
+static int _upb_mapsorter_cmpstr(const void *_a, const void *_b) {
+ upb_strview a, b;
+ _upb_mapsorter_getkeys(_a, _b, &a, &b, UPB_MAPTYPE_STRING);
+ size_t common_size = UPB_MIN(a.size, b.size);
+ int cmp = memcmp(a.data, b.data, common_size);
+ if (cmp) return cmp;
+ return a.size - b.size;
+}
+
+bool _upb_mapsorter_pushmap(_upb_mapsorter *s, upb_descriptortype_t key_type,
+ const upb_map *map, _upb_sortedmap *sorted) {
+ int map_size = _upb_map_size(map);
+ sorted->start = s->size;
+ sorted->pos = sorted->start;
+ sorted->end = sorted->start + map_size;
+
+ /* Grow s->entries if necessary. */
+ if (sorted->end > s->cap) {
+ s->cap = _upb_lg2ceilsize(sorted->end);
+ s->entries = realloc(s->entries, s->cap * sizeof(*s->entries));
+ if (!s->entries) return false;
+ }
+
+ s->size = sorted->end;
+
+ /* Copy non-empty entries from the table to s->entries. */
+ upb_tabent const**dst = &s->entries[sorted->start];
+ const upb_tabent *src = map->table.t.entries;
+ const upb_tabent *end = src + upb_table_size(&map->table.t);
+ for (; src < end; src++) {
+ if (!upb_tabent_isempty(src)) {
+ *dst = src;
+ dst++;
+ }
+ }
+ UPB_ASSERT(dst == &s->entries[sorted->end]);
+
+ /* Sort entries according to the key type. */
+
+ int (*compar)(const void *, const void *);
+
+ switch (key_type) {
+ case UPB_DESCRIPTOR_TYPE_INT64:
+ case UPB_DESCRIPTOR_TYPE_SFIXED64:
+ case UPB_DESCRIPTOR_TYPE_SINT64:
+ compar = _upb_mapsorter_cmpi64;
+ break;
+ case UPB_DESCRIPTOR_TYPE_UINT64:
+ case UPB_DESCRIPTOR_TYPE_FIXED64:
+ compar = _upb_mapsorter_cmpu64;
+ break;
+ case UPB_DESCRIPTOR_TYPE_INT32:
+ case UPB_DESCRIPTOR_TYPE_SINT32:
+ case UPB_DESCRIPTOR_TYPE_SFIXED32:
+ case UPB_DESCRIPTOR_TYPE_ENUM:
+ compar = _upb_mapsorter_cmpi32;
+ break;
+ case UPB_DESCRIPTOR_TYPE_UINT32:
+ case UPB_DESCRIPTOR_TYPE_FIXED32:
+ compar = _upb_mapsorter_cmpu32;
+ break;
+ case UPB_DESCRIPTOR_TYPE_BOOL:
+ compar = _upb_mapsorter_cmpbool;
+ break;
+ case UPB_DESCRIPTOR_TYPE_STRING:
+ compar = _upb_mapsorter_cmpstr;
+ break;
+ default:
+ UPB_UNREACHABLE();
+ }
+
+ qsort(&s->entries[sorted->start], map_size, sizeof(*s->entries), compar);
+ return true;
+}
diff --git a/upb/msg.h b/upb/msg.h
index 36c57b3..9b4557a 100644
--- a/upb/msg.h
+++ b/upb/msg.h
@@ -9,11 +9,13 @@
#define UPB_MSG_H_
#include <stdint.h>
+#include <stdlib.h>
#include <string.h>
#include "upb/table.int.h"
#include "upb/upb.h"
+/* Must be last. */
#include "upb/port_def.inc"
#ifdef __cplusplus
@@ -553,6 +555,53 @@
}
}
+/** _upb_mapsorter *************************************************************/
+
+/* _upb_mapsorter sorts maps and provides ordered iteration over the entries.
+ * Since maps can be recursive (map values can be messages which contain other maps).
+ * _upb_mapsorter can contain a stack of maps. */
+
+typedef struct {
+ upb_tabent const**entries;
+ int size;
+ int cap;
+} _upb_mapsorter;
+
+typedef struct {
+ int start;
+ int pos;
+ int end;
+} _upb_sortedmap;
+
+UPB_INLINE void _upb_mapsorter_init(_upb_mapsorter *s) {
+ s->entries = NULL;
+ s->size = 0;
+ s->cap = 0;
+}
+
+UPB_INLINE void _upb_mapsorter_destroy(_upb_mapsorter *s) {
+ if (s->entries) free(s->entries);
+}
+
+bool _upb_mapsorter_pushmap(_upb_mapsorter *s, upb_descriptortype_t key_type,
+ const upb_map *map, _upb_sortedmap *sorted);
+
+UPB_INLINE void _upb_mapsorter_popmap(_upb_mapsorter *s, _upb_sortedmap *sorted) {
+ s->size = sorted->start;
+}
+
+UPB_INLINE bool _upb_sortedmap_next(_upb_mapsorter *s, const upb_map *map,
+ _upb_sortedmap *sorted,
+ upb_map_entry *ent) {
+ if (sorted->pos == sorted->end) return false;
+ const upb_tabent *tabent = s->entries[sorted->pos++];
+ upb_strview key = upb_tabstrview(tabent->key);
+ _upb_map_fromkey(key, &ent->k, map->key_size);
+ upb_value val = {tabent->val.val};
+ _upb_map_fromvalue(val, &ent->v, map->val_size);
+ return true;
+}
+
#undef PTR_AT
#ifdef __cplusplus
diff --git a/upb/table.int.h b/upb/table.int.h
index ab67cc3..49caac4 100644
--- a/upb/table.int.h
+++ b/upb/table.int.h
@@ -147,10 +147,17 @@
return mem + sizeof(*len);
}
+UPB_INLINE upb_strview upb_tabstrview(upb_tabkey key) {
+ upb_strview ret;
+ uint32_t len;
+ ret.data = upb_tabstr(key, &len);
+ ret.size = len;
+ return ret;
+}
/* upb_tabval *****************************************************************/
-typedef struct {
+typedef struct upb_tabval {
uint64_t val;
} upb_tabval;
diff --git a/upb/text_encode.c b/upb/text_encode.c
index 3e9630d..028cc29 100644
--- a/upb/text_encode.c
+++ b/upb/text_encode.c
@@ -17,6 +17,7 @@
int indent_depth;
int options;
const upb_symtab *ext_pool;
+ _upb_mapsorter sorter;
} txtenc;
static void txtenc_msg(txtenc *e, const upb_msg *msg, const upb_msgdef *m);
@@ -187,6 +188,25 @@
}
}
+static void txtenc_mapentry(txtenc *e, upb_msgval key, upb_msgval val,
+ const upb_fielddef *f) {
+ const upb_msgdef *entry = upb_fielddef_msgsubdef(f);
+ const upb_fielddef *key_f = upb_msgdef_field(entry, 0);
+ const upb_fielddef *val_f = upb_msgdef_field(entry, 1);
+ txtenc_indent(e);
+ txtenc_printf(e, "%s: {", upb_fielddef_name(f));
+ txtenc_endfield(e);
+ e->indent_depth++;
+
+ txtenc_field(e, key, key_f);
+ txtenc_field(e, val, val_f);
+
+ e->indent_depth--;
+ txtenc_indent(e);
+ txtenc_putstr(e, "}");
+ txtenc_endfield(e);
+}
+
/*
* Maps print as messages of key/value, etc.
*
@@ -200,27 +220,28 @@
* }
*/
static void txtenc_map(txtenc *e, const upb_map *map, const upb_fielddef *f) {
- const upb_msgdef *entry = upb_fielddef_msgsubdef(f);
- const upb_fielddef *key_f = upb_msgdef_itof(entry, 1);
- const upb_fielddef *val_f = upb_msgdef_itof(entry, 2);
- size_t iter = UPB_MAP_BEGIN;
+ if (e->options & UPB_TXTENC_NOSORT) {
+ size_t iter = UPB_MAP_BEGIN;
+ while (upb_mapiter_next(map, &iter)) {
+ upb_msgval key = upb_mapiter_key(map, iter);
+ upb_msgval val = upb_mapiter_value(map, iter);
+ txtenc_mapentry(e, key, val, f);
+ }
+ } else {
+ const upb_msgdef *entry = upb_fielddef_msgsubdef(f);
+ const upb_fielddef *key_f = upb_msgdef_field(entry, 0);
+ _upb_sortedmap sorted;
+ upb_map_entry ent;
- while (upb_mapiter_next(map, &iter)) {
- upb_msgval key = upb_mapiter_key(map, iter);
- upb_msgval val = upb_mapiter_value(map, iter);
-
- txtenc_indent(e);
- txtenc_printf(e, "%s: {", upb_fielddef_name(f));
- txtenc_endfield(e);
- e->indent_depth++;
-
- txtenc_field(e, key, key_f);
- txtenc_field(e, val, val_f);
-
- e->indent_depth--;
- txtenc_indent(e);
- txtenc_putstr(e, "}");
- txtenc_endfield(e);
+ _upb_mapsorter_pushmap(&e->sorter, upb_fielddef_descriptortype(key_f), map,
+ &sorted);
+ while (_upb_sortedmap_next(&e->sorter, map, &sorted, &ent)) {
+ upb_msgval key, val;
+ memcpy(&key, &ent.k, sizeof(key));
+ memcpy(&val, &ent.v, sizeof(val));
+ txtenc_mapentry(e, key, val, f);
+ }
+ _upb_mapsorter_popmap(&e->sorter, &sorted);
}
}
@@ -392,7 +413,9 @@
e.indent_depth = 0;
e.options = options;
e.ext_pool = ext_pool;
+ _upb_mapsorter_init(&e.sorter);
txtenc_msg(&e, msg, m);
+ _upb_mapsorter_destroy(&e.sorter);
return txtenc_nullz(&e, size);
}
diff --git a/upb/text_encode.h b/upb/text_encode.h
index 1a0003c..4ad3d1c 100644
--- a/upb/text_encode.h
+++ b/upb/text_encode.h
@@ -13,7 +13,10 @@
UPB_TXTENC_SINGLELINE = 1,
/* When set, unknown fields are not printed. */
- UPB_TXTENC_SKIPUNKNOWN = 2
+ UPB_TXTENC_SKIPUNKNOWN = 2,
+
+ /* When set, maps are *not* sorted (this avoids allocating tmp mem). */
+ UPB_TXTENC_NOSORT = 4
};
/* Encodes the given |msg| to text format. The message's reflection is given in
diff --git a/upb/upb.h b/upb/upb.h
index 7e7309d..11c0e4d 100644
--- a/upb/upb.h
+++ b/upb/upb.h
@@ -324,6 +324,10 @@
#endif
}
+UPB_INLINE int _upb_lg2ceilsize(int x) {
+ return 1 << _upb_lg2ceil(x);
+}
+
#include "upb/port_undef.inc"
#ifdef __cplusplus