Added a decode-options API for choosing between copying and aliasing string data, and added benchmarks to test both modes.
Benchmark output:
$ bazel-bin/benchmarks/benchmark '--benchmark_filter=BM_Parse'
2020-11-11 15:39:04
Running bazel-bin/benchmarks/benchmark
Run on (72 X 3700 MHz CPU s)
CPU Caches:
L1 Data 32K (x36)
L1 Instruction 32K (x36)
L2 Unified 1024K (x36)
L3 Unified 25344K (x2)
-------------------------------------------------------------------------------------
Benchmark Time CPU Iterations
-------------------------------------------------------------------------------------
BM_Parse_Upb_FileDesc<UseArena, Copy> 4134 ns 4134 ns 168714 1.69152GB/s
BM_Parse_Upb_FileDesc<UseArena, Alias> 3487 ns 3487 ns 199509 2.00526GB/s
BM_Parse_Upb_FileDesc<InitBlock, Copy> 3727 ns 3726 ns 187581 1.87643GB/s
BM_Parse_Upb_FileDesc<InitBlock, Alias> 3110 ns 3110 ns 224970 2.24866GB/s
BM_Parse_Proto2<FileDesc, NoArena, Copy> 31132 ns 31132 ns 22437 229.995MB/s
BM_Parse_Proto2<FileDesc, UseArena, Copy> 21011 ns 21009 ns 33922 340.812MB/s
BM_Parse_Proto2<FileDesc, InitBlock, Copy> 17976 ns 17975 ns 38808 398.337MB/s
BM_Parse_Proto2<FileDescSV, InitBlock, Alias> 17357 ns 17356 ns 40244 412.539MB/s
diff --git a/benchmarks/benchmark.cc b/benchmarks/benchmark.cc
index cc510bd..7f4765a 100644
--- a/benchmarks/benchmark.cc
+++ b/benchmarks/benchmark.cc
@@ -115,13 +115,31 @@
}
BENCHMARK(BM_LoadAdsDescriptor_Proto2);
-static void BM_Parse_Upb_FileDesc_WithArena(benchmark::State& state) {
+enum CopyStrings {
+ Copy,
+ Alias,
+};
+
+enum ArenaMode {
+ NoArena,
+ UseArena,
+ InitBlock,
+};
+
+template <ArenaMode AMode, CopyStrings Copy>
+static void BM_Parse_Upb_FileDesc(benchmark::State& state) {
size_t bytes = 0;
for (auto _ : state) {
- upb_arena* arena = upb_arena_new();
+ upb_arena *arena;
+ if (AMode == InitBlock) {
+ arena = upb_arena_init(buf, sizeof(buf), NULL);
+ } else {
+ arena = upb_arena_new();
+ }
upb_benchmark_FileDescriptorProto* set =
- upb_benchmark_FileDescriptorProto_parse(descriptor.data,
- descriptor.size, arena);
+ upb_benchmark_FileDescriptorProto_parse_ex(
+ descriptor.data, descriptor.size, arena,
+ Copy == Alias ? UPB_DECODE_ALIAS : 0);
if (!set) {
printf("Failed to parse.\n");
exit(1);
@@ -131,28 +149,16 @@
}
state.SetBytesProcessed(state.iterations() * descriptor.size);
}
-BENCHMARK(BM_Parse_Upb_FileDesc_WithArena);
+BENCHMARK_TEMPLATE(BM_Parse_Upb_FileDesc, UseArena, Copy);
+BENCHMARK_TEMPLATE(BM_Parse_Upb_FileDesc, UseArena, Alias);
+BENCHMARK_TEMPLATE(BM_Parse_Upb_FileDesc, InitBlock, Copy);
+BENCHMARK_TEMPLATE(BM_Parse_Upb_FileDesc, InitBlock, Alias);
-static void BM_Parse_Upb_FileDesc_WithInitialBlock(benchmark::State& state) {
- size_t bytes = 0;
- for (auto _ : state) {
- upb_arena* arena = upb_arena_init(buf, sizeof(buf), NULL);
- upb_benchmark_FileDescriptorProto* set =
- upb_benchmark_FileDescriptorProto_parse(descriptor.data,
- descriptor.size, arena);
- if (!set) {
- printf("Failed to parse.\n");
- exit(1);
- }
- bytes += descriptor.size;
- upb_arena_free(arena);
- }
- state.SetBytesProcessed(state.iterations() * descriptor.size);
-}
-BENCHMARK(BM_Parse_Upb_FileDesc_WithInitialBlock);
+template <ArenaMode AMode, class P>
+struct Proto2Factory;
-template <class P>
-struct NoArena {
+template<class P>
+struct Proto2Factory<NoArena, P> {
public:
P* GetProto() { return &proto_; }
@@ -161,7 +167,7 @@
};
template <class P>
-struct WithArena {
+struct Proto2Factory<UseArena, P> {
public:
P* GetProto() { return protobuf::Arena::CreateMessage<P>(&arena_); }
@@ -170,9 +176,9 @@
};
template <class P>
-struct WithInitialBlock {
+struct Proto2Factory<InitBlock, P> {
public:
- WithInitialBlock() : arena_(GetOptions()) {}
+ Proto2Factory() : arena_(GetOptions()) {}
P* GetProto() { return protobuf::Arena::CreateMessage<P>(&arena_); }
private:
@@ -189,17 +195,15 @@
using FileDesc = ::upb_benchmark::FileDescriptorProto;
using FileDescSV = ::upb_benchmark::sv::FileDescriptorProto;
-const protobuf::MessageLite::ParseFlags kMergePartial =
- protobuf::MessageLite::ParseFlags::kMergePartial;
-const protobuf::MessageLite::ParseFlags kAlias =
- protobuf::MessageLite::ParseFlags::kMergePartialWithAliasing;
-
-template <class P, template <class> class Factory,
- protobuf::MessageLite::ParseFlags kParseFlags = kMergePartial>
+template <class P, ArenaMode AMode, CopyStrings kCopy>
void BM_Parse_Proto2(benchmark::State& state) {
size_t bytes = 0;
+ constexpr protobuf::MessageLite::ParseFlags kParseFlags =
+ kCopy == Copy
+ ? protobuf::MessageLite::ParseFlags::kMergePartial
+ : protobuf::MessageLite::ParseFlags::kMergePartialWithAliasing;
for (auto _ : state) {
- Factory<P> proto_factory;
+ Proto2Factory<AMode, P> proto_factory;
auto proto = proto_factory.GetProto();
protobuf::StringPiece input(descriptor.data,descriptor.size);
bool ok = proto->template ParseFrom<kParseFlags>(input);
@@ -211,15 +215,10 @@
}
state.SetBytesProcessed(state.iterations() * descriptor.size);
}
-BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDesc, NoArena);
-BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDesc, WithArena);
-BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDesc, WithInitialBlock);
-//BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDescSV, NoArena);
-//BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDescSV, WithArena);
-BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDescSV, WithInitialBlock);
-//BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDescSV, NoArena, kAlias);
-//BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDescSV, WithArena, kAlias);
-BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDescSV, WithInitialBlock, kAlias);
+BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDesc, NoArena, Copy);
+BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDesc, UseArena, Copy);
+BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDesc, InitBlock, Copy);
+BENCHMARK_TEMPLATE(BM_Parse_Proto2, FileDescSV, InitBlock, Alias);
static void BM_SerializeDescriptor_Proto2(benchmark::State& state) {
size_t bytes = 0;
diff --git a/upb/decode.c b/upb/decode.c
index d38b46d..f49918e 100644
--- a/upb/decode.c
+++ b/upb/decode.c
@@ -643,10 +643,11 @@
return decode_msg(d, ptr, msg, decode_totablep(table));
}
-bool upb_decode(const char *buf, size_t size, void *msg, const upb_msglayout *l,
- upb_arena *arena) {
+bool _upb_decode(const char *buf, size_t size, void *msg,
+ const upb_msglayout *l, upb_arena *arena, int options) {
bool ok;
upb_decstate state;
+ unsigned depth = (unsigned)options >> 16;
if (size == 0) {
return true;
@@ -660,12 +661,12 @@
} else {
state.end = buf + size - 16;
state.limit = 16;
- state.alias = true;
+ state.alias = options & UPB_DECODE_ALIAS;
}
state.limit_ptr = state.end;
state.unknown_msg = NULL;
- state.depth = 64;
+ state.depth = depth ? depth : 64;
state.end_group = DECODE_NOGROUP;
state.arena.head = arena->head;
state.arena.last_size = arena->last_size;
diff --git a/upb/decode.h b/upb/decode.h
index 9de8638..00419ab 100644
--- a/upb/decode.h
+++ b/upb/decode.h
@@ -7,15 +7,32 @@
#include "upb/msg.h"
+/* Must be last. */
+#include "upb/port_def.inc"
+
#ifdef __cplusplus
extern "C" {
#endif
+enum {
+ UPB_DECODE_ALIAS = 1,
+};
+
+#define UPB_DECODE_MAXDEPTH(depth) ((depth) << 16)
+
+bool _upb_decode(const char *buf, size_t size, upb_msg *msg,
+ const upb_msglayout *l, upb_arena *arena, int options);
+
+UPB_INLINE
bool upb_decode(const char *buf, size_t size, upb_msg *msg,
- const upb_msglayout *l, upb_arena *arena);
+ const upb_msglayout *l, upb_arena *arena) {
+ return _upb_decode(buf, size, msg, l, arena, 0);
+}
#ifdef __cplusplus
} /* extern "C" */
#endif
+#include "upb/port_undef.inc"
+
#endif /* UPB_DECODE_H_ */
diff --git a/upb/decode_fast.c b/upb/decode_fast.c
index f58f70e..f628e6d 100644
--- a/upb/decode_fast.c
+++ b/upb/decode_fast.c
@@ -763,7 +763,7 @@
} else if (UPB_LIKELY(size <= 64)) {
if (UPB_UNLIKELY(common_has < 64)) goto longstr;
fastdecode_docopy(d, ptr, size, 64, buf, dst);
- } else if (UPB_LIKELY(size <= 128)) {
+ } else if (UPB_LIKELY(size < 128)) {
if (UPB_UNLIKELY(common_has < 128)) goto longstr;
fastdecode_docopy(d, ptr, size, 128, buf, dst);
} else {
diff --git a/upb/def.c b/upb/def.c
index 19b30fe..74c33cc 100644
--- a/upb/def.c
+++ b/upb/def.c
@@ -2140,8 +2140,8 @@
if (!_upb_symtab_loaddefinit(s, *deps)) goto err;
}
- file = google_protobuf_FileDescriptorProto_parse(
- init->descriptor.data, init->descriptor.size, arena);
+ file = google_protobuf_FileDescriptorProto_parse_ex(
+ init->descriptor.data, init->descriptor.size, arena, UPB_DECODE_ALIAS);
s->bytes_loaded += init->descriptor.size;
if (!file) {
diff --git a/upbc/generator.cc b/upbc/generator.cc
index f4d1bb9..4f9db9a 100644
--- a/upbc/generator.cc
+++ b/upbc/generator.cc
@@ -348,6 +348,12 @@
" $0 *ret = $0_new(arena);\n"
" return (ret && upb_decode(buf, size, ret, &$1, arena)) ? ret : NULL;\n"
"}\n"
+ "UPB_INLINE $0 *$0_parse_ex(const char *buf, size_t size,\n"
+ " upb_arena *arena, int options) {\n"
+ " $0 *ret = $0_new(arena);\n"
+ " return (ret && _upb_decode(buf, size, ret, &$1, arena, options))\n"
+ " ? ret : NULL;\n"
+ "}\n"
"UPB_INLINE char *$0_serialize(const $0 *msg, upb_arena *arena, size_t "
"*len) {\n"
" return upb_encode(msg, &$1, arena, len);\n"