Add initial fuzz test for upb's JSON encode/decode.
Unlike the wire fuzz test, this only fuzzes JSON against a fixed message; a subsequent improvement can additionally fuzz over the message def itself.
PiperOrigin-RevId: 580158012
diff --git a/upb/json/BUILD b/upb/json/BUILD
index b560854..29ce7ae 100644
--- a/upb/json/BUILD
+++ b/upb/json/BUILD
@@ -20,8 +20,12 @@
copts = UPB_DEFAULT_COPTS,
visibility = ["//visibility:public"],
deps = [
+ "//upb:base",
"//upb:lex",
+ "//upb:mem",
"//upb:message",
+ "//upb:message_types",
+ "//upb:mini_table",
"//upb:port",
"//upb:reflection",
"//upb:wire",
@@ -82,3 +86,23 @@
testonly = 1,
deps = ["//:struct_proto"],
)
+
+# begin:google_only
+# cc_test(
+# name = "fuzz_test",
+# srcs = ["fuzz_test.cc"],
+# deps = [
+# ":json",
+# ":test_upb_proto",
+# ":test_upb_proto_reflection",
+# "@com_google_googletest//:gtest_main",
+# "//testing/fuzzing:fuzztest",
+# "//upb:base",
+# "//upb:mem",
+# "//upb:message",
+# "//upb:message_types",
+# "//upb:mini_table",
+# "//upb:reflection",
+# ],
+# )
+# end:google_only
diff --git a/upb/json/decode.c b/upb/json/decode.c
index 5b7fc3d..683a721 100644
--- a/upb/json/decode.c
+++ b/upb/json/decode.c
@@ -35,12 +35,25 @@
#include <inttypes.h>
#include <limits.h>
#include <math.h>
+#include <setjmp.h>
+#include <stdarg.h>
+#include <stddef.h>
+#include <stdint.h>
#include <stdlib.h>
#include <string.h>
+#include "upb/base/descriptor_constants.h"
+#include "upb/base/status.h"
+#include "upb/base/string_view.h"
#include "upb/lex/atoi.h"
#include "upb/lex/unicode.h"
+#include "upb/mem/arena.h"
+#include "upb/message/array.h"
#include "upb/message/map.h"
+#include "upb/message/message.h"
+#include "upb/message/types.h"
+#include "upb/mini_table/message.h"
+#include "upb/reflection/def.h"
#include "upb/reflection/message.h"
#include "upb/wire/encode.h"
@@ -157,6 +170,10 @@
}
static int jsondec_rawpeek(jsondec* d) {
+ if (d->ptr == d->end) {
+ jsondec_err(d, "Unexpected EOF");
+ }
+
switch (*d->ptr) {
case '{':
return JD_OBJECT;
@@ -272,7 +289,7 @@
static double jsondec_number(jsondec* d) {
const char* start = d->ptr;
- assert(jsondec_rawpeek(d) == JD_NUMBER);
+ UPB_ASSERT(jsondec_rawpeek(d) == JD_NUMBER);
/* Skip over the syntax of a number, as specified by JSON. */
if (*d->ptr == '-') d->ptr++;
@@ -307,9 +324,19 @@
* (strtod() accepts a superset of JSON syntax). */
errno = 0;
{
+ // Copy the number into a null-terminated scratch buffer since strtod
+ // expects a null-terminated string.
+ char nullz[64];
+ ptrdiff_t len = d->ptr - start;
+ if (len > (ptrdiff_t)(sizeof(nullz) - 1)) {
+ jsondec_err(d, "excessively long number");
+ }
+ memcpy(nullz, start, len);
+ nullz[len] = '\0';
+
char* end;
- double val = strtod(start, &end);
- assert(end == d->ptr);
+ double val = strtod(nullz, &end);
+ UPB_ASSERT(end - nullz == len);
/* Currently the min/max-val conformance tests fail if we check this. Does
* this mean the conformance tests are wrong or strtod() is wrong, or
@@ -450,7 +477,7 @@
}
break;
default:
- if ((unsigned char)*d->ptr < 0x20) {
+ if ((unsigned char)ch < 0x20) {
jsondec_err(d, "Invalid char in JSON string");
}
*end++ = ch;
diff --git a/upb/json/fuzz_test.cc b/upb/json/fuzz_test.cc
new file mode 100644
index 0000000..381c8eb
--- /dev/null
+++ b/upb/json/fuzz_test.cc
@@ -0,0 +1,79 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2023 Google LLC. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file or at
+// https://developers.google.com/open-source/licenses/bsd
+
+#include <cstddef>
+#include <cstring>
+#include <string_view>
+
+#include <gtest/gtest.h>
+#include "testing/fuzzing/fuzztest.h"
+#include "upb/base/status.hpp"
+#include "upb/json/decode.h"
+#include "upb/json/encode.h"
+#include "upb/json/test.upb.h"
+#include "upb/json/test.upbdefs.h"
+#include "upb/mem/arena.h"
+#include "upb/mem/arena.hpp"
+#include "upb/reflection/def.hpp"
+
+namespace {
+
+// Fuzz target: decodes `json` into a upb_test_Box and, if decoding succeeds,
+// encodes the message back to JSON, checking that the sizing pass and the
+// writing pass of upb_JsonEncode() report the same length.
+void DecodeEncodeArbitraryJson(std::string_view json) {
+  upb::Arena arena;
+  upb::Status status;
+
+  // Set up everything that can fail fatally before the new[] below, so an
+  // early return from ASSERT_* cannot leak the heap copy of the input.
+  upb::DefPool defpool;
+  upb::MessageDefPtr m(upb_test_Box_getmsgdef(defpool.ptr()));
+  ASSERT_TRUE(m.ptr() != nullptr);  // every call below is handed this def
+  upb_test_Box* box = upb_test_Box_new(arena.ptr());
+  ASSERT_TRUE(box != nullptr);
+
+  // Copy the input string to the heap. This helps asan reproduce issues that
+  // don't reproduce when passing static memory pointers. See b/309107518.
+  auto* json_heap = new char[json.size()];
+  // An empty string_view may have a null data(); memcpy() must not receive a
+  // null pointer, even for a zero-length copy.
+  if (!json.empty()) std::memcpy(json_heap, json.data(), json.size());
+
+  int options = 0;
+  bool ok = upb_JsonDecode(json_heap, json.size(), box, m.ptr(), defpool.ptr(),
+                           options, arena.ptr(), status.ptr());
+  // The input is released before encoding. NOTE(review): presumably so that
+  // asan flags decoded data still pointing into the input -- confirm.
+  delete[] json_heap;
+  if (!ok) return;
+
+  // Sizing pass: no output buffer, only the required length is of interest.
+  size_t size = upb_JsonEncode(box, m.ptr(), defpool.ptr(), options, nullptr, 0,
+                               status.ptr());
+  // NOTE(review): upb_JsonEncode() appears to signal errors as (size_t)-1 --
+  // confirm against encode.h. Stop here rather than let `size + 1` wrap to 0.
+  if (size == static_cast<size_t>(-1)) return;
+
+  // One byte more than the reported size (presumably for the trailing NUL).
+  char* json_buf = static_cast<char*>(upb_Arena_Malloc(arena.ptr(), size + 1));
+  ASSERT_TRUE(json_buf != nullptr);
+
+  size_t written = upb_JsonEncode(box, m.ptr(), defpool.ptr(), options,
+                                  json_buf, size + 1, status.ptr());
+  EXPECT_EQ(written, size);
+}
+FUZZ_TEST(FuzzTest, DecodeEncodeArbitraryJson);
+
+// Fixed malformed inputs run through the same target as ordinary unit tests.
+TEST(FuzzTest, UnclosedObjectKey) { DecodeEncodeArbitraryJson("{\" "); }
+
+TEST(FuzzTest, MalformedExponent) {
+  DecodeEncodeArbitraryJson(R"({"val":0XE$})");
+}
+
+}  // namespace