blob: 2e690c182dfe0f1a508feb61f74e5db001c25bd0 [file] [log] [blame]
#region Copyright notice and license
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endregion
32using NUnit.Framework;
33using System;
34using System.IO;
35
36namespace Google.Protobuf
37{
38 public class JsonTokenizerTest
39 {
// "{}" must tokenize to exactly a start-object followed by an end-object.
[Test]
public void EmptyObjectValue()
{
    JsonToken[] expected = { JsonToken.StartObject, JsonToken.EndObject };
    AssertTokens("{}", expected);
}
45
// "[]" must tokenize to exactly a start-array followed by an end-array.
[Test]
public void EmptyArrayValue()
{
    JsonToken[] expected = { JsonToken.StartArray, JsonToken.EndArray };
    AssertTokens("[]", expected);
}
51
// Each case supplies the raw contents of a JSON string (without the surrounding quotes)
// and the string value the tokenizer is expected to produce for it.
[Test]
[TestCase("foo", "foo")]
[TestCase("tab\\t", "tab\t")]
[TestCase("line\\nfeed", "line\nfeed")]
[TestCase("carriage\\rreturn", "carriage\rreturn")]
[TestCase("back\\bspace", "back\bspace")]
[TestCase("form\\ffeed", "form\ffeed")]
[TestCase("escaped\\/slash", "escaped/slash")]
[TestCase("escaped\\\\backslash", "escaped\\backslash")]
[TestCase("escaped\\\"quote", "escaped\"quote")]
[TestCase("foo {}[] bar", "foo {}[] bar")]
[TestCase("foo\\u09aFbar", "foo\u09afbar")] // Digits, upper hex, lower hex
[TestCase("ab\ud800\udc00cd", "ab\ud800\udc00cd")]
[TestCase("ab\\ud800\\udc00cd", "ab\ud800\udc00cd")]
public void StringValue(string json, string expectedValue)
{
    // Quote the text explicitly and use the no-replacement helper so the
    // characters under test reach the tokenizer untouched.
    string quotedJson = string.Concat("\"", json, "\"");
    var expectedToken = JsonToken.Value(expectedValue);
    AssertTokensNoReplacement(quotedJson, expectedToken);
}
70
// Valid surrogate pairs where one half is escaped and the other is a raw char.
// These inputs have no valid UTF-8 representation, so they cannot be written as
// TestCase arguments. Such text can't come from UTF-8 input but could come from a
// .NET string; how it should be handled is unclear, so for now it is treated as
// valid in the obvious way.
[Test]
public void MixedSurrogatePairs()
{
    string expected = "\ud800\udc00";
    string[] inputs =
    {
        "'\\ud800\udc00'", // escaped high surrogate, raw low surrogate
        "'\ud800\\udc00'", // raw high surrogate, escaped low surrogate
    };
    foreach (string input in inputs)
    {
        AssertTokens(input, JsonToken.Value(expected));
    }
}
83
// Walks a nested document token by token, checking ObjectDepth after every read.
[Test]
public void ObjectDepth()
{
    var tokenizer = JsonTokenizer.FromTextReader(
        new StringReader("{ \"foo\": { \"x\": 1, \"y\": [ 0 ] } }"));

    // Each step: the token expected from the next read, and the depth expected afterwards.
    var steps = new[]
    {
        new { Token = JsonToken.StartObject, Depth = 1 },
        new { Token = JsonToken.Name("foo"), Depth = 1 },
        new { Token = JsonToken.StartObject, Depth = 2 },
        new { Token = JsonToken.Name("x"), Depth = 2 },
        new { Token = JsonToken.Value(1), Depth = 2 },
        new { Token = JsonToken.Name("y"), Depth = 2 },
        new { Token = JsonToken.StartArray, Depth = 2 }, // Depth hasn't changed in array
        new { Token = JsonToken.Value(0), Depth = 2 },
        new { Token = JsonToken.EndArray, Depth = 2 },
        new { Token = JsonToken.EndObject, Depth = 1 },
        new { Token = JsonToken.EndObject, Depth = 0 },
        new { Token = JsonToken.EndDocument, Depth = 0 },
    };

    // Nothing has been read yet.
    Assert.AreEqual(0, tokenizer.ObjectDepth);
    foreach (var step in steps)
    {
        Assert.AreEqual(step.Token, tokenizer.Next());
        Assert.AreEqual(step.Depth, tokenizer.ObjectDepth);
    }
}
116
// Pushing a token back must undo the effect that reading it had on ObjectDepth.
[Test]
public void ObjectDepth_WithPushBack()
{
    var tokenizer = JsonTokenizer.FromTextReader(new StringReader("{}"));
    Assert.AreEqual(0, tokenizer.ObjectDepth);

    // Reading the first token of "{}" takes us to depth 1...
    var openingToken = tokenizer.Next();
    Assert.AreEqual(1, tokenizer.ObjectDepth);
    // ...pushing it back should effectively return us to the previous depth...
    tokenizer.PushBack(openingToken);
    Assert.AreEqual(0, tokenizer.ObjectDepth);
    // ...and reading it again gets back to depth 1.
    tokenizer.Next();
    Assert.AreEqual(1, tokenizer.ObjectDepth);

    // The same in reverse for the next token, which closes the object.
    var closingToken = tokenizer.Next();
    Assert.AreEqual(0, tokenizer.ObjectDepth);
    tokenizer.PushBack(closingToken);
    Assert.AreEqual(1, tokenizer.ObjectDepth);
    tokenizer.Next();
    Assert.AreEqual(0, tokenizer.ObjectDepth);
}
140
// Each case is the raw contents of a JSON string (without the surrounding quotes)
// that the tokenizer is expected to reject.
[Test]
[TestCase("embedded tab\t")]
[TestCase("embedded CR\r")]
[TestCase("embedded LF\n")]
[TestCase("embedded bell\u0007")]
[TestCase("bad escape\\a")]
[TestCase("incomplete escape\\")]
[TestCase("incomplete Unicode escape\\u000")]
[TestCase("invalid Unicode escape\\u000H")]
// Surrogate pair handling, both in raw .NET strings and escaped. We only need
// to detect this in strings, as non-ASCII characters anywhere other than in strings
// will already lead to parsing errors.
[TestCase("\\ud800")]
[TestCase("\\udc00")]
[TestCase("\\ud800x")]
[TestCase("\\udc00x")]
[TestCase("\\udc00\\ud800y")]
public void InvalidStringValue(string json)
{
    string quotedJson = string.Concat("\"", json, "\"");
    AssertThrowsAfter(quotedJson);
}
162
// Invalid strings containing raw (unescaped) surrogates. These constants can't be
// expressed as UTF-8 strings, so they can't be supplied through attributes.
[Test]
public void InvalidSurrogatePairs()
{
    string[] invalidDocuments =
    {
        "\"\ud800x\"",
        "\"\udc00y\"",
        "\"\udc00\ud800y\"",
    };
    foreach (string document in invalidDocuments)
    {
        AssertThrowsAfter(document);
    }
}
172
// Each case pairs JSON number text with the numeric value expected from the tokenizer.
[Test]
[TestCase("0", 0)]
[TestCase("-0", 0)] // We don't distinguish between positive and negative 0
[TestCase("1", 1)]
[TestCase("-1", -1)]
// From here on, assume leading sign is okay...
[TestCase("1.125", 1.125)]
[TestCase("1.0", 1)]
[TestCase("1e5", 100000)]
[TestCase("1e000000", 1)] // Weird, but not prohibited by the spec
[TestCase("1E5", 100000)]
[TestCase("1e+5", 100000)]
[TestCase("1E-5", 0.00001)]
[TestCase("123E-2", 1.23)]
[TestCase("123.45E3", 123450)]
[TestCase(" 1 ", 1)]
public void NumberValue(string json, double expectedValue)
{
    var expectedToken = JsonToken.Value(expectedValue);
    AssertTokens(json, expectedToken);
}
193
// Number-like text that the tokenizer is expected to reject on the very first read.
[Test]
[TestCase("00")]
[TestCase(".5")]
[TestCase("1.")]
[TestCase("1e")]
[TestCase("1e-")]
[TestCase("--")]
[TestCase("--1")]
[TestCase("-1.7977e308")]
[TestCase("1.7977e308")]
public void InvalidNumberValue(string json)
{
    // No expected tokens are passed, so the first call to Next() must throw.
    AssertThrowsAfter(json);
}
208
// Bare words that are not valid JSON literals; the first read is expected to throw.
[Test]
[TestCase("nul")]
[TestCase("nothing")]
[TestCase("truth")]
[TestCase("fALSEhood")]
public void InvalidLiterals(string json)
{
    // No expected tokens are passed, so the first call to Next() must throw.
    AssertThrowsAfter(json);
}
218
// The literal "null" must tokenize to the single JsonToken.Null token.
[Test]
public void NullValue()
{
    const string json = "null";
    AssertTokens(json, JsonToken.Null);
}
224
// The literal "true" must tokenize to the single JsonToken.True token.
[Test]
public void TrueValue()
{
    const string json = "true";
    AssertTokens(json, JsonToken.True);
}
230
// The literal "false" must tokenize to the single JsonToken.False token.
[Test]
public void FalseValue()
{
    const string json = "false";
    AssertTokens(json, JsonToken.False);
}
236
// A single-property object yields start-object, name, value, end-object.
[Test]
public void SimpleObject()
{
    JsonToken[] expected =
    {
        JsonToken.StartObject,
        JsonToken.Name("x"),
        JsonToken.Value("y"),
        JsonToken.EndObject,
    };
    // Apostrophes are converted to double quotes by AssertTokens.
    AssertTokens("{'x': 'y'}", expected);
}
Xiang Daie4794102019-02-21 11:28:50 +0800243
// Each case is structurally invalid JSON (apostrophes stand in for double quotes)
// together with the number of tokens that can be read before the tokenizer throws.
[Test]
[TestCase("[10, 20", 3)]
[TestCase("[10,", 2)]
[TestCase("[10:20]", 2)]
[TestCase("[", 1)]
[TestCase("[,", 1)]
[TestCase("{", 1)]
[TestCase("{,", 1)]
[TestCase("{[", 1)]
[TestCase("{{", 1)]
[TestCase("{0", 1)]
[TestCase("{null", 1)]
[TestCase("{false", 1)]
[TestCase("{true", 1)]
[TestCase("}", 0)]
[TestCase("]", 0)]
[TestCase(",", 0)]
[TestCase("'foo' 'bar'", 1)]
[TestCase(":", 0)]
[TestCase("'foo", 0)] // Incomplete string
[TestCase("{ 'foo' }", 2)]
[TestCase("{ x:1", 1)] // Property names must be quoted
[TestCase("{]", 1)]
[TestCase("[}", 1)]
[TestCase("[1,", 2)]
[TestCase("{'x':0]", 3)]
[TestCase("{ 'foo': }", 2)]
[TestCase("{ 'foo':'bar', }", 3)]
public void InvalidStructure(string json, int expectedValidTokens)
{
    string normalizedJson = json.Replace('\'', '"');
    var tokenizer = JsonTokenizer.FromTextReader(new StringReader(normalizedJson));

    // The leading tokens are only checked for being non-null, not for their exact
    // values, partly because exact values are hard to parameterize.
    for (int remaining = expectedValidTokens; remaining > 0; remaining--)
    {
        Assert.IsNotNull(tokenizer.Next());
    }

    // The read after the valid prefix must fail.
    Assert.Throws<InvalidJsonException>(() => tokenizer.Next());
}
284
// An array holding a number, string, null, booleans, a nested array and a nested object.
[Test]
public void ArrayMixedType()
{
    JsonToken[] expected =
    {
        JsonToken.StartArray,
        JsonToken.Value(1),
        JsonToken.Value("foo"),
        JsonToken.Null,
        JsonToken.False,
        JsonToken.True,
        // Nested array
        JsonToken.StartArray,
        JsonToken.Value(2),
        JsonToken.EndArray,
        // Nested object
        JsonToken.StartObject,
        JsonToken.Name("x"),
        JsonToken.Value("y"),
        JsonToken.EndObject,
        JsonToken.EndArray,
    };
    AssertTokens("[1, 'foo', null, false, true, [2], {'x':'y' }]", expected);
}
304
// An object whose properties hold a number, string, null, booleans, an array and an object.
// The JSON text spans two lines of a verbatim string literal.
[Test]
public void ObjectMixedType()
{
    JsonToken[] expected =
    {
        JsonToken.StartObject,
        JsonToken.Name("a"),
        JsonToken.Value(1),
        JsonToken.Name("b"),
        JsonToken.Value("bar"),
        JsonToken.Name("c"),
        JsonToken.Null,
        JsonToken.Name("d"),
        JsonToken.False,
        JsonToken.Name("e"),
        JsonToken.True,
        JsonToken.Name("f"),
        JsonToken.StartArray,
        JsonToken.Value(2),
        JsonToken.EndArray,
        JsonToken.Name("g"),
        JsonToken.StartObject,
        JsonToken.Name("x"),
        JsonToken.Value("y"),
        JsonToken.EndObject,
        JsonToken.EndObject,
    };
    AssertTokens(@"{'a': 1, 'b': 'bar', 'c': null, 'd': false, 'e': true,
                   'f': [2], 'g': {'x':'y' }}",
        expected);
}
332
// Once EndDocument has been returned, a further Next() must throw InvalidOperationException.
[Test]
public void NextAfterEndDocumentThrows()
{
    var tokenizer = JsonTokenizer.FromTextReader(new StringReader("null"));

    var valueToken = tokenizer.Next();
    Assert.AreEqual(JsonToken.Null, valueToken);

    var endToken = tokenizer.Next();
    Assert.AreEqual(JsonToken.EndDocument, endToken);

    Assert.Throws<InvalidOperationException>(() => tokenizer.Next());
}
341
// EndDocument can be pushed back and read again; reading past it a second time still throws.
[Test]
public void CanPushBackEndDocument()
{
    var tokenizer = JsonTokenizer.FromTextReader(new StringReader("null"));

    var valueToken = tokenizer.Next();
    Assert.AreEqual(JsonToken.Null, valueToken);

    var firstEnd = tokenizer.Next();
    Assert.AreEqual(JsonToken.EndDocument, firstEnd);

    tokenizer.PushBack(JsonToken.EndDocument);
    var secondEnd = tokenizer.Next();
    Assert.AreEqual(JsonToken.EndDocument, secondEnd);

    Assert.Throws<InvalidOperationException>(() => tokenizer.Next());
}
Xiang Daie4794102019-02-21 11:28:50 +0800352
/// <summary>
/// Checks that the given JSON tokenizes into exactly the given tokens.
/// Every apostrophe in <paramref name="json"/> is replaced with a double quote before
/// tokenizing, so tests that don't care about real apostrophe handling can write JSON
/// without messy string literal escaping. The trailing "end document" token is implicit
/// and must not be listed in <paramref name="expectedTokens"/>.
/// </summary>
private static void AssertTokens(string json, params JsonToken[] expectedTokens)
{
    string doubleQuotedJson = json.Replace('\'', '"');
    AssertTokensNoReplacement(doubleQuotedJson, expectedTokens);
}
364
/// <summary>
/// Checks that the given JSON tokenizes into exactly the given tokens, passing the text
/// to the tokenizer as-is. Unlike <see cref="AssertTokens(string, JsonToken[])"/>, no
/// character replacement is performed, so use this when apostrophes in the text are
/// meant to be read *as* apostrophes. The trailing "end document" token is implicit
/// and must not be listed in <paramref name="expectedTokens"/>.
/// </summary>
private static void AssertTokensNoReplacement(string json, params JsonToken[] expectedTokens)
{
    var tokenizer = JsonTokenizer.FromTextReader(new StringReader(json));

    foreach (var expectedToken in expectedTokens)
    {
        var actualToken = tokenizer.Next();
        // Running out of tokens early gets its own message rather than a plain mismatch.
        if (actualToken == JsonToken.EndDocument)
        {
            Assert.Fail("Expected {0} but reached end of token stream", expectedToken);
        }
        Assert.AreEqual(expectedToken, actualToken);
    }

    // After the listed tokens, the only thing left must be the end of the document.
    var finalToken = tokenizer.Next();
    if (finalToken != JsonToken.EndDocument)
    {
        Assert.Fail("Expected token stream to be exhausted; received {0}", finalToken);
    }
}
391
/// <summary>
/// Checks that the tokenizer yields the given tokens from the JSON text, in order, and
/// that the following call to <c>Next()</c> throws <c>InvalidJsonException</c>.
/// The text is passed to the tokenizer as-is, with no apostrophe replacement. When no
/// expected tokens are supplied, the very first read must throw.
/// </summary>
private static void AssertThrowsAfter(string json, params JsonToken[] expectedTokens)
{
    var tokenizer = JsonTokenizer.FromTextReader(new StringReader(json));

    foreach (var expectedToken in expectedTokens)
    {
        var actualToken = tokenizer.Next();
        // Running out of tokens early gets its own message rather than a plain mismatch.
        if (actualToken == JsonToken.EndDocument)
        {
            Assert.Fail("Expected {0} but reached end of document", expectedToken);
        }
        Assert.AreEqual(expectedToken, actualToken);
    }

    Assert.Throws<InvalidJsonException>(() => tokenizer.Next());
}
407 }
408}