blob: d15e84f44648c0d387f0d41e33a5d5f3e5c6f321 [file] [log] [blame]
Jon Skeete1e9d3e2022-03-28 11:31:18 +01001#region Copyright notice and license
2// Protocol Buffers - Google's data interchange format
3// Copyright 2008 Google Inc. All rights reserved.
Jon Skeete1e9d3e2022-03-28 11:31:18 +01004//
Joshua Haberman8c451772023-09-08 17:13:13 -07005// Use of this source code is governed by a BSD-style
6// license that can be found in the LICENSE file or at
7// https://developers.google.com/open-source/licenses/bsd
Jon Skeete1e9d3e2022-03-28 11:31:18 +01008#endregion
9
10using Google.Protobuf.Collections;
11using System;
Jon Skeete1e9d3e2022-03-28 11:31:18 +010012using System.IO;
Jon Skeete1e9d3e2022-03-28 11:31:18 +010013using System.Security;
14
15namespace Google.Protobuf
16{
17 /// <summary>
18 /// Reads and decodes protocol message fields.
19 /// </summary>
20 /// <remarks>
21 /// <para>
22 /// This class is generally used by generated code to read appropriate
23 /// primitives from the stream. It effectively encapsulates the lowest
24 /// levels of protocol buffer format.
25 /// </para>
26 /// <para>
27 /// Repeated fields and map fields are not handled by this class; use <see cref="RepeatedField{T}"/>
28 /// and <see cref="MapField{TKey, TValue}"/> to parse such fields.
29 /// </para>
30 /// </remarks>
31 [SecuritySafeCritical]
32 public sealed class CodedInputStream : IDisposable
33 {
/// <summary>
/// Indicates whether the underlying stream is left open when this instance is disposed.
/// When there is no stream this is always true.
/// </summary>
private readonly bool leaveOpen;

/// <summary>
/// Holds the bytes being decoded: either data read from the stream, or the array
/// supplied at construction time.
/// </summary>
private readonly byte[] buffer;

/// <summary>
/// The stream that supplies further input. Null when a byte array was provided directly
/// on construction, in which case no further data is available.
/// </summary>
private readonly Stream input;

/// <summary>
/// Parser state, stored separately from this class so that other parse implementations
/// can reuse the same parsing primitives.
/// </summary>
private ParserInternalState state;

/// <summary>Recursion limit applied when none is specified.</summary>
internal const int DefaultRecursionLimit = 100;

/// <summary>Size limit applied when none is specified.</summary>
internal const int DefaultSizeLimit = int.MaxValue;

/// <summary>Size, in bytes, of the buffer allocated for stream-backed instances.</summary>
internal const int BufferSize = 4096;
60
61 #region Construction
62 // Note that the checks are performed such that we don't end up checking obviously-valid things
63 // like non-null references for arrays we've just created.
64
/// <summary>
/// Initializes a <see cref="CodedInputStream"/> that decodes the whole of the supplied byte array.
/// </summary>
/// <param name="buffer">The array to read from; validated with <c>ProtoPreconditions.CheckNotNull</c>.</param>
public CodedInputStream(byte[] buffer)
    : this(input: null,
           buffer: ProtoPreconditions.CheckNotNull(buffer, nameof(buffer)),
           bufferPos: 0,
           bufferSize: buffer.Length,
           leaveOpen: true)
{
}
71
/// <summary>
/// Creates a new <see cref="CodedInputStream"/> that reads from the given byte array slice.
/// </summary>
/// <param name="buffer">The array holding the data; validated with <c>ProtoPreconditions.CheckNotNull</c>.</param>
/// <param name="offset">Index of the first byte of the slice; must lie within <paramref name="buffer"/>.</param>
/// <param name="length">Number of bytes in the slice; must be non-negative and must not extend
/// past the end of <paramref name="buffer"/>.</param>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="offset"/> or
/// <paramref name="length"/> does not describe a valid slice of <paramref name="buffer"/>.</exception>
public CodedInputStream(byte[] buffer, int offset, int length)
    : this(null, ProtoPreconditions.CheckNotNull(buffer, "buffer"), offset, offset + length, true)
{
    // The chained constructor has already stored offset + length; the checks below reject
    // any combination for which that value is not a valid end index.
    if (offset < 0 || offset > buffer.Length)
    {
        throw new ArgumentOutOfRangeException(nameof(offset), "Offset must be within the buffer");
    }
    // Compare against the space remaining after offset instead of computing offset + length:
    // the sum can wrap around to a negative number (e.g. offset 1, length int.MaxValue) and
    // would then slip past a "sum > buffer.Length" test.
    if (length < 0 || length > buffer.Length - offset)
    {
        throw new ArgumentOutOfRangeException(nameof(length), "Length must be non-negative and within the buffer");
    }
}
87
/// <summary>
/// Initializes a <see cref="CodedInputStream"/> that reads from the given stream. The stream is
/// disposed when the returned object is disposed.
/// </summary>
/// <param name="input">The stream to read from.</param>
public CodedInputStream(Stream input)
    : this(input, leaveOpen: false)
{
}
96
/// <summary>
/// Initializes a <see cref="CodedInputStream"/> that reads from the given stream through a
/// freshly allocated buffer of <c>BufferSize</c> bytes.
/// </summary>
/// <param name="input">The stream to read from; validated with <c>ProtoPreconditions.CheckNotNull</c>.</param>
/// <param name="leaveOpen"><c>true</c> to keep <paramref name="input"/> open when the returned
/// <see cref="CodedInputStream"/> is disposed; <c>false</c> to dispose of the stream along with
/// the returned object.</param>
public CodedInputStream(Stream input, bool leaveOpen)
    : this(input: ProtoPreconditions.CheckNotNull(input, nameof(input)),
           buffer: new byte[BufferSize],
           bufferPos: 0,
           bufferSize: 0,
           leaveOpen: leaveOpen)
{
}
108
/// <summary>
/// Initializes a <see cref="CodedInputStream"/> over the given stream and buffer, applying
/// the default size and recursion limits.
/// </summary>
/// <param name="input">The stream to read further data from; may be null.</param>
/// <param name="buffer">The buffer holding (or receiving) the data.</param>
/// <param name="bufferPos">Initial read position stored in the parser state.</param>
/// <param name="bufferSize">Initial buffer size stored in the parser state.</param>
/// <param name="leaveOpen">Whether <paramref name="input"/> is left open on disposal.</param>
internal CodedInputStream(Stream input, byte[] buffer, int bufferPos, int bufferSize, bool leaveOpen)
{
    this.buffer = buffer;
    this.input = input;

    state.bufferSize = bufferSize;
    state.bufferPos = bufferPos;
    state.recursionLimit = DefaultRecursionLimit;
    state.sizeLimit = DefaultSizeLimit;
    SegmentedBufferHelper.Initialize(this, out state.segmentedBufferHelper);

    state.currentLimit = int.MaxValue;
    this.leaveOpen = leaveOpen;
}
126
/// <summary>
/// Creates a new CodedInputStream reading data from the given
/// stream and buffer, using the specified limits.
/// </summary>
/// <remarks>
/// This chains to the version with the default limits instead of vice versa to avoid
/// having to check that the default values are valid every time.
/// </remarks>
/// <param name="input">The stream to read further data from.</param>
/// <param name="buffer">The buffer holding (or receiving) the data.</param>
/// <param name="bufferPos">Initial read position stored in the parser state.</param>
/// <param name="bufferSize">Initial buffer size stored in the parser state.</param>
/// <param name="sizeLimit">The size limit to apply; must be positive.</param>
/// <param name="recursionLimit">The recursion limit to apply; must be positive.</param>
/// <param name="leaveOpen">Whether <paramref name="input"/> is left open on disposal.</param>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="sizeLimit"/> or
/// <paramref name="recursionLimit"/> is zero or negative.</exception>
internal CodedInputStream(Stream input, byte[] buffer, int bufferPos, int bufferSize, int sizeLimit, int recursionLimit, bool leaveOpen)
    : this(input, buffer, bufferPos, bufferSize, leaveOpen)
{
    if (sizeLimit <= 0)
    {
        throw new ArgumentOutOfRangeException(nameof(sizeLimit), "Size limit must be positive");
    }
    if (recursionLimit <= 0)
    {
        // nameof keeps the reported parameter name in step with the actual parameter.
        throw new ArgumentOutOfRangeException(nameof(recursionLimit), "Recursion limit must be positive");
    }
    // Overwrite the defaults installed by the chained constructor.
    this.state.sizeLimit = sizeLimit;
    this.state.recursionLimit = recursionLimit;
}
149 #endregion
150
/// <summary>
/// Creates a <see cref="CodedInputStream"/> with the specified size and recursion limits, reading
/// from an input stream.
/// </summary>
/// <remarks>
/// This method exists separately from the constructor to reduce the number of constructor overloads.
/// It is likely to be used considerably less frequently than the constructors, as the default limits
/// are suitable for most use cases. The returned object is created with <c>leaveOpen</c> set to
/// <c>false</c>, so disposing it also disposes <paramref name="input"/>.
/// </remarks>
/// <param name="input">The input stream to read from; validated with
/// <c>ProtoPreconditions.CheckNotNull</c>, like the stream-based constructors.</param>
/// <param name="sizeLimit">The total limit of data to read from the stream; must be positive.</param>
/// <param name="recursionLimit">The maximum recursion depth to allow while reading; must be positive.</param>
/// <returns>A <c>CodedInputStream</c> reading from <paramref name="input"/> with the specified size
/// and recursion limits.</returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="sizeLimit"/> or
/// <paramref name="recursionLimit"/> is zero or negative.</exception>
public static CodedInputStream CreateWithLimits(Stream input, int sizeLimit, int recursionLimit)
{
    // Note: we may want an overload accepting leaveOpen
    // Reject a null stream up front: with leaveOpen false, Dispose() would otherwise
    // dereference the null stream.
    return new CodedInputStream(
        ProtoPreconditions.CheckNotNull(input, nameof(input)),
        new byte[BufferSize],
        0,
        0,
        sizeLimit,
        recursionLimit,
        false);
}
170
/// <summary>
/// Gets the current position: relative to the input stream when there is one, otherwise
/// the position within the input buffer.
/// </summary>
public long Position
{
    get
    {
        if (input == null)
        {
            return state.bufferPos;
        }

        // Bytes held in the buffer (including those beyond the current limit) that have
        // not been consumed yet; the stream itself is already past them.
        var unreadBuffered = (state.bufferSize + state.bufferSizeAfterLimit) - state.bufferPos;
        return input.Position - unreadBuffered;
    }
}
185
/// <summary>
/// Gets the most recently read tag, or 0 if no tag has been read yet or reading went past
/// the end of the stream.
/// </summary>
internal uint LastTag => state.lastTag;
191
/// <summary>
/// Gets the size limit for this stream.
/// </summary>
/// <remarks>
/// The limit acts as a sanity check while reading from the underlying stream. It is not
/// applied when the data comes from a byte array with no underlying stream.
/// Unless specified otherwise, the value is Int32.MaxValue.
/// </remarks>
/// <value>
/// The size limit.
/// </value>
public int SizeLimit => state.sizeLimit;
204
/// <summary>
/// Gets the recursion limit for this stream. The limit is enforced while reading messages,
/// as a guard against maliciously-recursive data.
/// </summary>
/// <remarks>
/// Unless specified otherwise, the limit is 100.
/// </remarks>
/// <value>
/// The recursion limit for this stream.
/// </value>
public int RecursionLimit => state.recursionLimit;
216
/// <summary>
/// Internal-only flag, stored in the parser state: when true, unknown fields are discarded
/// while parsing.
/// </summary>
internal bool DiscardUnknownFields
{
    get => state.DiscardUnknownFields;
    set => state.DiscardUnknownFields = value;
}
225
/// <summary>
/// Internal-only registry, stored in the parser state, that supplies extension identifiers
/// to compatible messages while parsing.
/// </summary>
internal ExtensionRegistry ExtensionRegistry
{
    get => state.ExtensionRegistry;
    set => state.ExtensionRegistry = value;
}
234
/// <summary>Exposes the backing byte buffer to other code in this assembly.</summary>
internal byte[] InternalBuffer
{
    get { return buffer; }
}
236
/// <summary>Exposes the underlying input stream (null for array-backed instances) to other code in this assembly.</summary>
internal Stream InternalInputStream
{
    get { return input; }
}
238
/// <summary>Returns a reference to the parser state, so callers in this assembly operate on it in place.</summary>
internal ref ParserInternalState InternalState
{
    get { return ref state; }
}
240
/// <summary>
/// Disposes of this instance, closing the underlying stream unless it was asked to be left open.
/// </summary>
/// <remarks>
/// There is nothing to flush, so disposing a <see cref="CodedInputStream"/> that was constructed
/// with the <c>leaveOpen</c> parameter set to <c>true</c> (or one that reads from a byte array)
/// does nothing.
/// </remarks>
public void Dispose()
{
    if (leaveOpen)
    {
        return;
    }

    input.Dispose();
}
256
257 #region Validation
/// <summary>
/// Checks that the most recent ReadTag() call returned tag 0, i.e. that the end of the
/// stream was reached where it was expected.
/// </summary>
/// <exception cref="InvalidProtocolBufferException">The
/// tag read was not the one specified</exception>
internal void CheckReadEndOfStreamTag() =>
    ParsingPrimitivesMessages.CheckReadEndOfStreamTag(ref state);
268 #endregion
269
270 #region Reading of tags etc
271
/// <summary>
/// Looks at the next field tag without consuming it, so that a following
/// <see cref="ReadTag"/> call returns the same value.
/// </summary>
public uint PeekTag()
{
    ReadOnlySpan<byte> data = buffer;
    return ParsingPrimitives.PeekTag(ref data, ref state);
}
282
/// <summary>
/// Reads the next field tag; a result of 0 signals "end of stream".
/// </summary>
/// <remarks>
/// A result of 0 does not have to mean that all data in this CodedInputStream is exhausted;
/// it can equally mark the end of a logical stream, such as an embedded message.
/// </remarks>
/// <returns>The next field tag, or 0 for end of stream. (0 is never a valid tag.)</returns>
public uint ReadTag()
{
    ReadOnlySpan<byte> data = buffer;
    return ParsingPrimitives.ParseTag(ref data, ref state);
}
297
/// <summary>
/// Skips over the data of the field whose tag was just read. Call this immediately after
/// <see cref="ReadTag"/> to pass over an unknown field.
/// </summary>
/// <remarks>
/// An <see cref="InvalidProtocolBufferException"/> is thrown when the last-read tag was an end-group tag.
/// To skip a group, call this method after reading the start-group tag so that the whole group is skipped.
/// This lets callers invoke the method on any field they do not understand, and still get an error
/// when an end-group tag has no matching earlier start-group tag.
/// </remarks>
/// <exception cref="InvalidProtocolBufferException">The last tag was an end-group tag</exception>
/// <exception cref="InvalidOperationException">The last read operation read to the end of the logical stream</exception>
public void SkipLastField()
{
    ReadOnlySpan<byte> data = buffer;
    ParsingPrimitivesMessages.SkipLastField(ref data, ref state);
}
316
/// <summary>
/// Skips a group, identified by its start-group tag.
/// </summary>
/// <param name="startGroupTag">The start-group tag, forwarded to <c>ParsingPrimitivesMessages.SkipGroup</c>.</param>
internal void SkipGroup(uint startGroupTag)
{
    ReadOnlySpan<byte> data = buffer;
    ParsingPrimitivesMessages.SkipGroup(ref data, ref state, startGroupTag);
}
325
/// <summary>
/// Reads a double field value from the stream.
/// </summary>
public double ReadDouble()
{
    ReadOnlySpan<byte> data = buffer;
    return ParsingPrimitives.ParseDouble(ref data, ref state);
}
334
/// <summary>
/// Reads a float field value from the stream.
/// </summary>
public float ReadFloat()
{
    ReadOnlySpan<byte> data = buffer;
    return ParsingPrimitives.ParseFloat(ref data, ref state);
}
343
/// <summary>
/// Reads a uint64 field value from the stream (a raw 64-bit varint).
/// </summary>
public ulong ReadUInt64() => ReadRawVarint64();
351
/// <summary>
/// Reads an int64 field value from the stream: a raw 64-bit varint cast to a signed value.
/// </summary>
public long ReadInt64()
{
    ulong raw = ReadRawVarint64();
    return (long) raw;
}
359
/// <summary>
/// Reads an int32 field value from the stream: a raw 32-bit varint cast to a signed value.
/// </summary>
public int ReadInt32()
{
    uint raw = ReadRawVarint32();
    return (int) raw;
}
367
/// <summary>
/// Reads a fixed64 field value from the stream (a 64-bit little-endian integer).
/// </summary>
public ulong ReadFixed64() => ReadRawLittleEndian64();
375
/// <summary>
/// Reads a fixed32 field value from the stream (a 32-bit little-endian integer).
/// </summary>
public uint ReadFixed32() => ReadRawLittleEndian32();
383
/// <summary>
/// Reads a bool field value from the stream: any non-zero 64-bit varint counts as <c>true</c>.
/// </summary>
public bool ReadBool()
{
    ulong raw = ReadRawVarint64();
    return raw != 0;
}
391
/// <summary>
/// Reads a string field value from the stream.
/// </summary>
public string ReadString()
{
    ReadOnlySpan<byte> data = buffer;
    return ParsingPrimitives.ReadString(ref data, ref state);
}
400
/// <summary>
/// Reads an embedded message field value from the stream into <paramref name="builder"/>.
/// </summary>
/// <param name="builder">The message that receives the parsed data.</param>
public void ReadMessage(IMessage builder)
{
    // TODO: for messages that do not implement IBufferMessage (and so have no InternalMergeFrom method),
    // this works but is not as efficient as it could be.
    // A ParseContext is initialized from this coded input stream just to parse the message length; after
    // that, parsing has to switch back to the CodedInputStream (copying and storing the state) so that the
    // legacy MergeFrom(CodedInputStream) method can be invoked.
    // The inefficiency is accepted for now: it only affects a backward-compatibility scenario, and
    // regenerating the code removes it.
    ParseContext context;
    ParseContext.Initialize(buffer.AsSpan(), ref state, out context);
    try
    {
        ParsingPrimitivesMessages.ReadMessage(ref context, builder);
    }
    finally
    {
        // Always write the (possibly advanced) state back, even when parsing throws.
        context.CopyStateTo(this);
    }
}
422
/// <summary>
/// Reads an embedded group field from the stream into <paramref name="builder"/>.
/// </summary>
/// <param name="builder">The message that receives the parsed data.</param>
public void ReadGroup(IMessage builder)
{
    ParseContext context;
    ParseContext.Initialize(this, out context);
    try
    {
        ParsingPrimitivesMessages.ReadGroup(ref context, builder);
    }
    finally
    {
        // Always write the (possibly advanced) state back, even when parsing throws.
        context.CopyStateTo(this);
    }
}
438
/// <summary>
/// Reads a bytes field value from the stream.
/// </summary>
public ByteString ReadBytes()
{
    ReadOnlySpan<byte> data = buffer;
    return ParsingPrimitives.ReadBytes(ref data, ref state);
}
447
/// <summary>
/// Reads a uint32 field value from the stream (a raw 32-bit varint).
/// </summary>
public uint ReadUInt32() => ReadRawVarint32();
455
/// <summary>
/// Reads an enum field value from the stream, as its underlying integer.
/// </summary>
public int ReadEnum()
{
    // Reads exactly like ReadInt32 for now; kept as its own method so the two stay logically separate.
    uint raw = ReadRawVarint32();
    return (int) raw;
}
464
/// <summary>
/// Reads an sfixed32 field value from the stream: a 32-bit little-endian integer cast to a signed value.
/// </summary>
public int ReadSFixed32()
{
    uint raw = ReadRawLittleEndian32();
    return (int) raw;
}
472
/// <summary>
/// Reads an sfixed64 field value from the stream: a 64-bit little-endian integer cast to a signed value.
/// </summary>
public long ReadSFixed64()
{
    ulong raw = ReadRawLittleEndian64();
    return (long) raw;
}
480
/// <summary>
/// Reads an sint32 field value from the stream: a raw 32-bit varint passed through
/// <c>ParsingPrimitives.DecodeZigZag32</c>.
/// </summary>
public int ReadSInt32()
{
    uint encoded = ReadRawVarint32();
    return ParsingPrimitives.DecodeZigZag32(encoded);
}
488
/// <summary>
/// Reads an sint64 field value from the stream: a raw 64-bit varint passed through
/// <c>ParsingPrimitives.DecodeZigZag64</c>.
/// </summary>
public long ReadSInt64()
{
    ulong encoded = ReadRawVarint64();
    return ParsingPrimitives.DecodeZigZag64(encoded);
}
496
/// <summary>
/// Reads the length prefix of length-delimited data.
/// </summary>
/// <remarks>
/// Internally this just reads a varint; the dedicated method exists so that calling code
/// reads more clearly.
/// </remarks>
public int ReadLength()
{
    ReadOnlySpan<byte> data = buffer;
    return ParsingPrimitives.ParseLength(ref data, ref state);
}
509
/// <summary>
/// Peeks at the next tag in the stream. When it equals <paramref name="tag"/> the tag is
/// consumed and <c>true</c> is returned; otherwise the stream stays at its original
/// position and <c>false</c> is returned.
/// </summary>
/// <param name="tag">The tag to look for.</param>
public bool MaybeConsumeTag(uint tag)
{
    ReadOnlySpan<byte> data = buffer;
    return ParsingPrimitives.MaybeConsumeTag(ref data, ref state, tag);
}
520
521#endregion
522
523 #region Underlying reading primitives
524
/// <summary>
/// Reads a raw varint from the stream, discarding the upper bits if it is larger than 32 bits.
/// The implementation is optimised for the case where plenty of data is already buffered:
/// the size is checked once and the bytes are then read straight from the buffer, without
/// rechecking the buffer length each time.
/// </summary>
internal uint ReadRawVarint32()
{
    ReadOnlySpan<byte> data = buffer;
    return ParsingPrimitives.ParseRawVarint32(ref data, ref state);
}
536
/// <summary>
/// Reads a varint from <paramref name="input"/> one byte at a time, so that no bytes after
/// the end of the varint are consumed. Wrapping the stream in a CodedInputStream and reading
/// the varint through it would probably read past the end of the varint, because
/// CodedInputStream buffers its input.
/// </summary>
/// <param name="input">The stream to read the varint from.</param>
/// <returns>The value returned by <c>ParsingPrimitives.ReadRawVarint32</c> for <paramref name="input"/>.</returns>
internal static uint ReadRawVarint32(Stream input) => ParsingPrimitives.ReadRawVarint32(input);
550
/// <summary>
/// Reads a raw 64-bit varint from the stream.
/// </summary>
internal ulong ReadRawVarint64()
{
    ReadOnlySpan<byte> data = buffer;
    return ParsingPrimitives.ParseRawVarint64(ref data, ref state);
}
559
/// <summary>
/// Reads a little-endian 32-bit integer from the stream.
/// </summary>
internal uint ReadRawLittleEndian32()
{
    ReadOnlySpan<byte> data = buffer;
    return ParsingPrimitives.ParseRawLittleEndian32(ref data, ref state);
}
568
/// <summary>
/// Reads a little-endian 64-bit integer from the stream.
/// </summary>
internal ulong ReadRawLittleEndian64()
{
    ReadOnlySpan<byte> data = buffer;
    return ParsingPrimitives.ParseRawLittleEndian64(ref data, ref state);
}
577 #endregion
578
579 #region Internal reading and buffer management
580
/// <summary>
/// Sets currentLimit to (current position) + byteLimit and returns the limit that was in
/// force before. Called when descending into a length-delimited embedded message.
/// </summary>
/// <param name="byteLimit">Number of bytes, counted from the current position, to allow.</param>
/// <returns>The old limit.</returns>
internal int PushLimit(int byteLimit) => SegmentedBufferHelper.PushLimit(ref state, byteLimit);
591
/// <summary>
/// Discards the current limit, handing <paramref name="oldLimit"/> to
/// <c>SegmentedBufferHelper.PopLimit</c>.
/// </summary>
/// <param name="oldLimit">The previous limit, typically the value returned by <c>PushLimit</c>.</param>
internal void PopLimit(int oldLimit) => SegmentedBufferHelper.PopLimit(ref state, oldLimit);
599
/// <summary>
/// Gets whether all the data before the limit has been read.
/// </summary>
internal bool ReachedLimit => SegmentedBufferHelper.IsReachedLimit(ref state);
611
/// <summary>
/// Gets whether the stream has reached the end of the input: either the underlying input
/// source is exhausted, or a limit created using PushLimit has been reached.
/// </summary>
public bool IsAtEnd
{
    get
    {
        ReadOnlySpan<byte> data = buffer;
        return SegmentedBufferHelper.IsAtEnd(ref data, ref state);
    }
}
625
/// <summary>
/// Reads exactly <paramref name="size"/> bytes from the input.
/// </summary>
/// <param name="size">The number of bytes to read.</param>
/// <exception cref="InvalidProtocolBufferException">
/// the end of the stream or the current limit was reached
/// </exception>
internal byte[] ReadRawBytes(int size)
{
    ReadOnlySpan<byte> data = buffer;
    return ParsingPrimitives.ReadRawBytes(ref data, ref state, size);
}
637
/// <summary>
/// Reads a top-level message, or a nested message whose limits have already been pushed;
/// the parser proceeds until the end of the current limit.
/// NOTE: this method has to be public because generated code invokes it - e.g. from the
/// msg.MergeFrom(CodedInputStream input) method.
/// </summary>
/// <param name="message">The message that receives the parsed data.</param>
public void ReadRawMessage(IMessage message)
{
    ParseContext context;
    ParseContext.Initialize(this, out context);
    try
    {
        ParsingPrimitivesMessages.ReadRawMessage(ref context, message);
    }
    finally
    {
        // Always write the (possibly advanced) state back, even when parsing throws.
        context.CopyStateTo(this);
    }
}
655#endregion
656 }
657}