| #region Copyright notice and license |
| // Protocol Buffers - Google's data interchange format |
| // Copyright 2015 Google Inc. All rights reserved. |
| // https://developers.google.com/protocol-buffers/ |
| // |
| // Redistribution and use in source and binary forms, with or without |
| // modification, are permitted provided that the following conditions are |
| // met: |
| // |
| // * Redistributions of source code must retain the above copyright |
| // notice, this list of conditions and the following disclaimer. |
| // * Redistributions in binary form must reproduce the above |
| // copyright notice, this list of conditions and the following disclaimer |
| // in the documentation and/or other materials provided with the |
| // distribution. |
| // * Neither the name of Google Inc. nor the names of its |
| // contributors may be used to endorse or promote products derived from |
| // this software without specific prior written permission. |
| // |
| // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| #endregion |
| |
| using Google.Protobuf.Reflection; |
| using Google.Protobuf.WellKnownTypes; |
| using System; |
| using System.Collections; |
| using System.Collections.Generic; |
| using System.Globalization; |
| using System.IO; |
| using System.Text; |
| using System.Text.RegularExpressions; |
| |
| namespace Google.Protobuf |
| { |
| /// <summary> |
| /// Reflection-based converter from JSON to messages. |
| /// </summary> |
| /// <remarks> |
| /// <para> |
| /// Instances of this class are thread-safe, with no mutable state. |
| /// </para> |
| /// <para> |
| /// This is a simple start to get JSON parsing working. As it's reflection-based, |
| /// it's not as quick as baking calls into generated messages - but is a simpler implementation. |
| /// (This code is generally not heavily optimized.) |
| /// </para> |
| /// </remarks> |
| public sealed class JsonParser |
| { |
// Coarse pattern for Timestamp text: a date/time, an optional fraction of 1-9 digits, then either "Z"
// or a signed hh:mm offset. It spells digits as [0-9] rather than \d so that only ASCII digits match.
// It is a first-pass filter, not a complete validator; the parsing that follows rejects the rest.
private static readonly Regex TimestampRegex = new Regex(
    @"^(?<datetime>[0-9]{4}-[01][0-9]-[0-3][0-9]T[012][0-9]:[0-5][0-9]:[0-5][0-9])(?<subseconds>\.[0-9]{1,9})?(?<offset>(Z|[+-][0-1][0-9]:[0-5][0-9]))$",
    FrameworkPortability.CompiledRegexWhereAvailable);

// Pattern for Duration text: optional '-', 1-12 integer digits, an optional fraction of 1-9 digits, then 's'.
private static readonly Regex DurationRegex = new Regex(
    @"^(?<sign>-)?(?<int>[0-9]{1,12})(?<subseconds>\.[0-9]{1,9})?s$",
    FrameworkPortability.CompiledRegexWhereAvailable);

// Multipliers turning a parsed fraction into nanoseconds. MergeTimestamp indexes this by the length of the
// "subseconds" capture, which includes the leading '.', so a one-digit fraction uses index 2.
private static readonly int[] SubsecondScalingFactors =
{
    0, 100000000, 100000000, 10000000, 1000000, 100000, 10000, 1000, 100, 10, 1
};

private static readonly char[] FieldMaskPathSeparators = { ',' };

// Instance returned by the Default property, built from Settings.Default.
private static readonly JsonParser defaultInstance = new JsonParser(Settings.Default);
| |
// TODO: Consider introducing a class containing parse state of the parser, tokenizer and depth. That would simplify these handlers
// and the signatures of various methods.
//
// Lookup from the full name of a well-known type to the routine that merges its JSON representation.
// Merge(IMessage, JsonTokenizer) consults this table before falling back to ordinary object parsing.
private static readonly Dictionary<string, Action<JsonParser, IMessage, JsonTokenizer>>
    WellKnownTypeHandlers = new Dictionary<string, Action<JsonParser, IMessage, JsonTokenizer>>
    {
        // Types whose JSON form is a single token, consumed here and handed to a static helper.
        [Timestamp.Descriptor.FullName] = (owner, target, tokens) => MergeTimestamp(target, tokens.Next()),
        [Duration.Descriptor.FullName] = (owner, target, tokens) => MergeDuration(target, tokens.Next()),
        [FieldMask.Descriptor.FullName] = (owner, target, tokens) => MergeFieldMask(target, tokens.Next()),

        // Types that need the parser instance in order to read further tokens.
        [Value.Descriptor.FullName] = (owner, target, tokens) => owner.MergeStructValue(target, tokens),
        [ListValue.Descriptor.FullName] = (owner, target, tokens) =>
            owner.MergeRepeatedField(target, target.Descriptor.Fields[ListValue.ValuesFieldNumber], tokens),
        [Struct.Descriptor.FullName] = (owner, target, tokens) => owner.MergeStruct(target, tokens),
        [Any.Descriptor.FullName] = (owner, target, tokens) => owner.MergeAny(target, tokens),

        // Wrapper types all share one handler.
        [Int32Value.Descriptor.FullName] = MergeWrapperField,
        [Int64Value.Descriptor.FullName] = MergeWrapperField,
        [UInt32Value.Descriptor.FullName] = MergeWrapperField,
        [UInt64Value.Descriptor.FullName] = MergeWrapperField,
        [FloatValue.Descriptor.FullName] = MergeWrapperField,
        [DoubleValue.Descriptor.FullName] = MergeWrapperField,
        [BytesValue.Descriptor.FullName] = MergeWrapperField,
        [StringValue.Descriptor.FullName] = MergeWrapperField,
        [BoolValue.Descriptor.FullName] = MergeWrapperField
    };
| |
// Handler shared by all wrapper-type entries in WellKnownTypeHandlers, so the table does not have to
// repeat the same lambda: merges the next JSON value into the message's wrapper "value" field.
private static void MergeWrapperField(JsonParser parser, IMessage message, JsonTokenizer tokenizer)
{
    var wrappedValueField = message.Descriptor.Fields[WrappersReflection.WrapperValueFieldNumber];
    parser.MergeField(message, wrappedValueField, tokenizer);
}
| |
/// <summary>
/// Returns a parser using the default settings.
/// </summary>
public static JsonParser Default => defaultInstance;
| |
// Settings this parser was constructed with; assigned once in the constructor.
private readonly Settings settings;

/// <summary>
/// Creates a new parser with the given settings.
/// </summary>
/// <param name="settings">The settings; validated with <c>ProtoPreconditions.CheckNotNull</c>.</param>
public JsonParser(Settings settings)
{
    var validatedSettings = ProtoPreconditions.CheckNotNull(settings, nameof(settings));
    this.settings = validatedSettings;
}
| |
/// <summary>
/// Parses <paramref name="json"/> and merges the information into the given message.
/// </summary>
/// <remarks>
/// Wraps the text in a <see cref="StringReader"/> and delegates to the reader-based overload.
/// </remarks>
/// <param name="message">The message to merge the JSON information into.</param>
/// <param name="json">The JSON to parse.</param>
internal void Merge(IMessage message, string json)
{
    var textSource = new StringReader(json);
    Merge(message, textSource);
}
| |
/// <summary>
/// Parses JSON read from <paramref name="jsonReader"/> and merges the information into the given message.
/// </summary>
/// <param name="message">The message to merge the JSON information into.</param>
/// <param name="jsonReader">Reader providing the JSON to parse.</param>
/// <exception cref="InvalidProtocolBufferException">
/// The token following the merged value is not the end of the document.
/// </exception>
internal void Merge(IMessage message, TextReader jsonReader)
{
    var tokens = JsonTokenizer.FromTextReader(jsonReader);
    Merge(message, tokens);

    // After the top-level value has been merged, the only acceptable token is end-of-document.
    var trailing = tokens.Next();
    if (trailing != JsonToken.EndDocument)
    {
        throw new InvalidProtocolBufferException("Expected end of JSON after object");
    }
}
| |
/// <summary>
/// Merges the given message using data from the given tokenizer. In most cases, the next
/// token should be a "start object" token, but wrapper types and nullity can invalidate
/// that assumption. This is implemented as an LL(1) recursive descent parser over the stream
/// of tokens provided by the tokenizer. This token stream is assumed to be valid JSON, with the
/// tokenizer performing that validation - but not every token stream is valid "protobuf JSON".
/// </summary>
/// <remarks>
/// Well-known types with an entry in the handler table are delegated to that handler. All other
/// messages are read as a JSON object whose property names are looked up by JSON name.
/// </remarks>
private void Merge(IMessage message, JsonTokenizer tokenizer)
{
    if (tokenizer.ObjectDepth > settings.RecursionLimit)
    {
        throw InvalidProtocolBufferException.JsonRecursionLimitExceeded();
    }

    var descriptor = message.Descriptor;
    if (descriptor.IsWellKnownType)
    {
        Action<JsonParser, IMessage, JsonTokenizer> specialHandler;
        if (WellKnownTypeHandlers.TryGetValue(descriptor.FullName, out specialHandler))
        {
            specialHandler(this, message, tokenizer);
            return;
        }
        // A well-known type without a registered handler is parsed like any other message.
    }

    if (tokenizer.Next().Type != JsonToken.TokenType.StartObject)
    {
        throw new InvalidProtocolBufferException("Expected an object");
    }

    var fieldsByJsonName = descriptor.Fields.ByJsonName();
    // Oneofs that have already received a value; each may be populated only once per object.
    // Allocated on first use, since most messages never need it.
    HashSet<OneofDescriptor> populatedOneofs = null;

    for (var token = tokenizer.Next(); token.Type != JsonToken.TokenType.EndObject; token = tokenizer.Next())
    {
        if (token.Type != JsonToken.TokenType.Name)
        {
            throw new InvalidOperationException("Unexpected token type " + token.Type);
        }

        string name = token.StringValue;
        FieldDescriptor field;
        if (!fieldsByJsonName.TryGetValue(name, out field))
        {
            // Unknown property: either skip its value or fail, depending on the settings.
            if (!settings.IgnoreUnknownFields)
            {
                throw new InvalidProtocolBufferException("Unknown field: " + name);
            }
            tokenizer.SkipValue();
            continue;
        }

        var oneof = field.ContainingOneof;
        if (oneof != null)
        {
            populatedOneofs = populatedOneofs ?? new HashSet<OneofDescriptor>();
            if (!populatedOneofs.Add(oneof))
            {
                throw new InvalidProtocolBufferException($"Multiple values specified for oneof {oneof.Name}");
            }
        }
        MergeField(message, field, tokenizer);
    }
}
| |
/// <summary>
/// Reads the next JSON value from <paramref name="tokenizer"/> and stores it in
/// <paramref name="field"/> of <paramref name="message"/>, dispatching on whether the field is a map,
/// a repeated field or a singular field.
/// </summary>
private void MergeField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer)
{
    var peeked = tokenizer.Next();

    // A JSON null clears the field, except for a singular google.protobuf.Value field, where null
    // is itself a value and is parsed below.
    // Note: different from Java API, which just ignores it.
    // TODO: Bring it more in line? Discuss...
    bool sawNull = peeked.Type == JsonToken.TokenType.Null;
    if (sawNull && (field.IsMap || field.IsRepeated || !IsGoogleProtobufValueField(field)))
    {
        field.Accessor.Clear(message);
        return;
    }

    // The token was only inspected; return it so the specific merge routine can consume it.
    tokenizer.PushBack(peeked);

    if (field.IsMap)
    {
        MergeMapField(message, field, tokenizer);
        return;
    }
    if (field.IsRepeated)
    {
        MergeRepeatedField(message, field, tokenizer);
        return;
    }

    var parsed = ParseSingleValue(field, tokenizer);
    field.Accessor.SetValue(message, parsed);
}
| |
/// <summary>
/// Reads a JSON array from <paramref name="tokenizer"/> and appends each element to the list held by
/// the repeated <paramref name="field"/> of <paramref name="message"/>.
/// </summary>
/// <exception cref="InvalidProtocolBufferException">
/// The next token is not the start of an array, or an element parses to null.
/// </exception>
private void MergeRepeatedField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer)
{
    var opening = tokenizer.Next();
    if (opening.Type != JsonToken.TokenType.StartArray)
    {
        throw new InvalidProtocolBufferException("Repeated field value was not an array. Token type: " + opening.Type);
    }

    var elements = (IList) field.Accessor.GetValue(message);
    for (var next = tokenizer.Next(); next.Type != JsonToken.TokenType.EndArray; next = tokenizer.Next())
    {
        // Not the end of the array, so this token starts an element: hand it back for parsing.
        tokenizer.PushBack(next);
        object element = ParseSingleValue(field, tokenizer);
        if (element == null)
        {
            throw new InvalidProtocolBufferException("Repeated field elements cannot be null");
        }
        elements.Add(element);
    }
}
| |
/// <summary>
/// Reads a JSON object from <paramref name="tokenizer"/> and stores each property as an entry in the
/// dictionary held by the map <paramref name="field"/> of <paramref name="message"/>.
/// </summary>
/// <exception cref="InvalidProtocolBufferException">
/// The next token is not the start of an object, the map entry type lacks field 1 or field 2,
/// or an entry's value parses to null.
/// </exception>
private void MergeMapField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer)
{
    // Map fields are always objects, even if the values are well-known types: ParseSingleValue handles those.
    var opening = tokenizer.Next();
    if (opening.Type != JsonToken.TokenType.StartObject)
    {
        throw new InvalidProtocolBufferException("Expected an object to populate a map");
    }

    // The entry message type carries the key as field number 1 and the value as field number 2.
    var entryType = field.MessageType;
    var keyField = entryType.FindFieldByNumber(1);
    var valueField = entryType.FindFieldByNumber(2);
    if (keyField == null || valueField == null)
    {
        throw new InvalidProtocolBufferException("Invalid map field: " + field.FullName);
    }

    var entries = (IDictionary) field.Accessor.GetValue(message);
    for (var next = tokenizer.Next(); next.Type != JsonToken.TokenType.EndObject; next = tokenizer.Next())
    {
        // The property name is the key text; the value follows in the token stream.
        object key = ParseMapKey(keyField, next.StringValue);
        object value = ParseSingleValue(valueField, tokenizer);
        if (value == null)
        {
            throw new InvalidProtocolBufferException("Map values must not be null");
        }
        entries[key] = value;
    }
}
| |
/// <summary>
/// Returns true when <paramref name="field"/> is a message field whose message type has the same
/// full name as <c>Value.Descriptor</c>.
/// </summary>
private static bool IsGoogleProtobufValueField(FieldDescriptor field)
{
    if (field.FieldType != FieldType.Message)
    {
        return false;
    }
    return field.MessageType.FullName == Value.Descriptor.FullName;
}
| |
/// <summary>
/// Parses a single JSON value from <paramref name="tokenizer"/> according to the type of
/// <paramref name="field"/> and returns it boxed.
/// </summary>
/// <remarks>
/// A JSON null yields <c>Value.ForNull()</c> for a google.protobuf.Value field and a null reference
/// otherwise; callers decide whether null is acceptable. Wrapper message types are parsed as the type
/// of their "value" field; any other message type is parsed recursively into a new message.
/// </remarks>
/// <exception cref="InvalidProtocolBufferException">
/// The token type cannot be converted to the field type.
/// </exception>
private object ParseSingleValue(FieldDescriptor field, JsonTokenizer tokenizer)
{
    var token = tokenizer.Next();
    if (token.Type == JsonToken.TokenType.Null)
    {
        // TODO: In order to support dynamic messages, we should really build this up
        // dynamically.
        if (IsGoogleProtobufValueField(field))
        {
            return Value.ForNull();
        }
        return null;
    }

    var fieldType = field.FieldType;
    if (fieldType == FieldType.Message)
    {
        // Parse wrapper types as their constituent types.
        // TODO: What does this mean for null?
        if (field.MessageType.IsWrapperType)
        {
            field = field.MessageType.Fields[WrappersReflection.WrapperValueFieldNumber];
            fieldType = field.FieldType;
        }
        else
        {
            // TODO: Merge the current value in message? (Public API currently doesn't make this relevant as we don't expose merging.)
            // The token already read belongs to the nested message, so give it back first.
            tokenizer.PushBack(token);
            IMessage subMessage = NewMessageForField(field);
            Merge(subMessage, tokenizer);
            return subMessage;
        }
    }

    // JSON null never reaches this switch: it is fully handled by the early return above, so no
    // case for it is needed here.
    switch (token.Type)
    {
        case JsonToken.TokenType.True:
        case JsonToken.TokenType.False:
            if (fieldType == FieldType.Bool)
            {
                return token.Type == JsonToken.TokenType.True;
            }
            // Fall through to "we don't support this type for this case"; could duplicate the behaviour of the default
            // case instead, but this way we'd only need to change one place.
            goto default;
        case JsonToken.TokenType.StringValue:
            return ParseSingleStringValue(field, token.StringValue);
        // Note: not passing the number value itself here, as we may end up storing the string value in the token too.
        case JsonToken.TokenType.Number:
            return ParseSingleNumberValue(field, token);
        default:
            throw new InvalidProtocolBufferException("Unsupported JSON token type " + token.Type + " for field type " + fieldType);
    }
}
| |
/// <summary>
/// Parses <paramref name="json"/> into a new message.
/// </summary>
/// <remarks>
/// The text is validated with <c>ProtoPreconditions.CheckNotNull</c>, wrapped in a
/// <see cref="StringReader"/> and passed to the reader-based overload.
/// </remarks>
/// <typeparam name="T">The type of message to create.</typeparam>
/// <param name="json">The JSON to parse.</param>
/// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception>
/// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
public T Parse<T>(string json) where T : IMessage, new()
{
    ProtoPreconditions.CheckNotNull(json, nameof(json));
    var textSource = new StringReader(json);
    return Parse<T>(textSource);
}
| |
/// <summary>
/// Parses JSON read from <paramref name="jsonReader"/> into a new message.
/// </summary>
/// <remarks>
/// A fresh <typeparamref name="T"/> is created with its parameterless constructor and the JSON is
/// merged into it.
/// </remarks>
/// <typeparam name="T">The type of message to create.</typeparam>
/// <param name="jsonReader">Reader providing the JSON to parse.</param>
/// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception>
/// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
public T Parse<T>(TextReader jsonReader) where T : IMessage, new()
{
    ProtoPreconditions.CheckNotNull(jsonReader, nameof(jsonReader));
    T result = new T();
    Merge(result, jsonReader);
    return result;
}
| |
/// <summary>
/// Parses <paramref name="json"/> into a new message.
/// </summary>
/// <remarks>
/// Both arguments are validated with <c>ProtoPreconditions.CheckNotNull</c>; the text is then wrapped
/// in a <see cref="StringReader"/> and passed to the reader-based overload.
/// </remarks>
/// <param name="json">The JSON to parse.</param>
/// <param name="descriptor">Descriptor of message type to parse.</param>
/// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception>
/// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
public IMessage Parse(string json, MessageDescriptor descriptor)
{
    ProtoPreconditions.CheckNotNull(json, nameof(json));
    ProtoPreconditions.CheckNotNull(descriptor, nameof(descriptor));
    var textSource = new StringReader(json);
    return Parse(textSource, descriptor);
}
| |
/// <summary>
/// Parses JSON read from <paramref name="jsonReader"/> into a new message.
/// </summary>
/// <remarks>
/// The message is created through <c>descriptor.Parser.CreateTemplate()</c> and the JSON is merged
/// into it.
/// </remarks>
/// <param name="jsonReader">Reader providing the JSON to parse.</param>
/// <param name="descriptor">Descriptor of message type to parse.</param>
/// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception>
/// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
public IMessage Parse(TextReader jsonReader, MessageDescriptor descriptor)
{
    ProtoPreconditions.CheckNotNull(jsonReader, nameof(jsonReader));
    ProtoPreconditions.CheckNotNull(descriptor, nameof(descriptor));
    IMessage result = descriptor.Parser.CreateTemplate();
    Merge(result, jsonReader);
    return result;
}
| |
/// <summary>
/// Merges the next JSON value into a google.protobuf.Value <paramref name="message"/>, choosing the
/// field to set from the type of the first token: null, string, number, boolean, object (struct) or
/// array (list).
/// </summary>
/// <exception cref="InvalidOperationException">The first token is of any other type.</exception>
private void MergeStructValue(IMessage message, JsonTokenizer tokenizer)
{
    var first = tokenizer.Next();
    var valueFields = message.Descriptor.Fields;
    switch (first.Type)
    {
        case JsonToken.TokenType.Null:
            valueFields[Value.NullValueFieldNumber].Accessor.SetValue(message, 0);
            return;
        case JsonToken.TokenType.StringValue:
            valueFields[Value.StringValueFieldNumber].Accessor.SetValue(message, first.StringValue);
            return;
        case JsonToken.TokenType.Number:
            valueFields[Value.NumberValueFieldNumber].Accessor.SetValue(message, first.NumberValue);
            return;
        case JsonToken.TokenType.False:
        case JsonToken.TokenType.True:
            valueFields[Value.BoolValueFieldNumber].Accessor.SetValue(message, first.Type == JsonToken.TokenType.True);
            return;
        case JsonToken.TokenType.StartObject:
        case JsonToken.TokenType.StartArray:
            {
                // Objects and arrays follow the same steps and differ only in the target field:
                // build the nested message, replay the opening token, merge, then store the result.
                int nestedFieldNumber = first.Type == JsonToken.TokenType.StartObject
                    ? Value.StructValueFieldNumber
                    : Value.ListValueFieldNumber;
                var nestedField = valueFields[nestedFieldNumber];
                var nested = NewMessageForField(nestedField);
                tokenizer.PushBack(first);
                Merge(nested, tokenizer);
                nestedField.Accessor.SetValue(message, nested);
                return;
            }
        default:
            throw new InvalidOperationException("Unexpected token type: " + first.Type);
    }
}
| |
/// <summary>
/// Merges a JSON object into a google.protobuf.Struct <paramref name="message"/> by treating the
/// object as the content of the struct's "fields" map.
/// </summary>
/// <exception cref="InvalidProtocolBufferException">The next token is not the start of an object.</exception>
private void MergeStruct(IMessage message, JsonTokenizer tokenizer)
{
    var opening = tokenizer.Next();
    if (opening.Type != JsonToken.TokenType.StartObject)
    {
        throw new InvalidProtocolBufferException("Expected object value for Struct");
    }
    // MergeMapField expects to read the start-object token itself, so hand it back.
    tokenizer.PushBack(opening);

    var mapField = message.Descriptor.Fields[Struct.FieldsFieldNumber];
    MergeMapField(message, mapField, tokenizer);
}
| |
/// <summary>
/// Merges a JSON object into a google.protobuf.Any <paramref name="message"/>.
/// </summary>
/// <remarks>
/// Tokens are recorded until the @type property of this object is found. Its value is looked up in
/// the type registry from the settings, the recorded tokens plus the rest of the object are replayed
/// into a new message of that type, and the result is packed into the Any's type URL and value fields.
/// </remarks>
/// <exception cref="InvalidProtocolBufferException">
/// The value is not an object, has no @type property, or @type is not a string.
/// </exception>
/// <exception cref="InvalidOperationException">The type registry has no descriptor for the type name.</exception>
private void MergeAny(IMessage message, JsonTokenizer tokenizer)
{
    var current = tokenizer.Next();
    if (current.Type != JsonToken.TokenType.StartObject)
    {
        throw new InvalidProtocolBufferException("Expected object value for Any");
    }
    int anyObjectDepth = tokenizer.ObjectDepth;

    // Everything seen before @type is buffered here so that it can be replayed afterwards.
    var buffered = new List<JsonToken>();

    // Stop only at a property named @type at the depth of *this* object; the depth test keeps a
    // nested Any's @type, appearing earlier in the stream, from being mistaken for ours.
    while (!(current.Type == JsonToken.TokenType.Name &&
             current.StringValue == JsonFormatter.AnyTypeUrlField &&
             tokenizer.ObjectDepth == anyObjectDepth))
    {
        buffered.Add(current);
        current = tokenizer.Next();

        // Dropping below the starting depth means the object closed without an @type property.
        if (tokenizer.ObjectDepth < anyObjectDepth)
        {
            throw new InvalidProtocolBufferException("Any message with no @type");
        }
    }

    // Neither the @type name token nor its value is added to the buffered tokens.
    var typeToken = tokenizer.Next();
    if (typeToken.Type != JsonToken.TokenType.StringValue)
    {
        throw new InvalidProtocolBufferException("Expected string value for Any.@type");
    }
    string typeUrl = typeToken.StringValue;
    string typeName = Any.GetTypeName(typeUrl);

    MessageDescriptor descriptor = settings.TypeRegistry.Find(typeName);
    if (descriptor == null)
    {
        throw new InvalidOperationException($"Type registry has no descriptor for type name '{typeName}'");
    }

    // Replay what was buffered followed by whatever remains of the object, parsing it as normal.
    // The original tokenizer should end up at the end of the object.
    var replay = JsonTokenizer.FromReplayedTokens(buffered, tokenizer);
    var body = descriptor.Parser.CreateTemplate();
    if (descriptor.IsWellKnownType)
    {
        MergeWellKnownTypeAnyBody(body, replay);
    }
    else
    {
        Merge(body, replay);
    }
    var data = body.ToByteString();

    // Pack the serialized body into the Any received as a parameter.
    var anyFields = message.Descriptor.Fields;
    anyFields[Any.TypeUrlFieldNumber].Accessor.SetValue(message, typeUrl);
    anyFields[Any.ValueFieldNumber].Accessor.SetValue(message, data);
}
| |
// Parses the body of an Any whose payload is a well-known type. Such a payload lives in a single
// JSON property (JsonFormatter.AnyWellKnownTypeValueField). The @type property has already been
// removed from the replayed stream, so the only tokens expected are: start-object, that property
// name, the value itself, and end-object.
private void MergeWellKnownTypeAnyBody(IMessage body, JsonTokenizer tokenizer)
{
    // Skip the start-object token; MergeAny has already checked that it is one.
    tokenizer.Next();

    var propertyName = tokenizer.Next();
    // TODO: What about an absent Int32Value, for example?
    bool isValueProperty = propertyName.Type == JsonToken.TokenType.Name &&
        propertyName.StringValue == JsonFormatter.AnyWellKnownTypeValueField;
    if (!isValueProperty)
    {
        throw new InvalidProtocolBufferException($"Expected '{JsonFormatter.AnyWellKnownTypeValueField}' property for well-known type Any body");
    }

    Merge(body, tokenizer);

    var closing = tokenizer.Next();
    if (closing.Type != JsonToken.TokenType.EndObject)
    {
        throw new InvalidProtocolBufferException($"Expected end-object token after @type/value for well-known type");
    }
}
| |
| #region Utility methods which don't depend on the state (or settings) of the parser. |
/// <summary>
/// Converts the text of a JSON property name into a boxed map key of the type required by
/// <paramref name="field"/>.
/// </summary>
/// <exception cref="InvalidProtocolBufferException">
/// The text is not "true"/"false" for a bool key, or the field type is not one handled here.
/// </exception>
private static object ParseMapKey(FieldDescriptor field, string keyText)
{
    switch (field.FieldType)
    {
        case FieldType.String:
            return keyText;
        case FieldType.Bool:
            // Only the exact lower-case literals are accepted.
            switch (keyText)
            {
                case "true":
                    return true;
                case "false":
                    return false;
                default:
                    throw new InvalidProtocolBufferException("Invalid string for bool map key: " + keyText);
            }
        case FieldType.Int32:
        case FieldType.SInt32:
        case FieldType.SFixed32:
            return ParseNumericString(keyText, int.Parse);
        case FieldType.UInt32:
        case FieldType.Fixed32:
            return ParseNumericString(keyText, uint.Parse);
        case FieldType.Int64:
        case FieldType.SInt64:
        case FieldType.SFixed64:
            return ParseNumericString(keyText, long.Parse);
        case FieldType.UInt64:
        case FieldType.Fixed64:
            return ParseNumericString(keyText, ulong.Parse);
        default:
            throw new InvalidProtocolBufferException("Invalid field type for map: " + field.FieldType);
    }
}
| |
/// <summary>
/// Converts the numeric value of a JSON number <paramref name="token"/> into a boxed value of the
/// type required by <paramref name="field"/>.
/// </summary>
/// <remarks>
/// Integer and enum targets must receive a whole number; the casts run in a checked context so that
/// an out-of-range value raises <see cref="OverflowException"/>, which is reported as an
/// <see cref="InvalidProtocolBufferException"/>. Enum values are returned as int without checking
/// that they are known.
/// </remarks>
private static object ParseSingleNumberValue(FieldDescriptor field, JsonToken token)
{
    double number = token.NumberValue;
    try
    {
        checked
        {
            switch (field.FieldType)
            {
                case FieldType.Double:
                    return number;
                case FieldType.Float:
                    // NaN and the infinities map to their float counterparts; any other value
                    // outside the float range is an error.
                    if (double.IsNaN(number))
                    {
                        return float.NaN;
                    }
                    if (double.IsPositiveInfinity(number))
                    {
                        return float.PositiveInfinity;
                    }
                    if (double.IsNegativeInfinity(number))
                    {
                        return float.NegativeInfinity;
                    }
                    if (number > float.MaxValue || number < float.MinValue)
                    {
                        throw new InvalidProtocolBufferException($"Value out of range: {number}");
                    }
                    return (float) number;
                case FieldType.Int32:
                case FieldType.SInt32:
                case FieldType.SFixed32:
                    CheckInteger(number);
                    return (int) number;
                case FieldType.UInt32:
                case FieldType.Fixed32:
                    CheckInteger(number);
                    return (uint) number;
                case FieldType.Int64:
                case FieldType.SInt64:
                case FieldType.SFixed64:
                    CheckInteger(number);
                    return (long) number;
                case FieldType.UInt64:
                case FieldType.Fixed64:
                    CheckInteger(number);
                    return (ulong) number;
                case FieldType.Enum:
                    CheckInteger(number);
                    // Just return it as an int, and let the CLR convert it.
                    // Note that we deliberately don't check that it's a known value.
                    return (int) number;
                default:
                    throw new InvalidProtocolBufferException($"Unsupported conversion from JSON number for field type {field.FieldType}");
            }
        }
    }
    catch (OverflowException)
    {
        throw new InvalidProtocolBufferException($"Value out of range: {number}");
    }
}
| |
/// <summary>
/// Throws <see cref="InvalidProtocolBufferException"/> unless <paramref name="value"/> is a finite
/// number with no fractional part.
/// </summary>
private static void CheckInteger(double value)
{
    bool notFinite = double.IsInfinity(value) || double.IsNaN(value);
    if (notFinite || value != Math.Floor(value))
    {
        throw new InvalidProtocolBufferException($"Value not an integer: {value}");
    }
}
| |
/// <summary>
/// Converts the text of a JSON string token into a boxed value of the type required by
/// <paramref name="field"/>: the text itself, base64-decoded bytes, a parsed number, or the number
/// of the enum value with that name.
/// </summary>
/// <exception cref="InvalidProtocolBufferException">
/// The text is not valid for the field type, or the field type cannot be set from a string.
/// </exception>
private static object ParseSingleStringValue(FieldDescriptor field, string text)
{
    switch (field.FieldType)
    {
        case FieldType.String:
            return text;
        case FieldType.Bytes:
            try
            {
                return ByteString.FromBase64(text);
            }
            catch (FormatException e)
            {
                throw InvalidProtocolBufferException.InvalidBase64(e);
            }
        case FieldType.Enum:
            {
                var namedValue = field.EnumType.FindValueByName(text);
                if (namedValue == null)
                {
                    throw new InvalidProtocolBufferException($"Invalid enum value: {text} for enum type: {field.EnumType.FullName}");
                }
                // Just return it as an int, and let the CLR convert it.
                return namedValue.Number;
            }
        case FieldType.Double:
            {
                double parsedDouble = ParseNumericString(text, double.Parse);
                // Infinity/NaN results are accepted only when the text spelled them out exactly.
                ValidateInfinityAndNan(
                    text,
                    double.IsPositiveInfinity(parsedDouble),
                    double.IsNegativeInfinity(parsedDouble),
                    double.IsNaN(parsedDouble));
                return parsedDouble;
            }
        case FieldType.Float:
            {
                float parsedFloat = ParseNumericString(text, float.Parse);
                ValidateInfinityAndNan(
                    text,
                    float.IsPositiveInfinity(parsedFloat),
                    float.IsNegativeInfinity(parsedFloat),
                    float.IsNaN(parsedFloat));
                return parsedFloat;
            }
        case FieldType.Int32:
        case FieldType.SInt32:
        case FieldType.SFixed32:
            return ParseNumericString(text, int.Parse);
        case FieldType.UInt32:
        case FieldType.Fixed32:
            return ParseNumericString(text, uint.Parse);
        case FieldType.Int64:
        case FieldType.SInt64:
        case FieldType.SFixed64:
            return ParseNumericString(text, long.Parse);
        case FieldType.UInt64:
        case FieldType.Fixed64:
            return ParseNumericString(text, ulong.Parse);
        default:
            throw new InvalidProtocolBufferException($"Unsupported conversion from JSON string for field type {field.FieldType}");
    }
}
| |
/// <summary>
/// Creates a new, empty message of the message type declared for the given field, using that
/// type's parser template factory.
/// </summary>
private static IMessage NewMessageForField(FieldDescriptor field)
{
    var messageParser = field.MessageType.Parser;
    return messageParser.CreateTemplate();
}
| |
/// <summary>
/// Parses numeric text with the supplied BCL parse method, after rejecting forms the parse method
/// would accept but this parser does not: a leading '+', and a leading zero followed by another
/// digit (with or without a leading '-').
/// </summary>
/// <typeparam name="T">The numeric type produced by <paramref name="parser"/>.</typeparam>
/// <param name="text">The text to parse.</param>
/// <param name="parser">
/// Parse method such as <c>int.Parse</c>; it is called with leading sign, decimal point and exponent
/// allowed, and with the invariant culture.
/// </param>
/// <returns>The parsed value.</returns>
/// <exception cref="InvalidProtocolBufferException">
/// The text has a prohibited prefix, is not a valid number, or is out of range for the type.
/// </exception>
private static T ParseNumericString<T>(string text, Func<string, NumberStyles, IFormatProvider, T> parser)
{
    // The prefix checks compare characters directly instead of calling string.StartsWith(string),
    // which is culture-sensitive and therefore unsuitable for machine-readable text.

    // Can't prohibit this with NumberStyles.
    if (text.Length > 0 && text[0] == '+')
    {
        throw new InvalidProtocolBufferException($"Invalid numeric value: {text}");
    }

    // Position of the first digit once an optional minus sign is skipped. A '0' there that is
    // immediately followed by another digit is an insignificant leading zero.
    int firstDigit = (text.Length > 0 && text[0] == '-') ? 1 : 0;
    if (text.Length > firstDigit + 1 && text[firstDigit] == '0')
    {
        char following = text[firstDigit + 1];
        if (following >= '0' && following <= '9')
        {
            throw new InvalidProtocolBufferException($"Invalid numeric value: {text}");
        }
    }

    try
    {
        return parser(text, NumberStyles.AllowLeadingSign | NumberStyles.AllowDecimalPoint | NumberStyles.AllowExponent, CultureInfo.InvariantCulture);
    }
    catch (FormatException)
    {
        throw new InvalidProtocolBufferException($"Invalid numeric value for type: {text}");
    }
    catch (OverflowException)
    {
        throw new InvalidProtocolBufferException($"Value out of range: {text}");
    }
}
| |
/// <summary>
/// Checks that any infinite/NaN values originated from the correct text.
/// This corrects the lenient whitespace handling of double.Parse/float.Parse, as well as the
/// way that Mono parses out-of-range values as infinity.
/// </summary>
/// <remarks>
/// Each flag describes the already-parsed value; a set flag is accepted only when
/// <paramref name="text"/> is exactly "Infinity", "-Infinity" or "NaN" respectively.
/// </remarks>
private static void ValidateInfinityAndNan(string text, bool isPositiveInfinity, bool isNegativeInfinity, bool isNaN)
{
    bool badPositiveInfinity = isPositiveInfinity && text != "Infinity";
    bool badNegativeInfinity = isNegativeInfinity && text != "-Infinity";
    bool badNaN = isNaN && text != "NaN";
    if (badPositiveInfinity || badNegativeInfinity || badNaN)
    {
        throw new InvalidProtocolBufferException($"Invalid numeric value: {text}");
    }
}
| |
/// <summary>
/// Parses the text of a JSON string <paramref name="token"/> as a timestamp and stores the resulting
/// seconds and nanoseconds in <paramref name="message"/>.
/// </summary>
/// <remarks>
/// The text must match <c>TimestampRegex</c>. The date/time part is parsed as UTC, the optional
/// fraction is converted to nanoseconds, and a non-"Z" offset is subtracted to reach UTC. Offsets
/// larger than 18 hours and the offset "-00:00" are rejected. All numeric parsing uses the
/// invariant culture.
/// </remarks>
/// <exception cref="InvalidProtocolBufferException">
/// The token is not a string, or its text is not an acceptable timestamp.
/// </exception>
private static void MergeTimestamp(IMessage message, JsonToken token)
{
    if (token.Type != JsonToken.TokenType.StringValue)
    {
        throw new InvalidProtocolBufferException("Expected string value for Timestamp");
    }
    var match = TimestampRegex.Match(token.StringValue);
    if (!match.Success)
    {
        throw new InvalidProtocolBufferException($"Invalid Timestamp value: {token.StringValue}");
    }
    var dateTime = match.Groups["datetime"].Value;
    var subseconds = match.Groups["subseconds"].Value;
    var offset = match.Groups["offset"].Value;

    try
    {
        // The regex is only a coarse filter (it admits month 00 or hour 29, for example);
        // ParseExact throws FormatException for those, which is translated below.
        DateTime parsed = DateTime.ParseExact(
            dateTime,
            "yyyy-MM-dd'T'HH:mm:ss",
            CultureInfo.InvariantCulture,
            DateTimeStyles.AssumeUniversal | DateTimeStyles.AdjustToUniversal);
        // TODO: It would be nice not to have to create all these objects... easy to optimize later though.
        Timestamp timestamp = Timestamp.FromDateTime(parsed);
        int nanosToAdd = 0;
        if (subseconds != "")
        {
            // This should always work, as we've got 1-9 digits.
            int parsedFraction = int.Parse(subseconds.Substring(1), CultureInfo.InvariantCulture);
            // The scaling table is indexed by the capture length, which includes the leading '.'.
            nanosToAdd = parsedFraction * SubsecondScalingFactors[subseconds.Length];
        }
        int secondsToAdd = 0;
        if (offset != "Z")
        {
            // This is the amount we need to *subtract* from the local time to get to UTC - hence - => +1 and vice versa.
            int sign = offset[0] == '-' ? 1 : -1;
            int hours = int.Parse(offset.Substring(1, 2), CultureInfo.InvariantCulture);
            // Parsed with the invariant culture, like the hours and the fraction above.
            int minutes = int.Parse(offset.Substring(4, 2), CultureInfo.InvariantCulture);
            int totalMinutes = hours * 60 + minutes;
            if (totalMinutes > 18 * 60)
            {
                throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue);
            }
            if (totalMinutes == 0 && sign == 1)
            {
                // This is an offset of -00:00, which means "unknown local offset". It makes no sense for a timestamp.
                throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue);
            }
            // We need to *subtract* the offset from local time to get UTC.
            secondsToAdd = sign * totalMinutes * 60;
        }
        // Ensure we've got the right signs. Currently unnecessary, but easy to do.
        if (secondsToAdd < 0 && nanosToAdd > 0)
        {
            secondsToAdd++;
            nanosToAdd = nanosToAdd - Duration.NanosecondsPerSecond;
        }
        if (secondsToAdd != 0 || nanosToAdd != 0)
        {
            timestamp += new Duration { Nanos = nanosToAdd, Seconds = secondsToAdd };
            // The resulting timestamp after offset change would be out of our expected range. Currently the Timestamp message doesn't validate this
            // anywhere, but we shouldn't parse it.
            if (timestamp.Seconds < Timestamp.UnixSecondsAtBclMinValue || timestamp.Seconds > Timestamp.UnixSecondsAtBclMaxValue)
            {
                throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue);
            }
        }
        message.Descriptor.Fields[Timestamp.SecondsFieldNumber].Accessor.SetValue(message, timestamp.Seconds);
        message.Descriptor.Fields[Timestamp.NanosFieldNumber].Accessor.SetValue(message, timestamp.Nanos);
    }
    catch (FormatException)
    {
        throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue);
    }
}
| |
/// <summary>
/// Parses a JSON string token as a Duration and stores the resulting seconds and nanos
/// in <paramref name="message"/> through its descriptor's field accessors.
/// </summary>
/// <param name="message">The message whose Duration seconds and nanos fields are set.</param>
/// <param name="token">The JSON token to parse; it must be a string value.</param>
/// <exception cref="InvalidProtocolBufferException">
/// The token is not a string, does not match <c>DurationRegex</c>, has insignificant leading
/// zeroes in its integer part, cannot be parsed as a number, or is rejected by
/// <c>Duration.IsNormalized</c>.
/// </exception>
private static void MergeDuration(IMessage message, JsonToken token)
{
    if (token.Type != JsonToken.TokenType.StringValue)
    {
        throw new InvalidProtocolBufferException("Expected string value for Duration");
    }
    var match = DurationRegex.Match(token.StringValue);
    if (!match.Success)
    {
        throw new InvalidProtocolBufferException("Invalid Duration value: " + token.StringValue);
    }
    var sign = match.Groups["sign"].Value;
    var secondsText = match.Groups["int"].Value;
    // Prohibit leading insignificant zeroes
    if (secondsText[0] == '0' && secondsText.Length > 1)
    {
        throw new InvalidProtocolBufferException("Invalid Duration value: " + token.StringValue);
    }
    var subseconds = match.Groups["subseconds"].Value;
    // The sign applies to both the seconds and the nanos components.
    var multiplier = sign == "-" ? -1 : 1;

    try
    {
        long seconds = long.Parse(secondsText, CultureInfo.InvariantCulture) * multiplier;
        int nanos = 0;
        if (subseconds != "")
        {
            // This should always work, as we've got 1-9 digits.
            // Parsed with the invariant culture, like the seconds above, so that the result
            // never depends on the current thread culture.
            int parsedFraction = int.Parse(subseconds.Substring(1), CultureInfo.InvariantCulture);
            // The scaling table is indexed by the length of the whole group, leading character included.
            nanos = parsedFraction * SubsecondScalingFactors[subseconds.Length] * multiplier;
        }
        if (!Duration.IsNormalized(seconds, nanos))
        {
            throw new InvalidProtocolBufferException($"Invalid Duration value: {token.StringValue}");
        }
        message.Descriptor.Fields[Duration.SecondsFieldNumber].Accessor.SetValue(message, seconds);
        message.Descriptor.Fields[Duration.NanosFieldNumber].Accessor.SetValue(message, nanos);
    }
    catch (FormatException)
    {
        // Any number parse failure is reported as an invalid protocol buffer value.
        throw new InvalidProtocolBufferException($"Invalid Duration value: {token.StringValue}");
    }
}
| |
// Reads a JSON string token as a FieldMask: the text is split on FieldMaskPathSeparators and each
// non-empty piece is passed through ToSnakeCase and appended to the message's paths field.
// Throws InvalidProtocolBufferException when the token is not a string value.
private static void MergeFieldMask(IMessage message, JsonToken token)
{
    if (token.Type != JsonToken.TokenType.StringValue)
    {
        throw new InvalidProtocolBufferException("Expected string value for FieldMask");
    }

    // TODO: Do we *want* to remove empty entries? Probably okay to treat "" as "no paths", but "foo,,bar"?
    string[] rawPaths = token.StringValue.Split(FieldMaskPathSeparators, StringSplitOptions.RemoveEmptyEntries);

    // The paths field is accessed through reflection, so it is only available as a non-generic list.
    var pathsField = message.Descriptor.Fields[FieldMask.PathsFieldNumber];
    IList targetPaths = (IList) pathsField.Accessor.GetValue(message);

    // Convert and append one path at a time, so paths before a failing one have already been added.
    for (int index = 0; index < rawPaths.Length; index++)
    {
        string converted = ToSnakeCase(rawPaths[index]);
        targetPaths.Add(converted);
    }
}
| |
// Ported from src/google/protobuf/util/internal/utility.cc
//
// Converts text to snake_case: each ASCII upper-case letter is lower-cased, and an underscore is
// inserted in front of it when it is not the first character and it either follows a character that
// is not an ASCII upper-case letter or is immediately followed by an ASCII lower-case letter.
// Every other character is copied unchanged, except that any underscore in the input causes an
// InvalidProtocolBufferException.
private static string ToSnakeCase(string text)
{
    var result = new StringBuilder(text.Length * 2);
    // True when the previously processed character was not an ASCII capital letter.
    bool previousWasNotCapital = false;

    for (int index = 0; index < text.Length; index++)
    {
        char current = text[index];

        if (current == '_')
        {
            throw new InvalidProtocolBufferException($"Invalid field mask: {text}");
        }

        bool isCapital = current >= 'A' && current <= 'Z';
        if (!isCapital)
        {
            result.Append(current);
            previousWasNotCapital = true;
            continue;
        }

        // For a capital letter B:
        //   1) At the beginning of input:  "B..."      => "b..."        ("Biscuit"   => "biscuit")
        //   2) Following a non-capital:    "...aB..."  => "...a_b..."   ("gBike"     => "g_bike")
        //   3) At the end of input:        "...AB"     => "...ab"       ("GoogleLAB" => "google_lab")
        //   4) Followed by a lower-case:   "...ABc..." => "...a_bc..."  ("GBike"     => "g_bike")
        // Only cases 2 and 4 get an underscore.
        bool isFirst = index == 0;
        bool nextIsLower = index + 1 < text.Length
            && text[index + 1] >= 'a'
            && text[index + 1] <= 'z';
        if (!isFirst && (previousWasNotCapital || nextIsLower))
        {
            result.Append('_');
        }

        // current is known to be an upper-case ASCII letter, so shifting by the fixed distance
        // between the two alphabets yields its lower-case form.
        result.Append((char) (current + ('a' - 'A')));
        previousWasNotCapital = false;
    }

    return result.ToString();
}
| #endregion |
| |
/// <summary>
/// Options controlling JSON parsing. All properties are read-only; the <c>With...</c> methods
/// return modified copies.
/// </summary>
public sealed class Settings
{
    /// <summary>
    /// The default settings, as used by <see cref="JsonParser.Default"/>: the recursion limit is the
    /// default one of <see cref="CodedInputStream"/>, and the type registry is empty.
    /// </summary>
    public static Settings Default { get; }

    // Default is assigned in a static constructor as a workaround for the Mono compiler
    // complaining about XML comments not being on valid language elements.
    static Settings()
    {
        Default = new Settings(CodedInputStream.DefaultRecursionLimit);
    }

    /// <summary>
    /// The maximum message nesting depth accepted while parsing. Only messages count towards this
    /// limit, not collections: a message inside a collection inside a message is at depth 2, not 3.
    /// </summary>
    public int RecursionLimit { get; }

    /// <summary>
    /// The type registry consulted when parsing <see cref="Any"/> messages.
    /// </summary>
    public TypeRegistry TypeRegistry { get; }

    /// <summary>
    /// <c>true</c> if the parser should ignore unknown fields; <c>false</c> if it should throw an
    /// exception when one is encountered.
    /// </summary>
    public bool IgnoreUnknownFields { get; }

    // The single place where all three values are stored; rejects a null type registry.
    private Settings(int recursionLimit, TypeRegistry typeRegistry, bool ignoreUnknownFields)
    {
        TypeRegistry = ProtoPreconditions.CheckNotNull(typeRegistry, nameof(typeRegistry));
        RecursionLimit = recursionLimit;
        IgnoreUnknownFields = ignoreUnknownFields;
    }

    /// <summary>
    /// Creates a new <see cref="Settings"/> object with the given recursion limit, an empty type
    /// registry, and unknown fields not ignored.
    /// </summary>
    /// <param name="recursionLimit">The maximum depth of messages to parse</param>
    public Settings(int recursionLimit) : this(recursionLimit, TypeRegistry.Empty, false)
    {
    }

    /// <summary>
    /// Creates a new <see cref="Settings"/> object with the given recursion limit and type registry,
    /// and unknown fields not ignored.
    /// </summary>
    /// <param name="recursionLimit">The maximum depth of messages to parse</param>
    /// <param name="typeRegistry">The type registry used to parse <see cref="Any"/> messages</param>
    public Settings(int recursionLimit, TypeRegistry typeRegistry) : this(recursionLimit, typeRegistry, false)
    {
    }

    /// <summary>
    /// Creates a new <see cref="Settings"/> object with this object's recursion limit and type
    /// registry, set to either ignore unknown fields or throw an exception when they are encountered.
    /// </summary>
    /// <param name="ignoreUnknownFields"><c>true</c> if unknown fields should be ignored when parsing; <c>false</c> to throw an exception.</param>
    public Settings WithIgnoreUnknownFields(bool ignoreUnknownFields)
    {
        return new Settings(RecursionLimit, TypeRegistry, ignoreUnknownFields);
    }

    /// <summary>
    /// Creates a new <see cref="Settings"/> object based on this one, but with the given recursion limit.
    /// </summary>
    /// <param name="recursionLimit">The new recursion limit.</param>
    public Settings WithRecursionLimit(int recursionLimit)
    {
        return new Settings(recursionLimit, TypeRegistry, IgnoreUnknownFields);
    }

    /// <summary>
    /// Creates a new <see cref="Settings"/> object based on this one, but with the given type registry.
    /// </summary>
    /// <param name="typeRegistry">The new type registry. Must not be null.</param>
    public Settings WithTypeRegistry(TypeRegistry typeRegistry)
    {
        var checkedRegistry = ProtoPreconditions.CheckNotNull(typeRegistry, nameof(typeRegistry));
        return new Settings(RecursionLimit, checkedRegistry, IgnoreUnknownFields);
    }
}
| } |
| } |