blob: fcf859daba09644bd9f4fbf638a4930fe5e0ba34 [file] [log] [blame]
#region Copyright notice and license
// Protocol Buffers - Google's data interchange format
// Copyright 2015 Google Inc. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd
#endregion
using Google.Protobuf.Reflection;
using Google.Protobuf.WellKnownTypes;
using System;
using System.Collections;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
namespace Google.Protobuf
{
/// <summary>
/// Reflection-based converter from JSON to messages.
/// </summary>
/// <remarks>
/// <para>
/// Instances of this class are thread-safe, with no mutable state.
/// </para>
/// <para>
/// This is a simple start to get JSON parsing working. As it's reflection-based,
/// it's not as quick as baking calls into generated messages - but is a simpler implementation.
/// (This code is generally not heavily optimized.)
/// </para>
/// </remarks>
public sealed class JsonParser
{
// Note: using 0-9 instead of \d to ensure no non-ASCII digits.
// This regex isn't a complete validator, but will remove *most* invalid input. We rely on parsing to do the rest.
// Named groups: "datetime" (date and time down to whole seconds), "subseconds" (optional '.' followed by
// 1-9 digits) and "offset" (either "Z" or a signed hh:mm offset).
private static readonly Regex TimestampRegex = new Regex(@"^(?<datetime>[0-9]{4}-[01][0-9]-[0-3][0-9]T[012][0-9]:[0-5][0-9]:[0-5][0-9])(?<subseconds>\.[0-9]{1,9})?(?<offset>(Z|[+-][0-1][0-9]:[0-5][0-9]))$", FrameworkPortability.CompiledRegexWhereAvailable);
// Duration text: optional '-' ("sign"), 1-12 digits of whole seconds ("int"), optional '.' followed by
// 1-9 digits ("subseconds"), terminated by a literal 's'.
private static readonly Regex DurationRegex = new Regex(@"^(?<sign>-)?(?<int>[0-9]{1,12})(?<subseconds>\.[0-9]{1,9})?s$", FrameworkPortability.CompiledRegexWhereAvailable);
// Multipliers that turn a parsed fractional-seconds integer into nanoseconds. The table is indexed by the
// length of the matched "subseconds" group *including* its leading '.', so index 2 corresponds to a single
// digit (x100,000,000) and index 10 to nine digits (x1). The regexes above only ever produce lengths 2-10
// (an empty group is skipped by the callers), so the entries at indexes 0 and 1 are never read.
private static readonly int[] SubsecondScalingFactors = { 0, 100000000, 100000000, 10000000, 1000000, 100000, 10000, 1000, 100, 10, 1 };
// Separator used to split the JSON string form of a FieldMask into its individual paths.
private static readonly char[] FieldMaskPathSeparators = new[] { ',' };
// Descriptor of the NullValue enum, found by CLR type among the enum types exposed by StructReflection.Descriptor.
private static readonly EnumDescriptor NullValueDescriptor = StructReflection.Descriptor.EnumTypes.Single(ed => ed.ClrType == typeof(NullValue));
// Shared instance returned by the Default property; built from Settings.Default.
private static readonly JsonParser defaultInstance = new JsonParser(Settings.Default);
// TODO: Consider introducing a class containing parse state of the parser, tokenizer and depth. That would simplify these handlers
// and the signatures of various methods.
// Dispatch table from the full name of a well-known type to the routine that merges its special JSON
// representation into a message. Types without an entry are parsed like any other message.
private static readonly Dictionary<string, Action<JsonParser, IMessage, JsonTokenizer>> WellKnownTypeHandlers =
    new Dictionary<string, Action<JsonParser, IMessage, JsonTokenizer>>
    {
        // Types represented by a single JSON string token.
        [Timestamp.Descriptor.FullName] = (p, msg, tok) => MergeTimestamp(msg, tok.Next()),
        [Duration.Descriptor.FullName] = (p, msg, tok) => MergeDuration(msg, tok.Next()),
        // Struct-family types, which map onto arbitrary JSON values.
        [Value.Descriptor.FullName] = (p, msg, tok) => p.MergeStructValue(msg, tok),
        [ListValue.Descriptor.FullName] = (p, msg, tok) =>
            p.MergeRepeatedField(msg, msg.Descriptor.Fields[ListValue.ValuesFieldNumber], tok),
        [Struct.Descriptor.FullName] = (p, msg, tok) => p.MergeStruct(msg, tok),
        [Any.Descriptor.FullName] = (p, msg, tok) => p.MergeAny(msg, tok),
        [FieldMask.Descriptor.FullName] = (p, msg, tok) => MergeFieldMask(msg, tok.Next()),
        // Wrapper types all share one handler, which parses the wrapped "value" field directly.
        [Int32Value.Descriptor.FullName] = MergeWrapperField,
        [Int64Value.Descriptor.FullName] = MergeWrapperField,
        [UInt32Value.Descriptor.FullName] = MergeWrapperField,
        [UInt64Value.Descriptor.FullName] = MergeWrapperField,
        [FloatValue.Descriptor.FullName] = MergeWrapperField,
        [DoubleValue.Descriptor.FullName] = MergeWrapperField,
        [BytesValue.Descriptor.FullName] = MergeWrapperField,
        [StringValue.Descriptor.FullName] = MergeWrapperField,
        [BoolValue.Descriptor.FullName] = MergeWrapperField
    };
// Handler shared by every wrapper type in WellKnownTypeHandlers: looks up the wrapper's single value
// field and merges the next JSON value into it, so the dictionary does not repeat the same lambda.
private static void MergeWrapperField(JsonParser parser, IMessage message, JsonTokenizer tokenizer)
{
    var wrappedValueField = message.Descriptor.Fields[WrappersReflection.WrapperValueFieldNumber];
    parser.MergeField(message, wrappedValueField, tokenizer);
}
/// <summary>
/// Returns a parser using the default settings.
/// </summary>
public static JsonParser Default => defaultInstance;
// Settings supplied at construction; read by the parsing methods (recursion limit, unknown-field
// handling and the type registry used for Any).
private readonly Settings settings;
/// <summary>
/// Creates a new parser with the given settings.
/// </summary>
/// <param name="settings">The settings; validated with <c>ProtoPreconditions.CheckNotNull</c>.</param>
public JsonParser(Settings settings)
{
    var checkedSettings = ProtoPreconditions.CheckNotNull(settings, nameof(settings));
    this.settings = checkedSettings;
}
/// <summary>
/// Parses the JSON text in <paramref name="json"/> and merges the result into <paramref name="message"/>.
/// </summary>
/// <param name="message">The message to merge the JSON information into.</param>
/// <param name="json">The JSON to parse.</param>
internal void Merge(IMessage message, string json)
{
    // Wrap the text in a reader and delegate to the TextReader overload.
    TextReader jsonReader = new StringReader(json);
    Merge(message, jsonReader);
}
/// <summary>
/// Reads JSON from <paramref name="jsonReader"/> and merges the result into <paramref name="message"/>,
/// requiring that nothing but the end of the document follows the parsed value.
/// </summary>
/// <param name="message">The message to merge the JSON information into.</param>
/// <param name="jsonReader">Reader providing the JSON to parse.</param>
internal void Merge(IMessage message, TextReader jsonReader)
{
    var tokens = JsonTokenizer.FromTextReader(jsonReader);
    Merge(message, tokens);
    // Anything other than end-of-document after the top-level value is rejected.
    if (tokens.Next() != JsonToken.EndDocument)
    {
        throw new InvalidProtocolBufferException("Expected end of JSON after object");
    }
}
/// <summary>
/// Merges data from the tokenizer into the given message. The next token is usually "start object",
/// but well-known types with a registered handler (wrappers, Timestamp, etc.) take over entirely and may
/// consume a different kind of value. The tokenizer is relied upon to deliver syntactically valid JSON;
/// this method then enforces the additional "protobuf JSON" rules (known fields, at most one value
/// per oneof) in an LL(1) recursive-descent fashion.
/// </summary>
private void Merge(IMessage message, JsonTokenizer tokenizer)
{
    if (tokenizer.ObjectDepth > settings.RecursionLimit)
    {
        throw InvalidProtocolBufferException.JsonRecursionLimitExceeded();
    }
    var descriptor = message.Descriptor;
    // Well-known types with a registered handler are parsed entirely by that handler;
    // those without one fall through to the regular object parsing below.
    if (descriptor.IsWellKnownType &&
        WellKnownTypeHandlers.TryGetValue(descriptor.FullName, out var wellKnownHandler))
    {
        wellKnownHandler(this, message, tokenizer);
        return;
    }
    if (tokenizer.Next().Type != JsonToken.TokenType.StartObject)
    {
        throw new InvalidProtocolBufferException("Expected an object");
    }
    var fieldsByJsonName = descriptor.Fields.ByJsonName();
    // Oneofs which already received a value in this object. Allocated on first use, since
    // most messages contain no oneof fields at all.
    HashSet<OneofDescriptor> populatedOneofs = null;
    for (JsonToken current = tokenizer.Next();
         current.Type != JsonToken.TokenType.EndObject;
         current = tokenizer.Next())
    {
        if (current.Type != JsonToken.TokenType.Name)
        {
            throw new InvalidOperationException("Unexpected token type " + current.Type);
        }
        string jsonName = current.StringValue;
        if (!fieldsByJsonName.TryGetValue(jsonName, out FieldDescriptor matchedField))
        {
            // Unknown property: either skip its value or reject the document, per settings.
            if (!settings.IgnoreUnknownFields)
            {
                throw new InvalidProtocolBufferException("Unknown field: " + jsonName);
            }
            tokenizer.SkipValue();
            continue;
        }
        var oneof = matchedField.ContainingOneof;
        if (oneof != null)
        {
            populatedOneofs ??= new HashSet<OneofDescriptor>();
            if (!populatedOneofs.Add(oneof))
            {
                throw new InvalidProtocolBufferException($"Multiple values specified for oneof {oneof.Name}");
            }
        }
        MergeField(message, matchedField, tokenizer);
    }
}
// Merges the next JSON value from the tokenizer into a single field of the message, dispatching to the
// map, repeated or singular parsing path as appropriate.
private void MergeField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer)
{
    JsonToken lookahead = tokenizer.Next();
    if (lookahead.Type == JsonToken.TokenType.Null)
    {
        // A JSON null clears the field, except for a singular field of type google.protobuf.Value
        // or google.protobuf.NullValue, where null is a genuine value to be parsed below.
        // Note: different from Java API, which just ignores it.
        // TODO: Bring it more in line? Discuss...
        bool nullIsAValue = !field.IsMap && !field.IsRepeated &&
            (IsGoogleProtobufValueField(field) || IsGoogleProtobufNullValueField(field));
        if (!nullIsAValue)
        {
            field.Accessor.Clear(message);
            return;
        }
    }
    // The specialised parsers expect to read the value's first token themselves.
    tokenizer.PushBack(lookahead);
    if (field.IsMap)
    {
        MergeMapField(message, field, tokenizer);
        return;
    }
    if (field.IsRepeated)
    {
        MergeRepeatedField(message, field, tokenizer);
        return;
    }
    // Singular field: an ignorable parse failure leaves the field untouched.
    if (TryParseSingleValue(field, tokenizer, out object parsed))
    {
        field.Accessor.SetValue(message, parsed);
    }
}
// Parses a JSON array and appends each successfully parsed element to the repeated field's list.
// Null elements are rejected; elements reported as ignorable by TryParseSingleValue are dropped.
private void MergeRepeatedField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer)
{
    JsonToken opening = tokenizer.Next();
    if (opening.Type != JsonToken.TokenType.StartArray)
    {
        throw new InvalidProtocolBufferException("Repeated field value was not an array. Token type: " + opening.Type);
    }
    var elements = (IList) field.Accessor.GetValue(message);
    for (JsonToken next = tokenizer.Next();
         next.Type != JsonToken.TokenType.EndArray;
         next = tokenizer.Next())
    {
        // Not the end of the array, so hand the token back for the value parser to consume.
        tokenizer.PushBack(next);
        if (!TryParseSingleValue(field, tokenizer, out object element))
        {
            continue;
        }
        if (element == null)
        {
            throw new InvalidProtocolBufferException("Repeated field elements cannot be null");
        }
        elements.Add(element);
    }
}
// Parses a JSON object into a map field: each property name becomes a key (converted to the map's key
// type) and each property value is parsed according to the map's value field.
private void MergeMapField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer)
{
    // Map fields are always objects, even if the values are well-known types: TryParseSingleValue handles those.
    if (tokenizer.Next().Type != JsonToken.TokenType.StartObject)
    {
        throw new InvalidProtocolBufferException("Expected an object to populate a map");
    }
    // The map entry message carries the key as field 1 and the value as field 2.
    var entryType = field.MessageType;
    var keyField = entryType.FindFieldByNumber(1);
    var valueField = entryType.FindFieldByNumber(2);
    if (keyField == null || valueField == null)
    {
        throw new InvalidProtocolBufferException("Invalid map field: " + field.FullName);
    }
    var map = (IDictionary) field.Accessor.GetValue(message);
    for (JsonToken entryName = tokenizer.Next();
         entryName.Type != JsonToken.TokenType.EndObject;
         entryName = tokenizer.Next())
    {
        object key = ParseMapKey(keyField, entryName.StringValue);
        if (!TryParseSingleValue(valueField, tokenizer, out object entryValue))
        {
            // Ignorable parse failure: drop this entry.
            continue;
        }
        if (entryValue == null)
        {
            throw new InvalidProtocolBufferException("Map values must not be null");
        }
        map[key] = entryValue;
    }
}
// True when the field is a message field whose message type has the same full name as google.protobuf.Value.
private static bool IsGoogleProtobufValueField(FieldDescriptor field) =>
    field.FieldType == FieldType.Message
    && field.MessageType.FullName == Value.Descriptor.FullName;
// True when the field is an enum field whose enum type has the same full name as the NullValue enum.
private static bool IsGoogleProtobufNullValueField(FieldDescriptor field) =>
    field.FieldType == FieldType.Enum
    && field.EnumType.FullName == NullValueDescriptor.FullName;
/// <summary>
/// Reads one JSON value from the tokenizer and converts it to a value suitable for <paramref name="field"/>.
/// Input that can never be valid results in an exception. Input that is only "conditionally invalid"
/// (currently: an unrecognised enum name while unknown fields are being ignored) makes the method
/// return false instead.
/// </summary>
/// <returns>
/// true if the value was parsed successfully; false for an ignorable parse failure.
/// </returns>
private bool TryParseSingleValue(FieldDescriptor field, JsonTokenizer tokenizer, out object value)
{
    JsonToken token = tokenizer.Next();
    JsonToken.TokenType tokenType = token.Type;
    if (tokenType == JsonToken.TokenType.Null)
    {
        // TODO: In order to support dynamic messages, we should really build this up
        // dynamically.
        value = IsGoogleProtobufValueField(field) ? (object) Value.ForNull()
            : IsGoogleProtobufNullValueField(field) ? (object) NullValue.NullValue
            : null;
        return true;
    }
    FieldType effectiveType = field.FieldType;
    if (effectiveType == FieldType.Message || effectiveType == FieldType.Group)
    {
        if (!field.MessageType.IsWrapperType)
        {
            // Ordinary nested message: parse it recursively, starting from the token we just read.
            // TODO: Merge the current value in message? (Public API currently doesn't make this relevant as we don't expose merging.)
            tokenizer.PushBack(token);
            IMessage nested = NewMessageForField(field);
            Merge(nested, tokenizer);
            value = nested;
            return true;
        }
        // Wrapper types are parsed as their constituent type, so continue with the wrapped field.
        // TODO: What does this mean for null?
        field = field.MessageType.Fields[WrappersReflection.WrapperValueFieldNumber];
        effectiveType = field.FieldType;
    }
    if (tokenType == JsonToken.TokenType.StringValue)
    {
        if (effectiveType == FieldType.Enum)
        {
            return TryParseEnumStringValue(field, token.StringValue, out value);
        }
        value = ParseSingleStringValue(field, token.StringValue);
        return true;
    }
    if (tokenType == JsonToken.TokenType.Number)
    {
        // Note: not passing the number value itself here, as we may end up storing the string value in the token too.
        value = ParseSingleNumberValue(field, token);
        return true;
    }
    bool isBooleanToken = tokenType == JsonToken.TokenType.True || tokenType == JsonToken.TokenType.False;
    if (isBooleanToken && effectiveType == FieldType.Bool)
    {
        value = tokenType == JsonToken.TokenType.True;
        return true;
    }
    // Every other token/field combination (including true/false for a non-bool field) is unsupported.
    throw new InvalidProtocolBufferException("Unsupported JSON token type " + tokenType + " for field type " + effectiveType);
}
/// <summary>
/// Parses <paramref name="json"/> into a newly created message of type <typeparamref name="T"/>.
/// </summary>
/// <typeparam name="T">The type of message to create.</typeparam>
/// <param name="json">The JSON to parse.</param>
/// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception>
/// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
public T Parse<T>(string json) where T : IMessage, new()
{
    ProtoPreconditions.CheckNotNull(json, nameof(json));
    TextReader jsonReader = new StringReader(json);
    return Parse<T>(jsonReader);
}
/// <summary>
/// Reads JSON from <paramref name="jsonReader"/> and parses it into a newly created message of
/// type <typeparamref name="T"/>.
/// </summary>
/// <typeparam name="T">The type of message to create.</typeparam>
/// <param name="jsonReader">Reader providing the JSON to parse.</param>
/// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception>
/// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
public T Parse<T>(TextReader jsonReader) where T : IMessage, new()
{
    ProtoPreconditions.CheckNotNull(jsonReader, nameof(jsonReader));
    var parsed = new T();
    Merge(parsed, jsonReader);
    return parsed;
}
/// <summary>
/// Parses <paramref name="json"/> into a new message of the type described by <paramref name="descriptor"/>.
/// </summary>
/// <param name="json">The JSON to parse.</param>
/// <param name="descriptor">Descriptor of message type to parse.</param>
/// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception>
/// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
public IMessage Parse(string json, MessageDescriptor descriptor)
{
    ProtoPreconditions.CheckNotNull(json, nameof(json));
    ProtoPreconditions.CheckNotNull(descriptor, nameof(descriptor));
    TextReader jsonReader = new StringReader(json);
    return Parse(jsonReader, descriptor);
}
/// <summary>
/// Reads JSON from <paramref name="jsonReader"/> and parses it into a new message of the type
/// described by <paramref name="descriptor"/>.
/// </summary>
/// <param name="jsonReader">Reader providing the JSON to parse.</param>
/// <param name="descriptor">Descriptor of message type to parse.</param>
/// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception>
/// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
public IMessage Parse(TextReader jsonReader, MessageDescriptor descriptor)
{
    ProtoPreconditions.CheckNotNull(jsonReader, nameof(jsonReader));
    ProtoPreconditions.CheckNotNull(descriptor, nameof(descriptor));
    // The descriptor's parser supplies the empty message instance that gets populated.
    IMessage parsed = descriptor.Parser.CreateTemplate();
    Merge(parsed, jsonReader);
    return parsed;
}
// Parses an arbitrary JSON value into a google.protobuf.Value message, selecting the field of the
// Value that corresponds to the kind of the first token (null, string, number, bool, object or array).
private void MergeStructValue(IMessage message, JsonTokenizer tokenizer)
{
    JsonToken first = tokenizer.Next();
    var valueFields = message.Descriptor.Fields;

    // Stores a scalar directly in the field with the given number.
    void SetScalar(int fieldNumber, object scalar) =>
        valueFields[fieldNumber].Accessor.SetValue(message, scalar);

    // Parses a nested Struct/ListValue (re-reading the token already consumed) and stores it.
    void SetNested(int fieldNumber)
    {
        var nestedField = valueFields[fieldNumber];
        IMessage nested = NewMessageForField(nestedField);
        tokenizer.PushBack(first);
        Merge(nested, tokenizer);
        nestedField.Accessor.SetValue(message, nested);
    }

    switch (first.Type)
    {
        case JsonToken.TokenType.Null:
            SetScalar(Value.NullValueFieldNumber, 0);
            break;
        case JsonToken.TokenType.StringValue:
            SetScalar(Value.StringValueFieldNumber, first.StringValue);
            break;
        case JsonToken.TokenType.Number:
            SetScalar(Value.NumberValueFieldNumber, first.NumberValue);
            break;
        case JsonToken.TokenType.True:
        case JsonToken.TokenType.False:
            SetScalar(Value.BoolValueFieldNumber, first.Type == JsonToken.TokenType.True);
            break;
        case JsonToken.TokenType.StartObject:
            SetNested(Value.StructValueFieldNumber);
            break;
        case JsonToken.TokenType.StartArray:
            SetNested(Value.ListValueFieldNumber);
            break;
        default:
            throw new InvalidOperationException("Unexpected token type: " + first.Type);
    }
}
// Parses a JSON object into a google.protobuf.Struct by treating it as the map stored in the
// Struct's "fields" field.
private void MergeStruct(IMessage message, JsonTokenizer tokenizer)
{
    JsonToken first = tokenizer.Next();
    if (first.Type == JsonToken.TokenType.StartObject)
    {
        // MergeMapField reads the start-object token itself, so hand it back first.
        tokenizer.PushBack(first);
        var fieldsMap = message.Descriptor.Fields[Struct.FieldsFieldNumber];
        MergeMapField(message, fieldsMap, tokenizer);
        return;
    }
    throw new InvalidProtocolBufferException("Expected object value for Struct");
}
// Parses the JSON object form of google.protobuf.Any into the given Any message.
// The @type property may appear after other properties, so tokens are buffered until it is found. The type
// registry from the settings then provides the descriptor for the embedded message, which is parsed from the
// buffered tokens plus the remainder of the object, serialized, and stored in the Any's value field.
// Throws InvalidProtocolBufferException if the value is not an object, if the object ends without an @type
// property, or if the @type value is not a string. Throws InvalidOperationException if the type registry has
// no descriptor for the type name.
private void MergeAny(IMessage message, JsonTokenizer tokenizer)
{
// Record the token stream until we see the @type property. At that point, we can take the value, consult
// the type registry for the relevant message, and replay the stream, omitting the @type property.
var tokens = new List<JsonToken>();
var token = tokenizer.Next();
if (token.Type != JsonToken.TokenType.StartObject)
{
throw new InvalidProtocolBufferException("Expected object value for Any");
}
// Depth of this Any's own properties, captured just after its start-object token was read.
int typeUrlObjectDepth = tokenizer.ObjectDepth;
// The check for the property depth protects us from nested Any values which occur before the type URL
// for *this* Any.
while (token.Type != JsonToken.TokenType.Name ||
token.StringValue != JsonFormatter.AnyTypeUrlField ||
tokenizer.ObjectDepth != typeUrlObjectDepth)
{
tokens.Add(token);
token = tokenizer.Next();
// Dropping below the starting depth means this Any's object was closed without an @type property.
if (tokenizer.ObjectDepth < typeUrlObjectDepth)
{
throw new InvalidProtocolBufferException("Any message with no @type");
}
}
// Don't add the @type property or its value to the recorded token list
token = tokenizer.Next();
if (token.Type != JsonToken.TokenType.StringValue)
{
throw new InvalidProtocolBufferException("Expected string value for Any.@type");
}
string typeUrl = token.StringValue;
string typeName = Any.GetTypeName(typeUrl);
MessageDescriptor descriptor = settings.TypeRegistry.Find(typeName);
if (descriptor == null)
{
throw new InvalidOperationException($"Type registry has no descriptor for type name '{typeName}'");
}
// Now replay the token stream we've already read and anything that remains of the object, just parsing it
// as normal. Our original tokenizer should end up at the end of the object.
var replay = JsonTokenizer.FromReplayedTokens(tokens, tokenizer);
var body = descriptor.Parser.CreateTemplate();
// Well-known types are handled by a dedicated routine which expects their content under a single
// property; every other message type is parsed from its properties in the normal way.
if (descriptor.IsWellKnownType)
{
MergeWellKnownTypeAnyBody(body, replay);
}
else
{
Merge(body, replay);
}
var data = body.ToByteString();
// Now that we have the message data, we can pack it into an Any (the message received as a parameter).
message.Descriptor.Fields[Any.TypeUrlFieldNumber].Accessor.SetValue(message, typeUrl);
message.Descriptor.Fields[Any.ValueFieldNumber].Accessor.SetValue(message, data);
}
// Parses the body of an Any whose embedded message is a well-known type. With the @type property already
// removed from the token stream, the only acceptable shape is: start-object, the name of the well-known
// type value property, the value itself, and end-object.
private void MergeWellKnownTypeAnyBody(IMessage body, JsonTokenizer tokenizer)
{
    // Definitely start-object; checked by the caller, so the token itself is not inspected.
    tokenizer.Next();
    JsonToken propertyName = tokenizer.Next();
    // TODO: What about an absent Int32Value, for example?
    bool isValueProperty = propertyName.Type == JsonToken.TokenType.Name &&
        propertyName.StringValue == JsonFormatter.AnyWellKnownTypeValueField;
    if (!isValueProperty)
    {
        throw new InvalidProtocolBufferException($"Expected '{JsonFormatter.AnyWellKnownTypeValueField}' property for well-known type Any body");
    }
    Merge(body, tokenizer);
    if (tokenizer.Next().Type != JsonToken.TokenType.EndObject)
    {
        throw new InvalidProtocolBufferException("Expected end-object token after @type/value for well-known type");
    }
}
#region Utility methods which don't depend on the state (or settings) of the parser.
// Converts the text of a JSON property name into a map key of the type required by the map's key field.
// Throws InvalidProtocolBufferException for malformed text or for a field type that cannot be a map key.
private static object ParseMapKey(FieldDescriptor field, string keyText)
{
    FieldType keyType = field.FieldType;
    switch (keyType)
    {
        case FieldType.String:
            return keyText;
        case FieldType.Bool:
            // Only the exact lower-case literals are accepted.
            switch (keyText)
            {
                case "true":
                    return true;
                case "false":
                    return false;
                default:
                    throw new InvalidProtocolBufferException("Invalid string for bool map key: " + keyText);
            }
        case FieldType.Int32 or FieldType.SInt32 or FieldType.SFixed32:
            return ParseNumericString(keyText, int.Parse);
        case FieldType.UInt32 or FieldType.Fixed32:
            return ParseNumericString(keyText, uint.Parse);
        case FieldType.Int64 or FieldType.SInt64 or FieldType.SFixed64:
            return ParseNumericString(keyText, long.Parse);
        case FieldType.UInt64 or FieldType.Fixed64:
            return ParseNumericString(keyText, ulong.Parse);
        default:
            throw new InvalidProtocolBufferException("Invalid field type for map: " + keyType);
    }
}
// Converts a JSON number token into a boxed value of the CLR type matching the field's type.
// Integer and enum fields require a finite whole number (see CheckInteger) within the range of the target
// type; float fields reject finite values that would overflow to infinity; double fields take the token's
// value unchanged. All failures are reported as InvalidProtocolBufferException.
private static object ParseSingleNumberValue(FieldDescriptor field, JsonToken token)
{
double value = token.NumberValue;
// The casts below rely on the checked context: an out-of-range double-to-integer conversion throws
// OverflowException, which the catch block translates into a parse error.
checked
{
try
{
switch (field.FieldType)
{
case FieldType.Int32:
case FieldType.SInt32:
case FieldType.SFixed32:
CheckInteger(value);
return (int) value;
case FieldType.UInt32:
case FieldType.Fixed32:
CheckInteger(value);
return (uint) value;
case FieldType.Int64:
case FieldType.SInt64:
case FieldType.SFixed64:
CheckInteger(value);
return (long) value;
case FieldType.UInt64:
case FieldType.Fixed64:
CheckInteger(value);
return (ulong) value;
case FieldType.Double:
return value;
case FieldType.Float:
// NaN is returned explicitly as the single-precision NaN rather than via the cast below.
if (double.IsNaN(value))
{
return float.NaN;
}
float converted = (float) value;
// If the value is out of range of float, the cast representation will be infinite.
// If the original value was infinite as well, that's fine - we'll return the 32-bit
// version (with the correct sign).
if (float.IsInfinity(converted) && !double.IsInfinity(value))
{
throw new InvalidProtocolBufferException($"Value out of range: {value}");
}
return converted;
case FieldType.Enum:
CheckInteger(value);
// Just return it as an int, and let the CLR convert it.
// Note that we deliberately don't check that it's a known value.
return (int) value;
default:
throw new InvalidProtocolBufferException($"Unsupported conversion from JSON number for field type {field.FieldType}");
}
}
catch (OverflowException)
{
throw new InvalidProtocolBufferException($"Value out of range: {value}");
}
}
}
// Throws InvalidProtocolBufferException unless the value is a finite number with no fractional part.
private static void CheckInteger(double value)
{
    bool isWholeNumber = !double.IsInfinity(value)
        && !double.IsNaN(value)
        && value == Math.Floor(value);
    if (!isWholeNumber)
    {
        throw new InvalidProtocolBufferException($"Value not an integer: {value}");
    }
}
// Converts the text of a JSON string token into a boxed value for a non-enum field: the text itself for
// string fields, base64-decoded data for bytes fields, and a parsed number for numeric fields.
// Enum fields must go through TryParseEnumStringValue instead.
private static object ParseSingleStringValue(FieldDescriptor field, string text)
{
    FieldType targetType = field.FieldType;
    switch (targetType)
    {
        case FieldType.String:
            return text;
        case FieldType.Bytes:
            try
            {
                return ByteString.FromBase64(text);
            }
            catch (FormatException e)
            {
                throw InvalidProtocolBufferException.InvalidBase64(e);
            }
        case FieldType.Int32 or FieldType.SInt32 or FieldType.SFixed32:
            return ParseNumericString(text, int.Parse);
        case FieldType.UInt32 or FieldType.Fixed32:
            return ParseNumericString(text, uint.Parse);
        case FieldType.Int64 or FieldType.SInt64 or FieldType.SFixed64:
            return ParseNumericString(text, long.Parse);
        case FieldType.UInt64 or FieldType.Fixed64:
            return ParseNumericString(text, ulong.Parse);
        case FieldType.Double:
        {
            double parsedDouble = ParseNumericString(text, double.Parse);
            // Infinity/NaN results are only accepted when spelled out exactly in the text.
            ValidateInfinityAndNan(
                text,
                double.IsPositiveInfinity(parsedDouble),
                double.IsNegativeInfinity(parsedDouble),
                double.IsNaN(parsedDouble));
            return parsedDouble;
        }
        case FieldType.Float:
        {
            float parsedFloat = ParseNumericString(text, float.Parse);
            ValidateInfinityAndNan(
                text,
                float.IsPositiveInfinity(parsedFloat),
                float.IsNegativeInfinity(parsedFloat),
                float.IsNaN(parsedFloat));
            return parsedFloat;
        }
        case FieldType.Enum:
            throw new InvalidOperationException("Use TryParseEnumStringValue for enums");
        default:
            throw new InvalidProtocolBufferException($"Unsupported conversion from JSON string for field type {targetType}");
    }
}
// Looks up an enum value by name for the given enum field. Returns true with the numeric value when the
// name is known. For an unknown name, returns false (with a null value) if unknown fields are being
// ignored, and throws InvalidProtocolBufferException otherwise.
private bool TryParseEnumStringValue(FieldDescriptor field, string text, out object value)
{
    var namedValue = field.EnumType.FindValueByName(text);
    if (namedValue != null)
    {
        // Just return it as an int, and let the CLR convert it.
        value = namedValue.Number;
        return true;
    }
    if (!settings.IgnoreUnknownFields)
    {
        throw new InvalidProtocolBufferException($"Invalid enum value: {text} for enum type: {field.EnumType.FullName}");
    }
    value = null;
    return false;
}
/// <summary>
/// Creates a new, empty message of the message type of the given field, using that type's parser.
/// </summary>
private static IMessage NewMessageForField(FieldDescriptor field) =>
    field.MessageType.Parser.CreateTemplate();
/// <summary>
/// Parses numeric text taken from a JSON string, using the supplied BCL parse method with the
/// invariant culture, after rejecting forms that the BCL would accept but JSON numbers do not allow:
/// a leading '+' and insignificant leading zeroes (such as "01" or "-01").
/// </summary>
/// <typeparam name="T">The numeric type produced by <paramref name="parser"/>.</typeparam>
/// <param name="text">The text to parse.</param>
/// <param name="parser">The parse method to invoke, e.g. <c>int.Parse</c>.</param>
/// <returns>The parsed value.</returns>
/// <exception cref="InvalidProtocolBufferException">The text is not a valid number, or is out of range for the type.</exception>
private static T ParseNumericString<T>(string text, Func<string, NumberStyles, IFormatProvider, T> parser)
{
    // All prefix checks use ordinal comparison: this is machine-readable data, and the culture-sensitive
    // default of string.StartsWith(string) can treat ignorable characters as matching.
    // Can't prohibit this with NumberStyles.
    if (text.StartsWith("+", StringComparison.Ordinal))
    {
        throw new InvalidProtocolBufferException($"Invalid numeric value: {text}");
    }
    if (text.StartsWith("0", StringComparison.Ordinal) && text.Length > 1)
    {
        // "0" followed by another digit is a leading zero; "0.5" or "0e1" is fine.
        if (text[1] >= '0' && text[1] <= '9')
        {
            throw new InvalidProtocolBufferException($"Invalid numeric value: {text}");
        }
    }
    else if (text.StartsWith("-0", StringComparison.Ordinal) && text.Length > 2)
    {
        if (text[2] >= '0' && text[2] <= '9')
        {
            throw new InvalidProtocolBufferException($"Invalid numeric value: {text}");
        }
    }
    try
    {
        return parser(text, NumberStyles.AllowLeadingSign | NumberStyles.AllowDecimalPoint | NumberStyles.AllowExponent, CultureInfo.InvariantCulture);
    }
    catch (FormatException)
    {
        throw new InvalidProtocolBufferException($"Invalid numeric value for type: {text}");
    }
    catch (OverflowException)
    {
        throw new InvalidProtocolBufferException($"Value out of range: {text}");
    }
}
/// <summary>
/// Ensures that an infinite or NaN parse result came from exactly the text "Infinity", "-Infinity"
/// or "NaN" respectively. This compensates for the lenient whitespace handling of
/// double.Parse/float.Parse, and for Mono parsing out-of-range values as infinity.
/// </summary>
private static void ValidateInfinityAndNan(string text, bool isPositiveInfinity, bool isNegativeInfinity, bool isNaN)
{
    bool textMatchesResult =
        (!isPositiveInfinity || text == "Infinity") &&
        (!isNegativeInfinity || text == "-Infinity") &&
        (!isNaN || text == "NaN");
    if (!textMatchesResult)
    {
        throw new InvalidProtocolBufferException($"Invalid numeric value: {text}");
    }
}
// Parses the JSON string form of a Timestamp (date/time, optional fractional seconds, and either "Z" or a
// signed hh:mm offset) and stores the resulting seconds and nanos in the given Timestamp message.
// Throws InvalidProtocolBufferException if the token is not a string, the text does not match the expected
// shape, the offset exceeds 18 hours or is "-00:00", or the offset-adjusted result is out of range.
private static void MergeTimestamp(IMessage message, JsonToken token)
{
    if (token.Type != JsonToken.TokenType.StringValue)
    {
        throw new InvalidProtocolBufferException("Expected string value for Timestamp");
    }
    var match = TimestampRegex.Match(token.StringValue);
    if (!match.Success)
    {
        throw new InvalidProtocolBufferException($"Invalid Timestamp value: {token.StringValue}");
    }
    var dateTime = match.Groups["datetime"].Value;
    var subseconds = match.Groups["subseconds"].Value;
    var offset = match.Groups["offset"].Value;
    try
    {
        // The regex is only a coarse filter; ParseExact rejects impossible dates and times.
        DateTime parsed = DateTime.ParseExact(
            dateTime,
            "yyyy-MM-dd'T'HH:mm:ss",
            CultureInfo.InvariantCulture,
            DateTimeStyles.AssumeUniversal | DateTimeStyles.AdjustToUniversal);
        // TODO: It would be nice not to have to create all these objects... easy to optimize later though.
        Timestamp timestamp = Timestamp.FromDateTime(parsed);
        int nanosToAdd = 0;
        if (subseconds != "")
        {
            // This should always work, as we've got 1-9 digits.
            int parsedFraction = int.Parse(subseconds.Substring(1), CultureInfo.InvariantCulture);
            // The table is indexed by the group length including the leading '.'.
            nanosToAdd = parsedFraction * SubsecondScalingFactors[subseconds.Length];
        }
        int secondsToAdd = 0;
        if (offset != "Z")
        {
            // This is the amount we need to *subtract* from the local time to get to UTC - hence - => +1 and vice versa.
            int sign = offset[0] == '-' ? 1 : -1;
            int hours = int.Parse(offset.Substring(1, 2), CultureInfo.InvariantCulture);
            // Parsed with the invariant culture, like every other numeric component of the timestamp.
            int minutes = int.Parse(offset.Substring(4, 2), CultureInfo.InvariantCulture);
            int totalMinutes = hours * 60 + minutes;
            if (totalMinutes > 18 * 60)
            {
                throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue);
            }
            if (totalMinutes == 0 && sign == 1)
            {
                // This is an offset of -00:00, which means "unknown local offset". It makes no sense for a timestamp.
                throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue);
            }
            // We need to *subtract* the offset from local time to get UTC.
            secondsToAdd = sign * totalMinutes * 60;
        }
        // Ensure we've got the right signs. Currently unnecessary, but easy to do.
        if (secondsToAdd < 0 && nanosToAdd > 0)
        {
            secondsToAdd++;
            nanosToAdd -= Duration.NanosecondsPerSecond;
        }
        if (secondsToAdd != 0 || nanosToAdd != 0)
        {
            timestamp += new Duration { Nanos = nanosToAdd, Seconds = secondsToAdd };
            // The resulting timestamp after offset change would be out of our expected range. Currently the Timestamp message doesn't validate this
            // anywhere, but we shouldn't parse it.
            if (timestamp.Seconds < Timestamp.UnixSecondsAtBclMinValue || timestamp.Seconds > Timestamp.UnixSecondsAtBclMaxValue)
            {
                throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue);
            }
        }
        message.Descriptor.Fields[Timestamp.SecondsFieldNumber].Accessor.SetValue(message, timestamp.Seconds);
        message.Descriptor.Fields[Timestamp.NanosFieldNumber].Accessor.SetValue(message, timestamp.Nanos);
    }
    catch (FormatException)
    {
        throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue);
    }
}
// Parses the JSON string form of a Duration (optional '-', whole seconds, optional fractional seconds,
// then 's') and stores the resulting seconds and nanos in the given Duration message.
// Throws InvalidProtocolBufferException if the token is not a string, the text does not match the expected
// shape, the seconds have leading zeroes, or the result is not a normalized duration.
private static void MergeDuration(IMessage message, JsonToken token)
{
    if (token.Type != JsonToken.TokenType.StringValue)
    {
        throw new InvalidProtocolBufferException("Expected string value for Duration");
    }
    var match = DurationRegex.Match(token.StringValue);
    if (!match.Success)
    {
        throw new InvalidProtocolBufferException("Invalid Duration value: " + token.StringValue);
    }
    var sign = match.Groups["sign"].Value;
    var secondsText = match.Groups["int"].Value;
    // Prohibit leading insignificant zeroes
    if (secondsText[0] == '0' && secondsText.Length > 1)
    {
        throw new InvalidProtocolBufferException("Invalid Duration value: " + token.StringValue);
    }
    var subseconds = match.Groups["subseconds"].Value;
    // The sign applies to both the seconds and the nanos components.
    var multiplier = sign == "-" ? -1 : 1;
    try
    {
        long seconds = long.Parse(secondsText, CultureInfo.InvariantCulture) * multiplier;
        int nanos = 0;
        if (subseconds != "")
        {
            // This should always work, as we've got 1-9 digits.
            // Parsed with the invariant culture, consistent with the seconds component above.
            int parsedFraction = int.Parse(subseconds.Substring(1), CultureInfo.InvariantCulture);
            // The table is indexed by the group length including the leading '.'.
            nanos = parsedFraction * SubsecondScalingFactors[subseconds.Length] * multiplier;
        }
        if (!Duration.IsNormalized(seconds, nanos))
        {
            throw new InvalidProtocolBufferException($"Invalid Duration value: {token.StringValue}");
        }
        message.Descriptor.Fields[Duration.SecondsFieldNumber].Accessor.SetValue(message, seconds);
        message.Descriptor.Fields[Duration.NanosFieldNumber].Accessor.SetValue(message, nanos);
    }
    catch (FormatException)
    {
        throw new InvalidProtocolBufferException($"Invalid Duration value: {token.StringValue}");
    }
}
// Parses the JSON string form of a FieldMask: a comma-separated list of paths, each of which is converted
// with ToSnakeCase and appended to the message's paths field. Empty entries are skipped.
private static void MergeFieldMask(IMessage message, JsonToken token)
{
    if (token.Type != JsonToken.TokenType.StringValue)
    {
        throw new InvalidProtocolBufferException("Expected string value for FieldMask");
    }
    // TODO: Do we *want* to remove empty entries? Probably okay to treat "" as "no paths", but "foo,,bar"?
    string[] segments = token.StringValue.Split(FieldMaskPathSeparators, StringSplitOptions.RemoveEmptyEntries);
    var pathsField = message.Descriptor.Fields[FieldMask.PathsFieldNumber];
    var paths = (IList) pathsField.Accessor.GetValue(message);
    for (int index = 0; index < segments.Length; index++)
    {
        paths.Add(ToSnakeCase(segments[index]));
    }
}
// Ported from src/google/protobuf/util/internal/utility.cc
// Converts a lowerCamelCase field mask path to snake_case. ASCII upper-case letters are lower-cased,
// with an underscore inserted before them where a word boundary is detected; all other characters are
// copied unchanged. Input which already contains an underscore is rejected.
private static string ToSnakeCase(string text)
{
    var snake = new StringBuilder(text.Length * 2);
    bool previousWasNotUpper = false;
    for (int index = 0; index < text.Length; index++)
    {
        char current = text[index];
        if (current == '_')
        {
            throw new InvalidProtocolBufferException($"Invalid field mask: {text}");
        }
        bool isUpper = current >= 'A' && current <= 'Z'; // ascii_isupper
        if (!isUpper)
        {
            snake.Append(current);
            previousWasNotUpper = true;
            continue;
        }
        // For a capital letter B, an underscore is inserted only when it is not the first character and:
        //  - it follows a non-capital: "...aB..." => "...a_b..." (e.g. "gBike" => "g_bike"), or
        //  - it is followed by a lower-case letter: "...ABc..." => "...a_bc..." (e.g. "GBike" => "g_bike").
        // No underscore at the start of input ("Biscuit" => "biscuit"), nor within or at the end of a run
        // of capitals ("GoogleLAB" => "google_lab").
        bool nextIsLower = index + 1 < text.Length
            && text[index + 1] >= 'a'
            && text[index + 1] <= 'z'; // ascii_islower(text[index + 1])
        if (index > 0 && (previousWasNotUpper || nextIsLower))
        {
            snake.Append('_');
        }
        // ascii_tolower, but we already know that the character *is* an upper case ASCII character...
        snake.Append((char) (current + 'a' - 'A'));
        previousWasNotUpper = false;
    }
    return snake.ToString();
}
#endregion
/// <summary>
/// Settings controlling JSON parsing.
/// </summary>
public sealed class Settings
{
/// <summary>
/// Default settings, as used by <see cref="JsonParser.Default"/>. This has the same default
/// recursion limit as <see cref="CodedInputStream"/>, and an empty type registry.
/// </summary>
public static Settings Default { get; }
// Workaround for the Mono compiler complaining about XML comments not being on
// valid language elements.
static Settings()
{
Default = new Settings(CodedInputStream.DefaultRecursionLimit);
}
/// <summary>
/// The maximum depth of messages to parse. Note that this limit only applies to parsing
/// messages, not collections - so a message within a collection within a message only counts as
/// depth 2, not 3.
/// </summary>
public int RecursionLimit { get; }
/// <summary>
/// The type registry used to parse <see cref="Any"/> messages.
/// </summary>
public TypeRegistry TypeRegistry { get; }
/// <summary>
/// Whether the parser should ignore unknown fields (<c>true</c>) or throw an exception when
/// they are encountered (<c>false</c>).
/// </summary>
public bool IgnoreUnknownFields { get; }
private Settings(int recursionLimit, TypeRegistry typeRegistry, bool ignoreUnknownFields)
{
RecursionLimit = recursionLimit;
TypeRegistry = ProtoPreconditions.CheckNotNull(typeRegistry, nameof(typeRegistry));
IgnoreUnknownFields = ignoreUnknownFields;
}
/// <summary>
/// Creates a new <see cref="Settings"/> object with the specified recursion limit.
/// </summary>
/// <param name="recursionLimit">The maximum depth of messages to parse</param>
public Settings(int recursionLimit) : this(recursionLimit, TypeRegistry.Empty)
{
}
/// <summary>
/// Creates a new <see cref="Settings"/> object with the specified recursion limit and type registry.
/// </summary>
/// <param name="recursionLimit">The maximum depth of messages to parse</param>
/// <param name="typeRegistry">The type registry used to parse <see cref="Any"/> messages</param>
public Settings(int recursionLimit, TypeRegistry typeRegistry) : this(recursionLimit, typeRegistry, false)
{
}
/// <summary>
/// Creates a new <see cref="Settings"/> object set to either ignore unknown fields, or throw an exception
/// when unknown fields are encountered.
/// </summary>
/// <param name="ignoreUnknownFields"><c>true</c> if unknown fields should be ignored when parsing; <c>false</c> to throw an exception.</param>
public Settings WithIgnoreUnknownFields(bool ignoreUnknownFields) => new(RecursionLimit, TypeRegistry, ignoreUnknownFields);
/// <summary>
/// Creates a new <see cref="Settings"/> object based on this one, but with the specified recursion limit.
/// </summary>
/// <param name="recursionLimit">The new recursion limit.</param>
public Settings WithRecursionLimit(int recursionLimit) => new(recursionLimit, TypeRegistry, IgnoreUnknownFields);
/// <summary>
/// Creates a new <see cref="Settings"/> object based on this one, but with the specified type registry.
/// </summary>
/// <param name="typeRegistry">The new type registry. Must not be null.</param>
public Settings WithTypeRegistry(TypeRegistry typeRegistry) =>
new(RecursionLimit,
ProtoPreconditions.CheckNotNull(typeRegistry, nameof(typeRegistry)),
IgnoreUnknownFields);
}
}
}