add C# deterministic serialization (#13160)
https://github.com/protocolbuffers/protobuf/issues/12881
@jskeet review please :-) as this is my first contrib to protobuf, any design / performance feedback is very sought after
Closes #13160
COPYBARA_INTEGRATE_REVIEW=https://github.com/protocolbuffers/protobuf/pull/13160 from fmg-lydonchandra:feature/12881_cs_serialization_deterministic ab7e01b8049ec98b78eb9fb6e04e8fe30ecd9a33
PiperOrigin-RevId: 568448399
diff --git a/csharp/src/Google.Protobuf.Test/Collections/MapFieldTest.cs b/csharp/src/Google.Protobuf.Test/Collections/MapFieldTest.cs
index 356765e..b464deb 100644
--- a/csharp/src/Google.Protobuf.Test/Collections/MapFieldTest.cs
+++ b/csharp/src/Google.Protobuf.Test/Collections/MapFieldTest.cs
@@ -632,6 +632,59 @@
CollectionAssert.AreEquivalent(((IDictionary<string, string>)map).Values, ((IReadOnlyDictionary<string, string>)map).Values);
}
+ [Test]
+ public void SortIntKeys_RandomOrder()
+ {
+ var map = new MapField<int, string>() { { 1, "val" }, { -1, "val"}, { 0, "val" } };
+ var sortedList = map.GetSortedListCopy(map.ToList()).ToList();
+ var sortedKeys = sortedList.Select(kvp => kvp.Key);
+ CollectionAssert.AreEqual(new[] { -1, 0, 1 }, sortedKeys);
+ }
+
+ [Test]
+ public void SortIntKeys_Empty()
+ {
+ var map = new MapField<int, string> { };
+ var sortedList = map.GetSortedListCopy(map.ToList()).ToList();
+ var sortedKeys = sortedList.Select(kvp => kvp.Key);
+ Assert.IsEmpty(sortedKeys);
+ }
+
+ [Test]
+ public void SortStringKeys_RandomOrder()
+ {
+ var map = new MapField<string, string> { { "a", "val" }, { "c", "val" }, { "b", "val" } };
+ var sortedList = map.GetSortedListCopy(map.ToList()).ToList();
+ var sortedKeys = sortedList.Select(kvp => kvp.Key);
+ CollectionAssert.AreEqual(new[] { "a", "b", "c" }, sortedKeys);
+ }
+
+ [Test]
+ public void SortStringKeys_EnsureOrdinalSort()
+ {
+ var map = new MapField<string, string>
+ {
+ { "i", "val" } , { "I", "val" }, { "ı", "val" }, { "İ", "val" }
+ };
+ var sortedList = map.GetSortedListCopy(map.ToList());
+ var sortedKeys = sortedList.Select(kvp => kvp.Key);
+ // Assert Ordinal sort I, i, ı, İ (Non-ordinal sort returns i, I, İ, ı)
+ // I == 0x49 , i == 0x69 , İ == 0x130 , ı == 0x131
+ CollectionAssert.AreEqual(new[] { "I", "i", "İ", "ı" }, sortedKeys);
+ }
+
+ [Test]
+ public void SortBoolKeys()
+ {
+ var map = new MapField<bool, string>
+ {
+ { true, "val" } , { false, "val" }
+ };
+ var sortedList = map.GetSortedListCopy(map.ToList());
+ var sortedKeys = sortedList.Select(kvp => kvp.Key);
+ CollectionAssert.AreEqual(new[] { false, true }, sortedKeys);
+ }
+
private static KeyValuePair<TKey, TValue> NewKeyValuePair<TKey, TValue>(TKey key, TValue value)
{
return new KeyValuePair<TKey, TValue>(key, value);
diff --git a/csharp/src/Google.Protobuf.Test/GeneratedMessageTest.cs b/csharp/src/Google.Protobuf.Test/GeneratedMessageTest.cs
index 7223560..49cf771 100644
--- a/csharp/src/Google.Protobuf.Test/GeneratedMessageTest.cs
+++ b/csharp/src/Google.Protobuf.Test/GeneratedMessageTest.cs
@@ -311,7 +311,7 @@
output.WriteTag(TestMap.MapInt32Int32FieldNumber, WireFormat.WireType.LengthDelimited);
- var key = 10; // Field 1
+ var key = 10; // Field 1
var value = 20; // Field 2
var extra = 30; // Field 3
@@ -669,6 +669,79 @@
}
[Test]
+ public void MapStringString_DeterministicTrue_ThenBytesIdentical()
+ {
+ // Define three strings consisting of different versions of the letter I.
+ // LATIN CAPITAL LETTER I (U+0049)
+ string capitalLetterI = "I";
+ // LATIN SMALL LETTER I (U+0069)
+ string smallLetterI = "i";
+ // LATIN SMALL LETTER DOTLESS I (U+0131)
+ string smallLetterDotlessI = "\u0131";
+ var testMap1 = new TestMap();
+
+ testMap1.MapStringString.Add(smallLetterDotlessI, "value_"+smallLetterDotlessI);
+ testMap1.MapStringString.Add(smallLetterI, "value_"+smallLetterI);
+ testMap1.MapStringString.Add(capitalLetterI, "content_"+capitalLetterI);
+ var bytes1 = SerializeTestMap(testMap1, true);
+
+ var testMap2 = new TestMap();
+ testMap2.MapStringString.Add(capitalLetterI, "content_"+capitalLetterI);
+ testMap2.MapStringString.Add(smallLetterI, "value_"+smallLetterI);
+ testMap2.MapStringString.Add(smallLetterDotlessI, "value_"+smallLetterDotlessI);
+
+ var bytes2 = SerializeTestMap(testMap2, true);
+ var parsedBytes2 = TestMap.Parser.ParseFrom(bytes2);
+ var parsedBytes1 = TestMap.Parser.ParseFrom(bytes1);
+ Assert.IsTrue(bytes1.SequenceEqual(bytes2));
+ }
+
+ [Test]
+ public void MapInt32Bytes_DeterministicTrue_ThenBytesIdentical()
+ {
+ var testMap1 = new TestMap();
+ testMap1.MapInt32Bytes.Add(1, ByteString.CopyFromUtf8("test1"));
+ testMap1.MapInt32Bytes.Add(2, ByteString.CopyFromUtf8("test2"));
+ var bytes1 = SerializeTestMap(testMap1, true);
+
+ var testMap2 = new TestMap();
+ testMap2.MapInt32Bytes.Add(2, ByteString.CopyFromUtf8("test2"));
+ testMap2.MapInt32Bytes.Add(1, ByteString.CopyFromUtf8("test1"));
+ var bytes2 = SerializeTestMap(testMap2, true);
+
+ Assert.IsTrue(bytes1.SequenceEqual(bytes2));
+ }
+
+ [Test]
+ public void MapInt32Bytes_DeterministicFalse_ThenBytesDifferent()
+ {
+ var testMap1 = new TestMap();
+ testMap1.MapInt32Bytes.Add(1, ByteString.CopyFromUtf8("test1"));
+ testMap1.MapInt32Bytes.Add(2, ByteString.CopyFromUtf8("test2"));
+ var bytes1 = SerializeTestMap(testMap1, false);
+
+ var testMap2 = new TestMap();
+ testMap2.MapInt32Bytes.Add(2, ByteString.CopyFromUtf8("test2"));
+ testMap2.MapInt32Bytes.Add(1, ByteString.CopyFromUtf8("test1"));
+ var bytes2 = SerializeTestMap(testMap2, false);
+
+ Assert.IsFalse(bytes1.SequenceEqual(bytes2));
+ }
+
+ private byte[] SerializeTestMap(TestMap testMap, bool deterministic)
+ {
+ using var memoryStream = new MemoryStream();
+ var codedOutputStream = new CodedOutputStream(memoryStream);
+ codedOutputStream.Deterministic = deterministic;
+
+ testMap.WriteTo(codedOutputStream);
+ codedOutputStream.Flush();
+
+ memoryStream.Seek(0, SeekOrigin.Begin);
+ return memoryStream.ToArray();
+ }
+
+ [Test]
public void DiscardUnknownFields_RealDataStillRead()
{
var message = SampleMessages.CreateFullTestAllTypes();
diff --git a/csharp/src/Google.Protobuf/CodedOutputStream.cs b/csharp/src/Google.Protobuf/CodedOutputStream.cs
index 85e48e5..85586d2 100644
--- a/csharp/src/Google.Protobuf/CodedOutputStream.cs
+++ b/csharp/src/Google.Protobuf/CodedOutputStream.cs
@@ -137,6 +137,33 @@
}
}
+ /// <summary>
+ /// Configures whether or not serialization is deterministic.
+ /// </summary>
+ /// <remarks>
+ /// Deterministic serialization guarantees that for a given binary, equal messages (defined by the
+ /// equals methods in protos) will always be serialized to the same bytes. This implies:
+ /// <list type="bullet">
+ /// <item><description>Repeated serialization of a message will return the same bytes.</description></item>
+ /// <item><description>Different processes of the same binary (which may be executing on different machines)
+ /// will serialize equal messages to the same bytes.</description></item>
+ /// </list>
+ /// Note the deterministic serialization is NOT canonical across languages; it is also unstable
+ /// across different builds with schema changes due to unknown fields. Users who need canonical
+ /// serialization, e.g. persistent storage in a canonical form, fingerprinting, etc, should define
+ /// their own canonicalization specification and implement the serializer using reflection APIs
+ /// rather than relying on this API.
+ /// Once set, the serializer will: (Note this is an implementation detail and may subject to
+ /// change in the future)
+ /// <list type="bullet">
+ /// <item><description>Sort map entries by keys in lexicographical order or numerical order. Note: For string
+ /// keys, the order is based on comparing the UTF-16 code unit value of each character in the strings.
+ /// The order may be different from the deterministic serialization in other languages where
+ /// maps are sorted on the lexicographical order of the UTF8 encoded keys.</description></item>
+ /// </list>
+ /// </remarks>
+ public bool Deterministic { get; set; }
+
#region Writing of values (not including tags)
/// <summary>
@@ -462,7 +489,7 @@
#endregion
#region Underlying writing primitives
-
+
/// <summary>
/// Writes a 32 bit value as a varint. The fast route is taken when
/// there's enough buffer space left to whizz through without checking
diff --git a/csharp/src/Google.Protobuf/Collections/MapField.cs b/csharp/src/Google.Protobuf/Collections/MapField.cs
index f0be958..722cc92 100644
--- a/csharp/src/Google.Protobuf/Collections/MapField.cs
+++ b/csharp/src/Google.Protobuf/Collections/MapField.cs
@@ -327,7 +327,7 @@
/// Returns a hash code for this instance.
/// </summary>
/// <returns>
- /// A hash code for this instance, suitable for use in hashing algorithms and data structures like a hash table.
+ /// A hash code for this instance, suitable for use in hashing algorithms and data structures like a hash table.
/// </returns>
public override int GetHashCode()
{
@@ -432,7 +432,13 @@
WriteContext.Initialize(output, out WriteContext ctx);
try
{
- WriteTo(ref ctx, codec);
+ IEnumerable<KeyValuePair<TKey, TValue>> listToWrite = list;
+
+ if (output.Deterministic)
+ {
+ listToWrite = GetSortedListCopy(list);
+ }
+ WriteTo(ref ctx, codec, listToWrite);
}
finally
{
@@ -440,6 +446,23 @@
}
}
+ internal IEnumerable<KeyValuePair<TKey, TValue>> GetSortedListCopy(IEnumerable<KeyValuePair<TKey, TValue>> listToSort)
+ {
+ // We can't sort the list in place, as that would invalidate the linked list.
+ // Instead, we create a new list, sort that, and then write it out.
+ var listToWrite = new List<KeyValuePair<TKey, TValue>>(listToSort);
+ listToWrite.Sort((pair1, pair2) =>
+ {
+ if (typeof(TKey) == typeof(string))
+ {
+ // Use Ordinal, otherwise Comparer<string>.Default uses StringComparer.CurrentCulture
+ return StringComparer.Ordinal.Compare(pair1.Key.ToString(), pair2.Key.ToString());
+ }
+ return Comparer<TKey>.Default.Compare(pair1.Key, pair2.Key);
+ });
+ return listToWrite;
+ }
+
/// <summary>
/// Writes the contents of this map to the given write context, using the specified codec
/// to encode each entry.
@@ -449,7 +472,18 @@
[SecuritySafeCritical]
public void WriteTo(ref WriteContext ctx, Codec codec)
{
- foreach (var entry in list)
+ IEnumerable<KeyValuePair<TKey, TValue>> listToWrite = list;
+ if (ctx.state.CodedOutputStream?.Deterministic ?? false)
+ {
+ listToWrite = GetSortedListCopy(list);
+ }
+ WriteTo(ref ctx, codec, listToWrite);
+ }
+
+ [SecuritySafeCritical]
+ private void WriteTo(ref WriteContext ctx, Codec codec, IEnumerable<KeyValuePair<TKey, TValue>> listKvp)
+ {
+ foreach (var entry in listKvp)
{
ctx.WriteTag(codec.MapTag);
@@ -631,7 +665,7 @@
this.containsCheck = containsCheck;
}
- public int Count => parent.Count;
+ public int Count => parent.Count;
public bool IsReadOnly => true;