add C# deterministic serialization (#13160)

https://github.com/protocolbuffers/protobuf/issues/12881

@jskeet  review please :-) as this is my first contrib to protobuf, any design / performance feedback is very sought after

Closes #13160

COPYBARA_INTEGRATE_REVIEW=https://github.com/protocolbuffers/protobuf/pull/13160 from fmg-lydonchandra:feature/12881_cs_serialization_deterministic ab7e01b8049ec98b78eb9fb6e04e8fe30ecd9a33
PiperOrigin-RevId: 568448399
diff --git a/csharp/src/Google.Protobuf.Test/Collections/MapFieldTest.cs b/csharp/src/Google.Protobuf.Test/Collections/MapFieldTest.cs
index 356765e..b464deb 100644
--- a/csharp/src/Google.Protobuf.Test/Collections/MapFieldTest.cs
+++ b/csharp/src/Google.Protobuf.Test/Collections/MapFieldTest.cs
@@ -632,6 +632,59 @@
             CollectionAssert.AreEquivalent(((IDictionary<string, string>)map).Values, ((IReadOnlyDictionary<string, string>)map).Values);
         }
 
+        [Test]
+        public void SortIntKeys_RandomOrder()
+        {
+            var map = new MapField<int, string>() { { 1, "val" }, { -1, "val"}, { 0, "val" } };
+            var sortedList = map.GetSortedListCopy(map.ToList()).ToList();
+            var sortedKeys = sortedList.Select(kvp => kvp.Key);
+            CollectionAssert.AreEqual(new[] { -1, 0, 1 }, sortedKeys);
+        }
+
+        [Test]
+        public void SortIntKeys_Empty()
+        {
+            var map = new MapField<int, string> { };
+            var sortedList = map.GetSortedListCopy(map.ToList()).ToList();
+            var sortedKeys = sortedList.Select(kvp => kvp.Key);
+            Assert.IsEmpty(sortedKeys);
+        }
+
+        [Test]
+        public void SortStringKeys_RandomOrder()
+        {
+            var map = new MapField<string, string> { { "a", "val" }, { "c", "val" }, { "b", "val" } };
+            var sortedList = map.GetSortedListCopy(map.ToList()).ToList();
+            var sortedKeys = sortedList.Select(kvp => kvp.Key);
+            CollectionAssert.AreEqual(new[] { "a", "b", "c" }, sortedKeys);
+        }
+
+        [Test]
+        public void SortStringKeys_EnsureOrdinalSort()
+        {
+            var map = new MapField<string, string>
+            {
+                { "i", "val" } , { "I", "val" }, { "ı", "val" }, { "İ", "val" }
+            };
+            var sortedList = map.GetSortedListCopy(map.ToList());
+            var sortedKeys = sortedList.Select(kvp => kvp.Key);
+            // Assert Ordinal sort  I, i, ı, İ (Non-ordinal sort returns i, I, İ, ı)
+            // I == 0x49 , i == 0x69 , İ == 0x130 , ı == 0x131
+            CollectionAssert.AreEqual(new[] { "I", "i", "İ", "ı" }, sortedKeys);
+        }
+
+        [Test]
+        public void SortBoolKeys()
+        {
+            var map = new MapField<bool, string>
+            {
+                { true, "val" } , { false, "val" }
+            };
+            var sortedList = map.GetSortedListCopy(map.ToList());
+            var sortedKeys = sortedList.Select(kvp => kvp.Key);
+            CollectionAssert.AreEqual(new[] { false, true }, sortedKeys);
+        }
+
         private static KeyValuePair<TKey, TValue> NewKeyValuePair<TKey, TValue>(TKey key, TValue value)
         {
             return new KeyValuePair<TKey, TValue>(key, value);
diff --git a/csharp/src/Google.Protobuf.Test/GeneratedMessageTest.cs b/csharp/src/Google.Protobuf.Test/GeneratedMessageTest.cs
index 7223560..49cf771 100644
--- a/csharp/src/Google.Protobuf.Test/GeneratedMessageTest.cs
+++ b/csharp/src/Google.Protobuf.Test/GeneratedMessageTest.cs
@@ -311,7 +311,7 @@
 
             output.WriteTag(TestMap.MapInt32Int32FieldNumber, WireFormat.WireType.LengthDelimited);
 
-            var key = 10; // Field 1 
+            var key = 10; // Field 1
             var value = 20; // Field 2
             var extra = 30; // Field 3
 
@@ -669,6 +669,79 @@
         }
 
         [Test]
+        public void MapStringString_DeterministicTrue_ThenBytesIdentical()
+        {
+            // Define three strings consisting of different versions of the letter I.
+            // LATIN CAPITAL LETTER I (U+0049)
+            string capitalLetterI = "I";
+            // LATIN SMALL LETTER I (U+0069)
+            string smallLetterI   = "i";
+            // LATIN SMALL LETTER DOTLESS I (U+0131)
+            string smallLetterDotlessI = "\u0131";
+            var testMap1 = new TestMap();
+
+            testMap1.MapStringString.Add(smallLetterDotlessI, "value_"+smallLetterDotlessI);
+            testMap1.MapStringString.Add(smallLetterI, "value_"+smallLetterI);
+            testMap1.MapStringString.Add(capitalLetterI, "content_"+capitalLetterI);
+            var bytes1 = SerializeTestMap(testMap1, true);
+
+            var testMap2 = new TestMap();
+            testMap2.MapStringString.Add(capitalLetterI, "content_"+capitalLetterI);
+            testMap2.MapStringString.Add(smallLetterI, "value_"+smallLetterI);
+            testMap2.MapStringString.Add(smallLetterDotlessI, "value_"+smallLetterDotlessI);
+
+            var bytes2 = SerializeTestMap(testMap2, true);
+            var parsedBytes2 = TestMap.Parser.ParseFrom(bytes2);
+            var parsedBytes1 = TestMap.Parser.ParseFrom(bytes1);
+            Assert.IsTrue(bytes1.SequenceEqual(bytes2));
+        }
+
+        [Test]
+        public void MapInt32Bytes_DeterministicTrue_ThenBytesIdentical()
+        {
+            var testMap1 = new TestMap();
+            testMap1.MapInt32Bytes.Add(1, ByteString.CopyFromUtf8("test1"));
+            testMap1.MapInt32Bytes.Add(2, ByteString.CopyFromUtf8("test2"));
+            var bytes1 = SerializeTestMap(testMap1, true);
+
+            var testMap2 = new TestMap();
+            testMap2.MapInt32Bytes.Add(2, ByteString.CopyFromUtf8("test2"));
+            testMap2.MapInt32Bytes.Add(1, ByteString.CopyFromUtf8("test1"));
+            var bytes2 = SerializeTestMap(testMap2, true);
+
+            Assert.IsTrue(bytes1.SequenceEqual(bytes2));
+        }
+
+        [Test]
+        public void MapInt32Bytes_DeterministicFalse_ThenBytesDifferent()
+        {
+            var testMap1 = new TestMap();
+            testMap1.MapInt32Bytes.Add(1, ByteString.CopyFromUtf8("test1"));
+            testMap1.MapInt32Bytes.Add(2, ByteString.CopyFromUtf8("test2"));
+            var bytes1 = SerializeTestMap(testMap1, false);
+
+            var testMap2 = new TestMap();
+            testMap2.MapInt32Bytes.Add(2, ByteString.CopyFromUtf8("test2"));
+            testMap2.MapInt32Bytes.Add(1, ByteString.CopyFromUtf8("test1"));
+            var bytes2 = SerializeTestMap(testMap2, false);
+
+            Assert.IsFalse(bytes1.SequenceEqual(bytes2));
+        }
+
+        private byte[] SerializeTestMap(TestMap testMap, bool deterministic)
+        {
+            using var memoryStream = new MemoryStream();
+            var codedOutputStream = new CodedOutputStream(memoryStream);
+            codedOutputStream.Deterministic = deterministic;
+
+            testMap.WriteTo(codedOutputStream);
+            codedOutputStream.Flush();
+
+            memoryStream.Seek(0, SeekOrigin.Begin);
+            return memoryStream.ToArray();
+        }
+
+        [Test]
         public void DiscardUnknownFields_RealDataStillRead()
         {
             var message = SampleMessages.CreateFullTestAllTypes();
diff --git a/csharp/src/Google.Protobuf/CodedOutputStream.cs b/csharp/src/Google.Protobuf/CodedOutputStream.cs
index 85e48e5..85586d2 100644
--- a/csharp/src/Google.Protobuf/CodedOutputStream.cs
+++ b/csharp/src/Google.Protobuf/CodedOutputStream.cs
@@ -137,6 +137,33 @@
             }
         }
 
+        /// <summary>
+        /// Configures whether or not serialization is deterministic.
+        /// </summary>
+        /// <remarks>
+        /// Deterministic serialization guarantees that for a given binary, equal messages (defined by the
+        /// equals methods in protos) will always be serialized to the same bytes. This implies:
+        /// <list type="bullet">
+        /// <item><description>Repeated serialization of a message will return the same bytes.</description></item>
+        /// <item><description>Different processes of the same binary (which may be executing on different machines)
+        /// will serialize equal messages to the same bytes.</description></item>
+        /// </list>
+        /// Note the deterministic serialization is NOT canonical across languages; it is also unstable
+        /// across different builds with schema changes due to unknown fields. Users who need canonical
+        /// serialization, e.g. persistent storage in a canonical form, fingerprinting, etc, should define
+        /// their own canonicalization specification and implement the serializer using reflection APIs
+        /// rather than relying on this API.
+        /// Once set, the serializer will: (Note this is an implementation detail and may subject to
+        /// change in the future)
+        /// <list type="bullet">
+        /// <item><description>Sort map entries by keys in lexicographical order or numerical order. Note: For string
+        /// keys, the order is based on comparing the UTF-16 code unit value of each character in the strings.
+        /// The order may be different from the deterministic serialization in other languages where
+        /// maps are sorted on the lexicographical order of the UTF8 encoded keys.</description></item>
+        /// </list>
+        /// </remarks>
+        public bool Deterministic { get; set; }
+
         #region Writing of values (not including tags)
 
         /// <summary>
@@ -462,7 +489,7 @@
         #endregion
 
         #region Underlying writing primitives
-        
+
         /// <summary>
         /// Writes a 32 bit value as a varint. The fast route is taken when
         /// there's enough buffer space left to whizz through without checking
diff --git a/csharp/src/Google.Protobuf/Collections/MapField.cs b/csharp/src/Google.Protobuf/Collections/MapField.cs
index f0be958..722cc92 100644
--- a/csharp/src/Google.Protobuf/Collections/MapField.cs
+++ b/csharp/src/Google.Protobuf/Collections/MapField.cs
@@ -327,7 +327,7 @@
         /// Returns a hash code for this instance.
         /// </summary>
         /// <returns>
-        /// A hash code for this instance, suitable for use in hashing algorithms and data structures like a hash table. 
+        /// A hash code for this instance, suitable for use in hashing algorithms and data structures like a hash table.
         /// </returns>
         public override int GetHashCode()
         {
@@ -432,7 +432,13 @@
             WriteContext.Initialize(output, out WriteContext ctx);
             try
             {
-                WriteTo(ref ctx, codec);
+                IEnumerable<KeyValuePair<TKey, TValue>> listToWrite = list;
+
+                if (output.Deterministic)
+                {
+                    listToWrite = GetSortedListCopy(list);
+                }
+                WriteTo(ref ctx, codec, listToWrite);
             }
             finally
             {
@@ -440,6 +446,23 @@
             }
         }
 
+        internal IEnumerable<KeyValuePair<TKey, TValue>> GetSortedListCopy(IEnumerable<KeyValuePair<TKey, TValue>> listToSort)
+        {
+            // We can't sort the list in place, as that would invalidate the linked list.
+            // Instead, we create a new list, sort that, and then write it out.
+            var listToWrite = new List<KeyValuePair<TKey, TValue>>(listToSort);
+            listToWrite.Sort((pair1, pair2) =>
+            {
+                if (typeof(TKey) == typeof(string))
+                {
+                    // Use Ordinal, otherwise Comparer<string>.Default uses StringComparer.CurrentCulture
+                    return StringComparer.Ordinal.Compare(pair1.Key.ToString(), pair2.Key.ToString());
+                }
+                return Comparer<TKey>.Default.Compare(pair1.Key, pair2.Key);
+            });
+            return listToWrite;
+        }
+
         /// <summary>
         /// Writes the contents of this map to the given write context, using the specified codec
         /// to encode each entry.
@@ -449,7 +472,18 @@
         [SecuritySafeCritical]
         public void WriteTo(ref WriteContext ctx, Codec codec)
         {
-            foreach (var entry in list)
+            IEnumerable<KeyValuePair<TKey, TValue>> listToWrite = list;
+            if (ctx.state.CodedOutputStream?.Deterministic ?? false)
+            {
+                listToWrite = GetSortedListCopy(list);
+            }
+            WriteTo(ref ctx, codec, listToWrite);
+        }
+
+        [SecuritySafeCritical]
+        private void WriteTo(ref WriteContext ctx, Codec codec, IEnumerable<KeyValuePair<TKey, TValue>> listKvp)
+        {
+            foreach (var entry in listKvp)
             {
                 ctx.WriteTag(codec.MapTag);
 
@@ -631,7 +665,7 @@
                 this.containsCheck = containsCheck;
             }
 
-            public int Count => parent.Count; 
+            public int Count => parent.Count;
 
             public bool IsReadOnly => true;