| #region Copyright notice and license |
| // Protocol Buffers - Google's data interchange format |
| // Copyright 2019 Google Inc. All rights reserved. |
| // https://github.com/protocolbuffers/protobuf |
| // |
| // Redistribution and use in source and binary forms, with or without |
| // modification, are permitted provided that the following conditions are |
| // met: |
| // |
| // * Redistributions of source code must retain the above copyright |
| // notice, this list of conditions and the following disclaimer. |
| // * Redistributions in binary form must reproduce the above |
| // copyright notice, this list of conditions and the following disclaimer |
| // in the documentation and/or other materials provided with the |
| // distribution. |
| // * Neither the name of Google Inc. nor the names of its |
| // contributors may be used to endorse or promote products derived from |
| // this software without specific prior written permission. |
| // |
| // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| #endregion |
| |
| using BenchmarkDotNet.Attributes; |
| using System; |
| using System.Buffers.Binary; |
| using System.Collections.Generic; |
| using System.IO; |
| using System.Buffers; |
| |
| namespace Google.Protobuf.Benchmarks |
| { |
| /// <summary> |
| /// Benchmarks throughput when parsing raw primitives. |
| /// </summary> |
| [MemoryDiagnoser] |
| public class ParseRawPrimitivesBenchmark |
| { |
| // key is the encodedSize of varint values |
| Dictionary<int, byte[]> varintInputBuffers; |
| |
| byte[] doubleInputBuffer; |
| byte[] floatInputBuffer; |
| byte[] fixedIntInputBuffer; |
| |
| // key is the encodedSize of string values |
| Dictionary<int, byte[]> stringInputBuffers; |
| Dictionary<int, ReadOnlySequence<byte>> stringInputBuffersSegmented; |
| |
| Random random = new Random(417384220); // random but deterministic seed |
| |
| public IEnumerable<int> StringEncodedSizes => new[] { 1, 4, 10, 105, 10080 }; |
| public IEnumerable<int> StringSegmentedEncodedSizes => new[] { 105, 10080 }; |
| |
| [GlobalSetup] |
| public void GlobalSetup() |
| { |
| // add some extra values that we won't read just to make sure we are far enough from the end of the buffer |
| // which allows the parser fastpath to always kick in. |
| const int paddingValueCount = 100; |
| |
| varintInputBuffers = new Dictionary<int, byte[]>(); |
| for (int encodedSize = 1; encodedSize <= 10; encodedSize++) |
| { |
| byte[] buffer = CreateBufferWithRandomVarints(random, BytesToParse / encodedSize, encodedSize, paddingValueCount); |
| varintInputBuffers.Add(encodedSize, buffer); |
| } |
| |
| doubleInputBuffer = CreateBufferWithRandomDoubles(random, BytesToParse / sizeof(double), paddingValueCount); |
| floatInputBuffer = CreateBufferWithRandomFloats(random, BytesToParse / sizeof(float), paddingValueCount); |
| fixedIntInputBuffer = CreateBufferWithRandomData(random, BytesToParse / sizeof(long), sizeof(long), paddingValueCount); |
| |
| stringInputBuffers = new Dictionary<int, byte[]>(); |
| foreach (var encodedSize in StringEncodedSizes) |
| { |
| byte[] buffer = CreateBufferWithStrings(BytesToParse / encodedSize, encodedSize, encodedSize < 10 ? 10 : 1 ); |
| stringInputBuffers.Add(encodedSize, buffer); |
| } |
| |
| stringInputBuffersSegmented = new Dictionary<int, ReadOnlySequence<byte>>(); |
| foreach (var encodedSize in StringSegmentedEncodedSizes) |
| { |
| byte[] buffer = CreateBufferWithStrings(BytesToParse / encodedSize, encodedSize, encodedSize < 10 ? 10 : 1); |
| stringInputBuffersSegmented.Add(encodedSize, ReadOnlySequenceFactory.CreateWithContent(buffer, segmentSize: 128, addEmptySegmentDelimiters: false)); |
| } |
| } |
| |
| // Total number of bytes that each benchmark will parse. |
| // Measuring the time taken to parse buffer of given size makes it easier to compare parsing speed for different |
| // types and makes it easy to calculate the througput (in MB/s) |
| // 10800 bytes is chosen because it is divisible by all possible encoded sizes for all primitive types {1..10} |
| [Params(10080)] |
| public int BytesToParse { get; set; } |
| |
| [Benchmark] |
| [Arguments(1)] |
| [Arguments(2)] |
| [Arguments(3)] |
| [Arguments(4)] |
| [Arguments(5)] |
| public int ParseRawVarint32_CodedInputStream(int encodedSize) |
| { |
| CodedInputStream cis = new CodedInputStream(varintInputBuffers[encodedSize]); |
| int sum = 0; |
| for (int i = 0; i < BytesToParse / encodedSize; i++) |
| { |
| sum += cis.ReadInt32(); |
| } |
| return sum; |
| } |
| |
| [Benchmark] |
| [Arguments(1)] |
| [Arguments(2)] |
| [Arguments(3)] |
| [Arguments(4)] |
| [Arguments(5)] |
| public int ParseRawVarint32_ParseContext(int encodedSize) |
| { |
| InitializeParseContext(varintInputBuffers[encodedSize], out ParseContext ctx); |
| int sum = 0; |
| for (int i = 0; i < BytesToParse / encodedSize; i++) |
| { |
| sum += ctx.ReadInt32(); |
| } |
| return sum; |
| } |
| |
| [Benchmark] |
| [Arguments(1)] |
| [Arguments(2)] |
| [Arguments(3)] |
| [Arguments(4)] |
| [Arguments(5)] |
| [Arguments(6)] |
| [Arguments(7)] |
| [Arguments(8)] |
| [Arguments(9)] |
| [Arguments(10)] |
| public long ParseRawVarint64_CodedInputStream(int encodedSize) |
| { |
| CodedInputStream cis = new CodedInputStream(varintInputBuffers[encodedSize]); |
| long sum = 0; |
| for (int i = 0; i < BytesToParse / encodedSize; i++) |
| { |
| sum += cis.ReadInt64(); |
| } |
| return sum; |
| } |
| |
| [Benchmark] |
| [Arguments(1)] |
| [Arguments(2)] |
| [Arguments(3)] |
| [Arguments(4)] |
| [Arguments(5)] |
| [Arguments(6)] |
| [Arguments(7)] |
| [Arguments(8)] |
| [Arguments(9)] |
| [Arguments(10)] |
| public long ParseRawVarint64_ParseContext(int encodedSize) |
| { |
| InitializeParseContext(varintInputBuffers[encodedSize], out ParseContext ctx); |
| long sum = 0; |
| for (int i = 0; i < BytesToParse / encodedSize; i++) |
| { |
| sum += ctx.ReadInt64(); |
| } |
| return sum; |
| } |
| |
| [Benchmark] |
| public uint ParseFixed32_CodedInputStream() |
| { |
| const int encodedSize = sizeof(uint); |
| CodedInputStream cis = new CodedInputStream(fixedIntInputBuffer); |
| uint sum = 0; |
| for (uint i = 0; i < BytesToParse / encodedSize; i++) |
| { |
| sum += cis.ReadFixed32(); |
| } |
| return sum; |
| } |
| |
| [Benchmark] |
| public uint ParseFixed32_ParseContext() |
| { |
| const int encodedSize = sizeof(uint); |
| InitializeParseContext(fixedIntInputBuffer, out ParseContext ctx); |
| uint sum = 0; |
| for (uint i = 0; i < BytesToParse / encodedSize; i++) |
| { |
| sum += ctx.ReadFixed32(); |
| } |
| return sum; |
| } |
| |
| [Benchmark] |
| public ulong ParseFixed64_CodedInputStream() |
| { |
| const int encodedSize = sizeof(ulong); |
| CodedInputStream cis = new CodedInputStream(fixedIntInputBuffer); |
| ulong sum = 0; |
| for (int i = 0; i < BytesToParse / encodedSize; i++) |
| { |
| sum += cis.ReadFixed64(); |
| } |
| return sum; |
| } |
| |
| [Benchmark] |
| public ulong ParseFixed64_ParseContext() |
| { |
| const int encodedSize = sizeof(ulong); |
| InitializeParseContext(fixedIntInputBuffer, out ParseContext ctx); |
| ulong sum = 0; |
| for (int i = 0; i < BytesToParse / encodedSize; i++) |
| { |
| sum += ctx.ReadFixed64(); |
| } |
| return sum; |
| } |
| |
| [Benchmark] |
| public float ParseRawFloat_CodedInputStream() |
| { |
| const int encodedSize = sizeof(float); |
| CodedInputStream cis = new CodedInputStream(floatInputBuffer); |
| float sum = 0; |
| for (int i = 0; i < BytesToParse / encodedSize; i++) |
| { |
| sum += cis.ReadFloat(); |
| } |
| return sum; |
| } |
| |
| [Benchmark] |
| public float ParseRawFloat_ParseContext() |
| { |
| const int encodedSize = sizeof(float); |
| InitializeParseContext(floatInputBuffer, out ParseContext ctx); |
| float sum = 0; |
| for (int i = 0; i < BytesToParse / encodedSize; i++) |
| { |
| sum += ctx.ReadFloat(); |
| } |
| return sum; |
| } |
| |
| [Benchmark] |
| public double ParseRawDouble_CodedInputStream() |
| { |
| const int encodedSize = sizeof(double); |
| CodedInputStream cis = new CodedInputStream(doubleInputBuffer); |
| double sum = 0; |
| for (int i = 0; i < BytesToParse / encodedSize; i++) |
| { |
| sum += cis.ReadDouble(); |
| } |
| return sum; |
| } |
| |
| [Benchmark] |
| public double ParseRawDouble_ParseContext() |
| { |
| const int encodedSize = sizeof(double); |
| InitializeParseContext(doubleInputBuffer, out ParseContext ctx); |
| double sum = 0; |
| for (int i = 0; i < BytesToParse / encodedSize; i++) |
| { |
| sum += ctx.ReadDouble(); |
| } |
| return sum; |
| } |
| |
| [Benchmark] |
| [ArgumentsSource(nameof(StringEncodedSizes))] |
| public int ParseString_CodedInputStream(int encodedSize) |
| { |
| CodedInputStream cis = new CodedInputStream(stringInputBuffers[encodedSize]); |
| int sum = 0; |
| for (int i = 0; i < BytesToParse / encodedSize; i++) |
| { |
| sum += cis.ReadString().Length; |
| } |
| return sum; |
| } |
| |
| [Benchmark] |
| [ArgumentsSource(nameof(StringEncodedSizes))] |
| public int ParseString_ParseContext(int encodedSize) |
| { |
| InitializeParseContext(stringInputBuffers[encodedSize], out ParseContext ctx); |
| int sum = 0; |
| for (int i = 0; i < BytesToParse / encodedSize; i++) |
| { |
| sum += ctx.ReadString().Length; |
| } |
| return sum; |
| } |
| |
| [Benchmark] |
| [ArgumentsSource(nameof(StringSegmentedEncodedSizes))] |
| public int ParseString_ParseContext_MultipleSegments(int encodedSize) |
| { |
| InitializeParseContext(stringInputBuffersSegmented[encodedSize], out ParseContext ctx); |
| int sum = 0; |
| for (int i = 0; i < BytesToParse / encodedSize; i++) |
| { |
| sum += ctx.ReadString().Length; |
| } |
| return sum; |
| } |
| |
| [Benchmark] |
| [ArgumentsSource(nameof(StringEncodedSizes))] |
| public int ParseBytes_CodedInputStream(int encodedSize) |
| { |
| CodedInputStream cis = new CodedInputStream(stringInputBuffers[encodedSize]); |
| int sum = 0; |
| for (int i = 0; i < BytesToParse / encodedSize; i++) |
| { |
| sum += cis.ReadBytes().Length; |
| } |
| return sum; |
| } |
| |
| [Benchmark] |
| [ArgumentsSource(nameof(StringEncodedSizes))] |
| public int ParseBytes_ParseContext(int encodedSize) |
| { |
| InitializeParseContext(stringInputBuffers[encodedSize], out ParseContext ctx); |
| int sum = 0; |
| for (int i = 0; i < BytesToParse / encodedSize; i++) |
| { |
| sum += ctx.ReadBytes().Length; |
| } |
| return sum; |
| } |
| |
| [Benchmark] |
| [ArgumentsSource(nameof(StringSegmentedEncodedSizes))] |
| public int ParseBytes_ParseContext_MultipleSegments(int encodedSize) |
| { |
| InitializeParseContext(stringInputBuffersSegmented[encodedSize], out ParseContext ctx); |
| int sum = 0; |
| for (int i = 0; i < BytesToParse / encodedSize; i++) |
| { |
| sum += ctx.ReadBytes().Length; |
| } |
| return sum; |
| } |
| |
| private static void InitializeParseContext(byte[] buffer, out ParseContext ctx) |
| { |
| ParseContext.Initialize(new ReadOnlySequence<byte>(buffer), out ctx); |
| } |
| |
| private static void InitializeParseContext(ReadOnlySequence<byte> buffer, out ParseContext ctx) |
| { |
| ParseContext.Initialize(buffer, out ctx); |
| } |
| |
| private static byte[] CreateBufferWithRandomVarints(Random random, int valueCount, int encodedSize, int paddingValueCount) |
| { |
| MemoryStream ms = new MemoryStream(); |
| CodedOutputStream cos = new CodedOutputStream(ms); |
| for (int i = 0; i < valueCount + paddingValueCount; i++) |
| { |
| cos.WriteUInt64(RandomUnsignedVarint(random, encodedSize, false)); |
| } |
| cos.Flush(); |
| var buffer = ms.ToArray(); |
| |
| if (buffer.Length != encodedSize * (valueCount + paddingValueCount)) |
| { |
| throw new InvalidOperationException($"Unexpected output buffer length {buffer.Length}"); |
| } |
| return buffer; |
| } |
| |
| private static byte[] CreateBufferWithRandomFloats(Random random, int valueCount, int paddingValueCount) |
| { |
| MemoryStream ms = new MemoryStream(); |
| CodedOutputStream cos = new CodedOutputStream(ms); |
| for (int i = 0; i < valueCount + paddingValueCount; i++) |
| { |
| cos.WriteFloat((float)random.NextDouble()); |
| } |
| cos.Flush(); |
| var buffer = ms.ToArray(); |
| return buffer; |
| } |
| |
| private static byte[] CreateBufferWithRandomDoubles(Random random, int valueCount, int paddingValueCount) |
| { |
| MemoryStream ms = new MemoryStream(); |
| CodedOutputStream cos = new CodedOutputStream(ms); |
| for (int i = 0; i < valueCount + paddingValueCount; i++) |
| { |
| cos.WriteDouble(random.NextDouble()); |
| } |
| cos.Flush(); |
| var buffer = ms.ToArray(); |
| return buffer; |
| } |
| |
| private static byte[] CreateBufferWithRandomData(Random random, int valueCount, int encodedSize, int paddingValueCount) |
| { |
| int bufferSize = (valueCount + paddingValueCount) * encodedSize; |
| byte[] buffer = new byte[bufferSize]; |
| random.NextBytes(buffer); |
| return buffer; |
| } |
| |
| /// <summary> |
| /// Generate a random value that will take exactly "encodedSize" bytes when varint-encoded. |
| /// </summary> |
| public static ulong RandomUnsignedVarint(Random random, int encodedSize, bool fitsIn32Bits) |
| { |
| Span<byte> randomBytesBuffer = stackalloc byte[8]; |
| |
| if (encodedSize < 1 || encodedSize > 10 || (fitsIn32Bits && encodedSize > 5)) |
| { |
| throw new ArgumentException("Illegal encodedSize value requested", nameof(encodedSize)); |
| } |
| const int bitsPerByte = 7; |
| |
| ulong result = 0; |
| while (true) |
| { |
| random.NextBytes(randomBytesBuffer); |
| ulong randomValue = BinaryPrimitives.ReadUInt64LittleEndian(randomBytesBuffer); |
| |
| // only use the number of random bits we need |
| ulong bitmask = encodedSize < 10 ? ((1UL << (encodedSize * bitsPerByte)) - 1) : ulong.MaxValue; |
| result = randomValue & bitmask; |
| |
| if (fitsIn32Bits) |
| { |
| // make sure the resulting value is representable by a uint. |
| result &= uint.MaxValue; |
| } |
| |
| if (encodedSize == 10) |
| { |
| // for 10-byte values the highest bit always needs to be set (7*9=63) |
| result |= ulong.MaxValue; |
| break; |
| } |
| |
| // some random values won't require the full "encodedSize" bytes, check that at least |
| // one of the top 7 bits is set. Retrying is fine since it only happens rarely |
| if (encodedSize == 1 || (result & (0x7FUL << ((encodedSize - 1) * bitsPerByte))) != 0) |
| { |
| break; |
| } |
| } |
| return result; |
| } |
| |
| private static byte[] CreateBufferWithStrings(int valueCount, int encodedSize, int paddingValueCount) |
| { |
| var str = CreateStringWithEncodedSize(encodedSize); |
| |
| MemoryStream ms = new MemoryStream(); |
| CodedOutputStream cos = new CodedOutputStream(ms); |
| for (int i = 0; i < valueCount + paddingValueCount; i++) |
| { |
| cos.WriteString(str); |
| } |
| cos.Flush(); |
| var buffer = ms.ToArray(); |
| |
| if (buffer.Length != encodedSize * (valueCount + paddingValueCount)) |
| { |
| throw new InvalidOperationException($"Unexpected output buffer length {buffer.Length}"); |
| } |
| return buffer; |
| } |
| |
| public static string CreateStringWithEncodedSize(int encodedSize) |
| { |
| var str = new string('a', encodedSize); |
| while (CodedOutputStream.ComputeStringSize(str) > encodedSize) |
| { |
| str = str.Substring(1); |
| } |
| |
| if (CodedOutputStream.ComputeStringSize(str) != encodedSize) |
| { |
| throw new InvalidOperationException($"Generated string with wrong encodedSize"); |
| } |
| return str; |
| } |
| |
| public static string CreateNonAsciiStringWithEncodedSize(int encodedSize) |
| { |
| if (encodedSize < 3) |
| { |
| throw new ArgumentException("Illegal encoded size for a string with non-ascii chars."); |
| } |
| var twoByteChar = '\u00DC'; // U-umlaut, UTF8 encoding has 2 bytes |
| var str = new string(twoByteChar, encodedSize / 2); |
| while (CodedOutputStream.ComputeStringSize(str) > encodedSize) |
| { |
| str = str.Substring(1); |
| } |
| |
| // add padding of ascii characters to reach the desired encoded size. |
| while (CodedOutputStream.ComputeStringSize(str) < encodedSize) |
| { |
| str += 'a'; |
| } |
| |
| // Note that for a few specific encodedSize values, it might be impossible to generate |
| // the string with the desired encodedSize using the algorithm above. For testing purposes, checking that |
| // the encoded size we got is actually correct is good enough. |
| if (CodedOutputStream.ComputeStringSize(str) != encodedSize) |
| { |
| throw new InvalidOperationException($"Generated string with wrong encodedSize"); |
| } |
| return str; |
| } |
| } |
| } |