Improve WriteString perf with SIMD
diff --git a/csharp/src/Google.Protobuf.Benchmarks/Google.Protobuf.Benchmarks.csproj b/csharp/src/Google.Protobuf.Benchmarks/Google.Protobuf.Benchmarks.csproj
index 73042f3..ac8e009a 100644
--- a/csharp/src/Google.Protobuf.Benchmarks/Google.Protobuf.Benchmarks.csproj
+++ b/csharp/src/Google.Protobuf.Benchmarks/Google.Protobuf.Benchmarks.csproj
@@ -2,7 +2,7 @@
 
   <PropertyGroup>
     <OutputType>Exe</OutputType>
-    <TargetFramework>netcoreapp3.1</TargetFramework>
+    <TargetFramework>net5.0</TargetFramework>
     <AssemblyOriginatorKeyFile>../../keys/Google.Protobuf.snk</AssemblyOriginatorKeyFile>
     <SignAssembly>true</SignAssembly>
     <IsPackable>False</IsPackable>
diff --git a/csharp/src/Google.Protobuf.Test/Buffers/ArrayBufferWriter.cs b/csharp/src/Google.Protobuf.Test/Buffers/ArrayBufferWriter.cs
index 5b9913b..a36a951 100644
--- a/csharp/src/Google.Protobuf.Test/Buffers/ArrayBufferWriter.cs
+++ b/csharp/src/Google.Protobuf.Test/Buffers/ArrayBufferWriter.cs
@@ -42,7 +42,7 @@
     /// ArrayBufferWriter is originally from corefx, and has been contributed to Protobuf
     /// https://github.com/dotnet/runtime/blob/071da4c41aa808c949a773b92dca6f88de9d11f3/src/libraries/Common/src/System/Buffers/ArrayBufferWriter.cs
     /// </summary>
-    internal sealed class ArrayBufferWriter<T> : IBufferWriter<T>
+    internal sealed class TestArrayBufferWriter<T> : IBufferWriter<T>
     {
         private T[] _buffer;
         private int _index;
@@ -50,10 +50,10 @@
         private const int DefaultInitialBufferSize = 256;
 
         /// <summary>
-        /// Creates an instance of an <see cref="ArrayBufferWriter{T}"/>, in which data can be written to,
+        /// Creates an instance of an <see cref="TestArrayBufferWriter{T}"/>, in which data can be written to,
         /// with the default initial capacity.
         /// </summary>
-        public ArrayBufferWriter()
+        public TestArrayBufferWriter()
         {
             _buffer = new T[0];
             _index = 0;
@@ -66,14 +66,14 @@
         public int? MaxGrowBy { get; set; }
 
         /// <summary>
-        /// Creates an instance of an <see cref="ArrayBufferWriter{T}"/>, in which data can be written to,
+        /// Creates an instance of an <see cref="TestArrayBufferWriter{T}"/>, in which data can be written to,
         /// with an initial capacity specified.
         /// </summary>
         /// <param name="initialCapacity">The minimum capacity with which to initialize the underlying buffer.</param>
         /// <exception cref="ArgumentException">
         /// Thrown when <paramref name="initialCapacity"/> is not positive (i.e. less than or equal to 0).
         /// </exception>
-        public ArrayBufferWriter(int initialCapacity)
+        public TestArrayBufferWriter(int initialCapacity)
         {
             if (initialCapacity <= 0)
                 throw new ArgumentException(nameof(initialCapacity));
@@ -111,7 +111,7 @@
         /// Clears the data written to the underlying buffer.
         /// </summary>
         /// <remarks>
-        /// You must clear the <see cref="ArrayBufferWriter{T}"/> before trying to re-use it.
+        /// You must clear the <see cref="TestArrayBufferWriter{T}"/> before trying to re-use it.
         /// </remarks>
         public void Clear()
         {
diff --git a/csharp/src/Google.Protobuf.Test/CodedOutputStreamTest.cs b/csharp/src/Google.Protobuf.Test/CodedOutputStreamTest.cs
index 1e5333c..1444009 100644
--- a/csharp/src/Google.Protobuf.Test/CodedOutputStreamTest.cs
+++ b/csharp/src/Google.Protobuf.Test/CodedOutputStreamTest.cs
@@ -58,7 +58,7 @@
                 Assert.AreEqual(data, rawOutput.ToArray());

 

                 // IBufferWriter

-                var bufferWriter = new ArrayBufferWriter<byte>();

+                var bufferWriter = new TestArrayBufferWriter<byte>();

                 WriteContext.Initialize(bufferWriter, out WriteContext ctx);

                 ctx.WriteUInt32((uint) value);

                 ctx.Flush();

@@ -77,7 +77,7 @@
                 Assert.AreEqual(data, rawOutput.ToArray());

 

                 // IBufferWriter

-                var bufferWriter = new ArrayBufferWriter<byte>();

+                var bufferWriter = new TestArrayBufferWriter<byte>();

                 WriteContext.Initialize(bufferWriter, out WriteContext ctx);

                 ctx.WriteUInt64(value);

                 ctx.Flush();

@@ -100,7 +100,7 @@
                     output.Flush();

                     Assert.AreEqual(data, rawOutput.ToArray());

 

-                    var bufferWriter = new ArrayBufferWriter<byte>();

+                    var bufferWriter = new TestArrayBufferWriter<byte>();

                     bufferWriter.MaxGrowBy = bufferSize;

                     WriteContext.Initialize(bufferWriter, out WriteContext ctx);

                     ctx.WriteUInt32((uint) value);

@@ -115,7 +115,7 @@
                     output.Flush();

                     Assert.AreEqual(data, rawOutput.ToArray());

 

-                    var bufferWriter = new ArrayBufferWriter<byte>();

+                    var bufferWriter = new TestArrayBufferWriter<byte>();

                     bufferWriter.MaxGrowBy = bufferSize;

                     WriteContext.Initialize(bufferWriter, out WriteContext ctx);

                     ctx.WriteUInt64(value);

@@ -174,7 +174,7 @@
                 output.Flush();

                 Assert.AreEqual(data, rawOutput.ToArray());

 

-                var bufferWriter = new ArrayBufferWriter<byte>();

+                var bufferWriter = new TestArrayBufferWriter<byte>();

                 WriteContext.Initialize(bufferWriter, out WriteContext ctx);

                 ctx.WriteFixed32(value);

                 ctx.Flush();

@@ -190,7 +190,7 @@
                 output.Flush();

                 Assert.AreEqual(data, rawOutput.ToArray());

 

-                var bufferWriter = new ArrayBufferWriter<byte>();

+                var bufferWriter = new TestArrayBufferWriter<byte>();

                 bufferWriter.MaxGrowBy = bufferSize;

                 WriteContext.Initialize(bufferWriter, out WriteContext ctx);

                 ctx.WriteFixed32(value);

@@ -212,7 +212,7 @@
                 output.Flush();

                 Assert.AreEqual(data, rawOutput.ToArray());

 

-                var bufferWriter = new ArrayBufferWriter<byte>();

+                var bufferWriter = new TestArrayBufferWriter<byte>();

                 WriteContext.Initialize(bufferWriter, out WriteContext ctx);

                 ctx.WriteFixed64(value);

                 ctx.Flush();

@@ -228,7 +228,7 @@
                 output.Flush();

                 Assert.AreEqual(data, rawOutput.ToArray());

 

-                var bufferWriter = new ArrayBufferWriter<byte>();

+                var bufferWriter = new TestArrayBufferWriter<byte>();

                 bufferWriter.MaxGrowBy = blockSize;

                 WriteContext.Initialize(bufferWriter, out WriteContext ctx);

                 ctx.WriteFixed64(value);

@@ -270,7 +270,7 @@
                 output.Flush();

                 Assert.AreEqual(rawBytes, rawOutput.ToArray());

 

-                var bufferWriter = new ArrayBufferWriter<byte>();

+                var bufferWriter = new TestArrayBufferWriter<byte>();

                 bufferWriter.MaxGrowBy = blockSize;

                 message.WriteTo(bufferWriter);

                 Assert.AreEqual(rawBytes, bufferWriter.WrittenSpan.ToArray()); 

@@ -292,7 +292,7 @@
             output.Flush();

             byte[] expectedBytes2 = expectedOutput.ToArray();

 

-            var bufferWriter = new ArrayBufferWriter<byte>();

+            var bufferWriter = new TestArrayBufferWriter<byte>();

             WriteContext.Initialize(bufferWriter, out WriteContext ctx);

             ctx.WriteMessage(message);

             ctx.Flush();

@@ -519,7 +519,21 @@
         }

 

         [Test]

-        public void WriteStringsOfDifferentSizes()

+        public void WriteString_AsciiSmall_MaxUtf8SizeExceedsBuffer()

+        {

+            var buffer = new byte[5];

+            var output = new CodedOutputStream(buffer);

+            output.WriteString("ABC");

+

+            output.Flush();

+

+            // Verify written content

+            var input = new CodedInputStream(buffer);

+            Assert.AreEqual("ABC", input.ReadString());

+        }

+

+        [Test]

+        public void WriteStringsOfDifferentSizes_Ascii()

         {

             for (int i = 1; i <= 1024; i++)

             {

@@ -540,5 +554,30 @@
                 Assert.AreEqual(s, input.ReadString());

             }

         }

+

+        [Test]

+        public void WriteStringsOfDifferentSizes_Unicode()

+        {

+            for (int i = 1; i <= 1024; i++)

+            {

+                var buffer = new byte[4096];

+                var output = new CodedOutputStream(buffer);

+                var sb = new StringBuilder();

+                for (int j = 0; j < i; j++)

+                {

+                    char c = (char)((j % 10) + 10112);

+                    sb.Append(c.ToString()); // incrementing unicode numbers, repeating

+                }

+                var s = sb.ToString();

+                output.WriteString(s);

+

+                output.Flush();

+

+                // Verify written content

+                var input = new CodedInputStream(buffer);

+

+                Assert.AreEqual(s, input.ReadString());

+            }

+        }

     }

 }
\ No newline at end of file
diff --git a/csharp/src/Google.Protobuf.Test/Google.Protobuf.Test.csproj b/csharp/src/Google.Protobuf.Test/Google.Protobuf.Test.csproj
index 7bd3f84..cdfa98e 100644
--- a/csharp/src/Google.Protobuf.Test/Google.Protobuf.Test.csproj
+++ b/csharp/src/Google.Protobuf.Test/Google.Protobuf.Test.csproj
@@ -1,7 +1,7 @@
 <Project Sdk="Microsoft.NET.Sdk">
 
   <PropertyGroup>
-    <TargetFrameworks>net451;netcoreapp2.1</TargetFrameworks>
+    <TargetFrameworks>net451;netcoreapp2.1;net50</TargetFrameworks>
     <AssemblyOriginatorKeyFile>../../keys/Google.Protobuf.snk</AssemblyOriginatorKeyFile>
     <SignAssembly>true</SignAssembly>
     <IsPackable>False</IsPackable>
diff --git a/csharp/src/Google.Protobuf.Test/JsonParserTest.cs b/csharp/src/Google.Protobuf.Test/JsonParserTest.cs
index e170fcc..87a389a 100644
--- a/csharp/src/Google.Protobuf.Test/JsonParserTest.cs
+++ b/csharp/src/Google.Protobuf.Test/JsonParserTest.cs
@@ -551,9 +551,11 @@
         }
 
         [Test]
+#if !NET5_0
         [TestCase("1.7977e308")]
         [TestCase("-1.7977e308")]
         [TestCase("1e309")]
+#endif
         [TestCase("1,0")]
         [TestCase("1.0.0")]
         [TestCase("+1")]
diff --git a/csharp/src/Google.Protobuf.Test/JsonTokenizerTest.cs b/csharp/src/Google.Protobuf.Test/JsonTokenizerTest.cs
index df43eff..55ec02e 100644
--- a/csharp/src/Google.Protobuf.Test/JsonTokenizerTest.cs
+++ b/csharp/src/Google.Protobuf.Test/JsonTokenizerTest.cs
@@ -199,8 +199,10 @@
         [TestCase("1e-")]
         [TestCase("--")]
         [TestCase("--1")]
+#if !NET5_0
         [TestCase("-1.7977e308")]
         [TestCase("1.7977e308")]
+#endif
         public void InvalidNumberValue(string json)
         {
             AssertThrowsAfter(json);
diff --git a/csharp/src/Google.Protobuf.Test/LegacyGeneratedCodeTest.cs b/csharp/src/Google.Protobuf.Test/LegacyGeneratedCodeTest.cs
index da7b4a8..22adcaa 100644
--- a/csharp/src/Google.Protobuf.Test/LegacyGeneratedCodeTest.cs
+++ b/csharp/src/Google.Protobuf.Test/LegacyGeneratedCodeTest.cs
@@ -141,7 +141,7 @@
             };
             var exception = Assert.Throws<InvalidProtocolBufferException>(() =>
             {
-                WriteContext.Initialize(new ArrayBufferWriter<byte>(), out WriteContext writeCtx);
+                WriteContext.Initialize(new TestArrayBufferWriter<byte>(), out WriteContext writeCtx);
                 ((IBufferMessage)message).InternalWriteTo(ref writeCtx);
             });
             Assert.AreEqual($"Message {typeof(LegacyGeneratedCodeMessageA).Name} doesn't provide the generated method that enables WriteContext-based serialization. You might need to regenerate the generated protobuf code.", exception.Message);
diff --git a/csharp/src/Google.Protobuf.Test/MessageParsingHelpers.cs b/csharp/src/Google.Protobuf.Test/MessageParsingHelpers.cs
index 36a2f02..65d2fe0 100644
--- a/csharp/src/Google.Protobuf.Test/MessageParsingHelpers.cs
+++ b/csharp/src/Google.Protobuf.Test/MessageParsingHelpers.cs
@@ -83,7 +83,7 @@
             var bytes = message.ToByteArray();
 
             // also serialize using IBufferWriter and check it leads to the same data
-            var bufferWriter = new ArrayBufferWriter<byte>();
+            var bufferWriter = new TestArrayBufferWriter<byte>();
             message.WriteTo(bufferWriter);
             Assert.AreEqual(bytes, bufferWriter.WrittenSpan.ToArray(), "Both serialization approaches need to result in the same data.");
 
@@ -112,7 +112,7 @@
             Assert.AreEqual(message.CalculateSize(), bytes.Length);
 
             // serialize using IBufferWriter and check it leads to the same output
-            var bufferWriter = new ArrayBufferWriter<byte>();
+            var bufferWriter = new TestArrayBufferWriter<byte>();
             message.WriteTo(bufferWriter);
             Assert.AreEqual(bytes, bufferWriter.WrittenSpan.ToArray());
 
@@ -124,7 +124,7 @@
             // test for different IBufferWriter.GetSpan() segment sizes
             for (int blockSize = 1; blockSize < 256; blockSize *= 2)
             {
-                var segmentedBufferWriter = new ArrayBufferWriter<byte>();
+                var segmentedBufferWriter = new TestArrayBufferWriter<byte>();
                 segmentedBufferWriter.MaxGrowBy = blockSize;
                 message.WriteTo(segmentedBufferWriter);
                 Assert.AreEqual(bytes, segmentedBufferWriter.WrittenSpan.ToArray());
diff --git a/csharp/src/Google.Protobuf/Google.Protobuf.csproj b/csharp/src/Google.Protobuf/Google.Protobuf.csproj
index f160634..4dbb349 100644
--- a/csharp/src/Google.Protobuf/Google.Protobuf.csproj
+++ b/csharp/src/Google.Protobuf/Google.Protobuf.csproj
@@ -1,4 +1,4 @@
-<Project Sdk="Microsoft.NET.Sdk">
+<Project Sdk="Microsoft.NET.Sdk">
 
   <PropertyGroup>
     <Description>C# runtime library for Protocol Buffers - Google's data interchange format.</Description>
@@ -8,7 +8,7 @@
     <!-- C# 7.2 is required for Span/BufferWriter/ReadOnlySequence -->
     <LangVersion>7.2</LangVersion>
     <Authors>Google Inc.</Authors>
-    <TargetFrameworks>netstandard1.1;netstandard2.0;net45</TargetFrameworks>
+    <TargetFrameworks>netstandard1.1;netstandard2.0;net45;net50</TargetFrameworks>
     <GenerateDocumentationFile>true</GenerateDocumentationFile>
     <AssemblyOriginatorKeyFile>../../keys/Google.Protobuf.snk</AssemblyOriginatorKeyFile>
     <SignAssembly>true</SignAssembly>
@@ -27,15 +27,23 @@
     <DefineConstants>$(DefineConstants);GOOGLE_PROTOBUF_SUPPORT_FAST_STRING</DefineConstants>
   </PropertyGroup>
 
+  <PropertyGroup Condition=" '$(TargetFramework)' == 'net50' ">
+    <DefineConstants>$(DefineConstants);GOOGLE_PROTOBUF_SUPPORT_FAST_STRING;GOOGLE_PROTOBUF_SIMD</DefineConstants>
+  </PropertyGroup>
+
   <ItemGroup>
-    <PackageReference Include="System.Memory" Version="4.5.3"/>
     <PackageReference Include="Microsoft.SourceLink.GitHub" PrivateAssets="All" Version="1.0.0"/>
     <!-- Needed for the net45 build to work on Unix. See https://github.com/dotnet/designs/pull/33 -->
     <PackageReference Include="Microsoft.NETFramework.ReferenceAssemblies" PrivateAssets="All" Version="1.0.0"/>
   </ItemGroup>
 
-  <!-- Needed for netcoreapp2.1 to work correctly. .NET is not able to load the assembly without this -->
+  <ItemGroup Condition=" '$(TargetFramework)' == 'net45' OR '$(TargetFramework)' == 'netstandard1.1' ">
+    <PackageReference Include="System.Memory" Version="4.5.3"/>
+  </ItemGroup>
+
   <ItemGroup Condition=" '$(TargetFramework)' == 'netstandard2.0' ">
+    <PackageReference Include="System.Memory" Version="4.5.3"/>
+    <!-- Needed for netcoreapp2.1 to work correctly. .NET is not able to load the assembly without this -->
     <PackageReference Include="System.Runtime.CompilerServices.Unsafe" Version="4.5.2"/>
   </ItemGroup>
 
diff --git a/csharp/src/Google.Protobuf/WritingPrimitives.cs b/csharp/src/Google.Protobuf/WritingPrimitives.cs
index cf8fc7b..d33a298 100644
--- a/csharp/src/Google.Protobuf/WritingPrimitives.cs
+++ b/csharp/src/Google.Protobuf/WritingPrimitives.cs
@@ -32,8 +32,14 @@
 
 using System;
 using System.Buffers.Binary;
+using System.Diagnostics;
 using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
+#if GOOGLE_PROTOBUF_SIMD
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.Arm;
+using System.Runtime.Intrinsics.X86;
+#endif
 using System.Security;
 using System.Text;
 
@@ -45,8 +51,11 @@
     [SecuritySafeCritical]
     internal static class WritingPrimitives
     {
-        // "Local" copy of Encoding.UTF8, for efficiency. (Yes, it makes a difference.)
-        internal static readonly Encoding Utf8Encoding = Encoding.UTF8;
+#if NET5_0
+        internal static Encoding Utf8Encoding => Encoding.UTF8; // allows JIT to devirtualize
+#else
+        internal static readonly Encoding Utf8Encoding = Encoding.UTF8; // "Local" copy of Encoding.UTF8, for efficiency. (Yes, it makes a difference.)
+#endif
 
         #region Writing of values (not including tags)
 
@@ -186,11 +195,7 @@
             {
                 if (length == value.Length) // Must be all ASCII...
                 {
-                    for (int i = 0; i < length; i++)
-                    {
-                        buffer[state.position + i] = (byte)value[i];
-                    }
-                    state.position += length;
+                    WriteAsciiStringToBuffer(buffer, ref state, value, length);
                 }
                 else
                 {
@@ -208,6 +213,99 @@
             }
         }
 
+        private static void WriteAsciiStringToBuffer(Span<byte> buffer, ref WriterInternalState state, string value, int length)
+        {
+            ref char sourceChars = ref MemoryMarshal.GetReference(value.AsSpan());
+            ref byte destinationBytes = ref MemoryMarshal.GetReference(buffer.Slice(state.position));
+
+            int currentIndex = 0;
+            // If 64bit, process 4 chars at a time.
+            // The logic inside this check will be elided by JIT in 32bit programs.
+            if (IntPtr.Size == 8)
+            {
+                // Need at least 4 chars available to use this optimization. 
+                if (length >= 4)
+                {
+                    ref byte sourceBytes = ref Unsafe.As<char, byte>(ref sourceChars);
+
+                    // Process 4 chars at a time until there are less than 4 remaining.
+                    // We already know all characters are ASCII so there is no need to validate the source.
+                    int lastIndexWhereCanReadFourChars = value.Length - 4;
+                    do
+                    {
+                        NarrowFourUtf16CharsToAsciiAndWriteToBuffer(
+                            ref Unsafe.AddByteOffset(ref destinationBytes, (IntPtr)currentIndex),
+                            Unsafe.ReadUnaligned<ulong>(ref Unsafe.AddByteOffset(ref sourceBytes, (IntPtr)(currentIndex * 2))));
+
+                    } while ((currentIndex += 4) <= lastIndexWhereCanReadFourChars);
+                }
+            }
+
+            // Process any remaining, 1 char at a time.
+            // Avoid bounds checking with ref + Unsafe
+            for (; currentIndex < length; currentIndex++)
+            {
+                Unsafe.AddByteOffset(ref destinationBytes, (IntPtr)currentIndex) = (byte)Unsafe.AddByteOffset(ref sourceChars, (IntPtr)(currentIndex * 2));
+            }
+
+            state.position += length;
+        }
+
+        // Copied with permission from https://github.com/dotnet/runtime/blob/1cdafd27e4afd2c916af5df949c13f8b373c4335/src/libraries/System.Private.CoreLib/src/System/Text/ASCIIUtility.cs#L1119-L1171
+        //
+        /// <summary>
+        /// Given a QWORD which represents a buffer of 4 ASCII chars in machine-endian order,
+        /// narrows each WORD to a BYTE, then writes the 4-byte result to the output buffer
+        /// also in machine-endian order.
+        /// </summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static void NarrowFourUtf16CharsToAsciiAndWriteToBuffer(ref byte outputBuffer, ulong value)
+        {
+#if GOOGLE_PROTOBUF_SIMD
+            if (Sse2.X64.IsSupported)
+            {
+                // Narrows a vector of words [ w0 w1 w2 w3 ] to a vector of bytes
+                // [ b0 b1 b2 b3 b0 b1 b2 b3 ], then writes 4 bytes (32 bits) to the destination.
+
+                Vector128<short> vecWide = Sse2.X64.ConvertScalarToVector128UInt64(value).AsInt16();
+                Vector128<uint> vecNarrow = Sse2.PackUnsignedSaturate(vecWide, vecWide).AsUInt32();
+                Unsafe.WriteUnaligned<uint>(ref outputBuffer, Sse2.ConvertToUInt32(vecNarrow));
+            }
+            else if (AdvSimd.IsSupported)
+            {
+                // Narrows a vector of words [ w0 w1 w2 w3 ] to a vector of bytes
+                // [ b0 b1 b2 b3 * * * * ], then writes 4 bytes (32 bits) to the destination.
+
+                Vector128<short> vecWide = Vector128.CreateScalarUnsafe(value).AsInt16();
+                Vector64<byte> lower = AdvSimd.ExtractNarrowingSaturateUnsignedLower(vecWide);
+                Unsafe.WriteUnaligned<uint>(ref outputBuffer, lower.AsUInt32().ToScalar());
+            }
+            else
+#endif
+            {
+                if (BitConverter.IsLittleEndian)
+                {
+                    outputBuffer = (byte)value;
+                    value >>= 16;
+                    Unsafe.Add(ref outputBuffer, 1) = (byte)value;
+                    value >>= 16;
+                    Unsafe.Add(ref outputBuffer, 2) = (byte)value;
+                    value >>= 16;
+                    Unsafe.Add(ref outputBuffer, 3) = (byte)value;
+                }
+                else
+                {
+                    Unsafe.Add(ref outputBuffer, 3) = (byte)value;
+                    value >>= 16;
+                    Unsafe.Add(ref outputBuffer, 2) = (byte)value;
+                    value >>= 16;
+                    Unsafe.Add(ref outputBuffer, 1) = (byte)value;
+                    value >>= 16;
+                    outputBuffer = (byte)value;
+                }
+            }
+        }
+
         private static int WriteStringToBuffer(Span<byte> buffer, ref WriterInternalState state, string value)
         {
 #if NETSTANDARD1_1