在 C# 中连接 ReadOnlySpan<char> 的最快方法

Fastest way to concatenate ReadOnlySpan<char> in C#

如果我已经只有 ReadOnlySpan 切片,那么连接字符串的最有效方法是什么?

简化示例:

/// <summary>
/// Baseline benchmark: rebuilds a string from span slices using a
/// <c>StringBuilder</c>.
/// </summary>
public class Program {
    /// <summary>
    /// Returns the first five 10-character segments of
    /// <paramref name="longstring"/> concatenated in reverse segment order
    /// (segment at offset 40 first, segment at offset 0 last).
    /// </summary>
    /// <param name="longstring">Source text; the slices below require at least 50 characters.</param>
    /// <returns>A new 50-character string.</returns>
    public string ConcatSpans(string longstring) {
        var chars = longstring.AsSpan();
        var builder = new StringBuilder(longstring.Length);

        // Walk the segments back to front: offsets 40, 30, 20, 10, 0.
        for (var offset = 40; offset >= 0; offset -= 10) {
            builder.Append(chars.Slice(offset, 10));
        }

        return builder.ToString();
    }

    /// <summary>Benchmark entry point for <see cref="ConcatSpans"/>.</summary>
    [Benchmark]
    public void ConcatSpansBenchmark()
        => ConcatSpans("aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeee");

    /// <summary>Runs every benchmark declared on this class.</summary>
    public static void Main(string[] args)
        => BenchmarkRunner.Run<Program>();
}

结果:

BenchmarkDotNet=v0.11.2, OS=Windows 10.0.17134.345 (1803/April2018Update/Redstone4)
Intel Core i5-2500K CPU 3.30GHz (Sandy Bridge), 1 CPU, 4 logical and 4 physical cores
.NET Core SDK=2.1.403
  [Host]     : .NET Core 2.1.5 (CoreCLR 4.6.26919.02, CoreFX 4.6.26919.02), 64bit RyuJIT
  DefaultJob : .NET Core 2.1.5 (CoreCLR 4.6.26919.02, CoreFX 4.6.26919.02), 64bit RyuJIT


               Method |     Mean |    Error |   StdDev | Gen 0/1k Op | Gen 1/1k Op | Gen 2/1k Op | Allocated Memory/Op |
--------------------- |---------:|---------:|---------:|------------:|------------:|------------:|--------------------:|
 ConcatSpansBenchmark | 126.6 ns | 1.712 ns | 1.601 ns |      0.0966 |           - |           - |               304 B |

StringBuilder真的是我们能做到的最好的吗?有没有比这更快的方法?分配更少?毕竟StringBuilder对象本身就是堆对象

如果有一个 ref struct 版本的 StringBuilder,它只保留对各个 ReadOnlySpan 的引用,并且只在最后的 ToString 中分配一个字符串对象,会怎么样?

编辑:正如评论中所指出的,在提供较新的 string.Create 方法的平台上,应当使用该方法来完成此操作。


具有多个(但数量已知)输入切片的场景,非常适合采用 "preallocate a dummy string, then pretend that strings are mutable and overwrite it before exposing it to the world" 的做法。这看起来很粗暴,但在处理字符串的 IO 代码中(尤其是数据来自不连续的缓冲区等情况),这个技巧很常见,因此它被充分理解并受到良好支持。

下面是结果(编辑:现在添加了 "hybrid" 方法,它避免了所有 Slice() 调用,并且不需要 unsafe 关键字):

                        Method |     Mean |     Error |    StdDev |   Median |
------------------------------ |---------:|----------:|----------:|---------:|
          ConcatSpansBenchmark | 97.17 ns | 2.1335 ns | 4.0072 ns | 97.20 ns |
       OverwiteStringBenchmark | 63.34 ns | 1.2914 ns | 2.0854 ns | 62.29 ns |
      UnsafeOverwriteBenchmark | 17.95 ns | 0.3697 ns | 0.3796 ns | 17.80 ns |
 OverwiteStringHybridBenchmark | 53.59 ns | 0.5534 ns | 0.5176 ns | 53.49 ns |

注意:任何涉及 MemoryMarshal.*、Unsafe.* 或 unsafe 关键字的内容,都等于明确声明 "I know what I'm doing... anything that exploded is probably my fault"。

代码:

using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Text;

/// <summary>
/// Benchmarks four ways of building a string from five 10-character slices
/// of an input string, emitted in reverse segment order.
/// </summary>
/// <remarks>
/// Every method assumes the input has at least 50 characters. The span-based
/// methods index or slice up to offset 50; <see cref="UnsafeOverwrite"/>
/// performs no bounds checks at all.
/// </remarks>
public class Program
{
    // Shared benchmark input: five 10-character runs ('a' through 'e').
    private const string Input = "aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeee";

    /// <summary>
    /// Baseline: appends the five slices to a pre-sized <see cref="StringBuilder"/>.
    /// </summary>
    /// <param name="longstring">Source text (at least 50 characters).</param>
    /// <returns>The segments at offsets 40, 30, 20, 10 and 0, concatenated in that order.</returns>
    public string ConcatSpans(string longstring)
    {
        var span = longstring.AsSpan();
        var sb = new StringBuilder(longstring.Length);
        sb.Append(span.Slice(40, 10));
        sb.Append(span.Slice(30, 10));
        sb.Append(span.Slice(20, 10));
        sb.Append(span.Slice(10, 10));
        sb.Append(span.Slice(0, 10));
        return sb.ToString();
    }

    /// <summary>
    /// Allocates a zero-filled string of the input's length and overwrites it
    /// in place through a writable span before returning it.
    /// </summary>
    /// <param name="longstring">Source text (at least 50 characters).</param>
    /// <returns>
    /// A string of <c>longstring.Length</c> characters whose first 50 characters
    /// are the reversed segments; any remaining characters stay <c>'\0'</c>.
    /// </returns>
    public string OverwiteString(string longstring)
    {
        var span = longstring.AsSpan();
        var s = new string('\0', longstring.Length);

        // MemoryMarshal.AsMemory turns the read-only view of the string into a
        // writable one; writes through it mutate the string itself.
        var writeable = MemoryMarshal.AsMemory(s.AsMemory()).Span;
        span.Slice(40, 10).CopyTo(writeable);
        writeable = writeable.Slice(10);
        span.Slice(30, 10).CopyTo(writeable);
        writeable = writeable.Slice(10);
        span.Slice(20, 10).CopyTo(writeable);
        writeable = writeable.Slice(10);
        span.Slice(10, 10).CopyTo(writeable);
        writeable = writeable.Slice(10);
        span.Slice(0, 10).CopyTo(writeable);
        return s;
    }

    /// <summary>
    /// Same in-place overwrite as <see cref="OverwiteString"/>, but copies raw
    /// bytes with <c>Unsafe.CopyBlock</c> instead of slicing, without needing
    /// the <c>unsafe</c> keyword.
    /// </summary>
    /// <param name="longstring">Source text (at least 50 characters).</param>
    /// <returns>
    /// A string of <c>longstring.Length</c> characters whose first 50 characters
    /// are the reversed segments; any remaining characters stay <c>'\0'</c>.
    /// </returns>
    public string OverwiteStringHybrid(string longstring)
    {
        var source = MemoryMarshal.AsBytes(MemoryMarshal.AsMemory(longstring.AsMemory()).Span);
        var s = new string('\0', longstring.Length);
        var target = MemoryMarshal.AsBytes(MemoryMarshal.AsMemory(s.AsMemory()).Span);

        // Both spans are byte views, so every index - destination as well as
        // source - must be a char offset scaled by sizeof(char).
        Unsafe.CopyBlock(ref target[0], ref source[40 * sizeof(char)], 10 * sizeof(char));
        Unsafe.CopyBlock(ref target[10 * sizeof(char)], ref source[30 * sizeof(char)], 10 * sizeof(char));
        Unsafe.CopyBlock(ref target[20 * sizeof(char)], ref source[20 * sizeof(char)], 10 * sizeof(char));
        Unsafe.CopyBlock(ref target[30 * sizeof(char)], ref source[10 * sizeof(char)], 10 * sizeof(char));
        Unsafe.CopyBlock(ref target[40 * sizeof(char)], ref source[0], 10 * sizeof(char));

        return s;
    }

    /// <summary>
    /// Pins both strings and copies the segments with raw pointers.
    /// </summary>
    /// <param name="longstring">
    /// Source text. Must have at least 50 characters: nothing here checks
    /// bounds, so a shorter input reads and writes outside the strings.
    /// </param>
    /// <returns>
    /// A string of <c>longstring.Length</c> characters whose first 50 characters
    /// are the reversed segments; any remaining characters stay <c>'\0'</c>.
    /// </returns>
    public unsafe string UnsafeOverwrite(string longstring)
    {
        var s = new string('\0', longstring.Length);
        fixed (char* source = longstring)
        fixed (char* target = s)
        {
            // Pointer arithmetic is in chars; only the byte count is scaled.
            Unsafe.CopyBlock(target, source + 40, 10 * sizeof(char));
            Unsafe.CopyBlock(target + 10, source + 30, 10 * sizeof(char));
            Unsafe.CopyBlock(target + 20, source + 20, 10 * sizeof(char));
            Unsafe.CopyBlock(target + 30, source + 10, 10 * sizeof(char));
            Unsafe.CopyBlock(target + 40, source, 10 * sizeof(char));
        }
        return s;
    }

    /// <summary>Benchmark entry point for <see cref="ConcatSpans"/>.</summary>
    [Benchmark]
    public void ConcatSpansBenchmark()
        => ConcatSpans(Input);

    /// <summary>Benchmark entry point for <see cref="OverwiteString"/>.</summary>
    [Benchmark]
    public void OverwiteStringBenchmark()
        => OverwiteString(Input);

    /// <summary>Benchmark entry point for <see cref="UnsafeOverwrite"/>.</summary>
    [Benchmark]
    public void UnsafeOverwriteBenchmark()
        => UnsafeOverwrite(Input);

    /// <summary>Benchmark entry point for <see cref="OverwiteStringHybrid"/>.</summary>
    [Benchmark]
    public void OverwiteStringHybridBenchmark()
        => OverwiteStringHybrid(Input);

    /// <summary>Runs every benchmark declared on this class.</summary>
    public static void Main(string[] args)
        => BenchmarkRunner.Run<Program>();
}

注意:在一般情况下,要从切片(span)获得可在 unsafe 代码中使用的指针:

使用 C# 7.3:

fixed(char* p = theSpan)
{
    ...
}

否则:

fixed(char* p = &MemoryMarshal.GetReference(theSpan))
{

}