在 C# 中连接 ReadOnlySpan<char> 的最快方法
Fastest way to concatenate ReadOnlySpan<char> in C#
如果我已经只有 ReadOnlySpan 切片,那么连接字符串的最有效方法是什么?
简化示例:
public class Program {
public string ConcatSpans(string longstring) {
var span = longstring.AsSpan();
var sb = new StringBuilder(longstring.Length);
sb.Append(span.Slice(40, 10));
sb.Append(span.Slice(30, 10));
sb.Append(span.Slice(20, 10));
sb.Append(span.Slice(10, 10));
sb.Append(span.Slice(0, 10));
return sb.ToString();
}
[Benchmark]
public void ConcatSpansBenchmark() {
ConcatSpans("aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeee");
}
public static void Main(string[] args) {
var summary = BenchmarkRunner.Run<Program>();
}
}
结果:
BenchmarkDotNet=v0.11.2, OS=Windows 10.0.17134.345 (1803/April2018Update/Redstone4)
Intel Core i5-2500K CPU 3.30GHz (Sandy Bridge), 1 CPU, 4 logical and 4 physical cores
.NET Core SDK=2.1.403
[Host] : .NET Core 2.1.5 (CoreCLR 4.6.26919.02, CoreFX 4.6.26919.02), 64bit RyuJIT
DefaultJob : .NET Core 2.1.5 (CoreCLR 4.6.26919.02, CoreFX 4.6.26919.02), 64bit RyuJIT
Method | Mean | Error | StdDev | Gen 0/1k Op | Gen 1/1k Op | Gen 2/1k Op | Allocated Memory/Op |
--------------------- |---------:|---------:|---------:|------------:|------------:|------------:|--------------------:|
ConcatSpansBenchmark | 126.6 ns | 1.712 ns | 1.601 ns | 0.0966 | - | - | 304 B |
StringBuilder
真的是我们能做到的最好的吗?有没有比这更快的方法?分配更少?毕竟StringBuilder
对象本身就是堆对象
如果有一个 ref struct
StringBuilder
只会保留对 ReadOnlySpans
的引用并且在最后的 ToString
中只分配一个字符串对象?
编辑:作为 ,较新的 string.Create
方法是在其存在的平台上执行此操作的方法。
具有多个(但已知)输入范围的场景非常适合 "preallocate a dummy string, then pretend that strings are mutable and overwrite it before exposing it to the world" 场景。这看起来很粗糙,但是在处理字符串时(尤其是来自不连续的缓冲区等),这个技巧在 IO 代码中很常见,所以它很好理解和支持。
我们开始(编辑:现在添加了 "hybrid" 方法,避免了所有 Slice()
调用,不需要 unsafe
):
Method | Mean | Error | StdDev | Median |
------------------------------ |---------:|----------:|----------:|---------:|
ConcatSpansBenchmark | 97.17 ns | 2.1335 ns | 4.0072 ns | 97.20 ns |
OverwiteStringBenchmark | 63.34 ns | 1.2914 ns | 2.0854 ns | 62.29 ns |
UnsafeOverwriteBenchmark | 17.95 ns | 0.3697 ns | 0.3796 ns | 17.80 ns |
OverwiteStringHybridBenchmark | 53.59 ns | 0.5534 ns | 0.5176 ns | 53.49 ns |
注意:任何涉及 MemoryMarshal.*
、Unsafe.*
或 unsafe
关键字的内容都是明确的 "I know what I'm doing... anything that exploded is probably my fault".
代码:
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Text;
public class Program
{
public string ConcatSpans(string longstring)
{
var span = longstring.AsSpan();
var sb = new StringBuilder(longstring.Length);
sb.Append(span.Slice(40, 10));
sb.Append(span.Slice(30, 10));
sb.Append(span.Slice(20, 10));
sb.Append(span.Slice(10, 10));
sb.Append(span.Slice(0, 10));
return sb.ToString();
}
public string OverwiteString(string longstring)
{
var span = longstring.AsSpan();
var s = new string('[=11=]', longstring.Length);
var writeable = MemoryMarshal.AsMemory(s.AsMemory()).Span;
span.Slice(40, 10).CopyTo(writeable);
writeable = writeable.Slice(10);
span.Slice(30, 10).CopyTo(writeable);
writeable = writeable.Slice(10);
span.Slice(20, 10).CopyTo(writeable);
writeable = writeable.Slice(10);
span.Slice(10, 10).CopyTo(writeable);
writeable = writeable.Slice(10);
span.Slice(0, 10).CopyTo(writeable);
return s;
}
public string OverwiteStringHybrid(string longstring)
{
var source = MemoryMarshal.AsBytes(MemoryMarshal.AsMemory(longstring.AsMemory()).Span);
var s = new string('[=11=]', longstring.Length);
var target = MemoryMarshal.AsBytes(MemoryMarshal.AsMemory(s.AsMemory()).Span);
Unsafe.CopyBlock(ref target[0], ref source[40 * sizeof(char)], 10 * sizeof(char));
Unsafe.CopyBlock(ref target[10], ref source[30 * sizeof(char)], 10 * sizeof(char));
Unsafe.CopyBlock(ref target[20], ref source[20 * sizeof(char)], 10 * sizeof(char));
Unsafe.CopyBlock(ref target[30], ref source[10 * sizeof(char)], 10 * sizeof(char));
Unsafe.CopyBlock(ref target[40], ref source[0], 10 * sizeof(char));
return s;
}
public unsafe string UnsafeOverwrite(string longstring)
{
var s = new string('[=11=]', longstring.Length);
fixed (char* source = longstring)
fixed (char* target = s)
{
Unsafe.CopyBlock(target, source + 40, 10 * sizeof(char));
Unsafe.CopyBlock(target + 10, source + 30, 10 * sizeof(char));
Unsafe.CopyBlock(target + 20, source + 20, 10 * sizeof(char));
Unsafe.CopyBlock(target + 30, source + 10, 10 * sizeof(char));
Unsafe.CopyBlock(target + 40, source, 10 * sizeof(char));
}
return s;
}
[Benchmark]
public void ConcatSpansBenchmark()
=> ConcatSpans("aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeee");
[Benchmark]
public void OverwiteStringBenchmark()
=> OverwiteString("aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeee");
[Benchmark]
public void UnsafeOverwriteBenchmark()
=> UnsafeOverwrite("aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeee");
[Benchmark]
public void OverwiteStringHybridBenchmark()
=> OverwiteStringHybrid("aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeee");
public static void Main(string[] args)
=> BenchmarkRunner.Run<Program>();
}
注意:在一般情况下 - 从切片中获取 unsafe
代码:
使用 C# 7.3:
fixed(char* p = theSpan)
{
...
}
否则:
fixed(char* p = &MemoryMarshal.GetReference(theSpan))
{
}
如果我已经只有 ReadOnlySpan 切片,那么连接字符串的最有效方法是什么?
简化示例:
public class Program {
public string ConcatSpans(string longstring) {
var span = longstring.AsSpan();
var sb = new StringBuilder(longstring.Length);
sb.Append(span.Slice(40, 10));
sb.Append(span.Slice(30, 10));
sb.Append(span.Slice(20, 10));
sb.Append(span.Slice(10, 10));
sb.Append(span.Slice(0, 10));
return sb.ToString();
}
[Benchmark]
public void ConcatSpansBenchmark() {
ConcatSpans("aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeee");
}
public static void Main(string[] args) {
var summary = BenchmarkRunner.Run<Program>();
}
}
结果:
BenchmarkDotNet=v0.11.2, OS=Windows 10.0.17134.345 (1803/April2018Update/Redstone4)
Intel Core i5-2500K CPU 3.30GHz (Sandy Bridge), 1 CPU, 4 logical and 4 physical cores
.NET Core SDK=2.1.403
[Host] : .NET Core 2.1.5 (CoreCLR 4.6.26919.02, CoreFX 4.6.26919.02), 64bit RyuJIT
DefaultJob : .NET Core 2.1.5 (CoreCLR 4.6.26919.02, CoreFX 4.6.26919.02), 64bit RyuJIT
Method | Mean | Error | StdDev | Gen 0/1k Op | Gen 1/1k Op | Gen 2/1k Op | Allocated Memory/Op |
--------------------- |---------:|---------:|---------:|------------:|------------:|------------:|--------------------:|
ConcatSpansBenchmark | 126.6 ns | 1.712 ns | 1.601 ns | 0.0966 | - | - | 304 B |
StringBuilder
真的是我们能做到的最好的吗?有没有比这更快的方法?分配更少?毕竟StringBuilder
对象本身就是堆对象
如果有一个 ref struct
StringBuilder
只会保留对 ReadOnlySpans
的引用并且在最后的 ToString
中只分配一个字符串对象?
编辑:作为 string.Create
方法是在其存在的平台上执行此操作的方法。
具有多个(但已知)输入范围的场景非常适合 "preallocate a dummy string, then pretend that strings are mutable and overwrite it before exposing it to the world" 场景。这看起来很粗糙,但是在处理字符串时(尤其是来自不连续的缓冲区等),这个技巧在 IO 代码中很常见,所以它很好理解和支持。
我们开始(编辑:现在添加了 "hybrid" 方法,避免了所有 Slice()
调用,不需要 unsafe
):
Method | Mean | Error | StdDev | Median |
------------------------------ |---------:|----------:|----------:|---------:|
ConcatSpansBenchmark | 97.17 ns | 2.1335 ns | 4.0072 ns | 97.20 ns |
OverwiteStringBenchmark | 63.34 ns | 1.2914 ns | 2.0854 ns | 62.29 ns |
UnsafeOverwriteBenchmark | 17.95 ns | 0.3697 ns | 0.3796 ns | 17.80 ns |
OverwiteStringHybridBenchmark | 53.59 ns | 0.5534 ns | 0.5176 ns | 53.49 ns |
注意:任何涉及 MemoryMarshal.*
、Unsafe.*
或 unsafe
关键字的内容都是明确的 "I know what I'm doing... anything that exploded is probably my fault".
代码:
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Text;
public class Program
{
public string ConcatSpans(string longstring)
{
var span = longstring.AsSpan();
var sb = new StringBuilder(longstring.Length);
sb.Append(span.Slice(40, 10));
sb.Append(span.Slice(30, 10));
sb.Append(span.Slice(20, 10));
sb.Append(span.Slice(10, 10));
sb.Append(span.Slice(0, 10));
return sb.ToString();
}
public string OverwiteString(string longstring)
{
var span = longstring.AsSpan();
var s = new string('[=11=]', longstring.Length);
var writeable = MemoryMarshal.AsMemory(s.AsMemory()).Span;
span.Slice(40, 10).CopyTo(writeable);
writeable = writeable.Slice(10);
span.Slice(30, 10).CopyTo(writeable);
writeable = writeable.Slice(10);
span.Slice(20, 10).CopyTo(writeable);
writeable = writeable.Slice(10);
span.Slice(10, 10).CopyTo(writeable);
writeable = writeable.Slice(10);
span.Slice(0, 10).CopyTo(writeable);
return s;
}
public string OverwiteStringHybrid(string longstring)
{
var source = MemoryMarshal.AsBytes(MemoryMarshal.AsMemory(longstring.AsMemory()).Span);
var s = new string('[=11=]', longstring.Length);
var target = MemoryMarshal.AsBytes(MemoryMarshal.AsMemory(s.AsMemory()).Span);
Unsafe.CopyBlock(ref target[0], ref source[40 * sizeof(char)], 10 * sizeof(char));
Unsafe.CopyBlock(ref target[10], ref source[30 * sizeof(char)], 10 * sizeof(char));
Unsafe.CopyBlock(ref target[20], ref source[20 * sizeof(char)], 10 * sizeof(char));
Unsafe.CopyBlock(ref target[30], ref source[10 * sizeof(char)], 10 * sizeof(char));
Unsafe.CopyBlock(ref target[40], ref source[0], 10 * sizeof(char));
return s;
}
public unsafe string UnsafeOverwrite(string longstring)
{
var s = new string('[=11=]', longstring.Length);
fixed (char* source = longstring)
fixed (char* target = s)
{
Unsafe.CopyBlock(target, source + 40, 10 * sizeof(char));
Unsafe.CopyBlock(target + 10, source + 30, 10 * sizeof(char));
Unsafe.CopyBlock(target + 20, source + 20, 10 * sizeof(char));
Unsafe.CopyBlock(target + 30, source + 10, 10 * sizeof(char));
Unsafe.CopyBlock(target + 40, source, 10 * sizeof(char));
}
return s;
}
[Benchmark]
public void ConcatSpansBenchmark()
=> ConcatSpans("aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeee");
[Benchmark]
public void OverwiteStringBenchmark()
=> OverwiteString("aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeee");
[Benchmark]
public void UnsafeOverwriteBenchmark()
=> UnsafeOverwrite("aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeee");
[Benchmark]
public void OverwiteStringHybridBenchmark()
=> OverwiteStringHybrid("aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeee");
public static void Main(string[] args)
=> BenchmarkRunner.Run<Program>();
}
注意:在一般情况下 - 从切片中获取 unsafe
代码:
使用 C# 7.3:
fixed(char* p = theSpan)
{
...
}
否则:
fixed(char* p = &MemoryMarshal.GetReference(theSpan))
{
}