aspnetcore
aspnetcore copied to clipboard
Potential optimizations for BufferedReadStream.ReadLine
Is there an existing issue for this?
- [X] I have searched the existing issues
Is your feature request related to a problem? Please describe the problem.
-
BufferedReadStream.ReadLine
's main loop essentially walks over every single byte in the array and checks whether it'sCR
(orLF
). This can be rewritten withSpan<byte>.IndexOf
to first findCR
and if that succeeds, check whether the next byte isLF
.
https://github.com/dotnet/aspnetcore/blob/678a06f95aced0d4c6f837e53cfc6ffdbb7362b9/src/Http/WebUtilities/src/BufferedReadStream.cs#L356-L363
https://github.com/dotnet/aspnetcore/blob/678a06f95aced0d4c6f837e53cfc6ffdbb7362b9/src/Http/WebUtilities/src/BufferedReadStream.cs#L398-L410
- While decoding byte array to string
MemoryStream.ToArray
is called. The issue with it is that it's going to copy the internal array. Instead, this can be changed toMemoryStream.GetBuffer
, which returns the internal buffer.
https://github.com/dotnet/aspnetcore/blob/678a06f95aced0d4c6f837e53cfc6ffdbb7362b9/src/Http/WebUtilities/src/BufferedReadStream.cs#L416
Describe the solution you'd like
Reduce allocations from BufferedReadStream.DecodeLine
(p. 2). This should be safe as MemoryStream
is created by the BufferedReadStream
itself and only used locally.
Optimizing BufferedReadStream.ReadLine
main loop (p. 1) would also be nice, though not that simple (for example, there is a possible case when the last byte in the buffer is CR
, but checking whether the next byte is LF
is impossible without reading from a stream).
Additional context
No response
Triage: probably worth unifying parts of this with the similar CR/LF line terminator handling we have in https://github.com/dotnet/aspnetcore/blob/main/src/Servers/Kestrel/Core/src/Internal/Http/HttpParser.cs#L183 (where appropriate)
@vonzshik It would be interesting to see the performance differences measured with the changes you proposed.
We've moved this issue to the Backlog milestone. This means that it is not going to be worked on for the coming release. We will reassess the backlog following the current release and consider this item at that time. To learn more about our issue management process and to have better expectation regarding different types of issues you can read our Triage Process.
@adityamandaleeka here's a quick implementation I've made (it probably can be made faster, but just as an example):
BenchmarkDotNet=v0.13.2, OS=Windows 10 (10.0.19044.2130/21H2/November2021Update)
AMD Ryzen 9 5900X, 1 CPU, 24 logical and 12 physical cores
.NET SDK=7.0.100-rc.1.22431.12
[Host] : .NET 6.0.9 (6.0.922.41905), X64 RyuJIT AVX2
DefaultJob : .NET 6.0.9 (6.0.922.41905), X64 RyuJIT AVX2
Method | LineLength | Mean | Error | StdDev | Ratio | Gen0 | Gen1 | Allocated | Alloc Ratio |
---|---|---|---|---|---|---|---|---|---|
ReadLine | 10 | 60.13 ns | 0.351 ns | 0.328 ns | 1.00 | 0.0219 | - | 368 B | 1.00 |
ReadLineOptimized | 10 | 40.17 ns | 0.377 ns | 0.353 ns | 0.67 | 0.0196 | - | 328 B | 0.89 |
ReadLine | 40 | 136.07 ns | 0.364 ns | 0.341 ns | 1.00 | 0.0272 | - | 456 B | 1.00 |
ReadLineOptimized | 40 | 50.75 ns | 0.825 ns | 0.772 ns | 0.37 | 0.0234 | - | 392 B | 0.86 |
ReadLine | 100 | 278.04 ns | 5.059 ns | 4.485 ns | 1.00 | 0.0381 | - | 640 B | 1.00 |
ReadLineOptimized | 100 | 58.28 ns | 0.633 ns | 0.528 ns | 0.21 | 0.0305 | - | 512 B | 0.80 |
ReadLine | 1000 | 2,458.89 ns | 43.613 ns | 40.796 ns | 1.00 | 0.3700 | 0.0038 | 6208 B | 1.00 |
ReadLineOptimized | 1000 | 282.63 ns | 4.607 ns | 4.310 ns | 0.11 | 0.3200 | 0.0057 | 5360 B | 0.86 |
ReadLine | 4090 | 9,586.84 ns | 37.221 ns | 34.817 ns | 1.00 | 1.4954 | 0.0458 | 25128 B | 1.00 |
ReadLineOptimized | 4090 | 974.64 ns | 10.299 ns | 9.634 ns | 0.10 | 1.2436 | 0.0725 | 20816 B | 0.83 |
ReadLine | 8196 | 19,161.06 ns | 250.499 ns | 222.061 ns | 1.00 | 2.9907 | 0.2747 | 50272 B | 1.00 |
ReadLineOptimized | 8196 | 1,976.37 ns | 21.322 ns | 19.945 ns | 0.10 | 2.7122 | 0.3014 | 45448 B | 0.90 |
Benchmark
[MemoryDiagnoser]
public class BufferedReadStreamBenchmark
{
[Params(10, 40, 100, 1000, 4090, 8196)]
public int LineLength { get; set; }
private BufferedReadStream brs;
[GlobalSetup]
public void Init()
{
var ms = new MemoryStream();
var rnd = new Random(1);
for (var i = 0; i < LineLength - 2; i++)
{
const string chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
var nextChar = chars[rnd.Next(chars.Length)];
ms.WriteByte((byte)nextChar);
}
ms.WriteByte((byte)'\r');
ms.WriteByte((byte)'\n');
brs = new BufferedReadStream(ms, 4096);
}
[Benchmark(Baseline = true)]
public string ReadLine()
{
brs.Position = 0;
return brs.ReadLine(64000);
}
[Benchmark]
public string ReadLineOptimized()
{
brs.Position = 0;
return brs.ReadLineOptimized(64000);
}
}
BufferedReadStream changes
public string ReadLineOptimized(int lengthLimit)
{
CheckDisposed();
using (var builder = new MemoryStream(200))
{
bool foundCR = false;
while (EnsureBuffered())
{
if (foundCR)
{
if (_buffer[_bufferOffset] == LF)
{
builder.WriteByte(LF);
_bufferOffset++;
_bufferCount--;
return DecodeLineOptimized(builder, true);
}
foundCR = false;
}
var buffer = _buffer.AsSpan(_bufferOffset, _bufferCount);
var crIndex = buffer.IndexOf(CR);
if (crIndex == -1)
{
builder.Write(buffer);
_bufferOffset += _bufferCount;
_bufferCount = 0;
continue;
}
builder.Write(buffer.Slice(0, crIndex + 1));
_bufferOffset += crIndex + 1;
_bufferCount -= crIndex + 1;
if (builder.Length > lengthLimit)
{
throw new InvalidDataException($"Line length limit {lengthLimit} exceeded.");
}
foundCR = true;
}
return DecodeLineOptimized(builder, false);
}
}
private static string DecodeLineOptimized(MemoryStream builder, bool foundCRLF)
{
// Drop the final CRLF, if any
var length = foundCRLF ? builder.Length - 2 : builder.Length;
return Encoding.UTF8.GetString(builder.GetBuffer(), 0, (int)length);
}