markdig icon indicating copy to clipboard operation
markdig copied to clipboard

Remove some branches in HtmlHelper.TryParseHtmlTagOpenTag by using bitmask

Open gfoidl opened this issue 1 year ago • 1 comments

Eliminate some branches in HtmlHelper.TryParseHtmlTagOpenTag (due to || comparisons) by using a bitmask approach instead. Further the generated machine code has a test jump-combo, which allows optimizations at cpu-level (macro fusion, etc.).

Other places in that type seem not worthwile, as the count of cmps can't be lowered that the current count is.

Benchmarks

The benchmarks are done on a trimmed-down version of the code, to just test these parts of the code changed.

IsSpaceOrSpecialHtmlChar

|  Method |     Mean |   Error |  StdDev | Ratio | RatioSD | Code Size |
|-------- |---------:|--------:|--------:|------:|--------:|----------:|
| Default | 181.4 ns | 3.48 ns | 3.09 ns |  1.00 |    0.00 |      60 B |
|  Switch | 209.6 ns | 3.65 ns | 3.91 ns |  1.16 |    0.02 |      60 B |
|    Mask | 118.4 ns | 2.40 ns | 3.59 ns |  0.65 |    0.03 |      57 B |
benchmark code
using System.Diagnostics;
using System.Runtime.CompilerServices;
using BenchmarkDotNet.Attributes;

Bench bench = new();
Console.WriteLine(bench.Default());
Console.WriteLine(bench.Switch());
Console.WriteLine(bench.Mask());

#if !DEBUG
BenchmarkDotNet.Running.BenchmarkRunner.Run<Bench>();
#endif

[DisassemblyDiagnoser]
//[ShortRunJob]
public class Bench
{
    private static readonly char[] s_chars = { ' ', '\n', '"', '\'', '=', '<', '>', '`' };

    public Bench()
    {
        foreach (char c in s_chars)
        {
            Console.WriteLine($"{(int)c}");
        }

        Console.WriteLine();
    }

    [Benchmark(Baseline = true)]
    public int Default()
    {
        int count = 0;

        for (int cc = char.MinValue; cc <= char.MaxValue; ++cc)
        {
            char c = (char)cc;
            if (IsMatch0(c))
            {
                Debug.Assert(s_chars.Contains(c), $"{c} ({(int)c}) shouldn't be true");
                count++;
            }
        }

        Debug.Assert(count == s_chars.Length, $"wrong count, expected {s_chars.Length}, actual: {count}");
        return count;
    }

    //[Benchmark]
    public int Switch()
    {
        int count = 0;

        for (int cc = char.MinValue; cc <= char.MaxValue; ++cc)
        {
            char c = (char)cc;
            if (IsMatch00(c))
            {
                Debug.Assert(s_chars.Contains(c), $"{c} ({(int)c}) shouldn't be true");
                count++;
            }
        }

        Debug.Assert(count == s_chars.Length, $"wrong count, expected {s_chars.Length}, actual: {count}");
        return count;
    }

    [Benchmark]
    public int Mask()
    {
        int count = 0;

        for (int cc = char.MinValue; cc <= char.MaxValue; ++cc)
        {
            char c = (char)cc;
            if (IsMatch1(c))
            {
                Debug.Assert(s_chars.Contains(c), $"{c} ({(int)c}) shouldn't be true");
                count++;
            }
        }

        Debug.Assert(count == s_chars.Length, $"wrong count, expected {s_chars.Length}, actual: {count}");
        return count;
    }

    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static bool IsMatch0(char c)
    {
        return c == ' ' || c == '\n' || c == '"' || c == '\'' || c == '=' || c == '<' || c == '>' || c == '`';
    }

    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static bool IsMatch00(char c) => c switch
    {
        ' ' or '\n' or '"' or '\'' or '=' or '<' or '>' or '`' => true,
        _ => false,
    };

    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static bool IsMatch1(char c)
    {
        if (c > '>')
        {
            return c == '`';
        }

        const long BitMask =
              (1L << ' ')
            | (1L << '\n')
            | (1L << '"')
            | (1L << '\'')
            | (1L << '=')
            | (1L << '<')
            | (1L << '>');

        return (BitMask & (1L << c)) != 0;
    }
}

IsCharToAppend

|  Method |     Mean |   Error |  StdDev | Ratio | RatioSD | Code Size |
|-------- |---------:|--------:|--------:|------:|--------:|----------:|
| Default | 131.0 ns | 2.22 ns | 2.07 ns |  1.00 |    0.00 |      40 B |
|    Mask | 121.6 ns | 1.65 ns | 1.37 ns |  0.93 |    0.02 |      53 B |
benchmark code
using System.Diagnostics;
using System.Runtime.CompilerServices;
using BenchmarkDotNet.Attributes;

Bench bench = new();
Console.WriteLine(bench.Default());
Console.WriteLine(bench.Mask());

#if !DEBUG
BenchmarkDotNet.Running.BenchmarkRunner.Run<Bench>();
#endif

[DisassemblyDiagnoser]
//[ShortRunJob]
public class Bench
{
    private static readonly char[] s_chars = { '_', ':', '.', '-' };

    public Bench()
    {
        foreach (char c in s_chars)
        {
            Console.WriteLine($"{(int)c}");
        }

        Console.WriteLine();
    }

    [Benchmark(Baseline = true)]
    public int Default()
    {
        int count = 0;

        for (int cc = char.MinValue; cc < char.MaxValue; ++cc)
        {
            char c = (char)cc;
            if (IsMatch0(c))
            {
                Debug.Assert(s_chars.Contains(c), $"{c} ({(int)c}) shouldn't be true");
                count++;
            }
        }

        Debug.Assert(count == s_chars.Length, $"wrong count, expected {s_chars.Length}, actual: {count}");
        return count;
    }

    [Benchmark]
    public int Mask()
    {
        int count = 0;

        for (int cc = char.MinValue; cc < char.MaxValue; ++cc)
        {
            char c = (char)cc;
            if (IsMatch1(c))
            {
                Debug.Assert(s_chars.Contains(c), $"{c} ({(int)c}) shouldn't be true");
                count++;
            }
        }

        Debug.Assert(count == s_chars.Length, $"wrong count, expected {s_chars.Length}, actual: {count}");
        return count;
    }

    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static bool IsMatch0(char c)
    {
        return c == '_' || c == ':' || c == '.' || c == '-';
    }

    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static bool IsMatch1(char c)
    {
        if ((uint)(c - '-') > '_' - '-')
        {
            return false;
        }

        const long BitMask =
              (1L << '_')
            | (1L << ':')
            | (1L << '.')
            | (1L << '-');

        return (BitMask & (1L << c)) != 0;
    }
}

gfoidl avatar Jul 21 '22 10:07 gfoidl

PR failing because of coverage upload, will need to investigate this, sorry for the trouble.

xoofx avatar Aug 04 '22 04:08 xoofx

Pull Request Test Coverage Report for Build 2711036240

Warning: This coverage report may be inaccurate.

This pull request's base commit is no longer the HEAD commit of its target branch. This means it includes changes from outside the original pull request, including, potentially, unrelated coverage changes.

Details

  • 14 of 14 (100.0%) changed or added relevant lines in 1 file are covered.
  • No unchanged relevant lines lost coverage.
  • Overall coverage decreased (-0.004%) to 93.203%

Totals Coverage Status
Change from base Build 2500087181: -0.004%
Covered Lines: 25839
Relevant Lines: 27094

💛 - Coveralls

coveralls avatar Aug 11 '22 19:08 coveralls

Nice optimizations, thanks!

xoofx avatar Aug 12 '22 05:08 xoofx