BitFaster.Caching icon indicating copy to clipboard operation
BitFaster.Caching copied to clipboard

Optimize BitOps

Open Joy-less opened this issue 9 months ago • 1 comments
trafficstars

Since this library is about micro-optimizations, I decided to dive into BitOps.

Firstly, one of the methods (CeilingPowerOfTwo) now has a corresponding BitOperations method (RoundUpToPowerOf2) which internally does the same thing as the NETSTANDARD version with optimizations depending on hardware.

Secondly, I replaced the while loop in another method with some code copy-pasted from the internal implementation of BitOperations.PopCount. I assume the C# team knows best here.

Here are some benchmarks. The BitCount optimizations are much more performant in NETSTANDARD, whereas the CeilingPowerOfTwo optimizations are the same performance but simpler source code-wise.

Method Mean Error StdDev
CeilingPowerOfTwo_uint_NET_Old 25.39 us 0.062 us 0.055 us
CeilingPowerOfTwo_uint_NET_New 25.30 us 0.065 us 0.061 us
CeilingPowerOfTwo_ulong_NET_Old 25.30 us 0.041 us 0.036 us
CeilingPowerOfTwo_ulong_NET_New 25.32 us 0.042 us 0.039 us
BitCount_NETSTANDARD_uint_Old 152.31 us 0.235 us 0.184 us
BitCount_NETSTANDARD_uint_New 25.32 us 0.068 us 0.064 us
BitCount_NETSTANDARD_ulong_Old 152.46 us 0.461 us 0.431 us
BitCount_NETSTANDARD_ulong_New 25.31 us 0.067 us 0.063 us
Benchmark code
using System.Numerics;
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;

namespace ConsoleApp1;

internal class Program {
    static void Main() {
        BenchmarkSwitcher.FromAssembly(typeof(Program).Assembly).Run();
    }
}

public class Benchmarks {
    private const int Iterations = 100_000;

    [Benchmark]
    public void CeilingPowerOfTwo_uint_NET_Old() {
        for (int Counter = 0; Counter < Iterations; Counter++) {
            BitOperationMethods.CeilingPowerOfTwo_uint_NET_Old(100);
        }
    }
    [Benchmark]
    public void CeilingPowerOfTwo_uint_NET_New() {
        for (int Counter = 0; Counter < Iterations; Counter++) {
            BitOperationMethods.CeilingPowerOfTwo_uint_NET_New(100);
        }
    }
    [Benchmark]
    public void CeilingPowerOfTwo_ulong_NET_Old() {
        for (int Counter = 0; Counter < Iterations; Counter++) {
            BitOperationMethods.CeilingPowerOfTwo_ulong_NET_Old(100);
        }
    }
    [Benchmark]
    public void CeilingPowerOfTwo_ulong_NET_New() {
        for (int Counter = 0; Counter < Iterations; Counter++) {
            BitOperationMethods.CeilingPowerOfTwo_ulong_NET_New(100);
        }
    }
    [Benchmark]
    public void BitCount_NETSTANDARD_uint_Old() {
        for (int Counter = 0; Counter < Iterations; Counter++) {
            BitOperationMethods.BitCount_NETSTANDARD_uint_Old(100);
        }
    }
    [Benchmark]
    public void BitCount_NETSTANDARD_uint_New() {
        for (int Counter = 0; Counter < Iterations; Counter++) {
            BitOperationMethods.BitCount_NETSTANDARD_uint_New(100);
        }
    }
    [Benchmark]
    public void BitCount_NETSTANDARD_ulong_Old() {
        for (int Counter = 0; Counter < Iterations; Counter++) {
            BitOperationMethods.BitCount_NETSTANDARD_ulong_Old(100);
        }
    }
    [Benchmark]
    public void BitCount_NETSTANDARD_ulong_New() {
        for (int Counter = 0; Counter < Iterations; Counter++) {
            BitOperationMethods.BitCount_NETSTANDARD_ulong_New(100);
        }
    }
}

public static class BitOperationMethods {
    public static uint CeilingPowerOfTwo_uint_NET_Old(uint x) {
        return 1u << -BitOperations.LeadingZeroCount(x - 1);
    }
    public static uint CeilingPowerOfTwo_uint_NET_New(uint x) {
        return BitOperations.RoundUpToPowerOf2(x);
    }
    public static ulong CeilingPowerOfTwo_ulong_NET_Old(ulong x) {
        return 1ul << -BitOperations.LeadingZeroCount(x - 1);
    }
    public static ulong CeilingPowerOfTwo_ulong_NET_New(ulong x) {
        return BitOperations.RoundUpToPowerOf2(x);
    }
    public static int BitCount_NETSTANDARD_uint_Old(uint x) {
        var count = 0;
        while (x != 0)
        {
            count++;
            x &= x - 1; //walking through all the bits which are set to one
        }

        return count;
    }
    public static int BitCount_NETSTANDARD_uint_New(uint x) {
        const uint c1 = 0x_55555555u;
        const uint c2 = 0x_33333333u;
        const uint c3 = 0x_0F0F0F0Fu;
        const uint c4 = 0x_01010101u;

        x -= (x >> 1) & c1;
        x = (x & c2) + ((x >> 2) & c2);
        x = (((x + (x >> 4)) & c3) * c4) >> 24;

        return (int)x;
    }
    public static int BitCount_NETSTANDARD_ulong_Old(ulong x) {
        var count = 0;
        while (x != 0) {
            count++;
            x &= x - 1; //walking through all the bits which are set to one
        }

        return count;
    }
    public static int BitCount_NETSTANDARD_ulong_New(ulong x) {
        const ulong c1 = 0x_55555555_55555555ul;
        const ulong c2 = 0x_33333333_33333333ul;
        const ulong c3 = 0x_0F0F0F0F_0F0F0F0Ful;
        const ulong c4 = 0x_01010101_01010101ul;

        x -= (x >> 1) & c1;
        x = (x & c2) + ((x >> 2) & c2);
        x = (((x + (x >> 4)) & c3) * c4) >> 56;

        return (int)x;
    }
}

Joy-less avatar Feb 05 '25 17:02 Joy-less