BitFaster.Caching
BitFaster.Caching copied to clipboard
Optimize BitOps
trafficstars
Since this library is about micro-optimizations, I decided to dive into BitOps.
Firstly, one of the methods (CeilingPowerOfTwo) now has a corresponding BitOperations method (RoundUpToPowerOf2) which internally does the same thing as the NETSTANDARD version with optimizations depending on hardware.
Secondly, I replaced the while loop in another method with some code copy-pasted from the internal implementation of BitOperations.PopCount. I assume the C# team knows best here.
Here are some benchmarks. The BitCount optimizations are much more performant in NETSTANDARD, whereas the CeilingPowerOfTwo optimizations are the same performance but simpler source code-wise.
| Method | Mean | Error | StdDev |
|---|---|---|---|
| CeilingPowerOfTwo_uint_NET_Old | 25.39 us | 0.062 us | 0.055 us |
| CeilingPowerOfTwo_uint_NET_New | 25.30 us | 0.065 us | 0.061 us |
| CeilingPowerOfTwo_ulong_NET_Old | 25.30 us | 0.041 us | 0.036 us |
| CeilingPowerOfTwo_ulong_NET_New | 25.32 us | 0.042 us | 0.039 us |
| BitCount_NETSTANDARD_uint_Old | 152.31 us | 0.235 us | 0.184 us |
| BitCount_NETSTANDARD_uint_New | 25.32 us | 0.068 us | 0.064 us |
| BitCount_NETSTANDARD_ulong_Old | 152.46 us | 0.461 us | 0.431 us |
| BitCount_NETSTANDARD_ulong_New | 25.31 us | 0.067 us | 0.063 us |
Benchmark code
using System.Numerics;
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;
namespace ConsoleApp1;
internal class Program {
static void Main() {
BenchmarkSwitcher.FromAssembly(typeof(Program).Assembly).Run();
}
}
public class Benchmarks {
private const int Iterations = 100_000;
[Benchmark]
public void CeilingPowerOfTwo_uint_NET_Old() {
for (int Counter = 0; Counter < Iterations; Counter++) {
BitOperationMethods.CeilingPowerOfTwo_uint_NET_Old(100);
}
}
[Benchmark]
public void CeilingPowerOfTwo_uint_NET_New() {
for (int Counter = 0; Counter < Iterations; Counter++) {
BitOperationMethods.CeilingPowerOfTwo_uint_NET_New(100);
}
}
[Benchmark]
public void CeilingPowerOfTwo_ulong_NET_Old() {
for (int Counter = 0; Counter < Iterations; Counter++) {
BitOperationMethods.CeilingPowerOfTwo_ulong_NET_Old(100);
}
}
[Benchmark]
public void CeilingPowerOfTwo_ulong_NET_New() {
for (int Counter = 0; Counter < Iterations; Counter++) {
BitOperationMethods.CeilingPowerOfTwo_ulong_NET_New(100);
}
}
[Benchmark]
public void BitCount_NETSTANDARD_uint_Old() {
for (int Counter = 0; Counter < Iterations; Counter++) {
BitOperationMethods.BitCount_NETSTANDARD_uint_Old(100);
}
}
[Benchmark]
public void BitCount_NETSTANDARD_uint_New() {
for (int Counter = 0; Counter < Iterations; Counter++) {
BitOperationMethods.BitCount_NETSTANDARD_uint_New(100);
}
}
[Benchmark]
public void BitCount_NETSTANDARD_ulong_Old() {
for (int Counter = 0; Counter < Iterations; Counter++) {
BitOperationMethods.BitCount_NETSTANDARD_ulong_Old(100);
}
}
[Benchmark]
public void BitCount_NETSTANDARD_ulong_New() {
for (int Counter = 0; Counter < Iterations; Counter++) {
BitOperationMethods.BitCount_NETSTANDARD_ulong_New(100);
}
}
}
public static class BitOperationMethods {
public static uint CeilingPowerOfTwo_uint_NET_Old(uint x) {
return 1u << -BitOperations.LeadingZeroCount(x - 1);
}
public static uint CeilingPowerOfTwo_uint_NET_New(uint x) {
return BitOperations.RoundUpToPowerOf2(x);
}
public static ulong CeilingPowerOfTwo_ulong_NET_Old(ulong x) {
return 1ul << -BitOperations.LeadingZeroCount(x - 1);
}
public static ulong CeilingPowerOfTwo_ulong_NET_New(ulong x) {
return BitOperations.RoundUpToPowerOf2(x);
}
public static int BitCount_NETSTANDARD_uint_Old(uint x) {
var count = 0;
while (x != 0)
{
count++;
x &= x - 1; //walking through all the bits which are set to one
}
return count;
}
public static int BitCount_NETSTANDARD_uint_New(uint x) {
const uint c1 = 0x_55555555u;
const uint c2 = 0x_33333333u;
const uint c3 = 0x_0F0F0F0Fu;
const uint c4 = 0x_01010101u;
x -= (x >> 1) & c1;
x = (x & c2) + ((x >> 2) & c2);
x = (((x + (x >> 4)) & c3) * c4) >> 24;
return (int)x;
}
public static int BitCount_NETSTANDARD_ulong_Old(ulong x) {
var count = 0;
while (x != 0) {
count++;
x &= x - 1; //walking through all the bits which are set to one
}
return count;
}
public static int BitCount_NETSTANDARD_ulong_New(ulong x) {
const ulong c1 = 0x_55555555_55555555ul;
const ulong c2 = 0x_33333333_33333333ul;
const ulong c3 = 0x_0F0F0F0F_0F0F0F0Ful;
const ulong c4 = 0x_01010101_01010101ul;
x -= (x >> 1) & c1;
x = (x & c2) + ((x >> 2) & c2);
x = (((x + (x >> 4)) & c3) * c4) >> 56;
return (int)x;
}
}