PlotUtils.jl icon indicating copy to clipboard operation
PlotUtils.jl copied to clipboard

speed up postdecimal digits

Open BeastyBlacksmith opened this issue 2 years ago • 12 comments

On master:

@btime PlotUtils.postdecimal_digits(1.245)
  16.186 μs (0 allocations: 0 bytes)
3

This PR:

@btime PlotUtils.postdecimal_digits(1.245)
  21.035 ns (0 allocations: 0 bytes)
3

Not sure yet, that there aren't edge cases, where the results are different

BeastyBlacksmith avatar Jun 24 '22 11:06 BeastyBlacksmith

Slower, but more accurate:

function postdecimal_digits(x::T) where {T}
    isinteger(x) && return 0
    str = string(x)
    i = findfirst('.', str)
    return lastindex(str) - i
end
@btime PlotUtils.postdecimal_digits(1.245)
  72.293 ns (2 allocations: 432 bytes)
3

BeastyBlacksmith avatar Jun 24 '22 14:06 BeastyBlacksmith

Not sure yet, that there aren't edge cases, where the results are different

Worth adding a few tests covering up to n digits ?

Looks good otherwise.

t-bltg avatar Jun 24 '22 15:06 t-bltg

You should not use equality:

PlotUtils.postdecimal_digits(1.245)
16

t-bltg avatar Jun 24 '22 15:06 t-bltg

You should not use equality:

PlotUtils.postdecimal_digits(1.245)
16

So we use the allocating version?

BeastyBlacksmith avatar Jun 24 '22 15:06 BeastyBlacksmith

I think reverting https://github.com/JuliaPlots/PlotUtils.jl/pull/140/commits/9fc6af252c3842deefa416af62186938a1a498f2 should be enough ?

Not the string version, it's awful (relying on string formatting when working on floats).

t-bltg avatar Jun 24 '22 15:06 t-bltg

I think reverting 9fc6af2 should be enough ?

Not the string version, it's awful.

It's so far the only one that passes the tests apart from the original version

BeastyBlacksmith avatar Jun 24 '22 15:06 BeastyBlacksmith

What about this, which passes all current tests:

function postdecimal_digits(x::T) where {T}
    isinteger(x) && return 0
    for i in 0:ceil(Int, log10(floatmax(T)))
        x == floor(x; digits = i) && return i
    end
    return 0
end

@btime PlotUtils.postdecimal_digits(1.245)
  35.118 ns (0 allocations: 0 bytes)
3
@btime PlotUtils.postdecimal_digits(floatmin(Float64))
  16.020 μs (0 allocations: 0 bytes)
309
@btime PlotUtils.postdecimal_digits(floatmin(Float32))
  1.389 μs (0 allocations: 0 bytes)
39

t-bltg avatar Jun 24 '22 16:06 t-bltg

I am beginning to like this implementation

function postdecimal_digits(x::T) where {T}
    counter = 0
    isinteger(x) && return 0
    y = x
    while !isinteger(y)
        counter += 1
        y = x * 10^counter
    end
    return counter
end

however, it clips at 17 digits except for cases like floatmin(Float64) where it clips at 64. I'd say that is fine for the purpose here, since you generally don't want to print that many digits in the first place.

BeastyBlacksmith avatar Jun 27 '22 10:06 BeastyBlacksmith

Here is some some testing:

using BenchmarkTools
using Distributions
using UnicodePlots
using StableRNGs
using Random

# old implementation
function postdecimal_digits(x::T) where {T}
  for i ∈ floor(Int, log10(floatmin(T))):ceil(Int, log10(floatmax(T)))
    x == floor(x; digits=i) && return max(0, i)
  end
  0
end

function postdecimal_digits_str(x)
    isinteger(x) && return 0
    isfinite(x) || return 0
    str = string(x)
    i = findfirst('.', str)
    lastindex(str) - i
end

function PostDecimalDigits(x)
    isinteger(x) && return 0
    isfinite(x) || return 0
    cnt = 0
    y = x
    while !isinteger(y) && cnt < 100
        cnt += 1
        y = x * 10^cnt
    end
    cnt
end

main() = begin
  rng = StableRNG(1337)
  Random.seed!(rng, 1337)

  # func = (args...) -> nothing
  # func = error
  func = println
  ndiff = 10

  s = 4
  for zctr in (true, false)
    for i in -8:s:18
      if zctr
        μ = 0.
        σ = 10.0^i
      else
        μ = 10.0^i
        σ = 10.0^s
      end
      d = Normal(μ, σ)

      errs = zeros(Int, ndiff)  # error distribution
      samples = 100_000
      nok1 = nok2 = ok = 0

      for (i, f) ∈ enumerate(rand(rng, d, samples))
        old, new = postdecimal_digits(f), PostDecimalDigits(f)
        if (Δ = abs(old - new)) == 0
          ok += 1
        else
          msg = "$i) Δ=$Δ o=$old n=$new s=$(postdecimal_digits_str(f)) f=$(repr(f))"
          # func(msg)
          if Δ > ndiff
            func(msg)
            nok2 += 1
          elseif Δ > 0
            errs[Δ] += 1
            nok1 += 1
          end
        end
      end

      println("$ok $nok1 $nok2")
      lineplot(errs[1:end], title="err μ=$μ σ=$σ | ✓: $(round(100ok / samples; digits = 2))% | n°=$samples") |> display
    end
  end

  f = rand(rng, Float64)
  @show f
  @btime postdecimal_digits($f)
  @btime PostDecimalDigits($f)
  return
end

main()

The errors look quite large ...

t-bltg avatar Jun 27 '22 12:06 t-bltg

So if we take this one

function postdecimal_digits(x::T) where {T}
    isinteger(x) && return 0
    for i in 0:ceil(Int, log10(floatmax(T)))
        x == floor(x; digits = i) && return i
    end
    return 0
end

we get postdecimal_digits(2.5e-22) == 23 on windows and 25 on linux and macos. The windows value makes more sense to me, I wonder why its different for the other OSs.

BeastyBlacksmith avatar Jul 01 '22 10:07 BeastyBlacksmith

windows being right is an oxymoron to me :laughing:

I guess anything smaller than machine epsilon will give you different results when working with different processors and math libraries (I don't want to go over https://docs.oracle.com/cd/E19957-01/806-3568/ncg_goldberg.html again ...).

Maybe use again ?

t-bltg avatar Jul 01 '22 16:07 t-bltg

we get postdecimal_digits(2.5e-22) == 23 on windows and 25 on linux and macos.

I don't get this on linux:


julia> versioninfo()
Julia Version 1.7.3
Commit 742b9abb4d (2022-05-06 12:58 UTC)
Platform Info:
  OS: Linux (x86_64-pc-linux-gnu)
  CPU: Intel(R) Core(TM) i7-8565U CPU @ 1.80GHz
  WORD_SIZE: 64
  LIBM: libopenlibm
  LLVM: libLLVM-12.0.1 (ORCJIT, skylake)
Environment:
  JULIA_NUM_THREADS = 8

julia> function postdecimal_digits(x::T) where {T}
           isinteger(x) && return 0
           for i in 0:ceil(Int, log10(floatmax(T)))
               x == floor(x; digits=i) && return i
           end
           return 0
       end
postdecimal_digits (generic function with 1 method)

julia> postdecimal_digits(2.5e-22)
23

daschw avatar Jul 05 '22 08:07 daschw