Microbenchmarks
Microbenchmarks copied to clipboard
Add Heaps algorithm which crawls over all permutations
The Heap's algorithm which crawls over all permutations seem a good testcase, cause You can not be "smart" and use language specific optimisations, it's just pure operations on integer arrays (indexing, changing content of);
function allperms(N)
elts = collect(1:N)
c = ones(Int, N)
countfirst = 0
n = 1
k = 0
# doit(elts)
countfirst += elts[1]
@inbounds while n <= N
if c[n] < n
k = ifelse(isodd(n), 1, c[n])
elts[k], elts[n] = elts[n], elts[k]
c[n] += 1
n = 1
countfirst += elts[1]
# doit(elts)
else
c[n] = 1
n += 1
end
end
return countfirst
end
a similar implementation in c:
void swap(int *p, int a, int b){
int tmp;
tmp = p[a];
p[a] = p[b];
p[b] = tmp;
}
long heaps(int N){
long count = 0;
int n;
int *p = malloc(sizeof(int)*N);
int *c = calloc(N, sizeof(int));
for (n=0; n < N; n++){
p[n] = n+1;
// c[n] = 0;
}
count += p[0];
for (n=0; n < N;){
if (c[n] < n){
swap(p, (n%2 ? c[n] : 0), n);
count += p[0];
c[n]++;
n = 0;
} else {
c[n] = 0;
n++;
}
}
free(p);
free(c);
return count;
}
EDIT: @inbounds macro makes a fairer comparison to C (and speeds julias version!)
That does seem like a good one!
Here's my Rust attempt. This sure feels hard to optimize.
#include <stdlib.h>
#include <stdio.h>
void swap(size_t *p, size_t a, size_t b) {
size_t tmp = p[a];
p[a] = p[b];
p[b] = tmp;
}
size_t heaps(size_t n) {
size_t *a = malloc(n * sizeof *a);
if (!a) {
exit(EXIT_FAILURE);
}
for (size_t i = 0; i < n; i++) {
a[i] = i; // i + 1 was strange
}
size_t *c = calloc(n, sizeof *c);
if (!c) {
exit(EXIT_FAILURE);
}
size_t count = a[0];
size_t i = 0;
while (i < n) {
if (c[i] < i) {
swap(a, i % 2 ? c[i] : 0, i);
count += a[0];
c[i]++;
i = 0;
} else {
c[i] = 0;
i++;
}
}
free(a);
free(c);
return count;
}
I think there are some issues with these codes
| heaps(n), n=1:4 | code |
|---|---|
| 1, 3, 12, 60 | kalmarek Julia |
| 0, 3, 12, 60 | karmarek C |
| 0, 1, 6, 36 | Stargateur C |
| 1, 2, 6, 24 | n! |
@johnfgibson Sorry, I should make it more clear that I change the initialisation in favor of the rust:
p[n] = n+1; // n + 1 could overflow n is as good as n + 1 and can't overflow
let mut perm: Vec<_> = (0..n).collect(); // rust use n
a[i] = i; // so I just used n too
@johnfgibson, @Stargateur: for comparison with n-factorial: we don't simply count the permutations, but sum the first entry of every permutation (otherwise future compilers may skip the swap part entirely);
p[n] = n+1 was because permutations are usually counted from one, not zero. This does matter for the returned value.
Could You also recheck n=1? my C version returns squarely 1 for N=1 ;-)
@kalmarek aha! thank you. (regarding returning the sum of the first entry).
I ran what's currently posted above and got heaps(1:5) == 1, 3, 12, 60, 360. There were some differences from an earlier version (count += p[0] versus count += p[1]) that probably account for the difference.
@Stargateur I don't follow from your comment what changes should be made.
I can add working codes to the microbenchmark suite but I don't think I can debug and fix codes with errors.
@kalmarek @johnfgibson I'm not a mathematician, if you conclude that function should start init with 1, I'm ok with it. But I didn't think that the result was so important, just as you said it's for avoid compiler to optimize it. I'm just a C dev who read this thread.
This is better ?
#include <stdlib.h>
#include <stdio.h>
#include <inttypes.h>
#include <assert.h>
static void swap(uintmax_t *p, size_t a, size_t b) {
uintmax_t tmp = p[a];
p[a] = p[b];
p[b] = tmp;
}
// UB for n == 0
static uintmax_t heaps(size_t n) {
uintmax_t *a = malloc(n * sizeof *a);
if (!a) {
exit(EXIT_FAILURE);
}
uintmax_t acu = 1;
for (size_t i = 0; i < n; i++) {
a[i] = acu++;
}
size_t *c = calloc(n, sizeof *c);
if (!c) {
exit(EXIT_FAILURE);
}
assert(n != 0); // remove when NDEBUG is defined
uintmax_t count = a[0];
size_t i = 0;
while (i < n) {
if (c[i] < i) {
swap(a, i % 2 ? c[i] : 0, i);
count += a[0];
c[i]++;
i = 0;
} else {
c[i] = 0;
i++;
}
}
free(a);
free(c);
return count;
}
int main(void) {
for (size_t i = 1; i < 5; i++) {
printf("heaps(%zu) = %" PRIuMAX "\n", i, heaps(i));
}
}
@Stargateur, sure the exact value does not matter, it was just to explain the differences in the results. If I am not mistaken, You could start from a[0] = 0, but then You need to sum a[1]. Anyway, the important thing is that we swap during each iteration.
BTW How do I call Your code from julia? if I use:
C_code = """
[Your C code here]
"""
const Clib = tempname()
open(`gcc -fPIC -O3 -xc -shared -o $(Clib * "." * Libdl.dlext) -`, "w") do f
print(f, C_code)
end
heaps_c(n::Int) = ccall(("heaps", Clib), Int, (Int,), n)
I get ccall: could not find function heaps in library /tmp/juliaACJ6nY, but this works for my, primitive ;-) C code
Remove static from static uintmax_t heaps(size_t n), it's make the symbol not global.
C_code = """
#include <stdlib.h>
#include <stdio.h>
#include <inttypes.h>
#include <assert.h>
static void swap(uintmax_t *p, size_t a, size_t b) {
uintmax_t tmp = p[a];
p[a] = p[b];
p[b] = tmp;
}
// UB for n == 0
uintmax_t heaps(size_t n) {
uintmax_t *a = malloc(n * sizeof *a);
if (!a) {
exit(EXIT_FAILURE);
}
uintmax_t acu = 1;
for (size_t i = 0; i < n; i++) {
a[i] = acu++;
}
size_t *c = calloc(n, sizeof *c);
if (!c) {
exit(EXIT_FAILURE);
}
assert(n != 0); // remove when NDEBUG is defined
uintmax_t count = a[0];
size_t i = 0;
while (i < n) {
if (c[i] < i) {
swap(a, i % 2 ? c[i] : 0, i);
count += a[0];
c[i]++;
i = 0;
} else {
c[i] = 0;
i++;
}
}
free(a);
free(c);
return count;
}
"""
const Clib = tempname()
open(`gcc -fPIC -O3 -xc -shared -o $(Clib * "." * Libdl.dlext) -`, "w") do f
print(f, C_code)
end
ccall(("heaps", Clib), Cuintmax_t , (Csize_t,), n)
I updated the Rust code above as well, thanks to your feedback. It was initializing p from 0 rather than 1.
a naive python version
def heaps(n):
elts = [i+1 for i in range(n)]
c = [0 for i in range(n)]
n = 0
countfirst = elts[0]
while n < len(elts):
if c[n] < n:
if n % 2:
k = c[n]
else:
k = 0
elts[k], elts[n] = elts[n], elts[k]
# print(elts)
countfirst += elts[0]
c[n] += 1
n = 0
else:
c[n] = 0
n += 1
return countfirst
To be honest, I don't know any other language microbenchmark compares julia against, so someone else has to come-up with the code.
defining heaps_c(n) = Int(ccall(("heaps", Clib), Cuintmax_t , (Csize_t,), n)) I get:
On julia-0.6.4:
julia> @btime allperms(12)
2.862 s (3 allocations: 384 bytes)
3113510400
julia> @btime heaps_c(12)
1.470 s (0 allocations: 0 bytes)
3113510400
On julia-0.7.0:
julia> @btime allperms(12)
1.397 s (2 allocations: 352 bytes)
3113510400
julia> @btime heaps_c(12)
1.466 s (0 allocations: 0 bytes)
3113510400
I'm impressed!