pelemay icon indicating copy to clipboard operation
pelemay copied to clipboard

Create a SIMD function for string concatenation (<>)

Open zacky1972 opened this issue 4 years ago • 1 comment

Create the following functions:

#include <erl_nif.h>

/* Requested buffer-level helper (declaration only; no definition appears here).
 * Presumably writes the bytes of left followed by the bytes of right into
 * *object; the meaning of the int return value is not stated -- TODO confirm. */
int string_concat_buffer(ErlNifBinary left, ErlNifBinary right, ErlNifBinary *object);
/* Requested term-level entry point (declaration only; no definition appears
 * here). Presumably returns the concatenation of the left and right terms --
 * TODO confirm. */
ERL_NIF_TERM string_concat(ErlNifEnv *env, ERL_NIF_TERM left, ERL_NIF_TERM right);

Also compare and evaluate their execution time.

The branch is string_concat.

zacky1972 avatar Feb 07 '20 23:02 zacky1972

I implemented it, but it is much slower than Kernel.<>...

/*
 * NIF: concatenate two binaries with memcpy.
 *
 * argv[0] and argv[1] must both be binaries. On success the result is a new
 * binary term holding the bytes of argv[0] followed by the bytes of argv[1].
 *
 * The result of enif_make_badarg() is returned when argc is not 2, when
 * either argument is not a binary, when the combined size does not fit in
 * size_t, or when the output binary cannot be allocated.
 */
static
ERL_NIF_TERM concat_1(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[])
{
	if(__builtin_expect(argc != 2, false)) {
		return enif_make_badarg(env);
	}
	ErlNifBinary left_binary;
	if(__builtin_expect(!enif_inspect_binary(env, argv[0], &left_binary), false)) {
		return enif_make_badarg(env);
	}
	ErlNifBinary right_binary;
	if(__builtin_expect(!enif_inspect_binary(env, argv[1], &right_binary), false)) {
		return enif_make_badarg(env);
	}
	/*
	 * size_t addition wraps silently. A wrapped total would allocate a
	 * buffer smaller than the inputs and the copies below would write past
	 * its end, so reject that case before allocating.
	 */
	size_t total_size = left_binary.size + right_binary.size;
	if(__builtin_expect(total_size < left_binary.size, false)) {
		return enif_make_badarg(env);
	}
	ErlNifBinary object_binary;
	if(__builtin_expect(!enif_alloc_binary(total_size, &object_binary), false)) {
		return enif_make_badarg(env);
	}
	/* Left bytes first, right bytes immediately after them. */
	memcpy(object_binary.data, left_binary.data, left_binary.size);
	memcpy(object_binary.data + left_binary.size, right_binary.data, right_binary.size);
	return enif_make_binary(env, &object_binary);
}

/*
 * NIF: concatenate two binaries with explicit byte-copy loops.
 *
 * argv[0] and argv[1] must both be binaries. On success the result is a new
 * binary term holding the bytes of argv[0] followed by the bytes of argv[1].
 *
 * The result of enif_make_badarg() is returned when argc is not 2, when
 * either argument is not a binary, when the combined size does not fit in
 * size_t, or when the output binary cannot be allocated.
 */
static
ERL_NIF_TERM concat_2(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[])
{
	if(__builtin_expect(argc != 2, false)) {
		return enif_make_badarg(env);
	}
	ErlNifBinary left_binary;
	if(__builtin_expect(!enif_inspect_binary(env, argv[0], &left_binary), false)) {
		return enif_make_badarg(env);
	}
	ErlNifBinary right_binary;
	if(__builtin_expect(!enif_inspect_binary(env, argv[1], &right_binary), false)) {
		return enif_make_badarg(env);
	}
	/*
	 * size_t addition wraps silently. A wrapped total would allocate a
	 * buffer smaller than the inputs and the loops below would write past
	 * its end, so reject that case before allocating.
	 */
	size_t total_size = left_binary.size + right_binary.size;
	if(__builtin_expect(total_size < left_binary.size, false)) {
		return enif_make_badarg(env);
	}
	ErlNifBinary object_binary;
	if(__builtin_expect(!enif_alloc_binary(total_size, &object_binary), false)) {
		return enif_make_badarg(env);
	}
	unsigned char *ptr = object_binary.data;
	/*
	 * The counters are size_t to match ErlNifBinary.size: a narrower
	 * unsigned counter would wrap before reaching a size above UINT_MAX and
	 * the loop would never terminate while ptr kept advancing.
	 *
	 * NOTE(review): loop_vectorize_width is not defined in this block, and
	 * the pragma is placed only before the first loop -- confirm whether the
	 * second loop was meant to carry it as well.
	 */
#pragma clang loop vectorize_width(loop_vectorize_width)
	for(size_t i = 0; i < left_binary.size; i++) {
		*ptr++ = left_binary.data[i];
	}
	for(size_t i = 0; i < right_binary.size; i++) {
		*ptr++ = right_binary.data[i];
	}
	return enif_make_binary(env, &object_binary);
}
## StringConcatBench
benchmark  iterations   average time 
Kernel.<>  1000000000   0.01 µs/op
concat_1     10000000   0.24 µs/op
concat_2     10000000   0.31 µs/op

zacky1972 avatar Feb 08 '20 00:02 zacky1972