rebar3_bench icon indicating copy to clipboard operation
rebar3_bench copied to clipboard

Try to optimize runner's tight inner loop

Open seriyps opened this issue 6 years ago • 3 comments

https://github.com/seriyps/rebar3_bench/blob/03bb6ece29adf7315e28757d5e17d61cd4c204b2/src/rebar3_bench_runner.erl#L129-L134

Should find if it's possible to somehow minimize the amount of work done by the loop.

seriyps avatar Sep 11 '19 13:09 seriyps

On OTP22 seems changing the order of arguments doesn't reduce amount of opcodes in the function, but changes opcodes itself and amount of arguments to those opcodes:

do_run_n(_, _, 0) ->
    ok;
do_run_n(St, F, N) ->
    F(St),
    do_run_n(St, F, N - 1).

{function, do_run_n, 3, 4}.
  {label,3}.
    {line,[{location,"main.erl",14}]}.
    {func_info,{atom,main},{atom,do_run_n},3}.
  {label,4}.
    {'%',{type_info,{x,2},number}}.
    {test,is_eq_exact,{f,5},[{x,2},{integer,0}]}.
    {move,{atom,ok},{x,0}}.
    return.
  {label,5}.
    {allocate,3,3}.
    {move,{x,2},{y,0}}.
    {move,{x,1},{y,1}}.
    {move,{x,0},{y,2}}.
    {move,{y,1},{x,1}}.
    {line,[{location,"main.erl",17}]}.
    {call_fun,1}.
    {line,[{location,"main.erl",18}]}.
    {gc_bif,'-',{f,0},0,[{y,0},{integer,1}],{x,2}}.
    {move,{y,1},{x,1}}.
    {move,{y,2},{x,0}}.
    {call_last,3,{f,4},3}.

00007F6EA37CB398: i_func_info_IaaI 0 main do_run_n 3 
00007F6EA37CB3C0: i_is_eq_exact_immed_fxc f(00007F6EA37CB3E8) x(2) 0 
00007F6EA37CB3D8: move_return_c ok 
00007F6EA37CB3E8: allocate_tt 3 3 
00007F6EA37CB3F8: move_window3_xxxy x(2) x(1) x(0) y(0) 
00007F6EA37CB408: move_yx y(1) x(1) 
00007F6EA37CB418: i_call_fun_t 1 
00007F6EA37CB428: i_increment_yWd y(0) -1 x(2) 
00007F6EA37CB448: move_src_window2_yxx y(1) x(1) x(0) 
00007F6EA37CB458: i_call_last_fQ main:do_run_n/3 3 

( 10 opcodes, `00007F6EA37CB408: move_yx y(1) x(1) ` touches 2 registers)
========================================


do_run_n(_, _, 0) ->
    ok;
do_run_n(F, St, N) ->
    F(St),
    do_run_n(F, St, N - 1).

{function, do_run_n, 3, 4}.
  {label,3}.
    {line,[{location,"main.erl",14}]}.
    {func_info,{atom,main},{atom,do_run_n},3}.
  {label,4}.
    {'%',{type_info,{x,2},number}}.
    {test,is_eq_exact,{f,5},[{x,2},{integer,0}]}.
    {move,{atom,ok},{x,0}}.
    return.
  {label,5}.
    {allocate,3,3}.
    {move,{x,2},{y,0}}.
    {move,{x,1},{y,1}}.
    {move,{x,0},{y,2}}.
    {move,{x,1},{x,0}}.
    {move,{y,2},{x,1}}.
    {line,[{location,"main.erl",17}]}.
    {call_fun,1}.
    {line,[{location,"main.erl",18}]}.
    {gc_bif,'-',{f,0},0,[{y,0},{integer,1}],{x,2}}.
    {move,{y,1},{x,1}}.
    {move,{y,2},{x,0}}.
    {call_last,3,{f,4},3}.

00007F64D740B3A0: i_func_info_IaaI 0 main do_run_n 3 
00007F64D740B3C8: i_is_eq_exact_immed_fxc f(00007F64D740B3F0) x(2) 0 
00007F64D740B3E0: move_return_c ok 
00007F64D740B3F0: allocate_tt 3 3 
00007F64D740B400: move_window3_xxxy x(2) x(1) x(0) y(0) 
00007F64D740B410: move_shift_yxx y(2) x(1) x(0) 
00007F64D740B420: i_call_fun_t 1 
00007F64D740B430: i_increment_yWd y(0) -1 x(2) 
00007F64D740B450: move_src_window2_yxx y(1) x(1) x(0) 
00007F64D740B460: i_call_last_fQ main:do_run_n/3 3 

( 10 opcodes, `00007F64D740B410: move_shift_yxx y(2) x(1) x(0) ` touches 3 registers)
==========================================

do_run_n(0, _, _) ->
    ok;
do_run_n(N, F, St) ->
    F(St),
    do_run_n(N - 1, F, St).

00007F62A078B3A0: i_func_info_IaaI 0 main do_run_n 3 
00007F62A078B3C8: i_is_eq_exact_immed_frc f(00007F62A078B3F0) r(0) 0 
00007F62A078B3E0: move_return_c ok 
00007F62A078B3F0: allocate_tt 3 3 
00007F62A078B400: move_window3_xxxy x(2) x(1) x(0) y(0) 
00007F62A078B410: move2_par_yxxx y(1) x(1) x(2) x(0) 
00007F62A078B420: i_call_fun_t 1 
00007F62A078B430: i_increment_yWd y(2) -1 x(0) 
00007F62A078B450: move_src_window2_yxx y(0) x(2) x(1) 
00007F62A078B460: i_call_last_fQ main:do_run_n/3 3 

( 10 opcodes, `00007F62A078B410: move2_par_yxxx y(1) x(1) x(2) x(0) ` touches 4 registers)

So, the current order of arguments is the optimal one

seriyps avatar Sep 11 '19 13:09 seriyps

It might be problematic that less possible clause N == 0 -> ok is executed first. Would be nice to make main body execute first and jump to -> ok if guard fails.

do_run_n(St, F, N) when N =/= 0 ->
    F(St),
    do_run_n(St, F, N - 1);
do_run_n(_, _, 0) ->
    ok.

{function, do_run_n, 3, 4}.
  {label,3}.
    {line,[{location,"main.erl",14}]}.
    {func_info,{atom,main},{atom,do_run_n},3}.
  {label,4}.
    {'%',{type_info,{x,2},number}}.
    {test,is_eq_exact,{f,5},[{x,2},{integer,0}]}.
    {move,{atom,ok},{x,0}}.
    return.
  {label,5}.
    {allocate,3,3}.
    {move,{x,2},{y,0}}.
    {move,{x,1},{y,1}}.
    {move,{x,0},{y,2}}.
    {move,{y,1},{x,1}}.
    {line,[{location,"main.erl",15}]}.
    {call_fun,1}.
    {line,[{location,"main.erl",16}]}.
    {gc_bif,'-',{f,0},0,[{y,0},{integer,1}],{x,2}}.
    {move,{y,1},{x,1}}.
    {move,{y,2},{x,0}}.
    {call_last,3,{f,4},3}.

00007FBF13D0B398: i_func_info_IaaI 0 main do_run_n 3 
00007FBF13D0B3C0: i_is_eq_exact_immed_fxc f(00007FBF13D0B3E8) x(2) 0 
00007FBF13D0B3D8: move_return_c ok 
00007FBF13D0B3E8: allocate_tt 3 3 
00007FBF13D0B3F8: move_window3_xxxy x(2) x(1) x(0) y(0) 
00007FBF13D0B408: move_yx y(1) x(1) 
00007FBF13D0B418: i_call_fun_t 1 
00007FBF13D0B428: i_increment_yWd y(0) -1 x(2) 
00007FBF13D0B448: move_src_window2_yxx y(1) x(1) x(0) 
00007FBF13D0B458: i_call_last_fQ main:do_run_n/3 3 

So, putting "main" body as 1st clause with N =/= 0 doesn't make any difference on OTP-22. Compiler converts it to the same binary code as original.

do_run_n(St, F, N) when N > 0 ->
    F(St),
    do_run_n(St, F, N - 1);
do_run_n(_, _, N) ->
    ok.

{function, do_run_n, 3, 4}.
  {label,3}.
    {line,[{location,"main.erl",14}]}.
    {func_info,{atom,main},{atom,do_run_n},3}.
  {label,4}.
    {test,is_lt,{f,5},[{integer,0},{x,2}]}.
    {allocate,3,3}.
    {move,{x,2},{y,0}}.
    {move,{x,1},{y,1}}.
    {move,{x,0},{y,2}}.
    {move,{y,1},{x,1}}.
    {line,[{location,"main.erl",15}]}.
    {call_fun,1}.
    {line,[{location,"main.erl",16}]}.
    {gc_bif,'-',{f,0},0,[{y,0},{integer,1}],{x,2}}.
    {move,{y,1},{x,1}}.
    {move,{y,2},{x,0}}.
    {call_last,3,{f,4},3}.
  {label,5}.
    {move,{atom,ok},{x,0}}.
    return.

00007F6573B0B3D0: i_func_info_IaaI 0 main do_run_n 3 
00007F6573B0B3F8: is_lt_fcx f(00007F6573B0B490) 0 x(2) 
00007F6573B0B410: allocate_tt 3 3 
00007F6573B0B420: move_window3_xxxy x(2) x(1) x(0) y(0) 
00007F6573B0B430: move_yx y(1) x(1) 
00007F6573B0B440: i_call_fun_t 1 
00007F6573B0B450: i_increment_yWd y(0) -1 x(2) 
00007F6573B0B470: move_src_window2_yxx y(1) x(1) x(0) 
00007F6573B0B480: i_call_last_fQ main:do_run_n/3 3 
00007F6573B0B490: move_return_c ok 

http://tryerl.seriyps.ru/#id=78b4

So, if we put N > 0 as 1st clause, it will be executed first. But I don't know if is_lt_fcx (<) is noticeably slower than i_is_eq_exact_immed_frc (=:=)

seriyps avatar Sep 11 '19 13:09 seriyps

Last option to consider is to run F multiple times on each iteration loop (N should be div 5 from original N):

do_run_n(St, F, N) when N =/= 0 ->
    F(St),
    F(St),
    F(St),
    F(St),
    F(St),
    do_run_n(St, F, N - 1);
do_run_n(_, _, 0) ->
    ok.

00007FEA7A30D968: i_func_info_IaaI 0 main do_run_n 3 
00007FEA7A30D990: i_is_ne_exact_immed_fxc f(00007FEA7A30DB00) x(2) 0 
00007FEA7A30D9B0: allocate_tt 3 3 
00007FEA7A30D9C0: move2_xyxy x(2) y(0) x(1) y(1) 
00007FEA7A30D9D0: move_ry x(0) y(2) 
00007FEA7A30D9E0: i_call_fun_I 1 
00007FEA7A30D9F0: move_yx y(1) x(1) 
00007FEA7A30DA00: move_yr y(2) x(0) 
00007FEA7A30DA10: i_call_fun_I 1 
00007FEA7A30DA20: move_yx y(1) x(1) 
00007FEA7A30DA30: move_yr y(2) x(0) 
00007FEA7A30DA40: i_call_fun_I 1 
00007FEA7A30DA50: move_yx y(1) x(1) 
00007FEA7A30DA60: move_yr y(2) x(0) 
00007FEA7A30DA70: i_call_fun_I 1 
00007FEA7A30DA80: move_yx y(1) x(1) 
00007FEA7A30DA90: move_yr y(2) x(0) 
00007FEA7A30DAA0: i_call_fun_I 1 
00007FEA7A30DAB0: i_increment_yIId y(0) -1 0 x(2) 
00007FEA7A30DAD8: move_yx y(1) x(1) 
00007FEA7A30DAE8: move_call_last_yrfQ y(2) x(0) main:do_run_n/3 3 
00007FEA7A30DB00: move_return_cr ok x(0) 

It should be quite good for small fast functions, but might be problematic for slow ones

seriyps avatar Sep 11 '19 14:09 seriyps