parallel-programming-in-multicore-ocaml icon indicating copy to clipboard operation
parallel-programming-in-multicore-ocaml copied to clipboard

Clarification on parallel_scan

Open anishcr opened this issue 11 months ago • 0 comments

Was curious to know more about the parallel_scan API as it is mentioned.

Task API has a primitive parallel_scan which might come in handy in scenarios like this.

I created an implementation of sequential scan (scan.ml) and tried to benchmark with parallel version (scan_par.ml).

Benchmark 1: dune exec ./scan_par.exe 12 10000
Time (mean ± σ):      26.1 ms ±   5.4 ms    [User: 18.9 ms, System: 17.4 ms]
Range (min … max):    24.6 ms …  61.7 ms    46 runs
Benchmark 1: dune exec ./scan.exe 10000
Time (mean ± σ):      26.1 ms ±   8.0 ms    [User: 13.5 ms, System: 11.8 ms]
Range (min … max):    21.9 ms …  65.5 ms    48 runs

from the benchmark results it appears that we are not gaining from parallel version of scan.

What am I missing (or doing wrong)?

File scan.ml

let n = try int_of_string Sys.argv.(1) with _ -> 1000

let scan op elements =
  let n = Array.length elements in
  let prefix_s = Array.copy elements in
  for i = 1 to (n - 1) do
    prefix_s.(i) <- op prefix_s.(i - 1) elements.(i)
  done;
  prefix_s

let main () =
  let xs = Array.init n (fun x -> x + 1) in
  let res = scan (+) xs in
  assert(res.(n - 1) = (n * (n + 1) / 2))

let _ = main ()

File scan_par.ml

module T = Domainslib.Task

let num_domains = try int_of_string Sys.argv.(1) with _ -> 12
let n           = try int_of_string Sys.argv.(2) with _ -> 1000

let main () =
  let xs = Array.init n (fun x -> x + 1) in
  let pool = T.setup_pool ~num_domains:(num_domains - 1) () in
  let res = T.run pool (fun _ -> T.parallel_scan pool (+) xs) in
  T.teardown_pool pool;
  assert(res.(n - 1) = (n * (n + 1) / 2))

let _ = main ()

anishcr avatar Mar 07 '24 07:03 anishcr