lwt icon indicating copy to clipboard operation
lwt copied to clipboard

allow multiple schedulers to run in parallel

Open raphael-proust opened this issue 6 months ago • 1 comments

in separate domains


to test this PR:

  • set up ocaml.5.3.0 work environment
  • dune exec test/multidomain/basic.exe

I'm going to self-review, tweak what I can, and then ask for feedback and such

raphael-proust avatar Jun 06 '25 12:06 raphael-proust

Basic benchmarks look promising
I used httpaf's benchmarks

vanilla lwt, vanilla httpaf
$ wrk -t8 -c64 http://localhost:8080/

Running 10s test @ http://localhost:8080/
  8 threads and 64 connections
  Thread Stats   Avg      Stdev     Max   +/- Stdev
    Latency   433.70us  433.98us  18.69ms   99.13%
    Req/Sec    19.57k     1.73k   25.66k    89.95%
  1569405 requests in 10.10s, 3.06GB read
Requests/sec: 155397.76
Transfer/sec:    310.33MB

------------

multidomain lwt, vanilla httpaf
$ wrk -t8 -c64 http://localhost:8080/

Running 10s test @ http://localhost:8080/
  8 threads and 64 connections
  Thread Stats   Avg      Stdev     Max   +/- Stdev
    Latency   419.01us   65.43us   2.64ms   90.19%
    Req/Sec    19.17k   770.33    23.90k    76.30%
  1537064 requests in 10.10s, 3.00GB read
Requests/sec: 152195.99
Transfer/sec:    303.93MB

----------------

multidomain lwt, one accept-loop domain and one http-handler domain
$ wrk -t8 -c64 http://localhost:8080/

Running 10s test @ http://localhost:8080/
  8 threads and 64 connections
  Thread Stats   Avg      Stdev     Max   +/- Stdev
    Latency   419.32us   61.07us   2.32ms   86.11%
    Req/Sec    19.14k     0.90k   24.07k    75.06%
  1535261 requests in 10.10s, 2.99GB read
Requests/sec: 152021.18
Transfer/sec:    303.59MB

-------------------------

multidomain lwt, one accept-loop domain and (Domain.recommended_domain_count () - 1) http-handler domains
$ wrk -t8 -c64 http://localhost:8080/

Running 10s test @ http://localhost:8080/
  8 threads and 64 connections
  Thread Stats   Avg      Stdev     Max   +/- Stdev
    Latency   325.46us  667.07us  11.73ms   90.79%
    Req/Sec    48.68k     9.09k   76.94k    68.00%
  3876432 requests in 10.02s, 7.56GB read
Requests/sec: 386990.93
Transfer/sec:    772.82MB

vanilla means unchanged, as released
multidomain lwt means from this here branch
the recommended number of domains on my machine is 8
the work distribution onto http handlers is very naïve:

(** [spawn_domain_worker ~request_handler ~error_handler ()] starts a new
    domain running its own Lwt main loop and returns the push function of the
    stream that feeds it.

    Each item pushed is [Some (client_addr, socket, r)]: the worker runs its
    connection handler on [client_addr] and [socket], then resolves [r] so the
    pusher learns the connection is done. If the handler fails, [r] is
    rejected with the same exception and the worker keeps serving the
    following connections. Pushing [None] closes the stream, which ends the
    worker's main loop. *)
let spawn_domain_worker ~request_handler ~error_handler () =
  (* stream to push work in *)
  let recv_task, send_task = Lwt_stream.create () in
  let serve handler (client_addr, socket, r) =
    let* () =
      (* [try_bind] also catches exceptions raised synchronously by [handler].
         Without it a failing connection would leave [r] pending forever and
         reject the whole [iter_p], terminating this domain's main loop. *)
      Lwt.try_bind
        (fun () -> handler client_addr socket)
        (fun () ->
          Lwt.wakeup r ();
          Lwt.return_unit)
        (fun exn ->
          Lwt.wakeup_exn r exn;
          Lwt.return_unit)
    in
    Lwt.pause ()
  in
  (* The domain is never joined; the annotation makes the discard explicit. *)
  let (_ : unit Domain.t) =
    Domain.spawn (fun () ->
      Lwt_unix.init_domain ();
      (* each domain gets one handler *)
      let handler =
        Server.create_connection_handler ~request_handler ~error_handler
      in
      Lwt_main.run (
        let* () = Lwt.pause () in
        (* iter_p: no limit on concurrency within a domain *)
        Lwt_stream.iter_p (serve handler) recv_task
      )
    )
  in
  send_task

(** [create_connection_handler ~request_handler ~error_handler] spawns the
    worker domains once and returns a connection handler
    [work client_addr socket] that hands each connection to the next worker in
    round-robin order. The promise returned by [work] is the one whose
    resolver is sent to the worker alongside the connection.

    The number of workers is [Domain.recommended_domain_count () - 1], but
    never less than one: with zero workers the round-robin modulus would raise
    [Division_by_zero] on the first connection. *)
let create_connection_handler ~request_handler ~error_handler =
  (* replace [worker_count] by [1] for a single worker domain *)
  let worker_count = Stdlib.max 1 (Domain.recommended_domain_count () - 1) in
  let workers =
    Stdlib.Array.init worker_count (fun _ ->
      spawn_domain_worker ~request_handler ~error_handler ())
  in
  (* index of the worker that received the previous connection *)
  let worker = ref (-1) in
  let work client_addr socket =
    (* just distribute the work fairly *)
    worker := (!worker + 1) mod worker_count;
    let p, r = Lwt.task () in
    workers.(!worker) (Some (client_addr, socket, r));
    p
  in
  work

raphael-proust avatar Jul 04 '25 11:07 raphael-proust