ocaml-h2 icon indicating copy to clipboard operation
ocaml-h2 copied to clipboard

Connection closed by server does not call error handler in client

Open ansiwen opened this issue 1 year ago • 1 comments

If the server closes a connection, there seems to be no way for the client to notice that (besides keepalive timeouts). I would expect that the error handler for the connection-level errors is called, or at least the connection changes into closed-state and all pending promises on that connection being canceled, which all apparently doesn't happen.

Here the code that I used to verify the behaviour outside of my application:

open Lwt.Infix

let error_to_string e =
  match e with
  | `Exn e -> Printexc.to_string e
  | `Malformed_response s -> Format.sprintf "malformed response: %s" s
  | `Invalid_response_body_length r ->
      Format.asprintf "invalid response body length: %a" H2.Response.pp_hum r
  | `Protocol_error (c, s) ->
      Format.asprintf "protocol error %a: %s" H2.Error_code.pp_hum c s

let create_connection ~host ~port =
  print_endline "create_connection";
  Lwt_unix.getaddrinfo host (string_of_int port) [] >>= fun addresses ->
  let socket = Lwt_unix.socket Unix.PF_INET Unix.SOCK_STREAM 0 in
  let error_handler e =
    let msg = error_to_string e in
    print_endline "connection error";
    print_endline msg
  in
  match addresses with
  | { Unix.ai_addr; _ } :: _ ->
      Lwt_unix.connect socket ai_addr >>= fun () ->
      print_endline "TCP connected";
      let conn = H2_lwt_unix.Client.create_connection ~error_handler socket in
      print_endline "H2 connected";
      conn
  | _ -> assert false

let () =
  let host, port =
    try (Sys.argv.(1), int_of_string Sys.argv.(2))
    with Invalid_argument _ ->
      Printf.eprintf "Usage: %s ETCD_HOST ETCD_PORT\n" Sys.argv.(0);
      exit 1
  in
  let main =
    create_connection ~host ~port >>= fun connection ->
    let rec loop () =
      let promise, resolver = Lwt.task () in
      H2_lwt_unix.Client.ping connection (fun () ->
          Lwt.wakeup_later resolver ());
      print_endline "ping";
      promise >>= fun () ->
      print_endline "pong";
      Lwt_unix.sleep 1.0 >>= loop
    in
    loop ()
  in
  Lwt_main.run main

ansiwen avatar Jul 27 '22 18:07 ansiwen

I can confirm that this issue exists with paf-le-chien which is a file-descriptor leak (see this issue on our opam-mirror unikernel: https://git.robur.io/robur/opam-mirror/issues/5).

I first thought about the need for a Unix.shutdown_send which could close the connection and signal the server to finally emit a final packet so that the reading loop would also close properly, but I'm not too sure anymore - other limitations within MirageOS don't allow us to experiment with this solution yet.

dinosaure avatar Sep 05 '22 07:09 dinosaure