eio icon indicating copy to clipboard operation
eio copied to clipboard

"Interrupted system call" when writing in parallel to stdout

Open wildfield opened this issue 1 year ago • 5 comments

Hey guys, I've been playing around with Eio and I'm getting this error

Fatal error: exception Eio.Io Unix_error (Interrupted system call, "writev", "")

Here's the reproduction program. It takes a filename argument and reads the same file in parallel while echoing it to stdout. A ~10KB input file would run into this issue.

let ( / ) = Eio.Path.( / )

let read_file file out : unit =
  Eio.Switch.run (fun sw ->
    let file = Eio.Path.open_in ~sw file in
    let reader = Eio.Buf_read.of_flow ~initial_size:1 ~max_size:10000 file in
    Eio.Buf_write.with_flow ~initial_size:10000 out @@ fun writer -> (
      let rec inner () : unit =
        let line = try Some(Eio.Buf_read.line reader) with End_of_file -> None in
        match line with 
        | None -> ()
        | Some line -> (
          Eio.Buf_write.string writer (Printf.sprintf "%s\n" line);
          inner ()
        )
      in
      inner ()
    )
  )

let main env =
  if Array.length Sys.argv < 2 then
    Eio.Flow.copy_string "Expected file name\n" (Eio.Stdenv.stderr env)
  else
    let domain_mgr = (Eio.Stdenv.domain_mgr env) in
    let rec workers (n: int) (acc: (unit -> unit) list) : (unit -> unit) list =
      if n <= 0 then
        acc
      else
        let work = fun () -> read_file (Eio.Stdenv.cwd env / (Sys.argv.(1))) (Eio.Stdenv.stdout env) in
        let spawn = fun () -> Eio.Domain_manager.run domain_mgr work in
        workers (n - 1) (spawn :: acc)
    in
    let worker_list = workers 5 [] in
    ignore (Eio.Fiber.all worker_list)

let () =
  Eio_main.run @@ fun env ->
    main env

My system is Arch Linux with 6.12.1-arch1-1 kernel. My eio version is 1.2

I would personally expect a jumbled output on stdout, but an exception is somewhat unexpected

wildfield avatar Dec 06 '24 06:12 wildfield

That is surprising. It works on my machine. And it doesn't call writev at all (according to strace).

Do you have uring blocked somehow? Maybe you're running in a Docker container and it's using eio_posix. That is supposed to retry on EINTR though:

https://github.com/ocaml-multicore/eio/blob/f195295e3dc16a9fd5465a37243e434c1676e773/lib_eio_posix/low_level.ml#L149-L152

https://github.com/ocaml-multicore/eio/blob/f195295e3dc16a9fd5465a37243e434c1676e773/lib_eio_posix/low_level.ml#L35-L37

talex5 avatar Dec 06 '24 09:12 talex5

Actually, I guess this could happen if the uring writev job returned EINTR. Though I don't see why it would do that.

talex5 avatar Dec 06 '24 09:12 talex5

Does it happen with fewer domains?

Assuming it is uring (strace will say), you should be able to trace the completions with a trace.bt like this:

BEGIN {
  @op[IORING_OP_NOP] = "NOP";
  @op[IORING_OP_READV] = "READV";
  @op[IORING_OP_WRITEV] = "WRITEV";
  @op[IORING_OP_FSYNC] = "FSYNC";
  @op[IORING_OP_READ_FIXED] = "READ_FIXED";
  @op[IORING_OP_WRITE_FIXED] = "WRITE_FIXED";
  @op[IORING_OP_POLL_ADD] = "POLL_ADD";
  @op[IORING_OP_POLL_REMOVE] = "POLL_REMOVE";
  @op[IORING_OP_SYNC_FILE_RANGE] = "SYNC_FILE_RANGE";
  @op[IORING_OP_SENDMSG] = "SENDMSG";
  @op[IORING_OP_RECVMSG] = "RECVMSG";
  @op[IORING_OP_TIMEOUT] = "TIMEOUT";
  @op[IORING_OP_TIMEOUT_REMOVE] = "TIMEOUT_REMOVE";
  @op[IORING_OP_ACCEPT] = "ACCEPT";
  @op[IORING_OP_ASYNC_CANCEL] = "ASYNC_CANCEL";
  @op[IORING_OP_LINK_TIMEOUT] = "LINK_TIMEOUT";
  @op[IORING_OP_CONNECT] = "CONNECT";
  @op[IORING_OP_FALLOCATE] = "FALLOCATE";
  @op[IORING_OP_OPENAT] = "OPENAT";
  @op[IORING_OP_CLOSE] = "CLOSE";
  @op[IORING_OP_FILES_UPDATE] = "FILES_UPDATE";
  @op[IORING_OP_STATX] = "STATX";
  @op[IORING_OP_READ] = "READ";
  @op[IORING_OP_WRITE] = "WRITE";
  @op[IORING_OP_FADVISE] = "FADVISE";
  @op[IORING_OP_MADVISE] = "MADVISE";
  @op[IORING_OP_SEND] = "SEND";
  @op[IORING_OP_RECV] = "RECV";
  @op[IORING_OP_OPENAT2] = "OPENAT2";
  @op[IORING_OP_EPOLL_CTL] = "EPOLL_CTL";
  @op[IORING_OP_SPLICE] = "SPLICE";
  @op[IORING_OP_PROVIDE_BUFFERS] = "PROVIDE_BUFFERS";
  @op[IORING_OP_REMOVE_BUFFERS] = "REMOVE_BUFFERS";
  @op[IORING_OP_TEE] = "TEE";
  @op[IORING_OP_SHUTDOWN] = "SHUTDOWN";
  @op[IORING_OP_RENAMEAT] = "RENAMEAT";
  @op[IORING_OP_UNLINKAT] = "UNLINKAT";
  @op[IORING_OP_MKDIRAT] = "MKDIRAT";
  @op[IORING_OP_SYMLINKAT] = "SYMLINKAT";
  @op[IORING_OP_LINKAT] = "LINKAT";
  @op[IORING_OP_MSG_RING] = "MSG_RING";
  @op[IORING_OP_FSETXATTR] = "FSETXATTR";
  @op[IORING_OP_SETXATTR] = "SETXATTR";
  @op[IORING_OP_FGETXATTR] = "FGETXATTR";
  @op[IORING_OP_GETXATTR] = "GETXATTR";
  @op[IORING_OP_SOCKET] = "SOCKET";
  @op[IORING_OP_URING_CMD] = "URING_CMD";
  @op[IORING_OP_SEND_ZC] = "SEND_ZC";
  @op[IORING_OP_SENDMSG_ZC] = "SENDMSG_ZC";
}

tracepoint:io_uring:io_uring_complete {
  $req = (struct io_kiocb *) args->req;
  printf("%dms: %s: %s %d\n",
    elapsed / 1e6,
    comm,
    @op[$req->opcode],
    args->res);
}

END {
  clear(@op);
}

I get (using 1 domain and a small file to get shorter output):

$ sudo bpftrace trace.bt
Attaching 3 probes...
975ms: main.exe: OPENAT2 7
975ms: main.exe: READV 1
975ms: main.exe: READV 1
975ms: main.exe: READV 2
975ms: main.exe: READV 4
975ms: main.exe: READV 8
975ms: main.exe: READV 12
976ms: main.exe: READV 14
976ms: main.exe: READV 8
976ms: main.exe: READV 0
976ms: iou-wrk-291515: WRITEV 12
976ms: iou-wrk-291515: WRITEV 38
976ms: main.exe: ASYNC_CANCEL 0
976ms: main.exe: READV -125
976ms: main.exe: READV 8
976ms: main.exe: ASYNC_CANCEL 0
976ms: main.exe: READV -125

(the large number of small reads is because you used ~initial_size:1, so it starts with a very small buffer and never reads more than the longest line at a time)

talex5 avatar Dec 06 '24 10:12 talex5

Thanks for looking into this @talex5

Do you have uring blocked somehow? Maybe you're running in a Docker container and it's using eio_posix

No, this is a regular desktop environment. No docker or anything like that

Here's the trace

Attaching 3 probes...
6175ms: main.exe: OPENAT2 8
6175ms: main.exe: READV 1
6175ms: main.exe: READV 1
6175ms: main.exe: READV 2
6175ms: main.exe: READV 4
6175ms: main.exe: READV 6
6175ms: main.exe: OPENAT2 10
6175ms: main.exe: WRITEV 6
6175ms: main.exe: READV 6
6175ms: main.exe: READV 1
6175ms: main.exe: WRITEV 6
6175ms: main.exe: READV 6
6175ms: main.exe: READV 1
6175ms: main.exe: WRITEV 6
6175ms: main.exe: READV 6
6175ms: main.exe: READV 2
6175ms: main.exe: READV 4
6175ms: main.exe: WRITEV 6
6175ms: main.exe: READV 6
6175ms: main.exe: READV 6
6175ms: main.exe: WRITEV 6
6175ms: main.exe: READV 6
6175ms: main.exe: WRITEV 6
6175ms: main.exe: READV 6
6175ms: main.exe: WRITEV 6
6175ms: main.exe: READV 6
6175ms: main.exe: WRITEV 6
6175ms: main.exe: READV 6
6175ms: main.exe: WRITEV 6
6175ms: main.exe: READV 6
6175ms: main.exe: WRITEV 6
6175ms: main.exe: READV 6
6175ms: main.exe: WRITEV 6
6175ms: main.exe: READV 6
6175ms: main.exe: WRITEV 6
6175ms: main.exe: READV 6
6175ms: main.exe: WRITEV 6
6175ms: main.exe: READV 6
6175ms: main.exe: WRITEV 6
6175ms: main.exe: READV 7
6175ms: main.exe: WRITEV 6
6175ms: main.exe: READV 6
6175ms: main.exe: WRITEV 7
6175ms: main.exe: READV 6
6175ms: main.exe: WRITEV 6
6175ms: main.exe: READV 6
6175ms: main.exe: READV 6
6175ms: main.exe: WRITEV 6
6175ms: main.exe: READV 6
6175ms: main.exe: WRITEV 6
6175ms: main.exe: READV 6
6175ms: main.exe: READV 7
6175ms: main.exe: WRITEV 12
6175ms: main.exe: WRITEV 13
6175ms: main.exe: READV 6
6175ms: main.exe: WRITEV 6
6175ms: main.exe: READV 6
6175ms: main.exe: WRITEV 6
6175ms: main.exe: READV 6
6175ms: main.exe: WRITEV 6
6175ms: main.exe: READV 6
6175ms: main.exe: WRITEV 6
6175ms: main.exe: READV 6
6175ms: main.exe: WRITEV 6
6175ms: main.exe: READV 6
6175ms: main.exe: WRITEV 6
6175ms: main.exe: READV 6
6175ms: main.exe: WRITEV 6
6175ms: main.exe: READV 6
6175ms: main.exe: WRITEV 6
6175ms: main.exe: READV 6
6175ms: main.exe: WRITEV 6
6175ms: main.exe: READV 6
6175ms: main.exe: WRITEV 6
6175ms: main.exe: READV 6
6175ms: main.exe: WRITEV 6
6175ms: main.exe: READV 6
6175ms: main.exe: WRITEV 6
6175ms: main.exe: READV 6
6175ms: main.exe: WRITEV 6
6175ms: main.exe: READV 7
6175ms: main.exe: WRITEV 7
6175ms: main.exe: READV 6
6175ms: main.exe: WRITEV 6
6175ms: main.exe: READV 6
6175ms: main.exe: WRITEV 6
6175ms: main.exe: READV 6
6175ms: main.exe: WRITEV 6
6175ms: main.exe: READV 6
6175ms: main.exe: WRITEV 6
6175ms: main.exe: READV 7
6175ms: main.exe: WRITEV 7
6175ms: main.exe: READV 6
6175ms: main.exe: WRITEV 6
6175ms: main.exe: READV 6
6175ms: main.exe: READV 6
6175ms: main.exe: WRITEV 6
6175ms: main.exe: WRITEV 6
6175ms: main.exe: READV 6
6175ms: main.exe: WRITEV 6
6175ms: main.exe: READV 6
6175ms: main.exe: WRITEV 6
6175ms: main.exe: READV 6
6175ms: main.exe: WRITEV 6
6175ms: main.exe: READV 6
6175ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 7
6176ms: main.exe: WRITEV 7
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 7
6176ms: main.exe: WRITEV 7
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: READV 7
6176ms: main.exe: WRITEV 6
6176ms: main.exe: WRITEV 7
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: READV 7
6176ms: main.exe: WRITEV 7
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 7
6176ms: main.exe: WRITEV 7
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 7
6176ms: main.exe: WRITEV 7
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 7
6176ms: main.exe: READV 7
6176ms: main.exe: WRITEV 7
6176ms: main.exe: WRITEV 7
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: OPENAT2 14
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 1
6176ms: main.exe: READV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 1
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 2
6176ms: main.exe: READV 6
6176ms: main.exe: READV 6
6176ms: main.exe: READV 4
6176ms: main.exe: WRITEV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 7
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 7
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: READV 7
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 7
6176ms: main.exe: WRITEV -4
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: READV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: ASYNC_CANCEL -2
6176ms: main.exe: READV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: ASYNC_CANCEL 0
6176ms: main.exe: READV -125
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 12
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: READV 8
6176ms: main.exe: WRITEV 6
6176ms: main.exe: OPENAT2 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 7
6176ms: main.exe: WRITEV 7
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: READV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: WRITEV 6
6176ms: main.exe: READV 6
6176ms: main.exe: READV 8
6176ms: main.exe: READV 7
6176ms: main.exe: READV 8
6176ms: main.exe: WRITEV -4
6176ms: main.exe: WRITEV 6
6176ms: main.exe: ASYNC_CANCEL -2
6176ms: main.exe: ASYNC_CANCEL -2
6176ms: main.exe: READV 6
6176ms: main.exe: ASYNC_CANCEL -2
6176ms: main.exe: ASYNC_CANCEL -2
6176ms: main.exe: READV 1
6176ms: main.exe: ASYNC_CANCEL 0
6176ms: main.exe: READV -125
6176ms: main.exe: ASYNC_CANCEL -2
6176ms: main.exe: ASYNC_CANCEL 0
6176ms: main.exe: READV -125
6176ms: main.exe: READV 8
6176ms: main.exe: ASYNC_CANCEL 0
6176ms: main.exe: READV -125
6177ms: main.exe: ASYNC_CANCEL 0
6177ms: main.exe: READV -125
6177ms: main.exe: ASYNC_CANCEL 0
6177ms: main.exe: READV -125

wildfield avatar Dec 06 '24 17:12 wildfield

It doesn't happen with 1 domain, but it happens with 2 domains

wildfield avatar Dec 06 '24 17:12 wildfield

I was able to reproduce this with only Uring in the mix.

let worker ~name =
  let queue = Uring.create ~queue_depth:1 () in
  let msg = Cstruct.of_string (Fmt.str "Hello from %s\n" name) in
  let rec loop () =
    let _ = Uring.write queue ~file_offset:Optint.Int63.zero Unix.stdout msg `Write in
    assert (Uring.submit queue = 1);
    match Uring.wait queue with
    | Some { result; _ } when result >= 0 -> loop ()
    | Some { result; _ } ->
      Fmt.pr "Result failed in %s: %i\n%!" name result;
      exit result
    | None -> loop ()
  in
  loop ()

let () =
  let n = 2 in
  let names =
    List.init n (fun i -> Domain.spawn (fun () -> worker ~name:(string_of_int i)))
  in
  Domain.join (List.hd names)

patricoferris avatar Aug 26 '25 14:08 patricoferris

Thanks @patricoferris - with your code it reproduces for me. I asked about it at https://github.com/axboe/liburing/discussions/1452 and it seems this might be a uring problem. I'm adding support for setup flags to ocaml-uring (https://github.com/ocaml-multicore/ocaml-uring/pull/122) which might be useful to work around this.

talex5 avatar Sep 06 '25 12:09 talex5