otp icon indicating copy to clipboard operation
otp copied to clipboard

ssh: stdout stalls at under 1MB of data mark

Open vans163 opened this issue 2 years ago • 4 comments

Describe the bug After receiving just under 1MB of data through the channel, stdout stalls and no data is received anymore, channel stays open no close is sent.

Affected versions OTP 25

defmodule SSHWrap do
  def connect(host, user \\ "root", port \\ 22, pass \\ nil) do
    args = [
      {:user, '#{user}'},
      {:silently_accept_hosts, true},
      {:user_interaction, false},
      {:connect_timeout, 8_000}
    ]

    args = if pass, do: [{:password, pass} | args], else: args
    {:ok, ref} = :ssh.connect('#{host}', port, args, 8_000)
    ref
  end

  def close(ref) do
    :ssh.close(ref)
  end

  def execute_async(host, user, cmd) do
    ref = connect(host, user)
    {:ok, channelId} = :ssh_connection.session_channel(ref, :infinity)
    :success = :ssh_connection.exec(ref, channelId, cmd, :infinity)
    ref
  end

  def execute_sync(host, user, cmd) do
    ref = connect(host, user)
    {r,e} = execute(ref, cmd)
    :ssh.close(ref)
    {r,e}
  end

  def execute(ref, cmd) do
    {:ok, channelId} = :ssh_connection.session_channel(ref, :infinity)
    :success = :ssh_connection.exec(ref, channelId, cmd, :infinity)
    execute_2(ref)
  end


  defp execute_2(ref, buf \\ "", errno \\ nil) do
    #{:data, "lol"}
    #{:eof}
    #{:exit_status, 0}
    #{:closed}
    #{"", 0}
    receive do
      {:ssh_cm, ^ref, {:data, _, _, bin}} -> execute_2(ref, buf<>bin, errno)
      {:ssh_cm, ^ref, {:exit_status, _, errno}} -> execute_2(ref, buf, errno)
      {:ssh_cm, ^ref, {:eof, _}} -> execute_2(ref, buf, errno)
      {:ssh_cm, ^ref, {:closed, _}} -> {buf, errno}
    after
      900_000 ->
        :timeout
    end
  end
end

{a,_} = SSHWrap.execute_sync("10.200.3.0", "root", "for i in {1..1}; do dmesg; done")
{a,_} = SSHWrap.execute_sync("10.200.3.0", "root", "for i in {1..2}; do dmesg; done")
{a,_} = SSHWrap.execute_sync("10.200.3.0", "root", "for i in {1..3}; do dmesg; done")
byte_size a # 386493

{a,_} = SSHWrap.execute_sync("10.200.3.0", "root", "for i in {1..4}; do dmesg; done")
# stalled infinitly

 ssh [email protected] "for i in {1..4}; do dmesg; done"
 # output comes

vans163 avatar Jul 06 '23 14:07 vans163

would it be a problem for you to provide reproduction code in Erlang?

u3s avatar Jul 07 '23 06:07 u3s

would it be a problem for you to provide reproduction code in Erlang?

GPT4: Translate this elixir code into erlang

-module(sshwrap).

-export([connect/1, connect/2, connect/3, connect/4, close/1, execute_async/3, execute_sync/3, execute/2]).
-export([execute_2/1, execute_2/3]).

connect(Host) ->
    connect(Host, "root", 22, nil).
connect(Host, User) ->
    connect(Host, User, 22, nil).
connect(Host, User, Port) ->
    connect(Host, User, Port, nil).
connect(Host, User, Port, Pass) ->
    Args = [
        {user, User},
        {silently_accept_hosts, true},
        {user_interaction, false},
        {connect_timeout, 8000}
    ],
    Args2 = case Pass of
        nil -> Args;
        _ -> [{password, Pass}|Args]
    end,
    {ok, Ref} = ssh:connect(Host, Port, Args2, 8000),
    Ref.

close(Ref) ->
    ssh:close(Ref).

execute_async(Host, User, Cmd) ->
    Ref = connect(Host, User),
    {ok, ChannelId} = ssh_connection:session_channel(Ref, infinity),
    success = ssh_connection:exec(Ref, ChannelId, Cmd, infinity),
    Ref.

execute_sync(Host, User, Cmd) ->
    Ref = connect(Host, User),
    {R,E} = execute(Ref, Cmd),
    ssh:close(Ref),
    {R,E}.

execute(Ref, Cmd) ->
    {ok, ChannelId} = ssh_connection:session_channel(Ref, infinity),
    success = ssh_connection:exec(Ref, ChannelId, Cmd, infinity),
    execute_2(Ref).

execute_2(Ref) ->
    execute_2(Ref, <<>>, nil).
execute_2(Ref, Buf, Errno) ->
    receive
        {ssh_cm, Ref, {data, _, _, Bin}} -> execute_2(Ref, erlang:iolist_to_binary([Buf, Bin]), Errno);
        {ssh_cm, Ref, {exit_status, _, Err}} -> execute_2(Ref, Buf, Err);
        {ssh_cm, Ref, {eof, _}} -> execute_2(Ref, Buf, Errno);
        {ssh_cm, Ref, {closed, _}} -> {Buf, Errno}
    after 900000 ->
        timeout
    end.
ssh:start().
{A,_} = sshwrap:execute_sync("10.200.3.0", "root", <<"for i in {1..1}; do dmesg; done">>).
{B,_} = sshwrap:execute_sync("10.200.3.0", "root", <<"for i in {1..2}; do dmesg; done">>).
{C,_} = sshwrap:execute_sync("10.200.3.0", "root", <<"for i in {1..3}; do dmesg; done">>).
io:format("~p", [byte_size(C)]), % 386493
{D,_} = sshwrap:execute_sync("10.200.3.0", "root", <<"for i in {1..4}; do dmesg; done">>).

{E,_} = sshwrap:execute_sync("10.200.3.0", "root", <<"for i in {1..20}; do dmesg; done">>).
% this should certainly fail

vans163 avatar Jul 07 '23 17:07 vans163

To add to this, if a process exits that has opened a ssh connection, the connection is not terminated, thus its not cleaned up properly. Eventually this leads to a flooded kernel keyring cat /proc/key-users as there is so many opened (due to not cleaned properly) ssh connections

vans163 avatar Jul 24 '23 19:07 vans163

thanks for update. we consider this important to fix, but had to schedule it due to other work and summer period.

u3s avatar Jul 25 '23 06:07 u3s

FYI, work on fixing this is ongoing.

u3s avatar Mar 26 '24 14:03 u3s

To add to this, if a process exits that has opened a ssh connection, the connection is not terminated, thus its not cleaned up properly. Eventually this leads to a flooded kernel keyring cat /proc/key-users as there is so many opened (due to not cleaned properly) ssh connections

can you give some suggestion on reproducing this? I'm using provided sshwrap module but key-users remain rock solid and I observe no change of the output (with and without fix). this is WSL2 + Ubuntu.

u3s avatar May 07 '24 14:05 u3s

  • main issue is fixed and merged to maint, master branches
  • it will be 1st released with OTP-27
  • and later backported to OTP-26, OTP-25
  • I'm closing issue as main problem is solved and I failed to reproduce cat /proc/key-users - we can re-open or create a new one(preferred) if you can provide some more reproduction hints
  • thanks for the report

u3s avatar May 08 '24 14:05 u3s

@vans163

unfortunately, behavior reported here was not an bug(or at least should not be interpreted as on by me) and supposed fix in PR-8345 introduced unwanted behavior and manifested with for example long transfers over SFTP hanging as described in GH-8724.

Due to above, adjustment previously made in ssh_connection module was reverted in PR-9309.

Behavior described in this issue will re-appear with upcoming emergency patches for supported OTP versions.

My thinking is that behavior is expected according to ssh application design. ssh_connection module is not supposed to perform window adjustment for the user when exec is called and delivers a lot of data.

such support functionality is available for modules using ssh_client_channel behavior (for example ssh_sftp client). but ssh_connection:exec is simple and does not use it.

I would propose updating the docs to explain that and maybe also provide an example as a resolution to this issue. To make your code example work, you need to call ssh_connection:adjust_window/3 yourself similarly to code in ssh_connection_SUITE.

u3s avatar Jan 23 '25 16:01 u3s