otp
otp copied to clipboard
ssh: stdout stalls at under 1MB of data mark
Describe the bug After receiving just under 1MB of data through the channel, stdout stalls and no data is received anymore, channel stays open no close is sent.
Affected versions OTP 25
defmodule SSHWrap do
def connect(host, user \\ "root", port \\ 22, pass \\ nil) do
args = [
{:user, '#{user}'},
{:silently_accept_hosts, true},
{:user_interaction, false},
{:connect_timeout, 8_000}
]
args = if pass, do: [{:password, pass} | args], else: args
{:ok, ref} = :ssh.connect('#{host}', port, args, 8_000)
ref
end
def close(ref) do
:ssh.close(ref)
end
def execute_async(host, user, cmd) do
ref = connect(host, user)
{:ok, channelId} = :ssh_connection.session_channel(ref, :infinity)
:success = :ssh_connection.exec(ref, channelId, cmd, :infinity)
ref
end
def execute_sync(host, user, cmd) do
ref = connect(host, user)
{r,e} = execute(ref, cmd)
:ssh.close(ref)
{r,e}
end
def execute(ref, cmd) do
{:ok, channelId} = :ssh_connection.session_channel(ref, :infinity)
:success = :ssh_connection.exec(ref, channelId, cmd, :infinity)
execute_2(ref)
end
defp execute_2(ref, buf \\ "", errno \\ nil) do
#{:data, "lol"}
#{:eof}
#{:exit_status, 0}
#{:closed}
#{"", 0}
receive do
{:ssh_cm, ^ref, {:data, _, _, bin}} -> execute_2(ref, buf<>bin, errno)
{:ssh_cm, ^ref, {:exit_status, _, errno}} -> execute_2(ref, buf, errno)
{:ssh_cm, ^ref, {:eof, _}} -> execute_2(ref, buf, errno)
{:ssh_cm, ^ref, {:closed, _}} -> {buf, errno}
after
900_000 ->
:timeout
end
end
end
{a,_} = SSHWrap.execute_sync("10.200.3.0", "root", "for i in {1..1}; do dmesg; done")
{a,_} = SSHWrap.execute_sync("10.200.3.0", "root", "for i in {1..2}; do dmesg; done")
{a,_} = SSHWrap.execute_sync("10.200.3.0", "root", "for i in {1..3}; do dmesg; done")
byte_size a # 386493
{a,_} = SSHWrap.execute_sync("10.200.3.0", "root", "for i in {1..4}; do dmesg; done")
# stalled infinitly
ssh [email protected] "for i in {1..4}; do dmesg; done"
# output comes
would it be a problem for you to provide reproduction code in Erlang?
would it be a problem for you to provide reproduction code in Erlang?
GPT4: Translate this elixir code into erlang
-module(sshwrap).
-export([connect/1, connect/2, connect/3, connect/4, close/1, execute_async/3, execute_sync/3, execute/2]).
-export([execute_2/1, execute_2/3]).
connect(Host) ->
connect(Host, "root", 22, nil).
connect(Host, User) ->
connect(Host, User, 22, nil).
connect(Host, User, Port) ->
connect(Host, User, Port, nil).
connect(Host, User, Port, Pass) ->
Args = [
{user, User},
{silently_accept_hosts, true},
{user_interaction, false},
{connect_timeout, 8000}
],
Args2 = case Pass of
nil -> Args;
_ -> [{password, Pass}|Args]
end,
{ok, Ref} = ssh:connect(Host, Port, Args2, 8000),
Ref.
close(Ref) ->
ssh:close(Ref).
execute_async(Host, User, Cmd) ->
Ref = connect(Host, User),
{ok, ChannelId} = ssh_connection:session_channel(Ref, infinity),
success = ssh_connection:exec(Ref, ChannelId, Cmd, infinity),
Ref.
execute_sync(Host, User, Cmd) ->
Ref = connect(Host, User),
{R,E} = execute(Ref, Cmd),
ssh:close(Ref),
{R,E}.
execute(Ref, Cmd) ->
{ok, ChannelId} = ssh_connection:session_channel(Ref, infinity),
success = ssh_connection:exec(Ref, ChannelId, Cmd, infinity),
execute_2(Ref).
execute_2(Ref) ->
execute_2(Ref, <<>>, nil).
execute_2(Ref, Buf, Errno) ->
receive
{ssh_cm, Ref, {data, _, _, Bin}} -> execute_2(Ref, erlang:iolist_to_binary([Buf, Bin]), Errno);
{ssh_cm, Ref, {exit_status, _, Err}} -> execute_2(Ref, Buf, Err);
{ssh_cm, Ref, {eof, _}} -> execute_2(Ref, Buf, Errno);
{ssh_cm, Ref, {closed, _}} -> {Buf, Errno}
after 900000 ->
timeout
end.
ssh:start().
{A,_} = sshwrap:execute_sync("10.200.3.0", "root", <<"for i in {1..1}; do dmesg; done">>).
{B,_} = sshwrap:execute_sync("10.200.3.0", "root", <<"for i in {1..2}; do dmesg; done">>).
{C,_} = sshwrap:execute_sync("10.200.3.0", "root", <<"for i in {1..3}; do dmesg; done">>).
io:format("~p", [byte_size(C)]), % 386493
{D,_} = sshwrap:execute_sync("10.200.3.0", "root", <<"for i in {1..4}; do dmesg; done">>).
{E,_} = sshwrap:execute_sync("10.200.3.0", "root", <<"for i in {1..20}; do dmesg; done">>).
% this should certainly fail
To add to this, if a process exits that has opened a ssh connection, the connection is not terminated, thus its not cleaned up properly. Eventually this leads to a flooded kernel keyring cat /proc/key-users as there is so many opened (due to not cleaned properly) ssh connections
thanks for update. we consider this important to fix, but had to schedule it due to other work and summer period.
FYI, work on fixing this is ongoing.
To add to this, if a process exits that has opened a ssh connection, the connection is not terminated, thus its not cleaned up properly. Eventually this leads to a flooded kernel keyring
cat /proc/key-usersas there is so many opened (due to not cleaned properly) ssh connections
can you give some suggestion on reproducing this?
I'm using provided sshwrap module but key-users remain rock solid and I observe no change of the output (with and without fix). this is WSL2 + Ubuntu.
- main issue is fixed and merged to maint, master branches
- it will be 1st released with OTP-27
- and later backported to OTP-26, OTP-25
- I'm closing issue as main problem is solved and I failed to reproduce
cat /proc/key-users- we can re-open or create a new one(preferred) if you can provide some more reproduction hints - thanks for the report
@vans163
unfortunately, behavior reported here was not an bug(or at least should not be interpreted as on by me) and supposed fix in PR-8345 introduced unwanted behavior and manifested with for example long transfers over SFTP hanging as described in GH-8724.
Due to above, adjustment previously made in ssh_connection module was reverted in PR-9309.
Behavior described in this issue will re-appear with upcoming emergency patches for supported OTP versions.
My thinking is that behavior is expected according to ssh application design. ssh_connection module is not supposed to perform window adjustment for the user when exec is called and delivers a lot of data.
such support functionality is available for modules using ssh_client_channel behavior (for example ssh_sftp client). but ssh_connection:exec is simple and does not use it.
I would propose updating the docs to explain that and maybe also provide an example as a resolution to this issue.
To make your code example work, you need to call ssh_connection:adjust_window/3 yourself similarly to code in ssh_connection_SUITE.