wireguard-uapi-rs icon indicating copy to clipboard operation
wireguard-uapi-rs copied to clipboard

No ack received

Open xtexx opened this issue 3 years ago • 17 comments

fn connect_route_socket() -> Result<RouteSocket> {
        Ok(RouteSocket::connect().context("connect to WG route socket")?)
    }

Self::connect_route_socket()?
            .add_device("test_test")
            .context("add new WG device")?;

The device has been added successfully:

9: test_test: <POINTOPOINT> mtu 1420 qdisc noop state DOWN mode DEFAULT group default qlen 1000
    link/none 

If that error is ignored and set_device is called anyway, the device does not get updated.

Here is a strace:

socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE) = 3
bind(3, {sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, 12) = 0
sendto(3, [{nlmsg_len=68, nlmsg_type=RTM_NEWLINK, nlmsg_flags=NLM_F_REQUEST|NLM_F_ACK|NLM_F_EXCL|NLM_F_CREATE, nlmsg_seq=1, nlmsg_pid=0}, {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_NETROM, ifi_
index=0, ifi_flags=0, ifi_change=0xffffffff}, [[{nla_len=13, nla_type=IFLA_IFNAME}, "test_test"...], [{nla_len=20, nla_type=IFLA_LINKINFO}, [{nla_len=13, nla_type=IFLA_INFO_KIND}, "wireguard"...]]]], 68, 0, NULL, 0) = 68                                                                                                                                                              recvfrom(3, [{nlmsg_len=36, nlmsg_type=NLMSG_ERROR, nlmsg_flags=NLM_F_CAPPED, nlmsg_seq=1, nlmsg_pid=5347}, {error=0, msg={nlmsg_len=68, nlmsg_type=RTM_NEWLINK, nlmsg_flags=NLM_F_REQUEST|NL
M_F_ACK|NLM_F_EXCL|NLM_F_CREATE, nlmsg_seq=1, nlmsg_pid=0}}], 32768, 0, NULL, NULL) = 36                                                                                                     close(3)                                = 0
socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC) = 3
sendto(3, [{nlmsg_len=36, nlmsg_type=0x10 /* NLMSG_??? */, nlmsg_flags=NLM_F_REQUEST|NLM_F_ACK, nlmsg_seq=1, nlmsg_pid=0}, "\x03\x02\x00\x00\x0e\x00\x02\x00\x77\x69\x72\x65\x67\x75\x61\x72\
x64\x00\x00\x00"], 36, 0, NULL, 0) = 36                                                                                                                                                      recvfrom(3, [{nlmsg_len=112, nlmsg_type=nlctrl, nlmsg_flags=0, nlmsg_seq=1, nlmsg_pid=5347}, "\x01\x02\x00\x00\x0e\x00\x02\x00\x77\x69\x72\x65\x67\x75\x61\x72\x64\x00\x00\x00\x06\x00\x01\x0
0\x23\x00\x00\x00\x08\x00\x03\x00"...], 32768, 0, NULL, NULL) = 112                                                                                                                          recvfrom(3, [{nlmsg_len=36, nlmsg_type=NLMSG_ERROR, nlmsg_flags=NLM_F_CAPPED, nlmsg_seq=1, nlmsg_pid=5347}, {error=0, msg={nlmsg_len=36, nlmsg_type=nlctrl, nlmsg_flags=NLM_F_REQUEST|NLM_F_A
CK, nlmsg_seq=1, nlmsg_pid=0}}], 32768, 0, NULL, NULL) = 36                                                                                                                                  close(3)                                = 0
socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC) = 3
bind(3, {sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, 12) = 0
sendto(3, [{nlmsg_len=248, nlmsg_type=wireguard, nlmsg_flags=NLM_F_REQUEST|NLM_F_ACK, nlmsg_seq=1, nlmsg_pid=0}, "\x01\x01\x00\x00\x09\x00\x02\x00\x74\x65\x73\x74\x00\x00\x00\x00\x08\x00\x0
5\x00\x01\x00\x00\x00\x24\x00\x03\x00\x50\x52\x53\xa9"...], 248, 0, NULL, 0) = 248                                                                                                           recvfrom(3, [{nlmsg_len=268, nlmsg_type=NLMSG_ERROR, nlmsg_flags=0, nlmsg_seq=1, nlmsg_pid=5347}, {error=-ENODEV, msg=[{nlmsg_len=248, nlmsg_type=wireguard, nlmsg_flags=NLM_F_REQUEST|NLM_F_
ACK, nlmsg_seq=1, nlmsg_pid=0}, "\x01\x01\x00\x00\x09\x00\x02\x00\x74\x65\x73\x74\x00\x00\x00\x00\x08\x00\x05\x00\x01\x00\x00\x00\x24\x00\x03\x00\x00\x00\x00\x00"...]}], 32768, 0, NULL, NULL) = 268                                                                                                                                                                                     close(3)                                = 0

Tested on openSUSE Tumbleweed with kernel 5.19.7-1-default.

xtexx avatar Sep 09 '22 07:09 xtexx

Confirmed on Ubuntu 20.04.4 LTS x86_64 with kernel 5.4.0-1089-azure (on GitHub Codespaces).


test result: ok. 6 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out; finished in 0.00s

     Running tests/macos.rs (target/debug/deps/macos-77cc86e26f4d21df)

running 0 tests

test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out; finished in 0.00s

     Running tests/set_and_get.rs (target/debug/deps/set_and_get-0a81b834f134c7a9)

running 4 tests
Error: No ack received
Error: No ack received
Error: No ack received
test large_peer ... FAILED
test set_ifname_has_proper_padding ... FAILED
test peer_update_only ... FAILED
Error: No ack received
test simple ... FAILED

failures:

xtexx avatar Sep 09 '22 08:09 xtexx

It seems to be caused by a missing CAP_NET_ADMIN capability. However, when I try to add it on my local computer, it does not work.

xtexx avatar Sep 09 '22 08:09 xtexx

➜  peerd git:(main) ✗ find ./target/debug/deps -maxdepth 1 -type f -executable | xargs -n 1 sudo setcap CAP_NET_ADMIN=+eip
➜  peerd git:(main) ✗ cargo r apply
    Finished dev [unoptimized + debuginfo] target(s) in 0.42s
     Running `target/debug/peerd apply`
Applying configs
Reloading peer configs
Reading config from peerd.toml
thread 'main' panicked at 'called `Result::unwrap()` on an `Err` value: add new WG device

Caused by:
    No ack received', src/peer_conf/mod.rs:48:62
note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace
➜  peerd git:(main) ✗ sudo getcap target/debug/peerd
target/debug/peerd cap_net_admin=eip

xtexx avatar Sep 09 '22 08:09 xtexx

Hey @xtexChooser, what happens on src/peer_conf/mod.rs:48:62? Is that line adding a new device?

gluxon avatar Sep 10 '22 20:09 gluxon

Hey @gluxon. At src/peer_conf/mod.rs, it calls the abstract tunnel manager to add the tunnel for a test.

The implementation can be found at https://github.com/xtexChooser/peerd/blob/main/src/tunnel/wireguard/linux.rs.

Thanks.

xtexx avatar Sep 11 '22 00:09 xtexx

Mismatched Paths

It looks like this command applies CAP_NET_ADMIN to all files under ./target/debug/deps

➜  peerd git:(main) ✗ find ./target/debug/deps -maxdepth 1 -type f -executable | xargs -n 1 sudo setcap CAP_NET_ADMIN=+eip

But the binary that was run is ./target/debug/peerd, which isn't under ./target/debug/deps.

➜  peerd git:(main) ✗ cargo r apply
    Finished dev [unoptimized + debuginfo] target(s) in 0.42s
     Running `target/debug/peerd apply`

The find ./target/debug/deps -maxdepth 1 -type f -executable | xargs -n 1 sudo setcap CAP_NET_ADMIN=+eip line only applies to cargo test binaries and not main binaries in the crate.

getcap output syntax

I did see that your test shows:

➜  peerd git:(main) ✗ sudo getcap target/debug/peerd
target/debug/peerd cap_net_admin=eip

That was helpful. Thanks for posting that. I'm a bit confused because I'm not sure what =eip means though. On my Ubuntu 20.04 VM, I see no output for a file that doesn't have any capabilities set yet.

❯ sudo getcap target/debug/examples/wg

❯

When I apply capabilities, I see +eip.

❯ sudo setcap CAP_NET_ADMIN=+eip ./target/debug/examples/wg

❯ sudo getcap target/debug/examples/wg
target/debug/examples/wg = cap_net_admin+eip

gluxon avatar Sep 11 '22 02:09 gluxon

About the mismatched path, I have added the cap to the target file.

And, about the output syntax, I don't know.

xtex% sudo setcap CAP_NET_ADMIN=+eip target/debug/peerd
xtex% sudo getcap target/debug/peerd
target/debug/peerd cap_net_admin=eip

If I grant the capability, the interface is added successfully, but I still get No ack received. If I do not, the interface cannot be added, and I also get No ack received.

xtexx avatar Sep 11 '22 02:09 xtexx

➜  Source git clone --depth 1 git@github.com:gluxon/wireguard-uapi-rs.git
正克隆到 'wireguard-uapi-rs'...
Enter passphrase for key '/home/xtex/.ssh/id_ed25519': 
remote: Enumerating objects: 61, done.
remote: Counting objects: 100% (61/61), done.
remote: Compressing objects: 100% (59/59), done.
remote: Total 61 (delta 3), reused 26 (delta 0), pack-reused 0
接收对象中: 100% (61/61), 41.55 KiB | 184.00 KiB/s, 完成.
处理 delta 中: 100% (3/3), 完成.
➜  Source ls
wireguard-uapi-rs
➜  Source cd wireguard-uapi-rs 
➜  wireguard-uapi-rs git:(main) ls
Cargo.toml  examples  LICENSE  README.md  src  tests
➜  wireguard-uapi-rs git:(main) cargo build
    Updating `tuna` index
  Downloaded thiserror-impl v1.0.34 (registry `tuna`)
  Downloaded thiserror v1.0.34 (registry `tuna`)
  Downloaded 2 crates (32.8 KB) in 0.62s
.................
   Compiling wireguard-uapi v2.0.5 (/mnt/src/Source/wireguard-uapi-rs)
    Finished dev [unoptimized + debuginfo] target(s) in 15.90s
➜  wireguard-uapi-rs git:(main) find ./target/debug/deps -maxdepth 1 -type f -executable | xargs -n 1 sudo setcap CAP_NET_ADMIN=+eip
➜  wireguard-uapi-rs git:(main) cargo test
  Downloaded difflib v0.4.0 (registry `tuna`)
........
  Downloaded 14 crates (464.7 KB) in 0.72s
   Compiling autocfg v1.1.0
.......
   Compiling wireguard-uapi v2.0.5 (/mnt/src/Source/wireguard-uapi-rs)
    Finished test [unoptimized + debuginfo] target(s) in 7.10s
     Running unittests src/lib.rs (target/debug/deps/wireguard_uapi-d837ef9f66bfa16f)

running 6 tests
test get::tests::parse_allowed_ip_ipv4 ... ok
test get::tests::parse_allowed_ip_ipv6 ... ok
test get::tests::parse_invalid_allowed_ip ... ok
test linux::socket::parse::tests::parse_device_example_from_man_page ... ok
test linux::socket::parse::tests::parse_device_example_from_man_page_pre_five_point_two_kernel ... ok
test linux::socket::parse::tests::parse_device_with_large_peer ... ok

test result: ok. 6 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out; finished in 0.00s

     Running tests/macos.rs (target/debug/deps/macos-77cc86e26f4d21df)

running 0 tests

test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out; finished in 0.00s

     Running tests/set_and_get.rs (target/debug/deps/set_and_get-0a81b834f134c7a9)

running 4 tests
Error: No ack received
Error: No ack received
Error: No ack received
test set_ifname_has_proper_padding ... FAILED
test peer_update_only ... FAILED
test simple ... FAILED
Error: No ack received
test large_peer ... FAILED

failures:

---- set_ifname_has_proper_padding stdout ----
thread 'set_ifname_has_proper_padding' panicked at 'assertion failed: `(left == right)`
  left: `1`,
 right: `0`: the test returned a termination value with a non-zero status code (1) which indicates a failure', /rustc/1120c5e01df508de64fe6642f22fadeb574afd6d/library/test/src/lib.rs:184:5

---- peer_update_only stdout ----
thread 'peer_update_only' panicked at 'assertion failed: `(left == right)`
  left: `1`,
 right: `0`: the test returned a termination value with a non-zero status code (1) which indicates a failure', /rustc/1120c5e01df508de64fe6642f22fadeb574afd6d/library/test/src/lib.rs:184:5
note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace

---- simple stdout ----
wgtest47353
thread 'simple' panicked at 'assertion failed: `(left == right)`
  left: `1`,
 right: `0`: the test returned a termination value with a non-zero status code (1) which indicates a failure', /rustc/1120c5e01df508de64fe6642f22fadeb574afd6d/library/test/src/lib.rs:184:5

---- large_peer stdout ----
thread 'large_peer' panicked at 'assertion failed: `(left == right)`
  left: `1`,
 right: `0`: the test returned a termination value with a non-zero status code (1) which indicates a failure', /rustc/1120c5e01df508de64fe6642f22fadeb574afd6d/library/test/src/lib.rs:184:5


failures:
    large_peer
    peer_update_only
    set_ifname_has_proper_padding
    simple

test result: FAILED. 0 passed; 4 failed; 0 ignored; 0 measured; 0 filtered out; finished in 0.00s

error: test failed, to rerun pass `--test set_and_get`

And the tests also fail on my computer.

xtexx avatar Sep 11 '22 02:09 xtexx

It looks like the test binaries haven't been built by the time setcap is called in the output above. Could I have you try rerunning the find ./target/debug/deps -maxdepth 1 -type f -executable | xargs -n 1 sudo setcap CAP_NET_ADMIN=+eip command again and give cargo test another shot? Thanks.

Running the tests requires a bit of non-intuitive sequencing. In the output above, cargo test created the ./target/debug/deps binaries before running the tests since it was the first invocation of cargo test.

If the tests do run after that, I think you're setting the CAP_NET_ADMIN capability correctly for your target/debug/peerd file. If that's the case, it's definitely possible we have a bug somewhere in this library causing No ack received errors. Figuring out if the tests run correctly would be helpful to narrow that down. Thanks for your help.

gluxon avatar Sep 11 '22 03:09 gluxon

Oops, the tests work.

xtexx avatar Sep 11 '22 03:09 xtexx

Apologies that it took me a bit to understand your bug report. I see that we get the ENODEV error.

5\x00\x01\x00\x00\x00\x24\x00\x03\x00\x50\x52\x53\xa9"...], 248, 0, NULL, 0) = 248                                                                                                           recvfrom(3, [{nlmsg_len=268, nlmsg_type=NLMSG_ERROR, nlmsg_flags=0, nlmsg_seq=1, nlmsg_pid=5347}, {error=-ENODEV, msg=[{nlmsg_len=248, nlmsg_type=wireguard, nlmsg_flags=NLM_F_REQUEST|NLM_F_

And the ACK after that.

ACK, nlmsg_seq=1, nlmsg_pid=0}, "\x01\x01\x00\x00\x09\x00\x02\x00\x74\x65\x73\x74\x00\x00\x00\x00\x08\x00\x05\x00\x01\x00\x00\x00\x24\x00\x03\x00\x00\x00\x00\x00"...]}], 32768, 0, NULL, NULL) = 268                                                                                                                                                                                     close(3)                                = 0

But the error reported is No ack received instead of ENODEV. So the error messaging can be much clearer here.

gluxon avatar Sep 11 '22 04:09 gluxon

@xtexChooser Does this block you from your work? I think improving the error message requires upgrading neli, which is something in progress.

gluxon avatar Sep 11 '22 04:09 gluxon

Not in a hurry, I have many other things I can do and it's not a big feature either, so I can just wait for the neli update.

xtexx avatar Sep 11 '22 04:09 xtexx

And I found something strange: the tests fail after this patch:

diff --git a/tests/set_and_get.rs b/tests/set_and_get.rs
index a2dd937..318a32e 100644
--- a/tests/set_and_get.rs
+++ b/tests/set_and_get.rs
@@ -7,7 +7,7 @@ use {
 
 #[cfg(target_os = "linux")]
 fn get_random_ifname() -> String {
-    format!("wgtest{}", rand::random::<u16>())
+    format!("test_test")
 }
 
 #[cfg(target_os = "linux")]

When I keep the random number suffix, it works. But it does not work without the random number.

➜  wireguard-uapi-rs git:(main) ✗ cargo test
   Compiling wireguard-uapi v2.0.5 (/mnt/src/Source/wireguard-uapi-rs)
    Finished test [unoptimized + debuginfo] target(s) in 0.94s
     Running unittests src/lib.rs (target/debug/deps/wireguard_uapi-d837ef9f66bfa16f)

running 6 tests
.......No ack fails......
error: test failed, to rerun pass `--test set_and_get`
➜  wireguard-uapi-rs git:(main) ✗ find ./target/debug/deps -maxdepth 1 -type f -executable | xargs -n 1 sudo setcap CAP_NET_ADMIN=+eip 
➜  wireguard-uapi-rs git:(main) ✗ cargo test
    Finished test [unoptimized + debuginfo] target(s) in 0.01s
     Running unittests src/lib.rs (target/debug/deps/wireguard_uapi-d837ef9f66bfa16f)

running 6 tests
test get::tests::parse_allowed_ip_ipv4 ... ok
test get::tests::parse_allowed_ip_ipv6 ... ok
test get::tests::parse_invalid_allowed_ip ... ok
test linux::socket::parse::tests::parse_device_example_from_man_page_pre_five_point_two_kernel ... ok
test linux::socket::parse::tests::parse_device_example_from_man_page ... ok
test linux::socket::parse::tests::parse_device_with_large_peer ... ok

test result: ok. 6 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out; finished in 0.00s

     Running tests/macos.rs (target/debug/deps/macos-77cc86e26f4d21df)

running 0 tests

test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out; finished in 0.00s

     Running tests/set_and_get.rs (target/debug/deps/set_and_get-0a81b834f134c7a9)

running 4 tests
Error: No ack received
Error: No ack received
test set_ifname_has_proper_padding ... FAILED
test simple ... FAILED
Error: No ack received
test large_peer ... FAILED
test peer_update_only ... ok

failures:

---- set_ifname_has_proper_padding stdout ----
thread 'set_ifname_has_proper_padding' panicked at 'assertion failed: `(left == right)`
  left: `1`,
 right: `0`: the test returned a termination value with a non-zero status code (1) which indicates a failure', /rustc/1120c5e01df508de64fe6642f22fadeb574afd6d/library/test/src/lib.rs:184:5

---- simple stdout ----
test_test
thread 'simple' panicked at 'assertion failed: `(left == right)`
  left: `1`,
 right: `0`: the test returned a termination value with a non-zero status code (1) which indicates a failure', /rustc/1120c5e01df508de64fe6642f22fadeb574afd6d/library/test/src/lib.rs:184:5
note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace

---- large_peer stdout ----
thread 'large_peer' panicked at 'assertion failed: `(left == right)`
  left: `1`,
 right: `0`: the test returned a termination value with a non-zero status code (1) which indicates a failure', /rustc/1120c5e01df508de64fe6642f22fadeb574afd6d/library/test/src/lib.rs:184:5


failures:
    large_peer
    set_ifname_has_proper_padding
    simple

test result: FAILED. 1 passed; 3 failed; 0 ignored; 0 measured; 0 filtered out; finished in 0.12s

error: test failed, to rerun pass `--test set_and_get`

xtexx avatar Sep 11 '22 05:09 xtexx

The test setup creates and deletes WireGuard devices through NETLINK_ROUTE. As you've noticed, set_device fails with No ack received when the device doesn't exist yet, which happens here because tests running in parallel create/delete the same WireGuard device they're testing against.

gluxon avatar Sep 11 '22 05:09 gluxon

oh

xtexx avatar Sep 11 '22 05:09 xtexx

Yeah, the error is very confusing. 😞 Adding a test to make sure this gets revisited. https://github.com/gluxon/wireguard-uapi-rs/commit/33d2db3d499977c7d9868ff35a2c367f15d6b619

gluxon avatar Sep 11 '22 05:09 gluxon