nix
nix copied to clipboard
`SockaddrStorage::as_sockaddr_in` incorrectly returns None sometime
On nix 0.24.x the following is necessary to always manage to extract the IP address (and maybe something similar is needed for IPv6, but I have not encountered the problem
/// Converts a socket addr into an IP address when possible
fn try_into_ip_addr(s: &SockaddrStorage) -> Option<std::net::IpAddr> {
// Normal method
let or_else = s
.as_sockaddr_in()
.map(|sa| sa.ip().to_be_bytes().into())
.or_else(|| s.as_sockaddr_in6().map(|sa| sa.ip().into()));
// Start of the fallback
match initial_method {
im @ Some(_) => return im,
None => (),
}
let sock_addr = unsafe { s.as_ptr().as_ref()? };
if i32::from(sock_addr.sa_family) == nix::libc::AF_INET {
dbg!(&s);
let sock_addr = unsafe { *s.as_ptr().cast::<nix::libc::sockaddr_in>() };
Some(Ipv4Addr::from(sock_addr.sin_addr.s_addr).into())
} else {
None
}
}
Maybe something about the size ?
I'm on macOS 12 but CI also fails for every type of Linux we have, both GNU and MUSL
What code did you try first, and what error did you get?
Also, is that IpAddr a std::net::IpAddr or a nix::sys::socket::IpAddr?
We have an InterfaceAddress we originally got from getifaddrs(). We try to get the IP of its netmask member using try_into_ip_addr.
I modified the original code to add a dgb! call, I get this in the branch:
&s = SockaddrStorage {
ss: sockaddr_storage {
ss_len: 7,
ss_family: 2,
__ss_pad1: [
0,
0,
255,
255,
255,
0,
],
__ss_align: 2882523706792346128,
},
}
ss_family: 2 is AF_INET, which should be caught by as_sockaddr_in earlier but is not. Going down in SockaddrIn::from_raw, I think this is a size problem, but I have no idea why
You said that the original version of try_into_ip_addr didn't work. What was that original version, and what error did you get?
Original version, for 0.23 (modulo types that did not exist then), which returns None for the Debug output in my previous message
/// Converts a socket addr into an IP address when possible
fn try_into_ip_addr(s: &SockaddrStorage) -> Option<std::net::IpAddr> {
s
.as_sockaddr_in()
.map(|sa| sa.ip().to_be_bytes().into())
.or_else(|| s.as_sockaddr_in6().map(|sa| sa.ip().into()))
}
Since you're narrowed it down fairly far, would you be able to combine all of these things into an executable test case?
@poliorcetics would you be able to come up with an executable example of the problem? If so we'll make a patch release to fix it.
Sorry for the month-long wait
Edit: see my next comment for test code
I think you're making this too complicated. All I need for a test case is something like the following:
fn tc() {
let raw = libc::sockaddr_storage { ... };
let ss = nix::sys::socket::SockaddrStorage::from_raw(raw);
assert!(ss.as_sockaddr_in().is_some());
}
And you should be able to get the initializer for the libc::sockaddr_storage just by adding a dbg!(&s) to the top of your into_ip_addr function above.
Several test cases, I also put the function into_ip_addr to show it manages to find data (on my machine, all the failures happen on the as_sockaddr_in(raw) call).
#[cfg(test)]
mod tests {
use nix::sys::socket::SockaddrLike;
#[repr(C)]
#[derive(Debug, Clone, Copy)]
#[allow(non_camel_case_types)]
// Some members are private but we need them initialized for the tests
struct sockaddr_storage {
ss_len: u8,
ss_family: nix::libc::sa_family_t,
__ss_pad1: [u8; 6],
__ss_align: i64,
// __ss_pad2: [u8; 112],
}
#[track_caller]
fn as_sockaddr_in(raw: sockaddr_storage) {
let ss = unsafe {
nix::sys::socket::SockaddrStorage::from_raw(
&raw as *const sockaddr_storage as *const _,
None,
)
}
.unwrap();
assert!(ss.as_sockaddr_in().is_some());
}
#[track_caller]
fn into_ip_addr(raw: sockaddr_storage) -> Option<std::net::IpAddr> {
let s = unsafe {
nix::sys::socket::SockaddrStorage::from_raw(
&raw as *const sockaddr_storage as *const _,
None,
)
}
.unwrap();
let initial_method = s
.as_sockaddr_in()
.map(|sa| sa.ip().to_be_bytes().into())
.or_else(|| s.as_sockaddr_in6().map(|sa| sa.ip().into()));
match initial_method {
im @ Some(_) => return im,
None => (),
}
// Sometimes `as_sockaddr_in` fails despite the inner data being present and
// seemingly correct. Investigations point towards a size issue for the padding
// at the end of the structure, which does not affect us.
//
// Safety: the pointer is valid (notably, aligned) and not accessed mutably
let sock_addr = unsafe { s.as_ptr().as_ref()? };
if i32::from(sock_addr.sa_family) == nix::libc::AF_INET {
// Safety: if AF_INET is set for `family`, casting
// to `sockaddr_in` is safe, as is dereferencing
let sock_addr = unsafe { *s.as_ptr().cast::<nix::libc::sockaddr_in>() };
Some(std::net::Ipv4Addr::from(sock_addr.sin_addr.s_addr).into())
} else {
None
}
}
#[test]
fn t1() {
let raw = sockaddr_storage {
ss_len: 5,
ss_family: 2,
__ss_pad1: [0, 0, 255, 0, 0, 0],
__ss_align: 72058139498775056,
};
into_ip_addr(raw).unwrap();
as_sockaddr_in(raw);
}
#[test]
fn t2() {
let raw = sockaddr_storage {
ss_len: 7,
ss_family: 2,
__ss_pad1: [0, 0, 255, 255, 255, 0],
__ss_align: 2306310026777592336,
};
into_ip_addr(raw).unwrap();
as_sockaddr_in(raw);
}
#[test]
fn t3() {
let raw = sockaddr_storage {
ss_len: 7,
ss_family: 2,
__ss_pad1: [0, 0, 255, 255, 255, 0],
__ss_align: -5043564565091057136,
};
into_ip_addr(raw).unwrap();
as_sockaddr_in(raw);
}
#[test]
fn t4() {
let raw = sockaddr_storage {
ss_len: 7,
ss_family: 2,
__ss_pad1: [0, 0, 255, 255, 255, 0],
__ss_align: 2882523706792346128,
};
into_ip_addr(raw).unwrap();
as_sockaddr_in(raw);
}
}
@asomers do you have a Mac that you can try the reproducing example on? I tried following the logic by hand (the Linux libc definitions differ enough to make it unusable) but nothing jumped out.
Ok, these test cases show the problem: your value for the ss_len field is wrong. That field should contain the size of the valid parts of the structure. For IPv4 it should be 16 bytes and for IPv6 28. You should take a look at the code that's creating those structures.
Note that Linux does not have any ss_len field. So if somebody tried to create a sockaddr_storage by hand, rather than getting it from the OS, then that could result in a function that works on Linux but fails everywhere else.
You should take a look at the code that's creating those structures.
I use nix's getifaddrs function, see the previous edit on this comment
Previous comment
Output of `cargo run`, the last line shows `None` when it should show the same data as the previous call[src/main.rs:9] list_nics(false).unwrap().count() = 18
[src/main.rs:10] list_nics(true).unwrap().count() = 18
[src/main.rs:14] find_netmask(host, port, true).unwrap() = Some(
0.255.255.255,
)
[src/main.rs:15] find_netmask(host, port, false).unwrap() = None
use std::collections::HashMap;
use std::error::Error;
use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, ToSocketAddrs, UdpSocket};
use nix::ifaddrs::{getifaddrs, InterfaceAddress};
use nix::sys::socket::{SockaddrLike, SockaddrStorage};
fn main() {
let _ = dbg!(list_nics(false).unwrap().count());
let _ = dbg!(list_nics(true).unwrap().count());
let host = "example.com";
let port = 80;
dbg!(find_netmask(host, port, true).unwrap());
dbg!(find_netmask(host, port, false).unwrap());
}
#[derive(Clone, Debug)]
pub struct Nic(InterfaceAddress, IpAddr);
impl Nic {
pub fn name(&self) -> &str {
self.0.interface_name.as_str()
}
pub fn address(&self) -> IpAddr {
self.1
}
pub fn netmask(&self, with_fix: bool) -> Option<IpAddr> {
self.0
.netmask
.as_ref()
.and_then(|s| into_ip_addr(s, with_fix))
}
}
#[derive(Default, Clone, Debug)]
pub struct InterfaceInfo(Vec<ProtoInterfaceAddress>);
#[derive(Default, Clone, Debug)]
pub struct ProtoInterfaceAddress {
pub address: Option<IpAddr>,
pub netmask: Option<IpAddr>,
}
fn find_netmask(host: &str, port: u8, with_fix: bool) -> Result<Option<IpAddr>, Box<dyn Error>> {
let mut new_host = host.to_string();
if host.parse::<Ipv6Addr>().is_ok() {
new_host = format!("[{}]", host);
}
let destination = format!("{}:{}", new_host, port);
// Passing 0.0.0.0:0 (or [::]:0) will let the OS choose for us depending on the destination
let ip = get_ip("0.0.0.0:0", &destination)
.or_else(|_e| get_ip("[::]:0", &destination))
.unwrap();
let mut infos: HashMap<String, InterfaceInfo> = HashMap::new();
for iface in list_nics(with_fix)? {
let iface_info = infos
.entry(iface.name().to_string())
.or_insert_with(InterfaceInfo::default);
iface_info.0.push(ProtoInterfaceAddress {
address: Some(iface.address()),
netmask: iface.netmask(with_fix),
netmask: iface.netmask(with_fix).map(Into::into),
});
}
Ok(infos
.values()
.flat_map(|v| v.0.iter())
.find(|v| v.address.as_ref() == Some(&ip))
.and_then(|v| v.netmask))
}
fn get_ip<A: ToSocketAddrs + ?Sized, B: ToSocketAddrs + ?Sized>(
bind_addr: &A,
destination: &B,
) -> std::io::Result<IpAddr> {
let socket = UdpSocket::bind(bind_addr)?;
socket.connect(destination)?;
socket.local_addr().map(|sa| sa.ip())
}
pub fn list_nics(with_fix: bool) -> std::io::Result<impl Iterator<Item = Nic>> {
Ok(getifaddrs()?.filter_map(move |addr| match &addr.address {
Some(address) => into_ip_addr(address, with_fix).map(|ip| Nic(addr, ip)),
_ => None,
}))
}
fn into_ip_addr(s: &SockaddrStorage, with_fix: bool) -> Option<IpAddr> {
let initial_method = s
.as_sockaddr_in()
.map(|sa| sa.ip().to_be_bytes().into())
.or_else(|| s.as_sockaddr_in6().map(|sa| sa.ip().into()));
match initial_method {
im @ Some(_) => return im,
None => (),
}
if !with_fix {
return None;
}
// Sometimes `as_sockaddr_in` fails despite the inner data being present and
// seemingly correct. Investigations point towards a size issue for the padding
// at the end of the structure, which does not affect us.
//
// Safety: the pointer is valid (notably, aligned) and not accessed mutably
let sock_addr = unsafe { s.as_ptr().as_ref()? };
if i32::from(sock_addr.sa_family) == nix::libc::AF_INET {
// Safety: if AF_INET is set for `family`, casting
// to `sockaddr_in` is safe, as is dereferencing
let sock_addr = unsafe { *s.as_ptr().cast::<nix::libc::sockaddr_in>() };
Some(Ipv4Addr::from(sock_addr.sin_addr.s_addr).into())
} else {
None
}
}
@poliorcetics on my FreeBSD and NetBSD systems everything works fine and the ss_len field is correctly populated by libc::getifaddrs. Perhaps you are looking at a bug in OSX. Why don't you try adding a dbg!(&s.ss_len) to the top of into_ip_addr to confirm?
You also said that CI is failing for you on Linux. That cannot be related, because Linux does not use the ss_len field.
Hey,
So I made a very minimal reproducer:
use std::net::{IpAddr, Ipv4Addr};
use nix::ifaddrs::getifaddrs;
use nix::sys::socket::{AddressFamily, SockaddrLike};
fn main() {
for addr in getifaddrs().unwrap() {
if let Some(netmask) = &addr.netmask {
if netmask.family() == Some(AddressFamily::Inet) {
println!("{:?}", netmask.family());
println!("{:?}", netmask.len());
let sock_addr = unsafe { *netmask.as_ptr().cast::<nix::libc::sockaddr_in>() };
let netmask_addr: IpAddr = Ipv4Addr::from(sock_addr.sin_addr.s_addr).into();
println!("{:?}", netmask_addr);
let netmask_addr: IpAddr = netmask.as_sockaddr_in().unwrap().ip().to_be_bytes().into();
println!("{:?}", netmask_addr);
}
} else {
println!("No netmask");
}
}
}
And indeed, len (which tries to access sa_len) returns 5:
Running `target/release/nix-netmask-endian-repro`
No netmask
Some(Inet)
5
0.0.0.255
thread 'main' panicked at 'called `Option::unwrap()` on a `None` value', src/main.rs:14:69
note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace
Note that this only happens when getting the netmask. The address itself is properly returned with a struct of length 16. (don't mind the wrong endianness for the addr).
So yes, I suspect this is a darwin-specific bug. I did a lot of digging to find out what's going on. The full investigation can be found under, but TLDR is, darwin runs sa_trim(sa, offsetof(struct sockaddr_in, sin_addr)) on netmasks before returning them, which removes the structure's trailing zeroes from the sa_len field. This means that, for a netmask of 255.0.0.0, the sa_len will be 5 (sin_zero will be totally eliminated, and the 3 trailing zeroes from sin_addr will will also be cut off). You can trivially see what it does by copy pasting the function and running it in userspace. There's an example of doing that in the investigation below.
I'm not sure what the solution is here. I guess nix should just blindly trust that netmasks will always be a valid sockaddr_in/sockaddr_in6 if the family is AF_INET/AF_INET6?
For full investigation, click me
getifaddrs implementation is open source in libinfo, and can be found here. It is compiled with NET_RT_IFLIST, so it's essentially a wrapper around sysctl([CTL_NET, PF_ROUTE, 0, 0, NET_RT_IFLIST, 0]) call.
From what we can gather of the sysctl, it seems to return a list of rt_msghdr. Each header determines the length and type of the structure (length will differ depending on type). The type of message returned that interests us is RTM_NEWADDR, in which case the rt_msghdr is cast into an ifa_msghdr here.
The ifa_netmask is set here. A couple lines later, data will be populated with our sockaddr from p, where p is essentially a pointer to some data that comes after the ifa_msghdr.
So the kernel seems to contain the erroneous sockaddr. We now turn our eyes to the sysctl implementation, which we can find in source of XNU, the kernel. To find it I first found CTL_NET: https://github.com/apple-opensource/xnu/blob/214e9f5819399504e7ff4ce48b57d9adc4cd80da/bsd/kern/kern_mib.c#L163 . This yielded nothing of interest - the sysctl node has no handler.
Next, I found PF_ROUTE. https://github.com/apple-opensource/xnu/blob/eb45a4f3d6bc33c958fcbf4ea7388da13ae63a2e/bsd/net/rtsock.c#L146 . It has a handler called sysctl_rtsock, defined below in the same source file, which simply calls sysctl_iflist when the operation is NET_RT_IFLIST.
sysctl_iflist simply walks over all the interfaces through ifnet_head with TAILQ_FOREACH, and writes a ifa_msghdr and its associated sockaddrs to the sysctl output using rt_msg2 here. rt_msg2 uses the input rt_addrinfo to know what sockaddr to write.
Finally, rt_msg2 uses rtm_scrub to actually write the sockaddr into the sysctl output buffer. This function does something very interesting with regards to sa_len: It calls sa_trim, which, according to the documentation:
Trim trailing zeroes on a sockaddr and update its length.
This is awfully suspicious. Looking at the code, it indeed looks like it reduces the length to not include zeroes at the end of the structure. It does have a minimum size it cannot reduce further (the skip argument), but unfortunately, rtm_scrub passes the offset to if_addr, which means that any zeroes at the end of the address will be cut off from the length.
This can be verified by running the following code:
#include<stdlib.h>
#include <stddef.h>
#include <stdio.h>
#include <sys/socket.h>
#include <netinet/in.h>
static struct sockaddr *
sa_trim(struct sockaddr *sa, int skip)
{
caddr_t cp, base = (caddr_t)sa + skip;
if (sa->sa_len <= skip) {
return sa;
}
for (cp = base + (sa->sa_len - skip); cp > base && cp[-1] == 0;) {
cp--;
}
sa->sa_len = (cp - base) + skip;
if (sa->sa_len < skip) {
/* Must not happen, and if so, panic */
printf("%s: broken logic (sa_len %d < skip %d )", __func__,
sa->sa_len, skip);
exit(1);
/* NOTREACHED */
} else if (sa->sa_len == skip) {
/* If we end up with all zeroes, then there's no mask */
sa->sa_len = 0;
}
return sa;
}
int main() {
struct sockaddr_in in = {
.sin_len = sizeof(struct sockaddr_in),
.sin_family = AF_INET,
.sin_port = 0,
.sin_addr = 0x000000FF,
.sin_zero = { 0 },
};
sa_trim(&in, offsetof(struct sockaddr_in, sin_addr));
printf("%d", in.sin_len);
}
On my mac, it returns 5 - just like the kernel does.