pwru icon indicating copy to clipboard operation
pwru copied to clipboard

Collect MTU from skb->_skb_refdst

Open jschwinger233 opened this issue 1 year ago • 0 comments

According to the kernel source, the kernel takes the MTU from the route (dst entry) referenced by skb->_skb_refdst. pwru should collect the MTU from there using the same logic.

This requires casting skb->_skb_refdst (masked with SKB_DST_PTRMASK) to struct dst_entry *, then reading dst_metric_raw(dst, RTAX_MTU) and, if that is zero, falling back to dst->dev->mtu.

With this patch, pwru can output the MTU that the kernel actually uses.

Case 1: Cilium could reduce the route MTU to 1423 inside a pod. Before this patch, pwru could not detect this because it only checked the link MTU.

Case 2: Xfrm could reduce the route MTU to 1446 in ip_forward(). pwru must inspect the route MTU from skb->_skb_refdst to see this; this patch makes that possible.

// https://elixir.bootlin.com/linux/v6.5/source/net/ipv4/ip_forward.c#L86
// net/ipv4/ip_forward.c
int ip_forward(struct sk_buff *skb)
{
[...]
	rt = skb_rtable(skb);
[...]
	mtu = ip_dst_mtu_maybe_forward(&rt->dst, true);
	if (ip_exceeds_mtu(skb, mtu)) {
		IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
			  htonl(mtu));
		SKB_DR_SET(reason, PKT_TOO_BIG);
		goto drop;
	}
[...]
}

// include/linux/skbuff.h
static inline struct rtable *skb_rtable(const struct sk_buff *skb)
{
	return (struct rtable *)skb_dst(skb);
}

// include/linux/skbuff.h
static inline struct dst_entry *skb_dst(const struct sk_buff *skb)
{
[...]
	return (struct dst_entry *)(skb->_skb_refdst & SKB_DST_PTRMASK);
}

// include/net/ip.h
static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst,
						    bool forwarding)
{
[...]
	mtu = dst_metric_raw(dst, RTAX_MTU);
	if (mtu)
		goto out;

	mtu = READ_ONCE(dst->dev->mtu);
[...]

// include/net/dst.h
#define DST_METRICS_FLAGS		0x3UL
#define __DST_METRICS_PTR(Y)	\
	((u32 *)((Y) & ~DST_METRICS_FLAGS))
#define DST_METRICS_PTR(X)	__DST_METRICS_PTR((X)->_metrics)

}

jschwinger233 avatar Jun 14 '24 10:06 jschwinger233