ebpf-networking icon indicating copy to clipboard operation
ebpf-networking copied to clipboard

IP routing using tc

Open RuchiSaluja8 opened this issue 2 years ago • 1 comments

Hi, I am trying to perform IP routing using tc, taking guidance from two repos: lb-from-scratch and ebpf-networking. The client and backend are running in containers on my system, and the load balancer (LB) receives packets on the docker0 interface. However, when the client initiates a connection to the LB, the client keeps retransmitting the first SYN packet. Below is the BPF kernel code:

#include <bcc/proto.h>
#include <linux/bpf.h>
#include <uapi/linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/pkt_cls.h>
#include <linux/icmp.h>


/*
 * Pack the dotted-quad x.y.z.w into a 32-bit value with x in the least
 * significant byte. On a little-endian host the in-memory byte sequence is
 * therefore x, y, z, w, i.e. the layout of an IPv4 address on the wire.
 *
 * Every argument is parenthesised and widened to uint32_t before shifting so
 * that expression arguments group correctly and `w << 24` never overflows a
 * signed int.
 */
#define IP_ADDRESS(x,y,z,w) \
    ((uint32_t)(x) + ((uint32_t)(y) << 8) + ((uint32_t)(z) << 16) + ((uint32_t)(w) << 24))
/* Same value as BPF_F_PSEUDO_HDR: the replaced field is part of the L4 pseudo-header. */
#define IS_PSEUDO 0x10

/* Layer-2/3/4 identity of one endpoint (client or backend). */
struct address {
    /* Ethernet MAC; mac[i] is copied to byte i of the Ethernet header field. */
    unsigned char mac[6];
    /* IPv4 address, stored exactly as it is written into the IP header. */
    uint32_t ip;
    /* TCP port. NOTE(review): assigned in host byte order and not currently
     * written into any packet - confirm byte order before using it. */
    u16 port;
};


#define IP_CSUM_OFF (offsetof(struct iphdr, check))
#define TCP_CSUM_OFF offsetof(struct tcphdr, check)

static __always_inline void change_address_and_update_crc(
    struct __sk_buff * skb, 
    struct address * addr,
    struct ethhdr * eth,
    struct iphdr * ip,
    struct tcphdr * tcp){
    
    int flags = IS_PSEUDO;
    uint32_t orig_src_ip;
    uint64_t orig_src_mac;
    u16 orig_src_port;
    
    orig_src_mac = (unsigned char)(eth->h_source);
    orig_src_ip = ip->saddr;
    orig_src_port = tcp->source;

    eth->h_source[0] = eth->h_dest[0];
    eth->h_source[1] = eth->h_dest[1];
    eth->h_source[2] = eth->h_dest[2];
    eth->h_source[3] = eth->h_dest[3];
    eth->h_source[4] = eth->h_dest[4];
    eth->h_source[5] = eth->h_dest[5];
    ip->saddr = ip->daddr;
    //tcp->source = tcp->dest;

    uint32_t dst_ip = (*addr).ip;
    u16 dst_port = (*addr).port;
    eth->h_dest[0] = (*addr).mac[0];
    eth->h_dest[1] = (*addr).mac[1];
    eth->h_dest[2] = (*addr).mac[2];
    eth->h_dest[3] = (*addr).mac[3];
    eth->h_dest[4] = (*addr).mac[4];
    eth->h_dest[5] = (*addr).mac[5];
    ip->daddr = (*addr).ip;
    //tcp->dest = (*addr).port;

    bpf_trace_printk("AFTER CHANGING\n");
    bpf_trace_printk("src mac address [0-2] : %u %u %u\n", eth->h_source[0], eth->h_source[1], eth->h_source[2]);
    bpf_trace_printk("src mac address [3-5] : %u %u %u\n", eth->h_source[3], eth->h_source[4], eth->h_source[5]);
    bpf_trace_printk("dst mac address [0-2] : %u %u %u\n", eth->h_dest[0], eth->h_dest[1], eth->h_dest[2]);
    bpf_trace_printk("dst mac address [3-5] : %u %u %u\n", eth->h_dest[3], eth->h_dest[4], eth->h_dest[5]);
    bpf_trace_printk("ip addresses : %u %u\n", ip->saddr, ip->daddr);
    bpf_trace_printk("ports : %u %u\n", tcp->source, tcp->dest);

    bpf_l4_csum_replace(skb, TCP_CSUM_OFF, orig_src_ip, dst_ip, flags | sizeof(dst_ip));
    //ip->check = iph_csum(ip);
    bpf_l3_csum_replace(skb, IP_CSUM_OFF, htons(orig_src_ip), htons(dst_ip), 4);
    //bpf_l4_csum_replace(skb, TCP_CSUM_OFF, orig_src_port, dst_port, 2);   
}

int tc(struct __sk_buff * skb){
    int key=0;
    struct address client_addr, server_addr;

    server_addr.ip = IP_ADDRESS(172,17,0,2);
    server_addr.port = 8000;
    
    server_addr.mac[5] = 0x02;
    server_addr.mac[4] = 0x00;
    server_addr.mac[3] = 0x11;
    server_addr.mac[2] = 0xac;
    server_addr.mac[1] = 0x42;
    server_addr.mac[0] = 0x02;

    client_addr.ip = IP_ADDRESS(172,17,0,3);
    client_addr.port = 8000;

    client_addr.mac[5] = 0x03;
    client_addr.mac[4] = 0x00;
    client_addr.mac[3] = 0x11;
    client_addr.mac[2] = 0xac;
    client_addr.mac[1] = 0x42;
    client_addr.mac[0] = 0x02;

    bpf_trace_printk("pckt rcvd\n");
    void *data = (void *)(long)skb->data;
    void *data_end = (void *)(long)skb->data_end;
    struct ethhdr *eth = data;
    if ((void *)eth + sizeof(*eth) <= data_end)
    {
        struct iphdr *ip = data + sizeof(*eth);
        if ((void *)ip + sizeof(*ip) <= data_end)
        {
            if (ip->protocol == IPPROTO_TCP)
            {
                struct tcphdr *tcp = (void *)ip + sizeof(*ip);
                if ((void *)tcp + sizeof(*tcp) <= data_end)
                {
                    if(tcp->dest==ntohs(8000)){
                        uint64_t src_mac, dst_mac;
                        uint32_t src_ip, dst_ip;
                        u16 src_port, dst_port;
                        bpf_trace_printk("BEFORE CHANGING\n");
                        bpf_trace_printk("src mac address [0-2] : %u %u %u\n", eth->h_source[0], eth->h_source[1], eth->h_source[2]);
                        bpf_trace_printk("src mac address [3-5] : %u %u %u\n", eth->h_source[3], eth->h_source[4], eth->h_source[5]);
                        bpf_trace_printk("dst mac address [0-2] : %u %u %u\n", eth->h_dest[0], eth->h_dest[1], eth->h_dest[2]);
                        bpf_trace_printk("dst mac address [3-5] : %u %u %u\n", eth->h_dest[3], eth->h_dest[4], eth->h_dest[5]);
                        bpf_trace_printk("ip addresses : %u %u\n", ip->saddr, ip->daddr);
                        bpf_trace_printk("ports : %u %u\n", tcp->source, tcp->dest);

                        if(eth->h_source[5]==0x03){ //packet from client to server
                            bpf_trace_printk("CLIENT TO LB TO BACKEND\n");
                            change_address_and_update_crc(skb, &server_addr, eth, ip, tcp);
                            //bpf_redirect(6, 0);
                            return TC_ACT_REDIRECT;
                        }
                        else{
                            bpf_trace_printk("BACKEND TO LB TO CLIENT\n");
                            change_address_and_update_crc(skb, &client_addr, eth, ip, tcp);
                           // bpf_redirect(8, 0);
                           return TC_ACT_REDIRECT;
                        }
                    }   
                }
            }
        }
    }
    return TC_ACT_OK;
}

I'm not sure what I'm missing. For the checksum, I feel using the bpf helper functions bpf_l4_csum_replace and bpf_l3_csum_replace should be sufficient. In some other articles, I have read not to change the checksum at all.

RuchiSaluja8 avatar Jun 07 '22 07:06 RuchiSaluja8

I tried rerouting with XDP and it's working. It uses the action XDP_TX. I'm not sure how TC_ACT_REDIRECT works exactly. @lizrice Does routing with tc work the same way as with XDP, or are there some additional steps involved?

arayu05 avatar Jun 07 '22 12:06 arayu05