Search code examples
networking network-programming ebpf bpf

Implementing cloning and (de/en)capsulation of packets using eBPF


I am trying to create a TC program that will clone a packet, encapsulate it with a modified L3 header, and send the clone to a different host (the "Monitor host"). Can I do that using a combination of bpf_skb_adjust_room and bpf_clone_redirect? The kernel examples do not shed much light on this use case (for example, here). My current attempt seems to be mutating the original packet:

// Represents the redirect destination (the "Monitor host") that the TC program
// addresses the rewritten packet to. Used as the value type of the
// `destinations` map.
struct destination {
    // IPv4 address of the destination. The program copies these bytes verbatim
    // into the encapsulating header's daddr field, so the value has to be
    // stored in network byte order.
    __u32 destination_ip;
    // MAC address the program writes into the Ethernet destination field.
    __u8 destination_mac[ETH_ALEN];
};

// Single-entry hash map holding the destination that traffic is redirected to.
// Keys are __u32 and values are `struct destination`; the TC program looks the
// destination up under key 0.
struct bpf_map_def SEC("maps") destinations = {
    .type = BPF_MAP_TYPE_HASH,
    .map_flags = BPF_F_NO_PREALLOC,
    .max_entries = 1,
    .key_size = sizeof(__u32),
    .value_size = sizeof(struct destination),
};

// TC ingress hook. When a destination is configured under key 0 of the
// `destinations` map and the frame carries IPv4, this program:
//   1. rewrites the Ethernet addresses (source <- old destination,
//      destination <- configured MAC),
//   2. inserts a second IPv4 header directly after the Ethernet header
//      (saddr <- original daddr, daddr <- configured IP),
//   3. sends a clone of the resulting packet out of the interface it
//      arrived on.
// Every path returns TC_ACT_OK, so the packet always continues up the stack.
//
// NOTE(review): steps 1 and 2 are applied to `skb` itself before it is cloned,
// so the packet that continues after TC_ACT_OK carries the same modifications
// as the clone. Keeping the original intact needs a different structure
// (e.g. cloning the untouched packet first and rewriting afterwards).
// NOTE(review): only saddr/daddr of the inserted header are populated; version,
// ihl, tot_len, ttl, protocol and check are left zero, so the receiver will
// not see a well-formed outer IPv4 header until those are filled in.
SEC("tc")
int tc_ingress(struct __sk_buff *skb) {
    __u32 key = 0;
    struct destination *dest = bpf_map_lookup_elem(&destinations, &key);

    // No destination configured: nothing to mirror.
    if (dest == NULL) {
        return TC_ACT_OK;
    }

    void *data_end = (void *)(long)skb->data_end;
    void *data = (void *)(long)skb->data;

    // Bounds check required by the verifier before touching the L2 header.
    struct ethhdr *eth = data;
    if ((void *)(eth + 1) > data_end) {
        return TC_ACT_OK;
    }

    // Only IPv4 frames can be parsed as `struct iphdr`; let everything else
    // (ARP, IPv6, ...) through untouched.
    if (eth->h_proto != __constant_htons(ETH_P_IP)) {
        return TC_ACT_OK;
    }

    struct iphdr *original_iphdr = (void *)(eth + 1);
    if ((void *)(original_iphdr + 1) > data_end) {
        return TC_ACT_OK;
    }

    // Build the encapsulating header on the stack while the packet pointers
    // are still valid: destination is the configured IP, source is the
    // address the packet was originally sent to (the receiving host).
    struct iphdr encapsulate_iphdr = {0};
    encapsulate_iphdr.daddr = dest->destination_ip;
    encapsulate_iphdr.saddr = original_iphdr->daddr;

    // Change the L2 destination to the configured MAC and the source to the
    // MAC address of the receiving host (the frame's old destination).
    memcpy(eth->h_source, eth->h_dest, ETH_ALEN);
    memcpy(eth->h_dest, dest->destination_mac, ETH_ALEN);

    // Open room for one more iphdr between the L2 and L3 headers.
    // BPF_ADJ_ROOM_MAC inserts directly after the MAC header, whereas
    // BPF_ADJ_ROOM_NET would insert after the existing IP header.
    // This call invalidates `data`, `data_end`, `eth` and `original_iphdr`;
    // none of them may be used below.
    // NOTE(review): a tunnel encapsulation may additionally want
    // BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 in flags - confirm for the target kernel.
    if (bpf_skb_adjust_room(skb, sizeof(struct iphdr), BPF_ADJ_ROOM_MAC, 0)) {
        return TC_ACT_OK;
    }

    // Write the new header into the freshly opened room. The offset is a byte
    // offset from the start of the packet (right after the Ethernet header),
    // not a pointer value.
    if (bpf_skb_store_bytes(skb, sizeof(struct ethhdr), &encapsulate_iphdr,
                            sizeof(encapsulate_iphdr), 0)) {
        return TC_ACT_OK;
    }

    // Clone the packet onto the egress path of the interface it arrived on
    // (flags == 0 selects egress). Best effort: the result is not checked and
    // the packet is passed on either way.
    bpf_clone_redirect(skb, skb->ifindex, 0);
    return TC_ACT_OK;
}

Solution

  • I believe that's not possible within the same BPF program run today because bpf_clone_redirect will redirect the clone as soon as it's called and there is no clone helper that wouldn't redirect as well.

    You could however implement this with a recirculation to the same interface. The pseudo code would look something like:

    // A packet carrying the mark is the untouched clone re-injected below:
    // clear the mark and let it continue up the stack unmodified.
    if (skb->mark == ORIGINAL_PACKET) {
        skb->mark = 0;
        return TC_ACT_OK;
    }

    // Tag the packet so its clone is recognised on re-entry, re-inject an
    // unmodified copy on the ingress path of the same interface, then remove
    // the tag from the packet that is about to be rewritten.
    skb->mark = ORIGINAL_PACKET;
    bpf_clone_redirect(skb, skb->ifindex, BPF_F_INGRESS);
    skb->mark = 0;

    ... implement changes ...

    // bpf_redirect() takes only (ifindex, flags); unlike bpf_clone_redirect()
    // it has no skb argument. Flags == 0 sends the rewritten packet to egress.
    return bpf_redirect(skb->ifindex, 0);