I am trying to create a TC program that clones a packet, encapsulates the clone with a modified L3 header, and sends it to a different host (the "monitor host"). Can I do that using a combination of bpf_skb_adjust_room and bpf_clone_redirect?
The kernel examples do not shed much light on this use case (for example, here).
My current attempt seems to be mutating the original packet:
// Represents the redirect destination: the host that receives the
// encapsulated copy of each packet. Stored as the value type of the
// `destinations` map.
struct destination {
// IPv4 address of the destination. tc_ingress places it unchanged into the
// outer header's daddr field, so it is presumably stored in network byte
// order (TODO: confirm against whatever populates the map).
__u32 destination_ip;
// MAC address that tc_ingress writes into the Ethernet destination field.
__u8 destination_mac[ETH_ALEN];
};
// Single-slot hash map that holds the destination to redirect traffic to.
// tc_ingress looks the entry up under key 0; when no entry is present the
// program lets packets through untouched.
struct bpf_map_def SEC("maps") destinations = {
    .type = BPF_MAP_TYPE_HASH,
    .map_flags = BPF_F_NO_PREALLOC,
    .max_entries = 1,
    .key_size = sizeof(__u32),
    .value_size = sizeof(struct destination),
};
SEC("tc")
int tc_ingress(struct __sk_buff *skb) {
__u32 key = 0;
struct destination *dest = bpf_map_lookup_elem(&destinations, &key);
if (dest != NULL) {
void *data_end = (void *)(long)skb->data_end;
void *data = (void *)(long)skb->data;
// Necessary validation: if L3 layer does not exist, ignore and continue.
if (data + sizeof(struct ethhdr) > data_end) {
return TC_ACT_OK;
}
struct ethhdr *eth = data;
struct iphdr encapsulate_iphdr = {};
struct iphdr *original_iphdr = data + sizeof(struct ethhdr);
if ((void*) original_iphdr + sizeof(struct iphdr) > data_end) {
return TC_ACT_OK;
}
// Change the L2 destination to the provided MAC destination
// and the source to the MAC addr of the recieving host.
memcpy(ð->h_source, ð->h_dest, ETH_ALEN);
memcpy(ð->h_dest, dest->destination_mac, ETH_ALEN);
// Change the L3 destination to the provided destination IP
// and the source to the ip addr of the recieving host.
memcpy(&encapsulate_iphdr.daddr, &dest->destination_ip, IPV4_ADDR_LEN);
memcpy(&encapsulate_iphdr.saddr, &original_iphdr->daddr, IPV4_ADDR_LEN);
// Adjust room for another iphdr after the L2 layer.
if (bpf_skb_adjust_room(skb, sizeof(struct iphdr), BPF_ADJ_ROOM_NET, 0)) {
return TC_ACT_OK;
}
// Store the headers at after L2 headers at the original headers offset.
unsigned long offset = (unsigned long) original_iphdr;
if (bpf_skb_store_bytes(skb, (int)offset, &encapsulate_iphdr, sizeof(struct iphdr), 0)) {
return TC_ACT_OK;
}
// route back the to egress path.
// Zero flag means that the socket buffer is
// cloned to the iface egress path.
bpf_clone_redirect(skb, skb->ifindex, 0);
}
return TC_ACT_OK;
}
I believe that's not possible within the same BPF program run today, because bpf_clone_redirect redirects the clone as soon as it is called, and there is no clone helper that does not redirect as well.
You could, however, implement this with a recirculation to the same interface. The pseudocode would look something like:
if (skb->mark == ORIGINAL_PACKET) {
skb->mark = 0;
return TC_ACT_OK;
}
skb->mark = ORIGINAL_PACKET;
bpf_clone_redirect(skb, skb->ifindex, BPF_F_INGRESS);
skb->mark = 0;
... implement changes ...
return bpf_redirect(skb, skb->ifindex, 0);