Search code examples
Tags: c, sockets, linux-kernel, ebpf

Why can't `bpf_sk_redirect_map` redirect a UDP skb?


I am trying to write a program that distributes UDP messages across different sockets. Because I need to access skb->data, I use sk_skb/verdict as the hook point. But when I use bpf_sk_redirect_map to redirect the packet, it always returns SK_DROP.

Below are the BPF code and the userspace code, which together form a minimal reproducible example. The kernel version is Linux wusheng 6.11.7-amd64 #1 SMP PREEMPT_DYNAMIC Debian 6.11.7-1 (2024-11-09) x86_64 GNU/Linux.

// SPDX-License-Identifier: GPL-2.0
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>


/*
 * Sockmap used by prog_skb_verdict both for the socket lookup and as the
 * redirect target map. Keys are 32-bit, values are 64-bit, and the map holds
 * at most 65535 entries.
 */
struct
{
    __uint(type, BPF_MAP_TYPE_SOCKMAP);
    __uint(max_entries, 65535);
    __type(key, __u32);
    __type(value, __u64);
} sock_map SEC(".maps");

/*
 * sk_skb verdict program.
 *
 * Uses the skb's local port as the sockmap key, logs the skb's port and
 * address fields, asks bpf_sk_redirect_map() to redirect the skb (with
 * BPF_F_INGRESS) to the socket stored under that key, and logs the helper's
 * return value together with whether a socket was found and its state.
 *
 * Always returns SK_PASS, whatever the redirect helper returned.
 */
SEC("sk_skb/verdict")
int prog_skb_verdict(struct __sk_buff *skb)
{
    __u32 key = skb->local_port;
    struct bpf_sock *target;
    long redir_ret;
    int found = 0;
    __u32 target_state = (__u32)-1; /* logged as -1 when no socket is found */

    bpf_printk("socket port %d => %d", skb->local_port, skb->remote_port);
    bpf_printk("socket addr %d => %d", skb->local_ip4, skb->remote_ip4);

    /* The lookup is for diagnostics only; the redirect helper takes the key. */
    target = bpf_map_lookup_elem(&sock_map, &key);
    redir_ret = bpf_sk_redirect_map(skb, &sock_map, key, BPF_F_INGRESS);

    if (target != NULL) {
        found = 1;
        target_state = target->state;
    }
    bpf_printk("socket redir ret=%d sock_find=%d sk_state=%d", redir_ret, found, target_state);

    /* Drop the reference obtained from the map lookup. */
    if (target != NULL)
        bpf_sk_release(target);

    return SK_PASS;
}

/* License string emitted into the object's "license" section. */
char _license[] SEC("license") = "GPL";

userspace code

#include <stdio.h>
#include <stdlib.h>
#include <bpf/libbpf.h>
#include <bpf/bpf.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <unistd.h>
#include <arpa/inet.h>

#define MAP_NAME "/sys/fs/bpf/sock_map"

#define BPF_OBJECT_FILE "test_sockmap_skb_verdict_attach.bpf.o"

/*
 * Reproducer entry point.
 *
 * Opens and loads BPF_OBJECT_FILE, attaches prog_skb_verdict to sock_map as a
 * BPF_SK_SKB_VERDICT program, binds two UDP sockets (ports 18080 and 18081),
 * inserts both into sock_map and then echoes every datagram received on the
 * first socket back to its sender.
 *
 * Returns 1 if any setup step fails; otherwise it loops forever.
 */
int main(int argc, char **argv) {
    struct bpf_object *bpf_obj;
    int prog_fd, sock_map_fd, sock1, sock2;
    struct sockaddr_in addr = {0}, addr2 = {0};

    (void)argc;
    (void)argv;

    // Open and load the eBPF object file.
    bpf_obj = bpf_object__open_file(BPF_OBJECT_FILE, NULL);
    if (!bpf_obj) {
        fprintf(stderr, "Failed to open BPF object file\n");
        return 1;
    }

    if (bpf_object__load(bpf_obj)) {
        fprintf(stderr, "Failed to load BPF program\n");
        return 1;
    }

    // Get the program's FD.
    prog_fd = bpf_program__fd(bpf_object__find_program_by_name(bpf_obj, "prog_skb_verdict"));
    if (prog_fd < 0) {
        fprintf(stderr, "Failed to find BPF program FD\n");
        return 1;
    }

    // Get the sock_map's FD.
    sock_map_fd = bpf_object__find_map_fd_by_name(bpf_obj, "sock_map");
    if (sock_map_fd < 0) {
        fprintf(stderr, "Failed to find sock_map FD\n");
        return 1;
    }
    printf("socket map fd: %d\n", sock_map_fd);

    if (bpf_prog_attach(prog_fd, sock_map_fd, BPF_SK_SKB_VERDICT, 0)) {
        perror("bpf_prog_attach");
        return 1;
    }

    // Create the two UDP sockets.
    sock1 = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
    sock2 = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
    if (sock1 < 0 || sock2 < 0) {
        perror("socket");
        return 1;
    }
    printf("start bind socket\n");

    addr.sin_family = AF_INET;
    addr.sin_port = htons(18080);
    addr.sin_addr.s_addr = htonl(INADDR_ANY);

    addr2.sin_family = AF_INET;
    addr2.sin_port = htons(18081);
    addr2.sin_addr.s_addr = htonl(INADDR_ANY);
    if (bind(sock1, (struct sockaddr *)&addr, sizeof(addr))) {
        perror("sock1 bind fail");
        return 1;
    }
    if (bind(sock2, (struct sockaddr *)&addr2, sizeof(addr2))) {
        perror("sock2 bind fail");
        return 1;
    }

    // Add the sockets to sock_map. The keys are deliberately crossed: key
    // 18081 maps to sock1 (bound to 18080) and key 18080 maps to sock2 (bound
    // to 18081), so the BPF program's lookup by local port selects the
    // *other* socket.
    // NOTE(review): both sockets are only bound, never connect()ed; the
    // kernel's sock_map_redirect_allowed() accepts a non-TCP socket as a
    // redirect target only when sk_state == TCP_ESTABLISHED.
    uint32_t key_for_sock1 = 18081;
    uint64_t sock1_value = sock1;
    if (bpf_map_update_elem(sock_map_fd, &key_for_sock1, &sock1_value, BPF_ANY)) {
        perror("bpf_map_update_elem sock1");
        return 1;
    }
    uint32_t key_for_sock2 = 18080;
    uint64_t sock2_value = sock2;
    if (bpf_map_update_elem(sock_map_fd, &key_for_sock2, &sock2_value, BPF_ANY)) {
        perror("bpf_map_update_elem sock2");
        return 1;
    }
    printf("BPF program loaded and sockets added to sock_map.\n");

    // Echo loop on sock1. The buffer has one spare byte so a full-size
    // datagram can still be NUL-terminated for printing.
    enum { BUFLEN = 1500 };
    char buf[BUFLEN + 1];
    for (;;) {
        struct sockaddr_in si_other;
        // recvfrom() treats the length as a value-result argument, so it has
        // to be reset to the full structure size before every call.
        socklen_t slen = sizeof(si_other);
        ssize_t recv_len;

        printf("Waiting for data...");
        fflush(stdout);

        // Try to receive some data; this is a blocking call.
        recv_len = recvfrom(sock1, buf, BUFLEN, 0, (struct sockaddr *)&si_other, &slen);
        if (recv_len < 0) {
            // Nothing was received: do not index buf with -1 or echo garbage.
            perror("recvfrom()");
            continue;
        }

        // Print details of the client/peer and the data received.
        printf("Received packet from %s:%d\n", inet_ntoa(si_other.sin_addr), ntohs(si_other.sin_port));
        buf[recv_len] = 0;
        printf("Data: %s\n", buf);

        // Reply to the client with the same data.
        if (sendto(sock1, buf, (size_t)recv_len, 0, (struct sockaddr *)&si_other, slen) == -1) {
            perror("sendto()");
        }
    }

    // Not reached: the loop above never terminates.
    return 0;
}

Log when sending a udp packet to 18081.

          <idle>-0       [005] ..s21   328.378048: bpf_trace_printk: socket port 18081 => 0
          <idle>-0       [005] ..s21   328.378052: bpf_trace_printk: socket addr 0 => 0
          <idle>-0       [005] ..s21   328.378053: bpf_trace_printk: socket redir ret=0 sock_find=1 sk_state=7

As the log shows, even though the UDP socket is found by the lookup, bpf_sk_redirect_map returns 0 (SK_DROP).

I tried to find the answer in the kernel code, but I am not sure whether sk->sk_state in that code has the same meaning as bpf_sock->state.

// net/core/sock_map.c
/*
 * Decide whether @sk may be used as a redirect target.
 *
 * A TCP socket is allowed in every state except TCP_LISTEN. Any other
 * socket is allowed only while its sk_state is TCP_ESTABLISHED.
 */
static bool sock_map_redirect_allowed(const struct sock *sk)
{       
        if (sk_is_tcp(sk))
                return sk->sk_state != TCP_LISTEN;
        else
                return sk->sk_state == TCP_ESTABLISHED;
}

/*
 * bpf_sk_redirect_map() helper: look up @key in @map and, if the socket found
 * there is an allowed redirect target, mark @skb for redirection to it.
 *
 * Returns SK_PASS when the redirect was set up, SK_DROP otherwise.
 */
BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb,
           struct bpf_map *, map, u32, key, u64, flags)
{
        struct sock *sk;
        
        /* BPF_F_INGRESS is the only flag accepted. */
        if (unlikely(flags & ~(BPF_F_INGRESS)))
                return SK_DROP;
  
        sk = __sock_map_lookup_elem(map, key);
        /* No socket under this key, or its state does not permit redirect. */
        if (unlikely(!sk || !sock_map_redirect_allowed(sk)))
                return SK_DROP;
        
        /* Presumably records sk and the ingress flag on the skb for later use. */
        skb_bpf_set_redir(skb, sk, flags & BPF_F_INGRESS);
        return SK_PASS;
}

Solution

  • You seem to be almost there. As the kernel code indicates, it does support UDP, but only with connected/established UDP sockets.

    /*
     * Decide whether @sk may be used as a redirect target: a TCP socket in
     * any state except TCP_LISTEN, or any other socket whose sk_state is
     * TCP_ESTABLISHED.
     */
    static bool sock_map_redirect_allowed(const struct sock *sk)
    {       
            if (sk_is_tcp(sk))
                    return sk->sk_state != TCP_LISTEN;
            else
                    return sk->sk_state == TCP_ESTABLISHED;
    }
    

    I tried to find the answer in the kernel code, but I am not sure whether sk->sk_state in that code has the same meaning as bpf_sock->state

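    Yes, these both translate to the same field. The sk_state=7 in your log is TCP_CLOSE, the state of an unconnected UDP socket, rather than TCP_ESTABLISHED (1), so sock_map_redirect_allowed() returns false and bpf_sk_redirect_map returns SK_DROP.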

    So in your userspace program, where you create the sockets, you should call connect() on them; that should solve your issue. There is a great blog post by Cloudflare on unconnected vs. connected UDP sockets that goes into the details and implications.