Search code examples
cgolinux-kernelebpfxdp-bpf

BPF tail program removed when reading from ring buffer


I observe that if I write to a ring buffer in a tail program and read the ring buffer from user space, the tail program is eventually removed. The tail program no longer shows up in bpftool prog. bpftool map dump name jump_table says it Found 0 elements; it originally had 1 element, the tail program.

This BPF program consists of main_prog calling a tail program. The tail program writes 0 to a ring buffer.

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

/* Ring buffer map that tail_prog writes its samples into. */
struct bpf_map_def SEC("maps") flow_ring_buf = {
    .type        = BPF_MAP_TYPE_RINGBUF,
    /* 4096. NOTE(review): for ring buffer maps this is presumably the
     * buffer size in bytes rather than an element count -- confirm. */
    .max_entries = 1 << 12,
};

/* Program array with a single slot (index 0), used by main_prog for its tail call. */
struct bpf_map_def SEC("maps") jump_table = {
    .type        = BPF_MAP_TYPE_PROG_ARRAY,
    .key_size    = sizeof(__u32),
    .value_size  = sizeof(__u32),
    .max_entries = 1,
};

/*
 * XDP entry point: tail-calls the program stored in slot 0 of jump_table.
 * Returns XDP_PASS if the tail call does not take place.
 */
SEC("xdp")
int main_prog(struct xdp_md *ctx) {
    /* On success control transfers to the callee and never comes back here. */
    bpf_tail_call(ctx, &jump_table, 0);

    /* Only reached when the tail call failed, e.g. because slot 0 is empty. */
    bpf_printk("Tail call failed");

    return XDP_PASS;
}

/*
 * Tail-call target: writes one __u32 with value 0 to flow_ring_buf and
 * passes the packet on.
 */
SEC("xdp_2")
int tail_prog(struct xdp_md *ctx) {
    __u32 sample = 0;

    /* The helper's return value is deliberately not inspected here. */
    bpf_ringbuf_output(&flow_ring_buf, &sample, sizeof(sample), 0);
    return XDP_PASS;
}

/* License string emitted into the object's "license" section. */
char _license[] SEC("license") = "GPL";

This Go program loads the programs and map and reads from the ring buffer:

package main

import "C"
import (
    "errors"
    "github.com/cilium/ebpf"
    "github.com/cilium/ebpf/ringbuf"
    "github.com/vishvananda/netlink"
    "log"
)

// bpfObjects collects the programs and maps that LoadAndAssign fills in.
// Each field is matched to an object in test.o by the name in its `ebpf` tag.
type bpfObjects struct {
    MainProg    *ebpf.Program `ebpf:"main_prog"`
    TailProg    *ebpf.Program `ebpf:"tail_prog"`
    JumpTable   *ebpf.Map     `ebpf:"jump_table"`
    FlowRingBuf *ebpf.Map     `ebpf:"flow_ring_buf"`
}

// main loads test.o, stores tail_prog in slot 0 of the jump table, attaches
// main_prog to interface enp0s8 and then drains the ring buffer until the
// reader is closed.
func main() {
    var objects bpfObjects

    spec, err := ebpf.LoadCollectionSpec("test.o")
    if err != nil {
        log.Fatalln("ebpf.LoadCollectionSpec", err)
    }

    if err := spec.LoadAndAssign(&objects, nil); err != nil {
        log.Fatalln("ebpf.LoadAndAssign", err)
    }

    // Keep every loaded object referenced until main returns. Without this,
    // objects.JumpTable is unreachable after the Update call below, so the
    // garbage collector may finalize it and close its file descriptor. Once
    // user space holds no reference to a program array the kernel clears its
    // entries, which silently removes tail_prog from the jump table.
    defer objects.MainProg.Close()
    defer objects.TailProg.Close()
    defer objects.JumpTable.Close()
    defer objects.FlowRingBuf.Close()

    // Update the jump table with the tail prog
    if err = objects.JumpTable.Update(uint32(0), uint32(objects.TailProg.FD()), ebpf.UpdateAny); err != nil {
        log.Fatalln("Update prog_array", err)
    }

    link, err := netlink.LinkByName("enp0s8")
    if err != nil {
        log.Fatalln("netlink.LinkByName", err)
    }

    // Load the program onto the interface. 0x2 is XDP_FLAGS_SKB_MODE
    // (generic XDP).
    if err = netlink.LinkSetXdpFdWithFlags(link, objects.MainProg.FD(), 0x2); err != nil {
        log.Fatalln("netlink.LinkSetXdpFdWithFlags:", err)
    }

    reader, err := ringbuf.NewReader(objects.FlowRingBuf)
    if err != nil {
        // Previously unchecked: a nil reader would panic on the first Read.
        log.Fatalln("ringbuf.NewReader", err)
    }
    defer reader.Close()

    for {
        if _, err := reader.Read(); err != nil {
            if errors.Is(err, ringbuf.ErrClosed) {
                log.Println("Received signal, exiting..")
                return
            }
            log.Printf("reading from reader: %s", err)
        }
    }
}

I run into the problem when I send traffic to the interface. reader.Read() never returns an error and the returned Record object has 0. Because the jump table is empty, the tail call fails and I see the bpf_printk output in the kernel log.

If I comment out the code below A and replace it with an infinite wait, like select {}, I don't run into the problem.

For reference, the equivalent C program works without issues. I'm using libbpf 0.7:

#include <libbpf.h>
#include <bpf.h>
#include <stdio.h>
#include <net/if.h>

// Ring buffer callback, invoked for each sample read from the buffer.
// The sample is ignored; the function always returns 0.
static int flow_buf_sample(void *ctx, void *data, size_t len) {
    (void)ctx;
    (void)data;
    (void)len;

    return 0;
}

/*
 * Loads test.o, stores tail_prog in slot 0 of jump_table, attaches main_prog
 * to the interface named by argv[1] and then polls the ring buffer forever.
 *
 * Returns 1 on any setup failure; does not return once polling has started.
 */
int main(int argc, char *argv[]) {
    struct bpf_object *obj;
    struct bpf_map *jump_table;
    struct bpf_map *flow_ring_buf;
    struct bpf_program *tail_prog;
    struct bpf_program *main_prog;

    int err;

    /* argv[1] is the interface name; refuse to run without it instead of
     * passing a NULL pointer to if_nametoindex() later. */
    if (argc < 2) {
        fprintf(stderr, "usage: %s <interface>\n", argc > 0 ? argv[0] : "loader");
        return 1;
    }

    /* libbpf before 1.0 can report failure here as an encoded error pointer
     * rather than NULL unless strict mode is enabled; libbpf_get_error()
     * recognises both forms. */
    obj = bpf_object__open_file("test.o", NULL);
    if (libbpf_get_error(obj)) {
        fprintf(stderr, "Could not open ELF");
        return 1;
    }

    if ((err = bpf_object__load(obj)) < 0) {
        fprintf(stderr, "Could not load BPF");
        return 1;
    }

    if ((jump_table = bpf_object__find_map_by_name(obj, "jump_table")) == NULL) {
        fprintf(stderr, "Could not find jump_table map");
        return 1;
    }

    if ((flow_ring_buf = bpf_object__find_map_by_name(obj, "flow_ring_buf")) == NULL) {
        fprintf(stderr, "Could not find flow_ring_buf_map map");
        return 1;
    }

    if ((main_prog = bpf_object__find_program_by_name(obj, "main_prog")) == NULL) {
        fprintf(stderr, "Could not find main_prog");
        return 1;
    }

    if ((tail_prog = bpf_object__find_program_by_name(obj, "tail_prog")) == NULL) {
        fprintf(stderr, "Could not find tail_prog");
        return 1;
    }

    struct ring_buffer *ring_buffer = ring_buffer__new(bpf_map__fd(flow_ring_buf), flow_buf_sample, NULL, NULL);

    if (ring_buffer == NULL) {
        fprintf(stderr, "failed to create ring buffer\n");
        return 1;
    }

    int index0 = 0;
    int tail_prog_fd = bpf_program__fd(tail_prog);

    /* Without the jump table entry every tail call in main_prog fails, so
     * treat a failed update as fatal rather than attaching anyway. */
    if ((err = bpf_map_update_elem(bpf_map__fd(jump_table), &index0, &tail_prog_fd, 0)) < 0) {
        fprintf(stderr, "failed update jump_table: %d", err);
        return 1;
    }

    int if_index = if_nametoindex(argv[1]);

    if (!if_index) {
        printf("get if_index from interface name failed\n");
        return 1;
    }

    /* 0x2 is XDP_FLAGS_SKB_MODE (generic XDP). */
    if ((err = bpf_xdp_attach(if_index, bpf_program__fd(main_prog), 0x2, NULL)) != 0) {
        fprintf(stderr, "bpf_xdp_attach failed: %d", err);
        return 1;
    }

    /* Block until samples arrive; flow_buf_sample runs for each of them. */
    while(1) {
        ring_buffer__poll(ring_buffer, -1);
    }

    return 0;
}

Solution

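  • This happens because objects.JumpTable is garbage collected while the main program loop is running: it is not used again after the jump table update, so the Go runtime is free to finalize the map, which closes its file descriptor. Once user space no longer holds a reference to a program array (no open file descriptor and no pin), the kernel removes all of its entries, so the tail program disappears from jump_table. The solution is to add defer objects.JumpTable.Close() after calling LoadAndAssign(), which keeps the map referenced until main returns. Alternatively, add this code: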

    // Close releases every program and map held by o. Errors returned by the
    // individual Close calls are ignored.
    func (o *bpfObjects) Close() {
      closers := []interface{ Close() error }{
        o.MainProg,
        o.TailProg,
        o.JumpTable,
        o.FlowRingBuf,
      }
      for _, c := range closers {
        c.Close()
      }
    }
    

    and call defer objects.Close() after calling LoadAndAssign(). This does not happen in the C version because there is no garbage collection.