Search code examples
bpfebpfbcc-bpf

eBPF packet monitor losing some packets when 'ping -f'ed


I wrote this program to capture all the packets that come into my network interface. It seems to work fine with things like ping [IP]. It also works fine with ping -f [IP] -c 10. However, when the number of packets that are pinged goes up to like 200, the program sees to lost some packets. Is this a natural limit of eBPF or am I doing something wrong? Here's the code :

Also, when I ping -f [IP] -c 500, it also outputs : "Possibly lost 10 samples" or "Possibly lost 34 samples"

 from bcc import BPF

# Network interface to be monoitored
INTERFACE = "my_interface"

bpf_text = """
#include <uapi/linux/ptrace.h>
#include <net/sock.h>
#include <bcc/proto.h>
#include <linux/bpf.h>
#define IP_TCP 6
#define IP_UDP 17
#define IP_ICMP 1
#define ETH_HLEN 14
BPF_PERF_OUTPUT(skb_events);    // has to be delcared outside any function
BPF_ARRAY(black_list, u64, 5);
int packet_monitor(struct __sk_buff *skb) {
    u8 *cursor = 0;
    u32 saddr;
    u32 daddr;
    u32 ttl;
    u32 hchecksum;

    u64 magic = 111;
    u64 magic2 = 111;

    struct ethernet_t *ethernet = cursor_advance(cursor, sizeof(*ethernet));
    if (!(ethernet -> type == 0x0800)) {
        return 0; // drop
    }
    struct ip_t *ip = cursor_advance(cursor, sizeof(*ip));
    /*
    if (ip->nextp != IP_TCP) 
    {
        if (ip -> nextp != IP_UDP) 
        {
            if (ip -> nextp != IP_ICMP) 
                return 0; 
        }
    }
    */

    skb_events.perf_submit_skb(skb, skb -> len, &magic, sizeof(magic));
    saddr = ip -> src;
    daddr = ip -> dst;
    ttl = ip -> ttl;
    hchecksum = ip -> hchecksum;
    magic = ip -> src;
    magic2 = ip -> dst;
    skb_events.perf_submit_skb(skb, skb->len, &magic, sizeof(magic)); // this one parses number as a hex to the user space
    skb_events.perf_submit_skb(skb, skb->len, &magic2, sizeof(magic2)); // can send multiple values like this


    bpf_trace_printk("saddr = %llu, daddr = %llu, ttl = %llu", saddr, daddr, ttl); 
//    bpf_trace_printk("Incoming packet!!\\n");
    return -1;
}
"""

from ctypes import *
import ctypes as ct
import sys
import socket
import os
import struct

def print_skb_event(cpu, data, size):
    class SkbEvent(ct.Structure):
        _fields_ = [ ("magic", ct.c_uint32), ("magic2", ct.c_uint32)]

    skb_event = ct.cast(data, ct.POINTER(SkbEvent)).contents 
    print("- : ")
    print("%d" % (skb_event.magic))

bpf = BPF(text=bpf_text)

function_skb_matching = bpf.load_func("packet_monitor", BPF.SOCKET_FILTER)

BPF.attach_raw_socket(function_skb_matching, INTERFACE)

bpf["skb_events"].open_perf_buffer(print_skb_event)

black_list = bpf.get_table("black_list")    # retrieve blacklist list

try:
    while True :
        bpf.perf_buffer_poll()  # value = bpf.perf_buffer_poll() function does not return any function and therefore, doesn't work
except KeyboardInterrupt:
    pass

Solution

  • Yes, that's a limitation of the perf ring buffer. If the BPF program produce events on the ring buffer faster than the userspace (Python) process can consume them, some events will be lost (overwritten since it's a ring). The Possibly lost XX samples message is a notification of this happening.

    I would first recommend that you try to group your multiple skb_events.perf_submit_skb calls into a single one on the BPF side. That may help. Otherwise, you can try to aggregate data on the BPF side to have less information sent to the Python side.