Search code examples
cebpfbpfkprobe

How to correctly read socket->sk from pt_regs* in ebpf program?


I want to get some information from kprobe/inet_accept through ebpf, but got error: load program: permission denied: 13: (79) r6 = *(u64 *)(r6 +24): R6 invalid mem access 'inv' (22 line(s) omitted) debug detail:

; struct socket *newsock = (struct socket *)PT_REGS_PARM2(ctx);
0: (79) r6 = *(u64 *)(r1 +104)
; bpf_trace_printk("inet_accept called\n", sizeof("inet_accept called\n"));
1: (18) r1 = 0xffff888a06dc4910
3: (b7) r2 = 20
4: (85) call bpf_trace_printk#6
 R1_w=map_value(id=0,off=0,ks=4,vs=57,imm=0) R2_w=inv20 R6_w=inv(id=0) R10=fp0
last_idx 4 first_idx 0
regs=4 stack=0 before 3: (b7) r2 = 20
5: (b7) r1 = 0
; struct sock_key sk_key = {0, };
6: (63) *(u32 *)(r10 -8) = r1
last_idx 6 first_idx 0
regs=2 stack=0 before 5: (b7) r1 = 0
7: (7b) *(u64 *)(r10 -16) = r1
8: (7b) *(u64 *)(r10 -24) = r1
9: (7b) *(u64 *)(r10 -32) = r1
10: (7b) *(u64 *)(r10 -40) = r1
11: (7b) *(u64 *)(r10 -48) = r1
12: (7b) *(u64 *)(r10 -56) = r1
; extract_sock_key(newsock->sk, &sk_key);
13: (79) r6 = *(u64 *)(r6 +24)
R6 invalid mem access 'inv'
processed 13 insns (limit 1000000) max_states_per_insn 0 total_states 0 peak_states 0 mark_read 0
2023/08/22 19:01:25 loading objects: field KprobeInetAccept: program kprobe__inet_accept: load program: permission denied: 13: (79) r6 = *(u64 *)(r6 +24): R6 invalid mem access 'inv' (22 line(s) omitted)

the code is as follows:

#include "vmlinux.h"
#include "bpf_endian.h"
#include "bpf_tracing.h"
#include "bpf_helpers.h"
#include "trace_sock.h"

char __license[] SEC("license") = "Dual MIT/GPL";

/* LRU hash map keyed by struct endpoint_key with a u32 value; capacity is 1024 entries. */
struct {
    __uint(type, BPF_MAP_TYPE_LRU_HASH);
    __type(key, struct endpoint_key);
    __type(value, u32);
    __uint(max_entries, 1024);
} server_map SEC(".maps");

static __always_inline void extract_sock_key(const struct sock *sk,
                        struct sock_key *key)
{
    if (sk->__sk_common.skc_family == AF_INET) {
        bpf_probe_read(key->ip.source, 4, &sk->__sk_common.skc_rcv_saddr);
        bpf_probe_read(key->ip.destination, 4, &sk->__sk_common.skc_daddr);
        key->ip.ip_version = IPv4;
    } else if (sk->__sk_common.skc_family == AF_INET6) {
        bpf_probe_read(key->ip.source, 16, sk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr8);
        bpf_probe_read(key->ip.destination, 16, sk->__sk_common.skc_v6_daddr.in6_u.u6_addr8);
        key->ip.ip_version = IPv6;
    }

    key->l4.source_port = sk->__sk_common.skc_num;
    key->l4.destination_port = bpf_ntohs(sk->__sk_common.skc_dport);
    if (sk->sk_socket->type == SOCK_STREAM)
        key->l4.l4_type = TCP;
    else if (sk->sk_socket->type == SOCK_DGRAM)
        key->l4.l4_type = UDP;
    else
        key->l4.l4_type = LAYER4_UNKNOWN;

    key->pid = bpf_get_current_pid_tgid() >> 32;
}

/*
 * Derive an endpoint_key from the source side of a sock_key: the source
 * address, IP version, source port and PID are carried over into *ep_key.
 */
static __always_inline
void sock_key_to_endpoint_key(const struct sock_key *sk_key, struct endpoint_key *ep_key)
{
    ep_key->pid = sk_key->pid;
    ep_key->port = sk_key->l4.source_port;
    ep_key->ip_version = sk_key->ip.ip_version;

    /* ip_addr is declared as char[16], so sizeof covers the whole address buffer. */
    bpf_probe_read(ep_key->ip_addr, sizeof(ep_key->ip_addr), sk_key->ip.source);
}

/*
 * kprobe on inet_accept: builds a sock_key from the socket passed as the
 * probed function's second argument, converts it to an endpoint_key and
 * stores that key in server_map with value 1.
 *
 * NOTE(review): this version does not load. newsock comes out of pt_regs, so
 * the verifier tracks it as a plain scalar and rejects the direct
 * newsock->sk load with "R6 invalid mem access 'inv'".
 */
SEC("kprobe/inet_accept")
int kprobe__inet_accept(struct pt_regs *ctx)
{
    /* Second argument of the probed function, taken from the saved registers. */
    struct socket *newsock = (struct socket *)PT_REGS_PARM2(ctx);
    bpf_trace_printk("inet_accept called\n", sizeof("inet_accept called\n"));
    struct sock_key sk_key = {0, };
    /* newsock->sk is a direct memory load through an untyped pointer: rejected. */
    extract_sock_key(newsock->sk, &sk_key);

    struct endpoint_key ep_key = {0, };
    sock_key_to_endpoint_key(&sk_key, &ep_key);
    u32 value = 1;

    bpf_trace_printk("inet_accept endpoint_key ip_addr %s\n",sizeof("inet_accept endpoint_key ip_addr %s\n"), ep_key.ip_addr);

    /* BPF_ANY: create the entry or overwrite an existing one. */
    bpf_map_update_elem(&server_map, &ep_key, &value, BPF_ANY);
    return 0;
}

trace_sock.h:

#pragma once 

#include "vmlinux.h"

#include "bpf_helpers.h"
//#include "bpf_endian.h"
//
#include "common.h"

#define MAX_NET_FILTER_SIZE 5

#define AF_INET 2
#define AF_INET6 10

// #define SOCK_STREAM 1
// #define SOCK_DGRAM 2

#define MAX_MSG_SIZE 4096

/* IP version of the addresses held in struct ip; IP_UNKNOWN is the zero value. */
enum ip_version { IP_UNKNOWN, IPv4, IPv6 };

/* Source/destination address pair; each buffer is 16 bytes wide. */
struct ip {
    char source[16];
    char destination[16];
    enum ip_version ip_version;
};

/* Transport protocol of a socket; LAYER4_UNKNOWN is the zero value. */
enum layer4_type { LAYER4_UNKNOWN, TCP, UDP };

/* Source/destination ports plus the transport protocol. */
struct layer4 {
    u32 source_port;
    u32 destination_port;
    enum layer4_type l4_type;
};

/* Identifies a socket by its addresses, ports/protocol and a PID. */
struct sock_key {
    struct ip ip;
    struct layer4 l4;
    u32 pid;
};

/* One address/mask pair; both buffers are 16 bytes wide. */
struct ip_network {
    char ip_addr[16];
    char ip_mask[16];
};

/* Fixed-capacity list of ip_network entries (MAX_NET_FILTER_SIZE slots) with a size field. */
struct ip_networks {
    struct ip_network data[MAX_NET_FILTER_SIZE];
    u32 size;
};

/* Key type of server_map: a single address, its IP version, a port and a PID. */
struct endpoint_key {
    char ip_addr[16];
    enum ip_version ip_version;
    u32 port;
    u32 pid;
};

/* Kind of flow a message belongs to; FLOW_UNKNOWN is the zero value. */
enum flow_type { FLOW_UNKNOWN, REQUEST, RESPONSE };

/* Traffic direction; DIR_UNKNOWN is the zero value. */
enum direction { DIR_UNKNOWN, INGRESS, EGRESS };

/* Application protocol tag; PROTO_UNKNOWN is the zero value. */
enum protocol_type {
    PROTO_UNKNOWN,
    PROTO_SKIP,

    HTTP1,
    HTTP2,

    /* Placeholder enumerators; nothing in this file uses them. */
    RESERVED1,
    RESERVED2,
    RESERVED3,
    RESERVED4,
    RESERVED5
};

/* An iov_iter paired with a protocol tag. */
struct msg_arg {
    struct iov_iter iter;
    enum protocol_type protocol;
};
/* NOTE(review): unused pointer, presumably declared only so the type is emitted for user-space bindings — confirm. */
struct msg_arg *unused_msg_arg __attribute__((unused));

/* Message record: a MAX_MSG_SIZE-byte buffer plus the socket key, size, timestamp and flow/protocol tags. */
struct msg_event {
    char msg[MAX_MSG_SIZE];
    struct sock_key sock_key;
    u32 msg_size;
    u64 timestamp;
    enum flow_type flow_type;
    enum protocol_type protocol;
};
/* NOTE(review): unused pointer, presumably declared only so the type is emitted for user-space bindings — confirm. */
struct msg_event *unused_data_event __attribute__((unused));

I simplified and tried:

/*
 * Reduced reproducer: reads newsock->type directly through the pointer taken
 * from pt_regs. The verifier rejects that load with
 * "R1 invalid mem access 'inv'".
 */
SEC("kprobe/inet_accept")
int kprobe__inet_accept(struct pt_regs *ctx)
{
    struct socket *newsock = (struct socket *)PT_REGS_PARM2(ctx);
    /* (int)newsock->type is a direct load through an untyped pointer: rejected. */
    bpf_trace_printk("inet_accept called type %d\n", sizeof("inet_accept called type %d\n"),(int)newsock->type);
    return 0;
}

but still got same error.

2023/08/22 19:13:49 verifier log: 
 ; struct socket *newsock = (struct socket *)PT_REGS_PARM2(ctx);
0: (79) r1 = *(u64 *)(r1 +104)
; bpf_trace_printk("inet_accept called type %d\n", sizeof("inet_accept called type %d\n"),(int)newsock->type);
1: (69) r3 = *(u16 *)(r1 +4)
R1 invalid mem access 'inv'
processed 2 insns (limit 1000000) max_states_per_insn 0 total_states 0 peak_states 0 mark_read 0
2023/08/22 19:13:49 loading objects: field KprobeInetAccept: program kprobe__inet_accept: load program: permission denied: 1: (69) r3 = *(u16 *)(r1 +4): R1 invalid mem access 'inv' (4 line(s) omitted)

Reading newsock->type via bpf_probe_read_kernel works:

/*
 * Variant that copies newsock->type into a local with bpf_probe_read_kernel
 * instead of dereferencing newsock; the post reports that this one works.
 */
SEC("kprobe/inet_accept")
int kprobe__inet_accept(struct pt_regs *ctx)
{
    struct socket *newsock = (struct socket *)PT_REGS_PARM2(ctx);
    if (newsock == NULL) {
        return 0;
    }
    bpf_trace_printk("inet_accept called %llx\n", sizeof("inet_accept called %llx\n"),newsock);
    __u16 type = 0;
    /* &newsock->type only computes an address; the helper performs the actual read. */
    bpf_probe_read_kernel(&type, sizeof(__u16), &newsock->type);
    bpf_trace_printk("inet_accept type %d\n", sizeof("inet_accept type %d\n"),type);
    return 0;
}

Then I tried to read newsock->sk with bpf_probe_read_kernel as well:

/*
 * Variant that also tries to fetch newsock->sk. The verifier rejects it at
 * the newsock->sk load with "R6 invalid mem access 'inv'".
 */
SEC("kprobe/inet_accept")
int kprobe__inet_accept(struct pt_regs *ctx)
{
    struct socket *newsock = (struct socket *)PT_REGS_PARM2(ctx);
    if (newsock == NULL) {
        return 0;
    }
    bpf_trace_printk("inet_accept called %llx\n", sizeof("inet_accept called %llx\n"),newsock);
    __u16 type = 0;
    /* &newsock->type only computes an address; the helper performs the actual read. */
    bpf_probe_read_kernel(&type, sizeof(__u16), &newsock->type);
    bpf_trace_printk("inet_accept type %d\n", sizeof("inet_accept type %d\n"),type);
    struct sock *sk = NULL;
    /*
     * NOTE(review): two problems on the next line.
     *  - The source argument newsock->sk (no '&') is itself a direct load
     *    through newsock; this is the instruction the verifier rejects.
     *  - The destination argument is sk, which is NULL here; presumably &sk
     *    was intended so the pointer value lands in the local.
     */
    bpf_probe_read_kernel(sk, sizeof(sk), newsock->sk);
    bpf_trace_printk("inet_accept sk %llx\n", sizeof("inet_accept sk %llx\n"),sk);
    return 0;
}

got error:

2023/08/24 11:24:15 verifier log: 
 ; struct socket *newsock = (struct socket *)PT_REGS_PARM2(ctx);
0: (79) r6 = *(u64 *)(r1 +104)
; if (newsock == NULL) {
1: (15) if r6 == 0x0 goto pc+27
 R1=ctx(id=0,off=0,imm=0) R6_w=inv(id=0) R10=fp0
; bpf_trace_printk("inet_accept called %llx\n", sizeof("inet_accept called %llx\n"),newsock);
2: (18) r1 = 0xffff88831ec64710
4: (b7) r2 = 25
5: (bf) r3 = r6
6: (85) call bpf_trace_printk#6
 R1_w=map_value(id=0,off=0,ks=4,vs=154,imm=0) R2_w=inv25 R3_w=inv(id=0) R6_w=inv(id=0) R10=fp0
last_idx 6 first_idx 0
regs=4 stack=0 before 5: (bf) r3 = r6
regs=4 stack=0 before 4: (b7) r2 = 25
7: (b7) r1 = 0
; __u16 type = 0;
8: (6b) *(u16 *)(r10 -2) = r1
last_idx 8 first_idx 0
regs=2 stack=0 before 7: (b7) r1 = 0
; bpf_probe_read_kernel(&type, sizeof(__u16), &newsock->type);
9: (bf) r3 = r6
10: (07) r3 += 4
11: (bf) r1 = r10
;
12: (07) r1 += -2
; bpf_probe_read_kernel(&type, sizeof(__u16), &newsock->type);
13: (b7) r2 = 2
14: (85) call bpf_probe_read#4
last_idx 14 first_idx 0
regs=4 stack=0 before 13: (b7) r2 = 2
; bpf_trace_printk("inet_accept type %d\n", sizeof("inet_accept type %d\n"),type);
15: (69) r3 = *(u16 *)(r10 -2)
; bpf_trace_printk("inet_accept type %d\n", sizeof("inet_accept type %d\n"),type);
16: (18) r1 = 0xffff88831ec64729
18: (b7) r2 = 21
19: (85) call bpf_trace_printk#6
 R0=inv(id=0) R1_w=map_value(id=0,off=25,ks=4,vs=154,imm=0) R2_w=inv21 R3_w=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff)) R6=inv(id=0) R10=fp0 fp-8=mm??????
last_idx 19 first_idx 15
regs=4 stack=0 before 18: (b7) r2 = 21
; bpf_probe_read_kernel(sk, sizeof(sk), newsock->sk);
20: (79) r3 = *(u64 *)(r6 +24)
R6 invalid mem access 'inv'
processed 19 insns (limit 1000000) max_states_per_insn 0 total_states 1 peak_states 1 mark_read 1
2023/08/24 11:24:15 loading objects: field KprobeInetAccept: program kprobe__inet_accept: load program: permission denied: 20: (79) r3 = *(u64 *)(r6 +24): R6 invalid mem access 'inv' (41 line(s) omitted)

Is there a special way to read newsock->sk?

How do I get this past the eBPF verifier?


Solution

  • You are on the right track. In your first example you are dereferencing newsock->sk in the function call, which is not allowed directly.

    struct socket *newsock = (struct socket *)PT_REGS_PARM2(ctx);
    bpf_trace_printk("inet_accept called\n", sizeof("inet_accept called\n"));
    struct sock_key sk_key = {0, };
    extract_sock_key(newsock->sk, &sk_key);
    

    In the C world you are casting the value from PT_REGS_PARM2 to a (struct socket *), but the verifier does not see that. It only sees that you took a number; it doesn't know what type it is or even that it is a pointer. So you can't dereference newsock directly (*newsock or newsock->field); you need to read through it via bpf_probe_read_kernel.

    In your last program, you made a simple mistake:

    struct socket *newsock = (struct socket *)PT_REGS_PARM2(ctx);
    if (newsock == NULL) {
        return 0;
    }
    
    __u16 type = 0;
    bpf_probe_read_kernel(&type, sizeof(__u16), &newsock->type);
    
    struct sock *sk = NULL;
    bpf_probe_read_kernel(sk, sizeof(sk), newsock->sk);
    

    When you write &newsock->type this translates into newsock + offsetof(struct socket, type), because you take the address of the field rather than its value. No actual memory is read, so that is valid.

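    But when you write newsock->sk without taking the address you actually perform a memory load, which isn't allowed. So you should write it as &newsock->sk as well, even though the value contained in the field is itself a pointer. The destination argument needs the same treatment: sk is a NULL pointer, so passing sk gives the helper nowhere to write. Pass &sk so the pointer value is copied into your local variable: bpf_probe_read_kernel(&sk, sizeof(sk), &newsock->sk);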