Similar to the gethostlatency.py tool in the BPF Compiler Collection (bcc), I want to trace function calls to getaddrinfo. Additionally, I want to collect the returned values (IP addresses, address families).
However, I do not seem to be able to get past the BPF verifier with a solution that returns the right result.
The getaddrinfo function:
int getaddrinfo(const char *node, const char *service,
const struct addrinfo *hints,
struct addrinfo **res);
The result is returned in struct addrinfo **res.
This code sample, directly based on gethostlatency.py, at least doesn't produce any error, but it returns the wrong results:
#!/usr/bin/python
#
# Based on (gethostlatency.py) https://github.com/iovisor/bcc/blob/master/tools/gethostlatency.py
# Licensed under the Apache License, Version 2.0 (the "License")
from __future__ import print_function
from bcc import BPF
from time import strftime
import argparse
# Usage examples shown in the --help epilog. The script only probes
# getaddrinfo, so the examples and description say exactly that.
examples = """examples:
    ./gethostlatency           # trace all getaddrinfo() calls
    ./gethostlatency -p 181    # only trace PID 181
"""
# Command-line interface: an optional PID filter (-1 is the default when no
# PID is given) and an --ebpf switch that is kept out of the help output.
parser = argparse.ArgumentParser(
    description="Show latency and address family for getaddrinfo calls",
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=examples)
parser.add_argument("-p", "--pid", help="trace this PID only", type=int,
    default=-1)
parser.add_argument("--ebpf", action="store_true",
    help=argparse.SUPPRESS)
args = parser.parse_args()
# load BPF program
#
# BPF C source handed to bcc. It declares two hash maps (`start`, `currres`)
# and one perf output (`events`), plus two probe handlers:
#   do_entry  - stores comm, the host string, the pid and a timestamp in
#               `start`, and stores `&res` in `currres`.
#   do_return - looks both entries up, copies comm/host into a data_t, reads
#               ai_family through the value stored in `currres`, submits the
#               event and deletes the `start` entry.
# NOTE(review): `currres` is declared with value type `struct addrinfo *`, but
# do_entry passes `&res` (a `struct addrinfo ***`) and do_return dereferences
# the stored value only once before reading ai_family. The pointer levels do
# not match, which looks like the source of the wrong AF values -- confirm
# before reusing this sample.
bpf_text = """
#include <uapi/linux/ptrace.h>
#include <linux/sched.h>
// Copied from: include/netdb.h
struct addrinfo
{
int ai_flags; /* Input flags. */
int ai_family; /* Protocol family for socket. */
int ai_socktype; /* Socket type. */
int ai_protocol; /* Protocol for socket. */
u32 ai_addrlen; /* Length of socket address. */ // CHANGED from socklen_t
struct sockaddr *ai_addr; /* Socket address for socket. */
char *ai_canonname; /* Canonical name for service location. */
struct addrinfo *ai_next; /* Pointer to next in list. */
};
struct val_t {
u32 pid;
char comm[TASK_COMM_LEN];
char host[80];
u64 ts;
};
struct data_t {
u32 pid;
u64 delta;
char comm[TASK_COMM_LEN];
u32 af;
char host[80];
};
BPF_HASH(start, u32, struct val_t);
BPF_HASH(currres, u32, struct addrinfo *);
BPF_PERF_OUTPUT(events);
int do_entry(struct pt_regs *ctx, const char *node, const char *service,
const struct addrinfo *hints,
struct addrinfo **res) {
if (!PT_REGS_PARM1(ctx))
return 0;
struct val_t val = {};
u32 pid = bpf_get_current_pid_tgid();
if (bpf_get_current_comm(&val.comm, sizeof(val.comm)) == 0) {
bpf_probe_read(&val.host, sizeof(val.host),
(void *)PT_REGS_PARM1(ctx));
val.pid = bpf_get_current_pid_tgid();
val.ts = bpf_ktime_get_ns();
start.update(&pid, &val);
currres.update(&pid, &res);
}
return 0;
}
int do_return(struct pt_regs *ctx) {
struct val_t *valp;
struct data_t data = {};
u64 delta;
u32 pid = bpf_get_current_pid_tgid();
u64 tsp = bpf_ktime_get_ns();
valp = start.lookup(&pid);
if (valp == 0)
return 0; // missed start
bpf_probe_read(&data.comm, sizeof(data.comm), valp->comm);
bpf_probe_read(&data.host, sizeof(data.host), (void *)valp->host);
struct addrinfo **result;
result = currres.lookup(&pid);
if (result == 0) {
return 0; // missed entry
}
struct addrinfo* resx = *result;
bpf_probe_read(&data.af, sizeof(data.af), &resx->ai_family);
//data.af = resx->ai_family;
data.pid = valp->pid;
data.delta = tsp - valp->ts;
events.perf_submit(ctx, &data, sizeof(data));
start.delete(&pid);
return 0;
}
"""
# With --ebpf, dump the BPF C source and stop before anything is attached.
if args.ebpf:
    print(bpf_text)
    exit()

# Compile the program, then hook both the entry and the return of libc's
# getaddrinfo; each handler is attached with the same target arguments.
b = BPF(text=bpf_text)
for attach, handler in ((b.attach_uprobe, "do_entry"),
                        (b.attach_uretprobe, "do_return")):
    attach(name="c", sym="getaddrinfo", fn_name=handler, pid=args.pid)

# header
column_titles = ("TIME", "PID", "COMM", "LATms", "AF", "HOST")
print("%-9s %-6s %-16s %10s %-10s %s" % column_titles)
def print_event(cpu, data, size):
    """Perf-buffer callback: decode one event and print it as a table row.

    ``data`` is the raw event handed over by the perf buffer; ``cpu`` and
    ``size`` are accepted for the callback signature but not used. The
    ``delta`` field is divided by 1,000,000 before printing (LATms column).
    """
    evt = b["events"].event(data)
    row = (
        strftime("%H:%M:%S"),
        evt.pid,
        evt.comm.decode('utf-8', 'replace'),
        float(evt.delta) / 1000000,
        evt.af,
        evt.host.decode('utf-8', 'replace'),
    )
    print("%-9s %-6d %-16s %10.2f %-10d %s" % row)
# Register print_event as the perf-buffer callback, then keep polling for
# events until the user interrupts with Ctrl-C.
b["events"].open_perf_buffer(print_event)
while True:
    try:
        b.perf_buffer_poll()
    except KeyboardInterrupt:
        exit()
Things that might be wrong:
- The addrinfo type definition copied from netdb.h, as it does not seem to be accessible in BCC.
- currres.update(&pid, &res);
- bpf_probe_read(&data.af, sizeof(data.af), &resx->ai_family);
In this sample the program is just trying to read the address family (AF), so the expected value would be either 2 (AF_INET) or 10 (AF_INET6); instead, it shows numbers like 32xxx.
To trigger an output a DNS request has to be made.
This program version above also shows one warning:
/virtual/main.c:52:30: warning: incompatible pointer types passing 'struct addrinfo ***' to parameter of type
'struct addrinfo **'; remove & [-Wincompatible-pointer-types]
currres.update(&pid, &res);
^~~~
Resolving the warning results in more mysterious errors in the BPF verifier.
Tested on x64 with Kernel version 4.18.
You want currres to store the kernel pointer you get from getaddrinfo (res, which is a struct addrinfo **), so its declaration should be:
BPF_HASH(currres, u32, struct addrinfo **);
Then, you'll want to use two probe_reads to access the value from that kernel pointer:
struct addrinfo **resx = *result;
struct addrinfo *resxx;
bpf_probe_read(&resxx, sizeof(resxx), resx);
bpf_probe_read(&data.af, sizeof(data.af), &resxx->ai_family);
Note that with the latest bcc versions (>= v0.6.0), you can also write the following directly, and bcc will translate it to the appropriate calls to bpf_probe_read:
struct addrinfo **resx = *result;
struct addrinfo *resxx = *resx;
data.af = resxx->ai_family;
Here is the full program:
#!/usr/bin/python
#
# Based on (gethostlatency.py) https://github.com/iovisor/bcc/blob/master/tools/gethostlatency.py
# Licensed under the Apache License, Version 2.0 (the "License")
from __future__ import print_function
from bcc import BPF
from time import strftime
import argparse
# Usage examples shown in the --help epilog. The script only probes
# getaddrinfo, so the examples and description say exactly that.
examples = """examples:
    ./gethostlatency           # trace all getaddrinfo() calls
    ./gethostlatency -p 181    # only trace PID 181
"""
# Command-line interface: an optional PID filter (-1 is the default when no
# PID is given) and an --ebpf switch that is kept out of the help output.
parser = argparse.ArgumentParser(
    description="Show latency and address family for getaddrinfo calls",
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=examples)
parser.add_argument("-p", "--pid", help="trace this PID only", type=int,
    default=-1)
parser.add_argument("--ebpf", action="store_true",
    help=argparse.SUPPRESS)
args = parser.parse_args()
# load BPF program
#
# BPF C source handed to bcc. do_entry runs when getaddrinfo() is entered and
# remembers the caller (comm, host string, pid, timestamp) together with the
# `res` out-pointer; do_return runs when it returns, reads the address family
# of the first result through that pointer and emits one perf event.
# Both maps are keyed by thread id so concurrent lookups in different threads
# of one process do not overwrite each other, and both entries are removed on
# every exit path of do_return so the maps do not fill up with stale data.
bpf_text = """
#include <uapi/linux/ptrace.h>
#include <linux/sched.h>

// Copied from: include/netdb.h
struct addrinfo
{
    int ai_flags;               /* Input flags. */
    int ai_family;              /* Protocol family for socket. */
    int ai_socktype;            /* Socket type. */
    int ai_protocol;            /* Protocol for socket. */
    u32 ai_addrlen;             /* Length of socket address. */ // CHANGED from socklen_t
    struct sockaddr *ai_addr;   /* Socket address for socket. */
    char *ai_canonname;         /* Canonical name for service location. */
    struct addrinfo *ai_next;   /* Pointer to next in list. */
};

// Per-call state saved at entry.
struct val_t {
    u32 pid;
    char comm[TASK_COMM_LEN];
    char host[80];
    u64 ts;
};

// Event sent to user space at return.
struct data_t {
    u32 pid;
    u64 delta;
    char comm[TASK_COMM_LEN];
    u32 af;
    char host[80];
};

BPF_HASH(start, u32, struct val_t);
// Holds the `res` argument itself (a struct addrinfo **), not what it points to:
// getaddrinfo() only fills *res in just before it returns.
BPF_HASH(currres, u32, struct addrinfo **);
BPF_PERF_OUTPUT(events);

int do_entry(struct pt_regs *ctx, const char *node, const char *service,
             const struct addrinfo *hints,
             struct addrinfo **res) {
    if (!PT_REGS_PARM1(ctx))
        return 0;

    struct val_t val = {};
    u64 pid_tgid = bpf_get_current_pid_tgid();
    u32 tid = pid_tgid;                 // lower 32 bits: thread id (map key)

    if (bpf_get_current_comm(&val.comm, sizeof(val.comm)) == 0) {
        bpf_probe_read(&val.host, sizeof(val.host),
                       (void *)PT_REGS_PARM1(ctx));
        val.pid = pid_tgid >> 32;       // upper 32 bits: process id (tgid)
        val.ts = bpf_ktime_get_ns();
        start.update(&tid, &val);
        currres.update(&tid, &res);
    }
    return 0;
}

int do_return(struct pt_regs *ctx) {
    struct data_t data = {};
    u32 tid = bpf_get_current_pid_tgid();
    u64 tsp = bpf_ktime_get_ns();

    struct val_t *valp = start.lookup(&tid);
    if (valp == 0)
        return 0;   // missed start

    struct addrinfo ***result = currres.lookup(&tid);
    if (!result || !(*result)) {
        // missed entry: drop whatever half of the pair exists
        start.delete(&tid);
        currres.delete(&tid);
        return 0;
    }

    bpf_probe_read(&data.comm, sizeof(data.comm), valp->comm);
    bpf_probe_read(&data.host, sizeof(data.host), (void *)valp->host);
    data.pid = valp->pid;
    data.delta = tsp - valp->ts;

    // *res is only valid when getaddrinfo() succeeded (returned 0);
    // on failure data.af stays 0 instead of reporting garbage.
    if (PT_REGS_RC(ctx) == 0) {
        struct addrinfo **resx = *result;
        struct addrinfo *resxx = *resx;
        data.af = resxx->ai_family;
    }

    events.perf_submit(ctx, &data, sizeof(data));
    start.delete(&tid);
    currres.delete(&tid);
    return 0;
}
"""
# With --ebpf, dump the BPF C source and stop before anything is attached.
if args.ebpf:
    print(bpf_text)
    exit()

# Compile the program, then hook both the entry and the return of libc's
# getaddrinfo; each handler is attached with the same target arguments.
b = BPF(text=bpf_text)
for attach, handler in ((b.attach_uprobe, "do_entry"),
                        (b.attach_uretprobe, "do_return")):
    attach(name="c", sym="getaddrinfo", fn_name=handler, pid=args.pid)

# header
column_titles = ("TIME", "PID", "COMM", "LATms", "AF", "HOST")
print("%-9s %-6s %-16s %10s %-10s %s" % column_titles)
def print_event(cpu, data, size):
    """Perf-buffer callback: decode one event and print it as a table row.

    ``data`` is the raw event handed over by the perf buffer; ``cpu`` and
    ``size`` are accepted for the callback signature but not used. The
    ``delta`` field is divided by 1,000,000 before printing (LATms column).
    """
    evt = b["events"].event(data)
    row = (
        strftime("%H:%M:%S"),
        evt.pid,
        evt.comm.decode('utf-8', 'replace'),
        float(evt.delta) / 1000000,
        evt.af,
        evt.host.decode('utf-8', 'replace'),
    )
    print("%-9s %-6d %-16s %10.2f %-10d %s" % row)
# Register print_event as the perf-buffer callback, then keep polling for
# events until the user interrupts with Ctrl-C.
b["events"].open_perf_buffer(print_event)
while True:
    try:
        b.perf_buffer_poll()
    except KeyboardInterrupt:
        exit()