Custom Program Types¶
Beyond basic tracepoints and kprobes, eBPF supports many specialized program types for different use cases. This guide explores advanced program types and how to implement them.
🎯 eBPF Program Types Overview¶
graph TB
subgraph "Network Programs"
A[XDP - Packet Processing]
B[TC - Traffic Control]
C[Socket Filter]
D[Socket MSG]
end
subgraph "Security Programs"
E[LSM - Security Hooks]
F[Seccomp - Syscall Filter]
end
subgraph "Tracing Programs"
G[Perf Event]
H[Raw Tracepoint]
I[BTF Tracepoint]
end
subgraph "Cgroup Programs"
J[Cgroup SKB]
K[Cgroup Device]
L[Cgroup Sysctl]
end
style A fill:#e3f2fd
style E fill:#f3e5f5
style G fill:#e8f5e8
style J fill:#fff3e0
🌐 Network Programs¶
1. XDP (eXpress Data Path)¶
XDP programs run at the earliest point in the network stack, providing line-rate packet processing.
Basic XDP Program¶
#include "common.h"
#include <linux/if_ether.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/udp.h>
// Indices of the counters stored in xdp_stats.
// STAT_MAX is not a counter: it is the number of counters and sizes the map,
// so adding a new index before it automatically grows the map.
enum {
    STAT_PACKETS = 0,
    STAT_BYTES,
    STAT_TCP,
    STAT_UDP,
    STAT_DROPPED,
    STAT_MAX,
};

// Statistics map: a per-CPU array with one u64 slot per STAT_* index.
struct {
    __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
    __type(key, u32);
    __type(value, u64);
    __uint(max_entries, STAT_MAX);
} xdp_stats SEC(".maps");
// XDP entry point: maintains the xdp_stats counters and drops TCP traffic
// addressed to port 22.
//
// Returns:
//   XDP_ABORTED - a header does not fit inside the packet, or the IPv4
//                 header-length field is smaller than the minimum header
//   XDP_DROP    - TCP segment with destination port 22
//   XDP_PASS    - everything else (including all non-IPv4 frames)
SEC("xdp")
int xdp_packet_filter(struct xdp_md *ctx) {
    void *data_end = (void *)(long)ctx->data_end;
    void *data = (void *)(long)ctx->data;

    // Update packet statistics
    u32 key = STAT_PACKETS;
    u64 *counter = bpf_map_lookup_elem(&xdp_stats, &key);
    if (counter)
        (*counter)++;

    // Parse Ethernet header
    struct ethhdr *eth = data;
    if ((void *)(eth + 1) > data_end)
        return XDP_ABORTED;

    // Only process IP packets
    if (eth->h_proto != bpf_htons(ETH_P_IP))
        return XDP_PASS;

    // Parse the fixed part of the IP header
    struct iphdr *ip = (void *)(eth + 1);
    if ((void *)(ip + 1) > data_end)
        return XDP_ABORTED;

    // Update byte statistics
    key = STAT_BYTES;
    counter = bpf_map_lookup_elem(&xdp_stats, &key);
    if (counter)
        (*counter) += bpf_ntohs(ip->tot_len);

    if (ip->protocol == IPPROTO_TCP) {
        // ihl is the IPv4 header length in 32-bit words. The TCP header
        // starts after the *whole* IP header, options included, so it must
        // be located with ihl rather than with sizeof(struct iphdr).
        u32 ip_hdr_len = ip->ihl * 4;
        if (ip_hdr_len < sizeof(*ip))
            return XDP_ABORTED;

        struct tcphdr *tcp = (void *)((unsigned char *)ip + ip_hdr_len);
        if ((void *)(tcp + 1) > data_end)
            return XDP_ABORTED;

        key = STAT_TCP;
        counter = bpf_map_lookup_elem(&xdp_stats, &key);
        if (counter)
            (*counter)++;

        // Example: Drop packets to specific port
        if (tcp->dest == bpf_htons(22)) { // SSH port
            key = STAT_DROPPED;
            counter = bpf_map_lookup_elem(&xdp_stats, &key);
            if (counter)
                (*counter)++;
            return XDP_DROP;
        }
    } else if (ip->protocol == IPPROTO_UDP) {
        key = STAT_UDP;
        counter = bpf_map_lookup_elem(&xdp_stats, &key);
        if (counter)
            (*counter)++;
    }

    return XDP_PASS;
}
// License string placed in the "license" section of the compiled object.
char _license[] SEC("license") = "GPL";
XDP Return Codes¶
- XDP_ABORTED: Error occurred, drop packet
- XDP_DROP: Drop packet intentionally
- XDP_PASS: Continue normal processing
- XDP_TX: Transmit the packet back out of the same interface it arrived on
- XDP_REDIRECT: Redirect to another interface
Go XDP Integration¶
//go:generate go run github.com/cilium/ebpf/cmd/bpf2go -target native xdp ../bpf/xdp_filter.c
// attachXDPProgram loads the generated XDP objects, attaches the packet
// filter to the interface named iface in generic mode, and then hands
// control to monitorXDPStats. The loaded objects and the link are released
// when this function returns.
func attachXDPProgram(iface string) error {
	var objs xdpObjects
	if err := loadXdpObjects(&objs, nil); err != nil {
		return fmt.Errorf("loading XDP objects: %w", err)
	}
	defer objs.Close()

	// Resolve the interface name to the index the attach call needs.
	netIf, err := net.InterfaceByName(iface)
	if err != nil {
		return fmt.Errorf("getting interface %s: %w", iface, err)
	}

	attachOpts := link.XDPOptions{
		Program:   objs.XdpPacketFilter,
		Interface: netIf.Index,
		Flags:     link.XDPGenericMode, // generic mode for compatibility
	}
	xdpLink, err := link.AttachXDP(attachOpts)
	if err != nil {
		return fmt.Errorf("attaching XDP program: %w", err)
	}
	defer xdpLink.Close()

	log.Printf("XDP program attached to interface %s", iface)

	// Runs the statistics loop; its result is this function's result.
	return monitorXDPStats(objs.XdpStats)
}
// monitorXDPStats prints the XDP counters once per second.
//
// The BPF side stores the counters in a per-CPU array with one key per
// counter (packets, bytes, TCP, UDP, dropped). Each counter therefore has to
// be looked up under its own key, and the value returned for a key is one
// uint64 per CPU that must be summed to get the total.
//
// It returns an error as soon as a lookup fails; otherwise it keeps printing
// for as long as the ticker delivers ticks.
func monitorXDPStats(statsMap *ebpf.Map) error {
	// Number of counters; must match the stat indices used by the BPF program.
	const numStats = 5

	ticker := time.NewTicker(time.Second)
	defer ticker.Stop()

	for range ticker.C {
		var stats [numStats]uint64

		for key := uint32(0); key < numStats; key++ {
			// Per-CPU maps are read into a slice holding one value per CPU.
			var perCPU []uint64
			if err := statsMap.Lookup(key, &perCPU); err != nil {
				return fmt.Errorf("reading XDP stat %d: %w", key, err)
			}
			for _, v := range perCPU {
				stats[key] += v
			}
		}

		fmt.Printf("XDP Stats - Packets: %d, Bytes: %d, TCP: %d, UDP: %d, Dropped: %d\n",
			stats[0], stats[1], stats[2], stats[3], stats[4])
	}
	return nil
}
2. TC (Traffic Control)¶
TC programs can modify, redirect, or drop packets at the ingress/egress points.
#include "common.h"
#include <linux/pkt_cls.h>
// TC classifier: tags every IPv4/TCP packet with mark 0x1234.
// Always returns TC_ACT_OK, so no packet is dropped or redirected here;
// packets that are too short to parse are passed through unmarked.
SEC("tc")
int tc_classifier(struct __sk_buff *skb) {
    void *pkt_start = (void *)(long)skb->data;
    void *pkt_end = (void *)(long)skb->data_end;

    // Ethernet header must fit inside the packet.
    struct ethhdr *l2 = pkt_start;
    if ((void *)(l2 + 1) > pkt_end)
        return TC_ACT_OK;

    // Nothing to do for non-IPv4 frames.
    if (l2->h_proto != bpf_htons(ETH_P_IP))
        return TC_ACT_OK;

    // IP header must fit inside the packet.
    struct iphdr *l3 = (void *)(l2 + 1);
    if ((void *)(l3 + 1) > pkt_end)
        return TC_ACT_OK;

    // Mark TCP packets for special handling.
    if (l3->protocol == IPPROTO_TCP)
        skb->mark = 0x1234;

    return TC_ACT_OK;
}
🔒 Security Programs¶
1. LSM (Linux Security Module)¶
LSM hooks allow implementing custom security policies.
#include "common.h"
#include <linux/security.h>
// Track file access attempts.
// One record per decision taken by lsm_file_open; records are written to the
// security_events ring buffer.
struct file_access_event {
u32 pid; // low 32 bits of bpf_get_current_pid_tgid()
u32 uid; // low 32 bits of bpf_get_current_uid_gid()
char comm[16]; // filled by bpf_get_current_comm()
char filename[256]; // copy of the dentry name read by the hook (not a full path)
u32 access_type; // read=1, write=2, execute=4
u8 allowed; // 1 when the open was allowed, 0 when it was denied
};
// Ring buffer that carries file_access_event records out of the LSM hook.
struct {
__uint(type, BPF_MAP_TYPE_RINGBUF);
__uint(max_entries, 1 << 24); // 16 Mi; presumably the buffer size in bytes - confirm
} security_events SEC(".maps");
// Whitelist of allowed executables.
// Keyed by the full 256-byte name buffer, so a lookup key must match an
// entry in all 256 bytes, not only up to the terminating NUL.
// lsm_file_open only tests whether a key is present; the value is not read.
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__type(key, char[256]);
__type(value, u8);
__uint(max_entries, 1000);
} allowed_executables SEC(".maps");
SEC("lsm/file_open")
int lsm_file_open(struct file *file) {
// Get process information
u32 pid = bpf_get_current_pid_tgid() & 0xFFFFFFFF;
u32 uid = bpf_get_current_uid_gid() & 0xFFFFFFFF;
// Get filename (simplified - real implementation needs path resolution)
char filename[256];
struct dentry *dentry = file->f_path.dentry;
bpf_probe_read_kernel_str(filename, sizeof(filename), dentry->d_name.name);
// Check if this is an executable file being opened
bool is_executable = false;
if (file->f_mode & FMODE_EXEC) {
is_executable = true;
// Check whitelist for executables
u8 *allowed = bpf_map_lookup_elem(&allowed_executables, filename);
if (!allowed) {
// Log security event
struct file_access_event *event =
bpf_ringbuf_reserve(&security_events, sizeof(*event), 0);
if (event) {
event->pid = pid;
event->uid = uid;
event->access_type = 4; // execute
event->allowed = 0;
bpf_get_current_comm(&event->comm, sizeof(event->comm));
__builtin_memcpy(event->filename, filename, sizeof(event->filename));
bpf_ringbuf_submit(event, 0);
}
// Deny execution of non-whitelisted binaries
return -EPERM;
}
}
// Log allowed access
struct file_access_event *event =
bpf_ringbuf_reserve(&security_events, sizeof(*event), 0);
if (event) {
event->pid = pid;
event->uid = uid;
event->access_type = is_executable ? 4 : 1; // execute or read
event->allowed = 1;
bpf_get_current_comm(&event->comm, sizeof(event->comm));
__builtin_memcpy(event->filename, filename, sizeof(event->filename));
bpf_ringbuf_submit(event, 0);
}
return 0; // Allow access
}
// License string placed in the "license" section of the compiled object.
char _license[] SEC("license") = "GPL";
2. Seccomp (Secure Computing)¶
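Seccomp filters can allow or deny system calls based on the syscall number and its raw argument values. Note that mainline Linux seccomp filters are written in classic BPF (cBPF) and cannot dereference user-space pointers, so the eBPF-style program below is illustrative only.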
#include <linux/seccomp.h>
#include <linux/filter.h>
SEC("seccomp")
int seccomp_filter(struct seccomp_data *ctx) {
// Get system call number
u32 syscall = ctx->nr;
// Example: Block dangerous system calls
switch (syscall) {
case __NR_execve:
case __NR_execveat:
// Allow only specific executables
return SECCOMP_RET_TRACE; // Trace for further inspection
case __NR_ptrace:
// Block ptrace completely
return SECCOMP_RET_KILL;
case __NR_open:
case __NR_openat:
// Check file path (simplified)
char __user *filename = (char __user *)ctx->args[1];
char path[256];
if (bpf_probe_read_user_str(path, sizeof(path), filename) > 0) {
// Block access to /etc/passwd
if (__builtin_memcmp(path, "/etc/passwd", 11) == 0) {
return SECCOMP_RET_ERRNO | EPERM;
}
}
break;
}
return SECCOMP_RET_ALLOW;
}
📊 Performance Monitoring Programs¶
1. Perf Event Programs¶
Attach to hardware performance counters and software events.
#include "common.h"
// One record written to the perf_samples ring buffer by perf_event_handler.
struct perf_sample {
u32 pid; // low 32 bits of bpf_get_current_pid_tgid()
u32 cpu; // bpf_get_smp_processor_id() at sample time
u64 timestamp; // bpf_ktime_get_ns() at sample time
u64 instruction_count; // currently filled from ctx->sample_period (placeholder)
u64 cache_misses; // currently always 0
};
// Ring buffer that carries perf_sample records out of perf_event_handler.
struct {
__uint(type, BPF_MAP_TYPE_RINGBUF);
__uint(max_entries, 1 << 20); // 1 Mi; presumably the buffer size in bytes - confirm
} perf_samples SEC(".maps");
// Perf-event handler: emits one perf_sample per invocation into
// perf_samples. When the ring buffer is full the sample is skipped.
// Always returns 0.
SEC("perf_event")
int perf_event_handler(struct bpf_perf_event_data *ctx) {
    struct perf_sample *rec =
        bpf_ringbuf_reserve(&perf_samples, sizeof(*rec), 0);

    if (rec) {
        rec->pid = bpf_get_current_pid_tgid() & 0xFFFFFFFF;
        rec->cpu = bpf_get_smp_processor_id();
        rec->timestamp = bpf_ktime_get_ns();

        // Placeholder values: the sample period stands in for an instruction
        // count, and cache misses would need additional perf event setup.
        rec->instruction_count = ctx->sample_period;
        rec->cache_misses = 0;

        bpf_ringbuf_submit(rec, 0);
    }

    return 0;
}
2. Raw Tracepoint Programs¶
Raw tracepoint programs are more efficient than regular tracepoint programs and give direct access to the tracepoint's raw arguments.
// Raw tracepoint handler for sched_switch: records the PIDs of the outgoing
// and incoming tasks, the current CPU and a timestamp in the events ring
// buffer. When no space can be reserved the switch is not recorded.
// Always returns 0.
SEC("raw_tracepoint/sched_switch")
int raw_tp_sched_switch(struct bpf_raw_tracepoint_args *ctx) {
    // Raw tracepoint arguments 1 and 2 are used as the previous and next task.
    struct task_struct *outgoing = (struct task_struct *)ctx->args[1];
    struct task_struct *incoming = (struct task_struct *)ctx->args[2];

    // Task fields are read through the probe-read helper
    // (be careful with kernel versions).
    u32 outgoing_pid;
    u32 incoming_pid;
    bpf_probe_read_kernel(&outgoing_pid, sizeof(outgoing_pid), &outgoing->pid);
    bpf_probe_read_kernel(&incoming_pid, sizeof(incoming_pid), &incoming->pid);

    struct sched_event *rec = bpf_ringbuf_reserve(&events, sizeof(*rec), 0);
    if (!rec)
        return 0;

    rec->prev_pid = outgoing_pid;
    rec->next_pid = incoming_pid;
    rec->cpu = bpf_get_smp_processor_id();
    rec->timestamp = bpf_ktime_get_ns();
    bpf_ringbuf_submit(rec, 0);
    return 0;
}
🏗️ Cgroup Programs¶
1. Cgroup Device Filter¶
Control device access for processes in a cgroup.
#include <linux/device_cgroup.h>
// Cgroup device filter. Returns 0 to deny the access and 1 to allow it.
//
// Policy:
//   - deny every access to character device 1:2 (/dev/kmem)
//   - deny write access to block devices for non-root users
//   - allow everything else
SEC("cgroup/dev")
int cgroup_device_filter(struct bpf_cgroup_dev_ctx *ctx) {
    // The context has no separate "type" field. access_type packs two values:
    // the device type (BPF_DEVCG_DEV_*) in the low 16 bits and the requested
    // access (BPF_DEVCG_ACC_*) in the high 16 bits.
    u32 dev_type = ctx->access_type & 0xFFFF;
    u32 access = ctx->access_type >> 16;
    u32 major = ctx->major;
    u32 minor = ctx->minor;

    // Example: Block access to /dev/kmem (char device, major=1, minor=2).
    // The type check keeps the rule from also matching block device 1:2.
    if (dev_type == BPF_DEVCG_DEV_CHAR && major == 1 && minor == 2)
        return 0; // Deny access

    // Block write access to all block devices for non-root users
    if (dev_type == BPF_DEVCG_DEV_BLOCK && (access & BPF_DEVCG_ACC_WRITE)) {
        u32 uid = bpf_get_current_uid_gid() & 0xFFFFFFFF;
        if (uid != 0)
            return 0; // Deny access
    }

    return 1; // Allow access
}
2. Cgroup Socket Programs¶
Filter and control socket operations for processes in a cgroup.
// Cgroup socket filter. Returns 1 to allow and 0 to deny.
//
// Policy, checked in this order:
//   1. deny anything that is not an IPv4 socket
//   2. deny when the destination address is 8.8.8.8
//   3. allow root unconditionally
//   4. for other users allow only destination ports 80 and 443
//
// NOTE(review): "cgroup/sock" looks like a socket-creation hook, where the
// destination address and port may not be set yet; if so, rules 2 and 4 do
// not see the real peer and a connect-time hook would be needed. Confirm.
SEC("cgroup/sock")
int cgroup_sock_filter(struct bpf_sock *sk) {
    // Only allow IPv4 sockets
    if (sk->family != AF_INET)
        return 0;

    // Block connections to Google DNS (8.8.8.8)
    if (sk->dst_ip4 == bpf_htonl(0x08080808))
        return 0;

    // Root is not port-restricted
    u32 uid = bpf_get_current_uid_gid() & 0xFFFFFFFF;
    if (uid == 0)
        return 1;

    // Non-root: only HTTP/HTTPS
    u16 port = sk->dst_port;
    if (port == bpf_htons(80) || port == bpf_htons(443))
        return 1;

    return 0;
}
🔧 Go Integration Examples¶
Loading Different Program Types¶
// attachCustomPrograms attaches the XDP, TC, LSM and perf-event programs in
// that order. It stops at the first failure and returns that error wrapped
// with the name of the step that failed; it returns nil when every step
// succeeds.
func attachCustomPrograms() error {
	steps := []struct {
		errFormat string
		attach    func() error
	}{
		{"XDP attachment: %w", func() error { return attachXDPProgram("eth0") }},
		{"TC attachment: %w", func() error { return attachTCProgram("eth0") }},
		{"LSM attachment: %w", func() error { return attachLSMProgram() }},
		{"Perf event attachment: %w", func() error { return attachPerfEventProgram() }},
	}

	for _, step := range steps {
		if err := step.attach(); err != nil {
			return fmt.Errorf(step.errFormat, err)
		}
	}
	return nil
}
// attachPerfEventProgram loads the perf objects, attaches the handler to the
// hardware CPU-cycles event on CPU 0 for all processes, and then hands
// control to monitorPerfEvents. Errors are returned unwrapped. The loaded
// objects and the link are released when this function returns.
//
// NOTE(review): confirm that link.AttachPerfEvent and link.PerfEventOptions
// exist with these fields in the cilium/ebpf version this project vendors.
func attachPerfEventProgram() error {
	var objs perfObjects
	if err := loadPerfObjects(&objs, nil); err != nil {
		return err
	}
	defer objs.Close()

	// CPU cycles perf event
	opts := link.PerfEventOptions{
		Program: objs.PerfEventHandler,
		Group:   unix.PERF_TYPE_HARDWARE,
		Config:  unix.PERF_COUNT_HW_CPU_CYCLES,
		PID:     -1, // All processes
		CPU:     0,  // CPU 0
	}
	perfLink, err := link.AttachPerfEvent(opts)
	if err != nil {
		return err
	}
	defer perfLink.Close()

	return monitorPerfEvents(objs.PerfSamples)
}
📋 Program Type Selection Guide¶
| Use Case | Program Type | Attachment Point | Key Benefits |
|---|---|---|---|
| Packet Filtering | XDP | Network interface | Line-rate processing |
| Traffic Shaping | TC | Network qdisc | Modify/redirect packets |
| Security Policy | LSM | Security hooks | Fine-grained access control |
| Syscall Filtering | Seccomp | Process context | Block dangerous syscalls |
| Performance Analysis | Perf Event | Hardware counters | Low-overhead profiling |
| Container Security | Cgroup | Cgroup hierarchy | Per-container policies |
⚡ Best Practices¶
1. Choose the Right Program Type¶
// ❌ Wrong: Using kprobe for packet processing
// Counter-example stub: a kprobe on netif_rx used as a packet hook.
// Returns 0 so the function does not fall off the end of a non-void body.
SEC("kprobe/netif_rx")
int slow_packet_processing(struct pt_regs *ctx) {
    // This adds overhead to packet processing
    return 0;
}
// ✅ Correct: Using XDP for packet processing
// Example stub: an XDP program that lets every packet continue up the stack.
// An explicit XDP_PASS is returned: an XDP program must return a verdict,
// and a value of 0 would mean XDP_ABORTED.
SEC("xdp")
int fast_packet_processing(struct xdp_md *ctx) {
    // Direct packet access, much faster
    return XDP_PASS;
}
2. Understand Performance Implications¶
- XDP: Fastest, but limited functionality
- TC: More flexible than XDP, still fast
- Kprobes: Most flexible, but highest overhead
- Tracepoints: Good balance of performance and functionality
3. Handle Errors Gracefully¶
// Skeleton LSM file_open hook showing the fail-open pattern: when the file
// or its dentry is missing the hook allows the open instead of guessing.
// As written it returns 0 (allow) on every path.
SEC("lsm/file_open")
int secure_file_open(struct file *file) {
    // Validate pointers before dereferencing; the file pointer is tested
    // first so the dentry is only read from a non-NULL file.
    if (!file || !file->f_path.dentry)
        return 0; // Allow if the file or its path cannot be determined

    // Your security logic here

    // Default to allow to avoid breaking system
    return 0;
}
Custom program types unlock the full power of eBPF for specialized use cases. Choose the right program type for your specific needs and always consider the performance and security implications! 🚀