Custom Program Types¶

Beyond basic tracepoints and kprobes, eBPF supports many specialized program types for different use cases. This guide explores advanced program types and how to implement them.

🎯 eBPF Program Types Overview¶

graph TB
    subgraph "Network Programs"
        A[XDP - Packet Processing]
        B[TC - Traffic Control] 
        C[Socket Filter]
        D[Socket MSG]
    end

    subgraph "Security Programs"
        E[LSM - Security Hooks]
        F[Seccomp - Syscall Filter]
    end

    subgraph "Tracing Programs"
        G[Perf Event]
        H[Raw Tracepoint]
        I[BTF Tracepoint]
    end

    subgraph "Cgroup Programs"
        J[Cgroup SKB]
        K[Cgroup Device]
        L[Cgroup Sysctl]
    end

    style A fill:#e3f2fd
    style E fill:#f3e5f5
    style G fill:#e8f5e8
    style J fill:#fff3e0

🌐 Network Programs¶

1. XDP (eXpress Data Path)¶

XDP programs run at the earliest point in the network stack, providing line-rate packet processing.

Basic XDP Program¶

#include "common.h"
#include <linux/if_ether.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/udp.h>

// Statistics map: a per-CPU array of u64 counters, one slot per STAT_* index
// declared in the enum below (five slots in total).
struct {
    __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
    __type(key, u32);
    __type(value, u64);
    __uint(max_entries, 5);
} xdp_stats SEC(".maps");

// Indices into xdp_stats. The number of entries here must stay in sync with
// max_entries above.
enum {
    STAT_PACKETS = 0,   // incremented for every frame, before any parsing
    STAT_BYTES,         // sum of the IPv4 tot_len field of parsed IP packets
    STAT_TCP,           // IPv4 packets whose protocol is TCP
    STAT_UDP,           // IPv4 packets whose protocol is UDP
    STAT_DROPPED,       // packets dropped by the TCP destination-port rule
};

SEC("xdp")
int xdp_packet_filter(struct xdp_md *ctx) {
    void *data_end = (void *)(long)ctx->data_end;
    void *data = (void *)(long)ctx->data;

    // Update packet statistics
    u32 key = STAT_PACKETS;
    u64 *counter = bpf_map_lookup_elem(&xdp_stats, &key);
    if (counter) (*counter)++;

    // Parse Ethernet header
    struct ethhdr *eth = data;
    if ((void *)(eth + 1) > data_end)
        return XDP_ABORTED;

    // Only process IP packets
    if (eth->h_proto != bpf_htons(ETH_P_IP))
        return XDP_PASS;

    // Parse IP header
    struct iphdr *ip = (void *)(eth + 1);
    if ((void *)(ip + 1) > data_end)
        return XDP_ABORTED;

    // Update byte statistics
    key = STAT_BYTES;
    counter = bpf_map_lookup_elem(&xdp_stats, &key);
    if (counter) (*counter) += bpf_ntohs(ip->tot_len);

    // Process TCP packets
    if (ip->protocol == IPPROTO_TCP) {
        struct tcphdr *tcp = (void *)(ip + 1);
        if ((void *)(tcp + 1) > data_end)
            return XDP_ABORTED;

        key = STAT_TCP;
        counter = bpf_map_lookup_elem(&xdp_stats, &key);
        if (counter) (*counter)++;

        // Example: Drop packets to specific port
        if (tcp->dest == bpf_htons(22)) { // SSH port
            key = STAT_DROPPED;
            counter = bpf_map_lookup_elem(&xdp_stats, &key);
            if (counter) (*counter)++;
            return XDP_DROP;
        }
    }

    // Process UDP packets  
    else if (ip->protocol == IPPROTO_UDP) {
        key = STAT_UDP;
        counter = bpf_map_lookup_elem(&xdp_stats, &key);
        if (counter) (*counter)++;
    }

    return XDP_PASS;
}

char _license[] SEC("license") = "GPL";

XDP Return Codes¶

XDP_ABORTED: Program error; the packet is dropped and the xdp_exception tracepoint fires
XDP_DROP: Drop packet intentionally
XDP_PASS: Continue normal processing
XDP_TX: Transmit packet out same interface
XDP_REDIRECT: Redirect to another interface

Go XDP Integration¶

//go:generate go run github.com/cilium/ebpf/cmd/bpf2go -target native xdp ../bpf/xdp_filter.c

// attachXDPProgram loads the generated XDP objects, attaches the packet
// filter to the interface named iface, and then hands control to
// monitorXDPStats, returning whatever it returns. The link and the loaded
// objects are closed when this function returns.
func attachXDPProgram(iface string) error {
    var objs xdpObjects
    if err := loadXdpObjects(&objs, nil); err != nil {
        return fmt.Errorf("loading XDP objects: %w", err)
    }
    defer objs.Close()

    // The attach call needs the interface index, not its name.
    nic, err := net.InterfaceByName(iface)
    if err != nil {
        return fmt.Errorf("getting interface %s: %w", iface, err)
    }

    // Generic mode is chosen for compatibility.
    opts := link.XDPOptions{
        Program:   objs.XdpPacketFilter,
        Interface: nic.Index,
        Flags:     link.XDPGenericMode,
    }
    xdpLink, err := link.AttachXDP(opts)
    if err != nil {
        return fmt.Errorf("attaching XDP program: %w", err)
    }
    defer xdpLink.Close()

    log.Printf("XDP program attached to interface %s", iface)

    // Report the counters until monitoring stops.
    return monitorXDPStats(objs.XdpStats)
}

// monitorXDPStats prints the XDP counters once per second.
//
// statsMap is the xdp_stats map, a BPF_MAP_TYPE_PERCPU_ARRAY with one entry
// per statistic. Each entry therefore holds one value per CPU: every index is
// looked up separately into a slice and the per-CPU values are summed to get
// the system-wide total. A single lookup of key 0 into a scalar would read
// neither the other indices nor the other CPUs.
//
// If any lookup fails, the error is logged and that tick's output is skipped.
// The loop only ends when the ticker channel is closed, so in practice this
// function does not return.
func monitorXDPStats(statsMap *ebpf.Map) error {
    // Number of counters; must match max_entries of xdp_stats.
    const numStats = 5

    ticker := time.NewTicker(time.Second)
    defer ticker.Stop()

    for range ticker.C {
        var stats [numStats]uint64
        complete := true

        for key := uint32(0); key < numStats; key++ {
            var perCPU []uint64
            if err := statsMap.Lookup(key, &perCPU); err != nil {
                log.Printf("reading XDP stat %d: %v", key, err)
                complete = false
                break
            }
            for _, v := range perCPU {
                stats[key] += v
            }
        }
        if !complete {
            continue
        }

        fmt.Printf("XDP Stats - Packets: %d, Bytes: %d, TCP: %d, UDP: %d, Dropped: %d\n",
            stats[0], stats[1], stats[2], stats[3], stats[4])
    }

    return nil
}

2. TC (Traffic Control)¶

TC programs can modify, redirect, or drop packets at the ingress/egress points.

#include "common.h"
#include <linux/pkt_cls.h>

// TC classifier that tags IPv4 TCP packets with mark 0x1234.
// Every path returns TC_ACT_OK, so no packet is dropped or redirected here;
// frames that are too short to parse, or are not IPv4, are left unmarked.
SEC("tc")
int tc_classifier(struct __sk_buff *skb) {
    void *pkt_end = (void *)(long)skb->data_end;
    void *pkt = (void *)(long)skb->data;

    // Ethernet header must fit inside the packet.
    struct ethhdr *eth = pkt;
    if ((void *)(eth + 1) > pkt_end)
        return TC_ACT_OK;

    // Nothing to do for non-IPv4 frames.
    if (eth->h_proto != bpf_htons(ETH_P_IP))
        return TC_ACT_OK;

    // Fixed IPv4 header must fit as well.
    struct iphdr *iph = (void *)(eth + 1);
    if ((void *)(iph + 1) > pkt_end)
        return TC_ACT_OK;

    // Mark TCP for special handling further along the path.
    if (iph->protocol == IPPROTO_TCP)
        skb->mark = 0x1234;

    return TC_ACT_OK;
}

🔒 Security Programs¶

1. LSM (Linux Security Module)¶

LSM hooks allow implementing custom security policies.

#include "common.h"
#include <linux/security.h>

// Track file access attempts.
// One record is written to the security_events ring buffer for every file
// open the LSM hook observes.
struct file_access_event {
    u32 pid;            // low 32 bits of bpf_get_current_pid_tgid()
    u32 uid;            // low 32 bits of bpf_get_current_uid_gid()
    char comm[16];      // filled by bpf_get_current_comm()
    char filename[256]; // final path component (dentry name), not a full path
    u32 access_type;  // read=1, write=2, execute=4
    u8 allowed;         // 1 if the open was allowed, 0 if it was denied
};

// Ring buffer that carries file_access_event records to user space.
struct {
    __uint(type, BPF_MAP_TYPE_RINGBUF);
    __uint(max_entries, 1 << 24);
} security_events SEC(".maps");

// Whitelist of allowed executables.
// The key is the whole 256-byte name buffer, so lookups only match when every
// byte after the terminating NUL is identical to the stored key.
struct {
    __uint(type, BPF_MAP_TYPE_HASH);
    __type(key, char[256]);
    __type(value, u8);
    __uint(max_entries, 1000);
} allowed_executables SEC(".maps");

SEC("lsm/file_open")
int lsm_file_open(struct file *file) {
    // Get process information
    u32 pid = bpf_get_current_pid_tgid() & 0xFFFFFFFF;
    u32 uid = bpf_get_current_uid_gid() & 0xFFFFFFFF;

    // Get filename (simplified - real implementation needs path resolution)
    char filename[256];
    struct dentry *dentry = file->f_path.dentry;
    bpf_probe_read_kernel_str(filename, sizeof(filename), dentry->d_name.name);

    // Check if this is an executable file being opened
    bool is_executable = false;
    if (file->f_mode & FMODE_EXEC) {
        is_executable = true;

        // Check whitelist for executables
        u8 *allowed = bpf_map_lookup_elem(&allowed_executables, filename);
        if (!allowed) {
            // Log security event
            struct file_access_event *event = 
                bpf_ringbuf_reserve(&security_events, sizeof(*event), 0);
            if (event) {
                event->pid = pid;
                event->uid = uid;
                event->access_type = 4; // execute
                event->allowed = 0;
                bpf_get_current_comm(&event->comm, sizeof(event->comm));
                __builtin_memcpy(event->filename, filename, sizeof(event->filename));
                bpf_ringbuf_submit(event, 0);
            }

            // Deny execution of non-whitelisted binaries
            return -EPERM;
        }
    }

    // Log allowed access
    struct file_access_event *event = 
        bpf_ringbuf_reserve(&security_events, sizeof(*event), 0);
    if (event) {
        event->pid = pid;
        event->uid = uid;
        event->access_type = is_executable ? 4 : 1; // execute or read
        event->allowed = 1;
        bpf_get_current_comm(&event->comm, sizeof(event->comm));
        __builtin_memcpy(event->filename, filename, sizeof(event->filename));
        bpf_ringbuf_submit(event, 0);
    }

    return 0; // Allow access
}

char _license[] SEC("license") = "GPL";

2. Seccomp (Secure Computing)¶

Seccomp filters can allow or deny system calls based on the syscall number and its arguments.

#include <linux/seccomp.h>
#include <linux/filter.h>

// Syscall filter example.
//
// Verdicts:
//   execve / execveat - SECCOMP_RET_TRACE (defer the decision to a tracer)
//   ptrace            - SECCOMP_RET_KILL
//   open / openat     - SECCOMP_RET_ERRNO | EPERM when the path is exactly
//                       "/etc/passwd", otherwise allowed
//   anything else     - SECCOMP_RET_ALLOW
//
// NOTE(review): mainline seccomp accepts classic BPF filters only; confirm
// that the target kernel/loader actually supports an eBPF "seccomp" section
// before relying on this example.
SEC("seccomp")
int seccomp_filter(struct seccomp_data *ctx) {
    // Get system call number
    u32 syscall = ctx->nr;

    // Example: Block dangerous system calls
    switch (syscall) {
        case __NR_execve:
        case __NR_execveat:
            // Allow only specific executables
            return SECCOMP_RET_TRACE; // Trace for further inspection

        case __NR_ptrace:
            // Block ptrace completely
            return SECCOMP_RET_KILL;

        case __NR_open:
        case __NR_openat: {
            // Braces are required: a declaration may not directly follow a
            // case label.
            //
            // open(path, ...) passes the path as the first argument, while
            // openat(dirfd, path, ...) passes it as the second.
            u32 path_arg = (syscall == __NR_open) ? 0 : 1;
            char __user *filename = (char __user *)ctx->args[path_arg];

            // Zeroed so the comparison never reads uninitialized bytes when
            // the path is shorter than the pattern.
            char path[256] = {0};
            if (bpf_probe_read_user_str(path, sizeof(path), filename) > 0) {
                // Block access to /etc/passwd. The comparison includes the
                // terminating NUL so that only the exact path matches, not
                // names that merely start with it (e.g. /etc/passwd.bak).
                if (__builtin_memcmp(path, "/etc/passwd", sizeof("/etc/passwd")) == 0) {
                    return SECCOMP_RET_ERRNO | EPERM;
                }
            }
            break;
        }

        default:
            break;
    }

    return SECCOMP_RET_ALLOW;
}

📊 Performance Monitoring Programs¶

1. Perf Event Programs¶

Attach to hardware performance counters and software events.

#include "common.h"

// One record emitted per perf event by the handler that follows.
struct perf_sample {
    u32 pid;                // low 32 bits of bpf_get_current_pid_tgid()
    u32 cpu;                // CPU the handler ran on
    u64 timestamp;          // bpf_ktime_get_ns() at sample time
    u64 instruction_count;  // currently filled from the event's sample_period
    u64 cache_misses;       // always 0 in this example
};

// Ring buffer that carries perf_sample records to user space.
struct {
    __uint(type, BPF_MAP_TYPE_RINGBUF);
    __uint(max_entries, 1 << 20);
} perf_samples SEC(".maps");

// Perf event handler: publishes one perf_sample per event on the perf_samples
// ring buffer. When the buffer has no free space the sample is skipped.
// Always returns 0.
SEC("perf_event")
int perf_event_handler(struct bpf_perf_event_data *ctx) {
    struct perf_sample *rec = bpf_ringbuf_reserve(&perf_samples, sizeof(*rec), 0);

    if (rec) {
        // Who and where.
        rec->pid = bpf_get_current_pid_tgid() & 0xFFFFFFFF;
        rec->cpu = bpf_get_smp_processor_id();
        rec->timestamp = bpf_ktime_get_ns();

        // Placeholder counters (pseudo-code): the sample period stands in for
        // an instruction count, and cache misses would need an additional
        // perf event to be set up.
        rec->instruction_count = ctx->sample_period;
        rec->cache_misses = 0;

        bpf_ringbuf_submit(rec, 0);
    }

    return 0;
}

2. Raw Tracepoint Programs¶

More efficient than regular tracepoints, with direct access to kernel arguments.

// Raw tracepoint handler for sched_switch: records the PIDs of the outgoing
// and incoming tasks, the CPU and a timestamp on the `events` ring buffer.
// The task pointers are taken from raw arguments 1 and 2; that layout depends
// on the kernel's tracepoint definition. Always returns 0.
SEC("raw_tracepoint/sched_switch")
int raw_tp_sched_switch(struct bpf_raw_tracepoint_args *ctx) {
    struct task_struct *outgoing = (struct task_struct *)ctx->args[1];
    struct task_struct *incoming = (struct task_struct *)ctx->args[2];

    // The task structs are kernel memory, so the fields are copied out with
    // probe reads instead of being dereferenced directly.
    u32 outgoing_pid, incoming_pid;
    bpf_probe_read_kernel(&outgoing_pid, sizeof(outgoing_pid), &outgoing->pid);
    bpf_probe_read_kernel(&incoming_pid, sizeof(incoming_pid), &incoming->pid);

    struct sched_event *rec = bpf_ringbuf_reserve(&events, sizeof(*rec), 0);
    if (!rec)
        return 0; // no ring-buffer space: skip this switch

    rec->prev_pid = outgoing_pid;
    rec->next_pid = incoming_pid;
    rec->cpu = bpf_get_smp_processor_id();
    rec->timestamp = bpf_ktime_get_ns();
    bpf_ringbuf_submit(rec, 0);

    return 0;
}

🏗️ Cgroup Programs¶

1. Cgroup Device Filter¶

Control device access for processes in a cgroup.

#include <linux/device_cgroup.h>

// Cgroup device filter.
//
// Returns 0 to deny the access and 1 to allow it:
//   - char device 1:2 (/dev/kmem) is always denied
//   - write access to any block device is denied unless the caller's UID is 0
//   - everything else is allowed
SEC("cgroup/dev")
int cgroup_device_filter(struct bpf_cgroup_dev_ctx *ctx) {
    // The context has no separate `type` member. access_type packs both
    // values: the device type (BPF_DEVCG_DEV_*) in the low 16 bits and the
    // requested access mask (BPF_DEVCG_ACC_*) in the high 16 bits.
    u32 dev_type = ctx->access_type & 0xFFFF;
    u32 access = ctx->access_type >> 16;
    u32 major = ctx->major;
    u32 minor = ctx->minor;

    // Example: Block access to /dev/kmem (char device, major=1, minor=2).
    // The device type is checked too, because block and char devices have
    // separate major/minor number spaces.
    if (dev_type == BPF_DEVCG_DEV_CHAR && major == 1 && minor == 2) {
        return 0; // Deny access
    }

    // Block write access to all block devices for non-root users
    if (dev_type == BPF_DEVCG_DEV_BLOCK && (access & BPF_DEVCG_ACC_WRITE)) {
        u32 uid = bpf_get_current_uid_gid() & 0xFFFFFFFF;
        if (uid != 0) {
            return 0; // Deny access
        }
    }

    return 1; // Allow access
}

2. Cgroup Socket Programs¶

Filter and control socket operations for processes in a cgroup.

// Cgroup socket filter. Returns 1 to allow and 0 to deny:
//   - sockets that are not AF_INET are denied
//   - destination 8.8.8.8 is denied
//   - callers with a non-zero UID may only use destination ports 80 and 443
//
// NOTE(review): confirm that dst_ip4/dst_port are populated and readable at
// this attach point; destination-based policy is usually applied at a
// connect-time hook instead.
SEC("cgroup/sock")
int cgroup_sock_filter(struct bpf_sock *sk) {
    // IPv4 only.
    if (sk->family != AF_INET)
        return 0;

    // Google DNS (8.8.8.8) is blocked for everyone.
    if (sk->dst_ip4 == bpf_htonl(0x08080808))
        return 0;

    // Root is not subject to the port restriction.
    u32 uid = bpf_get_current_uid_gid() & 0xFFFFFFFF;
    if (uid == 0)
        return 1;

    // Everyone else: HTTP/HTTPS only.
    u16 port = sk->dst_port;
    if (port == bpf_htons(80) || port == bpf_htons(443))
        return 1;

    return 0;
}

🔧 Go Integration Examples¶

Loading Different Program Types¶

// attachCustomPrograms attaches the XDP, TC, LSM and perf-event programs in
// that order. It stops at the first failure and returns that error wrapped
// with the name of the step that failed. XDP and TC are attached to "eth0".
func attachCustomPrograms() error {
    steps := []struct {
        label  string
        attach func() error
    }{
        {"XDP attachment", func() error { return attachXDPProgram("eth0") }},
        {"TC attachment", func() error { return attachTCProgram("eth0") }},
        {"LSM attachment", func() error { return attachLSMProgram() }},
        {"Perf event attachment", func() error { return attachPerfEventProgram() }},
    }

    for _, step := range steps {
        if err := step.attach(); err != nil {
            return fmt.Errorf("%s: %w", step.label, err)
        }
    }

    return nil
}

// attachPerfEventProgram loads the perf objects, attaches the handler to the
// hardware CPU-cycles event for all processes on CPU 0, and then returns
// whatever monitorPerfEvents returns. The link and the loaded objects are
// closed when this function returns.
func attachPerfEventProgram() error {
    var objs perfObjects
    if err := loadPerfObjects(&objs, nil); err != nil {
        return err
    }
    defer objs.Close()

    // CPU cycles, every process (PID -1), CPU 0 only.
    opts := link.PerfEventOptions{
        Program: objs.PerfEventHandler,
        Group:   unix.PERF_TYPE_HARDWARE,
        Config:  unix.PERF_COUNT_HW_CPU_CYCLES,
        PID:     -1,
        CPU:     0,
    }
    perfLink, err := link.AttachPerfEvent(opts)
    if err != nil {
        return err
    }
    defer perfLink.Close()

    return monitorPerfEvents(objs.PerfSamples)
}

📋 Program Type Selection Guide¶

Use Case	Program Type	Attachment Point	Key Benefits
Packet Filtering	XDP	Network interface	Line-rate processing
Traffic Shaping	TC	Network qdisc	Modify/redirect packets
Security Policy	LSM	Security hooks	Fine-grained access control
Syscall Filtering	Seccomp	Process context	Block dangerous syscalls
Performance Analysis	Perf Event	Hardware counters	Low-overhead profiling
Container Security	Cgroup	Cgroup hierarchy	Per-container policies

⚡ Best Practices¶

1. Choose the Right Program Type¶

// ❌ Wrong: Using kprobe for packet processing
SEC("kprobe/netif_rx")
int slow_packet_processing(struct pt_regs *ctx) {
    // This adds overhead to packet processing

    // A non-void program must return a value on every path.
    return 0;
}

// ✅ Correct: Using XDP for packet processing
SEC("xdp")
int fast_packet_processing(struct xdp_md *ctx) {
    // Direct packet access, much faster

    // An XDP program must return a verdict; pass the packet on by default.
    return XDP_PASS;
}

2. Understand Performance Implications¶

XDP: Fastest, but limited functionality
TC: More flexible than XDP, still fast
Kprobes: Most flexible, but highest overhead
Tracepoints: Good balance of performance and functionality

3. Handle Errors Gracefully¶

// Skeleton LSM file_open hook that fails open: if the file or its dentry is
// missing it allows the access, and it also allows by default so that a
// policy gap cannot break the system. Always returns 0.
SEC("lsm/file_open")
int secure_file_open(struct file *file) {
    // Nothing to inspect: allow rather than guess.
    if (!file || !file->f_path.dentry)
        return 0;

    // Your security logic here

    // Default to allow.
    return 0;
}

Custom program types unlock the full power of eBPF for specialized use cases. Choose the right program type for your specific needs and always consider the performance and security implications! 🚀