操屁眼的视频在线免费看,日本在线综合一区二区,久久在线观看免费视频,欧美日韩精品久久综

新聞資訊

    上一節介紹了kprobe的基本概念,下面我們將使用幾個具體的例子,看下kprobe在實際使用中有哪些應用場景。

    kprobe

    內核的samples/kprobes目錄下有kprobe相關的例子,我們以這些例子為基礎,簡單修改下。

    查看函數的入參

    我們所有的例子都是探測do_sys_open() 或者_do_fork(),以下是內核中的源碼。

    do_sys_open

    /* Forward declaration; only a pointer to this type is stored below. */
    struct audit_names;
    /*
     * Name record handed between the open helpers quoted in this article:
     * do_sys_open() obtains one from getname(), passes it to do_filp_open()
     * and releases it with putname().
     */
    struct filename {
     const char  *name; /* pointer to actual string */
     const __user char *uptr; /* original userland pointer */
     struct audit_names *aname;
     int   refcnt;
     const char  iname[]; /* flexible array member, must stay last */
    };
    
    /*
     * Open the file named by the user-space string @filename relative to @dfd.
     * Returns the new file descriptor on success; otherwise returns the failing
     * value produced by build_open_flags(), getname(), get_unused_fd_flags()
     * or do_filp_open().
     */
    long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode)
    {
     struct open_flags op;
     int fd = build_open_flags(flags, mode, &op);
     struct filename *tmp;
    
     /* A non-zero result from build_open_flags() is returned unchanged. */
     if (fd)
      return fd;
    
     /* Build a struct filename from the user pointer; released by putname() below. */
     tmp = getname(filename);
     if (IS_ERR(tmp))
      return PTR_ERR(tmp);
    
     fd = get_unused_fd_flags(flags);
     if (fd >= 0) {
      /* do_filp_open() is the symbol probed by the kprobe/jprobe samples. */
      struct file *f = do_filp_open(dfd, tmp, &op);
      if (IS_ERR(f)) {
       /* Open failed: give the descriptor back and report the error code. */
       put_unused_fd(fd);
       fd = PTR_ERR(f);
      } else {
       fsnotify_open(f);
       fd_install(fd, f);
      }
     }
     putname(tmp);
     return fd;
    }
    

    _do_fork

    /*
     * Create a new task with copy_process() and start it with wake_up_new_task().
     * Returns nr, the pid_vnr() value of the new task's pid, or PTR_ERR(p) when
     * copy_process() fails.
     */
    long _do_fork(unsigned long clone_flags,
           unsigned long stack_start,
           unsigned long stack_size,
           int __user *parent_tidptr,
           int __user *child_tidptr,
           unsigned long tls)
    {
     struct task_struct *p;
     int trace = 0;
     long nr;
    
     /*
      * Determine whether and which event to report to ptracer.  When
      * called from kernel_thread or CLONE_UNTRACED is explicitly
      * requested, no event is reported; otherwise, report if the event
      * for the type of forking is enabled.
      */
     if (!(clone_flags & CLONE_UNTRACED)) {
      if (clone_flags & CLONE_VFORK)
       trace = PTRACE_EVENT_VFORK;
      else if ((clone_flags & CSIGNAL) != SIGCHLD)
       trace = PTRACE_EVENT_CLONE;
      else
       trace = PTRACE_EVENT_FORK;
    
      if (likely(!ptrace_event_enabled(current, trace)))
       trace = 0;
     }
    
     p = copy_process(clone_flags, stack_start, stack_size,
        child_tidptr, NULL, trace, tls, NUMA_NO_NODE);
     /*
      * Do this prior waking up the new thread - the thread pointer
      * might get invalid after that point, if the thread exits quickly.
      */
     if (!IS_ERR(p)) {
      struct completion vfork;
      struct pid *pid;
    
      cpufreq_task_times_alloc(p);
    
      trace_sched_process_fork(current, p);
    
      pid = get_task_pid(p, PIDTYPE_PID);
      /* nr is the variable the offset-based kprobe example reads right after pid_vnr() returns. */
      nr = pid_vnr(pid);
    
      if (clone_flags & CLONE_PARENT_SETTID)
       put_user(nr, parent_tidptr);
    
      /* For vfork, set up the completion that the parent waits on further down. */
      if (clone_flags & CLONE_VFORK) {
       p->vfork_done = &vfork;
       init_completion(&vfork);
       get_task_struct(p);
      }
    
      wake_up_new_task(p);
    
      /* forking complete and child started to run, tell ptracer */
      if (unlikely(trace))
       ptrace_event_pid(trace, pid);
    
      if (clone_flags & CLONE_VFORK) {
       if (!wait_for_vfork_done(p, &vfork))
        ptrace_event_pid(PTRACE_EVENT_VFORK_DONE, pid);
      }
    
      put_pid(pid);
     } else {
      nr = PTR_ERR(p);
     }
     return nr;
    }
    

    實際調試中經常需要調查函數使用的變量的值。要在kprobes的偵測器內顯示某個函數的局部變量的值,需要一些技巧,原因是在printk的參數中無法直接指定變量名,因此必須給偵測器函數提供一個pt_regs結構,其中保存了指定地址的指令執行時的寄存器信息。

    當然,不同架構下該結構的成員變量不盡相同,但用該結構可以顯示變量等更為詳細的信息。

    ARM64,ARM32,X86的寄存器及其訪問方式可以看文末的目錄

    kprobe_example.c

    /*
     * NOTE: This example works on x86 and arm64.
     * Here's a sample kernel module showing the use of kprobes to dump a
     * stack trace and selected registers when _do_fork() is called.
     *
     * For more information on theory of operation of kprobes, see
     * Documentation/kprobes.txt
     *
     * You will see the trace data in /var/log/messages and on the console
     * whenever _do_fork() is invoked to create a new process.
     */
    
    #include <linux/kernel.h>
    #include <linux/module.h>
    #include <linux/kprobes.h>
    /* Name of the kernel symbol the probe is planted on. */
    #define TRACE_SYMBOL "do_filp_open"
    
    /* For each probe you need to allocate a kprobe structure */
    static struct kprobe kp = {
     .symbol_name = TRACE_SYMBOL,
    };
    /* x86_64 passes the leading integer arguments in rdi, rsi, rdx, rcx, r8, r9. */
    /* aarch64 passes the first eight arguments in x0-x7. */
    /*
     * kprobe pre_handler: called just before the probed instruction is executed.
     *
     * Reads the first two arguments of the probed function (dfd and the
     * struct filename pointer) from the saved registers and logs them when the
     * file name is "testfile". On architectures not handled below nothing is
     * read and nothing is logged.
     *
     * Always returns 0.
     */
    static int handler_pre(struct kprobe *p, struct pt_regs *regs)
    {
        int dfd = -1;
        struct filename *filename = NULL;

        /*
         * Only 64-bit x86 uses rdi/rsi for the first two arguments; a plain
         * CONFIG_X86 test would also match 32-bit kernels, where di/si hold
         * unrelated values and the pointer below would be garbage.
         */
    #ifdef CONFIG_X86_64
        dfd = (int)regs->di;
        filename = (struct filename *)regs->si;
    #endif

    #ifdef CONFIG_ARM64
        dfd = (int)regs->regs[0];
        filename = (struct filename *)regs->regs[1];
    #endif

        if (filename && !strcmp(filename->name, "testfile")) {
            printk(KERN_INFO "handler_pre:%s: dfd=%d, name=%s\n",
                   p->symbol_name, dfd, filename->name);
        }

        return 0;
    }
    
    /*
     * kprobe post_handler: called after the probed instruction is executed.
     * Deliberately does nothing; the log line was disabled to keep dmesg short.
     */
    static void handler_post(struct kprobe *p, struct pt_regs *regs,
        unsigned long flags)
    {
        /* intentionally empty */
    }
    
    /*
     * fault_handler: invoked when an exception is raised by an instruction in
     * the pre- or post-handler, or while Kprobes single-steps the probed
     * instruction. Logging is disabled here to keep dmesg output short.
     */
    static int handler_fault(struct kprobe *p, struct pt_regs *regs, int trapnr)
    {
        /* The fault is not handled here, so report 0. */
        return 0;
    }
    
    /*
     * Module entry point: attach the three handlers to kp and register the probe.
     * Returns 0 on success or the negative value from register_kprobe().
     */
    static int __init kprobe_init(void)
    {
        int err;

        kp.pre_handler = handler_pre;
        kp.post_handler = handler_post;
        kp.fault_handler = handler_fault;

        err = register_kprobe(&kp);
        if (err >= 0) {
            printk(KERN_INFO "Planted kprobe at %p\n", kp.addr);
            return 0;
        }

        printk(KERN_INFO "register_kprobe failed, returned %d\n", err);
        return err;
    }
    
    /* Module exit point: remove the probe, then log the address it was planted at. */
    static void __exit kprobe_exit(void)
    {
     unregister_kprobe(&kp);
     printk(KERN_INFO "kprobe at %p unregistered\n", kp.addr);
    }
    
    module_init(kprobe_init)
    module_exit(kprobe_exit)
    MODULE_LICENSE("GPL");
    
    

    我們以內核目錄下的例程做一個簡單修改,探測do_filp_open函數,當打開testfile文件時,自動打印出文件的路徑。

    為了減少無效信息的打印,我們將handler_post、handler_fault中的打印語句直接注釋掉。

    當探測點do_filp_open命中時,Kprobes調用handler_pre。在handler_pre中根據struct filename *pathname來獲得文件的名字。

    在x86_64架構中,函數的參數從左到右分別保存在rdi、rsi、rdx、rcx、r8、r9中,因此查看rdi和rsi就能得到第1個、第2個參數的值。

    同理,在ARM64架構中,函數的參數1~參數8分別保存到 X0~X7 寄存器中,剩下的參數從右往左依次入棧。因此,X0和X1分別存放dfd、pathname的值。

    makefile

    # Out-of-tree build of the probe sample modules for an arm64 target.
    # NOTE: in a real Makefile the recipe lines below must start with a TAB.
    ARCH          := arm64
    CROSS_COMPILE := aarch64-linux-gnu-
    CC            := $(CROSS_COMPILE)gcc
    LD            := $(CROSS_COMPILE)ld
    
    PWD := $(shell pwd)
    obj-m := kprobe_example.o jprobe_example.o  kretprobe_example.o
    
    KERNELDIR := /home/zhongyi/code/rk3399_linux_release_v2.5.1_20210301/kernel
    
    .PHONY: all clean
    
    # Use $(MAKE) so flags and the jobserver propagate to the kernel build, and
    # pass CROSS_COMPILE explicitly instead of relying on kbuild re-reading this file.
    all:
            $(MAKE) -C $(KERNELDIR) M=$(PWD) ARCH=$(ARCH) CROSS_COMPILE=$(CROSS_COMPILE) modules
    clean:
            rm -f *.o
            rm -f *.symvers
            rm -f *.order
            rm -f *.ko
            rm -f *.mod.c
    

    執行make編譯后,在開發板上將驅動加載,然后手動打開testfile文件。

    insmod kprobe_example.ko
    vim testfile
    rmmod kprobe_example.ko
    dmesg
    

    使用dmesg可以看到成功輸出文件名和dfd。

    [  307.572314] Planted kprobe at ffffff80081fdf84
    [  311.997767] handler_pre:do_filp_open: dfd=-100, name=testfile
    [  312.034774] handler_pre:do_filp_open: dfd=-100, name=testfile
    [  347.969572] kprobe at ffffff80081fdf84 unregistered
    

    顯示棧跟蹤

    使用kprobes的另一個有效的調試方法,就是顯示棧跟蹤。

    我們只需要在handler_pre中調用dump_stack();即可。

    /* x86_64 passes the leading integer arguments in rdi, rsi, rdx, rcx, r8, r9. */
    /* aarch64 passes the first eight arguments in x0-x7. */
    /* kprobe pre_handler: called just before the probed instruction is executed */
    static int handler_pre(struct kprobe *p, struct pt_regs *regs)
    {
     /* Print the call stack that led to the probed address. */
     dump_stack();
     return 0;
    }
    
    

    編譯加載

    insmod kprobe_example.ko
    rmmod kprobe_example.ko
    dmesg
    

    成功打印出棧的信息。

    [  451.620803] CPU: 4 PID: 1299 Comm: rmmod Tainted: G           O    4.4.194+ #18
    [  451.620809] Hardware name: Firefly-RK3399 Board (Linux Opensource) (DT)
    [  451.620813] Call trace:
    [  451.620820] [<ffffff8008088410>] dump_backtrace+0x0/0x220
    [  451.620828] [<ffffff8008088654>] show_stack+0x24/0x30
    [  451.620834] [<ffffff80084f842c>] dump_stack+0x94/0xbc
    [  451.620842] [<ffffff8000f22048>] handler_pre+0x14/0x24 [kprobe_example]
    [  451.620848] [<ffffff8008efd824>] kprobe_breakpoint_handler+0x100/0x14c
    [  451.620855] [<ffffff8008084128>] brk_handler+0x54/0x80
    [  451.620860] [<ffffff8008080b0c>] do_debug_exception+0x58/0xc0
    [  451.620866] Exception stack(0xffffffc0f2ef7c40 to 0xffffffc0f2ef7d70)
    [  451.620879] 7c40: ffffffc0ef782000 0000008000000000 ffffffc0f2ef7e20 ffffff80081fdf84
    [  451.620886] 7c60: 0000000060000145 ffffff8008efc228 ffffffc0ceff2a50 ffffffc0ee7d2988
    [  451.620892] 7c80: ffffffc0f2ef7ca0 ffffff80081c0dc8 ffffffc0f0582e70 00e80000e95f3f53
    [  451.620898] 7ca0: ffffffc0f2ef7d70 ffffff8008efe3e8 ffffffc0f2ef7ec0 0000005583d31928
    [  451.620905] 7cc0: 0000000000000055 0000000092000047 ffffffc0ceec5100 ffffffc0dccbd500
    [  451.620911] 7ce0: 0000000000000024 ffffffc0dccbd580 00000000ffffff9c ffffffc0ef782000
    [  451.620917] 7d00: ffffffc0f2ef7e78 0000000000000000 0000000000000000 0000000000000003
    [  451.620923] 7d20: ffffffc0dcfc9a80 0000007fd94380e8 0000000000000000 fefefefefefefeff
    [  451.620929] 7d40: 0000000000000001 0000007fd9437db8 0000000000000000 0000000000000000
    [  451.620934] 7d60: 0000000000000000 000000007fffffde
    [  451.620940] [<ffffff8008082668>] el1_dbg+0x18/0x7c
    [  451.620947] [<ffffff80081ed9a4>] SyS_openat+0x3c/0x4c
    [  451.620953] [<ffffff8008082f70>] el0_svc_naked+0x24/0x28
    [  451.630032] kprobe at ffffff80081fdf84 unregistered
    

    任意位置通過變量名獲取信息

    kprobes擁有更加強大的功能,那就是它能在內核的任意地址插入偵測器。此外,偵測器可以在任意地址的指令執行之前或之后執行,或者前后都執行。

    因此,應當觀察匯編代碼,找到源代碼中想要調查的位置對應于編譯后的二進制文件中的什么地址,并調查希望顯示的變量保存在哪個寄存器、哪個內存地址。

    通常,我們希望在函數執行的過程中查看變量,即打印一些流程中的東西,而不是僅在函數本身被調用時探測,此時我們不能只簡單設置 kprobe->symbol_name 為函數名字。假設我們期望獲取 _do_fork 函數中變量 nr 的值:

    將vmlinux進行反匯編,找出_do_fork的地址。

    aarch64-linux-gnu-objdump -s -d vmlinux > vmlinux.asm
    

    _do_fork 反匯編如下所示,地址為ffffff80080ba83c

    ffffff80080ba83c <_do_fork>:
    ffffff80080ba83c:       a9b97bfd        stp     x29, x30, [sp, #-112]!
    ffffff80080ba840:       910003fd        mov     x29, sp
    ffffff80080ba844:       a90153f3        stp     x19, x20, [sp, #16]
    ffffff80080ba848:       a9025bf5        stp     x21, x22, [sp, #32]
    ffffff80080ba84c:       a90363f7        stp     x23, x24, [sp, #48]
    ffffff80080ba850:       aa0003f5        mov     x21, x0
    ffffff80080ba854:       aa0103f3        mov     x19, x1
    ffffff80080ba858:       aa0203f6        mov     x22, x2
    ffffff80080ba85c:       aa0303f7        mov     x23, x3
    ffffff80080ba860:       aa0403f8        mov     x24, x4
    ffffff80080ba864:       aa1e03e0        mov     x0, x30
    ffffff80080ba868:       97ff4e8a        bl      ffffff800808e290 <_mcount>
    ffffff80080ba86c:       37b814f5        tbnz    w21, #23, ffffff80080bab08 <_do_fork+0x2cc>
    ffffff80080ba870:       37701495        tbnz    w21, #14, ffffff80080bab00 <_do_fork+0x2c4>
    ffffff80080ba874:       92401ea0        and     x0, x21, #0xff
    ffffff80080ba878:       52800074        mov     w20, #0x3                       // #3
    ffffff80080ba87c:       f100441f        cmp     x0, #0x11
    ffffff80080ba880:       1a9f1694        csinc   w20, w20, wzr, ne  // ne = any
    ffffff80080ba884:       11000e81        add     w1, w20, #0x3
    ............................
    ffffff80080ba91c:       b5000fb6        cbnz    x22, ffffff80080bab10 <_do_fork+0x2d4>
    ffffff80080ba920:       52800001        mov     w1, #0x0                        // #0
    ffffff80080ba924:       aa1303e0        mov     x0, x19
    ffffff80080ba928:       94006a17        bl      ffffff80080d5184 <get_task_pid>
    ffffff80080ba92c:       aa0003f6        mov     x22, x0
    ffffff80080ba930:       94006a85        bl      ffffff80080d5344 <pid_vnr>
    ffffff80080ba934:       93407c18        sxtw    x24, w0
    ffffff80080ba938:       36a00195        tbz     w21, #20, ffffff80080ba968 <_do_fork+0x12c>
    ffffff80080ba93c:       d5384101        mrs     x1, sp_el0
    ffffff80080ba940:       f9400422        ldr     x2, [x1, #8]
    ffffff80080ba944:       aa1703e1        mov     x1, x23
    ffffff80080ba948:       b1001021        adds    x1, x1, #0x4
    
    

    nr 變量是函數pid_vnr的返回值(也是子進程的pid),根據ARM調用規范,調用完成pid_vnr()后,寄存器x0存放的就是其函數返回值。

    參考:ARM64調用標準 https://blog.51cto.com/u_15333820/3452605

    通過反匯編可以知道,pid_vnr在ffffff80080ba930地址處被調用,因此,偵測器的插入地址就是在ffffff80080ba930之后,并且x0被改變之前。只要符合這兩個條件,放在哪里都無所謂。

    因此,我們將kprobe的點設置為ffffff80080ba934,然后獲取 x0,就能獲取變量nr的值。

    .offset 是探測點相對于_do_fork的偏移,在注冊時指定。我們這里的 offset = 0xffffff80080ba934 - 0xffffff80080ba83c = 0xF8

    另外,反匯編能力就是多看匯編以及找到幾個關鍵點(例如常量、跳轉語句)就能定位到匯編對應的源碼了,這里不再展開了。

    /*
     * NOTE: This example works on x86 and arm64.
     * Here's a sample kernel module showing the use of kprobes to dump a
     * stack trace and selected registers when _do_fork() is called.
     *
     * For more information on theory of operation of kprobes, see
     * Documentation/kprobes.txt
     *
     * You will see the trace data in /var/log/messages and on the console
     * whenever _do_fork() is invoked to create a new process.
     */
    
    #include <linux/kernel.h>
    #include <linux/module.h>
    #include <linux/kprobes.h>
    
    /* For each probe you need to allocate a kprobe structure */
    static struct kprobe kp = {
     .symbol_name = "_do_fork",
        /*
         * Offset of the probed instruction from the start of the symbol.
         * 0xF8 was derived from one specific arm64 vmlinux disassembly and
         * must be recomputed for any other kernel build.
         */
        .offset = 0xF8,
    };
    
    /*
     * kprobe pre_handler: runs just before the probed instruction executes.
     * Logs the probe address, the program counter, the flags/pstate register
     * and rax (x86) or x0 (arm64). Always returns 0.
     */
    static int handler_pre(struct kprobe *p, struct pt_regs *regs)
    {
    #if defined(CONFIG_X86)
        printk(KERN_INFO
               "pre_handler: p->addr = 0x%p, ip = %lx, flags = 0x%lx,rax = 0x%lx\n",
               p->addr, regs->ip, regs->flags, regs->ax);
    #endif

    #if defined(CONFIG_ARM64)
        pr_info("<%s> pre_handler: p->addr = 0x%p, pc = 0x%lx, pstate = 0x%lx,x0 = 0x%lx\n",
                p->symbol_name, p->addr,
                (long)regs->pc, (long)regs->pstate, (long)regs->regs[0]);
    #endif

        /* Calling dump_stack() here would print a stack backtrace. */
        return 0;
    }
    
    /*
     * kprobe post_handler: runs after the probed instruction has executed and
     * logs the probe address together with the flags (x86) or pstate (arm64).
     */
    static void handler_post(struct kprobe *p, struct pt_regs *regs,
        unsigned long flags)
    {
    #if defined(CONFIG_X86)
        printk(KERN_INFO "post_handler: p->addr = 0x%p, flags = 0x%lx\n",
               p->addr, regs->flags);
    #endif

    #if defined(CONFIG_ARM64)
        pr_info("<%s> post_handler: p->addr = 0x%p, pstate = 0x%lx\n",
                p->symbol_name, p->addr, (long)regs->pstate);
    #endif
    }
    
    /*
     * fault_handler: this is called if an exception is generated for any
     * instruction within the pre- or post-handler, or when Kprobes
     * single-steps the probed instruction.
     *
     * Logs the probe address and trap number, then returns 0.
     */
    static int handler_fault(struct kprobe *p, struct pt_regs *regs, int trapnr)
    {
        /* The format must end in "\n"; the earlier "%dn" printed a stray 'n'. */
        printk(KERN_INFO "fault_handler: p->addr = 0x%p, trap #%d\n",
               p->addr, trapnr);
        /* Return 0 because we don't handle the fault. */
        return 0;
    }
    
    /*
     * Module entry point: attach the three handlers to kp and register the probe.
     * Returns 0 on success or the negative value from register_kprobe().
     */
    static int __init kprobe_init(void)
    {
        int err;

        kp.pre_handler = handler_pre;
        kp.post_handler = handler_post;
        kp.fault_handler = handler_fault;

        err = register_kprobe(&kp);
        if (err >= 0) {
            printk(KERN_INFO "Planted kprobe at %p\n", kp.addr);
            return 0;
        }

        printk(KERN_INFO "register_kprobe failed, returned %d\n", err);
        return err;
    }
    
    /* Module exit point: remove the probe, then log the address it was planted at. */
    static void __exit kprobe_exit(void)
    {
     unregister_kprobe(&kp);
     printk(KERN_INFO "kprobe at %p unregistered\n", kp.addr);
    }
    
    module_init(kprobe_init)
    module_exit(kprobe_exit)
    MODULE_LICENSE("GPL");
    
    
    insmod kprobe_example.ko
    rmmod kprobe_example.ko
    dmesg
    

    編譯加載后,成功打印出rax的值(下面的日志來自x86_64平臺,因此顯示的是rax而不是x0)。

    [  245.080636] pre_handler: p->addr = 0x0000000050a6c3dd, ip = ffffffffa5ca0009, flags = 0x246,rax = 0x2
    [  245.080640] post_handler: p->addr = 0x0000000050a6c3dd, flags = 0x246
    [  245.080936] pre_handler: p->addr = 0x0000000050a6c3dd, ip = ffffffffa5ca0009, flags = 0x246,rax = 0x2
    [  245.080938] post_handler: p->addr = 0x0000000050a6c3dd, flags = 0x246
    [  245.457340] pre_handler: p->addr = 0x0000000050a6c3dd, ip = ffffffffa5ca0009, flags = 0x246,rax = 0x2
    [  245.457345] post_handler: p->addr = 0x0000000050a6c3dd, flags = 0x246
    [  245.457643] pre_handler: p->addr = 0x0000000050a6c3dd, ip = ffffffffa5ca0009, flags = 0x246,rax = 0x2
    [  245.457645] post_handler: p->addr = 0x0000000050a6c3dd, flags = 0x246
    [  245.719208] pre_handler: p->addr = 0x0000000050a6c3dd, ip = ffffffffa5ca0009, flags = 0x246,rax = 0x2
    [  245.719213] post_handler: p->addr = 0x0000000050a6c3dd, flags = 0x246
    [  245.719505] pre_handler: p->addr = 0x0000000050a6c3dd, ip = ffffffffa5ca0009, flags = 0x246,rax = 0x2
    [  245.719507] post_handler: p->addr = 0x0000000050a6c3dd, flags = 0x246
    [  245.820761] pre_handler: p->addr = 0x0000000050a6c3dd, ip = ffffffffa5ca0009, flags = 0x246,rax = 0x2
    [  245.820765] post_handler: p->addr = 0x0000000050a6c3dd, flags = 0x246
    [  245.821061] pre_handler: p->addr = 0x0000000050a6c3dd, ip = ffffffffa5ca0009, flags = 0x246,rax = 0x2
    [  245.821063] post_handler: p->addr = 0x0000000050a6c3dd, flags = 0x246
    [  246.092572] pre_handler: p->addr = 0x0000000050a6c3dd, ip = ffffffffa5ca0009, flags = 0x246,rax = 0x2
    [  246.092577] post_handler: p->addr = 0x0000000050a6c3dd, flags = 0x246
    [  246.095863] pre_handler: p->addr = 0x0000000050a6c3dd, ip = ffffffffa5ca0009, flags = 0x246,rax = 0x2
    [  246.095867] post_handler: p->addr = 0x0000000050a6c3dd, flags = 0x246
    [  246.126196] kprobe at 0000000050a6c3dd unregistered
    

    jprobe

    與kprobes相比,jprobes能更容易地獲取傳給函數的參數。有幾點需要注意:

    1. 處理程序應該有與被探測函數相同的參數列表和返回類型;
    2. 返回之前,必須調用jprobe_return()(處理程序實際上從未返回,因為jprobe_return()將控制權返回給Kprobes)。

    查看函數的參數

    /*
     * Here's a sample kernel module showing the use of jprobes to dump
     * the arguments of _do_fork().
     *
     * For more information on theory of operation of jprobes, see
     * Documentation/kprobes.txt
     *
     * Build and insert the kernel module as done in the kprobe example.
     * You will see the trace data in /var/log/messages and on the
     * console whenever _do_fork() is invoked to create a new process.
     * (Some messages may be suppressed if syslogd is configured to
     * eliminate duplicate messages.)
     */
    
    #include <linux/kernel.h>
    #include <linux/module.h>
    #include <linux/kprobes.h>
    
    /*
     * Jumper probe for do_filp_open.
     * Mirror principle enables access to arguments of the probed routine
     * from the probe handler.
     */
    
    /* Proxy routine having the same arguments as actual _do_fork() routine */
    #define TRACE_SYMBOL "do_filp_open"
    /*與do_filp_open 的參數(shù)完全相同*/
    static struct file * jp_do_filp_open(int dfd, struct filename *pathname,
      const struct open_flags *op)
    {
     if (pathname && !(strcmp(pathname->name, "testfile")))
     printk(KERN_INFO "jprobe: dfd = %d, pathname = %s\n", dfd, pathname->name);
    
     /* Always end with a call to jprobe_return(). */
     jprobe_return();
     return 0;
    }
    
    /* jprobe descriptor: .entry is the proxy handler, .kp names the probed symbol. */
    static struct jprobe my_jprobe = {
     .entry   = jp_do_filp_open,
     .kp = {
      .symbol_name = TRACE_SYMBOL,
     },
    };
    
    /*
     * Module entry point: register the jprobe.
     * Returns 0 on success, or the negative error code from register_jprobe()
     * so the loader reports the real failure reason instead of a generic -1.
     */
    static int __init jprobe_init(void)
    {
        int ret;

        ret = register_jprobe(&my_jprobe);
        if (ret < 0) {
            printk(KERN_INFO "register_jprobe failed, returned %d\n", ret);
            return ret;
        }
        printk(KERN_INFO "Planted jprobe at %p, handler addr %p\n",
               my_jprobe.kp.addr, my_jprobe.entry);
        return 0;
    }
    
    /* Module exit point: remove the jprobe, then log the address it was planted at. */
    static void __exit jprobe_exit(void)
    {
     unregister_jprobe(&my_jprobe);
     printk(KERN_INFO "jprobe at %p unregistered\n", my_jprobe.kp.addr);
    }
    
    module_init(jprobe_init)
    module_exit(jprobe_exit)
    MODULE_LICENSE("GPL");
    

    使用kprobes時,必須通過寄存器或棧才能計算出參數的值。此外,計算方法還依賴于架構。

    如果使用jprobes,那么無須了解架構的詳細知識,也能簡單地查看參數的值。

    編譯加載驅動程序

    insmod jprobe_example.ko
    vim testfile
    rmmod jprobe_example.ko
    dmesg
    

    成功打印出函數的參數

    [  612.670453] jprobe at ffffff80081fdf84 unregistered
    [  867.293765] Planted jprobe at ffffff80081fdf84, handler addr ffffff8000f1a000
    [  871.107502] jprobe: dfd = -100, pathname = testfile
    [  871.147747] jprobe: dfd = -100, pathname = testfile
    [  875.723761] jprobe at ffffff80081fdf84 unregistered
    [  907.706066] Planted jprobe at ffffff80081fdf84, handler addr ffffff8000f22000
    [  911.661891] jprobe: dfd = -100, pathname = testfile
    [  911.694903] jprobe: dfd = -100, pathname = testfile
    [  919.272187] jprobe at ffffff80081fdf84 unregistered
    [ 2296.830613] Planted jprobe at ffffff80081fdf84, handler addr ffffff8000f2a000
    [ 2302.164861] jprobe: dfd = -100, pathname = testfile
    [ 2302.200634] jprobe: dfd = -100, pathname = testfile
    [ 2307.407014] jprobe at ffffff80081fdf84 unregistered
    

    kretprobe

    kretprobe 也是基于kprobe的,相比于kprobe和jprobe,實現相對復雜。下面我們以內核目錄下的例程,簡單分析下。

    kretprobe_example.c

    /*
     * kretprobe_example.c
     *
     * Here's a sample kernel module showing the use of return probes to
     * report the return value and total time taken for probed function
     * to run.
     *
     * usage: insmod kretprobe_example.ko func=<func_name>
     *
     * If no func_name is specified, do_sys_open is instrumented
     *
     * For more information on theory of operation of kretprobes, see
     * Documentation/kprobes.txt
     *
     * Build and insert the kernel module as done in the kprobe example.
     * You will see the trace data in /var/log/messages and on the console
     * whenever the probed function returns. (Some messages may be suppressed
     * if syslogd is configured to eliminate duplicate messages.)
     */
    
    #include <linux/kernel.h>
    #include <linux/module.h>
    #include <linux/kprobes.h>
    #include <linux/ktime.h>
    #include <linux/limits.h>
    #include <linux/sched.h>
    
    /* Name of the function to probe; defaults to do_sys_open, settable via the "func" module parameter. */
    static char func_name[NAME_MAX] = "do_sys_open";
    module_param_string(func, func_name, NAME_MAX, S_IRUGO);
    MODULE_PARM_DESC(func, "Function to kretprobe; this module will report the"
       " function's execution time");
    
    /* per-instance private data */
    struct my_data {
     ktime_t entry_stamp; /* written by entry_handler, read by ret_handler */
    };
    
    /* Here we use the entry_hanlder to timestamp function entry */
    static int entry_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
    {
     struct my_data *data;
    
     if (!current->mm)
      return 1; /* Skip kernel threads */
    
     data = (struct my_data *)ri->data;
     data->entry_stamp = ktime_get();
     return 0;
    }
    
    /*
     * Return-probe handler: logs the probed function's return value and the
     * time elapsed since entry_handler stamped the instance. The duration may
     * read as zero consistently, depending on the platform's time granularity.
     */
    static int ret_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
    {
        int retval = regs_return_value(regs);
        struct my_data *priv = (struct my_data *)ri->data;
        ktime_t finished = ktime_get();
        s64 elapsed_ns = ktime_to_ns(ktime_sub(finished, priv->entry_stamp));

        printk(KERN_INFO "%s returned %d and took %lld ns to execute\n",
               func_name, retval, (long long)elapsed_ns);
        return 0;
    }
    
    /* kretprobe descriptor; kp.symbol_name is filled in by kretprobe_init(). */
    static struct kretprobe my_kretprobe = {
     .handler  = ret_handler,
     .entry_handler  = entry_handler,
     /* Room reserved per instance for the entry timestamp. */
     .data_size  = sizeof(struct my_data),
     /* Probe up to 20 instances concurrently. */
     .maxactive  = 20,
    };
    
    /*
     * Module entry point: point the kretprobe at func_name and register it.
     * Returns 0 on success, or the negative error code from
     * register_kretprobe() so the loader reports the real failure reason
     * instead of a generic -1.
     */
    static int __init kretprobe_init(void)
    {
        int ret;

        my_kretprobe.kp.symbol_name = func_name;
        ret = register_kretprobe(&my_kretprobe);
        if (ret < 0) {
            printk(KERN_INFO "register_kretprobe failed, returned %d\n",
                   ret);
            return ret;
        }
        printk(KERN_INFO "Planted return probe at %s: %p\n",
               my_kretprobe.kp.symbol_name, my_kretprobe.kp.addr);
        return 0;
    }
    
    /*
     * Module exit point: remove the return probe, then log its address and
     * how many invocations of the probed function were missed.
     */
    static void __exit kretprobe_exit(void)
    {
        unregister_kretprobe(&my_kretprobe);
        printk(KERN_INFO "kretprobe at %p unregistered\n", my_kretprobe.kp.addr);

        /* A non-zero nmissed suggests that maxactive was set too low. */
        printk(KERN_INFO "Missed probing %d instances of %s\n",
               my_kretprobe.nmissed, my_kretprobe.kp.symbol_name);
    }
    
    module_init(kretprobe_init)
    module_exit(kretprobe_exit)
    MODULE_LICENSE("GPL");
    

    struct kretprobe

    struct kretprobe_instance;
    struct pt_regs;
    
    /*
     * Signature shared by both kretprobe callbacks (handler and entry_handler).
     * Declared ahead of struct kretprobe because the struct uses it.
     */
    typedef int (*kretprobe_handler_t) (struct kretprobe_instance *,
            struct pt_regs *);
    
    /*
     * Function-return probe -
     * Note:
     * User needs to provide a handler function, and initialize maxactive.
     * maxactive - The maximum number of instances of the probed function that
     * can be active concurrently.
     * nmissed - tracks the number of times the probed function's return was
     * ignored, due to maxactive being too low.
     *
     */
    struct kretprobe {
     struct kprobe kp;
     kretprobe_handler_t handler;
     kretprobe_handler_t entry_handler;
     int maxactive;
     int nmissed;
     size_t data_size;
     struct hlist_head free_instances;
     raw_spinlock_t lock;
    };
    
    
    
    1. 其中我們可以看到 struct kretprobe 結構體中有 struct kprobe 成員(kretprobe是基于kprobe實現的)。handler:用戶自定義回調函數,被探測函數返回后被調用,一般在這個函數中獲取被探測函數的返回值。
    2. entry_handler:用戶自定義回調函數,這是Kretprobes提供的一個可選的用戶指定的處理程序,它在函數入口上運行。每當 kretprobe 放置在函數入口處的 kprobe 被命中時,都會調用用戶定義的 entry_handler(如果有的話)。如果 entry_handler 返回 0(成功),則保證在函數返回時調用相應的返回處理程序。如果 entry_handler 返回非零錯誤,則 Kprobes 將返回地址保持原樣,并且 kretprobe 對該特定函數實例沒有進一步的影響。
    3. maxactive:被探測函數可以同時活動的最大實例數,用來指定可以同時探測多少個指定函數的實例。register_kretprobe() 預分配指定數量的 kretprobe_instance 對象。
    4. nmissed:跟蹤被探測函數的返回被忽略的次數(maxactive設置得過低)。
    5. data_size:表示kretprobe私有數據的大小,在注冊kretprobe時會根據該大小預留空間。
    6. free_instances:表示空閑的kretprobe運行實例鏈表,它鏈接了本kretprobe的空閑實例(用struct kretprobe_instance結構體表示)。

    struct kretprobe_instance

    /* One in-flight invocation of a function that has a kretprobe attached. */
    struct kretprobe_instance {
     struct hlist_node hlist; /* links the instance into a free list or hash bucket */
     struct kretprobe *rp; /* owning kretprobe */
     kprobe_opcode_t *ret_addr;
     struct task_struct *task; /* task being tracked */
     char data[0]; /* trailing private data; data_size bytes are allocated after the struct */
    };
    
    1. 這個結構體表示kretprobe的運行實例,前文說過被探測函數在跟蹤期間可能存在并發執行的現象,因此kretprobe使用一個kretprobe_instance來跟蹤一個執行流,支持的上限為maxactive。在沒有觸發探測時,所有的kretprobe_instance實例都保存在free_instances表中,每當有執行流觸發一次kretprobe探測,都會從該表中取出一個空閑的kretprobe_instance實例用來跟蹤。
    2. kretprobe_instance結構體中的rp指針指向所屬的kretprobe;
    3. ret_addr用于保存原始被探測函數的返回地址(后文會看到被探測函數返回地址會被暫時替換);
    4. task用于綁定其跟蹤的進程;
    5. data保存用戶使用的kretprobe私有數據,它會在整個kretprobe探測運行期間在entry_handler和handler回調函數之間進行傳遞(一般用于實現統計被探測函數的執行耗時)。

    register_kretprobe

    kretprobe探測點的blackpoint,用來表示不支持kretprobe探測的函數的信息。name表示該函數名,addr表示該函數的地址。

    /* Describes one function that must not be return-probed. */
    struct kretprobe_blackpoint {
     const char *name; /* function name */
     void *addr; /* function address, compared against the probe address at registration */
    };
    1234
    

    blackpoint與架構相關,x86架構不支持的kretprobe探測點如下:

    // arch/x86/kernel/kprobes/core.c
    // Functions that kretprobes cannot probe, as the name "blacklist" suggests.
    struct kretprobe_blackpoint kretprobe_blacklist[] = {
     {"__switch_to", }, /* This function switches only current task, but
             doesn't switch kernel stack.*/
     {NULL, NULL} /* Terminator */
    };
    
    /* Number of array entries, including the NULL terminator. */
    const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist);
    123456789
    

    函數的開頭首先處理 kretprobe_blacklist,如果指定的被探測函數在這個blacklist中就直接返回-EINVAL,表示不支持探測。在x86架構中是__switch_to 這個函數,表示這個函數不能被kretprobe。

    /*
     * Register a return probe: reject blacklisted addresses, install the
     * kernel's own pre_handler, pre-allocate the instance pool, then register
     * the underlying kprobe.
     * Returns 0 on success; the error from kprobe_addr(), -EINVAL for a
     * blacklisted address, -ENOMEM on allocation failure, or the non-zero
     * result of register_kprobe().
     */
    int register_kretprobe(struct kretprobe *rp)
    {
     int ret = 0;
     struct kretprobe_instance *inst;
     int i;
     void *addr;
    
     if (kretprobe_blacklist_size) {
      addr = kprobe_addr(&rp->kp);
      if (IS_ERR(addr))
       return PTR_ERR(addr);
      // Return -EINVAL if the probe address matches a kretprobe_blacklist entry
      for (i = 0; kretprobe_blacklist[i].name != NULL; i++) {
       if (kretprobe_blacklist[i].addr == addr)
        return -EINVAL;
      }
     }
    
     // The kernel installs its own callback, pre_handler_kretprobe.
     // Unlike a plain kprobe, a kretprobe does not support user-defined pre_handler/post_handler callbacks.
     
     rp->kp.pre_handler = pre_handler_kretprobe;
     rp->kp.post_handler = NULL;
     rp->kp.fault_handler = NULL;
     rp->kp.break_handler = NULL;
    
     /* Pre-allocate memory for max kretprobe instances */
     if (rp->maxactive <= 0) {
    #ifdef CONFIG_PREEMPT
      rp->maxactive = max_t(unsigned int, 10, 2*num_possible_cpus());
    #else
      rp->maxactive = num_possible_cpus();
    #endif
     }
     raw_spin_lock_init(&rp->lock);
     INIT_HLIST_HEAD(&rp->free_instances);
     // Allocate maxactive kretprobe_instance objects, each followed by data_size bytes
     for (i = 0; i < rp->maxactive; i++) {
      inst = kmalloc(sizeof(struct kretprobe_instance) +
              rp->data_size, GFP_KERNEL);
      if (inst == NULL) {
       free_rp_inst(rp);
       return -ENOMEM;
      }
      INIT_HLIST_NODE(&inst->hlist);
      hlist_add_head(&inst->hlist, &rp->free_instances);
     }
    
     rp->nmissed = 0;
     /* Establish function entry probe point */
     // Register the kprobe
     ret = register_kprobe(&rp->kp);
     if (ret != 0)
      free_rp_inst(rp);
     return ret;
    }
    EXPORT_SYMBOL_GPL(register_kretprobe);
    

    最后調用 register_kprobe(&rp->kp),注冊kprobe點,可以看出kretprobe也是基于kprobe機制實現的,kretprobe也是一種特殊形式的kprobe。

    kretprobe注冊完成后就默認啟動探測。

    pre_handler_kretprobe

    pre_handler_kretprobe這個函數是內核自己定義的,內核已經指定該回調函數,不支持用戶自定義。這個 kprobe pre_handler 在每個 kretprobe 中注冊。當探針命中時,它將設置返回探針。

    #ifdef CONFIG_KRETPROBES
    /*
     * This kprobe pre_handler is registered with every kretprobe. When probe
     * hits it will set up the return probe.
     *
     * Takes a free kretprobe_instance from rp->free_instances, binds it to the
     * current task, runs the optional user entry_handler and, unless that
     * returns non-zero, hands the instance to arch_prepare_kretprobe() and
     * files it in kretprobe_inst_table. When no free instance is available,
     * or in NMI context, only rp->nmissed is incremented. Always returns 0.
     */
    static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
    {
     struct kretprobe *rp = container_of(p, struct kretprobe, kp);
     unsigned long hash, flags = 0;
     struct kretprobe_instance *ri;
    
     /*
      * To avoid deadlocks, prohibit return probing in NMI contexts,
      * just skip the probe and increase the (inexact) 'nmissed'
      * statistical counter, so that the user is informed that
      * something happened:
      */
     if (unlikely(in_nmi())) {
      rp->nmissed++;
      return 0;
     }
    
     /* TODO: consider to only swap the RA after the last pre_handler fired */
     hash = hash_ptr(current, KPROBE_HASH_BITS);
     raw_spin_lock_irqsave(&rp->lock, flags);
     if (!hlist_empty(&rp->free_instances)) {
      ri = hlist_entry(rp->free_instances.first,
        struct kretprobe_instance, hlist);
      hlist_del(&ri->hlist);
      raw_spin_unlock_irqrestore(&rp->lock, flags);
    
      ri->rp = rp;
      ri->task = current;
      /*
       * (1) Run the optional user entry_handler. A non-zero return puts the
       * instance back on the free list and skips the return probe.
       */
      if (rp->entry_handler && rp->entry_handler(ri, regs)) {
       raw_spin_lock_irqsave(&rp->lock, flags);
       hlist_add_head(&ri->hlist, &rp->free_instances);
       raw_spin_unlock_irqrestore(&rp->lock, flags);
       return 0;
      }
      /*
       * (2) Architecture-specific step. NOTE(review): per the surrounding
       * article this saves the return address in ri->ret_addr and replaces
       * it with kretprobe_trampoline; confirm against the arch code.
       */
      arch_prepare_kretprobe(ri, regs);
    
      /* XXX(hch): why is there no hlist_move_head? */
      INIT_HLIST_NODE(&ri->hlist);
      kretprobe_table_lock(hash, &flags);
      hlist_add_head(&ri->hlist, &kretprobe_inst_table[hash]);
      kretprobe_table_unlock(hash, &flags);
     } else {
      rp->nmissed++;
      raw_spin_unlock_irqrestore(&rp->lock, flags);
     }
     return 0;
    }
    NOKPROBE_SYMBOL(pre_handler_kretprobe);
    
    

    entry_handler

    struct kretprobe *rp
    rp->entry_handler && rp->entry_handler(ri, regs)
    

    entry_handler這個回調(diào)函數(shù)就是用戶自己定義的回調(diào)函數(shù)(可選的用戶指定的處理程序),前面我們已經(jīng)介紹過了,在這里不再介紹。

    /* Here we use the entry_hanlder to timestamp function entry */
    static int entry_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
    {
     struct my_data *data;
    
     //內(nèi)核線程 task->mm == NULL
     if (!current->mm)
      return 1; /* Skip kernel threads */
    
     data = (struct my_data *)ri->data;
     data->entry_stamp = ktime_get();
     return 0;
    }
    

    arch_prepare_kretprobe

    arch_prepare_kretprobe(ri, regs)該函數(shù)架構(gòu)相關(guān),struct kretprobe_instance結(jié)構(gòu)體 的 ret_addr 成員用于保存并替換regs中的返回地址。返回地址被替換為kretprobe_trampoline

    x86架構(gòu)

    // arch/x86/kernel/kprobes/core.c
    
    /* Treat the value returned by kernel_stack_pointer(regs) as a pointer to unsigned long. */
    #define stack_addr(regs) ((unsigned long *)kernel_stack_pointer(regs))
    
    // x86_64
    // arch/x86/include/asm/ptrace.h
    /* Return the stack pointer value saved in regs. */
    static inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
    {
     return regs->sp;
    }
    // arch/x86/kernel/kprobes/core.c
    /*
     * Save the word at the saved stack pointer (the probed function's return
     * address) into ri->ret_addr, then overwrite that word with the address
     * of kretprobe_trampoline.
     */
    void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs)
    {
     unsigned long *ret_slot = stack_addr(regs);
    
     /* Remember the original return address. */
     ri->ret_addr = (kprobe_opcode_t *) ret_slot[0];
    
     /* Redirect the return to the trampoline. */
     ret_slot[0] = (unsigned long) &kretprobe_trampoline;
    }
    NOKPROBE_SYMBOL(arch_prepare_kretprobe);
    
    //struct kretprobe_instance *ri;
    //ri->ret_addr;
    
    /* Abridged here: only the member discussed in this section is shown. */
    struct kretprobe_instance {
     kprobe_opcode_t *ret_addr;  /* holds the probed function's original return address */
    };
    
    

    ARM64架構(gòu)

    // arch/arm64/kernel/probes/kprobes.c
    
    /*
     * Save the value of regs->regs[30] (x30) into ri->ret_addr, then replace
     * it with the address of kretprobe_trampoline.
     */
    void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
              struct pt_regs *regs)
    {
     /* keep the original return address */
     ri->ret_addr = (kprobe_opcode_t *)regs->regs[30];
    
     /* replace return addr (x30) with trampoline */
     regs->regs[30] = (long)&kretprobe_trampoline;
    }
    

    ARM64架構(gòu)中regs->regs[30]是LR(procedure link register)寄存器(X30 :LR)。

    小結(jié)

    kretprobe是基于kprobe實現(xiàn)的,有一個固定的pre_handler回調(diào)函數(shù),在內(nèi)核中實現(xiàn),無需用戶編寫。而在kprobe中pre_handler函數(shù)是提供給用戶的回調(diào)函數(shù)。

    rp->kp.pre_handler = pre_handler_kretprobe;  //內(nèi)核中已經(jīng)實現(xiàn)
    rp->kp.post_handler = NULL;
    rp->kp.fault_handler = NULL;
    rp->kp.break_handler = NULL;
    

    kretprobe提供給用戶的兩個回調(diào)函數(shù):

    kretprobe_handler_t handler;
    kretprobe_handler_t entry_handler; // (可選)
    

    pre_handler回調(diào)函數(shù)會為kretprobe探測函數(shù)執(zhí)行的返回值做準(zhǔn)備工作,其中最主要的就是替換掉正常流程的返回地址,讓被探測函數(shù)在執(zhí)行之后能夠跳轉(zhuǎn)到kretprobe設(shè)計的函數(shù) kretprobe_trampoline中去。

    kretprobe_trampoline

    pre_handler_kretprobe函數(shù)返回后,kprobe流程接著執(zhí)行singlestep流程并返回到正常的執(zhí)行流程,被探測函數(shù)(do_fork)繼續(xù)執(zhí)行,直到它執(zhí)行完畢并返回。

    由于返回地址被替換為kretprobe_trampoline,所以跳轉(zhuǎn)到kretprobe_trampoline執(zhí)行,該函數(shù)架構(gòu)相關(guān)且有嵌入?yún)R編實現(xiàn)。

    該函數(shù)會獲取被探測函數(shù)的寄存器信息并調(diào)用用戶定義的回調(diào)函數(shù)輸出其中的返回值,最后函數(shù)返回正常的執(zhí)行流程。

    static int ret_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
    {
     unsigned long retval = regs_return_value(regs);
     ......
    }
    
    
    static struct kretprobe my_kretprobe = {
     .handler  = ret_handler,
    };
    

    x86架構(gòu)

    (1)

    kretprobe_trampoline
     -->trampoline_handler
    kretprobe_trampoline
    

    (2) kretprobe_trampoline

    // arch/x86/kernel/kprobes/core.c
    
    /*
     * When a retprobed function returns, this code saves registers and
     * calls trampoline_handler(), which calls the kretprobe's handler.
     * The value returned by trampoline_handler() is placed in the stack
     * slot that the final "ret" consumes.
     */
    asm(
     ".global kretprobe_trampoline\n"
     ".type kretprobe_trampoline, @function\n"
     "kretprobe_trampoline:\n"
    #ifdef CONFIG_X86_64
     /* We don't bother saving the ss register */
     " pushq %rsp\n"
     " pushfq\n"
     SAVE_REGS_STRING
     /* First argument of trampoline_handler(): the current stack pointer */
     " movq %rsp, %rdi\n"
     " call trampoline_handler\n"
     /* Replace saved sp with true return address. */
     " movq %rax, 152(%rsp)\n"
     RESTORE_REGS_STRING
     " popfq\n"
    #else
     " pushf\n"
     SAVE_REGS_STRING
     /* The current stack pointer is handed over in %eax */
     " movl %esp, %eax\n"
     " call trampoline_handler\n"
     /* Move flags to cs */
     " movl 56(%esp), %edx\n"
     " movl %edx, 52(%esp)\n"
     /* Replace saved flags with true return address. */
     " movl %eax, 56(%esp)\n"
     RESTORE_REGS_STRING
     " popf\n"
    #endif
     /* Return through the address stored above */
     " ret\n"
     ".size kretprobe_trampoline, .-kretprobe_trampoline\n"
    );
    NOKPROBE_SYMBOL(kretprobe_trampoline);
    STACK_FRAME_NON_STANDARD(kretprobe_trampoline);
    

    (3) trampoline_handler

    // arch/x86/kernel/kprobes/core.c
    
    /*
     * Called from kretprobe_trampoline
     *
     * Walks the current task's kretprobe instances, runs each registered
     * return handler, recycles the instances and returns the real return
     * address of the probed function.
     */
    __visible __used void *trampoline_handler(struct pt_regs *regs)
    {
     struct kretprobe_instance *ri = NULL;
     struct hlist_head *head, empty_rp;
     struct hlist_node *tmp;
     unsigned long flags, orig_ret_address = 0;
     unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
     kprobe_opcode_t *correct_ret_addr = NULL;
    
     /* empty_rp collects the instances that are kfree'd at the end */
     INIT_HLIST_HEAD(&empty_rp);
     kretprobe_hash_lock(current, &head, &flags);
     /* fixup registers */
    #ifdef CONFIG_X86_64
     regs->cs = __KERNEL_CS;
    #else
     regs->cs = __KERNEL_CS | get_kernel_rpl();
     regs->gs = 0;
    #endif
     regs->ip = trampoline_address;
     regs->orig_ax = ~0UL;
    
     /*
      * It is possible to have multiple instances associated with a given
      * task either because multiple functions in the call path have
      * return probes installed on them, and/or more than one
      * return probe was registered for a target function.
      *
      * We can handle this because:
      *     - instances are always pushed into the head of the list
      *     - when multiple return probes are registered for the same
      *  function, the (chronologically) first instance's ret_addr
      *  will be the real return address, and all the rest will
      *  point to kretprobe_trampoline.
      */
     /* First pass: only locate the real return address, nothing is modified */
     hlist_for_each_entry_safe(ri, tmp, head, hlist) {
      if (ri->task != current)
       /* another task is sharing our hash bucket */
       continue;
    
      orig_ret_address = (unsigned long)ri->ret_addr;
    
      if (orig_ret_address != trampoline_address)
       /*
        * This is the real return address. Any other
        * instances associated with this task are for
        * other calls deeper on the call stack
        */
       break;
     }
    
     kretprobe_assert(ri, orig_ret_address, trampoline_address);
    
     correct_ret_addr = ri->ret_addr;
     /* Second pass: run the handlers and recycle the instances */
     hlist_for_each_entry_safe(ri, tmp, head, hlist) {
      if (ri->task != current)
       /* another task is sharing our hash bucket */
       continue;
    
      orig_ret_address = (unsigned long)ri->ret_addr;
      if (ri->rp && ri->rp->handler) {
       __this_cpu_write(current_kprobe, &ri->rp->kp);
       get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE;
       /* The handler sees the real return address, not the trampoline */
       ri->ret_addr = correct_ret_addr;
       ri->rp->handler(ri, regs);
       __this_cpu_write(current_kprobe, NULL);
      }
    
      recycle_rp_inst(ri, &empty_rp);
    
      if (orig_ret_address != trampoline_address)
       /*
        * This is the real return address. Any other
        * instances associated with this task are for
        * other calls deeper on the call stack
        */
       break;
     }
    
     kretprobe_hash_unlock(current, &flags);
    
     /* Free whatever was collected on empty_rp, now that the lock is dropped */
     hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) {
      hlist_del(&ri->hlist);
      kfree(ri);
     }
     return (void *)orig_ret_address;
    }
    NOKPROBE_SYMBOL(trampoline_handler);
    

    (4) ri->rp->handler(ri, regs)表示執行用戶自定義的回調函數handler(用來獲取_do_fork函數的返回值),handler回調函數執行完畢以后,調用recycle_rp_inst函數將當前的kretprobe_instance實例從kretprobe_inst_table哈希表釋放,重新鏈入free_instances中,以備后面kretprobe觸發時使用,另外如果kretprobe已經被注銷則將它添加到銷毀表中待銷毀。

    ri->rp->handler(ri, regs);
     ->recycle_rp_inst(ri, &empty_rp);
    
    /*
     * recycle_rp_inst - take a finished instance off the instance table.
     * @ri:   the instance to recycle
     * @head: list that receives the instance when ri->rp is NULL
     *
     * The instance is returned to its kretprobe's free_instances list when
     * ri->rp is set; otherwise it is queued on @head.
     */
    void recycle_rp_inst(struct kretprobe_instance *ri,
           struct hlist_head *head)
    {
     struct kretprobe *owner = ri->rp;
    
     /* remove rp inst off the rprobe_inst_table */
     hlist_del(&ri->hlist);
     INIT_HLIST_NODE(&ri->hlist);
    
     if (unlikely(!owner)) {
      /* Unregistering */
      hlist_add_head(&ri->hlist, head);
      return;
     }
    
     raw_spin_lock(&owner->lock);
     hlist_add_head(&ri->hlist, &owner->free_instances);
     raw_spin_unlock(&owner->lock);
    }
    NOKPROBE_SYMBOL(recycle_rp_inst);
    

    (5) trampoline_handler函數執行完后,返回被探測函數的原始返回地址,執行流程再次回到kretprobe_trampoline函數中,將保存的 sp 替換為真實的返回地址。 從rax寄存器中取出原始的返回地址,然后恢復原始函數調用棧空間,最后跳轉到原始返回地址執行,至此函數調用的流程就回歸正常流程了,整個kretprobe探測結束。

    /* Replace saved sp with true return address. */
     " movq %rax, 152(%rsp)\n"
     RESTORE_REGS_STRING
     " popfq\n"
    
    

    ARM64架構(gòu)

    (1)

    kretprobe_trampoline 
     -->trampoline_probe_handler
    kretprobe_trampoline 
    

    (2) kretprobe_trampoline

    // arch/arm64/kernel/probes/kprobes_trampoline.S
    
    /*
     * Return path of a kretprobed function: reserve a frame of S_FRAME_SIZE
     * bytes, save registers into it, call trampoline_probe_handler() with the
     * frame address in x0, then return to the address the handler gave back.
     */
    ENTRY(kretprobe_trampoline)
     sub sp, sp, #S_FRAME_SIZE
    
     save_all_base_regs
    
     /* x0 = address of the frame just filled in */
     mov x0, sp
     bl trampoline_probe_handler
     /*
      * Replace trampoline address in lr with actual orig_ret_addr return
      * address.
      */
     mov lr, x0
    
     restore_all_base_regs
    
     /* release the frame and return through lr */
     add sp, sp, #S_FRAME_SIZE
     ret
    
    ENDPROC(kretprobe_trampoline)
    

    (3) trampoline_probe_handler

    // arch/arm64/kernel/probes/kprobes.c
    
    /*
     * Walks the current task's kretprobe instances, runs each registered
     * return handler, recycles the instances and returns the real return
     * address of the probed function.
     */
    void __kprobes __used *trampoline_probe_handler(struct pt_regs *regs)
    {
     struct kretprobe_instance *ri = NULL;
     struct hlist_head *head, empty_rp;
     struct hlist_node *tmp;
     unsigned long flags, orig_ret_address = 0;
     unsigned long trampoline_address =
      (unsigned long)&kretprobe_trampoline;
     kprobe_opcode_t *correct_ret_addr = NULL;
    
     /* empty_rp collects the instances that are kfree'd at the end */
     INIT_HLIST_HEAD(&empty_rp);
     kretprobe_hash_lock(current, &head, &flags);
    
     /*
      * It is possible to have multiple instances associated with a given
      * task either because multiple functions in the call path have
      * return probes installed on them, and/or more than one
      * return probe was registered for a target function.
      *
      * We can handle this because:
      *     - instances are always pushed into the head of the list
      *     - when multiple return probes are registered for the same
      *  function, the (chronologically) first instance's ret_addr
      *  will be the real return address, and all the rest will
      *  point to kretprobe_trampoline.
      */
     /* First pass: only locate the real return address, nothing is modified */
     hlist_for_each_entry_safe(ri, tmp, head, hlist) {
      if (ri->task != current)
       /* another task is sharing our hash bucket */
       continue;
    
      orig_ret_address = (unsigned long)ri->ret_addr;
    
      if (orig_ret_address != trampoline_address)
       /*
        * This is the real return address. Any other
        * instances associated with this task are for
        * other calls deeper on the call stack
        */
       break;
     }
    
     kretprobe_assert(ri, orig_ret_address, trampoline_address);
    
     correct_ret_addr = ri->ret_addr;
     /* Second pass: run the handlers and recycle the instances */
     hlist_for_each_entry_safe(ri, tmp, head, hlist) {
      if (ri->task != current)
       /* another task is sharing our hash bucket */
       continue;
    
      orig_ret_address = (unsigned long)ri->ret_addr;
      if (ri->rp && ri->rp->handler) {
       __this_cpu_write(current_kprobe, &ri->rp->kp);
       get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE;
       /* The handler sees the real return address, not the trampoline */
       ri->ret_addr = correct_ret_addr;
       ri->rp->handler(ri, regs);
       __this_cpu_write(current_kprobe, NULL);
      }
    
      recycle_rp_inst(ri, &empty_rp);
    
      if (orig_ret_address != trampoline_address)
       /*
        * This is the real return address. Any other
        * instances associated with this task are for
        * other calls deeper on the call stack
        */
       break;
     }
    
     kretprobe_hash_unlock(current, &flags);
    
     /* Free whatever was collected on empty_rp, now that the lock is dropped */
     hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) {
      hlist_del(&ri->hlist);
      kfree(ri);
     }
     return (void *)orig_ret_address;
    }
    

    (4) 將 lr寄存器中的trampoline地址替換為實際的 orig_ret_addr 返回地址。 從x0寄存器中取出原始的返回地址,然后恢復(fù)原始函數(shù)調(diào)用??臻g,最后跳轉(zhuǎn)到原始返回地址執(zhí)行,至此函數(shù)調(diào)用的流程就回歸正常流程了,整個kretprobe探測結(jié)束。

    /*
      * Replace trampoline address in lr with actual orig_ret_addr return
      * address.
      */
     mov lr, x0
    
     restore_all_base_regs
    
     add sp, sp, #S_FRAME_SIZE
     ret
    

    編譯運(yùn)行

    insmod kprobe_example.ko
    vim testfile
    rmmod kprobe_example.ko
    dmesg
    

    成功打印出函數(shù)的執(zhí)行時間

    [ 1056.875938] do_sys_open returned -2 and took 10500 ns to execute
    [ 1057.567400] do_sys_open returned 34 and took 59208 ns to execute
    [ 1058.382932] do_sys_open returned 3 and took 31469101 ns to execute
    [ 1058.567046] do_sys_open returned 34 and took 61250 ns to execute
    [ 1058.975879] do_sys_open returned 3 and took 224084 ns to execute
    [ 1058.975935] do_sys_open returned 3 and took 16917 ns to execute
    [ 1058.976041] do_sys_open returned 3 and took 13417 ns to execute
    [ 1058.976148] do_sys_open returned 3 and took 15167 ns to execute
    [ 1058.976254] do_sys_open returned 3 and took 15750 ns to execute
    [ 1058.976356] do_sys_open returned 3 and took 16042 ns to execute
    [ 1058.978036] do_sys_open returned -2 and took 23041 ns to execute
    [ 1058.978074] do_sys_open returned 3 and took 24500 ns to execute
    [ 1058.978175] do_sys_open returned -2 and took 9334 ns to execute
    [ 1058.978211] do_sys_open returned 3 and took 23333 ns to execute
    [ 1058.978246] do_sys_open returned 3 and took 13417 ns to execute
    [ 1058.978286] do_sys_open returned 3 and took 14583 ns to execute
    [ 1058.989701] kretprobe at ffffff80081ed6c8 unregistered
    [ 1058.989709] Missed probing 0 instances of do_sys_open
    

    Kprobe-based Event Tracing

    這些事件類似于基于tracepoint的事件。與Tracepoint不同,它是基于kprobes(kprobe和kretprobe)的。所以它可以探測任何kprobes可以探測的地方。與基于Tracepoint的事件不同的是,它可以動態(tài)地添加和刪除。

    要啟用這個功能,在編譯內(nèi)核時CONFIG_KPROBE_EVENTS=y

    與Event Tracing類似,這不需要通過current_tracer來激活。可以通過/sys/kernel/debug/tracing/kprobe_events添加探測點,并通過/sys/kernel/debug/tracing/events/kprobes/<EVENT>/enable來啟用它。

    你也可以使用/sys/kernel/debug/tracing/dynamic_events,而不是kprobe_events。該接口也將提供對其他動態(tài)事件的統(tǒng)一訪問。

    Synopsis of kprobe_events

    kprobe和內核的ftrace結合使用,需要對內核進行配置,然后添加探測點、進行探測、查看結果。

    kprobe配置

    CONFIG_KPROBES=y
    CONFIG_OPTPROBES=y
    CONFIG_KPROBES_ON_FTRACE=y
    CONFIG_UPROBES=y
    CONFIG_KRETPROBES=y
    CONFIG_HAVE_KPROBES=y
    CONFIG_HAVE_KRETPROBES=y
    CONFIG_HAVE_OPTPROBES=y
    CONFIG_HAVE_KPROBES_ON_FTRACE=y
    CONFIG_KPROBE_EVENT=y
    

    kprobe trace events使用

    kprobe事件相關(guān)的節(jié)點有如下:

    /sys/kernel/debug/tracing/kprobe_events-----------------------配置kprobe事件屬性,增加事件之后會在events/kprobes下面生成對應目錄。
    /sys/kernel/debug/tracing/kprobe_profile----------------------kprobe事件統計屬性文件。
    /sys/kernel/debug/tracing/events/kprobes/<EVENT>/enable-------使能kprobe事件
    /sys/kernel/debug/tracing/events/kprobes/<EVENT>/filter-------過濾kprobe事件
    /sys/kernel/debug/tracing/events/kprobes/<EVENT>/format-------查詢kprobe事件顯示格式
    

    kprobe事件配置

    新增一個kprobe事件,通過寫kprobe_events來設(shè)置。

    p[:[GRP/]EVENT] [MOD:]SYM[+offs]|MEMADDR [FETCHARGS]-------------------設(shè)置一個probe探測點
    r[:[GRP/]EVENT] [MOD:]SYM[+0] [FETCHARGS]------------------------------設(shè)置一個return probe探測點
    -:[GRP/]EVENT----------------------------------------------------------刪除一個探測點
    

    細(xì)節(jié)解釋如下:

    GRP        : Group name. If omitted, use "kprobes" for it.------------設置后會在events/kprobes下創建<GRP>目錄。
     EVENT        : Event name. If omitted, the event name is generated based on SYM+offs or MEMADDR.---指定后在events/kprobes/<GRP>生成<EVENT>目錄。
     MOD        : Module name which has given SYM.--------------------------模塊名,一般不設
     SYM[+offs]    : Symbol+offset where the probe is inserted.-------------被探測函數名和偏移
     MEMADDR    : Address where the probe is inserted.----------------------指定被探測的內存絕對地址
     FETCHARGS    : Arguments. Each probe can have up to 128 args.----------指定要獲取的參數信息。
     %REG        : Fetch register REG---------------------------------------獲取指定寄存器值
     @ADDR        : Fetch memory at ADDR (ADDR should be in kernel)--------獲取指定內存地址的值
     @SYM[+|-offs]    : Fetch memory at SYM +|- offs (SYM should be a data symbol)---獲取全局變量的值
     $stackN    : Fetch Nth entry of stack (N >= 0)----------------------------------獲取指定棧空間值,即sp寄存器+N后的位置值
     $stack    : Fetch stack address.-----------------------------------------------獲取sp寄存器值
     $retval    : Fetch return value.(*)--------------------------------------------獲取返回值,僅用于return probe
     $comm        : Fetch current task comm.----------------------------------------獲取對應進程名稱。
     +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(**)-------------獲取FETCHARG +|- offs地址處的內存值
     NAME=FETCHARG : Set NAME as the argument name of FETCHARG.
     FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types (u8/u16/u32/u64/s8/s16/s32/s64), hexadecimal types
              (x8/x16/x32/x64), "string" and bitfield are supported.----------------設置參數的類型,可以支持字符串和比特類型
      (*) only for return probe.
      (**) this is useful for fetching a field of data structures.
    

    執(zhí)行如下兩條命令就會生成目錄/sys/kernel/debug/tracing/events/kprobes/myprobe;第三條命令則可以刪除指定kprobe事件,如果要全部刪除則echo > /sys/kernel/debug/tracing/kprobe_events。

    echo 'p:myprobe do_sys_open dfd=%x0 filename=%x1 flags=%x2 mode=%x3' > /sys/kernel/debug/tracing/kprobe_events
    echo 'r:myretprobe do_sys_open ret=$retval' >> /sys/kernel/debug/tracing/kprobe_events-----------------------------------------------------這里面一定要用">>",不然就會覆蓋前面的設(shè)置。
    
    echo '-:myprobe' >> /sys/kernel/debug/tracing/kprobe_events
    echo '-:myretprobe' >> /sys/kernel/debug/tracing/kprobe_events
    

    參數(shù)后面的寄存器是跟架構(gòu)相關(guān)的,%x0、%x1、%x2表示第1/2/3個參數(shù),超出部分使用$stack來存儲參數(shù)。

    函數(shù)返回值保存在$retval

    kprobe使能

    對kprobe事件的使能:通過往對應事件的enable寫1開啟探測;寫0暫停探測。

    echo > /sys/kernel/debug/tracing/trace
    echo 'p:myprobe do_sys_open dfd=%x0 filename=%x1 flags=%x2 mode=%x3' > /sys/kernel/debug/tracing/kprobe_events
    echo 'r:myretprobe do_sys_open ret=$retval' >> /sys/kernel/debug/tracing/kprobe_events
    
    echo 1 > /sys/kernel/debug/tracing/events/kprobes/myprobe/enable
    echo 1 > /sys/kernel/debug/tracing/events/kprobes/myretprobe/enable
    ls
    echo 0 > /sys/kernel/debug/tracing/events/kprobes/myprobe/enable
    echo 0 > /sys/kernel/debug/tracing/events/kprobes/myretprobe/enable
    
    cat /sys/kernel/debug/tracing/trace
    

    然后在/sys/kernel/debug/tracing/trace中可以看到結(jié)果。

    總結(jié)

    附錄

    ARM32,ARM64,X86寄存器及訪問方式

    ARM32

    "r0", pt_regs->r0
    "r1", pt_regs->r1
    "r2", pt_regs->r2
    "r3", pt_regs->r3
    "r4", pt_regs->r4
    "r5", pt_regs->r5
    "r6", pt_regs->r6
    "r7", pt_regs->r7
    "r8", pt_regs->r8
    "r9", pt_regs->r9
    "r10",pt_regs->r10
    "fp", pt_regs->fp
    "ip", pt_regs->ip
    "sp", pt_regs->sp
    "lr", pt_regs->lr
    "pc", pt_regs->pc
    

    ARM64

    "x0", pt_regs->regs[0]
    "x1", pt_regs->regs[1]
    "x2", pt_regs->regs[2]
    "x3", pt_regs->regs[3]
    "x4", pt_regs->regs[4]
    "x5", pt_regs->regs[5]
    "x6", pt_regs->regs[6]
    "x7", pt_regs->regs[7]
    "x8", pt_regs->regs[8]
    "x9", pt_regs->regs[9]
    "x10", pt_regs->regs[10]
    "x11", pt_regs->regs[11]
    "x12", pt_regs->regs[12]
    "x13", pt_regs->regs[13]
    "x14", pt_regs->regs[14]
    "x15", pt_regs->regs[15]
    "x16", pt_regs->regs[16]
    "x17", pt_regs->regs[17]
    "x18", pt_regs->regs[18]
    "x19", pt_regs->regs[19]
    "x20", pt_regs->regs[20]
    "x21", pt_regs->regs[21]
    "x22", pt_regs->regs[22]
    "x23", pt_regs->regs[23]
    "x24", pt_regs->regs[24]
    "x25", pt_regs->regs[25]
    "x26", pt_regs->regs[26]
    "x27", pt_regs->regs[27]
    "x28", pt_regs->regs[28]
    "x29", pt_regs->regs[29]
    "x30", pt_regs->regs[30]
    "sp",  pt_regs->sp
    "pc",  pt_regs->pc
    "pstate",pt_regs->pstate
    

    X86

    rax     pt_regs->ax 
    rcx     pt_regs->cx 
    rdx     pt_regs->dx 
    rbx     pt_regs->bx 
    rsp     pt_regs->sp 
    rbp     pt_regs->bp 
    rdi     pt_regs->di 
    rsi     pt_regs->si 
    r8      pt_regs->r8 
    r9      pt_regs->r9 
    r10     pt_regs->r10 
    r11     pt_regs->r11 
    r12     pt_regs->r12 
    r13     pt_regs->r13 
    r14     pt_regs->r14 
    r15     pt_regs->r15 
    

    本文參考

    https://blog.csdn.net/jakelylll/article/details/123667320

    https://www.cnblogs.com/LiuYanYGZ/p/12643846.html

    https://blog.csdn.net/weixin_45030965/article/details/125922528

    https://www.cnblogs.com/LiuYanYGZ/p/12643846.html

    https://blog.csdn.net/jasonactions/article/details/121065795

    https://blog.csdn.net/mrpre/article/details/106801888

    https://blog.csdn.net/u011622208/article/details/115535291

    kprobe https://blog.csdn.net/WANGYONGZIXUE/article/details/127525367

    https://www.kernel.org/doc/html/latest/trace/kprobetrace.html#kprobe-based-event-tracing

    https://www.cnblogs.com/arnoldlu/p/9752061.html

    前言

    上一節(jié)介紹了kprobe的基本概念,下面我們將使用幾個具體的例子,看下kprobe在實際使用中有那些應(yīng)用場景。

    kprobe

    內(nèi)核的samples/kprobe目錄下有kprobe相關(guān)的例子,我們以這些例子為基礎(chǔ),簡單修改下。

    查看函數的入參

    我們所有的例子都是探測do_sys_open() 或者_do_fork(),以下是內(nèi)核中的源碼。

    do_sys_open

    struct audit_names;
    /*
     * File name descriptor; getname() in do_sys_open() below returns a pointer
     * to one of these, and do_filp_open() takes it as its second argument.
     */
    struct filename {
     const char  *name; /* pointer to actual string */
     const __user char *uptr; /* original userland pointer */
     struct audit_names *aname;
     int   refcnt;
     const char  iname[]; /* flexible array member, must stay last */
    };
    
    /*
     * do_sys_open - open @filename relative to @dfd and install a descriptor.
     *
     * Returns the new file descriptor, or the error value produced by
     * build_open_flags(), getname(), get_unused_fd_flags() or do_filp_open().
     */
    long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode)
    {
     struct open_flags op;
     struct filename *name;
     struct file *filp;
     int fd;
    
     fd = build_open_flags(flags, mode, &op);
     if (fd != 0)
      return fd;
    
     name = getname(filename);
     if (IS_ERR(name))
      return PTR_ERR(name);
    
     fd = get_unused_fd_flags(flags);
     if (fd < 0)
      goto out_putname;
    
     filp = do_filp_open(dfd, name, &op);
     if (IS_ERR(filp)) {
      /* Give the reserved descriptor back and report the open error. */
      put_unused_fd(fd);
      fd = PTR_ERR(filp);
      goto out_putname;
     }
    
     fsnotify_open(filp);
     fd_install(fd, filp);
    
    out_putname:
     putname(name);
     return fd;
    }
    

    _do_fork

    /*
     * _do_fork - create a new task via copy_process() and start it.
     *
     * Returns nr: the value of pid_vnr() for the new task on success, or
     * PTR_ERR(p) when copy_process() fails.
     */
    long _do_fork(unsigned long clone_flags,
           unsigned long stack_start,
           unsigned long stack_size,
           int __user *parent_tidptr,
           int __user *child_tidptr,
           unsigned long tls)
    {
     struct task_struct *p;
     int trace = 0;
     long nr;
    
     /*
      * Determine whether and which event to report to ptracer.  When
      * called from kernel_thread or CLONE_UNTRACED is explicitly
      * requested, no event is reported; otherwise, report if the event
      * for the type of forking is enabled.
      */
     if (!(clone_flags & CLONE_UNTRACED)) {
      if (clone_flags & CLONE_VFORK)
       trace = PTRACE_EVENT_VFORK;
      else if ((clone_flags & CSIGNAL) != SIGCHLD)
       trace = PTRACE_EVENT_CLONE;
      else
       trace = PTRACE_EVENT_FORK;
    
      if (likely(!ptrace_event_enabled(current, trace)))
       trace = 0;
     }
    
     p = copy_process(clone_flags, stack_start, stack_size,
        child_tidptr, NULL, trace, tls, NUMA_NO_NODE);
     /*
      * Do this prior waking up the new thread - the thread pointer
      * might get invalid after that point, if the thread exits quickly.
      */
     if (!IS_ERR(p)) {
      struct completion vfork;
      struct pid *pid;
    
      cpufreq_task_times_alloc(p);
    
      trace_sched_process_fork(current, p);
    
      pid = get_task_pid(p, PIDTYPE_PID);
      nr = pid_vnr(pid);
    
      /* The result of put_user() is not checked here. */
      if (clone_flags & CLONE_PARENT_SETTID)
       put_user(nr, parent_tidptr);
    
      /* For vfork, set up the completion before the child is woken. */
      if (clone_flags & CLONE_VFORK) {
       p->vfork_done = &vfork;
       init_completion(&vfork);
       get_task_struct(p);
      }
    
      wake_up_new_task(p);
    
      /* forking complete and child started to run, tell ptracer */
      if (unlikely(trace))
       ptrace_event_pid(trace, pid);
    
      if (clone_flags & CLONE_VFORK) {
       if (!wait_for_vfork_done(p, &vfork))
        ptrace_event_pid(PTRACE_EVENT_VFORK_DONE, pid);
      }
    
      put_pid(pid);
     } else {
      nr = PTR_ERR(p);
     }
     return nr;
    }
    

    實際調(diào)試中經(jīng)常需要調(diào)查函數(shù)使用的變量的值。要在kprobes的偵測器內(nèi)顯示某個函數(shù)的局部變量的值,需要一些技巧,原因是在printk的參數(shù)中無法直接指定變量名,因此必須給偵測器函數(shù)提供一個pt_regs結(jié)構(gòu),其中保存了指定地址的命令執(zhí)行時的寄存器信息。

    當(dāng)然,不同架構(gòu)下該結(jié)構(gòu)的成員變量不盡相同,但用該結(jié)構(gòu)可以顯示變量等更為詳細(xì)的信息。

    ARM64,ARM32,X86的寄存器及其訪問方式可以看文末的附錄

    kprobe_example.c

    /*
     * NOTE: This example reads its arguments on x86 and arm64 only.
     * Here's a sample kernel module showing the use of kprobes to print
     * the dfd and file name arguments when do_filp_open() is called for
     * a file named "testfile".
     *
     * For more information on theory of operation of kprobes, see
     * Documentation/kprobes.txt
     *
     * You will see the trace data in /var/log/messages and on the console
     * whenever do_filp_open() is invoked for "testfile".
     */
    
    #include <linux/kernel.h>
    #include <linux/module.h>
    #include <linux/kprobes.h>
    #define TRACE_SYMBOL "do_filp_open"
    
    /* For each probe you need to allocate a kprobe structure */
    /* The probe point is given by symbol name (TRACE_SYMBOL); the handlers are filled in by kprobe_init(). */
    static struct kprobe kp = {
     .symbol_name = TRACE_SYMBOL,
    };
    /* x86_64 passes arguments in registers in this order: rdi rsi rdx rcx r8 r9 */
    /* aarch64: x0-x7 hold the arguments */
    /*
     * kprobe pre_handler: called just before the probed instruction is executed.
     *
     * Reads the first two arguments of the probed function (dfd and the
     * struct filename pointer) from the saved registers and logs them when the
     * file name is "testfile". On architectures not handled below nothing is
     * read and nothing is printed.
     *
     * Always returns 0.
     */
    static int handler_pre(struct kprobe *p, struct pt_regs *regs)
    {
     int dfd = -1;
     struct filename *filename = NULL;
    
     /*
      * The rdi/rsi argument order above is the x86_64 one, so the guard is
      * CONFIG_X86_64 rather than CONFIG_X86, which also matches 32-bit builds.
      */
    #if defined(CONFIG_X86_64)
     dfd = regs->di;
     filename = (struct filename *) regs->si;
    #elif defined(CONFIG_ARM64)
     dfd = regs->regs[0];
     filename = (struct filename *) regs->regs[1];
    #endif
    
     if (filename && strcmp(filename->name, "testfile") == 0)
      printk(KERN_INFO "handler_pre:%s: dfd=%d, name=%s\n", p->symbol_name, dfd, filename->name);
    
     return 0;
    }
    
    /* kprobe post_handler: called after the probed instruction is executed */
    /* Intentionally empty: the printk is commented out to keep the log quiet. */
    static void handler_post(struct kprobe *p, struct pt_regs *regs,
        unsigned long flags)
    {
     //printk(KERN_INFO "handler_post\n");
    }
    
    /*
     * fault_handler: this is called if an exception is generated for any
     * instruction within the pre- or post-handler, or when Kprobes
     * single-steps the probed instruction.
     *
     * Logging is disabled here; the handler only reports "not handled".
     */
    static int handler_fault(struct kprobe *p, struct pt_regs *regs, int trapnr)
    {
     /*printk(KERN_INFO "fault_handler: p->addr = 0x%p, trap #%dn",
      p->addr, trapnr);*/
     /* Return 0 because we don't handle the fault. */
     return 0;
    }
    
    /*
     * Module init: attach the three handlers to kp and register the probe.
     * Returns 0 on success or the negative value from register_kprobe().
     */
    static int __init kprobe_init(void)
    {
     int err;
    
     kp.pre_handler = handler_pre;
     kp.post_handler = handler_post;
     kp.fault_handler = handler_fault;
    
     err = register_kprobe(&kp);
     if (err >= 0) {
      printk(KERN_INFO "Planted kprobe at %p\n", kp.addr);
      return 0;
     }
    
     printk(KERN_INFO "register_kprobe failed, returned %d\n", err);
     return err;
    }
    
    /* Module exit: unregister the probe and log the address it was planted at. */
    static void __exit kprobe_exit(void)
    {
     unregister_kprobe(&kp);
     printk(KERN_INFO "kprobe at %p unregistered\n", kp.addr);
    }
    
    module_init(kprobe_init)
    module_exit(kprobe_exit)
    MODULE_LICENSE("GPL");
    
    

    我們以內(nèi)核目錄下的例程做一個簡單修改,探測do_filp_open函數(shù),當(dāng)打開testfile文件時,自動打印出文件的路徑。

    為了減少無效信息的打印,我們將handler_post,handler_fault直接注釋掉。

    當(dāng)探測點do_filp_open命中時,Kprobes調(diào)用handler_pre。在handler_pre根據(jù)struct filename *pathname來獲得文件的名字。

    在x86_64架構(gòu)中,函數(shù)的參數(shù)從左到右分別保存在rdi、rsi、rdx、rcx、r8、r9中,因此查看rdi和rsi就能得到第1個、第2個參數(shù)的值。

    同理,在ARM64架構(gòu)中, 函數(shù)的參數(shù)1~參數(shù)8分別保存到 X0~X7 寄存器中 ,剩下的參數(shù)從右往左依次入棧。因此,X0和X1分別存放dfd, pathname的值。

    makefile

    # Out-of-tree build of the kprobe sample modules for an arm64 target.
    CROSS_COMPILE:=aarch64-linux-gnu-
    ARCH:= arm64
    CC:= $(CROSS_COMPILE)gcc
    LD:= $(CROSS_COMPILE)ld
    
    PWD:= $(shell pwd)
    obj-m := kprobe_example.o jprobe_example.o  kretprobe_example.o
    
    KERNELDIR:=/home/zhongyi/code/rk3399_linux_release_v2.5.1_20210301/kernel
    
    .PHONY: all clean
    
    # Use $(MAKE) for the recursive call and hand ARCH/CROSS_COMPILE to kbuild explicitly.
    all:
            $(MAKE) -C  $(KERNELDIR) M=$(PWD)  modules ARCH=$(ARCH) CROSS_COMPILE=$(CROSS_COMPILE)
    clean:
            rm -f *.o
            rm -f *.symvers
            rm -f *.order
            rm -f *.ko
            rm -f *.mod.c
    

    執(zhí)行make編譯后,在開發(fā)板上將驅(qū)動加載后,手動打開testfile文件。

    insmod kprobe_example.ko
    vim testfile
    rmmod kprobe_example.ko
    dmesg
    

    使用dmesg可以看到成功輸出文件名和dfd。

    [  307.572314] Planted kprobe at ffffff80081fdf84
    [  311.997767] handler_pre:do_filp_open: dfd=-100, name=testfile
    [  312.034774] handler_pre:do_filp_open: dfd=-100, name=testfile
    [  347.969572] kprobe at ffffff80081fdf84 unregistered
    

    顯示棧跟蹤

    使用kprobes的另一個有效的調(diào)試方法,就是顯示棧跟蹤。

    我們只需要在handler_pre中調(diào)用dump_stack();即可。

    /* x86_64 passes arguments in registers in this order: rdi rsi rdx rcx r8 r9 */
    /* aarch64: x0-x7 hold the arguments */
    /* kprobe pre_handler: called just before the probed instruction is executed */
    /* This variant ignores the arguments and just dumps the current stack trace. */
    static int handler_pre(struct kprobe *p, struct pt_regs *regs)
    {
     dump_stack();
     return 0;
    }
    
    

    編譯加載

    insmod kprobe_example.ko
    rmmod kprobe_example.ko
    dmesg
    

    成功打印出棧的信息。

    [  451.620803] CPU: 4 PID: 1299 Comm: rmmod Tainted: G           O    4.4.194+ #18
    [  451.620809] Hardware name: Firefly-RK3399 Board (Linux Opensource) (DT)
    [  451.620813] Call trace:
    [  451.620820] [<ffffff8008088410>] dump_backtrace+0x0/0x220
    [  451.620828] [<ffffff8008088654>] show_stack+0x24/0x30
    [  451.620834] [<ffffff80084f842c>] dump_stack+0x94/0xbc
    [  451.620842] [<ffffff8000f22048>] handler_pre+0x14/0x24 [kprobe_example]
    [  451.620848] [<ffffff8008efd824>] kprobe_breakpoint_handler+0x100/0x14c
    [  451.620855] [<ffffff8008084128>] brk_handler+0x54/0x80
    [  451.620860] [<ffffff8008080b0c>] do_debug_exception+0x58/0xc0
    [  451.620866] Exception stack(0xffffffc0f2ef7c40 to 0xffffffc0f2ef7d70)
    [  451.620879] 7c40: ffffffc0ef782000 0000008000000000 ffffffc0f2ef7e20 ffffff80081fdf84
    [  451.620886] 7c60: 0000000060000145 ffffff8008efc228 ffffffc0ceff2a50 ffffffc0ee7d2988
    [  451.620892] 7c80: ffffffc0f2ef7ca0 ffffff80081c0dc8 ffffffc0f0582e70 00e80000e95f3f53
    [  451.620898] 7ca0: ffffffc0f2ef7d70 ffffff8008efe3e8 ffffffc0f2ef7ec0 0000005583d31928
    [  451.620905] 7cc0: 0000000000000055 0000000092000047 ffffffc0ceec5100 ffffffc0dccbd500
    [  451.620911] 7ce0: 0000000000000024 ffffffc0dccbd580 00000000ffffff9c ffffffc0ef782000
    [  451.620917] 7d00: ffffffc0f2ef7e78 0000000000000000 0000000000000000 0000000000000003
    [  451.620923] 7d20: ffffffc0dcfc9a80 0000007fd94380e8 0000000000000000 fefefefefefefeff
    [  451.620929] 7d40: 0000000000000001 0000007fd9437db8 0000000000000000 0000000000000000
    [  451.620934] 7d60: 0000000000000000 000000007fffffde
    [  451.620940] [<ffffff8008082668>] el1_dbg+0x18/0x7c
    [  451.620947] [<ffffff80081ed9a4>] SyS_openat+0x3c/0x4c
    [  451.620953] [<ffffff8008082f70>] el0_svc_naked+0x24/0x28
    [  451.630032] kprobe at ffffff80081fdf84 unregistered
    

    任意位置通過變量名獲取信息

    kprobes擁有更加強(qiáng)大的功能,那就是它能在內(nèi)核的任意地址插入偵測器。此外,偵測器可以在任意地址的指令執(zhí)行之前或之后執(zhí)行,或者前后都執(zhí)行。

    因此,應(yīng)當(dāng)觀察匯編代碼,找到源代碼中想要調(diào)查的位置對應(yīng)于編譯后的二進(jìn)制文件中的什么地址,并調(diào)查希望顯示的變量保存在哪個寄存器、哪個內(nèi)存地址。

    通常,我們希望在函數執行的過程中查看變量,即打印一些流程中的東西,而不只是在函數本身被調用時探測。此時我們不能只簡單設置 kprobe->symbol_name 為函數名字,還需要指定探測點的偏移。假設我們期望獲取 _do_fork 函數中變量 nr 的值:

    將vmlinux進(jìn)行反匯編,找出_do_fork的地址。

    aarch64-linux-gnu-objdump -s -d vmlinux > vmlinux.asm
    

    _do_fork 反匯編如下所示,地址為ffffff80080ba83c。

    ffffff80080ba83c <_do_fork>:
    ffffff80080ba83c:       a9b97bfd        stp     x29, x30, [sp, #-112]!
    ffffff80080ba840:       910003fd        mov     x29, sp
    ffffff80080ba844:       a90153f3        stp     x19, x20, [sp, #16]
    ffffff80080ba848:       a9025bf5        stp     x21, x22, [sp, #32]
    ffffff80080ba84c:       a90363f7        stp     x23, x24, [sp, #48]
    ffffff80080ba850:       aa0003f5        mov     x21, x0
    ffffff80080ba854:       aa0103f3        mov     x19, x1
    ffffff80080ba858:       aa0203f6        mov     x22, x2
    ffffff80080ba85c:       aa0303f7        mov     x23, x3
    ffffff80080ba860:       aa0403f8        mov     x24, x4
    ffffff80080ba864:       aa1e03e0        mov     x0, x30
    ffffff80080ba868:       97ff4e8a        bl      ffffff800808e290 <_mcount>
    ffffff80080ba86c:       37b814f5        tbnz    w21, #23, ffffff80080bab08 <_do_fork+0x2cc>
    ffffff80080ba870:       37701495        tbnz    w21, #14, ffffff80080bab00 <_do_fork+0x2c4>
    ffffff80080ba874:       92401ea0        and     x0, x21, #0xff
    ffffff80080ba878:       52800074        mov     w20, #0x3                       // #3
    ffffff80080ba87c:       f100441f        cmp     x0, #0x11
    ffffff80080ba880:       1a9f1694        csinc   w20, w20, wzr, ne  // ne = any
    ffffff80080ba884:       11000e81        add     w1, w20, #0x3
    ............................
    ffffff80080ba91c:       b5000fb6        cbnz    x22, ffffff80080bab10 <_do_fork+0x2d4>
    ffffff80080ba920:       52800001        mov     w1, #0x0                        // #0
    ffffff80080ba924:       aa1303e0        mov     x0, x19
    ffffff80080ba928:       94006a17        bl      ffffff80080d5184 <get_task_pid>
    ffffff80080ba92c:       aa0003f6        mov     x22, x0
    ffffff80080ba930:       94006a85        bl      ffffff80080d5344 <pid_vnr>
    ffffff80080ba934:       93407c18        sxtw    x24, w0
    ffffff80080ba938:       36a00195        tbz     w21, #20, ffffff80080ba968 <_do_fork+0x12c>
    ffffff80080ba93c:       d5384101        mrs     x1, sp_el0
    ffffff80080ba940:       f9400422        ldr     x2, [x1, #8]
    ffffff80080ba944:       aa1703e1        mov     x1, x23
    ffffff80080ba948:       b1001021        adds    x1, x1, #0x4
    
    

    變量 nr 是函數 pid_vnr() 的返回值(也是子進程的 pid)。根據 ARM64 調用規範,pid_vnr() 調用完成後,寄存器 x0 中存放的就是它的返回值。

    參考:ARM64調(diào)用標(biāo)準(zhǔn) https://blog.51cto.com/u_15333820/3452605

    通過反匯編可以知道,pid_vnr 在 ffffff80080ba930 地址處被調用,因此,偵測器的插入地址應當在 ffffff80080ba930 之後,並且在 x0 被改變之前。只要符合這兩個條件,放在哪裡都無所謂。

    因此,我們將kprobe的點設(shè)置為ffffff80080ba934,然后獲取 x0,就能獲取變量nr的值。

    .offset 是探測點相對於 _do_fork 的偏移,在註冊時指定。我們這裡的 offset = 0xffffff80080ba934 - 0xffffff80080ba83c = 0xF8。

    另外,反匯編能力就是多看匯編以及找到幾個關(guān)鍵點(例如常量,跳轉(zhuǎn)語句)就能定位到匯編對應(yīng)的源碼了,這里不再展開了。

    /*
     * NOTE: This example works on x86 and arm64.
     * Here's a sample kernel module showing the use of kprobes to dump a
     * stack trace and selected registers when _do_fork() is called.
     *
     * For more information on theory of operation of kprobes, see
     * Documentation/kprobes.txt
     *
     * You will see the trace data in /var/log/messages and on the console
     * whenever _do_fork() is invoked to create a new process.
     */
    
    #include <linux/kernel.h>
    #include <linux/module.h>
    #include <linux/kprobes.h>
    
    /* One struct kprobe is needed for every probe point. */
    static struct kprobe kp = {
     /* Probe location is given as symbol + byte offset into that symbol. */
     .offset      = 0xF8,
     .symbol_name = "_do_fork",
    };
    
    /*
     * kprobe pre_handler: runs immediately before the probed instruction.
     *
     * Logs the probe address, the instruction pointer, the flags/pstate
     * register and rax (x86) or x0 (arm64).  Always returns 0.
     */
    static int handler_pre(struct kprobe *p, struct pt_regs *regs)
    {
    #if defined(CONFIG_X86)
     printk(KERN_INFO
            "pre_handler: p->addr = 0x%p, ip = %lx, flags = 0x%lx,rax = 0x%lx\n",
            p->addr, regs->ip, regs->flags, regs->ax);
    #endif
    
    #if defined(CONFIG_ARM64)
     pr_info("<%s> pre_handler: p->addr = 0x%p, pc = 0x%lx, pstate = 0x%lx,x0 = 0x%lx\n",
             p->symbol_name, p->addr,
             (long)regs->pc, (long)regs->pstate, (long)regs->regs[0]);
    #endif
    
     /* Call dump_stack() here to additionally get a stack backtrace. */
     return 0;
    }
    
    /*
     * kprobe post_handler: runs after the probed instruction has executed.
     * Logs the probe address and the flags (x86) or pstate (arm64) register.
     */
    static void handler_post(struct kprobe *p, struct pt_regs *regs,
                             unsigned long flags)
    {
    #if defined(CONFIG_X86)
     printk(KERN_INFO "post_handler: p->addr = 0x%p, flags = 0x%lx\n",
            p->addr, regs->flags);
    #endif
    
    #if defined(CONFIG_ARM64)
     pr_info("<%s> post_handler: p->addr = 0x%p, pstate = 0x%lx\n",
             p->symbol_name, p->addr, (long)regs->pstate);
    #endif
    }
    
    /*
     * fault_handler: this is called if an exception is generated for any
     * instruction within the pre- or post-handler, or when Kprobes
     * single-steps the probed instruction.
     *
     * Logs the probe address and the trap number.  Returns 0 to report that
     * this handler did not handle the fault.
     */
    static int handler_fault(struct kprobe *p, struct pt_regs *regs, int trapnr)
    {
     /* The message must end in a real newline ("\n"), not a literal 'n'. */
     printk(KERN_INFO "fault_handler: p->addr = 0x%p, trap #%d\n",
      p->addr, trapnr);
     /* Return 0 because we don't handle the fault. */
     return 0;
    }
    
    /*
     * Module init: install the three handlers on kp and register the probe.
     * Returns 0 on success, or the negative value returned by
     * register_kprobe() on failure.
     */
    static int __init kprobe_init(void)
    {
     int err;
    
     kp.fault_handler = handler_fault;
     kp.post_handler = handler_post;
     kp.pre_handler = handler_pre;
    
     err = register_kprobe(&kp);
     if (err >= 0) {
      printk(KERN_INFO "Planted kprobe at %p\n", kp.addr);
      return 0;
     }
    
     printk(KERN_INFO "register_kprobe failed, returned %d\n", err);
     return err;
    }
    
    /* Module exit: unregister the probe and log the address it was planted at. */
    static void __exit kprobe_exit(void)
    {
     unregister_kprobe(&kp);
     printk(KERN_INFO "kprobe at %p unregistered\n", kp.addr);
    }
    
    /* Declare kprobe_init/kprobe_exit as the module's init and exit functions. */
    module_init(kprobe_init)
    module_exit(kprobe_exit)
    MODULE_LICENSE("GPL");
    
    
    insmod kprobe_example.ko
    rmmod kprobe_example.ko
    dmesg
    

    編譯加載后,成功打印出rax的值。

    [  245.080636] pre_handler: p->addr = 0x0000000050a6c3dd, ip = ffffffffa5ca0009, flags = 0x246,rax = 0x2
    [  245.080640] post_handler: p->addr = 0x0000000050a6c3dd, flags = 0x246
    [  245.080936] pre_handler: p->addr = 0x0000000050a6c3dd, ip = ffffffffa5ca0009, flags = 0x246,rax = 0x2
    [  245.080938] post_handler: p->addr = 0x0000000050a6c3dd, flags = 0x246
    [  245.457340] pre_handler: p->addr = 0x0000000050a6c3dd, ip = ffffffffa5ca0009, flags = 0x246,rax = 0x2
    [  245.457345] post_handler: p->addr = 0x0000000050a6c3dd, flags = 0x246
    [  245.457643] pre_handler: p->addr = 0x0000000050a6c3dd, ip = ffffffffa5ca0009, flags = 0x246,rax = 0x2
    [  245.457645] post_handler: p->addr = 0x0000000050a6c3dd, flags = 0x246
    [  245.719208] pre_handler: p->addr = 0x0000000050a6c3dd, ip = ffffffffa5ca0009, flags = 0x246,rax = 0x2
    [  245.719213] post_handler: p->addr = 0x0000000050a6c3dd, flags = 0x246
    [  245.719505] pre_handler: p->addr = 0x0000000050a6c3dd, ip = ffffffffa5ca0009, flags = 0x246,rax = 0x2
    [  245.719507] post_handler: p->addr = 0x0000000050a6c3dd, flags = 0x246
    [  245.820761] pre_handler: p->addr = 0x0000000050a6c3dd, ip = ffffffffa5ca0009, flags = 0x246,rax = 0x2
    [  245.820765] post_handler: p->addr = 0x0000000050a6c3dd, flags = 0x246
    [  245.821061] pre_handler: p->addr = 0x0000000050a6c3dd, ip = ffffffffa5ca0009, flags = 0x246,rax = 0x2
    [  245.821063] post_handler: p->addr = 0x0000000050a6c3dd, flags = 0x246
    [  246.092572] pre_handler: p->addr = 0x0000000050a6c3dd, ip = ffffffffa5ca0009, flags = 0x246,rax = 0x2
    [  246.092577] post_handler: p->addr = 0x0000000050a6c3dd, flags = 0x246
    [  246.095863] pre_handler: p->addr = 0x0000000050a6c3dd, ip = ffffffffa5ca0009, flags = 0x246,rax = 0x2
    [  246.095867] post_handler: p->addr = 0x0000000050a6c3dd, flags = 0x246
    [  246.126196] kprobe at 0000000050a6c3dd unregistered
    

    jprobe

    與kprobes相比,jprobes能更容易地獲取傳給函數(shù)的參數(shù)。有幾點需要注意:

    1. 處理程序應(yīng)該有與被探測函數(shù)相同的參數(shù)列表和返回類型;
    2. 返回之前,必須調(diào)用jprobe_return()(處理程序?qū)嶋H上從未返回,因為jprobe_return()將控制權(quán)返回給Kprobes) 。

    查看函數(shù)的參數(shù)

    /*
     * Here's a sample kernel module showing the use of jprobes to dump
     * the arguments of do_filp_open().
     *
     * For more information on theory of operation of jprobes, see
     * Documentation/kprobes.txt
     *
     * Build and insert the kernel module as done in the kprobe example.
     * You will see the trace data in /var/log/messages and on the
     * console whenever do_filp_open() is invoked to open "testfile".
     * (Some messages may be suppressed if syslogd is configured to
     * eliminate duplicate messages.)
     */
    
    #include <linux/kernel.h>
    #include <linux/module.h>
    #include <linux/kprobes.h>
    
    /*
     * Jumper probe for do_filp_open.
     * Mirror principle enables access to arguments of the probed routine
     * from the probe handler.
     */
    
    /* Proxy routine having the same arguments as actual _do_fork() routine */
    #define TRACE_SYMBOL "do_filp_open"
    /*與do_filp_open 的參數(shù)完全相同*/
    static struct file * jp_do_filp_open(int dfd, struct filename *pathname,
      const struct open_flags *op)
    {
     if (pathname && !(strcmp(pathname->name, "testfile")))
     printk(KERN_INFO "jprobe: dfd = %d, pathname = %s\n", dfd, pathname->name);
    
     /* Always end with a call to jprobe_return(). */
     jprobe_return();
     return 0;
    }
    
    /* jprobe descriptor: ties the handler jp_do_filp_open to TRACE_SYMBOL. */
    static struct jprobe my_jprobe = {
     .kp.symbol_name = TRACE_SYMBOL,
     .entry          = jp_do_filp_open,
    };
    
    /*
     * Module init: register the jprobe.
     *
     * Returns 0 on success.  On failure the negative error code from
     * register_jprobe() is propagated unchanged, so the caller sees the
     * real cause instead of a hard-coded -1.
     */
    static int __init jprobe_init(void)
    {
     int ret;
    
     ret = register_jprobe(&my_jprobe);
     if (ret < 0) {
      printk(KERN_INFO "register_jprobe failed, returned %d\n", ret);
      return ret;
     }
     printk(KERN_INFO "Planted jprobe at %p, handler addr %p\n",
            my_jprobe.kp.addr, my_jprobe.entry);
     return 0;
    }
    
    /* Module exit: unregister the jprobe and log the address it was planted at. */
    static void __exit jprobe_exit(void)
    {
     unregister_jprobe(&my_jprobe);
     printk(KERN_INFO "jprobe at %p unregistered\n", my_jprobe.kp.addr);
    }
    
    /* Declare jprobe_init/jprobe_exit as the module's init and exit functions. */
    module_init(jprobe_init)
    module_exit(jprobe_exit)
    MODULE_LICENSE("GPL");
    

    使用kprobes時,必須通過寄存器或棧才能計算出參數(shù)的值。此外,計算方法還依賴于架構(gòu)。

    如果使用jprobes,那么無須了解架構(gòu)的詳細(xì)知識,也能簡單地查看參數(shù)的值。

    編譯加載驅(qū)動程序

    insmod jprobe_example.ko
    vim testfile
    rmmod jprobe_example.ko
    dmesg
    

    成功打印出函數(shù)的參數(shù)

    [  612.670453] jprobe at ffffff80081fdf84 unregistered
    [  867.293765] Planted jprobe at ffffff80081fdf84, handler addr ffffff8000f1a000
    [  871.107502] jprobe: dfd = -100, pathname = testfile
    [  871.147747] jprobe: dfd = -100, pathname = testfile
    [  875.723761] jprobe at ffffff80081fdf84 unregistered
    [  907.706066] Planted jprobe at ffffff80081fdf84, handler addr ffffff8000f22000
    [  911.661891] jprobe: dfd = -100, pathname = testfile
    [  911.694903] jprobe: dfd = -100, pathname = testfile
    [  919.272187] jprobe at ffffff80081fdf84 unregistered
    [ 2296.830613] Planted jprobe at ffffff80081fdf84, handler addr ffffff8000f2a000
    [ 2302.164861] jprobe: dfd = -100, pathname = testfile
    [ 2302.200634] jprobe: dfd = -100, pathname = testfile
    [ 2307.407014] jprobe at ffffff80081fdf84 unregistered
    

    kretprobe

    kretprobe 也是基于kprobe的,相比于kprobe和jprobe,實現(xiàn)相對復(fù)雜。下面我們以內(nèi)核目錄下的例程,簡單分析下。

    kretprobe_example.c

    /*
     * kretprobe_example.c
     *
     * Here's a sample kernel module showing the use of return probes to
     * report the return value and total time taken for probed function
     * to run.
     *
     * usage: insmod kretprobe_example.ko func=<func_name>
     *
     * If no func_name is specified, do_sys_open is instrumented
     *
     * For more information on theory of operation of kretprobes, see
     * Documentation/kprobes.txt
     *
     * Build and insert the kernel module as done in the kprobe example.
     * You will see the trace data in /var/log/messages and on the console
     * whenever the probed function returns. (Some messages may be suppressed
     * if syslogd is configured to eliminate duplicate messages.)
     */
    
    #include <linux/kernel.h>
    #include <linux/module.h>
    #include <linux/kprobes.h>
    #include <linux/ktime.h>
    #include <linux/limits.h>
    #include <linux/sched.h>
    
    /*
     * Name of the function to probe.  Defaults to "do_sys_open"; it is exposed
     * as the module parameter "func" (usage: insmod ... func=<func_name>).
     */
    static char func_name[NAME_MAX] = "do_sys_open";
    module_param_string(func, func_name, NAME_MAX, S_IRUGO);
    MODULE_PARM_DESC(func, "Function to kretprobe; this module will report the"
       " function's execution time");
    
    /* per-instance private data */
    struct my_data {
     ktime_t entry_stamp; /* written by entry_handler(), read by ret_handler() */
    };
    
    /* Here we use the entry_hanlder to timestamp function entry */
    static int entry_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
    {
     struct my_data *data;
    
     if (!current->mm)
      return 1; /* Skip kernel threads */
    
     data = (struct my_data *)ri->data;
     data->entry_stamp = ktime_get();
     return 0;
    }
    
    /*
     * Return-probe handler: log the probed function's return value and how
     * long it ran, measured from the time stamp stored by entry_handler().
     * The duration may consistently read zero, depending on the granularity
     * of time accounting on the platform.
     */
    static int ret_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
    {
     int retval = regs_return_value(regs);
     struct my_data *priv = (struct my_data *)ri->data;
     ktime_t exit_stamp = ktime_get();
     s64 elapsed_ns = ktime_to_ns(ktime_sub(exit_stamp, priv->entry_stamp));
    
     printk(KERN_INFO "%s returned %d and took %lld ns to execute\n",
            func_name, retval, (long long)elapsed_ns);
     return 0;
    }
    
    /* Return probe descriptor; kp.symbol_name is filled in by kretprobe_init(). */
    static struct kretprobe my_kretprobe = {
     .entry_handler = entry_handler,
     .handler       = ret_handler,
     .data_size     = sizeof(struct my_data),
     /* Probe up to 20 instances concurrently. */
     .maxactive     = 20,
    };
    
    /*
     * Module init: point the kretprobe at func_name and register it.
     *
     * Returns 0 on success.  On failure the negative error code from
     * register_kretprobe() is returned as-is rather than a hard-coded -1.
     */
    static int __init kretprobe_init(void)
    {
     int ret;
    
     my_kretprobe.kp.symbol_name = func_name;
     ret = register_kretprobe(&my_kretprobe);
     if (ret < 0) {
      printk(KERN_INFO "register_kretprobe failed, returned %d\n",
        ret);
      return ret;
     }
     printk(KERN_INFO "Planted return probe at %s: %p\n",
       my_kretprobe.kp.symbol_name, my_kretprobe.kp.addr);
     return 0;
    }
    
    /*
     * Module exit: unregister the return probe, then log its address and how
     * many hits were missed.
     */
    static void __exit kretprobe_exit(void)
    {
     unregister_kretprobe(&my_kretprobe);
     printk(KERN_INFO "kretprobe at %p unregistered\n",
       my_kretprobe.kp.addr);
    
     /* nmissed > 0 suggests that maxactive was set too low. */
     printk(KERN_INFO "Missed probing %d instances of %s\n",
      my_kretprobe.nmissed, my_kretprobe.kp.symbol_name);
    }
    
    /* Declare kretprobe_init/kretprobe_exit as the module's init and exit functions. */
    module_init(kretprobe_init)
    module_exit(kretprobe_exit)
    MODULE_LICENSE("GPL");
    

    struct kretprobe

    /*
     * Function-return probe -
     * Note:
     * User needs to provide a handler function, and initialize maxactive.
     * maxactive - The maximum number of instances of the probed function that
     * can be active concurrently.
     * nmissed - tracks the number of times the probed function's return was
     * ignored, due to maxactive being too low.
     *
     */
    struct kretprobe {
     struct kprobe kp;                  /* kprobe placed at the function entry */
     kretprobe_handler_t handler;       /* user callback, run after the probed function returns */
     kretprobe_handler_t entry_handler; /* optional user callback, run at function entry */
     int maxactive;                     /* max. concurrently tracked instances */
     int nmissed;                       /* hits skipped (e.g. no free instance) */
     size_t data_size;                  /* size of the private data of each instance */
     struct hlist_head free_instances;  /* idle kretprobe_instance objects */
     raw_spinlock_t lock;               /* taken around accesses to free_instances */
    };
    
    /* Signature shared by handler and entry_handler. */
    typedef int (*kretprobe_handler_t) (struct kretprobe_instance *,
            struct pt_regs *);
    
    
    
    1. 其中我們可以看到 struct kretprobe 結構體中有 struct kprobe 成員(kretprobe 是基於 kprobe 實現的)。handler:用戶自定義回調函數,被探測函數返回後被調用,一般在這個函數中獲取被探測函數的返回值。
    2. entry_handler:用戶自定義回調(diào)函數(shù),這是Kretprobes 提供了一個可選的用戶指定的處理程序,它在函數(shù)入口上運(yùn)行。 每當(dāng) kretprobe 放置在函數(shù)入口處的 kprobe 被命中時,都會調(diào)用用戶定義的 entry_handler,如果有的話。 如果 entry_handler 返回 0(成功),則保證在函數(shù)返回時調(diào)用相應(yīng)的返回處理程序。 如果 entry_handler 返回非零錯誤,則 Kprobes 將返回地址保持原樣,并且 kretprobe 對該特定函數(shù)實例沒有進(jìn)一步的影響。
    3. maxactive:被探測函數(shù)可以同時活動的最大實例數(shù)。來指定可以同時探測多少個指定函數(shù)的實例。 register_kretprobe() 預(yù)分配指定數(shù)量的 kretprobe_instance 對象。
    4. nmissed:跟蹤被探測函數(shù)的返回被忽略的次數(shù)(maxactive設(shè)置的過低)。
    5. data_size:表示kretprobe私有數(shù)據(jù)的大小,在注冊kretprobe時會根據(jù)該大小預(yù)留空間。
    6. free_instances :表示空閑的kretprobe運(yùn)行實例鏈表,它鏈接了本kretprobe的空閑實例struct kretprobe_instance結(jié)構(gòu)體表示。

    struct kretprobe_instance

    /* One running instance of a kretprobe; tracks a single call of the probed function. */
    struct kretprobe_instance {
     struct hlist_node hlist;   /* links into free_instances or kretprobe_inst_table */
     struct kretprobe *rp;      /* owning kretprobe */
     kprobe_opcode_t *ret_addr; /* original return address of the probed function */
     struct task_struct *task;  /* task this instance is tracking */
     char data[0];              /* user private data, data_size bytes */
    };
    
    1. 這個結(jié)構(gòu)體表示kretprobe的運(yùn)行實例,前文說過被探測函數(shù)在跟蹤期間可能存在并發(fā)執(zhí)行的現(xiàn)象,因此kretprobe使用一個kretprobe_instance來跟蹤一個執(zhí)行流,支持的上限為maxactive。在沒有觸發(fā)探測時,所有的kretprobe_instance實例都保存在free_instances表中,每當(dāng)有執(zhí)行流觸發(fā)一次kretprobe探測,都會從該表中取出一個空閑的kretprobe_instance實例用來跟蹤。
    2. kretprobe_instance 結構體中的 rp 指針指向所屬的 kretprobe;
    3. ret_addr用于保存原始被探測函數(shù)的返回地址(后文會看到被探測函數(shù)返回地址會被暫時替換);
    4. task用于綁定其跟蹤的進(jìn)程;
    5. data 保存用戶使用的 kretprobe 私有數據,它會在整個 kretprobe 探測運行期間在 entry_handler 和 handler 回調函數之間進行傳遞(一般用於實現統計被探測函數的執行耗時)。

    register_kretprobe

    kretprobe探測點的blackpoint,用來表示不支持kretprobe探測的函數(shù)的信息。name表示該函數(shù)名,addr表示該函數(shù)的地址。

    /* Describes one function that must not be return-probed. */
    struct kretprobe_blackpoint {
     const char *name; /* function name */
     void *addr;       /* function address */
    };
    1234
    

    blackpoint與架構(gòu)相關(guān),x86架構(gòu)不支持的kretprobe探測點如下:

    // arch/x86/kernel/kprobes/core.c
    // Functions that cannot be return-probed, as the name "blacklist" suggests.
    struct kretprobe_blackpoint kretprobe_blacklist[] = {
     {"__switch_to", }, /* This function switches only current task, but
             doesn't switch kernel stack.*/
     {NULL, NULL} /* Terminator */
    };
    
    /* Number of array entries, including the terminator. */
    const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist);
    123456789
    

    函數的開頭首先處理 kretprobe_blacklist,如果指定的被探測函數在這個 blacklist 中就直接返回 -EINVAL,表示不支持探測。在 x86 架構中是 __switch_to 這個函數,表示這個函數不能被 kretprobe 探測。

    /*
     * Register a return probe.
     *
     * Rejects blacklisted targets, installs the kernel's own pre_handler,
     * pre-allocates rp->maxactive instances on rp->free_instances and finally
     * registers the embedded kprobe.  Returns 0 on success or a negative
     * error code (-EINVAL for a blacklisted address, -ENOMEM on allocation
     * failure, or whatever kprobe_addr()/register_kprobe() report).
     */
    int register_kretprobe(struct kretprobe *rp)
    {
     int ret = 0;
     struct kretprobe_instance *inst;
     int i;
     void *addr;
    
     if (kretprobe_blacklist_size) {
      addr = kprobe_addr(&rp->kp);
      if (IS_ERR(addr))
       return PTR_ERR(addr);
      /* Return -EINVAL if the target is one of the functions in kretprobe_blacklist. */
      for (i = 0; kretprobe_blacklist[i].name != NULL; i++) {
       if (kretprobe_blacklist[i].addr == addr)
        return -EINVAL;
      }
     }
    
     /* The kernel installs its own callback, pre_handler_kretprobe. */
     /* Unlike kprobe, kretprobe does not let the user supply pre_handler, post_handler, etc. */
     
     rp->kp.pre_handler = pre_handler_kretprobe;
     rp->kp.post_handler = NULL;
     rp->kp.fault_handler = NULL;
     rp->kp.break_handler = NULL;
    
     /* Pre-allocate memory for max kretprobe instances */
     if (rp->maxactive <= 0) {
    #ifdef CONFIG_PREEMPT
      rp->maxactive = max_t(unsigned int, 10, 2*num_possible_cpus());
    #else
      rp->maxactive = num_possible_cpus();
    #endif
     }
     raw_spin_lock_init(&rp->lock);
     INIT_HLIST_HEAD(&rp->free_instances);
     /* Allocate maxactive kretprobe_instance objects, each with data_size extra bytes. */
     for (i = 0; i < rp->maxactive; i++) {
      inst = kmalloc(sizeof(struct kretprobe_instance) +
              rp->data_size, GFP_KERNEL);
      if (inst == NULL) {
       free_rp_inst(rp);
       return -ENOMEM;
      }
      INIT_HLIST_NODE(&inst->hlist);
      hlist_add_head(&inst->hlist, &rp->free_instances);
     }
    
     rp->nmissed = 0;
     /* Establish function entry probe point */
     /* Register the kprobe at the probe point. */
     ret = register_kprobe(&rp->kp);
     if (ret != 0)
      free_rp_inst(rp);
     return ret;
    }
    EXPORT_SYMBOL_GPL(register_kretprobe);
    

    最后調(diào)用 register_kprobe(&rp->kp),注冊kprobe點,可以看出kretprobe也是基于kprobe機(jī)制實現(xiàn)的,kretprobe也是一種特殊形式的kprobe。

    kretprobe注冊完成后就默認(rèn)啟動探測。

    pre_handler_kretprobe

    pre_handler_kretprobe這個函數(shù)是內(nèi)核自己定義的,內(nèi)核已經(jīng)指定該回調(diào)函數(shù),不支持用戶自定義。這個 kprobe pre_handler 在每個 kretprobe 中注冊。 當(dāng)探針命中時,它將設(shè)置返回探針。

    #ifdef CONFIG_KRETPROBES
    /*
     * This kprobe pre_handler is registered with every kretprobe. When probe
     * hits it will set up the return probe.
     *
     * Takes a free kretprobe_instance, lets the optional user entry_handler
     * veto this invocation, and otherwise has the architecture code replace
     * the return address before filing the instance in kretprobe_inst_table.
     * If no free instance is available, or in NMI context, the hit is only
     * counted in rp->nmissed.  Always returns 0.
     */
    static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
    {
     struct kretprobe *rp = container_of(p, struct kretprobe, kp);
     unsigned long hash, flags = 0;
     struct kretprobe_instance *ri;
    
     /*
      * To avoid deadlocks, prohibit return probing in NMI contexts,
      * just skip the probe and increase the (inexact) 'nmissed'
      * statistical counter, so that the user is informed that
      * something happened:
      */
     if (unlikely(in_nmi())) {
      rp->nmissed++;
      return 0;
     }
    
     /* TODO: consider to only swap the RA after the last pre_handler fired */
     hash = hash_ptr(current, KPROBE_HASH_BITS);
     raw_spin_lock_irqsave(&rp->lock, flags);
     if (!hlist_empty(&rp->free_instances)) {
      ri = hlist_entry(rp->free_instances.first,
        struct kretprobe_instance, hlist);
      hlist_del(&ri->hlist);
      raw_spin_unlock_irqrestore(&rp->lock, flags);
    
      ri->rp = rp;
      ri->task = current;
      /*
       * (1) entry_handler: a non-zero return means "do not probe this
       * call"; the instance goes straight back onto the free list.
       */
      if (rp->entry_handler && rp->entry_handler(ri, regs)) {
       raw_spin_lock_irqsave(&rp->lock, flags);
       hlist_add_head(&ri->hlist, &rp->free_instances);
       raw_spin_unlock_irqrestore(&rp->lock, flags);
       return 0;
      }
      /* (2) arch_prepare_kretprobe: save and replace the return address. */
      arch_prepare_kretprobe(ri, regs);
    
      /* XXX(hch): why is there no hlist_move_head? */
      INIT_HLIST_NODE(&ri->hlist);
      kretprobe_table_lock(hash, &flags);
      hlist_add_head(&ri->hlist, &kretprobe_inst_table[hash]);
      kretprobe_table_unlock(hash, &flags);
     } else {
      /* No free instance left: count the miss. */
      rp->nmissed++;
      raw_spin_unlock_irqrestore(&rp->lock, flags);
     }
     return 0;
    }
    NOKPROBE_SYMBOL(pre_handler_kretprobe);
    
    

    entry_handler

    struct kretprobe *rp
    rp->entry_handler && rp->entry_handler(ri, regs)
    

    entry_handler這個回調(diào)函數(shù)就是用戶自己定義的回調(diào)函數(shù)(可選的用戶指定的處理程序),前面我們已經(jīng)介紹過了,在這里不再介紹。

    /* Here we use the entry_hanlder to timestamp function entry */
    static int entry_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
    {
     struct my_data *data;
    
     //內(nèi)核線程 task->mm == NULL
     if (!current->mm)
      return 1; /* Skip kernel threads */
    
     data = (struct my_data *)ri->data;
     data->entry_stamp = ktime_get();
     return 0;
    }
    

    arch_prepare_kretprobe

    arch_prepare_kretprobe(ri, regs)該函數(shù)架構(gòu)相關(guān),struct kretprobe_instance結(jié)構(gòu)體 的 ret_addr 成員用于保存并替換regs中的返回地址。返回地址被替換為kretprobe_trampoline。

    x86架構(gòu)

    // arch/x86/kernel/kprobes/core.c
    
    /* View the kernel stack pointer saved in regs as a pointer to unsigned long. */
    #define stack_addr(regs) ((unsigned long *)kernel_stack_pointer(regs))
    
    // x86_64
    // arch/x86/include/asm/ptrace.h
    /* Return the stack pointer value stored in regs->sp. */
    static inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
    {
     return regs->sp;
    }
    // arch/x86/kernel/kprobes/core.c
    /*
     * Save the return address found at the saved stack pointer in
     * ri->ret_addr, then overwrite that stack slot so the probed function
     * returns into kretprobe_trampoline instead.
     */
    void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs)
    {
     unsigned long *ret_slot = stack_addr(regs);
    
     /* Remember where the function was really going to return to. */
     ri->ret_addr = (kprobe_opcode_t *)ret_slot[0];
    
     /* Redirect the return to the trampoline. */
     ret_slot[0] = (unsigned long)&kretprobe_trampoline;
    }
    NOKPROBE_SYMBOL(arch_prepare_kretprobe);
    
    //struct kretprobe_instance *ri;
    //ri->ret_addr;
    
    struct kretprobe_instance {
     kprobe_opcode_t *ret_addr;  /* saves the original return address of the probed function */
    };
    
    

    ARM64架構(gòu)

    // arch/arm64/kernel/probes/kprobes.c
    
    /*
     * Save the link register value (x30) in ri->ret_addr, then point x30 at
     * kretprobe_trampoline so the probed function returns there instead.
     */
    void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
              struct pt_regs *regs)
    {
     ri->ret_addr = (kprobe_opcode_t *)regs->regs[30];
    
     /* replace return addr (x30) with trampoline */
     regs->regs[30] = (long)&kretprobe_trampoline;
    }
    

    ARM64架構(gòu)中regs->regs[30]是LR(procedure link register)寄存器(X30 :LR)。

    小結(jié)

    kretprobe是基于kprobe實現(xiàn)的,有一個固定的pre_handler回調(diào)函數(shù),在內(nèi)核中實現(xiàn),無需用戶編寫。而在kprobe中pre_handler函數(shù)是提供給用戶的回調(diào)函數(shù)。

    rp->kp.pre_handler = pre_handler_kretprobe;  /* already implemented inside the kernel */
    rp->kp.post_handler = NULL;
    rp->kp.fault_handler = NULL;
    rp->kp.break_handler = NULL;
    

    kretprobe提供給用戶的兩個回調(diào)函數(shù):

    kretprobe_handler_t handler;       /* run after the probed function returns */
    kretprobe_handler_t entry_handler; /* optional; run at function entry */
    

    pre_handler回調(diào)函數(shù)會為kretprobe探測函數(shù)執(zhí)行的返回值做準(zhǔn)備工作,其中最主要的就是替換掉正常流程的返回地址,讓被探測函數(shù)在執(zhí)行之后能夠跳轉(zhuǎn)到kretprobe設(shè)計的函數(shù) kretprobe_trampoline中去。

    kretprobe_trampoline

    pre_handler_kretprobe函數(shù)返回后,kprobe流程接著執(zhí)行singlestep流程并返回到正常的執(zhí)行流程,被探測函數(shù)(do_fork)繼續(xù)執(zhí)行,直到它執(zhí)行完畢并返回。

    由於返回地址被替換為 kretprobe_trampoline,所以會跳轉到 kretprobe_trampoline 執行。該函數與架構相關,並且由嵌入匯編實現。

    該函數(shù)會獲取被探測函數(shù)的寄存器信息并調(diào)用用戶定義的回調(diào)函數(shù)輸出其中的返回值,最后函數(shù)返回正常的執(zhí)行流程。

    /* User return handler (abridged): reads the probed function's return value from regs. */
    static int ret_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
    {
     unsigned long retval = regs_return_value(regs);
     ......
    }
    
    
    /* Abridged: only the return handler hook is shown. */
    static struct kretprobe my_kretprobe = {
     .handler  = ret_handler,
    };
    

    x86架構(gòu)

    (1)

    kretprobe_trampoline
     -->trampoline_handler
    kretprobe_trampoline
    

    (2) kretprobe_trampoline

    // arch/x86/kernel/kprobes/core.c
    
    /*
     * When a retprobed function returns, this code saves registers and
     * calls trampoline_handler(), which runs the kretprobe's handler.
     * trampoline_handler() returns the original return address; it is
     * written back into the saved frame so that the final "ret" resumes
     * at the original return address.
     */
    asm(
     ".global kretprobe_trampoline\n"
     ".type kretprobe_trampoline, @function\n"
     "kretprobe_trampoline:\n"
    #ifdef CONFIG_X86_64
     /* We don't bother saving the ss register */
     " pushq %rsp\n"
     " pushfq\n"
     SAVE_REGS_STRING
     /* rdi (first argument) = address of the saved register frame. */
     " movq %rsp, %rdi\n"
     " call trampoline_handler\n"
     /* Replace saved sp with true return address. */
     " movq %rax, 152(%rsp)\n"
     RESTORE_REGS_STRING
     " popfq\n"
    #else
     " pushf\n"
     SAVE_REGS_STRING
     " movl %esp, %eax\n"
     " call trampoline_handler\n"
     /* Move flags to cs */
     " movl 56(%esp), %edx\n"
     " movl %edx, 52(%esp)\n"
     /* Replace saved flags with true return address. */
     " movl %eax, 56(%esp)\n"
     RESTORE_REGS_STRING
     " popf\n"
    #endif
     " ret\n"
     ".size kretprobe_trampoline, .-kretprobe_trampoline\n"
    );
    NOKPROBE_SYMBOL(kretprobe_trampoline);
    STACK_FRAME_NON_STANDARD(kretprobe_trampoline);
    

    (3) trampoline_handler

    // arch/x86/kernel/kprobes/core.c
    
    /*
     * Called from kretprobe_trampoline
     *
     * Finds the real return address among the current task's instances,
     * runs the user handler of every instance up to and including the one
     * holding that address, recycles those instances, and returns the
     * original return address to the trampoline.
     */
    __visible __used void *trampoline_handler(struct pt_regs *regs)
    {
     struct kretprobe_instance *ri = NULL;
     struct hlist_head *head, empty_rp;
     struct hlist_node *tmp;
     unsigned long flags, orig_ret_address = 0;
     unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
     kprobe_opcode_t *correct_ret_addr = NULL;
    
     INIT_HLIST_HEAD(&empty_rp);
     kretprobe_hash_lock(current, &head, &flags);
     /* fixup registers */
    #ifdef CONFIG_X86_64
     regs->cs = __KERNEL_CS;
    #else
     regs->cs = __KERNEL_CS | get_kernel_rpl();
     regs->gs = 0;
    #endif
     regs->ip = trampoline_address;
     regs->orig_ax = ~0UL;
    
     /*
      * It is possible to have multiple instances associated with a given
      * task either because multiple functions in the call path have
      * return probes installed on them, and/or more than one
      * return probe was registered for a target function.
      *
      * We can handle this because:
      *     - instances are always pushed into the head of the list
      *     - when multiple return probes are registered for the same
      *  function, the (chronologically) first instance's ret_addr
      *  will be the real return address, and all the rest will
      *  point to kretprobe_trampoline.
      */
     /* Pass 1: locate the instance that holds the real return address. */
     hlist_for_each_entry_safe(ri, tmp, head, hlist) {
      if (ri->task != current)
       /* another task is sharing our hash bucket */
       continue;
    
      orig_ret_address = (unsigned long)ri->ret_addr;
    
      if (orig_ret_address != trampoline_address)
       /*
        * This is the real return address. Any other
        * instances associated with this task are for
        * other calls deeper on the call stack
        */
       break;
     }
    
     kretprobe_assert(ri, orig_ret_address, trampoline_address);
    
     correct_ret_addr = ri->ret_addr;
     /* Pass 2: run the user handlers and recycle the instances. */
     hlist_for_each_entry_safe(ri, tmp, head, hlist) {
      if (ri->task != current)
       /* another task is sharing our hash bucket */
       continue;
    
      orig_ret_address = (unsigned long)ri->ret_addr;
      if (ri->rp && ri->rp->handler) {
       __this_cpu_write(current_kprobe, &ri->rp->kp);
       get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE;
       /* Let the handler see the real return address. */
       ri->ret_addr = correct_ret_addr;
       ri->rp->handler(ri, regs);
       __this_cpu_write(current_kprobe, NULL);
      }
    
      recycle_rp_inst(ri, &empty_rp);
    
      if (orig_ret_address != trampoline_address)
       /*
        * This is the real return address. Any other
        * instances associated with this task are for
        * other calls deeper on the call stack
        */
       break;
     }
    
     kretprobe_hash_unlock(current, &flags);
    
     /* Free the instances that recycle_rp_inst() queued on empty_rp. */
     hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) {
      hlist_del(&ri->hlist);
      kfree(ri);
     }
     return (void *)orig_ret_address;
    }
    NOKPROBE_SYMBOL(trampoline_handler);
    

    (4) ri->rp->handler(ri, regs) 表示執行用戶自定義的回調函數 handler(用來獲取被探測函數的返回值)。handler 回調函數執行完畢以後,調用 recycle_rp_inst 函數將當前的 kretprobe_instance 實例從 kretprobe_inst_table 哈希表中釋放,重新鏈入 free_instances 中,以備後面 kretprobe 觸發時使用;另外,如果 kretprobe 已經被註銷,則將它添加到銷毀表中待銷毀。

    ri->rp->handler(ri, regs);
     ->recycle_rp_inst(ri, &empty_rp);
    12
    /*
     * Unlink a finished instance from the list it is on.  If it still has an
     * owning kretprobe it goes back onto that probe's free list; otherwise
     * (probe being unregistered) it is queued on @head.
     */
    void recycle_rp_inst(struct kretprobe_instance *ri,
           struct hlist_head *head)
    {
     struct kretprobe *owner = ri->rp;
    
     /* remove rp inst off the rprobe_inst_table */
     hlist_del(&ri->hlist);
     INIT_HLIST_NODE(&ri->hlist);
    
     if (unlikely(!owner)) {
      /* Unregistering */
      hlist_add_head(&ri->hlist, head);
      return;
     }
    
     raw_spin_lock(&owner->lock);
     hlist_add_head(&ri->hlist, &owner->free_instances);
     raw_spin_unlock(&owner->lock);
    }
    NOKPROBE_SYMBOL(recycle_rp_inst);
    

    (5) trampoline_handler 函數執行完後,返回被探測函數的原始返回地址,執行流程再次回到 kretprobe_trampoline 函數中,將保存的 sp 替換為真實的返回地址:從 rax 寄存器中取出原始的返回地址,然後恢復原始函數的調用棧空間,最後跳轉到原始返回地址執行。至此函數調用的流程就回歸正常流程了,整個 kretprobe 探測結束。

    /* Replace saved sp with true return address. */
     " movq %rax, 152(%rsp)\n"
     RESTORE_REGS_STRING
     " popfq\n"
    

    ARM64架構(gòu)

    (1)

    kretprobe_trampoline 
     -->trampoline_probe_handler
    kretprobe_trampoline 
    

    (2) kretprobe_trampoline

    // arch/arm64/kernel/probes/kprobes_trampoline.S
    
    /*
     * kretprobe_trampoline: assembly stub that calls trampoline_probe_handler
     * with the current stack pointer as its argument and then returns to the
     * address that handler hands back in x0.
     */
    ENTRY(kretprobe_trampoline)
     /* reserve S_FRAME_SIZE bytes on the stack */
     sub sp, sp, #S_FRAME_SIZE
    
     /*
      * NOTE(review): save_all_base_regs is a macro defined outside this
      * excerpt; presumably it stores the registers into the frame just
      * reserved -- confirm against the macro definition.
      */
     save_all_base_regs
    
     /* x0 = sp: the handler receives the frame address as its regs argument */
     mov x0, sp
     bl trampoline_probe_handler
     /*
      * Replace trampoline address in lr with actual orig_ret_addr return
      * address.
      */
     mov lr, x0
    
     /* NOTE(review): counterpart of save_all_base_regs; macro body not shown */
     restore_all_base_regs
    
     /* release the frame reserved on entry, then return through lr */
     add sp, sp, #S_FRAME_SIZE
     ret
    
    ENDPROC(kretprobe_trampoline)
    

    (3) trampoline_probe_handler

    // arch/arm64/kernel/probes/kprobes.c
    
    /*
     * trampoline_probe_handler - C half of the arm64 kretprobe return trampoline
     * @regs: register frame supplied by the caller; passed unchanged to every
     *        kretprobe handler invoked here
     *
     * Walks the kretprobe instances on the hash list obtained for `current`.
     * A first pass locates the real return address; a second pass runs the
     * handler of each instance owned by the current task, up to and including
     * the one that stored the real return address, and recycles those
     * instances. Instances that recycle_rp_inst() placed on the local empty_rp
     * list are freed after the hash lock is dropped.
     *
     * Returns the real return address, cast to a void pointer.
     */
    void __kprobes __used *trampoline_probe_handler(struct pt_regs *regs)
    {
     struct kretprobe_instance *ri = NULL;
     struct hlist_head *head, empty_rp;
     struct hlist_node *tmp;
     unsigned long flags, orig_ret_address = 0;
     unsigned long trampoline_address =
      (unsigned long)&kretprobe_trampoline;
     kprobe_opcode_t *correct_ret_addr = NULL;
    
     INIT_HLIST_HEAD(&empty_rp);
     kretprobe_hash_lock(current, &head, &flags);
    
     /*
      * It is possible to have multiple instances associated with a given
      * task either because multiple functions in the call path have
      * return probes installed on them, and/or more than one
      * return probe was registered for a target function.
      *
      * We can handle this because:
      *     - instances are always pushed into the head of the list
      *     - when multiple return probes are registered for the same
      *  function, the (chronologically) first instance's ret_addr
      *  will be the real return address, and all the rest will
      *  point to kretprobe_trampoline.
      */
     /* Pass 1: only search for the real return address; nothing is modified. */
     hlist_for_each_entry_safe(ri, tmp, head, hlist) {
      if (ri->task != current)
       /* another task is sharing our hash bucket */
       continue;
    
      orig_ret_address = (unsigned long)ri->ret_addr;
    
      if (orig_ret_address != trampoline_address)
       /*
        * This is the real return address. Any other
        * instances associated with this task are for
        * other calls deeper on the call stack
        */
       break;
     }
    
     /*
      * NOTE(review): kretprobe_assert() is defined outside this excerpt;
      * presumably it traps when no real return address was found -- confirm.
      */
     kretprobe_assert(ri, orig_ret_address, trampoline_address);
    
     /*
      * When the first loop left via break, ri is the instance that stored
      * the real return address.
      */
     correct_ret_addr = ri->ret_addr;
     /* Pass 2: run the handlers and recycle the instances. */
     hlist_for_each_entry_safe(ri, tmp, head, hlist) {
      if (ri->task != current)
       /* another task is sharing our hash bucket */
       continue;
    
      /* remember the stored value before it is overwritten below */
      orig_ret_address = (unsigned long)ri->ret_addr;
      if (ri->rp && ri->rp->handler) {
       __this_cpu_write(current_kprobe, &ri->rp->kp);
       get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE;
       /* the handler sees the real return address, not the trampoline's */
       ri->ret_addr = correct_ret_addr;
       ri->rp->handler(ri, regs);
       __this_cpu_write(current_kprobe, NULL);
      }
    
      recycle_rp_inst(ri, &empty_rp);
    
      if (orig_ret_address != trampoline_address)
       /*
        * This is the real return address. Any other
        * instances associated with this task are for
        * other calls deeper on the call stack
        */
       break;
     }
    
     kretprobe_hash_unlock(current, &flags);
    
     /* free the instances collected on empty_rp, now that the lock is dropped */
     hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) {
      hlist_del(&ri->hlist);
      kfree(ri);
     }
     return (void *)orig_ret_address;
    }
    

    (4) 將 lr寄存器中的trampoline地址替換為實際的 orig_ret_addr 返回地址。 從x0寄存器中取出原始的返回地址,然后恢復原始函數調用棧空間,最后跳轉到原始返回地址執行,至此函數調用的流程就回歸正常流程了,整個kretprobe探測結束。

    /*
      * Replace trampoline address in lr with actual orig_ret_addr return
      * address.
      */
     mov lr, x0
    
     restore_all_base_regs
    
     add sp, sp, #S_FRAME_SIZE
     ret
    

    編譯運(yùn)行

    insmod kprobe_example.ko
    vim testfile
    rmmod kprobe_example.ko
    dmesg
    

    成功打印出函數(shù)的執(zhí)行時間

    [ 1056.875938] do_sys_open returned -2 and took 10500 ns to execute
    [ 1057.567400] do_sys_open returned 34 and took 59208 ns to execute
    [ 1058.382932] do_sys_open returned 3 and took 31469101 ns to execute
    [ 1058.567046] do_sys_open returned 34 and took 61250 ns to execute
    [ 1058.975879] do_sys_open returned 3 and took 224084 ns to execute
    [ 1058.975935] do_sys_open returned 3 and took 16917 ns to execute
    [ 1058.976041] do_sys_open returned 3 and took 13417 ns to execute
    [ 1058.976148] do_sys_open returned 3 and took 15167 ns to execute
    [ 1058.976254] do_sys_open returned 3 and took 15750 ns to execute
    [ 1058.976356] do_sys_open returned 3 and took 16042 ns to execute
    [ 1058.978036] do_sys_open returned -2 and took 23041 ns to execute
    [ 1058.978074] do_sys_open returned 3 and took 24500 ns to execute
    [ 1058.978175] do_sys_open returned -2 and took 9334 ns to execute
    [ 1058.978211] do_sys_open returned 3 and took 23333 ns to execute
    [ 1058.978246] do_sys_open returned 3 and took 13417 ns to execute
    [ 1058.978286] do_sys_open returned 3 and took 14583 ns to execute
    [ 1058.989701] kretprobe at ffffff80081ed6c8 unregistered
    [ 1058.989709] Missed probing 0 instances of do_sys_open
    

    Kprobe-based Event Tracing

    這些事件類似于基于tracepoint的事件。與Tracepoint不同,它是基于kprobes(kprobe和kretprobe)的。所以它可以探測任何kprobes可以探測的地方。與基于Tracepoint的事件不同的是,它可以動態(tài)地添加和刪除。

    要啟用這個功能,在編譯內(nèi)核時CONFIG_KPROBE_EVENTS=y

    與Event Tracing類似,這不需要通過current_tracer來激活。可以通過/sys/kernel/debug/tracing/kprobe_events添加探測點,并通過/sys/kernel/debug/tracing/events/kprobes/<EVENT>/enable來啟用它。

    你也可以使用/sys/kernel/debug/tracing/dynamic_events,而不是kprobe_events。該接口也將提供對其他動態(tài)事件的統(tǒng)一訪問。

    Synopsis of kprobe_events

    kprobe和內核的ftrace結合使用,需要對內核進行配置,然后添加探測點、進行探測、查看結果。

    kprobe配置

    CONFIG_KPROBES=y
    CONFIG_OPTPROBES=y
    CONFIG_KPROBES_ON_FTRACE=y
    CONFIG_UPROBES=y
    CONFIG_KRETPROBES=y
    CONFIG_HAVE_KPROBES=y
    CONFIG_HAVE_KRETPROBES=y
    CONFIG_HAVE_OPTPROBES=y
    CONFIG_HAVE_KPROBES_ON_FTRACE=y
    CONFIG_KPROBE_EVENT=y
    

    kprobe trace events使用

    kprobe事件相關(guān)的節(jié)點有如下:

    /sys/kernel/debug/tracing/kprobe_events-----------------------配置kprobe事件屬性,增加事件之后會在kprobes下面生成對應目錄。
    /sys/kernel/debug/tracing/kprobe_profile----------------------kprobe事件統計屬性文件。
    /sys/kernel/debug/tracing/events/<GRP>/<EVENT>/enable---------使能kprobe事件
    /sys/kernel/debug/tracing/events/<GRP>/<EVENT>/filter---------過濾kprobe事件
    /sys/kernel/debug/tracing/events/<GRP>/<EVENT>/format---------查詢kprobe事件顯示格式
    

    kprobe事件配置

    新增一個kprobe事件,通過寫kprobe_events來設(shè)置。

    p[:[GRP/]EVENT] [MOD:]SYM[+offs]|MEMADDR [FETCHARGS]-------------------設(shè)置一個probe探測點
    r[:[GRP/]EVENT] [MOD:]SYM[+0] [FETCHARGS]------------------------------設(shè)置一個return probe探測點
    -:[GRP/]EVENT----------------------------------------------------------刪除一個探測點
    

    細(xì)節(jié)解釋如下:

    GRP        : Group name. If omitted, use "kprobes" for it.------------設置后會在events/kprobes下創建<GRP>目錄。
     EVENT        : Event name. If omitted, the event name is generated based on SYM+offs or MEMADDR.---指定后在events/kprobes/<GRP>生成<EVENT>目錄。
     MOD        : Module name which has given SYM.--------------------------模塊名,一般不設
     SYM[+offs]    : Symbol+offset where the probe is inserted.-------------被探測函數名和偏移
     MEMADDR    : Address where the probe is inserted.----------------------指定被探測的內存絕對地址
     FETCHARGS    : Arguments. Each probe can have up to 128 args.----------指定要獲取的參數信息。
     %REG        : Fetch register REG---------------------------------------獲取指定寄存器值
     @ADDR        : Fetch memory at ADDR (ADDR should be in kernel)--------獲取指定內存地址的值
     @SYM[+|-offs]    : Fetch memory at SYM +|- offs (SYM should be a data symbol)---獲取全局變量的值
     $stackN    : Fetch Nth entry of stack (N >= 0)----------------------------------獲取指定棧空間值,即sp寄存器+N后的位置值
     $stack    : Fetch stack address.-----------------------------------------------獲取sp寄存器值
     $retval    : Fetch return value.(*)--------------------------------------------獲取返回值,用戶return kprobe
     $comm        : Fetch current task comm.----------------------------------------獲取對應進程名稱。
     +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(**)-------------
     NAME=FETCHARG : Set NAME as the argument name of FETCHARG.
     FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types (u8/u16/u32/u64/s8/s16/s32/s64), hexadecimal types
              (x8/x16/x32/x64), "string" and bitfield are supported.----------------設(shè)置參數(shù)的類型,可以支持字符串和比特類型
      (*) only for return probe.
      (**) this is useful for fetching a field of data structures.
    

    執(zhí)行如下兩條命令就會生成目錄/sys/kernel/debug/tracing/events/kprobes/myprobe;第三條命令則可以刪除指定kprobe事件,如果要全部刪除則echo > /sys/kernel/debug/tracing/kprobe_events

    echo 'p:myprobe do_sys_open dfd=%x0 filename=%x1 flags=%x2 mode=+4($stack)' > /sys/kernel/debug/tracing/kprobe_events
    echo 'r:myretprobe do_sys_open ret=$retval' >> /sys/kernel/debug/tracing/kprobe_events-----------------------------------------------------這里面一定要用">>",不然就會覆蓋前面的設(shè)置。
    
    echo '-:myprobe' >> /sys/kernel/debug/tracing/kprobe_events
    echo '-:myretprobe' >> /sys/kernel/debug/tracing/kprobe_events
    

    參數后面的寄存器是跟架構相關的,%x0、%x1、%x2表示第1/2/3個參數,超出部分使用$stack來存儲參數。

    函數(shù)返回值保存在$retval

    kprobe使能

    對kprobe事件的使能,通過往對應事件的enable寫1開啟探測;寫0暫停探測。

    echo > /sys/kernel/debug/tracing/trace
    echo 'p:myprobe do_sys_open dfd=%x0 filename=%x1 flags=%x2 mode=+4($stack)' > /sys/kernel/debug/tracing/kprobe_events
    echo 'r:myretprobe do_sys_open ret=$retval' >> /sys/kernel/debug/tracing/kprobe_events
    
    echo 1 > /sys/kernel/debug/tracing/events/kprobes/myprobe/enable
    echo 1 > /sys/kernel/debug/tracing/events/kprobes/myretprobe/enable
    ls
    echo 0 > /sys/kernel/debug/tracing/events/kprobes/myprobe/enable
    echo 0 > /sys/kernel/debug/tracing/events/kprobes/myretprobe/enable
    
    cat /sys/kernel/debug/tracing/trace
    

    然后在/sys/kernel/debug/tracing/trace中可以看到結(jié)果。

    總結(jié)

    附錄

    ARM32,ARM64,X86寄存器及訪問方式

    ARM32

    "r0", pt_regs->r0
    "r1", pt_regs->r1
    "r2", pt_regs->r2
    "r3", pt_regs->r3
    "r4", pt_regs->r4
    "r5", pt_regs->r5
    "r6", pt_regs->r6
    "r7", pt_regs->r7
    "r8", pt_regs->r8
    "r9", pt_regs->r9
    "r10",pt_regs->r10
    "fp", pt_regs->fp
    "ip", pt_regs->ip
    "sp", pt_regs->sp
    "lr", pt_regs->lr
    "pc", pt_regs->pc
    

    ARM64

    "x0", pt_regs->regs[0]
    "x1", pt_regs->regs[1]
    "x2", pt_regs->regs[2]
    "x3", pt_regs->regs[3]
    "x4", pt_regs->regs[4]
    "x5", pt_regs->regs[5]
    "x6", pt_regs->regs[6]
    "x7", pt_regs->regs[7]
    "x8", pt_regs->regs[8]
    "x9", pt_regs->regs[9]
    "x10", pt_regs->regs[10]
    "x11", pt_regs->regs[11]
    "x12", pt_regs->regs[12]
    "x13", pt_regs->regs[13]
    "x14", pt_regs->regs[14]
    "x15", pt_regs->regs[15]
    "x16", pt_regs->regs[16]
    "x17", pt_regs->regs[17]
    "x18", pt_regs->regs[18]
    "x19", pt_regs->regs[19]
    "x20", pt_regs->regs[20]
    "x21", pt_regs->regs[21]
    "x22", pt_regs->regs[22]
    "x23", pt_regs->regs[23]
    "x24", pt_regs->regs[24]
    "x25", pt_regs->regs[25]
    "x26", pt_regs->regs[26]
    "x27", pt_regs->regs[27]
    "x28", pt_regs->regs[28]
    "x29", pt_regs->regs[29]
    "x30", pt_regs->regs[30]
    "sp",  pt_regs->sp
    "pc",  pt_regs->pc
    "pstate",pt_regs->pstate
    

    X86

    rax     pt_regs->ax 
    rcx     pt_regs->cx 
    rdx     pt_regs->dx 
    rbx     pt_regs->bx 
    rsp     pt_regs->sp 
    rbp     pt_regs->bp 
    rdi     pt_regs->di 
    rsi     pt_regs->si 
    r8      pt_regs->r8 
    r9      pt_regs->r9 
    r10     pt_regs->r10 
    r11     pt_regs->r11 
    r12     pt_regs->r12 
    r13     pt_regs->r13 
    r14     pt_regs->r14 
    r15     pt_regs->r15 
    

    本文參考

    https://blog.csdn.net/jakelylll/article/details/123667320

    https://www.cnblogs.com/LiuYanYGZ/p/12643846.html

    https://blog.csdn.net/weixin_45030965/article/details/125922528

    https://www.cnblogs.com/LiuYanYGZ/p/12643846.html

    https://blog.csdn.net/jasonactions/article/details/121065795

    https://blog.csdn.net/mrpre/article/details/106801888

    https://blog.csdn.net/u011622208/article/details/115535291

    kprobe https://blog.csdn.net/WANGYONGZIXUE/article/details/127525367

    https://www.kernel.org/doc/html/latest/trace/kprobetrace.html#kprobe-based-event-tracing

    https://www.cnblogs.com/arnoldlu/p/9752061.html

網(wǎng)站首頁   |    關(guān)于我們   |    公司新聞   |    產(chǎn)品方案   |    用戶案例   |    售后服務(wù)   |    合作伙伴   |    人才招聘   |   

友情鏈接: 餐飲加盟

地址:北京市海淀區(qū)    電話:010-     郵箱:@126.com

備案號:冀ICP備2024067069號-3 北京科技有限公司版權(quán)所有