Thursday, February 20, 2020

[Tracing] Dynamic tracing in Linux using uprobe and kprobe

This post is based on the following articles to trace again by myself. Through this process, I can get a deeper impression of the user-space tracing using uprobe.
https://opensource.com/article/17/7/dynamic-tracing-linux-user-and-kernel-space
http://www.brendangregg.com/perf.html


A Simple Test Code in C:
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

static int func_1_cnt;
static int func_2_cnt;

static void func_1(void)
{
    func_1_cnt++;
}

static void func_2(void)
{
    func_2_cnt++;
}

int main(int argc, void **argv)
{
    int number;

    while(1) {
        sleep(1);
        number = rand() % 10;
        if (number < 5)
            func_2();
        else
            func_1();
    }
}
Compile the code and find the instruction addresses you want to probe:
> gcc -o test test.c
> objdump -Sd test

Let's say we have the following object code on the X86_64 platform:
...
...
000000000000068a <func_1>:

static int func_1_cnt;
static int func_2_cnt;

static void func_1(void)
{
 68a:   55                      push   %rbp
 68b:   48 89 e5                mov    %rsp,%rbp
        func_1_cnt++;
 68e:   8b 05 80 09 20 00       mov    0x200980(%rip),%eax        # 201014 <func_1_cnt>
 694:   83 c0 01                add    $0x1,%eax
 697:   89 05 77 09 20 00       mov    %eax,0x200977(%rip)        # 201014 <func_1_cnt>
}
 69d:   90                      nop
 69e:   5d                      pop    %rbp
 69f:   c3                      retq

00000000000006a0 <func_2>:

static void func_2(void)
{
 6a0:   55                      push   %rbp
 6a1:   48 89 e5                mov    %rsp,%rbp
        func_2_cnt++;
 6a4:   8b 05 6e 09 20 00       mov    0x20096e(%rip),%eax        # 201018 <func_2_cnt>
 6aa:   83 c0 01                add    $0x1,%eax
 6ad:   89 05 65 09 20 00       mov    %eax,0x200965(%rip)        # 201018 <func_2_cnt>
}
 6b3:   90                      nop
 6b4:   5d                      pop    %rbp
 6b5:   c3                      retq

00000000000006b6 <main>:

int main(int argc, void **argv)
{
 6b6:   55                      push   %rbp
 6b7:   48 89 e5                mov    %rsp,%rbp
...
...

And we want to insert a probe at offset 0x6a0 and 0x68a.
Execute the following commands:
echo 0 > /sys/kernel/debug/tracing/events/uprobes/enable
echo 'p:func_2_entry ./test:0x6a0' >> /sys/kernel/debug/tracing/uprobe_events
echo 'p:func_1_entry ./test:0x68a' >> /sys/kernel/debug/tracing/uprobe_events
echo 'r:func_2_exit ./test:0x6a0' >> /sys/kernel/debug/tracing/uprobe_events
echo 'r:func_1_exit ./test:0x68a' >> /sys/kernel/debug/tracing/uprobe_events
echo 1 > /sys/kernel/debug/tracing/events/uprobes/enable
./test &

Read the trace file to see the output:
cat /sys/kernel/debug/tracing/trace
# tracer: nop
#
# entries-in-buffer/entries-written: 674/674   #P:16
#
#                              _-----=> irqs-off
#                             / _----=> need-resched
#                            | / _---=> hardirq/softirq
#                            || / _--=> preempt-depth
#                            ||| /     delay
#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION
#              | |       |   ||||       |         |
            test-21809 [005] d... 167090.473000: func_1_entry: (0x559f2a4f968a)
            test-21809 [005] d... 167090.473009: func_1_exit: (0x559f2a4f9713 <- 0x559f2a4f968a)
            test-21809 [005] d... 167091.473175: func_2_entry: (0x559f2a4f96a0)
            test-21809 [005] d... 167091.473184: func_2_exit: (0x559f2a4f970c <- 0x559f2a4f96a0)
            test-21809 [005] d... 167092.473350: func_1_entry: (0x559f2a4f968a)
            test-21809 [005] d... 167092.473359: func_1_exit: (0x559f2a4f9713 <- 0x559f2a4f968a)
            test-21809 [005] d... 167093.473518: func_1_entry: (0x559f2a4f968a)
            test-21809 [005] d... 167093.473526: func_1_exit: (0x559f2a4f9713 <- 0x559f2a4f968a)
            test-21809 [005] d... 167094.473690: func_1_entry: (0x559f2a4f968a)
            test-21809 [005] d... 167094.473699: func_1_exit: (0x559f2a4f9713 <- 0x559f2a4f968a)

Here we are printing the value of X86_64 cx register as count= when the instruction at offset 0x6a0 is executed.
echo 0 > /sys/kernel/debug/tracing/events/uprobes/enable
echo 'p:func_2_entry ./test:0x6a0' > /sys/kernel/debug/tracing/uprobe_events count=%cx
echo 1 > /sys/kernel/debug/tracing/events/uprobes/enable
echo > /sys/kernel/debug/tracing/trace
./test &

Read the trace file to see the output:
cat /sys/kernel/debug/tracing/trace
# tracer: nop
#
# entries-in-buffer/entries-written: 4/4   #P:16
#
#                              _-----=> irqs-off
#                             / _----=> need-resched
#                            | / _---=> hardirq/softirq
#                            || / _--=> preempt-depth
#                            ||| /     delay
#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION
#              | |       |   ||||       |         |
            test-21902 [001] d... 167919.012000: func_2_entry: (0x56219cb6c6a0) count=0x3
            test-21902 [001] d... 167923.012641: func_2_entry: (0x56219cb6c6a0) count=0x3
            test-21902 [001] d... 167926.013075: func_2_entry: (0x56219cb6c6a0) count=0x2
            test-21902 [001] d... 167928.013390: func_2_entry: (0x56219cb6c6a0) count=0x1

We can see the uprobe events by checking this file descriptor
#kprobe_event is for kernel probe
#cat /sys/kernel/debug/tracing/uprobe_events
cat /sys/kernel/debug/tracing/uprobe_events
p:uprobes/func_2_entry ./test:0x00000000000006a0 count=%cx

We can delete the uprobe event as follows:
echo 0 > /sys/kernel/debug/tracing/events/uprobes/enable
echo '-:func_2_entry ./test:0x6a0' >> /sys/kernel/debug/tracing/uprobe_events

And of course, we also can use Perf to trace the user-space application using uprobe. Here is an example of usage:
gcc -g -o test test.c
perf probe -x ./test func_2_entry=func_2
perf probe -x ./test func_2_exit=func_2%return
perf probe -x ./test test_15=test.c:15
perf probe -x ./test test_25=test.c:25 number
perf probe --list
  probe_test:func_2_entry (on func_2@test.c in /home/test/SourceCode/dynamic-tracing-linux-user-and-kernel-space/test)
  probe_test:test_15   (on func_2:2@test.c in /home/test/SourceCode/dynamic-tracing-linux-user-and-kernel-space/test)
  probe_test:test_25   (on main:7@test.c in /home/test/SourceCode/dynamic-tracing-linux-user-and-kernel-space/test with number)
  probe_test:func_2_exit__return (on func_2%return@test.c in /home/test/SourceCode/dynamic-tracing-linux-user-and-kernel-space/test)

perf record -e probe_test:func_2_entry \
            -e probe_test:func_2_exit__return \
            -e probe_test:test_15 \
            -e probe_test:test_25 \
            ./test

perf script
            test 21982 [011] 168688.905746:             probe_test:test_25: (56430f126701) number=3
            test 21982 [011] 168688.905784:        probe_test:func_2_entry: (56430f1266a0)
            test 21982 [011] 168688.905789:             probe_test:test_15: (56430f1266a4)
            test 21982 [011] 168688.905794: probe_test:func_2_exit__return: (56430f1266a0 <- 56430f12670c)
            test 21982 [011] 168689.905956:             probe_test:test_25: (56430f126701) number=6
            test 21982 [011] 168690.906129:             probe_test:test_25: (56430f126701) number=7
            test 21982 [011] 168691.906294:             probe_test:test_25: (56430f126701) number=5
            test 21982 [011] 168692.906474:             probe_test:test_25: (56430f126701) number=3
            test 21982 [011] 168692.906486:        probe_test:func_2_entry: (56430f1266a0)
            test 21982 [011] 168692.906491:             probe_test:test_15: (56430f1266a4)
            test 21982 [011] 168692.906496: probe_test:func_2_exit__return: (56430f1266a0 <- 56430f12670c)
            test 21982 [011] 168693.906646:             probe_test:test_25: (56430f126701) number=5
            test 21982 [011] 168694.906821:             probe_test:test_25: (56430f126701) number=6
            test 21982 [011] 168695.907002:             probe_test:test_25: (56430f126701) number=2
            test 21982 [011] 168695.907014:        probe_test:func_2_entry: (56430f1266a0)
            test 21982 [011] 168695.907019:             probe_test:test_15: (56430f1266a4)
            test 21982 [011] 168695.907025: probe_test:func_2_exit__return: (56430f1266a0 <- 56430f12670c)
            test 21982 [011] 168696.907201:             probe_test:test_25: (56430f126701) number=9
            test 21982 [011] 168697.907371:             probe_test:test_25: (56430f126701) number=1
            test 21982 [011] 168697.907383:        probe_test:func_2_entry: (56430f1266a0)
            test 21982 [011] 168697.907388:             probe_test:test_15: (56430f1266a4)
            test 21982 [011] 168697.907393: probe_test:func_2_exit__return: (56430f1266a0 <- 56430f12670c)
            test 21982 [011] 168698.907566:             probe_test:test_25: (56430f126701) number=2
            test 21982 [011] 168698.907578:        probe_test:func_2_entry: (56430f1266a0)
            test 21982 [011] 168698.907583:             probe_test:test_15: (56430f1266a4)
            test 21982 [011] 168698.907588: probe_test:func_2_exit__return: (56430f1266a0 <- 56430f12670c)
            test 21982 [011] 168699.907722:             probe_test:test_25: (56430f126701) number=7
            test 21982 [011] 168700.907895:             probe_test:test_25: (56430f126701) number=0
            test 21982 [011] 168700.907907:        probe_test:func_2_entry: (56430f1266a0)
            test 21982 [011] 168700.907912:             probe_test:test_15: (56430f1266a4)
            test 21982 [011] 168700.907917: probe_test:func_2_exit__return: (56430f1266a0 <- 56430f12670c)
            test 21982 [011] 168701.908092:             probe_test:test_25: (56430f126701) number=9
            test 21982 [011] 168702.908283:             probe_test:test_25: (56430f126701) number=3
            test 21982 [011] 168702.908295:        probe_test:func_2_entry: (56430f1266a0)
            test 21982 [011] 168702.908300:             probe_test:test_15: (56430f1266a4)
            test 21982 [011] 168702.908305: probe_test:func_2_exit__return: (56430f1266a0 <- 56430f12670c)
...
...

If we want to see the call stack in the record, we need to add "-aR" in perf record command as follows:
# Record for 10 secs
perf probe -x ./test 'func_1'
perf probe --list
perf record -e probe_test:func_1 -aR -g sleep 10
perf script
test 22055 [009] 169544.506773: probe_test:func_1: (55a2d746e68a)
            55a2d746e68a func_1+0x0 (/home/test/SourceCode/dynamic-tracing-linux-user-and-kernel-space/test)
            7fa4b9f9ab97 __libc_start_main+0xe7 (/lib/x86_64-linux-gnu/libc-2.27.so)
         67e258d4c544155 [unknown] ([unknown])

test 22055 [009] 169545.506915: probe_test:func_1: (55a2d746e68a)
            55a2d746e68a func_1+0x0 (/home/test/SourceCode/dynamic-tracing-linux-user-and-kernel-space/test)
            7fa4b9f9ab97 __libc_start_main+0xe7 (/lib/x86_64-linux-gnu/libc-2.27.so)
         67e258d4c544155 [unknown] ([unknown])

test 22055 [009] 169546.507086: probe_test:func_1: (55a2d746e68a)
            55a2d746e68a func_1+0x0 (/home/test/SourceCode/dynamic-tracing-linux-user-and-kernel-space/test)
            7fa4b9f9ab97 __libc_start_main+0xe7 (/lib/x86_64-linux-gnu/libc-2.27.so)
         67e258d4c544155 [unknown] ([unknown])

test 22055 [009] 169547.507206: probe_test:func_1: (55a2d746e68a)
            55a2d746e68a func_1+0x0 (/home/test/SourceCode/dynamic-tracing-linux-user-and-kernel-space/test)
            7fa4b9f9ab97 __libc_start_main+0xe7 (/lib/x86_64-linux-gnu/libc-2.27.so)
         67e258d4c544155 [unknown] ([unknown])

test 22055 [009] 169548.507358: probe_test:func_1: (55a2d746e68a)
            55a2d746e68a func_1+0x0 (/home/test/SourceCode/dynamic-tracing-linux-user-and-kernel-space/test)
            7fa4b9f9ab97 __libc_start_main+0xe7 (/lib/x86_64-linux-gnu/libc-2.27.so)
         67e258d4c544155 [unknown] ([unknown])

test 22055 [009] 169550.507651: probe_test:func_1: (55a2d746e68a)
            55a2d746e68a func_1+0x0 (/home/test/SourceCode/dynamic-tracing-linux-user-and-kernel-space/test)
            7fa4b9f9ab97 __libc_start_main+0xe7 (/lib/x86_64-linux-gnu/libc-2.27.so)
         67e258d4c544155 [unknown] ([unknown])

test 22055 [009] 169551.507814: probe_test:func_1: (55a2d746e68a)
            55a2d746e68a func_1+0x0 (/home/test/SourceCode/dynamic-tracing-linux-user-and-kernel-space/test)
            7fa4b9f9ab97 __libc_start_main+0xe7 (/lib/x86_64-linux-gnu/libc-2.27.so)
         67e258d4c544155 [unknown] ([unknown])

Then, we should delete all the perf event
perf probe -d \*
Removed event: probe_test:func_2_entry
Removed event: probe_test:func_2_exit__return
Removed event: probe_test:test_15
Removed event: probe_test:test_25

For dynamic tracing Kernel code, such as TCP v4 network stack, we can do this:
http://www.brendangregg.com/blog/2018-03-22/tcp-tracepoints.html
perf probe 'tcp_v4_*'
tcp_v4_init is out of .text, skip it.
Added new events:
  probe:tcp_v4_init_seq (on tcp_v4_*)
  probe:tcp_v4_init_ts_off (on tcp_v4_*)
  probe:tcp_v4_reqsk_destructor (on tcp_v4_*)
  probe:tcp_v4_restore_cb (on tcp_v4_*)
  probe:tcp_v4_fill_cb (on tcp_v4_*)
  probe:tcp_v4_md5_hash_headers (on tcp_v4_*)
  probe:tcp_v4_md5_hash_skb (on tcp_v4_*)
  probe:tcp_v4_md5_hash_hdr (on tcp_v4_*)
  probe:tcp_v4_route_req (on tcp_v4_*)
  probe:tcp_v4_init_req (on tcp_v4_*)
  probe:tcp_v4_destroy_sock (on tcp_v4_*)
  probe:tcp_v4_init_sock (on tcp_v4_*)
  probe:tcp_v4_parse_md5_keys (on tcp_v4_*)
  probe:tcp_v4_conn_request (on tcp_v4_*)
  probe:tcp_v4_pre_connect (on tcp_v4_*)
  probe:tcp_v4_connect (on tcp_v4_*)
  probe:tcp_v4_mtu_reduced (on tcp_v4_*)
  probe:tcp_v4_mtu_reduced_1 (on tcp_v4_*)
  probe:tcp_v4_send_ack (on tcp_v4_*)
  probe:tcp_v4_md5_lookup (on tcp_v4_*)
  probe:tcp_v4_reqsk_send_ack (on tcp_v4_*)
  probe:tcp_v4_inbound_md5_hash (on tcp_v4_*)
  probe:tcp_v4_syn_recv_sock (on tcp_v4_*)
  probe:tcp_v4_send_reset (on tcp_v4_*)
  probe:tcp_v4_do_rcv  (on tcp_v4_*)
  probe:tcp_v4_err     (on tcp_v4_*)
  probe:tcp_v4_send_check (on tcp_v4_*)
  probe:tcp_v4_send_synack (on tcp_v4_*)
  probe:tcp_v4_early_demux (on tcp_v4_*)
  probe:tcp_v4_rcv     (on tcp_v4_*)
  (null):(null)        (on tcp_v4_*)

cat /sys/kernel/debug/tracing/kprobe_events

p:probe/tcp_v4_init_seq _text+10064048
p:probe/tcp_v4_init_ts_off _text+10064112
p:probe/tcp_v4_reqsk_destructor _text+10064160
p:probe/tcp_v4_restore_cb _text+10064688
p:probe/tcp_v4_fill_cb _text+10064736
p:probe/tcp_v4_md5_hash_headers _text+10065024
p:probe/tcp_v4_md5_hash_skb _text+10065248
p:probe/tcp_v4_md5_hash_hdr _text+10065584
p:probe/tcp_v4_route_req _text+10065840
p:probe/tcp_v4_init_req _text+10065856
p:probe/tcp_v4_destroy_sock _text+10066144
p:probe/tcp_v4_init_sock _text+10067760
p:probe/tcp_v4_parse_md5_keys _text+10068832
p:probe/tcp_v4_conn_request _text+10069072
p:probe/tcp_v4_pre_connect _text+10070528
p:probe/tcp_v4_connect _text+10071568
p:probe/tcp_v4_mtu_reduced _text+10072832
p:probe/tcp_v4_mtu_reduced_1 _text+10073040
p:probe/tcp_v4_send_ack _text+10073072
p:probe/tcp_v4_md5_lookup _text+10073904
p:probe/tcp_v4_reqsk_send_ack _text+10073936
p:probe/tcp_v4_inbound_md5_hash _text+10074288
p:probe/tcp_v4_syn_recv_sock _text+10075920
p:probe/tcp_v4_send_reset _text+10076800
p:probe/tcp_v4_do_rcv _text+10078464
p:probe/tcp_v4_err _text+10079264
p:probe/tcp_v4_send_check _text+10080832
p:probe/tcp_v4_send_synack _text+10080864
p:probe/tcp_v4_early_demux _text+10081072
p:probe/tcp_v4_rcv _text+10081424
Use perf tool to start dynamic tracing kernel from my tracepoints.
perf record \
 -e probe:tcp_v4_conn_request \
 -e probe:tcp_v4_connect \
 -e probe:tcp_v4_destroy_sock \
 -e probe:tcp_v4_do_rcv \
 -e probe:tcp_v4_early_demux \
 -e probe:tcp_v4_err \
 -e probe:tcp_v4_fill_cb \
 -e probe:tcp_v4_inbound_md5_hash \
 -e probe:tcp_v4_init_req \
 -e probe:tcp_v4_init_seq \
 -e probe:tcp_v4_init_sock \
 -e probe:tcp_v4_init_ts_off \
 -e probe:tcp_v4_md5_hash_hdr \
 -e probe:tcp_v4_md5_hash_headers \
 -e probe:tcp_v4_md5_hash_skb \
 -e probe:tcp_v4_md5_lookup \
 -e probe:tcp_v4_mtu_reduced \
 -e probe:tcp_v4_mtu_reduced_1 \
 -e probe:tcp_v4_parse_md5_keys \
 -e probe:tcp_v4_pre_connect \
 -e probe:tcp_v4_rcv \
 -e probe:tcp_v4_reqsk_destructor \
 -e probe:tcp_v4_reqsk_send_ack  \
 -e probe:tcp_v4_restore_cb \
 -e probe:tcp_v4_route_req \
 -e probe:tcp_v4_send_ack \
 -e probe:tcp_v4_send_check \
 -e probe:tcp_v4_send_reset \
 -e probe:tcp_v4_send_synack \
 -e probe:tcp_v4_syn_recv_sock \
 sleep 5

P.S: Perf tool can list the kernel's pre-defined tracepoint events, for instance:

$ perf list 'tcp:*'

List of pre-defined events (to be used in -e):

  tcp:tcp_destroy_sock                               [Tracepoint event]
  tcp:tcp_probe                                      [Tracepoint event]
  tcp:tcp_rcv_space_adjust                           [Tracepoint event]
  tcp:tcp_receive_reset                              [Tracepoint event]
  tcp:tcp_retransmit_skb                             [Tracepoint event]
  tcp:tcp_retransmit_synack                          [Tracepoint event]
  tcp:tcp_send_reset                                 [Tracepoint event]


P.S2: tracing tcp_recvmsg and tcp_sendmsg

perf record \
 -e probe:tcp_recvmsg \
 -e probe:tcp_sendmsg \

 -aR sleep 5





No comments: