offcputime no output
- environment os:
root@leixia:/usr/share/bcc/tools# uname -a
Linux leixia 4.13.0-38-generic #43-Ubuntu SMP Wed Mar 14 15:20:44 UTC 2018 x86_64 x86_64 x86_64 GNU/Linux
root@leixia:/usr/share/bcc/tools#
- Mysql is correct
root@leixia:/usr/share/bcc/tools# ps aux|grep mysql
root 6616 0.0 0.0 4592 1688 pts/4 S 13:42 0:00 /bin/sh ./bin/mysqld_safe --defaults-file=/usr/local/mysql/my.cnf
mysql 6789 0.0 2.2 1387340 183492 pts/4 Sl 13:42 0:04 /usr/local/mysql/bin/mysqld --defaults-file=/usr/local/mysql/my.cnf --basedir=/usr/local/mysql --datadir=/usr/local/mysql/data --plugin-dir=/usr/local/mysql/lib/plugin --user=mysql --log-error=leixia.err --pid-file=leixia.pid --port=3306
- Execute the command
./offcputime -df -p `pgrep -nx mysqld` 30 > out.stacks
but out.stacks is empty
Do you get any result from the MySQL Process if you don’t specifying the -p option?
yes output is correct
Could you compare the pid in the correct output (without -p) and pgrep -nx mysqld? This could help signal what is the issue.
Also can you run with --ebpf and post the full generated program? Thanks!
@palmtenor
root@leixia:/usr/share/bcc/tools# /usr/share/bcc/tools/offcputime -df -p `pgrep -nx mysqld` --ebpf
#include <uapi/linux/ptrace.h>
#include <linux/sched.h>
#define MINBLOCK_US 1ULL
#define MAXBLOCK_US 18446744073709551615ULL
struct key_t {
u32 pid;
u32 tgid;
int user_stack_id;
int kernel_stack_id;
char name[TASK_COMM_LEN];
};
BPF_HASH(counts, struct key_t);
BPF_HASH(start, u32);
BPF_STACK_TRACE(stack_traces, 1024);
int oncpu(struct pt_regs *ctx, struct task_struct *prev) {
u32 pid = prev->pid;
u32 tgid = prev->tgid;
u64 ts, *tsp;
// record previous thread sleep time
if ((tgid == 9199) && (1)) {
ts = bpf_ktime_get_ns();
start.update(&pid, &ts);
}
// get the current thread's start time
pid = bpf_get_current_pid_tgid();
tgid = bpf_get_current_pid_tgid() >> 32;
tsp = start.lookup(&pid);
if (tsp == 0) {
return 0; // missed start or filtered
}
// calculate current thread's delta time
u64 delta = bpf_ktime_get_ns() - *tsp;
start.delete(&pid);
delta = delta / 1000;
if ((delta < MINBLOCK_US) || (delta > MAXBLOCK_US)) {
return 0;
}
// create map key
u64 zero = 0, *val;
struct key_t key = {};
key.pid = pid;
key.tgid = tgid;
key.user_stack_id = stack_traces.get_stackid(ctx, BPF_F_USER_STACK);
key.kernel_stack_id = stack_traces.get_stackid(ctx, 0);
bpf_get_current_comm(&key.name, sizeof(key.name));
val = counts.lookup_or_init(&key, &zero);
(*val) += delta;
return 0;
}
@yonghong-song
root@leixia:/usr/share/bcc/tools# /usr/share/bcc/tools/offcputime -df 15 > out.stacks
root@leixia:/usr/share/bcc/tools# /usr/share/bcc/tools/offcputime -df -p `pgrep -nx mysqld` 15 > pid_out.stacks
-rwxr-xr-x 1 root root 2344 Apr 20 23:52 oomkill*
-rwxr-xr-x 1 root root 5402 Apr 20 23:52 opensnoop*
**`-rw-r--r-- 1 root root 939 Apr 25 10:42 out.stack**s`
-rwxr-xr-x 1 root root 58 Apr 21 09:38 phpcalls*
-rwxr-xr-x 1 root root 57 Apr 21 09:38 phpflow*
-rwxr-xr-x 1 root root 57 Apr 21 09:38 phpstat***
-rw-r--r-- 1 root root 0 Apr 25 10:42 pid_out.stac**ks
I don't know if it has anything to do with the environment. i use perf command is the same err too
root@leixia:/opt/test# perf record -e sched:sched_stat_sleep -e sched:sched_switch -e sched:sched_process_exit -p 9199 -g -o perf.data.raw sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.084 MB perf.data.raw (12 samples) ] *** Error in /usr/lib/linux-tools/4.13.0-38-generic/perf': munmap_chunk(): invalid pointer: 0x0000558e74b62d40 ***
Aborted (core dumped)
root@leixia:/opt/test# perf record -e sched:sched_stat_sleep -e sched:sched_switch -e sched:sched_process_exit -a -g -o perf.data.raw sleep 1
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.158 MB perf.data.raw (117 samples) ]`
@gabbylei Suppose pgrep -nx mysqld gives you a pid, say, 1000. Did you see pid 1000 in out.stacks?
@yonghong-song This method has been tried.The output is also empty.
Does this mean mysqld did not get hit with offcputime?
@yonghong-song Seems to be
My database server run in a docker container, and I meet the same problem (no data) when I run offcputime with the PID of my service in container. However I got data if I don't specify the -p arg. And I found it is OK when I run offcputime (in docker container) with the PID (got from sever outside of the container).