pidstat是一款很不错的针对linux pid状态监控的程序

The pidstat command is used for monitoring individual tasks currently being managed by the Linux kernel.
It writes to standard output activities for every task selected with option -p or for every task managed by the Linux kernel if option -p ALL has been used. Not selecting any tasks is equivalent to specifying -p ALL but only active tasks (tasks with non-zero statistics values) will appear in the report.

pidstat的具体用法请参考 pidstat(1) 手册页(man pidstat),源码可从 sysstat 项目主页下载

[root@db56 tmp]# tar -zxvf sysstat-10.0.5.tar.gz 
[root@db56 tmp]# cd sysstat-10.0.5
 [root@db56 sysstat-10.0.5]# ./configure 
.
Check programs:
.
checking for gcc... gcc
checking for C compiler default output file name... a.out
checking whether the C compiler works... yes
.....

config.status: creating contrib/isag/isag
config.status: creating Makefile

   Sysstat version:		10.0.5
   Installation prefix:		/usr/local
   rc directory:		/etc/rc.d
   Init directory:		/etc/rc.d/init.d
   Configuration directory:	/etc/sysconfig
   Man pages directory:		${datarootdir}/man
   Compiler:			gcc
   Compiler flags:		-g -O2

[root@db56 sysstat-10.0.5]# 
[root@db56 sysstat-10.0.5]# 
[root@db56 sysstat-10.0.5]# make -f  Makefile

--------------------------------------------------

eg:

[root@db56 sysstat-10.0.5]# ./pidstat -p 12990  2 5 
Linux 2.6.18-194.el5 (db56) 	02/28/2013 	_x86_64_	(12 CPU)

03:07:46 PM       PID    %usr %system  %guest    %CPU   CPU  Command
03:07:48 PM     12990    0.00    0.00    0.00    0.00     7  oracle
03:07:50 PM     12990    0.00    0.00    0.00    0.00     7  oracle
03:07:52 PM     12990    0.00    0.00    0.00    0.00     7  oracle
03:07:54 PM     12990    0.00    0.00    0.00    0.00     7  oracle
03:07:56 PM     12990    0.00    0.00    0.00    0.00     7  oracle
Average:        12990    0.00    0.00    0.00    0.00     -  oracle


[root@db56 sysstat-10.0.5]# pidstat -r -t -p 12990 1 2
Linux 2.6.18-194.el5 (db56) 	02/28/2013 	_x86_64_	(12 CPU)

04:06:28 PM      TGID       TID  minflt/s  majflt/s     VSZ    RSS   %MEM  Command
04:06:29 PM     12990         -      0.00      0.00 12733740  18184   0.06  oracle
04:06:29 PM         -     12990      0.00      0.00 12733740  18184   0.06  |__oracle

04:06:29 PM      TGID       TID  minflt/s  majflt/s     VSZ    RSS   %MEM  Command
04:06:30 PM     12990         -      0.00      0.00 12733740  18184   0.06  oracle
04:06:30 PM         -     12990      0.00      0.00 12733740  18184   0.06  |__oracle

Average:         TGID       TID  minflt/s  majflt/s     VSZ    RSS   %MEM  Command
Average:        12990         -      0.00      0.00 12733740  18184   0.06  oracle
Average:            -     12990      0.00      0.00 12733740  18184   0.06  |__oracle


strace pidstat -p 12990 :

open("/proc/uptime", O_RDONLY)          = 3
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2b3fb25f9000
read(3, "45884933.69 45647910.77\n", 4096) = 24
close(3)                                = 0
munmap(0x2b3fb25f9000, 4096)            = 0
open("/proc/stat", O_RDONLY)            = 3
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2b3fb25f9000
read(3, "cpu  340880517 704021 59278877 5"..., 4096) = 1513
read(3, "", 4096)                       = 0
close(3)                                = 0
munmap(0x2b3fb25f9000, 4096)            = 0
open("/proc/12990/stat", O_RDONLY)      = 3
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2b3fb25f9000
read(3, "12990 (oracle) S 1 12990 12990 0"..., 4096) = 231
read(3, "", 4096)                       = 0
close(3)                                = 0
munmap(0x2b3fb25f9000, 4096)            = 0
open("/proc/12990/status", O_RDONLY)    = 3
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2b3fb25f9000
read(3, "Name:\toracle\nState:\tS (sleeping)"..., 4096) = 779
read(3, "", 4096)                       = 0
close(3)                                = 0

主要从/proc/$pid/stat 获取信息:

 /proc/[pid]/stat
              Status information about the process.  This is used by ps(1).  It is
              defined in /usr/src/linux/fs/proc/array.c.

              The fields, in order, with their proper scanf(3) format specifiers,
              are:

              pid %d      The process ID.

              comm %s     The filename of the executable, in parentheses.  This is
                          visible whether or not the executable is swapped out.

              state %c    One character from the string "RSDZTW" where R is running,
                          S is sleeping in an interruptible wait, D is waiting in
                          uninterruptible disk sleep, Z is zombie, T is traced or
                          stopped (on a signal), and W is paging.

              ppid %d     The PID of the parent.

              pgrp %d     The process group ID of the process.

              session %d  The session ID of the process.

              tty_nr %d   The controlling terminal of the process.  (The minor device
                          number is contained in the combination of bits 31 to 20 and
                          7 to 0; the major device number is in bits 15 to 8.)

              tpgid %d    The ID of the foreground process group of the controlling
                          terminal of the process.

              flags %u (%lu before Linux 2.6.22)
                          The kernel flags word of the process.  For bit meanings,
                          see the PF_* defines in <linux/sched.h>.  Details depend on
                          the kernel version.

              minflt %lu  The number of minor faults the process has made which have
                          not required loading a memory page from disk.

              cminflt %lu The number of minor faults that the process's waited-for
                          children have made.

              majflt %lu  The number of major faults the process has made which have
                          required loading a memory page from disk.

              cmajflt %lu The number of major faults that the process's waited-for
                          children have made.

              utime %lu   Amount of time that this process has been scheduled in user
                          mode, measured in clock ticks (divide by
                          sysconf(_SC_CLK_TCK)).  This includes guest time, guest_time
                          (time spent running a virtual CPU, see below), so that
                          applications that are not aware of the guest time field do
                          not lose that time from their calculations.

              stime %lu   Amount of time that this process has been scheduled in
                          kernel mode, measured in clock ticks (divide by
                          sysconf(_SC_CLK_TCK)).

              cutime %ld  Amount of time that this process's waited-for children have
                          been scheduled in user mode, measured in clock ticks
                          (divide by sysconf(_SC_CLK_TCK)).  (See also times(2).)
                          This includes guest time, cguest_time (time spent running a
                          virtual CPU, see below).

              cstime %ld  Amount of time that this process's waited-for children have
                          been scheduled in kernel mode, measured in clock ticks
                          (divide by sysconf(_SC_CLK_TCK)).

              priority %ld
                          (Explanation for Linux 2.6) For processes running a real-
                          time scheduling policy (policy below; see
                          sched_setscheduler(2)), this is the negated scheduling
                          priority, minus one; that is, a number in the range -2 to
                          -100, corresponding to real-time priorities 1 to 99.  For
                          processes running under a non-real-time scheduling policy,
                          this is the raw nice value (setpriority(2)) as represented
                          in the kernel.  The kernel stores nice values as numbers in
                          the range 0 (high) to 39 (low), corresponding to the user-
                          visible nice range of -20 to 19.

                          Before Linux 2.6, this was a scaled value based on the
                          scheduler weighting given to this process.

              nice %ld    The nice value (see setpriority(2)), a value in the range
                          19 (low priority) to -20 (high priority).

              num_threads %ld
                          Number of threads in this process (since Linux 2.6).
                          Before kernel 2.6, this field was hard coded to 0 as a
                          placeholder for an earlier removed field.

              itrealvalue %ld
                          The time in jiffies before the next SIGALRM is sent to the
                          process due to an interval timer.  Since kernel 2.6.17,
                          this field is no longer maintained, and is hard coded as 0.

              starttime %llu (was %lu before Linux 2.6)
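                          The time the process started after system boot.  Before
                          Linux 2.6 this value was expressed in jiffies; since Linux
                          2.6 it is expressed in clock ticks (divide by
                          sysconf(_SC_CLK_TCK)).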

              vsize %lu   Virtual memory size in bytes.

              rss %ld     Resident Set Size: number of pages the process has in real
                          memory.  This is just the pages which count toward text,
                          data, or stack space.  This does not include pages which
                          have not been demand-loaded in, or which are swapped out.

              rsslim %lu  Current soft limit in bytes on the rss of the process; see
                          the description of RLIMIT_RSS in getrlimit(2).

              startcode %lu
                          The address above which program text can run.

              endcode %lu The address below which program text can run.

              startstack %lu
                          The address of the start (i.e., bottom) of the stack.

              kstkesp %lu The current value of ESP (stack pointer), as found in the
                          kernel stack page for the process.

              kstkeip %lu The current EIP (instruction pointer).

              signal %lu  The bitmap of pending signals, displayed as a decimal
                          number.  Obsolete, because it does not provide information
                          on real-time signals; use /proc/[pid]/status instead.

              blocked %lu The bitmap of blocked signals, displayed as a decimal
                          number.  Obsolete, because it does not provide information
                          on real-time signals; use /proc/[pid]/status instead.

              sigignore %lu
                          The bitmap of ignored signals, displayed as a decimal
                          number.  Obsolete, because it does not provide information
                          on real-time signals; use /proc/[pid]/status instead.

              sigcatch %lu
                          The bitmap of caught signals, displayed as a decimal
                          number.  Obsolete, because it does not provide information
                          on real-time signals; use /proc/[pid]/status instead.

              wchan %lu   This is the "channel" in which the process is waiting.  It
                          is the address of a system call, and can be looked up in a
                          namelist if you need a textual name.  (If you have an up-
                          to-date /etc/psdatabase, then try ps -l to see the WCHAN
                          field in action.)

              nswap %lu   Number of pages swapped (not maintained).

              cnswap %lu  Cumulative nswap for child processes (not maintained).

              exit_signal %d (since Linux 2.1.22)
                          Signal to be sent to parent when we die.

              processor %d (since Linux 2.2.8)
                          CPU number last executed on.

              rt_priority %u (since Linux 2.5.19; was %lu before Linux 2.6.22)
                          Real-time scheduling priority, a number in the range 1 to
                          99 for processes scheduled under a real-time policy, or 0,
                          for non-real-time processes (see sched_setscheduler(2)).

              policy %u (since Linux 2.5.19; was %lu before Linux 2.6.22)
                          Scheduling policy (see sched_setscheduler(2)).  Decode
                          using the SCHED_* constants in linux/sched.h.

              delayacct_blkio_ticks %llu (since Linux 2.6.18)
                          Aggregated block I/O delays, measured in clock ticks
                          (centiseconds).

              guest_time %lu (since Linux 2.6.24)
                          Guest time of the process (time spent running a virtual CPU
                          for a guest operating system), measured in clock ticks
                          (divide by sysconf(_SC_CLK_TCK)).

              cguest_time %ld (since Linux 2.6.24)
                          Guest time of the process's children, measured in clock
                          ticks (divide by sysconf(_SC_CLK_TCK)).