Nginx – High io with php-fpm


Recently I have been having some issues with php-fpm. I noticed that IO is very high for all php-fpm processes (99.9%), and that is what is causing the CPU load to spike.

Top command

top - 06:28:53 up 8 days, 21:05,  2 users,  load average: 179.61, 82.23, 70.63
Tasks: 913 total,  11 running, 901 sleeping,   0 stopped,   1 zombie
Cpu(s):  9.7%us,  1.7%sy,  0.0%ni, 31.4%id, 56.0%wa,  0.0%hi,  1.1%si,  0.0%st
Mem:  16296824k total,  9676012k used,  6620812k free,   242004k buffers
Swap:  8159228k total,       16k used,  8159212k free,  6596628k cached

50283 nginx     20   0  439m  11m 6108 S 22.8  0.1   0:01.10 php-fpm
37536 nginx     20   0  440m  12m 6432 S  5.9  0.1   0:05.32 php-fpm
44007 nginx     20   0  439m  12m 6428 D  5.9  0.1   0:02.50 php-fpm
47827 nginx     20   0  442m  13m 6320 R  5.6  0.1   0:01.28 php-fpm
49263 nginx     20   0  439m  11m 6176 S  5.3  0.1   0:00.53 php-fpm
40709 nginx     20   0  440m  12m 6400 S  5.0  0.1   0:03.34 php-fpm
42967 nginx     20   0  439m  12m 6324 S  5.0  0.1   0:02.43 php-fpm
48703 nginx     20   0  439m  11m 5992 S  4.6  0.1   0:00.56 php-fpm
21532 nginx     20   0  121m  19m 2936 S  4.0  0.1   0:59.10 nginx
50684 nginx     20   0  439m  12m 6184 S  4.0  0.1   0:00.30 php-fpm
44081 nginx     20   0  439m  11m 6284 S  3.6  0.1   0:02.29 php-fpm
48760 nginx     20   0  440m  12m 6372 S  3.6  0.1   0:01.34 php-fpm
38657 nginx     20   0  440m  12m 6848 D  3.3  0.1   0:03.48 php-fpm
49899 nginx     20   0  439m  11m 6040 S  3.3  0.1   0:00.63 php-fpm

iotop command

488 be/4 nginx       0.00 B/s    0.00 B/s  0.00 % 99.99 % php-fpm: pool www
47360 be/4 nginx       0.00 B/s    3.66 K/s  0.00 % 99.99 % php-fpm: pool www
51005 be/4 nginx       0.00 B/s    3.66 K/s  0.00 % 99.99 % php-fpm: pool www
56126 be/4 nginx       0.00 B/s    3.66 K/s  0.00 % 82.48 % php-fpm: pool www
48028 be/4 nginx       0.00 B/s    3.66 K/s  0.00 % 54.14 % php-fpm: pool www
54876 be/4 nginx       0.00 B/s    0.00 B/s  0.00 % 48.00 % php-fpm: pool www
47651 be/4 nginx       0.00 B/s    3.66 K/s  0.00 % 37.67 % php-fpm: pool www
  839 be/4 root        0.00 B/s    0.00 B/s  0.00 % 37.30 % [kmirrord]
47811 be/4 nginx       0.00 B/s    7.31 K/s  0.00 % 36.73 % php-fpm: pool www
48014 be/4 nginx       0.00 B/s    3.66 K/s  0.00 % 35.81 % php-fpm: pool www
48050 be/4 nginx       0.00 B/s    0.00 B/s  0.00 % 34.42 % php-fpm: pool www
47987 be/4 nginx       0.00 B/s    3.66 K/s  0.00 % 32.06 % php-fpm: pool www
47468 be/4 nginx       0.00 B/s    3.66 K/s  0.00 % 27.39 % php-fpm: pool www

/etc/sysctl.conf settings

net.ipv4.ip_forward = 0
net.ipv4.conf.default.rp_filter = 1
net.ipv4.conf.default.accept_source_route = 0
kernel.sysrq = 0
kernel.core_uses_pid = 1
net.ipv4.tcp_synack_retries = 2
net.ipv4.conf.all.send_redirects = 0
net.ipv4.conf.default.send_redirects = 0
net.ipv4.conf.all.accept_source_route = 0
net.ipv4.conf.all.accept_redirects = 0
net.ipv4.conf.all.secure_redirects = 0
net.ipv4.conf.all.log_martians = 1
net.ipv4.conf.default.accept_source_route = 0
net.ipv4.conf.default.accept_redirects = 0
net.ipv4.conf.default.secure_redirects = 0
net.ipv4.icmp_echo_ignore_broadcasts = 1
net.ipv4.tcp_syncookies = 1
net.ipv4.conf.all.rp_filter = 1
net.ipv4.conf.default.rp_filter = 1
net.ipv6.conf.default.router_solicitations = 0
net.ipv6.conf.default.accept_ra_rtr_pref = 0
net.ipv6.conf.default.accept_ra_pinfo = 0
net.ipv6.conf.default.accept_ra_defrtr = 0
net.ipv6.conf.default.autoconf = 0
net.ipv6.conf.default.dad_transmits = 0
net.ipv6.conf.default.max_addresses = 1
kernel.exec-shield = 1
kernel.randomize_va_space = 1
fs.file-max = 65535
kernel.pid_max = 65536
net.ipv4.ip_local_port_range = 2000 65000


pid = /var/run/php-fpm/
error_log = /var/log/php-fpm/error.log
log_level = warning
emergency_restart_threshold = 10
emergency_restart_interval = 1m
process_control_timeout = 10
daemonize = yes


listen = /tmp/php5-fpm.sock
listen.allowed_clients =
listen.mode = 0666
user = nginx
group = nginx
pm = static
pm.max_children = 800
pm.start_servers = 225
pm.min_spare_servers = 150
pm.max_spare_servers = 800
pm.max_requests = 5000
request_terminate_timeout = 300
request_slowlog_timeout = 120s
slowlog = /var/log/php-fpm/www-slow.log
php_admin_value[error_log] = /var/log/php-fpm/www-error.log
php_admin_flag[log_errors] = on
php_value[session.save_handler] = files
php_value[session.save_path]    = /var/lib/php/session
php_value[soap.wsdl_cache_dir]  = /var/lib/php/wsdlcache

I have experimented a lot with the following settings while trying to solve the issue, but it didn't help:

pm = static //was dynamic before
pm.max_children = 800
pm.start_servers = 225
pm.min_spare_servers = 150
pm.max_spare_servers = 800
pm.max_requests = 5000 // was 500 before

Each php-fpm process uses about 10-15 MB of RAM, and I have 16 GB of RAM. I used the following command to check how much memory each process uses on average:

 ps --no-headers -o "rss,cmd" -C php-fpm | awk '{ sum+=$1 } END { printf ("%d%s\n", sum/NR/1024,"M") }'

My site runs on a dedicated server and uses only Nginx+PHP-FPM. According to Google Analytics, the site gets around 90K pageviews per day. I disabled nginx access logs and changed the log_level from notice to warning.


iostat -p -xdmnh 5

Device:         rrqm/s   wrqm/s     r/s     w/s    rMB/s    wMB/s avgrq-sz avgqu-sz   await  svctm  %util
sda               2.20  2044.20   86.60  118.40     3.31     8.45   117.42     4.11   20.03   1.70  34.76
sdb               0.00  2032.40    0.00  130.20     0.00     8.45   132.87     2.39   18.32   0.92  12.02
dm-0              0.00     0.00   88.80 2162.60     3.31     8.45    10.69   282.23  125.36   0.17  37.16
dm-1              0.00     0.00    0.00    0.00     0.00     0.00     0.00     0.00    0.00   0.00   0.00
dm-2              0.00     0.00   88.80 2162.60     3.31     8.45    10.69   282.23  125.36   0.17  37.26
dm-3              0.00     0.00   88.80 2162.40     3.31     8.45    10.69   282.24  125.37   0.17  37.28
dm-4              0.00     0.00    0.00    0.00     0.00     0.00     0.00     0.00    0.00   0.00   0.00
dm-5              0.00     0.00    0.00    0.00     0.00     0.00     0.00     0.00    0.00   0.00   0.00

Filesystem:               rMB_nor/s    wMB_nor/s    rMB_dir/s    wMB_dir/s    rMB_svr/s    wMB_svr/s     ops/s    rops/s    wops/s

lsblk command
Thanks to Xavier Lucas for trying to help me out with this

  NAME                                                               MAJ:MIN RM   SIZE RO TYPE   MOUNTPOINT
    sda                                                                  8:0    0 931.5G  0 disk
    └─ddf1_4c534920202020201000007910009260471147116cfe0677 (dm-0)     253:0    0   931G  0 dmraid
      ├─ddf1_4c534920202020201000007910009260471147116cfe0677p1 (dm-1) 253:1    0   500M  0 part   /boot
      └─ddf1_4c534920202020201000007910009260471147116cfe0677p2 (dm-2) 253:2    0 930.5G  0 part
        ├─vg_485067-lv_root (dm-3)                                     253:3    0    50G  0 lvm    /
        ├─vg_485067-lv_swap (dm-4)                                     253:4    0   7.8G  0 lvm    [SWAP]
        └─vg_485067-lv_home (dm-5)                                     253:5    0 872.7G  0 lvm    /home
    sdb                                                                  8:16   0 931.5G  0 disk
    └─ddf1_4c534920202020201000007910009260471147116cfe0677 (dm-0)     253:0    0   931G  0 dmraid
      ├─ddf1_4c534920202020201000007910009260471147116cfe0677p1 (dm-1) 253:1    0   500M  0 part   /boot
      └─ddf1_4c534920202020201000007910009260471147116cfe0677p2 (dm-2) 253:2    0 930.5G  0 part
        ├─vg_485067-lv_root (dm-3)                                     253:3    0    50G  0 lvm    /
        ├─vg_485067-lv_swap (dm-4)                                     253:4    0   7.8G  0 lvm    [SWAP]
        └─vg_485067-lv_home (dm-5)                                     253:5    0 872.7G  0 lvm    /home

However, when I tried the following, I got no output at all (no error message whatsoever).

diff <(lsof +D /home) <(sleep 60 ; lsof +D /home)

EDIT 3 (Thanks to Xavier Lucas again)

 diff <(lsof +D /) <(sleep 60 ; lsof +D /)

Here are the results

Yes, my site makes use of a command-line tool, but I upgraded the CPU and RAM just so the server could support that. Also, the command-line tool doesn't download anything, so I don't see why IO is high.

I am using a dual Xeon E5-2620 with 16 GB of RAM on a 1 Gbps dedicated port.

Best Answer

As you requested, here is the way you can diff on file activity for a specified mountpoint.

First, use the lsblk command to get the mountpoints of the devices you identified with iostat.

The result will look like :

sda                          8:0    0   16G  0 disk 
├─sda1                       8:1    0  200M  0 part /boot
└─sda2                       8:2    0 15.8G  0 part 
  ├─vg00-root (dm-0)       253:0    0    1G  0 lvm  /
  ├─vg00-swap (dm-1)       253:1    0    1G  0 lvm  [SWAP]
  ├─vg00-usr (dm-2)        253:2    0    5G  0 lvm  /usr
  ├─vg00-var (dm-3)        253:3    0    2G  0 lvm  /var
  ├─vg00-log (dm-4)        253:4    0    1G  0 lvm  /var/log
  ├─vg00-tmp (dm-5)        253:5    0    2G  0 lvm  /tmp
  ├─vg00-home (dm-6)       253:6    0    1G  0 lvm  /home
  ├─vg00-rpm (dm-7)        253:7    0    1G  0 lvm  /var/lib/rpm

Then during high IO activity, run :

diff <(lsof +D <mountpoint>) <(sleep <delay_seconds> ; lsof +D <mountpoint>)

For instance for my dm-2 mountpoint with an interval of 60 seconds:

diff <(lsof +D /usr) <(sleep 60 ; lsof +D /usr)

It will then output the differences between the two lists of open files; check the size/offset column to see which files are growing.