[rabbitmq-discuss] High load average with rabbitmq-server 3.1.x

Dan_b daniel.bason at telogis.com
Mon Aug 19 03:58:52 BST 2013


Hi,

We have recently upgraded from rabbitmq-server 2.6.1 to 3.1.x (we have one
cluster running 3.1.5 and one running 3.1.3).  On both clusters we have seen
the load average increase significantly, peaking at up to 2x the number of
cores.  CPU utilization and IO wait aren't affected, so it seems to come
down to internal locks.
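
For anyone wanting to double-check my reading, something along these lines
should show whether the extra load is real CPU or threads parked in
uninterruptible sleep (1535 is the beam.smp pid from the status output
below; pidstat comes from the sysstat package):

# Per-thread CPU usage for beam.smp.  Linux load average also counts
# tasks in uninterruptible sleep (D state), so threads blocked in fsync
# can push the load up without using any CPU.
top -H -p 1535

# Same view over time, one sample per second:
pidstat -t -p 1535 1

# Thread states and wait channels; threads showing D in the stat column
# are in uninterruptible sleep (typically disk I/O) and count toward the
# load average:
ps -L -o pid,lwp,stat,pcpu,wchan,comm -p 1535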

The node status (from rabbitmqctl status, taken off-peak) is as follows:
Status of node rabbit at mq1 ...
[{pid,1535},
 {running_applications,
     [{rabbitmq_management,"RabbitMQ Management Console","3.1.5"},
      {rabbitmq_management_agent,"RabbitMQ Management Agent","3.1.5"},
      {rabbit,"RabbitMQ","3.1.5"},
      {os_mon,"CPO  CXC 138 46","2.2.7"},
      {rabbitmq_web_dispatch,"RabbitMQ Web Dispatcher","3.1.5"},
      {webmachine,"webmachine","1.10.3-rmq3.1.5-gite9359c7"},
      {mochiweb,"MochiMedia Web Server","2.7.0-rmq3.1.5-git680dba8"},
      {xmerl,"XML parser","1.2.10"},
      {inets,"INETS  CXC 138 49","5.7.1"},
      {mnesia,"MNESIA  CXC 138 12","4.5"},
      {amqp_client,"RabbitMQ AMQP Client","3.1.5"},
      {sasl,"SASL  CXC 138 11","2.1.10"},
      {stdlib,"ERTS  CXC 138 10","1.17.5"},
      {kernel,"ERTS  CXC 138 10","2.14.5"}]},
 {os,{unix,linux}},
 {erlang_version,
     "Erlang R14B04 (erts-5.8.5) [source] [64-bit] [smp:4:4] [rq:4]
[async-threads:30] [kernel-poll:true]\n"},
 {memory,
     [{total,157967560},
      {connection_procs,27074824},
      {queue_procs,59850832},
      {plugins,675680},
      {other_proc,10585832},
      {mnesia,1378504},
      {mgmt_db,10234568},
      {msg_index,4895480},
      {other_ets,8947648},
      {binary,10826584},
      {code,17261222},
      {atom,1552881},
      {other_system,4683505}]},
 {vm_memory_high_watermark,0.6},
 {vm_memory_limit,6442450944},
 {disk_free_limit,1000000000},
 {disk_free,18919137280},
 {file_descriptors,
     [{total_limit,3996},
      {total_used,611},
      {sockets_limit,3594},
      {sockets_used,391}]},
 {processes,[{limit,1048576},{used,4727}]},
 {run_queue,0},
 {uptime,607}]
...done.
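
One thing I notice in the erlang_version line is [async-threads:30], i.e.
the VM's async I/O thread pool, which I assume is what the fsync-ing
beam.smp threads below belong to.  In case it becomes relevant, my
understanding is that the pool size is set with the Erlang +A flag, e.g.
via rabbitmq-env.conf (we are still on the shipped default, so treat this
as a sketch rather than something we have tried):

# /etc/rabbitmq/rabbitmq-env.conf
# +A sets the size of the async I/O thread pool that performs file
# operations (the fsyncs visible in the strace below).  Note that setting
# RABBITMQ_SERVER_ERL_ARGS replaces the packaged defaults, so any other
# default flags need to be carried over as well.
RABBITMQ_SERVER_ERL_ARGS="+A 64"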

Running strace on one of the beam.smp threads produces the following:
Process 1595 attached - interrupt to quit
futex(0x2aaaab594524, FUTEX_WAIT_PRIVATE, 235505, NULL) = 0
futex(0x2aaaab5944f8, FUTEX_WAKE_PRIVATE, 1) = 0
write(27,
"\0\0\0\0\0\3\324:\341\v\324\2345\204y\370\377c\23ry5\16\234\0\0\0\0\0\0\0\0"...,
48) = 48
futex(0x182324a0, FUTEX_WAKE_PRIVATE, 1) = 1
futex(0x2aaaab594524, FUTEX_WAIT_PRIVATE, 235507, NULL) = -1 EAGAIN
(Resource temporarily unavailable)
futex(0x2aaaab5944f8, FUTEX_WAKE_PRIVATE, 1) = 0
fsync(27)                               = 0
write(99,
"\0\0\0\0\0\0\307\303c>\254\336\2268x\346\36\206\235\300\243\317\331\255\0\0\0\0\0\0\0\0"...,
48) = 48
futex(0x2aaaab594524, FUTEX_WAIT_PRIVATE, 235509, NULL) = -1 EAGAIN
(Resource temporarily unavailable)
futex(0x2aaaab5944f8, FUTEX_WAKE_PRIVATE, 1) = 0
fsync(99)                               = 0
write(4, "!", 1)                        = 1
futex(0x2aaaab594524, FUTEX_WAIT_PRIVATE, 235511, NULL) = 0
futex(0x2aaaab5944f8, FUTEX_WAKE_PRIVATE, 1) = 0
write(27,
"\0\0\0\0\0\3\324;I\236\257\221u\350\\\353\16\6\232\1XH\214D\0\0\0\0\0\0\0\0"...,
48) = 48
futex(0x2aaaab594524, FUTEX_WAIT_PRIVATE, 235513, NULL) = -1 EAGAIN
(Resource temporarily unavailable)
futex(0x2aaaab5944f8, FUTEX_WAKE_PRIVATE, 1) = 0
fsync(27)                               = 0
futex(0x182324a0, FUTEX_WAKE_PRIVATE, 1) = 1
futex(0x2aaaab594524, FUTEX_WAIT_PRIVATE, 235515, NULL) = 0
futex(0x2aaaab5944f8, FUTEX_WAKE_PRIVATE, 1) = 0
write(27,
"\0\0\0\0\0\3\324<}\235\263^\350w!\306A\334y\270P\232\37F\0\0\0\0\0\0\0\0"...,
48) = 48
futex(0x2aaaab594524, FUTEX_WAIT_PRIVATE, 235517, NULL) = -1 EAGAIN
(Resource temporarily unavailable)
futex(0x2aaaab5944f8, FUTEX_WAKE_PRIVATE, 1) = 0
fsync(27)                               = 0
futex(0x2aaaab594524, FUTEX_WAIT_PRIVATE, 235519, NULL) = 0
futex(0x2aaaab5944f8, FUTEX_WAKE_PRIVATE, 1) = 0
write(27,
"\0\0\0\0\0\3\324=\226(\16\265\355\200\307\7J\34b\4\240\"\355)\0\0\0\0\0\0\0\0"...,
48) = 48
futex(0x2aaaab594524, FUTEX_WAIT_PRIVATE, 235521, NULL) = -1 EAGAIN
(Resource temporarily unavailable)
futex(0x2aaaab5944f8, FUTEX_WAKE_PRIVATE, 1) = 0
fsync(27)                               = 0
write(56,
"\0\0\0\0\0\0\3105X\22\252\\n\210\364&\300\1\331M~\227D!\0\0\0\0\0\0\0\0"...,
48) = 48
futex(0x18232420, FUTEX_WAKE_PRIVATE, 1) = 1
futex(0x2aaaab594524, FUTEX_WAIT_PRIVATE, 235523, NULL) = 0
futex(0x2aaaab5944f8, FUTEX_WAKE_PRIVATE, 1) = 0
fsync(56)                               = 0
futex(0x18232420, FUTEX_WAKE_PRIVATE, 1) = 1
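
If an aggregated view would be more useful I can also capture something
like this (1535 is the main beam.smp pid; the fd numbers are the ones
appearing in the trace above):

# Summarise syscall counts and time across all threads for a while, then
# Ctrl-C to get the totals (shows how much time goes into futex vs fsync
# vs write):
strace -c -f -p 1535

# See which files the fsyncs on fds 27, 56 and 99 actually hit
# (message store / queue index segments, presumably):
ls -l /proc/1535/fd/27 /proc/1535/fd/56 /proc/1535/fd/99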


Any ideas on what is causing the locks?  Is there any further information I
can provide?

Regards
Dan


