yum install epel-release
yum install monit
This installs the standard layout, with a systemd unit (managed via systemctl) on CentOS 7 and an init script in /etc/init.d on CentOS 6.x
/etc/monitrc has all the system level configurations
# port to bind to
# IP which can access the UI of monit
# basic auth with password
# mail server for alerts
# alert format
# standard system monitoring elements
service - pid / process /binary / checksum / ownership can be set as conditions
###################################################################################
vi /etc/monit.d/sendmail
# Monitor the sendmail daemon through its pidfile and restart it when SMTP on
# port 25 stops answering.
check process sendmail with pidfile /var/run/sendmail.pid
  group mail
  start program = "/etc/init.d/sendmail start"
  stop program = "/etc/init.d/sendmail stop"
  if failed port 25 protocol smtp then restart
  depends on sendmail_bin
  depends on sendmail_rc

# Stop monitoring if the sendmail binary's checksum or permissions change.
check file sendmail_bin with path /usr/lib/sendmail
  group mail
  if failed checksum then unmonitor
  if failed permission 2755 then unmonitor
  # if failed uid root then unmonitor
  # if failed gid root then unmonitor

# Stop monitoring if the init script's checksum or permissions change.
# The expected mode is 0755, not 0644: the start/stop programs above execute
# this file directly, so it has to be executable.
check file sendmail_rc with path /etc/init.d/sendmail
  group mail
  if failed checksum then unmonitor
  if failed permission 0755 then unmonitor
  # if failed uid root then unmonitor
  # if failed gid root then unmonitor
#############################################################################
save the file
/etc/init.d/monit restart
vi /etc/monit.d/sendmailqueue
##############################################################################
# Run the queue-check script and alert when it exits with a non-zero status.
# The path must be wrapped in plain ASCII double quotes; typographic quotes
# are not string delimiters in a monit control file.
check program mail-queue with path "/usr/bin/check_sendmail_queue.sh"
  if status != 0 then alert
  alert devops@expertus.com
##############################################################################
vi /usr/bin/check_sendmail_queue.sh
##############################################################################
#!/bin/bash
# check_sendmail_queue.sh - report the sendmail queue length to monit.
#
# Usage: check_sendmail_queue.sh [max_queued]
#
# Prints the number of queued messages on stdout and finishes with a non-zero
# status when that number is greater than max_queued, so that a monit rule
# such as "if status != 0 then alert" can actually fire.
# max_queued defaults to 100, an arbitrary starting point; tune it per host.

# Read mailq output on stdin and print the queue length.
# The length is the third field of the last line (e.g. "Total requests: 5");
# anything that is not a plain number is reported as 0.
parse_queue_length() {
  local last_field
  last_field=$(tail -n1 | awk '{print $3}')
  if [[ "$last_field" =~ ^[0-9]+$ ]]; then
    printf '%s\n' "$last_field"
  else
    printf '0\n'
  fi
}

# Print the current queue length; succeed only while it is within max_queued.
main() {
  local max_queued=${1:-100}
  local queue_length
  queue_length=$(/usr/bin/mailq | parse_queue_length)
  printf '%s\n' "$queue_length"
  # [ -le ] compares as decimal, so a value such as "08" is not read as octal.
  [ "$queue_length" -le "$max_queued" ]
}

# Must stay the last command: its status becomes the script's exit status.
main "$@"
##############################################################################
chmod +x /usr/bin/check_sendmail_queue.sh
vi /etc/monit.d/dnscheck
###############################################################################
# Ping www.google.com (5 echo requests, 5 second timeout) and alert once the
# test has failed 2 times within 3 cycles.
check host nscheck with address www.google.com
  if failed icmp type echo count 5 with timeout 5 seconds 2 times within 3 cycles then alert
  alert devops@expertus.com
###############################################################################
vi /usr/bin/dnscheck.sh
###############################################################################
#!/bin/bash
# dnscheck.sh - DNS lookup probe.
# Output: 1 = lookup succeeded, 0 = lookup failed.

DNS_SERVER=8.8.4.4
HOST_QUERY=www.google.com

# Print 1 when HOST_QUERY resolves through DNS_SERVER, otherwise 0.
# Success means the output of `host` contains a "has address" line.
dns_lookup_status() {
  if host "$HOST_QUERY" "$DNS_SERVER" | grep -q "has address"; then
    echo "1"
  else
    # lookup failed, bad DNS lookup
    echo "0"
  fi
}

dns_lookup_status
########################################################################
vi /etc/monit.d/nodejs
#########################################################################
# Watch the process matching the pattern "node"; restart it and alert when
# TCP port 8081 on qalearnexa.exphosted.com stops accepting connections.
# The pattern must be wrapped in plain ASCII double quotes; typographic quotes
# are not string delimiters in a monit control file.
check process node matching "node"
  start program = "/bin/bash -c /home/sandbox/bin/nodestart.sh"
  stop program = "/bin/bash -c /home/sandbox/bin/nodestop.sh"
  if failed host qalearnexa.exphosted.com port 8081 type tcp then restart
  if failed host qalearnexa.exphosted.com port 8081 type tcp then alert
  alert devops@expertus.com
#########################################################################
System
Current - Zabbix
New - Zabbix + monit
Zabbix will be used for historical data
Monit will be used for immediate action based on rules and then alert
Disk
Current - Zabbix
New - Zabbix + monit
Zabbix will be used for historical data of disk usage growth
Monit will be used to monitor mounts and do a remount if it is unable to access a specific disk mount and then alert
CPU
Current - Zabbix
New - Zabbix + monit
Zabbix will be used for historical CPU load averages (1 min / 5 min / 15 min)
Monit for setting rule based actions when the averages exceed a threshold - like restarting a service
Memory
current - Zabbix
New - zabbix + monit
Zabbix will be used for historical data and period (from - to) based analysis
Monit for setting rule based actions when the memory usage exceeds a threshold - like restarting a service or alerting the devops team
Processes
current - specific processes like apache / mysql are monitored by Zabbix, but not very extensively
New - zabbix + monit
Monit will monitor anything with a pid, port number and an init script or systemd script
fail2ban
opendkim
passenger
Haproxy
sendmail
sendmail queue
DNS up
The items listed above are issues we have faced at one time or another. All of them can be monitored by monit, which can be configured either to send an alert or to take a specific action.
System login
current - Papertrail
New - papertrail(no change)
syslog
current - Papertrail
New - papertrail (no change)
URLMonitoring
current - zabbix and sitemonitor
New - zabbix and sitemonitor(no change)