#!/bin/bash
#########################################################################
# Script:       check_equallogic
# Author:       Claudio Kuenzler www.claudiokuenzler.com
# Purpose:      Monitor Dell Equallogic with Nagios
# Description:  Checks Dell Equallogic via SNMP.
#               Can be used to query status and performance info
# Tested on:    Check www.claudiokuenzler.com/ithowtos/
#               nagios_check_equallogic.php for compatibility matrix
# History:
# 20091109 Started Script programming checks:
#          health, disk, raid, uptime, ps, info
# 20091112 Added ethif, conn
# 20091118 Added diskusage
# 20091119 Bugfix on Outputs (removed Pipes)
# 20091121 Public Release
# 20091204 Bugfix (removed IP addresses)
# 20091206 Bugfix (removed SNMP community names)
# 20091222 Fixed raid, ps, health and diskusage checks when multiple
#          member devices exists. Mathias Sundman
# 20100112 Successful tests on PS5000XV - thanks to Scott Sawin
# 20100209 Compatibility matrix now on website (see Tested on above)
# 20100416 Beta Testing for rewritten ethif check
# 20100420 Corrected ethif output, finished new ethif check
# 20100526 Using proper order of snmpwalk command, thanks Roland Ripoll
# 20100531 Added perfdata for diskusage and connections, thx to Benoit
# 20100630 Corrected perfdata output (+added thresholds), thx Christian
# 20100809 Fixed conn type -> total of all eql members of group
#########################################################################
# Usage: ./check_equallogic -H host -C community -t type [-w warning] [-c critical]
#
# Exit codes: 0 OK, 1 WARNING, 2 CRITICAL, 3 UNKNOWN.
# Note: --help / no arguments / a wrong option exit with 1 (kept for
# backward compatibility with existing callers).
#########################################################################
# The ethif check uses indexed arrays, hence bash rather than plain sh.

help="check_equallogic (c) 2009-2010 Claudio Kuenzler (published under GPL licence)

Usage: ./check_equallogic -H host -C community -t type [-w warning] [-c critical]

Options:
-H Hostname
-C SNMP-Community name (at least read-only)
-t Type to check, see list below
-w Warning Threshold
-c Critical Threshold

Requirements: snmpwalk, awk, grep, wc

types:
conn -> Checks number of ISCSI connections (if no thresholds are given, outputs information)
disk -> Checks Status of all disks
diskusage -> Checks the actual usage of the defined raid (if no thresholds are given, outputs information)
ethif -> Checks ethernet interfaces (if no thresholds are given, outputs information)
fan -> Status of Fans
fanrpm -> Check of RPM of fans
health -> Overall health status of Equallogic device
info -> Shows some Information
ps -> Checks Power Supply/Supplies
raid -> Checks RAID status
uptime -> Shows uptime"

STATE_OK=0        # define the exit code if status is OK
STATE_WARNING=1   # define the exit code if status is Warning
STATE_CRITICAL=2  # define the exit code if status is Critical
STATE_UNKNOWN=3   # define the exit code if status is Unknown
PATH=/usr/local/bin:/usr/bin:/bin # Set path

#########################################################################
# Helpers
#########################################################################

# Print "UNKNOWN: <message>" on stdout (where Nagios reads plugin output)
# and terminate with the UNKNOWN exit code.
unknown() {
  echo "UNKNOWN: $*"
  exit "${STATE_UNKNOWN}"
}

# Walk the OID subtree given as $1 on ${host} with ${community} and print
# the values only (-O vq). Returns snmpwalk's exit status.
snmp_walk() {
  snmpwalk -v 2c -O vq -c "${community}" "${host}" "$1"
}

# Sum all purely numeric arguments and print the total. Non-numeric tokens
# (e.g. words of an SNMP error text) are skipped.
# Returns non-zero when no numeric argument was seen at all, so callers can
# tell "sum is 0" apart from "the query returned no data".
sum_integers() {
  local total=0 seen=0 value
  for value in "$@"; do
    case "${value}" in
      '' | *[!0-9]*) continue ;;
    esac
    # 10# forces base 10 so a leading zero is not parsed as octal.
    total=$((total + 10#${value}))
    seen=1
  done
  echo "${total}"
  [ "${seen}" -eq 1 ]
}

# Compare the integer $1 against the global ${warning}/${critical}
# thresholds. Each threshold is only evaluated when it was actually given,
# so passing just -w or just -c works.
# Returns STATE_CRITICAL if value >= critical, else STATE_WARNING if
# value >= warning, else STATE_OK.
threshold_state() {
  local value=$1
  if [ -n "${critical}" ] && [ "${value}" -ge "${critical}" ]; then
    return "${STATE_CRITICAL}"
  fi
  if [ -n "${warning}" ] && [ "${value}" -ge "${warning}" ]; then
    return "${STATE_WARNING}"
  fi
  return "${STATE_OK}"
}

#########################################################################
# Checks. Each check prints one status line and exits; a check that
# returns without exiting falls through to the generic UNKNOWN at the end
# of the script (this happens when the SNMP walk yielded no usable value).
#########################################################################

# Overall health: one status value per member device.
# 3 -> critical, 2 -> warning, 1 -> ok, 0 -> unknown; the worst one wins.
check_health() {
  local status n_crit=0 n_warn=0 n_ok=0 n_unknown=0
  # Word splitting is intended: one token per returned value.
  # shellcheck disable=SC2046
  for status in $(snmp_walk 1.3.6.1.4.1.12740.2.1.5.1.1); do
    case "${status}" in
      3) n_crit=$((n_crit + 1)) ;;
      2) n_warn=$((n_warn + 1)) ;;
      1) n_ok=$((n_ok + 1)) ;;
      0) n_unknown=$((n_unknown + 1)) ;;
    esac
  done
  if [ "${n_crit}" -gt 0 ]; then echo "OVERALL HEALTH CRITICAL"; exit "${STATE_CRITICAL}"; fi
  if [ "${n_warn}" -gt 0 ]; then echo "OVERALL HEALTH WARNING"; exit "${STATE_WARNING}"; fi
  if [ "${n_unknown}" -gt 0 ]; then echo "OVERALL HEALTH UNKNOWN"; exit "${STATE_UNKNOWN}"; fi
  if [ "${n_ok}" -gt 0 ]; then echo "OVERALL HEALTH OK"; exit "${STATE_OK}"; fi
}

# Disk status: a single walk, then an exact-match count per status code.
# 1 ok, 2 spare, 3 failed, 4 off, 5 alt-sig, 6 too small,
# 7 history of failures, 8 unsupported.
# failed/too small/history of failures/unsupported are critical,
# off/alt-sig are warnings.
check_disk() {
  local status
  local diskstatusok=0 diskstatusspare=0 diskstatusfailed=0 diskstatusoff=0
  local diskstatusaltsig=0 diskstatustoosmall=0 diskstatushistfailures=0
  local diskstatusunsupported=0 disksumcritical disksumwarning
  # shellcheck disable=SC2046
  for status in $(snmp_walk 1.3.6.1.4.1.12740.3.1.1.1.8); do
    case "${status}" in
      1) diskstatusok=$((diskstatusok + 1)) ;;
      2) diskstatusspare=$((diskstatusspare + 1)) ;;
      3) diskstatusfailed=$((diskstatusfailed + 1)) ;;
      4) diskstatusoff=$((diskstatusoff + 1)) ;;
      5) diskstatusaltsig=$((diskstatusaltsig + 1)) ;;
      6) diskstatustoosmall=$((diskstatustoosmall + 1)) ;;
      7) diskstatushistfailures=$((diskstatushistfailures + 1)) ;;
      8) diskstatusunsupported=$((diskstatusunsupported + 1)) ;;
    esac
  done
  disksumcritical=$((diskstatusfailed + diskstatustoosmall + diskstatushistfailures + diskstatusunsupported))
  disksumwarning=$((diskstatusoff + diskstatusaltsig))

  # Without a single recognised value the query failed; never report OK.
  if [ $((diskstatusok + diskstatusspare + disksumcritical + disksumwarning)) -eq 0 ]; then
    unknown "no disk status returned by ${host}"
  fi

  if [ "${disksumcritical}" -gt 0 ]; then
    echo "DISK CRITICAL ${disksumcritical} disk(s) in critical state"
    exit "${STATE_CRITICAL}"
  elif [ "${disksumwarning}" -gt 0 ]; then
    echo "DISK WARNING ${disksumwarning} disk(s) in warning state"
    exit "${STATE_WARNING}"
  else
    echo "DISK OK ${diskstatusok} disks OK ${diskstatusspare} disks spare"
    exit "${STATE_OK}"
  fi
}

# Disk usage: total and used storage are summed over all members and the
# used percentage is compared against the thresholds (if any were given).
# Perfdata values are the raw values multiplied by 1024*1024
# (presumably MB -> bytes; the unit is not stated in this script).
check_diskusage() {
  local totalstorage usedstorage usedpercent
  local totalstorage_perfdata usedstorage_perfdata summary state
  # shellcheck disable=SC2046
  totalstorage=$(sum_integers $(snmp_walk 1.3.6.1.4.1.12740.2.1.10.1.1)) \
    || unknown "no total storage value returned by ${host}"
  # shellcheck disable=SC2046
  usedstorage=$(sum_integers $(snmp_walk 1.3.6.1.4.1.12740.2.1.10.1.2)) \
    || unknown "no used storage value returned by ${host}"
  # Guard the division below.
  if [ "${totalstorage}" -eq 0 ]; then
    unknown "total storage reported as 0"
  fi

  usedpercent=$(((usedstorage * 100) / totalstorage))
  totalstorage_perfdata=$((totalstorage * 1024 * 1024))
  usedstorage_perfdata=$((usedstorage * 1024 * 1024))
  summary="Total ${totalstorage} Used ${usedstorage} (${usedpercent}%) | 'space used'=${usedstorage_perfdata}; 'total space'=${totalstorage_perfdata}"

  # No thresholds: purely informational output.
  if [ -z "${warning}" ] && [ -z "${critical}" ]; then
    echo "${summary}"
    exit "${STATE_OK}"
  fi

  threshold_state "${usedpercent}"
  state=$?
  case "${state}" in
    "${STATE_CRITICAL}") echo "DISKUSAGE CRITICAL ${summary}" ;;
    "${STATE_WARNING}") echo "DISKUSAGE WARNING ${summary}" ;;
    *) echo "DISKUSAGE OK ${summary}" ;;
  esac
  exit "${state}"
}

# RAID status: one value per member. Reported in this priority order:
# 6 catastrophic loss, 5 failed (critical); 2 degraded, 4 reconstructing,
# 3 verifying, 7 expanding (warning); 1 ok.
check_raid() {
  local status s1=0 s2=0 s3=0 s4=0 s5=0 s6=0 s7=0
  # shellcheck disable=SC2046
  for status in $(snmp_walk 1.3.6.1.4.1.12740.2.1.13.1.1); do
    case "${status}" in
      1) s1=$((s1 + 1)) ;;
      2) s2=$((s2 + 1)) ;;
      3) s3=$((s3 + 1)) ;;
      4) s4=$((s4 + 1)) ;;
      5) s5=$((s5 + 1)) ;;
      6) s6=$((s6 + 1)) ;;
      7) s7=$((s7 + 1)) ;;
    esac
  done
  if [ "${s6}" -gt 0 ]; then echo "RAID CATASTROPHIC LOSS"; exit "${STATE_CRITICAL}"; fi
  if [ "${s5}" -gt 0 ]; then echo "RAID FAILED"; exit "${STATE_CRITICAL}"; fi
  if [ "${s2}" -gt 0 ]; then echo "RAID DEGRADED"; exit "${STATE_WARNING}"; fi
  if [ "${s4}" -gt 0 ]; then echo "RAID RECONSTRUCTING"; exit "${STATE_WARNING}"; fi
  if [ "${s3}" -gt 0 ]; then echo "RAID VERIFYING"; exit "${STATE_WARNING}"; fi
  if [ "${s7}" -gt 0 ]; then echo "RAID EXPANDING"; exit "${STATE_WARNING}"; fi
  if [ "${s1}" -gt 0 ]; then echo "RAID OK"; exit "${STATE_OK}"; fi
}

# Uptime: prints the value of 1.3.6.1.2.1.1.3.0 as returned with "-O v".
check_uptime() {
  local uptimestatus
  uptimestatus=$(snmpwalk -v 2c -O v -c "${community}" "${host}" 1.3.6.1.2.1.1.3.0)
  if [ -z "${uptimestatus}" ]; then
    unknown "no uptime returned by ${host}"
  fi
  echo "${uptimestatus}"
  exit "${STATE_OK}"
}

# Power supplies: 3 -> failed, 2 -> no AC power (both critical), 1 -> ok.
# ps_count counts every returned value, recognised or not.
check_ps() {
  local status s1=0 s2=0 s3=0 ps_count=0
  # shellcheck disable=SC2046
  for status in $(snmp_walk 1.3.6.1.4.1.12740.2.1.8.1.3); do
    case "${status}" in
      1) s1=$((s1 + 1)) ;;
      2) s2=$((s2 + 1)) ;;
      3) s3=$((s3 + 1)) ;;
    esac
    ps_count=$((ps_count + 1))
  done
  if [ "${s3}" -gt 0 ]; then echo "${s3} of ${ps_count} PSU(s): FAILED"; exit "${STATE_CRITICAL}"; fi
  if [ "${s2}" -gt 0 ]; then echo "${s2} of ${ps_count} PSU(s): NO AC POWER"; exit "${STATE_CRITICAL}"; fi
  if [ "${s1}" -gt 0 ]; then echo "${s1} of ${ps_count} PSU(s): OK"; exit "${STATE_OK}"; fi
}

# Info: model, serial, controller count, firmware and disk count.
# The firmware version is the 4th space-separated field of each returned
# line; line 1 and line 2 are compared with each other.
check_info() {
  local modelnumber serialnumber controllernumber disknumber
  local firmware_fields firmware1 firmware2 firmware
  modelnumber=$(snmp_walk 1.3.6.1.4.1.12740.2.1.11.1.1)
  serialnumber=$(snmp_walk 1.3.6.1.4.1.12740.2.1.11.1.2)
  controllernumber=$(snmp_walk 1.3.6.1.4.1.12740.2.1.11.1.3)
  disknumber=$(snmp_walk 1.3.6.1.4.1.12740.2.1.11.1.4)
  # One walk instead of two; both firmware values come from the same reply.
  firmware_fields=$(snmp_walk 1.3.6.1.4.1.12740.4.1.1.1.4.1 | cut -d " " -f 4)
  firmware1=$(printf '%s\n' "${firmware_fields}" | awk 'NR==1')
  firmware2=$(printf '%s\n' "${firmware_fields}" | awk 'NR==2')
  # A missing second value means there is nothing to differ from.
  if [ -z "${firmware2}" ] || [ "${firmware1}" = "${firmware2}" ]; then
    firmware=${firmware1}
  else
    firmware="Firmware differ!"
  fi
  echo "Equallogic Model ${modelnumber} Serial No ${serialnumber} ${controllernumber} controller(s) running FW ${firmware} ${disknumber} disks"
  exit "${STATE_OK}"
}

# Ethernet interfaces: lists the first N interface names with their MAC
# addresses (N = number of ifDescr lines containing "eth") and compares the
# number of "down" interfaces against the thresholds.
# NOTE(review): the two status walks below are named contr0/contr1 but the
# OIDs (1.3.6.1.2.1.2.2.1.7 and .8) look like the standard IF-MIB
# ifAdminStatus/ifOperStatus columns rather than per-controller data.
# The selection logic is kept as it was - confirm the intent.
check_ethif() {
  local names_raw contr0_raw contr1_raw
  local -a ethnames ethmacs contrstatus
  local ethnumber ethinfo="" idx=0
  local contr0ethup contr1ethup contr0ethdown contr1ethdown
  local ethup ethdown contractive state

  names_raw=$(snmp_walk 1.3.6.1.2.1.2.2.1.2)
  # Word splitting is intended: one array element per returned token.
  # shellcheck disable=SC2206,SC2207
  ethnames=(${names_raw})
  # shellcheck disable=SC2207
  ethmacs=($(snmp_walk 1.3.6.1.2.1.2.2.1.6))
  # shellcheck disable=SC2207
  contrstatus=($(snmp_walk 1.3.6.1.4.1.12740.4.1.1.1.9))
  ethnumber=$(printf '%s\n' "${names_raw}" | grep -c eth)

  # Build "name (mac), name (mac), ..." for however many ports exist.
  while [ "${idx}" -lt "${ethnumber}" ]; do
    ethinfo="${ethinfo:+${ethinfo}, }${ethnames[idx]} (${ethmacs[idx]})"
    idx=$((idx + 1))
  done

  contr0_raw=$(snmp_walk 1.3.6.1.2.1.2.2.1.7)
  contr1_raw=$(snmp_walk 1.3.6.1.2.1.2.2.1.8)
  contr0ethup=$(printf '%s\n' "${contr0_raw}" | grep -c up)
  contr1ethup=$(printf '%s\n' "${contr1_raw}" | grep -c up)
  contr0ethdown=$(printf '%s\n' "${contr0_raw}" | grep -c down)
  contr1ethdown=$(printf '%s\n' "${contr1_raw}" | grep -c down)

  # The controller with the lower status value is treated as the active
  # one; if either value is missing, controller 1 is assumed.
  if [ -n "${contrstatus[0]}" ] && [ -n "${contrstatus[1]}" ] \
    && [ "${contrstatus[0]}" -lt "${contrstatus[1]}" ]; then
    ethup=${contr0ethup}; ethdown=${contr0ethdown}; contractive=0
  else
    ethup=${contr1ethup}; ethdown=${contr1ethdown}; contractive=1
  fi

  state=${STATE_OK}
  if [ -n "${warning}" ] || [ -n "${critical}" ]; then
    threshold_state "${ethdown}"
    state=$?
  fi
  case "${state}" in
    "${STATE_CRITICAL}") echo "INTERFACES CRITICAL Total ${ethdown} interfaces down" ;;
    "${STATE_WARNING}") echo "INTERFACES WARNING Total ${ethdown} interfaces down" ;;
    *) echo "INTERFACES OK ${ethup} interfaces up, Controller ${contractive} is active, ${ethinfo}" ;;
  esac
  exit "${state}"
}

# iSCSI connections: sum over all members of the group, compared against
# the thresholds (if any were given).
check_conn() {
  local connections state
  # shellcheck disable=SC2046
  connections=$(sum_integers $(snmp_walk 1.3.6.1.4.1.12740.2.1.12.1.1)) \
    || unknown "no connection count returned by ${host}"

  # No thresholds: purely informational output.
  if [ -z "${warning}" ] && [ -z "${critical}" ]; then
    echo "${connections} ISCSI Connections | connections=${connections}"
    exit "${STATE_OK}"
  fi

  threshold_state "${connections}"
  state=$?
  case "${state}" in
    "${STATE_CRITICAL}")
      echo "CONNECTIONS CRITICAL ${connections} ISCSI Connections (Threshold: ${critical}) | connections=${connections};${warning};${critical}"
      ;;
    "${STATE_WARNING}")
      echo "CONNECTIONS WARNING ${connections} ISCSI Connections (Threshold: ${warning}) | connections=${connections};${warning};${critical}"
      ;;
    *)
      echo "CONNECTIONS OK ${connections} ISCSI Connections | connections=${connections};${warning};${critical}"
      ;;
  esac
  exit "${state}"
}

#########################################################################
# Main
#########################################################################

# Make sure the required commands are available.
for cmd in snmpwalk awk grep wc '['; do
  if ! command -v "${cmd}" >/dev/null 2>&1; then
    echo "UNKNOWN: ${cmd} does not exist, please check if command exists and PATH is correct"
    exit "${STATE_UNKNOWN}"
  fi
done

# Check for people who need help - aren't we all nice ;-)
if [ "${1:-}" = "--help" ] || [ "$#" -eq 0 ]; then
  printf '%s\n' "${help}"
  exit 1
fi

# Get user-given variables
while getopts "H:C:t:w:c:" Input; do
  case "${Input}" in
    H) host=${OPTARG} ;;
    C) community=${OPTARG} ;;
    t) type=${OPTARG} ;;
    w) warning=${OPTARG} ;;
    c) critical=${OPTARG} ;;
    \?)
      echo "Wrong option given. Please use options -H for host, -C for SNMP-Community, -t for type, -w for warning and -c for critical"
      exit 1
      ;;
  esac
done

# Every check needs a target, a community and a type.
if [ -z "${host}" ] || [ -z "${community}" ] || [ -z "${type}" ]; then
  unknown "host (-H), SNMP community (-C) and check type (-t) are required"
fi

# Thresholds end up in integer comparisons; reject anything else up front.
for threshold in "${warning}" "${critical}"; do
  case "${threshold}" in
    *[!0-9]*) unknown "thresholds (-w/-c) must be non-negative integers" ;;
  esac
done

# Check Different Types
case "${type}" in
  health) check_health ;;
  disk) check_disk ;;
  diskusage) check_diskusage ;;
  raid) check_raid ;;
  uptime) check_uptime ;;
  ps) check_ps ;;
  info) check_info ;;
  ethif) check_ethif ;;
  conn) check_conn ;;
  # Listed in the help text but there is no implementation in this script.
  fan | fanrpm) unknown "check type '${type}' is not implemented in this version" ;;
  *) unknown "unknown check type '${type}', see --help" ;;
esac

# Reached when a check found no value it could interpret.
echo "UNKNOWN: should never reach this part"
exit "${STATE_UNKNOWN}"