ibcheckerrs.in revision 256281
1#!/bin/sh
2
# Directory holding the InfiniBand diagnostic tools.  A non-empty IBPATH from
# the environment wins; otherwise the build-time substituted path is used.
: "${IBPATH:=@IBSCRIPTPATH@}"
4
# usage - print the command-line synopsis and terminate the script.
#
# Outputs: one synopsis line on stdout, prefixed with this script's basename.
# Exits:   255.  This is the status bash reports for `exit -1`; it is spelled
#          out because a negative status is not a valid POSIX exit argument.
usage() {
	# ${0##*/} strips the directory part without forking basename and
	# without word-splitting the script path.
	printf 'Usage: %s %s %s %s\n' "${0##*/}" \
	    "[-h] [-b] [-v] [-G] [-T <threshold_file>]" \
	    "[-s(how_thresholds)] [-N \| -nocolor] [-C ca_name] [-P ca_port]" \
	    "[-t(imeout) timeout_ms] <lid|guid> [<port>]"
	exit 255
}
11
# green - report a success message; silent unless verbose mode is on.
#
# Globals:   verbose (read) - output is produced only when it equals "yes"
#            bw      (read) - "yes" selects plain text instead of ANSI colour
# Arguments: $1 - message text
# Outputs:   the message followed by a newline on stdout
green() {
	# Success messages are only shown in verbose mode.
	if [ "$verbose" != "yes" ]; then
		return 0
	fi
	if [ "$bw" = "yes" ]; then
		printf '%s\n' "$1"
	else
		# printf expands the \033 escapes itself, so this does not depend
		# on a shell-specific `echo -e`; quoting "$1" keeps the message
		# from being word-split or glob-expanded.
		printf '\033[1;032m %s \033[0;39m\n' "$1"
	fi
}
23
# red - report a failure message; always printed, regardless of verbosity.
#
# Globals:   bw (read) - "yes" selects plain text instead of ANSI colour
# Arguments: $1 - message text
# Outputs:   the message followed by a newline on stdout
red() {
	if [ "$bw" = "yes" ]; then
		printf '%s\n' "$1"
	else
		# printf expands the \033 escapes itself, so this does not depend
		# on a shell-specific `echo -e`; quoting "$1" keeps the message
		# from being word-split or glob-expanded.
		printf '\033[1;031m %s \033[0;39m\n' "$1"
	fi
}
31
# show_thresholds - dump the current threshold of every checked counter.
#
# Globals: the twelve counter-threshold variables listed below (read)
# Outputs: one NAME=value line per counter on stdout, in the order listed
show_thresholds() {
	# A single printf call: the format is reused for each argument, which
	# yields one line per counter.
	printf '%s\n' \
		"SymbolErrors=$SymbolErrors" \
		"LinkRecovers=$LinkRecovers" \
		"LinkDowned=$LinkDowned" \
		"RcvErrors=$RcvErrors" \
		"RcvRemotePhysErrors=$RcvRemotePhysErrors" \
		"RcvSwRelayErrors=$RcvSwRelayErrors" \
		"XmtDiscards=$XmtDiscards" \
		"XmtConstraintErrors=$XmtConstraintErrors" \
		"RcvConstraintErrors=$RcvConstraintErrors" \
		"LinkIntegrityErrors=$LinkIntegrityErrors" \
		"ExcBufOverrunErrors=$ExcBufOverrunErrors" \
		"VL15Dropped=$VL15Dropped"
}
46
# get_thresholds - load threshold overrides by sourcing a shell fragment.
#
# Arguments: $1 - path of the threshold file (NAME=value assignments)
# Globals:   whatever the file assigns is set in the calling shell
#
# NOTE(review): the file is executed as shell code, so it must be trusted.
get_thresholds() {
	case $1 in
	*/*)
		. "$1"
		;;
	*)
		# A name without a slash makes `.` search PATH, which could load
		# a different file than the one the caller checked.  Anchor it
		# to the current directory instead.
		. "./$1"
		;;
	esac
}
50
# Built-in thresholds, grouped by value.  A counter is reported once it
# reaches its threshold.

# Counters with a default threshold of 10.
SymbolErrors=10 LinkRecovers=10 LinkDowned=10 RcvErrors=10
LinkIntegrityErrors=10 ExcBufOverrunErrors=10

# Counters with a default threshold of 100.
RcvRemotePhysErrors=100 RcvSwRelayErrors=100
XmtDiscards=100 XmtConstraintErrors=100 RcvConstraintErrors=100
VL15Dropped=100

# Option state: everything starts out empty (off).
guid_addr= bw= verbose= brief= ca_info=
70
# Parse the leading options.  The loop stops at the first word that is not an
# option (or is empty); that word and anything after it stay in "$@".
while [ "$#" -gt 0 ]; do
	case "$1" in
	-G)
		# The address argument is a GUID rather than a LID.
		guid_addr=yes
		;;
	-nocolor|-N)
		bw=yes
		;;
	-v)
		# -v and -b are mutually exclusive; the last one given wins.
		verbose=yes
		brief=""
		;;
	-b)
		brief=yes
		verbose=""
		;;
	-T)
		# "$2" must be quoted: with a missing argument an unquoted test
		# collapses to `[ -r ]`, which is true, and the bad file name
		# would slip through.
		if ! [ -r "$2" ]; then
			printf '%s\n' "Can't use threshold file '$2'"
			usage
		fi
		get_thresholds "$2"
		shift
		;;
	-s)
		# Shows the thresholds as they stand now, so a -T given earlier
		# on the command line is reflected.
		show_thresholds
		exit 0
		;;
	-P | -C | -t | -timeout)
		# These options need a value that is neither missing nor
		# another option.
		case "$2" in
		"" | -*)
			usage
			;;
		esac
		# Collected verbatim and handed to the ib* tools later.
		ca_info="$ca_info $1 $2"
		shift
		;;
	-*)
		usage
		;;
	*)
		break
		;;
	esac
	shift
done
120
# The first remaining word (the lid or guid) is mandatory.
[ $# -ge 1 ] || usage

# An optional second word selects a single port; without it 255 is used,
# which stands for all ports.
portnum=${2:-255}

# Name used for the port in the final status line.
case "$portnum" in
255)
	portname="all"
	;;
*)
	portname=$2
	;;
esac
137
# Resolve and validate the target address with ibaddr.
#   with -G:  $1 is a GUID; the LID is the third field of the ibaddr output
#   without:  $1 is the LID; ibaddr output is only checked, not used
# Resolution fails if the output contains "failed" or is empty.  The empty
# check catches an ibaddr that printed nothing on stdout.
# $ca_info is deliberately unquoted: it is a space-separated option list.
if [ "$guid_addr" ]; then
	if ! lid=$("$IBPATH/ibaddr" $ca_info -G -L "$1" | awk '/failed/{exit 1} {print $3}') ||
	    [ -z "$lid" ]; then
		printf '%s' "guid $1 address resolution: "
		red "FAILED"
		# 255 is the status bash reports for the non-portable `exit -1`.
		exit 255
	fi
	guid=$1
else
	lid=$1
	if ! temp=$("$IBPATH/ibaddr" $ca_info -L "$1" | awk '/failed/{exit 1} {print $1}') ||
	    [ -z "$temp" ]; then
		printf '%s' "lid $1 address resolution: "
		red "FAILED"
		exit 255
	fi
fi
153
# Node description from smpquery with the "Node Description:....." label
# stripped.  $ca_info is deliberately unquoted (space-separated option list);
# the tool path and the lid are quoted so they are passed as single words.
nodename=$("$IBPATH/smpquery" $ca_info nodedesc "$lid" |
	sed -e "s/^Node Description:\.*\(.*\)/\1/")
155
# Fetch the port counters.  perfquery is run directly rather than through
# eval, so the lid and port are passed as plain words and never re-parsed as
# shell syntax.  $ca_info is deliberately unquoted (space-separated options).
text=$("$IBPATH/perfquery" $ca_info "$lid" "$portnum")
rv=$?

# Scan the counters with awk.  All shell values go in through -v instead of
# being spliced into the program text, so an odd value cannot change the
# program.  awk exits non-zero when a counter reaches its threshold or when
# the reported port does not match the query; the overall check also fails
# when perfquery itself failed (rv).
if printf '%s\n' "$text" | awk -F '[.:]*' \
	-v mono="$bw" -v brief="$brief" \
	-v lid="$lid" -v portnum="$portnum" \
	-v SymbolErrors="$SymbolErrors" \
	-v LinkRecovers="$LinkRecovers" \
	-v LinkDowned="$LinkDowned" \
	-v RcvErrors="$RcvErrors" \
	-v RcvRemotePhysErrors="$RcvRemotePhysErrors" \
	-v RcvSwRelayErrors="$RcvSwRelayErrors" \
	-v XmtDiscards="$XmtDiscards" \
	-v XmtConstraintErrors="$XmtConstraintErrors" \
	-v RcvConstraintErrors="$RcvConstraintErrors" \
	-v LinkIntegrityErrors="$LinkIntegrityErrors" \
	-v ExcBufOverrunErrors="$ExcBufOverrunErrors" \
	-v VL15Dropped="$VL15Dropped" '
# Print s: nothing in brief mode, plain text in mono mode, blue otherwise.
# s is always passed as data, never as the printf format.
function blue(s)
{
	if (brief == "yes")
		return
	if (mono)
		printf "%s", s
	else
		printf "\033[1;034m%s\033[0;39m", s
}

BEGIN {
	# "+ 0" forces a number; 0 or a non-numeric value disables the check
	# for that counter (see the "> 0" test below).
	th["SymbolErrors"] = SymbolErrors + 0
	th["LinkRecovers"] = LinkRecovers + 0
	th["LinkDowned"] = LinkDowned + 0
	th["RcvErrors"] = RcvErrors + 0
	th["RcvRemotePhysErrors"] = RcvRemotePhysErrors + 0
	th["RcvSwRelayErrors"] = RcvSwRelayErrors + 0
	th["XmtDiscards"] = XmtDiscards + 0
	th["XmtConstraintErrors"] = XmtConstraintErrors + 0
	th["RcvConstraintErrors"] = RcvConstraintErrors + 0
	th["LinkIntegrityErrors"] = LinkIntegrityErrors + 0
	th["ExcBufOverrunErrors"] = ExcBufOverrunErrors + 0
	th["VL15Dropped"] = VL15Dropped + 0
}

/^CounterSelect/ {next}

/AllPortSelect/ {next}

# Diagnostics from the ib tools are passed through unchanged.
/^ib/  {print $0; next}
/ibpanic:/     {print $0}
/ibwarn:/      {print $0}
/iberror:/     {print $0}

# The reply must be for the port that was asked about.
/^PortSelect/ {
	if ($2 != portnum) {
		err = err "error: lid " lid " port " $2 " does not match query (" portnum ")\n"
		exit 1
	}
}

# Packet and data counters are not checked.
$1 ~ "(Xmt|Rcv)(Pkts|Data)" { next }

# Any other line: field 1 is the counter name, field 2 its value.
{
	if (th[$1] > 0 && $2 >= th[$1])
		warn = warn "#warn: counter " $1 " = " $2 " \t(threshold " th[$1] ") lid " lid " port " portnum "\n"
}

END {
	if (err != "") {
		blue(err)
		exit 1
	}
	if (warn != "") {
		blue(warn)
		exit 1
	}
	exit 0
}' 2>&1 && test "$rv" -eq 0; then
	if [ "$verbose" = "yes" ]; then
		printf '%s' "Error check on lid $lid ($nodename) port $portname: "
		green OK
	fi
	exit 0
else
	printf '%s' "Error check on lid $lid ($nodename) port $portname: "
	red FAILED
	# 255 is the status bash reports for the non-portable `exit -1`.
	exit 255
fi
224