#!/usr/bin/perl
#
# ibqueryerrors.pl revision 256281
#
# Copyright (c) 2008 Voltaire, Inc. All rights reserved.
# Copyright (c) 2006 The Regents of the University of California.
#
# Produced at Lawrence Livermore National Laboratory.
# Written by Ira Weiny <weiny2@llnl.gov>.
#
# This software is available to you under a choice of one of two
# licenses.  You may choose to be licensed under the terms of the GNU
# General Public License (GPL) Version 2, available from the file
# COPYING in the main directory of this source tree, or the
# OpenIB.org BSD license below:
#
#     Redistribution and use in source and binary forms, with or
#     without modification, are permitted provided that the following
#     conditions are met:
#
#      - Redistributions of source code must retain the above
#        copyright notice, this list of conditions and the following
#        disclaimer.
#
#      - Redistributions in binary form must reproduce the above
#        copyright notice, this list of conditions and the following
#        disclaimer in the documentation and/or other materials
#        provided with the distribution.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#

use strict;

use File::Basename;
use Getopt::Std;
use IBswcountlimits;

# Script-wide settings shared by the subs below.  The command line handling
# further down overrides them; $cache_file is filled in from
# get_cache_file() and $switch_found is flipped by main().
my ($print_action, $switch_found) = ("no", "no");    # "yes" / "no" strings
my ($report_port_info, $single_switch, $include_data_counters);    # undef until set
my $cache_file = "";

# =========================================================================
# Print the counter line for one switch port and, when port info reporting
# is enabled ($report_port_info), a "Link info" line for that port.
#
#   $_[0]  GUID of the switch
#   $_[1]  port number on that switch
#   $_[2]  CA name, handed to get_ca_name_port_param_string()
#   $_[3]  CA port, handed to get_ca_name_port_param_string()
#
# Nothing at all is printed when any_counts() is false.
sub report_counts
{
	my ($addr, $port, $ca_name, $ca_port) = @_;
	my $extra_params = get_ca_name_port_param_string($ca_name, $ca_port);

	return unless (any_counts());

	# The line prefix is printed here and the newline below; the check_*
	# helpers are called in between with the current $print_action.
	print("   GUID $addr port $port:");
	check_counters($print_action);
	check_data_counters($print_action) if ($include_data_counters);
	print("\n");

	return unless ($report_port_info);

	# Pick LID, active speed and active width out of the smpquery output.
	# A field that never matches stays an empty string.
	my ($lid, $speed, $width) = ("", "", "");
	my $portinfo = `smpquery $extra_params -G portinfo $addr $port`;
	for my $row (split("\n", $portinfo)) {
		$lid   = $1 if ($row =~ /^# Port info: Lid (\w+) port.*/);
		$speed = $1 if ($row =~ /^LinkSpeedActive:\.+(.*)/);
		$width = $1 if ($row =~ /^LinkWidthActive:\.+(.*)/);
	}

	# No link_ends record for this port: report it as disconnected.
	my $link = $IBswcountlimits::link_ends{"$addr"}{$port};
	if (!$link) {
		printf(
"         Link info: %6s %4s[  ]  ==(%3s %s)==>     (Disconnected)\n",
			$lid, $port, $width, $speed);
		return;
	}

	printf(
"         Link info: %6s %4s[%2s]  ==(%3s %s)==>  %18s %4s[%2s] \"%s\"\n",
		$lid,                  $port,
		$link->{loc_ext_port}, $width,
		$speed,                $link->{rem_guid},
		$link->{rem_port},     $link->{rem_ext_port},
		$link->{rem_desc}
	);
}

# =========================================================================
# Run perfquery for one switch port and store every counter it reports
# that is listed in @IBswcountlimits::counters into
# %IBswcountlimits::cur_counts.
#
#   $_[0]  GUID of the switch
#   $_[1]  port number on that switch
#   $_[2]  CA name, handed to get_ca_name_port_param_string()
#   $_[3]  CA port, handed to get_ca_name_port_param_string()
#
# Dies when perfquery produces no (or otherwise false) output.
sub get_counts
{
	my ($addr, $port, $ca_name, $ca_port) = @_;
	my $extra_params = get_ca_name_port_param_string($ca_name, $ca_port);
	my $cmd          = "perfquery $extra_params -G $addr $port";

	my $data = `$cmd`;
	die "'$cmd' FAILED.\n" if (!$data);

	for my $row (split("\n", $data)) {
		for my $name (@IBswcountlimits::counters) {
			# Output lines look like "<counter>:.....<digits>".
			next unless ($row =~ /^$name:\.+(\d+)/);
			$IBswcountlimits::cur_counts{$name} = $1;
		}
	}
}

# =========================================================================
# Switch table filled by get_switches(): first capture of each "Switch"
# line (the switch address used as GUID elsewhere) => its port count.
my %switches;

# Run ibswitches (with the $cache_file argument) and record every line of
# the form "Switch : <addr> ports <n> ..." in %switches.
# Dies when ibswitches produces no (or otherwise false) output.
sub get_switches
{
	my $cmd  = "ibswitches $cache_file";
	my $data = `$cmd`;
	die "'$cmd' failed.\n" if (!$data);

	for my $row (split("\n", $data)) {
		next unless ($row =~ /^Switch\s+:\s+(\w+)\s+ports\s+(\d+)\s+.*/);
		$switches{$1} = $2;
	}
}

# =========================================================================
# Print the usage summary on standard output and exit with status 2.
#
#   $_[0]  program name to show in the "Usage:" line
sub usage_and_exit
{
	my ($prog) = @_;

	my @help = (
"Usage: $prog [-a -c -r -R -s <err1,err2,...> -S <switch_guid> -D <direct route> -d -C <ca_name> -P <ca_port>]\n",
		"   Report counters on all switches in subnet\n",
		"   -a Report an action to take\n",
		"   -c suppress some of the common counters\n",
		"   -r report port configuration information\n",
		"   -R Recalculate ibnetdiscover information\n",
		"   -s <err1,err2,...> suppress errors listed\n",
"   -D <direct route> output only the switch specified by direct route path\n",
		"   -S <switch_guid> query only <switch_guid> (hex format)\n",
		"   -d include the data counters in the output\n",
		"   -C <ca_name> use selected Channel Adaptor name for queries\n",
		"   -P <ca_port> use selected channel adaptor port for queries\n",
	);

	# One print per help line, in order.
	print($_) for (@help);
	exit 2;
}

# -------------------------------------------------------------------------
# Command line handling.  These statements run when the file is loaded,
# before main() is invoked at the bottom of the script.

# Program name for the usage text.  Taken with File::Basename instead of
# shelling out to basename(1), so no shell ever sees the contents of $0.
my $argv0          = basename($0);
my $regenerate_map = undef;
my $direct_route   = undef;
my $ca_name        = "";
my $ca_port        = "";

# $single_switch is not declared here: it already exists among the
# script-wide settings near the top of the file, and a second "my" would
# only mask that declaration.

if (!getopts("has:crRS:D:dC:P:")) { usage_and_exit($argv0); }
if (defined $Getopt::Std::opt_h)  { usage_and_exit($argv0); }
if (defined $Getopt::Std::opt_a)  { $print_action = "yes"; }
if (defined $Getopt::Std::opt_s) {
	@IBswcountlimits::suppress_errors = split(",", $Getopt::Std::opt_s);
}
if (defined $Getopt::Std::opt_c) {
	# -c adds RcvSwRelayErrors to the suppress list.  It must not replace
	# a list given with -s, so append (once) instead of assigning.
	if (!grep { $_ eq "RcvSwRelayErrors" } @IBswcountlimits::suppress_errors) {
		push(@IBswcountlimits::suppress_errors, "RcvSwRelayErrors");
	}
}
if (defined $Getopt::Std::opt_r) { $report_port_info = $Getopt::Std::opt_r; }
if (defined $Getopt::Std::opt_R) { $regenerate_map   = $Getopt::Std::opt_R; }
if (defined $Getopt::Std::opt_D) { $direct_route     = $Getopt::Std::opt_D; }
if (defined $Getopt::Std::opt_S) {
	$single_switch = format_guid($Getopt::Std::opt_S);
}
if (defined $Getopt::Std::opt_d) {
	$include_data_counters = $Getopt::Std::opt_d;
}
if (defined $Getopt::Std::opt_C) { $ca_name = $Getopt::Std::opt_C; }
if (defined $Getopt::Std::opt_P) { $ca_port = $Getopt::Std::opt_P; }

# The cache file name is derived from the selected CA name/port and is
# later passed to ibswitches by get_switches().
$cache_file = get_cache_file($ca_name, $ca_port);

# Query every switch recorded by get_switches() (or only the one selected
# with -S / -D), port by port, and print the ports that have counts.
# Takes no arguments; works on the file-level settings filled in by the
# command line handling above.
sub main
{
	if (@IBswcountlimits::suppress_errors) {
		my $msg = join(",", @IBswcountlimits::suppress_errors);
		print "Suppressing: $msg\n";
	}
	get_link_ends($regenerate_map, $ca_name, $ca_port);
	get_switches();
	if (defined($direct_route)) {
		# convert DR to guid, then use original single_switch option
		$single_switch = convert_dr_to_guid($direct_route);
		if (!defined($single_switch) || !is_switch($single_switch)) {
			printf("The direct route (%s) does not map to a switch.\n",
				$direct_route);
			return;
		}
	}

	# Sorted so that the report order is the same from run to run; plain
	# "keys" order is not stable.
	foreach my $sw_addr (sort keys %switches) {
		if ($single_switch && $sw_addr ne "$single_switch") {
			next;
		}
		$switch_found = "yes";

		# The "Errors for ..." header is printed once per switch, in front
		# of the first port that has counts.
		my $switch_prompt = "no";
		foreach my $sw_port (1 .. $switches{$sw_addr}) {
			clear_counters();
			get_counts($sw_addr, $sw_port, $ca_name, $ca_port);
			if (any_counts() && $switch_prompt eq "no") {
				my $hr = $IBswcountlimits::link_ends{"$sw_addr"}{$sw_port};
				# A port without a link_ends record has no description;
				# print an empty one rather than dereferencing undef.
				my $desc = "";
				if ($hr && defined($hr->{loc_desc})) {
					$desc = $hr->{loc_desc};
				}
				printf("Errors for %18s \"%s\"\n", $sw_addr, $desc);
				$switch_prompt = "yes";
			}
			# Pass the CA selection along as well: report_counts() builds
			# its smpquery command line from it, exactly as get_counts()
			# does for perfquery.  Without it -C/-P were ignored for -r.
			report_counts($sw_addr, $sw_port, $ca_name, $ca_port);
		}
	}
	if ($single_switch && $switch_found ne "yes") {
		printf("Switch \"%s\" not found.\n", $single_switch);
	}
}
main();
230
231