1/*
2 * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved.
3 * Copyright (c) 2002-2008 Mellanox Technologies LTD. All rights reserved.
4 * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
5 *
6 * This software is available to you under a choice of one of two
7 * licenses.  You may choose to be licensed under the terms of the GNU
8 * General Public License (GPL) Version 2, available from the file
9 * COPYING in the main directory of this source tree, or the
10 * OpenIB.org BSD license below:
11 *
12 *     Redistribution and use in source and binary forms, with or
13 *     without modification, are permitted provided that the following
14 *     conditions are met:
15 *
16 *      - Redistributions of source code must retain the above
17 *        copyright notice, this list of conditions and the following
18 *        disclaimer.
19 *
20 *      - Redistributions in binary form must reproduce the above
21 *        copyright notice, this list of conditions and the following
22 *        disclaimer in the documentation and/or other materials
23 *        provided with the distribution.
24 *
25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 * SOFTWARE.
33 *
34 */
35
36/*
37 * Abstract:
38 *    Implementation of osm_state_mgr_t.
39 * This file implements the State Manager object.
40 */
41
42#if HAVE_CONFIG_H
43#  include <config.h>
44#endif				/* HAVE_CONFIG_H */
45
46#include <unistd.h>
47#include <stdlib.h>
48#include <string.h>
49#include <iba/ib_types.h>
50#include <complib/cl_passivelock.h>
51#include <complib/cl_debug.h>
52#include <complib/cl_qmap.h>
53#include <opensm/osm_sm.h>
54#include <opensm/osm_madw.h>
55#include <opensm/osm_switch.h>
56#include <opensm/osm_log.h>
57#include <opensm/osm_subnet.h>
58#include <opensm/osm_helper.h>
59#include <opensm/osm_msgdef.h>
60#include <opensm/osm_node.h>
61#include <opensm/osm_port.h>
62#include <vendor/osm_vendor_api.h>
63#include <opensm/osm_inform.h>
64#include <opensm/osm_opensm.h>
65
66extern void osm_drop_mgr_process(IN osm_sm_t * sm);
67extern osm_signal_t osm_qos_setup(IN osm_opensm_t * p_osm);
68extern osm_signal_t osm_pkey_mgr_process(IN osm_opensm_t * p_osm);
69extern osm_signal_t osm_mcast_mgr_process(IN osm_sm_t * sm);
70extern osm_signal_t osm_mcast_mgr_process_mgroups(IN osm_sm_t * sm);
71extern osm_signal_t osm_link_mgr_process(IN osm_sm_t * sm, IN uint8_t state);
72
73/**********************************************************************
74 **********************************************************************/
75static void __osm_state_mgr_up_msg(IN const osm_sm_t * sm)
76{
77	/*
78	 * This message should be written only once - when the
79	 * SM moves to Master state and the subnet is up for
80	 * the first time.
81	 */
82	osm_log(sm->p_log, sm->p_subn->first_time_master_sweep ?
83		OSM_LOG_SYS : OSM_LOG_INFO, "SUBNET UP\n");
84
85	OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
86			sm->p_subn->opt.sweep_interval ?
87			"SUBNET UP" : "SUBNET UP (sweep disabled)");
88}
89
90/**********************************************************************
91 **********************************************************************/
92static void __osm_state_mgr_reset_node_count(IN cl_map_item_t *
93					     const p_map_item, IN void *context)
94{
95	osm_node_t *p_node = (osm_node_t *) p_map_item;
96
97	p_node->discovery_count = 0;
98}
99
100/**********************************************************************
101 **********************************************************************/
102static void __osm_state_mgr_reset_port_count(IN cl_map_item_t *
103					     const p_map_item, IN void *context)
104{
105	osm_port_t *p_port = (osm_port_t *) p_map_item;
106
107	p_port->discovery_count = 0;
108}
109
110/**********************************************************************
111 **********************************************************************/
112static void
113__osm_state_mgr_reset_switch_count(IN cl_map_item_t * const p_map_item,
114				   IN void *context)
115{
116	osm_switch_t *p_sw = (osm_switch_t *) p_map_item;
117
118	p_sw->discovery_count = 0;
119	p_sw->need_update = 1;
120}
121
122/**********************************************************************
123 **********************************************************************/
124static void __osm_state_mgr_get_sw_info(IN cl_map_item_t * const p_object,
125					IN void *context)
126{
127	osm_node_t *p_node;
128	osm_dr_path_t *p_dr_path;
129	osm_madw_context_t mad_context;
130	osm_switch_t *const p_sw = (osm_switch_t *) p_object;
131	osm_sm_t *sm = context;
132	ib_api_status_t status;
133
134	OSM_LOG_ENTER(sm->p_log);
135
136	p_node = p_sw->p_node;
137	p_dr_path = osm_physp_get_dr_path_ptr(osm_node_get_physp_ptr(p_node, 0));
138
139	memset(&mad_context, 0, sizeof(mad_context));
140
141	mad_context.si_context.node_guid = osm_node_get_node_guid(p_node);
142	mad_context.si_context.set_method = FALSE;
143	mad_context.si_context.light_sweep = TRUE;
144
145	status = osm_req_get(sm, p_dr_path, IB_MAD_ATTR_SWITCH_INFO, 0,
146			     OSM_MSG_LIGHT_SWEEP_FAIL, &mad_context);
147
148	if (status != IB_SUCCESS)
149		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3304: "
150			"Request for SwitchInfo failed\n");
151
152	OSM_LOG_EXIT(sm->p_log);
153}
154
155/**********************************************************************
156 Initiate a remote port info request for the given physical port
157 **********************************************************************/
158static void
159__osm_state_mgr_get_remote_port_info(IN osm_sm_t * sm,
160				     IN osm_physp_t * const p_physp)
161{
162	osm_dr_path_t *p_dr_path;
163	osm_dr_path_t rem_node_dr_path;
164	osm_madw_context_t mad_context;
165	ib_api_status_t status;
166
167	OSM_LOG_ENTER(sm->p_log);
168
169	/* generate a dr path leaving on the physp to the remote node */
170	p_dr_path = osm_physp_get_dr_path_ptr(p_physp);
171	memcpy(&rem_node_dr_path, p_dr_path, sizeof(osm_dr_path_t));
172	osm_dr_path_extend(&rem_node_dr_path, osm_physp_get_port_num(p_physp));
173
174	memset(&mad_context, 0, sizeof(mad_context));
175
176	mad_context.pi_context.node_guid =
177	    osm_node_get_node_guid(osm_physp_get_node_ptr(p_physp));
178	mad_context.pi_context.port_guid = p_physp->port_guid;
179	mad_context.pi_context.set_method = FALSE;
180	mad_context.pi_context.light_sweep = TRUE;
181	mad_context.pi_context.active_transition = FALSE;
182
183	/* note that with some negative logic - if the query failed it means that
184	 * there is no point in going to heavy sweep */
185	status = osm_req_get(sm, &rem_node_dr_path,
186			     IB_MAD_ATTR_PORT_INFO, 0, CL_DISP_MSGID_NONE,
187			     &mad_context);
188
189	if (status != IB_SUCCESS)
190		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 332E: "
191			"Request for PortInfo failed\n");
192
193	OSM_LOG_EXIT(sm->p_log);
194}
195
196/**********************************************************************
197 Initiates a thorough sweep of the subnet.
198 Used when there is suspicion that something on the subnet has changed.
199**********************************************************************/
200static ib_api_status_t __osm_state_mgr_sweep_hop_0(IN osm_sm_t * sm)
201{
202	ib_api_status_t status;
203	osm_dr_path_t dr_path;
204	osm_bind_handle_t h_bind;
205	uint8_t path_array[IB_SUBNET_PATH_HOPS_MAX];
206
207	OSM_LOG_ENTER(sm->p_log);
208
209	memset(path_array, 0, sizeof(path_array));
210
211	/*
212	 * First, get the bind handle.
213	 */
214	h_bind = osm_sm_mad_ctrl_get_bind_handle(&sm->mad_ctrl);
215	if (h_bind != OSM_BIND_INVALID_HANDLE) {
216		OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
217				"INITIATING HEAVY SWEEP");
218		/*
219		 * Start the sweep by clearing the port counts, then
220		 * get our own NodeInfo at 0 hops.
221		 */
222		CL_PLOCK_ACQUIRE(sm->p_lock);
223
224		cl_qmap_apply_func(&sm->p_subn->node_guid_tbl,
225				   __osm_state_mgr_reset_node_count, sm);
226
227		cl_qmap_apply_func(&sm->p_subn->port_guid_tbl,
228				   __osm_state_mgr_reset_port_count, sm);
229
230		cl_qmap_apply_func(&sm->p_subn->sw_guid_tbl,
231				   __osm_state_mgr_reset_switch_count, sm);
232
233		/* Set the in_sweep_hop_0 flag in subn to be TRUE.
234		 * This will indicate the sweeping not to continue beyond the
235		 * the current node.
236		 * This is relevant for the case of SM on switch, since in the
237		 * switch info we need to signal somehow not to continue
238		 * the sweeping. */
239		sm->p_subn->in_sweep_hop_0 = TRUE;
240
241		CL_PLOCK_RELEASE(sm->p_lock);
242
243		osm_dr_path_init(&dr_path, h_bind, 0, path_array);
244		status = osm_req_get(sm, &dr_path, IB_MAD_ATTR_NODE_INFO, 0,
245				     CL_DISP_MSGID_NONE, NULL);
246
247		if (status != IB_SUCCESS)
248			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3305: "
249				"Request for NodeInfo failed\n");
250	} else {
251		OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
252			"No bound ports. Deferring sweep...\n");
253		status = IB_INVALID_STATE;
254	}
255
256	OSM_LOG_EXIT(sm->p_log);
257	return (status);
258}
259
260/**********************************************************************
261 Clear out all existing port lid assignments
262**********************************************************************/
263static ib_api_status_t __osm_state_mgr_clean_known_lids(IN osm_sm_t * sm)
264{
265	ib_api_status_t status = IB_SUCCESS;
266	cl_ptr_vector_t *p_vec = &(sm->p_subn->port_lid_tbl);
267	uint32_t i;
268
269	OSM_LOG_ENTER(sm->p_log);
270
271	/* we need a lock here! */
272	CL_PLOCK_ACQUIRE(sm->p_lock);
273
274	for (i = 0; i < cl_ptr_vector_get_size(p_vec); i++)
275		cl_ptr_vector_set(p_vec, i, NULL);
276
277	CL_PLOCK_RELEASE(sm->p_lock);
278
279	OSM_LOG_EXIT(sm->p_log);
280	return (status);
281}
282
283/**********************************************************************
284 Notifies the transport layer that the local LID has changed,
285 which give it a chance to update address vectors, etc..
286**********************************************************************/
287static ib_api_status_t __osm_state_mgr_notify_lid_change(IN osm_sm_t * sm)
288{
289	ib_api_status_t status;
290	osm_bind_handle_t h_bind;
291
292	OSM_LOG_ENTER(sm->p_log);
293
294	/*
295	 * First, get the bind handle.
296	 */
297	h_bind = osm_sm_mad_ctrl_get_bind_handle(&sm->mad_ctrl);
298	if (h_bind == OSM_BIND_INVALID_HANDLE) {
299		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3306: "
300			"No bound ports\n");
301		status = IB_ERROR;
302		goto Exit;
303	}
304
305	/*
306	 * Notify the transport layer that we changed the local LID.
307	 */
308	status = osm_vendor_local_lid_change(h_bind);
309	if (status != IB_SUCCESS)
310		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3307: "
311			"Vendor LID update failed (%s)\n",
312			ib_get_err_str(status));
313
314Exit:
315	OSM_LOG_EXIT(sm->p_log);
316	return (status);
317}
318
319/**********************************************************************
320 Returns true if the SM port is down.
321 The SM's port object must exist in the port_guid table.
322**********************************************************************/
323static boolean_t __osm_state_mgr_is_sm_port_down(IN osm_sm_t * sm)
324{
325	ib_net64_t port_guid;
326	osm_port_t *p_port;
327	osm_physp_t *p_physp;
328	uint8_t state;
329
330	OSM_LOG_ENTER(sm->p_log);
331
332	port_guid = sm->p_subn->sm_port_guid;
333
334	/*
335	 * If we don't know our own port guid yet, assume the port is down.
336	 */
337	if (port_guid == 0) {
338		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3308: "
339			"SM port GUID unknown\n");
340		state = IB_LINK_DOWN;
341		goto Exit;
342	}
343
344	CL_ASSERT(port_guid);
345
346	CL_PLOCK_ACQUIRE(sm->p_lock);
347	p_port = osm_get_port_by_guid(sm->p_subn, port_guid);
348	if (!p_port) {
349		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3309: "
350			"SM port with GUID:%016" PRIx64 " is unknown\n",
351			cl_ntoh64(port_guid));
352		state = IB_LINK_DOWN;
353		CL_PLOCK_RELEASE(sm->p_lock);
354		goto Exit;
355	}
356
357	p_physp = p_port->p_physp;
358
359	CL_ASSERT(p_physp);
360
361	state = osm_physp_get_port_state(p_physp);
362	CL_PLOCK_RELEASE(sm->p_lock);
363
364Exit:
365	OSM_LOG_EXIT(sm->p_log);
366	return (state == IB_LINK_DOWN);
367}
368
369/**********************************************************************
370 Sweeps the node 1 hop away.
371 This sets off a "chain reaction" that causes discovery of the subnet.
372 Used when there is suspicion that something on the subnet has changed.
373**********************************************************************/
374static ib_api_status_t __osm_state_mgr_sweep_hop_1(IN osm_sm_t * sm)
375{
376	ib_api_status_t status = IB_SUCCESS;
377	osm_bind_handle_t h_bind;
378	osm_madw_context_t context;
379	osm_node_t *p_node;
380	osm_port_t *p_port;
381	osm_physp_t *p_physp;
382	osm_dr_path_t *p_dr_path;
383	osm_dr_path_t hop_1_path;
384	ib_net64_t port_guid;
385	uint8_t port_num;
386	uint8_t path_array[IB_SUBNET_PATH_HOPS_MAX];
387	uint8_t num_ports;
388	osm_physp_t *p_ext_physp;
389
390	OSM_LOG_ENTER(sm->p_log);
391
392	/*
393	 * First, get our own port and node objects.
394	 */
395	port_guid = sm->p_subn->sm_port_guid;
396
397	CL_ASSERT(port_guid);
398
399	/* Set the in_sweep_hop_0 flag in subn to be FALSE.
400	 * This will indicate the sweeping to continue beyond the
401	 * the current node.
402	 * This is relevant for the case of SM on switch, since in the
403	 * switch info we need to signal that the sweeping should
404	 * continue through the switch. */
405	sm->p_subn->in_sweep_hop_0 = FALSE;
406
407	p_port = osm_get_port_by_guid(sm->p_subn, port_guid);
408	if (!p_port) {
409		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3310: "
410			"No SM port object\n");
411		status = IB_ERROR;
412		goto Exit;
413	}
414
415	p_node = p_port->p_node;
416	CL_ASSERT(p_node);
417
418	port_num = ib_node_info_get_local_port_num(&p_node->node_info);
419
420	OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
421		"Probing hop 1 on local port %u\n", port_num);
422
423	p_physp = osm_node_get_physp_ptr(p_node, port_num);
424
425	CL_ASSERT(p_physp);
426
427	p_dr_path = osm_physp_get_dr_path_ptr(p_physp);
428	h_bind = osm_dr_path_get_bind_handle(p_dr_path);
429
430	CL_ASSERT(h_bind != OSM_BIND_INVALID_HANDLE);
431
432	memset(path_array, 0, sizeof(path_array));
433	/* the hop_1 operations depend on the type of our node.
434	 * Currently - legal nodes that can host SM are SW and CA */
435	switch (osm_node_get_type(p_node)) {
436	case IB_NODE_TYPE_CA:
437	case IB_NODE_TYPE_ROUTER:
438		memset(&context, 0, sizeof(context));
439		context.ni_context.node_guid = osm_node_get_node_guid(p_node);
440		context.ni_context.port_num = port_num;
441
442		path_array[1] = port_num;
443
444		osm_dr_path_init(&hop_1_path, h_bind, 1, path_array);
445		status = osm_req_get(sm, &hop_1_path, IB_MAD_ATTR_NODE_INFO, 0,
446				     CL_DISP_MSGID_NONE, &context);
447		if (status != IB_SUCCESS)
448			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3311: "
449				"Request for NodeInfo failed\n");
450		break;
451
452	case IB_NODE_TYPE_SWITCH:
453		/* Need to go over all the ports of the switch, and send a node_info
454		 * from them. This doesn't include the port 0 of the switch, which
455		 * hosts the SM.
456		 * Note: We'll send another switchInfo on port 0, since if no ports
457		 * are connected, we still want to get some response, and have the
458		 * subnet come up.
459		 */
460		num_ports = osm_node_get_num_physp(p_node);
461		for (port_num = 0; port_num < num_ports; port_num++) {
462			/* go through the port only if the port is not DOWN */
463			p_ext_physp = osm_node_get_physp_ptr(p_node, port_num);
464			if (p_ext_physp && ib_port_info_get_port_state
465			    (&(p_ext_physp->port_info)) > IB_LINK_DOWN) {
466				memset(&context, 0, sizeof(context));
467				context.ni_context.node_guid =
468				    osm_node_get_node_guid(p_node);
469				context.ni_context.port_num = port_num;
470
471				path_array[1] = port_num;
472				osm_dr_path_init(&hop_1_path, h_bind, 1,
473						 path_array);
474				status = osm_req_get(sm, &hop_1_path,
475						     IB_MAD_ATTR_NODE_INFO, 0,
476						     CL_DISP_MSGID_NONE,
477						     &context);
478
479				if (status != IB_SUCCESS)
480					OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3312: "
481						"Request for NodeInfo failed\n");
482			}
483		}
484		break;
485
486	default:
487		OSM_LOG(sm->p_log, OSM_LOG_ERROR,
488			"ERR 3313: Unknown node type %d (%s)\n",
489			osm_node_get_type(p_node), p_node->print_desc);
490	}
491
492Exit:
493	OSM_LOG_EXIT(sm->p_log);
494	return (status);
495}
496
497static void query_sm_info(cl_map_item_t *item, void *cxt)
498{
499	osm_madw_context_t context;
500	osm_remote_sm_t *r_sm = cl_item_obj(item, r_sm, map_item);
501	osm_sm_t *sm = cxt;
502	ib_api_status_t ret;
503
504	context.smi_context.port_guid = r_sm->p_port->guid;
505	context.smi_context.set_method = FALSE;
506	context.smi_context.light_sweep = TRUE;
507
508	ret = osm_req_get(sm, osm_physp_get_dr_path_ptr(r_sm->p_port->p_physp),
509			  IB_MAD_ATTR_SM_INFO, 0, CL_DISP_MSGID_NONE, &context);
510	if (ret != IB_SUCCESS)
511		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3314: "
512			"Failure requesting SMInfo (%s)\n",
513			ib_get_err_str(ret));
514}
515
516/**********************************************************************
517 During a light sweep check each node to see if the node descriptor is valid
518 if not issue a ND query.
519**********************************************************************/
520static void __osm_state_mgr_get_node_desc(IN cl_map_item_t * const p_object,
521					  IN void *context)
522{
523	osm_madw_context_t mad_context;
524	osm_node_t *const p_node = (osm_node_t *) p_object;
525	osm_sm_t *sm = context;
526	osm_physp_t *p_physp = NULL;
527	unsigned i, num_ports;
528	ib_api_status_t status;
529
530	OSM_LOG_ENTER(sm->p_log);
531
532	CL_ASSERT(p_node);
533
534	if (p_node->print_desc && strcmp(p_node->print_desc, OSM_NODE_DESC_UNKNOWN))
535		/* if ND is valid, do nothing */
536		goto exit;
537
538	OSM_LOG(sm->p_log, OSM_LOG_ERROR,
539		"ERR 3319: Unknown node description for node GUID "
540		"0x%016" PRIx64 ".  Reissuing ND query\n",
541		cl_ntoh64(osm_node_get_node_guid (p_node)));
542
543	/* get a physp to request from. */
544	num_ports = osm_node_get_num_physp(p_node);
545	for (i = 0; i < num_ports; i++)
546		if ((p_physp = osm_node_get_physp_ptr(p_node, i)))
547			break;
548
549	if (!p_physp) {
550		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 331C: "
551			"Failed to find any valid physical port object.\n");
552		goto exit;
553	}
554
555	mad_context.nd_context.node_guid = osm_node_get_node_guid(p_node);
556
557	status = osm_req_get(sm, osm_physp_get_dr_path_ptr(p_physp),
558			     IB_MAD_ATTR_NODE_DESC, 0, CL_DISP_MSGID_NONE,
559			     &mad_context);
560	if (status != IB_SUCCESS)
561		OSM_LOG(sm->p_log, OSM_LOG_ERROR,
562			"ERR 331B: Failure initiating NodeDescription request "
563			"(%s)\n", ib_get_err_str(status));
564
565exit:
566	OSM_LOG_EXIT(sm->p_log);
567}
568
569/**********************************************************************
570 Initiates a lightweight sweep of the subnet.
571 Used during normal sweeps after the subnet is up.
572**********************************************************************/
573static ib_api_status_t __osm_state_mgr_light_sweep_start(IN osm_sm_t * sm)
574{
575	ib_api_status_t status = IB_SUCCESS;
576	osm_bind_handle_t h_bind;
577	cl_qmap_t *p_sw_tbl;
578	cl_map_item_t *p_next;
579	osm_node_t *p_node;
580	osm_physp_t *p_physp;
581	uint8_t port_num;
582
583	OSM_LOG_ENTER(sm->p_log);
584
585	p_sw_tbl = &sm->p_subn->sw_guid_tbl;
586
587	/*
588	 * First, get the bind handle.
589	 */
590	h_bind = osm_sm_mad_ctrl_get_bind_handle(&sm->mad_ctrl);
591	if (h_bind == OSM_BIND_INVALID_HANDLE) {
592		OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
593			"No bound ports. Deferring sweep...\n");
594		status = IB_INVALID_STATE;
595		goto _exit;
596	}
597
598	OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE, "INITIATING LIGHT SWEEP");
599	CL_PLOCK_ACQUIRE(sm->p_lock);
600	cl_qmap_apply_func(p_sw_tbl, __osm_state_mgr_get_sw_info, sm);
601	CL_PLOCK_RELEASE(sm->p_lock);
602
603	CL_PLOCK_ACQUIRE(sm->p_lock);
604	cl_qmap_apply_func(&sm->p_subn->node_guid_tbl, __osm_state_mgr_get_node_desc, sm);
605	CL_PLOCK_RELEASE(sm->p_lock);
606
607	/* now scan the list of physical ports that were not down but have no remote port */
608	CL_PLOCK_ACQUIRE(sm->p_lock);
609	p_next = cl_qmap_head(&sm->p_subn->node_guid_tbl);
610	while (p_next != cl_qmap_end(&sm->p_subn->node_guid_tbl)) {
611		p_node = (osm_node_t *) p_next;
612		p_next = cl_qmap_next(p_next);
613
614		for (port_num = 1; port_num < osm_node_get_num_physp(p_node);
615		     port_num++) {
616			p_physp = osm_node_get_physp_ptr(p_node, port_num);
617			if (p_physp && (osm_physp_get_port_state(p_physp) !=
618					IB_LINK_DOWN)
619			    && !osm_physp_get_remote(p_physp)) {
620				OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3315: "
621					"Unknown remote side for node 0x%016"
622					PRIx64
623					"(%s) port %u. Adding to light sweep sampling list\n",
624					cl_ntoh64(osm_node_get_node_guid
625						  (p_node)),
626					p_node->print_desc, port_num);
627
628				osm_dump_dr_path(sm->p_log,
629						 osm_physp_get_dr_path_ptr
630						 (p_physp), OSM_LOG_ERROR);
631
632				__osm_state_mgr_get_remote_port_info(sm,
633								     p_physp);
634			}
635		}
636	}
637
638	cl_qmap_apply_func(&sm->p_subn->sm_guid_tbl, query_sm_info, sm);
639
640	CL_PLOCK_RELEASE(sm->p_lock);
641
642_exit:
643	OSM_LOG_EXIT(sm->p_log);
644	return (status);
645}
646
647/**********************************************************************
648 * Go over all the remote SMs (as updated in the sm_guid_tbl).
649 * Find if there is a remote sm that is a master SM.
650 * If there is a remote master SM - return a pointer to it,
651 * else - return NULL.
652 **********************************************************************/
653static osm_remote_sm_t *__osm_state_mgr_exists_other_master_sm(IN osm_sm_t * sm)
654{
655	cl_qmap_t *p_sm_tbl;
656	osm_remote_sm_t *p_sm;
657	osm_remote_sm_t *p_sm_res = NULL;
658
659	OSM_LOG_ENTER(sm->p_log);
660
661	p_sm_tbl = &sm->p_subn->sm_guid_tbl;
662
663	/* go over all the remote SMs */
664	for (p_sm = (osm_remote_sm_t *) cl_qmap_head(p_sm_tbl);
665	     p_sm != (osm_remote_sm_t *) cl_qmap_end(p_sm_tbl);
666	     p_sm = (osm_remote_sm_t *) cl_qmap_next(&p_sm->map_item)) {
667		/* If the sm is in MASTER state - return a pointer to it */
668		if (ib_sminfo_get_state(&p_sm->smi) == IB_SMINFO_STATE_MASTER) {
669			OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
670				"Found remote master SM with guid:0x%016" PRIx64
671				" (node %s)\n", cl_ntoh64(p_sm->smi.guid),
672				p_sm->p_port->p_node ? p_sm->p_port->p_node->
673				print_desc : "UNKNOWN");
674			p_sm_res = p_sm;
675			goto Exit;
676		}
677	}
678
679Exit:
680	OSM_LOG_EXIT(sm->p_log);
681	return (p_sm_res);
682}
683
684/**********************************************************************
685 * Go over all remote SMs (as updated in the sm_guid_tbl).
686 * Find the one with the highest priority and lowest guid.
687 * Compare this SM to the local SM. If the local SM is higher -
688 * return NULL, if the remote SM is higher - return a pointer to it.
689 **********************************************************************/
690static osm_remote_sm_t *__osm_state_mgr_get_highest_sm(IN osm_sm_t * sm)
691{
692	cl_qmap_t *p_sm_tbl;
693	osm_remote_sm_t *p_sm = NULL;
694	osm_remote_sm_t *p_highest_sm;
695	uint8_t highest_sm_priority;
696	ib_net64_t highest_sm_guid;
697
698	OSM_LOG_ENTER(sm->p_log);
699
700	p_sm_tbl = &sm->p_subn->sm_guid_tbl;
701
702	/* Start with the local sm as the standard */
703	p_highest_sm = NULL;
704	highest_sm_priority = sm->p_subn->opt.sm_priority;
705	highest_sm_guid = sm->p_subn->sm_port_guid;
706
707	/* go over all the remote SMs */
708	for (p_sm = (osm_remote_sm_t *) cl_qmap_head(p_sm_tbl);
709	     p_sm != (osm_remote_sm_t *) cl_qmap_end(p_sm_tbl);
710	     p_sm = (osm_remote_sm_t *) cl_qmap_next(&p_sm->map_item)) {
711
712		/* If the sm is in NOTACTIVE state - continue */
713		if (ib_sminfo_get_state(&p_sm->smi) ==
714		    IB_SMINFO_STATE_NOTACTIVE)
715			continue;
716
717		if (osm_sm_is_greater_than(ib_sminfo_get_priority(&p_sm->smi),
718					   p_sm->smi.guid, highest_sm_priority,
719					   highest_sm_guid)) {
720			/* the new p_sm is with higher priority - update the highest_sm */
721			/* to this sm */
722			p_highest_sm = p_sm;
723			highest_sm_priority =
724			    ib_sminfo_get_priority(&p_sm->smi);
725			highest_sm_guid = p_sm->smi.guid;
726		}
727	}
728
729	if (p_highest_sm != NULL)
730		OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
731			"Found higher SM with guid: %016" PRIx64 " (node %s)\n",
732			cl_ntoh64(p_highest_sm->smi.guid),
733			p_highest_sm->p_port->p_node ?
734			p_highest_sm->p_port->p_node->print_desc : "UNKNOWN");
735
736	OSM_LOG_EXIT(sm->p_log);
737	return (p_highest_sm);
738}
739
740/**********************************************************************
741 * Send SubnSet(SMInfo) SMP with HANDOVER attribute to the
742 * remote_sm indicated.
743 **********************************************************************/
744static void
745__osm_state_mgr_send_handover(IN osm_sm_t * const sm,
746			      IN osm_remote_sm_t * const p_sm)
747{
748	uint8_t payload[IB_SMP_DATA_SIZE];
749	ib_sm_info_t *p_smi = (ib_sm_info_t *) payload;
750	osm_madw_context_t context;
751	const osm_port_t *p_port;
752	ib_api_status_t status;
753
754	OSM_LOG_ENTER(sm->p_log);
755
756	/*
757	 * Send a query of SubnSet(SMInfo) HANDOVER to the remote sm given.
758	 */
759
760	memset(&context, 0, sizeof(context));
761	p_port = p_sm->p_port;
762	if (p_port == NULL) {
763		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3316: "
764			"No port object on given remote_sm object\n");
765		goto Exit;
766	}
767
768	/* update the master_guid in the sm_state_mgr object according to */
769	/* the guid of the port where the new Master SM should reside. */
770	OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
771		"Handing over mastership. Updating sm_state_mgr master_guid: %016"
772		PRIx64 " (node %s)\n", cl_ntoh64(p_port->guid),
773		p_port->p_node ? p_port->p_node->print_desc : "UNKNOWN");
774	sm->master_sm_guid = p_port->guid;
775
776	context.smi_context.port_guid = p_port->guid;
777	context.smi_context.set_method = TRUE;
778
779	p_smi->guid = sm->p_subn->sm_port_guid;
780	p_smi->act_count = cl_hton32(sm->p_subn->p_osm->stats.qp0_mads_sent);
781	p_smi->pri_state = (uint8_t) (sm->p_subn->sm_state |
782				      sm->p_subn->opt.sm_priority << 4);
783	/*
784	 * Return 0 for the SM key unless we authenticate the requester
785	 * as the master SM.
786	 */
787	if (ib_sminfo_get_state(&p_sm->smi) == IB_SMINFO_STATE_MASTER) {
788		OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
789			"Responding to master SM with real sm_key\n");
790		p_smi->sm_key = sm->p_subn->opt.sm_key;
791	} else {
792		/* The requester is not authenticated as master - set sm_key to zero */
793		OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
794			"Responding to SM not master with zero sm_key\n");
795		p_smi->sm_key = 0;
796	}
797
798	status = osm_req_set(sm, osm_physp_get_dr_path_ptr(p_port->p_physp),
799			     payload, sizeof(payload), IB_MAD_ATTR_SM_INFO,
800			     IB_SMINFO_ATTR_MOD_HANDOVER, CL_DISP_MSGID_NONE,
801			     &context);
802
803	if (status != IB_SUCCESS)
804		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3317: "
805			"Failure requesting SMInfo (%s)\n",
806			ib_get_err_str(status));
807
808Exit:
809	OSM_LOG_EXIT(sm->p_log);
810}
811
812/**********************************************************************
813 * Send Trap 64 on all new ports.
814 **********************************************************************/
815static void __osm_state_mgr_report_new_ports(IN osm_sm_t * sm)
816{
817	ib_gid_t port_gid;
818	ib_mad_notice_attr_t notice;
819	ib_api_status_t status;
820	ib_net64_t port_guid;
821	cl_map_item_t *p_next;
822	osm_port_t *p_port;
823	uint16_t min_lid_ho;
824	uint16_t max_lid_ho;
825
826	OSM_LOG_ENTER(sm->p_log);
827
828	CL_PLOCK_ACQUIRE(sm->p_lock);
829	p_next = cl_qmap_head(&sm->p_subn->port_guid_tbl);
830	while (p_next != cl_qmap_end(&sm->p_subn->port_guid_tbl)) {
831		p_port = (osm_port_t *) p_next;
832		p_next = cl_qmap_next(p_next);
833
834		if (!p_port->is_new)
835			continue;
836
837		port_guid = osm_port_get_guid(p_port);
838		/* issue a notice - trap 64 */
839
840		/* details of the notice */
841		notice.generic_type = 0x83;	/* is generic subn mgt type */
842		ib_notice_set_prod_type_ho(&notice, 4);	/* A Class Manager generator */
843		/* endport becomes to be reachable */
844		notice.g_or_v.generic.trap_num = CL_HTON16(64);
845		/* The sm_base_lid is saved in network order already. */
846		notice.issuer_lid = sm->p_subn->sm_base_lid;
847		/* following C14-72.1.1 and table 119 p739 */
848		/* we need to provide the GID */
849		port_gid.unicast.prefix = sm->p_subn->opt.subnet_prefix;
850		port_gid.unicast.interface_id = port_guid;
851		memcpy(&(notice.data_details.ntc_64_67.gid), &(port_gid),
852		       sizeof(ib_gid_t));
853
854		/* According to page 653 - the issuer gid in this case of trap
855		 * is the SM gid, since the SM is the initiator of this trap. */
856		notice.issuer_gid.unicast.prefix =
857		    sm->p_subn->opt.subnet_prefix;
858		notice.issuer_gid.unicast.interface_id =
859		    sm->p_subn->sm_port_guid;
860
861		status = osm_report_notice(sm->p_log, sm->p_subn, &notice);
862		if (status != IB_SUCCESS)
863			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3318: "
864				"Error sending trap reports on GUID:0x%016"
865				PRIx64 " (%s)\n", port_gid.unicast.interface_id,
866				ib_get_err_str(status));
867		osm_port_get_lid_range_ho(p_port, &min_lid_ho, &max_lid_ho);
868		OSM_LOG(sm->p_log, OSM_LOG_INFO,
869			"Discovered new port with GUID:0x%016" PRIx64
870			" LID range [%u,%u] of node:%s\n",
871			cl_ntoh64(port_gid.unicast.interface_id),
872			min_lid_ho, max_lid_ho,
873			p_port->p_node ? p_port->p_node->
874			print_desc : "UNKNOWN");
875
876		p_port->is_new = 0;
877	}
878	CL_PLOCK_RELEASE(sm->p_lock);
879
880	OSM_LOG_EXIT(sm->p_log);
881}
882
883/**********************************************************************
884 * Make sure that the lid_port_tbl of the subnet has only the ports
885 * that are recognized, and in the correct lid place. There could be
886 * errors if we wanted to assign a certain port with lid X, but that
887 * request didn't reach the port. In this case port_lid_tbl will have
888 * the port under lid X, though the port isn't updated with this lid.
889 * We will run a new heavy sweep (since there were errors in the
890 * initialization), but here we'll clean the database from incorrect
891 * information.
892 **********************************************************************/
893static void __osm_state_mgr_check_tbl_consistency(IN osm_sm_t * sm)
894{
895	cl_qmap_t *p_port_guid_tbl;
896	osm_port_t *p_port;
897	osm_port_t *p_next_port;
898	cl_ptr_vector_t *p_port_lid_tbl;
899	size_t max_lid, ref_size, curr_size, lid;
900	osm_port_t *p_port_ref, *p_port_stored;
901	cl_ptr_vector_t ref_port_lid_tbl;
902	uint16_t min_lid_ho;
903	uint16_t max_lid_ho;
904	uint16_t lid_ho;
905
906	OSM_LOG_ENTER(sm->p_log);
907
908	cl_ptr_vector_construct(&ref_port_lid_tbl);
909	cl_ptr_vector_init(&ref_port_lid_tbl,
910			   cl_ptr_vector_get_size(&sm->p_subn->port_lid_tbl),
911			   OSM_SUBNET_VECTOR_GROW_SIZE);
912
913	p_port_guid_tbl = &sm->p_subn->port_guid_tbl;
914
915	/* Let's go over all the ports according to port_guid_tbl,
916	 * and add the port to a reference port_lid_tbl. */
917	p_next_port = (osm_port_t *) cl_qmap_head(p_port_guid_tbl);
918	while (p_next_port != (osm_port_t *) cl_qmap_end(p_port_guid_tbl)) {
919		p_port = p_next_port;
920		p_next_port =
921		    (osm_port_t *) cl_qmap_next(&p_next_port->map_item);
922
923		osm_port_get_lid_range_ho(p_port, &min_lid_ho, &max_lid_ho);
924		for (lid_ho = min_lid_ho; lid_ho <= max_lid_ho; lid_ho++)
925			cl_ptr_vector_set(&ref_port_lid_tbl, lid_ho, p_port);
926	}
927
928	p_port_lid_tbl = &sm->p_subn->port_lid_tbl;
929
930	ref_size = cl_ptr_vector_get_size(&ref_port_lid_tbl);
931	curr_size = cl_ptr_vector_get_size(p_port_lid_tbl);
932	/* They should be the same, but compare it anyway */
933	max_lid = (ref_size > curr_size) ? ref_size : curr_size;
934
935	for (lid = 1; lid <= max_lid; lid++) {
936		p_port_ref = NULL;
937		p_port_stored = NULL;
938		cl_ptr_vector_at(p_port_lid_tbl, lid, (void *)&p_port_stored);
939		cl_ptr_vector_at(&ref_port_lid_tbl, lid, (void *)&p_port_ref);
940
941		if (p_port_stored == p_port_ref)
942			/* This is the "good" case - both entries are the
943			 * same for this lid. Nothing to do. */
944			continue;
945
946		if (p_port_ref == NULL)
947			/* There is an object in the subnet database for this
948			 * lid, but no such object exists in the reference
949			 * port_list_tbl. This can occur if we wanted to assign
950			 * a certain port with some lid (different than the one
951			 * pre-assigned to it), and the port didn't get the
952			 * PortInfo Set request. Due to this, the port is
953			 * updated with its original lid in our database, but
954			 * with the new lid we wanted to give it in our
955			 * port_lid_tbl. */
956			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3322: "
957				"lid %zu is wrongly assigned to port 0x%016"
958				PRIx64 " (\'%s\' port %u) in port_lid_tbl\n",
959				lid,
960				cl_ntoh64(osm_port_get_guid(p_port_stored)),
961				p_port_stored->p_node->print_desc,
962				p_port_stored->p_physp->port_num);
963		else if (p_port_stored == NULL)
964			/* There is an object in the new database, but no
965			 * object in our subnet database. This is the matching
966			 * case of the prior check - the port still has its
967			 * original lid. */
968			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3323: "
969				"port 0x%016" PRIx64 " (\'%s\' port %u)"
970				" exists in new port_lid_tbl under lid %zu,"
971				" but missing in subnet port_lid_tbl db\n",
972				cl_ntoh64(osm_port_get_guid(p_port_ref)),
973				p_port_ref->p_node->print_desc,
974				p_port_ref->p_physp->port_num, lid);
975		else
976			/* if we reached here then p_port_stored != p_port_ref.
977			 * We were trying to set a lid to p_port_stored, but
978			 * it didn't reach it, and p_port_ref also didn't get
979			 * the lid update. */
980			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3324: "
981				"lid %zu has port 0x%016" PRIx64
982				" (\'%s\' port %u) in new port_lid_tbl db, "
983				"and port 0x%016" PRIx64 " (\'%s\' port %u)"
984				" in subnet port_lid_tbl db\n", lid,
985				cl_ntoh64(osm_port_get_guid(p_port_ref)),
986				p_port_ref->p_node->print_desc,
987				p_port_ref->p_physp->port_num,
988				cl_ntoh64(osm_port_get_guid(p_port_stored)),
989				p_port_ref->p_node->print_desc,
990				p_port_ref->p_physp->port_num);
991
992		/* In any of these cases we want to set NULL in the
993		 * port_lid_tbl, since this entry is invalid. Also, make sure
994		 * we'll do another heavy sweep. */
995		cl_ptr_vector_set(p_port_lid_tbl, lid, NULL);
996		sm->p_subn->subnet_initialization_error = TRUE;
997	}
998
999	cl_ptr_vector_destroy(&ref_port_lid_tbl);
1000	OSM_LOG_EXIT(sm->p_log);
1001}
1002
1003static void cleanup_switch(cl_map_item_t *item, void *log)
1004{
1005	osm_switch_t *sw = (osm_switch_t *)item;
1006
1007	if (!sw->new_lft)
1008		return;
1009
1010	if (memcmp(sw->lft, sw->new_lft, IB_LID_UCAST_END_HO + 1))
1011		osm_log(log, OSM_LOG_ERROR, "ERR 331D: "
1012			"LFT of switch 0x%016" PRIx64 " is not up to date.\n",
1013			cl_ntoh64(sw->p_node->node_info.node_guid));
1014	else {
1015		free(sw->new_lft);
1016		sw->new_lft = NULL;
1017	}
1018}
1019
1020/**********************************************************************
1021 **********************************************************************/
1022int wait_for_pending_transactions(osm_stats_t * stats)
1023{
1024#ifdef HAVE_LIBPTHREAD
1025	pthread_mutex_lock(&stats->mutex);
1026	while (stats->qp0_mads_outstanding && !osm_exit_flag)
1027		pthread_cond_wait(&stats->cond, &stats->mutex);
1028	pthread_mutex_unlock(&stats->mutex);
1029#else
1030	while (1) {
1031		unsigned count = stats->qp0_mads_outstanding;
1032		if (!count || osm_exit_flag)
1033			break;
1034		cl_event_wait_on(&stats->event, EVENT_NO_TIMEOUT, TRUE);
1035	}
1036#endif
1037	return osm_exit_flag;
1038}
1039
/**********************************************************************
 * Run one subnet sweep on behalf of the state manager.
 *
 * Nothing is done unless the SM state is MASTER or DISCOVERING.
 * Depending on the subnet flags, one of the following is performed:
 *  - a light sweep, when switches are already known and no heavy
 *    sweep, reroute or initialization error is pending;
 *  - a reroute only, when force_reroute is set but no heavy sweep is
 *    needed;
 *  - a heavy sweep: hop 0 and hop 1 discovery followed by the drop,
 *    pkey, QoS, lid, unicast, multicast and link managers.
 *
 * After each stage the function waits for outstanding transactions
 * via wait_for_pending_transactions(); a non-zero return from it
 * (osm_exit_flag set) aborts the sweep immediately.
 **********************************************************************/
static void do_sweep(osm_sm_t * sm)
{
	ib_api_status_t status;
	osm_remote_sm_t *p_remote_sm;

	/* Sweeping is only done in MASTER or DISCOVERING state */
	if (sm->p_subn->sm_state != IB_SMINFO_STATE_MASTER &&
	    sm->p_subn->sm_state != IB_SMINFO_STATE_DISCOVERING)
		return;

	if (sm->p_subn->coming_out_of_standby)
		/*
		 * Need to force re-write of sm_base_lid to all ports
		 * to do that we want all the ports to be considered
		 * foreign
		 */
		__osm_state_mgr_clean_known_lids(sm);

	sm->master_sm_found = 0;

	/*
	 * If we already have switches, then try a light sweep.
	 * Otherwise, this is probably our first discovery pass
	 * or we are connected in loopback. In both cases do a
	 * heavy sweep.
	 * Note: If we are connected in loopback we want a heavy
	 * sweep, since we will not be getting any traps if there is
	 * a lost connection.
	 */
	/*  if we are in DISCOVERING state - this means it is either in
	 *  initializing or wake up from STANDBY - run the heavy sweep */
	if (cl_qmap_count(&sm->p_subn->sw_guid_tbl)
	    && sm->p_subn->sm_state != IB_SMINFO_STATE_DISCOVERING
	    && sm->p_subn->opt.force_heavy_sweep == FALSE
	    && sm->p_subn->force_heavy_sweep == FALSE
	    && sm->p_subn->force_reroute == FALSE
	    && sm->p_subn->subnet_initialization_error == FALSE
	    && (__osm_state_mgr_light_sweep_start(sm) == IB_SUCCESS)) {
		if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
			return;
		/* If force_heavy_sweep got set while the light sweep was
		 * running, fall through to the heavy sweep below. */
		if (!sm->p_subn->force_heavy_sweep) {
			OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
					"LIGHT SWEEP COMPLETE");
			return;
		}
	}

	/*
	 * Unicast cache should be invalidated if there were errors
	 * during initialization or if subnet re-route is requested.
	 */
	if (sm->p_subn->opt.use_ucast_cache &&
	    (sm->p_subn->subnet_initialization_error ||
	     sm->p_subn->force_reroute))
		osm_ucast_cache_invalidate(&sm->ucast_mgr);

	/*
	 * If we don't need to do a heavy sweep and we want to do a reroute,
	 * just reroute only.
	 */
	if (cl_qmap_count(&sm->p_subn->sw_guid_tbl)
	    && sm->p_subn->sm_state != IB_SMINFO_STATE_DISCOVERING
	    && sm->p_subn->opt.force_heavy_sweep == FALSE
	    && sm->p_subn->force_heavy_sweep == FALSE
	    && sm->p_subn->force_reroute == TRUE
	    && sm->p_subn->subnet_initialization_error == FALSE) {
		/* Reset flag */
		sm->p_subn->force_reroute = FALSE;

		/* Re-program the switches fully */
		sm->p_subn->ignore_existing_lfts = TRUE;

		osm_ucast_mgr_process(&sm->ucast_mgr);

		/* Reset flag */
		sm->p_subn->ignore_existing_lfts = FALSE;

		if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
			return;

		/* If an initialization error was flagged during the
		 * reroute, fall through to the heavy sweep below. */
		if (!sm->p_subn->subnet_initialization_error) {
			OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
					"REROUTE COMPLETE");
			return;
		}
	}

	/* go to heavy sweep */
_repeat_discovery:

	/* First of all - unset all flags */
	sm->p_subn->force_heavy_sweep = FALSE;
	sm->p_subn->force_reroute = FALSE;
	sm->p_subn->subnet_initialization_error = FALSE;

	/* rescan configuration updates; a failure is only logged and
	 * the sweep continues */
	if (osm_subn_rescan_conf_files(sm->p_subn) < 0)
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 331A: "
			"osm_subn_rescan_conf_file failed\n");

	if (sm->p_subn->sm_state != IB_SMINFO_STATE_MASTER)
		sm->p_subn->need_update = 1;

	/* Discovery, step 1: hop 0 */
	status = __osm_state_mgr_sweep_hop_0(sm);
	if (status != IB_SUCCESS ||
	    wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
		return;

	if (__osm_state_mgr_is_sm_port_down(sm) == TRUE) {
		osm_log(sm->p_log, OSM_LOG_SYS, "SM port is down\n");
		OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE, "SM PORT DOWN");

		/* Run the drop manager - we want to clear all records */
		osm_drop_mgr_process(sm);

		/* Move to DISCOVERING state */
		osm_sm_state_mgr_process(sm, OSM_SM_SIGNAL_DISCOVER);
		return;
	}

	/* Discovery, step 2: hop 1 */
	status = __osm_state_mgr_sweep_hop_1(sm);
	if (status != IB_SUCCESS ||
	    wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
		return;

	/* discovery completed - check other sm presence */
	if (sm->master_sm_found) {
		/*
		 * Call the sm_state_mgr with signal
		 * OSM_SM_SIGNAL_MASTER_OR_HIGHER_SM_DETECTED
		 */
		osm_sm_state_mgr_process(sm,
					 OSM_SM_SIGNAL_MASTER_OR_HIGHER_SM_DETECTED);
		OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
				"ENTERING STANDBY STATE");
		/* notify master SM about us */
		osm_send_trap144(sm, 0);
		return;
	}

	/* if new sweep requested - don't bother with the rest */
	if (sm->p_subn->force_heavy_sweep)
		goto _repeat_discovery;

	OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE, "HEAVY SWEEP COMPLETE");

	/* If we are MASTER - get the highest remote_sm, and
	 * see if it is higher than our local sm.
	 */
	if (sm->p_subn->sm_state == IB_SMINFO_STATE_MASTER) {
		p_remote_sm = __osm_state_mgr_get_highest_sm(sm);
		if (p_remote_sm != NULL) {
			/* report new ports (trap 64) before leaving MASTER */
			__osm_state_mgr_report_new_ports(sm);

			/* need to handover the mastership
			 * to the remote sm, and move to standby */
			__osm_state_mgr_send_handover(sm, p_remote_sm);
			osm_sm_state_mgr_process(sm,
						 OSM_SM_SIGNAL_HANDOVER_SENT);
			return;
		} else {
			/* We are the highest sm - check to see if there is
			 * a remote SM that is in master state. */
			p_remote_sm =
			    __osm_state_mgr_exists_other_master_sm(sm);
			if (p_remote_sm != NULL) {
				/* There is a remote SM that is master.
				 * need to wait for that SM to relinquish control
				 * of its portion of the subnet. C14-60.2.1.
				 * Also - need to start polling on that SM. */
				sm->p_polling_sm = p_remote_sm;
				osm_sm_state_mgr_process(sm,
							 OSM_SM_SIGNAL_WAIT_FOR_HANDOVER);
				return;
			}
		}
	}

	/* Need to continue with lid assignment */
	osm_drop_mgr_process(sm);

	/*
	 * If we are not MASTER already - this means that we are
	 * in discovery state. call osm_sm_state_mgr with signal
	 * DISCOVERY_COMPLETED
	 */
	if (sm->p_subn->sm_state == IB_SMINFO_STATE_DISCOVERING)
		osm_sm_state_mgr_process(sm, OSM_SM_SIGNAL_DISCOVERY_COMPLETED);

	osm_pkey_mgr_process(sm->p_subn->p_osm);

	osm_qos_setup(sm->p_subn->p_osm);

	/* try to restore SA DB (this should be before lid_mgr
	   because we may want to disable clients reregistration
	   when SA DB is restored) */
	osm_sa_db_file_load(sm->p_subn->p_osm);

	if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
		return;

	/* Lid assignment, step 1: the SM's own lid */
	osm_lid_mgr_process_sm(&sm->lid_mgr);
	if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
		return;

	OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
			"SM LID ASSIGNMENT COMPLETE - STARTING SUBNET LID CONFIG");
	__osm_state_mgr_notify_lid_change(sm);

	/* Lid assignment, step 2: the rest of the subnet */
	osm_lid_mgr_process_subnet(&sm->lid_mgr);
	if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
		return;

	/* At this point we need to check the consistency of
	 * the port_lid_tbl under the subnet. There might be
	 * errors in it if PortInfo Set requests didn't reach
	 * their destination. */
	__osm_state_mgr_check_tbl_consistency(sm);

	OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
			"LID ASSIGNMENT COMPLETE - STARTING SWITCH TABLE CONFIG");

	/*
	 * Proceed with unicast forwarding table configuration.
	 * The unicast manager runs when the cache is not valid or when
	 * osm_ucast_cache_process() returns non-zero.
	 */

	if (!sm->ucast_mgr.cache_valid ||
	    osm_ucast_cache_process(&sm->ucast_mgr))
		osm_ucast_mgr_process(&sm->ucast_mgr);

	if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
		return;

	/* cleanup switch lft buffers */
	cl_qmap_apply_func(&sm->p_subn->sw_guid_tbl, cleanup_switch, sm->p_log);

	/* We are done setting all LFTs so clear the ignore existing.
	 * From now on, as long as we are still master, we want to
	 * take into account these lfts. */
	sm->p_subn->ignore_existing_lfts = FALSE;

	OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
			"SWITCHES CONFIGURED FOR UNICAST");

	if (!sm->p_subn->opt.disable_multicast) {
		osm_mcast_mgr_process(sm);
		if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
			return;
		OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
				"SWITCHES CONFIGURED FOR MULTICAST");
	}

	/*
	 * The LINK_PORTS state is required since we cannot count on
	 * the port state change MADs to succeed. This is an artifact
	 * of the spec defining state change from state X to state X
	 * as an error. The hardware then is not required to process
	 * other parameters provided by the Set(PortInfo) Packet.
	 */

	osm_link_mgr_process(sm, IB_LINK_NO_CHANGE);
	if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
		return;

	OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
			"LINKS PORTS CONFIGURED - SET LINKS TO ARMED STATE");

	osm_link_mgr_process(sm, IB_LINK_ARMED);
	if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
		return;

	OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE,
			"LINKS ARMED - SET LINKS TO ACTIVE STATE");

	osm_link_mgr_process(sm, IB_LINK_ACTIVE);
	if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats))
		return;

	/*
	 * The sweep completed!
	 */

	/*
	 * Send trap 64 on newly discovered endports
	 */
	__osm_state_mgr_report_new_ports(sm);

	/* in any case we zero this flag */
	sm->p_subn->coming_out_of_standby = FALSE;

	/* If there were errors - then the subnet is not really up */
	if (sm->p_subn->subnet_initialization_error == TRUE) {
		osm_log(sm->p_log, OSM_LOG_SYS,
			"Errors during initialization\n");
		OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_ERROR,
				"ERRORS DURING INITIALIZATION");
	} else {
		sm->p_subn->need_update = 0;
		osm_dump_all(sm->p_subn->p_osm);
		__osm_state_mgr_up_msg(sm);
		sm->p_subn->first_time_master_sweep = FALSE;

		if (osm_log_is_active(sm->p_log, OSM_LOG_VERBOSE))
			osm_sa_db_file_dump(sm->p_subn->p_osm);
	}

	/*
	 * Finally signal the subnet up event
	 * (done on both the error and the success path above)
	 */
	cl_event_signal(&sm->subnet_up_event);

	osm_opensm_report_event(sm->p_subn->p_osm, OSM_EVENT_ID_SUBNET_UP, NULL);

	/* if we got a signal to force heavy sweep or errors
	 * in the middle of the sweep - try another sweep. */
	if (sm->p_subn->force_heavy_sweep
	    || sm->p_subn->subnet_initialization_error)
		osm_sm_signal(sm, OSM_SIGNAL_SWEEP);
}
1359
1360static void do_process_mgrp_queue(osm_sm_t * sm)
1361{
1362	if (sm->p_subn->sm_state != IB_SMINFO_STATE_MASTER)
1363		return;
1364	osm_mcast_mgr_process_mgroups(sm);
1365	wait_for_pending_transactions(&sm->p_subn->p_osm->stats);
1366}
1367
1368void osm_state_mgr_process(IN osm_sm_t * sm, IN osm_signal_t signal)
1369{
1370	CL_ASSERT(sm);
1371
1372	OSM_LOG_ENTER(sm->p_log);
1373
1374	OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
1375		"Received signal %s in state %s\n",
1376		osm_get_sm_signal_str(signal),
1377		osm_get_sm_mgr_state_str(sm->p_subn->sm_state));
1378
1379	switch (signal) {
1380	case OSM_SIGNAL_SWEEP:
1381		do_sweep(sm);
1382		break;
1383
1384	case OSM_SIGNAL_IDLE_TIME_PROCESS_REQUEST:
1385		do_process_mgrp_queue(sm);
1386		break;
1387
1388	default:
1389		CL_ASSERT(FALSE);
1390		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3320: "
1391			"Invalid SM signal %u\n", signal);
1392		break;
1393	}
1394
1395	OSM_LOG_EXIT(sm->p_log);
1396}
1397