1/*
2 * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved.
3 * Copyright (c) 2002-2008 Mellanox Technologies LTD. All rights reserved.
4 * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
5 *
6 * This software is available to you under a choice of one of two
7 * licenses.  You may choose to be licensed under the terms of the GNU
8 * General Public License (GPL) Version 2, available from the file
9 * COPYING in the main directory of this source tree, or the
10 * OpenIB.org BSD license below:
11 *
12 *     Redistribution and use in source and binary forms, with or
13 *     without modification, are permitted provided that the following
14 *     conditions are met:
15 *
16 *      - Redistributions of source code must retain the above
17 *        copyright notice, this list of conditions and the following
18 *        disclaimer.
19 *
20 *      - Redistributions in binary form must reproduce the above
21 *        copyright notice, this list of conditions and the following
22 *        disclaimer in the documentation and/or other materials
23 *        provided with the distribution.
24 *
25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 * SOFTWARE.
33 *
34 */
35
36/*
37 * Abstract:
38 *    Implementation of osm_ucast_mgr_t.
39 * This file implements the Unicast Manager object.
40 */
41
42#if HAVE_CONFIG_H
43#  include <config.h>
44#endif				/* HAVE_CONFIG_H */
45
46#include <stdio.h>
47#include <stdlib.h>
48#include <string.h>
49#include <iba/ib_types.h>
50#include <complib/cl_qmap.h>
51#include <complib/cl_debug.h>
52#include <complib/cl_qlist.h>
53#include <opensm/osm_ucast_mgr.h>
54#include <opensm/osm_sm.h>
55#include <opensm/osm_log.h>
56#include <opensm/osm_node.h>
57#include <opensm/osm_switch.h>
58#include <opensm/osm_helper.h>
59#include <opensm/osm_msgdef.h>
60#include <opensm/osm_opensm.h>
61
62/**********************************************************************
63 **********************************************************************/
64void osm_ucast_mgr_construct(IN osm_ucast_mgr_t * const p_mgr)
65{
66	memset(p_mgr, 0, sizeof(*p_mgr));
67}
68
69/**********************************************************************
70 **********************************************************************/
71void osm_ucast_mgr_destroy(IN osm_ucast_mgr_t * const p_mgr)
72{
73	CL_ASSERT(p_mgr);
74
75	OSM_LOG_ENTER(p_mgr->p_log);
76
77	if (p_mgr->cache_valid)
78		osm_ucast_cache_invalidate(p_mgr);
79
80	OSM_LOG_EXIT(p_mgr->p_log);
81}
82
83/**********************************************************************
84 **********************************************************************/
85ib_api_status_t
86osm_ucast_mgr_init(IN osm_ucast_mgr_t * const p_mgr, IN osm_sm_t * sm)
87{
88	ib_api_status_t status = IB_SUCCESS;
89
90	OSM_LOG_ENTER(sm->p_log);
91
92	osm_ucast_mgr_construct(p_mgr);
93
94	p_mgr->sm = sm;
95	p_mgr->p_log = sm->p_log;
96	p_mgr->p_subn = sm->p_subn;
97	p_mgr->p_lock = sm->p_lock;
98
99	if (sm->p_subn->opt.use_ucast_cache)
100		cl_qmap_init(&p_mgr->cache_sw_tbl);
101
102	OSM_LOG_EXIT(p_mgr->p_log);
103	return (status);
104}
105
106/**********************************************************************
107 Add each switch's own and neighbor LIDs to its LID matrix
108**********************************************************************/
109static void
110__osm_ucast_mgr_process_hop_0_1(IN cl_map_item_t * const p_map_item,
111				IN void *context)
112{
113	osm_switch_t *const p_sw = (osm_switch_t *) p_map_item;
114	osm_node_t *p_remote_node;
115	uint16_t lid, remote_lid;
116	uint8_t i, remote_port;
117
118	lid = osm_node_get_base_lid(p_sw->p_node, 0);
119	lid = cl_ntoh16(lid);
120	osm_switch_set_hops(p_sw, lid, 0, 0);
121
122	for (i = 1; i < p_sw->num_ports; i++) {
123		p_remote_node =
124		    osm_node_get_remote_node(p_sw->p_node, i, &remote_port);
125
126		if (p_remote_node && p_remote_node->sw &&
127		    (p_remote_node != p_sw->p_node)) {
128			remote_lid = osm_node_get_base_lid(p_remote_node, 0);
129			remote_lid = cl_ntoh16(remote_lid);
130			osm_switch_set_hops(p_sw, remote_lid, i, 1);
131			osm_switch_set_hops(p_remote_node->sw, lid, remote_port,
132					    1);
133		}
134	}
135}
136
137/**********************************************************************
138 **********************************************************************/
139static void
140__osm_ucast_mgr_process_neighbor(IN osm_ucast_mgr_t * const p_mgr,
141				 IN osm_switch_t * const p_this_sw,
142				 IN osm_switch_t * const p_remote_sw,
143				 IN const uint8_t port_num,
144				 IN const uint8_t remote_port_num)
145{
146	osm_switch_t *p_sw, *p_next_sw;
147	uint16_t lid_ho;
148	uint8_t hops;
149
150	OSM_LOG_ENTER(p_mgr->p_log);
151
152	OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
153		"Node 0x%" PRIx64 ", remote node 0x%" PRIx64
154		", port %u, remote port %u\n",
155		cl_ntoh64(osm_node_get_node_guid(p_this_sw->p_node)),
156		cl_ntoh64(osm_node_get_node_guid(p_remote_sw->p_node)),
157		port_num, remote_port_num);
158
159	p_next_sw = (osm_switch_t *) cl_qmap_head(&p_mgr->p_subn->sw_guid_tbl);
160	while (p_next_sw !=
161	       (osm_switch_t *) cl_qmap_end(&p_mgr->p_subn->sw_guid_tbl)) {
162		p_sw = p_next_sw;
163		p_next_sw = (osm_switch_t *) cl_qmap_next(&p_sw->map_item);
164		lid_ho = osm_node_get_base_lid(p_sw->p_node, 0);
165		lid_ho = cl_ntoh16(lid_ho);
166		hops = osm_switch_get_least_hops(p_remote_sw, lid_ho);
167		if (hops == OSM_NO_PATH)
168			continue;
169		hops++;
170		if (hops <
171		    osm_switch_get_hop_count(p_this_sw, lid_ho, port_num)) {
172			if (osm_switch_set_hops
173			    (p_this_sw, lid_ho, port_num, hops) != 0)
174				OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
175					"cannot set hops for lid %u at switch 0x%"
176					PRIx64 "\n", lid_ho,
177					cl_ntoh64(osm_node_get_node_guid
178						  (p_this_sw->p_node)));
179			p_mgr->some_hop_count_set = TRUE;
180		}
181	}
182
183	OSM_LOG_EXIT(p_mgr->p_log);
184}
185
186/**********************************************************************
187 **********************************************************************/
188static struct osm_remote_node *
189find_and_add_remote_sys(osm_switch_t *sw, uint8_t port,
190			struct osm_remote_guids_count *r)
191{
192	unsigned i;
193	osm_physp_t *p = osm_node_get_physp_ptr(sw->p_node, port);
194	osm_node_t *node = p->p_remote_physp->p_node;
195
196	for (i = 0; i < r->count; i++)
197		if (r->guids[i].node == node)
198			return &r->guids[i];
199
200	r->guids[i].node = node;
201	r->guids[i].forwarded_to = 0;
202	r->count++;
203	return &r->guids[i];
204}
205
206static void
207__osm_ucast_mgr_process_port(IN osm_ucast_mgr_t * const p_mgr,
208			     IN osm_switch_t * const p_sw,
209			     IN osm_port_t * const p_port,
210			     IN unsigned lid_offset)
211{
212	uint16_t min_lid_ho;
213	uint16_t max_lid_ho;
214	uint16_t lid_ho;
215	uint8_t port;
216	boolean_t is_ignored_by_port_prof;
217	ib_net64_t node_guid;
218	unsigned start_from = 1;
219
220	OSM_LOG_ENTER(p_mgr->p_log);
221
222	osm_port_get_lid_range_ho(p_port, &min_lid_ho, &max_lid_ho);
223
224	/* If the lids are zero - then there was some problem with
225	 * the initialization. Don't handle this port. */
226	if (min_lid_ho == 0 || max_lid_ho == 0) {
227		OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, "ERR 3A04: "
228			"Port 0x%" PRIx64 " has LID 0. An initialization "
229			"error occurred. Ignoring port\n",
230			cl_ntoh64(osm_port_get_guid(p_port)));
231		goto Exit;
232	}
233
234	lid_ho = min_lid_ho + lid_offset;
235
236	if (lid_ho > max_lid_ho)
237		goto Exit;
238
239	if (lid_offset)
240		/* ignore potential overflow - it is handled in osm_switch.c */
241		start_from = osm_switch_get_port_by_lid(p_sw, lid_ho - 1) + 1;
242
243	OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
244		"Processing port 0x%" PRIx64 " (\'%s\' port %u), LID %u [%u,%u]\n",
245		cl_ntoh64(osm_port_get_guid(p_port)),
246		p_port->p_node->print_desc, p_port->p_physp->port_num,
247		lid_ho, min_lid_ho, max_lid_ho);
248
249	/* TODO - This should be runtime error, not a CL_ASSERT() */
250	CL_ASSERT(max_lid_ho <= IB_LID_UCAST_END_HO);
251
252	node_guid = osm_node_get_node_guid(p_sw->p_node);
253
254	/*
255	   The lid matrix contains the number of hops to each
256	   lid from each port.  From this information we determine
257	   how best to distribute the LID range across the ports
258	   that can reach those LIDs.
259	 */
260	port = osm_switch_recommend_path(p_sw, p_port, lid_ho, start_from,
261					 p_mgr->p_subn->ignore_existing_lfts,
262					 p_mgr->is_dor);
263
264	if (port == OSM_NO_PATH) {
265		/* do not try to overwrite the ppro of non existing port ... */
266		is_ignored_by_port_prof = TRUE;
267
268		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
269			"No path to get to LID %u from switch 0x%" PRIx64 "\n",
270			lid_ho, cl_ntoh64(node_guid));
271	} else {
272		osm_physp_t *p = osm_node_get_physp_ptr(p_sw->p_node, port);
273
274		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
275			"Routing LID %u to port %u"
276			" for switch 0x%" PRIx64 "\n",
277			lid_ho, port, cl_ntoh64(node_guid));
278
279		/*
280		   we would like to optionally ignore this port in equalization
281		   as in the case of the Mellanox Anafa Internal PCI TCA port
282		 */
283		is_ignored_by_port_prof = p->is_prof_ignored;
284
285		/*
286		   We also would ignore this route if the target lid is of
287		   a switch and the port_profile_switch_node is not TRUE
288		 */
289		if (!p_mgr->p_subn->opt.port_profile_switch_nodes)
290			is_ignored_by_port_prof |=
291			    (osm_node_get_type(p_port->p_node) ==
292			     IB_NODE_TYPE_SWITCH);
293	}
294
295	/*
296	   We have selected the port for this LID.
297	   Write it to the forwarding tables.
298	 */
299	p_sw->new_lft[lid_ho] = port;
300	if (!is_ignored_by_port_prof) {
301		struct osm_remote_node *rem_node_used;
302		osm_switch_count_path(p_sw, port);
303		if (port > 0 && p_port->priv &&
304		    (rem_node_used = find_and_add_remote_sys(p_sw, port,
305							     p_port->priv)))
306			rem_node_used->forwarded_to++;
307	}
308
309Exit:
310	OSM_LOG_EXIT(p_mgr->p_log);
311}
312
313/**********************************************************************
314 **********************************************************************/
315int osm_ucast_mgr_set_fwd_table(IN osm_ucast_mgr_t * const p_mgr,
316				IN osm_switch_t * const p_sw)
317{
318	osm_node_t *p_node;
319	osm_dr_path_t *p_path;
320	osm_madw_context_t context;
321	ib_api_status_t status;
322	ib_switch_info_t si;
323	uint16_t block_id_ho = 0;
324	uint8_t block[IB_SMP_DATA_SIZE];
325	boolean_t set_swinfo_require = FALSE;
326	uint16_t lin_top;
327	uint8_t life_state;
328
329	CL_ASSERT(p_mgr);
330
331	OSM_LOG_ENTER(p_mgr->p_log);
332
333	CL_ASSERT(p_sw);
334
335	p_node = p_sw->p_node;
336
337	CL_ASSERT(p_node);
338
339	p_path = osm_physp_get_dr_path_ptr(osm_node_get_physp_ptr(p_node, 0));
340
341	/*
342	   Set the top of the unicast forwarding table.
343	 */
344	si = p_sw->switch_info;
345	lin_top = cl_hton16(p_sw->max_lid_ho);
346	if (lin_top != si.lin_top) {
347		set_swinfo_require = TRUE;
348		si.lin_top = lin_top;
349	}
350
351	/* check to see if the change state bit is on. If it is - then we
352	   need to clear it. */
353	if (ib_switch_info_get_state_change(&si))
354		life_state = ((p_mgr->p_subn->opt.packet_life_time << 3)
355			      | (si.life_state & IB_SWITCH_PSC)) & 0xfc;
356	else
357		life_state = (p_mgr->p_subn->opt.packet_life_time << 3) & 0xf8;
358
359	if ((life_state != si.life_state)
360	    || ib_switch_info_get_state_change(&si)) {
361		set_swinfo_require = TRUE;
362		si.life_state = life_state;
363	}
364
365	if (set_swinfo_require) {
366		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
367			"Setting switch FT top to LID %u\n", p_sw->max_lid_ho);
368
369		context.si_context.light_sweep = FALSE;
370		context.si_context.node_guid = osm_node_get_node_guid(p_node);
371		context.si_context.set_method = TRUE;
372
373		status = osm_req_set(p_mgr->sm, p_path, (uint8_t *) & si,
374				     sizeof(si),
375				     IB_MAD_ATTR_SWITCH_INFO,
376				     0, CL_DISP_MSGID_NONE, &context);
377
378		if (status != IB_SUCCESS)
379			OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, "ERR 3A06: "
380				"Sending SwitchInfo attribute failed (%s)\n",
381				ib_get_err_str(status));
382	}
383
384	/*
385	   Send linear forwarding table blocks to the switch
386	   as long as the switch indicates it has blocks needing
387	   configuration.
388	 */
389
390	context.lft_context.node_guid = osm_node_get_node_guid(p_node);
391	context.lft_context.set_method = TRUE;
392
393	if (!p_sw->new_lft) {
394		/* any routing should provide the new_lft */
395		CL_ASSERT(p_mgr->p_subn->opt.use_ucast_cache &&
396			  p_mgr->cache_valid && !p_sw->need_update);
397		goto Exit;
398	}
399
400	for (block_id_ho = 0;
401	     osm_switch_get_lft_block(p_sw, block_id_ho, block);
402	     block_id_ho++) {
403		if (!p_sw->need_update &&
404		    !memcmp(block,
405			    p_sw->new_lft + block_id_ho * IB_SMP_DATA_SIZE,
406			    IB_SMP_DATA_SIZE))
407			continue;
408
409		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
410			"Writing FT block %u\n", block_id_ho);
411
412		status = osm_req_set(p_mgr->sm, p_path,
413				     p_sw->new_lft + block_id_ho * IB_SMP_DATA_SIZE,
414				     sizeof(block),
415				     IB_MAD_ATTR_LIN_FWD_TBL,
416				     cl_hton32(block_id_ho),
417				     CL_DISP_MSGID_NONE, &context);
418
419		if (status != IB_SUCCESS)
420			OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, "ERR 3A05: "
421				"Sending linear fwd. tbl. block failed (%s)\n",
422				ib_get_err_str(status));
423	}
424
425Exit:
426	OSM_LOG_EXIT(p_mgr->p_log);
427	return 0;
428}
429
430/**********************************************************************
431 **********************************************************************/
432static void alloc_ports_priv(osm_ucast_mgr_t *mgr)
433{
434	cl_qmap_t *port_tbl = &mgr->p_subn->port_guid_tbl;
435	struct osm_remote_guids_count *r;
436	osm_port_t *port;
437	cl_map_item_t *item;
438	unsigned lmc;
439
440	for (item = cl_qmap_head(port_tbl); item != cl_qmap_end(port_tbl);
441	     item = cl_qmap_next(item)) {
442		port = (osm_port_t *)item;
443		lmc = ib_port_info_get_lmc(&port->p_physp->port_info);
444		if (!lmc)
445			continue;
446		r = malloc(sizeof(*r) + sizeof(r->guids[0]) * (1 << lmc));
447		if (!r) {
448			OSM_LOG(mgr->p_log, OSM_LOG_ERROR, "ERR 3A09: "
449				"cannot allocate memory to track remote"
450				" systems for lmc > 0\n");
451			port->priv = NULL;
452			continue;
453		}
454		memset(r, 0, sizeof(*r) + sizeof(r->guids[0]) * (1 << lmc));
455		port->priv = r;
456	}
457}
458
459static void free_ports_priv(osm_ucast_mgr_t *mgr)
460{
461	cl_qmap_t *port_tbl = &mgr->p_subn->port_guid_tbl;
462	osm_port_t *port;
463	cl_map_item_t *item;
464	for (item = cl_qmap_head(port_tbl); item != cl_qmap_end(port_tbl);
465	     item = cl_qmap_next(item)) {
466		port = (osm_port_t *)item;
467		if (port->priv) {
468			free(port->priv);
469			port->priv = NULL;
470		}
471	}
472}
473
474static void
475__osm_ucast_mgr_process_tbl(IN cl_map_item_t * const p_map_item,
476			    IN void *context)
477{
478	osm_ucast_mgr_t *p_mgr = context;
479	osm_switch_t *const p_sw = (osm_switch_t *) p_map_item;
480	unsigned i, lids_per_port;
481
482	OSM_LOG_ENTER(p_mgr->p_log);
483
484	CL_ASSERT(p_sw && p_sw->p_node);
485
486	OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
487		"Processing switch 0x%" PRIx64 "\n",
488		cl_ntoh64(osm_node_get_node_guid(p_sw->p_node)));
489
490	/* Initialize LIDs in buffer to invalid port number. */
491	memset(p_sw->new_lft, OSM_NO_PATH, IB_LID_UCAST_END_HO + 1);
492
493	if (p_mgr->p_subn->opt.lmc)
494		alloc_ports_priv(p_mgr);
495
496	/*
497	   Iterate through every port setting LID routes for each
498	   port based on base LID and LMC value.
499	 */
500	lids_per_port = 1 << p_mgr->p_subn->opt.lmc;
501	for (i = 0; i < lids_per_port; i++) {
502		cl_qlist_t *list = &p_mgr->port_order_list;
503		cl_list_item_t *item;
504		for (item = cl_qlist_head(list); item != cl_qlist_end(list);
505		     item = cl_qlist_next(item)) {
506			osm_port_t *port = cl_item_obj(item, port, list_item);
507			__osm_ucast_mgr_process_port(p_mgr, p_sw, port, i);
508		}
509	}
510
511	osm_ucast_mgr_set_fwd_table(p_mgr, p_sw);
512
513	if (p_mgr->p_subn->opt.lmc)
514		free_ports_priv(p_mgr);
515
516	OSM_LOG_EXIT(p_mgr->p_log);
517}
518
519/**********************************************************************
520 **********************************************************************/
521static void
522__osm_ucast_mgr_process_neighbors(IN cl_map_item_t * const p_map_item,
523				  IN void *context)
524{
525	osm_switch_t *const p_sw = (osm_switch_t *) p_map_item;
526	osm_ucast_mgr_t *const p_mgr = (osm_ucast_mgr_t *) context;
527	osm_node_t *p_node;
528	osm_node_t *p_remote_node;
529	uint32_t port_num;
530	uint8_t remote_port_num;
531	uint32_t num_ports;
532	osm_physp_t *p_physp;
533
534	OSM_LOG_ENTER(p_mgr->p_log);
535
536	p_node = p_sw->p_node;
537
538	CL_ASSERT(p_node);
539	CL_ASSERT(osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH);
540
541	OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
542		"Processing switch with GUID 0x%" PRIx64 "\n",
543		cl_ntoh64(osm_node_get_node_guid(p_node)));
544
545	num_ports = osm_node_get_num_physp(p_node);
546
547	/*
548	   Start with port 1 to skip the switch's management port.
549	 */
550	for (port_num = 1; port_num < num_ports; port_num++) {
551		p_remote_node = osm_node_get_remote_node(p_node,
552							 (uint8_t) port_num,
553							 &remote_port_num);
554
555		if (p_remote_node && p_remote_node->sw
556		    && (p_remote_node != p_node)) {
557			/* make sure the link is healthy. If it is not - don't
558			   propagate through it. */
559			p_physp = osm_node_get_physp_ptr(p_node, port_num);
560			if (!p_physp || !osm_link_is_healthy(p_physp))
561				continue;
562
563			__osm_ucast_mgr_process_neighbor(p_mgr, p_sw,
564							 p_remote_node->sw,
565							 (uint8_t) port_num,
566							 remote_port_num);
567
568		}
569	}
570
571	OSM_LOG_EXIT(p_mgr->p_log);
572}
573
574/**********************************************************************
575 **********************************************************************/
576int osm_ucast_mgr_build_lid_matrices(IN osm_ucast_mgr_t * const p_mgr)
577{
578	uint32_t i;
579	uint32_t iteration_max;
580	cl_qmap_t *p_sw_guid_tbl;
581
582	p_sw_guid_tbl = &p_mgr->p_subn->sw_guid_tbl;
583
584	OSM_LOG(p_mgr->p_log, OSM_LOG_VERBOSE,
585		"Starting switches' Min Hop Table Assignment\n");
586
587	/*
588	   Set the switch matrices for each switch's own port 0 LID(s)
589	   then set the lid matrices for the each switch's leaf nodes.
590	 */
591	cl_qmap_apply_func(p_sw_guid_tbl,
592			   __osm_ucast_mgr_process_hop_0_1, p_mgr);
593
594	/*
595	   Get the switch matrices for each switch's neighbors.
596	   This process requires a number of iterations equal to
597	   the number of switches in the subnet minus 1.
598
599	   In each iteration, a switch learns the lid/port/hop
600	   information (as contained by a switch's lid matrix) from
601	   its immediate neighbors.  After each iteration, a switch
602	   (and it's neighbors) know more routing information than
603	   it did on the previous iteration.
604	   Thus, by repeatedly absorbing the routing information of
605	   neighbor switches, every switch eventually learns how to
606	   route all LIDs on the subnet.
607
608	   Note that there may not be any switches in the subnet if
609	   we are in simple p2p configuration.
610	 */
611	iteration_max = cl_qmap_count(p_sw_guid_tbl);
612
613	/*
614	   If there are switches in the subnet, iterate until the lid
615	   matrix has been constructed.  Otherwise, just immediately
616	   indicate we're done if no switches exist.
617	 */
618	if (iteration_max) {
619		iteration_max--;
620
621		/*
622		   we need to find out when the propagation of
623		   hop counts has relaxed. So this global variable
624		   is preset to 0 on each iteration and if
625		   if non of the switches was set will exit the
626		   while loop
627		 */
628		p_mgr->some_hop_count_set = TRUE;
629		for (i = 0; (i < iteration_max) && p_mgr->some_hop_count_set;
630		     i++) {
631			p_mgr->some_hop_count_set = FALSE;
632			cl_qmap_apply_func(p_sw_guid_tbl,
633					   __osm_ucast_mgr_process_neighbors,
634					   p_mgr);
635		}
636		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
637			"Min-hop propagated in %d steps\n", i);
638	}
639
640	return 0;
641}
642
643/**********************************************************************
644 **********************************************************************/
645static int ucast_mgr_setup_all_switches(osm_subn_t * p_subn)
646{
647	osm_switch_t *p_sw;
648	uint16_t lids;
649
650	lids = (uint16_t) cl_ptr_vector_get_size(&p_subn->port_lid_tbl);
651	lids = lids ? lids - 1 : 0;
652
653	for (p_sw = (osm_switch_t *) cl_qmap_head(&p_subn->sw_guid_tbl);
654	     p_sw != (osm_switch_t *) cl_qmap_end(&p_subn->sw_guid_tbl);
655	     p_sw = (osm_switch_t *) cl_qmap_next(&p_sw->map_item))
656		if (osm_switch_prepare_path_rebuild(p_sw, lids)) {
657			OSM_LOG(&p_subn->p_osm->log, OSM_LOG_ERROR, "ERR 3A0B: "
658				"cannot setup switch 0x%016" PRIx64 "\n",
659				cl_ntoh64(osm_node_get_node_guid
660					  (p_sw->p_node)));
661			return -1;
662		}
663
664	return 0;
665}
666
667/**********************************************************************
668 **********************************************************************/
669
670static int add_guid_to_order_list(void *ctx, uint64_t guid, char *p)
671{
672	osm_ucast_mgr_t *m = ctx;
673	osm_port_t *port = osm_get_port_by_guid(m->p_subn, cl_hton64(guid));
674
675	if (!port) {
676		OSM_LOG(m->p_log, OSM_LOG_DEBUG,
677			"port guid not found: 0x%016" PRIx64 "\n", guid);
678		return 0;
679	}
680
681	if (port->flag) {
682		OSM_LOG(m->p_log, OSM_LOG_DEBUG,
683			"port guid specified multiple times 0x%016" PRIx64 "\n",
684			guid);
685		return 0;
686	}
687
688	cl_qlist_insert_tail(&m->port_order_list, &port->list_item);
689	port->flag = 1;
690
691	return 0;
692}
693
694static void add_port_to_order_list(cl_map_item_t * const p_map_item, void *ctx)
695{
696	osm_port_t *port = (osm_port_t *)p_map_item;
697	osm_ucast_mgr_t *m = ctx;
698
699	if (!port->flag)
700		cl_qlist_insert_tail(&m->port_order_list, &port->list_item);
701	else
702		port->flag = 0;
703}
704
705static int mark_ignored_port(void *ctx, uint64_t guid, char *p)
706{
707	osm_ucast_mgr_t *m = ctx;
708	osm_node_t *node = osm_get_node_by_guid(m->p_subn, cl_hton64(guid));
709	osm_physp_t *physp;
710	unsigned port;
711
712	if (!node || !node->sw) {
713		OSM_LOG(m->p_log, OSM_LOG_DEBUG,
714			"switch with guid 0x%016" PRIx64 " is not found\n",
715			guid);
716		return 0;
717	}
718
719	if (!p || !*p || !(port = strtoul(p, NULL, 0)) ||
720	    port >= node->sw->num_ports) {
721		OSM_LOG(m->p_log, OSM_LOG_DEBUG,
722			"bad port specified for guid 0x%016" PRIx64 "\n", guid);
723		return 0;
724	}
725
726	physp = osm_node_get_physp_ptr(node, port);
727	if (!physp)
728		return 0;
729
730	physp->is_prof_ignored = 1;
731
732	return 0;
733}
734
735static void clear_prof_ignore_flag(cl_map_item_t * const p_map_item, void *ctx)
736{
737	osm_switch_t *sw = (osm_switch_t *)p_map_item;
738	int i;
739
740	for (i = 1; i < sw->num_ports; i++) {
741		osm_physp_t *p = osm_node_get_physp_ptr(sw->p_node, i);
742		if (p)
743			p->is_prof_ignored = 0;
744	}
745}
746
747static int ucast_mgr_build_lfts(osm_ucast_mgr_t *p_mgr)
748{
749	cl_qlist_init(&p_mgr->port_order_list);
750
751	if (p_mgr->p_subn->opt.guid_routing_order_file) {
752		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
753			"Fetching guid routing order file \'%s\'\n",
754			p_mgr->p_subn->opt.guid_routing_order_file);
755
756		if (parse_node_map(p_mgr->p_subn->opt.guid_routing_order_file,
757				   add_guid_to_order_list, p_mgr))
758			OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, "ERR : "
759				"cannot parse guid routing order file \'%s\'\n",
760				p_mgr->p_subn->opt.guid_routing_order_file);
761	}
762
763	if (p_mgr->p_subn->opt.port_prof_ignore_file) {
764		cl_qmap_apply_func(&p_mgr->p_subn->sw_guid_tbl,
765				   clear_prof_ignore_flag, NULL);
766		if (parse_node_map(p_mgr->p_subn->opt.port_prof_ignore_file,
767				   mark_ignored_port, p_mgr)) {
768			OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, "ERR : "
769				"cannot parse port prof ignore file \'%s\'\n",
770				p_mgr->p_subn->opt.port_prof_ignore_file);
771		}
772	}
773
774	cl_qmap_apply_func(&p_mgr->p_subn->port_guid_tbl,
775			   add_port_to_order_list, p_mgr);
776
777	cl_qmap_apply_func(&p_mgr->p_subn->sw_guid_tbl,
778			   __osm_ucast_mgr_process_tbl, p_mgr);
779
780	cl_qlist_remove_all(&p_mgr->port_order_list);
781
782	return 0;
783}
784
785/**********************************************************************
786 **********************************************************************/
787static int ucast_mgr_route(struct osm_routing_engine *r, osm_opensm_t *osm)
788{
789	int ret;
790
791	OSM_LOG(&osm->log, OSM_LOG_VERBOSE,
792		"building routing with \'%s\' routing algorithm...\n", r->name);
793
794	if (!r->build_lid_matrices ||
795	    (ret = r->build_lid_matrices(r->context)) > 0)
796		ret = osm_ucast_mgr_build_lid_matrices(&osm->sm.ucast_mgr);
797
798	if (ret < 0) {
799		OSM_LOG(&osm->log, OSM_LOG_ERROR,
800			"%s: cannot build lid matrices.\n", r->name);
801		return ret;
802	}
803
804	if (!r->ucast_build_fwd_tables ||
805	    (ret = r->ucast_build_fwd_tables(r->context)) > 0)
806		ret = ucast_mgr_build_lfts(&osm->sm.ucast_mgr);
807
808	if (ret < 0) {
809		OSM_LOG(&osm->log, OSM_LOG_ERROR,
810			"%s: cannot build fwd tables.\n", r->name);
811		return ret;
812	}
813
814	osm->routing_engine_used = osm_routing_engine_type(r->name);
815
816	return 0;
817}
818
819int osm_ucast_mgr_process(IN osm_ucast_mgr_t * const p_mgr)
820{
821	osm_opensm_t *p_osm;
822	struct osm_routing_engine *p_routing_eng;
823	cl_qmap_t *p_sw_guid_tbl;
824
825	OSM_LOG_ENTER(p_mgr->p_log);
826
827	p_sw_guid_tbl = &p_mgr->p_subn->sw_guid_tbl;
828	p_osm = p_mgr->p_subn->p_osm;
829	p_routing_eng = p_osm->routing_engine_list;
830
831	CL_PLOCK_EXCL_ACQUIRE(p_mgr->p_lock);
832
833	/*
834	   If there are no switches in the subnet, we are done.
835	 */
836	if (cl_qmap_count(p_sw_guid_tbl) == 0 ||
837	    ucast_mgr_setup_all_switches(p_mgr->p_subn) < 0)
838		goto Exit;
839
840	p_osm->routing_engine_used = OSM_ROUTING_ENGINE_TYPE_NONE;
841	while (p_routing_eng) {
842		if (!ucast_mgr_route(p_routing_eng, p_osm))
843			break;
844		p_routing_eng = p_routing_eng->next;
845	}
846
847	if (p_osm->routing_engine_used == OSM_ROUTING_ENGINE_TYPE_NONE) {
848		/* If configured routing algorithm failed, use default MinHop */
849		osm_ucast_mgr_build_lid_matrices(p_mgr);
850		ucast_mgr_build_lfts(p_mgr);
851		p_osm->routing_engine_used = OSM_ROUTING_ENGINE_TYPE_MINHOP;
852	}
853
854	OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
855		"%s tables configured on all switches\n",
856		osm_routing_engine_type_str(p_osm->routing_engine_used));
857
858	if (p_mgr->p_subn->opt.use_ucast_cache)
859		p_mgr->cache_valid = TRUE;
860
861Exit:
862	CL_PLOCK_RELEASE(p_mgr->p_lock);
863	OSM_LOG_EXIT(p_mgr->p_log);
864	return 0;
865}
866
867static int ucast_build_lid_matrices(void *context)
868{
869	return osm_ucast_mgr_build_lid_matrices(context);
870}
871
872static int ucast_build_lfts(void *context)
873{
874	return ucast_mgr_build_lfts(context);
875}
876
877int osm_ucast_minhop_setup(struct osm_routing_engine *r, osm_opensm_t *osm)
878{
879	r->context = &osm->sm.ucast_mgr;
880	r->build_lid_matrices = ucast_build_lid_matrices;
881	r->ucast_build_fwd_tables = ucast_build_lfts;
882	return 0;
883}
884
885static int ucast_dor_build_lfts(void *context)
886{
887	osm_ucast_mgr_t *mgr = context;
888	int ret;
889
890	mgr->is_dor = 1;
891	ret = ucast_mgr_build_lfts(mgr);
892	mgr->is_dor = 0;
893
894	return ret;
895}
896
897int osm_ucast_dor_setup(struct osm_routing_engine *r, osm_opensm_t *osm)
898{
899	r->context = &osm->sm.ucast_mgr;
900	r->build_lid_matrices = ucast_build_lid_matrices;
901	r->ucast_build_fwd_tables = ucast_dor_build_lfts;
902	return 0;
903}
904