1219820Sjeff/*
2219820Sjeff * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved.
3219820Sjeff * Copyright (c) 2002-2007 Mellanox Technologies LTD. All rights reserved.
4219820Sjeff * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
5219820Sjeff *
6219820Sjeff * This software is available to you under a choice of one of two
7219820Sjeff * licenses.  You may choose to be licensed under the terms of the GNU
8219820Sjeff * General Public License (GPL) Version 2, available from the file
9219820Sjeff * COPYING in the main directory of this source tree, or the
10219820Sjeff * OpenIB.org BSD license below:
11219820Sjeff *
12219820Sjeff *     Redistribution and use in source and binary forms, with or
13219820Sjeff *     without modification, are permitted provided that the following
14219820Sjeff *     conditions are met:
15219820Sjeff *
16219820Sjeff *      - Redistributions of source code must retain the above
17219820Sjeff *        copyright notice, this list of conditions and the following
18219820Sjeff *        disclaimer.
19219820Sjeff *
20219820Sjeff *      - Redistributions in binary form must reproduce the above
21219820Sjeff *        copyright notice, this list of conditions and the following
22219820Sjeff *        disclaimer in the documentation and/or other materials
23219820Sjeff *        provided with the distribution.
24219820Sjeff *
25219820Sjeff * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26219820Sjeff * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27219820Sjeff * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28219820Sjeff * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29219820Sjeff * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30219820Sjeff * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31219820Sjeff * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32219820Sjeff * SOFTWARE.
33219820Sjeff *
34219820Sjeff */
35219820Sjeff
36219820Sjeff/*
37219820Sjeff * Abstract:
38219820Sjeff *    Implementation of OpenSM FatTree routing
39219820Sjeff */
40219820Sjeff
41219820Sjeff#if HAVE_CONFIG_H
42219820Sjeff#  include <config.h>
43219820Sjeff#endif
44219820Sjeff
45219820Sjeff#include <stdlib.h>
46219820Sjeff#include <string.h>
47219820Sjeff#include <ctype.h>
48219820Sjeff#include <errno.h>
49219820Sjeff#include <iba/ib_types.h>
50219820Sjeff#include <complib/cl_qmap.h>
51219820Sjeff#include <complib/cl_debug.h>
52219820Sjeff#include <opensm/osm_opensm.h>
53219820Sjeff#include <opensm/osm_switch.h>
54219820Sjeff
55219820Sjeff/*
56219820Sjeff * FatTree rank is bounded between 2 and 8:
57219820Sjeff *  - Tree of rank 1 has only trivial routing paths,
58219820Sjeff *    so no need to use FatTree routing.
59219820Sjeff *  - Why maximum rank is 8:
60219820Sjeff *    Each node (switch) is assigned a unique tuple.
61219820Sjeff *    Switches are stored in two cl_qmaps - one is
62219820Sjeff *    ordered by guid, and the other by a key that is
63219820Sjeff *    generated from tuple. Since cl_qmap supports only
 *    a 64-bit key, the maximal tuple length is 8 bytes,
 *    which means that maximal tree rank is 8.
66219820Sjeff * Note that the above also implies that each switch
67219820Sjeff * can have at max 255 up/down ports.
68219820Sjeff */
69219820Sjeff
/* Lowest tree rank for which FatTree routing is used */
#define FAT_TREE_MIN_RANK 2
/* Highest supported tree rank; bounded by the 8-byte switch tuple */
#define FAT_TREE_MAX_RANK 8
72219820Sjeff
/*
 * Direction tag passed to the port group lookup, add and dump helpers.
 * The enumerators take the values -1 (DOWN), 0 (SAME) and 1 (UP).
 */
typedef enum {
	FTREE_DIRECTION_DOWN = -1,
	FTREE_DIRECTION_SAME,
	FTREE_DIRECTION_UP
} ftree_direction_t;
78219820Sjeff
79219820Sjeff/***************************************************
80219820Sjeff **
81219820Sjeff **  Forward references
82219820Sjeff **
83219820Sjeff ***************************************************/
84219820Sjeff
/*
 * Structure tags are declared up front so that the definitions
 * below can hold pointers to each other before the full types exist.
 */
struct ftree_sw_t_;
struct ftree_hca_t_;
struct ftree_port_t_;
struct ftree_port_group_t_;
struct ftree_fabric_t_;
90219820Sjeff
91219820Sjeff/***************************************************
92219820Sjeff **
93219820Sjeff **  ftree_tuple_t definition
94219820Sjeff **
95219820Sjeff ***************************************************/
96219820Sjeff
/* Size in bytes of each string buffer used by __osm_ftree_tuple_to_str() */
#define FTREE_TUPLE_BUFF_LEN 1024
/* Number of bytes in a switch tuple */
#define FTREE_TUPLE_LEN 8

/* Switch tuple; a byte equal to 0xFF marks an unused position */
typedef uint8_t ftree_tuple_t[FTREE_TUPLE_LEN];
/* 64-bit map key produced by copying the raw tuple bytes */
typedef uint64_t ftree_tuple_key_t;
102219820Sjeff
/*
 * List node that carries a single 64-bit GUID.
 * NOTE(review): presumably the element type of
 * ftree_fabric_t.root_guid_list - confirm against its users.
 */
struct guid_list_item {
	cl_list_item_t list;	/* list linkage */
	uint64_t guid;		/* the GUID held by this node */
};
107219820Sjeff
108219820Sjeff/***************************************************
109219820Sjeff **
 **  ftree_sw_tbl_element_t definition
111219820Sjeff **
112219820Sjeff ***************************************************/
113219820Sjeff
/*
 * Map element that refers to a switch without owning it
 * (destroying the element does not free the switch).
 */
typedef struct {
	cl_map_item_t map_item;	/* map linkage */
	struct ftree_sw_t_ *p_sw;	/* the switch this element refers to */
} ftree_sw_tbl_element_t;
118219820Sjeff
119219820Sjeff/***************************************************
120219820Sjeff **
121219820Sjeff **  ftree_port_t definition
122219820Sjeff **
123219820Sjeff ***************************************************/
124219820Sjeff
/*
 * A single link end: a local port number paired with the port number
 * it is connected to on the remote node, plus routing counters.
 * Instances are zero-initialised by __osm_ftree_port_create().
 */
typedef struct ftree_port_t_ {
	cl_map_item_t map_item;
	uint8_t port_num;	/* port number on the current node */
	uint8_t remote_port_num;	/* port number on the remote node */
	uint32_t counter_up;	/* number of allocated routes upwards */
	uint32_t counter_down;	/* number of allocated routes downwards */
} ftree_port_t;
132219820Sjeff
133219820Sjeff/***************************************************
134219820Sjeff **
135219820Sjeff **  ftree_port_group_t definition
136219820Sjeff **
137219820Sjeff ***************************************************/
138219820Sjeff
/*
 * Pointer to either a CA or a switch. The union carries no tag of its
 * own; ftree_port_group_t records the node type next to each instance.
 */
typedef union ftree_hca_or_sw_ {
	struct ftree_hca_t_ *p_hca;
	struct ftree_sw_t_ *p_sw;
} ftree_hca_or_sw;
143219820Sjeff
/*
 * A set of ports on one node that all lead to the same remote base LID.
 * node_type / remote_node_type select which member of the matching
 * ftree_hca_or_sw union is valid.
 */
typedef struct ftree_port_group_t_ {
	cl_map_item_t map_item;
	ib_net16_t base_lid;	/* base lid of the current node */
	ib_net16_t remote_base_lid;	/* base lid of the remote node */
	ib_net64_t port_guid;	/* port guid of this port */
	ib_net64_t node_guid;	/* this node's guid */
	uint8_t node_type;	/* this node's type */
	ib_net64_t remote_port_guid;	/* port guid of the remote port */
	ib_net64_t remote_node_guid;	/* node guid of the remote node */
	uint8_t remote_node_type;	/* IB_NODE_TYPE_{CA,SWITCH,ROUTER,...} */
	ftree_hca_or_sw hca_or_sw;	/* pointer to this hca/switch */
	ftree_hca_or_sw remote_hca_or_sw;	/* pointer to remote hca/switch */
	cl_ptr_vector_t ports;	/* vector of ports to the same lid */
	boolean_t is_cn;	/* whether this port is a compute node */
	uint32_t counter_down;	/* number of allocated routes downwards */
} ftree_port_group_t;
160219820Sjeff
161219820Sjeff/***************************************************
162219820Sjeff **
163219820Sjeff **  ftree_sw_t definition
164219820Sjeff **
165219820Sjeff ***************************************************/
166219820Sjeff
/*
 * FatTree view of one switch: its rank and tuple in the tree plus the
 * port groups that lead down and up from it.
 */
typedef struct ftree_sw_t_ {
	cl_map_item_t map_item;	/* map linkage */
	osm_switch_t *p_osm_sw;	/* underlying OpenSM switch object */
	uint32_t rank;	/* 0xFFFFFFFF until the switch is ranked */
	ftree_tuple_t tuple;	/* all bytes 0xFF until a tuple is assigned */
	ib_net16_t base_lid;	/* base lid read from port 0 of the node */
	ftree_port_group_t **down_port_groups;	/* array with one slot per physical port */
	uint8_t down_port_groups_num;	/* number of used slots in down_port_groups */
	ftree_port_group_t **up_port_groups;	/* array with one slot per physical port */
	uint8_t up_port_groups_num;	/* number of used slots in up_port_groups */
	boolean_t is_leaf;
	int down_port_groups_idx;	/* set to -1 when the switch is created */
} ftree_sw_t;
180219820Sjeff
181219820Sjeff/***************************************************
182219820Sjeff **
183219820Sjeff **  ftree_hca_t definition
184219820Sjeff **
185219820Sjeff ***************************************************/
186219820Sjeff
/*
 * FatTree view of one CA node and the port groups leading up from it.
 */
typedef struct ftree_hca_t_ {
	cl_map_item_t map_item;	/* map linkage */
	osm_node_t *p_osm_node;	/* underlying OpenSM node object */
	ftree_port_group_t **up_port_groups;	/* array with one slot per physical port */
	uint16_t up_port_groups_num;	/* number of used slots in up_port_groups */
	unsigned cn_num;	/* NOTE(review): presumably the count of compute-node ports - confirm */
} ftree_hca_t;
194219820Sjeff
195219820Sjeff/***************************************************
196219820Sjeff **
197219820Sjeff **  ftree_fabric_t definition
198219820Sjeff **
199219820Sjeff ***************************************************/
200219820Sjeff
/*
 * Top-level FatTree routing state: the tables of CAs and switches and
 * the parameters describing the discovered tree.
 * Switches are kept in two maps, one ordered by guid and one by a key
 * generated from the switch tuple.
 */
typedef struct ftree_fabric_t_ {
	osm_opensm_t *p_osm;	/* OpenSM object; also provides the log */
	cl_qmap_t hca_tbl;
	cl_qmap_t sw_tbl;	/* NOTE(review): presumably the guid-ordered switch map - confirm */
	cl_qmap_t sw_by_tuple_tbl;	/* switches keyed by the tuple-derived key */
	cl_qlist_t root_guid_list;
	cl_qmap_t cn_guid_tbl;
	unsigned cn_num;
	uint8_t leaf_switch_rank;
	uint8_t max_switch_rank;
	ftree_sw_t **leaf_switches;
	uint32_t leaf_switches_num;
	uint16_t max_cn_per_leaf;
	uint16_t lft_max_lid_ho;	/* NOTE(review): the _ho suffix suggests host byte order - confirm */
	boolean_t fabric_built;
} ftree_fabric_t;
217219820Sjeff
218219820Sjeff/***************************************************
219219820Sjeff **
220219820Sjeff ** comparators
221219820Sjeff **
222219820Sjeff ***************************************************/
223219820Sjeff
224219820Sjeffstatic int OSM_CDECL __osm_ftree_compare_switches_by_index(IN const void *p1,
225219820Sjeff							   IN const void *p2)
226219820Sjeff{
227219820Sjeff	ftree_sw_t **pp_sw1 = (ftree_sw_t **) p1;
228219820Sjeff	ftree_sw_t **pp_sw2 = (ftree_sw_t **) p2;
229219820Sjeff
230219820Sjeff	uint16_t i;
231219820Sjeff	for (i = 0; i < FTREE_TUPLE_LEN; i++) {
232219820Sjeff		if ((*pp_sw1)->tuple[i] > (*pp_sw2)->tuple[i])
233219820Sjeff			return 1;
234219820Sjeff		if ((*pp_sw1)->tuple[i] < (*pp_sw2)->tuple[i])
235219820Sjeff			return -1;
236219820Sjeff	}
237219820Sjeff	return 0;
238219820Sjeff}
239219820Sjeff
240219820Sjeff/***************************************************/
241219820Sjeff
242219820Sjeffstatic int OSM_CDECL
243219820Sjeff__osm_ftree_compare_port_groups_by_remote_switch_index(IN const void *p1,
244219820Sjeff						       IN const void *p2)
245219820Sjeff{
246219820Sjeff	ftree_port_group_t **pp_g1 = (ftree_port_group_t **) p1;
247219820Sjeff	ftree_port_group_t **pp_g2 = (ftree_port_group_t **) p2;
248219820Sjeff
249219820Sjeff	return
250219820Sjeff	    __osm_ftree_compare_switches_by_index(&
251219820Sjeff						  ((*pp_g1)->remote_hca_or_sw.
252219820Sjeff						   p_sw),
253219820Sjeff						  &((*pp_g2)->remote_hca_or_sw.
254219820Sjeff						    p_sw));
255219820Sjeff}
256219820Sjeff
257219820Sjeff/***************************************************
258219820Sjeff **
259219820Sjeff ** ftree_tuple_t functions
260219820Sjeff **
261219820Sjeff ***************************************************/
262219820Sjeff
263219820Sjeffstatic void __osm_ftree_tuple_init(IN ftree_tuple_t tuple)
264219820Sjeff{
265219820Sjeff	memset(tuple, 0xFF, FTREE_TUPLE_LEN);
266219820Sjeff}
267219820Sjeff
268219820Sjeff/***************************************************/
269219820Sjeff
270219820Sjeffstatic inline boolean_t __osm_ftree_tuple_assigned(IN ftree_tuple_t tuple)
271219820Sjeff{
272219820Sjeff	return (tuple[0] != 0xFF);
273219820Sjeff}
274219820Sjeff
275219820Sjeff/***************************************************/
276219820Sjeff
277219820Sjeff#define FTREE_TUPLE_BUFFERS_NUM 6
278219820Sjeff
279219820Sjeffstatic char *__osm_ftree_tuple_to_str(IN ftree_tuple_t tuple)
280219820Sjeff{
281219820Sjeff	static char buffer[FTREE_TUPLE_BUFFERS_NUM][FTREE_TUPLE_BUFF_LEN];
282219820Sjeff	static uint8_t ind = 0;
283219820Sjeff	char *ret_buffer;
284219820Sjeff	uint32_t i;
285219820Sjeff
286219820Sjeff	if (!__osm_ftree_tuple_assigned(tuple))
287219820Sjeff		return "INDEX.NOT.ASSIGNED";
288219820Sjeff
289219820Sjeff	buffer[ind][0] = '\0';
290219820Sjeff
291219820Sjeff	for (i = 0; (i < FTREE_TUPLE_LEN) && (tuple[i] != 0xFF); i++) {
292219820Sjeff		if ((strlen(buffer[ind]) + 10) > FTREE_TUPLE_BUFF_LEN)
293219820Sjeff			return "INDEX.TOO.LONG";
294219820Sjeff		if (i != 0)
295219820Sjeff			strcat(buffer[ind], ".");
296219820Sjeff		sprintf(&buffer[ind][strlen(buffer[ind])], "%u", tuple[i]);
297219820Sjeff	}
298219820Sjeff
299219820Sjeff	ret_buffer = buffer[ind];
300219820Sjeff	ind = (ind + 1) % FTREE_TUPLE_BUFFERS_NUM;
301219820Sjeff	return ret_buffer;
302219820Sjeff}				/* __osm_ftree_tuple_to_str() */
303219820Sjeff
304219820Sjeff/***************************************************/
305219820Sjeff
306219820Sjeffstatic inline ftree_tuple_key_t __osm_ftree_tuple_to_key(IN ftree_tuple_t tuple)
307219820Sjeff{
308219820Sjeff	ftree_tuple_key_t key;
309219820Sjeff	memcpy(&key, tuple, FTREE_TUPLE_LEN);
310219820Sjeff	return key;
311219820Sjeff}
312219820Sjeff
313219820Sjeff/***************************************************/
314219820Sjeff
315219820Sjeffstatic inline void __osm_ftree_tuple_from_key(IN ftree_tuple_t tuple,
316219820Sjeff					      IN ftree_tuple_key_t key)
317219820Sjeff{
318219820Sjeff	memcpy(tuple, &key, FTREE_TUPLE_LEN);
319219820Sjeff}
320219820Sjeff
321219820Sjeff/***************************************************
322219820Sjeff **
323219820Sjeff ** ftree_sw_tbl_element_t functions
324219820Sjeff **
325219820Sjeff ***************************************************/
326219820Sjeff
327219820Sjeffstatic ftree_sw_tbl_element_t *__osm_ftree_sw_tbl_element_create(IN ftree_sw_t *
328219820Sjeff								 p_sw)
329219820Sjeff{
330219820Sjeff	ftree_sw_tbl_element_t *p_element =
331219820Sjeff	    (ftree_sw_tbl_element_t *) malloc(sizeof(ftree_sw_tbl_element_t));
332219820Sjeff	if (!p_element)
333219820Sjeff		return NULL;
334219820Sjeff	memset(p_element, 0, sizeof(ftree_sw_tbl_element_t));
335219820Sjeff
336219820Sjeff	p_element->p_sw = p_sw;
337219820Sjeff	return p_element;
338219820Sjeff}
339219820Sjeff
340219820Sjeff/***************************************************/
341219820Sjeff
342219820Sjeffstatic void __osm_ftree_sw_tbl_element_destroy(IN ftree_sw_tbl_element_t *
343219820Sjeff					       p_element)
344219820Sjeff{
345219820Sjeff	if (!p_element)
346219820Sjeff		return;
347219820Sjeff	free(p_element);
348219820Sjeff}
349219820Sjeff
350219820Sjeff/***************************************************
351219820Sjeff **
352219820Sjeff ** ftree_port_t functions
353219820Sjeff **
354219820Sjeff ***************************************************/
355219820Sjeff
356219820Sjeffstatic ftree_port_t *__osm_ftree_port_create(IN uint8_t port_num,
357219820Sjeff					     IN uint8_t remote_port_num)
358219820Sjeff{
359219820Sjeff	ftree_port_t *p_port = (ftree_port_t *) malloc(sizeof(ftree_port_t));
360219820Sjeff	if (!p_port)
361219820Sjeff		return NULL;
362219820Sjeff	memset(p_port, 0, sizeof(ftree_port_t));
363219820Sjeff
364219820Sjeff	p_port->port_num = port_num;
365219820Sjeff	p_port->remote_port_num = remote_port_num;
366219820Sjeff
367219820Sjeff	return p_port;
368219820Sjeff}
369219820Sjeff
370219820Sjeff/***************************************************/
371219820Sjeff
372219820Sjeffstatic void __osm_ftree_port_destroy(IN ftree_port_t * p_port)
373219820Sjeff{
374219820Sjeff	if (p_port)
375219820Sjeff		free(p_port);
376219820Sjeff}
377219820Sjeff
378219820Sjeff/***************************************************
379219820Sjeff **
380219820Sjeff ** ftree_port_group_t functions
381219820Sjeff **
382219820Sjeff ***************************************************/
383219820Sjeff
384219820Sjeffstatic ftree_port_group_t *
385219820Sjeff__osm_ftree_port_group_create(IN ib_net16_t base_lid,
386219820Sjeff			      IN ib_net16_t remote_base_lid,
387219820Sjeff			      IN ib_net64_t port_guid,
388219820Sjeff			      IN ib_net64_t node_guid,
389219820Sjeff			      IN uint8_t node_type,
390219820Sjeff		              IN void *p_hca_or_sw,
391219820Sjeff			      IN ib_net64_t remote_port_guid,
392219820Sjeff			      IN ib_net64_t remote_node_guid,
393219820Sjeff			      IN uint8_t remote_node_type,
394219820Sjeff			      IN void *p_remote_hca_or_sw,
395219820Sjeff			      IN boolean_t is_cn)
396219820Sjeff{
397219820Sjeff	ftree_port_group_t *p_group =
398219820Sjeff	    (ftree_port_group_t *) malloc(sizeof(ftree_port_group_t));
399219820Sjeff	if (p_group == NULL)
400219820Sjeff		return NULL;
401219820Sjeff	memset(p_group, 0, sizeof(ftree_port_group_t));
402219820Sjeff
403219820Sjeff	p_group->base_lid = base_lid;
404219820Sjeff	p_group->remote_base_lid = remote_base_lid;
405219820Sjeff	memcpy(&p_group->port_guid, &port_guid, sizeof(ib_net64_t));
406219820Sjeff	memcpy(&p_group->node_guid, &node_guid, sizeof(ib_net64_t));
407219820Sjeff	memcpy(&p_group->remote_port_guid, &remote_port_guid,
408219820Sjeff	       sizeof(ib_net64_t));
409219820Sjeff	memcpy(&p_group->remote_node_guid, &remote_node_guid,
410219820Sjeff	       sizeof(ib_net64_t));
411219820Sjeff
412219820Sjeff	p_group->node_type = node_type;
413219820Sjeff	switch (node_type) {
414219820Sjeff	case IB_NODE_TYPE_CA:
415219820Sjeff		p_group->hca_or_sw.p_hca = (ftree_hca_t *) p_hca_or_sw;
416219820Sjeff		break;
417219820Sjeff	case IB_NODE_TYPE_SWITCH:
418219820Sjeff		p_group->hca_or_sw.p_sw = (ftree_sw_t *) p_hca_or_sw;
419219820Sjeff		break;
420219820Sjeff	default:
421219820Sjeff		/* we shouldn't get here - port is created only in hca or switch */
422219820Sjeff		CL_ASSERT(0);
423219820Sjeff	}
424219820Sjeff
425219820Sjeff	p_group->remote_node_type = remote_node_type;
426219820Sjeff	switch (remote_node_type) {
427219820Sjeff	case IB_NODE_TYPE_CA:
428219820Sjeff		p_group->remote_hca_or_sw.p_hca =
429219820Sjeff		    (ftree_hca_t *) p_remote_hca_or_sw;
430219820Sjeff		break;
431219820Sjeff	case IB_NODE_TYPE_SWITCH:
432219820Sjeff		p_group->remote_hca_or_sw.p_sw =
433219820Sjeff		    (ftree_sw_t *) p_remote_hca_or_sw;
434219820Sjeff		break;
435219820Sjeff	default:
436219820Sjeff		/* we shouldn't get here - port is created only in hca or switch */
437219820Sjeff		CL_ASSERT(0);
438219820Sjeff	}
439219820Sjeff
440219820Sjeff	cl_ptr_vector_init(&p_group->ports, 0,	/* min size */
441219820Sjeff			   8);	/* grow size */
442219820Sjeff	p_group->is_cn = is_cn;
443219820Sjeff	return p_group;
444219820Sjeff}				/* __osm_ftree_port_group_create() */
445219820Sjeff
446219820Sjeff/***************************************************/
447219820Sjeff
448219820Sjeffstatic void __osm_ftree_port_group_destroy(IN ftree_port_group_t * p_group)
449219820Sjeff{
450219820Sjeff	uint32_t i;
451219820Sjeff	uint32_t size;
452219820Sjeff	ftree_port_t *p_port;
453219820Sjeff
454219820Sjeff	if (!p_group)
455219820Sjeff		return;
456219820Sjeff
457219820Sjeff	/* remove all the elements of p_group->ports vector */
458219820Sjeff	size = cl_ptr_vector_get_size(&p_group->ports);
459219820Sjeff	for (i = 0; i < size; i++) {
460219820Sjeff		cl_ptr_vector_at(&p_group->ports, i, (void *)&p_port);
461219820Sjeff		__osm_ftree_port_destroy(p_port);
462219820Sjeff	}
463219820Sjeff	cl_ptr_vector_destroy(&p_group->ports);
464219820Sjeff	free(p_group);
465219820Sjeff}				/* __osm_ftree_port_group_destroy() */
466219820Sjeff
467219820Sjeff/***************************************************/
468219820Sjeff
469219820Sjeffstatic void
470219820Sjeff__osm_ftree_port_group_dump(IN ftree_fabric_t * p_ftree,
471219820Sjeff			    IN ftree_port_group_t * p_group,
472219820Sjeff			    IN ftree_direction_t direction)
473219820Sjeff{
474219820Sjeff	ftree_port_t *p_port;
475219820Sjeff	uint32_t size;
476219820Sjeff	uint32_t i;
477219820Sjeff	char buff[10 * 1024];
478219820Sjeff
479219820Sjeff	if (!p_group)
480219820Sjeff		return;
481219820Sjeff
482219820Sjeff	if (!osm_log_is_active(&p_ftree->p_osm->log, OSM_LOG_DEBUG))
483219820Sjeff		return;
484219820Sjeff
485219820Sjeff	size = cl_ptr_vector_get_size(&p_group->ports);
486219820Sjeff	buff[0] = '\0';
487219820Sjeff
488219820Sjeff	for (i = 0; i < size; i++) {
489219820Sjeff		cl_ptr_vector_at(&p_group->ports, i, (void *)&p_port);
490219820Sjeff		CL_ASSERT(p_port);
491219820Sjeff
492219820Sjeff		if (i != 0)
493219820Sjeff			strcat(buff, ", ");
494219820Sjeff		sprintf(buff + strlen(buff), "%u", p_port->port_num);
495219820Sjeff	}
496219820Sjeff
497219820Sjeff	OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG,
498219820Sjeff		"    Port Group of size %u, port(s): %s, direction: %s\n"
499219820Sjeff		"                  Local <--> Remote GUID (LID):"
500219820Sjeff		"0x%016" PRIx64 " (0x%04x) <--> 0x%016" PRIx64 " (0x%04x)\n",
501219820Sjeff		size,
502219820Sjeff		buff,
503219820Sjeff		(direction == FTREE_DIRECTION_DOWN) ? "DOWN" : "UP",
504219820Sjeff		cl_ntoh64(p_group->port_guid),
505219820Sjeff		cl_ntoh16(p_group->base_lid),
506219820Sjeff		cl_ntoh64(p_group->remote_port_guid),
507219820Sjeff		cl_ntoh16(p_group->remote_base_lid));
508219820Sjeff
509219820Sjeff}				/* __osm_ftree_port_group_dump() */
510219820Sjeff
511219820Sjeff/***************************************************/
512219820Sjeff
513219820Sjeffstatic void
514219820Sjeff__osm_ftree_port_group_add_port(IN ftree_port_group_t * p_group,
515219820Sjeff				IN uint8_t port_num, IN uint8_t remote_port_num)
516219820Sjeff{
517219820Sjeff	uint16_t i;
518219820Sjeff	ftree_port_t *p_port;
519219820Sjeff
520219820Sjeff	for (i = 0; i < cl_ptr_vector_get_size(&p_group->ports); i++) {
521219820Sjeff		cl_ptr_vector_at(&p_group->ports, i, (void *)&p_port);
522219820Sjeff		if (p_port->port_num == port_num)
523219820Sjeff			return;
524219820Sjeff	}
525219820Sjeff
526219820Sjeff	p_port = __osm_ftree_port_create(port_num, remote_port_num);
527219820Sjeff	cl_ptr_vector_insert(&p_group->ports, p_port, NULL);
528219820Sjeff}
529219820Sjeff
530219820Sjeff/***************************************************
531219820Sjeff **
532219820Sjeff ** ftree_sw_t functions
533219820Sjeff **
534219820Sjeff ***************************************************/
535219820Sjeff
536219820Sjeffstatic ftree_sw_t *__osm_ftree_sw_create(IN ftree_fabric_t * p_ftree,
537219820Sjeff					 IN osm_switch_t * p_osm_sw)
538219820Sjeff{
539219820Sjeff	ftree_sw_t *p_sw;
540219820Sjeff	uint8_t ports_num;
541219820Sjeff
542219820Sjeff	/* make sure that the switch has ports */
543219820Sjeff	if (p_osm_sw->num_ports == 1)
544219820Sjeff		return NULL;
545219820Sjeff
546219820Sjeff	p_sw = (ftree_sw_t *) malloc(sizeof(ftree_sw_t));
547219820Sjeff	if (p_sw == NULL)
548219820Sjeff		return NULL;
549219820Sjeff	memset(p_sw, 0, sizeof(ftree_sw_t));
550219820Sjeff
551219820Sjeff	p_sw->p_osm_sw = p_osm_sw;
552219820Sjeff	p_sw->rank = 0xFFFFFFFF;
553219820Sjeff	__osm_ftree_tuple_init(p_sw->tuple);
554219820Sjeff
555219820Sjeff	p_sw->base_lid = osm_node_get_base_lid(p_sw->p_osm_sw->p_node, 0);
556219820Sjeff
557219820Sjeff	ports_num = osm_node_get_num_physp(p_sw->p_osm_sw->p_node);
558219820Sjeff	p_sw->down_port_groups =
559219820Sjeff	    (ftree_port_group_t **) malloc(ports_num *
560219820Sjeff					   sizeof(ftree_port_group_t *));
561219820Sjeff	p_sw->up_port_groups =
562219820Sjeff	    (ftree_port_group_t **) malloc(ports_num *
563219820Sjeff					   sizeof(ftree_port_group_t *));
564219820Sjeff	if (!p_sw->down_port_groups || !p_sw->up_port_groups)
565219820Sjeff		return NULL;
566219820Sjeff	p_sw->down_port_groups_num = 0;
567219820Sjeff	p_sw->up_port_groups_num = 0;
568219820Sjeff
569219820Sjeff	/* initialize lft buffer */
570219820Sjeff	memset(p_osm_sw->new_lft, OSM_NO_PATH, IB_LID_UCAST_END_HO + 1);
571219820Sjeff
572219820Sjeff	p_sw->down_port_groups_idx = -1;
573219820Sjeff
574219820Sjeff	return p_sw;
575219820Sjeff}				/* __osm_ftree_sw_create() */
576219820Sjeff
577219820Sjeff/***************************************************/
578219820Sjeff
579219820Sjeffstatic void __osm_ftree_sw_destroy(IN ftree_fabric_t * p_ftree,
580219820Sjeff				   IN ftree_sw_t * p_sw)
581219820Sjeff{
582219820Sjeff	uint8_t i;
583219820Sjeff
584219820Sjeff	if (!p_sw)
585219820Sjeff		return;
586219820Sjeff
587219820Sjeff	for (i = 0; i < p_sw->down_port_groups_num; i++)
588219820Sjeff		__osm_ftree_port_group_destroy(p_sw->down_port_groups[i]);
589219820Sjeff	for (i = 0; i < p_sw->up_port_groups_num; i++)
590219820Sjeff		__osm_ftree_port_group_destroy(p_sw->up_port_groups[i]);
591219820Sjeff	if (p_sw->down_port_groups)
592219820Sjeff		free(p_sw->down_port_groups);
593219820Sjeff	if (p_sw->up_port_groups)
594219820Sjeff		free(p_sw->up_port_groups);
595219820Sjeff
596219820Sjeff	free(p_sw);
597219820Sjeff}				/* __osm_ftree_sw_destroy() */
598219820Sjeff
599219820Sjeff/***************************************************/
600219820Sjeff
601219820Sjeffstatic uint64_t __osm_ftree_sw_get_guid_no(IN ftree_sw_t * p_sw)
602219820Sjeff{
603219820Sjeff	if (!p_sw)
604219820Sjeff		return 0;
605219820Sjeff	return osm_node_get_node_guid(p_sw->p_osm_sw->p_node);
606219820Sjeff}
607219820Sjeff
608219820Sjeff/***************************************************/
609219820Sjeff
610219820Sjeffstatic uint64_t __osm_ftree_sw_get_guid_ho(IN ftree_sw_t * p_sw)
611219820Sjeff{
612219820Sjeff	return cl_ntoh64(__osm_ftree_sw_get_guid_no(p_sw));
613219820Sjeff}
614219820Sjeff
615219820Sjeff/***************************************************/
616219820Sjeff
617219820Sjeffstatic void __osm_ftree_sw_dump(IN ftree_fabric_t * p_ftree,
618219820Sjeff				IN ftree_sw_t * p_sw)
619219820Sjeff{
620219820Sjeff	uint32_t i;
621219820Sjeff
622219820Sjeff	if (!p_sw)
623219820Sjeff		return;
624219820Sjeff
625219820Sjeff	if (!osm_log_is_active(&p_ftree->p_osm->log, OSM_LOG_DEBUG))
626219820Sjeff		return;
627219820Sjeff
628219820Sjeff	OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG,
629219820Sjeff		"Switch index: %s, GUID: 0x%016" PRIx64
630219820Sjeff		", Ports: %u DOWN, %u UP\n",
631219820Sjeff		__osm_ftree_tuple_to_str(p_sw->tuple),
632219820Sjeff		__osm_ftree_sw_get_guid_ho(p_sw), p_sw->down_port_groups_num,
633219820Sjeff		p_sw->up_port_groups_num);
634219820Sjeff
635219820Sjeff	for (i = 0; i < p_sw->down_port_groups_num; i++)
636219820Sjeff		__osm_ftree_port_group_dump(p_ftree,
637219820Sjeff					    p_sw->down_port_groups[i],
638219820Sjeff					    FTREE_DIRECTION_DOWN);
639219820Sjeff	for (i = 0; i < p_sw->up_port_groups_num; i++)
640219820Sjeff		__osm_ftree_port_group_dump(p_ftree, p_sw->up_port_groups[i],
641219820Sjeff					    FTREE_DIRECTION_UP);
642219820Sjeff
643219820Sjeff}				/* __osm_ftree_sw_dump() */
644219820Sjeff
645219820Sjeff/***************************************************/
646219820Sjeff
647219820Sjeffstatic boolean_t __osm_ftree_sw_ranked(IN ftree_sw_t * p_sw)
648219820Sjeff{
649219820Sjeff	return (p_sw->rank != 0xFFFFFFFF);
650219820Sjeff}
651219820Sjeff
652219820Sjeff/***************************************************/
653219820Sjeff
654219820Sjeffstatic ftree_port_group_t *
655219820Sjeff__osm_ftree_sw_get_port_group_by_remote_lid(IN ftree_sw_t * p_sw,
656219820Sjeff					    IN ib_net16_t remote_base_lid,
657219820Sjeff					    IN ftree_direction_t direction)
658219820Sjeff{
659219820Sjeff	uint32_t i;
660219820Sjeff	uint32_t size;
661219820Sjeff	ftree_port_group_t **port_groups;
662219820Sjeff
663219820Sjeff	if (direction == FTREE_DIRECTION_UP) {
664219820Sjeff		port_groups = p_sw->up_port_groups;
665219820Sjeff		size = p_sw->up_port_groups_num;
666219820Sjeff	} else {
667219820Sjeff		port_groups = p_sw->down_port_groups;
668219820Sjeff		size = p_sw->down_port_groups_num;
669219820Sjeff	}
670219820Sjeff
671219820Sjeff	for (i = 0; i < size; i++)
672219820Sjeff		if (remote_base_lid == port_groups[i]->remote_base_lid)
673219820Sjeff			return port_groups[i];
674219820Sjeff
675219820Sjeff	return NULL;
676219820Sjeff}				/* __osm_ftree_sw_get_port_group_by_remote_lid() */
677219820Sjeff
678219820Sjeff/***************************************************/
679219820Sjeff
680219820Sjeffstatic void
681219820Sjeff__osm_ftree_sw_add_port(IN ftree_sw_t * p_sw,
682219820Sjeff			IN uint8_t port_num,
683219820Sjeff			IN uint8_t remote_port_num,
684219820Sjeff			IN ib_net16_t base_lid,
685219820Sjeff			IN ib_net16_t remote_base_lid,
686219820Sjeff			IN ib_net64_t port_guid,
687219820Sjeff			IN ib_net64_t remote_port_guid,
688219820Sjeff			IN ib_net64_t remote_node_guid,
689219820Sjeff			IN uint8_t remote_node_type,
690219820Sjeff			IN void *p_remote_hca_or_sw,
691219820Sjeff			IN ftree_direction_t direction)
692219820Sjeff{
693219820Sjeff	ftree_port_group_t *p_group =
694219820Sjeff	    __osm_ftree_sw_get_port_group_by_remote_lid(p_sw, remote_base_lid,
695219820Sjeff							direction);
696219820Sjeff
697219820Sjeff	if (!p_group) {
698219820Sjeff		p_group = __osm_ftree_port_group_create(base_lid,
699219820Sjeff							remote_base_lid,
700219820Sjeff							port_guid,
701219820Sjeff							__osm_ftree_sw_get_guid_no
702219820Sjeff							(p_sw),
703219820Sjeff							IB_NODE_TYPE_SWITCH,
704219820Sjeff							p_sw, remote_port_guid,
705219820Sjeff							remote_node_guid,
706219820Sjeff							remote_node_type,
707219820Sjeff							p_remote_hca_or_sw,
708219820Sjeff							FALSE);
709219820Sjeff		CL_ASSERT(p_group);
710219820Sjeff
711219820Sjeff		if (direction == FTREE_DIRECTION_UP)
712219820Sjeff			p_sw->up_port_groups[p_sw->up_port_groups_num++] =
713219820Sjeff			    p_group;
714219820Sjeff		else
715219820Sjeff			p_sw->down_port_groups[p_sw->down_port_groups_num++] =
716219820Sjeff			    p_group;
717219820Sjeff	}
718219820Sjeff	__osm_ftree_port_group_add_port(p_group, port_num, remote_port_num);
719219820Sjeff
720219820Sjeff}				/* __osm_ftree_sw_add_port() */
721219820Sjeff
722219820Sjeff/***************************************************/
723219820Sjeff
724219820Sjeffstatic inline cl_status_t
725219820Sjeff__osm_ftree_sw_set_hops(IN ftree_sw_t * p_sw,
726219820Sjeff			IN uint16_t lid_ho, IN uint8_t port_num,
727219820Sjeff			IN uint8_t hops)
728219820Sjeff{
729219820Sjeff	/* set local min hop table(LID) */
730219820Sjeff	return osm_switch_set_hops(p_sw->p_osm_sw, lid_ho, port_num, hops);
731219820Sjeff}
732219820Sjeff
733219820Sjeff/***************************************************
734219820Sjeff **
735219820Sjeff ** ftree_hca_t functions
736219820Sjeff **
737219820Sjeff ***************************************************/
738219820Sjeff
739219820Sjeffstatic ftree_hca_t *__osm_ftree_hca_create(IN osm_node_t * p_osm_node)
740219820Sjeff{
741219820Sjeff	ftree_hca_t *p_hca = (ftree_hca_t *) malloc(sizeof(ftree_hca_t));
742219820Sjeff	if (p_hca == NULL)
743219820Sjeff		return NULL;
744219820Sjeff	memset(p_hca, 0, sizeof(ftree_hca_t));
745219820Sjeff
746219820Sjeff	p_hca->p_osm_node = p_osm_node;
747219820Sjeff	p_hca->up_port_groups = (ftree_port_group_t **)
748219820Sjeff	    malloc(osm_node_get_num_physp(p_hca->p_osm_node) *
749219820Sjeff		   sizeof(ftree_port_group_t *));
750219820Sjeff	if (!p_hca->up_port_groups)
751219820Sjeff		return NULL;
752219820Sjeff	p_hca->up_port_groups_num = 0;
753219820Sjeff	return p_hca;
754219820Sjeff}
755219820Sjeff
756219820Sjeff/***************************************************/
757219820Sjeff
758219820Sjeffstatic void __osm_ftree_hca_destroy(IN ftree_hca_t * p_hca)
759219820Sjeff{
760219820Sjeff	uint32_t i;
761219820Sjeff
762219820Sjeff	if (!p_hca)
763219820Sjeff		return;
764219820Sjeff
765219820Sjeff	for (i = 0; i < p_hca->up_port_groups_num; i++)
766219820Sjeff		__osm_ftree_port_group_destroy(p_hca->up_port_groups[i]);
767219820Sjeff
768219820Sjeff	if (p_hca->up_port_groups)
769219820Sjeff		free(p_hca->up_port_groups);
770219820Sjeff
771219820Sjeff	free(p_hca);
772219820Sjeff}
773219820Sjeff
774219820Sjeff/***************************************************/
775219820Sjeff
776219820Sjeffstatic uint64_t __osm_ftree_hca_get_guid_no(IN ftree_hca_t * p_hca)
777219820Sjeff{
778219820Sjeff	if (!p_hca)
779219820Sjeff		return 0;
780219820Sjeff	return osm_node_get_node_guid(p_hca->p_osm_node);
781219820Sjeff}
782219820Sjeff
783219820Sjeff/***************************************************/
784219820Sjeff
785219820Sjeffstatic uint64_t __osm_ftree_hca_get_guid_ho(IN ftree_hca_t * p_hca)
786219820Sjeff{
787219820Sjeff	return cl_ntoh64(__osm_ftree_hca_get_guid_no(p_hca));
788219820Sjeff}
789219820Sjeff
790219820Sjeff/***************************************************/
791219820Sjeff
792219820Sjeffstatic void __osm_ftree_hca_dump(IN ftree_fabric_t * p_ftree,
793219820Sjeff				 IN ftree_hca_t * p_hca)
794219820Sjeff{
795219820Sjeff	uint32_t i;
796219820Sjeff
797219820Sjeff	if (!p_hca)
798219820Sjeff		return;
799219820Sjeff
800219820Sjeff	if (!osm_log_is_active(&p_ftree->p_osm->log, OSM_LOG_DEBUG))
801219820Sjeff		return;
802219820Sjeff
803219820Sjeff	OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG,
804219820Sjeff		"CA GUID: 0x%016" PRIx64 ", Ports: %u UP\n",
805219820Sjeff		__osm_ftree_hca_get_guid_ho(p_hca), p_hca->up_port_groups_num);
806219820Sjeff
807219820Sjeff	for (i = 0; i < p_hca->up_port_groups_num; i++)
808219820Sjeff		__osm_ftree_port_group_dump(p_ftree, p_hca->up_port_groups[i],
809219820Sjeff					    FTREE_DIRECTION_UP);
810219820Sjeff}
811219820Sjeff
812219820Sjeff/***************************************************/
813219820Sjeff
814219820Sjeffstatic ftree_port_group_t *
815219820Sjeff__osm_ftree_hca_get_port_group_by_remote_lid(IN ftree_hca_t * p_hca,
816219820Sjeff					     IN ib_net16_t remote_base_lid)
817219820Sjeff{
818219820Sjeff	uint32_t i;
819219820Sjeff	for (i = 0; i < p_hca->up_port_groups_num; i++)
820219820Sjeff		if (remote_base_lid ==
821219820Sjeff		    p_hca->up_port_groups[i]->remote_base_lid)
822219820Sjeff			return p_hca->up_port_groups[i];
823219820Sjeff
824219820Sjeff	return NULL;
825219820Sjeff}
826219820Sjeff
827219820Sjeff/***************************************************/
828219820Sjeff
829219820Sjeffstatic void
830219820Sjeff__osm_ftree_hca_add_port(IN ftree_hca_t * p_hca,
831219820Sjeff			 IN uint8_t port_num,
832219820Sjeff			 IN uint8_t remote_port_num,
833219820Sjeff			 IN ib_net16_t base_lid,
834219820Sjeff			 IN ib_net16_t remote_base_lid,
835219820Sjeff			 IN ib_net64_t port_guid,
836219820Sjeff			 IN ib_net64_t remote_port_guid,
837219820Sjeff			 IN ib_net64_t remote_node_guid,
838219820Sjeff			 IN uint8_t remote_node_type,
839219820Sjeff			 IN void *p_remote_hca_or_sw, IN boolean_t is_cn)
840219820Sjeff{
841219820Sjeff	ftree_port_group_t *p_group;
842219820Sjeff
843219820Sjeff	/* this function is supposed to be called only for adding ports
844219820Sjeff	   in hca's that lead to switches */
845219820Sjeff	CL_ASSERT(remote_node_type == IB_NODE_TYPE_SWITCH);
846219820Sjeff
847219820Sjeff	p_group =
848219820Sjeff	    __osm_ftree_hca_get_port_group_by_remote_lid(p_hca,
849219820Sjeff							 remote_base_lid);
850219820Sjeff
851219820Sjeff	if (!p_group) {
852219820Sjeff		p_group = __osm_ftree_port_group_create(base_lid,
853219820Sjeff							remote_base_lid,
854219820Sjeff							port_guid,
855219820Sjeff							__osm_ftree_hca_get_guid_no
856219820Sjeff							(p_hca),
857219820Sjeff							IB_NODE_TYPE_CA, p_hca,
858219820Sjeff							remote_port_guid,
859219820Sjeff							remote_node_guid,
860219820Sjeff							remote_node_type,
861219820Sjeff							p_remote_hca_or_sw,
862219820Sjeff							is_cn);
863219820Sjeff		p_hca->up_port_groups[p_hca->up_port_groups_num++] = p_group;
864219820Sjeff	}
865219820Sjeff	__osm_ftree_port_group_add_port(p_group, port_num, remote_port_num);
866219820Sjeff
867219820Sjeff}				/* __osm_ftree_hca_add_port() */
868219820Sjeff
869219820Sjeff/***************************************************
870219820Sjeff **
871219820Sjeff ** ftree_fabric_t functions
872219820Sjeff **
873219820Sjeff ***************************************************/
874219820Sjeff
875219820Sjeffstatic ftree_fabric_t *__osm_ftree_fabric_create()
876219820Sjeff{
877219820Sjeff	ftree_fabric_t *p_ftree =
878219820Sjeff	    (ftree_fabric_t *) malloc(sizeof(ftree_fabric_t));
879219820Sjeff	if (p_ftree == NULL)
880219820Sjeff		return NULL;
881219820Sjeff
882219820Sjeff	memset(p_ftree, 0, sizeof(ftree_fabric_t));
883219820Sjeff
884219820Sjeff	cl_qmap_init(&p_ftree->hca_tbl);
885219820Sjeff	cl_qmap_init(&p_ftree->sw_tbl);
886219820Sjeff	cl_qmap_init(&p_ftree->sw_by_tuple_tbl);
887219820Sjeff	cl_qmap_init(&p_ftree->cn_guid_tbl);
888219820Sjeff
889219820Sjeff	cl_qlist_init(&p_ftree->root_guid_list);
890219820Sjeff
891219820Sjeff	return p_ftree;
892219820Sjeff}
893219820Sjeff
894219820Sjeff/***************************************************/
895219820Sjeff
896219820Sjeffstatic void __osm_ftree_fabric_clear(ftree_fabric_t * p_ftree)
897219820Sjeff{
898219820Sjeff	ftree_hca_t *p_hca;
899219820Sjeff	ftree_hca_t *p_next_hca;
900219820Sjeff	ftree_sw_t *p_sw;
901219820Sjeff	ftree_sw_t *p_next_sw;
902219820Sjeff	ftree_sw_tbl_element_t *p_element;
903219820Sjeff	ftree_sw_tbl_element_t *p_next_element;
904219820Sjeff	name_map_item_t *p_guid_element, *p_next_guid_element;
905219820Sjeff
906219820Sjeff	if (!p_ftree)
907219820Sjeff		return;
908219820Sjeff
909219820Sjeff	/* remove all the elements of hca_tbl */
910219820Sjeff
911219820Sjeff	p_next_hca = (ftree_hca_t *) cl_qmap_head(&p_ftree->hca_tbl);
912219820Sjeff	while (p_next_hca != (ftree_hca_t *) cl_qmap_end(&p_ftree->hca_tbl)) {
913219820Sjeff		p_hca = p_next_hca;
914219820Sjeff		p_next_hca = (ftree_hca_t *) cl_qmap_next(&p_hca->map_item);
915219820Sjeff		__osm_ftree_hca_destroy(p_hca);
916219820Sjeff	}
917219820Sjeff	cl_qmap_remove_all(&p_ftree->hca_tbl);
918219820Sjeff
919219820Sjeff	/* remove all the elements of sw_tbl */
920219820Sjeff
921219820Sjeff	p_next_sw = (ftree_sw_t *) cl_qmap_head(&p_ftree->sw_tbl);
922219820Sjeff	while (p_next_sw != (ftree_sw_t *) cl_qmap_end(&p_ftree->sw_tbl)) {
923219820Sjeff		p_sw = p_next_sw;
924219820Sjeff		p_next_sw = (ftree_sw_t *) cl_qmap_next(&p_sw->map_item);
925219820Sjeff		__osm_ftree_sw_destroy(p_ftree, p_sw);
926219820Sjeff	}
927219820Sjeff	cl_qmap_remove_all(&p_ftree->sw_tbl);
928219820Sjeff
929219820Sjeff	/* remove all the elements of sw_by_tuple_tbl */
930219820Sjeff
931219820Sjeff	p_next_element =
932219820Sjeff	    (ftree_sw_tbl_element_t *) cl_qmap_head(&p_ftree->sw_by_tuple_tbl);
933219820Sjeff	while (p_next_element !=
934219820Sjeff	       (ftree_sw_tbl_element_t *) cl_qmap_end(&p_ftree->
935219820Sjeff						      sw_by_tuple_tbl)) {
936219820Sjeff		p_element = p_next_element;
937219820Sjeff		p_next_element =
938219820Sjeff		    (ftree_sw_tbl_element_t *) cl_qmap_next(&p_element->
939219820Sjeff							    map_item);
940219820Sjeff		__osm_ftree_sw_tbl_element_destroy(p_element);
941219820Sjeff	}
942219820Sjeff	cl_qmap_remove_all(&p_ftree->sw_by_tuple_tbl);
943219820Sjeff
944219820Sjeff	/* remove all the elements of cn_guid_tbl */
945219820Sjeff	p_next_guid_element =
946219820Sjeff	    (name_map_item_t *) cl_qmap_head(&p_ftree->cn_guid_tbl);
947219820Sjeff	while (p_next_guid_element !=
948219820Sjeff	       (name_map_item_t *) cl_qmap_end(&p_ftree->cn_guid_tbl)) {
949219820Sjeff		p_guid_element = p_next_guid_element;
950219820Sjeff		p_next_guid_element =
951219820Sjeff		    (name_map_item_t *) cl_qmap_next(&p_guid_element->item);
952219820Sjeff		free(p_guid_element);
953219820Sjeff	}
954219820Sjeff	cl_qmap_remove_all(&p_ftree->cn_guid_tbl);
955219820Sjeff
956219820Sjeff	/* remove all the elements of root_guid_list */
957219820Sjeff	while (!cl_is_qlist_empty(&p_ftree->root_guid_list))
958219820Sjeff		free(cl_qlist_remove_head(&p_ftree->root_guid_list));
959219820Sjeff
960219820Sjeff	/* free the leaf switches array */
961219820Sjeff	if ((p_ftree->leaf_switches_num > 0) && (p_ftree->leaf_switches))
962219820Sjeff		free(p_ftree->leaf_switches);
963219820Sjeff
964219820Sjeff	p_ftree->leaf_switches_num = 0;
965219820Sjeff	p_ftree->cn_num = 0;
966219820Sjeff	p_ftree->leaf_switch_rank = 0;
967219820Sjeff	p_ftree->max_switch_rank = 0;
968219820Sjeff	p_ftree->max_cn_per_leaf = 0;
969219820Sjeff	p_ftree->lft_max_lid_ho = 0;
970219820Sjeff	p_ftree->leaf_switches = NULL;
971219820Sjeff	p_ftree->fabric_built = FALSE;
972219820Sjeff
973219820Sjeff}				/* __osm_ftree_fabric_destroy() */
974219820Sjeff
975219820Sjeff/***************************************************/
976219820Sjeff
977219820Sjeffstatic void __osm_ftree_fabric_destroy(ftree_fabric_t * p_ftree)
978219820Sjeff{
979219820Sjeff	if (!p_ftree)
980219820Sjeff		return;
981219820Sjeff	__osm_ftree_fabric_clear(p_ftree);
982219820Sjeff	free(p_ftree);
983219820Sjeff}
984219820Sjeff
985219820Sjeff/***************************************************/
986219820Sjeff
987219820Sjeffstatic uint8_t __osm_ftree_fabric_get_rank(ftree_fabric_t * p_ftree)
988219820Sjeff{
989219820Sjeff	return p_ftree->leaf_switch_rank + 1;
990219820Sjeff}
991219820Sjeff
992219820Sjeff/***************************************************/
993219820Sjeff
994219820Sjeffstatic void __osm_ftree_fabric_add_hca(ftree_fabric_t * p_ftree,
995219820Sjeff				       osm_node_t * p_osm_node)
996219820Sjeff{
997219820Sjeff	ftree_hca_t *p_hca = __osm_ftree_hca_create(p_osm_node);
998219820Sjeff
999219820Sjeff	CL_ASSERT(osm_node_get_type(p_osm_node) == IB_NODE_TYPE_CA);
1000219820Sjeff
1001219820Sjeff	cl_qmap_insert(&p_ftree->hca_tbl, p_osm_node->node_info.node_guid,
1002219820Sjeff		       &p_hca->map_item);
1003219820Sjeff}
1004219820Sjeff
1005219820Sjeff/***************************************************/
1006219820Sjeff
1007219820Sjeffstatic void __osm_ftree_fabric_add_sw(ftree_fabric_t * p_ftree,
1008219820Sjeff				      osm_switch_t * p_osm_sw)
1009219820Sjeff{
1010219820Sjeff	ftree_sw_t *p_sw = __osm_ftree_sw_create(p_ftree, p_osm_sw);
1011219820Sjeff
1012219820Sjeff	CL_ASSERT(osm_node_get_type(p_osm_sw->p_node) == IB_NODE_TYPE_SWITCH);
1013219820Sjeff
1014219820Sjeff	cl_qmap_insert(&p_ftree->sw_tbl, p_osm_sw->p_node->node_info.node_guid,
1015219820Sjeff		       &p_sw->map_item);
1016219820Sjeff
1017219820Sjeff	/* track the max lid (in host order) that exists in the fabric */
1018219820Sjeff	if (cl_ntoh16(p_sw->base_lid) > p_ftree->lft_max_lid_ho)
1019219820Sjeff		p_ftree->lft_max_lid_ho = cl_ntoh16(p_sw->base_lid);
1020219820Sjeff}
1021219820Sjeff
1022219820Sjeff/***************************************************/
1023219820Sjeff
1024219820Sjeffstatic void __osm_ftree_fabric_add_sw_by_tuple(IN ftree_fabric_t * p_ftree,
1025219820Sjeff					       IN ftree_sw_t * p_sw)
1026219820Sjeff{
1027219820Sjeff	CL_ASSERT(__osm_ftree_tuple_assigned(p_sw->tuple));
1028219820Sjeff
1029219820Sjeff	cl_qmap_insert(&p_ftree->sw_by_tuple_tbl,
1030219820Sjeff		       __osm_ftree_tuple_to_key(p_sw->tuple),
1031219820Sjeff		       &__osm_ftree_sw_tbl_element_create(p_sw)->map_item);
1032219820Sjeff}
1033219820Sjeff
1034219820Sjeff/***************************************************/
1035219820Sjeff
1036219820Sjeffstatic ftree_sw_t *__osm_ftree_fabric_get_sw_by_tuple(IN ftree_fabric_t *
1037219820Sjeff						      p_ftree,
1038219820Sjeff						      IN ftree_tuple_t tuple)
1039219820Sjeff{
1040219820Sjeff	ftree_sw_tbl_element_t *p_element;
1041219820Sjeff
1042219820Sjeff	CL_ASSERT(__osm_ftree_tuple_assigned(tuple));
1043219820Sjeff
1044219820Sjeff	__osm_ftree_tuple_to_key(tuple);
1045219820Sjeff
1046219820Sjeff	p_element =
1047219820Sjeff	    (ftree_sw_tbl_element_t *) cl_qmap_get(&p_ftree->sw_by_tuple_tbl,
1048219820Sjeff						   __osm_ftree_tuple_to_key
1049219820Sjeff						   (tuple));
1050219820Sjeff	if (p_element ==
1051219820Sjeff	    (ftree_sw_tbl_element_t *) cl_qmap_end(&p_ftree->sw_by_tuple_tbl))
1052219820Sjeff		return NULL;
1053219820Sjeff
1054219820Sjeff	return p_element->p_sw;
1055219820Sjeff}
1056219820Sjeff
1057219820Sjeff/***************************************************/
1058219820Sjeff
1059219820Sjeffstatic ftree_sw_t *__osm_ftree_fabric_get_sw_by_guid(IN ftree_fabric_t *
1060219820Sjeff						     p_ftree, IN uint64_t guid)
1061219820Sjeff{
1062219820Sjeff	ftree_sw_t *p_sw;
1063219820Sjeff	p_sw = (ftree_sw_t *) cl_qmap_get(&p_ftree->sw_tbl, guid);
1064219820Sjeff	if (p_sw == (ftree_sw_t *) cl_qmap_end(&p_ftree->sw_tbl))
1065219820Sjeff		return NULL;
1066219820Sjeff	return p_sw;
1067219820Sjeff}
1068219820Sjeff
1069219820Sjeff/***************************************************/
1070219820Sjeff
1071219820Sjeffstatic ftree_hca_t *__osm_ftree_fabric_get_hca_by_guid(IN ftree_fabric_t *
1072219820Sjeff						       p_ftree,
1073219820Sjeff						       IN uint64_t guid)
1074219820Sjeff{
1075219820Sjeff	ftree_hca_t *p_hca;
1076219820Sjeff	p_hca = (ftree_hca_t *) cl_qmap_get(&p_ftree->hca_tbl, guid);
1077219820Sjeff	if (p_hca == (ftree_hca_t *) cl_qmap_end(&p_ftree->hca_tbl))
1078219820Sjeff		return NULL;
1079219820Sjeff	return p_hca;
1080219820Sjeff}
1081219820Sjeff
1082219820Sjeff/***************************************************/
1083219820Sjeff
1084219820Sjeffstatic void __osm_ftree_fabric_dump(ftree_fabric_t * p_ftree)
1085219820Sjeff{
1086219820Sjeff	uint32_t i;
1087219820Sjeff	ftree_hca_t *p_hca;
1088219820Sjeff	ftree_sw_t *p_sw;
1089219820Sjeff
1090219820Sjeff	if (!osm_log_is_active(&p_ftree->p_osm->log, OSM_LOG_DEBUG))
1091219820Sjeff		return;
1092219820Sjeff
1093219820Sjeff	OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, "\n"
1094219820Sjeff		"                       |-------------------------------|\n"
1095219820Sjeff		"                       |-  Full fabric topology dump  -|\n"
1096219820Sjeff		"                       |-------------------------------|\n\n");
1097219820Sjeff
1098219820Sjeff	OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, "-- CAs:\n");
1099219820Sjeff
1100219820Sjeff	for (p_hca = (ftree_hca_t *) cl_qmap_head(&p_ftree->hca_tbl);
1101219820Sjeff	     p_hca != (ftree_hca_t *) cl_qmap_end(&p_ftree->hca_tbl);
1102219820Sjeff	     p_hca = (ftree_hca_t *) cl_qmap_next(&p_hca->map_item)) {
1103219820Sjeff		__osm_ftree_hca_dump(p_ftree, p_hca);
1104219820Sjeff	}
1105219820Sjeff
1106219820Sjeff	for (i = 0; i < p_ftree->max_switch_rank; i++) {
1107219820Sjeff		OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG,
1108219820Sjeff			"-- Rank %u switches\n", i);
1109219820Sjeff		for (p_sw = (ftree_sw_t *) cl_qmap_head(&p_ftree->sw_tbl);
1110219820Sjeff		     p_sw != (ftree_sw_t *) cl_qmap_end(&p_ftree->sw_tbl);
1111219820Sjeff		     p_sw = (ftree_sw_t *) cl_qmap_next(&p_sw->map_item)) {
1112219820Sjeff			if (p_sw->rank == i)
1113219820Sjeff				__osm_ftree_sw_dump(p_ftree, p_sw);
1114219820Sjeff		}
1115219820Sjeff	}
1116219820Sjeff
1117219820Sjeff	OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, "\n"
1118219820Sjeff		"                       |---------------------------------------|\n"
1119219820Sjeff		"                       |- Full fabric topology dump completed -|\n"
1120219820Sjeff		"                       |---------------------------------------|\n\n");
1121219820Sjeff}				/* __osm_ftree_fabric_dump() */
1122219820Sjeff
1123219820Sjeff/***************************************************/
1124219820Sjeff
1125219820Sjeffstatic void __osm_ftree_fabric_dump_general_info(IN ftree_fabric_t * p_ftree)
1126219820Sjeff{
1127219820Sjeff	uint32_t i, j;
1128219820Sjeff	ftree_sw_t *p_sw;
1129219820Sjeff
1130219820Sjeff	OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_INFO,
1131219820Sjeff		"General fabric topology info\n");
1132219820Sjeff	OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_INFO,
1133219820Sjeff		"============================\n");
1134219820Sjeff
1135219820Sjeff	OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_INFO,
1136219820Sjeff		"  - FatTree rank (roots to leaf switches): %u\n",
1137219820Sjeff		p_ftree->leaf_switch_rank + 1);
1138219820Sjeff	OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_INFO,
1139219820Sjeff		"  - FatTree max switch rank: %u\n", p_ftree->max_switch_rank);
1140219820Sjeff	OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_INFO,
1141219820Sjeff		"  - Fabric has %u CAs (%u of them CNs), %u switches\n",
1142219820Sjeff		cl_qmap_count(&p_ftree->hca_tbl), p_ftree->cn_num,
1143219820Sjeff		cl_qmap_count(&p_ftree->sw_tbl));
1144219820Sjeff
1145219820Sjeff	CL_ASSERT(cl_qmap_count(&p_ftree->hca_tbl) >= p_ftree->cn_num);
1146219820Sjeff
1147219820Sjeff	for (i = 0; i <= p_ftree->max_switch_rank; i++) {
1148219820Sjeff		j = 0;
1149219820Sjeff		for (p_sw = (ftree_sw_t *) cl_qmap_head(&p_ftree->sw_tbl);
1150219820Sjeff		     p_sw != (ftree_sw_t *) cl_qmap_end(&p_ftree->sw_tbl);
1151219820Sjeff		     p_sw = (ftree_sw_t *) cl_qmap_next(&p_sw->map_item)) {
1152219820Sjeff			if (p_sw->rank == i)
1153219820Sjeff				j++;
1154219820Sjeff		}
1155219820Sjeff		if (i == 0)
1156219820Sjeff			OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_INFO,
1157219820Sjeff				"  - Fabric has %u switches at rank %u (roots)\n",
1158219820Sjeff				j, i);
1159219820Sjeff		else if (i == p_ftree->leaf_switch_rank)
1160219820Sjeff			OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_INFO,
1161219820Sjeff				"  - Fabric has %u switches at rank %u (%u of them leafs)\n",
1162219820Sjeff				j, i, p_ftree->leaf_switches_num);
1163219820Sjeff		else
1164219820Sjeff			OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_INFO,
1165219820Sjeff				"  - Fabric has %u switches at rank %u\n", j,
1166219820Sjeff				i);
1167219820Sjeff	}
1168219820Sjeff
1169219820Sjeff	if (osm_log_is_active(&p_ftree->p_osm->log, OSM_LOG_VERBOSE)) {
1170219820Sjeff		OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE,
1171219820Sjeff			"  - Root switches:\n");
1172219820Sjeff		for (p_sw = (ftree_sw_t *) cl_qmap_head(&p_ftree->sw_tbl);
1173219820Sjeff		     p_sw != (ftree_sw_t *) cl_qmap_end(&p_ftree->sw_tbl);
1174219820Sjeff		     p_sw = (ftree_sw_t *) cl_qmap_next(&p_sw->map_item)) {
1175219820Sjeff			if (p_sw->rank == 0)
1176219820Sjeff				OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE,
1177219820Sjeff					"      GUID: 0x%016" PRIx64
1178219820Sjeff					", LID: %u, Index %s\n",
1179219820Sjeff					__osm_ftree_sw_get_guid_ho(p_sw),
1180219820Sjeff					cl_ntoh16(p_sw->base_lid),
1181219820Sjeff					__osm_ftree_tuple_to_str(p_sw->tuple));
1182219820Sjeff		}
1183219820Sjeff
1184219820Sjeff		OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE,
1185219820Sjeff			"  - Leaf switches (sorted by index):\n");
1186219820Sjeff		for (i = 0; i < p_ftree->leaf_switches_num; i++) {
1187219820Sjeff			OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE,
1188219820Sjeff				"      GUID: 0x%016" PRIx64
1189219820Sjeff				", LID: %u, Index %s\n",
1190219820Sjeff				__osm_ftree_sw_get_guid_ho(p_ftree->
1191219820Sjeff							   leaf_switches[i]),
1192219820Sjeff				cl_ntoh16(p_ftree->leaf_switches[i]->base_lid),
1193219820Sjeff				__osm_ftree_tuple_to_str(p_ftree->
1194219820Sjeff							 leaf_switches[i]->
1195219820Sjeff							 tuple));
1196219820Sjeff		}
1197219820Sjeff	}
1198219820Sjeff}				/* __osm_ftree_fabric_dump_general_info() */
1199219820Sjeff
1200219820Sjeff/***************************************************/
1201219820Sjeff
1202219820Sjeffstatic void __osm_ftree_fabric_dump_hca_ordering(IN ftree_fabric_t * p_ftree)
1203219820Sjeff{
1204219820Sjeff	ftree_hca_t *p_hca;
1205219820Sjeff	ftree_sw_t *p_sw;
1206219820Sjeff	ftree_port_group_t *p_group_on_sw;
1207219820Sjeff	ftree_port_group_t *p_group_on_hca;
1208219820Sjeff	uint32_t i;
1209219820Sjeff	uint32_t j;
1210219820Sjeff	unsigned printed_hcas_on_leaf;
1211219820Sjeff
1212219820Sjeff	char path[1024];
1213219820Sjeff	FILE *p_hca_ordering_file;
1214219820Sjeff	char *filename = "opensm-ftree-ca-order.dump";
1215219820Sjeff
1216219820Sjeff	snprintf(path, sizeof(path), "%s/%s",
1217219820Sjeff		 p_ftree->p_osm->subn.opt.dump_files_dir, filename);
1218219820Sjeff	p_hca_ordering_file = fopen(path, "w");
1219219820Sjeff	if (!p_hca_ordering_file) {
1220219820Sjeff		OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR, "ERR AB01: "
1221219820Sjeff			"cannot open file \'%s\': %s\n", filename,
1222219820Sjeff			strerror(errno));
1223219820Sjeff		return;
1224219820Sjeff	}
1225219820Sjeff
1226219820Sjeff	/* for each leaf switch (in indexing order) */
1227219820Sjeff	for (i = 0; i < p_ftree->leaf_switches_num; i++) {
1228219820Sjeff		p_sw = p_ftree->leaf_switches[i];
1229219820Sjeff		printed_hcas_on_leaf = 0;
1230219820Sjeff
1231219820Sjeff		/* for each real CA (CNs and not) connected to this switch */
1232219820Sjeff		for (j = 0; j < p_sw->down_port_groups_num; j++) {
1233219820Sjeff			p_group_on_sw = p_sw->down_port_groups[j];
1234219820Sjeff
1235219820Sjeff			if (p_group_on_sw->remote_node_type != IB_NODE_TYPE_CA)
1236219820Sjeff				continue;
1237219820Sjeff
1238219820Sjeff			p_hca = p_group_on_sw->remote_hca_or_sw.p_hca;
1239219820Sjeff			p_group_on_hca =
1240219820Sjeff			    __osm_ftree_hca_get_port_group_by_remote_lid(p_hca,
1241219820Sjeff									 p_group_on_sw->
1242219820Sjeff									 base_lid);
1243219820Sjeff
1244219820Sjeff			/* treat non-compute nodes as dummies */
1245219820Sjeff			if (!p_group_on_hca->is_cn)
1246219820Sjeff				continue;
1247219820Sjeff
1248219820Sjeff			fprintf(p_hca_ordering_file, "0x%04x\t%s\n",
1249219820Sjeff				cl_ntoh16(p_group_on_hca->base_lid),
1250219820Sjeff				p_hca->p_osm_node->print_desc);
1251219820Sjeff
1252219820Sjeff			printed_hcas_on_leaf++;
1253219820Sjeff		}
1254219820Sjeff
1255219820Sjeff		/* now print missing HCAs */
1256219820Sjeff		for (j = 0;
1257219820Sjeff		     j < (p_ftree->max_cn_per_leaf - printed_hcas_on_leaf); j++)
1258219820Sjeff			fprintf(p_hca_ordering_file, "0xFFFF\tDUMMY\n");
1259219820Sjeff
1260219820Sjeff	}
1261219820Sjeff	/* done going through all the leaf switches */
1262219820Sjeff
1263219820Sjeff	fclose(p_hca_ordering_file);
1264219820Sjeff}				/* __osm_ftree_fabric_dump_hca_ordering() */
1265219820Sjeff
1266219820Sjeff/***************************************************/
1267219820Sjeff
1268219820Sjeffstatic void
1269219820Sjeff__osm_ftree_fabric_assign_tuple(IN ftree_fabric_t * p_ftree,
1270219820Sjeff				IN ftree_sw_t * p_sw,
1271219820Sjeff				IN ftree_tuple_t new_tuple)
1272219820Sjeff{
1273219820Sjeff	memcpy(p_sw->tuple, new_tuple, FTREE_TUPLE_LEN);
1274219820Sjeff	__osm_ftree_fabric_add_sw_by_tuple(p_ftree, p_sw);
1275219820Sjeff}
1276219820Sjeff
1277219820Sjeff/***************************************************/
1278219820Sjeff
1279219820Sjeffstatic void __osm_ftree_fabric_assign_first_tuple(IN ftree_fabric_t * p_ftree,
1280219820Sjeff						  IN ftree_sw_t * p_sw)
1281219820Sjeff{
1282219820Sjeff	uint8_t i;
1283219820Sjeff	ftree_tuple_t new_tuple;
1284219820Sjeff
1285219820Sjeff	__osm_ftree_tuple_init(new_tuple);
1286219820Sjeff	new_tuple[0] = (uint8_t) p_sw->rank;
1287219820Sjeff	for (i = 1; i <= p_sw->rank; i++)
1288219820Sjeff		new_tuple[i] = 0;
1289219820Sjeff
1290219820Sjeff	__osm_ftree_fabric_assign_tuple(p_ftree, p_sw, new_tuple);
1291219820Sjeff}
1292219820Sjeff
1293219820Sjeff/***************************************************/
1294219820Sjeff
1295219820Sjeffstatic void
1296219820Sjeff__osm_ftree_fabric_get_new_tuple(IN ftree_fabric_t * p_ftree,
1297219820Sjeff				 OUT ftree_tuple_t new_tuple,
1298219820Sjeff				 IN ftree_tuple_t from_tuple,
1299219820Sjeff				 IN ftree_direction_t direction)
1300219820Sjeff{
1301219820Sjeff	ftree_sw_t *p_sw;
1302219820Sjeff	ftree_tuple_t temp_tuple;
1303219820Sjeff	uint8_t var_index;
1304219820Sjeff	uint8_t i;
1305219820Sjeff
1306219820Sjeff	__osm_ftree_tuple_init(new_tuple);
1307219820Sjeff	memcpy(temp_tuple, from_tuple, FTREE_TUPLE_LEN);
1308219820Sjeff
1309219820Sjeff	if (direction == FTREE_DIRECTION_DOWN) {
1310219820Sjeff		temp_tuple[0]++;
1311219820Sjeff		var_index = from_tuple[0] + 1;
1312219820Sjeff	} else {
1313219820Sjeff		temp_tuple[0]--;
1314219820Sjeff		var_index = from_tuple[0];
1315219820Sjeff	}
1316219820Sjeff
1317219820Sjeff	for (i = 0; i < 0xFF; i++) {
1318219820Sjeff		temp_tuple[var_index] = i;
1319219820Sjeff		p_sw = __osm_ftree_fabric_get_sw_by_tuple(p_ftree, temp_tuple);
1320219820Sjeff		if (p_sw == NULL)	/* found free tuple */
1321219820Sjeff			break;
1322219820Sjeff	}
1323219820Sjeff
1324219820Sjeff	if (i == 0xFF) {
1325219820Sjeff		/* new tuple not found - there are more than 255 ports in one direction */
1326219820Sjeff		return;
1327219820Sjeff	}
1328219820Sjeff	memcpy(new_tuple, temp_tuple, FTREE_TUPLE_LEN);
1329219820Sjeff
1330219820Sjeff}				/* __osm_ftree_fabric_get_new_tuple() */
1331219820Sjeff
1332219820Sjeff/***************************************************/
1333219820Sjeff
1334219820Sjeffstatic inline boolean_t __osm_ftree_fabric_roots_provided(IN ftree_fabric_t *
1335219820Sjeff							  p_ftree)
1336219820Sjeff{
1337219820Sjeff	return (p_ftree->p_osm->subn.opt.root_guid_file != NULL);
1338219820Sjeff}
1339219820Sjeff
1340219820Sjeff/***************************************************/
1341219820Sjeff
1342219820Sjeffstatic inline boolean_t __osm_ftree_fabric_cns_provided(IN ftree_fabric_t *
1343219820Sjeff							p_ftree)
1344219820Sjeff{
1345219820Sjeff	return (p_ftree->p_osm->subn.opt.cn_guid_file != NULL);
1346219820Sjeff}
1347219820Sjeff
1348219820Sjeff/***************************************************/
1349219820Sjeff
1350219820Sjeffstatic int __osm_ftree_fabric_mark_leaf_switches(IN ftree_fabric_t * p_ftree)
1351219820Sjeff{
1352219820Sjeff	ftree_sw_t *p_sw;
1353219820Sjeff	ftree_hca_t *p_hca;
1354219820Sjeff	ftree_hca_t *p_next_hca;
1355219820Sjeff	unsigned i;
1356219820Sjeff	int res = 0;
1357219820Sjeff
1358219820Sjeff	OSM_LOG_ENTER(&p_ftree->p_osm->log);
1359219820Sjeff
1360219820Sjeff	OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE,
1361219820Sjeff		"Marking leaf switches in fabric\n");
1362219820Sjeff
1363219820Sjeff	/* Scan all the CAs, if they have CNs - find CN port and mark switch
1364219820Sjeff	   that is connected to this port as leaf switch.
1365219820Sjeff	   Also, ensure that this marked leaf has rank of p_ftree->leaf_switch_rank. */
1366219820Sjeff	p_next_hca = (ftree_hca_t *) cl_qmap_head(&p_ftree->hca_tbl);
1367219820Sjeff	while (p_next_hca != (ftree_hca_t *) cl_qmap_end(&p_ftree->hca_tbl)) {
1368219820Sjeff		p_hca = p_next_hca;
1369219820Sjeff		p_next_hca = (ftree_hca_t *) cl_qmap_next(&p_hca->map_item);
1370219820Sjeff		if (!p_hca->cn_num)
1371219820Sjeff			continue;
1372219820Sjeff
1373219820Sjeff		for (i = 0; i < p_hca->up_port_groups_num; i++) {
1374219820Sjeff			if (!p_hca->up_port_groups[i]->is_cn)
1375219820Sjeff				continue;
1376219820Sjeff
1377219820Sjeff			/* In CAs, port group alway has one port, and since this
1378219820Sjeff			   port group is CN, we know that this port is compute node */
1379219820Sjeff			CL_ASSERT(p_hca->up_port_groups[i]->remote_node_type ==
1380219820Sjeff				  IB_NODE_TYPE_SWITCH);
1381219820Sjeff			p_sw = p_hca->up_port_groups[i]->remote_hca_or_sw.p_sw;
1382219820Sjeff
1383219820Sjeff			/* check if this switch was already processed */
1384219820Sjeff			if (p_sw->is_leaf)
1385219820Sjeff				continue;
1386219820Sjeff			p_sw->is_leaf = TRUE;
1387219820Sjeff
1388219820Sjeff			/* ensure that this leaf switch is at the correct tree level */
1389219820Sjeff			if (p_sw->rank != p_ftree->leaf_switch_rank) {
1390219820Sjeff				OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR,
1391219820Sjeff					"ERR AB26: CN port 0x%" PRIx64
1392219820Sjeff					" is connected to switch 0x%" PRIx64
1393219820Sjeff					" with rank %u, "
1394219820Sjeff					"while FatTree leaf rank is %u\n",
1395219820Sjeff					cl_ntoh64(p_hca->up_port_groups[i]->
1396219820Sjeff						  port_guid),
1397219820Sjeff					__osm_ftree_sw_get_guid_ho(p_sw),
1398219820Sjeff					p_sw->rank, p_ftree->leaf_switch_rank);
1399219820Sjeff				res = -1;
1400219820Sjeff				goto Exit;
1401219820Sjeff
1402219820Sjeff			}
1403219820Sjeff		}
1404219820Sjeff	}
1405219820Sjeff
1406219820SjeffExit:
1407219820Sjeff	OSM_LOG_EXIT(&p_ftree->p_osm->log);
1408219820Sjeff	return res;
1409219820Sjeff}				/* __osm_ftree_fabric_mark_leaf_switches() */
1410219820Sjeff
1411219820Sjeff/***************************************************/
1412219820Sjeff
1413219820Sjeffstatic void __osm_ftree_fabric_make_indexing(IN ftree_fabric_t * p_ftree)
1414219820Sjeff{
1415219820Sjeff	ftree_sw_t *p_remote_sw;
1416219820Sjeff	ftree_sw_t *p_sw = NULL;
1417219820Sjeff	ftree_sw_t *p_next_sw;
1418219820Sjeff	ftree_tuple_t new_tuple;
1419219820Sjeff	uint32_t i;
1420219820Sjeff	cl_list_t bfs_list;
1421219820Sjeff	ftree_sw_tbl_element_t *p_sw_tbl_element;
1422219820Sjeff
1423219820Sjeff	OSM_LOG_ENTER(&p_ftree->p_osm->log);
1424219820Sjeff
1425219820Sjeff	OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE,
1426219820Sjeff		"Starting FatTree indexing\n");
1427219820Sjeff
1428219820Sjeff	/* using the first leaf switch as a starting point for indexing algorithm. */
1429219820Sjeff	p_next_sw = (ftree_sw_t *) cl_qmap_head(&p_ftree->sw_tbl);
1430219820Sjeff	while (p_next_sw != (ftree_sw_t *) cl_qmap_end(&p_ftree->sw_tbl)) {
1431219820Sjeff		p_sw = p_next_sw;
1432219820Sjeff		if (p_sw->is_leaf)
1433219820Sjeff			break;
1434219820Sjeff		p_next_sw = (ftree_sw_t *) cl_qmap_next(&p_sw->map_item);
1435219820Sjeff	}
1436219820Sjeff
1437219820Sjeff	CL_ASSERT(p_next_sw != (ftree_sw_t *) cl_qmap_end(&p_ftree->sw_tbl));
1438219820Sjeff
1439219820Sjeff	/* Assign the first tuple to the switch that is used as BFS starting point.
1440219820Sjeff	   The tuple will be as follows: [rank].0.0.0...
1441219820Sjeff	   This fuction also adds the switch it into the switch_by_tuple table. */
1442219820Sjeff	__osm_ftree_fabric_assign_first_tuple(p_ftree, p_sw);
1443219820Sjeff
1444219820Sjeff	OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE,
1445219820Sjeff		"Indexing starting point:\n"
1446219820Sjeff		"                                            - Switch rank  : %u\n"
1447219820Sjeff		"                                            - Switch index : %s\n"
1448219820Sjeff		"                                            - Node LID     : %u\n"
1449219820Sjeff		"                                            - Node GUID    : 0x%016"
1450219820Sjeff		PRIx64 "\n", p_sw->rank, __osm_ftree_tuple_to_str(p_sw->tuple),
1451219820Sjeff		cl_ntoh16(p_sw->base_lid), __osm_ftree_sw_get_guid_ho(p_sw));
1452219820Sjeff
1453219820Sjeff	/*
1454219820Sjeff	 * Now run BFS and assign indexes to all switches
1455219820Sjeff	 * Pseudo code of the algorithm is as follows:
1456219820Sjeff	 *
1457219820Sjeff	 *  * Add first switch to BFS queue
1458219820Sjeff	 *  * While (BFS queue not empty)
1459219820Sjeff	 *      - Pop the switch from the head of the queue
1460219820Sjeff	 *      - Scan all the downward and upward ports
1461219820Sjeff	 *      - For each port
1462219820Sjeff	 *          + Get the remote switch
1463219820Sjeff	 *          + Assign index to the remote switch
1464219820Sjeff	 *          + Add remote switch to the BFS queue
1465219820Sjeff	 */
1466219820Sjeff
1467219820Sjeff	cl_list_init(&bfs_list, cl_qmap_count(&p_ftree->sw_tbl));
1468219820Sjeff	cl_list_insert_tail(&bfs_list,
1469219820Sjeff			    &__osm_ftree_sw_tbl_element_create(p_sw)->map_item);
1470219820Sjeff
1471219820Sjeff	while (!cl_is_list_empty(&bfs_list)) {
1472219820Sjeff		p_sw_tbl_element =
1473219820Sjeff		    (ftree_sw_tbl_element_t *) cl_list_remove_head(&bfs_list);
1474219820Sjeff		p_sw = p_sw_tbl_element->p_sw;
1475219820Sjeff		__osm_ftree_sw_tbl_element_destroy(p_sw_tbl_element);
1476219820Sjeff
1477219820Sjeff		/* Discover all the nodes from ports that are pointing down */
1478219820Sjeff
1479219820Sjeff		if (p_sw->rank >= p_ftree->leaf_switch_rank) {
1480219820Sjeff			/* whether downward ports are pointing to CAs or switches,
1481219820Sjeff			   we don't assign indexes to switches that are located
1482219820Sjeff			   lower than leaf switches */
1483219820Sjeff		} else {
1484219820Sjeff			/* This is not the leaf switch */
1485219820Sjeff			for (i = 0; i < p_sw->down_port_groups_num; i++) {
1486219820Sjeff				/* Work with port groups that are pointing to switches only.
1487219820Sjeff				   No need to assign indexing to HCAs */
1488219820Sjeff				if (p_sw->down_port_groups[i]->
1489219820Sjeff				    remote_node_type != IB_NODE_TYPE_SWITCH)
1490219820Sjeff					continue;
1491219820Sjeff
1492219820Sjeff				p_remote_sw =
1493219820Sjeff				    p_sw->down_port_groups[i]->remote_hca_or_sw.
1494219820Sjeff				    p_sw;
1495219820Sjeff				if (__osm_ftree_tuple_assigned
1496219820Sjeff				    (p_remote_sw->tuple)) {
1497219820Sjeff					/* this switch has been already indexed */
1498219820Sjeff					continue;
1499219820Sjeff				}
1500219820Sjeff				/* allocate new tuple */
1501219820Sjeff				__osm_ftree_fabric_get_new_tuple(p_ftree,
1502219820Sjeff								 new_tuple,
1503219820Sjeff								 p_sw->tuple,
1504219820Sjeff								 FTREE_DIRECTION_DOWN);
1505219820Sjeff				/* Assign the new tuple to the remote switch.
1506219820Sjeff				   This fuction also adds the switch into the switch_by_tuple table. */
1507219820Sjeff				__osm_ftree_fabric_assign_tuple(p_ftree,
1508219820Sjeff								p_remote_sw,
1509219820Sjeff								new_tuple);
1510219820Sjeff
1511219820Sjeff				/* add the newly discovered switch to the BFS queue */
1512219820Sjeff				cl_list_insert_tail(&bfs_list,
1513219820Sjeff						    &__osm_ftree_sw_tbl_element_create
1514219820Sjeff						    (p_remote_sw)->map_item);
1515219820Sjeff			}
1516219820Sjeff			/* Done assigning indexes to all the remote switches
1517219820Sjeff			   that are pointed by the downgoing ports.
1518219820Sjeff			   Now sort port groups according to remote index. */
1519219820Sjeff			qsort(p_sw->down_port_groups,	/* array */
1520219820Sjeff			      p_sw->down_port_groups_num,	/* number of elements */
1521219820Sjeff			      sizeof(ftree_port_group_t *),	/* size of each element */
1522219820Sjeff			      __osm_ftree_compare_port_groups_by_remote_switch_index);	/* comparator */
1523219820Sjeff		}
1524219820Sjeff
1525219820Sjeff		/* Done indexing switches from ports that go down.
1526219820Sjeff		   Now do the same with ports that are pointing up. */
1527219820Sjeff
1528219820Sjeff		if (p_sw->rank != 0) {
1529219820Sjeff			/* This is not the root switch, which means that all the ports
1530219820Sjeff			   that are pointing up are taking us to another switches. */
1531219820Sjeff			for (i = 0; i < p_sw->up_port_groups_num; i++) {
1532219820Sjeff				p_remote_sw =
1533219820Sjeff				    p_sw->up_port_groups[i]->remote_hca_or_sw.
1534219820Sjeff				    p_sw;
1535219820Sjeff				if (__osm_ftree_tuple_assigned
1536219820Sjeff				    (p_remote_sw->tuple))
1537219820Sjeff					continue;
1538219820Sjeff				/* allocate new tuple */
1539219820Sjeff				__osm_ftree_fabric_get_new_tuple(p_ftree,
1540219820Sjeff								 new_tuple,
1541219820Sjeff								 p_sw->tuple,
1542219820Sjeff								 FTREE_DIRECTION_UP);
1543219820Sjeff				/* Assign the new tuple to the remote switch.
1544219820Sjeff				   This fuction also adds the switch to the
1545219820Sjeff				   switch_by_tuple table. */
1546219820Sjeff				__osm_ftree_fabric_assign_tuple(p_ftree,
1547219820Sjeff								p_remote_sw,
1548219820Sjeff								new_tuple);
1549219820Sjeff				/* add the newly discovered switch to the BFS queue */
1550219820Sjeff				cl_list_insert_tail(&bfs_list,
1551219820Sjeff						    &__osm_ftree_sw_tbl_element_create
1552219820Sjeff						    (p_remote_sw)->map_item);
1553219820Sjeff			}
1554219820Sjeff			/* Done assigning indexes to all the remote switches
1555219820Sjeff			   that are pointed by the upgoing ports.
1556219820Sjeff			   Now sort port groups according to remote index. */
1557219820Sjeff			qsort(p_sw->up_port_groups,	/* array */
1558219820Sjeff			      p_sw->up_port_groups_num,	/* number of elements */
1559219820Sjeff			      sizeof(ftree_port_group_t *),	/* size of each element */
1560219820Sjeff			      __osm_ftree_compare_port_groups_by_remote_switch_index);	/* comparator */
1561219820Sjeff		}
1562219820Sjeff		/* Done assigning indexes to all the switches that are directly connected
1563219820Sjeff		   to the current switch - go to the next switch in the BFS queue */
1564219820Sjeff	}
1565219820Sjeff	cl_list_destroy(&bfs_list);
1566219820Sjeff
1567219820Sjeff	OSM_LOG_EXIT(&p_ftree->p_osm->log);
1568219820Sjeff}				/* __osm_ftree_fabric_make_indexing() */
1569219820Sjeff
1570219820Sjeff/***************************************************/
1571219820Sjeff
1572219820Sjeffstatic int __osm_ftree_fabric_create_leaf_switch_array(IN ftree_fabric_t *
1573219820Sjeff						       p_ftree)
1574219820Sjeff{
1575219820Sjeff	ftree_sw_t *p_sw;
1576219820Sjeff	ftree_sw_t *p_next_sw;
1577219820Sjeff	ftree_sw_t **all_switches_at_leaf_level;
1578219820Sjeff	unsigned i;
1579219820Sjeff	unsigned all_leaf_idx = 0;
1580219820Sjeff	unsigned first_leaf_idx;
1581219820Sjeff	unsigned last_leaf_idx;
1582219820Sjeff	int res = 0;
1583219820Sjeff
1584219820Sjeff	OSM_LOG_ENTER(&p_ftree->p_osm->log);
1585219820Sjeff
1586219820Sjeff	/* create array of ALL the switches that have leaf rank */
1587219820Sjeff	all_switches_at_leaf_level = (ftree_sw_t **)
1588219820Sjeff	    malloc(cl_qmap_count(&p_ftree->sw_tbl) * sizeof(ftree_sw_t *));
1589219820Sjeff	if (!all_switches_at_leaf_level) {
1590219820Sjeff		osm_log(&p_ftree->p_osm->log, OSM_LOG_SYS,
1591219820Sjeff			"Fat-tree routing: Memory allocation failed\n");
1592219820Sjeff		res = -1;
1593219820Sjeff		goto Exit;
1594219820Sjeff	}
1595219820Sjeff	memset(all_switches_at_leaf_level, 0,
1596219820Sjeff	       cl_qmap_count(&p_ftree->sw_tbl) * sizeof(ftree_sw_t *));
1597219820Sjeff
1598219820Sjeff	p_next_sw = (ftree_sw_t *) cl_qmap_head(&p_ftree->sw_tbl);
1599219820Sjeff	while (p_next_sw != (ftree_sw_t *) cl_qmap_end(&p_ftree->sw_tbl)) {
1600219820Sjeff		p_sw = p_next_sw;
1601219820Sjeff		p_next_sw = (ftree_sw_t *) cl_qmap_next(&p_sw->map_item);
1602219820Sjeff		if (p_sw->rank == p_ftree->leaf_switch_rank) {
1603219820Sjeff			OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG,
1604219820Sjeff				"Adding switch 0x%" PRIx64
1605219820Sjeff				" to full leaf switch array\n",
1606219820Sjeff				__osm_ftree_sw_get_guid_ho(p_sw));
1607219820Sjeff			all_switches_at_leaf_level[all_leaf_idx++] = p_sw;
1608219820Sjeff
1609219820Sjeff		}
1610219820Sjeff	}
1611219820Sjeff
1612219820Sjeff	/* quick-sort array of leaf switches by index */
1613219820Sjeff	qsort(all_switches_at_leaf_level,	/* array */
1614219820Sjeff	      all_leaf_idx,	/* number of elements */
1615219820Sjeff	      sizeof(ftree_sw_t *),	/* size of each element */
1616219820Sjeff	      __osm_ftree_compare_switches_by_index);	/* comparator */
1617219820Sjeff
1618219820Sjeff	/* check the first and the last REAL leaf (the one
1619219820Sjeff	   that has CNs) in the array of all the leafs */
1620219820Sjeff
1621219820Sjeff	first_leaf_idx = all_leaf_idx;
1622219820Sjeff	last_leaf_idx = 0;
1623219820Sjeff	for (i = 0; i < all_leaf_idx; i++) {
1624219820Sjeff		if (all_switches_at_leaf_level[i]->is_leaf) {
1625219820Sjeff			if (i < first_leaf_idx)
1626219820Sjeff				first_leaf_idx = i;
1627219820Sjeff			last_leaf_idx = i;
1628219820Sjeff		}
1629219820Sjeff	}
1630219820Sjeff	CL_ASSERT(first_leaf_idx < last_leaf_idx);
1631219820Sjeff
1632219820Sjeff	OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG,
1633219820Sjeff		"Full leaf array info: first_leaf_idx = %u, last_leaf_idx = %u\n",
1634219820Sjeff		first_leaf_idx, last_leaf_idx);
1635219820Sjeff
1636219820Sjeff	/* Create array of REAL leaf switches, sorted by index.
1637219820Sjeff	   This array may contain switches at the same rank w/o CNs,
1638219820Sjeff	   in case this is the order of indexing. */
1639219820Sjeff	p_ftree->leaf_switches_num = last_leaf_idx - first_leaf_idx + 1;
1640219820Sjeff	p_ftree->leaf_switches = (ftree_sw_t **)
1641219820Sjeff	    malloc(p_ftree->leaf_switches_num * sizeof(ftree_sw_t *));
1642219820Sjeff	if (!p_ftree->leaf_switches) {
1643219820Sjeff		osm_log(&p_ftree->p_osm->log, OSM_LOG_SYS,
1644219820Sjeff			"Fat-tree routing: Memory allocation failed\n");
1645219820Sjeff		res = -1;
1646219820Sjeff		goto Exit;
1647219820Sjeff	}
1648219820Sjeff
1649219820Sjeff	memcpy(p_ftree->leaf_switches,
1650219820Sjeff	       &(all_switches_at_leaf_level[first_leaf_idx]),
1651219820Sjeff	       p_ftree->leaf_switches_num * sizeof(ftree_sw_t *));
1652219820Sjeff
1653219820Sjeff	free(all_switches_at_leaf_level);
1654219820Sjeff
1655219820Sjeff	OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG,
1656219820Sjeff		"Created array of %u leaf switches\n",
1657219820Sjeff		p_ftree->leaf_switches_num);
1658219820Sjeff
1659219820SjeffExit:
1660219820Sjeff	OSM_LOG_EXIT(&p_ftree->p_osm->log);
1661219820Sjeff	return res;
1662219820Sjeff}				/* __osm_ftree_fabric_create_leaf_switch_array() */
1663219820Sjeff
1664219820Sjeff/***************************************************/
1665219820Sjeff
1666219820Sjeffstatic void __osm_ftree_fabric_set_max_cn_per_leaf(IN ftree_fabric_t * p_ftree)
1667219820Sjeff{
1668219820Sjeff	unsigned i;
1669219820Sjeff	unsigned j;
1670219820Sjeff	unsigned cns_on_this_leaf;
1671219820Sjeff	ftree_sw_t *p_sw;
1672219820Sjeff	ftree_port_group_t *p_group;
1673219820Sjeff
1674219820Sjeff	for (i = 0; i < p_ftree->leaf_switches_num; i++) {
1675219820Sjeff		p_sw = p_ftree->leaf_switches[i];
1676219820Sjeff		cns_on_this_leaf = 0;
1677219820Sjeff		for (j = 0; j < p_sw->down_port_groups_num; j++) {
1678219820Sjeff			p_group = p_sw->down_port_groups[j];
1679219820Sjeff			if (p_group->remote_node_type != IB_NODE_TYPE_CA)
1680219820Sjeff				continue;
1681219820Sjeff			cns_on_this_leaf +=
1682219820Sjeff			    p_group->remote_hca_or_sw.p_hca->cn_num;
1683219820Sjeff		}
1684219820Sjeff		if (cns_on_this_leaf > p_ftree->max_cn_per_leaf)
1685219820Sjeff			p_ftree->max_cn_per_leaf = cns_on_this_leaf;
1686219820Sjeff	}
1687219820Sjeff}				/* __osm_ftree_fabric_set_max_cn_per_leaf() */
1688219820Sjeff
1689219820Sjeff/***************************************************/
1690219820Sjeff
1691219820Sjeffstatic boolean_t __osm_ftree_fabric_validate_topology(IN ftree_fabric_t *
1692219820Sjeff						      p_ftree)
1693219820Sjeff{
1694219820Sjeff	ftree_port_group_t *p_group;
1695219820Sjeff	ftree_port_group_t *p_ref_group;
1696219820Sjeff	ftree_sw_t *p_sw;
1697219820Sjeff	ftree_sw_t *p_next_sw;
1698219820Sjeff	ftree_sw_t **reference_sw_arr;
1699219820Sjeff	uint16_t tree_rank = __osm_ftree_fabric_get_rank(p_ftree);
1700219820Sjeff	boolean_t res = TRUE;
1701219820Sjeff	uint8_t i;
1702219820Sjeff
1703219820Sjeff	OSM_LOG_ENTER(&p_ftree->p_osm->log);
1704219820Sjeff
1705219820Sjeff	OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE,
1706219820Sjeff		"Validating fabric topology\n");
1707219820Sjeff
1708219820Sjeff	reference_sw_arr =
1709219820Sjeff	    (ftree_sw_t **) malloc(tree_rank * sizeof(ftree_sw_t *));
1710219820Sjeff	if (reference_sw_arr == NULL) {
1711219820Sjeff		osm_log(&p_ftree->p_osm->log, OSM_LOG_SYS,
1712219820Sjeff			"Fat-tree routing: Memory allocation failed\n");
1713219820Sjeff		return FALSE;
1714219820Sjeff	}
1715219820Sjeff	memset(reference_sw_arr, 0, tree_rank * sizeof(ftree_sw_t *));
1716219820Sjeff
1717219820Sjeff	p_next_sw = (ftree_sw_t *) cl_qmap_head(&p_ftree->sw_tbl);
1718219820Sjeff	while (res && p_next_sw != (ftree_sw_t *) cl_qmap_end(&p_ftree->sw_tbl)) {
1719219820Sjeff		p_sw = p_next_sw;
1720219820Sjeff		p_next_sw = (ftree_sw_t *) cl_qmap_next(&p_sw->map_item);
1721219820Sjeff
1722219820Sjeff		if (!reference_sw_arr[p_sw->rank]) {
1723219820Sjeff			/* This is the first switch in the current level that
1724219820Sjeff			   we're checking - use it as a reference */
1725219820Sjeff			reference_sw_arr[p_sw->rank] = p_sw;
1726219820Sjeff		} else {
1727219820Sjeff			/* compare this switch properties to the reference switch */
1728219820Sjeff
1729219820Sjeff			if (reference_sw_arr[p_sw->rank]->up_port_groups_num !=
1730219820Sjeff			    p_sw->up_port_groups_num) {
1731219820Sjeff				OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR,
1732219820Sjeff					"ERR AB09: Different number of upward port groups on switches:\n"
1733219820Sjeff					"       GUID 0x%016" PRIx64
1734219820Sjeff					", LID %u, Index %s - %u groups\n"
1735219820Sjeff					"       GUID 0x%016" PRIx64
1736219820Sjeff					", LID %u, Index %s - %u groups\n",
1737219820Sjeff					__osm_ftree_sw_get_guid_ho
1738219820Sjeff					(reference_sw_arr[p_sw->rank]),
1739219820Sjeff					cl_ntoh16(reference_sw_arr[p_sw->rank]->
1740219820Sjeff						  base_lid),
1741219820Sjeff					__osm_ftree_tuple_to_str
1742219820Sjeff					(reference_sw_arr[p_sw->rank]->tuple),
1743219820Sjeff					reference_sw_arr[p_sw->rank]->
1744219820Sjeff					up_port_groups_num,
1745219820Sjeff					__osm_ftree_sw_get_guid_ho(p_sw),
1746219820Sjeff					cl_ntoh16(p_sw->base_lid),
1747219820Sjeff					__osm_ftree_tuple_to_str(p_sw->tuple),
1748219820Sjeff					p_sw->up_port_groups_num);
1749219820Sjeff				res = FALSE;
1750219820Sjeff				break;
1751219820Sjeff			}
1752219820Sjeff
1753219820Sjeff			if (p_sw->rank != (tree_rank - 1) &&
1754219820Sjeff			    reference_sw_arr[p_sw->rank]->
1755219820Sjeff			    down_port_groups_num !=
1756219820Sjeff			    p_sw->down_port_groups_num) {
1757219820Sjeff				/* we're allowing some hca's to be missing */
1758219820Sjeff				OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR,
1759219820Sjeff					"ERR AB0A: Different number of downward port groups on switches:\n"
1760219820Sjeff					"       GUID 0x%016" PRIx64
1761219820Sjeff					", LID %u, Index %s - %u port groups\n"
1762219820Sjeff					"       GUID 0x%016" PRIx64
1763219820Sjeff					", LID %u, Index %s - %u port groups\n",
1764219820Sjeff					__osm_ftree_sw_get_guid_ho
1765219820Sjeff					(reference_sw_arr[p_sw->rank]),
1766219820Sjeff					cl_ntoh16(reference_sw_arr[p_sw->rank]->
1767219820Sjeff						  base_lid),
1768219820Sjeff					__osm_ftree_tuple_to_str
1769219820Sjeff					(reference_sw_arr[p_sw->rank]->tuple),
1770219820Sjeff					reference_sw_arr[p_sw->rank]->
1771219820Sjeff					down_port_groups_num,
1772219820Sjeff					__osm_ftree_sw_get_guid_ho(p_sw),
1773219820Sjeff					cl_ntoh16(p_sw->base_lid),
1774219820Sjeff					__osm_ftree_tuple_to_str(p_sw->tuple),
1775219820Sjeff					p_sw->down_port_groups_num);
1776219820Sjeff				res = FALSE;
1777219820Sjeff				break;
1778219820Sjeff			}
1779219820Sjeff
1780219820Sjeff			if (reference_sw_arr[p_sw->rank]->up_port_groups_num !=
1781219820Sjeff			    0) {
1782219820Sjeff				p_ref_group =
1783219820Sjeff				    reference_sw_arr[p_sw->rank]->
1784219820Sjeff				    up_port_groups[0];
1785219820Sjeff				for (i = 0; i < p_sw->up_port_groups_num; i++) {
1786219820Sjeff					p_group = p_sw->up_port_groups[i];
1787219820Sjeff					if (cl_ptr_vector_get_size
1788219820Sjeff					    (&p_ref_group->ports) !=
1789219820Sjeff					    cl_ptr_vector_get_size(&p_group->
1790219820Sjeff								   ports)) {
1791219820Sjeff						OSM_LOG(&p_ftree->p_osm->log,
1792219820Sjeff							OSM_LOG_ERROR,
1793219820Sjeff							"ERR AB0B: Different number of ports in an upward port group on switches:\n"
1794219820Sjeff							"       GUID 0x%016"
1795219820Sjeff							PRIx64
1796219820Sjeff							", LID %u, Index %s - %u ports\n"
1797219820Sjeff							"       GUID 0x%016"
1798219820Sjeff							PRIx64
1799219820Sjeff							", LID %u, Index %s - %u ports\n",
1800219820Sjeff							__osm_ftree_sw_get_guid_ho
1801219820Sjeff							(reference_sw_arr
1802219820Sjeff							 [p_sw->rank]),
1803219820Sjeff							cl_ntoh16
1804219820Sjeff							(reference_sw_arr
1805219820Sjeff							 [p_sw->rank]->
1806219820Sjeff							 base_lid),
1807219820Sjeff							__osm_ftree_tuple_to_str
1808219820Sjeff							(reference_sw_arr
1809219820Sjeff							 [p_sw->rank]->tuple),
1810219820Sjeff							cl_ptr_vector_get_size
1811219820Sjeff							(&p_ref_group->ports),
1812219820Sjeff							__osm_ftree_sw_get_guid_ho
1813219820Sjeff							(p_sw),
1814219820Sjeff							cl_ntoh16(p_sw->
1815219820Sjeff								  base_lid),
1816219820Sjeff							__osm_ftree_tuple_to_str
1817219820Sjeff							(p_sw->tuple),
1818219820Sjeff							cl_ptr_vector_get_size
1819219820Sjeff							(&p_group->ports));
1820219820Sjeff						res = FALSE;
1821219820Sjeff						break;
1822219820Sjeff					}
1823219820Sjeff				}
1824219820Sjeff			}
1825219820Sjeff			if (reference_sw_arr[p_sw->rank]->
1826219820Sjeff			    down_port_groups_num != 0
1827219820Sjeff			    && p_sw->rank != (tree_rank - 1)) {
1828219820Sjeff				/* we're allowing some hca's to be missing */
1829219820Sjeff				p_ref_group =
1830219820Sjeff				    reference_sw_arr[p_sw->rank]->
1831219820Sjeff				    down_port_groups[0];
1832219820Sjeff				for (i = 0; i < p_sw->down_port_groups_num; i++) {
1833219820Sjeff					p_group = p_sw->down_port_groups[0];
1834219820Sjeff					if (cl_ptr_vector_get_size
1835219820Sjeff					    (&p_ref_group->ports) !=
1836219820Sjeff					    cl_ptr_vector_get_size(&p_group->
1837219820Sjeff								   ports)) {
1838219820Sjeff						OSM_LOG(&p_ftree->p_osm->log,
1839219820Sjeff							OSM_LOG_ERROR,
1840219820Sjeff							"ERR AB0C: Different number of ports in an downward port group on switches:\n"
1841219820Sjeff							"       GUID 0x%016"
1842219820Sjeff							PRIx64
1843219820Sjeff							", LID %u, Index %s - %u ports\n"
1844219820Sjeff							"       GUID 0x%016"
1845219820Sjeff							PRIx64
1846219820Sjeff							", LID %u, Index %s - %u ports\n",
1847219820Sjeff							__osm_ftree_sw_get_guid_ho
1848219820Sjeff							(reference_sw_arr
1849219820Sjeff							 [p_sw->rank]),
1850219820Sjeff							cl_ntoh16
1851219820Sjeff							(reference_sw_arr
1852219820Sjeff							 [p_sw->rank]->
1853219820Sjeff							 base_lid),
1854219820Sjeff							__osm_ftree_tuple_to_str
1855219820Sjeff							(reference_sw_arr
1856219820Sjeff							 [p_sw->rank]->tuple),
1857219820Sjeff							cl_ptr_vector_get_size
1858219820Sjeff							(&p_ref_group->ports),
1859219820Sjeff							__osm_ftree_sw_get_guid_ho
1860219820Sjeff							(p_sw),
1861219820Sjeff							cl_ntoh16(p_sw->
1862219820Sjeff								  base_lid),
1863219820Sjeff							__osm_ftree_tuple_to_str
1864219820Sjeff							(p_sw->tuple),
1865219820Sjeff							cl_ptr_vector_get_size
1866219820Sjeff							(&p_group->ports));
1867219820Sjeff						res = FALSE;
1868219820Sjeff						break;
1869219820Sjeff					}
1870219820Sjeff				}
1871219820Sjeff			}
1872219820Sjeff		}		/* end of else */
1873219820Sjeff	}			/* end of while */
1874219820Sjeff
1875219820Sjeff	if (res == TRUE)
1876219820Sjeff		OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE,
1877219820Sjeff			"Fabric topology has been identified as FatTree\n");
1878219820Sjeff	else
1879219820Sjeff		OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR,
1880219820Sjeff			"ERR AB0D: Fabric topology hasn't been identified as FatTree\n");
1881219820Sjeff
1882219820Sjeff	free(reference_sw_arr);
1883219820Sjeff	OSM_LOG_EXIT(&p_ftree->p_osm->log);
1884219820Sjeff	return res;
1885219820Sjeff}				/* __osm_ftree_fabric_validate_topology() */
1886219820Sjeff
1887219820Sjeff/***************************************************
1888219820Sjeff ***************************************************/
1889219820Sjeff
1890219820Sjeffstatic void __osm_ftree_set_sw_fwd_table(IN cl_map_item_t * const p_map_item,
1891219820Sjeff					 IN void *context)
1892219820Sjeff{
1893219820Sjeff	ftree_sw_t *p_sw = (ftree_sw_t * const)p_map_item;
1894219820Sjeff	ftree_fabric_t *p_ftree = (ftree_fabric_t *) context;
1895219820Sjeff
1896219820Sjeff	p_sw->p_osm_sw->max_lid_ho = p_ftree->lft_max_lid_ho;
1897219820Sjeff	osm_ucast_mgr_set_fwd_table(&p_ftree->p_osm->sm.ucast_mgr,
1898219820Sjeff				    p_sw->p_osm_sw);
1899219820Sjeff}
1900219820Sjeff
1901219820Sjeff/***************************************************
1902219820Sjeff ***************************************************/
1903219820Sjeff
1904219820Sjeff/*
1905219820Sjeff * Function: assign-up-going-port-by-descending-down
1906219820Sjeff * Given   : a switch and a LID
1907219820Sjeff * Pseudo code:
1908219820Sjeff *    foreach down-going-port-group (in indexing order)
1909219820Sjeff *        skip this group if the LFT(LID) port is part of this group
1910219820Sjeff *        find the least loaded port of the group (scan in indexing order)
1911219820Sjeff *        r-port is the remote port connected to it
1912219820Sjeff *        assign the remote switch node LFT(LID) to r-port
1913219820Sjeff *        increase r-port usage counter
1914219820Sjeff *        assign-up-going-port-by-descending-down to r-port node (recursion)
1915219820Sjeff */
1916219820Sjeff
1917219820Sjeffstatic void
1918219820Sjeff__osm_ftree_fabric_route_upgoing_by_going_down(IN ftree_fabric_t * p_ftree,
1919219820Sjeff					       IN ftree_sw_t * p_sw,
1920219820Sjeff					       IN ftree_sw_t * p_prev_sw,
1921219820Sjeff					       IN ib_net16_t target_lid,
1922219820Sjeff					       IN uint8_t target_rank,
1923219820Sjeff					       IN boolean_t is_real_lid,
1924219820Sjeff					       IN boolean_t is_main_path,
1925219820Sjeff					       IN uint8_t highest_rank_in_route)
1926219820Sjeff{
1927219820Sjeff	ftree_sw_t *p_remote_sw;
1928219820Sjeff	uint16_t ports_num;
1929219820Sjeff	ftree_port_group_t *p_group;
1930219820Sjeff	ftree_port_t *p_port;
1931219820Sjeff	ftree_port_t *p_min_port;
1932219820Sjeff	uint16_t i;
1933219820Sjeff	uint16_t j;
1934219820Sjeff	uint16_t k;
1935219820Sjeff
1936219820Sjeff	/* we shouldn't enter here if both real_lid and main_path are false */
1937219820Sjeff	CL_ASSERT(is_real_lid || is_main_path);
1938219820Sjeff
1939219820Sjeff	/* if there is no down-going ports */
1940219820Sjeff	if (p_sw->down_port_groups_num == 0)
1941219820Sjeff		return;
1942219820Sjeff
1943219820Sjeff	/* promote the index that indicates which group should we
1944219820Sjeff	   start with when going through all the downgoing groups */
1945219820Sjeff	p_sw->down_port_groups_idx =
1946219820Sjeff		(p_sw->down_port_groups_idx + 1) % p_sw->down_port_groups_num;
1947219820Sjeff
1948219820Sjeff	/* foreach down-going port group (in indexing order) */
1949219820Sjeff	i = p_sw->down_port_groups_idx;
1950219820Sjeff	for (k = 0; k < p_sw->down_port_groups_num; k++) {
1951219820Sjeff
1952219820Sjeff		p_group = p_sw->down_port_groups[i];
1953219820Sjeff		i = (i + 1) % p_sw->down_port_groups_num;
1954219820Sjeff
1955219820Sjeff		/* Skip this port group unless it points to a switch */
1956219820Sjeff		if (p_group->remote_node_type != IB_NODE_TYPE_SWITCH)
1957219820Sjeff			continue;
1958219820Sjeff
1959219820Sjeff		if (p_prev_sw
1960219820Sjeff		    && (p_group->remote_base_lid == p_prev_sw->base_lid)) {
1961219820Sjeff			/* This port group has a port that was used when we entered this switch,
1962219820Sjeff			   which means that the current group points to the switch where we were
1963219820Sjeff			   at the previous step of the algorithm (before going up).
1964219820Sjeff			   Skipping this group. */
1965219820Sjeff			continue;
1966219820Sjeff		}
1967219820Sjeff
1968219820Sjeff		/* find the least loaded port of the group (in indexing order) */
1969219820Sjeff		p_min_port = NULL;
1970219820Sjeff		ports_num = (uint16_t) cl_ptr_vector_get_size(&p_group->ports);
1971219820Sjeff		/* ToDo: no need to select a least loaded port for non-main path.
1972219820Sjeff		   Think about optimization. */
1973219820Sjeff		for (j = 0; j < ports_num; j++) {
1974219820Sjeff			cl_ptr_vector_at(&p_group->ports, j, (void *)&p_port);
1975219820Sjeff			if (!p_min_port) {
1976219820Sjeff				/* first port that we're checking - set as port with the lowest load */
1977219820Sjeff				p_min_port = p_port;
1978219820Sjeff			} else if (p_port->counter_up < p_min_port->counter_up) {
1979219820Sjeff				/* this port is less loaded - use it as min */
1980219820Sjeff				p_min_port = p_port;
1981219820Sjeff			}
1982219820Sjeff		}
1983219820Sjeff		/* At this point we have selected a port in this group with the
1984219820Sjeff		   lowest load of upgoing routes.
1985219820Sjeff		   Set on the remote switch how to get to the target_lid -
1986219820Sjeff		   set LFT(target_lid) on the remote switch to the remote port */
1987219820Sjeff		p_remote_sw = p_group->remote_hca_or_sw.p_sw;
1988219820Sjeff
1989219820Sjeff		if (osm_switch_get_least_hops(p_remote_sw->p_osm_sw,
1990219820Sjeff					      cl_ntoh16(target_lid)) !=
1991219820Sjeff		    OSM_NO_PATH) {
1992219820Sjeff			/* Loop in the fabric - we already routed the remote switch
1993219820Sjeff			   on our way UP, and now we see it again on our way DOWN */
1994219820Sjeff			OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG,
1995219820Sjeff				"Loop of lenght %d in the fabric:\n                             "
1996219820Sjeff				"Switch %s (LID %u) closes loop through switch %s (LID %u)\n",
1997219820Sjeff				(p_remote_sw->rank - highest_rank_in_route) * 2,
1998219820Sjeff				__osm_ftree_tuple_to_str(p_remote_sw->tuple),
1999219820Sjeff				cl_ntoh16(p_group->base_lid),
2000219820Sjeff				__osm_ftree_tuple_to_str(p_sw->tuple),
2001219820Sjeff				cl_ntoh16(p_group->remote_base_lid));
2002219820Sjeff			continue;
2003219820Sjeff		}
2004219820Sjeff
2005219820Sjeff		/* Four possible cases:
2006219820Sjeff		 *
2007219820Sjeff		 *  1. is_real_lid == TRUE && is_main_path == TRUE:
2008219820Sjeff		 *      - going DOWN(TRUE,TRUE) through ALL the groups
2009219820Sjeff		 *         + promoting port counter
2010219820Sjeff		 *         + setting path in remote switch fwd tbl
2011219820Sjeff		 *         + setting hops in remote switch on all the ports of each group
2012219820Sjeff		 *
2013219820Sjeff		 *  2. is_real_lid == TRUE && is_main_path == FALSE:
2014219820Sjeff		 *      - going DOWN(TRUE,FALSE) through ALL the groups but only if
2015219820Sjeff		 *        the remote (lower) switch hasn't been already configured
2016219820Sjeff		 *        for this target LID
2017219820Sjeff		 *         + NOT promoting port counter
2018219820Sjeff		 *         + setting path in remote switch fwd tbl if it hasn't been set yet
2019219820Sjeff		 *         + setting hops in remote switch on all the ports of each group
2020219820Sjeff		 *           if it hasn't been set yet
2021219820Sjeff		 *
2022219820Sjeff		 *  3. is_real_lid == FALSE && is_main_path == TRUE:
2023219820Sjeff		 *      - going DOWN(FALSE,TRUE) through ALL the groups
2024219820Sjeff		 *         + promoting port counter
2025219820Sjeff		 *         + NOT setting path in remote switch fwd tbl
2026219820Sjeff		 *         + NOT setting hops in remote switch
2027219820Sjeff		 *
2028219820Sjeff		 *  4. is_real_lid == FALSE && is_main_path == FALSE:
2029219820Sjeff		 *      - illegal state - we shouldn't get here
2030219820Sjeff		 */
2031219820Sjeff
2032219820Sjeff		/* second case: skip the port group if the remote (lower)
2033219820Sjeff		   switch has been already configured for this target LID */
2034219820Sjeff		if (is_real_lid && !is_main_path &&
2035219820Sjeff		    p_remote_sw->p_osm_sw->new_lft[cl_ntoh16(target_lid)] != OSM_NO_PATH)
2036219820Sjeff			continue;
2037219820Sjeff
2038219820Sjeff		/* setting fwd tbl port only if this is real LID */
2039219820Sjeff		if (is_real_lid) {
2040219820Sjeff			p_remote_sw->p_osm_sw->new_lft[cl_ntoh16(target_lid)] =
2041219820Sjeff				p_min_port->remote_port_num;
2042219820Sjeff			OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG,
2043219820Sjeff				"Switch %s: set path to CA LID %u through port %u\n",
2044219820Sjeff				__osm_ftree_tuple_to_str(p_remote_sw->tuple),
2045219820Sjeff				cl_ntoh16(target_lid),
2046219820Sjeff				p_min_port->remote_port_num);
2047219820Sjeff
2048219820Sjeff			/* On the remote switch that is pointed by the p_group,
2049219820Sjeff			   set hops for ALL the ports in the remote group. */
2050219820Sjeff
2051219820Sjeff			for (j = 0; j < ports_num; j++) {
2052219820Sjeff				cl_ptr_vector_at(&p_group->ports, j,
2053219820Sjeff						 (void *)&p_port);
2054219820Sjeff
2055219820Sjeff				__osm_ftree_sw_set_hops(p_remote_sw,
2056219820Sjeff							cl_ntoh16(target_lid),
2057219820Sjeff							p_port->remote_port_num,
2058219820Sjeff							((target_rank -
2059219820Sjeff							  highest_rank_in_route)
2060219820Sjeff							 + (p_remote_sw->rank -
2061219820Sjeff							    highest_rank_in_route)));
2062219820Sjeff			}
2063219820Sjeff
2064219820Sjeff		}
2065219820Sjeff
2066219820Sjeff		/* The number of upgoing routes is tracked in the
2067219820Sjeff		   p_port->counter_up counter of the port that belongs to
2068219820Sjeff		   the upper side of the link (on switch with lower rank).
2069219820Sjeff		   Counter is promoted only if we're routing LID on the main
2070219820Sjeff		   path (whether it's a real LID or a dummy one). */
2071219820Sjeff		if (is_main_path)
2072219820Sjeff			p_min_port->counter_up++;
2073219820Sjeff
2074219820Sjeff		/* Recursion step:
2075219820Sjeff		   Assign upgoing ports by stepping down, starting on REMOTE switch */
2076219820Sjeff		__osm_ftree_fabric_route_upgoing_by_going_down(p_ftree, p_remote_sw,	/* remote switch - used as a route-upgoing alg. start point */
2077219820Sjeff							       NULL,	/* prev. position - NULL to mark that we went down and not up */
2078219820Sjeff							       target_lid,	/* LID that we're routing to */
2079219820Sjeff							       target_rank,	/* rank of the LID that we're routing to */
2080219820Sjeff							       is_real_lid,	/* whether the target LID is real or dummy */
2081219820Sjeff							       is_main_path,	/* whether this is path to HCA that should by tracked by counters */
2082219820Sjeff							       highest_rank_in_route);	/* highest visited point in the tree before going down */
2083219820Sjeff	}
2084219820Sjeff	/* done scanning all the down-going port groups */
2085219820Sjeff
2086219820Sjeff}				/* __osm_ftree_fabric_route_upgoing_by_going_down() */
2087219820Sjeff
2088219820Sjeff/***************************************************/
2089219820Sjeff
2090219820Sjeff/*
2091219820Sjeff * Function: assign-down-going-port-by-ascending-up
2092219820Sjeff * Given   : a switch and a LID
2093219820Sjeff * Pseudo code:
2094219820Sjeff *    find the least loaded port of all the upgoing groups (scan in indexing order)
2095219820Sjeff *    assign the LFT(LID) of remote switch to that port
2096219820Sjeff *    track that port usage
2097219820Sjeff *    assign-up-going-port-by-descending-down on CURRENT switch
2098219820Sjeff *    assign-down-going-port-by-ascending-up on REMOTE switch (recursion)
2099219820Sjeff */
2100219820Sjeff
2101219820Sjeffstatic void
2102219820Sjeff__osm_ftree_fabric_route_downgoing_by_going_up(IN ftree_fabric_t * p_ftree,
2103219820Sjeff					       IN ftree_sw_t * p_sw,
2104219820Sjeff					       IN ftree_sw_t * p_prev_sw,
2105219820Sjeff					       IN ib_net16_t target_lid,
2106219820Sjeff					       IN uint8_t target_rank,
2107219820Sjeff					       IN boolean_t is_real_lid,
2108219820Sjeff					       IN boolean_t is_main_path)
2109219820Sjeff{
2110219820Sjeff	ftree_sw_t *p_remote_sw;
2111219820Sjeff	uint16_t ports_num;
2112219820Sjeff	ftree_port_group_t *p_group;
2113219820Sjeff	ftree_port_t *p_port;
2114219820Sjeff	ftree_port_group_t *p_min_group;
2115219820Sjeff	ftree_port_t *p_min_port;
2116219820Sjeff	uint16_t i;
2117219820Sjeff	uint16_t j;
2118219820Sjeff
2119219820Sjeff	/* we shouldn't enter here if both real_lid and main_path are false */
2120219820Sjeff	CL_ASSERT(is_real_lid || is_main_path);
2121219820Sjeff
2122219820Sjeff	/* Assign upgoing ports by stepping down, starting on THIS switch */
2123219820Sjeff	__osm_ftree_fabric_route_upgoing_by_going_down(p_ftree, p_sw,	/* local switch - used as a route-upgoing alg. start point */
2124219820Sjeff						       p_prev_sw,	/* switch that we went up from (NULL means that we went down) */
2125219820Sjeff						       target_lid,	/* LID that we're routing to */
2126219820Sjeff						       target_rank,	/* rank of the LID that we're routing to */
2127219820Sjeff						       is_real_lid,	/* whether this target LID is real or dummy */
2128219820Sjeff						       is_main_path,	/* whether this path to HCA should by tracked by counters */
2129219820Sjeff						       p_sw->rank);	/* the highest visited point in the tree before going down */
2130219820Sjeff
2131219820Sjeff	/* recursion stop condition - if it's a root switch, */
2132219820Sjeff	if (p_sw->rank == 0)
2133219820Sjeff		return;
2134219820Sjeff
2135219820Sjeff	/* Find the least loaded upgoing port group */
2136219820Sjeff	p_min_group = NULL;
2137219820Sjeff	for (i = 0; i < p_sw->up_port_groups_num; i++) {
2138219820Sjeff		p_group = p_sw->up_port_groups[i];
2139219820Sjeff		if (!p_min_group) {
2140219820Sjeff			/* first group that we're checking - use
2141219820Sjeff			   it as a group with the lowest load */
2142219820Sjeff			p_min_group = p_group;
2143219820Sjeff		} else if (p_group->counter_down < p_min_group->counter_down) {
2144219820Sjeff			/* this group is less loaded - use it as min */
2145219820Sjeff			p_min_group = p_group;
2146219820Sjeff		}
2147219820Sjeff	}
2148219820Sjeff
2149219820Sjeff	/* Find the least loaded upgoing port in the selected group */
2150219820Sjeff	p_min_port = NULL;
2151219820Sjeff	ports_num = (uint16_t) cl_ptr_vector_get_size(&p_min_group->ports);
2152219820Sjeff	for (j = 0; j < ports_num; j++) {
2153219820Sjeff		cl_ptr_vector_at(&p_min_group->ports, j, (void *)&p_port);
2154219820Sjeff		if (!p_min_port) {
2155219820Sjeff			/* first port that we're checking - use
2156219820Sjeff			   it as a port with the lowest load */
2157219820Sjeff			p_min_port = p_port;
2158219820Sjeff		} else if (p_port->counter_down < p_min_port->counter_down) {
2159219820Sjeff			/* this port is less loaded - use it as min */
2160219820Sjeff			p_min_port = p_port;
2161219820Sjeff		}
2162219820Sjeff	}
2163219820Sjeff
2164219820Sjeff	/* At this point we have selected a group and port with the
2165219820Sjeff	   lowest load of downgoing routes.
2166219820Sjeff	   Set on the remote switch how to get to the target_lid -
2167219820Sjeff	   set LFT(target_lid) on the remote switch to the remote port */
2168219820Sjeff	p_remote_sw = p_min_group->remote_hca_or_sw.p_sw;
2169219820Sjeff
2170219820Sjeff	/* Four possible cases:
2171219820Sjeff	 *
2172219820Sjeff	 *  1. is_real_lid == TRUE && is_main_path == TRUE:
2173219820Sjeff	 *      - going UP(TRUE,TRUE) on selected min_group and min_port
2174219820Sjeff	 *         + promoting port counter
2175219820Sjeff	 *         + setting path in remote switch fwd tbl
2176219820Sjeff	 *         + setting hops in remote switch on all the ports of selected group
2177219820Sjeff	 *      - going UP(TRUE,FALSE) on rest of the groups, each time on port 0
2178219820Sjeff	 *         + NOT promoting port counter
2179219820Sjeff	 *         + setting path in remote switch fwd tbl if it hasn't been set yet
2180219820Sjeff	 *         + setting hops in remote switch on all the ports of each group
2181219820Sjeff	 *           if it hasn't been set yet
2182219820Sjeff	 *
2183219820Sjeff	 *  2. is_real_lid == TRUE && is_main_path == FALSE:
2184219820Sjeff	 *      - going UP(TRUE,FALSE) on ALL the groups, each time on port 0,
2185219820Sjeff	 *        but only if the remote (upper) switch hasn't been already
2186219820Sjeff	 *        configured for this target LID
2187219820Sjeff	 *         + NOT promoting port counter
2188219820Sjeff	 *         + setting path in remote switch fwd tbl if it hasn't been set yet
2189219820Sjeff	 *         + setting hops in remote switch on all the ports of each group
2190219820Sjeff	 *           if it hasn't been set yet
2191219820Sjeff	 *
2192219820Sjeff	 *  3. is_real_lid == FALSE && is_main_path == TRUE:
2193219820Sjeff	 *      - going UP(FALSE,TRUE) ONLY on selected min_group and min_port
2194219820Sjeff	 *         + promoting port counter
2195219820Sjeff	 *         + NOT setting path in remote switch fwd tbl
2196219820Sjeff	 *         + NOT setting hops in remote switch
2197219820Sjeff	 *
2198219820Sjeff	 *  4. is_real_lid == FALSE && is_main_path == FALSE:
2199219820Sjeff	 *      - illegal state - we shouldn't get here
2200219820Sjeff	 */
2201219820Sjeff
2202219820Sjeff	/* covering first half of case 1, and case 3 */
2203219820Sjeff	if (is_main_path) {
2204219820Sjeff		if (p_sw->is_leaf) {
2205219820Sjeff			OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG,
2206219820Sjeff				" - Routing MAIN path for %s CA LID %u: %s --> %s\n",
2207219820Sjeff				(is_real_lid) ? "real" : "DUMMY",
2208219820Sjeff				cl_ntoh16(target_lid),
2209219820Sjeff				__osm_ftree_tuple_to_str(p_sw->tuple),
2210219820Sjeff				__osm_ftree_tuple_to_str(p_remote_sw->tuple));
2211219820Sjeff		}
2212219820Sjeff		/* The number of downgoing routes is tracked in the
2213219820Sjeff		   p_group->counter_down p_port->counter_down counters of the
2214219820Sjeff		   group and port that belong to the lower side of the link
2215219820Sjeff		   (on switch with higher rank) */
2216219820Sjeff		p_min_group->counter_down++;
2217219820Sjeff		p_min_port->counter_down++;
2218219820Sjeff		if (is_real_lid) {
2219219820Sjeff			p_remote_sw->p_osm_sw->new_lft[cl_ntoh16(target_lid)] =
2220219820Sjeff				p_min_port->remote_port_num;
2221219820Sjeff			OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG,
2222219820Sjeff				"Switch %s: set path to CA LID %u through port %u\n",
2223219820Sjeff				__osm_ftree_tuple_to_str(p_remote_sw->tuple),
2224219820Sjeff				cl_ntoh16(target_lid),
2225219820Sjeff				p_min_port->remote_port_num);
2226219820Sjeff
2227219820Sjeff			/* On the remote switch that is pointed by the min_group,
2228219820Sjeff			   set hops for ALL the ports in the remote group. */
2229219820Sjeff
2230219820Sjeff			ports_num =
2231219820Sjeff			    (uint16_t) cl_ptr_vector_get_size(&p_min_group->
2232219820Sjeff							      ports);
2233219820Sjeff			for (j = 0; j < ports_num; j++) {
2234219820Sjeff				cl_ptr_vector_at(&p_min_group->ports, j,
2235219820Sjeff						 (void *)&p_port);
2236219820Sjeff				__osm_ftree_sw_set_hops(p_remote_sw,
2237219820Sjeff							cl_ntoh16(target_lid),
2238219820Sjeff							p_port->remote_port_num,
2239219820Sjeff							target_rank -
2240219820Sjeff							p_remote_sw->rank);
2241219820Sjeff			}
2242219820Sjeff		}
2243219820Sjeff
2244219820Sjeff		/* Recursion step:
2245219820Sjeff		   Assign downgoing ports by stepping up, starting on REMOTE switch. */
2246219820Sjeff		__osm_ftree_fabric_route_downgoing_by_going_up(p_ftree, p_remote_sw,	/* remote switch - used as a route-downgoing alg. next step point */
2247219820Sjeff							       p_sw,	/* this switch - prev. position switch for the function */
2248219820Sjeff							       target_lid,	/* LID that we're routing to */
2249219820Sjeff							       target_rank,	/* rank of the LID that we're routing to */
2250219820Sjeff							       is_real_lid,	/* whether this target LID is real or dummy */
2251219820Sjeff							       is_main_path);	/* whether this is path to HCA that should by tracked by counters */
2252219820Sjeff	}
2253219820Sjeff
2254219820Sjeff	/* we're done for the third case */
2255219820Sjeff	if (!is_real_lid)
2256219820Sjeff		return;
2257219820Sjeff
2258219820Sjeff	/* What's left to do at this point:
2259219820Sjeff	 *
2260219820Sjeff	 *  1. is_real_lid == TRUE && is_main_path == TRUE:
2261219820Sjeff	 *      - going UP(TRUE,FALSE) on rest of the groups, each time on port 0,
2262219820Sjeff	 *        but only if the remote (upper) switch hasn't been already
2263219820Sjeff	 *        configured for this target LID
2264219820Sjeff	 *         + NOT promoting port counter
2265219820Sjeff	 *         + setting path in remote switch fwd tbl if it hasn't been set yet
2266219820Sjeff	 *         + setting hops in remote switch on all the ports of each group
2267219820Sjeff	 *           if it hasn't been set yet
2268219820Sjeff	 *
2269219820Sjeff	 *  2. is_real_lid == TRUE && is_main_path == FALSE:
2270219820Sjeff	 *      - going UP(TRUE,FALSE) on ALL the groups, each time on port 0,
2271219820Sjeff	 *        but only if the remote (upper) switch hasn't been already
2272219820Sjeff	 *        configured for this target LID
2273219820Sjeff	 *         + NOT promoting port counter
2274219820Sjeff	 *         + setting path in remote switch fwd tbl if it hasn't been set yet
2275219820Sjeff	 *         + setting hops in remote switch on all the ports of each group
2276219820Sjeff	 *           if it hasn't been set yet
2277219820Sjeff	 *
2278219820Sjeff	 *  These two rules can be rephrased this way:
2279219820Sjeff	 *   - foreach UP port group
2280219820Sjeff	 *      + if remote switch has been set with the target LID
2281219820Sjeff	 *         - skip this port group
2282219820Sjeff	 *      + else
2283219820Sjeff	 *         - select port 0
2284219820Sjeff	 *         - do NOT promote port counter
2285219820Sjeff	 *         - set path in remote switch fwd tbl
2286219820Sjeff	 *         - set hops in remote switch on all the ports of this group
2287219820Sjeff	 *         - go UP(TRUE,FALSE) to the remote switch
2288219820Sjeff	 */
2289219820Sjeff
2290219820Sjeff	for (i = 0; i < p_sw->up_port_groups_num; i++) {
2291219820Sjeff		p_group = p_sw->up_port_groups[i];
2292219820Sjeff		p_remote_sw = p_group->remote_hca_or_sw.p_sw;
2293219820Sjeff
2294219820Sjeff		/* skip if target lid has been already set on remote switch fwd tbl */
2295219820Sjeff		if (p_remote_sw->p_osm_sw->new_lft[cl_ntoh16(target_lid)] != OSM_NO_PATH)
2296219820Sjeff			continue;
2297219820Sjeff
2298219820Sjeff		if (p_sw->is_leaf) {
2299219820Sjeff			OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG,
2300219820Sjeff				" - Routing SECONDARY path for LID %u: %s --> %s\n",
2301219820Sjeff				cl_ntoh16(target_lid),
2302219820Sjeff				__osm_ftree_tuple_to_str(p_sw->tuple),
2303219820Sjeff				__osm_ftree_tuple_to_str(p_remote_sw->tuple));
2304219820Sjeff		}
2305219820Sjeff
2306219820Sjeff		/* Routing REAL lids on SECONDARY path means routing
2307219820Sjeff		   switch-to-switch or switch-to-CA paths.
2308219820Sjeff		   We can safely assume that switch will initiate very
2309219820Sjeff		   few traffic, so there's no point waisting runtime on
2310219820Sjeff		   trying to balance these routes - always pick port 0. */
2311219820Sjeff
2312219820Sjeff		cl_ptr_vector_at(&p_group->ports, 0, (void *)&p_port);
2313219820Sjeff		p_remote_sw->p_osm_sw->new_lft[cl_ntoh16(target_lid)] =
2314219820Sjeff			p_port->remote_port_num;
2315219820Sjeff
2316219820Sjeff		/* On the remote switch that is pointed by the p_group,
2317219820Sjeff		   set hops for ALL the ports in the remote group. */
2318219820Sjeff
2319219820Sjeff		ports_num = (uint16_t) cl_ptr_vector_get_size(&p_group->ports);
2320219820Sjeff		for (j = 0; j < ports_num; j++) {
2321219820Sjeff			cl_ptr_vector_at(&p_group->ports, j, (void *)&p_port);
2322219820Sjeff
2323219820Sjeff			__osm_ftree_sw_set_hops(p_remote_sw,
2324219820Sjeff						cl_ntoh16(target_lid),
2325219820Sjeff						p_port->remote_port_num,
2326219820Sjeff						target_rank -
2327219820Sjeff						p_remote_sw->rank);
2328219820Sjeff		}
2329219820Sjeff
2330219820Sjeff		/* Recursion step:
2331219820Sjeff		   Assign downgoing ports by stepping up, starting on REMOTE switch. */
2332219820Sjeff		__osm_ftree_fabric_route_downgoing_by_going_up(p_ftree, p_remote_sw,	/* remote switch - used as a route-downgoing alg. next step point */
2333219820Sjeff							       p_sw,	/* this switch - prev. position switch for the function */
2334219820Sjeff							       target_lid,	/* LID that we're routing to */
2335219820Sjeff							       target_rank,	/* rank of the LID that we're routing to */
2336219820Sjeff							       TRUE,	/* whether the target LID is real or dummy */
2337219820Sjeff							       FALSE);	/* whether this is path to HCA that should by tracked by counters */
2338219820Sjeff	}
2339219820Sjeff
2340219820Sjeff}				/* ftree_fabric_route_downgoing_by_going_up() */
2341219820Sjeff
2342219820Sjeff/***************************************************/
2343219820Sjeff
2344219820Sjeff/*
2345219820Sjeff * Pseudo code:
2346219820Sjeff *    foreach leaf switch (in indexing order)
2347219820Sjeff *       for each compute node (in indexing order)
2348219820Sjeff *          obtain the LID of the compute node
2349219820Sjeff *          set local LFT(LID) of the port connecting to compute node
2350219820Sjeff *          call assign-down-going-port-by-ascending-up(TRUE,TRUE) on CURRENT switch
2351219820Sjeff *       for each MISSING compute node
2352219820Sjeff *          call assign-down-going-port-by-ascending-up(FALSE,TRUE) on CURRENT switch
2353219820Sjeff */
2354219820Sjeff
2355219820Sjeffstatic void __osm_ftree_fabric_route_to_cns(IN ftree_fabric_t * p_ftree)
2356219820Sjeff{
2357219820Sjeff	ftree_sw_t *p_sw;
2358219820Sjeff	ftree_hca_t *p_hca;
2359219820Sjeff	ftree_port_group_t *p_leaf_port_group;
2360219820Sjeff	ftree_port_group_t *p_hca_port_group;
2361219820Sjeff	ftree_port_t *p_port;
2362219820Sjeff	uint32_t i;
2363219820Sjeff	uint32_t j;
2364219820Sjeff	ib_net16_t hca_lid;
2365219820Sjeff	unsigned routed_targets_on_leaf;
2366219820Sjeff
2367219820Sjeff	OSM_LOG_ENTER(&p_ftree->p_osm->log);
2368219820Sjeff
2369219820Sjeff	/* for each leaf switch (in indexing order) */
2370219820Sjeff	for (i = 0; i < p_ftree->leaf_switches_num; i++) {
2371219820Sjeff		p_sw = p_ftree->leaf_switches[i];
2372219820Sjeff		routed_targets_on_leaf = 0;
2373219820Sjeff
2374219820Sjeff		/* for each HCA connected to this switch */
2375219820Sjeff		for (j = 0; j < p_sw->down_port_groups_num; j++) {
2376219820Sjeff			p_leaf_port_group = p_sw->down_port_groups[j];
2377219820Sjeff
2378219820Sjeff			/* work with this port group only if the remote node is CA */
2379219820Sjeff			if (p_leaf_port_group->remote_node_type !=
2380219820Sjeff			    IB_NODE_TYPE_CA)
2381219820Sjeff				continue;
2382219820Sjeff
2383219820Sjeff			p_hca = p_leaf_port_group->remote_hca_or_sw.p_hca;
2384219820Sjeff
2385219820Sjeff			/* work with this port group only if remote HCA has CNs */
2386219820Sjeff			if (!p_hca->cn_num)
2387219820Sjeff				continue;
2388219820Sjeff
2389219820Sjeff			p_hca_port_group =
2390219820Sjeff			    __osm_ftree_hca_get_port_group_by_remote_lid(p_hca,
2391219820Sjeff									 p_leaf_port_group->
2392219820Sjeff									 base_lid);
2393219820Sjeff			CL_ASSERT(p_hca_port_group);
2394219820Sjeff
2395219820Sjeff			/* work with this port group only if remote port is CN */
2396219820Sjeff			if (!p_hca_port_group->is_cn)
2397219820Sjeff				continue;
2398219820Sjeff
2399219820Sjeff			/* obtain the LID of HCA port */
2400219820Sjeff			hca_lid = p_leaf_port_group->remote_base_lid;
2401219820Sjeff
2402219820Sjeff			/* set local LFT(LID) to the port that is connected to HCA */
2403219820Sjeff			cl_ptr_vector_at(&p_leaf_port_group->ports, 0,
2404219820Sjeff					 (void *)&p_port);
2405219820Sjeff			p_sw->p_osm_sw->new_lft[cl_ntoh16(hca_lid)] = p_port->port_num;
2406219820Sjeff
2407219820Sjeff			OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG,
2408219820Sjeff				"Switch %s: set path to CN LID %u through port %u\n",
2409219820Sjeff				__osm_ftree_tuple_to_str(p_sw->tuple),
2410219820Sjeff				cl_ntoh16(hca_lid), p_port->port_num);
2411219820Sjeff
2412219820Sjeff			/* set local min hop table(LID) to route to the CA */
2413219820Sjeff			__osm_ftree_sw_set_hops(p_sw,
2414219820Sjeff						cl_ntoh16(hca_lid),
2415219820Sjeff						p_port->port_num, 1);
2416219820Sjeff
2417219820Sjeff			/* Assign downgoing ports by stepping up.
2418219820Sjeff			   Since we're routing here only CNs, we're routing it as REAL
2419219820Sjeff			   LID and updating fat-tree balancing counters. */
2420219820Sjeff			__osm_ftree_fabric_route_downgoing_by_going_up(p_ftree, p_sw,	/* local switch - used as a route-downgoing alg. start point */
2421219820Sjeff								       NULL,	/* prev. position switch */
2422219820Sjeff								       hca_lid,	/* LID that we're routing to */
2423219820Sjeff								       p_sw->rank + 1,	/* rank of the LID that we're routing to */
2424219820Sjeff								       TRUE,	/* whether this HCA LID is real or dummy */
2425219820Sjeff								       TRUE);	/* whether this path to HCA should by tracked by counters */
2426219820Sjeff
2427219820Sjeff			/* count how many real targets have been routed from this leaf switch */
2428219820Sjeff			routed_targets_on_leaf++;
2429219820Sjeff		}
2430219820Sjeff
2431219820Sjeff		/* We're done with the real targets (all CNs) of this leaf switch.
2432219820Sjeff		   Now route the dummy HCAs that are missing or that are non-CNs.
2433219820Sjeff		   When routing to dummy HCAs we don't fill lid matrices. */
2434219820Sjeff
2435219820Sjeff		if (p_ftree->max_cn_per_leaf > routed_targets_on_leaf) {
2436219820Sjeff			OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG,
2437219820Sjeff				"Routing %u dummy CAs\n",
2438219820Sjeff				p_ftree->max_cn_per_leaf -
2439219820Sjeff				p_sw->down_port_groups_num);
2440219820Sjeff			for (j = 0;
2441219820Sjeff			     ((int)j) <
2442219820Sjeff			     (p_ftree->max_cn_per_leaf -
2443219820Sjeff			      routed_targets_on_leaf); j++) {
2444219820Sjeff				/* assign downgoing ports by stepping up */
2445219820Sjeff				__osm_ftree_fabric_route_downgoing_by_going_up(p_ftree, p_sw,	/* local switch - used as a route-downgoing alg. start point */
2446219820Sjeff									       NULL,	/* prev. position switch */
2447219820Sjeff									       0,	/* LID that we're routing to - ignored for dummy HCA */
2448219820Sjeff									       0,	/* rank of the LID that we're routing to - ignored for dummy HCA */
2449219820Sjeff									       FALSE,	/* whether this HCA LID is real or dummy */
2450219820Sjeff									       TRUE);	/* whether this path to HCA should by tracked by counters */
2451219820Sjeff			}
2452219820Sjeff		}
2453219820Sjeff	}
2454219820Sjeff	/* done going through all the leaf switches */
2455219820Sjeff	OSM_LOG_EXIT(&p_ftree->p_osm->log);
2456219820Sjeff}				/* __osm_ftree_fabric_route_to_cns() */
2457219820Sjeff
2458219820Sjeff/***************************************************/
2459219820Sjeff
2460219820Sjeff/*
2461219820Sjeff * Pseudo code:
2462219820Sjeff *    foreach HCA non-CN port in fabric
2463219820Sjeff *       obtain the LID of the HCA port
2464219820Sjeff *       get switch that is connected to this HCA port
 *       set switch LFT(LID) to the port connecting to this HCA port
 *       call assign-down-going-port-by-ascending-up(TRUE,TRUE) on CURRENT switch
2467219820Sjeff *
 * Routing to these HCAs is routing a REAL hca lid on MAIN path.
 * We want to allow load-leveling of the traffic to the non-CNs,
2470219820Sjeff * because such nodes may include IO nodes with heavy usage
2471219820Sjeff *   - we should set fwd tables
2472219820Sjeff *   - we should update port counters
2473219820Sjeff * Routing to non-CNs is done after routing to CNs, so updated port
2474219820Sjeff * counters will not affect CN-to-CN routing.
2475219820Sjeff */
2476219820Sjeff
2477219820Sjeffstatic void __osm_ftree_fabric_route_to_non_cns(IN ftree_fabric_t * p_ftree)
2478219820Sjeff{
2479219820Sjeff	ftree_sw_t *p_sw;
2480219820Sjeff	ftree_hca_t *p_hca;
2481219820Sjeff	ftree_hca_t *p_next_hca;
2482219820Sjeff	ftree_port_t *p_hca_port;
2483219820Sjeff	ftree_port_group_t *p_hca_port_group;
2484219820Sjeff	ib_net16_t hca_lid;
2485219820Sjeff	unsigned port_num_on_switch;
2486219820Sjeff	unsigned i;
2487219820Sjeff
2488219820Sjeff	OSM_LOG_ENTER(&p_ftree->p_osm->log);
2489219820Sjeff
2490219820Sjeff	p_next_hca = (ftree_hca_t *) cl_qmap_head(&p_ftree->hca_tbl);
2491219820Sjeff	while (p_next_hca != (ftree_hca_t *) cl_qmap_end(&p_ftree->hca_tbl)) {
2492219820Sjeff		p_hca = p_next_hca;
2493219820Sjeff		p_next_hca = (ftree_hca_t *) cl_qmap_next(&p_hca->map_item);
2494219820Sjeff
2495219820Sjeff		for (i = 0; i < p_hca->up_port_groups_num; i++) {
2496219820Sjeff			p_hca_port_group = p_hca->up_port_groups[i];
2497219820Sjeff
2498219820Sjeff			/* skip this port if it's CN, in which case it has been already routed */
2499219820Sjeff			if (p_hca_port_group->is_cn)
2500219820Sjeff				continue;
2501219820Sjeff
2502219820Sjeff			/* skip this port if it is not connected to switch */
2503219820Sjeff			if (p_hca_port_group->remote_node_type !=
2504219820Sjeff			    IB_NODE_TYPE_SWITCH)
2505219820Sjeff				continue;
2506219820Sjeff
2507219820Sjeff			p_sw = p_hca_port_group->remote_hca_or_sw.p_sw;
2508219820Sjeff			hca_lid = p_hca_port_group->base_lid;
2509219820Sjeff
2510219820Sjeff			/* set switches  LFT(LID) to the port that is connected to HCA */
2511219820Sjeff			cl_ptr_vector_at(&p_hca_port_group->ports, 0,
2512219820Sjeff					 (void *)&p_hca_port);
2513219820Sjeff			port_num_on_switch = p_hca_port->remote_port_num;
2514219820Sjeff			p_sw->p_osm_sw->new_lft[cl_ntoh16(hca_lid)] = port_num_on_switch;
2515219820Sjeff
2516219820Sjeff			OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG,
2517219820Sjeff				"Switch %s: set path to non-CN HCA LID %u through port %u\n",
2518219820Sjeff				__osm_ftree_tuple_to_str(p_sw->tuple),
2519219820Sjeff				cl_ntoh16(hca_lid), port_num_on_switch);
2520219820Sjeff
2521219820Sjeff			/* set local min hop table(LID) to route to the CA */
2522219820Sjeff			__osm_ftree_sw_set_hops(p_sw, cl_ntoh16(hca_lid),
2523219820Sjeff						port_num_on_switch,	/* port num */
2524219820Sjeff						1);	/* hops */
2525219820Sjeff
2526219820Sjeff			/* Assign downgoing ports by stepping up.
2527219820Sjeff			   We're routing REAL targets. They are not CNs and not included
2528219820Sjeff			   in the leafs array, but we treat them as MAIN path to allow load
2529219820Sjeff			   leveling, which means that the counters will be updated. */
2530219820Sjeff			__osm_ftree_fabric_route_downgoing_by_going_up(p_ftree, p_sw,	/* local switch - used as a route-downgoing alg. start point */
2531219820Sjeff								       NULL,	/* prev. position switch */
2532219820Sjeff								       hca_lid,	/* LID that we're routing to */
2533219820Sjeff								       p_sw->rank + 1,	/* rank of the LID that we're routing to */
2534219820Sjeff								       TRUE,	/* whether this HCA LID is real or dummy */
2535219820Sjeff								       TRUE);	/* whether this path to HCA should by tracked by counters */
2536219820Sjeff		}
2537219820Sjeff		/* done with all the port groups of this HCA - go to next HCA */
2538219820Sjeff	}
2539219820Sjeff
2540219820Sjeff	OSM_LOG_EXIT(&p_ftree->p_osm->log);
2541219820Sjeff}				/* __osm_ftree_fabric_route_to_non_cns() */
2542219820Sjeff
2543219820Sjeff/***************************************************/
2544219820Sjeff
2545219820Sjeff/*
2546219820Sjeff * Pseudo code:
2547219820Sjeff *    foreach switch in fabric
2548219820Sjeff *       obtain its LID
2549219820Sjeff *       set local LFT(LID) to port 0
2550219820Sjeff *       call assign-down-going-port-by-ascending-up(TRUE,FALSE) on CURRENT switch
2551219820Sjeff *
2552219820Sjeff * Routing to switch is similar to routing a REAL hca lid on SECONDARY path:
2553219820Sjeff *   - we should set fwd tables
2554219820Sjeff *   - we should NOT update port counters
2555219820Sjeff */
2556219820Sjeff
2557219820Sjeffstatic void __osm_ftree_fabric_route_to_switches(IN ftree_fabric_t * p_ftree)
2558219820Sjeff{
2559219820Sjeff	ftree_sw_t *p_sw;
2560219820Sjeff	ftree_sw_t *p_next_sw;
2561219820Sjeff
2562219820Sjeff	OSM_LOG_ENTER(&p_ftree->p_osm->log);
2563219820Sjeff
2564219820Sjeff	p_next_sw = (ftree_sw_t *) cl_qmap_head(&p_ftree->sw_tbl);
2565219820Sjeff	while (p_next_sw != (ftree_sw_t *) cl_qmap_end(&p_ftree->sw_tbl)) {
2566219820Sjeff		p_sw = p_next_sw;
2567219820Sjeff		p_next_sw = (ftree_sw_t *) cl_qmap_next(&p_sw->map_item);
2568219820Sjeff
2569219820Sjeff		/* set local LFT(LID) to 0 (route to itself) */
2570219820Sjeff		p_sw->p_osm_sw->new_lft[cl_ntoh16(p_sw->base_lid)] = 0;
2571219820Sjeff
2572219820Sjeff		OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG,
2573219820Sjeff			"Switch %s (LID %u): routing switch-to-switch paths\n",
2574219820Sjeff			__osm_ftree_tuple_to_str(p_sw->tuple),
2575219820Sjeff			cl_ntoh16(p_sw->base_lid));
2576219820Sjeff
2577219820Sjeff		/* set min hop table of the switch to itself */
2578219820Sjeff		__osm_ftree_sw_set_hops(p_sw, cl_ntoh16(p_sw->base_lid),
2579219820Sjeff					0,	/* port_num */
2580219820Sjeff					0);	/* hops     */
2581219820Sjeff
2582219820Sjeff		__osm_ftree_fabric_route_downgoing_by_going_up(p_ftree, p_sw,	/* local switch - used as a route-downgoing alg. start point */
2583219820Sjeff							       NULL,	/* prev. position switch */
2584219820Sjeff							       p_sw->base_lid,	/* LID that we're routing to */
2585219820Sjeff							       p_sw->rank,	/* rank of the LID that we're routing to */
2586219820Sjeff							       TRUE,	/* whether the target LID is a real or dummy */
2587219820Sjeff							       FALSE);	/* whether this path should by tracked by counters */
2588219820Sjeff	}
2589219820Sjeff
2590219820Sjeff	OSM_LOG_EXIT(&p_ftree->p_osm->log);
2591219820Sjeff}				/* __osm_ftree_fabric_route_to_switches() */
2592219820Sjeff
2593219820Sjeff/***************************************************
2594219820Sjeff ***************************************************/
2595219820Sjeff
2596219820Sjeffstatic int __osm_ftree_fabric_populate_nodes(IN ftree_fabric_t * p_ftree)
2597219820Sjeff{
2598219820Sjeff	osm_node_t *p_osm_node;
2599219820Sjeff	osm_node_t *p_next_osm_node;
2600219820Sjeff
2601219820Sjeff	OSM_LOG_ENTER(&p_ftree->p_osm->log);
2602219820Sjeff
2603219820Sjeff	p_next_osm_node =
2604219820Sjeff	    (osm_node_t *) cl_qmap_head(&p_ftree->p_osm->subn.node_guid_tbl);
2605219820Sjeff	while (p_next_osm_node !=
2606219820Sjeff	       (osm_node_t *) cl_qmap_end(&p_ftree->p_osm->subn.
2607219820Sjeff					  node_guid_tbl)) {
2608219820Sjeff		p_osm_node = p_next_osm_node;
2609219820Sjeff		p_next_osm_node =
2610219820Sjeff		    (osm_node_t *) cl_qmap_next(&p_osm_node->map_item);
2611219820Sjeff		switch (osm_node_get_type(p_osm_node)) {
2612219820Sjeff		case IB_NODE_TYPE_CA:
2613219820Sjeff			__osm_ftree_fabric_add_hca(p_ftree, p_osm_node);
2614219820Sjeff			break;
2615219820Sjeff		case IB_NODE_TYPE_ROUTER:
2616219820Sjeff			break;
2617219820Sjeff		case IB_NODE_TYPE_SWITCH:
2618219820Sjeff			__osm_ftree_fabric_add_sw(p_ftree, p_osm_node->sw);
2619219820Sjeff			break;
2620219820Sjeff		default:
2621219820Sjeff			OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR, "ERR AB0E: "
2622219820Sjeff				"Node GUID 0x%016" PRIx64
2623219820Sjeff				" - Unknown node type: %s\n",
2624219820Sjeff				cl_ntoh64(osm_node_get_node_guid(p_osm_node)),
2625219820Sjeff				ib_get_node_type_str(osm_node_get_type
2626219820Sjeff						     (p_osm_node)));
2627219820Sjeff			OSM_LOG_EXIT(&p_ftree->p_osm->log);
2628219820Sjeff			return -1;
2629219820Sjeff		}
2630219820Sjeff	}
2631219820Sjeff
2632219820Sjeff	OSM_LOG_EXIT(&p_ftree->p_osm->log);
2633219820Sjeff	return 0;
2634219820Sjeff}				/* __osm_ftree_fabric_populate_nodes() */
2635219820Sjeff
2636219820Sjeff/***************************************************
2637219820Sjeff ***************************************************/
2638219820Sjeff
2639219820Sjeffstatic boolean_t __osm_ftree_sw_update_rank(IN ftree_sw_t * p_sw,
2640219820Sjeff					    IN uint32_t new_rank)
2641219820Sjeff{
2642219820Sjeff	if (__osm_ftree_sw_ranked(p_sw) && p_sw->rank <= new_rank)
2643219820Sjeff		return FALSE;
2644219820Sjeff	p_sw->rank = new_rank;
2645219820Sjeff	return TRUE;
2646219820Sjeff
2647219820Sjeff}
2648219820Sjeff
2649219820Sjeff/***************************************************/
2650219820Sjeff
2651219820Sjeffstatic void
2652219820Sjeff__osm_ftree_rank_switches_from_leafs(IN ftree_fabric_t * p_ftree,
2653219820Sjeff				     IN cl_list_t * p_ranking_bfs_list)
2654219820Sjeff{
2655219820Sjeff	ftree_sw_t *p_sw;
2656219820Sjeff	ftree_sw_t *p_remote_sw;
2657219820Sjeff	osm_node_t *p_node;
2658219820Sjeff	osm_node_t *p_remote_node;
2659219820Sjeff	osm_physp_t *p_osm_port;
2660219820Sjeff	uint8_t i;
2661219820Sjeff	unsigned max_rank = 0;
2662219820Sjeff
2663219820Sjeff	while (!cl_is_list_empty(p_ranking_bfs_list)) {
2664219820Sjeff		p_sw = (ftree_sw_t *) cl_list_remove_head(p_ranking_bfs_list);
2665219820Sjeff		p_node = p_sw->p_osm_sw->p_node;
2666219820Sjeff
2667219820Sjeff		/* note: skipping port 0 on switches */
2668219820Sjeff		for (i = 1; i < osm_node_get_num_physp(p_node); i++) {
2669219820Sjeff			p_osm_port = osm_node_get_physp_ptr(p_node, i);
2670219820Sjeff			if (!p_osm_port || !osm_link_is_healthy(p_osm_port))
2671219820Sjeff				continue;
2672219820Sjeff
2673219820Sjeff			p_remote_node =
2674219820Sjeff			    osm_node_get_remote_node(p_node, i, NULL);
2675219820Sjeff			if (!p_remote_node)
2676219820Sjeff				continue;
2677219820Sjeff			if (osm_node_get_type(p_remote_node) !=
2678219820Sjeff			    IB_NODE_TYPE_SWITCH)
2679219820Sjeff				continue;
2680219820Sjeff
2681219820Sjeff			p_remote_sw = __osm_ftree_fabric_get_sw_by_guid(p_ftree,
2682219820Sjeff									osm_node_get_node_guid
2683219820Sjeff									(p_remote_node));
2684219820Sjeff			if (!p_remote_sw) {
2685219820Sjeff				/* remote node is not a switch */
2686219820Sjeff				continue;
2687219820Sjeff			}
2688219820Sjeff
2689219820Sjeff			/* if needed, rank the remote switch and add it to the BFS list */
2690219820Sjeff			if (__osm_ftree_sw_update_rank
2691219820Sjeff			    (p_remote_sw, p_sw->rank + 1)) {
2692219820Sjeff				max_rank = p_remote_sw->rank;
2693219820Sjeff				cl_list_insert_tail(p_ranking_bfs_list,
2694219820Sjeff						    p_remote_sw);
2695219820Sjeff			}
2696219820Sjeff		}
2697219820Sjeff	}
2698219820Sjeff
2699219820Sjeff	/* set FatTree maximal switch rank */
2700219820Sjeff	p_ftree->max_switch_rank = max_rank;
2701219820Sjeff
2702219820Sjeff}				/* __osm_ftree_rank_switches_from_leafs() */
2703219820Sjeff
2704219820Sjeff/***************************************************/
2705219820Sjeff
2706219820Sjeffstatic int
2707219820Sjeff__osm_ftree_rank_leaf_switches(IN ftree_fabric_t * p_ftree,
2708219820Sjeff			       IN ftree_hca_t * p_hca,
2709219820Sjeff			       IN cl_list_t * p_ranking_bfs_list)
2710219820Sjeff{
2711219820Sjeff	ftree_sw_t *p_sw;
2712219820Sjeff	osm_node_t *p_osm_node = p_hca->p_osm_node;
2713219820Sjeff	osm_node_t *p_remote_osm_node;
2714219820Sjeff	osm_physp_t *p_osm_port;
2715219820Sjeff	static uint8_t i = 0;
2716219820Sjeff	int res = 0;
2717219820Sjeff
2718219820Sjeff	OSM_LOG_ENTER(&p_ftree->p_osm->log);
2719219820Sjeff
2720219820Sjeff	for (i = 0; i < osm_node_get_num_physp(p_osm_node); i++) {
2721219820Sjeff		p_osm_port = osm_node_get_physp_ptr(p_osm_node, i);
2722219820Sjeff		if (!p_osm_port || !osm_link_is_healthy(p_osm_port))
2723219820Sjeff			continue;
2724219820Sjeff
2725219820Sjeff		p_remote_osm_node =
2726219820Sjeff		    osm_node_get_remote_node(p_osm_node, i, NULL);
2727219820Sjeff		if (!p_remote_osm_node)
2728219820Sjeff			continue;
2729219820Sjeff
2730219820Sjeff		switch (osm_node_get_type(p_remote_osm_node)) {
2731219820Sjeff		case IB_NODE_TYPE_CA:
2732219820Sjeff			/* HCA connected directly to another HCA - not FatTree */
2733219820Sjeff			OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR, "ERR AB0F: "
2734219820Sjeff				"CA conected directly to another CA: "
2735219820Sjeff				"0x%016" PRIx64 " <---> 0x%016" PRIx64 "\n",
2736219820Sjeff				__osm_ftree_hca_get_guid_ho(p_hca),
2737219820Sjeff				cl_ntoh64(osm_node_get_node_guid
2738219820Sjeff					  (p_remote_osm_node)));
2739219820Sjeff			res = -1;
2740219820Sjeff			goto Exit;
2741219820Sjeff
2742219820Sjeff		case IB_NODE_TYPE_ROUTER:
2743219820Sjeff			/* leaving this port - proceeding to the next one */
2744219820Sjeff			continue;
2745219820Sjeff
2746219820Sjeff		case IB_NODE_TYPE_SWITCH:
2747219820Sjeff			/* continue with this port */
2748219820Sjeff			break;
2749219820Sjeff
2750219820Sjeff		default:
2751219820Sjeff			OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR,
2752219820Sjeff				"ERR AB10: Node GUID 0x%016" PRIx64
2753219820Sjeff				" - Unknown node type: %s\n",
2754219820Sjeff				cl_ntoh64(osm_node_get_node_guid
2755219820Sjeff					  (p_remote_osm_node)),
2756219820Sjeff				ib_get_node_type_str(osm_node_get_type
2757219820Sjeff						     (p_remote_osm_node)));
2758219820Sjeff			res = -1;
2759219820Sjeff			goto Exit;
2760219820Sjeff		}
2761219820Sjeff
2762219820Sjeff		/* remote node is switch */
2763219820Sjeff
2764219820Sjeff		p_sw = __osm_ftree_fabric_get_sw_by_guid(p_ftree,
2765219820Sjeff							 osm_node_get_node_guid
2766219820Sjeff							 (p_osm_port->
2767219820Sjeff							  p_remote_physp->
2768219820Sjeff							  p_node));
2769219820Sjeff		CL_ASSERT(p_sw);
2770219820Sjeff
2771219820Sjeff		/* if needed, rank the remote switch and add it to the BFS list */
2772219820Sjeff
2773219820Sjeff		if (!__osm_ftree_sw_update_rank(p_sw, 0))
2774219820Sjeff			continue;
2775219820Sjeff		OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG,
2776219820Sjeff			"Marking rank of switch that is directly connected to CA:\n"
2777219820Sjeff			"                                            - CA guid    : 0x%016"
2778219820Sjeff			PRIx64 "\n"
2779219820Sjeff			"                                            - Switch guid: 0x%016"
2780219820Sjeff			PRIx64 "\n"
2781219820Sjeff			"                                            - Switch LID : %u\n",
2782219820Sjeff			__osm_ftree_hca_get_guid_ho(p_hca),
2783219820Sjeff			__osm_ftree_sw_get_guid_ho(p_sw),
2784219820Sjeff			cl_ntoh16(p_sw->base_lid));
2785219820Sjeff		cl_list_insert_tail(p_ranking_bfs_list, p_sw);
2786219820Sjeff	}
2787219820Sjeff
2788219820SjeffExit:
2789219820Sjeff	OSM_LOG_EXIT(&p_ftree->p_osm->log);
2790219820Sjeff	return res;
2791219820Sjeff}				/* __osm_ftree_rank_leaf_switches() */
2792219820Sjeff
2793219820Sjeff/***************************************************/
2794219820Sjeff
2795219820Sjeffstatic void __osm_ftree_sw_reverse_rank(IN cl_map_item_t * const p_map_item,
2796219820Sjeff					IN void *context)
2797219820Sjeff{
2798219820Sjeff	ftree_fabric_t *p_ftree = (ftree_fabric_t *) context;
2799219820Sjeff	ftree_sw_t *p_sw = (ftree_sw_t * const)p_map_item;
2800219820Sjeff	p_sw->rank = p_ftree->max_switch_rank - p_sw->rank;
2801219820Sjeff}
2802219820Sjeff
2803219820Sjeff/***************************************************
2804219820Sjeff ***************************************************/
2805219820Sjeff
2806219820Sjeffstatic int
2807219820Sjeff__osm_ftree_fabric_construct_hca_ports(IN ftree_fabric_t * p_ftree,
2808219820Sjeff				       IN ftree_hca_t * p_hca)
2809219820Sjeff{
2810219820Sjeff	ftree_sw_t *p_remote_sw;
2811219820Sjeff	osm_node_t *p_node = p_hca->p_osm_node;
2812219820Sjeff	osm_node_t *p_remote_node;
2813219820Sjeff	uint8_t remote_node_type;
2814219820Sjeff	ib_net64_t remote_node_guid;
2815219820Sjeff	osm_physp_t *p_remote_osm_port;
2816219820Sjeff	uint8_t i;
2817219820Sjeff	uint8_t remote_port_num;
2818219820Sjeff	boolean_t is_cn = FALSE;
2819219820Sjeff	int res = 0;
2820219820Sjeff
2821219820Sjeff	for (i = 0; i < osm_node_get_num_physp(p_node); i++) {
2822219820Sjeff		osm_physp_t *p_osm_port = osm_node_get_physp_ptr(p_node, i);
2823219820Sjeff		if (!p_osm_port || !osm_link_is_healthy(p_osm_port))
2824219820Sjeff			continue;
2825219820Sjeff
2826219820Sjeff		p_remote_osm_port = osm_physp_get_remote(p_osm_port);
2827219820Sjeff		p_remote_node =
2828219820Sjeff		    osm_node_get_remote_node(p_node, i, &remote_port_num);
2829219820Sjeff
2830219820Sjeff		if (!p_remote_osm_port)
2831219820Sjeff			continue;
2832219820Sjeff
2833219820Sjeff		remote_node_type = osm_node_get_type(p_remote_node);
2834219820Sjeff		remote_node_guid = osm_node_get_node_guid(p_remote_node);
2835219820Sjeff
2836219820Sjeff		switch (remote_node_type) {
2837219820Sjeff		case IB_NODE_TYPE_ROUTER:
2838219820Sjeff			/* leaving this port - proceeding to the next one */
2839219820Sjeff			continue;
2840219820Sjeff
2841219820Sjeff		case IB_NODE_TYPE_CA:
2842219820Sjeff			/* HCA connected directly to another HCA - not FatTree */
2843219820Sjeff			OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR, "ERR AB11: "
2844219820Sjeff				"CA conected directly to another CA: "
2845219820Sjeff				"0x%016" PRIx64 " <---> 0x%016" PRIx64 "\n",
2846219820Sjeff				cl_ntoh64(osm_node_get_node_guid(p_node)),
2847219820Sjeff				cl_ntoh64(remote_node_guid));
2848219820Sjeff			res = -1;
2849219820Sjeff			goto Exit;
2850219820Sjeff
2851219820Sjeff		case IB_NODE_TYPE_SWITCH:
2852219820Sjeff			/* continue with this port */
2853219820Sjeff			break;
2854219820Sjeff
2855219820Sjeff		default:
2856219820Sjeff			OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR,
2857219820Sjeff				"ERR AB12: Node GUID 0x%016" PRIx64
2858219820Sjeff				" - Unknown node type: %s\n",
2859219820Sjeff				cl_ntoh64(remote_node_guid),
2860219820Sjeff				ib_get_node_type_str(remote_node_type));
2861219820Sjeff			res = -1;
2862219820Sjeff			goto Exit;
2863219820Sjeff		}
2864219820Sjeff
2865219820Sjeff		/* remote node is switch */
2866219820Sjeff
2867219820Sjeff		p_remote_sw =
2868219820Sjeff		    __osm_ftree_fabric_get_sw_by_guid(p_ftree,
2869219820Sjeff						      remote_node_guid);
2870219820Sjeff		CL_ASSERT(p_remote_sw);
2871219820Sjeff
2872219820Sjeff		/* If CN file is not supplied, then all the CAs considered as Compute Nodes.
2873219820Sjeff		   Otherwise all the CAs are not CNs, and only guids that are present in the
2874219820Sjeff		   CN file will be marked as compute nodes. */
2875219820Sjeff		if (!__osm_ftree_fabric_cns_provided(p_ftree)) {
2876219820Sjeff			is_cn = TRUE;
2877219820Sjeff		} else {
2878219820Sjeff			name_map_item_t *p_elem =
2879219820Sjeff			    (name_map_item_t *) cl_qmap_get(&p_ftree->
2880219820Sjeff							    cn_guid_tbl,
2881219820Sjeff							    cl_ntoh64(osm_physp_get_port_guid
2882219820Sjeff							    (p_osm_port)));
2883219820Sjeff			if (p_elem !=
2884219820Sjeff			    (name_map_item_t *) cl_qmap_end(&p_ftree->
2885219820Sjeff							    cn_guid_tbl))
2886219820Sjeff				is_cn = TRUE;
2887219820Sjeff		}
2888219820Sjeff
2889219820Sjeff		if (is_cn) {
2890219820Sjeff			p_ftree->cn_num++;
2891219820Sjeff			p_hca->cn_num++;
2892219820Sjeff			OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG,
2893219820Sjeff				"Marking CN port GUID 0x%016" PRIx64 "\n",
2894219820Sjeff				cl_ntoh64(osm_physp_get_port_guid(p_osm_port)));
2895219820Sjeff		} else {
2896219820Sjeff			OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG,
2897219820Sjeff				"Marking non-CN port GUID 0x%016" PRIx64 "\n",
2898219820Sjeff				cl_ntoh64(osm_physp_get_port_guid(p_osm_port)));
2899219820Sjeff		}
2900219820Sjeff
2901219820Sjeff		__osm_ftree_hca_add_port(p_hca,	/* local ftree_hca object */
2902219820Sjeff					 i,	/* local port number */
2903219820Sjeff					 remote_port_num,	/* remote port number */
2904219820Sjeff					 osm_node_get_base_lid(p_node, i),	/* local lid */
2905219820Sjeff					 osm_node_get_base_lid(p_remote_node, 0),	/* remote lid */
2906219820Sjeff					 osm_physp_get_port_guid(p_osm_port),	/* local port guid */
2907219820Sjeff					 osm_physp_get_port_guid(p_remote_osm_port),	/* remote port guid */
2908219820Sjeff					 remote_node_guid,	/* remote node guid */
2909219820Sjeff					 remote_node_type,	/* remote node type */
2910219820Sjeff					 (void *)p_remote_sw,	/* remote ftree_hca/sw object */
2911219820Sjeff					 is_cn);	/* whether this port is compute node */
2912219820Sjeff	}
2913219820Sjeff
2914219820SjeffExit:
2915219820Sjeff	return res;
2916219820Sjeff}				/* __osm_ftree_fabric_construct_hca_ports() */
2917219820Sjeff
2918219820Sjeff/***************************************************
2919219820Sjeff ***************************************************/
2920219820Sjeff
2921219820Sjeffstatic int __osm_ftree_fabric_construct_sw_ports(IN ftree_fabric_t * p_ftree,
2922219820Sjeff						 IN ftree_sw_t * p_sw)
2923219820Sjeff{
2924219820Sjeff	ftree_hca_t *p_remote_hca;
2925219820Sjeff	ftree_sw_t *p_remote_sw;
2926219820Sjeff	osm_node_t *p_node = p_sw->p_osm_sw->p_node;
2927219820Sjeff	osm_node_t *p_remote_node;
2928219820Sjeff	ib_net16_t remote_base_lid;
2929219820Sjeff	uint8_t remote_node_type;
2930219820Sjeff	ib_net64_t remote_node_guid;
2931219820Sjeff	osm_physp_t *p_remote_osm_port;
2932219820Sjeff	ftree_direction_t direction;
2933219820Sjeff	void *p_remote_hca_or_sw;
2934219820Sjeff	uint8_t i;
2935219820Sjeff	uint8_t remote_port_num;
2936219820Sjeff	int res = 0;
2937219820Sjeff
2938219820Sjeff	CL_ASSERT(osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH);
2939219820Sjeff
2940219820Sjeff	for (i = 1; i < osm_node_get_num_physp(p_node); i++) {
2941219820Sjeff		osm_physp_t *p_osm_port = osm_node_get_physp_ptr(p_node, i);
2942219820Sjeff		if (!p_osm_port || !osm_link_is_healthy(p_osm_port))
2943219820Sjeff			continue;
2944219820Sjeff
2945219820Sjeff		p_remote_osm_port = osm_physp_get_remote(p_osm_port);
2946219820Sjeff		if (!p_remote_osm_port)
2947219820Sjeff			continue;
2948219820Sjeff
2949219820Sjeff		p_remote_node =
2950219820Sjeff		    osm_node_get_remote_node(p_node, i, &remote_port_num);
2951219820Sjeff
2952219820Sjeff		/* ignore any loopback connection on switch */
2953219820Sjeff		if (p_node == p_remote_node) {
2954219820Sjeff			OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG,
2955219820Sjeff				"Ignoring loopback on switch GUID 0x%016" PRIx64
2956219820Sjeff				", LID %u, rank %u\n",
2957219820Sjeff				__osm_ftree_sw_get_guid_ho(p_sw),
2958219820Sjeff				cl_ntoh16(p_sw->base_lid),
2959219820Sjeff				p_sw->rank);
2960219820Sjeff			continue;
2961219820Sjeff		}
2962219820Sjeff
2963219820Sjeff		remote_node_type = osm_node_get_type(p_remote_node);
2964219820Sjeff		remote_node_guid = osm_node_get_node_guid(p_remote_node);
2965219820Sjeff
2966219820Sjeff		switch (remote_node_type) {
2967219820Sjeff		case IB_NODE_TYPE_ROUTER:
2968219820Sjeff			/* leaving this port - proceeding to the next one */
2969219820Sjeff			continue;
2970219820Sjeff
2971219820Sjeff		case IB_NODE_TYPE_CA:
2972219820Sjeff			/* switch connected to hca */
2973219820Sjeff
2974219820Sjeff			p_remote_hca =
2975219820Sjeff			    __osm_ftree_fabric_get_hca_by_guid(p_ftree,
2976219820Sjeff							       remote_node_guid);
2977219820Sjeff			CL_ASSERT(p_remote_hca);
2978219820Sjeff
2979219820Sjeff			p_remote_hca_or_sw = (void *)p_remote_hca;
2980219820Sjeff			direction = FTREE_DIRECTION_DOWN;
2981219820Sjeff
2982219820Sjeff			remote_base_lid =
2983219820Sjeff			    osm_physp_get_base_lid(p_remote_osm_port);
2984219820Sjeff			break;
2985219820Sjeff
2986219820Sjeff		case IB_NODE_TYPE_SWITCH:
2987219820Sjeff			/* switch connected to another switch */
2988219820Sjeff
2989219820Sjeff			p_remote_sw =
2990219820Sjeff			    __osm_ftree_fabric_get_sw_by_guid(p_ftree,
2991219820Sjeff							      remote_node_guid);
2992219820Sjeff			CL_ASSERT(p_remote_sw);
2993219820Sjeff
2994219820Sjeff			p_remote_hca_or_sw = (void *)p_remote_sw;
2995219820Sjeff
2996219820Sjeff			if (abs(p_sw->rank - p_remote_sw->rank) != 1) {
2997219820Sjeff				OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR,
2998219820Sjeff					"ERR AB16: "
2999219820Sjeff					"Illegal link between switches with ranks %u and %u:\n"
3000219820Sjeff					"       GUID 0x%016" PRIx64
3001219820Sjeff					", LID %u, rank %u\n"
3002219820Sjeff					"       GUID 0x%016" PRIx64
3003219820Sjeff					", LID %u, rank %u\n", p_sw->rank,
3004219820Sjeff					p_remote_sw->rank,
3005219820Sjeff					__osm_ftree_sw_get_guid_ho(p_sw),
3006219820Sjeff					cl_ntoh16(p_sw->base_lid), p_sw->rank,
3007219820Sjeff					__osm_ftree_sw_get_guid_ho(p_remote_sw),
3008219820Sjeff					cl_ntoh16(p_remote_sw->base_lid),
3009219820Sjeff					p_remote_sw->rank);
3010219820Sjeff				res = -1;
3011219820Sjeff				goto Exit;
3012219820Sjeff			}
3013219820Sjeff
3014219820Sjeff			if (p_sw->rank > p_remote_sw->rank)
3015219820Sjeff				direction = FTREE_DIRECTION_UP;
3016219820Sjeff			else
3017219820Sjeff				direction = FTREE_DIRECTION_DOWN;
3018219820Sjeff
3019219820Sjeff			/* switch LID is only in port 0 port_info structure */
3020219820Sjeff			remote_base_lid =
3021219820Sjeff			    osm_node_get_base_lid(p_remote_node, 0);
3022219820Sjeff
3023219820Sjeff			break;
3024219820Sjeff
3025219820Sjeff		default:
3026219820Sjeff			OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR,
3027219820Sjeff				"ERR AB13: Node GUID 0x%016" PRIx64
3028219820Sjeff				" - Unknown node type: %s\n",
3029219820Sjeff				cl_ntoh64(remote_node_guid),
3030219820Sjeff				ib_get_node_type_str(remote_node_type));
3031219820Sjeff			res = -1;
3032219820Sjeff			goto Exit;
3033219820Sjeff		}
3034219820Sjeff		__osm_ftree_sw_add_port(p_sw,	/* local ftree_sw object */
3035219820Sjeff					i,	/* local port number */
3036219820Sjeff					remote_port_num,	/* remote port number */
3037219820Sjeff					p_sw->base_lid,	/* local lid */
3038219820Sjeff					remote_base_lid,	/* remote lid */
3039219820Sjeff					osm_physp_get_port_guid(p_osm_port),	/* local port guid */
3040219820Sjeff					osm_physp_get_port_guid(p_remote_osm_port),	/* remote port guid */
3041219820Sjeff					remote_node_guid,	/* remote node guid */
3042219820Sjeff					remote_node_type,	/* remote node type */
3043219820Sjeff					p_remote_hca_or_sw,	/* remote ftree_hca/sw object */
3044219820Sjeff					direction);	/* port direction (up or down) */
3045219820Sjeff
3046219820Sjeff		/* Track the max lid (in host order) that exists in the fabric */
3047219820Sjeff		if (cl_ntoh16(remote_base_lid) > p_ftree->lft_max_lid_ho)
3048219820Sjeff			p_ftree->lft_max_lid_ho = cl_ntoh16(remote_base_lid);
3049219820Sjeff	}
3050219820Sjeff
3051219820SjeffExit:
3052219820Sjeff	return res;
3053219820Sjeff}				/* __osm_ftree_fabric_construct_sw_ports() */
3054219820Sjeff
3055219820Sjeff/***************************************************
3056219820Sjeff ***************************************************/
3057219820Sjeff
3058219820Sjeffstatic int __osm_ftree_fabric_rank_from_roots(IN ftree_fabric_t * p_ftree)
3059219820Sjeff{
3060219820Sjeff	osm_node_t *p_osm_node;
3061219820Sjeff	osm_node_t *p_remote_osm_node;
3062219820Sjeff	osm_physp_t *p_osm_physp;
3063219820Sjeff	ftree_sw_t *p_sw;
3064219820Sjeff	ftree_sw_t *p_remote_sw;
3065219820Sjeff	cl_list_t ranking_bfs_list;
3066219820Sjeff	struct guid_list_item *item;
3067219820Sjeff	int res = 0;
3068219820Sjeff	unsigned num_roots;
3069219820Sjeff	unsigned max_rank = 0;
3070219820Sjeff	unsigned i;
3071219820Sjeff
3072219820Sjeff	OSM_LOG_ENTER(&p_ftree->p_osm->log);
3073219820Sjeff	cl_list_init(&ranking_bfs_list, 10);
3074219820Sjeff
3075219820Sjeff	/* Rank all the roots and add them to list */
3076219820Sjeff	for (item = (void *)cl_qlist_head(&p_ftree->root_guid_list);
3077219820Sjeff	     item != (void *)cl_qlist_end(&p_ftree->root_guid_list);
3078219820Sjeff	     item = (void *)cl_qlist_next(&item->list)) {
3079219820Sjeff		p_sw =
3080219820Sjeff		    __osm_ftree_fabric_get_sw_by_guid(p_ftree,
3081219820Sjeff						      cl_hton64(item->guid));
3082219820Sjeff		if (!p_sw) {
3083219820Sjeff			/* the specified root guid wasn't found in the fabric */
3084219820Sjeff			OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR, "ERR AB24: "
3085219820Sjeff				"Root switch GUID 0x%" PRIx64 " not found\n",
3086219820Sjeff				item->guid);
3087219820Sjeff			continue;
3088219820Sjeff		}
3089219820Sjeff
3090219820Sjeff		OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG,
3091219820Sjeff			"Ranking root switch with GUID 0x%" PRIx64 "\n",
3092219820Sjeff			item->guid);
3093219820Sjeff		p_sw->rank = 0;
3094219820Sjeff		cl_list_insert_tail(&ranking_bfs_list, p_sw);
3095219820Sjeff	}
3096219820Sjeff
3097219820Sjeff	num_roots = cl_list_count(&ranking_bfs_list);
3098219820Sjeff	if (!num_roots) {
3099219820Sjeff		OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR, "ERR AB25: "
3100219820Sjeff			"No valid roots supplied\n");
3101219820Sjeff		res = -1;
3102219820Sjeff		goto Exit;
3103219820Sjeff	}
3104219820Sjeff
3105219820Sjeff	OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE,
3106219820Sjeff		"Ranked %u valid root switches\n", num_roots);
3107219820Sjeff
3108219820Sjeff	/* Now the list has all the roots.
3109219820Sjeff	   BFS the subnet and update rank on all the switches. */
3110219820Sjeff
3111219820Sjeff	while (!cl_is_list_empty(&ranking_bfs_list)) {
3112219820Sjeff		p_sw = (ftree_sw_t *) cl_list_remove_head(&ranking_bfs_list);
3113219820Sjeff		p_osm_node = p_sw->p_osm_sw->p_node;
3114219820Sjeff
3115219820Sjeff		/* note: skipping port 0 on switches */
3116219820Sjeff		for (i = 1; i < osm_node_get_num_physp(p_osm_node); i++) {
3117219820Sjeff			p_osm_physp = osm_node_get_physp_ptr(p_osm_node, i);
3118219820Sjeff			if (!p_osm_physp  || !osm_link_is_healthy(p_osm_physp))
3119219820Sjeff				continue;
3120219820Sjeff
3121219820Sjeff			p_remote_osm_node =
3122219820Sjeff			    osm_node_get_remote_node(p_osm_node, i, NULL);
3123219820Sjeff			if (!p_remote_osm_node)
3124219820Sjeff				continue;
3125219820Sjeff
3126219820Sjeff			if (osm_node_get_type(p_remote_osm_node) !=
3127219820Sjeff			    IB_NODE_TYPE_SWITCH)
3128219820Sjeff				continue;
3129219820Sjeff
3130219820Sjeff			p_remote_sw = __osm_ftree_fabric_get_sw_by_guid(p_ftree,
3131219820Sjeff									osm_node_get_node_guid
3132219820Sjeff									(p_remote_osm_node));
3133219820Sjeff			CL_ASSERT(p_remote_sw);
3134219820Sjeff
3135219820Sjeff			/* if needed, rank the remote switch and add it to the BFS list */
3136219820Sjeff			if (__osm_ftree_sw_update_rank
3137219820Sjeff			    (p_remote_sw, p_sw->rank + 1)) {
3138219820Sjeff				OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG,
3139219820Sjeff					"Ranking switch 0x%" PRIx64
3140219820Sjeff					" with rank %u\n",
3141219820Sjeff					__osm_ftree_sw_get_guid_ho(p_remote_sw),
3142219820Sjeff					p_remote_sw->rank);
3143219820Sjeff				max_rank = p_remote_sw->rank;
3144219820Sjeff				cl_list_insert_tail(&ranking_bfs_list,
3145219820Sjeff						    p_remote_sw);
3146219820Sjeff			}
3147219820Sjeff		}
3148219820Sjeff		/* done with ports of this switch - go to the next switch in the list */
3149219820Sjeff	}
3150219820Sjeff
3151219820Sjeff	OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE,
3152219820Sjeff		"Subnet ranking completed. Max Node Rank = %u\n", max_rank);
3153219820Sjeff
3154219820Sjeff	/* set FatTree maximal switch rank */
3155219820Sjeff	p_ftree->max_switch_rank = max_rank;
3156219820Sjeff
3157219820SjeffExit:
3158219820Sjeff	cl_list_destroy(&ranking_bfs_list);
3159219820Sjeff	OSM_LOG_EXIT(&p_ftree->p_osm->log);
3160219820Sjeff	return res;
3161219820Sjeff}				/* __osm_ftree_fabric_rank_from_roots() */
3162219820Sjeff
3163219820Sjeff/***************************************************
3164219820Sjeff ***************************************************/
3165219820Sjeff
3166219820Sjeffstatic int __osm_ftree_fabric_rank_from_hcas(IN ftree_fabric_t * p_ftree)
3167219820Sjeff{
3168219820Sjeff	ftree_hca_t *p_hca;
3169219820Sjeff	ftree_hca_t *p_next_hca;
3170219820Sjeff	cl_list_t ranking_bfs_list;
3171219820Sjeff	int res = 0;
3172219820Sjeff
3173219820Sjeff	OSM_LOG_ENTER(&p_ftree->p_osm->log);
3174219820Sjeff
3175219820Sjeff	cl_list_init(&ranking_bfs_list, 10);
3176219820Sjeff
3177219820Sjeff	/* Mark REVERSED rank of all the switches in the subnet.
3178219820Sjeff	   Start from switches that are connected to hca's, and
3179219820Sjeff	   scan all the switches in the subnet. */
3180219820Sjeff	p_next_hca = (ftree_hca_t *) cl_qmap_head(&p_ftree->hca_tbl);
3181219820Sjeff	while (p_next_hca != (ftree_hca_t *) cl_qmap_end(&p_ftree->hca_tbl)) {
3182219820Sjeff		p_hca = p_next_hca;
3183219820Sjeff		p_next_hca = (ftree_hca_t *) cl_qmap_next(&p_hca->map_item);
3184219820Sjeff		if (__osm_ftree_rank_leaf_switches
3185219820Sjeff		    (p_ftree, p_hca, &ranking_bfs_list) != 0) {
3186219820Sjeff			res = -1;
3187219820Sjeff			OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR, "ERR AB14: "
3188219820Sjeff				"Subnet ranking failed - subnet is not FatTree");
3189219820Sjeff			goto Exit;
3190219820Sjeff		}
3191219820Sjeff	}
3192219820Sjeff
3193219820Sjeff	/* Now rank rest of the switches in the fabric, while the
3194219820Sjeff	   list already contains all the ranked leaf switches */
3195219820Sjeff	__osm_ftree_rank_switches_from_leafs(p_ftree, &ranking_bfs_list);
3196219820Sjeff
3197219820Sjeff	/* fix ranking of the switches by reversing the ranking direction */
3198219820Sjeff	cl_qmap_apply_func(&p_ftree->sw_tbl, __osm_ftree_sw_reverse_rank,
3199219820Sjeff			   (void *)p_ftree);
3200219820Sjeff
3201219820SjeffExit:
3202219820Sjeff	cl_list_destroy(&ranking_bfs_list);
3203219820Sjeff	OSM_LOG_EXIT(&p_ftree->p_osm->log);
3204219820Sjeff	return res;
3205219820Sjeff}				/* __osm_ftree_fabric_rank_from_hcas() */
3206219820Sjeff
3207219820Sjeff/***************************************************
3208219820Sjeff ***************************************************/
3209219820Sjeff
3210219820Sjeffstatic int __osm_ftree_fabric_rank(IN ftree_fabric_t * p_ftree)
3211219820Sjeff{
3212219820Sjeff	int res = 0;
3213219820Sjeff
3214219820Sjeff	OSM_LOG_ENTER(&p_ftree->p_osm->log);
3215219820Sjeff
3216219820Sjeff	if (__osm_ftree_fabric_roots_provided(p_ftree))
3217219820Sjeff		res = __osm_ftree_fabric_rank_from_roots(p_ftree);
3218219820Sjeff	else
3219219820Sjeff		res = __osm_ftree_fabric_rank_from_hcas(p_ftree);
3220219820Sjeff
3221219820Sjeff	if (res)
3222219820Sjeff		goto Exit;
3223219820Sjeff
3224219820Sjeff	OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_INFO,
3225219820Sjeff		"FatTree max switch rank is %u\n", p_ftree->max_switch_rank);
3226219820Sjeff
3227219820SjeffExit:
3228219820Sjeff	OSM_LOG_EXIT(&p_ftree->p_osm->log);
3229219820Sjeff	return res;
3230219820Sjeff}				/* __osm_ftree_fabric_rank() */
3231219820Sjeff
3232219820Sjeff/***************************************************
3233219820Sjeff ***************************************************/
3234219820Sjeff
3235219820Sjeffstatic void __osm_ftree_fabric_set_leaf_rank(IN ftree_fabric_t * p_ftree)
3236219820Sjeff{
3237219820Sjeff	unsigned i;
3238219820Sjeff	ftree_sw_t *p_sw;
3239219820Sjeff	ftree_hca_t *p_hca = NULL;
3240219820Sjeff	ftree_hca_t *p_next_hca;
3241219820Sjeff
3242219820Sjeff	OSM_LOG_ENTER(&p_ftree->p_osm->log);
3243219820Sjeff
3244219820Sjeff	if (!__osm_ftree_fabric_roots_provided(p_ftree)) {
3245219820Sjeff		/* If root file is not provided, the fabric has to be pure fat-tree
3246219820Sjeff		   in terms of ranking. Thus, leaf switches rank is the max rank. */
3247219820Sjeff		p_ftree->leaf_switch_rank = p_ftree->max_switch_rank;
3248219820Sjeff	} else {
3249219820Sjeff		/* Find the first CN and set the leaf_switch_rank to the rank
3250219820Sjeff		   of the switch that is connected to this CN. Later we will
3251219820Sjeff		   ensure that all the leaf switches have the same rank. */
3252219820Sjeff		p_next_hca = (ftree_hca_t *) cl_qmap_head(&p_ftree->hca_tbl);
3253219820Sjeff		while (p_next_hca !=
3254219820Sjeff		       (ftree_hca_t *) cl_qmap_end(&p_ftree->hca_tbl)) {
3255219820Sjeff			p_hca = p_next_hca;
3256219820Sjeff			if (p_hca->cn_num)
3257219820Sjeff				break;
3258219820Sjeff			p_next_hca =
3259219820Sjeff			    (ftree_hca_t *) cl_qmap_next(&p_hca->map_item);
3260219820Sjeff		}
3261219820Sjeff		/* we know that there are CNs in the fabric, so just to be sure... */
3262219820Sjeff		CL_ASSERT(p_next_hca !=
3263219820Sjeff			  (ftree_hca_t *) cl_qmap_end(&p_ftree->hca_tbl));
3264219820Sjeff
3265219820Sjeff		OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG,
3266219820Sjeff			"Selected CN port GUID 0x%" PRIx64 "\n",
3267219820Sjeff			__osm_ftree_hca_get_guid_ho(p_hca));
3268219820Sjeff
3269219820Sjeff		for (i = 0; (i < p_hca->up_port_groups_num)
3270219820Sjeff		     && (!p_hca->up_port_groups[i]->is_cn); i++) ;
3271219820Sjeff		CL_ASSERT(i < p_hca->up_port_groups_num);
3272219820Sjeff		CL_ASSERT(p_hca->up_port_groups[i]->remote_node_type ==
3273219820Sjeff			  IB_NODE_TYPE_SWITCH);
3274219820Sjeff
3275219820Sjeff		p_sw = p_hca->up_port_groups[i]->remote_hca_or_sw.p_sw;
3276219820Sjeff		OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG,
3277219820Sjeff			"Selected leaf switch GUID 0x%" PRIx64 ", rank %u\n",
3278219820Sjeff			__osm_ftree_sw_get_guid_ho(p_sw), p_sw->rank);
3279219820Sjeff		p_ftree->leaf_switch_rank = p_sw->rank;
3280219820Sjeff	}
3281219820Sjeff
3282219820Sjeff	OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_INFO,
3283219820Sjeff		"FatTree leaf switch rank is %u\n", p_ftree->leaf_switch_rank);
3284219820Sjeff	OSM_LOG_EXIT(&p_ftree->p_osm->log);
3285219820Sjeff}				/* __osm_ftree_fabric_set_leaf_rank() */
3286219820Sjeff
3287219820Sjeff/***************************************************
3288219820Sjeff ***************************************************/
3289219820Sjeff
3290219820Sjeffstatic int __osm_ftree_fabric_populate_ports(IN ftree_fabric_t * p_ftree)
3291219820Sjeff{
3292219820Sjeff	ftree_hca_t *p_hca;
3293219820Sjeff	ftree_hca_t *p_next_hca;
3294219820Sjeff	ftree_sw_t *p_sw;
3295219820Sjeff	ftree_sw_t *p_next_sw;
3296219820Sjeff	int res = 0;
3297219820Sjeff
3298219820Sjeff	OSM_LOG_ENTER(&p_ftree->p_osm->log);
3299219820Sjeff
3300219820Sjeff	p_next_hca = (ftree_hca_t *) cl_qmap_head(&p_ftree->hca_tbl);
3301219820Sjeff	while (p_next_hca != (ftree_hca_t *) cl_qmap_end(&p_ftree->hca_tbl)) {
3302219820Sjeff		p_hca = p_next_hca;
3303219820Sjeff		p_next_hca = (ftree_hca_t *) cl_qmap_next(&p_hca->map_item);
3304219820Sjeff		if (__osm_ftree_fabric_construct_hca_ports(p_ftree, p_hca) != 0) {
3305219820Sjeff			res = -1;
3306219820Sjeff			goto Exit;
3307219820Sjeff		}
3308219820Sjeff	}
3309219820Sjeff
3310219820Sjeff	p_next_sw = (ftree_sw_t *) cl_qmap_head(&p_ftree->sw_tbl);
3311219820Sjeff	while (p_next_sw != (ftree_sw_t *) cl_qmap_end(&p_ftree->sw_tbl)) {
3312219820Sjeff		p_sw = p_next_sw;
3313219820Sjeff		p_next_sw = (ftree_sw_t *) cl_qmap_next(&p_sw->map_item);
3314219820Sjeff		if (__osm_ftree_fabric_construct_sw_ports(p_ftree, p_sw) != 0) {
3315219820Sjeff			res = -1;
3316219820Sjeff			goto Exit;
3317219820Sjeff		}
3318219820Sjeff	}
3319219820SjeffExit:
3320219820Sjeff	OSM_LOG_EXIT(&p_ftree->p_osm->log);
3321219820Sjeff	return res;
3322219820Sjeff}				/* __osm_ftree_fabric_populate_ports() */
3323219820Sjeff
3324219820Sjeff/***************************************************
3325219820Sjeff ***************************************************/
3326219820Sjeffstatic int add_guid_item_to_list(void *cxt, uint64_t guid, char *p)
3327219820Sjeff{
3328219820Sjeff	cl_qlist_t *list = cxt;
3329219820Sjeff	struct guid_list_item *item;
3330219820Sjeff
3331219820Sjeff	item = malloc(sizeof(*item));
3332219820Sjeff	if (!item)
3333219820Sjeff		return -1;
3334219820Sjeff
3335219820Sjeff	item->guid = guid;
3336219820Sjeff	cl_qlist_insert_tail(list, &item->list);
3337219820Sjeff
3338219820Sjeff	return 0;
3339219820Sjeff}
3340219820Sjeff
3341219820Sjeffstatic int add_guid_item_to_map(void *cxt, uint64_t guid, char *p)
3342219820Sjeff{
3343219820Sjeff	cl_qmap_t *map = cxt;
3344219820Sjeff	name_map_item_t *item;
3345219820Sjeff
3346219820Sjeff	item = malloc(sizeof(*item));
3347219820Sjeff	if (!item)
3348219820Sjeff		return -1;
3349219820Sjeff
3350219820Sjeff	item->guid = guid;
3351219820Sjeff	cl_qmap_insert(map, guid, &item->item);
3352219820Sjeff
3353219820Sjeff	return 0;
3354219820Sjeff}
3355219820Sjeff
3356219820Sjeffstatic int __osm_ftree_fabric_read_guid_files(IN ftree_fabric_t * p_ftree)
3357219820Sjeff{
3358219820Sjeff	int status = 0;
3359219820Sjeff
3360219820Sjeff	OSM_LOG_ENTER(&p_ftree->p_osm->log);
3361219820Sjeff
3362219820Sjeff	if (__osm_ftree_fabric_roots_provided(p_ftree)) {
3363219820Sjeff		OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG,
3364219820Sjeff			"Fetching root nodes from file %s\n",
3365219820Sjeff			p_ftree->p_osm->subn.opt.root_guid_file);
3366219820Sjeff
3367219820Sjeff		if (parse_node_map(p_ftree->p_osm->subn.opt.root_guid_file,
3368219820Sjeff				   add_guid_item_to_list,
3369219820Sjeff				   &p_ftree->root_guid_list)) {
3370219820Sjeff			status = -1;
3371219820Sjeff			goto Exit;
3372219820Sjeff		}
3373219820Sjeff
3374219820Sjeff		if (!cl_qlist_count(&p_ftree->root_guid_list)) {
3375219820Sjeff			OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR, "ERR AB22: "
3376219820Sjeff				"Root guids file has no valid guids\n");
3377219820Sjeff			status = -1;
3378219820Sjeff			goto Exit;
3379219820Sjeff		}
3380219820Sjeff	}
3381219820Sjeff
3382219820Sjeff	if (__osm_ftree_fabric_cns_provided(p_ftree)) {
3383219820Sjeff		OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG,
3384219820Sjeff			"Fetching compute nodes from file %s\n",
3385219820Sjeff			p_ftree->p_osm->subn.opt.cn_guid_file);
3386219820Sjeff
3387219820Sjeff		if (parse_node_map(p_ftree->p_osm->subn.opt.cn_guid_file,
3388219820Sjeff				   add_guid_item_to_map,
3389219820Sjeff				   &p_ftree->cn_guid_tbl)) {
3390219820Sjeff			status = -1;
3391219820Sjeff			goto Exit;
3392219820Sjeff		}
3393219820Sjeff
3394219820Sjeff		if (!cl_qmap_count(&p_ftree->cn_guid_tbl)) {
3395219820Sjeff			OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR, "ERR AB23: "
3396219820Sjeff				"Compute node guids file has no valid guids\n");
3397219820Sjeff			status = -1;
3398219820Sjeff			goto Exit;
3399219820Sjeff		}
3400219820Sjeff	}
3401219820Sjeff
3402219820SjeffExit:
3403219820Sjeff	OSM_LOG_EXIT(&p_ftree->p_osm->log);
3404219820Sjeff	return status;
3405219820Sjeff} /*__osm_ftree_fabric_read_guid_files() */
3406219820Sjeff
3407219820Sjeff/***************************************************
3408219820Sjeff ***************************************************/
3409219820Sjeff
3410219820Sjeffstatic int __osm_ftree_construct_fabric(IN void *context)
3411219820Sjeff{
3412219820Sjeff	ftree_fabric_t *p_ftree = context;
3413219820Sjeff	int status = 0;
3414219820Sjeff
3415219820Sjeff	OSM_LOG_ENTER(&p_ftree->p_osm->log);
3416219820Sjeff
3417219820Sjeff	__osm_ftree_fabric_clear(p_ftree);
3418219820Sjeff
3419219820Sjeff	if (p_ftree->p_osm->subn.opt.lmc > 0) {
3420219820Sjeff		osm_log(&p_ftree->p_osm->log, OSM_LOG_SYS,
3421219820Sjeff			"LMC > 0 is not supported by fat-tree routing.\n"
3422219820Sjeff			"Falling back to default routing\n");
3423219820Sjeff		status = -1;
3424219820Sjeff		goto Exit;
3425219820Sjeff	}
3426219820Sjeff
3427219820Sjeff	if (cl_qmap_count(&p_ftree->p_osm->subn.sw_guid_tbl) < 2) {
3428219820Sjeff		osm_log(&p_ftree->p_osm->log, OSM_LOG_SYS,
3429219820Sjeff			"Fabric has %u switches - topology is not fat-tree.\n"
3430219820Sjeff			"Falling back to default routing\n",
3431219820Sjeff			cl_qmap_count(&p_ftree->p_osm->subn.sw_guid_tbl));
3432219820Sjeff		status = -1;
3433219820Sjeff		goto Exit;
3434219820Sjeff	}
3435219820Sjeff
3436219820Sjeff	if ((cl_qmap_count(&p_ftree->p_osm->subn.node_guid_tbl) -
3437219820Sjeff	     cl_qmap_count(&p_ftree->p_osm->subn.sw_guid_tbl)) < 2) {
3438219820Sjeff		osm_log(&p_ftree->p_osm->log, OSM_LOG_SYS,
3439219820Sjeff			"Fabric has %u nodes (%u switches) - topology is not fat-tree.\n"
3440219820Sjeff			"Falling back to default routing\n",
3441219820Sjeff			cl_qmap_count(&p_ftree->p_osm->subn.node_guid_tbl),
3442219820Sjeff			cl_qmap_count(&p_ftree->p_osm->subn.sw_guid_tbl));
3443219820Sjeff		status = -1;
3444219820Sjeff		goto Exit;
3445219820Sjeff	}
3446219820Sjeff
3447219820Sjeff	OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, "\n"
3448219820Sjeff		"                       |----------------------------------------|\n"
3449219820Sjeff		"                       |- Starting FatTree fabric construction -|\n"
3450219820Sjeff		"                       |----------------------------------------|\n\n");
3451219820Sjeff
3452219820Sjeff	OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE,
3453219820Sjeff		"Populating FatTree Switch and CA tables\n");
3454219820Sjeff	if (__osm_ftree_fabric_populate_nodes(p_ftree) != 0) {
3455219820Sjeff		osm_log(&p_ftree->p_osm->log, OSM_LOG_SYS,
3456219820Sjeff			"Fabric topology is not fat-tree - "
3457219820Sjeff			"falling back to default routing\n");
3458219820Sjeff		status = -1;
3459219820Sjeff		goto Exit;
3460219820Sjeff	}
3461219820Sjeff
3462219820Sjeff	OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE,
3463219820Sjeff		"Reading guid files provided by user\n");
3464219820Sjeff	if (__osm_ftree_fabric_read_guid_files(p_ftree) != 0) {
3465219820Sjeff		osm_log(&p_ftree->p_osm->log, OSM_LOG_SYS,
3466219820Sjeff			"Failed reading guid files - "
3467219820Sjeff			"falling back to default routing\n");
3468219820Sjeff		status = -1;
3469219820Sjeff		goto Exit;
3470219820Sjeff	}
3471219820Sjeff
3472219820Sjeff	if (cl_qmap_count(&p_ftree->hca_tbl) < 2) {
3473219820Sjeff		osm_log(&p_ftree->p_osm->log, OSM_LOG_SYS,
3474219820Sjeff			"Fabric has %u CAa - topology is not fat-tree.\n"
3475219820Sjeff			"Falling back to default routing\n",
3476219820Sjeff			cl_qmap_count(&p_ftree->hca_tbl));
3477219820Sjeff		status = -1;
3478219820Sjeff		goto Exit;
3479219820Sjeff	}
3480219820Sjeff
3481219820Sjeff	/* Rank all the switches in the fabric.
3482219820Sjeff	   After that we will know only fabric max switch rank.
3483219820Sjeff	   We will be able to check leaf switches rank and the
3484219820Sjeff	   whole tree rank after filling ports and marking CNs. */
3485219820Sjeff	OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, "Ranking FatTree\n");
3486219820Sjeff	if (__osm_ftree_fabric_rank(p_ftree) != 0) {
3487219820Sjeff		osm_log(&p_ftree->p_osm->log, OSM_LOG_SYS,
3488219820Sjeff			"Failed ranking the tree\n");
3489219820Sjeff		status = -1;
3490219820Sjeff		goto Exit;
3491219820Sjeff	}
3492219820Sjeff
3493219820Sjeff	/* For each hca and switch, construct array of ports.
3494219820Sjeff	   This is done after the whole FatTree data structure is ready,
3495219820Sjeff	   because we want the ports to have pointers to ftree_{sw,hca}_t
3496219820Sjeff	   objects, and we need the switches to be already ranked because
3497219820Sjeff	   that's how the port direction is determined. */
3498219820Sjeff	OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE,
3499219820Sjeff		"Populating CA & switch ports\n");
3500219820Sjeff	if (__osm_ftree_fabric_populate_ports(p_ftree) != 0) {
3501219820Sjeff		osm_log(&p_ftree->p_osm->log, OSM_LOG_SYS,
3502219820Sjeff			"Fabric topology is not a fat-tree\n");
3503219820Sjeff		status = -1;
3504219820Sjeff		goto Exit;
3505219820Sjeff	} else if (p_ftree->cn_num == 0) {
3506219820Sjeff		osm_log(&p_ftree->p_osm->log, OSM_LOG_SYS,
3507219820Sjeff			"Fabric has no valid compute nodes\n");
3508219820Sjeff		status = -1;
3509219820Sjeff		goto Exit;
3510219820Sjeff	}
3511219820Sjeff
3512219820Sjeff	/* Now that the CA ports have been created and CNs were marked,
3513219820Sjeff	   we can complete the fabric ranking - set leaf switches rank. */
3514219820Sjeff	__osm_ftree_fabric_set_leaf_rank(p_ftree);
3515219820Sjeff
3516219820Sjeff	if (__osm_ftree_fabric_get_rank(p_ftree) > FAT_TREE_MAX_RANK ||
3517219820Sjeff	    __osm_ftree_fabric_get_rank(p_ftree) < FAT_TREE_MIN_RANK) {
3518219820Sjeff		osm_log(&p_ftree->p_osm->log, OSM_LOG_SYS,
3519219820Sjeff			"Fabric rank is %u (should be between %u and %u)\n",
3520219820Sjeff			__osm_ftree_fabric_get_rank(p_ftree), FAT_TREE_MIN_RANK,
3521219820Sjeff			FAT_TREE_MAX_RANK);
3522219820Sjeff		status = -1;
3523219820Sjeff		goto Exit;
3524219820Sjeff	}
3525219820Sjeff
3526219820Sjeff	/* Mark all the switches in the fabric with rank equal to
3527219820Sjeff	   p_ftree->leaf_switch_rank and that are also connected to CNs.
3528219820Sjeff	   As a by-product, this function also runs basic topology
3529219820Sjeff	   validation - it checks that all the CNs are at the same rank. */
3530219820Sjeff	if (__osm_ftree_fabric_mark_leaf_switches(p_ftree)) {
3531219820Sjeff		osm_log(&p_ftree->p_osm->log, OSM_LOG_SYS,
3532219820Sjeff			"Fabric topology is not a fat-tree\n");
3533219820Sjeff		status = -1;
3534219820Sjeff		goto Exit;
3535219820Sjeff	}
3536219820Sjeff
3537219820Sjeff	/* Assign index to all the switches in the fabric.
3538219820Sjeff	   This function also sorts leaf switch array by the switch index,
3539219820Sjeff	   sorts all the port arrays of the indexed switches by remote
3540219820Sjeff	   switch index, and creates switch-by-tuple table (sw_by_tuple_tbl) */
3541219820Sjeff	__osm_ftree_fabric_make_indexing(p_ftree);
3542219820Sjeff
3543219820Sjeff	/* Create leaf switch array sorted by index.
3544219820Sjeff	   This array contains switches with rank equal to p_ftree->leaf_switch_rank
3545219820Sjeff	   and that are also connected to CNs (REAL leafs), and it may contain
3546219820Sjeff	   switches at the same leaf rank w/o CNs, if this is the order of indexing.
3547219820Sjeff	   In any case, the first and the last switches in the array are REAL leafs. */
3548219820Sjeff	if (__osm_ftree_fabric_create_leaf_switch_array(p_ftree)) {
3549219820Sjeff		osm_log(&p_ftree->p_osm->log, OSM_LOG_SYS,
3550219820Sjeff			"Fabric topology is not a fat-tree\n");
3551219820Sjeff		status = -1;
3552219820Sjeff		goto Exit;
3553219820Sjeff	}
3554219820Sjeff
3555219820Sjeff	/* calculate and set ftree.max_cn_per_leaf field */
3556219820Sjeff	__osm_ftree_fabric_set_max_cn_per_leaf(p_ftree);
3557219820Sjeff
3558219820Sjeff	/* print general info about fabric topology */
3559219820Sjeff	__osm_ftree_fabric_dump_general_info(p_ftree);
3560219820Sjeff
3561219820Sjeff	/* dump full tree topology */
3562219820Sjeff	if (osm_log_is_active(&p_ftree->p_osm->log, OSM_LOG_DEBUG))
3563219820Sjeff		__osm_ftree_fabric_dump(p_ftree);
3564219820Sjeff
3565219820Sjeff	/* the fabric is required to be PURE fat-tree only if the root
3566219820Sjeff	   guid file hasn't been provided by user */
3567219820Sjeff	if (!__osm_ftree_fabric_roots_provided(p_ftree) &&
3568219820Sjeff	    !__osm_ftree_fabric_validate_topology(p_ftree)) {
3569219820Sjeff		osm_log(&p_ftree->p_osm->log, OSM_LOG_SYS,
3570219820Sjeff			"Fabric topology is not a fat-tree\n");
3571219820Sjeff		status = -1;
3572219820Sjeff		goto Exit;
3573219820Sjeff	}
3574219820Sjeff
3575219820Sjeff	OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE,
3576219820Sjeff		"Max LID in switch LFTs: %u\n",
3577219820Sjeff		p_ftree->lft_max_lid_ho);
3578219820Sjeff
3579219820SjeffExit:
3580219820Sjeff	if (status != 0) {
3581219820Sjeff		OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE,
3582219820Sjeff			"Clearing FatTree Fabric data structures\n");
3583219820Sjeff		__osm_ftree_fabric_clear(p_ftree);
3584219820Sjeff	} else
3585219820Sjeff		p_ftree->fabric_built = TRUE;
3586219820Sjeff
3587219820Sjeff	OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, "\n"
3588219820Sjeff		"                       |--------------------------------------------------|\n"
3589219820Sjeff		"                       |- Done constructing FatTree fabric (status = %d) -|\n"
3590219820Sjeff		"                       |--------------------------------------------------|\n\n",
3591219820Sjeff		status);
3592219820Sjeff
3593219820Sjeff	OSM_LOG_EXIT(&p_ftree->p_osm->log);
3594219820Sjeff	return status;
3595219820Sjeff}				/* __osm_ftree_construct_fabric() */
3596219820Sjeff
3597219820Sjeff/***************************************************
3598219820Sjeff ***************************************************/
3599219820Sjeff
3600219820Sjeffstatic int __osm_ftree_do_routing(IN void *context)
3601219820Sjeff{
3602219820Sjeff	ftree_fabric_t *p_ftree = context;
3603219820Sjeff	int status = 0;
3604219820Sjeff
3605219820Sjeff	OSM_LOG_ENTER(&p_ftree->p_osm->log);
3606219820Sjeff
3607219820Sjeff	if (!p_ftree->fabric_built) {
3608219820Sjeff		status = -1;
3609219820Sjeff		goto Exit;
3610219820Sjeff	}
3611219820Sjeff
3612219820Sjeff	OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE,
3613219820Sjeff		"Starting FatTree routing\n");
3614219820Sjeff
3615219820Sjeff	OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE,
3616219820Sjeff		"Filling switch forwarding tables for Compute Nodes\n");
3617219820Sjeff	__osm_ftree_fabric_route_to_cns(p_ftree);
3618219820Sjeff
3619219820Sjeff	OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE,
3620219820Sjeff		"Filling switch forwarding tables for non-CN targets\n");
3621219820Sjeff	__osm_ftree_fabric_route_to_non_cns(p_ftree);
3622219820Sjeff
3623219820Sjeff	OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE,
3624219820Sjeff		"Filling switch forwarding tables for switch-to-switch paths\n");
3625219820Sjeff	__osm_ftree_fabric_route_to_switches(p_ftree);
3626219820Sjeff
3627219820Sjeff	/* for each switch, set its fwd table */
3628219820Sjeff	cl_qmap_apply_func(&p_ftree->sw_tbl, __osm_ftree_set_sw_fwd_table,
3629219820Sjeff			   (void *)p_ftree);
3630219820Sjeff
3631219820Sjeff	/* write out hca ordering file */
3632219820Sjeff	__osm_ftree_fabric_dump_hca_ordering(p_ftree);
3633219820Sjeff
3634219820Sjeff	OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE,
3635219820Sjeff		"FatTree routing is done\n");
3636219820Sjeff
3637219820SjeffExit:
3638219820Sjeff	OSM_LOG_EXIT(&p_ftree->p_osm->log);
3639219820Sjeff	return status;
3640219820Sjeff}
3641219820Sjeff
3642219820Sjeff/***************************************************
3643219820Sjeff ***************************************************/
3644219820Sjeff
3645219820Sjeffstatic void __osm_ftree_delete(IN void *context)
3646219820Sjeff{
3647219820Sjeff	if (!context)
3648219820Sjeff		return;
3649219820Sjeff	__osm_ftree_fabric_destroy((ftree_fabric_t *) context);
3650219820Sjeff}
3651219820Sjeff
3652219820Sjeff/***************************************************
3653219820Sjeff ***************************************************/
3654219820Sjeff
3655219820Sjeffint osm_ucast_ftree_setup(struct osm_routing_engine *r, osm_opensm_t * p_osm)
3656219820Sjeff{
3657219820Sjeff	ftree_fabric_t *p_ftree = __osm_ftree_fabric_create();
3658219820Sjeff	if (!p_ftree)
3659219820Sjeff		return -1;
3660219820Sjeff
3661219820Sjeff	p_ftree->p_osm = p_osm;
3662219820Sjeff
3663219820Sjeff	r->context = (void *)p_ftree;
3664219820Sjeff	r->build_lid_matrices = __osm_ftree_construct_fabric;
3665219820Sjeff	r->ucast_build_fwd_tables = __osm_ftree_do_routing;
3666219820Sjeff	r->delete = __osm_ftree_delete;
3667219820Sjeff
3668219820Sjeff	return 0;
3669219820Sjeff}
3670