1219820Sjeff/* 2219820Sjeff * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved. 3219820Sjeff * Copyright (c) 2002-2007 Mellanox Technologies LTD. All rights reserved. 4219820Sjeff * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. 5219820Sjeff * 6219820Sjeff * This software is available to you under a choice of one of two 7219820Sjeff * licenses. You may choose to be licensed under the terms of the GNU 8219820Sjeff * General Public License (GPL) Version 2, available from the file 9219820Sjeff * COPYING in the main directory of this source tree, or the 10219820Sjeff * OpenIB.org BSD license below: 11219820Sjeff * 12219820Sjeff * Redistribution and use in source and binary forms, with or 13219820Sjeff * without modification, are permitted provided that the following 14219820Sjeff * conditions are met: 15219820Sjeff * 16219820Sjeff * - Redistributions of source code must retain the above 17219820Sjeff * copyright notice, this list of conditions and the following 18219820Sjeff * disclaimer. 19219820Sjeff * 20219820Sjeff * - Redistributions in binary form must reproduce the above 21219820Sjeff * copyright notice, this list of conditions and the following 22219820Sjeff * disclaimer in the documentation and/or other materials 23219820Sjeff * provided with the distribution. 24219820Sjeff * 25219820Sjeff * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 26219820Sjeff * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 27219820Sjeff * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 28219820Sjeff * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 29219820Sjeff * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 30219820Sjeff * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 31219820Sjeff * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 32219820Sjeff * SOFTWARE. 
33219820Sjeff * 34219820Sjeff */ 35219820Sjeff 36219820Sjeff/* 37219820Sjeff * Abstract: 38219820Sjeff * Implementation of OpenSM FatTree routing 39219820Sjeff */ 40219820Sjeff 41219820Sjeff#if HAVE_CONFIG_H 42219820Sjeff# include <config.h> 43219820Sjeff#endif 44219820Sjeff 45219820Sjeff#include <stdlib.h> 46219820Sjeff#include <string.h> 47219820Sjeff#include <ctype.h> 48219820Sjeff#include <errno.h> 49219820Sjeff#include <iba/ib_types.h> 50219820Sjeff#include <complib/cl_qmap.h> 51219820Sjeff#include <complib/cl_debug.h> 52219820Sjeff#include <opensm/osm_opensm.h> 53219820Sjeff#include <opensm/osm_switch.h> 54219820Sjeff 55219820Sjeff/* 56219820Sjeff * FatTree rank is bounded between 2 and 8: 57219820Sjeff * - Tree of rank 1 has only trivial routing paths, 58219820Sjeff * so no need to use FatTree routing. 59219820Sjeff * - Why maximum rank is 8: 60219820Sjeff * Each node (switch) is assigned a unique tuple. 61219820Sjeff * Switches are stored in two cl_qmaps - one is 62219820Sjeff * ordered by guid, and the other by a key that is 63219820Sjeff * generated from tuple. Since cl_qmap supports only 64219820Sjeff * a 64-bit key, the maximal tuple length is 8 bytes, 65219820Sjeff * which means that maximal tree rank is 8. 66219820Sjeff * Note that the above also implies that each switch 67219820Sjeff * can have at max 255 up/down ports. 
68219820Sjeff */ 69219820Sjeff 70219820Sjeff#define FAT_TREE_MIN_RANK 2 71219820Sjeff#define FAT_TREE_MAX_RANK 8 72219820Sjeff 73219820Sjefftypedef enum { 74219820Sjeff FTREE_DIRECTION_DOWN = -1, 75219820Sjeff FTREE_DIRECTION_SAME, 76219820Sjeff FTREE_DIRECTION_UP 77219820Sjeff} ftree_direction_t; 78219820Sjeff 79219820Sjeff/*************************************************** 80219820Sjeff ** 81219820Sjeff ** Forward references 82219820Sjeff ** 83219820Sjeff ***************************************************/ 84219820Sjeff 85219820Sjeffstruct ftree_sw_t_; 86219820Sjeffstruct ftree_hca_t_; 87219820Sjeffstruct ftree_port_t_; 88219820Sjeffstruct ftree_port_group_t_; 89219820Sjeffstruct ftree_fabric_t_; 90219820Sjeff 91219820Sjeff/*************************************************** 92219820Sjeff ** 93219820Sjeff ** ftree_tuple_t definition 94219820Sjeff ** 95219820Sjeff ***************************************************/ 96219820Sjeff 97219820Sjeff#define FTREE_TUPLE_BUFF_LEN 1024 98219820Sjeff#define FTREE_TUPLE_LEN 8 99219820Sjeff 100219820Sjefftypedef uint8_t ftree_tuple_t[FTREE_TUPLE_LEN]; 101219820Sjefftypedef uint64_t ftree_tuple_key_t; 102219820Sjeff 103219820Sjeffstruct guid_list_item { 104219820Sjeff cl_list_item_t list; 105219820Sjeff uint64_t guid; 106219820Sjeff}; 107219820Sjeff 108219820Sjeff/*************************************************** 109219820Sjeff ** 110219820Sjeff ** ftree_sw_table_element_t definition 111219820Sjeff ** 112219820Sjeff ***************************************************/ 113219820Sjeff 114219820Sjefftypedef struct { 115219820Sjeff cl_map_item_t map_item; 116219820Sjeff struct ftree_sw_t_ *p_sw; 117219820Sjeff} ftree_sw_tbl_element_t; 118219820Sjeff 119219820Sjeff/*************************************************** 120219820Sjeff ** 121219820Sjeff ** ftree_port_t definition 122219820Sjeff ** 123219820Sjeff ***************************************************/ 124219820Sjeff 125219820Sjefftypedef struct ftree_port_t_ { 
126219820Sjeff cl_map_item_t map_item; 127219820Sjeff uint8_t port_num; /* port number on the current node */ 128219820Sjeff uint8_t remote_port_num; /* port number on the remote node */ 129219820Sjeff uint32_t counter_up; /* number of allocated routs upwards */ 130219820Sjeff uint32_t counter_down; /* number of allocated routs downwards */ 131219820Sjeff} ftree_port_t; 132219820Sjeff 133219820Sjeff/*************************************************** 134219820Sjeff ** 135219820Sjeff ** ftree_port_group_t definition 136219820Sjeff ** 137219820Sjeff ***************************************************/ 138219820Sjeff 139219820Sjefftypedef union ftree_hca_or_sw_ { 140219820Sjeff struct ftree_hca_t_ *p_hca; 141219820Sjeff struct ftree_sw_t_ *p_sw; 142219820Sjeff} ftree_hca_or_sw; 143219820Sjeff 144219820Sjefftypedef struct ftree_port_group_t_ { 145219820Sjeff cl_map_item_t map_item; 146219820Sjeff ib_net16_t base_lid; /* base lid of the current node */ 147219820Sjeff ib_net16_t remote_base_lid; /* base lid of the remote node */ 148219820Sjeff ib_net64_t port_guid; /* port guid of this port */ 149219820Sjeff ib_net64_t node_guid; /* this node's guid */ 150219820Sjeff uint8_t node_type; /* this node's type */ 151219820Sjeff ib_net64_t remote_port_guid; /* port guid of the remote port */ 152219820Sjeff ib_net64_t remote_node_guid; /* node guid of the remote node */ 153219820Sjeff uint8_t remote_node_type; /* IB_NODE_TYPE_{CA,SWITCH,ROUTER,...} */ 154219820Sjeff ftree_hca_or_sw hca_or_sw; /* pointer to this hca/switch */ 155219820Sjeff ftree_hca_or_sw remote_hca_or_sw; /* pointer to remote hca/switch */ 156219820Sjeff cl_ptr_vector_t ports; /* vector of ports to the same lid */ 157219820Sjeff boolean_t is_cn; /* whether this port is a compute node */ 158219820Sjeff uint32_t counter_down; /* number of allocated routs downwards */ 159219820Sjeff} ftree_port_group_t; 160219820Sjeff 161219820Sjeff/*************************************************** 162219820Sjeff ** 
163219820Sjeff ** ftree_sw_t definition 164219820Sjeff ** 165219820Sjeff ***************************************************/ 166219820Sjeff 167219820Sjefftypedef struct ftree_sw_t_ { 168219820Sjeff cl_map_item_t map_item; 169219820Sjeff osm_switch_t *p_osm_sw; 170219820Sjeff uint32_t rank; 171219820Sjeff ftree_tuple_t tuple; 172219820Sjeff ib_net16_t base_lid; 173219820Sjeff ftree_port_group_t **down_port_groups; 174219820Sjeff uint8_t down_port_groups_num; 175219820Sjeff ftree_port_group_t **up_port_groups; 176219820Sjeff uint8_t up_port_groups_num; 177219820Sjeff boolean_t is_leaf; 178219820Sjeff int down_port_groups_idx; 179219820Sjeff} ftree_sw_t; 180219820Sjeff 181219820Sjeff/*************************************************** 182219820Sjeff ** 183219820Sjeff ** ftree_hca_t definition 184219820Sjeff ** 185219820Sjeff ***************************************************/ 186219820Sjeff 187219820Sjefftypedef struct ftree_hca_t_ { 188219820Sjeff cl_map_item_t map_item; 189219820Sjeff osm_node_t *p_osm_node; 190219820Sjeff ftree_port_group_t **up_port_groups; 191219820Sjeff uint16_t up_port_groups_num; 192219820Sjeff unsigned cn_num; 193219820Sjeff} ftree_hca_t; 194219820Sjeff 195219820Sjeff/*************************************************** 196219820Sjeff ** 197219820Sjeff ** ftree_fabric_t definition 198219820Sjeff ** 199219820Sjeff ***************************************************/ 200219820Sjeff 201219820Sjefftypedef struct ftree_fabric_t_ { 202219820Sjeff osm_opensm_t *p_osm; 203219820Sjeff cl_qmap_t hca_tbl; 204219820Sjeff cl_qmap_t sw_tbl; 205219820Sjeff cl_qmap_t sw_by_tuple_tbl; 206219820Sjeff cl_qlist_t root_guid_list; 207219820Sjeff cl_qmap_t cn_guid_tbl; 208219820Sjeff unsigned cn_num; 209219820Sjeff uint8_t leaf_switch_rank; 210219820Sjeff uint8_t max_switch_rank; 211219820Sjeff ftree_sw_t **leaf_switches; 212219820Sjeff uint32_t leaf_switches_num; 213219820Sjeff uint16_t max_cn_per_leaf; 214219820Sjeff uint16_t lft_max_lid_ho; 215219820Sjeff 
boolean_t fabric_built; 216219820Sjeff} ftree_fabric_t; 217219820Sjeff 218219820Sjeff/*************************************************** 219219820Sjeff ** 220219820Sjeff ** comparators 221219820Sjeff ** 222219820Sjeff ***************************************************/ 223219820Sjeff 224219820Sjeffstatic int OSM_CDECL __osm_ftree_compare_switches_by_index(IN const void *p1, 225219820Sjeff IN const void *p2) 226219820Sjeff{ 227219820Sjeff ftree_sw_t **pp_sw1 = (ftree_sw_t **) p1; 228219820Sjeff ftree_sw_t **pp_sw2 = (ftree_sw_t **) p2; 229219820Sjeff 230219820Sjeff uint16_t i; 231219820Sjeff for (i = 0; i < FTREE_TUPLE_LEN; i++) { 232219820Sjeff if ((*pp_sw1)->tuple[i] > (*pp_sw2)->tuple[i]) 233219820Sjeff return 1; 234219820Sjeff if ((*pp_sw1)->tuple[i] < (*pp_sw2)->tuple[i]) 235219820Sjeff return -1; 236219820Sjeff } 237219820Sjeff return 0; 238219820Sjeff} 239219820Sjeff 240219820Sjeff/***************************************************/ 241219820Sjeff 242219820Sjeffstatic int OSM_CDECL 243219820Sjeff__osm_ftree_compare_port_groups_by_remote_switch_index(IN const void *p1, 244219820Sjeff IN const void *p2) 245219820Sjeff{ 246219820Sjeff ftree_port_group_t **pp_g1 = (ftree_port_group_t **) p1; 247219820Sjeff ftree_port_group_t **pp_g2 = (ftree_port_group_t **) p2; 248219820Sjeff 249219820Sjeff return 250219820Sjeff __osm_ftree_compare_switches_by_index(& 251219820Sjeff ((*pp_g1)->remote_hca_or_sw. 252219820Sjeff p_sw), 253219820Sjeff &((*pp_g2)->remote_hca_or_sw. 
254219820Sjeff p_sw)); 255219820Sjeff} 256219820Sjeff 257219820Sjeff/*************************************************** 258219820Sjeff ** 259219820Sjeff ** ftree_tuple_t functions 260219820Sjeff ** 261219820Sjeff ***************************************************/ 262219820Sjeff 263219820Sjeffstatic void __osm_ftree_tuple_init(IN ftree_tuple_t tuple) 264219820Sjeff{ 265219820Sjeff memset(tuple, 0xFF, FTREE_TUPLE_LEN); 266219820Sjeff} 267219820Sjeff 268219820Sjeff/***************************************************/ 269219820Sjeff 270219820Sjeffstatic inline boolean_t __osm_ftree_tuple_assigned(IN ftree_tuple_t tuple) 271219820Sjeff{ 272219820Sjeff return (tuple[0] != 0xFF); 273219820Sjeff} 274219820Sjeff 275219820Sjeff/***************************************************/ 276219820Sjeff 277219820Sjeff#define FTREE_TUPLE_BUFFERS_NUM 6 278219820Sjeff 279219820Sjeffstatic char *__osm_ftree_tuple_to_str(IN ftree_tuple_t tuple) 280219820Sjeff{ 281219820Sjeff static char buffer[FTREE_TUPLE_BUFFERS_NUM][FTREE_TUPLE_BUFF_LEN]; 282219820Sjeff static uint8_t ind = 0; 283219820Sjeff char *ret_buffer; 284219820Sjeff uint32_t i; 285219820Sjeff 286219820Sjeff if (!__osm_ftree_tuple_assigned(tuple)) 287219820Sjeff return "INDEX.NOT.ASSIGNED"; 288219820Sjeff 289219820Sjeff buffer[ind][0] = '\0'; 290219820Sjeff 291219820Sjeff for (i = 0; (i < FTREE_TUPLE_LEN) && (tuple[i] != 0xFF); i++) { 292219820Sjeff if ((strlen(buffer[ind]) + 10) > FTREE_TUPLE_BUFF_LEN) 293219820Sjeff return "INDEX.TOO.LONG"; 294219820Sjeff if (i != 0) 295219820Sjeff strcat(buffer[ind], "."); 296219820Sjeff sprintf(&buffer[ind][strlen(buffer[ind])], "%u", tuple[i]); 297219820Sjeff } 298219820Sjeff 299219820Sjeff ret_buffer = buffer[ind]; 300219820Sjeff ind = (ind + 1) % FTREE_TUPLE_BUFFERS_NUM; 301219820Sjeff return ret_buffer; 302219820Sjeff} /* __osm_ftree_tuple_to_str() */ 303219820Sjeff 304219820Sjeff/***************************************************/ 305219820Sjeff 306219820Sjeffstatic inline 
ftree_tuple_key_t __osm_ftree_tuple_to_key(IN ftree_tuple_t tuple) 307219820Sjeff{ 308219820Sjeff ftree_tuple_key_t key; 309219820Sjeff memcpy(&key, tuple, FTREE_TUPLE_LEN); 310219820Sjeff return key; 311219820Sjeff} 312219820Sjeff 313219820Sjeff/***************************************************/ 314219820Sjeff 315219820Sjeffstatic inline void __osm_ftree_tuple_from_key(IN ftree_tuple_t tuple, 316219820Sjeff IN ftree_tuple_key_t key) 317219820Sjeff{ 318219820Sjeff memcpy(tuple, &key, FTREE_TUPLE_LEN); 319219820Sjeff} 320219820Sjeff 321219820Sjeff/*************************************************** 322219820Sjeff ** 323219820Sjeff ** ftree_sw_tbl_element_t functions 324219820Sjeff ** 325219820Sjeff ***************************************************/ 326219820Sjeff 327219820Sjeffstatic ftree_sw_tbl_element_t *__osm_ftree_sw_tbl_element_create(IN ftree_sw_t * 328219820Sjeff p_sw) 329219820Sjeff{ 330219820Sjeff ftree_sw_tbl_element_t *p_element = 331219820Sjeff (ftree_sw_tbl_element_t *) malloc(sizeof(ftree_sw_tbl_element_t)); 332219820Sjeff if (!p_element) 333219820Sjeff return NULL; 334219820Sjeff memset(p_element, 0, sizeof(ftree_sw_tbl_element_t)); 335219820Sjeff 336219820Sjeff p_element->p_sw = p_sw; 337219820Sjeff return p_element; 338219820Sjeff} 339219820Sjeff 340219820Sjeff/***************************************************/ 341219820Sjeff 342219820Sjeffstatic void __osm_ftree_sw_tbl_element_destroy(IN ftree_sw_tbl_element_t * 343219820Sjeff p_element) 344219820Sjeff{ 345219820Sjeff if (!p_element) 346219820Sjeff return; 347219820Sjeff free(p_element); 348219820Sjeff} 349219820Sjeff 350219820Sjeff/*************************************************** 351219820Sjeff ** 352219820Sjeff ** ftree_port_t functions 353219820Sjeff ** 354219820Sjeff ***************************************************/ 355219820Sjeff 356219820Sjeffstatic ftree_port_t *__osm_ftree_port_create(IN uint8_t port_num, 357219820Sjeff IN uint8_t remote_port_num) 358219820Sjeff{ 359219820Sjeff 
ftree_port_t *p_port = (ftree_port_t *) malloc(sizeof(ftree_port_t)); 360219820Sjeff if (!p_port) 361219820Sjeff return NULL; 362219820Sjeff memset(p_port, 0, sizeof(ftree_port_t)); 363219820Sjeff 364219820Sjeff p_port->port_num = port_num; 365219820Sjeff p_port->remote_port_num = remote_port_num; 366219820Sjeff 367219820Sjeff return p_port; 368219820Sjeff} 369219820Sjeff 370219820Sjeff/***************************************************/ 371219820Sjeff 372219820Sjeffstatic void __osm_ftree_port_destroy(IN ftree_port_t * p_port) 373219820Sjeff{ 374219820Sjeff if (p_port) 375219820Sjeff free(p_port); 376219820Sjeff} 377219820Sjeff 378219820Sjeff/*************************************************** 379219820Sjeff ** 380219820Sjeff ** ftree_port_group_t functions 381219820Sjeff ** 382219820Sjeff ***************************************************/ 383219820Sjeff 384219820Sjeffstatic ftree_port_group_t * 385219820Sjeff__osm_ftree_port_group_create(IN ib_net16_t base_lid, 386219820Sjeff IN ib_net16_t remote_base_lid, 387219820Sjeff IN ib_net64_t port_guid, 388219820Sjeff IN ib_net64_t node_guid, 389219820Sjeff IN uint8_t node_type, 390219820Sjeff IN void *p_hca_or_sw, 391219820Sjeff IN ib_net64_t remote_port_guid, 392219820Sjeff IN ib_net64_t remote_node_guid, 393219820Sjeff IN uint8_t remote_node_type, 394219820Sjeff IN void *p_remote_hca_or_sw, 395219820Sjeff IN boolean_t is_cn) 396219820Sjeff{ 397219820Sjeff ftree_port_group_t *p_group = 398219820Sjeff (ftree_port_group_t *) malloc(sizeof(ftree_port_group_t)); 399219820Sjeff if (p_group == NULL) 400219820Sjeff return NULL; 401219820Sjeff memset(p_group, 0, sizeof(ftree_port_group_t)); 402219820Sjeff 403219820Sjeff p_group->base_lid = base_lid; 404219820Sjeff p_group->remote_base_lid = remote_base_lid; 405219820Sjeff memcpy(&p_group->port_guid, &port_guid, sizeof(ib_net64_t)); 406219820Sjeff memcpy(&p_group->node_guid, &node_guid, sizeof(ib_net64_t)); 407219820Sjeff memcpy(&p_group->remote_port_guid, &remote_port_guid, 
408219820Sjeff sizeof(ib_net64_t)); 409219820Sjeff memcpy(&p_group->remote_node_guid, &remote_node_guid, 410219820Sjeff sizeof(ib_net64_t)); 411219820Sjeff 412219820Sjeff p_group->node_type = node_type; 413219820Sjeff switch (node_type) { 414219820Sjeff case IB_NODE_TYPE_CA: 415219820Sjeff p_group->hca_or_sw.p_hca = (ftree_hca_t *) p_hca_or_sw; 416219820Sjeff break; 417219820Sjeff case IB_NODE_TYPE_SWITCH: 418219820Sjeff p_group->hca_or_sw.p_sw = (ftree_sw_t *) p_hca_or_sw; 419219820Sjeff break; 420219820Sjeff default: 421219820Sjeff /* we shouldn't get here - port is created only in hca or switch */ 422219820Sjeff CL_ASSERT(0); 423219820Sjeff } 424219820Sjeff 425219820Sjeff p_group->remote_node_type = remote_node_type; 426219820Sjeff switch (remote_node_type) { 427219820Sjeff case IB_NODE_TYPE_CA: 428219820Sjeff p_group->remote_hca_or_sw.p_hca = 429219820Sjeff (ftree_hca_t *) p_remote_hca_or_sw; 430219820Sjeff break; 431219820Sjeff case IB_NODE_TYPE_SWITCH: 432219820Sjeff p_group->remote_hca_or_sw.p_sw = 433219820Sjeff (ftree_sw_t *) p_remote_hca_or_sw; 434219820Sjeff break; 435219820Sjeff default: 436219820Sjeff /* we shouldn't get here - port is created only in hca or switch */ 437219820Sjeff CL_ASSERT(0); 438219820Sjeff } 439219820Sjeff 440219820Sjeff cl_ptr_vector_init(&p_group->ports, 0, /* min size */ 441219820Sjeff 8); /* grow size */ 442219820Sjeff p_group->is_cn = is_cn; 443219820Sjeff return p_group; 444219820Sjeff} /* __osm_ftree_port_group_create() */ 445219820Sjeff 446219820Sjeff/***************************************************/ 447219820Sjeff 448219820Sjeffstatic void __osm_ftree_port_group_destroy(IN ftree_port_group_t * p_group) 449219820Sjeff{ 450219820Sjeff uint32_t i; 451219820Sjeff uint32_t size; 452219820Sjeff ftree_port_t *p_port; 453219820Sjeff 454219820Sjeff if (!p_group) 455219820Sjeff return; 456219820Sjeff 457219820Sjeff /* remove all the elements of p_group->ports vector */ 458219820Sjeff size = 
cl_ptr_vector_get_size(&p_group->ports); 459219820Sjeff for (i = 0; i < size; i++) { 460219820Sjeff cl_ptr_vector_at(&p_group->ports, i, (void *)&p_port); 461219820Sjeff __osm_ftree_port_destroy(p_port); 462219820Sjeff } 463219820Sjeff cl_ptr_vector_destroy(&p_group->ports); 464219820Sjeff free(p_group); 465219820Sjeff} /* __osm_ftree_port_group_destroy() */ 466219820Sjeff 467219820Sjeff/***************************************************/ 468219820Sjeff 469219820Sjeffstatic void 470219820Sjeff__osm_ftree_port_group_dump(IN ftree_fabric_t * p_ftree, 471219820Sjeff IN ftree_port_group_t * p_group, 472219820Sjeff IN ftree_direction_t direction) 473219820Sjeff{ 474219820Sjeff ftree_port_t *p_port; 475219820Sjeff uint32_t size; 476219820Sjeff uint32_t i; 477219820Sjeff char buff[10 * 1024]; 478219820Sjeff 479219820Sjeff if (!p_group) 480219820Sjeff return; 481219820Sjeff 482219820Sjeff if (!osm_log_is_active(&p_ftree->p_osm->log, OSM_LOG_DEBUG)) 483219820Sjeff return; 484219820Sjeff 485219820Sjeff size = cl_ptr_vector_get_size(&p_group->ports); 486219820Sjeff buff[0] = '\0'; 487219820Sjeff 488219820Sjeff for (i = 0; i < size; i++) { 489219820Sjeff cl_ptr_vector_at(&p_group->ports, i, (void *)&p_port); 490219820Sjeff CL_ASSERT(p_port); 491219820Sjeff 492219820Sjeff if (i != 0) 493219820Sjeff strcat(buff, ", "); 494219820Sjeff sprintf(buff + strlen(buff), "%u", p_port->port_num); 495219820Sjeff } 496219820Sjeff 497219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, 498219820Sjeff " Port Group of size %u, port(s): %s, direction: %s\n" 499219820Sjeff " Local <--> Remote GUID (LID):" 500219820Sjeff "0x%016" PRIx64 " (0x%04x) <--> 0x%016" PRIx64 " (0x%04x)\n", 501219820Sjeff size, 502219820Sjeff buff, 503219820Sjeff (direction == FTREE_DIRECTION_DOWN) ? 
"DOWN" : "UP", 504219820Sjeff cl_ntoh64(p_group->port_guid), 505219820Sjeff cl_ntoh16(p_group->base_lid), 506219820Sjeff cl_ntoh64(p_group->remote_port_guid), 507219820Sjeff cl_ntoh16(p_group->remote_base_lid)); 508219820Sjeff 509219820Sjeff} /* __osm_ftree_port_group_dump() */ 510219820Sjeff 511219820Sjeff/***************************************************/ 512219820Sjeff 513219820Sjeffstatic void 514219820Sjeff__osm_ftree_port_group_add_port(IN ftree_port_group_t * p_group, 515219820Sjeff IN uint8_t port_num, IN uint8_t remote_port_num) 516219820Sjeff{ 517219820Sjeff uint16_t i; 518219820Sjeff ftree_port_t *p_port; 519219820Sjeff 520219820Sjeff for (i = 0; i < cl_ptr_vector_get_size(&p_group->ports); i++) { 521219820Sjeff cl_ptr_vector_at(&p_group->ports, i, (void *)&p_port); 522219820Sjeff if (p_port->port_num == port_num) 523219820Sjeff return; 524219820Sjeff } 525219820Sjeff 526219820Sjeff p_port = __osm_ftree_port_create(port_num, remote_port_num); 527219820Sjeff cl_ptr_vector_insert(&p_group->ports, p_port, NULL); 528219820Sjeff} 529219820Sjeff 530219820Sjeff/*************************************************** 531219820Sjeff ** 532219820Sjeff ** ftree_sw_t functions 533219820Sjeff ** 534219820Sjeff ***************************************************/ 535219820Sjeff 536219820Sjeffstatic ftree_sw_t *__osm_ftree_sw_create(IN ftree_fabric_t * p_ftree, 537219820Sjeff IN osm_switch_t * p_osm_sw) 538219820Sjeff{ 539219820Sjeff ftree_sw_t *p_sw; 540219820Sjeff uint8_t ports_num; 541219820Sjeff 542219820Sjeff /* make sure that the switch has ports */ 543219820Sjeff if (p_osm_sw->num_ports == 1) 544219820Sjeff return NULL; 545219820Sjeff 546219820Sjeff p_sw = (ftree_sw_t *) malloc(sizeof(ftree_sw_t)); 547219820Sjeff if (p_sw == NULL) 548219820Sjeff return NULL; 549219820Sjeff memset(p_sw, 0, sizeof(ftree_sw_t)); 550219820Sjeff 551219820Sjeff p_sw->p_osm_sw = p_osm_sw; 552219820Sjeff p_sw->rank = 0xFFFFFFFF; 553219820Sjeff __osm_ftree_tuple_init(p_sw->tuple); 
554219820Sjeff 555219820Sjeff p_sw->base_lid = osm_node_get_base_lid(p_sw->p_osm_sw->p_node, 0); 556219820Sjeff 557219820Sjeff ports_num = osm_node_get_num_physp(p_sw->p_osm_sw->p_node); 558219820Sjeff p_sw->down_port_groups = 559219820Sjeff (ftree_port_group_t **) malloc(ports_num * 560219820Sjeff sizeof(ftree_port_group_t *)); 561219820Sjeff p_sw->up_port_groups = 562219820Sjeff (ftree_port_group_t **) malloc(ports_num * 563219820Sjeff sizeof(ftree_port_group_t *)); 564219820Sjeff if (!p_sw->down_port_groups || !p_sw->up_port_groups) 565219820Sjeff return NULL; 566219820Sjeff p_sw->down_port_groups_num = 0; 567219820Sjeff p_sw->up_port_groups_num = 0; 568219820Sjeff 569219820Sjeff /* initialize lft buffer */ 570219820Sjeff memset(p_osm_sw->new_lft, OSM_NO_PATH, IB_LID_UCAST_END_HO + 1); 571219820Sjeff 572219820Sjeff p_sw->down_port_groups_idx = -1; 573219820Sjeff 574219820Sjeff return p_sw; 575219820Sjeff} /* __osm_ftree_sw_create() */ 576219820Sjeff 577219820Sjeff/***************************************************/ 578219820Sjeff 579219820Sjeffstatic void __osm_ftree_sw_destroy(IN ftree_fabric_t * p_ftree, 580219820Sjeff IN ftree_sw_t * p_sw) 581219820Sjeff{ 582219820Sjeff uint8_t i; 583219820Sjeff 584219820Sjeff if (!p_sw) 585219820Sjeff return; 586219820Sjeff 587219820Sjeff for (i = 0; i < p_sw->down_port_groups_num; i++) 588219820Sjeff __osm_ftree_port_group_destroy(p_sw->down_port_groups[i]); 589219820Sjeff for (i = 0; i < p_sw->up_port_groups_num; i++) 590219820Sjeff __osm_ftree_port_group_destroy(p_sw->up_port_groups[i]); 591219820Sjeff if (p_sw->down_port_groups) 592219820Sjeff free(p_sw->down_port_groups); 593219820Sjeff if (p_sw->up_port_groups) 594219820Sjeff free(p_sw->up_port_groups); 595219820Sjeff 596219820Sjeff free(p_sw); 597219820Sjeff} /* __osm_ftree_sw_destroy() */ 598219820Sjeff 599219820Sjeff/***************************************************/ 600219820Sjeff 601219820Sjeffstatic uint64_t __osm_ftree_sw_get_guid_no(IN ftree_sw_t * p_sw) 
602219820Sjeff{ 603219820Sjeff if (!p_sw) 604219820Sjeff return 0; 605219820Sjeff return osm_node_get_node_guid(p_sw->p_osm_sw->p_node); 606219820Sjeff} 607219820Sjeff 608219820Sjeff/***************************************************/ 609219820Sjeff 610219820Sjeffstatic uint64_t __osm_ftree_sw_get_guid_ho(IN ftree_sw_t * p_sw) 611219820Sjeff{ 612219820Sjeff return cl_ntoh64(__osm_ftree_sw_get_guid_no(p_sw)); 613219820Sjeff} 614219820Sjeff 615219820Sjeff/***************************************************/ 616219820Sjeff 617219820Sjeffstatic void __osm_ftree_sw_dump(IN ftree_fabric_t * p_ftree, 618219820Sjeff IN ftree_sw_t * p_sw) 619219820Sjeff{ 620219820Sjeff uint32_t i; 621219820Sjeff 622219820Sjeff if (!p_sw) 623219820Sjeff return; 624219820Sjeff 625219820Sjeff if (!osm_log_is_active(&p_ftree->p_osm->log, OSM_LOG_DEBUG)) 626219820Sjeff return; 627219820Sjeff 628219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, 629219820Sjeff "Switch index: %s, GUID: 0x%016" PRIx64 630219820Sjeff ", Ports: %u DOWN, %u UP\n", 631219820Sjeff __osm_ftree_tuple_to_str(p_sw->tuple), 632219820Sjeff __osm_ftree_sw_get_guid_ho(p_sw), p_sw->down_port_groups_num, 633219820Sjeff p_sw->up_port_groups_num); 634219820Sjeff 635219820Sjeff for (i = 0; i < p_sw->down_port_groups_num; i++) 636219820Sjeff __osm_ftree_port_group_dump(p_ftree, 637219820Sjeff p_sw->down_port_groups[i], 638219820Sjeff FTREE_DIRECTION_DOWN); 639219820Sjeff for (i = 0; i < p_sw->up_port_groups_num; i++) 640219820Sjeff __osm_ftree_port_group_dump(p_ftree, p_sw->up_port_groups[i], 641219820Sjeff FTREE_DIRECTION_UP); 642219820Sjeff 643219820Sjeff} /* __osm_ftree_sw_dump() */ 644219820Sjeff 645219820Sjeff/***************************************************/ 646219820Sjeff 647219820Sjeffstatic boolean_t __osm_ftree_sw_ranked(IN ftree_sw_t * p_sw) 648219820Sjeff{ 649219820Sjeff return (p_sw->rank != 0xFFFFFFFF); 650219820Sjeff} 651219820Sjeff 652219820Sjeff/***************************************************/ 
653219820Sjeff 654219820Sjeffstatic ftree_port_group_t * 655219820Sjeff__osm_ftree_sw_get_port_group_by_remote_lid(IN ftree_sw_t * p_sw, 656219820Sjeff IN ib_net16_t remote_base_lid, 657219820Sjeff IN ftree_direction_t direction) 658219820Sjeff{ 659219820Sjeff uint32_t i; 660219820Sjeff uint32_t size; 661219820Sjeff ftree_port_group_t **port_groups; 662219820Sjeff 663219820Sjeff if (direction == FTREE_DIRECTION_UP) { 664219820Sjeff port_groups = p_sw->up_port_groups; 665219820Sjeff size = p_sw->up_port_groups_num; 666219820Sjeff } else { 667219820Sjeff port_groups = p_sw->down_port_groups; 668219820Sjeff size = p_sw->down_port_groups_num; 669219820Sjeff } 670219820Sjeff 671219820Sjeff for (i = 0; i < size; i++) 672219820Sjeff if (remote_base_lid == port_groups[i]->remote_base_lid) 673219820Sjeff return port_groups[i]; 674219820Sjeff 675219820Sjeff return NULL; 676219820Sjeff} /* __osm_ftree_sw_get_port_group_by_remote_lid() */ 677219820Sjeff 678219820Sjeff/***************************************************/ 679219820Sjeff 680219820Sjeffstatic void 681219820Sjeff__osm_ftree_sw_add_port(IN ftree_sw_t * p_sw, 682219820Sjeff IN uint8_t port_num, 683219820Sjeff IN uint8_t remote_port_num, 684219820Sjeff IN ib_net16_t base_lid, 685219820Sjeff IN ib_net16_t remote_base_lid, 686219820Sjeff IN ib_net64_t port_guid, 687219820Sjeff IN ib_net64_t remote_port_guid, 688219820Sjeff IN ib_net64_t remote_node_guid, 689219820Sjeff IN uint8_t remote_node_type, 690219820Sjeff IN void *p_remote_hca_or_sw, 691219820Sjeff IN ftree_direction_t direction) 692219820Sjeff{ 693219820Sjeff ftree_port_group_t *p_group = 694219820Sjeff __osm_ftree_sw_get_port_group_by_remote_lid(p_sw, remote_base_lid, 695219820Sjeff direction); 696219820Sjeff 697219820Sjeff if (!p_group) { 698219820Sjeff p_group = __osm_ftree_port_group_create(base_lid, 699219820Sjeff remote_base_lid, 700219820Sjeff port_guid, 701219820Sjeff __osm_ftree_sw_get_guid_no 702219820Sjeff (p_sw), 703219820Sjeff IB_NODE_TYPE_SWITCH, 
704219820Sjeff p_sw, remote_port_guid, 705219820Sjeff remote_node_guid, 706219820Sjeff remote_node_type, 707219820Sjeff p_remote_hca_or_sw, 708219820Sjeff FALSE); 709219820Sjeff CL_ASSERT(p_group); 710219820Sjeff 711219820Sjeff if (direction == FTREE_DIRECTION_UP) 712219820Sjeff p_sw->up_port_groups[p_sw->up_port_groups_num++] = 713219820Sjeff p_group; 714219820Sjeff else 715219820Sjeff p_sw->down_port_groups[p_sw->down_port_groups_num++] = 716219820Sjeff p_group; 717219820Sjeff } 718219820Sjeff __osm_ftree_port_group_add_port(p_group, port_num, remote_port_num); 719219820Sjeff 720219820Sjeff} /* __osm_ftree_sw_add_port() */ 721219820Sjeff 722219820Sjeff/***************************************************/ 723219820Sjeff 724219820Sjeffstatic inline cl_status_t 725219820Sjeff__osm_ftree_sw_set_hops(IN ftree_sw_t * p_sw, 726219820Sjeff IN uint16_t lid_ho, IN uint8_t port_num, 727219820Sjeff IN uint8_t hops) 728219820Sjeff{ 729219820Sjeff /* set local min hop table(LID) */ 730219820Sjeff return osm_switch_set_hops(p_sw->p_osm_sw, lid_ho, port_num, hops); 731219820Sjeff} 732219820Sjeff 733219820Sjeff/*************************************************** 734219820Sjeff ** 735219820Sjeff ** ftree_hca_t functions 736219820Sjeff ** 737219820Sjeff ***************************************************/ 738219820Sjeff 739219820Sjeffstatic ftree_hca_t *__osm_ftree_hca_create(IN osm_node_t * p_osm_node) 740219820Sjeff{ 741219820Sjeff ftree_hca_t *p_hca = (ftree_hca_t *) malloc(sizeof(ftree_hca_t)); 742219820Sjeff if (p_hca == NULL) 743219820Sjeff return NULL; 744219820Sjeff memset(p_hca, 0, sizeof(ftree_hca_t)); 745219820Sjeff 746219820Sjeff p_hca->p_osm_node = p_osm_node; 747219820Sjeff p_hca->up_port_groups = (ftree_port_group_t **) 748219820Sjeff malloc(osm_node_get_num_physp(p_hca->p_osm_node) * 749219820Sjeff sizeof(ftree_port_group_t *)); 750219820Sjeff if (!p_hca->up_port_groups) 751219820Sjeff return NULL; 752219820Sjeff p_hca->up_port_groups_num = 0; 753219820Sjeff return 
p_hca; 754219820Sjeff} 755219820Sjeff 756219820Sjeff/***************************************************/ 757219820Sjeff 758219820Sjeffstatic void __osm_ftree_hca_destroy(IN ftree_hca_t * p_hca) 759219820Sjeff{ 760219820Sjeff uint32_t i; 761219820Sjeff 762219820Sjeff if (!p_hca) 763219820Sjeff return; 764219820Sjeff 765219820Sjeff for (i = 0; i < p_hca->up_port_groups_num; i++) 766219820Sjeff __osm_ftree_port_group_destroy(p_hca->up_port_groups[i]); 767219820Sjeff 768219820Sjeff if (p_hca->up_port_groups) 769219820Sjeff free(p_hca->up_port_groups); 770219820Sjeff 771219820Sjeff free(p_hca); 772219820Sjeff} 773219820Sjeff 774219820Sjeff/***************************************************/ 775219820Sjeff 776219820Sjeffstatic uint64_t __osm_ftree_hca_get_guid_no(IN ftree_hca_t * p_hca) 777219820Sjeff{ 778219820Sjeff if (!p_hca) 779219820Sjeff return 0; 780219820Sjeff return osm_node_get_node_guid(p_hca->p_osm_node); 781219820Sjeff} 782219820Sjeff 783219820Sjeff/***************************************************/ 784219820Sjeff 785219820Sjeffstatic uint64_t __osm_ftree_hca_get_guid_ho(IN ftree_hca_t * p_hca) 786219820Sjeff{ 787219820Sjeff return cl_ntoh64(__osm_ftree_hca_get_guid_no(p_hca)); 788219820Sjeff} 789219820Sjeff 790219820Sjeff/***************************************************/ 791219820Sjeff 792219820Sjeffstatic void __osm_ftree_hca_dump(IN ftree_fabric_t * p_ftree, 793219820Sjeff IN ftree_hca_t * p_hca) 794219820Sjeff{ 795219820Sjeff uint32_t i; 796219820Sjeff 797219820Sjeff if (!p_hca) 798219820Sjeff return; 799219820Sjeff 800219820Sjeff if (!osm_log_is_active(&p_ftree->p_osm->log, OSM_LOG_DEBUG)) 801219820Sjeff return; 802219820Sjeff 803219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, 804219820Sjeff "CA GUID: 0x%016" PRIx64 ", Ports: %u UP\n", 805219820Sjeff __osm_ftree_hca_get_guid_ho(p_hca), p_hca->up_port_groups_num); 806219820Sjeff 807219820Sjeff for (i = 0; i < p_hca->up_port_groups_num; i++) 808219820Sjeff 
__osm_ftree_port_group_dump(p_ftree, p_hca->up_port_groups[i], 809219820Sjeff FTREE_DIRECTION_UP); 810219820Sjeff} 811219820Sjeff 812219820Sjeff/***************************************************/ 813219820Sjeff 814219820Sjeffstatic ftree_port_group_t * 815219820Sjeff__osm_ftree_hca_get_port_group_by_remote_lid(IN ftree_hca_t * p_hca, 816219820Sjeff IN ib_net16_t remote_base_lid) 817219820Sjeff{ 818219820Sjeff uint32_t i; 819219820Sjeff for (i = 0; i < p_hca->up_port_groups_num; i++) 820219820Sjeff if (remote_base_lid == 821219820Sjeff p_hca->up_port_groups[i]->remote_base_lid) 822219820Sjeff return p_hca->up_port_groups[i]; 823219820Sjeff 824219820Sjeff return NULL; 825219820Sjeff} 826219820Sjeff 827219820Sjeff/***************************************************/ 828219820Sjeff 829219820Sjeffstatic void 830219820Sjeff__osm_ftree_hca_add_port(IN ftree_hca_t * p_hca, 831219820Sjeff IN uint8_t port_num, 832219820Sjeff IN uint8_t remote_port_num, 833219820Sjeff IN ib_net16_t base_lid, 834219820Sjeff IN ib_net16_t remote_base_lid, 835219820Sjeff IN ib_net64_t port_guid, 836219820Sjeff IN ib_net64_t remote_port_guid, 837219820Sjeff IN ib_net64_t remote_node_guid, 838219820Sjeff IN uint8_t remote_node_type, 839219820Sjeff IN void *p_remote_hca_or_sw, IN boolean_t is_cn) 840219820Sjeff{ 841219820Sjeff ftree_port_group_t *p_group; 842219820Sjeff 843219820Sjeff /* this function is supposed to be called only for adding ports 844219820Sjeff in hca's that lead to switches */ 845219820Sjeff CL_ASSERT(remote_node_type == IB_NODE_TYPE_SWITCH); 846219820Sjeff 847219820Sjeff p_group = 848219820Sjeff __osm_ftree_hca_get_port_group_by_remote_lid(p_hca, 849219820Sjeff remote_base_lid); 850219820Sjeff 851219820Sjeff if (!p_group) { 852219820Sjeff p_group = __osm_ftree_port_group_create(base_lid, 853219820Sjeff remote_base_lid, 854219820Sjeff port_guid, 855219820Sjeff __osm_ftree_hca_get_guid_no 856219820Sjeff (p_hca), 857219820Sjeff IB_NODE_TYPE_CA, p_hca, 858219820Sjeff 
remote_port_guid, 859219820Sjeff remote_node_guid, 860219820Sjeff remote_node_type, 861219820Sjeff p_remote_hca_or_sw, 862219820Sjeff is_cn); 863219820Sjeff p_hca->up_port_groups[p_hca->up_port_groups_num++] = p_group; 864219820Sjeff } 865219820Sjeff __osm_ftree_port_group_add_port(p_group, port_num, remote_port_num); 866219820Sjeff 867219820Sjeff} /* __osm_ftree_hca_add_port() */ 868219820Sjeff 869219820Sjeff/*************************************************** 870219820Sjeff ** 871219820Sjeff ** ftree_fabric_t functions 872219820Sjeff ** 873219820Sjeff ***************************************************/ 874219820Sjeff 875219820Sjeffstatic ftree_fabric_t *__osm_ftree_fabric_create() 876219820Sjeff{ 877219820Sjeff ftree_fabric_t *p_ftree = 878219820Sjeff (ftree_fabric_t *) malloc(sizeof(ftree_fabric_t)); 879219820Sjeff if (p_ftree == NULL) 880219820Sjeff return NULL; 881219820Sjeff 882219820Sjeff memset(p_ftree, 0, sizeof(ftree_fabric_t)); 883219820Sjeff 884219820Sjeff cl_qmap_init(&p_ftree->hca_tbl); 885219820Sjeff cl_qmap_init(&p_ftree->sw_tbl); 886219820Sjeff cl_qmap_init(&p_ftree->sw_by_tuple_tbl); 887219820Sjeff cl_qmap_init(&p_ftree->cn_guid_tbl); 888219820Sjeff 889219820Sjeff cl_qlist_init(&p_ftree->root_guid_list); 890219820Sjeff 891219820Sjeff return p_ftree; 892219820Sjeff} 893219820Sjeff 894219820Sjeff/***************************************************/ 895219820Sjeff 896219820Sjeffstatic void __osm_ftree_fabric_clear(ftree_fabric_t * p_ftree) 897219820Sjeff{ 898219820Sjeff ftree_hca_t *p_hca; 899219820Sjeff ftree_hca_t *p_next_hca; 900219820Sjeff ftree_sw_t *p_sw; 901219820Sjeff ftree_sw_t *p_next_sw; 902219820Sjeff ftree_sw_tbl_element_t *p_element; 903219820Sjeff ftree_sw_tbl_element_t *p_next_element; 904219820Sjeff name_map_item_t *p_guid_element, *p_next_guid_element; 905219820Sjeff 906219820Sjeff if (!p_ftree) 907219820Sjeff return; 908219820Sjeff 909219820Sjeff /* remove all the elements of hca_tbl */ 910219820Sjeff 911219820Sjeff p_next_hca = 
(ftree_hca_t *) cl_qmap_head(&p_ftree->hca_tbl); 912219820Sjeff while (p_next_hca != (ftree_hca_t *) cl_qmap_end(&p_ftree->hca_tbl)) { 913219820Sjeff p_hca = p_next_hca; 914219820Sjeff p_next_hca = (ftree_hca_t *) cl_qmap_next(&p_hca->map_item); 915219820Sjeff __osm_ftree_hca_destroy(p_hca); 916219820Sjeff } 917219820Sjeff cl_qmap_remove_all(&p_ftree->hca_tbl); 918219820Sjeff 919219820Sjeff /* remove all the elements of sw_tbl */ 920219820Sjeff 921219820Sjeff p_next_sw = (ftree_sw_t *) cl_qmap_head(&p_ftree->sw_tbl); 922219820Sjeff while (p_next_sw != (ftree_sw_t *) cl_qmap_end(&p_ftree->sw_tbl)) { 923219820Sjeff p_sw = p_next_sw; 924219820Sjeff p_next_sw = (ftree_sw_t *) cl_qmap_next(&p_sw->map_item); 925219820Sjeff __osm_ftree_sw_destroy(p_ftree, p_sw); 926219820Sjeff } 927219820Sjeff cl_qmap_remove_all(&p_ftree->sw_tbl); 928219820Sjeff 929219820Sjeff /* remove all the elements of sw_by_tuple_tbl */ 930219820Sjeff 931219820Sjeff p_next_element = 932219820Sjeff (ftree_sw_tbl_element_t *) cl_qmap_head(&p_ftree->sw_by_tuple_tbl); 933219820Sjeff while (p_next_element != 934219820Sjeff (ftree_sw_tbl_element_t *) cl_qmap_end(&p_ftree-> 935219820Sjeff sw_by_tuple_tbl)) { 936219820Sjeff p_element = p_next_element; 937219820Sjeff p_next_element = 938219820Sjeff (ftree_sw_tbl_element_t *) cl_qmap_next(&p_element-> 939219820Sjeff map_item); 940219820Sjeff __osm_ftree_sw_tbl_element_destroy(p_element); 941219820Sjeff } 942219820Sjeff cl_qmap_remove_all(&p_ftree->sw_by_tuple_tbl); 943219820Sjeff 944219820Sjeff /* remove all the elements of cn_guid_tbl */ 945219820Sjeff p_next_guid_element = 946219820Sjeff (name_map_item_t *) cl_qmap_head(&p_ftree->cn_guid_tbl); 947219820Sjeff while (p_next_guid_element != 948219820Sjeff (name_map_item_t *) cl_qmap_end(&p_ftree->cn_guid_tbl)) { 949219820Sjeff p_guid_element = p_next_guid_element; 950219820Sjeff p_next_guid_element = 951219820Sjeff (name_map_item_t *) cl_qmap_next(&p_guid_element->item); 952219820Sjeff free(p_guid_element); 
953219820Sjeff } 954219820Sjeff cl_qmap_remove_all(&p_ftree->cn_guid_tbl); 955219820Sjeff 956219820Sjeff /* remove all the elements of root_guid_list */ 957219820Sjeff while (!cl_is_qlist_empty(&p_ftree->root_guid_list)) 958219820Sjeff free(cl_qlist_remove_head(&p_ftree->root_guid_list)); 959219820Sjeff 960219820Sjeff /* free the leaf switches array */ 961219820Sjeff if ((p_ftree->leaf_switches_num > 0) && (p_ftree->leaf_switches)) 962219820Sjeff free(p_ftree->leaf_switches); 963219820Sjeff 964219820Sjeff p_ftree->leaf_switches_num = 0; 965219820Sjeff p_ftree->cn_num = 0; 966219820Sjeff p_ftree->leaf_switch_rank = 0; 967219820Sjeff p_ftree->max_switch_rank = 0; 968219820Sjeff p_ftree->max_cn_per_leaf = 0; 969219820Sjeff p_ftree->lft_max_lid_ho = 0; 970219820Sjeff p_ftree->leaf_switches = NULL; 971219820Sjeff p_ftree->fabric_built = FALSE; 972219820Sjeff 973219820Sjeff} /* __osm_ftree_fabric_destroy() */ 974219820Sjeff 975219820Sjeff/***************************************************/ 976219820Sjeff 977219820Sjeffstatic void __osm_ftree_fabric_destroy(ftree_fabric_t * p_ftree) 978219820Sjeff{ 979219820Sjeff if (!p_ftree) 980219820Sjeff return; 981219820Sjeff __osm_ftree_fabric_clear(p_ftree); 982219820Sjeff free(p_ftree); 983219820Sjeff} 984219820Sjeff 985219820Sjeff/***************************************************/ 986219820Sjeff 987219820Sjeffstatic uint8_t __osm_ftree_fabric_get_rank(ftree_fabric_t * p_ftree) 988219820Sjeff{ 989219820Sjeff return p_ftree->leaf_switch_rank + 1; 990219820Sjeff} 991219820Sjeff 992219820Sjeff/***************************************************/ 993219820Sjeff 994219820Sjeffstatic void __osm_ftree_fabric_add_hca(ftree_fabric_t * p_ftree, 995219820Sjeff osm_node_t * p_osm_node) 996219820Sjeff{ 997219820Sjeff ftree_hca_t *p_hca = __osm_ftree_hca_create(p_osm_node); 998219820Sjeff 999219820Sjeff CL_ASSERT(osm_node_get_type(p_osm_node) == IB_NODE_TYPE_CA); 1000219820Sjeff 1001219820Sjeff cl_qmap_insert(&p_ftree->hca_tbl, 
p_osm_node->node_info.node_guid, 1002219820Sjeff &p_hca->map_item); 1003219820Sjeff} 1004219820Sjeff 1005219820Sjeff/***************************************************/ 1006219820Sjeff 1007219820Sjeffstatic void __osm_ftree_fabric_add_sw(ftree_fabric_t * p_ftree, 1008219820Sjeff osm_switch_t * p_osm_sw) 1009219820Sjeff{ 1010219820Sjeff ftree_sw_t *p_sw = __osm_ftree_sw_create(p_ftree, p_osm_sw); 1011219820Sjeff 1012219820Sjeff CL_ASSERT(osm_node_get_type(p_osm_sw->p_node) == IB_NODE_TYPE_SWITCH); 1013219820Sjeff 1014219820Sjeff cl_qmap_insert(&p_ftree->sw_tbl, p_osm_sw->p_node->node_info.node_guid, 1015219820Sjeff &p_sw->map_item); 1016219820Sjeff 1017219820Sjeff /* track the max lid (in host order) that exists in the fabric */ 1018219820Sjeff if (cl_ntoh16(p_sw->base_lid) > p_ftree->lft_max_lid_ho) 1019219820Sjeff p_ftree->lft_max_lid_ho = cl_ntoh16(p_sw->base_lid); 1020219820Sjeff} 1021219820Sjeff 1022219820Sjeff/***************************************************/ 1023219820Sjeff 1024219820Sjeffstatic void __osm_ftree_fabric_add_sw_by_tuple(IN ftree_fabric_t * p_ftree, 1025219820Sjeff IN ftree_sw_t * p_sw) 1026219820Sjeff{ 1027219820Sjeff CL_ASSERT(__osm_ftree_tuple_assigned(p_sw->tuple)); 1028219820Sjeff 1029219820Sjeff cl_qmap_insert(&p_ftree->sw_by_tuple_tbl, 1030219820Sjeff __osm_ftree_tuple_to_key(p_sw->tuple), 1031219820Sjeff &__osm_ftree_sw_tbl_element_create(p_sw)->map_item); 1032219820Sjeff} 1033219820Sjeff 1034219820Sjeff/***************************************************/ 1035219820Sjeff 1036219820Sjeffstatic ftree_sw_t *__osm_ftree_fabric_get_sw_by_tuple(IN ftree_fabric_t * 1037219820Sjeff p_ftree, 1038219820Sjeff IN ftree_tuple_t tuple) 1039219820Sjeff{ 1040219820Sjeff ftree_sw_tbl_element_t *p_element; 1041219820Sjeff 1042219820Sjeff CL_ASSERT(__osm_ftree_tuple_assigned(tuple)); 1043219820Sjeff 1044219820Sjeff __osm_ftree_tuple_to_key(tuple); 1045219820Sjeff 1046219820Sjeff p_element = 1047219820Sjeff (ftree_sw_tbl_element_t *) 
cl_qmap_get(&p_ftree->sw_by_tuple_tbl, 1048219820Sjeff __osm_ftree_tuple_to_key 1049219820Sjeff (tuple)); 1050219820Sjeff if (p_element == 1051219820Sjeff (ftree_sw_tbl_element_t *) cl_qmap_end(&p_ftree->sw_by_tuple_tbl)) 1052219820Sjeff return NULL; 1053219820Sjeff 1054219820Sjeff return p_element->p_sw; 1055219820Sjeff} 1056219820Sjeff 1057219820Sjeff/***************************************************/ 1058219820Sjeff 1059219820Sjeffstatic ftree_sw_t *__osm_ftree_fabric_get_sw_by_guid(IN ftree_fabric_t * 1060219820Sjeff p_ftree, IN uint64_t guid) 1061219820Sjeff{ 1062219820Sjeff ftree_sw_t *p_sw; 1063219820Sjeff p_sw = (ftree_sw_t *) cl_qmap_get(&p_ftree->sw_tbl, guid); 1064219820Sjeff if (p_sw == (ftree_sw_t *) cl_qmap_end(&p_ftree->sw_tbl)) 1065219820Sjeff return NULL; 1066219820Sjeff return p_sw; 1067219820Sjeff} 1068219820Sjeff 1069219820Sjeff/***************************************************/ 1070219820Sjeff 1071219820Sjeffstatic ftree_hca_t *__osm_ftree_fabric_get_hca_by_guid(IN ftree_fabric_t * 1072219820Sjeff p_ftree, 1073219820Sjeff IN uint64_t guid) 1074219820Sjeff{ 1075219820Sjeff ftree_hca_t *p_hca; 1076219820Sjeff p_hca = (ftree_hca_t *) cl_qmap_get(&p_ftree->hca_tbl, guid); 1077219820Sjeff if (p_hca == (ftree_hca_t *) cl_qmap_end(&p_ftree->hca_tbl)) 1078219820Sjeff return NULL; 1079219820Sjeff return p_hca; 1080219820Sjeff} 1081219820Sjeff 1082219820Sjeff/***************************************************/ 1083219820Sjeff 1084219820Sjeffstatic void __osm_ftree_fabric_dump(ftree_fabric_t * p_ftree) 1085219820Sjeff{ 1086219820Sjeff uint32_t i; 1087219820Sjeff ftree_hca_t *p_hca; 1088219820Sjeff ftree_sw_t *p_sw; 1089219820Sjeff 1090219820Sjeff if (!osm_log_is_active(&p_ftree->p_osm->log, OSM_LOG_DEBUG)) 1091219820Sjeff return; 1092219820Sjeff 1093219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, "\n" 1094219820Sjeff " |-------------------------------|\n" 1095219820Sjeff " |- Full fabric topology dump -|\n" 1096219820Sjeff " 
|-------------------------------|\n\n"); 1097219820Sjeff 1098219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, "-- CAs:\n"); 1099219820Sjeff 1100219820Sjeff for (p_hca = (ftree_hca_t *) cl_qmap_head(&p_ftree->hca_tbl); 1101219820Sjeff p_hca != (ftree_hca_t *) cl_qmap_end(&p_ftree->hca_tbl); 1102219820Sjeff p_hca = (ftree_hca_t *) cl_qmap_next(&p_hca->map_item)) { 1103219820Sjeff __osm_ftree_hca_dump(p_ftree, p_hca); 1104219820Sjeff } 1105219820Sjeff 1106219820Sjeff for (i = 0; i < p_ftree->max_switch_rank; i++) { 1107219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, 1108219820Sjeff "-- Rank %u switches\n", i); 1109219820Sjeff for (p_sw = (ftree_sw_t *) cl_qmap_head(&p_ftree->sw_tbl); 1110219820Sjeff p_sw != (ftree_sw_t *) cl_qmap_end(&p_ftree->sw_tbl); 1111219820Sjeff p_sw = (ftree_sw_t *) cl_qmap_next(&p_sw->map_item)) { 1112219820Sjeff if (p_sw->rank == i) 1113219820Sjeff __osm_ftree_sw_dump(p_ftree, p_sw); 1114219820Sjeff } 1115219820Sjeff } 1116219820Sjeff 1117219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, "\n" 1118219820Sjeff " |---------------------------------------|\n" 1119219820Sjeff " |- Full fabric topology dump completed -|\n" 1120219820Sjeff " |---------------------------------------|\n\n"); 1121219820Sjeff} /* __osm_ftree_fabric_dump() */ 1122219820Sjeff 1123219820Sjeff/***************************************************/ 1124219820Sjeff 1125219820Sjeffstatic void __osm_ftree_fabric_dump_general_info(IN ftree_fabric_t * p_ftree) 1126219820Sjeff{ 1127219820Sjeff uint32_t i, j; 1128219820Sjeff ftree_sw_t *p_sw; 1129219820Sjeff 1130219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_INFO, 1131219820Sjeff "General fabric topology info\n"); 1132219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_INFO, 1133219820Sjeff "============================\n"); 1134219820Sjeff 1135219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_INFO, 1136219820Sjeff " - FatTree rank (roots to leaf switches): %u\n", 1137219820Sjeff p_ftree->leaf_switch_rank + 
1); 1138219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_INFO, 1139219820Sjeff " - FatTree max switch rank: %u\n", p_ftree->max_switch_rank); 1140219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_INFO, 1141219820Sjeff " - Fabric has %u CAs (%u of them CNs), %u switches\n", 1142219820Sjeff cl_qmap_count(&p_ftree->hca_tbl), p_ftree->cn_num, 1143219820Sjeff cl_qmap_count(&p_ftree->sw_tbl)); 1144219820Sjeff 1145219820Sjeff CL_ASSERT(cl_qmap_count(&p_ftree->hca_tbl) >= p_ftree->cn_num); 1146219820Sjeff 1147219820Sjeff for (i = 0; i <= p_ftree->max_switch_rank; i++) { 1148219820Sjeff j = 0; 1149219820Sjeff for (p_sw = (ftree_sw_t *) cl_qmap_head(&p_ftree->sw_tbl); 1150219820Sjeff p_sw != (ftree_sw_t *) cl_qmap_end(&p_ftree->sw_tbl); 1151219820Sjeff p_sw = (ftree_sw_t *) cl_qmap_next(&p_sw->map_item)) { 1152219820Sjeff if (p_sw->rank == i) 1153219820Sjeff j++; 1154219820Sjeff } 1155219820Sjeff if (i == 0) 1156219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_INFO, 1157219820Sjeff " - Fabric has %u switches at rank %u (roots)\n", 1158219820Sjeff j, i); 1159219820Sjeff else if (i == p_ftree->leaf_switch_rank) 1160219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_INFO, 1161219820Sjeff " - Fabric has %u switches at rank %u (%u of them leafs)\n", 1162219820Sjeff j, i, p_ftree->leaf_switches_num); 1163219820Sjeff else 1164219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_INFO, 1165219820Sjeff " - Fabric has %u switches at rank %u\n", j, 1166219820Sjeff i); 1167219820Sjeff } 1168219820Sjeff 1169219820Sjeff if (osm_log_is_active(&p_ftree->p_osm->log, OSM_LOG_VERBOSE)) { 1170219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, 1171219820Sjeff " - Root switches:\n"); 1172219820Sjeff for (p_sw = (ftree_sw_t *) cl_qmap_head(&p_ftree->sw_tbl); 1173219820Sjeff p_sw != (ftree_sw_t *) cl_qmap_end(&p_ftree->sw_tbl); 1174219820Sjeff p_sw = (ftree_sw_t *) cl_qmap_next(&p_sw->map_item)) { 1175219820Sjeff if (p_sw->rank == 0) 1176219820Sjeff OSM_LOG(&p_ftree->p_osm->log, 
OSM_LOG_VERBOSE, 1177219820Sjeff " GUID: 0x%016" PRIx64 1178219820Sjeff ", LID: %u, Index %s\n", 1179219820Sjeff __osm_ftree_sw_get_guid_ho(p_sw), 1180219820Sjeff cl_ntoh16(p_sw->base_lid), 1181219820Sjeff __osm_ftree_tuple_to_str(p_sw->tuple)); 1182219820Sjeff } 1183219820Sjeff 1184219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, 1185219820Sjeff " - Leaf switches (sorted by index):\n"); 1186219820Sjeff for (i = 0; i < p_ftree->leaf_switches_num; i++) { 1187219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, 1188219820Sjeff " GUID: 0x%016" PRIx64 1189219820Sjeff ", LID: %u, Index %s\n", 1190219820Sjeff __osm_ftree_sw_get_guid_ho(p_ftree-> 1191219820Sjeff leaf_switches[i]), 1192219820Sjeff cl_ntoh16(p_ftree->leaf_switches[i]->base_lid), 1193219820Sjeff __osm_ftree_tuple_to_str(p_ftree-> 1194219820Sjeff leaf_switches[i]-> 1195219820Sjeff tuple)); 1196219820Sjeff } 1197219820Sjeff } 1198219820Sjeff} /* __osm_ftree_fabric_dump_general_info() */ 1199219820Sjeff 1200219820Sjeff/***************************************************/ 1201219820Sjeff 1202219820Sjeffstatic void __osm_ftree_fabric_dump_hca_ordering(IN ftree_fabric_t * p_ftree) 1203219820Sjeff{ 1204219820Sjeff ftree_hca_t *p_hca; 1205219820Sjeff ftree_sw_t *p_sw; 1206219820Sjeff ftree_port_group_t *p_group_on_sw; 1207219820Sjeff ftree_port_group_t *p_group_on_hca; 1208219820Sjeff uint32_t i; 1209219820Sjeff uint32_t j; 1210219820Sjeff unsigned printed_hcas_on_leaf; 1211219820Sjeff 1212219820Sjeff char path[1024]; 1213219820Sjeff FILE *p_hca_ordering_file; 1214219820Sjeff char *filename = "opensm-ftree-ca-order.dump"; 1215219820Sjeff 1216219820Sjeff snprintf(path, sizeof(path), "%s/%s", 1217219820Sjeff p_ftree->p_osm->subn.opt.dump_files_dir, filename); 1218219820Sjeff p_hca_ordering_file = fopen(path, "w"); 1219219820Sjeff if (!p_hca_ordering_file) { 1220219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR, "ERR AB01: " 1221219820Sjeff "cannot open file \'%s\': %s\n", filename, 
1222219820Sjeff strerror(errno)); 1223219820Sjeff return; 1224219820Sjeff } 1225219820Sjeff 1226219820Sjeff /* for each leaf switch (in indexing order) */ 1227219820Sjeff for (i = 0; i < p_ftree->leaf_switches_num; i++) { 1228219820Sjeff p_sw = p_ftree->leaf_switches[i]; 1229219820Sjeff printed_hcas_on_leaf = 0; 1230219820Sjeff 1231219820Sjeff /* for each real CA (CNs and not) connected to this switch */ 1232219820Sjeff for (j = 0; j < p_sw->down_port_groups_num; j++) { 1233219820Sjeff p_group_on_sw = p_sw->down_port_groups[j]; 1234219820Sjeff 1235219820Sjeff if (p_group_on_sw->remote_node_type != IB_NODE_TYPE_CA) 1236219820Sjeff continue; 1237219820Sjeff 1238219820Sjeff p_hca = p_group_on_sw->remote_hca_or_sw.p_hca; 1239219820Sjeff p_group_on_hca = 1240219820Sjeff __osm_ftree_hca_get_port_group_by_remote_lid(p_hca, 1241219820Sjeff p_group_on_sw-> 1242219820Sjeff base_lid); 1243219820Sjeff 1244219820Sjeff /* treat non-compute nodes as dummies */ 1245219820Sjeff if (!p_group_on_hca->is_cn) 1246219820Sjeff continue; 1247219820Sjeff 1248219820Sjeff fprintf(p_hca_ordering_file, "0x%04x\t%s\n", 1249219820Sjeff cl_ntoh16(p_group_on_hca->base_lid), 1250219820Sjeff p_hca->p_osm_node->print_desc); 1251219820Sjeff 1252219820Sjeff printed_hcas_on_leaf++; 1253219820Sjeff } 1254219820Sjeff 1255219820Sjeff /* now print missing HCAs */ 1256219820Sjeff for (j = 0; 1257219820Sjeff j < (p_ftree->max_cn_per_leaf - printed_hcas_on_leaf); j++) 1258219820Sjeff fprintf(p_hca_ordering_file, "0xFFFF\tDUMMY\n"); 1259219820Sjeff 1260219820Sjeff } 1261219820Sjeff /* done going through all the leaf switches */ 1262219820Sjeff 1263219820Sjeff fclose(p_hca_ordering_file); 1264219820Sjeff} /* __osm_ftree_fabric_dump_hca_ordering() */ 1265219820Sjeff 1266219820Sjeff/***************************************************/ 1267219820Sjeff 1268219820Sjeffstatic void 1269219820Sjeff__osm_ftree_fabric_assign_tuple(IN ftree_fabric_t * p_ftree, 1270219820Sjeff IN ftree_sw_t * p_sw, 1271219820Sjeff IN 
ftree_tuple_t new_tuple) 1272219820Sjeff{ 1273219820Sjeff memcpy(p_sw->tuple, new_tuple, FTREE_TUPLE_LEN); 1274219820Sjeff __osm_ftree_fabric_add_sw_by_tuple(p_ftree, p_sw); 1275219820Sjeff} 1276219820Sjeff 1277219820Sjeff/***************************************************/ 1278219820Sjeff 1279219820Sjeffstatic void __osm_ftree_fabric_assign_first_tuple(IN ftree_fabric_t * p_ftree, 1280219820Sjeff IN ftree_sw_t * p_sw) 1281219820Sjeff{ 1282219820Sjeff uint8_t i; 1283219820Sjeff ftree_tuple_t new_tuple; 1284219820Sjeff 1285219820Sjeff __osm_ftree_tuple_init(new_tuple); 1286219820Sjeff new_tuple[0] = (uint8_t) p_sw->rank; 1287219820Sjeff for (i = 1; i <= p_sw->rank; i++) 1288219820Sjeff new_tuple[i] = 0; 1289219820Sjeff 1290219820Sjeff __osm_ftree_fabric_assign_tuple(p_ftree, p_sw, new_tuple); 1291219820Sjeff} 1292219820Sjeff 1293219820Sjeff/***************************************************/ 1294219820Sjeff 1295219820Sjeffstatic void 1296219820Sjeff__osm_ftree_fabric_get_new_tuple(IN ftree_fabric_t * p_ftree, 1297219820Sjeff OUT ftree_tuple_t new_tuple, 1298219820Sjeff IN ftree_tuple_t from_tuple, 1299219820Sjeff IN ftree_direction_t direction) 1300219820Sjeff{ 1301219820Sjeff ftree_sw_t *p_sw; 1302219820Sjeff ftree_tuple_t temp_tuple; 1303219820Sjeff uint8_t var_index; 1304219820Sjeff uint8_t i; 1305219820Sjeff 1306219820Sjeff __osm_ftree_tuple_init(new_tuple); 1307219820Sjeff memcpy(temp_tuple, from_tuple, FTREE_TUPLE_LEN); 1308219820Sjeff 1309219820Sjeff if (direction == FTREE_DIRECTION_DOWN) { 1310219820Sjeff temp_tuple[0]++; 1311219820Sjeff var_index = from_tuple[0] + 1; 1312219820Sjeff } else { 1313219820Sjeff temp_tuple[0]--; 1314219820Sjeff var_index = from_tuple[0]; 1315219820Sjeff } 1316219820Sjeff 1317219820Sjeff for (i = 0; i < 0xFF; i++) { 1318219820Sjeff temp_tuple[var_index] = i; 1319219820Sjeff p_sw = __osm_ftree_fabric_get_sw_by_tuple(p_ftree, temp_tuple); 1320219820Sjeff if (p_sw == NULL) /* found free tuple */ 1321219820Sjeff break; 
1322219820Sjeff } 1323219820Sjeff 1324219820Sjeff if (i == 0xFF) { 1325219820Sjeff /* new tuple not found - there are more than 255 ports in one direction */ 1326219820Sjeff return; 1327219820Sjeff } 1328219820Sjeff memcpy(new_tuple, temp_tuple, FTREE_TUPLE_LEN); 1329219820Sjeff 1330219820Sjeff} /* __osm_ftree_fabric_get_new_tuple() */ 1331219820Sjeff 1332219820Sjeff/***************************************************/ 1333219820Sjeff 1334219820Sjeffstatic inline boolean_t __osm_ftree_fabric_roots_provided(IN ftree_fabric_t * 1335219820Sjeff p_ftree) 1336219820Sjeff{ 1337219820Sjeff return (p_ftree->p_osm->subn.opt.root_guid_file != NULL); 1338219820Sjeff} 1339219820Sjeff 1340219820Sjeff/***************************************************/ 1341219820Sjeff 1342219820Sjeffstatic inline boolean_t __osm_ftree_fabric_cns_provided(IN ftree_fabric_t * 1343219820Sjeff p_ftree) 1344219820Sjeff{ 1345219820Sjeff return (p_ftree->p_osm->subn.opt.cn_guid_file != NULL); 1346219820Sjeff} 1347219820Sjeff 1348219820Sjeff/***************************************************/ 1349219820Sjeff 1350219820Sjeffstatic int __osm_ftree_fabric_mark_leaf_switches(IN ftree_fabric_t * p_ftree) 1351219820Sjeff{ 1352219820Sjeff ftree_sw_t *p_sw; 1353219820Sjeff ftree_hca_t *p_hca; 1354219820Sjeff ftree_hca_t *p_next_hca; 1355219820Sjeff unsigned i; 1356219820Sjeff int res = 0; 1357219820Sjeff 1358219820Sjeff OSM_LOG_ENTER(&p_ftree->p_osm->log); 1359219820Sjeff 1360219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, 1361219820Sjeff "Marking leaf switches in fabric\n"); 1362219820Sjeff 1363219820Sjeff /* Scan all the CAs, if they have CNs - find CN port and mark switch 1364219820Sjeff that is connected to this port as leaf switch. 1365219820Sjeff Also, ensure that this marked leaf has rank of p_ftree->leaf_switch_rank. 
*/ 1366219820Sjeff p_next_hca = (ftree_hca_t *) cl_qmap_head(&p_ftree->hca_tbl); 1367219820Sjeff while (p_next_hca != (ftree_hca_t *) cl_qmap_end(&p_ftree->hca_tbl)) { 1368219820Sjeff p_hca = p_next_hca; 1369219820Sjeff p_next_hca = (ftree_hca_t *) cl_qmap_next(&p_hca->map_item); 1370219820Sjeff if (!p_hca->cn_num) 1371219820Sjeff continue; 1372219820Sjeff 1373219820Sjeff for (i = 0; i < p_hca->up_port_groups_num; i++) { 1374219820Sjeff if (!p_hca->up_port_groups[i]->is_cn) 1375219820Sjeff continue; 1376219820Sjeff 1377219820Sjeff /* In CAs, port group alway has one port, and since this 1378219820Sjeff port group is CN, we know that this port is compute node */ 1379219820Sjeff CL_ASSERT(p_hca->up_port_groups[i]->remote_node_type == 1380219820Sjeff IB_NODE_TYPE_SWITCH); 1381219820Sjeff p_sw = p_hca->up_port_groups[i]->remote_hca_or_sw.p_sw; 1382219820Sjeff 1383219820Sjeff /* check if this switch was already processed */ 1384219820Sjeff if (p_sw->is_leaf) 1385219820Sjeff continue; 1386219820Sjeff p_sw->is_leaf = TRUE; 1387219820Sjeff 1388219820Sjeff /* ensure that this leaf switch is at the correct tree level */ 1389219820Sjeff if (p_sw->rank != p_ftree->leaf_switch_rank) { 1390219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR, 1391219820Sjeff "ERR AB26: CN port 0x%" PRIx64 1392219820Sjeff " is connected to switch 0x%" PRIx64 1393219820Sjeff " with rank %u, " 1394219820Sjeff "while FatTree leaf rank is %u\n", 1395219820Sjeff cl_ntoh64(p_hca->up_port_groups[i]-> 1396219820Sjeff port_guid), 1397219820Sjeff __osm_ftree_sw_get_guid_ho(p_sw), 1398219820Sjeff p_sw->rank, p_ftree->leaf_switch_rank); 1399219820Sjeff res = -1; 1400219820Sjeff goto Exit; 1401219820Sjeff 1402219820Sjeff } 1403219820Sjeff } 1404219820Sjeff } 1405219820Sjeff 1406219820SjeffExit: 1407219820Sjeff OSM_LOG_EXIT(&p_ftree->p_osm->log); 1408219820Sjeff return res; 1409219820Sjeff} /* __osm_ftree_fabric_mark_leaf_switches() */ 1410219820Sjeff 
1411219820Sjeff/***************************************************/ 1412219820Sjeff 1413219820Sjeffstatic void __osm_ftree_fabric_make_indexing(IN ftree_fabric_t * p_ftree) 1414219820Sjeff{ 1415219820Sjeff ftree_sw_t *p_remote_sw; 1416219820Sjeff ftree_sw_t *p_sw = NULL; 1417219820Sjeff ftree_sw_t *p_next_sw; 1418219820Sjeff ftree_tuple_t new_tuple; 1419219820Sjeff uint32_t i; 1420219820Sjeff cl_list_t bfs_list; 1421219820Sjeff ftree_sw_tbl_element_t *p_sw_tbl_element; 1422219820Sjeff 1423219820Sjeff OSM_LOG_ENTER(&p_ftree->p_osm->log); 1424219820Sjeff 1425219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, 1426219820Sjeff "Starting FatTree indexing\n"); 1427219820Sjeff 1428219820Sjeff /* using the first leaf switch as a starting point for indexing algorithm. */ 1429219820Sjeff p_next_sw = (ftree_sw_t *) cl_qmap_head(&p_ftree->sw_tbl); 1430219820Sjeff while (p_next_sw != (ftree_sw_t *) cl_qmap_end(&p_ftree->sw_tbl)) { 1431219820Sjeff p_sw = p_next_sw; 1432219820Sjeff if (p_sw->is_leaf) 1433219820Sjeff break; 1434219820Sjeff p_next_sw = (ftree_sw_t *) cl_qmap_next(&p_sw->map_item); 1435219820Sjeff } 1436219820Sjeff 1437219820Sjeff CL_ASSERT(p_next_sw != (ftree_sw_t *) cl_qmap_end(&p_ftree->sw_tbl)); 1438219820Sjeff 1439219820Sjeff /* Assign the first tuple to the switch that is used as BFS starting point. 1440219820Sjeff The tuple will be as follows: [rank].0.0.0... 1441219820Sjeff This fuction also adds the switch it into the switch_by_tuple table. 
*/ 1442219820Sjeff __osm_ftree_fabric_assign_first_tuple(p_ftree, p_sw); 1443219820Sjeff 1444219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, 1445219820Sjeff "Indexing starting point:\n" 1446219820Sjeff " - Switch rank : %u\n" 1447219820Sjeff " - Switch index : %s\n" 1448219820Sjeff " - Node LID : %u\n" 1449219820Sjeff " - Node GUID : 0x%016" 1450219820Sjeff PRIx64 "\n", p_sw->rank, __osm_ftree_tuple_to_str(p_sw->tuple), 1451219820Sjeff cl_ntoh16(p_sw->base_lid), __osm_ftree_sw_get_guid_ho(p_sw)); 1452219820Sjeff 1453219820Sjeff /* 1454219820Sjeff * Now run BFS and assign indexes to all switches 1455219820Sjeff * Pseudo code of the algorithm is as follows: 1456219820Sjeff * 1457219820Sjeff * * Add first switch to BFS queue 1458219820Sjeff * * While (BFS queue not empty) 1459219820Sjeff * - Pop the switch from the head of the queue 1460219820Sjeff * - Scan all the downward and upward ports 1461219820Sjeff * - For each port 1462219820Sjeff * + Get the remote switch 1463219820Sjeff * + Assign index to the remote switch 1464219820Sjeff * + Add remote switch to the BFS queue 1465219820Sjeff */ 1466219820Sjeff 1467219820Sjeff cl_list_init(&bfs_list, cl_qmap_count(&p_ftree->sw_tbl)); 1468219820Sjeff cl_list_insert_tail(&bfs_list, 1469219820Sjeff &__osm_ftree_sw_tbl_element_create(p_sw)->map_item); 1470219820Sjeff 1471219820Sjeff while (!cl_is_list_empty(&bfs_list)) { 1472219820Sjeff p_sw_tbl_element = 1473219820Sjeff (ftree_sw_tbl_element_t *) cl_list_remove_head(&bfs_list); 1474219820Sjeff p_sw = p_sw_tbl_element->p_sw; 1475219820Sjeff __osm_ftree_sw_tbl_element_destroy(p_sw_tbl_element); 1476219820Sjeff 1477219820Sjeff /* Discover all the nodes from ports that are pointing down */ 1478219820Sjeff 1479219820Sjeff if (p_sw->rank >= p_ftree->leaf_switch_rank) { 1480219820Sjeff /* whether downward ports are pointing to CAs or switches, 1481219820Sjeff we don't assign indexes to switches that are located 1482219820Sjeff lower than leaf switches */ 1483219820Sjeff } 
else { 1484219820Sjeff /* This is not the leaf switch */ 1485219820Sjeff for (i = 0; i < p_sw->down_port_groups_num; i++) { 1486219820Sjeff /* Work with port groups that are pointing to switches only. 1487219820Sjeff No need to assign indexing to HCAs */ 1488219820Sjeff if (p_sw->down_port_groups[i]-> 1489219820Sjeff remote_node_type != IB_NODE_TYPE_SWITCH) 1490219820Sjeff continue; 1491219820Sjeff 1492219820Sjeff p_remote_sw = 1493219820Sjeff p_sw->down_port_groups[i]->remote_hca_or_sw. 1494219820Sjeff p_sw; 1495219820Sjeff if (__osm_ftree_tuple_assigned 1496219820Sjeff (p_remote_sw->tuple)) { 1497219820Sjeff /* this switch has been already indexed */ 1498219820Sjeff continue; 1499219820Sjeff } 1500219820Sjeff /* allocate new tuple */ 1501219820Sjeff __osm_ftree_fabric_get_new_tuple(p_ftree, 1502219820Sjeff new_tuple, 1503219820Sjeff p_sw->tuple, 1504219820Sjeff FTREE_DIRECTION_DOWN); 1505219820Sjeff /* Assign the new tuple to the remote switch. 1506219820Sjeff This fuction also adds the switch into the switch_by_tuple table. */ 1507219820Sjeff __osm_ftree_fabric_assign_tuple(p_ftree, 1508219820Sjeff p_remote_sw, 1509219820Sjeff new_tuple); 1510219820Sjeff 1511219820Sjeff /* add the newly discovered switch to the BFS queue */ 1512219820Sjeff cl_list_insert_tail(&bfs_list, 1513219820Sjeff &__osm_ftree_sw_tbl_element_create 1514219820Sjeff (p_remote_sw)->map_item); 1515219820Sjeff } 1516219820Sjeff /* Done assigning indexes to all the remote switches 1517219820Sjeff that are pointed by the downgoing ports. 1518219820Sjeff Now sort port groups according to remote index. */ 1519219820Sjeff qsort(p_sw->down_port_groups, /* array */ 1520219820Sjeff p_sw->down_port_groups_num, /* number of elements */ 1521219820Sjeff sizeof(ftree_port_group_t *), /* size of each element */ 1522219820Sjeff __osm_ftree_compare_port_groups_by_remote_switch_index); /* comparator */ 1523219820Sjeff } 1524219820Sjeff 1525219820Sjeff /* Done indexing switches from ports that go down. 
1526219820Sjeff Now do the same with ports that are pointing up. */ 1527219820Sjeff 1528219820Sjeff if (p_sw->rank != 0) { 1529219820Sjeff /* This is not the root switch, which means that all the ports 1530219820Sjeff that are pointing up are taking us to another switches. */ 1531219820Sjeff for (i = 0; i < p_sw->up_port_groups_num; i++) { 1532219820Sjeff p_remote_sw = 1533219820Sjeff p_sw->up_port_groups[i]->remote_hca_or_sw. 1534219820Sjeff p_sw; 1535219820Sjeff if (__osm_ftree_tuple_assigned 1536219820Sjeff (p_remote_sw->tuple)) 1537219820Sjeff continue; 1538219820Sjeff /* allocate new tuple */ 1539219820Sjeff __osm_ftree_fabric_get_new_tuple(p_ftree, 1540219820Sjeff new_tuple, 1541219820Sjeff p_sw->tuple, 1542219820Sjeff FTREE_DIRECTION_UP); 1543219820Sjeff /* Assign the new tuple to the remote switch. 1544219820Sjeff This fuction also adds the switch to the 1545219820Sjeff switch_by_tuple table. */ 1546219820Sjeff __osm_ftree_fabric_assign_tuple(p_ftree, 1547219820Sjeff p_remote_sw, 1548219820Sjeff new_tuple); 1549219820Sjeff /* add the newly discovered switch to the BFS queue */ 1550219820Sjeff cl_list_insert_tail(&bfs_list, 1551219820Sjeff &__osm_ftree_sw_tbl_element_create 1552219820Sjeff (p_remote_sw)->map_item); 1553219820Sjeff } 1554219820Sjeff /* Done assigning indexes to all the remote switches 1555219820Sjeff that are pointed by the upgoing ports. 1556219820Sjeff Now sort port groups according to remote index. 
*/ 1557219820Sjeff qsort(p_sw->up_port_groups, /* array */ 1558219820Sjeff p_sw->up_port_groups_num, /* number of elements */ 1559219820Sjeff sizeof(ftree_port_group_t *), /* size of each element */ 1560219820Sjeff __osm_ftree_compare_port_groups_by_remote_switch_index); /* comparator */ 1561219820Sjeff } 1562219820Sjeff /* Done assigning indexes to all the switches that are directly connected 1563219820Sjeff to the current switch - go to the next switch in the BFS queue */ 1564219820Sjeff } 1565219820Sjeff cl_list_destroy(&bfs_list); 1566219820Sjeff 1567219820Sjeff OSM_LOG_EXIT(&p_ftree->p_osm->log); 1568219820Sjeff} /* __osm_ftree_fabric_make_indexing() */ 1569219820Sjeff 1570219820Sjeff/***************************************************/ 1571219820Sjeff 1572219820Sjeffstatic int __osm_ftree_fabric_create_leaf_switch_array(IN ftree_fabric_t * 1573219820Sjeff p_ftree) 1574219820Sjeff{ 1575219820Sjeff ftree_sw_t *p_sw; 1576219820Sjeff ftree_sw_t *p_next_sw; 1577219820Sjeff ftree_sw_t **all_switches_at_leaf_level; 1578219820Sjeff unsigned i; 1579219820Sjeff unsigned all_leaf_idx = 0; 1580219820Sjeff unsigned first_leaf_idx; 1581219820Sjeff unsigned last_leaf_idx; 1582219820Sjeff int res = 0; 1583219820Sjeff 1584219820Sjeff OSM_LOG_ENTER(&p_ftree->p_osm->log); 1585219820Sjeff 1586219820Sjeff /* create array of ALL the switches that have leaf rank */ 1587219820Sjeff all_switches_at_leaf_level = (ftree_sw_t **) 1588219820Sjeff malloc(cl_qmap_count(&p_ftree->sw_tbl) * sizeof(ftree_sw_t *)); 1589219820Sjeff if (!all_switches_at_leaf_level) { 1590219820Sjeff osm_log(&p_ftree->p_osm->log, OSM_LOG_SYS, 1591219820Sjeff "Fat-tree routing: Memory allocation failed\n"); 1592219820Sjeff res = -1; 1593219820Sjeff goto Exit; 1594219820Sjeff } 1595219820Sjeff memset(all_switches_at_leaf_level, 0, 1596219820Sjeff cl_qmap_count(&p_ftree->sw_tbl) * sizeof(ftree_sw_t *)); 1597219820Sjeff 1598219820Sjeff p_next_sw = (ftree_sw_t *) cl_qmap_head(&p_ftree->sw_tbl); 1599219820Sjeff while 
(p_next_sw != (ftree_sw_t *) cl_qmap_end(&p_ftree->sw_tbl)) { 1600219820Sjeff p_sw = p_next_sw; 1601219820Sjeff p_next_sw = (ftree_sw_t *) cl_qmap_next(&p_sw->map_item); 1602219820Sjeff if (p_sw->rank == p_ftree->leaf_switch_rank) { 1603219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, 1604219820Sjeff "Adding switch 0x%" PRIx64 1605219820Sjeff " to full leaf switch array\n", 1606219820Sjeff __osm_ftree_sw_get_guid_ho(p_sw)); 1607219820Sjeff all_switches_at_leaf_level[all_leaf_idx++] = p_sw; 1608219820Sjeff 1609219820Sjeff } 1610219820Sjeff } 1611219820Sjeff 1612219820Sjeff /* quick-sort array of leaf switches by index */ 1613219820Sjeff qsort(all_switches_at_leaf_level, /* array */ 1614219820Sjeff all_leaf_idx, /* number of elements */ 1615219820Sjeff sizeof(ftree_sw_t *), /* size of each element */ 1616219820Sjeff __osm_ftree_compare_switches_by_index); /* comparator */ 1617219820Sjeff 1618219820Sjeff /* check the first and the last REAL leaf (the one 1619219820Sjeff that has CNs) in the array of all the leafs */ 1620219820Sjeff 1621219820Sjeff first_leaf_idx = all_leaf_idx; 1622219820Sjeff last_leaf_idx = 0; 1623219820Sjeff for (i = 0; i < all_leaf_idx; i++) { 1624219820Sjeff if (all_switches_at_leaf_level[i]->is_leaf) { 1625219820Sjeff if (i < first_leaf_idx) 1626219820Sjeff first_leaf_idx = i; 1627219820Sjeff last_leaf_idx = i; 1628219820Sjeff } 1629219820Sjeff } 1630219820Sjeff CL_ASSERT(first_leaf_idx < last_leaf_idx); 1631219820Sjeff 1632219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, 1633219820Sjeff "Full leaf array info: first_leaf_idx = %u, last_leaf_idx = %u\n", 1634219820Sjeff first_leaf_idx, last_leaf_idx); 1635219820Sjeff 1636219820Sjeff /* Create array of REAL leaf switches, sorted by index. 1637219820Sjeff This array may contain switches at the same rank w/o CNs, 1638219820Sjeff in case this is the order of indexing. 
*/ 1639219820Sjeff p_ftree->leaf_switches_num = last_leaf_idx - first_leaf_idx + 1; 1640219820Sjeff p_ftree->leaf_switches = (ftree_sw_t **) 1641219820Sjeff malloc(p_ftree->leaf_switches_num * sizeof(ftree_sw_t *)); 1642219820Sjeff if (!p_ftree->leaf_switches) { 1643219820Sjeff osm_log(&p_ftree->p_osm->log, OSM_LOG_SYS, 1644219820Sjeff "Fat-tree routing: Memory allocation failed\n"); 1645219820Sjeff res = -1; 1646219820Sjeff goto Exit; 1647219820Sjeff } 1648219820Sjeff 1649219820Sjeff memcpy(p_ftree->leaf_switches, 1650219820Sjeff &(all_switches_at_leaf_level[first_leaf_idx]), 1651219820Sjeff p_ftree->leaf_switches_num * sizeof(ftree_sw_t *)); 1652219820Sjeff 1653219820Sjeff free(all_switches_at_leaf_level); 1654219820Sjeff 1655219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, 1656219820Sjeff "Created array of %u leaf switches\n", 1657219820Sjeff p_ftree->leaf_switches_num); 1658219820Sjeff 1659219820SjeffExit: 1660219820Sjeff OSM_LOG_EXIT(&p_ftree->p_osm->log); 1661219820Sjeff return res; 1662219820Sjeff} /* __osm_ftree_fabric_create_leaf_switch_array() */ 1663219820Sjeff 1664219820Sjeff/***************************************************/ 1665219820Sjeff 1666219820Sjeffstatic void __osm_ftree_fabric_set_max_cn_per_leaf(IN ftree_fabric_t * p_ftree) 1667219820Sjeff{ 1668219820Sjeff unsigned i; 1669219820Sjeff unsigned j; 1670219820Sjeff unsigned cns_on_this_leaf; 1671219820Sjeff ftree_sw_t *p_sw; 1672219820Sjeff ftree_port_group_t *p_group; 1673219820Sjeff 1674219820Sjeff for (i = 0; i < p_ftree->leaf_switches_num; i++) { 1675219820Sjeff p_sw = p_ftree->leaf_switches[i]; 1676219820Sjeff cns_on_this_leaf = 0; 1677219820Sjeff for (j = 0; j < p_sw->down_port_groups_num; j++) { 1678219820Sjeff p_group = p_sw->down_port_groups[j]; 1679219820Sjeff if (p_group->remote_node_type != IB_NODE_TYPE_CA) 1680219820Sjeff continue; 1681219820Sjeff cns_on_this_leaf += 1682219820Sjeff p_group->remote_hca_or_sw.p_hca->cn_num; 1683219820Sjeff } 1684219820Sjeff if 
(cns_on_this_leaf > p_ftree->max_cn_per_leaf) 1685219820Sjeff p_ftree->max_cn_per_leaf = cns_on_this_leaf; 1686219820Sjeff } 1687219820Sjeff} /* __osm_ftree_fabric_set_max_cn_per_leaf() */ 1688219820Sjeff 1689219820Sjeff/***************************************************/ 1690219820Sjeff 1691219820Sjeffstatic boolean_t __osm_ftree_fabric_validate_topology(IN ftree_fabric_t * 1692219820Sjeff p_ftree) 1693219820Sjeff{ 1694219820Sjeff ftree_port_group_t *p_group; 1695219820Sjeff ftree_port_group_t *p_ref_group; 1696219820Sjeff ftree_sw_t *p_sw; 1697219820Sjeff ftree_sw_t *p_next_sw; 1698219820Sjeff ftree_sw_t **reference_sw_arr; 1699219820Sjeff uint16_t tree_rank = __osm_ftree_fabric_get_rank(p_ftree); 1700219820Sjeff boolean_t res = TRUE; 1701219820Sjeff uint8_t i; 1702219820Sjeff 1703219820Sjeff OSM_LOG_ENTER(&p_ftree->p_osm->log); 1704219820Sjeff 1705219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, 1706219820Sjeff "Validating fabric topology\n"); 1707219820Sjeff 1708219820Sjeff reference_sw_arr = 1709219820Sjeff (ftree_sw_t **) malloc(tree_rank * sizeof(ftree_sw_t *)); 1710219820Sjeff if (reference_sw_arr == NULL) { 1711219820Sjeff osm_log(&p_ftree->p_osm->log, OSM_LOG_SYS, 1712219820Sjeff "Fat-tree routing: Memory allocation failed\n"); 1713219820Sjeff return FALSE; 1714219820Sjeff } 1715219820Sjeff memset(reference_sw_arr, 0, tree_rank * sizeof(ftree_sw_t *)); 1716219820Sjeff 1717219820Sjeff p_next_sw = (ftree_sw_t *) cl_qmap_head(&p_ftree->sw_tbl); 1718219820Sjeff while (res && p_next_sw != (ftree_sw_t *) cl_qmap_end(&p_ftree->sw_tbl)) { 1719219820Sjeff p_sw = p_next_sw; 1720219820Sjeff p_next_sw = (ftree_sw_t *) cl_qmap_next(&p_sw->map_item); 1721219820Sjeff 1722219820Sjeff if (!reference_sw_arr[p_sw->rank]) { 1723219820Sjeff /* This is the first switch in the current level that 1724219820Sjeff we're checking - use it as a reference */ 1725219820Sjeff reference_sw_arr[p_sw->rank] = p_sw; 1726219820Sjeff } else { 1727219820Sjeff /* compare this switch 
properties to the reference switch */ 1728219820Sjeff 1729219820Sjeff if (reference_sw_arr[p_sw->rank]->up_port_groups_num != 1730219820Sjeff p_sw->up_port_groups_num) { 1731219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR, 1732219820Sjeff "ERR AB09: Different number of upward port groups on switches:\n" 1733219820Sjeff " GUID 0x%016" PRIx64 1734219820Sjeff ", LID %u, Index %s - %u groups\n" 1735219820Sjeff " GUID 0x%016" PRIx64 1736219820Sjeff ", LID %u, Index %s - %u groups\n", 1737219820Sjeff __osm_ftree_sw_get_guid_ho 1738219820Sjeff (reference_sw_arr[p_sw->rank]), 1739219820Sjeff cl_ntoh16(reference_sw_arr[p_sw->rank]-> 1740219820Sjeff base_lid), 1741219820Sjeff __osm_ftree_tuple_to_str 1742219820Sjeff (reference_sw_arr[p_sw->rank]->tuple), 1743219820Sjeff reference_sw_arr[p_sw->rank]-> 1744219820Sjeff up_port_groups_num, 1745219820Sjeff __osm_ftree_sw_get_guid_ho(p_sw), 1746219820Sjeff cl_ntoh16(p_sw->base_lid), 1747219820Sjeff __osm_ftree_tuple_to_str(p_sw->tuple), 1748219820Sjeff p_sw->up_port_groups_num); 1749219820Sjeff res = FALSE; 1750219820Sjeff break; 1751219820Sjeff } 1752219820Sjeff 1753219820Sjeff if (p_sw->rank != (tree_rank - 1) && 1754219820Sjeff reference_sw_arr[p_sw->rank]-> 1755219820Sjeff down_port_groups_num != 1756219820Sjeff p_sw->down_port_groups_num) { 1757219820Sjeff /* we're allowing some hca's to be missing */ 1758219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR, 1759219820Sjeff "ERR AB0A: Different number of downward port groups on switches:\n" 1760219820Sjeff " GUID 0x%016" PRIx64 1761219820Sjeff ", LID %u, Index %s - %u port groups\n" 1762219820Sjeff " GUID 0x%016" PRIx64 1763219820Sjeff ", LID %u, Index %s - %u port groups\n", 1764219820Sjeff __osm_ftree_sw_get_guid_ho 1765219820Sjeff (reference_sw_arr[p_sw->rank]), 1766219820Sjeff cl_ntoh16(reference_sw_arr[p_sw->rank]-> 1767219820Sjeff base_lid), 1768219820Sjeff __osm_ftree_tuple_to_str 1769219820Sjeff (reference_sw_arr[p_sw->rank]->tuple), 1770219820Sjeff 
reference_sw_arr[p_sw->rank]-> 1771219820Sjeff down_port_groups_num, 1772219820Sjeff __osm_ftree_sw_get_guid_ho(p_sw), 1773219820Sjeff cl_ntoh16(p_sw->base_lid), 1774219820Sjeff __osm_ftree_tuple_to_str(p_sw->tuple), 1775219820Sjeff p_sw->down_port_groups_num); 1776219820Sjeff res = FALSE; 1777219820Sjeff break; 1778219820Sjeff } 1779219820Sjeff 1780219820Sjeff if (reference_sw_arr[p_sw->rank]->up_port_groups_num != 1781219820Sjeff 0) { 1782219820Sjeff p_ref_group = 1783219820Sjeff reference_sw_arr[p_sw->rank]-> 1784219820Sjeff up_port_groups[0]; 1785219820Sjeff for (i = 0; i < p_sw->up_port_groups_num; i++) { 1786219820Sjeff p_group = p_sw->up_port_groups[i]; 1787219820Sjeff if (cl_ptr_vector_get_size 1788219820Sjeff (&p_ref_group->ports) != 1789219820Sjeff cl_ptr_vector_get_size(&p_group-> 1790219820Sjeff ports)) { 1791219820Sjeff OSM_LOG(&p_ftree->p_osm->log, 1792219820Sjeff OSM_LOG_ERROR, 1793219820Sjeff "ERR AB0B: Different number of ports in an upward port group on switches:\n" 1794219820Sjeff " GUID 0x%016" 1795219820Sjeff PRIx64 1796219820Sjeff ", LID %u, Index %s - %u ports\n" 1797219820Sjeff " GUID 0x%016" 1798219820Sjeff PRIx64 1799219820Sjeff ", LID %u, Index %s - %u ports\n", 1800219820Sjeff __osm_ftree_sw_get_guid_ho 1801219820Sjeff (reference_sw_arr 1802219820Sjeff [p_sw->rank]), 1803219820Sjeff cl_ntoh16 1804219820Sjeff (reference_sw_arr 1805219820Sjeff [p_sw->rank]-> 1806219820Sjeff base_lid), 1807219820Sjeff __osm_ftree_tuple_to_str 1808219820Sjeff (reference_sw_arr 1809219820Sjeff [p_sw->rank]->tuple), 1810219820Sjeff cl_ptr_vector_get_size 1811219820Sjeff (&p_ref_group->ports), 1812219820Sjeff __osm_ftree_sw_get_guid_ho 1813219820Sjeff (p_sw), 1814219820Sjeff cl_ntoh16(p_sw-> 1815219820Sjeff base_lid), 1816219820Sjeff __osm_ftree_tuple_to_str 1817219820Sjeff (p_sw->tuple), 1818219820Sjeff cl_ptr_vector_get_size 1819219820Sjeff (&p_group->ports)); 1820219820Sjeff res = FALSE; 1821219820Sjeff break; 1822219820Sjeff } 1823219820Sjeff } 
1824219820Sjeff } 1825219820Sjeff if (reference_sw_arr[p_sw->rank]-> 1826219820Sjeff down_port_groups_num != 0 1827219820Sjeff && p_sw->rank != (tree_rank - 1)) { 1828219820Sjeff /* we're allowing some hca's to be missing */ 1829219820Sjeff p_ref_group = 1830219820Sjeff reference_sw_arr[p_sw->rank]-> 1831219820Sjeff down_port_groups[0]; 1832219820Sjeff for (i = 0; i < p_sw->down_port_groups_num; i++) { 1833219820Sjeff p_group = p_sw->down_port_groups[0]; 1834219820Sjeff if (cl_ptr_vector_get_size 1835219820Sjeff (&p_ref_group->ports) != 1836219820Sjeff cl_ptr_vector_get_size(&p_group-> 1837219820Sjeff ports)) { 1838219820Sjeff OSM_LOG(&p_ftree->p_osm->log, 1839219820Sjeff OSM_LOG_ERROR, 1840219820Sjeff "ERR AB0C: Different number of ports in an downward port group on switches:\n" 1841219820Sjeff " GUID 0x%016" 1842219820Sjeff PRIx64 1843219820Sjeff ", LID %u, Index %s - %u ports\n" 1844219820Sjeff " GUID 0x%016" 1845219820Sjeff PRIx64 1846219820Sjeff ", LID %u, Index %s - %u ports\n", 1847219820Sjeff __osm_ftree_sw_get_guid_ho 1848219820Sjeff (reference_sw_arr 1849219820Sjeff [p_sw->rank]), 1850219820Sjeff cl_ntoh16 1851219820Sjeff (reference_sw_arr 1852219820Sjeff [p_sw->rank]-> 1853219820Sjeff base_lid), 1854219820Sjeff __osm_ftree_tuple_to_str 1855219820Sjeff (reference_sw_arr 1856219820Sjeff [p_sw->rank]->tuple), 1857219820Sjeff cl_ptr_vector_get_size 1858219820Sjeff (&p_ref_group->ports), 1859219820Sjeff __osm_ftree_sw_get_guid_ho 1860219820Sjeff (p_sw), 1861219820Sjeff cl_ntoh16(p_sw-> 1862219820Sjeff base_lid), 1863219820Sjeff __osm_ftree_tuple_to_str 1864219820Sjeff (p_sw->tuple), 1865219820Sjeff cl_ptr_vector_get_size 1866219820Sjeff (&p_group->ports)); 1867219820Sjeff res = FALSE; 1868219820Sjeff break; 1869219820Sjeff } 1870219820Sjeff } 1871219820Sjeff } 1872219820Sjeff } /* end of else */ 1873219820Sjeff } /* end of while */ 1874219820Sjeff 1875219820Sjeff if (res == TRUE) 1876219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, 1877219820Sjeff 
"Fabric topology has been identified as FatTree\n"); 1878219820Sjeff else 1879219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR, 1880219820Sjeff "ERR AB0D: Fabric topology hasn't been identified as FatTree\n"); 1881219820Sjeff 1882219820Sjeff free(reference_sw_arr); 1883219820Sjeff OSM_LOG_EXIT(&p_ftree->p_osm->log); 1884219820Sjeff return res; 1885219820Sjeff} /* __osm_ftree_fabric_validate_topology() */ 1886219820Sjeff 1887219820Sjeff/*************************************************** 1888219820Sjeff ***************************************************/ 1889219820Sjeff 1890219820Sjeffstatic void __osm_ftree_set_sw_fwd_table(IN cl_map_item_t * const p_map_item, 1891219820Sjeff IN void *context) 1892219820Sjeff{ 1893219820Sjeff ftree_sw_t *p_sw = (ftree_sw_t * const)p_map_item; 1894219820Sjeff ftree_fabric_t *p_ftree = (ftree_fabric_t *) context; 1895219820Sjeff 1896219820Sjeff p_sw->p_osm_sw->max_lid_ho = p_ftree->lft_max_lid_ho; 1897219820Sjeff osm_ucast_mgr_set_fwd_table(&p_ftree->p_osm->sm.ucast_mgr, 1898219820Sjeff p_sw->p_osm_sw); 1899219820Sjeff} 1900219820Sjeff 1901219820Sjeff/*************************************************** 1902219820Sjeff ***************************************************/ 1903219820Sjeff 1904219820Sjeff/* 1905219820Sjeff * Function: assign-up-going-port-by-descending-down 1906219820Sjeff * Given : a switch and a LID 1907219820Sjeff * Pseudo code: 1908219820Sjeff * foreach down-going-port-group (in indexing order) 1909219820Sjeff * skip this group if the LFT(LID) port is part of this group 1910219820Sjeff * find the least loaded port of the group (scan in indexing order) 1911219820Sjeff * r-port is the remote port connected to it 1912219820Sjeff * assign the remote switch node LFT(LID) to r-port 1913219820Sjeff * increase r-port usage counter 1914219820Sjeff * assign-up-going-port-by-descending-down to r-port node (recursion) 1915219820Sjeff */ 1916219820Sjeff 1917219820Sjeffstatic void 
1918219820Sjeff__osm_ftree_fabric_route_upgoing_by_going_down(IN ftree_fabric_t * p_ftree, 1919219820Sjeff IN ftree_sw_t * p_sw, 1920219820Sjeff IN ftree_sw_t * p_prev_sw, 1921219820Sjeff IN ib_net16_t target_lid, 1922219820Sjeff IN uint8_t target_rank, 1923219820Sjeff IN boolean_t is_real_lid, 1924219820Sjeff IN boolean_t is_main_path, 1925219820Sjeff IN uint8_t highest_rank_in_route) 1926219820Sjeff{ 1927219820Sjeff ftree_sw_t *p_remote_sw; 1928219820Sjeff uint16_t ports_num; 1929219820Sjeff ftree_port_group_t *p_group; 1930219820Sjeff ftree_port_t *p_port; 1931219820Sjeff ftree_port_t *p_min_port; 1932219820Sjeff uint16_t i; 1933219820Sjeff uint16_t j; 1934219820Sjeff uint16_t k; 1935219820Sjeff 1936219820Sjeff /* we shouldn't enter here if both real_lid and main_path are false */ 1937219820Sjeff CL_ASSERT(is_real_lid || is_main_path); 1938219820Sjeff 1939219820Sjeff /* if there is no down-going ports */ 1940219820Sjeff if (p_sw->down_port_groups_num == 0) 1941219820Sjeff return; 1942219820Sjeff 1943219820Sjeff /* promote the index that indicates which group should we 1944219820Sjeff start with when going through all the downgoing groups */ 1945219820Sjeff p_sw->down_port_groups_idx = 1946219820Sjeff (p_sw->down_port_groups_idx + 1) % p_sw->down_port_groups_num; 1947219820Sjeff 1948219820Sjeff /* foreach down-going port group (in indexing order) */ 1949219820Sjeff i = p_sw->down_port_groups_idx; 1950219820Sjeff for (k = 0; k < p_sw->down_port_groups_num; k++) { 1951219820Sjeff 1952219820Sjeff p_group = p_sw->down_port_groups[i]; 1953219820Sjeff i = (i + 1) % p_sw->down_port_groups_num; 1954219820Sjeff 1955219820Sjeff /* Skip this port group unless it points to a switch */ 1956219820Sjeff if (p_group->remote_node_type != IB_NODE_TYPE_SWITCH) 1957219820Sjeff continue; 1958219820Sjeff 1959219820Sjeff if (p_prev_sw 1960219820Sjeff && (p_group->remote_base_lid == p_prev_sw->base_lid)) { 1961219820Sjeff /* This port group has a port that was used when we entered this 
switch, 1962219820Sjeff which means that the current group points to the switch where we were 1963219820Sjeff at the previous step of the algorithm (before going up). 1964219820Sjeff Skipping this group. */ 1965219820Sjeff continue; 1966219820Sjeff } 1967219820Sjeff 1968219820Sjeff /* find the least loaded port of the group (in indexing order) */ 1969219820Sjeff p_min_port = NULL; 1970219820Sjeff ports_num = (uint16_t) cl_ptr_vector_get_size(&p_group->ports); 1971219820Sjeff /* ToDo: no need to select a least loaded port for non-main path. 1972219820Sjeff Think about optimization. */ 1973219820Sjeff for (j = 0; j < ports_num; j++) { 1974219820Sjeff cl_ptr_vector_at(&p_group->ports, j, (void *)&p_port); 1975219820Sjeff if (!p_min_port) { 1976219820Sjeff /* first port that we're checking - set as port with the lowest load */ 1977219820Sjeff p_min_port = p_port; 1978219820Sjeff } else if (p_port->counter_up < p_min_port->counter_up) { 1979219820Sjeff /* this port is less loaded - use it as min */ 1980219820Sjeff p_min_port = p_port; 1981219820Sjeff } 1982219820Sjeff } 1983219820Sjeff /* At this point we have selected a port in this group with the 1984219820Sjeff lowest load of upgoing routes. 
1985219820Sjeff Set on the remote switch how to get to the target_lid - 1986219820Sjeff set LFT(target_lid) on the remote switch to the remote port */ 1987219820Sjeff p_remote_sw = p_group->remote_hca_or_sw.p_sw; 1988219820Sjeff 1989219820Sjeff if (osm_switch_get_least_hops(p_remote_sw->p_osm_sw, 1990219820Sjeff cl_ntoh16(target_lid)) != 1991219820Sjeff OSM_NO_PATH) { 1992219820Sjeff /* Loop in the fabric - we already routed the remote switch 1993219820Sjeff on our way UP, and now we see it again on our way DOWN */ 1994219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, 1995219820Sjeff "Loop of lenght %d in the fabric:\n " 1996219820Sjeff "Switch %s (LID %u) closes loop through switch %s (LID %u)\n", 1997219820Sjeff (p_remote_sw->rank - highest_rank_in_route) * 2, 1998219820Sjeff __osm_ftree_tuple_to_str(p_remote_sw->tuple), 1999219820Sjeff cl_ntoh16(p_group->base_lid), 2000219820Sjeff __osm_ftree_tuple_to_str(p_sw->tuple), 2001219820Sjeff cl_ntoh16(p_group->remote_base_lid)); 2002219820Sjeff continue; 2003219820Sjeff } 2004219820Sjeff 2005219820Sjeff /* Four possible cases: 2006219820Sjeff * 2007219820Sjeff * 1. is_real_lid == TRUE && is_main_path == TRUE: 2008219820Sjeff * - going DOWN(TRUE,TRUE) through ALL the groups 2009219820Sjeff * + promoting port counter 2010219820Sjeff * + setting path in remote switch fwd tbl 2011219820Sjeff * + setting hops in remote switch on all the ports of each group 2012219820Sjeff * 2013219820Sjeff * 2. is_real_lid == TRUE && is_main_path == FALSE: 2014219820Sjeff * - going DOWN(TRUE,FALSE) through ALL the groups but only if 2015219820Sjeff * the remote (lower) switch hasn't been already configured 2016219820Sjeff * for this target LID 2017219820Sjeff * + NOT promoting port counter 2018219820Sjeff * + setting path in remote switch fwd tbl if it hasn't been set yet 2019219820Sjeff * + setting hops in remote switch on all the ports of each group 2020219820Sjeff * if it hasn't been set yet 2021219820Sjeff * 2022219820Sjeff * 3. 
is_real_lid == FALSE && is_main_path == TRUE: 2023219820Sjeff * - going DOWN(FALSE,TRUE) through ALL the groups 2024219820Sjeff * + promoting port counter 2025219820Sjeff * + NOT setting path in remote switch fwd tbl 2026219820Sjeff * + NOT setting hops in remote switch 2027219820Sjeff * 2028219820Sjeff * 4. is_real_lid == FALSE && is_main_path == FALSE: 2029219820Sjeff * - illegal state - we shouldn't get here 2030219820Sjeff */ 2031219820Sjeff 2032219820Sjeff /* second case: skip the port group if the remote (lower) 2033219820Sjeff switch has been already configured for this target LID */ 2034219820Sjeff if (is_real_lid && !is_main_path && 2035219820Sjeff p_remote_sw->p_osm_sw->new_lft[cl_ntoh16(target_lid)] != OSM_NO_PATH) 2036219820Sjeff continue; 2037219820Sjeff 2038219820Sjeff /* setting fwd tbl port only if this is real LID */ 2039219820Sjeff if (is_real_lid) { 2040219820Sjeff p_remote_sw->p_osm_sw->new_lft[cl_ntoh16(target_lid)] = 2041219820Sjeff p_min_port->remote_port_num; 2042219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, 2043219820Sjeff "Switch %s: set path to CA LID %u through port %u\n", 2044219820Sjeff __osm_ftree_tuple_to_str(p_remote_sw->tuple), 2045219820Sjeff cl_ntoh16(target_lid), 2046219820Sjeff p_min_port->remote_port_num); 2047219820Sjeff 2048219820Sjeff /* On the remote switch that is pointed by the p_group, 2049219820Sjeff set hops for ALL the ports in the remote group. 
*/ 2050219820Sjeff 2051219820Sjeff for (j = 0; j < ports_num; j++) { 2052219820Sjeff cl_ptr_vector_at(&p_group->ports, j, 2053219820Sjeff (void *)&p_port); 2054219820Sjeff 2055219820Sjeff __osm_ftree_sw_set_hops(p_remote_sw, 2056219820Sjeff cl_ntoh16(target_lid), 2057219820Sjeff p_port->remote_port_num, 2058219820Sjeff ((target_rank - 2059219820Sjeff highest_rank_in_route) 2060219820Sjeff + (p_remote_sw->rank - 2061219820Sjeff highest_rank_in_route))); 2062219820Sjeff } 2063219820Sjeff 2064219820Sjeff } 2065219820Sjeff 2066219820Sjeff /* The number of upgoing routes is tracked in the 2067219820Sjeff p_port->counter_up counter of the port that belongs to 2068219820Sjeff the upper side of the link (on switch with lower rank). 2069219820Sjeff Counter is promoted only if we're routing LID on the main 2070219820Sjeff path (whether it's a real LID or a dummy one). */ 2071219820Sjeff if (is_main_path) 2072219820Sjeff p_min_port->counter_up++; 2073219820Sjeff 2074219820Sjeff /* Recursion step: 2075219820Sjeff Assign upgoing ports by stepping down, starting on REMOTE switch */ 2076219820Sjeff __osm_ftree_fabric_route_upgoing_by_going_down(p_ftree, p_remote_sw, /* remote switch - used as a route-upgoing alg. start point */ 2077219820Sjeff NULL, /* prev. 
position - NULL to mark that we went down and not up */ 2078219820Sjeff target_lid, /* LID that we're routing to */ 2079219820Sjeff target_rank, /* rank of the LID that we're routing to */ 2080219820Sjeff is_real_lid, /* whether the target LID is real or dummy */ 2081219820Sjeff is_main_path, /* whether this is path to HCA that should by tracked by counters */ 2082219820Sjeff highest_rank_in_route); /* highest visited point in the tree before going down */ 2083219820Sjeff } 2084219820Sjeff /* done scanning all the down-going port groups */ 2085219820Sjeff 2086219820Sjeff} /* __osm_ftree_fabric_route_upgoing_by_going_down() */ 2087219820Sjeff 2088219820Sjeff/***************************************************/ 2089219820Sjeff 2090219820Sjeff/* 2091219820Sjeff * Function: assign-down-going-port-by-ascending-up 2092219820Sjeff * Given : a switch and a LID 2093219820Sjeff * Pseudo code: 2094219820Sjeff * find the least loaded port of all the upgoing groups (scan in indexing order) 2095219820Sjeff * assign the LFT(LID) of remote switch to that port 2096219820Sjeff * track that port usage 2097219820Sjeff * assign-up-going-port-by-descending-down on CURRENT switch 2098219820Sjeff * assign-down-going-port-by-ascending-up on REMOTE switch (recursion) 2099219820Sjeff */ 2100219820Sjeff 2101219820Sjeffstatic void 2102219820Sjeff__osm_ftree_fabric_route_downgoing_by_going_up(IN ftree_fabric_t * p_ftree, 2103219820Sjeff IN ftree_sw_t * p_sw, 2104219820Sjeff IN ftree_sw_t * p_prev_sw, 2105219820Sjeff IN ib_net16_t target_lid, 2106219820Sjeff IN uint8_t target_rank, 2107219820Sjeff IN boolean_t is_real_lid, 2108219820Sjeff IN boolean_t is_main_path) 2109219820Sjeff{ 2110219820Sjeff ftree_sw_t *p_remote_sw; 2111219820Sjeff uint16_t ports_num; 2112219820Sjeff ftree_port_group_t *p_group; 2113219820Sjeff ftree_port_t *p_port; 2114219820Sjeff ftree_port_group_t *p_min_group; 2115219820Sjeff ftree_port_t *p_min_port; 2116219820Sjeff uint16_t i; 2117219820Sjeff uint16_t j; 
2118219820Sjeff 2119219820Sjeff /* we shouldn't enter here if both real_lid and main_path are false */ 2120219820Sjeff CL_ASSERT(is_real_lid || is_main_path); 2121219820Sjeff 2122219820Sjeff /* Assign upgoing ports by stepping down, starting on THIS switch */ 2123219820Sjeff __osm_ftree_fabric_route_upgoing_by_going_down(p_ftree, p_sw, /* local switch - used as a route-upgoing alg. start point */ 2124219820Sjeff p_prev_sw, /* switch that we went up from (NULL means that we went down) */ 2125219820Sjeff target_lid, /* LID that we're routing to */ 2126219820Sjeff target_rank, /* rank of the LID that we're routing to */ 2127219820Sjeff is_real_lid, /* whether this target LID is real or dummy */ 2128219820Sjeff is_main_path, /* whether this path to HCA should by tracked by counters */ 2129219820Sjeff p_sw->rank); /* the highest visited point in the tree before going down */ 2130219820Sjeff 2131219820Sjeff /* recursion stop condition - if it's a root switch, */ 2132219820Sjeff if (p_sw->rank == 0) 2133219820Sjeff return; 2134219820Sjeff 2135219820Sjeff /* Find the least loaded upgoing port group */ 2136219820Sjeff p_min_group = NULL; 2137219820Sjeff for (i = 0; i < p_sw->up_port_groups_num; i++) { 2138219820Sjeff p_group = p_sw->up_port_groups[i]; 2139219820Sjeff if (!p_min_group) { 2140219820Sjeff /* first group that we're checking - use 2141219820Sjeff it as a group with the lowest load */ 2142219820Sjeff p_min_group = p_group; 2143219820Sjeff } else if (p_group->counter_down < p_min_group->counter_down) { 2144219820Sjeff /* this group is less loaded - use it as min */ 2145219820Sjeff p_min_group = p_group; 2146219820Sjeff } 2147219820Sjeff } 2148219820Sjeff 2149219820Sjeff /* Find the least loaded upgoing port in the selected group */ 2150219820Sjeff p_min_port = NULL; 2151219820Sjeff ports_num = (uint16_t) cl_ptr_vector_get_size(&p_min_group->ports); 2152219820Sjeff for (j = 0; j < ports_num; j++) { 2153219820Sjeff cl_ptr_vector_at(&p_min_group->ports, j, (void 
*)&p_port); 2154219820Sjeff if (!p_min_port) { 2155219820Sjeff /* first port that we're checking - use 2156219820Sjeff it as a port with the lowest load */ 2157219820Sjeff p_min_port = p_port; 2158219820Sjeff } else if (p_port->counter_down < p_min_port->counter_down) { 2159219820Sjeff /* this port is less loaded - use it as min */ 2160219820Sjeff p_min_port = p_port; 2161219820Sjeff } 2162219820Sjeff } 2163219820Sjeff 2164219820Sjeff /* At this point we have selected a group and port with the 2165219820Sjeff lowest load of downgoing routes. 2166219820Sjeff Set on the remote switch how to get to the target_lid - 2167219820Sjeff set LFT(target_lid) on the remote switch to the remote port */ 2168219820Sjeff p_remote_sw = p_min_group->remote_hca_or_sw.p_sw; 2169219820Sjeff 2170219820Sjeff /* Four possible cases: 2171219820Sjeff * 2172219820Sjeff * 1. is_real_lid == TRUE && is_main_path == TRUE: 2173219820Sjeff * - going UP(TRUE,TRUE) on selected min_group and min_port 2174219820Sjeff * + promoting port counter 2175219820Sjeff * + setting path in remote switch fwd tbl 2176219820Sjeff * + setting hops in remote switch on all the ports of selected group 2177219820Sjeff * - going UP(TRUE,FALSE) on rest of the groups, each time on port 0 2178219820Sjeff * + NOT promoting port counter 2179219820Sjeff * + setting path in remote switch fwd tbl if it hasn't been set yet 2180219820Sjeff * + setting hops in remote switch on all the ports of each group 2181219820Sjeff * if it hasn't been set yet 2182219820Sjeff * 2183219820Sjeff * 2. 
is_real_lid == TRUE && is_main_path == FALSE: 2184219820Sjeff * - going UP(TRUE,FALSE) on ALL the groups, each time on port 0, 2185219820Sjeff * but only if the remote (upper) switch hasn't been already 2186219820Sjeff * configured for this target LID 2187219820Sjeff * + NOT promoting port counter 2188219820Sjeff * + setting path in remote switch fwd tbl if it hasn't been set yet 2189219820Sjeff * + setting hops in remote switch on all the ports of each group 2190219820Sjeff * if it hasn't been set yet 2191219820Sjeff * 2192219820Sjeff * 3. is_real_lid == FALSE && is_main_path == TRUE: 2193219820Sjeff * - going UP(FALSE,TRUE) ONLY on selected min_group and min_port 2194219820Sjeff * + promoting port counter 2195219820Sjeff * + NOT setting path in remote switch fwd tbl 2196219820Sjeff * + NOT setting hops in remote switch 2197219820Sjeff * 2198219820Sjeff * 4. is_real_lid == FALSE && is_main_path == FALSE: 2199219820Sjeff * - illegal state - we shouldn't get here 2200219820Sjeff */ 2201219820Sjeff 2202219820Sjeff /* covering first half of case 1, and case 3 */ 2203219820Sjeff if (is_main_path) { 2204219820Sjeff if (p_sw->is_leaf) { 2205219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, 2206219820Sjeff " - Routing MAIN path for %s CA LID %u: %s --> %s\n", 2207219820Sjeff (is_real_lid) ? 
"real" : "DUMMY", 2208219820Sjeff cl_ntoh16(target_lid), 2209219820Sjeff __osm_ftree_tuple_to_str(p_sw->tuple), 2210219820Sjeff __osm_ftree_tuple_to_str(p_remote_sw->tuple)); 2211219820Sjeff } 2212219820Sjeff /* The number of downgoing routes is tracked in the 2213219820Sjeff p_group->counter_down p_port->counter_down counters of the 2214219820Sjeff group and port that belong to the lower side of the link 2215219820Sjeff (on switch with higher rank) */ 2216219820Sjeff p_min_group->counter_down++; 2217219820Sjeff p_min_port->counter_down++; 2218219820Sjeff if (is_real_lid) { 2219219820Sjeff p_remote_sw->p_osm_sw->new_lft[cl_ntoh16(target_lid)] = 2220219820Sjeff p_min_port->remote_port_num; 2221219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, 2222219820Sjeff "Switch %s: set path to CA LID %u through port %u\n", 2223219820Sjeff __osm_ftree_tuple_to_str(p_remote_sw->tuple), 2224219820Sjeff cl_ntoh16(target_lid), 2225219820Sjeff p_min_port->remote_port_num); 2226219820Sjeff 2227219820Sjeff /* On the remote switch that is pointed by the min_group, 2228219820Sjeff set hops for ALL the ports in the remote group. */ 2229219820Sjeff 2230219820Sjeff ports_num = 2231219820Sjeff (uint16_t) cl_ptr_vector_get_size(&p_min_group-> 2232219820Sjeff ports); 2233219820Sjeff for (j = 0; j < ports_num; j++) { 2234219820Sjeff cl_ptr_vector_at(&p_min_group->ports, j, 2235219820Sjeff (void *)&p_port); 2236219820Sjeff __osm_ftree_sw_set_hops(p_remote_sw, 2237219820Sjeff cl_ntoh16(target_lid), 2238219820Sjeff p_port->remote_port_num, 2239219820Sjeff target_rank - 2240219820Sjeff p_remote_sw->rank); 2241219820Sjeff } 2242219820Sjeff } 2243219820Sjeff 2244219820Sjeff /* Recursion step: 2245219820Sjeff Assign downgoing ports by stepping up, starting on REMOTE switch. */ 2246219820Sjeff __osm_ftree_fabric_route_downgoing_by_going_up(p_ftree, p_remote_sw, /* remote switch - used as a route-downgoing alg. next step point */ 2247219820Sjeff p_sw, /* this switch - prev. 
position switch for the function */ 2248219820Sjeff target_lid, /* LID that we're routing to */ 2249219820Sjeff target_rank, /* rank of the LID that we're routing to */ 2250219820Sjeff is_real_lid, /* whether this target LID is real or dummy */ 2251219820Sjeff is_main_path); /* whether this is path to HCA that should by tracked by counters */ 2252219820Sjeff } 2253219820Sjeff 2254219820Sjeff /* we're done for the third case */ 2255219820Sjeff if (!is_real_lid) 2256219820Sjeff return; 2257219820Sjeff 2258219820Sjeff /* What's left to do at this point: 2259219820Sjeff * 2260219820Sjeff * 1. is_real_lid == TRUE && is_main_path == TRUE: 2261219820Sjeff * - going UP(TRUE,FALSE) on rest of the groups, each time on port 0, 2262219820Sjeff * but only if the remote (upper) switch hasn't been already 2263219820Sjeff * configured for this target LID 2264219820Sjeff * + NOT promoting port counter 2265219820Sjeff * + setting path in remote switch fwd tbl if it hasn't been set yet 2266219820Sjeff * + setting hops in remote switch on all the ports of each group 2267219820Sjeff * if it hasn't been set yet 2268219820Sjeff * 2269219820Sjeff * 2. 
is_real_lid == TRUE && is_main_path == FALSE: 2270219820Sjeff * - going UP(TRUE,FALSE) on ALL the groups, each time on port 0, 2271219820Sjeff * but only if the remote (upper) switch hasn't been already 2272219820Sjeff * configured for this target LID 2273219820Sjeff * + NOT promoting port counter 2274219820Sjeff * + setting path in remote switch fwd tbl if it hasn't been set yet 2275219820Sjeff * + setting hops in remote switch on all the ports of each group 2276219820Sjeff * if it hasn't been set yet 2277219820Sjeff * 2278219820Sjeff * These two rules can be rephrased this way: 2279219820Sjeff * - foreach UP port group 2280219820Sjeff * + if remote switch has been set with the target LID 2281219820Sjeff * - skip this port group 2282219820Sjeff * + else 2283219820Sjeff * - select port 0 2284219820Sjeff * - do NOT promote port counter 2285219820Sjeff * - set path in remote switch fwd tbl 2286219820Sjeff * - set hops in remote switch on all the ports of this group 2287219820Sjeff * - go UP(TRUE,FALSE) to the remote switch 2288219820Sjeff */ 2289219820Sjeff 2290219820Sjeff for (i = 0; i < p_sw->up_port_groups_num; i++) { 2291219820Sjeff p_group = p_sw->up_port_groups[i]; 2292219820Sjeff p_remote_sw = p_group->remote_hca_or_sw.p_sw; 2293219820Sjeff 2294219820Sjeff /* skip if target lid has been already set on remote switch fwd tbl */ 2295219820Sjeff if (p_remote_sw->p_osm_sw->new_lft[cl_ntoh16(target_lid)] != OSM_NO_PATH) 2296219820Sjeff continue; 2297219820Sjeff 2298219820Sjeff if (p_sw->is_leaf) { 2299219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, 2300219820Sjeff " - Routing SECONDARY path for LID %u: %s --> %s\n", 2301219820Sjeff cl_ntoh16(target_lid), 2302219820Sjeff __osm_ftree_tuple_to_str(p_sw->tuple), 2303219820Sjeff __osm_ftree_tuple_to_str(p_remote_sw->tuple)); 2304219820Sjeff } 2305219820Sjeff 2306219820Sjeff /* Routing REAL lids on SECONDARY path means routing 2307219820Sjeff switch-to-switch or switch-to-CA paths. 
2308219820Sjeff We can safely assume that switch will initiate very 2309219820Sjeff few traffic, so there's no point waisting runtime on 2310219820Sjeff trying to balance these routes - always pick port 0. */ 2311219820Sjeff 2312219820Sjeff cl_ptr_vector_at(&p_group->ports, 0, (void *)&p_port); 2313219820Sjeff p_remote_sw->p_osm_sw->new_lft[cl_ntoh16(target_lid)] = 2314219820Sjeff p_port->remote_port_num; 2315219820Sjeff 2316219820Sjeff /* On the remote switch that is pointed by the p_group, 2317219820Sjeff set hops for ALL the ports in the remote group. */ 2318219820Sjeff 2319219820Sjeff ports_num = (uint16_t) cl_ptr_vector_get_size(&p_group->ports); 2320219820Sjeff for (j = 0; j < ports_num; j++) { 2321219820Sjeff cl_ptr_vector_at(&p_group->ports, j, (void *)&p_port); 2322219820Sjeff 2323219820Sjeff __osm_ftree_sw_set_hops(p_remote_sw, 2324219820Sjeff cl_ntoh16(target_lid), 2325219820Sjeff p_port->remote_port_num, 2326219820Sjeff target_rank - 2327219820Sjeff p_remote_sw->rank); 2328219820Sjeff } 2329219820Sjeff 2330219820Sjeff /* Recursion step: 2331219820Sjeff Assign downgoing ports by stepping up, starting on REMOTE switch. */ 2332219820Sjeff __osm_ftree_fabric_route_downgoing_by_going_up(p_ftree, p_remote_sw, /* remote switch - used as a route-downgoing alg. next step point */ 2333219820Sjeff p_sw, /* this switch - prev. 
position switch for the function */ 2334219820Sjeff target_lid, /* LID that we're routing to */ 2335219820Sjeff target_rank, /* rank of the LID that we're routing to */ 2336219820Sjeff TRUE, /* whether the target LID is real or dummy */ 2337219820Sjeff FALSE); /* whether this is path to HCA that should by tracked by counters */ 2338219820Sjeff } 2339219820Sjeff 2340219820Sjeff} /* ftree_fabric_route_downgoing_by_going_up() */ 2341219820Sjeff 2342219820Sjeff/***************************************************/ 2343219820Sjeff 2344219820Sjeff/* 2345219820Sjeff * Pseudo code: 2346219820Sjeff * foreach leaf switch (in indexing order) 2347219820Sjeff * for each compute node (in indexing order) 2348219820Sjeff * obtain the LID of the compute node 2349219820Sjeff * set local LFT(LID) of the port connecting to compute node 2350219820Sjeff * call assign-down-going-port-by-ascending-up(TRUE,TRUE) on CURRENT switch 2351219820Sjeff * for each MISSING compute node 2352219820Sjeff * call assign-down-going-port-by-ascending-up(FALSE,TRUE) on CURRENT switch 2353219820Sjeff */ 2354219820Sjeff 2355219820Sjeffstatic void __osm_ftree_fabric_route_to_cns(IN ftree_fabric_t * p_ftree) 2356219820Sjeff{ 2357219820Sjeff ftree_sw_t *p_sw; 2358219820Sjeff ftree_hca_t *p_hca; 2359219820Sjeff ftree_port_group_t *p_leaf_port_group; 2360219820Sjeff ftree_port_group_t *p_hca_port_group; 2361219820Sjeff ftree_port_t *p_port; 2362219820Sjeff uint32_t i; 2363219820Sjeff uint32_t j; 2364219820Sjeff ib_net16_t hca_lid; 2365219820Sjeff unsigned routed_targets_on_leaf; 2366219820Sjeff 2367219820Sjeff OSM_LOG_ENTER(&p_ftree->p_osm->log); 2368219820Sjeff 2369219820Sjeff /* for each leaf switch (in indexing order) */ 2370219820Sjeff for (i = 0; i < p_ftree->leaf_switches_num; i++) { 2371219820Sjeff p_sw = p_ftree->leaf_switches[i]; 2372219820Sjeff routed_targets_on_leaf = 0; 2373219820Sjeff 2374219820Sjeff /* for each HCA connected to this switch */ 2375219820Sjeff for (j = 0; j < 
p_sw->down_port_groups_num; j++) { 2376219820Sjeff p_leaf_port_group = p_sw->down_port_groups[j]; 2377219820Sjeff 2378219820Sjeff /* work with this port group only if the remote node is CA */ 2379219820Sjeff if (p_leaf_port_group->remote_node_type != 2380219820Sjeff IB_NODE_TYPE_CA) 2381219820Sjeff continue; 2382219820Sjeff 2383219820Sjeff p_hca = p_leaf_port_group->remote_hca_or_sw.p_hca; 2384219820Sjeff 2385219820Sjeff /* work with this port group only if remote HCA has CNs */ 2386219820Sjeff if (!p_hca->cn_num) 2387219820Sjeff continue; 2388219820Sjeff 2389219820Sjeff p_hca_port_group = 2390219820Sjeff __osm_ftree_hca_get_port_group_by_remote_lid(p_hca, 2391219820Sjeff p_leaf_port_group-> 2392219820Sjeff base_lid); 2393219820Sjeff CL_ASSERT(p_hca_port_group); 2394219820Sjeff 2395219820Sjeff /* work with this port group only if remote port is CN */ 2396219820Sjeff if (!p_hca_port_group->is_cn) 2397219820Sjeff continue; 2398219820Sjeff 2399219820Sjeff /* obtain the LID of HCA port */ 2400219820Sjeff hca_lid = p_leaf_port_group->remote_base_lid; 2401219820Sjeff 2402219820Sjeff /* set local LFT(LID) to the port that is connected to HCA */ 2403219820Sjeff cl_ptr_vector_at(&p_leaf_port_group->ports, 0, 2404219820Sjeff (void *)&p_port); 2405219820Sjeff p_sw->p_osm_sw->new_lft[cl_ntoh16(hca_lid)] = p_port->port_num; 2406219820Sjeff 2407219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, 2408219820Sjeff "Switch %s: set path to CN LID %u through port %u\n", 2409219820Sjeff __osm_ftree_tuple_to_str(p_sw->tuple), 2410219820Sjeff cl_ntoh16(hca_lid), p_port->port_num); 2411219820Sjeff 2412219820Sjeff /* set local min hop table(LID) to route to the CA */ 2413219820Sjeff __osm_ftree_sw_set_hops(p_sw, 2414219820Sjeff cl_ntoh16(hca_lid), 2415219820Sjeff p_port->port_num, 1); 2416219820Sjeff 2417219820Sjeff /* Assign downgoing ports by stepping up. 
2418219820Sjeff Since we're routing here only CNs, we're routing it as REAL 2419219820Sjeff LID and updating fat-tree balancing counters. */ 2420219820Sjeff __osm_ftree_fabric_route_downgoing_by_going_up(p_ftree, p_sw, /* local switch - used as a route-downgoing alg. start point */ 2421219820Sjeff NULL, /* prev. position switch */ 2422219820Sjeff hca_lid, /* LID that we're routing to */ 2423219820Sjeff p_sw->rank + 1, /* rank of the LID that we're routing to */ 2424219820Sjeff TRUE, /* whether this HCA LID is real or dummy */ 2425219820Sjeff TRUE); /* whether this path to HCA should by tracked by counters */ 2426219820Sjeff 2427219820Sjeff /* count how many real targets have been routed from this leaf switch */ 2428219820Sjeff routed_targets_on_leaf++; 2429219820Sjeff } 2430219820Sjeff 2431219820Sjeff /* We're done with the real targets (all CNs) of this leaf switch. 2432219820Sjeff Now route the dummy HCAs that are missing or that are non-CNs. 2433219820Sjeff When routing to dummy HCAs we don't fill lid matrices. */ 2434219820Sjeff 2435219820Sjeff if (p_ftree->max_cn_per_leaf > routed_targets_on_leaf) { 2436219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, 2437219820Sjeff "Routing %u dummy CAs\n", 2438219820Sjeff p_ftree->max_cn_per_leaf - 2439219820Sjeff p_sw->down_port_groups_num); 2440219820Sjeff for (j = 0; 2441219820Sjeff ((int)j) < 2442219820Sjeff (p_ftree->max_cn_per_leaf - 2443219820Sjeff routed_targets_on_leaf); j++) { 2444219820Sjeff /* assign downgoing ports by stepping up */ 2445219820Sjeff __osm_ftree_fabric_route_downgoing_by_going_up(p_ftree, p_sw, /* local switch - used as a route-downgoing alg. start point */ 2446219820Sjeff NULL, /* prev. 
position switch */ 2447219820Sjeff 0, /* LID that we're routing to - ignored for dummy HCA */ 2448219820Sjeff 0, /* rank of the LID that we're routing to - ignored for dummy HCA */ 2449219820Sjeff FALSE, /* whether this HCA LID is real or dummy */ 2450219820Sjeff TRUE); /* whether this path to HCA should by tracked by counters */ 2451219820Sjeff } 2452219820Sjeff } 2453219820Sjeff } 2454219820Sjeff /* done going through all the leaf switches */ 2455219820Sjeff OSM_LOG_EXIT(&p_ftree->p_osm->log); 2456219820Sjeff} /* __osm_ftree_fabric_route_to_cns() */ 2457219820Sjeff 2458219820Sjeff/***************************************************/ 2459219820Sjeff 2460219820Sjeff/* 2461219820Sjeff * Pseudo code: 2462219820Sjeff * foreach HCA non-CN port in fabric 2463219820Sjeff * obtain the LID of the HCA port 2464219820Sjeff * get switch that is connected to this HCA port 2465219820Sjeff * set switch LFT(LID) to the port connecting to compute node 2466219820Sjeff * call assign-down-going-port-by-ascending-up(TRUE,FALSE) on CURRENT switch 2467219820Sjeff * 2468219820Sjeff * Routing to these HCAs is routing a REAL hca lid on SECONDARY path. 2469219820Sjeff * However, we do want to allow load-leveling of the traffic to the non-CNs, 2470219820Sjeff * because such nodes may include IO nodes with heavy usage 2471219820Sjeff * - we should set fwd tables 2472219820Sjeff * - we should update port counters 2473219820Sjeff * Routing to non-CNs is done after routing to CNs, so updated port 2474219820Sjeff * counters will not affect CN-to-CN routing. 
2475219820Sjeff */ 2476219820Sjeff 2477219820Sjeffstatic void __osm_ftree_fabric_route_to_non_cns(IN ftree_fabric_t * p_ftree) 2478219820Sjeff{ 2479219820Sjeff ftree_sw_t *p_sw; 2480219820Sjeff ftree_hca_t *p_hca; 2481219820Sjeff ftree_hca_t *p_next_hca; 2482219820Sjeff ftree_port_t *p_hca_port; 2483219820Sjeff ftree_port_group_t *p_hca_port_group; 2484219820Sjeff ib_net16_t hca_lid; 2485219820Sjeff unsigned port_num_on_switch; 2486219820Sjeff unsigned i; 2487219820Sjeff 2488219820Sjeff OSM_LOG_ENTER(&p_ftree->p_osm->log); 2489219820Sjeff 2490219820Sjeff p_next_hca = (ftree_hca_t *) cl_qmap_head(&p_ftree->hca_tbl); 2491219820Sjeff while (p_next_hca != (ftree_hca_t *) cl_qmap_end(&p_ftree->hca_tbl)) { 2492219820Sjeff p_hca = p_next_hca; 2493219820Sjeff p_next_hca = (ftree_hca_t *) cl_qmap_next(&p_hca->map_item); 2494219820Sjeff 2495219820Sjeff for (i = 0; i < p_hca->up_port_groups_num; i++) { 2496219820Sjeff p_hca_port_group = p_hca->up_port_groups[i]; 2497219820Sjeff 2498219820Sjeff /* skip this port if it's CN, in which case it has been already routed */ 2499219820Sjeff if (p_hca_port_group->is_cn) 2500219820Sjeff continue; 2501219820Sjeff 2502219820Sjeff /* skip this port if it is not connected to switch */ 2503219820Sjeff if (p_hca_port_group->remote_node_type != 2504219820Sjeff IB_NODE_TYPE_SWITCH) 2505219820Sjeff continue; 2506219820Sjeff 2507219820Sjeff p_sw = p_hca_port_group->remote_hca_or_sw.p_sw; 2508219820Sjeff hca_lid = p_hca_port_group->base_lid; 2509219820Sjeff 2510219820Sjeff /* set switches LFT(LID) to the port that is connected to HCA */ 2511219820Sjeff cl_ptr_vector_at(&p_hca_port_group->ports, 0, 2512219820Sjeff (void *)&p_hca_port); 2513219820Sjeff port_num_on_switch = p_hca_port->remote_port_num; 2514219820Sjeff p_sw->p_osm_sw->new_lft[cl_ntoh16(hca_lid)] = port_num_on_switch; 2515219820Sjeff 2516219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, 2517219820Sjeff "Switch %s: set path to non-CN HCA LID %u through port %u\n", 2518219820Sjeff 
__osm_ftree_tuple_to_str(p_sw->tuple), 2519219820Sjeff cl_ntoh16(hca_lid), port_num_on_switch); 2520219820Sjeff 2521219820Sjeff /* set local min hop table(LID) to route to the CA */ 2522219820Sjeff __osm_ftree_sw_set_hops(p_sw, cl_ntoh16(hca_lid), 2523219820Sjeff port_num_on_switch, /* port num */ 2524219820Sjeff 1); /* hops */ 2525219820Sjeff 2526219820Sjeff /* Assign downgoing ports by stepping up. 2527219820Sjeff We're routing REAL targets. They are not CNs and not included 2528219820Sjeff in the leafs array, but we treat them as MAIN path to allow load 2529219820Sjeff leveling, which means that the counters will be updated. */ 2530219820Sjeff __osm_ftree_fabric_route_downgoing_by_going_up(p_ftree, p_sw, /* local switch - used as a route-downgoing alg. start point */ 2531219820Sjeff NULL, /* prev. position switch */ 2532219820Sjeff hca_lid, /* LID that we're routing to */ 2533219820Sjeff p_sw->rank + 1, /* rank of the LID that we're routing to */ 2534219820Sjeff TRUE, /* whether this HCA LID is real or dummy */ 2535219820Sjeff TRUE); /* whether this path to HCA should by tracked by counters */ 2536219820Sjeff } 2537219820Sjeff /* done with all the port groups of this HCA - go to next HCA */ 2538219820Sjeff } 2539219820Sjeff 2540219820Sjeff OSM_LOG_EXIT(&p_ftree->p_osm->log); 2541219820Sjeff} /* __osm_ftree_fabric_route_to_non_cns() */ 2542219820Sjeff 2543219820Sjeff/***************************************************/ 2544219820Sjeff 2545219820Sjeff/* 2546219820Sjeff * Pseudo code: 2547219820Sjeff * foreach switch in fabric 2548219820Sjeff * obtain its LID 2549219820Sjeff * set local LFT(LID) to port 0 2550219820Sjeff * call assign-down-going-port-by-ascending-up(TRUE,FALSE) on CURRENT switch 2551219820Sjeff * 2552219820Sjeff * Routing to switch is similar to routing a REAL hca lid on SECONDARY path: 2553219820Sjeff * - we should set fwd tables 2554219820Sjeff * - we should NOT update port counters 2555219820Sjeff */ 2556219820Sjeff 2557219820Sjeffstatic void 
__osm_ftree_fabric_route_to_switches(IN ftree_fabric_t * p_ftree) 2558219820Sjeff{ 2559219820Sjeff ftree_sw_t *p_sw; 2560219820Sjeff ftree_sw_t *p_next_sw; 2561219820Sjeff 2562219820Sjeff OSM_LOG_ENTER(&p_ftree->p_osm->log); 2563219820Sjeff 2564219820Sjeff p_next_sw = (ftree_sw_t *) cl_qmap_head(&p_ftree->sw_tbl); 2565219820Sjeff while (p_next_sw != (ftree_sw_t *) cl_qmap_end(&p_ftree->sw_tbl)) { 2566219820Sjeff p_sw = p_next_sw; 2567219820Sjeff p_next_sw = (ftree_sw_t *) cl_qmap_next(&p_sw->map_item); 2568219820Sjeff 2569219820Sjeff /* set local LFT(LID) to 0 (route to itself) */ 2570219820Sjeff p_sw->p_osm_sw->new_lft[cl_ntoh16(p_sw->base_lid)] = 0; 2571219820Sjeff 2572219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, 2573219820Sjeff "Switch %s (LID %u): routing switch-to-switch paths\n", 2574219820Sjeff __osm_ftree_tuple_to_str(p_sw->tuple), 2575219820Sjeff cl_ntoh16(p_sw->base_lid)); 2576219820Sjeff 2577219820Sjeff /* set min hop table of the switch to itself */ 2578219820Sjeff __osm_ftree_sw_set_hops(p_sw, cl_ntoh16(p_sw->base_lid), 2579219820Sjeff 0, /* port_num */ 2580219820Sjeff 0); /* hops */ 2581219820Sjeff 2582219820Sjeff __osm_ftree_fabric_route_downgoing_by_going_up(p_ftree, p_sw, /* local switch - used as a route-downgoing alg. start point */ 2583219820Sjeff NULL, /* prev. 
position switch */ 2584219820Sjeff p_sw->base_lid, /* LID that we're routing to */ 2585219820Sjeff p_sw->rank, /* rank of the LID that we're routing to */ 2586219820Sjeff TRUE, /* whether the target LID is a real or dummy */ 2587219820Sjeff FALSE); /* whether this path should by tracked by counters */ 2588219820Sjeff } 2589219820Sjeff 2590219820Sjeff OSM_LOG_EXIT(&p_ftree->p_osm->log); 2591219820Sjeff} /* __osm_ftree_fabric_route_to_switches() */ 2592219820Sjeff 2593219820Sjeff/*************************************************** 2594219820Sjeff ***************************************************/ 2595219820Sjeff 2596219820Sjeffstatic int __osm_ftree_fabric_populate_nodes(IN ftree_fabric_t * p_ftree) 2597219820Sjeff{ 2598219820Sjeff osm_node_t *p_osm_node; 2599219820Sjeff osm_node_t *p_next_osm_node; 2600219820Sjeff 2601219820Sjeff OSM_LOG_ENTER(&p_ftree->p_osm->log); 2602219820Sjeff 2603219820Sjeff p_next_osm_node = 2604219820Sjeff (osm_node_t *) cl_qmap_head(&p_ftree->p_osm->subn.node_guid_tbl); 2605219820Sjeff while (p_next_osm_node != 2606219820Sjeff (osm_node_t *) cl_qmap_end(&p_ftree->p_osm->subn. 
2607219820Sjeff node_guid_tbl)) { 2608219820Sjeff p_osm_node = p_next_osm_node; 2609219820Sjeff p_next_osm_node = 2610219820Sjeff (osm_node_t *) cl_qmap_next(&p_osm_node->map_item); 2611219820Sjeff switch (osm_node_get_type(p_osm_node)) { 2612219820Sjeff case IB_NODE_TYPE_CA: 2613219820Sjeff __osm_ftree_fabric_add_hca(p_ftree, p_osm_node); 2614219820Sjeff break; 2615219820Sjeff case IB_NODE_TYPE_ROUTER: 2616219820Sjeff break; 2617219820Sjeff case IB_NODE_TYPE_SWITCH: 2618219820Sjeff __osm_ftree_fabric_add_sw(p_ftree, p_osm_node->sw); 2619219820Sjeff break; 2620219820Sjeff default: 2621219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR, "ERR AB0E: " 2622219820Sjeff "Node GUID 0x%016" PRIx64 2623219820Sjeff " - Unknown node type: %s\n", 2624219820Sjeff cl_ntoh64(osm_node_get_node_guid(p_osm_node)), 2625219820Sjeff ib_get_node_type_str(osm_node_get_type 2626219820Sjeff (p_osm_node))); 2627219820Sjeff OSM_LOG_EXIT(&p_ftree->p_osm->log); 2628219820Sjeff return -1; 2629219820Sjeff } 2630219820Sjeff } 2631219820Sjeff 2632219820Sjeff OSM_LOG_EXIT(&p_ftree->p_osm->log); 2633219820Sjeff return 0; 2634219820Sjeff} /* __osm_ftree_fabric_populate_nodes() */ 2635219820Sjeff 2636219820Sjeff/*************************************************** 2637219820Sjeff ***************************************************/ 2638219820Sjeff 2639219820Sjeffstatic boolean_t __osm_ftree_sw_update_rank(IN ftree_sw_t * p_sw, 2640219820Sjeff IN uint32_t new_rank) 2641219820Sjeff{ 2642219820Sjeff if (__osm_ftree_sw_ranked(p_sw) && p_sw->rank <= new_rank) 2643219820Sjeff return FALSE; 2644219820Sjeff p_sw->rank = new_rank; 2645219820Sjeff return TRUE; 2646219820Sjeff 2647219820Sjeff} 2648219820Sjeff 2649219820Sjeff/***************************************************/ 2650219820Sjeff 2651219820Sjeffstatic void 2652219820Sjeff__osm_ftree_rank_switches_from_leafs(IN ftree_fabric_t * p_ftree, 2653219820Sjeff IN cl_list_t * p_ranking_bfs_list) 2654219820Sjeff{ 2655219820Sjeff ftree_sw_t *p_sw; 
2656219820Sjeff ftree_sw_t *p_remote_sw; 2657219820Sjeff osm_node_t *p_node; 2658219820Sjeff osm_node_t *p_remote_node; 2659219820Sjeff osm_physp_t *p_osm_port; 2660219820Sjeff uint8_t i; 2661219820Sjeff unsigned max_rank = 0; 2662219820Sjeff 2663219820Sjeff while (!cl_is_list_empty(p_ranking_bfs_list)) { 2664219820Sjeff p_sw = (ftree_sw_t *) cl_list_remove_head(p_ranking_bfs_list); 2665219820Sjeff p_node = p_sw->p_osm_sw->p_node; 2666219820Sjeff 2667219820Sjeff /* note: skipping port 0 on switches */ 2668219820Sjeff for (i = 1; i < osm_node_get_num_physp(p_node); i++) { 2669219820Sjeff p_osm_port = osm_node_get_physp_ptr(p_node, i); 2670219820Sjeff if (!p_osm_port || !osm_link_is_healthy(p_osm_port)) 2671219820Sjeff continue; 2672219820Sjeff 2673219820Sjeff p_remote_node = 2674219820Sjeff osm_node_get_remote_node(p_node, i, NULL); 2675219820Sjeff if (!p_remote_node) 2676219820Sjeff continue; 2677219820Sjeff if (osm_node_get_type(p_remote_node) != 2678219820Sjeff IB_NODE_TYPE_SWITCH) 2679219820Sjeff continue; 2680219820Sjeff 2681219820Sjeff p_remote_sw = __osm_ftree_fabric_get_sw_by_guid(p_ftree, 2682219820Sjeff osm_node_get_node_guid 2683219820Sjeff (p_remote_node)); 2684219820Sjeff if (!p_remote_sw) { 2685219820Sjeff /* remote node is not a switch */ 2686219820Sjeff continue; 2687219820Sjeff } 2688219820Sjeff 2689219820Sjeff /* if needed, rank the remote switch and add it to the BFS list */ 2690219820Sjeff if (__osm_ftree_sw_update_rank 2691219820Sjeff (p_remote_sw, p_sw->rank + 1)) { 2692219820Sjeff max_rank = p_remote_sw->rank; 2693219820Sjeff cl_list_insert_tail(p_ranking_bfs_list, 2694219820Sjeff p_remote_sw); 2695219820Sjeff } 2696219820Sjeff } 2697219820Sjeff } 2698219820Sjeff 2699219820Sjeff /* set FatTree maximal switch rank */ 2700219820Sjeff p_ftree->max_switch_rank = max_rank; 2701219820Sjeff 2702219820Sjeff} /* __osm_ftree_rank_switches_from_leafs() */ 2703219820Sjeff 2704219820Sjeff/***************************************************/ 2705219820Sjeff 
2706219820Sjeffstatic int 2707219820Sjeff__osm_ftree_rank_leaf_switches(IN ftree_fabric_t * p_ftree, 2708219820Sjeff IN ftree_hca_t * p_hca, 2709219820Sjeff IN cl_list_t * p_ranking_bfs_list) 2710219820Sjeff{ 2711219820Sjeff ftree_sw_t *p_sw; 2712219820Sjeff osm_node_t *p_osm_node = p_hca->p_osm_node; 2713219820Sjeff osm_node_t *p_remote_osm_node; 2714219820Sjeff osm_physp_t *p_osm_port; 2715219820Sjeff static uint8_t i = 0; 2716219820Sjeff int res = 0; 2717219820Sjeff 2718219820Sjeff OSM_LOG_ENTER(&p_ftree->p_osm->log); 2719219820Sjeff 2720219820Sjeff for (i = 0; i < osm_node_get_num_physp(p_osm_node); i++) { 2721219820Sjeff p_osm_port = osm_node_get_physp_ptr(p_osm_node, i); 2722219820Sjeff if (!p_osm_port || !osm_link_is_healthy(p_osm_port)) 2723219820Sjeff continue; 2724219820Sjeff 2725219820Sjeff p_remote_osm_node = 2726219820Sjeff osm_node_get_remote_node(p_osm_node, i, NULL); 2727219820Sjeff if (!p_remote_osm_node) 2728219820Sjeff continue; 2729219820Sjeff 2730219820Sjeff switch (osm_node_get_type(p_remote_osm_node)) { 2731219820Sjeff case IB_NODE_TYPE_CA: 2732219820Sjeff /* HCA connected directly to another HCA - not FatTree */ 2733219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR, "ERR AB0F: " 2734219820Sjeff "CA conected directly to another CA: " 2735219820Sjeff "0x%016" PRIx64 " <---> 0x%016" PRIx64 "\n", 2736219820Sjeff __osm_ftree_hca_get_guid_ho(p_hca), 2737219820Sjeff cl_ntoh64(osm_node_get_node_guid 2738219820Sjeff (p_remote_osm_node))); 2739219820Sjeff res = -1; 2740219820Sjeff goto Exit; 2741219820Sjeff 2742219820Sjeff case IB_NODE_TYPE_ROUTER: 2743219820Sjeff /* leaving this port - proceeding to the next one */ 2744219820Sjeff continue; 2745219820Sjeff 2746219820Sjeff case IB_NODE_TYPE_SWITCH: 2747219820Sjeff /* continue with this port */ 2748219820Sjeff break; 2749219820Sjeff 2750219820Sjeff default: 2751219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR, 2752219820Sjeff "ERR AB10: Node GUID 0x%016" PRIx64 2753219820Sjeff " - Unknown 
node type: %s\n", 2754219820Sjeff cl_ntoh64(osm_node_get_node_guid 2755219820Sjeff (p_remote_osm_node)), 2756219820Sjeff ib_get_node_type_str(osm_node_get_type 2757219820Sjeff (p_remote_osm_node))); 2758219820Sjeff res = -1; 2759219820Sjeff goto Exit; 2760219820Sjeff } 2761219820Sjeff 2762219820Sjeff /* remote node is switch */ 2763219820Sjeff 2764219820Sjeff p_sw = __osm_ftree_fabric_get_sw_by_guid(p_ftree, 2765219820Sjeff osm_node_get_node_guid 2766219820Sjeff (p_osm_port-> 2767219820Sjeff p_remote_physp-> 2768219820Sjeff p_node)); 2769219820Sjeff CL_ASSERT(p_sw); 2770219820Sjeff 2771219820Sjeff /* if needed, rank the remote switch and add it to the BFS list */ 2772219820Sjeff 2773219820Sjeff if (!__osm_ftree_sw_update_rank(p_sw, 0)) 2774219820Sjeff continue; 2775219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, 2776219820Sjeff "Marking rank of switch that is directly connected to CA:\n" 2777219820Sjeff " - CA guid : 0x%016" 2778219820Sjeff PRIx64 "\n" 2779219820Sjeff " - Switch guid: 0x%016" 2780219820Sjeff PRIx64 "\n" 2781219820Sjeff " - Switch LID : %u\n", 2782219820Sjeff __osm_ftree_hca_get_guid_ho(p_hca), 2783219820Sjeff __osm_ftree_sw_get_guid_ho(p_sw), 2784219820Sjeff cl_ntoh16(p_sw->base_lid)); 2785219820Sjeff cl_list_insert_tail(p_ranking_bfs_list, p_sw); 2786219820Sjeff } 2787219820Sjeff 2788219820SjeffExit: 2789219820Sjeff OSM_LOG_EXIT(&p_ftree->p_osm->log); 2790219820Sjeff return res; 2791219820Sjeff} /* __osm_ftree_rank_leaf_switches() */ 2792219820Sjeff 2793219820Sjeff/***************************************************/ 2794219820Sjeff 2795219820Sjeffstatic void __osm_ftree_sw_reverse_rank(IN cl_map_item_t * const p_map_item, 2796219820Sjeff IN void *context) 2797219820Sjeff{ 2798219820Sjeff ftree_fabric_t *p_ftree = (ftree_fabric_t *) context; 2799219820Sjeff ftree_sw_t *p_sw = (ftree_sw_t * const)p_map_item; 2800219820Sjeff p_sw->rank = p_ftree->max_switch_rank - p_sw->rank; 2801219820Sjeff} 2802219820Sjeff 
2803219820Sjeff/*************************************************** 2804219820Sjeff ***************************************************/ 2805219820Sjeff 2806219820Sjeffstatic int 2807219820Sjeff__osm_ftree_fabric_construct_hca_ports(IN ftree_fabric_t * p_ftree, 2808219820Sjeff IN ftree_hca_t * p_hca) 2809219820Sjeff{ 2810219820Sjeff ftree_sw_t *p_remote_sw; 2811219820Sjeff osm_node_t *p_node = p_hca->p_osm_node; 2812219820Sjeff osm_node_t *p_remote_node; 2813219820Sjeff uint8_t remote_node_type; 2814219820Sjeff ib_net64_t remote_node_guid; 2815219820Sjeff osm_physp_t *p_remote_osm_port; 2816219820Sjeff uint8_t i; 2817219820Sjeff uint8_t remote_port_num; 2818219820Sjeff boolean_t is_cn = FALSE; 2819219820Sjeff int res = 0; 2820219820Sjeff 2821219820Sjeff for (i = 0; i < osm_node_get_num_physp(p_node); i++) { 2822219820Sjeff osm_physp_t *p_osm_port = osm_node_get_physp_ptr(p_node, i); 2823219820Sjeff if (!p_osm_port || !osm_link_is_healthy(p_osm_port)) 2824219820Sjeff continue; 2825219820Sjeff 2826219820Sjeff p_remote_osm_port = osm_physp_get_remote(p_osm_port); 2827219820Sjeff p_remote_node = 2828219820Sjeff osm_node_get_remote_node(p_node, i, &remote_port_num); 2829219820Sjeff 2830219820Sjeff if (!p_remote_osm_port) 2831219820Sjeff continue; 2832219820Sjeff 2833219820Sjeff remote_node_type = osm_node_get_type(p_remote_node); 2834219820Sjeff remote_node_guid = osm_node_get_node_guid(p_remote_node); 2835219820Sjeff 2836219820Sjeff switch (remote_node_type) { 2837219820Sjeff case IB_NODE_TYPE_ROUTER: 2838219820Sjeff /* leaving this port - proceeding to the next one */ 2839219820Sjeff continue; 2840219820Sjeff 2841219820Sjeff case IB_NODE_TYPE_CA: 2842219820Sjeff /* HCA connected directly to another HCA - not FatTree */ 2843219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR, "ERR AB11: " 2844219820Sjeff "CA conected directly to another CA: " 2845219820Sjeff "0x%016" PRIx64 " <---> 0x%016" PRIx64 "\n", 2846219820Sjeff cl_ntoh64(osm_node_get_node_guid(p_node)), 
2847219820Sjeff cl_ntoh64(remote_node_guid)); 2848219820Sjeff res = -1; 2849219820Sjeff goto Exit; 2850219820Sjeff 2851219820Sjeff case IB_NODE_TYPE_SWITCH: 2852219820Sjeff /* continue with this port */ 2853219820Sjeff break; 2854219820Sjeff 2855219820Sjeff default: 2856219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR, 2857219820Sjeff "ERR AB12: Node GUID 0x%016" PRIx64 2858219820Sjeff " - Unknown node type: %s\n", 2859219820Sjeff cl_ntoh64(remote_node_guid), 2860219820Sjeff ib_get_node_type_str(remote_node_type)); 2861219820Sjeff res = -1; 2862219820Sjeff goto Exit; 2863219820Sjeff } 2864219820Sjeff 2865219820Sjeff /* remote node is switch */ 2866219820Sjeff 2867219820Sjeff p_remote_sw = 2868219820Sjeff __osm_ftree_fabric_get_sw_by_guid(p_ftree, 2869219820Sjeff remote_node_guid); 2870219820Sjeff CL_ASSERT(p_remote_sw); 2871219820Sjeff 2872219820Sjeff /* If CN file is not supplied, then all the CAs considered as Compute Nodes. 2873219820Sjeff Otherwise all the CAs are not CNs, and only guids that are present in the 2874219820Sjeff CN file will be marked as compute nodes. 
*/ 2875219820Sjeff if (!__osm_ftree_fabric_cns_provided(p_ftree)) { 2876219820Sjeff is_cn = TRUE; 2877219820Sjeff } else { 2878219820Sjeff name_map_item_t *p_elem = 2879219820Sjeff (name_map_item_t *) cl_qmap_get(&p_ftree-> 2880219820Sjeff cn_guid_tbl, 2881219820Sjeff cl_ntoh64(osm_physp_get_port_guid 2882219820Sjeff (p_osm_port))); 2883219820Sjeff if (p_elem != 2884219820Sjeff (name_map_item_t *) cl_qmap_end(&p_ftree-> 2885219820Sjeff cn_guid_tbl)) 2886219820Sjeff is_cn = TRUE; 2887219820Sjeff } 2888219820Sjeff 2889219820Sjeff if (is_cn) { 2890219820Sjeff p_ftree->cn_num++; 2891219820Sjeff p_hca->cn_num++; 2892219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, 2893219820Sjeff "Marking CN port GUID 0x%016" PRIx64 "\n", 2894219820Sjeff cl_ntoh64(osm_physp_get_port_guid(p_osm_port))); 2895219820Sjeff } else { 2896219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, 2897219820Sjeff "Marking non-CN port GUID 0x%016" PRIx64 "\n", 2898219820Sjeff cl_ntoh64(osm_physp_get_port_guid(p_osm_port))); 2899219820Sjeff } 2900219820Sjeff 2901219820Sjeff __osm_ftree_hca_add_port(p_hca, /* local ftree_hca object */ 2902219820Sjeff i, /* local port number */ 2903219820Sjeff remote_port_num, /* remote port number */ 2904219820Sjeff osm_node_get_base_lid(p_node, i), /* local lid */ 2905219820Sjeff osm_node_get_base_lid(p_remote_node, 0), /* remote lid */ 2906219820Sjeff osm_physp_get_port_guid(p_osm_port), /* local port guid */ 2907219820Sjeff osm_physp_get_port_guid(p_remote_osm_port), /* remote port guid */ 2908219820Sjeff remote_node_guid, /* remote node guid */ 2909219820Sjeff remote_node_type, /* remote node type */ 2910219820Sjeff (void *)p_remote_sw, /* remote ftree_hca/sw object */ 2911219820Sjeff is_cn); /* whether this port is compute node */ 2912219820Sjeff } 2913219820Sjeff 2914219820SjeffExit: 2915219820Sjeff return res; 2916219820Sjeff} /* __osm_ftree_fabric_construct_hca_ports() */ 2917219820Sjeff 
2918219820Sjeff/*************************************************** 2919219820Sjeff ***************************************************/ 2920276979Shselaskystatic boolean_t __osm_invalid_link_rank_diff(const uint32_t val) 2921276979Shselasky{ 2922276979Shselasky return (val != 1U && val != -1U); 2923276979Shselasky} 2924219820Sjeff 2925219820Sjeffstatic int __osm_ftree_fabric_construct_sw_ports(IN ftree_fabric_t * p_ftree, 2926219820Sjeff IN ftree_sw_t * p_sw) 2927219820Sjeff{ 2928219820Sjeff ftree_hca_t *p_remote_hca; 2929219820Sjeff ftree_sw_t *p_remote_sw; 2930219820Sjeff osm_node_t *p_node = p_sw->p_osm_sw->p_node; 2931219820Sjeff osm_node_t *p_remote_node; 2932219820Sjeff ib_net16_t remote_base_lid; 2933219820Sjeff uint8_t remote_node_type; 2934219820Sjeff ib_net64_t remote_node_guid; 2935219820Sjeff osm_physp_t *p_remote_osm_port; 2936219820Sjeff ftree_direction_t direction; 2937219820Sjeff void *p_remote_hca_or_sw; 2938219820Sjeff uint8_t i; 2939219820Sjeff uint8_t remote_port_num; 2940219820Sjeff int res = 0; 2941219820Sjeff 2942219820Sjeff CL_ASSERT(osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH); 2943219820Sjeff 2944219820Sjeff for (i = 1; i < osm_node_get_num_physp(p_node); i++) { 2945219820Sjeff osm_physp_t *p_osm_port = osm_node_get_physp_ptr(p_node, i); 2946219820Sjeff if (!p_osm_port || !osm_link_is_healthy(p_osm_port)) 2947219820Sjeff continue; 2948219820Sjeff 2949219820Sjeff p_remote_osm_port = osm_physp_get_remote(p_osm_port); 2950219820Sjeff if (!p_remote_osm_port) 2951219820Sjeff continue; 2952219820Sjeff 2953219820Sjeff p_remote_node = 2954219820Sjeff osm_node_get_remote_node(p_node, i, &remote_port_num); 2955219820Sjeff 2956219820Sjeff /* ignore any loopback connection on switch */ 2957219820Sjeff if (p_node == p_remote_node) { 2958219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, 2959219820Sjeff "Ignoring loopback on switch GUID 0x%016" PRIx64 2960219820Sjeff ", LID %u, rank %u\n", 2961219820Sjeff __osm_ftree_sw_get_guid_ho(p_sw), 
2962219820Sjeff cl_ntoh16(p_sw->base_lid), 2963219820Sjeff p_sw->rank); 2964219820Sjeff continue; 2965219820Sjeff } 2966219820Sjeff 2967219820Sjeff remote_node_type = osm_node_get_type(p_remote_node); 2968219820Sjeff remote_node_guid = osm_node_get_node_guid(p_remote_node); 2969219820Sjeff 2970219820Sjeff switch (remote_node_type) { 2971219820Sjeff case IB_NODE_TYPE_ROUTER: 2972219820Sjeff /* leaving this port - proceeding to the next one */ 2973219820Sjeff continue; 2974219820Sjeff 2975219820Sjeff case IB_NODE_TYPE_CA: 2976219820Sjeff /* switch connected to hca */ 2977219820Sjeff 2978219820Sjeff p_remote_hca = 2979219820Sjeff __osm_ftree_fabric_get_hca_by_guid(p_ftree, 2980219820Sjeff remote_node_guid); 2981219820Sjeff CL_ASSERT(p_remote_hca); 2982219820Sjeff 2983219820Sjeff p_remote_hca_or_sw = (void *)p_remote_hca; 2984219820Sjeff direction = FTREE_DIRECTION_DOWN; 2985219820Sjeff 2986219820Sjeff remote_base_lid = 2987219820Sjeff osm_physp_get_base_lid(p_remote_osm_port); 2988219820Sjeff break; 2989219820Sjeff 2990219820Sjeff case IB_NODE_TYPE_SWITCH: 2991219820Sjeff /* switch connected to another switch */ 2992219820Sjeff 2993219820Sjeff p_remote_sw = 2994219820Sjeff __osm_ftree_fabric_get_sw_by_guid(p_ftree, 2995219820Sjeff remote_node_guid); 2996219820Sjeff CL_ASSERT(p_remote_sw); 2997219820Sjeff 2998219820Sjeff p_remote_hca_or_sw = (void *)p_remote_sw; 2999219820Sjeff 3000276979Shselasky if (__osm_invalid_link_rank_diff(p_sw->rank - p_remote_sw->rank)) { 3001219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR, 3002219820Sjeff "ERR AB16: " 3003219820Sjeff "Illegal link between switches with ranks %u and %u:\n" 3004219820Sjeff " GUID 0x%016" PRIx64 3005219820Sjeff ", LID %u, rank %u\n" 3006219820Sjeff " GUID 0x%016" PRIx64 3007219820Sjeff ", LID %u, rank %u\n", p_sw->rank, 3008219820Sjeff p_remote_sw->rank, 3009219820Sjeff __osm_ftree_sw_get_guid_ho(p_sw), 3010219820Sjeff cl_ntoh16(p_sw->base_lid), p_sw->rank, 3011219820Sjeff 
__osm_ftree_sw_get_guid_ho(p_remote_sw), 3012219820Sjeff cl_ntoh16(p_remote_sw->base_lid), 3013219820Sjeff p_remote_sw->rank); 3014219820Sjeff res = -1; 3015219820Sjeff goto Exit; 3016219820Sjeff } 3017219820Sjeff 3018219820Sjeff if (p_sw->rank > p_remote_sw->rank) 3019219820Sjeff direction = FTREE_DIRECTION_UP; 3020219820Sjeff else 3021219820Sjeff direction = FTREE_DIRECTION_DOWN; 3022219820Sjeff 3023219820Sjeff /* switch LID is only in port 0 port_info structure */ 3024219820Sjeff remote_base_lid = 3025219820Sjeff osm_node_get_base_lid(p_remote_node, 0); 3026219820Sjeff 3027219820Sjeff break; 3028219820Sjeff 3029219820Sjeff default: 3030219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR, 3031219820Sjeff "ERR AB13: Node GUID 0x%016" PRIx64 3032219820Sjeff " - Unknown node type: %s\n", 3033219820Sjeff cl_ntoh64(remote_node_guid), 3034219820Sjeff ib_get_node_type_str(remote_node_type)); 3035219820Sjeff res = -1; 3036219820Sjeff goto Exit; 3037219820Sjeff } 3038219820Sjeff __osm_ftree_sw_add_port(p_sw, /* local ftree_sw object */ 3039219820Sjeff i, /* local port number */ 3040219820Sjeff remote_port_num, /* remote port number */ 3041219820Sjeff p_sw->base_lid, /* local lid */ 3042219820Sjeff remote_base_lid, /* remote lid */ 3043219820Sjeff osm_physp_get_port_guid(p_osm_port), /* local port guid */ 3044219820Sjeff osm_physp_get_port_guid(p_remote_osm_port), /* remote port guid */ 3045219820Sjeff remote_node_guid, /* remote node guid */ 3046219820Sjeff remote_node_type, /* remote node type */ 3047219820Sjeff p_remote_hca_or_sw, /* remote ftree_hca/sw object */ 3048219820Sjeff direction); /* port direction (up or down) */ 3049219820Sjeff 3050219820Sjeff /* Track the max lid (in host order) that exists in the fabric */ 3051219820Sjeff if (cl_ntoh16(remote_base_lid) > p_ftree->lft_max_lid_ho) 3052219820Sjeff p_ftree->lft_max_lid_ho = cl_ntoh16(remote_base_lid); 3053219820Sjeff } 3054219820Sjeff 3055219820SjeffExit: 3056219820Sjeff return res; 3057219820Sjeff} /* 
__osm_ftree_fabric_construct_sw_ports() */ 3058219820Sjeff 3059219820Sjeff/*************************************************** 3060219820Sjeff ***************************************************/ 3061219820Sjeff 3062219820Sjeffstatic int __osm_ftree_fabric_rank_from_roots(IN ftree_fabric_t * p_ftree) 3063219820Sjeff{ 3064219820Sjeff osm_node_t *p_osm_node; 3065219820Sjeff osm_node_t *p_remote_osm_node; 3066219820Sjeff osm_physp_t *p_osm_physp; 3067219820Sjeff ftree_sw_t *p_sw; 3068219820Sjeff ftree_sw_t *p_remote_sw; 3069219820Sjeff cl_list_t ranking_bfs_list; 3070219820Sjeff struct guid_list_item *item; 3071219820Sjeff int res = 0; 3072219820Sjeff unsigned num_roots; 3073219820Sjeff unsigned max_rank = 0; 3074219820Sjeff unsigned i; 3075219820Sjeff 3076219820Sjeff OSM_LOG_ENTER(&p_ftree->p_osm->log); 3077219820Sjeff cl_list_init(&ranking_bfs_list, 10); 3078219820Sjeff 3079219820Sjeff /* Rank all the roots and add them to list */ 3080219820Sjeff for (item = (void *)cl_qlist_head(&p_ftree->root_guid_list); 3081219820Sjeff item != (void *)cl_qlist_end(&p_ftree->root_guid_list); 3082219820Sjeff item = (void *)cl_qlist_next(&item->list)) { 3083219820Sjeff p_sw = 3084219820Sjeff __osm_ftree_fabric_get_sw_by_guid(p_ftree, 3085219820Sjeff cl_hton64(item->guid)); 3086219820Sjeff if (!p_sw) { 3087219820Sjeff /* the specified root guid wasn't found in the fabric */ 3088219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR, "ERR AB24: " 3089219820Sjeff "Root switch GUID 0x%" PRIx64 " not found\n", 3090219820Sjeff item->guid); 3091219820Sjeff continue; 3092219820Sjeff } 3093219820Sjeff 3094219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, 3095219820Sjeff "Ranking root switch with GUID 0x%" PRIx64 "\n", 3096219820Sjeff item->guid); 3097219820Sjeff p_sw->rank = 0; 3098219820Sjeff cl_list_insert_tail(&ranking_bfs_list, p_sw); 3099219820Sjeff } 3100219820Sjeff 3101219820Sjeff num_roots = cl_list_count(&ranking_bfs_list); 3102219820Sjeff if (!num_roots) { 3103219820Sjeff 
OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR, "ERR AB25: " 3104219820Sjeff "No valid roots supplied\n"); 3105219820Sjeff res = -1; 3106219820Sjeff goto Exit; 3107219820Sjeff } 3108219820Sjeff 3109219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, 3110219820Sjeff "Ranked %u valid root switches\n", num_roots); 3111219820Sjeff 3112219820Sjeff /* Now the list has all the roots. 3113219820Sjeff BFS the subnet and update rank on all the switches. */ 3114219820Sjeff 3115219820Sjeff while (!cl_is_list_empty(&ranking_bfs_list)) { 3116219820Sjeff p_sw = (ftree_sw_t *) cl_list_remove_head(&ranking_bfs_list); 3117219820Sjeff p_osm_node = p_sw->p_osm_sw->p_node; 3118219820Sjeff 3119219820Sjeff /* note: skipping port 0 on switches */ 3120219820Sjeff for (i = 1; i < osm_node_get_num_physp(p_osm_node); i++) { 3121219820Sjeff p_osm_physp = osm_node_get_physp_ptr(p_osm_node, i); 3122219820Sjeff if (!p_osm_physp || !osm_link_is_healthy(p_osm_physp)) 3123219820Sjeff continue; 3124219820Sjeff 3125219820Sjeff p_remote_osm_node = 3126219820Sjeff osm_node_get_remote_node(p_osm_node, i, NULL); 3127219820Sjeff if (!p_remote_osm_node) 3128219820Sjeff continue; 3129219820Sjeff 3130219820Sjeff if (osm_node_get_type(p_remote_osm_node) != 3131219820Sjeff IB_NODE_TYPE_SWITCH) 3132219820Sjeff continue; 3133219820Sjeff 3134219820Sjeff p_remote_sw = __osm_ftree_fabric_get_sw_by_guid(p_ftree, 3135219820Sjeff osm_node_get_node_guid 3136219820Sjeff (p_remote_osm_node)); 3137219820Sjeff CL_ASSERT(p_remote_sw); 3138219820Sjeff 3139219820Sjeff /* if needed, rank the remote switch and add it to the BFS list */ 3140219820Sjeff if (__osm_ftree_sw_update_rank 3141219820Sjeff (p_remote_sw, p_sw->rank + 1)) { 3142219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, 3143219820Sjeff "Ranking switch 0x%" PRIx64 3144219820Sjeff " with rank %u\n", 3145219820Sjeff __osm_ftree_sw_get_guid_ho(p_remote_sw), 3146219820Sjeff p_remote_sw->rank); 3147219820Sjeff max_rank = p_remote_sw->rank; 3148219820Sjeff 
cl_list_insert_tail(&ranking_bfs_list, 3149219820Sjeff p_remote_sw); 3150219820Sjeff } 3151219820Sjeff } 3152219820Sjeff /* done with ports of this switch - go to the next switch in the list */ 3153219820Sjeff } 3154219820Sjeff 3155219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, 3156219820Sjeff "Subnet ranking completed. Max Node Rank = %u\n", max_rank); 3157219820Sjeff 3158219820Sjeff /* set FatTree maximal switch rank */ 3159219820Sjeff p_ftree->max_switch_rank = max_rank; 3160219820Sjeff 3161219820SjeffExit: 3162219820Sjeff cl_list_destroy(&ranking_bfs_list); 3163219820Sjeff OSM_LOG_EXIT(&p_ftree->p_osm->log); 3164219820Sjeff return res; 3165219820Sjeff} /* __osm_ftree_fabric_rank_from_roots() */ 3166219820Sjeff 3167219820Sjeff/*************************************************** 3168219820Sjeff ***************************************************/ 3169219820Sjeff 3170219820Sjeffstatic int __osm_ftree_fabric_rank_from_hcas(IN ftree_fabric_t * p_ftree) 3171219820Sjeff{ 3172219820Sjeff ftree_hca_t *p_hca; 3173219820Sjeff ftree_hca_t *p_next_hca; 3174219820Sjeff cl_list_t ranking_bfs_list; 3175219820Sjeff int res = 0; 3176219820Sjeff 3177219820Sjeff OSM_LOG_ENTER(&p_ftree->p_osm->log); 3178219820Sjeff 3179219820Sjeff cl_list_init(&ranking_bfs_list, 10); 3180219820Sjeff 3181219820Sjeff /* Mark REVERSED rank of all the switches in the subnet. 3182219820Sjeff Start from switches that are connected to hca's, and 3183219820Sjeff scan all the switches in the subnet. 
*/ 3184219820Sjeff p_next_hca = (ftree_hca_t *) cl_qmap_head(&p_ftree->hca_tbl); 3185219820Sjeff while (p_next_hca != (ftree_hca_t *) cl_qmap_end(&p_ftree->hca_tbl)) { 3186219820Sjeff p_hca = p_next_hca; 3187219820Sjeff p_next_hca = (ftree_hca_t *) cl_qmap_next(&p_hca->map_item); 3188219820Sjeff if (__osm_ftree_rank_leaf_switches 3189219820Sjeff (p_ftree, p_hca, &ranking_bfs_list) != 0) { 3190219820Sjeff res = -1; 3191219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR, "ERR AB14: " 3192219820Sjeff "Subnet ranking failed - subnet is not FatTree"); 3193219820Sjeff goto Exit; 3194219820Sjeff } 3195219820Sjeff } 3196219820Sjeff 3197219820Sjeff /* Now rank rest of the switches in the fabric, while the 3198219820Sjeff list already contains all the ranked leaf switches */ 3199219820Sjeff __osm_ftree_rank_switches_from_leafs(p_ftree, &ranking_bfs_list); 3200219820Sjeff 3201219820Sjeff /* fix ranking of the switches by reversing the ranking direction */ 3202219820Sjeff cl_qmap_apply_func(&p_ftree->sw_tbl, __osm_ftree_sw_reverse_rank, 3203219820Sjeff (void *)p_ftree); 3204219820Sjeff 3205219820SjeffExit: 3206219820Sjeff cl_list_destroy(&ranking_bfs_list); 3207219820Sjeff OSM_LOG_EXIT(&p_ftree->p_osm->log); 3208219820Sjeff return res; 3209219820Sjeff} /* __osm_ftree_fabric_rank_from_hcas() */ 3210219820Sjeff 3211219820Sjeff/*************************************************** 3212219820Sjeff ***************************************************/ 3213219820Sjeff 3214219820Sjeffstatic int __osm_ftree_fabric_rank(IN ftree_fabric_t * p_ftree) 3215219820Sjeff{ 3216219820Sjeff int res = 0; 3217219820Sjeff 3218219820Sjeff OSM_LOG_ENTER(&p_ftree->p_osm->log); 3219219820Sjeff 3220219820Sjeff if (__osm_ftree_fabric_roots_provided(p_ftree)) 3221219820Sjeff res = __osm_ftree_fabric_rank_from_roots(p_ftree); 3222219820Sjeff else 3223219820Sjeff res = __osm_ftree_fabric_rank_from_hcas(p_ftree); 3224219820Sjeff 3225219820Sjeff if (res) 3226219820Sjeff goto Exit; 3227219820Sjeff 
3228219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_INFO, 3229219820Sjeff "FatTree max switch rank is %u\n", p_ftree->max_switch_rank); 3230219820Sjeff 3231219820SjeffExit: 3232219820Sjeff OSM_LOG_EXIT(&p_ftree->p_osm->log); 3233219820Sjeff return res; 3234219820Sjeff} /* __osm_ftree_fabric_rank() */ 3235219820Sjeff 3236219820Sjeff/*************************************************** 3237219820Sjeff ***************************************************/ 3238219820Sjeff 3239219820Sjeffstatic void __osm_ftree_fabric_set_leaf_rank(IN ftree_fabric_t * p_ftree) 3240219820Sjeff{ 3241219820Sjeff unsigned i; 3242219820Sjeff ftree_sw_t *p_sw; 3243219820Sjeff ftree_hca_t *p_hca = NULL; 3244219820Sjeff ftree_hca_t *p_next_hca; 3245219820Sjeff 3246219820Sjeff OSM_LOG_ENTER(&p_ftree->p_osm->log); 3247219820Sjeff 3248219820Sjeff if (!__osm_ftree_fabric_roots_provided(p_ftree)) { 3249219820Sjeff /* If root file is not provided, the fabric has to be pure fat-tree 3250219820Sjeff in terms of ranking. Thus, leaf switches rank is the max rank. */ 3251219820Sjeff p_ftree->leaf_switch_rank = p_ftree->max_switch_rank; 3252219820Sjeff } else { 3253219820Sjeff /* Find the first CN and set the leaf_switch_rank to the rank 3254219820Sjeff of the switch that is connected to this CN. Later we will 3255219820Sjeff ensure that all the leaf switches have the same rank. */ 3256219820Sjeff p_next_hca = (ftree_hca_t *) cl_qmap_head(&p_ftree->hca_tbl); 3257219820Sjeff while (p_next_hca != 3258219820Sjeff (ftree_hca_t *) cl_qmap_end(&p_ftree->hca_tbl)) { 3259219820Sjeff p_hca = p_next_hca; 3260219820Sjeff if (p_hca->cn_num) 3261219820Sjeff break; 3262219820Sjeff p_next_hca = 3263219820Sjeff (ftree_hca_t *) cl_qmap_next(&p_hca->map_item); 3264219820Sjeff } 3265219820Sjeff /* we know that there are CNs in the fabric, so just to be sure... 
*/ 3266219820Sjeff CL_ASSERT(p_next_hca != 3267219820Sjeff (ftree_hca_t *) cl_qmap_end(&p_ftree->hca_tbl)); 3268219820Sjeff 3269219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, 3270219820Sjeff "Selected CN port GUID 0x%" PRIx64 "\n", 3271219820Sjeff __osm_ftree_hca_get_guid_ho(p_hca)); 3272219820Sjeff 3273219820Sjeff for (i = 0; (i < p_hca->up_port_groups_num) 3274219820Sjeff && (!p_hca->up_port_groups[i]->is_cn); i++) ; 3275219820Sjeff CL_ASSERT(i < p_hca->up_port_groups_num); 3276219820Sjeff CL_ASSERT(p_hca->up_port_groups[i]->remote_node_type == 3277219820Sjeff IB_NODE_TYPE_SWITCH); 3278219820Sjeff 3279219820Sjeff p_sw = p_hca->up_port_groups[i]->remote_hca_or_sw.p_sw; 3280219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, 3281219820Sjeff "Selected leaf switch GUID 0x%" PRIx64 ", rank %u\n", 3282219820Sjeff __osm_ftree_sw_get_guid_ho(p_sw), p_sw->rank); 3283219820Sjeff p_ftree->leaf_switch_rank = p_sw->rank; 3284219820Sjeff } 3285219820Sjeff 3286219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_INFO, 3287219820Sjeff "FatTree leaf switch rank is %u\n", p_ftree->leaf_switch_rank); 3288219820Sjeff OSM_LOG_EXIT(&p_ftree->p_osm->log); 3289219820Sjeff} /* __osm_ftree_fabric_set_leaf_rank() */ 3290219820Sjeff 3291219820Sjeff/*************************************************** 3292219820Sjeff ***************************************************/ 3293219820Sjeff 3294219820Sjeffstatic int __osm_ftree_fabric_populate_ports(IN ftree_fabric_t * p_ftree) 3295219820Sjeff{ 3296219820Sjeff ftree_hca_t *p_hca; 3297219820Sjeff ftree_hca_t *p_next_hca; 3298219820Sjeff ftree_sw_t *p_sw; 3299219820Sjeff ftree_sw_t *p_next_sw; 3300219820Sjeff int res = 0; 3301219820Sjeff 3302219820Sjeff OSM_LOG_ENTER(&p_ftree->p_osm->log); 3303219820Sjeff 3304219820Sjeff p_next_hca = (ftree_hca_t *) cl_qmap_head(&p_ftree->hca_tbl); 3305219820Sjeff while (p_next_hca != (ftree_hca_t *) cl_qmap_end(&p_ftree->hca_tbl)) { 3306219820Sjeff p_hca = p_next_hca; 3307219820Sjeff p_next_hca = 
(ftree_hca_t *) cl_qmap_next(&p_hca->map_item); 3308219820Sjeff if (__osm_ftree_fabric_construct_hca_ports(p_ftree, p_hca) != 0) { 3309219820Sjeff res = -1; 3310219820Sjeff goto Exit; 3311219820Sjeff } 3312219820Sjeff } 3313219820Sjeff 3314219820Sjeff p_next_sw = (ftree_sw_t *) cl_qmap_head(&p_ftree->sw_tbl); 3315219820Sjeff while (p_next_sw != (ftree_sw_t *) cl_qmap_end(&p_ftree->sw_tbl)) { 3316219820Sjeff p_sw = p_next_sw; 3317219820Sjeff p_next_sw = (ftree_sw_t *) cl_qmap_next(&p_sw->map_item); 3318219820Sjeff if (__osm_ftree_fabric_construct_sw_ports(p_ftree, p_sw) != 0) { 3319219820Sjeff res = -1; 3320219820Sjeff goto Exit; 3321219820Sjeff } 3322219820Sjeff } 3323219820SjeffExit: 3324219820Sjeff OSM_LOG_EXIT(&p_ftree->p_osm->log); 3325219820Sjeff return res; 3326219820Sjeff} /* __osm_ftree_fabric_populate_ports() */ 3327219820Sjeff 3328219820Sjeff/*************************************************** 3329219820Sjeff ***************************************************/ 3330219820Sjeffstatic int add_guid_item_to_list(void *cxt, uint64_t guid, char *p) 3331219820Sjeff{ 3332219820Sjeff cl_qlist_t *list = cxt; 3333219820Sjeff struct guid_list_item *item; 3334219820Sjeff 3335219820Sjeff item = malloc(sizeof(*item)); 3336219820Sjeff if (!item) 3337219820Sjeff return -1; 3338219820Sjeff 3339219820Sjeff item->guid = guid; 3340219820Sjeff cl_qlist_insert_tail(list, &item->list); 3341219820Sjeff 3342219820Sjeff return 0; 3343219820Sjeff} 3344219820Sjeff 3345219820Sjeffstatic int add_guid_item_to_map(void *cxt, uint64_t guid, char *p) 3346219820Sjeff{ 3347219820Sjeff cl_qmap_t *map = cxt; 3348219820Sjeff name_map_item_t *item; 3349219820Sjeff 3350219820Sjeff item = malloc(sizeof(*item)); 3351219820Sjeff if (!item) 3352219820Sjeff return -1; 3353219820Sjeff 3354219820Sjeff item->guid = guid; 3355219820Sjeff cl_qmap_insert(map, guid, &item->item); 3356219820Sjeff 3357219820Sjeff return 0; 3358219820Sjeff} 3359219820Sjeff 3360219820Sjeffstatic int 
__osm_ftree_fabric_read_guid_files(IN ftree_fabric_t * p_ftree) 3361219820Sjeff{ 3362219820Sjeff int status = 0; 3363219820Sjeff 3364219820Sjeff OSM_LOG_ENTER(&p_ftree->p_osm->log); 3365219820Sjeff 3366219820Sjeff if (__osm_ftree_fabric_roots_provided(p_ftree)) { 3367219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, 3368219820Sjeff "Fetching root nodes from file %s\n", 3369219820Sjeff p_ftree->p_osm->subn.opt.root_guid_file); 3370219820Sjeff 3371219820Sjeff if (parse_node_map(p_ftree->p_osm->subn.opt.root_guid_file, 3372219820Sjeff add_guid_item_to_list, 3373219820Sjeff &p_ftree->root_guid_list)) { 3374219820Sjeff status = -1; 3375219820Sjeff goto Exit; 3376219820Sjeff } 3377219820Sjeff 3378219820Sjeff if (!cl_qlist_count(&p_ftree->root_guid_list)) { 3379219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR, "ERR AB22: " 3380219820Sjeff "Root guids file has no valid guids\n"); 3381219820Sjeff status = -1; 3382219820Sjeff goto Exit; 3383219820Sjeff } 3384219820Sjeff } 3385219820Sjeff 3386219820Sjeff if (__osm_ftree_fabric_cns_provided(p_ftree)) { 3387219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, 3388219820Sjeff "Fetching compute nodes from file %s\n", 3389219820Sjeff p_ftree->p_osm->subn.opt.cn_guid_file); 3390219820Sjeff 3391219820Sjeff if (parse_node_map(p_ftree->p_osm->subn.opt.cn_guid_file, 3392219820Sjeff add_guid_item_to_map, 3393219820Sjeff &p_ftree->cn_guid_tbl)) { 3394219820Sjeff status = -1; 3395219820Sjeff goto Exit; 3396219820Sjeff } 3397219820Sjeff 3398219820Sjeff if (!cl_qmap_count(&p_ftree->cn_guid_tbl)) { 3399219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR, "ERR AB23: " 3400219820Sjeff "Compute node guids file has no valid guids\n"); 3401219820Sjeff status = -1; 3402219820Sjeff goto Exit; 3403219820Sjeff } 3404219820Sjeff } 3405219820Sjeff 3406219820SjeffExit: 3407219820Sjeff OSM_LOG_EXIT(&p_ftree->p_osm->log); 3408219820Sjeff return status; 3409219820Sjeff} /*__osm_ftree_fabric_read_guid_files() */ 3410219820Sjeff 
3411219820Sjeff/*************************************************** 3412219820Sjeff ***************************************************/ 3413219820Sjeff 3414219820Sjeffstatic int __osm_ftree_construct_fabric(IN void *context) 3415219820Sjeff{ 3416219820Sjeff ftree_fabric_t *p_ftree = context; 3417219820Sjeff int status = 0; 3418219820Sjeff 3419219820Sjeff OSM_LOG_ENTER(&p_ftree->p_osm->log); 3420219820Sjeff 3421219820Sjeff __osm_ftree_fabric_clear(p_ftree); 3422219820Sjeff 3423219820Sjeff if (p_ftree->p_osm->subn.opt.lmc > 0) { 3424219820Sjeff osm_log(&p_ftree->p_osm->log, OSM_LOG_SYS, 3425219820Sjeff "LMC > 0 is not supported by fat-tree routing.\n" 3426219820Sjeff "Falling back to default routing\n"); 3427219820Sjeff status = -1; 3428219820Sjeff goto Exit; 3429219820Sjeff } 3430219820Sjeff 3431219820Sjeff if (cl_qmap_count(&p_ftree->p_osm->subn.sw_guid_tbl) < 2) { 3432219820Sjeff osm_log(&p_ftree->p_osm->log, OSM_LOG_SYS, 3433219820Sjeff "Fabric has %u switches - topology is not fat-tree.\n" 3434219820Sjeff "Falling back to default routing\n", 3435219820Sjeff cl_qmap_count(&p_ftree->p_osm->subn.sw_guid_tbl)); 3436219820Sjeff status = -1; 3437219820Sjeff goto Exit; 3438219820Sjeff } 3439219820Sjeff 3440219820Sjeff if ((cl_qmap_count(&p_ftree->p_osm->subn.node_guid_tbl) - 3441219820Sjeff cl_qmap_count(&p_ftree->p_osm->subn.sw_guid_tbl)) < 2) { 3442219820Sjeff osm_log(&p_ftree->p_osm->log, OSM_LOG_SYS, 3443219820Sjeff "Fabric has %u nodes (%u switches) - topology is not fat-tree.\n" 3444219820Sjeff "Falling back to default routing\n", 3445219820Sjeff cl_qmap_count(&p_ftree->p_osm->subn.node_guid_tbl), 3446219820Sjeff cl_qmap_count(&p_ftree->p_osm->subn.sw_guid_tbl)); 3447219820Sjeff status = -1; 3448219820Sjeff goto Exit; 3449219820Sjeff } 3450219820Sjeff 3451219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, "\n" 3452219820Sjeff " |----------------------------------------|\n" 3453219820Sjeff " |- Starting FatTree fabric construction -|\n" 3454219820Sjeff " 
|----------------------------------------|\n\n"); 3455219820Sjeff 3456219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, 3457219820Sjeff "Populating FatTree Switch and CA tables\n"); 3458219820Sjeff if (__osm_ftree_fabric_populate_nodes(p_ftree) != 0) { 3459219820Sjeff osm_log(&p_ftree->p_osm->log, OSM_LOG_SYS, 3460219820Sjeff "Fabric topology is not fat-tree - " 3461219820Sjeff "falling back to default routing\n"); 3462219820Sjeff status = -1; 3463219820Sjeff goto Exit; 3464219820Sjeff } 3465219820Sjeff 3466219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, 3467219820Sjeff "Reading guid files provided by user\n"); 3468219820Sjeff if (__osm_ftree_fabric_read_guid_files(p_ftree) != 0) { 3469219820Sjeff osm_log(&p_ftree->p_osm->log, OSM_LOG_SYS, 3470219820Sjeff "Failed reading guid files - " 3471219820Sjeff "falling back to default routing\n"); 3472219820Sjeff status = -1; 3473219820Sjeff goto Exit; 3474219820Sjeff } 3475219820Sjeff 3476219820Sjeff if (cl_qmap_count(&p_ftree->hca_tbl) < 2) { 3477219820Sjeff osm_log(&p_ftree->p_osm->log, OSM_LOG_SYS, 3478219820Sjeff "Fabric has %u CAa - topology is not fat-tree.\n" 3479219820Sjeff "Falling back to default routing\n", 3480219820Sjeff cl_qmap_count(&p_ftree->hca_tbl)); 3481219820Sjeff status = -1; 3482219820Sjeff goto Exit; 3483219820Sjeff } 3484219820Sjeff 3485219820Sjeff /* Rank all the switches in the fabric. 3486219820Sjeff After that we will know only fabric max switch rank. 3487219820Sjeff We will be able to check leaf switches rank and the 3488219820Sjeff whole tree rank after filling ports and marking CNs. 
*/ 3489219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, "Ranking FatTree\n"); 3490219820Sjeff if (__osm_ftree_fabric_rank(p_ftree) != 0) { 3491219820Sjeff osm_log(&p_ftree->p_osm->log, OSM_LOG_SYS, 3492219820Sjeff "Failed ranking the tree\n"); 3493219820Sjeff status = -1; 3494219820Sjeff goto Exit; 3495219820Sjeff } 3496219820Sjeff 3497219820Sjeff /* For each hca and switch, construct array of ports. 3498219820Sjeff This is done after the whole FatTree data structure is ready, 3499219820Sjeff because we want the ports to have pointers to ftree_{sw,hca}_t 3500219820Sjeff objects, and we need the switches to be already ranked because 3501219820Sjeff that's how the port direction is determined. */ 3502219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, 3503219820Sjeff "Populating CA & switch ports\n"); 3504219820Sjeff if (__osm_ftree_fabric_populate_ports(p_ftree) != 0) { 3505219820Sjeff osm_log(&p_ftree->p_osm->log, OSM_LOG_SYS, 3506219820Sjeff "Fabric topology is not a fat-tree\n"); 3507219820Sjeff status = -1; 3508219820Sjeff goto Exit; 3509219820Sjeff } else if (p_ftree->cn_num == 0) { 3510219820Sjeff osm_log(&p_ftree->p_osm->log, OSM_LOG_SYS, 3511219820Sjeff "Fabric has no valid compute nodes\n"); 3512219820Sjeff status = -1; 3513219820Sjeff goto Exit; 3514219820Sjeff } 3515219820Sjeff 3516219820Sjeff /* Now that the CA ports have been created and CNs were marked, 3517219820Sjeff we can complete the fabric ranking - set leaf switches rank. 
*/ 3518219820Sjeff __osm_ftree_fabric_set_leaf_rank(p_ftree); 3519219820Sjeff 3520219820Sjeff if (__osm_ftree_fabric_get_rank(p_ftree) > FAT_TREE_MAX_RANK || 3521219820Sjeff __osm_ftree_fabric_get_rank(p_ftree) < FAT_TREE_MIN_RANK) { 3522219820Sjeff osm_log(&p_ftree->p_osm->log, OSM_LOG_SYS, 3523219820Sjeff "Fabric rank is %u (should be between %u and %u)\n", 3524219820Sjeff __osm_ftree_fabric_get_rank(p_ftree), FAT_TREE_MIN_RANK, 3525219820Sjeff FAT_TREE_MAX_RANK); 3526219820Sjeff status = -1; 3527219820Sjeff goto Exit; 3528219820Sjeff } 3529219820Sjeff 3530219820Sjeff /* Mark all the switches in the fabric with rank equal to 3531219820Sjeff p_ftree->leaf_switch_rank and that are also connected to CNs. 3532219820Sjeff As a by-product, this function also runs basic topology 3533219820Sjeff validation - it checks that all the CNs are at the same rank. */ 3534219820Sjeff if (__osm_ftree_fabric_mark_leaf_switches(p_ftree)) { 3535219820Sjeff osm_log(&p_ftree->p_osm->log, OSM_LOG_SYS, 3536219820Sjeff "Fabric topology is not a fat-tree\n"); 3537219820Sjeff status = -1; 3538219820Sjeff goto Exit; 3539219820Sjeff } 3540219820Sjeff 3541219820Sjeff /* Assign index to all the switches in the fabric. 3542219820Sjeff This function also sorts leaf switch array by the switch index, 3543219820Sjeff sorts all the port arrays of the indexed switches by remote 3544219820Sjeff switch index, and creates switch-by-tuple table (sw_by_tuple_tbl) */ 3545219820Sjeff __osm_ftree_fabric_make_indexing(p_ftree); 3546219820Sjeff 3547219820Sjeff /* Create leaf switch array sorted by index. 3548219820Sjeff This array contains switches with rank equal to p_ftree->leaf_switch_rank 3549219820Sjeff and that are also connected to CNs (REAL leafs), and it may contain 3550219820Sjeff switches at the same leaf rank w/o CNs, if this is the order of indexing. 3551219820Sjeff In any case, the first and the last switches in the array are REAL leafs. 
*/ 3552219820Sjeff if (__osm_ftree_fabric_create_leaf_switch_array(p_ftree)) { 3553219820Sjeff osm_log(&p_ftree->p_osm->log, OSM_LOG_SYS, 3554219820Sjeff "Fabric topology is not a fat-tree\n"); 3555219820Sjeff status = -1; 3556219820Sjeff goto Exit; 3557219820Sjeff } 3558219820Sjeff 3559219820Sjeff /* calculate and set ftree.max_cn_per_leaf field */ 3560219820Sjeff __osm_ftree_fabric_set_max_cn_per_leaf(p_ftree); 3561219820Sjeff 3562219820Sjeff /* print general info about fabric topology */ 3563219820Sjeff __osm_ftree_fabric_dump_general_info(p_ftree); 3564219820Sjeff 3565219820Sjeff /* dump full tree topology */ 3566219820Sjeff if (osm_log_is_active(&p_ftree->p_osm->log, OSM_LOG_DEBUG)) 3567219820Sjeff __osm_ftree_fabric_dump(p_ftree); 3568219820Sjeff 3569219820Sjeff /* the fabric is required to be PURE fat-tree only if the root 3570219820Sjeff guid file hasn't been provided by user */ 3571219820Sjeff if (!__osm_ftree_fabric_roots_provided(p_ftree) && 3572219820Sjeff !__osm_ftree_fabric_validate_topology(p_ftree)) { 3573219820Sjeff osm_log(&p_ftree->p_osm->log, OSM_LOG_SYS, 3574219820Sjeff "Fabric topology is not a fat-tree\n"); 3575219820Sjeff status = -1; 3576219820Sjeff goto Exit; 3577219820Sjeff } 3578219820Sjeff 3579219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, 3580219820Sjeff "Max LID in switch LFTs: %u\n", 3581219820Sjeff p_ftree->lft_max_lid_ho); 3582219820Sjeff 3583219820SjeffExit: 3584219820Sjeff if (status != 0) { 3585219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, 3586219820Sjeff "Clearing FatTree Fabric data structures\n"); 3587219820Sjeff __osm_ftree_fabric_clear(p_ftree); 3588219820Sjeff } else 3589219820Sjeff p_ftree->fabric_built = TRUE; 3590219820Sjeff 3591219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, "\n" 3592219820Sjeff " |--------------------------------------------------|\n" 3593219820Sjeff " |- Done constructing FatTree fabric (status = %d) -|\n" 3594219820Sjeff " 
|--------------------------------------------------|\n\n", 3595219820Sjeff status); 3596219820Sjeff 3597219820Sjeff OSM_LOG_EXIT(&p_ftree->p_osm->log); 3598219820Sjeff return status; 3599219820Sjeff} /* __osm_ftree_construct_fabric() */ 3600219820Sjeff 3601219820Sjeff/*************************************************** 3602219820Sjeff ***************************************************/ 3603219820Sjeff 3604219820Sjeffstatic int __osm_ftree_do_routing(IN void *context) 3605219820Sjeff{ 3606219820Sjeff ftree_fabric_t *p_ftree = context; 3607219820Sjeff int status = 0; 3608219820Sjeff 3609219820Sjeff OSM_LOG_ENTER(&p_ftree->p_osm->log); 3610219820Sjeff 3611219820Sjeff if (!p_ftree->fabric_built) { 3612219820Sjeff status = -1; 3613219820Sjeff goto Exit; 3614219820Sjeff } 3615219820Sjeff 3616219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, 3617219820Sjeff "Starting FatTree routing\n"); 3618219820Sjeff 3619219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, 3620219820Sjeff "Filling switch forwarding tables for Compute Nodes\n"); 3621219820Sjeff __osm_ftree_fabric_route_to_cns(p_ftree); 3622219820Sjeff 3623219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, 3624219820Sjeff "Filling switch forwarding tables for non-CN targets\n"); 3625219820Sjeff __osm_ftree_fabric_route_to_non_cns(p_ftree); 3626219820Sjeff 3627219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, 3628219820Sjeff "Filling switch forwarding tables for switch-to-switch paths\n"); 3629219820Sjeff __osm_ftree_fabric_route_to_switches(p_ftree); 3630219820Sjeff 3631219820Sjeff /* for each switch, set its fwd table */ 3632219820Sjeff cl_qmap_apply_func(&p_ftree->sw_tbl, __osm_ftree_set_sw_fwd_table, 3633219820Sjeff (void *)p_ftree); 3634219820Sjeff 3635219820Sjeff /* write out hca ordering file */ 3636219820Sjeff __osm_ftree_fabric_dump_hca_ordering(p_ftree); 3637219820Sjeff 3638219820Sjeff OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, 3639219820Sjeff "FatTree routing is done\n"); 
3640219820Sjeff 3641219820SjeffExit: 3642219820Sjeff OSM_LOG_EXIT(&p_ftree->p_osm->log); 3643219820Sjeff return status; 3644219820Sjeff} 3645219820Sjeff 3646219820Sjeff/*************************************************** 3647219820Sjeff ***************************************************/ 3648219820Sjeff 3649219820Sjeffstatic void __osm_ftree_delete(IN void *context) 3650219820Sjeff{ 3651219820Sjeff if (!context) 3652219820Sjeff return; 3653219820Sjeff __osm_ftree_fabric_destroy((ftree_fabric_t *) context); 3654219820Sjeff} 3655219820Sjeff 3656219820Sjeff/*************************************************** 3657219820Sjeff ***************************************************/ 3658219820Sjeff 3659219820Sjeffint osm_ucast_ftree_setup(struct osm_routing_engine *r, osm_opensm_t * p_osm) 3660219820Sjeff{ 3661219820Sjeff ftree_fabric_t *p_ftree = __osm_ftree_fabric_create(); 3662219820Sjeff if (!p_ftree) 3663219820Sjeff return -1; 3664219820Sjeff 3665219820Sjeff p_ftree->p_osm = p_osm; 3666219820Sjeff 3667219820Sjeff r->context = (void *)p_ftree; 3668219820Sjeff r->build_lid_matrices = __osm_ftree_construct_fabric; 3669219820Sjeff r->ucast_build_fwd_tables = __osm_ftree_do_routing; 3670219820Sjeff r->delete = __osm_ftree_delete; 3671219820Sjeff 3672219820Sjeff return 0; 3673219820Sjeff} 3674