1219820Sjeff/* 2219820Sjeff * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved. 3219820Sjeff * Copyright (c) 2002-2008 Mellanox Technologies LTD. All rights reserved. 4219820Sjeff * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. 5219820Sjeff * 6219820Sjeff * This software is available to you under a choice of one of two 7219820Sjeff * licenses. You may choose to be licensed under the terms of the GNU 8219820Sjeff * General Public License (GPL) Version 2, available from the file 9219820Sjeff * COPYING in the main directory of this source tree, or the 10219820Sjeff * OpenIB.org BSD license below: 11219820Sjeff * 12219820Sjeff * Redistribution and use in source and binary forms, with or 13219820Sjeff * without modification, are permitted provided that the following 14219820Sjeff * conditions are met: 15219820Sjeff * 16219820Sjeff * - Redistributions of source code must retain the above 17219820Sjeff * copyright notice, this list of conditions and the following 18219820Sjeff * disclaimer. 19219820Sjeff * 20219820Sjeff * - Redistributions in binary form must reproduce the above 21219820Sjeff * copyright notice, this list of conditions and the following 22219820Sjeff * disclaimer in the documentation and/or other materials 23219820Sjeff * provided with the distribution. 24219820Sjeff * 25219820Sjeff * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 26219820Sjeff * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 27219820Sjeff * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 28219820Sjeff * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 29219820Sjeff * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 30219820Sjeff * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 31219820Sjeff * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 32219820Sjeff * SOFTWARE. 33219820Sjeff * 34219820Sjeff */ 35219820Sjeff 36219820Sjeff/* 37219820Sjeff * Abstract: 38219820Sjeff * Implementation of osm_switch_t. 39219820Sjeff * This object represents an Infiniband switch. 40219820Sjeff * This object is part of the opensm family of objects. 41219820Sjeff */ 42219820Sjeff 43219820Sjeff#if HAVE_CONFIG_H 44219820Sjeff# include <config.h> 45219820Sjeff#endif /* HAVE_CONFIG_H */ 46219820Sjeff 47219820Sjeff#include <stdlib.h> 48219820Sjeff#include <string.h> 49219820Sjeff#include <complib/cl_math.h> 50219820Sjeff#include <iba/ib_types.h> 51219820Sjeff#include <opensm/osm_switch.h> 52219820Sjeff 53219820Sjeff/********************************************************************** 54219820Sjeff **********************************************************************/ 55219820Sjeffcl_status_t 56219820Sjeffosm_switch_set_hops(IN osm_switch_t * const p_sw, 57219820Sjeff IN const uint16_t lid_ho, 58219820Sjeff IN const uint8_t port_num, IN const uint8_t num_hops) 59219820Sjeff{ 60219820Sjeff if (lid_ho > p_sw->max_lid_ho) 61219820Sjeff return -1; 62219820Sjeff if (!p_sw->hops[lid_ho]) { 63219820Sjeff p_sw->hops[lid_ho] = malloc(p_sw->num_ports); 64219820Sjeff if (!p_sw->hops[lid_ho]) 65219820Sjeff return -1; 66219820Sjeff memset(p_sw->hops[lid_ho], OSM_NO_PATH, p_sw->num_ports); 67219820Sjeff } 68219820Sjeff 69219820Sjeff p_sw->hops[lid_ho][port_num] = num_hops; 70219820Sjeff if (p_sw->hops[lid_ho][0] > num_hops) 71219820Sjeff p_sw->hops[lid_ho][0] = num_hops; 72219820Sjeff 73219820Sjeff return 0; 74219820Sjeff} 75219820Sjeff 76219820Sjeff/********************************************************************** 77219820Sjeff **********************************************************************/ 78219820Sjeffstatic ib_api_status_t 79219820Sjeffosm_switch_init(IN osm_switch_t * const p_sw, 80219820Sjeff IN osm_node_t * const p_node, 81219820Sjeff IN const osm_madw_t * const p_madw) 82219820Sjeff{ 83219820Sjeff ib_api_status_t status = IB_SUCCESS; 84219820Sjeff ib_switch_info_t *p_si; 85219820Sjeff ib_smp_t *p_smp; 86219820Sjeff uint8_t num_ports; 87219820Sjeff uint32_t port_num; 88219820Sjeff 89219820Sjeff p_smp = osm_madw_get_smp_ptr(p_madw); 90219820Sjeff p_si = (ib_switch_info_t *) ib_smp_get_payload_ptr(p_smp); 91219820Sjeff num_ports = osm_node_get_num_physp(p_node); 92219820Sjeff 93219820Sjeff CL_ASSERT(p_smp->attr_id == IB_MAD_ATTR_SWITCH_INFO); 94219820Sjeff 95219820Sjeff p_sw->p_node = p_node; 96219820Sjeff p_sw->switch_info = *p_si; 97219820Sjeff p_sw->num_ports = num_ports; 98219820Sjeff p_sw->need_update = 2; 99219820Sjeff 100219820Sjeff /* Initiate the linear forwarding table */ 101219820Sjeff 102219820Sjeff if (!p_si->lin_cap) { 103219820Sjeff /* This switch does not support linear forwarding tables */ 104219820Sjeff status = IB_UNSUPPORTED; 105219820Sjeff goto Exit; 106219820Sjeff } 107219820Sjeff 108219820Sjeff p_sw->lft = malloc(IB_LID_UCAST_END_HO + 1); 109219820Sjeff if (!p_sw->lft) { 110219820Sjeff status = IB_INSUFFICIENT_MEMORY; 111219820Sjeff goto Exit; 112219820Sjeff } 113219820Sjeff 114219820Sjeff /* Initialize the table to OSM_NO_PATH, which is "invalid port" */ 115219820Sjeff memset(p_sw->lft, OSM_NO_PATH, IB_LID_UCAST_END_HO + 1); 116219820Sjeff 117219820Sjeff p_sw->p_prof = malloc(sizeof(*p_sw->p_prof) * num_ports); 118219820Sjeff if (p_sw->p_prof == NULL) { 119219820Sjeff status = IB_INSUFFICIENT_MEMORY; 120219820Sjeff goto Exit; 121219820Sjeff } 122219820Sjeff 123219820Sjeff memset(p_sw->p_prof, 0, sizeof(*p_sw->p_prof) * num_ports); 124219820Sjeff 125219820Sjeff status = osm_mcast_tbl_init(&p_sw->mcast_tbl, 126219820Sjeff osm_node_get_num_physp(p_node), 127219820Sjeff cl_ntoh16(p_si->mcast_cap)); 128219820Sjeff if (status != IB_SUCCESS) 129219820Sjeff goto Exit; 130219820Sjeff 131219820Sjeff for (port_num = 0; port_num < num_ports; port_num++) 132219820Sjeff osm_port_prof_construct(&p_sw->p_prof[port_num]); 133219820Sjeff 134219820SjeffExit: 135219820Sjeff return (status); 136219820Sjeff} 137219820Sjeff 138219820Sjeff/********************************************************************** 139219820Sjeff **********************************************************************/ 140219820Sjeffvoid osm_switch_delete(IN OUT osm_switch_t ** const pp_sw) 141219820Sjeff{ 142219820Sjeff osm_switch_t *p_sw = *pp_sw; 143219820Sjeff unsigned i; 144219820Sjeff 145219820Sjeff osm_mcast_tbl_destroy(&p_sw->mcast_tbl); 146219820Sjeff free(p_sw->p_prof); 147219820Sjeff if (p_sw->lft) 148219820Sjeff free(p_sw->lft); 149219820Sjeff if (p_sw->new_lft) 150219820Sjeff free(p_sw->new_lft); 151219820Sjeff if (p_sw->hops) { 152219820Sjeff for (i = 0; i < p_sw->num_hops; i++) 153219820Sjeff if (p_sw->hops[i]) 154219820Sjeff free(p_sw->hops[i]); 155219820Sjeff free(p_sw->hops); 156219820Sjeff } 157219820Sjeff free(*pp_sw); 158219820Sjeff *pp_sw = NULL; 159219820Sjeff} 160219820Sjeff 161219820Sjeff/********************************************************************** 162219820Sjeff **********************************************************************/ 163219820Sjeffosm_switch_t *osm_switch_new(IN osm_node_t * const p_node, 164219820Sjeff IN const osm_madw_t * const p_madw) 165219820Sjeff{ 166219820Sjeff ib_api_status_t status; 167219820Sjeff osm_switch_t *p_sw; 168219820Sjeff 169219820Sjeff CL_ASSERT(p_madw); 170219820Sjeff CL_ASSERT(p_node); 171219820Sjeff 172219820Sjeff p_sw = (osm_switch_t *) malloc(sizeof(*p_sw)); 173219820Sjeff if (p_sw) { 174219820Sjeff memset(p_sw, 0, sizeof(*p_sw)); 175219820Sjeff status = osm_switch_init(p_sw, p_node, p_madw); 176219820Sjeff if (status != IB_SUCCESS) 177219820Sjeff osm_switch_delete(&p_sw); 178219820Sjeff } 179219820Sjeff 180219820Sjeff return (p_sw); 181219820Sjeff} 182219820Sjeff 183219820Sjeff/********************************************************************** 184219820Sjeff **********************************************************************/ 185219820Sjeffboolean_t 186219820Sjeffosm_switch_get_lft_block(IN const osm_switch_t * const p_sw, 187219820Sjeff IN const uint16_t block_id, 188219820Sjeff OUT uint8_t * const p_block) 189219820Sjeff{ 190219820Sjeff uint16_t base_lid_ho = block_id * IB_SMP_DATA_SIZE; 191219820Sjeff 192219820Sjeff CL_ASSERT(p_sw); 193219820Sjeff CL_ASSERT(p_block); 194219820Sjeff 195219820Sjeff if (base_lid_ho > p_sw->max_lid_ho) 196219820Sjeff return FALSE; 197219820Sjeff 198219820Sjeff CL_ASSERT(base_lid_ho + IB_SMP_DATA_SIZE <= IB_LID_UCAST_END_HO); 199219820Sjeff memcpy(p_block, &(p_sw->lft[base_lid_ho]), IB_SMP_DATA_SIZE); 200219820Sjeff return TRUE; 201219820Sjeff} 202219820Sjeff 203219820Sjeff/********************************************************************** 204219820Sjeff **********************************************************************/ 205219820Sjeffstatic struct osm_remote_node * 206219820Sjeffosm_switch_find_guid_common(IN const osm_switch_t * const p_sw, 207219820Sjeff IN struct osm_remote_guids_count *r, 208219820Sjeff IN uint8_t port_num, 209219820Sjeff IN int find_sys_guid, 210219820Sjeff IN int find_node_guid) 211219820Sjeff{ 212219820Sjeff struct osm_remote_node *p_remote_guid = NULL; 213219820Sjeff osm_physp_t *p_physp; 214219820Sjeff osm_physp_t *p_rem_physp; 215219820Sjeff osm_node_t *p_rem_node; 216219820Sjeff uint64_t sys_guid; 217219820Sjeff uint64_t node_guid; 218219820Sjeff int i; 219219820Sjeff 220219820Sjeff CL_ASSERT(p_sw); 221219820Sjeff 222219820Sjeff p_physp = osm_node_get_physp_ptr(p_sw->p_node, port_num); 223219820Sjeff p_rem_physp = osm_physp_get_remote(p_physp); 224219820Sjeff p_rem_node = osm_physp_get_node_ptr(p_rem_physp); 225219820Sjeff sys_guid = p_rem_node->node_info.sys_guid; 226219820Sjeff node_guid = p_rem_node->node_info.node_guid; 227219820Sjeff 228219820Sjeff for (i = 0; i < r->count; i++) { 229219820Sjeff if ((!find_sys_guid 230219820Sjeff || r->guids[i].node->node_info.sys_guid == sys_guid) 231219820Sjeff && (!find_node_guid 232219820Sjeff || r->guids[i].node->node_info.node_guid == node_guid)) { 233219820Sjeff p_remote_guid = &r->guids[i]; 234219820Sjeff break; 235219820Sjeff } 236219820Sjeff } 237219820Sjeff 238219820Sjeff return p_remote_guid; 239219820Sjeff} 240219820Sjeff 241219820Sjeffstatic struct osm_remote_node * 242219820Sjeffosm_switch_find_sys_guid_count(IN const osm_switch_t * const p_sw, 243219820Sjeff IN struct osm_remote_guids_count *r, 244219820Sjeff IN uint8_t port_num) 245219820Sjeff{ 246219820Sjeff return osm_switch_find_guid_common(p_sw, r, port_num, 1, 0); 247219820Sjeff} 248219820Sjeff 249219820Sjeffstatic struct osm_remote_node * 250219820Sjeffosm_switch_find_node_guid_count(IN const osm_switch_t * const p_sw, 251219820Sjeff IN struct osm_remote_guids_count *r, 252219820Sjeff IN uint8_t port_num) 253219820Sjeff{ 254219820Sjeff return osm_switch_find_guid_common(p_sw, r, port_num, 0, 1); 255219820Sjeff} 256219820Sjeff 257219820Sjeff/********************************************************************** 258219820Sjeff **********************************************************************/ 259219820Sjeffuint8_t 260219820Sjeffosm_switch_recommend_path(IN const osm_switch_t * const p_sw, 261219820Sjeff IN osm_port_t * p_port, 262219820Sjeff IN const uint16_t lid_ho, 263219820Sjeff IN unsigned start_from, 264219820Sjeff IN const boolean_t ignore_existing, 265219820Sjeff IN const boolean_t dor) 266219820Sjeff{ 267219820Sjeff /* 268219820Sjeff We support an enhanced LMC aware routing mode: 269219820Sjeff In the case of LMC > 0, we can track the remote side 270219820Sjeff system and node for all of the lids of the target 271219820Sjeff and try and avoid routing again through the same 272219820Sjeff system / node. 273219820Sjeff 274219820Sjeff If this procedure is provided with the tracking array 275219820Sjeff and counter we can conduct this algorithm. 276219820Sjeff */ 277219820Sjeff boolean_t routing_for_lmc = (p_port->priv != NULL); 278219820Sjeff uint16_t base_lid; 279219820Sjeff uint8_t hops; 280219820Sjeff uint8_t least_hops; 281219820Sjeff uint8_t port_num; 282219820Sjeff uint8_t num_ports; 283219820Sjeff uint32_t least_paths = 0xFFFFFFFF; 284219820Sjeff unsigned i; 285219820Sjeff /* 286219820Sjeff The follwing will track the least paths if the 287219820Sjeff route should go through a new system/node 288219820Sjeff */ 289219820Sjeff uint32_t least_paths_other_sys = 0xFFFFFFFF; 290219820Sjeff uint32_t least_paths_other_nodes = 0xFFFFFFFF; 291219820Sjeff uint32_t least_forwarded_to = 0xFFFFFFFF; 292219820Sjeff uint32_t check_count; 293219820Sjeff uint8_t best_port = 0; 294219820Sjeff /* 295219820Sjeff These vars track the best port if it connects to 296219820Sjeff not used system/node. 297219820Sjeff */ 298219820Sjeff uint8_t best_port_other_sys = 0; 299219820Sjeff uint8_t best_port_other_node = 0; 300219820Sjeff boolean_t port_found = FALSE; 301219820Sjeff osm_physp_t *p_physp; 302219820Sjeff osm_physp_t *p_rem_physp; 303219820Sjeff osm_node_t *p_rem_node; 304219820Sjeff osm_node_t *p_rem_node_first = NULL; 305219820Sjeff struct osm_remote_node *p_remote_guid = NULL; 306219820Sjeff 307219820Sjeff CL_ASSERT(lid_ho > 0); 308219820Sjeff 309219820Sjeff if (p_port->p_node->sw) { 310219820Sjeff if (p_port->p_node->sw == p_sw) 311219820Sjeff return 0; 312219820Sjeff base_lid = osm_port_get_base_lid(p_port); 313219820Sjeff } else { 314219820Sjeff p_physp = p_port->p_physp; 315219820Sjeff if (!p_physp || !p_physp->p_remote_physp || 316219820Sjeff !p_physp->p_remote_physp->p_node->sw) 317219820Sjeff return OSM_NO_PATH; 318219820Sjeff 319219820Sjeff if (p_physp->p_remote_physp->p_node->sw == p_sw) 320219820Sjeff return p_physp->p_remote_physp->port_num; 321219820Sjeff base_lid = 322219820Sjeff osm_node_get_base_lid(p_physp->p_remote_physp->p_node, 0); 323219820Sjeff } 324219820Sjeff base_lid = cl_ntoh16(base_lid); 325219820Sjeff 326219820Sjeff num_ports = p_sw->num_ports; 327219820Sjeff 328219820Sjeff least_hops = osm_switch_get_least_hops(p_sw, base_lid); 329219820Sjeff if (least_hops == OSM_NO_PATH) 330219820Sjeff return (OSM_NO_PATH); 331219820Sjeff 332219820Sjeff /* 333219820Sjeff First, inquire with the forwarding table for an existing 334219820Sjeff route. If one is found, honor it unless: 335219820Sjeff 1. the ignore existing flag is set. 336219820Sjeff 2. the physical port is not a valid one or not healthy 337219820Sjeff 3. the physical port has a remote port (the link is up) 338219820Sjeff 4. the port has min-hops to the target (avoid loops) 339219820Sjeff */ 340219820Sjeff if (!ignore_existing) { 341219820Sjeff port_num = osm_switch_get_port_by_lid(p_sw, lid_ho); 342219820Sjeff 343219820Sjeff if (port_num != OSM_NO_PATH) { 344219820Sjeff CL_ASSERT(port_num < num_ports); 345219820Sjeff 346219820Sjeff p_physp = 347219820Sjeff osm_node_get_physp_ptr(p_sw->p_node, port_num); 348219820Sjeff /* 349219820Sjeff Don't be too trusting of the current forwarding table! 350219820Sjeff Verify that the port number is legal and that the 351219820Sjeff LID is reachable through this port. 352219820Sjeff */ 353219820Sjeff if (p_physp && osm_physp_is_healthy(p_physp) && 354219820Sjeff osm_physp_get_remote(p_physp)) { 355219820Sjeff hops = 356219820Sjeff osm_switch_get_hop_count(p_sw, base_lid, 357219820Sjeff port_num); 358219820Sjeff /* 359219820Sjeff If we aren't using pre-defined user routes 360219820Sjeff function, then we need to make sure that the 361219820Sjeff current path is the minimum one. In case of 362219820Sjeff having such a user function - this check will 363219820Sjeff not be done, and the old routing will be used. 364219820Sjeff Note: This means that it is the user's job to 365219820Sjeff clean all data in the forwarding tables that 366219820Sjeff he wants to be overridden by the minimum 367219820Sjeff hop function. 368219820Sjeff */ 369219820Sjeff if (hops == least_hops) 370219820Sjeff return (port_num); 371219820Sjeff } 372219820Sjeff } 373219820Sjeff } 374219820Sjeff 375219820Sjeff /* 376219820Sjeff This algorithm selects a port based on a static load balanced 377219820Sjeff selection across equal hop-count ports. 378219820Sjeff There is lots of room for improved sophistication here, 379219820Sjeff possibly guided by user configuration info. 380219820Sjeff */ 381219820Sjeff 382219820Sjeff /* 383219820Sjeff OpenSM routing is "local" - not considering a full lid to lid 384219820Sjeff path. As such we can not guarantee a path will not loop if we 385219820Sjeff do not always follow least hops. 386219820Sjeff So we must abort if not least hops. 387219820Sjeff */ 388219820Sjeff 389219820Sjeff /* port number starts with one and num_ports is 1 + num phys ports */ 390219820Sjeff for (i = start_from; i < start_from + num_ports; i++) { 391219820Sjeff port_num = i%num_ports; 392219820Sjeff if (!port_num || 393219820Sjeff osm_switch_get_hop_count(p_sw, base_lid, port_num) != 394219820Sjeff least_hops) 395219820Sjeff continue; 396219820Sjeff 397219820Sjeff /* let us make sure it is not down or unhealthy */ 398219820Sjeff p_physp = osm_node_get_physp_ptr(p_sw->p_node, port_num); 399219820Sjeff if (!p_physp || !osm_physp_is_healthy(p_physp) || 400219820Sjeff /* 401219820Sjeff we require all - non sma ports to be linked 402219820Sjeff to be routed through 403219820Sjeff */ 404219820Sjeff !osm_physp_get_remote(p_physp)) 405219820Sjeff continue; 406219820Sjeff 407219820Sjeff /* 408219820Sjeff We located a least-hop port, possibly one of many. 409219820Sjeff For this port, check the running total count of 410219820Sjeff the number of paths through this port. Select 411219820Sjeff the port routing the least number of paths. 412219820Sjeff */ 413219820Sjeff check_count = 414219820Sjeff osm_port_prof_path_count_get(&p_sw->p_prof[port_num]); 415219820Sjeff 416219820Sjeff /* 417219820Sjeff Advanced LMC routing requires tracking of the 418219820Sjeff best port by the node connected to the other side of 419219820Sjeff it. 420219820Sjeff */ 421219820Sjeff if (routing_for_lmc) { 422219820Sjeff /* Is the sys guid already used ? */ 423219820Sjeff p_remote_guid = osm_switch_find_sys_guid_count(p_sw, 424219820Sjeff p_port->priv, 425219820Sjeff port_num); 426219820Sjeff 427219820Sjeff /* If not update the least hops for this case */ 428219820Sjeff if (!p_remote_guid) { 429219820Sjeff if (check_count < least_paths_other_sys) { 430219820Sjeff least_paths_other_sys = check_count; 431219820Sjeff best_port_other_sys = port_num; 432219820Sjeff least_forwarded_to = 0; 433219820Sjeff } 434219820Sjeff } else { /* same sys found - try node */ 435219820Sjeff /* Else is the node guid already used ? */ 436219820Sjeff p_remote_guid = osm_switch_find_node_guid_count(p_sw, 437219820Sjeff p_port->priv, 438219820Sjeff port_num); 439219820Sjeff 440219820Sjeff /* If not update the least hops for this case */ 441219820Sjeff if (!p_remote_guid 442219820Sjeff && check_count < least_paths_other_nodes) { 443219820Sjeff least_paths_other_nodes = check_count; 444219820Sjeff best_port_other_node = port_num; 445219820Sjeff least_forwarded_to = 0; 446219820Sjeff } 447219820Sjeff /* else prior sys and node guid already used */ 448219820Sjeff 449219820Sjeff } /* same sys found */ 450219820Sjeff } 451219820Sjeff 452219820Sjeff /* routing for LMC mode */ 453219820Sjeff /* 454219820Sjeff the count is min but also lower then the max subscribed 455219820Sjeff */ 456219820Sjeff if (check_count < least_paths) { 457219820Sjeff if (dor) { 458219820Sjeff /* Get the Remote Node */ 459219820Sjeff p_rem_physp = osm_physp_get_remote(p_physp); 460219820Sjeff p_rem_node = 461219820Sjeff osm_physp_get_node_ptr(p_rem_physp); 462219820Sjeff /* use the first dimension, but spread 463219820Sjeff * traffic out among the group of ports 464219820Sjeff * representing that dimension */ 465219820Sjeff if (port_found) { 466219820Sjeff if (p_rem_node != p_rem_node_first) 467219820Sjeff continue; 468219820Sjeff } else 469219820Sjeff p_rem_node_first = p_rem_node; 470219820Sjeff } 471219820Sjeff port_found = TRUE; 472219820Sjeff best_port = port_num; 473219820Sjeff least_paths = check_count; 474219820Sjeff if (routing_for_lmc 475219820Sjeff && p_remote_guid 476219820Sjeff && p_remote_guid->forwarded_to < least_forwarded_to) 477219820Sjeff least_forwarded_to = p_remote_guid->forwarded_to; 478219820Sjeff } else if (routing_for_lmc 479219820Sjeff && p_remote_guid 480219820Sjeff && check_count == least_paths 481219820Sjeff && p_remote_guid->forwarded_to < least_forwarded_to) { 482219820Sjeff least_forwarded_to = p_remote_guid->forwarded_to; 483219820Sjeff best_port = port_num; 484219820Sjeff } 485219820Sjeff } 486219820Sjeff 487219820Sjeff if (port_found == FALSE) 488219820Sjeff return (OSM_NO_PATH); 489219820Sjeff 490219820Sjeff /* 491219820Sjeff if we are in enhanced routing mode and the best port is not 492219820Sjeff the local port 0 493219820Sjeff */ 494219820Sjeff if (routing_for_lmc && best_port) { 495219820Sjeff /* Select the least hop port of the non used sys first */ 496219820Sjeff if (best_port_other_sys) 497219820Sjeff best_port = best_port_other_sys; 498219820Sjeff else if (best_port_other_node) 499219820Sjeff best_port = best_port_other_node; 500219820Sjeff } 501219820Sjeff 502219820Sjeff return (best_port); 503219820Sjeff} 504219820Sjeff 505219820Sjeff/********************************************************************** 506219820Sjeff **********************************************************************/ 507219820Sjeffvoid osm_switch_clear_hops(IN osm_switch_t * p_sw) 508219820Sjeff{ 509219820Sjeff unsigned i; 510219820Sjeff 511219820Sjeff for (i = 0; i < p_sw->num_hops; i++) 512219820Sjeff if (p_sw->hops[i]) 513219820Sjeff memset(p_sw->hops[i], OSM_NO_PATH, p_sw->num_ports); 514219820Sjeff} 515219820Sjeff 516219820Sjeff/********************************************************************** 517219820Sjeff **********************************************************************/ 518219820Sjeffint 519219820Sjeffosm_switch_prepare_path_rebuild(IN osm_switch_t * p_sw, IN uint16_t max_lids) 520219820Sjeff{ 521219820Sjeff uint8_t **hops; 522219820Sjeff unsigned i; 523219820Sjeff 524219820Sjeff for (i = 0; i < p_sw->num_ports; i++) 525219820Sjeff osm_port_prof_construct(&p_sw->p_prof[i]); 526219820Sjeff 527219820Sjeff osm_switch_clear_hops(p_sw); 528219820Sjeff 529219820Sjeff if (!p_sw->new_lft && 530219820Sjeff !(p_sw->new_lft = malloc(IB_LID_UCAST_END_HO + 1))) 531219820Sjeff return IB_INSUFFICIENT_MEMORY; 532219820Sjeff 533219820Sjeff memset(p_sw->new_lft, OSM_NO_PATH, IB_LID_UCAST_END_HO + 1); 534219820Sjeff 535219820Sjeff if (!p_sw->hops) { 536219820Sjeff hops = malloc((max_lids + 1) * sizeof(hops[0])); 537219820Sjeff if (!hops) 538219820Sjeff return -1; 539219820Sjeff memset(hops, 0, (max_lids + 1) * sizeof(hops[0])); 540219820Sjeff p_sw->hops = hops; 541219820Sjeff p_sw->num_hops = max_lids + 1; 542219820Sjeff } else if (max_lids + 1 > p_sw->num_hops) { 543219820Sjeff uint8_t **old_hops; 544219820Sjeff 545219820Sjeff hops = malloc((max_lids + 1) * sizeof(hops[0])); 546219820Sjeff if (!hops) 547219820Sjeff return -1; 548219820Sjeff memcpy(hops, p_sw->hops, p_sw->num_hops * sizeof(hops[0])); 549219820Sjeff memset(hops + p_sw->num_hops, 0, 550219820Sjeff (max_lids + 1 - p_sw->num_hops) * sizeof(hops[0])); 551219820Sjeff old_hops = p_sw->hops; 552219820Sjeff p_sw->hops = hops; 553219820Sjeff p_sw->num_hops = max_lids + 1; 554219820Sjeff free(old_hops); 555219820Sjeff } 556219820Sjeff p_sw->max_lid_ho = max_lids; 557219820Sjeff 558219820Sjeff return 0; 559219820Sjeff} 560219820Sjeff 561219820Sjeff/********************************************************************** 562219820Sjeff **********************************************************************/ 563219820Sjeffuint8_t 564219820Sjeffosm_switch_get_port_least_hops(IN const osm_switch_t * const p_sw, 565219820Sjeff IN const osm_port_t * p_port) 566219820Sjeff{ 567219820Sjeff uint16_t lid; 568219820Sjeff 569219820Sjeff if (p_port->p_node->sw) { 570219820Sjeff if (p_port->p_node->sw == p_sw) 571219820Sjeff return 0; 572219820Sjeff lid = osm_node_get_base_lid(p_port->p_node, 0); 573219820Sjeff return osm_switch_get_least_hops(p_sw, cl_ntoh16(lid)); 574219820Sjeff } else { 575219820Sjeff osm_physp_t *p = p_port->p_physp; 576219820Sjeff uint8_t hops; 577219820Sjeff 578219820Sjeff if (!p || !p->p_remote_physp || !p->p_remote_physp->p_node->sw) 579219820Sjeff return OSM_NO_PATH; 580219820Sjeff if (p->p_remote_physp->p_node->sw == p_sw) 581219820Sjeff return 1; 582219820Sjeff lid = osm_node_get_base_lid(p->p_remote_physp->p_node, 0); 583219820Sjeff hops = osm_switch_get_least_hops(p_sw, cl_ntoh16(lid)); 584219820Sjeff return hops != OSM_NO_PATH ? hops + 1 : OSM_NO_PATH; 585219820Sjeff } 586219820Sjeff} 587219820Sjeff 588219820Sjeff/********************************************************************** 589219820Sjeff **********************************************************************/ 590219820Sjeffuint8_t 591219820Sjeffosm_switch_recommend_mcast_path(IN osm_switch_t * const p_sw, 592219820Sjeff IN osm_port_t * p_port, 593219820Sjeff IN uint16_t const mlid_ho, 594219820Sjeff IN boolean_t const ignore_existing) 595219820Sjeff{ 596219820Sjeff uint16_t base_lid; 597219820Sjeff uint8_t hops; 598219820Sjeff uint8_t port_num; 599219820Sjeff uint8_t num_ports; 600219820Sjeff uint8_t least_hops; 601219820Sjeff 602219820Sjeff CL_ASSERT(mlid_ho >= IB_LID_MCAST_START_HO); 603219820Sjeff 604219820Sjeff if (p_port->p_node->sw) { 605219820Sjeff if (p_port->p_node->sw == p_sw) 606219820Sjeff return 0; 607219820Sjeff base_lid = osm_port_get_base_lid(p_port); 608219820Sjeff } else { 609219820Sjeff osm_physp_t *p_physp = p_port->p_physp; 610219820Sjeff if (!p_physp || !p_physp->p_remote_physp || 611219820Sjeff !p_physp->p_remote_physp->p_node->sw) 612219820Sjeff return OSM_NO_PATH; 613219820Sjeff if (p_physp->p_remote_physp->p_node->sw == p_sw) 614219820Sjeff return p_physp->p_remote_physp->port_num; 615219820Sjeff base_lid = 616219820Sjeff osm_node_get_base_lid(p_physp->p_remote_physp->p_node, 0); 617219820Sjeff } 618219820Sjeff base_lid = cl_ntoh16(base_lid); 619219820Sjeff num_ports = p_sw->num_ports; 620219820Sjeff 621219820Sjeff /* 622219820Sjeff If the user wants us to ignore existing multicast routes, 623219820Sjeff then simply return the shortest hop count path to the 624219820Sjeff target port. 625219820Sjeff 626219820Sjeff Otherwise, return the first port that has a path to the target, 627219820Sjeff picking from the ports that are already in the multicast group. 628219820Sjeff */ 629219820Sjeff if (!ignore_existing) { 630219820Sjeff for (port_num = 1; port_num < num_ports; port_num++) { 631219820Sjeff if (!osm_mcast_tbl_is_port 632219820Sjeff (&p_sw->mcast_tbl, mlid_ho, port_num)) 633219820Sjeff continue; 634219820Sjeff /* 635219820Sjeff Don't be too trusting of the current forwarding table! 636219820Sjeff Verify that the LID is reachable through this port. 637219820Sjeff */ 638219820Sjeff hops = 639219820Sjeff osm_switch_get_hop_count(p_sw, base_lid, port_num); 640219820Sjeff if (hops != OSM_NO_PATH) 641219820Sjeff return (port_num); 642219820Sjeff } 643219820Sjeff } 644219820Sjeff 645219820Sjeff /* 646219820Sjeff Either no existing mcast paths reach this port or we are 647219820Sjeff ignoring existing paths. 648219820Sjeff 649219820Sjeff Determine the best multicast path to the target. Note that this 650219820Sjeff algorithm is slightly different from the one used for unicast route 651219820Sjeff recommendation. In this case (multicast), we must NOT 652219820Sjeff perform any sort of load balancing. We MUST take the FIRST 653219820Sjeff port found that has <= the lowest hop count path. This prevents 654219820Sjeff more than one multicast path to the same remote switch which 655219820Sjeff prevents a multicast loop. Multicast loops are bad since the same 656219820Sjeff multicast packet will go around and around, inevitably creating 657219820Sjeff a black hole that will destroy the Earth in a firey conflagration. 658219820Sjeff */ 659219820Sjeff least_hops = osm_switch_get_least_hops(p_sw, base_lid); 660219820Sjeff for (port_num = 1; port_num < num_ports; port_num++) 661219820Sjeff if (osm_switch_get_hop_count(p_sw, base_lid, port_num) == 662219820Sjeff least_hops) 663219820Sjeff break; 664219820Sjeff 665219820Sjeff CL_ASSERT(port_num < num_ports); 666219820Sjeff return (port_num); 667219820Sjeff} 668