1219820Sjeff/* 2219820Sjeff * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved. 3219820Sjeff * Copyright (c) 2002-2006 Mellanox Technologies LTD. All rights reserved. 4219820Sjeff * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. 5219820Sjeff * Copyright (c) 2008 Xsigo Systems Inc. All rights reserved. 6219820Sjeff * 7219820Sjeff * This software is available to you under a choice of one of two 8219820Sjeff * licenses. You may choose to be licensed under the terms of the GNU 9219820Sjeff * General Public License (GPL) Version 2, available from the file 10219820Sjeff * COPYING in the main directory of this source tree, or the 11219820Sjeff * OpenIB.org BSD license below: 12219820Sjeff * 13219820Sjeff * Redistribution and use in source and binary forms, with or 14219820Sjeff * without modification, are permitted provided that the following 15219820Sjeff * conditions are met: 16219820Sjeff * 17219820Sjeff * - Redistributions of source code must retain the above 18219820Sjeff * copyright notice, this list of conditions and the following 19219820Sjeff * disclaimer. 20219820Sjeff * 21219820Sjeff * - Redistributions in binary form must reproduce the above 22219820Sjeff * copyright notice, this list of conditions and the following 23219820Sjeff * disclaimer in the documentation and/or other materials 24219820Sjeff * provided with the distribution. 25219820Sjeff * 26219820Sjeff * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 27219820Sjeff * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 28219820Sjeff * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 29219820Sjeff * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 30219820Sjeff * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 31219820Sjeff * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 32219820Sjeff * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 33219820Sjeff * SOFTWARE. 34219820Sjeff * 35219820Sjeff */ 36219820Sjeff 37219820Sjeff/* 38219820Sjeff * Abstract: 39219820Sjeff * Implementation of osm_mcast_mgr_t. 40219820Sjeff * This file implements the Multicast Manager object. 41219820Sjeff */ 42219820Sjeff 43219820Sjeff#if HAVE_CONFIG_H 44219820Sjeff# include <config.h> 45219820Sjeff#endif /* HAVE_CONFIG_H */ 46219820Sjeff 47219820Sjeff#include <stdlib.h> 48219820Sjeff#include <string.h> 49219820Sjeff#include <iba/ib_types.h> 50219820Sjeff#include <complib/cl_debug.h> 51219820Sjeff#include <opensm/osm_opensm.h> 52219820Sjeff#include <opensm/osm_sm.h> 53219820Sjeff#include <opensm/osm_multicast.h> 54219820Sjeff#include <opensm/osm_node.h> 55219820Sjeff#include <opensm/osm_switch.h> 56219820Sjeff#include <opensm/osm_helper.h> 57219820Sjeff#include <opensm/osm_msgdef.h> 58219820Sjeff 59219820Sjeff/********************************************************************** 60219820Sjeff **********************************************************************/ 61219820Sjefftypedef struct osm_mcast_work_obj { 62219820Sjeff cl_list_item_t list_item; 63219820Sjeff osm_port_t *p_port; 64219820Sjeff} osm_mcast_work_obj_t; 65219820Sjeff 66219820Sjeff/********************************************************************** 67219820Sjeff **********************************************************************/ 68219820Sjeffstatic osm_mcast_work_obj_t *__osm_mcast_work_obj_new(IN const osm_port_t * 69219820Sjeff const p_port) 70219820Sjeff{ 71219820Sjeff /* 72219820Sjeff TO DO - get these objects from a lockpool. 73219820Sjeff */ 74219820Sjeff osm_mcast_work_obj_t *p_obj; 75219820Sjeff 76219820Sjeff /* 77219820Sjeff clean allocated memory to avoid assertion when trying to insert to 78219820Sjeff qlist. 79219820Sjeff see cl_qlist_insert_tail(): CL_ASSERT(p_list_item->p_list != p_list) 80219820Sjeff */ 81219820Sjeff p_obj = malloc(sizeof(*p_obj)); 82219820Sjeff if (p_obj) { 83219820Sjeff memset(p_obj, 0, sizeof(*p_obj)); 84219820Sjeff p_obj->p_port = (osm_port_t *) p_port; 85219820Sjeff } 86219820Sjeff 87219820Sjeff return (p_obj); 88219820Sjeff} 89219820Sjeff 90219820Sjeff/********************************************************************** 91219820Sjeff **********************************************************************/ 92219820Sjeffstatic void __osm_mcast_work_obj_delete(IN osm_mcast_work_obj_t * p_wobj) 93219820Sjeff{ 94219820Sjeff free(p_wobj); 95219820Sjeff} 96219820Sjeff 97219820Sjeff/********************************************************************** 98219820Sjeff Recursively remove nodes from the tree 99219820Sjeff *********************************************************************/ 100219820Sjeffstatic void __osm_mcast_mgr_purge_tree_node(IN osm_mtree_node_t * p_mtn) 101219820Sjeff{ 102219820Sjeff uint8_t i; 103219820Sjeff 104219820Sjeff for (i = 0; i < p_mtn->max_children; i++) { 105219820Sjeff if (p_mtn->child_array[i] && 106219820Sjeff (p_mtn->child_array[i] != OSM_MTREE_LEAF)) 107219820Sjeff __osm_mcast_mgr_purge_tree_node(p_mtn->child_array[i]); 108219820Sjeff 109219820Sjeff p_mtn->child_array[i] = NULL; 110219820Sjeff 111219820Sjeff } 112219820Sjeff 113219820Sjeff free(p_mtn); 114219820Sjeff} 115219820Sjeff 116219820Sjeff/********************************************************************** 117219820Sjeff **********************************************************************/ 118219820Sjeffstatic void 119219820Sjeff__osm_mcast_mgr_purge_tree(osm_sm_t * sm, IN osm_mgrp_t * const p_mgrp) 120219820Sjeff{ 121219820Sjeff OSM_LOG_ENTER(sm->p_log); 122219820Sjeff 123219820Sjeff if (p_mgrp->p_root) 124219820Sjeff __osm_mcast_mgr_purge_tree_node(p_mgrp->p_root); 125219820Sjeff 126219820Sjeff p_mgrp->p_root = NULL; 127219820Sjeff 128219820Sjeff OSM_LOG_EXIT(sm->p_log); 129219820Sjeff} 130219820Sjeff 131219820Sjeff/********************************************************************** 132219820Sjeff **********************************************************************/ 133219820Sjeffstatic float 134219820Sjeffosm_mcast_mgr_compute_avg_hops(osm_sm_t * sm, 135219820Sjeff const osm_mgrp_t * const p_mgrp, 136219820Sjeff const osm_switch_t * const p_sw) 137219820Sjeff{ 138219820Sjeff float avg_hops = 0; 139219820Sjeff uint32_t hops = 0; 140219820Sjeff uint32_t num_ports = 0; 141219820Sjeff const osm_port_t *p_port; 142219820Sjeff const osm_mcm_port_t *p_mcm_port; 143219820Sjeff const cl_qmap_t *p_mcm_tbl; 144219820Sjeff 145219820Sjeff OSM_LOG_ENTER(sm->p_log); 146219820Sjeff 147219820Sjeff p_mcm_tbl = &p_mgrp->mcm_port_tbl; 148219820Sjeff 149219820Sjeff /* 150219820Sjeff For each member of the multicast group, compute the 151219820Sjeff number of hops to its base LID. 152219820Sjeff */ 153219820Sjeff for (p_mcm_port = (osm_mcm_port_t *) cl_qmap_head(p_mcm_tbl); 154219820Sjeff p_mcm_port != (osm_mcm_port_t *) cl_qmap_end(p_mcm_tbl); 155219820Sjeff p_mcm_port = 156219820Sjeff (osm_mcm_port_t *) cl_qmap_next(&p_mcm_port->map_item)) { 157219820Sjeff /* 158219820Sjeff Acquire the port object for this port guid, then create 159219820Sjeff the new worker object to build the list. 160219820Sjeff */ 161219820Sjeff p_port = osm_get_port_by_guid(sm->p_subn, 162219820Sjeff ib_gid_get_guid(&p_mcm_port-> 163219820Sjeff port_gid)); 164219820Sjeff 165219820Sjeff if (!p_port) { 166219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A18: " 167219820Sjeff "No port object for port 0x%016" PRIx64 "\n", 168219820Sjeff cl_ntoh64(ib_gid_get_guid 169219820Sjeff (&p_mcm_port->port_gid))); 170219820Sjeff continue; 171219820Sjeff } 172219820Sjeff 173219820Sjeff hops += osm_switch_get_port_least_hops(p_sw, p_port); 174219820Sjeff num_ports++; 175219820Sjeff } 176219820Sjeff 177219820Sjeff /* 178219820Sjeff We should be here if there aren't any ports in the group. 179219820Sjeff */ 180219820Sjeff CL_ASSERT(num_ports); 181219820Sjeff 182219820Sjeff if (num_ports != 0) 183219820Sjeff avg_hops = (float)(hops / num_ports); 184219820Sjeff 185219820Sjeff OSM_LOG_EXIT(sm->p_log); 186219820Sjeff return (avg_hops); 187219820Sjeff} 188219820Sjeff 189219820Sjeff/********************************************************************** 190219820Sjeff Calculate the maximal "min hops" from the given switch to any 191219820Sjeff of the group HCAs 192219820Sjeff **********************************************************************/ 193219820Sjeffstatic float 194219820Sjeffosm_mcast_mgr_compute_max_hops(osm_sm_t * sm, 195219820Sjeff const osm_mgrp_t * const p_mgrp, 196219820Sjeff const osm_switch_t * const p_sw) 197219820Sjeff{ 198219820Sjeff uint32_t max_hops = 0; 199219820Sjeff uint32_t hops = 0; 200219820Sjeff const osm_port_t *p_port; 201219820Sjeff const osm_mcm_port_t *p_mcm_port; 202219820Sjeff const cl_qmap_t *p_mcm_tbl; 203219820Sjeff 204219820Sjeff OSM_LOG_ENTER(sm->p_log); 205219820Sjeff 206219820Sjeff p_mcm_tbl = &p_mgrp->mcm_port_tbl; 207219820Sjeff 208219820Sjeff /* 209219820Sjeff For each member of the multicast group, compute the 210219820Sjeff number of hops to its base LID. 211219820Sjeff */ 212219820Sjeff for (p_mcm_port = (osm_mcm_port_t *) cl_qmap_head(p_mcm_tbl); 213219820Sjeff p_mcm_port != (osm_mcm_port_t *) cl_qmap_end(p_mcm_tbl); 214219820Sjeff p_mcm_port = 215219820Sjeff (osm_mcm_port_t *) cl_qmap_next(&p_mcm_port->map_item)) { 216219820Sjeff /* 217219820Sjeff Acquire the port object for this port guid, then create 218219820Sjeff the new worker object to build the list. 219219820Sjeff */ 220219820Sjeff p_port = osm_get_port_by_guid(sm->p_subn, 221219820Sjeff ib_gid_get_guid(&p_mcm_port-> 222219820Sjeff port_gid)); 223219820Sjeff 224219820Sjeff if (!p_port) { 225219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A1A: " 226219820Sjeff "No port object for port 0x%016" PRIx64 "\n", 227219820Sjeff cl_ntoh64(ib_gid_get_guid 228219820Sjeff (&p_mcm_port->port_gid))); 229219820Sjeff continue; 230219820Sjeff } 231219820Sjeff 232219820Sjeff hops = osm_switch_get_port_least_hops(p_sw, p_port); 233219820Sjeff if (hops > max_hops) 234219820Sjeff max_hops = hops; 235219820Sjeff } 236219820Sjeff 237219820Sjeff if (max_hops == 0) { 238219820Sjeff /* 239219820Sjeff We should be here if there aren't any ports in the group. 240219820Sjeff */ 241219820Sjeff max_hops = 10001; /* see later - we use it to realize no hops */ 242219820Sjeff } 243219820Sjeff 244219820Sjeff OSM_LOG_EXIT(sm->p_log); 245219820Sjeff return (float)(max_hops); 246219820Sjeff} 247219820Sjeff 248219820Sjeff/********************************************************************** 249219820Sjeff This function attempts to locate the optimal switch for the 250219820Sjeff center of the spanning tree. The current algorithm chooses 251219820Sjeff a switch with the lowest average hop count to the members 252219820Sjeff of the multicast group. 253219820Sjeff**********************************************************************/ 254219820Sjeffstatic osm_switch_t *__osm_mcast_mgr_find_optimal_switch(osm_sm_t * sm, 255219820Sjeff const osm_mgrp_t * 256219820Sjeff const p_mgrp) 257219820Sjeff{ 258219820Sjeff cl_qmap_t *p_sw_tbl; 259219820Sjeff const osm_switch_t *p_sw; 260219820Sjeff const osm_switch_t *p_best_sw = NULL; 261219820Sjeff float hops = 0; 262219820Sjeff float best_hops = 10000; /* any big # will do */ 263219820Sjeff#ifdef OSM_VENDOR_INTF_ANAFA 264219820Sjeff boolean_t use_avg_hops = TRUE; /* anafa2 - bug hca on switch *//* use max hops for root */ 265219820Sjeff#else 266219820Sjeff boolean_t use_avg_hops = FALSE; /* use max hops for root */ 267219820Sjeff#endif 268219820Sjeff 269219820Sjeff OSM_LOG_ENTER(sm->p_log); 270219820Sjeff 271219820Sjeff p_sw_tbl = &sm->p_subn->sw_guid_tbl; 272219820Sjeff 273219820Sjeff CL_ASSERT(!osm_mgrp_is_empty(p_mgrp)); 274219820Sjeff 275219820Sjeff for (p_sw = (osm_switch_t *) cl_qmap_head(p_sw_tbl); 276219820Sjeff p_sw != (osm_switch_t *) cl_qmap_end(p_sw_tbl); 277219820Sjeff p_sw = (osm_switch_t *) cl_qmap_next(&p_sw->map_item)) { 278219820Sjeff if (!osm_switch_supports_mcast(p_sw)) 279219820Sjeff continue; 280219820Sjeff 281219820Sjeff if (use_avg_hops) 282219820Sjeff hops = osm_mcast_mgr_compute_avg_hops(sm, p_mgrp, p_sw); 283219820Sjeff else 284219820Sjeff hops = osm_mcast_mgr_compute_max_hops(sm, p_mgrp, p_sw); 285219820Sjeff 286219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_DEBUG, 287219820Sjeff "Switch 0x%016" PRIx64 ", hops = %f\n", 288219820Sjeff cl_ntoh64(osm_node_get_node_guid(p_sw->p_node)), hops); 289219820Sjeff 290219820Sjeff if (hops < best_hops) { 291219820Sjeff p_best_sw = p_sw; 292219820Sjeff best_hops = hops; 293219820Sjeff } 294219820Sjeff } 295219820Sjeff 296219820Sjeff if (p_best_sw) 297219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, 298219820Sjeff "Best switch is 0x%" PRIx64 ", hops = %f\n", 299219820Sjeff cl_ntoh64(osm_node_get_node_guid(p_best_sw->p_node)), 300219820Sjeff best_hops); 301219820Sjeff else 302219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, 303219820Sjeff "No multicast capable switches detected\n"); 304219820Sjeff 305219820Sjeff OSM_LOG_EXIT(sm->p_log); 306219820Sjeff return ((osm_switch_t *) p_best_sw); 307219820Sjeff} 308219820Sjeff 309219820Sjeff/********************************************************************** 310219820Sjeff This function returns the existing or optimal root swtich for the tree. 311219820Sjeff**********************************************************************/ 312219820Sjeffstatic osm_switch_t *__osm_mcast_mgr_find_root_switch(osm_sm_t * sm, 313219820Sjeff const osm_mgrp_t * 314219820Sjeff const p_mgrp) 315219820Sjeff{ 316219820Sjeff const osm_switch_t *p_sw = NULL; 317219820Sjeff 318219820Sjeff OSM_LOG_ENTER(sm->p_log); 319219820Sjeff 320219820Sjeff /* 321219820Sjeff We always look for the best multicast tree root switch. 322219820Sjeff Otherwise since we always start with a a single join 323219820Sjeff the root will be always on the first switch attached to it. 324219820Sjeff - Very bad ... 325219820Sjeff */ 326219820Sjeff p_sw = __osm_mcast_mgr_find_optimal_switch(sm, p_mgrp); 327219820Sjeff 328219820Sjeff OSM_LOG_EXIT(sm->p_log); 329219820Sjeff return ((osm_switch_t *) p_sw); 330219820Sjeff} 331219820Sjeff 332219820Sjeff/********************************************************************** 333219820Sjeff **********************************************************************/ 334219820Sjeffstatic osm_signal_t 335219820Sjeff__osm_mcast_mgr_set_tbl(osm_sm_t * sm, IN osm_switch_t * const p_sw) 336219820Sjeff{ 337219820Sjeff osm_node_t *p_node; 338219820Sjeff osm_dr_path_t *p_path; 339219820Sjeff osm_madw_context_t mad_context; 340219820Sjeff ib_api_status_t status; 341219820Sjeff uint32_t block_id_ho = 0; 342219820Sjeff int16_t block_num = 0; 343219820Sjeff uint32_t position = 0; 344219820Sjeff uint32_t max_position; 345219820Sjeff osm_mcast_tbl_t *p_tbl; 346219820Sjeff ib_net16_t block[IB_MCAST_BLOCK_SIZE]; 347219820Sjeff osm_signal_t signal = OSM_SIGNAL_DONE; 348219820Sjeff 349219820Sjeff CL_ASSERT(sm); 350219820Sjeff 351219820Sjeff OSM_LOG_ENTER(sm->p_log); 352219820Sjeff 353219820Sjeff CL_ASSERT(p_sw); 354219820Sjeff 355219820Sjeff p_node = p_sw->p_node; 356219820Sjeff 357219820Sjeff CL_ASSERT(p_node); 358219820Sjeff 359219820Sjeff p_path = osm_physp_get_dr_path_ptr(osm_node_get_physp_ptr(p_node, 0)); 360219820Sjeff 361219820Sjeff /* 362219820Sjeff Send multicast forwarding table blocks to the switch 363219820Sjeff as long as the switch indicates it has blocks needing 364219820Sjeff configuration. 365219820Sjeff */ 366219820Sjeff 367219820Sjeff mad_context.mft_context.node_guid = osm_node_get_node_guid(p_node); 368219820Sjeff mad_context.mft_context.set_method = TRUE; 369219820Sjeff 370219820Sjeff p_tbl = osm_switch_get_mcast_tbl_ptr(p_sw); 371219820Sjeff max_position = p_tbl->max_position; 372219820Sjeff 373219820Sjeff while (osm_mcast_tbl_get_block(p_tbl, block_num, 374219820Sjeff (uint8_t) position, block)) { 375219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_DEBUG, 376219820Sjeff "Writing MFT block 0x%X\n", block_id_ho); 377219820Sjeff 378219820Sjeff block_id_ho = block_num + (position << 28); 379219820Sjeff 380219820Sjeff status = osm_req_set(sm, p_path, (void *)block, sizeof(block), 381219820Sjeff IB_MAD_ATTR_MCAST_FWD_TBL, 382219820Sjeff cl_hton32(block_id_ho), 383219820Sjeff CL_DISP_MSGID_NONE, &mad_context); 384219820Sjeff 385219820Sjeff if (status != IB_SUCCESS) { 386219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A02: " 387219820Sjeff "Sending multicast fwd. tbl. block failed (%s)\n", 388219820Sjeff ib_get_err_str(status)); 389219820Sjeff } 390219820Sjeff 391219820Sjeff signal = OSM_SIGNAL_DONE_PENDING; 392219820Sjeff 393219820Sjeff if (++position > max_position) { 394219820Sjeff position = 0; 395219820Sjeff block_num++; 396219820Sjeff } 397219820Sjeff } 398219820Sjeff 399219820Sjeff OSM_LOG_EXIT(sm->p_log); 400219820Sjeff return (signal); 401219820Sjeff} 402219820Sjeff 403219820Sjeff/********************************************************************** 404219820Sjeff This is part of the recursive function to compute the paths in the 405219820Sjeff spanning tree that eminate from this switch. On input, the p_list 406219820Sjeff contains the group members that must be routed from this switch. 407219820Sjeff**********************************************************************/ 408219820Sjeffstatic void 409219820Sjeff__osm_mcast_mgr_subdivide(osm_sm_t * sm, 410219820Sjeff osm_mgrp_t * const p_mgrp, 411219820Sjeff osm_switch_t * const p_sw, 412219820Sjeff cl_qlist_t * const p_list, 413219820Sjeff cl_qlist_t * const list_array, 414219820Sjeff uint8_t const array_size) 415219820Sjeff{ 416219820Sjeff uint8_t port_num; 417219820Sjeff uint16_t mlid_ho; 418219820Sjeff boolean_t ignore_existing; 419219820Sjeff osm_mcast_work_obj_t *p_wobj; 420219820Sjeff 421219820Sjeff OSM_LOG_ENTER(sm->p_log); 422219820Sjeff 423219820Sjeff mlid_ho = cl_ntoh16(osm_mgrp_get_mlid(p_mgrp)); 424219820Sjeff 425219820Sjeff /* 426219820Sjeff For Multicast Groups, we want not to count on previous 427219820Sjeff configurations - since we can easily generate a storm 428219820Sjeff by loops. 429219820Sjeff */ 430219820Sjeff ignore_existing = TRUE; 431219820Sjeff 432219820Sjeff /* 433219820Sjeff Subdivide the set of ports into non-overlapping subsets 434219820Sjeff that will be routed to other switches. 435219820Sjeff */ 436219820Sjeff while ((p_wobj = 437219820Sjeff (osm_mcast_work_obj_t *) cl_qlist_remove_head(p_list)) != 438219820Sjeff (osm_mcast_work_obj_t *) cl_qlist_end(p_list)) { 439219820Sjeff port_num = 440219820Sjeff osm_switch_recommend_mcast_path(p_sw, p_wobj->p_port, 441219820Sjeff mlid_ho, ignore_existing); 442219820Sjeff 443219820Sjeff if (port_num == OSM_NO_PATH) { 444219820Sjeff /* 445219820Sjeff This typically occurs if the switch does not support 446219820Sjeff multicast and the multicast tree must branch at this 447219820Sjeff switch. 448219820Sjeff */ 449219820Sjeff uint64_t node_guid_ho = 450219820Sjeff cl_ntoh64(osm_node_get_node_guid(p_sw->p_node)); 451219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A03: " 452219820Sjeff "Error routing MLID 0x%X through switch 0x%" 453219820Sjeff PRIx64 "\n" 454219820Sjeff "\t\t\t\tNo multicast paths from this switch for port " 455219820Sjeff "with LID %u\n", mlid_ho, node_guid_ho, 456219820Sjeff cl_ntoh16(osm_port_get_base_lid 457219820Sjeff (p_wobj->p_port))); 458219820Sjeff 459219820Sjeff __osm_mcast_work_obj_delete(p_wobj); 460219820Sjeff continue; 461219820Sjeff } 462219820Sjeff 463219820Sjeff if (port_num > array_size) { 464219820Sjeff uint64_t node_guid_ho = 465219820Sjeff cl_ntoh64(osm_node_get_node_guid(p_sw->p_node)); 466219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A04: " 467219820Sjeff "Error routing MLID 0x%X through switch 0x%" 468219820Sjeff PRIx64 "\n" 469219820Sjeff "\t\t\t\tNo multicast paths from this switch to port " 470219820Sjeff "with LID %u\n", mlid_ho, node_guid_ho, 471219820Sjeff cl_ntoh16(osm_port_get_base_lid 472219820Sjeff (p_wobj->p_port))); 473219820Sjeff 474219820Sjeff __osm_mcast_work_obj_delete(p_wobj); 475219820Sjeff 476219820Sjeff /* This is means OpenSM has a bug. */ 477219820Sjeff CL_ASSERT(FALSE); 478219820Sjeff continue; 479219820Sjeff } 480219820Sjeff 481219820Sjeff cl_qlist_insert_tail(&list_array[port_num], &p_wobj->list_item); 482219820Sjeff } 483219820Sjeff 484219820Sjeff OSM_LOG_EXIT(sm->p_log); 485219820Sjeff} 486219820Sjeff 487219820Sjeff/********************************************************************** 488219820Sjeff **********************************************************************/ 489219820Sjeffstatic void __osm_mcast_mgr_purge_list(osm_sm_t * sm, cl_qlist_t * const p_list) 490219820Sjeff{ 491219820Sjeff osm_mcast_work_obj_t *p_wobj; 492219820Sjeff 493219820Sjeff OSM_LOG_ENTER(sm->p_log); 494219820Sjeff 495219820Sjeff while ((p_wobj = (osm_mcast_work_obj_t *) cl_qlist_remove_head(p_list)) 496219820Sjeff != (osm_mcast_work_obj_t *) cl_qlist_end(p_list)) { 497219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A06: " 498219820Sjeff "Unable to route for port 0x%" PRIx64 "\n", 499219820Sjeff osm_port_get_guid(p_wobj->p_port)); 500219820Sjeff __osm_mcast_work_obj_delete(p_wobj); 501219820Sjeff } 502219820Sjeff 503219820Sjeff OSM_LOG_EXIT(sm->p_log); 504219820Sjeff} 505219820Sjeff 506219820Sjeff/********************************************************************** 507219820Sjeff This is the recursive function to compute the paths in the spanning 508219820Sjeff tree that emanate from this switch. On input, the p_list contains 509219820Sjeff the group members that must be routed from this switch. 510219820Sjeff 511219820Sjeff The function returns the newly created mtree node element. 512219820Sjeff**********************************************************************/ 513219820Sjeffstatic osm_mtree_node_t *__osm_mcast_mgr_branch(osm_sm_t * sm, 514219820Sjeff osm_mgrp_t * const p_mgrp, 515219820Sjeff osm_switch_t * const p_sw, 516219820Sjeff cl_qlist_t * const p_list, 517219820Sjeff uint8_t depth, 518219820Sjeff uint8_t const upstream_port, 519219820Sjeff uint8_t * const p_max_depth) 520219820Sjeff{ 521219820Sjeff uint8_t max_children; 522219820Sjeff osm_mtree_node_t *p_mtn = NULL; 523219820Sjeff cl_qlist_t *list_array = NULL; 524219820Sjeff uint8_t i; 525219820Sjeff ib_net64_t node_guid; 526219820Sjeff uint64_t node_guid_ho; 527219820Sjeff osm_mcast_work_obj_t *p_wobj; 528219820Sjeff cl_qlist_t *p_port_list; 529219820Sjeff size_t count; 530219820Sjeff uint16_t mlid_ho; 531219820Sjeff osm_mcast_tbl_t *p_tbl; 532219820Sjeff 533219820Sjeff OSM_LOG_ENTER(sm->p_log); 534219820Sjeff 535219820Sjeff CL_ASSERT(p_sw); 536219820Sjeff CL_ASSERT(p_list); 537219820Sjeff CL_ASSERT(p_max_depth); 538219820Sjeff 539219820Sjeff node_guid = osm_node_get_node_guid(p_sw->p_node); 540219820Sjeff node_guid_ho = cl_ntoh64(node_guid); 541219820Sjeff mlid_ho = cl_ntoh16(osm_mgrp_get_mlid(p_mgrp)); 542219820Sjeff 543219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, 544219820Sjeff "Routing MLID 0x%X through switch 0x%" PRIx64 545219820Sjeff ", %u nodes at depth %u\n", 546219820Sjeff mlid_ho, node_guid_ho, cl_qlist_count(p_list), depth); 547219820Sjeff 548219820Sjeff CL_ASSERT(cl_qlist_count(p_list) > 0); 549219820Sjeff 550219820Sjeff depth++; 551219820Sjeff 552219820Sjeff if (depth >= 64) { 553219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, 554219820Sjeff "Maximal hops number is reached for MLID 0x%x." 555219820Sjeff " Break processing.", mlid_ho); 556219820Sjeff __osm_mcast_mgr_purge_list(sm, p_list); 557219820Sjeff goto Exit; 558219820Sjeff } 559219820Sjeff 560219820Sjeff if (depth > *p_max_depth) { 561219820Sjeff CL_ASSERT(depth == *p_max_depth + 1); 562219820Sjeff *p_max_depth = depth; 563219820Sjeff } 564219820Sjeff 565219820Sjeff if (osm_switch_supports_mcast(p_sw) == FALSE) { 566219820Sjeff /* 567219820Sjeff This switch doesn't do multicast. Clean-up. 568219820Sjeff */ 569219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A14: " 570219820Sjeff "Switch 0x%" PRIx64 " does not support multicast\n", 571219820Sjeff node_guid_ho); 572219820Sjeff 573219820Sjeff /* 574219820Sjeff Deallocate all the work objects on this branch of the tree. 575219820Sjeff */ 576219820Sjeff __osm_mcast_mgr_purge_list(sm, p_list); 577219820Sjeff goto Exit; 578219820Sjeff } 579219820Sjeff 580219820Sjeff p_mtn = osm_mtree_node_new(p_sw); 581219820Sjeff if (p_mtn == NULL) { 582219820Sjeff /* 583219820Sjeff We are unable to continue routing down this 584219820Sjeff leg of the tree. Clean-up. 585219820Sjeff */ 586219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A15: " 587219820Sjeff "Insufficient memory to build multicast tree\n"); 588219820Sjeff 589219820Sjeff /* 590219820Sjeff Deallocate all the work objects on this branch of the tree. 591219820Sjeff */ 592219820Sjeff __osm_mcast_mgr_purge_list(sm, p_list); 593219820Sjeff goto Exit; 594219820Sjeff } 595219820Sjeff 596219820Sjeff max_children = osm_mtree_node_get_max_children(p_mtn); 597219820Sjeff 598219820Sjeff CL_ASSERT(max_children > 1); 599219820Sjeff 600219820Sjeff /* 601219820Sjeff Prepare an empty list for each port in the switch. 602219820Sjeff TO DO - this list array could probably be moved 603219820Sjeff inside the switch element to save on malloc thrashing. 604219820Sjeff */ 605219820Sjeff list_array = malloc(sizeof(cl_qlist_t) * max_children); 606219820Sjeff if (list_array == NULL) { 607219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A16: " 608219820Sjeff "Unable to allocate list array\n"); 609219820Sjeff __osm_mcast_mgr_purge_list(sm, p_list); 610219820Sjeff goto Exit; 611219820Sjeff } 612219820Sjeff 613219820Sjeff memset(list_array, 0, sizeof(cl_qlist_t) * max_children); 614219820Sjeff 615219820Sjeff for (i = 0; i < max_children; i++) 616219820Sjeff cl_qlist_init(&list_array[i]); 617219820Sjeff 618219820Sjeff __osm_mcast_mgr_subdivide(sm, p_mgrp, p_sw, p_list, list_array, 619219820Sjeff max_children); 620219820Sjeff 621219820Sjeff p_tbl = osm_switch_get_mcast_tbl_ptr(p_sw); 622219820Sjeff 623219820Sjeff /* 624219820Sjeff Add the upstream port to the forwarding table unless 625219820Sjeff we're at the root of the spanning tree. 626219820Sjeff */ 627219820Sjeff if (depth > 1) { 628219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_DEBUG, 629219820Sjeff "Adding upstream port %u\n", upstream_port); 630219820Sjeff 631219820Sjeff CL_ASSERT(upstream_port); 632219820Sjeff osm_mcast_tbl_set(p_tbl, mlid_ho, upstream_port); 633219820Sjeff } 634219820Sjeff 635219820Sjeff /* 636219820Sjeff For each port that was allocated some routes, 637219820Sjeff recurse into this function to continue building the tree 638219820Sjeff if the node on the other end of that port is another switch. 639219820Sjeff Otherwise, the node is an endpoint, and we've found a leaf 640219820Sjeff of the tree. Mark leaves with our special pointer value. 641219820Sjeff */ 642219820Sjeff 643219820Sjeff for (i = 0; i < max_children; i++) { 644219820Sjeff const osm_physp_t *p_physp; 645219820Sjeff const osm_physp_t *p_remote_physp; 646219820Sjeff osm_node_t *p_node; 647219820Sjeff const osm_node_t *p_remote_node; 648219820Sjeff 649219820Sjeff p_port_list = &list_array[i]; 650219820Sjeff 651219820Sjeff count = cl_qlist_count(p_port_list); 652219820Sjeff 653219820Sjeff /* 654219820Sjeff There should be no children routed through the upstream port! 655219820Sjeff */ 656219820Sjeff CL_ASSERT((upstream_port == 0) || (i != upstream_port) || 657219820Sjeff ((i == upstream_port) && (count == 0))); 658219820Sjeff 659219820Sjeff if (count == 0) 660219820Sjeff continue; /* No routes down this port. */ 661219820Sjeff 662219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_DEBUG, 663219820Sjeff "Routing %zu destinations via switch port %u\n", 664219820Sjeff count, i); 665219820Sjeff 666219820Sjeff /* 667219820Sjeff This port routes frames for this mcast group. Therefore, 668219820Sjeff set the appropriate bit in the multicast forwarding 669219820Sjeff table for this switch. 670219820Sjeff */ 671219820Sjeff osm_mcast_tbl_set(p_tbl, mlid_ho, i); 672219820Sjeff if (i == 0) { 673219820Sjeff /* This means we are adding the switch to the MC group. 674219820Sjeff We do not need to continue looking at the remote port, just 675219820Sjeff needed to add the port to the table */ 676219820Sjeff CL_ASSERT(count == 1); 677219820Sjeff 678219820Sjeff p_wobj = (osm_mcast_work_obj_t *) 679219820Sjeff cl_qlist_remove_head(p_port_list); 680219820Sjeff __osm_mcast_work_obj_delete(p_wobj); 681219820Sjeff continue; 682219820Sjeff } 683219820Sjeff 684219820Sjeff p_node = p_sw->p_node; 685219820Sjeff p_remote_node = osm_node_get_remote_node(p_node, i, NULL); 686219820Sjeff if (!p_remote_node) 687219820Sjeff continue; 688219820Sjeff 689219820Sjeff if (osm_node_get_type(p_remote_node) == IB_NODE_TYPE_SWITCH) { 690219820Sjeff /* 691219820Sjeff Acquire a pointer to the remote switch then recurse. 692219820Sjeff */ 693219820Sjeff CL_ASSERT(p_remote_node->sw); 694219820Sjeff 695219820Sjeff p_physp = osm_node_get_physp_ptr(p_node, i); 696219820Sjeff CL_ASSERT(p_physp); 697219820Sjeff 698219820Sjeff p_remote_physp = osm_physp_get_remote(p_physp); 699219820Sjeff CL_ASSERT(p_remote_physp); 700219820Sjeff 701219820Sjeff p_mtn->child_array[i] = 702219820Sjeff __osm_mcast_mgr_branch(sm, p_mgrp, 703219820Sjeff p_remote_node->sw, 704219820Sjeff p_port_list, depth, 705219820Sjeff osm_physp_get_port_num 706219820Sjeff (p_remote_physp), 707219820Sjeff p_max_depth); 708219820Sjeff } else { 709219820Sjeff /* 710219820Sjeff The neighbor node is not a switch, so this 711219820Sjeff must be a leaf. 712219820Sjeff */ 713219820Sjeff CL_ASSERT(count == 1); 714219820Sjeff 715219820Sjeff p_mtn->child_array[i] = OSM_MTREE_LEAF; 716219820Sjeff p_wobj = (osm_mcast_work_obj_t *) 717219820Sjeff cl_qlist_remove_head(p_port_list); 718219820Sjeff 719219820Sjeff CL_ASSERT(cl_is_qlist_empty(p_port_list)); 720219820Sjeff 721219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_DEBUG, 722219820Sjeff "Found leaf for port 0x%016" PRIx64 723219820Sjeff " on switch port %u\n", 724219820Sjeff cl_ntoh64(osm_port_get_guid(p_wobj->p_port)), 725219820Sjeff i); 726219820Sjeff 727219820Sjeff __osm_mcast_work_obj_delete(p_wobj); 728219820Sjeff } 729219820Sjeff } 730219820Sjeff 731219820Sjeff free(list_array); 732219820SjeffExit: 733219820Sjeff OSM_LOG_EXIT(sm->p_log); 734219820Sjeff return (p_mtn); 735219820Sjeff} 736219820Sjeff 737219820Sjeff/********************************************************************** 738219820Sjeff **********************************************************************/ 739219820Sjeffstatic ib_api_status_t 740219820Sjeff__osm_mcast_mgr_build_spanning_tree(osm_sm_t * sm, osm_mgrp_t * const p_mgrp) 741219820Sjeff{ 742219820Sjeff const cl_qmap_t *p_mcm_tbl; 743219820Sjeff const osm_port_t *p_port; 744219820Sjeff const osm_mcm_port_t *p_mcm_port; 745219820Sjeff uint32_t num_ports; 746219820Sjeff cl_qlist_t port_list; 747219820Sjeff osm_switch_t *p_sw; 748219820Sjeff osm_mcast_work_obj_t *p_wobj; 749219820Sjeff ib_api_status_t status = IB_SUCCESS; 750219820Sjeff uint8_t max_depth = 0; 751219820Sjeff uint32_t count; 752219820Sjeff 753219820Sjeff OSM_LOG_ENTER(sm->p_log); 754219820Sjeff 755219820Sjeff cl_qlist_init(&port_list); 756219820Sjeff 757219820Sjeff /* 758219820Sjeff TO DO - for now, just blow away the old tree. 759219820Sjeff In the future we'll need to construct the tree based 760219820Sjeff on multicast forwarding table information if the user wants to 761219820Sjeff preserve existing multicast routes. 762219820Sjeff */ 763219820Sjeff __osm_mcast_mgr_purge_tree(sm, p_mgrp); 764219820Sjeff 765219820Sjeff p_mcm_tbl = &p_mgrp->mcm_port_tbl; 766219820Sjeff num_ports = cl_qmap_count(p_mcm_tbl); 767219820Sjeff if (num_ports == 0) { 768219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, 769219820Sjeff "MLID 0x%X has no members - nothing to do\n", 770219820Sjeff cl_ntoh16(osm_mgrp_get_mlid(p_mgrp))); 771219820Sjeff goto Exit; 772219820Sjeff } 773219820Sjeff 774219820Sjeff /* 775219820Sjeff This function builds the single spanning tree recursively. 776219820Sjeff At each stage, the ports to be reached are divided into 777219820Sjeff non-overlapping subsets of member ports that can be reached through 778219820Sjeff a given switch port. Construction then moves down each 779219820Sjeff branch, and the process starts again with each branch computing 780219820Sjeff for its own subset of the member ports. 781219820Sjeff 782219820Sjeff The maximum recursion depth is at worst the maximum hop count in the 783219820Sjeff subnet, which is spec limited to 64. 784219820Sjeff */ 785219820Sjeff 786219820Sjeff /* 787219820Sjeff Locate the switch around which to create the spanning 788219820Sjeff tree for this multicast group. 789219820Sjeff */ 790219820Sjeff p_sw = __osm_mcast_mgr_find_root_switch(sm, p_mgrp); 791219820Sjeff if (p_sw == NULL) { 792219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A08: " 793219820Sjeff "Unable to locate a suitable switch for group 0x%X\n", 794219820Sjeff cl_ntoh16(osm_mgrp_get_mlid(p_mgrp))); 795219820Sjeff status = IB_ERROR; 796219820Sjeff goto Exit; 797219820Sjeff } 798219820Sjeff 799219820Sjeff /* 800219820Sjeff Build the first "subset" containing all member ports. 801219820Sjeff */ 802219820Sjeff for (p_mcm_port = (osm_mcm_port_t *) cl_qmap_head(p_mcm_tbl); 803219820Sjeff p_mcm_port != (osm_mcm_port_t *) cl_qmap_end(p_mcm_tbl); 804219820Sjeff p_mcm_port = 805219820Sjeff (osm_mcm_port_t *) cl_qmap_next(&p_mcm_port->map_item)) { 806219820Sjeff /* 807219820Sjeff Acquire the port object for this port guid, then create 808219820Sjeff the new worker object to build the list. 809219820Sjeff */ 810219820Sjeff p_port = osm_get_port_by_guid(sm->p_subn, 811219820Sjeff ib_gid_get_guid(&p_mcm_port-> 812219820Sjeff port_gid)); 813219820Sjeff if (!p_port) { 814219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A09: " 815219820Sjeff "No port object for port 0x%016" PRIx64 "\n", 816219820Sjeff cl_ntoh64(ib_gid_get_guid 817219820Sjeff (&p_mcm_port->port_gid))); 818219820Sjeff continue; 819219820Sjeff } 820219820Sjeff 821219820Sjeff p_wobj = __osm_mcast_work_obj_new(p_port); 822219820Sjeff if (p_wobj == NULL) { 823219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A10: " 824219820Sjeff "Insufficient memory to route port 0x%016" 825219820Sjeff PRIx64 "\n", 826219820Sjeff cl_ntoh64(osm_port_get_guid(p_port))); 827219820Sjeff continue; 828219820Sjeff } 829219820Sjeff 830219820Sjeff cl_qlist_insert_tail(&port_list, &p_wobj->list_item); 831219820Sjeff } 832219820Sjeff 833219820Sjeff count = cl_qlist_count(&port_list); 834219820Sjeff p_mgrp->p_root = __osm_mcast_mgr_branch(sm, p_mgrp, p_sw, 835219820Sjeff &port_list, 0, 0, &max_depth); 836219820Sjeff 837219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, 838219820Sjeff "Configured MLID 0x%X for %u ports, max tree depth = %u\n", 839219820Sjeff cl_ntoh16(osm_mgrp_get_mlid(p_mgrp)), count, max_depth); 840219820Sjeff 841219820SjeffExit: 842219820Sjeff OSM_LOG_EXIT(sm->p_log); 843219820Sjeff return (status); 844219820Sjeff} 845219820Sjeff 846219820Sjeff#if 0 847219820Sjeff/* unused */ 848219820Sjeff/********************************************************************** 849219820Sjeff **********************************************************************/ 850219820Sjeffvoid 851219820Sjeffosm_mcast_mgr_set_table(osm_sm_t * sm, 852219820Sjeff IN const osm_mgrp_t * const p_mgrp, 853219820Sjeff IN const osm_mtree_node_t * const p_mtn) 854219820Sjeff{ 855219820Sjeff uint8_t i; 856219820Sjeff uint8_t max_children; 857219820Sjeff osm_mtree_node_t *p_child_mtn; 858219820Sjeff uint16_t mlid_ho; 859219820Sjeff osm_mcast_tbl_t *p_tbl; 860219820Sjeff osm_switch_t *p_sw; 861219820Sjeff 862219820Sjeff OSM_LOG_ENTER(sm->p_log); 863219820Sjeff 864219820Sjeff mlid_ho = cl_ntoh16(osm_mgrp_get_mlid(p_mgrp)); 865219820Sjeff p_sw = osm_mtree_node_get_switch_ptr(p_mtn); 866219820Sjeff 867219820Sjeff CL_ASSERT(p_sw); 868219820Sjeff 869219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, 870219820Sjeff "Configuring MLID 0x%X on switch 0x%" PRIx64 "\n", 871219820Sjeff mlid_ho, osm_node_get_node_guid(p_sw->p_node)); 872219820Sjeff 873219820Sjeff /* 874219820Sjeff For every child of this tree node, set the corresponding 875219820Sjeff bit in the switch's mcast table. 876219820Sjeff */ 877219820Sjeff p_tbl = osm_switch_get_mcast_tbl_ptr(p_sw); 878219820Sjeff max_children = osm_mtree_node_get_max_children(p_mtn); 879219820Sjeff 880219820Sjeff CL_ASSERT(max_children <= osm_switch_get_num_ports(p_sw)); 881219820Sjeff 882219820Sjeff osm_mcast_tbl_clear_mlid(p_tbl, mlid_ho); 883219820Sjeff 884219820Sjeff for (i = 0; i < max_children; i++) { 885219820Sjeff p_child_mtn = osm_mtree_node_get_child(p_mtn, i); 886219820Sjeff if (p_child_mtn == NULL) 887219820Sjeff continue; 888219820Sjeff 889219820Sjeff osm_mcast_tbl_set(p_tbl, mlid_ho, i); 890219820Sjeff } 891219820Sjeff 892219820Sjeff OSM_LOG_EXIT(sm->p_log); 893219820Sjeff} 894219820Sjeff#endif 895219820Sjeff 896219820Sjeff/********************************************************************** 897219820Sjeff **********************************************************************/ 898219820Sjeffstatic void __osm_mcast_mgr_clear(osm_sm_t * sm, IN osm_mgrp_t * const p_mgrp) 899219820Sjeff{ 900219820Sjeff osm_switch_t *p_sw; 901219820Sjeff cl_qmap_t *p_sw_tbl; 902219820Sjeff osm_mcast_tbl_t *p_mcast_tbl; 903219820Sjeff 904219820Sjeff OSM_LOG_ENTER(sm->p_log); 905219820Sjeff 906219820Sjeff /* 907219820Sjeff Walk the switches and clear the routing entries for 908219820Sjeff this MLID. 909219820Sjeff */ 910219820Sjeff p_sw_tbl = &sm->p_subn->sw_guid_tbl; 911219820Sjeff p_sw = (osm_switch_t *) cl_qmap_head(p_sw_tbl); 912219820Sjeff while (p_sw != (osm_switch_t *) cl_qmap_end(p_sw_tbl)) { 913219820Sjeff p_mcast_tbl = osm_switch_get_mcast_tbl_ptr(p_sw); 914219820Sjeff osm_mcast_tbl_clear_mlid(p_mcast_tbl, cl_ntoh16(p_mgrp->mlid)); 915219820Sjeff p_sw = (osm_switch_t *) cl_qmap_next(&p_sw->map_item); 916219820Sjeff } 917219820Sjeff 918219820Sjeff OSM_LOG_EXIT(sm->p_log); 919219820Sjeff} 920219820Sjeff 921219820Sjeff#if 0 922219820Sjeff/* TO DO - make this real -- at least update spanning tree */ 923219820Sjeff/********************************************************************** 924219820Sjeff Lock must be held on entry. 925219820Sjeff**********************************************************************/ 926219820Sjeffib_api_status_t 927219820Sjeffosm_mcast_mgr_process_single(osm_sm_t * sm, 928219820Sjeff IN ib_net16_t const mlid, 929219820Sjeff IN ib_net64_t const port_guid, 930219820Sjeff IN uint8_t const join_state) 931219820Sjeff{ 932219820Sjeff uint8_t port_num; 933219820Sjeff uint16_t mlid_ho; 934219820Sjeff ib_net64_t sw_guid; 935219820Sjeff osm_port_t *p_port; 936219820Sjeff osm_physp_t *p_physp; 937219820Sjeff osm_physp_t *p_remote_physp; 938219820Sjeff osm_node_t *p_remote_node; 939219820Sjeff osm_mcast_tbl_t *p_mcast_tbl; 940219820Sjeff ib_api_status_t status = IB_SUCCESS; 941219820Sjeff 942219820Sjeff OSM_LOG_ENTER(sm->p_log); 943219820Sjeff 944219820Sjeff CL_ASSERT(mlid); 945219820Sjeff CL_ASSERT(port_guid); 946219820Sjeff 947219820Sjeff mlid_ho = cl_ntoh16(mlid); 948219820Sjeff 949219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_DEBUG, 950219820Sjeff "Attempting to add port 0x%" PRIx64 " to MLID 0x%X, " 951219820Sjeff "\n\t\t\t\tjoin state = 0x%X\n", 952219820Sjeff cl_ntoh64(port_guid), mlid_ho, join_state); 953219820Sjeff 954219820Sjeff /* 955219820Sjeff Acquire the Port object. 956219820Sjeff */ 957219820Sjeff p_port = osm_get_port_by_guid(sm->p_subn, port_guid); 958219820Sjeff if (!p_port) { 959219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A01: " 960219820Sjeff "Unable to acquire port object for 0x%" PRIx64 "\n", 961219820Sjeff cl_ntoh64(port_guid)); 962219820Sjeff status = IB_ERROR; 963219820Sjeff goto Exit; 964219820Sjeff } 965219820Sjeff 966219820Sjeff p_physp = p_port->p_physp; 967219820Sjeff if (p_physp == NULL) { 968219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A05: " 969219820Sjeff "Unable to acquire phsyical port object for 0x%" PRIx64 970219820Sjeff "\n", cl_ntoh64(port_guid)); 971219820Sjeff status = IB_ERROR; 972219820Sjeff goto Exit; 973219820Sjeff } 974219820Sjeff 975219820Sjeff p_remote_physp = osm_physp_get_remote(p_physp); 976219820Sjeff if (p_remote_physp == NULL) { 977219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A11: " 978219820Sjeff "Unable to acquire remote phsyical port object " 979219820Sjeff "for 0x%" PRIx64 "\n", cl_ntoh64(port_guid)); 980219820Sjeff status = IB_ERROR; 981219820Sjeff goto Exit; 982219820Sjeff } 983219820Sjeff 984219820Sjeff p_remote_node = osm_physp_get_node_ptr(p_remote_physp); 985219820Sjeff 986219820Sjeff CL_ASSERT(p_remote_node); 987219820Sjeff 988219820Sjeff sw_guid = osm_node_get_node_guid(p_remote_node); 989219820Sjeff 990219820Sjeff if (osm_node_get_type(p_remote_node) != IB_NODE_TYPE_SWITCH) { 991219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A22: " 992219820Sjeff "Remote node not a switch node 0x%" PRIx64 "\n", 993219820Sjeff cl_ntoh64(sw_guid)); 994219820Sjeff status = IB_ERROR; 995219820Sjeff goto Exit; 996219820Sjeff } 997219820Sjeff 998219820Sjeff if (!p_remote_node->sw) { 999219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A12: " 1000219820Sjeff "No switch object 0x%" PRIx64 "\n", cl_ntoh64(sw_guid)); 1001219820Sjeff status = IB_ERROR; 1002219820Sjeff goto Exit; 1003219820Sjeff } 1004219820Sjeff 1005219820Sjeff if (osm_switch_is_in_mcast_tree(p_remote_node->sw, mlid_ho)) { 1006219820Sjeff /* 1007219820Sjeff We're in luck. The switch attached to this port 1008219820Sjeff is already in the multicast group, so we can just 1009219820Sjeff add the specified port as a new leaf of the tree. 1010219820Sjeff */ 1011219820Sjeff if (join_state & (IB_JOIN_STATE_FULL | IB_JOIN_STATE_NON)) { 1012219820Sjeff /* 1013219820Sjeff This node wants to receive multicast frames. 1014219820Sjeff Get the switch port number to which the new member port 1015219820Sjeff is attached, then configure this single mcast table. 1016219820Sjeff */ 1017219820Sjeff port_num = osm_physp_get_port_num(p_remote_physp); 1018219820Sjeff CL_ASSERT(port_num); 1019219820Sjeff 1020219820Sjeff p_mcast_tbl = 1021219820Sjeff osm_switch_get_mcast_tbl_ptr(p_remote_node->sw); 1022219820Sjeff osm_mcast_tbl_set(p_mcast_tbl, mlid_ho, port_num); 1023219820Sjeff } else { 1024219820Sjeff if (join_state & IB_JOIN_STATE_SEND_ONLY) 1025219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_DEBUG, 1026219820Sjeff "Success. Nothing to do for send" 1027219820Sjeff "only member\n"); 1028219820Sjeff else { 1029219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A13: " 1030219820Sjeff "Unknown join state 0x%X\n", 1031219820Sjeff join_state); 1032219820Sjeff status = IB_ERROR; 1033219820Sjeff goto Exit; 1034219820Sjeff } 1035219820Sjeff } 1036219820Sjeff } else 1037219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Unable to add port\n"); 1038219820Sjeff 1039219820SjeffExit: 1040219820Sjeff OSM_LOG_EXIT(sm->p_log); 1041219820Sjeff return (status); 1042219820Sjeff} 1043219820Sjeff#endif 1044219820Sjeff 1045219820Sjeff/********************************************************************** 1046219820Sjeff lock must already be held on entry 1047219820Sjeff**********************************************************************/ 1048219820Sjeffstatic ib_api_status_t 1049219820Sjeffosm_mcast_mgr_process_tree(osm_sm_t * sm, 1050219820Sjeff IN osm_mgrp_t * const p_mgrp, 1051219820Sjeff IN osm_mcast_req_type_t req_type, 1052219820Sjeff ib_net64_t port_guid) 1053219820Sjeff{ 1054219820Sjeff ib_api_status_t status = IB_SUCCESS; 1055219820Sjeff ib_net16_t mlid; 1056219820Sjeff 1057219820Sjeff OSM_LOG_ENTER(sm->p_log); 1058219820Sjeff 1059219820Sjeff mlid = osm_mgrp_get_mlid(p_mgrp); 1060219820Sjeff 1061219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_DEBUG, 1062219820Sjeff "Processing multicast group 0x%X\n", cl_ntoh16(mlid)); 1063219820Sjeff 1064219820Sjeff /* 1065219820Sjeff If there are no switches in the subnet, then we have nothing to do. 1066219820Sjeff */ 1067219820Sjeff if (cl_qmap_count(&sm->p_subn->sw_guid_tbl) == 0) { 1068219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_DEBUG, 1069219820Sjeff "No switches in subnet. Nothing to do\n"); 1070219820Sjeff goto Exit; 1071219820Sjeff } 1072219820Sjeff 1073219820Sjeff /* 1074219820Sjeff Clear the multicast tables to start clean, then build 1075219820Sjeff the spanning tree which sets the mcast table bits for each 1076219820Sjeff port in the group. 1077219820Sjeff */ 1078219820Sjeff __osm_mcast_mgr_clear(sm, p_mgrp); 1079219820Sjeff 1080219820Sjeff if (!p_mgrp->full_members) 1081219820Sjeff goto Exit; 1082219820Sjeff 1083219820Sjeff status = __osm_mcast_mgr_build_spanning_tree(sm, p_mgrp); 1084219820Sjeff if (status != IB_SUCCESS) { 1085219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A17: " 1086219820Sjeff "Unable to create spanning tree (%s)\n", 1087219820Sjeff ib_get_err_str(status)); 1088219820Sjeff goto Exit; 1089219820Sjeff } 1090219820Sjeff 1091219820SjeffExit: 1092219820Sjeff OSM_LOG_EXIT(sm->p_log); 1093219820Sjeff return (status); 1094219820Sjeff} 1095219820Sjeff 1096219820Sjeff/********************************************************************** 1097219820Sjeff Process the entire group. 1098219820Sjeff NOTE : The lock should be held externally! 1099219820Sjeff **********************************************************************/ 1100219820Sjeffstatic ib_api_status_t 1101219820Sjeffmcast_mgr_process_mgrp(osm_sm_t * sm, 1102219820Sjeff IN osm_mgrp_t * const p_mgrp, 1103219820Sjeff IN osm_mcast_req_type_t req_type, 1104219820Sjeff IN ib_net64_t port_guid) 1105219820Sjeff{ 1106219820Sjeff ib_api_status_t status; 1107219820Sjeff 1108219820Sjeff OSM_LOG_ENTER(sm->p_log); 1109219820Sjeff 1110219820Sjeff status = osm_mcast_mgr_process_tree(sm, p_mgrp, req_type, port_guid); 1111219820Sjeff if (status != IB_SUCCESS) { 1112219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A19: " 1113219820Sjeff "Unable to create spanning tree (%s)\n", 1114219820Sjeff ib_get_err_str(status)); 1115219820Sjeff goto Exit; 1116219820Sjeff } 1117219820Sjeff p_mgrp->last_tree_id = p_mgrp->last_change_id; 1118219820Sjeff 1119219820Sjeff /* remove MCGRP if it is marked for deletion */ 1120219820Sjeff if (p_mgrp->to_be_deleted) { 1121219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_DEBUG, 1122219820Sjeff "Destroying mgrp with lid:0x%x\n", 1123219820Sjeff cl_ntoh16(p_mgrp->mlid)); 1124219820Sjeff sm->p_subn->mgroups[cl_ntoh16(p_mgrp->mlid) - IB_LID_MCAST_START_HO] = NULL; 1125219820Sjeff osm_mgrp_delete(p_mgrp); 1126219820Sjeff } 1127219820Sjeff 1128219820SjeffExit: 1129219820Sjeff OSM_LOG_EXIT(sm->p_log); 1130219820Sjeff return status; 1131219820Sjeff} 1132219820Sjeff 1133219820Sjeff/********************************************************************** 1134219820Sjeff **********************************************************************/ 1135219820Sjeffosm_signal_t osm_mcast_mgr_process(osm_sm_t * sm) 1136219820Sjeff{ 1137219820Sjeff osm_signal_t signal; 1138219820Sjeff osm_switch_t *p_sw; 1139219820Sjeff cl_qmap_t *p_sw_tbl; 1140219820Sjeff cl_qlist_t *p_list = &sm->mgrp_list; 1141219820Sjeff osm_mgrp_t *p_mgrp; 1142219820Sjeff boolean_t pending_transactions = FALSE; 1143219820Sjeff int i; 1144219820Sjeff 1145219820Sjeff OSM_LOG_ENTER(sm->p_log); 1146219820Sjeff 1147219820Sjeff p_sw_tbl = &sm->p_subn->sw_guid_tbl; 1148219820Sjeff /* 1149219820Sjeff While holding the lock, iterate over all the established 1150219820Sjeff multicast groups, servicing each in turn. 1151219820Sjeff 1152219820Sjeff Then, download the multicast tables to the switches. 1153219820Sjeff */ 1154219820Sjeff CL_PLOCK_EXCL_ACQUIRE(sm->p_lock); 1155219820Sjeff 1156219820Sjeff for (i = 0; i <= sm->p_subn->max_mcast_lid_ho - IB_LID_MCAST_START_HO; 1157219820Sjeff i++) { 1158219820Sjeff /* 1159219820Sjeff We reached here due to some change that caused a heavy sweep 1160219820Sjeff of the subnet. Not due to a specific multicast request. 1161219820Sjeff So the request type is subnet_change and the port guid is 0. 1162219820Sjeff */ 1163219820Sjeff p_mgrp = sm->p_subn->mgroups[i]; 1164219820Sjeff if (p_mgrp) 1165219820Sjeff mcast_mgr_process_mgrp(sm, p_mgrp, 1166219820Sjeff OSM_MCAST_REQ_TYPE_SUBNET_CHANGE, 1167219820Sjeff 0); 1168219820Sjeff } 1169219820Sjeff 1170219820Sjeff /* 1171219820Sjeff Walk the switches and download the tables for each. 1172219820Sjeff */ 1173219820Sjeff p_sw = (osm_switch_t *) cl_qmap_head(p_sw_tbl); 1174219820Sjeff while (p_sw != (osm_switch_t *) cl_qmap_end(p_sw_tbl)) { 1175219820Sjeff signal = __osm_mcast_mgr_set_tbl(sm, p_sw); 1176219820Sjeff if (signal == OSM_SIGNAL_DONE_PENDING) 1177219820Sjeff pending_transactions = TRUE; 1178219820Sjeff p_sw = (osm_switch_t *) cl_qmap_next(&p_sw->map_item); 1179219820Sjeff } 1180219820Sjeff 1181219820Sjeff while (!cl_is_qlist_empty(p_list)) { 1182219820Sjeff cl_list_item_t *p = cl_qlist_remove_head(p_list); 1183219820Sjeff free(p); 1184219820Sjeff } 1185219820Sjeff 1186219820Sjeff CL_PLOCK_RELEASE(sm->p_lock); 1187219820Sjeff 1188219820Sjeff OSM_LOG_EXIT(sm->p_log); 1189219820Sjeff 1190219820Sjeff if (pending_transactions == TRUE) 1191219820Sjeff return (OSM_SIGNAL_DONE_PENDING); 1192219820Sjeff else 1193219820Sjeff return (OSM_SIGNAL_DONE); 1194219820Sjeff} 1195219820Sjeff 1196219820Sjeff/********************************************************************** 1197219820Sjeff This is the function that is invoked during idle time to handle the 1198219820Sjeff process request for mcast groups where join/leave/delete was required. 1199219820Sjeff **********************************************************************/ 1200219820Sjeffosm_signal_t osm_mcast_mgr_process_mgroups(osm_sm_t * sm) 1201219820Sjeff{ 1202219820Sjeff cl_qlist_t *p_list = &sm->mgrp_list; 1203219820Sjeff osm_switch_t *p_sw; 1204219820Sjeff cl_qmap_t *p_sw_tbl; 1205219820Sjeff osm_mgrp_t *p_mgrp; 1206219820Sjeff ib_net16_t mlid; 1207219820Sjeff osm_signal_t ret, signal = OSM_SIGNAL_DONE; 1208219820Sjeff osm_mcast_mgr_ctxt_t *ctx; 1209219820Sjeff osm_mcast_req_type_t req_type; 1210219820Sjeff ib_net64_t port_guid; 1211219820Sjeff 1212219820Sjeff OSM_LOG_ENTER(sm->p_log); 1213219820Sjeff 1214219820Sjeff /* we need a lock to make sure the p_mgrp is not change other ways */ 1215219820Sjeff CL_PLOCK_EXCL_ACQUIRE(sm->p_lock); 1216219820Sjeff 1217219820Sjeff while (!cl_is_qlist_empty(p_list)) { 1218219820Sjeff ctx = (osm_mcast_mgr_ctxt_t *) cl_qlist_remove_head(p_list); 1219219820Sjeff req_type = ctx->req_type; 1220219820Sjeff port_guid = ctx->port_guid; 1221219820Sjeff 1222219820Sjeff /* nice copy no warning on size diff */ 1223219820Sjeff memcpy(&mlid, &ctx->mlid, sizeof(mlid)); 1224219820Sjeff 1225219820Sjeff /* we can destroy the context now */ 1226219820Sjeff free(ctx); 1227219820Sjeff 1228219820Sjeff /* since we delayed the execution we prefer to pass the 1229219820Sjeff mlid as the mgrp identifier and then find it or abort */ 1230219820Sjeff p_mgrp = osm_get_mgrp_by_mlid(sm->p_subn, mlid); 1231219820Sjeff if (!p_mgrp) 1232219820Sjeff continue; 1233219820Sjeff 1234219820Sjeff /* if there was no change from the last time 1235219820Sjeff * we processed the group we can skip doing anything 1236219820Sjeff */ 1237219820Sjeff if (p_mgrp->last_change_id == p_mgrp->last_tree_id) { 1238219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_DEBUG, 1239219820Sjeff "Skip processing mgrp with lid:0x%X change id:%u\n", 1240219820Sjeff cl_ntoh16(mlid), p_mgrp->last_change_id); 1241219820Sjeff continue; 1242219820Sjeff } 1243219820Sjeff 1244219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_DEBUG, 1245219820Sjeff "Processing mgrp with lid:0x%X change id:%u\n", 1246219820Sjeff cl_ntoh16(mlid), p_mgrp->last_change_id); 1247219820Sjeff mcast_mgr_process_mgrp(sm, p_mgrp, req_type, port_guid); 1248219820Sjeff } 1249219820Sjeff 1250219820Sjeff /* 1251219820Sjeff Walk the switches and download the tables for each. 1252219820Sjeff */ 1253219820Sjeff p_sw_tbl = &sm->p_subn->sw_guid_tbl; 1254219820Sjeff p_sw = (osm_switch_t *) cl_qmap_head(p_sw_tbl); 1255219820Sjeff while (p_sw != (osm_switch_t *) cl_qmap_end(p_sw_tbl)) { 1256219820Sjeff ret = __osm_mcast_mgr_set_tbl(sm, p_sw); 1257219820Sjeff if (ret == OSM_SIGNAL_DONE_PENDING) 1258219820Sjeff signal = ret; 1259219820Sjeff p_sw = (osm_switch_t *) cl_qmap_next(&p_sw->map_item); 1260219820Sjeff } 1261219820Sjeff 1262219820Sjeff osm_dump_mcast_routes(sm->p_subn->p_osm); 1263219820Sjeff 1264219820Sjeff CL_PLOCK_RELEASE(sm->p_lock); 1265219820Sjeff OSM_LOG_EXIT(sm->p_log); 1266219820Sjeff return signal; 1267219820Sjeff} 1268