1219820Sjeff/* 2219820Sjeff * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved. 3219820Sjeff * Copyright (c) 2002-2008 Mellanox Technologies LTD. All rights reserved. 4219820Sjeff * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. 5219820Sjeff * 6219820Sjeff * This software is available to you under a choice of one of two 7219820Sjeff * licenses. You may choose to be licensed under the terms of the GNU 8219820Sjeff * General Public License (GPL) Version 2, available from the file 9219820Sjeff * COPYING in the main directory of this source tree, or the 10219820Sjeff * OpenIB.org BSD license below: 11219820Sjeff * 12219820Sjeff * Redistribution and use in source and binary forms, with or 13219820Sjeff * without modification, are permitted provided that the following 14219820Sjeff * conditions are met: 15219820Sjeff * 16219820Sjeff * - Redistributions of source code must retain the above 17219820Sjeff * copyright notice, this list of conditions and the following 18219820Sjeff * disclaimer. 19219820Sjeff * 20219820Sjeff * - Redistributions in binary form must reproduce the above 21219820Sjeff * copyright notice, this list of conditions and the following 22219820Sjeff * disclaimer in the documentation and/or other materials 23219820Sjeff * provided with the distribution. 24219820Sjeff * 25219820Sjeff * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 26219820Sjeff * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 27219820Sjeff * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 28219820Sjeff * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 29219820Sjeff * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 30219820Sjeff * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 31219820Sjeff * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 32219820Sjeff * SOFTWARE. 33219820Sjeff * 34219820Sjeff */ 35219820Sjeff 36219820Sjeff/* 37219820Sjeff * Abstract: 38219820Sjeff * Implementation of osm_state_mgr_t. 39219820Sjeff * This file implements the State Manager object. 40219820Sjeff */ 41219820Sjeff 42219820Sjeff#if HAVE_CONFIG_H 43219820Sjeff# include <config.h> 44219820Sjeff#endif /* HAVE_CONFIG_H */ 45219820Sjeff 46219820Sjeff#include <unistd.h> 47219820Sjeff#include <stdlib.h> 48219820Sjeff#include <string.h> 49219820Sjeff#include <iba/ib_types.h> 50219820Sjeff#include <complib/cl_passivelock.h> 51219820Sjeff#include <complib/cl_debug.h> 52219820Sjeff#include <complib/cl_qmap.h> 53219820Sjeff#include <opensm/osm_sm.h> 54219820Sjeff#include <opensm/osm_madw.h> 55219820Sjeff#include <opensm/osm_switch.h> 56219820Sjeff#include <opensm/osm_log.h> 57219820Sjeff#include <opensm/osm_subnet.h> 58219820Sjeff#include <opensm/osm_helper.h> 59219820Sjeff#include <opensm/osm_msgdef.h> 60219820Sjeff#include <opensm/osm_node.h> 61219820Sjeff#include <opensm/osm_port.h> 62219820Sjeff#include <vendor/osm_vendor_api.h> 63219820Sjeff#include <opensm/osm_inform.h> 64219820Sjeff#include <opensm/osm_opensm.h> 65219820Sjeff 66219820Sjeffextern void osm_drop_mgr_process(IN osm_sm_t * sm); 67219820Sjeffextern osm_signal_t osm_qos_setup(IN osm_opensm_t * p_osm); 68219820Sjeffextern osm_signal_t osm_pkey_mgr_process(IN osm_opensm_t * p_osm); 69219820Sjeffextern osm_signal_t osm_mcast_mgr_process(IN osm_sm_t * sm); 70219820Sjeffextern osm_signal_t osm_mcast_mgr_process_mgroups(IN osm_sm_t * sm); 71219820Sjeffextern osm_signal_t osm_link_mgr_process(IN osm_sm_t * sm, IN uint8_t state); 72219820Sjeff 73219820Sjeff/********************************************************************** 74219820Sjeff **********************************************************************/ 75219820Sjeffstatic void __osm_state_mgr_up_msg(IN const osm_sm_t * sm) 76219820Sjeff{ 77219820Sjeff /* 78219820Sjeff * This message should be written only once - when the 79219820Sjeff * SM moves to Master state and the subnet is up for 80219820Sjeff * the first time. 81219820Sjeff */ 82219820Sjeff osm_log(sm->p_log, sm->p_subn->first_time_master_sweep ? 83219820Sjeff OSM_LOG_SYS : OSM_LOG_INFO, "SUBNET UP\n"); 84219820Sjeff 85219820Sjeff OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE, 86219820Sjeff sm->p_subn->opt.sweep_interval ? 87219820Sjeff "SUBNET UP" : "SUBNET UP (sweep disabled)"); 88219820Sjeff} 89219820Sjeff 90219820Sjeff/********************************************************************** 91219820Sjeff **********************************************************************/ 92219820Sjeffstatic void __osm_state_mgr_reset_node_count(IN cl_map_item_t * 93219820Sjeff const p_map_item, IN void *context) 94219820Sjeff{ 95219820Sjeff osm_node_t *p_node = (osm_node_t *) p_map_item; 96219820Sjeff 97219820Sjeff p_node->discovery_count = 0; 98219820Sjeff} 99219820Sjeff 100219820Sjeff/********************************************************************** 101219820Sjeff **********************************************************************/ 102219820Sjeffstatic void __osm_state_mgr_reset_port_count(IN cl_map_item_t * 103219820Sjeff const p_map_item, IN void *context) 104219820Sjeff{ 105219820Sjeff osm_port_t *p_port = (osm_port_t *) p_map_item; 106219820Sjeff 107219820Sjeff p_port->discovery_count = 0; 108219820Sjeff} 109219820Sjeff 110219820Sjeff/********************************************************************** 111219820Sjeff **********************************************************************/ 112219820Sjeffstatic void 113219820Sjeff__osm_state_mgr_reset_switch_count(IN cl_map_item_t * const p_map_item, 114219820Sjeff IN void *context) 115219820Sjeff{ 116219820Sjeff osm_switch_t *p_sw = (osm_switch_t *) p_map_item; 117219820Sjeff 118219820Sjeff p_sw->discovery_count = 0; 119219820Sjeff p_sw->need_update = 1; 120219820Sjeff} 121219820Sjeff 122219820Sjeff/********************************************************************** 123219820Sjeff **********************************************************************/ 124219820Sjeffstatic void __osm_state_mgr_get_sw_info(IN cl_map_item_t * const p_object, 125219820Sjeff IN void *context) 126219820Sjeff{ 127219820Sjeff osm_node_t *p_node; 128219820Sjeff osm_dr_path_t *p_dr_path; 129219820Sjeff osm_madw_context_t mad_context; 130219820Sjeff osm_switch_t *const p_sw = (osm_switch_t *) p_object; 131219820Sjeff osm_sm_t *sm = context; 132219820Sjeff ib_api_status_t status; 133219820Sjeff 134219820Sjeff OSM_LOG_ENTER(sm->p_log); 135219820Sjeff 136219820Sjeff p_node = p_sw->p_node; 137219820Sjeff p_dr_path = osm_physp_get_dr_path_ptr(osm_node_get_physp_ptr(p_node, 0)); 138219820Sjeff 139219820Sjeff memset(&mad_context, 0, sizeof(mad_context)); 140219820Sjeff 141219820Sjeff mad_context.si_context.node_guid = osm_node_get_node_guid(p_node); 142219820Sjeff mad_context.si_context.set_method = FALSE; 143219820Sjeff mad_context.si_context.light_sweep = TRUE; 144219820Sjeff 145219820Sjeff status = osm_req_get(sm, p_dr_path, IB_MAD_ATTR_SWITCH_INFO, 0, 146219820Sjeff OSM_MSG_LIGHT_SWEEP_FAIL, &mad_context); 147219820Sjeff 148219820Sjeff if (status != IB_SUCCESS) 149219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3304: " 150219820Sjeff "Request for SwitchInfo failed\n"); 151219820Sjeff 152219820Sjeff OSM_LOG_EXIT(sm->p_log); 153219820Sjeff} 154219820Sjeff 155219820Sjeff/********************************************************************** 156219820Sjeff Initiate a remote port info request for the given physical port 157219820Sjeff **********************************************************************/ 158219820Sjeffstatic void 159219820Sjeff__osm_state_mgr_get_remote_port_info(IN osm_sm_t * sm, 160219820Sjeff IN osm_physp_t * const p_physp) 161219820Sjeff{ 162219820Sjeff osm_dr_path_t *p_dr_path; 163219820Sjeff osm_dr_path_t rem_node_dr_path; 164219820Sjeff osm_madw_context_t mad_context; 165219820Sjeff ib_api_status_t status; 166219820Sjeff 167219820Sjeff OSM_LOG_ENTER(sm->p_log); 168219820Sjeff 169219820Sjeff /* generate a dr path leaving on the physp to the remote node */ 170219820Sjeff p_dr_path = osm_physp_get_dr_path_ptr(p_physp); 171219820Sjeff memcpy(&rem_node_dr_path, p_dr_path, sizeof(osm_dr_path_t)); 172219820Sjeff osm_dr_path_extend(&rem_node_dr_path, osm_physp_get_port_num(p_physp)); 173219820Sjeff 174219820Sjeff memset(&mad_context, 0, sizeof(mad_context)); 175219820Sjeff 176219820Sjeff mad_context.pi_context.node_guid = 177219820Sjeff osm_node_get_node_guid(osm_physp_get_node_ptr(p_physp)); 178219820Sjeff mad_context.pi_context.port_guid = p_physp->port_guid; 179219820Sjeff mad_context.pi_context.set_method = FALSE; 180219820Sjeff mad_context.pi_context.light_sweep = TRUE; 181219820Sjeff mad_context.pi_context.active_transition = FALSE; 182219820Sjeff 183219820Sjeff /* note that with some negative logic - if the query failed it means that 184219820Sjeff * there is no point in going to heavy sweep */ 185219820Sjeff status = osm_req_get(sm, &rem_node_dr_path, 186219820Sjeff IB_MAD_ATTR_PORT_INFO, 0, CL_DISP_MSGID_NONE, 187219820Sjeff &mad_context); 188219820Sjeff 189219820Sjeff if (status != IB_SUCCESS) 190219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 332E: " 191219820Sjeff "Request for PortInfo failed\n"); 192219820Sjeff 193219820Sjeff OSM_LOG_EXIT(sm->p_log); 194219820Sjeff} 195219820Sjeff 196219820Sjeff/********************************************************************** 197219820Sjeff Initiates a thorough sweep of the subnet. 198219820Sjeff Used when there is suspicion that something on the subnet has changed. 199219820Sjeff**********************************************************************/ 200219820Sjeffstatic ib_api_status_t __osm_state_mgr_sweep_hop_0(IN osm_sm_t * sm) 201219820Sjeff{ 202219820Sjeff ib_api_status_t status; 203219820Sjeff osm_dr_path_t dr_path; 204219820Sjeff osm_bind_handle_t h_bind; 205219820Sjeff uint8_t path_array[IB_SUBNET_PATH_HOPS_MAX]; 206219820Sjeff 207219820Sjeff OSM_LOG_ENTER(sm->p_log); 208219820Sjeff 209219820Sjeff memset(path_array, 0, sizeof(path_array)); 210219820Sjeff 211219820Sjeff /* 212219820Sjeff * First, get the bind handle. 213219820Sjeff */ 214219820Sjeff h_bind = osm_sm_mad_ctrl_get_bind_handle(&sm->mad_ctrl); 215219820Sjeff if (h_bind != OSM_BIND_INVALID_HANDLE) { 216219820Sjeff OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE, 217219820Sjeff "INITIATING HEAVY SWEEP"); 218219820Sjeff /* 219219820Sjeff * Start the sweep by clearing the port counts, then 220219820Sjeff * get our own NodeInfo at 0 hops. 221219820Sjeff */ 222219820Sjeff CL_PLOCK_ACQUIRE(sm->p_lock); 223219820Sjeff 224219820Sjeff cl_qmap_apply_func(&sm->p_subn->node_guid_tbl, 225219820Sjeff __osm_state_mgr_reset_node_count, sm); 226219820Sjeff 227219820Sjeff cl_qmap_apply_func(&sm->p_subn->port_guid_tbl, 228219820Sjeff __osm_state_mgr_reset_port_count, sm); 229219820Sjeff 230219820Sjeff cl_qmap_apply_func(&sm->p_subn->sw_guid_tbl, 231219820Sjeff __osm_state_mgr_reset_switch_count, sm); 232219820Sjeff 233219820Sjeff /* Set the in_sweep_hop_0 flag in subn to be TRUE. 234219820Sjeff * This will indicate the sweeping not to continue beyond the 235219820Sjeff * the current node. 236219820Sjeff * This is relevant for the case of SM on switch, since in the 237219820Sjeff * switch info we need to signal somehow not to continue 238219820Sjeff * the sweeping. */ 239219820Sjeff sm->p_subn->in_sweep_hop_0 = TRUE; 240219820Sjeff 241219820Sjeff CL_PLOCK_RELEASE(sm->p_lock); 242219820Sjeff 243219820Sjeff osm_dr_path_init(&dr_path, h_bind, 0, path_array); 244219820Sjeff status = osm_req_get(sm, &dr_path, IB_MAD_ATTR_NODE_INFO, 0, 245219820Sjeff CL_DISP_MSGID_NONE, NULL); 246219820Sjeff 247219820Sjeff if (status != IB_SUCCESS) 248219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3305: " 249219820Sjeff "Request for NodeInfo failed\n"); 250219820Sjeff } else { 251219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_DEBUG, 252219820Sjeff "No bound ports. Deferring sweep...\n"); 253219820Sjeff status = IB_INVALID_STATE; 254219820Sjeff } 255219820Sjeff 256219820Sjeff OSM_LOG_EXIT(sm->p_log); 257219820Sjeff return (status); 258219820Sjeff} 259219820Sjeff 260219820Sjeff/********************************************************************** 261219820Sjeff Clear out all existing port lid assignments 262219820Sjeff**********************************************************************/ 263219820Sjeffstatic ib_api_status_t __osm_state_mgr_clean_known_lids(IN osm_sm_t * sm) 264219820Sjeff{ 265219820Sjeff ib_api_status_t status = IB_SUCCESS; 266219820Sjeff cl_ptr_vector_t *p_vec = &(sm->p_subn->port_lid_tbl); 267219820Sjeff uint32_t i; 268219820Sjeff 269219820Sjeff OSM_LOG_ENTER(sm->p_log); 270219820Sjeff 271219820Sjeff /* we need a lock here! */ 272219820Sjeff CL_PLOCK_ACQUIRE(sm->p_lock); 273219820Sjeff 274219820Sjeff for (i = 0; i < cl_ptr_vector_get_size(p_vec); i++) 275219820Sjeff cl_ptr_vector_set(p_vec, i, NULL); 276219820Sjeff 277219820Sjeff CL_PLOCK_RELEASE(sm->p_lock); 278219820Sjeff 279219820Sjeff OSM_LOG_EXIT(sm->p_log); 280219820Sjeff return (status); 281219820Sjeff} 282219820Sjeff 283219820Sjeff/********************************************************************** 284219820Sjeff Notifies the transport layer that the local LID has changed, 285219820Sjeff which give it a chance to update address vectors, etc.. 286219820Sjeff**********************************************************************/ 287219820Sjeffstatic ib_api_status_t __osm_state_mgr_notify_lid_change(IN osm_sm_t * sm) 288219820Sjeff{ 289219820Sjeff ib_api_status_t status; 290219820Sjeff osm_bind_handle_t h_bind; 291219820Sjeff 292219820Sjeff OSM_LOG_ENTER(sm->p_log); 293219820Sjeff 294219820Sjeff /* 295219820Sjeff * First, get the bind handle. 296219820Sjeff */ 297219820Sjeff h_bind = osm_sm_mad_ctrl_get_bind_handle(&sm->mad_ctrl); 298219820Sjeff if (h_bind == OSM_BIND_INVALID_HANDLE) { 299219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3306: " 300219820Sjeff "No bound ports\n"); 301219820Sjeff status = IB_ERROR; 302219820Sjeff goto Exit; 303219820Sjeff } 304219820Sjeff 305219820Sjeff /* 306219820Sjeff * Notify the transport layer that we changed the local LID. 307219820Sjeff */ 308219820Sjeff status = osm_vendor_local_lid_change(h_bind); 309219820Sjeff if (status != IB_SUCCESS) 310219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3307: " 311219820Sjeff "Vendor LID update failed (%s)\n", 312219820Sjeff ib_get_err_str(status)); 313219820Sjeff 314219820SjeffExit: 315219820Sjeff OSM_LOG_EXIT(sm->p_log); 316219820Sjeff return (status); 317219820Sjeff} 318219820Sjeff 319219820Sjeff/********************************************************************** 320219820Sjeff Returns true if the SM port is down. 321219820Sjeff The SM's port object must exist in the port_guid table. 322219820Sjeff**********************************************************************/ 323219820Sjeffstatic boolean_t __osm_state_mgr_is_sm_port_down(IN osm_sm_t * sm) 324219820Sjeff{ 325219820Sjeff ib_net64_t port_guid; 326219820Sjeff osm_port_t *p_port; 327219820Sjeff osm_physp_t *p_physp; 328219820Sjeff uint8_t state; 329219820Sjeff 330219820Sjeff OSM_LOG_ENTER(sm->p_log); 331219820Sjeff 332219820Sjeff port_guid = sm->p_subn->sm_port_guid; 333219820Sjeff 334219820Sjeff /* 335219820Sjeff * If we don't know our own port guid yet, assume the port is down. 336219820Sjeff */ 337219820Sjeff if (port_guid == 0) { 338219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3308: " 339219820Sjeff "SM port GUID unknown\n"); 340219820Sjeff state = IB_LINK_DOWN; 341219820Sjeff goto Exit; 342219820Sjeff } 343219820Sjeff 344219820Sjeff CL_ASSERT(port_guid); 345219820Sjeff 346219820Sjeff CL_PLOCK_ACQUIRE(sm->p_lock); 347219820Sjeff p_port = osm_get_port_by_guid(sm->p_subn, port_guid); 348219820Sjeff if (!p_port) { 349219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3309: " 350219820Sjeff "SM port with GUID:%016" PRIx64 " is unknown\n", 351219820Sjeff cl_ntoh64(port_guid)); 352219820Sjeff state = IB_LINK_DOWN; 353219820Sjeff CL_PLOCK_RELEASE(sm->p_lock); 354219820Sjeff goto Exit; 355219820Sjeff } 356219820Sjeff 357219820Sjeff p_physp = p_port->p_physp; 358219820Sjeff 359219820Sjeff CL_ASSERT(p_physp); 360219820Sjeff 361219820Sjeff state = osm_physp_get_port_state(p_physp); 362219820Sjeff CL_PLOCK_RELEASE(sm->p_lock); 363219820Sjeff 364219820SjeffExit: 365219820Sjeff OSM_LOG_EXIT(sm->p_log); 366219820Sjeff return (state == IB_LINK_DOWN); 367219820Sjeff} 368219820Sjeff 369219820Sjeff/********************************************************************** 370219820Sjeff Sweeps the node 1 hop away. 371219820Sjeff This sets off a "chain reaction" that causes discovery of the subnet. 372219820Sjeff Used when there is suspicion that something on the subnet has changed. 373219820Sjeff**********************************************************************/ 374219820Sjeffstatic ib_api_status_t __osm_state_mgr_sweep_hop_1(IN osm_sm_t * sm) 375219820Sjeff{ 376219820Sjeff ib_api_status_t status = IB_SUCCESS; 377219820Sjeff osm_bind_handle_t h_bind; 378219820Sjeff osm_madw_context_t context; 379219820Sjeff osm_node_t *p_node; 380219820Sjeff osm_port_t *p_port; 381219820Sjeff osm_physp_t *p_physp; 382219820Sjeff osm_dr_path_t *p_dr_path; 383219820Sjeff osm_dr_path_t hop_1_path; 384219820Sjeff ib_net64_t port_guid; 385219820Sjeff uint8_t port_num; 386219820Sjeff uint8_t path_array[IB_SUBNET_PATH_HOPS_MAX]; 387219820Sjeff uint8_t num_ports; 388219820Sjeff osm_physp_t *p_ext_physp; 389219820Sjeff 390219820Sjeff OSM_LOG_ENTER(sm->p_log); 391219820Sjeff 392219820Sjeff /* 393219820Sjeff * First, get our own port and node objects. 394219820Sjeff */ 395219820Sjeff port_guid = sm->p_subn->sm_port_guid; 396219820Sjeff 397219820Sjeff CL_ASSERT(port_guid); 398219820Sjeff 399219820Sjeff /* Set the in_sweep_hop_0 flag in subn to be FALSE. 400219820Sjeff * This will indicate the sweeping to continue beyond the 401219820Sjeff * the current node. 402219820Sjeff * This is relevant for the case of SM on switch, since in the 403219820Sjeff * switch info we need to signal that the sweeping should 404219820Sjeff * continue through the switch. */ 405219820Sjeff sm->p_subn->in_sweep_hop_0 = FALSE; 406219820Sjeff 407219820Sjeff p_port = osm_get_port_by_guid(sm->p_subn, port_guid); 408219820Sjeff if (!p_port) { 409219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3310: " 410219820Sjeff "No SM port object\n"); 411219820Sjeff status = IB_ERROR; 412219820Sjeff goto Exit; 413219820Sjeff } 414219820Sjeff 415219820Sjeff p_node = p_port->p_node; 416219820Sjeff CL_ASSERT(p_node); 417219820Sjeff 418219820Sjeff port_num = ib_node_info_get_local_port_num(&p_node->node_info); 419219820Sjeff 420219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_DEBUG, 421219820Sjeff "Probing hop 1 on local port %u\n", port_num); 422219820Sjeff 423219820Sjeff p_physp = osm_node_get_physp_ptr(p_node, port_num); 424219820Sjeff 425219820Sjeff CL_ASSERT(p_physp); 426219820Sjeff 427219820Sjeff p_dr_path = osm_physp_get_dr_path_ptr(p_physp); 428219820Sjeff h_bind = osm_dr_path_get_bind_handle(p_dr_path); 429219820Sjeff 430219820Sjeff CL_ASSERT(h_bind != OSM_BIND_INVALID_HANDLE); 431219820Sjeff 432219820Sjeff memset(path_array, 0, sizeof(path_array)); 433219820Sjeff /* the hop_1 operations depend on the type of our node. 434219820Sjeff * Currently - legal nodes that can host SM are SW and CA */ 435219820Sjeff switch (osm_node_get_type(p_node)) { 436219820Sjeff case IB_NODE_TYPE_CA: 437219820Sjeff case IB_NODE_TYPE_ROUTER: 438219820Sjeff memset(&context, 0, sizeof(context)); 439219820Sjeff context.ni_context.node_guid = osm_node_get_node_guid(p_node); 440219820Sjeff context.ni_context.port_num = port_num; 441219820Sjeff 442219820Sjeff path_array[1] = port_num; 443219820Sjeff 444219820Sjeff osm_dr_path_init(&hop_1_path, h_bind, 1, path_array); 445219820Sjeff status = osm_req_get(sm, &hop_1_path, IB_MAD_ATTR_NODE_INFO, 0, 446219820Sjeff CL_DISP_MSGID_NONE, &context); 447219820Sjeff if (status != IB_SUCCESS) 448219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3311: " 449219820Sjeff "Request for NodeInfo failed\n"); 450219820Sjeff break; 451219820Sjeff 452219820Sjeff case IB_NODE_TYPE_SWITCH: 453219820Sjeff /* Need to go over all the ports of the switch, and send a node_info 454219820Sjeff * from them. This doesn't include the port 0 of the switch, which 455219820Sjeff * hosts the SM. 456219820Sjeff * Note: We'll send another switchInfo on port 0, since if no ports 457219820Sjeff * are connected, we still want to get some response, and have the 458219820Sjeff * subnet come up. 459219820Sjeff */ 460219820Sjeff num_ports = osm_node_get_num_physp(p_node); 461219820Sjeff for (port_num = 0; port_num < num_ports; port_num++) { 462219820Sjeff /* go through the port only if the port is not DOWN */ 463219820Sjeff p_ext_physp = osm_node_get_physp_ptr(p_node, port_num); 464219820Sjeff if (p_ext_physp && ib_port_info_get_port_state 465219820Sjeff (&(p_ext_physp->port_info)) > IB_LINK_DOWN) { 466219820Sjeff memset(&context, 0, sizeof(context)); 467219820Sjeff context.ni_context.node_guid = 468219820Sjeff osm_node_get_node_guid(p_node); 469219820Sjeff context.ni_context.port_num = port_num; 470219820Sjeff 471219820Sjeff path_array[1] = port_num; 472219820Sjeff osm_dr_path_init(&hop_1_path, h_bind, 1, 473219820Sjeff path_array); 474219820Sjeff status = osm_req_get(sm, &hop_1_path, 475219820Sjeff IB_MAD_ATTR_NODE_INFO, 0, 476219820Sjeff CL_DISP_MSGID_NONE, 477219820Sjeff &context); 478219820Sjeff 479219820Sjeff if (status != IB_SUCCESS) 480219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3312: " 481219820Sjeff "Request for NodeInfo failed\n"); 482219820Sjeff } 483219820Sjeff } 484219820Sjeff break; 485219820Sjeff 486219820Sjeff default: 487219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, 488219820Sjeff "ERR 3313: Unknown node type %d (%s)\n", 489219820Sjeff osm_node_get_type(p_node), p_node->print_desc); 490219820Sjeff } 491219820Sjeff 492219820SjeffExit: 493219820Sjeff OSM_LOG_EXIT(sm->p_log); 494219820Sjeff return (status); 495219820Sjeff} 496219820Sjeff 497219820Sjeffstatic void query_sm_info(cl_map_item_t *item, void *cxt) 498219820Sjeff{ 499219820Sjeff osm_madw_context_t context; 500219820Sjeff osm_remote_sm_t *r_sm = cl_item_obj(item, r_sm, map_item); 501219820Sjeff osm_sm_t *sm = cxt; 502219820Sjeff ib_api_status_t ret; 503219820Sjeff 504219820Sjeff context.smi_context.port_guid = r_sm->p_port->guid; 505219820Sjeff context.smi_context.set_method = FALSE; 506219820Sjeff context.smi_context.light_sweep = TRUE; 507219820Sjeff 508219820Sjeff ret = osm_req_get(sm, osm_physp_get_dr_path_ptr(r_sm->p_port->p_physp), 509219820Sjeff IB_MAD_ATTR_SM_INFO, 0, CL_DISP_MSGID_NONE, &context); 510219820Sjeff if (ret != IB_SUCCESS) 511219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3314: " 512219820Sjeff "Failure requesting SMInfo (%s)\n", 513219820Sjeff ib_get_err_str(ret)); 514219820Sjeff} 515219820Sjeff 516219820Sjeff/********************************************************************** 517219820Sjeff During a light sweep check each node to see if the node descriptor is valid 518219820Sjeff if not issue a ND query. 519219820Sjeff**********************************************************************/ 520219820Sjeffstatic void __osm_state_mgr_get_node_desc(IN cl_map_item_t * const p_object, 521219820Sjeff IN void *context) 522219820Sjeff{ 523219820Sjeff osm_madw_context_t mad_context; 524219820Sjeff osm_node_t *const p_node = (osm_node_t *) p_object; 525219820Sjeff osm_sm_t *sm = context; 526219820Sjeff osm_physp_t *p_physp = NULL; 527219820Sjeff unsigned i, num_ports; 528219820Sjeff ib_api_status_t status; 529219820Sjeff 530219820Sjeff OSM_LOG_ENTER(sm->p_log); 531219820Sjeff 532219820Sjeff CL_ASSERT(p_node); 533219820Sjeff 534219820Sjeff if (p_node->print_desc && strcmp(p_node->print_desc, OSM_NODE_DESC_UNKNOWN)) 535219820Sjeff /* if ND is valid, do nothing */ 536219820Sjeff goto exit; 537219820Sjeff 538219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, 539219820Sjeff "ERR 3319: Unknown node description for node GUID " 540219820Sjeff "0x%016" PRIx64 ". Reissuing ND query\n", 541219820Sjeff cl_ntoh64(osm_node_get_node_guid (p_node))); 542219820Sjeff 543219820Sjeff /* get a physp to request from. */ 544219820Sjeff num_ports = osm_node_get_num_physp(p_node); 545219820Sjeff for (i = 0; i < num_ports; i++) 546219820Sjeff if ((p_physp = osm_node_get_physp_ptr(p_node, i))) 547219820Sjeff break; 548219820Sjeff 549219820Sjeff if (!p_physp) { 550219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 331C: " 551219820Sjeff "Failed to find any valid physical port object.\n"); 552219820Sjeff goto exit; 553219820Sjeff } 554219820Sjeff 555219820Sjeff mad_context.nd_context.node_guid = osm_node_get_node_guid(p_node); 556219820Sjeff 557219820Sjeff status = osm_req_get(sm, osm_physp_get_dr_path_ptr(p_physp), 558219820Sjeff IB_MAD_ATTR_NODE_DESC, 0, CL_DISP_MSGID_NONE, 559219820Sjeff &mad_context); 560219820Sjeff if (status != IB_SUCCESS) 561219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, 562219820Sjeff "ERR 331B: Failure initiating NodeDescription request " 563219820Sjeff "(%s)\n", ib_get_err_str(status)); 564219820Sjeff 565219820Sjeffexit: 566219820Sjeff OSM_LOG_EXIT(sm->p_log); 567219820Sjeff} 568219820Sjeff 569219820Sjeff/********************************************************************** 570219820Sjeff Initiates a lightweight sweep of the subnet. 571219820Sjeff Used during normal sweeps after the subnet is up. 572219820Sjeff**********************************************************************/ 573219820Sjeffstatic ib_api_status_t __osm_state_mgr_light_sweep_start(IN osm_sm_t * sm) 574219820Sjeff{ 575219820Sjeff ib_api_status_t status = IB_SUCCESS; 576219820Sjeff osm_bind_handle_t h_bind; 577219820Sjeff cl_qmap_t *p_sw_tbl; 578219820Sjeff cl_map_item_t *p_next; 579219820Sjeff osm_node_t *p_node; 580219820Sjeff osm_physp_t *p_physp; 581219820Sjeff uint8_t port_num; 582219820Sjeff 583219820Sjeff OSM_LOG_ENTER(sm->p_log); 584219820Sjeff 585219820Sjeff p_sw_tbl = &sm->p_subn->sw_guid_tbl; 586219820Sjeff 587219820Sjeff /* 588219820Sjeff * First, get the bind handle. 589219820Sjeff */ 590219820Sjeff h_bind = osm_sm_mad_ctrl_get_bind_handle(&sm->mad_ctrl); 591219820Sjeff if (h_bind == OSM_BIND_INVALID_HANDLE) { 592219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_DEBUG, 593219820Sjeff "No bound ports. Deferring sweep...\n"); 594219820Sjeff status = IB_INVALID_STATE; 595219820Sjeff goto _exit; 596219820Sjeff } 597219820Sjeff 598219820Sjeff OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE, "INITIATING LIGHT SWEEP"); 599219820Sjeff CL_PLOCK_ACQUIRE(sm->p_lock); 600219820Sjeff cl_qmap_apply_func(p_sw_tbl, __osm_state_mgr_get_sw_info, sm); 601219820Sjeff CL_PLOCK_RELEASE(sm->p_lock); 602219820Sjeff 603219820Sjeff CL_PLOCK_ACQUIRE(sm->p_lock); 604219820Sjeff cl_qmap_apply_func(&sm->p_subn->node_guid_tbl, __osm_state_mgr_get_node_desc, sm); 605219820Sjeff CL_PLOCK_RELEASE(sm->p_lock); 606219820Sjeff 607219820Sjeff /* now scan the list of physical ports that were not down but have no remote port */ 608219820Sjeff CL_PLOCK_ACQUIRE(sm->p_lock); 609219820Sjeff p_next = cl_qmap_head(&sm->p_subn->node_guid_tbl); 610219820Sjeff while (p_next != cl_qmap_end(&sm->p_subn->node_guid_tbl)) { 611219820Sjeff p_node = (osm_node_t *) p_next; 612219820Sjeff p_next = cl_qmap_next(p_next); 613219820Sjeff 614219820Sjeff for (port_num = 1; port_num < osm_node_get_num_physp(p_node); 615219820Sjeff port_num++) { 616219820Sjeff p_physp = osm_node_get_physp_ptr(p_node, port_num); 617219820Sjeff if (p_physp && (osm_physp_get_port_state(p_physp) != 618219820Sjeff IB_LINK_DOWN) 619219820Sjeff && !osm_physp_get_remote(p_physp)) { 620219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3315: " 621219820Sjeff "Unknown remote side for node 0x%016" 622219820Sjeff PRIx64 623219820Sjeff "(%s) port %u. Adding to light sweep sampling list\n", 624219820Sjeff cl_ntoh64(osm_node_get_node_guid 625219820Sjeff (p_node)), 626219820Sjeff p_node->print_desc, port_num); 627219820Sjeff 628219820Sjeff osm_dump_dr_path(sm->p_log, 629219820Sjeff osm_physp_get_dr_path_ptr 630219820Sjeff (p_physp), OSM_LOG_ERROR); 631219820Sjeff 632219820Sjeff __osm_state_mgr_get_remote_port_info(sm, 633219820Sjeff p_physp); 634219820Sjeff } 635219820Sjeff } 636219820Sjeff } 637219820Sjeff 638219820Sjeff cl_qmap_apply_func(&sm->p_subn->sm_guid_tbl, query_sm_info, sm); 639219820Sjeff 640219820Sjeff CL_PLOCK_RELEASE(sm->p_lock); 641219820Sjeff 642219820Sjeff_exit: 643219820Sjeff OSM_LOG_EXIT(sm->p_log); 644219820Sjeff return (status); 645219820Sjeff} 646219820Sjeff 647219820Sjeff/********************************************************************** 648219820Sjeff * Go over all the remote SMs (as updated in the sm_guid_tbl). 649219820Sjeff * Find if there is a remote sm that is a master SM. 650219820Sjeff * If there is a remote master SM - return a pointer to it, 651219820Sjeff * else - return NULL. 652219820Sjeff **********************************************************************/ 653219820Sjeffstatic osm_remote_sm_t *__osm_state_mgr_exists_other_master_sm(IN osm_sm_t * sm) 654219820Sjeff{ 655219820Sjeff cl_qmap_t *p_sm_tbl; 656219820Sjeff osm_remote_sm_t *p_sm; 657219820Sjeff osm_remote_sm_t *p_sm_res = NULL; 658219820Sjeff 659219820Sjeff OSM_LOG_ENTER(sm->p_log); 660219820Sjeff 661219820Sjeff p_sm_tbl = &sm->p_subn->sm_guid_tbl; 662219820Sjeff 663219820Sjeff /* go over all the remote SMs */ 664219820Sjeff for (p_sm = (osm_remote_sm_t *) cl_qmap_head(p_sm_tbl); 665219820Sjeff p_sm != (osm_remote_sm_t *) cl_qmap_end(p_sm_tbl); 666219820Sjeff p_sm = (osm_remote_sm_t *) cl_qmap_next(&p_sm->map_item)) { 667219820Sjeff /* If the sm is in MASTER state - return a pointer to it */ 668219820Sjeff if (ib_sminfo_get_state(&p_sm->smi) == IB_SMINFO_STATE_MASTER) { 669219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, 670219820Sjeff "Found remote master SM with guid:0x%016" PRIx64 671219820Sjeff " (node %s)\n", cl_ntoh64(p_sm->smi.guid), 672219820Sjeff p_sm->p_port->p_node ? p_sm->p_port->p_node-> 673219820Sjeff print_desc : "UNKNOWN"); 674219820Sjeff p_sm_res = p_sm; 675219820Sjeff goto Exit; 676219820Sjeff } 677219820Sjeff } 678219820Sjeff 679219820SjeffExit: 680219820Sjeff OSM_LOG_EXIT(sm->p_log); 681219820Sjeff return (p_sm_res); 682219820Sjeff} 683219820Sjeff 684219820Sjeff/********************************************************************** 685219820Sjeff * Go over all remote SMs (as updated in the sm_guid_tbl). 686219820Sjeff * Find the one with the highest priority and lowest guid. 687219820Sjeff * Compare this SM to the local SM. If the local SM is higher - 688219820Sjeff * return NULL, if the remote SM is higher - return a pointer to it. 689219820Sjeff **********************************************************************/ 690219820Sjeffstatic osm_remote_sm_t *__osm_state_mgr_get_highest_sm(IN osm_sm_t * sm) 691219820Sjeff{ 692219820Sjeff cl_qmap_t *p_sm_tbl; 693219820Sjeff osm_remote_sm_t *p_sm = NULL; 694219820Sjeff osm_remote_sm_t *p_highest_sm; 695219820Sjeff uint8_t highest_sm_priority; 696219820Sjeff ib_net64_t highest_sm_guid; 697219820Sjeff 698219820Sjeff OSM_LOG_ENTER(sm->p_log); 699219820Sjeff 700219820Sjeff p_sm_tbl = &sm->p_subn->sm_guid_tbl; 701219820Sjeff 702219820Sjeff /* Start with the local sm as the standard */ 703219820Sjeff p_highest_sm = NULL; 704219820Sjeff highest_sm_priority = sm->p_subn->opt.sm_priority; 705219820Sjeff highest_sm_guid = sm->p_subn->sm_port_guid; 706219820Sjeff 707219820Sjeff /* go over all the remote SMs */ 708219820Sjeff for (p_sm = (osm_remote_sm_t *) cl_qmap_head(p_sm_tbl); 709219820Sjeff p_sm != (osm_remote_sm_t *) cl_qmap_end(p_sm_tbl); 710219820Sjeff p_sm = (osm_remote_sm_t *) cl_qmap_next(&p_sm->map_item)) { 711219820Sjeff 712219820Sjeff /* If the sm is in NOTACTIVE state - continue */ 713219820Sjeff if (ib_sminfo_get_state(&p_sm->smi) == 714219820Sjeff IB_SMINFO_STATE_NOTACTIVE) 715219820Sjeff continue; 716219820Sjeff 717219820Sjeff if (osm_sm_is_greater_than(ib_sminfo_get_priority(&p_sm->smi), 718219820Sjeff p_sm->smi.guid, highest_sm_priority, 719219820Sjeff highest_sm_guid)) { 720219820Sjeff /* the new p_sm is with higher priority - update the highest_sm */ 721219820Sjeff /* to this sm */ 722219820Sjeff p_highest_sm = p_sm; 723219820Sjeff highest_sm_priority = 724219820Sjeff ib_sminfo_get_priority(&p_sm->smi); 725219820Sjeff highest_sm_guid = p_sm->smi.guid; 726219820Sjeff } 727219820Sjeff } 728219820Sjeff 729219820Sjeff if (p_highest_sm != NULL) 730219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_DEBUG, 731219820Sjeff "Found higher SM with guid: %016" PRIx64 " (node %s)\n", 732219820Sjeff cl_ntoh64(p_highest_sm->smi.guid), 733219820Sjeff p_highest_sm->p_port->p_node ? 734219820Sjeff p_highest_sm->p_port->p_node->print_desc : "UNKNOWN"); 735219820Sjeff 736219820Sjeff OSM_LOG_EXIT(sm->p_log); 737219820Sjeff return (p_highest_sm); 738219820Sjeff} 739219820Sjeff 740219820Sjeff/********************************************************************** 741219820Sjeff * Send SubnSet(SMInfo) SMP with HANDOVER attribute to the 742219820Sjeff * remote_sm indicated. 743219820Sjeff **********************************************************************/ 744219820Sjeffstatic void 745219820Sjeff__osm_state_mgr_send_handover(IN osm_sm_t * const sm, 746219820Sjeff IN osm_remote_sm_t * const p_sm) 747219820Sjeff{ 748219820Sjeff uint8_t payload[IB_SMP_DATA_SIZE]; 749219820Sjeff ib_sm_info_t *p_smi = (ib_sm_info_t *) payload; 750219820Sjeff osm_madw_context_t context; 751219820Sjeff const osm_port_t *p_port; 752219820Sjeff ib_api_status_t status; 753219820Sjeff 754219820Sjeff OSM_LOG_ENTER(sm->p_log); 755219820Sjeff 756219820Sjeff /* 757219820Sjeff * Send a query of SubnSet(SMInfo) HANDOVER to the remote sm given. 758219820Sjeff */ 759219820Sjeff 760219820Sjeff memset(&context, 0, sizeof(context)); 761219820Sjeff p_port = p_sm->p_port; 762219820Sjeff if (p_port == NULL) { 763219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3316: " 764219820Sjeff "No port object on given remote_sm object\n"); 765219820Sjeff goto Exit; 766219820Sjeff } 767219820Sjeff 768219820Sjeff /* update the master_guid in the sm_state_mgr object according to */ 769219820Sjeff /* the guid of the port where the new Master SM should reside. */ 770219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, 771219820Sjeff "Handing over mastership. Updating sm_state_mgr master_guid: %016" 772219820Sjeff PRIx64 " (node %s)\n", cl_ntoh64(p_port->guid), 773219820Sjeff p_port->p_node ? p_port->p_node->print_desc : "UNKNOWN"); 774219820Sjeff sm->master_sm_guid = p_port->guid; 775219820Sjeff 776219820Sjeff context.smi_context.port_guid = p_port->guid; 777219820Sjeff context.smi_context.set_method = TRUE; 778219820Sjeff 779219820Sjeff p_smi->guid = sm->p_subn->sm_port_guid; 780219820Sjeff p_smi->act_count = cl_hton32(sm->p_subn->p_osm->stats.qp0_mads_sent); 781219820Sjeff p_smi->pri_state = (uint8_t) (sm->p_subn->sm_state | 782219820Sjeff sm->p_subn->opt.sm_priority << 4); 783219820Sjeff /* 784219820Sjeff * Return 0 for the SM key unless we authenticate the requester 785219820Sjeff * as the master SM. 786219820Sjeff */ 787219820Sjeff if (ib_sminfo_get_state(&p_sm->smi) == IB_SMINFO_STATE_MASTER) { 788219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_DEBUG, 789219820Sjeff "Responding to master SM with real sm_key\n"); 790219820Sjeff p_smi->sm_key = sm->p_subn->opt.sm_key; 791219820Sjeff } else { 792219820Sjeff /* The requester is not authenticated as master - set sm_key to zero */ 793219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_DEBUG, 794219820Sjeff "Responding to SM not master with zero sm_key\n"); 795219820Sjeff p_smi->sm_key = 0; 796219820Sjeff } 797219820Sjeff 798219820Sjeff status = osm_req_set(sm, osm_physp_get_dr_path_ptr(p_port->p_physp), 799219820Sjeff payload, sizeof(payload), IB_MAD_ATTR_SM_INFO, 800219820Sjeff IB_SMINFO_ATTR_MOD_HANDOVER, CL_DISP_MSGID_NONE, 801219820Sjeff &context); 802219820Sjeff 803219820Sjeff if (status != IB_SUCCESS) 804219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3317: " 805219820Sjeff "Failure requesting SMInfo (%s)\n", 806219820Sjeff ib_get_err_str(status)); 807219820Sjeff 808219820SjeffExit: 809219820Sjeff OSM_LOG_EXIT(sm->p_log); 810219820Sjeff} 811219820Sjeff 812219820Sjeff/********************************************************************** 813219820Sjeff * Send Trap 64 on all new ports. 814219820Sjeff **********************************************************************/ 815219820Sjeffstatic void __osm_state_mgr_report_new_ports(IN osm_sm_t * sm) 816219820Sjeff{ 817219820Sjeff ib_gid_t port_gid; 818219820Sjeff ib_mad_notice_attr_t notice; 819219820Sjeff ib_api_status_t status; 820219820Sjeff ib_net64_t port_guid; 821219820Sjeff cl_map_item_t *p_next; 822219820Sjeff osm_port_t *p_port; 823219820Sjeff uint16_t min_lid_ho; 824219820Sjeff uint16_t max_lid_ho; 825219820Sjeff 826219820Sjeff OSM_LOG_ENTER(sm->p_log); 827219820Sjeff 828219820Sjeff CL_PLOCK_ACQUIRE(sm->p_lock); 829219820Sjeff p_next = cl_qmap_head(&sm->p_subn->port_guid_tbl); 830219820Sjeff while (p_next != cl_qmap_end(&sm->p_subn->port_guid_tbl)) { 831219820Sjeff p_port = (osm_port_t *) p_next; 832219820Sjeff p_next = cl_qmap_next(p_next); 833219820Sjeff 834219820Sjeff if (!p_port->is_new) 835219820Sjeff continue; 836219820Sjeff 837219820Sjeff port_guid = osm_port_get_guid(p_port); 838219820Sjeff /* issue a notice - trap 64 */ 839219820Sjeff 840219820Sjeff /* details of the notice */ 841219820Sjeff notice.generic_type = 0x83; /* is generic subn mgt type */ 842219820Sjeff ib_notice_set_prod_type_ho(¬ice, 4); /* A Class Manager generator */ 843219820Sjeff /* endport becomes to be reachable */ 844219820Sjeff notice.g_or_v.generic.trap_num = CL_HTON16(64); 845219820Sjeff /* The sm_base_lid is saved in network order already. */ 846219820Sjeff notice.issuer_lid = sm->p_subn->sm_base_lid; 847219820Sjeff /* following C14-72.1.1 and table 119 p739 */ 848219820Sjeff /* we need to provide the GID */ 849219820Sjeff port_gid.unicast.prefix = sm->p_subn->opt.subnet_prefix; 850219820Sjeff port_gid.unicast.interface_id = port_guid; 851219820Sjeff memcpy(&(notice.data_details.ntc_64_67.gid), &(port_gid), 852219820Sjeff sizeof(ib_gid_t)); 853219820Sjeff 854219820Sjeff /* According to page 653 - the issuer gid in this case of trap 855219820Sjeff * is the SM gid, since the SM is the initiator of this trap. */ 856219820Sjeff notice.issuer_gid.unicast.prefix = 857219820Sjeff sm->p_subn->opt.subnet_prefix; 858219820Sjeff notice.issuer_gid.unicast.interface_id = 859219820Sjeff sm->p_subn->sm_port_guid; 860219820Sjeff 861219820Sjeff status = osm_report_notice(sm->p_log, sm->p_subn, ¬ice); 862219820Sjeff if (status != IB_SUCCESS) 863219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3318: " 864219820Sjeff "Error sending trap reports on GUID:0x%016" 865219820Sjeff PRIx64 " (%s)\n", port_gid.unicast.interface_id, 866219820Sjeff ib_get_err_str(status)); 867219820Sjeff osm_port_get_lid_range_ho(p_port, &min_lid_ho, &max_lid_ho); 868219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_INFO, 869219820Sjeff "Discovered new port with GUID:0x%016" PRIx64 870219820Sjeff " LID range [%u,%u] of node:%s\n", 871219820Sjeff cl_ntoh64(port_gid.unicast.interface_id), 872219820Sjeff min_lid_ho, max_lid_ho, 873219820Sjeff p_port->p_node ? p_port->p_node-> 874219820Sjeff print_desc : "UNKNOWN"); 875219820Sjeff 876219820Sjeff p_port->is_new = 0; 877219820Sjeff } 878219820Sjeff CL_PLOCK_RELEASE(sm->p_lock); 879219820Sjeff 880219820Sjeff OSM_LOG_EXIT(sm->p_log); 881219820Sjeff} 882219820Sjeff 883219820Sjeff/********************************************************************** 884219820Sjeff * Make sure that the lid_port_tbl of the subnet has only the ports 885219820Sjeff * that are recognized, and in the correct lid place. There could be 886219820Sjeff * errors if we wanted to assign a certain port with lid X, but that 887219820Sjeff * request didn't reach the port. In this case port_lid_tbl will have 888219820Sjeff * the port under lid X, though the port isn't updated with this lid. 889219820Sjeff * We will run a new heavy sweep (since there were errors in the 890219820Sjeff * initialization), but here we'll clean the database from incorrect 891219820Sjeff * information. 892219820Sjeff **********************************************************************/ 893219820Sjeffstatic void __osm_state_mgr_check_tbl_consistency(IN osm_sm_t * sm) 894219820Sjeff{ 895219820Sjeff cl_qmap_t *p_port_guid_tbl; 896219820Sjeff osm_port_t *p_port; 897219820Sjeff osm_port_t *p_next_port; 898219820Sjeff cl_ptr_vector_t *p_port_lid_tbl; 899219820Sjeff size_t max_lid, ref_size, curr_size, lid; 900219820Sjeff osm_port_t *p_port_ref, *p_port_stored; 901219820Sjeff cl_ptr_vector_t ref_port_lid_tbl; 902219820Sjeff uint16_t min_lid_ho; 903219820Sjeff uint16_t max_lid_ho; 904219820Sjeff uint16_t lid_ho; 905219820Sjeff 906219820Sjeff OSM_LOG_ENTER(sm->p_log); 907219820Sjeff 908219820Sjeff cl_ptr_vector_construct(&ref_port_lid_tbl); 909219820Sjeff cl_ptr_vector_init(&ref_port_lid_tbl, 910219820Sjeff cl_ptr_vector_get_size(&sm->p_subn->port_lid_tbl), 911219820Sjeff OSM_SUBNET_VECTOR_GROW_SIZE); 912219820Sjeff 913219820Sjeff p_port_guid_tbl = &sm->p_subn->port_guid_tbl; 914219820Sjeff 915219820Sjeff /* Let's go over all the ports according to port_guid_tbl, 916219820Sjeff * and add the port to a reference port_lid_tbl. */ 917219820Sjeff p_next_port = (osm_port_t *) cl_qmap_head(p_port_guid_tbl); 918219820Sjeff while (p_next_port != (osm_port_t *) cl_qmap_end(p_port_guid_tbl)) { 919219820Sjeff p_port = p_next_port; 920219820Sjeff p_next_port = 921219820Sjeff (osm_port_t *) cl_qmap_next(&p_next_port->map_item); 922219820Sjeff 923219820Sjeff osm_port_get_lid_range_ho(p_port, &min_lid_ho, &max_lid_ho); 924219820Sjeff for (lid_ho = min_lid_ho; lid_ho <= max_lid_ho; lid_ho++) 925219820Sjeff cl_ptr_vector_set(&ref_port_lid_tbl, lid_ho, p_port); 926219820Sjeff } 927219820Sjeff 928219820Sjeff p_port_lid_tbl = &sm->p_subn->port_lid_tbl; 929219820Sjeff 930219820Sjeff ref_size = cl_ptr_vector_get_size(&ref_port_lid_tbl); 931219820Sjeff curr_size = cl_ptr_vector_get_size(p_port_lid_tbl); 932219820Sjeff /* They should be the same, but compare it anyway */ 933219820Sjeff max_lid = (ref_size > curr_size) ? ref_size : curr_size; 934219820Sjeff 935219820Sjeff for (lid = 1; lid <= max_lid; lid++) { 936219820Sjeff p_port_ref = NULL; 937219820Sjeff p_port_stored = NULL; 938219820Sjeff cl_ptr_vector_at(p_port_lid_tbl, lid, (void *)&p_port_stored); 939219820Sjeff cl_ptr_vector_at(&ref_port_lid_tbl, lid, (void *)&p_port_ref); 940219820Sjeff 941219820Sjeff if (p_port_stored == p_port_ref) 942219820Sjeff /* This is the "good" case - both entries are the 943219820Sjeff * same for this lid. Nothing to do. */ 944219820Sjeff continue; 945219820Sjeff 946219820Sjeff if (p_port_ref == NULL) 947219820Sjeff /* There is an object in the subnet database for this 948219820Sjeff * lid, but no such object exists in the reference 949219820Sjeff * port_list_tbl. This can occur if we wanted to assign 950219820Sjeff * a certain port with some lid (different than the one 951219820Sjeff * pre-assigned to it), and the port didn't get the 952219820Sjeff * PortInfo Set request. Due to this, the port is 953219820Sjeff * updated with its original lid in our database, but 954219820Sjeff * with the new lid we wanted to give it in our 955219820Sjeff * port_lid_tbl. */ 956219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3322: " 957219820Sjeff "lid %zu is wrongly assigned to port 0x%016" 958219820Sjeff PRIx64 " (\'%s\' port %u) in port_lid_tbl\n", 959219820Sjeff lid, 960219820Sjeff cl_ntoh64(osm_port_get_guid(p_port_stored)), 961219820Sjeff p_port_stored->p_node->print_desc, 962219820Sjeff p_port_stored->p_physp->port_num); 963219820Sjeff else if (p_port_stored == NULL) 964219820Sjeff /* There is an object in the new database, but no 965219820Sjeff * object in our subnet database. This is the matching 966219820Sjeff * case of the prior check - the port still has its 967219820Sjeff * original lid. */ 968219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3323: " 969219820Sjeff "port 0x%016" PRIx64 " (\'%s\' port %u)" 970219820Sjeff " exists in new port_lid_tbl under lid %zu," 971219820Sjeff " but missing in subnet port_lid_tbl db\n", 972219820Sjeff cl_ntoh64(osm_port_get_guid(p_port_ref)), 973219820Sjeff p_port_ref->p_node->print_desc, 974219820Sjeff p_port_ref->p_physp->port_num, lid); 975219820Sjeff else 976219820Sjeff /* if we reached here then p_port_stored != p_port_ref. 977219820Sjeff * We were trying to set a lid to p_port_stored, but 978219820Sjeff * it didn't reach it, and p_port_ref also didn't get 979219820Sjeff * the lid update. */ 980219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3324: " 981219820Sjeff "lid %zu has port 0x%016" PRIx64 982219820Sjeff " (\'%s\' port %u) in new port_lid_tbl db, " 983219820Sjeff "and port 0x%016" PRIx64 " (\'%s\' port %u)" 984219820Sjeff " in subnet port_lid_tbl db\n", lid, 985219820Sjeff cl_ntoh64(osm_port_get_guid(p_port_ref)), 986219820Sjeff p_port_ref->p_node->print_desc, 987219820Sjeff p_port_ref->p_physp->port_num, 988219820Sjeff cl_ntoh64(osm_port_get_guid(p_port_stored)), 989219820Sjeff p_port_ref->p_node->print_desc, 990219820Sjeff p_port_ref->p_physp->port_num); 991219820Sjeff 992219820Sjeff /* In any of these cases we want to set NULL in the 993219820Sjeff * port_lid_tbl, since this entry is invalid. Also, make sure 994219820Sjeff * we'll do another heavy sweep. */ 995219820Sjeff cl_ptr_vector_set(p_port_lid_tbl, lid, NULL); 996219820Sjeff sm->p_subn->subnet_initialization_error = TRUE; 997219820Sjeff } 998219820Sjeff 999219820Sjeff cl_ptr_vector_destroy(&ref_port_lid_tbl); 1000219820Sjeff OSM_LOG_EXIT(sm->p_log); 1001219820Sjeff} 1002219820Sjeff 1003219820Sjeffstatic void cleanup_switch(cl_map_item_t *item, void *log) 1004219820Sjeff{ 1005219820Sjeff osm_switch_t *sw = (osm_switch_t *)item; 1006219820Sjeff 1007219820Sjeff if (!sw->new_lft) 1008219820Sjeff return; 1009219820Sjeff 1010219820Sjeff if (memcmp(sw->lft, sw->new_lft, IB_LID_UCAST_END_HO + 1)) 1011219820Sjeff osm_log(log, OSM_LOG_ERROR, "ERR 331D: " 1012219820Sjeff "LFT of switch 0x%016" PRIx64 " is not up to date.\n", 1013219820Sjeff cl_ntoh64(sw->p_node->node_info.node_guid)); 1014219820Sjeff else { 1015219820Sjeff free(sw->new_lft); 1016219820Sjeff sw->new_lft = NULL; 1017219820Sjeff } 1018219820Sjeff} 1019219820Sjeff 1020219820Sjeff/********************************************************************** 1021219820Sjeff **********************************************************************/ 1022219820Sjeffint wait_for_pending_transactions(osm_stats_t * stats) 1023219820Sjeff{ 1024219820Sjeff#ifdef HAVE_LIBPTHREAD 1025219820Sjeff pthread_mutex_lock(&stats->mutex); 1026219820Sjeff while (stats->qp0_mads_outstanding && !osm_exit_flag) 1027219820Sjeff pthread_cond_wait(&stats->cond, &stats->mutex); 1028219820Sjeff pthread_mutex_unlock(&stats->mutex); 1029219820Sjeff#else 1030219820Sjeff while (1) { 1031219820Sjeff unsigned count = stats->qp0_mads_outstanding; 1032219820Sjeff if (!count || osm_exit_flag) 1033219820Sjeff break; 1034219820Sjeff cl_event_wait_on(&stats->event, EVENT_NO_TIMEOUT, TRUE); 1035219820Sjeff } 1036219820Sjeff#endif 1037219820Sjeff return osm_exit_flag; 1038219820Sjeff} 1039219820Sjeff 1040219820Sjeffstatic void do_sweep(osm_sm_t * sm) 1041219820Sjeff{ 1042219820Sjeff ib_api_status_t status; 1043219820Sjeff osm_remote_sm_t *p_remote_sm; 1044219820Sjeff 1045219820Sjeff if (sm->p_subn->sm_state != IB_SMINFO_STATE_MASTER && 1046219820Sjeff sm->p_subn->sm_state != IB_SMINFO_STATE_DISCOVERING) 1047219820Sjeff return; 1048219820Sjeff 1049219820Sjeff if (sm->p_subn->coming_out_of_standby) 1050219820Sjeff /* 1051219820Sjeff * Need to force re-write of sm_base_lid to all ports 1052219820Sjeff * to do that we want all the ports to be considered 1053219820Sjeff * foreign 1054219820Sjeff */ 1055219820Sjeff __osm_state_mgr_clean_known_lids(sm); 1056219820Sjeff 1057219820Sjeff sm->master_sm_found = 0; 1058219820Sjeff 1059219820Sjeff /* 1060219820Sjeff * If we already have switches, then try a light sweep. 1061219820Sjeff * Otherwise, this is probably our first discovery pass 1062219820Sjeff * or we are connected in loopback. In both cases do a 1063219820Sjeff * heavy sweep. 1064219820Sjeff * Note: If we are connected in loopback we want a heavy 1065219820Sjeff * sweep, since we will not be getting any traps if there is 1066219820Sjeff * a lost connection. 1067219820Sjeff */ 1068219820Sjeff /* if we are in DISCOVERING state - this means it is either in 1069219820Sjeff * initializing or wake up from STANDBY - run the heavy sweep */ 1070219820Sjeff if (cl_qmap_count(&sm->p_subn->sw_guid_tbl) 1071219820Sjeff && sm->p_subn->sm_state != IB_SMINFO_STATE_DISCOVERING 1072219820Sjeff && sm->p_subn->opt.force_heavy_sweep == FALSE 1073219820Sjeff && sm->p_subn->force_heavy_sweep == FALSE 1074219820Sjeff && sm->p_subn->force_reroute == FALSE 1075219820Sjeff && sm->p_subn->subnet_initialization_error == FALSE 1076219820Sjeff && (__osm_state_mgr_light_sweep_start(sm) == IB_SUCCESS)) { 1077219820Sjeff if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats)) 1078219820Sjeff return; 1079219820Sjeff if (!sm->p_subn->force_heavy_sweep) { 1080219820Sjeff OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE, 1081219820Sjeff "LIGHT SWEEP COMPLETE"); 1082219820Sjeff return; 1083219820Sjeff } 1084219820Sjeff } 1085219820Sjeff 1086219820Sjeff /* 1087219820Sjeff * Unicast cache should be invalidated if there were errors 1088219820Sjeff * during initialization or if subnet re-route is requested. 1089219820Sjeff */ 1090219820Sjeff if (sm->p_subn->opt.use_ucast_cache && 1091219820Sjeff (sm->p_subn->subnet_initialization_error || 1092219820Sjeff sm->p_subn->force_reroute)) 1093219820Sjeff osm_ucast_cache_invalidate(&sm->ucast_mgr); 1094219820Sjeff 1095219820Sjeff /* 1096219820Sjeff * If we don't need to do a heavy sweep and we want to do a reroute, 1097219820Sjeff * just reroute only. 1098219820Sjeff */ 1099219820Sjeff if (cl_qmap_count(&sm->p_subn->sw_guid_tbl) 1100219820Sjeff && sm->p_subn->sm_state != IB_SMINFO_STATE_DISCOVERING 1101219820Sjeff && sm->p_subn->opt.force_heavy_sweep == FALSE 1102219820Sjeff && sm->p_subn->force_heavy_sweep == FALSE 1103219820Sjeff && sm->p_subn->force_reroute == TRUE 1104219820Sjeff && sm->p_subn->subnet_initialization_error == FALSE) { 1105219820Sjeff /* Reset flag */ 1106219820Sjeff sm->p_subn->force_reroute = FALSE; 1107219820Sjeff 1108219820Sjeff /* Re-program the switches fully */ 1109219820Sjeff sm->p_subn->ignore_existing_lfts = TRUE; 1110219820Sjeff 1111219820Sjeff osm_ucast_mgr_process(&sm->ucast_mgr); 1112219820Sjeff 1113219820Sjeff /* Reset flag */ 1114219820Sjeff sm->p_subn->ignore_existing_lfts = FALSE; 1115219820Sjeff 1116219820Sjeff if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats)) 1117219820Sjeff return; 1118219820Sjeff 1119219820Sjeff if (!sm->p_subn->subnet_initialization_error) { 1120219820Sjeff OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE, 1121219820Sjeff "REROUTE COMPLETE"); 1122219820Sjeff return; 1123219820Sjeff } 1124219820Sjeff } 1125219820Sjeff 1126219820Sjeff /* go to heavy sweep */ 1127219820Sjeff_repeat_discovery: 1128219820Sjeff 1129219820Sjeff /* First of all - unset all flags */ 1130219820Sjeff sm->p_subn->force_heavy_sweep = FALSE; 1131219820Sjeff sm->p_subn->force_reroute = FALSE; 1132219820Sjeff sm->p_subn->subnet_initialization_error = FALSE; 1133219820Sjeff 1134219820Sjeff /* rescan configuration updates */ 1135219820Sjeff if (osm_subn_rescan_conf_files(sm->p_subn) < 0) 1136219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 331A: " 1137219820Sjeff "osm_subn_rescan_conf_file failed\n"); 1138219820Sjeff 1139219820Sjeff if (sm->p_subn->sm_state != IB_SMINFO_STATE_MASTER) 1140219820Sjeff sm->p_subn->need_update = 1; 1141219820Sjeff 1142219820Sjeff status = __osm_state_mgr_sweep_hop_0(sm); 1143219820Sjeff if (status != IB_SUCCESS || 1144219820Sjeff wait_for_pending_transactions(&sm->p_subn->p_osm->stats)) 1145219820Sjeff return; 1146219820Sjeff 1147219820Sjeff if (__osm_state_mgr_is_sm_port_down(sm) == TRUE) { 1148219820Sjeff osm_log(sm->p_log, OSM_LOG_SYS, "SM port is down\n"); 1149219820Sjeff OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE, "SM PORT DOWN"); 1150219820Sjeff 1151219820Sjeff /* Run the drop manager - we want to clear all records */ 1152219820Sjeff osm_drop_mgr_process(sm); 1153219820Sjeff 1154219820Sjeff /* Move to DISCOVERING state */ 1155219820Sjeff osm_sm_state_mgr_process(sm, OSM_SM_SIGNAL_DISCOVER); 1156219820Sjeff return; 1157219820Sjeff } 1158219820Sjeff 1159219820Sjeff status = __osm_state_mgr_sweep_hop_1(sm); 1160219820Sjeff if (status != IB_SUCCESS || 1161219820Sjeff wait_for_pending_transactions(&sm->p_subn->p_osm->stats)) 1162219820Sjeff return; 1163219820Sjeff 1164219820Sjeff /* discovery completed - check other sm presense */ 1165219820Sjeff if (sm->master_sm_found) { 1166219820Sjeff /* 1167219820Sjeff * Call the sm_state_mgr with signal 1168219820Sjeff * MASTER_OR_HIGHER_SM_DETECTED_DONE 1169219820Sjeff */ 1170219820Sjeff osm_sm_state_mgr_process(sm, 1171219820Sjeff OSM_SM_SIGNAL_MASTER_OR_HIGHER_SM_DETECTED); 1172219820Sjeff OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE, 1173219820Sjeff "ENTERING STANDBY STATE"); 1174219820Sjeff /* notify master SM about us */ 1175219820Sjeff osm_send_trap144(sm, 0); 1176219820Sjeff return; 1177219820Sjeff } 1178219820Sjeff 1179219820Sjeff /* if new sweep requested - don't bother with the rest */ 1180219820Sjeff if (sm->p_subn->force_heavy_sweep) 1181219820Sjeff goto _repeat_discovery; 1182219820Sjeff 1183219820Sjeff OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE, "HEAVY SWEEP COMPLETE"); 1184219820Sjeff 1185219820Sjeff /* If we are MASTER - get the highest remote_sm, and 1186219820Sjeff * see if it is higher than our local sm. 1187219820Sjeff */ 1188219820Sjeff if (sm->p_subn->sm_state == IB_SMINFO_STATE_MASTER) { 1189219820Sjeff p_remote_sm = __osm_state_mgr_get_highest_sm(sm); 1190219820Sjeff if (p_remote_sm != NULL) { 1191219820Sjeff /* report new ports (trap 64) before leaving MASTER */ 1192219820Sjeff __osm_state_mgr_report_new_ports(sm); 1193219820Sjeff 1194219820Sjeff /* need to handover the mastership 1195219820Sjeff * to the remote sm, and move to standby */ 1196219820Sjeff __osm_state_mgr_send_handover(sm, p_remote_sm); 1197219820Sjeff osm_sm_state_mgr_process(sm, 1198219820Sjeff OSM_SM_SIGNAL_HANDOVER_SENT); 1199219820Sjeff return; 1200219820Sjeff } else { 1201219820Sjeff /* We are the highest sm - check to see if there is 1202219820Sjeff * a remote SM that is in master state. */ 1203219820Sjeff p_remote_sm = 1204219820Sjeff __osm_state_mgr_exists_other_master_sm(sm); 1205219820Sjeff if (p_remote_sm != NULL) { 1206219820Sjeff /* There is a remote SM that is master. 1207219820Sjeff * need to wait for that SM to relinquish control 1208219820Sjeff * of its portion of the subnet. C14-60.2.1. 1209219820Sjeff * Also - need to start polling on that SM. */ 1210219820Sjeff sm->p_polling_sm = p_remote_sm; 1211219820Sjeff osm_sm_state_mgr_process(sm, 1212219820Sjeff OSM_SM_SIGNAL_WAIT_FOR_HANDOVER); 1213219820Sjeff return; 1214219820Sjeff } 1215219820Sjeff } 1216219820Sjeff } 1217219820Sjeff 1218219820Sjeff /* Need to continue with lid assignment */ 1219219820Sjeff osm_drop_mgr_process(sm); 1220219820Sjeff 1221219820Sjeff /* 1222219820Sjeff * If we are not MASTER already - this means that we are 1223219820Sjeff * in discovery state. call osm_sm_state_mgr with signal 1224219820Sjeff * DISCOVERY_COMPLETED 1225219820Sjeff */ 1226219820Sjeff if (sm->p_subn->sm_state == IB_SMINFO_STATE_DISCOVERING) 1227219820Sjeff osm_sm_state_mgr_process(sm, OSM_SM_SIGNAL_DISCOVERY_COMPLETED); 1228219820Sjeff 1229219820Sjeff osm_pkey_mgr_process(sm->p_subn->p_osm); 1230219820Sjeff 1231219820Sjeff osm_qos_setup(sm->p_subn->p_osm); 1232219820Sjeff 1233219820Sjeff /* try to restore SA DB (this should be before lid_mgr 1234219820Sjeff because we may want to disable clients reregistration 1235219820Sjeff when SA DB is restored) */ 1236219820Sjeff osm_sa_db_file_load(sm->p_subn->p_osm); 1237219820Sjeff 1238219820Sjeff if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats)) 1239219820Sjeff return; 1240219820Sjeff 1241219820Sjeff osm_lid_mgr_process_sm(&sm->lid_mgr); 1242219820Sjeff if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats)) 1243219820Sjeff return; 1244219820Sjeff 1245219820Sjeff OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE, 1246219820Sjeff "SM LID ASSIGNMENT COMPLETE - STARTING SUBNET LID CONFIG"); 1247219820Sjeff __osm_state_mgr_notify_lid_change(sm); 1248219820Sjeff 1249219820Sjeff osm_lid_mgr_process_subnet(&sm->lid_mgr); 1250219820Sjeff if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats)) 1251219820Sjeff return; 1252219820Sjeff 1253219820Sjeff /* At this point we need to check the consistency of 1254219820Sjeff * the port_lid_tbl under the subnet. There might be 1255219820Sjeff * errors in it if PortInfo Set requests didn't reach 1256219820Sjeff * their destination. */ 1257219820Sjeff __osm_state_mgr_check_tbl_consistency(sm); 1258219820Sjeff 1259219820Sjeff OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE, 1260219820Sjeff "LID ASSIGNMENT COMPLETE - STARTING SWITCH TABLE CONFIG"); 1261219820Sjeff 1262219820Sjeff /* 1263219820Sjeff * Proceed with unicast forwarding table configuration. 1264219820Sjeff */ 1265219820Sjeff 1266219820Sjeff if (!sm->ucast_mgr.cache_valid || 1267219820Sjeff osm_ucast_cache_process(&sm->ucast_mgr)) 1268219820Sjeff osm_ucast_mgr_process(&sm->ucast_mgr); 1269219820Sjeff 1270219820Sjeff if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats)) 1271219820Sjeff return; 1272219820Sjeff 1273219820Sjeff /* cleanup switch lft buffers */ 1274219820Sjeff cl_qmap_apply_func(&sm->p_subn->sw_guid_tbl, cleanup_switch, sm->p_log); 1275219820Sjeff 1276219820Sjeff /* We are done setting all LFTs so clear the ignore existing. 1277219820Sjeff * From now on, as long as we are still master, we want to 1278219820Sjeff * take into account these lfts. */ 1279219820Sjeff sm->p_subn->ignore_existing_lfts = FALSE; 1280219820Sjeff 1281219820Sjeff OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE, 1282219820Sjeff "SWITCHES CONFIGURED FOR UNICAST"); 1283219820Sjeff 1284219820Sjeff if (!sm->p_subn->opt.disable_multicast) { 1285219820Sjeff osm_mcast_mgr_process(sm); 1286219820Sjeff if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats)) 1287219820Sjeff return; 1288219820Sjeff OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE, 1289219820Sjeff "SWITCHES CONFIGURED FOR MULTICAST"); 1290219820Sjeff } 1291219820Sjeff 1292219820Sjeff /* 1293219820Sjeff * The LINK_PORTS state is required since we cannot count on 1294219820Sjeff * the port state change MADs to succeed. This is an artifact 1295219820Sjeff * of the spec defining state change from state X to state X 1296219820Sjeff * as an error. The hardware then is not required to process 1297219820Sjeff * other parameters provided by the Set(PortInfo) Packet. 1298219820Sjeff */ 1299219820Sjeff 1300219820Sjeff osm_link_mgr_process(sm, IB_LINK_NO_CHANGE); 1301219820Sjeff if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats)) 1302219820Sjeff return; 1303219820Sjeff 1304219820Sjeff OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE, 1305219820Sjeff "LINKS PORTS CONFIGURED - SET LINKS TO ARMED STATE"); 1306219820Sjeff 1307219820Sjeff osm_link_mgr_process(sm, IB_LINK_ARMED); 1308219820Sjeff if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats)) 1309219820Sjeff return; 1310219820Sjeff 1311219820Sjeff OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_VERBOSE, 1312219820Sjeff "LINKS ARMED - SET LINKS TO ACTIVE STATE"); 1313219820Sjeff 1314219820Sjeff osm_link_mgr_process(sm, IB_LINK_ACTIVE); 1315219820Sjeff if (wait_for_pending_transactions(&sm->p_subn->p_osm->stats)) 1316219820Sjeff return; 1317219820Sjeff 1318219820Sjeff /* 1319219820Sjeff * The sweep completed! 1320219820Sjeff */ 1321219820Sjeff 1322219820Sjeff /* 1323219820Sjeff * Send trap 64 on newly discovered endports 1324219820Sjeff */ 1325219820Sjeff __osm_state_mgr_report_new_ports(sm); 1326219820Sjeff 1327219820Sjeff /* in any case we zero this flag */ 1328219820Sjeff sm->p_subn->coming_out_of_standby = FALSE; 1329219820Sjeff 1330219820Sjeff /* If there were errors - then the subnet is not really up */ 1331219820Sjeff if (sm->p_subn->subnet_initialization_error == TRUE) { 1332219820Sjeff osm_log(sm->p_log, OSM_LOG_SYS, 1333219820Sjeff "Errors during initialization\n"); 1334219820Sjeff OSM_LOG_MSG_BOX(sm->p_log, OSM_LOG_ERROR, 1335219820Sjeff "ERRORS DURING INITIALIZATION"); 1336219820Sjeff } else { 1337219820Sjeff sm->p_subn->need_update = 0; 1338219820Sjeff osm_dump_all(sm->p_subn->p_osm); 1339219820Sjeff __osm_state_mgr_up_msg(sm); 1340219820Sjeff sm->p_subn->first_time_master_sweep = FALSE; 1341219820Sjeff 1342219820Sjeff if (osm_log_is_active(sm->p_log, OSM_LOG_VERBOSE)) 1343219820Sjeff osm_sa_db_file_dump(sm->p_subn->p_osm); 1344219820Sjeff } 1345219820Sjeff 1346219820Sjeff /* 1347219820Sjeff * Finally signal the subnet up event 1348219820Sjeff */ 1349219820Sjeff cl_event_signal(&sm->subnet_up_event); 1350219820Sjeff 1351219820Sjeff osm_opensm_report_event(sm->p_subn->p_osm, OSM_EVENT_ID_SUBNET_UP, NULL); 1352219820Sjeff 1353219820Sjeff /* if we got a signal to force heavy sweep or errors 1354219820Sjeff * in the middle of the sweep - try another sweep. */ 1355219820Sjeff if (sm->p_subn->force_heavy_sweep 1356219820Sjeff || sm->p_subn->subnet_initialization_error) 1357219820Sjeff osm_sm_signal(sm, OSM_SIGNAL_SWEEP); 1358219820Sjeff} 1359219820Sjeff 1360219820Sjeffstatic void do_process_mgrp_queue(osm_sm_t * sm) 1361219820Sjeff{ 1362219820Sjeff if (sm->p_subn->sm_state != IB_SMINFO_STATE_MASTER) 1363219820Sjeff return; 1364219820Sjeff osm_mcast_mgr_process_mgroups(sm); 1365219820Sjeff wait_for_pending_transactions(&sm->p_subn->p_osm->stats); 1366219820Sjeff} 1367219820Sjeff 1368219820Sjeffvoid osm_state_mgr_process(IN osm_sm_t * sm, IN osm_signal_t signal) 1369219820Sjeff{ 1370219820Sjeff CL_ASSERT(sm); 1371219820Sjeff 1372219820Sjeff OSM_LOG_ENTER(sm->p_log); 1373219820Sjeff 1374219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_DEBUG, 1375219820Sjeff "Received signal %s in state %s\n", 1376219820Sjeff osm_get_sm_signal_str(signal), 1377219820Sjeff osm_get_sm_mgr_state_str(sm->p_subn->sm_state)); 1378219820Sjeff 1379219820Sjeff switch (signal) { 1380219820Sjeff case OSM_SIGNAL_SWEEP: 1381219820Sjeff do_sweep(sm); 1382219820Sjeff break; 1383219820Sjeff 1384219820Sjeff case OSM_SIGNAL_IDLE_TIME_PROCESS_REQUEST: 1385219820Sjeff do_process_mgrp_queue(sm); 1386219820Sjeff break; 1387219820Sjeff 1388219820Sjeff default: 1389219820Sjeff CL_ASSERT(FALSE); 1390219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3320: " 1391219820Sjeff "Invalid SM signal %u\n", signal); 1392219820Sjeff break; 1393219820Sjeff } 1394219820Sjeff 1395219820Sjeff OSM_LOG_EXIT(sm->p_log); 1396219820Sjeff} 1397