1219820Sjeff/* 2219820Sjeff * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved. 3219820Sjeff * Copyright (c) 2002-2008 Mellanox Technologies LTD. All rights reserved. 4219820Sjeff * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. 5219820Sjeff * 6219820Sjeff * This software is available to you under a choice of one of two 7219820Sjeff * licenses. You may choose to be licensed under the terms of the GNU 8219820Sjeff * General Public License (GPL) Version 2, available from the file 9219820Sjeff * COPYING in the main directory of this source tree, or the 10219820Sjeff * OpenIB.org BSD license below: 11219820Sjeff * 12219820Sjeff * Redistribution and use in source and binary forms, with or 13219820Sjeff * without modification, are permitted provided that the following 14219820Sjeff * conditions are met: 15219820Sjeff * 16219820Sjeff * - Redistributions of source code must retain the above 17219820Sjeff * copyright notice, this list of conditions and the following 18219820Sjeff * disclaimer. 19219820Sjeff * 20219820Sjeff * - Redistributions in binary form must reproduce the above 21219820Sjeff * copyright notice, this list of conditions and the following 22219820Sjeff * disclaimer in the documentation and/or other materials 23219820Sjeff * provided with the distribution. 24219820Sjeff * 25219820Sjeff * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 26219820Sjeff * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 27219820Sjeff * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 28219820Sjeff * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 29219820Sjeff * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 30219820Sjeff * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 31219820Sjeff * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 32219820Sjeff * SOFTWARE. 33219820Sjeff * 34219820Sjeff */ 35219820Sjeff 36219820Sjeff/* 37219820Sjeff * Abstract: 38219820Sjeff * Implementation of osm_ni_rcv_t. 39219820Sjeff * This object represents the NodeInfo Receiver object. 40219820Sjeff * This object is part of the opensm family of objects. 41219820Sjeff */ 42219820Sjeff 43219820Sjeff#if HAVE_CONFIG_H 44219820Sjeff# include <config.h> 45219820Sjeff#endif /* HAVE_CONFIG_H */ 46219820Sjeff 47219820Sjeff#include <stdlib.h> 48219820Sjeff#include <string.h> 49219820Sjeff#include <iba/ib_types.h> 50219820Sjeff#include <complib/cl_qmap.h> 51219820Sjeff#include <complib/cl_passivelock.h> 52219820Sjeff#include <complib/cl_debug.h> 53219820Sjeff#include <opensm/osm_madw.h> 54219820Sjeff#include <opensm/osm_log.h> 55219820Sjeff#include <opensm/osm_node.h> 56219820Sjeff#include <opensm/osm_subnet.h> 57219820Sjeff#include <opensm/osm_router.h> 58219820Sjeff#include <opensm/osm_mad_pool.h> 59219820Sjeff#include <opensm/osm_helper.h> 60219820Sjeff#include <opensm/osm_msgdef.h> 61219820Sjeff#include <opensm/osm_opensm.h> 62219820Sjeff#include <opensm/osm_ucast_mgr.h> 63219820Sjeff 64219820Sjeffstatic void 65219820Sjeffreport_duplicated_guid(IN osm_sm_t * sm, 66219820Sjeff osm_physp_t * p_physp, 67219820Sjeff osm_node_t * p_neighbor_node, const uint8_t port_num) 68219820Sjeff{ 69219820Sjeff osm_physp_t *p_old, *p_new; 70219820Sjeff osm_dr_path_t path; 71219820Sjeff 72219820Sjeff p_old = p_physp->p_remote_physp; 73219820Sjeff p_new = osm_node_get_physp_ptr(p_neighbor_node, port_num); 74219820Sjeff 75219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D01: " 76219820Sjeff "Found duplicated node.\n" 77219820Sjeff "Node 0x%" PRIx64 " port %u is reachable from remote node " 78219820Sjeff "0x%" PRIx64 " port %u and remote node 0x%" PRIx64 " port %u.\n" 79219820Sjeff "Paths are:\n", 80219820Sjeff cl_ntoh64(p_physp->p_node->node_info.node_guid), 81219820Sjeff p_physp->port_num, 82219820Sjeff cl_ntoh64(p_old->p_node->node_info.node_guid), p_old->port_num, 83219820Sjeff cl_ntoh64(p_new->p_node->node_info.node_guid), p_new->port_num); 84219820Sjeff 85219820Sjeff osm_dump_dr_path(sm->p_log, osm_physp_get_dr_path_ptr(p_physp), 86219820Sjeff OSM_LOG_ERROR); 87219820Sjeff 88219820Sjeff path = *osm_physp_get_dr_path_ptr(p_new); 89219820Sjeff osm_dr_path_extend(&path, port_num); 90219820Sjeff osm_dump_dr_path(sm->p_log, &path, OSM_LOG_ERROR); 91219820Sjeff 92219820Sjeff osm_log(sm->p_log, OSM_LOG_SYS, 93219820Sjeff "FATAL: duplicated guids or 12x lane reversal\n"); 94219820Sjeff} 95219820Sjeff 96219820Sjeffstatic void requery_dup_node_info(IN osm_sm_t * sm, 97219820Sjeff osm_physp_t * p_physp, unsigned count) 98219820Sjeff{ 99219820Sjeff osm_madw_context_t context; 100219820Sjeff osm_dr_path_t path; 101219820Sjeff cl_status_t status; 102219820Sjeff 103219820Sjeff path = *osm_physp_get_dr_path_ptr(p_physp->p_remote_physp); 104219820Sjeff osm_dr_path_extend(&path, p_physp->p_remote_physp->port_num); 105219820Sjeff 106219820Sjeff context.ni_context.node_guid = 107219820Sjeff p_physp->p_remote_physp->p_node->node_info.port_guid; 108219820Sjeff context.ni_context.port_num = p_physp->p_remote_physp->port_num; 109219820Sjeff context.ni_context.dup_node_guid = p_physp->p_node->node_info.node_guid; 110219820Sjeff context.ni_context.dup_port_num = p_physp->port_num; 111219820Sjeff context.ni_context.dup_count = count; 112219820Sjeff 113219820Sjeff status = osm_req_get(sm, &path, IB_MAD_ATTR_NODE_INFO, 114219820Sjeff 0, CL_DISP_MSGID_NONE, &context); 115219820Sjeff 116219820Sjeff if (status != IB_SUCCESS) 117219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D02: " 118219820Sjeff "Failure initiating NodeInfo request (%s)\n", 119219820Sjeff ib_get_err_str(status)); 120219820Sjeff} 121219820Sjeff 122219820Sjeff/********************************************************************** 123219820Sjeff The plock must be held before calling this function. 124219820Sjeff**********************************************************************/ 125219820Sjeffstatic void 126219820Sjeff__osm_ni_rcv_set_links(IN osm_sm_t * sm, 127219820Sjeff osm_node_t * p_node, 128219820Sjeff const uint8_t port_num, 129219820Sjeff const osm_ni_context_t * const p_ni_context) 130219820Sjeff{ 131219820Sjeff osm_node_t *p_neighbor_node; 132219820Sjeff osm_physp_t *p_physp; 133219820Sjeff 134219820Sjeff OSM_LOG_ENTER(sm->p_log); 135219820Sjeff 136219820Sjeff /* 137219820Sjeff A special case exists in which the node we're trying to 138219820Sjeff link is our own node. In this case, the guid value in 139219820Sjeff the ni_context will be zero. 140219820Sjeff */ 141219820Sjeff if (p_ni_context->node_guid == 0) { 142219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_DEBUG, 143219820Sjeff "Nothing to link for our own node 0x%" PRIx64 "\n", 144219820Sjeff cl_ntoh64(osm_node_get_node_guid(p_node))); 145219820Sjeff goto _exit; 146219820Sjeff } 147219820Sjeff 148219820Sjeff p_neighbor_node = osm_get_node_by_guid(sm->p_subn, 149219820Sjeff p_ni_context->node_guid); 150219820Sjeff if (!p_neighbor_node) { 151219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D10: " 152219820Sjeff "Unexpected removal of neighbor node " 153219820Sjeff "0x%" PRIx64 "\n", cl_ntoh64(p_ni_context->node_guid)); 154219820Sjeff goto _exit; 155219820Sjeff } 156219820Sjeff 157219820Sjeff /* 158219820Sjeff We have seen this neighbor node before, but we might 159219820Sjeff not have seen this port on the neighbor node before. 160219820Sjeff We should not set links to an uninitialized port on the 161219820Sjeff neighbor, so check validity up front. If it's not 162219820Sjeff valid, do nothing, since we'll see this link again 163219820Sjeff when we probe the neighbor. 164219820Sjeff */ 165219820Sjeff if (!osm_node_link_has_valid_ports(p_node, port_num, 166219820Sjeff p_neighbor_node, 167219820Sjeff p_ni_context->port_num)) 168219820Sjeff goto _exit; 169219820Sjeff 170219820Sjeff if (osm_node_link_exists(p_node, port_num, 171219820Sjeff p_neighbor_node, p_ni_context->port_num)) { 172219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Link already exists\n"); 173219820Sjeff goto _exit; 174219820Sjeff } 175219820Sjeff 176219820Sjeff if (osm_node_has_any_link(p_node, port_num) && 177219820Sjeff sm->p_subn->force_heavy_sweep == FALSE && 178219820Sjeff (!p_ni_context->dup_count || 179219820Sjeff (p_ni_context->dup_node_guid == osm_node_get_node_guid(p_node) && 180219820Sjeff p_ni_context->dup_port_num == port_num))) { 181219820Sjeff /* 182219820Sjeff Uh oh... 183219820Sjeff This could be reconnected ports, but also duplicated GUID 184219820Sjeff (2 nodes have the same guid) or a 12x link with lane reversal 185219820Sjeff that is not configured correctly. 186219820Sjeff We will try to recover by querying NodeInfo again. 187219820Sjeff In order to catch even fast port moving to new location(s) and 188219820Sjeff back we will count up to 5. 189219820Sjeff Some crazy reconnections (newly created switch loop right before 190219820Sjeff targeted CA) will not be catched this way. So in worst case - 191219820Sjeff report GUID duplication and request new discovery. 192219820Sjeff When switch node is targeted NodeInfo querying will be done in 193219820Sjeff opposite order, this is much stronger check, unfortunately it is 194219820Sjeff impossible with CAs. 195219820Sjeff */ 196219820Sjeff p_physp = osm_node_get_physp_ptr(p_node, port_num); 197219820Sjeff if (p_ni_context->dup_count > 5) { 198219820Sjeff report_duplicated_guid(sm, p_physp, 199219820Sjeff p_neighbor_node, 200219820Sjeff p_ni_context->port_num); 201219820Sjeff sm->p_subn->force_heavy_sweep = TRUE; 202219820Sjeff } else if (p_node->sw) 203219820Sjeff requery_dup_node_info(sm, p_physp->p_remote_physp, 204219820Sjeff p_ni_context->dup_count + 1); 205219820Sjeff else 206219820Sjeff requery_dup_node_info(sm, p_physp, 207219820Sjeff p_ni_context->dup_count + 1); 208219820Sjeff } 209219820Sjeff 210219820Sjeff /* 211219820Sjeff When there are only two nodes with exact same guids (connected back 212219820Sjeff to back) - the previous check for duplicated guid will not catch 213219820Sjeff them. But the link will be from the port to itself... 214219820Sjeff Enhanced Port 0 is an exception to this 215219820Sjeff */ 216219820Sjeff if ((osm_node_get_node_guid(p_node) == p_ni_context->node_guid) && 217219820Sjeff (port_num == p_ni_context->port_num) && 218219820Sjeff port_num != 0 && cl_qmap_count(&sm->p_subn->sw_guid_tbl) == 0) { 219219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, 220219820Sjeff "Duplicate GUID found by link from a port to itself:" 221219820Sjeff "node 0x%" PRIx64 ", port number %u\n", 222219820Sjeff cl_ntoh64(osm_node_get_node_guid(p_node)), port_num); 223219820Sjeff p_physp = osm_node_get_physp_ptr(p_node, port_num); 224219820Sjeff osm_dump_dr_path(sm->p_log, 225219820Sjeff osm_physp_get_dr_path_ptr(p_physp), 226219820Sjeff OSM_LOG_VERBOSE); 227219820Sjeff 228219820Sjeff if (sm->p_subn->opt.exit_on_fatal == TRUE) { 229219820Sjeff osm_log(sm->p_log, OSM_LOG_SYS, 230219820Sjeff "Errors on subnet. Duplicate GUID found " 231219820Sjeff "by link from a port to itself. " 232219820Sjeff "See verbose opensm.log for more details\n"); 233219820Sjeff exit(1); 234219820Sjeff } 235219820Sjeff } 236219820Sjeff 237219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_DEBUG, 238219820Sjeff "Creating new link between:\n\t\t\t\tnode 0x%" PRIx64 239219820Sjeff ", port number %u and\n\t\t\t\tnode 0x%" PRIx64 240219820Sjeff ", port number %u\n", 241219820Sjeff cl_ntoh64(osm_node_get_node_guid(p_node)), port_num, 242219820Sjeff cl_ntoh64(p_ni_context->node_guid), p_ni_context->port_num); 243219820Sjeff 244219820Sjeff if (sm->ucast_mgr.cache_valid) 245219820Sjeff osm_ucast_cache_check_new_link(&sm->ucast_mgr, 246219820Sjeff p_node, port_num, 247219820Sjeff p_neighbor_node, 248219820Sjeff p_ni_context->port_num); 249219820Sjeff 250219820Sjeff osm_node_link(p_node, port_num, p_neighbor_node, 251219820Sjeff p_ni_context->port_num); 252219820Sjeff 253219820Sjeff_exit: 254219820Sjeff OSM_LOG_EXIT(sm->p_log); 255219820Sjeff} 256219820Sjeff 257219820Sjeff/********************************************************************** 258219820Sjeff The plock must be held before calling this function. 259219820Sjeff**********************************************************************/ 260219820Sjeffstatic void 261219820Sjeff__osm_ni_rcv_process_new_node(IN osm_sm_t * sm, 262219820Sjeff IN osm_node_t * const p_node, 263219820Sjeff IN const osm_madw_t * const p_madw) 264219820Sjeff{ 265219820Sjeff ib_api_status_t status = IB_SUCCESS; 266219820Sjeff osm_madw_context_t context; 267219820Sjeff osm_physp_t *p_physp; 268219820Sjeff ib_node_info_t *p_ni; 269219820Sjeff ib_smp_t *p_smp; 270219820Sjeff uint8_t port_num; 271219820Sjeff 272219820Sjeff OSM_LOG_ENTER(sm->p_log); 273219820Sjeff 274219820Sjeff p_smp = osm_madw_get_smp_ptr(p_madw); 275219820Sjeff p_ni = (ib_node_info_t *) ib_smp_get_payload_ptr(p_smp); 276219820Sjeff port_num = ib_node_info_get_local_port_num(p_ni); 277219820Sjeff 278219820Sjeff /* 279219820Sjeff Request PortInfo & NodeDescription attributes for the port 280219820Sjeff that responded to the NodeInfo attribute. 281219820Sjeff Because this is a channel adapter or router, we are 282219820Sjeff not allowed to request PortInfo for the other ports. 283219820Sjeff Set the context union properly, so the recipient 284219820Sjeff knows which node & port are relevant. 285219820Sjeff */ 286219820Sjeff p_physp = osm_node_get_physp_ptr(p_node, port_num); 287219820Sjeff 288219820Sjeff context.pi_context.node_guid = p_ni->node_guid; 289219820Sjeff context.pi_context.port_guid = p_ni->port_guid; 290219820Sjeff context.pi_context.set_method = FALSE; 291219820Sjeff context.pi_context.light_sweep = FALSE; 292219820Sjeff context.pi_context.active_transition = FALSE; 293219820Sjeff 294219820Sjeff status = osm_req_get(sm, osm_physp_get_dr_path_ptr(p_physp), 295219820Sjeff IB_MAD_ATTR_PORT_INFO, 296219820Sjeff cl_hton32(port_num), CL_DISP_MSGID_NONE, &context); 297219820Sjeff if (status != IB_SUCCESS) 298219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D02: " 299219820Sjeff "Failure initiating PortInfo request (%s)\n", 300219820Sjeff ib_get_err_str(status)); 301219820Sjeff 302219820Sjeff OSM_LOG_EXIT(sm->p_log); 303219820Sjeff} 304219820Sjeff 305219820Sjeff/********************************************************************** 306219820Sjeff The plock must be held before calling this function. 307219820Sjeff**********************************************************************/ 308219820Sjeffvoid 309219820Sjeffosm_req_get_node_desc(IN osm_sm_t * sm, 310219820Sjeff osm_physp_t *p_physp) 311219820Sjeff{ 312219820Sjeff ib_api_status_t status = IB_SUCCESS; 313219820Sjeff osm_madw_context_t context; 314219820Sjeff 315219820Sjeff OSM_LOG_ENTER(sm->p_log); 316219820Sjeff 317219820Sjeff context.nd_context.node_guid = 318219820Sjeff osm_node_get_node_guid(osm_physp_get_node_ptr(p_physp)); 319219820Sjeff 320219820Sjeff status = osm_req_get(sm, osm_physp_get_dr_path_ptr(p_physp), 321219820Sjeff IB_MAD_ATTR_NODE_DESC, 322219820Sjeff 0, CL_DISP_MSGID_NONE, &context); 323219820Sjeff if (status != IB_SUCCESS) 324219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D03: " 325219820Sjeff "Failure initiating NodeDescription request (%s)\n", 326219820Sjeff ib_get_err_str(status)); 327219820Sjeff 328219820Sjeff OSM_LOG_EXIT(sm->p_log); 329219820Sjeff} 330219820Sjeff 331219820Sjeff/********************************************************************** 332219820Sjeff The plock must be held before calling this function. 333219820Sjeff**********************************************************************/ 334219820Sjeffstatic void 335219820Sjeff__osm_ni_rcv_get_node_desc(IN osm_sm_t * sm, 336219820Sjeff IN osm_node_t * const p_node, 337219820Sjeff IN const osm_madw_t * const p_madw) 338219820Sjeff{ 339219820Sjeff ib_node_info_t *p_ni; 340219820Sjeff ib_smp_t *p_smp; 341219820Sjeff uint8_t port_num; 342219820Sjeff osm_physp_t *p_physp = NULL; 343219820Sjeff 344219820Sjeff OSM_LOG_ENTER(sm->p_log); 345219820Sjeff 346219820Sjeff p_smp = osm_madw_get_smp_ptr(p_madw); 347219820Sjeff p_ni = (ib_node_info_t *) ib_smp_get_payload_ptr(p_smp); 348219820Sjeff port_num = ib_node_info_get_local_port_num(p_ni); 349219820Sjeff 350219820Sjeff /* 351219820Sjeff Request PortInfo & NodeDescription attributes for the port 352219820Sjeff that responded to the NodeInfo attribute. 353219820Sjeff Because this is a channel adapter or router, we are 354219820Sjeff not allowed to request PortInfo for the other ports. 355219820Sjeff Set the context union properly, so the recipient 356219820Sjeff knows which node & port are relevant. 357219820Sjeff */ 358219820Sjeff p_physp = osm_node_get_physp_ptr(p_node, port_num); 359219820Sjeff 360219820Sjeff osm_req_get_node_desc(sm, p_physp); 361219820Sjeff 362219820Sjeff OSM_LOG_EXIT(sm->p_log); 363219820Sjeff} 364219820Sjeff 365219820Sjeff/********************************************************************** 366219820Sjeff The plock must be held before calling this function. 367219820Sjeff**********************************************************************/ 368219820Sjeffstatic void 369219820Sjeff__osm_ni_rcv_process_new_ca_or_router(IN osm_sm_t * sm, 370219820Sjeff IN osm_node_t * const p_node, 371219820Sjeff IN const osm_madw_t * const p_madw) 372219820Sjeff{ 373219820Sjeff OSM_LOG_ENTER(sm->p_log); 374219820Sjeff 375219820Sjeff __osm_ni_rcv_process_new_node(sm, p_node, p_madw); 376219820Sjeff 377219820Sjeff /* 378219820Sjeff A node guid of 0 is the corner case that indicates 379219820Sjeff we discovered our own node. Initialize the subnet 380219820Sjeff object with the SM's own port guid. 381219820Sjeff */ 382219820Sjeff if (osm_madw_get_ni_context_ptr(p_madw)->node_guid == 0) 383219820Sjeff sm->p_subn->sm_port_guid = p_node->node_info.port_guid; 384219820Sjeff 385219820Sjeff OSM_LOG_EXIT(sm->p_log); 386219820Sjeff} 387219820Sjeff 388219820Sjeff/********************************************************************** 389219820Sjeff The plock must be held before calling this function. 390219820Sjeff**********************************************************************/ 391219820Sjeffstatic void 392219820Sjeff__osm_ni_rcv_process_existing_ca_or_router(IN osm_sm_t * sm, 393219820Sjeff IN osm_node_t * const p_node, 394219820Sjeff IN const osm_madw_t * const p_madw) 395219820Sjeff{ 396219820Sjeff ib_node_info_t *p_ni; 397219820Sjeff ib_smp_t *p_smp; 398219820Sjeff osm_port_t *p_port; 399219820Sjeff osm_port_t *p_port_check; 400219820Sjeff osm_madw_context_t context; 401219820Sjeff uint8_t port_num; 402219820Sjeff osm_physp_t *p_physp; 403219820Sjeff ib_api_status_t status; 404219820Sjeff osm_dr_path_t *p_dr_path; 405219820Sjeff osm_bind_handle_t h_bind; 406219820Sjeff 407219820Sjeff OSM_LOG_ENTER(sm->p_log); 408219820Sjeff 409219820Sjeff p_smp = osm_madw_get_smp_ptr(p_madw); 410219820Sjeff p_ni = (ib_node_info_t *) ib_smp_get_payload_ptr(p_smp); 411219820Sjeff port_num = ib_node_info_get_local_port_num(p_ni); 412219820Sjeff h_bind = osm_madw_get_bind_handle(p_madw); 413219820Sjeff 414219820Sjeff /* 415219820Sjeff Determine if we have encountered this node through a 416219820Sjeff previously undiscovered port. If so, build the new 417219820Sjeff port object. 418219820Sjeff */ 419219820Sjeff p_port = osm_get_port_by_guid(sm->p_subn, p_ni->port_guid); 420219820Sjeff if (!p_port) { 421219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, 422219820Sjeff "Creating new port object with GUID 0x%" PRIx64 "\n", 423219820Sjeff cl_ntoh64(p_ni->port_guid)); 424219820Sjeff 425219820Sjeff osm_node_init_physp(p_node, p_madw); 426219820Sjeff 427219820Sjeff p_port = osm_port_new(p_ni, p_node); 428219820Sjeff if (p_port == NULL) { 429219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D04: " 430219820Sjeff "Unable to create new port object\n"); 431219820Sjeff goto Exit; 432219820Sjeff } 433219820Sjeff 434219820Sjeff /* 435219820Sjeff Add the new port object to the database. 436219820Sjeff */ 437219820Sjeff p_port_check = 438219820Sjeff (osm_port_t *) cl_qmap_insert(&sm->p_subn->port_guid_tbl, 439219820Sjeff p_ni->port_guid, 440219820Sjeff &p_port->map_item); 441219820Sjeff if (p_port_check != p_port) { 442219820Sjeff /* 443219820Sjeff We should never be here! 444219820Sjeff Somehow, this port GUID already exists in the table. 445219820Sjeff */ 446219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D12: " 447219820Sjeff "Port 0x%" PRIx64 " already in the database!\n", 448219820Sjeff cl_ntoh64(p_ni->port_guid)); 449219820Sjeff 450219820Sjeff osm_port_delete(&p_port); 451219820Sjeff goto Exit; 452219820Sjeff } 453219820Sjeff 454219820Sjeff /* If we are a master, then this means the port is new on the subnet. 455219820Sjeff Mark it as new - need to send trap 64 on these ports. 456219820Sjeff The condition that we are master is true, since if we are in discovering 457219820Sjeff state (meaning we woke up from standby or we are just initializing), 458219820Sjeff then these ports may be new to us, but are not new on the subnet. 459219820Sjeff If we are master, then the subnet as we know it is the updated one, 460219820Sjeff and any new ports we encounter should cause trap 64. C14-72.1.1 */ 461219820Sjeff if (sm->p_subn->sm_state == IB_SMINFO_STATE_MASTER) 462219820Sjeff p_port->is_new = 1; 463219820Sjeff 464219820Sjeff p_physp = osm_node_get_physp_ptr(p_node, port_num); 465219820Sjeff } else { 466219820Sjeff p_physp = osm_node_get_physp_ptr(p_node, port_num); 467219820Sjeff /* 468219820Sjeff Update the DR Path to the port, 469219820Sjeff in case the old one is no longer available. 470219820Sjeff */ 471219820Sjeff p_dr_path = osm_physp_get_dr_path_ptr(p_physp); 472219820Sjeff 473219820Sjeff osm_dr_path_init(p_dr_path, h_bind, p_smp->hop_count, 474219820Sjeff p_smp->initial_path); 475219820Sjeff } 476219820Sjeff 477219820Sjeff context.pi_context.node_guid = p_ni->node_guid; 478219820Sjeff context.pi_context.port_guid = p_ni->port_guid; 479219820Sjeff context.pi_context.set_method = FALSE; 480219820Sjeff context.pi_context.light_sweep = FALSE; 481219820Sjeff 482219820Sjeff status = osm_req_get(sm, osm_physp_get_dr_path_ptr(p_physp), 483219820Sjeff IB_MAD_ATTR_PORT_INFO, 484219820Sjeff cl_hton32(port_num), CL_DISP_MSGID_NONE, &context); 485219820Sjeff 486219820Sjeff if (status != IB_SUCCESS) 487219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D13: " 488219820Sjeff "Failure initiating PortInfo request (%s)\n", 489219820Sjeff ib_get_err_str(status)); 490219820Sjeff 491219820SjeffExit: 492219820Sjeff OSM_LOG_EXIT(sm->p_log); 493219820Sjeff} 494219820Sjeff 495219820Sjeff/********************************************************************** 496219820Sjeff **********************************************************************/ 497219820Sjeffstatic void 498219820Sjeff__osm_ni_rcv_process_switch(IN osm_sm_t * sm, 499219820Sjeff IN osm_node_t * const p_node, 500219820Sjeff IN const osm_madw_t * const p_madw) 501219820Sjeff{ 502219820Sjeff ib_api_status_t status = IB_SUCCESS; 503219820Sjeff osm_madw_context_t context; 504219820Sjeff osm_dr_path_t *path; 505219820Sjeff ib_smp_t *p_smp; 506219820Sjeff 507219820Sjeff OSM_LOG_ENTER(sm->p_log); 508219820Sjeff 509219820Sjeff p_smp = osm_madw_get_smp_ptr(p_madw); 510219820Sjeff 511219820Sjeff /* update DR path of already initialized switch port 0 */ 512219820Sjeff path = osm_physp_get_dr_path_ptr(osm_node_get_physp_ptr(p_node, 0)); 513219820Sjeff osm_dr_path_init(path, osm_madw_get_bind_handle(p_madw), 514219820Sjeff p_smp->hop_count, p_smp->initial_path); 515219820Sjeff 516219820Sjeff context.si_context.node_guid = osm_node_get_node_guid(p_node); 517219820Sjeff context.si_context.set_method = FALSE; 518219820Sjeff context.si_context.light_sweep = FALSE; 519219820Sjeff 520219820Sjeff /* Request a SwitchInfo attribute */ 521219820Sjeff status = osm_req_get(sm, path, IB_MAD_ATTR_SWITCH_INFO, 522219820Sjeff 0, CL_DISP_MSGID_NONE, &context); 523219820Sjeff if (status != IB_SUCCESS) 524219820Sjeff /* continue despite error */ 525219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D06: " 526219820Sjeff "Failure initiating SwitchInfo request (%s)\n", 527219820Sjeff ib_get_err_str(status)); 528219820Sjeff 529219820Sjeff OSM_LOG_EXIT(sm->p_log); 530219820Sjeff} 531219820Sjeff 532219820Sjeff/********************************************************************** 533219820Sjeff The plock must be held before calling this function. 534219820Sjeff**********************************************************************/ 535219820Sjeffstatic void 536219820Sjeff__osm_ni_rcv_process_existing_switch(IN osm_sm_t * sm, 537219820Sjeff IN osm_node_t * const p_node, 538219820Sjeff IN const osm_madw_t * const p_madw) 539219820Sjeff{ 540219820Sjeff OSM_LOG_ENTER(sm->p_log); 541219820Sjeff 542219820Sjeff /* 543219820Sjeff If this switch has already been probed during this sweep, 544219820Sjeff then don't bother reprobing it. 545219820Sjeff There is one exception - if the node has been visited, but 546219820Sjeff for some reason we don't have the switch object (this can happen 547219820Sjeff if the SwitchInfo mad didn't reach the SM) then we want 548219820Sjeff to retry to probe the switch. 549219820Sjeff */ 550219820Sjeff if (p_node->discovery_count == 1) 551219820Sjeff __osm_ni_rcv_process_switch(sm, p_node, p_madw); 552219820Sjeff else if (!p_node->sw || p_node->sw->discovery_count == 0) { 553219820Sjeff /* we don't have the SwitchInfo - retry to get it */ 554219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_DEBUG, 555219820Sjeff "Retry to get SwitchInfo on node GUID:0x%" 556219820Sjeff PRIx64 "\n", cl_ntoh64(osm_node_get_node_guid(p_node))); 557219820Sjeff __osm_ni_rcv_process_switch(sm, p_node, p_madw); 558219820Sjeff } 559219820Sjeff 560219820Sjeff OSM_LOG_EXIT(sm->p_log); 561219820Sjeff} 562219820Sjeff 563219820Sjeff/********************************************************************** 564219820Sjeff The plock must be held before calling this function. 565219820Sjeff**********************************************************************/ 566219820Sjeffstatic void 567219820Sjeff__osm_ni_rcv_process_new_switch(IN osm_sm_t * sm, 568219820Sjeff IN osm_node_t * const p_node, 569219820Sjeff IN const osm_madw_t * const p_madw) 570219820Sjeff{ 571219820Sjeff OSM_LOG_ENTER(sm->p_log); 572219820Sjeff 573219820Sjeff __osm_ni_rcv_process_switch(sm, p_node, p_madw); 574219820Sjeff 575219820Sjeff /* 576219820Sjeff A node guid of 0 is the corner case that indicates 577219820Sjeff we discovered our own node. Initialize the subnet 578219820Sjeff object with the SM's own port guid. 579219820Sjeff */ 580219820Sjeff if (osm_madw_get_ni_context_ptr(p_madw)->node_guid == 0) 581219820Sjeff sm->p_subn->sm_port_guid = p_node->node_info.port_guid; 582219820Sjeff 583219820Sjeff OSM_LOG_EXIT(sm->p_log); 584219820Sjeff} 585219820Sjeff 586219820Sjeff/********************************************************************** 587219820Sjeff The plock must NOT be held before calling this function. 588219820Sjeff**********************************************************************/ 589219820Sjeffstatic void 590219820Sjeff__osm_ni_rcv_process_new(IN osm_sm_t * sm, 591219820Sjeff IN const osm_madw_t * const p_madw) 592219820Sjeff{ 593219820Sjeff osm_node_t *p_node; 594219820Sjeff osm_node_t *p_node_check; 595219820Sjeff osm_port_t *p_port; 596219820Sjeff osm_port_t *p_port_check; 597219820Sjeff osm_router_t *p_rtr = NULL; 598219820Sjeff osm_router_t *p_rtr_check; 599219820Sjeff cl_qmap_t *p_rtr_guid_tbl; 600219820Sjeff ib_node_info_t *p_ni; 601219820Sjeff ib_smp_t *p_smp; 602219820Sjeff osm_ni_context_t *p_ni_context; 603219820Sjeff uint8_t port_num; 604219820Sjeff 605219820Sjeff OSM_LOG_ENTER(sm->p_log); 606219820Sjeff 607219820Sjeff p_smp = osm_madw_get_smp_ptr(p_madw); 608219820Sjeff p_ni = (ib_node_info_t *) ib_smp_get_payload_ptr(p_smp); 609219820Sjeff p_ni_context = osm_madw_get_ni_context_ptr(p_madw); 610219820Sjeff port_num = ib_node_info_get_local_port_num(p_ni); 611219820Sjeff 612219820Sjeff osm_dump_smp_dr_path(sm->p_log, p_smp, OSM_LOG_VERBOSE); 613219820Sjeff 614219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, 615219820Sjeff "Discovered new %s node," 616219820Sjeff "\n\t\t\t\tGUID 0x%" PRIx64 ", TID 0x%" PRIx64 "\n", 617219820Sjeff ib_get_node_type_str(p_ni->node_type), 618219820Sjeff cl_ntoh64(p_ni->node_guid), cl_ntoh64(p_smp->trans_id)); 619219820Sjeff 620219820Sjeff p_node = osm_node_new(p_madw); 621219820Sjeff if (p_node == NULL) { 622219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D07: " 623219820Sjeff "Unable to create new node object\n"); 624219820Sjeff goto Exit; 625219820Sjeff } 626219820Sjeff 627219820Sjeff /* 628219820Sjeff Create a new port object to represent this node's physical 629219820Sjeff ports in the port table. 630219820Sjeff */ 631219820Sjeff p_port = osm_port_new(p_ni, p_node); 632219820Sjeff if (p_port == NULL) { 633219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D14: " 634219820Sjeff "Unable to create new port object\n"); 635219820Sjeff osm_node_delete(&p_node); 636219820Sjeff goto Exit; 637219820Sjeff } 638219820Sjeff 639219820Sjeff /* 640219820Sjeff Add the new port object to the database. 641219820Sjeff */ 642219820Sjeff p_port_check = 643219820Sjeff (osm_port_t *) cl_qmap_insert(&sm->p_subn->port_guid_tbl, 644219820Sjeff p_ni->port_guid, &p_port->map_item); 645219820Sjeff if (p_port_check != p_port) { 646219820Sjeff /* 647219820Sjeff We should never be here! 648219820Sjeff Somehow, this port GUID already exists in the table. 649219820Sjeff */ 650219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D15: " 651219820Sjeff "Duplicate Port GUID 0x%" PRIx64 652219820Sjeff "! Found by the two directed routes:\n", 653219820Sjeff cl_ntoh64(p_ni->port_guid)); 654219820Sjeff osm_dump_dr_path(sm->p_log, 655219820Sjeff osm_physp_get_dr_path_ptr(p_port->p_physp), 656219820Sjeff OSM_LOG_ERROR); 657219820Sjeff osm_dump_dr_path(sm->p_log, 658219820Sjeff osm_physp_get_dr_path_ptr(p_port_check-> 659219820Sjeff p_physp), 660219820Sjeff OSM_LOG_ERROR); 661219820Sjeff osm_port_delete(&p_port); 662219820Sjeff osm_node_delete(&p_node); 663219820Sjeff goto Exit; 664219820Sjeff } 665219820Sjeff 666219820Sjeff /* If we are a master, then this means the port is new on the subnet. 667219820Sjeff Mark it as new - need to send trap 64 on these ports. 668219820Sjeff The condition that we are master is true, since if we are in discovering 669219820Sjeff state (meaning we woke up from standby or we are just initializing), 670219820Sjeff then these ports may be new to us, but are not new on the subnet. 671219820Sjeff If we are master, then the subnet as we know it is the updated one, 672219820Sjeff and any new ports we encounter should cause trap 64. C14-72.1.1 */ 673219820Sjeff if (sm->p_subn->sm_state == IB_SMINFO_STATE_MASTER) 674219820Sjeff p_port->is_new = 1; 675219820Sjeff 676219820Sjeff /* If there were RouterInfo or other router attribute, 677219820Sjeff this would be elsewhere */ 678219820Sjeff if (p_ni->node_type == IB_NODE_TYPE_ROUTER) { 679219820Sjeff if ((p_rtr = osm_router_new(p_port)) == NULL) 680219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D1A: " 681219820Sjeff "Unable to create new router object\n"); 682219820Sjeff else { 683219820Sjeff p_rtr_guid_tbl = &sm->p_subn->rtr_guid_tbl; 684219820Sjeff p_rtr_check = 685219820Sjeff (osm_router_t *) cl_qmap_insert(p_rtr_guid_tbl, 686219820Sjeff p_ni->port_guid, 687219820Sjeff &p_rtr->map_item); 688219820Sjeff if (p_rtr_check != p_rtr) 689219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D1B: " 690219820Sjeff "Unable to add port GUID:0x%016" PRIx64 691219820Sjeff " to router table\n", 692219820Sjeff cl_ntoh64(p_ni->port_guid)); 693219820Sjeff } 694219820Sjeff } 695219820Sjeff 696219820Sjeff p_node_check = 697219820Sjeff (osm_node_t *) cl_qmap_insert(&sm->p_subn->node_guid_tbl, 698219820Sjeff p_ni->node_guid, &p_node->map_item); 699219820Sjeff if (p_node_check != p_node) { 700219820Sjeff /* 701219820Sjeff This node must have been inserted by another thread. 702219820Sjeff This is unexpected, but is not an error. 703219820Sjeff We can simply clean-up, since the other thread will 704219820Sjeff see this processing through to completion. 705219820Sjeff */ 706219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, 707219820Sjeff "Discovery race detected at node 0x%" PRIx64 "\n", 708219820Sjeff cl_ntoh64(p_ni->node_guid)); 709219820Sjeff osm_node_delete(&p_node); 710219820Sjeff p_node = p_node_check; 711219820Sjeff __osm_ni_rcv_set_links(sm, p_node, port_num, p_ni_context); 712219820Sjeff goto Exit; 713219820Sjeff } else 714219820Sjeff __osm_ni_rcv_set_links(sm, p_node, port_num, p_ni_context); 715219820Sjeff 716219820Sjeff p_node->discovery_count++; 717219820Sjeff __osm_ni_rcv_get_node_desc(sm, p_node, p_madw); 718219820Sjeff 719219820Sjeff switch (p_ni->node_type) { 720219820Sjeff case IB_NODE_TYPE_CA: 721219820Sjeff case IB_NODE_TYPE_ROUTER: 722219820Sjeff __osm_ni_rcv_process_new_ca_or_router(sm, p_node, p_madw); 723219820Sjeff break; 724219820Sjeff case IB_NODE_TYPE_SWITCH: 725219820Sjeff __osm_ni_rcv_process_new_switch(sm, p_node, p_madw); 726219820Sjeff break; 727219820Sjeff default: 728219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D16: " 729219820Sjeff "Unknown node type %u with GUID 0x%" PRIx64 "\n", 730219820Sjeff p_ni->node_type, cl_ntoh64(p_ni->node_guid)); 731219820Sjeff break; 732219820Sjeff } 733219820Sjeff 734219820SjeffExit: 735219820Sjeff OSM_LOG_EXIT(sm->p_log); 736219820Sjeff} 737219820Sjeff 738219820Sjeff/********************************************************************** 739219820Sjeff The plock must be held before calling this function. 740219820Sjeff**********************************************************************/ 741219820Sjeffstatic void 742219820Sjeff__osm_ni_rcv_process_existing(IN osm_sm_t * sm, 743219820Sjeff IN osm_node_t * const p_node, 744219820Sjeff IN const osm_madw_t * const p_madw) 745219820Sjeff{ 746219820Sjeff ib_node_info_t *p_ni; 747219820Sjeff ib_smp_t *p_smp; 748219820Sjeff osm_ni_context_t *p_ni_context; 749219820Sjeff uint8_t port_num; 750219820Sjeff 751219820Sjeff OSM_LOG_ENTER(sm->p_log); 752219820Sjeff 753219820Sjeff p_smp = osm_madw_get_smp_ptr(p_madw); 754219820Sjeff p_ni = (ib_node_info_t *) ib_smp_get_payload_ptr(p_smp); 755219820Sjeff p_ni_context = osm_madw_get_ni_context_ptr(p_madw); 756219820Sjeff port_num = ib_node_info_get_local_port_num(p_ni); 757219820Sjeff 758219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, 759219820Sjeff "Rediscovered %s node 0x%" PRIx64 " TID 0x%" PRIx64 760219820Sjeff ", discovered %u times already\n", 761219820Sjeff ib_get_node_type_str(p_ni->node_type), 762219820Sjeff cl_ntoh64(p_ni->node_guid), 763219820Sjeff cl_ntoh64(p_smp->trans_id), p_node->discovery_count); 764219820Sjeff 765219820Sjeff /* 766219820Sjeff If we haven't already encountered this existing node 767219820Sjeff on this particular sweep, then process further. 768219820Sjeff */ 769219820Sjeff p_node->discovery_count++; 770219820Sjeff 771219820Sjeff switch (p_ni->node_type) { 772219820Sjeff case IB_NODE_TYPE_CA: 773219820Sjeff case IB_NODE_TYPE_ROUTER: 774219820Sjeff __osm_ni_rcv_process_existing_ca_or_router(sm, p_node, 775219820Sjeff p_madw); 776219820Sjeff break; 777219820Sjeff 778219820Sjeff case IB_NODE_TYPE_SWITCH: 779219820Sjeff __osm_ni_rcv_process_existing_switch(sm, p_node, p_madw); 780219820Sjeff break; 781219820Sjeff 782219820Sjeff default: 783219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D09: " 784219820Sjeff "Unknown node type %u with GUID 0x%" PRIx64 "\n", 785219820Sjeff p_ni->node_type, cl_ntoh64(p_ni->node_guid)); 786219820Sjeff break; 787219820Sjeff } 788219820Sjeff 789219820Sjeff __osm_ni_rcv_set_links(sm, p_node, port_num, p_ni_context); 790219820Sjeff 791219820Sjeff OSM_LOG_EXIT(sm->p_log); 792219820Sjeff} 793219820Sjeff 794219820Sjeff/********************************************************************** 795219820Sjeff **********************************************************************/ 796219820Sjeffvoid osm_ni_rcv_process(IN void *context, IN void *data) 797219820Sjeff{ 798219820Sjeff osm_sm_t *sm = context; 799219820Sjeff osm_madw_t *p_madw = data; 800219820Sjeff ib_node_info_t *p_ni; 801219820Sjeff ib_smp_t *p_smp; 802219820Sjeff osm_node_t *p_node; 803219820Sjeff 804219820Sjeff CL_ASSERT(sm); 805219820Sjeff 806219820Sjeff OSM_LOG_ENTER(sm->p_log); 807219820Sjeff 808219820Sjeff CL_ASSERT(p_madw); 809219820Sjeff 810219820Sjeff p_smp = osm_madw_get_smp_ptr(p_madw); 811219820Sjeff p_ni = (ib_node_info_t *) ib_smp_get_payload_ptr(p_smp); 812219820Sjeff 813219820Sjeff CL_ASSERT(p_smp->attr_id == IB_MAD_ATTR_NODE_INFO); 814219820Sjeff 815219820Sjeff if (p_ni->node_guid == 0) { 816219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D16: " 817219820Sjeff "Got Zero Node GUID! Found on the directed route:\n"); 818219820Sjeff osm_dump_smp_dr_path(sm->p_log, p_smp, OSM_LOG_ERROR); 819219820Sjeff goto Exit; 820219820Sjeff } 821219820Sjeff 822219820Sjeff if (p_ni->port_guid == 0) { 823219820Sjeff OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D17: " 824219820Sjeff "Got Zero Port GUID! Found on the directed route:\n"); 825219820Sjeff osm_dump_smp_dr_path(sm->p_log, p_smp, OSM_LOG_ERROR); 826219820Sjeff goto Exit; 827219820Sjeff } 828219820Sjeff 829219820Sjeff /* 830219820Sjeff Determine if this node has already been discovered, 831219820Sjeff and process accordingly. 832219820Sjeff During processing of this node, hold the shared lock. 833219820Sjeff */ 834219820Sjeff 835219820Sjeff CL_PLOCK_EXCL_ACQUIRE(sm->p_lock); 836219820Sjeff p_node = osm_get_node_by_guid(sm->p_subn, p_ni->node_guid); 837219820Sjeff 838219820Sjeff osm_dump_node_info(sm->p_log, p_ni, OSM_LOG_DEBUG); 839219820Sjeff 840219820Sjeff if (!p_node) 841219820Sjeff __osm_ni_rcv_process_new(sm, p_madw); 842219820Sjeff else 843219820Sjeff __osm_ni_rcv_process_existing(sm, p_node, p_madw); 844219820Sjeff 845219820Sjeff CL_PLOCK_RELEASE(sm->p_lock); 846219820Sjeff 847219820SjeffExit: 848219820Sjeff OSM_LOG_EXIT(sm->p_log); 849219820Sjeff} 850