/*
 * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved.
 * Copyright (c) 2002-2015 Mellanox Technologies LTD. All rights reserved.
 * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
 * Copyright (c) 2009 HNR Consulting. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */

/*
 * Abstract:
 *    Implementation of osm_ni_rcv_t.
 * This object represents the NodeInfo Receiver object.
 * This object is part of the opensm family of objects.
42 */ 43 44#if HAVE_CONFIG_H 45# include <config.h> 46#endif /* HAVE_CONFIG_H */ 47 48#include <stdlib.h> 49#include <string.h> 50#include <iba/ib_types.h> 51#include <complib/cl_qmap.h> 52#include <complib/cl_passivelock.h> 53#include <complib/cl_debug.h> 54#include <opensm/osm_file_ids.h> 55#define FILE_ID OSM_FILE_NODE_INFO_RCV_C 56#include <opensm/osm_madw.h> 57#include <opensm/osm_log.h> 58#include <opensm/osm_node.h> 59#include <opensm/osm_subnet.h> 60#include <opensm/osm_router.h> 61#include <opensm/osm_mad_pool.h> 62#include <opensm/osm_helper.h> 63#include <opensm/osm_msgdef.h> 64#include <opensm/osm_opensm.h> 65#include <opensm/osm_ucast_mgr.h> 66#include <opensm/osm_db_pack.h> 67 68static void report_duplicated_guid(IN osm_sm_t * sm, osm_physp_t * p_physp, 69 osm_node_t * p_neighbor_node, 70 const uint8_t port_num) 71{ 72 osm_physp_t *p_old, *p_new; 73 osm_dr_path_t path; 74 75 p_old = p_physp->p_remote_physp; 76 p_new = osm_node_get_physp_ptr(p_neighbor_node, port_num); 77 78 OSM_LOG(sm->p_log, OSM_LOG_SYS | OSM_LOG_ERROR, "ERR 0D01: " 79 "Found duplicated node GUID.\n" 80 "Node 0x%" PRIx64 " port %u is reachable from remote node " 81 "0x%" PRIx64 " port %u and remote node 0x%" PRIx64 " port %u.\n" 82 "Paths are:\n", 83 cl_ntoh64(p_physp->p_node->node_info.node_guid), 84 p_physp->port_num, 85 p_old ? cl_ntoh64(p_old->p_node->node_info.node_guid) : 0, 86 p_old ? p_old->port_num : 0, 87 p_new ? cl_ntoh64(p_new->p_node->node_info.node_guid) : 0, 88 p_new ? 
p_new->port_num : 0); 89 90 osm_dump_dr_path_v2(sm->p_log, osm_physp_get_dr_path_ptr(p_physp), 91 FILE_ID, OSM_LOG_ERROR); 92 93 path = *osm_physp_get_dr_path_ptr(p_new); 94 if (osm_dr_path_extend(&path, port_num)) 95 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D05: " 96 "DR path with hop count %d couldn't be extended\n", 97 path.hop_count); 98 osm_dump_dr_path_v2(sm->p_log, &path, FILE_ID, OSM_LOG_ERROR); 99} 100 101static void requery_dup_node_info(IN osm_sm_t * sm, osm_physp_t * p_physp, 102 unsigned count) 103{ 104 osm_madw_context_t context; 105 osm_dr_path_t path; 106 cl_status_t status; 107 108 if (!p_physp->p_remote_physp) { 109 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D0D: " 110 "DR path couldn't be extended due to NULL remote physp\n"); 111 return; 112 } 113 114 path = *osm_physp_get_dr_path_ptr(p_physp->p_remote_physp); 115 if (osm_dr_path_extend(&path, p_physp->p_remote_physp->port_num)) { 116 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D08: " 117 "DR path with hop count %d couldn't be extended\n", 118 path.hop_count); 119 return; 120 } 121 122 context.ni_context.node_guid = 123 p_physp->p_remote_physp->p_node->node_info.port_guid; 124 context.ni_context.port_num = p_physp->p_remote_physp->port_num; 125 context.ni_context.dup_node_guid = p_physp->p_node->node_info.node_guid; 126 context.ni_context.dup_port_num = p_physp->port_num; 127 context.ni_context.dup_count = count; 128 129 status = osm_req_get(sm, &path, IB_MAD_ATTR_NODE_INFO, 0, 130 TRUE, 0, CL_DISP_MSGID_NONE, &context); 131 132 if (status != IB_SUCCESS) 133 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D02: " 134 "Failure initiating NodeInfo request (%s)\n", 135 ib_get_err_str(status)); 136} 137 138/********************************************************************** 139 The plock must be held before calling this function. 
140**********************************************************************/ 141static void ni_rcv_set_links(IN osm_sm_t * sm, osm_node_t * p_node, 142 const uint8_t port_num, 143 const osm_ni_context_t * p_ni_context) 144{ 145 osm_node_t *p_neighbor_node; 146 osm_physp_t *p_physp, *p_remote_physp; 147 148 OSM_LOG_ENTER(sm->p_log); 149 150 /* 151 A special case exists in which the node we're trying to 152 link is our own node. In this case, the guid value in 153 the ni_context will be zero. 154 */ 155 if (p_ni_context->node_guid == 0) { 156 OSM_LOG(sm->p_log, OSM_LOG_DEBUG, 157 "Nothing to link for our own node 0x%" PRIx64 "\n", 158 cl_ntoh64(osm_node_get_node_guid(p_node))); 159 goto _exit; 160 } 161 162 p_neighbor_node = osm_get_node_by_guid(sm->p_subn, 163 p_ni_context->node_guid); 164 if (PF(!p_neighbor_node)) { 165 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D10: " 166 "Unexpected removal of neighbor node 0x%" PRIx64 "\n", 167 cl_ntoh64(p_ni_context->node_guid)); 168 goto _exit; 169 } 170 171 /* When setting the link, ports on both 172 sides of the link should be initialized */ 173 CL_ASSERT(osm_node_link_has_valid_ports(p_node, port_num, 174 p_neighbor_node, 175 p_ni_context->port_num)); 176 177 if (osm_node_link_exists(p_node, port_num, 178 p_neighbor_node, p_ni_context->port_num)) { 179 OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Link already exists\n"); 180 goto _exit; 181 } 182 183 p_physp = osm_node_get_physp_ptr(p_node, port_num); 184 if (!p_physp) { 185 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR OD0E: " 186 "Failed to find physp for port %d of Node GUID 0x%" 187 PRIx64 "\n", port_num, 188 cl_ntoh64(osm_node_get_node_guid(p_node))); 189 goto _exit; 190 } 191 192 /* 193 * If the link went UP, after we already discovered it, we shouldn't 194 * set the link between the ports and resweep. 195 */ 196 if (osm_physp_get_port_state(p_physp) == IB_LINK_DOWN && 197 p_node->physp_discovered[port_num]) { 198 /* Link down on another side. 
Don't create a link*/ 199 p_node->physp_discovered[port_num] = 0; 200 sm->p_subn->force_heavy_sweep = TRUE; 201 goto _exit; 202 } 203 204 if (osm_node_has_any_link(p_node, port_num) && 205 sm->p_subn->force_heavy_sweep == FALSE && 206 (!p_ni_context->dup_count || 207 (p_ni_context->dup_node_guid == osm_node_get_node_guid(p_node) && 208 p_ni_context->dup_port_num == port_num))) { 209 /* 210 Uh oh... 211 This could be reconnected ports, but also duplicated GUID 212 (2 nodes have the same guid) or a 12x link with lane reversal 213 that is not configured correctly. 214 We will try to recover by querying NodeInfo again. 215 In order to catch even fast port moving to new location(s) 216 and back we will count up to 5. 217 Some crazy reconnections (newly created switch loop right 218 before targeted CA) will not be catched this way. So in worst 219 case - report GUID duplication and request new discovery. 220 When switch node is targeted NodeInfo querying will be done 221 in opposite order, this is much stronger check, unfortunately 222 it is impossible with CAs. 223 */ 224 p_physp = osm_node_get_physp_ptr(p_node, port_num); 225 if (!p_physp) { 226 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR OD0F: " 227 "Failed to find physp for port %d of Node GUID 0x%" 228 PRIx64 "\n", port_num, 229 cl_ntoh64(osm_node_get_node_guid(p_node))); 230 goto _exit; 231 } 232 233 if (p_ni_context->dup_count > 5) { 234 report_duplicated_guid(sm, p_physp, p_neighbor_node, 235 p_ni_context->port_num); 236 sm->p_subn->force_heavy_sweep = TRUE; 237 } else if (p_node->sw) 238 requery_dup_node_info(sm, p_physp->p_remote_physp, 239 p_ni_context->dup_count + 1); 240 else 241 requery_dup_node_info(sm, p_physp, 242 p_ni_context->dup_count + 1); 243 } 244 245 /* 246 When there are only two nodes with exact same guids (connected back 247 to back) - the previous check for duplicated guid will not catch 248 them. But the link will be from the port to itself... 
249 Enhanced Port 0 is an exception to this 250 */ 251 if (osm_node_get_node_guid(p_node) == p_ni_context->node_guid && 252 port_num == p_ni_context->port_num && 253 port_num != 0 && cl_qmap_count(&sm->p_subn->sw_guid_tbl) == 0) { 254 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, 255 "Duplicate GUID found by link from a port to itself:" 256 "node 0x%" PRIx64 ", port number %u\n", 257 cl_ntoh64(osm_node_get_node_guid(p_node)), port_num); 258 p_physp = osm_node_get_physp_ptr(p_node, port_num); 259 if (!p_physp) { 260 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR OD1D: " 261 "Failed to find physp for port %d of Node GUID 0x%" 262 PRIx64 "\n", port_num, 263 cl_ntoh64(osm_node_get_node_guid(p_node))); 264 goto _exit; 265 } 266 267 osm_dump_dr_path_v2(sm->p_log, osm_physp_get_dr_path_ptr(p_physp), 268 FILE_ID, OSM_LOG_VERBOSE); 269 270 if (sm->p_subn->opt.exit_on_fatal == TRUE) { 271 osm_log_v2(sm->p_log, OSM_LOG_SYS, FILE_ID, 272 "Errors on subnet. Duplicate GUID found " 273 "by link from a port to itself. " 274 "See verbose opensm.log for more details\n"); 275 exit(1); 276 } 277 } 278 279 OSM_LOG(sm->p_log, OSM_LOG_DEBUG, 280 "Creating new link between:\n\t\t\t\tnode 0x%" PRIx64 281 ", port number %u and\n\t\t\t\tnode 0x%" PRIx64 282 ", port number %u\n", 283 cl_ntoh64(osm_node_get_node_guid(p_node)), port_num, 284 cl_ntoh64(p_ni_context->node_guid), p_ni_context->port_num); 285 286 if (sm->ucast_mgr.cache_valid) 287 osm_ucast_cache_check_new_link(&sm->ucast_mgr, p_node, port_num, 288 p_neighbor_node, 289 p_ni_context->port_num); 290 291 p_physp = osm_node_get_physp_ptr(p_node, port_num); 292 p_remote_physp = osm_node_get_physp_ptr(p_neighbor_node, 293 p_ni_context->port_num); 294 if (!p_physp || !p_remote_physp) 295 goto _exit; 296 297 osm_node_link(p_node, port_num, p_neighbor_node, p_ni_context->port_num); 298 299 osm_db_neighbor_set(sm->p_subn->p_neighbor, 300 cl_ntoh64(osm_physp_get_port_guid(p_physp)), 301 port_num, 302 cl_ntoh64(osm_physp_get_port_guid(p_remote_physp)), 303 
p_ni_context->port_num); 304 osm_db_neighbor_set(sm->p_subn->p_neighbor, 305 cl_ntoh64(osm_physp_get_port_guid(p_remote_physp)), 306 p_ni_context->port_num, 307 cl_ntoh64(osm_physp_get_port_guid(p_physp)), 308 port_num); 309 310_exit: 311 OSM_LOG_EXIT(sm->p_log); 312} 313 314static void ni_rcv_get_port_info(IN osm_sm_t * sm, IN osm_node_t * node, 315 IN const osm_madw_t * madw) 316{ 317 osm_madw_context_t context; 318 osm_physp_t *physp; 319 ib_node_info_t *ni; 320 unsigned port; 321 ib_api_status_t status; 322 int mlnx_epi_supported = 0; 323 324 ni = ib_smp_get_payload_ptr(osm_madw_get_smp_ptr(madw)); 325 326 port = ib_node_info_get_local_port_num(ni); 327 328 if (sm->p_subn->opt.fdr10) 329 mlnx_epi_supported = is_mlnx_ext_port_info_supported( 330 ib_node_info_get_vendor_id(ni), 331 ni->device_id); 332 333 physp = osm_node_get_physp_ptr(node, port); 334 if (!physp) { 335 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR OD1E: " 336 "Failed to find physp for port %d of Node GUID 0x%" 337 PRIx64 "\n", port, 338 cl_ntoh64(osm_node_get_node_guid(node))); 339 return; 340 } 341 342 context.pi_context.node_guid = osm_node_get_node_guid(node); 343 context.pi_context.port_guid = osm_physp_get_port_guid(physp); 344 context.pi_context.set_method = FALSE; 345 context.pi_context.light_sweep = FALSE; 346 context.pi_context.active_transition = FALSE; 347 context.pi_context.client_rereg = FALSE; 348 349 status = osm_req_get(sm, osm_physp_get_dr_path_ptr(physp), 350 IB_MAD_ATTR_PORT_INFO, cl_hton32(port), 351 TRUE, 0, CL_DISP_MSGID_NONE, &context); 352 if (status != IB_SUCCESS) 353 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR OD02: " 354 "Failure initiating PortInfo request (%s)\n", 355 ib_get_err_str(status)); 356 if (mlnx_epi_supported) { 357 status = osm_req_get(sm, 358 osm_physp_get_dr_path_ptr(physp), 359 IB_MAD_ATTR_MLNX_EXTENDED_PORT_INFO, 360 cl_hton32(port), 361 TRUE, 0, CL_DISP_MSGID_NONE, &context); 362 if (status != IB_SUCCESS) 363 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D0B: " 364 
"Failure initiating MLNX ExtPortInfo request (%s)\n", 365 ib_get_err_str(status)); 366 } 367} 368 369/********************************************************************** 370 The plock must be held before calling this function. 371**********************************************************************/ 372void osm_req_get_node_desc(IN osm_sm_t * sm, osm_physp_t * p_physp) 373{ 374 ib_api_status_t status = IB_SUCCESS; 375 osm_madw_context_t context; 376 377 OSM_LOG_ENTER(sm->p_log); 378 379 context.nd_context.node_guid = 380 osm_node_get_node_guid(osm_physp_get_node_ptr(p_physp)); 381 382 status = osm_req_get(sm, osm_physp_get_dr_path_ptr(p_physp), 383 IB_MAD_ATTR_NODE_DESC, 0, TRUE, 0, 384 CL_DISP_MSGID_NONE, &context); 385 if (status != IB_SUCCESS) 386 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D03: " 387 "Failure initiating NodeDescription request (%s)\n", 388 ib_get_err_str(status)); 389 390 OSM_LOG_EXIT(sm->p_log); 391} 392 393/********************************************************************** 394 The plock must be held before calling this function. 395**********************************************************************/ 396static void ni_rcv_get_node_desc(IN osm_sm_t * sm, IN osm_node_t * p_node, 397 IN const osm_madw_t * p_madw) 398{ 399 ib_node_info_t *p_ni; 400 ib_smp_t *p_smp; 401 uint8_t port_num; 402 osm_physp_t *p_physp = NULL; 403 404 OSM_LOG_ENTER(sm->p_log); 405 406 p_smp = osm_madw_get_smp_ptr(p_madw); 407 p_ni = ib_smp_get_payload_ptr(p_smp); 408 port_num = ib_node_info_get_local_port_num(p_ni); 409 410 /* 411 Request PortInfo & NodeDescription attributes for the port 412 that responded to the NodeInfo attribute. 413 Because this is a channel adapter or router, we are 414 not allowed to request PortInfo for the other ports. 415 Set the context union properly, so the recipient 416 knows which node & port are relevant. 
417 */ 418 p_physp = osm_node_get_physp_ptr(p_node, port_num); 419 if (!p_physp) { 420 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR OD1F: " 421 "Failed to find physp for port %d of Node GUID 0x%" 422 PRIx64 "\n", port_num, 423 cl_ntoh64(osm_node_get_node_guid(p_node))); 424 return; 425 } 426 427 osm_req_get_node_desc(sm, p_physp); 428 429 OSM_LOG_EXIT(sm->p_log); 430} 431 432/********************************************************************** 433 The plock must be held before calling this function. 434**********************************************************************/ 435static void ni_rcv_process_new_ca_or_router(IN osm_sm_t * sm, 436 IN osm_node_t * p_node, 437 IN const osm_madw_t * p_madw) 438{ 439 OSM_LOG_ENTER(sm->p_log); 440 441 ni_rcv_get_port_info(sm, p_node, p_madw); 442 443 /* 444 A node guid of 0 is the corner case that indicates 445 we discovered our own node. Initialize the subnet 446 object with the SM's own port guid. 447 */ 448 if (osm_madw_get_ni_context_ptr(p_madw)->node_guid == 0) 449 sm->p_subn->sm_port_guid = p_node->node_info.port_guid; 450 451 OSM_LOG_EXIT(sm->p_log); 452} 453 454/********************************************************************** 455 The plock must be held before calling this function. 456**********************************************************************/ 457static void ni_rcv_process_existing_ca_or_router(IN osm_sm_t * sm, 458 IN osm_node_t * p_node, 459 IN const osm_madw_t * p_madw) 460{ 461 ib_node_info_t *p_ni; 462 ib_smp_t *p_smp; 463 osm_port_t *p_port; 464 osm_port_t *p_port_check; 465 uint8_t port_num; 466 osm_dr_path_t *p_dr_path; 467 osm_alias_guid_t *p_alias_guid, *p_alias_guid_check; 468 469 OSM_LOG_ENTER(sm->p_log); 470 471 p_smp = osm_madw_get_smp_ptr(p_madw); 472 p_ni = ib_smp_get_payload_ptr(p_smp); 473 port_num = ib_node_info_get_local_port_num(p_ni); 474 475 /* 476 Determine if we have encountered this node through a 477 previously undiscovered port. If so, build the new 478 port object. 
479 */ 480 p_port = osm_get_port_by_guid(sm->p_subn, p_ni->port_guid); 481 if (!p_port) { 482 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, 483 "Creating new port object with GUID 0x%" PRIx64 "\n", 484 cl_ntoh64(p_ni->port_guid)); 485 486 osm_node_init_physp(p_node, port_num, p_madw); 487 488 p_port = osm_port_new(p_ni, p_node); 489 if (PF(p_port == NULL)) { 490 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D04: " 491 "Unable to create new port object\n"); 492 goto Exit; 493 } 494 495 /* 496 Add the new port object to the database. 497 */ 498 p_port_check = 499 (osm_port_t *) cl_qmap_insert(&sm->p_subn->port_guid_tbl, 500 p_ni->port_guid, 501 &p_port->map_item); 502 if (PF(p_port_check != p_port)) { 503 /* 504 We should never be here! 505 Somehow, this port GUID already exists in the table. 506 */ 507 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D12: " 508 "Port 0x%" PRIx64 " already in the database!\n", 509 cl_ntoh64(p_ni->port_guid)); 510 511 osm_port_delete(&p_port); 512 goto Exit; 513 } 514 515 p_alias_guid = osm_alias_guid_new(p_ni->port_guid, 516 p_port); 517 if (PF(!p_alias_guid)) { 518 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D11: " 519 "alias guid memory allocation failed" 520 " for port GUID 0x%" PRIx64 "\n", 521 cl_ntoh64(p_ni->port_guid)); 522 goto alias_done; 523 } 524 525 /* insert into alias guid table */ 526 p_alias_guid_check = 527 (osm_alias_guid_t *) cl_qmap_insert(&sm->p_subn->alias_port_guid_tbl, 528 p_alias_guid->alias_guid, 529 &p_alias_guid->map_item); 530 if (p_alias_guid_check != p_alias_guid) { 531 /* alias GUID is a duplicate */ 532 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D13: " 533 "Duplicate alias port GUID 0x%" PRIx64 "\n", 534 cl_ntoh64(p_ni->port_guid)); 535 osm_alias_guid_delete(&p_alias_guid); 536 osm_port_delete(&p_port); 537 goto Exit; 538 } 539 540alias_done: 541 /* If we are a master, then this means the port is new on the subnet. 542 Mark it as new - need to send trap 64 for these ports. 
543 The condition that we are master is true, since if we are in discovering 544 state (meaning we woke up from standby or we are just initializing), 545 then these ports may be new to us, but are not new on the subnet. 546 If we are master, then the subnet as we know it is the updated one, 547 and any new ports we encounter should cause trap 64. C14-72.1.1 */ 548 if (sm->p_subn->sm_state == IB_SMINFO_STATE_MASTER) 549 p_port->is_new = 1; 550 551 } else { 552 osm_physp_t *p_physp = osm_node_get_physp_ptr(p_node, port_num); 553 554 if (PF(p_physp == NULL)) { 555 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D1C: " 556 "No physical port found for node GUID 0x%" 557 PRIx64 " port %u. Might be duplicate port GUID\n", 558 cl_ntoh64(p_node->node_info.node_guid), 559 port_num); 560 goto Exit; 561 } 562 563 /* 564 Update the DR Path to the port, 565 in case the old one is no longer available. 566 */ 567 p_dr_path = osm_physp_get_dr_path_ptr(p_physp); 568 569 osm_dr_path_init(p_dr_path, p_smp->hop_count, 570 p_smp->initial_path); 571 } 572 573 ni_rcv_get_port_info(sm, p_node, p_madw); 574 575Exit: 576 OSM_LOG_EXIT(sm->p_log); 577} 578 579static void ni_rcv_process_switch(IN osm_sm_t * sm, IN osm_node_t * p_node, 580 IN const osm_madw_t * p_madw) 581{ 582 ib_api_status_t status = IB_SUCCESS; 583 osm_physp_t *p_physp; 584 osm_madw_context_t context; 585 osm_dr_path_t *path; 586 ib_smp_t *p_smp; 587 588 OSM_LOG_ENTER(sm->p_log); 589 590 p_smp = osm_madw_get_smp_ptr(p_madw); 591 592 p_physp = osm_node_get_physp_ptr(p_node, 0); 593 /* update DR path of already initialized switch port 0 */ 594 path = osm_physp_get_dr_path_ptr(p_physp); 595 osm_dr_path_init(path, p_smp->hop_count, p_smp->initial_path); 596 597 context.si_context.node_guid = osm_node_get_node_guid(p_node); 598 context.si_context.set_method = FALSE; 599 context.si_context.light_sweep = FALSE; 600 context.si_context.lft_top_change = FALSE; 601 602 /* Request a SwitchInfo attribute */ 603 status = osm_req_get(sm, path, 
IB_MAD_ATTR_SWITCH_INFO, 0, TRUE, 0, 604 CL_DISP_MSGID_NONE, &context); 605 if (status != IB_SUCCESS) 606 /* continue despite error */ 607 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D06: " 608 "Failure initiating SwitchInfo request (%s)\n", 609 ib_get_err_str(status)); 610 611 OSM_LOG_EXIT(sm->p_log); 612} 613 614/********************************************************************** 615 The plock must be held before calling this function. 616**********************************************************************/ 617static void ni_rcv_process_existing_switch(IN osm_sm_t * sm, 618 IN osm_node_t * p_node, 619 IN const osm_madw_t * p_madw) 620{ 621 OSM_LOG_ENTER(sm->p_log); 622 623 /* 624 If this switch has already been probed during this sweep, 625 then don't bother reprobing it. 626 */ 627 if (p_node->discovery_count == 1) 628 ni_rcv_process_switch(sm, p_node, p_madw); 629 630 OSM_LOG_EXIT(sm->p_log); 631} 632 633/********************************************************************** 634 The plock must be held before calling this function. 635**********************************************************************/ 636static void ni_rcv_process_new_switch(IN osm_sm_t * sm, IN osm_node_t * p_node, 637 IN const osm_madw_t * p_madw) 638{ 639 OSM_LOG_ENTER(sm->p_log); 640 641 ni_rcv_process_switch(sm, p_node, p_madw); 642 643 /* 644 A node guid of 0 is the corner case that indicates 645 we discovered our own node. Initialize the subnet 646 object with the SM's own port guid. 647 */ 648 if (osm_madw_get_ni_context_ptr(p_madw)->node_guid == 0) 649 sm->p_subn->sm_port_guid = p_node->node_info.port_guid; 650 651 OSM_LOG_EXIT(sm->p_log); 652} 653 654/********************************************************************** 655 The plock must NOT be held before calling this function. 
656**********************************************************************/ 657static void ni_rcv_process_new(IN osm_sm_t * sm, IN const osm_madw_t * p_madw) 658{ 659 osm_node_t *p_node; 660 osm_node_t *p_node_check; 661 osm_port_t *p_port; 662 osm_port_t *p_port_check; 663 osm_router_t *p_rtr = NULL; 664 osm_router_t *p_rtr_check; 665 cl_qmap_t *p_rtr_guid_tbl; 666 ib_node_info_t *p_ni; 667 ib_smp_t *p_smp; 668 osm_ni_context_t *p_ni_context; 669 osm_alias_guid_t *p_alias_guid, *p_alias_guid_check; 670 uint8_t port_num; 671 672 OSM_LOG_ENTER(sm->p_log); 673 674 p_smp = osm_madw_get_smp_ptr(p_madw); 675 p_ni = ib_smp_get_payload_ptr(p_smp); 676 p_ni_context = osm_madw_get_ni_context_ptr(p_madw); 677 port_num = ib_node_info_get_local_port_num(p_ni); 678 679 osm_dump_smp_dr_path_v2(sm->p_log, p_smp, FILE_ID, OSM_LOG_VERBOSE); 680 681 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, 682 "Discovered new %s node," 683 "\n\t\t\t\tGUID 0x%" PRIx64 ", TID 0x%" PRIx64 "\n", 684 ib_get_node_type_str(p_ni->node_type), 685 cl_ntoh64(p_ni->node_guid), cl_ntoh64(p_smp->trans_id)); 686 687 if (PF(port_num > p_ni->num_ports)) { 688 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D0A: " 689 "New %s node GUID 0x%" PRIx64 "is non-compliant and " 690 "is being ignored since the " 691 "local port num %u > num ports %u\n", 692 ib_get_node_type_str(p_ni->node_type), 693 cl_ntoh64(p_ni->node_guid), port_num, 694 p_ni->num_ports); 695 goto Exit; 696 } 697 698 p_node = osm_node_new(p_madw); 699 if (PF(p_node == NULL)) { 700 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D07: " 701 "Unable to create new node object\n"); 702 goto Exit; 703 } 704 705 /* 706 Create a new port object to represent this node's physical 707 ports in the port table. 708 */ 709 p_port = osm_port_new(p_ni, p_node); 710 if (PF(p_port == NULL)) { 711 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D14: " 712 "Unable to create new port object\n"); 713 osm_node_delete(&p_node); 714 goto Exit; 715 } 716 717 /* 718 Add the new port object to the database. 
719 */ 720 p_port_check = 721 (osm_port_t *) cl_qmap_insert(&sm->p_subn->port_guid_tbl, 722 p_ni->port_guid, &p_port->map_item); 723 if (PF(p_port_check != p_port)) { 724 /* 725 We should never be here! 726 Somehow, this port GUID already exists in the table. 727 */ 728 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D15: " 729 "Duplicate Port GUID 0x%" PRIx64 730 "! Found by the two directed routes:\n", 731 cl_ntoh64(p_ni->port_guid)); 732 osm_dump_dr_path_v2(sm->p_log, 733 osm_physp_get_dr_path_ptr(p_port->p_physp), 734 FILE_ID, OSM_LOG_ERROR); 735 osm_dump_dr_path_v2(sm->p_log, 736 osm_physp_get_dr_path_ptr(p_port_check-> 737 p_physp), 738 FILE_ID, OSM_LOG_ERROR); 739 osm_port_delete(&p_port); 740 osm_node_delete(&p_node); 741 goto Exit; 742 } 743 744 p_alias_guid = osm_alias_guid_new(p_ni->port_guid, 745 p_port); 746 if (PF(!p_alias_guid)) { 747 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D18: " 748 "alias guid memory allocation failed" 749 " for port GUID 0x%" PRIx64 "\n", 750 cl_ntoh64(p_ni->port_guid)); 751 goto alias_done2; 752 } 753 754 /* insert into alias guid table */ 755 p_alias_guid_check = 756 (osm_alias_guid_t *) cl_qmap_insert(&sm->p_subn->alias_port_guid_tbl, 757 p_alias_guid->alias_guid, 758 &p_alias_guid->map_item); 759 if (p_alias_guid_check != p_alias_guid) { 760 /* alias GUID is a duplicate */ 761 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D19: " 762 "Duplicate alias port GUID 0x%" PRIx64 "\n", 763 cl_ntoh64(p_ni->port_guid)); 764 osm_alias_guid_delete(&p_alias_guid); 765 } 766 767alias_done2: 768 /* If we are a master, then this means the port is new on the subnet. 769 Mark it as new - need to send trap 64 on these ports. 770 The condition that we are master is true, since if we are in discovering 771 state (meaning we woke up from standby or we are just initializing), 772 then these ports may be new to us, but are not new on the subnet. 
773 If we are master, then the subnet as we know it is the updated one, 774 and any new ports we encounter should cause trap 64. C14-72.1.1 */ 775 if (sm->p_subn->sm_state == IB_SMINFO_STATE_MASTER) 776 p_port->is_new = 1; 777 778 /* If there were RouterInfo or other router attribute, 779 this would be elsewhere */ 780 if (p_ni->node_type == IB_NODE_TYPE_ROUTER) { 781 if (PF((p_rtr = osm_router_new(p_port)) == NULL)) 782 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D1A: " 783 "Unable to create new router object\n"); 784 else { 785 p_rtr_guid_tbl = &sm->p_subn->rtr_guid_tbl; 786 p_rtr_check = 787 (osm_router_t *) cl_qmap_insert(p_rtr_guid_tbl, 788 p_ni->port_guid, 789 &p_rtr->map_item); 790 if (PF(p_rtr_check != p_rtr)) 791 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D1B: " 792 "Unable to add port GUID:0x%016" PRIx64 793 " to router table\n", 794 cl_ntoh64(p_ni->port_guid)); 795 } 796 } 797 798 p_node_check = 799 (osm_node_t *) cl_qmap_insert(&sm->p_subn->node_guid_tbl, 800 p_ni->node_guid, &p_node->map_item); 801 if (PF(p_node_check != p_node)) { 802 /* 803 This node must have been inserted by another thread. 804 This is unexpected, but is not an error. 805 We can simply clean-up, since the other thread will 806 see this processing through to completion. 
807 */ 808 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, 809 "Discovery race detected at node 0x%" PRIx64 "\n", 810 cl_ntoh64(p_ni->node_guid)); 811 osm_node_delete(&p_node); 812 p_node = p_node_check; 813 ni_rcv_set_links(sm, p_node, port_num, p_ni_context); 814 goto Exit; 815 } else 816 ni_rcv_set_links(sm, p_node, port_num, p_ni_context); 817 818 p_node->discovery_count++; 819 ni_rcv_get_node_desc(sm, p_node, p_madw); 820 821 switch (p_ni->node_type) { 822 case IB_NODE_TYPE_CA: 823 case IB_NODE_TYPE_ROUTER: 824 ni_rcv_process_new_ca_or_router(sm, p_node, p_madw); 825 break; 826 case IB_NODE_TYPE_SWITCH: 827 ni_rcv_process_new_switch(sm, p_node, p_madw); 828 break; 829 default: 830 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D16: " 831 "Unknown node type %u with GUID 0x%" PRIx64 "\n", 832 p_ni->node_type, cl_ntoh64(p_ni->node_guid)); 833 break; 834 } 835 836Exit: 837 OSM_LOG_EXIT(sm->p_log); 838} 839 840/********************************************************************** 841 The plock must be held before calling this function. 
842**********************************************************************/ 843static void ni_rcv_process_existing(IN osm_sm_t * sm, IN osm_node_t * p_node, 844 IN const osm_madw_t * p_madw) 845{ 846 ib_node_info_t *p_ni; 847 ib_smp_t *p_smp; 848 osm_ni_context_t *p_ni_context; 849 uint8_t port_num; 850 851 OSM_LOG_ENTER(sm->p_log); 852 853 p_smp = osm_madw_get_smp_ptr(p_madw); 854 p_ni = ib_smp_get_payload_ptr(p_smp); 855 p_ni_context = osm_madw_get_ni_context_ptr(p_madw); 856 port_num = ib_node_info_get_local_port_num(p_ni); 857 858 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, 859 "Rediscovered %s node 0x%" PRIx64 " TID 0x%" PRIx64 860 ", discovered %u times already\n", 861 ib_get_node_type_str(p_ni->node_type), 862 cl_ntoh64(p_ni->node_guid), 863 cl_ntoh64(p_smp->trans_id), p_node->discovery_count); 864 865 if (PF(port_num > p_ni->num_ports)) { 866 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D0C: " 867 "Existing %s node GUID 0x%" PRIx64 "is non-compliant " 868 "and is being ignored since the " 869 "local port num %u > num ports %u\n", 870 ib_get_node_type_str(p_ni->node_type), 871 cl_ntoh64(p_ni->node_guid), port_num, 872 p_ni->num_ports); 873 goto Exit; 874 } 875 876 /* 877 If we haven't already encountered this existing node 878 on this particular sweep, then process further. 
879 */ 880 p_node->discovery_count++; 881 882 switch (p_ni->node_type) { 883 case IB_NODE_TYPE_CA: 884 case IB_NODE_TYPE_ROUTER: 885 ni_rcv_process_existing_ca_or_router(sm, p_node, p_madw); 886 break; 887 888 case IB_NODE_TYPE_SWITCH: 889 ni_rcv_process_existing_switch(sm, p_node, p_madw); 890 break; 891 892 default: 893 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D09: " 894 "Unknown node type %u with GUID 0x%" PRIx64 "\n", 895 p_ni->node_type, cl_ntoh64(p_ni->node_guid)); 896 break; 897 } 898 899 if ( p_ni->sys_guid != p_node->node_info.sys_guid) { 900 OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Updated SysImageGUID: 0x%" 901 PRIx64 " for node 0x%" PRIx64 "\n", 902 cl_ntoh64(p_ni->sys_guid), 903 cl_ntoh64(p_ni->node_guid)); 904 } 905 ni_rcv_set_links(sm, p_node, port_num, p_ni_context); 906 p_node->node_info = *p_ni; 907 908Exit: 909 OSM_LOG_EXIT(sm->p_log); 910} 911 912void osm_ni_rcv_process(IN void *context, IN void *data) 913{ 914 osm_sm_t *sm = context; 915 osm_madw_t *p_madw = data; 916 ib_node_info_t *p_ni; 917 ib_smp_t *p_smp; 918 osm_node_t *p_node; 919 920 CL_ASSERT(sm); 921 922 OSM_LOG_ENTER(sm->p_log); 923 924 CL_ASSERT(p_madw); 925 926 p_smp = osm_madw_get_smp_ptr(p_madw); 927 p_ni = ib_smp_get_payload_ptr(p_smp); 928 929 CL_ASSERT(p_smp->attr_id == IB_MAD_ATTR_NODE_INFO); 930 931 if (PF(p_ni->node_guid == 0)) { 932 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D16: " 933 "Got Zero Node GUID! Found on the directed route:\n"); 934 osm_dump_smp_dr_path_v2(sm->p_log, p_smp, FILE_ID, OSM_LOG_ERROR); 935 goto Exit; 936 } 937 938 if (PF(p_ni->port_guid == 0)) { 939 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0D17: " 940 "Got Zero Port GUID! 
Found on the directed route:\n"); 941 osm_dump_smp_dr_path_v2(sm->p_log, p_smp, FILE_ID, OSM_LOG_ERROR); 942 goto Exit; 943 } 944 945 if (ib_smp_get_status(p_smp)) { 946 OSM_LOG(sm->p_log, OSM_LOG_DEBUG, 947 "MAD status 0x%x received\n", 948 cl_ntoh16(ib_smp_get_status(p_smp))); 949 goto Exit; 950 } 951 952 /* 953 Determine if this node has already been discovered, 954 and process accordingly. 955 During processing of this node, hold the shared lock. 956 */ 957 958 CL_PLOCK_EXCL_ACQUIRE(sm->p_lock); 959 p_node = osm_get_node_by_guid(sm->p_subn, p_ni->node_guid); 960 961 osm_dump_node_info_v2(sm->p_log, p_ni, FILE_ID, OSM_LOG_DEBUG); 962 963 if (!p_node) 964 ni_rcv_process_new(sm, p_madw); 965 else 966 ni_rcv_process_existing(sm, p_node, p_madw); 967 968 CL_PLOCK_RELEASE(sm->p_lock); 969 970Exit: 971 OSM_LOG_EXIT(sm->p_log); 972} 973