1/* 2 * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved. 3 * Copyright (c) 2002-2008 Mellanox Technologies LTD. All rights reserved. 4 * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. 5 * Copyright (c) 2008 Xsigo Systems Inc. All rights reserved. 6 * 7 * This software is available to you under a choice of one of two 8 * licenses. You may choose to be licensed under the terms of the GNU 9 * General Public License (GPL) Version 2, available from the file 10 * COPYING in the main directory of this source tree, or the 11 * OpenIB.org BSD license below: 12 * 13 * Redistribution and use in source and binary forms, with or 14 * without modification, are permitted provided that the following 15 * conditions are met: 16 * 17 * - Redistributions of source code must retain the above 18 * copyright notice, this list of conditions and the following 19 * disclaimer. 20 * 21 * - Redistributions in binary form must reproduce the above 22 * copyright notice, this list of conditions and the following 23 * disclaimer in the documentation and/or other materials 24 * provided with the distribution. 25 * 26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 33 * SOFTWARE. 34 * 35 */ 36 37/* 38 * Abstract: 39 * Implementation of osm_drop_mgr_t. 40 * This object represents the Drop Manager object. 41 * This object is part of the opensm family of objects. 42 */ 43 44#if HAVE_CONFIG_H 45# include <config.h> 46#endif /* HAVE_CONFIG_H */ 47 48#include <stdlib.h> 49#include <string.h> 50#include <iba/ib_types.h> 51#include <complib/cl_qmap.h> 52#include <complib/cl_passivelock.h> 53#include <complib/cl_debug.h> 54#include <complib/cl_ptr_vector.h> 55#include <opensm/osm_sm.h> 56#include <opensm/osm_router.h> 57#include <opensm/osm_switch.h> 58#include <opensm/osm_node.h> 59#include <opensm/osm_helper.h> 60#include <opensm/osm_mcm_info.h> 61#include <opensm/osm_multicast.h> 62#include <opensm/osm_remote_sm.h> 63#include <opensm/osm_inform.h> 64#include <opensm/osm_ucast_mgr.h> 65 66/********************************************************************** 67 **********************************************************************/ 68static void 69__osm_drop_mgr_remove_router(osm_sm_t * sm, IN const ib_net64_t portguid) 70{ 71 osm_router_t *p_rtr; 72 cl_qmap_t *p_rtr_guid_tbl; 73 74 p_rtr_guid_tbl = &sm->p_subn->rtr_guid_tbl; 75 p_rtr = (osm_router_t *) cl_qmap_remove(p_rtr_guid_tbl, portguid); 76 if (p_rtr != (osm_router_t *) cl_qmap_end(p_rtr_guid_tbl)) { 77 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, 78 "Cleaned router for port guid 0x%016" PRIx64 "\n", 79 cl_ntoh64(portguid)); 80 osm_router_delete(&p_rtr); 81 } 82} 83 84/********************************************************************** 85 **********************************************************************/ 86static void drop_mgr_clean_physp(osm_sm_t * sm, IN osm_physp_t * p_physp) 87{ 88 osm_physp_t *p_remote_physp; 89 osm_port_t *p_remote_port; 90 91 p_remote_physp = osm_physp_get_remote(p_physp); 92 if (p_remote_physp) { 93 p_remote_port = osm_get_port_by_guid(sm->p_subn, 94 p_remote_physp->port_guid); 95 96 if (p_remote_port) { 97 /* Let's check if this is a case of link that is lost (both ports 98 weren't recognized), or a "hiccup" in the subnet - in which case 99 the remote port was recognized, and its state is ACTIVE. 100 If this is just a "hiccup" - force a heavy sweep in the next sweep. 101 We don't want to lose that part of the subnet. */ 102 if (p_remote_port->discovery_count && 103 osm_physp_get_port_state(p_remote_physp) == 104 IB_LINK_ACTIVE) { 105 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, 106 "Forcing new heavy sweep. Remote " 107 "port 0x%016" PRIx64 " port num: %u " 108 "was recognized in ACTIVE state\n", 109 cl_ntoh64(p_remote_physp->port_guid), 110 p_remote_physp->port_num); 111 sm->p_subn->force_heavy_sweep = TRUE; 112 } 113 114 /* If the remote node is ca or router - need to remove the remote port, 115 since it is no longer reachable. This can be done if we reset the 116 discovery count of the remote port. */ 117 if (!p_remote_physp->p_node->sw) { 118 p_remote_port->discovery_count = 0; 119 OSM_LOG(sm->p_log, OSM_LOG_DEBUG, 120 "Resetting discovery count of node: " 121 "0x%016" PRIx64 " port num:%u\n", 122 cl_ntoh64(osm_node_get_node_guid 123 (p_remote_physp->p_node)), 124 p_remote_physp->port_num); 125 } 126 } 127 128 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, 129 "Unlinking local node 0x%016" PRIx64 ", port %u" 130 "\n\t\t\t\tand remote node 0x%016" PRIx64 131 ", port %u\n", 132 cl_ntoh64(osm_node_get_node_guid(p_physp->p_node)), 133 p_physp->port_num, 134 cl_ntoh64(osm_node_get_node_guid 135 (p_remote_physp->p_node)), 136 p_remote_physp->port_num); 137 138 if (sm->ucast_mgr.cache_valid) 139 osm_ucast_cache_add_link(&sm->ucast_mgr, 140 p_physp, p_remote_physp); 141 142 osm_physp_unlink(p_physp, p_remote_physp); 143 144 } 145 146 OSM_LOG(sm->p_log, OSM_LOG_DEBUG, 147 "Clearing node 0x%016" PRIx64 " physical port number %u\n", 148 cl_ntoh64(osm_node_get_node_guid(p_physp->p_node)), 149 p_physp->port_num); 150 151 osm_physp_destroy(p_physp); 152} 153 154/********************************************************************** 155 **********************************************************************/ 156static void __osm_drop_mgr_remove_port(osm_sm_t * sm, IN osm_port_t * p_port) 157{ 158 ib_net64_t port_guid; 159 osm_port_t *p_port_check; 160 cl_qmap_t *p_sm_guid_tbl; 161 osm_mcm_info_t *p_mcm; 162 osm_mgrp_t *p_mgrp; 163 cl_ptr_vector_t *p_port_lid_tbl; 164 uint16_t min_lid_ho; 165 uint16_t max_lid_ho; 166 uint16_t lid_ho; 167 osm_node_t *p_node; 168 osm_remote_sm_t *p_sm; 169 ib_gid_t port_gid; 170 ib_mad_notice_attr_t notice; 171 ib_api_status_t status; 172 173 OSM_LOG_ENTER(sm->p_log); 174 175 port_guid = osm_port_get_guid(p_port); 176 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, 177 "Unreachable port 0x%016" PRIx64 "\n", cl_ntoh64(port_guid)); 178 179 p_port_check = 180 (osm_port_t *) cl_qmap_remove(&sm->p_subn->port_guid_tbl, 181 port_guid); 182 if (p_port_check != p_port) { 183 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0101: " 184 "Port 0x%016" PRIx64 " not in guid table\n", 185 cl_ntoh64(port_guid)); 186 goto Exit; 187 } 188 189 p_sm_guid_tbl = &sm->p_subn->sm_guid_tbl; 190 p_sm = (osm_remote_sm_t *) cl_qmap_remove(p_sm_guid_tbl, port_guid); 191 if (p_sm != (osm_remote_sm_t *) cl_qmap_end(p_sm_guid_tbl)) { 192 /* need to remove this item */ 193 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, 194 "Cleaned SM for port guid 0x%016" PRIx64 "\n", 195 cl_ntoh64(port_guid)); 196 197 free(p_sm); 198 } 199 200 __osm_drop_mgr_remove_router(sm, port_guid); 201 202 osm_port_get_lid_range_ho(p_port, &min_lid_ho, &max_lid_ho); 203 204 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, 205 "Clearing abandoned LID range [%u,%u]\n", 206 min_lid_ho, max_lid_ho); 207 208 p_port_lid_tbl = &sm->p_subn->port_lid_tbl; 209 for (lid_ho = min_lid_ho; lid_ho <= max_lid_ho; lid_ho++) 210 cl_ptr_vector_set(p_port_lid_tbl, lid_ho, NULL); 211 212 drop_mgr_clean_physp(sm, p_port->p_physp); 213 214 p_mcm = (osm_mcm_info_t *) cl_qlist_remove_head(&p_port->mcm_list); 215 while (p_mcm != (osm_mcm_info_t *) cl_qlist_end(&p_port->mcm_list)) { 216 p_mgrp = osm_get_mgrp_by_mlid(sm->p_subn, p_mcm->mlid); 217 if (p_mgrp) { 218 osm_mgrp_delete_port(sm->p_subn, sm->p_log, 219 p_mgrp, p_port->guid); 220 osm_mcm_info_delete((osm_mcm_info_t *) p_mcm); 221 } 222 p_mcm = 223 (osm_mcm_info_t *) cl_qlist_remove_head(&p_port->mcm_list); 224 } 225 226 /* initialize the p_node - may need to get node_desc later */ 227 p_node = p_port->p_node; 228 229 osm_port_delete(&p_port); 230 231 /* issue a notice - trap 65 */ 232 233 /* details of the notice */ 234 notice.generic_type = 0x83; /* is generic subn mgt type */ 235 ib_notice_set_prod_type_ho(¬ice, 4); /* A class manager generator */ 236 /* endport ceases to be reachable */ 237 notice.g_or_v.generic.trap_num = CL_HTON16(65); 238 /* The sm_base_lid is saved in network order already. */ 239 notice.issuer_lid = sm->p_subn->sm_base_lid; 240 /* following C14-72.1.2 and table 119 p725 */ 241 /* we need to provide the GID */ 242 port_gid.unicast.prefix = sm->p_subn->opt.subnet_prefix; 243 port_gid.unicast.interface_id = port_guid; 244 memcpy(&(notice.data_details.ntc_64_67.gid), 245 &(port_gid), sizeof(ib_gid_t)); 246 247 /* According to page 653 - the issuer gid in this case of trap 248 is the SM gid, since the SM is the initiator of this trap. */ 249 notice.issuer_gid.unicast.prefix = sm->p_subn->opt.subnet_prefix; 250 notice.issuer_gid.unicast.interface_id = sm->p_subn->sm_port_guid; 251 252 status = osm_report_notice(sm->p_log, sm->p_subn, ¬ice); 253 if (status != IB_SUCCESS) { 254 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0103: " 255 "Error sending trap reports (%s)\n", 256 ib_get_err_str(status)); 257 goto Exit; 258 } 259 260 OSM_LOG(sm->p_log, OSM_LOG_INFO, 261 "Removed port with GUID:0x%016" PRIx64 262 " LID range [%u, %u] of node:%s\n", 263 cl_ntoh64(port_gid.unicast.interface_id), 264 min_lid_ho, max_lid_ho, 265 p_node ? p_node->print_desc : "UNKNOWN"); 266 267Exit: 268 OSM_LOG_EXIT(sm->p_log); 269} 270 271/********************************************************************** 272 **********************************************************************/ 273static void __osm_drop_mgr_remove_switch(osm_sm_t * sm, IN osm_node_t * p_node) 274{ 275 osm_switch_t *p_sw; 276 cl_qmap_t *p_sw_guid_tbl; 277 ib_net64_t node_guid; 278 279 OSM_LOG_ENTER(sm->p_log); 280 281 node_guid = osm_node_get_node_guid(p_node); 282 p_sw_guid_tbl = &sm->p_subn->sw_guid_tbl; 283 284 p_sw = (osm_switch_t *) cl_qmap_remove(p_sw_guid_tbl, node_guid); 285 if (p_sw == (osm_switch_t *) cl_qmap_end(p_sw_guid_tbl)) { 286 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0102: " 287 "Node 0x%016" PRIx64 " not in switch table\n", 288 cl_ntoh64(osm_node_get_node_guid(p_node))); 289 } else { 290 p_node->sw = NULL; 291 osm_switch_delete(&p_sw); 292 } 293 294 OSM_LOG_EXIT(sm->p_log); 295} 296 297/********************************************************************** 298 **********************************************************************/ 299static boolean_t 300__osm_drop_mgr_process_node(osm_sm_t * sm, IN osm_node_t * p_node) 301{ 302 osm_physp_t *p_physp; 303 osm_port_t *p_port; 304 osm_node_t *p_node_check; 305 uint32_t port_num; 306 uint32_t max_ports; 307 ib_net64_t port_guid; 308 boolean_t return_val = FALSE; 309 310 OSM_LOG_ENTER(sm->p_log); 311 312 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, 313 "Unreachable node 0x%016" PRIx64 "\n", 314 cl_ntoh64(osm_node_get_node_guid(p_node))); 315 316 if (sm->ucast_mgr.cache_valid) 317 osm_ucast_cache_add_node(&sm->ucast_mgr, p_node); 318 319 /* 320 Delete all the logical and physical port objects 321 associated with this node. 322 */ 323 max_ports = osm_node_get_num_physp(p_node); 324 for (port_num = 0; port_num < max_ports; port_num++) { 325 p_physp = osm_node_get_physp_ptr(p_node, port_num); 326 if (p_physp) { 327 port_guid = osm_physp_get_port_guid(p_physp); 328 329 p_port = osm_get_port_by_guid(sm->p_subn, port_guid); 330 331 if (p_port) 332 __osm_drop_mgr_remove_port(sm, p_port); 333 else 334 drop_mgr_clean_physp(sm, p_physp); 335 } 336 } 337 338 return_val = TRUE; 339 340 if (p_node->sw) 341 __osm_drop_mgr_remove_switch(sm, p_node); 342 343 p_node_check = 344 (osm_node_t *) cl_qmap_remove(&sm->p_subn->node_guid_tbl, 345 osm_node_get_node_guid(p_node)); 346 if (p_node_check != p_node) { 347 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0105: " 348 "Node 0x%016" PRIx64 " not in guid table\n", 349 cl_ntoh64(osm_node_get_node_guid(p_node))); 350 } 351 352 /* free memory allocated to node */ 353 osm_node_delete(&p_node); 354 355 OSM_LOG_EXIT(sm->p_log); 356 return (return_val); 357} 358 359/********************************************************************** 360 **********************************************************************/ 361static void __osm_drop_mgr_check_node(osm_sm_t * sm, IN osm_node_t * p_node) 362{ 363 ib_net64_t node_guid; 364 osm_physp_t *p_physp; 365 osm_port_t *p_port; 366 ib_net64_t port_guid; 367 368 OSM_LOG_ENTER(sm->p_log); 369 370 node_guid = osm_node_get_node_guid(p_node); 371 372 if (osm_node_get_type(p_node) != IB_NODE_TYPE_SWITCH) { 373 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0107: " 374 "Node 0x%016" PRIx64 " is not a switch node\n", 375 cl_ntoh64(node_guid)); 376 goto Exit; 377 } 378 379 /* Make sure we have a switch object for this node */ 380 if (!p_node->sw) { 381 /* We do not have switch info for this node */ 382 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, 383 "Node 0x%016" PRIx64 " no switch in table\n", 384 cl_ntoh64(node_guid)); 385 386 __osm_drop_mgr_process_node(sm, p_node); 387 goto Exit; 388 } 389 390 /* Make sure we have a port object for port zero */ 391 p_physp = osm_node_get_physp_ptr(p_node, 0); 392 if (!p_physp) { 393 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, 394 "Node 0x%016" PRIx64 " no valid physical port 0\n", 395 cl_ntoh64(node_guid)); 396 397 __osm_drop_mgr_process_node(sm, p_node); 398 goto Exit; 399 } 400 401 port_guid = osm_physp_get_port_guid(p_physp); 402 403 p_port = osm_get_port_by_guid(sm->p_subn, port_guid); 404 405 if (!p_port) { 406 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, 407 "Node 0x%016" PRIx64 " has no port object\n", 408 cl_ntoh64(node_guid)); 409 410 __osm_drop_mgr_process_node(sm, p_node); 411 goto Exit; 412 } 413 414 if (p_port->discovery_count == 0) { 415 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, 416 "Node 0x%016" PRIx64 " port has discovery count zero\n", 417 cl_ntoh64(node_guid)); 418 419 __osm_drop_mgr_process_node(sm, p_node); 420 goto Exit; 421 } 422 423Exit: 424 OSM_LOG_EXIT(sm->p_log); 425 return; 426} 427 428/********************************************************************** 429 **********************************************************************/ 430void osm_drop_mgr_process(osm_sm_t * sm) 431{ 432 cl_qmap_t *p_node_guid_tbl; 433 cl_qmap_t *p_port_guid_tbl; 434 osm_port_t *p_port; 435 osm_port_t *p_next_port; 436 osm_node_t *p_node; 437 osm_node_t *p_next_node; 438 439 CL_ASSERT(sm); 440 441 OSM_LOG_ENTER(sm->p_log); 442 443 p_node_guid_tbl = &sm->p_subn->node_guid_tbl; 444 p_port_guid_tbl = &sm->p_subn->port_guid_tbl; 445 446 CL_PLOCK_EXCL_ACQUIRE(sm->p_lock); 447 448 p_next_node = (osm_node_t *) cl_qmap_head(p_node_guid_tbl); 449 while (p_next_node != (osm_node_t *) cl_qmap_end(p_node_guid_tbl)) { 450 p_node = p_next_node; 451 p_next_node = 452 (osm_node_t *) cl_qmap_next(&p_next_node->map_item); 453 454 CL_ASSERT(cl_qmap_key(&p_node->map_item) == 455 osm_node_get_node_guid(p_node)); 456 457 OSM_LOG(sm->p_log, OSM_LOG_DEBUG, 458 "Checking node 0x%016" PRIx64 "\n", 459 cl_ntoh64(osm_node_get_node_guid(p_node))); 460 461 /* 462 Check if this node was discovered during the last sweep. 463 If not, it is unreachable in the current subnet, and 464 should therefore be removed from the subnet object. 465 */ 466 if (p_node->discovery_count == 0) 467 __osm_drop_mgr_process_node(sm, p_node); 468 } 469 470 /* 471 Go over all the nodes. If the node is a switch - make sure 472 there is also a switch record for it, and a portInfo record for 473 port zero of of the node. 474 If not - this means that there was some error in getting the data 475 of this node. Drop the node. 476 */ 477 p_next_node = (osm_node_t *) cl_qmap_head(p_node_guid_tbl); 478 while (p_next_node != (osm_node_t *) cl_qmap_end(p_node_guid_tbl)) { 479 p_node = p_next_node; 480 p_next_node = 481 (osm_node_t *) cl_qmap_next(&p_next_node->map_item); 482 483 OSM_LOG(sm->p_log, OSM_LOG_DEBUG, 484 "Checking full discovery of node 0x%016" PRIx64 "\n", 485 cl_ntoh64(osm_node_get_node_guid(p_node))); 486 487 if (osm_node_get_type(p_node) != IB_NODE_TYPE_SWITCH) 488 continue; 489 490 /* We are handling a switch node */ 491 __osm_drop_mgr_check_node(sm, p_node); 492 } 493 494 p_next_port = (osm_port_t *) cl_qmap_head(p_port_guid_tbl); 495 while (p_next_port != (osm_port_t *) cl_qmap_end(p_port_guid_tbl)) { 496 p_port = p_next_port; 497 p_next_port = 498 (osm_port_t *) cl_qmap_next(&p_next_port->map_item); 499 500 CL_ASSERT(cl_qmap_key(&p_port->map_item) == 501 osm_port_get_guid(p_port)); 502 503 OSM_LOG(sm->p_log, OSM_LOG_DEBUG, 504 "Checking port 0x%016" PRIx64 "\n", 505 cl_ntoh64(osm_port_get_guid(p_port))); 506 507 /* 508 If the port is unreachable, remove it from the guid table. 509 */ 510 if (p_port->discovery_count == 0) 511 __osm_drop_mgr_remove_port(sm, p_port); 512 } 513 514 CL_PLOCK_RELEASE(sm->p_lock); 515 OSM_LOG_EXIT(sm->p_log); 516} 517