1/* 2 * Copyright (c) 2008 Mellanox Technologies LTD. All rights reserved. 3 * 4 * This software is available to you under a choice of one of two 5 * licenses. You may choose to be licensed under the terms of the GNU 6 * General Public License (GPL) Version 2, available from the file 7 * COPYING in the main directory of this source tree, or the 8 * OpenIB.org BSD license below: 9 * 10 * Redistribution and use in source and binary forms, with or 11 * without modification, are permitted provided that the following 12 * conditions are met: 13 * 14 * - Redistributions of source code must retain the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer. 17 * 18 * - Redistributions in binary form must reproduce the above 19 * copyright notice, this list of conditions and the following 20 * disclaimer in the documentation and/or other materials 21 * provided with the distribution. 22 * 23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * SOFTWARE. 31 * 32 */ 33 34/* 35 * Abstract: 36 * Implementation of OpenSM Cached Unicast Routing 37 * 38 * Environment: 39 * Linux User Mode 40 * 41 */ 42 43#if HAVE_CONFIG_H 44# include <config.h> 45#endif 46 47#include <stdlib.h> 48#include <string.h> 49#include <ctype.h> 50#include <errno.h> 51#include <iba/ib_types.h> 52#include <complib/cl_qmap.h> 53#include <complib/cl_pool.h> 54#include <complib/cl_debug.h> 55#include <opensm/osm_opensm.h> 56#include <opensm/osm_ucast_mgr.h> 57#include <opensm/osm_ucast_cache.h> 58#include <opensm/osm_switch.h> 59#include <opensm/osm_node.h> 60#include <opensm/osm_port.h> 61 62#define CACHE_SW_PORTS 36 63 64typedef struct cache_port { 65 boolean_t is_leaf; 66 uint16_t remote_lid_ho; 67} cache_port_t; 68 69typedef struct cache_switch { 70 cl_map_item_t map_item; 71 boolean_t dropped; 72 uint16_t max_lid_ho; 73 uint16_t num_hops; 74 uint8_t **hops; 75 uint8_t *lft; 76 uint8_t num_ports; 77 cache_port_t ports[0]; 78} cache_switch_t; 79 80/********************************************************************** 81 **********************************************************************/ 82 83static uint16_t __cache_sw_get_base_lid_ho(cache_switch_t * p_sw) 84{ 85 return p_sw->ports[0].remote_lid_ho; 86} 87 88/********************************************************************** 89 **********************************************************************/ 90 91static boolean_t __cache_sw_is_leaf(cache_switch_t * p_sw) 92{ 93 return p_sw->ports[0].is_leaf; 94} 95 96/********************************************************************** 97 **********************************************************************/ 98 99static void __cache_sw_set_leaf(cache_switch_t * p_sw) 100{ 101 p_sw->ports[0].is_leaf = TRUE; 102} 103 104/********************************************************************** 105 **********************************************************************/ 106 107static cache_switch_t *__cache_sw_new(uint16_t lid_ho, unsigned num_ports) 108{ 109 cache_switch_t *p_cache_sw = malloc(sizeof(cache_switch_t) + 110 num_ports * sizeof(cache_port_t)); 111 if (!p_cache_sw) 112 return NULL; 113 114 memset(p_cache_sw, 0, 115 sizeof(*p_cache_sw) + num_ports * sizeof(cache_port_t)); 116 117 p_cache_sw->num_ports = num_ports; 118 119 /* port[0] fields represent this switch details - lid and type */ 120 p_cache_sw->ports[0].remote_lid_ho = lid_ho; 121 p_cache_sw->ports[0].is_leaf = FALSE; 122 123 return p_cache_sw; 124} 125 126/********************************************************************** 127 **********************************************************************/ 128 129static void __cache_sw_destroy(cache_switch_t * p_sw) 130{ 131 if (!p_sw) 132 return; 133 134 if (p_sw->lft) 135 free(p_sw->lft); 136 if (p_sw->hops) 137 free(p_sw->hops); 138 free(p_sw); 139} 140 141/********************************************************************** 142 **********************************************************************/ 143 144static cache_switch_t *__cache_get_sw(osm_ucast_mgr_t * p_mgr, uint16_t lid_ho) 145{ 146 cache_switch_t *p_cache_sw = (cache_switch_t *) 147 cl_qmap_get(&p_mgr->cache_sw_tbl, lid_ho); 148 if (p_cache_sw == (cache_switch_t *) 149 cl_qmap_end(&p_mgr->cache_sw_tbl)) 150 p_cache_sw = NULL; 151 152 return p_cache_sw; 153} 154 155/********************************************************************** 156 **********************************************************************/ 157static void __cache_add_sw_link(osm_ucast_mgr_t * p_mgr, osm_physp_t *p, 158 uint16_t remote_lid_ho, boolean_t is_ca) 159{ 160 cache_switch_t *p_cache_sw; 161 uint16_t lid_ho = cl_ntoh16(osm_node_get_base_lid(p->p_node, 0)); 162 163 OSM_LOG_ENTER(p_mgr->p_log); 164 165 if (!lid_ho || !remote_lid_ho || !p->port_num) 166 goto Exit; 167 168 OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, 169 "Caching switch port: lid %u [port %u] -> lid %u (%s)\n", 170 lid_ho, p->port_num, remote_lid_ho, (is_ca) ? "CA/RTR" : "SW"); 171 172 p_cache_sw = __cache_get_sw(p_mgr, lid_ho); 173 if (!p_cache_sw) { 174 p_cache_sw = __cache_sw_new(lid_ho, p->p_node->sw->num_ports); 175 if (!p_cache_sw) { 176 OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, 177 "ERR AD01: Out of memory - cache is invalid\n"); 178 osm_ucast_cache_invalidate(p_mgr); 179 goto Exit; 180 } 181 cl_qmap_insert(&p_mgr->cache_sw_tbl, lid_ho, 182 &p_cache_sw->map_item); 183 } 184 185 if (p->port_num >= p_cache_sw->num_ports) { 186 OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, 187 "ERR AD02: Wrong switch? - cache is invalid\n"); 188 osm_ucast_cache_invalidate(p_mgr); 189 goto Exit; 190 } 191 192 if (is_ca) 193 __cache_sw_set_leaf(p_cache_sw); 194 195 if (p_cache_sw->ports[p->port_num].remote_lid_ho == 0) { 196 /* cache this link only if it hasn't been already cached */ 197 p_cache_sw->ports[p->port_num].remote_lid_ho = remote_lid_ho; 198 p_cache_sw->ports[p->port_num].is_leaf = is_ca; 199 } 200Exit: 201 OSM_LOG_EXIT(p_mgr->p_log); 202} 203 204/********************************************************************** 205 **********************************************************************/ 206 207static void __cache_cleanup_switches(osm_ucast_mgr_t * p_mgr) 208{ 209 cache_switch_t *p_sw; 210 cache_switch_t *p_next_sw; 211 unsigned port_num; 212 boolean_t found_port; 213 214 if (!p_mgr->cache_valid) 215 return; 216 217 p_next_sw = (cache_switch_t *) cl_qmap_head(&p_mgr->cache_sw_tbl); 218 while (p_next_sw != 219 (cache_switch_t *) cl_qmap_end(&p_mgr->cache_sw_tbl)) { 220 p_sw = p_next_sw; 221 p_next_sw = (cache_switch_t *) cl_qmap_next(&p_sw->map_item); 222 223 found_port = FALSE; 224 for (port_num = 1; port_num < p_sw->num_ports; port_num++) 225 if (p_sw->ports[port_num].remote_lid_ho) 226 found_port = TRUE; 227 228 if (!found_port) { 229 cl_qmap_remove_item(&p_mgr->cache_sw_tbl, 230 &p_sw->map_item); 231 __cache_sw_destroy(p_sw); 232 } 233 } 234} 235 236/********************************************************************** 237 **********************************************************************/ 238 239static void 240__cache_check_link_change(osm_ucast_mgr_t * p_mgr, 241 osm_physp_t * p_physp_1, osm_physp_t * p_physp_2) 242{ 243 OSM_LOG_ENTER(p_mgr->p_log); 244 CL_ASSERT(p_physp_1 && p_physp_2); 245 246 if (!p_mgr->cache_valid) 247 goto Exit; 248 249 if (!p_physp_1->p_remote_physp && !p_physp_2->p_remote_physp) 250 /* both ports were down - new link */ 251 goto Exit; 252 253 /* unicast cache cannot tolerate any link location change */ 254 255 if ((p_physp_1->p_remote_physp && 256 p_physp_1->p_remote_physp->p_remote_physp) || 257 (p_physp_2->p_remote_physp && 258 p_physp_2->p_remote_physp->p_remote_physp)) { 259 OSM_LOG(p_mgr->p_log, OSM_LOG_INFO, 260 "Link location change discovered - cache is invalid\n"); 261 osm_ucast_cache_invalidate(p_mgr); 262 goto Exit; 263 } 264Exit: 265 OSM_LOG_EXIT(p_mgr->p_log); 266} 267 268/********************************************************************** 269 **********************************************************************/ 270 271static void __cache_remove_port(osm_ucast_mgr_t * p_mgr, uint16_t lid_ho, 272 uint8_t port_num, uint16_t remote_lid_ho, 273 boolean_t is_ca) 274{ 275 cache_switch_t *p_cache_sw; 276 277 OSM_LOG_ENTER(p_mgr->p_log); 278 279 if (!p_mgr->cache_valid) 280 goto Exit; 281 282 p_cache_sw = __cache_get_sw(p_mgr, lid_ho); 283 if (!p_cache_sw) { 284 OSM_LOG(p_mgr->p_log, OSM_LOG_INFO, 285 "Found uncached switch/link (lid %u, port %u) - " 286 "cache is invalid\n", lid_ho, port_num); 287 osm_ucast_cache_invalidate(p_mgr); 288 goto Exit; 289 } 290 291 if (port_num >= p_cache_sw->num_ports || 292 !p_cache_sw->ports[port_num].remote_lid_ho) { 293 OSM_LOG(p_mgr->p_log, OSM_LOG_INFO, 294 "Found uncached switch link (lid %u, port %u) - " 295 "cache is invalid\n", lid_ho, port_num); 296 osm_ucast_cache_invalidate(p_mgr); 297 goto Exit; 298 } 299 300 if (p_cache_sw->ports[port_num].remote_lid_ho != remote_lid_ho) { 301 OSM_LOG(p_mgr->p_log, OSM_LOG_INFO, 302 "Remote lid change on switch lid %u, port %u " 303 "(was %u, now %u) - cache is invalid\n", 304 lid_ho, port_num, 305 p_cache_sw->ports[port_num].remote_lid_ho, 306 remote_lid_ho); 307 osm_ucast_cache_invalidate(p_mgr); 308 goto Exit; 309 } 310 311 if ((p_cache_sw->ports[port_num].is_leaf && !is_ca) || 312 (!p_cache_sw->ports[port_num].is_leaf && is_ca)) { 313 OSM_LOG(p_mgr->p_log, OSM_LOG_INFO, 314 "Remote node type change on switch lid %u, port %u - " 315 "cache is invalid\n", lid_ho, port_num); 316 osm_ucast_cache_invalidate(p_mgr); 317 goto Exit; 318 } 319 320 OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, 321 "New link from lid %u, port %u to lid %u - " 322 "found in cache\n", lid_ho, port_num, remote_lid_ho); 323 324 /* the new link was cached - clean it from the cache */ 325 326 p_cache_sw->ports[port_num].remote_lid_ho = 0; 327 p_cache_sw->ports[port_num].is_leaf = FALSE; 328Exit: 329 OSM_LOG_EXIT(p_mgr->p_log); 330} /* __cache_remove_port() */ 331 332/********************************************************************** 333 **********************************************************************/ 334 335static void 336__cache_restore_ucast_info(osm_ucast_mgr_t * p_mgr, 337 cache_switch_t * p_cache_sw, osm_switch_t * p_sw) 338{ 339 if (!p_mgr->cache_valid) 340 return; 341 342 /* when seting unicast info, the cached port 343 should have all the required info */ 344 CL_ASSERT(p_cache_sw->max_lid_ho && p_cache_sw->lft && 345 p_cache_sw->num_hops && p_cache_sw->hops); 346 347 p_sw->max_lid_ho = p_cache_sw->max_lid_ho; 348 349 if (p_sw->new_lft) 350 free(p_sw->new_lft); 351 p_sw->new_lft = p_cache_sw->lft; 352 p_cache_sw->lft = NULL; 353 354 p_sw->num_hops = p_cache_sw->num_hops; 355 p_cache_sw->num_hops = 0; 356 if (p_sw->hops) 357 free(p_sw->hops); 358 p_sw->hops = p_cache_sw->hops; 359 p_cache_sw->hops = NULL; 360} 361 362/********************************************************************** 363 **********************************************************************/ 364 365static void __ucast_cache_dump(osm_ucast_mgr_t * p_mgr) 366{ 367 cache_switch_t *p_sw; 368 unsigned i; 369 370 OSM_LOG_ENTER(p_mgr->p_log); 371 372 if (!osm_log_is_active(p_mgr->p_log, OSM_LOG_DEBUG)) 373 goto Exit; 374 375 OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, 376 "Dumping missing nodes/links as logged by unicast cache:\n"); 377 for (p_sw = (cache_switch_t *) cl_qmap_head(&p_mgr->cache_sw_tbl); 378 p_sw != (cache_switch_t *) cl_qmap_end(&p_mgr->cache_sw_tbl); 379 p_sw = (cache_switch_t *) cl_qmap_next(&p_sw->map_item)) { 380 381 OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, 382 "\t Switch lid %u %s%s\n", 383 __cache_sw_get_base_lid_ho(p_sw), 384 (__cache_sw_is_leaf(p_sw)) ? "[leaf switch] " : "", 385 (p_sw->dropped) ? "[whole switch missing]" : ""); 386 387 for (i = 1; i < p_sw->num_ports; i++) 388 if (p_sw->ports[i].remote_lid_ho > 0) 389 OSM_LOG(p_mgr->p_log, 390 OSM_LOG_DEBUG, 391 "\t - port %u -> lid %u %s\n", 392 i, p_sw->ports[i].remote_lid_ho, 393 (p_sw->ports[i].is_leaf) ? 394 "[remote node is leaf]" : ""); 395 } 396Exit: 397 OSM_LOG_EXIT(p_mgr->p_log); 398} 399 400/********************************************************************** 401 **********************************************************************/ 402 403void osm_ucast_cache_invalidate(osm_ucast_mgr_t * p_mgr) 404{ 405 cache_switch_t *p_sw; 406 cache_switch_t *p_next_sw; 407 408 OSM_LOG_ENTER(p_mgr->p_log); 409 OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, "Invalidating unicast cache\n"); 410 411 if (!p_mgr->cache_valid) 412 goto Exit; 413 414 p_mgr->cache_valid = FALSE; 415 416 p_next_sw = (cache_switch_t *) cl_qmap_head(&p_mgr->cache_sw_tbl); 417 while (p_next_sw != 418 (cache_switch_t *) cl_qmap_end(&p_mgr->cache_sw_tbl)) { 419 p_sw = p_next_sw; 420 p_next_sw = (cache_switch_t *) cl_qmap_next(&p_sw->map_item); 421 __cache_sw_destroy(p_sw); 422 } 423 cl_qmap_remove_all(&p_mgr->cache_sw_tbl); 424Exit: 425 OSM_LOG_EXIT(p_mgr->p_log); 426} 427 428/********************************************************************** 429 **********************************************************************/ 430 431static void ucast_cache_validate(osm_ucast_mgr_t * p_mgr) 432{ 433 cache_switch_t *p_cache_sw; 434 cache_switch_t *p_remote_cache_sw; 435 unsigned port_num; 436 unsigned max_ports; 437 uint8_t remote_node_type; 438 uint16_t lid_ho; 439 uint16_t remote_lid_ho; 440 osm_switch_t *p_sw; 441 osm_switch_t *p_remote_sw; 442 osm_node_t *p_node; 443 osm_physp_t *p_physp; 444 osm_physp_t *p_remote_physp; 445 osm_port_t *p_remote_port; 446 cl_qmap_t *p_sw_tbl; 447 448 OSM_LOG_ENTER(p_mgr->p_log); 449 if (!p_mgr->cache_valid) 450 goto Exit; 451 452 /* If there are no switches in the subnet, we are done */ 453 p_sw_tbl = &p_mgr->p_subn->sw_guid_tbl; 454 if (cl_qmap_count(p_sw_tbl) == 0) { 455 osm_ucast_cache_invalidate(p_mgr); 456 goto Exit; 457 } 458 459 /* 460 * Scan all the physical switch ports in the subnet. 461 * If the port need_update flag is on, check whether 462 * it's just some node/port reset or a cached topology 463 * change. Otherwise the cache is invalid. 464 */ 465 for (p_sw = (osm_switch_t *) cl_qmap_head(p_sw_tbl); 466 p_sw != (osm_switch_t *) cl_qmap_end(p_sw_tbl); 467 p_sw = (osm_switch_t *) cl_qmap_next(&p_sw->map_item)) { 468 469 p_node = p_sw->p_node; 470 471 lid_ho = cl_ntoh16(osm_node_get_base_lid(p_node, 0)); 472 p_cache_sw = __cache_get_sw(p_mgr, lid_ho); 473 474 max_ports = osm_node_get_num_physp(p_node); 475 476 /* skip port 0 */ 477 for (port_num = 1; port_num < max_ports; port_num++) { 478 479 p_physp = osm_node_get_physp_ptr(p_node, port_num); 480 481 if (!p_physp || !p_physp->p_remote_physp || 482 !osm_physp_link_exists(p_physp, 483 p_physp->p_remote_physp)) 484 /* no valid link */ 485 continue; 486 487 /* 488 * While scanning all the physical ports in the subnet, 489 * mark corresponding leaf switches in the cache. 490 */ 491 if (p_cache_sw && 492 !p_cache_sw->dropped && 493 !__cache_sw_is_leaf(p_cache_sw) && 494 p_physp->p_remote_physp->p_node && 495 osm_node_get_type(p_physp->p_remote_physp-> 496 p_node) != IB_NODE_TYPE_SWITCH) 497 __cache_sw_set_leaf(p_cache_sw); 498 499 if (!p_physp->need_update) 500 continue; 501 502 OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, 503 "Checking switch lid %u, port %u\n", 504 lid_ho, port_num); 505 506 p_remote_physp = osm_physp_get_remote(p_physp); 507 remote_node_type = 508 osm_node_get_type(p_remote_physp->p_node); 509 510 if (remote_node_type == IB_NODE_TYPE_SWITCH) 511 remote_lid_ho = 512 cl_ntoh16(osm_node_get_base_lid 513 (p_remote_physp->p_node, 0)); 514 else 515 remote_lid_ho = 516 cl_ntoh16(osm_node_get_base_lid 517 (p_remote_physp->p_node, 518 osm_physp_get_port_num 519 (p_remote_physp))); 520 521 if (!p_cache_sw || 522 port_num >= p_cache_sw->num_ports || 523 !p_cache_sw->ports[port_num].remote_lid_ho) { 524 /* 525 * There is some uncached change on the port. 526 * In general, the reasons might be as follows: 527 * - switch reset 528 * - port reset (or port down/up) 529 * - quick connection location change 530 * - new link (or new switch) 531 * 532 * First two reasons allow cache usage, while 533 * the last two reasons should invalidate cache. 534 * 535 * In case of quick connection location change, 536 * cache would have been invalidated by 537 * osm_ucast_cache_check_new_link() function. 538 * 539 * In case of new link between two known nodes, 540 * cache also would have been invalidated by 541 * osm_ucast_cache_check_new_link() function. 542 * 543 * Another reason is cached link between two 544 * known switches went back. In this case the 545 * osm_ucast_cache_check_new_link() function would 546 * clear both sides of the link from the cache 547 * during the discovery process, so effectively 548 * this would be equivalent to port reset. 549 * 550 * So three possible reasons remain: 551 * - switch reset 552 * - port reset (or port down/up) 553 * - link of a new switch 554 * 555 * To validate cache, we need to check only the 556 * third reason - link of a new node/switch: 557 * - If this is the local switch that is new, 558 * then it should have (p_sw->need_update == 2). 559 * - If the remote node is switch and it's new, 560 * then it also should have 561 * (p_sw->need_update == 2). 562 * - If the remote node is CA/RTR and it's new, 563 * then its port should have is_new flag on. 564 */ 565 if (p_sw->need_update == 2) { 566 OSM_LOG(p_mgr->p_log, OSM_LOG_INFO, 567 "New switch found (lid %u) - " 568 "cache is invalid\n", lid_ho); 569 osm_ucast_cache_invalidate(p_mgr); 570 goto Exit; 571 } 572 573 if (remote_node_type == IB_NODE_TYPE_SWITCH) { 574 575 p_remote_sw = 576 p_remote_physp->p_node->sw; 577 if (p_remote_sw->need_update == 2) { 578 /* this could also be case of 579 switch coming back with an 580 additional link that it 581 didn't have before */ 582 OSM_LOG(p_mgr->p_log, 583 OSM_LOG_INFO, 584 "New switch/link found (lid %u) - " 585 "cache is invalid\n", 586 remote_lid_ho); 587 osm_ucast_cache_invalidate 588 (p_mgr); 589 goto Exit; 590 } 591 } else { 592 /* 593 * Remote node is CA/RTR. 594 * Get p_port of the remote node and 595 * check its p_port->is_new flag. 596 */ 597 p_remote_port = 598 osm_get_port_by_guid(p_mgr->p_subn, 599 osm_physp_get_port_guid 600 (p_remote_physp)); 601 if (p_remote_port->is_new) { 602 OSM_LOG(p_mgr->p_log, 603 OSM_LOG_INFO, 604 "New CA/RTR found (lid %u) - " 605 "cache is invalid\n", 606 remote_lid_ho); 607 osm_ucast_cache_invalidate 608 (p_mgr); 609 goto Exit; 610 } 611 } 612 } else { 613 /* 614 * The change on the port is cached. 615 * In general, the reasons might be as follows: 616 * - link between two known nodes went back 617 * - one or more nodes went back, causing all 618 * the links to reappear 619 * 620 * If it was link that went back, then this case 621 * would have been taken care of during the 622 * discovery by osm_ucast_cache_check_new_link(), 623 * so it's some node that went back. 624 */ 625 if ((p_cache_sw->ports[port_num].is_leaf && 626 remote_node_type == IB_NODE_TYPE_SWITCH) || 627 (!p_cache_sw->ports[port_num].is_leaf && 628 remote_node_type != IB_NODE_TYPE_SWITCH)) { 629 OSM_LOG(p_mgr->p_log, OSM_LOG_INFO, 630 "Remote node type change on switch lid %u, port %u - " 631 "cache is invalid\n", 632 lid_ho, port_num); 633 osm_ucast_cache_invalidate(p_mgr); 634 goto Exit; 635 } 636 637 if (p_cache_sw->ports[port_num].remote_lid_ho != 638 remote_lid_ho) { 639 OSM_LOG(p_mgr->p_log, OSM_LOG_INFO, 640 "Remote lid change on switch lid %u, port %u" 641 "(was %u, now %u) - cache is invalid\n", 642 lid_ho, port_num, 643 p_cache_sw->ports[port_num]. 644 remote_lid_ho, remote_lid_ho); 645 osm_ucast_cache_invalidate(p_mgr); 646 goto Exit; 647 } 648 649 /* 650 * We don't care who is the node that has 651 * reappeared in the subnet (local or remote). 652 * What's important that the cached link matches 653 * the real fabrics link. 654 * Just clean it from cache. 655 */ 656 657 p_cache_sw->ports[port_num].remote_lid_ho = 0; 658 p_cache_sw->ports[port_num].is_leaf = FALSE; 659 if (p_cache_sw->dropped) { 660 __cache_restore_ucast_info(p_mgr, 661 p_cache_sw, 662 p_sw); 663 p_cache_sw->dropped = FALSE; 664 } 665 666 OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, 667 "Restored link from cache: lid %u, port %u to lid %u\n", 668 lid_ho, port_num, remote_lid_ho); 669 } 670 } 671 } 672 673 /* Remove all the cached switches that 674 have all their ports restored */ 675 __cache_cleanup_switches(p_mgr); 676 677 /* 678 * Done scanning all the physical switch ports in the subnet. 679 * Now we need to check the other side: 680 * Scan all the cached switches and their ports: 681 * - If the cached switch is missing in the subnet 682 * (dropped flag is on), check that it's a leaf switch. 683 * If it's not a leaf, the cache is invalid, because 684 * cache can tolerate only leaf switch removal. 685 * - If the cached switch exists in fabric, check all 686 * its cached ports. These cached ports represent 687 * missing link in the fabric. 688 * The missing links that can be tolerated are: 689 * + link to missing CA/RTR 690 * + link to missing leaf switch 691 */ 692 for (p_cache_sw = (cache_switch_t *) cl_qmap_head(&p_mgr->cache_sw_tbl); 693 p_cache_sw != (cache_switch_t *) cl_qmap_end(&p_mgr->cache_sw_tbl); 694 p_cache_sw = 695 (cache_switch_t *) cl_qmap_next(&p_cache_sw->map_item)) { 696 697 if (p_cache_sw->dropped) { 698 if (!__cache_sw_is_leaf(p_cache_sw)) { 699 OSM_LOG(p_mgr->p_log, OSM_LOG_INFO, 700 "Missing non-leaf switch (lid %u) - " 701 "cache is invalid\n", 702 __cache_sw_get_base_lid_ho(p_cache_sw)); 703 osm_ucast_cache_invalidate(p_mgr); 704 goto Exit; 705 } 706 707 OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, 708 "Missing leaf switch (lid %u) - " 709 "continuing validation\n", 710 __cache_sw_get_base_lid_ho(p_cache_sw)); 711 continue; 712 } 713 714 for (port_num = 1; port_num < p_cache_sw->num_ports; port_num++) { 715 if (!p_cache_sw->ports[port_num].remote_lid_ho) 716 continue; 717 718 if (p_cache_sw->ports[port_num].is_leaf) { 719 CL_ASSERT(__cache_sw_is_leaf(p_cache_sw)); 720 OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, 721 "Switch lid %u, port %u: missing link to CA/RTR - " 722 "continuing validation\n", 723 __cache_sw_get_base_lid_ho(p_cache_sw), 724 port_num); 725 continue; 726 } 727 728 p_remote_cache_sw = __cache_get_sw(p_mgr, 729 p_cache_sw-> 730 ports[port_num]. 731 remote_lid_ho); 732 733 if (!p_remote_cache_sw || !p_remote_cache_sw->dropped) { 734 OSM_LOG(p_mgr->p_log, OSM_LOG_INFO, 735 "Switch lid %u, port %u: missing link to existing switch - " 736 "cache is invalid\n", 737 __cache_sw_get_base_lid_ho(p_cache_sw), 738 port_num); 739 osm_ucast_cache_invalidate(p_mgr); 740 goto Exit; 741 } 742 743 if (!__cache_sw_is_leaf(p_remote_cache_sw)) { 744 OSM_LOG(p_mgr->p_log, OSM_LOG_INFO, 745 "Switch lid %u, port %u: missing link to non-leaf switch - " 746 "cache is invalid\n", 747 __cache_sw_get_base_lid_ho(p_cache_sw), 748 port_num); 749 osm_ucast_cache_invalidate(p_mgr); 750 goto Exit; 751 } 752 753 /* 754 * At this point we know that the missing link is to 755 * a leaf switch. However, one case deserves a special 756 * treatment. If there was a link between two leaf 757 * switches, then missing leaf switch might break 758 * routing. It is possible that there are routes 759 * that use leaf switches to get from switch to switch 760 * and not just to get to the CAs behind the leaf switch. 761 */ 762 if (__cache_sw_is_leaf(p_cache_sw) && 763 __cache_sw_is_leaf(p_remote_cache_sw)) { 764 OSM_LOG(p_mgr->p_log, OSM_LOG_INFO, 765 "Switch lid %u, port %u: missing leaf-2-leaf link - " 766 "cache is invalid\n", 767 __cache_sw_get_base_lid_ho(p_cache_sw), 768 port_num); 769 osm_ucast_cache_invalidate(p_mgr); 770 goto Exit; 771 } 772 773 OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, 774 "Switch lid %u, port %u: missing remote leaf switch - " 775 "continuing validation\n", 776 __cache_sw_get_base_lid_ho(p_cache_sw), 777 port_num); 778 } 779 } 780 781 OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, "Unicast cache is valid\n"); 782 __ucast_cache_dump(p_mgr); 783Exit: 784 OSM_LOG_EXIT(p_mgr->p_log); 785} /* osm_ucast_cache_validate() */ 786 787/********************************************************************** 788 **********************************************************************/ 789 790void osm_ucast_cache_check_new_link(osm_ucast_mgr_t * p_mgr, 791 osm_node_t * p_node_1, uint8_t port_num_1, 792 osm_node_t * p_node_2, uint8_t port_num_2) 793{ 794 uint16_t lid_ho_1; 795 uint16_t lid_ho_2; 796 797 OSM_LOG_ENTER(p_mgr->p_log); 798 799 if (!p_mgr->cache_valid) 800 goto Exit; 801 802 __cache_check_link_change(p_mgr, 803 osm_node_get_physp_ptr(p_node_1, port_num_1), 804 osm_node_get_physp_ptr(p_node_2, port_num_2)); 805 806 if (!p_mgr->cache_valid) 807 goto Exit; 808 809 if (osm_node_get_type(p_node_1) != IB_NODE_TYPE_SWITCH && 810 osm_node_get_type(p_node_2) != IB_NODE_TYPE_SWITCH) { 811 OSM_LOG(p_mgr->p_log, OSM_LOG_INFO, 812 "Found CA/RTR-2-CA/RTR link - cache is invalid\n"); 813 osm_ucast_cache_invalidate(p_mgr); 814 goto Exit; 815 } 816 817 /* for code simplicity, we want the first node to be switch */ 818 if (osm_node_get_type(p_node_1) != IB_NODE_TYPE_SWITCH) { 819 osm_node_t *tmp_node = p_node_1; 820 uint8_t tmp_port_num = port_num_1; 821 p_node_1 = p_node_2; 822 port_num_1 = port_num_2; 823 p_node_2 = tmp_node; 824 port_num_2 = tmp_port_num; 825 } 826 827 lid_ho_1 = cl_ntoh16(osm_node_get_base_lid(p_node_1, 0)); 828 829 if (osm_node_get_type(p_node_2) == IB_NODE_TYPE_SWITCH) 830 lid_ho_2 = cl_ntoh16(osm_node_get_base_lid(p_node_2, 0)); 831 else 832 lid_ho_2 = 833 cl_ntoh16(osm_node_get_base_lid(p_node_2, port_num_2)); 834 835 if (!lid_ho_1 || !lid_ho_2) { 836 /* 837 * No lid assigned, which means that one of the nodes is new. 838 * Need to wait for lid manager to process this node. 839 * The switches and their links will be checked later when 840 * the whole cache validity will be verified. 841 */ 842 OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, 843 "Link port %u <-> %u reveals new node - cache will " 844 "be validated later\n", port_num_1, port_num_2); 845 goto Exit; 846 } 847 848 __cache_remove_port(p_mgr, lid_ho_1, port_num_1, lid_ho_2, 849 (osm_node_get_type(p_node_2) != 850 IB_NODE_TYPE_SWITCH)); 851 852 /* if node_2 is a switch, the link should be cleaned from its cache */ 853 854 if (osm_node_get_type(p_node_2) == IB_NODE_TYPE_SWITCH) 855 __cache_remove_port(p_mgr, lid_ho_2, 856 port_num_2, lid_ho_1, FALSE); 857 858Exit: 859 OSM_LOG_EXIT(p_mgr->p_log); 860} /* osm_ucast_cache_check_new_link() */ 861 862/********************************************************************** 863 **********************************************************************/ 864 865void osm_ucast_cache_add_link(osm_ucast_mgr_t * p_mgr, 866 osm_physp_t * p_physp1, osm_physp_t * p_physp2) 867{ 868 osm_node_t *p_node_1 = p_physp1->p_node, *p_node_2 = p_physp2->p_node; 869 uint16_t lid_ho_1, lid_ho_2; 870 871 OSM_LOG_ENTER(p_mgr->p_log); 872 873 if (!p_mgr->cache_valid) 874 goto Exit; 875 876 if (osm_node_get_type(p_node_1) != IB_NODE_TYPE_SWITCH && 877 osm_node_get_type(p_node_2) != IB_NODE_TYPE_SWITCH) { 878 OSM_LOG(p_mgr->p_log, OSM_LOG_INFO, 879 "Dropping CA-2-CA link - cache invalid\n"); 880 osm_ucast_cache_invalidate(p_mgr); 881 goto Exit; 882 } 883 884 if ((osm_node_get_type(p_node_1) == IB_NODE_TYPE_SWITCH && 885 !osm_node_get_physp_ptr(p_node_1, 0)) || 886 (osm_node_get_type(p_node_2) == IB_NODE_TYPE_SWITCH && 887 !osm_node_get_physp_ptr(p_node_2, 0))) { 888 /* we're caching a link when one of the nodes 889 has already been dropped and cached */ 890 OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, 891 "Port %u <-> port %u: port0 on one of the nodes " 892 "has already been dropped and cached\n", 893 p_physp1->port_num, p_physp2->port_num); 894 goto Exit; 895 } 896 897 /* One of the nodes is switch. Just for code 898 simplicity, make sure that it's the first node. */ 899 900 if (osm_node_get_type(p_node_1) != IB_NODE_TYPE_SWITCH) { 901 osm_physp_t *tmp = p_physp1; 902 p_physp1 = p_physp2; 903 p_physp2 = tmp; 904 p_node_1 = p_physp1->p_node; 905 p_node_2 = p_physp2->p_node; 906 } 907 908 if (!p_node_1->sw) { 909 /* something is wrong - we'd better not use cache */ 910 osm_ucast_cache_invalidate(p_mgr); 911 goto Exit; 912 } 913 914 lid_ho_1 = cl_ntoh16(osm_node_get_base_lid(p_node_1, 0)); 915 916 if (osm_node_get_type(p_node_2) == IB_NODE_TYPE_SWITCH) { 917 918 if (!p_node_2->sw) { 919 /* something is wrong - we'd better not use cache */ 920 osm_ucast_cache_invalidate(p_mgr); 921 goto Exit; 922 } 923 924 lid_ho_2 = cl_ntoh16(osm_node_get_base_lid(p_node_2, 0)); 925 926 /* lost switch-2-switch link - cache both sides */ 927 __cache_add_sw_link(p_mgr, p_physp1, lid_ho_2, FALSE); 928 __cache_add_sw_link(p_mgr, p_physp2, lid_ho_1, FALSE); 929 } else { 930 lid_ho_2 = cl_ntoh16(osm_physp_get_base_lid(p_physp2)); 931 932 /* lost link to CA/RTR - cache only switch side */ 933 __cache_add_sw_link(p_mgr, p_physp1, lid_ho_2, TRUE); 934 } 935 936Exit: 937 OSM_LOG_EXIT(p_mgr->p_log); 938} /* osm_ucast_cache_add_link() */ 939 940/********************************************************************** 941 **********************************************************************/ 942 943void osm_ucast_cache_add_node(osm_ucast_mgr_t * p_mgr, osm_node_t * p_node) 944{ 945 uint16_t lid_ho; 946 uint8_t max_ports; 947 uint8_t port_num; 948 osm_physp_t *p_physp; 949 cache_switch_t *p_cache_sw; 950 951 OSM_LOG_ENTER(p_mgr->p_log); 952 953 if (!p_mgr->cache_valid) 954 goto Exit; 955 956 if (osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH) { 957 958 lid_ho = cl_ntoh16(osm_node_get_base_lid(p_node, 0)); 959 960 OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, 961 "Caching dropped switch lid %u\n", lid_ho); 962 963 if (!p_node->sw) { 964 /* something is wrong - forget about cache */ 965 OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, 966 "ERR AD03: no switch info for node lid %u - " 967 "clearing cache\n", lid_ho); 968 osm_ucast_cache_invalidate(p_mgr); 969 goto Exit; 970 } 971 972 /* unlink (add to cache) all the ports of this switch */ 973 max_ports = osm_node_get_num_physp(p_node); 974 for (port_num = 1; port_num < max_ports; port_num++) { 975 976 p_physp = osm_node_get_physp_ptr(p_node, port_num); 977 if (!p_physp || !p_physp->p_remote_physp) 978 continue; 979 980 osm_ucast_cache_add_link(p_mgr, p_physp, 981 p_physp->p_remote_physp); 982 } 983 984 /* 985 * All the ports have been dropped (cached). 986 * If one of the ports was connected to CA/RTR, 987 * then the cached switch would be marked as leaf. 988 * If it isn't, then the dropped switch isn't a leaf, 989 * and cache can't handle it. 990 */ 991 992 p_cache_sw = __cache_get_sw(p_mgr, lid_ho); 993 CL_ASSERT(p_cache_sw); 994 995 if (!__cache_sw_is_leaf(p_cache_sw)) { 996 OSM_LOG(p_mgr->p_log, OSM_LOG_INFO, 997 "Dropped non-leaf switch (lid %u) - " 998 "cache is invalid\n", lid_ho); 999 osm_ucast_cache_invalidate(p_mgr); 1000 goto Exit; 1001 } 1002 1003 p_cache_sw->dropped = TRUE; 1004 1005 if (!p_node->sw->num_hops || !p_node->sw->hops) { 1006 OSM_LOG(p_mgr->p_log, OSM_LOG_INFO, 1007 "No LID matrices for switch lid %u - " 1008 "cache is invalid\n", lid_ho); 1009 osm_ucast_cache_invalidate(p_mgr); 1010 goto Exit; 1011 } 1012 1013 /* lid matrices */ 1014 1015 p_cache_sw->num_hops = p_node->sw->num_hops; 1016 p_node->sw->num_hops = 0; 1017 p_cache_sw->hops = p_node->sw->hops; 1018 p_node->sw->hops = NULL; 1019 1020 /* linear forwarding table */ 1021 1022 if (p_node->sw->new_lft) { 1023 /* LFT buffer exists - we use it, because 1024 it is more updated than the switch's LFT */ 1025 p_cache_sw->lft = p_node->sw->new_lft; 1026 p_node->sw->new_lft = NULL; 1027 } else { 1028 /* no LFT buffer, so we use the switch's LFT */ 1029 p_cache_sw->lft = p_node->sw->lft; 1030 p_node->sw->lft = NULL; 1031 } 1032 p_cache_sw->max_lid_ho = p_node->sw->max_lid_ho; 1033 } else { 1034 /* dropping CA/RTR: add to cache all the ports of this node */ 1035 max_ports = osm_node_get_num_physp(p_node); 1036 for (port_num = 1; port_num < max_ports; port_num++) { 1037 1038 p_physp = osm_node_get_physp_ptr(p_node, port_num); 1039 if (!p_physp || !p_physp->p_remote_physp) 1040 continue; 1041 1042 CL_ASSERT(osm_node_get_type 1043 (p_physp->p_remote_physp->p_node) == 1044 IB_NODE_TYPE_SWITCH); 1045 1046 osm_ucast_cache_add_link(p_mgr, 1047 p_physp->p_remote_physp, 1048 p_physp); 1049 } 1050 } 1051Exit: 1052 OSM_LOG_EXIT(p_mgr->p_log); 1053} /* osm_ucast_cache_add_node() */ 1054 1055/********************************************************************** 1056 **********************************************************************/ 1057 1058int osm_ucast_cache_process(osm_ucast_mgr_t * p_mgr) 1059{ 1060 cl_qmap_t *tbl = &p_mgr->p_subn->sw_guid_tbl; 1061 cl_map_item_t *item; 1062 osm_switch_t *p_sw; 1063 1064 if (!p_mgr->p_subn->opt.use_ucast_cache) 1065 return 1; 1066 1067 ucast_cache_validate(p_mgr); 1068 if (!p_mgr->cache_valid) 1069 return 1; 1070 1071 OSM_LOG(p_mgr->p_log, OSM_LOG_INFO, 1072 "Configuring switch tables using cached routing\n"); 1073 1074 for (item = cl_qmap_head(tbl); item != cl_qmap_end(tbl); 1075 item = cl_qmap_next(item)) { 1076 p_sw = (osm_switch_t *) item; 1077 1078 if (p_sw->need_update && !p_sw->new_lft) { 1079 /* no new routing was recently calculated for this 1080 switch, but the LFT needs to be updated anyway */ 1081 p_sw->new_lft = p_sw->lft; 1082 p_sw->lft = malloc(IB_LID_UCAST_END_HO + 1); 1083 if (!p_sw->lft) 1084 return IB_INSUFFICIENT_MEMORY; 1085 memset(p_sw->lft, OSM_NO_PATH, IB_LID_UCAST_END_HO + 1); 1086 } 1087 1088 osm_ucast_mgr_set_fwd_table(p_mgr, p_sw); 1089 } 1090 1091 return 0; 1092} 1093 1094/********************************************************************** 1095 **********************************************************************/ 1096